88 lines
1.9 KiB
Plaintext
88 lines
1.9 KiB
Plaintext
|
|
dataset,version,metric,mode,public/sense-voice-small@main
|
||
|
|
--------- 考试 Exam ---------,-,-,-,-
|
||
|
|
ceval,-,-,-,-
|
||
|
|
agieval,-,-,-,-
|
||
|
|
mmlu,-,-,-,-
|
||
|
|
GaokaoBench,-,-,-,-
|
||
|
|
ARC-c,-,-,-,-
|
||
|
|
--------- 语言 Language ---------,-,-,-,-
|
||
|
|
WiC,-,-,-,-
|
||
|
|
summedits,-,-,-,-
|
||
|
|
chid-dev,-,-,-,-
|
||
|
|
afqmc-dev,-,-,-,-
|
||
|
|
bustm-dev,-,-,-,-
|
||
|
|
cluewsc-dev,-,-,-,-
|
||
|
|
WSC,-,-,-,-
|
||
|
|
winogrande,-,-,-,-
|
||
|
|
flores_100,-,-,-,-
|
||
|
|
--------- 知识 Knowledge ---------,-,-,-,-
|
||
|
|
BoolQ,-,-,-,-
|
||
|
|
commonsense_qa,-,-,-,-
|
||
|
|
nq,-,-,-,-
|
||
|
|
triviaqa,-,-,-,-
|
||
|
|
--------- 推理 Reasoning ---------,-,-,-,-
|
||
|
|
cmnli,-,-,-,-
|
||
|
|
ocnli,-,-,-,-
|
||
|
|
ocnli_fc-dev,-,-,-,-
|
||
|
|
AX_b,-,-,-,-
|
||
|
|
AX_g,-,-,-,-
|
||
|
|
CB,-,-,-,-
|
||
|
|
RTE,-,-,-,-
|
||
|
|
story_cloze,-,-,-,-
|
||
|
|
COPA,-,-,-,-
|
||
|
|
ReCoRD,-,-,-,-
|
||
|
|
hellaswag,-,-,-,-
|
||
|
|
piqa,-,-,-,-
|
||
|
|
siqa,-,-,-,-
|
||
|
|
strategyqa,-,-,-,-
|
||
|
|
math,-,-,-,-
|
||
|
|
gsm8k,-,-,-,-
|
||
|
|
TheoremQA,-,-,-,-
|
||
|
|
openai_humaneval,-,-,-,-
|
||
|
|
mbpp,-,-,-,-
|
||
|
|
cmmlu,-,-,-,-
|
||
|
|
bbh,-,-,-,-
|
||
|
|
--------- 理解 Understanding ---------,-,-,-,-
|
||
|
|
C3,-,-,-,-
|
||
|
|
CMRC_dev,-,-,-,-
|
||
|
|
DRCD_dev,-,-,-,-
|
||
|
|
MultiRC,-,-,-,-
|
||
|
|
race-middle,-,-,-,-
|
||
|
|
race-high,-,-,-,-
|
||
|
|
openbookqa_fact,-,-,-,-
|
||
|
|
csl_dev,-,-,-,-
|
||
|
|
lcsts,-,-,-,-
|
||
|
|
Xsum,-,-,-,-
|
||
|
|
eprstmt-dev,-,-,-,-
|
||
|
|
lambada,-,-,-,-
|
||
|
|
tnews-dev,-,-,-,-
|
||
|
|
--------- 安全 Safety ---------,-,-,-,-
|
||
|
|
crows_pairs,-,-,-,-
|
||
|
|
--------- LEval Exact Match (Acc) ---------,-,-,-,-
|
||
|
|
LEval_coursera,-,-,-,-
|
||
|
|
LEval_gsm100,-,-,-,-
|
||
|
|
LEval_quality,-,-,-,-
|
||
|
|
LEval_tpo,-,-,-,-
|
||
|
|
LEval_topic_retrieval,-,-,-,-
|
||
|
|
--------- LEval Gen (ROUGE) ---------,-,-,-,-
|
||
|
|
LEval_financialqa,-,-,-,-
|
||
|
|
LEval_gov_report_summ,-,-,-,-
|
||
|
|
LEval_legal_contract_qa,-,-,-,-
|
||
|
|
LEval_meeting_summ,-,-,-,-
|
||
|
|
LEval_multidocqa,-,-,-,-
|
||
|
|
LEval_narrativeqa,-,-,-,-
|
||
|
|
LEval_nq,-,-,-,-
|
||
|
|
LEval_news_summ,-,-,-,-
|
||
|
|
LEval_paper_assistant,-,-,-,-
|
||
|
|
LEval_patent_summ,-,-,-,-
|
||
|
|
LEval_review_summ,-,-,-,-
|
||
|
|
LEval_scientificqa,-,-,-,-
|
||
|
|
LEval_tvshow_summ,-,-,-,-
|
||
|
|
--------- 长文本 LongBench ---------,-,-,-,-
|
||
|
|
longbench_lsht,-,-,-,-
|
||
|
|
longbench_vcsum,-,-,-,-
|
||
|
|
longbench_dureader,-,-,-,-
|
||
|
|
longbench_multifieldqa_zh,-,-,-,-
|
||
|
|
longbench_passage_retrieval_zh,-,-,-,-
|
||
|
|
--------- 单选 自定义数据 ---------,-,-,-,-
|
||
|
|
SageBench-exam,-,-,-,-
|