eval-sense-voice-small/summary/summary_20251010_200419.txt
2025-10-10 12:04:40 +00:00

194 lines
11 KiB
Plaintext

20251010_200419
tabulate format
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
dataset version metric mode public/sense-voice-small@main
----------------------------------------------------- --------- -------- ------ -------------------------------
--------- 考试 Exam --------- - - - -
ceval - - - -
agieval - - - -
mmlu - - - -
GaokaoBench - - - -
ARC-c - - - -
--------- 语言 Language --------- - - - -
WiC - - - -
summedits - - - -
chid-dev - - - -
afqmc-dev - - - -
bustm-dev - - - -
cluewsc-dev - - - -
WSC - - - -
winogrande - - - -
flores_100 - - - -
--------- 知识 Knowledge --------- - - - -
BoolQ - - - -
commonsense_qa - - - -
nq - - - -
triviaqa - - - -
--------- 推理 Reasoning --------- - - - -
cmnli - - - -
ocnli - - - -
ocnli_fc-dev - - - -
AX_b - - - -
AX_g - - - -
CB - - - -
RTE - - - -
story_cloze - - - -
COPA - - - -
ReCoRD - - - -
hellaswag - - - -
piqa - - - -
siqa - - - -
strategyqa - - - -
math - - - -
gsm8k - - - -
TheoremQA - - - -
openai_humaneval - - - -
mbpp - - - -
cmmlu - - - -
bbh - - - -
--------- 理解 Understanding --------- - - - -
C3 - - - -
CMRC_dev - - - -
DRCD_dev - - - -
MultiRC - - - -
race-middle - - - -
race-high - - - -
openbookqa_fact - - - -
csl_dev - - - -
lcsts - - - -
Xsum - - - -
eprstmt-dev - - - -
lambada - - - -
tnews-dev - - - -
--------- 安全 Safety --------- - - - -
crows_pairs - - - -
--------- LEval Exact Match (Acc) --------- - - - -
LEval_coursera - - - -
LEval_gsm100 - - - -
LEval_quality - - - -
LEval_tpo - - - -
LEval_topic_retrieval - - - -
--------- LEval Gen (ROUGE) --------- - - - -
LEval_financialqa - - - -
LEval_gov_report_summ - - - -
LEval_legal_contract_qa - - - -
LEval_meeting_summ - - - -
LEval_multidocqa - - - -
LEval_narrativeqa - - - -
LEval_nq - - - -
LEval_news_summ - - - -
LEval_paper_assistant - - - -
LEval_patent_summ - - - -
LEval_review_summ - - - -
LEval_scientificqa - - - -
LEval_tvshow_summ - - - -
--------- 长文本 LongBench --------- - - - -
longbench_lsht - - - -
longbench_vcsum - - - -
longbench_dureader - - - -
longbench_multifieldqa_zh - - - -
longbench_passage_retrieval_zh - - - -
--------- 单选 自定义数据 --------- - - - -
SageBench-exam - - - -
$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$
-------------------------------------------------------------------------------------------------------------------------------- THIS IS A DIVIDER --------------------------------------------------------------------------------------------------------------------------------
csv format
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
dataset,version,metric,mode,public/sense-voice-small@main
--------- 考试 Exam ---------,-,-,-,-
ceval,-,-,-,-
agieval,-,-,-,-
mmlu,-,-,-,-
GaokaoBench,-,-,-,-
ARC-c,-,-,-,-
--------- 语言 Language ---------,-,-,-,-
WiC,-,-,-,-
summedits,-,-,-,-
chid-dev,-,-,-,-
afqmc-dev,-,-,-,-
bustm-dev,-,-,-,-
cluewsc-dev,-,-,-,-
WSC,-,-,-,-
winogrande,-,-,-,-
flores_100,-,-,-,-
--------- 知识 Knowledge ---------,-,-,-,-
BoolQ,-,-,-,-
commonsense_qa,-,-,-,-
nq,-,-,-,-
triviaqa,-,-,-,-
--------- 推理 Reasoning ---------,-,-,-,-
cmnli,-,-,-,-
ocnli,-,-,-,-
ocnli_fc-dev,-,-,-,-
AX_b,-,-,-,-
AX_g,-,-,-,-
CB,-,-,-,-
RTE,-,-,-,-
story_cloze,-,-,-,-
COPA,-,-,-,-
ReCoRD,-,-,-,-
hellaswag,-,-,-,-
piqa,-,-,-,-
siqa,-,-,-,-
strategyqa,-,-,-,-
math,-,-,-,-
gsm8k,-,-,-,-
TheoremQA,-,-,-,-
openai_humaneval,-,-,-,-
mbpp,-,-,-,-
cmmlu,-,-,-,-
bbh,-,-,-,-
--------- 理解 Understanding ---------,-,-,-,-
C3,-,-,-,-
CMRC_dev,-,-,-,-
DRCD_dev,-,-,-,-
MultiRC,-,-,-,-
race-middle,-,-,-,-
race-high,-,-,-,-
openbookqa_fact,-,-,-,-
csl_dev,-,-,-,-
lcsts,-,-,-,-
Xsum,-,-,-,-
eprstmt-dev,-,-,-,-
lambada,-,-,-,-
tnews-dev,-,-,-,-
--------- 安全 Safety ---------,-,-,-,-
crows_pairs,-,-,-,-
--------- LEval Exact Match (Acc) ---------,-,-,-,-
LEval_coursera,-,-,-,-
LEval_gsm100,-,-,-,-
LEval_quality,-,-,-,-
LEval_tpo,-,-,-,-
LEval_topic_retrieval,-,-,-,-
--------- LEval Gen (ROUGE) ---------,-,-,-,-
LEval_financialqa,-,-,-,-
LEval_gov_report_summ,-,-,-,-
LEval_legal_contract_qa,-,-,-,-
LEval_meeting_summ,-,-,-,-
LEval_multidocqa,-,-,-,-
LEval_narrativeqa,-,-,-,-
LEval_nq,-,-,-,-
LEval_news_summ,-,-,-,-
LEval_paper_assistant,-,-,-,-
LEval_patent_summ,-,-,-,-
LEval_review_summ,-,-,-,-
LEval_scientificqa,-,-,-,-
LEval_tvshow_summ,-,-,-,-
--------- 长文本 LongBench ---------,-,-,-,-
longbench_lsht,-,-,-,-
longbench_vcsum,-,-,-,-
longbench_dureader,-,-,-,-
longbench_multifieldqa_zh,-,-,-,-
longbench_passage_retrieval_zh,-,-,-,-
--------- 单选 自定义数据 ---------,-,-,-,-
SageBench-exam,-,-,-,-
$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$
-------------------------------------------------------------------------------------------------------------------------------- THIS IS A DIVIDER --------------------------------------------------------------------------------------------------------------------------------
raw format
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-------------------------------
Model: public/sense-voice-small@main
$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$