2.0 KiB
2.0 KiB
| 1 | dataset | version | metric | mode | public/qwen2-5-vl-7b-instruct-awq@main |
|---|---|---|---|---|---|
| 2 | --------- 考试 Exam --------- | - | - | - | - |
| 3 | ceval | - | - | - | - |
| 4 | agieval | - | - | - | - |
| 5 | mmlu | - | - | - | - |
| 6 | GaokaoBench | - | - | - | - |
| 7 | ARC-c | - | - | - | - |
| 8 | --------- 语言 Language --------- | - | - | - | - |
| 9 | WiC | - | - | - | - |
| 10 | summedits | - | - | - | - |
| 11 | chid-dev | - | - | - | - |
| 12 | afqmc-dev | - | - | - | - |
| 13 | bustm-dev | - | - | - | - |
| 14 | cluewsc-dev | - | - | - | - |
| 15 | WSC | - | - | - | - |
| 16 | winogrande | - | - | - | - |
| 17 | flores_100 | - | - | - | - |
| 18 | --------- 知识 Knowledge --------- | - | - | - | - |
| 19 | BoolQ | - | - | - | - |
| 20 | commonsense_qa | - | - | - | - |
| 21 | nq | - | - | - | - |
| 22 | triviaqa | - | - | - | - |
| 23 | --------- 推理 Reasoning --------- | - | - | - | - |
| 24 | cmnli | - | - | - | - |
| 25 | ocnli | - | - | - | - |
| 26 | ocnli_fc-dev | - | - | - | - |
| 27 | AX_b | - | - | - | - |
| 28 | AX_g | - | - | - | - |
| 29 | CB | - | - | - | - |
| 30 | RTE | - | - | - | - |
| 31 | story_cloze | - | - | - | - |
| 32 | COPA | - | - | - | - |
| 33 | ReCoRD | - | - | - | - |
| 34 | hellaswag | - | - | - | - |
| 35 | piqa | - | - | - | - |
| 36 | siqa | - | - | - | - |
| 37 | strategyqa | - | - | - | - |
| 38 | math | - | - | - | - |
| 39 | gsm8k | - | - | - | - |
| 40 | TheoremQA | - | - | - | - |
| 41 | openai_humaneval | - | - | - | - |
| 42 | mbpp | - | - | - | - |
| 43 | cmmlu | - | - | - | - |
| 44 | bbh | - | - | - | - |
| 45 | --------- 理解 Understanding --------- | - | - | - | - |
| 46 | C3 | - | - | - | - |
| 47 | CMRC_dev | - | - | - | - |
| 48 | DRCD_dev | - | - | - | - |
| 49 | MultiRC | - | - | - | - |
| 50 | race-middle | - | - | - | - |
| 51 | race-high | - | - | - | - |
| 52 | openbookqa_fact | - | - | - | - |
| 53 | csl_dev | - | - | - | - |
| 54 | lcsts | - | - | - | - |
| 55 | Xsum | - | - | - | - |
| 56 | eprstmt-dev | - | - | - | - |
| 57 | lambada | - | - | - | - |
| 58 | tnews-dev | - | - | - | - |
| 59 | --------- 安全 Safety --------- | - | - | - | - |
| 60 | crows_pairs | - | - | - | - |
| 61 | --------- LEval Exact Match (Acc) --------- | - | - | - | - |
| 62 | LEval_coursera | - | - | - | - |
| 63 | LEval_gsm100 | - | - | - | - |
| 64 | LEval_quality | - | - | - | - |
| 65 | LEval_tpo | - | - | - | - |
| 66 | LEval_topic_retrieval | - | - | - | - |
| 67 | --------- LEval Gen (ROUGE) --------- | - | - | - | - |
| 68 | LEval_financialqa | - | - | - | - |
| 69 | LEval_gov_report_summ | - | - | - | - |
| 70 | LEval_legal_contract_qa | - | - | - | - |
| 71 | LEval_meeting_summ | - | - | - | - |
| 72 | LEval_multidocqa | - | - | - | - |
| 73 | LEval_narrativeqa | - | - | - | - |
| 74 | LEval_nq | - | - | - | - |
| 75 | LEval_news_summ | - | - | - | - |
| 76 | LEval_paper_assistant | - | - | - | - |
| 77 | LEval_patent_summ | - | - | - | - |
| 78 | LEval_review_summ | - | - | - | - |
| 79 | LEval_scientificqa | - | - | - | - |
| 80 | LEval_tvshow_summ | - | - | - | - |
| 81 | --------- 长文本 LongBench --------- | - | - | - | - |
| 82 | longbench_lsht | - | - | - | - |
| 83 | longbench_vcsum | - | - | - | - |
| 84 | longbench_dureader | - | - | - | - |
| 85 | longbench_multifieldqa_zh | - | - | - | - |
| 86 | longbench_passage_retrieval_zh | - | - | - | - |
| 87 | --------- 单选 自定义数据 --------- | - | - | - | - |
| 88 | SageBench-exam | - | - | - | - |