From 7d5e1d12e5c36b66900560a7daa3faca4623f47e Mon Sep 17 00:00:00 2001
From: 4pdadmin <>
Date: Mon, 27 Oct 2025 08:22:57 +0000
Subject: [PATCH] commit file to repo

---
 .gitattributes                                |    1 +
 .gitignore                                    |    0
 configs/20251027_162236.py                    | 1292 +++++++++++++++++
 .../public/jina-reranker-m0@main/lambada.out  |    9 +
 .../jina-reranker-m0@main/lambada_0.out       |   35 +
 .../jina-reranker-m0@main/lambada_1.out       |   35 +
 .../jina-reranker-m0@main/lambada_2.out       |   35 +
 summary/summary_20251027_162236.csv           |   87 ++
 summary/summary_20251027_162236.txt           |  193 +++
 9 files changed, 1687 insertions(+)
 create mode 100644 .gitattributes
 create mode 100644 .gitignore
 create mode 100644 configs/20251027_162236.py
 create mode 100644 logs/eval/public/jina-reranker-m0@main/lambada.out
 create mode 100644 logs/infer/public/jina-reranker-m0@main/lambada_0.out
 create mode 100644 logs/infer/public/jina-reranker-m0@main/lambada_1.out
 create mode 100644 logs/infer/public/jina-reranker-m0@main/lambada_2.out
 create mode 100644 summary/summary_20251027_162236.csv
 create mode 100644 summary/summary_20251027_162236.txt

diff --git a/.gitattributes b/.gitattributes
new file mode 100644
index 0000000..7fe70d7
--- /dev/null
+++ b/.gitattributes
@@ -0,0 +1 @@
+*.json filter=lfs diff=lfs merge=lfs -text
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..e69de29
diff --git a/configs/20251027_162236.py b/configs/20251027_162236.py
new file mode 100644
index 0000000..fbaa457
--- /dev/null
+++ b/configs/20251027_162236.py
@@ -0,0 +1,1292 @@
+datasets=[
+    dict(abbr='lambada',
+        eval_cfg=dict(
+            evaluator=dict(
+                type='opencompass.datasets.LambadaEvaluator')),
+        infer_cfg=dict(
+            inferencer=dict(
+                max_out_len=5,
+                type='opencompass.openicl.icl_inferencer.GenInferencer'),
+            prompt_template=dict(
+                template=dict(
+                    round=[
+                        dict(prompt='Please complete the following sentence:\n{prompt}',
+                            role='HUMAN'),
+                        ]),
+                type='opencompass.openicl.icl_prompt_template.PromptTemplate'),
+            retriever=dict(
+                type='opencompass.openicl.icl_retriever.ZeroRetriever')),
+        path='./data/lambada/test/data-00000-of-00001.arrow',
+        reader_cfg=dict(
+            input_columns=[
+                'prompt',
+                ],
+            output_column='label',
+            test_split='test',
+            train_split='test'),
+        type='opencompass.datasets.lambadaDataset'),
+    ]
+models=[
+    dict(abbr='{{$MODEL_ID:public/jina-reranker-m0@main}}',
+        batch_size=1,
+        key='fee1ce7f2b0843368012dfa938b261db',
+        max_out_len=100,
+        max_seq_len=2048,
+        openai_api_base='{{$MODEL_URL:http://modelhu-b0f7ds-nginx/learnware/models/openai/4pd/api/v1/chat/completions}}',
+        path='{{$MODEL_ID:public/jina-reranker-m0@main}}',
+        temperature=0.95,
+        type='opencompass.models.OpenAI'),
+    ]
+summarizer=dict(
+    dataset_abbrs=[
+        '--------- 考试 Exam ---------',
+        'ceval',
+        'agieval',
+        'mmlu',
+        'GaokaoBench',
+        'ARC-c',
+        '--------- 语言 Language ---------',
+        'WiC',
+        'summedits',
+        'chid-dev',
+        'afqmc-dev',
+        'bustm-dev',
+        'cluewsc-dev',
+        'WSC',
+        'winogrande',
+        'flores_100',
+        '--------- 知识 Knowledge ---------',
+        'BoolQ',
+        'commonsense_qa',
+        'nq',
+        'triviaqa',
+        '--------- 推理 Reasoning ---------',
+        'cmnli',
+        'ocnli',
+        'ocnli_fc-dev',
+        'AX_b',
+        'AX_g',
+        'CB',
+        'RTE',
+        'story_cloze',
+        'COPA',
+        'ReCoRD',
+        'hellaswag',
+        'piqa',
+        'siqa',
+        'strategyqa',
+        'math',
+        'gsm8k',
+        'TheoremQA',
+        'openai_humaneval',
+        'mbpp',
+        'cmmlu',
+        'bbh',
+        '--------- 理解 Understanding ---------',
+        'C3',
+        'CMRC_dev',
+        'DRCD_dev',
+        'MultiRC',
+        'race-middle',
+        'race-high',
+        'openbookqa_fact',
+        'csl_dev',
+        'lcsts',
+        'Xsum',
+        'eprstmt-dev',
+        'lambada',
+        'tnews-dev',
+        '--------- 安全 Safety ---------',
+        'crows_pairs',
+        '--------- LEval Exact Match (Acc) ---------',
+        'LEval_coursera',
+        'LEval_gsm100',
+        'LEval_quality',
+        'LEval_tpo',
+        'LEval_topic_retrieval',
+        '--------- LEval Gen (ROUGE) ---------',
+        'LEval_financialqa',
+        'LEval_gov_report_summ',
+        'LEval_legal_contract_qa',
+        'LEval_meeting_summ',
+        'LEval_multidocqa',
+        'LEval_narrativeqa',
+        'LEval_nq',
+        'LEval_news_summ',
+        'LEval_paper_assistant',
+        'LEval_patent_summ',
+        'LEval_review_summ',
+        'LEval_scientificqa',
+        'LEval_tvshow_summ', '--------- 长文本 LongBench ---------',
+        'longbench_lsht',
+        'longbench_vcsum',
+        'longbench_dureader',
+        'longbench_multifieldqa_zh',
+        'longbench_passage_retrieval_zh',
+        '--------- 单选 自定义数据 ---------',
+        'SageBench-exam',
+        ],
+    prompt_db=dict(
+        blacklist='.promptignore',
+        config_dir='configs/datasets',
+        database_path='configs/datasets/log.json'),
+    summary_groups=[
+        dict(name='agieval-chinese',
+            subsets=[
+                'agieval-gaokao-chinese',
+                'agieval-gaokao-english',
+                'agieval-gaokao-geography',
+                'agieval-gaokao-history',
+                'agieval-gaokao-biology',
+                'agieval-gaokao-chemistry',
+                'agieval-gaokao-physics',
+                'agieval-gaokao-mathqa',
+                'agieval-logiqa-zh',
+                'agieval-jec-qa-kd',
+                'agieval-jec-qa-ca',
+                'agieval-gaokao-mathcloze',
+                ]),
+        dict(name='agieval-english',
+            subsets=[
+                'agieval-lsat-ar',
+                'agieval-lsat-lr',
+                'agieval-lsat-rc',
+                'agieval-logiqa-en',
+                'agieval-sat-math',
+                'agieval-sat-en',
+                'agieval-sat-en-without-passage',
+                'agieval-aqua-rat',
+                'agieval-math',
+                ]),
+        dict(name='agieval-gaokao',
+            subsets=[
+                'agieval-gaokao-chinese',
+                'agieval-gaokao-english',
+                'agieval-gaokao-geography',
+                'agieval-gaokao-history',
+                'agieval-gaokao-biology',
+                'agieval-gaokao-chemistry',
+                'agieval-gaokao-physics',
+                'agieval-gaokao-mathqa',
+                'agieval-gaokao-mathcloze',
+                ]),
+        dict(name='agieval',
+            subsets=[
+                'agieval-gaokao-chinese',
+                'agieval-gaokao-english',
+                'agieval-gaokao-geography',
+                'agieval-gaokao-history',
+                'agieval-gaokao-biology',
+                'agieval-gaokao-chemistry',
+                'agieval-gaokao-physics',
+                'agieval-gaokao-mathqa',
+                'agieval-logiqa-zh',
+                'agieval-lsat-ar',
+                'agieval-lsat-lr',
+                'agieval-lsat-rc',
+                'agieval-logiqa-en',
+                'agieval-sat-math',
+                'agieval-sat-en',
+                'agieval-sat-en-without-passage',
+                'agieval-aqua-rat',
+                'agieval-jec-qa-kd',
+                'agieval-jec-qa-ca',
+                'agieval-gaokao-mathcloze',
+                'agieval-math',
+                ]),
+        dict(name='mmlu-humanities',
+            subsets=[
+                'lukaemon_mmlu_formal_logic',
+                'lukaemon_mmlu_high_school_european_history',
+                'lukaemon_mmlu_high_school_us_history',
+                'lukaemon_mmlu_high_school_world_history',
+                'lukaemon_mmlu_international_law',
+                'lukaemon_mmlu_jurisprudence',
+                'lukaemon_mmlu_logical_fallacies',
+                'lukaemon_mmlu_moral_disputes',
+                'lukaemon_mmlu_moral_scenarios',
+                'lukaemon_mmlu_philosophy',
+                'lukaemon_mmlu_prehistory',
+                'lukaemon_mmlu_professional_law',
+                'lukaemon_mmlu_world_religions',
+                ]),
+        dict(name='mmlu-stem',
+            subsets=[
+                'lukaemon_mmlu_abstract_algebra',
+                'lukaemon_mmlu_anatomy',
+                'lukaemon_mmlu_astronomy',
+                'lukaemon_mmlu_college_biology',
+                'lukaemon_mmlu_college_chemistry',
+                'lukaemon_mmlu_college_computer_science',
+                'lukaemon_mmlu_college_mathematics',
+                'lukaemon_mmlu_college_physics',
+                'lukaemon_mmlu_computer_security',
+                'lukaemon_mmlu_conceptual_physics',
+                'lukaemon_mmlu_electrical_engineering',
+                'lukaemon_mmlu_elementary_mathematics',
+                'lukaemon_mmlu_high_school_biology',
+                'lukaemon_mmlu_high_school_chemistry',
+                'lukaemon_mmlu_high_school_computer_science',
+                'lukaemon_mmlu_high_school_mathematics',
+                'lukaemon_mmlu_high_school_physics',
+                'lukaemon_mmlu_high_school_statistics',
+                'lukaemon_mmlu_machine_learning',
+                ]),
+        dict(name='mmlu-social-science',
+            subsets=[
+                'lukaemon_mmlu_econometrics',
+                'lukaemon_mmlu_high_school_geography',
+                'lukaemon_mmlu_high_school_government_and_politics',
+                'lukaemon_mmlu_high_school_macroeconomics',
+                'lukaemon_mmlu_high_school_microeconomics',
+                'lukaemon_mmlu_high_school_psychology',
+                'lukaemon_mmlu_human_sexuality',
+                'lukaemon_mmlu_professional_psychology',
+                'lukaemon_mmlu_public_relations',
+                'lukaemon_mmlu_security_studies',
+                'lukaemon_mmlu_sociology',
+                'lukaemon_mmlu_us_foreign_policy',
+                ]),
+        dict(name='mmlu-other',
+            subsets=[
+                'lukaemon_mmlu_business_ethics',
+                'lukaemon_mmlu_clinical_knowledge',
+                'lukaemon_mmlu_college_medicine',
+                'lukaemon_mmlu_global_facts',
+                'lukaemon_mmlu_human_aging',
+                'lukaemon_mmlu_management',
+                'lukaemon_mmlu_marketing',
+                'lukaemon_mmlu_medical_genetics',
+                'lukaemon_mmlu_miscellaneous',
+                'lukaemon_mmlu_nutrition',
+                'lukaemon_mmlu_professional_accounting',
+                'lukaemon_mmlu_professional_medicine',
+                'lukaemon_mmlu_virology',
+                ]),
+        dict(name='mmlu',
+            subsets=[
+                'lukaemon_mmlu_formal_logic',
+                'lukaemon_mmlu_high_school_european_history',
+                'lukaemon_mmlu_high_school_us_history',
+                'lukaemon_mmlu_high_school_world_history',
+                'lukaemon_mmlu_international_law',
+                'lukaemon_mmlu_jurisprudence',
+                'lukaemon_mmlu_logical_fallacies',
+                'lukaemon_mmlu_moral_disputes',
+                'lukaemon_mmlu_moral_scenarios',
+                'lukaemon_mmlu_philosophy',
+                'lukaemon_mmlu_prehistory',
+                'lukaemon_mmlu_professional_law',
+                'lukaemon_mmlu_world_religions',
+                'lukaemon_mmlu_abstract_algebra',
+                'lukaemon_mmlu_anatomy',
+                'lukaemon_mmlu_astronomy',
+                'lukaemon_mmlu_college_biology',
+                'lukaemon_mmlu_college_chemistry',
+                'lukaemon_mmlu_college_computer_science',
+                'lukaemon_mmlu_college_mathematics',
+                'lukaemon_mmlu_college_physics',
+                'lukaemon_mmlu_computer_security',
+                'lukaemon_mmlu_conceptual_physics',
+                'lukaemon_mmlu_electrical_engineering',
+                'lukaemon_mmlu_elementary_mathematics',
+                'lukaemon_mmlu_high_school_biology',
+                'lukaemon_mmlu_high_school_chemistry',
+                'lukaemon_mmlu_high_school_computer_science',
+                'lukaemon_mmlu_high_school_mathematics',
+                'lukaemon_mmlu_high_school_physics',
+                'lukaemon_mmlu_high_school_statistics',
+                'lukaemon_mmlu_machine_learning',
+                'lukaemon_mmlu_econometrics',
+                'lukaemon_mmlu_high_school_geography',
+                'lukaemon_mmlu_high_school_government_and_politics',
+                'lukaemon_mmlu_high_school_macroeconomics',
+                'lukaemon_mmlu_high_school_microeconomics',
+                'lukaemon_mmlu_high_school_psychology',
+                'lukaemon_mmlu_human_sexuality',
+                'lukaemon_mmlu_professional_psychology',
+                'lukaemon_mmlu_public_relations',
+                'lukaemon_mmlu_security_studies',
+                'lukaemon_mmlu_sociology',
+                'lukaemon_mmlu_us_foreign_policy',
+                'lukaemon_mmlu_business_ethics',
+                'lukaemon_mmlu_clinical_knowledge',
+                'lukaemon_mmlu_college_medicine',
+                'lukaemon_mmlu_global_facts',
+                'lukaemon_mmlu_human_aging',
+                'lukaemon_mmlu_management',
+                'lukaemon_mmlu_marketing',
+                'lukaemon_mmlu_medical_genetics',
+                'lukaemon_mmlu_miscellaneous',
+                'lukaemon_mmlu_nutrition',
+                'lukaemon_mmlu_professional_accounting',
+                'lukaemon_mmlu_professional_medicine',
+                'lukaemon_mmlu_virology',
+                ]),
+        dict(name='mmlu-weighted',
+            subsets=[
+                'lukaemon_mmlu_formal_logic',
+                'lukaemon_mmlu_high_school_european_history',
+                'lukaemon_mmlu_high_school_us_history',
+                'lukaemon_mmlu_high_school_world_history',
+                'lukaemon_mmlu_international_law',
+                'lukaemon_mmlu_jurisprudence',
+                'lukaemon_mmlu_logical_fallacies',
+                'lukaemon_mmlu_moral_disputes',
+                'lukaemon_mmlu_moral_scenarios',
+                'lukaemon_mmlu_philosophy',
+                'lukaemon_mmlu_prehistory',
+                'lukaemon_mmlu_professional_law',
+                'lukaemon_mmlu_world_religions',
+                'lukaemon_mmlu_abstract_algebra',
+                'lukaemon_mmlu_anatomy',
+                'lukaemon_mmlu_astronomy',
+                'lukaemon_mmlu_college_biology',
+                'lukaemon_mmlu_college_chemistry',
+                'lukaemon_mmlu_college_computer_science',
+                'lukaemon_mmlu_college_mathematics',
+                'lukaemon_mmlu_college_physics',
+                'lukaemon_mmlu_computer_security',
+                'lukaemon_mmlu_conceptual_physics',
+                'lukaemon_mmlu_electrical_engineering',
+                'lukaemon_mmlu_elementary_mathematics',
+                'lukaemon_mmlu_high_school_biology',
+                'lukaemon_mmlu_high_school_chemistry',
+                'lukaemon_mmlu_high_school_computer_science',
+                'lukaemon_mmlu_high_school_mathematics',
+                'lukaemon_mmlu_high_school_physics',
+                'lukaemon_mmlu_high_school_statistics',
+                'lukaemon_mmlu_machine_learning',
+                'lukaemon_mmlu_econometrics',
+                'lukaemon_mmlu_high_school_geography',
+                'lukaemon_mmlu_high_school_government_and_politics',
+                'lukaemon_mmlu_high_school_macroeconomics',
+                'lukaemon_mmlu_high_school_microeconomics',
+                'lukaemon_mmlu_high_school_psychology',
+                'lukaemon_mmlu_human_sexuality',
+                'lukaemon_mmlu_professional_psychology',
+                'lukaemon_mmlu_public_relations',
+                'lukaemon_mmlu_security_studies',
+                'lukaemon_mmlu_sociology',
+                'lukaemon_mmlu_us_foreign_policy',
+                'lukaemon_mmlu_business_ethics',
+                'lukaemon_mmlu_clinical_knowledge',
+                'lukaemon_mmlu_college_medicine',
+                'lukaemon_mmlu_global_facts',
+                'lukaemon_mmlu_human_aging',
+                'lukaemon_mmlu_management',
+                'lukaemon_mmlu_marketing',
+                'lukaemon_mmlu_medical_genetics',
+                'lukaemon_mmlu_miscellaneous',
+                'lukaemon_mmlu_nutrition',
+                'lukaemon_mmlu_professional_accounting',
+                'lukaemon_mmlu_professional_medicine',
+                'lukaemon_mmlu_virology',
+                ],
+            weights=dict(
+                lukaemon_mmlu_abstract_algebra=100,
+                lukaemon_mmlu_anatomy=135,
+                lukaemon_mmlu_astronomy=152,
+                lukaemon_mmlu_business_ethics=100,
+                lukaemon_mmlu_clinical_knowledge=265,
+                lukaemon_mmlu_college_biology=144,
+                lukaemon_mmlu_college_chemistry=100,
+                lukaemon_mmlu_college_computer_science=100,
+                lukaemon_mmlu_college_mathematics=100,
+                lukaemon_mmlu_college_medicine=173,
+                lukaemon_mmlu_college_physics=102,
+                lukaemon_mmlu_computer_security=100,
+                lukaemon_mmlu_conceptual_physics=235,
+                lukaemon_mmlu_econometrics=114,
+                lukaemon_mmlu_electrical_engineering=145,
+                lukaemon_mmlu_elementary_mathematics=378,
+                lukaemon_mmlu_formal_logic=126,
+                lukaemon_mmlu_global_facts=100,
+                lukaemon_mmlu_high_school_biology=310,
+                lukaemon_mmlu_high_school_chemistry=203,
+                lukaemon_mmlu_high_school_computer_science=100,
+                lukaemon_mmlu_high_school_european_history=165,
+                lukaemon_mmlu_high_school_geography=198,
+                lukaemon_mmlu_high_school_government_and_politics=193,
+                lukaemon_mmlu_high_school_macroeconomics=390,
+                lukaemon_mmlu_high_school_mathematics=270,
+                lukaemon_mmlu_high_school_microeconomics=238,
+                lukaemon_mmlu_high_school_physics=151,
+                lukaemon_mmlu_high_school_psychology=545,
+                lukaemon_mmlu_high_school_statistics=216,
+                lukaemon_mmlu_high_school_us_history=204,
+                lukaemon_mmlu_high_school_world_history=237,
+                lukaemon_mmlu_human_aging=223,
+                lukaemon_mmlu_human_sexuality=131,
+                lukaemon_mmlu_international_law=121,
+                lukaemon_mmlu_jurisprudence=108,
+                lukaemon_mmlu_logical_fallacies=163,
+                lukaemon_mmlu_machine_learning=112,
+                lukaemon_mmlu_management=103,
+                lukaemon_mmlu_marketing=234,
+                lukaemon_mmlu_medical_genetics=100,
+                lukaemon_mmlu_miscellaneous=783,
+                lukaemon_mmlu_moral_disputes=346,
+                lukaemon_mmlu_moral_scenarios=895,
+                lukaemon_mmlu_nutrition=306,
+                lukaemon_mmlu_philosophy=311,
+                lukaemon_mmlu_prehistory=324,
+                lukaemon_mmlu_professional_accounting=282,
+                lukaemon_mmlu_professional_law=1534,
+                lukaemon_mmlu_professional_medicine=272,
+                lukaemon_mmlu_professional_psychology=612,
+                lukaemon_mmlu_public_relations=110,
+                lukaemon_mmlu_security_studies=245,
+                lukaemon_mmlu_sociology=201,
+                lukaemon_mmlu_us_foreign_policy=100,
+                lukaemon_mmlu_virology=166,
+                lukaemon_mmlu_world_religions=171)),
+        dict(name='cmmlu-humanities',
+            subsets=[
+                'cmmlu-arts',
+                'cmmlu-chinese_history',
+                'cmmlu-chinese_literature',
+                'cmmlu-college_law',
+                'cmmlu-global_facts',
+                'cmmlu-international_law',
+                'cmmlu-jurisprudence',
+                'cmmlu-logical',
+                'cmmlu-marxist_theory',
+                'cmmlu-philosophy',
+                'cmmlu-professional_law',
+                'cmmlu-world_history',
+                'cmmlu-world_religions',
+                ]),
+        dict(name='cmmlu-stem',
+            subsets=[
+                'cmmlu-anatomy',
+                'cmmlu-astronomy',
+                'cmmlu-college_actuarial_science',
+                'cmmlu-college_engineering_hydrology',
+                'cmmlu-college_mathematics',
+                'cmmlu-college_medical_statistics',
+                'cmmlu-computer_science',
+                'cmmlu-conceptual_physics',
+                'cmmlu-electrical_engineering',
+                'cmmlu-elementary_mathematics',
+                'cmmlu-genetics',
+                'cmmlu-high_school_biology',
+                'cmmlu-high_school_chemistry',
+                'cmmlu-high_school_mathematics',
+                'cmmlu-high_school_physics',
+                'cmmlu-machine_learning',
+                'cmmlu-virology',
+                ]),
+        dict(name='cmmlu-social-science',
+            subsets=[
+                'cmmlu-ancient_chinese',
+                'cmmlu-business_ethics',
+                'cmmlu-chinese_civil_service_exam',
+                'cmmlu-chinese_food_culture',
+                'cmmlu-chinese_foreign_policy',
+                'cmmlu-chinese_teacher_qualification',
+                'cmmlu-college_education',
+                'cmmlu-economics',
+                'cmmlu-education',
+                'cmmlu-elementary_chinese',
+                'cmmlu-ethnology',
+                'cmmlu-high_school_geography',
+                'cmmlu-high_school_politics',
+                'cmmlu-journalism',
+                'cmmlu-management',
+                'cmmlu-marketing',
+                'cmmlu-modern_chinese',
+                'cmmlu-professional_accounting',
+                'cmmlu-professional_psychology',
+                'cmmlu-public_relations',
+                'cmmlu-security_study',
+                'cmmlu-sociology',
+                ]),
+        dict(name='cmmlu-other',
+            subsets=[
+                'cmmlu-agronomy',
+                'cmmlu-chinese_driving_rule',
+                'cmmlu-clinical_knowledge',
+                'cmmlu-college_medicine',
+                'cmmlu-computer_security',
+                'cmmlu-construction_project_management',
+                'cmmlu-elementary_commonsense',
+                'cmmlu-elementary_information_and_technology',
+                'cmmlu-food_science',
+                'cmmlu-human_sexuality',
+                'cmmlu-legal_and_moral_basis',
+                'cmmlu-nutrition',
+                'cmmlu-professional_medicine',
+                'cmmlu-sports_science',
+                'cmmlu-traditional_chinese_medicine',
+                ]),
+        dict(name='cmmlu-china-specific',
+            subsets=[
+                'cmmlu-ancient_chinese',
+                'cmmlu-chinese_civil_service_exam',
+                'cmmlu-chinese_driving_rule',
+                'cmmlu-chinese_food_culture',
+                'cmmlu-chinese_foreign_policy',
+                'cmmlu-chinese_history',
+                'cmmlu-chinese_literature',
+                'cmmlu-chinese_teacher_qualification',
+                'cmmlu-construction_project_management',
+                'cmmlu-elementary_chinese',
+                'cmmlu-elementary_commonsense',
+                'cmmlu-ethnology',
+                'cmmlu-high_school_politics',
+                'cmmlu-modern_chinese',
+                'cmmlu-traditional_chinese_medicine',
+                ]),
+        dict(name='cmmlu',
+            subsets=[
+                'cmmlu-agronomy',
+                'cmmlu-anatomy',
+                'cmmlu-ancient_chinese',
+                'cmmlu-arts',
+                'cmmlu-astronomy',
+                'cmmlu-business_ethics',
+                'cmmlu-chinese_civil_service_exam',
+                'cmmlu-chinese_driving_rule',
+                'cmmlu-chinese_food_culture',
+                'cmmlu-chinese_foreign_policy',
+                'cmmlu-chinese_history',
+                'cmmlu-chinese_literature',
+                'cmmlu-chinese_teacher_qualification',
+                'cmmlu-college_actuarial_science',
+                'cmmlu-college_education',
+                'cmmlu-college_engineering_hydrology',
+                'cmmlu-college_law',
+                'cmmlu-college_mathematics',
+                'cmmlu-college_medical_statistics',
+                'cmmlu-clinical_knowledge',
+                'cmmlu-college_medicine',
+                'cmmlu-computer_science',
+                'cmmlu-computer_security',
+                'cmmlu-conceptual_physics',
+                'cmmlu-construction_project_management',
+                'cmmlu-economics',
+                'cmmlu-education',
+                'cmmlu-elementary_chinese',
+                'cmmlu-elementary_commonsense',
+                'cmmlu-elementary_information_and_technology',
+                'cmmlu-electrical_engineering',
+                'cmmlu-elementary_mathematics',
+                'cmmlu-ethnology',
+                'cmmlu-food_science',
+                'cmmlu-genetics',
+                'cmmlu-global_facts',
+                'cmmlu-high_school_biology',
+                'cmmlu-high_school_chemistry',
+                'cmmlu-high_school_geography',
+                'cmmlu-high_school_mathematics',
+                'cmmlu-high_school_physics',
+                'cmmlu-high_school_politics',
+                'cmmlu-human_sexuality',
+                'cmmlu-international_law',
+                'cmmlu-journalism',
+                'cmmlu-jurisprudence',
+                'cmmlu-legal_and_moral_basis',
+                'cmmlu-logical',
+                'cmmlu-machine_learning',
+                'cmmlu-management',
+                'cmmlu-marketing',
+                'cmmlu-marxist_theory',
+                'cmmlu-modern_chinese',
+                'cmmlu-nutrition',
+                'cmmlu-philosophy',
+                'cmmlu-professional_accounting',
+                'cmmlu-professional_law',
+                'cmmlu-professional_medicine',
+                'cmmlu-professional_psychology',
+                'cmmlu-public_relations',
+                'cmmlu-security_study',
+                'cmmlu-sociology',
+                'cmmlu-sports_science',
+                'cmmlu-traditional_chinese_medicine',
+                'cmmlu-virology',
+                'cmmlu-world_history',
+                'cmmlu-world_religions',
+                ]),
+        dict(name='ceval-stem',
+            subsets=[
+                'ceval-computer_network',
+                'ceval-operating_system',
+                'ceval-computer_architecture',
+                'ceval-college_programming',
+                'ceval-college_physics',
+                'ceval-college_chemistry',
+                'ceval-advanced_mathematics',
+                'ceval-probability_and_statistics',
+                'ceval-discrete_mathematics',
+                'ceval-electrical_engineer',
+                'ceval-metrology_engineer',
+                'ceval-high_school_mathematics',
+                'ceval-high_school_physics',
+                'ceval-high_school_chemistry',
+                'ceval-high_school_biology',
+                'ceval-middle_school_mathematics',
+                'ceval-middle_school_biology',
+                'ceval-middle_school_physics',
+                'ceval-middle_school_chemistry',
+                'ceval-veterinary_medicine',
+                ]),
+        dict(name='ceval-social-science',
+            subsets=[
+                'ceval-college_economics',
+                'ceval-business_administration',
+                'ceval-marxism',
+                'ceval-mao_zedong_thought',
+                'ceval-education_science',
+                'ceval-teacher_qualification',
+                'ceval-high_school_politics',
+                'ceval-high_school_geography',
+                'ceval-middle_school_politics',
+                'ceval-middle_school_geography',
+                ]),
+        dict(name='ceval-humanities',
+            subsets=[
+                'ceval-modern_chinese_history',
+                'ceval-ideological_and_moral_cultivation',
+                'ceval-logic',
+                'ceval-law',
+                'ceval-chinese_language_and_literature',
+                'ceval-art_studies',
+                'ceval-professional_tour_guide',
+                'ceval-legal_professional',
+                'ceval-high_school_chinese',
+                'ceval-high_school_history',
+                'ceval-middle_school_history',
+                ]),
+        dict(name='ceval-other',
+            subsets=[
+                'ceval-civil_servant',
+                'ceval-sports_science',
+                'ceval-plant_protection',
+                'ceval-basic_medicine',
+                'ceval-clinical_medicine',
+                'ceval-urban_and_rural_planner',
+                'ceval-accountant',
+                'ceval-fire_engineer',
+                'ceval-environmental_impact_assessment_engineer',
+                'ceval-tax_accountant',
+                'ceval-physician',
+                ]),
+        dict(name='ceval-hard',
+            subsets=[
+                'ceval-advanced_mathematics',
+                'ceval-discrete_mathematics',
+                'ceval-probability_and_statistics',
+                'ceval-college_chemistry',
+                'ceval-college_physics',
+                'ceval-high_school_mathematics',
+                'ceval-high_school_chemistry',
+                'ceval-high_school_physics',
+                ]),
+        dict(name='ceval',
+            subsets=[
+                'ceval-computer_network',
+                'ceval-operating_system',
+                'ceval-computer_architecture',
+                'ceval-college_programming',
+                'ceval-college_physics',
+                'ceval-college_chemistry',
+                'ceval-advanced_mathematics',
+                'ceval-probability_and_statistics',
+                'ceval-discrete_mathematics',
+                'ceval-electrical_engineer',
+                'ceval-metrology_engineer',
+                'ceval-high_school_mathematics',
+                'ceval-high_school_physics',
+                'ceval-high_school_chemistry',
+                'ceval-high_school_biology',
+                'ceval-middle_school_mathematics',
+                'ceval-middle_school_biology',
+                'ceval-middle_school_physics',
+                'ceval-middle_school_chemistry',
+                'ceval-veterinary_medicine',
+                'ceval-college_economics',
+                'ceval-business_administration',
+                'ceval-marxism',
+                'ceval-mao_zedong_thought',
+                'ceval-education_science',
+                'ceval-teacher_qualification',
+                'ceval-high_school_politics',
+                'ceval-high_school_geography',
+                'ceval-middle_school_politics',
+                'ceval-middle_school_geography',
+                'ceval-modern_chinese_history',
+                'ceval-ideological_and_moral_cultivation',
+                'ceval-logic',
+                'ceval-law',
+                'ceval-chinese_language_and_literature',
+                'ceval-art_studies',
+                'ceval-professional_tour_guide',
+                'ceval-legal_professional',
+                'ceval-high_school_chinese',
+                'ceval-high_school_history',
+                'ceval-middle_school_history',
+                'ceval-civil_servant',
+                'ceval-sports_science',
+                'ceval-plant_protection',
+                'ceval-basic_medicine',
+                'ceval-clinical_medicine',
+                'ceval-urban_and_rural_planner',
+                'ceval-accountant',
+                'ceval-fire_engineer',
+                'ceval-environmental_impact_assessment_engineer',
+                'ceval-tax_accountant',
+                'ceval-physician',
+                ]),
+        dict(name='bbh',
+            subsets=[
+                'bbh-temporal_sequences',
+                'bbh-disambiguation_qa',
+                'bbh-date_understanding',
+                'bbh-tracking_shuffled_objects_three_objects',
+                'bbh-penguins_in_a_table',
+                'bbh-geometric_shapes',
+                'bbh-snarks',
+                'bbh-ruin_names',
+                'bbh-tracking_shuffled_objects_seven_objects',
+                'bbh-tracking_shuffled_objects_five_objects',
+                'bbh-logical_deduction_three_objects',
+                'bbh-hyperbaton',
+                'bbh-logical_deduction_five_objects',
+                'bbh-logical_deduction_seven_objects',
+                'bbh-movie_recommendation',
+                'bbh-salient_translation_error_detection',
+                'bbh-reasoning_about_colored_objects',
+                'bbh-multistep_arithmetic_two',
+                'bbh-navigate',
+                'bbh-dyck_languages',
+                'bbh-word_sorting',
+                'bbh-sports_understanding',
+                'bbh-boolean_expressions',
+                'bbh-object_counting',
+                'bbh-formal_fallacies',
+                'bbh-causal_judgement',
+                'bbh-web_of_lies',
+                ]),
+        dict(name='GaokaoBench',
+            subsets=[
+                'GaokaoBench_2010-2022_Math_II_MCQs',
+                'GaokaoBench_2010-2022_Math_I_MCQs',
+                'GaokaoBench_2010-2022_History_MCQs',
+                'GaokaoBench_2010-2022_Biology_MCQs',
+                'GaokaoBench_2010-2022_Political_Science_MCQs',
+                'GaokaoBench_2010-2022_Physics_MCQs',
+                'GaokaoBench_2010-2022_Chemistry_MCQs',
+                'GaokaoBench_2010-2013_English_MCQs',
+                'GaokaoBench_2010-2022_Chinese_Modern_Lit',
+                'GaokaoBench_2010-2022_English_Fill_in_Blanks',
+                'GaokaoBench_2012-2022_English_Cloze_Test',
+                'GaokaoBench_2010-2022_Geography_MCQs',
+                'GaokaoBench_2010-2022_English_Reading_Comp',
+                'GaokaoBench_2010-2022_Chinese_Lang_and_Usage_MCQs',
+                ],
+            weights=dict(
+                {'GaokaoBench_2010-2013_English_MCQs': 105,
+                'GaokaoBench_2010-2022_Biology_MCQs': 900,
+                'GaokaoBench_2010-2022_Chemistry_MCQs': 744,
+                'GaokaoBench_2010-2022_Chinese_Lang_and_Usage_MCQs': 240,
+                'GaokaoBench_2010-2022_Chinese_Modern_Lit': 261,
+                'GaokaoBench_2010-2022_English_Fill_in_Blanks': 900.0,
+                'GaokaoBench_2010-2022_English_Reading_Comp': 940,
+                'GaokaoBench_2010-2022_Geography_MCQs': 380,
+                'GaokaoBench_2010-2022_History_MCQs': 1148,
+                'GaokaoBench_2010-2022_Math_II_MCQs': 1090,
+                'GaokaoBench_2010-2022_Math_I_MCQs': 1070,
+                'GaokaoBench_2010-2022_Physics_MCQs': 384,
+                'GaokaoBench_2010-2022_Political_Science_MCQs': 1280,
+                'GaokaoBench_2012-2022_English_Cloze_Test': 260})),
+        dict(name='flores_100_Indo-European-Germanic_English',
+            subsets=[
+                'flores_100_afr-eng',
+                'flores_100_dan-eng',
+                'flores_100_deu-eng',
+                'flores_100_isl-eng',
+                'flores_100_ltz-eng',
+                'flores_100_nld-eng',
+                'flores_100_nob-eng',
+                'flores_100_swe-eng',
+                ]),
+        dict(name='flores_100_English_Indo-European-Germanic',
+            subsets=[
+                'flores_100_eng-afr',
+                'flores_100_eng-dan',
+                'flores_100_eng-deu',
+                'flores_100_eng-isl',
+                'flores_100_eng-ltz',
+                'flores_100_eng-nld',
+                'flores_100_eng-nob',
+                'flores_100_eng-swe',
+                ]),
+        dict(name='flores_100_Indo-European-Romance_English',
+            subsets=[
+                'flores_100_ast-eng',
+                'flores_100_cat-eng',
+                'flores_100_fra-eng',
+                'flores_100_glg-eng',
+                'flores_100_oci-eng',
+                'flores_100_por-eng',
+                'flores_100_ron-eng',
+                'flores_100_spa-eng',
+                ]),
+        dict(name='flores_100_English_Indo-European-Romance',
+            subsets=[
+                'flores_100_eng-ast',
+                'flores_100_eng-cat',
+                'flores_100_eng-fra',
+                'flores_100_eng-glg',
+                'flores_100_eng-oci',
+                'flores_100_eng-por',
+                'flores_100_eng-ron',
+                'flores_100_eng-spa',
+                ]),
+        dict(name='flores_100_Indo-European-Slavic_English',
+            subsets=[
+                'flores_100_bel-eng',
+                'flores_100_bos-eng',
+                'flores_100_bul-eng',
+                'flores_100_ces-eng',
+                'flores_100_hrv-eng',
+                'flores_100_mkd-eng',
+                'flores_100_pol-eng',
+                'flores_100_rus-eng',
+                'flores_100_slk-eng',
+                'flores_100_slv-eng',
+                'flores_100_srp-eng',
+                'flores_100_ukr-eng',
+                ]),
+        dict(name='flores_100_English_Indo-European-Slavic',
+            subsets=[
+                'flores_100_eng-bel',
+                'flores_100_eng-bos',
+                'flores_100_eng-bul',
+                'flores_100_eng-ces',
+                'flores_100_eng-hrv',
+                'flores_100_eng-mkd',
+                'flores_100_eng-pol',
+                'flores_100_eng-rus',
+                'flores_100_eng-slk',
+                'flores_100_eng-slv',
+                'flores_100_eng-srp',
+                'flores_100_eng-ukr',
+                ]),
+        dict(name='flores_100_Indo-European-Indo-Aryan_English',
+            subsets=[
+                'flores_100_asm-eng',
+                'flores_100_ben-eng',
+                'flores_100_guj-eng',
+                'flores_100_hin-eng',
+                'flores_100_mar-eng',
+                'flores_100_npi-eng',
+                'flores_100_ory-eng',
+                'flores_100_pan-eng',
+                'flores_100_snd-eng',
+                'flores_100_urd-eng',
+                ]),
+        dict(name='flores_100_English_Indo-European-Indo-Aryan',
+            subsets=[
+                'flores_100_eng-asm',
+                'flores_100_eng-ben',
+                'flores_100_eng-guj',
+                'flores_100_eng-hin',
+                'flores_100_eng-mar',
+                'flores_100_eng-npi',
+                'flores_100_eng-ory',
+                'flores_100_eng-pan',
+                'flores_100_eng-snd',
+                'flores_100_eng-urd',
+                ]),
+        dict(name='flores_100_Indo-European-Other_English',
+            subsets=[
+                'flores_100_ckb-eng',
+                'flores_100_cym-eng',
+                'flores_100_ell-eng',
+                'flores_100_fas-eng',
+                'flores_100_gle-eng',
+                'flores_100_hye-eng',
+                'flores_100_ita-eng',
+                'flores_100_lav-eng',
+                'flores_100_lit-eng',
+                'flores_100_pus-eng',
+                'flores_100_tgk-eng',
+                ]),
+        dict(name='flores_100_English_Indo-European-Other',
+            subsets=[
+                'flores_100_eng-ckb',
+                'flores_100_eng-cym',
+                'flores_100_eng-ell',
+                'flores_100_eng-fas',
+                'flores_100_eng-gle',
+                'flores_100_eng-hye',
+                'flores_100_eng-ita',
+                'flores_100_eng-lav',
+                'flores_100_eng-lit',
+                'flores_100_eng-pus',
+                'flores_100_eng-tgk',
+                ]),
+        dict(name='flores_100_Austronesian_English',
+            subsets=[
+                'flores_100_ceb-eng',
+                'flores_100_ind-eng',
+                'flores_100_jav-eng',
+                'flores_100_mri-eng',
+                'flores_100_msa-eng',
+                'flores_100_tgl-eng',
+                ]),
+        dict(name='flores_100_English_Austronesian',
+            subsets=[
+                'flores_100_eng-ceb',
+                'flores_100_eng-ind',
+                'flores_100_eng-jav',
+                'flores_100_eng-mri',
+                'flores_100_eng-msa',
+                'flores_100_eng-tgl',
+                ]),
+        dict(name='flores_100_Atlantic-Congo_English',
+            subsets=[
+                'flores_100_ibo-eng',
+                'flores_100_kam-eng',
+                'flores_100_kea-eng',
+                'flores_100_lin-eng',
+                'flores_100_lug-eng',
+                'flores_100_nso-eng',
+                'flores_100_nya-eng',
+                'flores_100_sna-eng',
+                'flores_100_swh-eng',
+                'flores_100_umb-eng',
+                'flores_100_wol-eng',
+                'flores_100_xho-eng',
+                'flores_100_yor-eng',
+                'flores_100_zul-eng',
+                ]),
+        dict(name='flores_100_English_Atlantic-Congo',
+            subsets=[
+                'flores_100_eng-ibo',
+                'flores_100_eng-kam',
+                'flores_100_eng-kea',
+                'flores_100_eng-lin',
+                'flores_100_eng-lug',
+                'flores_100_eng-nso',
+                'flores_100_eng-nya',
+                'flores_100_eng-sna',
+                'flores_100_eng-swh',
+                'flores_100_eng-umb',
+                'flores_100_eng-wol',
+                'flores_100_eng-xho',
+                'flores_100_eng-yor',
+                'flores_100_eng-zul',
+                ]),
+        dict(name='flores_100_Afro-Asiatic_English',
+            subsets=[
+                'flores_100_amh-eng',
+                'flores_100_ara-eng',
+                'flores_100_ful-eng',
+                'flores_100_mlt-eng',
+                'flores_100_orm-eng',
+                'flores_100_som-eng',
+                ]),
+        dict(name='flores_100_English_Afro-Asiatic',
+            subsets=[
+                'flores_100_eng-amh',
+                'flores_100_eng-ara',
+                'flores_100_eng-ful',
+                'flores_100_eng-mlt',
+                'flores_100_eng-orm',
+                'flores_100_eng-som',
+                ]),
+        dict(name='flores_100_Turkic_English',
+            subsets=[
+                'flores_100_azj-eng',
+                'flores_100_kaz-eng',
+                'flores_100_kir-eng',
+                'flores_100_tur-eng',
+                'flores_100_uzb-eng',
+                ]),
+        dict(name='flores_100_English_Turkic',
+            subsets=[
+                'flores_100_eng-azj',
+                'flores_100_eng-kaz',
+                'flores_100_eng-kir',
+                'flores_100_eng-tur',
+                'flores_100_eng-uzb',
+                ]),
+        dict(name='flores_100_Dravidian_English',
+            subsets=[
+                'flores_100_kan-eng',
+                'flores_100_mal-eng',
+                'flores_100_tam-eng',
+                'flores_100_tel-eng',
+                ]),
+        dict(name='flores_100_English_Dravidian',
+            subsets=[
+                'flores_100_eng-kan',
+                'flores_100_eng-mal',
+                'flores_100_eng-tam',
+                'flores_100_eng-tel',
+                ]),
+        dict(name='flores_100_Sino-Tibetan_English',
+            subsets=[
+                'flores_100_mya-eng',
+                'flores_100_zho_simpl-eng',
+                'flores_100_zho_trad-eng',
+                ]),
+        dict(name='flores_100_English_Sino-Tibetan',
+            subsets=[
+                'flores_100_eng-mya',
+                'flores_100_eng-zho_simpl',
+                'flores_100_eng-zho_trad',
+                ]),
+        dict(name='flores_100_Other_English',
+            subsets=[
+                'flores_100_est-eng',
+                'flores_100_fin-eng',
+                'flores_100_hau-eng',
+                'flores_100_heb-eng',
+                'flores_100_hun-eng',
+                'flores_100_jpn-eng',
+                'flores_100_kat-eng',
+                'flores_100_khm-eng',
+                'flores_100_kor-eng',
+                'flores_100_lao-eng',
+                'flores_100_luo-eng',
+                'flores_100_mon-eng',
+                'flores_100_tha-eng',
+                'flores_100_vie-eng',
+                ]),
+        dict(name='flores_100_English_Other',
+            subsets=[
+                'flores_100_eng-est',
+                'flores_100_eng-fin',
+                'flores_100_eng-hau',
+                'flores_100_eng-heb',
+                'flores_100_eng-hun',
+                'flores_100_eng-jpn',
+                'flores_100_eng-kat',
+                'flores_100_eng-khm',
+                'flores_100_eng-kor',
+                'flores_100_eng-lao',
+                'flores_100_eng-luo',
+                'flores_100_eng-mon',
+                'flores_100_eng-tha',
+                'flores_100_eng-vie',
+                ]),
+        dict(name='flores_100',
+            subsets=[
+                'flores_100_afr-eng',
+                'flores_100_dan-eng',
+                'flores_100_deu-eng',
+                'flores_100_isl-eng',
+                'flores_100_ltz-eng',
+                'flores_100_nld-eng',
+                'flores_100_nob-eng',
+                'flores_100_swe-eng',
+                'flores_100_ast-eng',
+                'flores_100_cat-eng',
+                'flores_100_fra-eng',
+                'flores_100_glg-eng',
+                'flores_100_oci-eng',
+                'flores_100_por-eng',
+                'flores_100_ron-eng',
+                'flores_100_spa-eng',
+                'flores_100_bel-eng',
+                'flores_100_bos-eng',
+                'flores_100_bul-eng',
+                'flores_100_ces-eng',
+                'flores_100_hrv-eng',
+                'flores_100_mkd-eng',
+                'flores_100_pol-eng',
+                'flores_100_rus-eng',
+                'flores_100_slk-eng',
+                'flores_100_slv-eng',
+                'flores_100_srp-eng',
+                'flores_100_ukr-eng',
+                'flores_100_asm-eng',
+                'flores_100_ben-eng',
+                'flores_100_guj-eng',
+                'flores_100_hin-eng',
+                'flores_100_mar-eng',
+                'flores_100_npi-eng',
+                'flores_100_ory-eng',
+                'flores_100_pan-eng',
+                'flores_100_snd-eng',
+                'flores_100_urd-eng',
+                'flores_100_ckb-eng',
+                'flores_100_cym-eng',
+                'flores_100_ell-eng',
+                'flores_100_fas-eng',
+                'flores_100_gle-eng',
+                'flores_100_hye-eng',
+                'flores_100_ita-eng',
+                'flores_100_lav-eng',
+                'flores_100_lit-eng',
+                'flores_100_pus-eng',
+                'flores_100_tgk-eng',
+                'flores_100_ceb-eng',
+                'flores_100_ind-eng',
+                'flores_100_jav-eng',
+                'flores_100_mri-eng',
+                'flores_100_msa-eng',
+                'flores_100_tgl-eng',
+                'flores_100_ibo-eng',
+                'flores_100_kam-eng',
+                'flores_100_kea-eng',
+                'flores_100_lin-eng',
+                'flores_100_lug-eng',
+                'flores_100_nso-eng',
+                'flores_100_nya-eng',
+                'flores_100_sna-eng',
+                'flores_100_swh-eng',
+                'flores_100_umb-eng',
+                'flores_100_wol-eng',
+                'flores_100_xho-eng',
+                'flores_100_yor-eng',
+                'flores_100_zul-eng',
+                'flores_100_amh-eng',
+                'flores_100_ara-eng',
+                'flores_100_ful-eng',
+                'flores_100_mlt-eng',
+                'flores_100_orm-eng',
+                'flores_100_som-eng',
+                'flores_100_azj-eng',
+                'flores_100_kaz-eng',
+                'flores_100_kir-eng',
+                'flores_100_tur-eng',
+                'flores_100_uzb-eng',
+                'flores_100_kan-eng',
+                'flores_100_mal-eng',
+                'flores_100_tam-eng',
+                'flores_100_tel-eng',
+                'flores_100_mya-eng',
+                'flores_100_zho_simpl-eng',
+                'flores_100_zho_trad-eng',
+                'flores_100_est-eng',
+                'flores_100_fin-eng',
+                'flores_100_hau-eng',
+                'flores_100_heb-eng',
+                'flores_100_hun-eng',
+                'flores_100_jpn-eng',
+                'flores_100_kat-eng',
+                'flores_100_khm-eng',
+                'flores_100_kor-eng',
+                'flores_100_lao-eng',
+                'flores_100_luo-eng',
+                'flores_100_mon-eng',
+                'flores_100_tha-eng',
+                'flores_100_vie-eng',
+                'flores_100_eng-afr',
+                'flores_100_eng-dan',
+                'flores_100_eng-deu',
+                'flores_100_eng-isl',
+                'flores_100_eng-ltz',
+                'flores_100_eng-nld',
+                'flores_100_eng-nob',
+                'flores_100_eng-swe',
+                'flores_100_eng-ast',
+                'flores_100_eng-cat',
+                'flores_100_eng-fra',
+                'flores_100_eng-glg',
+                'flores_100_eng-oci',
+                'flores_100_eng-por',
+                'flores_100_eng-ron',
+                'flores_100_eng-spa',
+                'flores_100_eng-bel',
+                'flores_100_eng-bos',
+                'flores_100_eng-bul',
+                'flores_100_eng-ces',
+                'flores_100_eng-hrv',
+                'flores_100_eng-mkd',
+                'flores_100_eng-pol',
+                'flores_100_eng-rus',
+                'flores_100_eng-slk',
+                'flores_100_eng-slv',
+                'flores_100_eng-srp',
+                'flores_100_eng-ukr',
+                'flores_100_eng-asm',
+                'flores_100_eng-ben',
+                'flores_100_eng-guj',
+                'flores_100_eng-hin',
+                'flores_100_eng-mar',
+                'flores_100_eng-npi',
+                'flores_100_eng-ory',
+                'flores_100_eng-pan',
+                'flores_100_eng-snd',
+                'flores_100_eng-urd',
+                'flores_100_eng-ckb',
+                'flores_100_eng-cym',
+                'flores_100_eng-ell',
+                'flores_100_eng-fas',
+                'flores_100_eng-gle',
+                'flores_100_eng-hye',
+                'flores_100_eng-ita',
+                'flores_100_eng-lav',
+                'flores_100_eng-lit',
+                'flores_100_eng-pus',
+                'flores_100_eng-tgk',
+                'flores_100_eng-ceb',
+                'flores_100_eng-ind',
+                'flores_100_eng-jav',
+                'flores_100_eng-mri',
+                'flores_100_eng-msa',
+                'flores_100_eng-tgl',
+                'flores_100_eng-ibo',
+                'flores_100_eng-kam',
+                'flores_100_eng-kea',
+                'flores_100_eng-lin',
+                'flores_100_eng-lug',
+                'flores_100_eng-nso',
+                'flores_100_eng-nya',
+                'flores_100_eng-sna',
+                'flores_100_eng-swh',
+                'flores_100_eng-umb',
+                'flores_100_eng-wol',
+                'flores_100_eng-xho',
+                'flores_100_eng-yor',
+                'flores_100_eng-zul',
+                'flores_100_eng-amh',
+                'flores_100_eng-ara',
+                'flores_100_eng-ful',
+                'flores_100_eng-mlt',
+                'flores_100_eng-orm',
+                'flores_100_eng-som',
+                'flores_100_eng-azj',
+                'flores_100_eng-kaz',
+                'flores_100_eng-kir',
+                'flores_100_eng-tur',
+                'flores_100_eng-uzb',
+                'flores_100_eng-kan',
+                'flores_100_eng-mal',
+                'flores_100_eng-tam',
+                'flores_100_eng-tel',
+                'flores_100_eng-mya',
+                'flores_100_eng-zho_simpl',
+                'flores_100_eng-zho_trad',
+                'flores_100_eng-est',
+                'flores_100_eng-fin',
+                'flores_100_eng-hau',
+                'flores_100_eng-heb',
+                'flores_100_eng-hun',
+                'flores_100_eng-jpn',
+                'flores_100_eng-kat',
+                'flores_100_eng-khm',
+                'flores_100_eng-kor',
+                'flores_100_eng-lao',
+                'flores_100_eng-luo',
+                'flores_100_eng-mon',
+                'flores_100_eng-tha',
+                'flores_100_eng-vie',
+                ]),
+        dict(name='jigsaw_multilingual',
+            subsets=[
+                'jigsaw_multilingual_es',
+                'jigsaw_multilingual_fr',
+                'jigsaw_multilingual_it',
+                'jigsaw_multilingual_pt',
+                'jigsaw_multilingual_ru',
+                'jigsaw_multilingual_tr',
+                ]),
+        ])
+work_dir='outputs/demo/20251027_162236'
\ No newline at end of file
diff --git a/logs/eval/public/jina-reranker-m0@main/lambada.out b/logs/eval/public/jina-reranker-m0@main/lambada.out
new file mode 100644
index 0000000..0b5c4aa
--- /dev/null
+++ b/logs/eval/public/jina-reranker-m0@main/lambada.out
@@ -0,0 +1,9 @@
+[4pdvGPU Msg(853:139765910674432:libvgpu.c:873)]: Initializing.....
+[4pdvGPU Msg(853:139765910674432:multiprocess_memory_limit.c:144)]: uuid GPU-96268743-978a-9d8b-c966-e0c72728f828 validated
+[4pdvGPU Msg(853:139765910674432:multiprocess_memory_limit.c:144)]: uuid GPU-d7cd3537-3d95-c1ab-84e6-c51d5fdef981 validated
+[4pdvGPU ERROR (pid:853 thread=139765910674432 libvgpu.c:924)]: cuInit failed:100
+10/27 16:22:52 - OpenCompass - ERROR - /models/opencompass/opencompass/tasks/openicl_eval.py - _score - 163 - Task [public/jina-reranker-m0@main/lambada]: No predictions found.
+10/27 16:22:52 - OpenCompass - INFO - time elapsed: 2.08s
+/opt/conda/lib/python3.8/site-packages/fuzzywuzzy/fuzz.py:11: UserWarning: Using slow pure-python SequenceMatcher. Install python-Levenshtein to remove this warning
+  warnings.warn('Using slow pure-python SequenceMatcher. Install python-Levenshtein to remove this warning')
+[4pdvGPU Msg(853:139765910674432:multiprocess_memory_limit.c:543)]: Calling exit handler 853
diff --git a/logs/infer/public/jina-reranker-m0@main/lambada_0.out b/logs/infer/public/jina-reranker-m0@main/lambada_0.out
new file mode 100644
index 0000000..e029f13
--- /dev/null
+++ b/logs/infer/public/jina-reranker-m0@main/lambada_0.out
@@ -0,0 +1,35 @@
+[4pdvGPU Msg(274:139803958045696:libvgpu.c:873)]: Initializing.....
+[4pdvGPU Msg(274:139803958045696:multiprocess_memory_limit.c:144)]: uuid GPU-96268743-978a-9d8b-c966-e0c72728f828 validated
+[4pdvGPU Msg(274:139803958045696:multiprocess_memory_limit.c:144)]: uuid GPU-d7cd3537-3d95-c1ab-84e6-c51d5fdef981 validated
+[4pdvGPU ERROR (pid:274 thread=139803958045696 libvgpu.c:924)]: cuInit failed:100
+10/27 16:22:43 - OpenCompass - INFO - Task [public/jina-reranker-m0@main/lambada_0]
+10/27 16:22:46 - OpenCompass - INFO - Start inferencing [public/jina-reranker-m0@main/lambada_0]
+/opt/conda/lib/python3.8/site-packages/fuzzywuzzy/fuzz.py:11: UserWarning: Using slow pure-python SequenceMatcher. Install python-Levenshtein to remove this warning
+  warnings.warn('Using slow pure-python SequenceMatcher. Install python-Levenshtein to remove this warning')
+[2025-10-27 16:22:46,260] [opencompass.openicl.icl_inferencer.icl_gen_inferencer] [INFO] Starting inference process...
+  0%|          | 0/1718 [00:00<?, ?it/s]  0%|          | 0/1718 [00:00<?, ?it/s]
+Traceback (most recent call last):
+  File "/models/opencompass/opencompass/tasks/openicl_infer.py", line 147, in <module>
+    inferencer.run()
+  File "/models/opencompass/opencompass/tasks/openicl_infer.py", line 76, in run
+    self._inference()
+  File "/models/opencompass/opencompass/tasks/openicl_infer.py", line 119, in _inference
+    inferencer.inference(retriever,
+  File "/models/opencompass/opencompass/openicl/icl_inferencer/icl_gen_inferencer.py", line 122, in inference
+    results = self.model.generate_from_template(
+  File "/models/opencompass/opencompass/models/base.py", line 117, in generate_from_template
+    return self.generate(inputs, max_out_len=max_out_len, **kwargs)
+  File "/models/opencompass/opencompass/models/openai_api.py", line 123, in generate
+    results = list(
+  File "/opt/conda/lib/python3.8/concurrent/futures/_base.py", line 619, in result_iterator
+    yield fs.pop().result()
+  File "/opt/conda/lib/python3.8/concurrent/futures/_base.py", line 444, in result
+    return self.__get_result()
+  File "/opt/conda/lib/python3.8/concurrent/futures/_base.py", line 389, in __get_result
+    raise self._exception
+  File "/opt/conda/lib/python3.8/concurrent/futures/thread.py", line 57, in run
+    result = self.fn(*self.args, **self.kwargs)
+  File "/models/opencompass/opencompass/models/openai_api.py", line 235, in _generate
+    return response['choices'][0]['message']['content'].strip()
+TypeError: 'NoneType' object is not subscriptable
+[4pdvGPU Msg(274:139803958045696:multiprocess_memory_limit.c:543)]: Calling exit handler 274
diff --git a/logs/infer/public/jina-reranker-m0@main/lambada_1.out b/logs/infer/public/jina-reranker-m0@main/lambada_1.out
new file mode 100644
index 0000000..91ca252
--- /dev/null
+++ b/logs/infer/public/jina-reranker-m0@main/lambada_1.out
@@ -0,0 +1,35 @@
+[4pdvGPU Msg(277:140097609071616:libvgpu.c:873)]: Initializing.....
+[4pdvGPU Msg(277:140097609071616:multiprocess_memory_limit.c:144)]: uuid GPU-96268743-978a-9d8b-c966-e0c72728f828 validated
+[4pdvGPU Msg(277:140097609071616:multiprocess_memory_limit.c:144)]: uuid GPU-d7cd3537-3d95-c1ab-84e6-c51d5fdef981 validated
+[4pdvGPU ERROR (pid:277 thread=140097609071616 libvgpu.c:924)]: cuInit failed:100
+10/27 16:22:43 - OpenCompass - INFO - Task [public/jina-reranker-m0@main/lambada_1]
+10/27 16:22:46 - OpenCompass - INFO - Start inferencing [public/jina-reranker-m0@main/lambada_1]
+/opt/conda/lib/python3.8/site-packages/fuzzywuzzy/fuzz.py:11: UserWarning: Using slow pure-python SequenceMatcher. Install python-Levenshtein to remove this warning
+  warnings.warn('Using slow pure-python SequenceMatcher. Install python-Levenshtein to remove this warning')
+[2025-10-27 16:22:46,211] [opencompass.openicl.icl_inferencer.icl_gen_inferencer] [INFO] Starting inference process...
+  0%|          | 0/1718 [00:00<?, ?it/s]  0%|          | 0/1718 [00:00<?, ?it/s]
+Traceback (most recent call last):
+  File "/models/opencompass/opencompass/tasks/openicl_infer.py", line 147, in <module>
+    inferencer.run()
+  File "/models/opencompass/opencompass/tasks/openicl_infer.py", line 76, in run
+    self._inference()
+  File "/models/opencompass/opencompass/tasks/openicl_infer.py", line 119, in _inference
+    inferencer.inference(retriever,
+  File "/models/opencompass/opencompass/openicl/icl_inferencer/icl_gen_inferencer.py", line 122, in inference
+    results = self.model.generate_from_template(
+  File "/models/opencompass/opencompass/models/base.py", line 117, in generate_from_template
+    return self.generate(inputs, max_out_len=max_out_len, **kwargs)
+  File "/models/opencompass/opencompass/models/openai_api.py", line 123, in generate
+    results = list(
+  File "/opt/conda/lib/python3.8/concurrent/futures/_base.py", line 619, in result_iterator
+    yield fs.pop().result()
+  File "/opt/conda/lib/python3.8/concurrent/futures/_base.py", line 444, in result
+    return self.__get_result()
+  File "/opt/conda/lib/python3.8/concurrent/futures/_base.py", line 389, in __get_result
+    raise self._exception
+  File "/opt/conda/lib/python3.8/concurrent/futures/thread.py", line 57, in run
+    result = self.fn(*self.args, **self.kwargs)
+  File "/models/opencompass/opencompass/models/openai_api.py", line 235, in _generate
+    return response['choices'][0]['message']['content'].strip()
+TypeError: 'NoneType' object is not subscriptable
+[4pdvGPU Msg(277:140097609071616:multiprocess_memory_limit.c:543)]: Calling exit handler 277
diff --git a/logs/infer/public/jina-reranker-m0@main/lambada_2.out b/logs/infer/public/jina-reranker-m0@main/lambada_2.out
new file mode 100644
index 0000000..36214f4
--- /dev/null
+++ b/logs/infer/public/jina-reranker-m0@main/lambada_2.out
@@ -0,0 +1,35 @@
+[4pdvGPU Msg(282:140164578561024:libvgpu.c:873)]: Initializing.....
+[4pdvGPU Msg(282:140164578561024:multiprocess_memory_limit.c:144)]: uuid GPU-96268743-978a-9d8b-c966-e0c72728f828 validated
+[4pdvGPU Msg(282:140164578561024:multiprocess_memory_limit.c:144)]: uuid GPU-d7cd3537-3d95-c1ab-84e6-c51d5fdef981 validated
+[4pdvGPU ERROR (pid:282 thread=140164578561024 libvgpu.c:924)]: cuInit failed:100
+10/27 16:22:43 - OpenCompass - INFO - Task [public/jina-reranker-m0@main/lambada_2]
+10/27 16:22:46 - OpenCompass - INFO - Start inferencing [public/jina-reranker-m0@main/lambada_2]
+/opt/conda/lib/python3.8/site-packages/fuzzywuzzy/fuzz.py:11: UserWarning: Using slow pure-python SequenceMatcher. Install python-Levenshtein to remove this warning
+  warnings.warn('Using slow pure-python SequenceMatcher. Install python-Levenshtein to remove this warning')
+[2025-10-27 16:22:46,281] [opencompass.openicl.icl_inferencer.icl_gen_inferencer] [INFO] Starting inference process...
+  0%|          | 0/1717 [00:00<?, ?it/s]  0%|          | 0/1717 [00:00<?, ?it/s]
+Traceback (most recent call last):
+  File "/models/opencompass/opencompass/tasks/openicl_infer.py", line 147, in <module>
+    inferencer.run()
+  File "/models/opencompass/opencompass/tasks/openicl_infer.py", line 76, in run
+    self._inference()
+  File "/models/opencompass/opencompass/tasks/openicl_infer.py", line 119, in _inference
+    inferencer.inference(retriever,
+  File "/models/opencompass/opencompass/openicl/icl_inferencer/icl_gen_inferencer.py", line 122, in inference
+    results = self.model.generate_from_template(
+  File "/models/opencompass/opencompass/models/base.py", line 117, in generate_from_template
+    return self.generate(inputs, max_out_len=max_out_len, **kwargs)
+  File "/models/opencompass/opencompass/models/openai_api.py", line 123, in generate
+    results = list(
+  File "/opt/conda/lib/python3.8/concurrent/futures/_base.py", line 619, in result_iterator
+    yield fs.pop().result()
+  File "/opt/conda/lib/python3.8/concurrent/futures/_base.py", line 444, in result
+    return self.__get_result()
+  File "/opt/conda/lib/python3.8/concurrent/futures/_base.py", line 389, in __get_result
+    raise self._exception
+  File "/opt/conda/lib/python3.8/concurrent/futures/thread.py", line 57, in run
+    result = self.fn(*self.args, **self.kwargs)
+  File "/models/opencompass/opencompass/models/openai_api.py", line 235, in _generate
+    return response['choices'][0]['message']['content'].strip()
+TypeError: 'NoneType' object is not subscriptable
+[4pdvGPU Msg(282:140164578561024:multiprocess_memory_limit.c:543)]: Calling exit handler 282
diff --git a/summary/summary_20251027_162236.csv b/summary/summary_20251027_162236.csv
new file mode 100644
index 0000000..ce03534
--- /dev/null
+++ b/summary/summary_20251027_162236.csv
@@ -0,0 +1,87 @@
+dataset,version,metric,mode,public/jina-reranker-m0@main
+--------- 考试 Exam ---------,-,-,-,-
+ceval,-,-,-,-
+agieval,-,-,-,-
+mmlu,-,-,-,-
+GaokaoBench,-,-,-,-
+ARC-c,-,-,-,-
+--------- 语言 Language ---------,-,-,-,-
+WiC,-,-,-,-
+summedits,-,-,-,-
+chid-dev,-,-,-,-
+afqmc-dev,-,-,-,-
+bustm-dev,-,-,-,-
+cluewsc-dev,-,-,-,-
+WSC,-,-,-,-
+winogrande,-,-,-,-
+flores_100,-,-,-,-
+--------- 知识 Knowledge ---------,-,-,-,-
+BoolQ,-,-,-,-
+commonsense_qa,-,-,-,-
+nq,-,-,-,-
+triviaqa,-,-,-,-
+--------- 推理 Reasoning ---------,-,-,-,-
+cmnli,-,-,-,-
+ocnli,-,-,-,-
+ocnli_fc-dev,-,-,-,-
+AX_b,-,-,-,-
+AX_g,-,-,-,-
+CB,-,-,-,-
+RTE,-,-,-,-
+story_cloze,-,-,-,-
+COPA,-,-,-,-
+ReCoRD,-,-,-,-
+hellaswag,-,-,-,-
+piqa,-,-,-,-
+siqa,-,-,-,-
+strategyqa,-,-,-,-
+math,-,-,-,-
+gsm8k,-,-,-,-
+TheoremQA,-,-,-,-
+openai_humaneval,-,-,-,-
+mbpp,-,-,-,-
+cmmlu,-,-,-,-
+bbh,-,-,-,-
+--------- 理解 Understanding ---------,-,-,-,-
+C3,-,-,-,-
+CMRC_dev,-,-,-,-
+DRCD_dev,-,-,-,-
+MultiRC,-,-,-,-
+race-middle,-,-,-,-
+race-high,-,-,-,-
+openbookqa_fact,-,-,-,-
+csl_dev,-,-,-,-
+lcsts,-,-,-,-
+Xsum,-,-,-,-
+eprstmt-dev,-,-,-,-
+lambada,-,-,-,-
+tnews-dev,-,-,-,-
+--------- 安全 Safety ---------,-,-,-,-
+crows_pairs,-,-,-,-
+--------- LEval Exact Match (Acc) ---------,-,-,-,-
+LEval_coursera,-,-,-,-
+LEval_gsm100,-,-,-,-
+LEval_quality,-,-,-,-
+LEval_tpo,-,-,-,-
+LEval_topic_retrieval,-,-,-,-
+--------- LEval Gen (ROUGE) ---------,-,-,-,-
+LEval_financialqa,-,-,-,-
+LEval_gov_report_summ,-,-,-,-
+LEval_legal_contract_qa,-,-,-,-
+LEval_meeting_summ,-,-,-,-
+LEval_multidocqa,-,-,-,-
+LEval_narrativeqa,-,-,-,-
+LEval_nq,-,-,-,-
+LEval_news_summ,-,-,-,-
+LEval_paper_assistant,-,-,-,-
+LEval_patent_summ,-,-,-,-
+LEval_review_summ,-,-,-,-
+LEval_scientificqa,-,-,-,-
+LEval_tvshow_summ--------- 长文本 LongBench ---------,-,-,-,-
+longbench_lsht,-,-,-,-
+longbench_vcsum,-,-,-,-
+longbench_dureader,-,-,-,-
+longbench_multifieldqa_zh,-,-,-,-
+longbench_passage_retrieval_zh,-,-,-,-
+--------- 单选 自定义数据 ---------,-,-,-,-
+SageBench-exam,-,-,-,-
diff --git a/summary/summary_20251027_162236.txt b/summary/summary_20251027_162236.txt
new file mode 100644
index 0000000..70f997f
--- /dev/null
+++ b/summary/summary_20251027_162236.txt
@@ -0,0 +1,193 @@
+20251027_162236
+tabulate format
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+dataset                                                version    metric    mode    public/jina-reranker-m0@main
+-----------------------------------------------------  ---------  --------  ------  ------------------------------
+--------- 考试 Exam ---------                          -          -         -       -
+ceval                                                  -          -         -       -
+agieval                                                -          -         -       -
+mmlu                                                   -          -         -       -
+GaokaoBench                                            -          -         -       -
+ARC-c                                                  -          -         -       -
+--------- 语言 Language ---------                      -          -         -       -
+WiC                                                    -          -         -       -
+summedits                                              -          -         -       -
+chid-dev                                               -          -         -       -
+afqmc-dev                                              -          -         -       -
+bustm-dev                                              -          -         -       -
+cluewsc-dev                                            -          -         -       -
+WSC                                                    -          -         -       -
+winogrande                                             -          -         -       -
+flores_100                                             -          -         -       -
+--------- 知识 Knowledge ---------                     -          -         -       -
+BoolQ                                                  -          -         -       -
+commonsense_qa                                         -          -         -       -
+nq                                                     -          -         -       -
+triviaqa                                               -          -         -       -
+--------- 推理 Reasoning ---------                     -          -         -       -
+cmnli                                                  -          -         -       -
+ocnli                                                  -          -         -       -
+ocnli_fc-dev                                           -          -         -       -
+AX_b                                                   -          -         -       -
+AX_g                                                   -          -         -       -
+CB                                                     -          -         -       -
+RTE                                                    -          -         -       -
+story_cloze                                            -          -         -       -
+COPA                                                   -          -         -       -
+ReCoRD                                                 -          -         -       -
+hellaswag                                              -          -         -       -
+piqa                                                   -          -         -       -
+siqa                                                   -          -         -       -
+strategyqa                                             -          -         -       -
+math                                                   -          -         -       -
+gsm8k                                                  -          -         -       -
+TheoremQA                                              -          -         -       -
+openai_humaneval                                       -          -         -       -
+mbpp                                                   -          -         -       -
+cmmlu                                                  -          -         -       -
+bbh                                                    -          -         -       -
+--------- 理解 Understanding ---------                 -          -         -       -
+C3                                                     -          -         -       -
+CMRC_dev                                               -          -         -       -
+DRCD_dev                                               -          -         -       -
+MultiRC                                                -          -         -       -
+race-middle                                            -          -         -       -
+race-high                                              -          -         -       -
+openbookqa_fact                                        -          -         -       -
+csl_dev                                                -          -         -       -
+lcsts                                                  -          -         -       -
+Xsum                                                   -          -         -       -
+eprstmt-dev                                            -          -         -       -
+lambada                                                -          -         -       -
+tnews-dev                                              -          -         -       -
+--------- 安全 Safety ---------                        -          -         -       -
+crows_pairs                                            -          -         -       -
+--------- LEval Exact Match (Acc) ---------            -          -         -       -
+LEval_coursera                                         -          -         -       -
+LEval_gsm100                                           -          -         -       -
+LEval_quality                                          -          -         -       -
+LEval_tpo                                              -          -         -       -
+LEval_topic_retrieval                                  -          -         -       -
+--------- LEval Gen (ROUGE) ---------                  -          -         -       -
+LEval_financialqa                                      -          -         -       -
+LEval_gov_report_summ                                  -          -         -       -
+LEval_legal_contract_qa                                -          -         -       -
+LEval_meeting_summ                                     -          -         -       -
+LEval_multidocqa                                       -          -         -       -
+LEval_narrativeqa                                      -          -         -       -
+LEval_nq                                               -          -         -       -
+LEval_news_summ                                        -          -         -       -
+LEval_paper_assistant                                  -          -         -       -
+LEval_patent_summ                                      -          -         -       -
+LEval_review_summ                                      -          -         -       -
+LEval_scientificqa                                     -          -         -       -
+LEval_tvshow_summ--------- 长文本 LongBench ---------  -          -         -       -
+longbench_lsht                                         -          -         -       -
+longbench_vcsum                                        -          -         -       -
+longbench_dureader                                     -          -         -       -
+longbench_multifieldqa_zh                              -          -         -       -
+longbench_passage_retrieval_zh                         -          -         -       -
+--------- 单选 自定义数据 ---------                    -          -         -       -
+SageBench-exam                                         -          -         -       -
+$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$
+
+-------------------------------------------------------------------------------------------------------------------------------- THIS IS A DIVIDER --------------------------------------------------------------------------------------------------------------------------------
+
+csv format
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+dataset,version,metric,mode,public/jina-reranker-m0@main
+--------- 考试 Exam ---------,-,-,-,-
+ceval,-,-,-,-
+agieval,-,-,-,-
+mmlu,-,-,-,-
+GaokaoBench,-,-,-,-
+ARC-c,-,-,-,-
+--------- 语言 Language ---------,-,-,-,-
+WiC,-,-,-,-
+summedits,-,-,-,-
+chid-dev,-,-,-,-
+afqmc-dev,-,-,-,-
+bustm-dev,-,-,-,-
+cluewsc-dev,-,-,-,-
+WSC,-,-,-,-
+winogrande,-,-,-,-
+flores_100,-,-,-,-
+--------- 知识 Knowledge ---------,-,-,-,-
+BoolQ,-,-,-,-
+commonsense_qa,-,-,-,-
+nq,-,-,-,-
+triviaqa,-,-,-,-
+--------- 推理 Reasoning ---------,-,-,-,-
+cmnli,-,-,-,-
+ocnli,-,-,-,-
+ocnli_fc-dev,-,-,-,-
+AX_b,-,-,-,-
+AX_g,-,-,-,-
+CB,-,-,-,-
+RTE,-,-,-,-
+story_cloze,-,-,-,-
+COPA,-,-,-,-
+ReCoRD,-,-,-,-
+hellaswag,-,-,-,-
+piqa,-,-,-,-
+siqa,-,-,-,-
+strategyqa,-,-,-,-
+math,-,-,-,-
+gsm8k,-,-,-,-
+TheoremQA,-,-,-,-
+openai_humaneval,-,-,-,-
+mbpp,-,-,-,-
+cmmlu,-,-,-,-
+bbh,-,-,-,-
+--------- 理解 Understanding ---------,-,-,-,-
+C3,-,-,-,-
+CMRC_dev,-,-,-,-
+DRCD_dev,-,-,-,-
+MultiRC,-,-,-,-
+race-middle,-,-,-,-
+race-high,-,-,-,-
+openbookqa_fact,-,-,-,-
+csl_dev,-,-,-,-
+lcsts,-,-,-,-
+Xsum,-,-,-,-
+eprstmt-dev,-,-,-,-
+lambada,-,-,-,-
+tnews-dev,-,-,-,-
+--------- 安全 Safety ---------,-,-,-,-
+crows_pairs,-,-,-,-
+--------- LEval Exact Match (Acc) ---------,-,-,-,-
+LEval_coursera,-,-,-,-
+LEval_gsm100,-,-,-,-
+LEval_quality,-,-,-,-
+LEval_tpo,-,-,-,-
+LEval_topic_retrieval,-,-,-,-
+--------- LEval Gen (ROUGE) ---------,-,-,-,-
+LEval_financialqa,-,-,-,-
+LEval_gov_report_summ,-,-,-,-
+LEval_legal_contract_qa,-,-,-,-
+LEval_meeting_summ,-,-,-,-
+LEval_multidocqa,-,-,-,-
+LEval_narrativeqa,-,-,-,-
+LEval_nq,-,-,-,-
+LEval_news_summ,-,-,-,-
+LEval_paper_assistant,-,-,-,-
+LEval_patent_summ,-,-,-,-
+LEval_review_summ,-,-,-,-
+LEval_scientificqa,-,-,-,-
+LEval_tvshow_summ--------- 长文本 LongBench ---------,-,-,-,-
+longbench_lsht,-,-,-,-
+longbench_vcsum,-,-,-,-
+longbench_dureader,-,-,-,-
+longbench_multifieldqa_zh,-,-,-,-
+longbench_passage_retrieval_zh,-,-,-,-
+--------- 单选 自定义数据 ---------,-,-,-,-
+SageBench-exam,-,-,-,-
+$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$
+
+-------------------------------------------------------------------------------------------------------------------------------- THIS IS A DIVIDER --------------------------------------------------------------------------------------------------------------------------------
+
+raw format
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+-------------------------------
+Model: public/jina-reranker-m0@main
+$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$