From 7d5e1d12e5c36b66900560a7daa3faca4623f47e Mon Sep 17 00:00:00 2001 From: 4pdadmin <> Date: Mon, 27 Oct 2025 08:22:57 +0000 Subject: [PATCH] commit file to repo --- .gitattributes | 1 + .gitignore | 0 configs/20251027_162236.py | 1292 +++++++++++++++++ .../public/jina-reranker-m0@main/lambada.out | 9 + .../jina-reranker-m0@main/lambada_0.out | 35 + .../jina-reranker-m0@main/lambada_1.out | 35 + .../jina-reranker-m0@main/lambada_2.out | 35 + summary/summary_20251027_162236.csv | 87 ++ summary/summary_20251027_162236.txt | 193 +++ 9 files changed, 1687 insertions(+) create mode 100644 .gitattributes create mode 100644 .gitignore create mode 100644 configs/20251027_162236.py create mode 100644 logs/eval/public/jina-reranker-m0@main/lambada.out create mode 100644 logs/infer/public/jina-reranker-m0@main/lambada_0.out create mode 100644 logs/infer/public/jina-reranker-m0@main/lambada_1.out create mode 100644 logs/infer/public/jina-reranker-m0@main/lambada_2.out create mode 100644 summary/summary_20251027_162236.csv create mode 100644 summary/summary_20251027_162236.txt diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..7fe70d7 --- /dev/null +++ b/.gitattributes @@ -0,0 +1 @@ +*.json filter=lfs diff=lfs merge=lfs -text diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..e69de29 diff --git a/configs/20251027_162236.py b/configs/20251027_162236.py new file mode 100644 index 0000000..fbaa457 --- /dev/null +++ b/configs/20251027_162236.py @@ -0,0 +1,1292 @@ +datasets=[ + dict(abbr='lambada', + eval_cfg=dict( + evaluator=dict( + type='opencompass.datasets.LambadaEvaluator')), + infer_cfg=dict( + inferencer=dict( + max_out_len=5, + type='opencompass.openicl.icl_inferencer.GenInferencer'), + prompt_template=dict( + template=dict( + round=[ + dict(prompt='Please complete the following sentence:\n{prompt}', + role='HUMAN'), + ]), + type='opencompass.openicl.icl_prompt_template.PromptTemplate'), + retriever=dict( + type='opencompass.openicl.icl_retriever.ZeroRetriever')), + path='./data/lambada/test/data-00000-of-00001.arrow', + reader_cfg=dict( + input_columns=[ + 'prompt', + ], + output_column='label', + test_split='test', + train_split='test'), + type='opencompass.datasets.lambadaDataset'), + ] +models=[ + dict(abbr='{{$MODEL_ID:public/jina-reranker-m0@main}}', + batch_size=1, + key='fee1ce7f2b0843368012dfa938b261db', + max_out_len=100, + max_seq_len=2048, + openai_api_base='{{$MODEL_URL:http://modelhu-b0f7ds-nginx/learnware/models/openai/4pd/api/v1/chat/completions}}', + path='{{$MODEL_ID:public/jina-reranker-m0@main}}', + temperature=0.95, + type='opencompass.models.OpenAI'), + ] +summarizer=dict( + dataset_abbrs=[ + '--------- 考试 Exam ---------', + 'ceval', + 'agieval', + 'mmlu', + 'GaokaoBench', + 'ARC-c', + '--------- 语言 Language ---------', + 'WiC', + 'summedits', + 'chid-dev', + 'afqmc-dev', + 'bustm-dev', + 'cluewsc-dev', + 'WSC', + 'winogrande', + 'flores_100', + '--------- 知识 Knowledge ---------', + 'BoolQ', + 'commonsense_qa', + 'nq', + 'triviaqa', + '--------- 推理 Reasoning ---------', + 'cmnli', + 'ocnli', + 'ocnli_fc-dev', + 'AX_b', + 'AX_g', + 'CB', + 'RTE', + 'story_cloze', + 'COPA', + 'ReCoRD', + 'hellaswag', + 'piqa', + 'siqa', + 'strategyqa', + 'math', + 'gsm8k', + 'TheoremQA', + 'openai_humaneval', + 'mbpp', + 'cmmlu', + 'bbh', + '--------- 理解 Understanding ---------', + 'C3', + 'CMRC_dev', + 'DRCD_dev', + 'MultiRC', + 'race-middle', + 'race-high', + 'openbookqa_fact', + 'csl_dev', + 'lcsts', + 'Xsum', + 'eprstmt-dev', + 'lambada', + 'tnews-dev', + '--------- 安全 Safety ---------', + 'crows_pairs', + '--------- LEval Exact Match (Acc) ---------', + 'LEval_coursera', + 'LEval_gsm100', + 'LEval_quality', + 'LEval_tpo', + 'LEval_topic_retrieval', + '--------- LEval Gen (ROUGE) ---------', + 'LEval_financialqa', + 'LEval_gov_report_summ', + 'LEval_legal_contract_qa', + 'LEval_meeting_summ', + 'LEval_multidocqa', + 'LEval_narrativeqa', + 'LEval_nq', + 'LEval_news_summ', + 'LEval_paper_assistant', + 'LEval_patent_summ', + 'LEval_review_summ', + 'LEval_scientificqa', + 'LEval_tvshow_summ--------- 长文本 LongBench ---------', + 'longbench_lsht', + 'longbench_vcsum', + 'longbench_dureader', + 'longbench_multifieldqa_zh', + 'longbench_passage_retrieval_zh', + '--------- 单选 自定义数据 ---------', + 'SageBench-exam', + ], + prompt_db=dict( + blacklist='.promptignore', + config_dir='configs/datasets', + database_path='configs/datasets/log.json'), + summary_groups=[ + dict(name='agieval-chinese', + subsets=[ + 'agieval-gaokao-chinese', + 'agieval-gaokao-english', + 'agieval-gaokao-geography', + 'agieval-gaokao-history', + 'agieval-gaokao-biology', + 'agieval-gaokao-chemistry', + 'agieval-gaokao-physics', + 'agieval-gaokao-mathqa', + 'agieval-logiqa-zh', + 'agieval-jec-qa-kd', + 'agieval-jec-qa-ca', + 'agieval-gaokao-mathcloze', + ]), + dict(name='agieval-english', + subsets=[ + 'agieval-lsat-ar', + 'agieval-lsat-lr', + 'agieval-lsat-rc', + 'agieval-logiqa-en', + 'agieval-sat-math', + 'agieval-sat-en', + 'agieval-sat-en-without-passage', + 'agieval-aqua-rat', + 'agieval-math', + ]), + dict(name='agieval-gaokao', + subsets=[ + 'agieval-gaokao-chinese', + 'agieval-gaokao-english', + 'agieval-gaokao-geography', + 'agieval-gaokao-history', + 'agieval-gaokao-biology', + 'agieval-gaokao-chemistry', + 'agieval-gaokao-physics', + 'agieval-gaokao-mathqa', + 'agieval-gaokao-mathcloze', + ]), + dict(name='agieval', + subsets=[ + 'agieval-gaokao-chinese', + 'agieval-gaokao-english', + 'agieval-gaokao-geography', + 'agieval-gaokao-history', + 'agieval-gaokao-biology', + 'agieval-gaokao-chemistry', + 'agieval-gaokao-physics', + 'agieval-gaokao-mathqa', + 'agieval-logiqa-zh', + 'agieval-lsat-ar', + 'agieval-lsat-lr', + 'agieval-lsat-rc', + 'agieval-logiqa-en', + 'agieval-sat-math', + 'agieval-sat-en', + 'agieval-sat-en-without-passage', + 'agieval-aqua-rat', + 'agieval-jec-qa-kd', + 'agieval-jec-qa-ca', + 'agieval-gaokao-mathcloze', + 'agieval-math', + ]), + dict(name='mmlu-humanities', + subsets=[ + 'lukaemon_mmlu_formal_logic', + 'lukaemon_mmlu_high_school_european_history', + 'lukaemon_mmlu_high_school_us_history', + 'lukaemon_mmlu_high_school_world_history', + 'lukaemon_mmlu_international_law', + 'lukaemon_mmlu_jurisprudence', + 'lukaemon_mmlu_logical_fallacies', + 'lukaemon_mmlu_moral_disputes', + 'lukaemon_mmlu_moral_scenarios', + 'lukaemon_mmlu_philosophy', + 'lukaemon_mmlu_prehistory', + 'lukaemon_mmlu_professional_law', + 'lukaemon_mmlu_world_religions', + ]), + dict(name='mmlu-stem', + subsets=[ + 'lukaemon_mmlu_abstract_algebra', + 'lukaemon_mmlu_anatomy', + 'lukaemon_mmlu_astronomy', + 'lukaemon_mmlu_college_biology', + 'lukaemon_mmlu_college_chemistry', + 'lukaemon_mmlu_college_computer_science', + 'lukaemon_mmlu_college_mathematics', + 'lukaemon_mmlu_college_physics', + 'lukaemon_mmlu_computer_security', + 'lukaemon_mmlu_conceptual_physics', + 'lukaemon_mmlu_electrical_engineering', + 'lukaemon_mmlu_elementary_mathematics', + 'lukaemon_mmlu_high_school_biology', + 'lukaemon_mmlu_high_school_chemistry', + 'lukaemon_mmlu_high_school_computer_science', + 'lukaemon_mmlu_high_school_mathematics', + 'lukaemon_mmlu_high_school_physics', + 'lukaemon_mmlu_high_school_statistics', + 'lukaemon_mmlu_machine_learning', + ]), + dict(name='mmlu-social-science', + subsets=[ + 'lukaemon_mmlu_econometrics', + 'lukaemon_mmlu_high_school_geography', + 'lukaemon_mmlu_high_school_government_and_politics', + 'lukaemon_mmlu_high_school_macroeconomics', + 'lukaemon_mmlu_high_school_microeconomics', + 'lukaemon_mmlu_high_school_psychology', + 'lukaemon_mmlu_human_sexuality', + 'lukaemon_mmlu_professional_psychology', + 'lukaemon_mmlu_public_relations', + 'lukaemon_mmlu_security_studies', + 'lukaemon_mmlu_sociology', + 'lukaemon_mmlu_us_foreign_policy', + ]), + dict(name='mmlu-other', + subsets=[ + 'lukaemon_mmlu_business_ethics', + 'lukaemon_mmlu_clinical_knowledge', + 'lukaemon_mmlu_college_medicine', + 'lukaemon_mmlu_global_facts', + 'lukaemon_mmlu_human_aging', + 'lukaemon_mmlu_management', + 'lukaemon_mmlu_marketing', + 'lukaemon_mmlu_medical_genetics', + 'lukaemon_mmlu_miscellaneous', + 'lukaemon_mmlu_nutrition', + 'lukaemon_mmlu_professional_accounting', + 'lukaemon_mmlu_professional_medicine', + 'lukaemon_mmlu_virology', + ]), + dict(name='mmlu', + subsets=[ + 'lukaemon_mmlu_formal_logic', + 'lukaemon_mmlu_high_school_european_history', + 'lukaemon_mmlu_high_school_us_history', + 'lukaemon_mmlu_high_school_world_history', + 'lukaemon_mmlu_international_law', + 'lukaemon_mmlu_jurisprudence', + 'lukaemon_mmlu_logical_fallacies', + 'lukaemon_mmlu_moral_disputes', + 'lukaemon_mmlu_moral_scenarios', + 'lukaemon_mmlu_philosophy', + 'lukaemon_mmlu_prehistory', + 'lukaemon_mmlu_professional_law', + 'lukaemon_mmlu_world_religions', + 'lukaemon_mmlu_abstract_algebra', + 'lukaemon_mmlu_anatomy', + 'lukaemon_mmlu_astronomy', + 'lukaemon_mmlu_college_biology', + 'lukaemon_mmlu_college_chemistry', + 'lukaemon_mmlu_college_computer_science', + 'lukaemon_mmlu_college_mathematics', + 'lukaemon_mmlu_college_physics', + 'lukaemon_mmlu_computer_security', + 'lukaemon_mmlu_conceptual_physics', + 'lukaemon_mmlu_electrical_engineering', + 'lukaemon_mmlu_elementary_mathematics', + 'lukaemon_mmlu_high_school_biology', + 'lukaemon_mmlu_high_school_chemistry', + 'lukaemon_mmlu_high_school_computer_science', + 'lukaemon_mmlu_high_school_mathematics', + 'lukaemon_mmlu_high_school_physics', + 'lukaemon_mmlu_high_school_statistics', + 'lukaemon_mmlu_machine_learning', + 'lukaemon_mmlu_econometrics', + 'lukaemon_mmlu_high_school_geography', + 'lukaemon_mmlu_high_school_government_and_politics', + 'lukaemon_mmlu_high_school_macroeconomics', + 'lukaemon_mmlu_high_school_microeconomics', + 'lukaemon_mmlu_high_school_psychology', + 'lukaemon_mmlu_human_sexuality', + 'lukaemon_mmlu_professional_psychology', + 'lukaemon_mmlu_public_relations', + 'lukaemon_mmlu_security_studies', + 'lukaemon_mmlu_sociology', + 'lukaemon_mmlu_us_foreign_policy', + 'lukaemon_mmlu_business_ethics', + 'lukaemon_mmlu_clinical_knowledge', + 'lukaemon_mmlu_college_medicine', + 'lukaemon_mmlu_global_facts', + 'lukaemon_mmlu_human_aging', + 'lukaemon_mmlu_management', + 'lukaemon_mmlu_marketing', + 'lukaemon_mmlu_medical_genetics', + 'lukaemon_mmlu_miscellaneous', + 'lukaemon_mmlu_nutrition', + 'lukaemon_mmlu_professional_accounting', + 'lukaemon_mmlu_professional_medicine', + 'lukaemon_mmlu_virology', + ]), + dict(name='mmlu-weighted', + subsets=[ + 'lukaemon_mmlu_formal_logic', + 'lukaemon_mmlu_high_school_european_history', + 'lukaemon_mmlu_high_school_us_history', + 'lukaemon_mmlu_high_school_world_history', + 'lukaemon_mmlu_international_law', + 'lukaemon_mmlu_jurisprudence', + 'lukaemon_mmlu_logical_fallacies', + 'lukaemon_mmlu_moral_disputes', + 'lukaemon_mmlu_moral_scenarios', + 'lukaemon_mmlu_philosophy', + 'lukaemon_mmlu_prehistory', + 'lukaemon_mmlu_professional_law', + 'lukaemon_mmlu_world_religions', + 'lukaemon_mmlu_abstract_algebra', + 'lukaemon_mmlu_anatomy', + 'lukaemon_mmlu_astronomy', + 'lukaemon_mmlu_college_biology', + 'lukaemon_mmlu_college_chemistry', + 'lukaemon_mmlu_college_computer_science', + 'lukaemon_mmlu_college_mathematics', + 'lukaemon_mmlu_college_physics', + 'lukaemon_mmlu_computer_security', + 'lukaemon_mmlu_conceptual_physics', + 'lukaemon_mmlu_electrical_engineering', + 'lukaemon_mmlu_elementary_mathematics', + 'lukaemon_mmlu_high_school_biology', + 'lukaemon_mmlu_high_school_chemistry', + 'lukaemon_mmlu_high_school_computer_science', + 'lukaemon_mmlu_high_school_mathematics', + 'lukaemon_mmlu_high_school_physics', + 'lukaemon_mmlu_high_school_statistics', + 'lukaemon_mmlu_machine_learning', + 'lukaemon_mmlu_econometrics', + 'lukaemon_mmlu_high_school_geography', + 'lukaemon_mmlu_high_school_government_and_politics', + 'lukaemon_mmlu_high_school_macroeconomics', + 'lukaemon_mmlu_high_school_microeconomics', + 'lukaemon_mmlu_high_school_psychology', + 'lukaemon_mmlu_human_sexuality', + 'lukaemon_mmlu_professional_psychology', + 'lukaemon_mmlu_public_relations', + 'lukaemon_mmlu_security_studies', + 'lukaemon_mmlu_sociology', + 'lukaemon_mmlu_us_foreign_policy', + 'lukaemon_mmlu_business_ethics', + 'lukaemon_mmlu_clinical_knowledge', + 'lukaemon_mmlu_college_medicine', + 'lukaemon_mmlu_global_facts', + 'lukaemon_mmlu_human_aging', + 'lukaemon_mmlu_management', + 'lukaemon_mmlu_marketing', + 'lukaemon_mmlu_medical_genetics', + 'lukaemon_mmlu_miscellaneous', + 'lukaemon_mmlu_nutrition', + 'lukaemon_mmlu_professional_accounting', + 'lukaemon_mmlu_professional_medicine', + 'lukaemon_mmlu_virology', + ], + weights=dict( + lukaemon_mmlu_abstract_algebra=100, + lukaemon_mmlu_anatomy=135, + lukaemon_mmlu_astronomy=152, + lukaemon_mmlu_business_ethics=100, + lukaemon_mmlu_clinical_knowledge=265, + lukaemon_mmlu_college_biology=144, + lukaemon_mmlu_college_chemistry=100, + lukaemon_mmlu_college_computer_science=100, + lukaemon_mmlu_college_mathematics=100, + lukaemon_mmlu_college_medicine=173, + lukaemon_mmlu_college_physics=102, + lukaemon_mmlu_computer_security=100, + lukaemon_mmlu_conceptual_physics=235, + lukaemon_mmlu_econometrics=114, + lukaemon_mmlu_electrical_engineering=145, + lukaemon_mmlu_elementary_mathematics=378, + lukaemon_mmlu_formal_logic=126, + lukaemon_mmlu_global_facts=100, + lukaemon_mmlu_high_school_biology=310, + lukaemon_mmlu_high_school_chemistry=203, + lukaemon_mmlu_high_school_computer_science=100, + lukaemon_mmlu_high_school_european_history=165, + lukaemon_mmlu_high_school_geography=198, + lukaemon_mmlu_high_school_government_and_politics=193, + lukaemon_mmlu_high_school_macroeconomics=390, + lukaemon_mmlu_high_school_mathematics=270, + lukaemon_mmlu_high_school_microeconomics=238, + lukaemon_mmlu_high_school_physics=151, + lukaemon_mmlu_high_school_psychology=545, + lukaemon_mmlu_high_school_statistics=216, + lukaemon_mmlu_high_school_us_history=204, + lukaemon_mmlu_high_school_world_history=237, + lukaemon_mmlu_human_aging=223, + lukaemon_mmlu_human_sexuality=131, + lukaemon_mmlu_international_law=121, + lukaemon_mmlu_jurisprudence=108, + lukaemon_mmlu_logical_fallacies=163, + lukaemon_mmlu_machine_learning=112, + lukaemon_mmlu_management=103, + lukaemon_mmlu_marketing=234, + lukaemon_mmlu_medical_genetics=100, + lukaemon_mmlu_miscellaneous=783, + lukaemon_mmlu_moral_disputes=346, + lukaemon_mmlu_moral_scenarios=895, + lukaemon_mmlu_nutrition=306, + lukaemon_mmlu_philosophy=311, + lukaemon_mmlu_prehistory=324, + lukaemon_mmlu_professional_accounting=282, + lukaemon_mmlu_professional_law=1534, + lukaemon_mmlu_professional_medicine=272, + lukaemon_mmlu_professional_psychology=612, + lukaemon_mmlu_public_relations=110, + lukaemon_mmlu_security_studies=245, + lukaemon_mmlu_sociology=201, + lukaemon_mmlu_us_foreign_policy=100, + lukaemon_mmlu_virology=166, + lukaemon_mmlu_world_religions=171)), + dict(name='cmmlu-humanities', + subsets=[ + 'cmmlu-arts', + 'cmmlu-chinese_history', + 'cmmlu-chinese_literature', + 'cmmlu-college_law', + 'cmmlu-global_facts', + 'cmmlu-international_law', + 'cmmlu-jurisprudence', + 'cmmlu-logical', + 'cmmlu-marxist_theory', + 'cmmlu-philosophy', + 'cmmlu-professional_law', + 'cmmlu-world_history', + 'cmmlu-world_religions', + ]), + dict(name='cmmlu-stem', + subsets=[ + 'cmmlu-anatomy', + 'cmmlu-astronomy', + 'cmmlu-college_actuarial_science', + 'cmmlu-college_engineering_hydrology', + 'cmmlu-college_mathematics', + 'cmmlu-college_medical_statistics', + 'cmmlu-computer_science', + 'cmmlu-conceptual_physics', + 'cmmlu-electrical_engineering', + 'cmmlu-elementary_mathematics', + 'cmmlu-genetics', + 'cmmlu-high_school_biology', + 'cmmlu-high_school_chemistry', + 'cmmlu-high_school_mathematics', + 'cmmlu-high_school_physics', + 'cmmlu-machine_learning', + 'cmmlu-virology', + ]), + dict(name='cmmlu-social-science', + subsets=[ + 'cmmlu-ancient_chinese', + 'cmmlu-business_ethics', + 'cmmlu-chinese_civil_service_exam', + 'cmmlu-chinese_food_culture', + 'cmmlu-chinese_foreign_policy', + 'cmmlu-chinese_teacher_qualification', + 'cmmlu-college_education', + 'cmmlu-economics', + 'cmmlu-education', + 'cmmlu-elementary_chinese', + 'cmmlu-ethnology', + 'cmmlu-high_school_geography', + 'cmmlu-high_school_politics', + 'cmmlu-journalism', + 'cmmlu-management', + 'cmmlu-marketing', + 'cmmlu-modern_chinese', + 'cmmlu-professional_accounting', + 'cmmlu-professional_psychology', + 'cmmlu-public_relations', + 'cmmlu-security_study', + 'cmmlu-sociology', + ]), + dict(name='cmmlu-other', + subsets=[ + 'cmmlu-agronomy', + 'cmmlu-chinese_driving_rule', + 'cmmlu-clinical_knowledge', + 'cmmlu-college_medicine', + 'cmmlu-computer_security', + 'cmmlu-construction_project_management', + 'cmmlu-elementary_commonsense', + 'cmmlu-elementary_information_and_technology', + 'cmmlu-food_science', + 'cmmlu-human_sexuality', + 'cmmlu-legal_and_moral_basis', + 'cmmlu-nutrition', + 'cmmlu-professional_medicine', + 'cmmlu-sports_science', + 'cmmlu-traditional_chinese_medicine', + ]), + dict(name='cmmlu-china-specific', + subsets=[ + 'cmmlu-ancient_chinese', + 'cmmlu-chinese_civil_service_exam', + 'cmmlu-chinese_driving_rule', + 'cmmlu-chinese_food_culture', + 'cmmlu-chinese_foreign_policy', + 'cmmlu-chinese_history', + 'cmmlu-chinese_literature', + 'cmmlu-chinese_teacher_qualification', + 'cmmlu-construction_project_management', + 'cmmlu-elementary_chinese', + 'cmmlu-elementary_commonsense', + 'cmmlu-ethnology', + 'cmmlu-high_school_politics', + 'cmmlu-modern_chinese', + 'cmmlu-traditional_chinese_medicine', + ]), + dict(name='cmmlu', + subsets=[ + 'cmmlu-agronomy', + 'cmmlu-anatomy', + 'cmmlu-ancient_chinese', + 'cmmlu-arts', + 'cmmlu-astronomy', + 'cmmlu-business_ethics', + 'cmmlu-chinese_civil_service_exam', + 'cmmlu-chinese_driving_rule', + 'cmmlu-chinese_food_culture', + 'cmmlu-chinese_foreign_policy', + 'cmmlu-chinese_history', + 'cmmlu-chinese_literature', + 'cmmlu-chinese_teacher_qualification', + 'cmmlu-college_actuarial_science', + 'cmmlu-college_education', + 'cmmlu-college_engineering_hydrology', + 'cmmlu-college_law', + 'cmmlu-college_mathematics', + 'cmmlu-college_medical_statistics', + 'cmmlu-clinical_knowledge', + 'cmmlu-college_medicine', + 'cmmlu-computer_science', + 'cmmlu-computer_security', + 'cmmlu-conceptual_physics', + 'cmmlu-construction_project_management', + 'cmmlu-economics', + 'cmmlu-education', + 'cmmlu-elementary_chinese', + 'cmmlu-elementary_commonsense', + 'cmmlu-elementary_information_and_technology', + 'cmmlu-electrical_engineering', + 'cmmlu-elementary_mathematics', + 'cmmlu-ethnology', + 'cmmlu-food_science', + 'cmmlu-genetics', + 'cmmlu-global_facts', + 'cmmlu-high_school_biology', + 'cmmlu-high_school_chemistry', + 'cmmlu-high_school_geography', + 'cmmlu-high_school_mathematics', + 'cmmlu-high_school_physics', + 'cmmlu-high_school_politics', + 'cmmlu-human_sexuality', + 'cmmlu-international_law', + 'cmmlu-journalism', + 'cmmlu-jurisprudence', + 'cmmlu-legal_and_moral_basis', + 'cmmlu-logical', + 'cmmlu-machine_learning', + 'cmmlu-management', + 'cmmlu-marketing', + 'cmmlu-marxist_theory', + 'cmmlu-modern_chinese', + 'cmmlu-nutrition', + 'cmmlu-philosophy', + 'cmmlu-professional_accounting', + 'cmmlu-professional_law', + 'cmmlu-professional_medicine', + 'cmmlu-professional_psychology', + 'cmmlu-public_relations', + 'cmmlu-security_study', + 'cmmlu-sociology', + 'cmmlu-sports_science', + 'cmmlu-traditional_chinese_medicine', + 'cmmlu-virology', + 'cmmlu-world_history', + 'cmmlu-world_religions', + ]), + dict(name='ceval-stem', + subsets=[ + 'ceval-computer_network', + 'ceval-operating_system', + 'ceval-computer_architecture', + 'ceval-college_programming', + 'ceval-college_physics', + 'ceval-college_chemistry', + 'ceval-advanced_mathematics', + 'ceval-probability_and_statistics', + 'ceval-discrete_mathematics', + 'ceval-electrical_engineer', + 'ceval-metrology_engineer', + 'ceval-high_school_mathematics', + 'ceval-high_school_physics', + 'ceval-high_school_chemistry', + 'ceval-high_school_biology', + 'ceval-middle_school_mathematics', + 'ceval-middle_school_biology', + 'ceval-middle_school_physics', + 'ceval-middle_school_chemistry', + 'ceval-veterinary_medicine', + ]), + dict(name='ceval-social-science', + subsets=[ + 'ceval-college_economics', + 'ceval-business_administration', + 'ceval-marxism', + 'ceval-mao_zedong_thought', + 'ceval-education_science', + 'ceval-teacher_qualification', + 'ceval-high_school_politics', + 'ceval-high_school_geography', + 'ceval-middle_school_politics', + 'ceval-middle_school_geography', + ]), + dict(name='ceval-humanities', + subsets=[ + 'ceval-modern_chinese_history', + 'ceval-ideological_and_moral_cultivation', + 'ceval-logic', + 'ceval-law', + 'ceval-chinese_language_and_literature', + 'ceval-art_studies', + 'ceval-professional_tour_guide', + 'ceval-legal_professional', + 'ceval-high_school_chinese', + 'ceval-high_school_history', + 'ceval-middle_school_history', + ]), + dict(name='ceval-other', + subsets=[ + 'ceval-civil_servant', + 'ceval-sports_science', + 'ceval-plant_protection', + 'ceval-basic_medicine', + 'ceval-clinical_medicine', + 'ceval-urban_and_rural_planner', + 'ceval-accountant', + 'ceval-fire_engineer', + 'ceval-environmental_impact_assessment_engineer', + 'ceval-tax_accountant', + 'ceval-physician', + ]), + dict(name='ceval-hard', + subsets=[ + 'ceval-advanced_mathematics', + 'ceval-discrete_mathematics', + 'ceval-probability_and_statistics', + 'ceval-college_chemistry', + 'ceval-college_physics', + 'ceval-high_school_mathematics', + 'ceval-high_school_chemistry', + 'ceval-high_school_physics', + ]), + dict(name='ceval', + subsets=[ + 'ceval-computer_network', + 'ceval-operating_system', + 'ceval-computer_architecture', + 'ceval-college_programming', + 'ceval-college_physics', + 'ceval-college_chemistry', + 'ceval-advanced_mathematics', + 'ceval-probability_and_statistics', + 'ceval-discrete_mathematics', + 'ceval-electrical_engineer', + 'ceval-metrology_engineer', + 'ceval-high_school_mathematics', + 'ceval-high_school_physics', + 'ceval-high_school_chemistry', + 'ceval-high_school_biology', + 'ceval-middle_school_mathematics', + 'ceval-middle_school_biology', + 'ceval-middle_school_physics', + 'ceval-middle_school_chemistry', + 'ceval-veterinary_medicine', + 'ceval-college_economics', + 'ceval-business_administration', + 'ceval-marxism', + 'ceval-mao_zedong_thought', + 'ceval-education_science', + 'ceval-teacher_qualification', + 'ceval-high_school_politics', + 'ceval-high_school_geography', + 'ceval-middle_school_politics', + 'ceval-middle_school_geography', + 'ceval-modern_chinese_history', + 'ceval-ideological_and_moral_cultivation', + 'ceval-logic', + 'ceval-law', + 'ceval-chinese_language_and_literature', + 'ceval-art_studies', + 'ceval-professional_tour_guide', + 'ceval-legal_professional', + 'ceval-high_school_chinese', + 'ceval-high_school_history', + 'ceval-middle_school_history', + 'ceval-civil_servant', + 'ceval-sports_science', + 'ceval-plant_protection', + 'ceval-basic_medicine', + 'ceval-clinical_medicine', + 'ceval-urban_and_rural_planner', + 'ceval-accountant', + 'ceval-fire_engineer', + 'ceval-environmental_impact_assessment_engineer', + 'ceval-tax_accountant', + 'ceval-physician', + ]), + dict(name='bbh', + subsets=[ + 'bbh-temporal_sequences', + 'bbh-disambiguation_qa', + 'bbh-date_understanding', + 'bbh-tracking_shuffled_objects_three_objects', + 'bbh-penguins_in_a_table', + 'bbh-geometric_shapes', + 'bbh-snarks', + 'bbh-ruin_names', + 'bbh-tracking_shuffled_objects_seven_objects', + 'bbh-tracking_shuffled_objects_five_objects', + 'bbh-logical_deduction_three_objects', + 'bbh-hyperbaton', + 'bbh-logical_deduction_five_objects', + 'bbh-logical_deduction_seven_objects', + 'bbh-movie_recommendation', + 'bbh-salient_translation_error_detection', + 'bbh-reasoning_about_colored_objects', + 'bbh-multistep_arithmetic_two', + 'bbh-navigate', + 'bbh-dyck_languages', + 'bbh-word_sorting', + 'bbh-sports_understanding', + 'bbh-boolean_expressions', + 'bbh-object_counting', + 'bbh-formal_fallacies', + 'bbh-causal_judgement', + 'bbh-web_of_lies', + ]), + dict(name='GaokaoBench', + subsets=[ + 'GaokaoBench_2010-2022_Math_II_MCQs', + 'GaokaoBench_2010-2022_Math_I_MCQs', + 'GaokaoBench_2010-2022_History_MCQs', + 'GaokaoBench_2010-2022_Biology_MCQs', + 'GaokaoBench_2010-2022_Political_Science_MCQs', + 'GaokaoBench_2010-2022_Physics_MCQs', + 'GaokaoBench_2010-2022_Chemistry_MCQs', + 'GaokaoBench_2010-2013_English_MCQs', + 'GaokaoBench_2010-2022_Chinese_Modern_Lit', + 'GaokaoBench_2010-2022_English_Fill_in_Blanks', + 'GaokaoBench_2012-2022_English_Cloze_Test', + 'GaokaoBench_2010-2022_Geography_MCQs', + 'GaokaoBench_2010-2022_English_Reading_Comp', + 'GaokaoBench_2010-2022_Chinese_Lang_and_Usage_MCQs', + ], + weights=dict( + {'GaokaoBench_2010-2013_English_MCQs': 105, + 'GaokaoBench_2010-2022_Biology_MCQs': 900, + 'GaokaoBench_2010-2022_Chemistry_MCQs': 744, + 'GaokaoBench_2010-2022_Chinese_Lang_and_Usage_MCQs': 240, + 'GaokaoBench_2010-2022_Chinese_Modern_Lit': 261, + 'GaokaoBench_2010-2022_English_Fill_in_Blanks': 900.0, + 'GaokaoBench_2010-2022_English_Reading_Comp': 940, + 'GaokaoBench_2010-2022_Geography_MCQs': 380, + 'GaokaoBench_2010-2022_History_MCQs': 1148, + 'GaokaoBench_2010-2022_Math_II_MCQs': 1090, + 'GaokaoBench_2010-2022_Math_I_MCQs': 1070, + 'GaokaoBench_2010-2022_Physics_MCQs': 384, + 'GaokaoBench_2010-2022_Political_Science_MCQs': 1280, + 'GaokaoBench_2012-2022_English_Cloze_Test': 260})), + dict(name='flores_100_Indo-European-Germanic_English', + subsets=[ + 'flores_100_afr-eng', + 'flores_100_dan-eng', + 'flores_100_deu-eng', + 'flores_100_isl-eng', + 'flores_100_ltz-eng', + 'flores_100_nld-eng', + 'flores_100_nob-eng', + 'flores_100_swe-eng', + ]), + dict(name='flores_100_English_Indo-European-Germanic', + subsets=[ + 'flores_100_eng-afr', + 'flores_100_eng-dan', + 'flores_100_eng-deu', + 'flores_100_eng-isl', + 'flores_100_eng-ltz', + 'flores_100_eng-nld', + 'flores_100_eng-nob', + 'flores_100_eng-swe', + ]), + dict(name='flores_100_Indo-European-Romance_English', + subsets=[ + 'flores_100_ast-eng', + 'flores_100_cat-eng', + 'flores_100_fra-eng', + 'flores_100_glg-eng', + 'flores_100_oci-eng', + 'flores_100_por-eng', + 'flores_100_ron-eng', + 'flores_100_spa-eng', + ]), + dict(name='flores_100_English_Indo-European-Romance', + subsets=[ + 'flores_100_eng-ast', + 'flores_100_eng-cat', + 'flores_100_eng-fra', + 'flores_100_eng-glg', + 'flores_100_eng-oci', + 'flores_100_eng-por', + 'flores_100_eng-ron', + 'flores_100_eng-spa', + ]), + dict(name='flores_100_Indo-European-Slavic_English', + subsets=[ + 'flores_100_bel-eng', + 'flores_100_bos-eng', + 'flores_100_bul-eng', + 'flores_100_ces-eng', + 'flores_100_hrv-eng', + 'flores_100_mkd-eng', + 'flores_100_pol-eng', + 'flores_100_rus-eng', + 'flores_100_slk-eng', + 'flores_100_slv-eng', + 'flores_100_srp-eng', + 'flores_100_ukr-eng', + ]), + dict(name='flores_100_English_Indo-European-Slavic', + subsets=[ + 'flores_100_eng-bel', + 'flores_100_eng-bos', + 'flores_100_eng-bul', + 'flores_100_eng-ces', + 'flores_100_eng-hrv', + 'flores_100_eng-mkd', + 'flores_100_eng-pol', + 'flores_100_eng-rus', + 'flores_100_eng-slk', + 'flores_100_eng-slv', + 'flores_100_eng-srp', + 'flores_100_eng-ukr', + ]), + dict(name='flores_100_Indo-European-Indo-Aryan_English', + subsets=[ + 'flores_100_asm-eng', + 'flores_100_ben-eng', + 'flores_100_guj-eng', + 'flores_100_hin-eng', + 'flores_100_mar-eng', + 'flores_100_npi-eng', + 'flores_100_ory-eng', + 'flores_100_pan-eng', + 'flores_100_snd-eng', + 'flores_100_urd-eng', + ]), + dict(name='flores_100_English_Indo-European-Indo-Aryan', + subsets=[ + 'flores_100_eng-asm', + 'flores_100_eng-ben', + 'flores_100_eng-guj', + 'flores_100_eng-hin', + 'flores_100_eng-mar', + 'flores_100_eng-npi', + 'flores_100_eng-ory', + 'flores_100_eng-pan', + 'flores_100_eng-snd', + 'flores_100_eng-urd', + ]), + dict(name='flores_100_Indo-European-Other_English', + subsets=[ + 'flores_100_ckb-eng', + 'flores_100_cym-eng', + 'flores_100_ell-eng', + 'flores_100_fas-eng', + 'flores_100_gle-eng', + 'flores_100_hye-eng', + 'flores_100_ita-eng', + 'flores_100_lav-eng', + 'flores_100_lit-eng', + 'flores_100_pus-eng', + 'flores_100_tgk-eng', + ]), + dict(name='flores_100_English_Indo-European-Other', + subsets=[ + 'flores_100_eng-ckb', + 'flores_100_eng-cym', + 'flores_100_eng-ell', + 'flores_100_eng-fas', + 'flores_100_eng-gle', + 'flores_100_eng-hye', + 'flores_100_eng-ita', + 'flores_100_eng-lav', + 'flores_100_eng-lit', + 'flores_100_eng-pus', + 'flores_100_eng-tgk', + ]), + dict(name='flores_100_Austronesian_English', + subsets=[ + 'flores_100_ceb-eng', + 'flores_100_ind-eng', + 'flores_100_jav-eng', + 'flores_100_mri-eng', + 'flores_100_msa-eng', + 'flores_100_tgl-eng', + ]), + dict(name='flores_100_English_Austronesian', + subsets=[ + 'flores_100_eng-ceb', + 'flores_100_eng-ind', + 'flores_100_eng-jav', + 'flores_100_eng-mri', + 'flores_100_eng-msa', + 'flores_100_eng-tgl', + ]), + dict(name='flores_100_Atlantic-Congo_English', + subsets=[ + 'flores_100_ibo-eng', + 'flores_100_kam-eng', + 'flores_100_kea-eng', + 'flores_100_lin-eng', + 'flores_100_lug-eng', + 'flores_100_nso-eng', + 'flores_100_nya-eng', + 'flores_100_sna-eng', + 'flores_100_swh-eng', + 'flores_100_umb-eng', + 'flores_100_wol-eng', + 'flores_100_xho-eng', + 'flores_100_yor-eng', + 'flores_100_zul-eng', + ]), + dict(name='flores_100_English_Atlantic-Congo', + subsets=[ + 'flores_100_eng-ibo', + 'flores_100_eng-kam', + 'flores_100_eng-kea', + 'flores_100_eng-lin', + 'flores_100_eng-lug', + 'flores_100_eng-nso', + 'flores_100_eng-nya', + 'flores_100_eng-sna', + 'flores_100_eng-swh', + 'flores_100_eng-umb', + 'flores_100_eng-wol', + 'flores_100_eng-xho', + 'flores_100_eng-yor', + 'flores_100_eng-zul', + ]), + dict(name='flores_100_Afro-Asiatic_English', + subsets=[ + 'flores_100_amh-eng', + 'flores_100_ara-eng', + 'flores_100_ful-eng', + 'flores_100_mlt-eng', + 'flores_100_orm-eng', + 'flores_100_som-eng', + ]), + dict(name='flores_100_English_Afro-Asiatic', + subsets=[ + 'flores_100_eng-amh', + 'flores_100_eng-ara', + 'flores_100_eng-ful', + 'flores_100_eng-mlt', + 'flores_100_eng-orm', + 'flores_100_eng-som', + ]), + dict(name='flores_100_Turkic_English', + subsets=[ + 'flores_100_azj-eng', + 'flores_100_kaz-eng', + 'flores_100_kir-eng', + 'flores_100_tur-eng', + 'flores_100_uzb-eng', + ]), + dict(name='flores_100_English_Turkic', + subsets=[ + 'flores_100_eng-azj', + 'flores_100_eng-kaz', + 'flores_100_eng-kir', + 'flores_100_eng-tur', + 'flores_100_eng-uzb', + ]), + dict(name='flores_100_Dravidian_English', + subsets=[ + 'flores_100_kan-eng', + 'flores_100_mal-eng', + 'flores_100_tam-eng', + 'flores_100_tel-eng', + ]), + dict(name='flores_100_English_Dravidian', + subsets=[ + 'flores_100_eng-kan', + 'flores_100_eng-mal', + 'flores_100_eng-tam', + 'flores_100_eng-tel', + ]), + dict(name='flores_100_Sino-Tibetan_English', + subsets=[ + 'flores_100_mya-eng', + 'flores_100_zho_simpl-eng', + 'flores_100_zho_trad-eng', + ]), + dict(name='flores_100_English_Sino-Tibetan', + subsets=[ + 'flores_100_eng-mya', + 'flores_100_eng-zho_simpl', + 'flores_100_eng-zho_trad', + ]), + dict(name='flores_100_Other_English', + subsets=[ + 'flores_100_est-eng', + 'flores_100_fin-eng', + 'flores_100_hau-eng', + 'flores_100_heb-eng', + 'flores_100_hun-eng', + 'flores_100_jpn-eng', + 'flores_100_kat-eng', + 'flores_100_khm-eng', + 'flores_100_kor-eng', + 'flores_100_lao-eng', + 'flores_100_luo-eng', + 'flores_100_mon-eng', + 'flores_100_tha-eng', + 'flores_100_vie-eng', + ]), + dict(name='flores_100_English_Other', + subsets=[ + 'flores_100_eng-est', + 'flores_100_eng-fin', + 'flores_100_eng-hau', + 'flores_100_eng-heb', + 'flores_100_eng-hun', + 'flores_100_eng-jpn', + 'flores_100_eng-kat', + 'flores_100_eng-khm', + 'flores_100_eng-kor', + 'flores_100_eng-lao', + 'flores_100_eng-luo', + 'flores_100_eng-mon', + 'flores_100_eng-tha', + 'flores_100_eng-vie', + ]), + dict(name='flores_100', + subsets=[ + 'flores_100_afr-eng', + 'flores_100_dan-eng', + 'flores_100_deu-eng', + 'flores_100_isl-eng', + 'flores_100_ltz-eng', + 'flores_100_nld-eng', + 'flores_100_nob-eng', + 'flores_100_swe-eng', + 'flores_100_ast-eng', + 'flores_100_cat-eng', + 'flores_100_fra-eng', + 'flores_100_glg-eng', + 'flores_100_oci-eng', + 'flores_100_por-eng', + 'flores_100_ron-eng', + 'flores_100_spa-eng', + 'flores_100_bel-eng', + 'flores_100_bos-eng', + 'flores_100_bul-eng', + 'flores_100_ces-eng', + 'flores_100_hrv-eng', + 'flores_100_mkd-eng', + 'flores_100_pol-eng', + 'flores_100_rus-eng', + 'flores_100_slk-eng', + 'flores_100_slv-eng', + 'flores_100_srp-eng', + 'flores_100_ukr-eng', + 'flores_100_asm-eng', + 'flores_100_ben-eng', + 'flores_100_guj-eng', + 'flores_100_hin-eng', + 'flores_100_mar-eng', + 'flores_100_npi-eng', + 'flores_100_ory-eng', + 'flores_100_pan-eng', + 'flores_100_snd-eng', + 'flores_100_urd-eng', + 'flores_100_ckb-eng', + 'flores_100_cym-eng', + 'flores_100_ell-eng', + 'flores_100_fas-eng', + 'flores_100_gle-eng', + 'flores_100_hye-eng', + 'flores_100_ita-eng', + 'flores_100_lav-eng', + 'flores_100_lit-eng', + 'flores_100_pus-eng', + 'flores_100_tgk-eng', + 'flores_100_ceb-eng', + 'flores_100_ind-eng', + 'flores_100_jav-eng', + 'flores_100_mri-eng', + 'flores_100_msa-eng', + 'flores_100_tgl-eng', + 'flores_100_ibo-eng', + 'flores_100_kam-eng', + 'flores_100_kea-eng', + 'flores_100_lin-eng', + 'flores_100_lug-eng', + 'flores_100_nso-eng', + 'flores_100_nya-eng', + 'flores_100_sna-eng', + 'flores_100_swh-eng', + 'flores_100_umb-eng', + 'flores_100_wol-eng', + 'flores_100_xho-eng', + 'flores_100_yor-eng', + 'flores_100_zul-eng', + 'flores_100_amh-eng', + 'flores_100_ara-eng', + 'flores_100_ful-eng', + 'flores_100_mlt-eng', + 'flores_100_orm-eng', + 'flores_100_som-eng', + 'flores_100_azj-eng', + 'flores_100_kaz-eng', + 'flores_100_kir-eng', + 'flores_100_tur-eng', + 'flores_100_uzb-eng', + 'flores_100_kan-eng', + 'flores_100_mal-eng', + 'flores_100_tam-eng', + 'flores_100_tel-eng', + 'flores_100_mya-eng', + 'flores_100_zho_simpl-eng', + 'flores_100_zho_trad-eng', + 'flores_100_est-eng', + 'flores_100_fin-eng', + 'flores_100_hau-eng', + 'flores_100_heb-eng', + 'flores_100_hun-eng', + 'flores_100_jpn-eng', + 'flores_100_kat-eng', + 'flores_100_khm-eng', + 'flores_100_kor-eng', + 'flores_100_lao-eng', + 'flores_100_luo-eng', + 'flores_100_mon-eng', + 'flores_100_tha-eng', + 'flores_100_vie-eng', + 'flores_100_eng-afr', + 'flores_100_eng-dan', + 'flores_100_eng-deu', + 'flores_100_eng-isl', + 'flores_100_eng-ltz', + 'flores_100_eng-nld', + 'flores_100_eng-nob', + 'flores_100_eng-swe', + 'flores_100_eng-ast', + 'flores_100_eng-cat', + 'flores_100_eng-fra', + 'flores_100_eng-glg', + 'flores_100_eng-oci', + 'flores_100_eng-por', + 'flores_100_eng-ron', + 'flores_100_eng-spa', + 'flores_100_eng-bel', + 'flores_100_eng-bos', + 'flores_100_eng-bul', + 'flores_100_eng-ces', + 'flores_100_eng-hrv', + 'flores_100_eng-mkd', + 'flores_100_eng-pol', + 'flores_100_eng-rus', + 'flores_100_eng-slk', + 'flores_100_eng-slv', + 'flores_100_eng-srp', + 'flores_100_eng-ukr', + 'flores_100_eng-asm', + 'flores_100_eng-ben', + 'flores_100_eng-guj', + 'flores_100_eng-hin', + 'flores_100_eng-mar', + 'flores_100_eng-npi', + 'flores_100_eng-ory', + 'flores_100_eng-pan', + 'flores_100_eng-snd', + 'flores_100_eng-urd', + 'flores_100_eng-ckb', + 'flores_100_eng-cym', + 'flores_100_eng-ell', + 'flores_100_eng-fas', + 'flores_100_eng-gle', + 'flores_100_eng-hye', + 'flores_100_eng-ita', + 'flores_100_eng-lav', + 'flores_100_eng-lit', + 'flores_100_eng-pus', + 'flores_100_eng-tgk', + 'flores_100_eng-ceb', + 'flores_100_eng-ind', + 'flores_100_eng-jav', + 'flores_100_eng-mri', + 'flores_100_eng-msa', + 'flores_100_eng-tgl', + 'flores_100_eng-ibo', + 'flores_100_eng-kam', + 'flores_100_eng-kea', + 'flores_100_eng-lin', + 'flores_100_eng-lug', + 'flores_100_eng-nso', + 'flores_100_eng-nya', + 'flores_100_eng-sna', + 'flores_100_eng-swh', + 'flores_100_eng-umb', + 'flores_100_eng-wol', + 'flores_100_eng-xho', + 'flores_100_eng-yor', + 'flores_100_eng-zul', + 'flores_100_eng-amh', + 'flores_100_eng-ara', + 'flores_100_eng-ful', + 'flores_100_eng-mlt', + 'flores_100_eng-orm', + 'flores_100_eng-som', + 'flores_100_eng-azj', + 'flores_100_eng-kaz', + 'flores_100_eng-kir', + 'flores_100_eng-tur', + 'flores_100_eng-uzb', + 'flores_100_eng-kan', + 'flores_100_eng-mal', + 'flores_100_eng-tam', + 'flores_100_eng-tel', + 'flores_100_eng-mya', + 'flores_100_eng-zho_simpl', + 'flores_100_eng-zho_trad', + 'flores_100_eng-est', + 'flores_100_eng-fin', + 'flores_100_eng-hau', + 'flores_100_eng-heb', + 'flores_100_eng-hun', + 'flores_100_eng-jpn', + 'flores_100_eng-kat', + 'flores_100_eng-khm', + 'flores_100_eng-kor', + 'flores_100_eng-lao', + 'flores_100_eng-luo', + 'flores_100_eng-mon', + 'flores_100_eng-tha', + 'flores_100_eng-vie', + ]), + dict(name='jigsaw_multilingual', + subsets=[ + 'jigsaw_multilingual_es', + 'jigsaw_multilingual_fr', + 'jigsaw_multilingual_it', + 'jigsaw_multilingual_pt', + 'jigsaw_multilingual_ru', + 'jigsaw_multilingual_tr', + ]), + ]) +work_dir='outputs/demo/20251027_162236' \ No newline at end of file diff --git a/logs/eval/public/jina-reranker-m0@main/lambada.out b/logs/eval/public/jina-reranker-m0@main/lambada.out new file mode 100644 index 0000000..0b5c4aa --- /dev/null +++ b/logs/eval/public/jina-reranker-m0@main/lambada.out @@ -0,0 +1,9 @@ +[4pdvGPU Msg(853:139765910674432:libvgpu.c:873)]: Initializing..... +[4pdvGPU Msg(853:139765910674432:multiprocess_memory_limit.c:144)]: uuid GPU-96268743-978a-9d8b-c966-e0c72728f828 validated +[4pdvGPU Msg(853:139765910674432:multiprocess_memory_limit.c:144)]: uuid GPU-d7cd3537-3d95-c1ab-84e6-c51d5fdef981 validated +[4pdvGPU ERROR (pid:853 thread=139765910674432 libvgpu.c:924)]: cuInit failed:100 +10/27 16:22:52 - OpenCompass - ERROR - /models/opencompass/opencompass/tasks/openicl_eval.py - _score - 163 - Task [public/jina-reranker-m0@main/lambada]: No predictions found. +10/27 16:22:52 - OpenCompass - INFO - time elapsed: 2.08s +/opt/conda/lib/python3.8/site-packages/fuzzywuzzy/fuzz.py:11: UserWarning: Using slow pure-python SequenceMatcher. Install python-Levenshtein to remove this warning + warnings.warn('Using slow pure-python SequenceMatcher. Install python-Levenshtein to remove this warning') +[4pdvGPU Msg(853:139765910674432:multiprocess_memory_limit.c:543)]: Calling exit handler 853 diff --git a/logs/infer/public/jina-reranker-m0@main/lambada_0.out b/logs/infer/public/jina-reranker-m0@main/lambada_0.out new file mode 100644 index 0000000..e029f13 --- /dev/null +++ b/logs/infer/public/jina-reranker-m0@main/lambada_0.out @@ -0,0 +1,35 @@ +[4pdvGPU Msg(274:139803958045696:libvgpu.c:873)]: Initializing..... +[4pdvGPU Msg(274:139803958045696:multiprocess_memory_limit.c:144)]: uuid GPU-96268743-978a-9d8b-c966-e0c72728f828 validated +[4pdvGPU Msg(274:139803958045696:multiprocess_memory_limit.c:144)]: uuid GPU-d7cd3537-3d95-c1ab-84e6-c51d5fdef981 validated +[4pdvGPU ERROR (pid:274 thread=139803958045696 libvgpu.c:924)]: cuInit failed:100 +10/27 16:22:43 - OpenCompass - INFO - Task [public/jina-reranker-m0@main/lambada_0] +10/27 16:22:46 - OpenCompass - INFO - Start inferencing [public/jina-reranker-m0@main/lambada_0] +/opt/conda/lib/python3.8/site-packages/fuzzywuzzy/fuzz.py:11: UserWarning: Using slow pure-python SequenceMatcher. Install python-Levenshtein to remove this warning + warnings.warn('Using slow pure-python SequenceMatcher. Install python-Levenshtein to remove this warning') +[2025-10-27 16:22:46,260] [opencompass.openicl.icl_inferencer.icl_gen_inferencer] [INFO] Starting inference process... + 0%| | 0/1718 [00:00 + inferencer.run() + File "/models/opencompass/opencompass/tasks/openicl_infer.py", line 76, in run + self._inference() + File "/models/opencompass/opencompass/tasks/openicl_infer.py", line 119, in _inference + inferencer.inference(retriever, + File "/models/opencompass/opencompass/openicl/icl_inferencer/icl_gen_inferencer.py", line 122, in inference + results = self.model.generate_from_template( + File "/models/opencompass/opencompass/models/base.py", line 117, in generate_from_template + return self.generate(inputs, max_out_len=max_out_len, **kwargs) + File "/models/opencompass/opencompass/models/openai_api.py", line 123, in generate + results = list( + File "/opt/conda/lib/python3.8/concurrent/futures/_base.py", line 619, in result_iterator + yield fs.pop().result() + File "/opt/conda/lib/python3.8/concurrent/futures/_base.py", line 444, in result + return self.__get_result() + File "/opt/conda/lib/python3.8/concurrent/futures/_base.py", line 389, in __get_result + raise self._exception + File "/opt/conda/lib/python3.8/concurrent/futures/thread.py", line 57, in run + result = self.fn(*self.args, **self.kwargs) + File "/models/opencompass/opencompass/models/openai_api.py", line 235, in _generate + return response['choices'][0]['message']['content'].strip() +TypeError: 'NoneType' object is not subscriptable +[4pdvGPU Msg(274:139803958045696:multiprocess_memory_limit.c:543)]: Calling exit handler 274 diff --git a/logs/infer/public/jina-reranker-m0@main/lambada_1.out b/logs/infer/public/jina-reranker-m0@main/lambada_1.out new file mode 100644 index 0000000..91ca252 --- /dev/null +++ b/logs/infer/public/jina-reranker-m0@main/lambada_1.out @@ -0,0 +1,35 @@ +[4pdvGPU Msg(277:140097609071616:libvgpu.c:873)]: Initializing..... +[4pdvGPU Msg(277:140097609071616:multiprocess_memory_limit.c:144)]: uuid GPU-96268743-978a-9d8b-c966-e0c72728f828 validated +[4pdvGPU Msg(277:140097609071616:multiprocess_memory_limit.c:144)]: uuid GPU-d7cd3537-3d95-c1ab-84e6-c51d5fdef981 validated +[4pdvGPU ERROR (pid:277 thread=140097609071616 libvgpu.c:924)]: cuInit failed:100 +10/27 16:22:43 - OpenCompass - INFO - Task [public/jina-reranker-m0@main/lambada_1] +10/27 16:22:46 - OpenCompass - INFO - Start inferencing [public/jina-reranker-m0@main/lambada_1] +/opt/conda/lib/python3.8/site-packages/fuzzywuzzy/fuzz.py:11: UserWarning: Using slow pure-python SequenceMatcher. Install python-Levenshtein to remove this warning + warnings.warn('Using slow pure-python SequenceMatcher. Install python-Levenshtein to remove this warning') +[2025-10-27 16:22:46,211] [opencompass.openicl.icl_inferencer.icl_gen_inferencer] [INFO] Starting inference process... + 0%| | 0/1718 [00:00 + inferencer.run() + File "/models/opencompass/opencompass/tasks/openicl_infer.py", line 76, in run + self._inference() + File "/models/opencompass/opencompass/tasks/openicl_infer.py", line 119, in _inference + inferencer.inference(retriever, + File "/models/opencompass/opencompass/openicl/icl_inferencer/icl_gen_inferencer.py", line 122, in inference + results = self.model.generate_from_template( + File "/models/opencompass/opencompass/models/base.py", line 117, in generate_from_template + return self.generate(inputs, max_out_len=max_out_len, **kwargs) + File "/models/opencompass/opencompass/models/openai_api.py", line 123, in generate + results = list( + File "/opt/conda/lib/python3.8/concurrent/futures/_base.py", line 619, in result_iterator + yield fs.pop().result() + File "/opt/conda/lib/python3.8/concurrent/futures/_base.py", line 444, in result + return self.__get_result() + File "/opt/conda/lib/python3.8/concurrent/futures/_base.py", line 389, in __get_result + raise self._exception + File "/opt/conda/lib/python3.8/concurrent/futures/thread.py", line 57, in run + result = self.fn(*self.args, **self.kwargs) + File "/models/opencompass/opencompass/models/openai_api.py", line 235, in _generate + return response['choices'][0]['message']['content'].strip() +TypeError: 'NoneType' object is not subscriptable +[4pdvGPU Msg(277:140097609071616:multiprocess_memory_limit.c:543)]: Calling exit handler 277 diff --git a/logs/infer/public/jina-reranker-m0@main/lambada_2.out b/logs/infer/public/jina-reranker-m0@main/lambada_2.out new file mode 100644 index 0000000..36214f4 --- /dev/null +++ b/logs/infer/public/jina-reranker-m0@main/lambada_2.out @@ -0,0 +1,35 @@ +[4pdvGPU Msg(282:140164578561024:libvgpu.c:873)]: Initializing..... +[4pdvGPU Msg(282:140164578561024:multiprocess_memory_limit.c:144)]: uuid GPU-96268743-978a-9d8b-c966-e0c72728f828 validated +[4pdvGPU Msg(282:140164578561024:multiprocess_memory_limit.c:144)]: uuid GPU-d7cd3537-3d95-c1ab-84e6-c51d5fdef981 validated +[4pdvGPU ERROR (pid:282 thread=140164578561024 libvgpu.c:924)]: cuInit failed:100 +10/27 16:22:43 - OpenCompass - INFO - Task [public/jina-reranker-m0@main/lambada_2] +10/27 16:22:46 - OpenCompass - INFO - Start inferencing [public/jina-reranker-m0@main/lambada_2] +/opt/conda/lib/python3.8/site-packages/fuzzywuzzy/fuzz.py:11: UserWarning: Using slow pure-python SequenceMatcher. Install python-Levenshtein to remove this warning + warnings.warn('Using slow pure-python SequenceMatcher. Install python-Levenshtein to remove this warning') +[2025-10-27 16:22:46,281] [opencompass.openicl.icl_inferencer.icl_gen_inferencer] [INFO] Starting inference process... + 0%| | 0/1717 [00:00 + inferencer.run() + File "/models/opencompass/opencompass/tasks/openicl_infer.py", line 76, in run + self._inference() + File "/models/opencompass/opencompass/tasks/openicl_infer.py", line 119, in _inference + inferencer.inference(retriever, + File "/models/opencompass/opencompass/openicl/icl_inferencer/icl_gen_inferencer.py", line 122, in inference + results = self.model.generate_from_template( + File "/models/opencompass/opencompass/models/base.py", line 117, in generate_from_template + return self.generate(inputs, max_out_len=max_out_len, **kwargs) + File "/models/opencompass/opencompass/models/openai_api.py", line 123, in generate + results = list( + File "/opt/conda/lib/python3.8/concurrent/futures/_base.py", line 619, in result_iterator + yield fs.pop().result() + File "/opt/conda/lib/python3.8/concurrent/futures/_base.py", line 444, in result + return self.__get_result() + File "/opt/conda/lib/python3.8/concurrent/futures/_base.py", line 389, in __get_result + raise self._exception + File "/opt/conda/lib/python3.8/concurrent/futures/thread.py", line 57, in run + result = self.fn(*self.args, **self.kwargs) + File "/models/opencompass/opencompass/models/openai_api.py", line 235, in _generate + return response['choices'][0]['message']['content'].strip() +TypeError: 'NoneType' object is not subscriptable +[4pdvGPU Msg(282:140164578561024:multiprocess_memory_limit.c:543)]: Calling exit handler 282 diff --git a/summary/summary_20251027_162236.csv b/summary/summary_20251027_162236.csv new file mode 100644 index 0000000..ce03534 --- /dev/null +++ b/summary/summary_20251027_162236.csv @@ -0,0 +1,87 @@ +dataset,version,metric,mode,public/jina-reranker-m0@main +--------- 考试 Exam ---------,-,-,-,- +ceval,-,-,-,- +agieval,-,-,-,- +mmlu,-,-,-,- +GaokaoBench,-,-,-,- +ARC-c,-,-,-,- +--------- 语言 Language ---------,-,-,-,- +WiC,-,-,-,- +summedits,-,-,-,- +chid-dev,-,-,-,- +afqmc-dev,-,-,-,- +bustm-dev,-,-,-,- +cluewsc-dev,-,-,-,- +WSC,-,-,-,- +winogrande,-,-,-,- +flores_100,-,-,-,- +--------- 知识 Knowledge ---------,-,-,-,- +BoolQ,-,-,-,- +commonsense_qa,-,-,-,- +nq,-,-,-,- +triviaqa,-,-,-,- +--------- 推理 Reasoning ---------,-,-,-,- +cmnli,-,-,-,- +ocnli,-,-,-,- +ocnli_fc-dev,-,-,-,- +AX_b,-,-,-,- +AX_g,-,-,-,- +CB,-,-,-,- +RTE,-,-,-,- +story_cloze,-,-,-,- +COPA,-,-,-,- +ReCoRD,-,-,-,- +hellaswag,-,-,-,- +piqa,-,-,-,- +siqa,-,-,-,- +strategyqa,-,-,-,- +math,-,-,-,- +gsm8k,-,-,-,- +TheoremQA,-,-,-,- +openai_humaneval,-,-,-,- +mbpp,-,-,-,- +cmmlu,-,-,-,- +bbh,-,-,-,- +--------- 理解 Understanding ---------,-,-,-,- +C3,-,-,-,- +CMRC_dev,-,-,-,- +DRCD_dev,-,-,-,- +MultiRC,-,-,-,- +race-middle,-,-,-,- +race-high,-,-,-,- +openbookqa_fact,-,-,-,- +csl_dev,-,-,-,- +lcsts,-,-,-,- +Xsum,-,-,-,- +eprstmt-dev,-,-,-,- +lambada,-,-,-,- +tnews-dev,-,-,-,- +--------- 安全 Safety ---------,-,-,-,- +crows_pairs,-,-,-,- +--------- LEval Exact Match (Acc) ---------,-,-,-,- +LEval_coursera,-,-,-,- +LEval_gsm100,-,-,-,- +LEval_quality,-,-,-,- +LEval_tpo,-,-,-,- +LEval_topic_retrieval,-,-,-,- +--------- LEval Gen (ROUGE) ---------,-,-,-,- +LEval_financialqa,-,-,-,- +LEval_gov_report_summ,-,-,-,- +LEval_legal_contract_qa,-,-,-,- +LEval_meeting_summ,-,-,-,- +LEval_multidocqa,-,-,-,- +LEval_narrativeqa,-,-,-,- +LEval_nq,-,-,-,- +LEval_news_summ,-,-,-,- +LEval_paper_assistant,-,-,-,- +LEval_patent_summ,-,-,-,- +LEval_review_summ,-,-,-,- +LEval_scientificqa,-,-,-,- +LEval_tvshow_summ--------- 长文本 LongBench ---------,-,-,-,- +longbench_lsht,-,-,-,- +longbench_vcsum,-,-,-,- +longbench_dureader,-,-,-,- +longbench_multifieldqa_zh,-,-,-,- +longbench_passage_retrieval_zh,-,-,-,- +--------- 单选 自定义数据 ---------,-,-,-,- +SageBench-exam,-,-,-,- diff --git a/summary/summary_20251027_162236.txt b/summary/summary_20251027_162236.txt new file mode 100644 index 0000000..70f997f --- /dev/null +++ b/summary/summary_20251027_162236.txt @@ -0,0 +1,193 @@ +20251027_162236 +tabulate format +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +dataset version metric mode public/jina-reranker-m0@main +----------------------------------------------------- --------- -------- ------ ------------------------------ +--------- 考试 Exam --------- - - - - +ceval - - - - +agieval - - - - +mmlu - - - - +GaokaoBench - - - - +ARC-c - - - - +--------- 语言 Language --------- - - - - +WiC - - - - +summedits - - - - +chid-dev - - - - +afqmc-dev - - - - +bustm-dev - - - - +cluewsc-dev - - - - +WSC - - - - +winogrande - - - - +flores_100 - - - - +--------- 知识 Knowledge --------- - - - - +BoolQ - - - - +commonsense_qa - - - - +nq - - - - +triviaqa - - - - +--------- 推理 Reasoning --------- - - - - +cmnli - - - - +ocnli - - - - +ocnli_fc-dev - - - - +AX_b - - - - +AX_g - - - - +CB - - - - +RTE - - - - +story_cloze - - - - +COPA - - - - +ReCoRD - - - - +hellaswag - - - - +piqa - - - - +siqa - - - - +strategyqa - - - - +math - - - - +gsm8k - - - - +TheoremQA - - - - +openai_humaneval - - - - +mbpp - - - - +cmmlu - - - - +bbh - - - - +--------- 理解 Understanding --------- - - - - +C3 - - - - +CMRC_dev - - - - +DRCD_dev - - - - +MultiRC - - - - +race-middle - - - - +race-high - - - - +openbookqa_fact - - - - +csl_dev - - - - +lcsts - - - - +Xsum - - - - +eprstmt-dev - - - - +lambada - - - - +tnews-dev - - - - +--------- 安全 Safety --------- - - - - +crows_pairs - - - - +--------- LEval Exact Match (Acc) --------- - - - - +LEval_coursera - - - - +LEval_gsm100 - - - - +LEval_quality - - - - +LEval_tpo - - - - +LEval_topic_retrieval - - - - +--------- LEval Gen (ROUGE) --------- - - - - +LEval_financialqa - - - - +LEval_gov_report_summ - - - - +LEval_legal_contract_qa - - - - +LEval_meeting_summ - - - - +LEval_multidocqa - - - - +LEval_narrativeqa - - - - +LEval_nq - - - - +LEval_news_summ - - - - +LEval_paper_assistant - - - - +LEval_patent_summ - - - - +LEval_review_summ - - - - +LEval_scientificqa - - - - +LEval_tvshow_summ--------- 长文本 LongBench --------- - - - - +longbench_lsht - - - - +longbench_vcsum - - - - +longbench_dureader - - - - +longbench_multifieldqa_zh - - - - +longbench_passage_retrieval_zh - - - - +--------- 单选 自定义数据 --------- - - - - +SageBench-exam - - - - +$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$ + +-------------------------------------------------------------------------------------------------------------------------------- THIS IS A DIVIDER -------------------------------------------------------------------------------------------------------------------------------- + +csv format +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +dataset,version,metric,mode,public/jina-reranker-m0@main +--------- 考试 Exam ---------,-,-,-,- +ceval,-,-,-,- +agieval,-,-,-,- +mmlu,-,-,-,- +GaokaoBench,-,-,-,- +ARC-c,-,-,-,- +--------- 语言 Language ---------,-,-,-,- +WiC,-,-,-,- +summedits,-,-,-,- +chid-dev,-,-,-,- +afqmc-dev,-,-,-,- +bustm-dev,-,-,-,- +cluewsc-dev,-,-,-,- +WSC,-,-,-,- +winogrande,-,-,-,- +flores_100,-,-,-,- +--------- 知识 Knowledge ---------,-,-,-,- +BoolQ,-,-,-,- +commonsense_qa,-,-,-,- +nq,-,-,-,- +triviaqa,-,-,-,- +--------- 推理 Reasoning ---------,-,-,-,- +cmnli,-,-,-,- +ocnli,-,-,-,- +ocnli_fc-dev,-,-,-,- +AX_b,-,-,-,- +AX_g,-,-,-,- +CB,-,-,-,- +RTE,-,-,-,- +story_cloze,-,-,-,- +COPA,-,-,-,- +ReCoRD,-,-,-,- +hellaswag,-,-,-,- +piqa,-,-,-,- +siqa,-,-,-,- +strategyqa,-,-,-,- +math,-,-,-,- +gsm8k,-,-,-,- +TheoremQA,-,-,-,- +openai_humaneval,-,-,-,- +mbpp,-,-,-,- +cmmlu,-,-,-,- +bbh,-,-,-,- +--------- 理解 Understanding ---------,-,-,-,- +C3,-,-,-,- +CMRC_dev,-,-,-,- +DRCD_dev,-,-,-,- +MultiRC,-,-,-,- +race-middle,-,-,-,- +race-high,-,-,-,- +openbookqa_fact,-,-,-,- +csl_dev,-,-,-,- +lcsts,-,-,-,- +Xsum,-,-,-,- +eprstmt-dev,-,-,-,- +lambada,-,-,-,- +tnews-dev,-,-,-,- +--------- 安全 Safety ---------,-,-,-,- +crows_pairs,-,-,-,- +--------- LEval Exact Match (Acc) ---------,-,-,-,- +LEval_coursera,-,-,-,- +LEval_gsm100,-,-,-,- +LEval_quality,-,-,-,- +LEval_tpo,-,-,-,- +LEval_topic_retrieval,-,-,-,- +--------- LEval Gen (ROUGE) ---------,-,-,-,- +LEval_financialqa,-,-,-,- +LEval_gov_report_summ,-,-,-,- +LEval_legal_contract_qa,-,-,-,- +LEval_meeting_summ,-,-,-,- +LEval_multidocqa,-,-,-,- +LEval_narrativeqa,-,-,-,- +LEval_nq,-,-,-,- +LEval_news_summ,-,-,-,- +LEval_paper_assistant,-,-,-,- +LEval_patent_summ,-,-,-,- +LEval_review_summ,-,-,-,- +LEval_scientificqa,-,-,-,- +LEval_tvshow_summ--------- 长文本 LongBench ---------,-,-,-,- +longbench_lsht,-,-,-,- +longbench_vcsum,-,-,-,- +longbench_dureader,-,-,-,- +longbench_multifieldqa_zh,-,-,-,- +longbench_passage_retrieval_zh,-,-,-,- +--------- 单选 自定义数据 ---------,-,-,-,- +SageBench-exam,-,-,-,- +$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$ + +-------------------------------------------------------------------------------------------------------------------------------- THIS IS A DIVIDER -------------------------------------------------------------------------------------------------------------------------------- + +raw format +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +------------------------------- +Model: public/jina-reranker-m0@main +$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$