# Summarizer configuration: declares which dataset abbreviations appear in the
# summary (and in what order), which summary groups are available, and where
# the prompt database lives.
from mmengine.config import read_base

with read_base():
    # NOTE: these names look unused, but every imported name ending in
    # ``_summary_groups`` is picked up by the ``locals()`` scan in
    # ``summary_groups`` below. Do not remove them as "unused imports".
    from .groups.agieval import agieval_summary_groups
    from .groups.mmlu import mmlu_summary_groups
    from .groups.cmmlu import cmmlu_summary_groups
    from .groups.ceval import ceval_summary_groups
    from .groups.bbh import bbh_summary_groups
    from .groups.GaokaoBench import GaokaoBench_summary_groups
    from .groups.flores import flores_summary_groups
    from .groups.jigsaw_multilingual import jigsaw_multilingual_summary_groups

summarizer = dict(
    # Entries of the form "--------- ... ---------" are category headers; the
    # remaining entries are dataset abbreviations. Comments mark subcategories.
    dataset_abbrs=[
        "--------- 考试 Exam ---------",  # category
        # subcategory: mixed
        "ceval",
        "agieval",
        "mmlu",
        "GaokaoBench",
        "ARC-c",
        "--------- 语言 Language ---------",  # category
        # subcategory: word definition
        "WiC",
        "summedits",
        # subcategory: idioms
        "chid-dev",
        # subcategory: semantic similarity
        "afqmc-dev",
        "bustm-dev",
        # subcategory: coreference resolution
        "cluewsc-dev",
        "WSC",
        "winogrande",
        # subcategory: translation
        "flores_100",
        "--------- 知识 Knowledge ---------",  # category
        # subcategory: knowledge question answering
        "BoolQ",
        "commonsense_qa",
        "nq",
        "triviaqa",
        # subcategory: multilingual question answering (no entries listed)
        "--------- 推理 Reasoning ---------",  # category
        # subcategory: textual entailment
        "cmnli",
        "ocnli",
        "ocnli_fc-dev",
        "AX_b",
        "AX_g",
        "CB",
        "RTE",
        # subcategory: commonsense reasoning
        "story_cloze",
        "COPA",
        "ReCoRD",
        "hellaswag",
        "piqa",
        "siqa",
        "strategyqa",
        # subcategory: mathematical reasoning
        "math",
        "gsm8k",
        # subcategory: theorem application
        "TheoremQA",
        # subcategory: code
        "openai_humaneval",
        "mbpp",
        # subcategory: comprehensive reasoning
        "cmmlu",
        "bbh",
        "--------- 理解 Understanding ---------",  # category
        # subcategory: reading comprehension
        "C3",
        "CMRC_dev",
        "DRCD_dev",
        "MultiRC",
        "race-middle",
        "race-high",
        "openbookqa_fact",
        # subcategory: content summarization
        "csl_dev",
        "lcsts",
        "Xsum",
        # subcategory: content analysis
        "eprstmt-dev",
        "lambada",
        "tnews-dev",
        "--------- 安全 Safety ---------",  # category
        # subcategory: bias
        "crows_pairs",
        "--------- LEval Exact Match (Acc) ---------",  # category
        "LEval_coursera",
        "LEval_gsm100",
        "LEval_quality",
        "LEval_tpo",
        "LEval_topic_retrieval",
        "--------- LEval Gen (ROUGE) ---------",  # category
        "LEval_financialqa",
        "LEval_gov_report_summ",
        "LEval_legal_contract_qa",
        "LEval_meeting_summ",
        "LEval_multidocqa",
        "LEval_narrativeqa",
        "LEval_nq",
        "LEval_news_summ",
        "LEval_paper_assistant",
        "LEval_patent_summ",
        "LEval_review_summ",
        "LEval_scientificqa",
        # The trailing comma here is required: without it Python silently
        # concatenates this literal with the LongBench header that follows,
        # producing one bogus entry and dropping both real ones.
        "LEval_tvshow_summ",
        "--------- 长文本 LongBench ---------",  # category
        "longbench_lsht",
        "longbench_vcsum",
        "longbench_dureader",
        "longbench_multifieldqa_zh",
        "longbench_passage_retrieval_zh",
        "--------- 单选 自定义数据 ---------",  # category
        "SageBench-exam",
    ],
    # Flatten every ``*_summary_groups`` list in scope into a single list.
    # ``locals()`` is the iterable of the outermost ``for`` and is therefore
    # evaluated in this file's top-level scope, not inside the comprehension.
    summary_groups=sum(
        [v for k, v in locals().items() if k.endswith("_summary_groups")], []
    ),
    prompt_db=dict(
        database_path="configs/datasets/log.json",
        config_dir="configs/datasets",
        blacklist=".promptignore",
    ),
)