dataset-opencompass/configs/summarizers/medium_4pd.py

129 lines
3.7 KiB
Python
Raw Normal View History

2025-07-18 07:25:44 +00:00
from mmengine.config import read_base
with read_base():
from .groups.agieval import agieval_summary_groups
from .groups.mmlu import mmlu_summary_groups
from .groups.cmmlu import cmmlu_summary_groups
from .groups.ceval import ceval_summary_groups
from .groups.bbh import bbh_summary_groups
from .groups.GaokaoBench import GaokaoBench_summary_groups
from .groups.flores import flores_summary_groups
from .groups.jigsaw_multilingual import jigsaw_multilingual_summary_groups
# Summarizer configuration.
#
# `dataset_abbrs` is the ordered list of dataset abbreviations to report.
# Entries of the form "--------- <name> ---------" are category headers that
# separate the sections (presumably rendered as divider rows by the
# summarizer -- confirm against the consumer of this config).
# Every entry must be followed by a comma: adjacent string literals are
# silently concatenated by Python, which merges two entries into one.
summarizer = dict(
    dataset_abbrs=[
        "--------- 考试 Exam ---------",  # category
        # subcategory: mixed
        "ceval",
        "agieval",
        "mmlu",
        "GaokaoBench",
        "ARC-c",
        "--------- 语言 Language ---------",  # category
        # subcategory: word sense / definitions
        "WiC",
        "summedits",
        # subcategory: idioms
        "chid-dev",
        # subcategory: semantic similarity
        "afqmc-dev",
        "bustm-dev",
        # subcategory: coreference resolution
        "cluewsc-dev",
        "WSC",
        "winogrande",
        # subcategory: translation
        "flores_100",
        "--------- 知识 Knowledge ---------",  # category
        # subcategory: knowledge question answering
        "BoolQ",
        "commonsense_qa",
        "nq",
        "triviaqa",
        # subcategory: multilingual question answering (no entries listed)
        "--------- 推理 Reasoning ---------",  # category
        # subcategory: textual entailment
        "cmnli",
        "ocnli",
        "ocnli_fc-dev",
        "AX_b",
        "AX_g",
        "CB",
        "RTE",
        # subcategory: commonsense reasoning
        "story_cloze",
        "COPA",
        "ReCoRD",
        "hellaswag",
        "piqa",
        "siqa",
        "strategyqa",
        # subcategory: mathematical reasoning
        "math",
        "gsm8k",
        # subcategory: theorem application
        "TheoremQA",
        # subcategory: code
        "openai_humaneval",
        "mbpp",
        # subcategory: comprehensive reasoning
        "cmmlu",
        "bbh",
        "--------- 理解 Understanding ---------",  # category
        # subcategory: reading comprehension
        "C3",
        "CMRC_dev",
        "DRCD_dev",
        "MultiRC",
        "race-middle",
        "race-high",
        "openbookqa_fact",
        # subcategory: content summarization
        "csl_dev",
        "lcsts",
        "Xsum",
        # subcategory: content analysis
        "eprstmt-dev",
        "lambada",
        "tnews-dev",
        "--------- 安全 Safety ---------",  # category
        # subcategory: bias
        "crows_pairs",
        "--------- LEval Exact Match (Acc) ---------",  # category
        "LEval_coursera",
        "LEval_gsm100",
        "LEval_quality",
        "LEval_tpo",
        "LEval_topic_retrieval",
        "--------- LEval Gen (ROUGE) ---------",  # category
        "LEval_financialqa",
        "LEval_gov_report_summ",
        "LEval_legal_contract_qa",
        "LEval_meeting_summ",
        "LEval_multidocqa",
        "LEval_narrativeqa",
        "LEval_nq",
        "LEval_news_summ",
        "LEval_paper_assistant",
        "LEval_patent_summ",
        "LEval_review_summ",
        "LEval_scientificqa",
        # The comma after this entry was missing, which fused it with the
        # LongBench header below into a single bogus abbreviation.
        "LEval_tvshow_summ",
        "--------- 长文本 LongBench ---------",  # category
        "longbench_lsht",
        "longbench_vcsum",
        "longbench_dureader",
        "longbench_multifieldqa_zh",
        "longbench_passage_retrieval_zh",
        "--------- 单选 自定义数据 ---------",  # category
        "SageBench-exam",
    ],
    # Concatenate every list bound to a name ending in `_summary_groups` in
    # the current scope (the imported group definitions) into one flat list.
    summary_groups=sum(
        [v for k, v in locals().items() if k.endswith("_summary_groups")], []
    ),
    prompt_db=dict(
        database_path="configs/datasets/log.json",
        config_dir="configs/datasets",
        blacklist=".promptignore",
    ),
)