129 lines
3.7 KiB
Python
129 lines
3.7 KiB
Python
|
|
from mmengine.config import read_base
|
||
|
|
|
||
|
|
with read_base():
|
||
|
|
from .groups.agieval import agieval_summary_groups
|
||
|
|
from .groups.mmlu import mmlu_summary_groups
|
||
|
|
from .groups.cmmlu import cmmlu_summary_groups
|
||
|
|
from .groups.ceval import ceval_summary_groups
|
||
|
|
from .groups.bbh import bbh_summary_groups
|
||
|
|
from .groups.GaokaoBench import GaokaoBench_summary_groups
|
||
|
|
from .groups.flores import flores_summary_groups
|
||
|
|
from .groups.jigsaw_multilingual import jigsaw_multilingual_summary_groups
|
||
|
|
|
||
|
|
# Summarizer configuration.
#
# Keys:
#   dataset_abbrs  - ordered list of dataset abbreviations. Entries of the form
#                    "--------- ... ---------" are category header strings kept
#                    in the same list (presumably rendered as separator rows in
#                    the report -- confirm against the summarizer in use).
#                    Commented-out labels mark subcategories and have no
#                    runtime effect.
#   summary_groups - concatenation of every variable in the current namespace
#                    whose name ends with "_summary_groups" (i.e. the lists
#                    imported at the top of this file).
#   prompt_db      - paths for the prompt database, its config directory and
#                    its ignore file.
summarizer = dict(
    dataset_abbrs=[
        "--------- 考试 Exam ---------",  # category
        # 'Mixed',  # subcategory
        "ceval",
        "agieval",
        "mmlu",
        "GaokaoBench",
        "ARC-c",
        "--------- 语言 Language ---------",  # category
        # 'Word definition',  # subcategory
        "WiC",
        "summedits",
        # 'Idioms',  # subcategory
        "chid-dev",
        # 'Semantic similarity',  # subcategory
        "afqmc-dev",
        "bustm-dev",
        # 'Coreference resolution',  # subcategory
        "cluewsc-dev",
        "WSC",
        "winogrande",
        # 'Translation',  # subcategory
        "flores_100",
        "--------- 知识 Knowledge ---------",  # category
        # 'Knowledge QA',  # subcategory
        "BoolQ",
        "commonsense_qa",
        "nq",
        "triviaqa",
        # 'Multilingual QA',  # subcategory (no datasets listed)
        "--------- 推理 Reasoning ---------",  # category
        # 'Textual entailment',  # subcategory
        "cmnli",
        "ocnli",
        "ocnli_fc-dev",
        "AX_b",
        "AX_g",
        "CB",
        "RTE",
        # 'Commonsense reasoning',  # subcategory
        "story_cloze",
        "COPA",
        "ReCoRD",
        "hellaswag",
        "piqa",
        "siqa",
        "strategyqa",
        # 'Mathematical reasoning',  # subcategory
        "math",
        "gsm8k",
        # 'Theorem application',  # subcategory
        "TheoremQA",
        # 'Code',  # subcategory
        "openai_humaneval",
        "mbpp",
        # 'Comprehensive reasoning',  # subcategory
        "cmmlu",
        "bbh",
        "--------- 理解 Understanding ---------",  # category
        # 'Reading comprehension',  # subcategory
        "C3",
        "CMRC_dev",
        "DRCD_dev",
        "MultiRC",
        "race-middle",
        "race-high",
        "openbookqa_fact",
        # 'Content summarization',  # subcategory
        "csl_dev",
        "lcsts",
        "Xsum",
        # 'Content analysis',  # subcategory
        "eprstmt-dev",
        "lambada",
        "tnews-dev",
        "--------- 安全 Safety ---------",  # category
        # 'Bias',  # subcategory
        "crows_pairs",
        "--------- LEval Exact Match (Acc) ---------",  # category
        "LEval_coursera",
        "LEval_gsm100",
        "LEval_quality",
        "LEval_tpo",
        "LEval_topic_retrieval",
        "--------- LEval Gen (ROUGE) ---------",  # category
        "LEval_financialqa",
        "LEval_gov_report_summ",
        "LEval_legal_contract_qa",
        "LEval_meeting_summ",
        "LEval_multidocqa",
        "LEval_narrativeqa",
        "LEval_nq",
        "LEval_news_summ",
        "LEval_paper_assistant",
        "LEval_patent_summ",
        "LEval_review_summ",
        "LEval_scientificqa",
        # The comma after the next entry was missing, so Python's implicit
        # string concatenation merged it with the LongBench header below.
        "LEval_tvshow_summ",
        "--------- 长文本 LongBench ---------",  # category
        "longbench_lsht",
        "longbench_vcsum",
        "longbench_dureader",
        "longbench_multifieldqa_zh",
        "longbench_passage_retrieval_zh",
        "--------- 单选 自定义数据 ---------",  # category
        "SageBench-exam",
    ],
    # Flatten all "*_summary_groups" lists visible in this namespace into one
    # list; sum(..., []) concatenates them in namespace iteration order.
    summary_groups=sum(
        [v for k, v in locals().items() if k.endswith("_summary_groups")], []
    ),
    prompt_db=dict(
        database_path="configs/datasets/log.json",
        config_dir="configs/datasets",
        blacklist=".promptignore",
    ),
)
|