Compare commits
11 Commits
b63bd8f2c3
...
1c2a60f62b
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
1c2a60f62b | ||
|
|
1ee9b1e998 | ||
|
|
a75ecfa256 | ||
|
|
57480e8ca4 | ||
|
|
12dcb94b91 | ||
|
|
85bb88450f | ||
|
|
81bba7c21f | ||
|
|
611fa3fecc | ||
|
|
a724222593 | ||
|
|
67b6c11cd4 | ||
|
|
ca5a85e1ff |
27
README.md
Normal file
27
README.md
Normal file
@ -0,0 +1,27 @@
|
||||
---
|
||||
license: cc-by-nc-sa-4.0
|
||||
tags:
|
||||
- Alibaba
|
||||
- arxiv:2305.08322
|
||||
|
||||
task_categories:
|
||||
- text-classification
|
||||
- multiple-choice
|
||||
- question-answering
|
||||
language:
|
||||
- zh
|
||||
pretty_name: C-Eval
|
||||
size_categories:
|
||||
- 10K<n<100K
|
||||
|
||||
|
||||
---
|
||||
数据集文件元信息以及数据文件,请浏览“数据集文件”页面获取。
|
||||
|
||||
当前数据集卡片使用的是默认模版,数据集的贡献者未提供更加详细的数据集介绍,但是您可以通过如下GIT Clone命令,或者ModelScope SDK来下载数据集。
|
||||
|
||||
#### 下载方法
|
||||
:modelscope-code[]{type="sdk"}
|
||||
:modelscope-code[]{type="git"}
|
||||
|
||||
|
||||
157
ceval-exam.py
Normal file
157
ceval-exam.py
Normal file
@ -0,0 +1,157 @@
|
||||
# Copyright (c) Alibaba, Inc. and its affiliates.
|
||||
# Copyright 2020 The HuggingFace Datasets Authors and the current dataset script contributor.
|
||||
|
||||
# Licensed under the Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License
|
||||
import os
|
||||
|
||||
import datasets
|
||||
import pandas as pd
|
||||
|
||||
|
||||
# BibTeX entry passed to ``datasets.DatasetInfo(citation=...)``.
_CITATION = """\
@article{huang2023ceval,
title={C-Eval: A Multi-Level Multi-Discipline Chinese Evaluation Suite for Foundation Models},
author={Huang, Yuzhen and Bai, Yuzhuo and Zhu, Zhihao and Zhang, Junlei and Zhang, Jinghan and Su, Tangjun and Liu, Junteng and Lv, Chuancheng and Zhang, Yikai and Lei, Jiayi and Fu, Yao and Sun, Maosong and He, Junxian},
journal={arXiv preprint arXiv:2305.08322},
year={2023}
}
"""

# Human-readable summary passed to ``datasets.DatasetInfo(description=...)``.
_DESCRIPTION = """\
C-Eval is a comprehensive Chinese evaluation suite for foundation models. It consists of 13948 multi-choice questions spanning 52 diverse disciplines and four difficulty levels.
"""

# Project homepage reported in the dataset metadata.
_HOMEPAGE = "https://cevalbenchmark.com"

# License string reported in the dataset metadata.
_LICENSE = "Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License"

# Location of the zip archive that is downloaded and extracted by the builder.
_URL = "https://modelscope.oss-cn-beijing.aliyuncs.com/open_data/c-eval/ceval-exam.zip"
|
||||
|
||||
# Subject identifiers. Each entry becomes the name of one builder configuration
# and is also the file-name prefix of that subject's CSV files, so both the
# spelling and the order of the entries must be preserved.
task_list = [
    "computer_network",
    "operating_system",
    "computer_architecture",
    "college_programming",
    "college_physics",
    "college_chemistry",
    "advanced_mathematics",
    "probability_and_statistics",
    "discrete_mathematics",
    "electrical_engineer",
    "metrology_engineer",
    "high_school_mathematics",
    "high_school_physics",
    "high_school_chemistry",
    "high_school_biology",
    "middle_school_mathematics",
    "middle_school_biology",
    "middle_school_physics",
    "middle_school_chemistry",
    "veterinary_medicine",
    "college_economics",
    "business_administration",
    "marxism",
    "mao_zedong_thought",
    "education_science",
    "teacher_qualification",
    "high_school_politics",
    "high_school_geography",
    "middle_school_politics",
    "middle_school_geography",
    "modern_chinese_history",
    "ideological_and_moral_cultivation",
    "logic",
    "law",
    "chinese_language_and_literature",
    "art_studies",
    "professional_tour_guide",
    "legal_professional",
    "high_school_chinese",
    "high_school_history",
    "middle_school_history",
    "civil_servant",
    "sports_science",
    "plant_protection",
    "basic_medicine",
    "clinical_medicine",
    "urban_and_rural_planner",
    "accountant",
    "fire_engineer",
    "environmental_impact_assessment_engineer",
    "tax_accountant",
    "physician",
]
|
||||
|
||||
|
||||
class CevalExamConfig(datasets.BuilderConfig):
    """Builder configuration whose version is always pinned to 1.0.0."""

    def __init__(self, **kwargs):
        """Initialise the configuration.

        Args:
            **kwargs: Forwarded unchanged to ``datasets.BuilderConfig``
                (for example ``name``). ``version`` must not be supplied
                here because it is already passed explicitly.
        """
        pinned_version = datasets.Version("1.0.0")
        super().__init__(version=pinned_version, **kwargs)
|
||||
|
||||
|
||||
class CevalExam(datasets.GeneratorBasedBuilder):
    """Dataset builder with one configuration per entry of ``task_list``.

    For the selected configuration the builder exposes three splits
    (``test``, ``val`` and ``dev``), each read from
    ``<split>/<config name>_<split>.csv`` inside the archive at ``_URL``.
    """

    BUILDER_CONFIGS = [CevalExamConfig(name=task_name) for task_name in task_list]

    # Columns declared as ``string`` features in ``_info``. They are read
    # verbatim as text so that pandas' type inference cannot alter them.
    _TEXT_COLUMNS = ("question", "A", "B", "C", "D", "answer", "explanation")

    def _info(self):
        """Return the dataset metadata and the per-example feature schema."""
        features = datasets.Features(
            {
                "id": datasets.Value("int32"),
                "question": datasets.Value("string"),
                "A": datasets.Value("string"),
                "B": datasets.Value("string"),
                "C": datasets.Value("string"),
                "D": datasets.Value("string"),
                "answer": datasets.Value("string"),
                "explanation": datasets.Value("string"),
            }
        )
        return datasets.DatasetInfo(
            description=_DESCRIPTION,
            features=features,
            homepage=_HOMEPAGE,
            license=_LICENSE,
            citation=_CITATION,
        )

    def _split_generators(self, dl_manager):
        """Download and extract the archive, then describe the three splits.

        Args:
            dl_manager: Download manager supplied by ``datasets``; only its
                ``download_and_extract`` method is used.

        Returns:
            A list of ``datasets.SplitGenerator`` objects in the order
            test, val, dev. Each one passes the path of its CSV file to
            ``_generate_examples`` as ``filepath``.
        """
        data_dir = dl_manager.download_and_extract(_URL)
        task_name = self.config.name
        # (split object, directory name) pairs; the directory name is also
        # the suffix of the CSV file name.
        split_layout = (
            (datasets.Split.TEST, "test"),
            (datasets.Split("val"), "val"),
            (datasets.Split("dev"), "dev"),
        )
        return [
            datasets.SplitGenerator(
                name=split,
                gen_kwargs={
                    "filepath": os.path.join(
                        data_dir, folder, f"{task_name}_{folder}.csv"
                    ),
                },
            )
            for split, folder in split_layout
        ]

    def _generate_examples(self, filepath):
        """Yield ``(index, example)`` pairs from one CSV file.

        Args:
            filepath: Path to a UTF-8 encoded CSV file for one split.

        Yields:
            Tuples of the zero-based row index and a dict mapping column
            names to values. ``answer`` and ``explanation`` are set to
            ``""`` when the file has no such column.
        """
        # Read the text columns as plain strings and disable NA detection.
        # With pandas' defaults an all-numeric option column is parsed as
        # int/float (so "1" may come back as 1.0) and empty cells or tokens
        # such as "NA"/"None"/"null" become NaN, which does not match the
        # string features declared in ``_info``. Keys of ``dtype`` that are
        # absent from a given file are ignored by pandas.
        df = pd.read_csv(
            filepath,
            encoding="utf-8",
            dtype={column: str for column in self._TEXT_COLUMNS},
            keep_default_na=False,
        )
        for i, instance in enumerate(df.to_dict(orient="records")):
            # Some files may not carry these columns; fill them so every
            # example has all keys of the declared feature schema.
            instance.setdefault("answer", "")
            instance.setdefault("explanation", "")
            yield i, instance
|
||||
1
dataset_infos.json
Normal file
1
dataset_infos.json
Normal file
File diff suppressed because one or more lines are too long
Loading…
Reference in New Issue
Block a user