Compare commits

..

No commits in common. "1c2a60f62bf2bd358fa626b2e15684221871e440" and "b63bd8f2c3a5146c5ebf567320676941376a229d" have entirely different histories.

3 changed files with 0 additions and 185 deletions

View File

@ -1,27 +0,0 @@
---
license: cc-by-nc-sa-4.0
tags:
- Alibaba
- arxiv:2305.08322
task_categories:
- text-classification
- multiple-choice
- question-answering
language:
- zh
pretty_name: C-Eval
size_categories:
- 10K<n<100K
---
数据集文件元信息以及数据文件,请浏览“数据集文件”页面获取。
当前数据集卡片使用的是默认模版数据集的贡献者未提供更加详细的数据集介绍但是您可以通过如下GIT Clone命令或者ModelScope SDK来下载数据集
#### 下载方法
:modelscope-code[]{type="sdk"}
:modelscope-code[]{type="git"}

View File

@ -1,157 +0,0 @@
# Copyright (c) Alibaba, Inc. and its affiliates.
# Copyright 2020 The HuggingFace Datasets Authors and the current dataset script contributor.
# Licensed under the Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License
import os
import datasets
import pandas as pd
_CITATION = """\
@article{huang2023ceval,
title={C-Eval: A Multi-Level Multi-Discipline Chinese Evaluation Suite for Foundation Models},
author={Huang, Yuzhen and Bai, Yuzhuo and Zhu, Zhihao and Zhang, Junlei and Zhang, Jinghan and Su, Tangjun and Liu, Junteng and Lv, Chuancheng and Zhang, Yikai and Lei, Jiayi and Fu, Yao and Sun, Maosong and He, Junxian},
journal={arXiv preprint arXiv:2305.08322},
year={2023}
}
"""
_DESCRIPTION = """\
C-Eval is a comprehensive Chinese evaluation suite for foundation models. It consists of 13948 multi-choice questions spanning 52 diverse disciplines and four difficulty levels.
"""
_HOMEPAGE = "https://cevalbenchmark.com"
_LICENSE = "Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License"
_URL = r"https://modelscope.oss-cn-beijing.aliyuncs.com/open_data/c-eval/ceval-exam.zip"
task_list = [
"computer_network",
"operating_system",
"computer_architecture",
"college_programming",
"college_physics",
"college_chemistry",
"advanced_mathematics",
"probability_and_statistics",
"discrete_mathematics",
"electrical_engineer",
"metrology_engineer",
"high_school_mathematics",
"high_school_physics",
"high_school_chemistry",
"high_school_biology",
"middle_school_mathematics",
"middle_school_biology",
"middle_school_physics",
"middle_school_chemistry",
"veterinary_medicine",
"college_economics",
"business_administration",
"marxism",
"mao_zedong_thought",
"education_science",
"teacher_qualification",
"high_school_politics",
"high_school_geography",
"middle_school_politics",
"middle_school_geography",
"modern_chinese_history",
"ideological_and_moral_cultivation",
"logic",
"law",
"chinese_language_and_literature",
"art_studies",
"professional_tour_guide",
"legal_professional",
"high_school_chinese",
"high_school_history",
"middle_school_history",
"civil_servant",
"sports_science",
"plant_protection",
"basic_medicine",
"clinical_medicine",
"urban_and_rural_planner",
"accountant",
"fire_engineer",
"environmental_impact_assessment_engineer",
"tax_accountant",
"physician",
]
class CevalExamConfig(datasets.BuilderConfig):
def __init__(self, **kwargs):
super().__init__(version=datasets.Version("1.0.0"), **kwargs)
class CevalExam(datasets.GeneratorBasedBuilder):
BUILDER_CONFIGS = [
CevalExamConfig(
name=task_name,
)
for task_name in task_list
]
def _info(self):
features = datasets.Features(
{
"id":datasets.Value("int32"),
"question": datasets.Value("string"),
"A": datasets.Value("string"),
"B": datasets.Value("string"),
"C": datasets.Value("string"),
"D": datasets.Value("string"),
"answer": datasets.Value("string"),
"explanation":datasets.Value("string"),
}
)
return datasets.DatasetInfo(
description=_DESCRIPTION,
features=features,
homepage=_HOMEPAGE,
license=_LICENSE,
citation=_CITATION,
)
def _split_generators(self, dl_manager):
data_dir = dl_manager.download_and_extract(_URL)
task_name = self.config.name
return [
datasets.SplitGenerator(
name=datasets.Split.TEST,
gen_kwargs={
"filepath": os.path.join(
data_dir, "test", f"{task_name}_test.csv"
),
},
),
datasets.SplitGenerator(
name=datasets.Split("val"),
gen_kwargs={
"filepath": os.path.join(
data_dir, "val", f"{task_name}_val.csv"
),
},
),
datasets.SplitGenerator(
name=datasets.Split("dev"),
gen_kwargs={
"filepath": os.path.join(
data_dir, "dev", f"{task_name}_dev.csv"
),
},
),
]
def _generate_examples(self, filepath):
df = pd.read_csv(filepath,encoding="utf-8")
for i, instance in enumerate(df.to_dict(orient="records")):
if "answer" not in instance.keys():
instance["answer"]=""
if "explanation" not in instance.keys():
instance["explanation"]=""
yield i, instance

File diff suppressed because one or more lines are too long