dataset-ceval-exam/dataset_infos.json


			
				
					
						
						
						
							
							
							{"computer_network": {"description": "C-Eval is a comprehensive Chinese evaluation suite for foundation models. It consists of 13948 multi-choice questions spanning 52 diverse disciplines and four difficulty levels.\n", "citation": "@article{huang2023ceval,\n    title={C-Eval: A Multi-Level Multi-Discipline Chinese Evaluation Suite for Foundation Models}, \n    author={Huang, Yuzhen and Bai, Yuzhuo and Zhu, Zhihao and Zhang, Junlei and Zhang, Jinghan and Su, Tangjun and Liu, Junteng and Lv, Chuancheng and Zhang, Yikai and Lei, Jiayi and Fu, Yao and Sun, Maosong and He, Junxian},\n    journal={arXiv preprint arXiv:2305.08322},\n    year={2023}\n}\n", "homepage": "https://cevalbenchmark.com", "license": "Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License", "features": {"id": {"dtype": "int32", "id": null, "_type": "Value"}, "question": {"dtype": "string", "id": null, "_type": "Value"}, "A": {"dtype": "string", "id": null, "_type": "Value"}, "B": {"dtype": "string", "id": null, "_type": "Value"}, "C": {"dtype": "string", "id": null, "_type": "Value"}, "D": {"dtype": "string", "id": null, "_type": "Value"}, "answer": {"dtype": "string", "id": null, "_type": "Value"}, "explanation": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "ceval-exam", "config_name": "computer_network", "version": {"version_str": "1.0.0", "description": null, "major": 1, "minor": 0, "patch": 0}, "splits": {"test": {"name": "test", "num_bytes": 35408, "num_examples": 171, "dataset_name": "ceval-exam"}, "val": {"name": "val", "num_bytes": 3799, "num_examples": 19, "dataset_name": "ceval-exam"}, "dev": {"name": "dev", "num_bytes": 2361, "num_examples": 5, "dataset_name": "ceval-exam"}}, "download_checksums": {"https://modelscope.oss-cn-beijing.aliyuncs.com/open_data/c-eval/ceval-exam.zip": {"num_bytes": 1548257, "checksum": "b28bc560b655dc3c0ff05b20648b5ef8caed732bdaa8918e66fe5f3a1c711c52"}}, "download_size": 1548257, "post_processing_size": null, "dataset_size": 41568, "size_in_bytes": 1589825}, "operating_system": {"description": "C-Eval is a comprehensive Chinese evaluation suite for foundation models. It consists of 13948 multi-choice questions spanning 52 diverse disciplines and four difficulty levels.\n", "citation": "@article{huang2023ceval,\n    title={C-Eval: A Multi-Level Multi-Discipline Chinese Evaluation Suite for Foundation Models}, \n    author={Huang, Yuzhen and Bai, Yuzhuo and Zhu, Zhihao and Zhang, Junlei and Zhang, Jinghan and Su, Tangjun and Liu, Junteng and Lv, Chuancheng and Zhang, Yikai and Lei, Jiayi and Fu, Yao and Sun, Maosong and He, Junxian},\n    journal={arXiv preprint arXiv:2305.08322},\n    year={2023}\n}\n", "homepage": "https://cevalbenchmark.com", "license": "Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License", "features": {"id": {"dtype": "int32", "id": null, "_type": "Value"}, "question": {"dtype": "string", "id": null, "_type": "Value"}, "A": {"dtype": "string", "id": null, "_type": "Value"}, "B": {"dtype": "string", "id": null, "_type": "Value"}, "C": {"dtype": "string", "id": null, "_type": "Value"}, "D": {"dtype": "string", "id": null, "_type": "Value"}, "answer": {"dtype": "string", "id": null, "_type": "Value"}, "explanation": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "ceval-exam", "config_name": "operating_system", "version": {"version_str": "1.0.0", "description": null, "major": 1, "minor": 0, "patch": 0}, "splits": {"test": {"name": "test", "num_bytes": 31146, "num_examples": 179, "dataset_name": "ceval-exam"}, "val": {"name": "val", "num_bytes": 3299, "num_examples": 19, "dataset_name": "ceval-exam"}, "dev": {"name": "dev", "num_bytes": 2557, "num_examples": 5, "dataset_name": "ceval-exam"}}, "download_checksums": {"https://modelscope.oss-cn-beijing.aliyuncs.com/open_data/c-eval/ceval-exam.zip": {"num_bytes": 1548257, "checksum": "b28bc560b655dc3c0ff05b20648b5ef8caed732bdaa8918e66fe5f3a1c711c52"}}, "download_size": 1548257, "post_processing_size": null, "dataset_size": 37002, "size_in_bytes": 1585259}, "computer_architecture": {"description": "C-Eval is a comprehensive Chinese evaluation suite for foundation models. It consists of 13948 multi-choice questions spanning 52 diverse disciplines and four difficulty levels.\n", "citation": "@article{huang2023ceval,\n    title={C-Eval: A Multi-Level Multi-Discipline Chinese Evaluation Suite for Foundation Models}, \n    author={Huang, Yuzhen and Bai, Yuzhuo and Zhu, Zhihao and Zhang, Junlei and Zhang, Jinghan and Su, Tangjun and Liu, Junteng and Lv, Chuancheng and Zhang, Yikai and Lei, Jiayi and Fu, Yao and Sun, Maosong and He, Junxian},\n    journal={arXiv preprint arXiv:2305.08322},\n    year={2023}\n}\n", "homepage": "https://cevalbenchmark.com", "license": "Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License", "features": {"id": {"dtype": "int32", "id": null, "_type": "Value"}, "question": {"dtype": "string", "id": null, "_type": "Value"}, "A": {"dtype": "string", "id": null, "_type": "Value"}, "B": {"dtype": "string", "id": null, "_type": "Value"}, "C": {"dtype": "string", "id": null, "_type": "Value"}, "D": {"dtype": "string", "id": null, "_type": "Value"}, "answer": {"dtype": "string", "id": null, "_type": "Value"}, "explanation": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "ceval-exam", "config_name": "computer_architecture", "version": {"version_str": "1.0.0", "description": null, "major": 1, "minor": 0, "patch": 0}, "splits": {"test": {"name": "test", "num_bytes": 40613, "num_examples": 193, "dataset_name": "ceval-exam"}, "val": {"name": "val", "num_bytes": 4149, "num_examples": 21, "dataset_name": "ceval-exam"}, "dev": {"name": "dev", "num_bytes": 2793, "num_examples": 5, "dataset_name": "ceval-exam"}}, "download_checksums": {"https://modelscope.oss-cn-beijing.aliyuncs.com/open_data/c-eval/ceval-exam.zip": {"num_bytes": 1548257, "checksum": "b28bc560b655dc3c0ff05b20648b5ef8caed732bdaa8918e66fe5f3a1c711c52"}}, "download_size": 1548257, "post_processing_size": null, "dataset_size": 47555, "size_in_bytes": 1595812}, "college_programming": {"description": "C-Eval is a comprehensive Chinese evaluation suite for foundation models. It consists of 13948 multi-choice questions spanning 52 diverse disciplines and four difficulty levels.\n", "citation": "@article{huang2023ceval,\n    title={C-Eval: A Multi-Level Multi-Discipline Chinese Evaluation Suite for Foundation Models}, \n    author={Huang, Yuzhen and Bai, Yuzhuo and Zhu, Zhihao and Zhang, Junlei and Zhang, Jinghan and Su, Tangjun and Liu, Junteng and Lv, Chuancheng and Zhang, Yikai and Lei, Jiayi and Fu, Yao and Sun, Maosong and He, Junxian},\n    journal={arXiv preprint arXiv:2305.08322},\n    year={2023}\n}\n", "homepage": "https://cevalbenchmark.com", "license": "Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License", "features": {"id": {"dtype": "int32", "id": null, "_type": "Value"}, "question": {"dtype": "string", "id": null, "_type": "Value"}, "A": {"dtype": "string", "id": null, "_type": "Value"}, "B": {"dtype": "string", "id": null, "_type": "Value"}, "C": {"dtype": "string", "id": null, "_type": "Value"}, "D": {"dtype": "string", "id": null, "_type": "Value"}, "answer": {"dtype": "string", "id": null, "_type": "Value"}, "explanation": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "ceval-exam", "config_name": "college_programming", "version": {"version_str": "1.0.0", "description": null, "major": 1, "minor": 0, "patch": 0}, "splits": {"test": {"name": "test", "num_bytes": 83541, "num_examples": 342, "dataset_name": "ceval-exam"}, "val": {"name": "val", "num_bytes": 9543, "num_examples": 37, "dataset_name": "ceval-exam"}, "dev": {"name": "dev", "num_bytes": 2882, "num_examples": 5, "dataset_name": "ceval-exam"}}, "download_checksums": {"https://modelscope.oss-cn-beijing.aliyuncs.com/open_data/c-eval/ceval-exam.zip": {"num_bytes": 1548257, "checksum": "b28bc560b655dc3c0ff05b20648b5ef8caed732bdaa8918e66fe5f3a1c711c52"}}, "download_size": 1548257, "post_processing_size": null, "dataset_size": 95966, "size_in_bytes": 1644223}, "college_physics": {"description": "C-Eval is a comprehensive Chinese evaluation suite for foundation models. It consists of 13948 multi-choice questions spanning 52 diverse disciplines and four difficulty levels.\n", "citation": "@article{huang2023ceval,\n    title={C-Eval: A Multi-Level Multi-Discipline Chinese Evaluation Suite for Foundation Models}, \n    author={Huang, Yuzhen and Bai, Yuzhuo and Zhu, Zhihao and Zhang, Junlei and Zhang, Jinghan and Su, Tangjun and Liu, Junteng and Lv, Chuancheng and Zhang, Yikai and Lei, Jiayi and Fu, Yao and Sun, Maosong and He, Junxian},\n    journal={arXiv preprint arXiv:2305.08322},\n    year={2023}\n}\n", "homepage": "https://cevalbenchmark.com", "license": "Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License", "features": {"id": {"dtype": "int32", "id": null, "_type": "Value"}, "question": {"dtype": "string", "id": null, "_type": "Value"}, "A": {"dtype": "string", "id": null, "_type": "Value"}, "B": {"dtype": "string", "id": null, "_type": "Value"}, "C": {"dtype": "string", "id": null, "_type": "Value"}, "D": {"dtype": "string", "id": null, "_type": "Value"}, "answer": {"dtype": "string", "id": null, "_type": "Value"}, "explanation": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "ceval-exam", "config_name": "college_physics", "version": {"version_str": "1.0.0", "description": null, "major": 1, "minor": 0, "patch": 0}, "splits": {"test": {"name": "test", "num_bytes": 55731, "num_examples": 176, "dataset_name": "ceval-exam"}, "val": {"name": "val", "num_bytes": 6145, "num_examples": 19, "dataset_name": "ceval-exam"}, "dev": {"name": "dev", "num_bytes": 3824, "num_examples": 5, "dataset_name": "ceval-exam"}}, "download_checksums": {"https://modelscope.oss-cn-beijing.aliyuncs.com/open_data/c-eval/ceval-exam.zip": {"num_bytes": 1548257, "checksum": "b28bc560b655dc3c0ff05b20648b5ef8caed732bdaa8918e66fe5f3a1c711c52"}}, "download_size": 1548257, "post_processing_size": null, "dataset_size": 65700, "size_in_bytes": 1613957}, "college_chemistry": {"description": "C-Eval is a comprehensive Chinese evaluation suite for foundation models. It consists of 13948 multi-choice questions spanning 52 diverse disciplines and four difficulty levels.\n", "citation": "@article{huang2023ceval,\n    title={C-Eval: A Multi-Level Multi-Discipline Chinese Evaluation Suite for Foundation Models}, \n    author={Huang, Yuzhen and Bai, Yuzhuo and Zhu, Zhihao and Zhang, Junlei and Zhang, Jinghan and Su, Tangjun and Liu, Junteng and Lv, Chuancheng and Zhang, Yikai and Lei, Jiayi and Fu, Yao and Sun, Maosong and He, Junxian},\n    journal={arXiv preprint arXiv:2305.08322},\n    year={2023}\n}\n", "homepage": "https://cevalbenchmark.com", "license": "Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License", "features": {"id": {"dtype": "int32", "id": null, "_type": "Value"}, "question": {"dtype": "string", "id": null, "_type": "Value"}, "A": {"dtype": "string", "id": null, "_type": "Value"}, "B": {"dtype": "string", "id": null, "_type": "Value"}, "C": {"dtype": "string", "id": null, "_type": "Value"}, "D": {"dtype": "string", "id": null, "_type": "Value"}, "answer": {"dtype": "string", "id": null, "_type": "Value"}, "explanation": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "ceval-exam", "config_name": "college_chemistry", "version": {"version_str": "1.0.0", "description": null, "major": 1, "minor": 0, "patch": 0}, "splits": {"test": {"name": "test", "num_bytes": 45798, "num_examples": 224, "dataset_name": "ceval-exam"}, "val": {"name": "val", "num_bytes": 4443, "num_examples": 24, "dataset_name": "ceval-exam"}, "dev": {"name": "dev", "num_bytes": 3611, "num_examples": 5, "dataset_name": "ceval-exam"}}, "download_checksums": {"https://modelscope.oss-cn-beijing.aliyuncs.com/open_data/c-eval/ceval-exam.zip": {"num_bytes": 1548257, "checksum": "b28bc560b655dc3c0ff05b20648b5ef8caed732bdaa8918e66fe5f3a1c711c52"}}, "download_size": 1548257, "post_processing_size": null, "dataset_size": 53852, "size_in_bytes": 1602109}, "advanced_mathematics": {"description": "C-Eval is a comprehensive Chinese evaluation suite for foundation models. It consists of 13948 multi-choice questions spanning 52 diverse disciplines and four difficulty levels.\n", "citation": "@article{huang2023ceval,\n    title={C-Eval: A Multi-Level Multi-Discipline Chinese Evaluation Suite for Foundation Models}, \n    author={Huang, Yuzhen and Bai, Yuzhuo and Zhu, Zhihao and Zhang, Junlei and Zhang, Jinghan and Su, Tangjun and Liu, Junteng and Lv, Chuancheng and Zhang, Yikai and Lei, Jiayi and Fu, Yao and Sun, Maosong and He, Junxian},\n    journal={arXiv preprint arXiv:2305.08322},\n    year={2023}\n}\n", "homepage": "https://cevalbenchmark.com", "license": "Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License", "features": {"id": {"dtype": "int32", "id": null, "_type": "Value"}, "question": {"dtype": "string", "id": null, "_type": "Value"}, "A": {"dtype": "string", "id": null, "_type": "Value"}, "B": {"dtype": "string", "id": null, "_type": "Value"}, "C": {"dtype": "string", "id": null, "_type": "Value"}, "D": {"dtype": "string", "id": null, "_type": "Value"}, "answer": {"dtype": "string", "id": null, "_type": "Value"}, "explanation": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "ceval-exam", "config_name": "advanced_mathematics", "version": {"version_str": "1.0.0", "description": null, "major": 1, "minor": 0, "patch": 0}, "splits": {"test": {"name": "test", "num_bytes": 50031, "num_examples": 173, "dataset_name": "ceval-exam"}, "val": {"name": "val", "num_bytes": 5331, "num_examples": 19, "dataset_name": "ceval-exam"}, "dev": {"name": "dev", "num_bytes": 7012, "num_examples": 5, "dataset_name": "ceval-exam"}}, "download_checksums": {"https://modelscope.oss-cn-beijing.aliyuncs.com/open_data/c-eval/ceval-exam.zip": {"num_bytes": 1548257, "checksum": "b28bc560b655dc3c0ff05b20648b5ef8caed732bdaa8918e66fe5f3a1c711c52"}}, "download_size": 1548257, "post_processing_size": null, "dataset_size": 62374, "size_in_bytes": 1610631}, "probability_and_statistics": {"description": "C-Eval is a comprehensive Chinese evaluation suite for foundation models. It consists of 13948 multi-choice questions spanning 52 diverse disciplines and four difficulty levels.\n", "citation": "@article{huang2023ceval,\n    title={C-Eval: A Multi-Level Multi-Discipline Chinese Evaluation Suite for Foundation Models}, \n    author={Huang, Yuzhen and Bai, Yuzhuo and Zhu, Zhihao and Zhang, Junlei and Zhang, Jinghan and Su, Tangjun and Liu, Junteng and Lv, Chuancheng and Zhang, Yikai and Lei, Jiayi and Fu, Yao and Sun, Maosong and He, Junxian},\n    journal={arXiv preprint arXiv:2305.08322},\n    year={2023}\n}\n", "homepage": "https://cevalbenchmark.com", "license": "Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License", "features": {"id": {"dtype": "int32", "id": null, "_type": "Value"}, "question": {"dtype": "string", "id": null, "_type": "Value"}, "A": {"dtype": "string", "id": null, "_type": "Value"}, "B": {"dtype": "string", "id": null, "_type": "Value"}, "C": {"dtype": "string", "id": null, "_type": "Value"}, "D": {"dtype": "string", "id": null, "_type": "Value"}, "answer": {"dtype": "string", "id": null, "_type": "Value"}, "explanation": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "ceval-exam", "config_name": "probability_and_statistics", "version": {"version_str": "1.0.0", "description": null, "major": 1, "minor": 0, "patch": 0}, "splits": {"test": {"name": "test", "num_bytes": 56740, "num_examples": 166, "dataset_name": "ceval-exam"}, "val": {"name": "val", "num_bytes": 5781, "num_examples": 18, "dataset_name": "ceval-exam"}, "dev": {"name": "dev", "num_bytes": 6769, "num_examples": 5, "dataset_name": "ceval-exam"}}, "download_checksums": {"https://modelscope.oss-cn-beijing.aliyuncs.com/open_data/c-eval/ceval-exam.zip": {"num_bytes": 1548257, "checksum": "b28bc560b655dc3c0ff05b20648b5ef8caed732bdaa8918e66fe5f3a1c711c52"}}, "download_size": 1548257, "post_processing_size": null, "dataset_size": 69290, "size_in_bytes": 1617547}, "discrete_mathematics": {"description": "C-Eval is a comprehensive Chinese evaluation suite for foundation models. It consists of 13948 multi-choice questions spanning 52 diverse disciplines and four difficulty levels.\n", "citation": "@article{huang2023ceval,\n    title={C-Eval: A Multi-Level Multi-Discipline Chinese Evaluation Suite for Foundation Models}, \n    author={Huang, Yuzhen and Bai, Yuzhuo and Zhu, Zhihao and Zhang, Junlei and Zhang, Jinghan and Su, Tangjun and Liu, Junteng and Lv, Chuancheng and Zhang, Yikai and Lei, Jiayi and Fu, Yao and Sun, Maosong and He, Junxian},\n    journal={arXiv preprint arXiv:2305.08322},\n    year={2023}\n}\n", "homepage": "https://cevalbenchmark.com", "license": "Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License", "features": {"id": {"dtype": "int32", "id": null, "_type": "Value"}, "question": {"dtype": "string", "id": null, "_type": "Value"}, "A": {"dtype": "string", "id": null, "_type": "Value"}, "B": {"dtype": "string", "id": null, "_type": "Value"}, "C": {"dtype": "string", "id": null, "_type": "Value"}, "D": {"dtype": "string", "id": null, "_type": "Value"}, "answer": {"dtype": "string", "id": null, "_type": "Value"}, "explanation": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "ceval-exam", "config_name": "discrete_mathematics", "version": {"version_str": "1.0.0", "description": null, "major": 1, "minor": 0, "patch": 0}, "splits": {"test": {"name": "test", "num_bytes": 36045, "num_examples": 153, "dataset_name": "ceval-exam"}, "val": {"name": "val", "num_bytes": 3424, "num_examples": 16, "dataset_name": "ceval-exam"}, "dev": {"name": "dev", "num_bytes": 2002, "num_examples": 5, "dataset_name": "ceval-exam"}}, "download_checksums": {"https://modelscope.oss-cn-beijing.aliyuncs.com/open_data/c-eval/ceval-exam.zip": {"num_bytes": 1548257, "checksum": "b28bc560b655dc3c0ff05b20648b5ef8caed732bdaa8918e66fe5f3a1c711c52"}}, "download_size": 1548257, "post_processing_size": null, "dataset_size": 41471, "size_in_bytes": 1589728}, "electrical_engineer": {"description": "C-Eval is a comprehensive Chinese evaluation suite for foundation models. It consists of 13948 multi-choice questions spanning 52 diverse disciplines and four difficulty levels.\n", "citation": "@article{huang2023ceval,\n    title={C-Eval: A Multi-Level Multi-Discipline Chinese Evaluation Suite for Foundation Models}, \n    author={Huang, Yuzhen and Bai, Yuzhuo and Zhu, Zhihao and Zhang, Junlei and Zhang, Jinghan and Su, Tangjun and Liu, Junteng and Lv, Chuancheng and Zhang, Yikai and Lei, Jiayi and Fu, Yao and Sun, Maosong and He, Junxian},\n    journal={arXiv preprint arXiv:2305.08322},\n    year={2023}\n}\n", "homepage": "https://cevalbenchmark.com", "license": "Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License", "features": {"id": {"dtype": "int32", "id": null, "_type": "Value"}, "question": {"dtype": "string", "id": null, "_type": "Value"}, "A": {"dtype": "string", "id": null, "_type": "Value"}, "B": {"dtype": "string", "id": null, "_type": "Value"}, "C": {"dtype": "string", "id": null, "_type": "Value"}, "D": {"dtype": "string", "id": null, "_type": "Value"}, "answer": {"dtype": "string", "id": null, "_type": "Value"}, "explanation": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "ceval-exam", "config_name": "electrical_engineer", "version": {"version_str": "1.0.0", "description": null, "major": 1, "minor": 0, "patch": 0}, "splits": {"test": {"name": "test", "num_bytes": 73731, "num_examples": 339, "dataset_name": "ceval-exam"}, "val": {"name": "val", "num_bytes": 8315, "num_examples": 37, "dataset_name": "ceval-exam"}, "dev": {"name": "dev", "num_bytes": 2180, "num_examples": 5, "dataset_name": "ceval-exam"}}, "download_checksums": {"https://modelscope.oss-cn-beijing.aliyuncs.com/open_data/c-eval/ceval-exam.zip": {"num_bytes": 1548257, "checksum": "b28bc560b655dc3c0ff05b20648b5ef8caed732bdaa8918e66fe5f3a1c711c52"}}, "download_size": 1548257, "post_processing_size": null, "dataset_size": 84226, "size_in_bytes": 1632483}, "metrology_engineer": {"description": "C-Eval is a comprehensive Chinese evaluation suite for foundation models. It consists of 13948 multi-choice questions spanning 52 diverse disciplines and four difficulty levels.\n", "citation": "@article{huang2023ceval,\n    title={C-Eval: A Multi-Level Multi-Discipline Chinese Evaluation Suite for Foundation Models}, \n    author={Huang, Yuzhen and Bai, Yuzhuo and Zhu, Zhihao and Zhang, Junlei and Zhang, Jinghan and Su, Tangjun and Liu, Junteng and Lv, Chuancheng and Zhang, Yikai and Lei, Jiayi and Fu, Yao and Sun, Maosong and He, Junxian},\n    journal={arXiv preprint arXiv:2305.08322},\n    year={2023}\n}\n", "homepage": "https://cevalbenchmark.com", "license": "Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License", "features": {"id": {"dtype": "int32", "id": null, "_type": "Value"}, "question": {"dtype": "string", "id": null, "_type": "Value"}, "A": {"dtype": "string", "id": null, "_type": "Value"}, "B": {"dtype": "string", "id": null, "_type": "Value"}, "C": {"dtype": "string", "id": null, "_type": "Value"}, "D": {"dtype": "string", "id": null, "_type": "Value"}, "answer": {"dtype": "string", "id": null, "_type": "Value"}, "explanation": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "ceval-exam", "config_name": "metrology_engineer", "version": {"version_str": "1.0.0", "description": null, "major": 1, "minor": 0, "patch": 0}, "splits": {"test": {"name": "test", "num_bytes": 47484, "num_examples": 219, "dataset_name": "ceval-exam"}, "val": {"name": "val", "num_bytes": 6116, "num_examples": 24, "dataset_name": "ceval-exam"}, "dev": {"name": "dev", "num_bytes": 2485, "num_examples": 5, "dataset_name": "ceval-exam"}}, "download_checksums": {"https://modelscope.oss-cn-beijing.aliyuncs.com/open_data/c-eval/ceval-exam.zip": {"num_bytes": 1548257, "checksum": "b28bc560b655dc3c0ff05b20648b5ef8caed732bdaa8918e66fe5f3a1c711c52"}}, "download_size": 1548257, "post_processing_size": null, "dataset_size": 56085, "size_in_bytes": 1604342}, "high_school_mathematics": {"description": "C-Eval is a comprehensive Chinese evaluation suite for foundation models. It consists of 13948 multi-choice questions spanning 52 diverse disciplines and four difficulty levels.\n", "citation": "@article{huang2023ceval,\n    title={C-Eval: A Multi-Level Multi-Discipline Chinese Evaluation Suite for Foundation Models}, \n    author={Huang, Yuzhen and Bai, Yuzhuo and Zhu, Zhihao and Zhang, Junlei and Zhang, Jinghan and Su, Tangjun and Liu, Junteng and Lv, Chuancheng and Zhang, Yikai and Lei, Jiayi and Fu, Yao and Sun, Maosong and He, Junxian},\n    journal={arXiv preprint arXiv:2305.08322},\n    year={2023}\n}\n", "homepage": "https://cevalbenchmark.com", "license": "Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License", "features": {"id": {"dtype": "int32", "id": null, "_type": "Value"}, "question": {"dtype": "string", "id": null, "_type": "Value"}, "A": {"dtype": "string", "id": null, "_type": "Value"}, "B": {"dtype": "string", "id": null, "_type": "Value"}, "C": {"dtype": "string", "id": null, "_type": "Value"}, "D": {"dtype": "string", "id": null, "_type": "Value"}, "answer": {"dtype": "string", "id": null, "_type": "Value"}, "explanation": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "ceval-exam", "config_name": "high_school_mathematics", "version": {"version_str": "1.0.0", "description": null, "major": 1, "minor": 0, "patch": 0}, "splits": {"test": {"name": "test", "num_bytes": 41080, "num_examples": 166, "dataset_name": "ceval-exam"}, "val": {"name": "val", "num_bytes": 5144, "num_examples": 18, "dataset_name": "ceval-exam"}, "dev": {"name": "dev", "num_bytes": 3552, "num_examples": 5, "dataset_name": "ceval-exam"}}, "download_checksums": {"https://modelscope.oss-cn-beijing.aliyuncs.com/open_data/c-eval/ceval-exam.zip": {"num_bytes": 1548257, "checksum": "b28bc560b655dc3c0ff05b20648b5ef8caed732bdaa8918e66fe5f3a1c711c52"}}, "download_size": 1548257, "post_processing_size": null, "dataset_size": 49776, "size_in_bytes": 1598033}, "high_school_physics": {"description": "C-Eval is a comprehensive Chinese evaluation suite for foundation models. It consists of 13948 multi-choice questions spanning 52 diverse disciplines and four difficulty levels.\n", "citation": "@article{huang2023ceval,\n    title={C-Eval: A Multi-Level Multi-Discipline Chinese Evaluation Suite for Foundation Models}, \n    author={Huang, Yuzhen and Bai, Yuzhuo and Zhu, Zhihao and Zhang, Junlei and Zhang, Jinghan and Su, Tangjun and Liu, Junteng and Lv, Chuancheng and Zhang, Yikai and Lei, Jiayi and Fu, Yao and Sun, Maosong and He, Junxian},\n    journal={arXiv preprint arXiv:2305.08322},\n    year={2023}\n}\n", "homepage": "https://cevalbenchmark.com", "license": "Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License", "features": {"id": {"dtype": "int32", "id": null, "_type": "Value"}, "question": {"dtype": "string", "id": null, "_type": "Value"}, "A": {"dtype": "string", "id": null, "_type": "Value"}, "B": {"dtype": "string", "id": null, "_type": "Value"}, "C": {"dtype": "string", "id": null, "_type": "Value"}, "D": {"dtype": "string", "id": null, "_type": "Value"}, "answer": {"dtype": "string", "id": null, "_type": "Value"}, "explanation": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "ceval-exam", "config_name": "high_school_physics", "version": {"version_str": "1.0.0", "description": null, "major": 1, "minor": 0, "patch": 0}, "splits": {"test": {"name": "test", "num_bytes": 61678, "num_examples": 175, "dataset_name": "ceval-exam"}, "val": {"name": "val", "num_bytes": 7266, "num_examples": 19, "dataset_name": "ceval-exam"}, "dev": {"name": "dev", "num_bytes": 2266, "num_examples": 5, "dataset_name": "ceval-exam"}}, "download_checksums": {"https://modelscope.oss-cn-beijing.aliyuncs.com/open_data/c-eval/ceval-exam.zip": {"num_bytes": 1548257, "checksum": "b28bc560b655dc3c0ff05b20648b5ef8caed732bdaa8918e66fe5f3a1c711c52"}}, "download_size": 1548257, "post_processing_size": null, "dataset_size": 71210, "size_in_bytes": 1619467}, "high_school_chemistry": {"description": "C-Eval is a comprehensive Chinese evaluation suite for foundation models. It consists of 13948 multi-choice questions spanning 52 diverse disciplines and four difficulty levels.\n", "citation": "@article{huang2023ceval,\n    title={C-Eval: A Multi-Level Multi-Discipline Chinese Evaluation Suite for Foundation Models}, \n    author={Huang, Yuzhen and Bai, Yuzhuo and Zhu, Zhihao and Zhang, Junlei and Zhang, Jinghan and Su, Tangjun and Liu, Junteng and Lv, Chuancheng and Zhang, Yikai and Lei, Jiayi and Fu, Yao and Sun, Maosong and He, Junxian},\n    journal={arXiv preprint arXiv:2305.08322},\n    year={2023}\n}\n", "homepage": "https://cevalbenchmark.com", "license": "Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License", "features": {"id": {"dtype": "int32", "id": null, "_type": "Value"}, "question": {"dtype": "string", "id": null, "_type": "Value"}, "A": {"dtype": "string", "id": null, "_type": "Value"}, "B": {"dtype": "string", "id": null, "_type": "Value"}, "C": {"dtype": "string", "id": null, "_type": "Value"}, "D": {"dtype": "string", "id": null, "_type": "Value"}, "answer": {"dtype": "string", "id": null, "_type": "Value"}, "explanation": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "ceval-exam", "config_name": "high_school_chemistry", "version": {"version_str": "1.0.0", "description": null, "major": 1, "minor": 0, "patch": 0}, "splits": {"test": {"name": "test", "num_bytes": 46918, "num_examples": 172, "dataset_name": "ceval-exam"}, "val": {"name": "val", "num_bytes": 5625, "num_examples": 19, "dataset_name": "ceval-exam"}, "dev": {"name": "dev", "num_bytes": 2576, "num_examples": 5, "dataset_name": "ceval-exam"}}, "download_checksums": {"https://modelscope.oss-cn-beijing.aliyuncs.com/open_data/c-eval/ceval-exam.zip": {"num_bytes": 1548257, "checksum": "b28bc560b655dc3c0ff05b20648b5ef8caed732bdaa8918e66fe5f3a1c711c52"}}, "download_size": 1548257, "post_processing_size": null, "dataset_size": 55119, "size_in_bytes": 1603376}, "high_school_biology": {"description": "C-Eval is a comprehensive Chinese evaluation suite for foundation models. It consists of 13948 multi-choice questions spanning 52 diverse disciplines and four difficulty levels.\n", "citation": "@article{huang2023ceval,\n    title={C-Eval: A Multi-Level Multi-Discipline Chinese Evaluation Suite for Foundation Models}, \n    author={Huang, Yuzhen and Bai, Yuzhuo and Zhu, Zhihao and Zhang, Junlei and Zhang, Jinghan and Su, Tangjun and Liu, Junteng and Lv, Chuancheng and Zhang, Yikai and Lei, Jiayi and Fu, Yao and Sun, Maosong and He, Junxian},\n    journal={arXiv preprint arXiv:2305.08322},\n    year={2023}\n}\n", "homepage": "https://cevalbenchmark.com", "license": "Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License", "features": {"id": {"dtype": "int32", "id": null, "_type": "Value"}, "question": {"dtype": "string", "id": null, "_type": "Value"}, "A": {"dtype": "string", "id": null, "_type": "Value"}, "B": {"dtype": "string", "id": null, "_type": "Value"}, "C": {"dtype": "string", "id": null, "_type": "Value"}, "D": {"dtype": "string", "id": null, "_type": "Value"}, "answer": {"dtype": "string", "id": null, "_type": "Value"}, "explanation": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "ceval-exam", "config_name": "high_school_biology", "version": {"version_str": "1.0.0", "description": null, "major": 1, "minor": 0, "patch": 0}, "splits": {"test": {"name": "test", "num_bytes": 55239, "num_examples": 175, "dataset_name": "ceval-exam"}, "val": {"name": "val", "num_bytes": 6105, "num_examples": 19, "dataset_name": "ceval-exam"}, "dev": {"name": "dev", "num_bytes": 2164, "num_examples": 5, "dataset_name": "ceval-exam"}}, "download_checksums": {"https://modelscope.oss-cn-beijing.aliyuncs.com/open_data/c-eval/ceval-exam.zip": {"num_bytes": 1548257, "checksum": "b28bc560b655dc3c0ff05b20648b5ef8caed732bdaa8918e66fe5f3a1c711c52"}}, "download_size": 1548257, "post_processing_size": null, "dataset_size": 63508, "size_in_bytes": 1611765}, "middle_school_mathematics": {"description": "C-Eval is a comprehensive Chinese evaluation suite for foundation models. It consists of 13948 multi-choice questions spanning 52 diverse disciplines and four difficulty levels.\n", "citation": "@article{huang2023ceval,\n    title={C-Eval: A Multi-Level Multi-Discipline Chinese Evaluation Suite for Foundation Models}, \n    author={Huang, Yuzhen and Bai, Yuzhuo and Zhu, Zhihao and Zhang, Junlei and Zhang, Jinghan and Su, Tangjun and Liu, Junteng and Lv, Chuancheng and Zhang, Yikai and Lei, Jiayi and Fu, Yao and Sun, Maosong and He, Junxian},\n    journal={arXiv preprint arXiv:2305.08322},\n    year={2023}\n}\n", "homepage": "https://cevalbenchmark.com", "license": "Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License", "features": {"id": {"dtype": "int32", "id": null, "_type": "Value"}, "question": {"dtype": "string", "id": null, "_type": "Value"}, "A": {"dtype": "string", "id": null, "_type": "Value"}, "B": {"dtype": "string", "id": null, "_type": "Value"}, "C": {"dtype": "string", "id": null, "_type": "Value"}, "D": {"dtype": "string", "id": null, "_type": "Value"}, "answer": {"dtype": "string", "id": null, "_type": "Value"}, "explanation": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "ceval-exam", "config_name": "middle_school_mathematics", "version": {"version_str": "1.0.0", "description": null, "major": 1, "minor": 0, "patch": 0}, "splits": {"test": {"name": "test", "num_bytes": 33142, "num_examples": 177, "dataset_name": "ceval-exam"}, "val": {"name": "val", "num_bytes": 4897, "num_examples": 19, "dataset_name": "ceval-exam"}, "dev": {"name": "dev", "num_bytes": 3187, "num_examples": 5, "dataset_name": "ceval-exam"}}, "download_checksums": {"https://modelscope.oss-cn-beijing.aliyuncs.com/open_data/c-eval/ceval-exam.zip": {"num_bytes": 1548257, "checksum": "b28bc560b655dc3c0ff05b20648b5ef8caed732bdaa8918e66fe5f3a1c711c52"}}, "download_size": 1548257, "post_processing_size": null, "dataset_size": 41226, "size_in_bytes": 1589483}, "middle_school_biology": {"description": "C-Eval is a comprehensive Chinese evaluation suite for foundation models. It consists of 13948 multi-choice questions spanning 52 diverse disciplines and four difficulty levels.\n", "citation": "@article{huang2023ceval,\n    title={C-Eval: A Multi-Level Multi-Discipline Chinese Evaluation Suite for Foundation Models}, \n    author={Huang, Yuzhen and Bai, Yuzhuo and Zhu, Zhihao and Zhang, Junlei and Zhang, Jinghan and Su, Tangjun and Liu, Junteng and Lv, Chuancheng and Zhang, Yikai and Lei, Jiayi and Fu, Yao and Sun, Maosong and He, Junxian},\n    journal={arXiv preprint arXiv:2305.08322},\n    year={2023}\n}\n", "homepage": "https://cevalbenchmark.com", "license": "Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License", "features": {"id": {"dtype": "int32", "id": null, "_type": "Value"}, "question": {"dtype": "string", "id": null, "_type": "Value"}, "A": {"dtype": "string", "id": null, "_type": "Value"}, "B": {"dtype": "string", "id": null, "_type": "Value"}, "C": {"dtype": "string", "id": null, "_type": "Value"}, "D": {"dtype": "string", "id": null, "_type": "Value"}, "answer": {"dtype": "string", "id": null, "_type": "Value"}, "explanation": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "ceval-exam", "config_name": "middle_school_biology", "version": {"version_str": "1.0.0", "description": null, "major": 1, "minor": 0, "patch": 0}, "splits": {"test": {"name": "test", "num_bytes": 47264, "num_examples": 192, "dataset_name": "ceval-exam"}, "val": {"name": "val", "num_bytes": 5263, "num_examples": 21, "dataset_name": "ceval-exam"}, "dev": {"name": "dev", "num_bytes": 4327, "num_examples": 5, "dataset_name": "ceval-exam"}}, "download_checksums": {"https://modelscope.oss-cn-beijing.aliyuncs.com/open_data/c-eval/ceval-exam.zip": {"num_bytes": 1548257, "checksum": "b28bc560b655dc3c0ff05b20648b5ef8caed732bdaa8918e66fe5f3a1c711c52"}}, "download_size": 1548257, "post_processing_size": null, "dataset_size": 56854, "size_in_bytes": 1605111}, "middle_school_physics": {"description": "C-Eval is a comprehensive Chinese evaluation suite for foundation models. It consists of 13948 multi-choice questions spanning 52 diverse disciplines and four difficulty levels.\n", "citation": "@article{huang2023ceval,\n    title={C-Eval: A Multi-Level Multi-Discipline Chinese Evaluation Suite for Foundation Models}, \n    author={Huang, Yuzhen and Bai, Yuzhuo and Zhu, Zhihao and Zhang, Junlei and Zhang, Jinghan and Su, Tangjun and Liu, Junteng and Lv, Chuancheng and Zhang, Yikai and Lei, Jiayi and Fu, Yao and Sun, Maosong and He, Junxian},\n    journal={arXiv preprint arXiv:2305.08322},\n    year={2023}\n}\n", "homepage": "https://cevalbenchmark.com", "license": "Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License", "features": {"id": {"dtype": "int32", "id": null, "_type": "Value"}, "question": {"dtype": "string", "id": null, "_type": "Value"}, "A": {"dtype": "string", "id": null, "_type": "Value"}, "B": {"dtype": "string", "id": null, "_type": "Value"}, "C": {"dtype": "string", "id": null, "_type": "Value"}, "D": {"dtype": "string", "id": null, "_type": "Value"}, "answer": {"dtype": "string", "id": null, "_type": "Value"}, "explanation": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "ceval-exam", "config_name": "middle_school_physics", "version": {"version_str": "1.0.0", "description": null, "major": 1, "minor": 0, "patch": 0}, "splits": {"test": {"name": "test", "num_bytes": 48793, "num_examples": 178, "dataset_name": "ceval-exam"}, "val": {"name": "val", "num_bytes": 5279, "num_examples": 19, "dataset_name": "ceval-exam"}, "dev": {"name": "dev", "num_bytes": 3531, "num_examples": 5, "dataset_name": "ceval-exam"}}, "download_checksums": {"https://modelscope.oss-cn-beijing.aliyuncs.com/open_data/c-eval/ceval-exam.zip": {"num_bytes": 1548257, "checksum": "b28bc560b655dc3c0ff05b20648b5ef8caed732bdaa8918e66fe5f3a1c711c52"}}, "download_size": 1548257, "post_processing_size": null, "dataset_size": 57603, "size_in_bytes": 1605860}, "middle_school_chemistry": {"description": "C-Eval is a comprehensive Chinese evaluation suite for foundation models. It consists of 13948 multi-choice questions spanning 52 diverse disciplines and four difficulty levels.\n", "citation": "@article{huang2023ceval,\n    title={C-Eval: A Multi-Level Multi-Discipline Chinese Evaluation Suite for Foundation Models}, \n    author={Huang, Yuzhen and Bai, Yuzhuo and Zhu, Zhihao and Zhang, Junlei and Zhang, Jinghan and Su, Tangjun and Liu, Junteng and Lv, Chuancheng and Zhang, Yikai and Lei, Jiayi and Fu, Yao and Sun, Maosong and He, Junxian},\n    journal={arXiv preprint arXiv:2305.08322},\n    year={2023}\n}\n", "homepage": "https://cevalbenchmark.com", "license": "Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License", "features": {"id": {"dtype": "int32", "id": null, "_type": "Value"}, "question": {"dtype": "string", "id": null, "_type": "Value"}, "A": {"dtype": "string", "id": null, "_type": "Value"}, "B": {"dtype": "string", "id": null, "_type": "Value"}, "C": {"dtype": "string", "id": null, "_type": "Value"}, "D": {"dtype": "string", "id": null, "_type": "Value"}, "answer": {"dtype": "string", "id": null, "_type": "Value"}, "explanation": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "ceval-exam", "config_name": "middle_school_chemistry", "version": {"version_str": "1.0.0", "description": null, "major": 1, "minor": 0, "patch": 0}, "splits": {"test": {"name": "test", "num_bytes": 47575, "num_examples": 185, "dataset_name": "ceval-exam"}, "val": {"name": "val", "num_bytes": 5654, "num_examples": 20, "dataset_name": "ceval-exam"}, "dev": {"name": "dev", "num_bytes": 3866, "num_examples": 5, "dataset_name": "ceval-exam"}}, "download_checksums": {"https://modelscope.oss-cn-beijing.aliyuncs.com/open_data/c-eval/ceval-exam.zip": {"num_bytes": 1548257, "checksum": "b28bc560b655dc3c0ff05b20648b5ef8caed732bdaa8918e66fe5f3a1c711c52"}}, "download_size": 1548257, "post_processing_size": null, "dataset_size": 57095, "size_in_bytes": 1605352}, "veterinary_medicine": {"description": "C-Eval is a comprehensive Chinese evaluation suite for foundation models. It consists of 13948 multi-choice questions spanning 52 diverse disciplines and four difficulty levels.\n", "citation": "@article{huang2023ceval,\n    title={C-Eval: A Multi-Level Multi-Discipline Chinese Evaluation Suite for Foundation Models}, \n    author={Huang, Yuzhen and Bai, Yuzhuo and Zhu, Zhihao and Zhang, Junlei and Zhang, Jinghan and Su, Tangjun and Liu, Junteng and Lv, Chuancheng and Zhang, Yikai and Lei, Jiayi and Fu, Yao and Sun, Maosong and He, Junxian},\n    journal={arXiv preprint arXiv:2305.08322},\n    year={2023}\n}\n", "homepage": "https://cevalbenchmark.com", "license": "Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License", "features": {"id": {"dtype": "int32", "id": null, "_type": "Value"}, "question": {"dtype": "string", "id": null, "_type": "Value"}, "A": {"dtype": "string", "id": null, "_type": "Value"}, "B": {"dtype": "string", "id": null, "_type": "Value"}, "C": {"dtype": "string", "id": null, "_type": "Value"}, "D": {"dtype": "string", "id": null, "_type": "Value"}, "answer": {"dtype": "string", "id": null, "_type": "Value"}, "explanation": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "ceval-exam", "config_name": "veterinary_medicine", "version": {"version_str": "1.0.0", "description": null, "major": 1, "minor": 0, "patch": 0}, "splits": {"test": {"name": "test", "num_bytes": 39465, "num_examples": 210, "dataset_name": "ceval-exam"}, "val": {"name": "val", "num_bytes": 4559, "num_examples": 23, "dataset_name": "ceval-exam"}, "dev": {"name": "dev", "num_bytes": 2362, "num_examples": 5, "dataset_name": "ceval-exam"}}, "download_checksums": {"https://modelscope.oss-cn-beijing.aliyuncs.com/open_data/c-eval/ceval-exam.zip": {"num_bytes": 1548257, "checksum": "b28bc560b655dc3c0ff05b20648b5ef8caed732bdaa8918e66fe5f3a1c711c52"}}, "download_size": 1548257, "post_processing_size": null, "dataset_size": 46386, "size_in_bytes": 1594643}, "college_economics": {"description": "C-Eval is a comprehensive Chinese evaluation suite for foundation models. It consists of 13948 multi-choice questions spanning 52 diverse disciplines and four difficulty levels.\n", "citation": "@article{huang2023ceval,\n    title={C-Eval: A Multi-Level Multi-Discipline Chinese Evaluation Suite for Foundation Models}, \n    author={Huang, Yuzhen and Bai, Yuzhuo and Zhu, Zhihao and Zhang, Junlei and Zhang, Jinghan and Su, Tangjun and Liu, Junteng and Lv, Chuancheng and Zhang, Yikai and Lei, Jiayi and Fu, Yao and Sun, Maosong and He, Junxian},\n    journal={arXiv preprint arXiv:2305.08322},\n    year={2023}\n}\n", "homepage": "https://cevalbenchmark.com", "license": "Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License", "features": {"id": {"dtype": "int32", "id": null, "_type": "Value"}, "question": {"dtype": "string", "id": null, "_type": "Value"}, "A": {"dtype": "string", "id": null, "_type": "Value"}, "B": {"dtype": "string", "id": null, "_type": "Value"}, "C": {"dtype": "string", "id": null, "_type": "Value"}, "D": {"dtype": "string", "id": null, "_type": "Value"}, "answer": {"dtype": "string", "id": null, "_type": "Value"}, "explanation": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "ceval-exam", "config_name": "college_economics", "version": {"version_str": "1.0.0", "description": null, "major": 1, "minor": 0, "patch": 0}, "splits": {"test": {"name": "test", "num_bytes": 119734, "num_examples": 497, "dataset_name": "ceval-exam"}, "val": {"name": "val", "num_bytes": 14461, "num_examples": 55, "dataset_name": "ceval-exam"}, "dev": {"name": "dev", "num_bytes": 3673, "num_examples": 5, "dataset_name": "ceval-exam"}}, "download_checksums": {"https://modelscope.oss-cn-beijing.aliyuncs.com/open_data/c-eval/ceval-exam.zip": {"num_bytes": 1548257, "checksum": "b28bc560b655dc3c0ff05b20648b5ef8caed732bdaa8918e66fe5f3a1c711c52"}}, "download_size": 1548257, "post_processing_size": null, "dataset_size": 137868, "size_in_bytes": 1686125}, "business_administration": {"description": "C-Eval is a comprehensive Chinese evaluation suite for foundation models. It consists of 13948 multi-choice questions spanning 52 diverse disciplines and four difficulty levels.\n", "citation": "@article{huang2023ceval,\n    title={C-Eval: A Multi-Level Multi-Discipline Chinese Evaluation Suite for Foundation Models}, \n    author={Huang, Yuzhen and Bai, Yuzhuo and Zhu, Zhihao and Zhang, Junlei and Zhang, Jinghan and Su, Tangjun and Liu, Junteng and Lv, Chuancheng and Zhang, Yikai and Lei, Jiayi and Fu, Yao and Sun, Maosong and He, Junxian},\n    journal={arXiv preprint arXiv:2305.08322},\n    year={2023}\n}\n", "homepage": "https://cevalbenchmark.com", "license": "Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License", "features": {"id": {"dtype": "int32", "id": null, "_type": "Value"}, "question": {"dtype": "string", "id": null, "_type": "Value"}, "A": {"dtype": "string", "id": null, "_type": "Value"}, "B": {"dtype": "string", "id": null, "_type": "Value"}, "C": {"dtype": "string", "id": null, "_type": "Value"}, "D": {"dtype": "string", "id": null, "_type": "Value"}, "answer": {"dtype": "string", "id": null, "_type": "Value"}, "explanation": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "ceval-exam", "config_name": "business_administration", "version": {"version_str": "1.0.0", "description": null, "major": 1, "minor": 0, "patch": 0}, "splits": {"test": {"name": "test", "num_bytes": 78387, "num_examples": 301, "dataset_name": "ceval-exam"}, "val": {"name": "val", "num_bytes": 9225, "num_examples": 33, "dataset_name": "ceval-exam"}, "dev": {"name": "dev", "num_bytes": 3155, "num_examples": 5, "dataset_name": "ceval-exam"}}, "download_checksums": {"https://modelscope.oss-cn-beijing.aliyuncs.com/open_data/c-eval/ceval-exam.zip": {"num_bytes": 1548257, "checksum": "b28bc560b655dc3c0ff05b20648b5ef8caed732bdaa8918e66fe5f3a1c711c52"}}, "download_size": 1548257, "post_processing_size": null, "dataset_size": 90767, "size_in_bytes": 1639024}, "marxism": {"description": "C-Eval is a comprehensive Chinese evaluation suite for foundation models. It consists of 13948 multi-choice questions spanning 52 diverse disciplines and four difficulty levels.\n", "citation": "@article{huang2023ceval,\n    title={C-Eval: A Multi-Level Multi-Discipline Chinese Evaluation Suite for Foundation Models}, \n    author={Huang, Yuzhen and Bai, Yuzhuo and Zhu, Zhihao and Zhang, Junlei and Zhang, Jinghan and Su, Tangjun and Liu, Junteng and Lv, Chuancheng and Zhang, Yikai and Lei, Jiayi and Fu, Yao and Sun, Maosong and He, Junxian},\n    journal={arXiv preprint arXiv:2305.08322},\n    year={2023}\n}\n", "homepage": "https://cevalbenchmark.com", "license": "Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License", "features": {"id": {"dtype": "int32", "id": null, "_type": "Value"}, "question": {"dtype": "string", "id": null, "_type": "Value"}, "A": {"dtype": "string", "id": null, "_type": "Value"}, "B": {"dtype": "string", "id": null, "_type": "Value"}, "C": {"dtype": "string", "id": null, "_type": "Value"}, "D": {"dtype": "string", "id": null, "_type": "Value"}, "answer": {"dtype": "string", "id": null, "_type": "Value"}, "explanation": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "ceval-exam", "config_name": "marxism", "version": {"version_str": "1.0.0", "description": null, "major": 1, "minor": 0, "patch": 0}, "splits": {"test": {"name": "test", "num_bytes": 38662, "num_examples": 179, "dataset_name": "ceval-exam"}, "val": {"name": "val", "num_bytes": 4251, "num_examples": 19, "dataset_name": "ceval-exam"}, "dev": {"name": "dev", "num_bytes": 2142, "num_examples": 5, "dataset_name": "ceval-exam"}}, "download_checksums": {"https://modelscope.oss-cn-beijing.aliyuncs.com/open_data/c-eval/ceval-exam.zip": {"num_bytes": 1548257, "checksum": "b28bc560b655dc3c0ff05b20648b5ef8caed732bdaa8918e66fe5f3a1c711c52"}}, "download_size": 1548257, "post_processing_size": null, "dataset_size": 45055, "size_in_bytes": 1593312}, "mao_zedong_thought": {"description": "C-Eval is a comprehensive Chinese evaluation suite for foundation models. It consists of 13948 multi-choice questions spanning 52 diverse disciplines and four difficulty levels.\n", "citation": "@article{huang2023ceval,\n    title={C-Eval: A Multi-Level Multi-Discipline Chinese Evaluation Suite for Foundation Models}, \n    author={Huang, Yuzhen and Bai, Yuzhuo and Zhu, Zhihao and Zhang, Junlei and Zhang, Jinghan and Su, Tangjun and Liu, Junteng and Lv, Chuancheng and Zhang, Yikai and Lei, Jiayi and Fu, Yao and Sun, Maosong and He, Junxian},\n    journal={arXiv preprint arXiv:2305.08322},\n    year={2023}\n}\n", "homepage": "https://cevalbenchmark.com", "license": "Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License", "features": {"id": {"dtype": "int32", "id": null, "_type": "Value"}, "question": {"dtype": "string", "id": null, "_type": "Value"}, "A": {"dtype": "string", "id": null, "_type": "Value"}, "B": {"dtype": "string", "id": null, "_type": "Value"}, "C": {"dtype": "string", "id": null, "_type": "Value"}, "D": {"dtype": "string", "id": null, "_type": "Value"}, "answer": {"dtype": "string", "id": null, "_type": "Value"}, "explanation": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "ceval-exam", "config_name": "mao_zedong_thought", "version": {"version_str": "1.0.0", "description": null, "major": 1, "minor": 0, "patch": 0}, "splits": {"test": {"name": "test", "num_bytes": 56699, "num_examples": 219, "dataset_name": "ceval-exam"}, "val": {"name": "val", "num_bytes": 5487, "num_examples": 24, "dataset_name": "ceval-exam"}, "dev": {"name": "dev", "num_bytes": 3349, "num_examples": 5, "dataset_name": "ceval-exam"}}, "download_checksums": {"https://modelscope.oss-cn-beijing.aliyuncs.com/open_data/c-eval/ceval-exam.zip": {"num_bytes": 1548257, "checksum": "b28bc560b655dc3c0ff05b20648b5ef8caed732bdaa8918e66fe5f3a1c711c52"}}, "download_size": 1548257, "post_processing_size": null, "dataset_size": 65535, "size_in_bytes": 1613792}, "education_science": {"description": "C-Eval is a comprehensive Chinese evaluation suite for foundation models. It consists of 13948 multi-choice questions spanning 52 diverse disciplines and four difficulty levels.\n", "citation": "@article{huang2023ceval,\n    title={C-Eval: A Multi-Level Multi-Discipline Chinese Evaluation Suite for Foundation Models}, \n    author={Huang, Yuzhen and Bai, Yuzhuo and Zhu, Zhihao and Zhang, Junlei and Zhang, Jinghan and Su, Tangjun and Liu, Junteng and Lv, Chuancheng and Zhang, Yikai and Lei, Jiayi and Fu, Yao and Sun, Maosong and He, Junxian},\n    journal={arXiv preprint arXiv:2305.08322},\n    year={2023}\n}\n", "homepage": "https://cevalbenchmark.com", "license": "Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License", "features": {"id": {"dtype": "int32", "id": null, "_type": "Value"}, "question": {"dtype": "string", "id": null, "_type": "Value"}, "A": {"dtype": "string", "id": null, "_type": "Value"}, "B": {"dtype": "string", "id": null, "_type": "Value"}, "C": {"dtype": "string", "id": null, "_type": "Value"}, "D": {"dtype": "string", "id": null, "_type": "Value"}, "answer": {"dtype": "string", "id": null, "_type": "Value"}, "explanation": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "ceval-exam", "config_name": "education_science", "version": {"version_str": "1.0.0", "description": null, "major": 1, "minor": 0, "patch": 0}, "splits": {"test": {"name": "test", "num_bytes": 55753, "num_examples": 270, "dataset_name": "ceval-exam"}, "val": {"name": "val", "num_bytes": 5519, "num_examples": 29, "dataset_name": "ceval-exam"}, "dev": {"name": "dev", "num_bytes": 3093, "num_examples": 5, "dataset_name": "ceval-exam"}}, "download_checksums": {"https://modelscope.oss-cn-beijing.aliyuncs.com/open_data/c-eval/ceval-exam.zip": {"num_bytes": 1548257, "checksum": "b28bc560b655dc3c0ff05b20648b5ef8caed732bdaa8918e66fe5f3a1c711c52"}}, "download_size": 1548257, "post_processing_size": null, "dataset_size": 64365, "size_in_bytes": 1612622}, "teacher_qualification": {"description": "C-Eval is a comprehensive Chinese evaluation suite for foundation models. It consists of 13948 multi-choice questions spanning 52 diverse disciplines and four difficulty levels.\n", "citation": "@article{huang2023ceval,\n    title={C-Eval: A Multi-Level Multi-Discipline Chinese Evaluation Suite for Foundation Models}, \n    author={Huang, Yuzhen and Bai, Yuzhuo and Zhu, Zhihao and Zhang, Junlei and Zhang, Jinghan and Su, Tangjun and Liu, Junteng and Lv, Chuancheng and Zhang, Yikai and Lei, Jiayi and Fu, Yao and Sun, Maosong and He, Junxian},\n    journal={arXiv preprint arXiv:2305.08322},\n    year={2023}\n}\n", "homepage": "https://cevalbenchmark.com", "license": "Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License", "features": {"id": {"dtype": "int32", "id": null, "_type": "Value"}, "question": {"dtype": "string", "id": null, "_type": "Value"}, "A": {"dtype": "string", "id": null, "_type": "Value"}, "B": {"dtype": "string", "id": null, "_type": "Value"}, "C": {"dtype": "string", "id": null, "_type": "Value"}, "D": {"dtype": "string", "id": null, "_type": "Value"}, "answer": {"dtype": "string", "id": null, "_type": "Value"}, "explanation": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "ceval-exam", "config_name": "teacher_qualification", "version": {"version_str": "1.0.0", "description": null, "major": 1, "minor": 0, "patch": 0}, "splits": {"test": {"name": "test", "num_bytes": 107369, "num_examples": 399, "dataset_name": "ceval-exam"}, "val": {"name": "val", "num_bytes": 12220, "num_examples": 44, "dataset_name": "ceval-exam"}, "dev": {"name": "dev", "num_bytes": 3215, "num_examples": 5, "dataset_name": "ceval-exam"}}, "download_checksums": {"https://modelscope.oss-cn-beijing.aliyuncs.com/open_data/c-eval/ceval-exam.zip": {"num_bytes": 1548257, "checksum": "b28bc560b655dc3c0ff05b20648b5ef8caed732bdaa8918e66fe5f3a1c711c52"}}, "download_size": 1548257, "post_processing_size": null, "dataset_size": 122804, "size_in_bytes": 1671061}, "high_school_politics": {"description": "C-Eval is a comprehensive Chinese evaluation suite for foundation models. It consists of 13948 multi-choice questions spanning 52 diverse disciplines and four difficulty levels.\n", "citation": "@article{huang2023ceval,\n    title={C-Eval: A Multi-Level Multi-Discipline Chinese Evaluation Suite for Foundation Models}, \n    author={Huang, Yuzhen and Bai, Yuzhuo and Zhu, Zhihao and Zhang, Junlei and Zhang, Jinghan and Su, Tangjun and Liu, Junteng and Lv, Chuancheng and Zhang, Yikai and Lei, Jiayi and Fu, Yao and Sun, Maosong and He, Junxian},\n    journal={arXiv preprint arXiv:2305.08322},\n    year={2023}\n}\n", "homepage": "https://cevalbenchmark.com", "license": "Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License", "features": {"id": {"dtype": "int32", "id": null, "_type": "Value"}, "question": {"dtype": "string", "id": null, "_type": "Value"}, "A": {"dtype": "string", "id": null, "_type": "Value"}, "B": {"dtype": "string", "id": null, "_type": "Value"}, "C": {"dtype": "string", "id": null, "_type": "Value"}, "D": {"dtype": "string", "id": null, "_type": "Value"}, "answer": {"dtype": "string", "id": null, "_type": "Value"}, "explanation": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "ceval-exam", "config_name": "high_school_politics", "version": {"version_str": "1.0.0", "description": null, "major": 1, "minor": 0, "patch": 0}, "splits": {"test": {"name": "test", "num_bytes": 83356, "num_examples": 176, "dataset_name": "ceval-exam"}, "val": {"name": "val", "num_bytes": 8909, "num_examples": 19, "dataset_name": "ceval-exam"}, "dev": {"name": "dev", "num_bytes": 4730, "num_examples": 5, "dataset_name": "ceval-exam"}}, "download_checksums": {"https://modelscope.oss-cn-beijing.aliyuncs.com/open_data/c-eval/ceval-exam.zip": {"num_bytes": 1548257, "checksum": "b28bc560b655dc3c0ff05b20648b5ef8caed732bdaa8918e66fe5f3a1c711c52"}}, "download_size": 1548257, "post_processing_size": null, "dataset_size": 96995, "size_in_bytes": 1645252}, "high_school_geography": {"description": "C-Eval is a comprehensive Chinese evaluation suite for foundation models. It consists of 13948 multi-choice questions spanning 52 diverse disciplines and four difficulty levels.\n", "citation": "@article{huang2023ceval,\n    title={C-Eval: A Multi-Level Multi-Discipline Chinese Evaluation Suite for Foundation Models}, \n    author={Huang, Yuzhen and Bai, Yuzhuo and Zhu, Zhihao and Zhang, Junlei and Zhang, Jinghan and Su, Tangjun and Liu, Junteng and Lv, Chuancheng and Zhang, Yikai and Lei, Jiayi and Fu, Yao and Sun, Maosong and He, Junxian},\n    journal={arXiv preprint arXiv:2305.08322},\n    year={2023}\n}\n", "homepage": "https://cevalbenchmark.com", "license": "Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License", "features": {"id": {"dtype": "int32", "id": null, "_type": "Value"}, "question": {"dtype": "string", "id": null, "_type": "Value"}, "A": {"dtype": "string", "id": null, "_type": "Value"}, "B": {"dtype": "string", "id": null, "_type": "Value"}, "C": {"dtype": "string", "id": null, "_type": "Value"}, "D": {"dtype": "string", "id": null, "_type": "Value"}, "answer": {"dtype": "string", "id": null, "_type": "Value"}, "explanation": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "ceval-exam", "config_name": "high_school_geography", "version": {"version_str": "1.0.0", "description": null, "major": 1, "minor": 0, "patch": 0}, "splits": {"test": {"name": "test", "num_bytes": 41244, "num_examples": 178, "dataset_name": "ceval-exam"}, "val": {"name": "val", "num_bytes": 3985, "num_examples": 19, "dataset_name": "ceval-exam"}, "dev": {"name": "dev", "num_bytes": 2087, "num_examples": 5, "dataset_name": "ceval-exam"}}, "download_checksums": {"https://modelscope.oss-cn-beijing.aliyuncs.com/open_data/c-eval/ceval-exam.zip": {"num_bytes": 1548257, "checksum": "b28bc560b655dc3c0ff05b20648b5ef8caed732bdaa8918e66fe5f3a1c711c52"}}, "download_size": 1548257, "post_processing_size": null, "dataset_size": 47316, "size_in_bytes": 1595573}, "middle_school_politics": {"description": "C-Eval is a comprehensive Chinese evaluation suite for foundation models. It consists of 13948 multi-choice questions spanning 52 diverse disciplines and four difficulty levels.\n", "citation": "@article{huang2023ceval,\n    title={C-Eval: A Multi-Level Multi-Discipline Chinese Evaluation Suite for Foundation Models}, \n    author={Huang, Yuzhen and Bai, Yuzhuo and Zhu, Zhihao and Zhang, Junlei and Zhang, Jinghan and Su, Tangjun and Liu, Junteng and Lv, Chuancheng and Zhang, Yikai and Lei, Jiayi and Fu, Yao and Sun, Maosong and He, Junxian},\n    journal={arXiv preprint arXiv:2305.08322},\n    year={2023}\n}\n", "homepage": "https://cevalbenchmark.com", "license": "Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License", "features": {"id": {"dtype": "int32", "id": null, "_type": "Value"}, "question": {"dtype": "string", "id": null, "_type": "Value"}, "A": {"dtype": "string", "id": null, "_type": "Value"}, "B": {"dtype": "string", "id": null, "_type": "Value"}, "C": {"dtype": "string", "id": null, "_type": "Value"}, "D": {"dtype": "string", "id": null, "_type": "Value"}, "answer": {"dtype": "string", "id": null, "_type": "Value"}, "explanation": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "ceval-exam", "config_name": "middle_school_politics", "version": {"version_str": "1.0.0", "description": null, "major": 1, "minor": 0, "patch": 0}, "splits": {"test": {"name": "test", "num_bytes": 72478, "num_examples": 193, "dataset_name": "ceval-exam"}, "val": {"name": "val", "num_bytes": 7320, "num_examples": 21, "dataset_name": "ceval-exam"}, "dev": {"name": "dev", "num_bytes": 3687, "num_examples": 5, "dataset_name": "ceval-exam"}}, "download_checksums": {"https://modelscope.oss-cn-beijing.aliyuncs.com/open_data/c-eval/ceval-exam.zip": {"num_bytes": 1548257, "checksum": "b28bc560b655dc3c0ff05b20648b5ef8caed732bdaa8918e66fe5f3a1c711c52"}}, "download_size": 1548257, "post_processing_size": null, "dataset_size": 83485, "size_in_bytes": 1631742}, "middle_school_geography": {"description": "C-Eval is a comprehensive Chinese evaluation suite for foundation models. It consists of 13948 multi-choice questions spanning 52 diverse disciplines and four difficulty levels.\n", "citation": "@article{huang2023ceval,\n    title={C-Eval: A Multi-Level Multi-Discipline Chinese Evaluation Suite for Foundation Models}, \n    author={Huang, Yuzhen and Bai, Yuzhuo and Zhu, Zhihao and Zhang, Junlei and Zhang, Jinghan and Su, Tangjun and Liu, Junteng and Lv, Chuancheng and Zhang, Yikai and Lei, Jiayi and Fu, Yao and Sun, Maosong and He, Junxian},\n    journal={arXiv preprint arXiv:2305.08322},\n    year={2023}\n}\n", "homepage": "https://cevalbenchmark.com", "license": "Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License", "features": {"id": {"dtype": "int32", "id": null, "_type": "Value"}, "question": {"dtype": "string", "id": null, "_type": "Value"}, "A": {"dtype": "string", "id": null, "_type": "Value"}, "B": {"dtype": "string", "id": null, "_type": "Value"}, "C": {"dtype": "string", "id": null, "_type": "Value"}, "D": {"dtype": "string", "id": null, "_type": "Value"}, "answer": {"dtype": "string", "id": null, "_type": "Value"}, "explanation": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "ceval-exam", "config_name": "middle_school_geography", "version": {"version_str": "1.0.0", "description": null, "major": 1, "minor": 0, "patch": 0}, "splits": {"test": {"name": "test", "num_bytes": 23329, "num_examples": 108, "dataset_name": "ceval-exam"}, "val": {"name": "val", "num_bytes": 2641, "num_examples": 12, "dataset_name": "ceval-exam"}, "dev": {"name": "dev", "num_bytes": 2148, "num_examples": 5, "dataset_name": "ceval-exam"}}, "download_checksums": {"https://modelscope.oss-cn-beijing.aliyuncs.com/open_data/c-eval/ceval-exam.zip": {"num_bytes": 1548257, "checksum": "b28bc560b655dc3c0ff05b20648b5ef8caed732bdaa8918e66fe5f3a1c711c52"}}, "download_size": 1548257, "post_processing_size": null, "dataset_size": 28118, "size_in_bytes": 1576375}, "modern_chinese_history": {"description": "C-Eval is a comprehensive Chinese evaluation suite for foundation models. It consists of 13948 multi-choice questions spanning 52 diverse disciplines and four difficulty levels.\n", "citation": "@article{huang2023ceval,\n    title={C-Eval: A Multi-Level Multi-Discipline Chinese Evaluation Suite for Foundation Models}, \n    author={Huang, Yuzhen and Bai, Yuzhuo and Zhu, Zhihao and Zhang, Junlei and Zhang, Jinghan and Su, Tangjun and Liu, Junteng and Lv, Chuancheng and Zhang, Yikai and Lei, Jiayi and Fu, Yao and Sun, Maosong and He, Junxian},\n    journal={arXiv preprint arXiv:2305.08322},\n    year={2023}\n}\n", "homepage": "https://cevalbenchmark.com", "license": "Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License", "features": {"id": {"dtype": "int32", "id": null, "_type": "Value"}, "question": {"dtype": "string", "id": null, "_type": "Value"}, "A": {"dtype": "string", "id": null, "_type": "Value"}, "B": {"dtype": "string", "id": null, "_type": "Value"}, "C": {"dtype": "string", "id": null, "_type": "Value"}, "D": {"dtype": "string", "id": null, "_type": "Value"}, "answer": {"dtype": "string", "id": null, "_type": "Value"}, "explanation": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "ceval-exam", "config_name": "modern_chinese_history", "version": {"version_str": "1.0.0", "description": null, "major": 1, "minor": 0, "patch": 0}, "splits": {"test": {"name": "test", "num_bytes": 51247, "num_examples": 212, "dataset_name": "ceval-exam"}, "val": {"name": "val", "num_bytes": 5188, "num_examples": 23, "dataset_name": "ceval-exam"}, "dev": {"name": "dev", "num_bytes": 2983, "num_examples": 5, "dataset_name": "ceval-exam"}}, "download_checksums": {"https://modelscope.oss-cn-beijing.aliyuncs.com/open_data/c-eval/ceval-exam.zip": {"num_bytes": 1548257, "checksum": "b28bc560b655dc3c0ff05b20648b5ef8caed732bdaa8918e66fe5f3a1c711c52"}}, "download_size": 1548257, "post_processing_size": null, "dataset_size": 59418, "size_in_bytes": 1607675}, "ideological_and_moral_cultivation": {"description": "C-Eval is a comprehensive Chinese evaluation suite for foundation models. It consists of 13948 multi-choice questions spanning 52 diverse disciplines and four difficulty levels.\n", "citation": "@article{huang2023ceval,\n    title={C-Eval: A Multi-Level Multi-Discipline Chinese Evaluation Suite for Foundation Models}, \n    author={Huang, Yuzhen and Bai, Yuzhuo and Zhu, Zhihao and Zhang, Junlei and Zhang, Jinghan and Su, Tangjun and Liu, Junteng and Lv, Chuancheng and Zhang, Yikai and Lei, Jiayi and Fu, Yao and Sun, Maosong and He, Junxian},\n    journal={arXiv preprint arXiv:2305.08322},\n    year={2023}\n}\n", "homepage": "https://cevalbenchmark.com", "license": "Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License", "features": {"id": {"dtype": "int32", "id": null, "_type": "Value"}, "question": {"dtype": "string", "id": null, "_type": "Value"}, "A": {"dtype": "string", "id": null, "_type": "Value"}, "B": {"dtype": "string", "id": null, "_type": "Value"}, "C": {"dtype": "string", "id": null, "_type": "Value"}, "D": {"dtype": "string", "id": null, "_type": "Value"}, "answer": {"dtype": "string", "id": null, "_type": "Value"}, "explanation": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "ceval-exam", "config_name": "ideological_and_moral_cultivation", "version": {"version_str": "1.0.0", "description": null, "major": 1, "minor": 0, "patch": 0}, "splits": {"test": {"name": "test", "num_bytes": 35315, "num_examples": 172, "dataset_name": "ceval-exam"}, "val": {"name": "val", "num_bytes": 3241, "num_examples": 19, "dataset_name": "ceval-exam"}, "dev": {"name": "dev", "num_bytes": 1296, "num_examples": 5, "dataset_name": "ceval-exam"}}, "download_checksums": {"https://modelscope.oss-cn-beijing.aliyuncs.com/open_data/c-eval/ceval-exam.zip": {"num_bytes": 1548257, "checksum": "b28bc560b655dc3c0ff05b20648b5ef8caed732bdaa8918e66fe5f3a1c711c52"}}, "download_size": 1548257, "post_processing_size": null, "dataset_size": 39852, "size_in_bytes": 1588109}, "logic": {"description": "C-Eval is a comprehensive Chinese evaluation suite for foundation models. It consists of 13948 multi-choice questions spanning 52 diverse disciplines and four difficulty levels.\n", "citation": "@article{huang2023ceval,\n    title={C-Eval: A Multi-Level Multi-Discipline Chinese Evaluation Suite for Foundation Models}, \n    author={Huang, Yuzhen and Bai, Yuzhuo and Zhu, Zhihao and Zhang, Junlei and Zhang, Jinghan and Su, Tangjun and Liu, Junteng and Lv, Chuancheng and Zhang, Yikai and Lei, Jiayi and Fu, Yao and Sun, Maosong and He, Junxian},\n    journal={arXiv preprint arXiv:2305.08322},\n    year={2023}\n}\n", "homepage": "https://cevalbenchmark.com", "license": "Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License", "features": {"id": {"dtype": "int32", "id": null, "_type": "Value"}, "question": {"dtype": "string", "id": null, "_type": "Value"}, "A": {"dtype": "string", "id": null, "_type": "Value"}, "B": {"dtype": "string", "id": null, "_type": "Value"}, "C": {"dtype": "string", "id": null, "_type": "Value"}, "D": {"dtype": "string", "id": null, "_type": "Value"}, "answer": {"dtype": "string", "id": null, "_type": "Value"}, "explanation": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "ceval-exam", "config_name": "logic", "version": {"version_str": "1.0.0", "description": null, "major": 1, "minor": 0, "patch": 0}, "splits": {"test": {"name": "test", "num_bytes": 144246, "num_examples": 204, "dataset_name": "ceval-exam"}, "val": {"name": "val", "num_bytes": 15561, "num_examples": 22, "dataset_name": "ceval-exam"}, "dev": {"name": "dev", "num_bytes": 5641, "num_examples": 5, "dataset_name": "ceval-exam"}}, "download_checksums": {"https://modelscope.oss-cn-beijing.aliyuncs.com/open_data/c-eval/ceval-exam.zip": {"num_bytes": 1548257, "checksum": "b28bc560b655dc3c0ff05b20648b5ef8caed732bdaa8918e66fe5f3a1c711c52"}}, "download_size": 1548257, "post_processing_size": null, "dataset_size": 165448, "size_in_bytes": 1713705}, "law": {"description": "C-Eval is a comprehensive Chinese evaluation suite for foundation models. It consists of 13948 multi-choice questions spanning 52 diverse disciplines and four difficulty levels.\n", "citation": "@article{huang2023ceval,\n    title={C-Eval: A Multi-Level Multi-Discipline Chinese Evaluation Suite for Foundation Models}, \n    author={Huang, Yuzhen and Bai, Yuzhuo and Zhu, Zhihao and Zhang, Junlei and Zhang, Jinghan and Su, Tangjun and Liu, Junteng and Lv, Chuancheng and Zhang, Yikai and Lei, Jiayi and Fu, Yao and Sun, Maosong and He, Junxian},\n    journal={arXiv preprint arXiv:2305.08322},\n    year={2023}\n}\n", "homepage": "https://cevalbenchmark.com", "license": "Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License", "features": {"id": {"dtype": "int32", "id": null, "_type": "Value"}, "question": {"dtype": "string", "id": null, "_type": "Value"}, "A": {"dtype": "string", "id": null, "_type": "Value"}, "B": {"dtype": "string", "id": null, "_type": "Value"}, "C": {"dtype": "string", "id": null, "_type": "Value"}, "D": {"dtype": "string", "id": null, "_type": "Value"}, "answer": {"dtype": "string", "id": null, "_type": "Value"}, "explanation": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "ceval-exam", "config_name": "law", "version": {"version_str": "1.0.0", "description": null, "major": 1, "minor": 0, "patch": 0}, "splits": {"test": {"name": "test", "num_bytes": 79782, "num_examples": 221, "dataset_name": "ceval-exam"}, "val": {"name": "val", "num_bytes": 8119, "num_examples": 24, "dataset_name": "ceval-exam"}, "dev": {"name": "dev", "num_bytes": 4142, "num_examples": 5, "dataset_name": "ceval-exam"}}, "download_checksums": {"https://modelscope.oss-cn-beijing.aliyuncs.com/open_data/c-eval/ceval-exam.zip": {"num_bytes": 1548257, "checksum": "b28bc560b655dc3c0ff05b20648b5ef8caed732bdaa8918e66fe5f3a1c711c52"}}, "download_size": 1548257, "post_processing_size": null, "dataset_size": 92043, "size_in_bytes": 1640300}, "chinese_language_and_literature": {"description": "C-Eval is a comprehensive Chinese evaluation suite for foundation models. It consists of 13948 multi-choice questions spanning 52 diverse disciplines and four difficulty levels.\n", "citation": "@article{huang2023ceval,\n    title={C-Eval: A Multi-Level Multi-Discipline Chinese Evaluation Suite for Foundation Models}, \n    author={Huang, Yuzhen and Bai, Yuzhuo and Zhu, Zhihao and Zhang, Junlei and Zhang, Jinghan and Su, Tangjun and Liu, Junteng and Lv, Chuancheng and Zhang, Yikai and Lei, Jiayi and Fu, Yao and Sun, Maosong and He, Junxian},\n    journal={arXiv preprint arXiv:2305.08322},\n    year={2023}\n}\n", "homepage": "https://cevalbenchmark.com", "license": "Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License", "features": {"id": {"dtype": "int32", "id": null, "_type": "Value"}, "question": {"dtype": "string", "id": null, "_type": "Value"}, "A": {"dtype": "string", "id": null, "_type": "Value"}, "B": {"dtype": "string", "id": null, "_type": "Value"}, "C": {"dtype": "string", "id": null, "_type": "Value"}, "D": {"dtype": "string", "id": null, "_type": "Value"}, "answer": {"dtype": "string", "id": null, "_type": "Value"}, "explanation": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "ceval-exam", "config_name": "chinese_language_and_literature", "version": {"version_str": "1.0.0", "description": null, "major": 1, "minor": 0, "patch": 0}, "splits": {"test": {"name": "test", "num_bytes": 32328, "num_examples": 209, "dataset_name": "ceval-exam"}, "val": {"name": "val", "num_bytes": 3446, "num_examples": 23, "dataset_name": "ceval-exam"}, "dev": {"name": "dev", "num_bytes": 1892, "num_examples": 5, "dataset_name": "ceval-exam"}}, "download_checksums": {"https://modelscope.oss-cn-beijing.aliyuncs.com/open_data/c-eval/ceval-exam.zip": {"num_bytes": 1548257, "checksum": "b28bc560b655dc3c0ff05b20648b5ef8caed732bdaa8918e66fe5f3a1c711c52"}}, "download_size": 1548257, "post_processing_size": null, "dataset_size": 37666, "size_in_bytes": 1585923}, "art_studies": {"description": "C-Eval is a comprehensive Chinese evaluation suite for foundation models. It consists of 13948 multi-choice questions spanning 52 diverse disciplines and four difficulty levels.\n", "citation": "@article{huang2023ceval,\n    title={C-Eval: A Multi-Level Multi-Discipline Chinese Evaluation Suite for Foundation Models}, \n    author={Huang, Yuzhen and Bai, Yuzhuo and Zhu, Zhihao and Zhang, Junlei and Zhang, Jinghan and Su, Tangjun and Liu, Junteng and Lv, Chuancheng and Zhang, Yikai and Lei, Jiayi and Fu, Yao and Sun, Maosong and He, Junxian},\n    journal={arXiv preprint arXiv:2305.08322},\n    year={2023}\n}\n", "homepage": "https://cevalbenchmark.com", "license": "Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License", "features": {"id": {"dtype": "int32", "id": null, "_type": "Value"}, "question": {"dtype": "string", "id": null, "_type": "Value"}, "A": {"dtype": "string", "id": null, "_type": "Value"}, "B": {"dtype": "string", "id": null, "_type": "Value"}, "C": {"dtype": "string", "id": null, "_type": "Value"}, "D": {"dtype": "string", "id": null, "_type": "Value"}, "answer": {"dtype": "string", "id": null, "_type": "Value"}, "explanation": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "ceval-exam", "config_name": "art_studies", "version": {"version_str": "1.0.0", "description": null, "major": 1, "minor": 0, "patch": 0}, "splits": {"test": {"name": "test", "num_bytes": 41227, "num_examples": 298, "dataset_name": "ceval-exam"}, "val": {"name": "val", "num_bytes": 4581, "num_examples": 33, "dataset_name": "ceval-exam"}, "dev": {"name": "dev", "num_bytes": 1439, "num_examples": 5, "dataset_name": "ceval-exam"}}, "download_checksums": {"https://modelscope.oss-cn-beijing.aliyuncs.com/open_data/c-eval/ceval-exam.zip": {"num_bytes": 1548257, "checksum": "b28bc560b655dc3c0ff05b20648b5ef8caed732bdaa8918e66fe5f3a1c711c52"}}, "download_size": 1548257, "post_processing_size": null, "dataset_size": 47247, "size_in_bytes": 1595504}, "professional_tour_guide": {"description": "C-Eval is a comprehensive Chinese evaluation suite for foundation models. It consists of 13948 multi-choice questions spanning 52 diverse disciplines and four difficulty levels.\n", "citation": "@article{huang2023ceval,\n    title={C-Eval: A Multi-Level Multi-Discipline Chinese Evaluation Suite for Foundation Models}, \n    author={Huang, Yuzhen and Bai, Yuzhuo and Zhu, Zhihao and Zhang, Junlei and Zhang, Jinghan and Su, Tangjun and Liu, Junteng and Lv, Chuancheng and Zhang, Yikai and Lei, Jiayi and Fu, Yao and Sun, Maosong and He, Junxian},\n    journal={arXiv preprint arXiv:2305.08322},\n    year={2023}\n}\n", "homepage": "https://cevalbenchmark.com", "license": "Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License", "features": {"id": {"dtype": "int32", "id": null, "_type": "Value"}, "question": {"dtype": "string", "id": null, "_type": "Value"}, "A": {"dtype": "string", "id": null, "_type": "Value"}, "B": {"dtype": "string", "id": null, "_type": "Value"}, "C": {"dtype": "string", "id": null, "_type": "Value"}, "D": {"dtype": "string", "id": null, "_type": "Value"}, "answer": {"dtype": "string", "id": null, "_type": "Value"}, "explanation": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "ceval-exam", "config_name": "professional_tour_guide", "version": {"version_str": "1.0.0", "description": null, "major": 1, "minor": 0, "patch": 0}, "splits": {"test": {"name": "test", "num_bytes": 41231, "num_examples": 266, "dataset_name": "ceval-exam"}, "val": {"name": "val", "num_bytes": 4509, "num_examples": 29, "dataset_name": "ceval-exam"}, "dev": {"name": "dev", "num_bytes": 1764, "num_examples": 5, "dataset_name": "ceval-exam"}}, "download_checksums": {"https://modelscope.oss-cn-beijing.aliyuncs.com/open_data/c-eval/ceval-exam.zip": {"num_bytes": 1548257, "checksum": "b28bc560b655dc3c0ff05b20648b5ef8caed732bdaa8918e66fe5f3a1c711c52"}}, "download_size": 1548257, "post_processing_size": null, "dataset_size": 47504, "size_in_bytes": 1595761}, "legal_professional": {"description": "C-Eval is a comprehensive Chinese evaluation suite for foundation models. It consists of 13948 multi-choice questions spanning 52 diverse disciplines and four difficulty levels.\n", "citation": "@article{huang2023ceval,\n    title={C-Eval: A Multi-Level Multi-Discipline Chinese Evaluation Suite for Foundation Models}, \n    author={Huang, Yuzhen and Bai, Yuzhuo and Zhu, Zhihao and Zhang, Junlei and Zhang, Jinghan and Su, Tangjun and Liu, Junteng and Lv, Chuancheng and Zhang, Yikai and Lei, Jiayi and Fu, Yao and Sun, Maosong and He, Junxian},\n    journal={arXiv preprint arXiv:2305.08322},\n    year={2023}\n}\n", "homepage": "https://cevalbenchmark.com", "license": "Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License", "features": {"id": {"dtype": "int32", "id": null, "_type": "Value"}, "question": {"dtype": "string", "id": null, "_type": "Value"}, "A": {"dtype": "string", "id": null, "_type": "Value"}, "B": {"dtype": "string", "id": null, "_type": "Value"}, "C": {"dtype": "string", "id": null, "_type": "Value"}, "D": {"dtype": "string", "id": null, "_type": "Value"}, "answer": {"dtype": "string", "id": null, "_type": "Value"}, "explanation": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "ceval-exam", "config_name": "legal_professional", "version": {"version_str": "1.0.0", "description": null, "major": 1, "minor": 0, "patch": 0}, "splits": {"test": {"name": "test", "num_bytes": 121985, "num_examples": 215, "dataset_name": "ceval-exam"}, "val": {"name": "val", "num_bytes": 12215, "num_examples": 23, "dataset_name": "ceval-exam"}, "dev": {"name": "dev", "num_bytes": 6974, "num_examples": 5, "dataset_name": "ceval-exam"}}, "download_checksums": {"https://modelscope.oss-cn-beijing.aliyuncs.com/open_data/c-eval/ceval-exam.zip": {"num_bytes": 1548257, "checksum": "b28bc560b655dc3c0ff05b20648b5ef8caed732bdaa8918e66fe5f3a1c711c52"}}, "download_size": 1548257, "post_processing_size": null, "dataset_size": 141174, "size_in_bytes": 1689431}, "high_school_chinese": {"description": "C-Eval is a comprehensive Chinese evaluation suite for foundation models. It consists of 13948 multi-choice questions spanning 52 diverse disciplines and four difficulty levels.\n", "citation": "@article{huang2023ceval,\n    title={C-Eval: A Multi-Level Multi-Discipline Chinese Evaluation Suite for Foundation Models}, \n    author={Huang, Yuzhen and Bai, Yuzhuo and Zhu, Zhihao and Zhang, Junlei and Zhang, Jinghan and Su, Tangjun and Liu, Junteng and Lv, Chuancheng and Zhang, Yikai and Lei, Jiayi and Fu, Yao and Sun, Maosong and He, Junxian},\n    journal={arXiv preprint arXiv:2305.08322},\n    year={2023}\n}\n", "homepage": "https://cevalbenchmark.com", "license": "Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License", "features": {"id": {"dtype": "int32", "id": null, "_type": "Value"}, "question": {"dtype": "string", "id": null, "_type": "Value"}, "A": {"dtype": "string", "id": null, "_type": "Value"}, "B": {"dtype": "string", "id": null, "_type": "Value"}, "C": {"dtype": "string", "id": null, "_type": "Value"}, "D": {"dtype": "string", "id": null, "_type": "Value"}, "answer": {"dtype": "string", "id": null, "_type": "Value"}, "explanation": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "ceval-exam", "config_name": "high_school_chinese", "version": {"version_str": "1.0.0", "description": null, "major": 1, "minor": 0, "patch": 0}, "splits": {"test": {"name": "test", "num_bytes": 110347, "num_examples": 178, "dataset_name": "ceval-exam"}, "val": {"name": "val", "num_bytes": 10475, "num_examples": 19, "dataset_name": "ceval-exam"}, "dev": {"name": "dev", "num_bytes": 5290, "num_examples": 5, "dataset_name": "ceval-exam"}}, "download_checksums": {"https://modelscope.oss-cn-beijing.aliyuncs.com/open_data/c-eval/ceval-exam.zip": {"num_bytes": 1548257, "checksum": "b28bc560b655dc3c0ff05b20648b5ef8caed732bdaa8918e66fe5f3a1c711c52"}}, "download_size": 1548257, "post_processing_size": null, "dataset_size": 126112, "size_in_bytes": 1674369}, "high_school_history": {"description": "C-Eval is a comprehensive Chinese evaluation suite for foundation models. It consists of 13948 multi-choice questions spanning 52 diverse disciplines and four difficulty levels.\n", "citation": "@article{huang2023ceval,\n    title={C-Eval: A Multi-Level Multi-Discipline Chinese Evaluation Suite for Foundation Models}, \n    author={Huang, Yuzhen and Bai, Yuzhuo and Zhu, Zhihao and Zhang, Junlei and Zhang, Jinghan and Su, Tangjun and Liu, Junteng and Lv, Chuancheng and Zhang, Yikai and Lei, Jiayi and Fu, Yao and Sun, Maosong and He, Junxian},\n    journal={arXiv preprint arXiv:2305.08322},\n    year={2023}\n}\n", "homepage": "https://cevalbenchmark.com", "license": "Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License", "features": {"id": {"dtype": "int32", "id": null, "_type": "Value"}, "question": {"dtype": "string", "id": null, "_type": "Value"}, "A": {"dtype": "string", "id": null, "_type": "Value"}, "B": {"dtype": "string", "id": null, "_type": "Value"}, "C": {"dtype": "string", "id": null, "_type": "Value"}, "D": {"dtype": "string", "id": null, "_type": "Value"}, "answer": {"dtype": "string", "id": null, "_type": "Value"}, "explanation": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "ceval-exam", "config_name": "high_school_history", "version": {"version_str": "1.0.0", "description": null, "major": 1, "minor": 0, "patch": 0}, "splits": {"test": {"name": "test", "num_bytes": 56196, "num_examples": 182, "dataset_name": "ceval-exam"}, "val": {"name": "val", "num_bytes": 6618, "num_examples": 20, "dataset_name": "ceval-exam"}, "dev": {"name": "dev", "num_bytes": 2421, "num_examples": 5, "dataset_name": "ceval-exam"}}, "download_checksums": {"https://modelscope.oss-cn-beijing.aliyuncs.com/open_data/c-eval/ceval-exam.zip": {"num_bytes": 1548257, "checksum": "b28bc560b655dc3c0ff05b20648b5ef8caed732bdaa8918e66fe5f3a1c711c52"}}, "download_size": 1548257, "post_processing_size": null, "dataset_size": 65235, "size_in_bytes": 1613492}, "middle_school_history": {"description": "C-Eval is a comprehensive Chinese evaluation suite for foundation models. It consists of 13948 multi-choice questions spanning 52 diverse disciplines and four difficulty levels.\n", "citation": "@article{huang2023ceval,\n    title={C-Eval: A Multi-Level Multi-Discipline Chinese Evaluation Suite for Foundation Models}, \n    author={Huang, Yuzhen and Bai, Yuzhuo and Zhu, Zhihao and Zhang, Junlei and Zhang, Jinghan and Su, Tangjun and Liu, Junteng and Lv, Chuancheng and Zhang, Yikai and Lei, Jiayi and Fu, Yao and Sun, Maosong and He, Junxian},\n    journal={arXiv preprint arXiv:2305.08322},\n    year={2023}\n}\n", "homepage": "https://cevalbenchmark.com", "license": "Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License", "features": {"id": {"dtype": "int32", "id": null, "_type": "Value"}, "question": {"dtype": "string", "id": null, "_type": "Value"}, "A": {"dtype": "string", "id": null, "_type": "Value"}, "B": {"dtype": "string", "id": null, "_type": "Value"}, "C": {"dtype": "string", "id": null, "_type": "Value"}, "D": {"dtype": "string", "id": null, "_type": "Value"}, "answer": {"dtype": "string", "id": null, "_type": "Value"}, "explanation": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "ceval-exam", "config_name": "middle_school_history", "version": {"version_str": "1.0.0", "description": null, "major": 1, "minor": 0, "patch": 0}, "splits": {"test": {"name": "test", "num_bytes": 47076, "num_examples": 207, "dataset_name": "ceval-exam"}, "val": {"name": "val", "num_bytes": 5990, "num_examples": 22, "dataset_name": "ceval-exam"}, "dev": {"name": "dev", "num_bytes": 2014, "num_examples": 5, "dataset_name": "ceval-exam"}}, "download_checksums": {"https://modelscope.oss-cn-beijing.aliyuncs.com/open_data/c-eval/ceval-exam.zip": {"num_bytes": 1548257, "checksum": "b28bc560b655dc3c0ff05b20648b5ef8caed732bdaa8918e66fe5f3a1c711c52"}}, "download_size": 1548257, "post_processing_size": null, "dataset_size": 55080, "size_in_bytes": 1603337}, "civil_servant": {"description": "C-Eval is a comprehensive Chinese evaluation suite for foundation models. It consists of 13948 multi-choice questions spanning 52 diverse disciplines and four difficulty levels.\n", "citation": "@article{huang2023ceval,\n    title={C-Eval: A Multi-Level Multi-Discipline Chinese Evaluation Suite for Foundation Models}, \n    author={Huang, Yuzhen and Bai, Yuzhuo and Zhu, Zhihao and Zhang, Junlei and Zhang, Jinghan and Su, Tangjun and Liu, Junteng and Lv, Chuancheng and Zhang, Yikai and Lei, Jiayi and Fu, Yao and Sun, Maosong and He, Junxian},\n    journal={arXiv preprint arXiv:2305.08322},\n    year={2023}\n}\n", "homepage": "https://cevalbenchmark.com", "license": "Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License", "features": {"id": {"dtype": "int32", "id": null, "_type": "Value"}, "question": {"dtype": "string", "id": null, "_type": "Value"}, "A": {"dtype": "string", "id": null, "_type": "Value"}, "B": {"dtype": "string", "id": null, "_type": "Value"}, "C": {"dtype": "string", "id": null, "_type": "Value"}, "D": {"dtype": "string", "id": null, "_type": "Value"}, "answer": {"dtype": "string", "id": null, "_type": "Value"}, "explanation": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "ceval-exam", "config_name": "civil_servant", "version": {"version_str": "1.0.0", "description": null, "major": 1, "minor": 0, "patch": 0}, "splits": {"test": {"name": "test", "num_bytes": 181504, "num_examples": 429, "dataset_name": "ceval-exam"}, "val": {"name": "val", "num_bytes": 21273, "num_examples": 47, "dataset_name": "ceval-exam"}, "dev": {"name": "dev", "num_bytes": 4576, "num_examples": 5, "dataset_name": "ceval-exam"}}, "download_checksums": {"https://modelscope.oss-cn-beijing.aliyuncs.com/open_data/c-eval/ceval-exam.zip": {"num_bytes": 1548257, "checksum": "b28bc560b655dc3c0ff05b20648b5ef8caed732bdaa8918e66fe5f3a1c711c52"}}, "download_size": 1548257, "post_processing_size": null, "dataset_size": 207353, "size_in_bytes": 1755610}, "sports_science": {"description": "C-Eval is a comprehensive Chinese evaluation suite for foundation models. It consists of 13948 multi-choice questions spanning 52 diverse disciplines and four difficulty levels.\n", "citation": "@article{huang2023ceval,\n    title={C-Eval: A Multi-Level Multi-Discipline Chinese Evaluation Suite for Foundation Models}, \n    author={Huang, Yuzhen and Bai, Yuzhuo and Zhu, Zhihao and Zhang, Junlei and Zhang, Jinghan and Su, Tangjun and Liu, Junteng and Lv, Chuancheng and Zhang, Yikai and Lei, Jiayi and Fu, Yao and Sun, Maosong and He, Junxian},\n    journal={arXiv preprint arXiv:2305.08322},\n    year={2023}\n}\n", "homepage": "https://cevalbenchmark.com", "license": "Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License", "features": {"id": {"dtype": "int32", "id": null, "_type": "Value"}, "question": {"dtype": "string", "id": null, "_type": "Value"}, "A": {"dtype": "string", "id": null, "_type": "Value"}, "B": {"dtype": "string", "id": null, "_type": "Value"}, "C": {"dtype": "string", "id": null, "_type": "Value"}, "D": {"dtype": "string", "id": null, "_type": "Value"}, "answer": {"dtype": "string", "id": null, "_type": "Value"}, "explanation": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "ceval-exam", "config_name": "sports_science", "version": {"version_str": "1.0.0", "description": null, "major": 1, "minor": 0, "patch": 0}, "splits": {"test": {"name": "test", "num_bytes": 32527, "num_examples": 180, "dataset_name": "ceval-exam"}, "val": {"name": "val", "num_bytes": 3493, "num_examples": 19, "dataset_name": "ceval-exam"}, "dev": {"name": "dev", "num_bytes": 4182, "num_examples": 5, "dataset_name": "ceval-exam"}}, "download_checksums": {"https://modelscope.oss-cn-beijing.aliyuncs.com/open_data/c-eval/ceval-exam.zip": {"num_bytes": 1548257, "checksum": "b28bc560b655dc3c0ff05b20648b5ef8caed732bdaa8918e66fe5f3a1c711c52"}}, "download_size": 1548257, "post_processing_size": null, "dataset_size": 40202, "size_in_bytes": 1588459}, "plant_protection": {"description": "C-Eval is a comprehensive Chinese evaluation suite for foundation models. It consists of 13948 multi-choice questions spanning 52 diverse disciplines and four difficulty levels.\n", "citation": "@article{huang2023ceval,\n    title={C-Eval: A Multi-Level Multi-Discipline Chinese Evaluation Suite for Foundation Models}, \n    author={Huang, Yuzhen and Bai, Yuzhuo and Zhu, Zhihao and Zhang, Junlei and Zhang, Jinghan and Su, Tangjun and Liu, Junteng and Lv, Chuancheng and Zhang, Yikai and Lei, Jiayi and Fu, Yao and Sun, Maosong and He, Junxian},\n    journal={arXiv preprint arXiv:2305.08322},\n    year={2023}\n}\n", "homepage": "https://cevalbenchmark.com", "license": "Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License", "features": {"id": {"dtype": "int32", "id": null, "_type": "Value"}, "question": {"dtype": "string", "id": null, "_type": "Value"}, "A": {"dtype": "string", "id": null, "_type": "Value"}, "B": {"dtype": "string", "id": null, "_type": "Value"}, "C": {"dtype": "string", "id": null, "_type": "Value"}, "D": {"dtype": "string", "id": null, "_type": "Value"}, "answer": {"dtype": "string", "id": null, "_type": "Value"}, "explanation": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "ceval-exam", "config_name": "plant_protection", "version": {"version_str": "1.0.0", "description": null, "major": 1, "minor": 0, "patch": 0}, "splits": {"test": {"name": "test", "num_bytes": 31877, "num_examples": 199, "dataset_name": "ceval-exam"}, "val": {"name": "val", "num_bytes": 3634, "num_examples": 22, "dataset_name": "ceval-exam"}, "dev": {"name": "dev", "num_bytes": 3726, "num_examples": 5, "dataset_name": "ceval-exam"}}, "download_checksums": {"https://modelscope.oss-cn-beijing.aliyuncs.com/open_data/c-eval/ceval-exam.zip": {"num_bytes": 1548257, "checksum": "b28bc560b655dc3c0ff05b20648b5ef8caed732bdaa8918e66fe5f3a1c711c52"}}, "download_size": 1548257, "post_processing_size": null, "dataset_size": 39237, "size_in_bytes": 1587494}, "basic_medicine": {"description": "C-Eval is a comprehensive Chinese evaluation suite for foundation models. It consists of 13948 multi-choice questions spanning 52 diverse disciplines and four difficulty levels.\n", "citation": "@article{huang2023ceval,\n    title={C-Eval: A Multi-Level Multi-Discipline Chinese Evaluation Suite for Foundation Models}, \n    author={Huang, Yuzhen and Bai, Yuzhuo and Zhu, Zhihao and Zhang, Junlei and Zhang, Jinghan and Su, Tangjun and Liu, Junteng and Lv, Chuancheng and Zhang, Yikai and Lei, Jiayi and Fu, Yao and Sun, Maosong and He, Junxian},\n    journal={arXiv preprint arXiv:2305.08322},\n    year={2023}\n}\n", "homepage": "https://cevalbenchmark.com", "license": "Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License", "features": {"id": {"dtype": "int32", "id": null, "_type": "Value"}, "question": {"dtype": "string", "id": null, "_type": "Value"}, "A": {"dtype": "string", "id": null, "_type": "Value"}, "B": {"dtype": "string", "id": null, "_type": "Value"}, "C": {"dtype": "string", "id": null, "_type": "Value"}, "D": {"dtype": "string", "id": null, "_type": "Value"}, "answer": {"dtype": "string", "id": null, "_type": "Value"}, "explanation": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "ceval-exam", "config_name": "basic_medicine", "version": {"version_str": "1.0.0", "description": null, "major": 1, "minor": 0, "patch": 0}, "splits": {"test": {"name": "test", "num_bytes": 28820, "num_examples": 175, "dataset_name": "ceval-exam"}, "val": {"name": "val", "num_bytes": 2627, "num_examples": 19, "dataset_name": "ceval-exam"}, "dev": {"name": "dev", "num_bytes": 1825, "num_examples": 5, "dataset_name": "ceval-exam"}}, "download_checksums": {"https://modelscope.oss-cn-beijing.aliyuncs.com/open_data/c-eval/ceval-exam.zip": {"num_bytes": 1548257, "checksum": "b28bc560b655dc3c0ff05b20648b5ef8caed732bdaa8918e66fe5f3a1c711c52"}}, "download_size": 1548257, "post_processing_size": null, "dataset_size": 33272, "size_in_bytes": 1581529}, "clinical_medicine": {"description": "C-Eval is a comprehensive Chinese evaluation suite for foundation models. It consists of 13948 multi-choice questions spanning 52 diverse disciplines and four difficulty levels.\n", "citation": "@article{huang2023ceval,\n    title={C-Eval: A Multi-Level Multi-Discipline Chinese Evaluation Suite for Foundation Models}, \n    author={Huang, Yuzhen and Bai, Yuzhuo and Zhu, Zhihao and Zhang, Junlei and Zhang, Jinghan and Su, Tangjun and Liu, Junteng and Lv, Chuancheng and Zhang, Yikai and Lei, Jiayi and Fu, Yao and Sun, Maosong and He, Junxian},\n    journal={arXiv preprint arXiv:2305.08322},\n    year={2023}\n}\n", "homepage": "https://cevalbenchmark.com", "license": "Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License", "features": {"id": {"dtype": "int32", "id": null, "_type": "Value"}, "question": {"dtype": "string", "id": null, "_type": "Value"}, "A": {"dtype": "string", "id": null, "_type": "Value"}, "B": {"dtype": "string", "id": null, "_type": "Value"}, "C": {"dtype": "string", "id": null, "_type": "Value"}, "D": {"dtype": "string", "id": null, "_type": "Value"}, "answer": {"dtype": "string", "id": null, "_type": "Value"}, "explanation": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "ceval-exam", "config_name": "clinical_medicine", "version": {"version_str": "1.0.0", "description": null, "major": 1, "minor": 0, "patch": 0}, "splits": {"test": {"name": "test", "num_bytes": 42161, "num_examples": 200, "dataset_name": "ceval-exam"}, "val": {"name": "val", "num_bytes": 4167, "num_examples": 22, "dataset_name": "ceval-exam"}, "dev": {"name": "dev", "num_bytes": 1951, "num_examples": 5, "dataset_name": "ceval-exam"}}, "download_checksums": {"https://modelscope.oss-cn-beijing.aliyuncs.com/open_data/c-eval/ceval-exam.zip": {"num_bytes": 1548257, "checksum": "b28bc560b655dc3c0ff05b20648b5ef8caed732bdaa8918e66fe5f3a1c711c52"}}, "download_size": 1548257, "post_processing_size": null, "dataset_size": 48279, "size_in_bytes": 1596536}, "urban_and_rural_planner": {"description": "C-Eval is a comprehensive Chinese evaluation suite for foundation models. It consists of 13948 multi-choice questions spanning 52 diverse disciplines and four difficulty levels.\n", "citation": "@article{huang2023ceval,\n    title={C-Eval: A Multi-Level Multi-Discipline Chinese Evaluation Suite for Foundation Models}, \n    author={Huang, Yuzhen and Bai, Yuzhuo and Zhu, Zhihao and Zhang, Junlei and Zhang, Jinghan and Su, Tangjun and Liu, Junteng and Lv, Chuancheng and Zhang, Yikai and Lei, Jiayi and Fu, Yao and Sun, Maosong and He, Junxian},\n    journal={arXiv preprint arXiv:2305.08322},\n    year={2023}\n}\n", "homepage": "https://cevalbenchmark.com", "license": "Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License", "features": {"id": {"dtype": "int32", "id": null, "_type": "Value"}, "question": {"dtype": "string", "id": null, "_type": "Value"}, "A": {"dtype": "string", "id": null, "_type": "Value"}, "B": {"dtype": "string", "id": null, "_type": "Value"}, "C": {"dtype": "string", "id": null, "_type": "Value"}, "D": {"dtype": "string", "id": null, "_type": "Value"}, "answer": {"dtype": "string", "id": null, "_type": "Value"}, "explanation": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "ceval-exam", "config_name": "urban_and_rural_planner", "version": {"version_str": "1.0.0", "description": null, "major": 1, "minor": 0, "patch": 0}, "splits": {"test": {"name": "test", "num_bytes": 110377, "num_examples": 418, "dataset_name": "ceval-exam"}, "val": {"name": "val", "num_bytes": 12793, "num_examples": 46, "dataset_name": "ceval-exam"}, "dev": {"name": "dev", "num_bytes": 3166, "num_examples": 5, "dataset_name": "ceval-exam"}}, "download_checksums": {"https://modelscope.oss-cn-beijing.aliyuncs.com/open_data/c-eval/ceval-exam.zip": {"num_bytes": 1548257, "checksum": "b28bc560b655dc3c0ff05b20648b5ef8caed732bdaa8918e66fe5f3a1c711c52"}}, "download_size": 1548257, "post_processing_size": null, "dataset_size": 126336, "size_in_bytes": 1674593}, "accountant": {"description": "C-Eval is a comprehensive Chinese evaluation suite for foundation models. It consists of 13948 multi-choice questions spanning 52 diverse disciplines and four difficulty levels.\n", "citation": "@article{huang2023ceval,\n    title={C-Eval: A Multi-Level Multi-Discipline Chinese Evaluation Suite for Foundation Models}, \n    author={Huang, Yuzhen and Bai, Yuzhuo and Zhu, Zhihao and Zhang, Junlei and Zhang, Jinghan and Su, Tangjun and Liu, Junteng and Lv, Chuancheng and Zhang, Yikai and Lei, Jiayi and Fu, Yao and Sun, Maosong and He, Junxian},\n    journal={arXiv preprint arXiv:2305.08322},\n    year={2023}\n}\n", "homepage": "https://cevalbenchmark.com", "license": "Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License", "features": {"id": {"dtype": "int32", "id": null, "_type": "Value"}, "question": {"dtype": "string", "id": null, "_type": "Value"}, "A": {"dtype": "string", "id": null, "_type": "Value"}, "B": {"dtype": "string", "id": null, "_type": "Value"}, "C": {"dtype": "string", "id": null, "_type": "Value"}, "D": {"dtype": "string", "id": null, "_type": "Value"}, "answer": {"dtype": "string", "id": null, "_type": "Value"}, "explanation": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "ceval-exam", "config_name": "accountant", "version": {"version_str": "1.0.0", "description": null, "major": 1, "minor": 0, "patch": 0}, "splits": {"test": {"name": "test", "num_bytes": 176917, "num_examples": 443, "dataset_name": "ceval-exam"}, "val": {"name": "val", "num_bytes": 19549, "num_examples": 49, "dataset_name": "ceval-exam"}, "dev": {"name": "dev", "num_bytes": 3414, "num_examples": 5, "dataset_name": "ceval-exam"}}, "download_checksums": {"https://modelscope.oss-cn-beijing.aliyuncs.com/open_data/c-eval/ceval-exam.zip": {"num_bytes": 1548257, "checksum": "b28bc560b655dc3c0ff05b20648b5ef8caed732bdaa8918e66fe5f3a1c711c52"}}, "download_size": 1548257, "post_processing_size": null, "dataset_size": 199880, "size_in_bytes": 1748137}, "fire_engineer": {"description": "C-Eval is a comprehensive Chinese evaluation suite for foundation models. It consists of 13948 multi-choice questions spanning 52 diverse disciplines and four difficulty levels.\n", "citation": "@article{huang2023ceval,\n    title={C-Eval: A Multi-Level Multi-Discipline Chinese Evaluation Suite for Foundation Models}, \n    author={Huang, Yuzhen and Bai, Yuzhuo and Zhu, Zhihao and Zhang, Junlei and Zhang, Jinghan and Su, Tangjun and Liu, Junteng and Lv, Chuancheng and Zhang, Yikai and Lei, Jiayi and Fu, Yao and Sun, Maosong and He, Junxian},\n    journal={arXiv preprint arXiv:2305.08322},\n    year={2023}\n}\n", "homepage": "https://cevalbenchmark.com", "license": "Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License", "features": {"id": {"dtype": "int32", "id": null, "_type": "Value"}, "question": {"dtype": "string", "id": null, "_type": "Value"}, "A": {"dtype": "string", "id": null, "_type": "Value"}, "B": {"dtype": "string", "id": null, "_type": "Value"}, "C": {"dtype": "string", "id": null, "_type": "Value"}, "D": {"dtype": "string", "id": null, "_type": "Value"}, "answer": {"dtype": "string", "id": null, "_type": "Value"}, "explanation": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "ceval-exam", "config_name": "fire_engineer", "version": {"version_str": "1.0.0", "description": null, "major": 1, "minor": 0, "patch": 0}, "splits": {"test": {"name": "test", "num_bytes": 83611, "num_examples": 282, "dataset_name": "ceval-exam"}, "val": {"name": "val", "num_bytes": 9998, "num_examples": 31, "dataset_name": "ceval-exam"}, "dev": {"name": "dev", "num_bytes": 2209, "num_examples": 5, "dataset_name": "ceval-exam"}}, "download_checksums": {"https://modelscope.oss-cn-beijing.aliyuncs.com/open_data/c-eval/ceval-exam.zip": {"num_bytes": 1548257, "checksum": "b28bc560b655dc3c0ff05b20648b5ef8caed732bdaa8918e66fe5f3a1c711c52"}}, "download_size": 1548257, "post_processing_size": null, "dataset_size": 95818, "size_in_bytes": 1644075}, "environmental_impact_assessment_engineer": {"description": "C-Eval is a comprehensive Chinese evaluation suite for foundation models. It consists of 13948 multi-choice questions spanning 52 diverse disciplines and four difficulty levels.\n", "citation": "@article{huang2023ceval,\n    title={C-Eval: A Multi-Level Multi-Discipline Chinese Evaluation Suite for Foundation Models}, \n    author={Huang, Yuzhen and Bai, Yuzhuo and Zhu, Zhihao and Zhang, Junlei and Zhang, Jinghan and Su, Tangjun and Liu, Junteng and Lv, Chuancheng and Zhang, Yikai and Lei, Jiayi and Fu, Yao and Sun, Maosong and He, Junxian},\n    journal={arXiv preprint arXiv:2305.08322},\n    year={2023}\n}\n", "homepage": "https://cevalbenchmark.com", "license": "Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License", "features": {"id": {"dtype": "int32", "id": null, "_type": "Value"}, "question": {"dtype": "string", "id": null, "_type": "Value"}, "A": {"dtype": "string", "id": null, "_type": "Value"}, "B": {"dtype": "string", "id": null, "_type": "Value"}, "C": {"dtype": "string", "id": null, "_type": "Value"}, "D": {"dtype": "string", "id": null, "_type": "Value"}, "answer": {"dtype": "string", "id": null, "_type": "Value"}, "explanation": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "ceval-exam", "config_name": "environmental_impact_assessment_engineer", "version": {"version_str": "1.0.0", "description": null, "major": 1, "minor": 0, "patch": 0}, "splits": {"test": {"name": "test", "num_bytes": 84680, "num_examples": 281, "dataset_name": "ceval-exam"}, "val": {"name": "val", "num_bytes": 9186, "num_examples": 31, "dataset_name": "ceval-exam"}, "dev": {"name": "dev", "num_bytes": 2495, "num_examples": 5, "dataset_name": "ceval-exam"}}, "download_checksums": {"https://modelscope.oss-cn-beijing.aliyuncs.com/open_data/c-eval/ceval-exam.zip": {"num_bytes": 1548257, "checksum": "b28bc560b655dc3c0ff05b20648b5ef8caed732bdaa8918e66fe5f3a1c711c52"}}, "download_size": 1548257, "post_processing_size": null, "dataset_size": 96361, "size_in_bytes": 1644618}, "tax_accountant": {"description": "C-Eval is a comprehensive Chinese evaluation suite for foundation models. It consists of 13948 multi-choice questions spanning 52 diverse disciplines and four difficulty levels.\n", "citation": "@article{huang2023ceval,\n    title={C-Eval: A Multi-Level Multi-Discipline Chinese Evaluation Suite for Foundation Models}, \n    author={Huang, Yuzhen and Bai, Yuzhuo and Zhu, Zhihao and Zhang, Junlei and Zhang, Jinghan and Su, Tangjun and Liu, Junteng and Lv, Chuancheng and Zhang, Yikai and Lei, Jiayi and Fu, Yao and Sun, Maosong and He, Junxian},\n    journal={arXiv preprint arXiv:2305.08322},\n    year={2023}\n}\n", "homepage": "https://cevalbenchmark.com", "license": "Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License", "features": {"id": {"dtype": "int32", "id": null, "_type": "Value"}, "question": {"dtype": "string", "id": null, "_type": "Value"}, "A": {"dtype": "string", "id": null, "_type": "Value"}, "B": {"dtype": "string", "id": null, "_type": "Value"}, "C": {"dtype": "string", "id": null, "_type": "Value"}, "D": {"dtype": "string", "id": null, "_type": "Value"}, "answer": {"dtype": "string", "id": null, "_type": "Value"}, "explanation": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "ceval-exam", "config_name": "tax_accountant", "version": {"version_str": "1.0.0", "description": null, "major": 1, "minor": 0, "patch": 0}, "splits": {"test": {"name": "test", "num_bytes": 174482, "num_examples": 443, "dataset_name": "ceval-exam"}, "val": {"name": "val", "num_bytes": 18932, "num_examples": 49, "dataset_name": "ceval-exam"}, "dev": {"name": "dev", "num_bytes": 4274, "num_examples": 5, "dataset_name": "ceval-exam"}}, "download_checksums": {"https://modelscope.oss-cn-beijing.aliyuncs.com/open_data/c-eval/ceval-exam.zip": {"num_bytes": 1548257, "checksum": "b28bc560b655dc3c0ff05b20648b5ef8caed732bdaa8918e66fe5f3a1c711c52"}}, "download_size": 1548257, "post_processing_size": null, "dataset_size": 197688, "size_in_bytes": 1745945}, "physician": {"description": "C-Eval is a comprehensive Chinese evaluation suite for foundation models. It consists of 13948 multi-choice questions spanning 52 diverse disciplines and four difficulty levels.\n", "citation": "@article{huang2023ceval,\n    title={C-Eval: A Multi-Level Multi-Discipline Chinese Evaluation Suite for Foundation Models}, \n    author={Huang, Yuzhen and Bai, Yuzhuo and Zhu, Zhihao and Zhang, Junlei and Zhang, Jinghan and Su, Tangjun and Liu, Junteng and Lv, Chuancheng and Zhang, Yikai and Lei, Jiayi and Fu, Yao and Sun, Maosong and He, Junxian},\n    journal={arXiv preprint arXiv:2305.08322},\n    year={2023}\n}\n", "homepage": "https://cevalbenchmark.com", "license": "Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License", "features": {"id": {"dtype": "int32", "id": null, "_type": "Value"}, "question": {"dtype": "string", "id": null, "_type": "Value"}, "A": {"dtype": "string", "id": null, "_type": "Value"}, "B": {"dtype": "string", "id": null, "_type": "Value"}, "C": {"dtype": "string", "id": null, "_type": "Value"}, "D": {"dtype": "string", "id": null, "_type": "Value"}, "answer": {"dtype": "string", "id": null, "_type": "Value"}, "explanation": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "ceval-exam", "config_name": "physician", "version": {"version_str": "1.0.0", "description": null, "major": 1, "minor": 0, "patch": 0}, "splits": {"test": {"name": "test", "num_bytes": 89801, "num_examples": 443, "dataset_name": "ceval-exam"}, "val": {"name": "val", "num_bytes": 8710, "num_examples": 49, "dataset_name": "ceval-exam"}, "dev": {"name": "dev", "num_bytes": 2033, "num_examples": 5, "dataset_name": "ceval-exam"}}, "download_checksums": {"https://modelscope.oss-cn-beijing.aliyuncs.com/open_data/c-eval/ceval-exam.zip": {"num_bytes": 1548257, "checksum": "b28bc560b655dc3c0ff05b20648b5ef8caed732bdaa8918e66fe5f3a1c711c52"}}, "download_size": 1548257, "post_processing_size": null, "dataset_size": 100544, "size_in_bytes": 1648801}}
						
						
					
				
				
					
						Reference in New Issue
					
					View Git Blame
					Copy Permalink