4pdadmin 2025-07-18 07:26:26 +00:00 committed by root
commit 2526b805eb
22 changed files with 444 additions and 0 deletions

1
.gitattributes vendored Normal file
@@ -0,0 +1 @@
*.json filter=lfs diff=lfs merge=lfs -text
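This attribute line routes every *.json file in the repository through Git LFS; it is the line that git lfs track "*.json" normally writes, so the leaderboard entry files added below are stored as LFS objects rather than plain blobs once the LFS filter is active.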

@@ -0,0 +1,21 @@
{
"id": "4pd/chatglm-6b@v1.0.0",
"description": "",
"owner": "",
"results": {
"C3": 90.0,
"lambada": 49.62,
"GaokaoBench_2010-2013_English_MCQs": 50.0,
"triviaqa": 20.83
},
"metadata": {
"language": [
"en"
],
"tags": [
"sagegpt"
],
"pipeline_tag": "text-generation",
"inference": false
}
}

@@ -0,0 +1,16 @@
{
"id": "4pd/chatglm-6b@v1.0.1",
"description": "",
"owner": "",
"results": {},
"metadata": {
"language": [
"en"
],
"tags": [
"sagegpt"
],
"pipeline_tag": "text-generation",
"inference": false
}
}

@@ -0,0 +1,8 @@
{
"id": "4pd/llama-2-7b-chat-hf@base",
"description": "llama-2-7b-chat-hf",
"updatedAt": "2024-04-25 21:54:49",
"mode": "service",
"results": {},
"metadata": {}
}

33
4pd-niotest-main.json Normal file
@@ -0,0 +1,33 @@
{
"id": "4pd/niotest@main",
"description": "nio test eval rank",
"updatedAt": "2023-11-14 15:56:15",
"owner": "nio",
"results": {
"ceval": 59.92,
"mmlu": 55.46,
"triviaqa": 55.98,
"hellaswag": 71.76,
"cmmlu": 62.03,
"C3": 72.0,
"lambada": 66.83,
"SageBench-exam": 75.04,
"CValues-Responsibility": 90.07,
"longbench_lsht": 90.07,
"longbench_vcsum": 90.07,
"longbench_dureader": 90.07,
"longbench_multifieldqa_zh": 90.07,
"longbench_passage_retrieval_zh": 90.07
},
"metadata": {
"language": [
"zh",
"en"
],
"tags": [
"qwen"
],
"pipeline_tag": "text-generation",
"inference": false
}
}
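Each model entry in this commit follows the same shape as the file above: an "id" naming the model and revision, a free-text "description", a "results" map from benchmark name to score, and a "metadata" block used for display and filtering. A minimal sketch of consuming one entry, assuming Python and a working directory at the repository root (the sketch itself is not part of the commit):

import json

# Read one leaderboard entry and list its benchmark scores.
# 4pd-niotest-main.json is one of the files added in this commit.
with open("4pd-niotest-main.json", "r", encoding="utf-8") as f:
    entry = json.load(f)

print(entry["id"], "-", entry.get("description", ""))
for benchmark, score in entry.get("results", {}).items():
    print(f"  {benchmark}: {score}")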

@@ -0,0 +1,17 @@
{
"id": "4pd/nl2sql-qianwen@base",
"description": "",
"owner": "",
"results": {},
"metadata": {
"language": [
"zh",
"en"
],
"tags": [
"qwen"
],
"pipeline_tag": "text-generation",
"inference": false
}
}

@@ -0,0 +1,20 @@
{
"id": "4pd/qwen-14b-chat-int4@main",
"description": "{{ description }}",
"updatedAt": "2023-10-09 17:17:35",
"mode": "api",
"results": {
"lambada": 58.88
},
"metadata": {
"language": [
"zh",
"en"
],
"tags": [
"qwen"
],
"pipeline_tag": "text-generation",
"inference": false
}
}

@@ -0,0 +1,20 @@
{
"id": "4pd/qwen-7b-chat@base",
"description": "",
"owner": "",
"results": {
"triviaqa": 38.67,
"lambada": 49.31
},
"metadata": {
"language": [
"zh",
"en"
],
"tags": [
"qwen"
],
"pipeline_tag": "text-generation",
"inference": false
}
}

@@ -0,0 +1,20 @@
{
"id": "4pd/qwen-7b-chat-hf@main",
"description": "{{ description }}",
"updatedAt": "2023-10-09 16:36:28",
"mode": "api",
"results": {
"lambada": 45.57
},
"metadata": {
"language": [
"zh",
"en"
],
"tags": [
"qwen"
],
"pipeline_tag": "text-generation",
"inference": false
}
}

@@ -0,0 +1,18 @@
{
"id": "4pd/qwen-7b-chat-int4@main",
"description": "{{ description }}",
"updatedAt": "2023-09-22 17:16:51.075176",
"mode": "api",
"results": {},
"metadata": {
"language": [
"zh",
"en"
],
"tags": [
"qwen"
],
"pipeline_tag": "text-generation",
"inference": false
}
}

@@ -0,0 +1,18 @@
{
"id": "4pd/qwen-7b-chat@main",
"description": "{{ description }}",
"updatedAt": "2023-10-09 17:32:21",
"mode": "service",
"results": {},
"metadata": {
"language": [
"zh",
"en"
],
"tags": [
"qwen"
],
"pipeline_tag": "text-generation",
"inference": false
}
}

@@ -0,0 +1,19 @@
{
"id": "4pd/qwen-7b-chat-test@base",
"description": "",
"owner": "",
"results": {
"GaokaoBench_2010-2013_English_MCQs": 66.81
},
"metadata": {
"language": [
"zh",
"en"
],
"tags": [
"qwen"
],
"pipeline_tag": "text-generation",
"inference": false
}
}

19
4pd-sagegptv2-base.json Normal file
@@ -0,0 +1,19 @@
{
"id": "4pd/sagegptv2@base",
"description": "",
"owner": "",
"results": {
"lambada": 43.26
},
"metadata": {
"language": [
"zh",
"en"
],
"tags": [
"sagegpt"
],
"pipeline_tag": "text-generation",
"inference": false
}
}

@@ -0,0 +1,21 @@
{
"id": "4pd/sagegptv2@infer4000",
"description": "{{ description }}",
"updatedAt": "2023-10-09 16:01:21",
"mode": "api",
"results": {
"triviaqa": 26.41,
"lambada": 65.83
},
"metadata": {
"language": [
"zh",
"en"
],
"tags": [
"sagegpt"
],
"pipeline_tag": "text-generation",
"inference": false
}
}

3
README.md Normal file
@@ -0,0 +1,3 @@
# eval-leaderboard
README for eval-leaderboard

@@ -0,0 +1,18 @@
{
"id": "baichuan2-13b-base-hf",
"description": "{{ description }}",
"updatedAt": "2023-10-09 15:09:10",
"mode": "service",
"results": {},
"metadata": {
"language": [
"zh",
"en"
],
"tags": [
"qwen"
],
"pipeline_tag": "text-generation",
"inference": false
}
}

@@ -0,0 +1,18 @@
{
"id": "baichuan2-13b-chat-hf",
"description": "{{ description }}",
"updatedAt": "2023-10-09 19:54:48",
"mode": "service",
"results": {},
"metadata": {
"language": [
"zh",
"en"
],
"tags": [
"qwen"
],
"pipeline_tag": "text-generation",
"inference": false
}
}

18
chatglm-6b-hf.json Normal file
@@ -0,0 +1,18 @@
{
"id": "chatglm-6b-hf",
"description": "{{ description }}",
"updatedAt": "2023-10-09 18:54:54",
"mode": "service",
"results": {},
"metadata": {
"language": [
"zh",
"en"
],
"tags": [
"qwen"
],
"pipeline_tag": "text-generation",
"inference": false
}
}

34
config/group.json Normal file
@@ -0,0 +1,34 @@
{
"公开核心榜单": [
"ceval",
"mmlu",
"triviaqa",
"hellaswag",
"cmmlu",
"C3",
"lambada"
],
"私有榜单V1": [
"SageBench-exam"
],
"私有榜单V2": [],
"中文": [
"ceval",
"cmmlu",
"C3",
"SageBench-exam"
],
"英文": [
"mmlu",
"triviaqa",
"hellaswag",
"lambada"
],
"长窗口": [
"longbench_lsht",
"longbench_vcsum",
"longbench_dureader",
"longbench_multifieldqa_zh",
"longbench_passage_retrieval_zh"
]
}
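config/group.json maps display groups to benchmark lists: 公开核心榜单 (public core board), 私有榜单V1/V2 (private boards V1 and V2), 中文 (Chinese), 英文 (English), and 长窗口 (long context). How the leaderboard aggregates these groups is not part of this commit; one plausible sketch, assuming a plain mean over whichever benchmarks a model has scores for:

import json

# Hypothetical aggregation: average an entry's scores over each benchmark group.
# The real rendering code is not included in this commit, so the plain mean below
# is an assumption made for illustration only.
with open("config/group.json", "r", encoding="utf-8") as f:
    groups = json.load(f)
with open("4pd-niotest-main.json", "r", encoding="utf-8") as f:
    results = json.load(f).get("results", {})

for group_name, benchmarks in groups.items():
    scores = [results[b] for b in benchmarks if b in results]
    if scores:
        print(f"{group_name}: {sum(scores) / len(scores):.2f}")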

17
config/order.json Normal file
@@ -0,0 +1,17 @@
[
"ceval",
"agieval",
"mmlu",
"GaokaoBench",
"triviaqa",
"hellaswag",
"cmmlu",
"C3",
"lambada",
"SageBench-exam",
"longbench_lsht",
"longbench_vcsum",
"longbench_dureader",
"longbench_multifieldqa_zh",
"longbench_passage_retrieval_zh"
]
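config/order.json fixes the column order in which benchmarks are listed. A short sketch of applying it, again an illustration rather than the leaderboard's own code, using the test-yu entry added at the end of this commit:

import json

# Emit one model's scores in the column order defined by config/order.json,
# skipping benchmarks the model has no score for.
with open("config/order.json", "r", encoding="utf-8") as f:
    order = json.load(f)
with open("test-yu.json", "r", encoding="utf-8") as f:
    results = json.load(f).get("results", {})

row = [(name, results[name]) for name in order if name in results]
print(row)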

69
merge.py Normal file
@@ -0,0 +1,69 @@
import os
import shutil
import json
import argparse


def do_merge(new_json_path, old_json_path):
    """Merge a new leaderboard entry into the existing entry with the same id."""
    with open(new_json_path, "r") as f:
        new_json = json.load(f)
    with open(old_json_path, "r") as f:
        old_json = json.load(f)
    if new_json.get("id") != old_json.get("id"):
        raise RuntimeError(f"{new_json.get('id')} not equal to {old_json.get('id')}")
    # Base attributes: take them from the new file and overwrite when non-empty.
    base_params = [
        "description",
        "owner",
    ]
    for param in base_params:
        if new_json.get(param):
            old_json[param] = new_json.get(param)
    # Update results: only overwrite scores that the new file actually provides.
    new_results = new_json.get("results") or {}
    old_results = old_json.setdefault("results", {})
    for key in new_results:
        if new_results[key]:
            old_results[key] = new_results[key]
    # Update metadata: replace it wholesale when the new file provides one.
    if new_json.get("metadata"):
        old_json["metadata"] = new_json.get("metadata")
    return old_json


def merge(new_data_path, old_data_path):
    if not os.path.exists(new_data_path):
        raise RuntimeError(f"data_path 【{new_data_path}】 does not exist!")
    # Walk the new result files: copy unknown entries, merge known ones.
    for new_json_file in os.listdir(new_data_path):
        if not new_json_file.endswith(".json"):
            print(f"file {new_json_file} is not a json file, skipping")
            continue
        # Check whether the entry already exists in the old data directory.
        if not os.path.exists(os.path.join(old_data_path, new_json_file)):
            print(f"new file 【{new_json_file}】 copy...")
            shutil.copyfile(os.path.join(new_data_path, new_json_file),
                            os.path.join(old_data_path, new_json_file))
        else:
            print(f"merge file 【{new_json_file}】")
            new_json_content = do_merge(os.path.join(new_data_path, new_json_file),
                                        os.path.join(old_data_path, new_json_file))
            with open(os.path.join(old_data_path, new_json_file), "w") as f:
                json.dump(new_json_content, f, indent=4, ensure_ascii=False)


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument("--new_data", type=str, required=True)
    parser.add_argument("--old_data", type=str, default=None, required=False)
    args = parser.parse_args()
    new_data: str = args.new_data
    old_data: str = args.old_data or os.path.dirname(os.path.abspath(__file__))
    merge(new_data, old_data)
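Usage note: pointing the script at a directory of freshly exported entries, for example python merge.py --new_data ./new_results, copies any entry file that does not yet exist into the script's own directory and merges non-empty scores and attributes into the ones that do; pass --old_data to target a different checkout of the leaderboard. The ./new_results path is only an example.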

16
test-yu.json Normal file
@@ -0,0 +1,16 @@
{
"id": "test-yu",
"description": "ChatGPT 3.5",
"owner": "",
"results": {
"ceval": 52.5,
"agieval": 39.9,
"mmlu": 69.1,
"GaokaoBench": 51.1,
"triviaqa": 63.8,
"hellaswag": 79.5,
"cmmlu": 53.9,
"C3": 85.6,
"lambada": 57.5
}
}