commit 2526b805eb173121984da00fb056180dfd2788c2 Author: 4pdadmin <> Date: Fri Jul 18 07:26:26 2025 +0000 init diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..7fe70d7 --- /dev/null +++ b/.gitattributes @@ -0,0 +1 @@ +*.json filter=lfs diff=lfs merge=lfs -text diff --git a/4pd-chatglm-6b-v1.0.0.json b/4pd-chatglm-6b-v1.0.0.json new file mode 100644 index 0000000..31ea2aa --- /dev/null +++ b/4pd-chatglm-6b-v1.0.0.json @@ -0,0 +1,21 @@ +{ + "id": "4pd/chatglm-6b@v1.0.0", + "description": "", + "owner": "", + "results": { + "C3": 90.0, + "lambada": 49.62, + "GaokaoBench_2010-2013_English_MCQs": 50.0, + "triviaqa": 20.83 + }, + "metadata": { + "language": [ + "en" + ], + "tags": [ + "sagegpt" + ], + "pipeline_tag": "text-generation", + "inference": false + } +} \ No newline at end of file diff --git a/4pd-chatglm-6b-v1.0.1.json b/4pd-chatglm-6b-v1.0.1.json new file mode 100644 index 0000000..84b573a --- /dev/null +++ b/4pd-chatglm-6b-v1.0.1.json @@ -0,0 +1,16 @@ +{ + "id": "4pd/chatglm-6b@v1.0.1", + "description": "", + "owner": "", + "results": {}, + "metadata": { + "language": [ + "en" + ], + "tags": [ + "sagegpt" + ], + "pipeline_tag": "text-generation", + "inference": false + } +} \ No newline at end of file diff --git a/4pd-llama-2-7b-chat-hf-base.json b/4pd-llama-2-7b-chat-hf-base.json new file mode 100644 index 0000000..33eb694 --- /dev/null +++ b/4pd-llama-2-7b-chat-hf-base.json @@ -0,0 +1,8 @@ +{ + "id": "4pd/llama-2-7b-chat-hf@base", + "description": "llama-2-7b-chat-hf", + "updatedAt": "2024-04-25 21:54:49", + "mode": "service", + "results": {}, + "metadata": {} +} \ No newline at end of file diff --git a/4pd-niotest-main.json b/4pd-niotest-main.json new file mode 100644 index 0000000..c66eceb --- /dev/null +++ b/4pd-niotest-main.json @@ -0,0 +1,33 @@ +{ + "id": "4pd/niotest@main", + "description": "nio test eval rank", + "updatedAt": "2023-11-14 15:56:15", + "owner": "nio", + "results": { + "ceval": 59.92, + "mmlu": 55.46, + "triviaqa": 55.98, + "hellaswag": 71.76, + "cmmlu": 62.03, + "C3": 72.0, + "lambada": 66.83, + "SageBench-exam": 75.04, + "CValues-Responsibility": 90.07, + "longbench_lsht": 90.07, + "longbench_vcsum": 90.07, + "longbench_dureader": 90.07, + "longbench_multifieldqa_zh": 90.07, + "longbench_passage_retrieval_zh": 90.07 + }, + "metadata": { + "language": [ + "zh", + "en" + ], + "tags": [ + "qwen" + ], + "pipeline_tag": "text-generation", + "inference": false + } +} \ No newline at end of file diff --git a/4pd-nl2sql-qianwen-base.json b/4pd-nl2sql-qianwen-base.json new file mode 100644 index 0000000..6f18fc6 --- /dev/null +++ b/4pd-nl2sql-qianwen-base.json @@ -0,0 +1,17 @@ +{ + "id": "4pd/nl2sql-qianwen@base", + "description": "", + "owner": "", + "results": {}, + "metadata": { + "language": [ + "zh", + "en" + ], + "tags": [ + "qwen" + ], + "pipeline_tag": "text-generation", + "inference": false + } +} \ No newline at end of file diff --git a/4pd-qwen-14b-chat-int4-main.json b/4pd-qwen-14b-chat-int4-main.json new file mode 100644 index 0000000..715db3b --- /dev/null +++ b/4pd-qwen-14b-chat-int4-main.json @@ -0,0 +1,20 @@ +{ + "id": "4pd/qwen-14b-chat-int4@main", + "description": "{{ description }}", + "updatedAt": "2023-10-09 17:17:35", + "mode": "api", + "results": { + "lambada": 58.88 + }, + "metadata": { + "language": [ + "zh", + "en" + ], + "tags": [ + "qwen" + ], + "pipeline_tag": "text-generation", + "inference": false + } +} \ No newline at end of file diff --git a/4pd-qwen-7b-chat-base.json b/4pd-qwen-7b-chat-base.json new file mode 100644 index 0000000..180387c --- /dev/null +++ b/4pd-qwen-7b-chat-base.json @@ -0,0 +1,20 @@ +{ + "id": "4pd/qwen-7b-chat@base", + "description": "", + "owner": "", + "results": { + "triviaqa": 38.67, + "lambada": 49.31 + }, + "metadata": { + "language": [ + "zh", + "en" + ], + "tags": [ + "qwen" + ], + "pipeline_tag": "text-generation", + "inference": false + } +} \ No newline at end of file diff --git a/4pd-qwen-7b-chat-hf-main.json b/4pd-qwen-7b-chat-hf-main.json new file mode 100644 index 0000000..695610d --- /dev/null +++ b/4pd-qwen-7b-chat-hf-main.json @@ -0,0 +1,20 @@ +{ + "id": "4pd/qwen-7b-chat-hf@main", + "description": "{{ description }}", + "updatedAt": "2023-10-09 16:36:28", + "mode": "api", + "results": { + "lambada": 45.57 + }, + "metadata": { + "language": [ + "zh", + "en" + ], + "tags": [ + "qwen" + ], + "pipeline_tag": "text-generation", + "inference": false + } +} \ No newline at end of file diff --git a/4pd-qwen-7b-chat-int4-main.json b/4pd-qwen-7b-chat-int4-main.json new file mode 100644 index 0000000..64e8e75 --- /dev/null +++ b/4pd-qwen-7b-chat-int4-main.json @@ -0,0 +1,18 @@ +{ + "id": "4pd/qwen-7b-chat-int4@main", + "description": "{{ description }}", + "updatedAt": "2023-09-22 17:16:51.075176", + "mode": "api", + "results": {}, + "metadata": { + "language": [ + "zh", + "en" + ], + "tags": [ + "qwen" + ], + "pipeline_tag": "text-generation", + "inference": false + } +} \ No newline at end of file diff --git a/4pd-qwen-7b-chat-main.json b/4pd-qwen-7b-chat-main.json new file mode 100644 index 0000000..24fde5c --- /dev/null +++ b/4pd-qwen-7b-chat-main.json @@ -0,0 +1,18 @@ +{ + "id": "4pd/qwen-7b-chat@main", + "description": "{{ description }}", + "updatedAt": "2023-10-09 17:32:21", + "mode": "service", + "results": {}, + "metadata": { + "language": [ + "zh", + "en" + ], + "tags": [ + "qwen" + ], + "pipeline_tag": "text-generation", + "inference": false + } +} \ No newline at end of file diff --git a/4pd-qwen-7b-chat-test-base.json b/4pd-qwen-7b-chat-test-base.json new file mode 100644 index 0000000..8671e62 --- /dev/null +++ b/4pd-qwen-7b-chat-test-base.json @@ -0,0 +1,19 @@ +{ + "id": "4pd/qwen-7b-chat-test@base", + "description": "", + "owner": "", + "results": { + "GaokaoBench_2010-2013_English_MCQs": 66.81 + }, + "metadata": { + "language": [ + "zh", + "en" + ], + "tags": [ + "qwen" + ], + "pipeline_tag": "text-generation", + "inference": false + } +} \ No newline at end of file diff --git a/4pd-sagegptv2-base.json b/4pd-sagegptv2-base.json new file mode 100644 index 0000000..bfc61e1 --- /dev/null +++ b/4pd-sagegptv2-base.json @@ -0,0 +1,19 @@ +{ + "id": "4pd/sagegptv2@base", + "description": "", + "owner": "", + "results": { + "lambada": 43.26 + }, + "metadata": { + "language": [ + "zh", + "en" + ], + "tags": [ + "sagegpt" + ], + "pipeline_tag": "text-generation", + "inference": false + } +} \ No newline at end of file diff --git a/4pd-sagegptv2-infer4000.json b/4pd-sagegptv2-infer4000.json new file mode 100644 index 0000000..7f81925 --- /dev/null +++ b/4pd-sagegptv2-infer4000.json @@ -0,0 +1,21 @@ +{ + "id": "4pd/sagegptv2@infer4000", + "description": "{{ description }}", + "updatedAt": "2023-10-09 16:01:21", + "mode": "api", + "results": { + "triviaqa": 26.41, + "lambada": 65.83 + }, + "metadata": { + "language": [ + "zh", + "en" + ], + "tags": [ + "sagegpt" + ], + "pipeline_tag": "text-generation", + "inference": false + } +} \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..fbf64d4 --- /dev/null +++ b/README.md @@ -0,0 +1,3 @@ +# eval-leaderboard + +README for eval-leaderboard \ No newline at end of file diff --git a/baichuan2-13b-base-hf.json b/baichuan2-13b-base-hf.json new file mode 100644 index 0000000..38fe47f --- /dev/null +++ b/baichuan2-13b-base-hf.json @@ -0,0 +1,18 @@ +{ + "id": "baichuan2-13b-base-hf", + "description": "{{ description }}", + "updatedAt": "2023-10-09 15:09:10", + "mode": "service", + "results": {}, + "metadata": { + "language": [ + "zh", + "en" + ], + "tags": [ + "qwen" + ], + "pipeline_tag": "text-generation", + "inference": false + } +} \ No newline at end of file diff --git a/baichuan2-13b-chat-hf.json b/baichuan2-13b-chat-hf.json new file mode 100644 index 0000000..a1119a6 --- /dev/null +++ b/baichuan2-13b-chat-hf.json @@ -0,0 +1,18 @@ +{ + "id": "baichuan2-13b-chat-hf", + "description": "{{ description }}", + "updatedAt": "2023-10-09 19:54:48", + "mode": "service", + "results": {}, + "metadata": { + "language": [ + "zh", + "en" + ], + "tags": [ + "qwen" + ], + "pipeline_tag": "text-generation", + "inference": false + } +} \ No newline at end of file diff --git a/chatglm-6b-hf.json b/chatglm-6b-hf.json new file mode 100644 index 0000000..0ceb584 --- /dev/null +++ b/chatglm-6b-hf.json @@ -0,0 +1,18 @@ +{ + "id": "chatglm-6b-hf", + "description": "{{ description }}", + "updatedAt": "2023-10-09 18:54:54", + "mode": "service", + "results": {}, + "metadata": { + "language": [ + "zh", + "en" + ], + "tags": [ + "qwen" + ], + "pipeline_tag": "text-generation", + "inference": false + } +} \ No newline at end of file diff --git a/config/group.json b/config/group.json new file mode 100644 index 0000000..9ee25ec --- /dev/null +++ b/config/group.json @@ -0,0 +1,34 @@ +{ + "公开核心榜单": [ + "ceval", + "mmlu", + "triviaqa", + "hellaswag", + "cmmlu", + "C3", + "lambada" + ], + "私有榜单V1": [ + "SageBench-exam" + ], + "私有榜单V2": [], + "中文": [ + "ceval", + "cmmlu", + "C3", + "SageBench-exam" + ], + "英文": [ + "mmlu", + "triviaqa", + "hellaswag", + "lambada" + ], + "长窗口": [ + "longbench_lsht", + "longbench_vcsum", + "longbench_dureader", + "longbench_multifieldqa_zh", + "longbench_passage_retrieval_zh" + ] +} \ No newline at end of file diff --git a/config/order.json b/config/order.json new file mode 100644 index 0000000..cc0c34b --- /dev/null +++ b/config/order.json @@ -0,0 +1,17 @@ +[ + "ceval", + "agieval", + "mmlu", + "GaokaoBench", + "triviaqa", + "hellaswag", + "cmmlu", + "C3", + "lambada", + "SageBench-exam", + "longbench_lsht", + "longbench_vcsum", + "longbench_dureader", + "longbench_multifieldqa_zh", + "longbench_passage_retrieval_zh" +] \ No newline at end of file diff --git a/merge.py b/merge.py new file mode 100644 index 0000000..8a4eb3c --- /dev/null +++ b/merge.py @@ -0,0 +1,69 @@ +import os +import shutil +import json +import argparse + + +def do_merge(new_json_path, old_json_path): + with open(new_json_path, "r") as f: + new_json = json.load(f) + + with open(old_json_path, "r") as f: + old_json = json.load(f) + + if new_json.get("id") != old_json.get("id"): + raise RuntimeError(f"{new_json.get('id')} not equal to {old_json.get('id')} ") + + # 基础属性,从新的里拿,不为空就覆盖。 + + base_params = [ + "description", + "owner", + ] + # 获取基础属性 + for param in base_params: + if new_json.get(param): + old_json[param] = new_json.get(param) + # 更新result + new_results = new_json.get("results") + old_results = old_json.get("results") + for key in new_results: + if new_results[key]: + old_results[key] = new_results[key] + # 更新metadata + if new_json.get("metadata"): + old_json["metadata"] = new_json.get("metadata") + + return old_json + + +def merge(new_data_path, old_data_path): + if not os.path.exists(new_data_path): + raise RuntimeError(f"data_path 【{new_data_path}】 not exist!") + # foreach new files + for new_json_file in os.listdir(new_data_path): + if not new_json_file.endswith("json"): + print(f"file {new_json_file} not a json file,jump") + continue + # check old file exist or not + if not os.path.exists(os.path.join(old_data_path, new_json_file)): + print(f"new file 【{new_json_file}】 copy...") + shutil.copyfile(os.path.join(new_data_path, new_json_file), os.path.join(old_data_path, new_json_file)) + else: + print(f"merge file 【{new_json_file}】") + new_json_content = do_merge(os.path.join(new_data_path, new_json_file), + os.path.join(old_data_path, new_json_file)) + with open(os.path.join(old_data_path, new_json_file), "w") as f: + json.dump(new_json_content, f, indent=4, ensure_ascii=False) + + +if __name__ == '__main__': + args = argparse.ArgumentParser() + args.add_argument("--new_data", type=str, required=True) + args.add_argument('--old_data', type=str, default=None, required=False) + args = args.parse_args() + + new_data: str = args.new_data + old_data: str = args.old_data or os.path.dirname(os.path.abspath(__file__)) + + merge(new_data, old_data) diff --git a/test-yu.json b/test-yu.json new file mode 100644 index 0000000..fd04015 --- /dev/null +++ b/test-yu.json @@ -0,0 +1,16 @@ +{ + "id": "test-yu", + "description": "ChatGPT 3.5", + "owner": "", + "results": { + "ceval": 52.5, + "agieval": 39.9, + "mmlu": 69.1, + "GaokaoBench": 51.1, + "triviaqa": 63.8, + "hellaswag": 79.5, + "cmmlu": 53.9, + "C3": 85.6, + "lambada": 57.5 + } +} \ No newline at end of file