Compare commits
No commits in common. "main" and "main-260114-094141" have entirely different histories.
main
...
main-26011
1
.gitattributes
vendored
Normal file
1
.gitattributes
vendored
Normal file
@ -0,0 +1 @@
|
||||
*.json filter=lfs diff=lfs merge=lfs -text
|
||||
0
.gitignore
vendored
Normal file
0
.gitignore
vendored
Normal file
1321
configs/20260114_094354.py
Normal file
1321
configs/20260114_094354.py
Normal file
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,7 @@
|
||||
[RISE-CORE Msg(15763:140664432458752:libvgpu.c:900)]: Initializing.....
|
||||
[RISE-CORE ERROR (pid:15763 thread=140664432458752 libvgpu.c:958)]: cuInit failed:100
|
||||
01/14 10:14:22 - OpenCompass - INFO - Task [public/qwen2-5-32b-instruct-lora-chatbi@main/lambada]: {'accuracy': 55.734523578497964}
|
||||
01/14 10:14:22 - OpenCompass - INFO - time elapsed: 2.20s
|
||||
/opt/conda/lib/python3.8/site-packages/fuzzywuzzy/fuzz.py:11: UserWarning: Using slow pure-python SequenceMatcher. Install python-Levenshtein to remove this warning
|
||||
warnings.warn('Using slow pure-python SequenceMatcher. Install python-Levenshtein to remove this warning')
|
||||
[RISE-CORE Msg(15763:140664432458752:multiprocess_memory_limit.c:504)]: Calling exit handler 15763
|
||||
@ -0,0 +1,7 @@
|
||||
[RISE-CORE Msg(15830:140387341290496:libvgpu.c:900)]: Initializing.....
|
||||
[RISE-CORE ERROR (pid:15830 thread=140387341290496 libvgpu.c:958)]: cuInit failed:100
|
||||
01/14 10:14:25 - OpenCompass - INFO - Task [public/qwen2-5-32b-instruct-lora-chatbi@main/triviaqa]: {'score': 57.50820414167704}
|
||||
01/14 10:14:25 - OpenCompass - INFO - time elapsed: 3.78s
|
||||
/opt/conda/lib/python3.8/site-packages/fuzzywuzzy/fuzz.py:11: UserWarning: Using slow pure-python SequenceMatcher. Install python-Levenshtein to remove this warning
|
||||
warnings.warn('Using slow pure-python SequenceMatcher. Install python-Levenshtein to remove this warning')
|
||||
[RISE-CORE Msg(15830:140387341290496:multiprocess_memory_limit.c:504)]: Calling exit handler 15830
|
||||
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
BIN
predictions/public/qwen2-5-32b-instruct-lora-chatbi@main/lambada_0.json
(Stored with Git LFS)
Normal file
BIN
predictions/public/qwen2-5-32b-instruct-lora-chatbi@main/lambada_0.json
(Stored with Git LFS)
Normal file
Binary file not shown.
BIN
predictions/public/qwen2-5-32b-instruct-lora-chatbi@main/lambada_1.json
(Stored with Git LFS)
Normal file
BIN
predictions/public/qwen2-5-32b-instruct-lora-chatbi@main/lambada_1.json
(Stored with Git LFS)
Normal file
Binary file not shown.
BIN
predictions/public/qwen2-5-32b-instruct-lora-chatbi@main/lambada_2.json
(Stored with Git LFS)
Normal file
BIN
predictions/public/qwen2-5-32b-instruct-lora-chatbi@main/lambada_2.json
(Stored with Git LFS)
Normal file
Binary file not shown.
BIN
predictions/public/qwen2-5-32b-instruct-lora-chatbi@main/triviaqa_0.json
(Stored with Git LFS)
Normal file
BIN
predictions/public/qwen2-5-32b-instruct-lora-chatbi@main/triviaqa_0.json
(Stored with Git LFS)
Normal file
Binary file not shown.
BIN
predictions/public/qwen2-5-32b-instruct-lora-chatbi@main/triviaqa_1.json
(Stored with Git LFS)
Normal file
BIN
predictions/public/qwen2-5-32b-instruct-lora-chatbi@main/triviaqa_1.json
(Stored with Git LFS)
Normal file
Binary file not shown.
BIN
predictions/public/qwen2-5-32b-instruct-lora-chatbi@main/triviaqa_2.json
(Stored with Git LFS)
Normal file
BIN
predictions/public/qwen2-5-32b-instruct-lora-chatbi@main/triviaqa_2.json
(Stored with Git LFS)
Normal file
Binary file not shown.
BIN
predictions/public/qwen2-5-32b-instruct-lora-chatbi@main/triviaqa_3.json
(Stored with Git LFS)
Normal file
BIN
predictions/public/qwen2-5-32b-instruct-lora-chatbi@main/triviaqa_3.json
(Stored with Git LFS)
Normal file
Binary file not shown.
BIN
predictions/public/qwen2-5-32b-instruct-lora-chatbi@main/triviaqa_4.json
(Stored with Git LFS)
Normal file
BIN
predictions/public/qwen2-5-32b-instruct-lora-chatbi@main/triviaqa_4.json
(Stored with Git LFS)
Normal file
Binary file not shown.
BIN
results/public/qwen2-5-32b-instruct-lora-chatbi@main/lambada.json
(Stored with Git LFS)
Normal file
BIN
results/public/qwen2-5-32b-instruct-lora-chatbi@main/lambada.json
(Stored with Git LFS)
Normal file
Binary file not shown.
BIN
results/public/qwen2-5-32b-instruct-lora-chatbi@main/triviaqa.json
(Stored with Git LFS)
Normal file
BIN
results/public/qwen2-5-32b-instruct-lora-chatbi@main/triviaqa.json
(Stored with Git LFS)
Normal file
Binary file not shown.
87
summary/summary_20260114_094354.csv
Normal file
87
summary/summary_20260114_094354.csv
Normal file
@ -0,0 +1,87 @@
|
||||
dataset,version,metric,mode,public/qwen2-5-32b-instruct-lora-chatbi@main
|
||||
--------- 考试 Exam ---------,-,-,-,-
|
||||
ceval,-,-,-,-
|
||||
agieval,-,-,-,-
|
||||
mmlu,-,-,-,-
|
||||
GaokaoBench,-,-,-,-
|
||||
ARC-c,-,-,-,-
|
||||
--------- 语言 Language ---------,-,-,-,-
|
||||
WiC,-,-,-,-
|
||||
summedits,-,-,-,-
|
||||
chid-dev,-,-,-,-
|
||||
afqmc-dev,-,-,-,-
|
||||
bustm-dev,-,-,-,-
|
||||
cluewsc-dev,-,-,-,-
|
||||
WSC,-,-,-,-
|
||||
winogrande,-,-,-,-
|
||||
flores_100,-,-,-,-
|
||||
--------- 知识 Knowledge ---------,-,-,-,-
|
||||
BoolQ,-,-,-,-
|
||||
commonsense_qa,-,-,-,-
|
||||
nq,-,-,-,-
|
||||
triviaqa,2121ce,score,gen,57.51
|
||||
--------- 推理 Reasoning ---------,-,-,-,-
|
||||
cmnli,-,-,-,-
|
||||
ocnli,-,-,-,-
|
||||
ocnli_fc-dev,-,-,-,-
|
||||
AX_b,-,-,-,-
|
||||
AX_g,-,-,-,-
|
||||
CB,-,-,-,-
|
||||
RTE,-,-,-,-
|
||||
story_cloze,-,-,-,-
|
||||
COPA,-,-,-,-
|
||||
ReCoRD,-,-,-,-
|
||||
hellaswag,-,-,-,-
|
||||
piqa,-,-,-,-
|
||||
siqa,-,-,-,-
|
||||
strategyqa,-,-,-,-
|
||||
math,-,-,-,-
|
||||
gsm8k,-,-,-,-
|
||||
TheoremQA,-,-,-,-
|
||||
openai_humaneval,-,-,-,-
|
||||
mbpp,-,-,-,-
|
||||
cmmlu,-,-,-,-
|
||||
bbh,-,-,-,-
|
||||
--------- 理解 Understanding ---------,-,-,-,-
|
||||
C3,-,-,-,-
|
||||
CMRC_dev,-,-,-,-
|
||||
DRCD_dev,-,-,-,-
|
||||
MultiRC,-,-,-,-
|
||||
race-middle,-,-,-,-
|
||||
race-high,-,-,-,-
|
||||
openbookqa_fact,-,-,-,-
|
||||
csl_dev,-,-,-,-
|
||||
lcsts,-,-,-,-
|
||||
Xsum,-,-,-,-
|
||||
eprstmt-dev,-,-,-,-
|
||||
lambada,217e11,accuracy,gen,55.73
|
||||
tnews-dev,-,-,-,-
|
||||
--------- 安全 Safety ---------,-,-,-,-
|
||||
crows_pairs,-,-,-,-
|
||||
--------- LEval Exact Match (Acc) ---------,-,-,-,-
|
||||
LEval_coursera,-,-,-,-
|
||||
LEval_gsm100,-,-,-,-
|
||||
LEval_quality,-,-,-,-
|
||||
LEval_tpo,-,-,-,-
|
||||
LEval_topic_retrieval,-,-,-,-
|
||||
--------- LEval Gen (ROUGE) ---------,-,-,-,-
|
||||
LEval_financialqa,-,-,-,-
|
||||
LEval_gov_report_summ,-,-,-,-
|
||||
LEval_legal_contract_qa,-,-,-,-
|
||||
LEval_meeting_summ,-,-,-,-
|
||||
LEval_multidocqa,-,-,-,-
|
||||
LEval_narrativeqa,-,-,-,-
|
||||
LEval_nq,-,-,-,-
|
||||
LEval_news_summ,-,-,-,-
|
||||
LEval_paper_assistant,-,-,-,-
|
||||
LEval_patent_summ,-,-,-,-
|
||||
LEval_review_summ,-,-,-,-
|
||||
LEval_scientificqa,-,-,-,-
|
||||
LEval_tvshow_summ--------- 长文本 LongBench ---------,-,-,-,-
|
||||
longbench_lsht,-,-,-,-
|
||||
longbench_vcsum,-,-,-,-
|
||||
longbench_dureader,-,-,-,-
|
||||
longbench_multifieldqa_zh,-,-,-,-
|
||||
longbench_passage_retrieval_zh,-,-,-,-
|
||||
--------- 单选 自定义数据 ---------,-,-,-,-
|
||||
SageBench-exam,-,-,-,-
|
||||
|
195
summary/summary_20260114_094354.txt
Normal file
195
summary/summary_20260114_094354.txt
Normal file
@ -0,0 +1,195 @@
|
||||
20260114_094354
|
||||
tabulate format
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
dataset version metric mode public/qwen2-5-32b-instruct-lora-chatbi@main
|
||||
----------------------------------------------------- --------- -------- ------ ----------------------------------------------
|
||||
--------- 考试 Exam --------- - - - -
|
||||
ceval - - - -
|
||||
agieval - - - -
|
||||
mmlu - - - -
|
||||
GaokaoBench - - - -
|
||||
ARC-c - - - -
|
||||
--------- 语言 Language --------- - - - -
|
||||
WiC - - - -
|
||||
summedits - - - -
|
||||
chid-dev - - - -
|
||||
afqmc-dev - - - -
|
||||
bustm-dev - - - -
|
||||
cluewsc-dev - - - -
|
||||
WSC - - - -
|
||||
winogrande - - - -
|
||||
flores_100 - - - -
|
||||
--------- 知识 Knowledge --------- - - - -
|
||||
BoolQ - - - -
|
||||
commonsense_qa - - - -
|
||||
nq - - - -
|
||||
triviaqa 2121ce score gen 57.51
|
||||
--------- 推理 Reasoning --------- - - - -
|
||||
cmnli - - - -
|
||||
ocnli - - - -
|
||||
ocnli_fc-dev - - - -
|
||||
AX_b - - - -
|
||||
AX_g - - - -
|
||||
CB - - - -
|
||||
RTE - - - -
|
||||
story_cloze - - - -
|
||||
COPA - - - -
|
||||
ReCoRD - - - -
|
||||
hellaswag - - - -
|
||||
piqa - - - -
|
||||
siqa - - - -
|
||||
strategyqa - - - -
|
||||
math - - - -
|
||||
gsm8k - - - -
|
||||
TheoremQA - - - -
|
||||
openai_humaneval - - - -
|
||||
mbpp - - - -
|
||||
cmmlu - - - -
|
||||
bbh - - - -
|
||||
--------- 理解 Understanding --------- - - - -
|
||||
C3 - - - -
|
||||
CMRC_dev - - - -
|
||||
DRCD_dev - - - -
|
||||
MultiRC - - - -
|
||||
race-middle - - - -
|
||||
race-high - - - -
|
||||
openbookqa_fact - - - -
|
||||
csl_dev - - - -
|
||||
lcsts - - - -
|
||||
Xsum - - - -
|
||||
eprstmt-dev - - - -
|
||||
lambada 217e11 accuracy gen 55.73
|
||||
tnews-dev - - - -
|
||||
--------- 安全 Safety --------- - - - -
|
||||
crows_pairs - - - -
|
||||
--------- LEval Exact Match (Acc) --------- - - - -
|
||||
LEval_coursera - - - -
|
||||
LEval_gsm100 - - - -
|
||||
LEval_quality - - - -
|
||||
LEval_tpo - - - -
|
||||
LEval_topic_retrieval - - - -
|
||||
--------- LEval Gen (ROUGE) --------- - - - -
|
||||
LEval_financialqa - - - -
|
||||
LEval_gov_report_summ - - - -
|
||||
LEval_legal_contract_qa - - - -
|
||||
LEval_meeting_summ - - - -
|
||||
LEval_multidocqa - - - -
|
||||
LEval_narrativeqa - - - -
|
||||
LEval_nq - - - -
|
||||
LEval_news_summ - - - -
|
||||
LEval_paper_assistant - - - -
|
||||
LEval_patent_summ - - - -
|
||||
LEval_review_summ - - - -
|
||||
LEval_scientificqa - - - -
|
||||
LEval_tvshow_summ--------- 长文本 LongBench --------- - - - -
|
||||
longbench_lsht - - - -
|
||||
longbench_vcsum - - - -
|
||||
longbench_dureader - - - -
|
||||
longbench_multifieldqa_zh - - - -
|
||||
longbench_passage_retrieval_zh - - - -
|
||||
--------- 单选 自定义数据 --------- - - - -
|
||||
SageBench-exam - - - -
|
||||
$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$
|
||||
|
||||
-------------------------------------------------------------------------------------------------------------------------------- THIS IS A DIVIDER --------------------------------------------------------------------------------------------------------------------------------
|
||||
|
||||
csv format
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
dataset,version,metric,mode,public/qwen2-5-32b-instruct-lora-chatbi@main
|
||||
--------- 考试 Exam ---------,-,-,-,-
|
||||
ceval,-,-,-,-
|
||||
agieval,-,-,-,-
|
||||
mmlu,-,-,-,-
|
||||
GaokaoBench,-,-,-,-
|
||||
ARC-c,-,-,-,-
|
||||
--------- 语言 Language ---------,-,-,-,-
|
||||
WiC,-,-,-,-
|
||||
summedits,-,-,-,-
|
||||
chid-dev,-,-,-,-
|
||||
afqmc-dev,-,-,-,-
|
||||
bustm-dev,-,-,-,-
|
||||
cluewsc-dev,-,-,-,-
|
||||
WSC,-,-,-,-
|
||||
winogrande,-,-,-,-
|
||||
flores_100,-,-,-,-
|
||||
--------- 知识 Knowledge ---------,-,-,-,-
|
||||
BoolQ,-,-,-,-
|
||||
commonsense_qa,-,-,-,-
|
||||
nq,-,-,-,-
|
||||
triviaqa,2121ce,score,gen,57.51
|
||||
--------- 推理 Reasoning ---------,-,-,-,-
|
||||
cmnli,-,-,-,-
|
||||
ocnli,-,-,-,-
|
||||
ocnli_fc-dev,-,-,-,-
|
||||
AX_b,-,-,-,-
|
||||
AX_g,-,-,-,-
|
||||
CB,-,-,-,-
|
||||
RTE,-,-,-,-
|
||||
story_cloze,-,-,-,-
|
||||
COPA,-,-,-,-
|
||||
ReCoRD,-,-,-,-
|
||||
hellaswag,-,-,-,-
|
||||
piqa,-,-,-,-
|
||||
siqa,-,-,-,-
|
||||
strategyqa,-,-,-,-
|
||||
math,-,-,-,-
|
||||
gsm8k,-,-,-,-
|
||||
TheoremQA,-,-,-,-
|
||||
openai_humaneval,-,-,-,-
|
||||
mbpp,-,-,-,-
|
||||
cmmlu,-,-,-,-
|
||||
bbh,-,-,-,-
|
||||
--------- 理解 Understanding ---------,-,-,-,-
|
||||
C3,-,-,-,-
|
||||
CMRC_dev,-,-,-,-
|
||||
DRCD_dev,-,-,-,-
|
||||
MultiRC,-,-,-,-
|
||||
race-middle,-,-,-,-
|
||||
race-high,-,-,-,-
|
||||
openbookqa_fact,-,-,-,-
|
||||
csl_dev,-,-,-,-
|
||||
lcsts,-,-,-,-
|
||||
Xsum,-,-,-,-
|
||||
eprstmt-dev,-,-,-,-
|
||||
lambada,217e11,accuracy,gen,55.73
|
||||
tnews-dev,-,-,-,-
|
||||
--------- 安全 Safety ---------,-,-,-,-
|
||||
crows_pairs,-,-,-,-
|
||||
--------- LEval Exact Match (Acc) ---------,-,-,-,-
|
||||
LEval_coursera,-,-,-,-
|
||||
LEval_gsm100,-,-,-,-
|
||||
LEval_quality,-,-,-,-
|
||||
LEval_tpo,-,-,-,-
|
||||
LEval_topic_retrieval,-,-,-,-
|
||||
--------- LEval Gen (ROUGE) ---------,-,-,-,-
|
||||
LEval_financialqa,-,-,-,-
|
||||
LEval_gov_report_summ,-,-,-,-
|
||||
LEval_legal_contract_qa,-,-,-,-
|
||||
LEval_meeting_summ,-,-,-,-
|
||||
LEval_multidocqa,-,-,-,-
|
||||
LEval_narrativeqa,-,-,-,-
|
||||
LEval_nq,-,-,-,-
|
||||
LEval_news_summ,-,-,-,-
|
||||
LEval_paper_assistant,-,-,-,-
|
||||
LEval_patent_summ,-,-,-,-
|
||||
LEval_review_summ,-,-,-,-
|
||||
LEval_scientificqa,-,-,-,-
|
||||
LEval_tvshow_summ--------- 长文本 LongBench ---------,-,-,-,-
|
||||
longbench_lsht,-,-,-,-
|
||||
longbench_vcsum,-,-,-,-
|
||||
longbench_dureader,-,-,-,-
|
||||
longbench_multifieldqa_zh,-,-,-,-
|
||||
longbench_passage_retrieval_zh,-,-,-,-
|
||||
--------- 单选 自定义数据 ---------,-,-,-,-
|
||||
SageBench-exam,-,-,-,-
|
||||
$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$
|
||||
|
||||
-------------------------------------------------------------------------------------------------------------------------------- THIS IS A DIVIDER --------------------------------------------------------------------------------------------------------------------------------
|
||||
|
||||
raw format
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
-------------------------------
|
||||
Model: public/qwen2-5-32b-instruct-lora-chatbi@main
|
||||
lambada: {'accuracy': 55.734523578497964}
|
||||
triviaqa: {'score': 57.50820414167704}
|
||||
$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$
|
||||
Loading…
Reference in New Issue
Block a user