Compare commits
No commits in common. "main-260227-152208" and "main" have entirely different histories.
main-26022
...
main
1
.gitattributes
vendored
1
.gitattributes
vendored
@ -1 +0,0 @@
|
||||
*.json filter=lfs diff=lfs merge=lfs -text
|
||||
0
.gitignore
vendored
0
.gitignore
vendored
File diff suppressed because it is too large
Load Diff
@ -1,7 +0,0 @@
|
||||
[RISE-CORE Msg(9221:139863709072384:libvgpu.c:901)]: Initializing.....
|
||||
[RISE-CORE ERROR (pid:9221 thread=139863709072384 libvgpu.c:958)]: cuInit failed:100
|
||||
02/27 15:40:27 - OpenCompass - ERROR - /models/opencompass/opencompass/tasks/openicl_eval.py - _score - 163 - Task [public/qwen3-0-6b@main/GaokaoBench_2010-2013_English_MCQs]: No predictions found.
|
||||
02/27 15:40:27 - OpenCompass - INFO - time elapsed: 2.33s
|
||||
/opt/conda/lib/python3.8/site-packages/fuzzywuzzy/fuzz.py:11: UserWarning: Using slow pure-python SequenceMatcher. Install python-Levenshtein to remove this warning
|
||||
warnings.warn('Using slow pure-python SequenceMatcher. Install python-Levenshtein to remove this warning')
|
||||
[RISE-CORE Msg(9221:139863709072384:multiprocess_memory_limit.c:504)]: Calling exit handler 9221
|
||||
@ -1,7 +0,0 @@
|
||||
[RISE-CORE Msg(9034:140205280242688:libvgpu.c:901)]: Initializing.....
|
||||
[RISE-CORE ERROR (pid:9034 thread=140205280242688 libvgpu.c:958)]: cuInit failed:100
|
||||
02/27 15:40:16 - OpenCompass - ERROR - /models/opencompass/opencompass/tasks/openicl_eval.py - _score - 163 - Task [public/qwen3-0-6b@main/lambada]: No predictions found.
|
||||
02/27 15:40:16 - OpenCompass - INFO - time elapsed: 2.19s
|
||||
/opt/conda/lib/python3.8/site-packages/fuzzywuzzy/fuzz.py:11: UserWarning: Using slow pure-python SequenceMatcher. Install python-Levenshtein to remove this warning
|
||||
warnings.warn('Using slow pure-python SequenceMatcher. Install python-Levenshtein to remove this warning')
|
||||
[RISE-CORE Msg(9034:140205280242688:multiprocess_memory_limit.c:504)]: Calling exit handler 9034
|
||||
@ -1,7 +0,0 @@
|
||||
[RISE-CORE Msg(9219:140549146053632:libvgpu.c:901)]: Initializing.....
|
||||
[RISE-CORE ERROR (pid:9219 thread=140549146053632 libvgpu.c:958)]: cuInit failed:100
|
||||
02/27 15:40:27 - OpenCompass - ERROR - /models/opencompass/opencompass/tasks/openicl_eval.py - _score - 163 - Task [public/qwen3-0-6b@main/triviaqa]: No predictions found.
|
||||
02/27 15:40:27 - OpenCompass - INFO - time elapsed: 3.06s
|
||||
/opt/conda/lib/python3.8/site-packages/fuzzywuzzy/fuzz.py:11: UserWarning: Using slow pure-python SequenceMatcher. Install python-Levenshtein to remove this warning
|
||||
warnings.warn('Using slow pure-python SequenceMatcher. Install python-Levenshtein to remove this warning')
|
||||
[RISE-CORE Msg(9219:140549146053632:multiprocess_memory_limit.c:504)]: Calling exit handler 9219
|
||||
@ -1,33 +0,0 @@
|
||||
[RISE-CORE Msg(8836:140195494972416:libvgpu.c:901)]: Initializing.....
|
||||
[RISE-CORE ERROR (pid:8836 thread=140195494972416 libvgpu.c:958)]: cuInit failed:100
|
||||
02/27 15:39:45 - OpenCompass - INFO - Task [public/qwen3-0-6b@main/GaokaoBench_2010-2013_English_MCQs]
|
||||
02/27 15:39:47 - OpenCompass - INFO - Start inferencing [public/qwen3-0-6b@main/GaokaoBench_2010-2013_English_MCQs]
|
||||
/opt/conda/lib/python3.8/site-packages/fuzzywuzzy/fuzz.py:11: UserWarning: Using slow pure-python SequenceMatcher. Install python-Levenshtein to remove this warning
|
||||
warnings.warn('Using slow pure-python SequenceMatcher. Install python-Levenshtein to remove this warning')
|
||||
[2026-02-27 15:39:47,913] [opencompass.openicl.icl_inferencer.icl_gen_inferencer] [INFO] Starting inference process...
|
||||
0%| | 0/105 [00:00<?, ?it/s]
0%| | 0/105 [00:01<?, ?it/s]
|
||||
Traceback (most recent call last):
|
||||
File "/models/opencompass/opencompass/tasks/openicl_infer.py", line 147, in <module>
|
||||
inferencer.run()
|
||||
File "/models/opencompass/opencompass/tasks/openicl_infer.py", line 76, in run
|
||||
self._inference()
|
||||
File "/models/opencompass/opencompass/tasks/openicl_infer.py", line 124, in _inference
|
||||
inferencer.inference(retriever,
|
||||
File "/models/opencompass/opencompass/openicl/icl_inferencer/icl_gen_inferencer.py", line 122, in inference
|
||||
results = self.model.generate_from_template(
|
||||
File "/models/opencompass/opencompass/models/base.py", line 117, in generate_from_template
|
||||
return self.generate(inputs, max_out_len=max_out_len, **kwargs)
|
||||
File "/models/opencompass/opencompass/models/openai_api.py", line 123, in generate
|
||||
results = list(
|
||||
File "/opt/conda/lib/python3.8/concurrent/futures/_base.py", line 619, in result_iterator
|
||||
yield fs.pop().result()
|
||||
File "/opt/conda/lib/python3.8/concurrent/futures/_base.py", line 444, in result
|
||||
return self.__get_result()
|
||||
File "/opt/conda/lib/python3.8/concurrent/futures/_base.py", line 389, in __get_result
|
||||
raise self._exception
|
||||
File "/opt/conda/lib/python3.8/concurrent/futures/thread.py", line 57, in run
|
||||
result = self.fn(*self.args, **self.kwargs)
|
||||
File "/models/opencompass/opencompass/models/openai_api.py", line 250, in _generate
|
||||
raise RuntimeError('Calling OpenAI failed after retrying for '
|
||||
RuntimeError: Calling OpenAI failed after retrying for 2 times. Check the logs for details.
|
||||
[RISE-CORE Msg(8836:140195494972416:multiprocess_memory_limit.c:504)]: Calling exit handler 8836
|
||||
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
BIN
predictions/public/qwen3-0-6b@main/tmp_lambada_0.json
(Stored with Git LFS)
BIN
predictions/public/qwen3-0-6b@main/tmp_lambada_0.json
(Stored with Git LFS)
Binary file not shown.
BIN
predictions/public/qwen3-0-6b@main/tmp_lambada_1.json
(Stored with Git LFS)
BIN
predictions/public/qwen3-0-6b@main/tmp_lambada_1.json
(Stored with Git LFS)
Binary file not shown.
BIN
predictions/public/qwen3-0-6b@main/tmp_lambada_2.json
(Stored with Git LFS)
BIN
predictions/public/qwen3-0-6b@main/tmp_lambada_2.json
(Stored with Git LFS)
Binary file not shown.
BIN
predictions/public/qwen3-0-6b@main/tmp_triviaqa_0.json
(Stored with Git LFS)
BIN
predictions/public/qwen3-0-6b@main/tmp_triviaqa_0.json
(Stored with Git LFS)
Binary file not shown.
BIN
predictions/public/qwen3-0-6b@main/tmp_triviaqa_1.json
(Stored with Git LFS)
BIN
predictions/public/qwen3-0-6b@main/tmp_triviaqa_1.json
(Stored with Git LFS)
Binary file not shown.
BIN
predictions/public/qwen3-0-6b@main/tmp_triviaqa_2.json
(Stored with Git LFS)
BIN
predictions/public/qwen3-0-6b@main/tmp_triviaqa_2.json
(Stored with Git LFS)
Binary file not shown.
BIN
predictions/public/qwen3-0-6b@main/tmp_triviaqa_3.json
(Stored with Git LFS)
BIN
predictions/public/qwen3-0-6b@main/tmp_triviaqa_3.json
(Stored with Git LFS)
Binary file not shown.
BIN
predictions/public/qwen3-0-6b@main/tmp_triviaqa_4.json
(Stored with Git LFS)
BIN
predictions/public/qwen3-0-6b@main/tmp_triviaqa_4.json
(Stored with Git LFS)
Binary file not shown.
@ -1,87 +0,0 @@
|
||||
dataset,version,metric,mode,public/qwen3-0-6b@main
|
||||
--------- 考试 Exam ---------,-,-,-,-
|
||||
ceval,-,-,-,-
|
||||
agieval,-,-,-,-
|
||||
mmlu,-,-,-,-
|
||||
GaokaoBench,-,-,-,-
|
||||
ARC-c,-,-,-,-
|
||||
--------- 语言 Language ---------,-,-,-,-
|
||||
WiC,-,-,-,-
|
||||
summedits,-,-,-,-
|
||||
chid-dev,-,-,-,-
|
||||
afqmc-dev,-,-,-,-
|
||||
bustm-dev,-,-,-,-
|
||||
cluewsc-dev,-,-,-,-
|
||||
WSC,-,-,-,-
|
||||
winogrande,-,-,-,-
|
||||
flores_100,-,-,-,-
|
||||
--------- 知识 Knowledge ---------,-,-,-,-
|
||||
BoolQ,-,-,-,-
|
||||
commonsense_qa,-,-,-,-
|
||||
nq,-,-,-,-
|
||||
triviaqa,-,-,-,-
|
||||
--------- 推理 Reasoning ---------,-,-,-,-
|
||||
cmnli,-,-,-,-
|
||||
ocnli,-,-,-,-
|
||||
ocnli_fc-dev,-,-,-,-
|
||||
AX_b,-,-,-,-
|
||||
AX_g,-,-,-,-
|
||||
CB,-,-,-,-
|
||||
RTE,-,-,-,-
|
||||
story_cloze,-,-,-,-
|
||||
COPA,-,-,-,-
|
||||
ReCoRD,-,-,-,-
|
||||
hellaswag,-,-,-,-
|
||||
piqa,-,-,-,-
|
||||
siqa,-,-,-,-
|
||||
strategyqa,-,-,-,-
|
||||
math,-,-,-,-
|
||||
gsm8k,-,-,-,-
|
||||
TheoremQA,-,-,-,-
|
||||
openai_humaneval,-,-,-,-
|
||||
mbpp,-,-,-,-
|
||||
cmmlu,-,-,-,-
|
||||
bbh,-,-,-,-
|
||||
--------- 理解 Understanding ---------,-,-,-,-
|
||||
C3,-,-,-,-
|
||||
CMRC_dev,-,-,-,-
|
||||
DRCD_dev,-,-,-,-
|
||||
MultiRC,-,-,-,-
|
||||
race-middle,-,-,-,-
|
||||
race-high,-,-,-,-
|
||||
openbookqa_fact,-,-,-,-
|
||||
csl_dev,-,-,-,-
|
||||
lcsts,-,-,-,-
|
||||
Xsum,-,-,-,-
|
||||
eprstmt-dev,-,-,-,-
|
||||
lambada,-,-,-,-
|
||||
tnews-dev,-,-,-,-
|
||||
--------- 安全 Safety ---------,-,-,-,-
|
||||
crows_pairs,-,-,-,-
|
||||
--------- LEval Exact Match (Acc) ---------,-,-,-,-
|
||||
LEval_coursera,-,-,-,-
|
||||
LEval_gsm100,-,-,-,-
|
||||
LEval_quality,-,-,-,-
|
||||
LEval_tpo,-,-,-,-
|
||||
LEval_topic_retrieval,-,-,-,-
|
||||
--------- LEval Gen (ROUGE) ---------,-,-,-,-
|
||||
LEval_financialqa,-,-,-,-
|
||||
LEval_gov_report_summ,-,-,-,-
|
||||
LEval_legal_contract_qa,-,-,-,-
|
||||
LEval_meeting_summ,-,-,-,-
|
||||
LEval_multidocqa,-,-,-,-
|
||||
LEval_narrativeqa,-,-,-,-
|
||||
LEval_nq,-,-,-,-
|
||||
LEval_news_summ,-,-,-,-
|
||||
LEval_paper_assistant,-,-,-,-
|
||||
LEval_patent_summ,-,-,-,-
|
||||
LEval_review_summ,-,-,-,-
|
||||
LEval_scientificqa,-,-,-,-
|
||||
LEval_tvshow_summ--------- 长文本 LongBench ---------,-,-,-,-
|
||||
longbench_lsht,-,-,-,-
|
||||
longbench_vcsum,-,-,-,-
|
||||
longbench_dureader,-,-,-,-
|
||||
longbench_multifieldqa_zh,-,-,-,-
|
||||
longbench_passage_retrieval_zh,-,-,-,-
|
||||
--------- 单选 自定义数据 ---------,-,-,-,-
|
||||
SageBench-exam,-,-,-,-
|
||||
|
@ -1,193 +0,0 @@
|
||||
20260227_152425
|
||||
tabulate format
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
dataset version metric mode public/qwen3-0-6b@main
|
||||
----------------------------------------------------- --------- -------- ------ ------------------------
|
||||
--------- 考试 Exam --------- - - - -
|
||||
ceval - - - -
|
||||
agieval - - - -
|
||||
mmlu - - - -
|
||||
GaokaoBench - - - -
|
||||
ARC-c - - - -
|
||||
--------- 语言 Language --------- - - - -
|
||||
WiC - - - -
|
||||
summedits - - - -
|
||||
chid-dev - - - -
|
||||
afqmc-dev - - - -
|
||||
bustm-dev - - - -
|
||||
cluewsc-dev - - - -
|
||||
WSC - - - -
|
||||
winogrande - - - -
|
||||
flores_100 - - - -
|
||||
--------- 知识 Knowledge --------- - - - -
|
||||
BoolQ - - - -
|
||||
commonsense_qa - - - -
|
||||
nq - - - -
|
||||
triviaqa - - - -
|
||||
--------- 推理 Reasoning --------- - - - -
|
||||
cmnli - - - -
|
||||
ocnli - - - -
|
||||
ocnli_fc-dev - - - -
|
||||
AX_b - - - -
|
||||
AX_g - - - -
|
||||
CB - - - -
|
||||
RTE - - - -
|
||||
story_cloze - - - -
|
||||
COPA - - - -
|
||||
ReCoRD - - - -
|
||||
hellaswag - - - -
|
||||
piqa - - - -
|
||||
siqa - - - -
|
||||
strategyqa - - - -
|
||||
math - - - -
|
||||
gsm8k - - - -
|
||||
TheoremQA - - - -
|
||||
openai_humaneval - - - -
|
||||
mbpp - - - -
|
||||
cmmlu - - - -
|
||||
bbh - - - -
|
||||
--------- 理解 Understanding --------- - - - -
|
||||
C3 - - - -
|
||||
CMRC_dev - - - -
|
||||
DRCD_dev - - - -
|
||||
MultiRC - - - -
|
||||
race-middle - - - -
|
||||
race-high - - - -
|
||||
openbookqa_fact - - - -
|
||||
csl_dev - - - -
|
||||
lcsts - - - -
|
||||
Xsum - - - -
|
||||
eprstmt-dev - - - -
|
||||
lambada - - - -
|
||||
tnews-dev - - - -
|
||||
--------- 安全 Safety --------- - - - -
|
||||
crows_pairs - - - -
|
||||
--------- LEval Exact Match (Acc) --------- - - - -
|
||||
LEval_coursera - - - -
|
||||
LEval_gsm100 - - - -
|
||||
LEval_quality - - - -
|
||||
LEval_tpo - - - -
|
||||
LEval_topic_retrieval - - - -
|
||||
--------- LEval Gen (ROUGE) --------- - - - -
|
||||
LEval_financialqa - - - -
|
||||
LEval_gov_report_summ - - - -
|
||||
LEval_legal_contract_qa - - - -
|
||||
LEval_meeting_summ - - - -
|
||||
LEval_multidocqa - - - -
|
||||
LEval_narrativeqa - - - -
|
||||
LEval_nq - - - -
|
||||
LEval_news_summ - - - -
|
||||
LEval_paper_assistant - - - -
|
||||
LEval_patent_summ - - - -
|
||||
LEval_review_summ - - - -
|
||||
LEval_scientificqa - - - -
|
||||
LEval_tvshow_summ--------- 长文本 LongBench --------- - - - -
|
||||
longbench_lsht - - - -
|
||||
longbench_vcsum - - - -
|
||||
longbench_dureader - - - -
|
||||
longbench_multifieldqa_zh - - - -
|
||||
longbench_passage_retrieval_zh - - - -
|
||||
--------- 单选 自定义数据 --------- - - - -
|
||||
SageBench-exam - - - -
|
||||
$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$
|
||||
|
||||
-------------------------------------------------------------------------------------------------------------------------------- THIS IS A DIVIDER --------------------------------------------------------------------------------------------------------------------------------
|
||||
|
||||
csv format
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
dataset,version,metric,mode,public/qwen3-0-6b@main
|
||||
--------- 考试 Exam ---------,-,-,-,-
|
||||
ceval,-,-,-,-
|
||||
agieval,-,-,-,-
|
||||
mmlu,-,-,-,-
|
||||
GaokaoBench,-,-,-,-
|
||||
ARC-c,-,-,-,-
|
||||
--------- 语言 Language ---------,-,-,-,-
|
||||
WiC,-,-,-,-
|
||||
summedits,-,-,-,-
|
||||
chid-dev,-,-,-,-
|
||||
afqmc-dev,-,-,-,-
|
||||
bustm-dev,-,-,-,-
|
||||
cluewsc-dev,-,-,-,-
|
||||
WSC,-,-,-,-
|
||||
winogrande,-,-,-,-
|
||||
flores_100,-,-,-,-
|
||||
--------- 知识 Knowledge ---------,-,-,-,-
|
||||
BoolQ,-,-,-,-
|
||||
commonsense_qa,-,-,-,-
|
||||
nq,-,-,-,-
|
||||
triviaqa,-,-,-,-
|
||||
--------- 推理 Reasoning ---------,-,-,-,-
|
||||
cmnli,-,-,-,-
|
||||
ocnli,-,-,-,-
|
||||
ocnli_fc-dev,-,-,-,-
|
||||
AX_b,-,-,-,-
|
||||
AX_g,-,-,-,-
|
||||
CB,-,-,-,-
|
||||
RTE,-,-,-,-
|
||||
story_cloze,-,-,-,-
|
||||
COPA,-,-,-,-
|
||||
ReCoRD,-,-,-,-
|
||||
hellaswag,-,-,-,-
|
||||
piqa,-,-,-,-
|
||||
siqa,-,-,-,-
|
||||
strategyqa,-,-,-,-
|
||||
math,-,-,-,-
|
||||
gsm8k,-,-,-,-
|
||||
TheoremQA,-,-,-,-
|
||||
openai_humaneval,-,-,-,-
|
||||
mbpp,-,-,-,-
|
||||
cmmlu,-,-,-,-
|
||||
bbh,-,-,-,-
|
||||
--------- 理解 Understanding ---------,-,-,-,-
|
||||
C3,-,-,-,-
|
||||
CMRC_dev,-,-,-,-
|
||||
DRCD_dev,-,-,-,-
|
||||
MultiRC,-,-,-,-
|
||||
race-middle,-,-,-,-
|
||||
race-high,-,-,-,-
|
||||
openbookqa_fact,-,-,-,-
|
||||
csl_dev,-,-,-,-
|
||||
lcsts,-,-,-,-
|
||||
Xsum,-,-,-,-
|
||||
eprstmt-dev,-,-,-,-
|
||||
lambada,-,-,-,-
|
||||
tnews-dev,-,-,-,-
|
||||
--------- 安全 Safety ---------,-,-,-,-
|
||||
crows_pairs,-,-,-,-
|
||||
--------- LEval Exact Match (Acc) ---------,-,-,-,-
|
||||
LEval_coursera,-,-,-,-
|
||||
LEval_gsm100,-,-,-,-
|
||||
LEval_quality,-,-,-,-
|
||||
LEval_tpo,-,-,-,-
|
||||
LEval_topic_retrieval,-,-,-,-
|
||||
--------- LEval Gen (ROUGE) ---------,-,-,-,-
|
||||
LEval_financialqa,-,-,-,-
|
||||
LEval_gov_report_summ,-,-,-,-
|
||||
LEval_legal_contract_qa,-,-,-,-
|
||||
LEval_meeting_summ,-,-,-,-
|
||||
LEval_multidocqa,-,-,-,-
|
||||
LEval_narrativeqa,-,-,-,-
|
||||
LEval_nq,-,-,-,-
|
||||
LEval_news_summ,-,-,-,-
|
||||
LEval_paper_assistant,-,-,-,-
|
||||
LEval_patent_summ,-,-,-,-
|
||||
LEval_review_summ,-,-,-,-
|
||||
LEval_scientificqa,-,-,-,-
|
||||
LEval_tvshow_summ--------- 长文本 LongBench ---------,-,-,-,-
|
||||
longbench_lsht,-,-,-,-
|
||||
longbench_vcsum,-,-,-,-
|
||||
longbench_dureader,-,-,-,-
|
||||
longbench_multifieldqa_zh,-,-,-,-
|
||||
longbench_passage_retrieval_zh,-,-,-,-
|
||||
--------- 单选 自定义数据 ---------,-,-,-,-
|
||||
SageBench-exam,-,-,-,-
|
||||
$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$
|
||||
|
||||
-------------------------------------------------------------------------------------------------------------------------------- THIS IS A DIVIDER --------------------------------------------------------------------------------------------------------------------------------
|
||||
|
||||
raw format
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
-------------------------------
|
||||
Model: public/qwen3-0-6b@main
|
||||
$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$
|
||||
Loading…
Reference in New Issue
Block a user