Compare commits
No commits in common. "main-250725-171730" and "main" have entirely different histories.
main-25072
...
main
1
.gitattributes
vendored
1
.gitattributes
vendored
@ -1 +0,0 @@
|
|||||||
*.json filter=lfs diff=lfs merge=lfs -text
|
|
||||||
0
.gitignore
vendored
0
.gitignore
vendored
File diff suppressed because it is too large
Load Diff
@ -1,9 +0,0 @@
|
|||||||
[4pdvGPU Msg(1173:140536949414912:libvgpu.c:873)]: Initializing.....
|
|
||||||
[4pdvGPU Msg(1173:140536949414912:multiprocess_memory_limit.c:144)]: uuid GPU-9a16bbfd-e4c2-d946-8cf6-81879301e66c validated
|
|
||||||
[4pdvGPU Msg(1173:140536949414912:multiprocess_memory_limit.c:144)]: uuid GPU-845ac5d5-6827-1c5e-8cd0-275d4ba08b97 validated
|
|
||||||
[4pdvGPU ERROR (pid:1173 thread=140536949414912 libvgpu.c:924)]: cuInit failed:100
|
|
||||||
07/25 17:25:42 - OpenCompass - ERROR - /models/opencompass/opencompass/tasks/openicl_eval.py - _score - 163 - Task [public/qwen2-5-vl-7b-instruct-awq@main/lambada]: No predictions found.
|
|
||||||
07/25 17:25:42 - OpenCompass - INFO - time elapsed: 1.81s
|
|
||||||
/opt/conda/lib/python3.8/site-packages/fuzzywuzzy/fuzz.py:11: UserWarning: Using slow pure-python SequenceMatcher. Install python-Levenshtein to remove this warning
|
|
||||||
warnings.warn('Using slow pure-python SequenceMatcher. Install python-Levenshtein to remove this warning')
|
|
||||||
[4pdvGPU Msg(1173:140536949414912:multiprocess_memory_limit.c:543)]: Calling exit handler 1173
|
|
||||||
@ -1,35 +0,0 @@
|
|||||||
[4pdvGPU Msg(371:139792464989184:libvgpu.c:873)]: Initializing.....
|
|
||||||
[4pdvGPU Msg(371:139792464989184:multiprocess_memory_limit.c:144)]: uuid GPU-9a16bbfd-e4c2-d946-8cf6-81879301e66c validated
|
|
||||||
[4pdvGPU Msg(371:139792464989184:multiprocess_memory_limit.c:144)]: uuid GPU-845ac5d5-6827-1c5e-8cd0-275d4ba08b97 validated
|
|
||||||
[4pdvGPU ERROR (pid:371 thread=139792464989184 libvgpu.c:924)]: cuInit failed:100
|
|
||||||
07/25 17:25:33 - OpenCompass - INFO - Task [public/qwen2-5-vl-7b-instruct-awq@main/lambada_0]
|
|
||||||
07/25 17:25:35 - OpenCompass - INFO - Start inferencing [public/qwen2-5-vl-7b-instruct-awq@main/lambada_0]
|
|
||||||
/opt/conda/lib/python3.8/site-packages/fuzzywuzzy/fuzz.py:11: UserWarning: Using slow pure-python SequenceMatcher. Install python-Levenshtein to remove this warning
|
|
||||||
warnings.warn('Using slow pure-python SequenceMatcher. Install python-Levenshtein to remove this warning')
|
|
||||||
[2025-07-25 17:25:35,709] [opencompass.openicl.icl_inferencer.icl_gen_inferencer] [INFO] Starting inference process...
|
|
||||||
0%| | 0/1718 [00:00<?, ?it/s]
0%| | 0/1718 [00:01<?, ?it/s]
|
|
||||||
Traceback (most recent call last):
|
|
||||||
File "/models/opencompass/opencompass/tasks/openicl_infer.py", line 147, in <module>
|
|
||||||
inferencer.run()
|
|
||||||
File "/models/opencompass/opencompass/tasks/openicl_infer.py", line 76, in run
|
|
||||||
self._inference()
|
|
||||||
File "/models/opencompass/opencompass/tasks/openicl_infer.py", line 119, in _inference
|
|
||||||
inferencer.inference(retriever,
|
|
||||||
File "/models/opencompass/opencompass/openicl/icl_inferencer/icl_gen_inferencer.py", line 122, in inference
|
|
||||||
results = self.model.generate_from_template(
|
|
||||||
File "/models/opencompass/opencompass/models/base.py", line 117, in generate_from_template
|
|
||||||
return self.generate(inputs, max_out_len=max_out_len, **kwargs)
|
|
||||||
File "/models/opencompass/opencompass/models/openai_api.py", line 123, in generate
|
|
||||||
results = list(
|
|
||||||
File "/opt/conda/lib/python3.8/concurrent/futures/_base.py", line 619, in result_iterator
|
|
||||||
yield fs.pop().result()
|
|
||||||
File "/opt/conda/lib/python3.8/concurrent/futures/_base.py", line 444, in result
|
|
||||||
return self.__get_result()
|
|
||||||
File "/opt/conda/lib/python3.8/concurrent/futures/_base.py", line 389, in __get_result
|
|
||||||
raise self._exception
|
|
||||||
File "/opt/conda/lib/python3.8/concurrent/futures/thread.py", line 57, in run
|
|
||||||
result = self.fn(*self.args, **self.kwargs)
|
|
||||||
File "/models/opencompass/opencompass/models/openai_api.py", line 250, in _generate
|
|
||||||
raise RuntimeError('Calling OpenAI failed after retrying for '
|
|
||||||
RuntimeError: Calling OpenAI failed after retrying for 2 times. Check the logs for details.
|
|
||||||
[4pdvGPU Msg(371:139792464989184:multiprocess_memory_limit.c:543)]: Calling exit handler 371
|
|
||||||
@ -1,35 +0,0 @@
|
|||||||
[4pdvGPU Msg(368:139714045283328:libvgpu.c:873)]: Initializing.....
|
|
||||||
[4pdvGPU Msg(368:139714045283328:multiprocess_memory_limit.c:144)]: uuid GPU-9a16bbfd-e4c2-d946-8cf6-81879301e66c validated
|
|
||||||
[4pdvGPU Msg(368:139714045283328:multiprocess_memory_limit.c:144)]: uuid GPU-845ac5d5-6827-1c5e-8cd0-275d4ba08b97 validated
|
|
||||||
[4pdvGPU ERROR (pid:368 thread=139714045283328 libvgpu.c:924)]: cuInit failed:100
|
|
||||||
07/25 17:25:33 - OpenCompass - INFO - Task [public/qwen2-5-vl-7b-instruct-awq@main/lambada_1]
|
|
||||||
07/25 17:25:35 - OpenCompass - INFO - Start inferencing [public/qwen2-5-vl-7b-instruct-awq@main/lambada_1]
|
|
||||||
/opt/conda/lib/python3.8/site-packages/fuzzywuzzy/fuzz.py:11: UserWarning: Using slow pure-python SequenceMatcher. Install python-Levenshtein to remove this warning
|
|
||||||
warnings.warn('Using slow pure-python SequenceMatcher. Install python-Levenshtein to remove this warning')
|
|
||||||
[2025-07-25 17:25:35,713] [opencompass.openicl.icl_inferencer.icl_gen_inferencer] [INFO] Starting inference process...
|
|
||||||
0%| | 0/1718 [00:00<?, ?it/s]
0%| | 0/1718 [00:01<?, ?it/s]
|
|
||||||
Traceback (most recent call last):
|
|
||||||
File "/models/opencompass/opencompass/tasks/openicl_infer.py", line 147, in <module>
|
|
||||||
inferencer.run()
|
|
||||||
File "/models/opencompass/opencompass/tasks/openicl_infer.py", line 76, in run
|
|
||||||
self._inference()
|
|
||||||
File "/models/opencompass/opencompass/tasks/openicl_infer.py", line 119, in _inference
|
|
||||||
inferencer.inference(retriever,
|
|
||||||
File "/models/opencompass/opencompass/openicl/icl_inferencer/icl_gen_inferencer.py", line 122, in inference
|
|
||||||
results = self.model.generate_from_template(
|
|
||||||
File "/models/opencompass/opencompass/models/base.py", line 117, in generate_from_template
|
|
||||||
return self.generate(inputs, max_out_len=max_out_len, **kwargs)
|
|
||||||
File "/models/opencompass/opencompass/models/openai_api.py", line 123, in generate
|
|
||||||
results = list(
|
|
||||||
File "/opt/conda/lib/python3.8/concurrent/futures/_base.py", line 619, in result_iterator
|
|
||||||
yield fs.pop().result()
|
|
||||||
File "/opt/conda/lib/python3.8/concurrent/futures/_base.py", line 444, in result
|
|
||||||
return self.__get_result()
|
|
||||||
File "/opt/conda/lib/python3.8/concurrent/futures/_base.py", line 389, in __get_result
|
|
||||||
raise self._exception
|
|
||||||
File "/opt/conda/lib/python3.8/concurrent/futures/thread.py", line 57, in run
|
|
||||||
result = self.fn(*self.args, **self.kwargs)
|
|
||||||
File "/models/opencompass/opencompass/models/openai_api.py", line 250, in _generate
|
|
||||||
raise RuntimeError('Calling OpenAI failed after retrying for '
|
|
||||||
RuntimeError: Calling OpenAI failed after retrying for 2 times. Check the logs for details.
|
|
||||||
[4pdvGPU Msg(368:139714045283328:multiprocess_memory_limit.c:543)]: Calling exit handler 368
|
|
||||||
@ -1,35 +0,0 @@
|
|||||||
[4pdvGPU Msg(372:140311500876800:libvgpu.c:873)]: Initializing.....
|
|
||||||
[4pdvGPU Msg(372:140311500876800:multiprocess_memory_limit.c:144)]: uuid GPU-9a16bbfd-e4c2-d946-8cf6-81879301e66c validated
|
|
||||||
[4pdvGPU Msg(372:140311500876800:multiprocess_memory_limit.c:144)]: uuid GPU-845ac5d5-6827-1c5e-8cd0-275d4ba08b97 validated
|
|
||||||
[4pdvGPU ERROR (pid:372 thread=140311500876800 libvgpu.c:924)]: cuInit failed:100
|
|
||||||
07/25 17:25:33 - OpenCompass - INFO - Task [public/qwen2-5-vl-7b-instruct-awq@main/lambada_2]
|
|
||||||
07/25 17:25:35 - OpenCompass - INFO - Start inferencing [public/qwen2-5-vl-7b-instruct-awq@main/lambada_2]
|
|
||||||
/opt/conda/lib/python3.8/site-packages/fuzzywuzzy/fuzz.py:11: UserWarning: Using slow pure-python SequenceMatcher. Install python-Levenshtein to remove this warning
|
|
||||||
warnings.warn('Using slow pure-python SequenceMatcher. Install python-Levenshtein to remove this warning')
|
|
||||||
[2025-07-25 17:25:35,506] [opencompass.openicl.icl_inferencer.icl_gen_inferencer] [INFO] Starting inference process...
|
|
||||||
0%| | 0/1717 [00:00<?, ?it/s]
0%| | 0/1717 [00:01<?, ?it/s]
|
|
||||||
Traceback (most recent call last):
|
|
||||||
File "/models/opencompass/opencompass/tasks/openicl_infer.py", line 147, in <module>
|
|
||||||
inferencer.run()
|
|
||||||
File "/models/opencompass/opencompass/tasks/openicl_infer.py", line 76, in run
|
|
||||||
self._inference()
|
|
||||||
File "/models/opencompass/opencompass/tasks/openicl_infer.py", line 119, in _inference
|
|
||||||
inferencer.inference(retriever,
|
|
||||||
File "/models/opencompass/opencompass/openicl/icl_inferencer/icl_gen_inferencer.py", line 122, in inference
|
|
||||||
results = self.model.generate_from_template(
|
|
||||||
File "/models/opencompass/opencompass/models/base.py", line 117, in generate_from_template
|
|
||||||
return self.generate(inputs, max_out_len=max_out_len, **kwargs)
|
|
||||||
File "/models/opencompass/opencompass/models/openai_api.py", line 123, in generate
|
|
||||||
results = list(
|
|
||||||
File "/opt/conda/lib/python3.8/concurrent/futures/_base.py", line 619, in result_iterator
|
|
||||||
yield fs.pop().result()
|
|
||||||
File "/opt/conda/lib/python3.8/concurrent/futures/_base.py", line 444, in result
|
|
||||||
return self.__get_result()
|
|
||||||
File "/opt/conda/lib/python3.8/concurrent/futures/_base.py", line 389, in __get_result
|
|
||||||
raise self._exception
|
|
||||||
File "/opt/conda/lib/python3.8/concurrent/futures/thread.py", line 57, in run
|
|
||||||
result = self.fn(*self.args, **self.kwargs)
|
|
||||||
File "/models/opencompass/opencompass/models/openai_api.py", line 250, in _generate
|
|
||||||
raise RuntimeError('Calling OpenAI failed after retrying for '
|
|
||||||
RuntimeError: Calling OpenAI failed after retrying for 2 times. Check the logs for details.
|
|
||||||
[4pdvGPU Msg(372:140311500876800:multiprocess_memory_limit.c:543)]: Calling exit handler 372
|
|
||||||
@ -1,87 +0,0 @@
|
|||||||
dataset,version,metric,mode,public/qwen2-5-vl-7b-instruct-awq@main
|
|
||||||
--------- 考试 Exam ---------,-,-,-,-
|
|
||||||
ceval,-,-,-,-
|
|
||||||
agieval,-,-,-,-
|
|
||||||
mmlu,-,-,-,-
|
|
||||||
GaokaoBench,-,-,-,-
|
|
||||||
ARC-c,-,-,-,-
|
|
||||||
--------- 语言 Language ---------,-,-,-,-
|
|
||||||
WiC,-,-,-,-
|
|
||||||
summedits,-,-,-,-
|
|
||||||
chid-dev,-,-,-,-
|
|
||||||
afqmc-dev,-,-,-,-
|
|
||||||
bustm-dev,-,-,-,-
|
|
||||||
cluewsc-dev,-,-,-,-
|
|
||||||
WSC,-,-,-,-
|
|
||||||
winogrande,-,-,-,-
|
|
||||||
flores_100,-,-,-,-
|
|
||||||
--------- 知识 Knowledge ---------,-,-,-,-
|
|
||||||
BoolQ,-,-,-,-
|
|
||||||
commonsense_qa,-,-,-,-
|
|
||||||
nq,-,-,-,-
|
|
||||||
triviaqa,-,-,-,-
|
|
||||||
--------- 推理 Reasoning ---------,-,-,-,-
|
|
||||||
cmnli,-,-,-,-
|
|
||||||
ocnli,-,-,-,-
|
|
||||||
ocnli_fc-dev,-,-,-,-
|
|
||||||
AX_b,-,-,-,-
|
|
||||||
AX_g,-,-,-,-
|
|
||||||
CB,-,-,-,-
|
|
||||||
RTE,-,-,-,-
|
|
||||||
story_cloze,-,-,-,-
|
|
||||||
COPA,-,-,-,-
|
|
||||||
ReCoRD,-,-,-,-
|
|
||||||
hellaswag,-,-,-,-
|
|
||||||
piqa,-,-,-,-
|
|
||||||
siqa,-,-,-,-
|
|
||||||
strategyqa,-,-,-,-
|
|
||||||
math,-,-,-,-
|
|
||||||
gsm8k,-,-,-,-
|
|
||||||
TheoremQA,-,-,-,-
|
|
||||||
openai_humaneval,-,-,-,-
|
|
||||||
mbpp,-,-,-,-
|
|
||||||
cmmlu,-,-,-,-
|
|
||||||
bbh,-,-,-,-
|
|
||||||
--------- 理解 Understanding ---------,-,-,-,-
|
|
||||||
C3,-,-,-,-
|
|
||||||
CMRC_dev,-,-,-,-
|
|
||||||
DRCD_dev,-,-,-,-
|
|
||||||
MultiRC,-,-,-,-
|
|
||||||
race-middle,-,-,-,-
|
|
||||||
race-high,-,-,-,-
|
|
||||||
openbookqa_fact,-,-,-,-
|
|
||||||
csl_dev,-,-,-,-
|
|
||||||
lcsts,-,-,-,-
|
|
||||||
Xsum,-,-,-,-
|
|
||||||
eprstmt-dev,-,-,-,-
|
|
||||||
lambada,-,-,-,-
|
|
||||||
tnews-dev,-,-,-,-
|
|
||||||
--------- 安全 Safety ---------,-,-,-,-
|
|
||||||
crows_pairs,-,-,-,-
|
|
||||||
--------- LEval Exact Match (Acc) ---------,-,-,-,-
|
|
||||||
LEval_coursera,-,-,-,-
|
|
||||||
LEval_gsm100,-,-,-,-
|
|
||||||
LEval_quality,-,-,-,-
|
|
||||||
LEval_tpo,-,-,-,-
|
|
||||||
LEval_topic_retrieval,-,-,-,-
|
|
||||||
--------- LEval Gen (ROUGE) ---------,-,-,-,-
|
|
||||||
LEval_financialqa,-,-,-,-
|
|
||||||
LEval_gov_report_summ,-,-,-,-
|
|
||||||
LEval_legal_contract_qa,-,-,-,-
|
|
||||||
LEval_meeting_summ,-,-,-,-
|
|
||||||
LEval_multidocqa,-,-,-,-
|
|
||||||
LEval_narrativeqa,-,-,-,-
|
|
||||||
LEval_nq,-,-,-,-
|
|
||||||
LEval_news_summ,-,-,-,-
|
|
||||||
LEval_paper_assistant,-,-,-,-
|
|
||||||
LEval_patent_summ,-,-,-,-
|
|
||||||
LEval_review_summ,-,-,-,-
|
|
||||||
LEval_scientificqa,-,-,-,-
|
|
||||||
LEval_tvshow_summ--------- 长文本 LongBench ---------,-,-,-,-
|
|
||||||
longbench_lsht,-,-,-,-
|
|
||||||
longbench_vcsum,-,-,-,-
|
|
||||||
longbench_dureader,-,-,-,-
|
|
||||||
longbench_multifieldqa_zh,-,-,-,-
|
|
||||||
longbench_passage_retrieval_zh,-,-,-,-
|
|
||||||
--------- 单选 自定义数据 ---------,-,-,-,-
|
|
||||||
SageBench-exam,-,-,-,-
|
|
||||||
|
@ -1,193 +0,0 @@
|
|||||||
20250725_172527
|
|
||||||
tabulate format
|
|
||||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
|
||||||
dataset version metric mode public/qwen2-5-vl-7b-instruct-awq@main
|
|
||||||
----------------------------------------------------- --------- -------- ------ ----------------------------------------
|
|
||||||
--------- 考试 Exam --------- - - - -
|
|
||||||
ceval - - - -
|
|
||||||
agieval - - - -
|
|
||||||
mmlu - - - -
|
|
||||||
GaokaoBench - - - -
|
|
||||||
ARC-c - - - -
|
|
||||||
--------- 语言 Language --------- - - - -
|
|
||||||
WiC - - - -
|
|
||||||
summedits - - - -
|
|
||||||
chid-dev - - - -
|
|
||||||
afqmc-dev - - - -
|
|
||||||
bustm-dev - - - -
|
|
||||||
cluewsc-dev - - - -
|
|
||||||
WSC - - - -
|
|
||||||
winogrande - - - -
|
|
||||||
flores_100 - - - -
|
|
||||||
--------- 知识 Knowledge --------- - - - -
|
|
||||||
BoolQ - - - -
|
|
||||||
commonsense_qa - - - -
|
|
||||||
nq - - - -
|
|
||||||
triviaqa - - - -
|
|
||||||
--------- 推理 Reasoning --------- - - - -
|
|
||||||
cmnli - - - -
|
|
||||||
ocnli - - - -
|
|
||||||
ocnli_fc-dev - - - -
|
|
||||||
AX_b - - - -
|
|
||||||
AX_g - - - -
|
|
||||||
CB - - - -
|
|
||||||
RTE - - - -
|
|
||||||
story_cloze - - - -
|
|
||||||
COPA - - - -
|
|
||||||
ReCoRD - - - -
|
|
||||||
hellaswag - - - -
|
|
||||||
piqa - - - -
|
|
||||||
siqa - - - -
|
|
||||||
strategyqa - - - -
|
|
||||||
math - - - -
|
|
||||||
gsm8k - - - -
|
|
||||||
TheoremQA - - - -
|
|
||||||
openai_humaneval - - - -
|
|
||||||
mbpp - - - -
|
|
||||||
cmmlu - - - -
|
|
||||||
bbh - - - -
|
|
||||||
--------- 理解 Understanding --------- - - - -
|
|
||||||
C3 - - - -
|
|
||||||
CMRC_dev - - - -
|
|
||||||
DRCD_dev - - - -
|
|
||||||
MultiRC - - - -
|
|
||||||
race-middle - - - -
|
|
||||||
race-high - - - -
|
|
||||||
openbookqa_fact - - - -
|
|
||||||
csl_dev - - - -
|
|
||||||
lcsts - - - -
|
|
||||||
Xsum - - - -
|
|
||||||
eprstmt-dev - - - -
|
|
||||||
lambada - - - -
|
|
||||||
tnews-dev - - - -
|
|
||||||
--------- 安全 Safety --------- - - - -
|
|
||||||
crows_pairs - - - -
|
|
||||||
--------- LEval Exact Match (Acc) --------- - - - -
|
|
||||||
LEval_coursera - - - -
|
|
||||||
LEval_gsm100 - - - -
|
|
||||||
LEval_quality - - - -
|
|
||||||
LEval_tpo - - - -
|
|
||||||
LEval_topic_retrieval - - - -
|
|
||||||
--------- LEval Gen (ROUGE) --------- - - - -
|
|
||||||
LEval_financialqa - - - -
|
|
||||||
LEval_gov_report_summ - - - -
|
|
||||||
LEval_legal_contract_qa - - - -
|
|
||||||
LEval_meeting_summ - - - -
|
|
||||||
LEval_multidocqa - - - -
|
|
||||||
LEval_narrativeqa - - - -
|
|
||||||
LEval_nq - - - -
|
|
||||||
LEval_news_summ - - - -
|
|
||||||
LEval_paper_assistant - - - -
|
|
||||||
LEval_patent_summ - - - -
|
|
||||||
LEval_review_summ - - - -
|
|
||||||
LEval_scientificqa - - - -
|
|
||||||
LEval_tvshow_summ--------- 长文本 LongBench --------- - - - -
|
|
||||||
longbench_lsht - - - -
|
|
||||||
longbench_vcsum - - - -
|
|
||||||
longbench_dureader - - - -
|
|
||||||
longbench_multifieldqa_zh - - - -
|
|
||||||
longbench_passage_retrieval_zh - - - -
|
|
||||||
--------- 单选 自定义数据 --------- - - - -
|
|
||||||
SageBench-exam - - - -
|
|
||||||
$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$
|
|
||||||
|
|
||||||
-------------------------------------------------------------------------------------------------------------------------------- THIS IS A DIVIDER --------------------------------------------------------------------------------------------------------------------------------
|
|
||||||
|
|
||||||
csv format
|
|
||||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
|
||||||
dataset,version,metric,mode,public/qwen2-5-vl-7b-instruct-awq@main
|
|
||||||
--------- 考试 Exam ---------,-,-,-,-
|
|
||||||
ceval,-,-,-,-
|
|
||||||
agieval,-,-,-,-
|
|
||||||
mmlu,-,-,-,-
|
|
||||||
GaokaoBench,-,-,-,-
|
|
||||||
ARC-c,-,-,-,-
|
|
||||||
--------- 语言 Language ---------,-,-,-,-
|
|
||||||
WiC,-,-,-,-
|
|
||||||
summedits,-,-,-,-
|
|
||||||
chid-dev,-,-,-,-
|
|
||||||
afqmc-dev,-,-,-,-
|
|
||||||
bustm-dev,-,-,-,-
|
|
||||||
cluewsc-dev,-,-,-,-
|
|
||||||
WSC,-,-,-,-
|
|
||||||
winogrande,-,-,-,-
|
|
||||||
flores_100,-,-,-,-
|
|
||||||
--------- 知识 Knowledge ---------,-,-,-,-
|
|
||||||
BoolQ,-,-,-,-
|
|
||||||
commonsense_qa,-,-,-,-
|
|
||||||
nq,-,-,-,-
|
|
||||||
triviaqa,-,-,-,-
|
|
||||||
--------- 推理 Reasoning ---------,-,-,-,-
|
|
||||||
cmnli,-,-,-,-
|
|
||||||
ocnli,-,-,-,-
|
|
||||||
ocnli_fc-dev,-,-,-,-
|
|
||||||
AX_b,-,-,-,-
|
|
||||||
AX_g,-,-,-,-
|
|
||||||
CB,-,-,-,-
|
|
||||||
RTE,-,-,-,-
|
|
||||||
story_cloze,-,-,-,-
|
|
||||||
COPA,-,-,-,-
|
|
||||||
ReCoRD,-,-,-,-
|
|
||||||
hellaswag,-,-,-,-
|
|
||||||
piqa,-,-,-,-
|
|
||||||
siqa,-,-,-,-
|
|
||||||
strategyqa,-,-,-,-
|
|
||||||
math,-,-,-,-
|
|
||||||
gsm8k,-,-,-,-
|
|
||||||
TheoremQA,-,-,-,-
|
|
||||||
openai_humaneval,-,-,-,-
|
|
||||||
mbpp,-,-,-,-
|
|
||||||
cmmlu,-,-,-,-
|
|
||||||
bbh,-,-,-,-
|
|
||||||
--------- 理解 Understanding ---------,-,-,-,-
|
|
||||||
C3,-,-,-,-
|
|
||||||
CMRC_dev,-,-,-,-
|
|
||||||
DRCD_dev,-,-,-,-
|
|
||||||
MultiRC,-,-,-,-
|
|
||||||
race-middle,-,-,-,-
|
|
||||||
race-high,-,-,-,-
|
|
||||||
openbookqa_fact,-,-,-,-
|
|
||||||
csl_dev,-,-,-,-
|
|
||||||
lcsts,-,-,-,-
|
|
||||||
Xsum,-,-,-,-
|
|
||||||
eprstmt-dev,-,-,-,-
|
|
||||||
lambada,-,-,-,-
|
|
||||||
tnews-dev,-,-,-,-
|
|
||||||
--------- 安全 Safety ---------,-,-,-,-
|
|
||||||
crows_pairs,-,-,-,-
|
|
||||||
--------- LEval Exact Match (Acc) ---------,-,-,-,-
|
|
||||||
LEval_coursera,-,-,-,-
|
|
||||||
LEval_gsm100,-,-,-,-
|
|
||||||
LEval_quality,-,-,-,-
|
|
||||||
LEval_tpo,-,-,-,-
|
|
||||||
LEval_topic_retrieval,-,-,-,-
|
|
||||||
--------- LEval Gen (ROUGE) ---------,-,-,-,-
|
|
||||||
LEval_financialqa,-,-,-,-
|
|
||||||
LEval_gov_report_summ,-,-,-,-
|
|
||||||
LEval_legal_contract_qa,-,-,-,-
|
|
||||||
LEval_meeting_summ,-,-,-,-
|
|
||||||
LEval_multidocqa,-,-,-,-
|
|
||||||
LEval_narrativeqa,-,-,-,-
|
|
||||||
LEval_nq,-,-,-,-
|
|
||||||
LEval_news_summ,-,-,-,-
|
|
||||||
LEval_paper_assistant,-,-,-,-
|
|
||||||
LEval_patent_summ,-,-,-,-
|
|
||||||
LEval_review_summ,-,-,-,-
|
|
||||||
LEval_scientificqa,-,-,-,-
|
|
||||||
LEval_tvshow_summ--------- 长文本 LongBench ---------,-,-,-,-
|
|
||||||
longbench_lsht,-,-,-,-
|
|
||||||
longbench_vcsum,-,-,-,-
|
|
||||||
longbench_dureader,-,-,-,-
|
|
||||||
longbench_multifieldqa_zh,-,-,-,-
|
|
||||||
longbench_passage_retrieval_zh,-,-,-,-
|
|
||||||
--------- 单选 自定义数据 ---------,-,-,-,-
|
|
||||||
SageBench-exam,-,-,-,-
|
|
||||||
$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$
|
|
||||||
|
|
||||||
-------------------------------------------------------------------------------------------------------------------------------- THIS IS A DIVIDER --------------------------------------------------------------------------------------------------------------------------------
|
|
||||||
|
|
||||||
raw format
|
|
||||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
|
||||||
-------------------------------
|
|
||||||
Model: public/qwen2-5-vl-7b-instruct-awq@main
|
|
||||||
$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$
|
|
||||||
Loading…
Reference in New Issue
Block a user