dataset-opencompass/configs/datasets/tydiqa/tydiqa_gen_978d2a.py

from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.datasets import TydiQADataset, TydiQAEvaluator

# All configs are for TydiQA Goldp task
tydiqa_reader_cfg = dict(
    input_columns=["passage_text", "question_text"],
    output_column="answer",
    test_split='validation',
    train_split='validation',)

langs = ['arabic', 'bengali', 'english', 'finnish', 'indonesian', 'japanese', 'korean', 'russian', 'swahili', 'telugu', 'thai']

prefixs_prompt = {
    "english": ("Answer the following question based on the information in the given passage.", "Passage:", "Question:", "Answer:"),
    "arabic": ("أجب على السؤال التالي بناءً على المعلومات في المقطع المعطى.", "المقطع:", "السؤال:", "الإجابة:"),
    "bengali": ("প্রদত্ত অধ্যায়ের তথ্যের উপর ভিত্তি করে নিম্নলিখিত প্রশ্নের উত্তর দিন।", "অধ্যায়:", "প্রশ্ন:", "উত্তর:"),
    "finnish": ("Vastaa seuraavaan kysymykseen annetun kappaleen tiedon perusteella.", "Kappale:", "Kysymys:", "Vastaus:"),
    "indonesian": ("Jawab pertanyaan berikut berdasarkan informasi di bagian yang diberikan.", "Bagian:", "Pertanyaan:", "Jawaban:"),
    "korean": ("주어진 문단의 정보에 기반하여 다음 질문에 답하십시오.", "문단:", "질문:", "답변:"),
    "japanese":("文脈に基づいて質問に答えてください。","ぶんしょう:","しつもん:", "かいとう:"),
    "russian": ("Ответьте на следующий вопрос на основе информации в данном отрывке.", "Отрывок:", "Вопрос:", "Ответ:"),
    "swahili": ("Jibu swali lifuatalo kulingana na habari kwenye kifungu kilichotolewa.", "Kifungu:", "Swali:", "Jibu:"),
    "telugu": ("ఇచ్చిన పేరాలోని సమాచారం ఆధారంగా కింది ప్రశ్నకు సమాధానం ఇవ్వండి.", "పేరా:", "ప్రశ్న:", "సమాధానం:"),
    "thai":("ตอบคำถามต่อไปนี้โดยอิงตามข้อมูลในตอนข้อความที่กำหนด:", "ตอนข้อความ:", "คำถาม:", "คำตอบ:")
}

tydiqa_datasets = []
for _lang in langs:
    _hint = prefixs_prompt[_lang]
    tydiqa_infer_cfg = dict(
        prompt_template=dict(
            type=PromptTemplate,
            template=f"{_hint[0]}\n\n</E>{_hint[1]}{{passage_text}}\n{_hint[2]} {{question_text}}\n{_hint[3]} {{answer}}" ,
            ice_token='</E>'),
        retriever=dict(type=ZeroRetriever),
        inferencer=dict(type=GenInferencer), max_out_len=50)

    tydiqa_eval_cfg = dict(evaluator=dict(type=TydiQAEvaluator),
                        ds_split='validation',
                        ds_column='answer',
                        )
    tydiqa_datasets.append(
    dict(abbr=f'tyidqa-goldp_{_lang}',
        type=TydiQADataset,
        path='khalidalt/tydiqa-goldp',
        name=_lang,
        reader_cfg=tydiqa_reader_cfg,
        infer_cfg=tydiqa_infer_cfg,
        eval_cfg=tydiqa_eval_cfg))
init 2025-07-18 07:25:44 +00:00			`from opencompass.openicl.icl_prompt_template import PromptTemplate`
			`from opencompass.openicl.icl_retriever import ZeroRetriever`
			`from opencompass.openicl.icl_inferencer import GenInferencer`
			`from opencompass.datasets import TydiQADataset, TydiQAEvaluator`

			`# All configs are for TydiQA Goldp task`
			`tydiqa_reader_cfg = dict(`
			`input_columns=["passage_text", "question_text"],`
			`output_column="answer",`
			`test_split='validation',`
			`train_split='validation',)`

			`langs = ['arabic', 'bengali', 'english', 'finnish', 'indonesian', 'japanese', 'korean', 'russian', 'swahili', 'telugu', 'thai']`

			`prefixs_prompt = {`
			`"english": ("Answer the following question based on the information in the given passage.", "Passage:", "Question:", "Answer:"),`
			`"arabic": ("أجب على السؤال التالي بناءً على المعلومات في المقطع المعطى.", "المقطع:", "السؤال:", "الإجابة:"),`
			`"bengali": ("প্রদত্ত অধ্যায়ের তথ্যের উপর ভিত্তি করে নিম্নলিখিত প্রশ্নের উত্তর দিন।", "অধ্যায়:", "প্রশ্ন:", "উত্তর:"),`
			`"finnish": ("Vastaa seuraavaan kysymykseen annetun kappaleen tiedon perusteella.", "Kappale:", "Kysymys:", "Vastaus:"),`
			`"indonesian": ("Jawab pertanyaan berikut berdasarkan informasi di bagian yang diberikan.", "Bagian:", "Pertanyaan:", "Jawaban:"),`
			`"korean": ("주어진 문단의 정보에 기반하여 다음 질문에 답하십시오.", "문단:", "질문:", "답변:"),`
			`"japanese":("文脈に基づいて質問に答えてください。","ぶんしょう:","しつもん:", "かいとう:"),`
			`"russian": ("Ответьте на следующий вопрос на основе информации в данном отрывке.", "Отрывок:", "Вопрос:", "Ответ:"),`
			`"swahili": ("Jibu swali lifuatalo kulingana na habari kwenye kifungu kilichotolewa.", "Kifungu:", "Swali:", "Jibu:"),`
			`"telugu": ("ఇచ్చిన పేరాలోని సమాచారం ఆధారంగా కింది ప్రశ్నకు సమాధానం ఇవ్వండి.", "పేరా:", "ప్రశ్న:", "సమాధానం:"),`
			`"thai":("ตอบคำถามต่อไปนี้โดยอิงตามข้อมูลในตอนข้อความที่กำหนด:", "ตอนข้อความ:", "คำถาม:", "คำตอบ:")`
			`}`

			`tydiqa_datasets = []`
			`for _lang in langs:`
			`_hint = prefixs_prompt[_lang]`
			`tydiqa_infer_cfg = dict(`
			`prompt_template=dict(`
			`type=PromptTemplate,`
			`template=f"{_hint[0]}\n\n</E>{_hint[1]}{{passage_text}}\n{_hint[2]} {{question_text}}\n{_hint[3]} {{answer}}" ,`
			`ice_token='</E>'),`
			`retriever=dict(type=ZeroRetriever),`
			`inferencer=dict(type=GenInferencer), max_out_len=50)`

			`tydiqa_eval_cfg = dict(evaluator=dict(type=TydiQAEvaluator),`
			`ds_split='validation',`
			`ds_column='answer',`
			`)`
			`tydiqa_datasets.append(`
			`dict(abbr=f'tyidqa-goldp_{_lang}',`
			`type=TydiQADataset,`
			`path='khalidalt/tydiqa-goldp',`
			`name=_lang,`
			`reader_cfg=tydiqa_reader_cfg,`
			`infer_cfg=tydiqa_infer_cfg,`
			`eval_cfg=tydiqa_eval_cfg))`