dataset-opencompass/data/lambada/test/dataset_info.json
2025-07-18 07:25:44 +00:00

37 lines
1.5 KiB
JSON

{
"builder_name": "openai_lambada",
"citation": "@InProceedings{paperno-EtAl:2016:P16-1,\n author = {Paperno, Denis and Kruszewski, Germ'{a}n and Lazaridou,\nAngeliki and Pham, Ngoc Quan and Bernardi, Raffaella and Pezzelle,\nSandro and Baroni, Marco and Boleda, Gemma and Fernandez, Raquel},\n title = {The {LAMBADA} dataset: Word prediction requiring a broad\ndiscourse context},\n booktitle = {Proceedings of the 54th Annual Meeting of the Association for\nComputational Linguistics (Volume 1: Long Papers)},\n month = {August},\n year = {2016},\n address = {Berlin, Germany},\n publisher = {Association for Computational Linguistics},\n pages = {1525--1534},\n url = {http://www.aclweb.org/anthology/P16-1144}\n}\n",
"config_name": "default",
"dataset_size": 1709449,
"description": "LAMBADA dataset variant used by OpenAI to evaluate GPT-2 and GPT-3.\n",
"download_checksums": {
"https://openaipublic.blob.core.windows.net/gpt-2/data/lambada_test.jsonl": {
"num_bytes": 1819752,
"checksum": null
}
},
"download_size": 1819752,
"features": {
"text": {
"dtype": "string",
"_type": "Value"
}
},
"homepage": "",
"license": "",
"size_in_bytes": 3529201,
"splits": {
"test": {
"name": "test",
"num_bytes": 1709449,
"num_examples": 5153,
"dataset_name": "openai_lambada"
}
},
"version": {
"version_str": "1.0.0",
"major": 1,
"minor": 0,
"patch": 0
}
}