{
  "builder_name": "openai_lambada",
  "citation": "@InProceedings{paperno-EtAl:2016:P16-1,\n author = {Paperno, Denis and Kruszewski, Germ\\'{a}n and Lazaridou,\nAngeliki and Pham, Ngoc Quan and Bernardi, Raffaella and Pezzelle,\nSandro and Baroni, Marco and Boleda, Gemma and Fernandez, Raquel},\n title = {The {LAMBADA} dataset: Word prediction requiring a broad\ndiscourse context},\n booktitle = {Proceedings of the 54th Annual Meeting of the Association for\nComputational Linguistics (Volume 1: Long Papers)},\n month = {August},\n year = {2016},\n address = {Berlin, Germany},\n publisher = {Association for Computational Linguistics},\n pages = {1525--1534},\n url = {http://www.aclweb.org/anthology/P16-1144}\n}\n",
  "config_name": "default",
  "dataset_size": 1709449,
  "description": "LAMBADA dataset variant used by OpenAI to evaluate GPT-2 and GPT-3.\n",
  "download_checksums": {
    "https://openaipublic.blob.core.windows.net/gpt-2/data/lambada_test.jsonl": {
      "num_bytes": 1819752,
      "checksum": null
    }
  },
  "download_size": 1819752,
  "features": {
    "text": {
      "dtype": "string",
      "_type": "Value"
    }
  },
  "homepage": "",
  "license": "",
  "size_in_bytes": 3529201,
  "splits": {
    "test": {
      "name": "test",
      "num_bytes": 1709449,
      "num_examples": 5153,
      "dataset_name": "openai_lambada"
    }
  },
  "version": {
    "version_str": "1.0.0",
    "major": 1,
    "minor": 0,
    "patch": 0
  }
}