{ "builder_name": "openai_lambada", "citation": "@InProceedings{paperno-EtAl:2016:P16-1,\n author = {Paperno, Denis and Kruszewski, Germ\\'{a}n and Lazaridou,\nAngeliki and Pham, Ngoc Quan and Bernardi, Raffaella and Pezzelle,\nSandro and Baroni, Marco and Boleda, Gemma and Fernandez, Raquel},\n title = {The {LAMBADA} dataset: Word prediction requiring a broad\ndiscourse context},\n booktitle = {Proceedings of the 54th Annual Meeting of the Association for\nComputational Linguistics (Volume 1: Long Papers)},\n month = {August},\n year = {2016},\n address = {Berlin, Germany},\n publisher = {Association for Computational Linguistics},\n pages = {1525--1534},\n url = {http://www.aclweb.org/anthology/P16-1144}\n}\n", "config_name": "default", "dataset_size": 1709449, "description": "LAMBADA dataset variant used by OpenAI to evaluate GPT-2 and GPT-3.\n", "download_checksums": { "https://openaipublic.blob.core.windows.net/gpt-2/data/lambada_test.jsonl": { "num_bytes": 1819752, "checksum": null } }, "download_size": 1819752, "features": { "text": { "dtype": "string", "_type": "Value" } }, "homepage": "", "license": "", "size_in_bytes": 3529201, "splits": { "test": { "name": "test", "num_bytes": 1709449, "num_examples": 5153, "dataset_name": "openai_lambada" } }, "version": { "version_str": "1.0.0", "major": 1, "minor": 0, "patch": 0 } }