{
  "builder_name": "openai_lambada",
  "citation": "@InProceedings{paperno-EtAl:2016:P16-1,\n  author    = {Paperno, Denis  and  Kruszewski, Germ'{a}n  and  Lazaridou,\nAngeliki  and  Pham, Ngoc Quan  and  Bernardi, Raffaella  and  Pezzelle,\nSandro  and  Baroni, Marco  and  Boleda, Gemma  and  Fernandez, Raquel},\n  title     = {The {LAMBADA} dataset: Word prediction requiring a broad\ndiscourse context},\n  booktitle = {Proceedings of the 54th Annual Meeting of the Association for\nComputational Linguistics (Volume 1: Long Papers)},\n  month     = {August},\n  year      = {2016},\n  address   = {Berlin, Germany},\n  publisher = {Association for Computational Linguistics},\n  pages     = {1525--1534},\n  url       = {http://www.aclweb.org/anthology/P16-1144}\n}\n",
  "config_name": "default",
  "dataset_size": 1709449,
  "description": "LAMBADA dataset variant used by OpenAI to evaluate GPT-2 and GPT-3.\n",
  "download_checksums": {
    "https://openaipublic.blob.core.windows.net/gpt-2/data/lambada_test.jsonl": {
      "num_bytes": 1819752,
      "checksum": null
    }
  },
  "download_size": 1819752,
  "features": {
    "text": {
      "dtype": "string",
      "_type": "Value"
    }
  },
  "homepage": "",
  "license": "",
  "size_in_bytes": 3529201,
  "splits": {
    "test": {
      "name": "test",
      "num_bytes": 1709449,
      "num_examples": 5153,
      "dataset_name": "openai_lambada"
    }
  },
  "version": {
    "version_str": "1.0.0",
    "major": 1,
    "minor": 0,
    "patch": 0
  }
}