{
    "author": null,
    "date_published": "2020-09-15T20:21:00.000Z",
    "dek": null,
    "direction": "ltr",
    "domain": "arxiv.org",
    "excerpt": "When scaled to hundreds of billions of parameters, pretrained language models such as GPT-3 (Brown et al., 2020) achieve remarkable few-shot performance. However, enormous amounts of compute are&hellip;",
    "lead_image_url": "https://static.arxiv.org/icons/twitter/arxiv-logo-twitter-square.png",
    "next_page_url": null,
    "rendered_pages": 1,
    "title": "It's Not Just Size That Matters: Small Language Models Are Also Few-Shot Learners",
    "total_pages": 1,
    "url": "https://arxiv.org/abs/2009.07118v2",
    "word_count": 145
}