{
    "author": null,
    "date_published": null,
    "dek": null,
    "direction": "ltr",
    "domain": "kylrth.com",
    "excerpt": "The big idea here is to use the geometric mean instead of the arithmetic mean across samples in the batch when computing the gradient for SGD. This overcomes the situation where averaging produces&hellip;",
    "lead_image_url": null,
    "next_page_url": null,
    "rendered_pages": 1,
    "title": "Learning explanations that are hard to vary",
    "total_pages": 1,
    "url": "https://kylrth.com/paper/learning-explanations-hard-to-vary/",
    "word_count": 1
}