{
    "author": null,
    "date_published": "2018-03-26T20:18:00.000Z",
    "dek": null,
    "direction": "ltr",
    "domain": "arxiv.org",
    "excerpt": "Although deep learning has produced dazzling successes for applications of image, speech, and video processing in the past few years, most trainings are with suboptimal hyper-parameters, requiring…",
    "lead_image_url": "https://static.arxiv.org/icons/twitter/arxiv-logo-twitter-square.png",
    "next_page_url": null,
    "rendered_pages": 1,
    "title": "A disciplined approach to neural network hyper-parameters: Part 1 -- learning rate, batch size, momentum, and weight decay",
    "total_pages": 1,
    "url": "https://arxiv.org/abs/1803.09820v2",
    "word_count": 181
}