{
  "best_metric": 0.8657492399215698,
  "best_model_checkpoint": "miner_id_24/checkpoint-150",
  "epoch": 2.3346303501945527,
  "eval_steps": 50,
  "global_step": 150,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.01556420233463035,
      "grad_norm": 0.7498895525932312,
      "learning_rate": 1e-05,
      "loss": 1.0317,
      "step": 1
    },
    {
      "epoch": 0.01556420233463035,
      "eval_loss": 1.197574496269226,
      "eval_runtime": 2.5784,
      "eval_samples_per_second": 41.886,
      "eval_steps_per_second": 10.471,
      "step": 1
    },
    {
      "epoch": 0.0311284046692607,
      "grad_norm": 0.8710477352142334,
      "learning_rate": 2e-05,
      "loss": 1.1157,
      "step": 2
    },
    {
      "epoch": 0.04669260700389105,
      "grad_norm": 0.9946465492248535,
      "learning_rate": 3e-05,
      "loss": 1.131,
      "step": 3
    },
    {
      "epoch": 0.0622568093385214,
      "grad_norm": 0.8549498915672302,
      "learning_rate": 4e-05,
      "loss": 1.0619,
      "step": 4
    },
    {
      "epoch": 0.07782101167315175,
      "grad_norm": 0.6751075387001038,
      "learning_rate": 5e-05,
      "loss": 1.0552,
      "step": 5
    },
    {
      "epoch": 0.0933852140077821,
      "grad_norm": 0.6019833087921143,
      "learning_rate": 6e-05,
      "loss": 1.1261,
      "step": 6
    },
    {
      "epoch": 0.10894941634241245,
      "grad_norm": 0.5454035997390747,
      "learning_rate": 7e-05,
      "loss": 1.0568,
      "step": 7
    },
    {
      "epoch": 0.1245136186770428,
      "grad_norm": 0.4805472791194916,
      "learning_rate": 8e-05,
      "loss": 0.9972,
      "step": 8
    },
    {
      "epoch": 0.14007782101167315,
      "grad_norm": 0.5375916957855225,
      "learning_rate": 9e-05,
      "loss": 1.0284,
      "step": 9
    },
    {
      "epoch": 0.1556420233463035,
      "grad_norm": 0.553347110748291,
      "learning_rate": 0.0001,
      "loss": 1.0405,
      "step": 10
    },
    {
      "epoch": 0.17120622568093385,
      "grad_norm": 0.5552372336387634,
      "learning_rate": 9.999263238525136e-05,
      "loss": 1.0279,
      "step": 11
    },
    {
      "epoch": 0.1867704280155642,
      "grad_norm": 0.547298014163971,
      "learning_rate": 9.997053171227526e-05,
      "loss": 0.9419,
      "step": 12
    },
    {
      "epoch": 0.20233463035019456,
      "grad_norm": 0.48389917612075806,
      "learning_rate": 9.993370449424153e-05,
      "loss": 0.9592,
      "step": 13
    },
    {
      "epoch": 0.2178988326848249,
      "grad_norm": 0.4673521816730499,
      "learning_rate": 9.988216158430033e-05,
      "loss": 0.938,
      "step": 14
    },
    {
      "epoch": 0.23346303501945526,
      "grad_norm": 0.4670703113079071,
      "learning_rate": 9.981591817238378e-05,
      "loss": 0.8948,
      "step": 15
    },
    {
      "epoch": 0.2490272373540856,
      "grad_norm": 0.5665939450263977,
      "learning_rate": 9.973499378072945e-05,
      "loss": 1.0737,
      "step": 16
    },
    {
      "epoch": 0.26459143968871596,
      "grad_norm": 0.4406505525112152,
      "learning_rate": 9.963941225812701e-05,
      "loss": 0.9687,
      "step": 17
    },
    {
      "epoch": 0.2801556420233463,
      "grad_norm": 0.406009316444397,
      "learning_rate": 9.952920177288986e-05,
      "loss": 0.9306,
      "step": 18
    },
    {
      "epoch": 0.29571984435797666,
      "grad_norm": 0.3831186294555664,
      "learning_rate": 9.940439480455386e-05,
      "loss": 0.9469,
      "step": 19
    },
    {
      "epoch": 0.311284046692607,
      "grad_norm": 0.3777288794517517,
      "learning_rate": 9.926502813430545e-05,
      "loss": 0.9571,
      "step": 20
    },
    {
      "epoch": 0.32684824902723736,
      "grad_norm": 0.37088674306869507,
      "learning_rate": 9.911114283414205e-05,
      "loss": 0.9663,
      "step": 21
    },
    {
      "epoch": 0.3424124513618677,
      "grad_norm": 0.39943259954452515,
      "learning_rate": 9.89427842547679e-05,
      "loss": 0.9503,
      "step": 22
    },
    {
      "epoch": 0.35797665369649806,
      "grad_norm": 0.3599907457828522,
      "learning_rate": 9.876000201222912e-05,
      "loss": 0.9655,
      "step": 23
    },
    {
      "epoch": 0.3735408560311284,
      "grad_norm": 0.36868664622306824,
      "learning_rate": 9.856284997329158e-05,
      "loss": 0.9281,
      "step": 24
    },
    {
      "epoch": 0.38910505836575876,
      "grad_norm": 0.34976479411125183,
      "learning_rate": 9.835138623956603e-05,
      "loss": 0.9323,
      "step": 25
    },
    {
      "epoch": 0.4046692607003891,
      "grad_norm": 0.3601384162902832,
      "learning_rate": 9.812567313038542e-05,
      "loss": 0.8767,
      "step": 26
    },
    {
      "epoch": 0.42023346303501946,
      "grad_norm": 0.3567204773426056,
      "learning_rate": 9.788577716443902e-05,
      "loss": 0.8901,
      "step": 27
    },
    {
      "epoch": 0.4357976653696498,
      "grad_norm": 0.35787394642829895,
      "learning_rate": 9.763176904016913e-05,
      "loss": 0.8908,
      "step": 28
    },
    {
      "epoch": 0.45136186770428016,
      "grad_norm": 0.3854931890964508,
      "learning_rate": 9.736372361493584e-05,
      "loss": 0.9265,
      "step": 29
    },
    {
      "epoch": 0.4669260700389105,
      "grad_norm": 0.3999091386795044,
      "learning_rate": 9.708171988295631e-05,
      "loss": 0.9161,
      "step": 30
    },
    {
      "epoch": 0.48249027237354086,
      "grad_norm": 0.3725212514400482,
      "learning_rate": 9.678584095202468e-05,
      "loss": 0.8742,
      "step": 31
    },
    {
      "epoch": 0.4980544747081712,
      "grad_norm": 0.4123667776584625,
      "learning_rate": 9.647617401902002e-05,
      "loss": 0.9402,
      "step": 32
    },
    {
      "epoch": 0.5136186770428015,
      "grad_norm": 0.31651416420936584,
      "learning_rate": 9.61528103442088e-05,
      "loss": 0.8945,
      "step": 33
    },
    {
      "epoch": 0.5291828793774319,
      "grad_norm": 0.3038080334663391,
      "learning_rate": 9.581584522435024e-05,
      "loss": 0.8982,
      "step": 34
    },
    {
      "epoch": 0.5447470817120622,
      "grad_norm": 0.3260709047317505,
      "learning_rate": 9.546537796461179e-05,
      "loss": 0.9635,
      "step": 35
    },
    {
      "epoch": 0.5603112840466926,
      "grad_norm": 0.31259122490882874,
      "learning_rate": 9.510151184930354e-05,
      "loss": 0.8647,
      "step": 36
    },
    {
      "epoch": 0.5758754863813229,
      "grad_norm": 0.316936731338501,
      "learning_rate": 9.472435411143978e-05,
      "loss": 0.8711,
      "step": 37
    },
    {
      "epoch": 0.5914396887159533,
      "grad_norm": 0.3249667286872864,
      "learning_rate": 9.433401590113701e-05,
      "loss": 0.9385,
      "step": 38
    },
    {
      "epoch": 0.6070038910505836,
      "grad_norm": 0.3141433894634247,
      "learning_rate": 9.393061225285743e-05,
      "loss": 0.8555,
      "step": 39
    },
    {
      "epoch": 0.622568093385214,
      "grad_norm": 0.31393998861312866,
      "learning_rate": 9.351426205150777e-05,
      "loss": 0.91,
      "step": 40
    },
    {
      "epoch": 0.6381322957198443,
      "grad_norm": 0.3161945343017578,
      "learning_rate": 9.308508799740341e-05,
      "loss": 0.9217,
      "step": 41
    },
    {
      "epoch": 0.6536964980544747,
      "grad_norm": 0.3333125114440918,
      "learning_rate": 9.2643216570108e-05,
      "loss": 0.9125,
      "step": 42
    },
    {
      "epoch": 0.669260700389105,
      "grad_norm": 0.3434793949127197,
      "learning_rate": 9.218877799115928e-05,
      "loss": 0.8761,
      "step": 43
    },
    {
      "epoch": 0.6848249027237354,
      "grad_norm": 0.35263577103614807,
      "learning_rate": 9.172190618569236e-05,
      "loss": 0.9013,
      "step": 44
    },
    {
      "epoch": 0.7003891050583657,
      "grad_norm": 0.3485749363899231,
      "learning_rate": 9.124273874297122e-05,
      "loss": 0.8326,
      "step": 45
    },
    {
      "epoch": 0.7159533073929961,
      "grad_norm": 0.3562600612640381,
      "learning_rate": 9.075141687584057e-05,
      "loss": 0.8795,
      "step": 46
    },
    {
      "epoch": 0.7315175097276264,
      "grad_norm": 0.3698873519897461,
      "learning_rate": 9.024808537910981e-05,
      "loss": 0.914,
      "step": 47
    },
    {
      "epoch": 0.7470817120622568,
      "grad_norm": 0.3896391689777374,
      "learning_rate": 8.973289258688125e-05,
      "loss": 0.8028,
      "step": 48
    },
    {
      "epoch": 0.7626459143968871,
      "grad_norm": 0.2838418483734131,
      "learning_rate": 8.920599032883554e-05,
      "loss": 0.8242,
      "step": 49
    },
    {
      "epoch": 0.7782101167315175,
      "grad_norm": 0.32417213916778564,
      "learning_rate": 8.86675338854865e-05,
      "loss": 0.9819,
      "step": 50
    },
    {
      "epoch": 0.7782101167315175,
      "eval_loss": 0.8939120173454285,
      "eval_runtime": 2.6016,
      "eval_samples_per_second": 41.512,
      "eval_steps_per_second": 10.378,
      "step": 50
    },
    {
      "epoch": 0.7937743190661478,
      "grad_norm": 0.3049919009208679,
      "learning_rate": 8.811768194241952e-05,
      "loss": 0.8823,
      "step": 51
    },
    {
      "epoch": 0.8093385214007782,
      "grad_norm": 0.3248423635959625,
      "learning_rate": 8.755659654352599e-05,
      "loss": 0.9283,
      "step": 52
    },
    {
      "epoch": 0.8249027237354085,
      "grad_norm": 0.3247882127761841,
      "learning_rate": 8.698444304324835e-05,
      "loss": 0.9166,
      "step": 53
    },
    {
      "epoch": 0.8404669260700389,
      "grad_norm": 0.30053266882896423,
      "learning_rate": 8.640139005784924e-05,
      "loss": 0.858,
      "step": 54
    },
    {
      "epoch": 0.8560311284046692,
      "grad_norm": 0.33963730931282043,
      "learning_rate": 8.580760941571967e-05,
      "loss": 0.8852,
      "step": 55
    },
    {
      "epoch": 0.8715953307392996,
      "grad_norm": 0.31807518005371094,
      "learning_rate": 8.520327610674029e-05,
      "loss": 0.9407,
      "step": 56
    },
    {
      "epoch": 0.8871595330739299,
      "grad_norm": 0.3238030970096588,
      "learning_rate": 8.458856823071111e-05,
      "loss": 0.8734,
      "step": 57
    },
    {
      "epoch": 0.9027237354085603,
      "grad_norm": 0.3245297372341156,
      "learning_rate": 8.396366694486466e-05,
      "loss": 0.8825,
      "step": 58
    },
    {
      "epoch": 0.9182879377431906,
      "grad_norm": 0.3227308988571167,
      "learning_rate": 8.332875641047817e-05,
      "loss": 0.8962,
      "step": 59
    },
    {
      "epoch": 0.933852140077821,
      "grad_norm": 0.3529612123966217,
      "learning_rate": 8.26840237386003e-05,
      "loss": 0.9568,
      "step": 60
    },
    {
      "epoch": 0.9494163424124513,
      "grad_norm": 0.36405235528945923,
      "learning_rate": 8.202965893490878e-05,
      "loss": 0.897,
      "step": 61
    },
    {
      "epoch": 0.9649805447470817,
      "grad_norm": 0.3392234444618225,
      "learning_rate": 8.13658548437147e-05,
      "loss": 0.8032,
      "step": 62
    },
    {
      "epoch": 0.980544747081712,
      "grad_norm": 0.3424031436443329,
      "learning_rate": 8.06928070911306e-05,
      "loss": 0.8227,
      "step": 63
    },
    {
      "epoch": 0.9961089494163424,
      "grad_norm": 0.38254794478416443,
      "learning_rate": 8.001071402741842e-05,
      "loss": 0.9149,
      "step": 64
    },
    {
      "epoch": 1.0116731517509727,
      "grad_norm": 0.9797287583351135,
      "learning_rate": 7.931977666853479e-05,
      "loss": 1.5691,
      "step": 65
    },
    {
      "epoch": 1.027237354085603,
      "grad_norm": 0.2772282361984253,
      "learning_rate": 7.862019863689074e-05,
      "loss": 0.8003,
      "step": 66
    },
    {
      "epoch": 1.0428015564202335,
      "grad_norm": 0.285792738199234,
      "learning_rate": 7.791218610134323e-05,
      "loss": 0.8685,
      "step": 67
    },
    {
      "epoch": 1.0583657587548638,
      "grad_norm": 0.32011640071868896,
      "learning_rate": 7.719594771643623e-05,
      "loss": 0.9241,
      "step": 68
    },
    {
      "epoch": 1.0739299610894941,
      "grad_norm": 0.30448997020721436,
      "learning_rate": 7.647169456090925e-05,
      "loss": 0.8244,
      "step": 69
    },
    {
      "epoch": 1.0894941634241244,
      "grad_norm": 0.3131871819496155,
      "learning_rate": 7.573964007549155e-05,
      "loss": 0.8418,
      "step": 70
    },
    {
      "epoch": 1.105058365758755,
      "grad_norm": 0.3209502398967743,
      "learning_rate": 7.500000000000001e-05,
      "loss": 0.7923,
      "step": 71
    },
    {
      "epoch": 1.1206225680933852,
      "grad_norm": 0.3268880844116211,
      "learning_rate": 7.425299230975981e-05,
      "loss": 0.8179,
      "step": 72
    },
    {
      "epoch": 1.1361867704280155,
      "grad_norm": 0.3202899396419525,
      "learning_rate": 7.3498837151366e-05,
      "loss": 0.8361,
      "step": 73
    },
    {
      "epoch": 1.1517509727626458,
      "grad_norm": 0.33722591400146484,
      "learning_rate": 7.27377567778053e-05,
      "loss": 0.8352,
      "step": 74
    },
    {
      "epoch": 1.1673151750972763,
      "grad_norm": 0.3361928164958954,
      "learning_rate": 7.196997548295708e-05,
      "loss": 0.8771,
      "step": 75
    },
    {
      "epoch": 1.1828793774319066,
      "grad_norm": 0.3383100628852844,
      "learning_rate": 7.119571953549305e-05,
      "loss": 0.8353,
      "step": 76
    },
    {
      "epoch": 1.198443579766537,
      "grad_norm": 0.342599481344223,
      "learning_rate": 7.041521711219467e-05,
      "loss": 0.7851,
      "step": 77
    },
    {
      "epoch": 1.2140077821011672,
      "grad_norm": 0.3692112863063812,
      "learning_rate": 6.96286982307086e-05,
      "loss": 0.7754,
      "step": 78
    },
    {
      "epoch": 1.2295719844357977,
      "grad_norm": 0.3618566691875458,
      "learning_rate": 6.883639468175927e-05,
      "loss": 0.7592,
      "step": 79
    },
    {
      "epoch": 1.245136186770428,
      "grad_norm": 0.3885677456855774,
      "learning_rate": 6.803853996083917e-05,
      "loss": 0.8669,
      "step": 80
    },
    {
      "epoch": 1.2607003891050583,
      "grad_norm": 0.3042435646057129,
      "learning_rate": 6.72353691993967e-05,
      "loss": 0.8014,
      "step": 81
    },
    {
      "epoch": 1.2762645914396886,
      "grad_norm": 0.2926236093044281,
      "learning_rate": 6.642711909554174e-05,
      "loss": 0.757,
      "step": 82
    },
    {
      "epoch": 1.2918287937743191,
      "grad_norm": 0.315763384103775,
      "learning_rate": 6.561402784428974e-05,
      "loss": 0.8411,
      "step": 83
    },
    {
      "epoch": 1.3073929961089494,
      "grad_norm": 0.3296045958995819,
      "learning_rate": 6.479633506736446e-05,
      "loss": 0.8441,
      "step": 84
    },
    {
      "epoch": 1.3229571984435797,
      "grad_norm": 0.3326404392719269,
      "learning_rate": 6.397428174258047e-05,
      "loss": 0.8187,
      "step": 85
    },
    {
      "epoch": 1.3385214007782102,
      "grad_norm": 0.32586634159088135,
      "learning_rate": 6.314811013282574e-05,
      "loss": 0.8349,
      "step": 86
    },
    {
      "epoch": 1.3540856031128405,
      "grad_norm": 0.33808082342147827,
      "learning_rate": 6.231806371466574e-05,
      "loss": 0.848,
      "step": 87
    },
    {
      "epoch": 1.3696498054474708,
      "grad_norm": 0.3542570173740387,
      "learning_rate": 6.14843871065898e-05,
      "loss": 0.8243,
      "step": 88
    },
    {
      "epoch": 1.3852140077821011,
      "grad_norm": 0.356636643409729,
      "learning_rate": 6.064732599692079e-05,
      "loss": 0.8619,
      "step": 89
    },
    {
      "epoch": 1.4007782101167314,
      "grad_norm": 0.3576579689979553,
      "learning_rate": 5.980712707140985e-05,
      "loss": 0.7986,
      "step": 90
    },
    {
      "epoch": 1.416342412451362,
      "grad_norm": 0.36819812655448914,
      "learning_rate": 5.896403794053679e-05,
      "loss": 0.8189,
      "step": 91
    },
    {
      "epoch": 1.4319066147859922,
      "grad_norm": 0.3678414821624756,
      "learning_rate": 5.8118307066538193e-05,
      "loss": 0.7622,
      "step": 92
    },
    {
      "epoch": 1.4474708171206225,
      "grad_norm": 0.38239309191703796,
      "learning_rate": 5.7270183690184495e-05,
      "loss": 0.8324,
      "step": 93
    },
    {
      "epoch": 1.463035019455253,
      "grad_norm": 0.36592909693717957,
      "learning_rate": 5.6419917757327555e-05,
      "loss": 0.7254,
      "step": 94
    },
    {
      "epoch": 1.4785992217898833,
      "grad_norm": 0.3889016807079315,
      "learning_rate": 5.5567759845240444e-05,
      "loss": 0.7894,
      "step": 95
    },
    {
      "epoch": 1.4941634241245136,
      "grad_norm": 0.4369194507598877,
      "learning_rate": 5.4713961088771226e-05,
      "loss": 0.8497,
      "step": 96
    },
    {
      "epoch": 1.509727626459144,
      "grad_norm": 0.3298012614250183,
      "learning_rate": 5.385877310633233e-05,
      "loss": 0.7555,
      "step": 97
    },
    {
      "epoch": 1.5252918287937742,
      "grad_norm": 0.3367776572704315,
      "learning_rate": 5.300244792574742e-05,
      "loss": 0.8044,
      "step": 98
    },
    {
      "epoch": 1.5408560311284045,
      "grad_norm": 0.3444443941116333,
      "learning_rate": 5.214523790997773e-05,
      "loss": 0.8537,
      "step": 99
    },
    {
      "epoch": 1.556420233463035,
      "grad_norm": 0.33900946378707886,
      "learning_rate": 5.128739568274944e-05,
      "loss": 0.7601,
      "step": 100
    },
    {
      "epoch": 1.556420233463035,
      "eval_loss": 0.8714919686317444,
      "eval_runtime": 2.6058,
      "eval_samples_per_second": 41.446,
      "eval_steps_per_second": 10.362,
      "step": 100
    },
    {
      "epoch": 1.5719844357976653,
      "grad_norm": 0.3435685634613037,
      "learning_rate": 5.0429174054104355e-05,
      "loss": 0.8486,
      "step": 101
    },
    {
      "epoch": 1.5875486381322959,
      "grad_norm": 0.3485165536403656,
      "learning_rate": 4.9570825945895656e-05,
      "loss": 0.8179,
      "step": 102
    },
    {
      "epoch": 1.6031128404669261,
      "grad_norm": 0.3704763352870941,
      "learning_rate": 4.8712604317250576e-05,
      "loss": 0.9108,
      "step": 103
    },
    {
      "epoch": 1.6186770428015564,
      "grad_norm": 0.33625274896621704,
      "learning_rate": 4.785476209002227e-05,
      "loss": 0.8262,
      "step": 104
    },
    {
      "epoch": 1.6342412451361867,
      "grad_norm": 0.3515668511390686,
      "learning_rate": 4.699755207425259e-05,
      "loss": 0.8336,
      "step": 105
    },
    {
      "epoch": 1.649805447470817,
      "grad_norm": 0.3408714830875397,
      "learning_rate": 4.6141226893667684e-05,
      "loss": 0.714,
      "step": 106
    },
    {
      "epoch": 1.6653696498054473,
      "grad_norm": 0.36845165491104126,
      "learning_rate": 4.5286038911228785e-05,
      "loss": 0.8147,
      "step": 107
    },
    {
      "epoch": 1.6809338521400778,
      "grad_norm": 0.3738980293273926,
      "learning_rate": 4.443224015475956e-05,
      "loss": 0.8297,
      "step": 108
    },
    {
      "epoch": 1.6964980544747081,
      "grad_norm": 0.38755711913108826,
      "learning_rate": 4.358008224267245e-05,
      "loss": 0.8268,
      "step": 109
    },
    {
      "epoch": 1.7120622568093387,
      "grad_norm": 0.367796927690506,
      "learning_rate": 4.272981630981551e-05,
      "loss": 0.7379,
      "step": 110
    },
    {
      "epoch": 1.727626459143969,
      "grad_norm": 0.3794459104537964,
      "learning_rate": 4.188169293346183e-05,
      "loss": 0.7374,
      "step": 111
    },
    {
      "epoch": 1.7431906614785992,
      "grad_norm": 0.4307495653629303,
      "learning_rate": 4.103596205946323e-05,
      "loss": 0.7783,
      "step": 112
    },
    {
      "epoch": 1.7587548638132295,
      "grad_norm": 0.3288561701774597,
      "learning_rate": 4.019287292859016e-05,
      "loss": 0.8254,
      "step": 113
    },
    {
      "epoch": 1.7743190661478598,
      "grad_norm": 0.3198075592517853,
      "learning_rate": 3.9352674003079225e-05,
      "loss": 0.8722,
      "step": 114
    },
    {
      "epoch": 1.7898832684824901,
      "grad_norm": 0.3411862850189209,
      "learning_rate": 3.851561289341023e-05,
      "loss": 0.8044,
      "step": 115
    },
    {
      "epoch": 1.8054474708171206,
      "grad_norm": 0.33337950706481934,
      "learning_rate": 3.768193628533427e-05,
      "loss": 0.7877,
      "step": 116
    },
    {
      "epoch": 1.821011673151751,
      "grad_norm": 0.34444868564605713,
      "learning_rate": 3.6851889867174275e-05,
      "loss": 0.8396,
      "step": 117
    },
    {
      "epoch": 1.8365758754863815,
      "grad_norm": 0.35829517245292664,
      "learning_rate": 3.602571825741953e-05,
      "loss": 0.7998,
      "step": 118
    },
    {
      "epoch": 1.8521400778210118,
      "grad_norm": 0.33558353781700134,
      "learning_rate": 3.520366493263554e-05,
      "loss": 0.7706,
      "step": 119
    },
    {
      "epoch": 1.867704280155642,
      "grad_norm": 0.3696330487728119,
      "learning_rate": 3.438597215571027e-05,
      "loss": 0.9026,
      "step": 120
    },
    {
      "epoch": 1.8832684824902723,
      "grad_norm": 0.34265345335006714,
      "learning_rate": 3.357288090445827e-05,
      "loss": 0.8013,
      "step": 121
    },
    {
      "epoch": 1.8988326848249026,
      "grad_norm": 0.3573516309261322,
      "learning_rate": 3.2764630800603314e-05,
      "loss": 0.8148,
      "step": 122
    },
    {
      "epoch": 1.914396887159533,
      "grad_norm": 0.36973556876182556,
      "learning_rate": 3.196146003916084e-05,
      "loss": 0.7784,
      "step": 123
    },
    {
      "epoch": 1.9299610894941635,
      "grad_norm": 0.37476617097854614,
      "learning_rate": 3.116360531824074e-05,
      "loss": 0.8075,
      "step": 124
    },
    {
      "epoch": 1.9455252918287937,
      "grad_norm": 0.36869266629219055,
      "learning_rate": 3.0371301769291417e-05,
      "loss": 0.7631,
      "step": 125
    },
    {
      "epoch": 1.9610894941634243,
      "grad_norm": 0.38179001212120056,
      "learning_rate": 2.9584782887805328e-05,
      "loss": 0.7691,
      "step": 126
    },
    {
      "epoch": 1.9766536964980546,
      "grad_norm": 0.4227118492126465,
      "learning_rate": 2.8804280464506973e-05,
      "loss": 0.7856,
      "step": 127
    },
    {
      "epoch": 1.9922178988326849,
      "grad_norm": 0.46422451734542847,
      "learning_rate": 2.8030024517042907e-05,
      "loss": 0.8267,
      "step": 128
    },
    {
      "epoch": 2.007782101167315,
      "grad_norm": 1.0904223918914795,
      "learning_rate": 2.726224322219473e-05,
      "loss": 1.539,
      "step": 129
    },
    {
      "epoch": 2.0233463035019454,
      "grad_norm": 0.2821767330169678,
      "learning_rate": 2.650116284863402e-05,
      "loss": 0.6871,
      "step": 130
    },
    {
      "epoch": 2.0389105058365757,
      "grad_norm": 0.3239165246486664,
      "learning_rate": 2.5747007690240198e-05,
      "loss": 0.8398,
      "step": 131
    },
    {
      "epoch": 2.054474708171206,
      "grad_norm": 0.33111441135406494,
      "learning_rate": 2.500000000000001e-05,
      "loss": 0.8076,
      "step": 132
    },
    {
      "epoch": 2.0700389105058368,
      "grad_norm": 0.36252424120903015,
      "learning_rate": 2.426035992450848e-05,
      "loss": 0.8682,
      "step": 133
    },
    {
      "epoch": 2.085603112840467,
      "grad_norm": 0.3458457589149475,
      "learning_rate": 2.3528305439090743e-05,
      "loss": 0.7667,
      "step": 134
    },
    {
      "epoch": 2.1011673151750974,
      "grad_norm": 0.3382876217365265,
      "learning_rate": 2.280405228356377e-05,
      "loss": 0.7525,
      "step": 135
    },
    {
      "epoch": 2.1167315175097277,
      "grad_norm": 0.34494414925575256,
      "learning_rate": 2.2087813898656774e-05,
      "loss": 0.7819,
      "step": 136
    },
    {
      "epoch": 2.132295719844358,
      "grad_norm": 0.3596420884132385,
      "learning_rate": 2.137980136310926e-05,
      "loss": 0.7824,
      "step": 137
    },
    {
      "epoch": 2.1478599221789882,
      "grad_norm": 0.35264426469802856,
      "learning_rate": 2.068022333146522e-05,
      "loss": 0.7032,
      "step": 138
    },
    {
      "epoch": 2.1634241245136185,
      "grad_norm": 0.3882901072502136,
      "learning_rate": 1.9989285972581595e-05,
      "loss": 0.826,
      "step": 139
    },
    {
      "epoch": 2.178988326848249,
      "grad_norm": 0.38036638498306274,
      "learning_rate": 1.9307192908869397e-05,
      "loss": 0.7353,
      "step": 140
    },
    {
      "epoch": 2.1945525291828796,
      "grad_norm": 0.3903452157974243,
      "learning_rate": 1.863414515628531e-05,
      "loss": 0.712,
      "step": 141
    },
    {
      "epoch": 2.21011673151751,
      "grad_norm": 0.4027177095413208,
      "learning_rate": 1.7970341065091245e-05,
      "loss": 0.7076,
      "step": 142
    },
    {
      "epoch": 2.22568093385214,
      "grad_norm": 0.4014608561992645,
      "learning_rate": 1.7315976261399696e-05,
      "loss": 0.6695,
      "step": 143
    },
    {
      "epoch": 2.2412451361867705,
      "grad_norm": 0.43404000997543335,
      "learning_rate": 1.667124358952184e-05,
      "loss": 0.7546,
      "step": 144
    },
    {
      "epoch": 2.2568093385214008,
      "grad_norm": 0.37586840987205505,
      "learning_rate": 1.6036333055135344e-05,
      "loss": 0.6992,
      "step": 145
    },
    {
      "epoch": 2.272373540856031,
      "grad_norm": 0.34168022871017456,
      "learning_rate": 1.541143176928891e-05,
      "loss": 0.8211,
      "step": 146
    },
    {
      "epoch": 2.2879377431906613,
      "grad_norm": 0.33489376306533813,
      "learning_rate": 1.4796723893259712e-05,
      "loss": 0.7855,
      "step": 147
    },
    {
      "epoch": 2.3035019455252916,
      "grad_norm": 0.3305031359195709,
      "learning_rate": 1.4192390584280346e-05,
      "loss": 0.6823,
      "step": 148
    },
    {
      "epoch": 2.319066147859922,
      "grad_norm": 0.3635585606098175,
      "learning_rate": 1.3598609942150765e-05,
      "loss": 0.7662,
      "step": 149
    },
    {
      "epoch": 2.3346303501945527,
      "grad_norm": 0.34550437331199646,
      "learning_rate": 1.3015556956751669e-05,
      "loss": 0.7758,
      "step": 150
    },
    {
      "epoch": 2.3346303501945527,
      "eval_loss": 0.8657492399215698,
      "eval_runtime": 2.6057,
      "eval_samples_per_second": 41.447,
      "eval_steps_per_second": 10.362,
      "step": 150
    }
  ],
  "logging_steps": 1,
  "max_steps": 193,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 4,
  "save_steps": 50,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 5,
        "early_stopping_threshold": 0.0
      },
      "attributes": {
        "early_stopping_patience_counter": 0
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 3.829000395187814e+16,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}