{ "best_metric": 1.8535966873168945, "best_model_checkpoint": "miner_id_24/checkpoint-200", "epoch": 0.03421435292105038, "eval_steps": 50, "global_step": 200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0001710717646052519, "grad_norm": 0.383236289024353, "learning_rate": 1.004e-05, "loss": 2.0165, "step": 1 }, { "epoch": 0.0001710717646052519, "eval_loss": 2.0891342163085938, "eval_runtime": 473.9324, "eval_samples_per_second": 5.195, "eval_steps_per_second": 1.3, "step": 1 }, { "epoch": 0.0003421435292105038, "grad_norm": 0.38923412561416626, "learning_rate": 2.008e-05, "loss": 1.7318, "step": 2 }, { "epoch": 0.0005132152938157557, "grad_norm": 0.4268319606781006, "learning_rate": 3.012e-05, "loss": 2.0336, "step": 3 }, { "epoch": 0.0006842870584210076, "grad_norm": 0.4290253520011902, "learning_rate": 4.016e-05, "loss": 1.8747, "step": 4 }, { "epoch": 0.0008553588230262595, "grad_norm": 0.44560176134109497, "learning_rate": 5.02e-05, "loss": 2.1207, "step": 5 }, { "epoch": 0.0010264305876315114, "grad_norm": 0.4551117718219757, "learning_rate": 6.024e-05, "loss": 2.0431, "step": 6 }, { "epoch": 0.0011975023522367632, "grad_norm": 0.4633933901786804, "learning_rate": 7.028e-05, "loss": 1.7246, "step": 7 }, { "epoch": 0.0013685741168420152, "grad_norm": 0.5507611632347107, "learning_rate": 8.032e-05, "loss": 1.9728, "step": 8 }, { "epoch": 0.001539645881447267, "grad_norm": 0.5344508290290833, "learning_rate": 9.036000000000001e-05, "loss": 1.8793, "step": 9 }, { "epoch": 0.001710717646052519, "grad_norm": 0.5629733204841614, "learning_rate": 0.0001004, "loss": 2.0234, "step": 10 }, { "epoch": 0.0018817894106577708, "grad_norm": 0.554598867893219, "learning_rate": 9.987157894736842e-05, "loss": 1.8122, "step": 11 }, { "epoch": 0.002052861175263023, "grad_norm": 0.6233848929405212, "learning_rate": 9.934315789473684e-05, "loss": 2.0835, "step": 12 }, { "epoch": 0.002223932939868275, "grad_norm": 0.6465232372283936, "learning_rate": 9.881473684210525e-05, "loss": 1.9595, "step": 13 }, { "epoch": 0.0023950047044735264, "grad_norm": 0.6446559429168701, "learning_rate": 9.828631578947369e-05, "loss": 1.7788, "step": 14 }, { "epoch": 0.0025660764690787785, "grad_norm": 0.5892208814620972, "learning_rate": 9.77578947368421e-05, "loss": 1.7229, "step": 15 }, { "epoch": 0.0027371482336840305, "grad_norm": 0.6372520923614502, "learning_rate": 9.722947368421052e-05, "loss": 1.7322, "step": 16 }, { "epoch": 0.0029082199982892825, "grad_norm": 0.6278177499771118, "learning_rate": 9.670105263157895e-05, "loss": 1.8333, "step": 17 }, { "epoch": 0.003079291762894534, "grad_norm": 0.6287798285484314, "learning_rate": 9.617263157894737e-05, "loss": 1.8528, "step": 18 }, { "epoch": 0.003250363527499786, "grad_norm": 0.5991953015327454, "learning_rate": 9.564421052631579e-05, "loss": 1.7871, "step": 19 }, { "epoch": 0.003421435292105038, "grad_norm": 0.573375940322876, "learning_rate": 9.511578947368421e-05, "loss": 1.6581, "step": 20 }, { "epoch": 0.00359250705671029, "grad_norm": 0.5814819931983948, "learning_rate": 9.458736842105264e-05, "loss": 1.8643, "step": 21 }, { "epoch": 0.0037635788213155417, "grad_norm": 0.665965735912323, "learning_rate": 9.405894736842106e-05, "loss": 1.8961, "step": 22 }, { "epoch": 0.003934650585920794, "grad_norm": 0.6515952944755554, "learning_rate": 9.353052631578947e-05, "loss": 1.9137, "step": 23 }, { "epoch": 0.004105722350526046, "grad_norm": 0.5990787744522095, "learning_rate": 9.300210526315789e-05, "loss": 1.8396, "step": 24 }, { "epoch": 0.004276794115131297, "grad_norm": 0.6736911535263062, "learning_rate": 9.247368421052631e-05, "loss": 2.018, "step": 25 }, { "epoch": 0.00444786587973655, "grad_norm": 0.6013162732124329, "learning_rate": 9.194526315789473e-05, "loss": 1.6318, "step": 26 }, { "epoch": 0.004618937644341801, "grad_norm": 0.6049439907073975, "learning_rate": 9.141684210526316e-05, "loss": 1.8607, "step": 27 }, { "epoch": 0.004790009408947053, "grad_norm": 0.6569304466247559, "learning_rate": 9.088842105263158e-05, "loss": 1.8646, "step": 28 }, { "epoch": 0.004961081173552305, "grad_norm": 0.6664987802505493, "learning_rate": 9.036000000000001e-05, "loss": 1.7735, "step": 29 }, { "epoch": 0.005132152938157557, "grad_norm": 0.7716237902641296, "learning_rate": 8.983157894736843e-05, "loss": 2.1894, "step": 30 }, { "epoch": 0.005303224702762809, "grad_norm": 0.7701791524887085, "learning_rate": 8.930315789473684e-05, "loss": 1.9132, "step": 31 }, { "epoch": 0.005474296467368061, "grad_norm": 0.6804842352867126, "learning_rate": 8.877473684210526e-05, "loss": 1.7197, "step": 32 }, { "epoch": 0.0056453682319733125, "grad_norm": 0.7339447736740112, "learning_rate": 8.824631578947368e-05, "loss": 1.8394, "step": 33 }, { "epoch": 0.005816439996578565, "grad_norm": 0.731982409954071, "learning_rate": 8.771789473684211e-05, "loss": 1.8362, "step": 34 }, { "epoch": 0.0059875117611838165, "grad_norm": 0.8712673187255859, "learning_rate": 8.718947368421053e-05, "loss": 1.7972, "step": 35 }, { "epoch": 0.006158583525789068, "grad_norm": 0.8886252641677856, "learning_rate": 8.666105263157895e-05, "loss": 2.1816, "step": 36 }, { "epoch": 0.006329655290394321, "grad_norm": 0.913334310054779, "learning_rate": 8.613263157894737e-05, "loss": 1.8242, "step": 37 }, { "epoch": 0.006500727054999572, "grad_norm": 0.8896219730377197, "learning_rate": 8.560421052631578e-05, "loss": 1.8448, "step": 38 }, { "epoch": 0.006671798819604825, "grad_norm": 0.9681621193885803, "learning_rate": 8.50757894736842e-05, "loss": 1.6235, "step": 39 }, { "epoch": 0.006842870584210076, "grad_norm": 1.0905768871307373, "learning_rate": 8.454736842105263e-05, "loss": 1.8704, "step": 40 }, { "epoch": 0.007013942348815328, "grad_norm": 0.9447023272514343, "learning_rate": 8.401894736842106e-05, "loss": 1.8091, "step": 41 }, { "epoch": 0.00718501411342058, "grad_norm": 1.0970029830932617, "learning_rate": 8.349052631578948e-05, "loss": 2.0633, "step": 42 }, { "epoch": 0.007356085878025832, "grad_norm": 1.2916038036346436, "learning_rate": 8.29621052631579e-05, "loss": 2.2442, "step": 43 }, { "epoch": 0.007527157642631083, "grad_norm": 1.0917505025863647, "learning_rate": 8.243368421052632e-05, "loss": 1.8491, "step": 44 }, { "epoch": 0.007698229407236336, "grad_norm": 1.5188097953796387, "learning_rate": 8.190526315789474e-05, "loss": 1.6231, "step": 45 }, { "epoch": 0.007869301171841588, "grad_norm": 1.479370355606079, "learning_rate": 8.137684210526315e-05, "loss": 1.7806, "step": 46 }, { "epoch": 0.008040372936446839, "grad_norm": 1.4857006072998047, "learning_rate": 8.084842105263157e-05, "loss": 1.7682, "step": 47 }, { "epoch": 0.008211444701052091, "grad_norm": 2.0244221687316895, "learning_rate": 8.032e-05, "loss": 2.0125, "step": 48 }, { "epoch": 0.008382516465657344, "grad_norm": 1.8919278383255005, "learning_rate": 7.979157894736842e-05, "loss": 1.7106, "step": 49 }, { "epoch": 0.008553588230262595, "grad_norm": 4.771112442016602, "learning_rate": 7.926315789473684e-05, "loss": 1.6001, "step": 50 }, { "epoch": 0.008553588230262595, "eval_loss": 2.019399881362915, "eval_runtime": 474.1809, "eval_samples_per_second": 5.192, "eval_steps_per_second": 1.299, "step": 50 }, { "epoch": 0.008724659994867847, "grad_norm": 0.7101765275001526, "learning_rate": 7.873473684210526e-05, "loss": 1.8857, "step": 51 }, { "epoch": 0.0088957317594731, "grad_norm": 0.776714026927948, "learning_rate": 7.820631578947369e-05, "loss": 1.8346, "step": 52 }, { "epoch": 0.00906680352407835, "grad_norm": 0.7492753863334656, "learning_rate": 7.76778947368421e-05, "loss": 2.0555, "step": 53 }, { "epoch": 0.009237875288683603, "grad_norm": 0.532320499420166, "learning_rate": 7.714947368421052e-05, "loss": 1.833, "step": 54 }, { "epoch": 0.009408947053288855, "grad_norm": 0.506446361541748, "learning_rate": 7.662105263157896e-05, "loss": 1.8922, "step": 55 }, { "epoch": 0.009580018817894106, "grad_norm": 0.4671626389026642, "learning_rate": 7.609263157894737e-05, "loss": 1.9379, "step": 56 }, { "epoch": 0.009751090582499358, "grad_norm": 0.5219800472259521, "learning_rate": 7.556421052631579e-05, "loss": 1.9508, "step": 57 }, { "epoch": 0.00992216234710461, "grad_norm": 0.4187794029712677, "learning_rate": 7.503578947368421e-05, "loss": 1.5711, "step": 58 }, { "epoch": 0.010093234111709863, "grad_norm": 0.47037985920906067, "learning_rate": 7.450736842105263e-05, "loss": 1.8885, "step": 59 }, { "epoch": 0.010264305876315114, "grad_norm": 0.45489931106567383, "learning_rate": 7.397894736842105e-05, "loss": 2.0025, "step": 60 }, { "epoch": 0.010435377640920366, "grad_norm": 0.4538400173187256, "learning_rate": 7.345052631578948e-05, "loss": 1.8172, "step": 61 }, { "epoch": 0.010606449405525619, "grad_norm": 0.4797042906284332, "learning_rate": 7.29221052631579e-05, "loss": 1.8545, "step": 62 }, { "epoch": 0.01077752117013087, "grad_norm": 0.48613712191581726, "learning_rate": 7.239368421052631e-05, "loss": 1.7661, "step": 63 }, { "epoch": 0.010948592934736122, "grad_norm": 0.5110929012298584, "learning_rate": 7.186526315789474e-05, "loss": 1.8899, "step": 64 }, { "epoch": 0.011119664699341374, "grad_norm": 0.47642821073532104, "learning_rate": 7.133684210526316e-05, "loss": 1.7915, "step": 65 }, { "epoch": 0.011290736463946625, "grad_norm": 0.5066452622413635, "learning_rate": 7.080842105263158e-05, "loss": 1.7244, "step": 66 }, { "epoch": 0.011461808228551877, "grad_norm": 0.5502453446388245, "learning_rate": 7.028e-05, "loss": 1.7781, "step": 67 }, { "epoch": 0.01163287999315713, "grad_norm": 0.5751240253448486, "learning_rate": 6.975157894736843e-05, "loss": 1.8453, "step": 68 }, { "epoch": 0.01180395175776238, "grad_norm": 0.5743075609207153, "learning_rate": 6.922315789473685e-05, "loss": 1.8647, "step": 69 }, { "epoch": 0.011975023522367633, "grad_norm": 0.5658362507820129, "learning_rate": 6.869473684210527e-05, "loss": 1.9551, "step": 70 }, { "epoch": 0.012146095286972886, "grad_norm": 0.5974031686782837, "learning_rate": 6.816631578947368e-05, "loss": 1.9202, "step": 71 }, { "epoch": 0.012317167051578136, "grad_norm": 0.6650647521018982, "learning_rate": 6.76378947368421e-05, "loss": 1.7682, "step": 72 }, { "epoch": 0.012488238816183389, "grad_norm": 0.6288644671440125, "learning_rate": 6.710947368421052e-05, "loss": 2.042, "step": 73 }, { "epoch": 0.012659310580788641, "grad_norm": 0.6033047437667847, "learning_rate": 6.658105263157894e-05, "loss": 1.8117, "step": 74 }, { "epoch": 0.012830382345393894, "grad_norm": 0.6429676413536072, "learning_rate": 6.605263157894737e-05, "loss": 1.9016, "step": 75 }, { "epoch": 0.013001454109999144, "grad_norm": 0.7156361937522888, "learning_rate": 6.55242105263158e-05, "loss": 2.0832, "step": 76 }, { "epoch": 0.013172525874604397, "grad_norm": 0.6470149755477905, "learning_rate": 6.499578947368422e-05, "loss": 1.7331, "step": 77 }, { "epoch": 0.01334359763920965, "grad_norm": 0.6850797533988953, "learning_rate": 6.446736842105264e-05, "loss": 1.9599, "step": 78 }, { "epoch": 0.0135146694038149, "grad_norm": 0.6620407700538635, "learning_rate": 6.393894736842105e-05, "loss": 1.9652, "step": 79 }, { "epoch": 0.013685741168420152, "grad_norm": 0.6680328249931335, "learning_rate": 6.341052631578947e-05, "loss": 1.8194, "step": 80 }, { "epoch": 0.013856812933025405, "grad_norm": 0.6848267316818237, "learning_rate": 6.288210526315789e-05, "loss": 1.9067, "step": 81 }, { "epoch": 0.014027884697630656, "grad_norm": 0.7361618876457214, "learning_rate": 6.235368421052632e-05, "loss": 1.8385, "step": 82 }, { "epoch": 0.014198956462235908, "grad_norm": 0.7371098399162292, "learning_rate": 6.182526315789474e-05, "loss": 1.8822, "step": 83 }, { "epoch": 0.01437002822684116, "grad_norm": 0.7366199493408203, "learning_rate": 6.129684210526316e-05, "loss": 1.7163, "step": 84 }, { "epoch": 0.014541099991446411, "grad_norm": 0.9039151668548584, "learning_rate": 6.076842105263158e-05, "loss": 2.2045, "step": 85 }, { "epoch": 0.014712171756051664, "grad_norm": 0.7939276695251465, "learning_rate": 6.024e-05, "loss": 1.9084, "step": 86 }, { "epoch": 0.014883243520656916, "grad_norm": 0.8294872641563416, "learning_rate": 5.971157894736842e-05, "loss": 1.9239, "step": 87 }, { "epoch": 0.015054315285262167, "grad_norm": 0.8802934288978577, "learning_rate": 5.9183157894736835e-05, "loss": 2.0455, "step": 88 }, { "epoch": 0.01522538704986742, "grad_norm": 0.889517068862915, "learning_rate": 5.8654736842105267e-05, "loss": 1.9911, "step": 89 }, { "epoch": 0.015396458814472672, "grad_norm": 1.0230233669281006, "learning_rate": 5.8126315789473684e-05, "loss": 1.5023, "step": 90 }, { "epoch": 0.015567530579077922, "grad_norm": 0.9384157657623291, "learning_rate": 5.759789473684211e-05, "loss": 1.9499, "step": 91 }, { "epoch": 0.015738602343683177, "grad_norm": 0.9163877367973328, "learning_rate": 5.706947368421053e-05, "loss": 1.7279, "step": 92 }, { "epoch": 0.015909674108288425, "grad_norm": 1.0640720129013062, "learning_rate": 5.6541052631578945e-05, "loss": 2.0255, "step": 93 }, { "epoch": 0.016080745872893678, "grad_norm": 1.0487096309661865, "learning_rate": 5.601263157894736e-05, "loss": 1.7711, "step": 94 }, { "epoch": 0.01625181763749893, "grad_norm": 1.0018972158432007, "learning_rate": 5.5484210526315794e-05, "loss": 1.5404, "step": 95 }, { "epoch": 0.016422889402104183, "grad_norm": 1.243520736694336, "learning_rate": 5.495578947368421e-05, "loss": 1.5571, "step": 96 }, { "epoch": 0.016593961166709435, "grad_norm": 1.3318722248077393, "learning_rate": 5.442736842105264e-05, "loss": 1.9937, "step": 97 }, { "epoch": 0.016765032931314688, "grad_norm": 1.5980501174926758, "learning_rate": 5.3898947368421055e-05, "loss": 1.7534, "step": 98 }, { "epoch": 0.016936104695919937, "grad_norm": 2.008939266204834, "learning_rate": 5.337052631578947e-05, "loss": 1.5161, "step": 99 }, { "epoch": 0.01710717646052519, "grad_norm": 3.1472246646881104, "learning_rate": 5.284210526315789e-05, "loss": 2.7117, "step": 100 }, { "epoch": 0.01710717646052519, "eval_loss": 1.9197953939437866, "eval_runtime": 473.5626, "eval_samples_per_second": 5.199, "eval_steps_per_second": 1.301, "step": 100 }, { "epoch": 0.01727824822513044, "grad_norm": 0.6856660842895508, "learning_rate": 5.231368421052631e-05, "loss": 1.8056, "step": 101 }, { "epoch": 0.017449319989735694, "grad_norm": 0.8271840810775757, "learning_rate": 5.178526315789474e-05, "loss": 1.915, "step": 102 }, { "epoch": 0.017620391754340946, "grad_norm": 0.7571515440940857, "learning_rate": 5.1256842105263165e-05, "loss": 1.9442, "step": 103 }, { "epoch": 0.0177914635189462, "grad_norm": 0.867884635925293, "learning_rate": 5.072842105263158e-05, "loss": 2.145, "step": 104 }, { "epoch": 0.01796253528355145, "grad_norm": 0.7025319933891296, "learning_rate": 5.02e-05, "loss": 1.7341, "step": 105 }, { "epoch": 0.0181336070481567, "grad_norm": 0.6765435934066772, "learning_rate": 4.967157894736842e-05, "loss": 2.1242, "step": 106 }, { "epoch": 0.018304678812761953, "grad_norm": 0.5824732184410095, "learning_rate": 4.914315789473684e-05, "loss": 1.9422, "step": 107 }, { "epoch": 0.018475750577367205, "grad_norm": 0.5176606178283691, "learning_rate": 4.861473684210526e-05, "loss": 2.0242, "step": 108 }, { "epoch": 0.018646822341972458, "grad_norm": 0.5743870139122009, "learning_rate": 4.8086315789473686e-05, "loss": 2.0175, "step": 109 }, { "epoch": 0.01881789410657771, "grad_norm": 0.5101130604743958, "learning_rate": 4.7557894736842104e-05, "loss": 1.8344, "step": 110 }, { "epoch": 0.018988965871182963, "grad_norm": 0.5062516927719116, "learning_rate": 4.702947368421053e-05, "loss": 1.9409, "step": 111 }, { "epoch": 0.01916003763578821, "grad_norm": 0.48760825395584106, "learning_rate": 4.6501052631578946e-05, "loss": 1.8474, "step": 112 }, { "epoch": 0.019331109400393464, "grad_norm": 0.492199182510376, "learning_rate": 4.5972631578947364e-05, "loss": 1.9067, "step": 113 }, { "epoch": 0.019502181164998716, "grad_norm": 0.4930146336555481, "learning_rate": 4.544421052631579e-05, "loss": 1.7696, "step": 114 }, { "epoch": 0.01967325292960397, "grad_norm": 0.5493840575218201, "learning_rate": 4.4915789473684213e-05, "loss": 2.0178, "step": 115 }, { "epoch": 0.01984432469420922, "grad_norm": 0.5961560606956482, "learning_rate": 4.438736842105263e-05, "loss": 1.6976, "step": 116 }, { "epoch": 0.020015396458814474, "grad_norm": 0.5364917516708374, "learning_rate": 4.3858947368421056e-05, "loss": 1.847, "step": 117 }, { "epoch": 0.020186468223419726, "grad_norm": 0.5656726956367493, "learning_rate": 4.3330526315789474e-05, "loss": 1.9252, "step": 118 }, { "epoch": 0.020357539988024975, "grad_norm": 0.5928261876106262, "learning_rate": 4.280210526315789e-05, "loss": 1.9079, "step": 119 }, { "epoch": 0.020528611752630228, "grad_norm": 0.5389055609703064, "learning_rate": 4.2273684210526317e-05, "loss": 1.7967, "step": 120 }, { "epoch": 0.02069968351723548, "grad_norm": 0.6076517701148987, "learning_rate": 4.174526315789474e-05, "loss": 1.793, "step": 121 }, { "epoch": 0.020870755281840733, "grad_norm": 0.6341645121574402, "learning_rate": 4.121684210526316e-05, "loss": 1.9671, "step": 122 }, { "epoch": 0.021041827046445985, "grad_norm": 0.6870526075363159, "learning_rate": 4.068842105263158e-05, "loss": 1.7407, "step": 123 }, { "epoch": 0.021212898811051237, "grad_norm": 0.6322492957115173, "learning_rate": 4.016e-05, "loss": 1.949, "step": 124 }, { "epoch": 0.021383970575656486, "grad_norm": 0.6672692894935608, "learning_rate": 3.963157894736842e-05, "loss": 1.9643, "step": 125 }, { "epoch": 0.02155504234026174, "grad_norm": 0.6229186058044434, "learning_rate": 3.9103157894736844e-05, "loss": 1.7221, "step": 126 }, { "epoch": 0.02172611410486699, "grad_norm": 0.7301349639892578, "learning_rate": 3.857473684210526e-05, "loss": 2.0484, "step": 127 }, { "epoch": 0.021897185869472244, "grad_norm": 0.7856729626655579, "learning_rate": 3.804631578947369e-05, "loss": 1.773, "step": 128 }, { "epoch": 0.022068257634077496, "grad_norm": 0.8492652177810669, "learning_rate": 3.7517894736842105e-05, "loss": 1.8202, "step": 129 }, { "epoch": 0.02223932939868275, "grad_norm": 0.7252649068832397, "learning_rate": 3.698947368421052e-05, "loss": 1.8962, "step": 130 }, { "epoch": 0.022410401163287998, "grad_norm": 0.7953678369522095, "learning_rate": 3.646105263157895e-05, "loss": 1.7289, "step": 131 }, { "epoch": 0.02258147292789325, "grad_norm": 0.728402853012085, "learning_rate": 3.593263157894737e-05, "loss": 1.9082, "step": 132 }, { "epoch": 0.022752544692498503, "grad_norm": 0.8220646381378174, "learning_rate": 3.540421052631579e-05, "loss": 1.9591, "step": 133 }, { "epoch": 0.022923616457103755, "grad_norm": 0.8018884062767029, "learning_rate": 3.4875789473684215e-05, "loss": 2.0895, "step": 134 }, { "epoch": 0.023094688221709007, "grad_norm": 0.7861109972000122, "learning_rate": 3.434736842105263e-05, "loss": 1.9219, "step": 135 }, { "epoch": 0.02326575998631426, "grad_norm": 0.8565029501914978, "learning_rate": 3.381894736842105e-05, "loss": 2.0818, "step": 136 }, { "epoch": 0.023436831750919512, "grad_norm": 0.986121416091919, "learning_rate": 3.329052631578947e-05, "loss": 2.0713, "step": 137 }, { "epoch": 0.02360790351552476, "grad_norm": 0.8753408789634705, "learning_rate": 3.27621052631579e-05, "loss": 1.7472, "step": 138 }, { "epoch": 0.023778975280130014, "grad_norm": 1.0510600805282593, "learning_rate": 3.223368421052632e-05, "loss": 2.1363, "step": 139 }, { "epoch": 0.023950047044735266, "grad_norm": 1.0964877605438232, "learning_rate": 3.1705263157894736e-05, "loss": 2.0689, "step": 140 }, { "epoch": 0.02412111880934052, "grad_norm": 1.000916838645935, "learning_rate": 3.117684210526316e-05, "loss": 1.9126, "step": 141 }, { "epoch": 0.02429219057394577, "grad_norm": 1.1080740690231323, "learning_rate": 3.064842105263158e-05, "loss": 2.4376, "step": 142 }, { "epoch": 0.024463262338551024, "grad_norm": 1.1512094736099243, "learning_rate": 3.012e-05, "loss": 1.8156, "step": 143 }, { "epoch": 0.024634334103156273, "grad_norm": 1.3569144010543823, "learning_rate": 2.9591578947368418e-05, "loss": 1.451, "step": 144 }, { "epoch": 0.024805405867761525, "grad_norm": 1.311303734779358, "learning_rate": 2.9063157894736842e-05, "loss": 2.0755, "step": 145 }, { "epoch": 0.024976477632366777, "grad_norm": 1.199066400527954, "learning_rate": 2.8534736842105264e-05, "loss": 1.3857, "step": 146 }, { "epoch": 0.02514754939697203, "grad_norm": 1.3259034156799316, "learning_rate": 2.800631578947368e-05, "loss": 1.804, "step": 147 }, { "epoch": 0.025318621161577282, "grad_norm": 1.8676377534866333, "learning_rate": 2.7477894736842106e-05, "loss": 1.8025, "step": 148 }, { "epoch": 0.025489692926182535, "grad_norm": 2.550297498703003, "learning_rate": 2.6949473684210527e-05, "loss": 2.0316, "step": 149 }, { "epoch": 0.025660764690787787, "grad_norm": 2.5897419452667236, "learning_rate": 2.6421052631578945e-05, "loss": 1.4353, "step": 150 }, { "epoch": 0.025660764690787787, "eval_loss": 1.8687154054641724, "eval_runtime": 474.321, "eval_samples_per_second": 5.191, "eval_steps_per_second": 1.299, "step": 150 }, { "epoch": 0.025831836455393036, "grad_norm": 0.429505318403244, "learning_rate": 2.589263157894737e-05, "loss": 1.536, "step": 151 }, { "epoch": 0.02600290821999829, "grad_norm": 0.45272913575172424, "learning_rate": 2.536421052631579e-05, "loss": 1.7085, "step": 152 }, { "epoch": 0.02617397998460354, "grad_norm": 0.4526635706424713, "learning_rate": 2.483578947368421e-05, "loss": 1.9542, "step": 153 }, { "epoch": 0.026345051749208794, "grad_norm": 0.5670432448387146, "learning_rate": 2.430736842105263e-05, "loss": 2.0316, "step": 154 }, { "epoch": 0.026516123513814046, "grad_norm": 0.5231799483299255, "learning_rate": 2.3778947368421052e-05, "loss": 2.0184, "step": 155 }, { "epoch": 0.0266871952784193, "grad_norm": 0.5869430303573608, "learning_rate": 2.3250526315789473e-05, "loss": 2.0097, "step": 156 }, { "epoch": 0.026858267043024547, "grad_norm": 0.6328916549682617, "learning_rate": 2.2722105263157894e-05, "loss": 1.865, "step": 157 }, { "epoch": 0.0270293388076298, "grad_norm": 0.587527334690094, "learning_rate": 2.2193684210526316e-05, "loss": 1.9417, "step": 158 }, { "epoch": 0.027200410572235052, "grad_norm": 0.5576383471488953, "learning_rate": 2.1665263157894737e-05, "loss": 1.8592, "step": 159 }, { "epoch": 0.027371482336840305, "grad_norm": 0.5361258387565613, "learning_rate": 2.1136842105263158e-05, "loss": 1.9214, "step": 160 }, { "epoch": 0.027542554101445557, "grad_norm": 0.5628899931907654, "learning_rate": 2.060842105263158e-05, "loss": 1.9765, "step": 161 }, { "epoch": 0.02771362586605081, "grad_norm": 0.6240307092666626, "learning_rate": 2.008e-05, "loss": 2.0202, "step": 162 }, { "epoch": 0.02788469763065606, "grad_norm": 0.5304953455924988, "learning_rate": 1.9551578947368422e-05, "loss": 1.76, "step": 163 }, { "epoch": 0.02805576939526131, "grad_norm": 0.5631970763206482, "learning_rate": 1.9023157894736843e-05, "loss": 1.8933, "step": 164 }, { "epoch": 0.028226841159866563, "grad_norm": 0.6594295501708984, "learning_rate": 1.849473684210526e-05, "loss": 1.9873, "step": 165 }, { "epoch": 0.028397912924471816, "grad_norm": 0.5234274864196777, "learning_rate": 1.7966315789473686e-05, "loss": 1.5752, "step": 166 }, { "epoch": 0.02856898468907707, "grad_norm": 0.5521509647369385, "learning_rate": 1.7437894736842107e-05, "loss": 1.878, "step": 167 }, { "epoch": 0.02874005645368232, "grad_norm": 0.5257223844528198, "learning_rate": 1.6909473684210525e-05, "loss": 1.6768, "step": 168 }, { "epoch": 0.028911128218287573, "grad_norm": 0.5715846419334412, "learning_rate": 1.638105263157895e-05, "loss": 1.7078, "step": 169 }, { "epoch": 0.029082199982892822, "grad_norm": 0.5698923468589783, "learning_rate": 1.5852631578947368e-05, "loss": 1.8397, "step": 170 }, { "epoch": 0.029253271747498075, "grad_norm": 0.5703829526901245, "learning_rate": 1.532421052631579e-05, "loss": 1.667, "step": 171 }, { "epoch": 0.029424343512103327, "grad_norm": 0.5842074155807495, "learning_rate": 1.4795789473684209e-05, "loss": 1.8684, "step": 172 }, { "epoch": 0.02959541527670858, "grad_norm": 0.6371638178825378, "learning_rate": 1.4267368421052632e-05, "loss": 1.932, "step": 173 }, { "epoch": 0.029766487041313832, "grad_norm": 0.6441742181777954, "learning_rate": 1.3738947368421053e-05, "loss": 1.8601, "step": 174 }, { "epoch": 0.029937558805919084, "grad_norm": 0.6196243762969971, "learning_rate": 1.3210526315789473e-05, "loss": 1.9612, "step": 175 }, { "epoch": 0.030108630570524333, "grad_norm": 0.6074597239494324, "learning_rate": 1.2682105263157896e-05, "loss": 1.8816, "step": 176 }, { "epoch": 0.030279702335129586, "grad_norm": 0.6216224431991577, "learning_rate": 1.2153684210526315e-05, "loss": 1.8326, "step": 177 }, { "epoch": 0.03045077409973484, "grad_norm": 0.6427397727966309, "learning_rate": 1.1625263157894737e-05, "loss": 1.65, "step": 178 }, { "epoch": 0.03062184586434009, "grad_norm": 0.7216449975967407, "learning_rate": 1.1096842105263158e-05, "loss": 1.9551, "step": 179 }, { "epoch": 0.030792917628945343, "grad_norm": 0.6623689532279968, "learning_rate": 1.0568421052631579e-05, "loss": 1.7214, "step": 180 }, { "epoch": 0.030963989393550596, "grad_norm": 1.0571134090423584, "learning_rate": 1.004e-05, "loss": 1.9657, "step": 181 }, { "epoch": 0.031135061158155845, "grad_norm": 0.7337217330932617, "learning_rate": 9.511578947368422e-06, "loss": 1.9204, "step": 182 }, { "epoch": 0.0313061329227611, "grad_norm": 0.8052051067352295, "learning_rate": 8.983157894736843e-06, "loss": 1.9703, "step": 183 }, { "epoch": 0.03147720468736635, "grad_norm": 0.8778511881828308, "learning_rate": 8.454736842105263e-06, "loss": 1.8453, "step": 184 }, { "epoch": 0.0316482764519716, "grad_norm": 0.8391135931015015, "learning_rate": 7.926315789473684e-06, "loss": 1.9047, "step": 185 }, { "epoch": 0.03181934821657685, "grad_norm": 0.8739292025566101, "learning_rate": 7.397894736842104e-06, "loss": 1.8505, "step": 186 }, { "epoch": 0.03199041998118211, "grad_norm": 0.9787918329238892, "learning_rate": 6.8694736842105265e-06, "loss": 1.9557, "step": 187 }, { "epoch": 0.032161491745787356, "grad_norm": 0.8386358022689819, "learning_rate": 6.341052631578948e-06, "loss": 1.9371, "step": 188 }, { "epoch": 0.03233256351039261, "grad_norm": 0.9877040982246399, "learning_rate": 5.812631578947368e-06, "loss": 1.945, "step": 189 }, { "epoch": 0.03250363527499786, "grad_norm": 0.8487772941589355, "learning_rate": 5.2842105263157896e-06, "loss": 2.0013, "step": 190 }, { "epoch": 0.03267470703960312, "grad_norm": 0.9981456995010376, "learning_rate": 4.755789473684211e-06, "loss": 2.1154, "step": 191 }, { "epoch": 0.032845778804208366, "grad_norm": 0.9859482049942017, "learning_rate": 4.227368421052631e-06, "loss": 1.6442, "step": 192 }, { "epoch": 0.033016850568813615, "grad_norm": 1.0620956420898438, "learning_rate": 3.698947368421052e-06, "loss": 2.079, "step": 193 }, { "epoch": 0.03318792233341887, "grad_norm": 1.0179574489593506, "learning_rate": 3.170526315789474e-06, "loss": 1.6924, "step": 194 }, { "epoch": 0.03335899409802412, "grad_norm": 1.1170974969863892, "learning_rate": 2.6421052631578948e-06, "loss": 2.0182, "step": 195 }, { "epoch": 0.033530065862629375, "grad_norm": 1.1970032453536987, "learning_rate": 2.1136842105263157e-06, "loss": 1.8493, "step": 196 }, { "epoch": 0.033701137627234624, "grad_norm": 1.5224543809890747, "learning_rate": 1.585263157894737e-06, "loss": 1.8961, "step": 197 }, { "epoch": 0.03387220939183987, "grad_norm": 1.648374319076538, "learning_rate": 1.0568421052631578e-06, "loss": 2.2679, "step": 198 }, { "epoch": 0.03404328115644513, "grad_norm": 1.8483800888061523, "learning_rate": 5.284210526315789e-07, "loss": 1.8236, "step": 199 }, { "epoch": 0.03421435292105038, "grad_norm": 2.218536615371704, "learning_rate": 0.0, "loss": 1.7512, "step": 200 }, { "epoch": 0.03421435292105038, "eval_loss": 1.8535966873168945, "eval_runtime": 473.8317, "eval_samples_per_second": 5.196, "eval_steps_per_second": 1.3, "step": 200 } ], "logging_steps": 1, "max_steps": 200, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 50, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 5, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.4129939313013555e+17, "train_batch_size": 4, "trial_name": null, "trial_params": null }