{ "best_metric": 1.031202793121338, "best_model_checkpoint": "miner_id_24/checkpoint-100", "epoch": 0.0184314809694959, "eval_steps": 50, "global_step": 100, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.000184314809694959, "grad_norm": 5.6318583488464355, "learning_rate": 1e-05, "loss": 1.5347, "step": 1 }, { "epoch": 0.000184314809694959, "eval_loss": 1.8659999370574951, "eval_runtime": 690.4469, "eval_samples_per_second": 13.235, "eval_steps_per_second": 3.309, "step": 1 }, { "epoch": 0.000368629619389918, "grad_norm": 6.1715569496154785, "learning_rate": 2e-05, "loss": 1.4167, "step": 2 }, { "epoch": 0.000552944429084877, "grad_norm": 8.9030179977417, "learning_rate": 3e-05, "loss": 1.4119, "step": 3 }, { "epoch": 0.000737259238779836, "grad_norm": 3.8838179111480713, "learning_rate": 4e-05, "loss": 1.3732, "step": 4 }, { "epoch": 0.0009215740484747949, "grad_norm": 2.0309371948242188, "learning_rate": 5e-05, "loss": 1.3939, "step": 5 }, { "epoch": 0.001105888858169754, "grad_norm": 2.3626461029052734, "learning_rate": 6e-05, "loss": 1.3555, "step": 6 }, { "epoch": 0.001290203667864713, "grad_norm": 1.0350866317749023, "learning_rate": 7e-05, "loss": 1.3572, "step": 7 }, { "epoch": 0.001474518477559672, "grad_norm": 0.9269211292266846, "learning_rate": 8e-05, "loss": 1.3471, "step": 8 }, { "epoch": 0.0016588332872546309, "grad_norm": 0.5549191236495972, "learning_rate": 9e-05, "loss": 1.1463, "step": 9 }, { "epoch": 0.0018431480969495898, "grad_norm": 0.6531222462654114, "learning_rate": 0.0001, "loss": 1.1916, "step": 10 }, { "epoch": 0.0020274629066445488, "grad_norm": 0.6145579218864441, "learning_rate": 9.999316524962345e-05, "loss": 1.2206, "step": 11 }, { "epoch": 0.002211777716339508, "grad_norm": 0.5604798793792725, "learning_rate": 9.997266286704631e-05, "loss": 1.1826, "step": 12 }, { "epoch": 0.0023960925260344667, "grad_norm": 0.5696496367454529, "learning_rate": 9.993849845741524e-05, "loss": 1.0996, "step": 13 }, { "epoch": 0.002580407335729426, "grad_norm": 0.4159340262413025, "learning_rate": 9.989068136093873e-05, "loss": 1.1148, "step": 14 }, { "epoch": 0.002764722145424385, "grad_norm": 0.36338186264038086, "learning_rate": 9.98292246503335e-05, "loss": 1.2153, "step": 15 }, { "epoch": 0.002949036955119344, "grad_norm": 0.4037957191467285, "learning_rate": 9.975414512725057e-05, "loss": 1.1852, "step": 16 }, { "epoch": 0.003133351764814303, "grad_norm": 0.3298725485801697, "learning_rate": 9.966546331768191e-05, "loss": 1.178, "step": 17 }, { "epoch": 0.0033176665745092617, "grad_norm": 0.3211928606033325, "learning_rate": 9.956320346634876e-05, "loss": 1.1417, "step": 18 }, { "epoch": 0.003501981384204221, "grad_norm": 0.3271002173423767, "learning_rate": 9.944739353007344e-05, "loss": 1.0869, "step": 19 }, { "epoch": 0.0036862961938991796, "grad_norm": 0.3212750256061554, "learning_rate": 9.931806517013612e-05, "loss": 1.225, "step": 20 }, { "epoch": 0.003870611003594139, "grad_norm": 0.30292510986328125, "learning_rate": 9.917525374361912e-05, "loss": 1.0461, "step": 21 }, { "epoch": 0.0040549258132890975, "grad_norm": 0.32509174942970276, "learning_rate": 9.901899829374047e-05, "loss": 1.051, "step": 22 }, { "epoch": 0.004239240622984057, "grad_norm": 0.34532973170280457, "learning_rate": 9.884934153917997e-05, "loss": 1.1228, "step": 23 }, { "epoch": 0.004423555432679016, "grad_norm": 0.42038172483444214, "learning_rate": 9.86663298624003e-05, "loss": 1.0871, "step": 24 }, { "epoch": 0.004607870242373975, "grad_norm": 0.37254562973976135, "learning_rate": 9.847001329696653e-05, "loss": 1.1268, "step": 25 }, { "epoch": 0.004792185052068933, "grad_norm": 0.3617491126060486, "learning_rate": 9.826044551386744e-05, "loss": 1.0728, "step": 26 }, { "epoch": 0.0049764998617638926, "grad_norm": 0.36921921372413635, "learning_rate": 9.803768380684242e-05, "loss": 1.098, "step": 27 }, { "epoch": 0.005160814671458852, "grad_norm": 0.4023534953594208, "learning_rate": 9.780178907671789e-05, "loss": 1.1402, "step": 28 }, { "epoch": 0.005345129481153811, "grad_norm": 0.34242668747901917, "learning_rate": 9.755282581475769e-05, "loss": 1.1111, "step": 29 }, { "epoch": 0.00552944429084877, "grad_norm": 0.4034554064273834, "learning_rate": 9.729086208503174e-05, "loss": 1.1581, "step": 30 }, { "epoch": 0.005713759100543728, "grad_norm": 0.33662593364715576, "learning_rate": 9.701596950580806e-05, "loss": 1.0005, "step": 31 }, { "epoch": 0.005898073910238688, "grad_norm": 0.3856150209903717, "learning_rate": 9.672822322997305e-05, "loss": 1.121, "step": 32 }, { "epoch": 0.006082388719933647, "grad_norm": 0.37353983521461487, "learning_rate": 9.642770192448536e-05, "loss": 1.1154, "step": 33 }, { "epoch": 0.006266703529628606, "grad_norm": 0.4448533058166504, "learning_rate": 9.611448774886924e-05, "loss": 1.1909, "step": 34 }, { "epoch": 0.006451018339323564, "grad_norm": 0.4428851008415222, "learning_rate": 9.578866633275288e-05, "loss": 1.1107, "step": 35 }, { "epoch": 0.006635333149018523, "grad_norm": 0.4387335181236267, "learning_rate": 9.545032675245813e-05, "loss": 1.0992, "step": 36 }, { "epoch": 0.006819647958713483, "grad_norm": 0.47019171714782715, "learning_rate": 9.509956150664796e-05, "loss": 1.1371, "step": 37 }, { "epoch": 0.007003962768408442, "grad_norm": 0.4469152092933655, "learning_rate": 9.473646649103818e-05, "loss": 1.13, "step": 38 }, { "epoch": 0.007188277578103401, "grad_norm": 0.45129719376564026, "learning_rate": 9.43611409721806e-05, "loss": 1.1404, "step": 39 }, { "epoch": 0.007372592387798359, "grad_norm": 0.5520405769348145, "learning_rate": 9.397368756032445e-05, "loss": 1.2157, "step": 40 }, { "epoch": 0.0075569071974933184, "grad_norm": 0.5522369146347046, "learning_rate": 9.357421218136386e-05, "loss": 1.0666, "step": 41 }, { "epoch": 0.007741222007188278, "grad_norm": 0.5612422823905945, "learning_rate": 9.316282404787871e-05, "loss": 1.215, "step": 42 }, { "epoch": 0.007925536816883237, "grad_norm": 0.5706326365470886, "learning_rate": 9.273963562927695e-05, "loss": 1.1647, "step": 43 }, { "epoch": 0.008109851626578195, "grad_norm": 0.6179230809211731, "learning_rate": 9.230476262104677e-05, "loss": 1.1573, "step": 44 }, { "epoch": 0.008294166436273155, "grad_norm": 0.6024773716926575, "learning_rate": 9.185832391312644e-05, "loss": 1.1301, "step": 45 }, { "epoch": 0.008478481245968113, "grad_norm": 0.6232503056526184, "learning_rate": 9.140044155740101e-05, "loss": 1.0745, "step": 46 }, { "epoch": 0.008662796055663072, "grad_norm": 0.6185201406478882, "learning_rate": 9.093124073433463e-05, "loss": 1.0483, "step": 47 }, { "epoch": 0.008847110865358032, "grad_norm": 0.6560455560684204, "learning_rate": 9.045084971874738e-05, "loss": 1.0439, "step": 48 }, { "epoch": 0.00903142567505299, "grad_norm": 0.7669586539268494, "learning_rate": 8.995939984474624e-05, "loss": 1.0518, "step": 49 }, { "epoch": 0.00921574048474795, "grad_norm": 0.9614732265472412, "learning_rate": 8.945702546981969e-05, "loss": 0.8784, "step": 50 }, { "epoch": 0.00921574048474795, "eval_loss": 1.1081956624984741, "eval_runtime": 692.2817, "eval_samples_per_second": 13.2, "eval_steps_per_second": 3.301, "step": 50 }, { "epoch": 0.009400055294442908, "grad_norm": 0.48868894577026367, "learning_rate": 8.894386393810563e-05, "loss": 0.8871, "step": 51 }, { "epoch": 0.009584370104137867, "grad_norm": 0.4619584381580353, "learning_rate": 8.842005554284296e-05, "loss": 0.9272, "step": 52 }, { "epoch": 0.009768684913832827, "grad_norm": 0.4740832448005676, "learning_rate": 8.788574348801675e-05, "loss": 1.017, "step": 53 }, { "epoch": 0.009952999723527785, "grad_norm": 0.36349406838417053, "learning_rate": 8.73410738492077e-05, "loss": 1.0592, "step": 54 }, { "epoch": 0.010137314533222745, "grad_norm": 0.3204439878463745, "learning_rate": 8.678619553365659e-05, "loss": 0.9724, "step": 55 }, { "epoch": 0.010321629342917703, "grad_norm": 0.2830290198326111, "learning_rate": 8.622126023955446e-05, "loss": 0.9933, "step": 56 }, { "epoch": 0.010505944152612662, "grad_norm": 0.2824137806892395, "learning_rate": 8.564642241456986e-05, "loss": 1.0, "step": 57 }, { "epoch": 0.010690258962307622, "grad_norm": 0.2988187074661255, "learning_rate": 8.506183921362443e-05, "loss": 1.028, "step": 58 }, { "epoch": 0.01087457377200258, "grad_norm": 0.25733092427253723, "learning_rate": 8.44676704559283e-05, "loss": 1.0645, "step": 59 }, { "epoch": 0.01105888858169754, "grad_norm": 0.29357191920280457, "learning_rate": 8.386407858128706e-05, "loss": 1.0244, "step": 60 }, { "epoch": 0.011243203391392498, "grad_norm": 0.28911539912223816, "learning_rate": 8.32512286056924e-05, "loss": 1.0476, "step": 61 }, { "epoch": 0.011427518201087457, "grad_norm": 0.307320237159729, "learning_rate": 8.262928807620843e-05, "loss": 1.0962, "step": 62 }, { "epoch": 0.011611833010782417, "grad_norm": 0.28484243154525757, "learning_rate": 8.199842702516583e-05, "loss": 1.0024, "step": 63 }, { "epoch": 0.011796147820477375, "grad_norm": 0.2947733998298645, "learning_rate": 8.135881792367686e-05, "loss": 0.934, "step": 64 }, { "epoch": 0.011980462630172335, "grad_norm": 0.28290286660194397, "learning_rate": 8.07106356344834e-05, "loss": 0.92, "step": 65 }, { "epoch": 0.012164777439867294, "grad_norm": 0.29178595542907715, "learning_rate": 8.005405736415126e-05, "loss": 1.035, "step": 66 }, { "epoch": 0.012349092249562252, "grad_norm": 0.2829218804836273, "learning_rate": 7.938926261462366e-05, "loss": 1.0749, "step": 67 }, { "epoch": 0.012533407059257212, "grad_norm": 0.3027656376361847, "learning_rate": 7.871643313414718e-05, "loss": 1.0472, "step": 68 }, { "epoch": 0.01271772186895217, "grad_norm": 0.3055793344974518, "learning_rate": 7.803575286758364e-05, "loss": 1.0366, "step": 69 }, { "epoch": 0.012902036678647128, "grad_norm": 0.2900894582271576, "learning_rate": 7.734740790612136e-05, "loss": 1.063, "step": 70 }, { "epoch": 0.013086351488342089, "grad_norm": 0.2908357083797455, "learning_rate": 7.66515864363997e-05, "loss": 1.0328, "step": 71 }, { "epoch": 0.013270666298037047, "grad_norm": 0.2756956219673157, "learning_rate": 7.594847868906076e-05, "loss": 0.9437, "step": 72 }, { "epoch": 0.013454981107732007, "grad_norm": 0.2930951416492462, "learning_rate": 7.52382768867422e-05, "loss": 1.0172, "step": 73 }, { "epoch": 0.013639295917426965, "grad_norm": 0.29136356711387634, "learning_rate": 7.452117519152542e-05, "loss": 1.0952, "step": 74 }, { "epoch": 0.013823610727121923, "grad_norm": 0.32149508595466614, "learning_rate": 7.379736965185368e-05, "loss": 0.9384, "step": 75 }, { "epoch": 0.014007925536816884, "grad_norm": 0.3315494954586029, "learning_rate": 7.30670581489344e-05, "loss": 1.0781, "step": 76 }, { "epoch": 0.014192240346511842, "grad_norm": 0.31092604994773865, "learning_rate": 7.233044034264034e-05, "loss": 1.0356, "step": 77 }, { "epoch": 0.014376555156206802, "grad_norm": 0.3504660427570343, "learning_rate": 7.158771761692464e-05, "loss": 1.1046, "step": 78 }, { "epoch": 0.01456086996590176, "grad_norm": 0.31477150321006775, "learning_rate": 7.083909302476453e-05, "loss": 1.0652, "step": 79 }, { "epoch": 0.014745184775596719, "grad_norm": 0.3404114544391632, "learning_rate": 7.008477123264848e-05, "loss": 1.0259, "step": 80 }, { "epoch": 0.014929499585291679, "grad_norm": 0.32545822858810425, "learning_rate": 6.932495846462261e-05, "loss": 1.0509, "step": 81 }, { "epoch": 0.015113814394986637, "grad_norm": 0.3260136544704437, "learning_rate": 6.855986244591104e-05, "loss": 1.0279, "step": 82 }, { "epoch": 0.015298129204681597, "grad_norm": 0.37822264432907104, "learning_rate": 6.778969234612584e-05, "loss": 0.9802, "step": 83 }, { "epoch": 0.015482444014376555, "grad_norm": 0.37306562066078186, "learning_rate": 6.701465872208216e-05, "loss": 1.1249, "step": 84 }, { "epoch": 0.015666758824071515, "grad_norm": 0.37852713465690613, "learning_rate": 6.623497346023418e-05, "loss": 1.1109, "step": 85 }, { "epoch": 0.015851073633766474, "grad_norm": 0.3962250351905823, "learning_rate": 6.545084971874738e-05, "loss": 1.1005, "step": 86 }, { "epoch": 0.016035388443461432, "grad_norm": 0.41836705803871155, "learning_rate": 6.466250186922325e-05, "loss": 1.0755, "step": 87 }, { "epoch": 0.01621970325315639, "grad_norm": 0.4547691345214844, "learning_rate": 6.387014543809223e-05, "loss": 1.071, "step": 88 }, { "epoch": 0.01640401806285135, "grad_norm": 0.4516165256500244, "learning_rate": 6.307399704769099e-05, "loss": 1.089, "step": 89 }, { "epoch": 0.01658833287254631, "grad_norm": 0.46264412999153137, "learning_rate": 6.227427435703997e-05, "loss": 1.0303, "step": 90 }, { "epoch": 0.01677264768224127, "grad_norm": 0.5163282752037048, "learning_rate": 6.147119600233758e-05, "loss": 1.0642, "step": 91 }, { "epoch": 0.016956962491936227, "grad_norm": 0.49469366669654846, "learning_rate": 6.066498153718735e-05, "loss": 0.9772, "step": 92 }, { "epoch": 0.017141277301631185, "grad_norm": 0.5242800712585449, "learning_rate": 5.985585137257401e-05, "loss": 1.0933, "step": 93 }, { "epoch": 0.017325592111326144, "grad_norm": 0.5365490913391113, "learning_rate": 5.90440267166055e-05, "loss": 0.9827, "step": 94 }, { "epoch": 0.017509906921021105, "grad_norm": 0.5569202899932861, "learning_rate": 5.8229729514036705e-05, "loss": 1.1377, "step": 95 }, { "epoch": 0.017694221730716064, "grad_norm": 0.6217386722564697, "learning_rate": 5.74131823855921e-05, "loss": 1.0177, "step": 96 }, { "epoch": 0.017878536540411022, "grad_norm": 0.6114227175712585, "learning_rate": 5.6594608567103456e-05, "loss": 0.9871, "step": 97 }, { "epoch": 0.01806285135010598, "grad_norm": 0.6836398243904114, "learning_rate": 5.577423184847932e-05, "loss": 0.9991, "step": 98 }, { "epoch": 0.01824716615980094, "grad_norm": 0.7496846914291382, "learning_rate": 5.495227651252315e-05, "loss": 1.0059, "step": 99 }, { "epoch": 0.0184314809694959, "grad_norm": 0.8987806439399719, "learning_rate": 5.4128967273616625e-05, "loss": 0.7866, "step": 100 }, { "epoch": 0.0184314809694959, "eval_loss": 1.031202793121338, "eval_runtime": 692.9955, "eval_samples_per_second": 13.186, "eval_steps_per_second": 3.297, "step": 100 } ], "logging_steps": 1, "max_steps": 200, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 50, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 5, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 1.4158528819023053e+17, "train_batch_size": 8, "trial_name": null, "trial_params": null }