{ "best_metric": 1.2866495847702026, "best_model_checkpoint": "miner_id_24/checkpoint-50", "epoch": 0.1350438892640108, "eval_steps": 50, "global_step": 50, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0027008777852802163, "grad_norm": 0.3624476194381714, "learning_rate": 1e-05, "loss": 1.4218, "step": 1 }, { "epoch": 0.0027008777852802163, "eval_loss": 1.4606833457946777, "eval_runtime": 15.6141, "eval_samples_per_second": 39.964, "eval_steps_per_second": 9.991, "step": 1 }, { "epoch": 0.0054017555705604325, "grad_norm": 0.3938450813293457, "learning_rate": 2e-05, "loss": 1.3709, "step": 2 }, { "epoch": 0.008102633355840648, "grad_norm": 0.39943069219589233, "learning_rate": 3e-05, "loss": 1.4067, "step": 3 }, { "epoch": 0.010803511141120865, "grad_norm": 0.40101104974746704, "learning_rate": 4e-05, "loss": 1.4221, "step": 4 }, { "epoch": 0.01350438892640108, "grad_norm": 0.36832377314567566, "learning_rate": 5e-05, "loss": 1.4441, "step": 5 }, { "epoch": 0.016205266711681297, "grad_norm": 0.41918250918388367, "learning_rate": 6e-05, "loss": 1.4905, "step": 6 }, { "epoch": 0.018906144496961513, "grad_norm": 0.4318130612373352, "learning_rate": 7e-05, "loss": 1.3951, "step": 7 }, { "epoch": 0.02160702228224173, "grad_norm": 0.524061381816864, "learning_rate": 8e-05, "loss": 1.3791, "step": 8 }, { "epoch": 0.024307900067521943, "grad_norm": 0.5278139710426331, "learning_rate": 9e-05, "loss": 1.3826, "step": 9 }, { "epoch": 0.02700877785280216, "grad_norm": 0.5666940808296204, "learning_rate": 0.0001, "loss": 1.3379, "step": 10 }, { "epoch": 0.029709655638082377, "grad_norm": 0.5191312432289124, "learning_rate": 9.999316524962345e-05, "loss": 1.1892, "step": 11 }, { "epoch": 0.03241053342336259, "grad_norm": 0.430105596780777, "learning_rate": 9.997266286704631e-05, "loss": 1.1462, "step": 12 }, { "epoch": 0.035111411208642807, "grad_norm": 0.4412747621536255, "learning_rate": 9.993849845741524e-05, "loss": 1.2241, "step": 13 }, { "epoch": 0.03781228899392303, "grad_norm": 0.4131179451942444, "learning_rate": 9.989068136093873e-05, "loss": 1.2011, "step": 14 }, { "epoch": 0.04051316677920324, "grad_norm": 0.4340050518512726, "learning_rate": 9.98292246503335e-05, "loss": 1.1766, "step": 15 }, { "epoch": 0.04321404456448346, "grad_norm": 0.42998600006103516, "learning_rate": 9.975414512725057e-05, "loss": 1.2414, "step": 16 }, { "epoch": 0.04591492234976367, "grad_norm": 0.42034226655960083, "learning_rate": 9.966546331768191e-05, "loss": 1.0788, "step": 17 }, { "epoch": 0.048615800135043886, "grad_norm": 0.5337233543395996, "learning_rate": 9.956320346634876e-05, "loss": 1.1981, "step": 18 }, { "epoch": 0.05131667792032411, "grad_norm": 0.4583562910556793, "learning_rate": 9.944739353007344e-05, "loss": 1.2196, "step": 19 }, { "epoch": 0.05401755570560432, "grad_norm": 0.49332761764526367, "learning_rate": 9.931806517013612e-05, "loss": 1.1968, "step": 20 }, { "epoch": 0.05671843349088454, "grad_norm": 0.48594799637794495, "learning_rate": 9.917525374361912e-05, "loss": 1.1309, "step": 21 }, { "epoch": 0.05941931127616475, "grad_norm": 0.5126538276672363, "learning_rate": 9.901899829374047e-05, "loss": 1.1186, "step": 22 }, { "epoch": 0.062120189061444966, "grad_norm": 0.47921475768089294, "learning_rate": 9.884934153917997e-05, "loss": 1.0795, "step": 23 }, { "epoch": 0.06482106684672519, "grad_norm": 0.4876593053340912, "learning_rate": 9.86663298624003e-05, "loss": 1.0949, "step": 24 }, { "epoch": 0.0675219446320054, "grad_norm": 0.536691427230835, "learning_rate": 9.847001329696653e-05, "loss": 1.0373, "step": 25 }, { "epoch": 0.07022282241728561, "grad_norm": 0.6566392779350281, "learning_rate": 9.826044551386744e-05, "loss": 1.2434, "step": 26 }, { "epoch": 0.07292370020256583, "grad_norm": 0.5158787369728088, "learning_rate": 9.803768380684242e-05, "loss": 1.1554, "step": 27 }, { "epoch": 0.07562457798784605, "grad_norm": 0.5759814977645874, "learning_rate": 9.780178907671789e-05, "loss": 1.3289, "step": 28 }, { "epoch": 0.07832545577312626, "grad_norm": 0.5752044320106506, "learning_rate": 9.755282581475769e-05, "loss": 1.1889, "step": 29 }, { "epoch": 0.08102633355840648, "grad_norm": 0.6152178645133972, "learning_rate": 9.729086208503174e-05, "loss": 1.1266, "step": 30 }, { "epoch": 0.0837272113436867, "grad_norm": 0.6457420587539673, "learning_rate": 9.701596950580806e-05, "loss": 1.1839, "step": 31 }, { "epoch": 0.08642808912896692, "grad_norm": 0.6329084038734436, "learning_rate": 9.672822322997305e-05, "loss": 1.234, "step": 32 }, { "epoch": 0.08912896691424713, "grad_norm": 0.6863877773284912, "learning_rate": 9.642770192448536e-05, "loss": 1.2342, "step": 33 }, { "epoch": 0.09182984469952735, "grad_norm": 0.6431719660758972, "learning_rate": 9.611448774886924e-05, "loss": 1.177, "step": 34 }, { "epoch": 0.09453072248480757, "grad_norm": 0.6808779835700989, "learning_rate": 9.578866633275288e-05, "loss": 1.3163, "step": 35 }, { "epoch": 0.09723160027008777, "grad_norm": 0.7064206600189209, "learning_rate": 9.545032675245813e-05, "loss": 1.3071, "step": 36 }, { "epoch": 0.099932478055368, "grad_norm": 0.7828763723373413, "learning_rate": 9.509956150664796e-05, "loss": 1.3091, "step": 37 }, { "epoch": 0.10263335584064821, "grad_norm": 0.8526866436004639, "learning_rate": 9.473646649103818e-05, "loss": 1.2666, "step": 38 }, { "epoch": 0.10533423362592843, "grad_norm": 0.7964354157447815, "learning_rate": 9.43611409721806e-05, "loss": 1.4333, "step": 39 }, { "epoch": 0.10803511141120864, "grad_norm": 0.8185365796089172, "learning_rate": 9.397368756032445e-05, "loss": 1.3689, "step": 40 }, { "epoch": 0.11073598919648886, "grad_norm": 0.8150967955589294, "learning_rate": 9.357421218136386e-05, "loss": 1.3536, "step": 41 }, { "epoch": 0.11343686698176908, "grad_norm": 0.8503111004829407, "learning_rate": 9.316282404787871e-05, "loss": 1.3862, "step": 42 }, { "epoch": 0.11613774476704929, "grad_norm": 0.8694619536399841, "learning_rate": 9.273963562927695e-05, "loss": 1.3512, "step": 43 }, { "epoch": 0.1188386225523295, "grad_norm": 1.0029643774032593, "learning_rate": 9.230476262104677e-05, "loss": 1.2742, "step": 44 }, { "epoch": 0.12153950033760973, "grad_norm": 1.017909049987793, "learning_rate": 9.185832391312644e-05, "loss": 1.4303, "step": 45 }, { "epoch": 0.12424037812288993, "grad_norm": 1.519902229309082, "learning_rate": 9.140044155740101e-05, "loss": 1.4151, "step": 46 }, { "epoch": 0.12694125590817015, "grad_norm": 1.4137510061264038, "learning_rate": 9.093124073433463e-05, "loss": 1.4488, "step": 47 }, { "epoch": 0.12964213369345037, "grad_norm": 1.4305354356765747, "learning_rate": 9.045084971874738e-05, "loss": 1.3568, "step": 48 }, { "epoch": 0.1323430114787306, "grad_norm": 1.5225553512573242, "learning_rate": 8.995939984474624e-05, "loss": 1.2391, "step": 49 }, { "epoch": 0.1350438892640108, "grad_norm": 2.4049932956695557, "learning_rate": 8.945702546981969e-05, "loss": 1.4481, "step": 50 }, { "epoch": 0.1350438892640108, "eval_loss": 1.2866495847702026, "eval_runtime": 15.5441, "eval_samples_per_second": 40.144, "eval_steps_per_second": 10.036, "step": 50 } ], "logging_steps": 1, "max_steps": 200, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 50, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 5, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 1.435563454365696e+16, "train_batch_size": 8, "trial_name": null, "trial_params": null }