|
{ |
|
"best_metric": 1.2866495847702026, |
|
"best_model_checkpoint": "miner_id_24/checkpoint-50", |
|
"epoch": 0.1350438892640108, |
|
"eval_steps": 50, |
|
"global_step": 50, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0027008777852802163, |
|
"grad_norm": 0.3624476194381714, |
|
"learning_rate": 1e-05, |
|
"loss": 1.4218, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0027008777852802163, |
|
"eval_loss": 1.4606833457946777, |
|
"eval_runtime": 15.6141, |
|
"eval_samples_per_second": 39.964, |
|
"eval_steps_per_second": 9.991, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0054017555705604325, |
|
"grad_norm": 0.3938450813293457, |
|
"learning_rate": 2e-05, |
|
"loss": 1.3709, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.008102633355840648, |
|
"grad_norm": 0.39943069219589233, |
|
"learning_rate": 3e-05, |
|
"loss": 1.4067, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.010803511141120865, |
|
"grad_norm": 0.40101104974746704, |
|
"learning_rate": 4e-05, |
|
"loss": 1.4221, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.01350438892640108, |
|
"grad_norm": 0.36832377314567566, |
|
"learning_rate": 5e-05, |
|
"loss": 1.4441, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.016205266711681297, |
|
"grad_norm": 0.41918250918388367, |
|
"learning_rate": 6e-05, |
|
"loss": 1.4905, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.018906144496961513, |
|
"grad_norm": 0.4318130612373352, |
|
"learning_rate": 7e-05, |
|
"loss": 1.3951, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.02160702228224173, |
|
"grad_norm": 0.524061381816864, |
|
"learning_rate": 8e-05, |
|
"loss": 1.3791, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.024307900067521943, |
|
"grad_norm": 0.5278139710426331, |
|
"learning_rate": 9e-05, |
|
"loss": 1.3826, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.02700877785280216, |
|
"grad_norm": 0.5666940808296204, |
|
"learning_rate": 0.0001, |
|
"loss": 1.3379, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.029709655638082377, |
|
"grad_norm": 0.5191312432289124, |
|
"learning_rate": 9.999316524962345e-05, |
|
"loss": 1.1892, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.03241053342336259, |
|
"grad_norm": 0.430105596780777, |
|
"learning_rate": 9.997266286704631e-05, |
|
"loss": 1.1462, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.035111411208642807, |
|
"grad_norm": 0.4412747621536255, |
|
"learning_rate": 9.993849845741524e-05, |
|
"loss": 1.2241, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.03781228899392303, |
|
"grad_norm": 0.4131179451942444, |
|
"learning_rate": 9.989068136093873e-05, |
|
"loss": 1.2011, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.04051316677920324, |
|
"grad_norm": 0.4340050518512726, |
|
"learning_rate": 9.98292246503335e-05, |
|
"loss": 1.1766, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.04321404456448346, |
|
"grad_norm": 0.42998600006103516, |
|
"learning_rate": 9.975414512725057e-05, |
|
"loss": 1.2414, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.04591492234976367, |
|
"grad_norm": 0.42034226655960083, |
|
"learning_rate": 9.966546331768191e-05, |
|
"loss": 1.0788, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.048615800135043886, |
|
"grad_norm": 0.5337233543395996, |
|
"learning_rate": 9.956320346634876e-05, |
|
"loss": 1.1981, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.05131667792032411, |
|
"grad_norm": 0.4583562910556793, |
|
"learning_rate": 9.944739353007344e-05, |
|
"loss": 1.2196, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.05401755570560432, |
|
"grad_norm": 0.49332761764526367, |
|
"learning_rate": 9.931806517013612e-05, |
|
"loss": 1.1968, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.05671843349088454, |
|
"grad_norm": 0.48594799637794495, |
|
"learning_rate": 9.917525374361912e-05, |
|
"loss": 1.1309, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.05941931127616475, |
|
"grad_norm": 0.5126538276672363, |
|
"learning_rate": 9.901899829374047e-05, |
|
"loss": 1.1186, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.062120189061444966, |
|
"grad_norm": 0.47921475768089294, |
|
"learning_rate": 9.884934153917997e-05, |
|
"loss": 1.0795, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.06482106684672519, |
|
"grad_norm": 0.4876593053340912, |
|
"learning_rate": 9.86663298624003e-05, |
|
"loss": 1.0949, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.0675219446320054, |
|
"grad_norm": 0.536691427230835, |
|
"learning_rate": 9.847001329696653e-05, |
|
"loss": 1.0373, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.07022282241728561, |
|
"grad_norm": 0.6566392779350281, |
|
"learning_rate": 9.826044551386744e-05, |
|
"loss": 1.2434, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.07292370020256583, |
|
"grad_norm": 0.5158787369728088, |
|
"learning_rate": 9.803768380684242e-05, |
|
"loss": 1.1554, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.07562457798784605, |
|
"grad_norm": 0.5759814977645874, |
|
"learning_rate": 9.780178907671789e-05, |
|
"loss": 1.3289, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.07832545577312626, |
|
"grad_norm": 0.5752044320106506, |
|
"learning_rate": 9.755282581475769e-05, |
|
"loss": 1.1889, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.08102633355840648, |
|
"grad_norm": 0.6152178645133972, |
|
"learning_rate": 9.729086208503174e-05, |
|
"loss": 1.1266, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.0837272113436867, |
|
"grad_norm": 0.6457420587539673, |
|
"learning_rate": 9.701596950580806e-05, |
|
"loss": 1.1839, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.08642808912896692, |
|
"grad_norm": 0.6329084038734436, |
|
"learning_rate": 9.672822322997305e-05, |
|
"loss": 1.234, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.08912896691424713, |
|
"grad_norm": 0.6863877773284912, |
|
"learning_rate": 9.642770192448536e-05, |
|
"loss": 1.2342, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.09182984469952735, |
|
"grad_norm": 0.6431719660758972, |
|
"learning_rate": 9.611448774886924e-05, |
|
"loss": 1.177, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.09453072248480757, |
|
"grad_norm": 0.6808779835700989, |
|
"learning_rate": 9.578866633275288e-05, |
|
"loss": 1.3163, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.09723160027008777, |
|
"grad_norm": 0.7064206600189209, |
|
"learning_rate": 9.545032675245813e-05, |
|
"loss": 1.3071, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.099932478055368, |
|
"grad_norm": 0.7828763723373413, |
|
"learning_rate": 9.509956150664796e-05, |
|
"loss": 1.3091, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.10263335584064821, |
|
"grad_norm": 0.8526866436004639, |
|
"learning_rate": 9.473646649103818e-05, |
|
"loss": 1.2666, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.10533423362592843, |
|
"grad_norm": 0.7964354157447815, |
|
"learning_rate": 9.43611409721806e-05, |
|
"loss": 1.4333, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.10803511141120864, |
|
"grad_norm": 0.8185365796089172, |
|
"learning_rate": 9.397368756032445e-05, |
|
"loss": 1.3689, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.11073598919648886, |
|
"grad_norm": 0.8150967955589294, |
|
"learning_rate": 9.357421218136386e-05, |
|
"loss": 1.3536, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.11343686698176908, |
|
"grad_norm": 0.8503111004829407, |
|
"learning_rate": 9.316282404787871e-05, |
|
"loss": 1.3862, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.11613774476704929, |
|
"grad_norm": 0.8694619536399841, |
|
"learning_rate": 9.273963562927695e-05, |
|
"loss": 1.3512, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.1188386225523295, |
|
"grad_norm": 1.0029643774032593, |
|
"learning_rate": 9.230476262104677e-05, |
|
"loss": 1.2742, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.12153950033760973, |
|
"grad_norm": 1.017909049987793, |
|
"learning_rate": 9.185832391312644e-05, |
|
"loss": 1.4303, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.12424037812288993, |
|
"grad_norm": 1.519902229309082, |
|
"learning_rate": 9.140044155740101e-05, |
|
"loss": 1.4151, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.12694125590817015, |
|
"grad_norm": 1.4137510061264038, |
|
"learning_rate": 9.093124073433463e-05, |
|
"loss": 1.4488, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.12964213369345037, |
|
"grad_norm": 1.4305354356765747, |
|
"learning_rate": 9.045084971874738e-05, |
|
"loss": 1.3568, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.1323430114787306, |
|
"grad_norm": 1.5225553512573242, |
|
"learning_rate": 8.995939984474624e-05, |
|
"loss": 1.2391, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.1350438892640108, |
|
"grad_norm": 2.4049932956695557, |
|
"learning_rate": 8.945702546981969e-05, |
|
"loss": 1.4481, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.1350438892640108, |
|
"eval_loss": 1.2866495847702026, |
|
"eval_runtime": 15.5441, |
|
"eval_samples_per_second": 40.144, |
|
"eval_steps_per_second": 10.036, |
|
"step": 50 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 200, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 50, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 5, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 0 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.435563454365696e+16, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|