{
  "best_metric": 0.8939120173454285,
  "best_model_checkpoint": "miner_id_24/checkpoint-50",
  "epoch": 0.7782101167315175,
  "eval_steps": 50,
  "global_step": 50,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.01556420233463035,
      "grad_norm": 0.7498895525932312,
      "learning_rate": 1e-05,
      "loss": 1.0317,
      "step": 1
    },
    {
      "epoch": 0.01556420233463035,
      "eval_loss": 1.197574496269226,
      "eval_runtime": 2.5784,
      "eval_samples_per_second": 41.886,
      "eval_steps_per_second": 10.471,
      "step": 1
    },
    {
      "epoch": 0.0311284046692607,
      "grad_norm": 0.8710477352142334,
      "learning_rate": 2e-05,
      "loss": 1.1157,
      "step": 2
    },
    {
      "epoch": 0.04669260700389105,
      "grad_norm": 0.9946465492248535,
      "learning_rate": 3e-05,
      "loss": 1.131,
      "step": 3
    },
    {
      "epoch": 0.0622568093385214,
      "grad_norm": 0.8549498915672302,
      "learning_rate": 4e-05,
      "loss": 1.0619,
      "step": 4
    },
    {
      "epoch": 0.07782101167315175,
      "grad_norm": 0.6751075387001038,
      "learning_rate": 5e-05,
      "loss": 1.0552,
      "step": 5
    },
    {
      "epoch": 0.0933852140077821,
      "grad_norm": 0.6019833087921143,
      "learning_rate": 6e-05,
      "loss": 1.1261,
      "step": 6
    },
    {
      "epoch": 0.10894941634241245,
      "grad_norm": 0.5454035997390747,
      "learning_rate": 7e-05,
      "loss": 1.0568,
      "step": 7
    },
    {
      "epoch": 0.1245136186770428,
      "grad_norm": 0.4805472791194916,
      "learning_rate": 8e-05,
      "loss": 0.9972,
      "step": 8
    },
    {
      "epoch": 0.14007782101167315,
      "grad_norm": 0.5375916957855225,
      "learning_rate": 9e-05,
      "loss": 1.0284,
      "step": 9
    },
    {
      "epoch": 0.1556420233463035,
      "grad_norm": 0.553347110748291,
      "learning_rate": 0.0001,
      "loss": 1.0405,
      "step": 10
    },
    {
      "epoch": 0.17120622568093385,
      "grad_norm": 0.5552372336387634,
      "learning_rate": 9.999263238525136e-05,
      "loss": 1.0279,
      "step": 11
    },
    {
      "epoch": 0.1867704280155642,
      "grad_norm": 0.547298014163971,
      "learning_rate": 9.997053171227526e-05,
      "loss": 0.9419,
      "step": 12
    },
    {
      "epoch": 0.20233463035019456,
      "grad_norm": 0.48389917612075806,
      "learning_rate": 9.993370449424153e-05,
      "loss": 0.9592,
      "step": 13
    },
    {
      "epoch": 0.2178988326848249,
      "grad_norm": 0.4673521816730499,
      "learning_rate": 9.988216158430033e-05,
      "loss": 0.938,
      "step": 14
    },
    {
      "epoch": 0.23346303501945526,
      "grad_norm": 0.4670703113079071,
      "learning_rate": 9.981591817238378e-05,
      "loss": 0.8948,
      "step": 15
    },
    {
      "epoch": 0.2490272373540856,
      "grad_norm": 0.5665939450263977,
      "learning_rate": 9.973499378072945e-05,
      "loss": 1.0737,
      "step": 16
    },
    {
      "epoch": 0.26459143968871596,
      "grad_norm": 0.4406505525112152,
      "learning_rate": 9.963941225812701e-05,
      "loss": 0.9687,
      "step": 17
    },
    {
      "epoch": 0.2801556420233463,
      "grad_norm": 0.406009316444397,
      "learning_rate": 9.952920177288986e-05,
      "loss": 0.9306,
      "step": 18
    },
    {
      "epoch": 0.29571984435797666,
      "grad_norm": 0.3831186294555664,
      "learning_rate": 9.940439480455386e-05,
      "loss": 0.9469,
      "step": 19
    },
    {
      "epoch": 0.311284046692607,
      "grad_norm": 0.3777288794517517,
      "learning_rate": 9.926502813430545e-05,
      "loss": 0.9571,
      "step": 20
    },
    {
      "epoch": 0.32684824902723736,
      "grad_norm": 0.37088674306869507,
      "learning_rate": 9.911114283414205e-05,
      "loss": 0.9663,
      "step": 21
    },
    {
      "epoch": 0.3424124513618677,
      "grad_norm": 0.39943259954452515,
      "learning_rate": 9.89427842547679e-05,
      "loss": 0.9503,
      "step": 22
    },
    {
      "epoch": 0.35797665369649806,
      "grad_norm": 0.3599907457828522,
      "learning_rate": 9.876000201222912e-05,
      "loss": 0.9655,
      "step": 23
    },
    {
      "epoch": 0.3735408560311284,
      "grad_norm": 0.36868664622306824,
      "learning_rate": 9.856284997329158e-05,
      "loss": 0.9281,
      "step": 24
    },
    {
      "epoch": 0.38910505836575876,
      "grad_norm": 0.34976479411125183,
      "learning_rate": 9.835138623956603e-05,
      "loss": 0.9323,
      "step": 25
    },
    {
      "epoch": 0.4046692607003891,
      "grad_norm": 0.3601384162902832,
      "learning_rate": 9.812567313038542e-05,
      "loss": 0.8767,
      "step": 26
    },
    {
      "epoch": 0.42023346303501946,
      "grad_norm": 0.3567204773426056,
      "learning_rate": 9.788577716443902e-05,
      "loss": 0.8901,
      "step": 27
    },
    {
      "epoch": 0.4357976653696498,
      "grad_norm": 0.35787394642829895,
      "learning_rate": 9.763176904016913e-05,
      "loss": 0.8908,
      "step": 28
    },
    {
      "epoch": 0.45136186770428016,
      "grad_norm": 0.3854931890964508,
      "learning_rate": 9.736372361493584e-05,
      "loss": 0.9265,
      "step": 29
    },
    {
      "epoch": 0.4669260700389105,
      "grad_norm": 0.3999091386795044,
      "learning_rate": 9.708171988295631e-05,
      "loss": 0.9161,
      "step": 30
    },
    {
      "epoch": 0.48249027237354086,
      "grad_norm": 0.3725212514400482,
      "learning_rate": 9.678584095202468e-05,
      "loss": 0.8742,
      "step": 31
    },
    {
      "epoch": 0.4980544747081712,
      "grad_norm": 0.4123667776584625,
      "learning_rate": 9.647617401902002e-05,
      "loss": 0.9402,
      "step": 32
    },
    {
      "epoch": 0.5136186770428015,
      "grad_norm": 0.31651416420936584,
      "learning_rate": 9.61528103442088e-05,
      "loss": 0.8945,
      "step": 33
    },
    {
      "epoch": 0.5291828793774319,
      "grad_norm": 0.3038080334663391,
      "learning_rate": 9.581584522435024e-05,
      "loss": 0.8982,
      "step": 34
    },
    {
      "epoch": 0.5447470817120622,
      "grad_norm": 0.3260709047317505,
      "learning_rate": 9.546537796461179e-05,
      "loss": 0.9635,
      "step": 35
    },
    {
      "epoch": 0.5603112840466926,
      "grad_norm": 0.31259122490882874,
      "learning_rate": 9.510151184930354e-05,
      "loss": 0.8647,
      "step": 36
    },
    {
      "epoch": 0.5758754863813229,
      "grad_norm": 0.316936731338501,
      "learning_rate": 9.472435411143978e-05,
      "loss": 0.8711,
      "step": 37
    },
    {
      "epoch": 0.5914396887159533,
      "grad_norm": 0.3249667286872864,
      "learning_rate": 9.433401590113701e-05,
      "loss": 0.9385,
      "step": 38
    },
    {
      "epoch": 0.6070038910505836,
      "grad_norm": 0.3141433894634247,
      "learning_rate": 9.393061225285743e-05,
      "loss": 0.8555,
      "step": 39
    },
    {
      "epoch": 0.622568093385214,
      "grad_norm": 0.31393998861312866,
      "learning_rate": 9.351426205150777e-05,
      "loss": 0.91,
      "step": 40
    },
    {
      "epoch": 0.6381322957198443,
      "grad_norm": 0.3161945343017578,
      "learning_rate": 9.308508799740341e-05,
      "loss": 0.9217,
      "step": 41
    },
    {
      "epoch": 0.6536964980544747,
      "grad_norm": 0.3333125114440918,
      "learning_rate": 9.2643216570108e-05,
      "loss": 0.9125,
      "step": 42
    },
    {
      "epoch": 0.669260700389105,
      "grad_norm": 0.3434793949127197,
      "learning_rate": 9.218877799115928e-05,
      "loss": 0.8761,
      "step": 43
    },
    {
      "epoch": 0.6848249027237354,
      "grad_norm": 0.35263577103614807,
      "learning_rate": 9.172190618569236e-05,
      "loss": 0.9013,
      "step": 44
    },
    {
      "epoch": 0.7003891050583657,
      "grad_norm": 0.3485749363899231,
      "learning_rate": 9.124273874297122e-05,
      "loss": 0.8326,
      "step": 45
    },
    {
      "epoch": 0.7159533073929961,
      "grad_norm": 0.3562600612640381,
      "learning_rate": 9.075141687584057e-05,
      "loss": 0.8795,
      "step": 46
    },
    {
      "epoch": 0.7315175097276264,
      "grad_norm": 0.3698873519897461,
      "learning_rate": 9.024808537910981e-05,
      "loss": 0.914,
      "step": 47
    },
    {
      "epoch": 0.7470817120622568,
      "grad_norm": 0.3896391689777374,
      "learning_rate": 8.973289258688125e-05,
      "loss": 0.8028,
      "step": 48
    },
    {
      "epoch": 0.7626459143968871,
      "grad_norm": 0.2838418483734131,
      "learning_rate": 8.920599032883554e-05,
      "loss": 0.8242,
      "step": 49
    },
    {
      "epoch": 0.7782101167315175,
      "grad_norm": 0.32417213916778564,
      "learning_rate": 8.86675338854865e-05,
      "loss": 0.9819,
      "step": 50
    },
    {
      "epoch": 0.7782101167315175,
      "eval_loss": 0.8939120173454285,
      "eval_runtime": 2.6016,
      "eval_samples_per_second": 41.512,
      "eval_steps_per_second": 10.378,
      "step": 50
    }
  ],
  "logging_steps": 1,
  "max_steps": 193,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 4,
  "save_steps": 50,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 5,
        "early_stopping_threshold": 0.0
      },
      "attributes": {
        "early_stopping_patience_counter": 0
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 1.3012201900081152e+16,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}