|
{ |
|
"best_metric": 0.6253278255462646, |
|
"best_model_checkpoint": "miner_id_24/checkpoint-50", |
|
"epoch": 0.04927322000492732, |
|
"eval_steps": 50, |
|
"global_step": 50, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0009854644000985464, |
|
"grad_norm": 0.4313327372074127, |
|
"learning_rate": 3.3333333333333333e-06, |
|
"loss": 0.0899, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0009854644000985464, |
|
"eval_loss": 0.9981184601783752, |
|
"eval_runtime": 158.8136, |
|
"eval_samples_per_second": 10.761, |
|
"eval_steps_per_second": 5.384, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.001970928800197093, |
|
"grad_norm": 0.5051336884498596, |
|
"learning_rate": 6.666666666666667e-06, |
|
"loss": 0.3115, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.0029563932002956393, |
|
"grad_norm": 0.40696489810943604, |
|
"learning_rate": 1e-05, |
|
"loss": 0.3648, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.003941857600394186, |
|
"grad_norm": 0.5555673241615295, |
|
"learning_rate": 1.3333333333333333e-05, |
|
"loss": 0.4126, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.004927322000492732, |
|
"grad_norm": 0.6959850788116455, |
|
"learning_rate": 1.6666666666666667e-05, |
|
"loss": 0.5118, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.005912786400591279, |
|
"grad_norm": 0.6902092695236206, |
|
"learning_rate": 2e-05, |
|
"loss": 0.6241, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.006898250800689825, |
|
"grad_norm": 0.5990986824035645, |
|
"learning_rate": 2.3333333333333336e-05, |
|
"loss": 0.7635, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.007883715200788372, |
|
"grad_norm": 0.46151483058929443, |
|
"learning_rate": 2.6666666666666667e-05, |
|
"loss": 0.7826, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.008869179600886918, |
|
"grad_norm": 0.40509724617004395, |
|
"learning_rate": 3e-05, |
|
"loss": 0.8468, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.009854644000985464, |
|
"grad_norm": 0.4107632040977478, |
|
"learning_rate": 3.3333333333333335e-05, |
|
"loss": 0.7862, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.01084010840108401, |
|
"grad_norm": 0.4689662754535675, |
|
"learning_rate": 3.6666666666666666e-05, |
|
"loss": 0.7878, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.011825572801182557, |
|
"grad_norm": 0.4455258250236511, |
|
"learning_rate": 4e-05, |
|
"loss": 0.8286, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.012811037201281104, |
|
"grad_norm": 0.4460776746273041, |
|
"learning_rate": 4.3333333333333334e-05, |
|
"loss": 0.6505, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.01379650160137965, |
|
"grad_norm": 0.47701597213745117, |
|
"learning_rate": 4.666666666666667e-05, |
|
"loss": 0.8125, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.014781966001478197, |
|
"grad_norm": 0.43874800205230713, |
|
"learning_rate": 5e-05, |
|
"loss": 0.6508, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.015767430401576743, |
|
"grad_norm": 0.43464934825897217, |
|
"learning_rate": 5.333333333333333e-05, |
|
"loss": 0.7736, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.01675289480167529, |
|
"grad_norm": 0.4389445185661316, |
|
"learning_rate": 5.666666666666667e-05, |
|
"loss": 0.7805, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.017738359201773836, |
|
"grad_norm": 0.33348941802978516, |
|
"learning_rate": 6e-05, |
|
"loss": 0.6069, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.018723823601872382, |
|
"grad_norm": 0.3531840443611145, |
|
"learning_rate": 6.333333333333333e-05, |
|
"loss": 0.6574, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.01970928800197093, |
|
"grad_norm": 0.30243533849716187, |
|
"learning_rate": 6.666666666666667e-05, |
|
"loss": 0.6736, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.020694752402069475, |
|
"grad_norm": 0.32596221566200256, |
|
"learning_rate": 7e-05, |
|
"loss": 0.6822, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.02168021680216802, |
|
"grad_norm": 0.3076384365558624, |
|
"learning_rate": 7.333333333333333e-05, |
|
"loss": 0.7694, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.022665681202266568, |
|
"grad_norm": 0.30813416838645935, |
|
"learning_rate": 7.666666666666667e-05, |
|
"loss": 0.6511, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.023651145602365115, |
|
"grad_norm": 0.3227630853652954, |
|
"learning_rate": 8e-05, |
|
"loss": 0.6856, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.02463661000246366, |
|
"grad_norm": 0.3547810912132263, |
|
"learning_rate": 8.333333333333334e-05, |
|
"loss": 0.6536, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.025622074402562207, |
|
"grad_norm": 0.3056383430957794, |
|
"learning_rate": 8.666666666666667e-05, |
|
"loss": 0.7591, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.026607538802660754, |
|
"grad_norm": 0.33836042881011963, |
|
"learning_rate": 9e-05, |
|
"loss": 0.8528, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.0275930032027593, |
|
"grad_norm": 0.3435473144054413, |
|
"learning_rate": 9.333333333333334e-05, |
|
"loss": 0.7285, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.028578467602857847, |
|
"grad_norm": 0.3219856917858124, |
|
"learning_rate": 9.666666666666667e-05, |
|
"loss": 0.7288, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.029563932002956393, |
|
"grad_norm": 0.3125074505805969, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7225, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.03054939640305494, |
|
"grad_norm": 0.31725504994392395, |
|
"learning_rate": 9.999146252290264e-05, |
|
"loss": 0.7351, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.031534860803153486, |
|
"grad_norm": 0.3184851109981537, |
|
"learning_rate": 9.996585300715116e-05, |
|
"loss": 0.7631, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.032520325203252036, |
|
"grad_norm": 0.36127910017967224, |
|
"learning_rate": 9.99231801983717e-05, |
|
"loss": 0.7501, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.03350578960335058, |
|
"grad_norm": 0.32324203848838806, |
|
"learning_rate": 9.986345866928941e-05, |
|
"loss": 0.7823, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.03449125400344913, |
|
"grad_norm": 0.3167076110839844, |
|
"learning_rate": 9.978670881475172e-05, |
|
"loss": 0.6496, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.03547671840354767, |
|
"grad_norm": 0.3640748858451843, |
|
"learning_rate": 9.96929568447637e-05, |
|
"loss": 0.6557, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.03646218280364622, |
|
"grad_norm": 0.40133535861968994, |
|
"learning_rate": 9.958223477553714e-05, |
|
"loss": 0.7987, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.037447647203744765, |
|
"grad_norm": 0.40022367238998413, |
|
"learning_rate": 9.94545804185573e-05, |
|
"loss": 0.7052, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.038433111603843315, |
|
"grad_norm": 0.4175143241882324, |
|
"learning_rate": 9.931003736767013e-05, |
|
"loss": 0.7733, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.03941857600394186, |
|
"grad_norm": 0.5691938996315002, |
|
"learning_rate": 9.91486549841951e-05, |
|
"loss": 0.6385, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.04040404040404041, |
|
"grad_norm": 0.36552998423576355, |
|
"learning_rate": 9.89704883800683e-05, |
|
"loss": 0.5661, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.04138950480413895, |
|
"grad_norm": 0.44288337230682373, |
|
"learning_rate": 9.877559839902184e-05, |
|
"loss": 0.7711, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.0423749692042375, |
|
"grad_norm": 0.42461535334587097, |
|
"learning_rate": 9.85640515958057e-05, |
|
"loss": 0.7106, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.04336043360433604, |
|
"grad_norm": 0.46416065096855164, |
|
"learning_rate": 9.833592021345937e-05, |
|
"loss": 0.7392, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.04434589800443459, |
|
"grad_norm": 0.46228498220443726, |
|
"learning_rate": 9.809128215864097e-05, |
|
"loss": 0.6193, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.045331362404533136, |
|
"grad_norm": 0.5916977524757385, |
|
"learning_rate": 9.783022097502204e-05, |
|
"loss": 0.9369, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.046316826804631686, |
|
"grad_norm": 0.509521484375, |
|
"learning_rate": 9.755282581475769e-05, |
|
"loss": 0.8942, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.04730229120473023, |
|
"grad_norm": 0.5575716495513916, |
|
"learning_rate": 9.725919140804099e-05, |
|
"loss": 0.7795, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.04828775560482878, |
|
"grad_norm": 0.674028754234314, |
|
"learning_rate": 9.694941803075283e-05, |
|
"loss": 0.9203, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.04927322000492732, |
|
"grad_norm": 0.8474329113960266, |
|
"learning_rate": 9.662361147021779e-05, |
|
"loss": 0.9088, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.04927322000492732, |
|
"eval_loss": 0.6253278255462646, |
|
"eval_runtime": 160.106, |
|
"eval_samples_per_second": 10.674, |
|
"eval_steps_per_second": 5.34, |
|
"step": 50 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 200, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 50, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 5, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 0 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 8.560814559382733e+16, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|