{
"best_metric": 0.6253278255462646,
"best_model_checkpoint": "miner_id_24/checkpoint-50",
"epoch": 0.04927322000492732,
"eval_steps": 50,
"global_step": 50,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0009854644000985464,
"grad_norm": 0.4313327372074127,
"learning_rate": 3.3333333333333333e-06,
"loss": 0.0899,
"step": 1
},
{
"epoch": 0.0009854644000985464,
"eval_loss": 0.9981184601783752,
"eval_runtime": 158.8136,
"eval_samples_per_second": 10.761,
"eval_steps_per_second": 5.384,
"step": 1
},
{
"epoch": 0.001970928800197093,
"grad_norm": 0.5051336884498596,
"learning_rate": 6.666666666666667e-06,
"loss": 0.3115,
"step": 2
},
{
"epoch": 0.0029563932002956393,
"grad_norm": 0.40696489810943604,
"learning_rate": 1e-05,
"loss": 0.3648,
"step": 3
},
{
"epoch": 0.003941857600394186,
"grad_norm": 0.5555673241615295,
"learning_rate": 1.3333333333333333e-05,
"loss": 0.4126,
"step": 4
},
{
"epoch": 0.004927322000492732,
"grad_norm": 0.6959850788116455,
"learning_rate": 1.6666666666666667e-05,
"loss": 0.5118,
"step": 5
},
{
"epoch": 0.005912786400591279,
"grad_norm": 0.6902092695236206,
"learning_rate": 2e-05,
"loss": 0.6241,
"step": 6
},
{
"epoch": 0.006898250800689825,
"grad_norm": 0.5990986824035645,
"learning_rate": 2.3333333333333336e-05,
"loss": 0.7635,
"step": 7
},
{
"epoch": 0.007883715200788372,
"grad_norm": 0.46151483058929443,
"learning_rate": 2.6666666666666667e-05,
"loss": 0.7826,
"step": 8
},
{
"epoch": 0.008869179600886918,
"grad_norm": 0.40509724617004395,
"learning_rate": 3e-05,
"loss": 0.8468,
"step": 9
},
{
"epoch": 0.009854644000985464,
"grad_norm": 0.4107632040977478,
"learning_rate": 3.3333333333333335e-05,
"loss": 0.7862,
"step": 10
},
{
"epoch": 0.01084010840108401,
"grad_norm": 0.4689662754535675,
"learning_rate": 3.6666666666666666e-05,
"loss": 0.7878,
"step": 11
},
{
"epoch": 0.011825572801182557,
"grad_norm": 0.4455258250236511,
"learning_rate": 4e-05,
"loss": 0.8286,
"step": 12
},
{
"epoch": 0.012811037201281104,
"grad_norm": 0.4460776746273041,
"learning_rate": 4.3333333333333334e-05,
"loss": 0.6505,
"step": 13
},
{
"epoch": 0.01379650160137965,
"grad_norm": 0.47701597213745117,
"learning_rate": 4.666666666666667e-05,
"loss": 0.8125,
"step": 14
},
{
"epoch": 0.014781966001478197,
"grad_norm": 0.43874800205230713,
"learning_rate": 5e-05,
"loss": 0.6508,
"step": 15
},
{
"epoch": 0.015767430401576743,
"grad_norm": 0.43464934825897217,
"learning_rate": 5.333333333333333e-05,
"loss": 0.7736,
"step": 16
},
{
"epoch": 0.01675289480167529,
"grad_norm": 0.4389445185661316,
"learning_rate": 5.666666666666667e-05,
"loss": 0.7805,
"step": 17
},
{
"epoch": 0.017738359201773836,
"grad_norm": 0.33348941802978516,
"learning_rate": 6e-05,
"loss": 0.6069,
"step": 18
},
{
"epoch": 0.018723823601872382,
"grad_norm": 0.3531840443611145,
"learning_rate": 6.333333333333333e-05,
"loss": 0.6574,
"step": 19
},
{
"epoch": 0.01970928800197093,
"grad_norm": 0.30243533849716187,
"learning_rate": 6.666666666666667e-05,
"loss": 0.6736,
"step": 20
},
{
"epoch": 0.020694752402069475,
"grad_norm": 0.32596221566200256,
"learning_rate": 7e-05,
"loss": 0.6822,
"step": 21
},
{
"epoch": 0.02168021680216802,
"grad_norm": 0.3076384365558624,
"learning_rate": 7.333333333333333e-05,
"loss": 0.7694,
"step": 22
},
{
"epoch": 0.022665681202266568,
"grad_norm": 0.30813416838645935,
"learning_rate": 7.666666666666667e-05,
"loss": 0.6511,
"step": 23
},
{
"epoch": 0.023651145602365115,
"grad_norm": 0.3227630853652954,
"learning_rate": 8e-05,
"loss": 0.6856,
"step": 24
},
{
"epoch": 0.02463661000246366,
"grad_norm": 0.3547810912132263,
"learning_rate": 8.333333333333334e-05,
"loss": 0.6536,
"step": 25
},
{
"epoch": 0.025622074402562207,
"grad_norm": 0.3056383430957794,
"learning_rate": 8.666666666666667e-05,
"loss": 0.7591,
"step": 26
},
{
"epoch": 0.026607538802660754,
"grad_norm": 0.33836042881011963,
"learning_rate": 9e-05,
"loss": 0.8528,
"step": 27
},
{
"epoch": 0.0275930032027593,
"grad_norm": 0.3435473144054413,
"learning_rate": 9.333333333333334e-05,
"loss": 0.7285,
"step": 28
},
{
"epoch": 0.028578467602857847,
"grad_norm": 0.3219856917858124,
"learning_rate": 9.666666666666667e-05,
"loss": 0.7288,
"step": 29
},
{
"epoch": 0.029563932002956393,
"grad_norm": 0.3125074505805969,
"learning_rate": 0.0001,
"loss": 0.7225,
"step": 30
},
{
"epoch": 0.03054939640305494,
"grad_norm": 0.31725504994392395,
"learning_rate": 9.999146252290264e-05,
"loss": 0.7351,
"step": 31
},
{
"epoch": 0.031534860803153486,
"grad_norm": 0.3184851109981537,
"learning_rate": 9.996585300715116e-05,
"loss": 0.7631,
"step": 32
},
{
"epoch": 0.032520325203252036,
"grad_norm": 0.36127910017967224,
"learning_rate": 9.99231801983717e-05,
"loss": 0.7501,
"step": 33
},
{
"epoch": 0.03350578960335058,
"grad_norm": 0.32324203848838806,
"learning_rate": 9.986345866928941e-05,
"loss": 0.7823,
"step": 34
},
{
"epoch": 0.03449125400344913,
"grad_norm": 0.3167076110839844,
"learning_rate": 9.978670881475172e-05,
"loss": 0.6496,
"step": 35
},
{
"epoch": 0.03547671840354767,
"grad_norm": 0.3640748858451843,
"learning_rate": 9.96929568447637e-05,
"loss": 0.6557,
"step": 36
},
{
"epoch": 0.03646218280364622,
"grad_norm": 0.40133535861968994,
"learning_rate": 9.958223477553714e-05,
"loss": 0.7987,
"step": 37
},
{
"epoch": 0.037447647203744765,
"grad_norm": 0.40022367238998413,
"learning_rate": 9.94545804185573e-05,
"loss": 0.7052,
"step": 38
},
{
"epoch": 0.038433111603843315,
"grad_norm": 0.4175143241882324,
"learning_rate": 9.931003736767013e-05,
"loss": 0.7733,
"step": 39
},
{
"epoch": 0.03941857600394186,
"grad_norm": 0.5691938996315002,
"learning_rate": 9.91486549841951e-05,
"loss": 0.6385,
"step": 40
},
{
"epoch": 0.04040404040404041,
"grad_norm": 0.36552998423576355,
"learning_rate": 9.89704883800683e-05,
"loss": 0.5661,
"step": 41
},
{
"epoch": 0.04138950480413895,
"grad_norm": 0.44288337230682373,
"learning_rate": 9.877559839902184e-05,
"loss": 0.7711,
"step": 42
},
{
"epoch": 0.0423749692042375,
"grad_norm": 0.42461535334587097,
"learning_rate": 9.85640515958057e-05,
"loss": 0.7106,
"step": 43
},
{
"epoch": 0.04336043360433604,
"grad_norm": 0.46416065096855164,
"learning_rate": 9.833592021345937e-05,
"loss": 0.7392,
"step": 44
},
{
"epoch": 0.04434589800443459,
"grad_norm": 0.46228498220443726,
"learning_rate": 9.809128215864097e-05,
"loss": 0.6193,
"step": 45
},
{
"epoch": 0.045331362404533136,
"grad_norm": 0.5916977524757385,
"learning_rate": 9.783022097502204e-05,
"loss": 0.9369,
"step": 46
},
{
"epoch": 0.046316826804631686,
"grad_norm": 0.509521484375,
"learning_rate": 9.755282581475769e-05,
"loss": 0.8942,
"step": 47
},
{
"epoch": 0.04730229120473023,
"grad_norm": 0.5575716495513916,
"learning_rate": 9.725919140804099e-05,
"loss": 0.7795,
"step": 48
},
{
"epoch": 0.04828775560482878,
"grad_norm": 0.674028754234314,
"learning_rate": 9.694941803075283e-05,
"loss": 0.9203,
"step": 49
},
{
"epoch": 0.04927322000492732,
"grad_norm": 0.8474329113960266,
"learning_rate": 9.662361147021779e-05,
"loss": 0.9088,
"step": 50
},
{
"epoch": 0.04927322000492732,
"eval_loss": 0.6253278255462646,
"eval_runtime": 160.106,
"eval_samples_per_second": 10.674,
"eval_steps_per_second": 5.34,
"step": 50
}
],
"logging_steps": 1,
"max_steps": 200,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 50,
"stateful_callbacks": {
"EarlyStoppingCallback": {
"args": {
"early_stopping_patience": 5,
"early_stopping_threshold": 0.0
},
"attributes": {
"early_stopping_patience_counter": 0
}
},
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 8.560814559382733e+16,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}