lesso16's picture
Training in progress, step 50, checkpoint
8130323 verified
raw
history blame
9.99 kB
{
"best_metric": 1.0756638050079346,
"best_model_checkpoint": "miner_id_24/checkpoint-50",
"epoch": 1.1038251366120218,
"eval_steps": 50,
"global_step": 50,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.02185792349726776,
"grad_norm": 0.054219260811805725,
"learning_rate": 1.16e-05,
"loss": 1.0221,
"step": 1
},
{
"epoch": 0.02185792349726776,
"eval_loss": 1.3064125776290894,
"eval_runtime": 1.5023,
"eval_samples_per_second": 409.374,
"eval_steps_per_second": 13.313,
"step": 1
},
{
"epoch": 0.04371584699453552,
"grad_norm": 0.07374625653028488,
"learning_rate": 2.32e-05,
"loss": 1.13,
"step": 2
},
{
"epoch": 0.06557377049180328,
"grad_norm": 0.08744122087955475,
"learning_rate": 3.48e-05,
"loss": 1.2471,
"step": 3
},
{
"epoch": 0.08743169398907104,
"grad_norm": 0.1099563017487526,
"learning_rate": 4.64e-05,
"loss": 1.3512,
"step": 4
},
{
"epoch": 0.1092896174863388,
"grad_norm": 0.14091312885284424,
"learning_rate": 5.8e-05,
"loss": 1.382,
"step": 5
},
{
"epoch": 0.13114754098360656,
"grad_norm": 0.19244275987148285,
"learning_rate": 6.96e-05,
"loss": 1.524,
"step": 6
},
{
"epoch": 0.15300546448087432,
"grad_norm": 0.052936580032110214,
"learning_rate": 8.12e-05,
"loss": 1.0329,
"step": 7
},
{
"epoch": 0.17486338797814208,
"grad_norm": 0.06494678556919098,
"learning_rate": 9.28e-05,
"loss": 1.1503,
"step": 8
},
{
"epoch": 0.19672131147540983,
"grad_norm": 0.07551469653844833,
"learning_rate": 0.0001044,
"loss": 1.2085,
"step": 9
},
{
"epoch": 0.2185792349726776,
"grad_norm": 0.08664041757583618,
"learning_rate": 0.000116,
"loss": 1.2444,
"step": 10
},
{
"epoch": 0.24043715846994534,
"grad_norm": 0.10655322670936584,
"learning_rate": 0.00011598225532067881,
"loss": 1.3136,
"step": 11
},
{
"epoch": 0.26229508196721313,
"grad_norm": 0.14484980702400208,
"learning_rate": 0.00011592903214042715,
"loss": 1.3774,
"step": 12
},
{
"epoch": 0.28415300546448086,
"grad_norm": 0.049404121935367584,
"learning_rate": 0.00011584036302573693,
"loss": 0.9998,
"step": 13
},
{
"epoch": 0.30601092896174864,
"grad_norm": 0.05533352494239807,
"learning_rate": 0.0001157163022319532,
"loss": 1.077,
"step": 14
},
{
"epoch": 0.32786885245901637,
"grad_norm": 0.06618451327085495,
"learning_rate": 0.00011555692567007598,
"loss": 1.1209,
"step": 15
},
{
"epoch": 0.34972677595628415,
"grad_norm": 0.07199019938707352,
"learning_rate": 0.00011536233086031157,
"loss": 1.2181,
"step": 16
},
{
"epoch": 0.37158469945355194,
"grad_norm": 0.08229127526283264,
"learning_rate": 0.00011513263687240126,
"loss": 1.2544,
"step": 17
},
{
"epoch": 0.39344262295081966,
"grad_norm": 0.10118231177330017,
"learning_rate": 0.00011486798425276428,
"loss": 1.3167,
"step": 18
},
{
"epoch": 0.41530054644808745,
"grad_norm": 0.06382325291633606,
"learning_rate": 0.00011456853493849944,
"loss": 0.9757,
"step": 19
},
{
"epoch": 0.4371584699453552,
"grad_norm": 0.06287430226802826,
"learning_rate": 0.0001142344721582983,
"loss": 1.0141,
"step": 20
},
{
"epoch": 0.45901639344262296,
"grad_norm": 0.061046287417411804,
"learning_rate": 0.00011386600032033012,
"loss": 1.1142,
"step": 21
},
{
"epoch": 0.4808743169398907,
"grad_norm": 0.05975975841283798,
"learning_rate": 0.0001134633448871674,
"loss": 1.172,
"step": 22
},
{
"epoch": 0.5027322404371585,
"grad_norm": 0.06590148061513901,
"learning_rate": 0.00011302675223782873,
"loss": 1.1934,
"step": 23
},
{
"epoch": 0.5245901639344263,
"grad_norm": 0.07652608305215836,
"learning_rate": 0.00011255648951702296,
"loss": 1.2285,
"step": 24
},
{
"epoch": 0.546448087431694,
"grad_norm": 0.11880210041999817,
"learning_rate": 0.0001120528444716872,
"loss": 1.2294,
"step": 25
},
{
"epoch": 0.5683060109289617,
"grad_norm": 0.04327382519841194,
"learning_rate": 0.00011151612527491878,
"loss": 0.9457,
"step": 26
},
{
"epoch": 0.5901639344262295,
"grad_norm": 0.05113707482814789,
"learning_rate": 0.00011094666033740846,
"loss": 1.0301,
"step": 27
},
{
"epoch": 0.6120218579234973,
"grad_norm": 0.04633456468582153,
"learning_rate": 0.00011034479810649071,
"loss": 1.1369,
"step": 28
},
{
"epoch": 0.6338797814207651,
"grad_norm": 0.052176687866449356,
"learning_rate": 0.00010971090685293396,
"loss": 1.1575,
"step": 29
},
{
"epoch": 0.6557377049180327,
"grad_norm": 0.05911482125520706,
"learning_rate": 0.00010904537444560093,
"loss": 1.1915,
"step": 30
},
{
"epoch": 0.6775956284153005,
"grad_norm": 0.08560285717248917,
"learning_rate": 0.0001083486081141173,
"loss": 1.1844,
"step": 31
},
{
"epoch": 0.6994535519125683,
"grad_norm": 0.0443929098546505,
"learning_rate": 0.00010762103419969393,
"loss": 0.9784,
"step": 32
},
{
"epoch": 0.7213114754098361,
"grad_norm": 0.04982827231287956,
"learning_rate": 0.00010686309789425474,
"loss": 1.0368,
"step": 33
},
{
"epoch": 0.7431693989071039,
"grad_norm": 0.04613876715302467,
"learning_rate": 0.00010607526296803026,
"loss": 1.0534,
"step": 34
},
{
"epoch": 0.7650273224043715,
"grad_norm": 0.04624936357140541,
"learning_rate": 0.00010525801148578341,
"loss": 1.1136,
"step": 35
},
{
"epoch": 0.7868852459016393,
"grad_norm": 0.050727903842926025,
"learning_rate": 0.000104411843511841,
"loss": 1.1563,
"step": 36
},
{
"epoch": 0.8087431693989071,
"grad_norm": 0.07218360155820847,
"learning_rate": 0.00010353727680411158,
"loss": 1.148,
"step": 37
},
{
"epoch": 0.8306010928961749,
"grad_norm": 0.04049117863178253,
"learning_rate": 0.00010263484649727705,
"loss": 0.9096,
"step": 38
},
{
"epoch": 0.8524590163934426,
"grad_norm": 0.0455789640545845,
"learning_rate": 0.00010170510477535133,
"loss": 1.0006,
"step": 39
},
{
"epoch": 0.8743169398907104,
"grad_norm": 0.039463143795728683,
"learning_rate": 0.00010074862053380711,
"loss": 1.0411,
"step": 40
},
{
"epoch": 0.8961748633879781,
"grad_norm": 0.042614974081516266,
"learning_rate": 9.976597903147682e-05,
"loss": 1.1396,
"step": 41
},
{
"epoch": 0.9180327868852459,
"grad_norm": 0.04930881783366203,
"learning_rate": 9.875778153244143e-05,
"loss": 1.1744,
"step": 42
},
{
"epoch": 0.9398907103825137,
"grad_norm": 0.06974472105503082,
"learning_rate": 9.772464493812549e-05,
"loss": 1.15,
"step": 43
},
{
"epoch": 0.9617486338797814,
"grad_norm": 0.04092060774564743,
"learning_rate": 9.66672014098242e-05,
"loss": 0.9676,
"step": 44
},
{
"epoch": 0.9836065573770492,
"grad_norm": 0.0392816998064518,
"learning_rate": 9.558609798189311e-05,
"loss": 1.0893,
"step": 45
},
{
"epoch": 1.0163934426229508,
"grad_norm": 0.08897832781076431,
"learning_rate": 9.448199616583707e-05,
"loss": 1.8898,
"step": 46
},
{
"epoch": 1.0382513661202186,
"grad_norm": 0.03982605040073395,
"learning_rate": 9.335557154554105e-05,
"loss": 0.9943,
"step": 47
},
{
"epoch": 1.0601092896174864,
"grad_norm": 0.03858646750450134,
"learning_rate": 9.220751336389013e-05,
"loss": 1.0459,
"step": 48
},
{
"epoch": 1.0819672131147542,
"grad_norm": 0.040587618947029114,
"learning_rate": 9.10385241010317e-05,
"loss": 1.1494,
"step": 49
},
{
"epoch": 1.1038251366120218,
"grad_norm": 0.052482884377241135,
"learning_rate": 8.984931904453821e-05,
"loss": 1.1475,
"step": 50
},
{
"epoch": 1.1038251366120218,
"eval_loss": 1.0756638050079346,
"eval_runtime": 1.9721,
"eval_samples_per_second": 311.855,
"eval_steps_per_second": 10.142,
"step": 50
}
],
"logging_steps": 1,
"max_steps": 137,
"num_input_tokens_seen": 0,
"num_train_epochs": 4,
"save_steps": 50,
"stateful_callbacks": {
"EarlyStoppingCallback": {
"args": {
"early_stopping_patience": 5,
"early_stopping_threshold": 0.0
},
"attributes": {
"early_stopping_patience_counter": 0
}
},
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 2.8231094746742784e+16,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}