{
"best_metric": 1.2765874862670898,
"best_model_checkpoint": "miner_id_24/checkpoint-50",
"epoch": 0.06791171477079797,
"eval_steps": 50,
"global_step": 50,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0013582342954159593,
"grad_norm": 0.7695918679237366,
"learning_rate": 1e-05,
"loss": 1.3733,
"step": 1
},
{
"epoch": 0.0013582342954159593,
"eval_loss": 1.4100896120071411,
"eval_runtime": 74.7293,
"eval_samples_per_second": 16.593,
"eval_steps_per_second": 4.148,
"step": 1
},
{
"epoch": 0.0027164685908319186,
"grad_norm": 0.7374516129493713,
"learning_rate": 2e-05,
"loss": 1.4754,
"step": 2
},
{
"epoch": 0.0040747028862478775,
"grad_norm": 0.7155938148498535,
"learning_rate": 3e-05,
"loss": 1.3174,
"step": 3
},
{
"epoch": 0.005432937181663837,
"grad_norm": 0.6224644184112549,
"learning_rate": 4e-05,
"loss": 1.3707,
"step": 4
},
{
"epoch": 0.006791171477079796,
"grad_norm": 0.4430236220359802,
"learning_rate": 5e-05,
"loss": 1.3729,
"step": 5
},
{
"epoch": 0.008149405772495755,
"grad_norm": 0.40971505641937256,
"learning_rate": 6e-05,
"loss": 1.3221,
"step": 6
},
{
"epoch": 0.009507640067911714,
"grad_norm": 0.3615075945854187,
"learning_rate": 7e-05,
"loss": 1.3454,
"step": 7
},
{
"epoch": 0.010865874363327675,
"grad_norm": 0.35050615668296814,
"learning_rate": 8e-05,
"loss": 1.309,
"step": 8
},
{
"epoch": 0.012224108658743633,
"grad_norm": 0.30065447092056274,
"learning_rate": 9e-05,
"loss": 1.2385,
"step": 9
},
{
"epoch": 0.013582342954159592,
"grad_norm": 0.3258337378501892,
"learning_rate": 0.0001,
"loss": 1.2086,
"step": 10
},
{
"epoch": 0.014940577249575551,
"grad_norm": 0.3569473326206207,
"learning_rate": 9.99983777858264e-05,
"loss": 1.3262,
"step": 11
},
{
"epoch": 0.01629881154499151,
"grad_norm": 0.37945055961608887,
"learning_rate": 9.999351124856874e-05,
"loss": 1.3609,
"step": 12
},
{
"epoch": 0.01765704584040747,
"grad_norm": 0.3099724352359772,
"learning_rate": 9.998540070400966e-05,
"loss": 1.2673,
"step": 13
},
{
"epoch": 0.019015280135823428,
"grad_norm": 0.31235983967781067,
"learning_rate": 9.997404667843075e-05,
"loss": 1.2393,
"step": 14
},
{
"epoch": 0.02037351443123939,
"grad_norm": 0.27202108502388,
"learning_rate": 9.995944990857849e-05,
"loss": 1.2544,
"step": 15
},
{
"epoch": 0.02173174872665535,
"grad_norm": 0.27280139923095703,
"learning_rate": 9.994161134161634e-05,
"loss": 1.2061,
"step": 16
},
{
"epoch": 0.023089983022071308,
"grad_norm": 0.293769896030426,
"learning_rate": 9.992053213506334e-05,
"loss": 1.1964,
"step": 17
},
{
"epoch": 0.024448217317487267,
"grad_norm": 0.28996336460113525,
"learning_rate": 9.989621365671902e-05,
"loss": 1.2483,
"step": 18
},
{
"epoch": 0.025806451612903226,
"grad_norm": 0.271930456161499,
"learning_rate": 9.986865748457457e-05,
"loss": 1.2141,
"step": 19
},
{
"epoch": 0.027164685908319185,
"grad_norm": 0.2686532735824585,
"learning_rate": 9.983786540671051e-05,
"loss": 1.2347,
"step": 20
},
{
"epoch": 0.028522920203735144,
"grad_norm": 0.2654222249984741,
"learning_rate": 9.980383942118066e-05,
"loss": 1.2133,
"step": 21
},
{
"epoch": 0.029881154499151102,
"grad_norm": 0.2612136900424957,
"learning_rate": 9.976658173588244e-05,
"loss": 1.2274,
"step": 22
},
{
"epoch": 0.03123938879456706,
"grad_norm": 0.24660049378871918,
"learning_rate": 9.972609476841367e-05,
"loss": 1.2137,
"step": 23
},
{
"epoch": 0.03259762308998302,
"grad_norm": 0.2399974763393402,
"learning_rate": 9.968238114591566e-05,
"loss": 1.1527,
"step": 24
},
{
"epoch": 0.03395585738539898,
"grad_norm": 0.24953867495059967,
"learning_rate": 9.96354437049027e-05,
"loss": 1.2337,
"step": 25
},
{
"epoch": 0.03531409168081494,
"grad_norm": 0.25382018089294434,
"learning_rate": 9.95852854910781e-05,
"loss": 1.2282,
"step": 26
},
{
"epoch": 0.0366723259762309,
"grad_norm": 0.26546919345855713,
"learning_rate": 9.953190975913647e-05,
"loss": 1.2031,
"step": 27
},
{
"epoch": 0.038030560271646856,
"grad_norm": 0.2536904513835907,
"learning_rate": 9.947531997255256e-05,
"loss": 1.218,
"step": 28
},
{
"epoch": 0.03938879456706282,
"grad_norm": 0.2628403902053833,
"learning_rate": 9.941551980335652e-05,
"loss": 1.1947,
"step": 29
},
{
"epoch": 0.04074702886247878,
"grad_norm": 0.26902860403060913,
"learning_rate": 9.935251313189564e-05,
"loss": 1.2258,
"step": 30
},
{
"epoch": 0.042105263157894736,
"grad_norm": 0.264118492603302,
"learning_rate": 9.928630404658255e-05,
"loss": 1.2215,
"step": 31
},
{
"epoch": 0.0434634974533107,
"grad_norm": 0.2503666579723358,
"learning_rate": 9.921689684362989e-05,
"loss": 1.2607,
"step": 32
},
{
"epoch": 0.044821731748726654,
"grad_norm": 0.2590126693248749,
"learning_rate": 9.914429602677162e-05,
"loss": 1.236,
"step": 33
},
{
"epoch": 0.046179966044142616,
"grad_norm": 0.2683681845664978,
"learning_rate": 9.906850630697068e-05,
"loss": 1.195,
"step": 34
},
{
"epoch": 0.04753820033955857,
"grad_norm": 0.2629988491535187,
"learning_rate": 9.898953260211338e-05,
"loss": 1.1939,
"step": 35
},
{
"epoch": 0.048896434634974534,
"grad_norm": 0.28124624490737915,
"learning_rate": 9.890738003669029e-05,
"loss": 1.277,
"step": 36
},
{
"epoch": 0.05025466893039049,
"grad_norm": 0.27577537298202515,
"learning_rate": 9.882205394146361e-05,
"loss": 1.1631,
"step": 37
},
{
"epoch": 0.05161290322580645,
"grad_norm": 0.26586708426475525,
"learning_rate": 9.87335598531214e-05,
"loss": 1.1817,
"step": 38
},
{
"epoch": 0.052971137521222414,
"grad_norm": 0.28580763936042786,
"learning_rate": 9.864190351391822e-05,
"loss": 1.2785,
"step": 39
},
{
"epoch": 0.05432937181663837,
"grad_norm": 0.28752267360687256,
"learning_rate": 9.85470908713026e-05,
"loss": 1.1675,
"step": 40
},
{
"epoch": 0.05568760611205433,
"grad_norm": 0.2884860634803772,
"learning_rate": 9.844912807753104e-05,
"loss": 1.1855,
"step": 41
},
{
"epoch": 0.05704584040747029,
"grad_norm": 0.2822176218032837,
"learning_rate": 9.834802148926882e-05,
"loss": 1.2276,
"step": 42
},
{
"epoch": 0.05840407470288625,
"grad_norm": 0.28766173124313354,
"learning_rate": 9.824377766717759e-05,
"loss": 1.1655,
"step": 43
},
{
"epoch": 0.059762308998302205,
"grad_norm": 0.30563387274742126,
"learning_rate": 9.813640337548954e-05,
"loss": 1.2651,
"step": 44
},
{
"epoch": 0.06112054329371817,
"grad_norm": 0.3028828799724579,
"learning_rate": 9.802590558156862e-05,
"loss": 1.1419,
"step": 45
},
{
"epoch": 0.06247877758913412,
"grad_norm": 0.3264331817626953,
"learning_rate": 9.791229145545831e-05,
"loss": 1.1629,
"step": 46
},
{
"epoch": 0.06383701188455009,
"grad_norm": 0.3301156163215637,
"learning_rate": 9.779556836941645e-05,
"loss": 1.2186,
"step": 47
},
{
"epoch": 0.06519524617996604,
"grad_norm": 0.3410678505897522,
"learning_rate": 9.767574389743682e-05,
"loss": 1.2162,
"step": 48
},
{
"epoch": 0.06655348047538201,
"grad_norm": 0.36292213201522827,
"learning_rate": 9.755282581475769e-05,
"loss": 1.1575,
"step": 49
},
{
"epoch": 0.06791171477079797,
"grad_norm": 0.5214879512786865,
"learning_rate": 9.742682209735727e-05,
"loss": 1.1233,
"step": 50
},
{
"epoch": 0.06791171477079797,
"eval_loss": 1.2765874862670898,
"eval_runtime": 75.9116,
"eval_samples_per_second": 16.335,
"eval_steps_per_second": 4.084,
"step": 50
}
],
"logging_steps": 1,
"max_steps": 400,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 50,
"stateful_callbacks": {
"EarlyStoppingCallback": {
"args": {
"early_stopping_patience": 5,
"early_stopping_threshold": 0.0
},
"attributes": {
"early_stopping_patience_counter": 0
}
},
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 4.041831845619302e+16,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}