|
{ |
|
"best_metric": 0.8329792022705078, |
|
"best_model_checkpoint": "miner_id_24/checkpoint-50", |
|
"epoch": 0.008840951286358412, |
|
"eval_steps": 50, |
|
"global_step": 50, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.00017681902572716824, |
|
"grad_norm": 31.273752212524414, |
|
"learning_rate": 1e-05, |
|
"loss": 5.7904, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.00017681902572716824, |
|
"eval_loss": 1.5057493448257446, |
|
"eval_runtime": 707.0176, |
|
"eval_samples_per_second": 13.472, |
|
"eval_steps_per_second": 3.369, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0003536380514543365, |
|
"grad_norm": 33.96555709838867, |
|
"learning_rate": 2e-05, |
|
"loss": 6.1229, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.0005304570771815047, |
|
"grad_norm": 22.80571937561035, |
|
"learning_rate": 3e-05, |
|
"loss": 5.831, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.000707276102908673, |
|
"grad_norm": 17.077383041381836, |
|
"learning_rate": 4e-05, |
|
"loss": 4.8649, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.0008840951286358412, |
|
"grad_norm": 15.064048767089844, |
|
"learning_rate": 5e-05, |
|
"loss": 4.4974, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.0010609141543630094, |
|
"grad_norm": 12.101446151733398, |
|
"learning_rate": 6e-05, |
|
"loss": 3.9707, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.0012377331800901778, |
|
"grad_norm": 10.376118659973145, |
|
"learning_rate": 7e-05, |
|
"loss": 3.7705, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.001414552205817346, |
|
"grad_norm": 10.354647636413574, |
|
"learning_rate": 8e-05, |
|
"loss": 3.6031, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.001591371231544514, |
|
"grad_norm": 10.469047546386719, |
|
"learning_rate": 9e-05, |
|
"loss": 3.4053, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.0017681902572716825, |
|
"grad_norm": 9.278939247131348, |
|
"learning_rate": 0.0001, |
|
"loss": 3.659, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.0019450092829988506, |
|
"grad_norm": 15.399981498718262, |
|
"learning_rate": 9.999316524962345e-05, |
|
"loss": 3.6484, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.002121828308726019, |
|
"grad_norm": 10.160966873168945, |
|
"learning_rate": 9.997266286704631e-05, |
|
"loss": 3.5971, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.002298647334453187, |
|
"grad_norm": 7.888012886047363, |
|
"learning_rate": 9.993849845741524e-05, |
|
"loss": 3.4807, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.0024754663601803555, |
|
"grad_norm": 7.219862937927246, |
|
"learning_rate": 9.989068136093873e-05, |
|
"loss": 3.3483, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.0026522853859075237, |
|
"grad_norm": 6.999874114990234, |
|
"learning_rate": 9.98292246503335e-05, |
|
"loss": 3.27, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.002829104411634692, |
|
"grad_norm": 6.7764692306518555, |
|
"learning_rate": 9.975414512725057e-05, |
|
"loss": 3.2381, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.00300592343736186, |
|
"grad_norm": 7.229355812072754, |
|
"learning_rate": 9.966546331768191e-05, |
|
"loss": 3.1572, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.003182742463089028, |
|
"grad_norm": 7.318708419799805, |
|
"learning_rate": 9.956320346634876e-05, |
|
"loss": 3.2629, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.0033595614888161968, |
|
"grad_norm": 7.011357307434082, |
|
"learning_rate": 9.944739353007344e-05, |
|
"loss": 3.1216, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.003536380514543365, |
|
"grad_norm": 6.351388454437256, |
|
"learning_rate": 9.931806517013612e-05, |
|
"loss": 3.1227, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.003713199540270533, |
|
"grad_norm": 6.193704128265381, |
|
"learning_rate": 9.917525374361912e-05, |
|
"loss": 3.0576, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.0038900185659977013, |
|
"grad_norm": 6.23767614364624, |
|
"learning_rate": 9.901899829374047e-05, |
|
"loss": 3.1666, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.00406683759172487, |
|
"grad_norm": 5.724069118499756, |
|
"learning_rate": 9.884934153917997e-05, |
|
"loss": 3.0868, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.004243656617452038, |
|
"grad_norm": 6.114058494567871, |
|
"learning_rate": 9.86663298624003e-05, |
|
"loss": 3.1426, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.004420475643179206, |
|
"grad_norm": 6.249318599700928, |
|
"learning_rate": 9.847001329696653e-05, |
|
"loss": 3.1009, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.004597294668906374, |
|
"grad_norm": 5.979328632354736, |
|
"learning_rate": 9.826044551386744e-05, |
|
"loss": 3.0182, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.0047741136946335425, |
|
"grad_norm": 5.138391971588135, |
|
"learning_rate": 9.803768380684242e-05, |
|
"loss": 2.7047, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.004950932720360711, |
|
"grad_norm": 5.40130090713501, |
|
"learning_rate": 9.780178907671789e-05, |
|
"loss": 3.1115, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.005127751746087879, |
|
"grad_norm": 5.664342403411865, |
|
"learning_rate": 9.755282581475769e-05, |
|
"loss": 2.8735, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.005304570771815047, |
|
"grad_norm": 5.740615367889404, |
|
"learning_rate": 9.729086208503174e-05, |
|
"loss": 3.1383, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.005481389797542215, |
|
"grad_norm": 5.822990417480469, |
|
"learning_rate": 9.701596950580806e-05, |
|
"loss": 2.7514, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.005658208823269384, |
|
"grad_norm": 6.541563034057617, |
|
"learning_rate": 9.672822322997305e-05, |
|
"loss": 2.896, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.005835027848996552, |
|
"grad_norm": 5.708773612976074, |
|
"learning_rate": 9.642770192448536e-05, |
|
"loss": 2.9389, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.00601184687472372, |
|
"grad_norm": 5.139838218688965, |
|
"learning_rate": 9.611448774886924e-05, |
|
"loss": 2.85, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.006188665900450889, |
|
"grad_norm": 4.9747796058654785, |
|
"learning_rate": 9.578866633275288e-05, |
|
"loss": 2.9934, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.006365484926178056, |
|
"grad_norm": 4.744929313659668, |
|
"learning_rate": 9.545032675245813e-05, |
|
"loss": 2.893, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.006542303951905225, |
|
"grad_norm": 5.647818088531494, |
|
"learning_rate": 9.509956150664796e-05, |
|
"loss": 3.0005, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.0067191229776323936, |
|
"grad_norm": 5.120396614074707, |
|
"learning_rate": 9.473646649103818e-05, |
|
"loss": 2.8626, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.006895942003359561, |
|
"grad_norm": 5.247471809387207, |
|
"learning_rate": 9.43611409721806e-05, |
|
"loss": 2.9666, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.00707276102908673, |
|
"grad_norm": 5.75712251663208, |
|
"learning_rate": 9.397368756032445e-05, |
|
"loss": 2.7657, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.007249580054813898, |
|
"grad_norm": 5.175816535949707, |
|
"learning_rate": 9.357421218136386e-05, |
|
"loss": 2.9367, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.007426399080541066, |
|
"grad_norm": 5.635525703430176, |
|
"learning_rate": 9.316282404787871e-05, |
|
"loss": 2.8576, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.007603218106268235, |
|
"grad_norm": 4.829882621765137, |
|
"learning_rate": 9.273963562927695e-05, |
|
"loss": 2.6487, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.0077800371319954025, |
|
"grad_norm": 4.661524295806885, |
|
"learning_rate": 9.230476262104677e-05, |
|
"loss": 2.7636, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.007956856157722571, |
|
"grad_norm": 5.161839008331299, |
|
"learning_rate": 9.185832391312644e-05, |
|
"loss": 2.7386, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.00813367518344974, |
|
"grad_norm": 5.641641616821289, |
|
"learning_rate": 9.140044155740101e-05, |
|
"loss": 2.6801, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.008310494209176908, |
|
"grad_norm": 5.284437656402588, |
|
"learning_rate": 9.093124073433463e-05, |
|
"loss": 2.7854, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.008487313234904075, |
|
"grad_norm": 4.893701076507568, |
|
"learning_rate": 9.045084971874738e-05, |
|
"loss": 2.6241, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.008664132260631244, |
|
"grad_norm": 5.710663795471191, |
|
"learning_rate": 8.995939984474624e-05, |
|
"loss": 2.6486, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.008840951286358412, |
|
"grad_norm": 5.808746337890625, |
|
"learning_rate": 8.945702546981969e-05, |
|
"loss": 2.8028, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.008840951286358412, |
|
"eval_loss": 0.8329792022705078, |
|
"eval_runtime": 712.2685, |
|
"eval_samples_per_second": 13.373, |
|
"eval_steps_per_second": 3.344, |
|
"step": 50 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 200, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 50, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 5, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 0 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 7.15499809800192e+16, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|