{
  "best_metric": 0.6465714573860168,
  "best_model_checkpoint": "miner_id_24/checkpoint-50",
  "epoch": 0.07072135785007072,
  "eval_steps": 50,
  "global_step": 50,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0014144271570014145,
      "grad_norm": 30.358135223388672,
      "learning_rate": 1.0100000000000002e-05,
      "loss": 5.9636,
      "step": 1
    },
    {
      "epoch": 0.0014144271570014145,
      "eval_loss": 1.5057185888290405,
      "eval_runtime": 65.8108,
      "eval_samples_per_second": 144.733,
      "eval_steps_per_second": 4.528,
      "step": 1
    },
    {
      "epoch": 0.002828854314002829,
      "grad_norm": 31.44856071472168,
      "learning_rate": 2.0200000000000003e-05,
      "loss": 6.0251,
      "step": 2
    },
    {
      "epoch": 0.004243281471004243,
      "grad_norm": 22.16029930114746,
      "learning_rate": 3.0299999999999998e-05,
      "loss": 5.5536,
      "step": 3
    },
    {
      "epoch": 0.005657708628005658,
      "grad_norm": 17.20639991760254,
      "learning_rate": 4.0400000000000006e-05,
      "loss": 4.7391,
      "step": 4
    },
    {
      "epoch": 0.007072135785007072,
      "grad_norm": 12.612313270568848,
      "learning_rate": 5.05e-05,
      "loss": 4.1681,
      "step": 5
    },
    {
      "epoch": 0.008486562942008486,
      "grad_norm": 8.703054428100586,
      "learning_rate": 6.0599999999999996e-05,
      "loss": 3.637,
      "step": 6
    },
    {
      "epoch": 0.009900990099009901,
      "grad_norm": 9.403382301330566,
      "learning_rate": 7.07e-05,
      "loss": 3.7323,
      "step": 7
    },
    {
      "epoch": 0.011315417256011316,
      "grad_norm": 12.620731353759766,
      "learning_rate": 8.080000000000001e-05,
      "loss": 3.6018,
      "step": 8
    },
    {
      "epoch": 0.01272984441301273,
      "grad_norm": 8.057601928710938,
      "learning_rate": 9.09e-05,
      "loss": 3.4366,
      "step": 9
    },
    {
      "epoch": 0.014144271570014143,
      "grad_norm": 4.481306552886963,
      "learning_rate": 0.000101,
      "loss": 3.2684,
      "step": 10
    },
    {
      "epoch": 0.015558698727015558,
      "grad_norm": 4.987090110778809,
      "learning_rate": 0.00010046842105263158,
      "loss": 3.1242,
      "step": 11
    },
    {
      "epoch": 0.016973125884016973,
      "grad_norm": 4.0390849113464355,
      "learning_rate": 9.993684210526315e-05,
      "loss": 2.8276,
      "step": 12
    },
    {
      "epoch": 0.018387553041018388,
      "grad_norm": 4.3538432121276855,
      "learning_rate": 9.940526315789473e-05,
      "loss": 3.0122,
      "step": 13
    },
    {
      "epoch": 0.019801980198019802,
      "grad_norm": 3.8758561611175537,
      "learning_rate": 9.887368421052632e-05,
      "loss": 3.1345,
      "step": 14
    },
    {
      "epoch": 0.021216407355021217,
      "grad_norm": 4.682952880859375,
      "learning_rate": 9.83421052631579e-05,
      "loss": 3.0093,
      "step": 15
    },
    {
      "epoch": 0.02263083451202263,
      "grad_norm": 3.6436381340026855,
      "learning_rate": 9.781052631578948e-05,
      "loss": 2.8878,
      "step": 16
    },
    {
      "epoch": 0.024045261669024046,
      "grad_norm": 3.1359572410583496,
      "learning_rate": 9.727894736842106e-05,
      "loss": 2.8174,
      "step": 17
    },
    {
      "epoch": 0.02545968882602546,
      "grad_norm": 3.475654363632202,
      "learning_rate": 9.674736842105263e-05,
      "loss": 2.6673,
      "step": 18
    },
    {
      "epoch": 0.026874115983026876,
      "grad_norm": 3.38742733001709,
      "learning_rate": 9.621578947368421e-05,
      "loss": 2.8075,
      "step": 19
    },
    {
      "epoch": 0.028288543140028287,
      "grad_norm": 6.014629364013672,
      "learning_rate": 9.568421052631578e-05,
      "loss": 3.0922,
      "step": 20
    },
    {
      "epoch": 0.0297029702970297,
      "grad_norm": 3.0035653114318848,
      "learning_rate": 9.515263157894737e-05,
      "loss": 2.9599,
      "step": 21
    },
    {
      "epoch": 0.031117397454031116,
      "grad_norm": 3.9666907787323,
      "learning_rate": 9.462105263157895e-05,
      "loss": 2.8904,
      "step": 22
    },
    {
      "epoch": 0.03253182461103253,
      "grad_norm": 3.315082550048828,
      "learning_rate": 9.408947368421054e-05,
      "loss": 2.7768,
      "step": 23
    },
    {
      "epoch": 0.033946251768033946,
      "grad_norm": 2.5618789196014404,
      "learning_rate": 9.355789473684211e-05,
      "loss": 2.6475,
      "step": 24
    },
    {
      "epoch": 0.03536067892503536,
      "grad_norm": 2.922630548477173,
      "learning_rate": 9.302631578947369e-05,
      "loss": 2.5942,
      "step": 25
    },
    {
      "epoch": 0.036775106082036775,
      "grad_norm": 4.30806827545166,
      "learning_rate": 9.249473684210526e-05,
      "loss": 3.0147,
      "step": 26
    },
    {
      "epoch": 0.03818953323903819,
      "grad_norm": 2.8271915912628174,
      "learning_rate": 9.196315789473685e-05,
      "loss": 2.8583,
      "step": 27
    },
    {
      "epoch": 0.039603960396039604,
      "grad_norm": 2.8441073894500732,
      "learning_rate": 9.143157894736843e-05,
      "loss": 2.7133,
      "step": 28
    },
    {
      "epoch": 0.04101838755304102,
      "grad_norm": 2.657393455505371,
      "learning_rate": 9.09e-05,
      "loss": 2.6705,
      "step": 29
    },
    {
      "epoch": 0.042432814710042434,
      "grad_norm": 2.526547908782959,
      "learning_rate": 9.036842105263158e-05,
      "loss": 2.6752,
      "step": 30
    },
    {
      "epoch": 0.04384724186704385,
      "grad_norm": 2.5309224128723145,
      "learning_rate": 8.983684210526316e-05,
      "loss": 2.6006,
      "step": 31
    },
    {
      "epoch": 0.04526166902404526,
      "grad_norm": 3.246652841567993,
      "learning_rate": 8.930526315789474e-05,
      "loss": 2.926,
      "step": 32
    },
    {
      "epoch": 0.04667609618104668,
      "grad_norm": 2.7534706592559814,
      "learning_rate": 8.877368421052632e-05,
      "loss": 2.8862,
      "step": 33
    },
    {
      "epoch": 0.04809052333804809,
      "grad_norm": 2.599621295928955,
      "learning_rate": 8.82421052631579e-05,
      "loss": 2.7434,
      "step": 34
    },
    {
      "epoch": 0.04950495049504951,
      "grad_norm": 2.4461135864257812,
      "learning_rate": 8.771052631578948e-05,
      "loss": 2.6901,
      "step": 35
    },
    {
      "epoch": 0.05091937765205092,
      "grad_norm": 2.444023370742798,
      "learning_rate": 8.717894736842105e-05,
      "loss": 2.5766,
      "step": 36
    },
    {
      "epoch": 0.05233380480905234,
      "grad_norm": 2.3293182849884033,
      "learning_rate": 8.664736842105263e-05,
      "loss": 2.4633,
      "step": 37
    },
    {
      "epoch": 0.05374823196605375,
      "grad_norm": 2.4258995056152344,
      "learning_rate": 8.61157894736842e-05,
      "loss": 2.6781,
      "step": 38
    },
    {
      "epoch": 0.055162659123055166,
      "grad_norm": 3.006838083267212,
      "learning_rate": 8.55842105263158e-05,
      "loss": 2.8319,
      "step": 39
    },
    {
      "epoch": 0.056577086280056574,
      "grad_norm": 2.496774673461914,
      "learning_rate": 8.505263157894737e-05,
      "loss": 2.644,
      "step": 40
    },
    {
      "epoch": 0.05799151343705799,
      "grad_norm": 2.602163076400757,
      "learning_rate": 8.452105263157896e-05,
      "loss": 2.6905,
      "step": 41
    },
    {
      "epoch": 0.0594059405940594,
      "grad_norm": 2.52225661277771,
      "learning_rate": 8.398947368421053e-05,
      "loss": 2.5921,
      "step": 42
    },
    {
      "epoch": 0.06082036775106082,
      "grad_norm": 2.3774173259735107,
      "learning_rate": 8.345789473684211e-05,
      "loss": 2.4794,
      "step": 43
    },
    {
      "epoch": 0.06223479490806223,
      "grad_norm": 2.1766254901885986,
      "learning_rate": 8.292631578947368e-05,
      "loss": 2.5138,
      "step": 44
    },
    {
      "epoch": 0.06364922206506365,
      "grad_norm": 2.6760802268981934,
      "learning_rate": 8.239473684210526e-05,
      "loss": 2.7873,
      "step": 45
    },
    {
      "epoch": 0.06506364922206506,
      "grad_norm": 2.3476643562316895,
      "learning_rate": 8.186315789473683e-05,
      "loss": 2.6749,
      "step": 46
    },
    {
      "epoch": 0.06647807637906648,
      "grad_norm": 2.3106589317321777,
      "learning_rate": 8.133157894736842e-05,
      "loss": 2.5956,
      "step": 47
    },
    {
      "epoch": 0.06789250353606789,
      "grad_norm": 2.198598861694336,
      "learning_rate": 8.080000000000001e-05,
      "loss": 2.5047,
      "step": 48
    },
    {
      "epoch": 0.06930693069306931,
      "grad_norm": 2.2239043712615967,
      "learning_rate": 8.026842105263159e-05,
      "loss": 2.5194,
      "step": 49
    },
    {
      "epoch": 0.07072135785007072,
      "grad_norm": 2.4284136295318604,
      "learning_rate": 7.973684210526316e-05,
      "loss": 2.4291,
      "step": 50
    },
    {
      "epoch": 0.07072135785007072,
      "eval_loss": 0.6465714573860168,
      "eval_runtime": 65.9647,
      "eval_samples_per_second": 144.395,
      "eval_steps_per_second": 4.518,
      "step": 50
    }
  ],
  "logging_steps": 1,
  "max_steps": 200,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 50,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 5,
        "early_stopping_threshold": 0.0
      },
      "attributes": {
        "early_stopping_patience_counter": 0
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 5.723998478401536e+17,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}