|
{ |
|
"best_metric": 1.0756638050079346, |
|
"best_model_checkpoint": "miner_id_24/checkpoint-50", |
|
"epoch": 1.1038251366120218, |
|
"eval_steps": 50, |
|
"global_step": 50, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.02185792349726776, |
|
"grad_norm": 0.054219260811805725, |
|
"learning_rate": 1.16e-05, |
|
"loss": 1.0221, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.02185792349726776, |
|
"eval_loss": 1.3064125776290894, |
|
"eval_runtime": 1.5023, |
|
"eval_samples_per_second": 409.374, |
|
"eval_steps_per_second": 13.313, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.04371584699453552, |
|
"grad_norm": 0.07374625653028488, |
|
"learning_rate": 2.32e-05, |
|
"loss": 1.13, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.06557377049180328, |
|
"grad_norm": 0.08744122087955475, |
|
"learning_rate": 3.48e-05, |
|
"loss": 1.2471, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.08743169398907104, |
|
"grad_norm": 0.1099563017487526, |
|
"learning_rate": 4.64e-05, |
|
"loss": 1.3512, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.1092896174863388, |
|
"grad_norm": 0.14091312885284424, |
|
"learning_rate": 5.8e-05, |
|
"loss": 1.382, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.13114754098360656, |
|
"grad_norm": 0.19244275987148285, |
|
"learning_rate": 6.96e-05, |
|
"loss": 1.524, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.15300546448087432, |
|
"grad_norm": 0.052936580032110214, |
|
"learning_rate": 8.12e-05, |
|
"loss": 1.0329, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.17486338797814208, |
|
"grad_norm": 0.06494678556919098, |
|
"learning_rate": 9.28e-05, |
|
"loss": 1.1503, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.19672131147540983, |
|
"grad_norm": 0.07551469653844833, |
|
"learning_rate": 0.0001044, |
|
"loss": 1.2085, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.2185792349726776, |
|
"grad_norm": 0.08664041757583618, |
|
"learning_rate": 0.000116, |
|
"loss": 1.2444, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.24043715846994534, |
|
"grad_norm": 0.10655322670936584, |
|
"learning_rate": 0.00011598225532067881, |
|
"loss": 1.3136, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.26229508196721313, |
|
"grad_norm": 0.14484980702400208, |
|
"learning_rate": 0.00011592903214042715, |
|
"loss": 1.3774, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.28415300546448086, |
|
"grad_norm": 0.049404121935367584, |
|
"learning_rate": 0.00011584036302573693, |
|
"loss": 0.9998, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.30601092896174864, |
|
"grad_norm": 0.05533352494239807, |
|
"learning_rate": 0.0001157163022319532, |
|
"loss": 1.077, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.32786885245901637, |
|
"grad_norm": 0.06618451327085495, |
|
"learning_rate": 0.00011555692567007598, |
|
"loss": 1.1209, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.34972677595628415, |
|
"grad_norm": 0.07199019938707352, |
|
"learning_rate": 0.00011536233086031157, |
|
"loss": 1.2181, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.37158469945355194, |
|
"grad_norm": 0.08229127526283264, |
|
"learning_rate": 0.00011513263687240126, |
|
"loss": 1.2544, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.39344262295081966, |
|
"grad_norm": 0.10118231177330017, |
|
"learning_rate": 0.00011486798425276428, |
|
"loss": 1.3167, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.41530054644808745, |
|
"grad_norm": 0.06382325291633606, |
|
"learning_rate": 0.00011456853493849944, |
|
"loss": 0.9757, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.4371584699453552, |
|
"grad_norm": 0.06287430226802826, |
|
"learning_rate": 0.0001142344721582983, |
|
"loss": 1.0141, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.45901639344262296, |
|
"grad_norm": 0.061046287417411804, |
|
"learning_rate": 0.00011386600032033012, |
|
"loss": 1.1142, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.4808743169398907, |
|
"grad_norm": 0.05975975841283798, |
|
"learning_rate": 0.0001134633448871674, |
|
"loss": 1.172, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.5027322404371585, |
|
"grad_norm": 0.06590148061513901, |
|
"learning_rate": 0.00011302675223782873, |
|
"loss": 1.1934, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.5245901639344263, |
|
"grad_norm": 0.07652608305215836, |
|
"learning_rate": 0.00011255648951702296, |
|
"loss": 1.2285, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.546448087431694, |
|
"grad_norm": 0.11880210041999817, |
|
"learning_rate": 0.0001120528444716872, |
|
"loss": 1.2294, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.5683060109289617, |
|
"grad_norm": 0.04327382519841194, |
|
"learning_rate": 0.00011151612527491878, |
|
"loss": 0.9457, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.5901639344262295, |
|
"grad_norm": 0.05113707482814789, |
|
"learning_rate": 0.00011094666033740846, |
|
"loss": 1.0301, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.6120218579234973, |
|
"grad_norm": 0.04633456468582153, |
|
"learning_rate": 0.00011034479810649071, |
|
"loss": 1.1369, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.6338797814207651, |
|
"grad_norm": 0.052176687866449356, |
|
"learning_rate": 0.00010971090685293396, |
|
"loss": 1.1575, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.6557377049180327, |
|
"grad_norm": 0.05911482125520706, |
|
"learning_rate": 0.00010904537444560093, |
|
"loss": 1.1915, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.6775956284153005, |
|
"grad_norm": 0.08560285717248917, |
|
"learning_rate": 0.0001083486081141173, |
|
"loss": 1.1844, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.6994535519125683, |
|
"grad_norm": 0.0443929098546505, |
|
"learning_rate": 0.00010762103419969393, |
|
"loss": 0.9784, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.7213114754098361, |
|
"grad_norm": 0.04982827231287956, |
|
"learning_rate": 0.00010686309789425474, |
|
"loss": 1.0368, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.7431693989071039, |
|
"grad_norm": 0.04613876715302467, |
|
"learning_rate": 0.00010607526296803026, |
|
"loss": 1.0534, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.7650273224043715, |
|
"grad_norm": 0.04624936357140541, |
|
"learning_rate": 0.00010525801148578341, |
|
"loss": 1.1136, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.7868852459016393, |
|
"grad_norm": 0.050727903842926025, |
|
"learning_rate": 0.000104411843511841, |
|
"loss": 1.1563, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.8087431693989071, |
|
"grad_norm": 0.07218360155820847, |
|
"learning_rate": 0.00010353727680411158, |
|
"loss": 1.148, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.8306010928961749, |
|
"grad_norm": 0.04049117863178253, |
|
"learning_rate": 0.00010263484649727705, |
|
"loss": 0.9096, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.8524590163934426, |
|
"grad_norm": 0.0455789640545845, |
|
"learning_rate": 0.00010170510477535133, |
|
"loss": 1.0006, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.8743169398907104, |
|
"grad_norm": 0.039463143795728683, |
|
"learning_rate": 0.00010074862053380711, |
|
"loss": 1.0411, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.8961748633879781, |
|
"grad_norm": 0.042614974081516266, |
|
"learning_rate": 9.976597903147682e-05, |
|
"loss": 1.1396, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.9180327868852459, |
|
"grad_norm": 0.04930881783366203, |
|
"learning_rate": 9.875778153244143e-05, |
|
"loss": 1.1744, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.9398907103825137, |
|
"grad_norm": 0.06974472105503082, |
|
"learning_rate": 9.772464493812549e-05, |
|
"loss": 1.15, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.9617486338797814, |
|
"grad_norm": 0.04092060774564743, |
|
"learning_rate": 9.66672014098242e-05, |
|
"loss": 0.9676, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.9836065573770492, |
|
"grad_norm": 0.0392816998064518, |
|
"learning_rate": 9.558609798189311e-05, |
|
"loss": 1.0893, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 1.0163934426229508, |
|
"grad_norm": 0.08897832781076431, |
|
"learning_rate": 9.448199616583707e-05, |
|
"loss": 1.8898, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 1.0382513661202186, |
|
"grad_norm": 0.03982605040073395, |
|
"learning_rate": 9.335557154554105e-05, |
|
"loss": 0.9943, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 1.0601092896174864, |
|
"grad_norm": 0.03858646750450134, |
|
"learning_rate": 9.220751336389013e-05, |
|
"loss": 1.0459, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 1.0819672131147542, |
|
"grad_norm": 0.040587618947029114, |
|
"learning_rate": 9.10385241010317e-05, |
|
"loss": 1.1494, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 1.1038251366120218, |
|
"grad_norm": 0.052482884377241135, |
|
"learning_rate": 8.984931904453821e-05, |
|
"loss": 1.1475, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 1.1038251366120218, |
|
"eval_loss": 1.0756638050079346, |
|
"eval_runtime": 1.9721, |
|
"eval_samples_per_second": 311.855, |
|
"eval_steps_per_second": 10.142, |
|
"step": 50 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 137, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 4, |
|
"save_steps": 50, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 5, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 0 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 2.8231094746742784e+16, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|