|
{
  "best_metric": 1.5350492000579834,
  "best_model_checkpoint": "miner_id_24/checkpoint-200",
  "epoch": 0.1718213058419244,
  "eval_steps": 50,
  "global_step": 200,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.000859106529209622,
      "eval_loss": 2.4528794288635254,
      "eval_runtime": 36.5626,
      "eval_samples_per_second": 13.402,
      "eval_steps_per_second": 3.364,
      "step": 1
    },
    {
      "epoch": 0.00859106529209622,
      "grad_norm": 3.842317581176758,
      "learning_rate": 5.05e-06,
      "loss": 2.3208,
      "step": 10
    },
    {
      "epoch": 0.01718213058419244,
      "grad_norm": 1.7511423826217651,
      "learning_rate": 1.01e-05,
      "loss": 2.1038,
      "step": 20
    },
    {
      "epoch": 0.02577319587628866,
      "grad_norm": 0.6390647292137146,
      "learning_rate": 9.538888888888889e-06,
      "loss": 1.6949,
      "step": 30
    },
    {
      "epoch": 0.03436426116838488,
      "grad_norm": 0.8226912617683411,
      "learning_rate": 8.977777777777778e-06,
      "loss": 1.6081,
      "step": 40
    },
    {
      "epoch": 0.0429553264604811,
      "grad_norm": 1.4054566621780396,
      "learning_rate": 8.416666666666667e-06,
      "loss": 1.7309,
      "step": 50
    },
    {
      "epoch": 0.0429553264604811,
      "eval_loss": 1.6015360355377197,
      "eval_runtime": 36.8118,
      "eval_samples_per_second": 13.311,
      "eval_steps_per_second": 3.341,
      "step": 50
    },
    {
      "epoch": 0.05154639175257732,
      "grad_norm": 0.49393361806869507,
      "learning_rate": 7.855555555555556e-06,
      "loss": 1.5143,
      "step": 60
    },
    {
      "epoch": 0.06013745704467354,
      "grad_norm": 0.4182685911655426,
      "learning_rate": 7.294444444444444e-06,
      "loss": 1.5843,
      "step": 70
    },
    {
      "epoch": 0.06872852233676977,
      "grad_norm": 0.5429704785346985,
      "learning_rate": 6.733333333333333e-06,
      "loss": 1.5534,
      "step": 80
    },
    {
      "epoch": 0.07731958762886598,
      "grad_norm": 0.7752597332000732,
      "learning_rate": 6.172222222222223e-06,
      "loss": 1.5595,
      "step": 90
    },
    {
      "epoch": 0.0859106529209622,
      "grad_norm": 1.1356843709945679,
      "learning_rate": 5.611111111111111e-06,
      "loss": 1.6458,
      "step": 100
    },
    {
      "epoch": 0.0859106529209622,
      "eval_loss": 1.5543313026428223,
      "eval_runtime": 36.4615,
      "eval_samples_per_second": 13.439,
      "eval_steps_per_second": 3.373,
      "step": 100
    },
    {
      "epoch": 0.09450171821305842,
      "grad_norm": 0.41234856843948364,
      "learning_rate": 5.05e-06,
      "loss": 1.4694,
      "step": 110
    },
    {
      "epoch": 0.10309278350515463,
      "grad_norm": 0.45751872658729553,
      "learning_rate": 4.488888888888889e-06,
      "loss": 1.5459,
      "step": 120
    },
    {
      "epoch": 0.11168384879725086,
      "grad_norm": 0.4848977327346802,
      "learning_rate": 3.927777777777778e-06,
      "loss": 1.4818,
      "step": 130
    },
    {
      "epoch": 0.12027491408934708,
      "grad_norm": 0.6670855283737183,
      "learning_rate": 3.3666666666666665e-06,
      "loss": 1.5387,
      "step": 140
    },
    {
      "epoch": 0.12886597938144329,
      "grad_norm": 1.1316231489181519,
      "learning_rate": 2.8055555555555555e-06,
      "loss": 1.5887,
      "step": 150
    },
    {
      "epoch": 0.12886597938144329,
      "eval_loss": 1.5382037162780762,
      "eval_runtime": 36.6812,
      "eval_samples_per_second": 13.358,
      "eval_steps_per_second": 3.353,
      "step": 150
    },
    {
      "epoch": 0.13745704467353953,
      "grad_norm": 0.4882570207118988,
      "learning_rate": 2.2444444444444445e-06,
      "loss": 1.4184,
      "step": 160
    },
    {
      "epoch": 0.14604810996563575,
      "grad_norm": 0.4242735207080841,
      "learning_rate": 1.6833333333333332e-06,
      "loss": 1.5099,
      "step": 170
    },
    {
      "epoch": 0.15463917525773196,
      "grad_norm": 0.5443804860115051,
      "learning_rate": 1.1222222222222222e-06,
      "loss": 1.4786,
      "step": 180
    },
    {
      "epoch": 0.16323024054982818,
      "grad_norm": 0.6373876929283142,
      "learning_rate": 5.611111111111111e-07,
      "loss": 1.5083,
      "step": 190
    },
    {
      "epoch": 0.1718213058419244,
      "grad_norm": 1.4106392860412598,
      "learning_rate": 0.0,
      "loss": 1.5946,
      "step": 200
    },
    {
      "epoch": 0.1718213058419244,
      "eval_loss": 1.5350492000579834,
      "eval_runtime": 36.6428,
      "eval_samples_per_second": 13.372,
      "eval_steps_per_second": 3.357,
      "step": 200
    }
  ],
  "logging_steps": 10,
  "max_steps": 200,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 50,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 3,
        "early_stopping_threshold": 0.0
      },
      "attributes": {
        "early_stopping_patience_counter": 0
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 9.925036081152e+16,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}
|
|