|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.41450777202072536, |
|
"eval_steps": 50, |
|
"global_step": 200, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.002072538860103627, |
|
"eval_loss": 0.7757489085197449, |
|
"eval_runtime": 21.4463, |
|
"eval_samples_per_second": 9.512, |
|
"eval_steps_per_second": 4.756, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.02072538860103627, |
|
"grad_norm": 2.0602431297302246, |
|
"learning_rate": 0.00019967573081342103, |
|
"loss": 2.9893, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.04145077720207254, |
|
"grad_norm": 0.9440550804138184, |
|
"learning_rate": 0.0001970941817426052, |
|
"loss": 2.4423, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.06217616580310881, |
|
"grad_norm": 1.6078230142593384, |
|
"learning_rate": 0.00019199794436588243, |
|
"loss": 2.5943, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.08290155440414508, |
|
"grad_norm": 0.8488917350769043, |
|
"learning_rate": 0.0001845190085543795, |
|
"loss": 2.5973, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.10362694300518134, |
|
"grad_norm": 0.5597093105316162, |
|
"learning_rate": 0.00017485107481711012, |
|
"loss": 2.4491, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.10362694300518134, |
|
"eval_loss": 0.6410841345787048, |
|
"eval_runtime": 21.4898, |
|
"eval_samples_per_second": 9.493, |
|
"eval_steps_per_second": 4.746, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.12435233160621761, |
|
"grad_norm": 0.7345647811889648, |
|
"learning_rate": 0.00016324453755953773, |
|
"loss": 2.4479, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.14507772020725387, |
|
"grad_norm": 0.7027221322059631, |
|
"learning_rate": 0.00015000000000000001, |
|
"loss": 2.5233, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.16580310880829016, |
|
"grad_norm": 0.6218558549880981, |
|
"learning_rate": 0.00013546048870425356, |
|
"loss": 2.3496, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.18652849740932642, |
|
"grad_norm": 0.7521792650222778, |
|
"learning_rate": 0.00012000256937760445, |
|
"loss": 2.5815, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.20725388601036268, |
|
"grad_norm": 0.5388895273208618, |
|
"learning_rate": 0.00010402659401094152, |
|
"loss": 2.4037, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.20725388601036268, |
|
"eval_loss": 0.6285238265991211, |
|
"eval_runtime": 21.468, |
|
"eval_samples_per_second": 9.503, |
|
"eval_steps_per_second": 4.751, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.22797927461139897, |
|
"grad_norm": 0.8191409707069397, |
|
"learning_rate": 8.79463319744677e-05, |
|
"loss": 2.3543, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.24870466321243523, |
|
"grad_norm": 0.9439878463745117, |
|
"learning_rate": 7.217825360835473e-05, |
|
"loss": 2.5457, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.2694300518134715, |
|
"grad_norm": 0.7924945950508118, |
|
"learning_rate": 5.713074385969457e-05, |
|
"loss": 2.6822, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.29015544041450775, |
|
"grad_norm": 0.7882089614868164, |
|
"learning_rate": 4.3193525326884435e-05, |
|
"loss": 2.5139, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.31088082901554404, |
|
"grad_norm": 0.8719218373298645, |
|
"learning_rate": 3.072756464904006e-05, |
|
"loss": 2.3399, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.31088082901554404, |
|
"eval_loss": 0.6230465173721313, |
|
"eval_runtime": 21.4544, |
|
"eval_samples_per_second": 9.509, |
|
"eval_steps_per_second": 4.754, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.3316062176165803, |
|
"grad_norm": 0.7077692151069641, |
|
"learning_rate": 2.0055723659649904e-05, |
|
"loss": 2.5644, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.35233160621761656, |
|
"grad_norm": 0.6462181210517883, |
|
"learning_rate": 1.1454397434679021e-05, |
|
"loss": 2.3797, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.37305699481865284, |
|
"grad_norm": 0.9239203929901123, |
|
"learning_rate": 5.146355805285452e-06, |
|
"loss": 2.4447, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.39378238341968913, |
|
"grad_norm": 1.950250506401062, |
|
"learning_rate": 1.2949737362087156e-06, |
|
"loss": 2.3303, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.41450777202072536, |
|
"grad_norm": 0.7704319357872009, |
|
"learning_rate": 0.0, |
|
"loss": 2.3666, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.41450777202072536, |
|
"eval_loss": 0.6215264797210693, |
|
"eval_runtime": 21.4235, |
|
"eval_samples_per_second": 9.522, |
|
"eval_steps_per_second": 4.761, |
|
"step": 200 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 200, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 50, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.301191701037056e+17, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|