{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.061977068484660676,
  "eval_steps": 10,
  "global_step": 50,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0012395413696932136,
      "eval_loss": 3.160734176635742,
      "eval_runtime": 70.7744,
      "eval_samples_per_second": 4.804,
      "eval_steps_per_second": 2.402,
      "step": 1
    },
    {
      "epoch": 0.006197706848466067,
      "grad_norm": 1.6868420839309692,
      "learning_rate": 5e-05,
      "loss": 3.0013,
      "step": 5
    },
    {
      "epoch": 0.012395413696932134,
      "grad_norm": 1.7511683702468872,
      "learning_rate": 0.0001,
      "loss": 3.0908,
      "step": 10
    },
    {
      "epoch": 0.012395413696932134,
      "eval_loss": 2.564448356628418,
      "eval_runtime": 71.7863,
      "eval_samples_per_second": 4.736,
      "eval_steps_per_second": 2.368,
      "step": 10
    },
    {
      "epoch": 0.018593120545398203,
      "grad_norm": 1.5322659015655518,
      "learning_rate": 9.619397662556435e-05,
      "loss": 2.2162,
      "step": 15
    },
    {
      "epoch": 0.02479082739386427,
      "grad_norm": 1.4782131910324097,
      "learning_rate": 8.535533905932738e-05,
      "loss": 1.7327,
      "step": 20
    },
    {
      "epoch": 0.02479082739386427,
      "eval_loss": 1.4430201053619385,
      "eval_runtime": 71.9238,
      "eval_samples_per_second": 4.727,
      "eval_steps_per_second": 2.364,
      "step": 20
    },
    {
      "epoch": 0.030988534242330338,
      "grad_norm": 1.605191707611084,
      "learning_rate": 6.91341716182545e-05,
      "loss": 1.2809,
      "step": 25
    },
    {
      "epoch": 0.03718624109079641,
      "grad_norm": 1.7237857580184937,
      "learning_rate": 5e-05,
      "loss": 1.2256,
      "step": 30
    },
    {
      "epoch": 0.03718624109079641,
      "eval_loss": 1.1801698207855225,
      "eval_runtime": 71.9099,
      "eval_samples_per_second": 4.728,
      "eval_steps_per_second": 2.364,
      "step": 30
    },
    {
      "epoch": 0.04338394793926247,
      "grad_norm": 1.2960554361343384,
      "learning_rate": 3.086582838174551e-05,
      "loss": 1.0238,
      "step": 35
    },
    {
      "epoch": 0.04958165478772854,
      "grad_norm": 1.3113014698028564,
      "learning_rate": 1.4644660940672627e-05,
      "loss": 1.0754,
      "step": 40
    },
    {
      "epoch": 0.04958165478772854,
      "eval_loss": 1.103080153465271,
      "eval_runtime": 72.0279,
      "eval_samples_per_second": 4.72,
      "eval_steps_per_second": 2.36,
      "step": 40
    },
    {
      "epoch": 0.05577936163619461,
      "grad_norm": 1.3435271978378296,
      "learning_rate": 3.8060233744356633e-06,
      "loss": 0.9994,
      "step": 45
    },
    {
      "epoch": 0.061977068484660676,
      "grad_norm": 1.486733317375183,
      "learning_rate": 0.0,
      "loss": 1.0947,
      "step": 50
    },
    {
      "epoch": 0.061977068484660676,
      "eval_loss": 1.0898029804229736,
      "eval_runtime": 72.0292,
      "eval_samples_per_second": 4.72,
      "eval_steps_per_second": 2.36,
      "step": 50
    }
  ],
  "logging_steps": 5,
  "max_steps": 50,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 13,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 1.6061930309419008e+16,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}
|