|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.003463755348224675, |
|
"eval_steps": 9, |
|
"global_step": 36, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 9.621542633957431e-05, |
|
"eval_loss": 3.1601955890655518, |
|
"eval_runtime": 13080.0915, |
|
"eval_samples_per_second": 2.677, |
|
"eval_steps_per_second": 1.338, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.00028864627901872294, |
|
"grad_norm": 88.87578582763672, |
|
"learning_rate": 3e-05, |
|
"loss": 100.51, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.0005772925580374459, |
|
"grad_norm": 85.21216583251953, |
|
"learning_rate": 6e-05, |
|
"loss": 96.5347, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.0008659388370561688, |
|
"grad_norm": 77.13072967529297, |
|
"learning_rate": 9e-05, |
|
"loss": 80.7471, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.0008659388370561688, |
|
"eval_loss": 2.1278481483459473, |
|
"eval_runtime": 13090.1143, |
|
"eval_samples_per_second": 2.675, |
|
"eval_steps_per_second": 1.337, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.0011545851160748917, |
|
"grad_norm": 68.87947845458984, |
|
"learning_rate": 0.00012, |
|
"loss": 64.7623, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.0014432313950936146, |
|
"grad_norm": 43.136959075927734, |
|
"learning_rate": 0.00015000000000000001, |
|
"loss": 44.3736, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.0017318776741123375, |
|
"grad_norm": 41.73539352416992, |
|
"learning_rate": 0.00018, |
|
"loss": 31.1364, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.0017318776741123375, |
|
"eval_loss": 0.844988226890564, |
|
"eval_runtime": 13063.393, |
|
"eval_samples_per_second": 2.68, |
|
"eval_steps_per_second": 1.34, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.0020205239531310604, |
|
"grad_norm": 37.0665397644043, |
|
"learning_rate": 0.0001999229036240723, |
|
"loss": 25.9659, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.0023091702321497835, |
|
"grad_norm": 32.10556411743164, |
|
"learning_rate": 0.00019876883405951377, |
|
"loss": 19.8184, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.002597816511168506, |
|
"grad_norm": 39.13405990600586, |
|
"learning_rate": 0.00019624552364536473, |
|
"loss": 15.5343, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.002597816511168506, |
|
"eval_loss": 0.4686388671398163, |
|
"eval_runtime": 11313.8601, |
|
"eval_samples_per_second": 3.094, |
|
"eval_steps_per_second": 1.547, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.0028864627901872292, |
|
"grad_norm": 29.395793914794922, |
|
"learning_rate": 0.0001923879532511287, |
|
"loss": 14.8443, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.003175109069205952, |
|
"grad_norm": 27.154170989990234, |
|
"learning_rate": 0.00018724960070727972, |
|
"loss": 13.3753, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.003463755348224675, |
|
"grad_norm": 23.787752151489258, |
|
"learning_rate": 0.00018090169943749476, |
|
"loss": 10.3941, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.003463755348224675, |
|
"eval_loss": 0.3380902111530304, |
|
"eval_runtime": 10045.2845, |
|
"eval_samples_per_second": 3.485, |
|
"eval_steps_per_second": 1.743, |
|
"step": 36 |
|
} |
|
], |
|
"logging_steps": 3, |
|
"max_steps": 100, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 9, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.8932962735580774e+17, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|