|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 284.7457627118644, |
|
"eval_steps": 1500, |
|
"global_step": 2100, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 13.56, |
|
"learning_rate": 4.764285714285715e-05, |
|
"loss": 0.0462, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 27.12, |
|
"learning_rate": 4.5261904761904766e-05, |
|
"loss": 0.0281, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 40.68, |
|
"learning_rate": 4.2880952380952384e-05, |
|
"loss": 0.0283, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 54.24, |
|
"learning_rate": 4.05e-05, |
|
"loss": 0.0289, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 67.8, |
|
"learning_rate": 3.811904761904762e-05, |
|
"loss": 0.018, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 81.36, |
|
"learning_rate": 3.573809523809524e-05, |
|
"loss": 0.0269, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 94.92, |
|
"learning_rate": 3.3357142857142856e-05, |
|
"loss": 0.0191, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 108.47, |
|
"learning_rate": 3.0976190476190474e-05, |
|
"loss": 0.0211, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 122.03, |
|
"learning_rate": 2.85952380952381e-05, |
|
"loss": 0.0214, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 135.59, |
|
"learning_rate": 2.6214285714285713e-05, |
|
"loss": 0.0272, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 149.15, |
|
"learning_rate": 2.3833333333333334e-05, |
|
"loss": 0.0217, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 162.71, |
|
"learning_rate": 2.1452380952380956e-05, |
|
"loss": 0.0207, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 176.27, |
|
"learning_rate": 1.9071428571428574e-05, |
|
"loss": 0.0192, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 189.83, |
|
"learning_rate": 1.669047619047619e-05, |
|
"loss": 0.0164, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 203.39, |
|
"learning_rate": 1.4309523809523811e-05, |
|
"loss": 0.0194, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 203.39, |
|
"eval_accuracy": 0.9677419066429138, |
|
"eval_loss": 0.115411177277565, |
|
"eval_runtime": 1.4468, |
|
"eval_samples_per_second": 64.281, |
|
"eval_steps_per_second": 8.294, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 216.95, |
|
"learning_rate": 1.192857142857143e-05, |
|
"loss": 0.0177, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 230.51, |
|
"learning_rate": 9.547619047619049e-06, |
|
"loss": 0.0168, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 244.07, |
|
"learning_rate": 7.166666666666667e-06, |
|
"loss": 0.0171, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 257.63, |
|
"learning_rate": 4.785714285714286e-06, |
|
"loss": 0.0179, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 271.19, |
|
"learning_rate": 2.404761904761905e-06, |
|
"loss": 0.0171, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 284.75, |
|
"learning_rate": 2.380952380952381e-08, |
|
"loss": 0.0163, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 284.75, |
|
"step": 2100, |
|
"total_flos": 4.0318045822521795e+18, |
|
"train_loss": 0.022167698939641316, |
|
"train_runtime": 3842.9627, |
|
"train_samples_per_second": 36.378, |
|
"train_steps_per_second": 0.546 |
|
}, |
|
{ |
|
"epoch": 284.75, |
|
"eval_accuracy": 0.9677419066429138, |
|
"eval_loss": 0.11643270403146744, |
|
"eval_runtime": 1.6786, |
|
"eval_samples_per_second": 55.402, |
|
"eval_steps_per_second": 7.149, |
|
"step": 2100 |
|
} |
|
], |
|
"logging_steps": 100, |
|
"max_steps": 2100, |
|
"num_train_epochs": 300, |
|
"save_steps": 2000, |
|
"total_flos": 4.0318045822521795e+18, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|