|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.08991310918020401, |
|
"eval_steps": 17, |
|
"global_step": 119, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0007555723460521345, |
|
"eval_loss": 12.450613975524902, |
|
"eval_runtime": 61.6454, |
|
"eval_samples_per_second": 36.175, |
|
"eval_steps_per_second": 4.526, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0022667170381564035, |
|
"grad_norm": 0.009174207225441933, |
|
"learning_rate": 3e-05, |
|
"loss": 12.453, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.004533434076312807, |
|
"grad_norm": 0.009654853492975235, |
|
"learning_rate": 6e-05, |
|
"loss": 12.4526, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.00680015111446921, |
|
"grad_norm": 0.008986242115497589, |
|
"learning_rate": 9e-05, |
|
"loss": 12.4507, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.009066868152625614, |
|
"grad_norm": 0.009755842387676239, |
|
"learning_rate": 9.997266286704631e-05, |
|
"loss": 12.4496, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.011333585190782017, |
|
"grad_norm": 0.011493120342493057, |
|
"learning_rate": 9.98292246503335e-05, |
|
"loss": 12.4487, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.012844729882886286, |
|
"eval_loss": 12.450437545776367, |
|
"eval_runtime": 61.7871, |
|
"eval_samples_per_second": 36.092, |
|
"eval_steps_per_second": 4.516, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.01360030222893842, |
|
"grad_norm": 0.009524165652692318, |
|
"learning_rate": 9.956320346634876e-05, |
|
"loss": 12.454, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.015867019267094825, |
|
"grad_norm": 0.012806428596377373, |
|
"learning_rate": 9.917525374361912e-05, |
|
"loss": 12.4518, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.018133736305251228, |
|
"grad_norm": 0.011713715270161629, |
|
"learning_rate": 9.86663298624003e-05, |
|
"loss": 12.4506, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.02040045334340763, |
|
"grad_norm": 0.010967453010380268, |
|
"learning_rate": 9.803768380684242e-05, |
|
"loss": 12.4491, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.022667170381564034, |
|
"grad_norm": 0.0131283700466156, |
|
"learning_rate": 9.729086208503174e-05, |
|
"loss": 12.4512, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.024933887419720437, |
|
"grad_norm": 0.012522528879344463, |
|
"learning_rate": 9.642770192448536e-05, |
|
"loss": 12.4509, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.02568945976577257, |
|
"eval_loss": 12.45008373260498, |
|
"eval_runtime": 61.8082, |
|
"eval_samples_per_second": 36.079, |
|
"eval_steps_per_second": 4.514, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.02720060445787684, |
|
"grad_norm": 0.013970930129289627, |
|
"learning_rate": 9.545032675245813e-05, |
|
"loss": 12.4541, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.029467321496033247, |
|
"grad_norm": 0.014797762967646122, |
|
"learning_rate": 9.43611409721806e-05, |
|
"loss": 12.4476, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.03173403853418965, |
|
"grad_norm": 0.01638209819793701, |
|
"learning_rate": 9.316282404787871e-05, |
|
"loss": 12.4498, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.03400075557234605, |
|
"grad_norm": 0.017414981499314308, |
|
"learning_rate": 9.185832391312644e-05, |
|
"loss": 12.4489, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.036267472610502456, |
|
"grad_norm": 0.01976948417723179, |
|
"learning_rate": 9.045084971874738e-05, |
|
"loss": 12.4498, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.03853418964865886, |
|
"grad_norm": 0.017705973237752914, |
|
"learning_rate": 8.894386393810563e-05, |
|
"loss": 12.4478, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.03853418964865886, |
|
"eval_loss": 12.449511528015137, |
|
"eval_runtime": 61.8317, |
|
"eval_samples_per_second": 36.066, |
|
"eval_steps_per_second": 4.512, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.04080090668681526, |
|
"grad_norm": 0.018775586038827896, |
|
"learning_rate": 8.73410738492077e-05, |
|
"loss": 12.4476, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.043067623724971665, |
|
"grad_norm": 0.020082606002688408, |
|
"learning_rate": 8.564642241456986e-05, |
|
"loss": 12.4519, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.04533434076312807, |
|
"grad_norm": 0.01945601962506771, |
|
"learning_rate": 8.386407858128706e-05, |
|
"loss": 12.4513, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.04760105780128447, |
|
"grad_norm": 0.02504323236644268, |
|
"learning_rate": 8.199842702516583e-05, |
|
"loss": 12.451, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.049867774839440875, |
|
"grad_norm": 0.02477116324007511, |
|
"learning_rate": 8.005405736415126e-05, |
|
"loss": 12.4508, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.05137891953154514, |
|
"eval_loss": 12.448628425598145, |
|
"eval_runtime": 61.7963, |
|
"eval_samples_per_second": 36.086, |
|
"eval_steps_per_second": 4.515, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.05213449187759728, |
|
"grad_norm": 0.02452562004327774, |
|
"learning_rate": 7.803575286758364e-05, |
|
"loss": 12.4501, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.05440120891575368, |
|
"grad_norm": 0.024608276784420013, |
|
"learning_rate": 7.594847868906076e-05, |
|
"loss": 12.449, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.056667925953910084, |
|
"grad_norm": 0.025729818269610405, |
|
"learning_rate": 7.379736965185368e-05, |
|
"loss": 12.4491, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.058934642992066494, |
|
"grad_norm": 0.026810096576809883, |
|
"learning_rate": 7.158771761692464e-05, |
|
"loss": 12.4492, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.0612013600302229, |
|
"grad_norm": 0.030193008482456207, |
|
"learning_rate": 6.932495846462261e-05, |
|
"loss": 12.4488, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.0634680770683793, |
|
"grad_norm": 0.0322081558406353, |
|
"learning_rate": 6.701465872208216e-05, |
|
"loss": 12.4457, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.06422364941443143, |
|
"eval_loss": 12.447479248046875, |
|
"eval_runtime": 61.6601, |
|
"eval_samples_per_second": 36.166, |
|
"eval_steps_per_second": 4.525, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.0657347941065357, |
|
"grad_norm": 0.03805829957127571, |
|
"learning_rate": 6.466250186922325e-05, |
|
"loss": 12.4443, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 0.0680015111446921, |
|
"grad_norm": 0.03595505282282829, |
|
"learning_rate": 6.227427435703997e-05, |
|
"loss": 12.4494, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.0702682281828485, |
|
"grad_norm": 0.03157583624124527, |
|
"learning_rate": 5.985585137257401e-05, |
|
"loss": 12.446, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 0.07253494522100491, |
|
"grad_norm": 0.03606283292174339, |
|
"learning_rate": 5.74131823855921e-05, |
|
"loss": 12.4462, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.07480166225916131, |
|
"grad_norm": 0.03671179711818695, |
|
"learning_rate": 5.495227651252315e-05, |
|
"loss": 12.4473, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 0.07706837929731772, |
|
"grad_norm": 0.042302753776311874, |
|
"learning_rate": 5.247918773366112e-05, |
|
"loss": 12.4479, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 0.07706837929731772, |
|
"eval_loss": 12.446091651916504, |
|
"eval_runtime": 61.8106, |
|
"eval_samples_per_second": 36.078, |
|
"eval_steps_per_second": 4.514, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 0.07933509633547413, |
|
"grad_norm": 0.04813135042786598, |
|
"learning_rate": 5e-05, |
|
"loss": 12.4455, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.08160181337363052, |
|
"grad_norm": 0.046277038753032684, |
|
"learning_rate": 4.7520812266338885e-05, |
|
"loss": 12.4466, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.08386853041178693, |
|
"grad_norm": 0.046285875141620636, |
|
"learning_rate": 4.504772348747687e-05, |
|
"loss": 12.4448, |
|
"step": 111 |
|
}, |
|
{ |
|
"epoch": 0.08613524744994333, |
|
"grad_norm": 0.051637545228004456, |
|
"learning_rate": 4.2586817614407895e-05, |
|
"loss": 12.4436, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 0.08840196448809974, |
|
"grad_norm": 0.04436314478516579, |
|
"learning_rate": 4.0144148627425993e-05, |
|
"loss": 12.4446, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 0.08991310918020401, |
|
"eval_loss": 12.444755554199219, |
|
"eval_runtime": 61.8074, |
|
"eval_samples_per_second": 36.08, |
|
"eval_steps_per_second": 4.514, |
|
"step": 119 |
|
} |
|
], |
|
"logging_steps": 3, |
|
"max_steps": 200, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 17, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 263272267776.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|