|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.029448575425163808, |
|
"eval_steps": 9, |
|
"global_step": 100, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.00029448575425163807, |
|
"eval_loss": 2.0751140117645264, |
|
"eval_runtime": 395.9969, |
|
"eval_samples_per_second": 14.445, |
|
"eval_steps_per_second": 1.806, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0008834572627549142, |
|
"grad_norm": 45.21540451049805, |
|
"learning_rate": 1.5e-05, |
|
"loss": 8.1698, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.0017669145255098284, |
|
"grad_norm": 29.80286407470703, |
|
"learning_rate": 3e-05, |
|
"loss": 7.8228, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.002650371788264743, |
|
"grad_norm": 14.917160034179688, |
|
"learning_rate": 4.5e-05, |
|
"loss": 5.8253, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.002650371788264743, |
|
"eval_loss": 1.2880661487579346, |
|
"eval_runtime": 398.9272, |
|
"eval_samples_per_second": 14.338, |
|
"eval_steps_per_second": 1.792, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.003533829051019657, |
|
"grad_norm": 12.576233863830566, |
|
"learning_rate": 4.993910125649561e-05, |
|
"loss": 4.4652, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.004417286313774571, |
|
"grad_norm": 12.087996482849121, |
|
"learning_rate": 4.962019382530521e-05, |
|
"loss": 4.1895, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.005300743576529486, |
|
"grad_norm": 9.449481964111328, |
|
"learning_rate": 4.9031542398457974e-05, |
|
"loss": 3.6443, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.005300743576529486, |
|
"eval_loss": 0.9807960987091064, |
|
"eval_runtime": 398.492, |
|
"eval_samples_per_second": 14.354, |
|
"eval_steps_per_second": 1.794, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.0061842008392844, |
|
"grad_norm": 8.682868003845215, |
|
"learning_rate": 4.817959636416969e-05, |
|
"loss": 3.8094, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.007067658102039314, |
|
"grad_norm": 7.913160800933838, |
|
"learning_rate": 4.707368982147318e-05, |
|
"loss": 3.8054, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.007951115364794228, |
|
"grad_norm": 9.07978343963623, |
|
"learning_rate": 4.572593931387604e-05, |
|
"loss": 3.5453, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.007951115364794228, |
|
"eval_loss": 0.9140304327011108, |
|
"eval_runtime": 398.4209, |
|
"eval_samples_per_second": 14.357, |
|
"eval_steps_per_second": 1.795, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.008834572627549142, |
|
"grad_norm": 8.685811042785645, |
|
"learning_rate": 4.415111107797445e-05, |
|
"loss": 3.5354, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.009718029890304056, |
|
"grad_norm": 7.87326192855835, |
|
"learning_rate": 4.2366459261474933e-05, |
|
"loss": 3.519, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.010601487153058971, |
|
"grad_norm": 8.163304328918457, |
|
"learning_rate": 4.039153688314145e-05, |
|
"loss": 3.73, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.010601487153058971, |
|
"eval_loss": 0.8961595892906189, |
|
"eval_runtime": 398.4206, |
|
"eval_samples_per_second": 14.357, |
|
"eval_steps_per_second": 1.795, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.011484944415813885, |
|
"grad_norm": 8.32168960571289, |
|
"learning_rate": 3.824798160583012e-05, |
|
"loss": 3.6521, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.0123684016785688, |
|
"grad_norm": 5.999353885650635, |
|
"learning_rate": 3.5959278669726935e-05, |
|
"loss": 3.7375, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.013251858941323713, |
|
"grad_norm": 6.515491485595703, |
|
"learning_rate": 3.355050358314172e-05, |
|
"loss": 3.1763, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.013251858941323713, |
|
"eval_loss": 0.886064887046814, |
|
"eval_runtime": 398.3365, |
|
"eval_samples_per_second": 14.36, |
|
"eval_steps_per_second": 1.795, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.014135316204078627, |
|
"grad_norm": 7.323680400848389, |
|
"learning_rate": 3.104804738999169e-05, |
|
"loss": 3.6858, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.015018773466833541, |
|
"grad_norm": 8.582962036132812, |
|
"learning_rate": 2.8479327524001636e-05, |
|
"loss": 3.484, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.015902230729588455, |
|
"grad_norm": 7.271780490875244, |
|
"learning_rate": 2.587248741756253e-05, |
|
"loss": 3.4188, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.015902230729588455, |
|
"eval_loss": 0.8784601092338562, |
|
"eval_runtime": 398.3464, |
|
"eval_samples_per_second": 14.359, |
|
"eval_steps_per_second": 1.795, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.01678568799234337, |
|
"grad_norm": 7.015294075012207, |
|
"learning_rate": 2.3256088156396868e-05, |
|
"loss": 3.6007, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.017669145255098283, |
|
"grad_norm": 8.451822280883789, |
|
"learning_rate": 2.0658795558326743e-05, |
|
"loss": 3.2948, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.0185526025178532, |
|
"grad_norm": 7.4288129806518555, |
|
"learning_rate": 1.8109066104575023e-05, |
|
"loss": 3.4584, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.0185526025178532, |
|
"eval_loss": 0.872480034828186, |
|
"eval_runtime": 398.1418, |
|
"eval_samples_per_second": 14.367, |
|
"eval_steps_per_second": 1.796, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.01943605978060811, |
|
"grad_norm": 6.286691188812256, |
|
"learning_rate": 1.56348351646022e-05, |
|
"loss": 3.5261, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.020319517043363027, |
|
"grad_norm": 7.259737014770508, |
|
"learning_rate": 1.3263210930352737e-05, |
|
"loss": 3.9765, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.021202974306117943, |
|
"grad_norm": 7.015435695648193, |
|
"learning_rate": 1.1020177413231334e-05, |
|
"loss": 3.3159, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.021202974306117943, |
|
"eval_loss": 0.8712356090545654, |
|
"eval_runtime": 398.3235, |
|
"eval_samples_per_second": 14.36, |
|
"eval_steps_per_second": 1.795, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.022086431568872855, |
|
"grad_norm": 6.05751895904541, |
|
"learning_rate": 8.930309757836517e-06, |
|
"loss": 3.464, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.02296988883162777, |
|
"grad_norm": 6.652122974395752, |
|
"learning_rate": 7.016504991533726e-06, |
|
"loss": 3.3039, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.023853346094382683, |
|
"grad_norm": 6.0785136222839355, |
|
"learning_rate": 5.299731159831953e-06, |
|
"loss": 3.3843, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.023853346094382683, |
|
"eval_loss": 0.8678173422813416, |
|
"eval_runtime": 398.2552, |
|
"eval_samples_per_second": 14.363, |
|
"eval_steps_per_second": 1.795, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.0247368033571376, |
|
"grad_norm": 6.711858749389648, |
|
"learning_rate": 3.798797596089351e-06, |
|
"loss": 3.6375, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.02562026061989251, |
|
"grad_norm": 6.864299297332764, |
|
"learning_rate": 2.5301488425208296e-06, |
|
"loss": 3.1301, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 0.026503717882647427, |
|
"grad_norm": 7.26350736618042, |
|
"learning_rate": 1.5076844803522922e-06, |
|
"loss": 3.1821, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.026503717882647427, |
|
"eval_loss": 0.8663612008094788, |
|
"eval_runtime": 398.3373, |
|
"eval_samples_per_second": 14.36, |
|
"eval_steps_per_second": 1.795, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.027387175145402343, |
|
"grad_norm": 6.898801326751709, |
|
"learning_rate": 7.426068431000882e-07, |
|
"loss": 3.1039, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 0.028270632408157255, |
|
"grad_norm": 6.446213722229004, |
|
"learning_rate": 2.4329828146074095e-07, |
|
"loss": 3.1602, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.02915408967091217, |
|
"grad_norm": 6.621945381164551, |
|
"learning_rate": 1.522932452260595e-08, |
|
"loss": 3.3198, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 0.02915408967091217, |
|
"eval_loss": 0.8659896850585938, |
|
"eval_runtime": 398.2351, |
|
"eval_samples_per_second": 14.363, |
|
"eval_steps_per_second": 1.795, |
|
"step": 99 |
|
} |
|
], |
|
"logging_steps": 3, |
|
"max_steps": 100, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 9, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.406877472653312e+17, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|