|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.0060779189205616, |
|
"eval_steps": 50, |
|
"global_step": 50, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.000121558378411232, |
|
"grad_norm": 1.3522109985351562, |
|
"learning_rate": 2e-05, |
|
"loss": 9.0768, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.000121558378411232, |
|
"eval_loss": 2.5203659534454346, |
|
"eval_runtime": 130.544, |
|
"eval_samples_per_second": 26.535, |
|
"eval_steps_per_second": 13.268, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.000243116756822464, |
|
"grad_norm": 0.9176049828529358, |
|
"learning_rate": 4e-05, |
|
"loss": 8.9894, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.000364675135233696, |
|
"grad_norm": 1.0146596431732178, |
|
"learning_rate": 6e-05, |
|
"loss": 9.935, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.000486233513644928, |
|
"grad_norm": 1.0469788312911987, |
|
"learning_rate": 8e-05, |
|
"loss": 9.3424, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.00060779189205616, |
|
"grad_norm": 0.8254321813583374, |
|
"learning_rate": 0.0001, |
|
"loss": 8.339, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.000729350270467392, |
|
"grad_norm": 1.032138705253601, |
|
"learning_rate": 0.00012, |
|
"loss": 9.6692, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.000850908648878624, |
|
"grad_norm": 1.692064881324768, |
|
"learning_rate": 0.00014, |
|
"loss": 9.8725, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.000972467027289856, |
|
"grad_norm": 1.5163041353225708, |
|
"learning_rate": 0.00016, |
|
"loss": 9.3607, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.001094025405701088, |
|
"grad_norm": 3.152874231338501, |
|
"learning_rate": 0.00018, |
|
"loss": 9.9624, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.00121558378411232, |
|
"grad_norm": 1.7264574766159058, |
|
"learning_rate": 0.0002, |
|
"loss": 10.4001, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.0013371421625235518, |
|
"grad_norm": 4.396702766418457, |
|
"learning_rate": 0.0001999863304992469, |
|
"loss": 8.8946, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.001458700540934784, |
|
"grad_norm": 1.9022140502929688, |
|
"learning_rate": 0.00019994532573409262, |
|
"loss": 10.3819, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.001580258919346016, |
|
"grad_norm": 2.1845006942749023, |
|
"learning_rate": 0.00019987699691483048, |
|
"loss": 9.6356, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.001701817297757248, |
|
"grad_norm": 2.10140061378479, |
|
"learning_rate": 0.00019978136272187747, |
|
"loss": 9.6127, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.0018233756761684798, |
|
"grad_norm": 2.068610668182373, |
|
"learning_rate": 0.000199658449300667, |
|
"loss": 11.08, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.001944934054579712, |
|
"grad_norm": 3.655803680419922, |
|
"learning_rate": 0.00019950829025450114, |
|
"loss": 10.0545, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.002066492432990944, |
|
"grad_norm": 1.8618426322937012, |
|
"learning_rate": 0.00019933092663536382, |
|
"loss": 9.6545, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.002188050811402176, |
|
"grad_norm": 1.8773517608642578, |
|
"learning_rate": 0.00019912640693269752, |
|
"loss": 7.7819, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.002309609189813408, |
|
"grad_norm": 2.198024272918701, |
|
"learning_rate": 0.00019889478706014687, |
|
"loss": 9.8384, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.00243116756822464, |
|
"grad_norm": 2.1132476329803467, |
|
"learning_rate": 0.00019863613034027224, |
|
"loss": 8.5882, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.0025527259466358717, |
|
"grad_norm": 2.646947145462036, |
|
"learning_rate": 0.00019835050748723824, |
|
"loss": 9.4244, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.0026742843250471037, |
|
"grad_norm": 2.169708728790283, |
|
"learning_rate": 0.00019803799658748094, |
|
"loss": 8.9, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.002795842703458336, |
|
"grad_norm": 4.217128276824951, |
|
"learning_rate": 0.00019769868307835994, |
|
"loss": 8.5476, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.002917401081869568, |
|
"grad_norm": 1.9587572813034058, |
|
"learning_rate": 0.0001973326597248006, |
|
"loss": 9.5256, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.0030389594602808, |
|
"grad_norm": 4.422936916351318, |
|
"learning_rate": 0.00019694002659393305, |
|
"loss": 8.403, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.003160517838692032, |
|
"grad_norm": 1.6125566959381104, |
|
"learning_rate": 0.00019652089102773488, |
|
"loss": 9.2696, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.003282076217103264, |
|
"grad_norm": 1.9092199802398682, |
|
"learning_rate": 0.00019607536761368484, |
|
"loss": 9.0772, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.003403634595514496, |
|
"grad_norm": 3.2192418575286865, |
|
"learning_rate": 0.00019560357815343577, |
|
"loss": 9.7968, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.0035251929739257277, |
|
"grad_norm": 2.5830280780792236, |
|
"learning_rate": 0.00019510565162951537, |
|
"loss": 9.7017, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.0036467513523369597, |
|
"grad_norm": 2.7067151069641113, |
|
"learning_rate": 0.00019458172417006347, |
|
"loss": 10.3412, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.0037683097307481916, |
|
"grad_norm": 3.9291067123413086, |
|
"learning_rate": 0.00019403193901161613, |
|
"loss": 9.8539, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.003889868109159424, |
|
"grad_norm": 1.7529088258743286, |
|
"learning_rate": 0.0001934564464599461, |
|
"loss": 8.7478, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.004011426487570656, |
|
"grad_norm": 1.7117114067077637, |
|
"learning_rate": 0.00019285540384897073, |
|
"loss": 8.765, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.004132984865981888, |
|
"grad_norm": 1.856972575187683, |
|
"learning_rate": 0.00019222897549773848, |
|
"loss": 9.777, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.00425454324439312, |
|
"grad_norm": 2.3596813678741455, |
|
"learning_rate": 0.00019157733266550575, |
|
"loss": 9.5862, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.004376101622804352, |
|
"grad_norm": 2.5083954334259033, |
|
"learning_rate": 0.00019090065350491626, |
|
"loss": 9.3247, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.004497660001215584, |
|
"grad_norm": 1.3706068992614746, |
|
"learning_rate": 0.00019019912301329592, |
|
"loss": 9.3214, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.004619218379626816, |
|
"grad_norm": 1.7020275592803955, |
|
"learning_rate": 0.00018947293298207635, |
|
"loss": 8.5611, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.004740776758038048, |
|
"grad_norm": 1.8647181987762451, |
|
"learning_rate": 0.0001887222819443612, |
|
"loss": 9.9951, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.00486233513644928, |
|
"grad_norm": 2.506580352783203, |
|
"learning_rate": 0.0001879473751206489, |
|
"loss": 9.6237, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.0049838935148605115, |
|
"grad_norm": 1.9629899263381958, |
|
"learning_rate": 0.00018714842436272773, |
|
"loss": 9.371, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.0051054518932717435, |
|
"grad_norm": 2.3143656253814697, |
|
"learning_rate": 0.00018632564809575742, |
|
"loss": 9.235, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.005227010271682975, |
|
"grad_norm": 2.5331368446350098, |
|
"learning_rate": 0.0001854792712585539, |
|
"loss": 10.1562, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.005348568650094207, |
|
"grad_norm": 4.555860996246338, |
|
"learning_rate": 0.00018460952524209355, |
|
"loss": 8.2419, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.005470127028505439, |
|
"grad_norm": 1.6661875247955322, |
|
"learning_rate": 0.00018371664782625287, |
|
"loss": 8.2566, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.005591685406916672, |
|
"grad_norm": 1.7546186447143555, |
|
"learning_rate": 0.00018280088311480201, |
|
"loss": 9.1446, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.005713243785327904, |
|
"grad_norm": 1.751132845878601, |
|
"learning_rate": 0.00018186248146866927, |
|
"loss": 8.6259, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.005834802163739136, |
|
"grad_norm": 2.203023672103882, |
|
"learning_rate": 0.00018090169943749476, |
|
"loss": 9.3622, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.005956360542150368, |
|
"grad_norm": 2.7363836765289307, |
|
"learning_rate": 0.0001799187996894925, |
|
"loss": 8.0726, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.0060779189205616, |
|
"grad_norm": 2.010042905807495, |
|
"learning_rate": 0.00017891405093963938, |
|
"loss": 9.8848, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.0060779189205616, |
|
"eval_loss": 2.22426700592041, |
|
"eval_runtime": 130.4637, |
|
"eval_samples_per_second": 26.551, |
|
"eval_steps_per_second": 13.276, |
|
"step": 50 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 200, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 50, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 9283870679629824.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|