{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.034601788313578706,
  "eval_steps": 50,
  "global_step": 26,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0013308380120607194,
      "grad_norm": 3.5958077907562256,
      "learning_rate": 7.499999999999999e-06,
      "loss": 53.6584,
      "step": 1
    },
    {
      "epoch": 0.0013308380120607194,
      "eval_loss": 1.6596457958221436,
      "eval_runtime": 576.7284,
      "eval_samples_per_second": 4.389,
      "eval_steps_per_second": 2.195,
      "step": 1
    },
    {
      "epoch": 0.0026616760241214388,
      "grad_norm": 4.593629360198975,
      "learning_rate": 1.4999999999999999e-05,
      "loss": 53.83,
      "step": 2
    },
    {
      "epoch": 0.003992514036182159,
      "grad_norm": 3.8698318004608154,
      "learning_rate": 2.2499999999999998e-05,
      "loss": 53.7949,
      "step": 3
    },
    {
      "epoch": 0.0053233520482428775,
      "grad_norm": 3.574679136276245,
      "learning_rate": 2.9999999999999997e-05,
      "loss": 51.8953,
      "step": 4
    },
    {
      "epoch": 0.006654190060303597,
      "grad_norm": 3.828587532043457,
      "learning_rate": 3.75e-05,
      "loss": 54.6933,
      "step": 5
    },
    {
      "epoch": 0.007985028072364318,
      "grad_norm": 4.688767433166504,
      "learning_rate": 4.4999999999999996e-05,
      "loss": 53.4681,
      "step": 6
    },
    {
      "epoch": 0.009315866084425037,
      "grad_norm": 3.8683738708496094,
      "learning_rate": 5.2499999999999995e-05,
      "loss": 52.2187,
      "step": 7
    },
    {
      "epoch": 0.010646704096485755,
      "grad_norm": 4.84136962890625,
      "learning_rate": 5.9999999999999995e-05,
      "loss": 51.3524,
      "step": 8
    },
    {
      "epoch": 0.011977542108546475,
      "grad_norm": 4.16347599029541,
      "learning_rate": 6.75e-05,
      "loss": 52.5617,
      "step": 9
    },
    {
      "epoch": 0.013308380120607194,
      "grad_norm": 4.299065589904785,
      "learning_rate": 7.5e-05,
      "loss": 51.664,
      "step": 10
    },
    {
      "epoch": 0.014639218132667914,
      "grad_norm": 4.201577663421631,
      "learning_rate": 8.25e-05,
      "loss": 48.1549,
      "step": 11
    },
    {
      "epoch": 0.015970056144728635,
      "grad_norm": 4.490873336791992,
      "learning_rate": 8.999999999999999e-05,
      "loss": 51.543,
      "step": 12
    },
    {
      "epoch": 0.017300894156789353,
      "grad_norm": 5.399848461151123,
      "learning_rate": 9.75e-05,
      "loss": 50.0357,
      "step": 13
    },
    {
      "epoch": 0.018631732168850074,
      "grad_norm": 5.014857769012451,
      "learning_rate": 0.00010499999999999999,
      "loss": 52.9329,
      "step": 14
    },
    {
      "epoch": 0.019962570180910792,
      "grad_norm": 4.966302871704102,
      "learning_rate": 0.0001125,
      "loss": 49.8081,
      "step": 15
    },
    {
      "epoch": 0.02129340819297151,
      "grad_norm": 5.2359299659729,
      "learning_rate": 0.00011999999999999999,
      "loss": 50.2341,
      "step": 16
    },
    {
      "epoch": 0.02262424620503223,
      "grad_norm": 5.209284782409668,
      "learning_rate": 0.00012749999999999998,
      "loss": 47.965,
      "step": 17
    },
    {
      "epoch": 0.02395508421709295,
      "grad_norm": 5.249284267425537,
      "learning_rate": 0.000135,
      "loss": 51.0081,
      "step": 18
    },
    {
      "epoch": 0.02528592222915367,
      "grad_norm": 6.256328582763672,
      "learning_rate": 0.0001425,
      "loss": 51.1662,
      "step": 19
    },
    {
      "epoch": 0.02661676024121439,
      "grad_norm": 5.501735687255859,
      "learning_rate": 0.00015,
      "loss": 47.9934,
      "step": 20
    },
    {
      "epoch": 0.02794759825327511,
      "grad_norm": 5.087883472442627,
      "learning_rate": 0.00014997810105601446,
      "loss": 49.7915,
      "step": 21
    },
    {
      "epoch": 0.029278436265335828,
      "grad_norm": 7.2340874671936035,
      "learning_rate": 0.0001499124170124245,
      "loss": 49.9978,
      "step": 22
    },
    {
      "epoch": 0.03060927427739655,
      "grad_norm": 5.028345584869385,
      "learning_rate": 0.00014980298622686183,
      "loss": 50.2118,
      "step": 23
    },
    {
      "epoch": 0.03194011228945727,
      "grad_norm": 4.296029090881348,
      "learning_rate": 0.00014964987260382363,
      "loss": 46.4395,
      "step": 24
    },
    {
      "epoch": 0.033270950301517985,
      "grad_norm": 4.090815544128418,
      "learning_rate": 0.00014945316555735403,
      "loss": 50.296,
      "step": 25
    },
    {
      "epoch": 0.034601788313578706,
      "grad_norm": 4.5317063331604,
      "learning_rate": 0.0001492129799588288,
      "loss": 49.3161,
      "step": 26
    }
  ],
  "logging_steps": 1,
  "max_steps": 150,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 13,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 1.484068172762972e+17,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}