{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.03544776119402985,
  "eval_steps": 38,
  "global_step": 38,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0009328358208955224,
      "grad_norm": 2.931856870651245,
      "learning_rate": 4e-05,
      "loss": 3.1183,
      "step": 1
    },
    {
      "epoch": 0.0009328358208955224,
      "eval_loss": 0.7989240884780884,
      "eval_runtime": 61.068,
      "eval_samples_per_second": 7.402,
      "eval_steps_per_second": 3.701,
      "step": 1
    },
    {
      "epoch": 0.0018656716417910447,
      "grad_norm": 3.0857083797454834,
      "learning_rate": 8e-05,
      "loss": 3.3967,
      "step": 2
    },
    {
      "epoch": 0.002798507462686567,
      "grad_norm": 2.9495201110839844,
      "learning_rate": 0.00012,
      "loss": 3.4361,
      "step": 3
    },
    {
      "epoch": 0.0037313432835820895,
      "grad_norm": 2.4459776878356934,
      "learning_rate": 0.00016,
      "loss": 3.2275,
      "step": 4
    },
    {
      "epoch": 0.0046641791044776115,
      "grad_norm": 2.410581588745117,
      "learning_rate": 0.0002,
      "loss": 3.0069,
      "step": 5
    },
    {
      "epoch": 0.005597014925373134,
      "grad_norm": 1.99094820022583,
      "learning_rate": 0.00024,
      "loss": 2.6997,
      "step": 6
    },
    {
      "epoch": 0.0065298507462686565,
      "grad_norm": 1.850408911705017,
      "learning_rate": 0.00028,
      "loss": 2.6469,
      "step": 7
    },
    {
      "epoch": 0.007462686567164179,
      "grad_norm": 2.972104072570801,
      "learning_rate": 0.00032,
      "loss": 3.351,
      "step": 8
    },
    {
      "epoch": 0.008395522388059701,
      "grad_norm": 2.8936173915863037,
      "learning_rate": 0.00036,
      "loss": 2.8041,
      "step": 9
    },
    {
      "epoch": 0.009328358208955223,
      "grad_norm": 2.354464530944824,
      "learning_rate": 0.0004,
      "loss": 2.7652,
      "step": 10
    },
    {
      "epoch": 0.010261194029850746,
      "grad_norm": 2.024070978164673,
      "learning_rate": 0.0003999496469885013,
      "loss": 2.8786,
      "step": 11
    },
    {
      "epoch": 0.011194029850746268,
      "grad_norm": 1.8775830268859863,
      "learning_rate": 0.00039979861330826294,
      "loss": 2.0356,
      "step": 12
    },
    {
      "epoch": 0.012126865671641791,
      "grad_norm": 1.7130846977233887,
      "learning_rate": 0.0003995469750092912,
      "loss": 2.6772,
      "step": 13
    },
    {
      "epoch": 0.013059701492537313,
      "grad_norm": 1.9945520162582397,
      "learning_rate": 0.00039919485879904784,
      "loss": 2.8959,
      "step": 14
    },
    {
      "epoch": 0.013992537313432836,
      "grad_norm": 1.6973563432693481,
      "learning_rate": 0.00039874244197864856,
      "loss": 2.3531,
      "step": 15
    },
    {
      "epoch": 0.014925373134328358,
      "grad_norm": 1.5936486721038818,
      "learning_rate": 0.00039818995235358696,
      "loss": 1.9481,
      "step": 16
    },
    {
      "epoch": 0.01585820895522388,
      "grad_norm": 1.828995943069458,
      "learning_rate": 0.00039753766811902755,
      "loss": 3.6787,
      "step": 17
    },
    {
      "epoch": 0.016791044776119403,
      "grad_norm": 1.553421139717102,
      "learning_rate": 0.0003967859177197259,
      "loss": 2.2696,
      "step": 18
    },
    {
      "epoch": 0.017723880597014924,
      "grad_norm": 1.2893372774124146,
      "learning_rate": 0.00039593507968464716,
      "loss": 1.9452,
      "step": 19
    },
    {
      "epoch": 0.018656716417910446,
      "grad_norm": 1.674631953239441,
      "learning_rate": 0.0003949855824363647,
      "loss": 2.2916,
      "step": 20
    },
    {
      "epoch": 0.01958955223880597,
      "grad_norm": 1.4569449424743652,
      "learning_rate": 0.0003939379040753374,
      "loss": 2.1701,
      "step": 21
    },
    {
      "epoch": 0.020522388059701493,
      "grad_norm": 1.438828468322754,
      "learning_rate": 0.00039279257213917066,
      "loss": 2.5621,
      "step": 22
    },
    {
      "epoch": 0.021455223880597014,
      "grad_norm": 1.3770123720169067,
      "learning_rate": 0.0003915501633369861,
      "loss": 2.1366,
      "step": 23
    },
    {
      "epoch": 0.022388059701492536,
      "grad_norm": 1.4712828397750854,
      "learning_rate": 0.00039021130325903074,
      "loss": 2.319,
      "step": 24
    },
    {
      "epoch": 0.02332089552238806,
      "grad_norm": 1.465248465538025,
      "learning_rate": 0.00038877666606167355,
      "loss": 2.3959,
      "step": 25
    },
    {
      "epoch": 0.024253731343283583,
      "grad_norm": 1.4384740591049194,
      "learning_rate": 0.00038724697412794747,
      "loss": 2.097,
      "step": 26
    },
    {
      "epoch": 0.025186567164179104,
      "grad_norm": 1.3539812564849854,
      "learning_rate": 0.0003856229977038078,
      "loss": 2.2313,
      "step": 27
    },
    {
      "epoch": 0.026119402985074626,
      "grad_norm": 1.3618801832199097,
      "learning_rate": 0.0003839055545102902,
      "loss": 2.1054,
      "step": 28
    },
    {
      "epoch": 0.027052238805970148,
      "grad_norm": 1.422633409500122,
      "learning_rate": 0.00038209550933176323,
      "loss": 2.276,
      "step": 29
    },
    {
      "epoch": 0.027985074626865673,
      "grad_norm": 1.4232622385025024,
      "learning_rate": 0.0003801937735804838,
      "loss": 2.1735,
      "step": 30
    },
    {
      "epoch": 0.028917910447761194,
      "grad_norm": 1.4555679559707642,
      "learning_rate": 0.0003782013048376736,
      "loss": 2.2706,
      "step": 31
    },
    {
      "epoch": 0.029850746268656716,
      "grad_norm": 1.2929563522338867,
      "learning_rate": 0.0003761191063713476,
      "loss": 1.9037,
      "step": 32
    },
    {
      "epoch": 0.030783582089552237,
      "grad_norm": 1.2687627077102661,
      "learning_rate": 0.0003739482266311391,
      "loss": 2.1032,
      "step": 33
    },
    {
      "epoch": 0.03171641791044776,
      "grad_norm": 1.2993357181549072,
      "learning_rate": 0.00037168975872037323,
      "loss": 2.0062,
      "step": 34
    },
    {
      "epoch": 0.03264925373134328,
      "grad_norm": 1.3507018089294434,
      "learning_rate": 0.00036934483984565685,
      "loss": 2.1522,
      "step": 35
    },
    {
      "epoch": 0.033582089552238806,
      "grad_norm": 1.4183921813964844,
      "learning_rate": 0.00036691465074426054,
      "loss": 1.845,
      "step": 36
    },
    {
      "epoch": 0.03451492537313433,
      "grad_norm": 1.3370906114578247,
      "learning_rate": 0.00036440041508958203,
      "loss": 1.9448,
      "step": 37
    },
    {
      "epoch": 0.03544776119402985,
      "grad_norm": 1.4249347448349,
      "learning_rate": 0.0003618033988749895,
      "loss": 2.4594,
      "step": 38
    },
    {
      "epoch": 0.03544776119402985,
      "eval_loss": 0.5035107135772705,
      "eval_runtime": 60.2427,
      "eval_samples_per_second": 7.503,
      "eval_steps_per_second": 3.751,
      "step": 38
    }
  ],
  "logging_steps": 1,
  "max_steps": 150,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 38,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 4981999911567360.0,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}