|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.0020815986677768525, |
|
"eval_steps": 9, |
|
"global_step": 25, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 8.326394671107411e-05, |
|
"grad_norm": 0.6871480941772461, |
|
"learning_rate": 1e-05, |
|
"loss": 1.1197, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 8.326394671107411e-05, |
|
"eval_loss": 1.2025511264801025, |
|
"eval_runtime": 1873.6459, |
|
"eval_samples_per_second": 5.398, |
|
"eval_steps_per_second": 0.675, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.00016652789342214822, |
|
"grad_norm": 0.6065630912780762, |
|
"learning_rate": 2e-05, |
|
"loss": 1.4157, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.0002497918401332223, |
|
"grad_norm": 0.6395959854125977, |
|
"learning_rate": 3e-05, |
|
"loss": 1.0421, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.00033305578684429644, |
|
"grad_norm": 0.7535728812217712, |
|
"learning_rate": 4e-05, |
|
"loss": 0.8854, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.00041631973355537054, |
|
"grad_norm": 0.8002839684486389, |
|
"learning_rate": 5e-05, |
|
"loss": 1.1319, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.0004995836802664446, |
|
"grad_norm": 0.7185359597206116, |
|
"learning_rate": 6e-05, |
|
"loss": 1.1534, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.0005828476269775188, |
|
"grad_norm": 0.7994174957275391, |
|
"learning_rate": 7e-05, |
|
"loss": 1.1366, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.0006661115736885929, |
|
"grad_norm": 0.9843465089797974, |
|
"learning_rate": 8e-05, |
|
"loss": 1.2944, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.000749375520399667, |
|
"grad_norm": 1.1043306589126587, |
|
"learning_rate": 9e-05, |
|
"loss": 1.1426, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.000749375520399667, |
|
"eval_loss": 1.0168187618255615, |
|
"eval_runtime": 1879.978, |
|
"eval_samples_per_second": 5.38, |
|
"eval_steps_per_second": 0.673, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.0008326394671107411, |
|
"grad_norm": 0.7300062775611877, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8043, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.0009159034138218152, |
|
"grad_norm": 0.5794069170951843, |
|
"learning_rate": 9.99695413509548e-05, |
|
"loss": 0.8879, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.0009991673605328892, |
|
"grad_norm": 0.656294047832489, |
|
"learning_rate": 9.987820251299122e-05, |
|
"loss": 0.8947, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.0010824313072439634, |
|
"grad_norm": 0.6405145525932312, |
|
"learning_rate": 9.972609476841367e-05, |
|
"loss": 0.7168, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.0011656952539550376, |
|
"grad_norm": 0.7153993248939514, |
|
"learning_rate": 9.951340343707852e-05, |
|
"loss": 0.8198, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.0012489592006661116, |
|
"grad_norm": 0.6253563761711121, |
|
"learning_rate": 9.924038765061042e-05, |
|
"loss": 0.9583, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.0013322231473771858, |
|
"grad_norm": 0.9064929485321045, |
|
"learning_rate": 9.890738003669029e-05, |
|
"loss": 0.7007, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.0014154870940882597, |
|
"grad_norm": 0.6331225633621216, |
|
"learning_rate": 9.851478631379982e-05, |
|
"loss": 0.6507, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.001498751040799334, |
|
"grad_norm": 0.6313807964324951, |
|
"learning_rate": 9.806308479691595e-05, |
|
"loss": 0.8009, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.001498751040799334, |
|
"eval_loss": 0.7656639218330383, |
|
"eval_runtime": 1878.3063, |
|
"eval_samples_per_second": 5.385, |
|
"eval_steps_per_second": 0.673, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.001582014987510408, |
|
"grad_norm": 0.5964687466621399, |
|
"learning_rate": 9.755282581475769e-05, |
|
"loss": 0.8406, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.0016652789342214821, |
|
"grad_norm": 0.8008723855018616, |
|
"learning_rate": 9.698463103929542e-05, |
|
"loss": 0.7004, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.0017485428809325561, |
|
"grad_norm": 0.7248008251190186, |
|
"learning_rate": 9.635919272833938e-05, |
|
"loss": 0.6875, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.0018318068276436303, |
|
"grad_norm": 0.6736342906951904, |
|
"learning_rate": 9.567727288213005e-05, |
|
"loss": 0.761, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.0019150707743547043, |
|
"grad_norm": 0.7091475129127502, |
|
"learning_rate": 9.493970231495835e-05, |
|
"loss": 1.1793, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.0019983347210657783, |
|
"grad_norm": 0.6263002753257751, |
|
"learning_rate": 9.414737964294636e-05, |
|
"loss": 0.7844, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.0020815986677768525, |
|
"grad_norm": 0.8400290608406067, |
|
"learning_rate": 9.330127018922194e-05, |
|
"loss": 0.7321, |
|
"step": 25 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 100, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 25, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 2.941868820135936e+16, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|