|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 2.9748743718592965, |
|
"eval_steps": 500, |
|
"global_step": 222, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.13400335008375208, |
|
"grad_norm": 0.4171938896179199, |
|
"learning_rate": 4.347826086956522e-05, |
|
"loss": 0.8101, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.26800670016750416, |
|
"grad_norm": 0.4708716869354248, |
|
"learning_rate": 8.695652173913044e-05, |
|
"loss": 0.8211, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.4020100502512563, |
|
"grad_norm": 0.5508167743682861, |
|
"learning_rate": 9.96950085488444e-05, |
|
"loss": 0.6245, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.5360134003350083, |
|
"grad_norm": 0.34629738330841064, |
|
"learning_rate": 9.821012312558058e-05, |
|
"loss": 0.6189, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.6700167504187605, |
|
"grad_norm": 0.4830147624015808, |
|
"learning_rate": 9.552620875987311e-05, |
|
"loss": 0.5793, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.8040201005025126, |
|
"grad_norm": 0.528333306312561, |
|
"learning_rate": 9.171001680589588e-05, |
|
"loss": 0.625, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.9380234505862647, |
|
"grad_norm": 0.43927663564682007, |
|
"learning_rate": 8.685645937558896e-05, |
|
"loss": 0.5697, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 1.0720268006700167, |
|
"grad_norm": 0.49636346101760864, |
|
"learning_rate": 8.108624878942477e-05, |
|
"loss": 0.562, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 1.2060301507537687, |
|
"grad_norm": 0.3749530017375946, |
|
"learning_rate": 7.454289535287968e-05, |
|
"loss": 0.5005, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 1.3400335008375208, |
|
"grad_norm": 0.47080516815185547, |
|
"learning_rate": 6.738913812659912e-05, |
|
"loss": 0.4522, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.474036850921273, |
|
"grad_norm": 0.450717955827713, |
|
"learning_rate": 5.980289746019892e-05, |
|
"loss": 0.4691, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 1.608040201005025, |
|
"grad_norm": 0.5994529724121094, |
|
"learning_rate": 5.1972849953812644e-05, |
|
"loss": 0.4904, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 1.742043551088777, |
|
"grad_norm": 0.5364199280738831, |
|
"learning_rate": 4.4093735902054605e-05, |
|
"loss": 0.4698, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 1.8760469011725294, |
|
"grad_norm": 0.5703357458114624, |
|
"learning_rate": 3.636151592846985e-05, |
|
"loss": 0.5083, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 2.0100502512562812, |
|
"grad_norm": 0.7257386445999146, |
|
"learning_rate": 2.8968497269310803e-05, |
|
"loss": 0.5063, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 2.1440536013400333, |
|
"grad_norm": 0.7995928525924683, |
|
"learning_rate": 2.2098550920327998e-05, |
|
"loss": 0.4249, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 2.2780569514237854, |
|
"grad_norm": 0.4787980914115906, |
|
"learning_rate": 1.5922538600418318e-05, |
|
"loss": 0.4145, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 2.4120603015075375, |
|
"grad_norm": 0.5283986330032349, |
|
"learning_rate": 1.0594063267640386e-05, |
|
"loss": 0.4231, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 2.5460636515912896, |
|
"grad_norm": 0.8906112313270569, |
|
"learning_rate": 6.2456488760703205e-06, |
|
"loss": 0.4165, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 2.6800670016750416, |
|
"grad_norm": 0.7813794016838074, |
|
"learning_rate": 2.9854443863662262e-06, |
|
"loss": 0.4132, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 2.8140703517587937, |
|
"grad_norm": 0.6210780739784241, |
|
"learning_rate": 8.945340042509797e-07, |
|
"loss": 0.424, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 2.948073701842546, |
|
"grad_norm": 0.7047247886657715, |
|
"learning_rate": 2.4920543691309138e-08, |
|
"loss": 0.4248, |
|
"step": 220 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 222, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 9154193387028480.0, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|