{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.9968847352024922,
  "eval_steps": 500,
  "global_step": 80,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.012461059190031152,
      "grad_norm": 25.687093602481305,
      "learning_rate": 2.5e-06,
      "loss": 1.2485,
      "step": 1
    },
    {
      "epoch": 0.06230529595015576,
      "grad_norm": 15.094847025724023,
      "learning_rate": 1.25e-05,
      "loss": 1.1293,
      "step": 5
    },
    {
      "epoch": 0.12461059190031153,
      "grad_norm": 11.363800874975448,
      "learning_rate": 1.9961946980917457e-05,
      "loss": 0.9767,
      "step": 10
    },
    {
      "epoch": 0.18691588785046728,
      "grad_norm": 8.12136266435658,
      "learning_rate": 1.953716950748227e-05,
      "loss": 0.7746,
      "step": 15
    },
    {
      "epoch": 0.24922118380062305,
      "grad_norm": 7.927023181508759,
      "learning_rate": 1.866025403784439e-05,
      "loss": 0.6367,
      "step": 20
    },
    {
      "epoch": 0.3115264797507788,
      "grad_norm": 3.686503615552781,
      "learning_rate": 1.737277336810124e-05,
      "loss": 0.5175,
      "step": 25
    },
    {
      "epoch": 0.37383177570093457,
      "grad_norm": 3.6799656836763077,
      "learning_rate": 1.573576436351046e-05,
      "loss": 0.4736,
      "step": 30
    },
    {
      "epoch": 0.43613707165109034,
      "grad_norm": 1.824779614147191,
      "learning_rate": 1.3826834323650899e-05,
      "loss": 0.4468,
      "step": 35
    },
    {
      "epoch": 0.4984423676012461,
      "grad_norm": 1.6428127656511486,
      "learning_rate": 1.1736481776669307e-05,
      "loss": 0.4412,
      "step": 40
    },
    {
      "epoch": 0.5607476635514018,
      "grad_norm": 1.255700115951508,
      "learning_rate": 9.563806126346643e-06,
      "loss": 0.4362,
      "step": 45
    },
    {
      "epoch": 0.6230529595015576,
      "grad_norm": 1.1763215498923227,
      "learning_rate": 7.411809548974792e-06,
      "loss": 0.4261,
      "step": 50
    },
    {
      "epoch": 0.6853582554517134,
      "grad_norm": 1.0876919118710244,
      "learning_rate": 5.382513867649663e-06,
      "loss": 0.4159,
      "step": 55
    },
    {
      "epoch": 0.7476635514018691,
      "grad_norm": 1.0885014436037381,
      "learning_rate": 3.5721239031346067e-06,
      "loss": 0.4142,
      "step": 60
    },
    {
      "epoch": 0.8099688473520249,
      "grad_norm": 0.9605618048169687,
      "learning_rate": 2.0664665970876496e-06,
      "loss": 0.4068,
      "step": 65
    },
    {
      "epoch": 0.8722741433021807,
      "grad_norm": 0.9420340387839357,
      "learning_rate": 9.369221296335007e-07,
      "loss": 0.4026,
      "step": 70
    },
    {
      "epoch": 0.9345794392523364,
      "grad_norm": 0.8433707190106806,
      "learning_rate": 2.370399288006664e-07,
      "loss": 0.3984,
      "step": 75
    },
    {
      "epoch": 0.9968847352024922,
      "grad_norm": 0.8754132731808941,
      "learning_rate": 0.0,
      "loss": 0.399,
      "step": 80
    },
    {
      "epoch": 0.9968847352024922,
      "eval_loss": 1.1888436079025269,
      "eval_runtime": 0.7161,
      "eval_samples_per_second": 15.361,
      "eval_steps_per_second": 1.396,
      "step": 80
    },
    {
      "epoch": 0.9968847352024922,
      "step": 80,
      "total_flos": 33396055080960.0,
      "train_loss": 0.5449534490704536,
      "train_runtime": 879.8279,
      "train_samples_per_second": 46.577,
      "train_steps_per_second": 0.091
    }
  ],
  "logging_steps": 5,
  "max_steps": 80,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": false,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 33396055080960.0,
  "train_batch_size": 16,
  "trial_name": null,
  "trial_params": null
}