{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.10233086981239341, "eval_steps": 9, "global_step": 45, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0022740193291642978, "eval_loss": 2.2858026027679443, "eval_runtime": 35.4041, "eval_samples_per_second": 20.93, "eval_steps_per_second": 2.627, "step": 1 }, { "epoch": 0.006822057987492893, "grad_norm": 0.60309898853302, "learning_rate": 3e-05, "loss": 2.083, "step": 3 }, { "epoch": 0.013644115974985787, "grad_norm": 0.683789074420929, "learning_rate": 6e-05, "loss": 2.3169, "step": 6 }, { "epoch": 0.02046617396247868, "grad_norm": 0.8379518985748291, "learning_rate": 9e-05, "loss": 2.2734, "step": 9 }, { "epoch": 0.02046617396247868, "eval_loss": 2.204383373260498, "eval_runtime": 35.8511, "eval_samples_per_second": 20.669, "eval_steps_per_second": 2.594, "step": 9 }, { "epoch": 0.027288231949971573, "grad_norm": 0.6264774799346924, "learning_rate": 9.987820251299122e-05, "loss": 2.0739, "step": 12 }, { "epoch": 0.03411028993746447, "grad_norm": 0.5958771705627441, "learning_rate": 9.924038765061042e-05, "loss": 2.0297, "step": 15 }, { "epoch": 0.04093234792495736, "grad_norm": 1.1767833232879639, "learning_rate": 9.806308479691595e-05, "loss": 2.04, "step": 18 }, { "epoch": 0.04093234792495736, "eval_loss": 1.970629334449768, "eval_runtime": 35.8984, "eval_samples_per_second": 20.642, "eval_steps_per_second": 2.591, "step": 18 }, { "epoch": 0.047754405912450254, "grad_norm": 0.9643926620483398, "learning_rate": 9.635919272833938e-05, "loss": 1.8702, "step": 21 }, { "epoch": 0.054576463899943146, "grad_norm": 1.1769558191299438, "learning_rate": 9.414737964294636e-05, "loss": 1.8235, "step": 24 }, { "epoch": 0.061398521887436046, "grad_norm": 1.374096393585205, "learning_rate": 9.145187862775209e-05, "loss": 1.647, "step": 27 }, { "epoch": 0.061398521887436046, "eval_loss": 1.761104941368103, "eval_runtime": 35.916, "eval_samples_per_second": 20.631, "eval_steps_per_second": 2.589, "step": 27 }, { "epoch": 0.06822057987492894, "grad_norm": 0.7258775234222412, "learning_rate": 8.83022221559489e-05, "loss": 1.7701, "step": 30 }, { "epoch": 0.07504263786242182, "grad_norm": 0.5846672654151917, "learning_rate": 8.473291852294987e-05, "loss": 1.5202, "step": 33 }, { "epoch": 0.08186469584991472, "grad_norm": 0.7897035479545593, "learning_rate": 8.07830737662829e-05, "loss": 1.6917, "step": 36 }, { "epoch": 0.08186469584991472, "eval_loss": 1.7188748121261597, "eval_runtime": 35.9555, "eval_samples_per_second": 20.609, "eval_steps_per_second": 2.587, "step": 36 }, { "epoch": 0.08868675383740762, "grad_norm": 0.4667467176914215, "learning_rate": 7.649596321166024e-05, "loss": 1.6435, "step": 39 }, { "epoch": 0.09550881182490051, "grad_norm": 0.4430086612701416, "learning_rate": 7.191855733945387e-05, "loss": 1.6451, "step": 42 }, { "epoch": 0.10233086981239341, "grad_norm": 0.5567775368690491, "learning_rate": 6.710100716628344e-05, "loss": 1.5085, "step": 45 }, { "epoch": 0.10233086981239341, "eval_loss": 1.696839451789856, "eval_runtime": 35.9155, "eval_samples_per_second": 20.632, "eval_steps_per_second": 2.589, "step": 45 } ], "logging_steps": 3, "max_steps": 100, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 9, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 2.7019967018827776e+16, "train_batch_size": 8, "trial_name": null, "trial_params": null }