{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.026698804116065636, "eval_steps": 2, "global_step": 18, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0014832668953369797, "grad_norm": 20.178142547607422, "learning_rate": 2e-05, "loss": 43.4533, "step": 1 }, { "epoch": 0.0014832668953369797, "eval_loss": 2.656465768814087, "eval_runtime": 154.8536, "eval_samples_per_second": 3.668, "eval_steps_per_second": 3.668, "step": 1 }, { "epoch": 0.0029665337906739594, "grad_norm": 17.634891510009766, "learning_rate": 4e-05, "loss": 39.2663, "step": 2 }, { "epoch": 0.0029665337906739594, "eval_loss": 2.655576705932617, "eval_runtime": 154.4307, "eval_samples_per_second": 3.678, "eval_steps_per_second": 3.678, "step": 2 }, { "epoch": 0.004449800686010939, "grad_norm": 21.489526748657227, "learning_rate": 6e-05, "loss": 40.7739, "step": 3 }, { "epoch": 0.005933067581347919, "grad_norm": 23.668588638305664, "learning_rate": 8e-05, "loss": 39.5743, "step": 4 }, { "epoch": 0.005933067581347919, "eval_loss": 2.6245453357696533, "eval_runtime": 150.3826, "eval_samples_per_second": 3.777, "eval_steps_per_second": 3.777, "step": 4 }, { "epoch": 0.007416334476684898, "grad_norm": 25.051509857177734, "learning_rate": 0.0001, "loss": 43.102, "step": 5 }, { "epoch": 0.008899601372021879, "grad_norm": 29.83518409729004, "learning_rate": 0.00012, "loss": 37.8648, "step": 6 }, { "epoch": 0.008899601372021879, "eval_loss": 2.4578418731689453, "eval_runtime": 150.3798, "eval_samples_per_second": 3.777, "eval_steps_per_second": 3.777, "step": 6 }, { "epoch": 0.010382868267358857, "grad_norm": 47.01485061645508, "learning_rate": 0.00014, "loss": 38.7341, "step": 7 }, { "epoch": 0.011866135162695838, "grad_norm": 45.00392532348633, "learning_rate": 0.00016, "loss": 36.6476, "step": 8 }, { "epoch": 0.011866135162695838, "eval_loss": 1.9450047016143799, "eval_runtime": 150.402, "eval_samples_per_second": 3.777, "eval_steps_per_second": 3.777, "step": 8 }, { "epoch": 0.013349402058032818, "grad_norm": 52.0974006652832, "learning_rate": 0.00018, "loss": 31.804, "step": 9 }, { "epoch": 0.014832668953369797, "grad_norm": 42.790775299072266, "learning_rate": 0.0002, "loss": 23.4278, "step": 10 }, { "epoch": 0.014832668953369797, "eval_loss": 1.1998307704925537, "eval_runtime": 151.4492, "eval_samples_per_second": 3.75, "eval_steps_per_second": 3.75, "step": 10 }, { "epoch": 0.016315935848706775, "grad_norm": 54.17241668701172, "learning_rate": 0.00019510565162951537, "loss": 18.5255, "step": 11 }, { "epoch": 0.017799202744043757, "grad_norm": 42.78046417236328, "learning_rate": 0.00018090169943749476, "loss": 11.9862, "step": 12 }, { "epoch": 0.017799202744043757, "eval_loss": 0.5600711703300476, "eval_runtime": 151.2427, "eval_samples_per_second": 3.756, "eval_steps_per_second": 3.756, "step": 12 }, { "epoch": 0.019282469639380736, "grad_norm": 31.308900833129883, "learning_rate": 0.00015877852522924732, "loss": 11.0217, "step": 13 }, { "epoch": 0.020765736534717714, "grad_norm": 26.355045318603516, "learning_rate": 0.00013090169943749476, "loss": 4.9234, "step": 14 }, { "epoch": 0.020765736534717714, "eval_loss": 0.39810341596603394, "eval_runtime": 151.4481, "eval_samples_per_second": 3.75, "eval_steps_per_second": 3.75, "step": 14 }, { "epoch": 0.022249003430054697, "grad_norm": 26.284854888916016, "learning_rate": 0.0001, "loss": 6.4574, "step": 15 }, { "epoch": 0.023732270325391675, "grad_norm": 35.7542839050293, "learning_rate": 6.909830056250527e-05, "loss": 9.4431, "step": 16 }, { "epoch": 0.023732270325391675, "eval_loss": 0.36851683259010315, "eval_runtime": 151.278, "eval_samples_per_second": 3.755, "eval_steps_per_second": 3.755, "step": 16 }, { "epoch": 0.025215537220728654, "grad_norm": 25.165361404418945, "learning_rate": 4.12214747707527e-05, "loss": 4.9765, "step": 17 }, { "epoch": 0.026698804116065636, "grad_norm": 14.660453796386719, "learning_rate": 1.9098300562505266e-05, "loss": 1.5801, "step": 18 }, { "epoch": 0.026698804116065636, "eval_loss": 0.34035831689834595, "eval_runtime": 151.1958, "eval_samples_per_second": 3.757, "eval_steps_per_second": 3.757, "step": 18 } ], "logging_steps": 1, "max_steps": 20, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 2, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 2927327503712256.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }