{ "best_metric": null, "best_model_checkpoint": null, "epoch": 3.616636528028933, "global_step": 8000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.09, "learning_rate": 8e-05, "loss": 2.7434, "step": 200 }, { "epoch": 0.18, "learning_rate": 7.814986123959297e-05, "loss": 1.6287, "step": 400 }, { "epoch": 0.27, "learning_rate": 7.629972247918594e-05, "loss": 1.5903, "step": 600 }, { "epoch": 0.36, "learning_rate": 7.444958371877891e-05, "loss": 1.5706, "step": 800 }, { "epoch": 0.45, "learning_rate": 7.259944495837189e-05, "loss": 1.556, "step": 1000 }, { "epoch": 0.54, "learning_rate": 7.074930619796485e-05, "loss": 1.5469, "step": 1200 }, { "epoch": 0.63, "learning_rate": 6.889916743755783e-05, "loss": 1.5393, "step": 1400 }, { "epoch": 0.72, "learning_rate": 6.70490286771508e-05, "loss": 1.5316, "step": 1600 }, { "epoch": 0.81, "learning_rate": 6.519888991674377e-05, "loss": 1.5272, "step": 1800 }, { "epoch": 0.9, "learning_rate": 6.334875115633673e-05, "loss": 1.5229, "step": 2000 }, { "epoch": 0.9, "eval_loss": 1.9172784090042114, "eval_runtime": 137.7477, "eval_samples_per_second": 884.196, "eval_steps_per_second": 3.456, "step": 2000 }, { "epoch": 0.99, "learning_rate": 6.14986123959297e-05, "loss": 1.5171, "step": 2200 }, { "epoch": 1.08, "learning_rate": 5.964847363552267e-05, "loss": 1.507, "step": 2400 }, { "epoch": 1.18, "learning_rate": 5.779833487511564e-05, "loss": 1.5052, "step": 2600 }, { "epoch": 1.27, "learning_rate": 5.5948196114708607e-05, "loss": 1.5016, "step": 2800 }, { "epoch": 1.36, "learning_rate": 5.4098057354301575e-05, "loss": 1.5007, "step": 3000 }, { "epoch": 1.45, "learning_rate": 5.224791859389455e-05, "loss": 1.4962, "step": 3200 }, { "epoch": 1.54, "learning_rate": 5.039777983348751e-05, "loss": 1.494, "step": 3400 }, { "epoch": 1.63, "learning_rate": 4.8547641073080486e-05, "loss": 1.4907, "step": 3600 }, { "epoch": 1.72, "learning_rate": 4.6697502312673454e-05, "loss": 1.4908, "step": 3800 }, { "epoch": 1.81, "learning_rate": 4.484736355226642e-05, "loss": 1.4895, "step": 4000 }, { "epoch": 1.81, "eval_loss": 1.9569236040115356, "eval_runtime": 138.1502, "eval_samples_per_second": 881.62, "eval_steps_per_second": 3.446, "step": 4000 }, { "epoch": 1.9, "learning_rate": 4.299722479185939e-05, "loss": 1.4883, "step": 4200 }, { "epoch": 1.99, "learning_rate": 4.1147086031452366e-05, "loss": 1.4843, "step": 4400 }, { "epoch": 2.08, "learning_rate": 3.9296947271045334e-05, "loss": 1.4751, "step": 4600 }, { "epoch": 2.17, "learning_rate": 3.74468085106383e-05, "loss": 1.4734, "step": 4800 }, { "epoch": 2.26, "learning_rate": 3.559666975023127e-05, "loss": 1.471, "step": 5000 }, { "epoch": 2.35, "learning_rate": 3.374653098982424e-05, "loss": 1.4732, "step": 5200 }, { "epoch": 2.44, "learning_rate": 3.1896392229417213e-05, "loss": 1.4739, "step": 5400 }, { "epoch": 2.53, "learning_rate": 3.0046253469010178e-05, "loss": 1.4692, "step": 5600 }, { "epoch": 2.62, "learning_rate": 2.8196114708603146e-05, "loss": 1.4645, "step": 5800 }, { "epoch": 2.71, "learning_rate": 2.6345975948196118e-05, "loss": 1.4667, "step": 6000 }, { "epoch": 2.71, "eval_loss": 1.9881402254104614, "eval_runtime": 137.335, "eval_samples_per_second": 886.854, "eval_steps_per_second": 3.466, "step": 6000 }, { "epoch": 2.8, "learning_rate": 2.4495837187789086e-05, "loss": 1.4677, "step": 6200 }, { "epoch": 2.89, "learning_rate": 2.2645698427382054e-05, "loss": 1.4639, "step": 6400 }, { "epoch": 2.98, "learning_rate": 2.0795559666975026e-05, "loss": 1.4615, "step": 6600 }, { "epoch": 3.07, "learning_rate": 1.8945420906567994e-05, "loss": 1.4551, "step": 6800 }, { "epoch": 3.16, "learning_rate": 1.7095282146160962e-05, "loss": 1.4539, "step": 7000 }, { "epoch": 3.25, "learning_rate": 1.5245143385753934e-05, "loss": 1.4535, "step": 7200 }, { "epoch": 3.35, "learning_rate": 1.33950046253469e-05, "loss": 1.4506, "step": 7400 }, { "epoch": 3.44, "learning_rate": 1.1544865864939872e-05, "loss": 1.4522, "step": 7600 }, { "epoch": 3.53, "learning_rate": 9.69472710453284e-06, "loss": 1.4504, "step": 7800 }, { "epoch": 3.62, "learning_rate": 7.84458834412581e-06, "loss": 1.4489, "step": 8000 }, { "epoch": 3.62, "eval_loss": 2.012910842895508, "eval_runtime": 137.8372, "eval_samples_per_second": 883.622, "eval_steps_per_second": 3.453, "step": 8000 } ], "max_steps": 8848, "num_train_epochs": 4, "total_flos": 1.5605846428483584e+17, "trial_name": null, "trial_params": null }