{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.0014082027812004929, "eval_steps": 3, "global_step": 10, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00014082027812004929, "grad_norm": 0.14066897332668304, "learning_rate": 2e-05, "loss": 10.8356, "step": 1 }, { "epoch": 0.00014082027812004929, "eval_loss": 10.834281921386719, "eval_runtime": 9.2658, "eval_samples_per_second": 322.693, "eval_steps_per_second": 161.346, "step": 1 }, { "epoch": 0.00028164055624009857, "grad_norm": 0.1249518170952797, "learning_rate": 4e-05, "loss": 10.8414, "step": 2 }, { "epoch": 0.00042246083436014786, "grad_norm": 0.09757815301418304, "learning_rate": 6e-05, "loss": 10.8309, "step": 3 }, { "epoch": 0.00042246083436014786, "eval_loss": 10.834235191345215, "eval_runtime": 8.4562, "eval_samples_per_second": 353.588, "eval_steps_per_second": 176.794, "step": 3 }, { "epoch": 0.0005632811124801971, "grad_norm": 0.11606577038764954, "learning_rate": 8e-05, "loss": 10.8325, "step": 4 }, { "epoch": 0.0007041013906002464, "grad_norm": 0.09170947968959808, "learning_rate": 0.0001, "loss": 10.8415, "step": 5 }, { "epoch": 0.0008449216687202957, "grad_norm": 0.11384788155555725, "learning_rate": 0.00012, "loss": 10.8325, "step": 6 }, { "epoch": 0.0008449216687202957, "eval_loss": 10.83386516571045, "eval_runtime": 8.6078, "eval_samples_per_second": 347.359, "eval_steps_per_second": 173.68, "step": 6 }, { "epoch": 0.0009857419468403451, "grad_norm": 0.09094872325658798, "learning_rate": 0.00014, "loss": 10.8366, "step": 7 }, { "epoch": 0.0011265622249603943, "grad_norm": 0.15334953367710114, "learning_rate": 0.00016, "loss": 10.8373, "step": 8 }, { "epoch": 0.0012673825030804437, "grad_norm": 0.19216717779636383, "learning_rate": 0.00018, "loss": 10.8287, "step": 9 }, { "epoch": 0.0012673825030804437, "eval_loss": 10.833248138427734, "eval_runtime": 8.5606, "eval_samples_per_second": 349.275, "eval_steps_per_second": 174.638, "step": 9 }, { "epoch": 0.0014082027812004929, "grad_norm": 0.1223614290356636, "learning_rate": 0.0002, "loss": 10.8299, "step": 10 } ], "logging_steps": 1, "max_steps": 10, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 3, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1066384490496.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }