{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.16447368421052633, "eval_steps": 10, "global_step": 50, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.003289473684210526, "eval_loss": 1.5813665390014648, "eval_runtime": 78.8755, "eval_samples_per_second": 1.635, "eval_steps_per_second": 0.824, "step": 1 }, { "epoch": 0.01644736842105263, "grad_norm": 1.7443066835403442, "learning_rate": 5e-05, "loss": 6.5254, "step": 5 }, { "epoch": 0.03289473684210526, "grad_norm": 2.3605740070343018, "learning_rate": 0.0001, "loss": 6.2149, "step": 10 }, { "epoch": 0.03289473684210526, "eval_loss": 1.5433368682861328, "eval_runtime": 76.1974, "eval_samples_per_second": 1.693, "eval_steps_per_second": 0.853, "step": 10 }, { "epoch": 0.049342105263157895, "grad_norm": 3.8667151927948, "learning_rate": 9.619397662556435e-05, "loss": 6.0027, "step": 15 }, { "epoch": 0.06578947368421052, "grad_norm": 3.2571685314178467, "learning_rate": 8.535533905932738e-05, "loss": 6.018, "step": 20 }, { "epoch": 0.06578947368421052, "eval_loss": 1.4255777597427368, "eval_runtime": 58.9794, "eval_samples_per_second": 2.187, "eval_steps_per_second": 1.102, "step": 20 }, { "epoch": 0.08223684210526316, "grad_norm": 3.4146549701690674, "learning_rate": 6.91341716182545e-05, "loss": 5.5718, "step": 25 }, { "epoch": 0.09868421052631579, "grad_norm": 3.615739345550537, "learning_rate": 5e-05, "loss": 5.5577, "step": 30 }, { "epoch": 0.09868421052631579, "eval_loss": 1.3943980932235718, "eval_runtime": 51.6919, "eval_samples_per_second": 2.496, "eval_steps_per_second": 1.257, "step": 30 }, { "epoch": 0.11513157894736842, "grad_norm": 2.7189626693725586, "learning_rate": 3.086582838174551e-05, "loss": 5.6287, "step": 35 }, { "epoch": 0.13157894736842105, "grad_norm": 2.727278709411621, "learning_rate": 1.4644660940672627e-05, "loss": 5.4495, "step": 40 }, { "epoch": 0.13157894736842105, "eval_loss": 1.3894844055175781, "eval_runtime": 46.8497, "eval_samples_per_second": 2.753, "eval_steps_per_second": 1.387, "step": 40 }, { "epoch": 0.14802631578947367, "grad_norm": 2.9583241939544678, "learning_rate": 3.8060233744356633e-06, "loss": 5.5246, "step": 45 }, { "epoch": 0.16447368421052633, "grad_norm": 3.652521848678589, "learning_rate": 0.0, "loss": 5.0081, "step": 50 }, { "epoch": 0.16447368421052633, "eval_loss": 1.388664960861206, "eval_runtime": 35.0602, "eval_samples_per_second": 3.679, "eval_steps_per_second": 1.854, "step": 50 } ], "logging_steps": 5, "max_steps": 50, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 13, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 8143665679564800.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }