{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.0243116756822464, "eval_steps": 50, "global_step": 200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.000121558378411232, "eval_loss": 2.51324725151062, "eval_runtime": 129.2942, "eval_samples_per_second": 26.792, "eval_steps_per_second": 13.396, "step": 1 }, { "epoch": 0.00121558378411232, "grad_norm": 1.9220033884048462, "learning_rate": 0.0002, "loss": 9.2311, "step": 10 }, { "epoch": 0.00243116756822464, "grad_norm": 2.4667861461639404, "learning_rate": 0.0002, "loss": 9.3961, "step": 20 }, { "epoch": 0.0036467513523369597, "grad_norm": 2.1678335666656494, "learning_rate": 0.0002, "loss": 9.2295, "step": 30 }, { "epoch": 0.00486233513644928, "grad_norm": 1.7435227632522583, "learning_rate": 0.0002, "loss": 9.2995, "step": 40 }, { "epoch": 0.0060779189205616, "grad_norm": 1.7226349115371704, "learning_rate": 0.0002, "loss": 8.9876, "step": 50 }, { "epoch": 0.0060779189205616, "eval_loss": 2.2176895141601562, "eval_runtime": 129.241, "eval_samples_per_second": 26.803, "eval_steps_per_second": 13.401, "step": 50 }, { "epoch": 0.007293502704673919, "grad_norm": 1.1817125082015991, "learning_rate": 0.0002, "loss": 9.0394, "step": 60 }, { "epoch": 0.00850908648878624, "grad_norm": 1.4283983707427979, "learning_rate": 0.0002, "loss": 9.0866, "step": 70 }, { "epoch": 0.00972467027289856, "grad_norm": 1.7030812501907349, "learning_rate": 0.0002, "loss": 8.8661, "step": 80 }, { "epoch": 0.010940254057010879, "grad_norm": 1.697030782699585, "learning_rate": 0.0002, "loss": 8.8937, "step": 90 }, { "epoch": 0.0121558378411232, "grad_norm": 1.6287360191345215, "learning_rate": 0.0002, "loss": 9.0157, "step": 100 }, { "epoch": 0.0121558378411232, "eval_loss": 2.199361801147461, "eval_runtime": 129.2644, "eval_samples_per_second": 26.798, "eval_steps_per_second": 13.399, "step": 100 }, { "epoch": 0.01337142162523552, "grad_norm": 1.7065634727478027, "learning_rate": 0.0002, "loss": 9.0676, "step": 110 }, { "epoch": 0.014587005409347839, "grad_norm": 2.568800210952759, "learning_rate": 0.0002, "loss": 8.584, "step": 120 }, { "epoch": 0.01580258919346016, "grad_norm": 2.7793874740600586, "learning_rate": 0.0002, "loss": 8.6535, "step": 130 }, { "epoch": 0.01701817297757248, "grad_norm": 2.116802453994751, "learning_rate": 0.0002, "loss": 8.6239, "step": 140 }, { "epoch": 0.0182337567616848, "grad_norm": 1.6645129919052124, "learning_rate": 0.0002, "loss": 8.5241, "step": 150 }, { "epoch": 0.0182337567616848, "eval_loss": 2.1867380142211914, "eval_runtime": 129.2587, "eval_samples_per_second": 26.799, "eval_steps_per_second": 13.399, "step": 150 }, { "epoch": 0.01944934054579712, "grad_norm": 1.7567479610443115, "learning_rate": 0.0002, "loss": 8.761, "step": 160 }, { "epoch": 0.020664924329909438, "grad_norm": 1.3298070430755615, "learning_rate": 0.0002, "loss": 8.6619, "step": 170 }, { "epoch": 0.021880508114021757, "grad_norm": 1.3300223350524902, "learning_rate": 0.0002, "loss": 8.9402, "step": 180 }, { "epoch": 0.02309609189813408, "grad_norm": 1.8174155950546265, "learning_rate": 0.0002, "loss": 8.7659, "step": 190 }, { "epoch": 0.0243116756822464, "grad_norm": 1.6860878467559814, "learning_rate": 0.0002, "loss": 9.0848, "step": 200 }, { "epoch": 0.0243116756822464, "eval_loss": 2.1836845874786377, "eval_runtime": 129.2197, "eval_samples_per_second": 26.807, "eval_steps_per_second": 13.404, "step": 200 } ], "logging_steps": 10, "max_steps": 200, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 50, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 3.762410749113139e+16, "train_batch_size": 2, "trial_name": null, "trial_params": null }