{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.0013823610727121925, "eval_steps": 5, "global_step": 30, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 4.607870242373975e-05, "eval_loss": 1.6102041006088257, "eval_runtime": 1242.2325, "eval_samples_per_second": 7.356, "eval_steps_per_second": 3.678, "step": 1 }, { "epoch": 0.00013823610727121925, "grad_norm": 0.8831824064254761, "learning_rate": 4e-05, "loss": 5.9381, "step": 3 }, { "epoch": 0.00023039351211869873, "eval_loss": 1.5882604122161865, "eval_runtime": 1246.126, "eval_samples_per_second": 7.333, "eval_steps_per_second": 3.667, "step": 5 }, { "epoch": 0.0002764722145424385, "grad_norm": 1.0897746086120605, "learning_rate": 8e-05, "loss": 5.26, "step": 6 }, { "epoch": 0.0004147083218136577, "grad_norm": 1.1137936115264893, "learning_rate": 0.00012, "loss": 5.9993, "step": 9 }, { "epoch": 0.00046078702423739745, "eval_loss": 1.4089840650558472, "eval_runtime": 1245.8334, "eval_samples_per_second": 7.335, "eval_steps_per_second": 3.667, "step": 10 }, { "epoch": 0.000552944429084877, "grad_norm": 1.1472383737564087, "learning_rate": 0.00016, "loss": 5.9979, "step": 12 }, { "epoch": 0.0006911805363560963, "grad_norm": 0.7887614965438843, "learning_rate": 0.0002, "loss": 5.3875, "step": 15 }, { "epoch": 0.0006911805363560963, "eval_loss": 1.2726045846939087, "eval_runtime": 1245.3567, "eval_samples_per_second": 7.338, "eval_steps_per_second": 3.669, "step": 15 }, { "epoch": 0.0008294166436273154, "grad_norm": 1.062957763671875, "learning_rate": 0.00018090169943749476, "loss": 5.2179, "step": 18 }, { "epoch": 0.0009215740484747949, "eval_loss": 1.2275902032852173, "eval_runtime": 1246.2045, "eval_samples_per_second": 7.333, "eval_steps_per_second": 3.666, "step": 20 }, { "epoch": 0.0009676527508985347, "grad_norm": 0.8761299848556519, "learning_rate": 0.00013090169943749476, "loss": 4.7563, "step": 21 }, { "epoch": 0.001105888858169754, "grad_norm": 0.9032831192016602, "learning_rate": 6.909830056250527e-05, "loss": 4.4314, "step": 24 }, { "epoch": 0.0011519675605934938, "eval_loss": 1.2096927165985107, "eval_runtime": 1245.7257, "eval_samples_per_second": 7.335, "eval_steps_per_second": 3.668, "step": 25 }, { "epoch": 0.0012441249654409731, "grad_norm": 1.0080879926681519, "learning_rate": 1.9098300562505266e-05, "loss": 4.7194, "step": 27 }, { "epoch": 0.0013823610727121925, "grad_norm": 0.8476549983024597, "learning_rate": 0.0, "loss": 5.2062, "step": 30 }, { "epoch": 0.0013823610727121925, "eval_loss": 1.2064721584320068, "eval_runtime": 1245.4272, "eval_samples_per_second": 7.337, "eval_steps_per_second": 3.669, "step": 30 } ], "logging_steps": 3, "max_steps": 30, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 15, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 2.094783887572992e+16, "train_batch_size": 2, "trial_name": null, "trial_params": null }