{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.04106143817687215, "eval_steps": 50, "global_step": 200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00020530719088436073, "eval_loss": 1.3546793460845947, "eval_runtime": 87.8517, "eval_samples_per_second": 23.346, "eval_steps_per_second": 11.679, "step": 1 }, { "epoch": 0.002053071908843607, "grad_norm": 1.1122969388961792, "learning_rate": 0.0002, "loss": 1.1488, "step": 10 }, { "epoch": 0.004106143817687214, "grad_norm": 0.7866343259811401, "learning_rate": 0.00019863613034027224, "loss": 0.5164, "step": 20 }, { "epoch": 0.006159215726530822, "grad_norm": 0.9349194765090942, "learning_rate": 0.00019458172417006347, "loss": 0.3543, "step": 30 }, { "epoch": 0.008212287635374429, "grad_norm": 0.6453638076782227, "learning_rate": 0.0001879473751206489, "loss": 0.3246, "step": 40 }, { "epoch": 0.010265359544218037, "grad_norm": 0.4768398702144623, "learning_rate": 0.00017891405093963938, "loss": 0.3237, "step": 50 }, { "epoch": 0.010265359544218037, "eval_loss": 0.3221627175807953, "eval_runtime": 86.3643, "eval_samples_per_second": 23.748, "eval_steps_per_second": 11.88, "step": 50 }, { "epoch": 0.012318431453061643, "grad_norm": 0.49055156111717224, "learning_rate": 0.00016772815716257412, "loss": 0.3168, "step": 60 }, { "epoch": 0.014371503361905251, "grad_norm": 0.5417104363441467, "learning_rate": 0.00015469481581224272, "loss": 0.3073, "step": 70 }, { "epoch": 0.016424575270748858, "grad_norm": 0.681601345539093, "learning_rate": 0.00014016954246529696, "loss": 0.3046, "step": 80 }, { "epoch": 0.018477647179592466, "grad_norm": 0.581589937210083, "learning_rate": 0.00012454854871407994, "loss": 0.3075, "step": 90 }, { "epoch": 0.020530719088436074, "grad_norm": 0.5185021758079529, "learning_rate": 0.00010825793454723325, "loss": 0.295, "step": 100 }, { "epoch": 0.020530719088436074, "eval_loss": 0.2953387498855591, "eval_runtime": 86.335, "eval_samples_per_second": 23.756, "eval_steps_per_second": 11.884, "step": 100 }, { "epoch": 0.02258379099727968, "grad_norm": 0.6514111161231995, "learning_rate": 9.174206545276677e-05, "loss": 0.3086, "step": 110 }, { "epoch": 0.024636862906123286, "grad_norm": 0.5423412322998047, "learning_rate": 7.54514512859201e-05, "loss": 0.2823, "step": 120 }, { "epoch": 0.026689934814966895, "grad_norm": 0.43237441778182983, "learning_rate": 5.983045753470308e-05, "loss": 0.2855, "step": 130 }, { "epoch": 0.028743006723810503, "grad_norm": 0.508624792098999, "learning_rate": 4.530518418775733e-05, "loss": 0.2724, "step": 140 }, { "epoch": 0.030796078632654107, "grad_norm": 0.3984101712703705, "learning_rate": 3.227184283742591e-05, "loss": 0.2764, "step": 150 }, { "epoch": 0.030796078632654107, "eval_loss": 0.28801968693733215, "eval_runtime": 86.3387, "eval_samples_per_second": 23.755, "eval_steps_per_second": 11.883, "step": 150 }, { "epoch": 0.032849150541497715, "grad_norm": 0.4992406666278839, "learning_rate": 2.1085949060360654e-05, "loss": 0.2794, "step": 160 }, { "epoch": 0.03490222245034132, "grad_norm": 0.438711017370224, "learning_rate": 1.2052624879351104e-05, "loss": 0.2507, "step": 170 }, { "epoch": 0.03695529435918493, "grad_norm": 0.4992602467536926, "learning_rate": 5.418275829936537e-06, "loss": 0.2777, "step": 180 }, { "epoch": 0.03900836626802854, "grad_norm": 0.4569292366504669, "learning_rate": 1.3638696597277679e-06, "loss": 0.2744, "step": 190 }, { "epoch": 0.04106143817687215, "grad_norm": 0.49360865354537964, "learning_rate": 0.0, "loss": 0.297, "step": 200 }, { "epoch": 0.04106143817687215, "eval_loss": 0.28519004583358765, "eval_runtime": 86.3294, "eval_samples_per_second": 23.758, "eval_steps_per_second": 11.885, "step": 200 } ], "logging_steps": 10, "max_steps": 200, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 50, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 4.51479053795328e+16, "train_batch_size": 2, "trial_name": null, "trial_params": null }