{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.1088139281828074, "eval_steps": 50, "global_step": 200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.000544069640914037, "eval_loss": 0.22625693678855896, "eval_runtime": 24.4291, "eval_samples_per_second": 15.842, "eval_steps_per_second": 7.941, "step": 1 }, { "epoch": 0.00544069640914037, "grad_norm": 0.32291528582572937, "learning_rate": 0.0002, "loss": 0.3295, "step": 10 }, { "epoch": 0.01088139281828074, "grad_norm": 0.2552948594093323, "learning_rate": 0.0002, "loss": 0.2325, "step": 20 }, { "epoch": 0.01632208922742111, "grad_norm": 0.2857631742954254, "learning_rate": 0.0002, "loss": 0.2269, "step": 30 }, { "epoch": 0.02176278563656148, "grad_norm": 0.22182883322238922, "learning_rate": 0.0002, "loss": 0.1901, "step": 40 }, { "epoch": 0.02720348204570185, "grad_norm": 0.3137857913970947, "learning_rate": 0.0002, "loss": 0.1887, "step": 50 }, { "epoch": 0.02720348204570185, "eval_loss": 0.10863105207681656, "eval_runtime": 24.3415, "eval_samples_per_second": 15.899, "eval_steps_per_second": 7.97, "step": 50 }, { "epoch": 0.03264417845484222, "grad_norm": 0.8275140523910522, "learning_rate": 0.0002, "loss": 0.2106, "step": 60 }, { "epoch": 0.03808487486398259, "grad_norm": 0.3336808681488037, "learning_rate": 0.0002, "loss": 0.1991, "step": 70 }, { "epoch": 0.04352557127312296, "grad_norm": 0.3360205888748169, "learning_rate": 0.0002, "loss": 0.2231, "step": 80 }, { "epoch": 0.04896626768226333, "grad_norm": 0.13263827562332153, "learning_rate": 0.0002, "loss": 0.215, "step": 90 }, { "epoch": 0.0544069640914037, "grad_norm": 0.26391035318374634, "learning_rate": 0.0002, "loss": 0.1878, "step": 100 }, { "epoch": 0.0544069640914037, "eval_loss": 0.10204492509365082, "eval_runtime": 24.3432, "eval_samples_per_second": 15.898, "eval_steps_per_second": 7.969, "step": 100 }, { "epoch": 0.05984766050054407, "grad_norm": 0.21117790043354034, "learning_rate": 0.0002, "loss": 0.2473, "step": 110 }, { "epoch": 0.06528835690968444, "grad_norm": 0.4645533859729767, "learning_rate": 0.0002, "loss": 0.1634, "step": 120 }, { "epoch": 0.07072905331882481, "grad_norm": 0.18397293984889984, "learning_rate": 0.0002, "loss": 0.16, "step": 130 }, { "epoch": 0.07616974972796518, "grad_norm": 0.3006034195423126, "learning_rate": 0.0002, "loss": 0.198, "step": 140 }, { "epoch": 0.08161044613710555, "grad_norm": 0.32205018401145935, "learning_rate": 0.0002, "loss": 0.2153, "step": 150 }, { "epoch": 0.08161044613710555, "eval_loss": 0.09607148170471191, "eval_runtime": 24.3523, "eval_samples_per_second": 15.892, "eval_steps_per_second": 7.966, "step": 150 }, { "epoch": 0.08705114254624592, "grad_norm": 0.4278263449668884, "learning_rate": 0.0002, "loss": 0.1737, "step": 160 }, { "epoch": 0.09249183895538629, "grad_norm": 0.9995900988578796, "learning_rate": 0.0002, "loss": 0.231, "step": 170 }, { "epoch": 0.09793253536452666, "grad_norm": 0.1793416291475296, "learning_rate": 0.0002, "loss": 0.2015, "step": 180 }, { "epoch": 0.10337323177366703, "grad_norm": 0.8814968466758728, "learning_rate": 0.0002, "loss": 0.1999, "step": 190 }, { "epoch": 0.1088139281828074, "grad_norm": 0.2865917682647705, "learning_rate": 0.0002, "loss": 0.2293, "step": 200 }, { "epoch": 0.1088139281828074, "eval_loss": 0.09589679539203644, "eval_runtime": 24.4054, "eval_samples_per_second": 15.857, "eval_steps_per_second": 7.949, "step": 200 } ], "logging_steps": 10, "max_steps": 200, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 50, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 3.044558460734669e+16, "train_batch_size": 2, "trial_name": null, "trial_params": null }