{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.08161044613710555, "eval_steps": 50, "global_step": 150, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.000544069640914037, "eval_loss": 0.23080985248088837, "eval_runtime": 24.1847, "eval_samples_per_second": 16.002, "eval_steps_per_second": 8.022, "step": 1 }, { "epoch": 0.00544069640914037, "grad_norm": 0.28848734498023987, "learning_rate": 0.0002, "loss": 0.3543, "step": 10 }, { "epoch": 0.01088139281828074, "grad_norm": 0.28202658891677856, "learning_rate": 0.0002, "loss": 0.2399, "step": 20 }, { "epoch": 0.01632208922742111, "grad_norm": 0.39430463314056396, "learning_rate": 0.0002, "loss": 0.2353, "step": 30 }, { "epoch": 0.02176278563656148, "grad_norm": 0.24603690207004547, "learning_rate": 0.0002, "loss": 0.1954, "step": 40 }, { "epoch": 0.02720348204570185, "grad_norm": 0.2858443260192871, "learning_rate": 0.0002, "loss": 0.194, "step": 50 }, { "epoch": 0.02720348204570185, "eval_loss": 0.1075894683599472, "eval_runtime": 24.2053, "eval_samples_per_second": 15.988, "eval_steps_per_second": 8.015, "step": 50 }, { "epoch": 0.03264417845484222, "grad_norm": 1.0796252489089966, "learning_rate": 0.0002, "loss": 0.2065, "step": 60 }, { "epoch": 0.03808487486398259, "grad_norm": 0.3447917103767395, "learning_rate": 0.0002, "loss": 0.1985, "step": 70 }, { "epoch": 0.04352557127312296, "grad_norm": 0.30337709188461304, "learning_rate": 0.0002, "loss": 0.2268, "step": 80 }, { "epoch": 0.04896626768226333, "grad_norm": 0.12297934293746948, "learning_rate": 0.0002, "loss": 0.2141, "step": 90 }, { "epoch": 0.0544069640914037, "grad_norm": 0.2504233121871948, "learning_rate": 0.0002, "loss": 0.185, "step": 100 }, { "epoch": 0.0544069640914037, "eval_loss": 0.10059353709220886, "eval_runtime": 24.2252, "eval_samples_per_second": 15.975, "eval_steps_per_second": 8.008, "step": 100 }, { "epoch": 0.05984766050054407, "grad_norm": 0.21035900712013245, "learning_rate": 0.0002, "loss": 0.2442, "step": 110 }, { "epoch": 0.06528835690968444, "grad_norm": 0.41095128655433655, "learning_rate": 0.0002, "loss": 0.1617, "step": 120 }, { "epoch": 0.07072905331882481, "grad_norm": 0.19263286888599396, "learning_rate": 0.0002, "loss": 0.1611, "step": 130 }, { "epoch": 0.07616974972796518, "grad_norm": 0.24695733189582825, "learning_rate": 0.0002, "loss": 0.1977, "step": 140 }, { "epoch": 0.08161044613710555, "grad_norm": 0.3113032579421997, "learning_rate": 0.0002, "loss": 0.219, "step": 150 }, { "epoch": 0.08161044613710555, "eval_loss": 0.09649600088596344, "eval_runtime": 24.2365, "eval_samples_per_second": 15.968, "eval_steps_per_second": 8.004, "step": 150 } ], "logging_steps": 10, "max_steps": 200, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 50, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 2.262621331075891e+16, "train_batch_size": 2, "trial_name": null, "trial_params": null }