{ "best_metric": 2.0692508220672607, "best_model_checkpoint": "miner_id_24/checkpoint-150", "epoch": 2.9976133651551313, "eval_steps": 50, "global_step": 314, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00954653937947494, "eval_loss": 3.094449996948242, "eval_runtime": 7.3615, "eval_samples_per_second": 24.044, "eval_steps_per_second": 6.113, "step": 1 }, { "epoch": 0.0954653937947494, "grad_norm": 2.3829503059387207, "learning_rate": 0.0002, "loss": 2.5847, "step": 10 }, { "epoch": 0.1909307875894988, "grad_norm": 2.0937304496765137, "learning_rate": 0.00019946649801132427, "loss": 2.1149, "step": 20 }, { "epoch": 0.2863961813842482, "grad_norm": 2.3218345642089844, "learning_rate": 0.00019787168453273544, "loss": 2.1679, "step": 30 }, { "epoch": 0.3818615751789976, "grad_norm": 1.8039714097976685, "learning_rate": 0.00019523257628748146, "loss": 2.1264, "step": 40 }, { "epoch": 0.477326968973747, "grad_norm": 1.8133724927902222, "learning_rate": 0.00019157733266550575, "loss": 1.9072, "step": 50 }, { "epoch": 0.477326968973747, "eval_loss": 2.178868293762207, "eval_runtime": 7.5111, "eval_samples_per_second": 23.565, "eval_steps_per_second": 5.991, "step": 50 }, { "epoch": 0.5727923627684964, "grad_norm": 1.3719878196716309, "learning_rate": 0.0001869449552616367, "loss": 2.3152, "step": 60 }, { "epoch": 0.6682577565632458, "grad_norm": 1.5155035257339478, "learning_rate": 0.0001813848717270195, "loss": 2.0273, "step": 70 }, { "epoch": 0.7637231503579952, "grad_norm": 2.242422342300415, "learning_rate": 0.0001749564083741126, "loss": 2.0894, "step": 80 }, { "epoch": 0.8591885441527446, "grad_norm": 1.8298614025115967, "learning_rate": 0.00016772815716257412, "loss": 2.1092, "step": 90 }, { "epoch": 0.954653937947494, "grad_norm": 1.6426506042480469, "learning_rate": 0.0001597772438203241, "loss": 1.8906, "step": 100 }, { "epoch": 0.954653937947494, "eval_loss": 2.040241003036499, "eval_runtime": 7.4921, "eval_samples_per_second": 23.625, "eval_steps_per_second": 6.006, "step": 100 }, { "epoch": 1.0501193317422435, "grad_norm": 1.5735242366790771, "learning_rate": 0.00015118850490896012, "loss": 2.1002, "step": 110 }, { "epoch": 1.1455847255369929, "grad_norm": 1.3919321298599243, "learning_rate": 0.00014205358261427074, "loss": 1.5763, "step": 120 }, { "epoch": 1.2410501193317423, "grad_norm": 2.7821781635284424, "learning_rate": 0.00013246994692046836, "loss": 1.2649, "step": 130 }, { "epoch": 1.3365155131264916, "grad_norm": 1.6206250190734863, "learning_rate": 0.00012253985560158062, "loss": 1.7724, "step": 140 }, { "epoch": 1.431980906921241, "grad_norm": 2.021432399749756, "learning_rate": 0.00011236926312693479, "loss": 1.4029, "step": 150 }, { "epoch": 1.431980906921241, "eval_loss": 2.0692508220672607, "eval_runtime": 7.5907, "eval_samples_per_second": 23.318, "eval_steps_per_second": 5.928, "step": 150 }, { "epoch": 1.5274463007159904, "grad_norm": 2.0635786056518555, "learning_rate": 0.00010206669012275545, "loss": 1.5269, "step": 160 }, { "epoch": 1.6229116945107398, "grad_norm": 1.6010887622833252, "learning_rate": 9.174206545276677e-05, "loss": 1.579, "step": 170 }, { "epoch": 1.7183770883054894, "grad_norm": 1.7680290937423706, "learning_rate": 8.150555327284417e-05, "loss": 1.2945, "step": 180 }, { "epoch": 1.8138424821002386, "grad_norm": 1.8155319690704346, "learning_rate": 7.146637757508949e-05, "loss": 1.6222, "step": 190 }, { "epoch": 1.9093078758949882, "grad_norm": 1.8103411197662354, "learning_rate": 6.173165676349103e-05, "loss": 1.4044, "step": 200 }, { "epoch": 1.9093078758949882, "eval_loss": 2.026817560195923, "eval_runtime": 7.5202, "eval_samples_per_second": 23.537, "eval_steps_per_second": 5.984, "step": 200 }, { "epoch": 2.0047732696897373, "grad_norm": 1.5236141681671143, "learning_rate": 5.240526069629265e-05, "loss": 1.3441, "step": 210 }, { "epoch": 2.100238663484487, "grad_norm": 1.6003341674804688, "learning_rate": 4.35867023904749e-05, "loss": 1.1384, "step": 220 }, { "epoch": 2.195704057279236, "grad_norm": 2.2577860355377197, "learning_rate": 3.53700762139059e-05, "loss": 0.7451, "step": 230 }, { "epoch": 2.2911694510739857, "grad_norm": 2.3072211742401123, "learning_rate": 2.7843053894693803e-05, "loss": 1.0129, "step": 240 }, { "epoch": 2.386634844868735, "grad_norm": 1.8944575786590576, "learning_rate": 2.1085949060360654e-05, "loss": 0.8775, "step": 250 }, { "epoch": 2.386634844868735, "eval_loss": 2.2578365802764893, "eval_runtime": 7.5056, "eval_samples_per_second": 23.582, "eval_steps_per_second": 5.995, "step": 250 }, { "epoch": 2.4821002386634845, "grad_norm": 2.331746816635132, "learning_rate": 1.5170860288242638e-05, "loss": 0.7135, "step": 260 }, { "epoch": 2.577565632458234, "grad_norm": 2.0088183879852295, "learning_rate": 1.0160901810802115e-05, "loss": 1.0913, "step": 270 }, { "epoch": 2.6730310262529833, "grad_norm": 2.1094515323638916, "learning_rate": 6.109530084257042e-06, "loss": 0.7847, "step": 280 }, { "epoch": 2.7684964200477324, "grad_norm": 2.2581160068511963, "learning_rate": 3.059973406066963e-06, "loss": 0.8564, "step": 290 }, { "epoch": 2.863961813842482, "grad_norm": 2.2174034118652344, "learning_rate": 1.0447706672797264e-06, "loss": 0.9619, "step": 300 }, { "epoch": 2.863961813842482, "eval_loss": 2.261319398880005, "eval_runtime": 7.5074, "eval_samples_per_second": 23.577, "eval_steps_per_second": 5.994, "step": 300 }, { "epoch": 2.9594272076372317, "grad_norm": 2.1443300247192383, "learning_rate": 8.542416126989805e-08, "loss": 0.677, "step": 310 } ], "logging_steps": 10, "max_steps": 314, "num_input_tokens_seen": 0, "num_train_epochs": 4, "save_steps": 150, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 5, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 2 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 2.081848407097344e+17, "train_batch_size": 8, "trial_name": null, "trial_params": null }