{ "best_metric": 1.2161424160003662, "best_model_checkpoint": "miner_id_24/checkpoint-300", "epoch": 0.1958863858961802, "eval_steps": 50, "global_step": 300, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.000652954619653934, "eval_loss": 1.7593097686767578, "eval_runtime": 42.0818, "eval_samples_per_second": 61.285, "eval_steps_per_second": 15.327, "step": 1 }, { "epoch": 0.00652954619653934, "grad_norm": 0.1703435182571411, "learning_rate": 0.0002, "loss": 1.3545, "step": 10 }, { "epoch": 0.01305909239307868, "grad_norm": 0.10457056760787964, "learning_rate": 0.0001998582695676762, "loss": 1.3658, "step": 20 }, { "epoch": 0.019588638589618023, "grad_norm": 0.10769687592983246, "learning_rate": 0.00019943348002101371, "loss": 1.409, "step": 30 }, { "epoch": 0.02611818478615736, "grad_norm": 0.23312892019748688, "learning_rate": 0.00019872683547213446, "loss": 1.5118, "step": 40 }, { "epoch": 0.0326477309826967, "grad_norm": 0.8387290239334106, "learning_rate": 0.00019774033898178667, "loss": 1.6039, "step": 50 }, { "epoch": 0.0326477309826967, "eval_loss": 1.4602386951446533, "eval_runtime": 42.1985, "eval_samples_per_second": 61.116, "eval_steps_per_second": 15.285, "step": 50 }, { "epoch": 0.039177277179236046, "grad_norm": 0.09031832218170166, "learning_rate": 0.0001964767868814516, "loss": 1.2569, "step": 60 }, { "epoch": 0.045706823375775384, "grad_norm": 0.09168185293674469, "learning_rate": 0.00019493976084683813, "loss": 1.2857, "step": 70 }, { "epoch": 0.05223636957231472, "grad_norm": 0.1381785273551941, "learning_rate": 0.00019313361774523385, "loss": 1.3339, "step": 80 }, { "epoch": 0.058765915768854066, "grad_norm": 0.3087773323059082, "learning_rate": 0.00019106347728549135, "loss": 1.3948, "step": 90 }, { "epoch": 0.0652954619653934, "grad_norm": 0.6337625980377197, "learning_rate": 0.00018873520750565718, "loss": 1.3688, "step": 100 }, { "epoch": 0.0652954619653934, "eval_loss": 1.3073418140411377, "eval_runtime": 42.0855, "eval_samples_per_second": 61.28, "eval_steps_per_second": 15.326, "step": 100 }, { "epoch": 0.07182500816193274, "grad_norm": 0.15941736102104187, "learning_rate": 0.0001861554081393806, "loss": 1.2118, "step": 110 }, { "epoch": 0.07835455435847209, "grad_norm": 0.07974281907081604, "learning_rate": 0.0001833313919082515, "loss": 1.2545, "step": 120 }, { "epoch": 0.08488410055501143, "grad_norm": 0.14130432903766632, "learning_rate": 0.00018027116379309638, "loss": 1.3471, "step": 130 }, { "epoch": 0.09141364675155077, "grad_norm": 0.31553900241851807, "learning_rate": 0.00017698339834299061, "loss": 1.3692, "step": 140 }, { "epoch": 0.0979431929480901, "grad_norm": 0.9388893246650696, "learning_rate": 0.00017347741508630672, "loss": 1.4355, "step": 150 }, { "epoch": 0.0979431929480901, "eval_loss": 1.267966389656067, "eval_runtime": 42.2941, "eval_samples_per_second": 60.978, "eval_steps_per_second": 15.25, "step": 150 }, { "epoch": 0.10447273914462944, "grad_norm": 0.07010999321937561, "learning_rate": 0.0001697631521134985, "loss": 1.1417, "step": 160 }, { "epoch": 0.1110022853411688, "grad_norm": 0.10216495394706726, "learning_rate": 0.00016585113790650388, "loss": 1.238, "step": 170 }, { "epoch": 0.11753183153770813, "grad_norm": 0.12675678730010986, "learning_rate": 0.0001617524614946192, "loss": 1.3154, "step": 180 }, { "epoch": 0.12406137773424747, "grad_norm": 0.31328052282333374, "learning_rate": 0.0001574787410214407, "loss": 1.3155, "step": 190 }, { "epoch": 0.1305909239307868, "grad_norm": 1.081733226776123, "learning_rate": 0.00015304209081197425, "loss": 1.3807, "step": 200 }, { "epoch": 0.1305909239307868, "eval_loss": 1.2385743856430054, "eval_runtime": 42.1021, "eval_samples_per_second": 61.256, "eval_steps_per_second": 15.32, "step": 200 }, { "epoch": 0.13712047012732614, "grad_norm": 0.06139560416340828, "learning_rate": 0.00014845508703326504, "loss": 1.1561, "step": 210 }, { "epoch": 0.14365001632386548, "grad_norm": 0.07948515564203262, "learning_rate": 0.00014373073204588556, "loss": 1.2563, "step": 220 }, { "epoch": 0.15017956252040482, "grad_norm": 0.14629638195037842, "learning_rate": 0.00013888241754733208, "loss": 1.3074, "step": 230 }, { "epoch": 0.15670910871694418, "grad_norm": 0.3273250162601471, "learning_rate": 0.00013392388661180303, "loss": 1.288, "step": 240 }, { "epoch": 0.16323865491348352, "grad_norm": 1.0025297403335571, "learning_rate": 0.0001288691947339621, "loss": 1.242, "step": 250 }, { "epoch": 0.16323865491348352, "eval_loss": 1.2301549911499023, "eval_runtime": 41.9764, "eval_samples_per_second": 61.439, "eval_steps_per_second": 15.366, "step": 250 }, { "epoch": 0.16976820111002286, "grad_norm": 0.05828835442662239, "learning_rate": 0.0001237326699871115, "loss": 1.1586, "step": 260 }, { "epoch": 0.1762977473065622, "grad_norm": 0.09446071833372116, "learning_rate": 0.00011852887240871145, "loss": 1.2516, "step": 270 }, { "epoch": 0.18282729350310153, "grad_norm": 0.15607990324497223, "learning_rate": 0.00011327255272837221, "loss": 1.2933, "step": 280 }, { "epoch": 0.18935683969964087, "grad_norm": 0.35059285163879395, "learning_rate": 0.00010797861055530831, "loss": 1.2938, "step": 290 }, { "epoch": 0.1958863858961802, "grad_norm": 1.048234462738037, "learning_rate": 0.00010266205214377748, "loss": 1.1819, "step": 300 }, { "epoch": 0.1958863858961802, "eval_loss": 1.2161424160003662, "eval_runtime": 42.3336, "eval_samples_per_second": 60.921, "eval_steps_per_second": 15.236, "step": 300 } ], "logging_steps": 10, "max_steps": 600, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 50, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 5, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 4.875582621037363e+16, "train_batch_size": 8, "trial_name": null, "trial_params": null }