{ "best_metric": 0.9393484592437744, "best_model_checkpoint": "miner_id_24/checkpoint-100", "epoch": 0.26881720430107525, "eval_steps": 50, "global_step": 100, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.002688172043010753, "grad_norm": 2.552518367767334, "learning_rate": 1.008e-05, "loss": 4.2159, "step": 1 }, { "epoch": 0.002688172043010753, "eval_loss": 2.7429797649383545, "eval_runtime": 20.0523, "eval_samples_per_second": 7.83, "eval_steps_per_second": 1.995, "step": 1 }, { "epoch": 0.005376344086021506, "grad_norm": 4.294527053833008, "learning_rate": 2.016e-05, "loss": 5.2935, "step": 2 }, { "epoch": 0.008064516129032258, "grad_norm": 3.8511745929718018, "learning_rate": 3.024e-05, "loss": 4.435, "step": 3 }, { "epoch": 0.010752688172043012, "grad_norm": 4.876188278198242, "learning_rate": 4.032e-05, "loss": 5.0044, "step": 4 }, { "epoch": 0.013440860215053764, "grad_norm": 4.7782392501831055, "learning_rate": 5.04e-05, "loss": 4.3481, "step": 5 }, { "epoch": 0.016129032258064516, "grad_norm": 4.02572774887085, "learning_rate": 6.048e-05, "loss": 3.9645, "step": 6 }, { "epoch": 0.01881720430107527, "grad_norm": 4.530522346496582, "learning_rate": 7.055999999999999e-05, "loss": 3.7968, "step": 7 }, { "epoch": 0.021505376344086023, "grad_norm": 3.8177340030670166, "learning_rate": 8.064e-05, "loss": 3.9752, "step": 8 }, { "epoch": 0.024193548387096774, "grad_norm": 3.42366361618042, "learning_rate": 9.072e-05, "loss": 3.7669, "step": 9 }, { "epoch": 0.026881720430107527, "grad_norm": 3.45857572555542, "learning_rate": 0.0001008, "loss": 3.1495, "step": 10 }, { "epoch": 0.02956989247311828, "grad_norm": 5.064671516418457, "learning_rate": 0.00010026947368421052, "loss": 2.8932, "step": 11 }, { "epoch": 0.03225806451612903, "grad_norm": 7.331480979919434, "learning_rate": 9.973894736842104e-05, "loss": 2.1098, "step": 12 }, { "epoch": 0.03494623655913978, "grad_norm": 6.972163677215576, "learning_rate": 9.920842105263157e-05, "loss": 3.326, "step": 13 }, { "epoch": 0.03763440860215054, "grad_norm": 4.03656005859375, "learning_rate": 9.86778947368421e-05, "loss": 2.4259, "step": 14 }, { "epoch": 0.04032258064516129, "grad_norm": 6.760096073150635, "learning_rate": 9.814736842105264e-05, "loss": 1.9487, "step": 15 }, { "epoch": 0.043010752688172046, "grad_norm": 4.662262439727783, "learning_rate": 9.761684210526316e-05, "loss": 2.489, "step": 16 }, { "epoch": 0.0456989247311828, "grad_norm": 5.427197456359863, "learning_rate": 9.708631578947368e-05, "loss": 2.9028, "step": 17 }, { "epoch": 0.04838709677419355, "grad_norm": 6.550124168395996, "learning_rate": 9.655578947368421e-05, "loss": 2.5639, "step": 18 }, { "epoch": 0.051075268817204304, "grad_norm": 5.742612361907959, "learning_rate": 9.602526315789473e-05, "loss": 1.5072, "step": 19 }, { "epoch": 0.053763440860215055, "grad_norm": 4.321170330047607, "learning_rate": 9.549473684210525e-05, "loss": 1.1754, "step": 20 }, { "epoch": 0.056451612903225805, "grad_norm": 3.095183849334717, "learning_rate": 9.496421052631579e-05, "loss": 1.5671, "step": 21 }, { "epoch": 0.05913978494623656, "grad_norm": 2.853109121322632, "learning_rate": 9.443368421052631e-05, "loss": 1.2742, "step": 22 }, { "epoch": 0.06182795698924731, "grad_norm": 3.149440050125122, "learning_rate": 9.390315789473683e-05, "loss": 2.0117, "step": 23 }, { "epoch": 0.06451612903225806, "grad_norm": 3.0655312538146973, "learning_rate": 9.337263157894737e-05, "loss": 2.2624, "step": 24 }, { "epoch": 0.06720430107526881, "grad_norm": 3.4676096439361572, "learning_rate": 9.28421052631579e-05, "loss": 2.3864, "step": 25 }, { "epoch": 0.06989247311827956, "grad_norm": 3.4489779472351074, "learning_rate": 9.231157894736842e-05, "loss": 1.5537, "step": 26 }, { "epoch": 0.07258064516129033, "grad_norm": 3.5075297355651855, "learning_rate": 9.178105263157895e-05, "loss": 1.6461, "step": 27 }, { "epoch": 0.07526881720430108, "grad_norm": 2.4398374557495117, "learning_rate": 9.125052631578948e-05, "loss": 1.4466, "step": 28 }, { "epoch": 0.07795698924731183, "grad_norm": 3.644092321395874, "learning_rate": 9.072e-05, "loss": 2.507, "step": 29 }, { "epoch": 0.08064516129032258, "grad_norm": 3.263528823852539, "learning_rate": 9.018947368421052e-05, "loss": 1.2544, "step": 30 }, { "epoch": 0.08333333333333333, "grad_norm": 3.7490763664245605, "learning_rate": 8.965894736842104e-05, "loss": 1.2655, "step": 31 }, { "epoch": 0.08602150537634409, "grad_norm": 3.2782695293426514, "learning_rate": 8.912842105263157e-05, "loss": 1.7594, "step": 32 }, { "epoch": 0.08870967741935484, "grad_norm": 2.6544880867004395, "learning_rate": 8.85978947368421e-05, "loss": 1.6763, "step": 33 }, { "epoch": 0.0913978494623656, "grad_norm": 2.461487054824829, "learning_rate": 8.806736842105264e-05, "loss": 1.4153, "step": 34 }, { "epoch": 0.09408602150537634, "grad_norm": 2.037763833999634, "learning_rate": 8.753684210526316e-05, "loss": 1.4954, "step": 35 }, { "epoch": 0.0967741935483871, "grad_norm": 2.2501564025878906, "learning_rate": 8.700631578947369e-05, "loss": 1.8277, "step": 36 }, { "epoch": 0.09946236559139784, "grad_norm": 2.666536331176758, "learning_rate": 8.647578947368421e-05, "loss": 2.2322, "step": 37 }, { "epoch": 0.10215053763440861, "grad_norm": 3.9293577671051025, "learning_rate": 8.594526315789473e-05, "loss": 2.394, "step": 38 }, { "epoch": 0.10483870967741936, "grad_norm": 5.047905921936035, "learning_rate": 8.541473684210525e-05, "loss": 2.2877, "step": 39 }, { "epoch": 0.10752688172043011, "grad_norm": 3.5374960899353027, "learning_rate": 8.488421052631578e-05, "loss": 1.552, "step": 40 }, { "epoch": 0.11021505376344086, "grad_norm": 4.972512722015381, "learning_rate": 8.435368421052631e-05, "loss": 2.5782, "step": 41 }, { "epoch": 0.11290322580645161, "grad_norm": 4.151662349700928, "learning_rate": 8.382315789473684e-05, "loss": 1.3923, "step": 42 }, { "epoch": 0.11559139784946236, "grad_norm": 3.7074525356292725, "learning_rate": 8.329263157894737e-05, "loss": 1.3753, "step": 43 }, { "epoch": 0.11827956989247312, "grad_norm": 3.257704973220825, "learning_rate": 8.27621052631579e-05, "loss": 1.5227, "step": 44 }, { "epoch": 0.12096774193548387, "grad_norm": 2.8200066089630127, "learning_rate": 8.223157894736842e-05, "loss": 1.0166, "step": 45 }, { "epoch": 0.12365591397849462, "grad_norm": 9.245245933532715, "learning_rate": 8.170105263157894e-05, "loss": 3.0691, "step": 46 }, { "epoch": 0.12634408602150538, "grad_norm": 6.105884075164795, "learning_rate": 8.117052631578946e-05, "loss": 2.7272, "step": 47 }, { "epoch": 0.12903225806451613, "grad_norm": 9.19589900970459, "learning_rate": 8.064e-05, "loss": 2.4159, "step": 48 }, { "epoch": 0.13172043010752688, "grad_norm": 6.029378890991211, "learning_rate": 8.010947368421052e-05, "loss": 1.4225, "step": 49 }, { "epoch": 0.13440860215053763, "grad_norm": 4.892293930053711, "learning_rate": 7.957894736842105e-05, "loss": 0.7743, "step": 50 }, { "epoch": 0.13440860215053763, "eval_loss": 1.1576098203659058, "eval_runtime": 20.0295, "eval_samples_per_second": 7.838, "eval_steps_per_second": 1.997, "step": 50 }, { "epoch": 0.13709677419354838, "grad_norm": 3.6493349075317383, "learning_rate": 7.904842105263158e-05, "loss": 3.4341, "step": 51 }, { "epoch": 0.13978494623655913, "grad_norm": 3.946737051010132, "learning_rate": 7.85178947368421e-05, "loss": 4.1712, "step": 52 }, { "epoch": 0.1424731182795699, "grad_norm": 3.0488572120666504, "learning_rate": 7.798736842105263e-05, "loss": 2.7678, "step": 53 }, { "epoch": 0.14516129032258066, "grad_norm": 2.641152858734131, "learning_rate": 7.745684210526315e-05, "loss": 3.4299, "step": 54 }, { "epoch": 0.1478494623655914, "grad_norm": 2.6064324378967285, "learning_rate": 7.692631578947369e-05, "loss": 2.8889, "step": 55 }, { "epoch": 0.15053763440860216, "grad_norm": 2.853044271469116, "learning_rate": 7.639578947368421e-05, "loss": 3.9092, "step": 56 }, { "epoch": 0.1532258064516129, "grad_norm": 3.7264606952667236, "learning_rate": 7.586526315789473e-05, "loss": 3.1663, "step": 57 }, { "epoch": 0.15591397849462366, "grad_norm": 2.3590335845947266, "learning_rate": 7.533473684210526e-05, "loss": 2.5423, "step": 58 }, { "epoch": 0.1586021505376344, "grad_norm": 3.6274800300598145, "learning_rate": 7.480421052631578e-05, "loss": 2.7857, "step": 59 }, { "epoch": 0.16129032258064516, "grad_norm": 3.466926097869873, "learning_rate": 7.427368421052632e-05, "loss": 2.1407, "step": 60 }, { "epoch": 0.1639784946236559, "grad_norm": 2.135305166244507, "learning_rate": 7.374315789473685e-05, "loss": 2.0501, "step": 61 }, { "epoch": 0.16666666666666666, "grad_norm": 2.145434617996216, "learning_rate": 7.321263157894737e-05, "loss": 1.8671, "step": 62 }, { "epoch": 0.1693548387096774, "grad_norm": 2.250318765640259, "learning_rate": 7.26821052631579e-05, "loss": 2.8158, "step": 63 }, { "epoch": 0.17204301075268819, "grad_norm": 2.0222816467285156, "learning_rate": 7.215157894736842e-05, "loss": 1.9672, "step": 64 }, { "epoch": 0.17473118279569894, "grad_norm": 2.0015830993652344, "learning_rate": 7.162105263157894e-05, "loss": 1.7105, "step": 65 }, { "epoch": 0.1774193548387097, "grad_norm": 2.1358511447906494, "learning_rate": 7.109052631578947e-05, "loss": 2.0722, "step": 66 }, { "epoch": 0.18010752688172044, "grad_norm": 2.1164755821228027, "learning_rate": 7.055999999999999e-05, "loss": 1.3778, "step": 67 }, { "epoch": 0.1827956989247312, "grad_norm": 2.6545348167419434, "learning_rate": 7.002947368421052e-05, "loss": 1.5793, "step": 68 }, { "epoch": 0.18548387096774194, "grad_norm": 2.278327703475952, "learning_rate": 6.949894736842105e-05, "loss": 1.3986, "step": 69 }, { "epoch": 0.1881720430107527, "grad_norm": 2.1539969444274902, "learning_rate": 6.896842105263158e-05, "loss": 0.8514, "step": 70 }, { "epoch": 0.19086021505376344, "grad_norm": 2.815715789794922, "learning_rate": 6.843789473684211e-05, "loss": 1.9194, "step": 71 }, { "epoch": 0.1935483870967742, "grad_norm": 1.7789136171340942, "learning_rate": 6.790736842105263e-05, "loss": 1.1168, "step": 72 }, { "epoch": 0.19623655913978494, "grad_norm": 2.7908670902252197, "learning_rate": 6.737684210526315e-05, "loss": 1.6579, "step": 73 }, { "epoch": 0.1989247311827957, "grad_norm": 3.0436408519744873, "learning_rate": 6.684631578947368e-05, "loss": 2.1317, "step": 74 }, { "epoch": 0.20161290322580644, "grad_norm": 2.6586692333221436, "learning_rate": 6.631578947368421e-05, "loss": 1.5078, "step": 75 }, { "epoch": 0.20430107526881722, "grad_norm": 2.3957104682922363, "learning_rate": 6.578526315789473e-05, "loss": 1.4137, "step": 76 }, { "epoch": 0.20698924731182797, "grad_norm": 2.467710256576538, "learning_rate": 6.525473684210526e-05, "loss": 1.5379, "step": 77 }, { "epoch": 0.20967741935483872, "grad_norm": 2.3089401721954346, "learning_rate": 6.47242105263158e-05, "loss": 1.3105, "step": 78 }, { "epoch": 0.21236559139784947, "grad_norm": 3.3448803424835205, "learning_rate": 6.419368421052632e-05, "loss": 2.9907, "step": 79 }, { "epoch": 0.21505376344086022, "grad_norm": 1.918912649154663, "learning_rate": 6.366315789473684e-05, "loss": 1.0538, "step": 80 }, { "epoch": 0.21774193548387097, "grad_norm": 2.1565945148468018, "learning_rate": 6.313263157894736e-05, "loss": 0.8427, "step": 81 }, { "epoch": 0.22043010752688172, "grad_norm": 1.9118677377700806, "learning_rate": 6.26021052631579e-05, "loss": 0.8702, "step": 82 }, { "epoch": 0.22311827956989247, "grad_norm": 2.161041498184204, "learning_rate": 6.207157894736842e-05, "loss": 1.5612, "step": 83 }, { "epoch": 0.22580645161290322, "grad_norm": 1.8799899816513062, "learning_rate": 6.154105263157894e-05, "loss": 1.0472, "step": 84 }, { "epoch": 0.22849462365591397, "grad_norm": 3.115447521209717, "learning_rate": 6.1010526315789474e-05, "loss": 1.3093, "step": 85 }, { "epoch": 0.23118279569892472, "grad_norm": 2.5335817337036133, "learning_rate": 6.048e-05, "loss": 1.5528, "step": 86 }, { "epoch": 0.23387096774193547, "grad_norm": 2.8771631717681885, "learning_rate": 5.994947368421052e-05, "loss": 1.6662, "step": 87 }, { "epoch": 0.23655913978494625, "grad_norm": 3.9417519569396973, "learning_rate": 5.941894736842104e-05, "loss": 1.8012, "step": 88 }, { "epoch": 0.239247311827957, "grad_norm": 3.2698731422424316, "learning_rate": 5.888842105263158e-05, "loss": 2.3075, "step": 89 }, { "epoch": 0.24193548387096775, "grad_norm": 2.6435606479644775, "learning_rate": 5.835789473684211e-05, "loss": 1.3781, "step": 90 }, { "epoch": 0.2446236559139785, "grad_norm": 3.099759817123413, "learning_rate": 5.782736842105263e-05, "loss": 2.1561, "step": 91 }, { "epoch": 0.24731182795698925, "grad_norm": 3.145350217819214, "learning_rate": 5.7296842105263154e-05, "loss": 1.4506, "step": 92 }, { "epoch": 0.25, "grad_norm": 4.324268341064453, "learning_rate": 5.676631578947368e-05, "loss": 0.9423, "step": 93 }, { "epoch": 0.25268817204301075, "grad_norm": 2.9348642826080322, "learning_rate": 5.623578947368421e-05, "loss": 1.7886, "step": 94 }, { "epoch": 0.2553763440860215, "grad_norm": 1.738901138305664, "learning_rate": 5.570526315789474e-05, "loss": 0.676, "step": 95 }, { "epoch": 0.25806451612903225, "grad_norm": 2.8710925579071045, "learning_rate": 5.5174736842105266e-05, "loss": 0.9851, "step": 96 }, { "epoch": 0.260752688172043, "grad_norm": 3.99045729637146, "learning_rate": 5.464421052631579e-05, "loss": 1.8844, "step": 97 }, { "epoch": 0.26344086021505375, "grad_norm": 4.602737903594971, "learning_rate": 5.411368421052631e-05, "loss": 1.3643, "step": 98 }, { "epoch": 0.2661290322580645, "grad_norm": 3.7166576385498047, "learning_rate": 5.358315789473684e-05, "loss": 0.6935, "step": 99 }, { "epoch": 0.26881720430107525, "grad_norm": 3.1870436668395996, "learning_rate": 5.3052631578947364e-05, "loss": 1.2186, "step": 100 }, { "epoch": 0.26881720430107525, "eval_loss": 0.9393484592437744, "eval_runtime": 20.0614, "eval_samples_per_second": 7.826, "eval_steps_per_second": 1.994, "step": 100 } ], "logging_steps": 1, "max_steps": 200, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 50, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 5, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 3.32627164790784e+16, "train_batch_size": 4, "trial_name": null, "trial_params": null }