{ "best_metric": 0.9471560716629028, "best_model_checkpoint": "miner_id_24/checkpoint-100", "epoch": 0.006386511687316388, "eval_steps": 50, "global_step": 100, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 6.386511687316388e-05, "grad_norm": 0.28274792432785034, "learning_rate": 1.6666666666666668e-07, "loss": 0.8756, "step": 1 }, { "epoch": 6.386511687316388e-05, "eval_loss": 1.0774480104446411, "eval_runtime": 466.7328, "eval_samples_per_second": 14.126, "eval_steps_per_second": 1.768, "step": 1 }, { "epoch": 0.00012773023374632777, "grad_norm": 0.36416366696357727, "learning_rate": 3.3333333333333335e-07, "loss": 0.89, "step": 2 }, { "epoch": 0.00019159535061949164, "grad_norm": 0.3401440978050232, "learning_rate": 5.000000000000001e-07, "loss": 0.8263, "step": 3 }, { "epoch": 0.00025546046749265553, "grad_norm": 0.38306111097335815, "learning_rate": 6.666666666666667e-07, "loss": 0.8554, "step": 4 }, { "epoch": 0.0003193255843658194, "grad_norm": 0.46144235134124756, "learning_rate": 8.333333333333333e-07, "loss": 1.0143, "step": 5 }, { "epoch": 0.00038319070123898327, "grad_norm": 0.4670914113521576, "learning_rate": 1.0000000000000002e-06, "loss": 0.8594, "step": 6 }, { "epoch": 0.00044705581811214717, "grad_norm": 0.4479999244213104, "learning_rate": 1.1666666666666668e-06, "loss": 0.9004, "step": 7 }, { "epoch": 0.0005109209349853111, "grad_norm": 0.4828587770462036, "learning_rate": 1.3333333333333334e-06, "loss": 0.9055, "step": 8 }, { "epoch": 0.0005747860518584749, "grad_norm": 0.4640510678291321, "learning_rate": 1.5e-06, "loss": 0.994, "step": 9 }, { "epoch": 0.0006386511687316387, "grad_norm": 0.4303988516330719, "learning_rate": 1.6666666666666667e-06, "loss": 0.9769, "step": 10 }, { "epoch": 0.0007025162856048026, "grad_norm": 0.46102985739707947, "learning_rate": 1.8333333333333333e-06, "loss": 0.8656, "step": 11 }, { "epoch": 0.0007663814024779665, "grad_norm": 0.5135804414749146, "learning_rate": 2.0000000000000003e-06, "loss": 0.9238, "step": 12 }, { "epoch": 0.0008302465193511304, "grad_norm": 0.5029675960540771, "learning_rate": 2.166666666666667e-06, "loss": 0.8977, "step": 13 }, { "epoch": 0.0008941116362242943, "grad_norm": 0.474789559841156, "learning_rate": 2.3333333333333336e-06, "loss": 0.9364, "step": 14 }, { "epoch": 0.0009579767530974581, "grad_norm": 0.4973190426826477, "learning_rate": 2.5e-06, "loss": 0.8554, "step": 15 }, { "epoch": 0.0010218418699706221, "grad_norm": 0.5029857158660889, "learning_rate": 2.666666666666667e-06, "loss": 0.9228, "step": 16 }, { "epoch": 0.001085706986843786, "grad_norm": 0.5457301139831543, "learning_rate": 2.8333333333333335e-06, "loss": 0.9745, "step": 17 }, { "epoch": 0.0011495721037169497, "grad_norm": 0.5255910754203796, "learning_rate": 3e-06, "loss": 0.9358, "step": 18 }, { "epoch": 0.0012134372205901136, "grad_norm": 0.5320528745651245, "learning_rate": 3.1666666666666667e-06, "loss": 1.0076, "step": 19 }, { "epoch": 0.0012773023374632775, "grad_norm": 0.5209840536117554, "learning_rate": 3.3333333333333333e-06, "loss": 1.0075, "step": 20 }, { "epoch": 0.0013411674543364414, "grad_norm": 0.5553448796272278, "learning_rate": 3.5e-06, "loss": 0.9764, "step": 21 }, { "epoch": 0.0014050325712096053, "grad_norm": 0.5567176342010498, "learning_rate": 3.6666666666666666e-06, "loss": 0.9495, "step": 22 }, { "epoch": 0.0014688976880827692, "grad_norm": 0.5516341328620911, "learning_rate": 3.833333333333334e-06, "loss": 0.9358, "step": 23 }, { "epoch": 0.001532762804955933, "grad_norm": 0.5788530707359314, "learning_rate": 4.000000000000001e-06, "loss": 0.9492, "step": 24 }, { "epoch": 0.001596627921829097, "grad_norm": 0.5552225112915039, "learning_rate": 4.166666666666667e-06, "loss": 1.0613, "step": 25 }, { "epoch": 0.0016604930387022609, "grad_norm": 0.577714204788208, "learning_rate": 4.333333333333334e-06, "loss": 0.9998, "step": 26 }, { "epoch": 0.0017243581555754248, "grad_norm": 0.6057453751564026, "learning_rate": 4.5e-06, "loss": 1.0597, "step": 27 }, { "epoch": 0.0017882232724485887, "grad_norm": 0.6490088105201721, "learning_rate": 4.666666666666667e-06, "loss": 1.0991, "step": 28 }, { "epoch": 0.0018520883893217524, "grad_norm": 0.7101827263832092, "learning_rate": 4.833333333333333e-06, "loss": 1.1455, "step": 29 }, { "epoch": 0.0019159535061949162, "grad_norm": 0.5995752215385437, "learning_rate": 5e-06, "loss": 1.0078, "step": 30 }, { "epoch": 0.00197981862306808, "grad_norm": 0.6341984272003174, "learning_rate": 4.997482666353287e-06, "loss": 1.0688, "step": 31 }, { "epoch": 0.0020436837399412443, "grad_norm": 0.6025628447532654, "learning_rate": 4.989935734988098e-06, "loss": 0.9718, "step": 32 }, { "epoch": 0.002107548856814408, "grad_norm": 0.6797362565994263, "learning_rate": 4.977374404419838e-06, "loss": 1.0952, "step": 33 }, { "epoch": 0.002171413973687572, "grad_norm": 0.7258560657501221, "learning_rate": 4.959823971496575e-06, "loss": 1.1253, "step": 34 }, { "epoch": 0.0022352790905607357, "grad_norm": 0.7295971512794495, "learning_rate": 4.937319780454559e-06, "loss": 1.1276, "step": 35 }, { "epoch": 0.0022991442074338994, "grad_norm": 0.804185688495636, "learning_rate": 4.909907151739634e-06, "loss": 1.188, "step": 36 }, { "epoch": 0.0023630093243070635, "grad_norm": 0.7474485039710999, "learning_rate": 4.8776412907378845e-06, "loss": 1.1521, "step": 37 }, { "epoch": 0.002426874441180227, "grad_norm": 0.8790514469146729, "learning_rate": 4.8405871765993435e-06, "loss": 1.247, "step": 38 }, { "epoch": 0.0024907395580533913, "grad_norm": 0.8702276945114136, "learning_rate": 4.7988194313786275e-06, "loss": 1.278, "step": 39 }, { "epoch": 0.002554604674926555, "grad_norm": 0.742009699344635, "learning_rate": 4.752422169756048e-06, "loss": 1.1468, "step": 40 }, { "epoch": 0.002618469791799719, "grad_norm": 0.937059760093689, "learning_rate": 4.701488829641845e-06, "loss": 1.2915, "step": 41 }, { "epoch": 0.002682334908672883, "grad_norm": 0.8140389919281006, "learning_rate": 4.646121984004666e-06, "loss": 1.2823, "step": 42 }, { "epoch": 0.002746200025546047, "grad_norm": 1.0560404062271118, "learning_rate": 4.586433134303257e-06, "loss": 1.3401, "step": 43 }, { "epoch": 0.0028100651424192106, "grad_norm": 0.9050847887992859, "learning_rate": 4.522542485937369e-06, "loss": 1.2105, "step": 44 }, { "epoch": 0.0028739302592923747, "grad_norm": 0.9784116744995117, "learning_rate": 4.454578706170075e-06, "loss": 1.262, "step": 45 }, { "epoch": 0.0029377953761655384, "grad_norm": 0.9561217427253723, "learning_rate": 4.382678665009028e-06, "loss": 1.2848, "step": 46 }, { "epoch": 0.003001660493038702, "grad_norm": 0.9958779811859131, "learning_rate": 4.3069871595684795e-06, "loss": 1.2555, "step": 47 }, { "epoch": 0.003065525609911866, "grad_norm": 0.9706637263298035, "learning_rate": 4.227656622467162e-06, "loss": 1.2734, "step": 48 }, { "epoch": 0.00312939072678503, "grad_norm": 1.050161600112915, "learning_rate": 4.144846814849282e-06, "loss": 1.3141, "step": 49 }, { "epoch": 0.003193255843658194, "grad_norm": 0.9445985555648804, "learning_rate": 4.058724504646834e-06, "loss": 1.3076, "step": 50 }, { "epoch": 0.003193255843658194, "eval_loss": 1.0112289190292358, "eval_runtime": 468.8993, "eval_samples_per_second": 14.061, "eval_steps_per_second": 1.759, "step": 50 }, { "epoch": 0.0032571209605313576, "grad_norm": 0.35466328263282776, "learning_rate": 3.969463130731183e-06, "loss": 1.0494, "step": 51 }, { "epoch": 0.0033209860774045218, "grad_norm": 0.4603920578956604, "learning_rate": 3.8772424536302565e-06, "loss": 0.8436, "step": 52 }, { "epoch": 0.0033848511942776854, "grad_norm": 0.4390234351158142, "learning_rate": 3.782248193514766e-06, "loss": 0.8673, "step": 53 }, { "epoch": 0.0034487163111508496, "grad_norm": 0.43319058418273926, "learning_rate": 3.684671656182497e-06, "loss": 1.0298, "step": 54 }, { "epoch": 0.0035125814280240132, "grad_norm": 0.46855083107948303, "learning_rate": 3.5847093477938955e-06, "loss": 0.7543, "step": 55 }, { "epoch": 0.0035764465448971773, "grad_norm": 0.48883286118507385, "learning_rate": 3.4825625791348093e-06, "loss": 0.7879, "step": 56 }, { "epoch": 0.003640311661770341, "grad_norm": 0.4997875392436981, "learning_rate": 3.3784370602033572e-06, "loss": 0.8255, "step": 57 }, { "epoch": 0.0037041767786435047, "grad_norm": 0.48925676941871643, "learning_rate": 3.272542485937369e-06, "loss": 0.8007, "step": 58 }, { "epoch": 0.003768041895516669, "grad_norm": 0.5267242193222046, "learning_rate": 3.165092113916688e-06, "loss": 0.7858, "step": 59 }, { "epoch": 0.0038319070123898325, "grad_norm": 0.5722787380218506, "learning_rate": 3.056302334890786e-06, "loss": 0.832, "step": 60 }, { "epoch": 0.0038957721292629966, "grad_norm": 0.5531828999519348, "learning_rate": 2.946392236996592e-06, "loss": 0.8113, "step": 61 }, { "epoch": 0.00395963724613616, "grad_norm": 0.5620059370994568, "learning_rate": 2.835583164544139e-06, "loss": 0.8526, "step": 62 }, { "epoch": 0.004023502363009324, "grad_norm": 0.5260962247848511, "learning_rate": 2.724098272258584e-06, "loss": 0.8254, "step": 63 }, { "epoch": 0.0040873674798824885, "grad_norm": 0.5846118927001953, "learning_rate": 2.6121620758762877e-06, "loss": 0.8756, "step": 64 }, { "epoch": 0.004151232596755652, "grad_norm": 0.591052770614624, "learning_rate": 2.5e-06, "loss": 0.8552, "step": 65 }, { "epoch": 0.004215097713628816, "grad_norm": 0.5996527671813965, "learning_rate": 2.3878379241237136e-06, "loss": 0.7867, "step": 66 }, { "epoch": 0.0042789628305019796, "grad_norm": 0.6025989055633545, "learning_rate": 2.2759017277414165e-06, "loss": 0.8736, "step": 67 }, { "epoch": 0.004342827947375144, "grad_norm": 0.6134032011032104, "learning_rate": 2.1644168354558623e-06, "loss": 0.8842, "step": 68 }, { "epoch": 0.004406693064248308, "grad_norm": 0.6772037744522095, "learning_rate": 2.053607763003409e-06, "loss": 0.8653, "step": 69 }, { "epoch": 0.0044705581811214715, "grad_norm": 0.554199755191803, "learning_rate": 1.9436976651092143e-06, "loss": 0.8358, "step": 70 }, { "epoch": 0.004534423297994635, "grad_norm": 0.6779680252075195, "learning_rate": 1.8349078860833125e-06, "loss": 0.9094, "step": 71 }, { "epoch": 0.004598288414867799, "grad_norm": 0.6369734406471252, "learning_rate": 1.7274575140626318e-06, "loss": 0.8661, "step": 72 }, { "epoch": 0.004662153531740963, "grad_norm": 0.6391808390617371, "learning_rate": 1.6215629397966432e-06, "loss": 0.9004, "step": 73 }, { "epoch": 0.004726018648614127, "grad_norm": 0.6250472068786621, "learning_rate": 1.5174374208651913e-06, "loss": 0.8611, "step": 74 }, { "epoch": 0.004789883765487291, "grad_norm": 0.7451426386833191, "learning_rate": 1.415290652206105e-06, "loss": 0.9557, "step": 75 }, { "epoch": 0.004853748882360454, "grad_norm": 0.6133967041969299, "learning_rate": 1.3153283438175036e-06, "loss": 0.9089, "step": 76 }, { "epoch": 0.004917613999233619, "grad_norm": 0.7205358743667603, "learning_rate": 1.217751806485235e-06, "loss": 0.9354, "step": 77 }, { "epoch": 0.004981479116106783, "grad_norm": 0.7420779466629028, "learning_rate": 1.122757546369744e-06, "loss": 1.0057, "step": 78 }, { "epoch": 0.005045344232979946, "grad_norm": 0.7057881951332092, "learning_rate": 1.0305368692688175e-06, "loss": 0.9137, "step": 79 }, { "epoch": 0.00510920934985311, "grad_norm": 0.7535944581031799, "learning_rate": 9.412754953531664e-07, "loss": 0.9348, "step": 80 }, { "epoch": 0.005173074466726274, "grad_norm": 0.6707415580749512, "learning_rate": 8.551531851507186e-07, "loss": 0.8909, "step": 81 }, { "epoch": 0.005236939583599438, "grad_norm": 0.7699809074401855, "learning_rate": 7.723433775328385e-07, "loss": 1.0632, "step": 82 }, { "epoch": 0.005300804700472602, "grad_norm": 0.7439144849777222, "learning_rate": 6.930128404315214e-07, "loss": 0.9034, "step": 83 }, { "epoch": 0.005364669817345766, "grad_norm": 0.7623469829559326, "learning_rate": 6.17321334990973e-07, "loss": 0.9428, "step": 84 }, { "epoch": 0.005428534934218929, "grad_norm": 0.7557162642478943, "learning_rate": 5.454212938299256e-07, "loss": 0.9681, "step": 85 }, { "epoch": 0.005492400051092094, "grad_norm": 0.8695307374000549, "learning_rate": 4.774575140626317e-07, "loss": 1.0252, "step": 86 }, { "epoch": 0.0055562651679652575, "grad_norm": 0.8959054946899414, "learning_rate": 4.1356686569674344e-07, "loss": 1.0098, "step": 87 }, { "epoch": 0.005620130284838421, "grad_norm": 0.8213502764701843, "learning_rate": 3.538780159953348e-07, "loss": 1.0395, "step": 88 }, { "epoch": 0.005683995401711585, "grad_norm": 0.8729150891304016, "learning_rate": 2.98511170358155e-07, "loss": 1.0537, "step": 89 }, { "epoch": 0.005747860518584749, "grad_norm": 0.8639337420463562, "learning_rate": 2.4757783024395244e-07, "loss": 1.1122, "step": 90 }, { "epoch": 0.005811725635457913, "grad_norm": 0.8852090239524841, "learning_rate": 2.0118056862137358e-07, "loss": 1.1326, "step": 91 }, { "epoch": 0.005875590752331077, "grad_norm": 1.0262387990951538, "learning_rate": 1.59412823400657e-07, "loss": 1.2442, "step": 92 }, { "epoch": 0.0059394558692042404, "grad_norm": 1.0481868982315063, "learning_rate": 1.223587092621162e-07, "loss": 1.1703, "step": 93 }, { "epoch": 0.006003320986077404, "grad_norm": 1.0297393798828125, "learning_rate": 9.00928482603669e-08, "loss": 1.0855, "step": 94 }, { "epoch": 0.006067186102950569, "grad_norm": 1.0903443098068237, "learning_rate": 6.268021954544095e-08, "loss": 1.1983, "step": 95 }, { "epoch": 0.006131051219823732, "grad_norm": 0.9865908026695251, "learning_rate": 4.017602850342584e-08, "loss": 1.0632, "step": 96 }, { "epoch": 0.006194916336696896, "grad_norm": 1.1608613729476929, "learning_rate": 2.262559558016325e-08, "loss": 1.2043, "step": 97 }, { "epoch": 0.00625878145357006, "grad_norm": 0.9920202493667603, "learning_rate": 1.006426501190233e-08, "loss": 1.073, "step": 98 }, { "epoch": 0.006322646570443224, "grad_norm": 1.018128514289856, "learning_rate": 2.5173336467135266e-09, "loss": 1.1236, "step": 99 }, { "epoch": 0.006386511687316388, "grad_norm": 1.1295783519744873, "learning_rate": 0.0, "loss": 1.2105, "step": 100 }, { "epoch": 0.006386511687316388, "eval_loss": 0.9471560716629028, "eval_runtime": 467.8522, "eval_samples_per_second": 14.092, "eval_steps_per_second": 1.763, "step": 100 } ], "logging_steps": 1, "max_steps": 100, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 50, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 5, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.672899276570624e+16, "train_batch_size": 8, "trial_name": null, "trial_params": null }