{ "best_metric": 1.5667701959609985, "best_model_checkpoint": "miner_id_24/checkpoint-150", "epoch": 0.9404388714733543, "eval_steps": 50, "global_step": 150, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.006269592476489028, "grad_norm": 0.1956695169210434, "learning_rate": 1.0100000000000002e-05, "loss": 1.4797, "step": 1 }, { "epoch": 0.006269592476489028, "eval_loss": 2.014873743057251, "eval_runtime": 21.7647, "eval_samples_per_second": 98.738, "eval_steps_per_second": 3.124, "step": 1 }, { "epoch": 0.012539184952978056, "grad_norm": 0.21951091289520264, "learning_rate": 2.0200000000000003e-05, "loss": 1.8897, "step": 2 }, { "epoch": 0.018808777429467086, "grad_norm": 0.2182653844356537, "learning_rate": 3.0299999999999998e-05, "loss": 1.8755, "step": 3 }, { "epoch": 0.025078369905956112, "grad_norm": 0.3972433805465698, "learning_rate": 4.0400000000000006e-05, "loss": 2.0184, "step": 4 }, { "epoch": 0.03134796238244514, "grad_norm": 0.7481153607368469, "learning_rate": 5.05e-05, "loss": 2.1242, "step": 5 }, { "epoch": 0.03761755485893417, "grad_norm": 0.6163584589958191, "learning_rate": 6.0599999999999996e-05, "loss": 2.254, "step": 6 }, { "epoch": 0.0438871473354232, "grad_norm": 0.17153511941432953, "learning_rate": 7.07e-05, "loss": 1.4166, "step": 7 }, { "epoch": 0.050156739811912224, "grad_norm": 0.16584382951259613, "learning_rate": 8.080000000000001e-05, "loss": 1.7842, "step": 8 }, { "epoch": 0.05642633228840126, "grad_norm": 0.18644414842128754, "learning_rate": 9.09e-05, "loss": 1.8403, "step": 9 }, { "epoch": 0.06269592476489028, "grad_norm": 0.21256040036678314, "learning_rate": 0.000101, "loss": 1.9229, "step": 10 }, { "epoch": 0.06896551724137931, "grad_norm": 0.452114462852478, "learning_rate": 0.00010046842105263158, "loss": 1.9909, "step": 11 }, { "epoch": 0.07523510971786834, "grad_norm": 0.3007873296737671, "learning_rate": 9.993684210526315e-05, "loss": 1.995, "step": 12 }, { "epoch": 0.08150470219435736, "grad_norm": 0.22591519355773926, "learning_rate": 9.940526315789473e-05, "loss": 1.3466, "step": 13 }, { "epoch": 0.0877742946708464, "grad_norm": 0.22962772846221924, "learning_rate": 9.887368421052632e-05, "loss": 1.5942, "step": 14 }, { "epoch": 0.09404388714733543, "grad_norm": 0.20066531002521515, "learning_rate": 9.83421052631579e-05, "loss": 1.759, "step": 15 }, { "epoch": 0.10031347962382445, "grad_norm": 0.24357616901397705, "learning_rate": 9.781052631578948e-05, "loss": 1.7682, "step": 16 }, { "epoch": 0.10658307210031348, "grad_norm": 0.42487001419067383, "learning_rate": 9.727894736842106e-05, "loss": 1.8977, "step": 17 }, { "epoch": 0.11285266457680251, "grad_norm": 0.5519868731498718, "learning_rate": 9.674736842105263e-05, "loss": 1.8652, "step": 18 }, { "epoch": 0.11912225705329153, "grad_norm": 0.1837894469499588, "learning_rate": 9.621578947368421e-05, "loss": 1.2621, "step": 19 }, { "epoch": 0.12539184952978055, "grad_norm": 0.16041411459445953, "learning_rate": 9.568421052631578e-05, "loss": 1.4973, "step": 20 }, { "epoch": 0.13166144200626959, "grad_norm": 0.1466640830039978, "learning_rate": 9.515263157894737e-05, "loss": 1.6563, "step": 21 }, { "epoch": 0.13793103448275862, "grad_norm": 0.15411381423473358, "learning_rate": 9.462105263157895e-05, "loss": 1.8432, "step": 22 }, { "epoch": 0.14420062695924765, "grad_norm": 0.20786531269550323, "learning_rate": 9.408947368421054e-05, "loss": 1.866, "step": 23 }, { "epoch": 0.15047021943573669, "grad_norm": 0.27923616766929626, "learning_rate": 9.355789473684211e-05, "loss": 1.8369, "step": 24 }, { "epoch": 0.15673981191222572, "grad_norm": 0.45824792981147766, "learning_rate": 9.302631578947369e-05, "loss": 1.9946, "step": 25 }, { "epoch": 0.16300940438871472, "grad_norm": 0.11721207201480865, "learning_rate": 9.249473684210526e-05, "loss": 1.3217, "step": 26 }, { "epoch": 0.16927899686520376, "grad_norm": 0.15526717901229858, "learning_rate": 9.196315789473685e-05, "loss": 1.7139, "step": 27 }, { "epoch": 0.1755485893416928, "grad_norm": 0.15869063138961792, "learning_rate": 9.143157894736843e-05, "loss": 1.7312, "step": 28 }, { "epoch": 0.18181818181818182, "grad_norm": 0.1636732816696167, "learning_rate": 9.09e-05, "loss": 1.7714, "step": 29 }, { "epoch": 0.18808777429467086, "grad_norm": 0.20718851685523987, "learning_rate": 9.036842105263158e-05, "loss": 1.828, "step": 30 }, { "epoch": 0.19435736677115986, "grad_norm": 0.30376043915748596, "learning_rate": 8.983684210526316e-05, "loss": 1.8697, "step": 31 }, { "epoch": 0.2006269592476489, "grad_norm": 0.11045503616333008, "learning_rate": 8.930526315789474e-05, "loss": 1.3708, "step": 32 }, { "epoch": 0.20689655172413793, "grad_norm": 0.14193548262119293, "learning_rate": 8.877368421052632e-05, "loss": 1.6054, "step": 33 }, { "epoch": 0.21316614420062696, "grad_norm": 0.15867547690868378, "learning_rate": 8.82421052631579e-05, "loss": 1.6872, "step": 34 }, { "epoch": 0.219435736677116, "grad_norm": 0.16795365512371063, "learning_rate": 8.771052631578948e-05, "loss": 1.8028, "step": 35 }, { "epoch": 0.22570532915360503, "grad_norm": 0.19065167009830475, "learning_rate": 8.717894736842105e-05, "loss": 1.8083, "step": 36 }, { "epoch": 0.23197492163009403, "grad_norm": 0.23978182673454285, "learning_rate": 8.664736842105263e-05, "loss": 1.8856, "step": 37 }, { "epoch": 0.23824451410658307, "grad_norm": 0.13266095519065857, "learning_rate": 8.61157894736842e-05, "loss": 1.239, "step": 38 }, { "epoch": 0.2445141065830721, "grad_norm": 0.1396464705467224, "learning_rate": 8.55842105263158e-05, "loss": 1.528, "step": 39 }, { "epoch": 0.2507836990595611, "grad_norm": 0.15594753623008728, "learning_rate": 8.505263157894737e-05, "loss": 1.6795, "step": 40 }, { "epoch": 0.25705329153605017, "grad_norm": 0.16817249357700348, "learning_rate": 8.452105263157896e-05, "loss": 1.7164, "step": 41 }, { "epoch": 0.26332288401253917, "grad_norm": 0.18988773226737976, "learning_rate": 8.398947368421053e-05, "loss": 1.7163, "step": 42 }, { "epoch": 0.26959247648902823, "grad_norm": 0.20697522163391113, "learning_rate": 8.345789473684211e-05, "loss": 1.7748, "step": 43 }, { "epoch": 0.27586206896551724, "grad_norm": 0.14124108850955963, "learning_rate": 8.292631578947368e-05, "loss": 1.3559, "step": 44 }, { "epoch": 0.28213166144200624, "grad_norm": 0.10386940836906433, "learning_rate": 8.239473684210526e-05, "loss": 1.4066, "step": 45 }, { "epoch": 0.2884012539184953, "grad_norm": 0.15532410144805908, "learning_rate": 8.186315789473683e-05, "loss": 1.6759, "step": 46 }, { "epoch": 0.2946708463949843, "grad_norm": 0.16163372993469238, "learning_rate": 8.133157894736842e-05, "loss": 1.6765, "step": 47 }, { "epoch": 0.30094043887147337, "grad_norm": 0.18687112629413605, "learning_rate": 8.080000000000001e-05, "loss": 1.7736, "step": 48 }, { "epoch": 0.3072100313479624, "grad_norm": 0.19891808927059174, "learning_rate": 8.026842105263159e-05, "loss": 1.7138, "step": 49 }, { "epoch": 0.31347962382445144, "grad_norm": 0.546370267868042, "learning_rate": 7.973684210526316e-05, "loss": 1.8809, "step": 50 }, { "epoch": 0.31347962382445144, "eval_loss": 1.6940747499465942, "eval_runtime": 22.5071, "eval_samples_per_second": 95.481, "eval_steps_per_second": 3.021, "step": 50 }, { "epoch": 0.31974921630094044, "grad_norm": 0.11734400689601898, "learning_rate": 7.920526315789474e-05, "loss": 1.4185, "step": 51 }, { "epoch": 0.32601880877742945, "grad_norm": 0.24825617671012878, "learning_rate": 7.867368421052631e-05, "loss": 1.6477, "step": 52 }, { "epoch": 0.3322884012539185, "grad_norm": 0.2786071002483368, "learning_rate": 7.814210526315789e-05, "loss": 1.6508, "step": 53 }, { "epoch": 0.3385579937304075, "grad_norm": 0.22523944079875946, "learning_rate": 7.761052631578946e-05, "loss": 1.7142, "step": 54 }, { "epoch": 0.3448275862068966, "grad_norm": 0.20191609859466553, "learning_rate": 7.707894736842105e-05, "loss": 1.7316, "step": 55 }, { "epoch": 0.3510971786833856, "grad_norm": 0.2791290283203125, "learning_rate": 7.654736842105264e-05, "loss": 1.754, "step": 56 }, { "epoch": 0.3573667711598746, "grad_norm": 0.10366496443748474, "learning_rate": 7.601578947368422e-05, "loss": 1.2027, "step": 57 }, { "epoch": 0.36363636363636365, "grad_norm": 0.15324437618255615, "learning_rate": 7.548421052631579e-05, "loss": 1.6548, "step": 58 }, { "epoch": 0.36990595611285265, "grad_norm": 0.1608223021030426, "learning_rate": 7.495263157894737e-05, "loss": 1.6525, "step": 59 }, { "epoch": 0.3761755485893417, "grad_norm": 0.20208290219306946, "learning_rate": 7.442105263157894e-05, "loss": 1.6865, "step": 60 }, { "epoch": 0.3824451410658307, "grad_norm": 0.23002368211746216, "learning_rate": 7.388947368421053e-05, "loss": 1.7319, "step": 61 }, { "epoch": 0.3887147335423197, "grad_norm": 0.25719794631004333, "learning_rate": 7.335789473684211e-05, "loss": 1.7345, "step": 62 }, { "epoch": 0.3949843260188088, "grad_norm": 0.11874490976333618, "learning_rate": 7.282631578947368e-05, "loss": 1.1791, "step": 63 }, { "epoch": 0.4012539184952978, "grad_norm": 0.13194750249385834, "learning_rate": 7.229473684210527e-05, "loss": 1.5006, "step": 64 }, { "epoch": 0.40752351097178685, "grad_norm": 0.16368307173252106, "learning_rate": 7.176315789473685e-05, "loss": 1.653, "step": 65 }, { "epoch": 0.41379310344827586, "grad_norm": 0.18413026630878448, "learning_rate": 7.123157894736842e-05, "loss": 1.6872, "step": 66 }, { "epoch": 0.4200626959247649, "grad_norm": 0.23217302560806274, "learning_rate": 7.07e-05, "loss": 1.7738, "step": 67 }, { "epoch": 0.4263322884012539, "grad_norm": 0.21129940450191498, "learning_rate": 7.016842105263159e-05, "loss": 1.6499, "step": 68 }, { "epoch": 0.43260188087774293, "grad_norm": 0.14536724984645844, "learning_rate": 6.963684210526316e-05, "loss": 1.0403, "step": 69 }, { "epoch": 0.438871473354232, "grad_norm": 0.12869617342948914, "learning_rate": 6.910526315789474e-05, "loss": 1.4696, "step": 70 }, { "epoch": 0.445141065830721, "grad_norm": 0.1599850058555603, "learning_rate": 6.857368421052631e-05, "loss": 1.6434, "step": 71 }, { "epoch": 0.45141065830721006, "grad_norm": 0.17733348906040192, "learning_rate": 6.80421052631579e-05, "loss": 1.6627, "step": 72 }, { "epoch": 0.45768025078369906, "grad_norm": 0.1968277245759964, "learning_rate": 6.751052631578948e-05, "loss": 1.641, "step": 73 }, { "epoch": 0.46394984326018807, "grad_norm": 0.2661347985267639, "learning_rate": 6.697894736842105e-05, "loss": 1.6584, "step": 74 }, { "epoch": 0.4702194357366771, "grad_norm": 0.574530303478241, "learning_rate": 6.644736842105264e-05, "loss": 1.8574, "step": 75 }, { "epoch": 0.47648902821316613, "grad_norm": 0.11632688343524933, "learning_rate": 6.591578947368422e-05, "loss": 1.4023, "step": 76 }, { "epoch": 0.4827586206896552, "grad_norm": 0.2113131582736969, "learning_rate": 6.538421052631579e-05, "loss": 1.6341, "step": 77 }, { "epoch": 0.4890282131661442, "grad_norm": 0.2565387189388275, "learning_rate": 6.485263157894737e-05, "loss": 1.6345, "step": 78 }, { "epoch": 0.4952978056426332, "grad_norm": 0.2454068958759308, "learning_rate": 6.432105263157894e-05, "loss": 1.6885, "step": 79 }, { "epoch": 0.5015673981191222, "grad_norm": 0.25979533791542053, "learning_rate": 6.378947368421053e-05, "loss": 1.7208, "step": 80 }, { "epoch": 0.5078369905956113, "grad_norm": 0.30401724576950073, "learning_rate": 6.32578947368421e-05, "loss": 1.7026, "step": 81 }, { "epoch": 0.5141065830721003, "grad_norm": 0.11303433030843735, "learning_rate": 6.27263157894737e-05, "loss": 1.1272, "step": 82 }, { "epoch": 0.5203761755485894, "grad_norm": 0.21448008716106415, "learning_rate": 6.219473684210527e-05, "loss": 1.6056, "step": 83 }, { "epoch": 0.5266457680250783, "grad_norm": 0.18290068209171295, "learning_rate": 6.166315789473685e-05, "loss": 1.6274, "step": 84 }, { "epoch": 0.5329153605015674, "grad_norm": 0.21106426417827606, "learning_rate": 6.113157894736842e-05, "loss": 1.6398, "step": 85 }, { "epoch": 0.5391849529780565, "grad_norm": 0.24094125628471375, "learning_rate": 6.0599999999999996e-05, "loss": 1.7705, "step": 86 }, { "epoch": 0.5454545454545454, "grad_norm": 0.2784807085990906, "learning_rate": 6.006842105263158e-05, "loss": 1.6553, "step": 87 }, { "epoch": 0.5517241379310345, "grad_norm": 0.135740265250206, "learning_rate": 5.953684210526315e-05, "loss": 1.0888, "step": 88 }, { "epoch": 0.5579937304075235, "grad_norm": 0.13163161277770996, "learning_rate": 5.900526315789474e-05, "loss": 1.5075, "step": 89 }, { "epoch": 0.5642633228840125, "grad_norm": 0.1819342076778412, "learning_rate": 5.847368421052632e-05, "loss": 1.6406, "step": 90 }, { "epoch": 0.5705329153605015, "grad_norm": 0.20158928632736206, "learning_rate": 5.79421052631579e-05, "loss": 1.6991, "step": 91 }, { "epoch": 0.5768025078369906, "grad_norm": 0.218618705868721, "learning_rate": 5.7410526315789475e-05, "loss": 1.6872, "step": 92 }, { "epoch": 0.5830721003134797, "grad_norm": 0.2431667596101761, "learning_rate": 5.687894736842105e-05, "loss": 1.6567, "step": 93 }, { "epoch": 0.5893416927899686, "grad_norm": 0.15539388358592987, "learning_rate": 5.6347368421052625e-05, "loss": 1.1379, "step": 94 }, { "epoch": 0.5956112852664577, "grad_norm": 0.14918771386146545, "learning_rate": 5.5815789473684214e-05, "loss": 1.4123, "step": 95 }, { "epoch": 0.6018808777429467, "grad_norm": 0.19607971608638763, "learning_rate": 5.5284210526315796e-05, "loss": 1.6222, "step": 96 }, { "epoch": 0.6081504702194357, "grad_norm": 0.21519030630588531, "learning_rate": 5.475263157894737e-05, "loss": 1.6053, "step": 97 }, { "epoch": 0.6144200626959248, "grad_norm": 0.2148403525352478, "learning_rate": 5.422105263157895e-05, "loss": 1.7118, "step": 98 }, { "epoch": 0.6206896551724138, "grad_norm": 0.24617306888103485, "learning_rate": 5.368947368421053e-05, "loss": 1.6654, "step": 99 }, { "epoch": 0.6269592476489029, "grad_norm": 0.5618337392807007, "learning_rate": 5.3157894736842104e-05, "loss": 1.7828, "step": 100 }, { "epoch": 0.6269592476489029, "eval_loss": 1.6180227994918823, "eval_runtime": 21.2838, "eval_samples_per_second": 100.969, "eval_steps_per_second": 3.195, "step": 100 }, { "epoch": 0.6332288401253918, "grad_norm": 0.12550345063209534, "learning_rate": 5.262631578947368e-05, "loss": 1.1544, "step": 101 }, { "epoch": 0.6394984326018809, "grad_norm": 0.20995737612247467, "learning_rate": 5.209473684210527e-05, "loss": 1.5736, "step": 102 }, { "epoch": 0.64576802507837, "grad_norm": 0.25661516189575195, "learning_rate": 5.1563157894736844e-05, "loss": 1.6052, "step": 103 }, { "epoch": 0.6520376175548589, "grad_norm": 0.2354477196931839, "learning_rate": 5.1031578947368426e-05, "loss": 1.6452, "step": 104 }, { "epoch": 0.658307210031348, "grad_norm": 0.2458198219537735, "learning_rate": 5.05e-05, "loss": 1.7015, "step": 105 }, { "epoch": 0.664576802507837, "grad_norm": 0.33329248428344727, "learning_rate": 4.9968421052631576e-05, "loss": 1.5996, "step": 106 }, { "epoch": 0.670846394984326, "grad_norm": 0.1337110549211502, "learning_rate": 4.943684210526316e-05, "loss": 1.3261, "step": 107 }, { "epoch": 0.677115987460815, "grad_norm": 0.17378275096416473, "learning_rate": 4.890526315789474e-05, "loss": 1.5064, "step": 108 }, { "epoch": 0.6833855799373041, "grad_norm": 0.20934435725212097, "learning_rate": 4.8373684210526316e-05, "loss": 1.6143, "step": 109 }, { "epoch": 0.6896551724137931, "grad_norm": 0.24600297212600708, "learning_rate": 4.784210526315789e-05, "loss": 1.6473, "step": 110 }, { "epoch": 0.6959247648902821, "grad_norm": 0.2352122813463211, "learning_rate": 4.731052631578947e-05, "loss": 1.59, "step": 111 }, { "epoch": 0.7021943573667712, "grad_norm": 0.3226903975009918, "learning_rate": 4.6778947368421055e-05, "loss": 1.5846, "step": 112 }, { "epoch": 0.7084639498432602, "grad_norm": 0.14687702059745789, "learning_rate": 4.624736842105263e-05, "loss": 1.1129, "step": 113 }, { "epoch": 0.7147335423197492, "grad_norm": 0.15256668627262115, "learning_rate": 4.571578947368421e-05, "loss": 1.4691, "step": 114 }, { "epoch": 0.7210031347962382, "grad_norm": 0.20408159494400024, "learning_rate": 4.518421052631579e-05, "loss": 1.6611, "step": 115 }, { "epoch": 0.7272727272727273, "grad_norm": 0.22884103655815125, "learning_rate": 4.465263157894737e-05, "loss": 1.6256, "step": 116 }, { "epoch": 0.7335423197492164, "grad_norm": 0.23959754407405853, "learning_rate": 4.412105263157895e-05, "loss": 1.6332, "step": 117 }, { "epoch": 0.7398119122257053, "grad_norm": 0.2993139624595642, "learning_rate": 4.358947368421053e-05, "loss": 1.6246, "step": 118 }, { "epoch": 0.7460815047021944, "grad_norm": 0.19114616513252258, "learning_rate": 4.30578947368421e-05, "loss": 1.0553, "step": 119 }, { "epoch": 0.7523510971786834, "grad_norm": 0.1459835320711136, "learning_rate": 4.2526315789473685e-05, "loss": 1.432, "step": 120 }, { "epoch": 0.7586206896551724, "grad_norm": 0.19810743629932404, "learning_rate": 4.199473684210527e-05, "loss": 1.5597, "step": 121 }, { "epoch": 0.7648902821316614, "grad_norm": 0.22155524790287018, "learning_rate": 4.146315789473684e-05, "loss": 1.5973, "step": 122 }, { "epoch": 0.7711598746081505, "grad_norm": 0.252210795879364, "learning_rate": 4.093157894736842e-05, "loss": 1.7093, "step": 123 }, { "epoch": 0.7774294670846394, "grad_norm": 0.2699624300003052, "learning_rate": 4.0400000000000006e-05, "loss": 1.6218, "step": 124 }, { "epoch": 0.7836990595611285, "grad_norm": 0.5296167731285095, "learning_rate": 3.986842105263158e-05, "loss": 1.6447, "step": 125 }, { "epoch": 0.7899686520376176, "grad_norm": 0.1305689811706543, "learning_rate": 3.933684210526316e-05, "loss": 1.2415, "step": 126 }, { "epoch": 0.7962382445141066, "grad_norm": 0.19951651990413666, "learning_rate": 3.880526315789473e-05, "loss": 1.5846, "step": 127 }, { "epoch": 0.8025078369905956, "grad_norm": 0.24844390153884888, "learning_rate": 3.827368421052632e-05, "loss": 1.5013, "step": 128 }, { "epoch": 0.8087774294670846, "grad_norm": 0.26770254969596863, "learning_rate": 3.7742105263157896e-05, "loss": 1.6404, "step": 129 }, { "epoch": 0.8150470219435737, "grad_norm": 0.268388956785202, "learning_rate": 3.721052631578947e-05, "loss": 1.6831, "step": 130 }, { "epoch": 0.8213166144200627, "grad_norm": 0.3877102732658386, "learning_rate": 3.6678947368421054e-05, "loss": 1.5713, "step": 131 }, { "epoch": 0.8275862068965517, "grad_norm": 0.14182406663894653, "learning_rate": 3.6147368421052636e-05, "loss": 1.1147, "step": 132 }, { "epoch": 0.8338557993730408, "grad_norm": 0.1862500160932541, "learning_rate": 3.561578947368421e-05, "loss": 1.4998, "step": 133 }, { "epoch": 0.8401253918495298, "grad_norm": 0.22701646387577057, "learning_rate": 3.508421052631579e-05, "loss": 1.5137, "step": 134 }, { "epoch": 0.8463949843260188, "grad_norm": 0.2560668885707855, "learning_rate": 3.455263157894737e-05, "loss": 1.6129, "step": 135 }, { "epoch": 0.8526645768025078, "grad_norm": 0.2551365792751312, "learning_rate": 3.402105263157895e-05, "loss": 1.6454, "step": 136 }, { "epoch": 0.8589341692789969, "grad_norm": 0.331463485956192, "learning_rate": 3.3489473684210526e-05, "loss": 1.5863, "step": 137 }, { "epoch": 0.8652037617554859, "grad_norm": 0.17028988897800446, "learning_rate": 3.295789473684211e-05, "loss": 1.1377, "step": 138 }, { "epoch": 0.8714733542319749, "grad_norm": 0.1787910908460617, "learning_rate": 3.242631578947368e-05, "loss": 1.4605, "step": 139 }, { "epoch": 0.877742946708464, "grad_norm": 0.21535782516002655, "learning_rate": 3.1894736842105265e-05, "loss": 1.5651, "step": 140 }, { "epoch": 0.8840125391849529, "grad_norm": 0.2516005337238312, "learning_rate": 3.136315789473685e-05, "loss": 1.6457, "step": 141 }, { "epoch": 0.890282131661442, "grad_norm": 0.26242345571517944, "learning_rate": 3.083157894736842e-05, "loss": 1.6603, "step": 142 }, { "epoch": 0.896551724137931, "grad_norm": 0.31754815578460693, "learning_rate": 3.0299999999999998e-05, "loss": 1.6406, "step": 143 }, { "epoch": 0.9028213166144201, "grad_norm": 0.19499145448207855, "learning_rate": 2.9768421052631577e-05, "loss": 1.0787, "step": 144 }, { "epoch": 0.9090909090909091, "grad_norm": 0.16461507976055145, "learning_rate": 2.923684210526316e-05, "loss": 1.3539, "step": 145 }, { "epoch": 0.9153605015673981, "grad_norm": 0.231450155377388, "learning_rate": 2.8705263157894737e-05, "loss": 1.6046, "step": 146 }, { "epoch": 0.9216300940438872, "grad_norm": 0.25709661841392517, "learning_rate": 2.8173684210526313e-05, "loss": 1.5514, "step": 147 }, { "epoch": 0.9278996865203761, "grad_norm": 0.26337385177612305, "learning_rate": 2.7642105263157898e-05, "loss": 1.6737, "step": 148 }, { "epoch": 0.9341692789968652, "grad_norm": 0.29333168268203735, "learning_rate": 2.7110526315789473e-05, "loss": 1.5403, "step": 149 }, { "epoch": 0.9404388714733543, "grad_norm": 0.6478084325790405, "learning_rate": 2.6578947368421052e-05, "loss": 1.7032, "step": 150 }, { "epoch": 0.9404388714733543, "eval_loss": 1.5667701959609985, "eval_runtime": 22.4342, "eval_samples_per_second": 95.791, "eval_steps_per_second": 3.031, "step": 150 } ], "logging_steps": 1, "max_steps": 200, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 50, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 5, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 1.6208921818772275e+18, "train_batch_size": 8, "trial_name": null, "trial_params": null }