{ "best_metric": 0.3262763023376465, "best_model_checkpoint": "miner_id_24/checkpoint-200", "epoch": 2.3112391930835736, "eval_steps": 50, "global_step": 200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.011527377521613832, "grad_norm": 15.795230865478516, "learning_rate": 1.0100000000000002e-05, "loss": 4.073, "step": 1 }, { "epoch": 0.011527377521613832, "eval_loss": 1.3264315128326416, "eval_runtime": 17.2268, "eval_samples_per_second": 67.685, "eval_steps_per_second": 2.148, "step": 1 }, { "epoch": 0.023054755043227664, "grad_norm": 16.31910514831543, "learning_rate": 2.0200000000000003e-05, "loss": 4.1133, "step": 2 }, { "epoch": 0.0345821325648415, "grad_norm": 9.573389053344727, "learning_rate": 3.0299999999999998e-05, "loss": 3.7994, "step": 3 }, { "epoch": 0.04610951008645533, "grad_norm": 6.555534839630127, "learning_rate": 4.0400000000000006e-05, "loss": 3.7398, "step": 4 }, { "epoch": 0.05763688760806916, "grad_norm": 10.255160331726074, "learning_rate": 5.05e-05, "loss": 4.3368, "step": 5 }, { "epoch": 0.069164265129683, "grad_norm": 15.695327758789062, "learning_rate": 6.0599999999999996e-05, "loss": 5.9102, "step": 6 }, { "epoch": 0.08069164265129683, "grad_norm": 10.010425567626953, "learning_rate": 7.07e-05, "loss": 4.749, "step": 7 }, { "epoch": 0.09221902017291066, "grad_norm": 5.381749153137207, "learning_rate": 8.080000000000001e-05, "loss": 3.3209, "step": 8 }, { "epoch": 0.1037463976945245, "grad_norm": 2.711754083633423, "learning_rate": 9.09e-05, "loss": 3.2273, "step": 9 }, { "epoch": 0.11527377521613832, "grad_norm": 3.078639268875122, "learning_rate": 0.000101, "loss": 3.1344, "step": 10 }, { "epoch": 0.12680115273775217, "grad_norm": 3.792743682861328, "learning_rate": 0.00010046842105263158, "loss": 3.5957, "step": 11 }, { "epoch": 0.138328530259366, "grad_norm": 5.194520473480225, "learning_rate": 9.993684210526315e-05, "loss": 4.1843, "step": 12 }, { "epoch": 0.14985590778097982, "grad_norm": 8.25356674194336, "learning_rate": 9.940526315789473e-05, "loss": 4.6054, "step": 13 }, { "epoch": 0.16138328530259366, "grad_norm": 3.3276405334472656, "learning_rate": 9.887368421052632e-05, "loss": 2.9768, "step": 14 }, { "epoch": 0.1729106628242075, "grad_norm": 2.0909225940704346, "learning_rate": 9.83421052631579e-05, "loss": 2.8321, "step": 15 }, { "epoch": 0.1844380403458213, "grad_norm": 1.5769151449203491, "learning_rate": 9.781052631578948e-05, "loss": 2.6877, "step": 16 }, { "epoch": 0.19596541786743515, "grad_norm": 3.1385788917541504, "learning_rate": 9.727894736842106e-05, "loss": 2.9761, "step": 17 }, { "epoch": 0.207492795389049, "grad_norm": 3.5247676372528076, "learning_rate": 9.674736842105263e-05, "loss": 3.5156, "step": 18 }, { "epoch": 0.21902017291066284, "grad_norm": 6.310729503631592, "learning_rate": 9.621578947368421e-05, "loss": 4.3218, "step": 19 }, { "epoch": 0.23054755043227665, "grad_norm": 3.2671117782592773, "learning_rate": 9.568421052631578e-05, "loss": 2.677, "step": 20 }, { "epoch": 0.2420749279538905, "grad_norm": 2.4811301231384277, "learning_rate": 9.515263157894737e-05, "loss": 2.6409, "step": 21 }, { "epoch": 0.25360230547550433, "grad_norm": 1.5807781219482422, "learning_rate": 9.462105263157895e-05, "loss": 2.4935, "step": 22 }, { "epoch": 0.26512968299711814, "grad_norm": 2.4525790214538574, "learning_rate": 9.408947368421054e-05, "loss": 2.6027, "step": 23 }, { "epoch": 0.276657060518732, "grad_norm": 2.5126707553863525, "learning_rate": 9.355789473684211e-05, "loss": 2.9244, "step": 24 }, { "epoch": 0.2881844380403458, "grad_norm": 5.332083702087402, "learning_rate": 9.302631578947369e-05, "loss": 3.8721, "step": 25 }, { "epoch": 0.29971181556195964, "grad_norm": 2.8693103790283203, "learning_rate": 9.249473684210526e-05, "loss": 2.4373, "step": 26 }, { "epoch": 0.3112391930835735, "grad_norm": 1.3940588235855103, "learning_rate": 9.196315789473685e-05, "loss": 2.3714, "step": 27 }, { "epoch": 0.3227665706051873, "grad_norm": 1.8282368183135986, "learning_rate": 9.143157894736843e-05, "loss": 2.272, "step": 28 }, { "epoch": 0.33429394812680113, "grad_norm": 1.570576786994934, "learning_rate": 9.09e-05, "loss": 2.3689, "step": 29 }, { "epoch": 0.345821325648415, "grad_norm": 1.7571359872817993, "learning_rate": 9.036842105263158e-05, "loss": 2.5752, "step": 30 }, { "epoch": 0.3573487031700288, "grad_norm": 3.287325143814087, "learning_rate": 8.983684210526316e-05, "loss": 2.9271, "step": 31 }, { "epoch": 0.3688760806916426, "grad_norm": 3.290728807449341, "learning_rate": 8.930526315789474e-05, "loss": 2.8079, "step": 32 }, { "epoch": 0.3804034582132565, "grad_norm": 1.4297772645950317, "learning_rate": 8.877368421052632e-05, "loss": 2.2268, "step": 33 }, { "epoch": 0.3919308357348703, "grad_norm": 1.2625175714492798, "learning_rate": 8.82421052631579e-05, "loss": 2.1646, "step": 34 }, { "epoch": 0.4034582132564842, "grad_norm": 1.2002922296524048, "learning_rate": 8.771052631578948e-05, "loss": 2.0982, "step": 35 }, { "epoch": 0.414985590778098, "grad_norm": 1.4599192142486572, "learning_rate": 8.717894736842105e-05, "loss": 2.3861, "step": 36 }, { "epoch": 0.4265129682997118, "grad_norm": 2.3603785037994385, "learning_rate": 8.664736842105263e-05, "loss": 2.7989, "step": 37 }, { "epoch": 0.43804034582132567, "grad_norm": 3.2342662811279297, "learning_rate": 8.61157894736842e-05, "loss": 2.6078, "step": 38 }, { "epoch": 0.4495677233429395, "grad_norm": 1.4671645164489746, "learning_rate": 8.55842105263158e-05, "loss": 2.1885, "step": 39 }, { "epoch": 0.4610951008645533, "grad_norm": 1.2487242221832275, "learning_rate": 8.505263157894737e-05, "loss": 1.9964, "step": 40 }, { "epoch": 0.47262247838616717, "grad_norm": 1.3007112741470337, "learning_rate": 8.452105263157896e-05, "loss": 1.9985, "step": 41 }, { "epoch": 0.484149855907781, "grad_norm": 1.5477087497711182, "learning_rate": 8.398947368421053e-05, "loss": 2.1297, "step": 42 }, { "epoch": 0.4956772334293948, "grad_norm": 2.269618272781372, "learning_rate": 8.345789473684211e-05, "loss": 2.4867, "step": 43 }, { "epoch": 0.5072046109510087, "grad_norm": 3.860551595687866, "learning_rate": 8.292631578947368e-05, "loss": 3.0229, "step": 44 }, { "epoch": 0.5187319884726225, "grad_norm": 1.6341471672058105, "learning_rate": 8.239473684210526e-05, "loss": 1.9718, "step": 45 }, { "epoch": 0.5302593659942363, "grad_norm": 1.2065647840499878, "learning_rate": 8.186315789473683e-05, "loss": 2.0059, "step": 46 }, { "epoch": 0.5417867435158501, "grad_norm": 1.6778544187545776, "learning_rate": 8.133157894736842e-05, "loss": 1.9092, "step": 47 }, { "epoch": 0.553314121037464, "grad_norm": 1.4027705192565918, "learning_rate": 8.080000000000001e-05, "loss": 2.0142, "step": 48 }, { "epoch": 0.5648414985590778, "grad_norm": 1.6712775230407715, "learning_rate": 8.026842105263159e-05, "loss": 2.289, "step": 49 }, { "epoch": 0.5763688760806917, "grad_norm": 3.5697531700134277, "learning_rate": 7.973684210526316e-05, "loss": 2.8535, "step": 50 }, { "epoch": 0.5763688760806917, "eval_loss": 0.522826075553894, "eval_runtime": 16.9076, "eval_samples_per_second": 68.963, "eval_steps_per_second": 2.188, "step": 50 }, { "epoch": 0.5878962536023055, "grad_norm": 3.06416392326355, "learning_rate": 7.920526315789474e-05, "loss": 1.8642, "step": 51 }, { "epoch": 0.5994236311239193, "grad_norm": 1.5283445119857788, "learning_rate": 7.867368421052631e-05, "loss": 1.8985, "step": 52 }, { "epoch": 0.6109510086455331, "grad_norm": 1.7631272077560425, "learning_rate": 7.814210526315789e-05, "loss": 1.7763, "step": 53 }, { "epoch": 0.622478386167147, "grad_norm": 1.3397003412246704, "learning_rate": 7.761052631578946e-05, "loss": 1.9477, "step": 54 }, { "epoch": 0.6340057636887608, "grad_norm": 1.6053922176361084, "learning_rate": 7.707894736842105e-05, "loss": 2.18, "step": 55 }, { "epoch": 0.6455331412103746, "grad_norm": 2.7249755859375, "learning_rate": 7.654736842105264e-05, "loss": 2.3534, "step": 56 }, { "epoch": 0.6570605187319885, "grad_norm": 2.97141432762146, "learning_rate": 7.601578947368422e-05, "loss": 2.4034, "step": 57 }, { "epoch": 0.6685878962536023, "grad_norm": 1.9900548458099365, "learning_rate": 7.548421052631579e-05, "loss": 1.8997, "step": 58 }, { "epoch": 0.6801152737752162, "grad_norm": 1.2884211540222168, "learning_rate": 7.495263157894737e-05, "loss": 1.7887, "step": 59 }, { "epoch": 0.69164265129683, "grad_norm": 1.2060939073562622, "learning_rate": 7.442105263157894e-05, "loss": 1.7919, "step": 60 }, { "epoch": 0.7031700288184438, "grad_norm": 1.6086294651031494, "learning_rate": 7.388947368421053e-05, "loss": 1.947, "step": 61 }, { "epoch": 0.7146974063400576, "grad_norm": 2.0594823360443115, "learning_rate": 7.335789473684211e-05, "loss": 2.2118, "step": 62 }, { "epoch": 0.7262247838616714, "grad_norm": 3.474539041519165, "learning_rate": 7.282631578947368e-05, "loss": 2.4448, "step": 63 }, { "epoch": 0.7377521613832853, "grad_norm": 2.4555892944335938, "learning_rate": 7.229473684210527e-05, "loss": 1.6301, "step": 64 }, { "epoch": 0.7492795389048992, "grad_norm": 1.3167078495025635, "learning_rate": 7.176315789473685e-05, "loss": 1.8001, "step": 65 }, { "epoch": 0.760806916426513, "grad_norm": 1.3120259046554565, "learning_rate": 7.123157894736842e-05, "loss": 1.6406, "step": 66 }, { "epoch": 0.7723342939481268, "grad_norm": 1.2128572463989258, "learning_rate": 7.07e-05, "loss": 1.9768, "step": 67 }, { "epoch": 0.7838616714697406, "grad_norm": 1.7433110475540161, "learning_rate": 7.016842105263159e-05, "loss": 2.0889, "step": 68 }, { "epoch": 0.7953890489913544, "grad_norm": 2.9662296772003174, "learning_rate": 6.963684210526316e-05, "loss": 2.4274, "step": 69 }, { "epoch": 0.8069164265129684, "grad_norm": 1.7265340089797974, "learning_rate": 6.910526315789474e-05, "loss": 1.6667, "step": 70 }, { "epoch": 0.8184438040345822, "grad_norm": 1.2966630458831787, "learning_rate": 6.857368421052631e-05, "loss": 1.5911, "step": 71 }, { "epoch": 0.829971181556196, "grad_norm": 1.1538748741149902, "learning_rate": 6.80421052631579e-05, "loss": 1.623, "step": 72 }, { "epoch": 0.8414985590778098, "grad_norm": 1.2530492544174194, "learning_rate": 6.751052631578948e-05, "loss": 1.6128, "step": 73 }, { "epoch": 0.8530259365994236, "grad_norm": 1.6641517877578735, "learning_rate": 6.697894736842105e-05, "loss": 2.1622, "step": 74 }, { "epoch": 0.8645533141210374, "grad_norm": 2.932410955429077, "learning_rate": 6.644736842105264e-05, "loss": 2.34, "step": 75 }, { "epoch": 0.8760806916426513, "grad_norm": 1.564931869506836, "learning_rate": 6.591578947368422e-05, "loss": 1.5343, "step": 76 }, { "epoch": 0.8876080691642652, "grad_norm": 1.22451913356781, "learning_rate": 6.538421052631579e-05, "loss": 1.5855, "step": 77 }, { "epoch": 0.899135446685879, "grad_norm": 1.0155211687088013, "learning_rate": 6.485263157894737e-05, "loss": 1.4917, "step": 78 }, { "epoch": 0.9106628242074928, "grad_norm": 1.1343241930007935, "learning_rate": 6.432105263157894e-05, "loss": 1.4143, "step": 79 }, { "epoch": 0.9221902017291066, "grad_norm": 1.3770869970321655, "learning_rate": 6.378947368421053e-05, "loss": 1.9376, "step": 80 }, { "epoch": 0.9337175792507204, "grad_norm": 2.184307813644409, "learning_rate": 6.32578947368421e-05, "loss": 2.1743, "step": 81 }, { "epoch": 0.9452449567723343, "grad_norm": 2.2855091094970703, "learning_rate": 6.27263157894737e-05, "loss": 2.0407, "step": 82 }, { "epoch": 0.9567723342939481, "grad_norm": 1.2995476722717285, "learning_rate": 6.219473684210527e-05, "loss": 1.5107, "step": 83 }, { "epoch": 0.968299711815562, "grad_norm": 1.1009643077850342, "learning_rate": 6.166315789473685e-05, "loss": 1.4408, "step": 84 }, { "epoch": 0.9798270893371758, "grad_norm": 1.1911717653274536, "learning_rate": 6.113157894736842e-05, "loss": 1.7328, "step": 85 }, { "epoch": 0.9913544668587896, "grad_norm": 1.749642014503479, "learning_rate": 6.0599999999999996e-05, "loss": 1.8929, "step": 86 }, { "epoch": 1.005763688760807, "grad_norm": 2.45554256439209, "learning_rate": 6.006842105263158e-05, "loss": 1.955, "step": 87 }, { "epoch": 1.0172910662824208, "grad_norm": 1.4383282661437988, "learning_rate": 5.953684210526315e-05, "loss": 1.2303, "step": 88 }, { "epoch": 1.0288184438040346, "grad_norm": 1.0873348712921143, "learning_rate": 5.900526315789474e-05, "loss": 1.2904, "step": 89 }, { "epoch": 1.0403458213256485, "grad_norm": 1.093670129776001, "learning_rate": 5.847368421052632e-05, "loss": 1.342, "step": 90 }, { "epoch": 1.0518731988472623, "grad_norm": 1.1975847482681274, "learning_rate": 5.79421052631579e-05, "loss": 1.5195, "step": 91 }, { "epoch": 1.063400576368876, "grad_norm": 1.6481062173843384, "learning_rate": 5.7410526315789475e-05, "loss": 1.7187, "step": 92 }, { "epoch": 1.07492795389049, "grad_norm": 2.8624935150146484, "learning_rate": 5.687894736842105e-05, "loss": 1.8387, "step": 93 }, { "epoch": 1.0864553314121037, "grad_norm": 1.3205828666687012, "learning_rate": 5.6347368421052625e-05, "loss": 1.3021, "step": 94 }, { "epoch": 1.0979827089337175, "grad_norm": 1.1411229372024536, "learning_rate": 5.5815789473684214e-05, "loss": 1.3437, "step": 95 }, { "epoch": 1.1095100864553313, "grad_norm": 1.0338324308395386, "learning_rate": 5.5284210526315796e-05, "loss": 1.3011, "step": 96 }, { "epoch": 1.1210374639769451, "grad_norm": 1.0531305074691772, "learning_rate": 5.475263157894737e-05, "loss": 1.3966, "step": 97 }, { "epoch": 1.1325648414985592, "grad_norm": 1.6607333421707153, "learning_rate": 5.422105263157895e-05, "loss": 1.7677, "step": 98 }, { "epoch": 1.144092219020173, "grad_norm": 2.978102684020996, "learning_rate": 5.368947368421053e-05, "loss": 2.0096, "step": 99 }, { "epoch": 1.1556195965417868, "grad_norm": 1.1680306196212769, "learning_rate": 5.3157894736842104e-05, "loss": 1.2335, "step": 100 }, { "epoch": 1.1556195965417868, "eval_loss": 0.3946053087711334, "eval_runtime": 17.2978, "eval_samples_per_second": 67.407, "eval_steps_per_second": 2.139, "step": 100 }, { "epoch": 1.1671469740634006, "grad_norm": 1.1594160795211792, "learning_rate": 5.262631578947368e-05, "loss": 1.1509, "step": 101 }, { "epoch": 1.1786743515850144, "grad_norm": 1.061645269393921, "learning_rate": 5.209473684210527e-05, "loss": 1.1868, "step": 102 }, { "epoch": 1.1902017291066282, "grad_norm": 1.021350622177124, "learning_rate": 5.1563157894736844e-05, "loss": 1.2714, "step": 103 }, { "epoch": 1.201729106628242, "grad_norm": 1.3108984231948853, "learning_rate": 5.1031578947368426e-05, "loss": 1.5794, "step": 104 }, { "epoch": 1.2132564841498559, "grad_norm": 2.1817190647125244, "learning_rate": 5.05e-05, "loss": 1.6861, "step": 105 }, { "epoch": 1.2247838616714697, "grad_norm": 2.2876627445220947, "learning_rate": 4.9968421052631576e-05, "loss": 1.647, "step": 106 }, { "epoch": 1.2363112391930835, "grad_norm": 1.1767092943191528, "learning_rate": 4.943684210526316e-05, "loss": 1.1436, "step": 107 }, { "epoch": 1.2478386167146973, "grad_norm": 1.122977614402771, "learning_rate": 4.890526315789474e-05, "loss": 1.2584, "step": 108 }, { "epoch": 1.2593659942363113, "grad_norm": 1.0112011432647705, "learning_rate": 4.8373684210526316e-05, "loss": 1.2698, "step": 109 }, { "epoch": 1.270893371757925, "grad_norm": 1.2474466562271118, "learning_rate": 4.784210526315789e-05, "loss": 1.4298, "step": 110 }, { "epoch": 1.282420749279539, "grad_norm": 2.0961225032806396, "learning_rate": 4.731052631578947e-05, "loss": 1.5249, "step": 111 }, { "epoch": 1.2939481268011528, "grad_norm": 2.892947196960449, "learning_rate": 4.6778947368421055e-05, "loss": 1.7823, "step": 112 }, { "epoch": 1.3054755043227666, "grad_norm": 1.2904059886932373, "learning_rate": 4.624736842105263e-05, "loss": 1.0597, "step": 113 }, { "epoch": 1.3170028818443804, "grad_norm": 1.0860971212387085, "learning_rate": 4.571578947368421e-05, "loss": 1.3479, "step": 114 }, { "epoch": 1.3285302593659942, "grad_norm": 1.0123194456100464, "learning_rate": 4.518421052631579e-05, "loss": 1.1235, "step": 115 }, { "epoch": 1.340057636887608, "grad_norm": 1.1925913095474243, "learning_rate": 4.465263157894737e-05, "loss": 1.3878, "step": 116 }, { "epoch": 1.3515850144092219, "grad_norm": 1.8660753965377808, "learning_rate": 4.412105263157895e-05, "loss": 1.6422, "step": 117 }, { "epoch": 1.3631123919308357, "grad_norm": 2.7606379985809326, "learning_rate": 4.358947368421053e-05, "loss": 1.6949, "step": 118 }, { "epoch": 1.3746397694524495, "grad_norm": 1.5100210905075073, "learning_rate": 4.30578947368421e-05, "loss": 1.0997, "step": 119 }, { "epoch": 1.3861671469740635, "grad_norm": 1.165024995803833, "learning_rate": 4.2526315789473685e-05, "loss": 1.1644, "step": 120 }, { "epoch": 1.397694524495677, "grad_norm": 0.9900702834129333, "learning_rate": 4.199473684210527e-05, "loss": 1.2457, "step": 121 }, { "epoch": 1.4092219020172911, "grad_norm": 1.1262096166610718, "learning_rate": 4.146315789473684e-05, "loss": 1.341, "step": 122 }, { "epoch": 1.420749279538905, "grad_norm": 1.4549776315689087, "learning_rate": 4.093157894736842e-05, "loss": 1.4951, "step": 123 }, { "epoch": 1.4322766570605188, "grad_norm": 2.960393190383911, "learning_rate": 4.0400000000000006e-05, "loss": 1.9611, "step": 124 }, { "epoch": 1.4438040345821326, "grad_norm": 1.1258149147033691, "learning_rate": 3.986842105263158e-05, "loss": 1.2408, "step": 125 }, { "epoch": 1.4553314121037464, "grad_norm": 1.0389220714569092, "learning_rate": 3.933684210526316e-05, "loss": 1.1756, "step": 126 }, { "epoch": 1.4668587896253602, "grad_norm": 1.1349718570709229, "learning_rate": 3.880526315789473e-05, "loss": 1.2792, "step": 127 }, { "epoch": 1.478386167146974, "grad_norm": 1.1235551834106445, "learning_rate": 3.827368421052632e-05, "loss": 1.3884, "step": 128 }, { "epoch": 1.4899135446685878, "grad_norm": 1.4590579271316528, "learning_rate": 3.7742105263157896e-05, "loss": 1.547, "step": 129 }, { "epoch": 1.5014409221902016, "grad_norm": 2.1345937252044678, "learning_rate": 3.721052631578947e-05, "loss": 1.699, "step": 130 }, { "epoch": 1.5129682997118157, "grad_norm": 2.1057217121124268, "learning_rate": 3.6678947368421054e-05, "loss": 1.3336, "step": 131 }, { "epoch": 1.5244956772334293, "grad_norm": 1.0975521802902222, "learning_rate": 3.6147368421052636e-05, "loss": 1.1837, "step": 132 }, { "epoch": 1.5360230547550433, "grad_norm": 1.0290075540542603, "learning_rate": 3.561578947368421e-05, "loss": 1.0588, "step": 133 }, { "epoch": 1.547550432276657, "grad_norm": 1.109420895576477, "learning_rate": 3.508421052631579e-05, "loss": 1.2782, "step": 134 }, { "epoch": 1.559077809798271, "grad_norm": 1.3788710832595825, "learning_rate": 3.455263157894737e-05, "loss": 1.4358, "step": 135 }, { "epoch": 1.5706051873198847, "grad_norm": 1.9282630681991577, "learning_rate": 3.402105263157895e-05, "loss": 1.5525, "step": 136 }, { "epoch": 1.5821325648414986, "grad_norm": 3.0819172859191895, "learning_rate": 3.3489473684210526e-05, "loss": 1.6032, "step": 137 }, { "epoch": 1.5936599423631124, "grad_norm": 1.0416362285614014, "learning_rate": 3.295789473684211e-05, "loss": 1.0864, "step": 138 }, { "epoch": 1.6051873198847262, "grad_norm": 1.0696144104003906, "learning_rate": 3.242631578947368e-05, "loss": 1.0025, "step": 139 }, { "epoch": 1.6167146974063402, "grad_norm": 0.9461542963981628, "learning_rate": 3.1894736842105265e-05, "loss": 0.982, "step": 140 }, { "epoch": 1.6282420749279538, "grad_norm": 1.1103463172912598, "learning_rate": 3.136315789473685e-05, "loss": 1.2823, "step": 141 }, { "epoch": 1.6397694524495678, "grad_norm": 1.6639349460601807, "learning_rate": 3.083157894736842e-05, "loss": 1.486, "step": 142 }, { "epoch": 1.6512968299711814, "grad_norm": 2.9342904090881348, "learning_rate": 3.0299999999999998e-05, "loss": 1.8242, "step": 143 }, { "epoch": 1.6628242074927955, "grad_norm": 1.3234608173370361, "learning_rate": 2.9768421052631577e-05, "loss": 1.1624, "step": 144 }, { "epoch": 1.674351585014409, "grad_norm": 1.2971738576889038, "learning_rate": 2.923684210526316e-05, "loss": 1.1139, "step": 145 }, { "epoch": 1.685878962536023, "grad_norm": 1.0851243734359741, "learning_rate": 2.8705263157894737e-05, "loss": 1.0419, "step": 146 }, { "epoch": 1.697406340057637, "grad_norm": 1.0544915199279785, "learning_rate": 2.8173684210526313e-05, "loss": 1.2045, "step": 147 }, { "epoch": 1.7089337175792507, "grad_norm": 1.4829477071762085, "learning_rate": 2.7642105263157898e-05, "loss": 1.3821, "step": 148 }, { "epoch": 1.7204610951008645, "grad_norm": 2.9280033111572266, "learning_rate": 2.7110526315789473e-05, "loss": 1.8452, "step": 149 }, { "epoch": 1.7319884726224783, "grad_norm": 1.1372859477996826, "learning_rate": 2.6578947368421052e-05, "loss": 1.0575, "step": 150 }, { "epoch": 1.7319884726224783, "eval_loss": 0.3432846665382385, "eval_runtime": 18.7516, "eval_samples_per_second": 62.181, "eval_steps_per_second": 1.973, "step": 150 }, { "epoch": 1.7435158501440924, "grad_norm": 1.023056983947754, "learning_rate": 2.6047368421052634e-05, "loss": 1.0717, "step": 151 }, { "epoch": 1.755043227665706, "grad_norm": 0.9638779759407043, "learning_rate": 2.5515789473684213e-05, "loss": 0.9971, "step": 152 }, { "epoch": 1.76657060518732, "grad_norm": 1.0617165565490723, "learning_rate": 2.4984210526315788e-05, "loss": 1.1202, "step": 153 }, { "epoch": 1.7780979827089336, "grad_norm": 1.5653163194656372, "learning_rate": 2.445263157894737e-05, "loss": 1.4338, "step": 154 }, { "epoch": 1.7896253602305476, "grad_norm": 2.3075835704803467, "learning_rate": 2.3921052631578946e-05, "loss": 1.5135, "step": 155 }, { "epoch": 1.8011527377521612, "grad_norm": 2.41831111907959, "learning_rate": 2.3389473684210528e-05, "loss": 1.4573, "step": 156 }, { "epoch": 1.8126801152737753, "grad_norm": 1.1299927234649658, "learning_rate": 2.2857894736842106e-05, "loss": 0.9739, "step": 157 }, { "epoch": 1.824207492795389, "grad_norm": 0.9729629755020142, "learning_rate": 2.2326315789473685e-05, "loss": 1.1128, "step": 158 }, { "epoch": 1.8357348703170029, "grad_norm": 0.9762557744979858, "learning_rate": 2.1794736842105264e-05, "loss": 1.0709, "step": 159 }, { "epoch": 1.8472622478386167, "grad_norm": 1.2971409559249878, "learning_rate": 2.1263157894736842e-05, "loss": 1.3502, "step": 160 }, { "epoch": 1.8587896253602305, "grad_norm": 1.9666305780410767, "learning_rate": 2.073157894736842e-05, "loss": 1.6911, "step": 161 }, { "epoch": 1.8703170028818443, "grad_norm": 2.376969575881958, "learning_rate": 2.0200000000000003e-05, "loss": 1.433, "step": 162 }, { "epoch": 1.8818443804034581, "grad_norm": 0.978244423866272, "learning_rate": 1.966842105263158e-05, "loss": 1.0352, "step": 163 }, { "epoch": 1.8933717579250722, "grad_norm": 0.9526923298835754, "learning_rate": 1.913684210526316e-05, "loss": 0.9, "step": 164 }, { "epoch": 1.9048991354466858, "grad_norm": 0.9895343780517578, "learning_rate": 1.8605263157894736e-05, "loss": 0.981, "step": 165 }, { "epoch": 1.9164265129682998, "grad_norm": 1.156259536743164, "learning_rate": 1.8073684210526318e-05, "loss": 1.1636, "step": 166 }, { "epoch": 1.9279538904899134, "grad_norm": 1.878818154335022, "learning_rate": 1.7542105263157897e-05, "loss": 1.4938, "step": 167 }, { "epoch": 1.9394812680115274, "grad_norm": 2.605971097946167, "learning_rate": 1.7010526315789475e-05, "loss": 1.4686, "step": 168 }, { "epoch": 1.9510086455331412, "grad_norm": 0.9951879978179932, "learning_rate": 1.6478947368421054e-05, "loss": 1.0558, "step": 169 }, { "epoch": 1.962536023054755, "grad_norm": 0.976740300655365, "learning_rate": 1.5947368421052633e-05, "loss": 1.0558, "step": 170 }, { "epoch": 1.9740634005763689, "grad_norm": 0.9469358325004578, "learning_rate": 1.541578947368421e-05, "loss": 1.0353, "step": 171 }, { "epoch": 1.9855907780979827, "grad_norm": 1.4167894124984741, "learning_rate": 1.4884210526315788e-05, "loss": 1.2964, "step": 172 }, { "epoch": 1.9971181556195965, "grad_norm": 2.729344129562378, "learning_rate": 1.4352631578947369e-05, "loss": 1.7346, "step": 173 }, { "epoch": 2.011527377521614, "grad_norm": 0.9175971746444702, "learning_rate": 1.3821052631578949e-05, "loss": 0.8195, "step": 174 }, { "epoch": 2.0230547550432276, "grad_norm": 0.883823037147522, "learning_rate": 1.3289473684210526e-05, "loss": 1.0166, "step": 175 }, { "epoch": 2.0345821325648417, "grad_norm": 0.8910732865333557, "learning_rate": 1.2757894736842106e-05, "loss": 0.9309, "step": 176 }, { "epoch": 2.0461095100864553, "grad_norm": 0.9672825932502747, "learning_rate": 1.2226315789473685e-05, "loss": 0.9933, "step": 177 }, { "epoch": 2.0576368876080693, "grad_norm": 1.295758605003357, "learning_rate": 1.1694736842105264e-05, "loss": 1.1036, "step": 178 }, { "epoch": 2.069164265129683, "grad_norm": 2.083310127258301, "learning_rate": 1.1163157894736842e-05, "loss": 1.1551, "step": 179 }, { "epoch": 2.080691642651297, "grad_norm": 2.122234344482422, "learning_rate": 1.0631578947368421e-05, "loss": 1.114, "step": 180 }, { "epoch": 2.0922190201729105, "grad_norm": 1.4002490043640137, "learning_rate": 1.0100000000000002e-05, "loss": 0.8692, "step": 181 }, { "epoch": 2.1037463976945245, "grad_norm": 1.2972763776779175, "learning_rate": 9.56842105263158e-06, "loss": 0.8139, "step": 182 }, { "epoch": 2.115273775216138, "grad_norm": 1.2130416631698608, "learning_rate": 9.036842105263159e-06, "loss": 0.9519, "step": 183 }, { "epoch": 2.126801152737752, "grad_norm": 1.3538533449172974, "learning_rate": 8.505263157894738e-06, "loss": 1.1313, "step": 184 }, { "epoch": 2.138328530259366, "grad_norm": 1.8970357179641724, "learning_rate": 7.973684210526316e-06, "loss": 1.1983, "step": 185 }, { "epoch": 2.14985590778098, "grad_norm": 2.499178886413574, "learning_rate": 7.442105263157894e-06, "loss": 1.1611, "step": 186 }, { "epoch": 2.161383285302594, "grad_norm": 0.9207624197006226, "learning_rate": 6.9105263157894745e-06, "loss": 0.7852, "step": 187 }, { "epoch": 2.1729106628242074, "grad_norm": 0.9183100461959839, "learning_rate": 6.378947368421053e-06, "loss": 0.8561, "step": 188 }, { "epoch": 2.1844380403458215, "grad_norm": 0.886722207069397, "learning_rate": 5.847368421052632e-06, "loss": 0.8833, "step": 189 }, { "epoch": 2.195965417867435, "grad_norm": 1.110753059387207, "learning_rate": 5.315789473684211e-06, "loss": 1.1024, "step": 190 }, { "epoch": 2.207492795389049, "grad_norm": 1.6218575239181519, "learning_rate": 4.78421052631579e-06, "loss": 1.0674, "step": 191 }, { "epoch": 2.2190201729106627, "grad_norm": 2.869983673095703, "learning_rate": 4.252631578947369e-06, "loss": 1.2698, "step": 192 }, { "epoch": 2.2305475504322767, "grad_norm": 0.937856912612915, "learning_rate": 3.721052631578947e-06, "loss": 0.916, "step": 193 }, { "epoch": 2.2420749279538903, "grad_norm": 0.8958096504211426, "learning_rate": 3.1894736842105266e-06, "loss": 0.9195, "step": 194 }, { "epoch": 2.2536023054755043, "grad_norm": 0.9468475580215454, "learning_rate": 2.6578947368421053e-06, "loss": 0.9558, "step": 195 }, { "epoch": 2.2651296829971184, "grad_norm": 1.0763096809387207, "learning_rate": 2.1263157894736844e-06, "loss": 1.0005, "step": 196 }, { "epoch": 2.276657060518732, "grad_norm": 1.6214865446090698, "learning_rate": 1.5947368421052633e-06, "loss": 1.2395, "step": 197 }, { "epoch": 2.288184438040346, "grad_norm": 2.93674898147583, "learning_rate": 1.0631578947368422e-06, "loss": 1.3516, "step": 198 }, { "epoch": 2.2997118155619596, "grad_norm": 0.9342450499534607, "learning_rate": 5.315789473684211e-07, "loss": 0.7604, "step": 199 }, { "epoch": 2.3112391930835736, "grad_norm": 0.9478535652160645, "learning_rate": 0.0, "loss": 0.7978, "step": 200 }, { "epoch": 2.3112391930835736, "eval_loss": 0.3262763023376465, "eval_runtime": 17.7432, "eval_samples_per_second": 65.715, "eval_steps_per_second": 2.085, "step": 200 } ], "logging_steps": 1, "max_steps": 200, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 50, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 5, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 3.5438705567323914e+18, "train_batch_size": 8, "trial_name": null, "trial_params": null }