{ "best_metric": 0.6914480924606323, "best_model_checkpoint": "miner_id_24/checkpoint-200", "epoch": 0.026439288783131735, "eval_steps": 50, "global_step": 200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00013219644391565866, "grad_norm": 0.19538387656211853, "learning_rate": 1e-05, "loss": 0.7056, "step": 1 }, { "epoch": 0.00013219644391565866, "eval_loss": 1.0835129022598267, "eval_runtime": 1275.6627, "eval_samples_per_second": 9.987, "eval_steps_per_second": 2.497, "step": 1 }, { "epoch": 0.0002643928878313173, "grad_norm": 0.24580149352550507, "learning_rate": 2e-05, "loss": 0.782, "step": 2 }, { "epoch": 0.000396589331746976, "grad_norm": 0.266649454832077, "learning_rate": 3e-05, "loss": 0.8056, "step": 3 }, { "epoch": 0.0005287857756626346, "grad_norm": 0.32284072041511536, "learning_rate": 4e-05, "loss": 0.8834, "step": 4 }, { "epoch": 0.0006609822195782933, "grad_norm": 0.24443857371807098, "learning_rate": 5e-05, "loss": 0.8021, "step": 5 }, { "epoch": 0.000793178663493952, "grad_norm": 0.24312937259674072, "learning_rate": 6e-05, "loss": 0.8884, "step": 6 }, { "epoch": 0.0009253751074096107, "grad_norm": 0.2570464611053467, "learning_rate": 7e-05, "loss": 0.8745, "step": 7 }, { "epoch": 0.0010575715513252693, "grad_norm": 0.2607725262641907, "learning_rate": 8e-05, "loss": 0.9049, "step": 8 }, { "epoch": 0.001189767995240928, "grad_norm": 0.27473318576812744, "learning_rate": 9e-05, "loss": 0.9292, "step": 9 }, { "epoch": 0.0013219644391565867, "grad_norm": 0.28504708409309387, "learning_rate": 0.0001, "loss": 0.8395, "step": 10 }, { "epoch": 0.0014541608830722455, "grad_norm": 0.3271239101886749, "learning_rate": 9.999316524962345e-05, "loss": 0.7652, "step": 11 }, { "epoch": 0.001586357326987904, "grad_norm": 0.3310953378677368, "learning_rate": 9.997266286704631e-05, "loss": 0.9367, "step": 12 }, { "epoch": 0.0017185537709035626, "grad_norm": 0.3015696704387665, "learning_rate": 9.993849845741524e-05, "loss": 0.8164, "step": 13 }, { "epoch": 0.0018507502148192214, "grad_norm": 0.32852622866630554, "learning_rate": 9.989068136093873e-05, "loss": 0.9113, "step": 14 }, { "epoch": 0.00198294665873488, "grad_norm": 0.2933748960494995, "learning_rate": 9.98292246503335e-05, "loss": 0.9147, "step": 15 }, { "epoch": 0.0021151431026505386, "grad_norm": 0.2997913062572479, "learning_rate": 9.975414512725057e-05, "loss": 0.7964, "step": 16 }, { "epoch": 0.0022473395465661974, "grad_norm": 0.2818261384963989, "learning_rate": 9.966546331768191e-05, "loss": 0.8466, "step": 17 }, { "epoch": 0.002379535990481856, "grad_norm": 0.31236743927001953, "learning_rate": 9.956320346634876e-05, "loss": 0.8633, "step": 18 }, { "epoch": 0.0025117324343975145, "grad_norm": 0.267844021320343, "learning_rate": 9.944739353007344e-05, "loss": 0.8381, "step": 19 }, { "epoch": 0.0026439288783131733, "grad_norm": 0.25400617718696594, "learning_rate": 9.931806517013612e-05, "loss": 0.8444, "step": 20 }, { "epoch": 0.002776125322228832, "grad_norm": 0.2786080241203308, "learning_rate": 9.917525374361912e-05, "loss": 0.8191, "step": 21 }, { "epoch": 0.002908321766144491, "grad_norm": 0.2764554023742676, "learning_rate": 9.901899829374047e-05, "loss": 0.8549, "step": 22 }, { "epoch": 0.0030405182100601493, "grad_norm": 0.28269195556640625, "learning_rate": 9.884934153917997e-05, "loss": 0.809, "step": 23 }, { "epoch": 0.003172714653975808, "grad_norm": 0.27460262179374695, "learning_rate": 9.86663298624003e-05, "loss": 0.7984, "step": 24 }, { "epoch": 0.003304911097891467, "grad_norm": 0.3024798035621643, "learning_rate": 9.847001329696653e-05, "loss": 0.9123, "step": 25 }, { "epoch": 0.003437107541807125, "grad_norm": 0.3165448307991028, "learning_rate": 9.826044551386744e-05, "loss": 0.9137, "step": 26 }, { "epoch": 0.003569303985722784, "grad_norm": 0.30335527658462524, "learning_rate": 9.803768380684242e-05, "loss": 0.8245, "step": 27 }, { "epoch": 0.003701500429638443, "grad_norm": 0.30884429812431335, "learning_rate": 9.780178907671789e-05, "loss": 0.9312, "step": 28 }, { "epoch": 0.0038336968735541016, "grad_norm": 0.303963303565979, "learning_rate": 9.755282581475769e-05, "loss": 0.9228, "step": 29 }, { "epoch": 0.00396589331746976, "grad_norm": 0.30520156025886536, "learning_rate": 9.729086208503174e-05, "loss": 0.8577, "step": 30 }, { "epoch": 0.004098089761385418, "grad_norm": 0.3221169412136078, "learning_rate": 9.701596950580806e-05, "loss": 0.8526, "step": 31 }, { "epoch": 0.004230286205301077, "grad_norm": 0.3038342297077179, "learning_rate": 9.672822322997305e-05, "loss": 0.9267, "step": 32 }, { "epoch": 0.004362482649216736, "grad_norm": 0.3137074410915375, "learning_rate": 9.642770192448536e-05, "loss": 0.809, "step": 33 }, { "epoch": 0.004494679093132395, "grad_norm": 0.3341916799545288, "learning_rate": 9.611448774886924e-05, "loss": 0.8703, "step": 34 }, { "epoch": 0.0046268755370480535, "grad_norm": 0.34784042835235596, "learning_rate": 9.578866633275288e-05, "loss": 0.8523, "step": 35 }, { "epoch": 0.004759071980963712, "grad_norm": 0.3425697982311249, "learning_rate": 9.545032675245813e-05, "loss": 0.8096, "step": 36 }, { "epoch": 0.004891268424879371, "grad_norm": 0.3550679683685303, "learning_rate": 9.509956150664796e-05, "loss": 0.8501, "step": 37 }, { "epoch": 0.005023464868795029, "grad_norm": 0.35561543703079224, "learning_rate": 9.473646649103818e-05, "loss": 0.8739, "step": 38 }, { "epoch": 0.005155661312710688, "grad_norm": 0.3632775843143463, "learning_rate": 9.43611409721806e-05, "loss": 0.8532, "step": 39 }, { "epoch": 0.005287857756626347, "grad_norm": 0.37438029050827026, "learning_rate": 9.397368756032445e-05, "loss": 0.7684, "step": 40 }, { "epoch": 0.005420054200542005, "grad_norm": 0.3539464771747589, "learning_rate": 9.357421218136386e-05, "loss": 0.809, "step": 41 }, { "epoch": 0.005552250644457664, "grad_norm": 0.3721257448196411, "learning_rate": 9.316282404787871e-05, "loss": 0.8727, "step": 42 }, { "epoch": 0.005684447088373323, "grad_norm": 0.3766726553440094, "learning_rate": 9.273963562927695e-05, "loss": 0.7634, "step": 43 }, { "epoch": 0.005816643532288982, "grad_norm": 0.33808189630508423, "learning_rate": 9.230476262104677e-05, "loss": 0.7711, "step": 44 }, { "epoch": 0.00594883997620464, "grad_norm": 0.37606221437454224, "learning_rate": 9.185832391312644e-05, "loss": 0.7699, "step": 45 }, { "epoch": 0.0060810364201202985, "grad_norm": 0.3631663918495178, "learning_rate": 9.140044155740101e-05, "loss": 0.7655, "step": 46 }, { "epoch": 0.006213232864035957, "grad_norm": 0.39946281909942627, "learning_rate": 9.093124073433463e-05, "loss": 0.7527, "step": 47 }, { "epoch": 0.006345429307951616, "grad_norm": 0.41046342253685, "learning_rate": 9.045084971874738e-05, "loss": 0.7463, "step": 48 }, { "epoch": 0.006477625751867275, "grad_norm": 0.4188183844089508, "learning_rate": 8.995939984474624e-05, "loss": 0.7044, "step": 49 }, { "epoch": 0.006609822195782934, "grad_norm": 0.5937662720680237, "learning_rate": 8.945702546981969e-05, "loss": 0.6353, "step": 50 }, { "epoch": 0.006609822195782934, "eval_loss": 0.7970728278160095, "eval_runtime": 1284.9623, "eval_samples_per_second": 9.915, "eval_steps_per_second": 2.479, "step": 50 }, { "epoch": 0.0067420186396985925, "grad_norm": 0.2786080241203308, "learning_rate": 8.894386393810563e-05, "loss": 0.6512, "step": 51 }, { "epoch": 0.00687421508361425, "grad_norm": 0.27424800395965576, "learning_rate": 8.842005554284296e-05, "loss": 0.6597, "step": 52 }, { "epoch": 0.007006411527529909, "grad_norm": 0.2192063182592392, "learning_rate": 8.788574348801675e-05, "loss": 0.6719, "step": 53 }, { "epoch": 0.007138607971445568, "grad_norm": 0.2032398134469986, "learning_rate": 8.73410738492077e-05, "loss": 0.7025, "step": 54 }, { "epoch": 0.007270804415361227, "grad_norm": 0.19889365136623383, "learning_rate": 8.678619553365659e-05, "loss": 0.7491, "step": 55 }, { "epoch": 0.007403000859276886, "grad_norm": 0.20415586233139038, "learning_rate": 8.622126023955446e-05, "loss": 0.6961, "step": 56 }, { "epoch": 0.007535197303192544, "grad_norm": 0.22474883496761322, "learning_rate": 8.564642241456986e-05, "loss": 0.7191, "step": 57 }, { "epoch": 0.007667393747108203, "grad_norm": 0.1937185525894165, "learning_rate": 8.506183921362443e-05, "loss": 0.7348, "step": 58 }, { "epoch": 0.007799590191023861, "grad_norm": 0.22836388647556305, "learning_rate": 8.44676704559283e-05, "loss": 0.7534, "step": 59 }, { "epoch": 0.00793178663493952, "grad_norm": 0.22243137657642365, "learning_rate": 8.386407858128706e-05, "loss": 0.7747, "step": 60 }, { "epoch": 0.00806398307885518, "grad_norm": 0.2246285378932953, "learning_rate": 8.32512286056924e-05, "loss": 0.752, "step": 61 }, { "epoch": 0.008196179522770837, "grad_norm": 0.22273488342761993, "learning_rate": 8.262928807620843e-05, "loss": 0.8714, "step": 62 }, { "epoch": 0.008328375966686495, "grad_norm": 0.24067454040050507, "learning_rate": 8.199842702516583e-05, "loss": 0.8074, "step": 63 }, { "epoch": 0.008460572410602154, "grad_norm": 0.25970250368118286, "learning_rate": 8.135881792367686e-05, "loss": 0.6916, "step": 64 }, { "epoch": 0.008592768854517813, "grad_norm": 0.26505008339881897, "learning_rate": 8.07106356344834e-05, "loss": 0.8349, "step": 65 }, { "epoch": 0.008724965298433472, "grad_norm": 0.292170912027359, "learning_rate": 8.005405736415126e-05, "loss": 0.8387, "step": 66 }, { "epoch": 0.00885716174234913, "grad_norm": 0.2884085774421692, "learning_rate": 7.938926261462366e-05, "loss": 0.8228, "step": 67 }, { "epoch": 0.00898935818626479, "grad_norm": 0.2931410074234009, "learning_rate": 7.871643313414718e-05, "loss": 0.9008, "step": 68 }, { "epoch": 0.009121554630180448, "grad_norm": 0.2584120035171509, "learning_rate": 7.803575286758364e-05, "loss": 0.7052, "step": 69 }, { "epoch": 0.009253751074096107, "grad_norm": 0.2519213557243347, "learning_rate": 7.734740790612136e-05, "loss": 0.7115, "step": 70 }, { "epoch": 0.009385947518011766, "grad_norm": 0.24533411860466003, "learning_rate": 7.66515864363997e-05, "loss": 0.7453, "step": 71 }, { "epoch": 0.009518143961927425, "grad_norm": 0.2644747197628021, "learning_rate": 7.594847868906076e-05, "loss": 0.8302, "step": 72 }, { "epoch": 0.009650340405843083, "grad_norm": 0.283643513917923, "learning_rate": 7.52382768867422e-05, "loss": 0.7019, "step": 73 }, { "epoch": 0.009782536849758742, "grad_norm": 0.26060959696769714, "learning_rate": 7.452117519152542e-05, "loss": 0.8799, "step": 74 }, { "epoch": 0.009914733293674401, "grad_norm": 0.2720307409763336, "learning_rate": 7.379736965185368e-05, "loss": 0.865, "step": 75 }, { "epoch": 0.010046929737590058, "grad_norm": 0.26768940687179565, "learning_rate": 7.30670581489344e-05, "loss": 0.7436, "step": 76 }, { "epoch": 0.010179126181505717, "grad_norm": 0.27865222096443176, "learning_rate": 7.233044034264034e-05, "loss": 0.8181, "step": 77 }, { "epoch": 0.010311322625421376, "grad_norm": 0.2616931200027466, "learning_rate": 7.158771761692464e-05, "loss": 0.8217, "step": 78 }, { "epoch": 0.010443519069337034, "grad_norm": 0.2953363060951233, "learning_rate": 7.083909302476453e-05, "loss": 0.8209, "step": 79 }, { "epoch": 0.010575715513252693, "grad_norm": 0.295382022857666, "learning_rate": 7.008477123264848e-05, "loss": 0.7624, "step": 80 }, { "epoch": 0.010707911957168352, "grad_norm": 0.30855387449264526, "learning_rate": 6.932495846462261e-05, "loss": 0.8067, "step": 81 }, { "epoch": 0.01084010840108401, "grad_norm": 0.307273268699646, "learning_rate": 6.855986244591104e-05, "loss": 0.8527, "step": 82 }, { "epoch": 0.01097230484499967, "grad_norm": 0.3054741322994232, "learning_rate": 6.778969234612584e-05, "loss": 0.6751, "step": 83 }, { "epoch": 0.011104501288915328, "grad_norm": 0.300057053565979, "learning_rate": 6.701465872208216e-05, "loss": 0.7919, "step": 84 }, { "epoch": 0.011236697732830987, "grad_norm": 0.3191482126712799, "learning_rate": 6.623497346023418e-05, "loss": 0.8327, "step": 85 }, { "epoch": 0.011368894176746646, "grad_norm": 0.3450177311897278, "learning_rate": 6.545084971874738e-05, "loss": 0.7622, "step": 86 }, { "epoch": 0.011501090620662305, "grad_norm": 0.3140789270401001, "learning_rate": 6.466250186922325e-05, "loss": 0.6407, "step": 87 }, { "epoch": 0.011633287064577964, "grad_norm": 0.327387273311615, "learning_rate": 6.387014543809223e-05, "loss": 0.8469, "step": 88 }, { "epoch": 0.01176548350849362, "grad_norm": 0.33975428342819214, "learning_rate": 6.307399704769099e-05, "loss": 0.7539, "step": 89 }, { "epoch": 0.01189767995240928, "grad_norm": 0.3623250722885132, "learning_rate": 6.227427435703997e-05, "loss": 0.7197, "step": 90 }, { "epoch": 0.012029876396324938, "grad_norm": 0.3607431948184967, "learning_rate": 6.147119600233758e-05, "loss": 0.693, "step": 91 }, { "epoch": 0.012162072840240597, "grad_norm": 0.34808632731437683, "learning_rate": 6.066498153718735e-05, "loss": 0.7834, "step": 92 }, { "epoch": 0.012294269284156256, "grad_norm": 0.36442074179649353, "learning_rate": 5.985585137257401e-05, "loss": 0.7211, "step": 93 }, { "epoch": 0.012426465728071915, "grad_norm": 0.42843687534332275, "learning_rate": 5.90440267166055e-05, "loss": 0.7012, "step": 94 }, { "epoch": 0.012558662171987573, "grad_norm": 0.419827401638031, "learning_rate": 5.8229729514036705e-05, "loss": 0.7325, "step": 95 }, { "epoch": 0.012690858615903232, "grad_norm": 0.4016064703464508, "learning_rate": 5.74131823855921e-05, "loss": 0.7227, "step": 96 }, { "epoch": 0.012823055059818891, "grad_norm": 0.5434070229530334, "learning_rate": 5.6594608567103456e-05, "loss": 0.6988, "step": 97 }, { "epoch": 0.01295525150373455, "grad_norm": 0.42109209299087524, "learning_rate": 5.577423184847932e-05, "loss": 0.6488, "step": 98 }, { "epoch": 0.013087447947650209, "grad_norm": 0.4489916265010834, "learning_rate": 5.495227651252315e-05, "loss": 0.6232, "step": 99 }, { "epoch": 0.013219644391565867, "grad_norm": 0.5862768888473511, "learning_rate": 5.4128967273616625e-05, "loss": 0.6915, "step": 100 }, { "epoch": 0.013219644391565867, "eval_loss": 0.7334732413291931, "eval_runtime": 1283.9042, "eval_samples_per_second": 9.923, "eval_steps_per_second": 2.481, "step": 100 }, { "epoch": 0.013351840835481526, "grad_norm": 0.24181707203388214, "learning_rate": 5.330452921628497e-05, "loss": 0.596, "step": 101 }, { "epoch": 0.013484037279397185, "grad_norm": 0.23782025277614594, "learning_rate": 5.247918773366112e-05, "loss": 0.5794, "step": 102 }, { "epoch": 0.013616233723312842, "grad_norm": 0.2528301775455475, "learning_rate": 5.165316846586541e-05, "loss": 0.5885, "step": 103 }, { "epoch": 0.0137484301672285, "grad_norm": 0.21014751493930817, "learning_rate": 5.0826697238317935e-05, "loss": 0.5951, "step": 104 }, { "epoch": 0.01388062661114416, "grad_norm": 0.22236773371696472, "learning_rate": 5e-05, "loss": 0.7092, "step": 105 }, { "epoch": 0.014012823055059818, "grad_norm": 0.21512827277183533, "learning_rate": 4.917330276168208e-05, "loss": 0.6623, "step": 106 }, { "epoch": 0.014145019498975477, "grad_norm": 0.21951237320899963, "learning_rate": 4.834683153413459e-05, "loss": 0.6856, "step": 107 }, { "epoch": 0.014277215942891136, "grad_norm": 0.21105462312698364, "learning_rate": 4.7520812266338885e-05, "loss": 0.5825, "step": 108 }, { "epoch": 0.014409412386806795, "grad_norm": 0.27910059690475464, "learning_rate": 4.669547078371504e-05, "loss": 0.8379, "step": 109 }, { "epoch": 0.014541608830722454, "grad_norm": 0.2623700499534607, "learning_rate": 4.5871032726383386e-05, "loss": 0.7664, "step": 110 }, { "epoch": 0.014673805274638112, "grad_norm": 0.24401693046092987, "learning_rate": 4.504772348747687e-05, "loss": 0.6521, "step": 111 }, { "epoch": 0.014806001718553771, "grad_norm": 0.2733868658542633, "learning_rate": 4.4225768151520694e-05, "loss": 0.6896, "step": 112 }, { "epoch": 0.01493819816246943, "grad_norm": 0.29973694682121277, "learning_rate": 4.3405391432896555e-05, "loss": 0.8034, "step": 113 }, { "epoch": 0.015070394606385089, "grad_norm": 0.2729474604129791, "learning_rate": 4.2586817614407895e-05, "loss": 0.721, "step": 114 }, { "epoch": 0.015202591050300748, "grad_norm": 0.3053736686706543, "learning_rate": 4.17702704859633e-05, "loss": 0.7217, "step": 115 }, { "epoch": 0.015334787494216406, "grad_norm": 0.30556952953338623, "learning_rate": 4.095597328339452e-05, "loss": 0.736, "step": 116 }, { "epoch": 0.015466983938132063, "grad_norm": 0.356710284948349, "learning_rate": 4.0144148627425993e-05, "loss": 0.8814, "step": 117 }, { "epoch": 0.015599180382047722, "grad_norm": 0.28044766187667847, "learning_rate": 3.933501846281267e-05, "loss": 0.7973, "step": 118 }, { "epoch": 0.015731376825963383, "grad_norm": 0.26613718271255493, "learning_rate": 3.852880399766243e-05, "loss": 0.7435, "step": 119 }, { "epoch": 0.01586357326987904, "grad_norm": 0.2693482041358948, "learning_rate": 3.772572564296005e-05, "loss": 0.7187, "step": 120 }, { "epoch": 0.0159957697137947, "grad_norm": 0.278363436460495, "learning_rate": 3.6926002952309016e-05, "loss": 0.6341, "step": 121 }, { "epoch": 0.01612796615771036, "grad_norm": 0.2664760947227478, "learning_rate": 3.612985456190778e-05, "loss": 0.6746, "step": 122 }, { "epoch": 0.016260162601626018, "grad_norm": 0.2626112103462219, "learning_rate": 3.533749813077677e-05, "loss": 0.6684, "step": 123 }, { "epoch": 0.016392359045541673, "grad_norm": 0.2720891237258911, "learning_rate": 3.4549150281252636e-05, "loss": 0.7468, "step": 124 }, { "epoch": 0.016524555489457332, "grad_norm": 0.32452595233917236, "learning_rate": 3.3765026539765834e-05, "loss": 0.8739, "step": 125 }, { "epoch": 0.01665675193337299, "grad_norm": 0.2681269943714142, "learning_rate": 3.298534127791785e-05, "loss": 0.6395, "step": 126 }, { "epoch": 0.01678894837728865, "grad_norm": 0.2877364158630371, "learning_rate": 3.221030765387417e-05, "loss": 0.7308, "step": 127 }, { "epoch": 0.01692114482120431, "grad_norm": 0.28337937593460083, "learning_rate": 3.144013755408895e-05, "loss": 0.6387, "step": 128 }, { "epoch": 0.017053341265119967, "grad_norm": 0.3185059428215027, "learning_rate": 3.0675041535377405e-05, "loss": 0.8061, "step": 129 }, { "epoch": 0.017185537709035626, "grad_norm": 0.2694532573223114, "learning_rate": 2.991522876735154e-05, "loss": 0.6599, "step": 130 }, { "epoch": 0.017317734152951285, "grad_norm": 0.3253261148929596, "learning_rate": 2.916090697523549e-05, "loss": 0.7728, "step": 131 }, { "epoch": 0.017449930596866944, "grad_norm": 0.3177144229412079, "learning_rate": 2.8412282383075363e-05, "loss": 0.7772, "step": 132 }, { "epoch": 0.017582127040782602, "grad_norm": 0.2779819071292877, "learning_rate": 2.766955965735968e-05, "loss": 0.6469, "step": 133 }, { "epoch": 0.01771432348469826, "grad_norm": 0.29959800839424133, "learning_rate": 2.693294185106562e-05, "loss": 0.6401, "step": 134 }, { "epoch": 0.01784651992861392, "grad_norm": 0.3584449887275696, "learning_rate": 2.6202630348146324e-05, "loss": 0.8156, "step": 135 }, { "epoch": 0.01797871637252958, "grad_norm": 0.3743143677711487, "learning_rate": 2.547882480847461e-05, "loss": 0.7496, "step": 136 }, { "epoch": 0.018110912816445238, "grad_norm": 0.37764647603034973, "learning_rate": 2.476172311325783e-05, "loss": 0.8135, "step": 137 }, { "epoch": 0.018243109260360896, "grad_norm": 0.34589067101478577, "learning_rate": 2.405152131093926e-05, "loss": 0.7126, "step": 138 }, { "epoch": 0.018375305704276555, "grad_norm": 0.3663211762905121, "learning_rate": 2.3348413563600325e-05, "loss": 0.7474, "step": 139 }, { "epoch": 0.018507502148192214, "grad_norm": 0.3387898802757263, "learning_rate": 2.2652592093878666e-05, "loss": 0.6598, "step": 140 }, { "epoch": 0.018639698592107873, "grad_norm": 0.44442814588546753, "learning_rate": 2.196424713241637e-05, "loss": 0.7162, "step": 141 }, { "epoch": 0.01877189503602353, "grad_norm": 0.38971763849258423, "learning_rate": 2.128356686585282e-05, "loss": 0.7364, "step": 142 }, { "epoch": 0.01890409147993919, "grad_norm": 0.3823245167732239, "learning_rate": 2.061073738537635e-05, "loss": 0.7215, "step": 143 }, { "epoch": 0.01903628792385485, "grad_norm": 0.40516719222068787, "learning_rate": 1.9945942635848748e-05, "loss": 0.7439, "step": 144 }, { "epoch": 0.019168484367770508, "grad_norm": 0.3880475163459778, "learning_rate": 1.928936436551661e-05, "loss": 0.7098, "step": 145 }, { "epoch": 0.019300680811686167, "grad_norm": 0.39626526832580566, "learning_rate": 1.8641182076323148e-05, "loss": 0.6337, "step": 146 }, { "epoch": 0.019432877255601826, "grad_norm": 0.40710315108299255, "learning_rate": 1.800157297483417e-05, "loss": 0.6118, "step": 147 }, { "epoch": 0.019565073699517484, "grad_norm": 0.40455126762390137, "learning_rate": 1.7370711923791567e-05, "loss": 0.5703, "step": 148 }, { "epoch": 0.019697270143433143, "grad_norm": 0.4407866299152374, "learning_rate": 1.6748771394307585e-05, "loss": 0.5417, "step": 149 }, { "epoch": 0.019829466587348802, "grad_norm": 0.6249489188194275, "learning_rate": 1.6135921418712956e-05, "loss": 0.5839, "step": 150 }, { "epoch": 0.019829466587348802, "eval_loss": 0.7003475427627563, "eval_runtime": 1285.1101, "eval_samples_per_second": 9.914, "eval_steps_per_second": 2.478, "step": 150 }, { "epoch": 0.019961663031264457, "grad_norm": 0.2190771996974945, "learning_rate": 1.553232954407171e-05, "loss": 0.547, "step": 151 }, { "epoch": 0.020093859475180116, "grad_norm": 0.21638715267181396, "learning_rate": 1.4938160786375572e-05, "loss": 0.6147, "step": 152 }, { "epoch": 0.020226055919095775, "grad_norm": 0.23970794677734375, "learning_rate": 1.435357758543015e-05, "loss": 0.6681, "step": 153 }, { "epoch": 0.020358252363011434, "grad_norm": 0.22642923891544342, "learning_rate": 1.3778739760445552e-05, "loss": 0.6036, "step": 154 }, { "epoch": 0.020490448806927093, "grad_norm": 0.2349853813648224, "learning_rate": 1.3213804466343421e-05, "loss": 0.633, "step": 155 }, { "epoch": 0.02062264525084275, "grad_norm": 0.28051039576530457, "learning_rate": 1.2658926150792322e-05, "loss": 0.6268, "step": 156 }, { "epoch": 0.02075484169475841, "grad_norm": 0.2436865270137787, "learning_rate": 1.2114256511983274e-05, "loss": 0.6419, "step": 157 }, { "epoch": 0.02088703813867407, "grad_norm": 0.2448512613773346, "learning_rate": 1.157994445715706e-05, "loss": 0.6428, "step": 158 }, { "epoch": 0.021019234582589728, "grad_norm": 0.26140111684799194, "learning_rate": 1.1056136061894384e-05, "loss": 0.6894, "step": 159 }, { "epoch": 0.021151431026505386, "grad_norm": 0.2788008153438568, "learning_rate": 1.0542974530180327e-05, "loss": 0.7297, "step": 160 }, { "epoch": 0.021283627470421045, "grad_norm": 0.26800769567489624, "learning_rate": 1.0040600155253765e-05, "loss": 0.7211, "step": 161 }, { "epoch": 0.021415823914336704, "grad_norm": 0.25211745500564575, "learning_rate": 9.549150281252633e-06, "loss": 0.686, "step": 162 }, { "epoch": 0.021548020358252363, "grad_norm": 0.31665900349617004, "learning_rate": 9.068759265665384e-06, "loss": 0.6526, "step": 163 }, { "epoch": 0.02168021680216802, "grad_norm": 0.2835245132446289, "learning_rate": 8.599558442598998e-06, "loss": 0.7668, "step": 164 }, { "epoch": 0.02181241324608368, "grad_norm": 0.28432321548461914, "learning_rate": 8.141676086873572e-06, "loss": 0.6613, "step": 165 }, { "epoch": 0.02194460968999934, "grad_norm": 0.309469074010849, "learning_rate": 7.695237378953223e-06, "loss": 0.7844, "step": 166 }, { "epoch": 0.022076806133914998, "grad_norm": 0.3084992468357086, "learning_rate": 7.260364370723044e-06, "loss": 0.7063, "step": 167 }, { "epoch": 0.022209002577830657, "grad_norm": 0.28169193863868713, "learning_rate": 6.837175952121306e-06, "loss": 0.6554, "step": 168 }, { "epoch": 0.022341199021746316, "grad_norm": 0.34954968094825745, "learning_rate": 6.425787818636131e-06, "loss": 0.7327, "step": 169 }, { "epoch": 0.022473395465661974, "grad_norm": 0.26686882972717285, "learning_rate": 6.026312439675552e-06, "loss": 0.6385, "step": 170 }, { "epoch": 0.022605591909577633, "grad_norm": 0.2663428783416748, "learning_rate": 5.6388590278194096e-06, "loss": 0.7073, "step": 171 }, { "epoch": 0.022737788353493292, "grad_norm": 0.2708145081996918, "learning_rate": 5.263533508961827e-06, "loss": 0.679, "step": 172 }, { "epoch": 0.02286998479740895, "grad_norm": 0.28522220253944397, "learning_rate": 4.900438493352055e-06, "loss": 0.7062, "step": 173 }, { "epoch": 0.02300218124132461, "grad_norm": 0.2947212755680084, "learning_rate": 4.549673247541875e-06, "loss": 0.7412, "step": 174 }, { "epoch": 0.02313437768524027, "grad_norm": 0.2984776198863983, "learning_rate": 4.2113336672471245e-06, "loss": 0.6706, "step": 175 }, { "epoch": 0.023266574129155927, "grad_norm": 0.2954590916633606, "learning_rate": 3.885512251130763e-06, "loss": 0.6932, "step": 176 }, { "epoch": 0.023398770573071586, "grad_norm": 0.30990472435951233, "learning_rate": 3.5722980755146517e-06, "loss": 0.7621, "step": 177 }, { "epoch": 0.02353096701698724, "grad_norm": 0.3284866511821747, "learning_rate": 3.271776770026963e-06, "loss": 0.6574, "step": 178 }, { "epoch": 0.0236631634609029, "grad_norm": 0.2818564772605896, "learning_rate": 2.9840304941919415e-06, "loss": 0.6757, "step": 179 }, { "epoch": 0.02379535990481856, "grad_norm": 0.3373962342739105, "learning_rate": 2.7091379149682685e-06, "loss": 0.8335, "step": 180 }, { "epoch": 0.023927556348734218, "grad_norm": 0.31307893991470337, "learning_rate": 2.4471741852423237e-06, "loss": 0.691, "step": 181 }, { "epoch": 0.024059752792649877, "grad_norm": 0.3503880798816681, "learning_rate": 2.1982109232821178e-06, "loss": 0.762, "step": 182 }, { "epoch": 0.024191949236565535, "grad_norm": 0.3093627393245697, "learning_rate": 1.962316193157593e-06, "loss": 0.6622, "step": 183 }, { "epoch": 0.024324145680481194, "grad_norm": 0.34426236152648926, "learning_rate": 1.7395544861325718e-06, "loss": 0.7187, "step": 184 }, { "epoch": 0.024456342124396853, "grad_norm": 0.33490943908691406, "learning_rate": 1.5299867030334814e-06, "loss": 0.7159, "step": 185 }, { "epoch": 0.02458853856831251, "grad_norm": 0.3667902648448944, "learning_rate": 1.333670137599713e-06, "loss": 0.7885, "step": 186 }, { "epoch": 0.02472073501222817, "grad_norm": 0.3871891498565674, "learning_rate": 1.1506584608200367e-06, "loss": 0.7991, "step": 187 }, { "epoch": 0.02485293145614383, "grad_norm": 0.3386266231536865, "learning_rate": 9.810017062595322e-07, "loss": 0.6831, "step": 188 }, { "epoch": 0.024985127900059488, "grad_norm": 0.3666995167732239, "learning_rate": 8.247462563808817e-07, "loss": 0.6735, "step": 189 }, { "epoch": 0.025117324343975147, "grad_norm": 0.4168129563331604, "learning_rate": 6.819348298638839e-07, "loss": 0.8024, "step": 190 }, { "epoch": 0.025249520787890806, "grad_norm": 0.3915609121322632, "learning_rate": 5.526064699265753e-07, "loss": 0.7442, "step": 191 }, { "epoch": 0.025381717231806464, "grad_norm": 0.4013979732990265, "learning_rate": 4.367965336512403e-07, "loss": 0.7614, "step": 192 }, { "epoch": 0.025513913675722123, "grad_norm": 0.3672316074371338, "learning_rate": 3.3453668231809286e-07, "loss": 0.6, "step": 193 }, { "epoch": 0.025646110119637782, "grad_norm": 0.391980916261673, "learning_rate": 2.458548727494292e-07, "loss": 0.6969, "step": 194 }, { "epoch": 0.02577830656355344, "grad_norm": 0.4123277962207794, "learning_rate": 1.7077534966650766e-07, "loss": 0.7068, "step": 195 }, { "epoch": 0.0259105030074691, "grad_norm": 0.4344162344932556, "learning_rate": 1.0931863906127327e-07, "loss": 0.7109, "step": 196 }, { "epoch": 0.02604269945138476, "grad_norm": 0.4235592186450958, "learning_rate": 6.150154258476315e-08, "loss": 0.6262, "step": 197 }, { "epoch": 0.026174895895300417, "grad_norm": 0.4420209527015686, "learning_rate": 2.7337132953697554e-08, "loss": 0.6347, "step": 198 }, { "epoch": 0.026307092339216076, "grad_norm": 0.43775972723960876, "learning_rate": 6.834750376549792e-09, "loss": 0.6553, "step": 199 }, { "epoch": 0.026439288783131735, "grad_norm": 0.5719525218009949, "learning_rate": 0.0, "loss": 0.6075, "step": 200 }, { "epoch": 0.026439288783131735, "eval_loss": 0.6914480924606323, "eval_runtime": 1285.2242, "eval_samples_per_second": 9.913, "eval_steps_per_second": 2.478, "step": 200 } ], "logging_steps": 1, "max_steps": 200, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 50, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 5, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 3.7586505622880256e+17, "train_batch_size": 8, "trial_name": null, "trial_params": null }