{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9997808596562627, "global_step": 3992, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 1.6666666666666668e-07, "loss": 1.5241, "step": 1 }, { "epoch": 0.0, "learning_rate": 3.3333333333333335e-07, "loss": 1.5171, "step": 2 }, { "epoch": 0.0, "learning_rate": 5.000000000000001e-07, "loss": 1.3526, "step": 3 }, { "epoch": 0.0, "learning_rate": 6.666666666666667e-07, "loss": 1.3309, "step": 4 }, { "epoch": 0.0, "learning_rate": 8.333333333333333e-07, "loss": 1.3196, "step": 5 }, { "epoch": 0.0, "learning_rate": 1.0000000000000002e-06, "loss": 1.3371, "step": 6 }, { "epoch": 0.0, "learning_rate": 1.1666666666666668e-06, "loss": 1.2855, "step": 7 }, { "epoch": 0.0, "learning_rate": 1.3333333333333334e-06, "loss": 1.3379, "step": 8 }, { "epoch": 0.0, "learning_rate": 1.5e-06, "loss": 1.2656, "step": 9 }, { "epoch": 0.0, "learning_rate": 1.6666666666666667e-06, "loss": 1.3073, "step": 10 }, { "epoch": 0.0, "learning_rate": 1.8333333333333333e-06, "loss": 1.2441, "step": 11 }, { "epoch": 0.0, "learning_rate": 2.0000000000000003e-06, "loss": 1.2974, "step": 12 }, { "epoch": 0.0, "learning_rate": 2.166666666666667e-06, "loss": 1.3072, "step": 13 }, { "epoch": 0.0, "learning_rate": 2.3333333333333336e-06, "loss": 1.286, "step": 14 }, { "epoch": 0.0, "learning_rate": 2.5e-06, "loss": 1.2486, "step": 15 }, { "epoch": 0.0, "learning_rate": 2.666666666666667e-06, "loss": 1.3356, "step": 16 }, { "epoch": 0.0, "learning_rate": 2.8333333333333335e-06, "loss": 1.3348, "step": 17 }, { "epoch": 0.0, "learning_rate": 3e-06, "loss": 1.3323, "step": 18 }, { "epoch": 0.0, "learning_rate": 3.1666666666666667e-06, "loss": 1.2719, "step": 19 }, { "epoch": 0.01, "learning_rate": 3.3333333333333333e-06, "loss": 1.2636, "step": 20 }, { "epoch": 0.01, "learning_rate": 3.5e-06, "loss": 1.2767, "step": 21 }, { "epoch": 0.01, "learning_rate": 3.6666666666666666e-06, "loss": 1.2713, "step": 22 }, { "epoch": 0.01, "learning_rate": 3.833333333333334e-06, "loss": 1.2215, "step": 23 }, { "epoch": 0.01, "learning_rate": 4.000000000000001e-06, "loss": 1.2324, "step": 24 }, { "epoch": 0.01, "learning_rate": 4.166666666666667e-06, "loss": 1.28, "step": 25 }, { "epoch": 0.01, "learning_rate": 4.333333333333334e-06, "loss": 1.2294, "step": 26 }, { "epoch": 0.01, "learning_rate": 4.5e-06, "loss": 1.2735, "step": 27 }, { "epoch": 0.01, "learning_rate": 4.666666666666667e-06, "loss": 1.2608, "step": 28 }, { "epoch": 0.01, "learning_rate": 4.833333333333333e-06, "loss": 1.2264, "step": 29 }, { "epoch": 0.01, "learning_rate": 5e-06, "loss": 1.2516, "step": 30 }, { "epoch": 0.01, "learning_rate": 5.1666666666666675e-06, "loss": 1.2233, "step": 31 }, { "epoch": 0.01, "learning_rate": 5.333333333333334e-06, "loss": 1.2388, "step": 32 }, { "epoch": 0.01, "learning_rate": 5.500000000000001e-06, "loss": 1.2421, "step": 33 }, { "epoch": 0.01, "learning_rate": 5.666666666666667e-06, "loss": 1.2545, "step": 34 }, { "epoch": 0.01, "learning_rate": 5.833333333333334e-06, "loss": 1.2466, "step": 35 }, { "epoch": 0.01, "learning_rate": 6e-06, "loss": 1.2587, "step": 36 }, { "epoch": 0.01, "learning_rate": 6.166666666666667e-06, "loss": 1.2631, "step": 37 }, { "epoch": 0.01, "learning_rate": 6.333333333333333e-06, "loss": 1.1755, "step": 38 }, { "epoch": 0.01, "learning_rate": 6.5000000000000004e-06, "loss": 1.2307, "step": 39 }, { "epoch": 0.01, "learning_rate": 6.666666666666667e-06, "loss": 1.2059, "step": 40 }, { "epoch": 0.01, "learning_rate": 6.833333333333334e-06, "loss": 1.2525, "step": 41 }, { "epoch": 0.01, "learning_rate": 7e-06, "loss": 1.2164, "step": 42 }, { "epoch": 0.01, "learning_rate": 7.166666666666667e-06, "loss": 1.3042, "step": 43 }, { "epoch": 0.01, "learning_rate": 7.333333333333333e-06, "loss": 1.2542, "step": 44 }, { "epoch": 0.01, "learning_rate": 7.500000000000001e-06, "loss": 1.2182, "step": 45 }, { "epoch": 0.01, "learning_rate": 7.666666666666667e-06, "loss": 1.2506, "step": 46 }, { "epoch": 0.01, "learning_rate": 7.833333333333333e-06, "loss": 1.2525, "step": 47 }, { "epoch": 0.01, "learning_rate": 8.000000000000001e-06, "loss": 1.2552, "step": 48 }, { "epoch": 0.01, "learning_rate": 8.166666666666668e-06, "loss": 1.1882, "step": 49 }, { "epoch": 0.01, "learning_rate": 8.333333333333334e-06, "loss": 1.1986, "step": 50 }, { "epoch": 0.01, "learning_rate": 8.5e-06, "loss": 1.183, "step": 51 }, { "epoch": 0.01, "learning_rate": 8.666666666666668e-06, "loss": 1.2273, "step": 52 }, { "epoch": 0.01, "learning_rate": 8.833333333333334e-06, "loss": 1.1502, "step": 53 }, { "epoch": 0.01, "learning_rate": 9e-06, "loss": 1.2466, "step": 54 }, { "epoch": 0.01, "learning_rate": 9.166666666666666e-06, "loss": 1.2473, "step": 55 }, { "epoch": 0.01, "learning_rate": 9.333333333333334e-06, "loss": 1.2581, "step": 56 }, { "epoch": 0.01, "learning_rate": 9.5e-06, "loss": 1.2913, "step": 57 }, { "epoch": 0.01, "learning_rate": 9.666666666666667e-06, "loss": 1.2201, "step": 58 }, { "epoch": 0.01, "learning_rate": 9.833333333333333e-06, "loss": 1.2283, "step": 59 }, { "epoch": 0.02, "learning_rate": 1e-05, "loss": 1.2788, "step": 60 }, { "epoch": 0.02, "learning_rate": 1.0166666666666667e-05, "loss": 1.1618, "step": 61 }, { "epoch": 0.02, "learning_rate": 1.0333333333333335e-05, "loss": 1.2088, "step": 62 }, { "epoch": 0.02, "learning_rate": 1.0500000000000001e-05, "loss": 1.2099, "step": 63 }, { "epoch": 0.02, "learning_rate": 1.0666666666666667e-05, "loss": 1.2179, "step": 64 }, { "epoch": 0.02, "learning_rate": 1.0833333333333334e-05, "loss": 1.2403, "step": 65 }, { "epoch": 0.02, "learning_rate": 1.1000000000000001e-05, "loss": 1.2374, "step": 66 }, { "epoch": 0.02, "learning_rate": 1.1166666666666668e-05, "loss": 1.2037, "step": 67 }, { "epoch": 0.02, "learning_rate": 1.1333333333333334e-05, "loss": 1.2166, "step": 68 }, { "epoch": 0.02, "learning_rate": 1.15e-05, "loss": 1.2076, "step": 69 }, { "epoch": 0.02, "learning_rate": 1.1666666666666668e-05, "loss": 1.289, "step": 70 }, { "epoch": 0.02, "learning_rate": 1.1833333333333334e-05, "loss": 1.2102, "step": 71 }, { "epoch": 0.02, "learning_rate": 1.2e-05, "loss": 1.2453, "step": 72 }, { "epoch": 0.02, "learning_rate": 1.2166666666666667e-05, "loss": 1.2396, "step": 73 }, { "epoch": 0.02, "learning_rate": 1.2333333333333334e-05, "loss": 1.2432, "step": 74 }, { "epoch": 0.02, "learning_rate": 1.25e-05, "loss": 1.2205, "step": 75 }, { "epoch": 0.02, "learning_rate": 1.2666666666666667e-05, "loss": 1.2291, "step": 76 }, { "epoch": 0.02, "learning_rate": 1.2833333333333335e-05, "loss": 1.1993, "step": 77 }, { "epoch": 0.02, "learning_rate": 1.3000000000000001e-05, "loss": 1.2181, "step": 78 }, { "epoch": 0.02, "learning_rate": 1.3166666666666667e-05, "loss": 1.2406, "step": 79 }, { "epoch": 0.02, "learning_rate": 1.3333333333333333e-05, "loss": 1.2773, "step": 80 }, { "epoch": 0.02, "learning_rate": 1.3500000000000001e-05, "loss": 1.2193, "step": 81 }, { "epoch": 0.02, "learning_rate": 1.3666666666666667e-05, "loss": 1.1978, "step": 82 }, { "epoch": 0.02, "learning_rate": 1.3833333333333334e-05, "loss": 1.2453, "step": 83 }, { "epoch": 0.02, "learning_rate": 1.4e-05, "loss": 1.1862, "step": 84 }, { "epoch": 0.02, "learning_rate": 1.416666666666667e-05, "loss": 1.239, "step": 85 }, { "epoch": 0.02, "learning_rate": 1.4333333333333334e-05, "loss": 1.1952, "step": 86 }, { "epoch": 0.02, "learning_rate": 1.45e-05, "loss": 1.2194, "step": 87 }, { "epoch": 0.02, "learning_rate": 1.4666666666666666e-05, "loss": 1.2602, "step": 88 }, { "epoch": 0.02, "learning_rate": 1.4833333333333336e-05, "loss": 1.1942, "step": 89 }, { "epoch": 0.02, "learning_rate": 1.5000000000000002e-05, "loss": 1.2267, "step": 90 }, { "epoch": 0.02, "learning_rate": 1.5166666666666667e-05, "loss": 1.172, "step": 91 }, { "epoch": 0.02, "learning_rate": 1.5333333333333334e-05, "loss": 1.2234, "step": 92 }, { "epoch": 0.02, "learning_rate": 1.55e-05, "loss": 1.2092, "step": 93 }, { "epoch": 0.02, "learning_rate": 1.5666666666666667e-05, "loss": 1.2637, "step": 94 }, { "epoch": 0.02, "learning_rate": 1.5833333333333333e-05, "loss": 1.1498, "step": 95 }, { "epoch": 0.02, "learning_rate": 1.6000000000000003e-05, "loss": 1.2004, "step": 96 }, { "epoch": 0.02, "learning_rate": 1.616666666666667e-05, "loss": 1.2044, "step": 97 }, { "epoch": 0.02, "learning_rate": 1.6333333333333335e-05, "loss": 1.1938, "step": 98 }, { "epoch": 0.02, "learning_rate": 1.65e-05, "loss": 1.22, "step": 99 }, { "epoch": 0.03, "learning_rate": 1.6666666666666667e-05, "loss": 1.1935, "step": 100 }, { "epoch": 0.03, "learning_rate": 1.6833333333333334e-05, "loss": 1.217, "step": 101 }, { "epoch": 0.03, "learning_rate": 1.7e-05, "loss": 1.2278, "step": 102 }, { "epoch": 0.03, "learning_rate": 1.7166666666666666e-05, "loss": 1.208, "step": 103 }, { "epoch": 0.03, "learning_rate": 1.7333333333333336e-05, "loss": 1.1942, "step": 104 }, { "epoch": 0.03, "learning_rate": 1.7500000000000002e-05, "loss": 1.1902, "step": 105 }, { "epoch": 0.03, "learning_rate": 1.7666666666666668e-05, "loss": 1.2095, "step": 106 }, { "epoch": 0.03, "learning_rate": 1.7833333333333334e-05, "loss": 1.2064, "step": 107 }, { "epoch": 0.03, "learning_rate": 1.8e-05, "loss": 1.2441, "step": 108 }, { "epoch": 0.03, "learning_rate": 1.8166666666666667e-05, "loss": 1.1674, "step": 109 }, { "epoch": 0.03, "learning_rate": 1.8333333333333333e-05, "loss": 1.2397, "step": 110 }, { "epoch": 0.03, "learning_rate": 1.8500000000000002e-05, "loss": 1.2449, "step": 111 }, { "epoch": 0.03, "learning_rate": 1.866666666666667e-05, "loss": 1.184, "step": 112 }, { "epoch": 0.03, "learning_rate": 1.8833333333333335e-05, "loss": 1.2232, "step": 113 }, { "epoch": 0.03, "learning_rate": 1.9e-05, "loss": 1.2024, "step": 114 }, { "epoch": 0.03, "learning_rate": 1.916666666666667e-05, "loss": 1.2359, "step": 115 }, { "epoch": 0.03, "learning_rate": 1.9333333333333333e-05, "loss": 1.2314, "step": 116 }, { "epoch": 0.03, "learning_rate": 1.95e-05, "loss": 1.2119, "step": 117 }, { "epoch": 0.03, "learning_rate": 1.9666666666666666e-05, "loss": 1.2211, "step": 118 }, { "epoch": 0.03, "learning_rate": 1.9833333333333335e-05, "loss": 1.2197, "step": 119 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 1.1807, "step": 120 }, { "epoch": 0.03, "learning_rate": 1.9999996708460824e-05, "loss": 1.1395, "step": 121 }, { "epoch": 0.03, "learning_rate": 1.999998683384546e-05, "loss": 1.221, "step": 122 }, { "epoch": 0.03, "learning_rate": 1.9999970376160406e-05, "loss": 1.2181, "step": 123 }, { "epoch": 0.03, "learning_rate": 1.9999947335416502e-05, "loss": 1.1999, "step": 124 }, { "epoch": 0.03, "learning_rate": 1.9999917711628908e-05, "loss": 1.1939, "step": 125 }, { "epoch": 0.03, "learning_rate": 1.9999881504817135e-05, "loss": 1.1833, "step": 126 }, { "epoch": 0.03, "learning_rate": 1.9999838715005012e-05, "loss": 1.1989, "step": 127 }, { "epoch": 0.03, "learning_rate": 1.999978934222071e-05, "loss": 1.2075, "step": 128 }, { "epoch": 0.03, "learning_rate": 1.999973338649673e-05, "loss": 1.1864, "step": 129 }, { "epoch": 0.03, "learning_rate": 1.9999670847869906e-05, "loss": 1.1893, "step": 130 }, { "epoch": 0.03, "learning_rate": 1.9999601726381415e-05, "loss": 1.2212, "step": 131 }, { "epoch": 0.03, "learning_rate": 1.9999526022076757e-05, "loss": 1.2023, "step": 132 }, { "epoch": 0.03, "learning_rate": 1.9999443735005767e-05, "loss": 1.2052, "step": 133 }, { "epoch": 0.03, "learning_rate": 1.999935486522261e-05, "loss": 1.1775, "step": 134 }, { "epoch": 0.03, "learning_rate": 1.99992594127858e-05, "loss": 1.195, "step": 135 }, { "epoch": 0.03, "learning_rate": 1.999915737775817e-05, "loss": 1.1603, "step": 136 }, { "epoch": 0.03, "learning_rate": 1.999904876020689e-05, "loss": 1.2248, "step": 137 }, { "epoch": 0.03, "learning_rate": 1.9998933560203464e-05, "loss": 1.1726, "step": 138 }, { "epoch": 0.03, "learning_rate": 1.9998811777823727e-05, "loss": 1.1798, "step": 139 }, { "epoch": 0.04, "learning_rate": 1.999868341314785e-05, "loss": 1.1834, "step": 140 }, { "epoch": 0.04, "learning_rate": 1.9998548466260344e-05, "loss": 1.2453, "step": 141 }, { "epoch": 0.04, "learning_rate": 1.9998406937250035e-05, "loss": 1.1814, "step": 142 }, { "epoch": 0.04, "learning_rate": 1.99982588262101e-05, "loss": 1.2231, "step": 143 }, { "epoch": 0.04, "learning_rate": 1.9998104133238034e-05, "loss": 1.2239, "step": 144 }, { "epoch": 0.04, "learning_rate": 1.9997942858435676e-05, "loss": 1.1961, "step": 145 }, { "epoch": 0.04, "learning_rate": 1.99977750019092e-05, "loss": 1.1829, "step": 146 }, { "epoch": 0.04, "learning_rate": 1.99976005637691e-05, "loss": 1.2289, "step": 147 }, { "epoch": 0.04, "learning_rate": 1.999741954413022e-05, "loss": 1.2015, "step": 148 }, { "epoch": 0.04, "learning_rate": 1.9997231943111712e-05, "loss": 1.2258, "step": 149 }, { "epoch": 0.04, "learning_rate": 1.9997037760837082e-05, "loss": 1.1994, "step": 150 }, { "epoch": 0.04, "learning_rate": 1.9996836997434167e-05, "loss": 1.1692, "step": 151 }, { "epoch": 0.04, "learning_rate": 1.9996629653035128e-05, "loss": 1.2013, "step": 152 }, { "epoch": 0.04, "learning_rate": 1.9996415727776456e-05, "loss": 1.2042, "step": 153 }, { "epoch": 0.04, "learning_rate": 1.9996195221798985e-05, "loss": 1.1874, "step": 154 }, { "epoch": 0.04, "learning_rate": 1.9995968135247874e-05, "loss": 1.2539, "step": 155 }, { "epoch": 0.04, "learning_rate": 1.9995734468272616e-05, "loss": 1.2089, "step": 156 }, { "epoch": 0.04, "learning_rate": 1.9995494221027036e-05, "loss": 1.2224, "step": 157 }, { "epoch": 0.04, "learning_rate": 1.9995247393669292e-05, "loss": 1.246, "step": 158 }, { "epoch": 0.04, "learning_rate": 1.9994993986361873e-05, "loss": 1.2001, "step": 159 }, { "epoch": 0.04, "learning_rate": 1.9994733999271596e-05, "loss": 1.2153, "step": 160 }, { "epoch": 0.04, "learning_rate": 1.9994467432569612e-05, "loss": 1.1919, "step": 161 }, { "epoch": 0.04, "learning_rate": 1.9994194286431407e-05, "loss": 1.2351, "step": 162 }, { "epoch": 0.04, "learning_rate": 1.9993914561036796e-05, "loss": 1.1929, "step": 163 }, { "epoch": 0.04, "learning_rate": 1.999362825656992e-05, "loss": 1.218, "step": 164 }, { "epoch": 0.04, "learning_rate": 1.9993335373219257e-05, "loss": 1.1717, "step": 165 }, { "epoch": 0.04, "learning_rate": 1.9993035911177616e-05, "loss": 1.2285, "step": 166 }, { "epoch": 0.04, "learning_rate": 1.9992729870642136e-05, "loss": 1.2138, "step": 167 }, { "epoch": 0.04, "learning_rate": 1.999241725181428e-05, "loss": 1.1519, "step": 168 }, { "epoch": 0.04, "learning_rate": 1.9992098054899857e-05, "loss": 1.2223, "step": 169 }, { "epoch": 0.04, "learning_rate": 1.9991772280108992e-05, "loss": 1.2452, "step": 170 }, { "epoch": 0.04, "learning_rate": 1.999143992765614e-05, "loss": 1.2277, "step": 171 }, { "epoch": 0.04, "learning_rate": 1.9991100997760098e-05, "loss": 1.1592, "step": 172 }, { "epoch": 0.04, "learning_rate": 1.9990755490643986e-05, "loss": 1.2085, "step": 173 }, { "epoch": 0.04, "learning_rate": 1.999040340653525e-05, "loss": 1.1824, "step": 174 }, { "epoch": 0.04, "learning_rate": 1.9990044745665672e-05, "loss": 1.2135, "step": 175 }, { "epoch": 0.04, "learning_rate": 1.998967950827136e-05, "loss": 1.1996, "step": 176 }, { "epoch": 0.04, "learning_rate": 1.9989307694592757e-05, "loss": 1.1776, "step": 177 }, { "epoch": 0.04, "learning_rate": 1.998892930487463e-05, "loss": 1.2009, "step": 178 }, { "epoch": 0.04, "learning_rate": 1.9988544339366068e-05, "loss": 1.1974, "step": 179 }, { "epoch": 0.05, "learning_rate": 1.9988152798320508e-05, "loss": 1.1942, "step": 180 }, { "epoch": 0.05, "learning_rate": 1.9987754681995692e-05, "loss": 1.1908, "step": 181 }, { "epoch": 0.05, "learning_rate": 1.9987349990653714e-05, "loss": 1.1809, "step": 182 }, { "epoch": 0.05, "learning_rate": 1.998693872456098e-05, "loss": 1.2553, "step": 183 }, { "epoch": 0.05, "learning_rate": 1.9986520883988233e-05, "loss": 1.1797, "step": 184 }, { "epoch": 0.05, "learning_rate": 1.9986096469210537e-05, "loss": 1.132, "step": 185 }, { "epoch": 0.05, "learning_rate": 1.998566548050729e-05, "loss": 1.2439, "step": 186 }, { "epoch": 0.05, "learning_rate": 1.9985227918162214e-05, "loss": 1.1973, "step": 187 }, { "epoch": 0.05, "learning_rate": 1.9984783782463364e-05, "loss": 1.1648, "step": 188 }, { "epoch": 0.05, "learning_rate": 1.998433307370311e-05, "loss": 1.1843, "step": 189 }, { "epoch": 0.05, "learning_rate": 1.9983875792178167e-05, "loss": 1.1872, "step": 190 }, { "epoch": 0.05, "learning_rate": 1.9983411938189555e-05, "loss": 1.1837, "step": 191 }, { "epoch": 0.05, "learning_rate": 1.9982941512042645e-05, "loss": 1.2344, "step": 192 }, { "epoch": 0.05, "learning_rate": 1.9982464514047117e-05, "loss": 1.2152, "step": 193 }, { "epoch": 0.05, "learning_rate": 1.9981980944516978e-05, "loss": 1.1618, "step": 194 }, { "epoch": 0.05, "learning_rate": 1.9981490803770568e-05, "loss": 1.1668, "step": 195 }, { "epoch": 0.05, "learning_rate": 1.9980994092130558e-05, "loss": 1.179, "step": 196 }, { "epoch": 0.05, "learning_rate": 1.9980490809923928e-05, "loss": 1.2367, "step": 197 }, { "epoch": 0.05, "learning_rate": 1.9979980957481995e-05, "loss": 1.1757, "step": 198 }, { "epoch": 0.05, "learning_rate": 1.99794645351404e-05, "loss": 1.2278, "step": 199 }, { "epoch": 0.05, "learning_rate": 1.997894154323911e-05, "loss": 1.1724, "step": 200 }, { "epoch": 0.05, "learning_rate": 1.9978411982122412e-05, "loss": 1.1656, "step": 201 }, { "epoch": 0.05, "learning_rate": 1.997787585213892e-05, "loss": 1.2071, "step": 202 }, { "epoch": 0.05, "learning_rate": 1.997733315364157e-05, "loss": 1.1987, "step": 203 }, { "epoch": 0.05, "learning_rate": 1.9976783886987632e-05, "loss": 1.1877, "step": 204 }, { "epoch": 0.05, "learning_rate": 1.997622805253869e-05, "loss": 1.1892, "step": 205 }, { "epoch": 0.05, "learning_rate": 1.9975665650660647e-05, "loss": 1.2044, "step": 206 }, { "epoch": 0.05, "learning_rate": 1.9975096681723744e-05, "loss": 1.1792, "step": 207 }, { "epoch": 0.05, "learning_rate": 1.9974521146102535e-05, "loss": 1.1817, "step": 208 }, { "epoch": 0.05, "learning_rate": 1.9973939044175904e-05, "loss": 1.1647, "step": 209 }, { "epoch": 0.05, "learning_rate": 1.997335037632705e-05, "loss": 1.1939, "step": 210 }, { "epoch": 0.05, "learning_rate": 1.997275514294349e-05, "loss": 1.1471, "step": 211 }, { "epoch": 0.05, "learning_rate": 1.9972153344417084e-05, "loss": 1.2244, "step": 212 }, { "epoch": 0.05, "learning_rate": 1.9971544981143996e-05, "loss": 1.1645, "step": 213 }, { "epoch": 0.05, "learning_rate": 1.997093005352471e-05, "loss": 1.2097, "step": 214 }, { "epoch": 0.05, "learning_rate": 1.9970308561964046e-05, "loss": 1.2342, "step": 215 }, { "epoch": 0.05, "learning_rate": 1.9969680506871138e-05, "loss": 1.2387, "step": 216 }, { "epoch": 0.05, "learning_rate": 1.9969045888659427e-05, "loss": 1.2425, "step": 217 }, { "epoch": 0.05, "learning_rate": 1.9968404707746698e-05, "loss": 1.2046, "step": 218 }, { "epoch": 0.05, "learning_rate": 1.9967756964555044e-05, "loss": 1.1616, "step": 219 }, { "epoch": 0.06, "learning_rate": 1.9967102659510877e-05, "loss": 1.2366, "step": 220 }, { "epoch": 0.06, "learning_rate": 1.9966441793044935e-05, "loss": 1.2276, "step": 221 }, { "epoch": 0.06, "learning_rate": 1.9965774365592264e-05, "loss": 1.194, "step": 222 }, { "epoch": 0.06, "learning_rate": 1.9965100377592244e-05, "loss": 1.2102, "step": 223 }, { "epoch": 0.06, "learning_rate": 1.9964419829488567e-05, "loss": 1.1897, "step": 224 }, { "epoch": 0.06, "learning_rate": 1.9963732721729236e-05, "loss": 1.2144, "step": 225 }, { "epoch": 0.06, "learning_rate": 1.9963039054766584e-05, "loss": 1.1637, "step": 226 }, { "epoch": 0.06, "learning_rate": 1.996233882905726e-05, "loss": 1.2195, "step": 227 }, { "epoch": 0.06, "learning_rate": 1.9961632045062224e-05, "loss": 1.191, "step": 228 }, { "epoch": 0.06, "learning_rate": 1.996091870324676e-05, "loss": 1.1845, "step": 229 }, { "epoch": 0.06, "learning_rate": 1.9960198804080462e-05, "loss": 1.2049, "step": 230 }, { "epoch": 0.06, "learning_rate": 1.9959472348037247e-05, "loss": 1.1975, "step": 231 }, { "epoch": 0.06, "learning_rate": 1.995873933559535e-05, "loss": 1.2232, "step": 232 }, { "epoch": 0.06, "learning_rate": 1.995799976723732e-05, "loss": 1.2015, "step": 233 }, { "epoch": 0.06, "learning_rate": 1.9957253643450016e-05, "loss": 1.2001, "step": 234 }, { "epoch": 0.06, "learning_rate": 1.9956500964724615e-05, "loss": 1.1719, "step": 235 }, { "epoch": 0.06, "learning_rate": 1.995574173155662e-05, "loss": 1.18, "step": 236 }, { "epoch": 0.06, "learning_rate": 1.9954975944445832e-05, "loss": 1.1921, "step": 237 }, { "epoch": 0.06, "learning_rate": 1.995420360389638e-05, "loss": 1.1683, "step": 238 }, { "epoch": 0.06, "learning_rate": 1.9953424710416697e-05, "loss": 1.2412, "step": 239 }, { "epoch": 0.06, "learning_rate": 1.995263926451954e-05, "loss": 1.1279, "step": 240 }, { "epoch": 0.06, "learning_rate": 1.995184726672197e-05, "loss": 1.183, "step": 241 }, { "epoch": 0.06, "learning_rate": 1.9951048717545368e-05, "loss": 1.1892, "step": 242 }, { "epoch": 0.06, "learning_rate": 1.9950243617515423e-05, "loss": 1.201, "step": 243 }, { "epoch": 0.06, "learning_rate": 1.994943196716214e-05, "loss": 1.1693, "step": 244 }, { "epoch": 0.06, "learning_rate": 1.9948613767019833e-05, "loss": 1.164, "step": 245 }, { "epoch": 0.06, "learning_rate": 1.9947789017627134e-05, "loss": 1.1881, "step": 246 }, { "epoch": 0.06, "learning_rate": 1.9946957719526977e-05, "loss": 1.1754, "step": 247 }, { "epoch": 0.06, "learning_rate": 1.9946119873266615e-05, "loss": 1.1985, "step": 248 }, { "epoch": 0.06, "learning_rate": 1.9945275479397607e-05, "loss": 1.24, "step": 249 }, { "epoch": 0.06, "learning_rate": 1.9944424538475827e-05, "loss": 1.1531, "step": 250 }, { "epoch": 0.06, "learning_rate": 1.9943567051061453e-05, "loss": 1.2249, "step": 251 }, { "epoch": 0.06, "learning_rate": 1.9942703017718977e-05, "loss": 1.1944, "step": 252 }, { "epoch": 0.06, "learning_rate": 1.9941832439017194e-05, "loss": 1.1798, "step": 253 }, { "epoch": 0.06, "learning_rate": 1.9940955315529224e-05, "loss": 1.1936, "step": 254 }, { "epoch": 0.06, "learning_rate": 1.9940071647832475e-05, "loss": 1.179, "step": 255 }, { "epoch": 0.06, "learning_rate": 1.9939181436508673e-05, "loss": 1.1865, "step": 256 }, { "epoch": 0.06, "learning_rate": 1.9938284682143857e-05, "loss": 1.1899, "step": 257 }, { "epoch": 0.06, "learning_rate": 1.993738138532836e-05, "loss": 1.1856, "step": 258 }, { "epoch": 0.06, "learning_rate": 1.9936471546656835e-05, "loss": 1.1562, "step": 259 }, { "epoch": 0.07, "learning_rate": 1.9935555166728234e-05, "loss": 1.1947, "step": 260 }, { "epoch": 0.07, "learning_rate": 1.9934632246145814e-05, "loss": 1.2023, "step": 261 }, { "epoch": 0.07, "learning_rate": 1.993370278551715e-05, "loss": 1.1758, "step": 262 }, { "epoch": 0.07, "learning_rate": 1.99327667854541e-05, "loss": 1.1622, "step": 263 }, { "epoch": 0.07, "learning_rate": 1.993182424657285e-05, "loss": 1.1956, "step": 264 }, { "epoch": 0.07, "learning_rate": 1.993087516949388e-05, "loss": 1.1835, "step": 265 }, { "epoch": 0.07, "learning_rate": 1.9929919554841972e-05, "loss": 1.1712, "step": 266 }, { "epoch": 0.07, "learning_rate": 1.9928957403246214e-05, "loss": 1.2055, "step": 267 }, { "epoch": 0.07, "learning_rate": 1.992798871534e-05, "loss": 1.1491, "step": 268 }, { "epoch": 0.07, "learning_rate": 1.9927013491761023e-05, "loss": 1.1995, "step": 269 }, { "epoch": 0.07, "learning_rate": 1.9926031733151282e-05, "loss": 1.2289, "step": 270 }, { "epoch": 0.07, "learning_rate": 1.992504344015708e-05, "loss": 1.1271, "step": 271 }, { "epoch": 0.07, "learning_rate": 1.9924048613429008e-05, "loss": 1.1498, "step": 272 }, { "epoch": 0.07, "learning_rate": 1.9923047253621977e-05, "loss": 1.1614, "step": 273 }, { "epoch": 0.07, "learning_rate": 1.9922039361395186e-05, "loss": 1.1742, "step": 274 }, { "epoch": 0.07, "learning_rate": 1.992102493741214e-05, "loss": 1.1593, "step": 275 }, { "epoch": 0.07, "learning_rate": 1.992000398234064e-05, "loss": 1.1651, "step": 276 }, { "epoch": 0.07, "learning_rate": 1.9918976496852794e-05, "loss": 1.2029, "step": 277 }, { "epoch": 0.07, "learning_rate": 1.9917942481625e-05, "loss": 1.2177, "step": 278 }, { "epoch": 0.07, "learning_rate": 1.9916901937337957e-05, "loss": 1.1601, "step": 279 }, { "epoch": 0.07, "learning_rate": 1.9915854864676665e-05, "loss": 1.2071, "step": 280 }, { "epoch": 0.07, "learning_rate": 1.991480126433042e-05, "loss": 1.2001, "step": 281 }, { "epoch": 0.07, "learning_rate": 1.9913741136992818e-05, "loss": 1.161, "step": 282 }, { "epoch": 0.07, "learning_rate": 1.9912674483361742e-05, "loss": 1.1575, "step": 283 }, { "epoch": 0.07, "learning_rate": 1.9911601304139387e-05, "loss": 1.1661, "step": 284 }, { "epoch": 0.07, "learning_rate": 1.991052160003223e-05, "loss": 1.205, "step": 285 }, { "epoch": 0.07, "learning_rate": 1.990943537175105e-05, "loss": 1.1957, "step": 286 }, { "epoch": 0.07, "learning_rate": 1.9908342620010918e-05, "loss": 1.1993, "step": 287 }, { "epoch": 0.07, "learning_rate": 1.9907243345531206e-05, "loss": 1.2081, "step": 288 }, { "epoch": 0.07, "learning_rate": 1.990613754903557e-05, "loss": 1.1689, "step": 289 }, { "epoch": 0.07, "learning_rate": 1.9905025231251963e-05, "loss": 1.2038, "step": 290 }, { "epoch": 0.07, "learning_rate": 1.9903906392912642e-05, "loss": 1.2071, "step": 291 }, { "epoch": 0.07, "learning_rate": 1.9902781034754138e-05, "loss": 1.1627, "step": 292 }, { "epoch": 0.07, "learning_rate": 1.9901649157517283e-05, "loss": 1.1506, "step": 293 }, { "epoch": 0.07, "learning_rate": 1.9900510761947208e-05, "loss": 1.174, "step": 294 }, { "epoch": 0.07, "learning_rate": 1.989936584879332e-05, "loss": 1.1653, "step": 295 }, { "epoch": 0.07, "learning_rate": 1.989821441880933e-05, "loss": 1.1907, "step": 296 }, { "epoch": 0.07, "learning_rate": 1.9897056472753228e-05, "loss": 1.2512, "step": 297 }, { "epoch": 0.07, "learning_rate": 1.9895892011387303e-05, "loss": 1.1885, "step": 298 }, { "epoch": 0.07, "learning_rate": 1.989472103547813e-05, "loss": 1.167, "step": 299 }, { "epoch": 0.08, "learning_rate": 1.9893543545796565e-05, "loss": 1.2086, "step": 300 }, { "epoch": 0.08, "learning_rate": 1.9892359543117767e-05, "loss": 1.1777, "step": 301 }, { "epoch": 0.08, "learning_rate": 1.9891169028221168e-05, "loss": 1.1778, "step": 302 }, { "epoch": 0.08, "learning_rate": 1.9889972001890497e-05, "loss": 1.2026, "step": 303 }, { "epoch": 0.08, "learning_rate": 1.9888768464913766e-05, "loss": 1.1171, "step": 304 }, { "epoch": 0.08, "learning_rate": 1.988755841808327e-05, "loss": 1.1813, "step": 305 }, { "epoch": 0.08, "learning_rate": 1.9886341862195594e-05, "loss": 1.1563, "step": 306 }, { "epoch": 0.08, "learning_rate": 1.9885118798051607e-05, "loss": 1.2294, "step": 307 }, { "epoch": 0.08, "learning_rate": 1.988388922645646e-05, "loss": 1.1373, "step": 308 }, { "epoch": 0.08, "learning_rate": 1.9882653148219585e-05, "loss": 1.1836, "step": 309 }, { "epoch": 0.08, "learning_rate": 1.9881410564154714e-05, "loss": 1.1795, "step": 310 }, { "epoch": 0.08, "learning_rate": 1.988016147507984e-05, "loss": 1.1483, "step": 311 }, { "epoch": 0.08, "learning_rate": 1.9878905881817254e-05, "loss": 1.189, "step": 312 }, { "epoch": 0.08, "learning_rate": 1.9877643785193517e-05, "loss": 1.186, "step": 313 }, { "epoch": 0.08, "learning_rate": 1.987637518603948e-05, "loss": 1.249, "step": 314 }, { "epoch": 0.08, "learning_rate": 1.9875100085190273e-05, "loss": 1.2027, "step": 315 }, { "epoch": 0.08, "learning_rate": 1.9873818483485304e-05, "loss": 1.1733, "step": 316 }, { "epoch": 0.08, "learning_rate": 1.987253038176826e-05, "loss": 1.2142, "step": 317 }, { "epoch": 0.08, "learning_rate": 1.9871235780887114e-05, "loss": 1.1744, "step": 318 }, { "epoch": 0.08, "learning_rate": 1.9869934681694105e-05, "loss": 1.1459, "step": 319 }, { "epoch": 0.08, "learning_rate": 1.986862708504576e-05, "loss": 1.2182, "step": 320 }, { "epoch": 0.08, "learning_rate": 1.9867312991802877e-05, "loss": 1.1747, "step": 321 }, { "epoch": 0.08, "learning_rate": 1.9865992402830537e-05, "loss": 1.2229, "step": 322 }, { "epoch": 0.08, "learning_rate": 1.9864665318998094e-05, "loss": 1.1736, "step": 323 }, { "epoch": 0.08, "learning_rate": 1.9863331741179177e-05, "loss": 1.1876, "step": 324 }, { "epoch": 0.08, "learning_rate": 1.9861991670251692e-05, "loss": 1.2363, "step": 325 }, { "epoch": 0.08, "learning_rate": 1.9860645107097817e-05, "loss": 1.1485, "step": 326 }, { "epoch": 0.08, "learning_rate": 1.9859292052604005e-05, "loss": 1.1752, "step": 327 }, { "epoch": 0.08, "learning_rate": 1.9857932507660983e-05, "loss": 1.1223, "step": 328 }, { "epoch": 0.08, "learning_rate": 1.9856566473163747e-05, "loss": 1.1913, "step": 329 }, { "epoch": 0.08, "learning_rate": 1.9855193950011574e-05, "loss": 1.1894, "step": 330 }, { "epoch": 0.08, "learning_rate": 1.9853814939108e-05, "loss": 1.2005, "step": 331 }, { "epoch": 0.08, "learning_rate": 1.9852429441360845e-05, "loss": 1.1871, "step": 332 }, { "epoch": 0.08, "learning_rate": 1.985103745768219e-05, "loss": 1.1838, "step": 333 }, { "epoch": 0.08, "learning_rate": 1.984963898898839e-05, "loss": 1.185, "step": 334 }, { "epoch": 0.08, "learning_rate": 1.9848234036200064e-05, "loss": 1.1652, "step": 335 }, { "epoch": 0.08, "learning_rate": 1.984682260024211e-05, "loss": 1.1641, "step": 336 }, { "epoch": 0.08, "learning_rate": 1.984540468204368e-05, "loss": 1.1798, "step": 337 }, { "epoch": 0.08, "learning_rate": 1.9843980282538208e-05, "loss": 1.1899, "step": 338 }, { "epoch": 0.08, "learning_rate": 1.9842549402663382e-05, "loss": 1.153, "step": 339 }, { "epoch": 0.09, "learning_rate": 1.984111204336116e-05, "loss": 1.1931, "step": 340 }, { "epoch": 0.09, "learning_rate": 1.9839668205577774e-05, "loss": 1.1768, "step": 341 }, { "epoch": 0.09, "learning_rate": 1.9838217890263712e-05, "loss": 1.183, "step": 342 }, { "epoch": 0.09, "learning_rate": 1.983676109837372e-05, "loss": 1.1764, "step": 343 }, { "epoch": 0.09, "learning_rate": 1.9835297830866827e-05, "loss": 1.1562, "step": 344 }, { "epoch": 0.09, "learning_rate": 1.9833828088706304e-05, "loss": 1.1708, "step": 345 }, { "epoch": 0.09, "learning_rate": 1.9832351872859696e-05, "loss": 1.1409, "step": 346 }, { "epoch": 0.09, "learning_rate": 1.9830869184298813e-05, "loss": 1.1832, "step": 347 }, { "epoch": 0.09, "learning_rate": 1.9829380023999715e-05, "loss": 1.1536, "step": 348 }, { "epoch": 0.09, "learning_rate": 1.9827884392942728e-05, "loss": 1.1323, "step": 349 }, { "epoch": 0.09, "learning_rate": 1.9826382292112442e-05, "loss": 1.1308, "step": 350 }, { "epoch": 0.09, "learning_rate": 1.9824873722497694e-05, "loss": 1.1593, "step": 351 }, { "epoch": 0.09, "learning_rate": 1.9823358685091597e-05, "loss": 1.1881, "step": 352 }, { "epoch": 0.09, "learning_rate": 1.9821837180891504e-05, "loss": 1.2001, "step": 353 }, { "epoch": 0.09, "learning_rate": 1.9820309210899036e-05, "loss": 1.1905, "step": 354 }, { "epoch": 0.09, "learning_rate": 1.981877477612007e-05, "loss": 1.1728, "step": 355 }, { "epoch": 0.09, "learning_rate": 1.981723387756473e-05, "loss": 1.1667, "step": 356 }, { "epoch": 0.09, "learning_rate": 1.9815686516247407e-05, "loss": 1.234, "step": 357 }, { "epoch": 0.09, "learning_rate": 1.981413269318674e-05, "loss": 1.1729, "step": 358 }, { "epoch": 0.09, "learning_rate": 1.9812572409405627e-05, "loss": 1.2086, "step": 359 }, { "epoch": 0.09, "learning_rate": 1.9811005665931205e-05, "loss": 1.1632, "step": 360 }, { "epoch": 0.09, "learning_rate": 1.980943246379488e-05, "loss": 1.1801, "step": 361 }, { "epoch": 0.09, "learning_rate": 1.9807852804032306e-05, "loss": 1.2092, "step": 362 }, { "epoch": 0.09, "learning_rate": 1.980626668768338e-05, "loss": 1.1748, "step": 363 }, { "epoch": 0.09, "learning_rate": 1.9804674115792256e-05, "loss": 1.1645, "step": 364 }, { "epoch": 0.09, "learning_rate": 1.980307508940734e-05, "loss": 1.1596, "step": 365 }, { "epoch": 0.09, "learning_rate": 1.980146960958128e-05, "loss": 1.1729, "step": 366 }, { "epoch": 0.09, "learning_rate": 1.9799857677370973e-05, "loss": 1.1957, "step": 367 }, { "epoch": 0.09, "learning_rate": 1.9798239293837572e-05, "loss": 1.1791, "step": 368 }, { "epoch": 0.09, "learning_rate": 1.979661446004647e-05, "loss": 1.1243, "step": 369 }, { "epoch": 0.09, "learning_rate": 1.9794983177067314e-05, "loss": 1.1948, "step": 370 }, { "epoch": 0.09, "learning_rate": 1.9793345445973976e-05, "loss": 1.2004, "step": 371 }, { "epoch": 0.09, "learning_rate": 1.97917012678446e-05, "loss": 1.1558, "step": 372 }, { "epoch": 0.09, "learning_rate": 1.9790050643761552e-05, "loss": 1.1088, "step": 373 }, { "epoch": 0.09, "learning_rate": 1.978839357481146e-05, "loss": 1.1698, "step": 374 }, { "epoch": 0.09, "learning_rate": 1.978673006208518e-05, "loss": 1.1265, "step": 375 }, { "epoch": 0.09, "learning_rate": 1.9785060106677818e-05, "loss": 1.2059, "step": 376 }, { "epoch": 0.09, "learning_rate": 1.9783383709688714e-05, "loss": 1.171, "step": 377 }, { "epoch": 0.09, "learning_rate": 1.9781700872221458e-05, "loss": 1.1634, "step": 378 }, { "epoch": 0.09, "learning_rate": 1.9780011595383874e-05, "loss": 1.1981, "step": 379 }, { "epoch": 0.1, "learning_rate": 1.9778315880288024e-05, "loss": 1.1399, "step": 380 }, { "epoch": 0.1, "learning_rate": 1.9776613728050214e-05, "loss": 1.1912, "step": 381 }, { "epoch": 0.1, "learning_rate": 1.977490513979098e-05, "loss": 1.1908, "step": 382 }, { "epoch": 0.1, "learning_rate": 1.9773190116635104e-05, "loss": 1.2065, "step": 383 }, { "epoch": 0.1, "learning_rate": 1.9771468659711595e-05, "loss": 1.185, "step": 384 }, { "epoch": 0.1, "learning_rate": 1.9769740770153704e-05, "loss": 1.2061, "step": 385 }, { "epoch": 0.1, "learning_rate": 1.9768006449098916e-05, "loss": 1.1929, "step": 386 }, { "epoch": 0.1, "learning_rate": 1.976626569768894e-05, "loss": 1.1844, "step": 387 }, { "epoch": 0.1, "learning_rate": 1.9764518517069737e-05, "loss": 1.2062, "step": 388 }, { "epoch": 0.1, "learning_rate": 1.9762764908391483e-05, "loss": 1.1949, "step": 389 }, { "epoch": 0.1, "learning_rate": 1.9761004872808594e-05, "loss": 1.1657, "step": 390 }, { "epoch": 0.1, "learning_rate": 1.9759238411479715e-05, "loss": 1.1553, "step": 391 }, { "epoch": 0.1, "learning_rate": 1.975746552556772e-05, "loss": 1.1831, "step": 392 }, { "epoch": 0.1, "learning_rate": 1.9755686216239715e-05, "loss": 1.1527, "step": 393 }, { "epoch": 0.1, "learning_rate": 1.9753900484667035e-05, "loss": 1.1762, "step": 394 }, { "epoch": 0.1, "learning_rate": 1.975210833202524e-05, "loss": 1.2006, "step": 395 }, { "epoch": 0.1, "learning_rate": 1.9750309759494116e-05, "loss": 1.1099, "step": 396 }, { "epoch": 0.1, "learning_rate": 1.974850476825768e-05, "loss": 1.1721, "step": 397 }, { "epoch": 0.1, "learning_rate": 1.974669335950417e-05, "loss": 1.1475, "step": 398 }, { "epoch": 0.1, "learning_rate": 1.9744875534426053e-05, "loss": 1.1731, "step": 399 }, { "epoch": 0.1, "learning_rate": 1.9743051294220015e-05, "loss": 1.1808, "step": 400 }, { "epoch": 0.1, "learning_rate": 1.974122064008697e-05, "loss": 1.191, "step": 401 }, { "epoch": 0.1, "learning_rate": 1.973938357323205e-05, "loss": 1.1498, "step": 402 }, { "epoch": 0.1, "learning_rate": 1.973754009486461e-05, "loss": 1.1693, "step": 403 }, { "epoch": 0.1, "learning_rate": 1.9735690206198225e-05, "loss": 1.1236, "step": 404 }, { "epoch": 0.1, "learning_rate": 1.9733833908450697e-05, "loss": 1.189, "step": 405 }, { "epoch": 0.1, "learning_rate": 1.9731971202844036e-05, "loss": 1.1576, "step": 406 }, { "epoch": 0.1, "learning_rate": 1.9730102090604477e-05, "loss": 1.1662, "step": 407 }, { "epoch": 0.1, "learning_rate": 1.9728226572962474e-05, "loss": 1.1615, "step": 408 }, { "epoch": 0.1, "learning_rate": 1.9726344651152694e-05, "loss": 1.1541, "step": 409 }, { "epoch": 0.1, "learning_rate": 1.9724456326414016e-05, "loss": 1.1565, "step": 410 }, { "epoch": 0.1, "learning_rate": 1.9722561599989544e-05, "loss": 1.1942, "step": 411 }, { "epoch": 0.1, "learning_rate": 1.972066047312659e-05, "loss": 1.1789, "step": 412 }, { "epoch": 0.1, "learning_rate": 1.971875294707668e-05, "loss": 1.1582, "step": 413 }, { "epoch": 0.1, "learning_rate": 1.9716839023095557e-05, "loss": 1.1942, "step": 414 }, { "epoch": 0.1, "learning_rate": 1.9714918702443165e-05, "loss": 1.161, "step": 415 }, { "epoch": 0.1, "learning_rate": 1.971299198638367e-05, "loss": 1.1697, "step": 416 }, { "epoch": 0.1, "learning_rate": 1.9711058876185446e-05, "loss": 1.1315, "step": 417 }, { "epoch": 0.1, "learning_rate": 1.9709119373121072e-05, "loss": 1.1333, "step": 418 }, { "epoch": 0.1, "learning_rate": 1.9707173478467342e-05, "loss": 1.1628, "step": 419 }, { "epoch": 0.11, "learning_rate": 1.970522119350525e-05, "loss": 1.142, "step": 420 }, { "epoch": 0.11, "learning_rate": 1.970326251952e-05, "loss": 1.1287, "step": 421 }, { "epoch": 0.11, "learning_rate": 1.9701297457801005e-05, "loss": 1.171, "step": 422 }, { "epoch": 0.11, "learning_rate": 1.969932600964188e-05, "loss": 1.1994, "step": 423 }, { "epoch": 0.11, "learning_rate": 1.9697348176340442e-05, "loss": 1.1704, "step": 424 }, { "epoch": 0.11, "learning_rate": 1.9695363959198714e-05, "loss": 1.1589, "step": 425 }, { "epoch": 0.11, "learning_rate": 1.9693373359522927e-05, "loss": 1.1327, "step": 426 }, { "epoch": 0.11, "learning_rate": 1.9691376378623504e-05, "loss": 1.1437, "step": 427 }, { "epoch": 0.11, "learning_rate": 1.9689373017815076e-05, "loss": 1.1745, "step": 428 }, { "epoch": 0.11, "learning_rate": 1.9687363278416466e-05, "loss": 1.1257, "step": 429 }, { "epoch": 0.11, "learning_rate": 1.968534716175071e-05, "loss": 1.19, "step": 430 }, { "epoch": 0.11, "learning_rate": 1.9683324669145024e-05, "loss": 1.1842, "step": 431 }, { "epoch": 0.11, "learning_rate": 1.968129580193084e-05, "loss": 1.1945, "step": 432 }, { "epoch": 0.11, "learning_rate": 1.9679260561443765e-05, "loss": 1.1487, "step": 433 }, { "epoch": 0.11, "learning_rate": 1.9677218949023623e-05, "loss": 1.1649, "step": 434 }, { "epoch": 0.11, "learning_rate": 1.9675170966014424e-05, "loss": 1.1864, "step": 435 }, { "epoch": 0.11, "learning_rate": 1.9673116613764363e-05, "loss": 1.2002, "step": 436 }, { "epoch": 0.11, "learning_rate": 1.9671055893625847e-05, "loss": 1.1965, "step": 437 }, { "epoch": 0.11, "learning_rate": 1.966898880695546e-05, "loss": 1.1881, "step": 438 }, { "epoch": 0.11, "learning_rate": 1.9666915355113976e-05, "loss": 1.1719, "step": 439 }, { "epoch": 0.11, "learning_rate": 1.966483553946637e-05, "loss": 1.2084, "step": 440 }, { "epoch": 0.11, "learning_rate": 1.9662749361381804e-05, "loss": 1.1914, "step": 441 }, { "epoch": 0.11, "learning_rate": 1.9660656822233614e-05, "loss": 1.1414, "step": 442 }, { "epoch": 0.11, "learning_rate": 1.9658557923399345e-05, "loss": 1.1407, "step": 443 }, { "epoch": 0.11, "learning_rate": 1.965645266626072e-05, "loss": 1.1894, "step": 444 }, { "epoch": 0.11, "learning_rate": 1.9654341052203635e-05, "loss": 1.1465, "step": 445 }, { "epoch": 0.11, "learning_rate": 1.9652223082618195e-05, "loss": 1.1369, "step": 446 }, { "epoch": 0.11, "learning_rate": 1.9650098758898666e-05, "loss": 1.1873, "step": 447 }, { "epoch": 0.11, "learning_rate": 1.964796808244351e-05, "loss": 1.191, "step": 448 }, { "epoch": 0.11, "learning_rate": 1.9645831054655372e-05, "loss": 1.1973, "step": 449 }, { "epoch": 0.11, "learning_rate": 1.964368767694107e-05, "loss": 1.1228, "step": 450 }, { "epoch": 0.11, "learning_rate": 1.9641537950711606e-05, "loss": 1.2169, "step": 451 }, { "epoch": 0.11, "learning_rate": 1.9639381877382167e-05, "loss": 1.1807, "step": 452 }, { "epoch": 0.11, "learning_rate": 1.9637219458372104e-05, "loss": 1.172, "step": 453 }, { "epoch": 0.11, "learning_rate": 1.963505069510496e-05, "loss": 1.1669, "step": 454 }, { "epoch": 0.11, "learning_rate": 1.9632875589008454e-05, "loss": 1.2167, "step": 455 }, { "epoch": 0.11, "learning_rate": 1.9630694141514467e-05, "loss": 1.2035, "step": 456 }, { "epoch": 0.11, "learning_rate": 1.9628506354059064e-05, "loss": 1.1748, "step": 457 }, { "epoch": 0.11, "learning_rate": 1.962631222808248e-05, "loss": 1.1209, "step": 458 }, { "epoch": 0.11, "learning_rate": 1.9624111765029137e-05, "loss": 1.1971, "step": 459 }, { "epoch": 0.12, "learning_rate": 1.9621904966347604e-05, "loss": 1.1663, "step": 460 }, { "epoch": 0.12, "learning_rate": 1.9619691833490645e-05, "loss": 1.1908, "step": 461 }, { "epoch": 0.12, "learning_rate": 1.961747236791517e-05, "loss": 1.1954, "step": 462 }, { "epoch": 0.12, "learning_rate": 1.9615246571082282e-05, "loss": 1.207, "step": 463 }, { "epoch": 0.12, "learning_rate": 1.9613014444457237e-05, "loss": 1.1426, "step": 464 }, { "epoch": 0.12, "learning_rate": 1.961077598950946e-05, "loss": 1.134, "step": 465 }, { "epoch": 0.12, "learning_rate": 1.9608531207712545e-05, "loss": 1.1598, "step": 466 }, { "epoch": 0.12, "learning_rate": 1.960628010054425e-05, "loss": 1.2164, "step": 467 }, { "epoch": 0.12, "learning_rate": 1.9604022669486487e-05, "loss": 1.141, "step": 468 }, { "epoch": 0.12, "learning_rate": 1.9601758916025354e-05, "loss": 1.1402, "step": 469 }, { "epoch": 0.12, "learning_rate": 1.9599488841651095e-05, "loss": 1.2038, "step": 470 }, { "epoch": 0.12, "learning_rate": 1.9597212447858112e-05, "loss": 1.1425, "step": 471 }, { "epoch": 0.12, "learning_rate": 1.9594929736144978e-05, "loss": 1.1516, "step": 472 }, { "epoch": 0.12, "learning_rate": 1.9592640708014416e-05, "loss": 1.2103, "step": 473 }, { "epoch": 0.12, "learning_rate": 1.959034536497331e-05, "loss": 1.2201, "step": 474 }, { "epoch": 0.12, "learning_rate": 1.9588043708532707e-05, "loss": 1.1996, "step": 475 }, { "epoch": 0.12, "learning_rate": 1.958573574020781e-05, "loss": 1.1795, "step": 476 }, { "epoch": 0.12, "learning_rate": 1.9583421461517957e-05, "loss": 1.1164, "step": 477 }, { "epoch": 0.12, "learning_rate": 1.9581100873986672e-05, "loss": 1.1695, "step": 478 }, { "epoch": 0.12, "learning_rate": 1.9578773979141605e-05, "loss": 1.1516, "step": 479 }, { "epoch": 0.12, "learning_rate": 1.9576440778514576e-05, "loss": 1.18, "step": 480 }, { "epoch": 0.12, "learning_rate": 1.9574101273641545e-05, "loss": 1.1672, "step": 481 }, { "epoch": 0.12, "learning_rate": 1.957175546606263e-05, "loss": 1.1563, "step": 482 }, { "epoch": 0.12, "learning_rate": 1.956940335732209e-05, "loss": 1.1657, "step": 483 }, { "epoch": 0.12, "learning_rate": 1.9567044948968342e-05, "loss": 1.1721, "step": 484 }, { "epoch": 0.12, "learning_rate": 1.956468024255394e-05, "loss": 1.2079, "step": 485 }, { "epoch": 0.12, "learning_rate": 1.9562309239635587e-05, "loss": 1.1612, "step": 486 }, { "epoch": 0.12, "learning_rate": 1.9559931941774142e-05, "loss": 1.1663, "step": 487 }, { "epoch": 0.12, "learning_rate": 1.955754835053459e-05, "loss": 1.1486, "step": 488 }, { "epoch": 0.12, "learning_rate": 1.955515846748607e-05, "loss": 1.1149, "step": 489 }, { "epoch": 0.12, "learning_rate": 1.9552762294201867e-05, "loss": 1.1839, "step": 490 }, { "epoch": 0.12, "learning_rate": 1.955035983225939e-05, "loss": 1.1176, "step": 491 }, { "epoch": 0.12, "learning_rate": 1.9547951083240204e-05, "loss": 1.1638, "step": 492 }, { "epoch": 0.12, "learning_rate": 1.954553604873001e-05, "loss": 1.154, "step": 493 }, { "epoch": 0.12, "learning_rate": 1.954311473031864e-05, "loss": 1.1852, "step": 494 }, { "epoch": 0.12, "learning_rate": 1.9540687129600068e-05, "loss": 1.161, "step": 495 }, { "epoch": 0.12, "learning_rate": 1.9538253248172404e-05, "loss": 1.1794, "step": 496 }, { "epoch": 0.12, "learning_rate": 1.9535813087637887e-05, "loss": 1.1591, "step": 497 }, { "epoch": 0.12, "learning_rate": 1.9533366649602898e-05, "loss": 1.1907, "step": 498 }, { "epoch": 0.12, "learning_rate": 1.9530913935677946e-05, "loss": 1.1626, "step": 499 }, { "epoch": 0.13, "learning_rate": 1.9528454947477674e-05, "loss": 1.1148, "step": 500 }, { "epoch": 0.13, "learning_rate": 1.9525989686620844e-05, "loss": 1.1527, "step": 501 }, { "epoch": 0.13, "learning_rate": 1.9523518154730364e-05, "loss": 1.1746, "step": 502 }, { "epoch": 0.13, "learning_rate": 1.9521040353433263e-05, "loss": 1.1752, "step": 503 }, { "epoch": 0.13, "learning_rate": 1.9518556284360696e-05, "loss": 1.1511, "step": 504 }, { "epoch": 0.13, "learning_rate": 1.9516065949147945e-05, "loss": 1.2076, "step": 505 }, { "epoch": 0.13, "learning_rate": 1.9513569349434417e-05, "loss": 1.1525, "step": 506 }, { "epoch": 0.13, "learning_rate": 1.9511066486863642e-05, "loss": 1.1591, "step": 507 }, { "epoch": 0.13, "learning_rate": 1.9508557363083272e-05, "loss": 1.1653, "step": 508 }, { "epoch": 0.13, "learning_rate": 1.950604197974509e-05, "loss": 1.1803, "step": 509 }, { "epoch": 0.13, "learning_rate": 1.9503520338504986e-05, "loss": 1.1859, "step": 510 }, { "epoch": 0.13, "learning_rate": 1.9500992441022978e-05, "loss": 1.1642, "step": 511 }, { "epoch": 0.13, "learning_rate": 1.9498458288963204e-05, "loss": 1.1841, "step": 512 }, { "epoch": 0.13, "learning_rate": 1.9495917883993915e-05, "loss": 1.1442, "step": 513 }, { "epoch": 0.13, "learning_rate": 1.949337122778747e-05, "loss": 1.1365, "step": 514 }, { "epoch": 0.13, "learning_rate": 1.9490818322020367e-05, "loss": 1.1817, "step": 515 }, { "epoch": 0.13, "learning_rate": 1.9488259168373198e-05, "loss": 1.1728, "step": 516 }, { "epoch": 0.13, "learning_rate": 1.9485693768530672e-05, "loss": 1.1633, "step": 517 }, { "epoch": 0.13, "learning_rate": 1.9483122124181613e-05, "loss": 1.1904, "step": 518 }, { "epoch": 0.13, "learning_rate": 1.9480544237018956e-05, "loss": 1.1343, "step": 519 }, { "epoch": 0.13, "learning_rate": 1.947796010873974e-05, "loss": 1.1818, "step": 520 }, { "epoch": 0.13, "learning_rate": 1.9475369741045122e-05, "loss": 1.1237, "step": 521 }, { "epoch": 0.13, "learning_rate": 1.947277313564036e-05, "loss": 1.1939, "step": 522 }, { "epoch": 0.13, "learning_rate": 1.9470170294234817e-05, "loss": 1.1947, "step": 523 }, { "epoch": 0.13, "learning_rate": 1.9467561218541968e-05, "loss": 1.1485, "step": 524 }, { "epoch": 0.13, "learning_rate": 1.9464945910279387e-05, "loss": 1.1397, "step": 525 }, { "epoch": 0.13, "learning_rate": 1.946232437116875e-05, "loss": 1.1778, "step": 526 }, { "epoch": 0.13, "learning_rate": 1.9459696602935838e-05, "loss": 1.1785, "step": 527 }, { "epoch": 0.13, "learning_rate": 1.9457062607310527e-05, "loss": 1.173, "step": 528 }, { "epoch": 0.13, "learning_rate": 1.9454422386026807e-05, "loss": 1.2103, "step": 529 }, { "epoch": 0.13, "learning_rate": 1.9451775940822748e-05, "loss": 1.1782, "step": 530 }, { "epoch": 0.13, "learning_rate": 1.9449123273440527e-05, "loss": 1.1547, "step": 531 }, { "epoch": 0.13, "learning_rate": 1.944646438562642e-05, "loss": 1.211, "step": 532 }, { "epoch": 0.13, "learning_rate": 1.9443799279130786e-05, "loss": 1.2008, "step": 533 }, { "epoch": 0.13, "learning_rate": 1.944112795570809e-05, "loss": 1.1465, "step": 534 }, { "epoch": 0.13, "learning_rate": 1.943845041711689e-05, "loss": 1.1995, "step": 535 }, { "epoch": 0.13, "learning_rate": 1.9435766665119823e-05, "loss": 1.08, "step": 536 }, { "epoch": 0.13, "learning_rate": 1.9433076701483623e-05, "loss": 1.186, "step": 537 }, { "epoch": 0.13, "learning_rate": 1.9430380527979124e-05, "loss": 1.1779, "step": 538 }, { "epoch": 0.13, "learning_rate": 1.942767814638123e-05, "loss": 1.1527, "step": 539 }, { "epoch": 0.14, "learning_rate": 1.9424969558468937e-05, "loss": 1.1376, "step": 540 }, { "epoch": 0.14, "learning_rate": 1.9422254766025338e-05, "loss": 1.1755, "step": 541 }, { "epoch": 0.14, "learning_rate": 1.94195337708376e-05, "loss": 1.1383, "step": 542 }, { "epoch": 0.14, "learning_rate": 1.9416806574696974e-05, "loss": 1.1782, "step": 543 }, { "epoch": 0.14, "learning_rate": 1.9414073179398794e-05, "loss": 1.1647, "step": 544 }, { "epoch": 0.14, "learning_rate": 1.9411333586742476e-05, "loss": 1.1569, "step": 545 }, { "epoch": 0.14, "learning_rate": 1.9408587798531516e-05, "loss": 1.1773, "step": 546 }, { "epoch": 0.14, "learning_rate": 1.9405835816573485e-05, "loss": 1.0861, "step": 547 }, { "epoch": 0.14, "learning_rate": 1.940307764268004e-05, "loss": 1.1385, "step": 548 }, { "epoch": 0.14, "learning_rate": 1.94003132786669e-05, "loss": 1.1285, "step": 549 }, { "epoch": 0.14, "learning_rate": 1.939754272635388e-05, "loss": 1.1519, "step": 550 }, { "epoch": 0.14, "learning_rate": 1.9394765987564845e-05, "loss": 1.1319, "step": 551 }, { "epoch": 0.14, "learning_rate": 1.939198306412775e-05, "loss": 1.1617, "step": 552 }, { "epoch": 0.14, "learning_rate": 1.9389193957874613e-05, "loss": 1.1845, "step": 553 }, { "epoch": 0.14, "learning_rate": 1.9386398670641523e-05, "loss": 1.1469, "step": 554 }, { "epoch": 0.14, "learning_rate": 1.9383597204268643e-05, "loss": 1.1861, "step": 555 }, { "epoch": 0.14, "learning_rate": 1.9380789560600197e-05, "loss": 1.1293, "step": 556 }, { "epoch": 0.14, "learning_rate": 1.937797574148448e-05, "loss": 1.128, "step": 557 }, { "epoch": 0.14, "learning_rate": 1.9375155748773853e-05, "loss": 1.1767, "step": 558 }, { "epoch": 0.14, "learning_rate": 1.9372329584324737e-05, "loss": 1.1422, "step": 559 }, { "epoch": 0.14, "learning_rate": 1.936949724999762e-05, "loss": 1.1237, "step": 560 }, { "epoch": 0.14, "learning_rate": 1.9366658747657047e-05, "loss": 1.1884, "step": 561 }, { "epoch": 0.14, "learning_rate": 1.9363814079171628e-05, "loss": 1.1875, "step": 562 }, { "epoch": 0.14, "learning_rate": 1.9360963246414033e-05, "loss": 1.1495, "step": 563 }, { "epoch": 0.14, "learning_rate": 1.9358106251260984e-05, "loss": 1.1763, "step": 564 }, { "epoch": 0.14, "learning_rate": 1.9355243095593264e-05, "loss": 1.1893, "step": 565 }, { "epoch": 0.14, "learning_rate": 1.9352373781295714e-05, "loss": 1.1503, "step": 566 }, { "epoch": 0.14, "learning_rate": 1.934949831025722e-05, "loss": 1.2336, "step": 567 }, { "epoch": 0.14, "learning_rate": 1.934661668437073e-05, "loss": 1.156, "step": 568 }, { "epoch": 0.14, "learning_rate": 1.9343728905533246e-05, "loss": 1.1603, "step": 569 }, { "epoch": 0.14, "learning_rate": 1.934083497564581e-05, "loss": 1.1371, "step": 570 }, { "epoch": 0.14, "learning_rate": 1.9337934896613516e-05, "loss": 1.1981, "step": 571 }, { "epoch": 0.14, "learning_rate": 1.9335028670345517e-05, "loss": 1.1746, "step": 572 }, { "epoch": 0.14, "learning_rate": 1.9332116298754996e-05, "loss": 1.1168, "step": 573 }, { "epoch": 0.14, "learning_rate": 1.9329197783759196e-05, "loss": 1.1354, "step": 574 }, { "epoch": 0.14, "learning_rate": 1.9326273127279397e-05, "loss": 1.1378, "step": 575 }, { "epoch": 0.14, "learning_rate": 1.932334233124092e-05, "loss": 1.1784, "step": 576 }, { "epoch": 0.14, "learning_rate": 1.9320405397573134e-05, "loss": 1.1276, "step": 577 }, { "epoch": 0.14, "learning_rate": 1.931746232820945e-05, "loss": 1.1506, "step": 578 }, { "epoch": 0.15, "learning_rate": 1.9314513125087303e-05, "loss": 1.1438, "step": 579 }, { "epoch": 0.15, "learning_rate": 1.9311557790148182e-05, "loss": 1.2001, "step": 580 }, { "epoch": 0.15, "learning_rate": 1.930859632533761e-05, "loss": 1.1687, "step": 581 }, { "epoch": 0.15, "learning_rate": 1.930562873260514e-05, "loss": 1.1514, "step": 582 }, { "epoch": 0.15, "learning_rate": 1.930265501390436e-05, "loss": 1.1601, "step": 583 }, { "epoch": 0.15, "learning_rate": 1.929967517119289e-05, "loss": 1.1444, "step": 584 }, { "epoch": 0.15, "learning_rate": 1.929668920643239e-05, "loss": 1.1742, "step": 585 }, { "epoch": 0.15, "learning_rate": 1.9293697121588547e-05, "loss": 1.1378, "step": 586 }, { "epoch": 0.15, "learning_rate": 1.9290698918631063e-05, "loss": 1.1078, "step": 587 }, { "epoch": 0.15, "learning_rate": 1.9287694599533683e-05, "loss": 1.1688, "step": 588 }, { "epoch": 0.15, "learning_rate": 1.928468416627418e-05, "loss": 1.1857, "step": 589 }, { "epoch": 0.15, "learning_rate": 1.9281667620834332e-05, "loss": 1.1663, "step": 590 }, { "epoch": 0.15, "learning_rate": 1.9278644965199968e-05, "loss": 1.1205, "step": 591 }, { "epoch": 0.15, "learning_rate": 1.927561620136092e-05, "loss": 1.1867, "step": 592 }, { "epoch": 0.15, "learning_rate": 1.927258133131105e-05, "loss": 1.1181, "step": 593 }, { "epoch": 0.15, "learning_rate": 1.926954035704823e-05, "loss": 1.1196, "step": 594 }, { "epoch": 0.15, "learning_rate": 1.9266493280574365e-05, "loss": 1.1838, "step": 595 }, { "epoch": 0.15, "learning_rate": 1.9263440103895366e-05, "loss": 1.1557, "step": 596 }, { "epoch": 0.15, "learning_rate": 1.9260380829021162e-05, "loss": 1.1655, "step": 597 }, { "epoch": 0.15, "learning_rate": 1.92573154579657e-05, "loss": 1.1286, "step": 598 }, { "epoch": 0.15, "learning_rate": 1.9254243992746935e-05, "loss": 1.2225, "step": 599 }, { "epoch": 0.15, "learning_rate": 1.9251166435386837e-05, "loss": 1.1349, "step": 600 }, { "epoch": 0.15, "learning_rate": 1.9248082787911393e-05, "loss": 1.1465, "step": 601 }, { "epoch": 0.15, "learning_rate": 1.9244993052350584e-05, "loss": 1.1176, "step": 602 }, { "epoch": 0.15, "learning_rate": 1.9241897230738407e-05, "loss": 1.1846, "step": 603 }, { "epoch": 0.15, "learning_rate": 1.9238795325112867e-05, "loss": 1.175, "step": 604 }, { "epoch": 0.15, "learning_rate": 1.923568733751598e-05, "loss": 1.1535, "step": 605 }, { "epoch": 0.15, "learning_rate": 1.923257326999375e-05, "loss": 1.1568, "step": 606 }, { "epoch": 0.15, "learning_rate": 1.9229453124596193e-05, "loss": 1.1517, "step": 607 }, { "epoch": 0.15, "learning_rate": 1.9226326903377332e-05, "loss": 1.1543, "step": 608 }, { "epoch": 0.15, "learning_rate": 1.9223194608395173e-05, "loss": 1.1724, "step": 609 }, { "epoch": 0.15, "learning_rate": 1.9220056241711736e-05, "loss": 1.1496, "step": 610 }, { "epoch": 0.15, "learning_rate": 1.9216911805393034e-05, "loss": 1.1443, "step": 611 }, { "epoch": 0.15, "learning_rate": 1.9213761301509068e-05, "loss": 1.168, "step": 612 }, { "epoch": 0.15, "learning_rate": 1.9210604732133842e-05, "loss": 1.1573, "step": 613 }, { "epoch": 0.15, "learning_rate": 1.9207442099345357e-05, "loss": 1.1706, "step": 614 }, { "epoch": 0.15, "learning_rate": 1.9204273405225588e-05, "loss": 1.1759, "step": 615 }, { "epoch": 0.15, "learning_rate": 1.920109865186052e-05, "loss": 1.1374, "step": 616 }, { "epoch": 0.15, "learning_rate": 1.919791784134011e-05, "loss": 1.1089, "step": 617 }, { "epoch": 0.15, "learning_rate": 1.9194730975758323e-05, "loss": 1.1456, "step": 618 }, { "epoch": 0.16, "learning_rate": 1.9191538057213082e-05, "loss": 1.1486, "step": 619 }, { "epoch": 0.16, "learning_rate": 1.9188339087806323e-05, "loss": 1.0945, "step": 620 }, { "epoch": 0.16, "learning_rate": 1.9185134069643943e-05, "loss": 1.187, "step": 621 }, { "epoch": 0.16, "learning_rate": 1.918192300483584e-05, "loss": 1.154, "step": 622 }, { "epoch": 0.16, "learning_rate": 1.9178705895495877e-05, "loss": 1.1238, "step": 623 }, { "epoch": 0.16, "learning_rate": 1.91754827437419e-05, "loss": 1.1631, "step": 624 }, { "epoch": 0.16, "learning_rate": 1.917225355169574e-05, "loss": 1.1805, "step": 625 }, { "epoch": 0.16, "learning_rate": 1.9169018321483198e-05, "loss": 1.1535, "step": 626 }, { "epoch": 0.16, "learning_rate": 1.9165777055234055e-05, "loss": 1.1739, "step": 627 }, { "epoch": 0.16, "learning_rate": 1.9162529755082053e-05, "loss": 1.1287, "step": 628 }, { "epoch": 0.16, "learning_rate": 1.9159276423164923e-05, "loss": 1.1595, "step": 629 }, { "epoch": 0.16, "learning_rate": 1.9156017061624355e-05, "loss": 1.1729, "step": 630 }, { "epoch": 0.16, "learning_rate": 1.9152751672606017e-05, "loss": 1.1754, "step": 631 }, { "epoch": 0.16, "learning_rate": 1.9149480258259535e-05, "loss": 1.1619, "step": 632 }, { "epoch": 0.16, "learning_rate": 1.9146202820738505e-05, "loss": 1.1854, "step": 633 }, { "epoch": 0.16, "learning_rate": 1.9142919362200495e-05, "loss": 1.1873, "step": 634 }, { "epoch": 0.16, "learning_rate": 1.9139629884807034e-05, "loss": 1.1445, "step": 635 }, { "epoch": 0.16, "learning_rate": 1.9136334390723605e-05, "loss": 1.1513, "step": 636 }, { "epoch": 0.16, "learning_rate": 1.9133032882119656e-05, "loss": 1.1825, "step": 637 }, { "epoch": 0.16, "learning_rate": 1.9129725361168597e-05, "loss": 1.1554, "step": 638 }, { "epoch": 0.16, "learning_rate": 1.9126411830047803e-05, "loss": 1.106, "step": 639 }, { "epoch": 0.16, "learning_rate": 1.9123092290938587e-05, "loss": 1.1749, "step": 640 }, { "epoch": 0.16, "learning_rate": 1.9119766746026237e-05, "loss": 1.1705, "step": 641 }, { "epoch": 0.16, "learning_rate": 1.9116435197499977e-05, "loss": 1.1638, "step": 642 }, { "epoch": 0.16, "learning_rate": 1.9113097647552995e-05, "loss": 1.1558, "step": 643 }, { "epoch": 0.16, "learning_rate": 1.9109754098382428e-05, "loss": 1.1578, "step": 644 }, { "epoch": 0.16, "learning_rate": 1.9106404552189355e-05, "loss": 1.1611, "step": 645 }, { "epoch": 0.16, "learning_rate": 1.9103049011178813e-05, "loss": 1.1439, "step": 646 }, { "epoch": 0.16, "learning_rate": 1.909968747755978e-05, "loss": 1.131, "step": 647 }, { "epoch": 0.16, "learning_rate": 1.9096319953545186e-05, "loss": 1.1622, "step": 648 }, { "epoch": 0.16, "learning_rate": 1.9092946441351888e-05, "loss": 1.1405, "step": 649 }, { "epoch": 0.16, "learning_rate": 1.90895669432007e-05, "loss": 1.1422, "step": 650 }, { "epoch": 0.16, "learning_rate": 1.9086181461316374e-05, "loss": 1.131, "step": 651 }, { "epoch": 0.16, "learning_rate": 1.9082789997927594e-05, "loss": 1.157, "step": 652 }, { "epoch": 0.16, "learning_rate": 1.9079392555266994e-05, "loss": 1.1534, "step": 653 }, { "epoch": 0.16, "learning_rate": 1.9075989135571135e-05, "loss": 1.1296, "step": 654 }, { "epoch": 0.16, "learning_rate": 1.9072579741080506e-05, "loss": 1.1701, "step": 655 }, { "epoch": 0.16, "learning_rate": 1.9069164374039553e-05, "loss": 1.1643, "step": 656 }, { "epoch": 0.16, "learning_rate": 1.906574303669663e-05, "loss": 1.1731, "step": 657 }, { "epoch": 0.16, "learning_rate": 1.9062315731304028e-05, "loss": 1.1791, "step": 658 }, { "epoch": 0.17, "learning_rate": 1.9058882460117972e-05, "loss": 1.1344, "step": 659 }, { "epoch": 0.17, "learning_rate": 1.9055443225398616e-05, "loss": 1.1332, "step": 660 }, { "epoch": 0.17, "learning_rate": 1.905199802941003e-05, "loss": 1.137, "step": 661 }, { "epoch": 0.17, "learning_rate": 1.9048546874420214e-05, "loss": 1.1539, "step": 662 }, { "epoch": 0.17, "learning_rate": 1.904508976270109e-05, "loss": 1.193, "step": 663 }, { "epoch": 0.17, "learning_rate": 1.9041626696528503e-05, "loss": 1.1301, "step": 664 }, { "epoch": 0.17, "learning_rate": 1.9038157678182217e-05, "loss": 1.167, "step": 665 }, { "epoch": 0.17, "learning_rate": 1.9034682709945916e-05, "loss": 1.1081, "step": 666 }, { "epoch": 0.17, "learning_rate": 1.903120179410719e-05, "loss": 1.1584, "step": 667 }, { "epoch": 0.17, "learning_rate": 1.9027714932957564e-05, "loss": 1.1973, "step": 668 }, { "epoch": 0.17, "learning_rate": 1.9024222128792457e-05, "loss": 1.195, "step": 669 }, { "epoch": 0.17, "learning_rate": 1.9020723383911214e-05, "loss": 1.1509, "step": 670 }, { "epoch": 0.17, "learning_rate": 1.901721870061709e-05, "loss": 1.138, "step": 671 }, { "epoch": 0.17, "learning_rate": 1.9013708081217238e-05, "loss": 1.1466, "step": 672 }, { "epoch": 0.17, "learning_rate": 1.901019152802273e-05, "loss": 1.1628, "step": 673 }, { "epoch": 0.17, "learning_rate": 1.900666904334854e-05, "loss": 1.1488, "step": 674 }, { "epoch": 0.17, "learning_rate": 1.900314062951355e-05, "loss": 1.1425, "step": 675 }, { "epoch": 0.17, "learning_rate": 1.8999606288840535e-05, "loss": 1.1945, "step": 676 }, { "epoch": 0.17, "learning_rate": 1.8996066023656186e-05, "loss": 1.1456, "step": 677 }, { "epoch": 0.17, "learning_rate": 1.8992519836291086e-05, "loss": 1.1507, "step": 678 }, { "epoch": 0.17, "learning_rate": 1.8988967729079717e-05, "loss": 1.1919, "step": 679 }, { "epoch": 0.17, "learning_rate": 1.8985409704360457e-05, "loss": 1.1731, "step": 680 }, { "epoch": 0.17, "learning_rate": 1.8981845764475585e-05, "loss": 1.1574, "step": 681 }, { "epoch": 0.17, "learning_rate": 1.8978275911771268e-05, "loss": 1.1744, "step": 682 }, { "epoch": 0.17, "learning_rate": 1.897470014859757e-05, "loss": 1.1687, "step": 683 }, { "epoch": 0.17, "learning_rate": 1.8971118477308444e-05, "loss": 1.1535, "step": 684 }, { "epoch": 0.17, "learning_rate": 1.8967530900261734e-05, "loss": 1.1507, "step": 685 }, { "epoch": 0.17, "learning_rate": 1.8963937419819162e-05, "loss": 1.149, "step": 686 }, { "epoch": 0.17, "learning_rate": 1.8960338038346353e-05, "loss": 1.1187, "step": 687 }, { "epoch": 0.17, "learning_rate": 1.8956732758212803e-05, "loss": 1.1883, "step": 688 }, { "epoch": 0.17, "learning_rate": 1.89531215817919e-05, "loss": 1.144, "step": 689 }, { "epoch": 0.17, "learning_rate": 1.8949504511460906e-05, "loss": 1.1729, "step": 690 }, { "epoch": 0.17, "learning_rate": 1.894588154960097e-05, "loss": 1.2176, "step": 691 }, { "epoch": 0.17, "learning_rate": 1.8942252698597113e-05, "loss": 1.1319, "step": 692 }, { "epoch": 0.17, "learning_rate": 1.8938617960838243e-05, "loss": 1.1913, "step": 693 }, { "epoch": 0.17, "learning_rate": 1.8934977338717123e-05, "loss": 1.1474, "step": 694 }, { "epoch": 0.17, "learning_rate": 1.8931330834630418e-05, "loss": 1.1265, "step": 695 }, { "epoch": 0.17, "learning_rate": 1.892767845097864e-05, "loss": 1.1654, "step": 696 }, { "epoch": 0.17, "learning_rate": 1.8924020190166183e-05, "loss": 1.1212, "step": 697 }, { "epoch": 0.17, "learning_rate": 1.8920356054601315e-05, "loss": 1.1555, "step": 698 }, { "epoch": 0.18, "learning_rate": 1.8916686046696157e-05, "loss": 1.1352, "step": 699 }, { "epoch": 0.18, "learning_rate": 1.8913010168866712e-05, "loss": 1.1467, "step": 700 }, { "epoch": 0.18, "learning_rate": 1.8909328423532834e-05, "loss": 1.1385, "step": 701 }, { "epoch": 0.18, "learning_rate": 1.8905640813118243e-05, "loss": 1.095, "step": 702 }, { "epoch": 0.18, "learning_rate": 1.890194734005053e-05, "loss": 1.167, "step": 703 }, { "epoch": 0.18, "learning_rate": 1.8898248006761128e-05, "loss": 1.1852, "step": 704 }, { "epoch": 0.18, "learning_rate": 1.889454281568534e-05, "loss": 1.2101, "step": 705 }, { "epoch": 0.18, "learning_rate": 1.8890831769262326e-05, "loss": 1.1093, "step": 706 }, { "epoch": 0.18, "learning_rate": 1.8887114869935098e-05, "loss": 1.1292, "step": 707 }, { "epoch": 0.18, "learning_rate": 1.8883392120150513e-05, "loss": 1.1412, "step": 708 }, { "epoch": 0.18, "learning_rate": 1.8879663522359286e-05, "loss": 1.1555, "step": 709 }, { "epoch": 0.18, "learning_rate": 1.887592907901599e-05, "loss": 1.2103, "step": 710 }, { "epoch": 0.18, "learning_rate": 1.8872188792579035e-05, "loss": 1.1497, "step": 711 }, { "epoch": 0.18, "learning_rate": 1.886844266551068e-05, "loss": 1.2009, "step": 712 }, { "epoch": 0.18, "learning_rate": 1.8864690700277027e-05, "loss": 1.1201, "step": 713 }, { "epoch": 0.18, "learning_rate": 1.8860932899348028e-05, "loss": 1.1861, "step": 714 }, { "epoch": 0.18, "learning_rate": 1.8857169265197472e-05, "loss": 1.1583, "step": 715 }, { "epoch": 0.18, "learning_rate": 1.885339980030299e-05, "loss": 1.1722, "step": 716 }, { "epoch": 0.18, "learning_rate": 1.8849624507146045e-05, "loss": 1.1317, "step": 717 }, { "epoch": 0.18, "learning_rate": 1.884584338821195e-05, "loss": 1.1377, "step": 718 }, { "epoch": 0.18, "learning_rate": 1.8842056445989837e-05, "loss": 1.1078, "step": 719 }, { "epoch": 0.18, "learning_rate": 1.883826368297269e-05, "loss": 1.1418, "step": 720 }, { "epoch": 0.18, "learning_rate": 1.88344651016573e-05, "loss": 1.111, "step": 721 }, { "epoch": 0.18, "learning_rate": 1.8830660704544316e-05, "loss": 1.1484, "step": 722 }, { "epoch": 0.18, "learning_rate": 1.88268504941382e-05, "loss": 1.1069, "step": 723 }, { "epoch": 0.18, "learning_rate": 1.882303447294724e-05, "loss": 1.1154, "step": 724 }, { "epoch": 0.18, "learning_rate": 1.881921264348355e-05, "loss": 1.164, "step": 725 }, { "epoch": 0.18, "learning_rate": 1.881538500826308e-05, "loss": 1.1748, "step": 726 }, { "epoch": 0.18, "learning_rate": 1.8811551569805583e-05, "loss": 1.185, "step": 727 }, { "epoch": 0.18, "learning_rate": 1.8807712330634645e-05, "loss": 1.1297, "step": 728 }, { "epoch": 0.18, "learning_rate": 1.8803867293277668e-05, "loss": 1.1018, "step": 729 }, { "epoch": 0.18, "learning_rate": 1.8800016460265865e-05, "loss": 1.1409, "step": 730 }, { "epoch": 0.18, "learning_rate": 1.879615983413428e-05, "loss": 1.1974, "step": 731 }, { "epoch": 0.18, "learning_rate": 1.879229741742175e-05, "loss": 1.139, "step": 732 }, { "epoch": 0.18, "learning_rate": 1.8788429212670942e-05, "loss": 1.1361, "step": 733 }, { "epoch": 0.18, "learning_rate": 1.878455522242832e-05, "loss": 1.1539, "step": 734 }, { "epoch": 0.18, "learning_rate": 1.8780675449244164e-05, "loss": 1.1267, "step": 735 }, { "epoch": 0.18, "learning_rate": 1.8776789895672557e-05, "loss": 1.1288, "step": 736 }, { "epoch": 0.18, "learning_rate": 1.8772898564271393e-05, "loss": 1.1468, "step": 737 }, { "epoch": 0.18, "learning_rate": 1.8769001457602366e-05, "loss": 1.1466, "step": 738 }, { "epoch": 0.19, "learning_rate": 1.8765098578230967e-05, "loss": 1.1101, "step": 739 }, { "epoch": 0.19, "learning_rate": 1.87611899287265e-05, "loss": 1.1665, "step": 740 }, { "epoch": 0.19, "learning_rate": 1.875727551166205e-05, "loss": 1.1373, "step": 741 }, { "epoch": 0.19, "learning_rate": 1.8753355329614512e-05, "loss": 1.0958, "step": 742 }, { "epoch": 0.19, "learning_rate": 1.874942938516458e-05, "loss": 1.1502, "step": 743 }, { "epoch": 0.19, "learning_rate": 1.8745497680896722e-05, "loss": 1.1533, "step": 744 }, { "epoch": 0.19, "learning_rate": 1.8741560219399217e-05, "loss": 1.1785, "step": 745 }, { "epoch": 0.19, "learning_rate": 1.8737617003264128e-05, "loss": 1.1429, "step": 746 }, { "epoch": 0.19, "learning_rate": 1.8733668035087302e-05, "loss": 1.1449, "step": 747 }, { "epoch": 0.19, "learning_rate": 1.8729713317468376e-05, "loss": 1.1202, "step": 748 }, { "epoch": 0.19, "learning_rate": 1.872575285301077e-05, "loss": 1.1687, "step": 749 }, { "epoch": 0.19, "learning_rate": 1.872178664432169e-05, "loss": 1.0842, "step": 750 }, { "epoch": 0.19, "learning_rate": 1.8717814694012123e-05, "loss": 1.1473, "step": 751 }, { "epoch": 0.19, "learning_rate": 1.8713837004696836e-05, "loss": 1.1721, "step": 752 }, { "epoch": 0.19, "learning_rate": 1.8709853578994372e-05, "loss": 1.1508, "step": 753 }, { "epoch": 0.19, "learning_rate": 1.8705864419527048e-05, "loss": 1.1319, "step": 754 }, { "epoch": 0.19, "learning_rate": 1.8701869528920965e-05, "loss": 1.1481, "step": 755 }, { "epoch": 0.19, "learning_rate": 1.8697868909805985e-05, "loss": 1.155, "step": 756 }, { "epoch": 0.19, "learning_rate": 1.8693862564815752e-05, "loss": 1.1623, "step": 757 }, { "epoch": 0.19, "learning_rate": 1.8689850496587674e-05, "loss": 1.1698, "step": 758 }, { "epoch": 0.19, "learning_rate": 1.868583270776292e-05, "loss": 1.1352, "step": 759 }, { "epoch": 0.19, "learning_rate": 1.868180920098644e-05, "loss": 1.1298, "step": 760 }, { "epoch": 0.19, "learning_rate": 1.8677779978906936e-05, "loss": 1.1495, "step": 761 }, { "epoch": 0.19, "learning_rate": 1.8673745044176878e-05, "loss": 1.1738, "step": 762 }, { "epoch": 0.19, "learning_rate": 1.8669704399452494e-05, "loss": 1.1379, "step": 763 }, { "epoch": 0.19, "learning_rate": 1.8665658047393772e-05, "loss": 1.1579, "step": 764 }, { "epoch": 0.19, "learning_rate": 1.866160599066446e-05, "loss": 1.1637, "step": 765 }, { "epoch": 0.19, "learning_rate": 1.8657548231932058e-05, "loss": 1.1425, "step": 766 }, { "epoch": 0.19, "learning_rate": 1.8653484773867813e-05, "loss": 1.1535, "step": 767 }, { "epoch": 0.19, "learning_rate": 1.864941561914674e-05, "loss": 1.2032, "step": 768 }, { "epoch": 0.19, "learning_rate": 1.8645340770447595e-05, "loss": 1.1768, "step": 769 }, { "epoch": 0.19, "learning_rate": 1.864126023045288e-05, "loss": 1.1417, "step": 770 }, { "epoch": 0.19, "learning_rate": 1.8637174001848843e-05, "loss": 1.1655, "step": 771 }, { "epoch": 0.19, "learning_rate": 1.8633082087325485e-05, "loss": 1.1538, "step": 772 }, { "epoch": 0.19, "learning_rate": 1.8628984489576546e-05, "loss": 1.1507, "step": 773 }, { "epoch": 0.19, "learning_rate": 1.8624881211299507e-05, "loss": 1.1553, "step": 774 }, { "epoch": 0.19, "learning_rate": 1.8620772255195584e-05, "loss": 1.1621, "step": 775 }, { "epoch": 0.19, "learning_rate": 1.861665762396974e-05, "loss": 1.1958, "step": 776 }, { "epoch": 0.19, "learning_rate": 1.8612537320330666e-05, "loss": 1.1577, "step": 777 }, { "epoch": 0.19, "learning_rate": 1.860841134699079e-05, "loss": 1.1347, "step": 778 }, { "epoch": 0.2, "learning_rate": 1.8604279706666272e-05, "loss": 1.1616, "step": 779 }, { "epoch": 0.2, "learning_rate": 1.8600142402077006e-05, "loss": 1.1182, "step": 780 }, { "epoch": 0.2, "learning_rate": 1.859599943594661e-05, "loss": 1.1487, "step": 781 }, { "epoch": 0.2, "learning_rate": 1.8591850811002433e-05, "loss": 1.1478, "step": 782 }, { "epoch": 0.2, "learning_rate": 1.8587696529975546e-05, "loss": 1.1319, "step": 783 }, { "epoch": 0.2, "learning_rate": 1.8583536595600742e-05, "loss": 1.2203, "step": 784 }, { "epoch": 0.2, "learning_rate": 1.8579371010616543e-05, "loss": 1.1326, "step": 785 }, { "epoch": 0.2, "learning_rate": 1.8575199777765183e-05, "loss": 1.1584, "step": 786 }, { "epoch": 0.2, "learning_rate": 1.857102289979262e-05, "loss": 1.138, "step": 787 }, { "epoch": 0.2, "learning_rate": 1.8566840379448523e-05, "loss": 1.1394, "step": 788 }, { "epoch": 0.2, "learning_rate": 1.8562652219486277e-05, "loss": 1.1159, "step": 789 }, { "epoch": 0.2, "learning_rate": 1.8558458422662987e-05, "loss": 1.159, "step": 790 }, { "epoch": 0.2, "learning_rate": 1.8554258991739454e-05, "loss": 1.1258, "step": 791 }, { "epoch": 0.2, "learning_rate": 1.8550053929480202e-05, "loss": 1.1275, "step": 792 }, { "epoch": 0.2, "learning_rate": 1.8545843238653453e-05, "loss": 1.1557, "step": 793 }, { "epoch": 0.2, "learning_rate": 1.854162692203114e-05, "loss": 1.1664, "step": 794 }, { "epoch": 0.2, "learning_rate": 1.8537404982388894e-05, "loss": 1.1048, "step": 795 }, { "epoch": 0.2, "learning_rate": 1.853317742250606e-05, "loss": 1.1681, "step": 796 }, { "epoch": 0.2, "learning_rate": 1.8528944245165662e-05, "loss": 1.1207, "step": 797 }, { "epoch": 0.2, "learning_rate": 1.852470545315444e-05, "loss": 1.173, "step": 798 }, { "epoch": 0.2, "learning_rate": 1.8520461049262826e-05, "loss": 1.1686, "step": 799 }, { "epoch": 0.2, "learning_rate": 1.8516211036284937e-05, "loss": 1.1554, "step": 800 }, { "epoch": 0.2, "learning_rate": 1.8511955417018598e-05, "loss": 1.1449, "step": 801 }, { "epoch": 0.2, "learning_rate": 1.850769419426531e-05, "loss": 1.1677, "step": 802 }, { "epoch": 0.2, "learning_rate": 1.8503427370830272e-05, "loss": 1.1368, "step": 803 }, { "epoch": 0.2, "learning_rate": 1.849915494952237e-05, "loss": 1.1605, "step": 804 }, { "epoch": 0.2, "learning_rate": 1.8494876933154167e-05, "loss": 1.1292, "step": 805 }, { "epoch": 0.2, "learning_rate": 1.849059332454192e-05, "loss": 1.1748, "step": 806 }, { "epoch": 0.2, "learning_rate": 1.8486304126505557e-05, "loss": 1.1831, "step": 807 }, { "epoch": 0.2, "learning_rate": 1.8482009341868696e-05, "loss": 1.1128, "step": 808 }, { "epoch": 0.2, "learning_rate": 1.8477708973458625e-05, "loss": 1.1513, "step": 809 }, { "epoch": 0.2, "learning_rate": 1.8473403024106303e-05, "loss": 1.1406, "step": 810 }, { "epoch": 0.2, "learning_rate": 1.8469091496646386e-05, "loss": 1.167, "step": 811 }, { "epoch": 0.2, "learning_rate": 1.8464774393917174e-05, "loss": 1.1755, "step": 812 }, { "epoch": 0.2, "learning_rate": 1.8460451718760653e-05, "loss": 1.1518, "step": 813 }, { "epoch": 0.2, "learning_rate": 1.845612347402247e-05, "loss": 1.1069, "step": 814 }, { "epoch": 0.2, "learning_rate": 1.8451789662551955e-05, "loss": 1.1433, "step": 815 }, { "epoch": 0.2, "learning_rate": 1.8447450287202075e-05, "loss": 1.1446, "step": 816 }, { "epoch": 0.2, "learning_rate": 1.844310535082948e-05, "loss": 1.1592, "step": 817 }, { "epoch": 0.2, "learning_rate": 1.843875485629448e-05, "loss": 1.1676, "step": 818 }, { "epoch": 0.21, "learning_rate": 1.8434398806461037e-05, "loss": 1.1727, "step": 819 }, { "epoch": 0.21, "learning_rate": 1.843003720419677e-05, "loss": 1.1365, "step": 820 }, { "epoch": 0.21, "learning_rate": 1.842567005237296e-05, "loss": 1.1645, "step": 821 }, { "epoch": 0.21, "learning_rate": 1.8421297353864532e-05, "loss": 1.1295, "step": 822 }, { "epoch": 0.21, "learning_rate": 1.8416919111550074e-05, "loss": 1.1609, "step": 823 }, { "epoch": 0.21, "learning_rate": 1.8412535328311813e-05, "loss": 1.1398, "step": 824 }, { "epoch": 0.21, "learning_rate": 1.840814600703563e-05, "loss": 1.1503, "step": 825 }, { "epoch": 0.21, "learning_rate": 1.8403751150611048e-05, "loss": 1.1478, "step": 826 }, { "epoch": 0.21, "learning_rate": 1.8399350761931237e-05, "loss": 1.104, "step": 827 }, { "epoch": 0.21, "learning_rate": 1.8394944843893006e-05, "loss": 1.1872, "step": 828 }, { "epoch": 0.21, "learning_rate": 1.8390533399396807e-05, "loss": 1.14, "step": 829 }, { "epoch": 0.21, "learning_rate": 1.8386116431346724e-05, "loss": 1.1727, "step": 830 }, { "epoch": 0.21, "learning_rate": 1.838169394265049e-05, "loss": 1.1561, "step": 831 }, { "epoch": 0.21, "learning_rate": 1.8377265936219457e-05, "loss": 1.1313, "step": 832 }, { "epoch": 0.21, "learning_rate": 1.837283241496862e-05, "loss": 1.1384, "step": 833 }, { "epoch": 0.21, "learning_rate": 1.8368393381816597e-05, "loss": 1.1508, "step": 834 }, { "epoch": 0.21, "learning_rate": 1.8363948839685638e-05, "loss": 1.1072, "step": 835 }, { "epoch": 0.21, "learning_rate": 1.8359498791501624e-05, "loss": 1.1263, "step": 836 }, { "epoch": 0.21, "learning_rate": 1.835504324019406e-05, "loss": 1.1232, "step": 837 }, { "epoch": 0.21, "learning_rate": 1.8350582188696058e-05, "loss": 1.1165, "step": 838 }, { "epoch": 0.21, "learning_rate": 1.8346115639944374e-05, "loss": 1.2069, "step": 839 }, { "epoch": 0.21, "learning_rate": 1.834164359687937e-05, "loss": 1.1633, "step": 840 }, { "epoch": 0.21, "learning_rate": 1.833716606244502e-05, "loss": 1.0767, "step": 841 }, { "epoch": 0.21, "learning_rate": 1.833268303958893e-05, "loss": 1.1436, "step": 842 }, { "epoch": 0.21, "learning_rate": 1.8328194531262302e-05, "loss": 1.1678, "step": 843 }, { "epoch": 0.21, "learning_rate": 1.832370054041996e-05, "loss": 1.1549, "step": 844 }, { "epoch": 0.21, "learning_rate": 1.8319201070020333e-05, "loss": 1.1319, "step": 845 }, { "epoch": 0.21, "learning_rate": 1.8314696123025456e-05, "loss": 1.169, "step": 846 }, { "epoch": 0.21, "learning_rate": 1.831018570240097e-05, "loss": 1.1621, "step": 847 }, { "epoch": 0.21, "learning_rate": 1.8305669811116123e-05, "loss": 1.1394, "step": 848 }, { "epoch": 0.21, "learning_rate": 1.830114845214376e-05, "loss": 1.1524, "step": 849 }, { "epoch": 0.21, "learning_rate": 1.8296621628460328e-05, "loss": 1.1724, "step": 850 }, { "epoch": 0.21, "learning_rate": 1.829208934304587e-05, "loss": 1.1326, "step": 851 }, { "epoch": 0.21, "learning_rate": 1.8287551598884024e-05, "loss": 1.1349, "step": 852 }, { "epoch": 0.21, "learning_rate": 1.8283008398962025e-05, "loss": 1.086, "step": 853 }, { "epoch": 0.21, "learning_rate": 1.82784597462707e-05, "loss": 1.1571, "step": 854 }, { "epoch": 0.21, "learning_rate": 1.827390564380445e-05, "loss": 1.1727, "step": 855 }, { "epoch": 0.21, "learning_rate": 1.826934609456129e-05, "loss": 1.1329, "step": 856 }, { "epoch": 0.21, "learning_rate": 1.8264781101542797e-05, "loss": 1.1322, "step": 857 }, { "epoch": 0.21, "learning_rate": 1.826021066775415e-05, "loss": 1.158, "step": 858 }, { "epoch": 0.22, "learning_rate": 1.8255634796204097e-05, "loss": 1.1706, "step": 859 }, { "epoch": 0.22, "learning_rate": 1.825105348990497e-05, "loss": 1.1444, "step": 860 }, { "epoch": 0.22, "learning_rate": 1.824646675187268e-05, "loss": 1.1541, "step": 861 }, { "epoch": 0.22, "learning_rate": 1.824187458512672e-05, "loss": 1.1305, "step": 862 }, { "epoch": 0.22, "learning_rate": 1.823727699269013e-05, "loss": 1.1115, "step": 863 }, { "epoch": 0.22, "learning_rate": 1.8232673977589556e-05, "loss": 1.1357, "step": 864 }, { "epoch": 0.22, "learning_rate": 1.8228065542855196e-05, "loss": 1.131, "step": 865 }, { "epoch": 0.22, "learning_rate": 1.822345169152082e-05, "loss": 1.1547, "step": 866 }, { "epoch": 0.22, "learning_rate": 1.8218832426623763e-05, "loss": 1.1099, "step": 867 }, { "epoch": 0.22, "learning_rate": 1.8214207751204917e-05, "loss": 1.1397, "step": 868 }, { "epoch": 0.22, "learning_rate": 1.8209577668308748e-05, "loss": 1.1244, "step": 869 }, { "epoch": 0.22, "learning_rate": 1.820494218098327e-05, "loss": 1.1298, "step": 870 }, { "epoch": 0.22, "learning_rate": 1.820030129228007e-05, "loss": 1.1717, "step": 871 }, { "epoch": 0.22, "learning_rate": 1.8195655005254274e-05, "loss": 1.1962, "step": 872 }, { "epoch": 0.22, "learning_rate": 1.8191003322964573e-05, "loss": 1.1678, "step": 873 }, { "epoch": 0.22, "learning_rate": 1.81863462484732e-05, "loss": 1.1688, "step": 874 }, { "epoch": 0.22, "learning_rate": 1.8181683784845953e-05, "loss": 1.1329, "step": 875 }, { "epoch": 0.22, "learning_rate": 1.8177015935152156e-05, "loss": 1.1256, "step": 876 }, { "epoch": 0.22, "learning_rate": 1.8172342702464705e-05, "loss": 1.1272, "step": 877 }, { "epoch": 0.22, "learning_rate": 1.8167664089860015e-05, "loss": 1.1353, "step": 878 }, { "epoch": 0.22, "learning_rate": 1.816298010041806e-05, "loss": 1.1409, "step": 879 }, { "epoch": 0.22, "learning_rate": 1.815829073722234e-05, "loss": 1.1793, "step": 880 }, { "epoch": 0.22, "learning_rate": 1.815359600335991e-05, "loss": 1.1803, "step": 881 }, { "epoch": 0.22, "learning_rate": 1.8148895901921334e-05, "loss": 1.0923, "step": 882 }, { "epoch": 0.22, "learning_rate": 1.8144190436000744e-05, "loss": 1.1224, "step": 883 }, { "epoch": 0.22, "learning_rate": 1.8139479608695773e-05, "loss": 1.1064, "step": 884 }, { "epoch": 0.22, "learning_rate": 1.8134763423107597e-05, "loss": 1.1914, "step": 885 }, { "epoch": 0.22, "learning_rate": 1.8130041882340922e-05, "loss": 1.1188, "step": 886 }, { "epoch": 0.22, "learning_rate": 1.8125314989503976e-05, "loss": 1.1089, "step": 887 }, { "epoch": 0.22, "learning_rate": 1.8120582747708503e-05, "loss": 1.153, "step": 888 }, { "epoch": 0.22, "learning_rate": 1.811584516006978e-05, "loss": 1.0919, "step": 889 }, { "epoch": 0.22, "learning_rate": 1.8111102229706593e-05, "loss": 1.1386, "step": 890 }, { "epoch": 0.22, "learning_rate": 1.8106353959741257e-05, "loss": 1.1466, "step": 891 }, { "epoch": 0.22, "learning_rate": 1.8101600353299593e-05, "loss": 1.1245, "step": 892 }, { "epoch": 0.22, "learning_rate": 1.8096841413510936e-05, "loss": 1.1343, "step": 893 }, { "epoch": 0.22, "learning_rate": 1.809207714350813e-05, "loss": 1.1246, "step": 894 }, { "epoch": 0.22, "learning_rate": 1.808730754642754e-05, "loss": 1.1404, "step": 895 }, { "epoch": 0.22, "learning_rate": 1.8082532625409022e-05, "loss": 1.1299, "step": 896 }, { "epoch": 0.22, "learning_rate": 1.8077752383595947e-05, "loss": 1.1755, "step": 897 }, { "epoch": 0.22, "learning_rate": 1.8072966824135185e-05, "loss": 1.1346, "step": 898 }, { "epoch": 0.23, "learning_rate": 1.8068175950177107e-05, "loss": 1.1334, "step": 899 }, { "epoch": 0.23, "learning_rate": 1.8063379764875583e-05, "loss": 1.1628, "step": 900 }, { "epoch": 0.23, "learning_rate": 1.805857827138798e-05, "loss": 1.1322, "step": 901 }, { "epoch": 0.23, "learning_rate": 1.8053771472875158e-05, "loss": 1.1404, "step": 902 }, { "epoch": 0.23, "learning_rate": 1.804895937250147e-05, "loss": 1.1874, "step": 903 }, { "epoch": 0.23, "learning_rate": 1.804414197343476e-05, "loss": 1.1826, "step": 904 }, { "epoch": 0.23, "learning_rate": 1.8039319278846357e-05, "loss": 1.1453, "step": 905 }, { "epoch": 0.23, "learning_rate": 1.803449129191108e-05, "loss": 1.1891, "step": 906 }, { "epoch": 0.23, "learning_rate": 1.8029658015807237e-05, "loss": 1.1165, "step": 907 }, { "epoch": 0.23, "learning_rate": 1.8024819453716603e-05, "loss": 1.1669, "step": 908 }, { "epoch": 0.23, "learning_rate": 1.801997560882444e-05, "loss": 1.1853, "step": 909 }, { "epoch": 0.23, "learning_rate": 1.80151264843195e-05, "loss": 1.1313, "step": 910 }, { "epoch": 0.23, "learning_rate": 1.801027208339399e-05, "loss": 1.1767, "step": 911 }, { "epoch": 0.23, "learning_rate": 1.8005412409243604e-05, "loss": 1.1243, "step": 912 }, { "epoch": 0.23, "learning_rate": 1.8000547465067506e-05, "loss": 1.1417, "step": 913 }, { "epoch": 0.23, "learning_rate": 1.7995677254068317e-05, "loss": 1.1411, "step": 914 }, { "epoch": 0.23, "learning_rate": 1.7990801779452146e-05, "loss": 1.125, "step": 915 }, { "epoch": 0.23, "learning_rate": 1.7985921044428553e-05, "loss": 1.1295, "step": 916 }, { "epoch": 0.23, "learning_rate": 1.798103505221056e-05, "loss": 1.1399, "step": 917 }, { "epoch": 0.23, "learning_rate": 1.797614380601466e-05, "loss": 1.1484, "step": 918 }, { "epoch": 0.23, "learning_rate": 1.797124730906079e-05, "loss": 1.1406, "step": 919 }, { "epoch": 0.23, "learning_rate": 1.796634556457236e-05, "loss": 1.1255, "step": 920 }, { "epoch": 0.23, "learning_rate": 1.7961438575776227e-05, "loss": 1.1479, "step": 921 }, { "epoch": 0.23, "learning_rate": 1.79565263459027e-05, "loss": 1.139, "step": 922 }, { "epoch": 0.23, "learning_rate": 1.7951608878185533e-05, "loss": 1.114, "step": 923 }, { "epoch": 0.23, "learning_rate": 1.7946686175861936e-05, "loss": 1.1533, "step": 924 }, { "epoch": 0.23, "learning_rate": 1.7941758242172564e-05, "loss": 1.1436, "step": 925 }, { "epoch": 0.23, "learning_rate": 1.7936825080361515e-05, "loss": 1.1244, "step": 926 }, { "epoch": 0.23, "learning_rate": 1.7931886693676326e-05, "loss": 1.1739, "step": 927 }, { "epoch": 0.23, "learning_rate": 1.7926943085367975e-05, "loss": 1.1509, "step": 928 }, { "epoch": 0.23, "learning_rate": 1.792199425869088e-05, "loss": 1.1322, "step": 929 }, { "epoch": 0.23, "learning_rate": 1.7917040216902896e-05, "loss": 1.1118, "step": 930 }, { "epoch": 0.23, "learning_rate": 1.79120809632653e-05, "loss": 1.1198, "step": 931 }, { "epoch": 0.23, "learning_rate": 1.790711650104281e-05, "loss": 1.138, "step": 932 }, { "epoch": 0.23, "learning_rate": 1.7902146833503576e-05, "loss": 1.0988, "step": 933 }, { "epoch": 0.23, "learning_rate": 1.789717196391916e-05, "loss": 1.0823, "step": 934 }, { "epoch": 0.23, "learning_rate": 1.7892191895564564e-05, "loss": 1.1489, "step": 935 }, { "epoch": 0.23, "learning_rate": 1.7887206631718202e-05, "loss": 1.167, "step": 936 }, { "epoch": 0.23, "learning_rate": 1.7882216175661918e-05, "loss": 1.1487, "step": 937 }, { "epoch": 0.23, "learning_rate": 1.7877220530680958e-05, "loss": 1.1547, "step": 938 }, { "epoch": 0.24, "learning_rate": 1.787221970006401e-05, "loss": 1.1385, "step": 939 }, { "epoch": 0.24, "learning_rate": 1.7867213687103145e-05, "loss": 1.1223, "step": 940 }, { "epoch": 0.24, "learning_rate": 1.7862202495093867e-05, "loss": 1.1124, "step": 941 }, { "epoch": 0.24, "learning_rate": 1.7857186127335084e-05, "loss": 1.0959, "step": 942 }, { "epoch": 0.24, "learning_rate": 1.7852164587129107e-05, "loss": 1.1119, "step": 943 }, { "epoch": 0.24, "learning_rate": 1.7847137877781655e-05, "loss": 1.126, "step": 944 }, { "epoch": 0.24, "learning_rate": 1.7842106002601854e-05, "loss": 1.0909, "step": 945 }, { "epoch": 0.24, "learning_rate": 1.7837068964902225e-05, "loss": 1.1563, "step": 946 }, { "epoch": 0.24, "learning_rate": 1.783202676799869e-05, "loss": 1.1367, "step": 947 }, { "epoch": 0.24, "learning_rate": 1.7826979415210567e-05, "loss": 1.1718, "step": 948 }, { "epoch": 0.24, "learning_rate": 1.7821926909860567e-05, "loss": 1.154, "step": 949 }, { "epoch": 0.24, "learning_rate": 1.781686925527479e-05, "loss": 1.1703, "step": 950 }, { "epoch": 0.24, "learning_rate": 1.7811806454782734e-05, "loss": 1.1326, "step": 951 }, { "epoch": 0.24, "learning_rate": 1.780673851171728e-05, "loss": 1.155, "step": 952 }, { "epoch": 0.24, "learning_rate": 1.7801665429414696e-05, "loss": 1.165, "step": 953 }, { "epoch": 0.24, "learning_rate": 1.779658721121463e-05, "loss": 1.1676, "step": 954 }, { "epoch": 0.24, "learning_rate": 1.7791503860460112e-05, "loss": 1.147, "step": 955 }, { "epoch": 0.24, "learning_rate": 1.778641538049755e-05, "loss": 1.1424, "step": 956 }, { "epoch": 0.24, "learning_rate": 1.778132177467674e-05, "loss": 1.1565, "step": 957 }, { "epoch": 0.24, "learning_rate": 1.7776223046350833e-05, "loss": 1.1714, "step": 958 }, { "epoch": 0.24, "learning_rate": 1.777111919887636e-05, "loss": 1.1181, "step": 959 }, { "epoch": 0.24, "learning_rate": 1.7766010235613225e-05, "loss": 1.113, "step": 960 }, { "epoch": 0.24, "learning_rate": 1.7760896159924713e-05, "loss": 1.111, "step": 961 }, { "epoch": 0.24, "learning_rate": 1.775577697517744e-05, "loss": 1.1586, "step": 962 }, { "epoch": 0.24, "learning_rate": 1.7750652684741416e-05, "loss": 1.1261, "step": 963 }, { "epoch": 0.24, "learning_rate": 1.7745523291989998e-05, "loss": 1.1162, "step": 964 }, { "epoch": 0.24, "learning_rate": 1.7740388800299908e-05, "loss": 1.1283, "step": 965 }, { "epoch": 0.24, "learning_rate": 1.7735249213051222e-05, "loss": 1.1444, "step": 966 }, { "epoch": 0.24, "learning_rate": 1.773010453362737e-05, "loss": 1.1416, "step": 967 }, { "epoch": 0.24, "learning_rate": 1.7724954765415137e-05, "loss": 1.1055, "step": 968 }, { "epoch": 0.24, "learning_rate": 1.771979991180465e-05, "loss": 1.1437, "step": 969 }, { "epoch": 0.24, "learning_rate": 1.7714639976189393e-05, "loss": 1.1282, "step": 970 }, { "epoch": 0.24, "learning_rate": 1.770947496196619e-05, "loss": 1.1264, "step": 971 }, { "epoch": 0.24, "learning_rate": 1.7704304872535215e-05, "loss": 1.099, "step": 972 }, { "epoch": 0.24, "learning_rate": 1.7699129711299974e-05, "loss": 1.1394, "step": 973 }, { "epoch": 0.24, "learning_rate": 1.769394948166732e-05, "loss": 1.1058, "step": 974 }, { "epoch": 0.24, "learning_rate": 1.7688764187047434e-05, "loss": 1.0963, "step": 975 }, { "epoch": 0.24, "learning_rate": 1.7683573830853836e-05, "loss": 1.1599, "step": 976 }, { "epoch": 0.24, "learning_rate": 1.7678378416503386e-05, "loss": 1.131, "step": 977 }, { "epoch": 0.24, "learning_rate": 1.7673177947416258e-05, "loss": 1.0996, "step": 978 }, { "epoch": 0.25, "learning_rate": 1.7667972427015968e-05, "loss": 1.121, "step": 979 }, { "epoch": 0.25, "learning_rate": 1.766276185872934e-05, "loss": 1.0992, "step": 980 }, { "epoch": 0.25, "learning_rate": 1.7657546245986544e-05, "loss": 1.1414, "step": 981 }, { "epoch": 0.25, "learning_rate": 1.7652325592221054e-05, "loss": 1.1617, "step": 982 }, { "epoch": 0.25, "learning_rate": 1.7647099900869667e-05, "loss": 1.1452, "step": 983 }, { "epoch": 0.25, "learning_rate": 1.7641869175372493e-05, "loss": 1.1699, "step": 984 }, { "epoch": 0.25, "learning_rate": 1.7636633419172967e-05, "loss": 1.1133, "step": 985 }, { "epoch": 0.25, "learning_rate": 1.763139263571782e-05, "loss": 1.1838, "step": 986 }, { "epoch": 0.25, "learning_rate": 1.7626146828457108e-05, "loss": 1.1644, "step": 987 }, { "epoch": 0.25, "learning_rate": 1.762089600084419e-05, "loss": 1.1241, "step": 988 }, { "epoch": 0.25, "learning_rate": 1.7615640156335713e-05, "loss": 1.138, "step": 989 }, { "epoch": 0.25, "learning_rate": 1.761037929839165e-05, "loss": 1.1294, "step": 990 }, { "epoch": 0.25, "learning_rate": 1.7605113430475267e-05, "loss": 1.1117, "step": 991 }, { "epoch": 0.25, "learning_rate": 1.759984255605312e-05, "loss": 1.1412, "step": 992 }, { "epoch": 0.25, "learning_rate": 1.7594566678595075e-05, "loss": 1.1525, "step": 993 }, { "epoch": 0.25, "learning_rate": 1.7589285801574275e-05, "loss": 1.1433, "step": 994 }, { "epoch": 0.25, "learning_rate": 1.7583999928467166e-05, "loss": 1.1497, "step": 995 }, { "epoch": 0.25, "learning_rate": 1.757870906275348e-05, "loss": 1.1443, "step": 996 }, { "epoch": 0.25, "learning_rate": 1.7573413207916233e-05, "loss": 1.1735, "step": 997 }, { "epoch": 0.25, "learning_rate": 1.7568112367441735e-05, "loss": 1.122, "step": 998 }, { "epoch": 0.25, "learning_rate": 1.756280654481956e-05, "loss": 1.1582, "step": 999 }, { "epoch": 0.25, "learning_rate": 1.7557495743542586e-05, "loss": 1.1863, "step": 1000 }, { "epoch": 0.25, "learning_rate": 1.7552179967106942e-05, "loss": 1.1472, "step": 1001 }, { "epoch": 0.25, "learning_rate": 1.754685921901205e-05, "loss": 1.1519, "step": 1002 }, { "epoch": 0.25, "learning_rate": 1.7541533502760605e-05, "loss": 1.1436, "step": 1003 }, { "epoch": 0.25, "learning_rate": 1.7536202821858558e-05, "loss": 1.1381, "step": 1004 }, { "epoch": 0.25, "learning_rate": 1.753086717981515e-05, "loss": 1.1761, "step": 1005 }, { "epoch": 0.25, "learning_rate": 1.7525526580142865e-05, "loss": 1.1145, "step": 1006 }, { "epoch": 0.25, "learning_rate": 1.7520181026357467e-05, "loss": 1.1636, "step": 1007 }, { "epoch": 0.25, "learning_rate": 1.7514830521977978e-05, "loss": 1.1204, "step": 1008 }, { "epoch": 0.25, "learning_rate": 1.750947507052667e-05, "loss": 1.1599, "step": 1009 }, { "epoch": 0.25, "learning_rate": 1.7504114675529086e-05, "loss": 1.1347, "step": 1010 }, { "epoch": 0.25, "learning_rate": 1.749874934051401e-05, "loss": 1.086, "step": 1011 }, { "epoch": 0.25, "learning_rate": 1.749337906901349e-05, "loss": 1.1233, "step": 1012 }, { "epoch": 0.25, "learning_rate": 1.7488003864562813e-05, "loss": 1.1336, "step": 1013 }, { "epoch": 0.25, "learning_rate": 1.7482623730700518e-05, "loss": 1.1234, "step": 1014 }, { "epoch": 0.25, "learning_rate": 1.7477238670968394e-05, "loss": 1.1453, "step": 1015 }, { "epoch": 0.25, "learning_rate": 1.7471848688911465e-05, "loss": 1.1361, "step": 1016 }, { "epoch": 0.25, "learning_rate": 1.7466453788077998e-05, "loss": 1.1533, "step": 1017 }, { "epoch": 0.25, "learning_rate": 1.74610539720195e-05, "loss": 1.1476, "step": 1018 }, { "epoch": 0.26, "learning_rate": 1.7455649244290712e-05, "loss": 1.1528, "step": 1019 }, { "epoch": 0.26, "learning_rate": 1.7450239608449605e-05, "loss": 1.1311, "step": 1020 }, { "epoch": 0.26, "learning_rate": 1.744482506805739e-05, "loss": 1.1341, "step": 1021 }, { "epoch": 0.26, "learning_rate": 1.7439405626678496e-05, "loss": 1.2054, "step": 1022 }, { "epoch": 0.26, "learning_rate": 1.7433981287880587e-05, "loss": 1.1626, "step": 1023 }, { "epoch": 0.26, "learning_rate": 1.7428552055234547e-05, "loss": 1.1471, "step": 1024 }, { "epoch": 0.26, "learning_rate": 1.7423117932314487e-05, "loss": 1.1032, "step": 1025 }, { "epoch": 0.26, "learning_rate": 1.7417678922697727e-05, "loss": 1.1782, "step": 1026 }, { "epoch": 0.26, "learning_rate": 1.7412235029964812e-05, "loss": 1.1492, "step": 1027 }, { "epoch": 0.26, "learning_rate": 1.7406786257699494e-05, "loss": 1.1355, "step": 1028 }, { "epoch": 0.26, "learning_rate": 1.7401332609488754e-05, "loss": 1.1198, "step": 1029 }, { "epoch": 0.26, "learning_rate": 1.739587408892276e-05, "loss": 1.1412, "step": 1030 }, { "epoch": 0.26, "learning_rate": 1.73904106995949e-05, "loss": 1.1614, "step": 1031 }, { "epoch": 0.26, "learning_rate": 1.7384942445101772e-05, "loss": 1.0959, "step": 1032 }, { "epoch": 0.26, "learning_rate": 1.7379469329043166e-05, "loss": 1.1226, "step": 1033 }, { "epoch": 0.26, "learning_rate": 1.737399135502208e-05, "loss": 1.186, "step": 1034 }, { "epoch": 0.26, "learning_rate": 1.736850852664471e-05, "loss": 1.1411, "step": 1035 }, { "epoch": 0.26, "learning_rate": 1.736302084752044e-05, "loss": 1.1116, "step": 1036 }, { "epoch": 0.26, "learning_rate": 1.735752832126185e-05, "loss": 1.1272, "step": 1037 }, { "epoch": 0.26, "learning_rate": 1.7352030951484718e-05, "loss": 1.1264, "step": 1038 }, { "epoch": 0.26, "learning_rate": 1.7346528741808003e-05, "loss": 1.1754, "step": 1039 }, { "epoch": 0.26, "learning_rate": 1.7341021695853856e-05, "loss": 1.1076, "step": 1040 }, { "epoch": 0.26, "learning_rate": 1.7335509817247604e-05, "loss": 1.1113, "step": 1041 }, { "epoch": 0.26, "learning_rate": 1.7329993109617765e-05, "loss": 1.1224, "step": 1042 }, { "epoch": 0.26, "learning_rate": 1.7324471576596026e-05, "loss": 1.1671, "step": 1043 }, { "epoch": 0.26, "learning_rate": 1.7318945221817255e-05, "loss": 1.1629, "step": 1044 }, { "epoch": 0.26, "learning_rate": 1.7313414048919498e-05, "loss": 1.1247, "step": 1045 }, { "epoch": 0.26, "learning_rate": 1.730787806154397e-05, "loss": 1.1817, "step": 1046 }, { "epoch": 0.26, "learning_rate": 1.7302337263335055e-05, "loss": 1.1246, "step": 1047 }, { "epoch": 0.26, "learning_rate": 1.72967916579403e-05, "loss": 1.1262, "step": 1048 }, { "epoch": 0.26, "learning_rate": 1.7291241249010428e-05, "loss": 1.1287, "step": 1049 }, { "epoch": 0.26, "learning_rate": 1.7285686040199304e-05, "loss": 1.1173, "step": 1050 }, { "epoch": 0.26, "learning_rate": 1.7280126035163975e-05, "loss": 1.1252, "step": 1051 }, { "epoch": 0.26, "learning_rate": 1.7274561237564636e-05, "loss": 1.119, "step": 1052 }, { "epoch": 0.26, "learning_rate": 1.7268991651064633e-05, "loss": 1.0993, "step": 1053 }, { "epoch": 0.26, "learning_rate": 1.726341727933047e-05, "loss": 1.1531, "step": 1054 }, { "epoch": 0.26, "learning_rate": 1.7257838126031797e-05, "loss": 1.1802, "step": 1055 }, { "epoch": 0.26, "learning_rate": 1.7252254194841422e-05, "loss": 1.1785, "step": 1056 }, { "epoch": 0.26, "learning_rate": 1.7246665489435284e-05, "loss": 1.1361, "step": 1057 }, { "epoch": 0.26, "learning_rate": 1.7241072013492473e-05, "loss": 1.1232, "step": 1058 }, { "epoch": 0.27, "learning_rate": 1.7235473770695218e-05, "loss": 1.1417, "step": 1059 }, { "epoch": 0.27, "learning_rate": 1.7229870764728887e-05, "loss": 1.1858, "step": 1060 }, { "epoch": 0.27, "learning_rate": 1.7224262999281983e-05, "loss": 1.0942, "step": 1061 }, { "epoch": 0.27, "learning_rate": 1.721865047804614e-05, "loss": 1.1643, "step": 1062 }, { "epoch": 0.27, "learning_rate": 1.7213033204716123e-05, "loss": 1.1327, "step": 1063 }, { "epoch": 0.27, "learning_rate": 1.7207411182989834e-05, "loss": 1.1557, "step": 1064 }, { "epoch": 0.27, "learning_rate": 1.7201784416568288e-05, "loss": 1.1543, "step": 1065 }, { "epoch": 0.27, "learning_rate": 1.719615290915563e-05, "loss": 1.0816, "step": 1066 }, { "epoch": 0.27, "learning_rate": 1.7190516664459127e-05, "loss": 1.0758, "step": 1067 }, { "epoch": 0.27, "learning_rate": 1.7184875686189167e-05, "loss": 1.1263, "step": 1068 }, { "epoch": 0.27, "learning_rate": 1.717922997805924e-05, "loss": 1.1477, "step": 1069 }, { "epoch": 0.27, "learning_rate": 1.717357954378597e-05, "loss": 1.1614, "step": 1070 }, { "epoch": 0.27, "learning_rate": 1.7167924387089074e-05, "loss": 1.1713, "step": 1071 }, { "epoch": 0.27, "learning_rate": 1.7162264511691392e-05, "loss": 1.1647, "step": 1072 }, { "epoch": 0.27, "learning_rate": 1.7156599921318863e-05, "loss": 1.1645, "step": 1073 }, { "epoch": 0.27, "learning_rate": 1.715093061970053e-05, "loss": 1.1214, "step": 1074 }, { "epoch": 0.27, "learning_rate": 1.7145256610568538e-05, "loss": 1.0998, "step": 1075 }, { "epoch": 0.27, "learning_rate": 1.7139577897658133e-05, "loss": 1.1381, "step": 1076 }, { "epoch": 0.27, "learning_rate": 1.7133894484707657e-05, "loss": 1.2032, "step": 1077 }, { "epoch": 0.27, "learning_rate": 1.712820637545854e-05, "loss": 1.1214, "step": 1078 }, { "epoch": 0.27, "learning_rate": 1.7122513573655323e-05, "loss": 1.1263, "step": 1079 }, { "epoch": 0.27, "learning_rate": 1.7116816083045603e-05, "loss": 1.143, "step": 1080 }, { "epoch": 0.27, "learning_rate": 1.7111113907380096e-05, "loss": 1.1422, "step": 1081 }, { "epoch": 0.27, "learning_rate": 1.7105407050412585e-05, "loss": 1.1117, "step": 1082 }, { "epoch": 0.27, "learning_rate": 1.7099695515899936e-05, "loss": 1.1372, "step": 1083 }, { "epoch": 0.27, "learning_rate": 1.70939793076021e-05, "loss": 1.1643, "step": 1084 }, { "epoch": 0.27, "learning_rate": 1.70882584292821e-05, "loss": 1.1207, "step": 1085 }, { "epoch": 0.27, "learning_rate": 1.708253288470604e-05, "loss": 1.1201, "step": 1086 }, { "epoch": 0.27, "learning_rate": 1.7076802677643086e-05, "loss": 1.1053, "step": 1087 }, { "epoch": 0.27, "learning_rate": 1.7071067811865477e-05, "loss": 1.1449, "step": 1088 }, { "epoch": 0.27, "learning_rate": 1.706532829114852e-05, "loss": 1.1269, "step": 1089 }, { "epoch": 0.27, "learning_rate": 1.7059584119270595e-05, "loss": 1.1626, "step": 1090 }, { "epoch": 0.27, "learning_rate": 1.7053835300013125e-05, "loss": 1.1177, "step": 1091 }, { "epoch": 0.27, "learning_rate": 1.7048081837160607e-05, "loss": 1.1416, "step": 1092 }, { "epoch": 0.27, "learning_rate": 1.7042323734500592e-05, "loss": 1.1137, "step": 1093 }, { "epoch": 0.27, "learning_rate": 1.7036560995823683e-05, "loss": 1.0934, "step": 1094 }, { "epoch": 0.27, "learning_rate": 1.7030793624923535e-05, "loss": 1.1215, "step": 1095 }, { "epoch": 0.27, "learning_rate": 1.7025021625596852e-05, "loss": 1.1358, "step": 1096 }, { "epoch": 0.27, "learning_rate": 1.7019245001643394e-05, "loss": 1.1261, "step": 1097 }, { "epoch": 0.27, "learning_rate": 1.701346375686595e-05, "loss": 1.1875, "step": 1098 }, { "epoch": 0.28, "learning_rate": 1.7007677895070358e-05, "loss": 1.1367, "step": 1099 }, { "epoch": 0.28, "learning_rate": 1.70018874200655e-05, "loss": 1.1128, "step": 1100 }, { "epoch": 0.28, "learning_rate": 1.6996092335663293e-05, "loss": 1.1103, "step": 1101 }, { "epoch": 0.28, "learning_rate": 1.699029264567868e-05, "loss": 1.1446, "step": 1102 }, { "epoch": 0.28, "learning_rate": 1.6984488353929648e-05, "loss": 1.1194, "step": 1103 }, { "epoch": 0.28, "learning_rate": 1.697867946423721e-05, "loss": 1.1766, "step": 1104 }, { "epoch": 0.28, "learning_rate": 1.6972865980425392e-05, "loss": 1.1235, "step": 1105 }, { "epoch": 0.28, "learning_rate": 1.6967047906321266e-05, "loss": 1.1354, "step": 1106 }, { "epoch": 0.28, "learning_rate": 1.6961225245754915e-05, "loss": 1.1646, "step": 1107 }, { "epoch": 0.28, "learning_rate": 1.6955398002559437e-05, "loss": 1.1131, "step": 1108 }, { "epoch": 0.28, "learning_rate": 1.6949566180570956e-05, "loss": 1.132, "step": 1109 }, { "epoch": 0.28, "learning_rate": 1.694372978362861e-05, "loss": 1.11, "step": 1110 }, { "epoch": 0.28, "learning_rate": 1.6937888815574536e-05, "loss": 1.117, "step": 1111 }, { "epoch": 0.28, "learning_rate": 1.6932043280253892e-05, "loss": 1.1551, "step": 1112 }, { "epoch": 0.28, "learning_rate": 1.6926193181514842e-05, "loss": 1.104, "step": 1113 }, { "epoch": 0.28, "learning_rate": 1.6920338523208547e-05, "loss": 1.1208, "step": 1114 }, { "epoch": 0.28, "learning_rate": 1.6914479309189185e-05, "loss": 1.0927, "step": 1115 }, { "epoch": 0.28, "learning_rate": 1.6908615543313912e-05, "loss": 1.1481, "step": 1116 }, { "epoch": 0.28, "learning_rate": 1.6902747229442894e-05, "loss": 1.1649, "step": 1117 }, { "epoch": 0.28, "learning_rate": 1.689687437143929e-05, "loss": 1.1532, "step": 1118 }, { "epoch": 0.28, "learning_rate": 1.6890996973169245e-05, "loss": 1.1244, "step": 1119 }, { "epoch": 0.28, "learning_rate": 1.6885115038501903e-05, "loss": 1.1632, "step": 1120 }, { "epoch": 0.28, "learning_rate": 1.6879228571309377e-05, "loss": 1.0795, "step": 1121 }, { "epoch": 0.28, "learning_rate": 1.6873337575466782e-05, "loss": 1.0968, "step": 1122 }, { "epoch": 0.28, "learning_rate": 1.686744205485221e-05, "loss": 1.1314, "step": 1123 }, { "epoch": 0.28, "learning_rate": 1.6861542013346718e-05, "loss": 1.0632, "step": 1124 }, { "epoch": 0.28, "learning_rate": 1.6855637454834358e-05, "loss": 1.143, "step": 1125 }, { "epoch": 0.28, "learning_rate": 1.6849728383202143e-05, "loss": 1.1068, "step": 1126 }, { "epoch": 0.28, "learning_rate": 1.684381480234006e-05, "loss": 1.145, "step": 1127 }, { "epoch": 0.28, "learning_rate": 1.683789671614107e-05, "loss": 1.1138, "step": 1128 }, { "epoch": 0.28, "learning_rate": 1.6831974128501095e-05, "loss": 1.1711, "step": 1129 }, { "epoch": 0.28, "learning_rate": 1.6826047043319023e-05, "loss": 1.1329, "step": 1130 }, { "epoch": 0.28, "learning_rate": 1.6820115464496693e-05, "loss": 1.1067, "step": 1131 }, { "epoch": 0.28, "learning_rate": 1.6814179395938915e-05, "loss": 1.1, "step": 1132 }, { "epoch": 0.28, "learning_rate": 1.6808238841553444e-05, "loss": 1.1583, "step": 1133 }, { "epoch": 0.28, "learning_rate": 1.6802293805251e-05, "loss": 1.1019, "step": 1134 }, { "epoch": 0.28, "learning_rate": 1.6796344290945247e-05, "loss": 1.1306, "step": 1135 }, { "epoch": 0.28, "learning_rate": 1.6790390302552793e-05, "loss": 1.127, "step": 1136 }, { "epoch": 0.28, "learning_rate": 1.67844318439932e-05, "loss": 1.1737, "step": 1137 }, { "epoch": 0.29, "learning_rate": 1.677846891918896e-05, "loss": 1.1767, "step": 1138 }, { "epoch": 0.29, "learning_rate": 1.6772501532065523e-05, "loss": 1.1184, "step": 1139 }, { "epoch": 0.29, "learning_rate": 1.6766529686551258e-05, "loss": 1.1092, "step": 1140 }, { "epoch": 0.29, "learning_rate": 1.676055338657748e-05, "loss": 1.0594, "step": 1141 }, { "epoch": 0.29, "learning_rate": 1.675457263607844e-05, "loss": 1.1211, "step": 1142 }, { "epoch": 0.29, "learning_rate": 1.6748587438991303e-05, "loss": 1.0877, "step": 1143 }, { "epoch": 0.29, "learning_rate": 1.6742597799256182e-05, "loss": 1.1253, "step": 1144 }, { "epoch": 0.29, "learning_rate": 1.6736603720816097e-05, "loss": 1.1335, "step": 1145 }, { "epoch": 0.29, "learning_rate": 1.6730605207616997e-05, "loss": 1.107, "step": 1146 }, { "epoch": 0.29, "learning_rate": 1.672460226360775e-05, "loss": 1.1259, "step": 1147 }, { "epoch": 0.29, "learning_rate": 1.6718594892740146e-05, "loss": 1.1348, "step": 1148 }, { "epoch": 0.29, "learning_rate": 1.6712583098968878e-05, "loss": 1.1023, "step": 1149 }, { "epoch": 0.29, "learning_rate": 1.670656688625156e-05, "loss": 1.149, "step": 1150 }, { "epoch": 0.29, "learning_rate": 1.6700546258548713e-05, "loss": 1.1392, "step": 1151 }, { "epoch": 0.29, "learning_rate": 1.6694521219823757e-05, "loss": 1.1395, "step": 1152 }, { "epoch": 0.29, "learning_rate": 1.668849177404303e-05, "loss": 1.1101, "step": 1153 }, { "epoch": 0.29, "learning_rate": 1.6682457925175762e-05, "loss": 1.1234, "step": 1154 }, { "epoch": 0.29, "learning_rate": 1.6676419677194083e-05, "loss": 1.1395, "step": 1155 }, { "epoch": 0.29, "learning_rate": 1.6670377034073014e-05, "loss": 1.1485, "step": 1156 }, { "epoch": 0.29, "learning_rate": 1.666432999979048e-05, "loss": 1.1556, "step": 1157 }, { "epoch": 0.29, "learning_rate": 1.6658278578327292e-05, "loss": 1.1944, "step": 1158 }, { "epoch": 0.29, "learning_rate": 1.665222277366714e-05, "loss": 1.1172, "step": 1159 }, { "epoch": 0.29, "learning_rate": 1.6646162589796616e-05, "loss": 1.1144, "step": 1160 }, { "epoch": 0.29, "learning_rate": 1.6640098030705183e-05, "loss": 1.1531, "step": 1161 }, { "epoch": 0.29, "learning_rate": 1.6634029100385184e-05, "loss": 1.0727, "step": 1162 }, { "epoch": 0.29, "learning_rate": 1.6627955802831852e-05, "loss": 1.1296, "step": 1163 }, { "epoch": 0.29, "learning_rate": 1.662187814204328e-05, "loss": 1.1593, "step": 1164 }, { "epoch": 0.29, "learning_rate": 1.6615796122020443e-05, "loss": 1.1647, "step": 1165 }, { "epoch": 0.29, "learning_rate": 1.660970974676718e-05, "loss": 1.1415, "step": 1166 }, { "epoch": 0.29, "learning_rate": 1.66036190202902e-05, "loss": 1.1198, "step": 1167 }, { "epoch": 0.29, "learning_rate": 1.6597523946599076e-05, "loss": 1.1493, "step": 1168 }, { "epoch": 0.29, "learning_rate": 1.6591424529706244e-05, "loss": 1.1213, "step": 1169 }, { "epoch": 0.29, "learning_rate": 1.6585320773626995e-05, "loss": 1.0698, "step": 1170 }, { "epoch": 0.29, "learning_rate": 1.6579212682379485e-05, "loss": 1.1023, "step": 1171 }, { "epoch": 0.29, "learning_rate": 1.6573100259984714e-05, "loss": 1.1451, "step": 1172 }, { "epoch": 0.29, "learning_rate": 1.656698351046654e-05, "loss": 1.0935, "step": 1173 }, { "epoch": 0.29, "learning_rate": 1.656086243785166e-05, "loss": 1.1129, "step": 1174 }, { "epoch": 0.29, "learning_rate": 1.655473704616964e-05, "loss": 1.1414, "step": 1175 }, { "epoch": 0.29, "learning_rate": 1.6548607339452853e-05, "loss": 1.1742, "step": 1176 }, { "epoch": 0.29, "learning_rate": 1.6542473321736547e-05, "loss": 1.1241, "step": 1177 }, { "epoch": 0.3, "learning_rate": 1.653633499705879e-05, "loss": 1.2057, "step": 1178 }, { "epoch": 0.3, "learning_rate": 1.6530192369460486e-05, "loss": 1.1307, "step": 1179 }, { "epoch": 0.3, "learning_rate": 1.6524045442985383e-05, "loss": 1.1333, "step": 1180 }, { "epoch": 0.3, "learning_rate": 1.6517894221680042e-05, "loss": 1.1012, "step": 1181 }, { "epoch": 0.3, "learning_rate": 1.6511738709593866e-05, "loss": 1.1441, "step": 1182 }, { "epoch": 0.3, "learning_rate": 1.6505578910779075e-05, "loss": 1.1463, "step": 1183 }, { "epoch": 0.3, "learning_rate": 1.6499414829290712e-05, "loss": 1.0949, "step": 1184 }, { "epoch": 0.3, "learning_rate": 1.649324646918664e-05, "loss": 1.1617, "step": 1185 }, { "epoch": 0.3, "learning_rate": 1.6487073834527542e-05, "loss": 1.1208, "step": 1186 }, { "epoch": 0.3, "learning_rate": 1.6480896929376905e-05, "loss": 1.1493, "step": 1187 }, { "epoch": 0.3, "learning_rate": 1.6474715757801043e-05, "loss": 1.1609, "step": 1188 }, { "epoch": 0.3, "learning_rate": 1.6468530323869063e-05, "loss": 1.1271, "step": 1189 }, { "epoch": 0.3, "learning_rate": 1.646234063165289e-05, "loss": 1.1529, "step": 1190 }, { "epoch": 0.3, "learning_rate": 1.645614668522724e-05, "loss": 1.1111, "step": 1191 }, { "epoch": 0.3, "learning_rate": 1.644994848866964e-05, "loss": 1.1811, "step": 1192 }, { "epoch": 0.3, "learning_rate": 1.6443746046060414e-05, "loss": 1.1319, "step": 1193 }, { "epoch": 0.3, "learning_rate": 1.6437539361482674e-05, "loss": 1.1424, "step": 1194 }, { "epoch": 0.3, "learning_rate": 1.6431328439022333e-05, "loss": 1.1334, "step": 1195 }, { "epoch": 0.3, "learning_rate": 1.6425113282768087e-05, "loss": 1.108, "step": 1196 }, { "epoch": 0.3, "learning_rate": 1.6418893896811424e-05, "loss": 1.1661, "step": 1197 }, { "epoch": 0.3, "learning_rate": 1.641267028524661e-05, "loss": 1.136, "step": 1198 }, { "epoch": 0.3, "learning_rate": 1.6406442452170705e-05, "loss": 1.1282, "step": 1199 }, { "epoch": 0.3, "learning_rate": 1.6400210401683535e-05, "loss": 1.1665, "step": 1200 }, { "epoch": 0.3, "learning_rate": 1.639397413788771e-05, "loss": 1.1055, "step": 1201 }, { "epoch": 0.3, "learning_rate": 1.6387733664888608e-05, "loss": 1.1416, "step": 1202 }, { "epoch": 0.3, "learning_rate": 1.6381488986794386e-05, "loss": 1.1278, "step": 1203 }, { "epoch": 0.3, "learning_rate": 1.637524010771596e-05, "loss": 1.1447, "step": 1204 }, { "epoch": 0.3, "learning_rate": 1.636898703176702e-05, "loss": 1.1605, "step": 1205 }, { "epoch": 0.3, "learning_rate": 1.636272976306401e-05, "loss": 1.1597, "step": 1206 }, { "epoch": 0.3, "learning_rate": 1.635646830572614e-05, "loss": 1.1434, "step": 1207 }, { "epoch": 0.3, "learning_rate": 1.6350202663875385e-05, "loss": 1.1642, "step": 1208 }, { "epoch": 0.3, "learning_rate": 1.6343932841636455e-05, "loss": 1.1247, "step": 1209 }, { "epoch": 0.3, "learning_rate": 1.6337658843136832e-05, "loss": 1.1655, "step": 1210 }, { "epoch": 0.3, "learning_rate": 1.6331380672506728e-05, "loss": 1.121, "step": 1211 }, { "epoch": 0.3, "learning_rate": 1.6325098333879122e-05, "loss": 1.1101, "step": 1212 }, { "epoch": 0.3, "learning_rate": 1.631881183138972e-05, "loss": 1.143, "step": 1213 }, { "epoch": 0.3, "learning_rate": 1.6312521169176983e-05, "loss": 1.1106, "step": 1214 }, { "epoch": 0.3, "learning_rate": 1.6306226351382095e-05, "loss": 1.1109, "step": 1215 }, { "epoch": 0.3, "learning_rate": 1.6299927382148987e-05, "loss": 1.0813, "step": 1216 }, { "epoch": 0.3, "learning_rate": 1.6293624265624323e-05, "loss": 1.116, "step": 1217 }, { "epoch": 0.31, "learning_rate": 1.6287317005957493e-05, "loss": 1.1443, "step": 1218 }, { "epoch": 0.31, "learning_rate": 1.628100560730061e-05, "loss": 1.1331, "step": 1219 }, { "epoch": 0.31, "learning_rate": 1.627469007380852e-05, "loss": 1.1308, "step": 1220 }, { "epoch": 0.31, "learning_rate": 1.626837040963879e-05, "loss": 1.126, "step": 1221 }, { "epoch": 0.31, "learning_rate": 1.6262046618951708e-05, "loss": 1.1035, "step": 1222 }, { "epoch": 0.31, "learning_rate": 1.625571870591027e-05, "loss": 1.1019, "step": 1223 }, { "epoch": 0.31, "learning_rate": 1.6249386674680186e-05, "loss": 1.1472, "step": 1224 }, { "epoch": 0.31, "learning_rate": 1.624305052942989e-05, "loss": 1.0956, "step": 1225 }, { "epoch": 0.31, "learning_rate": 1.6236710274330513e-05, "loss": 1.118, "step": 1226 }, { "epoch": 0.31, "learning_rate": 1.6230365913555893e-05, "loss": 1.1942, "step": 1227 }, { "epoch": 0.31, "learning_rate": 1.622401745128258e-05, "loss": 1.1508, "step": 1228 }, { "epoch": 0.31, "learning_rate": 1.6217664891689805e-05, "loss": 1.0971, "step": 1229 }, { "epoch": 0.31, "learning_rate": 1.6211308238959518e-05, "loss": 1.0881, "step": 1230 }, { "epoch": 0.31, "learning_rate": 1.6204947497276346e-05, "loss": 1.1408, "step": 1231 }, { "epoch": 0.31, "learning_rate": 1.619858267082762e-05, "loss": 1.1346, "step": 1232 }, { "epoch": 0.31, "learning_rate": 1.619221376380335e-05, "loss": 1.1291, "step": 1233 }, { "epoch": 0.31, "learning_rate": 1.618584078039624e-05, "loss": 1.1112, "step": 1234 }, { "epoch": 0.31, "learning_rate": 1.6179463724801677e-05, "loss": 1.0951, "step": 1235 }, { "epoch": 0.31, "learning_rate": 1.6173082601217728e-05, "loss": 1.1218, "step": 1236 }, { "epoch": 0.31, "learning_rate": 1.616669741384513e-05, "loss": 1.1585, "step": 1237 }, { "epoch": 0.31, "learning_rate": 1.6160308166887304e-05, "loss": 1.1614, "step": 1238 }, { "epoch": 0.31, "learning_rate": 1.6153914864550344e-05, "loss": 1.1357, "step": 1239 }, { "epoch": 0.31, "learning_rate": 1.614751751104301e-05, "loss": 1.1117, "step": 1240 }, { "epoch": 0.31, "learning_rate": 1.614111611057673e-05, "loss": 1.1619, "step": 1241 }, { "epoch": 0.31, "learning_rate": 1.6134710667365598e-05, "loss": 1.0625, "step": 1242 }, { "epoch": 0.31, "learning_rate": 1.612830118562636e-05, "loss": 1.1299, "step": 1243 }, { "epoch": 0.31, "learning_rate": 1.6121887669578437e-05, "loss": 1.1225, "step": 1244 }, { "epoch": 0.31, "learning_rate": 1.6115470123443895e-05, "loss": 1.1517, "step": 1245 }, { "epoch": 0.31, "learning_rate": 1.610904855144745e-05, "loss": 1.1088, "step": 1246 }, { "epoch": 0.31, "learning_rate": 1.610262295781648e-05, "loss": 1.0836, "step": 1247 }, { "epoch": 0.31, "learning_rate": 1.6096193346780995e-05, "loss": 1.1343, "step": 1248 }, { "epoch": 0.31, "learning_rate": 1.6089759722573664e-05, "loss": 1.1615, "step": 1249 }, { "epoch": 0.31, "learning_rate": 1.6083322089429795e-05, "loss": 1.1256, "step": 1250 }, { "epoch": 0.31, "learning_rate": 1.6076880451587326e-05, "loss": 1.0942, "step": 1251 }, { "epoch": 0.31, "learning_rate": 1.607043481328684e-05, "loss": 1.1319, "step": 1252 }, { "epoch": 0.31, "learning_rate": 1.6063985178771555e-05, "loss": 1.0934, "step": 1253 }, { "epoch": 0.31, "learning_rate": 1.605753155228731e-05, "loss": 1.1535, "step": 1254 }, { "epoch": 0.31, "learning_rate": 1.605107393808258e-05, "loss": 1.1065, "step": 1255 }, { "epoch": 0.31, "learning_rate": 1.6044612340408466e-05, "loss": 1.1214, "step": 1256 }, { "epoch": 0.31, "learning_rate": 1.6038146763518684e-05, "loss": 1.102, "step": 1257 }, { "epoch": 0.32, "learning_rate": 1.6031677211669574e-05, "loss": 1.1345, "step": 1258 }, { "epoch": 0.32, "learning_rate": 1.6025203689120095e-05, "loss": 1.1212, "step": 1259 }, { "epoch": 0.32, "learning_rate": 1.6018726200131823e-05, "loss": 1.1004, "step": 1260 }, { "epoch": 0.32, "learning_rate": 1.6012244748968927e-05, "loss": 1.112, "step": 1261 }, { "epoch": 0.32, "learning_rate": 1.6005759339898207e-05, "loss": 1.1454, "step": 1262 }, { "epoch": 0.32, "learning_rate": 1.5999269977189056e-05, "loss": 1.1175, "step": 1263 }, { "epoch": 0.32, "learning_rate": 1.599277666511347e-05, "loss": 1.1294, "step": 1264 }, { "epoch": 0.32, "learning_rate": 1.5986279407946052e-05, "loss": 1.1103, "step": 1265 }, { "epoch": 0.32, "learning_rate": 1.5979778209963993e-05, "loss": 1.113, "step": 1266 }, { "epoch": 0.32, "learning_rate": 1.5973273075447082e-05, "loss": 1.1146, "step": 1267 }, { "epoch": 0.32, "learning_rate": 1.5966764008677703e-05, "loss": 1.1593, "step": 1268 }, { "epoch": 0.32, "learning_rate": 1.5960251013940827e-05, "loss": 1.097, "step": 1269 }, { "epoch": 0.32, "learning_rate": 1.5953734095524004e-05, "loss": 1.0939, "step": 1270 }, { "epoch": 0.32, "learning_rate": 1.5947213257717375e-05, "loss": 1.1164, "step": 1271 }, { "epoch": 0.32, "learning_rate": 1.5940688504813664e-05, "loss": 1.1446, "step": 1272 }, { "epoch": 0.32, "learning_rate": 1.593415984110816e-05, "loss": 1.1387, "step": 1273 }, { "epoch": 0.32, "learning_rate": 1.5927627270898735e-05, "loss": 1.1282, "step": 1274 }, { "epoch": 0.32, "learning_rate": 1.592109079848583e-05, "loss": 1.0922, "step": 1275 }, { "epoch": 0.32, "learning_rate": 1.5914550428172463e-05, "loss": 1.1245, "step": 1276 }, { "epoch": 0.32, "learning_rate": 1.59080061642642e-05, "loss": 1.1267, "step": 1277 }, { "epoch": 0.32, "learning_rate": 1.5901458011069188e-05, "loss": 1.1312, "step": 1278 }, { "epoch": 0.32, "learning_rate": 1.589490597289813e-05, "loss": 1.1149, "step": 1279 }, { "epoch": 0.32, "learning_rate": 1.5888350054064275e-05, "loss": 1.0881, "step": 1280 }, { "epoch": 0.32, "learning_rate": 1.5881790258883447e-05, "loss": 1.1396, "step": 1281 }, { "epoch": 0.32, "learning_rate": 1.5875226591674002e-05, "loss": 1.1236, "step": 1282 }, { "epoch": 0.32, "learning_rate": 1.586865905675686e-05, "loss": 1.1808, "step": 1283 }, { "epoch": 0.32, "learning_rate": 1.586208765845547e-05, "loss": 1.1977, "step": 1284 }, { "epoch": 0.32, "learning_rate": 1.585551240109585e-05, "loss": 1.1249, "step": 1285 }, { "epoch": 0.32, "learning_rate": 1.584893328900653e-05, "loss": 1.0769, "step": 1286 }, { "epoch": 0.32, "learning_rate": 1.58423503265186e-05, "loss": 1.131, "step": 1287 }, { "epoch": 0.32, "learning_rate": 1.5835763517965676e-05, "loss": 1.1409, "step": 1288 }, { "epoch": 0.32, "learning_rate": 1.5829172867683896e-05, "loss": 1.1164, "step": 1289 }, { "epoch": 0.32, "learning_rate": 1.5822578380011946e-05, "loss": 1.1184, "step": 1290 }, { "epoch": 0.32, "learning_rate": 1.581598005929102e-05, "loss": 1.1298, "step": 1291 }, { "epoch": 0.32, "learning_rate": 1.5809377909864857e-05, "loss": 1.1282, "step": 1292 }, { "epoch": 0.32, "learning_rate": 1.5802771936079696e-05, "loss": 1.0926, "step": 1293 }, { "epoch": 0.32, "learning_rate": 1.57961621422843e-05, "loss": 1.122, "step": 1294 }, { "epoch": 0.32, "learning_rate": 1.5789548532829948e-05, "loss": 1.1326, "step": 1295 }, { "epoch": 0.32, "learning_rate": 1.5782931112070437e-05, "loss": 1.0927, "step": 1296 }, { "epoch": 0.32, "learning_rate": 1.577630988436206e-05, "loss": 1.1377, "step": 1297 }, { "epoch": 0.33, "learning_rate": 1.5769684854063624e-05, "loss": 1.1543, "step": 1298 }, { "epoch": 0.33, "learning_rate": 1.5763056025536445e-05, "loss": 1.1317, "step": 1299 }, { "epoch": 0.33, "learning_rate": 1.5756423403144325e-05, "loss": 1.1001, "step": 1300 }, { "epoch": 0.33, "learning_rate": 1.5749786991253576e-05, "loss": 1.1092, "step": 1301 }, { "epoch": 0.33, "learning_rate": 1.5743146794233e-05, "loss": 1.1597, "step": 1302 }, { "epoch": 0.33, "learning_rate": 1.5736502816453885e-05, "loss": 1.1137, "step": 1303 }, { "epoch": 0.33, "learning_rate": 1.5729855062290024e-05, "loss": 1.1405, "step": 1304 }, { "epoch": 0.33, "learning_rate": 1.5723203536117676e-05, "loss": 1.067, "step": 1305 }, { "epoch": 0.33, "learning_rate": 1.5716548242315593e-05, "loss": 1.1014, "step": 1306 }, { "epoch": 0.33, "learning_rate": 1.570988918526501e-05, "loss": 1.1412, "step": 1307 }, { "epoch": 0.33, "learning_rate": 1.5703226369349642e-05, "loss": 1.1036, "step": 1308 }, { "epoch": 0.33, "learning_rate": 1.5696559798955665e-05, "loss": 1.1235, "step": 1309 }, { "epoch": 0.33, "learning_rate": 1.5689889478471738e-05, "loss": 1.1813, "step": 1310 }, { "epoch": 0.33, "learning_rate": 1.5683215412288984e-05, "loss": 1.094, "step": 1311 }, { "epoch": 0.33, "learning_rate": 1.5676537604800992e-05, "loss": 1.1338, "step": 1312 }, { "epoch": 0.33, "learning_rate": 1.5669856060403814e-05, "loss": 1.1428, "step": 1313 }, { "epoch": 0.33, "learning_rate": 1.566317078349597e-05, "loss": 1.0787, "step": 1314 }, { "epoch": 0.33, "learning_rate": 1.5656481778478427e-05, "loss": 1.1561, "step": 1315 }, { "epoch": 0.33, "learning_rate": 1.5649789049754604e-05, "loss": 1.1185, "step": 1316 }, { "epoch": 0.33, "learning_rate": 1.564309260173038e-05, "loss": 1.1055, "step": 1317 }, { "epoch": 0.33, "learning_rate": 1.563639243881408e-05, "loss": 1.1612, "step": 1318 }, { "epoch": 0.33, "learning_rate": 1.562968856541648e-05, "loss": 1.1171, "step": 1319 }, { "epoch": 0.33, "learning_rate": 1.562298098595078e-05, "loss": 1.1189, "step": 1320 }, { "epoch": 0.33, "learning_rate": 1.561626970483264e-05, "loss": 1.1098, "step": 1321 }, { "epoch": 0.33, "learning_rate": 1.5609554726480146e-05, "loss": 1.1306, "step": 1322 }, { "epoch": 0.33, "learning_rate": 1.5602836055313827e-05, "loss": 1.1244, "step": 1323 }, { "epoch": 0.33, "learning_rate": 1.5596113695756624e-05, "loss": 1.1299, "step": 1324 }, { "epoch": 0.33, "learning_rate": 1.5589387652233934e-05, "loss": 1.1365, "step": 1325 }, { "epoch": 0.33, "learning_rate": 1.5582657929173555e-05, "loss": 1.1176, "step": 1326 }, { "epoch": 0.33, "learning_rate": 1.5575924531005714e-05, "loss": 1.1011, "step": 1327 }, { "epoch": 0.33, "learning_rate": 1.5569187462163068e-05, "loss": 1.1127, "step": 1328 }, { "epoch": 0.33, "learning_rate": 1.556244672708068e-05, "loss": 1.1092, "step": 1329 }, { "epoch": 0.33, "learning_rate": 1.5555702330196024e-05, "loss": 1.1406, "step": 1330 }, { "epoch": 0.33, "learning_rate": 1.5548954275948993e-05, "loss": 1.0758, "step": 1331 }, { "epoch": 0.33, "learning_rate": 1.5542202568781886e-05, "loss": 1.0772, "step": 1332 }, { "epoch": 0.33, "learning_rate": 1.5535447213139397e-05, "loss": 1.13, "step": 1333 }, { "epoch": 0.33, "learning_rate": 1.552868821346864e-05, "loss": 1.1117, "step": 1334 }, { "epoch": 0.33, "learning_rate": 1.5521925574219108e-05, "loss": 1.094, "step": 1335 }, { "epoch": 0.33, "learning_rate": 1.551515929984271e-05, "loss": 1.1297, "step": 1336 }, { "epoch": 0.33, "learning_rate": 1.5508389394793724e-05, "loss": 1.1718, "step": 1337 }, { "epoch": 0.34, "learning_rate": 1.550161586352884e-05, "loss": 1.1071, "step": 1338 }, { "epoch": 0.34, "learning_rate": 1.549483871050713e-05, "loss": 1.1085, "step": 1339 }, { "epoch": 0.34, "learning_rate": 1.5488057940190038e-05, "loss": 1.0794, "step": 1340 }, { "epoch": 0.34, "learning_rate": 1.5481273557041402e-05, "loss": 1.1009, "step": 1341 }, { "epoch": 0.34, "learning_rate": 1.5474485565527437e-05, "loss": 1.1226, "step": 1342 }, { "epoch": 0.34, "learning_rate": 1.546769397011673e-05, "loss": 1.1487, "step": 1343 }, { "epoch": 0.34, "learning_rate": 1.5460898775280235e-05, "loss": 1.0995, "step": 1344 }, { "epoch": 0.34, "learning_rate": 1.5454099985491293e-05, "loss": 1.1451, "step": 1345 }, { "epoch": 0.34, "learning_rate": 1.5447297605225595e-05, "loss": 1.1529, "step": 1346 }, { "epoch": 0.34, "learning_rate": 1.54404916389612e-05, "loss": 1.1696, "step": 1347 }, { "epoch": 0.34, "learning_rate": 1.5433682091178525e-05, "loss": 1.115, "step": 1348 }, { "epoch": 0.34, "learning_rate": 1.5426868966360362e-05, "loss": 1.1378, "step": 1349 }, { "epoch": 0.34, "learning_rate": 1.5420052268991833e-05, "loss": 1.1197, "step": 1350 }, { "epoch": 0.34, "learning_rate": 1.5413232003560426e-05, "loss": 1.105, "step": 1351 }, { "epoch": 0.34, "learning_rate": 1.5406408174555978e-05, "loss": 1.0633, "step": 1352 }, { "epoch": 0.34, "learning_rate": 1.5399580786470663e-05, "loss": 1.1326, "step": 1353 }, { "epoch": 0.34, "learning_rate": 1.5392749843799013e-05, "loss": 1.1363, "step": 1354 }, { "epoch": 0.34, "learning_rate": 1.5385915351037883e-05, "loss": 1.1372, "step": 1355 }, { "epoch": 0.34, "learning_rate": 1.537907731268648e-05, "loss": 1.1099, "step": 1356 }, { "epoch": 0.34, "learning_rate": 1.5372235733246326e-05, "loss": 1.1366, "step": 1357 }, { "epoch": 0.34, "learning_rate": 1.53653906172213e-05, "loss": 1.1001, "step": 1358 }, { "epoch": 0.34, "learning_rate": 1.5358541969117592e-05, "loss": 1.1181, "step": 1359 }, { "epoch": 0.34, "learning_rate": 1.5351689793443714e-05, "loss": 1.0939, "step": 1360 }, { "epoch": 0.34, "learning_rate": 1.534483409471052e-05, "loss": 1.1, "step": 1361 }, { "epoch": 0.34, "learning_rate": 1.5337974877431157e-05, "loss": 1.0828, "step": 1362 }, { "epoch": 0.34, "learning_rate": 1.5331112146121104e-05, "loss": 1.1391, "step": 1363 }, { "epoch": 0.34, "learning_rate": 1.532424590529816e-05, "loss": 1.1054, "step": 1364 }, { "epoch": 0.34, "learning_rate": 1.5317376159482417e-05, "loss": 1.1536, "step": 1365 }, { "epoch": 0.34, "learning_rate": 1.5310502913196277e-05, "loss": 1.1484, "step": 1366 }, { "epoch": 0.34, "learning_rate": 1.5303626170964467e-05, "loss": 1.0929, "step": 1367 }, { "epoch": 0.34, "learning_rate": 1.529674593731399e-05, "loss": 1.0853, "step": 1368 }, { "epoch": 0.34, "learning_rate": 1.528986221677416e-05, "loss": 1.1484, "step": 1369 }, { "epoch": 0.34, "learning_rate": 1.528297501387658e-05, "loss": 1.1284, "step": 1370 }, { "epoch": 0.34, "learning_rate": 1.5276084333155155e-05, "loss": 1.1221, "step": 1371 }, { "epoch": 0.34, "learning_rate": 1.5269190179146075e-05, "loss": 1.1093, "step": 1372 }, { "epoch": 0.34, "learning_rate": 1.5262292556387814e-05, "loss": 1.1454, "step": 1373 }, { "epoch": 0.34, "learning_rate": 1.525539146942113e-05, "loss": 1.0965, "step": 1374 }, { "epoch": 0.34, "learning_rate": 1.5248486922789063e-05, "loss": 1.094, "step": 1375 }, { "epoch": 0.34, "learning_rate": 1.524157892103693e-05, "loss": 1.1567, "step": 1376 }, { "epoch": 0.34, "learning_rate": 1.5234667468712324e-05, "loss": 1.1082, "step": 1377 }, { "epoch": 0.35, "learning_rate": 1.5227752570365106e-05, "loss": 1.1073, "step": 1378 }, { "epoch": 0.35, "learning_rate": 1.522083423054741e-05, "loss": 1.1373, "step": 1379 }, { "epoch": 0.35, "learning_rate": 1.5213912453813632e-05, "loss": 1.0887, "step": 1380 }, { "epoch": 0.35, "learning_rate": 1.5206987244720432e-05, "loss": 1.172, "step": 1381 }, { "epoch": 0.35, "learning_rate": 1.520005860782673e-05, "loss": 1.1047, "step": 1382 }, { "epoch": 0.35, "learning_rate": 1.5193126547693702e-05, "loss": 1.123, "step": 1383 }, { "epoch": 0.35, "learning_rate": 1.5186191068884774e-05, "loss": 1.1122, "step": 1384 }, { "epoch": 0.35, "learning_rate": 1.5179252175965632e-05, "loss": 1.1301, "step": 1385 }, { "epoch": 0.35, "learning_rate": 1.5172309873504202e-05, "loss": 1.1606, "step": 1386 }, { "epoch": 0.35, "learning_rate": 1.5165364166070652e-05, "loss": 1.1171, "step": 1387 }, { "epoch": 0.35, "learning_rate": 1.51584150582374e-05, "loss": 1.1381, "step": 1388 }, { "epoch": 0.35, "learning_rate": 1.5151462554579094e-05, "loss": 1.1114, "step": 1389 }, { "epoch": 0.35, "learning_rate": 1.5144506659672629e-05, "loss": 1.1433, "step": 1390 }, { "epoch": 0.35, "learning_rate": 1.5137547378097116e-05, "loss": 1.0956, "step": 1391 }, { "epoch": 0.35, "learning_rate": 1.5130584714433907e-05, "loss": 1.1103, "step": 1392 }, { "epoch": 0.35, "learning_rate": 1.5123618673266584e-05, "loss": 1.1477, "step": 1393 }, { "epoch": 0.35, "learning_rate": 1.511664925918094e-05, "loss": 1.1361, "step": 1394 }, { "epoch": 0.35, "learning_rate": 1.5109676476765001e-05, "loss": 1.1202, "step": 1395 }, { "epoch": 0.35, "learning_rate": 1.5102700330609e-05, "loss": 1.138, "step": 1396 }, { "epoch": 0.35, "learning_rate": 1.5095720825305385e-05, "loss": 1.1831, "step": 1397 }, { "epoch": 0.35, "learning_rate": 1.5088737965448825e-05, "loss": 1.1171, "step": 1398 }, { "epoch": 0.35, "learning_rate": 1.5081751755636191e-05, "loss": 1.1141, "step": 1399 }, { "epoch": 0.35, "learning_rate": 1.5074762200466557e-05, "loss": 1.1412, "step": 1400 }, { "epoch": 0.35, "learning_rate": 1.5067769304541205e-05, "loss": 1.121, "step": 1401 }, { "epoch": 0.35, "learning_rate": 1.506077307246361e-05, "loss": 1.1201, "step": 1402 }, { "epoch": 0.35, "learning_rate": 1.5053773508839448e-05, "loss": 1.1072, "step": 1403 }, { "epoch": 0.35, "learning_rate": 1.5046770618276586e-05, "loss": 1.1034, "step": 1404 }, { "epoch": 0.35, "learning_rate": 1.5039764405385082e-05, "loss": 1.116, "step": 1405 }, { "epoch": 0.35, "learning_rate": 1.5032754874777185e-05, "loss": 1.1507, "step": 1406 }, { "epoch": 0.35, "learning_rate": 1.5025742031067316e-05, "loss": 1.1099, "step": 1407 }, { "epoch": 0.35, "learning_rate": 1.501872587887209e-05, "loss": 1.0862, "step": 1408 }, { "epoch": 0.35, "learning_rate": 1.5011706422810294e-05, "loss": 1.1004, "step": 1409 }, { "epoch": 0.35, "learning_rate": 1.500468366750289e-05, "loss": 1.0949, "step": 1410 }, { "epoch": 0.35, "learning_rate": 1.4997657617573014e-05, "loss": 1.1024, "step": 1411 }, { "epoch": 0.35, "learning_rate": 1.4990628277645969e-05, "loss": 1.0703, "step": 1412 }, { "epoch": 0.35, "learning_rate": 1.4983595652349224e-05, "loss": 1.1323, "step": 1413 }, { "epoch": 0.35, "learning_rate": 1.4976559746312416e-05, "loss": 1.08, "step": 1414 }, { "epoch": 0.35, "learning_rate": 1.4969520564167327e-05, "loss": 1.1731, "step": 1415 }, { "epoch": 0.35, "learning_rate": 1.4962478110547918e-05, "loss": 1.124, "step": 1416 }, { "epoch": 0.35, "learning_rate": 1.4955432390090284e-05, "loss": 1.1082, "step": 1417 }, { "epoch": 0.36, "learning_rate": 1.4948383407432678e-05, "loss": 1.1207, "step": 1418 }, { "epoch": 0.36, "learning_rate": 1.49413311672155e-05, "loss": 1.1112, "step": 1419 }, { "epoch": 0.36, "learning_rate": 1.49342756740813e-05, "loss": 1.1365, "step": 1420 }, { "epoch": 0.36, "learning_rate": 1.4927216932674762e-05, "loss": 1.1172, "step": 1421 }, { "epoch": 0.36, "learning_rate": 1.4920154947642708e-05, "loss": 1.1126, "step": 1422 }, { "epoch": 0.36, "learning_rate": 1.4913089723634102e-05, "loss": 1.146, "step": 1423 }, { "epoch": 0.36, "learning_rate": 1.4906021265300035e-05, "loss": 1.1341, "step": 1424 }, { "epoch": 0.36, "learning_rate": 1.4898949577293724e-05, "loss": 1.1111, "step": 1425 }, { "epoch": 0.36, "learning_rate": 1.4891874664270525e-05, "loss": 1.0973, "step": 1426 }, { "epoch": 0.36, "learning_rate": 1.4884796530887902e-05, "loss": 1.1102, "step": 1427 }, { "epoch": 0.36, "learning_rate": 1.4877715181805448e-05, "loss": 1.1002, "step": 1428 }, { "epoch": 0.36, "learning_rate": 1.4870630621684873e-05, "loss": 1.1269, "step": 1429 }, { "epoch": 0.36, "learning_rate": 1.4863542855189996e-05, "loss": 1.1055, "step": 1430 }, { "epoch": 0.36, "learning_rate": 1.4856451886986744e-05, "loss": 1.1192, "step": 1431 }, { "epoch": 0.36, "learning_rate": 1.4849357721743169e-05, "loss": 1.1718, "step": 1432 }, { "epoch": 0.36, "learning_rate": 1.4842260364129404e-05, "loss": 1.1269, "step": 1433 }, { "epoch": 0.36, "learning_rate": 1.4835159818817701e-05, "loss": 1.1179, "step": 1434 }, { "epoch": 0.36, "learning_rate": 1.4828056090482405e-05, "loss": 1.1346, "step": 1435 }, { "epoch": 0.36, "learning_rate": 1.4820949183799952e-05, "loss": 1.1235, "step": 1436 }, { "epoch": 0.36, "learning_rate": 1.481383910344888e-05, "loss": 1.1204, "step": 1437 }, { "epoch": 0.36, "learning_rate": 1.4806725854109804e-05, "loss": 1.1531, "step": 1438 }, { "epoch": 0.36, "learning_rate": 1.479960944046544e-05, "loss": 1.1522, "step": 1439 }, { "epoch": 0.36, "learning_rate": 1.479248986720057e-05, "loss": 1.1185, "step": 1440 }, { "epoch": 0.36, "learning_rate": 1.478536713900207e-05, "loss": 1.1228, "step": 1441 }, { "epoch": 0.36, "learning_rate": 1.4778241260558885e-05, "loss": 1.1225, "step": 1442 }, { "epoch": 0.36, "learning_rate": 1.4771112236562043e-05, "loss": 1.1399, "step": 1443 }, { "epoch": 0.36, "learning_rate": 1.4763980071704629e-05, "loss": 1.1351, "step": 1444 }, { "epoch": 0.36, "learning_rate": 1.4756844770681807e-05, "loss": 1.1194, "step": 1445 }, { "epoch": 0.36, "learning_rate": 1.4749706338190796e-05, "loss": 1.1201, "step": 1446 }, { "epoch": 0.36, "learning_rate": 1.4742564778930892e-05, "loss": 1.0932, "step": 1447 }, { "epoch": 0.36, "learning_rate": 1.473542009760343e-05, "loss": 1.0879, "step": 1448 }, { "epoch": 0.36, "learning_rate": 1.4728272298911814e-05, "loss": 1.1316, "step": 1449 }, { "epoch": 0.36, "learning_rate": 1.4721121387561497e-05, "loss": 1.1159, "step": 1450 }, { "epoch": 0.36, "learning_rate": 1.4713967368259981e-05, "loss": 1.1323, "step": 1451 }, { "epoch": 0.36, "learning_rate": 1.4706810245716803e-05, "loss": 1.0768, "step": 1452 }, { "epoch": 0.36, "learning_rate": 1.4699650024643565e-05, "loss": 1.1213, "step": 1453 }, { "epoch": 0.36, "learning_rate": 1.4692486709753891e-05, "loss": 1.0905, "step": 1454 }, { "epoch": 0.36, "learning_rate": 1.4685320305763447e-05, "loss": 1.1325, "step": 1455 }, { "epoch": 0.36, "learning_rate": 1.4678150817389936e-05, "loss": 1.1276, "step": 1456 }, { "epoch": 0.36, "learning_rate": 1.4670978249353085e-05, "loss": 1.1267, "step": 1457 }, { "epoch": 0.37, "learning_rate": 1.4663802606374653e-05, "loss": 1.1354, "step": 1458 }, { "epoch": 0.37, "learning_rate": 1.465662389317842e-05, "loss": 1.1176, "step": 1459 }, { "epoch": 0.37, "learning_rate": 1.4649442114490198e-05, "loss": 1.1172, "step": 1460 }, { "epoch": 0.37, "learning_rate": 1.4642257275037796e-05, "loss": 1.0552, "step": 1461 }, { "epoch": 0.37, "learning_rate": 1.4635069379551054e-05, "loss": 1.1203, "step": 1462 }, { "epoch": 0.37, "learning_rate": 1.4627878432761826e-05, "loss": 1.0864, "step": 1463 }, { "epoch": 0.37, "learning_rate": 1.4620684439403962e-05, "loss": 1.1123, "step": 1464 }, { "epoch": 0.37, "learning_rate": 1.4613487404213327e-05, "loss": 1.0514, "step": 1465 }, { "epoch": 0.37, "learning_rate": 1.4606287331927784e-05, "loss": 1.0851, "step": 1466 }, { "epoch": 0.37, "learning_rate": 1.4599084227287199e-05, "loss": 1.1593, "step": 1467 }, { "epoch": 0.37, "learning_rate": 1.4591878095033432e-05, "loss": 1.1254, "step": 1468 }, { "epoch": 0.37, "learning_rate": 1.4584668939910334e-05, "loss": 1.11, "step": 1469 }, { "epoch": 0.37, "learning_rate": 1.4577456766663748e-05, "loss": 1.1259, "step": 1470 }, { "epoch": 0.37, "learning_rate": 1.4570241580041508e-05, "loss": 1.1648, "step": 1471 }, { "epoch": 0.37, "learning_rate": 1.4563023384793425e-05, "loss": 1.1313, "step": 1472 }, { "epoch": 0.37, "learning_rate": 1.4555802185671297e-05, "loss": 1.121, "step": 1473 }, { "epoch": 0.37, "learning_rate": 1.454857798742889e-05, "loss": 1.0745, "step": 1474 }, { "epoch": 0.37, "learning_rate": 1.4541350794821956e-05, "loss": 1.1348, "step": 1475 }, { "epoch": 0.37, "learning_rate": 1.4534120612608207e-05, "loss": 1.1425, "step": 1476 }, { "epoch": 0.37, "learning_rate": 1.4526887445547336e-05, "loss": 1.1086, "step": 1477 }, { "epoch": 0.37, "learning_rate": 1.4519651298400985e-05, "loss": 1.1229, "step": 1478 }, { "epoch": 0.37, "learning_rate": 1.4512412175932775e-05, "loss": 1.0732, "step": 1479 }, { "epoch": 0.37, "learning_rate": 1.4505170082908269e-05, "loss": 1.118, "step": 1480 }, { "epoch": 0.37, "learning_rate": 1.4497925024094998e-05, "loss": 1.0968, "step": 1481 }, { "epoch": 0.37, "learning_rate": 1.4490677004262443e-05, "loss": 1.1033, "step": 1482 }, { "epoch": 0.37, "learning_rate": 1.4483426028182028e-05, "loss": 1.1525, "step": 1483 }, { "epoch": 0.37, "learning_rate": 1.4476172100627127e-05, "loss": 1.0874, "step": 1484 }, { "epoch": 0.37, "learning_rate": 1.446891522637306e-05, "loss": 1.096, "step": 1485 }, { "epoch": 0.37, "learning_rate": 1.4461655410197085e-05, "loss": 1.1376, "step": 1486 }, { "epoch": 0.37, "learning_rate": 1.4454392656878394e-05, "loss": 1.1663, "step": 1487 }, { "epoch": 0.37, "learning_rate": 1.4447126971198113e-05, "loss": 1.1691, "step": 1488 }, { "epoch": 0.37, "learning_rate": 1.4439858357939302e-05, "loss": 1.112, "step": 1489 }, { "epoch": 0.37, "learning_rate": 1.4432586821886948e-05, "loss": 1.118, "step": 1490 }, { "epoch": 0.37, "learning_rate": 1.4425312367827952e-05, "loss": 1.0847, "step": 1491 }, { "epoch": 0.37, "learning_rate": 1.4418035000551156e-05, "loss": 1.1226, "step": 1492 }, { "epoch": 0.37, "learning_rate": 1.4410754724847298e-05, "loss": 1.1324, "step": 1493 }, { "epoch": 0.37, "learning_rate": 1.4403471545509046e-05, "loss": 1.1416, "step": 1494 }, { "epoch": 0.37, "learning_rate": 1.4396185467330974e-05, "loss": 1.1008, "step": 1495 }, { "epoch": 0.37, "learning_rate": 1.438889649510956e-05, "loss": 1.0788, "step": 1496 }, { "epoch": 0.37, "learning_rate": 1.4381604633643193e-05, "loss": 1.0874, "step": 1497 }, { "epoch": 0.38, "learning_rate": 1.4374309887732167e-05, "loss": 1.1229, "step": 1498 }, { "epoch": 0.38, "learning_rate": 1.4367012262178663e-05, "loss": 1.0807, "step": 1499 }, { "epoch": 0.38, "learning_rate": 1.4359711761786775e-05, "loss": 1.0918, "step": 1500 }, { "epoch": 0.38, "learning_rate": 1.4352408391362466e-05, "loss": 1.1304, "step": 1501 }, { "epoch": 0.38, "learning_rate": 1.4345102155713617e-05, "loss": 1.1311, "step": 1502 }, { "epoch": 0.38, "learning_rate": 1.4337793059649969e-05, "loss": 1.0712, "step": 1503 }, { "epoch": 0.38, "learning_rate": 1.4330481107983163e-05, "loss": 1.0842, "step": 1504 }, { "epoch": 0.38, "learning_rate": 1.4323166305526713e-05, "loss": 1.1025, "step": 1505 }, { "epoch": 0.38, "learning_rate": 1.4315848657096006e-05, "loss": 1.0855, "step": 1506 }, { "epoch": 0.38, "learning_rate": 1.4308528167508316e-05, "loss": 1.1082, "step": 1507 }, { "epoch": 0.38, "learning_rate": 1.4301204841582772e-05, "loss": 1.1052, "step": 1508 }, { "epoch": 0.38, "learning_rate": 1.4293878684140379e-05, "loss": 1.0975, "step": 1509 }, { "epoch": 0.38, "learning_rate": 1.4286549700004005e-05, "loss": 1.147, "step": 1510 }, { "epoch": 0.38, "learning_rate": 1.4279217893998371e-05, "loss": 1.1053, "step": 1511 }, { "epoch": 0.38, "learning_rate": 1.4271883270950073e-05, "loss": 1.0966, "step": 1512 }, { "epoch": 0.38, "learning_rate": 1.4264545835687544e-05, "loss": 1.1026, "step": 1513 }, { "epoch": 0.38, "learning_rate": 1.4257205593041077e-05, "loss": 1.1699, "step": 1514 }, { "epoch": 0.38, "learning_rate": 1.4249862547842809e-05, "loss": 1.1683, "step": 1515 }, { "epoch": 0.38, "learning_rate": 1.4242516704926728e-05, "loss": 1.0877, "step": 1516 }, { "epoch": 0.38, "learning_rate": 1.4235168069128657e-05, "loss": 1.1067, "step": 1517 }, { "epoch": 0.38, "learning_rate": 1.4227816645286265e-05, "loss": 1.1577, "step": 1518 }, { "epoch": 0.38, "learning_rate": 1.4220462438239043e-05, "loss": 1.1378, "step": 1519 }, { "epoch": 0.38, "learning_rate": 1.4213105452828332e-05, "loss": 1.0948, "step": 1520 }, { "epoch": 0.38, "learning_rate": 1.4205745693897288e-05, "loss": 1.1588, "step": 1521 }, { "epoch": 0.38, "learning_rate": 1.41983831662909e-05, "loss": 1.0909, "step": 1522 }, { "epoch": 0.38, "learning_rate": 1.4191017874855979e-05, "loss": 1.1064, "step": 1523 }, { "epoch": 0.38, "learning_rate": 1.418364982444115e-05, "loss": 1.0972, "step": 1524 }, { "epoch": 0.38, "learning_rate": 1.4176279019896864e-05, "loss": 1.1008, "step": 1525 }, { "epoch": 0.38, "learning_rate": 1.416890546607537e-05, "loss": 1.1306, "step": 1526 }, { "epoch": 0.38, "learning_rate": 1.4161529167830748e-05, "loss": 1.1384, "step": 1527 }, { "epoch": 0.38, "learning_rate": 1.4154150130018867e-05, "loss": 1.0724, "step": 1528 }, { "epoch": 0.38, "learning_rate": 1.4146768357497405e-05, "loss": 1.1341, "step": 1529 }, { "epoch": 0.38, "learning_rate": 1.413938385512584e-05, "loss": 1.1224, "step": 1530 }, { "epoch": 0.38, "learning_rate": 1.413199662776545e-05, "loss": 1.1394, "step": 1531 }, { "epoch": 0.38, "learning_rate": 1.4124606680279302e-05, "loss": 1.1369, "step": 1532 }, { "epoch": 0.38, "learning_rate": 1.411721401753226e-05, "loss": 1.1061, "step": 1533 }, { "epoch": 0.38, "learning_rate": 1.410981864439097e-05, "loss": 1.1151, "step": 1534 }, { "epoch": 0.38, "learning_rate": 1.4102420565723861e-05, "loss": 1.1217, "step": 1535 }, { "epoch": 0.38, "learning_rate": 1.4095019786401153e-05, "loss": 1.1397, "step": 1536 }, { "epoch": 0.38, "learning_rate": 1.4087616311294831e-05, "loss": 1.1485, "step": 1537 }, { "epoch": 0.39, "learning_rate": 1.4080210145278663e-05, "loss": 1.0885, "step": 1538 }, { "epoch": 0.39, "learning_rate": 1.407280129322819e-05, "loss": 1.0833, "step": 1539 }, { "epoch": 0.39, "learning_rate": 1.4065389760020708e-05, "loss": 1.0841, "step": 1540 }, { "epoch": 0.39, "learning_rate": 1.4057975550535295e-05, "loss": 1.1859, "step": 1541 }, { "epoch": 0.39, "learning_rate": 1.405055866965278e-05, "loss": 1.0935, "step": 1542 }, { "epoch": 0.39, "learning_rate": 1.4043139122255756e-05, "loss": 1.0942, "step": 1543 }, { "epoch": 0.39, "learning_rate": 1.4035716913228568e-05, "loss": 1.1238, "step": 1544 }, { "epoch": 0.39, "learning_rate": 1.4028292047457312e-05, "loss": 1.1027, "step": 1545 }, { "epoch": 0.39, "learning_rate": 1.402086452982984e-05, "loss": 1.0983, "step": 1546 }, { "epoch": 0.39, "learning_rate": 1.4013434365235743e-05, "loss": 1.0897, "step": 1547 }, { "epoch": 0.39, "learning_rate": 1.4006001558566352e-05, "loss": 1.1535, "step": 1548 }, { "epoch": 0.39, "learning_rate": 1.3998566114714752e-05, "loss": 1.156, "step": 1549 }, { "epoch": 0.39, "learning_rate": 1.3991128038575741e-05, "loss": 1.116, "step": 1550 }, { "epoch": 0.39, "learning_rate": 1.3983687335045875e-05, "loss": 1.1128, "step": 1551 }, { "epoch": 0.39, "learning_rate": 1.3976244009023424e-05, "loss": 1.0686, "step": 1552 }, { "epoch": 0.39, "learning_rate": 1.396879806540838e-05, "loss": 1.1056, "step": 1553 }, { "epoch": 0.39, "learning_rate": 1.3961349509102477e-05, "loss": 1.1146, "step": 1554 }, { "epoch": 0.39, "learning_rate": 1.3953898345009152e-05, "loss": 1.1307, "step": 1555 }, { "epoch": 0.39, "learning_rate": 1.3946444578033562e-05, "loss": 1.133, "step": 1556 }, { "epoch": 0.39, "learning_rate": 1.393898821308259e-05, "loss": 1.1406, "step": 1557 }, { "epoch": 0.39, "learning_rate": 1.3931529255064808e-05, "loss": 1.1422, "step": 1558 }, { "epoch": 0.39, "learning_rate": 1.3924067708890516e-05, "loss": 1.1343, "step": 1559 }, { "epoch": 0.39, "learning_rate": 1.3916603579471705e-05, "loss": 1.1263, "step": 1560 }, { "epoch": 0.39, "learning_rate": 1.3909136871722066e-05, "loss": 1.1026, "step": 1561 }, { "epoch": 0.39, "learning_rate": 1.3901667590556996e-05, "loss": 1.112, "step": 1562 }, { "epoch": 0.39, "learning_rate": 1.389419574089358e-05, "loss": 1.1306, "step": 1563 }, { "epoch": 0.39, "learning_rate": 1.3886721327650592e-05, "loss": 1.1021, "step": 1564 }, { "epoch": 0.39, "learning_rate": 1.3879244355748504e-05, "loss": 1.1196, "step": 1565 }, { "epoch": 0.39, "learning_rate": 1.3871764830109457e-05, "loss": 1.1188, "step": 1566 }, { "epoch": 0.39, "learning_rate": 1.3864282755657287e-05, "loss": 1.1009, "step": 1567 }, { "epoch": 0.39, "learning_rate": 1.3856798137317502e-05, "loss": 1.1047, "step": 1568 }, { "epoch": 0.39, "learning_rate": 1.3849310980017281e-05, "loss": 1.1261, "step": 1569 }, { "epoch": 0.39, "learning_rate": 1.3841821288685482e-05, "loss": 1.0796, "step": 1570 }, { "epoch": 0.39, "learning_rate": 1.3834329068252624e-05, "loss": 1.1301, "step": 1571 }, { "epoch": 0.39, "learning_rate": 1.3826834323650899e-05, "loss": 1.1166, "step": 1572 }, { "epoch": 0.39, "learning_rate": 1.3819337059814153e-05, "loss": 1.1299, "step": 1573 }, { "epoch": 0.39, "learning_rate": 1.3811837281677893e-05, "loss": 1.0883, "step": 1574 }, { "epoch": 0.39, "learning_rate": 1.3804334994179286e-05, "loss": 1.1326, "step": 1575 }, { "epoch": 0.39, "learning_rate": 1.3796830202257141e-05, "loss": 1.1243, "step": 1576 }, { "epoch": 0.39, "learning_rate": 1.3789322910851923e-05, "loss": 1.133, "step": 1577 }, { "epoch": 0.4, "learning_rate": 1.3781813124905745e-05, "loss": 1.1282, "step": 1578 }, { "epoch": 0.4, "learning_rate": 1.3774300849362352e-05, "loss": 1.1236, "step": 1579 }, { "epoch": 0.4, "learning_rate": 1.3766786089167136e-05, "loss": 1.091, "step": 1580 }, { "epoch": 0.4, "learning_rate": 1.3759268849267125e-05, "loss": 1.1089, "step": 1581 }, { "epoch": 0.4, "learning_rate": 1.3751749134610974e-05, "loss": 1.1282, "step": 1582 }, { "epoch": 0.4, "learning_rate": 1.374422695014897e-05, "loss": 1.1214, "step": 1583 }, { "epoch": 0.4, "learning_rate": 1.3736702300833027e-05, "loss": 1.0515, "step": 1584 }, { "epoch": 0.4, "learning_rate": 1.3729175191616683e-05, "loss": 1.0764, "step": 1585 }, { "epoch": 0.4, "learning_rate": 1.372164562745509e-05, "loss": 1.0993, "step": 1586 }, { "epoch": 0.4, "learning_rate": 1.3714113613305017e-05, "loss": 1.0915, "step": 1587 }, { "epoch": 0.4, "learning_rate": 1.3706579154124852e-05, "loss": 1.1272, "step": 1588 }, { "epoch": 0.4, "learning_rate": 1.3699042254874586e-05, "loss": 1.0773, "step": 1589 }, { "epoch": 0.4, "learning_rate": 1.369150292051582e-05, "loss": 1.0921, "step": 1590 }, { "epoch": 0.4, "learning_rate": 1.3683961156011761e-05, "loss": 1.0702, "step": 1591 }, { "epoch": 0.4, "learning_rate": 1.3676416966327201e-05, "loss": 1.1119, "step": 1592 }, { "epoch": 0.4, "learning_rate": 1.3668870356428549e-05, "loss": 1.1082, "step": 1593 }, { "epoch": 0.4, "learning_rate": 1.3661321331283796e-05, "loss": 1.1118, "step": 1594 }, { "epoch": 0.4, "learning_rate": 1.365376989586252e-05, "loss": 1.1412, "step": 1595 }, { "epoch": 0.4, "learning_rate": 1.3646216055135892e-05, "loss": 1.1162, "step": 1596 }, { "epoch": 0.4, "learning_rate": 1.3638659814076662e-05, "loss": 1.1466, "step": 1597 }, { "epoch": 0.4, "learning_rate": 1.3631101177659172e-05, "loss": 1.1213, "step": 1598 }, { "epoch": 0.4, "learning_rate": 1.3623540150859324e-05, "loss": 1.1366, "step": 1599 }, { "epoch": 0.4, "learning_rate": 1.36159767386546e-05, "loss": 1.0954, "step": 1600 }, { "epoch": 0.4, "learning_rate": 1.3608410946024057e-05, "loss": 1.1172, "step": 1601 }, { "epoch": 0.4, "learning_rate": 1.3600842777948315e-05, "loss": 1.1388, "step": 1602 }, { "epoch": 0.4, "learning_rate": 1.3593272239409559e-05, "loss": 1.116, "step": 1603 }, { "epoch": 0.4, "learning_rate": 1.3585699335391533e-05, "loss": 1.099, "step": 1604 }, { "epoch": 0.4, "learning_rate": 1.3578124070879534e-05, "loss": 1.1014, "step": 1605 }, { "epoch": 0.4, "learning_rate": 1.3570546450860428e-05, "loss": 1.084, "step": 1606 }, { "epoch": 0.4, "learning_rate": 1.3562966480322617e-05, "loss": 1.1187, "step": 1607 }, { "epoch": 0.4, "learning_rate": 1.3555384164256048e-05, "loss": 1.1065, "step": 1608 }, { "epoch": 0.4, "learning_rate": 1.3547799507652232e-05, "loss": 1.1004, "step": 1609 }, { "epoch": 0.4, "learning_rate": 1.3540212515504198e-05, "loss": 1.092, "step": 1610 }, { "epoch": 0.4, "learning_rate": 1.353262319280653e-05, "loss": 1.0477, "step": 1611 }, { "epoch": 0.4, "learning_rate": 1.3525031544555329e-05, "loss": 1.0799, "step": 1612 }, { "epoch": 0.4, "learning_rate": 1.3517437575748245e-05, "loss": 1.0834, "step": 1613 }, { "epoch": 0.4, "learning_rate": 1.3509841291384444e-05, "loss": 1.1398, "step": 1614 }, { "epoch": 0.4, "learning_rate": 1.3502242696464619e-05, "loss": 1.0973, "step": 1615 }, { "epoch": 0.4, "learning_rate": 1.3494641795990986e-05, "loss": 1.1498, "step": 1616 }, { "epoch": 0.4, "learning_rate": 1.3487038594967276e-05, "loss": 1.1255, "step": 1617 }, { "epoch": 0.41, "learning_rate": 1.3479433098398735e-05, "loss": 1.1171, "step": 1618 }, { "epoch": 0.41, "learning_rate": 1.3471825311292123e-05, "loss": 1.1388, "step": 1619 }, { "epoch": 0.41, "learning_rate": 1.3464215238655707e-05, "loss": 1.1561, "step": 1620 }, { "epoch": 0.41, "learning_rate": 1.3456602885499252e-05, "loss": 1.0964, "step": 1621 }, { "epoch": 0.41, "learning_rate": 1.3448988256834037e-05, "loss": 1.1096, "step": 1622 }, { "epoch": 0.41, "learning_rate": 1.3441371357672825e-05, "loss": 1.0817, "step": 1623 }, { "epoch": 0.41, "learning_rate": 1.3433752193029888e-05, "loss": 1.1073, "step": 1624 }, { "epoch": 0.41, "learning_rate": 1.3426130767920975e-05, "loss": 1.085, "step": 1625 }, { "epoch": 0.41, "learning_rate": 1.3418507087363328e-05, "loss": 1.1252, "step": 1626 }, { "epoch": 0.41, "learning_rate": 1.3410881156375684e-05, "loss": 1.066, "step": 1627 }, { "epoch": 0.41, "learning_rate": 1.340325297997825e-05, "loss": 1.1374, "step": 1628 }, { "epoch": 0.41, "learning_rate": 1.339562256319271e-05, "loss": 1.1518, "step": 1629 }, { "epoch": 0.41, "learning_rate": 1.3387989911042232e-05, "loss": 1.0695, "step": 1630 }, { "epoch": 0.41, "learning_rate": 1.3380355028551448e-05, "loss": 1.0897, "step": 1631 }, { "epoch": 0.41, "learning_rate": 1.3372717920746464e-05, "loss": 1.0974, "step": 1632 }, { "epoch": 0.41, "learning_rate": 1.3365078592654843e-05, "loss": 1.0848, "step": 1633 }, { "epoch": 0.41, "learning_rate": 1.335743704930562e-05, "loss": 1.1097, "step": 1634 }, { "epoch": 0.41, "learning_rate": 1.3349793295729278e-05, "loss": 1.0949, "step": 1635 }, { "epoch": 0.41, "learning_rate": 1.3342147336957764e-05, "loss": 1.0736, "step": 1636 }, { "epoch": 0.41, "learning_rate": 1.3334499178024472e-05, "loss": 1.1185, "step": 1637 }, { "epoch": 0.41, "learning_rate": 1.3326848823964243e-05, "loss": 1.0856, "step": 1638 }, { "epoch": 0.41, "learning_rate": 1.3319196279813362e-05, "loss": 1.102, "step": 1639 }, { "epoch": 0.41, "learning_rate": 1.3311541550609566e-05, "loss": 1.0663, "step": 1640 }, { "epoch": 0.41, "learning_rate": 1.3303884641392019e-05, "loss": 1.1004, "step": 1641 }, { "epoch": 0.41, "learning_rate": 1.3296225557201324e-05, "loss": 1.0956, "step": 1642 }, { "epoch": 0.41, "learning_rate": 1.328856430307952e-05, "loss": 1.1123, "step": 1643 }, { "epoch": 0.41, "learning_rate": 1.3280900884070064e-05, "loss": 1.1495, "step": 1644 }, { "epoch": 0.41, "learning_rate": 1.3273235305217853e-05, "loss": 1.1285, "step": 1645 }, { "epoch": 0.41, "learning_rate": 1.3265567571569192e-05, "loss": 1.1115, "step": 1646 }, { "epoch": 0.41, "learning_rate": 1.3257897688171809e-05, "loss": 1.1276, "step": 1647 }, { "epoch": 0.41, "learning_rate": 1.3250225660074852e-05, "loss": 1.0934, "step": 1648 }, { "epoch": 0.41, "learning_rate": 1.3242551492328875e-05, "loss": 1.0808, "step": 1649 }, { "epoch": 0.41, "learning_rate": 1.3234875189985845e-05, "loss": 1.1099, "step": 1650 }, { "epoch": 0.41, "learning_rate": 1.3227196758099133e-05, "loss": 1.0894, "step": 1651 }, { "epoch": 0.41, "learning_rate": 1.3219516201723503e-05, "loss": 1.1037, "step": 1652 }, { "epoch": 0.41, "learning_rate": 1.3211833525915135e-05, "loss": 1.1248, "step": 1653 }, { "epoch": 0.41, "learning_rate": 1.3204148735731587e-05, "loss": 1.1213, "step": 1654 }, { "epoch": 0.41, "learning_rate": 1.3196461836231822e-05, "loss": 1.1222, "step": 1655 }, { "epoch": 0.41, "learning_rate": 1.318877283247619e-05, "loss": 1.1348, "step": 1656 }, { "epoch": 0.41, "learning_rate": 1.3181081729526409e-05, "loss": 1.1354, "step": 1657 }, { "epoch": 0.42, "learning_rate": 1.3173388532445604e-05, "loss": 1.0986, "step": 1658 }, { "epoch": 0.42, "learning_rate": 1.3165693246298263e-05, "loss": 1.1303, "step": 1659 }, { "epoch": 0.42, "learning_rate": 1.3157995876150252e-05, "loss": 1.1162, "step": 1660 }, { "epoch": 0.42, "learning_rate": 1.3150296427068809e-05, "loss": 1.1203, "step": 1661 }, { "epoch": 0.42, "learning_rate": 1.3142594904122548e-05, "loss": 1.0986, "step": 1662 }, { "epoch": 0.42, "learning_rate": 1.3134891312381437e-05, "loss": 1.119, "step": 1663 }, { "epoch": 0.42, "learning_rate": 1.3127185656916811e-05, "loss": 1.0589, "step": 1664 }, { "epoch": 0.42, "learning_rate": 1.3119477942801364e-05, "loss": 1.0848, "step": 1665 }, { "epoch": 0.42, "learning_rate": 1.3111768175109147e-05, "loss": 1.0817, "step": 1666 }, { "epoch": 0.42, "learning_rate": 1.3104056358915553e-05, "loss": 1.0991, "step": 1667 }, { "epoch": 0.42, "learning_rate": 1.309634249929734e-05, "loss": 1.0884, "step": 1668 }, { "epoch": 0.42, "learning_rate": 1.3088626601332599e-05, "loss": 1.1225, "step": 1669 }, { "epoch": 0.42, "learning_rate": 1.3080908670100762e-05, "loss": 1.1288, "step": 1670 }, { "epoch": 0.42, "learning_rate": 1.3073188710682612e-05, "loss": 1.0982, "step": 1671 }, { "epoch": 0.42, "learning_rate": 1.3065466728160253e-05, "loss": 1.0757, "step": 1672 }, { "epoch": 0.42, "learning_rate": 1.3057742727617125e-05, "loss": 1.1211, "step": 1673 }, { "epoch": 0.42, "learning_rate": 1.3050016714138e-05, "loss": 1.0831, "step": 1674 }, { "epoch": 0.42, "learning_rate": 1.3042288692808974e-05, "loss": 1.0916, "step": 1675 }, { "epoch": 0.42, "learning_rate": 1.3034558668717466e-05, "loss": 1.1371, "step": 1676 }, { "epoch": 0.42, "learning_rate": 1.3026826646952206e-05, "loss": 1.1035, "step": 1677 }, { "epoch": 0.42, "learning_rate": 1.301909263260325e-05, "loss": 1.0917, "step": 1678 }, { "epoch": 0.42, "learning_rate": 1.3011356630761958e-05, "loss": 1.1276, "step": 1679 }, { "epoch": 0.42, "learning_rate": 1.3003618646520999e-05, "loss": 1.0896, "step": 1680 }, { "epoch": 0.42, "learning_rate": 1.2995878684974352e-05, "loss": 1.1184, "step": 1681 }, { "epoch": 0.42, "learning_rate": 1.2988136751217292e-05, "loss": 1.0975, "step": 1682 }, { "epoch": 0.42, "learning_rate": 1.2980392850346393e-05, "loss": 1.1079, "step": 1683 }, { "epoch": 0.42, "learning_rate": 1.2972646987459532e-05, "loss": 1.1194, "step": 1684 }, { "epoch": 0.42, "learning_rate": 1.2964899167655865e-05, "loss": 1.1364, "step": 1685 }, { "epoch": 0.42, "learning_rate": 1.2957149396035844e-05, "loss": 1.0769, "step": 1686 }, { "epoch": 0.42, "learning_rate": 1.2949397677701205e-05, "loss": 1.0834, "step": 1687 }, { "epoch": 0.42, "learning_rate": 1.2941644017754964e-05, "loss": 1.1243, "step": 1688 }, { "epoch": 0.42, "learning_rate": 1.2933888421301419e-05, "loss": 1.0793, "step": 1689 }, { "epoch": 0.42, "learning_rate": 1.2926130893446138e-05, "loss": 1.1612, "step": 1690 }, { "epoch": 0.42, "learning_rate": 1.2918371439295961e-05, "loss": 1.0958, "step": 1691 }, { "epoch": 0.42, "learning_rate": 1.2910610063958999e-05, "loss": 1.1252, "step": 1692 }, { "epoch": 0.42, "learning_rate": 1.2902846772544625e-05, "loss": 1.0934, "step": 1693 }, { "epoch": 0.42, "learning_rate": 1.2895081570163477e-05, "loss": 1.1049, "step": 1694 }, { "epoch": 0.42, "learning_rate": 1.2887314461927447e-05, "loss": 1.0934, "step": 1695 }, { "epoch": 0.42, "learning_rate": 1.287954545294968e-05, "loss": 1.1169, "step": 1696 }, { "epoch": 0.43, "learning_rate": 1.2871774548344583e-05, "loss": 1.1008, "step": 1697 }, { "epoch": 0.43, "learning_rate": 1.2864001753227798e-05, "loss": 1.098, "step": 1698 }, { "epoch": 0.43, "learning_rate": 1.2856227072716214e-05, "loss": 1.088, "step": 1699 }, { "epoch": 0.43, "learning_rate": 1.2848450511927974e-05, "loss": 1.1275, "step": 1700 }, { "epoch": 0.43, "learning_rate": 1.2840672075982439e-05, "loss": 1.1022, "step": 1701 }, { "epoch": 0.43, "learning_rate": 1.2832891770000223e-05, "loss": 1.0947, "step": 1702 }, { "epoch": 0.43, "learning_rate": 1.2825109599103154e-05, "loss": 1.124, "step": 1703 }, { "epoch": 0.43, "learning_rate": 1.2817325568414299e-05, "loss": 1.132, "step": 1704 }, { "epoch": 0.43, "learning_rate": 1.2809539683057948e-05, "loss": 1.0919, "step": 1705 }, { "epoch": 0.43, "learning_rate": 1.280175194815961e-05, "loss": 1.1088, "step": 1706 }, { "epoch": 0.43, "learning_rate": 1.2793962368846012e-05, "loss": 1.1154, "step": 1707 }, { "epoch": 0.43, "learning_rate": 1.2786170950245095e-05, "loss": 1.138, "step": 1708 }, { "epoch": 0.43, "learning_rate": 1.2778377697486009e-05, "loss": 1.1381, "step": 1709 }, { "epoch": 0.43, "learning_rate": 1.2770582615699117e-05, "loss": 1.0845, "step": 1710 }, { "epoch": 0.43, "learning_rate": 1.2762785710015978e-05, "loss": 1.1191, "step": 1711 }, { "epoch": 0.43, "learning_rate": 1.2754986985569357e-05, "loss": 1.0945, "step": 1712 }, { "epoch": 0.43, "learning_rate": 1.274718644749322e-05, "loss": 1.0883, "step": 1713 }, { "epoch": 0.43, "learning_rate": 1.2739384100922719e-05, "loss": 1.0937, "step": 1714 }, { "epoch": 0.43, "learning_rate": 1.27315799509942e-05, "loss": 1.1294, "step": 1715 }, { "epoch": 0.43, "learning_rate": 1.2723774002845194e-05, "loss": 1.1047, "step": 1716 }, { "epoch": 0.43, "learning_rate": 1.271596626161442e-05, "loss": 1.0906, "step": 1717 }, { "epoch": 0.43, "learning_rate": 1.2708156732441776e-05, "loss": 1.1163, "step": 1718 }, { "epoch": 0.43, "learning_rate": 1.2700345420468337e-05, "loss": 1.1335, "step": 1719 }, { "epoch": 0.43, "learning_rate": 1.2692532330836346e-05, "loss": 1.0609, "step": 1720 }, { "epoch": 0.43, "learning_rate": 1.2684717468689227e-05, "loss": 1.1059, "step": 1721 }, { "epoch": 0.43, "learning_rate": 1.267690083917156e-05, "loss": 1.1386, "step": 1722 }, { "epoch": 0.43, "learning_rate": 1.26690824474291e-05, "loss": 1.1009, "step": 1723 }, { "epoch": 0.43, "learning_rate": 1.2661262298608748e-05, "loss": 1.1093, "step": 1724 }, { "epoch": 0.43, "learning_rate": 1.2653440397858571e-05, "loss": 1.1416, "step": 1725 }, { "epoch": 0.43, "learning_rate": 1.2645616750327792e-05, "loss": 1.0792, "step": 1726 }, { "epoch": 0.43, "learning_rate": 1.2637791361166773e-05, "loss": 1.1137, "step": 1727 }, { "epoch": 0.43, "learning_rate": 1.2629964235527038e-05, "loss": 1.0838, "step": 1728 }, { "epoch": 0.43, "learning_rate": 1.2622135378561238e-05, "loss": 1.1195, "step": 1729 }, { "epoch": 0.43, "learning_rate": 1.2614304795423169e-05, "loss": 1.1092, "step": 1730 }, { "epoch": 0.43, "learning_rate": 1.2606472491267771e-05, "loss": 1.1078, "step": 1731 }, { "epoch": 0.43, "learning_rate": 1.259863847125111e-05, "loss": 1.0979, "step": 1732 }, { "epoch": 0.43, "learning_rate": 1.259080274053038e-05, "loss": 1.1119, "step": 1733 }, { "epoch": 0.43, "learning_rate": 1.258296530426391e-05, "loss": 1.1481, "step": 1734 }, { "epoch": 0.43, "learning_rate": 1.2575126167611136e-05, "loss": 1.1165, "step": 1735 }, { "epoch": 0.43, "learning_rate": 1.2567285335732633e-05, "loss": 1.0729, "step": 1736 }, { "epoch": 0.44, "learning_rate": 1.2559442813790077e-05, "loss": 1.0957, "step": 1737 }, { "epoch": 0.44, "learning_rate": 1.2551598606946257e-05, "loss": 1.1134, "step": 1738 }, { "epoch": 0.44, "learning_rate": 1.2543752720365088e-05, "loss": 1.0812, "step": 1739 }, { "epoch": 0.44, "learning_rate": 1.2535905159211567e-05, "loss": 1.1017, "step": 1740 }, { "epoch": 0.44, "learning_rate": 1.252805592865181e-05, "loss": 1.0723, "step": 1741 }, { "epoch": 0.44, "learning_rate": 1.252020503385303e-05, "loss": 1.095, "step": 1742 }, { "epoch": 0.44, "learning_rate": 1.2512352479983525e-05, "loss": 1.1174, "step": 1743 }, { "epoch": 0.44, "learning_rate": 1.2504498272212701e-05, "loss": 1.1403, "step": 1744 }, { "epoch": 0.44, "learning_rate": 1.2496642415711038e-05, "loss": 1.1483, "step": 1745 }, { "epoch": 0.44, "learning_rate": 1.2488784915650112e-05, "loss": 1.0917, "step": 1746 }, { "epoch": 0.44, "learning_rate": 1.2480925777202573e-05, "loss": 1.1324, "step": 1747 }, { "epoch": 0.44, "learning_rate": 1.2473065005542155e-05, "loss": 1.134, "step": 1748 }, { "epoch": 0.44, "learning_rate": 1.2465202605843669e-05, "loss": 1.0895, "step": 1749 }, { "epoch": 0.44, "learning_rate": 1.2457338583282992e-05, "loss": 1.1222, "step": 1750 }, { "epoch": 0.44, "learning_rate": 1.244947294303707e-05, "loss": 1.1013, "step": 1751 }, { "epoch": 0.44, "learning_rate": 1.2441605690283915e-05, "loss": 1.1519, "step": 1752 }, { "epoch": 0.44, "learning_rate": 1.2433736830202605e-05, "loss": 1.1135, "step": 1753 }, { "epoch": 0.44, "learning_rate": 1.2425866367973267e-05, "loss": 1.141, "step": 1754 }, { "epoch": 0.44, "learning_rate": 1.2417994308777095e-05, "loss": 1.0919, "step": 1755 }, { "epoch": 0.44, "learning_rate": 1.2410120657796323e-05, "loss": 1.1096, "step": 1756 }, { "epoch": 0.44, "learning_rate": 1.2402245420214235e-05, "loss": 1.1205, "step": 1757 }, { "epoch": 0.44, "learning_rate": 1.2394368601215167e-05, "loss": 1.1259, "step": 1758 }, { "epoch": 0.44, "learning_rate": 1.2386490205984488e-05, "loss": 1.1892, "step": 1759 }, { "epoch": 0.44, "learning_rate": 1.2378610239708606e-05, "loss": 1.0919, "step": 1760 }, { "epoch": 0.44, "learning_rate": 1.2370728707574964e-05, "loss": 1.0582, "step": 1761 }, { "epoch": 0.44, "learning_rate": 1.2362845614772038e-05, "loss": 1.0718, "step": 1762 }, { "epoch": 0.44, "learning_rate": 1.2354960966489333e-05, "loss": 1.1026, "step": 1763 }, { "epoch": 0.44, "learning_rate": 1.234707476791737e-05, "loss": 1.1677, "step": 1764 }, { "epoch": 0.44, "learning_rate": 1.2339187024247697e-05, "loss": 1.1024, "step": 1765 }, { "epoch": 0.44, "learning_rate": 1.2331297740672877e-05, "loss": 1.095, "step": 1766 }, { "epoch": 0.44, "learning_rate": 1.2323406922386487e-05, "loss": 1.147, "step": 1767 }, { "epoch": 0.44, "learning_rate": 1.2315514574583113e-05, "loss": 1.111, "step": 1768 }, { "epoch": 0.44, "learning_rate": 1.2307620702458354e-05, "loss": 1.0574, "step": 1769 }, { "epoch": 0.44, "learning_rate": 1.2299725311208807e-05, "loss": 1.0442, "step": 1770 }, { "epoch": 0.44, "learning_rate": 1.2291828406032067e-05, "loss": 1.1112, "step": 1771 }, { "epoch": 0.44, "learning_rate": 1.2283929992126729e-05, "loss": 1.0741, "step": 1772 }, { "epoch": 0.44, "learning_rate": 1.2276030074692383e-05, "loss": 1.1106, "step": 1773 }, { "epoch": 0.44, "learning_rate": 1.2268128658929602e-05, "loss": 1.09, "step": 1774 }, { "epoch": 0.44, "learning_rate": 1.2260225750039952e-05, "loss": 1.1102, "step": 1775 }, { "epoch": 0.44, "learning_rate": 1.2252321353225984e-05, "loss": 1.1261, "step": 1776 }, { "epoch": 0.45, "learning_rate": 1.2244415473691221e-05, "loss": 1.1055, "step": 1777 }, { "epoch": 0.45, "learning_rate": 1.2236508116640164e-05, "loss": 1.0816, "step": 1778 }, { "epoch": 0.45, "learning_rate": 1.222859928727829e-05, "loss": 1.0973, "step": 1779 }, { "epoch": 0.45, "learning_rate": 1.2220688990812047e-05, "loss": 1.1072, "step": 1780 }, { "epoch": 0.45, "learning_rate": 1.2212777232448837e-05, "loss": 1.0804, "step": 1781 }, { "epoch": 0.45, "learning_rate": 1.2204864017397036e-05, "loss": 1.0464, "step": 1782 }, { "epoch": 0.45, "learning_rate": 1.2196949350865979e-05, "loss": 1.1059, "step": 1783 }, { "epoch": 0.45, "learning_rate": 1.218903323806595e-05, "loss": 1.1297, "step": 1784 }, { "epoch": 0.45, "learning_rate": 1.2181115684208187e-05, "loss": 1.1038, "step": 1785 }, { "epoch": 0.45, "learning_rate": 1.2173196694504878e-05, "loss": 1.1022, "step": 1786 }, { "epoch": 0.45, "learning_rate": 1.216527627416916e-05, "loss": 1.1143, "step": 1787 }, { "epoch": 0.45, "learning_rate": 1.2157354428415103e-05, "loss": 1.1293, "step": 1788 }, { "epoch": 0.45, "learning_rate": 1.2149431162457723e-05, "loss": 1.0963, "step": 1789 }, { "epoch": 0.45, "learning_rate": 1.2141506481512964e-05, "loss": 1.1114, "step": 1790 }, { "epoch": 0.45, "learning_rate": 1.213358039079771e-05, "loss": 1.1274, "step": 1791 }, { "epoch": 0.45, "learning_rate": 1.2125652895529766e-05, "loss": 1.0626, "step": 1792 }, { "epoch": 0.45, "learning_rate": 1.2117724000927869e-05, "loss": 1.1316, "step": 1793 }, { "epoch": 0.45, "learning_rate": 1.2109793712211668e-05, "loss": 1.1484, "step": 1794 }, { "epoch": 0.45, "learning_rate": 1.2101862034601732e-05, "loss": 1.1048, "step": 1795 }, { "epoch": 0.45, "learning_rate": 1.2093928973319554e-05, "loss": 1.0802, "step": 1796 }, { "epoch": 0.45, "learning_rate": 1.2085994533587522e-05, "loss": 1.0754, "step": 1797 }, { "epoch": 0.45, "learning_rate": 1.2078058720628942e-05, "loss": 1.0764, "step": 1798 }, { "epoch": 0.45, "learning_rate": 1.2070121539668029e-05, "loss": 1.0786, "step": 1799 }, { "epoch": 0.45, "learning_rate": 1.2062182995929883e-05, "loss": 1.0746, "step": 1800 }, { "epoch": 0.45, "learning_rate": 1.2054243094640514e-05, "loss": 1.0742, "step": 1801 }, { "epoch": 0.45, "learning_rate": 1.204630184102682e-05, "loss": 1.1433, "step": 1802 }, { "epoch": 0.45, "learning_rate": 1.2038359240316589e-05, "loss": 1.1162, "step": 1803 }, { "epoch": 0.45, "learning_rate": 1.2030415297738498e-05, "loss": 1.0688, "step": 1804 }, { "epoch": 0.45, "learning_rate": 1.2022470018522109e-05, "loss": 1.0794, "step": 1805 }, { "epoch": 0.45, "learning_rate": 1.2014523407897858e-05, "loss": 1.0973, "step": 1806 }, { "epoch": 0.45, "learning_rate": 1.2006575471097063e-05, "loss": 1.1178, "step": 1807 }, { "epoch": 0.45, "learning_rate": 1.1998626213351914e-05, "loss": 1.0736, "step": 1808 }, { "epoch": 0.45, "learning_rate": 1.199067563989547e-05, "loss": 1.1343, "step": 1809 }, { "epoch": 0.45, "learning_rate": 1.1982723755961651e-05, "loss": 1.0967, "step": 1810 }, { "epoch": 0.45, "learning_rate": 1.197477056678525e-05, "loss": 1.1178, "step": 1811 }, { "epoch": 0.45, "learning_rate": 1.196681607760191e-05, "loss": 1.1196, "step": 1812 }, { "epoch": 0.45, "learning_rate": 1.1958860293648136e-05, "loss": 1.1197, "step": 1813 }, { "epoch": 0.45, "learning_rate": 1.1950903220161286e-05, "loss": 1.0702, "step": 1814 }, { "epoch": 0.45, "learning_rate": 1.1942944862379555e-05, "loss": 1.0994, "step": 1815 }, { "epoch": 0.45, "learning_rate": 1.1934985225541998e-05, "loss": 1.1353, "step": 1816 }, { "epoch": 0.46, "learning_rate": 1.1927024314888506e-05, "loss": 1.0956, "step": 1817 }, { "epoch": 0.46, "learning_rate": 1.191906213565981e-05, "loss": 1.0838, "step": 1818 }, { "epoch": 0.46, "learning_rate": 1.1911098693097469e-05, "loss": 1.1027, "step": 1819 }, { "epoch": 0.46, "learning_rate": 1.190313399244389e-05, "loss": 1.0793, "step": 1820 }, { "epoch": 0.46, "learning_rate": 1.1895168038942286e-05, "loss": 1.1044, "step": 1821 }, { "epoch": 0.46, "learning_rate": 1.1887200837836716e-05, "loss": 1.0938, "step": 1822 }, { "epoch": 0.46, "learning_rate": 1.1879232394372044e-05, "loss": 1.1095, "step": 1823 }, { "epoch": 0.46, "learning_rate": 1.1871262713793967e-05, "loss": 1.1174, "step": 1824 }, { "epoch": 0.46, "learning_rate": 1.186329180134898e-05, "loss": 1.0905, "step": 1825 }, { "epoch": 0.46, "learning_rate": 1.1855319662284399e-05, "loss": 1.116, "step": 1826 }, { "epoch": 0.46, "learning_rate": 1.1847346301848351e-05, "loss": 1.1283, "step": 1827 }, { "epoch": 0.46, "learning_rate": 1.1839371725289757e-05, "loss": 1.1024, "step": 1828 }, { "epoch": 0.46, "learning_rate": 1.1831395937858342e-05, "loss": 1.1025, "step": 1829 }, { "epoch": 0.46, "learning_rate": 1.1823418944804632e-05, "loss": 1.0769, "step": 1830 }, { "epoch": 0.46, "learning_rate": 1.1815440751379942e-05, "loss": 1.0966, "step": 1831 }, { "epoch": 0.46, "learning_rate": 1.1807461362836382e-05, "loss": 1.0745, "step": 1832 }, { "epoch": 0.46, "learning_rate": 1.1799480784426844e-05, "loss": 1.1101, "step": 1833 }, { "epoch": 0.46, "learning_rate": 1.1791499021405005e-05, "loss": 1.0983, "step": 1834 }, { "epoch": 0.46, "learning_rate": 1.1783516079025325e-05, "loss": 1.0988, "step": 1835 }, { "epoch": 0.46, "learning_rate": 1.1775531962543036e-05, "loss": 1.1074, "step": 1836 }, { "epoch": 0.46, "learning_rate": 1.176754667721414e-05, "loss": 1.0912, "step": 1837 }, { "epoch": 0.46, "learning_rate": 1.175956022829542e-05, "loss": 1.1223, "step": 1838 }, { "epoch": 0.46, "learning_rate": 1.175157262104441e-05, "loss": 1.1344, "step": 1839 }, { "epoch": 0.46, "learning_rate": 1.1743583860719425e-05, "loss": 1.1339, "step": 1840 }, { "epoch": 0.46, "learning_rate": 1.1735593952579523e-05, "loss": 1.0824, "step": 1841 }, { "epoch": 0.46, "learning_rate": 1.1727602901884519e-05, "loss": 1.0936, "step": 1842 }, { "epoch": 0.46, "learning_rate": 1.1719610713894992e-05, "loss": 1.1071, "step": 1843 }, { "epoch": 0.46, "learning_rate": 1.1711617393872255e-05, "loss": 1.0738, "step": 1844 }, { "epoch": 0.46, "learning_rate": 1.1703622947078376e-05, "loss": 1.0958, "step": 1845 }, { "epoch": 0.46, "learning_rate": 1.1695627378776164e-05, "loss": 1.0755, "step": 1846 }, { "epoch": 0.46, "learning_rate": 1.1687630694229159e-05, "loss": 1.083, "step": 1847 }, { "epoch": 0.46, "learning_rate": 1.1679632898701649e-05, "loss": 1.0981, "step": 1848 }, { "epoch": 0.46, "learning_rate": 1.167163399745864e-05, "loss": 1.0572, "step": 1849 }, { "epoch": 0.46, "learning_rate": 1.1663633995765873e-05, "loss": 1.0596, "step": 1850 }, { "epoch": 0.46, "learning_rate": 1.165563289888981e-05, "loss": 1.128, "step": 1851 }, { "epoch": 0.46, "learning_rate": 1.1647630712097638e-05, "loss": 1.0733, "step": 1852 }, { "epoch": 0.46, "learning_rate": 1.1639627440657258e-05, "loss": 1.138, "step": 1853 }, { "epoch": 0.46, "learning_rate": 1.1631623089837284e-05, "loss": 1.0905, "step": 1854 }, { "epoch": 0.46, "learning_rate": 1.1623617664907045e-05, "loss": 1.0702, "step": 1855 }, { "epoch": 0.46, "learning_rate": 1.1615611171136581e-05, "loss": 1.0954, "step": 1856 }, { "epoch": 0.47, "learning_rate": 1.1607603613796618e-05, "loss": 1.0785, "step": 1857 }, { "epoch": 0.47, "learning_rate": 1.1599594998158602e-05, "loss": 1.0741, "step": 1858 }, { "epoch": 0.47, "learning_rate": 1.1591585329494668e-05, "loss": 1.0884, "step": 1859 }, { "epoch": 0.47, "learning_rate": 1.1583574613077637e-05, "loss": 1.0818, "step": 1860 }, { "epoch": 0.47, "learning_rate": 1.157556285418103e-05, "loss": 1.0964, "step": 1861 }, { "epoch": 0.47, "learning_rate": 1.1567550058079053e-05, "loss": 1.1276, "step": 1862 }, { "epoch": 0.47, "learning_rate": 1.1559536230046588e-05, "loss": 1.0638, "step": 1863 }, { "epoch": 0.47, "learning_rate": 1.1551521375359207e-05, "loss": 1.0486, "step": 1864 }, { "epoch": 0.47, "learning_rate": 1.1543505499293143e-05, "loss": 1.1108, "step": 1865 }, { "epoch": 0.47, "learning_rate": 1.1535488607125321e-05, "loss": 1.0947, "step": 1866 }, { "epoch": 0.47, "learning_rate": 1.1527470704133314e-05, "loss": 1.0796, "step": 1867 }, { "epoch": 0.47, "learning_rate": 1.1519451795595369e-05, "loss": 1.126, "step": 1868 }, { "epoch": 0.47, "learning_rate": 1.1511431886790407e-05, "loss": 1.0792, "step": 1869 }, { "epoch": 0.47, "learning_rate": 1.1503410982997991e-05, "loss": 1.1194, "step": 1870 }, { "epoch": 0.47, "learning_rate": 1.1495389089498343e-05, "loss": 1.0919, "step": 1871 }, { "epoch": 0.47, "learning_rate": 1.1487366211572343e-05, "loss": 1.0662, "step": 1872 }, { "epoch": 0.47, "learning_rate": 1.1479342354501508e-05, "loss": 1.072, "step": 1873 }, { "epoch": 0.47, "learning_rate": 1.1471317523568013e-05, "loss": 1.0939, "step": 1874 }, { "epoch": 0.47, "learning_rate": 1.146329172405466e-05, "loss": 1.096, "step": 1875 }, { "epoch": 0.47, "learning_rate": 1.1455264961244903e-05, "loss": 1.1185, "step": 1876 }, { "epoch": 0.47, "learning_rate": 1.144723724042282e-05, "loss": 1.0789, "step": 1877 }, { "epoch": 0.47, "learning_rate": 1.143920856687312e-05, "loss": 1.1242, "step": 1878 }, { "epoch": 0.47, "learning_rate": 1.1431178945881143e-05, "loss": 1.1026, "step": 1879 }, { "epoch": 0.47, "learning_rate": 1.1423148382732854e-05, "loss": 1.0937, "step": 1880 }, { "epoch": 0.47, "learning_rate": 1.1415116882714833e-05, "loss": 1.1124, "step": 1881 }, { "epoch": 0.47, "learning_rate": 1.1407084451114277e-05, "loss": 1.0404, "step": 1882 }, { "epoch": 0.47, "learning_rate": 1.1399051093219005e-05, "loss": 1.0767, "step": 1883 }, { "epoch": 0.47, "learning_rate": 1.1391016814317433e-05, "loss": 1.08, "step": 1884 }, { "epoch": 0.47, "learning_rate": 1.1382981619698597e-05, "loss": 1.0779, "step": 1885 }, { "epoch": 0.47, "learning_rate": 1.1374945514652121e-05, "loss": 1.1013, "step": 1886 }, { "epoch": 0.47, "learning_rate": 1.1366908504468242e-05, "loss": 1.1463, "step": 1887 }, { "epoch": 0.47, "learning_rate": 1.135887059443778e-05, "loss": 1.0614, "step": 1888 }, { "epoch": 0.47, "learning_rate": 1.1350831789852162e-05, "loss": 1.0891, "step": 1889 }, { "epoch": 0.47, "learning_rate": 1.1342792096003393e-05, "loss": 1.1065, "step": 1890 }, { "epoch": 0.47, "learning_rate": 1.1334751518184062e-05, "loss": 1.0787, "step": 1891 }, { "epoch": 0.47, "learning_rate": 1.1326710061687351e-05, "loss": 1.1438, "step": 1892 }, { "epoch": 0.47, "learning_rate": 1.1318667731807012e-05, "loss": 1.0922, "step": 1893 }, { "epoch": 0.47, "learning_rate": 1.1310624533837369e-05, "loss": 1.0808, "step": 1894 }, { "epoch": 0.47, "learning_rate": 1.130258047307333e-05, "loss": 1.0838, "step": 1895 }, { "epoch": 0.47, "learning_rate": 1.1294535554810356e-05, "loss": 1.0911, "step": 1896 }, { "epoch": 0.48, "learning_rate": 1.1286489784344484e-05, "loss": 1.0907, "step": 1897 }, { "epoch": 0.48, "learning_rate": 1.1278443166972308e-05, "loss": 1.0597, "step": 1898 }, { "epoch": 0.48, "learning_rate": 1.1270395707990976e-05, "loss": 1.1355, "step": 1899 }, { "epoch": 0.48, "learning_rate": 1.1262347412698195e-05, "loss": 1.0884, "step": 1900 }, { "epoch": 0.48, "learning_rate": 1.1254298286392225e-05, "loss": 1.081, "step": 1901 }, { "epoch": 0.48, "learning_rate": 1.124624833437186e-05, "loss": 1.1171, "step": 1902 }, { "epoch": 0.48, "learning_rate": 1.1238197561936455e-05, "loss": 1.0686, "step": 1903 }, { "epoch": 0.48, "learning_rate": 1.1230145974385892e-05, "loss": 1.1165, "step": 1904 }, { "epoch": 0.48, "learning_rate": 1.1222093577020593e-05, "loss": 1.0977, "step": 1905 }, { "epoch": 0.48, "learning_rate": 1.1214040375141522e-05, "loss": 1.0674, "step": 1906 }, { "epoch": 0.48, "learning_rate": 1.1205986374050155e-05, "loss": 1.1148, "step": 1907 }, { "epoch": 0.48, "learning_rate": 1.119793157904851e-05, "loss": 1.1126, "step": 1908 }, { "epoch": 0.48, "learning_rate": 1.118987599543912e-05, "loss": 1.1162, "step": 1909 }, { "epoch": 0.48, "learning_rate": 1.118181962852504e-05, "loss": 1.1127, "step": 1910 }, { "epoch": 0.48, "learning_rate": 1.1173762483609835e-05, "loss": 1.0972, "step": 1911 }, { "epoch": 0.48, "learning_rate": 1.1165704565997593e-05, "loss": 1.0921, "step": 1912 }, { "epoch": 0.48, "learning_rate": 1.1157645880992901e-05, "loss": 1.0971, "step": 1913 }, { "epoch": 0.48, "learning_rate": 1.1149586433900856e-05, "loss": 1.1058, "step": 1914 }, { "epoch": 0.48, "learning_rate": 1.1141526230027051e-05, "loss": 1.121, "step": 1915 }, { "epoch": 0.48, "learning_rate": 1.1133465274677588e-05, "loss": 1.109, "step": 1916 }, { "epoch": 0.48, "learning_rate": 1.112540357315905e-05, "loss": 1.0962, "step": 1917 }, { "epoch": 0.48, "learning_rate": 1.1117341130778523e-05, "loss": 1.0978, "step": 1918 }, { "epoch": 0.48, "learning_rate": 1.1109277952843576e-05, "loss": 1.0863, "step": 1919 }, { "epoch": 0.48, "learning_rate": 1.1101214044662258e-05, "loss": 1.0893, "step": 1920 }, { "epoch": 0.48, "learning_rate": 1.1093149411543109e-05, "loss": 1.1158, "step": 1921 }, { "epoch": 0.48, "learning_rate": 1.1085084058795135e-05, "loss": 1.1316, "step": 1922 }, { "epoch": 0.48, "learning_rate": 1.1077017991727824e-05, "loss": 1.0929, "step": 1923 }, { "epoch": 0.48, "learning_rate": 1.1068951215651132e-05, "loss": 1.0757, "step": 1924 }, { "epoch": 0.48, "learning_rate": 1.1060883735875471e-05, "loss": 1.0969, "step": 1925 }, { "epoch": 0.48, "learning_rate": 1.1052815557711742e-05, "loss": 1.1329, "step": 1926 }, { "epoch": 0.48, "learning_rate": 1.1044746686471281e-05, "loss": 1.0701, "step": 1927 }, { "epoch": 0.48, "learning_rate": 1.103667712746589e-05, "loss": 1.0835, "step": 1928 }, { "epoch": 0.48, "learning_rate": 1.1028606886007823e-05, "loss": 1.1075, "step": 1929 }, { "epoch": 0.48, "learning_rate": 1.102053596740978e-05, "loss": 1.101, "step": 1930 }, { "epoch": 0.48, "learning_rate": 1.1012464376984917e-05, "loss": 1.0934, "step": 1931 }, { "epoch": 0.48, "learning_rate": 1.1004392120046821e-05, "loss": 1.1006, "step": 1932 }, { "epoch": 0.48, "learning_rate": 1.0996319201909519e-05, "loss": 1.133, "step": 1933 }, { "epoch": 0.48, "learning_rate": 1.0988245627887484e-05, "loss": 1.1049, "step": 1934 }, { "epoch": 0.48, "learning_rate": 1.098017140329561e-05, "loss": 1.1054, "step": 1935 }, { "epoch": 0.48, "learning_rate": 1.0972096533449218e-05, "loss": 1.1321, "step": 1936 }, { "epoch": 0.49, "learning_rate": 1.096402102366406e-05, "loss": 1.0442, "step": 1937 }, { "epoch": 0.49, "learning_rate": 1.095594487925631e-05, "loss": 1.0798, "step": 1938 }, { "epoch": 0.49, "learning_rate": 1.0947868105542555e-05, "loss": 1.0684, "step": 1939 }, { "epoch": 0.49, "learning_rate": 1.0939790707839802e-05, "loss": 1.0674, "step": 1940 }, { "epoch": 0.49, "learning_rate": 1.0931712691465458e-05, "loss": 1.1046, "step": 1941 }, { "epoch": 0.49, "learning_rate": 1.0923634061737351e-05, "loss": 1.1312, "step": 1942 }, { "epoch": 0.49, "learning_rate": 1.0915554823973704e-05, "loss": 1.1008, "step": 1943 }, { "epoch": 0.49, "learning_rate": 1.0907474983493144e-05, "loss": 1.0839, "step": 1944 }, { "epoch": 0.49, "learning_rate": 1.0899394545614692e-05, "loss": 1.0645, "step": 1945 }, { "epoch": 0.49, "learning_rate": 1.089131351565776e-05, "loss": 1.0762, "step": 1946 }, { "epoch": 0.49, "learning_rate": 1.088323189894216e-05, "loss": 1.1172, "step": 1947 }, { "epoch": 0.49, "learning_rate": 1.0875149700788078e-05, "loss": 1.0933, "step": 1948 }, { "epoch": 0.49, "learning_rate": 1.0867066926516091e-05, "loss": 1.0493, "step": 1949 }, { "epoch": 0.49, "learning_rate": 1.0858983581447155e-05, "loss": 1.1199, "step": 1950 }, { "epoch": 0.49, "learning_rate": 1.0850899670902595e-05, "loss": 1.0737, "step": 1951 }, { "epoch": 0.49, "learning_rate": 1.0842815200204116e-05, "loss": 1.063, "step": 1952 }, { "epoch": 0.49, "learning_rate": 1.0834730174673784e-05, "loss": 1.0868, "step": 1953 }, { "epoch": 0.49, "learning_rate": 1.0826644599634037e-05, "loss": 1.1003, "step": 1954 }, { "epoch": 0.49, "learning_rate": 1.0818558480407677e-05, "loss": 1.1005, "step": 1955 }, { "epoch": 0.49, "learning_rate": 1.0810471822317852e-05, "loss": 1.0796, "step": 1956 }, { "epoch": 0.49, "learning_rate": 1.080238463068808e-05, "loss": 1.0607, "step": 1957 }, { "epoch": 0.49, "learning_rate": 1.0794296910842216e-05, "loss": 1.0766, "step": 1958 }, { "epoch": 0.49, "learning_rate": 1.078620866810447e-05, "loss": 1.1028, "step": 1959 }, { "epoch": 0.49, "learning_rate": 1.0778119907799399e-05, "loss": 1.0772, "step": 1960 }, { "epoch": 0.49, "learning_rate": 1.0770030635251899e-05, "loss": 1.0791, "step": 1961 }, { "epoch": 0.49, "learning_rate": 1.0761940855787193e-05, "loss": 1.0965, "step": 1962 }, { "epoch": 0.49, "learning_rate": 1.0753850574730856e-05, "loss": 1.1047, "step": 1963 }, { "epoch": 0.49, "learning_rate": 1.0745759797408774e-05, "loss": 1.0758, "step": 1964 }, { "epoch": 0.49, "learning_rate": 1.0737668529147179e-05, "loss": 1.0804, "step": 1965 }, { "epoch": 0.49, "learning_rate": 1.072957677527261e-05, "loss": 1.1004, "step": 1966 }, { "epoch": 0.49, "learning_rate": 1.072148454111193e-05, "loss": 1.08, "step": 1967 }, { "epoch": 0.49, "learning_rate": 1.0713391831992324e-05, "loss": 1.0621, "step": 1968 }, { "epoch": 0.49, "learning_rate": 1.0705298653241286e-05, "loss": 1.0702, "step": 1969 }, { "epoch": 0.49, "learning_rate": 1.069720501018662e-05, "loss": 1.0641, "step": 1970 }, { "epoch": 0.49, "learning_rate": 1.0689110908156432e-05, "loss": 1.1144, "step": 1971 }, { "epoch": 0.49, "learning_rate": 1.0681016352479134e-05, "loss": 1.0731, "step": 1972 }, { "epoch": 0.49, "learning_rate": 1.0672921348483435e-05, "loss": 1.1414, "step": 1973 }, { "epoch": 0.49, "learning_rate": 1.066482590149834e-05, "loss": 1.0154, "step": 1974 }, { "epoch": 0.49, "learning_rate": 1.0656730016853143e-05, "loss": 1.1497, "step": 1975 }, { "epoch": 0.49, "learning_rate": 1.064863369987743e-05, "loss": 1.0793, "step": 1976 }, { "epoch": 0.5, "learning_rate": 1.0640536955901071e-05, "loss": 1.108, "step": 1977 }, { "epoch": 0.5, "learning_rate": 1.0632439790254216e-05, "loss": 1.1049, "step": 1978 }, { "epoch": 0.5, "learning_rate": 1.0624342208267293e-05, "loss": 1.1017, "step": 1979 }, { "epoch": 0.5, "learning_rate": 1.0616244215270998e-05, "loss": 1.1281, "step": 1980 }, { "epoch": 0.5, "learning_rate": 1.0608145816596312e-05, "loss": 1.0856, "step": 1981 }, { "epoch": 0.5, "learning_rate": 1.0600047017574467e-05, "loss": 1.1263, "step": 1982 }, { "epoch": 0.5, "learning_rate": 1.0591947823536968e-05, "loss": 1.0802, "step": 1983 }, { "epoch": 0.5, "learning_rate": 1.058384823981558e-05, "loss": 1.1231, "step": 1984 }, { "epoch": 0.5, "learning_rate": 1.0575748271742319e-05, "loss": 1.0595, "step": 1985 }, { "epoch": 0.5, "learning_rate": 1.0567647924649462e-05, "loss": 1.0674, "step": 1986 }, { "epoch": 0.5, "learning_rate": 1.0559547203869527e-05, "loss": 1.037, "step": 1987 }, { "epoch": 0.5, "learning_rate": 1.0551446114735286e-05, "loss": 1.0759, "step": 1988 }, { "epoch": 0.5, "learning_rate": 1.0543344662579744e-05, "loss": 1.1156, "step": 1989 }, { "epoch": 0.5, "learning_rate": 1.0535242852736152e-05, "loss": 1.1382, "step": 1990 }, { "epoch": 0.5, "learning_rate": 1.0527140690538e-05, "loss": 1.1215, "step": 1991 }, { "epoch": 0.5, "learning_rate": 1.0519038181319e-05, "loss": 1.1196, "step": 1992 }, { "epoch": 0.5, "learning_rate": 1.0510935330413098e-05, "loss": 1.0961, "step": 1993 }, { "epoch": 0.5, "learning_rate": 1.0502832143154465e-05, "loss": 1.0482, "step": 1994 }, { "epoch": 0.5, "learning_rate": 1.0494728624877489e-05, "loss": 1.1434, "step": 1995 }, { "epoch": 0.5, "learning_rate": 1.0486624780916784e-05, "loss": 1.1151, "step": 1996 }, { "epoch": 0.5, "learning_rate": 1.0478520616607174e-05, "loss": 1.1142, "step": 1997 }, { "epoch": 0.5, "learning_rate": 1.0470416137283693e-05, "loss": 1.0911, "step": 1998 }, { "epoch": 0.5, "learning_rate": 1.0462311348281584e-05, "loss": 1.0922, "step": 1999 }, { "epoch": 0.5, "learning_rate": 1.0454206254936287e-05, "loss": 1.0577, "step": 2000 }, { "epoch": 0.5, "learning_rate": 1.0446100862583459e-05, "loss": 1.0803, "step": 2001 }, { "epoch": 0.5, "learning_rate": 1.0437995176558938e-05, "loss": 1.088, "step": 2002 }, { "epoch": 0.5, "learning_rate": 1.0429889202198753e-05, "loss": 1.0795, "step": 2003 }, { "epoch": 0.5, "learning_rate": 1.0421782944839145e-05, "loss": 1.1099, "step": 2004 }, { "epoch": 0.5, "learning_rate": 1.0413676409816518e-05, "loss": 1.0629, "step": 2005 }, { "epoch": 0.5, "learning_rate": 1.0405569602467469e-05, "loss": 1.0874, "step": 2006 }, { "epoch": 0.5, "learning_rate": 1.0397462528128772e-05, "loss": 1.1405, "step": 2007 }, { "epoch": 0.5, "learning_rate": 1.0389355192137379e-05, "loss": 1.0727, "step": 2008 }, { "epoch": 0.5, "learning_rate": 1.0381247599830414e-05, "loss": 1.0534, "step": 2009 }, { "epoch": 0.5, "learning_rate": 1.0373139756545164e-05, "loss": 1.1004, "step": 2010 }, { "epoch": 0.5, "learning_rate": 1.0365031667619085e-05, "loss": 1.0896, "step": 2011 }, { "epoch": 0.5, "learning_rate": 1.0356923338389807e-05, "loss": 1.102, "step": 2012 }, { "epoch": 0.5, "learning_rate": 1.0348814774195095e-05, "loss": 1.1262, "step": 2013 }, { "epoch": 0.5, "learning_rate": 1.0340705980372883e-05, "loss": 1.1075, "step": 2014 }, { "epoch": 0.5, "learning_rate": 1.0332596962261257e-05, "loss": 1.0679, "step": 2015 }, { "epoch": 0.5, "learning_rate": 1.0324487725198442e-05, "loss": 1.1116, "step": 2016 }, { "epoch": 0.51, "learning_rate": 1.0316378274522816e-05, "loss": 1.1399, "step": 2017 }, { "epoch": 0.51, "learning_rate": 1.0308268615572894e-05, "loss": 1.1144, "step": 2018 }, { "epoch": 0.51, "learning_rate": 1.0300158753687323e-05, "loss": 1.1017, "step": 2019 }, { "epoch": 0.51, "learning_rate": 1.0292048694204897e-05, "loss": 1.1027, "step": 2020 }, { "epoch": 0.51, "learning_rate": 1.0283938442464525e-05, "loss": 1.0988, "step": 2021 }, { "epoch": 0.51, "learning_rate": 1.0275828003805256e-05, "loss": 1.1395, "step": 2022 }, { "epoch": 0.51, "learning_rate": 1.0267717383566247e-05, "loss": 1.0885, "step": 2023 }, { "epoch": 0.51, "learning_rate": 1.0259606587086783e-05, "loss": 1.0912, "step": 2024 }, { "epoch": 0.51, "learning_rate": 1.0251495619706273e-05, "loss": 1.0948, "step": 2025 }, { "epoch": 0.51, "learning_rate": 1.0243384486764228e-05, "loss": 1.1237, "step": 2026 }, { "epoch": 0.51, "learning_rate": 1.0235273193600264e-05, "loss": 1.0439, "step": 2027 }, { "epoch": 0.51, "learning_rate": 1.0227161745554117e-05, "loss": 1.1092, "step": 2028 }, { "epoch": 0.51, "learning_rate": 1.021905014796561e-05, "loss": 1.0837, "step": 2029 }, { "epoch": 0.51, "learning_rate": 1.0210938406174675e-05, "loss": 1.0685, "step": 2030 }, { "epoch": 0.51, "learning_rate": 1.0202826525521337e-05, "loss": 1.0767, "step": 2031 }, { "epoch": 0.51, "learning_rate": 1.0194714511345702e-05, "loss": 1.1087, "step": 2032 }, { "epoch": 0.51, "learning_rate": 1.0186602368987983e-05, "loss": 1.0808, "step": 2033 }, { "epoch": 0.51, "learning_rate": 1.0178490103788462e-05, "loss": 1.0912, "step": 2034 }, { "epoch": 0.51, "learning_rate": 1.0170377721087507e-05, "loss": 1.1114, "step": 2035 }, { "epoch": 0.51, "learning_rate": 1.0162265226225563e-05, "loss": 1.1211, "step": 2036 }, { "epoch": 0.51, "learning_rate": 1.0154152624543147e-05, "loss": 1.049, "step": 2037 }, { "epoch": 0.51, "learning_rate": 1.0146039921380855e-05, "loss": 1.1212, "step": 2038 }, { "epoch": 0.51, "learning_rate": 1.0137927122079334e-05, "loss": 1.0492, "step": 2039 }, { "epoch": 0.51, "learning_rate": 1.012981423197931e-05, "loss": 1.1323, "step": 2040 }, { "epoch": 0.51, "learning_rate": 1.0121701256421562e-05, "loss": 1.0947, "step": 2041 }, { "epoch": 0.51, "learning_rate": 1.0113588200746918e-05, "loss": 1.0688, "step": 2042 }, { "epoch": 0.51, "learning_rate": 1.0105475070296276e-05, "loss": 1.1044, "step": 2043 }, { "epoch": 0.51, "learning_rate": 1.0097361870410566e-05, "loss": 1.0693, "step": 2044 }, { "epoch": 0.51, "learning_rate": 1.0089248606430775e-05, "loss": 1.1032, "step": 2045 }, { "epoch": 0.51, "learning_rate": 1.0081135283697927e-05, "loss": 1.0948, "step": 2046 }, { "epoch": 0.51, "learning_rate": 1.0073021907553086e-05, "loss": 1.0694, "step": 2047 }, { "epoch": 0.51, "learning_rate": 1.0064908483337352e-05, "loss": 1.1191, "step": 2048 }, { "epoch": 0.51, "learning_rate": 1.0056795016391854e-05, "loss": 1.0849, "step": 2049 }, { "epoch": 0.51, "learning_rate": 1.0048681512057751e-05, "loss": 1.1166, "step": 2050 }, { "epoch": 0.51, "learning_rate": 1.004056797567623e-05, "loss": 1.0972, "step": 2051 }, { "epoch": 0.51, "learning_rate": 1.0032454412588488e-05, "loss": 1.0931, "step": 2052 }, { "epoch": 0.51, "learning_rate": 1.0024340828135755e-05, "loss": 1.065, "step": 2053 }, { "epoch": 0.51, "learning_rate": 1.0016227227659262e-05, "loss": 1.0706, "step": 2054 }, { "epoch": 0.51, "learning_rate": 1.000811361650026e-05, "loss": 1.1406, "step": 2055 }, { "epoch": 0.51, "learning_rate": 1e-05, "loss": 1.1022, "step": 2056 }, { "epoch": 0.52, "learning_rate": 9.99188638349974e-06, "loss": 1.105, "step": 2057 }, { "epoch": 0.52, "learning_rate": 9.98377277234074e-06, "loss": 1.1039, "step": 2058 }, { "epoch": 0.52, "learning_rate": 9.975659171864247e-06, "loss": 1.0355, "step": 2059 }, { "epoch": 0.52, "learning_rate": 9.967545587411515e-06, "loss": 1.1114, "step": 2060 }, { "epoch": 0.52, "learning_rate": 9.959432024323773e-06, "loss": 1.0993, "step": 2061 }, { "epoch": 0.52, "learning_rate": 9.951318487942254e-06, "loss": 1.0805, "step": 2062 }, { "epoch": 0.52, "learning_rate": 9.94320498360815e-06, "loss": 1.087, "step": 2063 }, { "epoch": 0.52, "learning_rate": 9.93509151666265e-06, "loss": 1.0695, "step": 2064 }, { "epoch": 0.52, "learning_rate": 9.926978092446915e-06, "loss": 1.0748, "step": 2065 }, { "epoch": 0.52, "learning_rate": 9.918864716302074e-06, "loss": 1.0613, "step": 2066 }, { "epoch": 0.52, "learning_rate": 9.910751393569228e-06, "loss": 1.1001, "step": 2067 }, { "epoch": 0.52, "learning_rate": 9.902638129589435e-06, "loss": 1.0929, "step": 2068 }, { "epoch": 0.52, "learning_rate": 9.89452492970373e-06, "loss": 1.0712, "step": 2069 }, { "epoch": 0.52, "learning_rate": 9.886411799253085e-06, "loss": 1.0746, "step": 2070 }, { "epoch": 0.52, "learning_rate": 9.87829874357844e-06, "loss": 1.0625, "step": 2071 }, { "epoch": 0.52, "learning_rate": 9.870185768020694e-06, "loss": 1.1146, "step": 2072 }, { "epoch": 0.52, "learning_rate": 9.862072877920668e-06, "loss": 1.0956, "step": 2073 }, { "epoch": 0.52, "learning_rate": 9.85396007861915e-06, "loss": 1.0787, "step": 2074 }, { "epoch": 0.52, "learning_rate": 9.845847375456854e-06, "loss": 1.0982, "step": 2075 }, { "epoch": 0.52, "learning_rate": 9.837734773774442e-06, "loss": 1.0591, "step": 2076 }, { "epoch": 0.52, "learning_rate": 9.829622278912496e-06, "loss": 1.1388, "step": 2077 }, { "epoch": 0.52, "learning_rate": 9.82150989621154e-06, "loss": 1.0981, "step": 2078 }, { "epoch": 0.52, "learning_rate": 9.813397631012019e-06, "loss": 1.1015, "step": 2079 }, { "epoch": 0.52, "learning_rate": 9.805285488654298e-06, "loss": 1.0422, "step": 2080 }, { "epoch": 0.52, "learning_rate": 9.797173474478668e-06, "loss": 1.0547, "step": 2081 }, { "epoch": 0.52, "learning_rate": 9.789061593825327e-06, "loss": 1.0964, "step": 2082 }, { "epoch": 0.52, "learning_rate": 9.780949852034394e-06, "loss": 1.0838, "step": 2083 }, { "epoch": 0.52, "learning_rate": 9.772838254445887e-06, "loss": 1.0811, "step": 2084 }, { "epoch": 0.52, "learning_rate": 9.764726806399736e-06, "loss": 1.0794, "step": 2085 }, { "epoch": 0.52, "learning_rate": 9.756615513235776e-06, "loss": 1.0836, "step": 2086 }, { "epoch": 0.52, "learning_rate": 9.748504380293727e-06, "loss": 1.0725, "step": 2087 }, { "epoch": 0.52, "learning_rate": 9.740393412913219e-06, "loss": 1.0918, "step": 2088 }, { "epoch": 0.52, "learning_rate": 9.732282616433756e-06, "loss": 1.0455, "step": 2089 }, { "epoch": 0.52, "learning_rate": 9.724171996194751e-06, "loss": 1.0589, "step": 2090 }, { "epoch": 0.52, "learning_rate": 9.716061557535477e-06, "loss": 1.0867, "step": 2091 }, { "epoch": 0.52, "learning_rate": 9.707951305795103e-06, "loss": 1.1009, "step": 2092 }, { "epoch": 0.52, "learning_rate": 9.699841246312679e-06, "loss": 1.1208, "step": 2093 }, { "epoch": 0.52, "learning_rate": 9.69173138442711e-06, "loss": 1.0885, "step": 2094 }, { "epoch": 0.52, "learning_rate": 9.683621725477188e-06, "loss": 1.0697, "step": 2095 }, { "epoch": 0.52, "learning_rate": 9.675512274801561e-06, "loss": 1.0995, "step": 2096 }, { "epoch": 0.53, "learning_rate": 9.667403037738748e-06, "loss": 1.0922, "step": 2097 }, { "epoch": 0.53, "learning_rate": 9.65929401962712e-06, "loss": 1.068, "step": 2098 }, { "epoch": 0.53, "learning_rate": 9.651185225804908e-06, "loss": 1.081, "step": 2099 }, { "epoch": 0.53, "learning_rate": 9.643076661610197e-06, "loss": 1.0707, "step": 2100 }, { "epoch": 0.53, "learning_rate": 9.634968332380913e-06, "loss": 1.0832, "step": 2101 }, { "epoch": 0.53, "learning_rate": 9.626860243454841e-06, "loss": 1.0696, "step": 2102 }, { "epoch": 0.53, "learning_rate": 9.618752400169591e-06, "loss": 1.0879, "step": 2103 }, { "epoch": 0.53, "learning_rate": 9.610644807862625e-06, "loss": 1.0767, "step": 2104 }, { "epoch": 0.53, "learning_rate": 9.602537471871231e-06, "loss": 1.0886, "step": 2105 }, { "epoch": 0.53, "learning_rate": 9.594430397532533e-06, "loss": 1.0553, "step": 2106 }, { "epoch": 0.53, "learning_rate": 9.586323590183484e-06, "loss": 1.0818, "step": 2107 }, { "epoch": 0.53, "learning_rate": 9.578217055160855e-06, "loss": 1.0746, "step": 2108 }, { "epoch": 0.53, "learning_rate": 9.570110797801248e-06, "loss": 1.1109, "step": 2109 }, { "epoch": 0.53, "learning_rate": 9.562004823441066e-06, "loss": 1.0569, "step": 2110 }, { "epoch": 0.53, "learning_rate": 9.553899137416546e-06, "loss": 1.0873, "step": 2111 }, { "epoch": 0.53, "learning_rate": 9.545793745063714e-06, "loss": 1.1247, "step": 2112 }, { "epoch": 0.53, "learning_rate": 9.537688651718418e-06, "loss": 1.1026, "step": 2113 }, { "epoch": 0.53, "learning_rate": 9.52958386271631e-06, "loss": 1.1022, "step": 2114 }, { "epoch": 0.53, "learning_rate": 9.521479383392826e-06, "loss": 1.1276, "step": 2115 }, { "epoch": 0.53, "learning_rate": 9.513375219083218e-06, "loss": 1.0876, "step": 2116 }, { "epoch": 0.53, "learning_rate": 9.505271375122514e-06, "loss": 1.0898, "step": 2117 }, { "epoch": 0.53, "learning_rate": 9.497167856845542e-06, "loss": 1.1211, "step": 2118 }, { "epoch": 0.53, "learning_rate": 9.489064669586907e-06, "loss": 1.0746, "step": 2119 }, { "epoch": 0.53, "learning_rate": 9.480961818681004e-06, "loss": 1.0964, "step": 2120 }, { "epoch": 0.53, "learning_rate": 9.472859309462003e-06, "loss": 1.0838, "step": 2121 }, { "epoch": 0.53, "learning_rate": 9.464757147263849e-06, "loss": 1.1446, "step": 2122 }, { "epoch": 0.53, "learning_rate": 9.45665533742026e-06, "loss": 1.1285, "step": 2123 }, { "epoch": 0.53, "learning_rate": 9.448553885264717e-06, "loss": 1.0848, "step": 2124 }, { "epoch": 0.53, "learning_rate": 9.440452796130476e-06, "loss": 1.1085, "step": 2125 }, { "epoch": 0.53, "learning_rate": 9.43235207535054e-06, "loss": 1.0443, "step": 2126 }, { "epoch": 0.53, "learning_rate": 9.424251728257684e-06, "loss": 1.0745, "step": 2127 }, { "epoch": 0.53, "learning_rate": 9.416151760184424e-06, "loss": 1.1582, "step": 2128 }, { "epoch": 0.53, "learning_rate": 9.408052176463034e-06, "loss": 1.0732, "step": 2129 }, { "epoch": 0.53, "learning_rate": 9.399952982425536e-06, "loss": 1.0685, "step": 2130 }, { "epoch": 0.53, "learning_rate": 9.391854183403692e-06, "loss": 1.0651, "step": 2131 }, { "epoch": 0.53, "learning_rate": 9.383755784729007e-06, "loss": 1.0998, "step": 2132 }, { "epoch": 0.53, "learning_rate": 9.37565779173271e-06, "loss": 1.1183, "step": 2133 }, { "epoch": 0.53, "learning_rate": 9.367560209745789e-06, "loss": 1.1019, "step": 2134 }, { "epoch": 0.53, "learning_rate": 9.35946304409893e-06, "loss": 1.0941, "step": 2135 }, { "epoch": 0.53, "learning_rate": 9.351366300122569e-06, "loss": 1.1192, "step": 2136 }, { "epoch": 0.54, "learning_rate": 9.34326998314686e-06, "loss": 1.0695, "step": 2137 }, { "epoch": 0.54, "learning_rate": 9.335174098501664e-06, "loss": 1.0989, "step": 2138 }, { "epoch": 0.54, "learning_rate": 9.32707865151657e-06, "loss": 1.109, "step": 2139 }, { "epoch": 0.54, "learning_rate": 9.318983647520869e-06, "loss": 1.0563, "step": 2140 }, { "epoch": 0.54, "learning_rate": 9.310889091843572e-06, "loss": 1.084, "step": 2141 }, { "epoch": 0.54, "learning_rate": 9.302794989813383e-06, "loss": 1.0751, "step": 2142 }, { "epoch": 0.54, "learning_rate": 9.294701346758714e-06, "loss": 1.0693, "step": 2143 }, { "epoch": 0.54, "learning_rate": 9.286608168007678e-06, "loss": 1.0943, "step": 2144 }, { "epoch": 0.54, "learning_rate": 9.278515458888074e-06, "loss": 1.0769, "step": 2145 }, { "epoch": 0.54, "learning_rate": 9.270423224727397e-06, "loss": 1.117, "step": 2146 }, { "epoch": 0.54, "learning_rate": 9.262331470852826e-06, "loss": 1.0672, "step": 2147 }, { "epoch": 0.54, "learning_rate": 9.25424020259123e-06, "loss": 1.1043, "step": 2148 }, { "epoch": 0.54, "learning_rate": 9.24614942526915e-06, "loss": 1.1141, "step": 2149 }, { "epoch": 0.54, "learning_rate": 9.238059144212807e-06, "loss": 1.076, "step": 2150 }, { "epoch": 0.54, "learning_rate": 9.229969364748106e-06, "loss": 1.1, "step": 2151 }, { "epoch": 0.54, "learning_rate": 9.221880092200601e-06, "loss": 1.0724, "step": 2152 }, { "epoch": 0.54, "learning_rate": 9.213791331895534e-06, "loss": 1.1147, "step": 2153 }, { "epoch": 0.54, "learning_rate": 9.205703089157787e-06, "loss": 1.0944, "step": 2154 }, { "epoch": 0.54, "learning_rate": 9.197615369311926e-06, "loss": 1.0671, "step": 2155 }, { "epoch": 0.54, "learning_rate": 9.18952817768215e-06, "loss": 1.0473, "step": 2156 }, { "epoch": 0.54, "learning_rate": 9.181441519592323e-06, "loss": 1.0981, "step": 2157 }, { "epoch": 0.54, "learning_rate": 9.173355400365965e-06, "loss": 1.0701, "step": 2158 }, { "epoch": 0.54, "learning_rate": 9.165269825326219e-06, "loss": 1.1223, "step": 2159 }, { "epoch": 0.54, "learning_rate": 9.157184799795889e-06, "loss": 1.0762, "step": 2160 }, { "epoch": 0.54, "learning_rate": 9.149100329097408e-06, "loss": 1.0802, "step": 2161 }, { "epoch": 0.54, "learning_rate": 9.14101641855285e-06, "loss": 1.082, "step": 2162 }, { "epoch": 0.54, "learning_rate": 9.13293307348391e-06, "loss": 1.0933, "step": 2163 }, { "epoch": 0.54, "learning_rate": 9.124850299211923e-06, "loss": 1.1122, "step": 2164 }, { "epoch": 0.54, "learning_rate": 9.116768101057844e-06, "loss": 1.0691, "step": 2165 }, { "epoch": 0.54, "learning_rate": 9.108686484342241e-06, "loss": 1.0712, "step": 2166 }, { "epoch": 0.54, "learning_rate": 9.100605454385313e-06, "loss": 1.0658, "step": 2167 }, { "epoch": 0.54, "learning_rate": 9.092525016506858e-06, "loss": 1.109, "step": 2168 }, { "epoch": 0.54, "learning_rate": 9.084445176026298e-06, "loss": 1.0896, "step": 2169 }, { "epoch": 0.54, "learning_rate": 9.07636593826265e-06, "loss": 1.0543, "step": 2170 }, { "epoch": 0.54, "learning_rate": 9.068287308534543e-06, "loss": 1.0892, "step": 2171 }, { "epoch": 0.54, "learning_rate": 9.060209292160203e-06, "loss": 1.0738, "step": 2172 }, { "epoch": 0.54, "learning_rate": 9.052131894457445e-06, "loss": 1.0576, "step": 2173 }, { "epoch": 0.54, "learning_rate": 9.044055120743695e-06, "loss": 1.0475, "step": 2174 }, { "epoch": 0.54, "learning_rate": 9.035978976335941e-06, "loss": 1.1169, "step": 2175 }, { "epoch": 0.54, "learning_rate": 9.02790346655079e-06, "loss": 1.1075, "step": 2176 }, { "epoch": 0.55, "learning_rate": 9.019828596704394e-06, "loss": 1.0816, "step": 2177 }, { "epoch": 0.55, "learning_rate": 9.011754372112517e-06, "loss": 1.0917, "step": 2178 }, { "epoch": 0.55, "learning_rate": 9.003680798090484e-06, "loss": 1.0978, "step": 2179 }, { "epoch": 0.55, "learning_rate": 8.99560787995318e-06, "loss": 1.1317, "step": 2180 }, { "epoch": 0.55, "learning_rate": 8.987535623015088e-06, "loss": 1.0781, "step": 2181 }, { "epoch": 0.55, "learning_rate": 8.979464032590222e-06, "loss": 1.1077, "step": 2182 }, { "epoch": 0.55, "learning_rate": 8.971393113992182e-06, "loss": 1.1193, "step": 2183 }, { "epoch": 0.55, "learning_rate": 8.963322872534115e-06, "loss": 1.091, "step": 2184 }, { "epoch": 0.55, "learning_rate": 8.95525331352872e-06, "loss": 1.1194, "step": 2185 }, { "epoch": 0.55, "learning_rate": 8.947184442288261e-06, "loss": 1.0549, "step": 2186 }, { "epoch": 0.55, "learning_rate": 8.939116264124528e-06, "loss": 1.0487, "step": 2187 }, { "epoch": 0.55, "learning_rate": 8.931048784348875e-06, "loss": 1.0544, "step": 2188 }, { "epoch": 0.55, "learning_rate": 8.922982008272178e-06, "loss": 1.1051, "step": 2189 }, { "epoch": 0.55, "learning_rate": 8.914915941204869e-06, "loss": 1.146, "step": 2190 }, { "epoch": 0.55, "learning_rate": 8.906850588456895e-06, "loss": 1.0969, "step": 2191 }, { "epoch": 0.55, "learning_rate": 8.898785955337744e-06, "loss": 1.067, "step": 2192 }, { "epoch": 0.55, "learning_rate": 8.890722047156428e-06, "loss": 1.0395, "step": 2193 }, { "epoch": 0.55, "learning_rate": 8.882658869221479e-06, "loss": 1.0795, "step": 2194 }, { "epoch": 0.55, "learning_rate": 8.874596426840953e-06, "loss": 1.0966, "step": 2195 }, { "epoch": 0.55, "learning_rate": 8.866534725322416e-06, "loss": 1.0768, "step": 2196 }, { "epoch": 0.55, "learning_rate": 8.858473769972954e-06, "loss": 1.102, "step": 2197 }, { "epoch": 0.55, "learning_rate": 8.850413566099147e-06, "loss": 1.1444, "step": 2198 }, { "epoch": 0.55, "learning_rate": 8.8423541190071e-06, "loss": 1.0793, "step": 2199 }, { "epoch": 0.55, "learning_rate": 8.83429543400241e-06, "loss": 1.1079, "step": 2200 }, { "epoch": 0.55, "learning_rate": 8.826237516390165e-06, "loss": 1.0925, "step": 2201 }, { "epoch": 0.55, "learning_rate": 8.818180371474964e-06, "loss": 1.0631, "step": 2202 }, { "epoch": 0.55, "learning_rate": 8.810124004560882e-06, "loss": 1.0679, "step": 2203 }, { "epoch": 0.55, "learning_rate": 8.802068420951494e-06, "loss": 1.131, "step": 2204 }, { "epoch": 0.55, "learning_rate": 8.794013625949848e-06, "loss": 1.1236, "step": 2205 }, { "epoch": 0.55, "learning_rate": 8.785959624858482e-06, "loss": 1.0709, "step": 2206 }, { "epoch": 0.55, "learning_rate": 8.777906422979408e-06, "loss": 1.0688, "step": 2207 }, { "epoch": 0.55, "learning_rate": 8.76985402561411e-06, "loss": 1.0594, "step": 2208 }, { "epoch": 0.55, "learning_rate": 8.76180243806355e-06, "loss": 1.075, "step": 2209 }, { "epoch": 0.55, "learning_rate": 8.753751665628141e-06, "loss": 1.0615, "step": 2210 }, { "epoch": 0.55, "learning_rate": 8.74570171360778e-06, "loss": 1.0732, "step": 2211 }, { "epoch": 0.55, "learning_rate": 8.737652587301807e-06, "loss": 1.0773, "step": 2212 }, { "epoch": 0.55, "learning_rate": 8.729604292009026e-06, "loss": 1.0653, "step": 2213 }, { "epoch": 0.55, "learning_rate": 8.721556833027696e-06, "loss": 1.0715, "step": 2214 }, { "epoch": 0.55, "learning_rate": 8.713510215655518e-06, "loss": 1.0934, "step": 2215 }, { "epoch": 0.55, "learning_rate": 8.705464445189648e-06, "loss": 1.0961, "step": 2216 }, { "epoch": 0.56, "learning_rate": 8.697419526926673e-06, "loss": 1.0383, "step": 2217 }, { "epoch": 0.56, "learning_rate": 8.689375466162634e-06, "loss": 1.0988, "step": 2218 }, { "epoch": 0.56, "learning_rate": 8.681332268192991e-06, "loss": 1.1591, "step": 2219 }, { "epoch": 0.56, "learning_rate": 8.67328993831265e-06, "loss": 1.095, "step": 2220 }, { "epoch": 0.56, "learning_rate": 8.665248481815941e-06, "loss": 1.0793, "step": 2221 }, { "epoch": 0.56, "learning_rate": 8.657207903996609e-06, "loss": 1.1088, "step": 2222 }, { "epoch": 0.56, "learning_rate": 8.649168210147842e-06, "loss": 1.1038, "step": 2223 }, { "epoch": 0.56, "learning_rate": 8.641129405562221e-06, "loss": 1.113, "step": 2224 }, { "epoch": 0.56, "learning_rate": 8.633091495531763e-06, "loss": 1.1177, "step": 2225 }, { "epoch": 0.56, "learning_rate": 8.625054485347882e-06, "loss": 1.1366, "step": 2226 }, { "epoch": 0.56, "learning_rate": 8.617018380301405e-06, "loss": 1.1198, "step": 2227 }, { "epoch": 0.56, "learning_rate": 8.608983185682568e-06, "loss": 1.0582, "step": 2228 }, { "epoch": 0.56, "learning_rate": 8.600948906780999e-06, "loss": 1.0971, "step": 2229 }, { "epoch": 0.56, "learning_rate": 8.592915548885727e-06, "loss": 1.0654, "step": 2230 }, { "epoch": 0.56, "learning_rate": 8.584883117285173e-06, "loss": 1.1058, "step": 2231 }, { "epoch": 0.56, "learning_rate": 8.576851617267151e-06, "loss": 1.0846, "step": 2232 }, { "epoch": 0.56, "learning_rate": 8.568821054118859e-06, "loss": 1.0954, "step": 2233 }, { "epoch": 0.56, "learning_rate": 8.560791433126882e-06, "loss": 1.0897, "step": 2234 }, { "epoch": 0.56, "learning_rate": 8.552762759577183e-06, "loss": 1.1011, "step": 2235 }, { "epoch": 0.56, "learning_rate": 8.544735038755097e-06, "loss": 1.1087, "step": 2236 }, { "epoch": 0.56, "learning_rate": 8.536708275945341e-06, "loss": 1.0966, "step": 2237 }, { "epoch": 0.56, "learning_rate": 8.528682476431988e-06, "loss": 1.0447, "step": 2238 }, { "epoch": 0.56, "learning_rate": 8.520657645498495e-06, "loss": 1.084, "step": 2239 }, { "epoch": 0.56, "learning_rate": 8.51263378842766e-06, "loss": 1.1372, "step": 2240 }, { "epoch": 0.56, "learning_rate": 8.50461091050166e-06, "loss": 1.1271, "step": 2241 }, { "epoch": 0.56, "learning_rate": 8.49658901700201e-06, "loss": 1.0338, "step": 2242 }, { "epoch": 0.56, "learning_rate": 8.488568113209593e-06, "loss": 1.0972, "step": 2243 }, { "epoch": 0.56, "learning_rate": 8.480548204404633e-06, "loss": 1.0759, "step": 2244 }, { "epoch": 0.56, "learning_rate": 8.472529295866689e-06, "loss": 1.0856, "step": 2245 }, { "epoch": 0.56, "learning_rate": 8.464511392874686e-06, "loss": 1.1099, "step": 2246 }, { "epoch": 0.56, "learning_rate": 8.456494500706859e-06, "loss": 1.0679, "step": 2247 }, { "epoch": 0.56, "learning_rate": 8.448478624640798e-06, "loss": 1.0952, "step": 2248 }, { "epoch": 0.56, "learning_rate": 8.440463769953414e-06, "loss": 1.0898, "step": 2249 }, { "epoch": 0.56, "learning_rate": 8.432449941920949e-06, "loss": 1.0816, "step": 2250 }, { "epoch": 0.56, "learning_rate": 8.424437145818973e-06, "loss": 1.1288, "step": 2251 }, { "epoch": 0.56, "learning_rate": 8.416425386922366e-06, "loss": 1.0994, "step": 2252 }, { "epoch": 0.56, "learning_rate": 8.408414670505336e-06, "loss": 1.0734, "step": 2253 }, { "epoch": 0.56, "learning_rate": 8.4004050018414e-06, "loss": 1.0906, "step": 2254 }, { "epoch": 0.56, "learning_rate": 8.392396386203385e-06, "loss": 1.118, "step": 2255 }, { "epoch": 0.57, "learning_rate": 8.384388828863424e-06, "loss": 1.0864, "step": 2256 }, { "epoch": 0.57, "learning_rate": 8.376382335092955e-06, "loss": 1.0773, "step": 2257 }, { "epoch": 0.57, "learning_rate": 8.368376910162718e-06, "loss": 1.0756, "step": 2258 }, { "epoch": 0.57, "learning_rate": 8.360372559342746e-06, "loss": 1.1031, "step": 2259 }, { "epoch": 0.57, "learning_rate": 8.352369287902367e-06, "loss": 1.1107, "step": 2260 }, { "epoch": 0.57, "learning_rate": 8.344367101110193e-06, "loss": 1.1027, "step": 2261 }, { "epoch": 0.57, "learning_rate": 8.336366004234134e-06, "loss": 1.1255, "step": 2262 }, { "epoch": 0.57, "learning_rate": 8.328366002541362e-06, "loss": 1.1115, "step": 2263 }, { "epoch": 0.57, "learning_rate": 8.320367101298351e-06, "loss": 1.1115, "step": 2264 }, { "epoch": 0.57, "learning_rate": 8.312369305770843e-06, "loss": 1.1097, "step": 2265 }, { "epoch": 0.57, "learning_rate": 8.304372621223838e-06, "loss": 1.106, "step": 2266 }, { "epoch": 0.57, "learning_rate": 8.296377052921629e-06, "loss": 1.0897, "step": 2267 }, { "epoch": 0.57, "learning_rate": 8.28838260612775e-06, "loss": 1.1248, "step": 2268 }, { "epoch": 0.57, "learning_rate": 8.280389286105013e-06, "loss": 1.0786, "step": 2269 }, { "epoch": 0.57, "learning_rate": 8.272397098115483e-06, "loss": 1.032, "step": 2270 }, { "epoch": 0.57, "learning_rate": 8.26440604742048e-06, "loss": 1.091, "step": 2271 }, { "epoch": 0.57, "learning_rate": 8.256416139280577e-06, "loss": 1.1011, "step": 2272 }, { "epoch": 0.57, "learning_rate": 8.24842737895559e-06, "loss": 1.0319, "step": 2273 }, { "epoch": 0.57, "learning_rate": 8.240439771704584e-06, "loss": 1.0864, "step": 2274 }, { "epoch": 0.57, "learning_rate": 8.232453322785863e-06, "loss": 1.1094, "step": 2275 }, { "epoch": 0.57, "learning_rate": 8.224468037456969e-06, "loss": 1.1066, "step": 2276 }, { "epoch": 0.57, "learning_rate": 8.216483920974678e-06, "loss": 1.0711, "step": 2277 }, { "epoch": 0.57, "learning_rate": 8.208500978594995e-06, "loss": 1.0814, "step": 2278 }, { "epoch": 0.57, "learning_rate": 8.200519215573159e-06, "loss": 1.1124, "step": 2279 }, { "epoch": 0.57, "learning_rate": 8.19253863716362e-06, "loss": 1.0941, "step": 2280 }, { "epoch": 0.57, "learning_rate": 8.184559248620061e-06, "loss": 1.0919, "step": 2281 }, { "epoch": 0.57, "learning_rate": 8.176581055195371e-06, "loss": 1.0475, "step": 2282 }, { "epoch": 0.57, "learning_rate": 8.168604062141663e-06, "loss": 1.0839, "step": 2283 }, { "epoch": 0.57, "learning_rate": 8.160628274710247e-06, "loss": 1.0724, "step": 2284 }, { "epoch": 0.57, "learning_rate": 8.15265369815165e-06, "loss": 1.1124, "step": 2285 }, { "epoch": 0.57, "learning_rate": 8.144680337715604e-06, "loss": 1.0577, "step": 2286 }, { "epoch": 0.57, "learning_rate": 8.136708198651022e-06, "loss": 1.0773, "step": 2287 }, { "epoch": 0.57, "learning_rate": 8.128737286206038e-06, "loss": 1.0749, "step": 2288 }, { "epoch": 0.57, "learning_rate": 8.120767605627958e-06, "loss": 1.102, "step": 2289 }, { "epoch": 0.57, "learning_rate": 8.112799162163289e-06, "loss": 1.0692, "step": 2290 }, { "epoch": 0.57, "learning_rate": 8.104831961057718e-06, "loss": 1.1424, "step": 2291 }, { "epoch": 0.57, "learning_rate": 8.096866007556112e-06, "loss": 1.0808, "step": 2292 }, { "epoch": 0.57, "learning_rate": 8.088901306902533e-06, "loss": 1.0784, "step": 2293 }, { "epoch": 0.57, "learning_rate": 8.080937864340194e-06, "loss": 1.0639, "step": 2294 }, { "epoch": 0.57, "learning_rate": 8.072975685111497e-06, "loss": 1.0697, "step": 2295 }, { "epoch": 0.58, "learning_rate": 8.065014774458004e-06, "loss": 1.0859, "step": 2296 }, { "epoch": 0.58, "learning_rate": 8.057055137620448e-06, "loss": 1.0591, "step": 2297 }, { "epoch": 0.58, "learning_rate": 8.04909677983872e-06, "loss": 1.0836, "step": 2298 }, { "epoch": 0.58, "learning_rate": 8.041139706351864e-06, "loss": 1.0983, "step": 2299 }, { "epoch": 0.58, "learning_rate": 8.033183922398091e-06, "loss": 1.0592, "step": 2300 }, { "epoch": 0.58, "learning_rate": 8.025229433214752e-06, "loss": 1.0592, "step": 2301 }, { "epoch": 0.58, "learning_rate": 8.01727624403835e-06, "loss": 1.094, "step": 2302 }, { "epoch": 0.58, "learning_rate": 8.009324360104533e-06, "loss": 1.0727, "step": 2303 }, { "epoch": 0.58, "learning_rate": 8.001373786648091e-06, "loss": 1.0647, "step": 2304 }, { "epoch": 0.58, "learning_rate": 7.993424528902938e-06, "loss": 1.0975, "step": 2305 }, { "epoch": 0.58, "learning_rate": 7.985476592102142e-06, "loss": 1.1031, "step": 2306 }, { "epoch": 0.58, "learning_rate": 7.977529981477893e-06, "loss": 1.0748, "step": 2307 }, { "epoch": 0.58, "learning_rate": 7.969584702261503e-06, "loss": 1.0958, "step": 2308 }, { "epoch": 0.58, "learning_rate": 7.961640759683416e-06, "loss": 1.0838, "step": 2309 }, { "epoch": 0.58, "learning_rate": 7.953698158973182e-06, "loss": 1.0815, "step": 2310 }, { "epoch": 0.58, "learning_rate": 7.945756905359491e-06, "loss": 1.1285, "step": 2311 }, { "epoch": 0.58, "learning_rate": 7.93781700407012e-06, "loss": 1.0696, "step": 2312 }, { "epoch": 0.58, "learning_rate": 7.929878460331973e-06, "loss": 1.07, "step": 2313 }, { "epoch": 0.58, "learning_rate": 7.92194127937106e-06, "loss": 1.1239, "step": 2314 }, { "epoch": 0.58, "learning_rate": 7.914005466412481e-06, "loss": 1.0846, "step": 2315 }, { "epoch": 0.58, "learning_rate": 7.906071026680451e-06, "loss": 1.0838, "step": 2316 }, { "epoch": 0.58, "learning_rate": 7.898137965398271e-06, "loss": 1.0864, "step": 2317 }, { "epoch": 0.58, "learning_rate": 7.890206287788337e-06, "loss": 1.0971, "step": 2318 }, { "epoch": 0.58, "learning_rate": 7.882275999072133e-06, "loss": 1.0544, "step": 2319 }, { "epoch": 0.58, "learning_rate": 7.874347104470234e-06, "loss": 1.0725, "step": 2320 }, { "epoch": 0.58, "learning_rate": 7.866419609202293e-06, "loss": 1.0904, "step": 2321 }, { "epoch": 0.58, "learning_rate": 7.858493518487038e-06, "loss": 1.1176, "step": 2322 }, { "epoch": 0.58, "learning_rate": 7.850568837542282e-06, "loss": 1.0656, "step": 2323 }, { "epoch": 0.58, "learning_rate": 7.842645571584899e-06, "loss": 1.0681, "step": 2324 }, { "epoch": 0.58, "learning_rate": 7.834723725830845e-06, "loss": 1.0977, "step": 2325 }, { "epoch": 0.58, "learning_rate": 7.826803305495124e-06, "loss": 1.0863, "step": 2326 }, { "epoch": 0.58, "learning_rate": 7.818884315791815e-06, "loss": 1.0513, "step": 2327 }, { "epoch": 0.58, "learning_rate": 7.810966761934053e-06, "loss": 1.076, "step": 2328 }, { "epoch": 0.58, "learning_rate": 7.803050649134023e-06, "loss": 1.1174, "step": 2329 }, { "epoch": 0.58, "learning_rate": 7.795135982602968e-06, "loss": 1.0781, "step": 2330 }, { "epoch": 0.58, "learning_rate": 7.787222767551164e-06, "loss": 1.0612, "step": 2331 }, { "epoch": 0.58, "learning_rate": 7.779311009187958e-06, "loss": 1.1113, "step": 2332 }, { "epoch": 0.58, "learning_rate": 7.771400712721711e-06, "loss": 1.0501, "step": 2333 }, { "epoch": 0.58, "learning_rate": 7.763491883359835e-06, "loss": 1.0632, "step": 2334 }, { "epoch": 0.58, "learning_rate": 7.755584526308782e-06, "loss": 1.1147, "step": 2335 }, { "epoch": 0.59, "learning_rate": 7.747678646774018e-06, "loss": 1.0484, "step": 2336 }, { "epoch": 0.59, "learning_rate": 7.73977424996005e-06, "loss": 1.1018, "step": 2337 }, { "epoch": 0.59, "learning_rate": 7.731871341070402e-06, "loss": 1.0613, "step": 2338 }, { "epoch": 0.59, "learning_rate": 7.723969925307624e-06, "loss": 1.0512, "step": 2339 }, { "epoch": 0.59, "learning_rate": 7.716070007873275e-06, "loss": 1.1365, "step": 2340 }, { "epoch": 0.59, "learning_rate": 7.708171593967935e-06, "loss": 1.0907, "step": 2341 }, { "epoch": 0.59, "learning_rate": 7.700274688791196e-06, "loss": 1.0636, "step": 2342 }, { "epoch": 0.59, "learning_rate": 7.692379297541646e-06, "loss": 1.1126, "step": 2343 }, { "epoch": 0.59, "learning_rate": 7.684485425416888e-06, "loss": 1.0932, "step": 2344 }, { "epoch": 0.59, "learning_rate": 7.676593077613515e-06, "loss": 1.0675, "step": 2345 }, { "epoch": 0.59, "learning_rate": 7.668702259327128e-06, "loss": 1.0998, "step": 2346 }, { "epoch": 0.59, "learning_rate": 7.660812975752305e-06, "loss": 1.0589, "step": 2347 }, { "epoch": 0.59, "learning_rate": 7.65292523208263e-06, "loss": 1.0523, "step": 2348 }, { "epoch": 0.59, "learning_rate": 7.645039033510669e-06, "loss": 1.1024, "step": 2349 }, { "epoch": 0.59, "learning_rate": 7.637154385227961e-06, "loss": 1.093, "step": 2350 }, { "epoch": 0.59, "learning_rate": 7.62927129242504e-06, "loss": 1.0816, "step": 2351 }, { "epoch": 0.59, "learning_rate": 7.621389760291397e-06, "loss": 1.0858, "step": 2352 }, { "epoch": 0.59, "learning_rate": 7.613509794015517e-06, "loss": 1.0888, "step": 2353 }, { "epoch": 0.59, "learning_rate": 7.6056313987848364e-06, "loss": 1.1166, "step": 2354 }, { "epoch": 0.59, "learning_rate": 7.597754579785764e-06, "loss": 1.0663, "step": 2355 }, { "epoch": 0.59, "learning_rate": 7.589879342203681e-06, "loss": 1.1019, "step": 2356 }, { "epoch": 0.59, "learning_rate": 7.582005691222904e-06, "loss": 1.0587, "step": 2357 }, { "epoch": 0.59, "learning_rate": 7.574133632026734e-06, "loss": 1.0755, "step": 2358 }, { "epoch": 0.59, "learning_rate": 7.566263169797399e-06, "loss": 1.0781, "step": 2359 }, { "epoch": 0.59, "learning_rate": 7.558394309716088e-06, "loss": 1.1208, "step": 2360 }, { "epoch": 0.59, "learning_rate": 7.550527056962934e-06, "loss": 1.0522, "step": 2361 }, { "epoch": 0.59, "learning_rate": 7.542661416717013e-06, "loss": 1.0698, "step": 2362 }, { "epoch": 0.59, "learning_rate": 7.534797394156333e-06, "loss": 1.0566, "step": 2363 }, { "epoch": 0.59, "learning_rate": 7.5269349944578454e-06, "loss": 1.0787, "step": 2364 }, { "epoch": 0.59, "learning_rate": 7.51907422279743e-06, "loss": 1.123, "step": 2365 }, { "epoch": 0.59, "learning_rate": 7.511215084349891e-06, "loss": 1.1077, "step": 2366 }, { "epoch": 0.59, "learning_rate": 7.503357584288965e-06, "loss": 1.0389, "step": 2367 }, { "epoch": 0.59, "learning_rate": 7.495501727787302e-06, "loss": 1.0708, "step": 2368 }, { "epoch": 0.59, "learning_rate": 7.487647520016479e-06, "loss": 1.0711, "step": 2369 }, { "epoch": 0.59, "learning_rate": 7.4797949661469735e-06, "loss": 1.0587, "step": 2370 }, { "epoch": 0.59, "learning_rate": 7.47194407134819e-06, "loss": 1.0589, "step": 2371 }, { "epoch": 0.59, "learning_rate": 7.464094840788435e-06, "loss": 1.0685, "step": 2372 }, { "epoch": 0.59, "learning_rate": 7.456247279634915e-06, "loss": 1.0426, "step": 2373 }, { "epoch": 0.59, "learning_rate": 7.448401393053746e-06, "loss": 1.0952, "step": 2374 }, { "epoch": 0.59, "learning_rate": 7.440557186209927e-06, "loss": 1.1322, "step": 2375 }, { "epoch": 0.6, "learning_rate": 7.432714664267373e-06, "loss": 1.1085, "step": 2376 }, { "epoch": 0.6, "learning_rate": 7.424873832388866e-06, "loss": 1.0663, "step": 2377 }, { "epoch": 0.6, "learning_rate": 7.417034695736092e-06, "loss": 1.0656, "step": 2378 }, { "epoch": 0.6, "learning_rate": 7.409197259469623e-06, "loss": 1.0306, "step": 2379 }, { "epoch": 0.6, "learning_rate": 7.401361528748892e-06, "loss": 1.0828, "step": 2380 }, { "epoch": 0.6, "learning_rate": 7.393527508732232e-06, "loss": 1.1053, "step": 2381 }, { "epoch": 0.6, "learning_rate": 7.385695204576832e-06, "loss": 1.094, "step": 2382 }, { "epoch": 0.6, "learning_rate": 7.377864621438769e-06, "loss": 1.0632, "step": 2383 }, { "epoch": 0.6, "learning_rate": 7.370035764472966e-06, "loss": 1.0858, "step": 2384 }, { "epoch": 0.6, "learning_rate": 7.362208638833228e-06, "loss": 1.0937, "step": 2385 }, { "epoch": 0.6, "learning_rate": 7.354383249672212e-06, "loss": 1.0719, "step": 2386 }, { "epoch": 0.6, "learning_rate": 7.346559602141431e-06, "loss": 1.0372, "step": 2387 }, { "epoch": 0.6, "learning_rate": 7.338737701391256e-06, "loss": 1.1202, "step": 2388 }, { "epoch": 0.6, "learning_rate": 7.330917552570904e-06, "loss": 1.1066, "step": 2389 }, { "epoch": 0.6, "learning_rate": 7.323099160828442e-06, "loss": 1.0713, "step": 2390 }, { "epoch": 0.6, "learning_rate": 7.315282531310777e-06, "loss": 1.0915, "step": 2391 }, { "epoch": 0.6, "learning_rate": 7.307467669163655e-06, "loss": 1.0631, "step": 2392 }, { "epoch": 0.6, "learning_rate": 7.299654579531667e-06, "loss": 1.1114, "step": 2393 }, { "epoch": 0.6, "learning_rate": 7.291843267558225e-06, "loss": 1.08, "step": 2394 }, { "epoch": 0.6, "learning_rate": 7.284033738385584e-06, "loss": 1.0741, "step": 2395 }, { "epoch": 0.6, "learning_rate": 7.276225997154808e-06, "loss": 1.0564, "step": 2396 }, { "epoch": 0.6, "learning_rate": 7.268420049005806e-06, "loss": 1.1027, "step": 2397 }, { "epoch": 0.6, "learning_rate": 7.260615899077286e-06, "loss": 1.0739, "step": 2398 }, { "epoch": 0.6, "learning_rate": 7.25281355250678e-06, "loss": 1.0937, "step": 2399 }, { "epoch": 0.6, "learning_rate": 7.245013014430645e-06, "loss": 1.0729, "step": 2400 }, { "epoch": 0.6, "learning_rate": 7.237214289984025e-06, "loss": 1.108, "step": 2401 }, { "epoch": 0.6, "learning_rate": 7.229417384300888e-06, "loss": 1.0824, "step": 2402 }, { "epoch": 0.6, "learning_rate": 7.221622302513994e-06, "loss": 1.098, "step": 2403 }, { "epoch": 0.6, "learning_rate": 7.21382904975491e-06, "loss": 1.0668, "step": 2404 }, { "epoch": 0.6, "learning_rate": 7.2060376311539905e-06, "loss": 1.0661, "step": 2405 }, { "epoch": 0.6, "learning_rate": 7.198248051840392e-06, "loss": 1.0758, "step": 2406 }, { "epoch": 0.6, "learning_rate": 7.190460316942055e-06, "loss": 1.095, "step": 2407 }, { "epoch": 0.6, "learning_rate": 7.182674431585703e-06, "loss": 1.1295, "step": 2408 }, { "epoch": 0.6, "learning_rate": 7.174890400896851e-06, "loss": 1.0779, "step": 2409 }, { "epoch": 0.6, "learning_rate": 7.167108229999782e-06, "loss": 1.0677, "step": 2410 }, { "epoch": 0.6, "learning_rate": 7.1593279240175635e-06, "loss": 1.0898, "step": 2411 }, { "epoch": 0.6, "learning_rate": 7.151549488072029e-06, "loss": 1.0718, "step": 2412 }, { "epoch": 0.6, "learning_rate": 7.1437729272837855e-06, "loss": 1.0755, "step": 2413 }, { "epoch": 0.6, "learning_rate": 7.135998246772207e-06, "loss": 1.063, "step": 2414 }, { "epoch": 0.6, "learning_rate": 7.12822545165542e-06, "loss": 1.0741, "step": 2415 }, { "epoch": 0.61, "learning_rate": 7.120454547050324e-06, "loss": 1.0958, "step": 2416 }, { "epoch": 0.61, "learning_rate": 7.112685538072557e-06, "loss": 1.0847, "step": 2417 }, { "epoch": 0.61, "learning_rate": 7.104918429836529e-06, "loss": 1.1205, "step": 2418 }, { "epoch": 0.61, "learning_rate": 7.097153227455379e-06, "loss": 1.0923, "step": 2419 }, { "epoch": 0.61, "learning_rate": 7.0893899360410025e-06, "loss": 1.0735, "step": 2420 }, { "epoch": 0.61, "learning_rate": 7.081628560704044e-06, "loss": 1.1153, "step": 2421 }, { "epoch": 0.61, "learning_rate": 7.0738691065538635e-06, "loss": 1.0814, "step": 2422 }, { "epoch": 0.61, "learning_rate": 7.066111578698585e-06, "loss": 1.1035, "step": 2423 }, { "epoch": 0.61, "learning_rate": 7.058355982245038e-06, "loss": 1.0658, "step": 2424 }, { "epoch": 0.61, "learning_rate": 7.050602322298798e-06, "loss": 1.0948, "step": 2425 }, { "epoch": 0.61, "learning_rate": 7.042850603964159e-06, "loss": 1.0924, "step": 2426 }, { "epoch": 0.61, "learning_rate": 7.035100832344137e-06, "loss": 1.0617, "step": 2427 }, { "epoch": 0.61, "learning_rate": 7.027353012540472e-06, "loss": 1.0995, "step": 2428 }, { "epoch": 0.61, "learning_rate": 7.019607149653606e-06, "loss": 1.085, "step": 2429 }, { "epoch": 0.61, "learning_rate": 7.011863248782711e-06, "loss": 1.0771, "step": 2430 }, { "epoch": 0.61, "learning_rate": 7.004121315025651e-06, "loss": 1.0701, "step": 2431 }, { "epoch": 0.61, "learning_rate": 6.996381353479004e-06, "loss": 1.0337, "step": 2432 }, { "epoch": 0.61, "learning_rate": 6.988643369238044e-06, "loss": 1.1223, "step": 2433 }, { "epoch": 0.61, "learning_rate": 6.980907367396752e-06, "loss": 1.1072, "step": 2434 }, { "epoch": 0.61, "learning_rate": 6.973173353047795e-06, "loss": 1.0988, "step": 2435 }, { "epoch": 0.61, "learning_rate": 6.965441331282537e-06, "loss": 1.0547, "step": 2436 }, { "epoch": 0.61, "learning_rate": 6.957711307191029e-06, "loss": 1.0868, "step": 2437 }, { "epoch": 0.61, "learning_rate": 6.949983285862003e-06, "loss": 1.0769, "step": 2438 }, { "epoch": 0.61, "learning_rate": 6.942257272382882e-06, "loss": 1.0351, "step": 2439 }, { "epoch": 0.61, "learning_rate": 6.934533271839751e-06, "loss": 1.0266, "step": 2440 }, { "epoch": 0.61, "learning_rate": 6.92681128931739e-06, "loss": 1.0894, "step": 2441 }, { "epoch": 0.61, "learning_rate": 6.91909132989924e-06, "loss": 1.0984, "step": 2442 }, { "epoch": 0.61, "learning_rate": 6.9113733986674025e-06, "loss": 1.0749, "step": 2443 }, { "epoch": 0.61, "learning_rate": 6.903657500702663e-06, "loss": 1.0837, "step": 2444 }, { "epoch": 0.61, "learning_rate": 6.895943641084448e-06, "loss": 1.0913, "step": 2445 }, { "epoch": 0.61, "learning_rate": 6.888231824890858e-06, "loss": 1.1126, "step": 2446 }, { "epoch": 0.61, "learning_rate": 6.880522057198639e-06, "loss": 1.0725, "step": 2447 }, { "epoch": 0.61, "learning_rate": 6.872814343083192e-06, "loss": 1.0747, "step": 2448 }, { "epoch": 0.61, "learning_rate": 6.8651086876185666e-06, "loss": 1.079, "step": 2449 }, { "epoch": 0.61, "learning_rate": 6.857405095877454e-06, "loss": 1.0913, "step": 2450 }, { "epoch": 0.61, "learning_rate": 6.849703572931192e-06, "loss": 1.0709, "step": 2451 }, { "epoch": 0.61, "learning_rate": 6.8420041238497525e-06, "loss": 1.0518, "step": 2452 }, { "epoch": 0.61, "learning_rate": 6.834306753701742e-06, "loss": 1.0703, "step": 2453 }, { "epoch": 0.61, "learning_rate": 6.826611467554399e-06, "loss": 1.1097, "step": 2454 }, { "epoch": 0.61, "learning_rate": 6.818918270473593e-06, "loss": 1.0679, "step": 2455 }, { "epoch": 0.62, "learning_rate": 6.8112271675238154e-06, "loss": 1.0724, "step": 2456 }, { "epoch": 0.62, "learning_rate": 6.803538163768177e-06, "loss": 1.0598, "step": 2457 }, { "epoch": 0.62, "learning_rate": 6.7958512642684136e-06, "loss": 1.0492, "step": 2458 }, { "epoch": 0.62, "learning_rate": 6.788166474084867e-06, "loss": 1.1001, "step": 2459 }, { "epoch": 0.62, "learning_rate": 6.780483798276502e-06, "loss": 1.0771, "step": 2460 }, { "epoch": 0.62, "learning_rate": 6.772803241900871e-06, "loss": 1.0841, "step": 2461 }, { "epoch": 0.62, "learning_rate": 6.7651248100141554e-06, "loss": 1.0873, "step": 2462 }, { "epoch": 0.62, "learning_rate": 6.7574485076711285e-06, "loss": 1.0711, "step": 2463 }, { "epoch": 0.62, "learning_rate": 6.749774339925149e-06, "loss": 1.0536, "step": 2464 }, { "epoch": 0.62, "learning_rate": 6.742102311828196e-06, "loss": 1.051, "step": 2465 }, { "epoch": 0.62, "learning_rate": 6.7344324284308126e-06, "loss": 1.117, "step": 2466 }, { "epoch": 0.62, "learning_rate": 6.726764694782152e-06, "loss": 1.0977, "step": 2467 }, { "epoch": 0.62, "learning_rate": 6.719099115929938e-06, "loss": 1.1125, "step": 2468 }, { "epoch": 0.62, "learning_rate": 6.7114356969204806e-06, "loss": 1.0315, "step": 2469 }, { "epoch": 0.62, "learning_rate": 6.703774442798679e-06, "loss": 1.0587, "step": 2470 }, { "epoch": 0.62, "learning_rate": 6.696115358607984e-06, "loss": 1.1424, "step": 2471 }, { "epoch": 0.62, "learning_rate": 6.688458449390438e-06, "loss": 1.0693, "step": 2472 }, { "epoch": 0.62, "learning_rate": 6.680803720186639e-06, "loss": 1.1025, "step": 2473 }, { "epoch": 0.62, "learning_rate": 6.673151176035762e-06, "loss": 1.0243, "step": 2474 }, { "epoch": 0.62, "learning_rate": 6.665500821975532e-06, "loss": 1.1024, "step": 2475 }, { "epoch": 0.62, "learning_rate": 6.657852663042237e-06, "loss": 1.1374, "step": 2476 }, { "epoch": 0.62, "learning_rate": 6.6502067042707245e-06, "loss": 1.1255, "step": 2477 }, { "epoch": 0.62, "learning_rate": 6.642562950694383e-06, "loss": 1.0999, "step": 2478 }, { "epoch": 0.62, "learning_rate": 6.634921407345159e-06, "loss": 1.0968, "step": 2479 }, { "epoch": 0.62, "learning_rate": 6.627282079253539e-06, "loss": 1.0981, "step": 2480 }, { "epoch": 0.62, "learning_rate": 6.619644971448558e-06, "loss": 1.072, "step": 2481 }, { "epoch": 0.62, "learning_rate": 6.6120100889577695e-06, "loss": 1.1486, "step": 2482 }, { "epoch": 0.62, "learning_rate": 6.604377436807296e-06, "loss": 1.0525, "step": 2483 }, { "epoch": 0.62, "learning_rate": 6.596747020021754e-06, "loss": 1.0716, "step": 2484 }, { "epoch": 0.62, "learning_rate": 6.589118843624316e-06, "loss": 1.1034, "step": 2485 }, { "epoch": 0.62, "learning_rate": 6.581492912636676e-06, "loss": 1.0833, "step": 2486 }, { "epoch": 0.62, "learning_rate": 6.573869232079029e-06, "loss": 1.1214, "step": 2487 }, { "epoch": 0.62, "learning_rate": 6.566247806970119e-06, "loss": 1.0423, "step": 2488 }, { "epoch": 0.62, "learning_rate": 6.558628642327177e-06, "loss": 1.0792, "step": 2489 }, { "epoch": 0.62, "learning_rate": 6.551011743165967e-06, "loss": 1.0884, "step": 2490 }, { "epoch": 0.62, "learning_rate": 6.54339711450075e-06, "loss": 1.0884, "step": 2491 }, { "epoch": 0.62, "learning_rate": 6.535784761344297e-06, "loss": 1.0625, "step": 2492 }, { "epoch": 0.62, "learning_rate": 6.52817468870788e-06, "loss": 1.1079, "step": 2493 }, { "epoch": 0.62, "learning_rate": 6.520566901601269e-06, "loss": 1.057, "step": 2494 }, { "epoch": 0.62, "learning_rate": 6.512961405032729e-06, "loss": 1.0954, "step": 2495 }, { "epoch": 0.63, "learning_rate": 6.505358204009018e-06, "loss": 1.0792, "step": 2496 }, { "epoch": 0.63, "learning_rate": 6.4977573035353855e-06, "loss": 1.0628, "step": 2497 }, { "epoch": 0.63, "learning_rate": 6.490158708615559e-06, "loss": 1.0852, "step": 2498 }, { "epoch": 0.63, "learning_rate": 6.482562424251757e-06, "loss": 1.1031, "step": 2499 }, { "epoch": 0.63, "learning_rate": 6.474968455444672e-06, "loss": 1.096, "step": 2500 }, { "epoch": 0.63, "learning_rate": 6.467376807193473e-06, "loss": 1.1185, "step": 2501 }, { "epoch": 0.63, "learning_rate": 6.459787484495803e-06, "loss": 1.1016, "step": 2502 }, { "epoch": 0.63, "learning_rate": 6.45220049234777e-06, "loss": 1.0975, "step": 2503 }, { "epoch": 0.63, "learning_rate": 6.444615835743955e-06, "loss": 1.0746, "step": 2504 }, { "epoch": 0.63, "learning_rate": 6.437033519677387e-06, "loss": 1.0877, "step": 2505 }, { "epoch": 0.63, "learning_rate": 6.429453549139572e-06, "loss": 1.0534, "step": 2506 }, { "epoch": 0.63, "learning_rate": 6.421875929120469e-06, "loss": 1.0763, "step": 2507 }, { "epoch": 0.63, "learning_rate": 6.414300664608471e-06, "loss": 1.0996, "step": 2508 }, { "epoch": 0.63, "learning_rate": 6.406727760590445e-06, "loss": 1.0761, "step": 2509 }, { "epoch": 0.63, "learning_rate": 6.399157222051688e-06, "loss": 1.0624, "step": 2510 }, { "epoch": 0.63, "learning_rate": 6.391589053975946e-06, "loss": 1.0534, "step": 2511 }, { "epoch": 0.63, "learning_rate": 6.384023261345402e-06, "loss": 1.0516, "step": 2512 }, { "epoch": 0.63, "learning_rate": 6.37645984914068e-06, "loss": 1.0789, "step": 2513 }, { "epoch": 0.63, "learning_rate": 6.368898822340831e-06, "loss": 1.0425, "step": 2514 }, { "epoch": 0.63, "learning_rate": 6.361340185923337e-06, "loss": 1.082, "step": 2515 }, { "epoch": 0.63, "learning_rate": 6.353783944864111e-06, "loss": 1.0575, "step": 2516 }, { "epoch": 0.63, "learning_rate": 6.346230104137483e-06, "loss": 1.0702, "step": 2517 }, { "epoch": 0.63, "learning_rate": 6.33867866871621e-06, "loss": 1.0757, "step": 2518 }, { "epoch": 0.63, "learning_rate": 6.331129643571452e-06, "loss": 1.0487, "step": 2519 }, { "epoch": 0.63, "learning_rate": 6.323583033672799e-06, "loss": 1.1075, "step": 2520 }, { "epoch": 0.63, "learning_rate": 6.316038843988243e-06, "loss": 1.0515, "step": 2521 }, { "epoch": 0.63, "learning_rate": 6.308497079484179e-06, "loss": 1.0917, "step": 2522 }, { "epoch": 0.63, "learning_rate": 6.300957745125416e-06, "loss": 1.0946, "step": 2523 }, { "epoch": 0.63, "learning_rate": 6.293420845875149e-06, "loss": 1.0556, "step": 2524 }, { "epoch": 0.63, "learning_rate": 6.285886386694988e-06, "loss": 1.0962, "step": 2525 }, { "epoch": 0.63, "learning_rate": 6.278354372544912e-06, "loss": 1.0663, "step": 2526 }, { "epoch": 0.63, "learning_rate": 6.2708248083833175e-06, "loss": 1.0636, "step": 2527 }, { "epoch": 0.63, "learning_rate": 6.263297699166975e-06, "loss": 1.1171, "step": 2528 }, { "epoch": 0.63, "learning_rate": 6.25577304985103e-06, "loss": 1.1037, "step": 2529 }, { "epoch": 0.63, "learning_rate": 6.2482508653890294e-06, "loss": 1.1017, "step": 2530 }, { "epoch": 0.63, "learning_rate": 6.240731150732877e-06, "loss": 1.0219, "step": 2531 }, { "epoch": 0.63, "learning_rate": 6.233213910832867e-06, "loss": 1.088, "step": 2532 }, { "epoch": 0.63, "learning_rate": 6.2256991506376515e-06, "loss": 1.0473, "step": 2533 }, { "epoch": 0.63, "learning_rate": 6.218186875094256e-06, "loss": 1.0967, "step": 2534 }, { "epoch": 0.63, "learning_rate": 6.210677089148078e-06, "loss": 1.0797, "step": 2535 }, { "epoch": 0.64, "learning_rate": 6.203169797742862e-06, "loss": 1.059, "step": 2536 }, { "epoch": 0.64, "learning_rate": 6.195665005820719e-06, "loss": 1.0875, "step": 2537 }, { "epoch": 0.64, "learning_rate": 6.188162718322108e-06, "loss": 1.0791, "step": 2538 }, { "epoch": 0.64, "learning_rate": 6.180662940185851e-06, "loss": 1.0512, "step": 2539 }, { "epoch": 0.64, "learning_rate": 6.173165676349103e-06, "loss": 1.0983, "step": 2540 }, { "epoch": 0.64, "learning_rate": 6.165670931747376e-06, "loss": 1.0627, "step": 2541 }, { "epoch": 0.64, "learning_rate": 6.158178711314522e-06, "loss": 1.0841, "step": 2542 }, { "epoch": 0.64, "learning_rate": 6.15068901998272e-06, "loss": 1.0798, "step": 2543 }, { "epoch": 0.64, "learning_rate": 6.143201862682502e-06, "loss": 1.0584, "step": 2544 }, { "epoch": 0.64, "learning_rate": 6.135717244342714e-06, "loss": 1.0248, "step": 2545 }, { "epoch": 0.64, "learning_rate": 6.128235169890548e-06, "loss": 1.0735, "step": 2546 }, { "epoch": 0.64, "learning_rate": 6.120755644251499e-06, "loss": 1.1091, "step": 2547 }, { "epoch": 0.64, "learning_rate": 6.113278672349407e-06, "loss": 1.0518, "step": 2548 }, { "epoch": 0.64, "learning_rate": 6.105804259106424e-06, "loss": 1.1085, "step": 2549 }, { "epoch": 0.64, "learning_rate": 6.0983324094430065e-06, "loss": 1.0754, "step": 2550 }, { "epoch": 0.64, "learning_rate": 6.090863128277938e-06, "loss": 1.0714, "step": 2551 }, { "epoch": 0.64, "learning_rate": 6.083396420528298e-06, "loss": 1.091, "step": 2552 }, { "epoch": 0.64, "learning_rate": 6.0759322911094875e-06, "loss": 1.0526, "step": 2553 }, { "epoch": 0.64, "learning_rate": 6.068470744935193e-06, "loss": 1.1373, "step": 2554 }, { "epoch": 0.64, "learning_rate": 6.0610117869174115e-06, "loss": 1.1153, "step": 2555 }, { "epoch": 0.64, "learning_rate": 6.053555421966439e-06, "loss": 1.1005, "step": 2556 }, { "epoch": 0.64, "learning_rate": 6.046101654990852e-06, "loss": 1.0381, "step": 2557 }, { "epoch": 0.64, "learning_rate": 6.038650490897527e-06, "loss": 1.1017, "step": 2558 }, { "epoch": 0.64, "learning_rate": 6.031201934591622e-06, "loss": 1.0688, "step": 2559 }, { "epoch": 0.64, "learning_rate": 6.023755990976582e-06, "loss": 1.0298, "step": 2560 }, { "epoch": 0.64, "learning_rate": 6.016312664954127e-06, "loss": 1.0497, "step": 2561 }, { "epoch": 0.64, "learning_rate": 6.008871961424259e-06, "loss": 1.0518, "step": 2562 }, { "epoch": 0.64, "learning_rate": 6.0014338852852526e-06, "loss": 1.0583, "step": 2563 }, { "epoch": 0.64, "learning_rate": 5.993998441433648e-06, "loss": 1.05, "step": 2564 }, { "epoch": 0.64, "learning_rate": 5.986565634764262e-06, "loss": 1.0847, "step": 2565 }, { "epoch": 0.64, "learning_rate": 5.979135470170163e-06, "loss": 1.0838, "step": 2566 }, { "epoch": 0.64, "learning_rate": 5.9717079525426914e-06, "loss": 1.0915, "step": 2567 }, { "epoch": 0.64, "learning_rate": 5.964283086771435e-06, "loss": 1.0965, "step": 2568 }, { "epoch": 0.64, "learning_rate": 5.956860877744246e-06, "loss": 1.1163, "step": 2569 }, { "epoch": 0.64, "learning_rate": 5.949441330347222e-06, "loss": 1.1154, "step": 2570 }, { "epoch": 0.64, "learning_rate": 5.942024449464707e-06, "loss": 1.1022, "step": 2571 }, { "epoch": 0.64, "learning_rate": 5.9346102399792974e-06, "loss": 1.1134, "step": 2572 }, { "epoch": 0.64, "learning_rate": 5.927198706771813e-06, "loss": 1.0756, "step": 2573 }, { "epoch": 0.64, "learning_rate": 5.91978985472134e-06, "loss": 1.1013, "step": 2574 }, { "epoch": 0.64, "learning_rate": 5.912383688705172e-06, "loss": 1.064, "step": 2575 }, { "epoch": 0.65, "learning_rate": 5.9049802135988475e-06, "loss": 1.0822, "step": 2576 }, { "epoch": 0.65, "learning_rate": 5.897579434276142e-06, "loss": 1.0905, "step": 2577 }, { "epoch": 0.65, "learning_rate": 5.890181355609035e-06, "loss": 1.0702, "step": 2578 }, { "epoch": 0.65, "learning_rate": 5.882785982467744e-06, "loss": 1.0789, "step": 2579 }, { "epoch": 0.65, "learning_rate": 5.875393319720701e-06, "loss": 1.0496, "step": 2580 }, { "epoch": 0.65, "learning_rate": 5.8680033722345565e-06, "loss": 1.0565, "step": 2581 }, { "epoch": 0.65, "learning_rate": 5.860616144874165e-06, "loss": 1.0776, "step": 2582 }, { "epoch": 0.65, "learning_rate": 5.853231642502599e-06, "loss": 1.105, "step": 2583 }, { "epoch": 0.65, "learning_rate": 5.845849869981137e-06, "loss": 1.0249, "step": 2584 }, { "epoch": 0.65, "learning_rate": 5.838470832169254e-06, "loss": 1.0587, "step": 2585 }, { "epoch": 0.65, "learning_rate": 5.831094533924631e-06, "loss": 1.0885, "step": 2586 }, { "epoch": 0.65, "learning_rate": 5.8237209801031425e-06, "loss": 1.0693, "step": 2587 }, { "epoch": 0.65, "learning_rate": 5.816350175558855e-06, "loss": 1.067, "step": 2588 }, { "epoch": 0.65, "learning_rate": 5.808982125144024e-06, "loss": 1.0803, "step": 2589 }, { "epoch": 0.65, "learning_rate": 5.801616833709099e-06, "loss": 1.0863, "step": 2590 }, { "epoch": 0.65, "learning_rate": 5.794254306102716e-06, "loss": 1.0728, "step": 2591 }, { "epoch": 0.65, "learning_rate": 5.78689454717167e-06, "loss": 1.0627, "step": 2592 }, { "epoch": 0.65, "learning_rate": 5.77953756176096e-06, "loss": 1.0471, "step": 2593 }, { "epoch": 0.65, "learning_rate": 5.77218335471374e-06, "loss": 1.0941, "step": 2594 }, { "epoch": 0.65, "learning_rate": 5.764831930871346e-06, "loss": 1.0806, "step": 2595 }, { "epoch": 0.65, "learning_rate": 5.757483295073273e-06, "loss": 1.0883, "step": 2596 }, { "epoch": 0.65, "learning_rate": 5.7501374521571925e-06, "loss": 1.0925, "step": 2597 }, { "epoch": 0.65, "learning_rate": 5.7427944069589266e-06, "loss": 1.0748, "step": 2598 }, { "epoch": 0.65, "learning_rate": 5.7354541643124575e-06, "loss": 1.0465, "step": 2599 }, { "epoch": 0.65, "learning_rate": 5.728116729049929e-06, "loss": 1.0672, "step": 2600 }, { "epoch": 0.65, "learning_rate": 5.72078210600163e-06, "loss": 1.0445, "step": 2601 }, { "epoch": 0.65, "learning_rate": 5.7134502999960005e-06, "loss": 1.0539, "step": 2602 }, { "epoch": 0.65, "learning_rate": 5.706121315859623e-06, "loss": 1.05, "step": 2603 }, { "epoch": 0.65, "learning_rate": 5.698795158417234e-06, "loss": 1.0292, "step": 2604 }, { "epoch": 0.65, "learning_rate": 5.691471832491687e-06, "loss": 1.0489, "step": 2605 }, { "epoch": 0.65, "learning_rate": 5.684151342903992e-06, "loss": 1.0795, "step": 2606 }, { "epoch": 0.65, "learning_rate": 5.676833694473292e-06, "loss": 1.0821, "step": 2607 }, { "epoch": 0.65, "learning_rate": 5.669518892016839e-06, "loss": 1.1252, "step": 2608 }, { "epoch": 0.65, "learning_rate": 5.662206940350032e-06, "loss": 1.068, "step": 2609 }, { "epoch": 0.65, "learning_rate": 5.654897844286387e-06, "loss": 1.102, "step": 2610 }, { "epoch": 0.65, "learning_rate": 5.647591608637537e-06, "loss": 1.0458, "step": 2611 }, { "epoch": 0.65, "learning_rate": 5.6402882382132295e-06, "loss": 1.0691, "step": 2612 }, { "epoch": 0.65, "learning_rate": 5.632987737821338e-06, "loss": 1.1038, "step": 2613 }, { "epoch": 0.65, "learning_rate": 5.6256901122678385e-06, "loss": 1.1064, "step": 2614 }, { "epoch": 0.65, "learning_rate": 5.618395366356808e-06, "loss": 1.0551, "step": 2615 }, { "epoch": 0.66, "learning_rate": 5.611103504890444e-06, "loss": 1.0786, "step": 2616 }, { "epoch": 0.66, "learning_rate": 5.603814532669032e-06, "loss": 1.0788, "step": 2617 }, { "epoch": 0.66, "learning_rate": 5.596528454490958e-06, "loss": 1.0504, "step": 2618 }, { "epoch": 0.66, "learning_rate": 5.589245275152703e-06, "loss": 1.0646, "step": 2619 }, { "epoch": 0.66, "learning_rate": 5.581964999448848e-06, "loss": 1.0436, "step": 2620 }, { "epoch": 0.66, "learning_rate": 5.57468763217205e-06, "loss": 1.0634, "step": 2621 }, { "epoch": 0.66, "learning_rate": 5.567413178113055e-06, "loss": 1.0868, "step": 2622 }, { "epoch": 0.66, "learning_rate": 5.560141642060699e-06, "loss": 1.1387, "step": 2623 }, { "epoch": 0.66, "learning_rate": 5.552873028801891e-06, "loss": 1.0712, "step": 2624 }, { "epoch": 0.66, "learning_rate": 5.545607343121611e-06, "loss": 1.0336, "step": 2625 }, { "epoch": 0.66, "learning_rate": 5.538344589802917e-06, "loss": 1.0336, "step": 2626 }, { "epoch": 0.66, "learning_rate": 5.531084773626942e-06, "loss": 1.0967, "step": 2627 }, { "epoch": 0.66, "learning_rate": 5.523827899372876e-06, "loss": 1.0695, "step": 2628 }, { "epoch": 0.66, "learning_rate": 5.516573971817973e-06, "loss": 1.0773, "step": 2629 }, { "epoch": 0.66, "learning_rate": 5.509322995737563e-06, "loss": 1.1075, "step": 2630 }, { "epoch": 0.66, "learning_rate": 5.502074975905003e-06, "loss": 1.0446, "step": 2631 }, { "epoch": 0.66, "learning_rate": 5.494829917091733e-06, "loss": 1.0804, "step": 2632 }, { "epoch": 0.66, "learning_rate": 5.487587824067231e-06, "loss": 1.0461, "step": 2633 }, { "epoch": 0.66, "learning_rate": 5.480348701599014e-06, "loss": 1.0283, "step": 2634 }, { "epoch": 0.66, "learning_rate": 5.473112554452666e-06, "loss": 1.0996, "step": 2635 }, { "epoch": 0.66, "learning_rate": 5.465879387391795e-06, "loss": 1.0422, "step": 2636 }, { "epoch": 0.66, "learning_rate": 5.458649205178048e-06, "loss": 1.0621, "step": 2637 }, { "epoch": 0.66, "learning_rate": 5.45142201257111e-06, "loss": 1.0736, "step": 2638 }, { "epoch": 0.66, "learning_rate": 5.444197814328707e-06, "loss": 1.0689, "step": 2639 }, { "epoch": 0.66, "learning_rate": 5.436976615206577e-06, "loss": 1.0442, "step": 2640 }, { "epoch": 0.66, "learning_rate": 5.429758419958493e-06, "loss": 1.0459, "step": 2641 }, { "epoch": 0.66, "learning_rate": 5.422543233336252e-06, "loss": 1.082, "step": 2642 }, { "epoch": 0.66, "learning_rate": 5.41533106008967e-06, "loss": 1.0344, "step": 2643 }, { "epoch": 0.66, "learning_rate": 5.408121904966574e-06, "loss": 1.0745, "step": 2644 }, { "epoch": 0.66, "learning_rate": 5.400915772712802e-06, "loss": 1.0452, "step": 2645 }, { "epoch": 0.66, "learning_rate": 5.393712668072222e-06, "loss": 1.0615, "step": 2646 }, { "epoch": 0.66, "learning_rate": 5.386512595786676e-06, "loss": 1.0755, "step": 2647 }, { "epoch": 0.66, "learning_rate": 5.379315560596037e-06, "loss": 1.0898, "step": 2648 }, { "epoch": 0.66, "learning_rate": 5.372121567238179e-06, "loss": 1.092, "step": 2649 }, { "epoch": 0.66, "learning_rate": 5.364930620448946e-06, "loss": 1.0609, "step": 2650 }, { "epoch": 0.66, "learning_rate": 5.357742724962207e-06, "loss": 1.0642, "step": 2651 }, { "epoch": 0.66, "learning_rate": 5.3505578855098085e-06, "loss": 1.0926, "step": 2652 }, { "epoch": 0.66, "learning_rate": 5.343376106821583e-06, "loss": 1.1059, "step": 2653 }, { "epoch": 0.66, "learning_rate": 5.33619739362535e-06, "loss": 1.0958, "step": 2654 }, { "epoch": 0.66, "learning_rate": 5.329021750646914e-06, "loss": 1.0744, "step": 2655 }, { "epoch": 0.67, "learning_rate": 5.321849182610069e-06, "loss": 1.0582, "step": 2656 }, { "epoch": 0.67, "learning_rate": 5.314679694236554e-06, "loss": 1.0809, "step": 2657 }, { "epoch": 0.67, "learning_rate": 5.3075132902461115e-06, "loss": 1.0385, "step": 2658 }, { "epoch": 0.67, "learning_rate": 5.300349975356438e-06, "loss": 1.0505, "step": 2659 }, { "epoch": 0.67, "learning_rate": 5.2931897542832015e-06, "loss": 1.0946, "step": 2660 }, { "epoch": 0.67, "learning_rate": 5.286032631740023e-06, "loss": 1.0509, "step": 2661 }, { "epoch": 0.67, "learning_rate": 5.278878612438505e-06, "loss": 1.1062, "step": 2662 }, { "epoch": 0.67, "learning_rate": 5.271727701088188e-06, "loss": 1.0752, "step": 2663 }, { "epoch": 0.67, "learning_rate": 5.26457990239657e-06, "loss": 1.061, "step": 2664 }, { "epoch": 0.67, "learning_rate": 5.2574352210691116e-06, "loss": 1.0534, "step": 2665 }, { "epoch": 0.67, "learning_rate": 5.250293661809205e-06, "loss": 1.1157, "step": 2666 }, { "epoch": 0.67, "learning_rate": 5.243155229318198e-06, "loss": 1.0469, "step": 2667 }, { "epoch": 0.67, "learning_rate": 5.236019928295373e-06, "loss": 1.0593, "step": 2668 }, { "epoch": 0.67, "learning_rate": 5.22888776343796e-06, "loss": 1.0309, "step": 2669 }, { "epoch": 0.67, "learning_rate": 5.221758739441116e-06, "loss": 1.0614, "step": 2670 }, { "epoch": 0.67, "learning_rate": 5.21463286099793e-06, "loss": 1.0608, "step": 2671 }, { "epoch": 0.67, "learning_rate": 5.207510132799436e-06, "loss": 1.0764, "step": 2672 }, { "epoch": 0.67, "learning_rate": 5.200390559534564e-06, "loss": 1.0767, "step": 2673 }, { "epoch": 0.67, "learning_rate": 5.193274145890198e-06, "loss": 1.1041, "step": 2674 }, { "epoch": 0.67, "learning_rate": 5.1861608965511246e-06, "loss": 1.0449, "step": 2675 }, { "epoch": 0.67, "learning_rate": 5.179050816200048e-06, "loss": 1.047, "step": 2676 }, { "epoch": 0.67, "learning_rate": 5.1719439095175985e-06, "loss": 1.051, "step": 2677 }, { "epoch": 0.67, "learning_rate": 5.164840181182303e-06, "loss": 1.1299, "step": 2678 }, { "epoch": 0.67, "learning_rate": 5.1577396358706e-06, "loss": 1.0542, "step": 2679 }, { "epoch": 0.67, "learning_rate": 5.1506422782568345e-06, "loss": 1.0855, "step": 2680 }, { "epoch": 0.67, "learning_rate": 5.143548113013257e-06, "loss": 1.1036, "step": 2681 }, { "epoch": 0.67, "learning_rate": 5.136457144810009e-06, "loss": 1.0472, "step": 2682 }, { "epoch": 0.67, "learning_rate": 5.129369378315128e-06, "loss": 1.0976, "step": 2683 }, { "epoch": 0.67, "learning_rate": 5.1222848181945515e-06, "loss": 1.0307, "step": 2684 }, { "epoch": 0.67, "learning_rate": 5.1152034691121e-06, "loss": 1.0779, "step": 2685 }, { "epoch": 0.67, "learning_rate": 5.108125335729479e-06, "loss": 1.1077, "step": 2686 }, { "epoch": 0.67, "learning_rate": 5.101050422706275e-06, "loss": 1.0922, "step": 2687 }, { "epoch": 0.67, "learning_rate": 5.093978734699968e-06, "loss": 1.0812, "step": 2688 }, { "epoch": 0.67, "learning_rate": 5.0869102763659e-06, "loss": 1.0234, "step": 2689 }, { "epoch": 0.67, "learning_rate": 5.079845052357291e-06, "loss": 1.0493, "step": 2690 }, { "epoch": 0.67, "learning_rate": 5.07278306732524e-06, "loss": 1.0816, "step": 2691 }, { "epoch": 0.67, "learning_rate": 5.065724325918702e-06, "loss": 1.0835, "step": 2692 }, { "epoch": 0.67, "learning_rate": 5.0586688327845016e-06, "loss": 1.0714, "step": 2693 }, { "epoch": 0.67, "learning_rate": 5.051616592567323e-06, "loss": 1.0545, "step": 2694 }, { "epoch": 0.67, "learning_rate": 5.044567609909723e-06, "loss": 1.0794, "step": 2695 }, { "epoch": 0.68, "learning_rate": 5.037521889452084e-06, "loss": 1.0696, "step": 2696 }, { "epoch": 0.68, "learning_rate": 5.03047943583267e-06, "loss": 1.0529, "step": 2697 }, { "epoch": 0.68, "learning_rate": 5.02344025368759e-06, "loss": 1.1071, "step": 2698 }, { "epoch": 0.68, "learning_rate": 5.016404347650776e-06, "loss": 1.0516, "step": 2699 }, { "epoch": 0.68, "learning_rate": 5.009371722354033e-06, "loss": 1.1032, "step": 2700 }, { "epoch": 0.68, "learning_rate": 5.00234238242699e-06, "loss": 1.0476, "step": 2701 }, { "epoch": 0.68, "learning_rate": 4.995316332497114e-06, "loss": 1.0547, "step": 2702 }, { "epoch": 0.68, "learning_rate": 4.9882935771897075e-06, "loss": 1.0853, "step": 2703 }, { "epoch": 0.68, "learning_rate": 4.981274121127913e-06, "loss": 1.0494, "step": 2704 }, { "epoch": 0.68, "learning_rate": 4.974257968932687e-06, "loss": 1.0793, "step": 2705 }, { "epoch": 0.68, "learning_rate": 4.967245125222816e-06, "loss": 1.078, "step": 2706 }, { "epoch": 0.68, "learning_rate": 4.960235594614917e-06, "loss": 1.0529, "step": 2707 }, { "epoch": 0.68, "learning_rate": 4.9532293817234144e-06, "loss": 1.0515, "step": 2708 }, { "epoch": 0.68, "learning_rate": 4.946226491160555e-06, "loss": 1.0452, "step": 2709 }, { "epoch": 0.68, "learning_rate": 4.93922692753639e-06, "loss": 1.0568, "step": 2710 }, { "epoch": 0.68, "learning_rate": 4.932230695458797e-06, "loss": 1.0941, "step": 2711 }, { "epoch": 0.68, "learning_rate": 4.925237799533445e-06, "loss": 1.0877, "step": 2712 }, { "epoch": 0.68, "learning_rate": 4.918248244363809e-06, "loss": 1.0778, "step": 2713 }, { "epoch": 0.68, "learning_rate": 4.91126203455118e-06, "loss": 1.1387, "step": 2714 }, { "epoch": 0.68, "learning_rate": 4.904279174694617e-06, "loss": 1.0719, "step": 2715 }, { "epoch": 0.68, "learning_rate": 4.897299669391006e-06, "loss": 1.0481, "step": 2716 }, { "epoch": 0.68, "learning_rate": 4.8903235232350045e-06, "loss": 1.0835, "step": 2717 }, { "epoch": 0.68, "learning_rate": 4.88335074081906e-06, "loss": 1.0591, "step": 2718 }, { "epoch": 0.68, "learning_rate": 4.8763813267334186e-06, "loss": 1.0962, "step": 2719 }, { "epoch": 0.68, "learning_rate": 4.869415285566091e-06, "loss": 1.0662, "step": 2720 }, { "epoch": 0.68, "learning_rate": 4.86245262190289e-06, "loss": 1.0533, "step": 2721 }, { "epoch": 0.68, "learning_rate": 4.855493340327375e-06, "loss": 1.0927, "step": 2722 }, { "epoch": 0.68, "learning_rate": 4.848537445420908e-06, "loss": 1.0286, "step": 2723 }, { "epoch": 0.68, "learning_rate": 4.841584941762605e-06, "loss": 1.0783, "step": 2724 }, { "epoch": 0.68, "learning_rate": 4.834635833929353e-06, "loss": 1.0707, "step": 2725 }, { "epoch": 0.68, "learning_rate": 4.827690126495801e-06, "loss": 1.0837, "step": 2726 }, { "epoch": 0.68, "learning_rate": 4.820747824034369e-06, "loss": 1.0732, "step": 2727 }, { "epoch": 0.68, "learning_rate": 4.813808931115228e-06, "loss": 1.0598, "step": 2728 }, { "epoch": 0.68, "learning_rate": 4.8068734523063e-06, "loss": 1.0673, "step": 2729 }, { "epoch": 0.68, "learning_rate": 4.799941392173272e-06, "loss": 1.0599, "step": 2730 }, { "epoch": 0.68, "learning_rate": 4.79301275527957e-06, "loss": 1.0905, "step": 2731 }, { "epoch": 0.68, "learning_rate": 4.786087546186372e-06, "loss": 1.108, "step": 2732 }, { "epoch": 0.68, "learning_rate": 4.779165769452591e-06, "loss": 1.0988, "step": 2733 }, { "epoch": 0.68, "learning_rate": 4.772247429634895e-06, "loss": 1.0716, "step": 2734 }, { "epoch": 0.68, "learning_rate": 4.765332531287679e-06, "loss": 1.0589, "step": 2735 }, { "epoch": 0.69, "learning_rate": 4.758421078963069e-06, "loss": 1.054, "step": 2736 }, { "epoch": 0.69, "learning_rate": 4.751513077210942e-06, "loss": 1.0663, "step": 2737 }, { "epoch": 0.69, "learning_rate": 4.744608530578872e-06, "loss": 1.0424, "step": 2738 }, { "epoch": 0.69, "learning_rate": 4.737707443612188e-06, "loss": 1.0651, "step": 2739 }, { "epoch": 0.69, "learning_rate": 4.730809820853928e-06, "loss": 1.0931, "step": 2740 }, { "epoch": 0.69, "learning_rate": 4.723915666844845e-06, "loss": 1.035, "step": 2741 }, { "epoch": 0.69, "learning_rate": 4.7170249861234226e-06, "loss": 1.0899, "step": 2742 }, { "epoch": 0.69, "learning_rate": 4.710137783225847e-06, "loss": 1.0761, "step": 2743 }, { "epoch": 0.69, "learning_rate": 4.703254062686017e-06, "loss": 1.0899, "step": 2744 }, { "epoch": 0.69, "learning_rate": 4.696373829035537e-06, "loss": 1.1066, "step": 2745 }, { "epoch": 0.69, "learning_rate": 4.6894970868037245e-06, "loss": 1.0673, "step": 2746 }, { "epoch": 0.69, "learning_rate": 4.6826238405175904e-06, "loss": 1.0976, "step": 2747 }, { "epoch": 0.69, "learning_rate": 4.675754094701843e-06, "loss": 1.0413, "step": 2748 }, { "epoch": 0.69, "learning_rate": 4.668887853878896e-06, "loss": 1.0643, "step": 2749 }, { "epoch": 0.69, "learning_rate": 4.662025122568848e-06, "loss": 1.0969, "step": 2750 }, { "epoch": 0.69, "learning_rate": 4.655165905289485e-06, "loss": 1.0507, "step": 2751 }, { "epoch": 0.69, "learning_rate": 4.648310206556284e-06, "loss": 1.0552, "step": 2752 }, { "epoch": 0.69, "learning_rate": 4.64145803088241e-06, "loss": 1.0433, "step": 2753 }, { "epoch": 0.69, "learning_rate": 4.634609382778701e-06, "loss": 1.0853, "step": 2754 }, { "epoch": 0.69, "learning_rate": 4.627764266753673e-06, "loss": 1.0825, "step": 2755 }, { "epoch": 0.69, "learning_rate": 4.620922687313525e-06, "loss": 1.094, "step": 2756 }, { "epoch": 0.69, "learning_rate": 4.614084648962119e-06, "loss": 1.0756, "step": 2757 }, { "epoch": 0.69, "learning_rate": 4.607250156200991e-06, "loss": 1.0877, "step": 2758 }, { "epoch": 0.69, "learning_rate": 4.600419213529336e-06, "loss": 1.0721, "step": 2759 }, { "epoch": 0.69, "learning_rate": 4.593591825444028e-06, "loss": 1.1289, "step": 2760 }, { "epoch": 0.69, "learning_rate": 4.5867679964395764e-06, "loss": 1.0538, "step": 2761 }, { "epoch": 0.69, "learning_rate": 4.579947731008167e-06, "loss": 1.0667, "step": 2762 }, { "epoch": 0.69, "learning_rate": 4.5731310336396426e-06, "loss": 1.0326, "step": 2763 }, { "epoch": 0.69, "learning_rate": 4.566317908821474e-06, "loss": 1.0351, "step": 2764 }, { "epoch": 0.69, "learning_rate": 4.5595083610388046e-06, "loss": 1.0606, "step": 2765 }, { "epoch": 0.69, "learning_rate": 4.55270239477441e-06, "loss": 1.0767, "step": 2766 }, { "epoch": 0.69, "learning_rate": 4.545900014508712e-06, "loss": 1.046, "step": 2767 }, { "epoch": 0.69, "learning_rate": 4.539101224719765e-06, "loss": 1.0688, "step": 2768 }, { "epoch": 0.69, "learning_rate": 4.5323060298832745e-06, "loss": 1.06, "step": 2769 }, { "epoch": 0.69, "learning_rate": 4.525514434472566e-06, "loss": 1.1011, "step": 2770 }, { "epoch": 0.69, "learning_rate": 4.518726442958599e-06, "loss": 1.1041, "step": 2771 }, { "epoch": 0.69, "learning_rate": 4.511942059809965e-06, "loss": 1.0827, "step": 2772 }, { "epoch": 0.69, "learning_rate": 4.505161289492874e-06, "loss": 1.0658, "step": 2773 }, { "epoch": 0.69, "learning_rate": 4.498384136471162e-06, "loss": 1.0833, "step": 2774 }, { "epoch": 0.69, "learning_rate": 4.491610605206277e-06, "loss": 1.0446, "step": 2775 }, { "epoch": 0.7, "learning_rate": 4.4848407001572945e-06, "loss": 1.0739, "step": 2776 }, { "epoch": 0.7, "learning_rate": 4.478074425780893e-06, "loss": 1.043, "step": 2777 }, { "epoch": 0.7, "learning_rate": 4.471311786531361e-06, "loss": 1.1053, "step": 2778 }, { "epoch": 0.7, "learning_rate": 4.464552786860607e-06, "loss": 1.0425, "step": 2779 }, { "epoch": 0.7, "learning_rate": 4.457797431218118e-06, "loss": 1.038, "step": 2780 }, { "epoch": 0.7, "learning_rate": 4.45104572405101e-06, "loss": 1.0531, "step": 2781 }, { "epoch": 0.7, "learning_rate": 4.444297669803981e-06, "loss": 1.047, "step": 2782 }, { "epoch": 0.7, "learning_rate": 4.4375532729193225e-06, "loss": 1.0787, "step": 2783 }, { "epoch": 0.7, "learning_rate": 4.430812537836934e-06, "loss": 1.113, "step": 2784 }, { "epoch": 0.7, "learning_rate": 4.4240754689942855e-06, "loss": 1.0247, "step": 2785 }, { "epoch": 0.7, "learning_rate": 4.417342070826452e-06, "loss": 1.0847, "step": 2786 }, { "epoch": 0.7, "learning_rate": 4.410612347766069e-06, "loss": 1.0832, "step": 2787 }, { "epoch": 0.7, "learning_rate": 4.403886304243378e-06, "loss": 1.0956, "step": 2788 }, { "epoch": 0.7, "learning_rate": 4.397163944686179e-06, "loss": 1.0879, "step": 2789 }, { "epoch": 0.7, "learning_rate": 4.3904452735198535e-06, "loss": 1.0585, "step": 2790 }, { "epoch": 0.7, "learning_rate": 4.383730295167362e-06, "loss": 1.0796, "step": 2791 }, { "epoch": 0.7, "learning_rate": 4.377019014049223e-06, "loss": 1.0746, "step": 2792 }, { "epoch": 0.7, "learning_rate": 4.370311434583525e-06, "loss": 1.0835, "step": 2793 }, { "epoch": 0.7, "learning_rate": 4.3636075611859185e-06, "loss": 1.0908, "step": 2794 }, { "epoch": 0.7, "learning_rate": 4.3569073982696204e-06, "loss": 1.0517, "step": 2795 }, { "epoch": 0.7, "learning_rate": 4.3502109502454e-06, "loss": 1.0497, "step": 2796 }, { "epoch": 0.7, "learning_rate": 4.343518221521576e-06, "loss": 1.042, "step": 2797 }, { "epoch": 0.7, "learning_rate": 4.33682921650403e-06, "loss": 1.06, "step": 2798 }, { "epoch": 0.7, "learning_rate": 4.330143939596186e-06, "loss": 1.0838, "step": 2799 }, { "epoch": 0.7, "learning_rate": 4.3234623951990115e-06, "loss": 1.0628, "step": 2800 }, { "epoch": 0.7, "learning_rate": 4.316784587711018e-06, "loss": 1.0884, "step": 2801 }, { "epoch": 0.7, "learning_rate": 4.310110521528268e-06, "loss": 0.9947, "step": 2802 }, { "epoch": 0.7, "learning_rate": 4.303440201044338e-06, "loss": 1.1203, "step": 2803 }, { "epoch": 0.7, "learning_rate": 4.296773630650358e-06, "loss": 1.0876, "step": 2804 }, { "epoch": 0.7, "learning_rate": 4.290110814734991e-06, "loss": 1.0785, "step": 2805 }, { "epoch": 0.7, "learning_rate": 4.283451757684409e-06, "loss": 1.053, "step": 2806 }, { "epoch": 0.7, "learning_rate": 4.276796463882329e-06, "loss": 1.0569, "step": 2807 }, { "epoch": 0.7, "learning_rate": 4.270144937709981e-06, "loss": 1.0668, "step": 2808 }, { "epoch": 0.7, "learning_rate": 4.263497183546118e-06, "loss": 1.0766, "step": 2809 }, { "epoch": 0.7, "learning_rate": 4.256853205767002e-06, "loss": 1.0741, "step": 2810 }, { "epoch": 0.7, "learning_rate": 4.250213008746426e-06, "loss": 1.0844, "step": 2811 }, { "epoch": 0.7, "learning_rate": 4.2435765968556775e-06, "loss": 1.0486, "step": 2812 }, { "epoch": 0.7, "learning_rate": 4.236943974463555e-06, "loss": 1.0582, "step": 2813 }, { "epoch": 0.7, "learning_rate": 4.2303151459363765e-06, "loss": 1.0844, "step": 2814 }, { "epoch": 0.71, "learning_rate": 4.223690115637944e-06, "loss": 1.0549, "step": 2815 }, { "epoch": 0.71, "learning_rate": 4.21706888792957e-06, "loss": 1.0975, "step": 2816 }, { "epoch": 0.71, "learning_rate": 4.210451467170054e-06, "loss": 1.0701, "step": 2817 }, { "epoch": 0.71, "learning_rate": 4.203837857715705e-06, "loss": 1.0319, "step": 2818 }, { "epoch": 0.71, "learning_rate": 4.19722806392031e-06, "loss": 1.0333, "step": 2819 }, { "epoch": 0.71, "learning_rate": 4.1906220901351446e-06, "loss": 1.0478, "step": 2820 }, { "epoch": 0.71, "learning_rate": 4.18401994070898e-06, "loss": 1.0592, "step": 2821 }, { "epoch": 0.71, "learning_rate": 4.1774216199880595e-06, "loss": 1.0625, "step": 2822 }, { "epoch": 0.71, "learning_rate": 4.170827132316109e-06, "loss": 1.0908, "step": 2823 }, { "epoch": 0.71, "learning_rate": 4.1642364820343276e-06, "loss": 1.0643, "step": 2824 }, { "epoch": 0.71, "learning_rate": 4.1576496734814e-06, "loss": 1.0916, "step": 2825 }, { "epoch": 0.71, "learning_rate": 4.15106671099347e-06, "loss": 1.0452, "step": 2826 }, { "epoch": 0.71, "learning_rate": 4.14448759890415e-06, "loss": 1.1186, "step": 2827 }, { "epoch": 0.71, "learning_rate": 4.1379123415445325e-06, "loss": 1.0509, "step": 2828 }, { "epoch": 0.71, "learning_rate": 4.131340943243144e-06, "loss": 1.0763, "step": 2829 }, { "epoch": 0.71, "learning_rate": 4.124773408326e-06, "loss": 1.0855, "step": 2830 }, { "epoch": 0.71, "learning_rate": 4.118209741116556e-06, "loss": 1.0605, "step": 2831 }, { "epoch": 0.71, "learning_rate": 4.111649945935724e-06, "loss": 1.0352, "step": 2832 }, { "epoch": 0.71, "learning_rate": 4.105094027101874e-06, "loss": 1.0626, "step": 2833 }, { "epoch": 0.71, "learning_rate": 4.098541988930814e-06, "loss": 1.0537, "step": 2834 }, { "epoch": 0.71, "learning_rate": 4.0919938357358045e-06, "loss": 1.0818, "step": 2835 }, { "epoch": 0.71, "learning_rate": 4.085449571827541e-06, "loss": 1.0591, "step": 2836 }, { "epoch": 0.71, "learning_rate": 4.078909201514172e-06, "loss": 1.0626, "step": 2837 }, { "epoch": 0.71, "learning_rate": 4.072372729101269e-06, "loss": 1.0693, "step": 2838 }, { "epoch": 0.71, "learning_rate": 4.065840158891841e-06, "loss": 1.077, "step": 2839 }, { "epoch": 0.71, "learning_rate": 4.059311495186338e-06, "loss": 1.0752, "step": 2840 }, { "epoch": 0.71, "learning_rate": 4.052786742282625e-06, "loss": 1.0703, "step": 2841 }, { "epoch": 0.71, "learning_rate": 4.046265904475999e-06, "loss": 1.0807, "step": 2842 }, { "epoch": 0.71, "learning_rate": 4.039748986059175e-06, "loss": 1.061, "step": 2843 }, { "epoch": 0.71, "learning_rate": 4.0332359913223015e-06, "loss": 1.0617, "step": 2844 }, { "epoch": 0.71, "learning_rate": 4.026726924552921e-06, "loss": 1.0905, "step": 2845 }, { "epoch": 0.71, "learning_rate": 4.020221790036012e-06, "loss": 1.0983, "step": 2846 }, { "epoch": 0.71, "learning_rate": 4.0137205920539504e-06, "loss": 1.0841, "step": 2847 }, { "epoch": 0.71, "learning_rate": 4.007223334886531e-06, "loss": 1.0617, "step": 2848 }, { "epoch": 0.71, "learning_rate": 4.000730022810947e-06, "loss": 1.0674, "step": 2849 }, { "epoch": 0.71, "learning_rate": 3.9942406601017925e-06, "loss": 1.09, "step": 2850 }, { "epoch": 0.71, "learning_rate": 3.9877552510310765e-06, "loss": 1.0503, "step": 2851 }, { "epoch": 0.71, "learning_rate": 3.981273799868181e-06, "loss": 1.0727, "step": 2852 }, { "epoch": 0.71, "learning_rate": 3.974796310879906e-06, "loss": 1.0927, "step": 2853 }, { "epoch": 0.71, "learning_rate": 3.968322788330429e-06, "loss": 1.0587, "step": 2854 }, { "epoch": 0.72, "learning_rate": 3.961853236481319e-06, "loss": 1.0682, "step": 2855 }, { "epoch": 0.72, "learning_rate": 3.955387659591538e-06, "loss": 1.0751, "step": 2856 }, { "epoch": 0.72, "learning_rate": 3.948926061917423e-06, "loss": 1.0501, "step": 2857 }, { "epoch": 0.72, "learning_rate": 3.942468447712695e-06, "loss": 1.0306, "step": 2858 }, { "epoch": 0.72, "learning_rate": 3.936014821228448e-06, "loss": 1.097, "step": 2859 }, { "epoch": 0.72, "learning_rate": 3.929565186713161e-06, "loss": 1.0557, "step": 2860 }, { "epoch": 0.72, "learning_rate": 3.923119548412679e-06, "loss": 1.0521, "step": 2861 }, { "epoch": 0.72, "learning_rate": 3.916677910570207e-06, "loss": 1.0674, "step": 2862 }, { "epoch": 0.72, "learning_rate": 3.910240277426337e-06, "loss": 1.0978, "step": 2863 }, { "epoch": 0.72, "learning_rate": 3.903806653219008e-06, "loss": 1.0496, "step": 2864 }, { "epoch": 0.72, "learning_rate": 3.8973770421835255e-06, "loss": 1.0583, "step": 2865 }, { "epoch": 0.72, "learning_rate": 3.89095144855255e-06, "loss": 1.0235, "step": 2866 }, { "epoch": 0.72, "learning_rate": 3.884529876556111e-06, "loss": 1.0668, "step": 2867 }, { "epoch": 0.72, "learning_rate": 3.878112330421564e-06, "loss": 1.036, "step": 2868 }, { "epoch": 0.72, "learning_rate": 3.871698814373638e-06, "loss": 1.0433, "step": 2869 }, { "epoch": 0.72, "learning_rate": 3.865289332634407e-06, "loss": 1.0598, "step": 2870 }, { "epoch": 0.72, "learning_rate": 3.858883889423272e-06, "loss": 1.0569, "step": 2871 }, { "epoch": 0.72, "learning_rate": 3.852482488956992e-06, "loss": 1.0574, "step": 2872 }, { "epoch": 0.72, "learning_rate": 3.84608513544966e-06, "loss": 1.0526, "step": 2873 }, { "epoch": 0.72, "learning_rate": 3.839691833112701e-06, "loss": 1.0659, "step": 2874 }, { "epoch": 0.72, "learning_rate": 3.833302586154874e-06, "loss": 1.0522, "step": 2875 }, { "epoch": 0.72, "learning_rate": 3.826917398782277e-06, "loss": 1.075, "step": 2876 }, { "epoch": 0.72, "learning_rate": 3.8205362751983265e-06, "loss": 1.0459, "step": 2877 }, { "epoch": 0.72, "learning_rate": 3.8141592196037602e-06, "loss": 1.0842, "step": 2878 }, { "epoch": 0.72, "learning_rate": 3.807786236196652e-06, "loss": 1.0491, "step": 2879 }, { "epoch": 0.72, "learning_rate": 3.8014173291723846e-06, "loss": 1.0562, "step": 2880 }, { "epoch": 0.72, "learning_rate": 3.7950525027236585e-06, "loss": 1.06, "step": 2881 }, { "epoch": 0.72, "learning_rate": 3.788691761040484e-06, "loss": 1.0677, "step": 2882 }, { "epoch": 0.72, "learning_rate": 3.7823351083101954e-06, "loss": 1.0693, "step": 2883 }, { "epoch": 0.72, "learning_rate": 3.7759825487174227e-06, "loss": 1.0784, "step": 2884 }, { "epoch": 0.72, "learning_rate": 3.769634086444104e-06, "loss": 1.0434, "step": 2885 }, { "epoch": 0.72, "learning_rate": 3.763289725669488e-06, "loss": 1.0394, "step": 2886 }, { "epoch": 0.72, "learning_rate": 3.7569494705701117e-06, "loss": 1.0171, "step": 2887 }, { "epoch": 0.72, "learning_rate": 3.7506133253198173e-06, "loss": 1.0895, "step": 2888 }, { "epoch": 0.72, "learning_rate": 3.7442812940897334e-06, "loss": 1.0916, "step": 2889 }, { "epoch": 0.72, "learning_rate": 3.737953381048294e-06, "loss": 1.0891, "step": 2890 }, { "epoch": 0.72, "learning_rate": 3.73162959036121e-06, "loss": 1.1092, "step": 2891 }, { "epoch": 0.72, "learning_rate": 3.7253099261914794e-06, "loss": 1.0268, "step": 2892 }, { "epoch": 0.72, "learning_rate": 3.7189943926993954e-06, "loss": 1.0439, "step": 2893 }, { "epoch": 0.72, "learning_rate": 3.7126829940425113e-06, "loss": 1.046, "step": 2894 }, { "epoch": 0.73, "learning_rate": 3.706375734375679e-06, "loss": 1.033, "step": 2895 }, { "epoch": 0.73, "learning_rate": 3.700072617851015e-06, "loss": 1.0586, "step": 2896 }, { "epoch": 0.73, "learning_rate": 3.6937736486179065e-06, "loss": 1.0503, "step": 2897 }, { "epoch": 0.73, "learning_rate": 3.6874788308230203e-06, "loss": 1.0663, "step": 2898 }, { "epoch": 0.73, "learning_rate": 3.681188168610281e-06, "loss": 1.0852, "step": 2899 }, { "epoch": 0.73, "learning_rate": 3.6749016661208814e-06, "loss": 1.0905, "step": 2900 }, { "epoch": 0.73, "learning_rate": 3.6686193274932724e-06, "loss": 1.095, "step": 2901 }, { "epoch": 0.73, "learning_rate": 3.6623411568631718e-06, "loss": 1.0276, "step": 2902 }, { "epoch": 0.73, "learning_rate": 3.6560671583635467e-06, "loss": 1.0537, "step": 2903 }, { "epoch": 0.73, "learning_rate": 3.6497973361246153e-06, "loss": 1.063, "step": 2904 }, { "epoch": 0.73, "learning_rate": 3.6435316942738584e-06, "loss": 1.08, "step": 2905 }, { "epoch": 0.73, "learning_rate": 3.6372702369359938e-06, "loss": 1.0256, "step": 2906 }, { "epoch": 0.73, "learning_rate": 3.6310129682329854e-06, "loss": 1.074, "step": 2907 }, { "epoch": 0.73, "learning_rate": 3.624759892284042e-06, "loss": 1.0548, "step": 2908 }, { "epoch": 0.73, "learning_rate": 3.61851101320562e-06, "loss": 1.0795, "step": 2909 }, { "epoch": 0.73, "learning_rate": 3.612266335111395e-06, "loss": 1.055, "step": 2910 }, { "epoch": 0.73, "learning_rate": 3.606025862112291e-06, "loss": 1.0775, "step": 2911 }, { "epoch": 0.73, "learning_rate": 3.5997895983164655e-06, "loss": 1.0529, "step": 2912 }, { "epoch": 0.73, "learning_rate": 3.5935575478292974e-06, "loss": 1.0497, "step": 2913 }, { "epoch": 0.73, "learning_rate": 3.5873297147533913e-06, "loss": 1.0507, "step": 2914 }, { "epoch": 0.73, "learning_rate": 3.5811061031885776e-06, "loss": 1.0684, "step": 2915 }, { "epoch": 0.73, "learning_rate": 3.574886717231919e-06, "loss": 1.0649, "step": 2916 }, { "epoch": 0.73, "learning_rate": 3.56867156097767e-06, "loss": 1.0375, "step": 2917 }, { "epoch": 0.73, "learning_rate": 3.5624606385173253e-06, "loss": 1.0909, "step": 2918 }, { "epoch": 0.73, "learning_rate": 3.556253953939591e-06, "loss": 1.0864, "step": 2919 }, { "epoch": 0.73, "learning_rate": 3.550051511330361e-06, "loss": 1.0299, "step": 2920 }, { "epoch": 0.73, "learning_rate": 3.5438533147727637e-06, "loss": 1.0615, "step": 2921 }, { "epoch": 0.73, "learning_rate": 3.537659368347115e-06, "loss": 1.0541, "step": 2922 }, { "epoch": 0.73, "learning_rate": 3.531469676130941e-06, "loss": 1.0482, "step": 2923 }, { "epoch": 0.73, "learning_rate": 3.5252842421989585e-06, "loss": 1.088, "step": 2924 }, { "epoch": 0.73, "learning_rate": 3.5191030706230967e-06, "loss": 1.0642, "step": 2925 }, { "epoch": 0.73, "learning_rate": 3.5129261654724633e-06, "loss": 1.1026, "step": 2926 }, { "epoch": 0.73, "learning_rate": 3.5067535308133615e-06, "loss": 1.0583, "step": 2927 }, { "epoch": 0.73, "learning_rate": 3.500585170709292e-06, "loss": 0.9925, "step": 2928 }, { "epoch": 0.73, "learning_rate": 3.4944210892209296e-06, "loss": 1.0667, "step": 2929 }, { "epoch": 0.73, "learning_rate": 3.4882612904061384e-06, "loss": 1.0941, "step": 2930 }, { "epoch": 0.73, "learning_rate": 3.4821057783199596e-06, "loss": 1.0844, "step": 2931 }, { "epoch": 0.73, "learning_rate": 3.4759545570146215e-06, "loss": 1.0967, "step": 2932 }, { "epoch": 0.73, "learning_rate": 3.469807630539517e-06, "loss": 1.0747, "step": 2933 }, { "epoch": 0.73, "learning_rate": 3.4636650029412123e-06, "loss": 1.1198, "step": 2934 }, { "epoch": 0.74, "learning_rate": 3.4575266782634585e-06, "loss": 1.0716, "step": 2935 }, { "epoch": 0.74, "learning_rate": 3.4513926605471504e-06, "loss": 1.0718, "step": 2936 }, { "epoch": 0.74, "learning_rate": 3.445262953830367e-06, "loss": 1.0451, "step": 2937 }, { "epoch": 0.74, "learning_rate": 3.4391375621483412e-06, "loss": 1.0874, "step": 2938 }, { "epoch": 0.74, "learning_rate": 3.4330164895334626e-06, "loss": 1.0927, "step": 2939 }, { "epoch": 0.74, "learning_rate": 3.426899740015288e-06, "loss": 1.0758, "step": 2940 }, { "epoch": 0.74, "learning_rate": 3.4207873176205177e-06, "loss": 1.057, "step": 2941 }, { "epoch": 0.74, "learning_rate": 3.4146792263730065e-06, "loss": 1.0669, "step": 2942 }, { "epoch": 0.74, "learning_rate": 3.408575470293758e-06, "loss": 1.0722, "step": 2943 }, { "epoch": 0.74, "learning_rate": 3.402476053400926e-06, "loss": 1.0623, "step": 2944 }, { "epoch": 0.74, "learning_rate": 3.3963809797098034e-06, "loss": 1.1386, "step": 2945 }, { "epoch": 0.74, "learning_rate": 3.390290253232822e-06, "loss": 1.0769, "step": 2946 }, { "epoch": 0.74, "learning_rate": 3.3842038779795594e-06, "loss": 1.119, "step": 2947 }, { "epoch": 0.74, "learning_rate": 3.378121857956722e-06, "loss": 1.0671, "step": 2948 }, { "epoch": 0.74, "learning_rate": 3.3720441971681515e-06, "loss": 1.0548, "step": 2949 }, { "epoch": 0.74, "learning_rate": 3.3659708996148155e-06, "loss": 1.0182, "step": 2950 }, { "epoch": 0.74, "learning_rate": 3.3599019692948196e-06, "loss": 1.0683, "step": 2951 }, { "epoch": 0.74, "learning_rate": 3.3538374102033865e-06, "loss": 1.0532, "step": 2952 }, { "epoch": 0.74, "learning_rate": 3.347777226332859e-06, "loss": 1.0871, "step": 2953 }, { "epoch": 0.74, "learning_rate": 3.34172142167271e-06, "loss": 1.0753, "step": 2954 }, { "epoch": 0.74, "learning_rate": 3.33567000020952e-06, "loss": 1.0468, "step": 2955 }, { "epoch": 0.74, "learning_rate": 3.329622965926986e-06, "loss": 1.0548, "step": 2956 }, { "epoch": 0.74, "learning_rate": 3.323580322805917e-06, "loss": 1.0708, "step": 2957 }, { "epoch": 0.74, "learning_rate": 3.3175420748242405e-06, "loss": 1.0446, "step": 2958 }, { "epoch": 0.74, "learning_rate": 3.31150822595697e-06, "loss": 1.0835, "step": 2959 }, { "epoch": 0.74, "learning_rate": 3.305478780176241e-06, "loss": 1.1046, "step": 2960 }, { "epoch": 0.74, "learning_rate": 3.299453741451293e-06, "loss": 1.0721, "step": 2961 }, { "epoch": 0.74, "learning_rate": 3.2934331137484432e-06, "loss": 1.0892, "step": 2962 }, { "epoch": 0.74, "learning_rate": 3.287416901031126e-06, "loss": 1.0556, "step": 2963 }, { "epoch": 0.74, "learning_rate": 3.2814051072598596e-06, "loss": 1.0496, "step": 2964 }, { "epoch": 0.74, "learning_rate": 3.275397736392254e-06, "loss": 1.0729, "step": 2965 }, { "epoch": 0.74, "learning_rate": 3.269394792383006e-06, "loss": 1.0922, "step": 2966 }, { "epoch": 0.74, "learning_rate": 3.263396279183907e-06, "loss": 1.0683, "step": 2967 }, { "epoch": 0.74, "learning_rate": 3.257402200743821e-06, "loss": 1.058, "step": 2968 }, { "epoch": 0.74, "learning_rate": 3.2514125610086957e-06, "loss": 1.0593, "step": 2969 }, { "epoch": 0.74, "learning_rate": 3.2454273639215627e-06, "loss": 1.0799, "step": 2970 }, { "epoch": 0.74, "learning_rate": 3.239446613422521e-06, "loss": 1.0733, "step": 2971 }, { "epoch": 0.74, "learning_rate": 3.2334703134487464e-06, "loss": 1.0586, "step": 2972 }, { "epoch": 0.74, "learning_rate": 3.2274984679344803e-06, "loss": 1.0801, "step": 2973 }, { "epoch": 0.74, "learning_rate": 3.2215310808110443e-06, "loss": 1.0789, "step": 2974 }, { "epoch": 0.75, "learning_rate": 3.215568156006804e-06, "loss": 1.0758, "step": 2975 }, { "epoch": 0.75, "learning_rate": 3.209609697447206e-06, "loss": 1.0324, "step": 2976 }, { "epoch": 0.75, "learning_rate": 3.2036557090547536e-06, "loss": 1.0534, "step": 2977 }, { "epoch": 0.75, "learning_rate": 3.197706194749001e-06, "loss": 1.0729, "step": 2978 }, { "epoch": 0.75, "learning_rate": 3.191761158446558e-06, "loss": 1.0722, "step": 2979 }, { "epoch": 0.75, "learning_rate": 3.1858206040610883e-06, "loss": 1.0414, "step": 2980 }, { "epoch": 0.75, "learning_rate": 3.1798845355033126e-06, "loss": 1.0504, "step": 2981 }, { "epoch": 0.75, "learning_rate": 3.173952956680981e-06, "loss": 1.0673, "step": 2982 }, { "epoch": 0.75, "learning_rate": 3.168025871498902e-06, "loss": 1.1111, "step": 2983 }, { "epoch": 0.75, "learning_rate": 3.1621032838589307e-06, "loss": 1.046, "step": 2984 }, { "epoch": 0.75, "learning_rate": 3.156185197659939e-06, "loss": 1.0553, "step": 2985 }, { "epoch": 0.75, "learning_rate": 3.15027161679786e-06, "loss": 1.0667, "step": 2986 }, { "epoch": 0.75, "learning_rate": 3.144362545165647e-06, "loss": 1.0989, "step": 2987 }, { "epoch": 0.75, "learning_rate": 3.138457986653286e-06, "loss": 1.0739, "step": 2988 }, { "epoch": 0.75, "learning_rate": 3.132557945147794e-06, "loss": 1.086, "step": 2989 }, { "epoch": 0.75, "learning_rate": 3.126662424533219e-06, "loss": 1.0323, "step": 2990 }, { "epoch": 0.75, "learning_rate": 3.1207714286906253e-06, "loss": 1.0774, "step": 2991 }, { "epoch": 0.75, "learning_rate": 3.1148849614981003e-06, "loss": 1.0462, "step": 2992 }, { "epoch": 0.75, "learning_rate": 3.109003026830756e-06, "loss": 1.0279, "step": 2993 }, { "epoch": 0.75, "learning_rate": 3.1031256285607127e-06, "loss": 1.1018, "step": 2994 }, { "epoch": 0.75, "learning_rate": 3.0972527705571088e-06, "loss": 1.0732, "step": 2995 }, { "epoch": 0.75, "learning_rate": 3.0913844566860896e-06, "loss": 1.0334, "step": 2996 }, { "epoch": 0.75, "learning_rate": 3.085520690810817e-06, "loss": 1.036, "step": 2997 }, { "epoch": 0.75, "learning_rate": 3.0796614767914523e-06, "loss": 1.0547, "step": 2998 }, { "epoch": 0.75, "learning_rate": 3.073806818485159e-06, "loss": 1.0494, "step": 2999 }, { "epoch": 0.75, "learning_rate": 3.0679567197461135e-06, "loss": 1.0681, "step": 3000 }, { "epoch": 0.75, "learning_rate": 3.0621111844254682e-06, "loss": 1.0694, "step": 3001 }, { "epoch": 0.75, "learning_rate": 3.0562702163713954e-06, "loss": 1.0224, "step": 3002 }, { "epoch": 0.75, "learning_rate": 3.0504338194290463e-06, "loss": 1.062, "step": 3003 }, { "epoch": 0.75, "learning_rate": 3.044601997440564e-06, "loss": 1.0635, "step": 3004 }, { "epoch": 0.75, "learning_rate": 3.0387747542450887e-06, "loss": 1.043, "step": 3005 }, { "epoch": 0.75, "learning_rate": 3.032952093678737e-06, "loss": 1.036, "step": 3006 }, { "epoch": 0.75, "learning_rate": 3.0271340195746124e-06, "loss": 1.0637, "step": 3007 }, { "epoch": 0.75, "learning_rate": 3.021320535762795e-06, "loss": 1.0896, "step": 3008 }, { "epoch": 0.75, "learning_rate": 3.0155116460703536e-06, "loss": 1.0291, "step": 3009 }, { "epoch": 0.75, "learning_rate": 3.0097073543213227e-06, "loss": 1.0598, "step": 3010 }, { "epoch": 0.75, "learning_rate": 3.003907664336709e-06, "loss": 1.0718, "step": 3011 }, { "epoch": 0.75, "learning_rate": 2.998112579934501e-06, "loss": 1.0756, "step": 3012 }, { "epoch": 0.75, "learning_rate": 2.9923221049296448e-06, "loss": 1.0828, "step": 3013 }, { "epoch": 0.75, "learning_rate": 2.986536243134056e-06, "loss": 1.0853, "step": 3014 }, { "epoch": 0.76, "learning_rate": 2.9807549983566076e-06, "loss": 1.0695, "step": 3015 }, { "epoch": 0.76, "learning_rate": 2.974978374403147e-06, "loss": 1.0526, "step": 3016 }, { "epoch": 0.76, "learning_rate": 2.9692063750764666e-06, "loss": 1.0764, "step": 3017 }, { "epoch": 0.76, "learning_rate": 2.963439004176316e-06, "loss": 1.054, "step": 3018 }, { "epoch": 0.76, "learning_rate": 2.9576762654994075e-06, "loss": 1.1056, "step": 3019 }, { "epoch": 0.76, "learning_rate": 2.9519181628393932e-06, "loss": 1.0867, "step": 3020 }, { "epoch": 0.76, "learning_rate": 2.9461646999868766e-06, "loss": 1.1165, "step": 3021 }, { "epoch": 0.76, "learning_rate": 2.9404158807294057e-06, "loss": 1.0835, "step": 3022 }, { "epoch": 0.76, "learning_rate": 2.934671708851482e-06, "loss": 1.0487, "step": 3023 }, { "epoch": 0.76, "learning_rate": 2.9289321881345257e-06, "loss": 1.0692, "step": 3024 }, { "epoch": 0.76, "learning_rate": 2.9231973223569156e-06, "loss": 1.0844, "step": 3025 }, { "epoch": 0.76, "learning_rate": 2.917467115293964e-06, "loss": 1.0202, "step": 3026 }, { "epoch": 0.76, "learning_rate": 2.9117415707178996e-06, "loss": 1.0503, "step": 3027 }, { "epoch": 0.76, "learning_rate": 2.906020692397903e-06, "loss": 1.0652, "step": 3028 }, { "epoch": 0.76, "learning_rate": 2.9003044841000682e-06, "loss": 1.0859, "step": 3029 }, { "epoch": 0.76, "learning_rate": 2.8945929495874215e-06, "loss": 1.0517, "step": 3030 }, { "epoch": 0.76, "learning_rate": 2.888886092619907e-06, "loss": 1.0489, "step": 3031 }, { "epoch": 0.76, "learning_rate": 2.8831839169543998e-06, "loss": 1.0251, "step": 3032 }, { "epoch": 0.76, "learning_rate": 2.877486426344683e-06, "loss": 1.0295, "step": 3033 }, { "epoch": 0.76, "learning_rate": 2.871793624541458e-06, "loss": 1.0523, "step": 3034 }, { "epoch": 0.76, "learning_rate": 2.8661055152923456e-06, "loss": 1.0695, "step": 3035 }, { "epoch": 0.76, "learning_rate": 2.86042210234187e-06, "loss": 1.0848, "step": 3036 }, { "epoch": 0.76, "learning_rate": 2.8547433894314656e-06, "loss": 1.0397, "step": 3037 }, { "epoch": 0.76, "learning_rate": 2.8490693802994717e-06, "loss": 1.0419, "step": 3038 }, { "epoch": 0.76, "learning_rate": 2.84340007868114e-06, "loss": 1.0634, "step": 3039 }, { "epoch": 0.76, "learning_rate": 2.8377354883086105e-06, "loss": 1.0804, "step": 3040 }, { "epoch": 0.76, "learning_rate": 2.8320756129109263e-06, "loss": 1.0637, "step": 3041 }, { "epoch": 0.76, "learning_rate": 2.8264204562140328e-06, "loss": 1.0437, "step": 3042 }, { "epoch": 0.76, "learning_rate": 2.820770021940762e-06, "loss": 1.0935, "step": 3043 }, { "epoch": 0.76, "learning_rate": 2.8151243138108388e-06, "loss": 1.0831, "step": 3044 }, { "epoch": 0.76, "learning_rate": 2.809483335540872e-06, "loss": 1.0794, "step": 3045 }, { "epoch": 0.76, "learning_rate": 2.8038470908443717e-06, "loss": 1.0607, "step": 3046 }, { "epoch": 0.76, "learning_rate": 2.798215583431715e-06, "loss": 1.0797, "step": 3047 }, { "epoch": 0.76, "learning_rate": 2.7925888170101667e-06, "loss": 1.0413, "step": 3048 }, { "epoch": 0.76, "learning_rate": 2.78696679528388e-06, "loss": 1.046, "step": 3049 }, { "epoch": 0.76, "learning_rate": 2.781349521953863e-06, "loss": 1.1324, "step": 3050 }, { "epoch": 0.76, "learning_rate": 2.775737000718021e-06, "loss": 1.0759, "step": 3051 }, { "epoch": 0.76, "learning_rate": 2.7701292352711164e-06, "loss": 1.0687, "step": 3052 }, { "epoch": 0.76, "learning_rate": 2.7645262293047825e-06, "loss": 1.041, "step": 3053 }, { "epoch": 0.76, "learning_rate": 2.758927986507529e-06, "loss": 1.0638, "step": 3054 }, { "epoch": 0.77, "learning_rate": 2.753334510564719e-06, "loss": 1.0783, "step": 3055 }, { "epoch": 0.77, "learning_rate": 2.7477458051585815e-06, "loss": 1.0528, "step": 3056 }, { "epoch": 0.77, "learning_rate": 2.742161873968202e-06, "loss": 1.0994, "step": 3057 }, { "epoch": 0.77, "learning_rate": 2.7365827206695327e-06, "loss": 1.0179, "step": 3058 }, { "epoch": 0.77, "learning_rate": 2.7310083489353713e-06, "loss": 1.0731, "step": 3059 }, { "epoch": 0.77, "learning_rate": 2.725438762435366e-06, "loss": 1.0579, "step": 3060 }, { "epoch": 0.77, "learning_rate": 2.719873964836026e-06, "loss": 1.0937, "step": 3061 }, { "epoch": 0.77, "learning_rate": 2.7143139598006985e-06, "loss": 1.0406, "step": 3062 }, { "epoch": 0.77, "learning_rate": 2.7087587509895787e-06, "loss": 1.062, "step": 3063 }, { "epoch": 0.77, "learning_rate": 2.7032083420597e-06, "loss": 1.0365, "step": 3064 }, { "epoch": 0.77, "learning_rate": 2.6976627366649488e-06, "loss": 1.0745, "step": 3065 }, { "epoch": 0.77, "learning_rate": 2.6921219384560305e-06, "loss": 1.0763, "step": 3066 }, { "epoch": 0.77, "learning_rate": 2.6865859510805005e-06, "loss": 1.1081, "step": 3067 }, { "epoch": 0.77, "learning_rate": 2.681054778182748e-06, "loss": 1.1141, "step": 3068 }, { "epoch": 0.77, "learning_rate": 2.675528423403977e-06, "loss": 1.0752, "step": 3069 }, { "epoch": 0.77, "learning_rate": 2.6700068903822398e-06, "loss": 1.069, "step": 3070 }, { "epoch": 0.77, "learning_rate": 2.6644901827523993e-06, "loss": 1.0582, "step": 3071 }, { "epoch": 0.77, "learning_rate": 2.658978304146149e-06, "loss": 1.0611, "step": 3072 }, { "epoch": 0.77, "learning_rate": 2.6534712581919985e-06, "loss": 1.0584, "step": 3073 }, { "epoch": 0.77, "learning_rate": 2.647969048515282e-06, "loss": 1.0448, "step": 3074 }, { "epoch": 0.77, "learning_rate": 2.6424716787381546e-06, "loss": 1.0368, "step": 3075 }, { "epoch": 0.77, "learning_rate": 2.636979152479563e-06, "loss": 1.0374, "step": 3076 }, { "epoch": 0.77, "learning_rate": 2.6314914733552922e-06, "loss": 1.0303, "step": 3077 }, { "epoch": 0.77, "learning_rate": 2.62600864497792e-06, "loss": 1.0352, "step": 3078 }, { "epoch": 0.77, "learning_rate": 2.6205306709568358e-06, "loss": 1.0736, "step": 3079 }, { "epoch": 0.77, "learning_rate": 2.6150575548982295e-06, "loss": 1.058, "step": 3080 }, { "epoch": 0.77, "learning_rate": 2.609589300405102e-06, "loss": 1.0615, "step": 3081 }, { "epoch": 0.77, "learning_rate": 2.6041259110772467e-06, "loss": 1.0306, "step": 3082 }, { "epoch": 0.77, "learning_rate": 2.5986673905112503e-06, "loss": 1.0551, "step": 3083 }, { "epoch": 0.77, "learning_rate": 2.5932137423005066e-06, "loss": 1.072, "step": 3084 }, { "epoch": 0.77, "learning_rate": 2.5877649700351915e-06, "loss": 1.0729, "step": 3085 }, { "epoch": 0.77, "learning_rate": 2.5823210773022755e-06, "loss": 1.0285, "step": 3086 }, { "epoch": 0.77, "learning_rate": 2.576882067685513e-06, "loss": 1.0939, "step": 3087 }, { "epoch": 0.77, "learning_rate": 2.571447944765455e-06, "loss": 1.1102, "step": 3088 }, { "epoch": 0.77, "learning_rate": 2.5660187121194145e-06, "loss": 1.0446, "step": 3089 }, { "epoch": 0.77, "learning_rate": 2.5605943733215044e-06, "loss": 1.0953, "step": 3090 }, { "epoch": 0.77, "learning_rate": 2.5551749319426167e-06, "loss": 1.0481, "step": 3091 }, { "epoch": 0.77, "learning_rate": 2.5497603915503975e-06, "loss": 1.0413, "step": 3092 }, { "epoch": 0.77, "learning_rate": 2.5443507557092926e-06, "loss": 1.1241, "step": 3093 }, { "epoch": 0.77, "learning_rate": 2.5389460279805043e-06, "loss": 1.0657, "step": 3094 }, { "epoch": 0.78, "learning_rate": 2.5335462119220055e-06, "loss": 1.0728, "step": 3095 }, { "epoch": 0.78, "learning_rate": 2.528151311088537e-06, "loss": 1.0627, "step": 3096 }, { "epoch": 0.78, "learning_rate": 2.522761329031608e-06, "loss": 1.0777, "step": 3097 }, { "epoch": 0.78, "learning_rate": 2.517376269299484e-06, "loss": 1.0543, "step": 3098 }, { "epoch": 0.78, "learning_rate": 2.511996135437189e-06, "loss": 1.0879, "step": 3099 }, { "epoch": 0.78, "learning_rate": 2.506620930986512e-06, "loss": 1.0371, "step": 3100 }, { "epoch": 0.78, "learning_rate": 2.501250659485992e-06, "loss": 1.0172, "step": 3101 }, { "epoch": 0.78, "learning_rate": 2.495885324470918e-06, "loss": 1.0783, "step": 3102 }, { "epoch": 0.78, "learning_rate": 2.4905249294733303e-06, "loss": 1.063, "step": 3103 }, { "epoch": 0.78, "learning_rate": 2.485169478022025e-06, "loss": 1.0745, "step": 3104 }, { "epoch": 0.78, "learning_rate": 2.4798189736425347e-06, "loss": 1.072, "step": 3105 }, { "epoch": 0.78, "learning_rate": 2.474473419857135e-06, "loss": 1.0923, "step": 3106 }, { "epoch": 0.78, "learning_rate": 2.469132820184853e-06, "loss": 1.0507, "step": 3107 }, { "epoch": 0.78, "learning_rate": 2.463797178141443e-06, "loss": 1.0738, "step": 3108 }, { "epoch": 0.78, "learning_rate": 2.4584664972393992e-06, "loss": 1.0422, "step": 3109 }, { "epoch": 0.78, "learning_rate": 2.4531407809879504e-06, "loss": 1.0422, "step": 3110 }, { "epoch": 0.78, "learning_rate": 2.44782003289306e-06, "loss": 1.0515, "step": 3111 }, { "epoch": 0.78, "learning_rate": 2.4425042564574186e-06, "loss": 1.0771, "step": 3112 }, { "epoch": 0.78, "learning_rate": 2.4371934551804377e-06, "loss": 1.123, "step": 3113 }, { "epoch": 0.78, "learning_rate": 2.4318876325582697e-06, "loss": 1.0537, "step": 3114 }, { "epoch": 0.78, "learning_rate": 2.426586792083767e-06, "loss": 1.0711, "step": 3115 }, { "epoch": 0.78, "learning_rate": 2.421290937246523e-06, "loss": 1.053, "step": 3116 }, { "epoch": 0.78, "learning_rate": 2.416000071532838e-06, "loss": 1.0741, "step": 3117 }, { "epoch": 0.78, "learning_rate": 2.4107141984257278e-06, "loss": 1.0414, "step": 3118 }, { "epoch": 0.78, "learning_rate": 2.4054333214049287e-06, "loss": 1.0551, "step": 3119 }, { "epoch": 0.78, "learning_rate": 2.400157443946881e-06, "loss": 1.0665, "step": 3120 }, { "epoch": 0.78, "learning_rate": 2.394886569524737e-06, "loss": 1.0714, "step": 3121 }, { "epoch": 0.78, "learning_rate": 2.3896207016083505e-06, "loss": 1.0912, "step": 3122 }, { "epoch": 0.78, "learning_rate": 2.38435984366429e-06, "loss": 1.052, "step": 3123 }, { "epoch": 0.78, "learning_rate": 2.3791039991558164e-06, "loss": 1.0661, "step": 3124 }, { "epoch": 0.78, "learning_rate": 2.3738531715428913e-06, "loss": 1.0414, "step": 3125 }, { "epoch": 0.78, "learning_rate": 2.3686073642821807e-06, "loss": 1.0795, "step": 3126 }, { "epoch": 0.78, "learning_rate": 2.3633665808270366e-06, "loss": 1.0272, "step": 3127 }, { "epoch": 0.78, "learning_rate": 2.3581308246275103e-06, "loss": 1.0569, "step": 3128 }, { "epoch": 0.78, "learning_rate": 2.3529000991303363e-06, "loss": 1.0547, "step": 3129 }, { "epoch": 0.78, "learning_rate": 2.3476744077789514e-06, "loss": 1.084, "step": 3130 }, { "epoch": 0.78, "learning_rate": 2.3424537540134583e-06, "loss": 1.0375, "step": 3131 }, { "epoch": 0.78, "learning_rate": 2.3372381412706592e-06, "loss": 1.0509, "step": 3132 }, { "epoch": 0.78, "learning_rate": 2.332027572984039e-06, "loss": 1.0431, "step": 3133 }, { "epoch": 0.78, "learning_rate": 2.3268220525837436e-06, "loss": 1.1148, "step": 3134 }, { "epoch": 0.79, "learning_rate": 2.321621583496617e-06, "loss": 1.0621, "step": 3135 }, { "epoch": 0.79, "learning_rate": 2.316426169146165e-06, "loss": 1.0437, "step": 3136 }, { "epoch": 0.79, "learning_rate": 2.3112358129525714e-06, "loss": 1.0535, "step": 3137 }, { "epoch": 0.79, "learning_rate": 2.306050518332684e-06, "loss": 1.0447, "step": 3138 }, { "epoch": 0.79, "learning_rate": 2.300870288700026e-06, "loss": 1.0586, "step": 3139 }, { "epoch": 0.79, "learning_rate": 2.2956951274647887e-06, "loss": 1.0836, "step": 3140 }, { "epoch": 0.79, "learning_rate": 2.2905250380338105e-06, "loss": 1.0929, "step": 3141 }, { "epoch": 0.79, "learning_rate": 2.285360023810611e-06, "loss": 1.0425, "step": 3142 }, { "epoch": 0.79, "learning_rate": 2.2802000881953547e-06, "loss": 1.0127, "step": 3143 }, { "epoch": 0.79, "learning_rate": 2.2750452345848684e-06, "loss": 1.0333, "step": 3144 }, { "epoch": 0.79, "learning_rate": 2.26989546637263e-06, "loss": 1.0908, "step": 3145 }, { "epoch": 0.79, "learning_rate": 2.2647507869487784e-06, "loss": 1.052, "step": 3146 }, { "epoch": 0.79, "learning_rate": 2.2596111997000935e-06, "loss": 1.0595, "step": 3147 }, { "epoch": 0.79, "learning_rate": 2.2544767080100017e-06, "loss": 1.0281, "step": 3148 }, { "epoch": 0.79, "learning_rate": 2.2493473152585865e-06, "loss": 1.059, "step": 3149 }, { "epoch": 0.79, "learning_rate": 2.244223024822564e-06, "loss": 1.0696, "step": 3150 }, { "epoch": 0.79, "learning_rate": 2.239103840075293e-06, "loss": 1.0597, "step": 3151 }, { "epoch": 0.79, "learning_rate": 2.2339897643867724e-06, "loss": 1.013, "step": 3152 }, { "epoch": 0.79, "learning_rate": 2.2288808011236428e-06, "loss": 1.0364, "step": 3153 }, { "epoch": 0.79, "learning_rate": 2.2237769536491728e-06, "loss": 1.067, "step": 3154 }, { "epoch": 0.79, "learning_rate": 2.218678225323262e-06, "loss": 1.0707, "step": 3155 }, { "epoch": 0.79, "learning_rate": 2.213584619502451e-06, "loss": 1.0469, "step": 3156 }, { "epoch": 0.79, "learning_rate": 2.2084961395398897e-06, "loss": 1.0335, "step": 3157 }, { "epoch": 0.79, "learning_rate": 2.2034127887853717e-06, "loss": 1.0984, "step": 3158 }, { "epoch": 0.79, "learning_rate": 2.1983345705853066e-06, "loss": 1.0877, "step": 3159 }, { "epoch": 0.79, "learning_rate": 2.1932614882827196e-06, "loss": 1.0623, "step": 3160 }, { "epoch": 0.79, "learning_rate": 2.1881935452172676e-06, "loss": 1.0612, "step": 3161 }, { "epoch": 0.79, "learning_rate": 2.1831307447252136e-06, "loss": 1.0604, "step": 3162 }, { "epoch": 0.79, "learning_rate": 2.1780730901394385e-06, "loss": 1.0882, "step": 3163 }, { "epoch": 0.79, "learning_rate": 2.1730205847894346e-06, "loss": 1.0857, "step": 3164 }, { "epoch": 0.79, "learning_rate": 2.1679732320013113e-06, "loss": 1.0768, "step": 3165 }, { "epoch": 0.79, "learning_rate": 2.162931035097776e-06, "loss": 1.0355, "step": 3166 }, { "epoch": 0.79, "learning_rate": 2.157893997398146e-06, "loss": 1.024, "step": 3167 }, { "epoch": 0.79, "learning_rate": 2.1528621222183454e-06, "loss": 1.0686, "step": 3168 }, { "epoch": 0.79, "learning_rate": 2.1478354128708965e-06, "loss": 1.0206, "step": 3169 }, { "epoch": 0.79, "learning_rate": 2.1428138726649204e-06, "loss": 1.0767, "step": 3170 }, { "epoch": 0.79, "learning_rate": 2.137797504906135e-06, "loss": 1.0478, "step": 3171 }, { "epoch": 0.79, "learning_rate": 2.1327863128968584e-06, "loss": 1.0839, "step": 3172 }, { "epoch": 0.79, "learning_rate": 2.1277802999359955e-06, "loss": 1.0623, "step": 3173 }, { "epoch": 0.79, "learning_rate": 2.1227794693190407e-06, "loss": 1.0688, "step": 3174 }, { "epoch": 0.8, "learning_rate": 2.1177838243380856e-06, "loss": 1.0666, "step": 3175 }, { "epoch": 0.8, "learning_rate": 2.112793368281799e-06, "loss": 1.0033, "step": 3176 }, { "epoch": 0.8, "learning_rate": 2.1078081044354394e-06, "loss": 1.0615, "step": 3177 }, { "epoch": 0.8, "learning_rate": 2.1028280360808405e-06, "loss": 1.0668, "step": 3178 }, { "epoch": 0.8, "learning_rate": 2.0978531664964295e-06, "loss": 1.0533, "step": 3179 }, { "epoch": 0.8, "learning_rate": 2.09288349895719e-06, "loss": 1.0826, "step": 3180 }, { "epoch": 0.8, "learning_rate": 2.0879190367347003e-06, "loss": 1.0766, "step": 3181 }, { "epoch": 0.8, "learning_rate": 2.082959783097107e-06, "loss": 1.055, "step": 3182 }, { "epoch": 0.8, "learning_rate": 2.078005741309118e-06, "loss": 1.0716, "step": 3183 }, { "epoch": 0.8, "learning_rate": 2.073056914632026e-06, "loss": 1.0512, "step": 3184 }, { "epoch": 0.8, "learning_rate": 2.068113306323677e-06, "loss": 1.0554, "step": 3185 }, { "epoch": 0.8, "learning_rate": 2.0631749196384885e-06, "loss": 1.0443, "step": 3186 }, { "epoch": 0.8, "learning_rate": 2.058241757827437e-06, "loss": 1.0481, "step": 3187 }, { "epoch": 0.8, "learning_rate": 2.053313824138067e-06, "loss": 1.0508, "step": 3188 }, { "epoch": 0.8, "learning_rate": 2.0483911218144713e-06, "loss": 1.0746, "step": 3189 }, { "epoch": 0.8, "learning_rate": 2.043473654097302e-06, "loss": 1.0806, "step": 3190 }, { "epoch": 0.8, "learning_rate": 2.038561424223774e-06, "loss": 1.045, "step": 3191 }, { "epoch": 0.8, "learning_rate": 2.03365443542764e-06, "loss": 1.1309, "step": 3192 }, { "epoch": 0.8, "learning_rate": 2.028752690939212e-06, "loss": 1.0727, "step": 3193 }, { "epoch": 0.8, "learning_rate": 2.023856193985344e-06, "loss": 1.062, "step": 3194 }, { "epoch": 0.8, "learning_rate": 2.0189649477894424e-06, "loss": 1.0883, "step": 3195 }, { "epoch": 0.8, "learning_rate": 2.014078955571451e-06, "loss": 1.0393, "step": 3196 }, { "epoch": 0.8, "learning_rate": 2.009198220547854e-06, "loss": 1.0895, "step": 3197 }, { "epoch": 0.8, "learning_rate": 2.004322745931685e-06, "loss": 1.0965, "step": 3198 }, { "epoch": 0.8, "learning_rate": 1.9994525349324978e-06, "loss": 1.0362, "step": 3199 }, { "epoch": 0.8, "learning_rate": 1.994587590756397e-06, "loss": 1.0604, "step": 3200 }, { "epoch": 0.8, "learning_rate": 1.9897279166060112e-06, "loss": 1.0707, "step": 3201 }, { "epoch": 0.8, "learning_rate": 1.984873515680501e-06, "loss": 1.0674, "step": 3202 }, { "epoch": 0.8, "learning_rate": 1.980024391175559e-06, "loss": 1.0559, "step": 3203 }, { "epoch": 0.8, "learning_rate": 1.9751805462833983e-06, "loss": 1.0915, "step": 3204 }, { "epoch": 0.8, "learning_rate": 1.970341984192767e-06, "loss": 1.0573, "step": 3205 }, { "epoch": 0.8, "learning_rate": 1.965508708088919e-06, "loss": 1.0294, "step": 3206 }, { "epoch": 0.8, "learning_rate": 1.9606807211536462e-06, "loss": 1.0297, "step": 3207 }, { "epoch": 0.8, "learning_rate": 1.9558580265652448e-06, "loss": 1.0688, "step": 3208 }, { "epoch": 0.8, "learning_rate": 1.951040627498535e-06, "loss": 1.067, "step": 3209 }, { "epoch": 0.8, "learning_rate": 1.946228527124845e-06, "loss": 1.1092, "step": 3210 }, { "epoch": 0.8, "learning_rate": 1.941421728612023e-06, "loss": 1.0421, "step": 3211 }, { "epoch": 0.8, "learning_rate": 1.9366202351244203e-06, "loss": 1.0407, "step": 3212 }, { "epoch": 0.8, "learning_rate": 1.9318240498228935e-06, "loss": 1.031, "step": 3213 }, { "epoch": 0.8, "learning_rate": 1.9270331758648165e-06, "loss": 1.0208, "step": 3214 }, { "epoch": 0.81, "learning_rate": 1.922247616404055e-06, "loss": 1.0877, "step": 3215 }, { "epoch": 0.81, "learning_rate": 1.917467374590981e-06, "loss": 1.0843, "step": 3216 }, { "epoch": 0.81, "learning_rate": 1.9126924535724612e-06, "loss": 1.0547, "step": 3217 }, { "epoch": 0.81, "learning_rate": 1.9079228564918706e-06, "loss": 1.0773, "step": 3218 }, { "epoch": 0.81, "learning_rate": 1.903158586489069e-06, "loss": 1.0562, "step": 3219 }, { "epoch": 0.81, "learning_rate": 1.8983996467004085e-06, "loss": 1.0991, "step": 3220 }, { "epoch": 0.81, "learning_rate": 1.8936460402587475e-06, "loss": 1.0593, "step": 3221 }, { "epoch": 0.81, "learning_rate": 1.8888977702934086e-06, "loss": 1.0406, "step": 3222 }, { "epoch": 0.81, "learning_rate": 1.8841548399302256e-06, "loss": 1.0947, "step": 3223 }, { "epoch": 0.81, "learning_rate": 1.8794172522915022e-06, "loss": 1.0424, "step": 3224 }, { "epoch": 0.81, "learning_rate": 1.8746850104960279e-06, "loss": 1.0641, "step": 3225 }, { "epoch": 0.81, "learning_rate": 1.8699581176590798e-06, "loss": 1.0718, "step": 3226 }, { "epoch": 0.81, "learning_rate": 1.8652365768924052e-06, "loss": 1.0667, "step": 3227 }, { "epoch": 0.81, "learning_rate": 1.8605203913042313e-06, "loss": 1.0681, "step": 3228 }, { "epoch": 0.81, "learning_rate": 1.8558095639992591e-06, "loss": 1.0343, "step": 3229 }, { "epoch": 0.81, "learning_rate": 1.8511040980786665e-06, "loss": 1.0889, "step": 3230 }, { "epoch": 0.81, "learning_rate": 1.846403996640096e-06, "loss": 1.0816, "step": 3231 }, { "epoch": 0.81, "learning_rate": 1.8417092627776601e-06, "loss": 1.069, "step": 3232 }, { "epoch": 0.81, "learning_rate": 1.8370198995819432e-06, "loss": 1.0977, "step": 3233 }, { "epoch": 0.81, "learning_rate": 1.8323359101399873e-06, "loss": 1.0759, "step": 3234 }, { "epoch": 0.81, "learning_rate": 1.827657297535298e-06, "loss": 1.0686, "step": 3235 }, { "epoch": 0.81, "learning_rate": 1.8229840648478425e-06, "loss": 1.0758, "step": 3236 }, { "epoch": 0.81, "learning_rate": 1.8183162151540502e-06, "loss": 1.08, "step": 3237 }, { "epoch": 0.81, "learning_rate": 1.8136537515268015e-06, "loss": 1.0645, "step": 3238 }, { "epoch": 0.81, "learning_rate": 1.8089966770354283e-06, "loss": 1.0509, "step": 3239 }, { "epoch": 0.81, "learning_rate": 1.804344994745727e-06, "loss": 1.0608, "step": 3240 }, { "epoch": 0.81, "learning_rate": 1.7996987077199312e-06, "loss": 1.062, "step": 3241 }, { "epoch": 0.81, "learning_rate": 1.7950578190167301e-06, "loss": 1.0644, "step": 3242 }, { "epoch": 0.81, "learning_rate": 1.790422331691254e-06, "loss": 1.07, "step": 3243 }, { "epoch": 0.81, "learning_rate": 1.7857922487950873e-06, "loss": 0.9982, "step": 3244 }, { "epoch": 0.81, "learning_rate": 1.7811675733762402e-06, "loss": 1.0479, "step": 3245 }, { "epoch": 0.81, "learning_rate": 1.776548308479179e-06, "loss": 1.0637, "step": 3246 }, { "epoch": 0.81, "learning_rate": 1.771934457144805e-06, "loss": 1.0559, "step": 3247 }, { "epoch": 0.81, "learning_rate": 1.7673260224104437e-06, "loss": 1.0575, "step": 3248 }, { "epoch": 0.81, "learning_rate": 1.7627230073098722e-06, "loss": 1.0487, "step": 3249 }, { "epoch": 0.81, "learning_rate": 1.7581254148732874e-06, "loss": 1.0513, "step": 3250 }, { "epoch": 0.81, "learning_rate": 1.7535332481273204e-06, "loss": 1.0441, "step": 3251 }, { "epoch": 0.81, "learning_rate": 1.7489465100950297e-06, "loss": 1.0208, "step": 3252 }, { "epoch": 0.81, "learning_rate": 1.7443652037959046e-06, "loss": 1.0653, "step": 3253 }, { "epoch": 0.81, "learning_rate": 1.7397893322458525e-06, "loss": 1.0621, "step": 3254 }, { "epoch": 0.82, "learning_rate": 1.7352188984572026e-06, "loss": 1.1013, "step": 3255 }, { "epoch": 0.82, "learning_rate": 1.730653905438714e-06, "loss": 1.0637, "step": 3256 }, { "epoch": 0.82, "learning_rate": 1.7260943561955523e-06, "loss": 1.0608, "step": 3257 }, { "epoch": 0.82, "learning_rate": 1.721540253729307e-06, "loss": 1.0526, "step": 3258 }, { "epoch": 0.82, "learning_rate": 1.716991601037975e-06, "loss": 1.061, "step": 3259 }, { "epoch": 0.82, "learning_rate": 1.712448401115977e-06, "loss": 1.0658, "step": 3260 }, { "epoch": 0.82, "learning_rate": 1.7079106569541325e-06, "loss": 1.0668, "step": 3261 }, { "epoch": 0.82, "learning_rate": 1.7033783715396733e-06, "loss": 1.0431, "step": 3262 }, { "epoch": 0.82, "learning_rate": 1.698851547856244e-06, "loss": 1.0447, "step": 3263 }, { "epoch": 0.82, "learning_rate": 1.6943301888838804e-06, "loss": 1.0563, "step": 3264 }, { "epoch": 0.82, "learning_rate": 1.689814297599034e-06, "loss": 1.096, "step": 3265 }, { "epoch": 0.82, "learning_rate": 1.6853038769745466e-06, "loss": 1.0636, "step": 3266 }, { "epoch": 0.82, "learning_rate": 1.6807989299796701e-06, "loss": 1.046, "step": 3267 }, { "epoch": 0.82, "learning_rate": 1.676299459580043e-06, "loss": 1.079, "step": 3268 }, { "epoch": 0.82, "learning_rate": 1.6718054687376994e-06, "loss": 1.0662, "step": 3269 }, { "epoch": 0.82, "learning_rate": 1.667316960411075e-06, "loss": 1.052, "step": 3270 }, { "epoch": 0.82, "learning_rate": 1.6628339375549819e-06, "loss": 1.0693, "step": 3271 }, { "epoch": 0.82, "learning_rate": 1.6583564031206357e-06, "loss": 1.0513, "step": 3272 }, { "epoch": 0.82, "learning_rate": 1.6538843600556297e-06, "loss": 1.0403, "step": 3273 }, { "epoch": 0.82, "learning_rate": 1.6494178113039438e-06, "loss": 1.0551, "step": 3274 }, { "epoch": 0.82, "learning_rate": 1.6449567598059458e-06, "loss": 1.057, "step": 3275 }, { "epoch": 0.82, "learning_rate": 1.6405012084983774e-06, "loss": 1.069, "step": 3276 }, { "epoch": 0.82, "learning_rate": 1.6360511603143648e-06, "loss": 1.0551, "step": 3277 }, { "epoch": 0.82, "learning_rate": 1.6316066181834066e-06, "loss": 1.0631, "step": 3278 }, { "epoch": 0.82, "learning_rate": 1.627167585031384e-06, "loss": 1.0325, "step": 3279 }, { "epoch": 0.82, "learning_rate": 1.6227340637805455e-06, "loss": 1.0688, "step": 3280 }, { "epoch": 0.82, "learning_rate": 1.6183060573495102e-06, "loss": 1.0689, "step": 3281 }, { "epoch": 0.82, "learning_rate": 1.613883568653275e-06, "loss": 1.0258, "step": 3282 }, { "epoch": 0.82, "learning_rate": 1.6094666006031956e-06, "loss": 1.0325, "step": 3283 }, { "epoch": 0.82, "learning_rate": 1.605055156106996e-06, "loss": 1.0671, "step": 3284 }, { "epoch": 0.82, "learning_rate": 1.6006492380687633e-06, "loss": 1.0728, "step": 3285 }, { "epoch": 0.82, "learning_rate": 1.596248849388955e-06, "loss": 1.0659, "step": 3286 }, { "epoch": 0.82, "learning_rate": 1.5918539929643717e-06, "loss": 1.0733, "step": 3287 }, { "epoch": 0.82, "learning_rate": 1.587464671688187e-06, "loss": 1.0356, "step": 3288 }, { "epoch": 0.82, "learning_rate": 1.5830808884499304e-06, "loss": 1.0873, "step": 3289 }, { "epoch": 0.82, "learning_rate": 1.5787026461354692e-06, "loss": 1.0599, "step": 3290 }, { "epoch": 0.82, "learning_rate": 1.5743299476270445e-06, "loss": 1.0734, "step": 3291 }, { "epoch": 0.82, "learning_rate": 1.5699627958032338e-06, "loss": 1.0228, "step": 3292 }, { "epoch": 0.82, "learning_rate": 1.5656011935389682e-06, "loss": 1.0094, "step": 3293 }, { "epoch": 0.82, "learning_rate": 1.561245143705522e-06, "loss": 1.0666, "step": 3294 }, { "epoch": 0.83, "learning_rate": 1.5568946491705218e-06, "loss": 1.0624, "step": 3295 }, { "epoch": 0.83, "learning_rate": 1.5525497127979305e-06, "loss": 1.0711, "step": 3296 }, { "epoch": 0.83, "learning_rate": 1.5482103374480495e-06, "loss": 1.0672, "step": 3297 }, { "epoch": 0.83, "learning_rate": 1.54387652597753e-06, "loss": 1.0622, "step": 3298 }, { "epoch": 0.83, "learning_rate": 1.5395482812393513e-06, "loss": 1.0187, "step": 3299 }, { "epoch": 0.83, "learning_rate": 1.5352256060828308e-06, "loss": 1.0419, "step": 3300 }, { "epoch": 0.83, "learning_rate": 1.5309085033536164e-06, "loss": 1.0459, "step": 3301 }, { "epoch": 0.83, "learning_rate": 1.526596975893696e-06, "loss": 1.0542, "step": 3302 }, { "epoch": 0.83, "learning_rate": 1.5222910265413803e-06, "loss": 1.0392, "step": 3303 }, { "epoch": 0.83, "learning_rate": 1.5179906581313063e-06, "loss": 1.0539, "step": 3304 }, { "epoch": 0.83, "learning_rate": 1.513695873494444e-06, "loss": 1.014, "step": 3305 }, { "epoch": 0.83, "learning_rate": 1.5094066754580827e-06, "loss": 1.0705, "step": 3306 }, { "epoch": 0.83, "learning_rate": 1.5051230668458361e-06, "loss": 1.0281, "step": 3307 }, { "epoch": 0.83, "learning_rate": 1.5008450504776318e-06, "loss": 1.0529, "step": 3308 }, { "epoch": 0.83, "learning_rate": 1.4965726291697292e-06, "loss": 1.0488, "step": 3309 }, { "epoch": 0.83, "learning_rate": 1.492305805734693e-06, "loss": 1.0496, "step": 3310 }, { "epoch": 0.83, "learning_rate": 1.4880445829814038e-06, "loss": 1.0309, "step": 3311 }, { "epoch": 0.83, "learning_rate": 1.483788963715066e-06, "loss": 1.0386, "step": 3312 }, { "epoch": 0.83, "learning_rate": 1.479538950737177e-06, "loss": 1.078, "step": 3313 }, { "epoch": 0.83, "learning_rate": 1.4752945468455614e-06, "loss": 1.0562, "step": 3314 }, { "epoch": 0.83, "learning_rate": 1.4710557548343408e-06, "loss": 1.0625, "step": 3315 }, { "epoch": 0.83, "learning_rate": 1.4668225774939425e-06, "loss": 1.0172, "step": 3316 }, { "epoch": 0.83, "learning_rate": 1.4625950176111048e-06, "loss": 1.0933, "step": 3317 }, { "epoch": 0.83, "learning_rate": 1.458373077968862e-06, "loss": 1.0879, "step": 3318 }, { "epoch": 0.83, "learning_rate": 1.4541567613465501e-06, "loss": 1.0627, "step": 3319 }, { "epoch": 0.83, "learning_rate": 1.4499460705198e-06, "loss": 1.0748, "step": 3320 }, { "epoch": 0.83, "learning_rate": 1.4457410082605483e-06, "loss": 1.066, "step": 3321 }, { "epoch": 0.83, "learning_rate": 1.4415415773370167e-06, "loss": 1.0638, "step": 3322 }, { "epoch": 0.83, "learning_rate": 1.4373477805137226e-06, "loss": 1.0484, "step": 3323 }, { "epoch": 0.83, "learning_rate": 1.4331596205514798e-06, "loss": 1.0486, "step": 3324 }, { "epoch": 0.83, "learning_rate": 1.4289771002073839e-06, "loss": 1.0616, "step": 3325 }, { "epoch": 0.83, "learning_rate": 1.42480022223482e-06, "loss": 1.0957, "step": 3326 }, { "epoch": 0.83, "learning_rate": 1.420628989383459e-06, "loss": 1.0268, "step": 3327 }, { "epoch": 0.83, "learning_rate": 1.416463404399262e-06, "loss": 1.0826, "step": 3328 }, { "epoch": 0.83, "learning_rate": 1.4123034700244575e-06, "loss": 1.0193, "step": 3329 }, { "epoch": 0.83, "learning_rate": 1.4081491889975696e-06, "loss": 1.0625, "step": 3330 }, { "epoch": 0.83, "learning_rate": 1.4040005640533906e-06, "loss": 1.0427, "step": 3331 }, { "epoch": 0.83, "learning_rate": 1.3998575979229944e-06, "loss": 1.057, "step": 3332 }, { "epoch": 0.83, "learning_rate": 1.395720293333729e-06, "loss": 1.0475, "step": 3333 }, { "epoch": 0.83, "learning_rate": 1.3915886530092104e-06, "loss": 1.0279, "step": 3334 }, { "epoch": 0.84, "learning_rate": 1.387462679669338e-06, "loss": 1.0492, "step": 3335 }, { "epoch": 0.84, "learning_rate": 1.383342376030261e-06, "loss": 1.0555, "step": 3336 }, { "epoch": 0.84, "learning_rate": 1.3792277448044167e-06, "loss": 1.0767, "step": 3337 }, { "epoch": 0.84, "learning_rate": 1.3751187887004947e-06, "loss": 1.0289, "step": 3338 }, { "epoch": 0.84, "learning_rate": 1.3710155104234534e-06, "loss": 1.046, "step": 3339 }, { "epoch": 0.84, "learning_rate": 1.3669179126745147e-06, "loss": 1.0729, "step": 3340 }, { "epoch": 0.84, "learning_rate": 1.3628259981511594e-06, "loss": 1.0316, "step": 3341 }, { "epoch": 0.84, "learning_rate": 1.358739769547125e-06, "loss": 1.0247, "step": 3342 }, { "epoch": 0.84, "learning_rate": 1.3546592295524075e-06, "loss": 1.0664, "step": 3343 }, { "epoch": 0.84, "learning_rate": 1.3505843808532605e-06, "loss": 1.0768, "step": 3344 }, { "epoch": 0.84, "learning_rate": 1.346515226132189e-06, "loss": 1.0845, "step": 3345 }, { "epoch": 0.84, "learning_rate": 1.342451768067945e-06, "loss": 1.0168, "step": 3346 }, { "epoch": 0.84, "learning_rate": 1.3383940093355408e-06, "loss": 1.0671, "step": 3347 }, { "epoch": 0.84, "learning_rate": 1.3343419526062285e-06, "loss": 1.0767, "step": 3348 }, { "epoch": 0.84, "learning_rate": 1.3302956005475076e-06, "loss": 1.0773, "step": 3349 }, { "epoch": 0.84, "learning_rate": 1.326254955823123e-06, "loss": 1.0657, "step": 3350 }, { "epoch": 0.84, "learning_rate": 1.3222200210930668e-06, "loss": 1.0439, "step": 3351 }, { "epoch": 0.84, "learning_rate": 1.3181907990135624e-06, "loss": 1.0464, "step": 3352 }, { "epoch": 0.84, "learning_rate": 1.3141672922370796e-06, "loss": 1.0432, "step": 3353 }, { "epoch": 0.84, "learning_rate": 1.3101495034123313e-06, "loss": 1.0756, "step": 3354 }, { "epoch": 0.84, "learning_rate": 1.3061374351842494e-06, "loss": 1.0583, "step": 3355 }, { "epoch": 0.84, "learning_rate": 1.3021310901940166e-06, "loss": 1.0634, "step": 3356 }, { "epoch": 0.84, "learning_rate": 1.2981304710790398e-06, "loss": 1.0828, "step": 3357 }, { "epoch": 0.84, "learning_rate": 1.294135580472956e-06, "loss": 1.0895, "step": 3358 }, { "epoch": 0.84, "learning_rate": 1.2901464210056324e-06, "loss": 1.0818, "step": 3359 }, { "epoch": 0.84, "learning_rate": 1.2861629953031685e-06, "loss": 1.0455, "step": 3360 }, { "epoch": 0.84, "learning_rate": 1.282185305987881e-06, "loss": 1.0744, "step": 3361 }, { "epoch": 0.84, "learning_rate": 1.2782133556783128e-06, "loss": 1.062, "step": 3362 }, { "epoch": 0.84, "learning_rate": 1.2742471469892349e-06, "loss": 1.0693, "step": 3363 }, { "epoch": 0.84, "learning_rate": 1.2702866825316295e-06, "loss": 1.0199, "step": 3364 }, { "epoch": 0.84, "learning_rate": 1.2663319649127025e-06, "loss": 1.0534, "step": 3365 }, { "epoch": 0.84, "learning_rate": 1.2623829967358736e-06, "loss": 1.0889, "step": 3366 }, { "epoch": 0.84, "learning_rate": 1.258439780600783e-06, "loss": 1.0632, "step": 3367 }, { "epoch": 0.84, "learning_rate": 1.25450231910328e-06, "loss": 1.0385, "step": 3368 }, { "epoch": 0.84, "learning_rate": 1.2505706148354225e-06, "loss": 1.0735, "step": 3369 }, { "epoch": 0.84, "learning_rate": 1.2466446703854884e-06, "loss": 0.9921, "step": 3370 }, { "epoch": 0.84, "learning_rate": 1.242724488337953e-06, "loss": 1.0357, "step": 3371 }, { "epoch": 0.84, "learning_rate": 1.2388100712735051e-06, "loss": 1.0799, "step": 3372 }, { "epoch": 0.84, "learning_rate": 1.2349014217690337e-06, "loss": 1.0455, "step": 3373 }, { "epoch": 0.85, "learning_rate": 1.2309985423976368e-06, "loss": 1.023, "step": 3374 }, { "epoch": 0.85, "learning_rate": 1.227101435728608e-06, "loss": 1.0407, "step": 3375 }, { "epoch": 0.85, "learning_rate": 1.2232101043274437e-06, "loss": 1.0702, "step": 3376 }, { "epoch": 0.85, "learning_rate": 1.2193245507558404e-06, "loss": 1.064, "step": 3377 }, { "epoch": 0.85, "learning_rate": 1.2154447775716826e-06, "loss": 1.0524, "step": 3378 }, { "epoch": 0.85, "learning_rate": 1.2115707873290604e-06, "loss": 1.0609, "step": 3379 }, { "epoch": 0.85, "learning_rate": 1.2077025825782518e-06, "loss": 1.0796, "step": 3380 }, { "epoch": 0.85, "learning_rate": 1.2038401658657218e-06, "loss": 1.0391, "step": 3381 }, { "epoch": 0.85, "learning_rate": 1.199983539734134e-06, "loss": 1.0826, "step": 3382 }, { "epoch": 0.85, "learning_rate": 1.196132706722336e-06, "loss": 1.0743, "step": 3383 }, { "epoch": 0.85, "learning_rate": 1.1922876693653584e-06, "loss": 1.0774, "step": 3384 }, { "epoch": 0.85, "learning_rate": 1.18844843019442e-06, "loss": 1.0612, "step": 3385 }, { "epoch": 0.85, "learning_rate": 1.1846149917369232e-06, "loss": 1.0606, "step": 3386 }, { "epoch": 0.85, "learning_rate": 1.1807873565164507e-06, "loss": 1.0773, "step": 3387 }, { "epoch": 0.85, "learning_rate": 1.1769655270527624e-06, "loss": 1.0385, "step": 3388 }, { "epoch": 0.85, "learning_rate": 1.1731495058618013e-06, "loss": 1.061, "step": 3389 }, { "epoch": 0.85, "learning_rate": 1.1693392954556848e-06, "loss": 1.0657, "step": 3390 }, { "epoch": 0.85, "learning_rate": 1.1655348983427007e-06, "loss": 1.0685, "step": 3391 }, { "epoch": 0.85, "learning_rate": 1.1617363170273144e-06, "loss": 1.0284, "step": 3392 }, { "epoch": 0.85, "learning_rate": 1.157943554010167e-06, "loss": 1.1139, "step": 3393 }, { "epoch": 0.85, "learning_rate": 1.1541566117880542e-06, "loss": 1.0715, "step": 3394 }, { "epoch": 0.85, "learning_rate": 1.1503754928539556e-06, "loss": 1.0559, "step": 3395 }, { "epoch": 0.85, "learning_rate": 1.1466001996970145e-06, "loss": 1.0455, "step": 3396 }, { "epoch": 0.85, "learning_rate": 1.1428307348025313e-06, "loss": 1.0432, "step": 3397 }, { "epoch": 0.85, "learning_rate": 1.139067100651976e-06, "loss": 1.0725, "step": 3398 }, { "epoch": 0.85, "learning_rate": 1.1353092997229754e-06, "loss": 1.0662, "step": 3399 }, { "epoch": 0.85, "learning_rate": 1.131557334489326e-06, "loss": 1.0437, "step": 3400 }, { "epoch": 0.85, "learning_rate": 1.1278112074209679e-06, "loss": 1.0708, "step": 3401 }, { "epoch": 0.85, "learning_rate": 1.1240709209840095e-06, "loss": 1.0742, "step": 3402 }, { "epoch": 0.85, "learning_rate": 1.1203364776407156e-06, "loss": 1.069, "step": 3403 }, { "epoch": 0.85, "learning_rate": 1.1166078798494905e-06, "loss": 1.0942, "step": 3404 }, { "epoch": 0.85, "learning_rate": 1.1128851300649058e-06, "loss": 1.0266, "step": 3405 }, { "epoch": 0.85, "learning_rate": 1.1091682307376739e-06, "loss": 1.0737, "step": 3406 }, { "epoch": 0.85, "learning_rate": 1.105457184314661e-06, "loss": 1.0524, "step": 3407 }, { "epoch": 0.85, "learning_rate": 1.1017519932388743e-06, "loss": 1.0347, "step": 3408 }, { "epoch": 0.85, "learning_rate": 1.0980526599494733e-06, "loss": 1.0338, "step": 3409 }, { "epoch": 0.85, "learning_rate": 1.0943591868817581e-06, "loss": 1.0292, "step": 3410 }, { "epoch": 0.85, "learning_rate": 1.0906715764671683e-06, "loss": 1.0395, "step": 3411 }, { "epoch": 0.85, "learning_rate": 1.0869898311332906e-06, "loss": 1.0833, "step": 3412 }, { "epoch": 0.85, "learning_rate": 1.083313953303844e-06, "loss": 1.0468, "step": 3413 }, { "epoch": 0.86, "learning_rate": 1.0796439453986885e-06, "loss": 1.115, "step": 3414 }, { "epoch": 0.86, "learning_rate": 1.075979809833818e-06, "loss": 1.0558, "step": 3415 }, { "epoch": 0.86, "learning_rate": 1.0723215490213635e-06, "loss": 1.0282, "step": 3416 }, { "epoch": 0.86, "learning_rate": 1.0686691653695868e-06, "loss": 1.0704, "step": 3417 }, { "epoch": 0.86, "learning_rate": 1.0650226612828774e-06, "loss": 1.0535, "step": 3418 }, { "epoch": 0.86, "learning_rate": 1.0613820391617635e-06, "loss": 1.0275, "step": 3419 }, { "epoch": 0.86, "learning_rate": 1.0577473014028872e-06, "loss": 1.0452, "step": 3420 }, { "epoch": 0.86, "learning_rate": 1.0541184503990321e-06, "loss": 1.0509, "step": 3421 }, { "epoch": 0.86, "learning_rate": 1.0504954885390961e-06, "loss": 1.0922, "step": 3422 }, { "epoch": 0.86, "learning_rate": 1.0468784182081015e-06, "loss": 1.0202, "step": 3423 }, { "epoch": 0.86, "learning_rate": 1.043267241787198e-06, "loss": 1.0548, "step": 3424 }, { "epoch": 0.86, "learning_rate": 1.0396619616536496e-06, "loss": 1.061, "step": 3425 }, { "epoch": 0.86, "learning_rate": 1.036062580180841e-06, "loss": 1.0473, "step": 3426 }, { "epoch": 0.86, "learning_rate": 1.0324690997382691e-06, "loss": 1.0334, "step": 3427 }, { "epoch": 0.86, "learning_rate": 1.0288815226915572e-06, "loss": 1.0415, "step": 3428 }, { "epoch": 0.86, "learning_rate": 1.0252998514024314e-06, "loss": 1.0541, "step": 3429 }, { "epoch": 0.86, "learning_rate": 1.0217240882287315e-06, "loss": 1.0574, "step": 3430 }, { "epoch": 0.86, "learning_rate": 1.0181542355244167e-06, "loss": 1.0621, "step": 3431 }, { "epoch": 0.86, "learning_rate": 1.0145902956395449e-06, "loss": 1.0986, "step": 3432 }, { "epoch": 0.86, "learning_rate": 1.0110322709202868e-06, "loss": 1.0759, "step": 3433 }, { "epoch": 0.86, "learning_rate": 1.007480163708916e-06, "loss": 1.0393, "step": 3434 }, { "epoch": 0.86, "learning_rate": 1.003933976343816e-06, "loss": 1.0338, "step": 3435 }, { "epoch": 0.86, "learning_rate": 1.0003937111594674e-06, "loss": 1.0557, "step": 3436 }, { "epoch": 0.86, "learning_rate": 9.968593704864526e-07, "loss": 1.0747, "step": 3437 }, { "epoch": 0.86, "learning_rate": 9.933309566514604e-07, "loss": 1.074, "step": 3438 }, { "epoch": 0.86, "learning_rate": 9.898084719772716e-07, "loss": 1.0934, "step": 3439 }, { "epoch": 0.86, "learning_rate": 9.862919187827636e-07, "loss": 1.0552, "step": 3440 }, { "epoch": 0.86, "learning_rate": 9.827812993829112e-07, "loss": 1.0534, "step": 3441 }, { "epoch": 0.86, "learning_rate": 9.792766160887868e-07, "loss": 1.0649, "step": 3442 }, { "epoch": 0.86, "learning_rate": 9.75777871207544e-07, "loss": 1.0799, "step": 3443 }, { "epoch": 0.86, "learning_rate": 9.722850670424378e-07, "loss": 1.0701, "step": 3444 }, { "epoch": 0.86, "learning_rate": 9.687982058928124e-07, "loss": 1.0334, "step": 3445 }, { "epoch": 0.86, "learning_rate": 9.653172900540885e-07, "loss": 1.1033, "step": 3446 }, { "epoch": 0.86, "learning_rate": 9.618423218177842e-07, "loss": 1.0748, "step": 3447 }, { "epoch": 0.86, "learning_rate": 9.583733034714982e-07, "loss": 1.018, "step": 3448 }, { "epoch": 0.86, "learning_rate": 9.549102372989128e-07, "loss": 1.0677, "step": 3449 }, { "epoch": 0.86, "learning_rate": 9.514531255797876e-07, "loss": 1.0764, "step": 3450 }, { "epoch": 0.86, "learning_rate": 9.480019705899723e-07, "loss": 1.1147, "step": 3451 }, { "epoch": 0.86, "learning_rate": 9.445567746013862e-07, "loss": 1.0572, "step": 3452 }, { "epoch": 0.86, "learning_rate": 9.411175398820271e-07, "loss": 1.0739, "step": 3453 }, { "epoch": 0.87, "learning_rate": 9.376842686959742e-07, "loss": 1.0561, "step": 3454 }, { "epoch": 0.87, "learning_rate": 9.342569633033749e-07, "loss": 1.0576, "step": 3455 }, { "epoch": 0.87, "learning_rate": 9.3083562596045e-07, "loss": 1.082, "step": 3456 }, { "epoch": 0.87, "learning_rate": 9.274202589194936e-07, "loss": 1.0284, "step": 3457 }, { "epoch": 0.87, "learning_rate": 9.240108644288714e-07, "loss": 1.1271, "step": 3458 }, { "epoch": 0.87, "learning_rate": 9.206074447330083e-07, "loss": 1.0882, "step": 3459 }, { "epoch": 0.87, "learning_rate": 9.172100020724051e-07, "loss": 1.0477, "step": 3460 }, { "epoch": 0.87, "learning_rate": 9.138185386836285e-07, "loss": 1.0584, "step": 3461 }, { "epoch": 0.87, "learning_rate": 9.104330567993014e-07, "loss": 1.0417, "step": 3462 }, { "epoch": 0.87, "learning_rate": 9.070535586481144e-07, "loss": 1.1084, "step": 3463 }, { "epoch": 0.87, "learning_rate": 9.036800464548157e-07, "loss": 1.041, "step": 3464 }, { "epoch": 0.87, "learning_rate": 9.003125224402198e-07, "loss": 1.086, "step": 3465 }, { "epoch": 0.87, "learning_rate": 8.96950988821188e-07, "loss": 1.0176, "step": 3466 }, { "epoch": 0.87, "learning_rate": 8.935954478106457e-07, "loss": 1.0795, "step": 3467 }, { "epoch": 0.87, "learning_rate": 8.902459016175768e-07, "loss": 1.0397, "step": 3468 }, { "epoch": 0.87, "learning_rate": 8.869023524470078e-07, "loss": 1.0577, "step": 3469 }, { "epoch": 0.87, "learning_rate": 8.835648025000265e-07, "loss": 1.0574, "step": 3470 }, { "epoch": 0.87, "learning_rate": 8.802332539737668e-07, "loss": 1.0714, "step": 3471 }, { "epoch": 0.87, "learning_rate": 8.769077090614142e-07, "loss": 1.0179, "step": 3472 }, { "epoch": 0.87, "learning_rate": 8.735881699521998e-07, "loss": 1.0522, "step": 3473 }, { "epoch": 0.87, "learning_rate": 8.702746388314031e-07, "loss": 1.0521, "step": 3474 }, { "epoch": 0.87, "learning_rate": 8.669671178803485e-07, "loss": 1.0242, "step": 3475 }, { "epoch": 0.87, "learning_rate": 8.636656092763995e-07, "loss": 1.0682, "step": 3476 }, { "epoch": 0.87, "learning_rate": 8.603701151929689e-07, "loss": 1.1036, "step": 3477 }, { "epoch": 0.87, "learning_rate": 8.570806377995056e-07, "loss": 1.039, "step": 3478 }, { "epoch": 0.87, "learning_rate": 8.537971792614974e-07, "loss": 1.0751, "step": 3479 }, { "epoch": 0.87, "learning_rate": 8.505197417404687e-07, "loss": 1.021, "step": 3480 }, { "epoch": 0.87, "learning_rate": 8.472483273939858e-07, "loss": 1.0616, "step": 3481 }, { "epoch": 0.87, "learning_rate": 8.439829383756459e-07, "loss": 1.0843, "step": 3482 }, { "epoch": 0.87, "learning_rate": 8.407235768350786e-07, "loss": 1.0566, "step": 3483 }, { "epoch": 0.87, "learning_rate": 8.374702449179495e-07, "loss": 1.0565, "step": 3484 }, { "epoch": 0.87, "learning_rate": 8.342229447659489e-07, "loss": 1.0553, "step": 3485 }, { "epoch": 0.87, "learning_rate": 8.309816785168035e-07, "loss": 1.0577, "step": 3486 }, { "epoch": 0.87, "learning_rate": 8.277464483042618e-07, "loss": 1.0833, "step": 3487 }, { "epoch": 0.87, "learning_rate": 8.245172562581017e-07, "loss": 1.0536, "step": 3488 }, { "epoch": 0.87, "learning_rate": 8.212941045041267e-07, "loss": 1.054, "step": 3489 }, { "epoch": 0.87, "learning_rate": 8.180769951641632e-07, "loss": 1.0452, "step": 3490 }, { "epoch": 0.87, "learning_rate": 8.14865930356058e-07, "loss": 1.0679, "step": 3491 }, { "epoch": 0.87, "learning_rate": 8.116609121936791e-07, "loss": 1.0364, "step": 3492 }, { "epoch": 0.87, "learning_rate": 8.084619427869178e-07, "loss": 1.0626, "step": 3493 }, { "epoch": 0.88, "learning_rate": 8.052690242416805e-07, "loss": 1.0643, "step": 3494 }, { "epoch": 0.88, "learning_rate": 8.020821586598881e-07, "loss": 1.0579, "step": 3495 }, { "epoch": 0.88, "learning_rate": 7.989013481394813e-07, "loss": 1.0293, "step": 3496 }, { "epoch": 0.88, "learning_rate": 7.957265947744131e-07, "loss": 1.0561, "step": 3497 }, { "epoch": 0.88, "learning_rate": 7.925579006546469e-07, "loss": 1.0538, "step": 3498 }, { "epoch": 0.88, "learning_rate": 7.893952678661576e-07, "loss": 1.0312, "step": 3499 }, { "epoch": 0.88, "learning_rate": 7.862386984909354e-07, "loss": 1.1167, "step": 3500 }, { "epoch": 0.88, "learning_rate": 7.830881946069712e-07, "loss": 1.0725, "step": 3501 }, { "epoch": 0.88, "learning_rate": 7.799437582882663e-07, "loss": 1.0523, "step": 3502 }, { "epoch": 0.88, "learning_rate": 7.768053916048301e-07, "loss": 1.0871, "step": 3503 }, { "epoch": 0.88, "learning_rate": 7.736730966226725e-07, "loss": 1.0844, "step": 3504 }, { "epoch": 0.88, "learning_rate": 7.705468754038093e-07, "loss": 1.0777, "step": 3505 }, { "epoch": 0.88, "learning_rate": 7.674267300062521e-07, "loss": 1.0353, "step": 3506 }, { "epoch": 0.88, "learning_rate": 7.643126624840235e-07, "loss": 1.1084, "step": 3507 }, { "epoch": 0.88, "learning_rate": 7.612046748871327e-07, "loss": 1.0487, "step": 3508 }, { "epoch": 0.88, "learning_rate": 7.58102769261595e-07, "loss": 1.0712, "step": 3509 }, { "epoch": 0.88, "learning_rate": 7.550069476494215e-07, "loss": 1.0659, "step": 3510 }, { "epoch": 0.88, "learning_rate": 7.519172120886098e-07, "loss": 1.0227, "step": 3511 }, { "epoch": 0.88, "learning_rate": 7.488335646131628e-07, "loss": 1.0279, "step": 3512 }, { "epoch": 0.88, "learning_rate": 7.457560072530668e-07, "loss": 1.0485, "step": 3513 }, { "epoch": 0.88, "learning_rate": 7.426845420343032e-07, "loss": 1.0842, "step": 3514 }, { "epoch": 0.88, "learning_rate": 7.396191709788392e-07, "loss": 1.0454, "step": 3515 }, { "epoch": 0.88, "learning_rate": 7.365598961046361e-07, "loss": 1.0566, "step": 3516 }, { "epoch": 0.88, "learning_rate": 7.335067194256373e-07, "loss": 1.0165, "step": 3517 }, { "epoch": 0.88, "learning_rate": 7.304596429517708e-07, "loss": 1.085, "step": 3518 }, { "epoch": 0.88, "learning_rate": 7.274186686889539e-07, "loss": 1.0842, "step": 3519 }, { "epoch": 0.88, "learning_rate": 7.243837986390822e-07, "loss": 1.0636, "step": 3520 }, { "epoch": 0.88, "learning_rate": 7.213550348000354e-07, "loss": 1.0198, "step": 3521 }, { "epoch": 0.88, "learning_rate": 7.183323791656694e-07, "loss": 1.0693, "step": 3522 }, { "epoch": 0.88, "learning_rate": 7.153158337258259e-07, "loss": 1.0519, "step": 3523 }, { "epoch": 0.88, "learning_rate": 7.1230540046632e-07, "loss": 1.0862, "step": 3524 }, { "epoch": 0.88, "learning_rate": 7.093010813689405e-07, "loss": 1.0615, "step": 3525 }, { "epoch": 0.88, "learning_rate": 7.063028784114567e-07, "loss": 1.0606, "step": 3526 }, { "epoch": 0.88, "learning_rate": 7.033107935676098e-07, "loss": 1.0836, "step": 3527 }, { "epoch": 0.88, "learning_rate": 7.003248288071118e-07, "loss": 1.0732, "step": 3528 }, { "epoch": 0.88, "learning_rate": 6.973449860956438e-07, "loss": 1.0723, "step": 3529 }, { "epoch": 0.88, "learning_rate": 6.943712673948643e-07, "loss": 1.0595, "step": 3530 }, { "epoch": 0.88, "learning_rate": 6.914036746623932e-07, "loss": 1.0503, "step": 3531 }, { "epoch": 0.88, "learning_rate": 6.884422098518184e-07, "loss": 1.0888, "step": 3532 }, { "epoch": 0.88, "learning_rate": 6.854868749127008e-07, "loss": 1.0344, "step": 3533 }, { "epoch": 0.89, "learning_rate": 6.82537671790554e-07, "loss": 1.035, "step": 3534 }, { "epoch": 0.89, "learning_rate": 6.795946024268674e-07, "loss": 1.0605, "step": 3535 }, { "epoch": 0.89, "learning_rate": 6.76657668759082e-07, "loss": 1.028, "step": 3536 }, { "epoch": 0.89, "learning_rate": 6.737268727206058e-07, "loss": 1.0372, "step": 3537 }, { "epoch": 0.89, "learning_rate": 6.708022162408056e-07, "loss": 1.0599, "step": 3538 }, { "epoch": 0.89, "learning_rate": 6.678837012450057e-07, "loss": 1.0409, "step": 3539 }, { "epoch": 0.89, "learning_rate": 6.649713296544869e-07, "loss": 1.0589, "step": 3540 }, { "epoch": 0.89, "learning_rate": 6.620651033864844e-07, "loss": 1.104, "step": 3541 }, { "epoch": 0.89, "learning_rate": 6.591650243541925e-07, "loss": 1.0401, "step": 3542 }, { "epoch": 0.89, "learning_rate": 6.562710944667561e-07, "loss": 1.0635, "step": 3543 }, { "epoch": 0.89, "learning_rate": 6.53383315629268e-07, "loss": 1.0322, "step": 3544 }, { "epoch": 0.89, "learning_rate": 6.505016897427807e-07, "loss": 1.0485, "step": 3545 }, { "epoch": 0.89, "learning_rate": 6.476262187042892e-07, "loss": 1.0711, "step": 3546 }, { "epoch": 0.89, "learning_rate": 6.447569044067381e-07, "loss": 1.0975, "step": 3547 }, { "epoch": 0.89, "learning_rate": 6.418937487390175e-07, "loss": 1.0504, "step": 3548 }, { "epoch": 0.89, "learning_rate": 6.390367535859698e-07, "loss": 1.0358, "step": 3549 }, { "epoch": 0.89, "learning_rate": 6.361859208283727e-07, "loss": 1.0505, "step": 3550 }, { "epoch": 0.89, "learning_rate": 6.333412523429539e-07, "loss": 1.0357, "step": 3551 }, { "epoch": 0.89, "learning_rate": 6.305027500023841e-07, "loss": 1.105, "step": 3552 }, { "epoch": 0.89, "learning_rate": 6.276704156752644e-07, "loss": 1.079, "step": 3553 }, { "epoch": 0.89, "learning_rate": 6.248442512261487e-07, "loss": 1.0862, "step": 3554 }, { "epoch": 0.89, "learning_rate": 6.220242585155223e-07, "loss": 1.0538, "step": 3555 }, { "epoch": 0.89, "learning_rate": 6.192104393998066e-07, "loss": 1.0649, "step": 3556 }, { "epoch": 0.89, "learning_rate": 6.164027957313601e-07, "loss": 1.0269, "step": 3557 }, { "epoch": 0.89, "learning_rate": 6.136013293584775e-07, "loss": 1.032, "step": 3558 }, { "epoch": 0.89, "learning_rate": 6.108060421253902e-07, "loss": 1.0275, "step": 3559 }, { "epoch": 0.89, "learning_rate": 6.08016935872251e-07, "loss": 1.0802, "step": 3560 }, { "epoch": 0.89, "learning_rate": 6.052340124351552e-07, "loss": 1.0801, "step": 3561 }, { "epoch": 0.89, "learning_rate": 6.024572736461221e-07, "loss": 1.071, "step": 3562 }, { "epoch": 0.89, "learning_rate": 5.996867213330993e-07, "loss": 1.0685, "step": 3563 }, { "epoch": 0.89, "learning_rate": 5.969223573199623e-07, "loss": 1.0747, "step": 3564 }, { "epoch": 0.89, "learning_rate": 5.941641834265166e-07, "loss": 1.1038, "step": 3565 }, { "epoch": 0.89, "learning_rate": 5.914122014684875e-07, "loss": 1.0721, "step": 3566 }, { "epoch": 0.89, "learning_rate": 5.88666413257526e-07, "loss": 1.062, "step": 3567 }, { "epoch": 0.89, "learning_rate": 5.859268206012092e-07, "loss": 1.0777, "step": 3568 }, { "epoch": 0.89, "learning_rate": 5.831934253030292e-07, "loss": 1.0854, "step": 3569 }, { "epoch": 0.89, "learning_rate": 5.804662291624019e-07, "loss": 1.0391, "step": 3570 }, { "epoch": 0.89, "learning_rate": 5.777452339746626e-07, "loss": 1.0201, "step": 3571 }, { "epoch": 0.89, "learning_rate": 5.750304415310659e-07, "loss": 1.0371, "step": 3572 }, { "epoch": 0.89, "learning_rate": 5.723218536187747e-07, "loss": 1.0878, "step": 3573 }, { "epoch": 0.9, "learning_rate": 5.696194720208792e-07, "loss": 1.0674, "step": 3574 }, { "epoch": 0.9, "learning_rate": 5.669232985163787e-07, "loss": 1.0571, "step": 3575 }, { "epoch": 0.9, "learning_rate": 5.64233334880181e-07, "loss": 1.0908, "step": 3576 }, { "epoch": 0.9, "learning_rate": 5.61549582883113e-07, "loss": 1.09, "step": 3577 }, { "epoch": 0.9, "learning_rate": 5.588720442919104e-07, "loss": 1.0067, "step": 3578 }, { "epoch": 0.9, "learning_rate": 5.562007208692166e-07, "loss": 1.0434, "step": 3579 }, { "epoch": 0.9, "learning_rate": 5.535356143735838e-07, "loss": 1.0233, "step": 3580 }, { "epoch": 0.9, "learning_rate": 5.50876726559475e-07, "loss": 1.037, "step": 3581 }, { "epoch": 0.9, "learning_rate": 5.482240591772547e-07, "loss": 1.0411, "step": 3582 }, { "epoch": 0.9, "learning_rate": 5.455776139731939e-07, "loss": 1.0364, "step": 3583 }, { "epoch": 0.9, "learning_rate": 5.429373926894721e-07, "loss": 1.1161, "step": 3584 }, { "epoch": 0.9, "learning_rate": 5.403033970641647e-07, "loss": 1.0364, "step": 3585 }, { "epoch": 0.9, "learning_rate": 5.376756288312524e-07, "loss": 1.0885, "step": 3586 }, { "epoch": 0.9, "learning_rate": 5.350540897206146e-07, "loss": 1.0886, "step": 3587 }, { "epoch": 0.9, "learning_rate": 5.324387814580323e-07, "loss": 1.107, "step": 3588 }, { "epoch": 0.9, "learning_rate": 5.298297057651836e-07, "loss": 1.0582, "step": 3589 }, { "epoch": 0.9, "learning_rate": 5.272268643596412e-07, "loss": 1.0655, "step": 3590 }, { "epoch": 0.9, "learning_rate": 5.246302589548791e-07, "loss": 1.1039, "step": 3591 }, { "epoch": 0.9, "learning_rate": 5.22039891260262e-07, "loss": 1.0703, "step": 3592 }, { "epoch": 0.9, "learning_rate": 5.194557629810482e-07, "loss": 1.0349, "step": 3593 }, { "epoch": 0.9, "learning_rate": 5.16877875818389e-07, "loss": 1.1, "step": 3594 }, { "epoch": 0.9, "learning_rate": 5.143062314693303e-07, "loss": 1.0523, "step": 3595 }, { "epoch": 0.9, "learning_rate": 5.117408316268047e-07, "loss": 1.049, "step": 3596 }, { "epoch": 0.9, "learning_rate": 5.091816779796321e-07, "loss": 1.064, "step": 3597 }, { "epoch": 0.9, "learning_rate": 5.066287722125296e-07, "loss": 1.0659, "step": 3598 }, { "epoch": 0.9, "learning_rate": 5.040821160060893e-07, "loss": 1.0429, "step": 3599 }, { "epoch": 0.9, "learning_rate": 5.01541711036797e-07, "loss": 1.0326, "step": 3600 }, { "epoch": 0.9, "learning_rate": 4.990075589770226e-07, "loss": 1.0426, "step": 3601 }, { "epoch": 0.9, "learning_rate": 4.964796614950151e-07, "loss": 1.0407, "step": 3602 }, { "epoch": 0.9, "learning_rate": 4.939580202549122e-07, "loss": 1.0618, "step": 3603 }, { "epoch": 0.9, "learning_rate": 4.914426369167291e-07, "loss": 1.0292, "step": 3604 }, { "epoch": 0.9, "learning_rate": 4.889335131363615e-07, "loss": 1.047, "step": 3605 }, { "epoch": 0.9, "learning_rate": 4.864306505655858e-07, "loss": 1.0592, "step": 3606 }, { "epoch": 0.9, "learning_rate": 4.839340508520563e-07, "loss": 1.0851, "step": 3607 }, { "epoch": 0.9, "learning_rate": 4.814437156393048e-07, "loss": 1.0735, "step": 3608 }, { "epoch": 0.9, "learning_rate": 4.789596465667357e-07, "loss": 1.0665, "step": 3609 }, { "epoch": 0.9, "learning_rate": 4.7648184526963516e-07, "loss": 1.0149, "step": 3610 }, { "epoch": 0.9, "learning_rate": 4.740103133791574e-07, "loss": 1.0435, "step": 3611 }, { "epoch": 0.9, "learning_rate": 4.7154505252232976e-07, "loss": 1.0385, "step": 3612 }, { "epoch": 0.9, "learning_rate": 4.6908606432205404e-07, "loss": 1.0889, "step": 3613 }, { "epoch": 0.91, "learning_rate": 4.666333503971032e-07, "loss": 1.083, "step": 3614 }, { "epoch": 0.91, "learning_rate": 4.641869123621145e-07, "loss": 1.065, "step": 3615 }, { "epoch": 0.91, "learning_rate": 4.6174675182759755e-07, "loss": 1.0337, "step": 3616 }, { "epoch": 0.91, "learning_rate": 4.59312870399935e-07, "loss": 1.037, "step": 3617 }, { "epoch": 0.91, "learning_rate": 4.5688526968136193e-07, "loss": 1.0523, "step": 3618 }, { "epoch": 0.91, "learning_rate": 4.544639512699922e-07, "loss": 1.0836, "step": 3619 }, { "epoch": 0.91, "learning_rate": 4.5204891675979746e-07, "loss": 1.0322, "step": 3620 }, { "epoch": 0.91, "learning_rate": 4.496401677406126e-07, "loss": 1.1007, "step": 3621 }, { "epoch": 0.91, "learning_rate": 4.47237705798137e-07, "loss": 1.0931, "step": 3622 }, { "epoch": 0.91, "learning_rate": 4.4484153251392883e-07, "loss": 1.0486, "step": 3623 }, { "epoch": 0.91, "learning_rate": 4.424516494654119e-07, "loss": 1.0965, "step": 3624 }, { "epoch": 0.91, "learning_rate": 4.4006805822585985e-07, "loss": 1.0354, "step": 3625 }, { "epoch": 0.91, "learning_rate": 4.3769076036441316e-07, "loss": 1.0755, "step": 3626 }, { "epoch": 0.91, "learning_rate": 4.3531975744606436e-07, "loss": 1.0722, "step": 3627 }, { "epoch": 0.91, "learning_rate": 4.329550510316627e-07, "loss": 1.0623, "step": 3628 }, { "epoch": 0.91, "learning_rate": 4.305966426779118e-07, "loss": 1.087, "step": 3629 }, { "epoch": 0.91, "learning_rate": 4.282445339373731e-07, "loss": 1.0567, "step": 3630 }, { "epoch": 0.91, "learning_rate": 4.258987263584569e-07, "loss": 1.0404, "step": 3631 }, { "epoch": 0.91, "learning_rate": 4.2355922148542563e-07, "loss": 1.049, "step": 3632 }, { "epoch": 0.91, "learning_rate": 4.2122602085839625e-07, "loss": 1.09, "step": 3633 }, { "epoch": 0.91, "learning_rate": 4.1889912601333127e-07, "loss": 1.0852, "step": 3634 }, { "epoch": 0.91, "learning_rate": 4.165785384820442e-07, "loss": 1.0164, "step": 3635 }, { "epoch": 0.91, "learning_rate": 4.142642597921953e-07, "loss": 1.0843, "step": 3636 }, { "epoch": 0.91, "learning_rate": 4.1195629146729366e-07, "loss": 1.0518, "step": 3637 }, { "epoch": 0.91, "learning_rate": 4.0965463502669165e-07, "loss": 1.0226, "step": 3638 }, { "epoch": 0.91, "learning_rate": 4.073592919855873e-07, "loss": 1.0489, "step": 3639 }, { "epoch": 0.91, "learning_rate": 4.0507026385502747e-07, "loss": 1.0817, "step": 3640 }, { "epoch": 0.91, "learning_rate": 4.0278755214189e-07, "loss": 1.0322, "step": 3641 }, { "epoch": 0.91, "learning_rate": 4.005111583489074e-07, "loss": 1.0499, "step": 3642 }, { "epoch": 0.91, "learning_rate": 3.9824108397464646e-07, "loss": 1.0494, "step": 3643 }, { "epoch": 0.91, "learning_rate": 3.9597733051351286e-07, "loss": 1.0544, "step": 3644 }, { "epoch": 0.91, "learning_rate": 3.937198994557556e-07, "loss": 1.072, "step": 3645 }, { "epoch": 0.91, "learning_rate": 3.9146879228745827e-07, "loss": 1.0218, "step": 3646 }, { "epoch": 0.91, "learning_rate": 3.8922401049054206e-07, "loss": 1.0703, "step": 3647 }, { "epoch": 0.91, "learning_rate": 3.8698555554276375e-07, "loss": 1.0373, "step": 3648 }, { "epoch": 0.91, "learning_rate": 3.8475342891771795e-07, "loss": 1.0556, "step": 3649 }, { "epoch": 0.91, "learning_rate": 3.8252763208483036e-07, "loss": 1.0362, "step": 3650 }, { "epoch": 0.91, "learning_rate": 3.8030816650935777e-07, "loss": 1.0534, "step": 3651 }, { "epoch": 0.91, "learning_rate": 3.7809503365239696e-07, "loss": 1.079, "step": 3652 }, { "epoch": 0.91, "learning_rate": 3.758882349708659e-07, "loss": 1.081, "step": 3653 }, { "epoch": 0.92, "learning_rate": 3.7368777191752024e-07, "loss": 1.068, "step": 3654 }, { "epoch": 0.92, "learning_rate": 3.714936459409402e-07, "loss": 1.055, "step": 3655 }, { "epoch": 0.92, "learning_rate": 3.693058584855369e-07, "loss": 1.063, "step": 3656 }, { "epoch": 0.92, "learning_rate": 3.671244109915495e-07, "loss": 1.0648, "step": 3657 }, { "epoch": 0.92, "learning_rate": 3.6494930489503813e-07, "loss": 1.0572, "step": 3658 }, { "epoch": 0.92, "learning_rate": 3.6278054162789623e-07, "loss": 1.0646, "step": 3659 }, { "epoch": 0.92, "learning_rate": 3.6061812261783623e-07, "loss": 1.0532, "step": 3660 }, { "epoch": 0.92, "learning_rate": 3.584620492883939e-07, "loss": 1.0678, "step": 3661 }, { "epoch": 0.92, "learning_rate": 3.5631232305893047e-07, "loss": 1.0347, "step": 3662 }, { "epoch": 0.92, "learning_rate": 3.5416894534463064e-07, "loss": 1.0505, "step": 3663 }, { "epoch": 0.92, "learning_rate": 3.5203191755649014e-07, "loss": 1.0235, "step": 3664 }, { "epoch": 0.92, "learning_rate": 3.499012411013358e-07, "loss": 1.0492, "step": 3665 }, { "epoch": 0.92, "learning_rate": 3.477769173818091e-07, "loss": 1.0444, "step": 3666 }, { "epoch": 0.92, "learning_rate": 3.4565894779636564e-07, "loss": 1.0409, "step": 3667 }, { "epoch": 0.92, "learning_rate": 3.435473337392836e-07, "loss": 1.0723, "step": 3668 }, { "epoch": 0.92, "learning_rate": 3.4144207660065545e-07, "loss": 1.0657, "step": 3669 }, { "epoch": 0.92, "learning_rate": 3.3934317776638694e-07, "loss": 1.0534, "step": 3670 }, { "epoch": 0.92, "learning_rate": 3.372506386181995e-07, "loss": 1.0671, "step": 3671 }, { "epoch": 0.92, "learning_rate": 3.3516446053363015e-07, "loss": 1.0399, "step": 3672 }, { "epoch": 0.92, "learning_rate": 3.3308464488602587e-07, "loss": 1.1033, "step": 3673 }, { "epoch": 0.92, "learning_rate": 3.3101119304454253e-07, "loss": 1.0598, "step": 3674 }, { "epoch": 0.92, "learning_rate": 3.2894410637415277e-07, "loss": 1.0131, "step": 3675 }, { "epoch": 0.92, "learning_rate": 3.2688338623563575e-07, "loss": 1.0265, "step": 3676 }, { "epoch": 0.92, "learning_rate": 3.2482903398557865e-07, "loss": 1.0619, "step": 3677 }, { "epoch": 0.92, "learning_rate": 3.2278105097637735e-07, "loss": 1.0982, "step": 3678 }, { "epoch": 0.92, "learning_rate": 3.2073943855623677e-07, "loss": 1.0345, "step": 3679 }, { "epoch": 0.92, "learning_rate": 3.187041980691652e-07, "loss": 1.0607, "step": 3680 }, { "epoch": 0.92, "learning_rate": 3.166753308549775e-07, "loss": 1.0739, "step": 3681 }, { "epoch": 0.92, "learning_rate": 3.1465283824929413e-07, "loss": 1.0542, "step": 3682 }, { "epoch": 0.92, "learning_rate": 3.1263672158353465e-07, "loss": 1.0513, "step": 3683 }, { "epoch": 0.92, "learning_rate": 3.106269821849273e-07, "loss": 1.0516, "step": 3684 }, { "epoch": 0.92, "learning_rate": 3.086236213764981e-07, "loss": 1.0837, "step": 3685 }, { "epoch": 0.92, "learning_rate": 3.0662664047707566e-07, "loss": 1.0464, "step": 3686 }, { "epoch": 0.92, "learning_rate": 3.046360408012883e-07, "loss": 1.0956, "step": 3687 }, { "epoch": 0.92, "learning_rate": 3.0265182365956213e-07, "loss": 1.0675, "step": 3688 }, { "epoch": 0.92, "learning_rate": 3.006739903581257e-07, "loss": 1.0423, "step": 3689 }, { "epoch": 0.92, "learning_rate": 2.9870254219899753e-07, "loss": 1.0881, "step": 3690 }, { "epoch": 0.92, "learning_rate": 2.9673748048000273e-07, "loss": 1.0394, "step": 3691 }, { "epoch": 0.92, "learning_rate": 2.947788064947532e-07, "loss": 1.0909, "step": 3692 }, { "epoch": 0.92, "learning_rate": 2.928265215326598e-07, "loss": 1.0541, "step": 3693 }, { "epoch": 0.93, "learning_rate": 2.908806268789277e-07, "loss": 0.9983, "step": 3694 }, { "epoch": 0.93, "learning_rate": 2.889411238145545e-07, "loss": 1.055, "step": 3695 }, { "epoch": 0.93, "learning_rate": 2.870080136163311e-07, "loss": 1.025, "step": 3696 }, { "epoch": 0.93, "learning_rate": 2.8508129755683734e-07, "loss": 1.0658, "step": 3697 }, { "epoch": 0.93, "learning_rate": 2.831609769044463e-07, "loss": 1.0341, "step": 3698 }, { "epoch": 0.93, "learning_rate": 2.8124705292332135e-07, "loss": 1.0429, "step": 3699 }, { "epoch": 0.93, "learning_rate": 2.7933952687341224e-07, "loss": 1.0499, "step": 3700 }, { "epoch": 0.93, "learning_rate": 2.7743840001045683e-07, "loss": 1.05, "step": 3701 }, { "epoch": 0.93, "learning_rate": 2.755436735859862e-07, "loss": 1.053, "step": 3702 }, { "epoch": 0.93, "learning_rate": 2.736553488473093e-07, "loss": 1.1142, "step": 3703 }, { "epoch": 0.93, "learning_rate": 2.717734270375272e-07, "loss": 1.0635, "step": 3704 }, { "epoch": 0.93, "learning_rate": 2.698979093955245e-07, "loss": 1.0888, "step": 3705 }, { "epoch": 0.93, "learning_rate": 2.6802879715596585e-07, "loss": 1.0862, "step": 3706 }, { "epoch": 0.93, "learning_rate": 2.6616609154930586e-07, "loss": 1.0815, "step": 3707 }, { "epoch": 0.93, "learning_rate": 2.643097938017769e-07, "loss": 1.07, "step": 3708 }, { "epoch": 0.93, "learning_rate": 2.624599051353938e-07, "loss": 0.9936, "step": 3709 }, { "epoch": 0.93, "learning_rate": 2.606164267679545e-07, "loss": 1.0738, "step": 3710 }, { "epoch": 0.93, "learning_rate": 2.587793599130339e-07, "loss": 1.0492, "step": 3711 }, { "epoch": 0.93, "learning_rate": 2.56948705779988e-07, "loss": 1.0283, "step": 3712 }, { "epoch": 0.93, "learning_rate": 2.5512446557394933e-07, "loss": 1.0335, "step": 3713 }, { "epoch": 0.93, "learning_rate": 2.5330664049583066e-07, "loss": 1.0328, "step": 3714 }, { "epoch": 0.93, "learning_rate": 2.514952317423225e-07, "loss": 1.0329, "step": 3715 }, { "epoch": 0.93, "learning_rate": 2.4969024050588543e-07, "loss": 1.0652, "step": 3716 }, { "epoch": 0.93, "learning_rate": 2.478916679747623e-07, "loss": 1.0808, "step": 3717 }, { "epoch": 0.93, "learning_rate": 2.4609951533296595e-07, "loss": 1.0861, "step": 3718 }, { "epoch": 0.93, "learning_rate": 2.4431378376028603e-07, "loss": 1.0332, "step": 3719 }, { "epoch": 0.93, "learning_rate": 2.4253447443228106e-07, "loss": 1.0491, "step": 3720 }, { "epoch": 0.93, "learning_rate": 2.4076158852028743e-07, "loss": 1.0861, "step": 3721 }, { "epoch": 0.93, "learning_rate": 2.389951271914082e-07, "loss": 1.0827, "step": 3722 }, { "epoch": 0.93, "learning_rate": 2.3723509160851867e-07, "loss": 1.0328, "step": 3723 }, { "epoch": 0.93, "learning_rate": 2.354814829302643e-07, "loss": 1.0671, "step": 3724 }, { "epoch": 0.93, "learning_rate": 2.3373430231105943e-07, "loss": 1.0439, "step": 3725 }, { "epoch": 0.93, "learning_rate": 2.319935509010862e-07, "loss": 1.0398, "step": 3726 }, { "epoch": 0.93, "learning_rate": 2.3025922984629577e-07, "loss": 1.0306, "step": 3727 }, { "epoch": 0.93, "learning_rate": 2.2853134028840594e-07, "loss": 1.025, "step": 3728 }, { "epoch": 0.93, "learning_rate": 2.2680988336489685e-07, "loss": 1.0412, "step": 3729 }, { "epoch": 0.93, "learning_rate": 2.250948602090197e-07, "loss": 1.0265, "step": 3730 }, { "epoch": 0.93, "learning_rate": 2.2338627194978812e-07, "loss": 1.0584, "step": 3731 }, { "epoch": 0.93, "learning_rate": 2.216841197119768e-07, "loss": 1.0695, "step": 3732 }, { "epoch": 0.93, "learning_rate": 2.1998840461612825e-07, "loss": 1.093, "step": 3733 }, { "epoch": 0.94, "learning_rate": 2.18299127778544e-07, "loss": 1.0212, "step": 3734 }, { "epoch": 0.94, "learning_rate": 2.1661629031128784e-07, "loss": 1.1022, "step": 3735 }, { "epoch": 0.94, "learning_rate": 2.1493989332218468e-07, "loss": 1.052, "step": 3736 }, { "epoch": 0.94, "learning_rate": 2.1326993791482175e-07, "loss": 1.038, "step": 3737 }, { "epoch": 0.94, "learning_rate": 2.116064251885408e-07, "loss": 1.0739, "step": 3738 }, { "epoch": 0.94, "learning_rate": 2.099493562384469e-07, "loss": 1.0372, "step": 3739 }, { "epoch": 0.94, "learning_rate": 2.0829873215540308e-07, "loss": 1.0455, "step": 3740 }, { "epoch": 0.94, "learning_rate": 2.0665455402602563e-07, "loss": 1.0374, "step": 3741 }, { "epoch": 0.94, "learning_rate": 2.0501682293269099e-07, "loss": 1.0171, "step": 3742 }, { "epoch": 0.94, "learning_rate": 2.0338553995352893e-07, "loss": 1.0587, "step": 3743 }, { "epoch": 0.94, "learning_rate": 2.0176070616242828e-07, "loss": 1.0615, "step": 3744 }, { "epoch": 0.94, "learning_rate": 2.0014232262902887e-07, "loss": 1.0302, "step": 3745 }, { "epoch": 0.94, "learning_rate": 1.9853039041872412e-07, "loss": 1.0634, "step": 3746 }, { "epoch": 0.94, "learning_rate": 1.9692491059266405e-07, "loss": 1.0769, "step": 3747 }, { "epoch": 0.94, "learning_rate": 1.9532588420774546e-07, "loss": 1.0182, "step": 3748 }, { "epoch": 0.94, "learning_rate": 1.9373331231662184e-07, "loss": 1.0678, "step": 3749 }, { "epoch": 0.94, "learning_rate": 1.921471959676957e-07, "loss": 1.0477, "step": 3750 }, { "epoch": 0.94, "learning_rate": 1.905675362051196e-07, "loss": 1.0612, "step": 3751 }, { "epoch": 0.94, "learning_rate": 1.889943340687961e-07, "loss": 1.0139, "step": 3752 }, { "epoch": 0.94, "learning_rate": 1.8742759059437564e-07, "loss": 1.0503, "step": 3753 }, { "epoch": 0.94, "learning_rate": 1.8586730681325992e-07, "loss": 1.0709, "step": 3754 }, { "epoch": 0.94, "learning_rate": 1.8431348375259396e-07, "loss": 1.0675, "step": 3755 }, { "epoch": 0.94, "learning_rate": 1.827661224352717e-07, "loss": 1.0302, "step": 3756 }, { "epoch": 0.94, "learning_rate": 1.812252238799339e-07, "loss": 1.0401, "step": 3757 }, { "epoch": 0.94, "learning_rate": 1.7969078910096582e-07, "loss": 1.0419, "step": 3758 }, { "epoch": 0.94, "learning_rate": 1.7816281910849831e-07, "loss": 1.0371, "step": 3759 }, { "epoch": 0.94, "learning_rate": 1.7664131490840676e-07, "loss": 1.0594, "step": 3760 }, { "epoch": 0.94, "learning_rate": 1.7512627750230772e-07, "loss": 1.0558, "step": 3761 }, { "epoch": 0.94, "learning_rate": 1.7361770788756115e-07, "loss": 1.0552, "step": 3762 }, { "epoch": 0.94, "learning_rate": 1.721156070572738e-07, "loss": 1.083, "step": 3763 }, { "epoch": 0.94, "learning_rate": 1.70619976000288e-07, "loss": 1.0639, "step": 3764 }, { "epoch": 0.94, "learning_rate": 1.6913081570118838e-07, "loss": 1.0178, "step": 3765 }, { "epoch": 0.94, "learning_rate": 1.676481271403041e-07, "loss": 1.0753, "step": 3766 }, { "epoch": 0.94, "learning_rate": 1.661719112936988e-07, "loss": 1.0808, "step": 3767 }, { "epoch": 0.94, "learning_rate": 1.6470216913317628e-07, "loss": 1.0207, "step": 3768 }, { "epoch": 0.94, "learning_rate": 1.632389016262792e-07, "loss": 1.1015, "step": 3769 }, { "epoch": 0.94, "learning_rate": 1.617821097362915e-07, "loss": 1.0328, "step": 3770 }, { "epoch": 0.94, "learning_rate": 1.6033179442222602e-07, "loss": 1.0375, "step": 3771 }, { "epoch": 0.94, "learning_rate": 1.5888795663883904e-07, "loss": 1.0294, "step": 3772 }, { "epoch": 0.94, "learning_rate": 1.5745059733662028e-07, "loss": 1.0267, "step": 3773 }, { "epoch": 0.95, "learning_rate": 1.560197174617939e-07, "loss": 1.0865, "step": 3774 }, { "epoch": 0.95, "learning_rate": 1.5459531795632087e-07, "loss": 1.0739, "step": 3775 }, { "epoch": 0.95, "learning_rate": 1.5317739975789225e-07, "loss": 1.0208, "step": 3776 }, { "epoch": 0.95, "learning_rate": 1.5176596379993692e-07, "loss": 1.0583, "step": 3777 }, { "epoch": 0.95, "learning_rate": 1.503610110116127e-07, "loss": 1.0638, "step": 3778 }, { "epoch": 0.95, "learning_rate": 1.4896254231781093e-07, "loss": 1.0329, "step": 3779 }, { "epoch": 0.95, "learning_rate": 1.4757055863915627e-07, "loss": 1.0562, "step": 3780 }, { "epoch": 0.95, "learning_rate": 1.4618506089200014e-07, "loss": 1.0299, "step": 3781 }, { "epoch": 0.95, "learning_rate": 1.448060499884285e-07, "loss": 1.0466, "step": 3782 }, { "epoch": 0.95, "learning_rate": 1.4343352683625412e-07, "loss": 1.0445, "step": 3783 }, { "epoch": 0.95, "learning_rate": 1.4206749233902084e-07, "loss": 1.0633, "step": 3784 }, { "epoch": 0.95, "learning_rate": 1.4070794739599713e-07, "loss": 1.0375, "step": 3785 }, { "epoch": 0.95, "learning_rate": 1.3935489290218485e-07, "loss": 1.0789, "step": 3786 }, { "epoch": 0.95, "learning_rate": 1.3800832974830925e-07, "loss": 1.0894, "step": 3787 }, { "epoch": 0.95, "learning_rate": 1.3666825882082347e-07, "loss": 1.0614, "step": 3788 }, { "epoch": 0.95, "learning_rate": 1.3533468100190738e-07, "loss": 1.0406, "step": 3789 }, { "epoch": 0.95, "learning_rate": 1.3400759716946433e-07, "loss": 1.0912, "step": 3790 }, { "epoch": 0.95, "learning_rate": 1.3268700819712543e-07, "loss": 1.054, "step": 3791 }, { "epoch": 0.95, "learning_rate": 1.313729149542442e-07, "loss": 1.0446, "step": 3792 }, { "epoch": 0.95, "learning_rate": 1.3006531830589751e-07, "loss": 1.0445, "step": 3793 }, { "epoch": 0.95, "learning_rate": 1.2876421911288906e-07, "loss": 1.0577, "step": 3794 }, { "epoch": 0.95, "learning_rate": 1.274696182317392e-07, "loss": 1.0232, "step": 3795 }, { "epoch": 0.95, "learning_rate": 1.2618151651469735e-07, "loss": 1.0802, "step": 3796 }, { "epoch": 0.95, "learning_rate": 1.248999148097274e-07, "loss": 1.0695, "step": 3797 }, { "epoch": 0.95, "learning_rate": 1.2362481396052118e-07, "loss": 1.0505, "step": 3798 }, { "epoch": 0.95, "learning_rate": 1.2235621480648497e-07, "loss": 1.0704, "step": 3799 }, { "epoch": 0.95, "learning_rate": 1.2109411818274851e-07, "loss": 1.0865, "step": 3800 }, { "epoch": 0.95, "learning_rate": 1.1983852492016057e-07, "loss": 1.0363, "step": 3801 }, { "epoch": 0.95, "learning_rate": 1.1858943584528771e-07, "loss": 1.0396, "step": 3802 }, { "epoch": 0.95, "learning_rate": 1.1734685178041439e-07, "loss": 1.0633, "step": 3803 }, { "epoch": 0.95, "learning_rate": 1.1611077354354406e-07, "loss": 1.0875, "step": 3804 }, { "epoch": 0.95, "learning_rate": 1.148812019483958e-07, "loss": 1.0387, "step": 3805 }, { "epoch": 0.95, "learning_rate": 1.1365813780440771e-07, "loss": 1.0664, "step": 3806 }, { "epoch": 0.95, "learning_rate": 1.1244158191673126e-07, "loss": 1.0796, "step": 3807 }, { "epoch": 0.95, "learning_rate": 1.1123153508623474e-07, "loss": 1.0573, "step": 3808 }, { "epoch": 0.95, "learning_rate": 1.1002799810950315e-07, "loss": 1.0344, "step": 3809 }, { "epoch": 0.95, "learning_rate": 1.0883097177883162e-07, "loss": 1.0705, "step": 3810 }, { "epoch": 0.95, "learning_rate": 1.0764045688223424e-07, "loss": 1.0566, "step": 3811 }, { "epoch": 0.95, "learning_rate": 1.064564542034352e-07, "loss": 1.0206, "step": 3812 }, { "epoch": 0.95, "learning_rate": 1.0527896452187325e-07, "loss": 1.0648, "step": 3813 }, { "epoch": 0.96, "learning_rate": 1.0410798861269834e-07, "loss": 1.0664, "step": 3814 }, { "epoch": 0.96, "learning_rate": 1.029435272467727e-07, "loss": 1.0771, "step": 3815 }, { "epoch": 0.96, "learning_rate": 1.0178558119067316e-07, "loss": 1.0484, "step": 3816 }, { "epoch": 0.96, "learning_rate": 1.0063415120668107e-07, "loss": 1.0583, "step": 3817 }, { "epoch": 0.96, "learning_rate": 9.948923805279453e-08, "loss": 1.0884, "step": 3818 }, { "epoch": 0.96, "learning_rate": 9.835084248271843e-08, "loss": 1.0582, "step": 3819 }, { "epoch": 0.96, "learning_rate": 9.721896524586439e-08, "loss": 1.097, "step": 3820 }, { "epoch": 0.96, "learning_rate": 9.609360708736082e-08, "loss": 1.0897, "step": 3821 }, { "epoch": 0.96, "learning_rate": 9.497476874803624e-08, "loss": 1.0819, "step": 3822 }, { "epoch": 0.96, "learning_rate": 9.386245096443259e-08, "loss": 1.0912, "step": 3823 }, { "epoch": 0.96, "learning_rate": 9.275665446879633e-08, "loss": 0.9992, "step": 3824 }, { "epoch": 0.96, "learning_rate": 9.165737998908298e-08, "loss": 1.0457, "step": 3825 }, { "epoch": 0.96, "learning_rate": 9.056462824895252e-08, "loss": 1.0584, "step": 3826 }, { "epoch": 0.96, "learning_rate": 8.947839996777286e-08, "loss": 1.0481, "step": 3827 }, { "epoch": 0.96, "learning_rate": 8.839869586061533e-08, "loss": 1.0339, "step": 3828 }, { "epoch": 0.96, "learning_rate": 8.732551663825917e-08, "loss": 1.0732, "step": 3829 }, { "epoch": 0.96, "learning_rate": 8.62588630071859e-08, "loss": 1.0542, "step": 3830 }, { "epoch": 0.96, "learning_rate": 8.51987356695816e-08, "loss": 1.1036, "step": 3831 }, { "epoch": 0.96, "learning_rate": 8.41451353233369e-08, "loss": 1.0378, "step": 3832 }, { "epoch": 0.96, "learning_rate": 8.309806266204478e-08, "loss": 1.0714, "step": 3833 }, { "epoch": 0.96, "learning_rate": 8.205751837500165e-08, "loss": 1.0504, "step": 3834 }, { "epoch": 0.96, "learning_rate": 8.102350314720731e-08, "loss": 1.0194, "step": 3835 }, { "epoch": 0.96, "learning_rate": 7.999601765935949e-08, "loss": 1.0658, "step": 3836 }, { "epoch": 0.96, "learning_rate": 7.897506258786047e-08, "loss": 1.0302, "step": 3837 }, { "epoch": 0.96, "learning_rate": 7.796063860481595e-08, "loss": 1.0462, "step": 3838 }, { "epoch": 0.96, "learning_rate": 7.695274637802508e-08, "loss": 1.0557, "step": 3839 }, { "epoch": 0.96, "learning_rate": 7.595138657099376e-08, "loss": 1.0898, "step": 3840 }, { "epoch": 0.96, "learning_rate": 7.495655984292361e-08, "loss": 1.0546, "step": 3841 }, { "epoch": 0.96, "learning_rate": 7.396826684871849e-08, "loss": 1.0222, "step": 3842 }, { "epoch": 0.96, "learning_rate": 7.298650823897802e-08, "loss": 1.063, "step": 3843 }, { "epoch": 0.96, "learning_rate": 7.201128466000185e-08, "loss": 1.0509, "step": 3844 }, { "epoch": 0.96, "learning_rate": 7.104259675378755e-08, "loss": 1.0946, "step": 3845 }, { "epoch": 0.96, "learning_rate": 7.008044515803058e-08, "loss": 1.0318, "step": 3846 }, { "epoch": 0.96, "learning_rate": 6.912483050612095e-08, "loss": 1.0233, "step": 3847 }, { "epoch": 0.96, "learning_rate": 6.817575342714988e-08, "loss": 1.0197, "step": 3848 }, { "epoch": 0.96, "learning_rate": 6.723321454590093e-08, "loss": 1.0396, "step": 3849 }, { "epoch": 0.96, "learning_rate": 6.629721448285331e-08, "loss": 1.077, "step": 3850 }, { "epoch": 0.96, "learning_rate": 6.536775385418525e-08, "loss": 1.0637, "step": 3851 }, { "epoch": 0.96, "learning_rate": 6.444483327176843e-08, "loss": 1.0551, "step": 3852 }, { "epoch": 0.96, "learning_rate": 6.352845334316682e-08, "loss": 1.0567, "step": 3853 }, { "epoch": 0.97, "learning_rate": 6.261861467164121e-08, "loss": 1.0496, "step": 3854 }, { "epoch": 0.97, "learning_rate": 6.171531785614582e-08, "loss": 1.0436, "step": 3855 }, { "epoch": 0.97, "learning_rate": 6.08185634913283e-08, "loss": 1.0736, "step": 3856 }, { "epoch": 0.97, "learning_rate": 5.992835216752756e-08, "loss": 1.0934, "step": 3857 }, { "epoch": 0.97, "learning_rate": 5.9044684470779266e-08, "loss": 1.0711, "step": 3858 }, { "epoch": 0.97, "learning_rate": 5.816756098280585e-08, "loss": 1.1031, "step": 3859 }, { "epoch": 0.97, "learning_rate": 5.7296982281026534e-08, "loss": 1.041, "step": 3860 }, { "epoch": 0.97, "learning_rate": 5.643294893855067e-08, "loss": 1.0022, "step": 3861 }, { "epoch": 0.97, "learning_rate": 5.557546152417548e-08, "loss": 1.0369, "step": 3862 }, { "epoch": 0.97, "learning_rate": 5.472452060239386e-08, "loss": 1.0877, "step": 3863 }, { "epoch": 0.97, "learning_rate": 5.388012673338661e-08, "loss": 1.0585, "step": 3864 }, { "epoch": 0.97, "learning_rate": 5.304228047302462e-08, "loss": 1.0056, "step": 3865 }, { "epoch": 0.97, "learning_rate": 5.221098237286781e-08, "loss": 1.0401, "step": 3866 }, { "epoch": 0.97, "learning_rate": 5.138623298016732e-08, "loss": 1.0253, "step": 3867 }, { "epoch": 0.97, "learning_rate": 5.056803283786216e-08, "loss": 1.0247, "step": 3868 }, { "epoch": 0.97, "learning_rate": 4.975638248457815e-08, "loss": 1.0385, "step": 3869 }, { "epoch": 0.97, "learning_rate": 4.895128245463454e-08, "loss": 1.0778, "step": 3870 }, { "epoch": 0.97, "learning_rate": 4.815273327803183e-08, "loss": 1.0626, "step": 3871 }, { "epoch": 0.97, "learning_rate": 4.736073548046172e-08, "loss": 1.0325, "step": 3872 }, { "epoch": 0.97, "learning_rate": 4.657528958330382e-08, "loss": 1.0558, "step": 3873 }, { "epoch": 0.97, "learning_rate": 4.57963961036223e-08, "loss": 1.0568, "step": 3874 }, { "epoch": 0.97, "learning_rate": 4.502405555416922e-08, "loss": 1.0251, "step": 3875 }, { "epoch": 0.97, "learning_rate": 4.4258268443382326e-08, "loss": 1.0705, "step": 3876 }, { "epoch": 0.97, "learning_rate": 4.3499035275385015e-08, "loss": 1.0428, "step": 3877 }, { "epoch": 0.97, "learning_rate": 4.274635654998638e-08, "loss": 1.0219, "step": 3878 }, { "epoch": 0.97, "learning_rate": 4.200023276268228e-08, "loss": 1.0827, "step": 3879 }, { "epoch": 0.97, "learning_rate": 4.126066440464982e-08, "loss": 1.0581, "step": 3880 }, { "epoch": 0.97, "learning_rate": 4.0527651962752877e-08, "loss": 1.0942, "step": 3881 }, { "epoch": 0.97, "learning_rate": 3.980119591954101e-08, "loss": 1.0788, "step": 3882 }, { "epoch": 0.97, "learning_rate": 3.90812967532439e-08, "loss": 1.0614, "step": 3883 }, { "epoch": 0.97, "learning_rate": 3.8367954937779116e-08, "loss": 1.1033, "step": 3884 }, { "epoch": 0.97, "learning_rate": 3.766117094274213e-08, "loss": 1.0773, "step": 3885 }, { "epoch": 0.97, "learning_rate": 3.6960945233416315e-08, "loss": 1.0581, "step": 3886 }, { "epoch": 0.97, "learning_rate": 3.626727827076626e-08, "loss": 1.0574, "step": 3887 }, { "epoch": 0.97, "learning_rate": 3.558017051143559e-08, "loss": 1.0498, "step": 3888 }, { "epoch": 0.97, "learning_rate": 3.48996224077569e-08, "loss": 1.0489, "step": 3889 }, { "epoch": 0.97, "learning_rate": 3.42256344077363e-08, "loss": 1.0942, "step": 3890 }, { "epoch": 0.97, "learning_rate": 3.355820695506884e-08, "loss": 1.074, "step": 3891 }, { "epoch": 0.97, "learning_rate": 3.289734048912419e-08, "loss": 1.0599, "step": 3892 }, { "epoch": 0.97, "learning_rate": 3.224303544495766e-08, "loss": 1.1286, "step": 3893 }, { "epoch": 0.98, "learning_rate": 3.159529225330249e-08, "loss": 1.0193, "step": 3894 }, { "epoch": 0.98, "learning_rate": 3.0954111340574246e-08, "loss": 1.0904, "step": 3895 }, { "epoch": 0.98, "learning_rate": 3.03194931288664e-08, "loss": 1.0495, "step": 3896 }, { "epoch": 0.98, "learning_rate": 2.9691438035953645e-08, "loss": 1.033, "step": 3897 }, { "epoch": 0.98, "learning_rate": 2.9069946475289712e-08, "loss": 1.1002, "step": 3898 }, { "epoch": 0.98, "learning_rate": 2.845501885600621e-08, "loss": 1.0593, "step": 3899 }, { "epoch": 0.98, "learning_rate": 2.7846655582915995e-08, "loss": 1.0786, "step": 3900 }, { "epoch": 0.98, "learning_rate": 2.724485705650981e-08, "loss": 1.0742, "step": 3901 }, { "epoch": 0.98, "learning_rate": 2.6649623672954094e-08, "loss": 1.0452, "step": 3902 }, { "epoch": 0.98, "learning_rate": 2.606095582409762e-08, "loss": 1.0702, "step": 3903 }, { "epoch": 0.98, "learning_rate": 2.547885389746485e-08, "loss": 1.0463, "step": 3904 }, { "epoch": 0.98, "learning_rate": 2.4903318276257027e-08, "loss": 1.0159, "step": 3905 }, { "epoch": 0.98, "learning_rate": 2.4334349339354413e-08, "loss": 1.0529, "step": 3906 }, { "epoch": 0.98, "learning_rate": 2.377194746131406e-08, "loss": 1.0664, "step": 3907 }, { "epoch": 0.98, "learning_rate": 2.321611301236981e-08, "loss": 1.0765, "step": 3908 }, { "epoch": 0.98, "learning_rate": 2.2666846358430082e-08, "loss": 1.0683, "step": 3909 }, { "epoch": 0.98, "learning_rate": 2.212414786108341e-08, "loss": 1.0584, "step": 3910 }, { "epoch": 0.98, "learning_rate": 2.1588017877590683e-08, "loss": 1.0372, "step": 3911 }, { "epoch": 0.98, "learning_rate": 2.10584567608918e-08, "loss": 1.0642, "step": 3912 }, { "epoch": 0.98, "learning_rate": 2.0535464859600118e-08, "loss": 1.0539, "step": 3913 }, { "epoch": 0.98, "learning_rate": 2.0019042518006902e-08, "loss": 1.0292, "step": 3914 }, { "epoch": 0.98, "learning_rate": 1.9509190076074657e-08, "loss": 1.1013, "step": 3915 }, { "epoch": 0.98, "learning_rate": 1.9005907869446007e-08, "loss": 1.0642, "step": 3916 }, { "epoch": 0.98, "learning_rate": 1.8509196229432592e-08, "loss": 1.0377, "step": 3917 }, { "epoch": 0.98, "learning_rate": 1.8019055483025073e-08, "loss": 0.977, "step": 3918 }, { "epoch": 0.98, "learning_rate": 1.7535485952887565e-08, "loss": 1.1226, "step": 3919 }, { "epoch": 0.98, "learning_rate": 1.705848795735654e-08, "loss": 1.0667, "step": 3920 }, { "epoch": 0.98, "learning_rate": 1.6588061810444144e-08, "loss": 1.0327, "step": 3921 }, { "epoch": 0.98, "learning_rate": 1.6124207821835993e-08, "loss": 1.0899, "step": 3922 }, { "epoch": 0.98, "learning_rate": 1.5666926296890038e-08, "loss": 1.0409, "step": 3923 }, { "epoch": 0.98, "learning_rate": 1.5216217536637712e-08, "loss": 1.0294, "step": 3924 }, { "epoch": 0.98, "learning_rate": 1.4772081837786111e-08, "loss": 1.0449, "step": 3925 }, { "epoch": 0.98, "learning_rate": 1.4334519492711362e-08, "loss": 1.1324, "step": 3926 }, { "epoch": 0.98, "learning_rate": 1.3903530789464158e-08, "loss": 1.0687, "step": 3927 }, { "epoch": 0.98, "learning_rate": 1.3479116011769766e-08, "loss": 1.0585, "step": 3928 }, { "epoch": 0.98, "learning_rate": 1.3061275439021359e-08, "loss": 1.0524, "step": 3929 }, { "epoch": 0.98, "learning_rate": 1.2650009346288905e-08, "loss": 1.0583, "step": 3930 }, { "epoch": 0.98, "learning_rate": 1.224531800431028e-08, "loss": 1.0909, "step": 3931 }, { "epoch": 0.98, "learning_rate": 1.1847201679496823e-08, "loss": 1.0741, "step": 3932 }, { "epoch": 0.99, "learning_rate": 1.145566063393333e-08, "loss": 1.0499, "step": 3933 }, { "epoch": 0.99, "learning_rate": 1.107069512537362e-08, "loss": 1.0305, "step": 3934 }, { "epoch": 0.99, "learning_rate": 1.0692305407242754e-08, "loss": 1.0799, "step": 3935 }, { "epoch": 0.99, "learning_rate": 1.0320491728638138e-08, "loss": 1.0854, "step": 3936 }, { "epoch": 0.99, "learning_rate": 9.955254334328424e-09, "loss": 1.0788, "step": 3937 }, { "epoch": 0.99, "learning_rate": 9.59659346475128e-09, "loss": 1.0683, "step": 3938 }, { "epoch": 0.99, "learning_rate": 9.244509356015619e-09, "loss": 1.0315, "step": 3939 }, { "epoch": 0.99, "learning_rate": 8.899002239902698e-09, "loss": 1.0607, "step": 3940 }, { "epoch": 0.99, "learning_rate": 8.560072343860582e-09, "loss": 1.0402, "step": 3941 }, { "epoch": 0.99, "learning_rate": 8.227719891011898e-09, "loss": 1.0313, "step": 3942 }, { "epoch": 0.99, "learning_rate": 7.90194510014386e-09, "loss": 1.018, "step": 3943 }, { "epoch": 0.99, "learning_rate": 7.582748185719357e-09, "loss": 1.0704, "step": 3944 }, { "epoch": 0.99, "learning_rate": 7.270129357866973e-09, "loss": 1.0695, "step": 3945 }, { "epoch": 0.99, "learning_rate": 6.964088822385418e-09, "loss": 1.0801, "step": 3946 }, { "epoch": 0.99, "learning_rate": 6.664626780744643e-09, "loss": 1.0703, "step": 3947 }, { "epoch": 0.99, "learning_rate": 6.371743430082511e-09, "loss": 1.0418, "step": 3948 }, { "epoch": 0.99, "learning_rate": 6.0854389632081236e-09, "loss": 1.059, "step": 3949 }, { "epoch": 0.99, "learning_rate": 5.805713568594051e-09, "loss": 1.0654, "step": 3950 }, { "epoch": 0.99, "learning_rate": 5.532567430389657e-09, "loss": 1.0699, "step": 3951 }, { "epoch": 0.99, "learning_rate": 5.266000728406662e-09, "loss": 1.0736, "step": 3952 }, { "epoch": 0.99, "learning_rate": 5.0060136381291365e-09, "loss": 1.0366, "step": 3953 }, { "epoch": 0.99, "learning_rate": 4.752606330707954e-09, "loss": 1.0666, "step": 3954 }, { "epoch": 0.99, "learning_rate": 4.505778972964114e-09, "loss": 1.023, "step": 3955 }, { "epoch": 0.99, "learning_rate": 4.265531727385419e-09, "loss": 1.0415, "step": 3956 }, { "epoch": 0.99, "learning_rate": 4.031864752127579e-09, "loss": 1.0284, "step": 3957 }, { "epoch": 0.99, "learning_rate": 3.804778201017545e-09, "loss": 1.0507, "step": 3958 }, { "epoch": 0.99, "learning_rate": 3.5842722235468475e-09, "loss": 1.0547, "step": 3959 }, { "epoch": 0.99, "learning_rate": 3.3703469648760367e-09, "loss": 1.0219, "step": 3960 }, { "epoch": 0.99, "learning_rate": 3.1630025658335727e-09, "loss": 1.0203, "step": 3961 }, { "epoch": 0.99, "learning_rate": 2.9622391629169357e-09, "loss": 1.0363, "step": 3962 }, { "epoch": 0.99, "learning_rate": 2.7680568882904047e-09, "loss": 1.0588, "step": 3963 }, { "epoch": 0.99, "learning_rate": 2.5804558697839487e-09, "loss": 1.0986, "step": 3964 }, { "epoch": 0.99, "learning_rate": 2.3994362308987773e-09, "loss": 1.037, "step": 3965 }, { "epoch": 0.99, "learning_rate": 2.2249980908006786e-09, "loss": 1.0808, "step": 3966 }, { "epoch": 0.99, "learning_rate": 2.0571415643233506e-09, "loss": 1.06, "step": 3967 }, { "epoch": 0.99, "learning_rate": 1.895866761968401e-09, "loss": 1.0868, "step": 3968 }, { "epoch": 0.99, "learning_rate": 1.741173789904238e-09, "loss": 1.0673, "step": 3969 }, { "epoch": 0.99, "learning_rate": 1.593062749967178e-09, "loss": 1.0698, "step": 3970 }, { "epoch": 0.99, "learning_rate": 1.4515337396581175e-09, "loss": 1.0932, "step": 3971 }, { "epoch": 0.99, "learning_rate": 1.316586852148083e-09, "loss": 1.0619, "step": 3972 }, { "epoch": 1.0, "learning_rate": 1.18822217627379e-09, "loss": 1.0375, "step": 3973 }, { "epoch": 1.0, "learning_rate": 1.0664397965387542e-09, "loss": 1.0836, "step": 3974 }, { "epoch": 1.0, "learning_rate": 9.512397931110695e-10, "loss": 1.0691, "step": 3975 }, { "epoch": 1.0, "learning_rate": 8.426222418311814e-10, "loss": 1.0515, "step": 3976 }, { "epoch": 1.0, "learning_rate": 7.405872142007831e-10, "loss": 1.0818, "step": 3977 }, { "epoch": 1.0, "learning_rate": 6.451347773905881e-10, "loss": 1.0622, "step": 3978 }, { "epoch": 1.0, "learning_rate": 5.562649942369991e-10, "loss": 1.0656, "step": 3979 }, { "epoch": 1.0, "learning_rate": 4.739779232454389e-10, "loss": 1.0437, "step": 3980 }, { "epoch": 1.0, "learning_rate": 3.982736185859093e-10, "loss": 1.05, "step": 3981 }, { "epoch": 1.0, "learning_rate": 3.291521300929912e-10, "loss": 1.0397, "step": 3982 }, { "epoch": 1.0, "learning_rate": 2.666135032725059e-10, "loss": 1.066, "step": 3983 }, { "epoch": 1.0, "learning_rate": 2.1065777929263343e-10, "loss": 1.0999, "step": 3984 }, { "epoch": 1.0, "learning_rate": 1.6128499499057371e-10, "loss": 1.0265, "step": 3985 }, { "epoch": 1.0, "learning_rate": 1.1849518286699558e-10, "loss": 1.0529, "step": 3986 }, { "epoch": 1.0, "learning_rate": 8.228837109269805e-11, "loss": 1.04, "step": 3987 }, { "epoch": 1.0, "learning_rate": 5.266458350083881e-11, "loss": 1.0636, "step": 3988 }, { "epoch": 1.0, "learning_rate": 2.962383959581594e-11, "loss": 1.0789, "step": 3989 }, { "epoch": 1.0, "learning_rate": 1.3166154542165743e-11, "loss": 1.0883, "step": 3990 }, { "epoch": 1.0, "learning_rate": 3.2915391778853835e-12, "loss": 1.077, "step": 3991 }, { "epoch": 1.0, "learning_rate": 0.0, "loss": 1.0401, "step": 3992 }, { "epoch": 1.0, "step": 3992, "total_flos": 1.0165915460504125e+19, "train_loss": 1.1065648059567852, "train_runtime": 86966.5983, "train_samples_per_second": 5.877, "train_steps_per_second": 0.046 } ], "max_steps": 3992, "num_train_epochs": 1, "total_flos": 1.0165915460504125e+19, "trial_name": null, "trial_params": null }