Training in progress, step 400, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 161533192
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:068b3b1bc314a43d0eee3fa3201a994ea1f37fea65c47f3391566c7f615e4324
|
3 |
size 161533192
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 82461044
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4dba59ffe60af080b3920ea163a5a588299c3bb8bd5e6e66a912a79abcc7ef34
|
3 |
size 82461044
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14244
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ba9c3a034fe91f3e6fd2e0635e959f990d8cb75b401553f86500be7643d0a95f
|
3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9b230d39ff18e054306fe88dd158c885b4e4aab2378a582b64a99349a20eb587
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
-
"best_metric": 1.
|
3 |
-
"best_model_checkpoint": "miner_id_24/checkpoint-
|
4 |
-
"epoch": 0.
|
5 |
"eval_steps": 100,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -2139,6 +2139,714 @@
|
|
2139 |
"eval_samples_per_second": 14.018,
|
2140 |
"eval_steps_per_second": 3.505,
|
2141 |
"step": 300
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2142 |
}
|
2143 |
],
|
2144 |
"logging_steps": 1,
|
@@ -2167,7 +2875,7 @@
|
|
2167 |
"attributes": {}
|
2168 |
}
|
2169 |
},
|
2170 |
-
"total_flos":
|
2171 |
"train_batch_size": 4,
|
2172 |
"trial_name": null,
|
2173 |
"trial_params": null
|
|
|
1 |
{
|
2 |
+
"best_metric": 1.6927062273025513,
|
3 |
+
"best_model_checkpoint": "miner_id_24/checkpoint-400",
|
4 |
+
"epoch": 0.15939033198017583,
|
5 |
"eval_steps": 100,
|
6 |
+
"global_step": 400,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
2139 |
"eval_samples_per_second": 14.018,
|
2140 |
"eval_steps_per_second": 3.505,
|
2141 |
"step": 300
|
2142 |
+
},
|
2143 |
+
{
|
2144 |
+
"epoch": 0.1199412248150823,
|
2145 |
+
"grad_norm": 0.5043081045150757,
|
2146 |
+
"learning_rate": 5.020767189299369e-05,
|
2147 |
+
"loss": 1.6154,
|
2148 |
+
"step": 301
|
2149 |
+
},
|
2150 |
+
{
|
2151 |
+
"epoch": 0.12033970064503274,
|
2152 |
+
"grad_norm": 0.5781189203262329,
|
2153 |
+
"learning_rate": 4.9585518766315496e-05,
|
2154 |
+
"loss": 1.7757,
|
2155 |
+
"step": 302
|
2156 |
+
},
|
2157 |
+
{
|
2158 |
+
"epoch": 0.1207381764749832,
|
2159 |
+
"grad_norm": 0.5455333590507507,
|
2160 |
+
"learning_rate": 4.896597113499479e-05,
|
2161 |
+
"loss": 1.5694,
|
2162 |
+
"step": 303
|
2163 |
+
},
|
2164 |
+
{
|
2165 |
+
"epoch": 0.12113665230493363,
|
2166 |
+
"grad_norm": 0.5005789399147034,
|
2167 |
+
"learning_rate": 4.834906101817438e-05,
|
2168 |
+
"loss": 1.716,
|
2169 |
+
"step": 304
|
2170 |
+
},
|
2171 |
+
{
|
2172 |
+
"epoch": 0.12153512813488407,
|
2173 |
+
"grad_norm": 0.5123348832130432,
|
2174 |
+
"learning_rate": 4.773482029868657e-05,
|
2175 |
+
"loss": 1.7465,
|
2176 |
+
"step": 305
|
2177 |
+
},
|
2178 |
+
{
|
2179 |
+
"epoch": 0.1219336039648345,
|
2180 |
+
"grad_norm": 0.5470724105834961,
|
2181 |
+
"learning_rate": 4.712328072140505e-05,
|
2182 |
+
"loss": 1.7012,
|
2183 |
+
"step": 306
|
2184 |
+
},
|
2185 |
+
{
|
2186 |
+
"epoch": 0.12233207979478494,
|
2187 |
+
"grad_norm": 0.47325006127357483,
|
2188 |
+
"learning_rate": 4.651447389160458e-05,
|
2189 |
+
"loss": 1.6349,
|
2190 |
+
"step": 307
|
2191 |
+
},
|
2192 |
+
{
|
2193 |
+
"epoch": 0.1227305556247354,
|
2194 |
+
"grad_norm": 0.4630093276500702,
|
2195 |
+
"learning_rate": 4.5908431273327436e-05,
|
2196 |
+
"loss": 1.5126,
|
2197 |
+
"step": 308
|
2198 |
+
},
|
2199 |
+
{
|
2200 |
+
"epoch": 0.12312903145468583,
|
2201 |
+
"grad_norm": 0.5252417922019958,
|
2202 |
+
"learning_rate": 4.530518418775733e-05,
|
2203 |
+
"loss": 1.7035,
|
2204 |
+
"step": 309
|
2205 |
+
},
|
2206 |
+
{
|
2207 |
+
"epoch": 0.12352750728463627,
|
2208 |
+
"grad_norm": 0.5543851256370544,
|
2209 |
+
"learning_rate": 4.470476381160065e-05,
|
2210 |
+
"loss": 1.9395,
|
2211 |
+
"step": 310
|
2212 |
+
},
|
2213 |
+
{
|
2214 |
+
"epoch": 0.1239259831145867,
|
2215 |
+
"grad_norm": 0.4824533462524414,
|
2216 |
+
"learning_rate": 4.4107201175475275e-05,
|
2217 |
+
"loss": 1.5935,
|
2218 |
+
"step": 311
|
2219 |
+
},
|
2220 |
+
{
|
2221 |
+
"epoch": 0.12432445894453714,
|
2222 |
+
"grad_norm": 0.5243806838989258,
|
2223 |
+
"learning_rate": 4.351252716230685e-05,
|
2224 |
+
"loss": 1.7277,
|
2225 |
+
"step": 312
|
2226 |
+
},
|
2227 |
+
{
|
2228 |
+
"epoch": 0.12472293477448758,
|
2229 |
+
"grad_norm": 0.501171886920929,
|
2230 |
+
"learning_rate": 4.292077250573266e-05,
|
2231 |
+
"loss": 1.594,
|
2232 |
+
"step": 313
|
2233 |
+
},
|
2234 |
+
{
|
2235 |
+
"epoch": 0.12512141060443802,
|
2236 |
+
"grad_norm": 0.47692808508872986,
|
2237 |
+
"learning_rate": 4.2331967788513295e-05,
|
2238 |
+
"loss": 1.4772,
|
2239 |
+
"step": 314
|
2240 |
+
},
|
2241 |
+
{
|
2242 |
+
"epoch": 0.12551988643438847,
|
2243 |
+
"grad_norm": 0.5134193897247314,
|
2244 |
+
"learning_rate": 4.174614344095213e-05,
|
2245 |
+
"loss": 1.643,
|
2246 |
+
"step": 315
|
2247 |
+
},
|
2248 |
+
{
|
2249 |
+
"epoch": 0.1259183622643389,
|
2250 |
+
"grad_norm": 0.5090487003326416,
|
2251 |
+
"learning_rate": 4.116332973932256e-05,
|
2252 |
+
"loss": 1.6696,
|
2253 |
+
"step": 316
|
2254 |
+
},
|
2255 |
+
{
|
2256 |
+
"epoch": 0.12631683809428934,
|
2257 |
+
"grad_norm": 0.5596434473991394,
|
2258 |
+
"learning_rate": 4.058355680430337e-05,
|
2259 |
+
"loss": 1.4942,
|
2260 |
+
"step": 317
|
2261 |
+
},
|
2262 |
+
{
|
2263 |
+
"epoch": 0.1267153139242398,
|
2264 |
+
"grad_norm": 0.5060478448867798,
|
2265 |
+
"learning_rate": 4.0006854599421926e-05,
|
2266 |
+
"loss": 1.7277,
|
2267 |
+
"step": 318
|
2268 |
+
},
|
2269 |
+
{
|
2270 |
+
"epoch": 0.12711378975419022,
|
2271 |
+
"grad_norm": 0.5043231248855591,
|
2272 |
+
"learning_rate": 3.943325292950579e-05,
|
2273 |
+
"loss": 1.653,
|
2274 |
+
"step": 319
|
2275 |
+
},
|
2276 |
+
{
|
2277 |
+
"epoch": 0.12751226558414067,
|
2278 |
+
"grad_norm": 0.49735555052757263,
|
2279 |
+
"learning_rate": 3.886278143914219e-05,
|
2280 |
+
"loss": 1.6637,
|
2281 |
+
"step": 320
|
2282 |
+
},
|
2283 |
+
{
|
2284 |
+
"epoch": 0.1279107414140911,
|
2285 |
+
"grad_norm": 0.5129334926605225,
|
2286 |
+
"learning_rate": 3.829546961114607e-05,
|
2287 |
+
"loss": 1.7365,
|
2288 |
+
"step": 321
|
2289 |
+
},
|
2290 |
+
{
|
2291 |
+
"epoch": 0.12830921724404154,
|
2292 |
+
"grad_norm": 0.5209783911705017,
|
2293 |
+
"learning_rate": 3.773134676503629e-05,
|
2294 |
+
"loss": 1.7903,
|
2295 |
+
"step": 322
|
2296 |
+
},
|
2297 |
+
{
|
2298 |
+
"epoch": 0.128707693073992,
|
2299 |
+
"grad_norm": 0.5349249243736267,
|
2300 |
+
"learning_rate": 3.7170442055520415e-05,
|
2301 |
+
"loss": 1.7308,
|
2302 |
+
"step": 323
|
2303 |
+
},
|
2304 |
+
{
|
2305 |
+
"epoch": 0.12910616890394241,
|
2306 |
+
"grad_norm": 0.5156399011611938,
|
2307 |
+
"learning_rate": 3.661278447098789e-05,
|
2308 |
+
"loss": 1.5822,
|
2309 |
+
"step": 324
|
2310 |
+
},
|
2311 |
+
{
|
2312 |
+
"epoch": 0.12950464473389287,
|
2313 |
+
"grad_norm": 0.5426967144012451,
|
2314 |
+
"learning_rate": 3.605840283201195e-05,
|
2315 |
+
"loss": 1.6398,
|
2316 |
+
"step": 325
|
2317 |
+
},
|
2318 |
+
{
|
2319 |
+
"epoch": 0.1299031205638433,
|
2320 |
+
"grad_norm": 0.5003894567489624,
|
2321 |
+
"learning_rate": 3.550732578986006e-05,
|
2322 |
+
"loss": 1.745,
|
2323 |
+
"step": 326
|
2324 |
+
},
|
2325 |
+
{
|
2326 |
+
"epoch": 0.13030159639379374,
|
2327 |
+
"grad_norm": 0.5355332493782043,
|
2328 |
+
"learning_rate": 3.495958182501325e-05,
|
2329 |
+
"loss": 1.6628,
|
2330 |
+
"step": 327
|
2331 |
+
},
|
2332 |
+
{
|
2333 |
+
"epoch": 0.1307000722237442,
|
2334 |
+
"grad_norm": 0.5347525477409363,
|
2335 |
+
"learning_rate": 3.441519924569408e-05,
|
2336 |
+
"loss": 1.7257,
|
2337 |
+
"step": 328
|
2338 |
+
},
|
2339 |
+
{
|
2340 |
+
"epoch": 0.1310985480536946,
|
2341 |
+
"grad_norm": 0.5936457514762878,
|
2342 |
+
"learning_rate": 3.387420618640379e-05,
|
2343 |
+
"loss": 1.7605,
|
2344 |
+
"step": 329
|
2345 |
+
},
|
2346 |
+
{
|
2347 |
+
"epoch": 0.13149702388364506,
|
2348 |
+
"grad_norm": 0.4799719750881195,
|
2349 |
+
"learning_rate": 3.3336630606468134e-05,
|
2350 |
+
"loss": 1.5599,
|
2351 |
+
"step": 330
|
2352 |
+
},
|
2353 |
+
{
|
2354 |
+
"epoch": 0.1318954997135955,
|
2355 |
+
"grad_norm": 0.534975528717041,
|
2356 |
+
"learning_rate": 3.280250028859248e-05,
|
2357 |
+
"loss": 1.741,
|
2358 |
+
"step": 331
|
2359 |
+
},
|
2360 |
+
{
|
2361 |
+
"epoch": 0.13229397554354594,
|
2362 |
+
"grad_norm": 0.5082612037658691,
|
2363 |
+
"learning_rate": 3.227184283742591e-05,
|
2364 |
+
"loss": 1.6139,
|
2365 |
+
"step": 332
|
2366 |
+
},
|
2367 |
+
{
|
2368 |
+
"epoch": 0.1326924513734964,
|
2369 |
+
"grad_norm": 0.5057691335678101,
|
2370 |
+
"learning_rate": 3.174468567813461e-05,
|
2371 |
+
"loss": 1.7092,
|
2372 |
+
"step": 333
|
2373 |
+
},
|
2374 |
+
{
|
2375 |
+
"epoch": 0.1330909272034468,
|
2376 |
+
"grad_norm": 0.4517490863800049,
|
2377 |
+
"learning_rate": 3.122105605498442e-05,
|
2378 |
+
"loss": 1.5781,
|
2379 |
+
"step": 334
|
2380 |
+
},
|
2381 |
+
{
|
2382 |
+
"epoch": 0.13348940303339726,
|
2383 |
+
"grad_norm": 0.4567318558692932,
|
2384 |
+
"learning_rate": 3.070098102993302e-05,
|
2385 |
+
"loss": 1.5871,
|
2386 |
+
"step": 335
|
2387 |
+
},
|
2388 |
+
{
|
2389 |
+
"epoch": 0.1338878788633477,
|
2390 |
+
"grad_norm": 0.5688018202781677,
|
2391 |
+
"learning_rate": 3.018448748123097e-05,
|
2392 |
+
"loss": 1.8721,
|
2393 |
+
"step": 336
|
2394 |
+
},
|
2395 |
+
{
|
2396 |
+
"epoch": 0.13428635469329814,
|
2397 |
+
"grad_norm": 0.5304664969444275,
|
2398 |
+
"learning_rate": 2.9671602102032926e-05,
|
2399 |
+
"loss": 1.7813,
|
2400 |
+
"step": 337
|
2401 |
+
},
|
2402 |
+
{
|
2403 |
+
"epoch": 0.13468483052324856,
|
2404 |
+
"grad_norm": 0.4832111597061157,
|
2405 |
+
"learning_rate": 2.9162351399017963e-05,
|
2406 |
+
"loss": 1.6609,
|
2407 |
+
"step": 338
|
2408 |
+
},
|
2409 |
+
{
|
2410 |
+
"epoch": 0.135083306353199,
|
2411 |
+
"grad_norm": 0.5615909695625305,
|
2412 |
+
"learning_rate": 2.8656761691019673e-05,
|
2413 |
+
"loss": 1.7713,
|
2414 |
+
"step": 339
|
2415 |
+
},
|
2416 |
+
{
|
2417 |
+
"epoch": 0.13548178218314946,
|
2418 |
+
"grad_norm": 0.4976556599140167,
|
2419 |
+
"learning_rate": 2.8154859107665987e-05,
|
2420 |
+
"loss": 1.6278,
|
2421 |
+
"step": 340
|
2422 |
+
},
|
2423 |
+
{
|
2424 |
+
"epoch": 0.13588025801309989,
|
2425 |
+
"grad_norm": 0.48554837703704834,
|
2426 |
+
"learning_rate": 2.7656669588028762e-05,
|
2427 |
+
"loss": 1.6038,
|
2428 |
+
"step": 341
|
2429 |
+
},
|
2430 |
+
{
|
2431 |
+
"epoch": 0.13627873384305034,
|
2432 |
+
"grad_norm": 0.50529545545578,
|
2433 |
+
"learning_rate": 2.7162218879283176e-05,
|
2434 |
+
"loss": 1.515,
|
2435 |
+
"step": 342
|
2436 |
+
},
|
2437 |
+
{
|
2438 |
+
"epoch": 0.13667720967300076,
|
2439 |
+
"grad_norm": 0.5264208912849426,
|
2440 |
+
"learning_rate": 2.667153253537713e-05,
|
2441 |
+
"loss": 1.7788,
|
2442 |
+
"step": 343
|
2443 |
+
},
|
2444 |
+
{
|
2445 |
+
"epoch": 0.1370756855029512,
|
2446 |
+
"grad_norm": 0.5166341066360474,
|
2447 |
+
"learning_rate": 2.618463591571052e-05,
|
2448 |
+
"loss": 1.7624,
|
2449 |
+
"step": 344
|
2450 |
+
},
|
2451 |
+
{
|
2452 |
+
"epoch": 0.13747416133290166,
|
2453 |
+
"grad_norm": 0.4670686721801758,
|
2454 |
+
"learning_rate": 2.570155418382473e-05,
|
2455 |
+
"loss": 1.5216,
|
2456 |
+
"step": 345
|
2457 |
+
},
|
2458 |
+
{
|
2459 |
+
"epoch": 0.13787263716285209,
|
2460 |
+
"grad_norm": 0.5010607242584229,
|
2461 |
+
"learning_rate": 2.5222312306101925e-05,
|
2462 |
+
"loss": 1.6297,
|
2463 |
+
"step": 346
|
2464 |
+
},
|
2465 |
+
{
|
2466 |
+
"epoch": 0.13827111299280254,
|
2467 |
+
"grad_norm": 0.44925105571746826,
|
2468 |
+
"learning_rate": 2.474693505047504e-05,
|
2469 |
+
"loss": 1.4302,
|
2470 |
+
"step": 347
|
2471 |
+
},
|
2472 |
+
{
|
2473 |
+
"epoch": 0.13866958882275296,
|
2474 |
+
"grad_norm": 0.44039008021354675,
|
2475 |
+
"learning_rate": 2.427544698514753e-05,
|
2476 |
+
"loss": 1.4163,
|
2477 |
+
"step": 348
|
2478 |
+
},
|
2479 |
+
{
|
2480 |
+
"epoch": 0.1390680646527034,
|
2481 |
+
"grad_norm": 0.5106916427612305,
|
2482 |
+
"learning_rate": 2.3807872477323733e-05,
|
2483 |
+
"loss": 1.5566,
|
2484 |
+
"step": 349
|
2485 |
+
},
|
2486 |
+
{
|
2487 |
+
"epoch": 0.13946654048265386,
|
2488 |
+
"grad_norm": 0.5118552446365356,
|
2489 |
+
"learning_rate": 2.334423569194948e-05,
|
2490 |
+
"loss": 1.6767,
|
2491 |
+
"step": 350
|
2492 |
+
},
|
2493 |
+
{
|
2494 |
+
"epoch": 0.13986501631260428,
|
2495 |
+
"grad_norm": 0.5088701248168945,
|
2496 |
+
"learning_rate": 2.288456059046331e-05,
|
2497 |
+
"loss": 1.6389,
|
2498 |
+
"step": 351
|
2499 |
+
},
|
2500 |
+
{
|
2501 |
+
"epoch": 0.14026349214255474,
|
2502 |
+
"grad_norm": 0.5484685301780701,
|
2503 |
+
"learning_rate": 2.242887092955801e-05,
|
2504 |
+
"loss": 1.6978,
|
2505 |
+
"step": 352
|
2506 |
+
},
|
2507 |
+
{
|
2508 |
+
"epoch": 0.14066196797250516,
|
2509 |
+
"grad_norm": 0.5057936906814575,
|
2510 |
+
"learning_rate": 2.1977190259952883e-05,
|
2511 |
+
"loss": 1.7238,
|
2512 |
+
"step": 353
|
2513 |
+
},
|
2514 |
+
{
|
2515 |
+
"epoch": 0.1410604438024556,
|
2516 |
+
"grad_norm": 0.4977273643016815,
|
2517 |
+
"learning_rate": 2.1529541925176555e-05,
|
2518 |
+
"loss": 1.589,
|
2519 |
+
"step": 354
|
2520 |
+
},
|
2521 |
+
{
|
2522 |
+
"epoch": 0.14145891963240606,
|
2523 |
+
"grad_norm": 0.556425929069519,
|
2524 |
+
"learning_rate": 2.1085949060360654e-05,
|
2525 |
+
"loss": 1.784,
|
2526 |
+
"step": 355
|
2527 |
+
},
|
2528 |
+
{
|
2529 |
+
"epoch": 0.14185739546235648,
|
2530 |
+
"grad_norm": 0.48254185914993286,
|
2531 |
+
"learning_rate": 2.064643459104405e-05,
|
2532 |
+
"loss": 1.5242,
|
2533 |
+
"step": 356
|
2534 |
+
},
|
2535 |
+
{
|
2536 |
+
"epoch": 0.14225587129230693,
|
2537 |
+
"grad_norm": 0.5121050477027893,
|
2538 |
+
"learning_rate": 2.0211021231988102e-05,
|
2539 |
+
"loss": 1.6523,
|
2540 |
+
"step": 357
|
2541 |
+
},
|
2542 |
+
{
|
2543 |
+
"epoch": 0.14265434712225736,
|
2544 |
+
"grad_norm": 0.5412747263908386,
|
2545 |
+
"learning_rate": 1.977973148600266e-05,
|
2546 |
+
"loss": 1.7816,
|
2547 |
+
"step": 358
|
2548 |
+
},
|
2549 |
+
{
|
2550 |
+
"epoch": 0.1430528229522078,
|
2551 |
+
"grad_norm": 0.5190417170524597,
|
2552 |
+
"learning_rate": 1.935258764278314e-05,
|
2553 |
+
"loss": 1.7215,
|
2554 |
+
"step": 359
|
2555 |
+
},
|
2556 |
+
{
|
2557 |
+
"epoch": 0.14345129878215823,
|
2558 |
+
"grad_norm": 0.5047377347946167,
|
2559 |
+
"learning_rate": 1.8929611777758526e-05,
|
2560 |
+
"loss": 1.6061,
|
2561 |
+
"step": 360
|
2562 |
+
},
|
2563 |
+
{
|
2564 |
+
"epoch": 0.14384977461210868,
|
2565 |
+
"grad_norm": 0.5179762840270996,
|
2566 |
+
"learning_rate": 1.851082575095051e-05,
|
2567 |
+
"loss": 1.6054,
|
2568 |
+
"step": 361
|
2569 |
+
},
|
2570 |
+
{
|
2571 |
+
"epoch": 0.14424825044205913,
|
2572 |
+
"grad_norm": 0.533320963382721,
|
2573 |
+
"learning_rate": 1.8096251205843684e-05,
|
2574 |
+
"loss": 1.6372,
|
2575 |
+
"step": 362
|
2576 |
+
},
|
2577 |
+
{
|
2578 |
+
"epoch": 0.14464672627200956,
|
2579 |
+
"grad_norm": 0.4938521981239319,
|
2580 |
+
"learning_rate": 1.7685909568267033e-05,
|
2581 |
+
"loss": 1.7578,
|
2582 |
+
"step": 363
|
2583 |
+
},
|
2584 |
+
{
|
2585 |
+
"epoch": 0.14504520210196,
|
2586 |
+
"grad_norm": 0.5236971974372864,
|
2587 |
+
"learning_rate": 1.7279822045286576e-05,
|
2588 |
+
"loss": 1.7821,
|
2589 |
+
"step": 364
|
2590 |
+
},
|
2591 |
+
{
|
2592 |
+
"epoch": 0.14544367793191043,
|
2593 |
+
"grad_norm": 0.548584520816803,
|
2594 |
+
"learning_rate": 1.6878009624109313e-05,
|
2595 |
+
"loss": 1.7914,
|
2596 |
+
"step": 365
|
2597 |
+
},
|
2598 |
+
{
|
2599 |
+
"epoch": 0.14584215376186088,
|
2600 |
+
"grad_norm": 0.5472350716590881,
|
2601 |
+
"learning_rate": 1.648049307099874e-05,
|
2602 |
+
"loss": 1.5642,
|
2603 |
+
"step": 366
|
2604 |
+
},
|
2605 |
+
{
|
2606 |
+
"epoch": 0.14624062959181133,
|
2607 |
+
"grad_norm": 0.5605772733688354,
|
2608 |
+
"learning_rate": 1.6087292930201394e-05,
|
2609 |
+
"loss": 1.7474,
|
2610 |
+
"step": 367
|
2611 |
+
},
|
2612 |
+
{
|
2613 |
+
"epoch": 0.14663910542176176,
|
2614 |
+
"grad_norm": 0.47889217734336853,
|
2615 |
+
"learning_rate": 1.569842952288527e-05,
|
2616 |
+
"loss": 1.5833,
|
2617 |
+
"step": 368
|
2618 |
+
},
|
2619 |
+
{
|
2620 |
+
"epoch": 0.1470375812517122,
|
2621 |
+
"grad_norm": 0.5094882845878601,
|
2622 |
+
"learning_rate": 1.5313922946089486e-05,
|
2623 |
+
"loss": 1.702,
|
2624 |
+
"step": 369
|
2625 |
+
},
|
2626 |
+
{
|
2627 |
+
"epoch": 0.14743605708166263,
|
2628 |
+
"grad_norm": 0.48179590702056885,
|
2629 |
+
"learning_rate": 1.4933793071685732e-05,
|
2630 |
+
"loss": 1.5017,
|
2631 |
+
"step": 370
|
2632 |
+
},
|
2633 |
+
{
|
2634 |
+
"epoch": 0.14783453291161308,
|
2635 |
+
"grad_norm": 0.5010417699813843,
|
2636 |
+
"learning_rate": 1.4558059545351143e-05,
|
2637 |
+
"loss": 1.5445,
|
2638 |
+
"step": 371
|
2639 |
+
},
|
2640 |
+
{
|
2641 |
+
"epoch": 0.14823300874156353,
|
2642 |
+
"grad_norm": 0.43870919942855835,
|
2643 |
+
"learning_rate": 1.4186741785553115e-05,
|
2644 |
+
"loss": 1.5428,
|
2645 |
+
"step": 372
|
2646 |
+
},
|
2647 |
+
{
|
2648 |
+
"epoch": 0.14863148457151396,
|
2649 |
+
"grad_norm": 0.4662686288356781,
|
2650 |
+
"learning_rate": 1.3819858982545598e-05,
|
2651 |
+
"loss": 1.4941,
|
2652 |
+
"step": 373
|
2653 |
+
},
|
2654 |
+
{
|
2655 |
+
"epoch": 0.1490299604014644,
|
2656 |
+
"grad_norm": 0.5589818358421326,
|
2657 |
+
"learning_rate": 1.3457430097377421e-05,
|
2658 |
+
"loss": 1.7253,
|
2659 |
+
"step": 374
|
2660 |
+
},
|
2661 |
+
{
|
2662 |
+
"epoch": 0.14942843623141483,
|
2663 |
+
"grad_norm": 0.5113766193389893,
|
2664 |
+
"learning_rate": 1.3099473860912326e-05,
|
2665 |
+
"loss": 1.5904,
|
2666 |
+
"step": 375
|
2667 |
+
},
|
2668 |
+
{
|
2669 |
+
"epoch": 0.14982691206136528,
|
2670 |
+
"grad_norm": 0.5116039514541626,
|
2671 |
+
"learning_rate": 1.2746008772860884e-05,
|
2672 |
+
"loss": 1.6287,
|
2673 |
+
"step": 376
|
2674 |
+
},
|
2675 |
+
{
|
2676 |
+
"epoch": 0.1502253878913157,
|
2677 |
+
"grad_norm": 0.5835950970649719,
|
2678 |
+
"learning_rate": 1.2397053100824463e-05,
|
2679 |
+
"loss": 1.4924,
|
2680 |
+
"step": 377
|
2681 |
+
},
|
2682 |
+
{
|
2683 |
+
"epoch": 0.15062386372126615,
|
2684 |
+
"grad_norm": 0.5390010476112366,
|
2685 |
+
"learning_rate": 1.2052624879351104e-05,
|
2686 |
+
"loss": 1.5715,
|
2687 |
+
"step": 378
|
2688 |
+
},
|
2689 |
+
{
|
2690 |
+
"epoch": 0.1510223395512166,
|
2691 |
+
"grad_norm": 0.5362573266029358,
|
2692 |
+
"learning_rate": 1.1712741909003444e-05,
|
2693 |
+
"loss": 1.7802,
|
2694 |
+
"step": 379
|
2695 |
+
},
|
2696 |
+
{
|
2697 |
+
"epoch": 0.15142081538116703,
|
2698 |
+
"grad_norm": 0.49706023931503296,
|
2699 |
+
"learning_rate": 1.1377421755438832e-05,
|
2700 |
+
"loss": 1.7128,
|
2701 |
+
"step": 380
|
2702 |
+
},
|
2703 |
+
{
|
2704 |
+
"epoch": 0.15181929121111748,
|
2705 |
+
"grad_norm": 0.5436707735061646,
|
2706 |
+
"learning_rate": 1.1046681748501408e-05,
|
2707 |
+
"loss": 1.7381,
|
2708 |
+
"step": 381
|
2709 |
+
},
|
2710 |
+
{
|
2711 |
+
"epoch": 0.1522177670410679,
|
2712 |
+
"grad_norm": 0.51026850938797,
|
2713 |
+
"learning_rate": 1.0720538981326556e-05,
|
2714 |
+
"loss": 1.6813,
|
2715 |
+
"step": 382
|
2716 |
+
},
|
2717 |
+
{
|
2718 |
+
"epoch": 0.15261624287101835,
|
2719 |
+
"grad_norm": 0.5093562006950378,
|
2720 |
+
"learning_rate": 1.0399010309457457e-05,
|
2721 |
+
"loss": 1.6643,
|
2722 |
+
"step": 383
|
2723 |
+
},
|
2724 |
+
{
|
2725 |
+
"epoch": 0.1530147187009688,
|
2726 |
+
"grad_norm": 0.5157011151313782,
|
2727 |
+
"learning_rate": 1.0082112349974016e-05,
|
2728 |
+
"loss": 1.7036,
|
2729 |
+
"step": 384
|
2730 |
+
},
|
2731 |
+
{
|
2732 |
+
"epoch": 0.15341319453091923,
|
2733 |
+
"grad_norm": 0.46138855814933777,
|
2734 |
+
"learning_rate": 9.76986148063398e-06,
|
2735 |
+
"loss": 1.3899,
|
2736 |
+
"step": 385
|
2737 |
+
},
|
2738 |
+
{
|
2739 |
+
"epoch": 0.15381167036086968,
|
2740 |
+
"grad_norm": 0.48433151841163635,
|
2741 |
+
"learning_rate": 9.462273839026624e-06,
|
2742 |
+
"loss": 1.5909,
|
2743 |
+
"step": 386
|
2744 |
+
},
|
2745 |
+
{
|
2746 |
+
"epoch": 0.1542101461908201,
|
2747 |
+
"grad_norm": 0.5252178907394409,
|
2748 |
+
"learning_rate": 9.159365321738655e-06,
|
2749 |
+
"loss": 1.7776,
|
2750 |
+
"step": 387
|
2751 |
+
},
|
2752 |
+
{
|
2753 |
+
"epoch": 0.15460862202077055,
|
2754 |
+
"grad_norm": 0.5377585291862488,
|
2755 |
+
"learning_rate": 8.861151583532656e-06,
|
2756 |
+
"loss": 1.7147,
|
2757 |
+
"step": 388
|
2758 |
+
},
|
2759 |
+
{
|
2760 |
+
"epoch": 0.155007097850721,
|
2761 |
+
"grad_norm": 0.5604181885719299,
|
2762 |
+
"learning_rate": 8.56764803653809e-06,
|
2763 |
+
"loss": 1.8522,
|
2764 |
+
"step": 389
|
2765 |
+
},
|
2766 |
+
{
|
2767 |
+
"epoch": 0.15540557368067143,
|
2768 |
+
"grad_norm": 0.5364322662353516,
|
2769 |
+
"learning_rate": 8.278869849454718e-06,
|
2770 |
+
"loss": 1.6899,
|
2771 |
+
"step": 390
|
2772 |
+
},
|
2773 |
+
{
|
2774 |
+
"epoch": 0.15580404951062188,
|
2775 |
+
"grad_norm": 0.4611172676086426,
|
2776 |
+
"learning_rate": 7.994831946768622e-06,
|
2777 |
+
"loss": 1.5186,
|
2778 |
+
"step": 391
|
2779 |
+
},
|
2780 |
+
{
|
2781 |
+
"epoch": 0.1562025253405723,
|
2782 |
+
"grad_norm": 0.5083454251289368,
|
2783 |
+
"learning_rate": 7.715549007981027e-06,
|
2784 |
+
"loss": 1.6604,
|
2785 |
+
"step": 392
|
2786 |
+
},
|
2787 |
+
{
|
2788 |
+
"epoch": 0.15660100117052275,
|
2789 |
+
"grad_norm": 0.5075681209564209,
|
2790 |
+
"learning_rate": 7.441035466849489e-06,
|
2791 |
+
"loss": 1.5289,
|
2792 |
+
"step": 393
|
2793 |
+
},
|
2794 |
+
{
|
2795 |
+
"epoch": 0.1569994770004732,
|
2796 |
+
"grad_norm": 0.5153861045837402,
|
2797 |
+
"learning_rate": 7.171305510642023e-06,
|
2798 |
+
"loss": 1.6241,
|
2799 |
+
"step": 394
|
2800 |
+
},
|
2801 |
+
{
|
2802 |
+
"epoch": 0.15739795283042363,
|
2803 |
+
"grad_norm": 0.5271784663200378,
|
2804 |
+
"learning_rate": 6.906373079403849e-06,
|
2805 |
+
"loss": 1.7079,
|
2806 |
+
"step": 395
|
2807 |
+
},
|
2808 |
+
{
|
2809 |
+
"epoch": 0.15779642866037408,
|
2810 |
+
"grad_norm": 0.49165791273117065,
|
2811 |
+
"learning_rate": 6.646251865236997e-06,
|
2812 |
+
"loss": 1.7268,
|
2813 |
+
"step": 396
|
2814 |
+
},
|
2815 |
+
{
|
2816 |
+
"epoch": 0.1581949044903245,
|
2817 |
+
"grad_norm": 0.5324728488922119,
|
2818 |
+
"learning_rate": 6.390955311592617e-06,
|
2819 |
+
"loss": 1.6794,
|
2820 |
+
"step": 397
|
2821 |
+
},
|
2822 |
+
{
|
2823 |
+
"epoch": 0.15859338032027495,
|
2824 |
+
"grad_norm": 0.5144858956336975,
|
2825 |
+
"learning_rate": 6.140496612576241e-06,
|
2826 |
+
"loss": 1.7149,
|
2827 |
+
"step": 398
|
2828 |
+
},
|
2829 |
+
{
|
2830 |
+
"epoch": 0.15899185615022537,
|
2831 |
+
"grad_norm": 0.5114040374755859,
|
2832 |
+
"learning_rate": 5.8948887122658335e-06,
|
2833 |
+
"loss": 1.561,
|
2834 |
+
"step": 399
|
2835 |
+
},
|
2836 |
+
{
|
2837 |
+
"epoch": 0.15939033198017583,
|
2838 |
+
"grad_norm": 0.5673094391822815,
|
2839 |
+
"learning_rate": 5.65414430404293e-06,
|
2840 |
+
"loss": 1.873,
|
2841 |
+
"step": 400
|
2842 |
+
},
|
2843 |
+
{
|
2844 |
+
"epoch": 0.15939033198017583,
|
2845 |
+
"eval_loss": 1.6927062273025513,
|
2846 |
+
"eval_runtime": 603.1583,
|
2847 |
+
"eval_samples_per_second": 14.016,
|
2848 |
+
"eval_steps_per_second": 3.505,
|
2849 |
+
"step": 400
|
2850 |
}
|
2851 |
],
|
2852 |
"logging_steps": 1,
|
|
|
2875 |
"attributes": {}
|
2876 |
}
|
2877 |
},
|
2878 |
+
"total_flos": 1.1184619093229568e+18,
|
2879 |
"train_batch_size": 4,
|
2880 |
"trial_name": null,
|
2881 |
"trial_params": null
|