Training in progress, step 1650, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 131146352
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6029a505f307e3098b30acc19cdd7ba452e55709d8c353bc4a3f4f8ba146e277
|
3 |
size 131146352
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 67210516
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8e748695332f398e0372a0342f533eda6dda257cbd0c6ff0c31662fd1d9df830
|
3 |
size 67210516
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14244
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c2eb5cad9f3cbd36a844058d2bce505f26319b38c69d84d8607ffff4425c91e1
|
3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:03bf9dc9befb01615f74b39d2b43ebf93f55dc1a1259dddadf80e9de69443c5a
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 0.
|
5 |
"eval_steps": 500,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -11207,6 +11207,356 @@
|
|
11207 |
"learning_rate": 9.99999984307167e-05,
|
11208 |
"loss": 3.5878,
|
11209 |
"step": 1600
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
11210 |
}
|
11211 |
],
|
11212 |
"logging_steps": 1,
|
@@ -11226,7 +11576,7 @@
|
|
11226 |
"attributes": {}
|
11227 |
}
|
11228 |
},
|
11229 |
-
"total_flos": 2.
|
11230 |
"train_batch_size": 4,
|
11231 |
"trial_name": null,
|
11232 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 0.04432147845707532,
|
5 |
"eval_steps": 500,
|
6 |
+
"global_step": 1650,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
11207 |
"learning_rate": 9.99999984307167e-05,
|
11208 |
"loss": 3.5878,
|
11209 |
"step": 1600
|
11210 |
+
},
|
11211 |
+
{
|
11212 |
+
"epoch": 0.04300526485441066,
|
11213 |
+
"grad_norm": 1.3168871402740479,
|
11214 |
+
"learning_rate": 9.999999842874834e-05,
|
11215 |
+
"loss": 2.8627,
|
11216 |
+
"step": 1601
|
11217 |
+
},
|
11218 |
+
{
|
11219 |
+
"epoch": 0.043032126356505856,
|
11220 |
+
"grad_norm": 1.3844327926635742,
|
11221 |
+
"learning_rate": 9.999999842677874e-05,
|
11222 |
+
"loss": 2.8701,
|
11223 |
+
"step": 1602
|
11224 |
+
},
|
11225 |
+
{
|
11226 |
+
"epoch": 0.04305898785860105,
|
11227 |
+
"grad_norm": 1.3635506629943848,
|
11228 |
+
"learning_rate": 9.999999842480789e-05,
|
11229 |
+
"loss": 2.6733,
|
11230 |
+
"step": 1603
|
11231 |
+
},
|
11232 |
+
{
|
11233 |
+
"epoch": 0.04308584936069625,
|
11234 |
+
"grad_norm": 1.3588570356369019,
|
11235 |
+
"learning_rate": 9.999999842283582e-05,
|
11236 |
+
"loss": 2.8806,
|
11237 |
+
"step": 1604
|
11238 |
+
},
|
11239 |
+
{
|
11240 |
+
"epoch": 0.04311271086279145,
|
11241 |
+
"grad_norm": 1.3737713098526,
|
11242 |
+
"learning_rate": 9.999999842086252e-05,
|
11243 |
+
"loss": 2.9025,
|
11244 |
+
"step": 1605
|
11245 |
+
},
|
11246 |
+
{
|
11247 |
+
"epoch": 0.04313957236488664,
|
11248 |
+
"grad_norm": 1.40369713306427,
|
11249 |
+
"learning_rate": 9.999999841888799e-05,
|
11250 |
+
"loss": 2.8484,
|
11251 |
+
"step": 1606
|
11252 |
+
},
|
11253 |
+
{
|
11254 |
+
"epoch": 0.043166433866981845,
|
11255 |
+
"grad_norm": 1.2884548902511597,
|
11256 |
+
"learning_rate": 9.99999984169122e-05,
|
11257 |
+
"loss": 2.7603,
|
11258 |
+
"step": 1607
|
11259 |
+
},
|
11260 |
+
{
|
11261 |
+
"epoch": 0.04319329536907704,
|
11262 |
+
"grad_norm": 1.351904273033142,
|
11263 |
+
"learning_rate": 9.99999984149352e-05,
|
11264 |
+
"loss": 3.0526,
|
11265 |
+
"step": 1608
|
11266 |
+
},
|
11267 |
+
{
|
11268 |
+
"epoch": 0.043220156871172234,
|
11269 |
+
"grad_norm": 1.4556187391281128,
|
11270 |
+
"learning_rate": 9.999999841295697e-05,
|
11271 |
+
"loss": 3.0934,
|
11272 |
+
"step": 1609
|
11273 |
+
},
|
11274 |
+
{
|
11275 |
+
"epoch": 0.043247018373267436,
|
11276 |
+
"grad_norm": 1.3308874368667603,
|
11277 |
+
"learning_rate": 9.999999841097748e-05,
|
11278 |
+
"loss": 2.7632,
|
11279 |
+
"step": 1610
|
11280 |
+
},
|
11281 |
+
{
|
11282 |
+
"epoch": 0.04327387987536263,
|
11283 |
+
"grad_norm": 1.3838894367218018,
|
11284 |
+
"learning_rate": 9.999999840899678e-05,
|
11285 |
+
"loss": 3.0159,
|
11286 |
+
"step": 1611
|
11287 |
+
},
|
11288 |
+
{
|
11289 |
+
"epoch": 0.043300741377457826,
|
11290 |
+
"grad_norm": 1.504431962966919,
|
11291 |
+
"learning_rate": 9.999999840701483e-05,
|
11292 |
+
"loss": 3.0511,
|
11293 |
+
"step": 1612
|
11294 |
+
},
|
11295 |
+
{
|
11296 |
+
"epoch": 0.04332760287955303,
|
11297 |
+
"grad_norm": 1.2878671884536743,
|
11298 |
+
"learning_rate": 9.999999840503167e-05,
|
11299 |
+
"loss": 2.8596,
|
11300 |
+
"step": 1613
|
11301 |
+
},
|
11302 |
+
{
|
11303 |
+
"epoch": 0.04335446438164822,
|
11304 |
+
"grad_norm": 1.3833988904953003,
|
11305 |
+
"learning_rate": 9.999999840304725e-05,
|
11306 |
+
"loss": 2.7807,
|
11307 |
+
"step": 1614
|
11308 |
+
},
|
11309 |
+
{
|
11310 |
+
"epoch": 0.04338132588374342,
|
11311 |
+
"grad_norm": 1.3519421815872192,
|
11312 |
+
"learning_rate": 9.999999840106163e-05,
|
11313 |
+
"loss": 2.7777,
|
11314 |
+
"step": 1615
|
11315 |
+
},
|
11316 |
+
{
|
11317 |
+
"epoch": 0.04340818738583862,
|
11318 |
+
"grad_norm": 1.4081404209136963,
|
11319 |
+
"learning_rate": 9.999999839907475e-05,
|
11320 |
+
"loss": 2.7198,
|
11321 |
+
"step": 1616
|
11322 |
+
},
|
11323 |
+
{
|
11324 |
+
"epoch": 0.043435048887933814,
|
11325 |
+
"grad_norm": 1.3711954355239868,
|
11326 |
+
"learning_rate": 9.999999839708662e-05,
|
11327 |
+
"loss": 2.717,
|
11328 |
+
"step": 1617
|
11329 |
+
},
|
11330 |
+
{
|
11331 |
+
"epoch": 0.04346191039002901,
|
11332 |
+
"grad_norm": 1.3935779333114624,
|
11333 |
+
"learning_rate": 9.999999839509728e-05,
|
11334 |
+
"loss": 3.0255,
|
11335 |
+
"step": 1618
|
11336 |
+
},
|
11337 |
+
{
|
11338 |
+
"epoch": 0.04348877189212421,
|
11339 |
+
"grad_norm": 1.4566105604171753,
|
11340 |
+
"learning_rate": 9.99999983931067e-05,
|
11341 |
+
"loss": 3.1218,
|
11342 |
+
"step": 1619
|
11343 |
+
},
|
11344 |
+
{
|
11345 |
+
"epoch": 0.043515633394219405,
|
11346 |
+
"grad_norm": 1.4367201328277588,
|
11347 |
+
"learning_rate": 9.99999983911149e-05,
|
11348 |
+
"loss": 3.0931,
|
11349 |
+
"step": 1620
|
11350 |
+
},
|
11351 |
+
{
|
11352 |
+
"epoch": 0.0435424948963146,
|
11353 |
+
"grad_norm": 1.390453815460205,
|
11354 |
+
"learning_rate": 9.999999838912187e-05,
|
11355 |
+
"loss": 2.903,
|
11356 |
+
"step": 1621
|
11357 |
+
},
|
11358 |
+
{
|
11359 |
+
"epoch": 0.0435693563984098,
|
11360 |
+
"grad_norm": 1.2955039739608765,
|
11361 |
+
"learning_rate": 9.999999838712759e-05,
|
11362 |
+
"loss": 2.9835,
|
11363 |
+
"step": 1622
|
11364 |
+
},
|
11365 |
+
{
|
11366 |
+
"epoch": 0.043596217900505,
|
11367 |
+
"grad_norm": 1.5156861543655396,
|
11368 |
+
"learning_rate": 9.999999838513207e-05,
|
11369 |
+
"loss": 3.1388,
|
11370 |
+
"step": 1623
|
11371 |
+
},
|
11372 |
+
{
|
11373 |
+
"epoch": 0.04362307940260019,
|
11374 |
+
"grad_norm": 1.4825620651245117,
|
11375 |
+
"learning_rate": 9.999999838313532e-05,
|
11376 |
+
"loss": 3.1583,
|
11377 |
+
"step": 1624
|
11378 |
+
},
|
11379 |
+
{
|
11380 |
+
"epoch": 0.04364994090469539,
|
11381 |
+
"grad_norm": 1.369698166847229,
|
11382 |
+
"learning_rate": 9.999999838113734e-05,
|
11383 |
+
"loss": 2.9362,
|
11384 |
+
"step": 1625
|
11385 |
+
},
|
11386 |
+
{
|
11387 |
+
"epoch": 0.04367680240679059,
|
11388 |
+
"grad_norm": 1.3391295671463013,
|
11389 |
+
"learning_rate": 9.999999837913813e-05,
|
11390 |
+
"loss": 2.8266,
|
11391 |
+
"step": 1626
|
11392 |
+
},
|
11393 |
+
{
|
11394 |
+
"epoch": 0.04370366390888578,
|
11395 |
+
"grad_norm": 1.4127203226089478,
|
11396 |
+
"learning_rate": 9.999999837713768e-05,
|
11397 |
+
"loss": 2.827,
|
11398 |
+
"step": 1627
|
11399 |
+
},
|
11400 |
+
{
|
11401 |
+
"epoch": 0.043730525410980985,
|
11402 |
+
"grad_norm": 1.542043685913086,
|
11403 |
+
"learning_rate": 9.999999837513601e-05,
|
11404 |
+
"loss": 3.0887,
|
11405 |
+
"step": 1628
|
11406 |
+
},
|
11407 |
+
{
|
11408 |
+
"epoch": 0.04375738691307618,
|
11409 |
+
"grad_norm": 1.513839602470398,
|
11410 |
+
"learning_rate": 9.99999983731331e-05,
|
11411 |
+
"loss": 3.0853,
|
11412 |
+
"step": 1629
|
11413 |
+
},
|
11414 |
+
{
|
11415 |
+
"epoch": 0.043784248415171374,
|
11416 |
+
"grad_norm": 1.4729801416397095,
|
11417 |
+
"learning_rate": 9.999999837112895e-05,
|
11418 |
+
"loss": 2.98,
|
11419 |
+
"step": 1630
|
11420 |
+
},
|
11421 |
+
{
|
11422 |
+
"epoch": 0.043811109917266576,
|
11423 |
+
"grad_norm": 1.509283185005188,
|
11424 |
+
"learning_rate": 9.999999836912355e-05,
|
11425 |
+
"loss": 3.2404,
|
11426 |
+
"step": 1631
|
11427 |
+
},
|
11428 |
+
{
|
11429 |
+
"epoch": 0.04383797141936177,
|
11430 |
+
"grad_norm": 1.543927550315857,
|
11431 |
+
"learning_rate": 9.999999836711694e-05,
|
11432 |
+
"loss": 3.119,
|
11433 |
+
"step": 1632
|
11434 |
+
},
|
11435 |
+
{
|
11436 |
+
"epoch": 0.043864832921456966,
|
11437 |
+
"grad_norm": 1.5025025606155396,
|
11438 |
+
"learning_rate": 9.999999836510909e-05,
|
11439 |
+
"loss": 3.1039,
|
11440 |
+
"step": 1633
|
11441 |
+
},
|
11442 |
+
{
|
11443 |
+
"epoch": 0.04389169442355217,
|
11444 |
+
"grad_norm": 1.3757987022399902,
|
11445 |
+
"learning_rate": 9.999999836310001e-05,
|
11446 |
+
"loss": 2.8637,
|
11447 |
+
"step": 1634
|
11448 |
+
},
|
11449 |
+
{
|
11450 |
+
"epoch": 0.04391855592564736,
|
11451 |
+
"grad_norm": 1.5544795989990234,
|
11452 |
+
"learning_rate": 9.99999983610897e-05,
|
11453 |
+
"loss": 3.0113,
|
11454 |
+
"step": 1635
|
11455 |
+
},
|
11456 |
+
{
|
11457 |
+
"epoch": 0.04394541742774256,
|
11458 |
+
"grad_norm": 1.4375749826431274,
|
11459 |
+
"learning_rate": 9.999999835907815e-05,
|
11460 |
+
"loss": 2.9539,
|
11461 |
+
"step": 1636
|
11462 |
+
},
|
11463 |
+
{
|
11464 |
+
"epoch": 0.04397227892983776,
|
11465 |
+
"grad_norm": 1.557188868522644,
|
11466 |
+
"learning_rate": 9.999999835706537e-05,
|
11467 |
+
"loss": 3.1977,
|
11468 |
+
"step": 1637
|
11469 |
+
},
|
11470 |
+
{
|
11471 |
+
"epoch": 0.043999140431932954,
|
11472 |
+
"grad_norm": 1.6400185823440552,
|
11473 |
+
"learning_rate": 9.999999835505136e-05,
|
11474 |
+
"loss": 3.1995,
|
11475 |
+
"step": 1638
|
11476 |
+
},
|
11477 |
+
{
|
11478 |
+
"epoch": 0.04402600193402815,
|
11479 |
+
"grad_norm": 1.4655009508132935,
|
11480 |
+
"learning_rate": 9.99999983530361e-05,
|
11481 |
+
"loss": 2.9985,
|
11482 |
+
"step": 1639
|
11483 |
+
},
|
11484 |
+
{
|
11485 |
+
"epoch": 0.04405286343612335,
|
11486 |
+
"grad_norm": 1.4965097904205322,
|
11487 |
+
"learning_rate": 9.999999835101961e-05,
|
11488 |
+
"loss": 3.1393,
|
11489 |
+
"step": 1640
|
11490 |
+
},
|
11491 |
+
{
|
11492 |
+
"epoch": 0.044079724938218545,
|
11493 |
+
"grad_norm": 1.5797890424728394,
|
11494 |
+
"learning_rate": 9.999999834900189e-05,
|
11495 |
+
"loss": 3.0014,
|
11496 |
+
"step": 1641
|
11497 |
+
},
|
11498 |
+
{
|
11499 |
+
"epoch": 0.04410658644031374,
|
11500 |
+
"grad_norm": 1.567730188369751,
|
11501 |
+
"learning_rate": 9.999999834698295e-05,
|
11502 |
+
"loss": 2.9215,
|
11503 |
+
"step": 1642
|
11504 |
+
},
|
11505 |
+
{
|
11506 |
+
"epoch": 0.04413344794240894,
|
11507 |
+
"grad_norm": 1.6687631607055664,
|
11508 |
+
"learning_rate": 9.999999834496276e-05,
|
11509 |
+
"loss": 3.2531,
|
11510 |
+
"step": 1643
|
11511 |
+
},
|
11512 |
+
{
|
11513 |
+
"epoch": 0.04416030944450414,
|
11514 |
+
"grad_norm": 1.6272687911987305,
|
11515 |
+
"learning_rate": 9.999999834294133e-05,
|
11516 |
+
"loss": 3.3399,
|
11517 |
+
"step": 1644
|
11518 |
+
},
|
11519 |
+
{
|
11520 |
+
"epoch": 0.04418717094659933,
|
11521 |
+
"grad_norm": 1.6043704748153687,
|
11522 |
+
"learning_rate": 9.999999834091869e-05,
|
11523 |
+
"loss": 3.0772,
|
11524 |
+
"step": 1645
|
11525 |
+
},
|
11526 |
+
{
|
11527 |
+
"epoch": 0.04421403244869453,
|
11528 |
+
"grad_norm": 1.6957578659057617,
|
11529 |
+
"learning_rate": 9.999999833889479e-05,
|
11530 |
+
"loss": 3.3066,
|
11531 |
+
"step": 1646
|
11532 |
+
},
|
11533 |
+
{
|
11534 |
+
"epoch": 0.04424089395078973,
|
11535 |
+
"grad_norm": 1.7268688678741455,
|
11536 |
+
"learning_rate": 9.999999833686968e-05,
|
11537 |
+
"loss": 3.3125,
|
11538 |
+
"step": 1647
|
11539 |
+
},
|
11540 |
+
{
|
11541 |
+
"epoch": 0.04426775545288492,
|
11542 |
+
"grad_norm": 1.7406071424484253,
|
11543 |
+
"learning_rate": 9.999999833484333e-05,
|
11544 |
+
"loss": 3.2264,
|
11545 |
+
"step": 1648
|
11546 |
+
},
|
11547 |
+
{
|
11548 |
+
"epoch": 0.044294616954980125,
|
11549 |
+
"grad_norm": 1.8623542785644531,
|
11550 |
+
"learning_rate": 9.999999833281574e-05,
|
11551 |
+
"loss": 3.3522,
|
11552 |
+
"step": 1649
|
11553 |
+
},
|
11554 |
+
{
|
11555 |
+
"epoch": 0.04432147845707532,
|
11556 |
+
"grad_norm": 1.8407955169677734,
|
11557 |
+
"learning_rate": 9.999999833078691e-05,
|
11558 |
+
"loss": 3.3308,
|
11559 |
+
"step": 1650
|
11560 |
}
|
11561 |
],
|
11562 |
"logging_steps": 1,
|
|
|
11576 |
"attributes": {}
|
11577 |
}
|
11578 |
},
|
11579 |
+
"total_flos": 2.2518960104669184e+18,
|
11580 |
"train_batch_size": 4,
|
11581 |
"trial_name": null,
|
11582 |
"trial_params": null
|