Training in progress, step 650, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 578859568
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:91ea2e833e395bd896b7338e0b159889cb0a9805a20a0ba81249634cf8be6acb
|
3 |
size 578859568
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 295198386
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8408296ac89ee50ae11d1a24615517bae811a4a08e26902b3f91c3361afa9523
|
3 |
size 295198386
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14244
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d04b06ab68f7f17dc4df2206cd558b4bd98d8e29313159b73014f73bdd405dcc
|
3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:18275b6327bd0d7d1ad9ae6ef36f205b2f0d81f6499994f6ecb9553362d17a42
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": 0.6910951733589172,
|
3 |
"best_model_checkpoint": "miner_id_24/checkpoint-500",
|
4 |
-
"epoch": 0.
|
5 |
"eval_steps": 50,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -4311,6 +4311,364 @@
|
|
4311 |
"eval_samples_per_second": 11.449,
|
4312 |
"eval_steps_per_second": 5.734,
|
4313 |
"step": 600
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
4314 |
}
|
4315 |
],
|
4316 |
"logging_steps": 1,
|
@@ -4325,7 +4683,7 @@
|
|
4325 |
"early_stopping_threshold": 0.0
|
4326 |
},
|
4327 |
"attributes": {
|
4328 |
-
"early_stopping_patience_counter":
|
4329 |
}
|
4330 |
},
|
4331 |
"TrainerControl": {
|
@@ -4334,12 +4692,12 @@
|
|
4334 |
"should_evaluate": false,
|
4335 |
"should_log": false,
|
4336 |
"should_save": true,
|
4337 |
-
"should_training_stop":
|
4338 |
},
|
4339 |
"attributes": {}
|
4340 |
}
|
4341 |
},
|
4342 |
-
"total_flos":
|
4343 |
"train_batch_size": 2,
|
4344 |
"trial_name": null,
|
4345 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": 0.6910951733589172,
|
3 |
"best_model_checkpoint": "miner_id_24/checkpoint-500",
|
4 |
+
"epoch": 0.018067977289942392,
|
5 |
"eval_steps": 50,
|
6 |
+
"global_step": 650,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
4311 |
"eval_samples_per_second": 11.449,
|
4312 |
"eval_steps_per_second": 5.734,
|
4313 |
"step": 600
|
4314 |
+
},
|
4315 |
+
{
|
4316 |
+
"epoch": 0.016705929771162118,
|
4317 |
+
"grad_norm": 0.11464305222034454,
|
4318 |
+
"learning_rate": 0.0007000509087229895,
|
4319 |
+
"loss": 0.9103,
|
4320 |
+
"step": 601
|
4321 |
+
},
|
4322 |
+
{
|
4323 |
+
"epoch": 0.01673372665930049,
|
4324 |
+
"grad_norm": 0.09989949315786362,
|
4325 |
+
"learning_rate": 0.0006970252132582728,
|
4326 |
+
"loss": 0.8176,
|
4327 |
+
"step": 602
|
4328 |
+
},
|
4329 |
+
{
|
4330 |
+
"epoch": 0.016761523547438865,
|
4331 |
+
"grad_norm": 0.09268354624509811,
|
4332 |
+
"learning_rate": 0.0006940025687462952,
|
4333 |
+
"loss": 0.7983,
|
4334 |
+
"step": 603
|
4335 |
+
},
|
4336 |
+
{
|
4337 |
+
"epoch": 0.016789320435577237,
|
4338 |
+
"grad_norm": 0.09152427315711975,
|
4339 |
+
"learning_rate": 0.0006909830056250527,
|
4340 |
+
"loss": 0.7975,
|
4341 |
+
"step": 604
|
4342 |
+
},
|
4343 |
+
{
|
4344 |
+
"epoch": 0.01681711732371561,
|
4345 |
+
"grad_norm": 0.09386585652828217,
|
4346 |
+
"learning_rate": 0.000687966554301513,
|
4347 |
+
"loss": 0.8584,
|
4348 |
+
"step": 605
|
4349 |
+
},
|
4350 |
+
{
|
4351 |
+
"epoch": 0.01684491421185398,
|
4352 |
+
"grad_norm": 0.08477571606636047,
|
4353 |
+
"learning_rate": 0.0006849532451513074,
|
4354 |
+
"loss": 0.7387,
|
4355 |
+
"step": 606
|
4356 |
+
},
|
4357 |
+
{
|
4358 |
+
"epoch": 0.016872711099992357,
|
4359 |
+
"grad_norm": 0.08988666534423828,
|
4360 |
+
"learning_rate": 0.0006819431085184251,
|
4361 |
+
"loss": 0.8264,
|
4362 |
+
"step": 607
|
4363 |
+
},
|
4364 |
+
{
|
4365 |
+
"epoch": 0.01690050798813073,
|
4366 |
+
"grad_norm": 0.09714596718549728,
|
4367 |
+
"learning_rate": 0.0006789361747149092,
|
4368 |
+
"loss": 0.9452,
|
4369 |
+
"step": 608
|
4370 |
+
},
|
4371 |
+
{
|
4372 |
+
"epoch": 0.0169283048762691,
|
4373 |
+
"grad_norm": 0.10461269319057465,
|
4374 |
+
"learning_rate": 0.0006759324740205494,
|
4375 |
+
"loss": 0.7174,
|
4376 |
+
"step": 609
|
4377 |
+
},
|
4378 |
+
{
|
4379 |
+
"epoch": 0.016956101764407476,
|
4380 |
+
"grad_norm": 0.09161835163831711,
|
4381 |
+
"learning_rate": 0.0006729320366825784,
|
4382 |
+
"loss": 0.796,
|
4383 |
+
"step": 610
|
4384 |
+
},
|
4385 |
+
{
|
4386 |
+
"epoch": 0.016983898652545848,
|
4387 |
+
"grad_norm": 0.0949753150343895,
|
4388 |
+
"learning_rate": 0.0006699348929153668,
|
4389 |
+
"loss": 0.975,
|
4390 |
+
"step": 611
|
4391 |
+
},
|
4392 |
+
{
|
4393 |
+
"epoch": 0.01701169554068422,
|
4394 |
+
"grad_norm": 0.09028909355401993,
|
4395 |
+
"learning_rate": 0.0006669410729001193,
|
4396 |
+
"loss": 0.7738,
|
4397 |
+
"step": 612
|
4398 |
+
},
|
4399 |
+
{
|
4400 |
+
"epoch": 0.017039492428822592,
|
4401 |
+
"grad_norm": 0.08454867452383041,
|
4402 |
+
"learning_rate": 0.0006639506067845697,
|
4403 |
+
"loss": 0.7062,
|
4404 |
+
"step": 613
|
4405 |
+
},
|
4406 |
+
{
|
4407 |
+
"epoch": 0.017067289316960967,
|
4408 |
+
"grad_norm": 0.10592840611934662,
|
4409 |
+
"learning_rate": 0.0006609635246826793,
|
4410 |
+
"loss": 0.7745,
|
4411 |
+
"step": 614
|
4412 |
+
},
|
4413 |
+
{
|
4414 |
+
"epoch": 0.01709508620509934,
|
4415 |
+
"grad_norm": 0.09267466515302658,
|
4416 |
+
"learning_rate": 0.0006579798566743314,
|
4417 |
+
"loss": 0.8491,
|
4418 |
+
"step": 615
|
4419 |
+
},
|
4420 |
+
{
|
4421 |
+
"epoch": 0.01712288309323771,
|
4422 |
+
"grad_norm": 0.10221099853515625,
|
4423 |
+
"learning_rate": 0.0006549996328050296,
|
4424 |
+
"loss": 0.9564,
|
4425 |
+
"step": 616
|
4426 |
+
},
|
4427 |
+
{
|
4428 |
+
"epoch": 0.017150679981376087,
|
4429 |
+
"grad_norm": 0.09640829265117645,
|
4430 |
+
"learning_rate": 0.000652022883085595,
|
4431 |
+
"loss": 0.6694,
|
4432 |
+
"step": 617
|
4433 |
+
},
|
4434 |
+
{
|
4435 |
+
"epoch": 0.01717847686951446,
|
4436 |
+
"grad_norm": 0.09555254131555557,
|
4437 |
+
"learning_rate": 0.0006490496374918646,
|
4438 |
+
"loss": 0.7825,
|
4439 |
+
"step": 618
|
4440 |
+
},
|
4441 |
+
{
|
4442 |
+
"epoch": 0.01720627375765283,
|
4443 |
+
"grad_norm": 0.1080060750246048,
|
4444 |
+
"learning_rate": 0.0006460799259643883,
|
4445 |
+
"loss": 0.8122,
|
4446 |
+
"step": 619
|
4447 |
+
},
|
4448 |
+
{
|
4449 |
+
"epoch": 0.017234070645791202,
|
4450 |
+
"grad_norm": 0.09308885037899017,
|
4451 |
+
"learning_rate": 0.0006431137784081283,
|
4452 |
+
"loss": 0.7393,
|
4453 |
+
"step": 620
|
4454 |
+
},
|
4455 |
+
{
|
4456 |
+
"epoch": 0.017261867533929578,
|
4457 |
+
"grad_norm": 0.10485529899597168,
|
4458 |
+
"learning_rate": 0.0006401512246921576,
|
4459 |
+
"loss": 0.7577,
|
4460 |
+
"step": 621
|
4461 |
+
},
|
4462 |
+
{
|
4463 |
+
"epoch": 0.01728966442206795,
|
4464 |
+
"grad_norm": 0.10300412029027939,
|
4465 |
+
"learning_rate": 0.0006371922946493591,
|
4466 |
+
"loss": 0.7016,
|
4467 |
+
"step": 622
|
4468 |
+
},
|
4469 |
+
{
|
4470 |
+
"epoch": 0.01731746131020632,
|
4471 |
+
"grad_norm": 0.09915035963058472,
|
4472 |
+
"learning_rate": 0.0006342370180761255,
|
4473 |
+
"loss": 0.7562,
|
4474 |
+
"step": 623
|
4475 |
+
},
|
4476 |
+
{
|
4477 |
+
"epoch": 0.017345258198344694,
|
4478 |
+
"grad_norm": 0.11094118654727936,
|
4479 |
+
"learning_rate": 0.0006312854247320594,
|
4480 |
+
"loss": 0.7113,
|
4481 |
+
"step": 624
|
4482 |
+
},
|
4483 |
+
{
|
4484 |
+
"epoch": 0.01737305508648307,
|
4485 |
+
"grad_norm": 0.09752795100212097,
|
4486 |
+
"learning_rate": 0.0006283375443396726,
|
4487 |
+
"loss": 0.7649,
|
4488 |
+
"step": 625
|
4489 |
+
},
|
4490 |
+
{
|
4491 |
+
"epoch": 0.01740085197462144,
|
4492 |
+
"grad_norm": 0.10030993074178696,
|
4493 |
+
"learning_rate": 0.0006253934065840879,
|
4494 |
+
"loss": 0.7446,
|
4495 |
+
"step": 626
|
4496 |
+
},
|
4497 |
+
{
|
4498 |
+
"epoch": 0.017428648862759813,
|
4499 |
+
"grad_norm": 0.1134578287601471,
|
4500 |
+
"learning_rate": 0.0006224530411127403,
|
4501 |
+
"loss": 0.8147,
|
4502 |
+
"step": 627
|
4503 |
+
},
|
4504 |
+
{
|
4505 |
+
"epoch": 0.01745644575089819,
|
4506 |
+
"grad_norm": 0.09963490813970566,
|
4507 |
+
"learning_rate": 0.000619516477535077,
|
4508 |
+
"loss": 0.6904,
|
4509 |
+
"step": 628
|
4510 |
+
},
|
4511 |
+
{
|
4512 |
+
"epoch": 0.01748424263903656,
|
4513 |
+
"grad_norm": 0.10086818039417267,
|
4514 |
+
"learning_rate": 0.0006165837454222607,
|
4515 |
+
"loss": 0.5791,
|
4516 |
+
"step": 629
|
4517 |
+
},
|
4518 |
+
{
|
4519 |
+
"epoch": 0.017512039527174932,
|
4520 |
+
"grad_norm": 0.11571143567562103,
|
4521 |
+
"learning_rate": 0.0006136548743068713,
|
4522 |
+
"loss": 0.7572,
|
4523 |
+
"step": 630
|
4524 |
+
},
|
4525 |
+
{
|
4526 |
+
"epoch": 0.017539836415313304,
|
4527 |
+
"grad_norm": 0.10508367419242859,
|
4528 |
+
"learning_rate": 0.0006107298936826086,
|
4529 |
+
"loss": 0.5869,
|
4530 |
+
"step": 631
|
4531 |
+
},
|
4532 |
+
{
|
4533 |
+
"epoch": 0.01756763330345168,
|
4534 |
+
"grad_norm": 0.1044749990105629,
|
4535 |
+
"learning_rate": 0.0006078088330039945,
|
4536 |
+
"loss": 0.595,
|
4537 |
+
"step": 632
|
4538 |
+
},
|
4539 |
+
{
|
4540 |
+
"epoch": 0.01759543019159005,
|
4541 |
+
"grad_norm": 0.1138482466340065,
|
4542 |
+
"learning_rate": 0.0006048917216860781,
|
4543 |
+
"loss": 0.668,
|
4544 |
+
"step": 633
|
4545 |
+
},
|
4546 |
+
{
|
4547 |
+
"epoch": 0.017623227079728424,
|
4548 |
+
"grad_norm": 0.10499613732099533,
|
4549 |
+
"learning_rate": 0.0006019785891041381,
|
4550 |
+
"loss": 0.6028,
|
4551 |
+
"step": 634
|
4552 |
+
},
|
4553 |
+
{
|
4554 |
+
"epoch": 0.0176510239678668,
|
4555 |
+
"grad_norm": 0.10078407824039459,
|
4556 |
+
"learning_rate": 0.0005990694645933865,
|
4557 |
+
"loss": 0.5796,
|
4558 |
+
"step": 635
|
4559 |
+
},
|
4560 |
+
{
|
4561 |
+
"epoch": 0.01767882085600517,
|
4562 |
+
"grad_norm": 0.09239528328180313,
|
4563 |
+
"learning_rate": 0.0005961643774486753,
|
4564 |
+
"loss": 0.5735,
|
4565 |
+
"step": 636
|
4566 |
+
},
|
4567 |
+
{
|
4568 |
+
"epoch": 0.017706617744143543,
|
4569 |
+
"grad_norm": 0.09768297523260117,
|
4570 |
+
"learning_rate": 0.0005932633569242,
|
4571 |
+
"loss": 0.5082,
|
4572 |
+
"step": 637
|
4573 |
+
},
|
4574 |
+
{
|
4575 |
+
"epoch": 0.017734414632281915,
|
4576 |
+
"grad_norm": 0.10613156110048294,
|
4577 |
+
"learning_rate": 0.0005903664322332048,
|
4578 |
+
"loss": 0.5554,
|
4579 |
+
"step": 638
|
4580 |
+
},
|
4581 |
+
{
|
4582 |
+
"epoch": 0.01776221152042029,
|
4583 |
+
"grad_norm": 0.10876414179801941,
|
4584 |
+
"learning_rate": 0.000587473632547689,
|
4585 |
+
"loss": 0.6091,
|
4586 |
+
"step": 639
|
4587 |
+
},
|
4588 |
+
{
|
4589 |
+
"epoch": 0.017790008408558662,
|
4590 |
+
"grad_norm": 0.10759898275136948,
|
4591 |
+
"learning_rate": 0.0005845849869981136,
|
4592 |
+
"loss": 0.5748,
|
4593 |
+
"step": 640
|
4594 |
+
},
|
4595 |
+
{
|
4596 |
+
"epoch": 0.017817805296697034,
|
4597 |
+
"grad_norm": 0.12154053151607513,
|
4598 |
+
"learning_rate": 0.0005817005246731073,
|
4599 |
+
"loss": 0.6063,
|
4600 |
+
"step": 641
|
4601 |
+
},
|
4602 |
+
{
|
4603 |
+
"epoch": 0.017845602184835406,
|
4604 |
+
"grad_norm": 0.11394521594047546,
|
4605 |
+
"learning_rate": 0.0005788202746191734,
|
4606 |
+
"loss": 0.6124,
|
4607 |
+
"step": 642
|
4608 |
+
},
|
4609 |
+
{
|
4610 |
+
"epoch": 0.01787339907297378,
|
4611 |
+
"grad_norm": 0.09602084010839462,
|
4612 |
+
"learning_rate": 0.0005759442658403985,
|
4613 |
+
"loss": 0.4391,
|
4614 |
+
"step": 643
|
4615 |
+
},
|
4616 |
+
{
|
4617 |
+
"epoch": 0.017901195961112153,
|
4618 |
+
"grad_norm": 0.12600000202655792,
|
4619 |
+
"learning_rate": 0.0005730725272981583,
|
4620 |
+
"loss": 0.6201,
|
4621 |
+
"step": 644
|
4622 |
+
},
|
4623 |
+
{
|
4624 |
+
"epoch": 0.017928992849250525,
|
4625 |
+
"grad_norm": 0.1129770576953888,
|
4626 |
+
"learning_rate": 0.0005702050879108284,
|
4627 |
+
"loss": 0.4814,
|
4628 |
+
"step": 645
|
4629 |
+
},
|
4630 |
+
{
|
4631 |
+
"epoch": 0.0179567897373889,
|
4632 |
+
"grad_norm": 0.121727854013443,
|
4633 |
+
"learning_rate": 0.0005673419765534915,
|
4634 |
+
"loss": 0.5071,
|
4635 |
+
"step": 646
|
4636 |
+
},
|
4637 |
+
{
|
4638 |
+
"epoch": 0.017984586625527273,
|
4639 |
+
"grad_norm": 0.11814267188310623,
|
4640 |
+
"learning_rate": 0.0005644832220576479,
|
4641 |
+
"loss": 0.5387,
|
4642 |
+
"step": 647
|
4643 |
+
},
|
4644 |
+
{
|
4645 |
+
"epoch": 0.018012383513665645,
|
4646 |
+
"grad_norm": 0.14177252352237701,
|
4647 |
+
"learning_rate": 0.0005616288532109225,
|
4648 |
+
"loss": 0.6006,
|
4649 |
+
"step": 648
|
4650 |
+
},
|
4651 |
+
{
|
4652 |
+
"epoch": 0.018040180401804017,
|
4653 |
+
"grad_norm": 0.17021676898002625,
|
4654 |
+
"learning_rate": 0.0005587788987567784,
|
4655 |
+
"loss": 0.5445,
|
4656 |
+
"step": 649
|
4657 |
+
},
|
4658 |
+
{
|
4659 |
+
"epoch": 0.018067977289942392,
|
4660 |
+
"grad_norm": 0.17510192096233368,
|
4661 |
+
"learning_rate": 0.0005559333873942258,
|
4662 |
+
"loss": 0.5694,
|
4663 |
+
"step": 650
|
4664 |
+
},
|
4665 |
+
{
|
4666 |
+
"epoch": 0.018067977289942392,
|
4667 |
+
"eval_loss": 0.7097320556640625,
|
4668 |
+
"eval_runtime": 50.365,
|
4669 |
+
"eval_samples_per_second": 11.456,
|
4670 |
+
"eval_steps_per_second": 5.738,
|
4671 |
+
"step": 650
|
4672 |
}
|
4673 |
],
|
4674 |
"logging_steps": 1,
|
|
|
4683 |
"early_stopping_threshold": 0.0
|
4684 |
},
|
4685 |
"attributes": {
|
4686 |
+
"early_stopping_patience_counter": 3
|
4687 |
}
|
4688 |
},
|
4689 |
"TrainerControl": {
|
|
|
4692 |
"should_evaluate": false,
|
4693 |
"should_log": false,
|
4694 |
"should_save": true,
|
4695 |
+
"should_training_stop": true
|
4696 |
},
|
4697 |
"attributes": {}
|
4698 |
}
|
4699 |
},
|
4700 |
+
"total_flos": 4.150175529644851e+16,
|
4701 |
"train_batch_size": 2,
|
4702 |
"trial_name": null,
|
4703 |
"trial_params": null
|