Training in progress, step 100, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 578859568
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:330ebf2fc78d7f6f8cb0c815f67bcae506da819a7aff7e37748efa931b16d910
|
3 |
size 578859568
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 294324372
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4b00ebe9b91258a190a07ebaabacfcadcdf9d0e664ff19ed32400b152a5e4f7c
|
3 |
size 294324372
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14244
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:632841951b590cd1045c6936a248754bca8a52877d07e548317e1fb181970885
|
3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c9f081c44b322e29d9c3ab667f1f86bff894c62055bb3540dff4b22ee74b02c5
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": 0.9230208396911621,
|
3 |
"best_model_checkpoint": "miner_id_24/checkpoint-50",
|
4 |
-
"epoch": 0.
|
5 |
"eval_steps": 50,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -373,6 +373,364 @@
|
|
373 |
"eval_samples_per_second": 11.549,
|
374 |
"eval_steps_per_second": 5.784,
|
375 |
"step": 50
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
376 |
}
|
377 |
],
|
378 |
"logging_steps": 1,
|
@@ -387,7 +745,7 @@
|
|
387 |
"early_stopping_threshold": 0.0
|
388 |
},
|
389 |
"attributes": {
|
390 |
-
"early_stopping_patience_counter":
|
391 |
}
|
392 |
},
|
393 |
"TrainerControl": {
|
@@ -401,7 +759,7 @@
|
|
401 |
"attributes": {}
|
402 |
}
|
403 |
},
|
404 |
-
"total_flos":
|
405 |
"train_batch_size": 2,
|
406 |
"trial_name": null,
|
407 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": 0.9230208396911621,
|
3 |
"best_model_checkpoint": "miner_id_24/checkpoint-50",
|
4 |
+
"epoch": 0.0027796888138372907,
|
5 |
"eval_steps": 50,
|
6 |
+
"global_step": 100,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
373 |
"eval_samples_per_second": 11.549,
|
374 |
"eval_steps_per_second": 5.784,
|
375 |
"step": 50
|
376 |
+
},
|
377 |
+
{
|
378 |
+
"epoch": 0.0014176412950570184,
|
379 |
+
"grad_norm": 0.1536542773246765,
|
380 |
+
"learning_rate": 0.001965648708885559,
|
381 |
+
"loss": 1.0933,
|
382 |
+
"step": 51
|
383 |
+
},
|
384 |
+
{
|
385 |
+
"epoch": 0.0014454381831953912,
|
386 |
+
"grad_norm": 0.1058596670627594,
|
387 |
+
"learning_rate": 0.001963962860695853,
|
388 |
+
"loss": 1.0704,
|
389 |
+
"step": 52
|
390 |
+
},
|
391 |
+
{
|
392 |
+
"epoch": 0.0014732350713337642,
|
393 |
+
"grad_norm": 0.08224259316921234,
|
394 |
+
"learning_rate": 0.001962237387768529,
|
395 |
+
"loss": 1.1229,
|
396 |
+
"step": 53
|
397 |
+
},
|
398 |
+
{
|
399 |
+
"epoch": 0.001501031959472137,
|
400 |
+
"grad_norm": 0.08697132021188736,
|
401 |
+
"learning_rate": 0.0019604723610310193,
|
402 |
+
"loss": 0.9118,
|
403 |
+
"step": 54
|
404 |
+
},
|
405 |
+
{
|
406 |
+
"epoch": 0.00152882884761051,
|
407 |
+
"grad_norm": 0.08103405684232712,
|
408 |
+
"learning_rate": 0.0019586678530366607,
|
409 |
+
"loss": 0.946,
|
410 |
+
"step": 55
|
411 |
+
},
|
412 |
+
{
|
413 |
+
"epoch": 0.0015566257357488829,
|
414 |
+
"grad_norm": 0.07975370436906815,
|
415 |
+
"learning_rate": 0.0019568239379617086,
|
416 |
+
"loss": 0.9038,
|
417 |
+
"step": 56
|
418 |
+
},
|
419 |
+
{
|
420 |
+
"epoch": 0.001584422623887256,
|
421 |
+
"grad_norm": 0.07595320045948029,
|
422 |
+
"learning_rate": 0.0019549406916022907,
|
423 |
+
"loss": 0.8122,
|
424 |
+
"step": 57
|
425 |
+
},
|
426 |
+
{
|
427 |
+
"epoch": 0.0016122195120256287,
|
428 |
+
"grad_norm": 0.08768190443515778,
|
429 |
+
"learning_rate": 0.0019530181913712872,
|
430 |
+
"loss": 0.8606,
|
431 |
+
"step": 58
|
432 |
+
},
|
433 |
+
{
|
434 |
+
"epoch": 0.0016400164001640015,
|
435 |
+
"grad_norm": 0.09886912256479263,
|
436 |
+
"learning_rate": 0.0019510565162951536,
|
437 |
+
"loss": 0.8902,
|
438 |
+
"step": 59
|
439 |
+
},
|
440 |
+
{
|
441 |
+
"epoch": 0.0016678132883023746,
|
442 |
+
"grad_norm": 0.08801861107349396,
|
443 |
+
"learning_rate": 0.0019490557470106687,
|
444 |
+
"loss": 0.9151,
|
445 |
+
"step": 60
|
446 |
+
},
|
447 |
+
{
|
448 |
+
"epoch": 0.0016956101764407474,
|
449 |
+
"grad_norm": 0.08995132893323898,
|
450 |
+
"learning_rate": 0.0019470159657616214,
|
451 |
+
"loss": 0.7793,
|
452 |
+
"step": 61
|
453 |
+
},
|
454 |
+
{
|
455 |
+
"epoch": 0.0017234070645791204,
|
456 |
+
"grad_norm": 0.08425740152597427,
|
457 |
+
"learning_rate": 0.0019449372563954293,
|
458 |
+
"loss": 0.7646,
|
459 |
+
"step": 62
|
460 |
+
},
|
461 |
+
{
|
462 |
+
"epoch": 0.0017512039527174932,
|
463 |
+
"grad_norm": 0.11669428646564484,
|
464 |
+
"learning_rate": 0.001942819704359693,
|
465 |
+
"loss": 0.9056,
|
466 |
+
"step": 63
|
467 |
+
},
|
468 |
+
{
|
469 |
+
"epoch": 0.0017790008408558663,
|
470 |
+
"grad_norm": 0.08303668349981308,
|
471 |
+
"learning_rate": 0.0019406633966986826,
|
472 |
+
"loss": 0.7583,
|
473 |
+
"step": 64
|
474 |
+
},
|
475 |
+
{
|
476 |
+
"epoch": 0.001806797728994239,
|
477 |
+
"grad_norm": 0.08263064175844193,
|
478 |
+
"learning_rate": 0.0019384684220497604,
|
479 |
+
"loss": 0.9233,
|
480 |
+
"step": 65
|
481 |
+
},
|
482 |
+
{
|
483 |
+
"epoch": 0.001834594617132612,
|
484 |
+
"grad_norm": 0.08262008428573608,
|
485 |
+
"learning_rate": 0.0019362348706397372,
|
486 |
+
"loss": 0.8359,
|
487 |
+
"step": 66
|
488 |
+
},
|
489 |
+
{
|
490 |
+
"epoch": 0.001862391505270985,
|
491 |
+
"grad_norm": 0.10420376062393188,
|
492 |
+
"learning_rate": 0.0019339628342811633,
|
493 |
+
"loss": 0.9978,
|
494 |
+
"step": 67
|
495 |
+
},
|
496 |
+
{
|
497 |
+
"epoch": 0.0018901883934093577,
|
498 |
+
"grad_norm": 0.0830477699637413,
|
499 |
+
"learning_rate": 0.001931652406368554,
|
500 |
+
"loss": 0.8834,
|
501 |
+
"step": 68
|
502 |
+
},
|
503 |
+
{
|
504 |
+
"epoch": 0.0019179852815477307,
|
505 |
+
"grad_norm": 0.08504804968833923,
|
506 |
+
"learning_rate": 0.0019293036818745519,
|
507 |
+
"loss": 0.9164,
|
508 |
+
"step": 69
|
509 |
+
},
|
510 |
+
{
|
511 |
+
"epoch": 0.0019457821696861036,
|
512 |
+
"grad_norm": 0.08910652250051498,
|
513 |
+
"learning_rate": 0.0019269167573460217,
|
514 |
+
"loss": 0.9095,
|
515 |
+
"step": 70
|
516 |
+
},
|
517 |
+
{
|
518 |
+
"epoch": 0.0019735790578244766,
|
519 |
+
"grad_norm": 0.09257230162620544,
|
520 |
+
"learning_rate": 0.0019244917309000815,
|
521 |
+
"loss": 0.7138,
|
522 |
+
"step": 71
|
523 |
+
},
|
524 |
+
{
|
525 |
+
"epoch": 0.0020013759459628494,
|
526 |
+
"grad_norm": 0.09553885459899902,
|
527 |
+
"learning_rate": 0.0019220287022200706,
|
528 |
+
"loss": 0.9544,
|
529 |
+
"step": 72
|
530 |
+
},
|
531 |
+
{
|
532 |
+
"epoch": 0.002029172834101222,
|
533 |
+
"grad_norm": 0.08890817314386368,
|
534 |
+
"learning_rate": 0.0019195277725514508,
|
535 |
+
"loss": 0.7013,
|
536 |
+
"step": 73
|
537 |
+
},
|
538 |
+
{
|
539 |
+
"epoch": 0.0020569697222395955,
|
540 |
+
"grad_norm": 0.10616549849510193,
|
541 |
+
"learning_rate": 0.0019169890446976451,
|
542 |
+
"loss": 0.7119,
|
543 |
+
"step": 74
|
544 |
+
},
|
545 |
+
{
|
546 |
+
"epoch": 0.0020847666103779683,
|
547 |
+
"grad_norm": 0.09758912026882172,
|
548 |
+
"learning_rate": 0.0019144126230158124,
|
549 |
+
"loss": 0.811,
|
550 |
+
"step": 75
|
551 |
+
},
|
552 |
+
{
|
553 |
+
"epoch": 0.002112563498516341,
|
554 |
+
"grad_norm": 0.09248580783605576,
|
555 |
+
"learning_rate": 0.001911798613412557,
|
556 |
+
"loss": 0.8025,
|
557 |
+
"step": 76
|
558 |
+
},
|
559 |
+
{
|
560 |
+
"epoch": 0.002140360386654714,
|
561 |
+
"grad_norm": 0.09431200474500656,
|
562 |
+
"learning_rate": 0.001909147123339575,
|
563 |
+
"loss": 0.7038,
|
564 |
+
"step": 77
|
565 |
+
},
|
566 |
+
{
|
567 |
+
"epoch": 0.0021681572747930867,
|
568 |
+
"grad_norm": 0.09258091449737549,
|
569 |
+
"learning_rate": 0.001906458261789238,
|
570 |
+
"loss": 0.7752,
|
571 |
+
"step": 78
|
572 |
+
},
|
573 |
+
{
|
574 |
+
"epoch": 0.00219595416293146,
|
575 |
+
"grad_norm": 0.08860747516155243,
|
576 |
+
"learning_rate": 0.0019037321392901135,
|
577 |
+
"loss": 0.6832,
|
578 |
+
"step": 79
|
579 |
+
},
|
580 |
+
{
|
581 |
+
"epoch": 0.0022237510510698328,
|
582 |
+
"grad_norm": 0.10791260004043579,
|
583 |
+
"learning_rate": 0.001900968867902419,
|
584 |
+
"loss": 0.7183,
|
585 |
+
"step": 80
|
586 |
+
},
|
587 |
+
{
|
588 |
+
"epoch": 0.0022515479392082056,
|
589 |
+
"grad_norm": 0.0878261998295784,
|
590 |
+
"learning_rate": 0.001898168561213419,
|
591 |
+
"loss": 0.6677,
|
592 |
+
"step": 81
|
593 |
+
},
|
594 |
+
{
|
595 |
+
"epoch": 0.0022793448273465784,
|
596 |
+
"grad_norm": 0.10915020108222961,
|
597 |
+
"learning_rate": 0.0018953313343327532,
|
598 |
+
"loss": 0.8602,
|
599 |
+
"step": 82
|
600 |
+
},
|
601 |
+
{
|
602 |
+
"epoch": 0.0023071417154849516,
|
603 |
+
"grad_norm": 0.10625939816236496,
|
604 |
+
"learning_rate": 0.001892457303887706,
|
605 |
+
"loss": 0.8385,
|
606 |
+
"step": 83
|
607 |
+
},
|
608 |
+
{
|
609 |
+
"epoch": 0.0023349386036233244,
|
610 |
+
"grad_norm": 0.10215223580598831,
|
611 |
+
"learning_rate": 0.001889546588018412,
|
612 |
+
"loss": 0.7723,
|
613 |
+
"step": 84
|
614 |
+
},
|
615 |
+
{
|
616 |
+
"epoch": 0.0023627354917616973,
|
617 |
+
"grad_norm": 0.08778225630521774,
|
618 |
+
"learning_rate": 0.0018865993063730002,
|
619 |
+
"loss": 0.6503,
|
620 |
+
"step": 85
|
621 |
+
},
|
622 |
+
{
|
623 |
+
"epoch": 0.00239053237990007,
|
624 |
+
"grad_norm": 0.10662350058555603,
|
625 |
+
"learning_rate": 0.0018836155801026753,
|
626 |
+
"loss": 0.6592,
|
627 |
+
"step": 86
|
628 |
+
},
|
629 |
+
{
|
630 |
+
"epoch": 0.002418329268038443,
|
631 |
+
"grad_norm": 0.10347293317317963,
|
632 |
+
"learning_rate": 0.001880595531856738,
|
633 |
+
"loss": 0.602,
|
634 |
+
"step": 87
|
635 |
+
},
|
636 |
+
{
|
637 |
+
"epoch": 0.002446126156176816,
|
638 |
+
"grad_norm": 0.11098446696996689,
|
639 |
+
"learning_rate": 0.001877539285777543,
|
640 |
+
"loss": 0.7291,
|
641 |
+
"step": 88
|
642 |
+
},
|
643 |
+
{
|
644 |
+
"epoch": 0.002473923044315189,
|
645 |
+
"grad_norm": 0.10774262994527817,
|
646 |
+
"learning_rate": 0.0018744469674953957,
|
647 |
+
"loss": 0.6501,
|
648 |
+
"step": 89
|
649 |
+
},
|
650 |
+
{
|
651 |
+
"epoch": 0.0025017199324535618,
|
652 |
+
"grad_norm": 0.10596223175525665,
|
653 |
+
"learning_rate": 0.0018713187041233894,
|
654 |
+
"loss": 0.7274,
|
655 |
+
"step": 90
|
656 |
+
},
|
657 |
+
{
|
658 |
+
"epoch": 0.0025295168205919346,
|
659 |
+
"grad_norm": 0.11689383536577225,
|
660 |
+
"learning_rate": 0.0018681546242521785,
|
661 |
+
"loss": 0.6693,
|
662 |
+
"step": 91
|
663 |
+
},
|
664 |
+
{
|
665 |
+
"epoch": 0.002557313708730308,
|
666 |
+
"grad_norm": 0.11212435364723206,
|
667 |
+
"learning_rate": 0.0018649548579446936,
|
668 |
+
"loss": 0.6218,
|
669 |
+
"step": 92
|
670 |
+
},
|
671 |
+
{
|
672 |
+
"epoch": 0.0025851105968686806,
|
673 |
+
"grad_norm": 0.13619789481163025,
|
674 |
+
"learning_rate": 0.0018617195367307952,
|
675 |
+
"loss": 0.5839,
|
676 |
+
"step": 93
|
677 |
+
},
|
678 |
+
{
|
679 |
+
"epoch": 0.0026129074850070534,
|
680 |
+
"grad_norm": 0.18084552884101868,
|
681 |
+
"learning_rate": 0.001858448793601866,
|
682 |
+
"loss": 0.6083,
|
683 |
+
"step": 94
|
684 |
+
},
|
685 |
+
{
|
686 |
+
"epoch": 0.0026407043731454262,
|
687 |
+
"grad_norm": 0.14780890941619873,
|
688 |
+
"learning_rate": 0.0018551427630053464,
|
689 |
+
"loss": 0.6095,
|
690 |
+
"step": 95
|
691 |
+
},
|
692 |
+
{
|
693 |
+
"epoch": 0.0026685012612837995,
|
694 |
+
"grad_norm": 0.12189039587974548,
|
695 |
+
"learning_rate": 0.0018518015808392043,
|
696 |
+
"loss": 0.6313,
|
697 |
+
"step": 96
|
698 |
+
},
|
699 |
+
{
|
700 |
+
"epoch": 0.0026962981494221723,
|
701 |
+
"grad_norm": 0.2006332129240036,
|
702 |
+
"learning_rate": 0.0018484253844463525,
|
703 |
+
"loss": 0.6528,
|
704 |
+
"step": 97
|
705 |
+
},
|
706 |
+
{
|
707 |
+
"epoch": 0.002724095037560545,
|
708 |
+
"grad_norm": 0.19439570605754852,
|
709 |
+
"learning_rate": 0.0018450143126090013,
|
710 |
+
"loss": 0.655,
|
711 |
+
"step": 98
|
712 |
+
},
|
713 |
+
{
|
714 |
+
"epoch": 0.002751891925698918,
|
715 |
+
"grad_norm": 0.23627929389476776,
|
716 |
+
"learning_rate": 0.0018415685055429532,
|
717 |
+
"loss": 0.6663,
|
718 |
+
"step": 99
|
719 |
+
},
|
720 |
+
{
|
721 |
+
"epoch": 0.0027796888138372907,
|
722 |
+
"grad_norm": 0.22060082852840424,
|
723 |
+
"learning_rate": 0.0018380881048918405,
|
724 |
+
"loss": 0.6549,
|
725 |
+
"step": 100
|
726 |
+
},
|
727 |
+
{
|
728 |
+
"epoch": 0.0027796888138372907,
|
729 |
+
"eval_loss": 1.0665974617004395,
|
730 |
+
"eval_runtime": 49.9362,
|
731 |
+
"eval_samples_per_second": 11.555,
|
732 |
+
"eval_steps_per_second": 5.787,
|
733 |
+
"step": 100
|
734 |
}
|
735 |
],
|
736 |
"logging_steps": 1,
|
|
|
745 |
"early_stopping_threshold": 0.0
|
746 |
},
|
747 |
"attributes": {
|
748 |
+
"early_stopping_patience_counter": 1
|
749 |
}
|
750 |
},
|
751 |
"TrainerControl": {
|
|
|
759 |
"attributes": {}
|
760 |
}
|
761 |
},
|
762 |
+
"total_flos": 6589805739638784.0,
|
763 |
"train_batch_size": 2,
|
764 |
"trial_name": null,
|
765 |
"trial_params": null
|