lesso commited on
Commit
a6a7240
·
verified ·
1 Parent(s): 0da769c

Training in progress, step 100, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:70c6749d5bfc06ff675c0de3d06c8bab76723ac84b1f320cecb4f8f7bf8572f8
3
  size 671149168
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1789ce8598d9b50d553bb646bd4f53e47386d447f48349eee9e044df045a0624
3
  size 671149168
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:76330a93416b82eab93a3789c01979b5583ed9c64366a88a2cd35c1c9f213407
3
  size 341314196
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6289bd9d5bbda42e10ebca40c62d43059c94447b0c5169cab9e08b829239d1c9
3
  size 341314196
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e00c53a47bbf43600a8eeae59305df9f053c38eadca365d27e37e9e27824759f
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:43c46c26285cc2d316f6194e372dcaec23fa60292decd0c55f7d25677f2dfd35
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b3082d89d14e55a69c6ec6c2ca1c3df65b8415cc327c81db7a6363e26d4561be
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a7da77f823b3b7b98780ed896403aab79fd218aa0d6b8705e9fef6de28201601
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 1.4506478309631348,
3
- "best_model_checkpoint": "miner_id_24/checkpoint-50",
4
- "epoch": 0.023798191337458353,
5
  "eval_steps": 50,
6
- "global_step": 50,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -373,6 +373,364 @@
373
  "eval_samples_per_second": 9.393,
374
  "eval_steps_per_second": 2.356,
375
  "step": 50
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
376
  }
377
  ],
378
  "logging_steps": 1,
@@ -401,7 +759,7 @@
401
  "attributes": {}
402
  }
403
  },
404
- "total_flos": 1.78874952450048e+16,
405
  "train_batch_size": 4,
406
  "trial_name": null,
407
  "trial_params": null
 
1
  {
2
+ "best_metric": 1.34207284450531,
3
+ "best_model_checkpoint": "miner_id_24/checkpoint-100",
4
+ "epoch": 0.047596382674916705,
5
  "eval_steps": 50,
6
+ "global_step": 100,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
373
  "eval_samples_per_second": 9.393,
374
  "eval_steps_per_second": 2.356,
375
  "step": 50
376
+ },
377
+ {
378
+ "epoch": 0.02427415516420752,
379
+ "grad_norm": 8.132503509521484,
380
+ "learning_rate": 7.975421052631579e-05,
381
+ "loss": 2.8369,
382
+ "step": 51
383
+ },
384
+ {
385
+ "epoch": 0.024750118990956686,
386
+ "grad_norm": 7.467820644378662,
387
+ "learning_rate": 7.921894736842106e-05,
388
+ "loss": 3.1423,
389
+ "step": 52
390
+ },
391
+ {
392
+ "epoch": 0.025226082817705855,
393
+ "grad_norm": 6.4784369468688965,
394
+ "learning_rate": 7.868368421052632e-05,
395
+ "loss": 2.7428,
396
+ "step": 53
397
+ },
398
+ {
399
+ "epoch": 0.02570204664445502,
400
+ "grad_norm": 6.803327560424805,
401
+ "learning_rate": 7.814842105263157e-05,
402
+ "loss": 2.8238,
403
+ "step": 54
404
+ },
405
+ {
406
+ "epoch": 0.02617801047120419,
407
+ "grad_norm": 9.669547080993652,
408
+ "learning_rate": 7.761315789473685e-05,
409
+ "loss": 2.7933,
410
+ "step": 55
411
+ },
412
+ {
413
+ "epoch": 0.026653974297953357,
414
+ "grad_norm": 9.685455322265625,
415
+ "learning_rate": 7.70778947368421e-05,
416
+ "loss": 3.0942,
417
+ "step": 56
418
+ },
419
+ {
420
+ "epoch": 0.027129938124702522,
421
+ "grad_norm": 8.498844146728516,
422
+ "learning_rate": 7.654263157894737e-05,
423
+ "loss": 3.0961,
424
+ "step": 57
425
+ },
426
+ {
427
+ "epoch": 0.02760590195145169,
428
+ "grad_norm": 7.0263776779174805,
429
+ "learning_rate": 7.600736842105264e-05,
430
+ "loss": 2.9287,
431
+ "step": 58
432
+ },
433
+ {
434
+ "epoch": 0.028081865778200855,
435
+ "grad_norm": 7.7478742599487305,
436
+ "learning_rate": 7.54721052631579e-05,
437
+ "loss": 2.7414,
438
+ "step": 59
439
+ },
440
+ {
441
+ "epoch": 0.028557829604950024,
442
+ "grad_norm": 6.691257953643799,
443
+ "learning_rate": 7.493684210526315e-05,
444
+ "loss": 2.6123,
445
+ "step": 60
446
+ },
447
+ {
448
+ "epoch": 0.029033793431699192,
449
+ "grad_norm": 7.181465148925781,
450
+ "learning_rate": 7.440157894736843e-05,
451
+ "loss": 2.6851,
452
+ "step": 61
453
+ },
454
+ {
455
+ "epoch": 0.029509757258448358,
456
+ "grad_norm": 6.531435012817383,
457
+ "learning_rate": 7.386631578947369e-05,
458
+ "loss": 2.6416,
459
+ "step": 62
460
+ },
461
+ {
462
+ "epoch": 0.029985721085197526,
463
+ "grad_norm": 9.599217414855957,
464
+ "learning_rate": 7.333105263157895e-05,
465
+ "loss": 2.7074,
466
+ "step": 63
467
+ },
468
+ {
469
+ "epoch": 0.03046168491194669,
470
+ "grad_norm": 7.4359846115112305,
471
+ "learning_rate": 7.279578947368422e-05,
472
+ "loss": 2.8352,
473
+ "step": 64
474
+ },
475
+ {
476
+ "epoch": 0.03093764873869586,
477
+ "grad_norm": 6.91318941116333,
478
+ "learning_rate": 7.226052631578947e-05,
479
+ "loss": 2.5786,
480
+ "step": 65
481
+ },
482
+ {
483
+ "epoch": 0.031413612565445025,
484
+ "grad_norm": 8.484053611755371,
485
+ "learning_rate": 7.172526315789474e-05,
486
+ "loss": 3.0093,
487
+ "step": 66
488
+ },
489
+ {
490
+ "epoch": 0.0318895763921942,
491
+ "grad_norm": 7.760731220245361,
492
+ "learning_rate": 7.119e-05,
493
+ "loss": 2.8831,
494
+ "step": 67
495
+ },
496
+ {
497
+ "epoch": 0.03236554021894336,
498
+ "grad_norm": 7.7634100914001465,
499
+ "learning_rate": 7.065473684210527e-05,
500
+ "loss": 2.8134,
501
+ "step": 68
502
+ },
503
+ {
504
+ "epoch": 0.03284150404569253,
505
+ "grad_norm": 15.038714408874512,
506
+ "learning_rate": 7.011947368421053e-05,
507
+ "loss": 2.446,
508
+ "step": 69
509
+ },
510
+ {
511
+ "epoch": 0.03331746787244169,
512
+ "grad_norm": 6.84593391418457,
513
+ "learning_rate": 6.95842105263158e-05,
514
+ "loss": 2.8314,
515
+ "step": 70
516
+ },
517
+ {
518
+ "epoch": 0.033793431699190864,
519
+ "grad_norm": 6.8339667320251465,
520
+ "learning_rate": 6.904894736842105e-05,
521
+ "loss": 2.507,
522
+ "step": 71
523
+ },
524
+ {
525
+ "epoch": 0.03426939552594003,
526
+ "grad_norm": 8.750052452087402,
527
+ "learning_rate": 6.851368421052632e-05,
528
+ "loss": 2.637,
529
+ "step": 72
530
+ },
531
+ {
532
+ "epoch": 0.034745359352689194,
533
+ "grad_norm": 7.736267566680908,
534
+ "learning_rate": 6.797842105263158e-05,
535
+ "loss": 2.748,
536
+ "step": 73
537
+ },
538
+ {
539
+ "epoch": 0.035221323179438366,
540
+ "grad_norm": 7.89774227142334,
541
+ "learning_rate": 6.744315789473685e-05,
542
+ "loss": 2.7948,
543
+ "step": 74
544
+ },
545
+ {
546
+ "epoch": 0.03569728700618753,
547
+ "grad_norm": 7.224119663238525,
548
+ "learning_rate": 6.690789473684211e-05,
549
+ "loss": 2.7275,
550
+ "step": 75
551
+ },
552
+ {
553
+ "epoch": 0.036173250832936696,
554
+ "grad_norm": 7.180510520935059,
555
+ "learning_rate": 6.637263157894738e-05,
556
+ "loss": 2.5776,
557
+ "step": 76
558
+ },
559
+ {
560
+ "epoch": 0.03664921465968586,
561
+ "grad_norm": 6.440933704376221,
562
+ "learning_rate": 6.583736842105263e-05,
563
+ "loss": 2.241,
564
+ "step": 77
565
+ },
566
+ {
567
+ "epoch": 0.03712517848643503,
568
+ "grad_norm": 8.913047790527344,
569
+ "learning_rate": 6.53021052631579e-05,
570
+ "loss": 2.9477,
571
+ "step": 78
572
+ },
573
+ {
574
+ "epoch": 0.0376011423131842,
575
+ "grad_norm": 7.736593723297119,
576
+ "learning_rate": 6.476684210526316e-05,
577
+ "loss": 2.8696,
578
+ "step": 79
579
+ },
580
+ {
581
+ "epoch": 0.03807710613993336,
582
+ "grad_norm": 7.847418308258057,
583
+ "learning_rate": 6.423157894736841e-05,
584
+ "loss": 2.3813,
585
+ "step": 80
586
+ },
587
+ {
588
+ "epoch": 0.038553069966682535,
589
+ "grad_norm": 9.366430282592773,
590
+ "learning_rate": 6.369631578947368e-05,
591
+ "loss": 2.8983,
592
+ "step": 81
593
+ },
594
+ {
595
+ "epoch": 0.0390290337934317,
596
+ "grad_norm": 6.681727886199951,
597
+ "learning_rate": 6.316105263157896e-05,
598
+ "loss": 2.4428,
599
+ "step": 82
600
+ },
601
+ {
602
+ "epoch": 0.039504997620180865,
603
+ "grad_norm": 8.800130844116211,
604
+ "learning_rate": 6.262578947368421e-05,
605
+ "loss": 2.9551,
606
+ "step": 83
607
+ },
608
+ {
609
+ "epoch": 0.03998096144693003,
610
+ "grad_norm": 8.04470157623291,
611
+ "learning_rate": 6.209052631578948e-05,
612
+ "loss": 2.7357,
613
+ "step": 84
614
+ },
615
+ {
616
+ "epoch": 0.0404569252736792,
617
+ "grad_norm": 6.852024555206299,
618
+ "learning_rate": 6.155526315789474e-05,
619
+ "loss": 2.6536,
620
+ "step": 85
621
+ },
622
+ {
623
+ "epoch": 0.04093288910042837,
624
+ "grad_norm": 7.370736598968506,
625
+ "learning_rate": 6.102e-05,
626
+ "loss": 2.6235,
627
+ "step": 86
628
+ },
629
+ {
630
+ "epoch": 0.04140885292717753,
631
+ "grad_norm": 7.6142401695251465,
632
+ "learning_rate": 6.048473684210526e-05,
633
+ "loss": 2.7034,
634
+ "step": 87
635
+ },
636
+ {
637
+ "epoch": 0.041884816753926704,
638
+ "grad_norm": 7.17495059967041,
639
+ "learning_rate": 5.9949473684210527e-05,
640
+ "loss": 2.7207,
641
+ "step": 88
642
+ },
643
+ {
644
+ "epoch": 0.04236078058067587,
645
+ "grad_norm": 7.216758728027344,
646
+ "learning_rate": 5.94142105263158e-05,
647
+ "loss": 2.4448,
648
+ "step": 89
649
+ },
650
+ {
651
+ "epoch": 0.042836744407425034,
652
+ "grad_norm": 7.9468817710876465,
653
+ "learning_rate": 5.887894736842106e-05,
654
+ "loss": 2.9818,
655
+ "step": 90
656
+ },
657
+ {
658
+ "epoch": 0.043312708234174206,
659
+ "grad_norm": 8.539376258850098,
660
+ "learning_rate": 5.834368421052632e-05,
661
+ "loss": 2.4128,
662
+ "step": 91
663
+ },
664
+ {
665
+ "epoch": 0.04378867206092337,
666
+ "grad_norm": 8.345818519592285,
667
+ "learning_rate": 5.780842105263158e-05,
668
+ "loss": 2.7606,
669
+ "step": 92
670
+ },
671
+ {
672
+ "epoch": 0.044264635887672536,
673
+ "grad_norm": 8.508038520812988,
674
+ "learning_rate": 5.727315789473684e-05,
675
+ "loss": 2.6035,
676
+ "step": 93
677
+ },
678
+ {
679
+ "epoch": 0.0447405997144217,
680
+ "grad_norm": 8.71206283569336,
681
+ "learning_rate": 5.673789473684211e-05,
682
+ "loss": 2.9024,
683
+ "step": 94
684
+ },
685
+ {
686
+ "epoch": 0.04521656354117087,
687
+ "grad_norm": 7.256693363189697,
688
+ "learning_rate": 5.620263157894738e-05,
689
+ "loss": 2.5654,
690
+ "step": 95
691
+ },
692
+ {
693
+ "epoch": 0.04569252736792004,
694
+ "grad_norm": 6.628811359405518,
695
+ "learning_rate": 5.566736842105264e-05,
696
+ "loss": 2.0765,
697
+ "step": 96
698
+ },
699
+ {
700
+ "epoch": 0.0461684911946692,
701
+ "grad_norm": 9.192995071411133,
702
+ "learning_rate": 5.51321052631579e-05,
703
+ "loss": 3.0441,
704
+ "step": 97
705
+ },
706
+ {
707
+ "epoch": 0.046644455021418375,
708
+ "grad_norm": 9.181817054748535,
709
+ "learning_rate": 5.459684210526316e-05,
710
+ "loss": 2.8811,
711
+ "step": 98
712
+ },
713
+ {
714
+ "epoch": 0.04712041884816754,
715
+ "grad_norm": 9.44265079498291,
716
+ "learning_rate": 5.406157894736842e-05,
717
+ "loss": 2.6583,
718
+ "step": 99
719
+ },
720
+ {
721
+ "epoch": 0.047596382674916705,
722
+ "grad_norm": 9.560362815856934,
723
+ "learning_rate": 5.352631578947368e-05,
724
+ "loss": 2.5805,
725
+ "step": 100
726
+ },
727
+ {
728
+ "epoch": 0.047596382674916705,
729
+ "eval_loss": 1.34207284450531,
730
+ "eval_runtime": 94.197,
731
+ "eval_samples_per_second": 9.395,
732
+ "eval_steps_per_second": 2.357,
733
+ "step": 100
734
  }
735
  ],
736
  "logging_steps": 1,
 
759
  "attributes": {}
760
  }
761
  },
762
+ "total_flos": 3.57749904900096e+16,
763
  "train_batch_size": 4,
764
  "trial_name": null,
765
  "trial_params": null