error577 committed

Commit 49d76c9 · verified · 1 Parent(s): 6bb7c86

Training in progress, step 100, checkpoint
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:f6999e24123d71e266bce59d9e1aba3d3dc179d65fddb2799602f1ec6b06ec88
+ oid sha256:16785a61aa31cea9df62cbd18f8208d660df29185f09b9dd0e9aca0d2438e96a
  size 1579384
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:7a24c8e6f1679121a45e039226c334660e57958441ed30e7b750f382172ca295
+ oid sha256:9518499bb798c1ce79529bad3ff555316b6ee700915fcec92bf1504941a05032
  size 857274
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:86f99ea925931374a371696b51a9fff3344409b7210b2f4affdc7a33396cfaa0
+ oid sha256:cace2bb20bc8ba875286724acbfadddec3e5175c4ce467dea9a6adf2fcb4cb03
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:1be5e7bccc826efde7cd880e840bc1b1679bfe2c43db648052cc366c4bb703b5
+ oid sha256:2036ec7b8a4c0dbbd07ada2b2af7c3be05d304eb60a4492cb7e057daf83ea234
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
  {
  "best_metric": null,
  "best_model_checkpoint": null,
- "epoch": 0.052824893731170816,
+ "epoch": 0.06603111716396352,
  "eval_steps": 20,
- "global_step": 80,
+ "global_step": 100,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
@@ -607,6 +607,154 @@
  "eval_samples_per_second": 74.747,
  "eval_steps_per_second": 74.747,
  "step": 80
+ },
+ {
+ "epoch": 0.05348520490281045,
+ "grad_norm": 259394.6875,
+ "learning_rate": 0.00025777547044259435,
+ "loss": 200.2073,
+ "step": 81
+ },
+ {
+ "epoch": 0.05414551607445008,
+ "grad_norm": 1378326.625,
+ "learning_rate": 0.0002566389691674106,
+ "loss": 100.5475,
+ "step": 82
+ },
+ {
+ "epoch": 0.05480582724608972,
+ "grad_norm": 63462.6875,
+ "learning_rate": 0.00025548995333638197,
+ "loss": 189.407,
+ "step": 83
+ },
+ {
+ "epoch": 0.055466138417729356,
+ "grad_norm": 149989.21875,
+ "learning_rate": 0.00025432855779161076,
+ "loss": 655.0445,
+ "step": 84
+ },
+ {
+ "epoch": 0.05612644958936899,
+ "grad_norm": 161908.9375,
+ "learning_rate": 0.00025315491882801347,
+ "loss": 542.2335,
+ "step": 85
+ },
+ {
+ "epoch": 0.05678676076100862,
+ "grad_norm": 140391.09375,
+ "learning_rate": 0.00025196917417732615,
+ "loss": 178.0071,
+ "step": 86
+ },
+ {
+ "epoch": 0.05744707193264826,
+ "grad_norm": 45774.61328125,
+ "learning_rate": 0.0002507714629919409,
+ "loss": 145.9398,
+ "step": 87
+ },
+ {
+ "epoch": 0.058107383104287896,
+ "grad_norm": 74355.359375,
+ "learning_rate": 0.0002495619258285757,
+ "loss": 162.5158,
+ "step": 88
+ },
+ {
+ "epoch": 0.05876769427592753,
+ "grad_norm": 112329.7265625,
+ "learning_rate": 0.0002483407046317794,
+ "loss": 223.498,
+ "step": 89
+ },
+ {
+ "epoch": 0.05942800544756716,
+ "grad_norm": 488449.875,
+ "learning_rate": 0.00024710794271727413,
+ "loss": 223.1561,
+ "step": 90
+ },
+ {
+ "epoch": 0.0600883166192068,
+ "grad_norm": 146916.296875,
+ "learning_rate": 0.0002458637847551364,
+ "loss": 252.3947,
+ "step": 91
+ },
+ {
+ "epoch": 0.060748627790846436,
+ "grad_norm": 115853.0703125,
+ "learning_rate": 0.00024460837675281926,
+ "loss": 265.611,
+ "step": 92
+ },
+ {
+ "epoch": 0.06140893896248607,
+ "grad_norm": 95760.921875,
+ "learning_rate": 0.00024334186603801807,
+ "loss": 195.9439,
+ "step": 93
+ },
+ {
+ "epoch": 0.06206925013412571,
+ "grad_norm": 58220.4609375,
+ "learning_rate": 0.00024206440124138062,
+ "loss": 173.6973,
+ "step": 94
+ },
+ {
+ "epoch": 0.06272956130576535,
+ "grad_norm": 44573.25390625,
+ "learning_rate": 0.0002407761322790648,
+ "loss": 130.0355,
+ "step": 95
+ },
+ {
+ "epoch": 0.06338987247740498,
+ "grad_norm": 48302.27734375,
+ "learning_rate": 0.00023947721033514512,
+ "loss": 110.2012,
+ "step": 96
+ },
+ {
+ "epoch": 0.0640501836490446,
+ "grad_norm": 18446.73046875,
+ "learning_rate": 0.00023816778784387094,
+ "loss": 118.2505,
+ "step": 97
+ },
+ {
+ "epoch": 0.06471049482068425,
+ "grad_norm": 35311.09375,
+ "learning_rate": 0.0002368480184717773,
+ "loss": 133.5809,
+ "step": 98
+ },
+ {
+ "epoch": 0.06537080599232388,
+ "grad_norm": 38145.79296875,
+ "learning_rate": 0.00023551805709965147,
+ "loss": 129.8271,
+ "step": 99
+ },
+ {
+ "epoch": 0.06603111716396352,
+ "grad_norm": 32865.98046875,
+ "learning_rate": 0.00023417805980435736,
+ "loss": 116.0362,
+ "step": 100
+ },
+ {
+ "epoch": 0.06603111716396352,
+ "eval_loss": 9.961955070495605,
+ "eval_runtime": 6.5733,
+ "eval_samples_per_second": 75.305,
+ "eval_steps_per_second": 75.305,
+ "step": 100
  }
  ],
  "logging_steps": 1,
@@ -626,7 +774,7 @@
  "attributes": {}
  }
  },
- "total_flos": 56162839953408.0,
+ "total_flos": 65231696953344.0,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null