eddysang committed
Commit f27d128 · verified · 1 Parent(s): 4d24d64

Training in progress, step 117, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:7c317579f0ce6943c96efcd29641b0da777b663afd35615d7139e5030c2ea7fa
+ oid sha256:a5b2a4ce332b6d3f2d7b5027963d2bac8c8a90a8166a714fcfa99acf1139b970
  size 319876032
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:2f840bb68a6dbf9aeb6a06b461912087949474294f107d721ad10f08734a94f2
+ oid sha256:f008f744c603be58af8a9cf079728bb5cbea9265abdcb5dd15ed0a5ff49df40a
  size 640009682
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:42ff49e4fdfd413d94e203d9be9412a3f3634f923d746d4404b2715d238c396e
+ oid sha256:a85180cb5b242ee948b2af64053ea3c4e0cc8b032b0a9568c39646d04dd2d77e
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:b57ac6e48796727694162b0b2b513045a14687c5b4988558b227383b3e2c1d53
+ oid sha256:72ced90745bf11dd0913ccb678fa84f97d9d9d2dcc6e8de79651c15430da9657
  size 1064
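
The four files above are stored as Git LFS pointers, so only the sha256 oid and the byte size are recorded in the repository; the pointer values change whenever the checkpoint binaries are rewritten. As a minimal sketch (not part of this repository), a downloaded copy could be checked against a pointer like this; `verify_lfs_pointer` and the local file path are illustrative names:

```python
# Minimal sketch: compare a downloaded checkpoint file against the sha256 oid
# and byte size recorded in its Git LFS pointer. Helper name and paths are
# illustrative, not part of this repository.
import hashlib
from pathlib import Path

def verify_lfs_pointer(pointer_text: str, local_file: Path) -> bool:
    # Pointer format is three "key value" lines: version, oid sha256:<hex>, size <bytes>.
    fields = dict(line.split(" ", 1) for line in pointer_text.strip().splitlines())
    expected_oid = fields["oid"].split(":", 1)[1]
    expected_size = int(fields["size"])

    digest = hashlib.sha256()
    size = 0
    with local_file.open("rb") as fh:
        for chunk in iter(lambda: fh.read(1 << 20), b""):  # hash in 1 MiB chunks
            digest.update(chunk)
            size += len(chunk)
    return size == expected_size and digest.hexdigest() == expected_oid

# Example using the new adapter_model.safetensors pointer from this commit:
pointer = (
    "version https://git-lfs.github.com/spec/v1\n"
    "oid sha256:a5b2a4ce332b6d3f2d7b5027963d2bac8c8a90a8166a714fcfa99acf1139b970\n"
    "size 319876032\n"
)
print(verify_lfs_pointer(pointer, Path("adapter_model.safetensors")))  # local path is illustrative
```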
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
  {
    "best_metric": null,
    "best_model_checkpoint": null,
-   "epoch": 0.13840715325431482,
+   "epoch": 0.15570804741110417,
    "eval_steps": 50,
-   "global_step": 104,
+   "global_step": 117,
    "is_hyper_param_search": false,
    "is_local_process_zero": true,
    "is_world_process_zero": true,
@@ -759,6 +759,97 @@
        "learning_rate": 4.176063408005691e-05,
        "loss": 47.4479,
        "step": 104
+     },
+     {
+       "epoch": 0.13973799126637554,
+       "grad_norm": 5.846158504486084,
+       "learning_rate": 4.014576209671735e-05,
+       "loss": 46.6026,
+       "step": 105
+     },
+     {
+       "epoch": 0.14106882927843625,
+       "grad_norm": 6.544958591461182,
+       "learning_rate": 3.855124400680454e-05,
+       "loss": 47.0602,
+       "step": 106
+     },
+     {
+       "epoch": 0.142399667290497,
+       "grad_norm": 4.549857139587402,
+       "learning_rate": 3.697801096398074e-05,
+       "loss": 44.632,
+       "step": 107
+     },
+     {
+       "epoch": 0.1437305053025577,
+       "grad_norm": 4.476944446563721,
+       "learning_rate": 3.542698169204003e-05,
+       "loss": 44.4824,
+       "step": 108
+     },
+     {
+       "epoch": 0.14506134331461842,
+       "grad_norm": 5.5804314613342285,
+       "learning_rate": 3.389906194839976e-05,
+       "loss": 45.0248,
+       "step": 109
+     },
+     {
+       "epoch": 0.14639218132667914,
+       "grad_norm": 5.525055408477783,
+       "learning_rate": 3.239514399516332e-05,
+       "loss": 44.9731,
+       "step": 110
+     },
+     {
+       "epoch": 0.14772301933873985,
+       "grad_norm": 4.815814018249512,
+       "learning_rate": 3.091610607806452e-05,
+       "loss": 43.9473,
+       "step": 111
+     },
+     {
+       "epoch": 0.1490538573508006,
+       "grad_norm": 4.596780300140381,
+       "learning_rate": 2.946281191359666e-05,
+       "loss": 43.4745,
+       "step": 112
+     },
+     {
+       "epoch": 0.1503846953628613,
+       "grad_norm": 4.904426097869873,
+       "learning_rate": 2.803611018462647e-05,
+       "loss": 46.6434,
+       "step": 113
+     },
+     {
+       "epoch": 0.15171553337492202,
+       "grad_norm": 5.079036712646484,
+       "learning_rate": 2.663683404478722e-05,
+       "loss": 45.0433,
+       "step": 114
+     },
+     {
+       "epoch": 0.15304637138698274,
+       "grad_norm": 4.7992072105407715,
+       "learning_rate": 2.5265800631940373e-05,
+       "loss": 45.1488,
+       "step": 115
+     },
+     {
+       "epoch": 0.15437720939904345,
+       "grad_norm": 8.740615844726562,
+       "learning_rate": 2.3923810590990202e-05,
+       "loss": 46.0447,
+       "step": 116
+     },
+     {
+       "epoch": 0.15570804741110417,
+       "grad_norm": 4.366965293884277,
+       "learning_rate": 2.2611647606329732e-05,
+       "loss": 43.9194,
+       "step": 117
      }
    ],
    "logging_steps": 1,
@@ -778,7 +869,7 @@
        "attributes": {}
      }
    },
-   "total_flos": 5.852454887274578e+17,
+   "total_flos": 6.6051716349218e+17,
    "train_batch_size": 2,
    "trial_name": null,
    "trial_params": null