Commit 6234245 (verified) · eddysang committed · 1 Parent(s): e6f06ae

Training in progress, step 130, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a5b2a4ce332b6d3f2d7b5027963d2bac8c8a90a8166a714fcfa99acf1139b970
+oid sha256:869af5620d3d85d7c0db115351fa817c5904fa6f51f53e8d481d34177a3d9341
 size 319876032
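
adapter_model.safetensors, like the other checkpoint files below, is stored as a Git LFS pointer, so the diff only swaps the sha256 oid while the recorded size stays at 319876032 bytes. A minimal sketch (not part of this commit; it assumes the LFS blob has already been pulled locally) for checking a downloaded file against the oid and size in its pointer:

```python
import hashlib
import os

def matches_lfs_pointer(path: str, expected_oid: str, expected_size: int) -> bool:
    """Compare a downloaded blob with the oid/size recorded in its LFS pointer."""
    if os.path.getsize(path) != expected_size:
        return False
    digest = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            digest.update(chunk)
    return digest.hexdigest() == expected_oid

# oid and size taken from the new pointer above.
print(matches_lfs_pointer(
    "last-checkpoint/adapter_model.safetensors",
    "869af5620d3d85d7c0db115351fa817c5904fa6f51f53e8d481d34177a3d9341",
    319876032,
))
```

The same check applies to optimizer.pt, rng_state.pth, and scheduler.pt with their respective oids and sizes.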
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f008f744c603be58af8a9cf079728bb5cbea9265abdcb5dd15ed0a5ff49df40a
+oid sha256:afd7f4c7619ded30407f68888930fdfa979bea1a64de2ebe37b0fdc904faa13e
 size 640009682
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a85180cb5b242ee948b2af64053ea3c4e0cc8b032b0a9568c39646d04dd2d77e
+oid sha256:5a670f2d851c0c4cc79716f31d1954ad248700ff958468f6d2721e422863a0fe
 size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:72ced90745bf11dd0913ccb678fa84f97d9d9d2dcc6e8de79651c15430da9657
+oid sha256:1b8d02756226521458daee3f69c94f8a0b4245ed6c8f1de64c08045d2547f98c
 size 1064
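
Together, adapter_model.safetensors, optimizer.pt, rng_state.pth, scheduler.pt, and trainer_state.json (below) form the usual Hugging Face Trainer checkpoint layout for a PEFT/LoRA run: adapter weights plus optimizer, LR-scheduler, and RNG state for exact resumption. A rough sketch of how the pieces could be loaded for inspection (an assumption about downstream use, not something in this commit; paths are relative to the pulled repo):

```python
import json

import torch
from safetensors.torch import load_file

ckpt = "last-checkpoint"

# Trainer bookkeeping: global step, epoch, and the per-step log history.
with open(f"{ckpt}/trainer_state.json") as f:
    state = json.load(f)
print(state["global_step"], state["epoch"])  # 130, 0.17300894156789354 after this commit

# Adapter weights (safetensors); for a PEFT run this holds only the adapter tensors.
adapter = load_file(f"{ckpt}/adapter_model.safetensors")
print(sum(t.numel() for t in adapter.values()), "adapter parameters")

# Optimizer, LR-scheduler, and RNG state saved so training can resume exactly.
# weights_only=False because these are pickled state dicts from a trusted source.
optimizer_state = torch.load(f"{ckpt}/optimizer.pt", map_location="cpu", weights_only=False)
scheduler_state = torch.load(f"{ckpt}/scheduler.pt", map_location="cpu", weights_only=False)
rng_state = torch.load(f"{ckpt}/rng_state.pth", map_location="cpu", weights_only=False)
```

In practice, resumption would normally go through Trainer.train(resume_from_checkpoint="last-checkpoint") rather than loading these files by hand.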
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.15570804741110417,
+  "epoch": 0.17300894156789354,
   "eval_steps": 50,
-  "global_step": 117,
+  "global_step": 130,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -850,6 +850,97 @@
       "learning_rate": 2.2611647606329732e-05,
       "loss": 43.9194,
       "step": 117
+    },
+    {
+      "epoch": 0.1570388854231649,
+      "grad_norm": 4.738860607147217,
+      "learning_rate": 2.1330077944190924e-05,
+      "loss": 44.4921,
+      "step": 118
+    },
+    {
+      "epoch": 0.15836972343522562,
+      "grad_norm": 5.531933307647705,
+      "learning_rate": 2.0079850005167007e-05,
+      "loss": 47.8288,
+      "step": 119
+    },
+    {
+      "epoch": 0.15970056144728634,
+      "grad_norm": 5.006840229034424,
+      "learning_rate": 1.8861693887167408e-05,
+      "loss": 47.1657,
+      "step": 120
+    },
+    {
+      "epoch": 0.16103139945934705,
+      "grad_norm": 4.700850963592529,
+      "learning_rate": 1.767632095906137e-05,
+      "loss": 45.0584,
+      "step": 121
+    },
+    {
+      "epoch": 0.16236223747140777,
+      "grad_norm": 5.489245414733887,
+      "learning_rate": 1.652442344525833e-05,
+      "loss": 45.8487,
+      "step": 122
+    },
+    {
+      "epoch": 0.1636930754834685,
+      "grad_norm": 4.946503162384033,
+      "learning_rate": 1.5406674021468438e-05,
+      "loss": 46.2189,
+      "step": 123
+    },
+    {
+      "epoch": 0.16502391349552922,
+      "grad_norm": 8.596658706665039,
+      "learning_rate": 1.4323725421878949e-05,
+      "loss": 44.8387,
+      "step": 124
+    },
+    {
+      "epoch": 0.16635475150758994,
+      "grad_norm": 4.896723747253418,
+      "learning_rate": 1.3276210057975772e-05,
+      "loss": 46.1841,
+      "step": 125
+    },
+    {
+      "epoch": 0.16768558951965065,
+      "grad_norm": 5.261892795562744,
+      "learning_rate": 1.2264739649232993e-05,
+      "loss": 44.6968,
+      "step": 126
+    },
+    {
+      "epoch": 0.16901642753171137,
+      "grad_norm": 5.7036027908325195,
+      "learning_rate": 1.1289904865885935e-05,
+      "loss": 43.3378,
+      "step": 127
+    },
+    {
+      "epoch": 0.17034726554377208,
+      "grad_norm": 4.333169460296631,
+      "learning_rate": 1.0352274983996303e-05,
+      "loss": 46.5715,
+      "step": 128
+    },
+    {
+      "epoch": 0.17167810355583282,
+      "grad_norm": 5.31616735458374,
+      "learning_rate": 9.452397553011157e-06,
+      "loss": 46.6349,
+      "step": 129
+    },
+    {
+      "epoch": 0.17300894156789354,
+      "grad_norm": 4.633855819702148,
+      "learning_rate": 8.590798076009264e-06,
+      "loss": 46.4312,
+      "step": 130
     }
   ],
   "logging_steps": 1,
@@ -869,7 +960,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 6.6051716349218e+17,
+  "total_flos": 7.338166546386125e+17,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null