leixa commited on
Commit
8c6e9a2
·
verified ·
1 Parent(s): 9891ec0

Training in progress, step 374, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f2d25b88e084ebd1782d201ceb195fe6db0730594ab101aeca0c6eef087fe8aa
3
  size 692136856
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8451e588a45163d61a250025d9fe1becefe5afb2ebefc4d027e4e03754fa2eb3
3
  size 692136856
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2450dc03895aa4a5edc22e7d6710bfb27b0462b4e19005486a1604c4a7f4ceee
3
  size 85723732
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6d4f6611287b070b2030de06a9d530bcbb55107b364b6ac4e1e5598f37ed1871
3
  size 85723732
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bd3ee42edb81b08784cce5af0d531cc06157d25956a649c4a46b5f301140f33b
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:852b4f618a18afb719aa9c5d0ac61182c6a8e953aadb45c389358df7f1d84b41
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e6999f9aad8d44fbf7db1d80d56ad86630abb8e28a7187e80ed24f8546462146
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:51094b5d327949483be134a2a7ce82f120d34a302bf097e81122d94eff7cf8c6
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.022909507445589918,
5
  "eval_steps": 34,
6
- "global_step": 340,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -886,6 +886,91 @@
886
  "eval_samples_per_second": 14.028,
887
  "eval_steps_per_second": 1.754,
888
  "step": 340
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
889
  }
890
  ],
891
  "logging_steps": 3,
@@ -905,7 +990,7 @@
905
  "attributes": {}
906
  }
907
  },
908
- "total_flos": 4.7812833290747904e+17,
909
  "train_batch_size": 8,
910
  "trial_name": null,
911
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.025200458190148912,
5
  "eval_steps": 34,
6
+ "global_step": 374,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
886
  "eval_samples_per_second": 14.028,
887
  "eval_steps_per_second": 1.754,
888
  "step": 340
889
+ },
890
+ {
891
+ "epoch": 0.02304426925409339,
892
+ "grad_norm": 0.6962878108024597,
893
+ "learning_rate": 2.679304450853401e-06,
894
+ "loss": 0.9048,
895
+ "step": 342
896
+ },
897
+ {
898
+ "epoch": 0.023246411966848596,
899
+ "grad_norm": 0.6920527219772339,
900
+ "learning_rate": 2.4137391347404476e-06,
901
+ "loss": 0.8219,
902
+ "step": 345
903
+ },
904
+ {
905
+ "epoch": 0.0234485546796038,
906
+ "grad_norm": 0.7300416827201843,
907
+ "learning_rate": 2.1613635589349756e-06,
908
+ "loss": 0.8256,
909
+ "step": 348
910
+ },
911
+ {
912
+ "epoch": 0.023650697392359005,
913
+ "grad_norm": 0.6342306137084961,
914
+ "learning_rate": 1.922325103666281e-06,
915
+ "loss": 0.8223,
916
+ "step": 351
917
+ },
918
+ {
919
+ "epoch": 0.02385284010511421,
920
+ "grad_norm": 0.6507661938667297,
921
+ "learning_rate": 1.696763360660808e-06,
922
+ "loss": 0.8241,
923
+ "step": 354
924
+ },
925
+ {
926
+ "epoch": 0.024054982817869417,
927
+ "grad_norm": 0.7264770269393921,
928
+ "learning_rate": 1.4848100516245717e-06,
929
+ "loss": 0.8538,
930
+ "step": 357
931
+ },
932
+ {
933
+ "epoch": 0.02425712553062462,
934
+ "grad_norm": 0.8372617363929749,
935
+ "learning_rate": 1.286588951321363e-06,
936
+ "loss": 0.8162,
937
+ "step": 360
938
+ },
939
+ {
940
+ "epoch": 0.024459268243379826,
941
+ "grad_norm": 0.7067350149154663,
942
+ "learning_rate": 1.102215815291774e-06,
943
+ "loss": 0.7685,
944
+ "step": 363
945
+ },
946
+ {
947
+ "epoch": 0.02466141095613503,
948
+ "grad_norm": 0.7689423561096191,
949
+ "learning_rate": 9.317983122552332e-07,
950
+ "loss": 0.8076,
951
+ "step": 366
952
+ },
953
+ {
954
+ "epoch": 0.024863553668890235,
955
+ "grad_norm": 0.6847316026687622,
956
+ "learning_rate": 7.754359612344859e-07,
957
+ "loss": 0.8129,
958
+ "step": 369
959
+ },
960
+ {
961
+ "epoch": 0.025065696381645443,
962
+ "grad_norm": 0.7521365880966187,
963
+ "learning_rate": 6.332200734393057e-07,
964
+ "loss": 0.8118,
965
+ "step": 372
966
+ },
967
+ {
968
+ "epoch": 0.025200458190148912,
969
+ "eval_loss": 0.8087900876998901,
970
+ "eval_runtime": 1779.996,
971
+ "eval_samples_per_second": 14.042,
972
+ "eval_steps_per_second": 1.756,
973
+ "step": 374
974
  }
975
  ],
976
  "logging_steps": 3,
 
990
  "attributes": {}
991
  }
992
  },
993
+ "total_flos": 5.2594116619822694e+17,
994
  "train_batch_size": 8,
995
  "trial_name": null,
996
  "trial_params": null