error577 commited on
Commit
3c3cd88
·
verified ·
1 Parent(s): 4ac07ab

Training in progress, step 140, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:393273cdca286ccb40e14205ea36723e7c5c1240080ed35a78dbeb665d070ee9
3
  size 1579384
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b583587d17c6c7a97a11c9e59c5d69ae6878502971c7d00024d35410cc8bc4ab
3
  size 1579384
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0d54a2e2580a8617019d978feb2d6817db3c1af9507f0f49fc2ef229fd5ccdce
3
  size 857274
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a855e5c8ec97ac48e47fa165986806e7c257deeed8f91cad3e04ecdca2ca02a2
3
  size 857274
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:eed95f9b60d673089274be5847cbb94c802446b0a76989fa02d0940d21190b7e
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3a8bec8ada3d1dd406d9aa43c4a27a47f026c1e315b02eae7d5bc447ebde9bea
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a705acd53999263a453e7147fb79a4ea1e0e69c6cfafd3b3b9170876c31d6d7f
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:50bdc92009888fc74192f19079ad05c940764efafef78e70a576eb66733f4db5
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.07923734059675622,
5
  "eval_steps": 20,
6
- "global_step": 120,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -903,6 +903,154 @@
903
  "eval_samples_per_second": 75.145,
904
  "eval_steps_per_second": 75.145,
905
  "step": 120
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
906
  }
907
  ],
908
  "logging_steps": 1,
@@ -922,7 +1070,7 @@
922
  "attributes": {}
923
  }
924
  },
925
- "total_flos": 81003622170624.0,
926
  "train_batch_size": 1,
927
  "trial_name": null,
928
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.09244356402954892,
5
  "eval_steps": 20,
6
+ "global_step": 140,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
903
  "eval_samples_per_second": 75.145,
904
  "eval_steps_per_second": 75.145,
905
  "step": 120
906
+ },
907
+ {
908
+ "epoch": 0.07989765176839586,
909
+ "grad_norm": 10677.837890625,
910
+ "learning_rate": 0.0002040079434189748,
911
+ "loss": 90.3255,
912
+ "step": 121
913
+ },
914
+ {
915
+ "epoch": 0.08055796294003549,
916
+ "grad_norm": 13001.4951171875,
917
+ "learning_rate": 0.00020248882546534326,
918
+ "loss": 94.9763,
919
+ "step": 122
920
+ },
921
+ {
922
+ "epoch": 0.08121827411167512,
923
+ "grad_norm": 88364.6875,
924
+ "learning_rate": 0.00020096354771531976,
925
+ "loss": 210.5573,
926
+ "step": 123
927
+ },
928
+ {
929
+ "epoch": 0.08187858528331476,
930
+ "grad_norm": 314990.40625,
931
+ "learning_rate": 0.00019943228916701104,
932
+ "loss": 602.6875,
933
+ "step": 124
934
+ },
935
+ {
936
+ "epoch": 0.0825388964549544,
937
+ "grad_norm": 264931.5,
938
+ "learning_rate": 0.00019789522952039695,
939
+ "loss": 513.6562,
940
+ "step": 125
941
+ },
942
+ {
943
+ "epoch": 0.08319920762659404,
944
+ "grad_norm": 157227.640625,
945
+ "learning_rate": 0.0001963525491562421,
946
+ "loss": 511.8125,
947
+ "step": 126
948
+ },
949
+ {
950
+ "epoch": 0.08385951879823367,
951
+ "grad_norm": 537004.0625,
952
+ "learning_rate": 0.00019480442911492702,
953
+ "loss": 268.8125,
954
+ "step": 127
955
+ },
956
+ {
957
+ "epoch": 0.0845198299698733,
958
+ "grad_norm": 1529112.375,
959
+ "learning_rate": 0.00019325105107520263,
960
+ "loss": 257.3359,
961
+ "step": 128
962
+ },
963
+ {
964
+ "epoch": 0.08518014114151294,
965
+ "grad_norm": 231625.046875,
966
+ "learning_rate": 0.00019169259733286913,
967
+ "loss": 202.3438,
968
+ "step": 129
969
+ },
970
+ {
971
+ "epoch": 0.08584045231315257,
972
+ "grad_norm": 306631.5625,
973
+ "learning_rate": 0.00019012925077938314,
974
+ "loss": 181.8047,
975
+ "step": 130
976
+ },
977
+ {
978
+ "epoch": 0.08650076348479221,
979
+ "grad_norm": 118856.3203125,
980
+ "learning_rate": 0.0001885611948803941,
981
+ "loss": 179.7969,
982
+ "step": 131
983
+ },
984
+ {
985
+ "epoch": 0.08716107465643184,
986
+ "grad_norm": 98292.1796875,
987
+ "learning_rate": 0.0001869886136542143,
988
+ "loss": 392.7283,
989
+ "step": 132
990
+ },
991
+ {
992
+ "epoch": 0.08782138582807147,
993
+ "grad_norm": 89386.7734375,
994
+ "learning_rate": 0.00018541169165022298,
995
+ "loss": 318.7403,
996
+ "step": 133
997
+ },
998
+ {
999
+ "epoch": 0.08848169699971112,
1000
+ "grad_norm": 137117.296875,
1001
+ "learning_rate": 0.00018383061392720913,
1002
+ "loss": 245.4405,
1003
+ "step": 134
1004
+ },
1005
+ {
1006
+ "epoch": 0.08914200817135075,
1007
+ "grad_norm": 150909.71875,
1008
+ "learning_rate": 0.0001822455660316536,
1009
+ "loss": 156.249,
1010
+ "step": 135
1011
+ },
1012
+ {
1013
+ "epoch": 0.08980231934299038,
1014
+ "grad_norm": 60323.1875,
1015
+ "learning_rate": 0.00018065673397595473,
1016
+ "loss": 113.9602,
1017
+ "step": 136
1018
+ },
1019
+ {
1020
+ "epoch": 0.09046263051463002,
1021
+ "grad_norm": 42032.9921875,
1022
+ "learning_rate": 0.00017906430421659876,
1023
+ "loss": 107.9544,
1024
+ "step": 137
1025
+ },
1026
+ {
1027
+ "epoch": 0.09112294168626965,
1028
+ "grad_norm": 33694.890625,
1029
+ "learning_rate": 0.00017746846363227842,
1030
+ "loss": 111.8,
1031
+ "step": 138
1032
+ },
1033
+ {
1034
+ "epoch": 0.0917832528579093,
1035
+ "grad_norm": 34851.86328125,
1036
+ "learning_rate": 0.00017586939950196186,
1037
+ "loss": 103.5465,
1038
+ "step": 139
1039
+ },
1040
+ {
1041
+ "epoch": 0.09244356402954892,
1042
+ "grad_norm": 24885.861328125,
1043
+ "learning_rate": 0.00017426729948291474,
1044
+ "loss": 101.8808,
1045
+ "step": 140
1046
+ },
1047
+ {
1048
+ "epoch": 0.09244356402954892,
1049
+ "eval_loss": 10.751233100891113,
1050
+ "eval_runtime": 6.5882,
1051
+ "eval_samples_per_second": 75.135,
1052
+ "eval_steps_per_second": 75.135,
1053
+ "step": 140
1054
  }
1055
  ],
1056
  "logging_steps": 1,
 
1070
  "attributes": {}
1071
  }
1072
  },
1073
+ "total_flos": 93374726012928.0,
1074
  "train_batch_size": 1,
1075
  "trial_name": null,
1076
  "trial_params": null