eddysang commited on
Commit
49c2349
·
verified ·
1 Parent(s): fd22296

Training in progress, step 143, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:869af5620d3d85d7c0db115351fa817c5904fa6f51f53e8d481d34177a3d9341
3
  size 319876032
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d435ce0de6012fb4236c06060f6cf69269f2b1360a5d6aa435758082414bf7f8
3
  size 319876032
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:afd7f4c7619ded30407f68888930fdfa979bea1a64de2ebe37b0fdc904faa13e
3
  size 640009682
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f0bf61cf0d057bf8db7e5742b05c339faebcc7038ea97dc73bda7aee1fbb09c7
3
  size 640009682
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5a670f2d851c0c4cc79716f31d1954ad248700ff958468f6d2721e422863a0fe
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9a37cbe2fd5d44041cacd48fad439e5298ef2fc2ac9fdb757c4af88acea9033f
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1b8d02756226521458daee3f69c94f8a0b4245ed6c8f1de64c08045d2547f98c
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ee2a6301db2b5e58d7c810f9f0f35dee18c800d0cef729c658d7f5a962d36075
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.17300894156789354,
5
  "eval_steps": 50,
6
- "global_step": 130,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -941,6 +941,97 @@
941
  "learning_rate": 8.590798076009264e-06,
942
  "loss": 46.4312,
943
  "step": 130
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
944
  }
945
  ],
946
  "logging_steps": 1,
@@ -960,7 +1051,7 @@
960
  "attributes": {}
961
  }
962
  },
963
- "total_flos": 7.338166546386125e+17,
964
  "train_batch_size": 2,
965
  "trial_name": null,
966
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.19030983572468288,
5
  "eval_steps": 50,
6
+ "global_step": 143,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
941
  "learning_rate": 8.590798076009264e-06,
942
  "loss": 46.4312,
943
  "step": 130
944
+ },
945
+ {
946
+ "epoch": 0.17433977957995425,
947
+ "grad_norm": 4.295351982116699,
948
+ "learning_rate": 7.767979702822217e-06,
949
+ "loss": 45.6427,
950
+ "step": 131
951
+ },
952
+ {
953
+ "epoch": 0.17567061759201497,
954
+ "grad_norm": 4.867600917816162,
955
+ "learning_rate": 6.984422936209094e-06,
956
+ "loss": 43.972,
957
+ "step": 132
958
+ },
959
+ {
960
+ "epoch": 0.17700145560407568,
961
+ "grad_norm": 5.0242695808410645,
962
+ "learning_rate": 6.240585351256319e-06,
963
+ "loss": 44.2976,
964
+ "step": 133
965
+ },
966
+ {
967
+ "epoch": 0.17833229361613642,
968
+ "grad_norm": 5.848794460296631,
969
+ "learning_rate": 5.536901328166773e-06,
970
+ "loss": 46.4279,
971
+ "step": 134
972
+ },
973
+ {
974
+ "epoch": 0.17966313162819714,
975
+ "grad_norm": 4.636799335479736,
976
+ "learning_rate": 4.8737817985938955e-06,
977
+ "loss": 43.4801,
978
+ "step": 135
979
+ },
980
+ {
981
+ "epoch": 0.18099396964025785,
982
+ "grad_norm": 4.535057544708252,
983
+ "learning_rate": 4.251614005669263e-06,
984
+ "loss": 44.8191,
985
+ "step": 136
986
+ },
987
+ {
988
+ "epoch": 0.18232480765231857,
989
+ "grad_norm": 7.119007587432861,
990
+ "learning_rate": 3.670761277863485e-06,
991
+ "loss": 43.3487,
992
+ "step": 137
993
+ },
994
+ {
995
+ "epoch": 0.18365564566437928,
996
+ "grad_norm": 5.639936923980713,
997
+ "learning_rate": 3.131562816812533e-06,
998
+ "loss": 46.6444,
999
+ "step": 138
1000
+ },
1001
+ {
1002
+ "epoch": 0.18498648367644,
1003
+ "grad_norm": 5.142310619354248,
1004
+ "learning_rate": 2.6343334992336485e-06,
1005
+ "loss": 43.399,
1006
+ "step": 139
1007
+ },
1008
+ {
1009
+ "epoch": 0.18631732168850074,
1010
+ "grad_norm": 4.855108737945557,
1011
+ "learning_rate": 2.179363693046099e-06,
1012
+ "loss": 44.3601,
1013
+ "step": 140
1014
+ },
1015
+ {
1016
+ "epoch": 0.18764815970056145,
1017
+ "grad_norm": 4.390917778015137,
1018
+ "learning_rate": 1.7669190878045914e-06,
1019
+ "loss": 45.47,
1020
+ "step": 141
1021
+ },
1022
+ {
1023
+ "epoch": 0.18897899771262217,
1024
+ "grad_norm": 5.170991897583008,
1025
+ "learning_rate": 1.3972405395439922e-06,
1026
+ "loss": 45.9972,
1027
+ "step": 142
1028
+ },
1029
+ {
1030
+ "epoch": 0.19030983572468288,
1031
+ "grad_norm": 4.540877342224121,
1032
+ "learning_rate": 1.0705439301261887e-06,
1033
+ "loss": 45.5314,
1034
+ "step": 143
1035
  }
1036
  ],
1037
  "logging_steps": 1,
 
1051
  "attributes": {}
1052
  }
1053
  },
1054
+ "total_flos": 8.072804944199025e+17,
1055
  "train_batch_size": 2,
1056
  "trial_name": null,
1057
  "trial_params": null