eddysang committed
Commit f477cec · verified · 1 Parent(s): c40a47a

Training in progress, step 27, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:2ddd296d70d9b3b61ea53b45e334f04990034e1561c23a9d5310e615af21d625
+ oid sha256:1d86c95965cefd952d5ca27059b33f80c64b6b50fce4dda34b1747e02db21f81
  size 319876032
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:b59d2161e7818ef52d0dedcff1f80a4ff4d3161cac3cfbe9a413303665cb10c1
+ oid sha256:13a70a8743b5c2d0bb79fb4dcc70920aea4481b8a9f6fa403ceab3c107ef115e
  size 640009682
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:b93b0856df0781ee689edcb1cf7d2e5a3fef386bea76c4a707f7fab6bcaef139
+ oid sha256:f00908dcb63c73e3df9f03fbc0b4e06062fa22233b29723cfa3cb055febf171b
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:a4b679dfcad285902b67d18379c2cc1ca0aebfc7646cb33fa82a3fb8ed15c820
+ oid sha256:f1f9e237c4e244cd6a21b3069d52ab1ce3e784c965dcb77abb8266616185916c
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
  {
  "best_metric": null,
  "best_model_checkpoint": null,
- "epoch": 0.3873570948217888,
+ "epoch": 0.5810356422326832,
  "eval_steps": 50,
- "global_step": 18,
+ "global_step": 27,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
@@ -141,6 +141,69 @@
  "learning_rate": 0.000135,
  "loss": 40.6851,
  "step": 18
+ },
+ {
+ "epoch": 0.4088769334229993,
+ "grad_norm": 54.74737548828125,
+ "learning_rate": 0.0001425,
+ "loss": 40.0946,
+ "step": 19
+ },
+ {
+ "epoch": 0.43039677202420984,
+ "grad_norm": 113.8772201538086,
+ "learning_rate": 0.00015,
+ "loss": 39.7055,
+ "step": 20
+ },
+ {
+ "epoch": 0.4519166106254203,
+ "grad_norm": 77.01616668701172,
+ "learning_rate": 0.00014994217771805422,
+ "loss": 36.2917,
+ "step": 21
+ },
+ {
+ "epoch": 0.4734364492266308,
+ "grad_norm": 24.734567642211914,
+ "learning_rate": 0.00014976880002998458,
+ "loss": 38.7726,
+ "step": 22
+ },
+ {
+ "epoch": 0.4949562878278413,
+ "grad_norm": 33.39565658569336,
+ "learning_rate": 0.00014948013427161947,
+ "loss": 34.3707,
+ "step": 23
+ },
+ {
+ "epoch": 0.5164761264290518,
+ "grad_norm": 19.322717666625977,
+ "learning_rate": 0.00014907662554463532,
+ "loss": 35.1135,
+ "step": 24
+ },
+ {
+ "epoch": 0.5379959650302623,
+ "grad_norm": 25.940776824951172,
+ "learning_rate": 0.00014855889603024227,
+ "loss": 35.1159,
+ "step": 25
+ },
+ {
+ "epoch": 0.5595158036314728,
+ "grad_norm": 19.108659744262695,
+ "learning_rate": 0.00014792774402982574,
+ "loss": 34.2692,
+ "step": 26
+ },
+ {
+ "epoch": 0.5810356422326832,
+ "grad_norm": 18.458415985107422,
+ "learning_rate": 0.0001471841427340235,
+ "loss": 34.6374,
+ "step": 27
  }
  ],
  "logging_steps": 1,
@@ -160,7 +223,7 @@
  "attributes": {}
  }
  },
- "total_flos": 9.466481367790387e+16,
+ "total_flos": 1.419972205168558e+17,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null