eddysang commited on
Commit
9bd8c47
·
verified ·
1 Parent(s): 8f687b6

Training in progress, step 34, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ba041080b6ba40d6138b88d19b0255d6b2726eee4078e2781dd18a6f8228ff5f
3
  size 97728
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ff72e4efe6d28a195f3570744a19808d24176a4a5c3166d09509ecf448dd971e
3
  size 97728
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:056906055ce9d76afcef49ded9dbddfd8ebf3a5c03590c1c92688d4e0e5f3da3
3
  size 212298
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fabc0e47233f6f67098c93eb17ccaff88b58404fd8853fd611da5f2ccf11189d
3
  size 212298
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:db583067794f20b01832d4ddabaad594d5a607a594a6f04f852e2b423222180d
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e5a1457ae5d909a79141dea2965aadc86d4c660ec11af6ef8eed50147437a542
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d0abe1a027b3fea2bf654a1c387b6eb2241fa486bab4a282d3a0e829c4308c91
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2e1983b20d7ce0214623b79adb071ed1f5c168cabcab4cc0ff2c0c61c63ddce9
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.05759661196400212,
5
  "eval_steps": 50,
6
- "global_step": 17,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -134,6 +134,125 @@
134
  "learning_rate": 0.00012749999999999998,
135
  "loss": 10.3736,
136
  "step": 17
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
137
  }
138
  ],
139
  "logging_steps": 1,
@@ -153,7 +272,7 @@
153
  "attributes": {}
154
  }
155
  },
156
- "total_flos": 7270570131456.0,
157
  "train_batch_size": 2,
158
  "trial_name": null,
159
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.11519322392800424,
5
  "eval_steps": 50,
6
+ "global_step": 34,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
134
  "learning_rate": 0.00012749999999999998,
135
  "loss": 10.3736,
136
  "step": 17
137
+ },
138
+ {
139
+ "epoch": 0.060984647961884596,
140
+ "grad_norm": 0.04979519173502922,
141
+ "learning_rate": 0.000135,
142
+ "loss": 10.3729,
143
+ "step": 18
144
+ },
145
+ {
146
+ "epoch": 0.06437268395976707,
147
+ "grad_norm": 0.04958589747548103,
148
+ "learning_rate": 0.0001425,
149
+ "loss": 10.3735,
150
+ "step": 19
151
+ },
152
+ {
153
+ "epoch": 0.06776071995764955,
154
+ "grad_norm": 0.054519593715667725,
155
+ "learning_rate": 0.00015,
156
+ "loss": 10.3723,
157
+ "step": 20
158
+ },
159
+ {
160
+ "epoch": 0.07114875595553202,
161
+ "grad_norm": 0.060721371322870255,
162
+ "learning_rate": 0.00014998857713672935,
163
+ "loss": 10.372,
164
+ "step": 21
165
+ },
166
+ {
167
+ "epoch": 0.0745367919534145,
168
+ "grad_norm": 0.06067592278122902,
169
+ "learning_rate": 0.00014995431202643217,
170
+ "loss": 10.3718,
171
+ "step": 22
172
+ },
173
+ {
174
+ "epoch": 0.07792482795129699,
175
+ "grad_norm": 0.06362656503915787,
176
+ "learning_rate": 0.000149897215106593,
177
+ "loss": 10.3713,
178
+ "step": 23
179
+ },
180
+ {
181
+ "epoch": 0.08131286394917946,
182
+ "grad_norm": 0.067110076546669,
183
+ "learning_rate": 0.0001498173037694868,
184
+ "loss": 10.3714,
185
+ "step": 24
186
+ },
187
+ {
188
+ "epoch": 0.08470089994706194,
189
+ "grad_norm": 0.07370075583457947,
190
+ "learning_rate": 0.0001497146023568809,
191
+ "loss": 10.3712,
192
+ "step": 25
193
+ },
194
+ {
195
+ "epoch": 0.08808893594494442,
196
+ "grad_norm": 0.0760367140173912,
197
+ "learning_rate": 0.00014958914215262048,
198
+ "loss": 10.371,
199
+ "step": 26
200
+ },
201
+ {
202
+ "epoch": 0.09147697194282689,
203
+ "grad_norm": 0.08042097836732864,
204
+ "learning_rate": 0.00014944096137309914,
205
+ "loss": 10.37,
206
+ "step": 27
207
+ },
208
+ {
209
+ "epoch": 0.09486500794070937,
210
+ "grad_norm": 0.08724083006381989,
211
+ "learning_rate": 0.00014927010515561776,
212
+ "loss": 10.3697,
213
+ "step": 28
214
+ },
215
+ {
216
+ "epoch": 0.09825304393859184,
217
+ "grad_norm": 0.08779574185609818,
218
+ "learning_rate": 0.00014907662554463532,
219
+ "loss": 10.3689,
220
+ "step": 29
221
+ },
222
+ {
223
+ "epoch": 0.10164107993647432,
224
+ "grad_norm": 0.09096319228410721,
225
+ "learning_rate": 0.0001488605814759156,
226
+ "loss": 10.3677,
227
+ "step": 30
228
+ },
229
+ {
230
+ "epoch": 0.1050291159343568,
231
+ "grad_norm": 0.10570746660232544,
232
+ "learning_rate": 0.00014862203875857477,
233
+ "loss": 10.3666,
234
+ "step": 31
235
+ },
236
+ {
237
+ "epoch": 0.10841715193223928,
238
+ "grad_norm": 0.1051798090338707,
239
+ "learning_rate": 0.0001483610700550354,
240
+ "loss": 10.3678,
241
+ "step": 32
242
+ },
243
+ {
244
+ "epoch": 0.11180518793012176,
245
+ "grad_norm": 0.11076612770557404,
246
+ "learning_rate": 0.00014807775485889264,
247
+ "loss": 10.366,
248
+ "step": 33
249
+ },
250
+ {
251
+ "epoch": 0.11519322392800424,
252
+ "grad_norm": 0.11073443293571472,
253
+ "learning_rate": 0.0001477721794706997,
254
+ "loss": 10.3658,
255
+ "step": 34
256
  }
257
  ],
258
  "logging_steps": 1,
 
272
  "attributes": {}
273
  }
274
  },
275
+ "total_flos": 14554505281536.0,
276
  "train_batch_size": 2,
277
  "trial_name": null,
278
  "trial_params": null