eddysang commited on
Commit
6d2d175
·
verified ·
1 Parent(s): c34b783

Training in progress, step 51, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ff72e4efe6d28a195f3570744a19808d24176a4a5c3166d09509ecf448dd971e
3
  size 97728
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2b14a95c7f959dcbd54f89ee9ea99cc1dbf6d6e00e3dd380e71efa60af12ee96
3
  size 97728
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fabc0e47233f6f67098c93eb17ccaff88b58404fd8853fd611da5f2ccf11189d
3
  size 212298
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5e15873f91766aa485609b54a44fa78a3d3396e31ce52599854df27197085e2a
3
  size 212298
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e5a1457ae5d909a79141dea2965aadc86d4c660ec11af6ef8eed50147437a542
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aeaac88ea211375a529b1fd2970f1cc397730f5d948fdb30bae28e4d8b193fb4
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2e1983b20d7ce0214623b79adb071ed1f5c168cabcab4cc0ff2c0c61c63ddce9
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c86702c0a3caad6c51746e54805a7289de03dff9cc5abc148a58966cf1f4d339
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.11519322392800424,
5
  "eval_steps": 50,
6
- "global_step": 34,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -253,6 +253,133 @@
253
  "learning_rate": 0.0001477721794706997,
254
  "loss": 10.3658,
255
  "step": 34
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
256
  }
257
  ],
258
  "logging_steps": 1,
@@ -272,7 +399,7 @@
272
  "attributes": {}
273
  }
274
  },
275
- "total_flos": 14554505281536.0,
276
  "train_batch_size": 2,
277
  "trial_name": null,
278
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.17278983589200636,
5
  "eval_steps": 50,
6
+ "global_step": 51,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
253
  "learning_rate": 0.0001477721794706997,
254
  "loss": 10.3658,
255
  "step": 34
256
+ },
257
+ {
258
+ "epoch": 0.11858125992588671,
259
+ "grad_norm": 0.11666611582040787,
260
+ "learning_rate": 0.0001474444369716801,
261
+ "loss": 10.3642,
262
+ "step": 35
263
+ },
264
+ {
265
+ "epoch": 0.12196929592376919,
266
+ "grad_norm": 0.12058889120817184,
267
+ "learning_rate": 0.0001470946271953739,
268
+ "loss": 10.3624,
269
+ "step": 36
270
+ },
271
+ {
272
+ "epoch": 0.12535733192165166,
273
+ "grad_norm": 0.13473331928253174,
274
+ "learning_rate": 0.00014672285669722765,
275
+ "loss": 10.3619,
276
+ "step": 37
277
+ },
278
+ {
279
+ "epoch": 0.12874536791953414,
280
+ "grad_norm": 0.12408842891454697,
281
+ "learning_rate": 0.00014632923872213652,
282
+ "loss": 10.3616,
283
+ "step": 38
284
+ },
285
+ {
286
+ "epoch": 0.13213340391741663,
287
+ "grad_norm": 0.12850341200828552,
288
+ "learning_rate": 0.00014591389316994876,
289
+ "loss": 10.3609,
290
+ "step": 39
291
+ },
292
+ {
293
+ "epoch": 0.1355214399152991,
294
+ "grad_norm": 0.12435595691204071,
295
+ "learning_rate": 0.0001454769465589431,
296
+ "loss": 10.3597,
297
+ "step": 40
298
+ },
299
+ {
300
+ "epoch": 0.1389094759131816,
301
+ "grad_norm": 0.11300837248563766,
302
+ "learning_rate": 0.00014501853198729012,
303
+ "loss": 10.3583,
304
+ "step": 41
305
+ },
306
+ {
307
+ "epoch": 0.14229751191106405,
308
+ "grad_norm": 0.11374777555465698,
309
+ "learning_rate": 0.00014453878909250904,
310
+ "loss": 10.357,
311
+ "step": 42
312
+ },
313
+ {
314
+ "epoch": 0.14568554790894653,
315
+ "grad_norm": 0.11273212730884552,
316
+ "learning_rate": 0.00014403786400893302,
317
+ "loss": 10.3559,
318
+ "step": 43
319
+ },
320
+ {
321
+ "epoch": 0.149073583906829,
322
+ "grad_norm": 0.10369884222745895,
323
+ "learning_rate": 0.00014351590932319504,
324
+ "loss": 10.3545,
325
+ "step": 44
326
+ },
327
+ {
328
+ "epoch": 0.1524616199047115,
329
+ "grad_norm": 0.09857185184955597,
330
+ "learning_rate": 0.00014297308402774875,
331
+ "loss": 10.3544,
332
+ "step": 45
333
+ },
334
+ {
335
+ "epoch": 0.15584965590259398,
336
+ "grad_norm": 0.09635099023580551,
337
+ "learning_rate": 0.0001424095534724375,
338
+ "loss": 10.3537,
339
+ "step": 46
340
+ },
341
+ {
342
+ "epoch": 0.15923769190047643,
343
+ "grad_norm": 0.08793843537569046,
344
+ "learning_rate": 0.00014182548931412757,
345
+ "loss": 10.3528,
346
+ "step": 47
347
+ },
348
+ {
349
+ "epoch": 0.1626257278983589,
350
+ "grad_norm": 0.07799001038074493,
351
+ "learning_rate": 0.0001412210694644195,
352
+ "loss": 10.3516,
353
+ "step": 48
354
+ },
355
+ {
356
+ "epoch": 0.1660137638962414,
357
+ "grad_norm": 0.07476358115673065,
358
+ "learning_rate": 0.00014059647803545467,
359
+ "loss": 10.3515,
360
+ "step": 49
361
+ },
362
+ {
363
+ "epoch": 0.16940179989412388,
364
+ "grad_norm": 0.07342197000980377,
365
+ "learning_rate": 0.0001399519052838329,
366
+ "loss": 10.3523,
367
+ "step": 50
368
+ },
369
+ {
370
+ "epoch": 0.16940179989412388,
371
+ "eval_loss": 10.351042747497559,
372
+ "eval_runtime": 3.8684,
373
+ "eval_samples_per_second": 257.211,
374
+ "eval_steps_per_second": 128.735,
375
+ "step": 50
376
+ },
377
+ {
378
+ "epoch": 0.17278983589200636,
379
+ "grad_norm": 0.06936267018318176,
380
+ "learning_rate": 0.00013928754755265842,
381
+ "loss": 10.3514,
382
+ "step": 51
383
  }
384
  ],
385
  "logging_steps": 1,
 
399
  "attributes": {}
400
  }
401
  },
402
+ "total_flos": 21825075412992.0,
403
  "train_batch_size": 2,
404
  "trial_name": null,
405
  "trial_params": null