eddysang committed on
Commit 4a4b793 · verified · 1 Parent(s): f268d80

Training in progress, step 52, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:5db89031f861b351398579fdeed7f2fb75adc951da6bbb79461eec30cb42cae7
+ oid sha256:a62eaa1c0aa39c35aed3a646e094cb0c5fd05d6e6bac7bb2bc0582888b4b98fe
  size 319876032
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:c83434275e79d234627e39a2ec4efe033481954aab821806a1dfd2935bd20358
+ oid sha256:98da6b8aaefb6096bb3ab2e07390a761ded121ccf0edee18cbe1c58fa2b509b9
  size 640009682
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:b699cd69e707818116cdde38e402657f6f0fe8cde3928db61304dd17c81e968e
+ oid sha256:601131cde1c22105044011da47a3d843369b7dabae6b9b3a0d5857d64c1ee8c7
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:afb9c3c0c925ea8cf32a1dcba2e2f6242aaac5236534a8df451537dd590ac10a
+ oid sha256:a0083f46b3c06809c7ba2e6389d4ff484bc53254e300d739b3ff127153f94c8d
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
  {
  "best_metric": null,
  "best_model_checkpoint": null,
- "epoch": 0.05190268247036806,
+ "epoch": 0.06920357662715741,
  "eval_steps": 50,
- "global_step": 39,
+ "global_step": 52,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
@@ -288,6 +288,105 @@
  "learning_rate": 0.00014223202029717776,
  "loss": 46.5923,
  "step": 39
+ },
+ {
+ "epoch": 0.05323352048242878,
+ "grad_norm": 4.8244147300720215,
+ "learning_rate": 0.0001414092019239907,
+ "loss": 46.1269,
+ "step": 40
+ },
+ {
+ "epoch": 0.0545643584944895,
+ "grad_norm": 4.208717346191406,
+ "learning_rate": 0.00014054760244698884,
+ "loss": 49.8436,
+ "step": 41
+ },
+ {
+ "epoch": 0.05589519650655022,
+ "grad_norm": 3.8489153385162354,
+ "learning_rate": 0.00013964772501600368,
+ "loss": 45.9188,
+ "step": 42
+ },
+ {
+ "epoch": 0.05722603451861094,
+ "grad_norm": 3.9226901531219482,
+ "learning_rate": 0.00013871009513411408,
+ "loss": 45.8486,
+ "step": 43
+ },
+ {
+ "epoch": 0.058556872530671655,
+ "grad_norm": 4.560520172119141,
+ "learning_rate": 0.00013773526035076698,
+ "loss": 46.0863,
+ "step": 44
+ },
+ {
+ "epoch": 0.05988771054273238,
+ "grad_norm": 5.09638786315918,
+ "learning_rate": 0.00013672378994202423,
+ "loss": 45.7102,
+ "step": 45
+ },
+ {
+ "epoch": 0.0612185485547931,
+ "grad_norm": 4.719268321990967,
+ "learning_rate": 0.00013567627457812106,
+ "loss": 45.6349,
+ "step": 46
+ },
+ {
+ "epoch": 0.06254938656685381,
+ "grad_norm": 4.546507835388184,
+ "learning_rate": 0.00013459332597853157,
+ "loss": 47.0023,
+ "step": 47
+ },
+ {
+ "epoch": 0.06388022457891454,
+ "grad_norm": 4.938004970550537,
+ "learning_rate": 0.00013347557655474167,
+ "loss": 42.3119,
+ "step": 48
+ },
+ {
+ "epoch": 0.06521106259097526,
+ "grad_norm": 4.659527778625488,
+ "learning_rate": 0.0001323236790409386,
+ "loss": 46.6489,
+ "step": 49
+ },
+ {
+ "epoch": 0.06654190060303597,
+ "grad_norm": 4.204468727111816,
+ "learning_rate": 0.00013113830611283258,
+ "loss": 45.922,
+ "step": 50
+ },
+ {
+ "epoch": 0.06654190060303597,
+ "eval_loss": 1.4664535522460938,
+ "eval_runtime": 577.3931,
+ "eval_samples_per_second": 4.383,
+ "eval_steps_per_second": 2.193,
+ "step": 50
+ },
+ {
+ "epoch": 0.0678727386150967,
+ "grad_norm": 4.2126569747924805,
+ "learning_rate": 0.00012992014999483302,
+ "loss": 47.7475,
+ "step": 51
+ },
+ {
+ "epoch": 0.06920357662715741,
+ "grad_norm": 4.303404331207275,
+ "learning_rate": 0.00012866992205580908,
+ "loss": 45.0831,
+ "step": 52
  }
  ],
  "logging_steps": 1,
@@ -307,7 +406,7 @@
  "attributes": {}
  }
  },
- "total_flos": 2.207202166135849e+17,
+ "total_flos": 2.940197077600174e+17,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null