eddysang committed
Commit 170aa4d · verified · 1 Parent(s): bc07b5c

Training in progress, step 63, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a2a3673ac6ecfcf9fbd4e8a74b81927275b26bd0684e838caa39b7caf422cf88
+oid sha256:f652211dac610e80872598da1df98e590795842f441ad646c668780471aa84c1
 size 319876032
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a6817fbbcd33480f164784483673185908595dff99afaaa869a566aa99376d9a
+oid sha256:b61ce972ea96e2306fbf26a1d40d3335709da2c916fff3ac1116fa3c2f0f5737
 size 640009682
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9f07b4f2bcc3f569f306e9b6554b0fe04785f073abde4e0109bc3d3d5eb765c2
+oid sha256:c1098628841f4bcb07d5546445f4de27d56441cc7dc92917c1ddcac8b9e18a99
 size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6f68b4a4ba4603d307a8be653dd70e693a9f3506ea1af5f24d2d241dc54cfcb1
+oid sha256:d85eac2daddfe8e3f78a5d6ef1e9ba13c04651694635a9ed76369a20726389db
 size 1064
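
Each of the four files above is a Git LFS pointer: the repository stores only the `oid sha256:` digest and the byte size, while the actual tensors live in LFS storage. A minimal sketch (not part of the commit), assuming the checkpoint has already been downloaded to a local `last-checkpoint/` directory, of how one might check that a local file matches the new oid recorded in this commit:

```python
import hashlib
from pathlib import Path

# New oid recorded in this commit for adapter_model.safetensors (copied from the diff above).
EXPECTED_OID = "f652211dac610e80872598da1df98e590795842f441ad646c668780471aa84c1"

def sha256_of(path: Path) -> str:
    """Hash the file contents in 1 MiB chunks so large checkpoints fit in memory."""
    digest = hashlib.sha256()
    with path.open("rb") as handle:
        for chunk in iter(lambda: handle.read(1 << 20), b""):
            digest.update(chunk)
    return digest.hexdigest()

local = Path("last-checkpoint/adapter_model.safetensors")  # assumed local download path
print("matches commit:", sha256_of(local) == EXPECTED_OID)
```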
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 1.1620712844653664,
+  "epoch": 1.3557498318762609,
   "eval_steps": 50,
-  "global_step": 54,
+  "global_step": 63,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -401,6 +401,69 @@
       "learning_rate": 9.25084022891929e-05,
       "loss": 26.5253,
       "step": 54
+    },
+    {
+      "epoch": 1.183591123066577,
+      "grad_norm": 25.87575912475586,
+      "learning_rate": 8.963177415120962e-05,
+      "loss": 22.2069,
+      "step": 55
+    },
+    {
+      "epoch": 1.2051109616677875,
+      "grad_norm": 21.698827743530273,
+      "learning_rate": 8.673258487801731e-05,
+      "loss": 25.7023,
+      "step": 56
+    },
+    {
+      "epoch": 1.226630800268998,
+      "grad_norm": 22.29081916809082,
+      "learning_rate": 8.381530480933783e-05,
+      "loss": 23.0083,
+      "step": 57
+    },
+    {
+      "epoch": 1.2481506388702084,
+      "grad_norm": 19.079532623291016,
+      "learning_rate": 8.088443217958837e-05,
+      "loss": 25.0357,
+      "step": 58
+    },
+    {
+      "epoch": 1.269670477471419,
+      "grad_norm": 19.22806167602539,
+      "learning_rate": 7.794448618193015e-05,
+      "loss": 27.0424,
+      "step": 59
+    },
+    {
+      "epoch": 1.2911903160726295,
+      "grad_norm": 20.588699340820312,
+      "learning_rate": 7.5e-05,
+      "loss": 23.868,
+      "step": 60
+    },
+    {
+      "epoch": 1.31271015467384,
+      "grad_norm": 21.745031356811523,
+      "learning_rate": 7.205551381806987e-05,
+      "loss": 23.2284,
+      "step": 61
+    },
+    {
+      "epoch": 1.3342299932750503,
+      "grad_norm": 19.947467803955078,
+      "learning_rate": 6.911556782041163e-05,
+      "loss": 26.8844,
+      "step": 62
+    },
+    {
+      "epoch": 1.3557498318762609,
+      "grad_norm": 23.738483428955078,
+      "learning_rate": 6.618469519066217e-05,
+      "loss": 25.726,
+      "step": 63
     }
   ],
   "logging_steps": 1,
@@ -420,7 +483,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 2.839944410337116e+17,
+  "total_flos": 3.3132684787266355e+17,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null