eddysang committed on
Commit d81d445 · verified · 1 Parent(s): 612cb19

Training in progress, step 91, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:2e79db61b8a67b267ecd2022ebb47d0ced0f0386e603532a1a146bee55843d85
+ oid sha256:ca0b8fa886b701ae9b83245d91dca14a4ae89fb20ff76e025c30669227f69b61
  size 319876032
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:d6207d124702d19d5800f70cc9d2ff5c405cbcac003b4b0d5d47ce21d443298a
+ oid sha256:348f29fb9df9b9eec2348b93560b583266a82d218ade71b7553f114e0f883df6
  size 640009682
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:48523dd1312a134256c6c76e541956b189e760ea17cbadfffbe6acae6afe1b0c
+ oid sha256:c72dedb2dc4931fa8d6b976c4421dfe3726c461103475f4c2e5fbbda6f4d034e
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:f67903029bd03962cd576ce98fdf2051c9d1f3ac8313e78e1fcac70600c55ded
+ oid sha256:d9f056dd40c8c1415b0e9ddd654f5e86244de032aa74a96ad4901970d9bab358
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
  {
  "best_metric": null,
  "best_model_checkpoint": null,
- "epoch": 0.10380536494073613,
+ "epoch": 0.12110625909752547,
  "eval_steps": 50,
- "global_step": 78,
+ "global_step": 91,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
@@ -569,6 +569,97 @@
  "learning_rate": 8.762677810102787e-05,
  "loss": 44.5024,
  "step": 78
+ },
+ {
+ "epoch": 0.10513620295279684,
+ "grad_norm": 4.580371379852295,
+ "learning_rate": 8.583667871769158e-05,
+ "loss": 45.4198,
+ "step": 79
+ },
+ {
+ "epoch": 0.10646704096485755,
+ "grad_norm": 5.361691951751709,
+ "learning_rate": 8.404025101914921e-05,
+ "loss": 46.9717,
+ "step": 80
+ },
+ {
+ "epoch": 0.10779787897691828,
+ "grad_norm": 5.872640609741211,
+ "learning_rate": 8.223854406858862e-05,
+ "loss": 46.8967,
+ "step": 81
+ },
+ {
+ "epoch": 0.109128716988979,
+ "grad_norm": 4.439775466918945,
+ "learning_rate": 8.043261001213218e-05,
+ "loss": 45.777,
+ "step": 82
+ },
+ {
+ "epoch": 0.11045955500103971,
+ "grad_norm": 4.8212056159973145,
+ "learning_rate": 7.862350346441302e-05,
+ "loss": 46.7968,
+ "step": 83
+ },
+ {
+ "epoch": 0.11179039301310044,
+ "grad_norm": 4.29143762588501,
+ "learning_rate": 7.681228089270991e-05,
+ "loss": 44.3015,
+ "step": 84
+ },
+ {
+ "epoch": 0.11312123102516115,
+ "grad_norm": 4.222655773162842,
+ "learning_rate": 7.5e-05,
+ "loss": 45.1899,
+ "step": 85
+ },
+ {
+ "epoch": 0.11445206903722188,
+ "grad_norm": 4.317388534545898,
+ "learning_rate": 7.318771910729009e-05,
+ "loss": 45.5435,
+ "step": 86
+ },
+ {
+ "epoch": 0.1157829070492826,
+ "grad_norm": 4.136228561401367,
+ "learning_rate": 7.137649653558697e-05,
+ "loss": 47.2873,
+ "step": 87
+ },
+ {
+ "epoch": 0.11711374506134331,
+ "grad_norm": 4.299015998840332,
+ "learning_rate": 6.956738998786783e-05,
+ "loss": 47.1202,
+ "step": 88
+ },
+ {
+ "epoch": 0.11844458307340404,
+ "grad_norm": 5.2958784103393555,
+ "learning_rate": 6.776145593141136e-05,
+ "loss": 48.6786,
+ "step": 89
+ },
+ {
+ "epoch": 0.11977542108546475,
+ "grad_norm": 4.868504524230957,
+ "learning_rate": 6.595974898085078e-05,
+ "loss": 45.666,
+ "step": 90
+ },
+ {
+ "epoch": 0.12110625909752547,
+ "grad_norm": 5.617905139923096,
+ "learning_rate": 6.416332128230842e-05,
+ "loss": 44.0781,
+ "step": 91
  }
  ],
  "logging_steps": 1,
@@ -588,7 +679,7 @@
  "attributes": {}
  }
  },
- "total_flos": 4.4045434141802496e+17,
+ "total_flos": 5.130964380250276e+17,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null