Sara Price
commited on
Training in progress, step 2400, checkpoint
Browse files- last-checkpoint/model-00001-of-00006.safetensors +1 -1
- last-checkpoint/model-00002-of-00006.safetensors +1 -1
- last-checkpoint/model-00003-of-00006.safetensors +1 -1
- last-checkpoint/model-00004-of-00006.safetensors +1 -1
- last-checkpoint/model-00005-of-00006.safetensors +1 -1
- last-checkpoint/model-00006-of-00006.safetensors +1 -1
- last-checkpoint/trainer_state.json +123 -3
last-checkpoint/model-00001-of-00006.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4840658560
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e32f3572a879128a25a247a0be200e23f39567f58fd956064c5c68b66cf5b5a9
|
3 |
size 4840658560
|
last-checkpoint/model-00002-of-00006.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4857206856
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:af444108fd67a3c6e63c47ec8640ec8b500d36ed5bf7b185fccf407ad0527f47
|
3 |
size 4857206856
|
last-checkpoint/model-00003-of-00006.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4857206904
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bb92006261485577e7645190694e9087dca6c01dee24ca945f56f7511967919c
|
3 |
size 4857206904
|
last-checkpoint/model-00004-of-00006.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4857206904
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:24e2d5176675f79b6e2a0149a55de72ce168b30b40ca8178c6800ec8607f99ac
|
3 |
size 4857206904
|
last-checkpoint/model-00005-of-00006.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4857206904
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:92e8488c95aae92eb0ae6aa54da0ba180cca2726f5bd061b36b57c208bc2d404
|
3 |
size 4857206904
|
last-checkpoint/model-00006-of-00006.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 2684734256
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bcfba0ab5a7de6b7625c5951644381b869990b9cf3bc79c0d13d322d631e496f
|
3 |
size 2684734256
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 4.
|
5 |
"eval_steps": 50,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -607,6 +607,126 @@
|
|
607 |
"eval_samples_per_second": 69.033,
|
608 |
"eval_steps_per_second": 3.54,
|
609 |
"step": 2000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
610 |
}
|
611 |
],
|
612 |
"logging_steps": 50,
|
@@ -614,7 +734,7 @@
|
|
614 |
"num_input_tokens_seen": 0,
|
615 |
"num_train_epochs": 10,
|
616 |
"save_steps": 400,
|
617 |
-
"total_flos":
|
618 |
"train_batch_size": 4,
|
619 |
"trial_name": null,
|
620 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 4.8,
|
5 |
"eval_steps": 50,
|
6 |
+
"global_step": 2400,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
607 |
"eval_samples_per_second": 69.033,
|
608 |
"eval_steps_per_second": 3.54,
|
609 |
"step": 2000
|
610 |
+
},
|
611 |
+
{
|
612 |
+
"epoch": 4.1,
|
613 |
+
"grad_norm": 1.5479576587677002,
|
614 |
+
"learning_rate": 1.469471562785891e-05,
|
615 |
+
"loss": 0.1167,
|
616 |
+
"step": 2050
|
617 |
+
},
|
618 |
+
{
|
619 |
+
"epoch": 4.1,
|
620 |
+
"eval_loss": 0.9011654853820801,
|
621 |
+
"eval_runtime": 2.738,
|
622 |
+
"eval_samples_per_second": 56.976,
|
623 |
+
"eval_steps_per_second": 2.922,
|
624 |
+
"step": 2050
|
625 |
+
},
|
626 |
+
{
|
627 |
+
"epoch": 4.2,
|
628 |
+
"grad_norm": 1.3002970218658447,
|
629 |
+
"learning_rate": 1.4383711467890776e-05,
|
630 |
+
"loss": 0.1186,
|
631 |
+
"step": 2100
|
632 |
+
},
|
633 |
+
{
|
634 |
+
"epoch": 4.2,
|
635 |
+
"eval_loss": 0.9147914052009583,
|
636 |
+
"eval_runtime": 3.018,
|
637 |
+
"eval_samples_per_second": 51.69,
|
638 |
+
"eval_steps_per_second": 2.651,
|
639 |
+
"step": 2100
|
640 |
+
},
|
641 |
+
{
|
642 |
+
"epoch": 4.3,
|
643 |
+
"grad_norm": 1.7996995449066162,
|
644 |
+
"learning_rate": 1.4067366430758004e-05,
|
645 |
+
"loss": 0.1153,
|
646 |
+
"step": 2150
|
647 |
+
},
|
648 |
+
{
|
649 |
+
"epoch": 4.3,
|
650 |
+
"eval_loss": 0.9160046577453613,
|
651 |
+
"eval_runtime": 3.6692,
|
652 |
+
"eval_samples_per_second": 42.516,
|
653 |
+
"eval_steps_per_second": 2.18,
|
654 |
+
"step": 2150
|
655 |
+
},
|
656 |
+
{
|
657 |
+
"epoch": 4.4,
|
658 |
+
"grad_norm": 1.1670547723770142,
|
659 |
+
"learning_rate": 1.3746065934159123e-05,
|
660 |
+
"loss": 0.1214,
|
661 |
+
"step": 2200
|
662 |
+
},
|
663 |
+
{
|
664 |
+
"epoch": 4.4,
|
665 |
+
"eval_loss": 0.9355931282043457,
|
666 |
+
"eval_runtime": 2.337,
|
667 |
+
"eval_samples_per_second": 66.753,
|
668 |
+
"eval_steps_per_second": 3.423,
|
669 |
+
"step": 2200
|
670 |
+
},
|
671 |
+
{
|
672 |
+
"epoch": 4.5,
|
673 |
+
"grad_norm": 1.1401852369308472,
|
674 |
+
"learning_rate": 1.342020143325669e-05,
|
675 |
+
"loss": 0.1193,
|
676 |
+
"step": 2250
|
677 |
+
},
|
678 |
+
{
|
679 |
+
"epoch": 4.5,
|
680 |
+
"eval_loss": 0.9175124764442444,
|
681 |
+
"eval_runtime": 2.2626,
|
682 |
+
"eval_samples_per_second": 68.947,
|
683 |
+
"eval_steps_per_second": 3.536,
|
684 |
+
"step": 2250
|
685 |
+
},
|
686 |
+
{
|
687 |
+
"epoch": 4.6,
|
688 |
+
"grad_norm": 0.8389841914176941,
|
689 |
+
"learning_rate": 1.3090169943749475e-05,
|
690 |
+
"loss": 0.1186,
|
691 |
+
"step": 2300
|
692 |
+
},
|
693 |
+
{
|
694 |
+
"epoch": 4.6,
|
695 |
+
"eval_loss": 0.9386661052703857,
|
696 |
+
"eval_runtime": 2.2532,
|
697 |
+
"eval_samples_per_second": 69.235,
|
698 |
+
"eval_steps_per_second": 3.55,
|
699 |
+
"step": 2300
|
700 |
+
},
|
701 |
+
{
|
702 |
+
"epoch": 4.7,
|
703 |
+
"grad_norm": 1.2419942617416382,
|
704 |
+
"learning_rate": 1.2756373558169992e-05,
|
705 |
+
"loss": 0.1187,
|
706 |
+
"step": 2350
|
707 |
+
},
|
708 |
+
{
|
709 |
+
"epoch": 4.7,
|
710 |
+
"eval_loss": 0.9336636662483215,
|
711 |
+
"eval_runtime": 2.2535,
|
712 |
+
"eval_samples_per_second": 69.225,
|
713 |
+
"eval_steps_per_second": 3.55,
|
714 |
+
"step": 2350
|
715 |
+
},
|
716 |
+
{
|
717 |
+
"epoch": 4.8,
|
718 |
+
"grad_norm": 1.0060522556304932,
|
719 |
+
"learning_rate": 1.2419218955996677e-05,
|
720 |
+
"loss": 0.1245,
|
721 |
+
"step": 2400
|
722 |
+
},
|
723 |
+
{
|
724 |
+
"epoch": 4.8,
|
725 |
+
"eval_loss": 0.9188296794891357,
|
726 |
+
"eval_runtime": 2.2614,
|
727 |
+
"eval_samples_per_second": 68.983,
|
728 |
+
"eval_steps_per_second": 3.538,
|
729 |
+
"step": 2400
|
730 |
}
|
731 |
],
|
732 |
"logging_steps": 50,
|
|
|
734 |
"num_input_tokens_seen": 0,
|
735 |
"num_train_epochs": 10,
|
736 |
"save_steps": 400,
|
737 |
+
"total_flos": 1.1589925681181491e+17,
|
738 |
"train_batch_size": 4,
|
739 |
"trial_name": null,
|
740 |
"trial_params": null
|