Commit 6234245 (verified) · eddysang committed · 1 Parent(s): e6f06ae

Training in progress, step 130, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a5b2a4ce332b6d3f2d7b5027963d2bac8c8a90a8166a714fcfa99acf1139b970
+oid sha256:869af5620d3d85d7c0db115351fa817c5904fa6f51f53e8d481d34177a3d9341
 size 319876032
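
adapter_model.safetensors, like the other checkpoint files below, is stored as a Git LFS pointer, so the diff only swaps the sha256 oid while the recorded size stays at 319876032 bytes. A minimal sketch (not part of this commit; it assumes the LFS blob has already been pulled locally) for checking a downloaded file against the oid and size in its pointer:

```python
import hashlib
import os

def matches_lfs_pointer(path: str, expected_oid: str, expected_size: int) -> bool:
    """Compare a downloaded blob with the oid/size recorded in its LFS pointer."""
    if os.path.getsize(path) != expected_size:
        return False
    digest = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            digest.update(chunk)
    return digest.hexdigest() == expected_oid

# oid and size taken from the new pointer above.
print(matches_lfs_pointer(
    "last-checkpoint/adapter_model.safetensors",
    "869af5620d3d85d7c0db115351fa817c5904fa6f51f53e8d481d34177a3d9341",
    319876032,
))
```

The same check applies to optimizer.pt, rng_state.pth, and scheduler.pt with their respective oids and sizes.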
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f008f744c603be58af8a9cf079728bb5cbea9265abdcb5dd15ed0a5ff49df40a
+oid sha256:afd7f4c7619ded30407f68888930fdfa979bea1a64de2ebe37b0fdc904faa13e
 size 640009682
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a85180cb5b242ee948b2af64053ea3c4e0cc8b032b0a9568c39646d04dd2d77e
+oid sha256:5a670f2d851c0c4cc79716f31d1954ad248700ff958468f6d2721e422863a0fe
 size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:72ced90745bf11dd0913ccb678fa84f97d9d9d2dcc6e8de79651c15430da9657
+oid sha256:1b8d02756226521458daee3f69c94f8a0b4245ed6c8f1de64c08045d2547f98c
 size 1064
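
Together, adapter_model.safetensors, optimizer.pt, rng_state.pth, scheduler.pt, and trainer_state.json (below) form the usual Hugging Face Trainer checkpoint layout for a PEFT/LoRA run: adapter weights plus optimizer, LR-scheduler, and RNG state for exact resumption. A rough sketch of how the pieces could be loaded for inspection (an assumption about downstream use, not something in this commit; paths are relative to the pulled repo):

```python
import json

import torch
from safetensors.torch import load_file

ckpt = "last-checkpoint"

# Trainer bookkeeping: global step, epoch, and the per-step log history.
with open(f"{ckpt}/trainer_state.json") as f:
    state = json.load(f)
print(state["global_step"], state["epoch"])  # 130, 0.17300894156789354 after this commit

# Adapter weights (safetensors); for a PEFT run this holds only the adapter tensors.
adapter = load_file(f"{ckpt}/adapter_model.safetensors")
print(sum(t.numel() for t in adapter.values()), "adapter parameters")

# Optimizer, LR-scheduler, and RNG state saved so training can resume exactly.
# weights_only=False because these are pickled state dicts from a trusted source.
optimizer_state = torch.load(f"{ckpt}/optimizer.pt", map_location="cpu", weights_only=False)
scheduler_state = torch.load(f"{ckpt}/scheduler.pt", map_location="cpu", weights_only=False)
rng_state = torch.load(f"{ckpt}/rng_state.pth", map_location="cpu", weights_only=False)
```

In practice, resumption would normally go through Trainer.train(resume_from_checkpoint="last-checkpoint") rather than loading these files by hand.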
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.15570804741110417,
+  "epoch": 0.17300894156789354,
   "eval_steps": 50,
-  "global_step": 117,
+  "global_step": 130,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -850,6 +850,97 @@
       "learning_rate": 2.2611647606329732e-05,
       "loss": 43.9194,
       "step": 117
+    },
+    {
+      "epoch": 0.1570388854231649,
+      "grad_norm": 4.738860607147217,
+      "learning_rate": 2.1330077944190924e-05,
+      "loss": 44.4921,
+      "step": 118
+    },
+    {
+      "epoch": 0.15836972343522562,
+      "grad_norm": 5.531933307647705,
+      "learning_rate": 2.0079850005167007e-05,
+      "loss": 47.8288,
+      "step": 119
+    },
+    {
+      "epoch": 0.15970056144728634,
+      "grad_norm": 5.006840229034424,
+      "learning_rate": 1.8861693887167408e-05,
+      "loss": 47.1657,
+      "step": 120
+    },
+    {
+      "epoch": 0.16103139945934705,
+      "grad_norm": 4.700850963592529,
+      "learning_rate": 1.767632095906137e-05,
+      "loss": 45.0584,
+      "step": 121
+    },
+    {
+      "epoch": 0.16236223747140777,
+      "grad_norm": 5.489245414733887,
+      "learning_rate": 1.652442344525833e-05,
+      "loss": 45.8487,
+      "step": 122
+    },
+    {
+      "epoch": 0.1636930754834685,
+      "grad_norm": 4.946503162384033,
+      "learning_rate": 1.5406674021468438e-05,
+      "loss": 46.2189,
+      "step": 123
+    },
+    {
+      "epoch": 0.16502391349552922,
+      "grad_norm": 8.596658706665039,
+      "learning_rate": 1.4323725421878949e-05,
+      "loss": 44.8387,
+      "step": 124
+    },
+    {
+      "epoch": 0.16635475150758994,
+      "grad_norm": 4.896723747253418,
+      "learning_rate": 1.3276210057975772e-05,
+      "loss": 46.1841,
+      "step": 125
+    },
+    {
+      "epoch": 0.16768558951965065,
+      "grad_norm": 5.261892795562744,
+      "learning_rate": 1.2264739649232993e-05,
+      "loss": 44.6968,
+      "step": 126
+    },
+    {
+      "epoch": 0.16901642753171137,
+      "grad_norm": 5.7036027908325195,
+      "learning_rate": 1.1289904865885935e-05,
+      "loss": 43.3378,
+      "step": 127
+    },
+    {
+      "epoch": 0.17034726554377208,
+      "grad_norm": 4.333169460296631,
+      "learning_rate": 1.0352274983996303e-05,
+      "loss": 46.5715,
+      "step": 128
+    },
+    {
+      "epoch": 0.17167810355583282,
+      "grad_norm": 5.31616735458374,
+      "learning_rate": 9.452397553011157e-06,
+      "loss": 46.6349,
+      "step": 129
+    },
+    {
+      "epoch": 0.17300894156789354,
+      "grad_norm": 4.633855819702148,
+      "learning_rate": 8.590798076009264e-06,
+      "loss": 46.4312,
+      "step": 130
     }
   ],
   "logging_steps": 1,
@@ -869,7 +960,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 6.6051716349218e+17,
+  "total_flos": 7.338166546386125e+17,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null