Training in progress, step 143, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +94 -3

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:869af5620d3d85d7c0db115351fa817c5904fa6f51f53e8d481d34177a3d9341
 size 319876032

 version https://git-lfs.github.com/spec/v1
+oid sha256:d435ce0de6012fb4236c06060f6cf69269f2b1360a5d6aa435758082414bf7f8
 size 319876032

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:afd7f4c7619ded30407f68888930fdfa979bea1a64de2ebe37b0fdc904faa13e
 size 640009682

 version https://git-lfs.github.com/spec/v1
+oid sha256:f0bf61cf0d057bf8db7e5742b05c339faebcc7038ea97dc73bda7aee1fbb09c7
 size 640009682

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5a670f2d851c0c4cc79716f31d1954ad248700ff958468f6d2721e422863a0fe
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:9a37cbe2fd5d44041cacd48fad439e5298ef2fc2ac9fdb757c4af88acea9033f
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1b8d02756226521458daee3f69c94f8a0b4245ed6c8f1de64c08045d2547f98c
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:ee2a6301db2b5e58d7c810f9f0f35dee18c800d0cef729c658d7f5a962d36075
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.17300894156789354,
   "eval_steps": 50,
-  "global_step": 130,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -941,6 +941,97 @@
       "learning_rate": 8.590798076009264e-06,
       "loss": 46.4312,
       "step": 130
     }
   ],
   "logging_steps": 1,
@@ -960,7 +1051,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 7.338166546386125e+17,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.19030983572468288,
   "eval_steps": 50,
+  "global_step": 143,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 8.590798076009264e-06,
       "loss": 46.4312,
       "step": 130
+    },
+    {
+      "epoch": 0.17433977957995425,
+      "grad_norm": 4.295351982116699,
+      "learning_rate": 7.767979702822217e-06,
+      "loss": 45.6427,
+      "step": 131
+    },
+    {
+      "epoch": 0.17567061759201497,
+      "grad_norm": 4.867600917816162,
+      "learning_rate": 6.984422936209094e-06,
+      "loss": 43.972,
+      "step": 132
+    },
+    {
+      "epoch": 0.17700145560407568,
+      "grad_norm": 5.0242695808410645,
+      "learning_rate": 6.240585351256319e-06,
+      "loss": 44.2976,
+      "step": 133
+    },
+    {
+      "epoch": 0.17833229361613642,
+      "grad_norm": 5.848794460296631,
+      "learning_rate": 5.536901328166773e-06,
+      "loss": 46.4279,
+      "step": 134
+    },
+    {
+      "epoch": 0.17966313162819714,
+      "grad_norm": 4.636799335479736,
+      "learning_rate": 4.8737817985938955e-06,
+      "loss": 43.4801,
+      "step": 135
+    },
+    {
+      "epoch": 0.18099396964025785,
+      "grad_norm": 4.535057544708252,
+      "learning_rate": 4.251614005669263e-06,
+      "loss": 44.8191,
+      "step": 136
+    },
+    {
+      "epoch": 0.18232480765231857,
+      "grad_norm": 7.119007587432861,
+      "learning_rate": 3.670761277863485e-06,
+      "loss": 43.3487,
+      "step": 137
+    },
+    {
+      "epoch": 0.18365564566437928,
+      "grad_norm": 5.639936923980713,
+      "learning_rate": 3.131562816812533e-06,
+      "loss": 46.6444,
+      "step": 138
+    },
+    {
+      "epoch": 0.18498648367644,
+      "grad_norm": 5.142310619354248,
+      "learning_rate": 2.6343334992336485e-06,
+      "loss": 43.399,
+      "step": 139
+    },
+    {
+      "epoch": 0.18631732168850074,
+      "grad_norm": 4.855108737945557,
+      "learning_rate": 2.179363693046099e-06,
+      "loss": 44.3601,
+      "step": 140
+    },
+    {
+      "epoch": 0.18764815970056145,
+      "grad_norm": 4.390917778015137,
+      "learning_rate": 1.7669190878045914e-06,
+      "loss": 45.47,
+      "step": 141
+    },
+    {
+      "epoch": 0.18897899771262217,
+      "grad_norm": 5.170991897583008,
+      "learning_rate": 1.3972405395439922e-06,
+      "loss": 45.9972,
+      "step": 142
+    },
+    {
+      "epoch": 0.19030983572468288,
+      "grad_norm": 4.540877342224121,
+      "learning_rate": 1.0705439301261887e-06,
+      "loss": 45.5314,
+      "step": 143
     }
   ],
   "logging_steps": 1,
       "attributes": {}
     }
   },
+  "total_flos": 8.072804944199025e+17,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null