Training in progress, step 4, checkpoint

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c91736d9fdd6e721af04d706b90156e7ef80a0dcb29d06a21a3d3f6a6d6a6b9e
 size 83945296

 version https://git-lfs.github.com/spec/v1
+oid sha256:2e3b2734b3de8b9018d1e489fd085289bc50aa37a9dc2060454879d1630e29ca
 size 83945296

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6adc1972bbac13fd151f61bc442fee4161164caf7722a886f1b4dbab0b7dc358
 size 43122580

 version https://git-lfs.github.com/spec/v1
+oid sha256:fc99bb428e07e5daccebeae44d47d8333700e5cdefe2f0f9ec1dce6373577dbf
 size 43122580

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c851e102a850987a7112fc8c592cffcff5441bc073f8f6c074d6c9a8692d8ef8
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:5063e42d33236efce97a1743e854e894f2a2c7cac2e300112055d90419bf6d8f
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:60f8f27a9d76e31d3721bb37f2f4c76e866b8dedf8d4315f175dac906a4a966d
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:c7adb031dc938779259a07e684ea318126e92e52b40c256b93ef474d2ac57a5b
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.00023472628714360115,
   "eval_steps": 3,
-  "global_step": 2,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -29,6 +29,28 @@
       "learning_rate": 2e-05,
       "loss": 4.2225,
       "step": 2
     }
   ],
   "logging_steps": 1,
@@ -48,7 +70,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 762945252360192.0,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.0004694525742872023,
   "eval_steps": 3,
+  "global_step": 4,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 2e-05,
       "loss": 4.2225,
       "step": 2
+    },
+    {
+      "epoch": 0.0003520894307154017,
+      "grad_norm": 1.3053418397903442,
+      "learning_rate": 3e-05,
+      "loss": 2.4203,
+      "step": 3
+    },
+    {
+      "epoch": 0.0003520894307154017,
+      "eval_loss": 3.2794156074523926,
+      "eval_runtime": 1578.0902,
+      "eval_samples_per_second": 4.547,
+      "eval_steps_per_second": 4.547,
+      "step": 3
+    },
+    {
+      "epoch": 0.0004694525742872023,
+      "grad_norm": 1.3151017427444458,
+      "learning_rate": 4e-05,
+      "loss": 2.5524,
+      "step": 4
     }
   ],
   "logging_steps": 1,
       "attributes": {}
     }
   },
+  "total_flos": 1502770951618560.0,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null