Training in progress, step 26, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +94 -3

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0c4c57ed7563c381ecea9a5a3742625f1fbd7b95cfba1a59af1fbdcfb15fa7b4
 size 500770656

 version https://git-lfs.github.com/spec/v1
+oid sha256:798291a48541d51ec27dccfb04969ed84a240c4b5aad0fb27cc0e2669bfd528d
 size 500770656

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b828ae868e266585515a010b81284dc9974bb2d9d5380076fc4b1829f212ba01
 size 1001863522

 version https://git-lfs.github.com/spec/v1
+oid sha256:3cc8f86ad37f0cf51611f9aeecc8a395f82787608cb2ce6f7ccf27330ce2e6ee
 size 1001863522

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:32dce9a620fb887bf8c21a022ec964dc4bdc29e99cf526da3edd74c931dc7985
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:55ea9ad2466bee9501938172cccd6c85022832f061db4916ca506511c63fd6ce
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:18566815dcf6671d9e6506c7faf4f1e794eb811de4804054ae3a16b2108e6c1f
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:34f9c194f14d5c67265ac71b8d34cb6f960441ca9a99e80d33d78e02859e649b
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.18205689277899342,
   "eval_steps": 50,
-  "global_step": 13,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -106,6 +106,97 @@
       "learning_rate": 9.75e-05,
       "loss": 16.2046,
       "step": 13
     }
   ],
   "logging_steps": 1,
@@ -125,7 +216,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 1.3267472744448e+17,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.36411378555798685,
   "eval_steps": 50,
+  "global_step": 26,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 9.75e-05,
       "loss": 16.2046,
       "step": 13
+    },
+    {
+      "epoch": 0.19606126914660832,
+      "grad_norm": 64.03682708740234,
+      "learning_rate": 0.00010499999999999999,
+      "loss": 13.2331,
+      "step": 14
+    },
+    {
+      "epoch": 0.2100656455142232,
+      "grad_norm": 34.19150924682617,
+      "learning_rate": 0.0001125,
+      "loss": 11.2453,
+      "step": 15
+    },
+    {
+      "epoch": 0.22407002188183808,
+      "grad_norm": 34.346343994140625,
+      "learning_rate": 0.00011999999999999999,
+      "loss": 9.5668,
+      "step": 16
+    },
+    {
+      "epoch": 0.23807439824945295,
+      "grad_norm": 26.109838485717773,
+      "learning_rate": 0.00012749999999999998,
+      "loss": 6.7749,
+      "step": 17
+    },
+    {
+      "epoch": 0.25207877461706785,
+      "grad_norm": 17.14702606201172,
+      "learning_rate": 0.000135,
+      "loss": 6.2887,
+      "step": 18
+    },
+    {
+      "epoch": 0.2660831509846827,
+      "grad_norm": 15.40426254272461,
+      "learning_rate": 0.0001425,
+      "loss": 5.1345,
+      "step": 19
+    },
+    {
+      "epoch": 0.2800875273522976,
+      "grad_norm": 13.663360595703125,
+      "learning_rate": 0.00015,
+      "loss": 4.3361,
+      "step": 20
+    },
+    {
+      "epoch": 0.29409190371991245,
+      "grad_norm": 14.994868278503418,
+      "learning_rate": 0.00014997810105601446,
+      "loss": 3.2669,
+      "step": 21
+    },
+    {
+      "epoch": 0.3080962800875274,
+      "grad_norm": 17.923627853393555,
+      "learning_rate": 0.0001499124170124245,
+      "loss": 2.8837,
+      "step": 22
+    },
+    {
+      "epoch": 0.32210065645514224,
+      "grad_norm": 10.781777381896973,
+      "learning_rate": 0.00014980298622686183,
+      "loss": 2.5867,
+      "step": 23
+    },
+    {
+      "epoch": 0.3361050328227571,
+      "grad_norm": 17.933012008666992,
+      "learning_rate": 0.00014964987260382363,
+      "loss": 3.79,
+      "step": 24
+    },
+    {
+      "epoch": 0.350109409190372,
+      "grad_norm": 14.919720649719238,
+      "learning_rate": 0.00014945316555735403,
+      "loss": 3.5252,
+      "step": 25
+    },
+    {
+      "epoch": 0.36411378555798685,
+      "grad_norm": 13.19090747833252,
+      "learning_rate": 0.0001492129799588288,
+      "loss": 2.2783,
+      "step": 26
     }
   ],
   "logging_steps": 1,
       "attributes": {}
     }
   },
+  "total_flos": 2.6534945488896e+17,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null