Training in progress, step 34, checkpoint
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:ff72e4efe6d28a195f3570744a19808d24176a4a5c3166d09509ecf448dd971e
 size 97728
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:fabc0e47233f6f67098c93eb17ccaff88b58404fd8853fd611da5f2ccf11189d
 size 212298
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:e5a1457ae5d909a79141dea2965aadc86d4c660ec11af6ef8eed50147437a542
 size 14244
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:2e1983b20d7ce0214623b79adb071ed1f5c168cabcab4cc0ff2c0c61c63ddce9
 size 1064
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.
+  "epoch": 0.11519322392800424,
   "eval_steps": 50,
-  "global_step":
+  "global_step": 34,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -134,6 +134,125 @@
       "learning_rate": 0.00012749999999999998,
       "loss": 10.3736,
       "step": 17
+    },
+    {
+      "epoch": 0.060984647961884596,
+      "grad_norm": 0.04979519173502922,
+      "learning_rate": 0.000135,
+      "loss": 10.3729,
+      "step": 18
+    },
+    {
+      "epoch": 0.06437268395976707,
+      "grad_norm": 0.04958589747548103,
+      "learning_rate": 0.0001425,
+      "loss": 10.3735,
+      "step": 19
+    },
+    {
+      "epoch": 0.06776071995764955,
+      "grad_norm": 0.054519593715667725,
+      "learning_rate": 0.00015,
+      "loss": 10.3723,
+      "step": 20
+    },
+    {
+      "epoch": 0.07114875595553202,
+      "grad_norm": 0.060721371322870255,
+      "learning_rate": 0.00014998857713672935,
+      "loss": 10.372,
+      "step": 21
+    },
+    {
+      "epoch": 0.0745367919534145,
+      "grad_norm": 0.06067592278122902,
+      "learning_rate": 0.00014995431202643217,
+      "loss": 10.3718,
+      "step": 22
+    },
+    {
+      "epoch": 0.07792482795129699,
+      "grad_norm": 0.06362656503915787,
+      "learning_rate": 0.000149897215106593,
+      "loss": 10.3713,
+      "step": 23
+    },
+    {
+      "epoch": 0.08131286394917946,
+      "grad_norm": 0.067110076546669,
+      "learning_rate": 0.0001498173037694868,
+      "loss": 10.3714,
+      "step": 24
+    },
+    {
+      "epoch": 0.08470089994706194,
+      "grad_norm": 0.07370075583457947,
+      "learning_rate": 0.0001497146023568809,
+      "loss": 10.3712,
+      "step": 25
+    },
+    {
+      "epoch": 0.08808893594494442,
+      "grad_norm": 0.0760367140173912,
+      "learning_rate": 0.00014958914215262048,
+      "loss": 10.371,
+      "step": 26
+    },
+    {
+      "epoch": 0.09147697194282689,
+      "grad_norm": 0.08042097836732864,
+      "learning_rate": 0.00014944096137309914,
+      "loss": 10.37,
+      "step": 27
+    },
+    {
+      "epoch": 0.09486500794070937,
+      "grad_norm": 0.08724083006381989,
+      "learning_rate": 0.00014927010515561776,
+      "loss": 10.3697,
+      "step": 28
+    },
+    {
+      "epoch": 0.09825304393859184,
+      "grad_norm": 0.08779574185609818,
+      "learning_rate": 0.00014907662554463532,
+      "loss": 10.3689,
+      "step": 29
+    },
+    {
+      "epoch": 0.10164107993647432,
+      "grad_norm": 0.09096319228410721,
+      "learning_rate": 0.0001488605814759156,
+      "loss": 10.3677,
+      "step": 30
+    },
+    {
+      "epoch": 0.1050291159343568,
+      "grad_norm": 0.10570746660232544,
+      "learning_rate": 0.00014862203875857477,
+      "loss": 10.3666,
+      "step": 31
+    },
+    {
+      "epoch": 0.10841715193223928,
+      "grad_norm": 0.1051798090338707,
+      "learning_rate": 0.0001483610700550354,
+      "loss": 10.3678,
+      "step": 32
+    },
+    {
+      "epoch": 0.11180518793012176,
+      "grad_norm": 0.11076612770557404,
+      "learning_rate": 0.00014807775485889264,
+      "loss": 10.366,
+      "step": 33
+    },
+    {
+      "epoch": 0.11519322392800424,
+      "grad_norm": 0.11073443293571472,
+      "learning_rate": 0.0001477721794706997,
+      "loss": 10.3658,
+      "step": 34
     }
   ],
   "logging_steps": 1,
@@ -153,7 +272,7 @@
       "attributes": {}
     }
   },
-  "total_flos":
+  "total_flos": 14554505281536.0,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null
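For reference, a minimal sketch (not part of this commit) of how the updated trainer_state.json could be inspected after downloading the checkpoint. It assumes the standard Hugging Face Trainer layout, in which the per-step metrics shown in the diff above are stored under the "log_history" key.

import json

# Load the checkpoint's trainer state (path assumed relative to the repo root).
with open("last-checkpoint/trainer_state.json") as f:
    state = json.load(f)

print(f"global_step={state['global_step']}  epoch={state['epoch']:.4f}")

# Print the logged training loss per step; evaluation entries (if any) lack "loss".
for entry in state["log_history"]:
    if "loss" in entry:
        print(f"step {entry['step']:>3}  loss {entry['loss']:.4f}  lr {entry['learning_rate']:.6g}")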