Training in progress, step 51, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +130 -3

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c1b54cccbbed2f75eaef804b9662f6ef8e1c82290e1144dee6ea63fb77aeb26f
 size 335604696

 version https://git-lfs.github.com/spec/v1
+oid sha256:2690da81d83e89e9ee7b7dc7113de883b592b46176c59f118aaff5ad1b1c42cc
 size 335604696

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:90bcb817a546831a005a9504935df07250d9cc27fd981d0add1cf5aa973ee4f9
 size 671466706

 version https://git-lfs.github.com/spec/v1
+oid sha256:580ef17b80875f7190859d2da30842b5c7fe663cfb653f5c46ee771354997856
 size 671466706

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7abcd57538b0647768495b9aeb3b444e111ce136e810d289cc8fe830dca41b95
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:02f54eb84b382db409dccc97cebf46240cf5b6285939222273bf59b8c0558286
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2e1983b20d7ce0214623b79adb071ed1f5c168cabcab4cc0ff2c0c61c63ddce9
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:c86702c0a3caad6c51746e54805a7289de03dff9cc5abc148a58966cf1f4d339
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.050057510927076145,
   "eval_steps": 50,
-  "global_step": 34,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -253,6 +253,133 @@
       "learning_rate": 0.0001477721794706997,
       "loss": 0.0717,
       "step": 34
     }
   ],
   "logging_steps": 1,
@@ -272,7 +399,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 3.597635405563822e+17,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.07508626639061422,
   "eval_steps": 50,
+  "global_step": 51,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 0.0001477721794706997,
       "loss": 0.0717,
       "step": 34
+    },
+    {
+      "epoch": 0.05152979066022544,
+      "grad_norm": 0.15129899978637695,
+      "learning_rate": 0.0001474444369716801,
+      "loss": 0.1103,
+      "step": 35
+    },
+    {
+      "epoch": 0.05300207039337474,
+      "grad_norm": 0.14446967840194702,
+      "learning_rate": 0.0001470946271953739,
+      "loss": 0.1674,
+      "step": 36
+    },
+    {
+      "epoch": 0.05447435012652404,
+      "grad_norm": 0.10094312578439713,
+      "learning_rate": 0.00014672285669722765,
+      "loss": 0.0696,
+      "step": 37
+    },
+    {
+      "epoch": 0.05594662985967334,
+      "grad_norm": 0.17120350897312164,
+      "learning_rate": 0.00014632923872213652,
+      "loss": 0.2139,
+      "step": 38
+    },
+    {
+      "epoch": 0.057418909592822635,
+      "grad_norm": 0.14146435260772705,
+      "learning_rate": 0.00014591389316994876,
+      "loss": 0.0925,
+      "step": 39
+    },
+    {
+      "epoch": 0.058891189325971934,
+      "grad_norm": 0.14251448214054108,
+      "learning_rate": 0.0001454769465589431,
+      "loss": 0.1002,
+      "step": 40
+    },
+    {
+      "epoch": 0.06036346905912123,
+      "grad_norm": 0.07004090398550034,
+      "learning_rate": 0.00014501853198729012,
+      "loss": 0.0538,
+      "step": 41
+    },
+    {
+      "epoch": 0.06183574879227053,
+      "grad_norm": 0.14318852126598358,
+      "learning_rate": 0.00014453878909250904,
+      "loss": 0.1316,
+      "step": 42
+    },
+    {
+      "epoch": 0.06330802852541983,
+      "grad_norm": 0.10623105615377426,
+      "learning_rate": 0.00014403786400893302,
+      "loss": 0.0866,
+      "step": 43
+    },
+    {
+      "epoch": 0.06478030825856913,
+      "grad_norm": 0.10893028974533081,
+      "learning_rate": 0.00014351590932319504,
+      "loss": 0.0518,
+      "step": 44
+    },
+    {
+      "epoch": 0.06625258799171843,
+      "grad_norm": 0.1411529928445816,
+      "learning_rate": 0.00014297308402774875,
+      "loss": 0.1357,
+      "step": 45
+    },
+    {
+      "epoch": 0.06772486772486773,
+      "grad_norm": 0.10105417668819427,
+      "learning_rate": 0.0001424095534724375,
+      "loss": 0.0654,
+      "step": 46
+    },
+    {
+      "epoch": 0.06919714745801703,
+      "grad_norm": 0.14420634508132935,
+      "learning_rate": 0.00014182548931412757,
+      "loss": 0.0935,
+      "step": 47
+    },
+    {
+      "epoch": 0.07066942719116633,
+      "grad_norm": 0.12569449841976166,
+      "learning_rate": 0.0001412210694644195,
+      "loss": 0.0848,
+      "step": 48
+    },
+    {
+      "epoch": 0.07214170692431562,
+      "grad_norm": 0.09209802001714706,
+      "learning_rate": 0.00014059647803545467,
+      "loss": 0.0473,
+      "step": 49
+    },
+    {
+      "epoch": 0.07361398665746492,
+      "grad_norm": 0.12560804188251495,
+      "learning_rate": 0.0001399519052838329,
+      "loss": 0.0785,
+      "step": 50
+    },
+    {
+      "epoch": 0.07361398665746492,
+      "eval_loss": 0.08997488021850586,
+      "eval_runtime": 784.5444,
+      "eval_samples_per_second": 2.916,
+      "eval_steps_per_second": 1.458,
+      "step": 50
+    },
+    {
+      "epoch": 0.07508626639061422,
+      "grad_norm": 0.15386323630809784,
+      "learning_rate": 0.00013928754755265842,
+      "loss": 0.1427,
+      "step": 51
     }
   ],
   "logging_steps": 1,
       "attributes": {}
     }
   },
+  "total_flos": 5.401115673671762e+17,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null