Training in progress, step 136, checkpoint

Browse files

Files changed (4) hide show

last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +122 -3

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a7a0675b0ccff9d3ee1f89a5d3ec1b7f2360c88f112fca496ed1e53a1f95999b
 size 640009682

 version https://git-lfs.github.com/spec/v1
+oid sha256:d16dfef8d2f27178a4d1f4edab1fa39d16a60f03abb441761d33f24ee556f18e
 size 640009682

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c587490379a5a51115375645f2d5ea52b7a18285f8ceca78cb845b39b459b45e
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:29ed09c79b26c5877cb26e4ced9f9a10914f152b078ea4dd8bdd5fffdeeaad84
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d4a2e0d38e4aad78961b54bb99f0a18c11d847ea1d3d3bc12cf223a3f862cf9e
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:6aeb1fbb5e964bbc83fa43b049054867ad1faca8f43271132d241ae074069d5d
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.13076024998283084,
   "eval_steps": 50,
-  "global_step": 119,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -864,6 +864,125 @@
       "learning_rate": 6.326741512198266e-05,
       "loss": 0.0,
       "step": 119
     }
   ],
   "logging_steps": 1,
@@ -883,7 +1002,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 6.974956063351112e+17,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.14944028569466383,
   "eval_steps": 50,
+  "global_step": 136,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 6.326741512198266e-05,
       "loss": 0.0,
       "step": 119
+    },
+    {
+      "epoch": 0.13185907561293866,
+      "grad_norm": NaN,
+      "learning_rate": 6.197638667498022e-05,
+      "loss": 0.0,
+      "step": 120
+    },
+    {
+      "epoch": 0.1329579012430465,
+      "grad_norm": NaN,
+      "learning_rate": 6.068932534675913e-05,
+      "loss": 0.0,
+      "step": 121
+    },
+    {
+      "epoch": 0.1340567268731543,
+      "grad_norm": NaN,
+      "learning_rate": 5.9406623188668055e-05,
+      "loss": 0.0,
+      "step": 122
+    },
+    {
+      "epoch": 0.13515555250326214,
+      "grad_norm": NaN,
+      "learning_rate": 5.812867092421013e-05,
+      "loss": 0.0,
+      "step": 123
+    },
+    {
+      "epoch": 0.13625437813336996,
+      "grad_norm": NaN,
+      "learning_rate": 5.685585783002493e-05,
+      "loss": 0.0,
+      "step": 124
+    },
+    {
+      "epoch": 0.1373532037634778,
+      "grad_norm": NaN,
+      "learning_rate": 5.558857161731093e-05,
+      "loss": 0.0,
+      "step": 125
+    },
+    {
+      "epoch": 0.1384520293935856,
+      "grad_norm": NaN,
+      "learning_rate": 5.4327198313725064e-05,
+      "loss": 0.0,
+      "step": 126
+    },
+    {
+      "epoch": 0.13955085502369344,
+      "grad_norm": NaN,
+      "learning_rate": 5.307212214579474e-05,
+      "loss": 0.0,
+      "step": 127
+    },
+    {
+      "epoch": 0.14064968065380126,
+      "grad_norm": NaN,
+      "learning_rate": 5.182372542187895e-05,
+      "loss": 0.0,
+      "step": 128
+    },
+    {
+      "epoch": 0.14174850628390906,
+      "grad_norm": NaN,
+      "learning_rate": 5.058238841571326e-05,
+      "loss": 0.0,
+      "step": 129
+    },
+    {
+      "epoch": 0.14284733191401688,
+      "grad_norm": NaN,
+      "learning_rate": 4.934848925057484e-05,
+      "loss": 0.0,
+      "step": 130
+    },
+    {
+      "epoch": 0.1439461575441247,
+      "grad_norm": NaN,
+      "learning_rate": 4.812240378410248e-05,
+      "loss": 0.0,
+      "step": 131
+    },
+    {
+      "epoch": 0.14504498317423253,
+      "grad_norm": NaN,
+      "learning_rate": 4.690450549380659e-05,
+      "loss": 0.0,
+      "step": 132
+    },
+    {
+      "epoch": 0.14614380880434036,
+      "grad_norm": NaN,
+      "learning_rate": 4.569516536330447e-05,
+      "loss": 0.0,
+      "step": 133
+    },
+    {
+      "epoch": 0.14724263443444818,
+      "grad_norm": NaN,
+      "learning_rate": 4.449475176931499e-05,
+      "loss": 0.0,
+      "step": 134
+    },
+    {
+      "epoch": 0.148341460064556,
+      "grad_norm": NaN,
+      "learning_rate": 4.3303630369447554e-05,
+      "loss": 0.0,
+      "step": 135
+    },
+    {
+      "epoch": 0.14944028569466383,
+      "grad_norm": NaN,
+      "learning_rate": 4.212216399081918e-05,
+      "loss": 0.0,
+      "step": 136
     }
   ],
   "logging_steps": 1,
       "attributes": {}
     }
   },
+  "total_flos": 7.965978331541668e+17,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null