Training in progress, step 150, checkpoint

Browse files

Files changed (4) hide show

last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +256 -4

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b2186e8979e7e1ae275f2d28a42525f59945d051a71c1387da3064f1a1661f55
 size 37965300

 version https://git-lfs.github.com/spec/v1
+oid sha256:c176d54ad21da2583d0c1b8a23b42d5d4f5d087533579337f01ee622178a9065
 size 37965300

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3fba86ea897f6fd3fff9c155539f11b4bf21fdc8ffe2c6459031ed6181eefd5f
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:9a39d244929ff4d03b82e59d530f09311a95ad8bbc413f0e1a16b9f12428b3e0
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d35a71cd96ab153cf58aa68810dc4405b5e1a7fbfef5501d233ae84e34aa51c4
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:285055c203c0e51e433ff14d6fa6e0c364698ec978202e506a844a8372901f4a
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.20221729490022172,
   "eval_steps": 38,
-  "global_step": 114,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -837,6 +837,258 @@
       "eval_samples_per_second": 9.685,
       "eval_steps_per_second": 4.842,
       "step": 114
     }
   ],
   "logging_steps": 1,
@@ -851,12 +1103,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 3722866184945664.0,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.2660753880266075,
   "eval_steps": 38,
+  "global_step": 150,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 9.685,
       "eval_steps_per_second": 4.842,
       "step": 114
+    },
+    {
+      "epoch": 0.2039911308203991,
+      "grad_norm": NaN,
+      "learning_rate": 5.857864376269051e-05,
+      "loss": 0.0,
+      "step": 115
+    },
+    {
+      "epoch": 0.2057649667405765,
+      "grad_norm": NaN,
+      "learning_rate": 5.544102723452171e-05,
+      "loss": 0.0,
+      "step": 116
+    },
+    {
+      "epoch": 0.2075388026607539,
+      "grad_norm": NaN,
+      "learning_rate": 5.237620050253189e-05,
+      "loss": 0.0,
+      "step": 117
+    },
+    {
+      "epoch": 0.20931263858093127,
+      "grad_norm": NaN,
+      "learning_rate": 4.938570679927783e-05,
+      "loss": 0.0,
+      "step": 118
+    },
+    {
+      "epoch": 0.21108647450110865,
+      "grad_norm": NaN,
+      "learning_rate": 4.647105192839778e-05,
+      "loss": 0.0,
+      "step": 119
+    },
+    {
+      "epoch": 0.21286031042128603,
+      "grad_norm": NaN,
+      "learning_rate": 4.363370350639404e-05,
+      "loss": 0.0,
+      "step": 120
+    },
+    {
+      "epoch": 0.2146341463414634,
+      "grad_norm": NaN,
+      "learning_rate": 4.087509022364382e-05,
+      "loss": 0.0,
+      "step": 121
+    },
+    {
+      "epoch": 0.2164079822616408,
+      "grad_norm": NaN,
+      "learning_rate": 3.819660112501053e-05,
+      "loss": 0.0,
+      "step": 122
+    },
+    {
+      "epoch": 0.21818181818181817,
+      "grad_norm": NaN,
+      "learning_rate": 3.5599584910418035e-05,
+      "loss": 0.0,
+      "step": 123
+    },
+    {
+      "epoch": 0.21995565410199558,
+      "grad_norm": NaN,
+      "learning_rate": 3.3085349255739474e-05,
+      "loss": 0.0,
+      "step": 124
+    },
+    {
+      "epoch": 0.22172949002217296,
+      "grad_norm": NaN,
+      "learning_rate": 3.0655160154343174e-05,
+      "loss": 0.0,
+      "step": 125
+    },
+    {
+      "epoch": 0.22350332594235034,
+      "grad_norm": NaN,
+      "learning_rate": 2.831024127962678e-05,
+      "loss": 0.0,
+      "step": 126
+    },
+    {
+      "epoch": 0.22527716186252772,
+      "grad_norm": NaN,
+      "learning_rate": 2.6051773368860934e-05,
+      "loss": 0.0,
+      "step": 127
+    },
+    {
+      "epoch": 0.2270509977827051,
+      "grad_norm": NaN,
+      "learning_rate": 2.38808936286524e-05,
+      "loss": 0.0,
+      "step": 128
+    },
+    {
+      "epoch": 0.22882483370288248,
+      "grad_norm": NaN,
+      "learning_rate": 2.1798695162326442e-05,
+      "loss": 0.0,
+      "step": 129
+    },
+    {
+      "epoch": 0.23059866962305986,
+      "grad_norm": NaN,
+      "learning_rate": 1.9806226419516192e-05,
+      "loss": 0.0,
+      "step": 130
+    },
+    {
+      "epoch": 0.23237250554323724,
+      "grad_norm": NaN,
+      "learning_rate": 1.790449066823683e-05,
+      "loss": 0.0,
+      "step": 131
+    },
+    {
+      "epoch": 0.23414634146341465,
+      "grad_norm": NaN,
+      "learning_rate": 1.6094445489709885e-05,
+      "loss": 0.0,
+      "step": 132
+    },
+    {
+      "epoch": 0.23592017738359203,
+      "grad_norm": NaN,
+      "learning_rate": 1.4377002296192233e-05,
+      "loss": 0.0,
+      "step": 133
+    },
+    {
+      "epoch": 0.2376940133037694,
+      "grad_norm": NaN,
+      "learning_rate": 1.275302587205256e-05,
+      "loss": 0.0,
+      "step": 134
+    },
+    {
+      "epoch": 0.2394678492239468,
+      "grad_norm": NaN,
+      "learning_rate": 1.1223333938326485e-05,
+      "loss": 0.0,
+      "step": 135
+    },
+    {
+      "epoch": 0.24124168514412417,
+      "grad_norm": NaN,
+      "learning_rate": 9.788696740969295e-06,
+      "loss": 0.0,
+      "step": 136
+    },
+    {
+      "epoch": 0.24301552106430155,
+      "grad_norm": NaN,
+      "learning_rate": 8.44983666301391e-06,
+      "loss": 0.0,
+      "step": 137
+    },
+    {
+      "epoch": 0.24478935698447893,
+      "grad_norm": NaN,
+      "learning_rate": 7.2074278608293525e-06,
+      "loss": 0.0,
+      "step": 138
+    },
+    {
+      "epoch": 0.2465631929046563,
+      "grad_norm": NaN,
+      "learning_rate": 6.062095924662625e-06,
+      "loss": 0.0,
+      "step": 139
+    },
+    {
+      "epoch": 0.24833702882483372,
+      "grad_norm": NaN,
+      "learning_rate": 5.0144175636352765e-06,
+      "loss": 0.0,
+      "step": 140
+    },
+    {
+      "epoch": 0.2501108647450111,
+      "grad_norm": NaN,
+      "learning_rate": 4.064920315352904e-06,
+      "loss": 0.0,
+      "step": 141
+    },
+    {
+      "epoch": 0.2518847006651885,
+      "grad_norm": NaN,
+      "learning_rate": 3.2140822802740668e-06,
+      "loss": 0.0,
+      "step": 142
+    },
+    {
+      "epoch": 0.25365853658536586,
+      "grad_norm": NaN,
+      "learning_rate": 2.462331880972468e-06,
+      "loss": 0.0,
+      "step": 143
+    },
+    {
+      "epoch": 0.25543237250554324,
+      "grad_norm": NaN,
+      "learning_rate": 1.81004764641306e-06,
+      "loss": 0.0,
+      "step": 144
+    },
+    {
+      "epoch": 0.2572062084257206,
+      "grad_norm": NaN,
+      "learning_rate": 1.2575580213514792e-06,
+      "loss": 0.0,
+      "step": 145
+    },
+    {
+      "epoch": 0.258980044345898,
+      "grad_norm": NaN,
+      "learning_rate": 8.051412009521864e-07,
+      "loss": 0.0,
+      "step": 146
+    },
+    {
+      "epoch": 0.2607538802660754,
+      "grad_norm": NaN,
+      "learning_rate": 4.530249907087836e-07,
+      "loss": 0.0,
+      "step": 147
+    },
+    {
+      "epoch": 0.26252771618625276,
+      "grad_norm": NaN,
+      "learning_rate": 2.0138669173708213e-07,
+      "loss": 0.0,
+      "step": 148
+    },
+    {
+      "epoch": 0.26430155210643014,
+      "grad_norm": NaN,
+      "learning_rate": 5.035301149869387e-08,
+      "loss": 0.0,
+      "step": 149
+    },
+    {
+      "epoch": 0.2660753880266075,
+      "grad_norm": NaN,
+      "learning_rate": 0.0,
+      "loss": 0.0,
+      "step": 150
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 4898508138086400.0,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null