Training in progress, step 200, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +363 -5

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:118c8f4a0300508828f2e88d46204c176479b3e1c99732414232884e7bd4f0aa
 size 578859568

 version https://git-lfs.github.com/spec/v1
+oid sha256:452543d07fdb291d5cff953230d5055c3c2f07f9ace1a5a08eb8c03ff8024649
 size 578859568

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:418dfccc8b5c8b256ab0a2b604a7503b16aa1374f1a08b80b34bdb5a8112e300
 size 294324372

 version https://git-lfs.github.com/spec/v1
+oid sha256:577b6226186110858f099544da2576c9d028b63194affe90b3d117f6d328a1d0
 size 294324372

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a024eb23a26bb335e268967056f207c3831f0e4adc54f097ba2f8a926be0960d
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:9aad1d594dee184a7652a540da097599d5a876408e16a28fe7bd53aa7da90dd5
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:48de68a65bf60a607751c984fbe709cfb356970dfe50a7f6e3454a1ff0e15e08
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:3ccd6c733a208e149811e7f1cec59782806cc39c64e33a8b91e1933bc027c5e4
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 0.8552482724189758,
-  "best_model_checkpoint": "miner_id_24/checkpoint-150",
-  "epoch": 0.0041695332207559365,
   "eval_steps": 50,
-  "global_step": 150,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1089,6 +1089,364 @@
       "eval_samples_per_second": 11.538,
       "eval_steps_per_second": 5.779,
       "step": 150
     }
   ],
   "logging_steps": 1,
@@ -1117,7 +1475,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 9760677895864320.0,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 0.7939153909683228,
+  "best_model_checkpoint": "miner_id_24/checkpoint-200",
+  "epoch": 0.0055593776276745815,
   "eval_steps": 50,
+  "global_step": 200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 11.538,
       "eval_steps_per_second": 5.779,
       "step": 150
+    },
+    {
+      "epoch": 0.004197330108894309,
+      "grad_norm": 0.4086618423461914,
+      "learning_rate": 0.0016184643766056315,
+      "loss": 1.0707,
+      "step": 151
+    },
+    {
+      "epoch": 0.004225126997032682,
+      "grad_norm": 0.2321600317955017,
+      "learning_rate": 0.0016134135287043667,
+      "loss": 1.0719,
+      "step": 152
+    },
+    {
+      "epoch": 0.004252923885171055,
+      "grad_norm": 0.09448766708374023,
+      "learning_rate": 0.0016083374657755133,
+      "loss": 1.035,
+      "step": 153
+    },
+    {
+      "epoch": 0.004280720773309428,
+      "grad_norm": 0.0990653857588768,
+      "learning_rate": 0.0016032363964761363,
+      "loss": 0.9306,
+      "step": 154
+    },
+    {
+      "epoch": 0.004308517661447801,
+      "grad_norm": 0.09489451348781586,
+      "learning_rate": 0.001598110530491216,
+      "loss": 0.8499,
+      "step": 155
+    },
+    {
+      "epoch": 0.004336314549586173,
+      "grad_norm": 0.09735696017742157,
+      "learning_rate": 0.0015929600785250257,
+      "loss": 0.8704,
+      "step": 156
+    },
+    {
+      "epoch": 0.004364111437724547,
+      "grad_norm": 0.1042320653796196,
+      "learning_rate": 0.0015877852522924731,
+      "loss": 0.8512,
+      "step": 157
+    },
+    {
+      "epoch": 0.00439190832586292,
+      "grad_norm": 0.12351106107234955,
+      "learning_rate": 0.0015825862645103962,
+      "loss": 0.9413,
+      "step": 158
+    },
+    {
+      "epoch": 0.004419705214001293,
+      "grad_norm": 0.09481119364500046,
+      "learning_rate": 0.0015773633288888196,
+      "loss": 0.8296,
+      "step": 159
+    },
+    {
+      "epoch": 0.0044475021021396655,
+      "grad_norm": 0.105812206864357,
+      "learning_rate": 0.0015721166601221697,
+      "loss": 0.7714,
+      "step": 160
+    },
+    {
+      "epoch": 0.004475298990278038,
+      "grad_norm": 0.11305416375398636,
+      "learning_rate": 0.00156684647388045,
+      "loss": 0.9412,
+      "step": 161
+    },
+    {
+      "epoch": 0.004503095878416411,
+      "grad_norm": 0.10926243662834167,
+      "learning_rate": 0.0015615529868003748,
+      "loss": 0.8682,
+      "step": 162
+    },
+    {
+      "epoch": 0.004530892766554784,
+      "grad_norm": 0.10087363421916962,
+      "learning_rate": 0.0015562364164764648,
+      "loss": 0.8325,
+      "step": 163
+    },
+    {
+      "epoch": 0.004558689654693157,
+      "grad_norm": 0.09206137806177139,
+      "learning_rate": 0.0015508969814521025,
+      "loss": 0.7958,
+      "step": 164
+    },
+    {
+      "epoch": 0.00458648654283153,
+      "grad_norm": 0.11067818850278854,
+      "learning_rate": 0.0015455349012105486,
+      "loss": 1.0522,
+      "step": 165
+    },
+    {
+      "epoch": 0.004614283430969903,
+      "grad_norm": 0.11335323750972748,
+      "learning_rate": 0.0015401503961659203,
+      "loss": 0.9374,
+      "step": 166
+    },
+    {
+      "epoch": 0.004642080319108276,
+      "grad_norm": 0.1060660108923912,
+      "learning_rate": 0.0015347436876541297,
+      "loss": 0.9861,
+      "step": 167
+    },
+    {
+      "epoch": 0.004669877207246649,
+      "grad_norm": 0.12663975358009338,
+      "learning_rate": 0.0015293149979237874,
+      "loss": 0.7834,
+      "step": 168
+    },
+    {
+      "epoch": 0.004697674095385022,
+      "grad_norm": 0.1315016895532608,
+      "learning_rate": 0.0015238645501270654,
+      "loss": 0.9251,
+      "step": 169
+    },
+    {
+      "epoch": 0.0047254709835233945,
+      "grad_norm": 0.16219031810760498,
+      "learning_rate": 0.0015183925683105253,
+      "loss": 0.8951,
+      "step": 170
+    },
+    {
+      "epoch": 0.004753267871661767,
+      "grad_norm": 0.1081567108631134,
+      "learning_rate": 0.0015128992774059062,
+      "loss": 0.9457,
+      "step": 171
+    },
+    {
+      "epoch": 0.00478106475980014,
+      "grad_norm": 0.1203177273273468,
+      "learning_rate": 0.0015073849032208823,
+      "loss": 0.9307,
+      "step": 172
+    },
+    {
+      "epoch": 0.004808861647938513,
+      "grad_norm": 0.11848781257867813,
+      "learning_rate": 0.0015018496724297776,
+      "loss": 0.8301,
+      "step": 173
+    },
+    {
+      "epoch": 0.004836658536076886,
+      "grad_norm": 0.13573849201202393,
+      "learning_rate": 0.0014962938125642501,
+      "loss": 0.723,
+      "step": 174
+    },
+    {
+      "epoch": 0.0048644554242152594,
+      "grad_norm": 0.10743360966444016,
+      "learning_rate": 0.001490717552003938,
+      "loss": 0.7861,
+      "step": 175
+    },
+    {
+      "epoch": 0.004892252312353632,
+      "grad_norm": 0.10989518463611603,
+      "learning_rate": 0.001485121119967072,
+      "loss": 0.7905,
+      "step": 176
+    },
+    {
+      "epoch": 0.004920049200492005,
+      "grad_norm": 0.10301119089126587,
+      "learning_rate": 0.001479504746501054,
+      "loss": 0.6952,
+      "step": 177
+    },
+    {
+      "epoch": 0.004947846088630378,
+      "grad_norm": 0.12021514773368835,
+      "learning_rate": 0.0014738686624729987,
+      "loss": 0.782,
+      "step": 178
+    },
+    {
+      "epoch": 0.004975642976768751,
+      "grad_norm": 0.11153309047222137,
+      "learning_rate": 0.0014682130995602458,
+      "loss": 0.8065,
+      "step": 179
+    },
+    {
+      "epoch": 0.0050034398649071235,
+      "grad_norm": 0.11229830235242844,
+      "learning_rate": 0.0014625382902408355,
+      "loss": 0.7113,
+      "step": 180
+    },
+    {
+      "epoch": 0.005031236753045496,
+      "grad_norm": 0.11783714592456818,
+      "learning_rate": 0.0014568444677839517,
+      "loss": 0.6218,
+      "step": 181
+    },
+    {
+      "epoch": 0.005059033641183869,
+      "grad_norm": 0.12635229527950287,
+      "learning_rate": 0.0014511318662403345,
+      "loss": 0.6719,
+      "step": 182
+    },
+    {
+      "epoch": 0.005086830529322243,
+      "grad_norm": 0.12254615128040314,
+      "learning_rate": 0.0014454007204326592,
+      "loss": 0.7263,
+      "step": 183
+    },
+    {
+      "epoch": 0.005114627417460616,
+      "grad_norm": 0.11896725744009018,
+      "learning_rate": 0.0014396512659458822,
+      "loss": 0.8887,
+      "step": 184
+    },
+    {
+      "epoch": 0.005142424305598988,
+      "grad_norm": 0.10813646763563156,
+      "learning_rate": 0.0014338837391175581,
+      "loss": 0.6242,
+      "step": 185
+    },
+    {
+      "epoch": 0.005170221193737361,
+      "grad_norm": 0.10913447290658951,
+      "learning_rate": 0.0014280983770281258,
+      "loss": 0.5932,
+      "step": 186
+    },
+    {
+      "epoch": 0.005198018081875734,
+      "grad_norm": 0.11929253488779068,
+      "learning_rate": 0.00142229541749116,
+      "loss": 0.7183,
+      "step": 187
+    },
+    {
+      "epoch": 0.005225814970014107,
+      "grad_norm": 0.1152404397726059,
+      "learning_rate": 0.001416475099043599,
+      "loss": 0.7041,
+      "step": 188
+    },
+    {
+      "epoch": 0.00525361185815248,
+      "grad_norm": 0.11904522776603699,
+      "learning_rate": 0.001410637660935938,
+      "loss": 0.7405,
+      "step": 189
+    },
+    {
+      "epoch": 0.0052814087462908525,
+      "grad_norm": 0.10983660072088242,
+      "learning_rate": 0.0014047833431223937,
+      "loss": 0.7169,
+      "step": 190
+    },
+    {
+      "epoch": 0.005309205634429225,
+      "grad_norm": 0.1285189539194107,
+      "learning_rate": 0.0013989123862510418,
+      "loss": 0.7584,
+      "step": 191
+    },
+    {
+      "epoch": 0.005337002522567599,
+      "grad_norm": 0.13100877404212952,
+      "learning_rate": 0.0013930250316539236,
+      "loss": 0.7606,
+      "step": 192
+    },
+    {
+      "epoch": 0.005364799410705972,
+      "grad_norm": 0.14966635406017303,
+      "learning_rate": 0.0013871215213371283,
+      "loss": 0.5972,
+      "step": 193
+    },
+    {
+      "epoch": 0.005392596298844345,
+      "grad_norm": 0.13933199644088745,
+      "learning_rate": 0.0013812020979708417,
+      "loss": 0.5656,
+      "step": 194
+    },
+    {
+      "epoch": 0.005420393186982717,
+      "grad_norm": 0.13888134062290192,
+      "learning_rate": 0.0013752670048793744,
+      "loss": 0.7759,
+      "step": 195
+    },
+    {
+      "epoch": 0.00544819007512109,
+      "grad_norm": 0.14111551642417908,
+      "learning_rate": 0.0013693164860311565,
+      "loss": 0.7513,
+      "step": 196
+    },
+    {
+      "epoch": 0.005475986963259463,
+      "grad_norm": 0.16336259245872498,
+      "learning_rate": 0.0013633507860287115,
+      "loss": 0.7926,
+      "step": 197
+    },
+    {
+      "epoch": 0.005503783851397836,
+      "grad_norm": 0.1474616378545761,
+      "learning_rate": 0.0013573701500986012,
+      "loss": 0.5559,
+      "step": 198
+    },
+    {
+      "epoch": 0.005531580739536209,
+      "grad_norm": 0.18535536527633667,
+      "learning_rate": 0.001351374824081343,
+      "loss": 0.7313,
+      "step": 199
+    },
+    {
+      "epoch": 0.0055593776276745815,
+      "grad_norm": 0.1758536696434021,
+      "learning_rate": 0.0013453650544213076,
+      "loss": 0.606,
+      "step": 200
+    },
+    {
+      "epoch": 0.0055593776276745815,
+      "eval_loss": 0.7939153909683228,
+      "eval_runtime": 50.0064,
+      "eval_samples_per_second": 11.539,
+      "eval_steps_per_second": 5.779,
+      "step": 200
     }
   ],
   "logging_steps": 1,
       "attributes": {}
     }
   },
+  "total_flos": 1.2931550052089856e+16,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null