abaddon182 committed
Commit 58a9f04 · verified · 1 Parent(s): 49829f1

Training in progress, step 200, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:15fc94388fdfec3ba6d5ce9fa605f721e69a2e4f9daa171c615a7c27718fced9
+oid sha256:39ccc2f42deac5cabdb2dbb32596741725438d1790378482ff3ec4b8a65c5dfb
 size 671149168
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c24ba66412dc811038bf53ce3271486bdfef01215f52265ad0303db582357171
+oid sha256:3718ec66c1c52e26885734efb8882f43c41d83eb0209c52528cfcfbf8fb5d57c
 size 341314196
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5d1f67cea69fd8539e383cea731f10417fc8d962637ffeb0b80577963c78cc1f
+oid sha256:8949f40459fe18a0d3bf9267b2538c644f6dc3382efa243a183debe2a69f18bf
 size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f7e9f0c2a27af03f3c1874438820d046de94b36aaec3b0cc778f96def4616314
+oid sha256:530505d607699f384741067a5f9139d72f043713adb680898a3f1b5714170c97
 size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 1.087826132774353,
3
- "best_model_checkpoint": "miner_id_24/checkpoint-100",
4
- "epoch": 0.06532745386248572,
5
  "eval_steps": 100,
6
- "global_step": 100,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -723,6 +723,714 @@
723
  "eval_samples_per_second": 13.315,
724
  "eval_steps_per_second": 3.331,
725
  "step": 100
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
726
  }
727
  ],
728
  "logging_steps": 1,
@@ -751,7 +1459,7 @@
751
  "attributes": {}
752
  }
753
  },
754
- "total_flos": 1.453090731542446e+17,
755
  "train_batch_size": 8,
756
  "trial_name": null,
757
  "trial_params": null
 
1
  {
2
+ "best_metric": 1.0087602138519287,
3
+ "best_model_checkpoint": "miner_id_24/checkpoint-200",
4
+ "epoch": 0.13065490772497143,
5
  "eval_steps": 100,
6
+ "global_step": 200,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
723
  "eval_samples_per_second": 13.315,
724
  "eval_steps_per_second": 3.331,
725
  "step": 100
726
+ },
727
+ {
728
+ "epoch": 0.06598072840111056,
729
+ "grad_norm": 6.478489875793457,
730
+ "learning_rate": 9.31367192988896e-05,
731
+ "loss": 4.3501,
732
+ "step": 101
733
+ },
734
+ {
735
+ "epoch": 0.06663400293973543,
736
+ "grad_norm": 4.0702080726623535,
737
+ "learning_rate": 9.297032057507264e-05,
738
+ "loss": 4.2697,
739
+ "step": 102
740
+ },
741
+ {
742
+ "epoch": 0.06728727747836027,
743
+ "grad_norm": 2.496466875076294,
744
+ "learning_rate": 9.280208114573859e-05,
745
+ "loss": 3.8445,
746
+ "step": 103
747
+ },
748
+ {
749
+ "epoch": 0.06794055201698514,
750
+ "grad_norm": 2.428135633468628,
751
+ "learning_rate": 9.263200821770461e-05,
752
+ "loss": 4.0675,
753
+ "step": 104
754
+ },
755
+ {
756
+ "epoch": 0.06859382655561,
757
+ "grad_norm": 2.5381500720977783,
758
+ "learning_rate": 9.246010907632895e-05,
759
+ "loss": 4.223,
760
+ "step": 105
761
+ },
762
+ {
763
+ "epoch": 0.06924710109423485,
764
+ "grad_norm": 2.4143152236938477,
765
+ "learning_rate": 9.228639108519868e-05,
766
+ "loss": 3.9627,
767
+ "step": 106
768
+ },
769
+ {
770
+ "epoch": 0.06990037563285971,
771
+ "grad_norm": 2.6589813232421875,
772
+ "learning_rate": 9.211086168581433e-05,
773
+ "loss": 4.0724,
774
+ "step": 107
775
+ },
776
+ {
777
+ "epoch": 0.07055365017148457,
778
+ "grad_norm": 2.906999349594116,
779
+ "learning_rate": 9.193352839727121e-05,
780
+ "loss": 4.0853,
781
+ "step": 108
782
+ },
783
+ {
784
+ "epoch": 0.07120692471010942,
785
+ "grad_norm": 2.604168653488159,
786
+ "learning_rate": 9.175439881593716e-05,
787
+ "loss": 3.9059,
788
+ "step": 109
789
+ },
790
+ {
791
+ "epoch": 0.07186019924873428,
792
+ "grad_norm": 2.6407856941223145,
793
+ "learning_rate": 9.157348061512727e-05,
794
+ "loss": 4.1648,
795
+ "step": 110
796
+ },
797
+ {
798
+ "epoch": 0.07251347378735913,
799
+ "grad_norm": 2.5845110416412354,
800
+ "learning_rate": 9.139078154477512e-05,
801
+ "loss": 3.9705,
802
+ "step": 111
803
+ },
804
+ {
805
+ "epoch": 0.073166748325984,
806
+ "grad_norm": 2.725156784057617,
807
+ "learning_rate": 9.120630943110077e-05,
808
+ "loss": 3.8773,
809
+ "step": 112
810
+ },
811
+ {
812
+ "epoch": 0.07382002286460886,
813
+ "grad_norm": 2.776826858520508,
814
+ "learning_rate": 9.102007217627568e-05,
815
+ "loss": 4.1886,
816
+ "step": 113
817
+ },
818
+ {
819
+ "epoch": 0.0744732974032337,
820
+ "grad_norm": 2.7510571479797363,
821
+ "learning_rate": 9.083207775808396e-05,
822
+ "loss": 3.8013,
823
+ "step": 114
824
+ },
825
+ {
826
+ "epoch": 0.07512657194185857,
827
+ "grad_norm": 2.9689526557922363,
828
+ "learning_rate": 9.064233422958077e-05,
829
+ "loss": 3.8871,
830
+ "step": 115
831
+ },
832
+ {
833
+ "epoch": 0.07577984648048343,
834
+ "grad_norm": 2.9751687049865723,
835
+ "learning_rate": 9.045084971874738e-05,
836
+ "loss": 4.0687,
837
+ "step": 116
838
+ },
839
+ {
840
+ "epoch": 0.07643312101910828,
841
+ "grad_norm": 3.208277702331543,
842
+ "learning_rate": 9.025763242814291e-05,
843
+ "loss": 4.0952,
844
+ "step": 117
845
+ },
846
+ {
847
+ "epoch": 0.07708639555773314,
848
+ "grad_norm": 2.978297233581543,
849
+ "learning_rate": 9.006269063455304e-05,
850
+ "loss": 3.7542,
851
+ "step": 118
852
+ },
853
+ {
854
+ "epoch": 0.07773967009635799,
855
+ "grad_norm": 3.079564332962036,
856
+ "learning_rate": 8.986603268863536e-05,
857
+ "loss": 3.7339,
858
+ "step": 119
859
+ },
860
+ {
861
+ "epoch": 0.07839294463498285,
862
+ "grad_norm": 3.3465781211853027,
863
+ "learning_rate": 8.966766701456177e-05,
864
+ "loss": 4.1308,
865
+ "step": 120
866
+ },
867
+ {
868
+ "epoch": 0.07904621917360771,
869
+ "grad_norm": 3.38362717628479,
870
+ "learning_rate": 8.94676021096575e-05,
871
+ "loss": 3.6047,
872
+ "step": 121
873
+ },
874
+ {
875
+ "epoch": 0.07969949371223256,
876
+ "grad_norm": 3.4256107807159424,
877
+ "learning_rate": 8.926584654403724e-05,
878
+ "loss": 4.0278,
879
+ "step": 122
880
+ },
881
+ {
882
+ "epoch": 0.08035276825085742,
883
+ "grad_norm": 3.728395700454712,
884
+ "learning_rate": 8.906240896023794e-05,
885
+ "loss": 3.5618,
886
+ "step": 123
887
+ },
888
+ {
889
+ "epoch": 0.08100604278948229,
890
+ "grad_norm": 4.323002338409424,
891
+ "learning_rate": 8.885729807284856e-05,
892
+ "loss": 3.7879,
893
+ "step": 124
894
+ },
895
+ {
896
+ "epoch": 0.08165931732810713,
897
+ "grad_norm": 4.338598728179932,
898
+ "learning_rate": 8.865052266813685e-05,
899
+ "loss": 3.4469,
900
+ "step": 125
901
+ },
902
+ {
903
+ "epoch": 0.082312591866732,
904
+ "grad_norm": 4.841395854949951,
905
+ "learning_rate": 8.844209160367299e-05,
906
+ "loss": 3.6624,
907
+ "step": 126
908
+ },
909
+ {
910
+ "epoch": 0.08296586640535684,
911
+ "grad_norm": 5.293141841888428,
912
+ "learning_rate": 8.823201380795001e-05,
913
+ "loss": 3.6083,
914
+ "step": 127
915
+ },
916
+ {
917
+ "epoch": 0.0836191409439817,
918
+ "grad_norm": 6.072646141052246,
919
+ "learning_rate": 8.802029828000156e-05,
920
+ "loss": 3.7721,
921
+ "step": 128
922
+ },
923
+ {
924
+ "epoch": 0.08427241548260657,
925
+ "grad_norm": 5.880528450012207,
926
+ "learning_rate": 8.780695408901613e-05,
927
+ "loss": 3.6489,
928
+ "step": 129
929
+ },
930
+ {
931
+ "epoch": 0.08492569002123142,
932
+ "grad_norm": 6.2913055419921875,
933
+ "learning_rate": 8.759199037394887e-05,
934
+ "loss": 4.0706,
935
+ "step": 130
936
+ },
937
+ {
938
+ "epoch": 0.08557896455985628,
939
+ "grad_norm": 6.430176258087158,
940
+ "learning_rate": 8.737541634312985e-05,
941
+ "loss": 3.6765,
942
+ "step": 131
943
+ },
944
+ {
945
+ "epoch": 0.08623223909848114,
946
+ "grad_norm": 6.8034987449646,
947
+ "learning_rate": 8.715724127386972e-05,
948
+ "loss": 3.7984,
949
+ "step": 132
950
+ },
951
+ {
952
+ "epoch": 0.08688551363710599,
953
+ "grad_norm": 8.133237838745117,
954
+ "learning_rate": 8.693747451206232e-05,
955
+ "loss": 3.7513,
956
+ "step": 133
957
+ },
958
+ {
959
+ "epoch": 0.08753878817573085,
960
+ "grad_norm": 9.387150764465332,
961
+ "learning_rate": 8.671612547178428e-05,
962
+ "loss": 3.943,
963
+ "step": 134
964
+ },
965
+ {
966
+ "epoch": 0.0881920627143557,
967
+ "grad_norm": 9.971433639526367,
968
+ "learning_rate": 8.649320363489179e-05,
969
+ "loss": 3.5509,
970
+ "step": 135
971
+ },
972
+ {
973
+ "epoch": 0.08884533725298056,
974
+ "grad_norm": 8.350509643554688,
975
+ "learning_rate": 8.626871855061438e-05,
976
+ "loss": 2.8905,
977
+ "step": 136
978
+ },
979
+ {
980
+ "epoch": 0.08949861179160543,
981
+ "grad_norm": 10.003087043762207,
982
+ "learning_rate": 8.604267983514594e-05,
983
+ "loss": 3.369,
984
+ "step": 137
985
+ },
986
+ {
987
+ "epoch": 0.09015188633023027,
988
+ "grad_norm": 11.017991065979004,
989
+ "learning_rate": 8.581509717123273e-05,
990
+ "loss": 3.826,
991
+ "step": 138
992
+ },
993
+ {
994
+ "epoch": 0.09080516086885514,
995
+ "grad_norm": 11.706796646118164,
996
+ "learning_rate": 8.558598030775857e-05,
997
+ "loss": 2.9855,
998
+ "step": 139
999
+ },
1000
+ {
1001
+ "epoch": 0.09145843540748,
1002
+ "grad_norm": 12.903501510620117,
1003
+ "learning_rate": 8.535533905932738e-05,
1004
+ "loss": 2.6285,
1005
+ "step": 140
1006
+ },
1007
+ {
1008
+ "epoch": 0.09211170994610485,
1009
+ "grad_norm": 12.504664421081543,
1010
+ "learning_rate": 8.51231833058426e-05,
1011
+ "loss": 3.0194,
1012
+ "step": 141
1013
+ },
1014
+ {
1015
+ "epoch": 0.09276498448472971,
1016
+ "grad_norm": 14.484477043151855,
1017
+ "learning_rate": 8.488952299208401e-05,
1018
+ "loss": 3.1958,
1019
+ "step": 142
1020
+ },
1021
+ {
1022
+ "epoch": 0.09341825902335456,
1023
+ "grad_norm": 17.707530975341797,
1024
+ "learning_rate": 8.46543681272818e-05,
1025
+ "loss": 3.957,
1026
+ "step": 143
1027
+ },
1028
+ {
1029
+ "epoch": 0.09407153356197942,
1030
+ "grad_norm": 20.62574005126953,
1031
+ "learning_rate": 8.44177287846877e-05,
1032
+ "loss": 3.2235,
1033
+ "step": 144
1034
+ },
1035
+ {
1036
+ "epoch": 0.09472480810060428,
1037
+ "grad_norm": 18.29495620727539,
1038
+ "learning_rate": 8.417961510114356e-05,
1039
+ "loss": 2.3518,
1040
+ "step": 145
1041
+ },
1042
+ {
1043
+ "epoch": 0.09537808263922913,
1044
+ "grad_norm": 28.382152557373047,
1045
+ "learning_rate": 8.39400372766471e-05,
1046
+ "loss": 4.1565,
1047
+ "step": 146
1048
+ },
1049
+ {
1050
+ "epoch": 0.096031357177854,
1051
+ "grad_norm": 18.329111099243164,
1052
+ "learning_rate": 8.36990055739149e-05,
1053
+ "loss": 2.1261,
1054
+ "step": 147
1055
+ },
1056
+ {
1057
+ "epoch": 0.09668463171647886,
1058
+ "grad_norm": 18.869550704956055,
1059
+ "learning_rate": 8.345653031794292e-05,
1060
+ "loss": 2.6734,
1061
+ "step": 148
1062
+ },
1063
+ {
1064
+ "epoch": 0.0973379062551037,
1065
+ "grad_norm": 23.500282287597656,
1066
+ "learning_rate": 8.321262189556409e-05,
1067
+ "loss": 3.4861,
1068
+ "step": 149
1069
+ },
1070
+ {
1071
+ "epoch": 0.09799118079372857,
1072
+ "grad_norm": 38.71291732788086,
1073
+ "learning_rate": 8.296729075500344e-05,
1074
+ "loss": 5.1638,
1075
+ "step": 150
1076
+ },
1077
+ {
1078
+ "epoch": 0.09864445533235343,
1079
+ "grad_norm": 3.8299853801727295,
1080
+ "learning_rate": 8.272054740543052e-05,
1081
+ "loss": 4.0414,
1082
+ "step": 151
1083
+ },
1084
+ {
1085
+ "epoch": 0.09929772987097828,
1086
+ "grad_norm": 3.310225248336792,
1087
+ "learning_rate": 8.247240241650918e-05,
1088
+ "loss": 4.6365,
1089
+ "step": 152
1090
+ },
1091
+ {
1092
+ "epoch": 0.09995100440960314,
1093
+ "grad_norm": 2.5045037269592285,
1094
+ "learning_rate": 8.222286641794488e-05,
1095
+ "loss": 4.4531,
1096
+ "step": 153
1097
+ },
1098
+ {
1099
+ "epoch": 0.10060427894822799,
1100
+ "grad_norm": 2.3877153396606445,
1101
+ "learning_rate": 8.197195009902924e-05,
1102
+ "loss": 4.7702,
1103
+ "step": 154
1104
+ },
1105
+ {
1106
+ "epoch": 0.10125755348685285,
1107
+ "grad_norm": 2.4667954444885254,
1108
+ "learning_rate": 8.171966420818228e-05,
1109
+ "loss": 4.1018,
1110
+ "step": 155
1111
+ },
1112
+ {
1113
+ "epoch": 0.10191082802547771,
1114
+ "grad_norm": 2.424144983291626,
1115
+ "learning_rate": 8.146601955249188e-05,
1116
+ "loss": 4.1768,
1117
+ "step": 156
1118
+ },
1119
+ {
1120
+ "epoch": 0.10256410256410256,
1121
+ "grad_norm": 2.476624011993408,
1122
+ "learning_rate": 8.121102699725089e-05,
1123
+ "loss": 3.963,
1124
+ "step": 157
1125
+ },
1126
+ {
1127
+ "epoch": 0.10321737710272742,
1128
+ "grad_norm": 2.590161085128784,
1129
+ "learning_rate": 8.095469746549172e-05,
1130
+ "loss": 3.9657,
1131
+ "step": 158
1132
+ },
1133
+ {
1134
+ "epoch": 0.10387065164135229,
1135
+ "grad_norm": 2.8294732570648193,
1136
+ "learning_rate": 8.069704193751832e-05,
1137
+ "loss": 4.19,
1138
+ "step": 159
1139
+ },
1140
+ {
1141
+ "epoch": 0.10452392617997713,
1142
+ "grad_norm": 2.526905059814453,
1143
+ "learning_rate": 8.043807145043604e-05,
1144
+ "loss": 3.7846,
1145
+ "step": 160
1146
+ },
1147
+ {
1148
+ "epoch": 0.105177200718602,
1149
+ "grad_norm": 2.666771411895752,
1150
+ "learning_rate": 8.017779709767858e-05,
1151
+ "loss": 4.2755,
1152
+ "step": 161
1153
+ },
1154
+ {
1155
+ "epoch": 0.10583047525722684,
1156
+ "grad_norm": 2.6683263778686523,
1157
+ "learning_rate": 7.991623002853296e-05,
1158
+ "loss": 3.8973,
1159
+ "step": 162
1160
+ },
1161
+ {
1162
+ "epoch": 0.1064837497958517,
1163
+ "grad_norm": 2.753737688064575,
1164
+ "learning_rate": 7.965338144766186e-05,
1165
+ "loss": 4.0508,
1166
+ "step": 163
1167
+ },
1168
+ {
1169
+ "epoch": 0.10713702433447657,
1170
+ "grad_norm": 2.6762168407440186,
1171
+ "learning_rate": 7.938926261462366e-05,
1172
+ "loss": 3.8199,
1173
+ "step": 164
1174
+ },
1175
+ {
1176
+ "epoch": 0.10779029887310142,
1177
+ "grad_norm": 2.7907073497772217,
1178
+ "learning_rate": 7.912388484339012e-05,
1179
+ "loss": 4.0572,
1180
+ "step": 165
1181
+ },
1182
+ {
1183
+ "epoch": 0.10844357341172628,
1184
+ "grad_norm": 2.9300825595855713,
1185
+ "learning_rate": 7.88572595018617e-05,
1186
+ "loss": 3.85,
1187
+ "step": 166
1188
+ },
1189
+ {
1190
+ "epoch": 0.10909684795035114,
1191
+ "grad_norm": 2.946000337600708,
1192
+ "learning_rate": 7.858939801138061e-05,
1193
+ "loss": 4.0084,
1194
+ "step": 167
1195
+ },
1196
+ {
1197
+ "epoch": 0.10975012248897599,
1198
+ "grad_norm": 3.150700569152832,
1199
+ "learning_rate": 7.832031184624164e-05,
1200
+ "loss": 4.0213,
1201
+ "step": 168
1202
+ },
1203
+ {
1204
+ "epoch": 0.11040339702760085,
1205
+ "grad_norm": 3.130871057510376,
1206
+ "learning_rate": 7.80500125332005e-05,
1207
+ "loss": 3.7754,
1208
+ "step": 169
1209
+ },
1210
+ {
1211
+ "epoch": 0.1110566715662257,
1212
+ "grad_norm": 3.2514853477478027,
1213
+ "learning_rate": 7.777851165098012e-05,
1214
+ "loss": 3.8823,
1215
+ "step": 170
1216
+ },
1217
+ {
1218
+ "epoch": 0.11170994610485056,
1219
+ "grad_norm": 3.6097805500030518,
1220
+ "learning_rate": 7.750582082977467e-05,
1221
+ "loss": 3.8464,
1222
+ "step": 171
1223
+ },
1224
+ {
1225
+ "epoch": 0.11236322064347543,
1226
+ "grad_norm": 3.4300525188446045,
1227
+ "learning_rate": 7.723195175075136e-05,
1228
+ "loss": 3.587,
1229
+ "step": 172
1230
+ },
1231
+ {
1232
+ "epoch": 0.11301649518210027,
1233
+ "grad_norm": 3.962552070617676,
1234
+ "learning_rate": 7.695691614555003e-05,
1235
+ "loss": 3.9351,
1236
+ "step": 173
1237
+ },
1238
+ {
1239
+ "epoch": 0.11366976972072514,
1240
+ "grad_norm": 3.7221381664276123,
1241
+ "learning_rate": 7.668072579578058e-05,
1242
+ "loss": 3.7333,
1243
+ "step": 174
1244
+ },
1245
+ {
1246
+ "epoch": 0.11432304425935,
1247
+ "grad_norm": 3.8638551235198975,
1248
+ "learning_rate": 7.64033925325184e-05,
1249
+ "loss": 3.2305,
1250
+ "step": 175
1251
+ },
1252
+ {
1253
+ "epoch": 0.11497631879797485,
1254
+ "grad_norm": 4.228523254394531,
1255
+ "learning_rate": 7.612492823579745e-05,
1256
+ "loss": 3.7991,
1257
+ "step": 176
1258
+ },
1259
+ {
1260
+ "epoch": 0.11562959333659971,
1261
+ "grad_norm": 4.375904083251953,
1262
+ "learning_rate": 7.584534483410137e-05,
1263
+ "loss": 3.3348,
1264
+ "step": 177
1265
+ },
1266
+ {
1267
+ "epoch": 0.11628286787522456,
1268
+ "grad_norm": 5.0705156326293945,
1269
+ "learning_rate": 7.55646543038526e-05,
1270
+ "loss": 4.2036,
1271
+ "step": 178
1272
+ },
1273
+ {
1274
+ "epoch": 0.11693614241384942,
1275
+ "grad_norm": 5.519981384277344,
1276
+ "learning_rate": 7.528286866889924e-05,
1277
+ "loss": 3.987,
1278
+ "step": 179
1279
+ },
1280
+ {
1281
+ "epoch": 0.11758941695247428,
1282
+ "grad_norm": 5.943816184997559,
1283
+ "learning_rate": 7.500000000000001e-05,
1284
+ "loss": 3.4623,
1285
+ "step": 180
1286
+ },
1287
+ {
1288
+ "epoch": 0.11824269149109913,
1289
+ "grad_norm": 6.2165751457214355,
1290
+ "learning_rate": 7.471606041430723e-05,
1291
+ "loss": 3.4462,
1292
+ "step": 181
1293
+ },
1294
+ {
1295
+ "epoch": 0.11889596602972399,
1296
+ "grad_norm": 6.183679580688477,
1297
+ "learning_rate": 7.443106207484776e-05,
1298
+ "loss": 3.6863,
1299
+ "step": 182
1300
+ },
1301
+ {
1302
+ "epoch": 0.11954924056834886,
1303
+ "grad_norm": 7.212845802307129,
1304
+ "learning_rate": 7.414501719000187e-05,
1305
+ "loss": 3.4877,
1306
+ "step": 183
1307
+ },
1308
+ {
1309
+ "epoch": 0.1202025151069737,
1310
+ "grad_norm": 7.299278736114502,
1311
+ "learning_rate": 7.385793801298042e-05,
1312
+ "loss": 3.1686,
1313
+ "step": 184
1314
+ },
1315
+ {
1316
+ "epoch": 0.12085578964559857,
1317
+ "grad_norm": 7.535823345184326,
1318
+ "learning_rate": 7.35698368412999e-05,
1319
+ "loss": 3.7468,
1320
+ "step": 185
1321
+ },
1322
+ {
1323
+ "epoch": 0.12150906418422341,
1324
+ "grad_norm": 9.03598690032959,
1325
+ "learning_rate": 7.328072601625557e-05,
1326
+ "loss": 3.3324,
1327
+ "step": 186
1328
+ },
1329
+ {
1330
+ "epoch": 0.12216233872284828,
1331
+ "grad_norm": 8.97368335723877,
1332
+ "learning_rate": 7.2990617922393e-05,
1333
+ "loss": 3.0458,
1334
+ "step": 187
1335
+ },
1336
+ {
1337
+ "epoch": 0.12281561326147314,
1338
+ "grad_norm": 11.850152015686035,
1339
+ "learning_rate": 7.269952498697734e-05,
1340
+ "loss": 3.6011,
1341
+ "step": 188
1342
+ },
1343
+ {
1344
+ "epoch": 0.12346888780009799,
1345
+ "grad_norm": 12.661114692687988,
1346
+ "learning_rate": 7.240745967946113e-05,
1347
+ "loss": 3.4378,
1348
+ "step": 189
1349
+ },
1350
+ {
1351
+ "epoch": 0.12412216233872285,
1352
+ "grad_norm": 15.797039985656738,
1353
+ "learning_rate": 7.211443451095007e-05,
1354
+ "loss": 3.9093,
1355
+ "step": 190
1356
+ },
1357
+ {
1358
+ "epoch": 0.12477543687734771,
1359
+ "grad_norm": 13.158418655395508,
1360
+ "learning_rate": 7.18204620336671e-05,
1361
+ "loss": 3.2219,
1362
+ "step": 191
1363
+ },
1364
+ {
1365
+ "epoch": 0.12542871141597256,
1366
+ "grad_norm": 14.715095520019531,
1367
+ "learning_rate": 7.152555484041476e-05,
1368
+ "loss": 2.8733,
1369
+ "step": 192
1370
+ },
1371
+ {
1372
+ "epoch": 0.1260819859545974,
1373
+ "grad_norm": 15.411552429199219,
1374
+ "learning_rate": 7.122972556403567e-05,
1375
+ "loss": 3.4753,
1376
+ "step": 193
1377
+ },
1378
+ {
1379
+ "epoch": 0.12673526049322228,
1380
+ "grad_norm": 15.468315124511719,
1381
+ "learning_rate": 7.09329868768714e-05,
1382
+ "loss": 3.6554,
1383
+ "step": 194
1384
+ },
1385
+ {
1386
+ "epoch": 0.12738853503184713,
1387
+ "grad_norm": 17.27188491821289,
1388
+ "learning_rate": 7.063535149021973e-05,
1389
+ "loss": 3.4804,
1390
+ "step": 195
1391
+ },
1392
+ {
1393
+ "epoch": 0.12804180957047198,
1394
+ "grad_norm": 21.4166316986084,
1395
+ "learning_rate": 7.033683215379002e-05,
1396
+ "loss": 2.967,
1397
+ "step": 196
1398
+ },
1399
+ {
1400
+ "epoch": 0.12869508410909686,
1401
+ "grad_norm": 18.70254135131836,
1402
+ "learning_rate": 7.003744165515705e-05,
1403
+ "loss": 2.5688,
1404
+ "step": 197
1405
+ },
1406
+ {
1407
+ "epoch": 0.1293483586477217,
1408
+ "grad_norm": 22.17182159423828,
1409
+ "learning_rate": 6.973719281921335e-05,
1410
+ "loss": 3.8092,
1411
+ "step": 198
1412
+ },
1413
+ {
1414
+ "epoch": 0.13000163318634655,
1415
+ "grad_norm": 21.631132125854492,
1416
+ "learning_rate": 6.943609850761979e-05,
1417
+ "loss": 3.4749,
1418
+ "step": 199
1419
+ },
1420
+ {
1421
+ "epoch": 0.13065490772497143,
1422
+ "grad_norm": 27.764705657958984,
1423
+ "learning_rate": 6.91341716182545e-05,
1424
+ "loss": 3.2158,
1425
+ "step": 200
1426
+ },
1427
+ {
1428
+ "epoch": 0.13065490772497143,
1429
+ "eval_loss": 1.0087602138519287,
1430
+ "eval_runtime": 193.6858,
1431
+ "eval_samples_per_second": 13.31,
1432
+ "eval_steps_per_second": 3.33,
1433
+ "step": 200
1434
  }
1435
  ],
1436
  "logging_steps": 1,
 
1459
  "attributes": {}
1460
  }
1461
  },
1462
+ "total_flos": 2.899023380663501e+17,
1463
  "train_batch_size": 8,
1464
  "trial_name": null,
1465
  "trial_params": null