Training in progress, step 200, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +713 -5

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7937b68fd545c576104617d8c382cba7a38dd50f09805a5505f8cc3b2b4a2c7a
 size 42487072

 version https://git-lfs.github.com/spec/v1
+oid sha256:c47670b132dd2edcb461863c6036215ff9426791f6d2edad95787f2bf9c4b0c0
 size 42487072

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ac187f718ec4c2057634f870805bfb3892ecf2c98cc966fb021bcfabb1b848e9
 size 21735354

 version https://git-lfs.github.com/spec/v1
+oid sha256:1bd0e3116f3e6ed58e866a2497073987ad79e1519a7ec0fde83c88a80e218921
 size 21735354

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c87e2e0accb60de579a7f577ed3a530cb2e0c7c1e5e57cd42d4d852b8c2d72a6
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:0b9b9e7a306be6412315e098350a79bbe4983f3eeaaad687e5922807f860c487
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f7e9f0c2a27af03f3c1874438820d046de94b36aaec3b0cc778f96def4616314
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:530505d607699f384741067a5f9139d72f043713adb680898a3f1b5714170c97
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 3.370993137359619,
-  "best_model_checkpoint": "miner_id_24/checkpoint-100",
-  "epoch": 0.02046349823502328,
   "eval_steps": 100,
-  "global_step": 100,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -723,6 +723,714 @@
       "eval_samples_per_second": 174.318,
       "eval_steps_per_second": 43.59,
       "step": 100
     }
   ],
   "logging_steps": 1,
@@ -751,7 +1459,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 1885706926424064.0,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 3.208616256713867,
+  "best_model_checkpoint": "miner_id_24/checkpoint-200",
+  "epoch": 0.04092699647004656,
   "eval_steps": 100,
+  "global_step": 200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 174.318,
       "eval_steps_per_second": 43.59,
       "step": 100
+    },
+    {
+      "epoch": 0.02066813321737351,
+      "grad_norm": 11.214513778686523,
+      "learning_rate": 9.31367192988896e-05,
+      "loss": 14.0912,
+      "step": 101
+    },
+    {
+      "epoch": 0.020872768199723744,
+      "grad_norm": 10.00228214263916,
+      "learning_rate": 9.297032057507264e-05,
+      "loss": 13.9889,
+      "step": 102
+    },
+    {
+      "epoch": 0.021077403182073975,
+      "grad_norm": 10.31716251373291,
+      "learning_rate": 9.280208114573859e-05,
+      "loss": 14.1193,
+      "step": 103
+    },
+    {
+      "epoch": 0.02128203816442421,
+      "grad_norm": 9.314844131469727,
+      "learning_rate": 9.263200821770461e-05,
+      "loss": 13.7396,
+      "step": 104
+    },
+    {
+      "epoch": 0.02148667314677444,
+      "grad_norm": 7.567698001861572,
+      "learning_rate": 9.246010907632895e-05,
+      "loss": 13.7879,
+      "step": 105
+    },
+    {
+      "epoch": 0.021691308129124672,
+      "grad_norm": 6.531108856201172,
+      "learning_rate": 9.228639108519868e-05,
+      "loss": 13.5185,
+      "step": 106
+    },
+    {
+      "epoch": 0.021895943111474907,
+      "grad_norm": 5.202017307281494,
+      "learning_rate": 9.211086168581433e-05,
+      "loss": 13.3491,
+      "step": 107
+    },
+    {
+      "epoch": 0.022100578093825138,
+      "grad_norm": 4.530038356781006,
+      "learning_rate": 9.193352839727121e-05,
+      "loss": 13.2004,
+      "step": 108
+    },
+    {
+      "epoch": 0.022305213076175372,
+      "grad_norm": 4.831387996673584,
+      "learning_rate": 9.175439881593716e-05,
+      "loss": 13.4205,
+      "step": 109
+    },
+    {
+      "epoch": 0.022509848058525603,
+      "grad_norm": 4.692884922027588,
+      "learning_rate": 9.157348061512727e-05,
+      "loss": 13.4912,
+      "step": 110
+    },
+    {
+      "epoch": 0.022714483040875838,
+      "grad_norm": 5.204988479614258,
+      "learning_rate": 9.139078154477512e-05,
+      "loss": 13.1214,
+      "step": 111
+    },
+    {
+      "epoch": 0.02291911802322607,
+      "grad_norm": 4.781569004058838,
+      "learning_rate": 9.120630943110077e-05,
+      "loss": 12.6118,
+      "step": 112
+    },
+    {
+      "epoch": 0.023123753005576304,
+      "grad_norm": 4.754026412963867,
+      "learning_rate": 9.102007217627568e-05,
+      "loss": 13.186,
+      "step": 113
+    },
+    {
+      "epoch": 0.023328387987926535,
+      "grad_norm": 5.035665035247803,
+      "learning_rate": 9.083207775808396e-05,
+      "loss": 12.7322,
+      "step": 114
+    },
+    {
+      "epoch": 0.02353302297027677,
+      "grad_norm": 5.12575626373291,
+      "learning_rate": 9.064233422958077e-05,
+      "loss": 13.0182,
+      "step": 115
+    },
+    {
+      "epoch": 0.023737657952627,
+      "grad_norm": 5.39860200881958,
+      "learning_rate": 9.045084971874738e-05,
+      "loss": 13.4676,
+      "step": 116
+    },
+    {
+      "epoch": 0.023942292934977235,
+      "grad_norm": 5.005839824676514,
+      "learning_rate": 9.025763242814291e-05,
+      "loss": 13.0532,
+      "step": 117
+    },
+    {
+      "epoch": 0.024146927917327466,
+      "grad_norm": 5.046457290649414,
+      "learning_rate": 9.006269063455304e-05,
+      "loss": 13.336,
+      "step": 118
+    },
+    {
+      "epoch": 0.0243515628996777,
+      "grad_norm": 4.951815128326416,
+      "learning_rate": 8.986603268863536e-05,
+      "loss": 13.1308,
+      "step": 119
+    },
+    {
+      "epoch": 0.02455619788202793,
+      "grad_norm": 5.16800594329834,
+      "learning_rate": 8.966766701456177e-05,
+      "loss": 12.7553,
+      "step": 120
+    },
+    {
+      "epoch": 0.024760832864378166,
+      "grad_norm": 5.190509796142578,
+      "learning_rate": 8.94676021096575e-05,
+      "loss": 13.2239,
+      "step": 121
+    },
+    {
+      "epoch": 0.024965467846728397,
+      "grad_norm": 5.662418365478516,
+      "learning_rate": 8.926584654403724e-05,
+      "loss": 13.2593,
+      "step": 122
+    },
+    {
+      "epoch": 0.025170102829078632,
+      "grad_norm": 5.604646682739258,
+      "learning_rate": 8.906240896023794e-05,
+      "loss": 13.2693,
+      "step": 123
+    },
+    {
+      "epoch": 0.025374737811428863,
+      "grad_norm": 5.807793140411377,
+      "learning_rate": 8.885729807284856e-05,
+      "loss": 13.465,
+      "step": 124
+    },
+    {
+      "epoch": 0.025579372793779098,
+      "grad_norm": 6.032169818878174,
+      "learning_rate": 8.865052266813685e-05,
+      "loss": 13.0197,
+      "step": 125
+    },
+    {
+      "epoch": 0.02578400777612933,
+      "grad_norm": 5.969254970550537,
+      "learning_rate": 8.844209160367299e-05,
+      "loss": 12.91,
+      "step": 126
+    },
+    {
+      "epoch": 0.025988642758479563,
+      "grad_norm": 5.627323627471924,
+      "learning_rate": 8.823201380795001e-05,
+      "loss": 12.9693,
+      "step": 127
+    },
+    {
+      "epoch": 0.026193277740829794,
+      "grad_norm": 5.775904655456543,
+      "learning_rate": 8.802029828000156e-05,
+      "loss": 13.3716,
+      "step": 128
+    },
+    {
+      "epoch": 0.02639791272318003,
+      "grad_norm": 6.050631999969482,
+      "learning_rate": 8.780695408901613e-05,
+      "loss": 12.9946,
+      "step": 129
+    },
+    {
+      "epoch": 0.02660254770553026,
+      "grad_norm": 6.608086109161377,
+      "learning_rate": 8.759199037394887e-05,
+      "loss": 12.7268,
+      "step": 130
+    },
+    {
+      "epoch": 0.026807182687880494,
+      "grad_norm": 6.4099202156066895,
+      "learning_rate": 8.737541634312985e-05,
+      "loss": 13.3797,
+      "step": 131
+    },
+    {
+      "epoch": 0.027011817670230726,
+      "grad_norm": 6.958422660827637,
+      "learning_rate": 8.715724127386972e-05,
+      "loss": 13.2627,
+      "step": 132
+    },
+    {
+      "epoch": 0.02721645265258096,
+      "grad_norm": 6.657001495361328,
+      "learning_rate": 8.693747451206232e-05,
+      "loss": 13.1662,
+      "step": 133
+    },
+    {
+      "epoch": 0.02742108763493119,
+      "grad_norm": 6.775047302246094,
+      "learning_rate": 8.671612547178428e-05,
+      "loss": 12.8757,
+      "step": 134
+    },
+    {
+      "epoch": 0.027625722617281426,
+      "grad_norm": 6.7623419761657715,
+      "learning_rate": 8.649320363489179e-05,
+      "loss": 12.5799,
+      "step": 135
+    },
+    {
+      "epoch": 0.027830357599631657,
+      "grad_norm": 7.408362865447998,
+      "learning_rate": 8.626871855061438e-05,
+      "loss": 13.8727,
+      "step": 136
+    },
+    {
+      "epoch": 0.02803499258198189,
+      "grad_norm": 6.984137535095215,
+      "learning_rate": 8.604267983514594e-05,
+      "loss": 12.6957,
+      "step": 137
+    },
+    {
+      "epoch": 0.028239627564332122,
+      "grad_norm": 7.494143486022949,
+      "learning_rate": 8.581509717123273e-05,
+      "loss": 13.5292,
+      "step": 138
+    },
+    {
+      "epoch": 0.028444262546682357,
+      "grad_norm": 7.043254375457764,
+      "learning_rate": 8.558598030775857e-05,
+      "loss": 12.5103,
+      "step": 139
+    },
+    {
+      "epoch": 0.028648897529032588,
+      "grad_norm": 7.2675957679748535,
+      "learning_rate": 8.535533905932738e-05,
+      "loss": 12.8951,
+      "step": 140
+    },
+    {
+      "epoch": 0.02885353251138282,
+      "grad_norm": 7.874957084655762,
+      "learning_rate": 8.51231833058426e-05,
+      "loss": 12.9737,
+      "step": 141
+    },
+    {
+      "epoch": 0.029058167493733054,
+      "grad_norm": 8.002019882202148,
+      "learning_rate": 8.488952299208401e-05,
+      "loss": 12.8148,
+      "step": 142
+    },
+    {
+      "epoch": 0.029262802476083285,
+      "grad_norm": 8.36933422088623,
+      "learning_rate": 8.46543681272818e-05,
+      "loss": 12.4946,
+      "step": 143
+    },
+    {
+      "epoch": 0.02946743745843352,
+      "grad_norm": 9.498835563659668,
+      "learning_rate": 8.44177287846877e-05,
+      "loss": 13.271,
+      "step": 144
+    },
+    {
+      "epoch": 0.02967207244078375,
+      "grad_norm": 8.976995468139648,
+      "learning_rate": 8.417961510114356e-05,
+      "loss": 12.5241,
+      "step": 145
+    },
+    {
+      "epoch": 0.029876707423133985,
+      "grad_norm": 9.178775787353516,
+      "learning_rate": 8.39400372766471e-05,
+      "loss": 12.4166,
+      "step": 146
+    },
+    {
+      "epoch": 0.030081342405484216,
+      "grad_norm": 10.875651359558105,
+      "learning_rate": 8.36990055739149e-05,
+      "loss": 12.7323,
+      "step": 147
+    },
+    {
+      "epoch": 0.03028597738783445,
+      "grad_norm": 11.843050003051758,
+      "learning_rate": 8.345653031794292e-05,
+      "loss": 12.6294,
+      "step": 148
+    },
+    {
+      "epoch": 0.030490612370184682,
+      "grad_norm": 12.797874450683594,
+      "learning_rate": 8.321262189556409e-05,
+      "loss": 11.9468,
+      "step": 149
+    },
+    {
+      "epoch": 0.030695247352534916,
+      "grad_norm": 21.556180953979492,
+      "learning_rate": 8.296729075500344e-05,
+      "loss": 14.375,
+      "step": 150
+    },
+    {
+      "epoch": 0.030899882334885147,
+      "grad_norm": 5.878223419189453,
+      "learning_rate": 8.272054740543052e-05,
+      "loss": 13.2625,
+      "step": 151
+    },
+    {
+      "epoch": 0.031104517317235382,
+      "grad_norm": 6.683862209320068,
+      "learning_rate": 8.247240241650918e-05,
+      "loss": 13.4469,
+      "step": 152
+    },
+    {
+      "epoch": 0.03130915229958561,
+      "grad_norm": 6.695138931274414,
+      "learning_rate": 8.222286641794488e-05,
+      "loss": 13.8935,
+      "step": 153
+    },
+    {
+      "epoch": 0.031513787281935844,
+      "grad_norm": 6.529450416564941,
+      "learning_rate": 8.197195009902924e-05,
+      "loss": 13.2827,
+      "step": 154
+    },
+    {
+      "epoch": 0.03171842226428608,
+      "grad_norm": 5.889492034912109,
+      "learning_rate": 8.171966420818228e-05,
+      "loss": 13.2815,
+      "step": 155
+    },
+    {
+      "epoch": 0.03192305724663631,
+      "grad_norm": 5.005529403686523,
+      "learning_rate": 8.146601955249188e-05,
+      "loss": 13.1348,
+      "step": 156
+    },
+    {
+      "epoch": 0.032127692228986544,
+      "grad_norm": 4.527781009674072,
+      "learning_rate": 8.121102699725089e-05,
+      "loss": 12.9616,
+      "step": 157
+    },
+    {
+      "epoch": 0.032332327211336775,
+      "grad_norm": 3.992450714111328,
+      "learning_rate": 8.095469746549172e-05,
+      "loss": 13.2171,
+      "step": 158
+    },
+    {
+      "epoch": 0.032536962193687013,
+      "grad_norm": 3.9536304473876953,
+      "learning_rate": 8.069704193751832e-05,
+      "loss": 13.5083,
+      "step": 159
+    },
+    {
+      "epoch": 0.032741597176037245,
+      "grad_norm": 4.0044264793396,
+      "learning_rate": 8.043807145043604e-05,
+      "loss": 13.5044,
+      "step": 160
+    },
+    {
+      "epoch": 0.032946232158387476,
+      "grad_norm": 4.166686058044434,
+      "learning_rate": 8.017779709767858e-05,
+      "loss": 12.9416,
+      "step": 161
+    },
+    {
+      "epoch": 0.03315086714073771,
+      "grad_norm": 4.292598724365234,
+      "learning_rate": 7.991623002853296e-05,
+      "loss": 12.932,
+      "step": 162
+    },
+    {
+      "epoch": 0.033355502123087945,
+      "grad_norm": 4.622048377990723,
+      "learning_rate": 7.965338144766186e-05,
+      "loss": 13.1667,
+      "step": 163
+    },
+    {
+      "epoch": 0.033560137105438176,
+      "grad_norm": 4.218106746673584,
+      "learning_rate": 7.938926261462366e-05,
+      "loss": 12.9429,
+      "step": 164
+    },
+    {
+      "epoch": 0.03376477208778841,
+      "grad_norm": 4.615002155303955,
+      "learning_rate": 7.912388484339012e-05,
+      "loss": 13.4383,
+      "step": 165
+    },
+    {
+      "epoch": 0.03396940707013864,
+      "grad_norm": 4.371853828430176,
+      "learning_rate": 7.88572595018617e-05,
+      "loss": 12.6596,
+      "step": 166
+    },
+    {
+      "epoch": 0.034174042052488876,
+      "grad_norm": 4.507296562194824,
+      "learning_rate": 7.858939801138061e-05,
+      "loss": 13.0297,
+      "step": 167
+    },
+    {
+      "epoch": 0.03437867703483911,
+      "grad_norm": 4.6610941886901855,
+      "learning_rate": 7.832031184624164e-05,
+      "loss": 12.6801,
+      "step": 168
+    },
+    {
+      "epoch": 0.03458331201718934,
+      "grad_norm": 4.3974714279174805,
+      "learning_rate": 7.80500125332005e-05,
+      "loss": 12.6394,
+      "step": 169
+    },
+    {
+      "epoch": 0.03478794699953957,
+      "grad_norm": 4.65360689163208,
+      "learning_rate": 7.777851165098012e-05,
+      "loss": 13.2642,
+      "step": 170
+    },
+    {
+      "epoch": 0.03499258198188981,
+      "grad_norm": 4.651695251464844,
+      "learning_rate": 7.750582082977467e-05,
+      "loss": 13.3055,
+      "step": 171
+    },
+    {
+      "epoch": 0.03519721696424004,
+      "grad_norm": 5.114010810852051,
+      "learning_rate": 7.723195175075136e-05,
+      "loss": 13.0045,
+      "step": 172
+    },
+    {
+      "epoch": 0.03540185194659027,
+      "grad_norm": 5.113755702972412,
+      "learning_rate": 7.695691614555003e-05,
+      "loss": 12.9366,
+      "step": 173
+    },
+    {
+      "epoch": 0.0356064869289405,
+      "grad_norm": 5.089533805847168,
+      "learning_rate": 7.668072579578058e-05,
+      "loss": 12.959,
+      "step": 174
+    },
+    {
+      "epoch": 0.03581112191129073,
+      "grad_norm": 5.559483051300049,
+      "learning_rate": 7.64033925325184e-05,
+      "loss": 13.2842,
+      "step": 175
+    },
+    {
+      "epoch": 0.03601575689364097,
+      "grad_norm": 5.3359761238098145,
+      "learning_rate": 7.612492823579745e-05,
+      "loss": 13.0262,
+      "step": 176
+    },
+    {
+      "epoch": 0.0362203918759912,
+      "grad_norm": 5.409842014312744,
+      "learning_rate": 7.584534483410137e-05,
+      "loss": 13.0076,
+      "step": 177
+    },
+    {
+      "epoch": 0.03642502685834143,
+      "grad_norm": 5.253081321716309,
+      "learning_rate": 7.55646543038526e-05,
+      "loss": 11.9703,
+      "step": 178
+    },
+    {
+      "epoch": 0.03662966184069166,
+      "grad_norm": 5.482647895812988,
+      "learning_rate": 7.528286866889924e-05,
+      "loss": 12.6692,
+      "step": 179
+    },
+    {
+      "epoch": 0.0368342968230419,
+      "grad_norm": 5.659306049346924,
+      "learning_rate": 7.500000000000001e-05,
+      "loss": 13.0874,
+      "step": 180
+    },
+    {
+      "epoch": 0.03703893180539213,
+      "grad_norm": 5.71022891998291,
+      "learning_rate": 7.471606041430723e-05,
+      "loss": 12.9602,
+      "step": 181
+    },
+    {
+      "epoch": 0.03724356678774236,
+      "grad_norm": 6.031240940093994,
+      "learning_rate": 7.443106207484776e-05,
+      "loss": 12.8276,
+      "step": 182
+    },
+    {
+      "epoch": 0.037448201770092594,
+      "grad_norm": 5.916280746459961,
+      "learning_rate": 7.414501719000187e-05,
+      "loss": 12.7036,
+      "step": 183
+    },
+    {
+      "epoch": 0.03765283675244283,
+      "grad_norm": 6.090421676635742,
+      "learning_rate": 7.385793801298042e-05,
+      "loss": 12.5362,
+      "step": 184
+    },
+    {
+      "epoch": 0.03785747173479306,
+      "grad_norm": 6.97968053817749,
+      "learning_rate": 7.35698368412999e-05,
+      "loss": 13.1994,
+      "step": 185
+    },
+    {
+      "epoch": 0.038062106717143294,
+      "grad_norm": 6.6946587562561035,
+      "learning_rate": 7.328072601625557e-05,
+      "loss": 12.9428,
+      "step": 186
+    },
+    {
+      "epoch": 0.038266741699493526,
+      "grad_norm": 6.86458158493042,
+      "learning_rate": 7.2990617922393e-05,
+      "loss": 13.5336,
+      "step": 187
+    },
+    {
+      "epoch": 0.038471376681843764,
+      "grad_norm": 7.41053581237793,
+      "learning_rate": 7.269952498697734e-05,
+      "loss": 13.1752,
+      "step": 188
+    },
+    {
+      "epoch": 0.038676011664193995,
+      "grad_norm": 6.769413948059082,
+      "learning_rate": 7.240745967946113e-05,
+      "loss": 12.3618,
+      "step": 189
+    },
+    {
+      "epoch": 0.038880646646544226,
+      "grad_norm": 8.171807289123535,
+      "learning_rate": 7.211443451095007e-05,
+      "loss": 13.4612,
+      "step": 190
+    },
+    {
+      "epoch": 0.03908528162889446,
+      "grad_norm": 7.6870598793029785,
+      "learning_rate": 7.18204620336671e-05,
+      "loss": 12.8721,
+      "step": 191
+    },
+    {
+      "epoch": 0.039289916611244695,
+      "grad_norm": 7.984126567840576,
+      "learning_rate": 7.152555484041476e-05,
+      "loss": 12.5025,
+      "step": 192
+    },
+    {
+      "epoch": 0.039494551593594926,
+      "grad_norm": 8.748424530029297,
+      "learning_rate": 7.122972556403567e-05,
+      "loss": 12.5803,
+      "step": 193
+    },
+    {
+      "epoch": 0.03969918657594516,
+      "grad_norm": 8.19789981842041,
+      "learning_rate": 7.09329868768714e-05,
+      "loss": 13.0793,
+      "step": 194
+    },
+    {
+      "epoch": 0.03990382155829539,
+      "grad_norm": 8.25755786895752,
+      "learning_rate": 7.063535149021973e-05,
+      "loss": 13.2436,
+      "step": 195
+    },
+    {
+      "epoch": 0.040108456540645626,
+      "grad_norm": 10.084080696105957,
+      "learning_rate": 7.033683215379002e-05,
+      "loss": 12.4769,
+      "step": 196
+    },
+    {
+      "epoch": 0.04031309152299586,
+      "grad_norm": 11.04244327545166,
+      "learning_rate": 7.003744165515705e-05,
+      "loss": 13.3229,
+      "step": 197
+    },
+    {
+      "epoch": 0.04051772650534609,
+      "grad_norm": 10.718149185180664,
+      "learning_rate": 6.973719281921335e-05,
+      "loss": 13.0458,
+      "step": 198
+    },
+    {
+      "epoch": 0.04072236148769632,
+      "grad_norm": 12.596996307373047,
+      "learning_rate": 6.943609850761979e-05,
+      "loss": 13.2156,
+      "step": 199
+    },
+    {
+      "epoch": 0.04092699647004656,
+      "grad_norm": 16.626497268676758,
+      "learning_rate": 6.91341716182545e-05,
+      "loss": 12.1366,
+      "step": 200
+    },
+    {
+      "epoch": 0.04092699647004656,
+      "eval_loss": 3.208616256713867,
+      "eval_runtime": 47.3335,
+      "eval_samples_per_second": 173.872,
+      "eval_steps_per_second": 43.479,
+      "step": 200
     }
   ],
   "logging_steps": 1,
       "attributes": {}
     }
   },
+  "total_flos": 3771413852848128.0,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null