Training in progress, step 400, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +363 -5

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7032c700062a12e7d557b23e74cfadfc91a0420a0ae4a3b2af217f1222d5b973
 size 578859568

 version https://git-lfs.github.com/spec/v1
+oid sha256:024de3f8e2afb4251670fd08dca4001b5a22b5aa81ae8963df0898d4eaf258bf
 size 578859568

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2e4dd87342bbdc488b7be2102740593ad560124616d462b5e22f0d4e4dc37a98
 size 294324692

 version https://git-lfs.github.com/spec/v1
+oid sha256:dbc5bb928545579b4a25c09020cb0ddd259af2200a0a70f135813471914ab7ec
 size 294324692

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:675d87ddb0df920fcc76dfa60cec09c41e3d4f94fc027859559749f5c5664296
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:404ce932a6e24aba5cc2e7fbd9a324e1334ed0f859ca94dba9e1b8e1bea61d54
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:cca9c47b2bb879349fb9e7d9ad8f1da04c3b167e659c3d39ce46064118df0eea
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:4fe5dbedd3d0105f98d40d84fbe544af591501f8969d82c59cef4d7bb5f81712
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 0.7205991148948669,
-  "best_model_checkpoint": "miner_id_24/checkpoint-350",
-  "epoch": 0.009728910848430519,
   "eval_steps": 50,
-  "global_step": 350,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -2521,6 +2521,364 @@
       "eval_samples_per_second": 11.514,
       "eval_steps_per_second": 5.767,
       "step": 350
     }
   ],
   "logging_steps": 1,
@@ -2549,7 +2907,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 2.248730763853824e+16,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 0.7006093859672546,
+  "best_model_checkpoint": "miner_id_24/checkpoint-400",
+  "epoch": 0.011118755255349163,
   "eval_steps": 50,
+  "global_step": 400,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 11.514,
       "eval_steps_per_second": 5.767,
       "step": 350
+    },
+    {
+      "epoch": 0.00975670773656889,
+      "grad_norm": 0.1047213226556778,
+      "learning_rate": 0.0004226366711111808,
+      "loss": 0.942,
+      "step": 351
+    },
+    {
+      "epoch": 0.009784504624707265,
+      "grad_norm": 0.10103016346693039,
+      "learning_rate": 0.00041741373548960395,
+      "loss": 0.7983,
+      "step": 352
+    },
+    {
+      "epoch": 0.009812301512845636,
+      "grad_norm": 0.10744207352399826,
+      "learning_rate": 0.00041221474770752696,
+      "loss": 1.0167,
+      "step": 353
+    },
+    {
+      "epoch": 0.00984009840098401,
+      "grad_norm": 0.10127032548189163,
+      "learning_rate": 0.0004070399214749743,
+      "loss": 0.9075,
+      "step": 354
+    },
+    {
+      "epoch": 0.009867895289122382,
+      "grad_norm": 0.10049542039632797,
+      "learning_rate": 0.000401889469508784,
+      "loss": 0.9626,
+      "step": 355
+    },
+    {
+      "epoch": 0.009895692177260756,
+      "grad_norm": 0.08796097338199615,
+      "learning_rate": 0.00039676360352386354,
+      "loss": 0.7321,
+      "step": 356
+    },
+    {
+      "epoch": 0.009923489065399128,
+      "grad_norm": 0.07982050627470016,
+      "learning_rate": 0.00039166253422448684,
+      "loss": 0.6331,
+      "step": 357
+    },
+    {
+      "epoch": 0.009951285953537501,
+      "grad_norm": 0.08986510336399078,
+      "learning_rate": 0.0003865864712956336,
+      "loss": 0.646,
+      "step": 358
+    },
+    {
+      "epoch": 0.009979082841675875,
+      "grad_norm": 0.0990855023264885,
+      "learning_rate": 0.00038153562339436853,
+      "loss": 0.7897,
+      "step": 359
+    },
+    {
+      "epoch": 0.010006879729814247,
+      "grad_norm": 0.09031044691801071,
+      "learning_rate": 0.0003765101981412665,
+      "loss": 0.7884,
+      "step": 360
+    },
+    {
+      "epoch": 0.01003467661795262,
+      "grad_norm": 0.08221515268087387,
+      "learning_rate": 0.0003715104021118764,
+      "loss": 0.787,
+      "step": 361
+    },
+    {
+      "epoch": 0.010062473506090993,
+      "grad_norm": 0.09221215546131134,
+      "learning_rate": 0.00036653644082823046,
+      "loss": 0.7991,
+      "step": 362
+    },
+    {
+      "epoch": 0.010090270394229366,
+      "grad_norm": 0.08920681476593018,
+      "learning_rate": 0.00036158851875039456,
+      "loss": 0.9703,
+      "step": 363
+    },
+    {
+      "epoch": 0.010118067282367738,
+      "grad_norm": 0.09488007426261902,
+      "learning_rate": 0.0003566668392680662,
+      "loss": 0.759,
+      "step": 364
+    },
+    {
+      "epoch": 0.010145864170506112,
+      "grad_norm": 0.0824998989701271,
+      "learning_rate": 0.0003517716046922118,
+      "loss": 0.6603,
+      "step": 365
+    },
+    {
+      "epoch": 0.010173661058644486,
+      "grad_norm": 0.0937090590596199,
+      "learning_rate": 0.00034690301624675125,
+      "loss": 0.7799,
+      "step": 366
+    },
+    {
+      "epoch": 0.010201457946782858,
+      "grad_norm": 0.08511339873075485,
+      "learning_rate": 0.00034206127406028743,
+      "loss": 0.6858,
+      "step": 367
+    },
+    {
+      "epoch": 0.010229254834921231,
+      "grad_norm": 0.09240502119064331,
+      "learning_rate": 0.0003372465771578771,
+      "loss": 0.8179,
+      "step": 368
+    },
+    {
+      "epoch": 0.010257051723059603,
+      "grad_norm": 0.08915253728628159,
+      "learning_rate": 0.000332459123452852,
+      "loss": 0.7399,
+      "step": 369
+    },
+    {
+      "epoch": 0.010284848611197977,
+      "grad_norm": 0.09466725587844849,
+      "learning_rate": 0.00032769910973868313,
+      "loss": 0.7574,
+      "step": 370
+    },
+    {
+      "epoch": 0.010312645499336349,
+      "grad_norm": 0.09638349711894989,
+      "learning_rate": 0.00032296673168089073,
+      "loss": 0.7101,
+      "step": 371
+    },
+    {
+      "epoch": 0.010340442387474722,
+      "grad_norm": 0.09288477897644043,
+      "learning_rate": 0.0003182621838090006,
+      "loss": 0.7876,
+      "step": 372
+    },
+    {
+      "epoch": 0.010368239275613094,
+      "grad_norm": 0.08711199462413788,
+      "learning_rate": 0.0003135856595085498,
+      "loss": 0.548,
+      "step": 373
+    },
+    {
+      "epoch": 0.010396036163751468,
+      "grad_norm": 0.09743466973304749,
+      "learning_rate": 0.00030893735101313535,
+      "loss": 0.7647,
+      "step": 374
+    },
+    {
+      "epoch": 0.010423833051889842,
+      "grad_norm": 0.10079675167798996,
+      "learning_rate": 0.0003043174493965136,
+      "loss": 0.6033,
+      "step": 375
+    },
+    {
+      "epoch": 0.010451629940028214,
+      "grad_norm": 0.11834803968667984,
+      "learning_rate": 0.0002997261445647453,
+      "loss": 0.7904,
+      "step": 376
+    },
+    {
+      "epoch": 0.010479426828166587,
+      "grad_norm": 0.10599831491708755,
+      "learning_rate": 0.00029516362524838847,
+      "loss": 0.6515,
+      "step": 377
+    },
+    {
+      "epoch": 0.01050722371630496,
+      "grad_norm": 0.09899824112653732,
+      "learning_rate": 0.0002906300789947421,
+      "loss": 0.5683,
+      "step": 378
+    },
+    {
+      "epoch": 0.010535020604443333,
+      "grad_norm": 0.09238414466381073,
+      "learning_rate": 0.00028612569216013674,
+      "loss": 0.5779,
+      "step": 379
+    },
+    {
+      "epoch": 0.010562817492581705,
+      "grad_norm": 0.0977087989449501,
+      "learning_rate": 0.0002816506499022725,
+      "loss": 0.562,
+      "step": 380
+    },
+    {
+      "epoch": 0.010590614380720079,
+      "grad_norm": 0.09863676875829697,
+      "learning_rate": 0.00027720513617260855,
+      "loss": 0.6458,
+      "step": 381
+    },
+    {
+      "epoch": 0.01061841126885845,
+      "grad_norm": 0.11672049015760422,
+      "learning_rate": 0.0002727893337088027,
+      "loss": 0.7436,
+      "step": 382
+    },
+    {
+      "epoch": 0.010646208156996824,
+      "grad_norm": 0.10594779998064041,
+      "learning_rate": 0.0002684034240271986,
+      "loss": 0.6286,
+      "step": 383
+    },
+    {
+      "epoch": 0.010674005045135198,
+      "grad_norm": 0.11194431781768799,
+      "learning_rate": 0.00026404758741536505,
+      "loss": 0.6866,
+      "step": 384
+    },
+    {
+      "epoch": 0.01070180193327357,
+      "grad_norm": 0.10711811482906342,
+      "learning_rate": 0.00025972200292468463,
+      "loss": 0.5329,
+      "step": 385
+    },
+    {
+      "epoch": 0.010729598821411944,
+      "grad_norm": 0.112278513610363,
+      "learning_rate": 0.00025542684836299314,
+      "loss": 0.6123,
+      "step": 386
+    },
+    {
+      "epoch": 0.010757395709550316,
+      "grad_norm": 0.12985379993915558,
+      "learning_rate": 0.0002511623002872718,
+      "loss": 0.686,
+      "step": 387
+    },
+    {
+      "epoch": 0.01078519259768869,
+      "grad_norm": 0.12807150185108185,
+      "learning_rate": 0.00024692853399638914,
+      "loss": 0.7307,
+      "step": 388
+    },
+    {
+      "epoch": 0.010812989485827061,
+      "grad_norm": 0.11589968949556351,
+      "learning_rate": 0.00024272572352389488,
+      "loss": 0.7484,
+      "step": 389
+    },
+    {
+      "epoch": 0.010840786373965435,
+      "grad_norm": 0.11197637766599655,
+      "learning_rate": 0.0002385540416308656,
+      "loss": 0.645,
+      "step": 390
+    },
+    {
+      "epoch": 0.010868583262103807,
+      "grad_norm": 0.11974731087684631,
+      "learning_rate": 0.00023441365979880524,
+      "loss": 0.6447,
+      "step": 391
+    },
+    {
+      "epoch": 0.01089638015024218,
+      "grad_norm": 0.11931908875703812,
+      "learning_rate": 0.00023030474822259396,
+      "loss": 0.5268,
+      "step": 392
+    },
+    {
+      "epoch": 0.010924177038380554,
+      "grad_norm": 0.1177850067615509,
+      "learning_rate": 0.0002262274758034931,
+      "loss": 0.5544,
+      "step": 393
+    },
+    {
+      "epoch": 0.010951973926518926,
+      "grad_norm": 0.12234242260456085,
+      "learning_rate": 0.00022218201014220264,
+      "loss": 0.6351,
+      "step": 394
+    },
+    {
+      "epoch": 0.0109797708146573,
+      "grad_norm": 0.12924128770828247,
+      "learning_rate": 0.0002181685175319702,
+      "loss": 0.6468,
+      "step": 395
+    },
+    {
+      "epoch": 0.011007567702795672,
+      "grad_norm": 0.1454528272151947,
+      "learning_rate": 0.00021418716295175765,
+      "loss": 0.6534,
+      "step": 396
+    },
+    {
+      "epoch": 0.011035364590934045,
+      "grad_norm": 0.13180844485759735,
+      "learning_rate": 0.0002102381100594577,
+      "loss": 0.5698,
+      "step": 397
+    },
+    {
+      "epoch": 0.011063161479072417,
+      "grad_norm": 0.1514425277709961,
+      "learning_rate": 0.00020632152118516778,
+      "loss": 0.6062,
+      "step": 398
+    },
+    {
+      "epoch": 0.011090958367210791,
+      "grad_norm": 0.1729186773300171,
+      "learning_rate": 0.00020243755732451564,
+      "loss": 0.6178,
+      "step": 399
+    },
+    {
+      "epoch": 0.011118755255349163,
+      "grad_norm": 0.2056732028722763,
+      "learning_rate": 0.0001985863781320435,
+      "loss": 0.7743,
+      "step": 400
+    },
+    {
+      "epoch": 0.011118755255349163,
+      "eval_loss": 0.7006093859672546,
+      "eval_runtime": 49.996,
+      "eval_samples_per_second": 11.541,
+      "eval_steps_per_second": 5.78,
+      "step": 400
     }
   ],
   "logging_steps": 1,
       "attributes": {}
     }
   },
+  "total_flos": 2.5658179794763776e+16,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null