error577 committed on
Commit 379f8b1 · verified · 1 Parent(s): 565e1d5

Training in progress, step 500, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:5b0c195b03a78891addfd541c6f5a6e05e308cb677dddc2b5d1e1bc7a4317910
+ oid sha256:c6519a65749b3640a198541c50a2d98674828ec6c9d6a4383408f62deb368efa
  size 578859568
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:0ea03775d6f122ef22f9a34fa6d7c975927ce0c5091946eb8c4d70964cfe011e
+ oid sha256:366e0623982882f9616a1c85db3efb50f3266236a2f25d9632bdc591d388e3eb
  size 294324692
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:a000bcd0fcfbd6dc706ee094bc40e59bcb50a28e8797f55a0743ef881fecdf71
+ oid sha256:5ceff29ed030645c389d768390ebf4a4817b18da01053a3a2c553da3856524de
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:9443e18e9eff1c8055981c18d9a28ff4f85044c4c7fdc07a0fbff8845c622c60
+ oid sha256:90b3c6a1abf04b992261ec3713fab3774b78fe3ab6f52bec78378c6c0a4a1110
  size 1064
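
Note: the four files above are stored with Git LFS, so the commit only rewrites each pointer file's `oid` (the SHA-256 of the object's contents) and `size`. As a minimal sketch, not part of this repository, assuming the checkpoint has been pulled locally, a downloaded object can be checked against its pointer like this:

```python
import hashlib
from pathlib import Path

def verify_lfs_object(file_path: str, expected_oid: str, expected_size: int) -> bool:
    """Return True if the local file matches the oid/size recorded in its LFS pointer."""
    path = Path(file_path)
    if path.stat().st_size != expected_size:
        return False
    digest = hashlib.sha256()
    with path.open("rb") as f:
        # Hash in 1 MiB chunks so large checkpoints don't have to fit in memory.
        for chunk in iter(lambda: f.read(1 << 20), b""):
            digest.update(chunk)
    return digest.hexdigest() == expected_oid

# Values taken from the new adapter_model.safetensors pointer above; the local path is an assumption.
print(verify_lfs_object(
    "last-checkpoint/adapter_model.safetensors",
    "c6519a65749b3640a198541c50a2d98674828ec6c9d6a4383408f62deb368efa",
    578859568,
))
```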
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
  {
- "best_metric": 0.6939424276351929,
- "best_model_checkpoint": "miner_id_24/checkpoint-450",
- "epoch": 0.012508599662267809,
+ "best_metric": 0.6910951733589172,
+ "best_model_checkpoint": "miner_id_24/checkpoint-500",
+ "epoch": 0.013898444069186455,
  "eval_steps": 50,
- "global_step": 450,
+ "global_step": 500,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
@@ -3237,6 +3237,364 @@
  "eval_samples_per_second": 11.548,
  "eval_steps_per_second": 5.784,
  "step": 450
+ },
+ {
+ "epoch": 0.012536396550406182,
+ "grad_norm": 0.08057525753974915,
+ "learning_rate": 4.894348370484647e-05,
+ "loss": 0.8512,
+ "step": 451
+ },
+ {
+ "epoch": 0.012564193438544554,
+ "grad_norm": 0.08833472430706024,
+ "learning_rate": 4.698180862871282e-05,
+ "loss": 0.995,
+ "step": 452
+ },
+ {
+ "epoch": 0.012591990326682928,
+ "grad_norm": 0.08962654322385788,
+ "learning_rate": 4.505930839770966e-05,
+ "loss": 0.9639,
+ "step": 453
+ },
+ {
+ "epoch": 0.0126197872148213,
+ "grad_norm": 0.08324563503265381,
+ "learning_rate": 4.3176062038291274e-05,
+ "loss": 0.7867,
+ "step": 454
+ },
+ {
+ "epoch": 0.012647584102959674,
+ "grad_norm": 0.0913391187787056,
+ "learning_rate": 4.1332146963339423e-05,
+ "loss": 0.7687,
+ "step": 455
+ },
+ {
+ "epoch": 0.012675380991098047,
+ "grad_norm": 0.08055524528026581,
+ "learning_rate": 3.952763896898071e-05,
+ "loss": 0.7876,
+ "step": 456
+ },
+ {
+ "epoch": 0.01270317787923642,
+ "grad_norm": 0.08405828475952148,
+ "learning_rate": 3.776261223147126e-05,
+ "loss": 0.7312,
+ "step": 457
+ },
+ {
+ "epoch": 0.012730974767374793,
+ "grad_norm": 0.08189340680837631,
+ "learning_rate": 3.603713930414676e-05,
+ "loss": 0.7638,
+ "step": 458
+ },
+ {
+ "epoch": 0.012758771655513165,
+ "grad_norm": 0.08689261227846146,
+ "learning_rate": 3.435129111444113e-05,
+ "loss": 0.7503,
+ "step": 459
+ },
+ {
+ "epoch": 0.012786568543651539,
+ "grad_norm": 0.08499288558959961,
+ "learning_rate": 3.270513696097055e-05,
+ "loss": 0.7546,
+ "step": 460
+ },
+ {
+ "epoch": 0.01281436543178991,
+ "grad_norm": 0.08908785879611969,
+ "learning_rate": 3.109874451068473e-05,
+ "loss": 0.7841,
+ "step": 461
+ },
+ {
+ "epoch": 0.012842162319928284,
+ "grad_norm": 0.08109744638204575,
+ "learning_rate": 2.9532179796085356e-05,
+ "loss": 0.6244,
+ "step": 462
+ },
+ {
+ "epoch": 0.012869959208066656,
+ "grad_norm": 0.08815158903598785,
+ "learning_rate": 2.800550721251216e-05,
+ "loss": 0.697,
+ "step": 463
+ },
+ {
+ "epoch": 0.01289775609620503,
+ "grad_norm": 0.09392455220222473,
+ "learning_rate": 2.6518789515495355e-05,
+ "loss": 0.6835,
+ "step": 464
+ },
+ {
+ "epoch": 0.012925552984343404,
+ "grad_norm": 0.1170358955860138,
+ "learning_rate": 2.5072087818176382e-05,
+ "loss": 0.938,
+ "step": 465
+ },
+ {
+ "epoch": 0.012953349872481775,
+ "grad_norm": 0.09133084863424301,
+ "learning_rate": 2.36654615887959e-05,
+ "loss": 0.7392,
+ "step": 466
+ },
+ {
+ "epoch": 0.01298114676062015,
+ "grad_norm": 0.09215465933084488,
+ "learning_rate": 2.2298968648248653e-05,
+ "loss": 0.8161,
+ "step": 467
+ },
+ {
+ "epoch": 0.013008943648758521,
+ "grad_norm": 0.08623038232326508,
+ "learning_rate": 2.0972665167707127e-05,
+ "loss": 0.7643,
+ "step": 468
+ },
+ {
+ "epoch": 0.013036740536896895,
+ "grad_norm": 0.08695585280656815,
+ "learning_rate": 1.968660566631275e-05,
+ "loss": 0.6102,
+ "step": 469
+ },
+ {
+ "epoch": 0.013064537425035267,
+ "grad_norm": 0.08945546299219131,
+ "learning_rate": 1.844084300893456e-05,
+ "loss": 0.7888,
+ "step": 470
+ },
+ {
+ "epoch": 0.01309233431317364,
+ "grad_norm": 0.09600325673818588,
+ "learning_rate": 1.7235428403996167e-05,
+ "loss": 0.8451,
+ "step": 471
+ },
+ {
+ "epoch": 0.013120131201312012,
+ "grad_norm": 0.08915964514017105,
+ "learning_rate": 1.6070411401370334e-05,
+ "loss": 0.6192,
+ "step": 472
+ },
+ {
+ "epoch": 0.013147928089450386,
+ "grad_norm": 0.1044343113899231,
+ "learning_rate": 1.494583989034326e-05,
+ "loss": 0.6746,
+ "step": 473
+ },
+ {
+ "epoch": 0.01317572497758876,
+ "grad_norm": 0.0989852100610733,
+ "learning_rate": 1.386176009764506e-05,
+ "loss": 0.7388,
+ "step": 474
+ },
+ {
+ "epoch": 0.013203521865727132,
+ "grad_norm": 0.11182911694049835,
+ "learning_rate": 1.2818216585549825e-05,
+ "loss": 0.9093,
+ "step": 475
+ },
+ {
+ "epoch": 0.013231318753865505,
+ "grad_norm": 0.10432388633489609,
+ "learning_rate": 1.1815252250044316e-05,
+ "loss": 0.6842,
+ "step": 476
+ },
+ {
+ "epoch": 0.013259115642003877,
+ "grad_norm": 0.11141140758991241,
+ "learning_rate": 1.0852908319063826e-05,
+ "loss": 0.6488,
+ "step": 477
+ },
+ {
+ "epoch": 0.013286912530142251,
+ "grad_norm": 0.10428661853075027,
+ "learning_rate": 9.931224350798185e-06,
+ "loss": 0.7708,
+ "step": 478
+ },
+ {
+ "epoch": 0.013314709418280623,
+ "grad_norm": 0.10303416848182678,
+ "learning_rate": 9.0502382320653e-06,
+ "loss": 0.6728,
+ "step": 479
+ },
+ {
+ "epoch": 0.013342506306418997,
+ "grad_norm": 0.10627992451190948,
+ "learning_rate": 8.209986176753947e-06,
+ "loss": 0.6125,
+ "step": 480
+ },
+ {
+ "epoch": 0.013370303194557369,
+ "grad_norm": 0.09629444032907486,
+ "learning_rate": 7.4105027243349665e-06,
+ "loss": 0.6386,
+ "step": 481
+ },
+ {
+ "epoch": 0.013398100082695742,
+ "grad_norm": 0.09442020207643509,
+ "learning_rate": 6.65182073844195e-06,
+ "loss": 0.5644,
+ "step": 482
+ },
+ {
+ "epoch": 0.013425896970834116,
+ "grad_norm": 0.0994250699877739,
+ "learning_rate": 5.933971405519656e-06,
+ "loss": 0.6243,
+ "step": 483
+ },
+ {
+ "epoch": 0.013453693858972488,
+ "grad_norm": 0.1111208125948906,
+ "learning_rate": 5.256984233542595e-06,
+ "loss": 0.7222,
+ "step": 484
+ },
+ {
+ "epoch": 0.013481490747110862,
+ "grad_norm": 0.11001411825418472,
+ "learning_rate": 4.6208870508017695e-06,
+ "loss": 0.693,
+ "step": 485
+ },
+ {
+ "epoch": 0.013509287635249233,
+ "grad_norm": 0.0949028953909874,
+ "learning_rate": 4.025706004760932e-06,
+ "loss": 0.6378,
+ "step": 486
+ },
+ {
+ "epoch": 0.013537084523387607,
+ "grad_norm": 0.10430373251438141,
+ "learning_rate": 3.471465560981768e-06,
+ "loss": 0.5924,
+ "step": 487
+ },
+ {
+ "epoch": 0.013564881411525979,
+ "grad_norm": 0.09569145739078522,
+ "learning_rate": 2.958188502118153e-06,
+ "loss": 0.5534,
+ "step": 488
+ },
+ {
+ "epoch": 0.013592678299664353,
+ "grad_norm": 0.09768345206975937,
+ "learning_rate": 2.4858959269794535e-06,
+ "loss": 0.4815,
+ "step": 489
+ },
+ {
+ "epoch": 0.013620475187802725,
+ "grad_norm": 0.09561553597450256,
+ "learning_rate": 2.054607249663665e-06,
+ "loss": 0.5637,
+ "step": 490
+ },
+ {
+ "epoch": 0.013648272075941098,
+ "grad_norm": 0.11304374039173126,
+ "learning_rate": 1.6643401987591622e-06,
+ "loss": 0.5943,
+ "step": 491
+ },
+ {
+ "epoch": 0.013676068964079472,
+ "grad_norm": 0.11450091749429703,
+ "learning_rate": 1.3151108166156167e-06,
+ "loss": 0.583,
+ "step": 492
+ },
+ {
+ "epoch": 0.013703865852217844,
+ "grad_norm": 0.11334867030382156,
+ "learning_rate": 1.0069334586854107e-06,
+ "loss": 0.6076,
+ "step": 493
+ },
+ {
+ "epoch": 0.013731662740356218,
+ "grad_norm": 0.1081780344247818,
+ "learning_rate": 7.398207929323331e-07,
+ "loss": 0.5119,
+ "step": 494
+ },
+ {
+ "epoch": 0.01375945962849459,
+ "grad_norm": 0.11384209990501404,
+ "learning_rate": 5.137837993121064e-07,
+ "loss": 0.48,
+ "step": 495
+ },
+ {
+ "epoch": 0.013787256516632963,
+ "grad_norm": 0.11100795120000839,
+ "learning_rate": 3.2883176932019256e-07,
+ "loss": 0.53,
+ "step": 496
+ },
+ {
+ "epoch": 0.013815053404771335,
+ "grad_norm": 0.14853325486183167,
+ "learning_rate": 1.8497230560998722e-07,
+ "loss": 0.5982,
+ "step": 497
+ },
+ {
+ "epoch": 0.013842850292909709,
+ "grad_norm": 0.1417522132396698,
+ "learning_rate": 8.221132168073631e-08,
+ "loss": 0.5259,
+ "step": 498
+ },
+ {
+ "epoch": 0.013870647181048081,
+ "grad_norm": 0.15358008444309235,
+ "learning_rate": 2.0553041633952775e-08,
+ "loss": 0.5328,
+ "step": 499
+ },
+ {
+ "epoch": 0.013898444069186455,
+ "grad_norm": 0.21747428178787231,
+ "learning_rate": 0.0,
+ "loss": 0.5217,
+ "step": 500
+ },
+ {
+ "epoch": 0.013898444069186455,
+ "eval_loss": 0.6910951733589172,
+ "eval_runtime": 50.0791,
+ "eval_samples_per_second": 11.522,
+ "eval_steps_per_second": 5.771,
+ "step": 500
  }
  ],
  "logging_steps": 1,
@@ -3260,12 +3618,12 @@
  "should_evaluate": false,
  "should_log": false,
  "should_save": true,
- "should_training_stop": false
+ "should_training_stop": true
  },
  "attributes": {}
  }
  },
- "total_flos": 2.879669611266048e+16,
+ "total_flos": 3.1913641871671296e+16,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null