Training in progress, step 100, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +362 -4

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e98fb0b87a0f6f24304595dfc61183b93d8e22384e36d5e7cacba0d44d6b05e4
 size 34456

 version https://git-lfs.github.com/spec/v1
+oid sha256:2b4fa038c971ac34dcae7913a853823780441d4eea44dd0e4b73b1229a7813dd
 size 34456

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:dea54fc002c5fe5a7654a8e9ddfa21fd20fdba6d87035abf856de6f5e085c480
 size 73222

 version https://git-lfs.github.com/spec/v1
+oid sha256:3007e9e0c04d0a10a1244ca12be9e308704feb9063386934a7ddb74f829db1ea
 size 73222

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:57b6a2948e57b12ffd9d877589e42bf11782127ea2b8a84057fce6eb3e1144b2
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:f85f9af538cf2b77c6bd576ebfd9318e40eedb504d1dc93e89f48bbc8e18e06b
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0149711e8b0d5ef3903d9d19787165d98c2601e242e137eebd29bf8de54b0133
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:fb2481782945fd527fd68f03d6ecc086ae38c0c705bfbe5de3172ab751f2e6c8
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": 11.5,
   "best_model_checkpoint": "miner_id_24/checkpoint-50",
-  "epoch": 0.03374957813027337,
   "eval_steps": 50,
-  "global_step": 50,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -373,6 +373,364 @@
       "eval_samples_per_second": 219.821,
       "eval_steps_per_second": 54.977,
       "step": 50
     }
   ],
   "logging_steps": 1,
@@ -387,7 +745,7 @@
         "early_stopping_threshold": 0.0
       },
       "attributes": {
-        "early_stopping_patience_counter": 0
       }
     },
     "TrainerControl": {
@@ -401,7 +759,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 8039615692800.0,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": 11.5,
   "best_model_checkpoint": "miner_id_24/checkpoint-50",
+  "epoch": 0.06749915626054674,
   "eval_steps": 50,
+  "global_step": 100,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 219.821,
       "eval_steps_per_second": 54.977,
       "step": 50
+    },
+    {
+      "epoch": 0.03442456969287884,
+      "grad_norm": 5.4881169489817694e-05,
+      "learning_rate": 7.11550911504845e-05,
+      "loss": 46.0,
+      "step": 51
+    },
+    {
+      "epoch": 0.03509956125548431,
+      "grad_norm": 7.321939483517781e-05,
+      "learning_rate": 7.073604443427437e-05,
+      "loss": 46.0,
+      "step": 52
+    },
+    {
+      "epoch": 0.03577455281808977,
+      "grad_norm": 7.501189247705042e-05,
+      "learning_rate": 7.03085947904134e-05,
+      "loss": 46.0,
+      "step": 53
+    },
+    {
+      "epoch": 0.03644954438069524,
+      "grad_norm": 7.780554005876184e-05,
+      "learning_rate": 6.987285907936617e-05,
+      "loss": 46.0,
+      "step": 54
+    },
+    {
+      "epoch": 0.03712453594330071,
+      "grad_norm": 9.891616355162114e-05,
+      "learning_rate": 6.942895642692527e-05,
+      "loss": 46.0,
+      "step": 55
+    },
+    {
+      "epoch": 0.03779952750590618,
+      "grad_norm": 0.00010379558807471767,
+      "learning_rate": 6.897700819164357e-05,
+      "loss": 46.0,
+      "step": 56
+    },
+    {
+      "epoch": 0.038474519068511646,
+      "grad_norm": 6.376452802214772e-05,
+      "learning_rate": 6.851713793165589e-05,
+      "loss": 46.0,
+      "step": 57
+    },
+    {
+      "epoch": 0.03914951063111711,
+      "grad_norm": 0.00011976366658927873,
+      "learning_rate": 6.804947137089955e-05,
+      "loss": 46.0,
+      "step": 58
+    },
+    {
+      "epoch": 0.03982450219372258,
+      "grad_norm": 0.00011366497346898541,
+      "learning_rate": 6.757413636474263e-05,
+      "loss": 46.0,
+      "step": 59
+    },
+    {
+      "epoch": 0.040499493756328046,
+      "grad_norm": 0.00012407948088366538,
+      "learning_rate": 6.709126286502965e-05,
+      "loss": 46.0,
+      "step": 60
+    },
+    {
+      "epoch": 0.041174485318933515,
+      "grad_norm": 0.00011083694698754698,
+      "learning_rate": 6.660098288455393e-05,
+      "loss": 46.0,
+      "step": 61
+    },
+    {
+      "epoch": 0.041849476881538984,
+      "grad_norm": 0.00012434367090463638,
+      "learning_rate": 6.610343046096674e-05,
+      "loss": 46.0,
+      "step": 62
+    },
+    {
+      "epoch": 0.042524468444144446,
+      "grad_norm": 9.564027277519926e-05,
+      "learning_rate": 6.559874162013267e-05,
+      "loss": 46.0,
+      "step": 63
+    },
+    {
+      "epoch": 0.043199460006749915,
+      "grad_norm": 8.99869337445125e-05,
+      "learning_rate": 6.508705433894149e-05,
+      "loss": 46.0,
+      "step": 64
+    },
+    {
+      "epoch": 0.043874451569355384,
+      "grad_norm": 0.00011574227391975,
+      "learning_rate": 6.456850850758673e-05,
+      "loss": 46.0,
+      "step": 65
+    },
+    {
+      "epoch": 0.04454944313196085,
+      "grad_norm": 0.00011398899368941784,
+      "learning_rate": 6.404324589132101e-05,
+      "loss": 46.0,
+      "step": 66
+    },
+    {
+      "epoch": 0.045224434694566315,
+      "grad_norm": 8.823705138638616e-05,
+      "learning_rate": 6.351141009169893e-05,
+      "loss": 46.0,
+      "step": 67
+    },
+    {
+      "epoch": 0.045899426257171784,
+      "grad_norm": 0.0001269930216949433,
+      "learning_rate": 6.297314650731775e-05,
+      "loss": 46.0,
+      "step": 68
+    },
+    {
+      "epoch": 0.04657441781977725,
+      "grad_norm": 0.0001396976731484756,
+      "learning_rate": 6.242860229406692e-05,
+      "loss": 46.0,
+      "step": 69
+    },
+    {
+      "epoch": 0.04724940938238272,
+      "grad_norm": 0.0001476786273997277,
+      "learning_rate": 6.18779263248971e-05,
+      "loss": 46.0,
+      "step": 70
+    },
+    {
+      "epoch": 0.04792440094498819,
+      "grad_norm": 0.0001579802919877693,
+      "learning_rate": 6.132126914911976e-05,
+      "loss": 46.0,
+      "step": 71
+    },
+    {
+      "epoch": 0.04859939250759365,
+      "grad_norm": 0.00014366269169840962,
+      "learning_rate": 6.075878295124861e-05,
+      "loss": 46.0,
+      "step": 72
+    },
+    {
+      "epoch": 0.04927438407019912,
+      "grad_norm": 0.00015545522910542786,
+      "learning_rate": 6.019062150939376e-05,
+      "loss": 46.0,
+      "step": 73
+    },
+    {
+      "epoch": 0.04994937563280459,
+      "grad_norm": 0.00011848592112073675,
+      "learning_rate": 5.9616940153220336e-05,
+      "loss": 46.0,
+      "step": 74
+    },
+    {
+      "epoch": 0.05062436719541006,
+      "grad_norm": 0.00015544149209745228,
+      "learning_rate": 5.903789572148295e-05,
+      "loss": 46.0,
+      "step": 75
+    },
+    {
+      "epoch": 0.05129935875801552,
+      "grad_norm": 0.00016012511332519352,
+      "learning_rate": 5.845364651914752e-05,
+      "loss": 46.0,
+      "step": 76
+    },
+    {
+      "epoch": 0.05197435032062099,
+      "grad_norm": 0.00024290069995913655,
+      "learning_rate": 5.786435227411227e-05,
+      "loss": 46.0,
+      "step": 77
+    },
+    {
+      "epoch": 0.05264934188322646,
+      "grad_norm": 0.00016308229533024132,
+      "learning_rate": 5.727017409353971e-05,
+      "loss": 46.0,
+      "step": 78
+    },
+    {
+      "epoch": 0.05332433344583193,
+      "grad_norm": 0.0001312433450948447,
+      "learning_rate": 5.667127441981162e-05,
+      "loss": 46.0,
+      "step": 79
+    },
+    {
+      "epoch": 0.0539993250084374,
+      "grad_norm": 0.00020814283925574273,
+      "learning_rate": 5.606781698611879e-05,
+      "loss": 46.0,
+      "step": 80
+    },
+    {
+      "epoch": 0.05467431657104286,
+      "grad_norm": 0.00017544587899465114,
+      "learning_rate": 5.5459966771698096e-05,
+      "loss": 46.0,
+      "step": 81
+    },
+    {
+      "epoch": 0.05534930813364833,
+      "grad_norm": 0.00021598087914753705,
+      "learning_rate": 5.4847889956728834e-05,
+      "loss": 46.0,
+      "step": 82
+    },
+    {
+      "epoch": 0.0560242996962538,
+      "grad_norm": 0.00022435332357417792,
+      "learning_rate": 5.423175387690067e-05,
+      "loss": 46.0,
+      "step": 83
+    },
+    {
+      "epoch": 0.056699291258859266,
+      "grad_norm": 0.00025759119307622313,
+      "learning_rate": 5.361172697766573e-05,
+      "loss": 46.0,
+      "step": 84
+    },
+    {
+      "epoch": 0.05737428282146473,
+      "grad_norm": 0.00027369000599719584,
+      "learning_rate": 5.298797876818735e-05,
+      "loss": 46.0,
+      "step": 85
+    },
+    {
+      "epoch": 0.0580492743840702,
+      "grad_norm": 0.00021083872707094997,
+      "learning_rate": 5.23606797749979e-05,
+      "loss": 46.0,
+      "step": 86
+    },
+    {
+      "epoch": 0.058724265946675666,
+      "grad_norm": 0.00017781296628527343,
+      "learning_rate": 5.17300014953786e-05,
+      "loss": 46.0,
+      "step": 87
+    },
+    {
+      "epoch": 0.059399257509281135,
+      "grad_norm": 0.0002701023768167943,
+      "learning_rate": 5.109611635047379e-05,
+      "loss": 46.0,
+      "step": 88
+    },
+    {
+      "epoch": 0.060074249071886604,
+      "grad_norm": 0.00020761927589774132,
+      "learning_rate": 5.04591976381528e-05,
+      "loss": 46.0,
+      "step": 89
+    },
+    {
+      "epoch": 0.060749240634492066,
+      "grad_norm": 0.0002119742421200499,
+      "learning_rate": 4.981941948563197e-05,
+      "loss": 46.0,
+      "step": 90
+    },
+    {
+      "epoch": 0.061424232197097535,
+      "grad_norm": 0.0003119312459602952,
+      "learning_rate": 4.9176956801870065e-05,
+      "loss": 46.0,
+      "step": 91
+    },
+    {
+      "epoch": 0.062099223759703004,
+      "grad_norm": 0.00029339047614485025,
+      "learning_rate": 4.853198522974988e-05,
+      "loss": 46.0,
+      "step": 92
+    },
+    {
+      "epoch": 0.06277421532230847,
+      "grad_norm": 0.00024452415527775884,
+      "learning_rate": 4.788468109805921e-05,
+      "loss": 46.0,
+      "step": 93
+    },
+    {
+      "epoch": 0.06344920688491394,
+      "grad_norm": 0.00024892069632187486,
+      "learning_rate": 4.7235221373284407e-05,
+      "loss": 46.0,
+      "step": 94
+    },
+    {
+      "epoch": 0.0641241984475194,
+      "grad_norm": 0.00032684431062079966,
+      "learning_rate": 4.658378361122936e-05,
+      "loss": 46.0,
+      "step": 95
+    },
+    {
+      "epoch": 0.06479919001012488,
+      "grad_norm": 0.00033167307265102863,
+      "learning_rate": 4.593054590847368e-05,
+      "loss": 46.0,
+      "step": 96
+    },
+    {
+      "epoch": 0.06547418157273034,
+      "grad_norm": 0.00021435694361571223,
+      "learning_rate": 4.5275686853682765e-05,
+      "loss": 46.0,
+      "step": 97
+    },
+    {
+      "epoch": 0.0661491731353358,
+      "grad_norm": 0.00033286900725215673,
+      "learning_rate": 4.4619385478783456e-05,
+      "loss": 46.0,
+      "step": 98
+    },
+    {
+      "epoch": 0.06682416469794128,
+      "grad_norm": 0.0003271261812187731,
+      "learning_rate": 4.396182121001852e-05,
+      "loss": 46.0,
+      "step": 99
+    },
+    {
+      "epoch": 0.06749915626054674,
+      "grad_norm": 0.0008189138607122004,
+      "learning_rate": 4.33031738188933e-05,
+      "loss": 46.0,
+      "step": 100
+    },
+    {
+      "epoch": 0.06749915626054674,
+      "eval_loss": 11.5,
+      "eval_runtime": 11.1305,
+      "eval_samples_per_second": 224.16,
+      "eval_steps_per_second": 56.062,
+      "step": 100
     }
   ],
   "logging_steps": 1,
         "early_stopping_threshold": 0.0
       },
       "attributes": {
+        "early_stopping_patience_counter": 1
       }
     },
     "TrainerControl": {
       "attributes": {}
     }
   },
+  "total_flos": 16079231385600.0,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null