Training in progress, step 100, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +361 -3

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:41cbd358074cc5b581cd252db8c0d9a2d04eb4e2cc2199e3899d1da41b212437
 size 80013120

 version https://git-lfs.github.com/spec/v1
+oid sha256:a0a96a2e7cf953b933f7821f6c6fe2ec6cc1df94a1c5fe701bc8d0c1480a161f
 size 80013120

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d29f8f7904570f58aba7557a31b9c31cb1f21c19953cc44073ebe8f7b802c98e
 size 41119636

 version https://git-lfs.github.com/spec/v1
+oid sha256:c804bba2dd8a410c88ebd927b70bfc7182d9cfb0d6a36bf9c4fc2dd815eb07d6
 size 41119636

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a4a14ba56da41c309efd4baba06a9fcd7effa4fdeee89840dfb88b7ec4f0fedd
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:955fc11d95973ad3e55aafab2e3c2f9e1ac3dd672ace2bf41cd1f3359c6778ae
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a9e02dc10b7239989ab9b4418ee704e53fad611ad6b77ad633028bb8eb5238dd
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:3fc7800513a1b4dd006c457152c700dd768bb49ee4ed8e4d9665a4e42095b054
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.0060779189205616,
   "eval_steps": 50,
-  "global_step": 50,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -373,6 +373,364 @@
       "eval_samples_per_second": 26.551,
       "eval_steps_per_second": 13.276,
       "step": 50
     }
   ],
   "logging_steps": 1,
@@ -392,7 +750,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 9283870679629824.0,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.0121558378411232,
   "eval_steps": 50,
+  "global_step": 100,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 26.551,
       "eval_steps_per_second": 13.276,
       "step": 50
+    },
+    {
+      "epoch": 0.006199477298972832,
+      "grad_norm": 2.2631587982177734,
+      "learning_rate": 0.00017788772787621126,
+      "loss": 9.446,
+      "step": 51
+    },
+    {
+      "epoch": 0.006321035677384064,
+      "grad_norm": 4.675655364990234,
+      "learning_rate": 0.00017684011108568592,
+      "loss": 9.3449,
+      "step": 52
+    },
+    {
+      "epoch": 0.006442594055795296,
+      "grad_norm": 1.7152106761932373,
+      "learning_rate": 0.0001757714869760335,
+      "loss": 9.475,
+      "step": 53
+    },
+    {
+      "epoch": 0.006564152434206528,
+      "grad_norm": 1.5290554761886597,
+      "learning_rate": 0.0001746821476984154,
+      "loss": 9.117,
+      "step": 54
+    },
+    {
+      "epoch": 0.00668571081261776,
+      "grad_norm": 2.0921902656555176,
+      "learning_rate": 0.00017357239106731317,
+      "loss": 9.5546,
+      "step": 55
+    },
+    {
+      "epoch": 0.006807269191028992,
+      "grad_norm": 2.1920764446258545,
+      "learning_rate": 0.00017244252047910892,
+      "loss": 8.953,
+      "step": 56
+    },
+    {
+      "epoch": 0.0069288275694402235,
+      "grad_norm": 1.7566354274749756,
+      "learning_rate": 0.00017129284482913972,
+      "loss": 9.6787,
+      "step": 57
+    },
+    {
+      "epoch": 0.0070503859478514555,
+      "grad_norm": 1.4819419384002686,
+      "learning_rate": 0.00017012367842724887,
+      "loss": 7.3843,
+      "step": 58
+    },
+    {
+      "epoch": 0.007171944326262687,
+      "grad_norm": 1.6789450645446777,
+      "learning_rate": 0.0001689353409118566,
+      "loss": 9.802,
+      "step": 59
+    },
+    {
+      "epoch": 0.007293502704673919,
+      "grad_norm": 1.282571792602539,
+      "learning_rate": 0.00016772815716257412,
+      "loss": 7.7288,
+      "step": 60
+    },
+    {
+      "epoch": 0.007415061083085151,
+      "grad_norm": 2.066798448562622,
+      "learning_rate": 0.0001665024572113848,
+      "loss": 9.3806,
+      "step": 61
+    },
+    {
+      "epoch": 0.007536619461496383,
+      "grad_norm": 1.7693638801574707,
+      "learning_rate": 0.00016525857615241687,
+      "loss": 9.6393,
+      "step": 62
+    },
+    {
+      "epoch": 0.007658177839907615,
+      "grad_norm": 1.7502071857452393,
+      "learning_rate": 0.00016399685405033167,
+      "loss": 8.3708,
+      "step": 63
+    },
+    {
+      "epoch": 0.007779736218318848,
+      "grad_norm": 2.8502800464630127,
+      "learning_rate": 0.0001627176358473537,
+      "loss": 8.8738,
+      "step": 64
+    },
+    {
+      "epoch": 0.00790129459673008,
+      "grad_norm": 2.4103214740753174,
+      "learning_rate": 0.0001614212712689668,
+      "loss": 9.5765,
+      "step": 65
+    },
+    {
+      "epoch": 0.008022852975141312,
+      "grad_norm": 1.8917535543441772,
+      "learning_rate": 0.00016010811472830252,
+      "loss": 9.4486,
+      "step": 66
+    },
+    {
+      "epoch": 0.008144411353552544,
+      "grad_norm": 1.5890737771987915,
+      "learning_rate": 0.00015877852522924732,
+      "loss": 8.5677,
+      "step": 67
+    },
+    {
+      "epoch": 0.008265969731963776,
+      "grad_norm": 2.1370444297790527,
+      "learning_rate": 0.00015743286626829437,
+      "loss": 9.4762,
+      "step": 68
+    },
+    {
+      "epoch": 0.008387528110375008,
+      "grad_norm": 1.7275190353393555,
+      "learning_rate": 0.0001560715057351673,
+      "loss": 8.829,
+      "step": 69
+    },
+    {
+      "epoch": 0.00850908648878624,
+      "grad_norm": 1.7048512697219849,
+      "learning_rate": 0.00015469481581224272,
+      "loss": 8.8259,
+      "step": 70
+    },
+    {
+      "epoch": 0.008630644867197472,
+      "grad_norm": 1.903066635131836,
+      "learning_rate": 0.0001533031728727994,
+      "loss": 9.6929,
+      "step": 71
+    },
+    {
+      "epoch": 0.008752203245608704,
+      "grad_norm": 1.5013515949249268,
+      "learning_rate": 0.00015189695737812152,
+      "loss": 8.1273,
+      "step": 72
+    },
+    {
+      "epoch": 0.008873761624019936,
+      "grad_norm": 1.6483365297317505,
+      "learning_rate": 0.0001504765537734844,
+      "loss": 8.8742,
+      "step": 73
+    },
+    {
+      "epoch": 0.008995320002431167,
+      "grad_norm": 1.8312402963638306,
+      "learning_rate": 0.00014904235038305083,
+      "loss": 8.7848,
+      "step": 74
+    },
+    {
+      "epoch": 0.0091168783808424,
+      "grad_norm": 1.3816986083984375,
+      "learning_rate": 0.00014759473930370736,
+      "loss": 8.2036,
+      "step": 75
+    },
+    {
+      "epoch": 0.009238436759253631,
+      "grad_norm": 2.2364020347595215,
+      "learning_rate": 0.0001461341162978688,
+      "loss": 10.4628,
+      "step": 76
+    },
+    {
+      "epoch": 0.009359995137664863,
+      "grad_norm": 1.573431134223938,
+      "learning_rate": 0.00014466088068528068,
+      "loss": 8.8974,
+      "step": 77
+    },
+    {
+      "epoch": 0.009481553516076095,
+      "grad_norm": 2.187255859375,
+      "learning_rate": 0.00014317543523384928,
+      "loss": 8.5312,
+      "step": 78
+    },
+    {
+      "epoch": 0.009603111894487327,
+      "grad_norm": 2.0174520015716553,
+      "learning_rate": 0.00014167818604952906,
+      "loss": 8.829,
+      "step": 79
+    },
+    {
+      "epoch": 0.00972467027289856,
+      "grad_norm": 1.8616173267364502,
+      "learning_rate": 0.00014016954246529696,
+      "loss": 8.3692,
+      "step": 80
+    },
+    {
+      "epoch": 0.009846228651309791,
+      "grad_norm": 1.8460533618927002,
+      "learning_rate": 0.00013864991692924523,
+      "loss": 8.6283,
+      "step": 81
+    },
+    {
+      "epoch": 0.009967787029721023,
+      "grad_norm": 2.5459718704223633,
+      "learning_rate": 0.00013711972489182208,
+      "loss": 8.4909,
+      "step": 82
+    },
+    {
+      "epoch": 0.010089345408132255,
+      "grad_norm": 1.9989314079284668,
+      "learning_rate": 0.00013557938469225167,
+      "loss": 8.9659,
+      "step": 83
+    },
+    {
+      "epoch": 0.010210903786543487,
+      "grad_norm": 1.3886724710464478,
+      "learning_rate": 0.00013402931744416433,
+      "loss": 9.1106,
+      "step": 84
+    },
+    {
+      "epoch": 0.010332462164954719,
+      "grad_norm": 2.354243040084839,
+      "learning_rate": 0.00013246994692046836,
+      "loss": 9.2569,
+      "step": 85
+    },
+    {
+      "epoch": 0.01045402054336595,
+      "grad_norm": 2.085102081298828,
+      "learning_rate": 0.00013090169943749476,
+      "loss": 8.9907,
+      "step": 86
+    },
+    {
+      "epoch": 0.010575578921777183,
+      "grad_norm": 1.2773852348327637,
+      "learning_rate": 0.0001293250037384465,
+      "loss": 8.7438,
+      "step": 87
+    },
+    {
+      "epoch": 0.010697137300188415,
+      "grad_norm": 1.6545124053955078,
+      "learning_rate": 0.00012774029087618446,
+      "loss": 8.7085,
+      "step": 88
+    },
+    {
+      "epoch": 0.010818695678599647,
+      "grad_norm": 2.1138100624084473,
+      "learning_rate": 0.00012614799409538198,
+      "loss": 8.8577,
+      "step": 89
+    },
+    {
+      "epoch": 0.010940254057010879,
+      "grad_norm": 1.8220653533935547,
+      "learning_rate": 0.00012454854871407994,
+      "loss": 9.198,
+      "step": 90
+    },
+    {
+      "epoch": 0.011061812435422112,
+      "grad_norm": 1.6138783693313599,
+      "learning_rate": 0.00012294239200467516,
+      "loss": 8.9333,
+      "step": 91
+    },
+    {
+      "epoch": 0.011183370813833344,
+      "grad_norm": 1.6465097665786743,
+      "learning_rate": 0.0001213299630743747,
+      "loss": 9.123,
+      "step": 92
+    },
+    {
+      "epoch": 0.011304929192244576,
+      "grad_norm": 1.7768160104751587,
+      "learning_rate": 0.00011971170274514802,
+      "loss": 8.6597,
+      "step": 93
+    },
+    {
+      "epoch": 0.011426487570655808,
+      "grad_norm": 1.6465460062026978,
+      "learning_rate": 0.000118088053433211,
+      "loss": 9.841,
+      "step": 94
+    },
+    {
+      "epoch": 0.01154804594906704,
+      "grad_norm": 1.3596290349960327,
+      "learning_rate": 0.00011645945902807341,
+      "loss": 8.5273,
+      "step": 95
+    },
+    {
+      "epoch": 0.011669604327478272,
+      "grad_norm": 1.612764596939087,
+      "learning_rate": 0.0001148263647711842,
+      "loss": 8.4275,
+      "step": 96
+    },
+    {
+      "epoch": 0.011791162705889504,
+      "grad_norm": 2.3744266033172607,
+      "learning_rate": 0.00011318921713420691,
+      "loss": 9.6372,
+      "step": 97
+    },
+    {
+      "epoch": 0.011912721084300736,
+      "grad_norm": 1.4123613834381104,
+      "learning_rate": 0.00011154846369695863,
+      "loss": 8.4314,
+      "step": 98
+    },
+    {
+      "epoch": 0.012034279462711968,
+      "grad_norm": 1.9357587099075317,
+      "learning_rate": 0.0001099045530250463,
+      "loss": 8.7244,
+      "step": 99
+    },
+    {
+      "epoch": 0.0121558378411232,
+      "grad_norm": 1.6507443189620972,
+      "learning_rate": 0.00010825793454723325,
+      "loss": 10.0314,
+      "step": 100
+    },
+    {
+      "epoch": 0.0121558378411232,
+      "eval_loss": 2.20096755027771,
+      "eval_runtime": 130.4185,
+      "eval_samples_per_second": 26.561,
+      "eval_steps_per_second": 13.28,
+      "step": 100
     }
   ],
   "logging_steps": 1,
       "attributes": {}
     }
   },
+  "total_flos": 1.873061628346368e+16,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null