Training in progress, step 85, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +122 -3

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f124e3c2fc8ec891656fbfca5ea9b6718202104b53c2f90f6f237b8efab7e7d6
 size 335604696

 version https://git-lfs.github.com/spec/v1
+oid sha256:376b17e9e9631961076e3bbeb3993d19795f20b76c2f2005817821e13465e6e1
 size 335604696

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:34aed8fc1414c28cb971615850dd2195d1ae883d038da558c27b755343438bb1
 size 671466706

 version https://git-lfs.github.com/spec/v1
+oid sha256:a3727032b69e6649af9f27beaabc3fe9ad69040d535a04f1eb340b53219c80bb
 size 671466706

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a4c0dfbada95d07c631cece0f7fdca52ecc156135c62bc34b4e8b1a466156bc4
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:427ad7b17c78789ef97429efbe9d6211a7d09f2d08a147aa825d611691ab1bf8
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3f4e77fd2a3bb3f08929494d77da2f57f8781f91a45852bcf8f71a5777dd088c
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:b383ad1d61ff4e9bbd86bd276c043e414782d2bb7de68ada3e289a786eb79681
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.10011502185415229,
   "eval_steps": 50,
-  "global_step": 68,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -499,6 +499,125 @@
       "learning_rate": 0.00012518479547691435,
       "loss": 0.0903,
       "step": 68
     }
   ],
   "logging_steps": 1,
@@ -518,7 +637,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 7.176620549823529e+17,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.12514377731769036,
   "eval_steps": 50,
+  "global_step": 85,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 0.00012518479547691435,
       "loss": 0.0903,
       "step": 68
+    },
+    {
+      "epoch": 0.10158730158730159,
+      "grad_norm": 0.1023801937699318,
+      "learning_rate": 0.00012420442717428804,
+      "loss": 0.0845,
+      "step": 69
+    },
+    {
+      "epoch": 0.10305958132045089,
+      "grad_norm": 0.10249310731887817,
+      "learning_rate": 0.00012320907072649044,
+      "loss": 0.0539,
+      "step": 70
+    },
+    {
+      "epoch": 0.10453186105360018,
+      "grad_norm": 0.10111914575099945,
+      "learning_rate": 0.0001221990293287378,
+      "loss": 0.0424,
+      "step": 71
+    },
+    {
+      "epoch": 0.10600414078674948,
+      "grad_norm": 0.16136892139911652,
+      "learning_rate": 0.00012117461064942435,
+      "loss": 0.1277,
+      "step": 72
+    },
+    {
+      "epoch": 0.10747642051989878,
+      "grad_norm": 0.13022761046886444,
+      "learning_rate": 0.00012013612673640363,
+      "loss": 0.118,
+      "step": 73
+    },
+    {
+      "epoch": 0.10894870025304808,
+      "grad_norm": 0.10115568339824677,
+      "learning_rate": 0.00011908389392193547,
+      "loss": 0.0554,
+      "step": 74
+    },
+    {
+      "epoch": 0.11042097998619738,
+      "grad_norm": 0.1352306455373764,
+      "learning_rate": 0.00011801823272632844,
+      "loss": 0.0683,
+      "step": 75
+    },
+    {
+      "epoch": 0.11189325971934667,
+      "grad_norm": 0.11654029786586761,
+      "learning_rate": 0.00011693946776030599,
+      "loss": 0.0656,
+      "step": 76
+    },
+    {
+      "epoch": 0.11336553945249597,
+      "grad_norm": 0.1405310332775116,
+      "learning_rate": 0.00011584792762612703,
+      "loss": 0.0681,
+      "step": 77
+    },
+    {
+      "epoch": 0.11483781918564527,
+      "grad_norm": 0.19620081782341003,
+      "learning_rate": 0.00011474394481749035,
+      "loss": 0.1183,
+      "step": 78
+    },
+    {
+      "epoch": 0.11631009891879457,
+      "grad_norm": 0.09413562715053558,
+      "learning_rate": 0.00011362785561825406,
+      "loss": 0.0377,
+      "step": 79
+    },
+    {
+      "epoch": 0.11778237865194387,
+      "grad_norm": 0.10567747801542282,
+      "learning_rate": 0.0001125,
+      "loss": 0.0776,
+      "step": 80
+    },
+    {
+      "epoch": 0.11925465838509317,
+      "grad_norm": 0.15690375864505768,
+      "learning_rate": 0.00011136072151847529,
+      "loss": 0.0366,
+      "step": 81
+    },
+    {
+      "epoch": 0.12072693811824246,
+      "grad_norm": 0.09006724506616592,
+      "learning_rate": 0.00011021036720894179,
+      "loss": 0.0319,
+      "step": 82
+    },
+    {
+      "epoch": 0.12219921785139176,
+      "grad_norm": 0.1135464459657669,
+      "learning_rate": 0.00010904928748046599,
+      "loss": 0.0482,
+      "step": 83
+    },
+    {
+      "epoch": 0.12367149758454106,
+      "grad_norm": 0.09596288949251175,
+      "learning_rate": 0.0001078778360091808,
+      "loss": 0.0454,
+      "step": 84
+    },
+    {
+      "epoch": 0.12514377731769036,
+      "grad_norm": 0.17637494206428528,
+      "learning_rate": 0.00010669636963055245,
+      "loss": 0.1116,
+      "step": 85
     }
   ],
   "logging_steps": 1,
       "attributes": {}
     }
   },
+  "total_flos": 8.995021026974761e+17,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null