oldiday commited on
Commit
073c492
·
verified ·
1 Parent(s): a10f63a

Training in progress, step 200, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b7f918c26d69f33331d37b75b868e141ca18d1cdd93f49fde57032a9c8ee4c15
3
  size 100697728
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9894f4e130ceba8e360f75920d312b51e983d2f01a2cd31d0cd1b331577f06ce
3
  size 100697728
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:44afee9e69378b5db64658d6fbab28fed6f648f9009b03a36a1c8afab726f091
3
  size 51418196
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:183195870a6827892ba1e7e0f681ac6e54431d35097d3ada59699d70826c5b08
3
  size 51418196
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:92841b692d803cc88356b7ad5ae13429ba7f74f4dcb983a5058e2275b1e31d52
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c46d4aa8a39b760feb2ece556f0543b4b9f6c3fd4315fd4ddccefb37d238482f
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:596785cc644037bdf9b1374ba5340995054de5f4bde563878d8bc4f03a7aa10e
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d10d0fa96665f6b4af4824faec3d1d9f4e8b4343723a14d86cab932da6ce3225
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 0.8176446557044983,
3
- "best_model_checkpoint": "miner_id_24/checkpoint-150",
4
- "epoch": 0.176522506619594,
5
  "eval_steps": 50,
6
- "global_step": 150,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -144,6 +144,49 @@
144
  "eval_samples_per_second": 22.059,
145
  "eval_steps_per_second": 5.515,
146
  "step": 150
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
147
  }
148
  ],
149
  "logging_steps": 10,
@@ -172,7 +215,7 @@
172
  "attributes": {}
173
  }
174
  },
175
- "total_flos": 1.010387192339497e+17,
176
  "train_batch_size": 8,
177
  "trial_name": null,
178
  "trial_params": null
 
1
  {
2
+ "best_metric": 0.7979318499565125,
3
+ "best_model_checkpoint": "miner_id_24/checkpoint-200",
4
+ "epoch": 0.23536334215945867,
5
  "eval_steps": 50,
6
+ "global_step": 200,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
144
  "eval_samples_per_second": 22.059,
145
  "eval_steps_per_second": 5.515,
146
  "step": 150
147
+ },
148
+ {
149
+ "epoch": 0.18829067372756694,
150
+ "grad_norm": 0.7749770283699036,
151
+ "learning_rate": 0.0001697631521134985,
152
+ "loss": 5.9457,
153
+ "step": 160
154
+ },
155
+ {
156
+ "epoch": 0.20005884083553988,
157
+ "grad_norm": 0.843367338180542,
158
+ "learning_rate": 0.00016585113790650388,
159
+ "loss": 4.737,
160
+ "step": 170
161
+ },
162
+ {
163
+ "epoch": 0.2118270079435128,
164
+ "grad_norm": 0.7175513505935669,
165
+ "learning_rate": 0.0001617524614946192,
166
+ "loss": 2.8268,
167
+ "step": 180
168
+ },
169
+ {
170
+ "epoch": 0.22359517505148574,
171
+ "grad_norm": 0.8358870148658752,
172
+ "learning_rate": 0.0001574787410214407,
173
+ "loss": 1.3233,
174
+ "step": 190
175
+ },
176
+ {
177
+ "epoch": 0.23536334215945867,
178
+ "grad_norm": 2.047827959060669,
179
+ "learning_rate": 0.00015304209081197425,
180
+ "loss": 1.4239,
181
+ "step": 200
182
+ },
183
+ {
184
+ "epoch": 0.23536334215945867,
185
+ "eval_loss": 0.7979318499565125,
186
+ "eval_runtime": 64.8822,
187
+ "eval_samples_per_second": 22.071,
188
+ "eval_steps_per_second": 5.518,
189
+ "step": 200
190
  }
191
  ],
192
  "logging_steps": 10,
 
215
  "attributes": {}
216
  }
217
  },
218
+ "total_flos": 1.3447268011081728e+17,
219
  "train_batch_size": 8,
220
  "trial_name": null,
221
  "trial_params": null