auxyus committed (verified)
Commit 8a9b7e9
1 parent: 86fa560

Training in progress, step 450, checkpoint

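All of the binary files in this checkpoint are tracked with Git LFS, so the commit only rewrites the small pointer files: the LFS spec version, the sha256 object id, and the byte size. The payloads themselves (for example the roughly 160 MB adapter weights) live in LFS storage under the new object ids. As a minimal sketch (the file path and expected hash are taken from the pointer diff below; the verification loop itself is only an illustration), a downloaded blob can be checked against the updated pointer like this:

    import hashlib

    # Path of the LFS-tracked file as it appears in this commit.
    path = "last-checkpoint/adapter_model.safetensors"

    # Stream the file so the ~160 MB blob is not read into memory at once.
    digest = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            digest.update(chunk)

    # Should print the oid recorded in the updated pointer:
    # d005130bbd79f63ed3576d1fb5bde54f18c3f8002c2569b44bdd45c4dfa32a2e
    print(digest.hexdigest())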
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8a2f5dacad9e643e3111202e8da6cbb000ee0d02f836c7137abce8695d0b95e3
+oid sha256:d005130bbd79f63ed3576d1fb5bde54f18c3f8002c2569b44bdd45c4dfa32a2e
 size 159967880
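The unchanged 159,967,880-byte size of adapter_model.safetensors suggests this checkpoint holds a PEFT/LoRA adapter rather than full model weights. A minimal loading sketch, assuming the checkpoint directory also contains the usual adapter_config.json and that the base model is a causal LM; the base model name below is a placeholder, not something recorded in this commit:

    from peft import PeftModel
    from transformers import AutoModelForCausalLM

    # Placeholder: the actual base model is not recorded in this diff.
    base = AutoModelForCausalLM.from_pretrained("<base-model-name>")

    # Attach the adapter weights stored in the checkpoint directory.
    model = PeftModel.from_pretrained(base, "last-checkpoint")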
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:00f026b1a2fd6ad0b196a5e8b6d969154739d312e4ffad36b61e1ee1004d316c
+oid sha256:88148e0e8f3d7719ea166dae64c36638701c6fcf4624688705dc0786ae94a247
 size 320194002
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0233142f8a03e66bb48544179782708459094384dd42895597bcdc509d989f7e
+oid sha256:7bec849e25acd8dd93c6dc0cc1f78247dec56dc50df0766dc1fb28a024ff6966
 size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d618e72555a9c2fdcc9a6d8a15da7779669af7b5e00bbe111e68822be4539346
+oid sha256:d48e757787bf7fc889c741def8f67efb8c383ce859146812f7dbd958398696a3
 size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
-  "best_metric": 0.49462634325027466,
-  "best_model_checkpoint": "miner_id_24/checkpoint-300",
-  "epoch": 0.02829121086382497,
+  "best_metric": 0.4766067862510681,
+  "best_model_checkpoint": "miner_id_24/checkpoint-450",
+  "epoch": 0.04243681629573746,
   "eval_steps": 150,
-  "global_step": 300,
+  "global_step": 450,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -241,6 +241,119 @@
       "eval_samples_per_second": 23.843,
       "eval_steps_per_second": 5.969,
       "step": 300
+    },
+    {
+      "epoch": 0.02923425122595247,
+      "grad_norm": 1.1752290725708008,
+      "learning_rate": 2.4554044110755066e-05,
+      "loss": 0.5023,
+      "step": 310
+    },
+    {
+      "epoch": 0.03017729158807997,
+      "grad_norm": 1.613908290863037,
+      "learning_rate": 2.3279186236030468e-05,
+      "loss": 0.452,
+      "step": 320
+    },
+    {
+      "epoch": 0.03112033195020747,
+      "grad_norm": 1.15360426902771,
+      "learning_rate": 2.2e-05,
+      "loss": 0.4926,
+      "step": 330
+    },
+    {
+      "epoch": 0.03206337231233497,
+      "grad_norm": 0.985500693321228,
+      "learning_rate": 2.072081376396953e-05,
+      "loss": 0.5447,
+      "step": 340
+    },
+    {
+      "epoch": 0.033006412674462464,
+      "grad_norm": 0.9483439922332764,
+      "learning_rate": 1.9445955889244933e-05,
+      "loss": 0.5562,
+      "step": 350
+    },
+    {
+      "epoch": 0.033949453036589965,
+      "grad_norm": 1.3792706727981567,
+      "learning_rate": 1.8179740091327534e-05,
+      "loss": 0.5402,
+      "step": 360
+    },
+    {
+      "epoch": 0.034892493398717465,
+      "grad_norm": 0.8846226334571838,
+      "learning_rate": 1.6926450843666314e-05,
+      "loss": 0.5073,
+      "step": 370
+    },
+    {
+      "epoch": 0.035835533760844966,
+      "grad_norm": 1.1274725198745728,
+      "learning_rate": 1.569032888035602e-05,
+      "loss": 0.4089,
+      "step": 380
+    },
+    {
+      "epoch": 0.03677857412297246,
+      "grad_norm": 1.4087094068527222,
+      "learning_rate": 1.447555684683529e-05,
+      "loss": 0.5137,
+      "step": 390
+    },
+    {
+      "epoch": 0.03772161448509996,
+      "grad_norm": 1.4227954149246216,
+      "learning_rate": 1.3286245147138549e-05,
+      "loss": 0.4764,
+      "step": 400
+    },
+    {
+      "epoch": 0.03866465484722746,
+      "grad_norm": 1.189937710762024,
+      "learning_rate": 1.2126418035589831e-05,
+      "loss": 0.4483,
+      "step": 410
+    },
+    {
+      "epoch": 0.03960769520935496,
+      "grad_norm": 1.5125129222869873,
+      "learning_rate": 1.1000000000000005e-05,
+      "loss": 0.3932,
+      "step": 420
+    },
+    {
+      "epoch": 0.040550735571482456,
+      "grad_norm": 1.0411937236785889,
+      "learning_rate": 9.910802482442268e-06,
+      "loss": 0.47,
+      "step": 430
+    },
+    {
+      "epoch": 0.04149377593360996,
+      "grad_norm": 2.2022852897644043,
+      "learning_rate": 8.86251098253871e-06,
+      "loss": 0.5126,
+      "step": 440
+    },
+    {
+      "epoch": 0.04243681629573746,
+      "grad_norm": 1.1059151887893677,
+      "learning_rate": 7.858672586896134e-06,
+      "loss": 0.4136,
+      "step": 450
+    },
+    {
+      "epoch": 0.04243681629573746,
+      "eval_loss": 0.4766067862510681,
+      "eval_runtime": 93.545,
+      "eval_samples_per_second": 23.871,
+      "eval_steps_per_second": 5.976,
+      "step": 450
     }
   ],
   "logging_steps": 10,
@@ -269,7 +382,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 2.56173970292736e+16,
+  "total_flos": 3.834345877929984e+16,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null