error577 committed on
Commit
0e3e770
·
verified ·
1 Parent(s): 65aba90

Training in progress, step 60, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4e326cab99b9185673026663619315b66fbc144ff1c4a4cc0f73758d0a7e97df
3
  size 1579384
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:02c6a02a38beb7dea41136c4d1a17953ef3ab09e2f49fa4c1f40e845d6fb77b7
3
  size 1579384
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:acb1b63fea4a66c08312e4978f73870c35ab3335f4b1705b84dcf6d655978dc5
3
  size 857274
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a65e6dfa03b98ef71b5e0aaa1ab80ad33f2c75979e27bb12757b7cfc567a33a9
3
  size 857274
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f720d721c792d0a9412a85ed8fc6e2eb685bfd50ae210c270b08af2ed177b79e
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:084f32ddd70fbc2c6c255d9cdf7853d5ed9830616f8380ca99fe57f35f8837a8
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5a75dcc5ec09eada6641b366eac390a2a47e7ec4306b94cfdb718bc9a73ac9b0
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2601abf854b6be8698cb0f6fe00b6f4cb5b0fb02bcdf39c9a58443d971d2cfbf
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.026412446865585408,
5
  "eval_steps": 20,
6
- "global_step": 40,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -311,6 +311,154 @@
311
  "eval_samples_per_second": 75.597,
312
  "eval_steps_per_second": 75.597,
313
  "step": 40
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
314
  }
315
  ],
316
  "logging_steps": 1,
@@ -330,7 +478,7 @@
330
  "attributes": {}
331
  }
332
  },
333
- "total_flos": 28561970823168.0,
334
  "train_batch_size": 1,
335
  "trial_name": null,
336
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.03961867029837811,
5
  "eval_steps": 20,
6
+ "global_step": 60,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
311
  "eval_samples_per_second": 75.597,
312
  "eval_steps_per_second": 75.597,
313
  "step": 40
314
+ },
315
+ {
316
+ "epoch": 0.02707275803722504,
317
+ "grad_norm": 166313.5,
318
+ "learning_rate": 0.00029162079221537,
319
+ "loss": 178.7949,
320
+ "step": 41
321
+ },
322
+ {
323
+ "epoch": 0.027733069208864678,
324
+ "grad_norm": 135891.28125,
325
+ "learning_rate": 0.0002910769889254386,
326
+ "loss": 201.0785,
327
+ "step": 42
328
+ },
329
+ {
330
+ "epoch": 0.02839338038050431,
331
+ "grad_norm": 64060.7890625,
332
+ "learning_rate": 0.0002905166296253533,
333
+ "loss": 163.5094,
334
+ "step": 43
335
+ },
336
+ {
337
+ "epoch": 0.029053691552143948,
338
+ "grad_norm": 96362.4921875,
339
+ "learning_rate": 0.0002899397800757626,
340
+ "loss": 140.6384,
341
+ "step": 44
342
+ },
343
+ {
344
+ "epoch": 0.02971400272378358,
345
+ "grad_norm": 166254.203125,
346
+ "learning_rate": 0.0002893465079725187,
347
+ "loss": 139.1684,
348
+ "step": 45
349
+ },
350
+ {
351
+ "epoch": 0.030374313895423218,
352
+ "grad_norm": 161925.5625,
353
+ "learning_rate": 0.0002887368829387333,
354
+ "loss": 140.9152,
355
+ "step": 46
356
+ },
357
+ {
358
+ "epoch": 0.031034625067062855,
359
+ "grad_norm": 637966.3125,
360
+ "learning_rate": 0.0002881109765166071,
361
+ "loss": 131.3419,
362
+ "step": 47
363
+ },
364
+ {
365
+ "epoch": 0.03169493623870249,
366
+ "grad_norm": 367775.03125,
367
+ "learning_rate": 0.00028746886215903387,
368
+ "loss": 155.0525,
369
+ "step": 48
370
+ },
371
+ {
372
+ "epoch": 0.032355247410342125,
373
+ "grad_norm": 307120.84375,
374
+ "learning_rate": 0.00028681061522098047,
375
+ "loss": 148.0313,
376
+ "step": 49
377
+ },
378
+ {
379
+ "epoch": 0.03301555858198176,
380
+ "grad_norm": 164428.203125,
381
+ "learning_rate": 0.0002861363129506435,
382
+ "loss": 139.0605,
383
+ "step": 50
384
+ },
385
+ {
386
+ "epoch": 0.03367586975362139,
387
+ "grad_norm": 56655.140625,
388
+ "learning_rate": 0.0002854460344803842,
389
+ "loss": 105.2498,
390
+ "step": 51
391
+ },
392
+ {
393
+ "epoch": 0.03433618092526103,
394
+ "grad_norm": 110095.71875,
395
+ "learning_rate": 0.00028473986081744163,
396
+ "loss": 107.1039,
397
+ "step": 52
398
+ },
399
+ {
400
+ "epoch": 0.034996492096900665,
401
+ "grad_norm": 84727.8125,
402
+ "learning_rate": 0.000284017874834426,
403
+ "loss": 114.7597,
404
+ "step": 53
405
+ },
406
+ {
407
+ "epoch": 0.0356568032685403,
408
+ "grad_norm": 108558.9140625,
409
+ "learning_rate": 0.0002832801612595937,
410
+ "loss": 131.0451,
411
+ "step": 54
412
+ },
413
+ {
414
+ "epoch": 0.03631711444017993,
415
+ "grad_norm": 35595.47265625,
416
+ "learning_rate": 0.0002825268066669034,
417
+ "loss": 135.1516,
418
+ "step": 55
419
+ },
420
+ {
421
+ "epoch": 0.03697742561181957,
422
+ "grad_norm": 54421.08203125,
423
+ "learning_rate": 0.00028175789946585693,
424
+ "loss": 116.2731,
425
+ "step": 56
426
+ },
427
+ {
428
+ "epoch": 0.037637736783459205,
429
+ "grad_norm": 72844.515625,
430
+ "learning_rate": 0.0002809735298911234,
431
+ "loss": 101.419,
432
+ "step": 57
433
+ },
434
+ {
435
+ "epoch": 0.03829804795509884,
436
+ "grad_norm": 58473.41015625,
437
+ "learning_rate": 0.00028017378999195015,
438
+ "loss": 101.8432,
439
+ "step": 58
440
+ },
441
+ {
442
+ "epoch": 0.03895835912673848,
443
+ "grad_norm": 41094.68359375,
444
+ "learning_rate": 0.0002793587736213603,
445
+ "loss": 114.5148,
446
+ "step": 59
447
+ },
448
+ {
449
+ "epoch": 0.03961867029837811,
450
+ "grad_norm": 75345.5234375,
451
+ "learning_rate": 0.00027852857642513836,
452
+ "loss": 119.3659,
453
+ "step": 60
454
+ },
455
+ {
456
+ "epoch": 0.03961867029837811,
457
+ "eval_loss": 16.175268173217773,
458
+ "eval_runtime": 6.5722,
459
+ "eval_samples_per_second": 75.317,
460
+ "eval_steps_per_second": 75.317,
461
+ "step": 60
462
  }
463
  ],
464
  "logging_steps": 1,
 
478
  "attributes": {}
479
  }
480
  },
481
+ "total_flos": 40390914736128.0,
482
  "train_batch_size": 1,
483
  "trial_name": null,
484
  "trial_params": null