mamung committed on
Commit
184cedb
·
verified ·
1 Parent(s): e0a80cf

Training in progress, step 81, checkpoint

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7facd3b68821b030d42438973cc60a6f98783203c89c6af0c9df6064a91d112c
3
  size 202110330
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:003736294802a15151f66f8e67f2e4706c410829601a7ecb1578474277867165
3
  size 202110330
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9df98b952a993c028712d01917c33ffa810e4469add5fd029bc7022e9ce56793
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:712483dcfe94bc060ec2acfd864831b36255af57775103f39f5c9eb61881b2f5
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8d43bd90ad476e419738deb9472ad85fd5991005a147e1627aa99867bdfc5655
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:09ab64d5ace66796aa9c2fa2fc4e0206d69a1eb4ef03f574f85ee8eb16a64b71
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.060166083459549796,
5
  "eval_steps": 50,
6
- "global_step": 72,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -527,6 +527,69 @@
527
  "learning_rate": 4.095071251953399e-05,
528
  "loss": 0.0,
529
  "step": 72
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
530
  }
531
  ],
532
  "logging_steps": 1,
@@ -546,7 +609,7 @@
546
  "attributes": {}
547
  }
548
  },
549
- "total_flos": 6.000598022986138e+16,
550
  "train_batch_size": 2,
551
  "trial_name": null,
552
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.06768684389199352,
5
  "eval_steps": 50,
6
+ "global_step": 81,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
527
  "learning_rate": 4.095071251953399e-05,
528
  "loss": 0.0,
529
  "step": 72
530
+ },
531
+ {
532
+ "epoch": 0.0610017235075991,
533
+ "grad_norm": NaN,
534
+ "learning_rate": 3.83534068877284e-05,
535
+ "loss": 0.0,
536
+ "step": 73
537
+ },
538
+ {
539
+ "epoch": 0.0618373635556484,
540
+ "grad_norm": NaN,
541
+ "learning_rate": 3.5812607646303834e-05,
542
+ "loss": 0.0,
543
+ "step": 74
544
+ },
545
+ {
546
+ "epoch": 0.06267300360369771,
547
+ "grad_norm": NaN,
548
+ "learning_rate": 3.333223252352985e-05,
549
+ "loss": 0.0,
550
+ "step": 75
551
+ },
552
+ {
553
+ "epoch": 0.06350864365174701,
554
+ "grad_norm": NaN,
555
+ "learning_rate": 3.091610607806452e-05,
556
+ "loss": 0.0,
557
+ "step": 76
558
+ },
559
+ {
560
+ "epoch": 0.0643442836997963,
561
+ "grad_norm": NaN,
562
+ "learning_rate": 2.856795380176244e-05,
563
+ "loss": 0.0,
564
+ "step": 77
565
+ },
566
+ {
567
+ "epoch": 0.06517992374784562,
568
+ "grad_norm": NaN,
569
+ "learning_rate": 2.6291396375236232e-05,
570
+ "loss": 0.0,
571
+ "step": 78
572
+ },
573
+ {
574
+ "epoch": 0.06601556379589492,
575
+ "grad_norm": NaN,
576
+ "learning_rate": 2.4089944085029363e-05,
577
+ "loss": 0.0,
578
+ "step": 79
579
+ },
580
+ {
581
+ "epoch": 0.06685120384394422,
582
+ "grad_norm": NaN,
583
+ "learning_rate": 2.1966991411008938e-05,
584
+ "loss": 0.0,
585
+ "step": 80
586
+ },
587
+ {
588
+ "epoch": 0.06768684389199352,
589
+ "grad_norm": NaN,
590
+ "learning_rate": 1.99258117923236e-05,
591
+ "loss": 0.0,
592
+ "step": 81
593
  }
594
  ],
595
  "logging_steps": 1,
 
609
  "attributes": {}
610
  }
611
  },
612
+ "total_flos": 6.750672775859405e+16,
613
  "train_batch_size": 2,
614
  "trial_name": null,
615
  "trial_params": null