mrferr3t committed on
Commit
1792c96
·
verified ·
1 Parent(s): bbb9c89

Training in progress, step 5130, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4b21cc99de8ca5ddf9e4a922930cda76a97a0aba3cdf76c251e6e26f5d0f0e15
3
  size 25271744
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d0ff0f17e9477554b95d7b8b9645bf8881d7d66d8bb60aa0203c8588ec6b7a65
3
  size 25271744
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b8d66ebcaa30fc938bfc1d874a3106ca9c13c743d65a1b90fa7c19b688b7bdec
3
  size 13685836
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:646a634e95194ed4bb6e177b3c72ff5ec73777bb24cbd8064bdcb3dfe4244cc4
3
  size 13685836
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ac53cece13c5c94f6e5521c6b047f880cf2350ff33fd76420fac360bdf2e3085
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bdf2deb0bfd772b0af3d54bfa96b905de9797cd61d86ad22f3775dbbd3b6e070
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:510274deb14f6db23dde92d3051a600a5a7f0ba46cdb8379aec74de70e9c85df
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:53b96d7c5488649967d63309339d02de27cab5657995c3fb8c7e46217d2d189b
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 0.700386643409729,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-4788",
4
- "epoch": 2.095675788594109,
5
  "eval_steps": 114,
6
- "global_step": 5016,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -675,6 +675,21 @@
675
  "eval_samples_per_second": 88.057,
676
  "eval_steps_per_second": 2.752,
677
  "step": 5016
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
678
  }
679
  ],
680
  "logging_steps": 114,
@@ -689,7 +704,7 @@
689
  "early_stopping_threshold": 0.0
690
  },
691
  "attributes": {
692
- "early_stopping_patience_counter": 2
693
  }
694
  },
695
  "TrainerControl": {
@@ -698,12 +713,12 @@
698
  "should_evaluate": false,
699
  "should_log": false,
700
  "should_save": true,
701
- "should_training_stop": false
702
  },
703
  "attributes": {}
704
  }
705
  },
706
- "total_flos": 9.569305802330604e+17,
707
  "train_batch_size": 16,
708
  "trial_name": null,
709
  "trial_params": null
 
1
  {
2
  "best_metric": 0.700386643409729,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-4788",
4
+ "epoch": 2.14330478378943,
5
  "eval_steps": 114,
6
+ "global_step": 5130,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
675
  "eval_samples_per_second": 88.057,
676
  "eval_steps_per_second": 2.752,
677
  "step": 5016
678
+ },
679
+ {
680
+ "epoch": 2.14330478378943,
681
+ "grad_norm": 0.4279311001300812,
682
+ "learning_rate": 0.00039956372993929837,
683
+ "loss": 0.6282,
684
+ "step": 5130
685
+ },
686
+ {
687
+ "epoch": 2.14330478378943,
688
+ "eval_loss": 0.7085138559341431,
689
+ "eval_runtime": 46.1621,
690
+ "eval_samples_per_second": 87.323,
691
+ "eval_steps_per_second": 2.73,
692
+ "step": 5130
693
  }
694
  ],
695
  "logging_steps": 114,
 
704
  "early_stopping_threshold": 0.0
705
  },
706
  "attributes": {
707
+ "early_stopping_patience_counter": 3
708
  }
709
  },
710
  "TrainerControl": {
 
713
  "should_evaluate": false,
714
  "should_log": false,
715
  "should_save": true,
716
+ "should_training_stop": true
717
  },
718
  "attributes": {}
719
  }
720
  },
721
+ "total_flos": 9.78979848758231e+17,
722
  "train_batch_size": 16,
723
  "trial_name": null,
724
  "trial_params": null