baby-dev commited on
Commit
df95424
·
verified ·
1 Parent(s): 4782746

Training in progress, step 600, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c4e9ba1929ee972b77bcd72d468a37fe2ae415207ef7e59323b5fbe61463f4e4
3
  size 335604696
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:daaff03669a032b7c022a482789b2f3c995b01f72ad79b3cc8068b8f1e434c60
3
  size 335604696
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6af29e74f68512be973befebf6448552f1b28a5dcab1e75907db8bfa13fa2ff3
3
  size 170920532
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3c0ce3f9ee64b0c869751a3b334d7eed8318d1c81a574dbeacadb98868be199f
3
  size 170920532
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ab3d18275216c730a64e666d0a0b15c9be1210bce446ef49cbf8dbc3a28973b2
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8a5b7fc8ba333afed8a1a8bd04eb95e00d2bd168f67ada07b2962cd70cb734ce
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8a64b03d0773f91f522e81c4f703747e71530a9ddde23207251d238f2ace6db2
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:02db3dbfb390a8f0700b2ea5550de43719bf1ee696229ad5e263b02923655626
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 2.30838942527771,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-500",
4
- "epoch": 0.06826404532732609,
5
  "eval_steps": 100,
6
- "global_step": 500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -125,6 +125,28 @@
125
  "eval_samples_per_second": 35.116,
126
  "eval_steps_per_second": 8.779,
127
  "step": 500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
128
  }
129
  ],
130
  "logging_steps": 50,
@@ -139,7 +161,7 @@
139
  "early_stopping_threshold": 0.0
140
  },
141
  "attributes": {
142
- "early_stopping_patience_counter": 0
143
  }
144
  },
145
  "TrainerControl": {
@@ -153,7 +175,7 @@
153
  "attributes": {}
154
  }
155
  },
156
- "total_flos": 1.76813166821376e+17,
157
  "train_batch_size": 4,
158
  "trial_name": null,
159
  "trial_params": null
 
1
  {
2
  "best_metric": 2.30838942527771,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-500",
4
+ "epoch": 0.08191685439279131,
5
  "eval_steps": 100,
6
+ "global_step": 600,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
125
  "eval_samples_per_second": 35.116,
126
  "eval_steps_per_second": 8.779,
127
  "step": 500
128
+ },
129
+ {
130
+ "epoch": 0.07509044986005871,
131
+ "grad_norm": 115.34134674072266,
132
+ "learning_rate": 0.00025,
133
+ "loss": 8.8665,
134
+ "step": 550
135
+ },
136
+ {
137
+ "epoch": 0.08191685439279131,
138
+ "grad_norm": 85.31719207763672,
139
+ "learning_rate": 0.00025,
140
+ "loss": 9.0255,
141
+ "step": 600
142
+ },
143
+ {
144
+ "epoch": 0.08191685439279131,
145
+ "eval_loss": 2.3584725856781006,
146
+ "eval_runtime": 175.5365,
147
+ "eval_samples_per_second": 35.138,
148
+ "eval_steps_per_second": 8.784,
149
+ "step": 600
150
  }
151
  ],
152
  "logging_steps": 50,
 
161
  "early_stopping_threshold": 0.0
162
  },
163
  "attributes": {
164
+ "early_stopping_patience_counter": 1
165
  }
166
  },
167
  "TrainerControl": {
 
175
  "attributes": {}
176
  }
177
  },
178
+ "total_flos": 2.121758001856512e+17,
179
  "train_batch_size": 4,
180
  "trial_name": null,
181
  "trial_params": null