lesso01 committed on
Commit
9a98b43
·
verified ·
1 Parent(s): 746742c

Training in progress, step 200, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9e2446240d353c1f64cd3e199db9012e63178e7af9c88c8e7b4eb27f0e98311f
3
  size 639691872
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:85f011a34046b6096ee0ee36b5807fb5ce0698cb170d9ea1d69b6e2bea5a5cb2
3
  size 639691872
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e4b86cf7e073f97eabad82028f7649b86b9c8f16c4ae8ca878f05af20e6145f9
3
  size 325339796
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5f420036f34db53076133388a78fceb9c0431ee2247317b43b2c379e1b74ca75
3
  size 325339796
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:17e33bc2ae45877b03713d1b54e9f0e817e6bd75f767e38ef69da10a08dd2c60
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:df7f2990010235b504c68672f1e5f14dd3dc9391974bcae7dca89c49b46c5863
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cd7cf500208b0b7f84d388a948be38cab3fa49ec9e5ed4521e775b2c1f3ef453
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:debca7aba43b980bfea4b7bfb85075c00dd93d0ca0b163523a96099f24c727ea
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 0.31679707765579224,
3
- "best_model_checkpoint": "miner_id_24/checkpoint-150",
4
- "epoch": 0.13303769401330376,
5
  "eval_steps": 50,
6
- "global_step": 150,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -144,6 +144,49 @@
144
  "eval_samples_per_second": 12.445,
145
  "eval_steps_per_second": 3.118,
146
  "step": 150
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
147
  }
148
  ],
149
  "logging_steps": 10,
@@ -172,7 +215,7 @@
172
  "attributes": {}
173
  }
174
  },
175
- "total_flos": 3.77448931786752e+16,
176
  "train_batch_size": 4,
177
  "trial_name": null,
178
  "trial_params": null
 
1
  {
2
+ "best_metric": 0.3108082711696625,
3
+ "best_model_checkpoint": "miner_id_24/checkpoint-200",
4
+ "epoch": 0.17738359201773837,
5
  "eval_steps": 50,
6
+ "global_step": 200,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
144
  "eval_samples_per_second": 12.445,
145
  "eval_steps_per_second": 3.118,
146
  "step": 150
147
+ },
148
+ {
149
+ "epoch": 0.1419068736141907,
150
+ "grad_norm": 0.9246871471405029,
151
+ "learning_rate": 0.00017279364993403443,
152
+ "loss": 0.312,
153
+ "step": 160
154
+ },
155
+ {
156
+ "epoch": 0.15077605321507762,
157
+ "grad_norm": 2.4459915161132812,
158
+ "learning_rate": 0.00016774762593906525,
159
+ "loss": 0.3171,
160
+ "step": 170
161
+ },
162
+ {
163
+ "epoch": 0.15964523281596452,
164
+ "grad_norm": 1.1200841665267944,
165
+ "learning_rate": 0.00016237397827022866,
166
+ "loss": 0.2871,
167
+ "step": 180
168
+ },
169
+ {
170
+ "epoch": 0.16851441241685144,
171
+ "grad_norm": 1.3566011190414429,
172
+ "learning_rate": 0.00015669888679881007,
173
+ "loss": 0.171,
174
+ "step": 190
175
+ },
176
+ {
177
+ "epoch": 0.17738359201773837,
178
+ "grad_norm": 1.6793667078018188,
179
+ "learning_rate": 0.00015075,
180
+ "loss": 0.1994,
181
+ "step": 200
182
+ },
183
+ {
184
+ "epoch": 0.17738359201773837,
185
+ "eval_loss": 0.3108082711696625,
186
+ "eval_runtime": 38.1848,
187
+ "eval_samples_per_second": 12.44,
188
+ "eval_steps_per_second": 3.116,
189
+ "step": 200
190
  }
191
  ],
192
  "logging_steps": 10,
 
215
  "attributes": {}
216
  }
217
  },
218
+ "total_flos": 5.015876915743949e+16,
219
  "train_batch_size": 4,
220
  "trial_name": null,
221
  "trial_params": null