lesso18 commited on
Commit
70c136c
·
verified ·
1 Parent(s): 38907bf

Training in progress, step 200, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:770c5ef10e164b978041217efa00995725baee1a96e1bcab08e7c0f08a1a1142
3
  size 671149168
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:75f59464bfc44a795ce67c13121418dcb7761ee089cb4bd4498344162bfbd975
3
  size 671149168
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:15b57d57833f094609f80c37c5bd1f6d794f57eed1b2568d8bd0fe6960af82d5
3
  size 341314196
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5c0b8266476e98e91064e6e8cb6cb63d19610892851ea71d739ed34351148dfa
3
  size 341314196
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e6cb22d889af96bc5968098482fe54159da994b406807fec9cba907cd974afc0
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:987bfc5266f94f9eaf4eb2e472c93f1ab68943e41dbe5bc6bc4a25adabfaef8c
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:59836f5c73dcbc1cd9e839a79f50d96177860a3d03c4212fba8b4249bf77b097
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5bd3e6d855545c1432814c389e729ea8251b6467b50d9c34c2b5ab61d032f043
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 1.6451599597930908,
3
- "best_model_checkpoint": "miner_id_24/checkpoint-150",
4
- "epoch": 0.45112781954887216,
5
  "eval_steps": 50,
6
- "global_step": 150,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -144,6 +144,49 @@
144
  "eval_samples_per_second": 16.363,
145
  "eval_steps_per_second": 4.091,
146
  "step": 150
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
147
  }
148
  ],
149
  "logging_steps": 10,
@@ -172,7 +215,7 @@
172
  "attributes": {}
173
  }
174
  },
175
- "total_flos": 2.82846296014848e+16,
176
  "train_batch_size": 4,
177
  "trial_name": null,
178
  "trial_params": null
 
1
  {
2
+ "best_metric": 1.3894352912902832,
3
+ "best_model_checkpoint": "miner_id_24/checkpoint-200",
4
+ "epoch": 0.6015037593984962,
5
  "eval_steps": 50,
6
+ "global_step": 200,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
144
  "eval_samples_per_second": 16.363,
145
  "eval_steps_per_second": 4.091,
146
  "step": 150
147
+ },
148
+ {
149
+ "epoch": 0.48120300751879697,
150
+ "grad_norm": 6.32410192489624,
151
+ "learning_rate": 0.00014634335741888678,
152
+ "loss": 1.5461,
153
+ "step": 160
154
+ },
155
+ {
156
+ "epoch": 0.5112781954887218,
157
+ "grad_norm": 3.9985594749450684,
158
+ "learning_rate": 0.00013476898507990882,
159
+ "loss": 1.1999,
160
+ "step": 170
161
+ },
162
+ {
163
+ "epoch": 0.5413533834586466,
164
+ "grad_norm": 5.48227071762085,
165
+ "learning_rate": 0.00012287737989477975,
166
+ "loss": 1.088,
167
+ "step": 180
168
+ },
169
+ {
170
+ "epoch": 0.5714285714285714,
171
+ "grad_norm": 6.1915602684021,
172
+ "learning_rate": 0.0001108149352020996,
173
+ "loss": 1.3135,
174
+ "step": 190
175
+ },
176
+ {
177
+ "epoch": 0.6015037593984962,
178
+ "grad_norm": 8.36176586151123,
179
+ "learning_rate": 9.873014748512275e-05,
180
+ "loss": 1.4151,
181
+ "step": 200
182
+ },
183
+ {
184
+ "epoch": 0.6015037593984962,
185
+ "eval_loss": 1.3894352912902832,
186
+ "eval_runtime": 8.5422,
187
+ "eval_samples_per_second": 16.389,
188
+ "eval_steps_per_second": 4.097,
189
+ "step": 200
190
  }
191
  ],
192
  "logging_steps": 10,
 
215
  "attributes": {}
216
  }
217
  },
218
+ "total_flos": 3.77128394686464e+16,
219
  "train_batch_size": 4,
220
  "trial_name": null,
221
  "trial_params": null