tryingpro commited on
Commit
8f91ca8
·
verified ·
1 Parent(s): 0b46ac0

Training in progress, step 72, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6d78a9012c6e065bcf14f3d35f286d5e7b124a0efd18f76b69fe914af3f08a98
3
  size 319876032
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:80200b7412a4d398d83f5e1c5b2790c427ac843ac6f8dcc4ca7165f149bbe890
3
  size 319876032
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1cc6e2224b3c3c885410bfc9b1f2156db041cd811531070f3aa15eff3f01dd00
3
  size 640009682
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f82149b96dd41c99aa2266e2fb6f3c7a68484a26112c5c82203a7dc357fdf591
3
  size 640009682
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:26ea915120766314e2bacd5006186d9012cb095e8e05a1dafbc048035142676f
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:54695bae591aceb2574c0570650d7a15033601e879a5235bc5ef9cc452369efd
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:79429063411e5f67f663bb03057ea480ea282e8a7a96044eb033f6b9f55143b5
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:62f589044fe72a89eeac5f01dfea2599de04455cb66e0a4998ee4eb766326c4c
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.03994071300413449,
5
  "eval_steps": 8,
6
- "global_step": 64,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -226,6 +226,35 @@
226
  "eval_samples_per_second": 9.394,
227
  "eval_steps_per_second": 4.697,
228
  "step": 64
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
229
  }
230
  ],
231
  "logging_steps": 3,
@@ -245,7 +274,7 @@
245
  "attributes": {}
246
  }
247
  },
248
- "total_flos": 3.4049229412368384e+17,
249
  "train_batch_size": 2,
250
  "trial_name": null,
251
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.0449333021296513,
5
  "eval_steps": 8,
6
+ "global_step": 72,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
226
  "eval_samples_per_second": 9.394,
227
  "eval_steps_per_second": 4.697,
228
  "step": 64
229
+ },
230
+ {
231
+ "epoch": 0.04118886028551369,
232
+ "grad_norm": 0.35130053758621216,
233
+ "learning_rate": 5.261313375270014e-05,
234
+ "loss": 0.8259,
235
+ "step": 66
236
+ },
237
+ {
238
+ "epoch": 0.043061081207582494,
239
+ "grad_norm": 0.32489344477653503,
240
+ "learning_rate": 4.12214747707527e-05,
241
+ "loss": 0.765,
242
+ "step": 69
243
+ },
244
+ {
245
+ "epoch": 0.0449333021296513,
246
+ "grad_norm": 0.3194122910499573,
247
+ "learning_rate": 3.089373510131354e-05,
248
+ "loss": 0.7716,
249
+ "step": 72
250
+ },
251
+ {
252
+ "epoch": 0.0449333021296513,
253
+ "eval_loss": 0.7537363767623901,
254
+ "eval_runtime": 574.4019,
255
+ "eval_samples_per_second": 9.398,
256
+ "eval_steps_per_second": 4.699,
257
+ "step": 72
258
  }
259
  ],
260
  "logging_steps": 3,
 
274
  "attributes": {}
275
  }
276
  },
277
+ "total_flos": 3.830538308891443e+17,
278
  "train_batch_size": 2,
279
  "trial_name": null,
280
  "trial_params": null