dixedus commited on
Commit
67b9669
·
verified ·
1 Parent(s): 0947dca

Training in progress, step 102, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:be4a3e12ace7db54eb920b934f6c0ee05cc4f4a8c194adf5d5df9e3b0cdd68c9
3
  size 34793120
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:be35ee36755478e76a6f4ad9db283dac6d2eb1a117f15b7c878f4bd414dd2feb
3
  size 34793120
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7bedc55c1f8f18184593c54334179fa1f18a4e48f30fc591992b4c57dd36d69e
3
  size 18132116
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:26dc26a33c15a18b360aa7a53225e04ab4eaa8b9255524cb6abac713731bbb76
3
  size 18132116
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c81be7588d13c4d18f0badc374dddf114640e4f47aaffe28b0dd04921466a3ad
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a6c0c3b8743a7e8d1ccbd11d11a82d7e1b52534db8cb37208129f7a94751cd17
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:24510621ac929102038dbb9557bac0e17f0ed52c36608ebab8e700d1b1464f1d
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fcbdf5cce354397b1cc7dbc75ae72cd1ce74fbf84991f656a8ae8c5ec4cf6c4c
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.4689655172413793,
5
  "eval_steps": 17,
6
- "global_step": 85,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -251,6 +251,56 @@
251
  "eval_samples_per_second": 68.623,
252
  "eval_steps_per_second": 8.775,
253
  "step": 85
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
254
  }
255
  ],
256
  "logging_steps": 3,
@@ -270,7 +320,7 @@
270
  "attributes": {}
271
  }
272
  },
273
- "total_flos": 5403198986649600.0,
274
  "train_batch_size": 8,
275
  "trial_name": null,
276
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.5627586206896552,
5
  "eval_steps": 17,
6
+ "global_step": 102,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
251
  "eval_samples_per_second": 68.623,
252
  "eval_steps_per_second": 8.775,
253
  "step": 85
254
+ },
255
+ {
256
+ "epoch": 0.48,
257
+ "grad_norm": 0.5210072994232178,
258
+ "learning_rate": 6.466250186922325e-05,
259
+ "loss": 2.006,
260
+ "step": 87
261
+ },
262
+ {
263
+ "epoch": 0.496551724137931,
264
+ "grad_norm": 0.4956110119819641,
265
+ "learning_rate": 6.227427435703997e-05,
266
+ "loss": 2.0443,
267
+ "step": 90
268
+ },
269
+ {
270
+ "epoch": 0.5131034482758621,
271
+ "grad_norm": 0.5796875357627869,
272
+ "learning_rate": 5.985585137257401e-05,
273
+ "loss": 2.173,
274
+ "step": 93
275
+ },
276
+ {
277
+ "epoch": 0.5296551724137931,
278
+ "grad_norm": 0.5329291224479675,
279
+ "learning_rate": 5.74131823855921e-05,
280
+ "loss": 2.1224,
281
+ "step": 96
282
+ },
283
+ {
284
+ "epoch": 0.5462068965517242,
285
+ "grad_norm": 0.555596113204956,
286
+ "learning_rate": 5.495227651252315e-05,
287
+ "loss": 2.0329,
288
+ "step": 99
289
+ },
290
+ {
291
+ "epoch": 0.5627586206896552,
292
+ "grad_norm": 0.462854266166687,
293
+ "learning_rate": 5.247918773366112e-05,
294
+ "loss": 1.9292,
295
+ "step": 102
296
+ },
297
+ {
298
+ "epoch": 0.5627586206896552,
299
+ "eval_loss": 2.1433825492858887,
300
+ "eval_runtime": 4.4422,
301
+ "eval_samples_per_second": 68.66,
302
+ "eval_steps_per_second": 8.779,
303
+ "step": 102
304
  }
305
  ],
306
  "logging_steps": 3,
 
320
  "attributes": {}
321
  }
322
  },
323
+ "total_flos": 6483838783979520.0,
324
  "train_batch_size": 8,
325
  "trial_name": null,
326
  "trial_params": null