eddysang committed (verified)
Commit c5a8d62 · 1 Parent(s): bfa4e34

Training in progress, step 52, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:bfee01d8103d09d582705d572b496d7643cb63480e69b153f1382b5081aa8af8
+oid sha256:f1a1f7edba342dcae21bc1bcc931a8d18e8fcd5b7452808965c7181e3f8fb2f1
 size 500770656
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0ec7a651830d44bcee6bff272d2302cedf7b43d75bfde70141fda080f96fb44f
+oid sha256:c31134d8c644da3fe4e7fe5335cea6c99d373c2c3f4bfc42b0a9f8a83ee3881e
 size 1001863522
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7d83117332e05dc7fd0876bf6486e3944c984131f2f9ab4cca3b30ddf4c26a56
+oid sha256:8e4f84759c0d0345e3d0ec8c8f48cc65de394faca774211b25c2f484b7ca4dbc
 size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:afb9c3c0c925ea8cf32a1dcba2e2f6242aaac5236534a8df451537dd590ac10a
+oid sha256:a0083f46b3c06809c7ba2e6389d4ff484bc53254e300d739b3ff127153f94c8d
 size 1064
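
The four files above are Git LFS pointers: the repository itself stores only the `version`, `oid sha256:...`, and `size` lines, while the actual checkpoint blobs live in LFS storage. As a minimal sketch (the local file path is only an illustrative assumption), one can check that a downloaded file matches the pointer recorded in this commit by hashing it:

```python
import hashlib
from pathlib import Path

# Hypothetical local copy of the checkpoint file; oid and size are taken
# from the updated adapter_model.safetensors pointer in this commit.
path = Path("last-checkpoint/adapter_model.safetensors")
expected_oid = "f1a1f7edba342dcae21bc1bcc931a8d18e8fcd5b7452808965c7181e3f8fb2f1"
expected_size = 500770656

h = hashlib.sha256()
with path.open("rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):  # hash in 1 MiB chunks
        h.update(chunk)

assert path.stat().st_size == expected_size, "size mismatch with LFS pointer"
assert h.hexdigest() == expected_oid, "sha256 mismatch with LFS pointer"
print("local file matches the LFS pointer")
```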
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.5461706783369803,
+  "epoch": 0.7282275711159737,
   "eval_steps": 50,
-  "global_step": 39,
+  "global_step": 52,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -288,6 +288,105 @@
       "learning_rate": 0.00014223202029717776,
       "loss": 2.6442,
       "step": 39
+    },
+    {
+      "epoch": 0.5601750547045952,
+      "grad_norm": 7.39982271194458,
+      "learning_rate": 0.0001414092019239907,
+      "loss": 1.7663,
+      "step": 40
+    },
+    {
+      "epoch": 0.57417943107221,
+      "grad_norm": 8.183086395263672,
+      "learning_rate": 0.00014054760244698884,
+      "loss": 1.7819,
+      "step": 41
+    },
+    {
+      "epoch": 0.5881838074398249,
+      "grad_norm": 6.441479682922363,
+      "learning_rate": 0.00013964772501600368,
+      "loss": 1.7133,
+      "step": 42
+    },
+    {
+      "epoch": 0.6021881838074398,
+      "grad_norm": 11.74283504486084,
+      "learning_rate": 0.00013871009513411408,
+      "loss": 2.4207,
+      "step": 43
+    },
+    {
+      "epoch": 0.6161925601750547,
+      "grad_norm": 6.086061477661133,
+      "learning_rate": 0.00013773526035076698,
+      "loss": 1.271,
+      "step": 44
+    },
+    {
+      "epoch": 0.6301969365426696,
+      "grad_norm": 8.047619819641113,
+      "learning_rate": 0.00013672378994202423,
+      "loss": 1.837,
+      "step": 45
+    },
+    {
+      "epoch": 0.6442013129102845,
+      "grad_norm": 11.720404624938965,
+      "learning_rate": 0.00013567627457812106,
+      "loss": 1.9628,
+      "step": 46
+    },
+    {
+      "epoch": 0.6582056892778994,
+      "grad_norm": 7.418464660644531,
+      "learning_rate": 0.00013459332597853157,
+      "loss": 2.1906,
+      "step": 47
+    },
+    {
+      "epoch": 0.6722100656455142,
+      "grad_norm": 5.253023147583008,
+      "learning_rate": 0.00013347557655474167,
+      "loss": 1.2249,
+      "step": 48
+    },
+    {
+      "epoch": 0.6862144420131291,
+      "grad_norm": 6.372616291046143,
+      "learning_rate": 0.0001323236790409386,
+      "loss": 1.5337,
+      "step": 49
+    },
+    {
+      "epoch": 0.700218818380744,
+      "grad_norm": 6.444860458374023,
+      "learning_rate": 0.00013113830611283258,
+      "loss": 1.9269,
+      "step": 50
+    },
+    {
+      "epoch": 0.700218818380744,
+      "eval_loss": 0.06303766369819641,
+      "eval_runtime": 37.4056,
+      "eval_samples_per_second": 6.443,
+      "eval_steps_per_second": 3.235,
+      "step": 50
+    },
+    {
+      "epoch": 0.7142231947483588,
+      "grad_norm": 6.532812118530273,
+      "learning_rate": 0.00012992014999483302,
+      "loss": 1.7148,
+      "step": 51
+    },
+    {
+      "epoch": 0.7282275711159737,
+      "grad_norm": 5.408838748931885,
+      "learning_rate": 0.00012866992205580908,
+      "loss": 2.6251,
+      "step": 52
     }
   ],
   "logging_steps": 1,
@@ -307,7 +406,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 3.9802418233344e+17,
+  "total_flos": 5.3069890977792e+17,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null