eddysang commited on
Commit
eadbdb9
·
verified ·
1 Parent(s): 37b99d0

Training in progress, step 54, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:795d9bc010f9024cfc519a932bbf5d9f224c5305587a440075604f0e93b50adf
3
  size 319876032
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a2a3673ac6ecfcf9fbd4e8a74b81927275b26bd0684e838caa39b7caf422cf88
3
  size 319876032
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:87e3fea581a136135d89bf36c83792eff24f24efbc1064d189e71138ec095b3f
3
  size 640009682
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a6817fbbcd33480f164784483673185908595dff99afaaa869a566aa99376d9a
3
  size 640009682
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d2c838b63f3004ffa4c11a6eaefac6d22bf6e2f78bfb1531b43e7aa35aacfbd1
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9f07b4f2bcc3f569f306e9b6554b0fe04785f073abde4e0109bc3d3d5eb765c2
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b665a2618254d0693455029a117143eb612684fd8f287fa7207d501afc785521
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6f68b4a4ba4603d307a8be653dd70e693a9f3506ea1af5f24d2d241dc54cfcb1
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.968392737054472,
5
  "eval_steps": 50,
6
- "global_step": 45,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -330,6 +330,77 @@
330
  "learning_rate": 0.00011666776747647015,
331
  "loss": 32.9791,
332
  "step": 45
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
333
  }
334
  ],
335
  "logging_steps": 1,
@@ -349,7 +420,7 @@
349
  "attributes": {}
350
  }
351
  },
352
- "total_flos": 2.3666203419475968e+17,
353
  "train_batch_size": 2,
354
  "trial_name": null,
355
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 1.1620712844653664,
5
  "eval_steps": 50,
6
+ "global_step": 54,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
330
  "learning_rate": 0.00011666776747647015,
331
  "loss": 32.9791,
332
  "step": 45
333
+ },
334
+ {
335
+ "epoch": 0.9899125756556826,
336
+ "grad_norm": 22.867385864257812,
337
+ "learning_rate": 0.00011418739235369615,
338
+ "loss": 23.7823,
339
+ "step": 46
340
+ },
341
+ {
342
+ "epoch": 1.0114324142568931,
343
+ "grad_norm": 18.280826568603516,
344
+ "learning_rate": 0.00011164659311227163,
345
+ "loss": 32.527,
346
+ "step": 47
347
+ },
348
+ {
349
+ "epoch": 1.0329522528581037,
350
+ "grad_norm": 19.427658081054688,
351
+ "learning_rate": 0.00010904928748046599,
352
+ "loss": 27.8595,
353
+ "step": 48
354
+ },
355
+ {
356
+ "epoch": 1.054472091459314,
357
+ "grad_norm": 19.467891693115234,
358
+ "learning_rate": 0.0001063994803153071,
359
+ "loss": 26.26,
360
+ "step": 49
361
+ },
362
+ {
363
+ "epoch": 1.0759919300605245,
364
+ "grad_norm": 16.39354705810547,
365
+ "learning_rate": 0.00010370125742738173,
366
+ "loss": 24.7449,
367
+ "step": 50
368
+ },
369
+ {
370
+ "epoch": 1.0759919300605245,
371
+ "eval_loss": 0.8254633545875549,
372
+ "eval_runtime": 26.532,
373
+ "eval_samples_per_second": 5.917,
374
+ "eval_steps_per_second": 2.978,
375
+ "step": 50
376
+ },
377
+ {
378
+ "epoch": 1.097511768661735,
379
+ "grad_norm": 27.119155883789062,
380
+ "learning_rate": 0.00010095877928081196,
381
+ "loss": 25.2238,
382
+ "step": 51
383
+ },
384
+ {
385
+ "epoch": 1.1190316072629456,
386
+ "grad_norm": 21.737436294555664,
387
+ "learning_rate": 9.817627457812105e-05,
388
+ "loss": 24.4688,
389
+ "step": 52
390
+ },
391
+ {
392
+ "epoch": 1.1405514458641561,
393
+ "grad_norm": 18.230609893798828,
394
+ "learning_rate": 9.535803373988056e-05,
395
+ "loss": 19.7777,
396
+ "step": 53
397
+ },
398
+ {
399
+ "epoch": 1.1620712844653664,
400
+ "grad_norm": 17.096773147583008,
401
+ "learning_rate": 9.25084022891929e-05,
402
+ "loss": 26.5253,
403
+ "step": 54
404
  }
405
  ],
406
  "logging_steps": 1,
 
420
  "attributes": {}
421
  }
422
  },
423
+ "total_flos": 2.839944410337116e+17,
424
  "train_batch_size": 2,
425
  "trial_name": null,
426
  "trial_params": null