eddysang commited on
Commit
9f4aab9
·
verified ·
1 Parent(s): fe7bf58

Training in progress, step 68, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2b14a95c7f959dcbd54f89ee9ea99cc1dbf6d6e00e3dd380e71efa60af12ee96
3
  size 97728
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e86ee6e154e49069091d893455757e17295ee85019f64a663e52903b19cfc1cf
3
  size 97728
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5e15873f91766aa485609b54a44fa78a3d3396e31ce52599854df27197085e2a
3
  size 212298
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8e1a75388d3fb482b018a16ca66ce2bf8d2b482576fc3daf3f841c59f4c236cb
3
  size 212298
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:aeaac88ea211375a529b1fd2970f1cc397730f5d948fdb30bae28e4d8b193fb4
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:051417fc01d33b78aa641f032182ab64da1fcc15fe5a876899f334dcea2dc357
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c86702c0a3caad6c51746e54805a7289de03dff9cc5abc148a58966cf1f4d339
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3f4e77fd2a3bb3f08929494d77da2f57f8781f91a45852bcf8f71a5777dd088c
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.17278983589200636,
5
  "eval_steps": 50,
6
- "global_step": 51,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -380,6 +380,125 @@
380
  "learning_rate": 0.00013928754755265842,
381
  "loss": 10.3514,
382
  "step": 51
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
383
  }
384
  ],
385
  "logging_steps": 1,
@@ -399,7 +518,7 @@
399
  "attributes": {}
400
  }
401
  },
402
- "total_flos": 21825075412992.0,
403
  "train_batch_size": 2,
404
  "trial_name": null,
405
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.23038644785600848,
5
  "eval_steps": 50,
6
+ "global_step": 68,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
380
  "learning_rate": 0.00013928754755265842,
381
  "loss": 10.3514,
382
  "step": 51
383
+ },
384
+ {
385
+ "epoch": 0.17617787188988884,
386
+ "grad_norm": 0.06112508103251457,
387
+ "learning_rate": 0.00013860360721173193,
388
+ "loss": 10.3511,
389
+ "step": 52
390
+ },
391
+ {
392
+ "epoch": 0.1795659078877713,
393
+ "grad_norm": 0.06208924576640129,
394
+ "learning_rate": 0.0001379002925959068,
395
+ "loss": 10.3505,
396
+ "step": 53
397
+ },
398
+ {
399
+ "epoch": 0.18295394388565378,
400
+ "grad_norm": 0.057701293379068375,
401
+ "learning_rate": 0.0001371778179416281,
402
+ "loss": 10.3505,
403
+ "step": 54
404
+ },
405
+ {
406
+ "epoch": 0.18634197988353626,
407
+ "grad_norm": 0.050447553396224976,
408
+ "learning_rate": 0.00013643640332167438,
409
+ "loss": 10.349,
410
+ "step": 55
411
+ },
412
+ {
413
+ "epoch": 0.18973001588141875,
414
+ "grad_norm": 0.05121852830052376,
415
+ "learning_rate": 0.00013567627457812106,
416
+ "loss": 10.3496,
417
+ "step": 56
418
+ },
419
+ {
420
+ "epoch": 0.19311805187930123,
421
+ "grad_norm": 0.04376755282282829,
422
+ "learning_rate": 0.00013489766325354695,
423
+ "loss": 10.3488,
424
+ "step": 57
425
+ },
426
+ {
427
+ "epoch": 0.19650608787718368,
428
+ "grad_norm": 0.04570882394909859,
429
+ "learning_rate": 0.00013410080652050412,
430
+ "loss": 10.3493,
431
+ "step": 58
432
+ },
433
+ {
434
+ "epoch": 0.19989412387506617,
435
+ "grad_norm": 0.047347478568553925,
436
+ "learning_rate": 0.0001332859471092728,
437
+ "loss": 10.35,
438
+ "step": 59
439
+ },
440
+ {
441
+ "epoch": 0.20328215987294865,
442
+ "grad_norm": 0.048713624477386475,
443
+ "learning_rate": 0.00013245333323392333,
444
+ "loss": 10.3484,
445
+ "step": 60
446
+ },
447
+ {
448
+ "epoch": 0.20667019587083113,
449
+ "grad_norm": 0.04044219106435776,
450
+ "learning_rate": 0.0001316032185167079,
451
+ "loss": 10.3485,
452
+ "step": 61
453
+ },
454
+ {
455
+ "epoch": 0.2100582318687136,
456
+ "grad_norm": 0.03352762386202812,
457
+ "learning_rate": 0.00013073586191080457,
458
+ "loss": 10.3494,
459
+ "step": 62
460
+ },
461
+ {
462
+ "epoch": 0.2134462678665961,
463
+ "grad_norm": 0.03678755834698677,
464
+ "learning_rate": 0.00012985152762143778,
465
+ "loss": 10.3492,
466
+ "step": 63
467
+ },
468
+ {
469
+ "epoch": 0.21683430386447855,
470
+ "grad_norm": 0.035951532423496246,
471
+ "learning_rate": 0.00012895048502539882,
472
+ "loss": 10.3475,
473
+ "step": 64
474
+ },
475
+ {
476
+ "epoch": 0.22022233986236103,
477
+ "grad_norm": 0.04304710775613785,
478
+ "learning_rate": 0.00012803300858899104,
479
+ "loss": 10.349,
480
+ "step": 65
481
+ },
482
+ {
483
+ "epoch": 0.22361037586024352,
484
+ "grad_norm": 0.035194698721170425,
485
+ "learning_rate": 0.0001270993777844248,
486
+ "loss": 10.3468,
487
+ "step": 66
488
+ },
489
+ {
490
+ "epoch": 0.226998411858126,
491
+ "grad_norm": 0.029510466381907463,
492
+ "learning_rate": 0.0001261498770046874,
493
+ "loss": 10.3484,
494
+ "step": 67
495
+ },
496
+ {
497
+ "epoch": 0.23038644785600848,
498
+ "grad_norm": 0.02603858895599842,
499
+ "learning_rate": 0.00012518479547691435,
500
+ "loss": 10.3485,
501
+ "step": 68
502
  }
503
  ],
504
  "logging_steps": 1,
 
518
  "attributes": {}
519
  }
520
  },
521
+ "total_flos": 29122375581696.0,
522
  "train_batch_size": 2,
523
  "trial_name": null,
524
  "trial_params": null