Training in progress, step 700, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +361 -3

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:271a1bc981038a2e9a2be89f59b086109d0acdc161e99c620f8d27a304cd854a
 size 556856304

 version https://git-lfs.github.com/spec/v1
+oid sha256:80abaf385de52296916718e681935c6768a0a32c4d5289ee386f17e1664b163f
 size 556856304

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f3d999bd034df420c2d4748e53b52965d2c7e75c692a0b66bf826ca3f10ee1c8
 size 21599316

 version https://git-lfs.github.com/spec/v1
+oid sha256:580ac5ff6f77821425e037ad3c74c0bfd112859b028a2db428feb4839651d2af
 size 21599316

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:bfdf4d2c7d06a45608d3aefb664ef7c0b1e7c1768d6d28419ae2acb17d46d34a
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:486b6099b25fd8543d7c61a0c8411551172858931459e611b5edfdb1e356f30a
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:344239db3127022ab4555da8a93e03326ad98987159e0d995028c1aed64245ed
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:7650a95ad72c1e724156e425d1136ef779a743435f328ed6c165002f9e288809
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.012315504272532637,
   "eval_steps": 50,
-  "global_step": 650,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -4669,6 +4669,364 @@
       "eval_samples_per_second": 40.017,
       "eval_steps_per_second": 20.009,
       "step": 650
     }
   ],
   "logging_steps": 1,
@@ -4688,7 +5046,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 6.93432181850112e+16,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.013262850755035147,
   "eval_steps": 50,
+  "global_step": 700,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 40.017,
       "eval_steps_per_second": 20.009,
       "step": 650
+    },
+    {
+      "epoch": 0.012334451202182687,
+      "grad_norm": 0.13303719460964203,
+      "learning_rate": 0.0001999999994930954,
+      "loss": 0.0354,
+      "step": 651
+    },
+    {
+      "epoch": 0.012353398131832737,
+      "grad_norm": 0.15215599536895752,
+      "learning_rate": 0.00019999999949151252,
+      "loss": 0.0312,
+      "step": 652
+    },
+    {
+      "epoch": 0.012372345061482787,
+      "grad_norm": 0.12846222519874573,
+      "learning_rate": 0.00019999999948992723,
+      "loss": 0.0323,
+      "step": 653
+    },
+    {
+      "epoch": 0.012391291991132838,
+      "grad_norm": 0.621687650680542,
+      "learning_rate": 0.00019999999948833947,
+      "loss": 0.0343,
+      "step": 654
+    },
+    {
+      "epoch": 0.012410238920782888,
+      "grad_norm": 0.7231767773628235,
+      "learning_rate": 0.00019999999948674924,
+      "loss": 0.0284,
+      "step": 655
+    },
+    {
+      "epoch": 0.012429185850432938,
+      "grad_norm": 0.07879934459924698,
+      "learning_rate": 0.0001999999994851565,
+      "loss": 0.0335,
+      "step": 656
+    },
+    {
+      "epoch": 0.012448132780082987,
+      "grad_norm": 0.08739863336086273,
+      "learning_rate": 0.00019999999948356134,
+      "loss": 0.0412,
+      "step": 657
+    },
+    {
+      "epoch": 0.012467079709733037,
+      "grad_norm": 0.14491000771522522,
+      "learning_rate": 0.00019999999948196369,
+      "loss": 0.0265,
+      "step": 658
+    },
+    {
+      "epoch": 0.012486026639383087,
+      "grad_norm": 0.09429813921451569,
+      "learning_rate": 0.0001999999994803636,
+      "loss": 0.0246,
+      "step": 659
+    },
+    {
+      "epoch": 0.012504973569033137,
+      "grad_norm": 0.282419353723526,
+      "learning_rate": 0.000199999999478761,
+      "loss": 0.0805,
+      "step": 660
+    },
+    {
+      "epoch": 0.012523920498683188,
+      "grad_norm": 0.1799023151397705,
+      "learning_rate": 0.00019999999947715598,
+      "loss": 0.0264,
+      "step": 661
+    },
+    {
+      "epoch": 0.012542867428333238,
+      "grad_norm": 0.18289320170879364,
+      "learning_rate": 0.00019999999947554846,
+      "loss": 0.0493,
+      "step": 662
+    },
+    {
+      "epoch": 0.012561814357983288,
+      "grad_norm": 0.23105517029762268,
+      "learning_rate": 0.00019999999947393845,
+      "loss": 0.0388,
+      "step": 663
+    },
+    {
+      "epoch": 0.012580761287633338,
+      "grad_norm": 0.19387099146842957,
+      "learning_rate": 0.00019999999947232602,
+      "loss": 0.0294,
+      "step": 664
+    },
+    {
+      "epoch": 0.012599708217283389,
+      "grad_norm": 0.07087501883506775,
+      "learning_rate": 0.00019999999947071108,
+      "loss": 0.0219,
+      "step": 665
+    },
+    {
+      "epoch": 0.012618655146933439,
+      "grad_norm": 0.3370932936668396,
+      "learning_rate": 0.00019999999946909372,
+      "loss": 0.0639,
+      "step": 666
+    },
+    {
+      "epoch": 0.012637602076583489,
+      "grad_norm": 0.22018681466579437,
+      "learning_rate": 0.00019999999946747387,
+      "loss": 0.0441,
+      "step": 667
+    },
+    {
+      "epoch": 0.01265654900623354,
+      "grad_norm": 0.06425543874502182,
+      "learning_rate": 0.00019999999946585155,
+      "loss": 0.0174,
+      "step": 668
+    },
+    {
+      "epoch": 0.01267549593588359,
+      "grad_norm": 0.26767414808273315,
+      "learning_rate": 0.00019999999946422676,
+      "loss": 0.0542,
+      "step": 669
+    },
+    {
+      "epoch": 0.01269444286553364,
+      "grad_norm": 0.19884276390075684,
+      "learning_rate": 0.0001999999994625995,
+      "loss": 0.0294,
+      "step": 670
+    },
+    {
+      "epoch": 0.01271338979518369,
+      "grad_norm": 0.17157883942127228,
+      "learning_rate": 0.0001999999994609698,
+      "loss": 0.0324,
+      "step": 671
+    },
+    {
+      "epoch": 0.01273233672483374,
+      "grad_norm": 0.07747308164834976,
+      "learning_rate": 0.0001999999994593376,
+      "loss": 0.0285,
+      "step": 672
+    },
+    {
+      "epoch": 0.01275128365448379,
+      "grad_norm": 0.06676504760980606,
+      "learning_rate": 0.00019999999945770295,
+      "loss": 0.0238,
+      "step": 673
+    },
+    {
+      "epoch": 0.01277023058413384,
+      "grad_norm": 0.3494616448879242,
+      "learning_rate": 0.00019999999945606584,
+      "loss": 0.0369,
+      "step": 674
+    },
+    {
+      "epoch": 0.012789177513783891,
+      "grad_norm": 0.3873502016067505,
+      "learning_rate": 0.00019999999945442625,
+      "loss": 0.0668,
+      "step": 675
+    },
+    {
+      "epoch": 0.012808124443433941,
+      "grad_norm": 0.12122321873903275,
+      "learning_rate": 0.00019999999945278417,
+      "loss": 0.034,
+      "step": 676
+    },
+    {
+      "epoch": 0.012827071373083991,
+      "grad_norm": 0.15738695859909058,
+      "learning_rate": 0.00019999999945113965,
+      "loss": 0.0318,
+      "step": 677
+    },
+    {
+      "epoch": 0.012846018302734042,
+      "grad_norm": 0.24829640984535217,
+      "learning_rate": 0.00019999999944949267,
+      "loss": 0.0497,
+      "step": 678
+    },
+    {
+      "epoch": 0.012864965232384092,
+      "grad_norm": 0.26685649156570435,
+      "learning_rate": 0.0001999999994478432,
+      "loss": 0.053,
+      "step": 679
+    },
+    {
+      "epoch": 0.012883912162034142,
+      "grad_norm": 0.21460634469985962,
+      "learning_rate": 0.00019999999944619127,
+      "loss": 0.0323,
+      "step": 680
+    },
+    {
+      "epoch": 0.012902859091684192,
+      "grad_norm": 0.29949328303337097,
+      "learning_rate": 0.0001999999994445369,
+      "loss": 0.0359,
+      "step": 681
+    },
+    {
+      "epoch": 0.012921806021334243,
+      "grad_norm": 0.22662204504013062,
+      "learning_rate": 0.00019999999944288,
+      "loss": 0.0463,
+      "step": 682
+    },
+    {
+      "epoch": 0.012940752950984293,
+      "grad_norm": 0.164027601480484,
+      "learning_rate": 0.0001999999994412207,
+      "loss": 0.0387,
+      "step": 683
+    },
+    {
+      "epoch": 0.012959699880634343,
+      "grad_norm": 0.12347234785556793,
+      "learning_rate": 0.00019999999943955888,
+      "loss": 0.0301,
+      "step": 684
+    },
+    {
+      "epoch": 0.012978646810284393,
+      "grad_norm": 0.35990050435066223,
+      "learning_rate": 0.00019999999943789463,
+      "loss": 0.0402,
+      "step": 685
+    },
+    {
+      "epoch": 0.012997593739934444,
+      "grad_norm": 0.16936954855918884,
+      "learning_rate": 0.00019999999943622792,
+      "loss": 0.0449,
+      "step": 686
+    },
+    {
+      "epoch": 0.013016540669584494,
+      "grad_norm": 0.2638076841831207,
+      "learning_rate": 0.0001999999994345587,
+      "loss": 0.0564,
+      "step": 687
+    },
+    {
+      "epoch": 0.013035487599234544,
+      "grad_norm": 0.2737369239330292,
+      "learning_rate": 0.00019999999943288703,
+      "loss": 0.0477,
+      "step": 688
+    },
+    {
+      "epoch": 0.013054434528884594,
+      "grad_norm": 0.14391621947288513,
+      "learning_rate": 0.0001999999994312129,
+      "loss": 0.0374,
+      "step": 689
+    },
+    {
+      "epoch": 0.013073381458534645,
+      "grad_norm": 0.049424514174461365,
+      "learning_rate": 0.00019999999942953628,
+      "loss": 0.021,
+      "step": 690
+    },
+    {
+      "epoch": 0.013092328388184695,
+      "grad_norm": 0.22283080220222473,
+      "learning_rate": 0.00019999999942785723,
+      "loss": 0.045,
+      "step": 691
+    },
+    {
+      "epoch": 0.013111275317834745,
+      "grad_norm": 0.15707112848758698,
+      "learning_rate": 0.00019999999942617568,
+      "loss": 0.0307,
+      "step": 692
+    },
+    {
+      "epoch": 0.013130222247484795,
+      "grad_norm": 0.18966948986053467,
+      "learning_rate": 0.0001999999994244917,
+      "loss": 0.045,
+      "step": 693
+    },
+    {
+      "epoch": 0.013149169177134845,
+      "grad_norm": 0.06199893355369568,
+      "learning_rate": 0.00019999999942280522,
+      "loss": 0.0229,
+      "step": 694
+    },
+    {
+      "epoch": 0.013168116106784896,
+      "grad_norm": 0.10518538951873779,
+      "learning_rate": 0.00019999999942111628,
+      "loss": 0.0334,
+      "step": 695
+    },
+    {
+      "epoch": 0.013187063036434946,
+      "grad_norm": 0.40504929423332214,
+      "learning_rate": 0.0001999999994194249,
+      "loss": 0.0544,
+      "step": 696
+    },
+    {
+      "epoch": 0.013206009966084996,
+      "grad_norm": 0.06708107143640518,
+      "learning_rate": 0.00019999999941773102,
+      "loss": 0.0235,
+      "step": 697
+    },
+    {
+      "epoch": 0.013224956895735046,
+      "grad_norm": 0.12683062255382538,
+      "learning_rate": 0.00019999999941603468,
+      "loss": 0.0275,
+      "step": 698
+    },
+    {
+      "epoch": 0.013243903825385097,
+      "grad_norm": 0.08281129598617554,
+      "learning_rate": 0.00019999999941433587,
+      "loss": 0.0365,
+      "step": 699
+    },
+    {
+      "epoch": 0.013262850755035147,
+      "grad_norm": 0.3280352056026459,
+      "learning_rate": 0.0001999999994126346,
+      "loss": 0.0484,
+      "step": 700
+    },
+    {
+      "epoch": 0.013262850755035147,
+      "eval_loss": 0.008949960581958294,
+      "eval_runtime": 553.8099,
+      "eval_samples_per_second": 40.127,
+      "eval_steps_per_second": 20.065,
+      "step": 700
     }
   ],
   "logging_steps": 1,
       "attributes": {}
     }
   },
+  "total_flos": 7.46484314406912e+16,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null