mrferr3t committed on
Commit
dae6de3
·
verified ·
1 Parent(s): 717e9aa

Training in progress, step 99, checkpoint

Browse files
last-checkpoint/adapter_config.json CHANGED
@@ -20,8 +20,8 @@
20
  "rank_pattern": {},
21
  "revision": null,
22
  "target_modules": [
23
- "Wqkv",
24
  "out_proj",
 
25
  "layer"
26
  ],
27
  "task_type": "CAUSAL_LM",
 
20
  "rank_pattern": {},
21
  "revision": null,
22
  "target_modules": [
 
23
  "out_proj",
24
+ "Wqkv",
25
  "layer"
26
  ],
27
  "task_type": "CAUSAL_LM",
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9ff3dba4ded2fdf628b0c9bd810333742a4ada5e95901002fce7ac0ae675ad29
3
  size 5752
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:802a01c1ed23b73188902d952f4f15ea954fee5824fd44125d5ad95cd36945ef
3
  size 5752
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3599abb1c1bbafb5cf23e547962c6be67eff25d97cbb4fc116adeba928bd6c31
3
  size 15814
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1ee63e5cf5d7064f73e2b1c77aada83b8d7d19728003dd8eb4163a75837779be
3
  size 15814
last-checkpoint/trainer_state.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.30745341614906835,
5
  "eval_steps": 50,
6
  "global_step": 99,
7
  "is_hyper_param_search": false,
@@ -9,45 +9,45 @@
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
- "epoch": 0.003105590062111801,
13
  "eval_loss": 11.5,
14
- "eval_runtime": 0.794,
15
- "eval_samples_per_second": 341.322,
16
- "eval_steps_per_second": 21.411,
17
  "step": 1
18
  },
19
  {
20
- "epoch": 0.062111801242236024,
21
- "grad_norm": 5.816050361318048e-06,
22
  "learning_rate": 0.0005,
23
  "loss": 11.5,
24
  "step": 20
25
  },
26
  {
27
- "epoch": 0.12422360248447205,
28
- "grad_norm": 5.217343641561456e-06,
29
  "learning_rate": 0.00042501051864235636,
30
  "loss": 11.5,
31
  "step": 40
32
  },
33
  {
34
- "epoch": 0.15527950310559005,
35
  "eval_loss": 11.5,
36
- "eval_runtime": 0.9798,
37
- "eval_samples_per_second": 276.6,
38
- "eval_steps_per_second": 17.351,
39
  "step": 50
40
  },
41
  {
42
- "epoch": 0.18633540372670807,
43
- "grad_norm": 6.795597983000334e-06,
44
  "learning_rate": 0.00024502945308373244,
45
  "loss": 11.5,
46
  "step": 60
47
  },
48
  {
49
- "epoch": 0.2484472049689441,
50
- "grad_norm": 7.33325032342691e-06,
51
  "learning_rate": 6.803029740762648e-05,
52
  "loss": 11.5,
53
  "step": 80
@@ -70,8 +70,8 @@
70
  "attributes": {}
71
  }
72
  },
73
- "total_flos": 3944729935872.0,
74
- "train_batch_size": 16,
75
  "trial_name": null,
76
  "trial_params": null
77
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.15396578538102643,
5
  "eval_steps": 50,
6
  "global_step": 99,
7
  "is_hyper_param_search": false,
 
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
+ "epoch": 0.0015552099533437014,
13
  "eval_loss": 11.5,
14
+ "eval_runtime": 0.9095,
15
+ "eval_samples_per_second": 297.968,
16
+ "eval_steps_per_second": 37.383,
17
  "step": 1
18
  },
19
  {
20
+ "epoch": 0.03110419906687403,
21
+ "grad_norm": 6.635740191995865e-06,
22
  "learning_rate": 0.0005,
23
  "loss": 11.5,
24
  "step": 20
25
  },
26
  {
27
+ "epoch": 0.06220839813374806,
28
+ "grad_norm": 1.2428505215211771e-05,
29
  "learning_rate": 0.00042501051864235636,
30
  "loss": 11.5,
31
  "step": 40
32
  },
33
  {
34
+ "epoch": 0.07776049766718507,
35
  "eval_loss": 11.5,
36
+ "eval_runtime": 0.8873,
37
+ "eval_samples_per_second": 305.428,
38
+ "eval_steps_per_second": 38.319,
39
  "step": 50
40
  },
41
  {
42
+ "epoch": 0.09331259720062209,
43
+ "grad_norm": 1.2873988453065977e-05,
44
  "learning_rate": 0.00024502945308373244,
45
  "loss": 11.5,
46
  "step": 60
47
  },
48
  {
49
+ "epoch": 0.12441679626749612,
50
+ "grad_norm": 1.2784214959538076e-05,
51
  "learning_rate": 6.803029740762648e-05,
52
  "loss": 11.5,
53
  "step": 80
 
70
  "attributes": {}
71
  }
72
  },
73
+ "total_flos": 1972364967936.0,
74
+ "train_batch_size": 8,
75
  "trial_name": null,
76
  "trial_params": null
77
  }
last-checkpoint/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2ca3e4660f8a5a3e2a0a6131d1b2056bbc7e9740103969e0f2d007680d31942d
3
  size 6776
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:df6d97d90dddbe76304ec413367b35d93d0004cea3ab128ff75bc30a5753a0c7
3
  size 6776