winnieyangwannan commited on
Commit
74be784
·
verified ·
1 Parent(s): 3825324

Training in progress, step 100, checkpoint

Browse files
checkpoint-100/adapter_config.json CHANGED
@@ -23,13 +23,13 @@
23
  "rank_pattern": {},
24
  "revision": null,
25
  "target_modules": [
26
- "v_proj",
27
- "q_proj",
28
- "gate_proj",
29
  "up_proj",
30
  "o_proj",
 
 
 
31
  "k_proj",
32
- "down_proj"
33
  ],
34
  "task_type": "CAUSAL_LM",
35
  "use_dora": false,
 
23
  "rank_pattern": {},
24
  "revision": null,
25
  "target_modules": [
 
 
 
26
  "up_proj",
27
  "o_proj",
28
+ "v_proj",
29
+ "q_proj",
30
+ "down_proj",
31
  "k_proj",
32
+ "gate_proj"
33
  ],
34
  "task_type": "CAUSAL_LM",
35
  "use_dora": false,
checkpoint-100/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1f8c90dbaaadd6c4ee44c558694e8d771de6a6989493310a3ae2a3af7c4b0ba3
3
  size 167832240
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f17a98b06387f5018f383be7587b01f3cec74945ad5b1204b615110e64bba081
3
  size 167832240
checkpoint-100/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:14248c03e373c71af1272e2b77ab7dfffdda213123167f4bf223d9007c9aca5c
3
  size 335922386
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:16f3923095888de51d7b67581eb3f1d4e2720f3eb5b4c9b9193c40f5e60ecbf2
3
  size 335922386
checkpoint-100/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a6ad92166b7d68158335d1db022a3ca0c87b7711f464c878dd8fe3b8bf6429fa
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:44154687ffe134613e15a79d0d47a7995b4cae27d59b177de6fb7656559737c1
3
  size 14244
checkpoint-100/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d8f79ab7cab95bdf3cc6c9b1467eef18704561fae62d7e0154da08f6db3fb3c0
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fa06820825157a9ccd40dd69e73309298e5d4c9dfccf4542d49d2063af57261e
3
  size 1064
checkpoint-100/trainer_state.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.10504201680672269,
5
  "eval_steps": 50,
6
  "global_step": 100,
7
  "is_hyper_param_search": false,
@@ -9,94 +9,94 @@
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
- "epoch": 0.01050420168067227,
13
- "grad_norm": 2.5582146644592285,
14
- "learning_rate": 4.98249299719888e-05,
15
- "loss": 1.6787,
16
  "step": 10
17
  },
18
  {
19
- "epoch": 0.02100840336134454,
20
- "grad_norm": 0.9345070719718933,
21
- "learning_rate": 4.96498599439776e-05,
22
- "loss": 0.518,
23
  "step": 20
24
  },
25
  {
26
- "epoch": 0.031512605042016806,
27
- "grad_norm": 1.6358414888381958,
28
- "learning_rate": 4.947478991596639e-05,
29
- "loss": 0.4604,
30
  "step": 30
31
  },
32
  {
33
- "epoch": 0.04201680672268908,
34
- "grad_norm": 0.7778844237327576,
35
- "learning_rate": 4.9299719887955186e-05,
36
- "loss": 0.3771,
37
  "step": 40
38
  },
39
  {
40
- "epoch": 0.052521008403361345,
41
- "grad_norm": 0.7006077766418457,
42
- "learning_rate": 4.912464985994398e-05,
43
- "loss": 0.3842,
44
  "step": 50
45
  },
46
  {
47
- "epoch": 0.052521008403361345,
48
- "eval_loss": 0.42603224515914917,
49
- "eval_runtime": 13.5673,
50
- "eval_samples_per_second": 35.379,
51
- "eval_steps_per_second": 2.211,
52
  "step": 50
53
  },
54
  {
55
- "epoch": 0.06302521008403361,
56
- "grad_norm": 0.6415153741836548,
57
- "learning_rate": 4.8949579831932775e-05,
58
- "loss": 0.3399,
59
  "step": 60
60
  },
61
  {
62
- "epoch": 0.07352941176470588,
63
- "grad_norm": 0.6030780076980591,
64
- "learning_rate": 4.877450980392157e-05,
65
- "loss": 0.3447,
66
  "step": 70
67
  },
68
  {
69
- "epoch": 0.08403361344537816,
70
- "grad_norm": 0.688852071762085,
71
- "learning_rate": 4.859943977591036e-05,
72
- "loss": 0.3219,
73
  "step": 80
74
  },
75
  {
76
- "epoch": 0.09453781512605042,
77
- "grad_norm": 0.6371557712554932,
78
- "learning_rate": 4.8424369747899164e-05,
79
- "loss": 0.3379,
80
  "step": 90
81
  },
82
  {
83
- "epoch": 0.10504201680672269,
84
- "grad_norm": 0.7739270329475403,
85
- "learning_rate": 4.824929971988796e-05,
86
- "loss": 0.3177,
87
  "step": 100
88
  },
89
  {
90
- "epoch": 0.10504201680672269,
91
- "eval_loss": 0.3801896274089813,
92
- "eval_runtime": 13.6107,
93
- "eval_samples_per_second": 35.266,
94
- "eval_steps_per_second": 2.204,
95
  "step": 100
96
  }
97
  ],
98
  "logging_steps": 10,
99
- "max_steps": 2856,
100
  "num_input_tokens_seen": 0,
101
  "num_train_epochs": 3,
102
  "save_steps": 100,
@@ -112,7 +112,7 @@
112
  "attributes": {}
113
  }
114
  },
115
- "total_flos": 1.1886860020875264e+16,
116
  "train_batch_size": 16,
117
  "trial_name": null,
118
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.1053740779768177,
5
  "eval_steps": 50,
6
  "global_step": 100,
7
  "is_hyper_param_search": false,
 
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
+ "epoch": 0.01053740779768177,
13
+ "grad_norm": 3.070249080657959,
14
+ "learning_rate": 4.982437653670531e-05,
15
+ "loss": 1.7879,
16
  "step": 10
17
  },
18
  {
19
+ "epoch": 0.02107481559536354,
20
+ "grad_norm": 1.702326774597168,
21
+ "learning_rate": 4.964875307341061e-05,
22
+ "loss": 0.5567,
23
  "step": 20
24
  },
25
  {
26
+ "epoch": 0.03161222339304531,
27
+ "grad_norm": 1.1947294473648071,
28
+ "learning_rate": 4.947312961011591e-05,
29
+ "loss": 0.4493,
30
  "step": 30
31
  },
32
  {
33
+ "epoch": 0.04214963119072708,
34
+ "grad_norm": 0.9556658267974854,
35
+ "learning_rate": 4.929750614682122e-05,
36
+ "loss": 0.3728,
37
  "step": 40
38
  },
39
  {
40
+ "epoch": 0.05268703898840885,
41
+ "grad_norm": 0.7952510714530945,
42
+ "learning_rate": 4.9121882683526524e-05,
43
+ "loss": 0.3535,
44
  "step": 50
45
  },
46
  {
47
+ "epoch": 0.05268703898840885,
48
+ "eval_loss": 0.4311191439628601,
49
+ "eval_runtime": 13.6539,
50
+ "eval_samples_per_second": 35.155,
51
+ "eval_steps_per_second": 2.197,
52
  "step": 50
53
  },
54
  {
55
+ "epoch": 0.06322444678609063,
56
+ "grad_norm": 0.6962826251983643,
57
+ "learning_rate": 4.894625922023183e-05,
58
+ "loss": 0.3507,
59
  "step": 60
60
  },
61
  {
62
+ "epoch": 0.0737618545837724,
63
+ "grad_norm": 0.6941961646080017,
64
+ "learning_rate": 4.877063575693713e-05,
65
+ "loss": 0.3585,
66
  "step": 70
67
  },
68
  {
69
+ "epoch": 0.08429926238145416,
70
+ "grad_norm": 0.6864392757415771,
71
+ "learning_rate": 4.8595012293642434e-05,
72
+ "loss": 0.3496,
73
  "step": 80
74
  },
75
  {
76
+ "epoch": 0.09483667017913593,
77
+ "grad_norm": 0.7322937846183777,
78
+ "learning_rate": 4.841938883034774e-05,
79
+ "loss": 0.3295,
80
  "step": 90
81
  },
82
  {
83
+ "epoch": 0.1053740779768177,
84
+ "grad_norm": 0.6921488046646118,
85
+ "learning_rate": 4.824376536705304e-05,
86
+ "loss": 0.3357,
87
  "step": 100
88
  },
89
  {
90
+ "epoch": 0.1053740779768177,
91
+ "eval_loss": 0.39120009541511536,
92
+ "eval_runtime": 13.7031,
93
+ "eval_samples_per_second": 35.029,
94
+ "eval_steps_per_second": 2.189,
95
  "step": 100
96
  }
97
  ],
98
  "logging_steps": 10,
99
+ "max_steps": 2847,
100
  "num_input_tokens_seen": 0,
101
  "num_train_epochs": 3,
102
  "save_steps": 100,
 
112
  "attributes": {}
113
  }
114
  },
115
+ "total_flos": 1.1795573109030912e+16,
116
  "train_batch_size": 16,
117
  "trial_name": null,
118
  "trial_params": null
checkpoint-100/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d5894a90f0aacad19de132730666f8b4647a0c4aa14309866a5f87d3723ce6a7
3
  size 5880
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b5fa9d8b62d1ebe6967a504a7decdb5eeee2bb4aac96e7414f3930f9adcff095
3
  size 5880