pefanis27 committed (verified)
Commit 2220e12 · 1 Parent(s): c8ec0a0

phi-3.5-new

adapter_config.json CHANGED
@@ -12,14 +12,14 @@
  "layers_pattern": null,
  "layers_to_transform": null,
  "loftq_config": {},
- "lora_alpha": 64,
+ "lora_alpha": 32,
  "lora_bias": false,
- "lora_dropout": 0.1,
+ "lora_dropout": 0.05,
  "megatron_config": null,
  "megatron_core": "megatron.core",
  "modules_to_save": null,
  "peft_type": "LORA",
- "r": 16,
+ "r": 8,
  "rank_pattern": {},
  "revision": null,
  "target_modules": [
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:9530a4f7189fe3a34ddd73af5e9cbbc39d01141ff2a83bc58fc0472af0f0c315
- size 100697728
+ oid sha256:263a69bdb9de370405091d8f38a7af188f5c8271576941714e6e32907ca8a968
+ size 50365768
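Halving the LoRA rank (r 16 → 8) is what roughly halves the adapter weights above, from ~100.7 MB to ~50.4 MB. A minimal sketch of how the new adapter_config.json values would be set up with peft (not the author's actual training script; the base-model id and target_modules are assumptions, since the diff truncates the target_modules list):

```python
from peft import LoraConfig, get_peft_model
from transformers import AutoModelForCausalLM

# Hyperparameters taken from the updated adapter_config.json.
lora_config = LoraConfig(
    r=8,                       # was 16 in the previous commit
    lora_alpha=32,             # was 64
    lora_dropout=0.05,         # was 0.1
    bias="none",               # matches "lora_bias": false
    task_type="CAUSAL_LM",
    target_modules=["qkv_proj", "o_proj"],  # assumption: the diff cuts off the real list
)

base = AutoModelForCausalLM.from_pretrained("microsoft/Phi-3.5-mini-instruct")  # assumed base model
model = get_peft_model(base, lora_config)
model.print_trainable_parameters()  # r=8 trains roughly half the adapter parameters of r=16
```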
all_results.json CHANGED
@@ -1,12 +1,12 @@
  {
- "epoch": 9.0,
- "eval_loss": 1.001703143119812,
- "eval_runtime": 16.7881,
- "eval_samples_per_second": 2.561,
- "eval_steps_per_second": 0.655,
- "total_flos": 3.918186203657011e+16,
- "train_loss": 0.8946734860412076,
- "train_runtime": 2550.8567,
- "train_samples_per_second": 2.039,
- "train_steps_per_second": 0.51
+ "epoch": 6.9523809523809526,
+ "eval_loss": 1.0488358736038208,
+ "eval_runtime": 3.908,
+ "eval_samples_per_second": 4.35,
+ "eval_steps_per_second": 2.303,
+ "total_flos": 6666636405768192.0,
+ "train_loss": 1.0272208958455962,
+ "train_runtime": 456.6289,
+ "train_samples_per_second": 1.818,
+ "train_steps_per_second": 0.219
  }
config.json CHANGED
@@ -132,7 +132,7 @@
  "rope_theta": 10000.0,
  "sliding_window": 262144,
  "tie_word_embeddings": false,
- "torch_dtype": "bfloat16",
+ "torch_dtype": "float16",
  "transformers_version": "4.46.3",
  "use_cache": false,
  "vocab_size": 32064
eval_results.json CHANGED
@@ -1,7 +1,7 @@
  {
- "epoch": 9.0,
- "eval_loss": 1.001703143119812,
- "eval_runtime": 16.7881,
- "eval_samples_per_second": 2.561,
- "eval_steps_per_second": 0.655
+ "epoch": 6.9523809523809526,
+ "eval_loss": 1.0488358736038208,
+ "eval_runtime": 3.908,
+ "eval_samples_per_second": 4.35,
+ "eval_steps_per_second": 2.303
  }
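The torch_dtype entry in config.json above only records the dtype the checkpoint was saved in (now float16 instead of bfloat16); the loading code still chooses the compute dtype. A hedged sketch using the standard transformers API; the repo id is a placeholder:

```python
import torch
from transformers import AutoModelForCausalLM

# torch_dtype="auto" would honor the "torch_dtype": "float16" entry in config.json;
# passing torch.float16 explicitly is equivalent for this checkpoint.
model = AutoModelForCausalLM.from_pretrained(
    "pefanis27/phi-3.5-new",    # placeholder repo id
    torch_dtype=torch.float16,
    use_cache=False,            # matches "use_cache": false in config.json
)
```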
runs/Jan27_07-19-08_dmlab/events.out.tfevents.1737955149.dmlab.4693.0 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:94d50650fec322040c8739e03f8b8742ee7d1adaa78118a48fc44de863cfb576
+ size 10681
runs/Jan27_07-26-28_dmlab/events.out.tfevents.1737955589.dmlab.6099.0 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:d9830511c021a38d82724693d0f05c2a2bca3f697ff1ccb6458c4139d8d89f34
+ size 11975
runs/Jan27_07-26-28_dmlab/events.out.tfevents.1737956049.dmlab.6099.1 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:3a51e8af1c39f169c39c965a77839333888a26ef622142b21e3bdd8f25f1ad6e
+ size 354
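The three files added under runs/ are TensorBoard event logs, stored as Git LFS pointers. They can be inspected outside TensorBoard with the event accumulator; the scalar tag names below are assumptions about what the Trainer logged:

```python
from tensorboard.backend.event_processing.event_accumulator import EventAccumulator

# Point at one of the run directories added in this commit.
acc = EventAccumulator("runs/Jan27_07-26-28_dmlab")
acc.Reload()

print(acc.Tags()["scalars"])            # whatever scalar tags were actually logged
for event in acc.Scalars("eval/loss"):  # "eval/loss" is an assumed tag name
    print(event.step, event.value)
```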
tokenizer.json CHANGED
@@ -2,7 +2,7 @@
  "version": "1.0",
  "truncation": {
  "direction": "Right",
- "max_length": 2048,
+ "max_length": 512,
  "strategy": "LongestFirst",
  "stride": 0
  },
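The tokenizer now truncates to 512 tokens (the max_length change above), and tokenizer_config.json below caps model_max_length at 1024. A small sketch of how those limits surface at tokenization time; the repo id and example text are placeholders:

```python
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("pefanis27/phi-3.5-new")  # placeholder repo id

print(tok.model_max_length)  # 1024, from tokenizer_config.json
print(tok.padding_side)      # "left"

# tokenizer.json stores a default truncation rule (direction "Right", max_length 512);
# passing max_length=512 explicitly mirrors that stored setting.
batch = tok(
    "an example prompt that may run past the context budget ...",
    truncation=True,
    max_length=512,
    padding="max_length",
    return_tensors="pt",
)
print(batch["input_ids"].shape)  # torch.Size([1, 512])
```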
tokenizer_config.json CHANGED
@@ -121,7 +121,7 @@
  "clean_up_tokenization_spaces": false,
  "eos_token": "<|endoftext|>",
  "legacy": false,
- "model_max_length": 4096,
+ "model_max_length": 1024,
  "pad_token": "<unk>",
  "padding_side": "left",
  "sp_model_kwargs": {},
train_results.json CHANGED
@@ -1,8 +1,8 @@
  {
- "epoch": 9.0,
- "total_flos": 3.918186203657011e+16,
- "train_loss": 0.8946734860412076,
- "train_runtime": 2550.8567,
- "train_samples_per_second": 2.039,
- "train_steps_per_second": 0.51
+ "epoch": 6.9523809523809526,
+ "total_flos": 6666636405768192.0,
+ "train_loss": 1.0272208958455962,
+ "train_runtime": 456.6289,
+ "train_samples_per_second": 1.818,
+ "train_steps_per_second": 0.219
  }
trainer_state.json CHANGED
@@ -1,162 +1,132 @@
  {
- "best_metric": 0.9914960861206055,
- "best_model_checkpoint": "/home/labuser/Documents/phi-3/phi-3.5-new/checkpoint-312",
- "epoch": 9.0,
+ "best_metric": 1.0488358736038208,
+ "best_model_checkpoint": "/home/labuser/Documents/phi-3/phi-3.5-new/checkpoint-42",
+ "epoch": 6.9523809523809526,
  "eval_steps": 500,
- "global_step": 468,
+ "global_step": 73,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
  {
- "epoch": 1.0,
- "grad_norm": 0.8883442878723145,
- "learning_rate": 2e-05,
- "loss": 1.2693,
- "step": 52
+ "epoch": 0.9523809523809523,
+ "grad_norm": 2.678637981414795,
+ "learning_rate": 0.0001,
+ "loss": 1.5168,
+ "step": 10
  },
  {
- "epoch": 1.0,
- "eval_loss": 1.1401562690734863,
- "eval_runtime": 16.8795,
- "eval_samples_per_second": 2.547,
- "eval_steps_per_second": 0.652,
- "step": 52
+ "epoch": 0.9523809523809523,
+ "eval_loss": 1.3078712224960327,
+ "eval_runtime": 3.9056,
+ "eval_samples_per_second": 4.353,
+ "eval_steps_per_second": 2.304,
+ "step": 10
  },
  {
  "epoch": 2.0,
- "grad_norm": 0.8219994902610779,
- "learning_rate": 4e-05,
- "loss": 0.9808,
- "step": 104
+ "grad_norm": 2.171729564666748,
+ "learning_rate": 9.635919272833938e-05,
+ "loss": 1.0953,
+ "step": 21
  },
  {
  "epoch": 2.0,
- "eval_loss": 1.0025322437286377,
- "eval_runtime": 16.7914,
- "eval_samples_per_second": 2.561,
- "eval_steps_per_second": 0.655,
- "step": 104
+ "eval_loss": 1.1388882398605347,
+ "eval_runtime": 3.9103,
+ "eval_samples_per_second": 4.348,
+ "eval_steps_per_second": 2.302,
+ "step": 21
  },
  {
- "epoch": 3.0,
- "grad_norm": 0.8449307680130005,
- "learning_rate": 6e-05,
- "loss": 0.895,
- "step": 156
+ "epoch": 2.9523809523809526,
+ "grad_norm": 2.044914960861206,
+ "learning_rate": 8.715724127386972e-05,
+ "loss": 1.0287,
+ "step": 31
  },
  {
- "epoch": 3.0,
- "eval_loss": 1.0123087167739868,
- "eval_runtime": 16.7952,
- "eval_samples_per_second": 2.56,
- "eval_steps_per_second": 0.655,
- "step": 156
+ "epoch": 2.9523809523809526,
+ "eval_loss": 1.0680148601531982,
+ "eval_runtime": 3.9097,
+ "eval_samples_per_second": 4.348,
+ "eval_steps_per_second": 2.302,
+ "step": 31
  },
  {
  "epoch": 4.0,
- "grad_norm": 0.622931718826294,
- "learning_rate": 8e-05,
- "loss": 0.8651,
- "step": 208
+ "grad_norm": 3.0813491344451904,
+ "learning_rate": 7.191855733945387e-05,
+ "loss": 0.8858,
+ "step": 42
  },
  {
  "epoch": 4.0,
- "eval_loss": 1.0015382766723633,
- "eval_runtime": 16.795,
- "eval_samples_per_second": 2.56,
- "eval_steps_per_second": 0.655,
- "step": 208
+ "eval_loss": 1.0488358736038208,
+ "eval_runtime": 3.9065,
+ "eval_samples_per_second": 4.352,
+ "eval_steps_per_second": 2.304,
+ "step": 42
  },
  {
- "epoch": 5.0,
- "grad_norm": 0.6589271426200867,
- "learning_rate": 0.0001,
- "loss": 0.8471,
- "step": 260
+ "epoch": 4.9523809523809526,
+ "grad_norm": 2.399099349975586,
+ "learning_rate": 5.522642316338268e-05,
+ "loss": 0.9442,
+ "step": 52
  },
  {
- "epoch": 5.0,
- "eval_loss": 1.0026640892028809,
- "eval_runtime": 16.7954,
- "eval_samples_per_second": 2.56,
- "eval_steps_per_second": 0.655,
- "step": 260
+ "epoch": 4.9523809523809526,
+ "eval_loss": 1.0551538467407227,
+ "eval_runtime": 3.9027,
+ "eval_samples_per_second": 4.356,
+ "eval_steps_per_second": 2.306,
+ "step": 52
  },
  {
  "epoch": 6.0,
- "grad_norm": 0.7380354404449463,
- "learning_rate": 9.938441702975689e-05,
- "loss": 0.8273,
- "step": 312
+ "grad_norm": 2.558990240097046,
+ "learning_rate": 3.6218132209150045e-05,
+ "loss": 0.8393,
+ "step": 63
  },
  {
  "epoch": 6.0,
- "eval_loss": 0.9914960861206055,
- "eval_runtime": 16.7888,
- "eval_samples_per_second": 2.561,
- "eval_steps_per_second": 0.655,
- "step": 312
- },
- {
- "epoch": 7.0,
- "grad_norm": 0.7154495716094971,
- "learning_rate": 9.755282581475769e-05,
- "loss": 0.8068,
- "step": 364
- },
- {
- "epoch": 7.0,
- "eval_loss": 0.9957849979400635,
- "eval_runtime": 16.7927,
- "eval_samples_per_second": 2.561,
- "eval_steps_per_second": 0.655,
- "step": 364
- },
- {
- "epoch": 8.0,
- "grad_norm": 0.9513155817985535,
- "learning_rate": 9.45503262094184e-05,
- "loss": 0.7883,
- "step": 416
- },
- {
- "epoch": 8.0,
- "eval_loss": 1.0050514936447144,
- "eval_runtime": 16.8033,
- "eval_samples_per_second": 2.559,
- "eval_steps_per_second": 0.655,
- "step": 416
- },
- {
- "epoch": 9.0,
- "grad_norm": 0.9450660347938538,
- "learning_rate": 9.045084971874738e-05,
- "loss": 0.7722,
- "step": 468
- },
- {
- "epoch": 9.0,
- "eval_loss": 1.008681058883667,
- "eval_runtime": 16.7907,
- "eval_samples_per_second": 2.561,
- "eval_steps_per_second": 0.655,
- "step": 468
- },
- {
- "epoch": 9.0,
- "step": 468,
- "total_flos": 3.918186203657011e+16,
- "train_loss": 0.8946734860412076,
- "train_runtime": 2550.8567,
- "train_samples_per_second": 2.039,
- "train_steps_per_second": 0.51
+ "eval_loss": 1.0510764122009277,
+ "eval_runtime": 3.9324,
+ "eval_samples_per_second": 4.323,
+ "eval_steps_per_second": 2.289,
+ "step": 63
+ },
+ {
+ "epoch": 6.9523809523809526,
+ "grad_norm": 2.0949923992156982,
+ "learning_rate": 2.061073738537635e-05,
+ "loss": 0.9065,
+ "step": 73
+ },
+ {
+ "epoch": 6.9523809523809526,
+ "eval_loss": 1.0621706247329712,
+ "eval_runtime": 3.9116,
+ "eval_samples_per_second": 4.346,
+ "eval_steps_per_second": 2.301,
+ "step": 73
+ },
+ {
+ "epoch": 6.9523809523809526,
+ "step": 73,
+ "total_flos": 6666636405768192.0,
+ "train_loss": 1.0272208958455962,
+ "train_runtime": 456.6289,
+ "train_samples_per_second": 1.818,
+ "train_steps_per_second": 0.219
  }
  ],
  "logging_steps": 500,
- "max_steps": 1300,
+ "max_steps": 100,
  "num_input_tokens_seen": 0,
- "num_train_epochs": 25,
+ "num_train_epochs": 10,
  "save_steps": 500,
  "stateful_callbacks": {
  "EarlyStoppingCallback": {
@@ -179,8 +149,8 @@
  "attributes": {}
  }
  },
- "total_flos": 3.918186203657011e+16,
- "train_batch_size": 4,
+ "total_flos": 6666636405768192.0,
+ "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:128eba89cd71392d5ec29707dedc993445621390f2e66243a915aa50897df7e4
+ oid sha256:906c3e67879a4a768ad056051772ba574f2f336ffda087c2d37e302a39428848
  size 5624
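training_args.bin holds the serialized TrainingArguments, which is why its size stays at 5624 bytes while only the LFS oid changes. Below is a rough, hypothetical reconstruction of the arguments implied by trainer_state.json above (max_steps 100, 10 epochs, per-device batch size 2, peak learning rate 1e-4, per-epoch evaluation, early stopping); the scheduler, warmup, patience, and output_dir are assumptions, not values read from the binary:

```python
from transformers import TrainingArguments, EarlyStoppingCallback

# Hypothetical reconstruction -- the authoritative values are serialized in training_args.bin.
args = TrainingArguments(
    output_dir="/home/labuser/Documents/phi-3/phi-3.5-new",  # from best_model_checkpoint
    num_train_epochs=10,
    max_steps=100,                      # training stops after at most 100 optimizer steps
    per_device_train_batch_size=2,      # "train_batch_size": 2 in trainer_state.json
    learning_rate=1e-4,                 # peak LR seen in log_history
    # fp16=True,                        # likely, given "torch_dtype": "float16"; needs a CUDA device
    eval_strategy="epoch",              # evaluations appear once per epoch in the log
    save_strategy="epoch",
    logging_strategy="epoch",
    load_best_model_at_end=True,        # best_model_checkpoint is tracked
    metric_for_best_model="eval_loss",
)
early_stopping = EarlyStoppingCallback(early_stopping_patience=3)  # patience is an assumption

print(args.to_json_string())  # compare against the values recorded in trainer_state.json
```

These arguments plus the EarlyStoppingCallback would be passed to Trainer; the run ending at global_step 73 of max_steps 100 (epoch ~6.95 of 10) is consistent with early stopping firing.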