mlabonne committed (verified) · Commit 6075314 · 1 Parent(s): a66d9b2

Update app.py

Files changed (1): app.py (+42 -66)
app.py CHANGED
@@ -119,9 +119,9 @@ def repo_exists(repo_id: str) -> bool:
 def get_name(models: list[pd.Series], username: str, version=0) -> str:
     model_name = models[0]["Model"].split("/")[-1].split("-")[0].capitalize() \
         + models[1]["Model"].split("/")[-1].split("-")[0].capitalize() \
-        + "-7B"
+        + "-8B"
     if version > 0:
-        model_name = model_name.split("-")[0] + f"-v{version}-7B"
+        model_name = model_name.split("-")[0] + f"-v{version}-8B"
 
     if repo_exists(f"{username}/{model_name}"):
         get_name(models, username, version+1)
@@ -144,74 +144,50 @@ def get_license(models: list[pd.Series]) -> str:
 
 
 def create_config(models: list[pd.Series]) -> str:
-    slerp_config = """
+    slerp_config = f"""
+slices:
+  - sources:
+      - model: {models[0]["Model"]}
+        layer_range: [0, 32]
+      - model: {models[1]["Model"]}
+        layer_range: [0, 32]
+merge_method: slerp
+base_model: {models[0]["Model"]}
+parameters:
+  t:
+    - filter: self_attn
+      value: [0, 0.5, 0.3, 0.7, 1]
+    - filter: mlp
+      value: [1, 0.5, 0.7, 0.3, 0]
+    - value: 0.5
+dtype: bfloat16
+random_seed: 0
+"""
+    dare_config = f"""
 models:
-  - model: NousResearch/Meta-Llama-3-8B
-    # No parameters necessary for base model
-  - model: NousResearch/Meta-Llama-3-8B-Instruct
-    parameters:
-      density: 0.6
-      weight: 0.5
-  - model: mlabonne/OrpoLlama-3-8B
-    parameters:
-      density: 0.55
-      weight: 0.05
+  - model: {models[0]["Model"]}
+    # No parameters necessary for base model
+  - model: {models[1]["Model"]}
+    parameters:
+      density: 0.53
+      weight: 0.6
 merge_method: dare_ties
-base_model: NousResearch/Meta-Llama-3-8B
+base_model: {models[0]["Model"]}
 parameters:
-  int8_mask: true
-dtype: float16
+  int8_mask: true
+dtype: bfloat16
+random_seed: 0
+"""
+    stock_config = f"""
+models:
+  - model: mistralai/Mistral-7B-v0.1
+  - model: {models[0]["Model"]}
+  - model: {models[1]["Model"]}
+merge_method: model_stock
+base_model: mistralai/Mistral-7B-v0.1
+dtype: bfloat16
 """
-    dare_config = slerp_config
-    stock_config = slerp_config
-    # slerp_config = f"""
-    # slices:
-    #   - sources:
-    #       - model: {models[0]["Model"]}
-    #         layer_range: [0, 32]
-    #       - model: {models[1]["Model"]}
-    #         layer_range: [0, 32]
-    # merge_method: slerp
-    # base_model: {models[0]["Model"]}
-    # parameters:
-    #   t:
-    #     - filter: self_attn
-    #       value: [0, 0.5, 0.3, 0.7, 1]
-    #     - filter: mlp
-    #       value: [1, 0.5, 0.7, 0.3, 0]
-    #     - value: 0.5
-    # dtype: bfloat16
-    # random_seed: 0
-    # """
-    # dare_config = f"""
-    # models:
-    #   - model: mlabonne/Meta-Llama-3-8B
-    #   # No parameters necessary for base model
-    #   - model: {models[0]["Model"]}
-    #     parameters:
-    #       density: 0.53
-    #       weight: 0.5
-    #   - model: {models[1]["Model"]}
-    #     parameters:
-    #       density: 0.53
-    #       weight: 0.5
-    # merge_method: dare_ties
-    # base_model: mlabonne/Meta-Llama-3-8B
-    # parameters:
-    #   int8_mask: true
-    # dtype: bfloat16
-    # random_seed: 0
-    # """
-    # stock_config = f"""
-    # models:
-    #   - model: mlabonne/Meta-Llama-3-8B
-    #   - model: {models[0]["Model"]}
-    #   - model: {models[1]["Model"]}
-    # merge_method: model_stock
-    # base_model: mlabonne/Meta-Llama-3-8B
-    # dtype: bfloat16
-    # """
-    yaml_config = random.choices([slerp_config, dare_config, stock_config], weights=[0.5, 0.4, 0.1], k=1)[0]
+    yaml_config = random.choices([slerp_config, dare_config, stock_config], weights=[0.3, 0.6, 0.1], k=1)[0]
 
     with open('config.yaml', 'w', encoding="utf-8") as f:
         f.write(yaml_config)
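
For illustration, the renamed suffix in get_name can be reproduced standalone. The sketch below mirrors the updated naming logic but omits the repo_exists collision check; the repository names are hypothetical.

import pandas as pd

# Hypothetical rows standing in for the leaderboard entries app.py passes
# around; only the "Model" field matters for naming.
models = [pd.Series({"Model": "org/neural-chat-7b"}),
          pd.Series({"Model": "org/beagle-14"})]

def build_name(models, version: int = 0) -> str:
    # First dash-separated token of each repo name, capitalized,
    # then the new -8B suffix introduced by this commit.
    prefix = models[0]["Model"].split("/")[-1].split("-")[0].capitalize() \
        + models[1]["Model"].split("/")[-1].split("-")[0].capitalize()
    return f"{prefix}-v{version}-8B" if version > 0 else f"{prefix}-8B"

print(build_name(models))             # NeuralBeagle-8B
print(build_name(models, version=2))  # NeuralBeagle-v2-8B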
 
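For concreteness, the new dare_config template renders to plain mergekit YAML once the two model names are substituted. A minimal sketch with made-up repo names, checking that the rendered string parses (assumes PyYAML is available):

import yaml  # PyYAML, assumed available

models = [{"Model": "org/ModelA-8B"}, {"Model": "org/ModelB-8B"}]  # hypothetical

dare_config = f"""
models:
  - model: {models[0]["Model"]}
    # No parameters necessary for base model
  - model: {models[1]["Model"]}
    parameters:
      density: 0.53
      weight: 0.6
merge_method: dare_ties
base_model: {models[0]["Model"]}
parameters:
  int8_mask: true
dtype: bfloat16
random_seed: 0
"""

config = yaml.safe_load(dare_config)
print(config["merge_method"], config["base_model"])  # dare_ties org/ModelA-8B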
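Finally, the reweighted random.choices call now favors dare_ties over slerp. A quick empirical check, with label strings standing in for the three config strings:

import random
from collections import Counter

# Stand-ins for slerp_config, dare_config, and stock_config.
labels = ["slerp", "dare_ties", "model_stock"]

# Mirror the new call: weights=[0.3, 0.6, 0.1], one draw per merge.
counts = Counter(
    random.choices(labels, weights=[0.3, 0.6, 0.1], k=1)[0]
    for _ in range(10_000)
)
print(counts)  # roughly: dare_ties ~6000, slerp ~3000, model_stock ~1000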