Update app.py
app.py CHANGED
@@ -119,9 +119,9 @@ def repo_exists(repo_id: str) -> bool:
 def get_name(models: list[pd.Series], username: str, version=0) -> str:
     model_name = models[0]["Model"].split("/")[-1].split("-")[0].capitalize() \
         + models[1]["Model"].split("/")[-1].split("-")[0].capitalize() \
-        + "-
+        + "-8B"
     if version > 0:
-        model_name = model_name.split("-")[0] + f"-v{version}-
+        model_name = model_name.split("-")[0] + f"-v{version}-8B"
 
     if repo_exists(f"{username}/{model_name}"):
         get_name(models, username, version+1)
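With this change, a merged repo is named by concatenating the capitalized first token of each source model's repo name and appending "-8B"; if that name is already taken on the Hub, a -v{version} variant is rebuilt from the first token instead. A minimal sketch of the derivation, using hypothetical model IDs (the names below are made up for illustration):

import pandas as pd

# Hypothetical leaderboard rows -- only the "Model" field is used here.
models = [
    pd.Series({"Model": "someuser/NeuralHermes-2.5-Mistral-7B"}),
    pd.Series({"Model": "otheruser/Daredevil-8B"}),
]

# Same derivation as in the diff: repo name -> first dash-separated token,
# then capitalize(), which also lowercases the remaining letters.
parts = [m["Model"].split("/")[-1].split("-")[0].capitalize() for m in models]
print(parts[0] + parts[1] + "-8B")  # NeuralhermesDaredevil-8B

Note that in the lines shown, the recursive get_name(models, username, version+1) call discards its return value; the rest of the function sits below the hunk, so presumably the result is returned or reassigned there.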
@@ -144,74 +144,50 @@ def get_license(models: list[pd.Series]) -> str:
 
 
 def create_config(models: list[pd.Series]) -> str:
-    slerp_config = """
+    slerp_config = f"""
+slices:
+  - sources:
+      - model: {models[0]["Model"]}
+        layer_range: [0, 32]
+      - model: {models[1]["Model"]}
+        layer_range: [0, 32]
+merge_method: slerp
+base_model: {models[0]["Model"]}
+parameters:
+  t:
+    - filter: self_attn
+      value: [0, 0.5, 0.3, 0.7, 1]
+    - filter: mlp
+      value: [1, 0.5, 0.7, 0.3, 0]
+    - value: 0.5
+dtype: bfloat16
+random_seed: 0
+"""
+    dare_config = f"""
 models:
-  - model: mlabonne/OrpoLlama-3-8B
-    parameters:
-      density: 0.55
-      weight: 0.05
+  - model: {models[0]["Model"]}
+    # No parameters necessary for base model
+  - model: {models[1]["Model"]}
+    parameters:
+      density: 0.53
+      weight: 0.6
 merge_method: dare_ties
-base_model:
+base_model: {models[0]["Model"]}
 parameters:
-dtype:
+  int8_mask: true
+dtype: bfloat16
+random_seed: 0
+"""
+    stock_config = f"""
+models:
+  - model: mistralai/Mistral-7B-v0.1
+  - model: {models[0]["Model"]}
+  - model: {models[1]["Model"]}
+merge_method: model_stock
+base_model: mistralai/Mistral-7B-v0.1
+dtype: bfloat16
 """
-    dare_config =
-    stock_config = slerp_config
-    # slerp_config = f"""
-    # slices:
-    #   - sources:
-    #       - model: {models[0]["Model"]}
-    #         layer_range: [0, 32]
-    #       - model: {models[1]["Model"]}
-    #         layer_range: [0, 32]
-    # merge_method: slerp
-    # base_model: {models[0]["Model"]}
-    # parameters:
-    #   t:
-    #     - filter: self_attn
-    #       value: [0, 0.5, 0.3, 0.7, 1]
-    #     - filter: mlp
-    #       value: [1, 0.5, 0.7, 0.3, 0]
-    #     - value: 0.5
-    # dtype: bfloat16
-    # random_seed: 0
-    # """
-    # dare_config = f"""
-    # models:
-    #   - model: mlabonne/Meta-Llama-3-8B
-    #     # No parameters necessary for base model
-    #   - model: {models[0]["Model"]}
-    #     parameters:
-    #       density: 0.53
-    #       weight: 0.5
-    #   - model: {models[1]["Model"]}
-    #     parameters:
-    #       density: 0.53
-    #       weight: 0.5
-    # merge_method: dare_ties
-    # base_model: mlabonne/Meta-Llama-3-8B
-    # parameters:
-    #   int8_mask: true
-    # dtype: bfloat16
-    # random_seed: 0
-    # """
-    # stock_config = f"""
-    # models:
-    #   - model: mlabonne/Meta-Llama-3-8B
-    #   - model: {models[0]["Model"]}
-    #   - model: {models[1]["Model"]}
-    # merge_method: model_stock
-    # base_model: mlabonne/Meta-Llama-3-8B
-    # dtype: bfloat16
-    # """
-    yaml_config = random.choices([slerp_config, dare_config, stock_config], weights=[0.5, 0.4, 0.1], k=1)[0]
+    yaml_config = random.choices([slerp_config, dare_config, stock_config], weights=[0.3, 0.6, 0.1], k=1)[0]
 
     with open('config.yaml', 'w', encoding="utf-8") as f:
         f.write(yaml_config)
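All three merge templates are now live f-strings, so the {models[i]["Model"]} placeholders are filled in at call time from the two sampled leaderboard rows, replacing the hard-coded Llama-3 variants that were previously active or commented out. A minimal sketch of how the dare_ties template renders (the model names below are hypothetical placeholders, not from the diff):

import pandas as pd

models = [
    pd.Series({"Model": "someuser/ModelA-7B"}),   # hypothetical
    pd.Series({"Model": "otheruser/ModelB-7B"}),  # hypothetical
]

# Same f-string shape as dare_config in the diff.
dare_config = f"""
models:
  - model: {models[0]["Model"]}
    # No parameters necessary for base model
  - model: {models[1]["Model"]}
    parameters:
      density: 0.53
      weight: 0.6
merge_method: dare_ties
base_model: {models[0]["Model"]}
parameters:
  int8_mask: true
dtype: bfloat16
random_seed: 0
"""
print(dare_config)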
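The sampler weights also move from [0.5, 0.4, 0.1] to [0.3, 0.6, 0.1], so dare_ties merges are now drawn about 60% of the time, SLERP about 30%, and model stock about 10%. A quick empirical check of that distribution:

import random
from collections import Counter

methods = ["slerp", "dare_ties", "model_stock"]
draws = random.choices(methods, weights=[0.3, 0.6, 0.1], k=10_000)
print(Counter(draws))  # roughly 3000 slerp / 6000 dare_ties / 1000 model_stock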
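The config.yaml written at the end of create_config is a standard mergekit configuration, so outside this Space the same file could be run with mergekit's documented CLI entry point, e.g. mergekit-yaml config.yaml ./output-model-directory (assuming mergekit is installed; that command is not part of this diff).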