Steven10429 committed
Commit 01e8a68 · verified · 1 Parent(s): eae3852

Update app.py

Files changed (1)
  1. app.py +29 -46
app.py CHANGED
@@ -15,7 +15,7 @@ current_logs = []
 
 def log(msg):
     """Append and print a log message"""
-    print(msg)
+    log(msg)
     current_logs.append(msg)
     return "\n".join(current_logs)
 
 
@@ -113,12 +113,19 @@ def download_and_merge_model(base_model_name, lora_model_name, output_dir, device):
     5. Take the union of the base and adapter tokenizer vocabularies and extend the tokenizer
     6. Resize the merged model's token embeddings and save
     """
+    log("Loading base model...")
     model = AutoModelForCausalLM.from_pretrained(base_model_name, low_cpu_mem_usage=True)
+    log("Loading adapter tokenizer...")
     adapter_tokenizer = AutoTokenizer.from_pretrained(lora_model_name)
-    added_tokens_decoder = adapter_tokenizer.added_tokens_decoder
-    model.resize_token_embeddings(adapter_tokenizer.vocab_size + len(added_tokens_decoder))
+    if adapter_tokenizer.pad_token != model.config.pad_token:
+        log("Resizing token embeddings...")
+        added_tokens_decoder = adapter_tokenizer.added_tokens_decoder
+        model.resize_token_embeddings(adapter_tokenizer.vocab_size + len(added_tokens_decoder))
+    log("Loading LoRA adapter...")
     peft_model = PeftModel.from_pretrained(model, lora_model_name, low_cpu_mem_usage=True)
+    log("Merging and unloading model...")
     model = peft_model.merge_and_unload()
+    log("Saving model...")
     model.save_pretrained(output_dir)
     adapter_tokenizer.save_pretrained(output_dir)
     return output_dir
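A note on the resize arithmetic in this hunk: a Hugging Face tokenizer's vocab_size counts only the base vocabulary, while tokens added on top of it (for example by the adapter) live in added_tokens_decoder, so the two are summed to get the new embedding count. A minimal stand-alone sketch of the same check (the model and adapter ids are placeholders):

    from transformers import AutoModelForCausalLM, AutoTokenizer

    # Placeholder ids; any base model / adapter tokenizer pair behaves the same way.
    model = AutoModelForCausalLM.from_pretrained("Qwen/Qwen2.5-7B-Instruct", low_cpu_mem_usage=True)
    tok = AutoTokenizer.from_pretrained("some-user/some-lora-adapter")

    target = tok.vocab_size + len(tok.added_tokens_decoder)
    if target != model.get_input_embeddings().num_embeddings:
        # Grow (or shrink) the embedding matrix so every adapter token has a row.
        model.resize_token_embeddings(target)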
@@ -153,7 +160,7 @@ def clone_llamacpp_and_download_build():
 
 
 @timeit
-def quantize_and_push_model(model_path, repo_id, quant_method=None):
+def quantize(model_path, repo_id, quant_method=None):
     """
     Quantize the model with llama-cpp-python and upload it to the Hugging Face Hub.
     Quantization presets used:
@@ -176,37 +183,26 @@ def quantize_and_push_model(model_path, repo_id, quant_method=None):
     temp_gguf_path = os.path.join(model_output_dir, f"{repo_id}-f16.gguf")
 
     if not os.path.exists(temp_gguf_path):
-        print(f"Converting the model to GGML format")
+        log(f"Converting the model to GGML format")
         convert_script = os.path.join(llamacpp_dir, "convert_hf_to_gguf.py")
         convert_cmd = f"python {convert_script} {model_path} --outfile {temp_gguf_path}"
         os.system(convert_cmd)
     else:
-        print(f"GGML intermediate file already exists, skipping conversion")
+        log(f"GGML intermediate file already exists, skipping conversion")
 
     # The final file is saved under the model_output directory
     final_path = os.path.join(model_output_dir, f"{repo_id}-{quant_method}.gguf")
-    print(f"Running {quant_method} quantization")
+    log(f"Running {quant_method} quantization")
     quantize_bin = os.path.join(llamacpp_dir, "build", "bin", "llama-quantize")
     quant_cmd = f"{quantize_bin} {temp_gguf_path} {final_path} {quant_method}"
 
     if not os.path.exists(final_path):
         os.system(quant_cmd)
     else:
-        print(f"{quant_method} quantized file already exists, skipping quantization")
+        log(f"{quant_method} quantized file already exists, skipping quantization")
         return None
-
-    # Asynchronously upload the quantized model to the Hugging Face Hub
-    api = HfApi()
-    future = api.upload_file(
-        file_path=final_path,
-        repo_id=repo_id,
-        repo_type="model",
-        commit_message=f"Quantized {quant_method}",
-        commit_description=f"Quantized {model_path} with {quant_method}, using llama.cpp -> {quant_cmd} ",
-        run_as_future=True
-    )
-    log(f"Quantized model ({quant_method}) upload scheduled; future object obtained.")
-    return future
+
+    return final_path
 
 @timeit
 def process_model(base_model_name, lora_model_name, repo_name, quant_methods, hf_token):
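For context, the quantization step drives two external llama.cpp tools: convert_hf_to_gguf.py turns the merged Hugging Face checkpoint into an f16 GGUF file, and the llama-quantize binary then produces the requested preset. A rough stand-alone equivalent using subprocess instead of os.system (all paths and the Q4_K_M preset are placeholder assumptions):

    import subprocess
    from pathlib import Path

    llamacpp_dir = Path("llama.cpp")                      # assumed clone location
    merged_dir = Path("model_output/merged")              # placeholder: merged HF model directory
    f16_gguf = Path("model_output/model-f16.gguf")
    quantized_gguf = Path("model_output/model-Q4_K_M.gguf")

    # Step 1: HF checkpoint -> f16 GGUF
    subprocess.run(
        ["python", str(llamacpp_dir / "convert_hf_to_gguf.py"),
         str(merged_dir), "--outfile", str(f16_gguf)],
        check=True,
    )

    # Step 2: f16 GGUF -> quantized GGUF using a llama.cpp preset
    subprocess.run(
        [str(llamacpp_dir / "build" / "bin" / "llama-quantize"),
         str(f16_gguf), str(quantized_gguf), "Q4_K_M"],
        check=True,
    )

Unlike os.system, subprocess.run(..., check=True) raises when either tool exits with a non-zero status, so a failed conversion cannot silently fall through to the quantization step.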
@@ -225,6 +221,10 @@ def process_model(base_model_name, lora_model_name, repo_name, quant_methods, hf_token):
         os.environ["HF_TOKEN"] = hf_token
         api = HfApi(token=hf_token)
         username = api.whoami()["name"]
+
+        if base_model_name.strip().lower() == "auto":
+            adapter_config = PeftConfig.from_pretrained(lora_model_name)
+            base_model_name = adapter_config.base_model_name_or_path
         if repo_name.strip().lower() == "auto":
             repo_name = f"{username}/{base_model_name.split('/')[-1]}_{lora_model_name.split('/')[-1]}"
 
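The new "auto" branch for the base model resolves the base checkpoint from the adapter itself: PeftConfig.from_pretrained reads the adapter's adapter_config.json, whose base_model_name_or_path field records which model the LoRA was trained against. A small sketch of that lookup (the adapter id is a placeholder):

    from peft import PeftConfig

    adapter_id = "some-user/some-lora-adapter"  # placeholder
    cfg = PeftConfig.from_pretrained(adapter_id)
    # e.g. "Qwen/Qwen2.5-7B-Instruct" -- whatever the adapter was trained on
    print(cfg.base_model_name_or_path)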
 
@@ -235,35 +235,18 @@ def process_model(base_model_name, lora_model_name, repo_name, quant_methods, hf_token):
         log("Starting model merge process...")
         model_path = download_and_merge_model(base_model_name, lora_model_name, output_dir, device)
 
-        # Asynchronously upload the merged model
-        log("Scheduling merged model upload...")
-        future_merge = api.upload_large_folder(
+
+        # Quantize the model
+        for quant_method in quant_methods:
+            quantize(f"{output_dir}/model.gguf", repo_name, bits=8, quant_method=quant_method)
+
+        # Upload the merged model and the quantized models
+        api.upload_large_folder(
             folder_path=model_path,
             repo_id=repo_name,
             repo_type="model",
             num_workers=4,
-            run_as_future=True
         )
-
-        # Start the quantization tasks, one for each of the four modes:
-        futures = []
-        for quant_method in quant_methods:
-            future = quantize_and_push_model(f"{output_dir}/model.gguf", repo_name, bits=8, quant_method=quant_method)
-            futures.append(future)
-        log("Background uploads are in progress; performing other tasks if needed...")
-
-        log("Waiting for merged model upload to complete...")
-        future_merge.result()
-        log("Merged model upload completed.")
-
-        for future in futures:
-            future.result()
-            log(f"{future.result().__name__} completed.")
-
-        final_message = f"All done! Model uploaded to: https://huggingface.co/{repo_name}"
-        log(final_message)
-        os.environ.pop("HF_TOKEN", None)
-        log("HF_TOKEN removed from environment variables.")
         return "\n".join(current_logs)
     except Exception as e:
         error_message = f"Error during processing: {e}"
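With this hunk, uploading happens once, after quantization, through HfApi.upload_large_folder, which pushes the folder as a series of commits, can resume if interrupted, and parallelises hashing and uploads across num_workers, but blocks until everything is on the Hub. A minimal sketch of the call in isolation (token, repo id, and folder path are placeholders):

    from huggingface_hub import HfApi

    api = HfApi(token="hf_xxx")            # placeholder token
    repo_id = "username/merged-model"      # placeholder repo

    api.create_repo(repo_id, repo_type="model", exist_ok=True)
    api.upload_large_folder(
        folder_path="model_output/merged",  # placeholder: merged model + GGUF files
        repo_id=repo_id,
        repo_type="model",
        num_workers=4,                      # parallel workers for hashing and uploading
    )

Because the call is synchronous, the future bookkeeping used by the previous version (run_as_future=True followed by future.result()) is no longer needed.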
@@ -285,7 +268,7 @@ def create_ui():
         base_model = gr.Textbox(
             label="Base Model Path",
             placeholder="e.g., Qwen/Qwen2.5-14B-Instruct",
-            value="Qwen/Qwen2.5-7B-Instruct"
+            value="Auto"
         )
         lora_model = gr.Textbox(
             label="LoRA Model Path",
 