Steven10429 committed on
Commit 3a806f2 · 1 Parent(s): 0805af2
Files changed (2)
  1. app.py +31 -34
  2. convert.log +0 -0
app.py CHANGED
@@ -19,16 +19,13 @@ log.setLevel(logging.INFO)
 log.addHandler(logging.StreamHandler())
 log.addHandler(logging.FileHandler("convert.log"))

-def log(msg):
-    """Append and print a log message."""
-    log.info(msg)

 def timeit(func):
     def wrapper(*args, **kwargs):
         start_time = time.time()
         result = func(*args, **kwargs)
         end_time = time.time()
-        log(f"{func.__name__}: {end_time - start_time:.2f} s")
+        log.info(f"{func.__name__}: {end_time - start_time:.2f} s")
         return result
     return wrapper

@@ -41,35 +38,35 @@ def get_model_size_in_gb(model_name):
         # Use the safetensors size (do not assume a file extension)
         return model_info.safetensors.total / (1024 ** 3)
     except Exception as e:
-        log(f"Unable to estimate model size: {e}")
+        log.error(f"Unable to estimate model size: {e}")
         return 1  # default value

 @timeit
 def check_system_resources(model_name):
     """Check system resources and decide whether to use the CPU or the GPU."""
-    log("Checking system resources...")
+    log.info("Checking system resources...")
     system_memory = psutil.virtual_memory()
     total_memory_gb = system_memory.total / (1024 ** 3)
-    log(f"Total system memory: {total_memory_gb:.1f}GB")
+    log.info(f"Total system memory: {total_memory_gb:.1f}GB")

     model_size_gb = get_model_size_in_gb(model_name)
     required_memory_gb = model_size_gb * 2.5  # reserve extra headroom
-    log(f"Estimated required memory for model: {required_memory_gb:.1f}GB")
+    log.info(f"Estimated required memory for model: {required_memory_gb:.1f}GB")

     if torch.cuda.is_available():
         gpu_name = torch.cuda.get_device_name(0)
         gpu_memory_gb = torch.cuda.get_device_properties(0).total_memory / (1024 ** 3)
-        log(f"Detected GPU: {gpu_name} with {gpu_memory_gb:.1f}GB memory")
+        log.info(f"Detected GPU: {gpu_name} with {gpu_memory_gb:.1f}GB memory")
         if gpu_memory_gb >= required_memory_gb:
-            log("✅ Sufficient GPU memory available; using GPU.")
+            log.info("✅ Sufficient GPU memory available; using GPU.")
             return "cuda", gpu_memory_gb
         else:
-            log(f"⚠️ Insufficient GPU memory (requires {required_memory_gb:.1f}GB, found {gpu_memory_gb:.1f}GB).")
+            log.warning(f"⚠️ Insufficient GPU memory (requires {required_memory_gb:.1f}GB, found {gpu_memory_gb:.1f}GB).")
     else:
-        log("❌ No GPU detected.")
+        log.error("❌ No GPU detected.")

     if total_memory_gb >= required_memory_gb:
-        log("✅ Sufficient CPU memory available; using CPU.")
+        log.info("✅ Sufficient CPU memory available; using CPU.")
         return "cpu", total_memory_gb
     else:
         raise MemoryError(f"❌ Insufficient system memory (requires {required_memory_gb:.1f}GB, available {available_memory_gb:.1f}GB).")
@@ -80,7 +77,7 @@ def setup_environment(model_name):
     try:
         device, _ = check_system_resources(model_name)
     except Exception as e:
-        log(f"Resource check failed: {e}. Defaulting to CPU.")
+        log.error(f"Resource check failed: {e}. Defaulting to CPU.")
         device = "cpu"
     return device

@@ -95,14 +92,14 @@ def create_hf_repo(repo_name, private=True):
         repo_name_with_index = repo_name
         while api.repo_exists(repo_name_with_index):
             retry_index += 1
-            log(f"Repository {repo_name_with_index} exists; trying {repo_name}_{retry_index}")
+            log.info(f"Repository {repo_name_with_index} exists; trying {repo_name}_{retry_index}")
             repo_name_with_index = f"{repo_name}_{retry_index}"
         repo_name = repo_name_with_index
         repo_url = create_repo(repo_name, private=private)
-        log(f"Repository created successfully: {repo_url}")
+        log.info(f"Repository created successfully: {repo_url}")
         return repo_name
     except Exception as e:
-        log(f"Failed to create repository: {e}")
+        log.error(f"Failed to create repository: {e}")
         raise

 @timeit
@@ -115,18 +112,18 @@ def download_and_merge_model(base_model_name, lora_model_name, output_dir, devic
     5. Take the union of the base and adapter tokenizer vocabularies and extend the tokenizer
     6. Resize the merged model's embedding layer and save the result
     """
-    log("Loading base model...")
+    log.info("Loading base model...")
     model = AutoModelForCausalLM.from_pretrained(base_model_name, low_cpu_mem_usage=True)
-    log("Loading adapter tokenizer...")
+    log.info("Loading adapter tokenizer...")
     adapter_tokenizer = AutoTokenizer.from_pretrained(lora_model_name)
-    log("Resizing token embeddings...")
+    log.info("Resizing token embeddings...")
     added_tokens_decoder = adapter_tokenizer.added_tokens_decoder
     model.resize_token_embeddings(adapter_tokenizer.vocab_size + len(added_tokens_decoder))
-    log("Loading LoRA adapter...")
+    log.info("Loading LoRA adapter...")
     peft_model = PeftModel.from_pretrained(model, lora_model_name, low_cpu_mem_usage=True)
-    log("Merging and unloading model...")
+    log.info("Merging and unloading model...")
     model = peft_model.merge_and_unload()
-    log("Saving model...")
+    log.info("Saving model...")
     model.save_pretrained(output_dir)
     adapter_tokenizer.save_pretrained(output_dir)
     return output_dir
@@ -138,10 +135,10 @@ def clone_llamacpp_and_download_build():
     llamacpp_dir = os.path.join(os.getcwd(), "llama.cpp")

     if not os.path.exists(llamacpp_dir):
-        log(f"Cloning llama.cpp from {llamacpp_repo}...")
+        log.info(f"Cloning llama.cpp from {llamacpp_repo}...")
         os.system(f"git clone {llamacpp_repo} {llamacpp_dir}")

-    log("Building llama.cpp...")
+    log.info("Building llama.cpp...")
     build_dir = os.path.join(llamacpp_dir, "build")
     os.makedirs(build_dir, exist_ok=True)

@@ -155,7 +152,7 @@ def clone_llamacpp_and_download_build():
    os.system("cmake -B build")
    os.system("cmake --build build --config Release")

-    log("llama.cpp build completed.")
+    log.info("llama.cpp build completed.")
     # Return to the original directory
     os.chdir(os.path.dirname(llamacpp_dir))

@@ -186,17 +183,17 @@ def quantize(model_path, repo_id, quant_method=None):
     guff_16 = os.path.join(model_output_dir, f"{repo_id}-f16.gguf")

     if not os.path.exists(guff_16):
-        log(f"Converting the model to GGML format")
+        log.info(f"Converting the model to GGML format")
         convert_script = os.path.join(llamacpp_dir, "convert_hf_to_gguf.py")
         convert_cmd = f"python {convert_script} {model_path} --outfile {guff_16}"
         print(f"syscall:[{convert_cmd}]")
         os.system(convert_cmd)
     else:
-        log(f"GGML intermediate file already exists; skipping conversion")
+        log.info(f"GGML intermediate file already exists; skipping conversion")

     # The final file is saved under the model_output directory
     final_path = os.path.join(model_output_dir, f"{repo_id}-{quant_method}.gguf")
-    log(f"Running {quant_method} quantization")
+    log.info(f"Running {quant_method} quantization")
     quantize_bin = os.path.join(llamacpp_dir, "build", "bin", "llama-quantize")
     quant_cmd = f"{quantize_bin} {guff_16} {final_path} {quant_method}"
     print(f"syscall:[{quant_cmd}]")
@@ -204,7 +201,7 @@ def quantize(model_path, repo_id, quant_method=None):
     if not os.path.exists(final_path):
         os.system(quant_cmd)
     else:
-        log(f"{quant_method} quantized file already exists; skipping quantization")
+        log.info(f"{quant_method} quantized file already exists; skipping quantization")
         return None

     return final_path
@@ -281,7 +278,7 @@ def process_model(base_model_name, lora_model_name, repo_name, quant_methods, hf
         repo_name = create_hf_repo(repo_name)

         output_dir = os.path.join(".", "output", repo_name)
-        log("Starting model merge process...")
+        log.info("Starting model merge process...")
         model_path = download_and_merge_model(base_model_name, lora_model_name, output_dir, device)


@@ -299,15 +296,15 @@ def process_model(base_model_name, lora_model_name, repo_name, quant_methods, hf
             num_workers=os.cpu_count() if os.cpu_count() > 4 else 4,
             print_report_every=10,
         )
-        log("Upload completed.")
+        log.info("Upload completed.")

         # rm -rf model_path
         shutil.rmtree(model_path)
-        log("Removed model from local")
+        log.info("Removed model from local")

     except Exception as e:
         error_message = f"Error during processing: {e}"
-        log(error_message)
+        log.error(error_message)
         raise e

convert.log ADDED
File without changes
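
Note on the change itself: in the old code the module-level helper def log(msg) shadowed the logging.Logger instance that is also named log, so inside the helper log.info(msg) resolved to the function object rather than the logger and every call raised AttributeError ('function' object has no attribute 'info'). The commit removes the helper and calls log.info / log.warning / log.error directly, which also gives each message a proper severity level. Below is a minimal, self-contained sketch of the logger setup and the timeit decorator as they look after this commit; only the handlers and level are taken from the visible context lines, while the logger name "convert" and the dummy_step function are assumptions added for illustration.

import logging
import time

# Module-level logger as configured in app.py: INFO level, stream + file handlers.
# The logger name "convert" is an assumption; the original getLogger call is not shown in the diff.
log = logging.getLogger("convert")
log.setLevel(logging.INFO)
log.addHandler(logging.StreamHandler())
log.addHandler(logging.FileHandler("convert.log"))

def timeit(func):
    # Measure wall-clock time of the wrapped call and log it via the logger directly,
    # mirroring the post-commit decorator.
    def wrapper(*args, **kwargs):
        start_time = time.time()
        result = func(*args, **kwargs)
        end_time = time.time()
        log.info(f"{func.__name__}: {end_time - start_time:.2f} s")
        return result
    return wrapper

@timeit
def dummy_step():
    # Hypothetical stand-in for one of the real pipeline steps.
    time.sleep(0.1)

if __name__ == "__main__":
    dummy_step()  # logs roughly "dummy_step: 0.10 s" to stderr and to convert.log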