Commit 3a806f2
Parent(s): 0805af2

- app.py +31 -34
- convert.log +0 -0

app.py CHANGED
@@ -19,16 +19,13 @@ log.setLevel(logging.INFO)
 log.addHandler(logging.StreamHandler())
 log.addHandler(logging.FileHandler("convert.log"))

-def log(msg):
-    """Append and print a log message"""
-    log.info(msg)

 def timeit(func):
     def wrapper(*args, **kwargs):
         start_time = time.time()
         result = func(*args, **kwargs)
         end_time = time.time()
-        log(f"{func.__name__}: {end_time - start_time:.2f} s")
+        log.info(f"{func.__name__}: {end_time - start_time:.2f} s")
         return result
     return wrapper

@@ -41,35 +38,35 @@ def get_model_size_in_gb(model_name):
         # Use the safetensors size (does not assume a file extension)
         return model_info.safetensors.total / (1024 ** 3)
     except Exception as e:
-        log(f"Unable to estimate model size: {e}")
+        log.error(f"Unable to estimate model size: {e}")
         return 1  # default value

 @timeit
 def check_system_resources(model_name):
     """Check system resources and decide whether to use the CPU or GPU"""
-    log("Checking system resources...")
+    log.info("Checking system resources...")
     system_memory = psutil.virtual_memory()
     total_memory_gb = system_memory.total / (1024 ** 3)
-    log(f"Total system memory: {total_memory_gb:.1f}GB")
+    log.info(f"Total system memory: {total_memory_gb:.1f}GB")

     model_size_gb = get_model_size_in_gb(model_name)
     required_memory_gb = model_size_gb * 2.5  # reserve extra headroom
-    log(f"Estimated required memory for model: {required_memory_gb:.1f}GB")
+    log.info(f"Estimated required memory for model: {required_memory_gb:.1f}GB")

     if torch.cuda.is_available():
         gpu_name = torch.cuda.get_device_name(0)
         gpu_memory_gb = torch.cuda.get_device_properties(0).total_memory / (1024 ** 3)
-        log(f"Detected GPU: {gpu_name} with {gpu_memory_gb:.1f}GB memory")
+        log.info(f"Detected GPU: {gpu_name} with {gpu_memory_gb:.1f}GB memory")
         if gpu_memory_gb >= required_memory_gb:
-            log("✅ Sufficient GPU memory available; using GPU.")
+            log.info("✅ Sufficient GPU memory available; using GPU.")
             return "cuda", gpu_memory_gb
         else:
-            log(f"⚠️ Insufficient GPU memory (requires {required_memory_gb:.1f}GB, found {gpu_memory_gb:.1f}GB).")
+            log.warning(f"⚠️ Insufficient GPU memory (requires {required_memory_gb:.1f}GB, found {gpu_memory_gb:.1f}GB).")
     else:
-        log("❌ No GPU detected.")
+        log.error("❌ No GPU detected.")

     if total_memory_gb >= required_memory_gb:
-        log("✅ Sufficient CPU memory available; using CPU.")
+        log.info("✅ Sufficient CPU memory available; using CPU.")
         return "cpu", total_memory_gb
     else:
         raise MemoryError(f"❌ Insufficient system memory (requires {required_memory_gb:.1f}GB, available {available_memory_gb:.1f}GB).")
@@ -80,7 +77,7 @@ def setup_environment(model_name):
     try:
         device, _ = check_system_resources(model_name)
     except Exception as e:
-        log(f"Resource check failed: {e}. Defaulting to CPU.")
+        log.error(f"Resource check failed: {e}. Defaulting to CPU.")
         device = "cpu"
     return device

@@ -95,14 +92,14 @@ def create_hf_repo(repo_name, private=True):
         repo_name_with_index = repo_name
         while api.repo_exists(repo_name_with_index):
             retry_index += 1
-            log(f"Repository {repo_name_with_index} exists; trying {repo_name}_{retry_index}")
+            log.info(f"Repository {repo_name_with_index} exists; trying {repo_name}_{retry_index}")
             repo_name_with_index = f"{repo_name}_{retry_index}"
         repo_name = repo_name_with_index
         repo_url = create_repo(repo_name, private=private)
-        log(f"Repository created successfully: {repo_url}")
+        log.info(f"Repository created successfully: {repo_url}")
         return repo_name
     except Exception as e:
-        log(f"Failed to create repository: {e}")
+        log.error(f"Failed to create repository: {e}")
         raise

 @timeit
@@ -115,18 +112,18 @@ def download_and_merge_model(base_model_name, lora_model_name, output_dir, devic
    5. Take the union of the base and adapter tokenizer vocabularies and extend the tokenizer
    6. Resize the merged model's token embeddings and save
    """
-    log("Loading base model...")
+    log.info("Loading base model...")
     model = AutoModelForCausalLM.from_pretrained(base_model_name, low_cpu_mem_usage=True)
-    log("Loading adapter tokenizer...")
+    log.info("Loading adapter tokenizer...")
     adapter_tokenizer = AutoTokenizer.from_pretrained(lora_model_name)
-    log("Resizing token embeddings...")
+    log.info("Resizing token embeddings...")
     added_tokens_decoder = adapter_tokenizer.added_tokens_decoder
     model.resize_token_embeddings(adapter_tokenizer.vocab_size + len(added_tokens_decoder))
-    log("Loading LoRA adapter...")
+    log.info("Loading LoRA adapter...")
     peft_model = PeftModel.from_pretrained(model, lora_model_name, low_cpu_mem_usage=True)
-    log("Merging and unloading model...")
+    log.info("Merging and unloading model...")
     model = peft_model.merge_and_unload()
-    log("Saving model...")
+    log.info("Saving model...")
     model.save_pretrained(output_dir)
     adapter_tokenizer.save_pretrained(output_dir)
     return output_dir
@@ -138,10 +135,10 @@ def clone_llamacpp_and_download_build():
     llamacpp_dir = os.path.join(os.getcwd(), "llama.cpp")

     if not os.path.exists(llamacpp_dir):
-        log(f"Cloning llama.cpp from {llamacpp_repo}...")
+        log.info(f"Cloning llama.cpp from {llamacpp_repo}...")
         os.system(f"git clone {llamacpp_repo} {llamacpp_dir}")

-    log("Building llama.cpp...")
+    log.info("Building llama.cpp...")
     build_dir = os.path.join(llamacpp_dir, "build")
     os.makedirs(build_dir, exist_ok=True)

@@ -155,7 +152,7 @@ def clone_llamacpp_and_download_build():
     os.system("cmake -B build")
     os.system("cmake --build build --config Release")

-    log("llama.cpp build completed.")
+    log.info("llama.cpp build completed.")
     # Return to the original directory
     os.chdir(os.path.dirname(llamacpp_dir))

@@ -186,17 +183,17 @@ def quantize(model_path, repo_id, quant_method=None):
     guff_16 = os.path.join(model_output_dir, f"{repo_id}-f16.gguf")

     if not os.path.exists(guff_16):
-        log(f"Converting model to GGML format")
+        log.info(f"Converting model to GGML format")
         convert_script = os.path.join(llamacpp_dir, "convert_hf_to_gguf.py")
         convert_cmd = f"python {convert_script} {model_path} --outfile {guff_16}"
         print(f"syscall:[{convert_cmd}]")
         os.system(convert_cmd)
     else:
-        log(f"GGML intermediate file already exists; skipping conversion")
+        log.info(f"GGML intermediate file already exists; skipping conversion")

     # The final file is saved in the model_output directory
     final_path = os.path.join(model_output_dir, f"{repo_id}-{quant_method}.gguf")
-    log(f"Running {quant_method} quantization")
+    log.info(f"Running {quant_method} quantization")
     quantize_bin = os.path.join(llamacpp_dir, "build", "bin", "llama-quantize")
     quant_cmd = f"{quantize_bin} {guff_16} {final_path} {quant_method}"
     print(f"syscall:[{quant_cmd}]")
@@ -204,7 +201,7 @@ def quantize(model_path, repo_id, quant_method=None):
     if not os.path.exists(final_path):
         os.system(quant_cmd)
     else:
-        log(f"{quant_method} quantized file already exists; skipping quantization")
+        log.info(f"{quant_method} quantized file already exists; skipping quantization")
         return None

     return final_path
@@ -281,7 +278,7 @@ def process_model(base_model_name, lora_model_name, repo_name, quant_methods, hf
         repo_name = create_hf_repo(repo_name)

         output_dir = os.path.join(".", "output", repo_name)
-        log("Starting model merge process...")
+        log.info("Starting model merge process...")
         model_path = download_and_merge_model(base_model_name, lora_model_name, output_dir, device)


@@ -299,15 +296,15 @@ def process_model(base_model_name, lora_model_name, repo_name, quant_methods, hf
             num_workers=os.cpu_count() if os.cpu_count() > 4 else 4,
             print_report_every=10,
         )
-        log("Upload completed.")
+        log.info("Upload completed.")

         # rm -rf model_path
         shutil.rmtree(model_path)
-        log("Removed model from local")
+        log.info("Removed model from local")

     except Exception as e:
         error_message = f"Error during processing: {e}"
-        log(error_message)
+        log.error(error_message)
         raise e

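Note: the deleted `def log(msg)` wrapper shadowed the module-level logger of the same name (its handlers are visible at lines 19-20), so after this change the call sites use `log.info` / `log.warning` / `log.error` directly. Below is a minimal, self-contained sketch of that setup; the `logging.getLogger("convert")` call and the `demo_step` function are assumptions for illustration and do not appear in these hunks.

```python
import logging
import time

# Assumed logger construction; app.py's actual getLogger call is outside the diff above.
log = logging.getLogger("convert")
log.setLevel(logging.INFO)
log.addHandler(logging.StreamHandler())
log.addHandler(logging.FileHandler("convert.log"))

def timeit(func):
    # Same shape as the decorator in the diff: time the call, report it via the logger.
    def wrapper(*args, **kwargs):
        start_time = time.time()
        result = func(*args, **kwargs)
        end_time = time.time()
        log.info(f"{func.__name__}: {end_time - start_time:.2f} s")
        return result
    return wrapper

@timeit
def demo_step():
    time.sleep(0.1)

demo_step()  # logs roughly "demo_step: 0.10 s" to the console and appends it to convert.log
```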
convert.log ADDED

File without changes
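For reference, a sketch of the two shell commands that `quantize()` assembles in the hunks above: first an f16 GGUF conversion via llama.cpp's `convert_hf_to_gguf.py`, then quantization with the `llama-quantize` binary built earlier. The paths, repo id, and the `Q4_K_M` method below are illustrative placeholders, not values taken from this commit, and the sketch only prints the commands rather than running them.

```python
import os

# Hypothetical inputs; app.py derives these from its own arguments.
llamacpp_dir = os.path.join(os.getcwd(), "llama.cpp")
model_path = "./output/my-merged-model"      # merged HF model directory (placeholder)
model_output_dir = "./model_output"          # where GGUF files are written (placeholder)
repo_id = "my-merged-model"
quant_method = "Q4_K_M"

# Step 1: convert the merged model to an f16 GGUF file.
guff_16 = os.path.join(model_output_dir, f"{repo_id}-f16.gguf")
convert_script = os.path.join(llamacpp_dir, "convert_hf_to_gguf.py")
convert_cmd = f"python {convert_script} {model_path} --outfile {guff_16}"

# Step 2: quantize the f16 GGUF with the chosen method.
quantize_bin = os.path.join(llamacpp_dir, "build", "bin", "llama-quantize")
final_path = os.path.join(model_output_dir, f"{repo_id}-{quant_method}.gguf")
quant_cmd = f"{quantize_bin} {guff_16} {final_path} {quant_method}"

# app.py runs these with os.system(); printing keeps this sketch side-effect free.
print(f"syscall:[{convert_cmd}]")
print(f"syscall:[{quant_cmd}]")
```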