Update app.py
app.py CHANGED
@@ -207,10 +207,9 @@ def quantize(model_path, repo_id, quant_method=None):
 
     return final_path
 
-def create_readme(repo_name, base_model_name, lora_model_name, quant_methods, username):
+def create_readme(repo_name, base_model_name, lora_model_name, quant_methods):
     readme_path = os.path.join("output", repo_name, "README.md")
-    readme_template = """---
-tags:
+    readme_template = """---tags:
 - autotrain
 - text-generation-inference
 - text-generation
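Besides dropping the now-unused `username` parameter (its consumers, the `{username}` placeholder and the `username=username` keyword, are removed in the hunks below), this hunk collapses the first two template lines into `"""---tags:`. That looks like a regression: YAML front matter needs the opening `---` on its own line, so the generated README metadata will not parse. A quick standalone check:

```python
# The template now opens with '---tags:' on one line; YAML front matter
# requires the opening '---' to stand alone, so the Hub metadata block breaks.
readme_template = """---tags:
- autotrain
"""
first_line = readme_template.splitlines()[0]
print(first_line)           # ---tags:
print(first_line == "---")  # False -> malformed front matter
```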
@@ -236,32 +235,6 @@ datasets:
 - created_at: {created_at}
 - created_by: [Steven10429/apply_lora_and_quantize](https://github.com/Steven10429/apply_lora_and_quantize)
 
-## Usage:
-```python
-
-from transformers import AutoModelForCausalLM, AutoTokenizer
-
-model_path = "{username}/{repo_name}"
-
-tokenizer = AutoTokenizer.from_pretrained(model_path)
-model = AutoModelForCausalLM.from_pretrained(
-    model_path,
-    device_map="auto",
-    torch_dtype='auto'
-).eval()
-
-# Prompt content: "hi"
-messages = [
-    {"role": "user", "content": "hi"}
-]
-
-input_ids = tokenizer.apply_chat_template(conversation=messages, tokenize=True, add_generation_prompt=True, return_tensors='pt')
-output_ids = model.generate(input_ids.to('cuda'))
-response = tokenizer.decode(output_ids[0][input_ids.shape[1]:], skip_special_tokens=True)
-
-# Model response: "Hello! How can I assist you today?"
-print(response)
-```
 """.format(
         quantization="\n- quantization" if len(quant_methods) > 0 else "",
        base_model_name=base_model_name,
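Dropping the Usage section is more than cosmetic: the template is passed through `str.format`, which treats every unescaped `{` as a replacement field, so the literal dict braces in `messages = [{"role": ...}]` would raise before the README is ever written. A minimal repro of that failure mode (my inference for why the block was removed; the commit message does not say):

```python
# str.format parses '{"role"...' as a replacement field named '"role"';
# literal braces inside a .format() template must be doubled as '{{' '}}'.
template = 'messages = [{"role": "user", "content": "hi"}]'
try:
    template.format(username="demo", repo_name="demo")
except (KeyError, ValueError) as e:
    print(f"{type(e).__name__}: {e}")  # e.g. KeyError: '"role"'
```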
@@ -269,7 +242,6 @@ print(response)
         repo_name=repo_name,
         quant_methods=quant_methods,
         created_at=time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()),
-        username=username
     )
 
     with open(readme_path, "w") as f:
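With `{username}` gone from the template, the matching keyword argument goes too. The ordering of the two removals matters because of an asymmetry in `str.format`:

```python
# Extra keyword arguments to str.format are ignored, but a placeholder with
# no matching argument raises -- so template and kwargs must stay in sync.
print("{repo_name}".format(repo_name="demo", username="unused"))  # fine
try:
    print("{repo_name} by {username}".format(repo_name="demo"))
except KeyError as e:
    print(f"missing key: {e}")  # 'username'
```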
@@ -288,8 +260,11 @@ def process_model(base_model_name, lora_model_name, repo_name, quant_methods, hf
     """
     try:
         current_logs.clear()
+        if hf_token.strip().lower() == "auto":
+            hf_token = os.getenv("HF_TOKEN")
+        elif hf_token.startswith("hf_"):
+            os.environ["HF_TOKEN"] = hf_token
         login(hf_token)
-        os.environ["HF_TOKEN"] = hf_token
         api = HfApi(token=hf_token)
         username = api.whoami()["name"]
 
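This is the core behavior change: a literal `Auto` in the token field now falls back to the `HF_TOKEN` environment variable (e.g. a Space secret), while an explicitly pasted `hf_...` token is exported before `login()`. As a standalone sketch (the function name is mine; the branch logic is the hunk's), with one caveat: `auto` with no `HF_TOKEN` set leaves the token as `None`, so the subsequent `login()` will fail.

```python
import os

def resolve_hf_token(hf_token):
    # Sketch of the new resolution logic, lifted from the hunk above.
    if hf_token.strip().lower() == "auto":
        # fall back to the environment, e.g. a Space secret
        hf_token = os.getenv("HF_TOKEN")  # may be None if the secret is unset
    elif hf_token.startswith("hf_"):
        # export an explicitly supplied token for any child processes
        os.environ["HF_TOKEN"] = hf_token
    return hf_token
```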
@@ -312,7 +287,7 @@ def process_model(base_model_name, lora_model_name, repo_name, quant_methods, hf
         for quant_method in quant_methods:
             quantize(output_dir, repo_name, quant_method=quant_method)
 
-        create_readme(repo_name, base_model_name, lora_model_name, quant_methods, username)
+        create_readme(repo_name, base_model_name, lora_model_name, quant_methods)
 
         # Upload the merged model and the quantized models
         api.upload_large_folder(
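The call-site fix mirrors the signature change to `create_readme`. The hunk ends where the upload starts, so the actual arguments to `upload_large_folder` sit outside the diff; a call consistent with the `output/<repo_name>` convention used above might look like this (a sketch, not the commit's code):

```python
import os
from huggingface_hub import HfApi

def upload_outputs(api: HfApi, username: str, repo_name: str) -> None:
    # Sketch only -- the real arguments are outside the diff; paths follow
    # the output/<repo_name> convention used by create_readme above.
    api.upload_large_folder(
        repo_id=f"{username}/{repo_name}",
        folder_path=os.path.join("output", repo_name),
        repo_type="model",
    )
```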
@@ -332,6 +307,7 @@ def process_model(base_model_name, lora_model_name, repo_name, quant_methods, hf
     except Exception as e:
         error_message = f"Error during processing: {e}"
         log(error_message)
+        raise e
     return "\n".join(current_logs)
 
 @timeit
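Errors are now re-raised after logging, so failures propagate to the caller (and surface in the Gradio UI) instead of being silently folded into the returned log string. A bare `raise` would be the more idiomatic spelling, since it re-raises the active exception as-is:

```python
def log(message):
    print(message)

def risky():
    raise RuntimeError("boom")

try:
    risky()
except Exception as e:
    log(f"Error during processing: {e}")
    raise  # bare raise re-raises the active exception, traceback intact
```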
@@ -368,7 +344,7 @@ def create_ui():
             hf_token = gr.Textbox(
                 label="Hugging Face Token",
                 placeholder="Enter your Hugging Face Token",
-                value=
+                value="Auto"
             )
             convert_btn = gr.Button("Start Conversion", variant="primary")
         with gr.Column():
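The new default wires the UI to the `auto` branch added in `process_model`; the comparison there is `.strip().lower()`, so the capitalized `"Auto"` matches. If a user pastes a real token instead, masking the field is cheap hardening; a possible variant, not part of this commit:

```python
import gradio as gr

# Variant sketch (not in the commit): type="password" masks a pasted token;
# value="Auto" still selects the HF_TOKEN fallback in process_model.
hf_token = gr.Textbox(
    label="Hugging Face Token",
    placeholder="Enter your Hugging Face Token",
    value="Auto",
    type="password",
)
```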