Steven10429 committed
Commit 063b06e · verified · 1 Parent(s): 0904a72

Update app.py

Files changed (1):
  app.py +9 -33

app.py CHANGED
@@ -207,10 +207,9 @@ def quantize(model_path, repo_id, quant_method=None):
 
     return final_path
 
-def create_readme(repo_name, base_model_name, lora_model_name, quant_methods, username):
+def create_readme(repo_name, base_model_name, lora_model_name, quant_methods):
     readme_path = os.path.join("output", repo_name, "README.md")
-    readme_template = """---
-tags:
+    readme_template = """---tags:
 - autotrain
 - text-generation-inference
 - text-generation
@@ -236,32 +235,6 @@ datasets:
 - created_at: {created_at}
 - created_by: [Steven10429/apply_lora_and_quantize](https://github.com/Steven10429/apply_lora_and_quantize)
 
-## Usage:
-```python
-
-from transformers import AutoModelForCausalLM, AutoTokenizer
-
-model_path = "{username}/{repo_name}"
-
-tokenizer = AutoTokenizer.from_pretrained(model_path)
-model = AutoModelForCausalLM.from_pretrained(
-    model_path,
-    device_map="auto",
-    torch_dtype='auto'
-).eval()
-
-# Prompt content: "hi"
-messages = [
-    {"role": "user", "content": "hi"}
-]
-
-input_ids = tokenizer.apply_chat_template(conversation=messages, tokenize=True, add_generation_prompt=True, return_tensors='pt')
-output_ids = model.generate(input_ids.to('cuda'))
-response = tokenizer.decode(output_ids[0][input_ids.shape[1]:], skip_special_tokens=True)
-
-# Model response: "Hello! How can I assist you today?"
-print(response)
-```
 """.format(
     quantization="\n- quantization" if len(quant_methods) > 0 else "",
     base_model_name=base_model_name,
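A side note on the removed usage snippet: it lived inside a `str.format` template, and `str.format` treats every `{...}` in a string as a replacement field, so literal braces (such as the `{"role": "user", "content": "hi"}` message dict) have to be doubled as `{{...}}` to survive formatting. A minimal sketch of that escaping rule (the template and field names are illustrative):

```python
# str.format substitutes single-braced fields and collapses doubled
# braces "{{...}}" back to literal braces.
template = """model: {repo_name}
messages = [
    {{"role": "user", "content": "hi"}}
]
"""
print(template.format(repo_name="demo/repo"))
# model: demo/repo
# messages = [
#     {"role": "user", "content": "hi"}
# ]
```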
@@ -269,7 +242,6 @@ print(response)
     repo_name=repo_name,
     quant_methods=quant_methods,
     created_at=time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()),
-    username=username
 )
 
     with open(readme_path, "w") as f:
@@ -288,8 +260,11 @@ def process_model(base_model_name, lora_model_name, repo_name, quant_methods, hf
     """
    try:
        current_logs.clear()
+       if hf_token.strip().lower() == "auto":
+           hf_token = os.getenv("HF_TOKEN")
+       elif hf_token.startswith("hf_"):
+           os.environ["HF_TOKEN"] = hf_token
        login(hf_token)
-       os.environ["HF_TOKEN"] = hf_token
        api = HfApi(token=hf_token)
        username = api.whoami()["name"]
 
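The inserted branch resolves the token before `login` runs: the sentinel "Auto" (compared case-insensitively) defers to the `HF_TOKEN` environment variable, while a pasted `hf_`-prefixed token is exported so that spawned tools see it too. A standalone sketch of that resolution logic, assuming the same environment variable (the helper name is illustrative):

```python
import os

def resolve_hf_token(raw: str) -> str | None:
    """Return the token to hand to login(); None when nothing is configured."""
    if raw.strip().lower() == "auto":
        # Defer to the environment; os.getenv returns None when unset.
        return os.getenv("HF_TOKEN")
    if raw.startswith("hf_"):
        # Export a pasted token so child processes can read it as well.
        os.environ["HF_TOKEN"] = raw
    return raw

assert resolve_hf_token("hf_example") == "hf_example"  # placeholder token
```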
@@ -312,7 +287,7 @@ def process_model(base_model_name, lora_model_name, repo_name, quant_methods, hf
        for quant_method in quant_methods:
            quantize(output_dir, repo_name, quant_method=quant_method)
 
-       create_readme(repo_name, base_model_name, lora_model_name, quant_methods, username)
+       create_readme(repo_name, base_model_name, lora_model_name, quant_methods)
 
        # Upload the merged model and the quantized models
        api.upload_large_folder(
@@ -332,6 +307,7 @@ def process_model(base_model_name, lora_model_name, repo_name, quant_methods, hf
    except Exception as e:
        error_message = f"Error during processing: {e}"
        log(error_message)
+       raise e
    return "\n".join(current_logs)
 
 @timeit
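Logging before re-raising keeps the message in `current_logs` while still surfacing the failure to the caller (here, the Gradio event handler). A minimal sketch of the log-then-re-raise pattern (the logger setup and `risky` function are illustrative; inside an `except` block a bare `raise` preserves the original traceback just as well):

```python
import logging

logging.basicConfig(level=logging.ERROR)

def risky() -> None:
    raise ValueError("boom")

try:
    risky()
except Exception as e:
    logging.error("Error during processing: %s", e)
    raise  # re-raise so the caller still sees the failure
```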
@@ -368,7 +344,7 @@ def create_ui():
            hf_token = gr.Textbox(
                label="Hugging Face Token",
                placeholder="Enter your Hugging Face Token",
-               value=os.getenv("HF_TOKEN")
+               value="Auto"
            )
            convert_btn = gr.Button("Start Conversion", variant="primary")
            with gr.Column():
 
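End to end, the textbox now ships with the sentinel value "Auto" and the handler decides where the real token comes from. A minimal sketch of that wiring, assuming a Gradio Blocks app and the same fallback rule (component names and the check function are illustrative):

```python
import os
import gradio as gr

def check_token(raw: str) -> str:
    # "Auto" falls back to the environment, anything else is used directly.
    token = os.getenv("HF_TOKEN") if raw.strip().lower() == "auto" else raw
    return "token found" if token else "no token configured"

with gr.Blocks() as demo:
    token_box = gr.Textbox(label="Hugging Face Token", value="Auto")
    status = gr.Textbox(label="Status")
    gr.Button("Check").click(check_token, inputs=token_box, outputs=status)

# demo.launch()  # uncomment to serve the demo locally
```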