# app.py — Gradio chat demo for the yodayo-ai/nephra_v1.0 model.
# (Source: Hugging Face Space by PitterTMYT, commit 544984e.)
import gradio as gr
import transformers
import torch
# Model and pipeline setup
# Hub repo id of the roleplay/instruction chat model served by this app.
model_id = "yodayo-ai/nephra_v1.0"
# Load the causal LM in bfloat16; device_map="auto" lets accelerate shard the
# layers across available GPUs/CPU, spilling anything that does not fit into
# the local "offload" directory.
model = transformers.AutoModelForCausalLM.from_pretrained(
model_id,
torch_dtype=torch.bfloat16,
device_map="auto",
offload_folder="offload" # Ensure this folder is available or adjust the path
)
# Tokenizer from the same repo; also provides the chat template used below.
tokenizer = transformers.AutoTokenizer.from_pretrained(model_id)
# Module-level text-generation pipeline reused by generate_response().
# NOTE(review): this name shadows the `transformers.pipeline` factory within
# this module — harmless here since the factory is not called again, but a
# name like `chat_pipeline` would be clearer.
pipeline = transformers.pipeline(
"text-generation",
model=model,
tokenizer=tokenizer,
device_map="auto",
)
# Function to generate a response
def generate_response(user_input):
    """Generate one assistant reply to ``user_input`` using the Nephra pipeline.

    Args:
        user_input: The user's message as plain text.

    Returns:
        The model's generated reply with the echoed prompt prefix stripped.
    """
    messages = [
        {"role": "system", "content": "You are to play the role of a cheerful assistant."},
        {"role": "user", "content": user_input},
    ]
    # Render the chat template to a plain string (tokenize=False) so the echoed
    # prompt can be sliced off the decoded output below.
    prompt = pipeline.tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,
    )
    # BUG FIX: the original passed convert_tokens_to_ids("") as a stop id. The
    # empty string is not a vocabulary token, so it resolves to the unk id (or
    # None on some tokenizers) — generation could then stop on any unk token or
    # crash on a None id. The "" was presumably a stripped end-of-turn token
    # such as "<|im_end|>" (TODO confirm against the model card); resolve it
    # defensively and keep only real, non-unk ids. The tokenizer's own
    # eos_token_id is always included.
    candidate_ids = [
        pipeline.tokenizer.convert_tokens_to_ids("<|im_end|>"),
        pipeline.tokenizer.eos_token_id,
    ]
    stop_ids = [
        tid
        for tid in candidate_ids
        if tid is not None and tid != pipeline.tokenizer.unk_token_id
    ]
    outputs = pipeline(
        prompt,
        max_new_tokens=512,
        eos_token_id=stop_ids,
        do_sample=True,
        temperature=1.12,
        min_p=0.075,
    )
    # The pipeline returns the prompt plus completion; return only the new text.
    return outputs[0]["generated_text"][len(prompt):]
# Gradio UI: one text box in, one text box out, wired to generate_response.
ui_config = dict(
    fn=generate_response,
    inputs="text",
    outputs="text",
    title="Chat with Nephra",
    description="Interact with the Nephra model, a roleplaying and instruction-based AI.",
)
interface = gr.Interface(**ui_config)

# Start the web server for the app.
interface.launch()