# Hugging Face Spaces metadata residue (Space status: Sleeping) — not code.
import gradio as gr
import torch
import transformers

# Model and pipeline setup.
# Loads the Nephra causal LM with bfloat16 weights, letting transformers
# shard it across available devices; weights that don't fit are offloaded
# to the `offload/` folder on disk.
model_id = "yodayo-ai/nephra_v1.0"
model = transformers.AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.bfloat16,
    device_map="auto",
    offload_folder="offload",  # ensure this folder exists or adjust the path
)
tokenizer = transformers.AutoTokenizer.from_pretrained(model_id)
pipeline = transformers.pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    device_map="auto",
)
# Function to generate a response | |
def generate_response(user_input):
    """Generate a single chat reply from the Nephra pipeline.

    Args:
        user_input: The user's message as plain text.

    Returns:
        The model's generated reply, with the echoed prompt prefix removed.
    """
    messages = [
        {"role": "system", "content": "You are to play the role of a cheerful assistant."},
        {"role": "user", "content": user_input},
    ]
    # Render the chat turns into the model's expected prompt string,
    # appending the generation prompt so the model answers as assistant.
    prompt = pipeline.tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,
    )
    outputs = pipeline(
        prompt,
        max_new_tokens=512,
        # Stop on either the chat end-of-turn token or the tokenizer's EOS.
        # NOTE(review): the original passed convert_tokens_to_ids("") — an
        # empty token name that resolves to the unknown-token id and never
        # matches, so generation could run to max_new_tokens. "<|eot_id|>"
        # is the Llama-3-style end-of-turn token; confirm it exists in this
        # tokenizer's vocabulary.
        eos_token_id=[
            pipeline.tokenizer.convert_tokens_to_ids("<|eot_id|>"),
            pipeline.tokenizer.eos_token_id,
        ],
        do_sample=True,
        temperature=1.12,
        min_p=0.075,
    )
    # The text-generation pipeline echoes the prompt at the start of
    # generated_text; return only the newly generated continuation.
    return outputs[0]["generated_text"][len(prompt):]
# Gradio Interface | |
# Gradio interface: a minimal single-textbox front end around the model.
interface = gr.Interface(
    fn=generate_response,
    inputs="text",
    outputs="text",
    title="Chat with Nephra",
    description="Interact with the Nephra model, a roleplaying and instruction-based AI.",
)

# Launch the Gradio app (blocks and serves the web UI).
interface.launch()