# app.py — Gradio chat demo for the yodayo-ai/nephra_v1.0 model.
# (Source: Hugging Face Space by PitterTMYT, commit 544984e.)
import gradio as gr
import transformers
import torch
# Model and pipeline setup
# Hub repo id of the roleplay/instruction chat model served by this app.
model_id = "yodayo-ai/nephra_v1.0"
# Load the causal LM in bfloat16; device_map="auto" lets accelerate shard the
# layers across available GPUs/CPU, spilling anything that does not fit into
# the local "offload" directory.
model = transformers.AutoModelForCausalLM.from_pretrained(
model_id,
torch_dtype=torch.bfloat16,
device_map="auto",
offload_folder="offload" # Ensure this folder is available or adjust the path
)
# Tokenizer from the same repo; also provides the chat template used below.
tokenizer = transformers.AutoTokenizer.from_pretrained(model_id)
# Module-level text-generation pipeline reused by generate_response().
# NOTE(review): this name shadows the `transformers.pipeline` factory within
# this module — harmless here since the factory is not called again, but a
# name like `chat_pipeline` would be clearer.
pipeline = transformers.pipeline(
"text-generation",
model=model,
tokenizer=tokenizer,
device_map="auto",
)
# Function to generate a response
def generate_response(user_input):
    """Generate one assistant reply to ``user_input`` using the Nephra pipeline.

    Args:
        user_input: The user's message as plain text.

    Returns:
        The model's generated reply with the echoed prompt prefix stripped.
    """
    messages = [
        {"role": "system", "content": "You are to play the role of a cheerful assistant."},
        {"role": "user", "content": user_input},
    ]
    # Render the chat template to a plain string (tokenize=False) so the echoed
    # prompt can be sliced off the decoded output below.
    prompt = pipeline.tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,
    )
    # BUG FIX: the original passed convert_tokens_to_ids("") as a stop id. The
    # empty string is not a vocabulary token, so it resolves to the unk id (or
    # None on some tokenizers) — generation could then stop on any unk token or
    # crash on a None id. The "" was presumably a stripped end-of-turn token
    # such as "<|im_end|>" (TODO confirm against the model card); resolve it
    # defensively and keep only real, non-unk ids. The tokenizer's own
    # eos_token_id is always included.
    candidate_ids = [
        pipeline.tokenizer.convert_tokens_to_ids("<|im_end|>"),
        pipeline.tokenizer.eos_token_id,
    ]
    stop_ids = [
        tid
        for tid in candidate_ids
        if tid is not None and tid != pipeline.tokenizer.unk_token_id
    ]
    outputs = pipeline(
        prompt,
        max_new_tokens=512,
        eos_token_id=stop_ids,
        do_sample=True,
        temperature=1.12,
        min_p=0.075,
    )
    # The pipeline returns the prompt plus completion; return only the new text.
    return outputs[0]["generated_text"][len(prompt):]
# Gradio UI: one text box in, one text box out, wired to generate_response.
ui_config = dict(
    fn=generate_response,
    inputs="text",
    outputs="text",
    title="Chat with Nephra",
    description="Interact with the Nephra model, a roleplaying and instruction-based AI.",
)
interface = gr.Interface(**ui_config)

# Start the web server for the app.
interface.launch()