Spaces:

diabolic6045
/

open-llama-3.2-1B-Instruct

Sleeping

App Files Files Community

open-llama-3.2-1B-Instruct / app.py

diabolic6045

Update app.py

7867487 verified 4 months ago

raw

history blame

2.03 kB

	import gradio as gr
	from transformers import AutoTokenizer, AutoModelForCausalLM
	import torch
	import spaces

	# Fetch the tokenizer and the causal-LM weights for the same checkpoint.
	tokenizer = AutoTokenizer.from_pretrained("diabolic6045/open-llama-Instruct")
	# nn.Module.eval() returns the module itself, so the load and the switch to
	# evaluation mode can be chained into a single statement.
	model = AutoModelForCausalLM.from_pretrained("diabolic6045/open-llama-Instruct").eval()

	# Place the weights on the GPU only when CUDA is actually available.
	if torch.cuda.is_available():
	    model.to('cuda')

	@spaces.GPU()
	def respond(
	    message,
	    history,
	    system_message,
	    max_tokens,
	    temperature,
	    top_p,
	):
	    """Generate the assistant's reply to ``message``.

	    The prompt is a plain-text transcript: a ``System:`` line, one
	    ``User:``/``Assistant:`` pair per past turn, and the new user message
	    followed by an open ``Assistant:`` cue for the model to complete.

	    Args:
	        message: The latest user message.
	        history: Iterable of ``(user_msg, bot_msg)`` pairs for earlier turns.
	        system_message: Text placed on the leading ``System:`` line.
	        max_tokens: Upper bound on newly generated tokens.
	        temperature: Sampling temperature passed to ``model.generate``.
	        top_p: Nucleus-sampling threshold passed to ``model.generate``.

	    Returns:
	        The decoded text of the newly generated tokens, with special tokens
	        removed and surrounding whitespace stripped.
	    """
	    # Build the conversation transcript.
	    turns = [f"System: {system_message}\n"]
	    for user_msg, bot_msg in history:
	        turns.append(f"User: {user_msg}\nAssistant: {bot_msg}\n")
	    turns.append(f"User: {message}\nAssistant:")
	    conversation = "".join(turns)

	    # Tokenize the prompt.
	    # NOTE(review): with truncation=True the tokenizer presumably drops tokens
	    # from the end of an over-long prompt, i.e. the newest message — confirm
	    # the tokenizer's truncation side for very long chats.
	    inputs = tokenizer(conversation, return_tensors='pt', truncation=True, max_length=1024)
	    if torch.cuda.is_available():
	        inputs = {k: v.to('cuda') for k, v in inputs.items()}
	    prompt_length = inputs["input_ids"].shape[-1]

	    # Generate without tracking gradients; this is inference only.
	    with torch.no_grad():
	        output = model.generate(
	            **inputs,
	            max_new_tokens=int(max_tokens),
	            do_sample=True,
	            temperature=temperature,
	            top_p=top_p,
	            pad_token_id=tokenizer.eos_token_id,
	        )

	    # The returned sequence starts with the prompt tokens. Slice them off by
	    # token count rather than by len(conversation): the decoded prompt is not
	    # guaranteed to match the original string character-for-character
	    # (whitespace normalisation, truncation), which would corrupt the reply.
	    new_tokens = output[0][prompt_length:]
	    return tokenizer.decode(new_tokens, skip_special_tokens=True).strip()

	# Chat UI: `respond` handles each turn; the extra controls below are passed
	# to it, in this order, after the message and the history.
	demo = gr.ChatInterface(
	    title="Open Llama Chatbot",
	    description="Chat with an AI assistant powered by the Open Llama Instruct model.",
	    theme=gr.themes.Ocean(),
	    fn=respond,
	    additional_inputs=[
	        # system_message
	        gr.Textbox(label="System Message", value="You are a friendly Chatbot."),
	        # max_tokens
	        gr.Slider(label="Max New Tokens", minimum=1, maximum=512, step=1, value=256),
	        # temperature
	        gr.Slider(label="Temperature", minimum=0.1, maximum=1.0, step=0.05, value=0.7),
	        # top_p
	        gr.Slider(label="Top-p (Nucleus Sampling)", minimum=0.1, maximum=1.0, step=0.05, value=0.9),
	    ],
	)

	# Start the web server only when this file is executed as a script.
	if __name__ == "__main__":
	    demo.launch()