"""Gradio chat front-end for the yodayo-ai/nephra_v1.0 model.

Loads the model once at module import, then serves a single-turn
text-generation interface. Run directly: `python app.py`.
"""

import gradio as gr
import torch
import transformers

# Model and pipeline setup.
model_id = "yodayo-ai/nephra_v1.0"

model = transformers.AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.bfloat16,
    device_map="auto",
    offload_folder="offload",  # Ensure this folder is available or adjust the path.
)
tokenizer = transformers.AutoTokenizer.from_pretrained(model_id)

# NOTE: the model was already dispatched with device_map="auto" above, so we
# do NOT pass device_map to pipeline() again — doing both is redundant and
# raises in newer transformers versions when the model is accelerate-managed.
pipeline = transformers.pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
)


def generate_response(user_input: str) -> str:
    """Generate a single assistant reply for *user_input*.

    Builds a two-message chat (fixed system prompt + the user's text),
    renders it through the tokenizer's chat template, samples up to 512
    new tokens, and returns only the newly generated text (the prompt
    prefix is stripped from the pipeline output).
    """
    messages = [
        {"role": "system", "content": "You are to play the role of a cheerful assistant."},
        {"role": "user", "content": user_input},
    ]

    prompt = pipeline.tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,
    )

    # Stop on either the chat end-of-turn token or the model's EOS token.
    # NOTE(review): the original code called convert_tokens_to_ids("") with an
    # empty string, which yields None/unk; "<|eot_id|>" is the end-of-turn
    # token for this Llama-3-based model's chat template — confirm against the
    # model card. Any unresolved (None) ids are filtered out defensively.
    terminators = [
        tok_id
        for tok_id in (
            pipeline.tokenizer.convert_tokens_to_ids("<|eot_id|>"),
            pipeline.tokenizer.eos_token_id,
        )
        if tok_id is not None
    ]

    outputs = pipeline(
        prompt,
        max_new_tokens=512,
        eos_token_id=terminators,
        do_sample=True,
        temperature=1.12,
        min_p=0.075,
    )

    # The pipeline echoes the prompt; return only the generated continuation.
    return outputs[0]["generated_text"][len(prompt):]


# Gradio Interface.
interface = gr.Interface(
    fn=generate_response,
    inputs="text",
    outputs="text",
    title="Chat with Nephra",
    description="Interact with the Nephra model, a roleplaying and instruction-based AI.",
)

# Launch only when executed as a script, not on import.
if __name__ == "__main__":
    interface.launch()