from transformers import AutoModelForCausalLM, AutoTokenizer
import gradio as gr
import spaces
import torch

model_id = "AtlaAI/Selene-1-Mini-Llama-3.1-8B"

# Load the 8B model in bfloat16 so the weights fit in GPU memory;
# device_map="auto" lets accelerate place them on the available device.
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.bfloat16,
    device_map="auto",
)
tokenizer = AutoTokenizer.from_pretrained(model_id)

@spaces.GPU
def generate_response(prompt):
    # Wrap the raw prompt in the chat format the model was trained on.
    messages = [{"role": "user", "content": prompt}]
    text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    model_inputs = tokenizer([text], return_tensors="pt").to("cuda")

    generated_ids = model.generate(
        model_inputs.input_ids,
        attention_mask=model_inputs.attention_mask,
        max_new_tokens=512,
        do_sample=True,
    )
    # Strip the prompt tokens so only the newly generated continuation is decoded.
    generated_ids = [
        output_ids[len(input_ids):]
        for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
    ]

    response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
    return response
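
# Quick sanity check, as a sketch: the handler can be called directly, the same
# way the Gradio UI will call it. The example prompt is illustrative only; it is
# left commented out so importing this module does not trigger a GPU allocation.
# print(generate_response("Summarize what a chat template does."))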

demo = gr.Interface(
    fn=generate_response,
    inputs=gr.Textbox(label="Enter a prompt"),
    outputs=gr.Textbox(label="Generated response"),
    title="Selene-1-Mini-Llama-3.1-8B Demo",
    description="Enter a prompt and the model will generate a response.",
)

if __name__ == "__main__":
    demo.launch()
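
# Expected Space dependencies, as an assumption based on the imports above
# (requirements.txt): torch, transformers, gradio, and accelerate (required for
# device_map="auto"); the `spaces` module is provided by the Hugging Face
# Spaces ZeroGPU runtime.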