# File size: 1,270 Bytes
# f5e51f3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
from transformers import AutoModelForCausalLM, AutoTokenizer
import gradio as gr
import spaces
import torch

# Hugging Face Hub repository ID of the checkpoint served by this demo.
model_id = "AtlaAI/Selene-1-Mini-Llama-3.1-8B"

# Model and tokenizer are loaded once at import time and shared by every
# request. device_map="auto" lets the library decide weight placement.
# torch_dtype="auto" keeps the precision stored in the checkpoint; without it
# transformers 4.x loads weights as float32, roughly doubling the memory
# needed for an 8B-parameter model.
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="auto",
    torch_dtype="auto",
)
tokenizer = AutoTokenizer.from_pretrained(model_id)

@spaces.GPU
def generate_response(prompt):
    """Generate one sampled chat reply to ``prompt``.

    The prompt is wrapped as a single user turn, rendered with the
    tokenizer's chat template (generation prompt appended), and passed to
    ``model.generate`` with sampling enabled for up to 512 new tokens.

    Args:
        prompt: The user's message text.

    Returns:
        The decoded completion only: prompt tokens are sliced off and
        special tokens are skipped during decoding.
    """
    messages = [{"role": "user", "content": prompt}]
    text = tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )

    # The rendered template already carries the special tokens it needs, so
    # do not let the tokenizer prepend a second BOS when re-encoding it.
    # Follow the model's own device rather than hard-coding "cuda".
    model_inputs = tokenizer(
        [text], return_tensors="pt", add_special_tokens=False
    ).to(model.device)

    # Unpack the whole encoding so attention_mask reaches generate() along
    # with input_ids.
    generated_ids = model.generate(
        **model_inputs,
        max_new_tokens=512,
        do_sample=True,
    )

    # generate() returns prompt + completion; keep only the new tokens.
    prompt_length = model_inputs.input_ids.shape[1]
    completion_ids = generated_ids[:, prompt_length:]

    return tokenizer.batch_decode(completion_ids, skip_special_tokens=True)[0]

# Single-textbox Gradio UI around generate_response. The user-facing strings
# are Japanese; they had been corrupted by a UTF-8 -> single-byte decoding
# round trip and are restored here. English glosses:
#   input label  - "Please enter a prompt"
#   output label - "Generated response"
#   title        - "Selene-1-Mini-Llama-3.1-8B demo"
#   description  - "Enter a prompt and the model will generate a response."
demo = gr.Interface(
    fn=generate_response,
    inputs=gr.Textbox(label="プロンプトを入力してください"),
    outputs=gr.Textbox(label="生成された応答"),
    title="Selene-1-Mini-Llama-3.1-8B デモ",
    description="プロンプトを入力すると、モデルが応答を生成します。",
)

# Start the web server only when run as a script, not when imported.
if __name__ == "__main__":
    demo.launch()