|
import os |
|
import gradio as gr |
|
from huggingface_hub import InferenceClient |
|
|
|
|
|
# Hugging Face access token, read from the environment (None if unset;
# unauthenticated requests are then subject to stricter rate limits).
hf_token = os.getenv("HF_TOKEN")

# Serverless Inference API client pinned to the Phi-3 mini instruct model.
client = InferenceClient("microsoft/Phi-3-mini-4k-instruct", token=hf_token)
|
|
|
|
|
def chat_with_model(user_input, history):
    """Send *user_input* to the model and append the exchange to *history*.

    Parameters
    ----------
    user_input : str
        The message typed by the user.
    history : list[tuple[str, str]]
        Running list of (user, assistant) message pairs — the Gradio
        ``gr.State`` value, mutated in place.

    Returns
    -------
    tuple[list, list]
        The same updated history twice, so one callback can feed both the
        ``Chatbot`` display and the ``State`` component.
    """
    response = ""

    # Stream the completion and accumulate the generated text chunk by chunk.
    for message in client.chat_completion(
        messages=[{"role": "user", "content": user_input}],
        max_tokens=500,
        stream=True,
    ):
        # The final streamed chunk (and role-only chunks) carry
        # delta.content == None; concatenating that to a str raises
        # TypeError, so skip falsy chunks.
        chunk = message.choices[0].delta.content
        if chunk:
            response += chunk

    history.append((user_input, response))
    return history, history
|
|
|
|
|
# --- UI layout --------------------------------------------------------------
with gr.Blocks(theme="nevreal/blues") as demo:
    gr.Markdown("# Gradio Chatbot with Phi-3-mini-4k-instruct")

    # Conversation display pane.
    chat_display = gr.Chatbot()

    # Input row: message box next to a send button.
    with gr.Row():
        msg_box = gr.Textbox(show_label=False, placeholder="Type a message...")
        send_btn = gr.Button("Send")

    # Per-session chat history, threaded through every callback invocation.
    chat_state = gr.State([])

    # Clicking "Send" runs the model and refreshes both display and state.
    send_btn.click(
        fn=chat_with_model,
        inputs=[msg_box, chat_state],
        outputs=[chat_display, chat_state],
    )

demo.launch()