# Hugging Face Space: streaming Gradio chat app backed by NVIDIA AI Endpoints
# (LangChain LCEL pipeline: prompt template -> Mixtral 8x7B -> string output).
from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM  # NOTE(review): unused in this file — likely leftover; kept to avoid removing a file-level import
import os

import gradio as gr
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_nvidia_ai_endpoints import ChatNVIDIA

# SECURITY: the original source embedded a literal "nvapi-..." API key here.
# A credential committed to source is compromised — revoke that key and supply
# a fresh one via the environment instead of hardcoding it.
if "NVIDIA_API_KEY" not in os.environ:
    raise RuntimeError(
        "NVIDIA_API_KEY environment variable is not set; "
        "export it before launching the app."
    )
# LCEL pipeline: prompt template -> NVIDIA-hosted Mixtral -> plain string.
# The "{input}" placeholder in the user message is filled at invocation time.
prompt = ChatPromptTemplate.from_messages(
    [
        ("system", "You are a helpful AI assistant named Fred."),
        ("user", "{input}"),
    ]
)
llm = ChatNVIDIA(model="mixtral_8x7b")
chain = prompt | llm | StrOutputParser()
def chat(prompt, history):
    """Streaming callback for gr.ChatInterface: yield the growing reply.

    Parameters
    ----------
    prompt : str
        The user's latest message. NOTE(review): this parameter shadows the
        module-level ``prompt`` template; the name is kept unchanged because
        ``gr.ChatInterface`` invokes the callback positionally and renaming
        would alter the visible signature.
    history : list
        Prior conversation turns supplied by Gradio. Unused here — each turn
        is answered statelessly, without conversation memory.

    Yields
    ------
    str
        The accumulated response text after each streamed chunk, so the UI
        updates incrementally as tokens arrive.
    """
    output = ""
    for chunk in chain.stream({"input": prompt}):
        output += chunk
        # Yield the full text so far (not just the delta) — Gradio replaces
        # the displayed message with each yielded value.
        yield output
# Build the chat UI around the streaming callback. .queue() enables
# generator-based (streaming) responses, then launch the local server.
demo = gr.ChatInterface(chat).queue()
demo.launch()