from langchain_nvidia_ai_endpoints import ChatNVIDIA
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
import gradio as gr
import os

# Set your NVIDIA API key (never hard-code a real key in source; a placeholder is used here).
os.environ["NVIDIA_API_KEY"] = "nvapi-YOUR_API_KEY_HERE"

# Prompt template: a fixed system persona plus the user's message.
prompt = ChatPromptTemplate.from_messages([
    ("system", "You are a helpful AI assistant named Fred."),
    ("user", "{input}"),
])

# Build the LCEL chain: prompt -> NVIDIA-hosted Mixtral -> plain-string output.
llm = ChatNVIDIA(model="mixtral_8x7b")
chain = prompt | llm | StrOutputParser()

# Stream the model's response chunk by chunk so Gradio can render it incrementally.
def chat(message, history):
    output = ""
    for chunk in chain.stream({"input": message}):
        output += chunk
        yield output

# Launch a chat UI; .queue() enables generator (streaming) responses.
demo = gr.ChatInterface(chat).queue()
demo.launch()
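
# Optional sanity check (a minimal sketch, assuming the `chain` defined above and a
# valid NVIDIA_API_KEY): invoke the chain once without the Gradio UI. Useful when
# debugging the chain itself; the example input is hypothetical.
# print(chain.invoke({"input": "Hello, who are you?"}))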