#app.py.chatbot
#app.py Modif04
#https://www.freddyboulton.com/blog/llama-cpp-python

import gradio as gr
from llama_cpp import Llama

# Load the local GGUF model with llama-cpp-python
llm = Llama(
    model_path="/home/user/app/h2o-danube3-500m-chat-Q4_K_M.gguf",
    verbose=True
)


def predict(message, history):
    #messages = [{"role": "system", "content": "You are a helpful assistant."}]
    #messages = [{"role": "assistant", "content": "You are a helpful assistant."}]
    #messages = [{"role": "assistant", "content": "Bonjour, comment puis-je vous aider?"}]
    # Rebuild the conversation in the chat-completion message format
    messages = []
    for user_message, bot_message in history:
        if user_message:
            messages.append({"role": "user", "content": user_message})
        if bot_message:
            messages.append({"role": "assistant", "content": bot_message})
    messages.append({"role": "user", "content": message})

    # Stream the completion and yield the partial response as it grows
    response = ""
    for chunk in llm.create_chat_completion(
        stream=True,
        messages=messages,
    ):
        part = chunk["choices"][0]["delta"].get("content", None)
        if part:
            response += part
        yield response


demo = gr.ChatInterface(predict)

demo.launch()


##app.py Modif03
#import gradio as gr
#from huggingface_hub import create_inference_endpoint, InferenceClient
#from transformers import AutoModelForCausalLM, AutoTokenizer
#
##model_name = "MisterAI/H20GPT_h2o-danube3-500m-chat-Q4_K_M_gguf"
##model = AutoModelForCausalLM.from_pretrained(model_name)
##tokenizer = AutoTokenizer.from_pretrained(model_name)
#
##client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
##client = InferenceClient("MisterAI/H20GPT_h2o-danube3-500m-chat-Q4_K_M_gguf")
##client = InferenceClient("/home/user/app/H20GPT_h2o-danube3-500m-chat-Q4_K_M.gguf")
#
## Create a local Inference endpoint
#endpoint = create_inference_endpoint(
#    "Local-Endpoint-MisterAI-H2O",
#    repository="MisterAI/H20GPT_h2o-danube3-500m-chat-Q4_K_M_gguf",
##    model_path="/home/user/app/H20GPT_h2o-danube3-500m-chat-Q4_K_M.gguf",
#    framework="pytorch",
#    task="text-generation",
#    accelerator="cpu",
#    vendor="local",
#    region="local",
#    type="unprotected",
#    instance_size="small",
#    instance_type="local",
#    URL="http://0.0.0.0:6789"
#)
#
#print(f"Endpoint created at URL: {endpoint.url}")
#
#client = endpoint.client
#
#
#
#def respond(
#    message,
#    history: list[tuple[str, str]],
#    system_message,
#    max_tokens,
#    temperature,
#    top_p,
#):
#    messages = [{"role": "system", "content": system_message}]
#
#    for val in history:
#        if val[0]:
#            messages.append({"role": "user", "content": val[0]})
#        if val[1]:
#            messages.append({"role": "assistant", "content": val[1]})
#
#    messages.append({"role": "user", "content": message})
#
#    response = ""
#
#    for message in client.chat_completion(
#        messages,
#        max_tokens=max_tokens,
#        stream=True,
#        temperature=temperature,
#        top_p=top_p,
#    ):
#        token = message.choices[0].delta.content
#
#        response += token
#        yield response
#
#demo = gr.ChatInterface(
#    respond,
#    additional_inputs=[
#        gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
#        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
#        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
#        gr.Slider(
#            minimum=0.1,
#            maximum=1.0,
#            value=0.95,
#            step=0.05,
#            label="Top-p (nucleus sampling)",
#        ),
#    ],
#)
#
#
#if __name__ == "__main__":
#    demo.launch()
#
#
#
#
##app.py Modif01
#import gradio as gr
#from huggingface_hub import Inference, InferenceClient
#from transformers import AutoModelForCausalLM, AutoTokenizer
#
##model_name = "MisterAI/H20GPT_h2o-danube3-500m-chat-Q4_K_M_gguf"
##model = AutoModelForCausalLM.from_pretrained(model_name)
##tokenizer = AutoTokenizer.from_pretrained(model_name)
#
##client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
##client = InferenceClient("MisterAI/H20GPT_h2o-danube3-500m-chat-Q4_K_M_gguf")
##client = InferenceClient("/home/user/app/H20GPT_h2o-danube3-500m-chat-Q4_K_M.gguf")
#
## Create a local Inference instance
#inference = Inference(
#    model_path="/home/user/app/H20GPT_h2o-danube3-500m-chat-Q4_K_M.gguf",
#    device="cpu",  # Use the CPU for inference
#    token=None,  # No token needed for this instance
#)
#
#client = inference
#
#
#
#def respond(
#    message,
#    history: list[tuple[str, str]],
#    system_message,
#    max_tokens,
#    temperature,
#    top_p,
#):
#    messages = [{"role": "system", "content": system_message}]
#
#    for val in history:
#        if val[0]:
#            messages.append({"role": "user", "content": val[0]})
#        if val[1]:
#            messages.append({"role": "assistant", "content": val[1]})
#
#    messages.append({"role": "user", "content": message})
#
#    response = ""
#
#    for message in client.chat_completion(
#        messages,
#        max_tokens=max_tokens,
#        stream=True,
#        temperature=temperature,
#        top_p=top_p,
#    ):
#        token = message.choices[0].delta.content
#
#        response += token
#        yield response
#
#demo = gr.ChatInterface(
#    respond,
#    additional_inputs=[
#        gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
#        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
#        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
#        gr.Slider(
#            minimum=0.1,
#            maximum=1.0,
#            value=0.95,
#            step=0.05,
#            label="Top-p (nucleus sampling)",
#        ),
#    ],
#)
#
#
#if __name__ == "__main__":
#    demo.launch()
#
#
#
#
#
##app.py ORIGINAL
#import gradio as gr
#from huggingface_hub import InferenceClient
#
#"""
#For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
#"""
#client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
#
#
#def respond(
#    message,
#    history: list[tuple[str, str]],
#    system_message,
#    max_tokens,
#    temperature,
#    top_p,
#):
#    messages = [{"role": "system", "content": system_message}]
#
#    for val in history:
#        if val[0]:
#            messages.append({"role": "user", "content": val[0]})
#        if val[1]:
#            messages.append({"role": "assistant", "content": val[1]})
#
#    messages.append({"role": "user", "content": message})
#
#    response = ""
#
#    for message in client.chat_completion(
#        messages,
#        max_tokens=max_tokens,
#        stream=True,
#        temperature=temperature,
#        top_p=top_p,
#    ):
#        token = message.choices[0].delta.content
#
#        response += token
#        yield response
#
#"""
#For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
#"""
#demo = gr.ChatInterface(
#    respond,
#    additional_inputs=[
#        gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
#        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
#        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
#        gr.Slider(
#            minimum=0.1,
#            maximum=1.0,
#            value=0.95,
#            step=0.05,
#            label="Top-p (nucleus sampling)",
#        ),
#    ],
#)
#
#
#if __name__ == "__main__":
#    demo.launch()
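

##app.py Suggestion (commented out, not part of the original revisions)
## A minimal sketch: it combines the llama-cpp-python streaming loop from Modif04 with the
## system-message and sampling controls that the earlier InferenceClient versions exposed
## through additional_inputs. The keyword arguments passed to create_chat_completion
## (max_tokens, temperature, top_p) are standard llama-cpp-python parameters; the slider
## values and labels are reused from the versions above. Untested here, intended only as a
## starting point if those controls are wanted with the local GGUF model.
#import gradio as gr
#from llama_cpp import Llama
#
#llm = Llama(
#    model_path="/home/user/app/h2o-danube3-500m-chat-Q4_K_M.gguf",
#    verbose=True
#)
#
#def predict(message, history, system_message, max_tokens, temperature, top_p):
#    # Start from the system prompt, then replay the chat history
#    messages = [{"role": "system", "content": system_message}]
#    for user_message, bot_message in history:
#        if user_message:
#            messages.append({"role": "user", "content": user_message})
#        if bot_message:
#            messages.append({"role": "assistant", "content": bot_message})
#    messages.append({"role": "user", "content": message})
#
#    # Stream the completion with the user-selected sampling parameters
#    response = ""
#    for chunk in llm.create_chat_completion(
#        messages=messages,
#        stream=True,
#        max_tokens=max_tokens,
#        temperature=temperature,
#        top_p=top_p,
#    ):
#        part = chunk["choices"][0]["delta"].get("content", None)
#        if part:
#            response += part
#        yield response
#
#demo = gr.ChatInterface(
#    predict,
#    additional_inputs=[
#        gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
#        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
#        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
#        gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"),
#    ],
#)
#
#demo.launch()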