"""FastAPI service exposing text-generation endpoints backed by a
Hugging Face `transformers` pipeline (Qwen2.5-1.5B-Instruct)."""

from typing import Dict, List

import torch
from fastapi import FastAPI
from pydantic import BaseModel
from transformers import pipeline

# Pick the best available accelerator: Apple MPS > CUDA > CPU.
if torch.backends.mps.is_available():
    device = torch.device("mps")
elif torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(device)

app = FastAPI()

modelName = "Qwen/Qwen2.5-1.5B-Instruct"

# Chat-style text-generation pipeline. batch_size=8 lets the pipeline batch
# inputs when several are processed together.
pipe = pipeline("text-generation", model=modelName, device=device, batch_size=8)

# Default sentiment-analysis pipeline; not used by the endpoints below but
# loaded up front so it is ready for future routes.
sentiment_model = pipeline("sentiment-analysis", device=device)


class ChatRequest(BaseModel):
    # Full chat transcript as a list of {"role": ..., "content": ...} dicts,
    # the message format expected by chat-template-aware pipelines.
    conversationHistory: List[Dict[str, str]]


@app.get("/")
async def root():
    """Health-check / landing endpoint."""
    return {"message": "Hello World"}


# NOTE - we could configure docs_url to serve the interactive Docs at the root
# path of the app. This way, we can use the docs as a landing page for the app
# on Spaces:
#   app = FastAPI(docs_url="/")


@app.get("/generate")
def generate(text: str):
    """Generate a single model response for one user message.

    Args:
        text: The user's prompt (query parameter).

    Returns:
        {"output": <assistant reply text>} — only the content of the final
        (assistant) turn of the generated transcript is returned.
    """
    content = [{"role": "user", "content": text}]
    output = pipe(content, num_return_sequences=1, max_new_tokens=250)
    print(output)
    # generated_text holds the whole transcript; the last message is the
    # assistant's reply.
    return {"output": output[0]["generated_text"][-1]["content"]}


@app.post("/chat")
def chat(request: ChatRequest):
    """Generate a response from the NLP model for a full conversation.

    Args:
        request: ChatRequest carrying the complete conversation history.

    Returns:
        The raw pipeline output (including the generated transcript) — unlike
        /generate, this endpoint does not extract the final assistant message.
    """
    output = pipe(request.conversationHistory, num_return_sequences=1, max_new_tokens=250)
    return output