Spaces:
Sleeping
Sleeping
import io | |
from starlette import status | |
from functions import * | |
from PyPDF2 import PdfReader | |
import pandas as pd | |
from fastapi import FastAPI, File, UploadFile, HTTPException | |
from pydantic import BaseModel | |
from fastapi.middleware.cors import CORSMiddleware | |
from langchain_community.document_loaders import UnstructuredURLLoader | |
from src.api.speech_api import speech_translator_router | |
from functions import client as supabase | |
from urllib.parse import urlparse | |
import nltk | |
# Fetch the NLTK "punkt_tab" resource when this module is imported.
nltk.download("punkt_tab")

# Application object; "/api/v1" is declared as its root path.
app = FastAPI(title="ConversAI", root_path="/api/v1")

# NOTE(review): wildcard origins/methods/headers are combined with
# allow_credentials=True -- confirm this is intended beyond development.
_cors_settings = {
    "allow_origins": ["*"],
    "allow_credentials": True,
    "allow_methods": ["*"],
    "allow_headers": ["*"],
}
app.add_middleware(CORSMiddleware, **_cors_settings)

# Mount the speech translator routes under the "/speech" prefix.
app.include_router(speech_translator_router, prefix="/speech")
async def sign_up(email, username, password):
    """Register an account with Supabase auth and create the matching user row.

    Args:
        email: Address passed to the Supabase sign-up call.
        username: Name forwarded to ``createUser`` for the new account.
        password: Password passed to the Supabase sign-up call.

    Returns:
        A fixed success payload asking the user to verify their email.
    """
    credentials = {"email": email, "password": password, "role": "user"}

    # The response is unpacked into exactly two items and the user object is
    # read from index 1 of the first item.
    # NOTE(review): this relies on the response iterating as pairs whose
    # second element is the user -- confirm against the installed client.
    first_item, _ = supabase.auth.sign_up(credentials)
    new_user_id = first_item[1].id

    creation_result = createUser(user_id=new_user_id, username=username)
    print(creation_result)

    return {
        "status": "success",
        "code": 200,
        "message": "Please check you email address for email verification",
    }
async def check_session():
    """Return the result of ``supabase.auth.get_session()`` unchanged."""
    return supabase.auth.get_session()
async def get_user(access_token):
    """Look up a user through Supabase auth using ``access_token`` as the JWT.

    Returns:
        Whatever ``supabase.auth.get_user`` returns, unmodified.
    """
    return supabase.auth.get_user(jwt=access_token)
async def refresh_token(refresh_token):
    """Forward ``refresh_token`` to ``supabase.auth.refresh_token``.

    Returns:
        The auth client's response, unmodified.
    """
    # The parameter shadows this function's own name inside the body; only the
    # auth client's method is called here.
    return supabase.auth.refresh_token(refresh_token)
def _register_session(auth_response):
    """Record a freshly authenticated session in the ``Stores`` table.

    Shared by ``sign_in`` and ``login_with_token`` so both entry points apply
    the same "one stored session per user" rule.

    Args:
        auth_response: Auth client response exposing ``user.id``,
            ``session.access_token`` and ``session.refresh_token``.

    Returns:
        A success payload with the user id and both tokens when no ``Stores``
        row existed for the user and a new one was inserted.

    Raises:
        HTTPException: 400 when a ``Stores`` row for this user already exists
            ("already signed in"), or when the stored id does not equal the
            authenticated user id.
    """
    user_id = auth_response.user.id
    access_token = auth_response.session.access_token
    refresh_token = auth_response.session.refresh_token

    existing = (
        supabase.table("Stores")
        .select("*")
        .filter("StoreID", "eq", user_id)
        .execute()
    )
    store_id = None
    if existing and existing.data:
        store_id = existing.data[0].get("StoreID")

    if not store_id:
        supabase.table("Stores").insert(
            {
                "AccessToken": access_token,
                "StoreID": user_id,
                "RefreshToken": refresh_token,
            }
        ).execute()
        return {
            "message": "Success",
            "code": status.HTTP_200_OK,
            "user_id": user_id,
            "access_token": access_token,
            "refresh_token": refresh_token,
        }

    if store_id == user_id:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail="You are already signed in. Please sign out first to sign in again.",
        )

    raise HTTPException(
        status_code=status.HTTP_400_BAD_REQUEST,
        detail="Failed to sign in. Please check your credentials.",
    )


async def sign_in(email, password):
    """Authenticate with email and password, then register the session.

    Args:
        email: Account email passed to ``sign_in_with_password``.
        password: Account password passed to ``sign_in_with_password``.

    Returns:
        The success payload produced by ``_register_session``.

    Raises:
        HTTPException: 400 from ``_register_session``; 500 wrapping any other
            error raised while signing in or touching the ``Stores`` table.
    """
    try:
        res = supabase.auth.sign_in_with_password(
            {"email": email, "password": password}
        )
        return _register_session(res)
    except HTTPException:
        # Deliberate 4xx responses must not be rewrapped as a 500.
        raise
    except Exception as e:
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"An unexpected error occurred during sign-in: {str(e)}",
        ) from e


async def login_with_token(token):
    """Authenticate with an ID token, then register the session.

    Args:
        token: Value passed straight to ``sign_in_with_id_token``.

    Returns:
        The success payload produced by ``_register_session``.

    Raises:
        HTTPException: 400 from ``_register_session``; 500 wrapping any other
            error raised while signing in or touching the ``Stores`` table.
    """
    try:
        # The auth response is intentionally not printed: it carries the
        # access and refresh tokens, which should not end up in stdout logs.
        res = supabase.auth.sign_in_with_id_token(token)
        return _register_session(res)
    except HTTPException:
        # Deliberate 4xx responses must not be rewrapped as a 500.
        raise
    except Exception as e:
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"An unexpected error occurred during sign-in: {str(e)}",
        ) from e
async def set_session_data(access_token, refresh_token):
    """Hand both tokens to ``supabase.auth.set_session`` and return its result."""
    return supabase.auth.set_session(access_token, refresh_token)
async def sign_out(user_id):
    """Remove the user's ``Stores`` row and sign out of Supabase auth.

    Args:
        user_id: Value matched against the ``StoreID`` column.

    Returns:
        ``{"message": "success"}`` once both steps complete.

    Raises:
        HTTPException: 400 carrying the error text if either step raises.
    """
    try:
        stores = supabase.table("Stores")
        stores.delete().eq("StoreID", user_id).execute()
        supabase.auth.sign_out()
    except Exception as e:
        raise HTTPException(status_code=400, detail=str(e))
    return {"message": "success"}
async def oauth(provider):
    """Start an OAuth sign-in for ``provider`` and return the client's response."""
    request = {"provider": provider}
    return supabase.auth.sign_in_with_oauth(request)
async def newChatbot(chatbotName: str, username: str):
    """Create a chatbot for ``username`` unless their chatbot limit is reached.

    Args:
        chatbotName: Short name of the chatbot to create.
        username: Value matched against ``user_id`` in the config and info
            tables.

    Returns:
        ``{"output": "CHATBOT LIMIT EXCEEDED"}`` when the user already has at
        least ``chatbotLimit`` tables; otherwise the result of ``createTable``.
    """
    bot_count = len(listTables(username=username)["output"])
    config_query = (
        client.table("ConversAI_UserConfig")
        .select("chatbotLimit")
        .eq("user_id", username)
    )
    limit = config_query.execute().data[0]["chatbotLimit"]

    if bot_count >= int(limit):
        return {"output": "CHATBOT LIMIT EXCEEDED"}

    info_row = {"user_id": username, "chatbotname": chatbotName}
    client.table("ConversAI_ChatbotInfo").insert(info_row).execute()

    # The backing table is named "convai$<user>$<chatbot>".
    return createTable(tablename=f"convai${username}${chatbotName}")
async def addPDFData(vectorstore: str, pdf: UploadFile = File(...)):
    """Extract text from an uploaded PDF and add it to a chatbot's vector store.

    The extracted text's length is added to the chatbot's stored
    ``charactercount`` and compared with the user's ``tokenLimit``; the
    document is only ingested when the new total stays below that limit.

    Args:
        vectorstore: "$"-separated identifier; parts [1] and [2] are used as
            the user id and chatbot name.
        pdf: Uploaded PDF file; its filename is recorded as the source.

    Returns:
        The result of ``addDocuments`` on success, otherwise an ``output``
        message saying the document exceeds the limit.
    """
    source = pdf.filename
    pdf_bytes = await pdf.read()

    # PDFMinerLoader is given a file path, so the upload is spooled to disk.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as temp_file:
        temp_file.write(pdf_bytes)
        temp_file_path = temp_file.name
    try:
        loader = PDFMinerLoader(file_path=temp_file_path, concatenate_pages=True)
        text = loader.load()[0].page_content
    finally:
        # delete=False means nothing else cleans this up; remove it even when
        # parsing fails so bad uploads do not accumulate on disk.
        os.remove(temp_file_path)

    parts = vectorstore.split("$")
    username, chatbotname = parts[1], parts[2]

    df = pd.DataFrame(client.table("ConversAI_ChatbotInfo").select("*").execute().data)
    owned = (df["user_id"] == username) & (df["chatbotname"] == chatbotname)
    currentCount = df[owned]["charactercount"].iloc[0]
    limit = (
        client.table("ConversAI_UserConfig")
        .select("tokenLimit")
        .eq("user_id", username)
        .execute()
        .data[0]["tokenLimit"]
    )

    newCount = currentCount + len(text)
    if newCount < int(limit):
        client.table("ConversAI_ChatbotInfo").update(
            {"charactercount": str(newCount)}
        ).eq("user_id", username).eq("chatbotname", chatbotname).execute()
        return addDocuments(text=text, source=source, vectorstore=vectorstore)
    else:
        return {
            "output": "DOCUMENT EXCEEDING LIMITS, PLEASE TRY WITH A SMALLER DOCUMENT."
        }
async def returnText(pdf: UploadFile = File(...)):
    """Read the uploaded PDF and return ``getTextFromImagePDF``'s output for it."""
    raw_bytes = await pdf.read()
    return getTextFromImagePDF(pdfBytes=raw_bytes)
async def addText(vectorstore: str, text: str):
    """Add raw text to a chatbot's vector store if it fits the user's limit.

    Args:
        vectorstore: "$"-separated identifier; parts [1] and [2] are used as
            the user id and chatbot name.
        text: Content to ingest; its length is added to the stored
            ``charactercount``.

    Returns:
        The result of ``addDocuments`` (source ``"text"``) when the new count
        is below ``tokenLimit``, otherwise an ``output`` limit message.
    """
    parts = vectorstore.split("$")
    username, chatbotname = parts[1], parts[2]

    info = pd.DataFrame(client.table("ConversAI_ChatbotInfo").select("*").execute().data)
    is_target = (info["user_id"] == username) & (info["chatbotname"] == chatbotname)
    used_so_far = info[is_target]["charactercount"].iloc[0]
    projected = used_so_far + len(text)

    limit_query = (
        client.table("ConversAI_UserConfig").select("tokenLimit").eq("user_id", username)
    )
    limit = limit_query.execute().data[0]["tokenLimit"]

    within_limit = projected < int(limit)
    if not within_limit:
        # NOTE(review): the message says WEBSITE although this path ingests
        # raw text; kept verbatim in case clients match on it.
        return {
            "output": "WEBSITE EXCEEDING LIMITS, PLEASE TRY WITH A SMALLER DOCUMENT."
        }

    bot_rows = client.table("ConversAI_ChatbotInfo").update(
        {"charactercount": str(projected)}
    )
    bot_rows.eq("user_id", username).eq("chatbotname", chatbotname).execute()
    return addDocuments(text=text, source="text", vectorstore=vectorstore)
# Request model carrying one question/answer pair for a chatbot's vector store.
class AddQAPair(BaseModel):
    # "$"-separated identifier; the consumer reads parts [1] and [2] as the
    # user id and chatbot name.
    vectorstore: str
    # Question text, stored as the "QUESTION: ..." half of the ingested entry.
    question: str
    # Answer text, stored as the "ANSWER: ..." half of the ingested entry.
    answer: str
async def addText(addQaPair: AddQAPair):
    """Add a question/answer pair to a chatbot's vector store if it fits the limit.

    Args:
        addQaPair: Carries the "$"-separated ``vectorstore`` identifier plus
            the ``question`` and ``answer`` strings.

    Returns:
        The result of ``addDocuments`` (source ``"Q&A Pairs"``) when the new
        count is below ``tokenLimit``, otherwise an ``output`` limit message.
    """
    # NOTE(review): an earlier function in this file is also named addText;
    # this definition rebinds the module-level name.
    parts = addQaPair.vectorstore.split("$")
    username, chatbotname = parts[1], parts[2]

    info = pd.DataFrame(client.table("ConversAI_ChatbotInfo").select("*").execute().data)
    is_target = (info["user_id"] == username) & (info["chatbotname"] == chatbotname)
    used_so_far = info[is_target]["charactercount"].iloc[0]

    qa = f"QUESTION: {addQaPair.question}\tANSWER: {addQaPair.answer}"
    projected = used_so_far + len(qa)

    limit_query = (
        client.table("ConversAI_UserConfig").select("tokenLimit").eq("user_id", username)
    )
    limit = limit_query.execute().data[0]["tokenLimit"]

    within_limit = projected < int(limit)
    if not within_limit:
        # NOTE(review): the message says WEBSITE although this path ingests a
        # Q&A pair; kept verbatim in case clients match on it.
        return {
            "output": "WEBSITE EXCEEDING LIMITS, PLEASE TRY WITH A SMALLER DOCUMENT."
        }

    bot_rows = client.table("ConversAI_ChatbotInfo").update(
        {"charactercount": str(projected)}
    )
    bot_rows.eq("user_id", username).eq("chatbotname", chatbotname).execute()
    return addDocuments(text=qa, source="Q&A Pairs", vectorstore=addQaPair.vectorstore)
async def addWebsite(vectorstore: str, websiteUrls: list[str]):
    """Load the given URLs and add their combined text to a chatbot's vector store.

    Args:
        vectorstore: "$"-separated identifier; parts [1] and [2] are used as
            the user id and chatbot name.
        websiteUrls: URLs handed to ``UnstructuredURLLoader``; the host of the
            first one is recorded as the source.

    Returns:
        The result of ``addDocuments`` when the new count is below
        ``tokenLimit``, otherwise an ``output`` limit message.
    """
    pages = UnstructuredURLLoader(urls=websiteUrls).load()
    # Page texts are joined with a blank line between them.
    text = "\n\n".join([f"{page.page_content}" for page in pages])

    parts = vectorstore.split("$")
    username, chatbotname = parts[1], parts[2]

    info = pd.DataFrame(client.table("ConversAI_ChatbotInfo").select("*").execute().data)
    is_target = (info["user_id"] == username) & (info["chatbotname"] == chatbotname)
    used_so_far = info[is_target]["charactercount"].iloc[0]
    projected = used_so_far + len(text)

    limit_query = (
        client.table("ConversAI_UserConfig").select("tokenLimit").eq("user_id", username)
    )
    limit = limit_query.execute().data[0]["tokenLimit"]

    within_limit = projected < int(limit)
    if not within_limit:
        return {
            "output": "WEBSITE EXCEEDING LIMITS, PLEASE TRY WITH A SMALLER DOCUMENT."
        }

    bot_rows = client.table("ConversAI_ChatbotInfo").update(
        {"charactercount": str(projected)}
    )
    bot_rows.eq("user_id", username).eq("chatbotname", chatbotname).execute()
    site_host = urlparse(websiteUrls[0]).netloc
    return addDocuments(text=text, source=site_host, vectorstore=vectorstore)
async def answerQuestion(query: str, vectorstore: str, llmModel: str = "llama3-70b-8192"):
    """Delegate a question to ``answerQuery`` and return its result.

    Args:
        query: The question text.
        vectorstore: Identifier of the vector store to query.
        llmModel: Model name forwarded to ``answerQuery``.
    """
    request = {"query": query, "vectorstore": vectorstore, "llmModel": llmModel}
    return answerQuery(**request)
async def delete(chatbotName: str):
    """Delete a chatbot's info row and its backing table.

    Args:
        chatbotName: "$"-separated identifier; parts [1] and [2] are used as
            the user id and the short chatbot name.

    Returns:
        The result of ``deleteTable`` for the full identifier.
    """
    parts = chatbotName.split("$")
    username, shortName = parts[1], parts[2]

    client.table("ConversAI_ChatbotInfo").delete().eq("user_id", username).eq(
        "chatbotname", shortName
    ).execute()

    # Pass the full identifier rather than the short name: newChatbot creates
    # the table as "convai$<user>$<chatbot>", so the short name alone would not
    # match what was created.
    # NOTE(review): deleteTable is defined elsewhere -- confirm it expects the
    # same name createTable was given.
    return deleteTable(tableName=chatbotName)
async def delete(username: str):
    """Return ``listTables``'s result for ``username``."""
    # NOTE(review): despite its name this only lists tables, and it rebinds
    # the module-level name of the earlier delete(chatbotName) function.
    tables = listTables(username=username)
    return tables
async def crawlUrl(baseUrl: str):
    """Return the links ``getLinks`` finds for ``baseUrl`` under the ``urls`` key."""
    # getLinks is called with timeout=30.
    links = getLinks(url=baseUrl, timeout=30)
    return {"urls": links}
async def getCount(vectorstore: str):
    """Return the stored ``charactercount`` for one chatbot.

    Args:
        vectorstore: "$"-separated identifier; parts [1] and [2] are used as
            the user id and chatbot name.

    Returns:
        ``{"currentCount": <value>}`` taken from the first matching row of
        ``ConversAI_ChatbotInfo``.
    """
    parts = vectorstore.split("$")
    username, chatbotName = parts[1], parts[2]

    info = pd.DataFrame(client.table("ConversAI_ChatbotInfo").select("*").execute().data)
    is_target = (info["user_id"] == username) & (info["chatbotname"] == chatbotName)
    current = info[is_target]["charactercount"].iloc[0]
    return {"currentCount": current}
async def getYTTranscript(urls: str):
    """Return ``getTranscript``'s output for ``urls`` under the ``transcript`` key."""
    transcript = getTranscript(urls=urls)
    return {"transcript": transcript}
async def analyzeAndAnswer(query: str, file: UploadFile = File(...)):
    """Answer a question about an uploaded spreadsheet or CSV file.

    Args:
        query: Question forwarded to ``analyzeData``.
        file: Uploaded file; the text after the last "." of its name selects
            the reader (Excel for xls/xlsx/xlsm/xlsb, CSV for csv).

    Returns:
        ``{"output": ...}`` holding the ``analyzeData`` result,
        ``"INVALID FILE TYPE"`` for any other extension, or
        ``"UNABLE TO ANSWER QUERY"`` when reading or analysis fails.
    """
    # Compare case-insensitively so "REPORT.CSV" is accepted like "report.csv";
    # a missing filename falls through to the invalid-type reply.
    extension = (file.filename or "").split(".")[-1].lower()

    if extension in ("xls", "xlsx", "xlsm", "xlsb"):
        read_table = pd.read_excel
    elif extension == "csv":
        read_table = pd.read_csv
    else:
        return {"output": "INVALID FILE TYPE"}

    try:
        df = read_table(io.BytesIO(await file.read()))
        return {"output": analyzeData(query=query, dataframe=df)}
    except Exception:
        # Best-effort reply is deliberate, but catch Exception rather than
        # using a bare except so task cancellation and interpreter-exit
        # signals still propagate.
        return {"output": "UNABLE TO ANSWER QUERY"}