Spaces:
Sleeping
Sleeping
UPDATE: YT Transcripts
Browse files
- app.py +5 -1
- functions.py +13 -1
- requirements.txt +1 -0
app.py
CHANGED
@@ -155,4 +155,8 @@ async def getCount(vectorstore: str):
|
|
155 |
df = pd.DataFrame(client.table("ConversAI_ChatbotInfo").select("*").execute().data)
|
156 |
return {
|
157 |
"currentCount": df[(df['username'] == username) & (df['chatbotname'] == chatbotName)]['charactercount'].iloc[0]
|
158 |
-
}
|
|
|
|
|
|
|
|
|
|
155 |
df = pd.DataFrame(client.table("ConversAI_ChatbotInfo").select("*").execute().data)
|
156 |
return {
|
157 |
"currentCount": df[(df['username'] == username) & (df['chatbotname'] == chatbotName)]['charactercount'].iloc[0]
|
158 |
+
}
|
159 |
+
|
160 |
+
@app.post("/getYoutubeTranscript")
async def getYTTranscript(url: str):
    """Handle POST /getYoutubeTranscript by delegating to ``getTranscript``.

    Args:
        url: Video URL, forwarded unchanged to ``getTranscript``.

    Returns:
        Whatever ``getTranscript`` returns for ``url``.
    """
    # NOTE(review): getTranscript is called synchronously from this
    # coroutine; presumably it performs network I/O -- confirm whether it
    # should be off-loaded to a worker thread.
    transcript = getTranscript(url=url)
    return transcript
|
functions.py
CHANGED
@@ -10,6 +10,7 @@ from langchain_core.runnables.history import RunnableWithMessageHistory
|
|
10 |
from langchain.memory import ChatMessageHistory
|
11 |
from langchain_core.chat_history import BaseChatMessageHistory
|
12 |
from langchain.storage import InMemoryStore
|
|
|
13 |
from langchain.docstore.document import Document
|
14 |
from langchain_huggingface import HuggingFaceEmbeddings
|
15 |
from langchain.retrievers import ContextualCompressionRetriever
|
@@ -294,4 +295,15 @@ def getTextFromImagePDF(pdfBytes):
|
|
294 |
allImages = convert_from_bytes(pdfBytes)
|
295 |
allImages = [np.array(image) for image in allImages]
|
296 |
text = "\n\n\n".join(["\n".join([text[1] for text in reader.readtext(image, paragraph=True)]) for image in allImages])
|
297 |
-
return text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
10 |
from langchain.memory import ChatMessageHistory
|
11 |
from langchain_core.chat_history import BaseChatMessageHistory
|
12 |
from langchain.storage import InMemoryStore
|
13 |
+
from langchain_community.document_loaders import YoutubeLoader
|
14 |
from langchain.docstore.document import Document
|
15 |
from langchain_huggingface import HuggingFaceEmbeddings
|
16 |
from langchain.retrievers import ContextualCompressionRetriever
|
|
|
295 |
allImages = convert_from_bytes(pdfBytes)
|
296 |
allImages = [np.array(image) for image in allImages]
|
297 |
text = "\n\n\n".join(["\n".join([text[1] for text in reader.readtext(image, paragraph=True)]) for image in allImages])
|
298 |
+
return text
|
299 |
+
|
300 |
+
|
301 |
+
def getTranscript(url: str):
    """Return the transcript of a YouTube video as a single string.

    A ``YoutubeLoader`` is built for ``url`` and the ``page_content`` of
    every document it loads is joined with single spaces.

    Args:
        url: URL of the YouTube video.

    Returns:
        The joined transcript text, or the literal string
        ``"ENGLISH TRANSCRIPT UNAVAILABLE"`` if loading the documents
        raises an ordinary exception.

    Note:
        The loader is constructed outside the ``try`` block, so any error
        raised by ``YoutubeLoader.from_youtube_url`` itself propagates to
        the caller.
    """
    import logging

    loader = YoutubeLoader.from_youtube_url(url, add_video_info=False)
    try:
        doc = " ".join(page.page_content for page in loader.load())
    except Exception:
        # Best-effort fallback is intentional, but catch ``Exception`` rather
        # than using a bare ``except`` so KeyboardInterrupt / SystemExit still
        # propagate, and log the cause instead of discarding it silently.
        logging.getLogger(__name__).exception(
            "Could not load YouTube transcript for %s", url
        )
        doc = "ENGLISH TRANSCRIPT UNAVAILABLE"
    return doc
|
requirements.txt
CHANGED
@@ -18,6 +18,7 @@ python-dotenv
|
|
18 |
pydantic
|
19 |
pandas
|
20 |
easyocr
|
|
|
21 |
pdf2image
|
22 |
sentence-transformers
|
23 |
supabase
|
|
|
18 |
pydantic
|
19 |
pandas
|
20 |
easyocr
|
21 |
+
youtube-transcript-api
|
22 |
pdf2image
|
23 |
sentence-transformers
|
24 |
supabase
|