Spaces:
Sleeping
Sleeping
UPDATE: Web Crawler
Browse files
app.py
CHANGED
@@ -57,7 +57,7 @@ async def addWebsite(vectorstore: str, websiteUrl: str):
|
|
57 |
return re.sub(r"\n\n+", "\n\n", soup.text).strip()
|
58 |
loader = RecursiveUrlLoader(websiteUrl, max_depth=2, timeout = 60, extractor=bs4_extractor)
|
59 |
docs = loader.load()
|
60 |
-
text = "\n\n".join([docs[doc].page_content for doc in docs])
|
61 |
return addDocuments(text = text, vectorstore = vectorstore)
|
62 |
|
63 |
|
|
|
57 |
return re.sub(r"\n\n+", "\n\n", soup.text).strip()
|
58 |
loader = RecursiveUrlLoader(websiteUrl, max_depth=2, timeout = 60, extractor=bs4_extractor)
|
59 |
docs = loader.load()
|
60 |
+
text = "\n\n".join([docs[doc].page_content for doc in range(len(docs))])
|
61 |
return addDocuments(text = text, vectorstore = vectorstore)
|
62 |
|
63 |
|