anekameni committed
Commit 91dda71 · 1 Parent(s): f0d0fde

Add initial implementation of prompt engineering and custom embedding classes
.gitignore CHANGED
@@ -179,4 +179,5 @@ data
 
 
 .python-version
-.venv
+.venv
+*.sh
LICENSE ADDED
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2025 KameniAlexNea
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
app.py CHANGED
@@ -5,43 +5,23 @@ import gradio as gr
 
 from src.rag_pipeline.rag_system import RAGSystem
 
-# Set environment variable to optimize tokenization performance
 os.environ["TOKENIZERS_PARALLELISM"] = "true"
 
 
 class ChatInterface:
-    """Interface for interacting with the RAG system via Gradio's chat component."""
-
     def __init__(self, rag_system: RAGSystem):
         self.rag_system = rag_system
+        self.history_depth = int(os.getenv("MAX_MESSAGES") or 5) * 2
 
-    def respond(self, message: str, history: List[dict]):
-        """
-        Processes a user message and returns responses incrementally using the RAG system.
-
-        Args:
-            message (str): User's input message.
-            history (List[dict]): Chat history as a list of role-content dictionaries.
-
-        Yields:
-            str: Incremental response generated by the RAG system.
-        """
-        # Convert history to (role, content) tuples and limit to the last 10 turns
-        processed_history = [(turn["role"], turn["content"]) for turn in history][-10:]
+    def respond(self, message: str, history: List[List[str]]):
         result = ""
-
-        # Generate response incrementally
-        for text in self.rag_system.query(message, processed_history):
+        history = [(turn["role"], turn["content"]) for turn in history[-self.history_depth:]]
+        for text in self.rag_system.query(message, history):
             result += text
             yield result
+        return result
 
     def create_interface(self) -> gr.ChatInterface:
-        """
-        Creates the Gradio chat interface for Medivocate.
-
-        Returns:
-            gr.ChatInterface: Configured Gradio chat interface.
-        """
         description = (
             "Medivocate is an application that offers clear and structured information "
             "about African history and traditional medicine. The knowledge is exclusively "
@@ -55,24 +35,12 @@ class ChatInterface:
             description=description,
         )
 
-    def launch(self, share: bool = False):
-        """
-        Launches the Gradio interface.
-
-        Args:
-            share (bool): Whether to generate a public sharing link. Defaults to False.
-        """
-        interface = self.create_interface()
-        interface.launch(share=share)
-
 
-# Entry point
+# Usage example:
 if __name__ == "__main__":
-    # Initialize the RAG system with specified parameters
-    top_k_documents = 12
-    rag_system = RAGSystem(top_k_documents=top_k_documents)
+    rag_system = RAGSystem(top_k_documents=12)
     rag_system.initialize_vector_store()
 
-    # Create and launch the chat interface
     chat_interface = ChatInterface(rag_system)
-    chat_interface.launch(share=False)
+    demo = chat_interface.create_interface()
+    demo.launch(share=False)
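The new respond() indexes turn["role"] and turn["content"] on each history entry, which is Gradio's messages-style history format (the List[List[str]] annotation reflects the older pair format). A minimal sketch of the payload it expects; the history contents are invented, and ChatInterface / RAGSystem come from the file above:

# Sketch only: the history shape that respond() iterates over.
rag_system = RAGSystem(top_k_documents=12)
rag_system.initialize_vector_store()
chat = ChatInterface(rag_system)

history = [
    {"role": "user", "content": "Parle-moi de l'empire du Mali."},
    {"role": "assistant", "content": "L'empire du Mali est un ancien empire d'Afrique de l'Ouest..."},
]
for partial in chat.respond("Et la médecine traditionnelle ?", history):
    print(partial)  # each yield is the accumulated answer so far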
data/chroma_db/chroma.sqlite3 CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d0b087c7b5fc9ecb1419b553f6e8ce942bbe8f9112319eac6f047d4421907068
-size 304648192
+oid sha256:74b3c1038f9ab6b862da000b4ed0a2f2e92ad734d3ba05c409ce6a3224da10f7
+size 239828992
requirements.txt CHANGED
@@ -8,4 +8,5 @@ ollama==0.4.5
 chromadb==0.5.23
 tqdm==4.67.1
 gradio==5.9.1
-rank_bm25==0.2.2
+rank_bm25==0.2.2
+gdown==5.2.0
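gdown==5.2.0 is new in this commit, but no call site appears in the diff; presumably it is used to fetch a prebuilt artifact such as the Chroma index. A hypothetical invocation (the Drive URL and output path are placeholders, not taken from the repository):

# Hypothetical use of the new gdown dependency; not shown anywhere in this commit.
import gdown

gdown.download("https://drive.google.com/uc?id=<file-id>", "data/chroma_db.zip", quiet=False)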
src/__init__.py CHANGED
@@ -0,0 +1,6 @@
+import logging
+
+logging.basicConfig(
+    level=logging.WARNING,
+    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
+)
src/prompt_engineering/__init__.py ADDED
File without changes
src/prompt_engineering/prompter.py ADDED
@@ -0,0 +1,42 @@
+from langchain.prompts import PromptTemplate
+from langchain_ollama import ChatOllama
+
+TEMPLATE = """
+J'ai un prompt posé par un utilisateur destiné à récupérer des informations à partir d'un système de génération augmentée par la récupération (RAG), où des segments de documents sont stockés sous forme d'embeddings pour une recherche efficace et précise. Votre tâche consiste à affiner ce prompt afin de :
+
+1. Améliorer la pertinence de la recherche en alignant la requête avec la granularité sémantique et l'intention des embeddings.
+2. Minimiser l'ambiguïté pour réduire le risque de récupérer des segments non pertinents ou trop génériques.
+3. Préserver autant que possible le langage, le ton et la structure du prompt original tout en le rendant plus clair et efficace.
+
+Voici le prompt original de l'utilisateur :
+{user_prompt}
+
+Instructions :
+
+- Réécrivez le prompt pour améliorer sa clarté et son alignement avec les objectifs de recherche basés sur les embeddings, sans modifier son ton ni son intention globale.
+- Supposant que l'utilisateur ne peut pas fournir de clarification, apportez des améliorations basées sur ce que le prompt semble vouloir accomplir.
+- Fournissez uniquement la version améliorée du prompt, en conservant autant que possible le langage original.
+"""
+
+
+class Prompter:
+    def __init__(self, llm: ChatOllama):
+        self.llm = llm
+        self.prompt = PromptTemplate(input_variables=["user_prompt"], template=TEMPLATE)
+
+    def __call__(self, prompt):
+        return self.llm.invoke(self.prompt.format(user_prompt=prompt))
+
+
+if __name__ == "__main__":
+    from argparse import ArgumentParser
+
+    from ..utilities.llm_models import get_llm_model_chat
+
+    args = ArgumentParser()
+    args.add_argument("--prompt", type=str)
+    parse = args.parse_args()
+
+    llm = get_llm_model_chat(temperature=0.7, max_tokens=256)
+    prompt = Prompter(llm)
+    print(prompt(parse.prompt).content)
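Prompter rewrites a user query before retrieval, but this commit does not wire it into RAGSystem. A sketch of one possible way to combine the two pieces; the glue flow is an assumption, only the imported names come from the diff:

# Hypothetical glue: refine the question with Prompter, then stream the RAG answer.
from src.prompt_engineering.prompter import Prompter
from src.rag_pipeline.rag_system import RAGSystem
from src.utilities.llm_models import get_llm_model_chat

rag = RAGSystem(top_k_documents=12)
rag.initialize_vector_store()

prompter = Prompter(get_llm_model_chat(temperature=0.7, max_tokens=256))
refined = prompter("Quels remèdes traditionnels contre le paludisme ?").content

for chunk in rag.query(refined):
    print(chunk, end="")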
src/rag_pipeline/prompts.py CHANGED
@@ -6,20 +6,16 @@ from langchain.prompts.chat import (
 )
 
 system_template = """
-**Vous êtes un assistant IA spécialisé dans l'histoire de l'Afrique et la médecine traditionnelle africaine. Votre rôle est de fournir des réponses claires, structurées et précises en utilisant exclusivement les éléments de contexte suivants :**
------------------
-{context}
------------------
-
-**Règles à suivre :**
-1. **Utilisez uniquement le contexte fourni pour répondre.** Si une information n'est pas présente dans le contexte, répondez : *"Je ne sais pas. Je ne dispose pas d'informations à ce sujet."*
-2. **Répondez uniquement aux questions en lien avec l'histoire de l'Afrique ou la médecine traditionnelle africaine.** Si une question n'est pas pertinente, indiquez :
-   *"Je ne peux répondre qu'à des questions relatives à l'histoire africaine ou à la médecine traditionnelle. Pouvez-vous reformuler votre question en lien avec ces sujets ?"*
-3. **Structurez vos réponses** : Lorsque pertinent, utilisez des points ou des listes pour rendre l'information plus claire et accessible.
-4. **Ne devinez pas.** Si le contexte est insuffisant pour répondre précisément, dites :
-   *"Je ne sais pas. Les informations dont je dispose ne couvrent pas ce sujet."*
-
-**Votre priorité est de fournir des informations exactes et de ne jamais sortir du cadre défini.**
+Vous êtes un assistant IA qui fournit des informations sur l'histoire de l'Afrique et la médecine traditionnelle africaine. Vous recevez une question et fournissez une réponse claire et structurée. Lorsque cela est pertinent, utilisez des points et des listes pour structurer vos réponses.
+
+Utilisez uniquement les éléments de contexte suivants pour répondre à la question de l'utilisateur. Si vous ne connaissez pas la réponse, dites simplement que vous ne savez pas, n'essayez pas d'inventer une réponse.
+
+Si la question posée est dans une langue parlée en Afrique ou demande une traduction dans une de ces langues, répondez que vous ne savez pas et demandez à l'utilisateur de reformuler sa question.
+
+Si vous connaissez la réponse à la question mais que cette réponse ne provient pas du contexte ou n'est pas relative à l'histoire africaine ou à la médecine traditionnelle, répondez que vous ne savez pas et demandez à l'utilisateur de reformuler sa question.
+
+-----------------
+{context}
 """
 
 messages = [
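The rewritten system_template keeps a single {context} placeholder, which the chain built in rag_system.py fills with the retrieved documents via create_stuff_documents_chain. For illustration only (not part of the commit), the substitution amounts to:

# Illustration: how {context} is filled; the real substitution happens inside the chain.
filled_prompt = system_template.format(context="Passage 1 ...\n\nPassage 2 ...")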
src/rag_pipeline/rag_system.py CHANGED
@@ -1,9 +1,10 @@
 import logging
-from typing import Optional
+import os
+from typing import List, Optional
 
 from langchain.chains.combine_documents import create_stuff_documents_chain
 from langchain.chains.conversational_retrieval.base import (
-    ConversationalRetrievalChain,
+    BaseConversationalRetrievalChain,
 )
 from langchain.chains.history_aware_retriever import (
     create_history_aware_retriever,
@@ -25,7 +26,7 @@ class RAGSystem:
     ):
         self.top_k_documents = top_k_documents
         self.llm = self._get_llm()
-        self.chain: Optional[ConversationalRetrievalChain] = None
+        self.chain: Optional[BaseConversationalRetrievalChain] = None
         self.vector_store_management = VectorStoreManager(
             docs_dir, persist_directory_dir, batch_size
         )
@@ -35,9 +36,13 @@
     ):
         return get_llm_model_chat(temperature=0.1, max_tokens=1000)
 
-    def initialize_vector_store(self):
+    def load_documents(self) -> List:
+        """Load and split documents from the specified directory"""
+        return self.vector_store_management.load_documents()
+
+    def initialize_vector_store(self, documents: List = None):
         """Initialize or load the vector store"""
-        self.vector_store_management.initialize_vector_store()
+        self.vector_store_management.initialize_vector_store(documents)
 
     def setup_rag_chain(self):
         if self.chain is not None:
@@ -59,7 +64,7 @@
 
     def query(self, question: str, history: list = []):
         """Query the RAG system"""
-        if not self.vector_store_management.vector_store:
+        if not self.vector_store_management.vs_initialized:
             self.initialize_vector_store()
 
         self.setup_rag_chain()
@@ -67,3 +72,23 @@
         for token in self.chain.stream({"input": question, "chat_history": history}):
             if "answer" in token:
                 yield token["answer"]
+
+
+if __name__ == "__main__":
+    from glob import glob
+
+    docs_dir = "data/docs"
+    persist_directory_dir = "data/chroma_db"
+    batch_size = 64
+
+    # Initialize RAG system
+    rag = RAGSystem(docs_dir, persist_directory_dir, batch_size)
+
+    if len(glob(os.path.join(persist_directory_dir, "*/*.bin"))):
+        rag.initialize_vector_store()  # vector store initialized
+    else:
+        # Load and index documents
+        documents = rag.load_documents()
+        rag.initialize_vector_store(documents)  # documents
+
+    print(rag.query("Quand a eu lieu la traite négrière ?"))
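query() is a generator that yields answer tokens from chain.stream, so the result needs to be iterated to materialize text. A small consuming sketch (not part of the commit):

# Sketch: stream the answer token by token.
rag = RAGSystem(top_k_documents=12)
rag.initialize_vector_store()
for token in rag.query("Quand a eu lieu la traite négrière ?"):
    print(token, end="", flush=True)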
src/utilities/embedding.py ADDED
@@ -0,0 +1,65 @@
+import logging
+import os
+from typing import Any, List
+
+import torch
+from langchain_core.embeddings import Embeddings
+from langchain_huggingface import (
+    HuggingFaceEmbeddings,
+    HuggingFaceEndpointEmbeddings,
+)
+from pydantic import BaseModel, Field
+
+
+class CustomEmbedding(BaseModel, Embeddings):
+    hosted_embedding: HuggingFaceEndpointEmbeddings = Field(
+        default_factory=lambda: None
+    )
+    cpu_embedding: HuggingFaceEmbeddings = Field(default_factory=lambda: None)
+
+    def __init__(self, **kwargs: Any):
+        super().__init__(**kwargs)
+        self.hosted_embedding = HuggingFaceEndpointEmbeddings(
+            model=os.getenv("HF_MODEL"),
+            model_kwargs={"encode_kwargs": {"normalize_embeddings": True}},
+            huggingfacehub_api_token=os.getenv("HUGGINGFACEHUB_API_TOKEN"),
+        )
+        self.cpu_embedding = HuggingFaceEmbeddings(
+            model_name=os.getenv("HF_MODEL"),  # You can replace with any HF model
+            model_kwargs={"device": "cpu" if not torch.cuda.is_available() else "cuda"},
+            encode_kwargs={"normalize_embeddings": True},
+        )
+
+    def embed_documents(self, texts: List[str]) -> List[List[float]]:
+        """
+        Embed a list of documents using the hosted embedding. If the API request limit is reached,
+        fall back to using the CPU embedding.
+
+        Args:
+            texts (List[str]): List of documents to embed.
+
+        Returns:
+            List[List[float]]: List of embeddings for each document.
+        """
+        try:
+            return self.hosted_embedding.embed_documents(texts)
+        except:
+            logging.warning("Issue with batch hosted embedding, moving to CPU")
+            return self.cpu_embedding.embed_documents(texts)
+
+    def embed_query(self, text: str) -> List[float]:
+        """
+        Embed a single query using the hosted embedding. If the API request limit is reached,
+        fall back to using the CPU embedding.
+
+        Args:
+            text (str): Query to embed.
+
+        Returns:
+            List[float]: Embedding for the query.
+        """
+        try:
+            return self.hosted_embedding.embed_query(text)
+        except:
+            logging.warning("Issue with hosted embedding, moving to CPU")
+            return self.cpu_embedding.embed_query(text)
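CustomEmbedding tries the hosted Hugging Face endpoint first and falls back to the local HuggingFaceEmbeddings model on any exception. A minimal usage sketch, assuming HF_MODEL and HUGGINGFACEHUB_API_TOKEN are set; the model name below is a placeholder, not taken from the repository:

# Sketch: hosted-first embeddings with automatic local fallback.
import os

os.environ.setdefault("HF_MODEL", "sentence-transformers/all-MiniLM-L6-v2")  # placeholder model

from src.utilities.embedding import CustomEmbedding

emb = CustomEmbedding()
doc_vectors = emb.embed_documents(["Histoire de l'empire du Ghana", "Plantes médicinales"])
query_vector = emb.embed_query("médecine traditionnelle africaine")
print(len(doc_vectors), len(query_vector))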
src/utilities/llm_models.py CHANGED
@@ -2,9 +2,10 @@ import os
 from enum import Enum
 
 from langchain_groq import ChatGroq
-from langchain_huggingface import HuggingFaceEmbeddings
 from langchain_ollama import ChatOllama, OllamaEmbeddings
 
+from .embedding import CustomEmbedding
+
 
 class LLMModel(Enum):
     OLLAMA = ChatOllama
@@ -12,9 +13,7 @@ class LLMModel(Enum):
 
 
 def get_llm_model_chat(temperature=0.01, max_tokens=None):
-    if str(os.getenv("USE_OLLAMA_CHAT")) == "1" and "localhost" not in str(
-        os.getenv("OLLAMA_HOST")
-    ):
+    if str(os.getenv("USE_OLLAMA_CHAT")) == "1":
        return ChatOllama(
            model=os.getenv("OLLAMA_MODEL"),
            temperature=temperature,
@@ -36,11 +35,7 @@ def get_llm_model_chat(temperature=0.01, max_tokens=None):
 
 def get_llm_model_embedding():
     if str(os.getenv("USE_HF_EMBEDDING")) == "1":
-        return HuggingFaceEmbeddings(
-            model_name=os.getenv("HF_MODEL"),  # You can replace with any HF model
-            model_kwargs={"device": "cpu"},
-            encode_kwargs={"normalize_embeddings": True},
-        )
+        return CustomEmbedding()
     return OllamaEmbeddings(
        model=os.getenv("OLLAM_EMB"),
        base_url=os.getenv("OLLAMA_HOST"),
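Both factories are driven entirely by environment variables that appear across this commit. An illustrative configuration; the variable names come from the code, every value is a placeholder:

# Illustrative environment setup (placeholder values only).
import os

os.environ["USE_OLLAMA_CHAT"] = "1"
os.environ["OLLAMA_MODEL"] = "llama3.1"                            # placeholder
os.environ["OLLAMA_HOST"] = "http://localhost:11434"               # placeholder
os.environ["USE_HF_EMBEDDING"] = "1"
os.environ["HF_MODEL"] = "sentence-transformers/all-MiniLM-L6-v2"  # placeholder
os.environ["HUGGINGFACEHUB_API_TOKEN"] = "hf_..."                  # placeholder
os.environ["OLLAM_EMB"] = "nomic-embed-text"                       # placeholder
os.environ["MAX_MESSAGES"] = "5"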
src/vector_store/vector_store.py CHANGED
@@ -1,49 +1,95 @@
+import json
 import os
-from typing import Union
+from concurrent.futures import ThreadPoolExecutor
+from glob import glob
+from typing import List, Union
 
 from langchain.retrievers import EnsembleRetriever
+from langchain.text_splitter import RecursiveCharacterTextSplitter
 from langchain_chroma import Chroma
+from langchain_community.document_loaders import DirectoryLoader, TextLoader
 from langchain_community.retrievers import BM25Retriever
 from langchain_core.documents import Document
+from tqdm import tqdm
 
 from ..utilities.llm_models import get_llm_model_embedding
 
 
+def sanitize_metadata(metadata: dict):
+    sanitized = {}
+    for key, value in metadata.items():
+        if isinstance(value, list):
+            # Convert lists to comma-separated strings or handle appropriately
+            sanitized[key] = ", ".join(value)
+        elif isinstance(value, (str, int, float, bool)):
+            sanitized[key] = value
+        else:
+            raise ValueError(
+                f"Unsupported metadata type for key '{key}': {type(value)}"
+            )
+    return sanitized
+
+
+def get_collection_name():
+    return os.getenv("HF_MODEL").split(":")[0].split("/")[-1].replace("-v1", "")
+
+
 class VectorStoreManager:
     def __init__(self, docs_dir: str, persist_directory_dir: str, batch_size=64):
         self.embeddings = get_llm_model_embedding()
+        self.vs_initialized = False
+        self.vector_store = None
         self.vector_stores: dict[str, Union[Chroma, BM25Retriever]] = {
             "chroma": None,
             "bm25": None,
         }
-        self.vs_initialized = False
-        self.vector_store = None
         self.docs_dir = docs_dir
         self.persist_directory_dir = persist_directory_dir
         self.batch_size = batch_size
-        self.collection_name = (
-            os.getenv("OLLAM_EMB").split(":")[0].split("/")[-1].replace("-v1", "")
-        )
+        self.collection_name = get_collection_name()
+
+    def _batch_process_documents(self, documents: List):
+        """Process documents in batches"""
+        for i in tqdm(
+            range(0, len(documents), self.batch_size), desc="Processing documents"
+        ):
+            batch = documents[i : i + self.batch_size]
+
+            if not self.vs_initialized:
+                # Initialize vector store with first batch
+                self.vector_stores["chroma"] = Chroma.from_documents(
+                    collection_name=self.collection_name,
+                    documents=batch,
+                    embedding=self.embeddings,
+                    persist_directory=self.persist_directory_dir,
+                )
+                self.vs_initialized = True
+            else:
+                # Add subsequent batches
+                self.vector_stores["chroma"].add_documents(batch)
+        self.vector_stores["bm25"] = BM25Retriever.from_documents(documents)
 
-    def initialize_vector_store(self):
+    def initialize_vector_store(self, documents: List = None):
         """Initialize or load the vector store"""
-        chroma_vs = Chroma(
-            collection_name=self.collection_name,
-            persist_directory=self.persist_directory_dir,
-            embedding_function=self.embeddings,
-        )
-        all_documents = chroma_vs.get()
-        documents = [
-            Document(page_content=content, id=doc_id, metadata=metadata)
-            for content, doc_id, metadata in zip(
-                all_documents["documents"],
-                all_documents["ids"],
-                all_documents["metadatas"],
-            )
-        ]
-        bm25_vs: BM25Retriever = BM25Retriever.from_documents(documents=documents)
-        self.vector_stores["chroma"] = chroma_vs
-        self.vector_stores["bm25"] = bm25_vs
+        if documents:
+            self._batch_process_documents(documents)
+        else:
+            chroma_vs = Chroma(
+                collection_name=self.collection_name,
+                persist_directory=self.persist_directory_dir,
+                embedding_function=self.embeddings,
+            )
+            if documents is None:
+                all_documents = chroma_vs.get(include=["documents"])
+                documents = [
+                    Document(page_content=content, id=doc_id)
+                    for content, doc_id in zip(
+                        all_documents["documents"], all_documents["ids"]
+                    )
+                ]
+            bm25_vs: BM25Retriever = BM25Retriever.from_documents(documents=documents)
+            self.vector_stores["chroma"] = chroma_vs
+            self.vector_stores["bm25"] = bm25_vs
         self.vs_initialized = True
 
     def create_retriever(self, n_documents: int, bm25_portion: float = 0.4):
@@ -59,15 +105,48 @@
         )
         return self.vector_store
 
-    def create_retriever(self, n_documents: int, bm25_portion: float = 0.4):
-        self.vector_stores["bm25"].k = n_documents
-        self.vector_store = EnsembleRetriever(
-            retrievers=[
-                self.vector_stores["bm25"],
-                self.vector_stores["chroma"].as_retriever(
-                    search_kwargs={"k": n_documents}
-                ),
-            ],
-            weights=[bm25_portion, 1 - bm25_portion],
-        )
-        return self.vector_store
+    def _load_text_documents(self) -> List:
+        """*
+        Load and split documents from the specified directory
+        @TODO Move this function to chunking
+        """
+        loader = DirectoryLoader(self.docs_dir, glob="**/*.txt", loader_cls=TextLoader)
+        documents = loader.load()
+
+        splitter = RecursiveCharacterTextSplitter(
+            chunk_size=1000,
+            chunk_overlap=200,
+            length_function=len,
+        )
+        return splitter.split_documents(documents)
+
+    def _load_json_documents(self) -> List:
+        """*
+        Load and split documents from the specified directory
+        @TODO Move this function to chunking
+        """
+        files = glob(os.path.join(self.docs_dir, "*.json"))
+
+        def load_json_file(file_path):
+            with open(file_path, "r") as f:
+                data = json.load(f)["kwargs"]
+            return Document.model_validate(
+                {**data, "metadata": sanitize_metadata(data["metadata"])}
+            )
+
+        with ThreadPoolExecutor() as executor:
+            documents = list(
+                tqdm(
+                    executor.map(load_json_file, files),
+                    total=len(files),
+                    desc="Loading JSON documents",
+                )
+            )
+
+        return documents
+
+    def load_documents(self) -> List:
+        files = glob(os.path.join(self.docs_dir, "*.json"))
+        if len(files):
+            return self._load_json_documents()
+        return self._load_text_documents()
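End to end, VectorStoreManager can build the hybrid BM25 + Chroma retriever either from a persisted collection or from freshly loaded documents. A short sketch, assuming the embedding environment variables above are set and that data/docs contains .txt or .json files:

# Sketch: index documents and build the ensemble retriever.
manager = VectorStoreManager("data/docs", "data/chroma_db", batch_size=64)
documents = manager.load_documents()        # JSON documents if present, otherwise .txt files
manager.initialize_vector_store(documents)  # embeds in batches and builds the BM25 index
retriever = manager.create_retriever(n_documents=12, bm25_portion=0.4)
results = retriever.invoke("médecine traditionnelle africaine")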