Spaces:
Sleeping
Sleeping
import os

# Configured at the very top of the script, ahead of the remaining imports.
os.environ["TOKENIZERS_PARALLELISM"] = "true"

from dotenv import load_dotenv

# Load the API keys stored in the local .env file.
load_dotenv()
# Mistral API key read from the environment; None when the variable is unset.
mistral_api_key = os.getenv("MISTRAL_API_KEY")
# Report only whether the key is configured: echoing the secret itself would
# leak it into the console and any captured logs.
print("mistral_api_key", "set" if mistral_api_key else "missing")
import pandas as pd | |
from langchain.output_parsers import PandasDataFrameOutputParser | |
from langchain_community.document_loaders import PyPDFLoader | |
from langchain.text_splitter import RecursiveCharacterTextSplitter | |
from langchain_community.vectorstores import Chroma | |
from langchain_mistralai import MistralAIEmbeddings | |
from langchain import hub | |
from langchain_core.output_parsers import StrOutputParser | |
from langchain_core.runnables import RunnablePassthrough | |
from typing import Literal | |
from langchain_core.prompts import PromptTemplate | |
from langchain_mistralai import ChatMistralAI | |
from pathlib import Path | |
from langchain.retrievers import ( | |
MergerRetriever, | |
) | |
import pprint | |
from typing import Any, Dict | |
from huggingface_hub import login | |
# Authenticate against the Hugging Face Hub only when a token is configured.
# NOTE(review): per the huggingface_hub docs, login() without a token falls
# back to an interactive prompt, which would stall a non-interactive
# deployment -- confirm that skipping the call is acceptable here.
_hf_token = os.getenv("HUGGING_FACE_TOKEN")
if _hf_token:
    login(token=_hf_token)
else:
    print("HUGGING_FACE_TOKEN is not set; skipping Hugging Face Hub login")
def load_chunk_persist_pdf(task: str, chunk_size: int = 1000, chunk_overlap: int = 10) -> Chroma:
    """Load the PDFs of *task*, split them into chunks and persist them in Chroma.

    PDFs are read from ``data/pdf/<task>`` and the store is written to
    ``data/chroma_store/<task>``, both relative to the current working
    directory. Each task gets its own persist directory so that stores built
    for different tasks are kept apart instead of all landing in one shared
    directory.

    Args:
        task: Name of the sub-folder of ``data/pdf`` holding the PDF files.
        chunk_size: Chunk size passed to ``RecursiveCharacterTextSplitter``.
        chunk_overlap: Chunk overlap passed to ``RecursiveCharacterTextSplitter``.

    Returns:
        The Chroma vector store built from the chunked documents.

    Raises:
        FileNotFoundError: If ``data/pdf/<task>`` does not exist.
    """
    data_dir = Path.cwd() / "data"
    pdf_dir = data_dir / "pdf" / task
    persist_dir = data_dir / "chroma_store" / task

    documents = []
    # Sorted so the document order does not depend on directory listing order.
    for pdf_path in sorted(pdf_dir.iterdir()):
        if pdf_path.name.endswith(".pdf"):
            documents.extend(PyPDFLoader(str(pdf_path)).load())

    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    chunked_documents = text_splitter.split_documents(documents)

    # Create exactly the directory that is handed to Chroma below.
    persist_dir.mkdir(parents=True, exist_ok=True)
    vectorstore = Chroma.from_documents(
        documents=chunked_documents,
        embedding=MistralAIEmbeddings(),
        persist_directory=str(persist_dir),
    )
    vectorstore.persist()
    return vectorstore
# Sample workout table: one row per exercise with its sets, repetitions and
# rest time between sets.
df = pd.DataFrame(
    [
        ("Squat", 4, 10, "2:30"),
        ("Bench Press", 4, 8, "2:00"),
        ("Lunges", 3, 8, "1:30"),
        ("Pull ups", 3, 8, "2:00"),
    ],
    columns=["exercise", "sets", "repetitions", "rest"],
)
# parser = PandasDataFrameOutputParser(dataframe=df) | |
# personal_info_vectorstore = load_chunk_persist_pdf("personal_info") | |
# zero2hero_vectorstore = load_chunk_persist_pdf("zero2hero") | |
# bodyweight_vectorstore = load_chunk_persist_pdf("bodyweight") | |
# nutrition_vectorstore = load_chunk_persist_pdf("nutrition") | |
# workout_vectorstore = load_chunk_persist_pdf("workout") | |
# zero2hero_retriever = zero2hero_vectorstore.as_retriever() | |
# nutrition_retriever = nutrition_vectorstore.as_retriever() | |
# bodyweight_retriever = bodyweight_vectorstore.as_retriever() | |
# workout_retriever = workout_vectorstore.as_retriever() | |
# personal_info_retriever = personal_info_vectorstore.as_retriever() | |
# Chat model used by the chain built at the end of this script.
llm = ChatMistralAI(
    model="mistral-large-latest",
    temperature=0,
    mistral_api_key=mistral_api_key,
)
# prompt = PromptTemplate( | |
# template=""" | |
# You are a professional AI coach specialized in building fitness plans, full workout programs. | |
# You must adapt to the user according to the personal information in the context. You are gentle and motivating.
# Use the following pieces of retrieved context to answer the user's query. | |
# Context: {context} | |
# \n{format_instructions}\n{question}\n | |
# """, | |
# input_variables=["question","context"], | |
# partial_variables={"format_instructions": parser.get_format_instructions()}, | |
# ) | |
# def format_docs(docs): | |
# return "\n\n".join(doc.page_content for doc in docs) | |
# def format_parser_output(parser_output: Dict[str, Any]) -> None: | |
# for key in parser_output.keys(): | |
# parser_output[key] = parser_output[key].to_dict() | |
# return pprint.PrettyPrinter(width=4, compact=True).pprint(parser_output) | |
# retriever = MergerRetriever(retrievers=[zero2hero_retriever, bodyweight_retriever, nutrition_retriever, workout_retriever, personal_info_retriever]) | |
# chain = ( | |
# {"context": zero2hero_retriever | format_docs, "question": RunnablePassthrough()} | |
# | prompt | |
# | llm | |
# | parser | |
# ) | |
# # chain = prompt | llm | parser | |
# format_parser_output(chain.invoke("Build me a full body workout plan for summer body.")) | |
from pydantic import BaseModel, Field | |
from typing import List | |
from langchain_core.output_parsers import JsonOutputParser | |
# One exercise entry of a musculation program.
# Documented with comments rather than a class docstring: pydantic copies a
# model's docstring into its JSON schema, which would alter the format
# instructions generated from this model.
# The field names are French and are the keys of the parsed output.
class Exercise(BaseModel):
    # Exercise name.
    exercice: str = Field(
        description="Name of the exercise",
    )
    # Number of sets.
    nombre_series: int = Field(
        description="Number of sets for the exercise",
    )
    # Number of repetitions.
    nombre_repetitions: int = Field(
        description="Number of repetitions for the exercise",
    )
    # Rest time between sets, kept as free-form text.
    temps_repos: str = Field(
        description="Rest time between sets",
    )
# Complete musculation program: the list of exercises it is made of.
# Kept docstring-free on purpose: a pydantic model docstring ends up in the
# JSON schema generated from the model.
class MusculationProgram(BaseModel):
    # Exercises of the program.
    exercises: List[Exercise]
from langchain.prompts import PromptTemplate | |
# Query asking the model for a musculation program.
musculation_query = "Provide a musculation program with exercises, number of sets, number of repetitions, and rest time between sets."

# JSON parser driven by the MusculationProgram schema; its format
# instructions are injected into the prompt template below.
parser = JsonOutputParser(pydantic_object=MusculationProgram)

prompt = PromptTemplate(
    template=(
        "Answer the user query.\n"
        "{format_instructions}\n"
        "{query}\n"
    ),
    input_variables=["query"],
    partial_variables={
        "format_instructions": parser.get_format_instructions(),
    },
)

# Chain: fill the prompt, call the language model, parse its answer.
workout_chain = prompt | llm | parser