import pandas as pd
import os
import json
import logging
import warnings
from dotenv import load_dotenv
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain.prompts import PromptTemplate
from langchain.schema import Document
from langchain_together import Together
import streamlit as st

# Logging setup
logging.basicConfig(level=logging.DEBUG, format="%(asctime)s - %(levelname)s - %(message)s")
logger = logging.getLogger(__name__)

# Suppress warnings
warnings.filterwarnings("ignore")

# Load environment variables
load_dotenv()
TOGETHER_AI_API = os.getenv("TOGETHER_AI")

# Dataset paths
csv_path = "bns_dataset.csv"
chunked_json_path = "chunked_bns_dataset.json"
faiss_db_path = "bns_vector_db"

# Streamlit page config
st.set_page_config(page_title="LawGPT - Bharatiya Nyay Sanhita", layout="centered")
st.title("Law4Her: Bharatiya Nyay Sanhita")
st.markdown("This app provides answers to legal questions based on the *Bharatiya Nyay Sanhita (BNS)*.")
st.image("https://res.cloudinary.com/dzzhbgbnp/image/upload/v1736073326/lawforher_logo1_yznqxr.png",
         use_container_width=True)


# Load CSV and process the dataset into overlapping text chunks
def process_csv_to_chunks(csv_file, output_json, chunk_size=512, overlap=100):
    if not os.path.exists(csv_file):
        st.error(f"CSV file not found: {csv_file}")
        st.stop()

    logger.info(f"Loading CSV file: {csv_file}")
    df = pd.read_csv(csv_file)
    df = df.fillna("")  # Guard against NaN from empty cells; len() below expects strings

    # Ensure required columns are present
    required_columns = {"chapter", "section_title", "section_content"}
    if not required_columns.issubset(df.columns):
        st.error(f"CSV file is missing required columns: {required_columns - set(df.columns)}")
        st.stop()

    logger.info("Creating text chunks...")
    chunks = []
    for _, row in df.iterrows():
        chapter = row.get("chapter", "")
        section_title = row.get("section_title", "")
        section_content = row.get("section_content", "")

        # Split content into overlapping chunks
        for i in range(0, len(section_content), chunk_size - overlap):
            chunk = section_content[i:i + chunk_size]
            chunks.append({
                "chapter": chapter,
                "section_title": section_title,
                "chunk": chunk,
            })

    # Save chunks to JSON
    logger.info(f"Saving chunks to {output_json}...")
    with open(output_json, "w", encoding="utf-8") as f:
        json.dump(chunks, f, indent=4, ensure_ascii=False)

    logger.info("Chunks saved successfully.")
    return chunks


if not os.path.exists(chunked_json_path):
    logger.info("Processing CSV to JSON...")
    chunks = process_csv_to_chunks(csv_path, chunked_json_path)
else:
    logger.info("Loading pre-processed chunks from JSON...")
    with open(chunked_json_path, "r", encoding="utf-8") as f:
        chunks = json.load(f)

# Create FAISS vectorstore
logger.info("Initializing embeddings and vectorstore...")
embeddings = HuggingFaceEmbeddings(
    model_name="nomic-ai/nomic-embed-text-v1",
    model_kwargs={"trust_remote_code": True, "revision": "289f532e14dbbbd5a04753fa58739e9ba766f3c7"},
)

if not os.path.exists(f"{faiss_db_path}/index.faiss"):
    logger.info("Creating FAISS vectorstore...")
    texts = [
        Document(
            page_content=chunk["chunk"],
            metadata={"chapter": chunk["chapter"], "section_title": chunk["section_title"]},
        )
        for chunk in chunks
    ]
    db = FAISS.from_documents(texts, embeddings)
    db.save_local(faiss_db_path)
else:
    logger.info("Loading existing FAISS vectorstore...")
    db = FAISS.load_local(faiss_db_path, embeddings, allow_dangerous_deserialization=True)

retriever = db.as_retriever(search_type="similarity", search_kwargs={"k": 5})

# Define prompt template
# Note: the template must close with [/INST]; Mistral-Instruct models generate
# the answer after the closing tag, so a trailing [INST] would be malformed.
prompt_template = """[INST] You are a legal chatbot specializing in the Bharatiya Nyay Sanhita (Indian Penal Code replacement). Provide answers **only based on the provided CONTEXT**. If the requested information is not available in the CONTEXT, respond with: "The required information is not available."

CONTEXT: {context}

USER QUERY: {question}

RESPONSE: [/INST]
"""
prompt = PromptTemplate(template=prompt_template, input_variables=["context", "question"])

# Initialize Together API
llm = Together(
    model="mistralai/Mistral-7B-Instruct-v0.2",
    temperature=0.5,
    max_tokens=1024,
    together_api_key=TOGETHER_AI_API,
)


# Chat interface
def generate_response(user_query):
    try:
        # Retrieve relevant documents (invoke is the non-deprecated retriever call)
        retrieved_docs = retriever.invoke(user_query)

        # Log retrieved documents
        logger.info(f"User Query: {user_query}")
        for i, doc in enumerate(retrieved_docs):
            logger.info(
                f"Document {i + 1}: Chapter - {doc.metadata['chapter']}, Section - {doc.metadata['section_title']}")
            logger.info(f"Content: {doc.page_content}")

        # Prepare context for LLM
        context = "\n\n".join(
            f"Chapter: {doc.metadata['chapter']}, Section: {doc.metadata['section_title']}\n{doc.page_content}"
            for doc in retrieved_docs
        )

        # Construct LLM prompt input
        prompt_input = {"context": context, "question": user_query}

        # Generate response using LLM
        logger.debug(f"Payload sent to LLM: {json.dumps(prompt_input, ensure_ascii=False, indent=2)}")
        response = llm.invoke(prompt.format(**prompt_input))
        return response
    except Exception as e:
        logger.error(f"Error generating response: {e}")
        return "An error occurred while generating the response."


# Streamlit chat interface
if "messages" not in st.session_state:
    st.session_state.messages = [{"role": "assistant", "content": "Hi! How can I assist you today?"}]

# Display chat messages
for message in st.session_state.messages:
    with st.chat_message(message["role"]):
        st.write(message["content"])

# User input
if user_input := st.chat_input("Type your question here..."):
    # User message
    st.session_state.messages.append({"role": "user", "content": user_input})
    with st.chat_message("user"):
        st.write(user_input)

    # Assistant response
    with st.chat_message("assistant"):
        with st.spinner("Generating response..."):
            response = generate_response(user_input)
            st.write(response)
    st.session_state.messages.append({"role": "assistant", "content": response})
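
# ---------------------------------------------------------------------------
# Running the app: a minimal sketch, not part of the script itself. The file
# name `app.py` and the `.env` layout are assumptions; the package list is the
# set implied by the imports above (sentence-transformers backs
# HuggingFaceEmbeddings, and einops is required by nomic-embed-text-v1):
#
#   pip install streamlit pandas python-dotenv langchain langchain-community \
#       langchain-huggingface langchain-together faiss-cpu sentence-transformers einops
#   echo 'TOGETHER_AI=<your Together AI key>' > .env
#   streamlit run app.py
#
# bns_dataset.csv must sit next to the script with the columns chapter,
# section_title, and section_content; chunked_bns_dataset.json and the FAISS
# index under bns_vector_db/ are built on the first run and reused afterwards.
# ---------------------------------------------------------------------------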