Update app.py
app.py (CHANGED)
@@ -7,8 +7,6 @@ from dotenv import load_dotenv
 from langchain_huggingface import HuggingFaceEmbeddings
 from langchain_community.vectorstores import FAISS
 from langchain.prompts import PromptTemplate
-from langchain.memory import ConversationBufferMemory
-from langchain.chains import ConversationalRetrievalChain
 from langchain.schema import Document
 from langchain_together import Together
 import streamlit as st
@@ -30,12 +28,13 @@ chunked_json_path = "chunked_bns_dataset.json"
 faiss_db_path = "bns_vector_db"
 
 # Streamlit page config
-st.set_page_config(page_title="LawGPT - Bharatiya Nyay Sanhita")
+st.set_page_config(page_title="LawGPT - Bharatiya Nyay Sanhita", layout="centered")
 
-# Streamlit UI
 st.title("Law4Her: Bharatiya Nyay Sanhita")
-st.markdown("This app provides answers to legal questions based on the
-st.image("https://res.cloudinary.com/dzzhbgbnp/image/upload/v1736073326/lawforher_logo1_yznqxr.png",
+st.markdown("This app provides answers to legal questions based on the *Bharatiya Nyay Sanhita (BNS)*.")
+st.image("https://res.cloudinary.com/dzzhbgbnp/image/upload/v1736073326/lawforher_logo1_yznqxr.png",
+         use_container_width=True)
+
 
 # Load CSV and Process Dataset
 def process_csv_to_chunks(csv_file, output_json, chunk_size=512, overlap=100):
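The diff shows only the signature of `process_csv_to_chunks`; its body is unchanged by this commit. For orientation, a minimal sketch of a character-window chunker with this signature that produces the chunk keys used later in the file (`chunk`, `chapter`, `section_title`); the CSV column names are assumptions, not taken from the commit:

```python
# Hypothetical sketch only; the real body is not part of this diff.
# Column names ("chapter", "section_title", "text") are assumed.
import json
import pandas as pd

def process_csv_to_chunks(csv_file, output_json, chunk_size=512, overlap=100):
    df = pd.read_csv(csv_file)
    chunks = []
    step = chunk_size - overlap  # slide the window forward by 412 chars
    for _, row in df.iterrows():
        text = str(row["text"])
        for start in range(0, len(text), step):
            chunks.append({
                "chapter": row["chapter"],
                "section_title": row["section_title"],
                "chunk": text[start:start + chunk_size],
            })
    with open(output_json, "w", encoding="utf-8") as f:
        json.dump(chunks, f, ensure_ascii=False, indent=2)
    return chunks
```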
@@ -76,6 +75,7 @@ def process_csv_to_chunks(csv_file, output_json, chunk_size=512, overlap=100):
 
     return chunks
 
+
 if not os.path.exists(chunked_json_path):
     logger.info("Processing CSV to JSON...")
     chunks = process_csv_to_chunks(csv_path, chunked_json_path)
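The `os.path.exists` guard means the CSV is chunked only once. The else branch is outside this diff; presumably it reloads the cached JSON, along these lines (an assumption, not shown in the commit):

```python
# Assumed cache-reload branch (not visible in this diff).
import json
import os

if not os.path.exists(chunked_json_path):
    chunks = process_csv_to_chunks(csv_path, chunked_json_path)
else:
    with open(chunked_json_path, encoding="utf-8") as f:
        chunks = json.load(f)
```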
@@ -94,7 +94,8 @@ embeddings = HuggingFaceEmbeddings(
 if not os.path.exists(f"{faiss_db_path}/index.faiss"):
     logger.info("Creating FAISS vectorstore...")
     texts = [
-        Document(page_content=chunk["chunk"],
+        Document(page_content=chunk["chunk"],
+                 metadata={"chapter": chunk["chapter"], "section_title": chunk["section_title"]})
         for chunk in chunks
     ]
     db = FAISS.from_documents(texts, embeddings)
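With `chapter` and `section_title` now stored as `Document` metadata, retrieved hits can be cited by section. A quick way to verify the saved index (the query string is illustrative; `allow_dangerous_deserialization=True` is required by recent `langchain_community` versions when loading a locally pickled index):

```python
# Sketch: load the saved index and check that metadata round-trips.
db = FAISS.load_local(faiss_db_path, embeddings,
                      allow_dangerous_deserialization=True)
for doc in db.similarity_search("punishment for theft", k=3):
    print(doc.metadata["chapter"], "-", doc.metadata["section_title"])
```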
@@ -107,103 +108,79 @@ retriever = db.as_retriever(search_type="similarity", search_kwargs={"k": 5})
 
 # Define Prompt Template
 prompt_template = """<s>[INST]
-You are a legal chatbot specializing in the Bharatiya Nyay Sanhita (Indian Penal Code replacement). Provide
-
-When providing legal information, always cite the relevant section number(s) explicitly.
+You are a legal chatbot specializing in the Bharatiya Nyay Sanhita (Indian Penal Code replacement). Provide answers **only based on the provided CONTEXT**.
+If the requested information is not available in the CONTEXT, respond with: "The required information is not available."
 
 CONTEXT: {context}
 
-
-{chat_history}
-
-USER QUERY:
-{question}
+USER QUERY: {question}
 
 RESPONSE:
 </s>[INST]
 """
-prompt = PromptTemplate(template=prompt_template, input_variables=["context", "question"
+prompt = PromptTemplate(template=prompt_template, input_variables=["context", "question"])
 
 # Initialize Together API
 llm = Together(
     model="mistralai/Mistral-7B-Instruct-v0.2",
-    temperature=0.
-    max_tokens=
+    temperature=0.5,
+    max_tokens=1024,
     together_api_key=TOGETHER_AI_API,
 )
 
-# Conversational Retrieval Chain
-qa_chain = ConversationalRetrievalChain.from_llm(
-    llm=llm,
-    memory=ConversationBufferMemory(memory_key="chat_history", return_messages=True),
-    retriever=retriever,
-    combine_docs_chain_kwargs={"prompt": prompt},
-)
 
-#
-def
[… old lines 145-161 are blank or truncated in the source diff view …]
+# Chat Interface
+def generate_response(user_query):
+    try:
+        # Retrieve relevant documents
+        retrieved_docs = retriever.get_relevant_documents(user_query)
+
+        # Log retrieved documents
+        logger.info(f"User Query: {user_query}")
+        for i, doc in enumerate(retrieved_docs):
+            logger.info(
+                f"Document {i + 1}: Chapter - {doc.metadata['chapter']}, Section - {doc.metadata['section_title']}")
+            logger.info(f"Content: {doc.page_content}")
+
+        # Prepare context for LLM
+        context = "\n\n".join(
+            f"Chapter: {doc.metadata['chapter']}, Section: {doc.metadata['section_title']}\n{doc.page_content}"
+            for doc in retrieved_docs
+        )
+
+        # Construct LLM prompt input
+        prompt_input = {"context": context, "question": user_query}
+
+        # Generate response using LLM
+        logger.debug(f"Payload sent to LLM: {json.dumps(prompt_input, ensure_ascii=False, indent=2)}")
+        response = llm(prompt.format(**prompt_input))
+
+        return response
+
+    except Exception as e:
+        logger.error(f"Error generating response: {e}")
+        return "An error occurred while generating the response."
+
+
+# Streamlit Chat Interface
 if "messages" not in st.session_state:
-    st.session_state.messages = [{"role": "assistant", "content": "Hi
-    st.session_state.memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
+    st.session_state.messages = [{"role": "assistant", "content": "Hi! How can I assist you today?"}]
 
-# Chat
+# Display Chat Messages
 for message in st.session_state.messages:
     with st.chat_message(message["role"]):
         st.write(message["content"])
 
-# User Input
-user_input
[… old lines 174-175 are truncated in the source diff view …]
+# User Input
+if user_input := st.chat_input("Type your question here..."):
+    # User message
+    st.session_state.messages.append({"role": "user", "content": user_input})
     with st.chat_message("user"):
         st.write(user_input)
 
-
-
+    # Assistant response
     with st.chat_message("assistant"):
-        with st.spinner("
[… old lines 183-185 are truncated in the source diff view …]
-            formatted_context = format_context(retrieved_docs)
-            logger.info(f"Retrieved Context:\n{formatted_context}")
-
-            # Retrieve chat history
-            chat_history = st.session_state.memory.load_memory_variables({}).get("chat_history", "")
-
-            # Log the full prompt
-            log_full_prompt(context=formatted_context, chat_history=chat_history, user_query=user_input)
-
-            # Generate the response
-            response = qa_chain.invoke(
-                input=user_input,
-                context=formatted_context,
-                chat_history=chat_history
-            ).get("answer", "The required information is not available.")
-
-            # Append and display the response
-            st.session_state.messages.append({"role": "assistant", "content": response})
-            st.write(response)
-        except Exception as e:
-            st.error(f"Error: {e}")
-            logger.error(f"Error during response generation: {e}")
-
-st.button("Reset Chat", on_click=reset_conversation)
+        with st.spinner("Generating response..."):
+            response = generate_response(user_input)
+            st.write(response)
+    st.session_state.messages.append({"role": "assistant", "content": response})
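The new `generate_response` helper is plain Python, so it can be smoke-tested outside the Streamlit UI (the query below is illustrative). Note that newer LangChain releases deprecate `retriever.get_relevant_documents` in favour of `retriever.invoke`, so the call inside the helper may emit a deprecation warning on current versions:

```python
# Smoke test for the new helper, runnable without the Streamlit UI.
if __name__ == "__main__":
    print(generate_response("Which section of the BNS deals with theft?"))
```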
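For completeness: the chat loop above relies on Streamlit rerunning the whole script on every interaction, with `st.session_state` keeping the history alive across reruns. A stripped-down sketch of the same pattern:

```python
# Minimal rerun-safe chat skeleton (same pattern as app.py above).
import streamlit as st

if "messages" not in st.session_state:
    st.session_state.messages = []             # survives reruns

for msg in st.session_state.messages:          # replay history each rerun
    with st.chat_message(msg["role"]):
        st.write(msg["content"])

if text := st.chat_input("Type a message..."):  # None until the user submits
    st.session_state.messages.append({"role": "user", "content": text})
    with st.chat_message("user"):
        st.write(text)
```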