chaithanyashaji committed
Commit 0b92c24 · verified · 1 Parent(s): f99aed4

Update app.py

Files changed (1)
  1. app.py +58 -81
app.py CHANGED
@@ -7,8 +7,6 @@ from dotenv import load_dotenv
 from langchain_huggingface import HuggingFaceEmbeddings
 from langchain_community.vectorstores import FAISS
 from langchain.prompts import PromptTemplate
-from langchain.memory import ConversationBufferMemory
-from langchain.chains import ConversationalRetrievalChain
 from langchain.schema import Document
 from langchain_together import Together
 import streamlit as st
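Note: with ConversationBufferMemory and ConversationalRetrievalChain dropped here, nothing in the rewritten app feeds chat history to the model any more; each question is answered statelessly against freshly retrieved context. If pronoun-heavy follow-ups ("what is the punishment for it?") ever need to retrieve well again, one option is to fold recent turns into the query before retrieval. A hypothetical sketch, not part of this commit:

# Hypothetical helper, not in app.py: prepend recent turns so a
# follow-up question still matches the right BNS sections on retrieval.
def with_history(user_query, messages, max_turns=3):
    recent = messages[-2 * max_turns:]
    history = "\n".join(f"{m['role']}: {m['content']}" for m in recent)
    return f"{history}\nuser: {user_query}" if history else user_query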
@@ -30,12 +28,13 @@ chunked_json_path = "chunked_bns_dataset.json"
 faiss_db_path = "bns_vector_db"
 
 # Streamlit page config
-st.set_page_config(page_title="LawGPT - Bharatiya Nyay Sanhita")
+st.set_page_config(page_title="LawGPT - Bharatiya Nyay Sanhita", layout="centered")
 
-# Streamlit UI
 st.title("Law4Her: Bharatiya Nyay Sanhita")
-st.markdown("This app provides answers to legal questions based on the **Bharatiya Nyay Sanhita (BNS)**.")
-st.image("https://res.cloudinary.com/dzzhbgbnp/image/upload/v1736073326/lawforher_logo1_yznqxr.png", use_container_width=True)
+st.markdown("This app provides answers to legal questions based on the *Bharatiya Nyay Sanhita (BNS)*.")
+st.image("https://res.cloudinary.com/dzzhbgbnp/image/upload/v1736073326/lawforher_logo1_yznqxr.png",
+         use_container_width=True)
+
 
 # Load CSV and Process Dataset
 def process_csv_to_chunks(csv_file, output_json, chunk_size=512, overlap=100):
@@ -76,6 +75,7 @@ def process_csv_to_chunks(csv_file, output_json, chunk_size=512, overlap=100):
 
     return chunks
 
+
 if not os.path.exists(chunked_json_path):
     logger.info("Processing CSV to JSON...")
     chunks = process_csv_to_chunks(csv_path, chunked_json_path)
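The body of process_csv_to_chunks sits outside this diff's context lines; only the signature and return value are visible. For orientation, a representative sliding-window chunker with the same signature and the output shape implied by the Document construction below (dicts with "chunk", "chapter", and "section_title" keys) could look like this; the CSV column names are assumptions, not taken from the repo:

# Hypothetical reconstruction for illustration only; the real body is
# not shown in this diff. Assumes "chapter", "section_title", and
# "text" columns in the source CSV.
import json
import pandas as pd

def process_csv_to_chunks(csv_file, output_json, chunk_size=512, overlap=100):
    df = pd.read_csv(csv_file)
    chunks = []
    for _, row in df.iterrows():
        text = str(row["text"])
        # Slide a chunk_size window with `overlap` characters of overlap.
        for start in range(0, max(len(text), 1), chunk_size - overlap):
            chunks.append({
                "chunk": text[start:start + chunk_size],
                "chapter": row["chapter"],
                "section_title": row["section_title"],
            })
    with open(output_json, "w", encoding="utf-8") as f:
        json.dump(chunks, f, ensure_ascii=False, indent=2)
    return chunks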
@@ -94,7 +94,8 @@ embeddings = HuggingFaceEmbeddings(
 if not os.path.exists(f"{faiss_db_path}/index.faiss"):
     logger.info("Creating FAISS vectorstore...")
     texts = [
-        Document(page_content=chunk["chunk"], metadata={"chapter": chunk["chapter"], "section_title": chunk["section_title"]})
+        Document(page_content=chunk["chunk"],
+                 metadata={"chapter": chunk["chapter"], "section_title": chunk["section_title"]})
         for chunk in chunks
     ]
     db = FAISS.from_documents(texts, embeddings)
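When bns_vector_db/index.faiss already exists, the app skips the build and must load the saved index instead (that branch is outside this hunk's context). A minimal sketch of the build-vs-load split, assuming langchain_community's FAISS.load_local API; on recent versions the allow_dangerous_deserialization flag is required because index metadata is pickled:

# Sketch of the build-vs-load branch; load_local arguments assume the
# current langchain_community API, not code shown in this diff.
if not os.path.exists(f"{faiss_db_path}/index.faiss"):
    db = FAISS.from_documents(texts, embeddings)
    db.save_local(faiss_db_path)
else:
    db = FAISS.load_local(faiss_db_path, embeddings,
                          allow_dangerous_deserialization=True)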
@@ -107,103 +108,79 @@ retriever = db.as_retriever(search_type="similarity", search_kwargs={"k": 5})
 
 # Define Prompt Template
 prompt_template = """<s>[INST]
-You are a legal chatbot specializing in the Bharatiya Nyay Sanhita (Indian Penal Code replacement). Provide concise, context-aware answers in a conversational tone. Avoid presenting the response as a question-answer format. If the requested information is not available in the provided dataset, respond with: "The required information is not available."
-
-When providing legal information, always cite the relevant section number(s) explicitly.
+You are a legal chatbot specializing in the Bharatiya Nyay Sanhita (Indian Penal Code replacement). Provide answers **only based on the provided CONTEXT**.
+If the requested information is not available in the CONTEXT, respond with: "The required information is not available."
 
 CONTEXT: {context}
 
-CHAT HISTORY:
-{chat_history}
-
-USER QUERY:
-{question}
+USER QUERY: {question}
 
 RESPONSE:
 </s>[INST]
 """
-prompt = PromptTemplate(template=prompt_template, input_variables=["context", "question", "chat_history"])
+prompt = PromptTemplate(template=prompt_template, input_variables=["context", "question"])
 
 # Initialize Together API
 llm = Together(
     model="mistralai/Mistral-7B-Instruct-v0.2",
-    temperature=0.3,  # Lower temperature for deterministic output
-    max_tokens=500,  # Limit token length
+    temperature=0.5,
+    max_tokens=1024,
     together_api_key=TOGETHER_AI_API,
 )
 
-# Conversational Retrieval Chain
-qa_chain = ConversationalRetrievalChain.from_llm(
-    llm=llm,
-    memory=ConversationBufferMemory(memory_key="chat_history", return_messages=True),
-    retriever=retriever,
-    combine_docs_chain_kwargs={"prompt": prompt},
-)
 
-# Function to format retrieved documents into context for the prompt
-def format_context(retrieved_docs):
-    context = ""
-    for doc in retrieved_docs:
-        context += f"Chapter: {doc.metadata.get('chapter', 'N/A')}\n"
-        context += f"Section Title: {doc.metadata.get('section_title', 'N/A')}\n"
-        context += f"Content: {doc.page_content}\n\n"
-    return context
-
-# Log the full prompt sent to the LLM
-def log_full_prompt(context, chat_history, user_query):
-    full_prompt = prompt.format(context=context, question=user_query, chat_history=chat_history)
-    logger.info(f"Full Prompt Sent to LLM:\n{full_prompt}")
-    return full_prompt
-
-# Reset Chat
-def reset_conversation():
-    st.session_state.messages = [{"role": "assistant", "content": "Hi, how can I assist you today?"}]
-    st.session_state.memory.clear()
+# Chat Interface
+def generate_response(user_query):
+    try:
+        # Retrieve relevant documents
+        retrieved_docs = retriever.get_relevant_documents(user_query)
+
+        # Log retrieved documents
+        logger.info(f"User Query: {user_query}")
+        for i, doc in enumerate(retrieved_docs):
+            logger.info(
+                f"Document {i + 1}: Chapter - {doc.metadata['chapter']}, Section - {doc.metadata['section_title']}")
+            logger.info(f"Content: {doc.page_content}")
+
+        # Prepare context for LLM
+        context = "\n\n".join(
+            f"Chapter: {doc.metadata['chapter']}, Section: {doc.metadata['section_title']}\n{doc.page_content}"
+            for doc in retrieved_docs
+        )
+
+        # Construct LLM prompt input
+        prompt_input = {"context": context, "question": user_query}
 
+        # Generate response using LLM
+        logger.debug(f"Payload sent to LLM: {json.dumps(prompt_input, ensure_ascii=False, indent=2)}")
+        response = llm(prompt.format(**prompt_input))
+
+        return response
+
+    except Exception as e:
+        logger.error(f"Error generating response: {e}")
+        return "An error occurred while generating the response."
+
+
+# Streamlit Chat Interface
 if "messages" not in st.session_state:
-    st.session_state.messages = [{"role": "assistant", "content": "Hi, how can I assist you today?"}]
-    st.session_state.memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
+    st.session_state.messages = [{"role": "assistant", "content": "Hi! How can I assist you today?"}]
 
-# Chat Interface
+# Display Chat Messages
 for message in st.session_state.messages:
     with st.chat_message(message["role"]):
        st.write(message["content"])
 
-# User Input Handling
-user_input = st.chat_input("Ask a legal question about the Bharatiya Nyay Sanhita")
-
-if user_input:
+# User Input
+if user_input := st.chat_input("Type your question here..."):
+    # User message
+    st.session_state.messages.append({"role": "user", "content": user_input})
     with st.chat_message("user"):
         st.write(user_input)
 
-    st.session_state.messages.append({"role": "user", "content": user_input})
-
+    # Assistant response
     with st.chat_message("assistant"):
-        with st.spinner("Thinking..."):
-            try:
-                # Retrieve and log the documents
-                retrieved_docs = retriever.get_relevant_documents(user_input)
-                formatted_context = format_context(retrieved_docs)
-                logger.info(f"Retrieved Context:\n{formatted_context}")
-
-                # Retrieve chat history
-                chat_history = st.session_state.memory.load_memory_variables({}).get("chat_history", "")
-
-                # Log the full prompt
-                log_full_prompt(context=formatted_context, chat_history=chat_history, user_query=user_input)
-
-                # Generate the response
-                response = qa_chain.invoke(
-                    input=user_input,
-                    context=formatted_context,
-                    chat_history=chat_history
-                ).get("answer", "The required information is not available.")
-
-                # Append and display the response
-                st.session_state.messages.append({"role": "assistant", "content": response})
-                st.write(response)
-            except Exception as e:
-                st.error(f"Error: {e}")
-                logger.error(f"Error during response generation: {e}")
-
-st.button("Reset Chat", on_click=reset_conversation)
+        with st.spinner("Generating response..."):
+            response = generate_response(user_input)
+            st.write(response)
+            st.session_state.messages.append({"role": "assistant", "content": response})
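One caveat worth flagging on the new generate_response: retriever.get_relevant_documents(...) is deprecated in recent LangChain releases in favor of the runnable interface, and calling llm(...) directly likewise warns in favor of .invoke(...). If the pinned versions ever move, the equivalents would be roughly:

# Non-deprecated equivalents on recent LangChain; assumes retriever,
# llm, prompt, and prompt_input as constructed in the diff above.
retrieved_docs = retriever.invoke(user_query)           # instead of get_relevant_documents(...)
response = llm.invoke(prompt.format(**prompt_input))    # instead of llm(...)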