Shreyas094 committed (verified) · Commit 8962e02 · 1 Parent(s): 824af1d

Update app.py

Files changed (1): app.py (+223 -58)
app.py CHANGED
@@ -1,6 +1,5 @@
 import requests
 import gradio as gr
-from bs4 import BeautifulSoup
 import logging
 from urllib.parse import urlparse
 from requests.adapters import HTTPAdapter
@@ -38,10 +37,11 @@ from collections import Counter
 import numpy as np
 from typing import List, Dict, Tuple
 import datetime
-CURRENT_YEAR = datetime.datetime.now().year
+from abc import ABC, abstractmethod
+from typing import List, Dict, Any
 
 # Automatically get the current year
-current_year = datetime.datetime.now().year
+CURRENT_YEAR = datetime.datetime.now().year
 
 # Load environment variables from a .env file
 load_dotenv()
@@ -51,8 +51,42 @@ logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(
 logger = logging.getLogger(__name__)
 
 # SearXNG instance details
-SEARXNG_URL = 'https://shreyas094-searxng-hf.hf.space/search'
-SEARXNG_KEY = 'f9f07f93b37b8483aadb5ba717f556f3a4ac507b281b4ca01e6c6288aa3e3ae5'
+SEARXNG_URL = os.getenv("SEARXNG_URL")
+SEARXNG_KEY = os.getenv("SEARXNG_KEY")
+
+logger.info(f"SearXNG URL: {SEARXNG_URL}")
+logger.info(f"SearXNG Key: {SEARXNG_KEY}")
+
+# ... other environment variables ...
+CUSTOM_LLM = os.getenv("CUSTOM_LLM")
+CUSTOM_LLM_DEFAULT_MODEL = os.getenv("CUSTOM_LLM_DEFAULT_MODEL")
+
+logger.info(f"CUSTOM_LLM: {CUSTOM_LLM}")
+logger.info(f"CUSTOM_LLM_DEFAULT_MODEL: {CUSTOM_LLM_DEFAULT_MODEL}")
+
+# Define the fetch_custom_models function here
+def fetch_custom_models():
+    if not CUSTOM_LLM:
+        return []
+    try:
+        response = requests.get(f"{CUSTOM_LLM}/v1/models")
+        response.raise_for_status()
+        models = response.json().get("data", [])
+        return [model["id"] for model in models]
+    except Exception as e:
+        logger.error(f"Error fetching custom models: {e}")
+        return []
+
+# Fetch custom models and determine the default model
+custom_models = fetch_custom_models()
+all_models = ["huggingface", "groq", "mistral"] + custom_models
+
+# Determine the default model
+default_model = CUSTOM_LLM_DEFAULT_MODEL if CUSTOM_LLM_DEFAULT_MODEL in all_models else "groq"
+
+logger.info(f"Default model selected: {default_model}")
 
 # Use the environment variable
 HF_TOKEN = os.getenv("HF_TOKEN")
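With this hunk the SearXNG endpoint, its key, and any OpenAI-compatible custom LLM endpoint come from the environment instead of being hard-coded. A minimal sketch of the expected configuration, assuming a standard `.env` file picked up by `load_dotenv()` (all values here are placeholders, not from the commit):

```python
# Placeholder .env contents (deployment-specific, not part of the commit):
#   SEARXNG_URL=https://your-searxng-instance.example/search
#   SEARXNG_KEY=your-secret-key
#   CUSTOM_LLM=http://localhost:8000            # any OpenAI-compatible server
#   CUSTOM_LLM_DEFAULT_MODEL=your-model-id
import os
from dotenv import load_dotenv

load_dotenv()
# fetch_custom_models() above expects GET {CUSTOM_LLM}/v1/models to return the
# OpenAI-style listing, e.g. {"data": [{"id": "your-model-id"}, ...]}.
assert os.getenv("SEARXNG_URL"), "SEARXNG_URL must be set or searches will fail"
```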
@@ -75,7 +109,89 @@ mistral_client = Mistral(api_key=MISTRAL_API_KEY)
 similarity_model = SentenceTransformer('all-MiniLM-L6-v2')
 
 
-def determine_query_type(query: str, chat_history: str, llm_client) -> str:
+
+# Step 1: Create a base class for AI models
+class AIModel(ABC):
+    @abstractmethod
+    def generate_response(self, messages: List[Dict[str, str]], max_tokens: int, temperature: float) -> str:
+        pass
+
+# Step 2: Implement specific classes for each AI model
+class HuggingFaceModel(AIModel):
+    def __init__(self, client):
+        self.client = client
+
+    def generate_response(self, messages: List[Dict[str, str]], max_tokens: int, temperature: float) -> str:
+        response = self.client.chat_completion(
+            messages=messages,
+            max_tokens=max_tokens,
+            temperature=temperature
+        )
+        return response.choices[0].message.content.strip()
+
+class GroqModel(AIModel):
+    def __init__(self, client):
+        self.client = client
+
+    def generate_response(self, messages: List[Dict[str, str]], max_tokens: int, temperature: float) -> str:
+        response = self.client.chat.completions.create(
+            messages=messages,
+            model="llama-3.1-70b-versatile",
+            max_tokens=max_tokens,
+            temperature=temperature
+        )
+        return response.choices[0].message.content.strip()
+
+class MistralModel(AIModel):
+    def __init__(self, client):
+        self.client = client
+
+    def generate_response(self, messages: List[Dict[str, str]], max_tokens: int, temperature: float) -> str:
+        response = self.client.chat.complete(
+            model="open-mistral-nemo",
+            messages=messages,
+            max_tokens=max_tokens,
+            temperature=temperature
+        )
+        return response.choices[0].message.content.strip()
+
+class CustomModel(AIModel):
+    def __init__(self, model_name):
+        self.model_name = model_name
+
+    def generate_response(self, messages: List[Dict[str, str]], max_tokens: int, temperature: float) -> str:
+        try:
+            response = requests.post(
+                f"{CUSTOM_LLM}/v1/chat/completions",
+                json={
+                    "model": self.model_name,
+                    "messages": messages,
+                    "max_tokens": max_tokens,
+                    "temperature": temperature
+                }
+            )
+            response.raise_for_status()
+            return response.json()["choices"][0]["message"]["content"].strip()
+        except Exception as e:
+            logger.error(f"Error generating response from custom model: {e}")
+            return "Error: Unable to generate response from custom model."
+
+# Step 3: Use a factory pattern to create model instances
+class AIModelFactory:
+    @staticmethod
+    def create_model(model_name: str, client: Any = None) -> AIModel:
+        if model_name == "huggingface":
+            return HuggingFaceModel(client)
+        elif model_name == "groq":
+            return GroqModel(client)
+        elif model_name == "mistral":
+            return MistralModel(client)
+        elif CUSTOM_LLM and model_name in fetch_custom_models():
+            return CustomModel(model_name)
+        else:
+            raise ValueError(f"Unsupported model: {model_name}")
+
+def determine_query_type(query: str, chat_history: str, ai_model: AIModel) -> str:
     system_prompt = """You are Sentinel, an intelligent AI agent tasked with determining whether a user query requires a web search or can be answered using your existing knowledge base. Your knowledge cutoff date is 2023, and the current year is 2024. Your task is to analyze the query and decide on the appropriate action.
 
 Instructions for Sentinel:
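The abstract base class plus factory above gives every provider the same `generate_response()` surface, so the rest of the app no longer branches on provider names. A hypothetical usage sketch (message contents are illustrative, not from the commit):

```python
# Hypothetical usage of AIModelFactory (illustrative values).
# "mistral" wraps the module-level Mistral client; a custom model name would
# instead route through CustomModel and the CUSTOM_LLM endpoint, client-free.
ai_model = AIModelFactory.create_model("mistral", mistral_client)
reply = ai_model.generate_response(
    messages=[
        {"role": "system", "content": "You are a terse assistant."},
        {"role": "user", "content": "Summarize SearXNG in one line."},
    ],
    max_tokens=64,
    temperature=0.2,
)
print(reply)
```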
@@ -120,18 +236,18 @@ def determine_query_type(query: str, chat_history: str, llm_client) -> str:
     ]
 
     try:
-        response = llm_client.chat_completion(
+        response = ai_model.generate_response(
             messages=messages,
             max_tokens=10,
             temperature=0.2
         )
-        decision = response.choices[0].message.content.strip().lower()
+        decision = response.strip().lower()
         return "web_search" if decision == "web_search" else "knowledge_base"
     except Exception as e:
         logger.error(f"Error determining query type: {e}")
         return "web_search"  # Default to web search if there's an error
 
-def generate_ai_response(query: str, chat_history: str, llm_client, model: str) -> str:
+def generate_ai_response(query: str, chat_history: str, ai_model: AIModel, temperature: float) -> str:
     system_prompt = """You are a helpful AI assistant. Provide a concise and informative response to the user's query based on your existing knowledge. Do not make up information or claim to have real-time data."""
 
     user_prompt = f"""
@@ -149,29 +265,12 @@ def generate_ai_response(query: str, chat_history: str, llm_client, model: str)
     ]
 
     try:
-        if model == "groq":
-            response = groq_client.chat.completions.create(
-                messages=messages,
-                model="llama-3.1-70b-versatile",
-                max_tokens=500,
-                temperature=0.7
-            )
-            return response.choices[0].message.content.strip()
-        elif model == "mistral":
-            response = mistral_client.chat.complete(
-                model="open-mistral-nemo",
-                messages=messages,
-                max_tokens=500,
-                temperature=0.7
-            )
-            return response.choices[0].message.content.strip()
-        else:  # huggingface
-            response = llm_client.chat_completion(
-                messages=messages,
-                max_tokens=500,
-                temperature=0.7
-            )
-            return response.choices[0].message.content.strip()
+        response = ai_model.generate_response(
+            messages=messages,
+            max_tokens=500,
+            temperature=temperature
+        )
+        return response
     except Exception as e:
         logger.error(f"Error generating AI response: {e}")
         return "I apologize, but I'm having trouble generating a response at the moment. Please try again later."
@@ -282,8 +381,8 @@ def rephrase_query(chat_history, query, temperature=0.2):
 - First, check if the query contains words indicating current information (e.g., "today", "now", "current", "latest"):
   - If present, do NOT add any date operators to the query
 - Otherwise, if the query mentions a specific time period (e.g., a quarter, year, or date range):
-  - Add appropriate "after:" operators to the end of the rephrased query.
-  - Use the format "after:YYYY" for date ranges.
+  - Add appropriate "after: " operators to the end of the rephrased query.
+  - Use the format "after: YYYY" for date ranges.
 - If no specific time period is mentioned and no current-time indicators are present:
   - Append "after: {CURRENT_YEAR}" to the end of the rephrased query.
 - Do not use quotes or the "+" operator when adding dates.
@@ -300,16 +399,20 @@ def rephrase_query(chat_history, query, temperature=0.2):
 
 **Scenario 2: New Topic with Specific Quarter**
 - **User Query**: "How did Bank of America perform during Q2 2024?"
-- **Rephrased Query**: "How did \"Bank of America\" perform during Q2 2024 after:2024"
+- **Rephrased Query**: "How did \"Bank of America\" perform during Q2 2024 after: 2024"
 
 **Scenario 3: Continuation with Date Range**
 - **Previous Query**: "What were Apple's sales figures for 2023?"
 - **User Query**: "How about for the first half of 2024?"
-- **Rephrased Query**: "How about \"Apple\"'s sales figures for the first half of 2024 after:2024"
+- **Rephrased Query**: "How about \"Apple\"'s sales figures for the first half of 2024 after: 2024"
 
 **Scenario 4: Current Status Query**
 - **User Query**: "What is the current market share of Toyota and Honda in the US?"
 - **Rephrased Query**: "What is the current market share of \"Toyota\" and \"Honda\" in the \"US\""
+
+**Scenario 5: Terse Query with Specific Quarter**
+- **User Query**: "Bank of America Q2 2024 earnings?"
+- **Rephrased Query**: "\"Bank of America\" Q2 2024 earnings after: 2024"
 """
 
     # Create the user prompt with the chat history and current query
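The date rules are easiest to see on a concrete input. Per the prompt's own Scenario 2, a quarter-specific query should come back with quoted entities and a trailing date operator (the exact output still depends on the LLM):

```python
# Illustrative call; the expected shape is taken from Scenario 2 above.
rephrased = rephrase_query("", "How did Bank of America perform during Q2 2024?")
# e.g. 'How did "Bank of America" perform during Q2 2024 after: 2024'
```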
@@ -527,7 +630,7 @@ def is_content_unique(new_content, existing_contents, similarity_threshold=0.8):
     return True
 
 def assess_relevance_and_summarize(llm_client, query, document, temperature=0.2):
-    system_prompt = """You are a world-class AI assistant specializing in financial news analysis. Your task is to assess the relevance of a given document to a user's query and provide a detailed summary if it's relevant."""
+    system_prompt = """You are a world-class AI assistant specializing in news analysis. Your task is to assess the relevance of a given document to a user's query and provide a detailed summary if it's relevant."""
 
     user_prompt = f"""
     Query: {query}
@@ -542,7 +645,7 @@ Instructions:
 - Key facts and figures
 - Dates of events or announcements
 - Names of important entities mentioned
-- Any financial metrics or changes reported
+- Any metrics or changes reported
 - The potential impact or significance of the news
 3. If not relevant, simply state "Not relevant".
 
@@ -550,7 +653,7 @@ Your response should be in the following format:
 Relevant: [Yes/No]
 Summary: [Your detailed summary if relevant, or "Not relevant" if not]
 
-Remember to focus on financial aspects and implications in your assessment and summary. Aim to make the summary distinctive, highlighting what makes this particular news item unique compared to similar news.
+Remember to focus on key aspects and implications in your assessment and summary. Aim to make the summary distinctive, highlighting what makes this particular news item unique compared to similar news.
 """
 
     messages = [
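The prompt pins the reply to a two-line `Relevant:`/`Summary:` format, which downstream code presumably parses. A hedged parsing sketch (the commit's actual parser is not in this hunk):

```python
# Illustrative parse of the fixed response format (not the commit's parser).
raw = "Relevant: Yes\nSummary: Q2 profit rose 10% on trading revenue."
first, _, rest = raw.partition("\n")
relevant = first.split(":", 1)[1].strip().lower() == "yes"
summary = rest.split("Summary:", 1)[1].strip() if relevant else None
```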
@@ -596,7 +699,7 @@ def scrape_full_content(url, max_chars=3000, timeout=5, use_pydf2=True):
         return ""
 
 def llm_summarize(json_input, model, temperature=0.2):
-    system_prompt = """You are Sentinel, a world-class Financial analysis AI model who is expert at searching the web and answering user's queries. You are also an expert at summarizing web pages or documents and searching for content in them."""
+    system_prompt = """You are Sentinel, a world-class AI model who is an expert at searching the web and answering users' queries. You are also an expert at summarizing web pages or documents and searching for content in them."""
     user_prompt = f"""
     Please provide a comprehensive summary based on the following JSON input:
     {json_input}
@@ -654,8 +757,23 @@ Instructions:
         logger.error(f"Error in LLM summarization: {e}")
         return "Error: Unable to generate a summary. Please try again."
 
-def search_and_scrape(query, chat_history, num_results=5, max_chars=3000, time_range="", language="all", category="",
-                      engines=[], safesearch=2, method="GET", llm_temperature=0.2, timeout=5, model="huggingface", use_pydf2=True):
+def search_and_scrape(
+    query: str,
+    chat_history: str,
+    ai_model: AIModel,
+    num_results: int = 10,
+    max_chars: int = 1500,
+    time_range: str = "",
+    language: str = "en",
+    category: str = "general",
+    engines: List[str] = [],
+    safesearch: int = 2,
+    method: str = "GET",
+    llm_temperature: float = 0.2,
+    timeout: int = 5,
+    model: str = "huggingface",
+    use_pydf2: bool = True
+):
     try:
         # Step 1: Rephrase the Query
         rephrased_query = rephrase_query(chat_history, query, temperature=llm_temperature)
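With the signature now spelled out one parameter per line, a call site passes the model wrapper explicitly. A hypothetical invocation with mostly default arguments:

```python
# Hypothetical call (argument values illustrative).
result = search_and_scrape(
    query="Bank of America Q2 2024 earnings",
    chat_history="",
    ai_model=AIModelFactory.create_model("mistral", mistral_client),
    num_results=5,
    category="news",
)
print(result)
```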
@@ -801,16 +919,16 @@ def search_and_scrape(query, chat_history, num_results=5, max_chars=3000, time_r
 
         if not relevant_documents:
             logger.warning("No relevant and unique documents found.")
-            return "No relevant and unique financial news found for the given query."
+            return "No relevant and unique news found for the given query."
 
         # Step 5: Rerank documents based on similarity to query and prioritize entity domain
         reranked_docs = rerank_documents_with_priority(rephrased_query, relevant_documents, entity_domain, similarity_threshold=0.95, max_results=num_results)
 
         if not reranked_docs:
             logger.warning("No documents remained after reranking.")
-            return "No relevant financial news found after filtering and ranking."
+            return "No relevant news found after filtering and ranking."
 
-        logger.info(f"Reranked and filtered to top {len(reranked_docs)} unique, finance-related documents.")
+        logger.info(f"Reranked and filtered to top {len(reranked_docs)} unique, related documents.")
 
         # Step 6: Scrape full content for top documents (up to num_results)
         for doc in reranked_docs[:num_results]:
@@ -839,19 +957,39 @@ def search_and_scrape(query, chat_history, num_results=5, max_chars=3000, time_r
         logger.error(f"Unexpected error in search_and_scrape: {e}")
         return f"An unexpected error occurred during the search and scrape process: {e}"
 
-def chat_function(message: str, history: List[Tuple[str, str]], num_results: int, max_chars: int, time_range: str, language: str, category: str, engines: List[str], safesearch: int, method: str, llm_temperature: float, model: str, use_pydf2: bool):
+# Helper function to get the appropriate client for each model
+def get_client_for_model(model: str) -> Any:
+    if model == "huggingface":
+        return InferenceClient("mistralai/Mistral-Small-Instruct-2409", token=HF_TOKEN)
+    elif model == "groq":
+        return Groq(api_key=GROQ_API_KEY)
+    elif model == "mistral":
+        return Mistral(api_key=MISTRAL_API_KEY)
+    elif CUSTOM_LLM and (model in fetch_custom_models() or model == CUSTOM_LLM_DEFAULT_MODEL):
+        return None  # CustomModel doesn't need a client
+    else:
+        raise ValueError(f"Unsupported model: {model}")
+
+def chat_function(message: str, history: List[Tuple[str, str]], only_web_search: bool, num_results: int, max_chars: int, time_range: str, language: str, category: str, engines: List[str], safesearch: int, method: str, llm_temperature: float, model: str, use_pydf2: bool):
     chat_history = "\n".join([f"{role}: {msg}" for role, msg in history])
 
-    query_type = determine_query_type(message, chat_history, client)
+    # Create the appropriate AI model
+    ai_model = AIModelFactory.create_model(model, get_client_for_model(model))
+
+    if only_web_search:
+        query_type = "web_search"
+    else:
+        query_type = determine_query_type(message, chat_history, ai_model)
 
     if query_type == "knowledge_base":
-        response = generate_ai_response(message, chat_history, client, model)
+        response = generate_ai_response(message, chat_history, ai_model, llm_temperature)
     else:  # web_search
         gr.Info("Initiating Web Search")
         yield "Request you to sit back and relax until I scrape the web for up-to-date information"
         response = search_and_scrape(
             query=message,
             chat_history=chat_history,
+            ai_model=ai_model,
             num_results=num_results,
             max_chars=max_chars,
             time_range=time_range,
@@ -870,15 +1008,16 @@ def chat_function(message: str, history: List[Tuple[str, str]], num_results: int
 
 iface = gr.ChatInterface(
     chat_function,
-    title="Web Scraper for Financial News with Sentinel AI",
+    title="Web Scraper for News with Sentinel AI",
     description="Ask Sentinel any question. It will search the web for recent information or use its knowledge base as appropriate.",
     theme=gr.Theme.from_hub("allenai/gradio-theme"),
     additional_inputs=[
-        gr.Slider(5, 20, value=10, step=1, label="Number of initial results"),
+        gr.Checkbox(label="Only do web search", value=True),
+        gr.Slider(3, 20, value=3, step=1, label="Number of initial results"),
         gr.Slider(500, 10000, value=1500, step=100, label="Max characters to retrieve"),
-        gr.Dropdown(["", "day", "week", "month", "year"], value="", label="Time Range"),
-        gr.Dropdown(["", "all", "en", "fr", "de", "es", "it", "nl", "pt", "pl", "ru", "zh"], value="", label="Language"),
-        gr.Dropdown(["", "general", "news", "images", "videos", "music", "files", "it", "science", "social media"], value="", label="Category"),
+        gr.Dropdown(["", "day", "week", "month", "year"], value="week", label="Time Range"),
+        gr.Dropdown(["", "all", "en", "fr", "de", "es", "it", "nl", "pt", "pl", "ru", "zh"], value="en", label="Language"),
+        gr.Dropdown(["", "general", "news", "images", "videos", "music", "files", "it", "science", "social media"], value="general", label="Category"),
         gr.Dropdown(
             ["google", "bing", "duckduckgo", "baidu", "yahoo", "qwant", "startpage"],
             multiselect=True,
@@ -886,10 +1025,10 @@ iface = gr.ChatInterface(
             label="Engines"
         ),
         gr.Slider(0, 2, value=2, step=1, label="Safe Search Level"),
-        gr.Radio(["GET", "POST"], value="POST", label="HTTP Method"),
+        gr.Radio(["GET", "POST"], value="GET", label="HTTP Method"),
         gr.Slider(0, 1, value=0.2, step=0.1, label="LLM Temperature"),
-        gr.Dropdown(["huggingface", "groq", "mistral"], value="mistral", label="LLM Model"),
-        gr.Checkbox(label="Use PyPDF2 for PDF scraping", value=False),
+        gr.Dropdown(all_models, value=default_model, label="LLM Model"),
+        gr.Checkbox(label="Use PyPDF2 for PDF scraping", value=True),
     ],
     additional_inputs_accordion=gr.Accordion("⚙️ Advanced Parameters", open=True),
     retry_btn="Retry",
@@ -904,5 +1043,31 @@ iface = gr.ChatInterface(
 )
 
 if __name__ == "__main__":
-    logger.info("Starting the SearXNG Scraper for Financial News using ChatInterface with Advanced Parameters")
-    iface.launch(share=True)
+    logger.info("Starting the SearXNG Scraper for News using ChatInterface with Advanced Parameters")
+    iface.launch(server_name="0.0.0.0", server_port=7860, share=False)