hackerbyhobby committed on
Commit
1eb6c81
·
unverified ·
1 Parent(s): 60ee370

one last try

Browse files
Files changed (2) hide show
  1. app.py +202 -77
  2. app.py.bestofmon +210 -0
app.py CHANGED
@@ -5,6 +5,22 @@ from transformers import pipeline
5
  import re
6
  from langdetect import detect
7
  from deep_translator import GoogleTranslator
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
 
9
  # Translator instance
10
  translator = GoogleTranslator(source="auto", target="es")
@@ -21,15 +37,58 @@ model_name = "joeddav/xlm-roberta-large-xnli"
21
  classifier = pipeline("zero-shot-classification", model=model_name)
22
  CANDIDATE_LABELS = ["SMiShing", "Other Scam", "Legitimate"]
23
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24
  def get_keywords_by_language(text: str):
25
  """
26
  Detect language using `langdetect` and translate keywords if needed.
27
  """
28
- snippet = text[:200]
29
  try:
30
  detected_lang = detect(snippet)
31
  except Exception:
32
- detected_lang = "en"
33
 
34
  if detected_lang == "es":
35
  smishing_in_spanish = [
@@ -67,12 +126,10 @@ def boost_probabilities(probabilities: dict, text: str):
67
  p_other_scam += other_scam_boost
68
  p_legit -= (smishing_boost + other_scam_boost)
69
 
70
- # Clamp
71
  p_smishing = max(p_smishing, 0.0)
72
  p_other_scam = max(p_other_scam, 0.0)
73
  p_legit = max(p_legit, 0.0)
74
 
75
- # Re-normalize
76
  total = p_smishing + p_other_scam + p_legit
77
  if total > 0:
78
  p_smishing /= total
@@ -85,19 +142,104 @@ def boost_probabilities(probabilities: dict, text: str):
85
  "SMiShing": p_smishing,
86
  "Other Scam": p_other_scam,
87
  "Legitimate": p_legit,
88
- "detected_lang": detected_lang
89
  }
90
 
91
- def smishing_detector(input_type, text, image):
92
  """
93
- Main detection function combining text (if 'Text') and OCR (if 'Screenshot').
94
  """
95
- if input_type == "Text":
96
- combined_text = text.strip() if text else ""
97
- else:
98
- combined_text = ""
99
- if image is not None:
100
- combined_text = pytesseract.image_to_string(image, lang="spa+eng").strip()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
101
 
102
  if not combined_text:
103
  return {
@@ -105,7 +247,9 @@ def smishing_detector(input_type, text, image):
105
  "label": "No text provided",
106
  "confidence": 0.0,
107
  "keywords_found": [],
108
- "urls_found": []
 
 
109
  }
110
 
111
  result = classifier(
@@ -114,18 +258,12 @@ def smishing_detector(input_type, text, image):
114
  hypothesis_template="This message is {}."
115
  )
116
  original_probs = {k: float(v) for k, v in zip(result["labels"], result["scores"])}
117
-
118
  boosted = boost_probabilities(original_probs, combined_text)
119
 
120
- # Patched snippet begins
121
- # 1. Extract language first, preserving it
122
- detected_lang = boosted.get("detected_lang", "en")
123
- # 2. Remove it so only numeric keys remain
124
- boosted.pop("detected_lang", None)
125
- # 3. Convert numeric values to float
126
  for k, v in boosted.items():
127
  boosted[k] = float(v)
128
- # Patched snippet ends
129
 
130
  final_label = max(boosted, key=boosted.get)
131
  final_confidence = round(boosted[final_label], 3)
@@ -137,6 +275,24 @@ def smishing_detector(input_type, text, image):
137
  found_smishing = [kw for kw in smishing_keys if kw in lower_text]
138
  found_other_scam = [kw for kw in scam_keys if kw in lower_text]
139
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
140
  return {
141
  "detected_language": detected_lang,
142
  "text_used_for_classification": combined_text,
@@ -147,64 +303,33 @@ def smishing_detector(input_type, text, image):
147
  "smishing_keywords_found": found_smishing,
148
  "other_scam_keywords_found": found_other_scam,
149
  "urls_found": found_urls,
 
 
 
150
  }
151
 
152
- #
153
- # Gradio interface with dynamic visibility
154
- #
155
- def toggle_inputs(choice):
156
- """
157
- Return updates for (text_input, image_input) based on the radio selection.
158
- """
159
- if choice == "Text":
160
- # Show text input, hide image
161
- return gr.update(visible=True), gr.update(visible=False)
162
- else:
163
- # choice == "Screenshot"
164
- # Hide text input, show image
165
- return gr.update(visible=False), gr.update(visible=True)
166
-
167
- with gr.Blocks() as demo:
168
- gr.Markdown("## SMiShing & Scam Detector (Choose Text or Screenshot)")
169
-
170
- with gr.Row():
171
- input_type = gr.Radio(
172
- choices=["Text", "Screenshot"],
173
- value="Text",
174
- label="Choose Input Type"
175
  )
176
-
177
- text_input = gr.Textbox(
178
- lines=3,
179
- label="Paste Suspicious SMS Text",
180
- placeholder="Type or paste the message here...",
181
- visible=True # default
182
- )
183
-
184
- image_input = gr.Image(
185
- type="pil",
186
- label="Upload Screenshot",
187
- visible=False # hidden by default
188
- )
189
-
190
- # Whenever input_type changes, toggle which input is visible
191
- input_type.change(
192
- fn=toggle_inputs,
193
- inputs=input_type,
194
- outputs=[text_input, image_input],
195
- queue=False
196
- )
197
-
198
- # Button to run classification
199
- analyze_btn = gr.Button("Classify")
200
- output_json = gr.JSON(label="Result")
201
-
202
- # On button click, call the smishing_detector
203
- analyze_btn.click(
204
- fn=smishing_detector,
205
- inputs=[input_type, text_input, image_input],
206
- outputs=output_json
207
- )
208
 
209
  if __name__ == "__main__":
210
  demo.launch()
 
5
  import re
6
  from langdetect import detect
7
  from deep_translator import GoogleTranslator
8
+ import shap
9
+ import requests
10
+ import json
11
+ import os
12
+ import numpy as np
13
+ from shap.maskers import Text
14
+
15
# SHAP's text masker still refers to the deprecated ``np.bool`` alias, so its
# ``invariants`` method is swapped for one that builds the mask with
# ``np.bool_`` instead.
_text_masker_cls = shap.maskers._text.Text
if hasattr(_text_masker_cls, "invariants"):
    # Reference to the stock implementation (not used elsewhere in the code
    # visible here).
    original_invariants = _text_masker_cls.invariants

    def patched_invariants(self, *args):
        """Return an all-False boolean mask, one entry per cached token.

        Positional arguments are accepted and ignored.
        NOTE(review): relies on ``self._tokenized_s`` already being populated
        when SHAP calls this -- confirm against the installed SHAP version.
        """
        token_count = len(self._tokenized_s)
        return np.zeros(token_count, dtype=np.bool_)

    _text_masker_cls.invariants = patched_invariants
24
 
25
  # Translator instance
26
  translator = GoogleTranslator(source="auto", target="es")
 
37
  classifier = pipeline("zero-shot-classification", model=model_name)
38
  CANDIDATE_LABELS = ["SMiShing", "Other Scam", "Legitimate"]
39
 
40
# 3. SHAP explainer wrapped around the zero-shot pipeline; the text masker
#    reuses the pipeline's own tokenizer.
_shap_text_masker = Text(tokenizer=classifier.tokenizer)
explainer = shap.Explainer(classifier, masker=_shap_text_masker)

# Google Safe Browsing configuration. The API key is read from the
# environment; start-up is aborted when it is missing or empty.
SAFE_BROWSING_URL = "https://safebrowsing.googleapis.com/v4/threatMatches:find"
SAFE_BROWSING_API_KEY = os.environ.get("SAFE_BROWSING_API_KEY")

if SAFE_BROWSING_API_KEY is None or SAFE_BROWSING_API_KEY == "":
    raise ValueError("Google Safe Browsing API key not found. Please set it as an environment variable in your Hugging Face Space.")
50
+
51
def check_url_with_google_safebrowsing(url):
    """
    Check a URL against Google's Safe Browsing API (v4 ``threatMatches:find``).

    Args:
        url: The URL string to look up.

    Returns:
        True if the API response contains a ``matches`` entry, False
        otherwise. Lookup failures (network error, timeout, HTTP error
        status, undecodable body) are printed and also reported as False,
        so False means "not flagged", not "verified safe".
    """
    payload = {
        "client": {
            "clientId": "your-client-id",
            "clientVersion": "1.0"
        },
        "threatInfo": {
            "threatTypes": ["MALWARE", "SOCIAL_ENGINEERING", "UNWANTED_SOFTWARE", "POTENTIALLY_HARMFUL_APPLICATION"],
            "platformTypes": ["ANY_PLATFORM"],
            "threatEntryTypes": ["URL"],
            "threatEntries": [
                {"url": url}
            ]
        }
    }
    try:
        response = requests.post(
            SAFE_BROWSING_URL,
            params={"key": SAFE_BROWSING_API_KEY},
            json=payload,
            # requests never times out by default; bound the wait so a
            # stalled connection cannot hang the whole classification.
            timeout=10,
        )
        # Treat 4xx/5xx (bad key, quota exceeded, ...) as a failed lookup
        # instead of silently reading the error body as "no matches".
        response.raise_for_status()
        response_data = response.json()
    except (requests.RequestException, ValueError) as e:
        print(f"Error checking URL with Safe Browsing API: {e}")
        return False

    # An unflagged URL yields a JSON object without "matches"; guard against
    # a non-object body before the membership test.
    return isinstance(response_data, dict) and "matches" in response_data
82
+
83
  def get_keywords_by_language(text: str):
84
  """
85
  Detect language using `langdetect` and translate keywords if needed.
86
  """
87
+ snippet = text[:200] # Use a snippet for detection
88
  try:
89
  detected_lang = detect(snippet)
90
  except Exception:
91
+ detected_lang = "en" # Default to English if detection fails
92
 
93
  if detected_lang == "es":
94
  smishing_in_spanish = [
 
126
  p_other_scam += other_scam_boost
127
  p_legit -= (smishing_boost + other_scam_boost)
128
 
 
129
  p_smishing = max(p_smishing, 0.0)
130
  p_other_scam = max(p_other_scam, 0.0)
131
  p_legit = max(p_legit, 0.0)
132
 
 
133
  total = p_smishing + p_other_scam + p_legit
134
  if total > 0:
135
  p_smishing /= total
 
142
  "SMiShing": p_smishing,
143
  "Other Scam": p_other_scam,
144
  "Legitimate": p_legit,
145
+ "detected_lang": detected_lang,
146
  }
147
 
148
def explain_classification(text):
    """
    Generate SHAP explanations for the classification.

    Args:
        text: The message text to explain.

    Returns:
        The object produced by ``shap.force_plot`` for the first (only)
        input. Callers that do not need the visualization may ignore it.

    Raises:
        ValueError: If ``text`` is None, empty, or only whitespace.
    """
    if not text or not text.strip():
        raise ValueError("Cannot generate SHAP explanations for empty text.")

    shap_values = explainer([text])
    # Hand the plot back to the caller; it used to be computed and dropped,
    # which left this function with no observable result.
    # NOTE(review): assumes the explainer exposes ``expected_value`` --
    # confirm against the installed SHAP version.
    return shap.force_plot(
        explainer.expected_value[0], shap_values[0].values[0], shap_values[0].data
    )
159
+
160
def _partition_urls(found_urls, threat_analysis):
    """Split URLs into (flagged, unflagged) lists using the per-URL verdicts."""
    flagged = [u for u in found_urls if threat_analysis.get(u)]
    unflagged = [u for u in found_urls if not threat_analysis.get(u)]
    return flagged, unflagged


def generate_user_friendly_message(
    final_label: str,
    confidence: float,
    found_smishing: list,
    found_other_scam: list,
    found_urls: list,
    threat_analysis: dict
) -> str:
    """
    Build a user-friendly explanation of the classification and provide
    a brief reason why it is labeled as SMiShing, Other Scam, or Legitimate.

    Args:
        final_label: "SMiShing", "Other Scam", or anything else (treated as
            Legitimate).
        confidence: Score interpolated verbatim into the message.
        found_smishing: SMiShing keywords found in the text.
        found_other_scam: Other-scam keywords found in the text.
        found_urls: URLs found in the text.
        threat_analysis: Mapping of URL to a truthy value when the URL was
            flagged as unsafe.

    Returns:
        A one-paragraph explanation string.
    """
    # Tolerate None for any of the collections.
    found_smishing = found_smishing or []
    found_other_scam = found_other_scam or []
    found_urls = found_urls or []
    threat_analysis = threat_analysis or {}

    flagged_urls, unflagged_urls = _partition_urls(found_urls, threat_analysis)

    if final_label == "SMiShing":
        reasons = []
        if found_smishing:
            reasons.append(f"the use of suspicious keywords: {', '.join(found_smishing)}")
        if flagged_urls:
            reasons.append(f"at least one URL flagged as unsafe: {', '.join(flagged_urls)}")
        if unflagged_urls:
            reasons.append(f"other URLs may be suspicious: {', '.join(unflagged_urls)}")
        if not reasons:
            reasons.append("certain context or structure commonly used in SMiShing")

        return (
            f"This message is classified as SMiShing (confidence {confidence}). "
            "We found indications typical of phishing via SMS, such as "
            + " and ".join(reasons) + "."
        )

    if final_label == "Other Scam":
        reasons = []
        if found_other_scam:
            reasons.append(f"keywords often linked to fraudulent activity: {', '.join(found_other_scam)}")
        if flagged_urls:
            reasons.append(f"URLs flagged as unsafe: {', '.join(flagged_urls)}")
        if unflagged_urls:
            reasons.append(f"additional suspicious URLs: {', '.join(unflagged_urls)}")
        if not reasons:
            reasons.append("general content or structure known to be used in scams")

        # The lead-in ends with "such as" (like the SMiShing branch) so the
        # joined reasons continue the sentence instead of starting a
        # lower-case fragment after a full stop.
        return (
            f"This message is classified as 'Other Scam' (confidence {confidence}). "
            "It contains elements typically associated with scams, such as "
            + " and ".join(reasons) + "."
        )

    # Legitimate
    msg = (
        f"This message is classified as 'Legitimate' (confidence {confidence}). "
        "We did not detect typical phishing or scam indicators. "
    )
    if found_urls:
        if flagged_urls:
            msg += f"However, note that at least one URL appears unsafe: {', '.join(flagged_urls)}."
        else:
            msg += "Although it contains URLs, none appear to be malicious."
    else:
        matched_keywords = [*found_smishing, *found_other_scam]
        if matched_keywords:
            # Do not claim "no suspicious keywords" when some were matched.
            msg += (
                "No URLs were detected, although some keywords from the watch lists appear: "
                f"{', '.join(matched_keywords)}."
            )
        else:
            msg += "No suspicious keywords or URLs were detected."

    return msg
232
+
233
+
234
+ def smishing_detector(text, image):
235
+ """
236
+ Main detection function combining text and OCR.
237
+ """
238
+ combined_text = text or ""
239
+ if image is not None:
240
+ ocr_text = pytesseract.image_to_string(image, lang="spa+eng")
241
+ combined_text += " " + ocr_text
242
+ combined_text = combined_text.strip()
243
 
244
  if not combined_text:
245
  return {
 
247
  "label": "No text provided",
248
  "confidence": 0.0,
249
  "keywords_found": [],
250
+ "urls_found": [],
251
+ "threat_analysis": "No URLs to analyze",
252
+ "user_friendly_message": "No classification could be made since no text was provided.",
253
  }
254
 
255
  result = classifier(
 
258
  hypothesis_template="This message is {}."
259
  )
260
  original_probs = {k: float(v) for k, v in zip(result["labels"], result["scores"])}
 
261
  boosted = boost_probabilities(original_probs, combined_text)
262
 
263
+ # Extract language key first, then remove
264
+ detected_lang = boosted.pop("detected_lang", "en")
 
 
 
 
265
  for k, v in boosted.items():
266
  boosted[k] = float(v)
 
267
 
268
  final_label = max(boosted, key=boosted.get)
269
  final_confidence = round(boosted[final_label], 3)
 
275
  found_smishing = [kw for kw in smishing_keys if kw in lower_text]
276
  found_other_scam = [kw for kw in scam_keys if kw in lower_text]
277
 
278
+ # Analyze URLs using Google's Safe Browsing API
279
+ threat_analysis = {
280
+ url: check_url_with_google_safebrowsing(url) for url in found_urls
281
+ }
282
+
283
+ # Generate SHAP Explanation (optional for user insights)
284
+ explain_classification(combined_text)
285
+
286
+ # Build user-friendly message
287
+ user_friendly_msg = generate_user_friendly_message(
288
+ final_label,
289
+ final_confidence,
290
+ found_smishing,
291
+ found_other_scam,
292
+ found_urls,
293
+ threat_analysis
294
+ )
295
+
296
  return {
297
  "detected_language": detected_lang,
298
  "text_used_for_classification": combined_text,
 
303
  "smishing_keywords_found": found_smishing,
304
  "other_scam_keywords_found": found_other_scam,
305
  "urls_found": found_urls,
306
+ "threat_analysis": threat_analysis,
307
+ # The new user-friendly explanation
308
+ "user_friendly_message": user_friendly_msg,
309
  }
310
 
311
+ demo = gr.Interface(
312
+ fn=smishing_detector,
313
+ inputs=[
314
+ gr.Textbox(
315
+ lines=3,
316
+ label="Paste Suspicious SMS Text (English/Spanish)",
317
+ placeholder="Type or paste the message here..."
318
+ ),
319
+ gr.Image(
320
+ type="pil",
321
+ label="Or Upload a Screenshot (Optional)"
 
 
 
 
 
 
 
 
 
 
 
 
322
  )
323
+ ],
324
+ outputs="json",
325
+ title="SMiShing & Scam Detector with Safe Browsing",
326
+ description="""
327
+ This tool classifies messages as SMiShing, Other Scam, or Legitimate using a zero-shot model
328
+ (joeddav/xlm-roberta-large-xnli). It automatically detects if the text is Spanish or English.
329
+ It uses SHAP for explainability and checks URLs against Google's Safe Browsing API for enhanced analysis.
330
+ """,
331
+ flagging_mode="never"
332
+ )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
333
 
334
  if __name__ == "__main__":
335
  demo.launch()
app.py.bestofmon ADDED
@@ -0,0 +1,210 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import pytesseract
3
+ from PIL import Image
4
+ from transformers import pipeline
5
+ import re
6
+ from langdetect import detect
7
+ from deep_translator import GoogleTranslator
8
+
9
# Shared translator: source language auto-detected, target Spanish.
translator = GoogleTranslator(source="auto", target="es")


def _read_keyword_file(path):
    """Return the non-blank lines of *path*, stripped and lower-cased."""
    with open(path, "r", encoding="utf-8") as handle:
        stripped_lines = (raw.strip() for raw in handle)
        return [entry.lower() for entry in stripped_lines if entry]


# 1. Separate keyword lists for SMiShing and Other Scam (assumed in English)
SMISHING_KEYWORDS = _read_keyword_file("smishing_keywords.txt")
OTHER_SCAM_KEYWORDS = _read_keyword_file("other_scam_keywords.txt")

# 2. Zero-shot classification pipeline and the labels it scores
model_name = "joeddav/xlm-roberta-large-xnli"
classifier = pipeline("zero-shot-classification", model=model_name)
CANDIDATE_LABELS = ["SMiShing", "Other Scam", "Legitimate"]
23
+
24
# Spanish keyword lists, filled on first use so that each keyword is sent to
# the translator once per process instead of once per call.
_SPANISH_KEYWORDS_CACHE = {}


def _to_spanish(keywords):
    """Translate each keyword to lower-cased Spanish; keep the original if the result is empty."""
    return [(translator.translate(kw) or kw).lower() for kw in keywords]


def get_keywords_by_language(text: str):
    """
    Detect language using `langdetect` and translate keywords if needed.

    Only the first 200 characters of ``text`` are used for detection. If
    detection raises, English is assumed.

    Returns:
        A tuple ``(smishing_keywords, other_scam_keywords, lang)``. ``lang``
        is "es" when Spanish was detected (keywords translated to Spanish)
        and "en" for every other outcome (original keyword lists).
    """
    snippet = text[:200]
    try:
        detected_lang = detect(snippet)
    except Exception:
        # Best effort: any detection failure falls back to English.
        detected_lang = "en"

    if detected_lang != "es":
        return SMISHING_KEYWORDS, OTHER_SCAM_KEYWORDS, "en"

    if not _SPANISH_KEYWORDS_CACHE:
        # Translate both lists before storing either, so a failure part-way
        # through never leaves a half-filled cache behind.
        smishing_in_spanish = _to_spanish(SMISHING_KEYWORDS)
        other_scam_in_spanish = _to_spanish(OTHER_SCAM_KEYWORDS)
        _SPANISH_KEYWORDS_CACHE["smishing"] = smishing_in_spanish
        _SPANISH_KEYWORDS_CACHE["other_scam"] = other_scam_in_spanish

    # Return copies so callers cannot mutate the cached lists.
    return (
        list(_SPANISH_KEYWORDS_CACHE["smishing"]),
        list(_SPANISH_KEYWORDS_CACHE["other_scam"]),
        "es",
    )
44
+
45
def boost_probabilities(probabilities: dict, text: str):
    """
    Adjust classifier probabilities using keyword hits and URL presence.

    Each matched SMiShing / Other Scam keyword adds 0.30 to its class, an
    http(s) URL adds a further 0.35 to SMiShing, and the sum of both boosts
    is subtracted from Legitimate. Scores are floored at 0.0 and rescaled to
    sum to 1; if they all end up at zero the result is fully Legitimate.

    Returns:
        Dict with the keys "SMiShing", "Other Scam", "Legitimate" and
        "detected_lang" (language code from ``get_keywords_by_language``).
    """
    lowered = text.lower()
    smishing_terms, scam_terms, detected_lang = get_keywords_by_language(text)

    smishing_boost = 0.30 * sum(term in lowered for term in smishing_terms)
    other_scam_boost = 0.30 * sum(term in lowered for term in scam_terms)
    if re.search(r"(https?://[^\s]+)", lowered):
        smishing_boost += 0.35

    shifted = (
        probabilities.get("SMiShing", 0.0) + smishing_boost,
        probabilities.get("Other Scam", 0.0) + other_scam_boost,
        probabilities.get("Legitimate", 1.0) - (smishing_boost + other_scam_boost),
    )
    # Floor every score at zero before rescaling.
    p_smishing, p_other_scam, p_legit = (max(score, 0.0) for score in shifted)

    total = p_smishing + p_other_scam + p_legit
    if total > 0:
        p_smishing, p_other_scam, p_legit = (
            p_smishing / total,
            p_other_scam / total,
            p_legit / total,
        )
    else:
        p_smishing, p_other_scam, p_legit = 0.0, 0.0, 1.0

    return {
        "SMiShing": p_smishing,
        "Other Scam": p_other_scam,
        "Legitimate": p_legit,
        "detected_lang": detected_lang
    }
90
+
91
def smishing_detector(input_type, text, image):
    """
    Main detection function combining text (if 'Text') and OCR (if 'Screenshot').

    Args:
        input_type: "Text" to classify ``text``; any other value classifies
            the OCR output of ``image``.
        text: Message text (may be None or empty).
        image: Image passed to ``pytesseract.image_to_string`` (may be None).

    Returns:
        A dict describing the classification. When there is no text to
        classify, a short placeholder dict with label "No text provided" is
        returned instead and the classifier is not called.
    """
    if input_type == "Text":
        combined_text = text.strip() if text else ""
    elif image is not None:
        combined_text = pytesseract.image_to_string(image, lang="spa+eng").strip()
    else:
        combined_text = ""

    if not combined_text:
        return {
            "text_used_for_classification": "(none)",
            "label": "No text provided",
            "confidence": 0.0,
            "keywords_found": [],
            "urls_found": []
        }

    result = classifier(
        sequences=combined_text,
        candidate_labels=CANDIDATE_LABELS,
        hypothesis_template="This message is {}."
    )
    original_probs = {k: float(v) for k, v in zip(result["labels"], result["scores"])}

    boosted = boost_probabilities(original_probs, combined_text)

    # Pull the language code out first so only numeric scores remain.
    detected_lang = boosted.pop("detected_lang", "en")
    boosted = {label: float(score) for label, score in boosted.items()}

    final_label = max(boosted, key=boosted.get)
    final_confidence = round(boosted[final_label], 3)

    lower_text = combined_text.lower()
    smishing_keys, scam_keys, _ = get_keywords_by_language(combined_text)

    # Extract URLs from the original text, not the lowercased copy: URL paths
    # are case-sensitive, so lowercasing them reports a different URL.
    # IGNORECASE keeps upper-case schemes ("HTTP://...") matching as before.
    found_urls = re.findall(r"(https?://[^\s]+)", combined_text, flags=re.IGNORECASE)
    found_smishing = [kw for kw in smishing_keys if kw in lower_text]
    found_other_scam = [kw for kw in scam_keys if kw in lower_text]

    return {
        "detected_language": detected_lang,
        "text_used_for_classification": combined_text,
        "original_probabilities": {k: round(v, 3) for k, v in original_probs.items()},
        "boosted_probabilities": {k: round(v, 3) for k, v in boosted.items()},
        "label": final_label,
        "confidence": final_confidence,
        "smishing_keywords_found": found_smishing,
        "other_scam_keywords_found": found_other_scam,
        "urls_found": found_urls,
    }
151
+
152
+ #
153
+ # Gradio interface with dynamic visibility
154
+ #
155
def toggle_inputs(choice):
    """
    Build the visibility updates for (text_input, image_input).

    The text box is shown and the image hidden when ``choice`` is "Text";
    for any other selection (i.e. "Screenshot") it is the other way round.
    """
    show_text = choice == "Text"
    return gr.update(visible=show_text), gr.update(visible=not show_text)
166
+
167
+ with gr.Blocks() as demo:
168
+ gr.Markdown("## SMiShing & Scam Detector (Choose Text or Screenshot)")
169
+
170
+ with gr.Row():
171
+ input_type = gr.Radio(
172
+ choices=["Text", "Screenshot"],
173
+ value="Text",
174
+ label="Choose Input Type"
175
+ )
176
+
177
+ text_input = gr.Textbox(
178
+ lines=3,
179
+ label="Paste Suspicious SMS Text",
180
+ placeholder="Type or paste the message here...",
181
+ visible=True # default
182
+ )
183
+
184
+ image_input = gr.Image(
185
+ type="pil",
186
+ label="Upload Screenshot",
187
+ visible=False # hidden by default
188
+ )
189
+
190
+ # Whenever input_type changes, toggle which input is visible
191
+ input_type.change(
192
+ fn=toggle_inputs,
193
+ inputs=input_type,
194
+ outputs=[text_input, image_input],
195
+ queue=False
196
+ )
197
+
198
+ # Button to run classification
199
+ analyze_btn = gr.Button("Classify")
200
+ output_json = gr.JSON(label="Result")
201
+
202
+ # On button click, call the smishing_detector
203
+ analyze_btn.click(
204
+ fn=smishing_detector,
205
+ inputs=[input_type, text_input, image_input],
206
+ outputs=output_json
207
+ )
208
+
209
+ if __name__ == "__main__":
210
+ demo.launch()