dibend committed
Commit 47d20f1 · verified · 1 Parent(s): 430297a

Update app.py

Files changed (1): app.py (+28 -27)
app.py CHANGED
@@ -2,11 +2,11 @@ from transformers import pipeline
 import gradio as gr
 import requests

-# Initialize the summarization model
+# Initialize the summarization pipeline (global initialization for efficiency)
 summarizer = pipeline("summarization", model="facebook/bart-large-cnn")

-# Function to fetch Wikipedia content
-def fetch_wikipedia_article(search_term):
+# Function to fetch content from Wikipedia with a limit on length
+def fetch_wikipedia_content(search_term, max_chars=10000):
     url = "https://en.wikipedia.org/w/api.php"
     params = {
         "action": "query",
@@ -16,51 +16,52 @@ def fetch_wikipedia_article(search_term):
         "titles": search_term,
         "redirects": 1  # Follow redirects
     }
-    headers = {
-        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
-    }
     try:
         response = requests.get(url, headers=headers, params=params)
         response.raise_for_status()
         data = response.json()
         page = next(iter(data["query"]["pages"].values()))
-        return page.get("extract", "") if "extract" in page else "⚠️ No content found for this topic."
+        content = page.get("extract", "")
+        return content[:max_chars] if content else "⚠️ No content found for this topic."
     except requests.RequestException as e:
         return f"⚠️ Error fetching article: {e}"

-# Function to split long text into chunks
-def split_into_chunks(text, max_words=750):
+# Function to split text into manageable chunks
+def split_text_into_chunks(text, max_words=500):
     words = text.split()
-    return [" ".join(words[i:i+max_words]) for i in range(0, len(words), max_words)]
+    return [" ".join(words[i:i + max_words]) for i in range(0, len(words), max_words)]

-# Function to summarize large text
-def summarize_large_text(text):
-    chunks = split_into_chunks(text)
+# Function to summarize text in chunks
+def summarize_text(text):
+    chunks = split_text_into_chunks(text)
     summaries = []
-    for chunk in chunks:
+    for i, chunk in enumerate(chunks):
         try:
-            summary = summarizer(chunk, max_length=150, min_length=40, do_sample=False)[0]['summary_text']
+            chunk_word_count = len(chunk.split())
+            max_summary_length = min(120, max(40, chunk_word_count // 2))  # Optimized for smaller summaries
+            summary = summarizer(chunk, max_length=max_summary_length, min_length=30, do_sample=False)[0]['summary_text']
             summaries.append(summary)
         except Exception as e:
-            summaries.append(f"⚠️ Error summarizing chunk: {str(e)}")
-    return " ".join(summaries)
+            summaries.append(f"⚠️ Error summarizing chunk {i + 1}: {str(e)}")
+    return "\n\n".join(summaries)

-# Main function to fetch and summarize Wikipedia articles
-def summarize_wikipedia_article(search_term):
-    content = fetch_wikipedia_article(search_term)
+# Main function for Gradio
+def fetch_and_summarize(search_term):
+    content = fetch_wikipedia_content(search_term)
     if content.startswith("⚠️"):
-        return content  # Return error or empty content message
-    return summarize_large_text(content)
+        return content
+    return summarize_text(content)

 # Gradio Interface
 iface = gr.Interface(
-    fn=summarize_wikipedia_article,
+    fn=fetch_and_summarize,
     inputs=gr.Textbox(lines=1, placeholder="Enter a Wikipedia topic", label="Wikipedia Topic"),
     outputs=gr.Textbox(label="Summarized Content"),
-    title="Wikipedia Article Summarizer",
-    description="Fetch and summarize Wikipedia articles using AI. Enter a topic to get a concise summary.",
+    title="Optimized Wikipedia Article Summarizer",
+    description="Fetch and summarize Wikipedia articles efficiently. Optimized for lightweight summarization.",
+    allow_flagging="never"  # Disable flagging for a lightweight deployment
 )

-# Launch the Gradio app
+# Launch Gradio with queuing for handling concurrent requests
 if __name__ == "__main__":
-    iface.launch(debug=True)
+    iface.queue(concurrency_count=4).launch(debug=False)
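
A note on the fetch hunk: the commit deletes the `headers` definition, but the unchanged context line `response = requests.get(url, headers=headers, params=params)` still references it, so the new `fetch_wikipedia_content` raises `NameError: name 'headers' is not defined` on the first request (the `except requests.RequestException` clause does not catch `NameError`). Below is a minimal sketch of a follow-up fix; the module-level `HEADERS` name is our choice rather than anything in this commit, and the three params marked "assumed" stand in for app.py lines 13-15, which the hunk boundary hides:

```python
import requests

# Sketch of a fix, not part of commit 47d20f1: reinstate the User-Agent
# mapping the diff removed, hoisted to module level so the function is
# self-contained. The string is carried over from the pre-commit app.py.
HEADERS = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
                  "(KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
}

def fetch_wikipedia_content(search_term, max_chars=10000):
    url = "https://en.wikipedia.org/w/api.php"
    params = {
        "action": "query",
        "format": "json",     # assumed: hidden between the two hunks
        "prop": "extracts",   # assumed
        "explaintext": 1,     # assumed
        "titles": search_term,
        "redirects": 1,  # Follow redirects
    }
    try:
        response = requests.get(url, headers=HEADERS, params=params)
        response.raise_for_status()
        data = response.json()
        page = next(iter(data["query"]["pages"].values()))
        content = page.get("extract", "")
        return content[:max_chars] if content else "⚠️ No content found for this topic."
    except requests.RequestException as e:
        return f"⚠️ Error fetching article: {e}"
```

Keeping an explicit User-Agent is also in line with Wikimedia's API etiquette, which asks clients to identify themselves.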
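
The other substantive change is the adaptive output budget: `max_length` now scales with chunk size instead of the old fixed 150. A quick standalone check of that arithmetic, using a hypothetical 1,200-word extract (no model required):

```python
# Chunking and length logic copied from the new summarize_text();
# the sample input is made up for illustration.
def split_text_into_chunks(text, max_words=500):
    words = text.split()
    return [" ".join(words[i:i + max_words]) for i in range(0, len(words), max_words)]

sample = "word " * 1200  # stand-in for a 1,200-word Wikipedia extract
for i, chunk in enumerate(split_text_into_chunks(sample)):
    chunk_word_count = len(chunk.split())
    max_summary_length = min(120, max(40, chunk_word_count // 2))
    print(f"chunk {i + 1}: {chunk_word_count} words -> max_length={max_summary_length}")
# chunk 1: 500 words -> max_length=120
# chunk 2: 500 words -> max_length=120
# chunk 3: 200 words -> max_length=100
```

The cap at 120 and floor at 40 keep each chunk's summary proportional to its input, which is what the "Optimized for smaller summaries" comment refers to.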