Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -2,11 +2,11 @@ from transformers import pipeline
|
|
2 |
import gradio as gr
|
3 |
import requests
|
4 |
|
5 |
-
# Initialize the summarization
|
6 |
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
|
7 |
|
8 |
-
# Function to fetch Wikipedia
|
9 |
-
def
|
10 |
url = "https://en.wikipedia.org/w/api.php"
|
11 |
params = {
|
12 |
"action": "query",
|
@@ -16,51 +16,52 @@ def fetch_wikipedia_article(search_term):
|
|
16 |
"titles": search_term,
|
17 |
"redirects": 1 # Follow redirects
|
18 |
}
|
19 |
-
headers = {
|
20 |
-
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
|
21 |
-
}
|
22 |
try:
|
23 |
response = requests.get(url, headers=headers, params=params)
|
24 |
response.raise_for_status()
|
25 |
data = response.json()
|
26 |
page = next(iter(data["query"]["pages"].values()))
|
27 |
-
|
|
|
28 |
except requests.RequestException as e:
|
29 |
return f"⚠️ Error fetching article: {e}"
|
30 |
|
31 |
-
# Function to split
|
32 |
-
def
|
33 |
words = text.split()
|
34 |
-
return [" ".join(words[i:i+max_words]) for i in range(0, len(words), max_words)]
|
35 |
|
36 |
-
# Function to summarize
|
37 |
-
def
|
38 |
-
chunks =
|
39 |
summaries = []
|
40 |
-
for chunk in chunks:
|
41 |
try:
|
42 |
-
|
|
|
|
|
43 |
summaries.append(summary)
|
44 |
except Exception as e:
|
45 |
-
summaries.append(f"⚠️ Error summarizing chunk: {str(e)}")
|
46 |
-
return "
|
47 |
|
48 |
-
# Main function
|
49 |
-
def
|
50 |
-
content =
|
51 |
if content.startswith("⚠️"):
|
52 |
-
return content
|
53 |
-
return
|
54 |
|
55 |
# Gradio Interface
|
56 |
iface = gr.Interface(
|
57 |
-
fn=
|
58 |
inputs=gr.Textbox(lines=1, placeholder="Enter a Wikipedia topic", label="Wikipedia Topic"),
|
59 |
outputs=gr.Textbox(label="Summarized Content"),
|
60 |
-
title="Wikipedia Article Summarizer",
|
61 |
-
description="Fetch and summarize Wikipedia articles
|
|
|
62 |
)
|
63 |
|
64 |
-
# Launch
|
65 |
if __name__ == "__main__":
|
66 |
-
iface.launch(debug=
|
|
|
2 |
import gradio as gr
|
3 |
import requests
|
4 |
|
5 |
+
# Initialize the summarization pipeline (global initialization for efficiency)
# NOTE: the BART model is loaded once at import time — the first startup is
# slow, but every request afterwards reuses the same in-memory pipeline.
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
|
7 |
|
8 |
+
# Function to fetch content from Wikipedia with a limit on length
def fetch_wikipedia_content(search_term, max_chars=10000):
    """Fetch the plain-text extract of a Wikipedia article.

    Args:
        search_term: Article title to look up (redirects are followed).
        max_chars: Maximum number of characters of article text to return.

    Returns:
        The (possibly truncated) article text on success, or a message
        string prefixed with "⚠️" on failure — callers detect errors by
        checking for that prefix.
    """
    url = "https://en.wikipedia.org/w/api.php"
    params = {
        "action": "query",
        # NOTE(review): format/prop/explaintext reconstructed from the
        # standard extracts query — confirm against the deployed file.
        "format": "json",
        "prop": "extracts",
        "explaintext": True,  # plain text, no HTML markup
        "titles": search_term,
        "redirects": 1,  # Follow redirects
    }
    # FIX: `headers` was referenced below but its definition had been
    # deleted, which raises NameError on every request. Restored; a
    # browser-like User-Agent also avoids Wikipedia rejecting the
    # default python-requests UA.
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
    }
    try:
        # timeout keeps a stalled connection from hanging the worker forever
        response = requests.get(url, headers=headers, params=params, timeout=15)
        response.raise_for_status()
        data = response.json()
        # The API returns a single page keyed by page-id; take the first one.
        page = next(iter(data["query"]["pages"].values()))
        content = page.get("extract", "")
        return content[:max_chars] if content else "⚠️ No content found for this topic."
    except requests.RequestException as e:
        return f"⚠️ Error fetching article: {e}"
28 |
|
29 |
+
# Function to split text into manageable chunks
def split_text_into_chunks(text, max_words=500):
    """Break *text* into word-bounded chunks of at most *max_words* words.

    Whitespace runs collapse to single spaces inside each chunk; an empty
    or whitespace-only input yields an empty list.
    """
    words = text.split()
    chunks = []
    for start in range(0, len(words), max_words):
        chunks.append(" ".join(words[start:start + max_words]))
    return chunks
|
33 |
|
34 |
+
# Function to summarize text in chunks
def summarize_text(text):
    """Summarize *text* chunk-by-chunk with the module-level BART pipeline.

    The summary length for each chunk scales with its word count, clamped
    to the 40–120 token range. A chunk that fails to summarize contributes
    a "⚠️ ..." message instead of aborting the whole run. Chunk summaries
    are joined with blank lines.
    """
    summaries = []
    for index, chunk in enumerate(split_text_into_chunks(text)):
        try:
            word_count = len(chunk.split())
            # Clamp the target: never shorter than 40 or longer than 120 tokens
            target_length = min(120, max(40, word_count // 2))
            result = summarizer(chunk, max_length=target_length, min_length=30, do_sample=False)
            summaries.append(result[0]['summary_text'])
        except Exception as e:
            summaries.append(f"⚠️ Error summarizing chunk {index + 1}: {str(e)}")
    return "\n\n".join(summaries)
|
47 |
|
48 |
+
# Main function for Gradio
def fetch_and_summarize(search_term):
    """Fetch the Wikipedia article for *search_term* and return its summary.

    Error messages from the fetch step (strings prefixed with "⚠️") are
    passed through unchanged so the UI can display them directly.
    """
    content = fetch_wikipedia_content(search_term)
    if not content.startswith("⚠️"):
        return summarize_text(content)
    return content
|
54 |
|
55 |
# Gradio Interface
# NOTE(review): `allow_flagging` and `queue(concurrency_count=...)` were
# removed in Gradio 4.x — confirm the Space pins a Gradio 3.x version,
# otherwise these calls raise at startup.
iface = gr.Interface(
    fn=fetch_and_summarize,
    inputs=gr.Textbox(lines=1, placeholder="Enter a Wikipedia topic", label="Wikipedia Topic"),
    outputs=gr.Textbox(label="Summarized Content"),
    title="Optimized Wikipedia Article Summarizer",
    description="Fetch and summarize Wikipedia articles efficiently. Optimized for lightweight summarization.",
    allow_flagging="never"  # Disable flagging for a lightweight deployment
)

# Launch Gradio with queuing for handling concurrent requests
if __name__ == "__main__":
    iface.queue(concurrency_count=4).launch(debug=False)
|