# Web-page header captured together with this file (not part of the program):
# dibend · "Update app.py" · 47d20f1 (verified) · 2.71 kB
from transformers import pipeline
import gradio as gr
import requests
# Build the summarization pipeline once, at import time, so that every
# request handled by this module reuses the same loaded model.
summarizer = pipeline(
    task="summarization",
    model="facebook/bart-large-cnn",
)
# Function to fetch content from Wikipedia with a limit on length
def fetch_wikipedia_content(search_term, max_chars=10000):
    """Fetch the plain-text extract of an English Wikipedia article.

    Args:
        search_term: Article title to look up. Redirects are followed.
        max_chars: Maximum number of characters of the extract to return.

    Returns:
        The article text truncated to ``max_chars`` characters, or a
        human-readable message starting with "⚠️" when the topic is blank,
        the article has no content, or the request fails. Callers
        distinguish success from failure by that prefix.
    """
    no_content = "⚠️ No content found for this topic."

    # A blank title cannot match any article; skip the network round trip.
    if not search_term or not search_term.strip():
        return no_content

    url = "https://en.wikipedia.org/w/api.php"
    params = {
        "action": "query",
        "format": "json",
        "prop": "extracts",
        "explaintext": True,
        "titles": search_term.strip(),
        "redirects": 1,  # Follow redirects
    }
    # Wikimedia's User-Agent policy asks API clients to identify themselves.
    # The original code passed an undefined `headers` name, which raised
    # NameError on every call.
    headers = {
        "User-Agent": "WikipediaSummarizer/1.0 (Gradio demo app; python-requests)"
    }

    try:
        # The timeout keeps a stalled connection from hanging the request
        # handler indefinitely.
        response = requests.get(url, headers=headers, params=params, timeout=10)
        response.raise_for_status()
        data = response.json()
    except (requests.RequestException, ValueError) as e:
        # ValueError covers a non-JSON body on requests versions whose JSON
        # decode error is not a RequestException subclass.
        return f"⚠️ Error fetching article: {e}"

    # The response may lack "query"/"pages" entirely (e.g. an invalid title),
    # so fall back to empty mappings instead of raising KeyError/StopIteration.
    pages = data.get("query", {}).get("pages", {})
    page = next(iter(pages.values()), {})
    content = page.get("extract", "")
    return content[:max_chars] if content else no_content
# Function to split text into manageable chunks
def split_text_into_chunks(text, max_words=500):
    """Break *text* into consecutive pieces of at most *max_words* words.

    Words are whatever ``str.split()`` yields, so any run of whitespace is a
    separator and each piece is re-joined with single spaces.

    Args:
        text: The string to split.
        max_words: Upper bound on the number of words per piece.

    Returns:
        A list of strings in original order; empty when *text* has no words.
    """
    tokens = text.split()
    pieces = []
    for start in range(0, len(tokens), max_words):
        window = tokens[start:start + max_words]
        pieces.append(" ".join(window))
    return pieces
# Function to summarize text in chunks
def summarize_text(text):
    """Summarize *text* piece by piece and join the partial summaries.

    The text is divided with ``split_text_into_chunks`` and each piece is
    passed to the module-level ``summarizer``. A failure on one piece is
    recorded as a "⚠️ Error summarizing chunk N: ..." entry (N is 1-based)
    instead of aborting the remaining pieces.

    Args:
        text: The text to summarize.

    Returns:
        The per-piece results separated by blank lines.
    """
    partial_summaries = []
    for position, piece in enumerate(split_text_into_chunks(text), start=1):
        try:
            n_words = len(piece.split())
            # Half the piece's word count, clamped to the range 40..120.
            length_cap = max(40, n_words // 2)
            if length_cap > 120:
                length_cap = 120
            result = summarizer(
                piece,
                max_length=length_cap,
                min_length=30,
                do_sample=False,
            )
            partial_summaries.append(result[0]["summary_text"])
        except Exception as exc:
            partial_summaries.append(
                f"⚠️ Error summarizing chunk {position}: {exc!s}"
            )
    return "\n\n".join(partial_summaries)
# Main function for Gradio
def fetch_and_summarize(search_term):
    """Fetch the article for *search_term* and return its summary.

    If ``fetch_wikipedia_content`` returns a message beginning with "⚠️",
    that message is returned unchanged; otherwise the fetched text is
    handed to ``summarize_text``.
    """
    article = fetch_wikipedia_content(search_term)
    fetch_failed = article.startswith("⚠️")
    return article if fetch_failed else summarize_text(article)
# Gradio UI: a single-line topic box in, a text box with the summary out.
iface = gr.Interface(
    title="Optimized Wikipedia Article Summarizer",
    description=(
        "Fetch and summarize Wikipedia articles efficiently. "
        "Optimized for lightweight summarization."
    ),
    fn=fetch_and_summarize,
    inputs=gr.Textbox(
        label="Wikipedia Topic",
        placeholder="Enter a Wikipedia topic",
        lines=1,
    ),
    outputs=gr.Textbox(label="Summarized Content"),
    allow_flagging="never",  # Disable flagging for a lightweight deployment
)
# Start the app only when run as a script: enable the request queue first,
# then launch the server.
# NOTE(review): `concurrency_count` depends on the installed Gradio
# version — confirm it is still accepted by `queue()` in this deployment.
if __name__ == "__main__":
    queued_app = iface.queue(concurrency_count=4)
    queued_app.launch(debug=False)