Spaces:
Running
Running
from transformers import pipeline | |
import gradio as gr | |
import requests | |
# Initialize the summarization pipeline (global initialization for efficiency) | |
# Built once at import time so every request reuses the same loaded pipeline.
summarizer = pipeline(
    "summarization",
    model="facebook/bart-large-cnn",
)
# Function to fetch content from Wikipedia with a limit on length | |
def fetch_wikipedia_content(search_term, max_chars=10000):
    """Fetch the plain-text extract of an English Wikipedia article.

    Args:
        search_term: Article title to look up; redirects are followed.
        max_chars: Maximum number of characters of the extract to return.

    Returns:
        The extract truncated to ``max_chars`` characters, or a message
        starting with "⚠️" when the request fails or no text is available.
    """
    no_content = "⚠️ No content found for this topic."

    # Nothing to look up: skip the network round trip entirely.
    if not search_term or not search_term.strip():
        return no_content

    url = "https://en.wikipedia.org/w/api.php"
    params = {
        "action": "query",
        "format": "json",
        "prop": "extracts",
        "explaintext": True,
        "titles": search_term.strip(),
        "redirects": 1,  # Follow redirects
    }
    # Identify this client to the API on every request.
    headers = {"User-Agent": "WikipediaSummarizer/1.0 (Gradio demo)"}

    try:
        # The timeout keeps a stalled connection from blocking a worker forever.
        response = requests.get(url, headers=headers, params=params, timeout=10)
        response.raise_for_status()
        data = response.json()
    except (requests.RequestException, ValueError) as e:
        # ValueError covers a response body that is not valid JSON.
        return f"⚠️ Error fetching article: {e}"

    # The reply may lack "query"/"pages"; fall back to empty mappings
    # instead of raising KeyError / StopIteration.
    pages = data.get("query", {}).get("pages", {})
    page = next(iter(pages.values()), {})
    content = page.get("extract", "")
    return content[:max_chars] if content else no_content
# Function to split text into manageable chunks | |
def split_text_into_chunks(text, max_words=500):
    """Break ``text`` into pieces of at most ``max_words`` words each.

    Words are whatever ``str.split()`` yields, so runs of whitespace collapse
    and each piece is re-joined with single spaces.

    Args:
        text: The text to split.
        max_words: Upper bound on the number of words per piece.

    Returns:
        A list of strings in original order; empty when ``text`` has no words.
    """
    tokens = text.split()
    pieces = []
    for start in range(0, len(tokens), max_words):
        window = tokens[start:start + max_words]
        pieces.append(" ".join(window))
    return pieces
# Function to summarize text in chunks | |
def summarize_text(text):
    """Summarize ``text`` chunk by chunk and join the partial summaries.

    The text is split with ``split_text_into_chunks`` and each chunk is passed
    to the module-level ``summarizer`` pipeline. A chunk that fails is
    replaced by a "⚠️" message so the remaining chunks are still summarized.

    Args:
        text: The text to summarize.

    Returns:
        The per-chunk summaries (or error messages) separated by blank lines;
        an empty string when ``text`` is empty or only whitespace.
    """
    if not text or not text.strip():
        return ""

    summaries = []
    for number, chunk in enumerate(split_text_into_chunks(text), start=1):
        chunk_word_count = len(chunk.split())
        # Aim for about half the chunk's word count, clamped to 40..120.
        max_summary_length = min(120, max(40, chunk_word_count // 2))
        try:
            # Chunks are sized by words, which only approximates the token
            # count; truncation lets the tokenizer cut an oversized chunk
            # instead of the model call failing on it.
            result = summarizer(
                chunk,
                max_length=max_summary_length,
                min_length=30,
                do_sample=False,
                truncation=True,
            )
            summaries.append(result[0]["summary_text"])
        except Exception as e:
            # Deliberate best effort: report the failure in place and keep
            # going with the remaining chunks.
            summaries.append(f"⚠️ Error summarizing chunk {number}: {e}")
    return "\n\n".join(summaries)
# Main function for Gradio | |
def fetch_and_summarize(search_term):
    """Fetch a Wikipedia article by title and return its summary.

    Args:
        search_term: The topic entered by the user; surrounding whitespace
            is ignored.

    Returns:
        The summarized article text, or a message starting with "⚠️" when
        the topic is blank or the article could not be fetched.
    """
    topic = (search_term or "").strip()
    if not topic:
        # Blank input: answer immediately instead of querying the API.
        return "⚠️ Please enter a Wikipedia topic."

    content = fetch_wikipedia_content(topic)
    # The fetch helper signals failure with a "⚠️"-prefixed message; pass it
    # through unchanged rather than summarizing it.
    if content.startswith("⚠️"):
        return content
    return summarize_text(content)
# Gradio Interface | |
# One text box in (the topic), one text box out (the joined summaries).
iface = gr.Interface(
    title="Optimized Wikipedia Article Summarizer",
    description="Fetch and summarize Wikipedia articles efficiently. Optimized for lightweight summarization.",
    fn=fetch_and_summarize,
    inputs=gr.Textbox(
        label="Wikipedia Topic",
        placeholder="Enter a Wikipedia topic",
        lines=1,
    ),
    outputs=gr.Textbox(label="Summarized Content"),
    allow_flagging="never",  # Flagging is switched off for this deployment
)
# Launch Gradio with queuing for handling concurrent requests | |
if __name__ == "__main__":
    # Turn on request queuing first, then start the server on the queued app.
    # NOTE(review): `concurrency_count` is version-dependent in Gradio -
    # confirm it against the pinned release before upgrading.
    queued_app = iface.queue(concurrency_count=4)
    queued_app.launch(debug=False)