import logging
import os

import gradio as gr
import nltk
import requests
from bs4 import BeautifulSoup
from newsapi import NewsApiClient
from transformers import pipeline

# Configure logging
logging.basicConfig(level=logging.DEBUG)

# Initialize the summarization pipeline from Hugging Face Transformers
summarizer = pipeline("summarization")

# NewsAPI client used by summarize_news below; the NEWSAPI_KEY environment
# variable name is an assumption -- set it to your own newsapi.org API key
newsapi = NewsApiClient(api_key=os.environ["NEWSAPI_KEY"])

# Download the NLTK sentence tokenizer data
nltk.download('punkt')
# Function to fetch headline and paragraph text from a given URL
def fetch_article_content(url):
    try:
        r = requests.get(url, timeout=10)
        r.raise_for_status()
        soup = BeautifulSoup(r.text, 'html.parser')
        results = soup.find_all(['h1', 'p'])
        text = [result.text for result in results]
        return ' '.join(text)
    except Exception as e:
        logging.error(f"Error fetching content from {url}: {e}")
        return ""

# Function to search for news articles matching a query and summarize them
def summarize_news(query, num_results=3):
    logging.debug(f"Query received: {query}")
    logging.debug(f"Number of results requested: {num_results}")

    # Search for news articles
    logging.debug("Searching for news articles...")
    aggregated_content = ""
    try:
        news_results = newsapi.get_everything(q=query, language='en', page_size=int(num_results))
        logging.debug(f"Search results: {news_results}")
        for article in news_results['articles']:
            url = article['url']
            logging.debug(f"Fetching content from URL: {url}")
            content = fetch_article_content(url)
            aggregated_content += content + " "
    except Exception as e:
        logging.error(f"Error fetching news articles: {e}")
    # Summarize the aggregated content
    try:
        # Split the aggregated content into sentences
        sentences = nltk.sent_tokenize(aggregated_content)

        # Summarize each sentence individually if it is long enough to be meaningful
        summaries = []
        for sentence in sentences:
            if len(sentence) > 10:  # minimum character length; adjust as needed
                summary = summarizer(sentence, max_length=120, min_length=30, do_sample=False)
                summaries.append(summary[0]['summary_text'])

        # Join the per-sentence summaries to form the final output
        final_summary = " ".join(summaries)
        logging.debug(f"Final summarized text: {final_summary}")
        return final_summary
    except Exception as e:
        logging.error(f"Error during summarization: {e}")
        return "An error occurred during summarization."

# Set up the Gradio interface
iface = gr.Interface(
    fn=summarize_news,
    inputs=[
        gr.Textbox(label="Query"),
        gr.Slider(minimum=1, maximum=10, value=3, step=1, label="Number of Results"),
    ],
    outputs="textbox",
    title="News Summarizer",
    description="Enter a query to get a consolidated summary of the top news articles."
)
if __name__ == "__main__":
    logging.debug("Launching Gradio interface...")
    iface.launch()
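
# To run locally (package names below are the standard PyPI ones; the
# entry-point file name app.py is an assumption):
#
#     pip install gradio transformers torch nltk beautifulsoup4 requests newsapi-python
#     export NEWSAPI_KEY=<your newsapi.org key>
#     python app.py
#
# Gradio then serves the interface at http://127.0.0.1:7860 by default.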