# Hugging Face Spaces page header (status: "Sleeping") — scraping artifact,
# preserved here as a comment so the file remains valid Python.
| from transformers import pipeline, BartTokenizer | |
| from googlesearch import search | |
| from bs4 import BeautifulSoup | |
| import requests | |
# Initialize BART tokenizer and summarization pipeline.
# NOTE(review): the checkpoint name suggests a Llama-3 model, yet it is loaded
# with BartTokenizer and a "summarization" pipeline — confirm the checkpoint
# actually ships a BART-compatible tokenizer/config, or this load will fail.
# NOTE(review): `tokenizer` is never used elsewhere in this file.
tokenizer = BartTokenizer.from_pretrained('letgoofthepizza/Llama-3-8B-Instruct-ko-news-summary')
summarizer = pipeline("summarization", model="letgoofthepizza/Llama-3-8B-Instruct-ko-news-summary")
# Domains considered trustworthy sources for answers; previously this list was
# duplicated in both branches below.
_DEFAULT_DOMAINS = ("tesla.com", "cnbc.com", "reuters.com", "bloomberg.com", "investopedia.com")


# Function to perform Google search and retrieve URLs, filtering by domain
def google_search(query: str, num_results: int = 10, domains=_DEFAULT_DOMAINS):
    """Perform a Google search and return result URLs restricted to *domains*.

    Tries the library-level ``domains=`` keyword first; versions of
    ``googlesearch`` that do not accept it raise ``TypeError``, in which case
    the URLs are filtered client-side instead.

    Args:
        query: Free-text search query.
        num_results: Maximum number of results to request.
        domains: Iterable of domain substrings a URL must contain to be kept.

    Returns:
        list[str]: Matching result URLs (possibly fewer than ``num_results``).
    """
    domains = tuple(domains)
    try:
        # Some googlesearch versions support server-side domain filtering.
        return list(search(query, num_results=num_results, domains=list(domains)))
    except TypeError:
        # Keyword not supported by this googlesearch version: filter locally.
        return [
            url
            for url in search(query, num_results=num_results)
            if any(domain in url for domain in domains)
        ]
# Function to fetch content from a URL and summarize it
def fetch_and_summarize_url(url: str):
    """Fetch *url*, extract its first paragraphs, and summarize them.

    Args:
        url: Page to download and summarize.

    Returns:
        str | None: The summary text, or ``None`` when the page could not be
        fetched or contained no paragraph text.
    """
    try:
        # Bound the request so one slow host cannot hang the whole run
        # (the original call had no timeout).
        response = requests.get(url, timeout=10)
        response.raise_for_status()
    except requests.RequestException:
        # Best-effort: silently skip pages that fail to download.
        return None
    soup = BeautifulSoup(response.text, 'html.parser')
    # Extract relevant content (e.g., paragraphs or sections).
    paragraphs = [p.text for p in soup.find_all('p')]
    # Combine first few paragraphs only; whole articles would exceed the
    # summarizer's input limit.
    combined_text = " ".join(paragraphs[:3])
    if not combined_text.strip():  # Nothing to summarize on this page.
        return None
    summary = summarizer(combined_text, max_length=200, min_length=50, do_sample=False)
    return summary[0]['summary_text']
# Function to perform Google search and aggregate summaries
def google_search_and_answer(question: str, keywords: str):
    """Search the web for *question* plus *keywords* and aggregate summaries.

    Every result URL is fetched and summarized; a summary is kept only when it
    mentions the full question text or the keyword string (case-insensitive).

    Args:
        question: The user's natural-language question.
        keywords: Extra search terms used both for the query and for filtering.

    Returns:
        str: Relevant summaries separated by blank lines, or a fallback
        message when nothing relevant was found.
    """
    search_query = f"{question} {keywords}"
    # Hoist the lowered filter strings out of the loop.
    needles = (question.lower(), keywords.lower())
    relevant = []
    for url in google_search(search_query):
        summary = fetch_and_summarize_url(url)
        if not summary:
            continue
        # Relevance heuristic: keep summaries mentioning question or keywords.
        haystack = summary.lower()
        if any(needle in haystack for needle in needles):
            relevant.append(summary)
    return "\n\n".join(relevant) if relevant else "No relevant information found."
# Main function to run the script
def main():
    """Prompt for a query and keywords, then print the aggregated answer."""
    print("Intelligent Assistant")
    question = input("Enter your query: ")
    keywords = input("Enter specific keywords (e.g., 'Q1 2024 financial results Tesla'): ")
    print("Answer:", google_search_and_answer(question, keywords))


if __name__ == "__main__":
    main()