# Hugging Face Spaces page header (status: "Sleeping") — scraping artifact,
# preserved here as a comment so the file remains valid Python.
| from transformers import pipeline, BartTokenizer | |
| from googlesearch import search | |
| from bs4 import BeautifulSoup | |
| import requests | |
# Initialize BART tokenizer and summarization pipeline.
# NOTE(review): the checkpoint name suggests a Llama-3 model, yet it is loaded
# with BartTokenizer and a "summarization" pipeline — confirm the checkpoint
# actually ships a BART-compatible tokenizer/config, or this load will fail.
# NOTE(review): `tokenizer` is never used elsewhere in this file.
tokenizer = BartTokenizer.from_pretrained('letgoofthepizza/Llama-3-8B-Instruct-ko-news-summary')
summarizer = pipeline("summarization", model="letgoofthepizza/Llama-3-8B-Instruct-ko-news-summary")
# Domains considered trustworthy sources for answers; previously this list was
# duplicated in both branches below.
_DEFAULT_DOMAINS = ("tesla.com", "cnbc.com", "reuters.com", "bloomberg.com", "investopedia.com")


# Function to perform Google search and retrieve URLs, filtering by domain
def google_search(query: str, num_results: int = 10, domains=_DEFAULT_DOMAINS):
    """Perform a Google search and return result URLs restricted to *domains*.

    Tries the library-level ``domains=`` keyword first; versions of
    ``googlesearch`` that do not accept it raise ``TypeError``, in which case
    the URLs are filtered client-side instead.

    Args:
        query: Free-text search query.
        num_results: Maximum number of results to request.
        domains: Iterable of domain substrings a URL must contain to be kept.

    Returns:
        list[str]: Matching result URLs (possibly fewer than ``num_results``).
    """
    domains = tuple(domains)
    try:
        # Some googlesearch versions support server-side domain filtering.
        return list(search(query, num_results=num_results, domains=list(domains)))
    except TypeError:
        # Keyword not supported by this googlesearch version: filter locally.
        return [
            url
            for url in search(query, num_results=num_results)
            if any(domain in url for domain in domains)
        ]
# Function to fetch content from a URL and summarize it
def fetch_and_summarize_url(url: str):
    """Fetch *url*, extract its first paragraphs, and summarize them.

    Args:
        url: Page to download and summarize.

    Returns:
        str | None: The summary text, or ``None`` when the page could not be
        fetched or contained no paragraph text.
    """
    try:
        # Bound the request so one slow host cannot hang the whole run
        # (the original call had no timeout).
        response = requests.get(url, timeout=10)
        response.raise_for_status()
    except requests.RequestException:
        # Best-effort: silently skip pages that fail to download.
        return None
    soup = BeautifulSoup(response.text, 'html.parser')
    # Extract relevant content (e.g., paragraphs or sections).
    paragraphs = [p.text for p in soup.find_all('p')]
    # Combine first few paragraphs only; whole articles would exceed the
    # summarizer's input limit.
    combined_text = " ".join(paragraphs[:3])
    if not combined_text.strip():  # Nothing to summarize on this page.
        return None
    summary = summarizer(combined_text, max_length=200, min_length=50, do_sample=False)
    return summary[0]['summary_text']
# Function to perform Google search and aggregate summaries
def google_search_and_answer(question: str, keywords: str):
    """Search the web for *question* plus *keywords* and aggregate summaries.

    Every result URL is fetched and summarized; a summary is kept only when it
    mentions the full question text or the keyword string (case-insensitive).

    Args:
        question: The user's natural-language question.
        keywords: Extra search terms used both for the query and for filtering.

    Returns:
        str: Relevant summaries separated by blank lines, or a fallback
        message when nothing relevant was found.
    """
    search_query = f"{question} {keywords}"
    # Hoist the lowered filter strings out of the loop.
    needles = (question.lower(), keywords.lower())
    relevant = []
    for url in google_search(search_query):
        summary = fetch_and_summarize_url(url)
        if not summary:
            continue
        # Relevance heuristic: keep summaries mentioning question or keywords.
        haystack = summary.lower()
        if any(needle in haystack for needle in needles):
            relevant.append(summary)
    return "\n\n".join(relevant) if relevant else "No relevant information found."
# Main function to run the script
def main():
    """Prompt for a query and keywords, then print the aggregated answer."""
    print("Intelligent Assistant")
    question = input("Enter your query: ")
    keywords = input("Enter specific keywords (e.g., 'Q1 2024 financial results Tesla'): ")
    print("Answer:", google_search_and_answer(question, keywords))


if __name__ == "__main__":
    main()