Spaces:
Runtime error
Runtime error
| import xml.etree.ElementTree as ET | |
| from datetime import datetime | |
| import os | |
| from huggingface_hub import InferenceClient | |
| import re | |
# Module-wide chat client for the Llama 3.1 8B Instruct model, routed through
# the "hf-inference" provider. The access token is read from the HF_TOKEN
# environment variable (None when the variable is not set).
client = InferenceClient(
    model="meta-llama/Llama-3.1-8B-Instruct",
    provider="hf-inference",
    token=os.environ.get("HF_TOKEN"),
)
# Matches an optional markdown-bold "Headline"/"Description" label at the
# start of a line. The label only counts when it is followed by a colon or is
# the whole line, so ordinary text that merely begins with one of these words
# ("Description of a new method ...") is left untouched.
_LABEL_RE = re.compile(
    r"^\s*\*{0,2}(?:Headline|Description)\*{0,2}\s*(?::\*{0,2}\s*|$)",
    flags=re.IGNORECASE,
)


def clean_label(line):
    """Strip a leading ``Headline:`` / ``Description:`` label from *line*.

    Handles plain labels (``Headline: text``), labels with the bold markers
    around the colon (``**Headline:** text``) or before it
    (``**Headline**: text``), and lines that consist of the label alone.
    Matching is case-insensitive.

    Args:
        line: A single line of model output.

    Returns:
        The line with the label (and the whitespace after it) removed, or the
        line unchanged when it does not start with a recognised label.
    """
    return _LABEL_RE.sub("", line)
def generate_headline_and_description(subject: str, steering_question: str | None = None) -> tuple[str, str]:
    """Ask the LLM for a headline and a short description for the podcast episode.

    Args:
        subject: Paper text or topic. Only the first 10,000 characters are
            placed in the prompt.
        steering_question: Accepted for interface compatibility; it is not
            used when building the prompt.

    Returns:
        A ``(headline, description)`` tuple. When the reply has at least two
        non-empty lines, the first is the headline and the rest are joined
        with spaces into the description. Otherwise the whole reply is the
        description and its first 80 characters are the headline.
    """
    prompt = f"""You are a world-class podcast producer. Given the following paper or topic, generate:
1. A catchy, informative headline for a podcast episode about it (max 15 words).
2. A short, engaging description (2-3 sentences, max 60 words) that summarizes what listeners will learn or why the topic is exciting.
Output ONLY the headline on the first line, and the description on the second line. Do NOT include any labels, markdown, or extra formatting.
Here is the topic:
{subject[:10000]}
"""
    messages = [
        {"role": "system", "content": "You are a world-class podcast producer."},
        {"role": "user", "content": prompt},
    ]
    response = client.chat_completion(
        messages,
        max_tokens=512,
    )
    # Guard against a missing message body so .strip() cannot fail on None.
    full_text = (response.choices[0].message.content or "").strip()

    # Strip labels first, THEN drop empties: a line that was nothing but a
    # label (e.g. "**Headline**") must not become an empty headline.
    cleaned = (clean_label(raw.strip()).strip() for raw in full_text.splitlines())
    lines = [text for text in cleaned if text]

    if len(lines) >= 2:
        return lines[0], " ".join(lines[1:])

    # Single-line (or empty) reply: reuse the text for both fields, preferring
    # the label-free version when one survived cleaning.
    fallback = lines[0] if lines else full_text
    return fallback[:80], fallback
def indent(elem, level=0):
    """Pretty-print *elem* in place by rewriting whitespace-only text/tails.

    Each nesting level is indented by one space. Text or tail content that
    contains non-whitespace characters is never modified.

    Args:
        elem: The ``xml.etree.ElementTree.Element`` to indent (recursively).
        level: Nesting depth of *elem*; callers normally leave this at 0.
    """
    pad = "\n" + level * " "
    if len(elem):
        if not elem.text or not elem.text.strip():
            # Whitespace before the first child: one level deeper than elem.
            elem.text = pad + " "
        last = None
        for child in elem:
            indent(child, level + 1)
            last = child
        # The last child's tail is what precedes elem's closing tag, so it has
        # to sit at elem's own depth rather than the children's depth.
        if not last.tail or not last.tail.strip():
            last.tail = pad
        if not elem.tail or not elem.tail.strip():
            elem.tail = pad
    elif level and (not elem.tail or not elem.tail.strip()):
        elem.tail = pad
| # ----------------------------------------------------------------------------- | |
| # UPDATE RSS | |
| # ----------------------------------------------------------------------------- | |
def get_next_episode_number(podcast_dir="podcasts"):
    """Return the next episode number: the count of ``.wav`` entries plus one.

    Args:
        podcast_dir: Directory whose ``.wav`` entries are counted.

    Returns:
        ``1`` when the directory does not exist or holds no ``.wav`` entries,
        otherwise the number of ``.wav`` entries plus one.
    """
    try:
        entries = os.listdir(podcast_dir)
    except FileNotFoundError:
        # No directory yet means no episode has been produced so far.
        return 1
    return sum(1 for name in entries if name.endswith(".wav")) + 1
def update_rss(subject, audio_url, audio_length, paper_id=None, rss_path="rss.xml"):
    """Prepend a new episode ``<item>`` to the RSS feed stored at *rss_path*.

    The episode title and description come from
    ``generate_headline_and_description(subject)``. The feed file is
    rewritten in place with pretty-printed indentation.

    Args:
        subject: Paper text or topic passed to the headline generator.
        audio_url: URL of the episode audio; also used as the item ``guid``.
        audio_length: Value for the enclosure ``length`` attribute (converted
            with ``str``).
        paper_id: Optional Hugging Face paper id; when given, a link to
            ``https://huggingface.co/papers/<paper_id>`` is appended to the
            description.
        rss_path: Path of the RSS XML file to update.

    Raises:
        ValueError: If the feed has no ``<channel>`` element.
    """
    # Local imports: the file-level import only brings in `datetime` itself.
    from datetime import timezone
    from email.utils import format_datetime

    itunes_ns = "http://www.itunes.com/dtds/podcast-1.0.dtd"
    # Make the serializer emit the conventional "itunes" prefix (not "ns0").
    ET.register_namespace("itunes", itunes_ns)

    # Parse and validate the feed first so a broken feed fails before the
    # (slow, remote) LLM call is made.
    tree = ET.parse(rss_path)
    root = tree.getroot()
    channel = root.find("channel")
    if channel is None:
        raise ValueError(f"{rss_path} has no <channel> element")

    # Generate headline and description automatically
    title, description = generate_headline_and_description(subject)
    if paper_id:
        paper_url = f"https://huggingface.co/papers/{paper_id}"
        description += f'\n\n<a href="{paper_url}">[Read the paper on Hugging Face]</a>'

    # RFC 2822 timestamp in UTC ("Mon, 01 Jan 2024 12:00:00 +0000").
    # format_datetime always uses English day/month names, unlike strftime,
    # whose %a/%b follow the process locale.
    now_rfc2822 = format_datetime(datetime.now(timezone.utc))

    # Update lastBuildDate
    last_build_date = channel.find("lastBuildDate")
    if last_build_date is not None:
        last_build_date.text = now_rfc2822

    # Create new item
    item = ET.Element("item")
    ET.SubElement(item, "title").text = title
    ET.SubElement(item, "description").text = description
    ET.SubElement(item, "pubDate").text = now_rfc2822
    ET.SubElement(item, "enclosure", url=audio_url, length=str(audio_length), type="audio/wav")
    ET.SubElement(item, "guid").text = audio_url
    # Use the namespace-qualified name so the writer declares xmlns:itunes.
    # A literal "itunes:explicit" tag would be written with an undeclared
    # prefix whenever the feed has no other iTunes-namespace elements.
    ET.SubElement(item, f"{{{itunes_ns}}}explicit").text = "false"

    # Insert the new item before the first existing <item>, or append it when
    # the channel has no items yet.
    first_item = channel.find("item")
    if first_item is not None:
        channel.insert(list(channel).index(first_item), item)
    else:
        channel.append(item)

    # Write back to file with pretty formatting
    indent(root)
    tree.write(rss_path, encoding="utf-8", xml_declaration=True)