Spaces:
Runtime error
Runtime error
| # qa.py | |
| import os | |
| import requests | |
| import json | |
| import tempfile | |
| import streamlit as st | |
| from utils import generate_audio_mp3 # Reuse your existing TTS function | |
def transcribe_audio_deepgram(local_audio_path: str) -> str:
    """
    Transcribe a local audio file with Deepgram's speech-to-text API.

    The file is streamed as the request body to the ``/v1/listen`` endpoint
    (``nova-2`` model, ``smart_format`` enabled). The ``Content-Type`` header
    is chosen from the file extension and falls back to ``audio/wav`` for
    unknown or missing extensions.

    Args:
        local_audio_path: Path of the audio file on disk.

    Returns:
        The ``transcript`` field of the first alternative of the first
        channel, or ``""`` when that field is absent.

    Raises:
        ValueError: If the ``DEEPGRAM_API_KEY`` environment variable is
            missing or empty.
        OSError: If the audio file cannot be opened.
        requests.exceptions.RequestException: On network failure, timeout,
            or an HTTP error status.
    """
    api_key = os.environ.get("DEEPGRAM_API_KEY")
    if not api_key:
        raise ValueError("Deepgram API key not found in environment variables.")

    url = "https://api.deepgram.com/v1/listen?model=nova-2&smart_format=true"

    # Declare the real container format instead of always claiming WAV.
    content_types = {
        ".wav": "audio/wav",
        ".mp3": "audio/mpeg",
        ".m4a": "audio/mp4",
        ".ogg": "audio/ogg",
        ".flac": "audio/flac",
    }
    extension = os.path.splitext(local_audio_path)[1].lower()
    headers = {
        "Authorization": f"Token {api_key}",
        "Content-Type": content_types.get(extension, "audio/wav"),
    }

    with open(local_audio_path, "rb") as audio_file:
        # (connect, read) timeout: without one, a stalled connection would
        # block the app forever. The read timeout is generous because
        # transcribing long audio can take a while.
        response = requests.post(
            url, headers=headers, data=audio_file, timeout=(10, 600)
        )
    response.raise_for_status()

    data = response.json()
    # Extract the transcript of the top alternative on the first channel.
    first_alternative = data["results"]["channels"][0]["alternatives"][0]
    return first_alternative.get("transcript", "")
def call_llm_for_qa(conversation_so_far: str, user_question: str) -> dict:
    """
    Ask the LLM (via ``utils.call_groq_api_for_qa``) to answer a follow-up
    question in the persona of John, the guest speaker.

    Args:
        conversation_so_far: Transcript of the conversation up to now; it is
            embedded verbatim in the prompt.
        user_question: The new question from the user.

    Returns:
        A dict parsed from the model's JSON reply, expected to look like
        ``{"speaker": "John", "text": "..."}``. If the reply contains no
        parseable JSON object, the raw reply text is returned as John's
        answer instead of raising.
    """
    # Imported inside the function, as in the original code.
    from utils import call_groq_api_for_qa

    system_prompt = f"""
    You are John, the guest speaker. The user is asking a follow-up question.
    Conversation so far:
    {conversation_so_far}
    New user question:
    {user_question}
    Please respond in JSON with keys "speaker" and "text", e.g.:
    {{ "speaker": "John", "text": "Sure, here's my answer..." }}
    """

    raw_response = call_groq_api_for_qa(system_prompt)

    # Models often wrap the JSON in a markdown fence or surround it with
    # prose, so parse only the outermost {...} span when one is present.
    payload = raw_response
    if isinstance(raw_response, str):
        start = raw_response.find("{")
        end = raw_response.rfind("}")
        if start != -1 and end > start:
            payload = raw_response[start:end + 1]

    try:
        parsed = json.loads(payload)
    except json.JSONDecodeError:
        parsed = None

    if isinstance(parsed, dict):
        return parsed

    # Not a JSON object: degrade gracefully by using the raw text as the answer.
    return {"speaker": "John", "text": str(raw_response).strip()}
def handle_qa_exchange(user_question: str) -> "tuple[bytes | None, str]":
    """
    Run one question/answer round of the follow-up Q&A.

    Steps:
        1) Read the conversation so far from ``st.session_state``.
        2) Call the LLM for a follow-up answer.
        3) Append the exchange to ``st.session_state["conversation_history"]``.
        4) Generate TTS audio for the answer and read it back as bytes.

    Args:
        user_question: The question asked by the user.

    Returns:
        ``(audio_bytes, answer_text)`` on success, or ``(None, "")`` when the
        LLM produced no usable answer text.
    """
    # `or ""` guards against the key being present but set to None.
    conversation_so_far = st.session_state.get("conversation_history", "") or ""

    # Ask the LLM
    response_dict = call_llm_for_qa(conversation_so_far, user_question)
    if not isinstance(response_dict, dict):
        # A non-object reply carries no usable fields; treat it as empty.
        response_dict = {}

    # The model may return null or a non-string for these keys; normalise to
    # str so the .strip() and string concatenation below cannot fail.
    answer_text = str(response_dict.get("text") or "")
    speaker = str(response_dict.get("speaker") or "John")

    # Update conversation (done even when the answer is empty, as before)
    new_history = conversation_so_far + f"\nUser: {user_question}\n{speaker}: {answer_text}\n"
    st.session_state["conversation_history"] = new_history

    if not answer_text.strip():
        return (None, "")

    # TTS: the voice is always John, regardless of the speaker the LLM named.
    # The return value of generate_audio_mp3 is treated as a file path.
    audio_file_path = generate_audio_mp3(answer_text, "John")
    with open(audio_file_path, "rb") as audio_file:
        audio_bytes = audio_file.read()
    return (audio_bytes, answer_text)