"""Gradio front end that converts text to speech through the ElevenLabs API."""

import json
import os
import tempfile

import gradio as gr
import requests
from dotenv import load_dotenv
from elevenlabs import save, Voice, VoiceSettings
from elevenlabs.client import ElevenLabs

# Load environment variables from .env file
load_dotenv()

# Fail fast at start-up: without an API key neither the voice list nor the
# text-to-speech request can work.
eleven_api_key = os.getenv("ELEVENLABS_API_KEY")
if not eleven_api_key:
    raise ValueError("ELEVENLABS_API_KEY environment variable not set.")

# Initialize ElevenLabs client
eleven = ElevenLabs(api_key=eleven_api_key)

TTS_MODEL_ID = "eleven_multilingual_v2"
# Upper bound (seconds) on the HTTP call so a stalled connection cannot hang
# the UI callback forever.
REQUEST_TIMEOUT_SECONDS = 120


def get_available_voices():
    """Return the account's voices as ``(label, voice_id)`` dropdown choices.

    Each label has the form ``"<name> (<voice_id>)"``. If the lookup fails,
    the error is printed and a single placeholder choice with an empty voice
    id is returned so the UI can still be built.
    """
    try:
        response = eleven.voices.get_all()
        return [
            (f"{voice.name} ({voice.voice_id})", voice.voice_id)
            for voice in response.voices
        ]
    except Exception as e:  # UI boundary: the SDK's exception types vary.
        print(f"Error fetching voices: {str(e)}")
        return [("Error fetching voices", "")]


def _build_auth_headers():
    """Build the HTTP headers for a text-to-speech request.

    A bearer token from ``ELEVENLABS_JWT_TOKEN`` is used when it is set.
    Otherwise the already validated API key is sent in the ``xi-api-key``
    header instead of the literal string ``"Bearer None"``.
    """
    headers = {'Content-Type': 'application/json'}
    jwt_token = os.getenv("ELEVENLABS_JWT_TOKEN")
    if jwt_token:
        headers['Authorization'] = f'Bearer {jwt_token}'
    else:
        headers['xi-api-key'] = eleven_api_key
    return headers


def text_to_speech(text, voice_id, stability, similarity_boost, style, use_speaker_boost):
    """Synthesize ``text`` with the given voice and return the audio file path.

    Args:
        text: Text to convert to speech.
        voice_id: ElevenLabs voice id to synthesize with.
        stability: Value for the ``stability`` voice setting.
        similarity_boost: Value for the ``similarity_boost`` voice setting.
        style: Value for the ``style`` voice setting.
        use_speaker_boost: Value for the ``use_speaker_boost`` voice setting.

    Returns:
        Path of an MP3 file holding the response body, or ``None`` when the
        input is empty or the request or file write fails (the error is
        printed).
    """
    # Nothing to synthesize, or the placeholder choice from a failed voice
    # lookup was selected: skip the network call, which could only fail.
    if not text or not text.strip() or not voice_id:
        print("An error occurred during text-to-speech conversion: "
              "text and voice must both be provided")
        return None

    try:
        url = f"https://api.elevenlabs.io/v1/text-to-speech/{voice_id}"
        payload = json.dumps({
            "text": text,
            "model_id": TTS_MODEL_ID,
            "voice_settings": {
                "stability": stability,
                "similarity_boost": similarity_boost,
                "style": style,
                "use_speaker_boost": use_speaker_boost,
            },
        })

        response = requests.post(
            url,
            headers=_build_auth_headers(),
            data=payload,
            timeout=REQUEST_TIMEOUT_SECONDS,
        )
        response.raise_for_status()

        # A unique file per request: a shared fixed name would let concurrent
        # users overwrite each other's audio. The file must outlive this
        # function so Gradio can serve it, hence delete=False.
        with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as f:
            f.write(response.content)
            output_file = f.name

        return output_file
    except Exception as e:  # UI boundary: report the failure, show no audio.
        print(f"An error occurred during text-to-speech conversion: {str(e)}")
        return None


with gr.Blocks() as demo:
    gr.Markdown("# Text-to-Speech Converter")
    gr.Markdown("Enter text, select a voice, and convert it to speech.")

    with gr.Row():
        with gr.Column():
            text_input = gr.Textbox(label="Enter text to convert to speech", lines=5)

            # Voices are fetched once, while the UI is being built.
            voice_choices = get_available_voices()
            voice_input = gr.Dropdown(
                choices=voice_choices,
                label="Select voice",
                value=voice_choices[0][1] if voice_choices else None
            )

            stability_input = gr.Slider(
                minimum=0.0,
                maximum=1.0,
                value=0.75,
                step=0.01,
                label="Stability"
            )
            similarity_boost_input = gr.Slider(
                minimum=0.0,
                maximum=1.0,
                value=0.75,
                step=0.01,
                label="Similarity Boost"
            )
            style_input = gr.Slider(
                minimum=0.0,
                maximum=1.0,
                value=0.0,
                step=0.01,
                label="Style"
            )
            use_speaker_boost_input = gr.Checkbox(
                label="Use Speaker Boost",
                value=True
            )

            submit_btn = gr.Button("Convert to Speech")

        with gr.Column():
            audio_output = gr.Audio(label="Generated Speech")

    submit_btn.click(
        fn=text_to_speech,
        inputs=[
            text_input,
            voice_input,
            stability_input,
            similarity_boost_input,
            style_input,
            use_speaker_boost_input
        ],
        outputs=audio_output
    )

if __name__ == "__main__":
    # NOTE(review): share=True requests a publicly reachable link, so every
    # visitor spends the configured ElevenLabs credentials - confirm intended.
    demo.launch(share=True)