import math
import os
import tempfile

import gradio as gr
import numpy as np
from gtts import gTTS
from pydub import AudioSegment

# Voice presets shown in the template dropdown, keyed by display name.
# generate_tts divides "rate" by 100 to get a playback-speed multiplier;
# "volume" is a per-preset loudness value (presumably a linear factor where
# 1.0 means unchanged).
TEMPLATES = {
    preset_name: {"rate": preset_rate, "volume": preset_volume}
    for preset_name, preset_rate, preset_volume in (
        ("パラオ高め（ポーランドボール風）", 180, 1.0),
        ("低めのナレーター", 120, 0.8),
        ("普通の話し方", 150, 1.0),
        ("元気な女の子", 180, 1.2),
        ("落ち着いた男性", 130, 0.9),
        ("ロボット風（機械的）", 140, 1.0),
        ("さっぱりした女性", 160, 1.1),
        ("しっとりした声", 140, 0.9),
        ("おじさん風", 60, 0.75),
        ("怒った声", 45, 0.9),
    )
}

# Effect names offered in the UI; apply_effect dispatches on these exact strings.
EFFECTS = [
    "なし",  # no effect
    "ふわふわ化",  # low-pass filter
    "かちかち化",  # high-pass filter
    "減衰",  # fade out
    "リバーブ",  # faded copy appended, then attenuated
    "音揺れ",  # random per-chunk pitch wobble
]

def generate_tts(text, template_name, pitch_factor=1.0, speed_factor=1.0, effect_type="なし", effect_strength=1.0):
    """Synthesize Japanese speech for *text* and return the path of the processed MP3.

    The text is spoken with gTTS, then passed through change_pitch,
    change_speed and apply_effect, scaled by the preset's volume, and
    exported to a new temporary MP3 file.

    Args:
        text: Text to read aloud; must contain at least one non-blank character.
        template_name: Key into TEMPLATES. Unknown names fall back to
            rate 150 / volume 1.0.
        pitch_factor: Multiplier handed to change_pitch; must be > 0.
        speed_factor: Multiplier applied to the preset rate; must be > 0.
        effect_type: One of the EFFECTS names; anything else means no effect.
        effect_strength: Intensity forwarded to apply_effect.

    Returns:
        Path of the generated "*_modified.mp3" file. The file is not deleted
        here because the caller still has to read it.

    Raises:
        ValueError: If *text* is empty/blank or a factor is not positive.
    """
    # Validate up front so bad input fails with a clear message instead of an
    # error from deep inside the synthesis or resampling code.
    if text is None or not str(text).strip():
        raise ValueError("text must not be empty")
    if pitch_factor <= 0 or speed_factor <= 0:
        raise ValueError("pitch_factor and speed_factor must be greater than 0")

    # Resolve the preset.
    template = TEMPLATES.get(template_name, {"rate": 150, "volume": 1.0})
    rate = template["rate"] * speed_factor
    volume = template["volume"]

    # Speech synthesis (gTTS).
    tts = gTTS(text=text, lang='ja')

    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as f:
        tts_path = f.name
    try:
        tts.save(tts_path)
        sound = AudioSegment.from_mp3(tts_path)
    finally:
        # The raw synthesis file is only needed until it has been decoded;
        # removing it is best-effort so a cleanup failure never hides the
        # real result or error.
        try:
            os.remove(tts_path)
        except OSError:
            pass

    # Pitch change.
    sound = change_pitch(sound, pitch_factor)

    # Speed change: the rate is treated as a percentage.
    # NOTE(review): a preset rate of 150 therefore plays at 1.5x — confirm
    # that 100 (not 150) is the intended "normal speed" baseline.
    sound = change_speed(sound, rate / 100)

    # Effect.
    sound = apply_effect(sound, effect_type, effect_strength)

    # Apply the preset volume as a gain in dB (linear factor -> decibels).
    if volume > 0 and volume != 1.0:
        sound = sound.apply_gain(20 * math.log10(volume))

    # Export next to the (now removed) raw file; splitext only touches the
    # extension, unlike str.replace which would rewrite every ".mp3" in the path.
    output_path = os.path.splitext(tts_path)[0] + "_modified.mp3"
    sound.export(output_path, format="mp3")

    return output_path

def change_pitch(sound, factor):
    """Shift the pitch of *sound* by *factor* using frame-rate resampling.

    The raw samples are re-labelled with a frame rate of
    ``sound.frame_rate * factor`` and the result is converted to 44100 Hz.
    Because the samples themselves are untouched, the duration changes by
    ``1 / factor`` along with the pitch.

    Args:
        sound: Segment exposing ``frame_rate``, ``raw_data`` and ``_spawn``
            (a pydub ``AudioSegment``).
        factor: Pitch multiplier; values above 1 raise the pitch.

    Returns:
        A new segment at 44100 Hz.

    Raises:
        ValueError: If *factor* is not positive or yields a frame rate below 1 Hz.
    """
    if factor <= 0:
        raise ValueError(f"factor must be greater than 0, got {factor!r}")
    new_frame_rate = int(sound.frame_rate * factor)
    if new_frame_rate < 1:
        # int() truncation can still reach 0 for very small factors.
        raise ValueError(f"factor {factor!r} is too small for frame rate {sound.frame_rate}")
    pitched_sound = sound._spawn(sound.raw_data, overrides={"frame_rate": new_frame_rate})
    # Normalize to a common output rate so segments can be joined afterwards.
    return pitched_sound.set_frame_rate(44100)

def change_speed(sound, speed=1.0):
    """Change the playback speed of *sound* by *speed* using frame-rate resampling.

    The raw samples are re-labelled with a frame rate of
    ``sound.frame_rate * speed`` and the result is converted to 44100 Hz.
    This is a plain resample, so the pitch rises and falls together with the
    speed.

    Args:
        sound: Segment exposing ``frame_rate``, ``raw_data`` and ``_spawn``
            (a pydub ``AudioSegment``).
        speed: Speed multiplier; 1.0 keeps the original speed.

    Returns:
        A new segment at 44100 Hz.

    Raises:
        ValueError: If *speed* is not positive or yields a frame rate below 1 Hz.
    """
    if speed <= 0:
        raise ValueError(f"speed must be greater than 0, got {speed!r}")
    new_frame_rate = int(sound.frame_rate * speed)
    if new_frame_rate < 1:
        # int() truncation can still reach 0 for very small multipliers.
        raise ValueError(f"speed {speed!r} is too small for frame rate {sound.frame_rate}")
    sped_up_sound = sound._spawn(sound.raw_data, overrides={"frame_rate": new_frame_rate})
    # Normalize to a common output rate.
    return sped_up_sound.set_frame_rate(44100)

def apply_effect(sound, effect_type, effect_strength):
    """Apply the effect named *effect_type* to *sound* and return the result.

    Args:
        sound: Audio segment to process (a pydub ``AudioSegment``).
        effect_type: One of the EFFECTS names. Any other value, including
            "なし", returns *sound* unchanged.
        effect_strength: Intensity multiplier. It scales the filter cutoff
            (1000 Hz low-pass / 3000 Hz high-pass), the fade length, the
            reverb fade length and attenuation, or the wobble depth.

    Returns:
        The processed segment, or *sound* itself when no effect applies.
    """
    if effect_type == "ふわふわ化":
        return sound.low_pass_filter(1000 * effect_strength)

    if effect_type == "かちかち化":
        return sound.high_pass_filter(3000 * effect_strength)

    if effect_type == "減衰":
        # A fade cannot be longer than the clip, so clamp to its length;
        # a zero-length fade is a no-op and is skipped.
        fade_ms = max(0, min(len(sound), int(len(sound) * effect_strength)))
        return sound.fade_out(fade_ms) if fade_ms else sound

    if effect_type == "リバーブ":
        # Fade durations must be whole milliseconds and no longer than the clip.
        fade_ms = max(0, min(len(sound), int(200 * effect_strength)))
        tail = sound.reverse()
        if fade_ms:
            tail = tail.fade_in(fade_ms).fade_out(fade_ms)
        # NOTE(review): `+` between two segments concatenates in pydub, so the
        # result is the clip followed by its faded copy rather than a mixed
        # echo — confirm this is the intended "reverb".
        return (sound + tail.reverse()) - (10 * effect_strength)

    if effect_type == "音揺れ":
        return wobble(sound, effect_strength)

    return sound

def wobble(sound, strength):
    """Return *sound* with a random pitch shift applied to every 100 ms slice.

    Each slice gets its own factor drawn uniformly from
    ``[1 - 0.05 * strength, 1 + 0.05 * strength]`` and is run through
    change_pitch before being appended to the result, so a larger
    *strength* gives a wider wobble.
    """
    slice_ms = 100
    result = AudioSegment.empty()
    for offset in range(0, len(sound), slice_ms):
        piece = sound[offset:offset + slice_ms]
        # One independent draw per slice.
        factor = np.random.uniform(1 - 0.05 * strength, 1 + 0.05 * strength)
        result += change_pitch(piece, factor)
    return result

# Build the Gradio interface: text, preset, pitch/speed, effect, then output.
with gr.Blocks() as app:
    gr.Markdown("# オリジナル声読み上げ機")

    # Text to read aloud.
    with gr.Row():
        text_input = gr.Textbox(
            label="読み上げるテキスト",
            lines=2,
            placeholder="ここに入力...",
        )

    # Voice preset.
    with gr.Row():
        template_dropdown = gr.Dropdown(
            choices=list(TEMPLATES),
            value="パラオ高め（ポーランドボール風）",
            label="テンプレートを選ぶ",
        )

    # Pitch and speed multipliers.
    with gr.Row():
        pitch_slider = gr.Slider(
            minimum=0.1,
            maximum=5.0,
            value=1.0,
            step=0.05,
            label="ピッチ倍率（高く・低く）",
        )
        speed_slider = gr.Slider(
            minimum=0.1,
            maximum=5.0,
            value=1.0,
            step=0.05,
            label="速度倍率（速く・遅く）",
        )

    # Effect selection and its strength.
    with gr.Row():
        effect_dropdown = gr.Dropdown(
            choices=EFFECTS,
            value="なし",
            label="エフェクトを選ぶ",
        )
        effect_strength_slider = gr.Slider(
            minimum=0.1,
            maximum=10.0,
            value=1.0,
            step=0.05,
            label="エフェクト強さ",
        )

    with gr.Row():
        submit_btn = gr.Button("生成する")

    audio_output = gr.Audio(label="出力音声", type="filepath")

    # The input order must match generate_tts's positional parameters.
    submit_btn.click(
        fn=generate_tts,
        inputs=[
            text_input,
            template_dropdown,
            pitch_slider,
            speed_slider,
            effect_dropdown,
            effect_strength_slider,
        ],
        outputs=audio_output,
    )

# Start the web server as soon as this module is executed.
app.launch()