anissaofficial6h
/

the-quintessential-quintuplets-TTS

 tags:
 - art
 - code
+---
+## 🧠 Backend — FastAPI (Python)
+**File: backend/main.py**
+```python
+from fastapi import FastAPI, Form
+from fastapi.middleware.cors import CORSMiddleware
+from fastapi.responses import FileResponse, JSONResponse
+import requests, os, hashlib
+from pathlib import Path
+app = FastAPI()
+# CORS for frontend
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],
+    allow_credentials=True,
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+HF_TOKEN = os.environ.get("HF_TOKEN")
+HF_URL = "https://api-inference.huggingface.co/models/{model_id}"
+headers = {"Authorization": f"Bearer {HF_TOKEN}"}
+CACHE_DIR = Path("cache")
+CACHE_DIR.mkdir(exist_ok=True)
+CHARACTER_PRESETS = {
+    "Ichika": {"model": "suno/bark", "style": "mature, calm, older-sister tone"},
+    "Nino": {"model": "facebook/mms-tts-en", "style": "confident, tsundere tone"},
+    "Miku": {"model": "espnet/kan-bayashi_ljspeech_vits", "style": "shy, gentle tone"},
+    "Yotsuba": {"model": "espnet/kan-bayashi_ljspeech_fastspeech2", "style": "cheerful, energetic tone"},
+    "Itsuki": {"model": "parler-tts/parler-tts-mini-v1", "style": "serious, sincere tone"},
+}
+def get_cache_filename(character, text):
+    key = hashlib.sha256(f"{character}:{text}".encode()).hexdigest()
+    return CACHE_DIR / f"{key}.wav"
+@app.post("/api/tts")
+def generate_tts(character: str = Form(...), text: str = Form(...)):
+    preset = CHARACTER_PRESETS.get(character)
+    if not preset:
+        return JSONResponse({"error": "Character not found"}, status_code=400)
+    cache_file = get_cache_filename(character, text)
+    if cache_file.exists():
+        return FileResponse(cache_file, media_type="audio/wav")
+    model_id = preset["model"]
+    style_prompt = preset["style"]
+    payload = {"inputs": f"[{style_prompt}] {text}"}
+    response = requests.post(HF_URL.format(model_id=model_id), headers=headers, json=payload)
+    if response.status_code != 200:
+        return JSONResponse({"error": response.text}, status_code=500)
+    with open(cache_file, "wb") as f:
+        f.write(response.content)
+    return FileResponse(cache_file, media_type="audio/wav")
+```
+---
+## 🎨 Frontend — React (Vite)
+**File: frontend/src/App.jsx**
+```jsx
+import React, { useState } from 'react';
+export default function App() {
+  const [text, setText] = useState('こんにちは'); // Default greeting
+  const [character, setCharacter] = useState('Miku');
+  const [audioUrl, setAudioUrl] = useState(null);
+  const [loading, setLoading] = useState(false);
+  const [history, setHistory] = useState([]);
+  const characters = ['Ichika', 'Nino', 'Miku', 'Yotsuba', 'Itsuki'];
+  async function handleSpeak(e) {
+    e.preventDefault();
+    setLoading(true);
+    const form = new FormData();
+    form.append('character', character);
+    form.append('text', text);
+    const res = await fetch('http://localhost:8000/api/tts', {
+      method: 'POST',
+      body: form,
+    });
+    if (res.ok) {
+      const blob = await res.blob();
+      const url = URL.createObjectURL(blob);
+      setAudioUrl(url);
+      setHistory((prev) => [{ text, character, url }, ...prev]);
+    } else {
+      alert('Error generating speech');
+    }
+    setLoading(false);
+  }
+  function handleDownload(url, name) {
+    const link = document.createElement('a');
+    link.href = url;
+    link.download = `${name}.wav`;
+    link.click();
+  }
+  return (
+    <div className="min-h-screen flex flex-col items-center bg-pink-50 text-gray-800 p-6">
+      <h1 className="text-3xl font-bold mb-4">Gotoubun TTS — Final Version</h1>
+      <select
+        value={character}
+        onChange={(e) => setCharacter(e.target.value)}
+        className="p-2 rounded-md border mb-4"
+      >
+        {characters.map((ch) => (
+          <option key={ch}>{ch}</option>
+        ))}
+      </select>
+      <textarea
+        className="border rounded-md p-2 w-80 h-32 mb-4"
+        value={text}
+        onChange={(e) => setText(e.target.value)}
+      />
+      <button
+        onClick={handleSpeak}
+        className="bg-pink-400 hover:bg-pink-500 text-white font-bold px-4 py-2 rounded-md"
+        disabled={loading}
+      >
+        {loading ? 'Generating...' : `Speak as ${character}`}
+      </button>
+      {audioUrl && (
+        <div className="mt-4 flex flex-col items-center">
+          <audio controls src={audioUrl} />
+          <button
+            className="mt-2 bg-gray-200 hover:bg-gray-300 px-3 py-1 rounded"
+            onClick={() => handleDownload(audioUrl, `${character}_${text.slice(0,10)}`)}
+          >
+            Download Audio
+          </button>
+        </div>
+      )}
+      {history.length > 0 && (
+        <div className="mt-6 w-full max-w-md">
+          <h2 className="text-xl font-semibold mb-2">History</h2>
+          <ul className="space-y-2">
+            {history.map((item, idx) => (
+              <li key={idx} className="bg-white rounded-md p-2 shadow-sm">
+                <div className="font-semibold">{item.character}</div>
+                <div className="text-sm italic mb-1">{item.text}</div>
+                <audio controls src={item.url} />
+              </li>
+            ))}
+          </ul>
+        </div>
+      )}
+    </div>
+  );
+}
+```
+---
+## 🪄 New Features
+✅ **Greeting preset:** default input “こんにちは” (users can change it)
+✅ **Download audio:** one-click save as `.wav`
+✅ **Playback history:** shows recent generated voices with player controls
+✅ **Audio caching:** reuses previous files for same text+character combo
+✅ **Presets:** each character has unique tone and speech style
+---
+## 🚀 Run Instructions
+```bash
+docker compose up --build
+```
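+Then open **http://localhost:5173**, pick a character, and click **Speak as …** — the default greeting “こんにちは” is spoken in your chosen Gotoubun character’s style, with download and playback history available. The backend reads its Hugging Face token from the `HF_TOKEN` environment variable, so set it before starting.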