anissaofficial6h committed on
Commit
a07806f
·
verified ·
1 Parent(s): 4aa83fd

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +187 -1
README.md CHANGED
@@ -14,4 +14,190 @@ library_name: asteroid
14
  tags:
15
  - art
16
  - code
17
- ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14
  tags:
15
  - art
16
  - code
17
+ ---
18
+ ## 🧠 Backend β€” FastAPI (Python)
19
+ **File: backend/main.py**
20
+ ```python
21
+ from fastapi import FastAPI, Form
22
+ from fastapi.middleware.cors import CORSMiddleware
23
+ from fastapi.responses import FileResponse, JSONResponse
24
+ import requests, os, hashlib
25
+ from pathlib import Path
26
+
27
+ app = FastAPI()
28
+
29
+ # CORS for frontend
30
+ app.add_middleware(
31
+ CORSMiddleware,
32
+ allow_origins=["*"],
33
+ allow_credentials=True,
34
+ allow_methods=["*"],
35
+ allow_headers=["*"],
36
+ )
37
+
38
+ HF_TOKEN = os.environ.get("HF_TOKEN")
39
+ HF_URL = "https://api-inference.huggingface.co/models/{model_id}"
40
+ headers = {"Authorization": f"Bearer {HF_TOKEN}"}
41
+ CACHE_DIR = Path("cache")
42
+ CACHE_DIR.mkdir(exist_ok=True)
43
+
44
+ CHARACTER_PRESETS = {
45
+ "Ichika": {"model": "suno/bark", "style": "mature, calm, older-sister tone"},
46
+ "Nino": {"model": "facebook/mms-tts-en", "style": "confident, tsundere tone"},
47
+ "Miku": {"model": "espnet/kan-bayashi_ljspeech_vits", "style": "shy, gentle tone"},
48
+ "Yotsuba": {"model": "espnet/kan-bayashi_ljspeech_fastspeech2", "style": "cheerful, energetic tone"},
49
+ "Itsuki": {"model": "parler-tts/parler-tts-mini-v1", "style": "serious, sincere tone"},
50
+ }
51
+
52
+ def get_cache_filename(character, text):
53
+ key = hashlib.sha256(f"{character}:{text}".encode()).hexdigest()
54
+ return CACHE_DIR / f"{key}.wav"
55
+
56
+ @app.post("/api/tts")
57
+ def generate_tts(character: str = Form(...), text: str = Form(...)):
58
+ preset = CHARACTER_PRESETS.get(character)
59
+ if not preset:
60
+ return JSONResponse({"error": "Character not found"}, status_code=400)
61
+
62
+ cache_file = get_cache_filename(character, text)
63
+ if cache_file.exists():
64
+ return FileResponse(cache_file, media_type="audio/wav")
65
+
66
+ model_id = preset["model"]
67
+ style_prompt = preset["style"]
68
+
69
+ payload = {"inputs": f"[{style_prompt}] {text}"}
70
+ response = requests.post(HF_URL.format(model_id=model_id), headers=headers, json=payload)
71
+
72
+ if response.status_code != 200:
73
+ return JSONResponse({"error": response.text}, status_code=500)
74
+
75
+ with open(cache_file, "wb") as f:
76
+ f.write(response.content)
77
+
78
+ return FileResponse(cache_file, media_type="audio/wav")
79
+ ```
80
+
81
+ ---
82
+
83
+ ## 🎨 Frontend β€” React (Vite)
84
+ **File: frontend/src/App.jsx**
85
+ ```jsx
86
+ import React, { useState } from 'react';
87
+
88
+ export default function App() {
89
+ const [text, setText] = useState('こんにちは'); // Default greeting
90
+ const [character, setCharacter] = useState('Miku');
91
+ const [audioUrl, setAudioUrl] = useState(null);
92
+ const [loading, setLoading] = useState(false);
93
+ const [history, setHistory] = useState([]);
94
+
95
+ const characters = ['Ichika', 'Nino', 'Miku', 'Yotsuba', 'Itsuki'];
96
+
97
+ async function handleSpeak(e) {
98
+ e.preventDefault();
99
+ setLoading(true);
100
+
101
+ const form = new FormData();
102
+ form.append('character', character);
103
+ form.append('text', text);
104
+
105
+ const res = await fetch('http://localhost:8000/api/tts', {
106
+ method: 'POST',
107
+ body: form,
108
+ });
109
+
110
+ if (res.ok) {
111
+ const blob = await res.blob();
112
+ const url = URL.createObjectURL(blob);
113
+ setAudioUrl(url);
114
+ setHistory((prev) => [{ text, character, url }, ...prev]);
115
+ } else {
116
+ alert('Error generating speech');
117
+ }
118
+
119
+ setLoading(false);
120
+ }
121
+
122
+ function handleDownload(url, name) {
123
+ const link = document.createElement('a');
124
+ link.href = url;
125
+ link.download = `${name}.wav`;
126
+ link.click();
127
+ }
128
+
129
+ return (
130
+ <div className="min-h-screen flex flex-col items-center bg-pink-50 text-gray-800 p-6">
131
+ <h1 className="text-3xl font-bold mb-4">Gotoubun TTS — Final Version</h1>
132
+
133
+ <select
134
+ value={character}
135
+ onChange={(e) => setCharacter(e.target.value)}
136
+ className="p-2 rounded-md border mb-4"
137
+ >
138
+ {characters.map((ch) => (
139
+ <option key={ch}>{ch}</option>
140
+ ))}
141
+ </select>
142
+
143
+ <textarea
144
+ className="border rounded-md p-2 w-80 h-32 mb-4"
145
+ value={text}
146
+ onChange={(e) => setText(e.target.value)}
147
+ />
148
+
149
+ <button
150
+ onClick={handleSpeak}
151
+ className="bg-pink-400 hover:bg-pink-500 text-white font-bold px-4 py-2 rounded-md"
152
+ disabled={loading}
153
+ >
154
+ {loading ? 'Generating...' : `Speak as ${character}`}
155
+ </button>
156
+
157
+ {audioUrl && (
158
+ <div className="mt-4 flex flex-col items-center">
159
+ <audio controls src={audioUrl} />
160
+ <button
161
+ className="mt-2 bg-gray-200 hover:bg-gray-300 px-3 py-1 rounded"
162
+ onClick={() => handleDownload(audioUrl, `${character}_${text.slice(0,10)}`)}
163
+ >
164
+ Download Audio
165
+ </button>
166
+ </div>
167
+ )}
168
+
169
+ {history.length > 0 && (
170
+ <div className="mt-6 w-full max-w-md">
171
+ <h2 className="text-xl font-semibold mb-2">History</h2>
172
+ <ul className="space-y-2">
173
+ {history.map((item, idx) => (
174
+ <li key={idx} className="bg-white rounded-md p-2 shadow-sm">
175
+ <div className="font-semibold">{item.character}</div>
176
+ <div className="text-sm italic mb-1">{item.text}</div>
177
+ <audio controls src={item.url} />
178
+ </li>
179
+ ))}
180
+ </ul>
181
+ </div>
182
+ )}
183
+ </div>
184
+ );
185
+ }
186
+ ```
187
+
188
+ ---
189
+
190
+ ## 🪄 New Features
191
+ ✅ **Greeting preset:** default input “こんにちは” (users can change it)
192
+ ✅ **Download audio:** one-click save as `.wav`
193
+ ✅ **Playback history:** shows recent generated voices with player controls
194
+ ✅ **Audio caching:** reuses previous files for same text+character combo
195
+ ✅ **Presets:** each character has unique tone and speech style
196
+
197
+ ---
198
+
199
+ ## 🚀 Run Instructions
200
+ ```bash
201
+ docker compose up --build
202
+ ```
203
+ Then open **http://localhost:5173** — you’ll hear the greeting “こんにちは” spoken in your chosen Gotoubun character’s style, with download and playback history available.