File size: 3,587 Bytes
ce18f8b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2d0bddc
 
ce18f8b
 
 
 
 
db34208
2d0bddc
ce18f8b
 
2d0bddc
ce18f8b
db34208
ce18f8b
 
 
 
 
 
 
 
 
 
2d0bddc
 
ce18f8b
 
 
 
 
 
2d0bddc
ce18f8b
 
 
 
 
 
 
db34208
2d0bddc
db34208
 
 
 
 
 
 
 
 
 
ce18f8b
 
 
 
 
 
 
 
 
 
 
 
2d0bddc
ce18f8b
 
db34208
ce18f8b
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
#######################################################################################
#
# MIT License
#
# Copyright (c) [2025] [[email protected]]
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
#
#######################################################################################

# This file implements an API endpoint for the Italian Kokoro Text-to-Speech (TTS) system.
# It provides functionality to generate TTS audio from input Italian text using the Kokoro voice model.


# Source code is based on or inspired by several projects.
# For more details and proper attribution, please refer to the following resources:
#
# - [Kokoro] - [https://github.com/hexgrad/kokoro]
# - [Misaki] - [https://github.com/hexgrad/misaki]
# - [Kokoro-82M] - [https://huggingface.co/hexgrad/Kokoro-82M]
# - [Kokoro-onnx] - [https://github.com/thewh1teagle/kokoro-onnx]



import os
import gradio as gr
from misaki import espeak
from misaki.espeak import EspeakG2P
from kokoro_onnx import Kokoro
from huggingface_hub import snapshot_download

# Hugging Face Hub repository that the model and voice files are downloaded from.
KOKORO_REPO_ID = "leonelhs/kokoro-thewh1teagle"

# Maps the human-readable label of each voice to the voice ID handed to Kokoro.
VOICES = {
    '🚺 Sara':'if_sara',
    '🚹 Nicola':'im_nicola'
}

# Download the repository snapshot; `snapshot` is the base directory that the
# model and voices paths below are joined onto.
snapshot = snapshot_download(repo_id=KOKORO_REPO_ID)

# Misaki G2P (grapheme-to-phoneme) setup, backed by espeak-ng.
# NOTE(review): `fallback` is created here but is not passed to `g2p` or
# referenced again in the visible code — confirm whether it is still needed.
fallback = espeak.EspeakFallback(british=False)
# G2P converter configured for Italian ("it").
g2p = EspeakG2P(language="it")

# Kokoro: build the ONNX model and voices file paths inside the snapshot
# directory and load the synthesizer once at import time.
model_path = os.path.join(snapshot, "kokoro-v1.0.onnx")
voices_path = os.path.join(snapshot, "voices-v1.0.bin")
kokoro = Kokoro(model_path, voices_path)

def predict(text: str, voice: str = 'if_sara', speed: float = 1):
    """
        Generate speech audio from Italian text input.

        Parameters:
            text (string): The Italian text to be converted into speech.
            voice (string): The voice ID of the selected male or female voice profile (e.g., 'if_sara', 'im_nicola').
            speed (float): The speaking rate multiplier (e.g., 1.0 = normal speed, 0.8 = slower, 1.2 = faster).

        Returns:
            tuple: A (sample_rate, samples) pair with the sample rate and the generated audio samples.
    """

    # Convert the text to phonemes; the second value returned by g2p is not used here.
    phonemes, _ = g2p(text)
    # is_phonemes=True marks the input as phonemes rather than plain text.
    samples, sample_rate = kokoro.create(phonemes, voice, speed, is_phonemes=True)
    # Note the swapped order relative to kokoro.create: sample rate first, then samples.
    return sample_rate, samples

# Gradio interface: the text, voice and speed inputs are passed to `predict`,
# and its result is rendered by the audio output component.
app = gr.Interface(
    predict,
    [
        gr.Textbox(label='Input Text'),
        # The initial value has to be one of the offered choices, so take the
        # first voice ID from VOICES instead of a hard-coded ID.
        gr.Dropdown(list(VOICES.items()), value=next(iter(VOICES.values())), label='Voice'),
        gr.Slider(minimum=0.5, maximum=2, value=1, step=0.1, label='Speed')
    ],
    gr.Audio(label='Output Audio', interactive=False, streaming=False, autoplay=True),
    description="Kokoro TTS 🇮🇹 API Endpoint",
)

# Configure the queue before launching: launch() starts the server, so a
# queue() call placed after it comes too late to affect the running app.
app.queue()
app.launch(share=False, debug=True, show_error=True, mcp_server=True)