| { | |
| "istftnet": { | |
| "upsample_kernel_sizes": [20, 12], | |
| "upsample_rates": [10, 6], | |
| "gen_istft_hop_size": 5, | |
| "gen_istft_n_fft": 20, | |
| "resblock_dilation_sizes": [ | |
| [1, 3, 5], | |
| [1, 3, 5], | |
| [1, 3, 5] | |
| ], | |
| "resblock_kernel_sizes": [3, 7, 11], | |
| "upsample_initial_channel": 512 | |
| }, | |
| "dim_in": 64, | |
| "dropout": 0.2, | |
| "hidden_dim": 512, | |
| "max_conv_dim": 512, | |
| "max_dur": 50, | |
| "multispeaker": true, | |
| "n_layer": 3, | |
| "n_mels": 80, | |
| "n_token": 178, | |
| "style_dim": 128, | |
| "text_encoder_kernel_size": 5, | |
| "plbert": { | |
| "hidden_size": 768, | |
| "num_attention_heads": 12, | |
| "intermediate_size": 2048, | |
| "max_position_embeddings": 512, | |
| "num_hidden_layers": 12, | |
| "dropout": 0.1 | |
| }, | |
| "vocab": { | |
| ";": 1, | |
| ":": 2, | |
| ",": 3, | |
| ".": 4, | |
| "!": 5, | |
| "?": 6, | |
| "—": 9, | |
| "…": 10, | |
| "\"": 11, | |
| "(": 12, | |
| ")": 13, | |
| "“": 14, | |
| "”": 15, | |
| " ": 16, | |
| "\u0303": 17, | |
| "ʣ": 18, | |
| "ʥ": 19, | |
| "ʦ": 20, | |
| "ʨ": 21, | |
| "ᵝ": 22, | |
| "\uAB67": 23, | |
| "A": 24, | |
| "I": 25, | |
| "O": 31, | |
| "Q": 33, | |
| "S": 35, | |
| "T": 36, | |
| "W": 39, | |
| "Y": 41, | |
| "ᵊ": 42, | |
| "a": 43, | |
| "b": 44, | |
| "c": 45, | |
| "d": 46, | |
| "e": 47, | |
| "f": 48, | |
| "h": 50, | |
| "i": 51, | |
| "j": 52, | |
| "k": 53, | |
| "l": 54, | |
| "m": 55, | |
| "n": 56, | |
| "o": 57, | |
| "p": 58, | |
| "q": 59, | |
| "r": 60, | |
| "s": 61, | |
| "t": 62, | |
| "u": 63, | |
| "v": 64, | |
| "w": 65, | |
| "x": 66, | |
| "y": 67, | |
| "z": 68, | |
| "ɑ": 69, | |
| "ɐ": 70, | |
| "ɒ": 71, | |
| "æ": 72, | |
| "β": 75, | |
| "ɔ": 76, | |
| "ɕ": 77, | |
| "ç": 78, | |
| "ɖ": 80, | |
| "ð": 81, | |
| "ʤ": 82, | |
| "ə": 83, | |
| "ɚ": 85, | |
| "ɛ": 86, | |
| "ɜ": 87, | |
| "ɟ": 90, | |
| "ɡ": 92, | |
| "ɥ": 99, | |
| "ɨ": 101, | |
| "ɪ": 102, | |
| "ʝ": 103, | |
| "ɯ": 110, | |
| "ɰ": 111, | |
| "ŋ": 112, | |
| "ɳ": 113, | |
| "ɲ": 114, | |
| "ɴ": 115, | |
| "ø": 116, | |
| "ɸ": 118, | |
| "θ": 119, | |
| "œ": 120, | |
| "ɹ": 123, | |
| "ɾ": 125, | |
| "ɻ": 126, | |
| "ʁ": 128, | |
| "ɽ": 129, | |
| "ʂ": 130, | |
| "ʃ": 131, | |
| "ʈ": 132, | |
| "ʧ": 133, | |
| "ʊ": 135, | |
| "ʋ": 136, | |
| "ʌ": 138, | |
| "ɣ": 139, | |
| "ɤ": 140, | |
| "χ": 142, | |
| "ʎ": 143, | |
| "ʒ": 147, | |
| "ʔ": 148, | |
| "ˈ": 156, | |
| "ˌ": 157, | |
| "ː": 158, | |
| "ʰ": 162, | |
| "ʲ": 164, | |
| "↓": 169, | |
| "→": 171, | |
| "↗": 172, | |
| "↘": 173, | |
| "ᵻ": 177 | |
| }, | |
| "quantization": { | |
| "group_size": 64, | |
| "bits": 3 | |
| } | |
| } |