Update README.md
Browse files
README.md
CHANGED
|
@@ -76,6 +76,8 @@ from transformers import VocosModel, VocosProcessor, BarkProcessor, BarkModel
|
|
| 76 |
from transformers.models.bark.generation_configuration_bark import BarkSemanticGenerationConfig, BarkCoarseGenerationConfig, BarkFineGenerationConfig
|
| 77 |
from scipy.io.wavfile import write as write_wav
|
| 78 |
|
|
|
|
|
|
|
| 79 |
# load the Bark model and processor
|
| 80 |
bark_id = "suno/bark-small"
|
| 81 |
bark_processor = BarkProcessor.from_pretrained(bark_id)
|
|
@@ -91,8 +93,7 @@ fine_generation_config = BarkFineGenerationConfig(**bark.generation_config.fine_
|
|
| 91 |
|
| 92 |
# generating the RVQ codes
|
| 93 |
semantic_tokens = bark.semantic.generate(
|
| 94 |
-
|
| 95 |
-
attention_mask=bark_inputs.attention_mask,
|
| 96 |
semantic_generation_config=semantic_generation_config)
|
| 97 |
coarse_tokens = bark.coarse_acoustics.generate(
|
| 98 |
semantic_tokens,
|
|
@@ -116,7 +117,7 @@ vocos_model = VocosModel.from_pretrained(vocos_id, device_map="auto")
|
|
| 116 |
sampling_rate = processor.feature_extractor.sampling_rate
|
| 117 |
|
| 118 |
# generate audio
|
| 119 |
-
inputs = processor(codes=codes.to("cpu"), bandwidth=
|
| 120 |
audio = vocos_model(**inputs).audio
|
| 121 |
|
| 122 |
# save audio to file
|
|
|
|
| 76 |
from transformers.models.bark.generation_configuration_bark import BarkSemanticGenerationConfig, BarkCoarseGenerationConfig, BarkFineGenerationConfig
|
| 77 |
from scipy.io.wavfile import write as write_wav
|
| 78 |
|
| 79 |
+
bandwidth = 6.0
|
| 80 |
+
|
| 81 |
# load the Bark model and processor
|
| 82 |
bark_id = "suno/bark-small"
|
| 83 |
bark_processor = BarkProcessor.from_pretrained(bark_id)
|
|
|
|
| 93 |
|
| 94 |
# generating the RVQ codes
|
| 95 |
semantic_tokens = bark.semantic.generate(
|
| 96 |
+
**bark_inputs,
|
|
|
|
| 97 |
semantic_generation_config=semantic_generation_config)
|
| 98 |
coarse_tokens = bark.coarse_acoustics.generate(
|
| 99 |
semantic_tokens,
|
|
|
|
| 117 |
sampling_rate = processor.feature_extractor.sampling_rate
|
| 118 |
|
| 119 |
# generate audio
|
| 120 |
+
inputs = processor(codes=codes.to("cpu"), bandwidth=bandwidth).to(vocos_model.device)
|
| 121 |
audio = vocos_model(**inputs).audio
|
| 122 |
|
| 123 |
# save audio to file
|