Update barks.py
Browse files
barks.py
CHANGED
|
@@ -283,12 +283,24 @@ def generate_vocals(vocal_prompt: str, total_duration: int):
|
|
| 283 |
|
| 284 |
try:
|
| 285 |
print("Generating vocals with Bark...")
|
| 286 |
-
# Process vocal prompt
|
| 287 |
-
inputs = bark_processor(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 288 |
|
|
|
|
|
|
|
| 289 |
# Generate vocals with mixed precision
|
| 290 |
with torch.no_grad(), autocast():
|
| 291 |
-
vocal_array = bark_model.generate(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 292 |
|
| 293 |
# Convert to numpy and create AudioSegment
|
| 294 |
vocal_array = vocal_array.cpu().numpy().squeeze()
|
|
|
|
| 283 |
|
| 284 |
try:
|
| 285 |
print("Generating vocals with Bark...")
|
| 286 |
+
# Process vocal prompt with explicit padding and attention mask
|
| 287 |
+
inputs = bark_processor(
|
| 288 |
+
vocal_prompt,
|
| 289 |
+
return_tensors="pt",
|
| 290 |
+
padding=True,
|
| 291 |
+
return_attention_mask=True
|
| 292 |
+
).to("cuda")
|
| 293 |
|
| 294 |
+
# Set pad_token_id explicitly, avoiding eos_token_id
|
| 295 |
+
pad_token_id = 0 # Use 0 as a safe padding token
|
| 296 |
# Generate vocals with mixed precision
|
| 297 |
with torch.no_grad(), autocast():
|
| 298 |
+
vocal_array = bark_model.generate(
|
| 299 |
+
input_ids=inputs["input_ids"],
|
| 300 |
+
attention_mask=inputs["attention_mask"],
|
| 301 |
+
do_sample=True,
|
| 302 |
+
pad_token_id=pad_token_id
|
| 303 |
+
)
|
| 304 |
|
| 305 |
# Convert to numpy and create AudioSegment
|
| 306 |
vocal_array = vocal_array.cpu().numpy().squeeze()
|