ghostai1 committed on
Commit
1b95dbd
·
verified ·
1 Parent(s): 177f643

Update barks.py

Browse files
Files changed (1) hide show
  1. barks.py +15 -3
barks.py CHANGED
@@ -283,12 +283,24 @@ def generate_vocals(vocal_prompt: str, total_duration: int):
283
 
284
  try:
285
  print("Generating vocals with Bark...")
286
- # Process vocal prompt, ensuring inputs are on cuda
287
- inputs = bark_processor(vocal_prompt, return_tensors="pt").to("cuda")
 
 
 
 
 
288
 
 
 
289
  # Generate vocals with mixed precision
290
  with torch.no_grad(), autocast():
291
- vocal_array = bark_model.generate(**inputs, do_sample=True)
 
 
 
 
 
292
 
293
  # Convert to numpy and create AudioSegment
294
  vocal_array = vocal_array.cpu().numpy().squeeze()
 
283
 
284
  try:
285
  print("Generating vocals with Bark...")
286
+ # Process vocal prompt with explicit padding and attention mask
287
+ inputs = bark_processor(
288
+ vocal_prompt,
289
+ return_tensors="pt",
290
+ padding=True,
291
+ return_attention_mask=True
292
+ ).to("cuda")
293
 
294
+ # Set pad_token_id explicitly, avoiding eos_token_id
295
+ pad_token_id = 0 # Use 0 as a safe padding token
296
  # Generate vocals with mixed precision
297
  with torch.no_grad(), autocast():
298
+ vocal_array = bark_model.generate(
299
+ input_ids=inputs["input_ids"],
300
+ attention_mask=inputs["attention_mask"],
301
+ do_sample=True,
302
+ pad_token_id=pad_token_id
303
+ )
304
 
305
  # Convert to numpy and create AudioSegment
306
  vocal_array = vocal_array.cpu().numpy().squeeze()