Spaces:
Runtime error
Runtime error
Update utils/decode.py
Browse files
- utils/decode.py +4 -4
utils/decode.py
CHANGED
|
@@ -67,7 +67,7 @@ def decode_one_audio_mossformer2_ss_16k(model, device, inputs, args):
|
|
| 67 |
"""
|
| 68 |
out = [] # Initialize the list to store outputs
|
| 69 |
decode_do_segment = False # Flag to determine if segmentation is needed
|
| 70 |
-
window = args.sampling_rate * args.decode_window # Decoding window length
|
| 71 |
stride = int(window * 0.75) # Decoding stride if segmentation is used
|
| 72 |
b, t = inputs.shape # Get batch size and input length
|
| 73 |
|
|
@@ -142,7 +142,7 @@ def decode_one_audio_frcrn_se_16k(model, device, inputs, args):
|
|
| 142 |
"""
|
| 143 |
decode_do_segment = False # Flag to determine if segmentation is needed
|
| 144 |
|
| 145 |
-
window = args.sampling_rate * args.decode_window # Decoding window length
|
| 146 |
stride = int(window * 0.75) # Decoding stride for segmenting the input
|
| 147 |
b, t = inputs.shape # Get batch size (b) and input length (t)
|
| 148 |
|
|
@@ -210,7 +210,7 @@ def decode_one_audio_mossformergan_se_16k(model, device, inputs, args):
|
|
| 210 |
numpy.ndarray: The decoded audio output, which has been enhanced by the model.
|
| 211 |
"""
|
| 212 |
decode_do_segment = False # Flag to determine if segmentation is needed
|
| 213 |
-
window = args.sampling_rate * args.decode_window # Decoding window length
|
| 214 |
stride = int(window * 0.75) # Decoding stride for segmenting the input
|
| 215 |
b, t = inputs.shape # Get batch size (b) and input length (t)
|
| 216 |
|
|
@@ -274,7 +274,7 @@ def _decode_one_audio_mossformergan_se_16k(model, device, inputs, norm_factor, a
|
|
| 274 |
"""
|
| 275 |
input_len = inputs.size(-1) # Get the length of the input audio
|
| 276 |
nframe = int(np.ceil(input_len / args.win_inc)) # Calculate the number of frames based on window increment
|
| 277 |
-
padded_len = nframe * args.win_inc # Calculate the padded length to fit the model
|
| 278 |
padding_len = padded_len - input_len # Determine how much padding is needed
|
| 279 |
|
| 280 |
# Pad the input audio with the beginning of the input
|
|
|
|
| 67 |
"""
|
| 68 |
out = [] # Initialize the list to store outputs
|
| 69 |
decode_do_segment = False # Flag to determine if segmentation is needed
|
| 70 |
+
window = int(args.sampling_rate * args.decode_window) # Decoding window length
|
| 71 |
stride = int(window * 0.75) # Decoding stride if segmentation is used
|
| 72 |
b, t = inputs.shape # Get batch size and input length
|
| 73 |
|
|
|
|
| 142 |
"""
|
| 143 |
decode_do_segment = False # Flag to determine if segmentation is needed
|
| 144 |
|
| 145 |
+
window = int(args.sampling_rate * args.decode_window) # Decoding window length
|
| 146 |
stride = int(window * 0.75) # Decoding stride for segmenting the input
|
| 147 |
b, t = inputs.shape # Get batch size (b) and input length (t)
|
| 148 |
|
|
|
|
| 210 |
numpy.ndarray: The decoded audio output, which has been enhanced by the model.
|
| 211 |
"""
|
| 212 |
decode_do_segment = False # Flag to determine if segmentation is needed
|
| 213 |
+
window = int(args.sampling_rate * args.decode_window) # Decoding window length
|
| 214 |
stride = int(window * 0.75) # Decoding stride for segmenting the input
|
| 215 |
b, t = inputs.shape # Get batch size (b) and input length (t)
|
| 216 |
|
|
|
|
| 274 |
"""
|
| 275 |
input_len = inputs.size(-1) # Get the length of the input audio
|
| 276 |
nframe = int(np.ceil(input_len / args.win_inc)) # Calculate the number of frames based on window increment
|
| 277 |
+
padded_len = int(nframe * args.win_inc) # Calculate the padded length to fit the model
|
| 278 |
padding_len = padded_len - input_len # Determine how much padding is needed
|
| 279 |
|
| 280 |
# Pad the input audio with the beginning of the input
|