Revert "Update model to latest version"

Browse files

This reverts commit 7e4f17f9504e5d8f26eda48d2545059025414016.

Files changed (2)

enhance_model.ckpt +2 -2
hyperparams.yaml +56 -10

enhance_model.ckpt CHANGED

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:348bdc866632457e60d9eea38aa9a511910b89cd0c1ad1b78c229535bd5b60e6
-size 89230845

 version https://git-lfs.github.com/spec/v1
+oid sha256:eea2ed64b9b136ccfa66741860d47b4a3ea6954bb8eb07d3212a14b601a0d3fb
+size 29005818

hyperparams.yaml CHANGED

@@ -4,21 +4,67 @@ n_fft: 512
 win_length: 32
 hop_length: 16
-mask_weight: 0.99
 # Enhancement model args
-enhance_model: !new:speechbrain.lobes.models.EnhanceResnet.EnhanceResnet
     n_fft: !ref <n_fft>
     win_length: !ref <win_length>
     hop_length: !ref <hop_length>
     sample_rate: !ref <sample_rate>
-    channel_counts: [128, 128, 256, 256, 512, 512]
-    normalization: !name:speechbrain.nnet.normalization.BatchNorm2d
-    activation: !new:torch.nn.GELU
-    dense_count: 2
-    dense_nodes: 1024
-    dropout: 0.1
-    mask_weight: !ref <mask_weight>
 modules:
     enhance_model: !ref <enhance_model>

 win_length: 32
 hop_length: 16
 # Enhancement model args
+emb_channels: 1024
+emb_kernel_size: 3
+emb_padding: same
+enhancer_size: 512
+enhancer_layers: 8
+enhancer_heads: 8
+enhancer_causal: False
+enhancer_drop_rate: 0.1
+compute_stft: !new:speechbrain.processing.features.STFT
+    sample_rate: !ref <sample_rate>
     n_fft: !ref <n_fft>
     win_length: !ref <win_length>
     hop_length: !ref <hop_length>
+compute_istft: !new:speechbrain.processing.features.ISTFT
     sample_rate: !ref <sample_rate>
+    n_fft: !ref <n_fft>
+    win_length: !ref <win_length>
+    hop_length: !ref <hop_length>
+spectral_magnitude: !name:speechbrain.processing.features.spectral_magnitude
+    power: 0.5
+resynth: !name:speechbrain.processing.signal_processing.resynthesize
+    stft: !ref <compute_stft>
+    istft: !ref <compute_istft>
+enhance_model: !new:speechbrain.lobes.models.transformer.TransformerSE.CNNTransformerSE
+    output_size: !ref <n_fft> // 2 + 1
+    d_model: !ref <n_fft> // 2
+    output_activation: !name:torch.nn.ReLU
+    activation: !name:torch.nn.LeakyReLU
+    dropout: !ref <enhancer_drop_rate>
+    num_layers: !ref <enhancer_layers>
+    d_ffn: !ref <enhancer_size>
+    nhead: !ref <enhancer_heads>
+    causal: !ref <enhancer_causal>
+    custom_emb_module: !new:speechbrain.nnet.containers.Sequential
+        input_shape: [null, null, !ref <n_fft> // 2 + 1]
+        conv1: !name:speechbrain.nnet.CNN.Conv1d
+            out_channels: !ref <emb_channels>
+            kernel_size: 3
+        norm1: !name:speechbrain.nnet.normalization.LayerNorm
+        act1: !new:torch.nn.LeakyReLU
+        conv2: !name:speechbrain.nnet.CNN.Conv1d
+            out_channels: !ref <emb_channels> // 2
+            kernel_size: 3
+        norm2: !name:speechbrain.nnet.normalization.LayerNorm
+        act2: !new:torch.nn.LeakyReLU
+        conv3: !name:speechbrain.nnet.CNN.Conv1d
+            out_channels: !ref <emb_channels> // 4
+            kernel_size: 3
+        norm3: !name:speechbrain.nnet.normalization.LayerNorm
+        act3: !new:torch.nn.LeakyReLU
+        conv4: !name:speechbrain.nnet.CNN.Conv1d
+            out_channels: !ref <emb_channels> // 4
+            kernel_size: 3
+        norm4: !name:speechbrain.nnet.normalization.LayerNorm
+        act4: !new:torch.nn.LeakyReLU
 modules:
     enhance_model: !ref <enhance_model>