Upload config.yaml
Browse files- config.yaml +58 -0
config.yaml
ADDED
|
@@ -0,0 +1,58 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# General model information
|
| 2 |
+
language: en
|
| 3 |
+
tags:
|
| 4 |
+
- speech-enhancement
|
| 5 |
+
- dereverberation
|
| 6 |
+
- diffusion-models
|
| 7 |
+
- generative-models
|
| 8 |
+
- pytorch
|
| 9 |
+
- audio-processing
|
| 10 |
+
license: mit
|
| 11 |
+
datasets:
|
| 12 |
+
- VoiceBank-DEMAND
|
| 13 |
+
- WSJ0-CHiME3
|
| 14 |
+
- WSJ0-REVERB
|
| 15 |
+
- EARS-WHAM
|
| 16 |
+
- EARS-Reverb
|
| 17 |
+
model_name: speech-enhancement-dereverberation-diffusion
|
| 18 |
+
model_type: diffusion-based-generative-model
|
| 19 |
+
library_name: pytorch
|
| 20 |
+
pipeline_tag: audio-to-audio
|
| 21 |
+
|
| 22 |
+
# Important files for the model
|
| 23 |
+
key_files:
|
| 24 |
+
- model.py
|
| 25 |
+
- train.py
|
| 26 |
+
- inference.py
|
| 27 |
+
- enhancement.py
|
| 28 |
+
- requirements.txt
|
| 29 |
+
- calc_metrics.py
|
| 30 |
+
|
| 31 |
+
# Pretrained model checkpoints
|
| 32 |
+
pretrained_checkpoints:
|
| 33 |
+
voicebank_demand:
|
| 34 |
+
description: SGMSE+ trained on VoiceBank-DEMAND
|
| 35 |
+
download_link: https://drive.google.com/drive/folders/1CSnkhUSoiv3RG0xg7WEcVapyLuwDaLbe?usp=sharing
|
| 36 |
+
gdown_id: 1_H3EXvhcYBhOZ9QNUcD5VZHc6ktrRbwQ
|
| 37 |
+
wsj0_chime3:
|
| 38 |
+
description: SGMSE+ trained on WSJ0-CHiME3
|
| 39 |
+
download_link: https://drive.google.com/drive/folders/1CSnkhUSoiv3RG0xg7WEcVapyLuwDaLbe?usp=sharing
|
| 40 |
+
gdown_id: 16K4DUdpmLhDNC7pJhBBc08pkSIn_yMPi
|
| 41 |
+
wsj0_reverb:
|
| 42 |
+
description: SGMSE+ trained on WSJ0-REVERB
|
| 43 |
+
download_link: https://drive.google.com/drive/folders/1082_PSEgrqoVVrNsAkSIcpLF1AAtzGwV?usp=sharing
|
| 44 |
+
gdown_id: 1eiOy0VjHh9V9ZUFTxu1Pq2w19izl9ejD
|
| 45 |
+
ears_wham:
|
| 46 |
+
description: SGMSE+ trained on EARS-WHAM
|
| 47 |
+
download_link: https://drive.google.com/drive/folders/1Tn6pVwjxUAy1DJ8167JCg3enuSi0hiw5?usp=sharing
|
| 48 |
+
gdown_id: 1t_DLLk8iPH6nj8M5wGeOP3jFPaz3i7K5
|
| 49 |
+
ears_reverb:
|
| 50 |
+
description: SGMSE+ trained on EARS-Reverb
|
| 51 |
+
download_link: https://drive.google.com/drive/folders/1PunXuLbuyGkknQCn_y-RCV2dTZBhyE3V?usp=sharing
|
| 52 |
+
gdown_id: 1PunXuLbuyGkknQCn_y-RCV2dTZBhyE3V
|
| 53 |
+
|
| 54 |
+
# Citation references for the model
|
| 55 |
+
citations:
|
| 56 |
+
- '@inproceedings{welker22speech, author={Simon Welker and Julius Richter and Timo Gerkmann}, title={Speech Enhancement with Score-Based Generative Models in the Complex {STFT} Domain}, year={2022}, booktitle={Proc. Interspeech 2022}, pages={2928--2932}, doi={10.21437/Interspeech.2022-10653}}'
|
| 57 |
+
- '@article{richter2023speech, title={Speech Enhancement and Dereverberation with Diffusion-based Generative Models}, author={Richter, Julius and Welker, Simon and Lemercier, Jean-Marie and Lay, Bunlong and Gerkmann, Timo}, journal={IEEE/ACM Transactions on Audio, Speech, and Language Processing}, volume={31}, pages={2351--2364}, year={2023}, doi={10.1109/TASLP.2023.3285241}}'
|
| 58 |
+
- '@inproceedings{richter2024ears, title={{EARS}: An Anechoic Fullband Speech Dataset Benchmarked for Speech Enhancement and Dereverberation}, author={Richter, Julius and Wu, Yi-Chiao and Krenn, Steven and Welker, Simon and Lay, Bunlong and Watanabe, Shinji and Richard, Alexander and Gerkmann, Timo}, booktitle={ISCA Interspeech}, year={2024}}'
|