Shikhar Bharadwaj
committed on
Commit
·
814591e
1
Parent(s):
de7db91
Update model
Browse files- README.md +783 -0
- meta.yaml +8 -0
- work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/data/audioset20k/token_list +529 -0
- work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/exp/audioset20k/cls_earbasei3/RESULTS.md +16 -0
- work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/exp/audioset20k/cls_earbasei3/config.yaml +731 -0
- work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/exp/audioset20k/cls_earbasei3/lightning_logs/version_0/events.out.tfevents.1742492588.gh130.hsn.cm.delta.internal.ncsa.edu.3586759.0 +3 -0
- work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/exp/audioset20k/cls_earbasei3/lightning_logs/version_0/hparams.yaml +208 -0
- work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/exp/audioset20k/cls_earbasei3/valid.epoch_mAP.ave_1best.pth +3 -0
README.md
ADDED
|
@@ -0,0 +1,783 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
tags:
|
| 3 |
+
- espnet
|
| 4 |
+
- audio
|
| 5 |
+
- classification
|
| 6 |
+
datasets:
|
| 7 |
+
- as20k
|
| 8 |
+
license: cc-by-4.0
|
| 9 |
+
---
|
| 10 |
+
|
| 11 |
+
## ESPnet2 CLS model
|
| 12 |
+
|
| 13 |
+
### `espnet/OpenBEATS-Base-i3-as20k`
|
| 14 |
+
|
| 15 |
+
This model was trained by Shikhar Bharadwaj using the as20k recipe in [espnet](https://github.com/espnet/espnet/).
|
| 16 |
+
|
| 17 |
+
## CLS config
|
| 18 |
+
|
| 19 |
+
<details><summary>expand</summary>
|
| 20 |
+
|
| 21 |
+
```
|
| 22 |
+
config: /work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/exp/earbasei3/conf/ear_base/audioset20k.yaml
|
| 23 |
+
print_config: false
|
| 24 |
+
log_level: INFO
|
| 25 |
+
drop_last_iter: false
|
| 26 |
+
dry_run: false
|
| 27 |
+
iterator_type: sequence
|
| 28 |
+
valid_iterator_type: null
|
| 29 |
+
output_dir: /work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/exp/audioset20k/cls_earbasei3
|
| 30 |
+
ngpu: 0
|
| 31 |
+
seed: 0
|
| 32 |
+
num_workers: 2
|
| 33 |
+
num_att_plot: 0
|
| 34 |
+
dist_backend: nccl
|
| 35 |
+
dist_init_method: env://
|
| 36 |
+
dist_world_size: null
|
| 37 |
+
dist_rank: null
|
| 38 |
+
local_rank: null
|
| 39 |
+
dist_master_addr: null
|
| 40 |
+
dist_master_port: null
|
| 41 |
+
dist_launcher: null
|
| 42 |
+
multiprocessing_distributed: false
|
| 43 |
+
unused_parameters: true
|
| 44 |
+
sharded_ddp: false
|
| 45 |
+
use_deepspeed: false
|
| 46 |
+
deepspeed_config: null
|
| 47 |
+
gradient_as_bucket_view: true
|
| 48 |
+
ddp_comm_hook: null
|
| 49 |
+
cudnn_enabled: true
|
| 50 |
+
cudnn_benchmark: false
|
| 51 |
+
cudnn_deterministic: true
|
| 52 |
+
use_tf32: false
|
| 53 |
+
collect_stats: false
|
| 54 |
+
write_collected_feats: false
|
| 55 |
+
max_epoch: 160
|
| 56 |
+
patience: null
|
| 57 |
+
val_scheduler_criterion:
|
| 58 |
+
- valid
|
| 59 |
+
- loss
|
| 60 |
+
early_stopping_criterion:
|
| 61 |
+
- valid
|
| 62 |
+
- loss
|
| 63 |
+
- min
|
| 64 |
+
best_model_criterion:
|
| 65 |
+
- - valid
|
| 66 |
+
- epoch_mAP
|
| 67 |
+
- max
|
| 68 |
+
keep_nbest_models: 1
|
| 69 |
+
nbest_averaging_interval: 0
|
| 70 |
+
grad_clip: 1
|
| 71 |
+
grad_clip_type: 2.0
|
| 72 |
+
grad_noise: false
|
| 73 |
+
accum_grad: 1
|
| 74 |
+
no_forward_run: false
|
| 75 |
+
resume: true
|
| 76 |
+
train_dtype: float32
|
| 77 |
+
use_amp: false
|
| 78 |
+
log_interval: null
|
| 79 |
+
use_matplotlib: true
|
| 80 |
+
use_tensorboard: true
|
| 81 |
+
create_graph_in_tensorboard: false
|
| 82 |
+
use_wandb: true
|
| 83 |
+
wandb_project: audioverse
|
| 84 |
+
wandb_id: null
|
| 85 |
+
wandb_entity: shikhar
|
| 86 |
+
wandb_name: audioset20k.earbasei3
|
| 87 |
+
wandb_model_log_interval: -1
|
| 88 |
+
detect_anomaly: false
|
| 89 |
+
use_adapter: false
|
| 90 |
+
adapter: lora
|
| 91 |
+
save_strategy: all
|
| 92 |
+
adapter_conf: {}
|
| 93 |
+
pretrain_path: null
|
| 94 |
+
init_param: []
|
| 95 |
+
ignore_init_mismatch: false
|
| 96 |
+
freeze_param: []
|
| 97 |
+
num_iters_per_epoch: null
|
| 98 |
+
batch_size: 80
|
| 99 |
+
valid_batch_size: 1200
|
| 100 |
+
batch_bins: 1000000
|
| 101 |
+
valid_batch_bins: null
|
| 102 |
+
category_sample_size: 10
|
| 103 |
+
train_shape_file:
|
| 104 |
+
- /work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/exp/audioset20k/cls_stats_16k/train/speech_shape
|
| 105 |
+
- /work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/exp/audioset20k/cls_stats_16k/train/label_shape
|
| 106 |
+
valid_shape_file:
|
| 107 |
+
- /work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/exp/audioset20k/cls_stats_16k/valid/speech_shape
|
| 108 |
+
- /work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/exp/audioset20k/cls_stats_16k/valid/label_shape
|
| 109 |
+
batch_type: folded
|
| 110 |
+
valid_batch_type: null
|
| 111 |
+
fold_length:
|
| 112 |
+
- 160000
|
| 113 |
+
- 600
|
| 114 |
+
sort_in_batch: descending
|
| 115 |
+
shuffle_within_batch: false
|
| 116 |
+
sort_batch: descending
|
| 117 |
+
multiple_iterator: false
|
| 118 |
+
utt2weight_file: null
|
| 119 |
+
chunk_length: 500
|
| 120 |
+
chunk_shift_ratio: 0.5
|
| 121 |
+
num_cache_chunks: 1024
|
| 122 |
+
chunk_excluded_key_prefixes: []
|
| 123 |
+
chunk_default_fs: null
|
| 124 |
+
chunk_max_abs_length: null
|
| 125 |
+
chunk_discard_short_samples: true
|
| 126 |
+
train_data_path_and_name_and_type:
|
| 127 |
+
- - /work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/dump/audioset20k/train/wav.scp
|
| 128 |
+
- speech
|
| 129 |
+
- sound
|
| 130 |
+
- - /work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/dump/audioset20k/train/text
|
| 131 |
+
- label
|
| 132 |
+
- text
|
| 133 |
+
valid_data_path_and_name_and_type:
|
| 134 |
+
- - /work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/dump/audioset20k/val/wav.scp
|
| 135 |
+
- speech
|
| 136 |
+
- sound
|
| 137 |
+
- - /work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/dump/audioset20k/val/text
|
| 138 |
+
- label
|
| 139 |
+
- text
|
| 140 |
+
multi_task_dataset: false
|
| 141 |
+
allow_variable_data_keys: false
|
| 142 |
+
max_cache_size: 0.0
|
| 143 |
+
max_cache_fd: 32
|
| 144 |
+
allow_multi_rates: false
|
| 145 |
+
valid_max_cache_size: null
|
| 146 |
+
exclude_weight_decay: false
|
| 147 |
+
exclude_weight_decay_conf: {}
|
| 148 |
+
optim: adamw
|
| 149 |
+
optim_conf:
|
| 150 |
+
lr: 3.0e-05
|
| 151 |
+
weight_decay: 0.01
|
| 152 |
+
betas:
|
| 153 |
+
- 0.9
|
| 154 |
+
- 0.98
|
| 155 |
+
scheduler: cosineannealingwarmuprestarts
|
| 156 |
+
scheduler_conf:
|
| 157 |
+
first_cycle_steps: 95000
|
| 158 |
+
warmup_steps: 8000
|
| 159 |
+
max_lr: 3.0e-05
|
| 160 |
+
min_lr: 5.0e-06
|
| 161 |
+
lightning_conf:
|
| 162 |
+
log_every_n_steps: 250
|
| 163 |
+
max_epochs: 500
|
| 164 |
+
strategy: ddp
|
| 165 |
+
strategy_conf:
|
| 166 |
+
find_unused_parameters: true
|
| 167 |
+
best_model_criterion:
|
| 168 |
+
- - valid/epoch_mAP
|
| 169 |
+
- max
|
| 170 |
+
- 1
|
| 171 |
+
devices: 1
|
| 172 |
+
num_nodes: 1
|
| 173 |
+
default_root_dir: /work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/exp/audioset20k/cls_earbasei3
|
| 174 |
+
token_list:
|
| 175 |
+
- Music
|
| 176 |
+
- Speech
|
| 177 |
+
- Vehicle
|
| 178 |
+
- Inside,_small_room
|
| 179 |
+
- Animal
|
| 180 |
+
- Musical_instrument
|
| 181 |
+
- Singing
|
| 182 |
+
- Domestic_animals,_pets
|
| 183 |
+
- Guitar
|
| 184 |
+
- Plucked_string_instrument
|
| 185 |
+
- Water
|
| 186 |
+
- Car
|
| 187 |
+
- Dog
|
| 188 |
+
- Percussion
|
| 189 |
+
- Wind_instrument,_woodwind_instrument
|
| 190 |
+
- Outside,_urban_or_manmade
|
| 191 |
+
- Outside,_rural_or_natural
|
| 192 |
+
- Boat,_Water_vehicle
|
| 193 |
+
- Brass_instrument
|
| 194 |
+
- Fowl
|
| 195 |
+
- Drum
|
| 196 |
+
- Siren
|
| 197 |
+
- Engine
|
| 198 |
+
- Bird
|
| 199 |
+
- Insect
|
| 200 |
+
- Gunshot,_gunfire
|
| 201 |
+
- Wood
|
| 202 |
+
- Rail_transport
|
| 203 |
+
- Train
|
| 204 |
+
- Wind
|
| 205 |
+
- Inside,_large_room_or_hall
|
| 206 |
+
- Railroad_car,_train_wagon
|
| 207 |
+
- Child_speech,_kid_speaking
|
| 208 |
+
- Crowd
|
| 209 |
+
- Rub
|
| 210 |
+
- Keyboard_(musical)
|
| 211 |
+
- Wind_noise_(microphone)
|
| 212 |
+
- Pizzicato
|
| 213 |
+
- Emergency_vehicle
|
| 214 |
+
- Bird_vocalization,_bird_call,_bird_song
|
| 215 |
+
- Livestock,_farm_animals,_working_animals
|
| 216 |
+
- Cat
|
| 217 |
+
- Organ
|
| 218 |
+
- Fly,_housefly
|
| 219 |
+
- Mechanisms
|
| 220 |
+
- Bowed_string_instrument
|
| 221 |
+
- Rain
|
| 222 |
+
- Laughter
|
| 223 |
+
- Aircraft
|
| 224 |
+
- Electronic_music
|
| 225 |
+
- Effects_unit
|
| 226 |
+
- Hum
|
| 227 |
+
- Tools
|
| 228 |
+
- Drum_kit
|
| 229 |
+
- Snare_drum
|
| 230 |
+
- Hiss
|
| 231 |
+
- Piano
|
| 232 |
+
- Water_tap,_faucet
|
| 233 |
+
- Rimshot
|
| 234 |
+
- Bass_drum
|
| 235 |
+
- Chicken,_rooster
|
| 236 |
+
- Marimba,_xylophone
|
| 237 |
+
- Horse
|
| 238 |
+
- Song
|
| 239 |
+
- Quack
|
| 240 |
+
- Power_tool
|
| 241 |
+
- Heart_sounds,_heartbeat
|
| 242 |
+
- Goose
|
| 243 |
+
- Hammond_organ
|
| 244 |
+
- Rock_music
|
| 245 |
+
- Ocean
|
| 246 |
+
- Mains_hum
|
| 247 |
+
- Thunder
|
| 248 |
+
- Chime
|
| 249 |
+
- Electronic_dance_music
|
| 250 |
+
- Typing
|
| 251 |
+
- Sink_(filling_or_washing)
|
| 252 |
+
- Raindrop
|
| 253 |
+
- Cello
|
| 254 |
+
- Electric_guitar
|
| 255 |
+
- Cheering
|
| 256 |
+
- Church_bell
|
| 257 |
+
- Christian_music
|
| 258 |
+
- Drum_roll
|
| 259 |
+
- Trombone
|
| 260 |
+
- Glockenspiel
|
| 261 |
+
- Trumpet
|
| 262 |
+
- Cymbal
|
| 263 |
+
- Tabla
|
| 264 |
+
- Clickety-clack
|
| 265 |
+
- Cricket
|
| 266 |
+
- Steam_whistle
|
| 267 |
+
- Explosion
|
| 268 |
+
- Saxophone
|
| 269 |
+
- Thunderstorm
|
| 270 |
+
- Pop_music
|
| 271 |
+
- Zither
|
| 272 |
+
- Applause
|
| 273 |
+
- Choir
|
| 274 |
+
- Whack,_thwack
|
| 275 |
+
- Clarinet
|
| 276 |
+
- Camera
|
| 277 |
+
- Electric_piano
|
| 278 |
+
- Independent_music
|
| 279 |
+
- Fire
|
| 280 |
+
- Frog
|
| 281 |
+
- Jet_engine
|
| 282 |
+
- Music_of_Asia
|
| 283 |
+
- Ding
|
| 284 |
+
- Waves,_surf
|
| 285 |
+
- Cattle,_bovinae
|
| 286 |
+
- Turkey
|
| 287 |
+
- Television
|
| 288 |
+
- Coo
|
| 289 |
+
- Scratching_(performance_technique)
|
| 290 |
+
- Flute
|
| 291 |
+
- Liquid
|
| 292 |
+
- Harp
|
| 293 |
+
- Progressive_rock
|
| 294 |
+
- Happy_music
|
| 295 |
+
- Steel_guitar,_slide_guitar
|
| 296 |
+
- Whoosh,_swoosh,_swish
|
| 297 |
+
- Boom
|
| 298 |
+
- Breathing
|
| 299 |
+
- Electronic_organ
|
| 300 |
+
- Environmental_noise
|
| 301 |
+
- Distortion
|
| 302 |
+
- Alarm_clock
|
| 303 |
+
- Fixed-wing_aircraft,_airplane
|
| 304 |
+
- Violin,_fiddle
|
| 305 |
+
- Whistling
|
| 306 |
+
- Accordion
|
| 307 |
+
- Disco
|
| 308 |
+
- Pump_(liquid)
|
| 309 |
+
- Waterfall
|
| 310 |
+
- Beep,_bleep
|
| 311 |
+
- Blues
|
| 312 |
+
- Grunge
|
| 313 |
+
- Hip_hop_music
|
| 314 |
+
- Whistle
|
| 315 |
+
- Fusillade
|
| 316 |
+
- Splash,_splatter
|
| 317 |
+
- Gush
|
| 318 |
+
- Toothbrush
|
| 319 |
+
- Knock
|
| 320 |
+
- Gargling
|
| 321 |
+
- Snoring
|
| 322 |
+
- Hammer
|
| 323 |
+
- Gobble
|
| 324 |
+
- Walk,_footsteps
|
| 325 |
+
- Jackhammer
|
| 326 |
+
- Filing_(rasp)
|
| 327 |
+
- Snort
|
| 328 |
+
- Narration,_monologue
|
| 329 |
+
- Tire_squeal
|
| 330 |
+
- Fire_alarm
|
| 331 |
+
- Squeal
|
| 332 |
+
- Meow
|
| 333 |
+
- Caterwaul
|
| 334 |
+
- Cutlery,_silverware
|
| 335 |
+
- Mantra
|
| 336 |
+
- Opera
|
| 337 |
+
- Classical_music
|
| 338 |
+
- Theremin
|
| 339 |
+
- Burst,_pop
|
| 340 |
+
- Drip
|
| 341 |
+
- Tick
|
| 342 |
+
- Children_shouting
|
| 343 |
+
- Creak
|
| 344 |
+
- Hiccup
|
| 345 |
+
- Pigeon,_dove
|
| 346 |
+
- Bicycle_bell
|
| 347 |
+
- Baby_cry,_infant_cry
|
| 348 |
+
- Duck
|
| 349 |
+
- Fireworks
|
| 350 |
+
- Tambourine
|
| 351 |
+
- Rodents,_rats,_mice
|
| 352 |
+
- Buzzer
|
| 353 |
+
- Splinter
|
| 354 |
+
- Writing
|
| 355 |
+
- Goat
|
| 356 |
+
- Sheep
|
| 357 |
+
- Heavy_metal
|
| 358 |
+
- Ska
|
| 359 |
+
- Neigh,_whinny
|
| 360 |
+
- Sizzle
|
| 361 |
+
- Rowboat,_canoe,_kayak
|
| 362 |
+
- Wood_block
|
| 363 |
+
- Clang
|
| 364 |
+
- Door
|
| 365 |
+
- Female_singing
|
| 366 |
+
- Stream
|
| 367 |
+
- Chant
|
| 368 |
+
- Vocal_music
|
| 369 |
+
- Yodeling
|
| 370 |
+
- Bee,_wasp,_etc.
|
| 371 |
+
- Air_brake
|
| 372 |
+
- Whir
|
| 373 |
+
- Bird_flight,_flapping_wings
|
| 374 |
+
- French_horn
|
| 375 |
+
- Telephone_dialing,_DTMF
|
| 376 |
+
- Squeak
|
| 377 |
+
- Sitar
|
| 378 |
+
- Smoke_detector,_smoke_alarm
|
| 379 |
+
- Tick-tock
|
| 380 |
+
- Gurgling
|
| 381 |
+
- Bellow
|
| 382 |
+
- Harmonic
|
| 383 |
+
- Male_singing
|
| 384 |
+
- Giggle
|
| 385 |
+
- Bark
|
| 386 |
+
- Vibration
|
| 387 |
+
- Drill
|
| 388 |
+
- Skidding
|
| 389 |
+
- Scratch
|
| 390 |
+
- Drawer_open_or_close
|
| 391 |
+
- Chop
|
| 392 |
+
- Drum_machine
|
| 393 |
+
- Squish
|
| 394 |
+
- Toilet_flush
|
| 395 |
+
- Fart
|
| 396 |
+
- Basketball_bounce
|
| 397 |
+
- Electronic_tuner
|
| 398 |
+
- Singing_bowl
|
| 399 |
+
- Squawk
|
| 400 |
+
- Conversation
|
| 401 |
+
- Reggae
|
| 402 |
+
- Funny_music
|
| 403 |
+
- Scrape
|
| 404 |
+
- Sewing_machine
|
| 405 |
+
- Tender_music
|
| 406 |
+
- Swing_music
|
| 407 |
+
- Dishes,_pots,_and_pans
|
| 408 |
+
- Sampler
|
| 409 |
+
- Synthesizer
|
| 410 |
+
- Clapping
|
| 411 |
+
- Hubbub,_speech_noise,_speech_babble
|
| 412 |
+
- Engine_knocking
|
| 413 |
+
- Canidae,_dogs,_wolves
|
| 414 |
+
- Chainsaw
|
| 415 |
+
- Pour
|
| 416 |
+
- Croak
|
| 417 |
+
- Chewing,_mastication
|
| 418 |
+
- Cowbell
|
| 419 |
+
- Propeller,_airscrew
|
| 420 |
+
- Didgeridoo
|
| 421 |
+
- Ringtone
|
| 422 |
+
- Rattle_(instrument)
|
| 423 |
+
- Artillery_fire
|
| 424 |
+
- Cash_register
|
| 425 |
+
- Crack
|
| 426 |
+
- Growling
|
| 427 |
+
- Mosquito
|
| 428 |
+
- Carnatic_music
|
| 429 |
+
- Honk
|
| 430 |
+
- Howl
|
| 431 |
+
- Cacophony
|
| 432 |
+
- Gospel_music
|
| 433 |
+
- Firecracker
|
| 434 |
+
- Strum
|
| 435 |
+
- Motorboat,_speedboat
|
| 436 |
+
- Clock
|
| 437 |
+
- Dance_music
|
| 438 |
+
- Microwave_oven
|
| 439 |
+
- Country
|
| 440 |
+
- Bluegrass
|
| 441 |
+
- Rattle
|
| 442 |
+
- Mallet_percussion
|
| 443 |
+
- Computer_keyboard
|
| 444 |
+
- Bass_guitar
|
| 445 |
+
- Electric_shaver,_electric_razor
|
| 446 |
+
- Sawing
|
| 447 |
+
- Owl
|
| 448 |
+
- Whip
|
| 449 |
+
- White_noise
|
| 450 |
+
- Chirp_tone
|
| 451 |
+
- Boiling
|
| 452 |
+
- Ship
|
| 453 |
+
- Mouse
|
| 454 |
+
- Breaking
|
| 455 |
+
- Silence
|
| 456 |
+
- Throat_clearing
|
| 457 |
+
- Bleat
|
| 458 |
+
- Salsa_music
|
| 459 |
+
- Patter
|
| 460 |
+
- Vibraphone
|
| 461 |
+
- Flap
|
| 462 |
+
- Typewriter
|
| 463 |
+
- Change_ringing_(campanology)
|
| 464 |
+
- Trickle,_dribble
|
| 465 |
+
- Video_game_music
|
| 466 |
+
- Glass
|
| 467 |
+
- Dial_tone
|
| 468 |
+
- Radio
|
| 469 |
+
- Bell
|
| 470 |
+
- Moo
|
| 471 |
+
- Heart_murmur
|
| 472 |
+
- Clatter
|
| 473 |
+
- Sniff
|
| 474 |
+
- Double_bass
|
| 475 |
+
- Background_music
|
| 476 |
+
- Lawn_mower
|
| 477 |
+
- Printer
|
| 478 |
+
- House_music
|
| 479 |
+
- Tearing
|
| 480 |
+
- Angry_music
|
| 481 |
+
- Male_speech,_man_speaking
|
| 482 |
+
- Wild_animals
|
| 483 |
+
- Cupboard_open_or_close
|
| 484 |
+
- Harpsichord
|
| 485 |
+
- Light_engine_(high_frequency)
|
| 486 |
+
- Child_singing
|
| 487 |
+
- Zipper_(clothing)
|
| 488 |
+
- Jazz
|
| 489 |
+
- Belly_laugh
|
| 490 |
+
- Roar
|
| 491 |
+
- Motor_vehicle_(road)
|
| 492 |
+
- Crowing,_cock-a-doodle-doo
|
| 493 |
+
- Cluck
|
| 494 |
+
- Sad_music
|
| 495 |
+
- Hi-hat
|
| 496 |
+
- Cough
|
| 497 |
+
- Stomach_rumble
|
| 498 |
+
- Alarm
|
| 499 |
+
- String_section
|
| 500 |
+
- Sonar
|
| 501 |
+
- Keys_jangling
|
| 502 |
+
- Synthetic_singing
|
| 503 |
+
- Rapping
|
| 504 |
+
- Sidetone
|
| 505 |
+
- Orchestra
|
| 506 |
+
- Throbbing
|
| 507 |
+
- Whale_vocalization
|
| 508 |
+
- Thunk
|
| 509 |
+
- Children_playing
|
| 510 |
+
- Snake
|
| 511 |
+
- Chink,_clink
|
| 512 |
+
- Chirp,_tweet
|
| 513 |
+
- Boing
|
| 514 |
+
- Shuffle
|
| 515 |
+
- Pulse
|
| 516 |
+
- Punk_rock
|
| 517 |
+
- Crow
|
| 518 |
+
- Caw
|
| 519 |
+
- Static
|
| 520 |
+
- Clicking
|
| 521 |
+
- Snicker
|
| 522 |
+
- Whispering
|
| 523 |
+
- Pink_noise
|
| 524 |
+
- Crushing
|
| 525 |
+
- Wedding_music
|
| 526 |
+
- Crumpling,_crinkling
|
| 527 |
+
- Crackle
|
| 528 |
+
- Whoop
|
| 529 |
+
- Electric_toothbrush
|
| 530 |
+
- Train_wheels_squealing
|
| 531 |
+
- Yell
|
| 532 |
+
- Wind_chime
|
| 533 |
+
- Frying_(food)
|
| 534 |
+
- Christmas_music
|
| 535 |
+
- Fill_(with_liquid)
|
| 536 |
+
- Reverberation
|
| 537 |
+
- Beatboxing
|
| 538 |
+
- Harmonica
|
| 539 |
+
- Banjo
|
| 540 |
+
- Sliding_door
|
| 541 |
+
- Groan
|
| 542 |
+
- Bagpipes
|
| 543 |
+
- Spray
|
| 544 |
+
- Stir
|
| 545 |
+
- Acoustic_guitar
|
| 546 |
+
- Tap
|
| 547 |
+
- Chorus_effect
|
| 548 |
+
- Noise
|
| 549 |
+
- Crunch
|
| 550 |
+
- Biting
|
| 551 |
+
- Aircraft_engine
|
| 552 |
+
- Busy_signal
|
| 553 |
+
- Bang
|
| 554 |
+
- Techno
|
| 555 |
+
- Tuning_fork
|
| 556 |
+
- Tapping_(guitar_technique)
|
| 557 |
+
- Pig
|
| 558 |
+
- Maraca
|
| 559 |
+
- Vacuum_cleaner
|
| 560 |
+
- Mandolin
|
| 561 |
+
- Electronica
|
| 562 |
+
- Theme_music
|
| 563 |
+
- Yip
|
| 564 |
+
- A_capella
|
| 565 |
+
- Rustle
|
| 566 |
+
- Chatter
|
| 567 |
+
- Traditional_music
|
| 568 |
+
- Soul_music
|
| 569 |
+
- Rustling_leaves
|
| 570 |
+
- Afrobeat
|
| 571 |
+
- Hoot
|
| 572 |
+
- Slosh
|
| 573 |
+
- Roaring_cats_(lions,_tigers)
|
| 574 |
+
- Chopping_(food)
|
| 575 |
+
- Heavy_engine_(low_frequency)
|
| 576 |
+
- Sine_wave
|
| 577 |
+
- Speech_synthesizer
|
| 578 |
+
- Middle_Eastern_music
|
| 579 |
+
- Music_of_Latin_America
|
| 580 |
+
- Arrow
|
| 581 |
+
- Timpani
|
| 582 |
+
- Eruption
|
| 583 |
+
- Shofar
|
| 584 |
+
- Jingle_bell
|
| 585 |
+
- Humming
|
| 586 |
+
- Sanding
|
| 587 |
+
- Female_speech,_woman_speaking
|
| 588 |
+
- Gong
|
| 589 |
+
- Rain_on_surface
|
| 590 |
+
- Pant
|
| 591 |
+
- Dubstep
|
| 592 |
+
- Clip-clop
|
| 593 |
+
- Finger_snapping
|
| 594 |
+
- Blender
|
| 595 |
+
- Drum_and_bass
|
| 596 |
+
- Bouncing
|
| 597 |
+
- Vehicle_horn,_car_horn,_honking
|
| 598 |
+
- Slam
|
| 599 |
+
- Idling
|
| 600 |
+
- Rhythm_and_blues
|
| 601 |
+
- Race_car,_auto_racing
|
| 602 |
+
- Single-lens_reflex_camera
|
| 603 |
+
- Smash,_crash
|
| 604 |
+
- Purr
|
| 605 |
+
- Shatter
|
| 606 |
+
- Steelpan
|
| 607 |
+
- Whimper_(dog)
|
| 608 |
+
- Power_windows,_electric_windows
|
| 609 |
+
- Battle_cry
|
| 610 |
+
- Scary_music
|
| 611 |
+
- Hands
|
| 612 |
+
- Echo
|
| 613 |
+
- Truck
|
| 614 |
+
- Buzz
|
| 615 |
+
- Mechanical_fan
|
| 616 |
+
- Plop
|
| 617 |
+
- Run
|
| 618 |
+
- Gasp
|
| 619 |
+
- Psychedelic_rock
|
| 620 |
+
- Grunt
|
| 621 |
+
- Helicopter
|
| 622 |
+
- Dental_drill,_dentist's_drill
|
| 623 |
+
- Babbling
|
| 624 |
+
- Zing
|
| 625 |
+
- Oink
|
| 626 |
+
- Soundtrack_music
|
| 627 |
+
- Ambulance_(siren)
|
| 628 |
+
- Exciting_music
|
| 629 |
+
- Telephone
|
| 630 |
+
- Jingle_(music)
|
| 631 |
+
- Tubular_bells
|
| 632 |
+
- Burping,_eructation
|
| 633 |
+
- Baby_laughter
|
| 634 |
+
- Ping
|
| 635 |
+
- Bow-wow
|
| 636 |
+
- Foghorn
|
| 637 |
+
- Machine_gun
|
| 638 |
+
- Ukulele
|
| 639 |
+
- Telephone_bell_ringing
|
| 640 |
+
- Pulleys
|
| 641 |
+
- Gears
|
| 642 |
+
- Sigh
|
| 643 |
+
- Coin_(dropping)
|
| 644 |
+
- Music_of_Africa
|
| 645 |
+
- Scissors
|
| 646 |
+
- Inside,_public_space
|
| 647 |
+
- Trance_music
|
| 648 |
+
- Roll
|
| 649 |
+
- Thump,_thud
|
| 650 |
+
- Air_conditioning
|
| 651 |
+
- Ding-dong
|
| 652 |
+
- Ratchet,_pawl
|
| 653 |
+
- Hair_dryer
|
| 654 |
+
- Shout
|
| 655 |
+
- Ambient_music
|
| 656 |
+
- Music_for_children
|
| 657 |
+
- Toot
|
| 658 |
+
- Bathtub_(filling_or_washing)
|
| 659 |
+
- Slap,_smack
|
| 660 |
+
- Chuckle,_chortle
|
| 661 |
+
- Traffic_noise,_roadway_noise
|
| 662 |
+
- Bicycle
|
| 663 |
+
- Whimper
|
| 664 |
+
- Doorbell
|
| 665 |
+
- Wheeze
|
| 666 |
+
- Sailboat,_sailing_ship
|
| 667 |
+
- Cap_gun
|
| 668 |
+
- Wail,_moan
|
| 669 |
+
- Rock_and_roll
|
| 670 |
+
- Jingle,_tinkle
|
| 671 |
+
- Fire_engine,_fire_truck_(siren)
|
| 672 |
+
- Funk
|
| 673 |
+
- Lullaby
|
| 674 |
+
- Field_recording
|
| 675 |
+
- Skateboard
|
| 676 |
+
- Steam
|
| 677 |
+
- Rumble
|
| 678 |
+
- Medium_engine_(mid_frequency)
|
| 679 |
+
- Sound_effect
|
| 680 |
+
- Flamenco
|
| 681 |
+
- Shuffling_cards
|
| 682 |
+
- Subway,_metro,_underground
|
| 683 |
+
- Police_car_(siren)
|
| 684 |
+
- Folk_music
|
| 685 |
+
- Crying,_sobbing
|
| 686 |
+
- New-age_music
|
| 687 |
+
- Ice_cream_truck,_ice_cream_van
|
| 688 |
+
- Music_of_Bollywood
|
| 689 |
+
- Accelerating,_revving,_vroom
|
| 690 |
+
- Screaming
|
| 691 |
+
- Motorcycle
|
| 692 |
+
- Engine_starting
|
| 693 |
+
- Train_whistle
|
| 694 |
+
- Car_passing_by
|
| 695 |
+
- Bus
|
| 696 |
+
- Sneeze
|
| 697 |
+
- Train_horn
|
| 698 |
+
- Air_horn,_truck_horn
|
| 699 |
+
- Civil_defense_siren
|
| 700 |
+
- Car_alarm
|
| 701 |
+
- Reversing_beeps
|
| 702 |
+
- <blank>
|
| 703 |
+
- <unk>
|
| 704 |
+
text_token_list: null
|
| 705 |
+
text_bpemodel: null
|
| 706 |
+
init: xavier_normal
|
| 707 |
+
input_size: 1
|
| 708 |
+
use_preprocessor: true
|
| 709 |
+
frontend: null
|
| 710 |
+
frontend_conf: {}
|
| 711 |
+
specaug: null
|
| 712 |
+
specaug_conf: {}
|
| 713 |
+
normalize: null
|
| 714 |
+
normalize_conf: {}
|
| 715 |
+
preencoder: null
|
| 716 |
+
preencoder_conf: {}
|
| 717 |
+
encoder: beats
|
| 718 |
+
encoder_conf:
|
| 719 |
+
beats_ckpt_path: /work/nvme/bbjs/sbharadwaj/7Msounds/exp/beats_iter2_base2.tune_lr5e-4_warmup40000_bins1600000_totalsteps400000/epoch_latest.pt
|
| 720 |
+
beats_config:
|
| 721 |
+
layer_wise_gradient_decay_ratio: 0.3
|
| 722 |
+
encoder_layerdrop: 0.1
|
| 723 |
+
dropout: 0.0
|
| 724 |
+
use_weighted_representation: false
|
| 725 |
+
specaug_config:
|
| 726 |
+
apply_time_warp: true
|
| 727 |
+
apply_freq_mask: false
|
| 728 |
+
apply_time_mask: true
|
| 729 |
+
time_mask_width_ratio_range:
|
| 730 |
+
- 0
|
| 731 |
+
- 0.06
|
| 732 |
+
num_time_mask: 1
|
| 733 |
+
roll_augment: true
|
| 734 |
+
roll_interval: 1
|
| 735 |
+
text_encoder: null
|
| 736 |
+
text_encoder_conf: {}
|
| 737 |
+
embedding_fusion: null
|
| 738 |
+
embedding_fusion_conf: {}
|
| 739 |
+
decoder: linear
|
| 740 |
+
decoder_conf: {}
|
| 741 |
+
model: espnet
|
| 742 |
+
model_conf:
|
| 743 |
+
classification_type: multi-label
|
| 744 |
+
mixup_probability: 0.8
|
| 745 |
+
lsm_weight: 0.0
|
| 746 |
+
log_epoch_metrics: true
|
| 747 |
+
user_callbacks:
|
| 748 |
+
- mAP_logging
|
| 749 |
+
required:
|
| 750 |
+
- output_dir
|
| 751 |
+
- token_list
|
| 752 |
+
task: cls
|
| 753 |
+
```
|
| 754 |
+
|
| 755 |
+
</details>
|
| 756 |
+
|
| 757 |
+
### Citations
|
| 758 |
+
|
| 759 |
+
```BibTex
|
| 760 |
+
|
| 761 |
+
@article{bharadwaj2025openbeats,
|
| 762 |
+
title={OpenBEATs: A Fully Open-Source General-Purpose Audio Encoder},
|
| 763 |
+
author={Bharadwaj, Shikhar and Cornell, Samuele and Choi, Kwanghee and Fukayama, Satoru and Shim, Hye-jin and Deshmukh, Soham and Watanabe, Shinji},
|
| 764 |
+
journal={arXiv preprint arXiv:2507.14129},
|
| 765 |
+
year={2025}
|
| 766 |
+
}
|
| 767 |
+
|
| 768 |
+
@inproceedings{watanabe2018espnet,
|
| 769 |
+
author={Shinji Watanabe and Takaaki Hori and Shigeki Karita and Tomoki Hayashi and Jiro Nishitoba and Yuya Unno and Nelson Yalta and Jahn Heymann and Matthew Wiesner and Nanxin Chen and Adithya Renduchintala and Tsubasa Ochiai},
|
| 770 |
+
title={{ESPnet}: End-to-End Speech Processing Toolkit},
|
| 771 |
+
year={2018},
|
| 772 |
+
booktitle={Proceedings of Interspeech},
|
| 773 |
+
pages={2207--2211},
|
| 774 |
+
doi={10.21437/Interspeech.2018-1456},
|
| 775 |
+
url={http://dx.doi.org/10.21437/Interspeech.2018-1456}
|
| 776 |
+
}
|
| 777 |
+
|
| 778 |
+
|
| 779 |
+
|
| 780 |
+
|
| 781 |
+
|
| 782 |
+
|
| 783 |
+
```
|
meta.yaml
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
espnet: '202503'
|
| 2 |
+
files:
|
| 3 |
+
classification_model_file: /work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/exp/audioset20k/cls_earbasei3/valid.epoch_mAP.ave_1best.pth
|
| 4 |
+
python: "3.9.18 | packaged by conda-forge | (main, Dec 23 2023, 17:20:25) \n[GCC 12.3.0]"
|
| 5 |
+
timestamp: 1763330905.304442
|
| 6 |
+
torch: 2.1.2
|
| 7 |
+
yaml_files:
|
| 8 |
+
classification_train_config: /work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/exp/audioset20k/cls_earbasei3/config.yaml
|
work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/data/audioset20k/token_list
ADDED
|
@@ -0,0 +1,529 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Music
|
| 2 |
+
Speech
|
| 3 |
+
Vehicle
|
| 4 |
+
Inside,_small_room
|
| 5 |
+
Animal
|
| 6 |
+
Musical_instrument
|
| 7 |
+
Singing
|
| 8 |
+
Domestic_animals,_pets
|
| 9 |
+
Guitar
|
| 10 |
+
Plucked_string_instrument
|
| 11 |
+
Water
|
| 12 |
+
Car
|
| 13 |
+
Dog
|
| 14 |
+
Percussion
|
| 15 |
+
Wind_instrument,_woodwind_instrument
|
| 16 |
+
Outside,_urban_or_manmade
|
| 17 |
+
Outside,_rural_or_natural
|
| 18 |
+
Boat,_Water_vehicle
|
| 19 |
+
Brass_instrument
|
| 20 |
+
Fowl
|
| 21 |
+
Drum
|
| 22 |
+
Siren
|
| 23 |
+
Engine
|
| 24 |
+
Bird
|
| 25 |
+
Insect
|
| 26 |
+
Gunshot,_gunfire
|
| 27 |
+
Wood
|
| 28 |
+
Rail_transport
|
| 29 |
+
Train
|
| 30 |
+
Wind
|
| 31 |
+
Inside,_large_room_or_hall
|
| 32 |
+
Railroad_car,_train_wagon
|
| 33 |
+
Child_speech,_kid_speaking
|
| 34 |
+
Crowd
|
| 35 |
+
Rub
|
| 36 |
+
Keyboard_(musical)
|
| 37 |
+
Wind_noise_(microphone)
|
| 38 |
+
Pizzicato
|
| 39 |
+
Emergency_vehicle
|
| 40 |
+
Bird_vocalization,_bird_call,_bird_song
|
| 41 |
+
Livestock,_farm_animals,_working_animals
|
| 42 |
+
Cat
|
| 43 |
+
Organ
|
| 44 |
+
Fly,_housefly
|
| 45 |
+
Mechanisms
|
| 46 |
+
Bowed_string_instrument
|
| 47 |
+
Rain
|
| 48 |
+
Laughter
|
| 49 |
+
Aircraft
|
| 50 |
+
Electronic_music
|
| 51 |
+
Effects_unit
|
| 52 |
+
Hum
|
| 53 |
+
Tools
|
| 54 |
+
Drum_kit
|
| 55 |
+
Snare_drum
|
| 56 |
+
Hiss
|
| 57 |
+
Piano
|
| 58 |
+
Water_tap,_faucet
|
| 59 |
+
Rimshot
|
| 60 |
+
Bass_drum
|
| 61 |
+
Chicken,_rooster
|
| 62 |
+
Marimba,_xylophone
|
| 63 |
+
Horse
|
| 64 |
+
Song
|
| 65 |
+
Quack
|
| 66 |
+
Power_tool
|
| 67 |
+
Heart_sounds,_heartbeat
|
| 68 |
+
Goose
|
| 69 |
+
Hammond_organ
|
| 70 |
+
Rock_music
|
| 71 |
+
Ocean
|
| 72 |
+
Mains_hum
|
| 73 |
+
Thunder
|
| 74 |
+
Chime
|
| 75 |
+
Electronic_dance_music
|
| 76 |
+
Typing
|
| 77 |
+
Sink_(filling_or_washing)
|
| 78 |
+
Raindrop
|
| 79 |
+
Cello
|
| 80 |
+
Electric_guitar
|
| 81 |
+
Cheering
|
| 82 |
+
Church_bell
|
| 83 |
+
Christian_music
|
| 84 |
+
Drum_roll
|
| 85 |
+
Trombone
|
| 86 |
+
Glockenspiel
|
| 87 |
+
Trumpet
|
| 88 |
+
Cymbal
|
| 89 |
+
Tabla
|
| 90 |
+
Clickety-clack
|
| 91 |
+
Cricket
|
| 92 |
+
Steam_whistle
|
| 93 |
+
Explosion
|
| 94 |
+
Saxophone
|
| 95 |
+
Thunderstorm
|
| 96 |
+
Pop_music
|
| 97 |
+
Zither
|
| 98 |
+
Applause
|
| 99 |
+
Choir
|
| 100 |
+
Whack,_thwack
|
| 101 |
+
Clarinet
|
| 102 |
+
Camera
|
| 103 |
+
Electric_piano
|
| 104 |
+
Independent_music
|
| 105 |
+
Fire
|
| 106 |
+
Frog
|
| 107 |
+
Jet_engine
|
| 108 |
+
Music_of_Asia
|
| 109 |
+
Ding
|
| 110 |
+
Waves,_surf
|
| 111 |
+
Cattle,_bovinae
|
| 112 |
+
Turkey
|
| 113 |
+
Television
|
| 114 |
+
Coo
|
| 115 |
+
Scratching_(performance_technique)
|
| 116 |
+
Flute
|
| 117 |
+
Liquid
|
| 118 |
+
Harp
|
| 119 |
+
Progressive_rock
|
| 120 |
+
Happy_music
|
| 121 |
+
Steel_guitar,_slide_guitar
|
| 122 |
+
Whoosh,_swoosh,_swish
|
| 123 |
+
Boom
|
| 124 |
+
Breathing
|
| 125 |
+
Electronic_organ
|
| 126 |
+
Environmental_noise
|
| 127 |
+
Distortion
|
| 128 |
+
Alarm_clock
|
| 129 |
+
Fixed-wing_aircraft,_airplane
|
| 130 |
+
Violin,_fiddle
|
| 131 |
+
Whistling
|
| 132 |
+
Accordion
|
| 133 |
+
Disco
|
| 134 |
+
Pump_(liquid)
|
| 135 |
+
Waterfall
|
| 136 |
+
Beep,_bleep
|
| 137 |
+
Blues
|
| 138 |
+
Grunge
|
| 139 |
+
Hip_hop_music
|
| 140 |
+
Whistle
|
| 141 |
+
Fusillade
|
| 142 |
+
Splash,_splatter
|
| 143 |
+
Gush
|
| 144 |
+
Toothbrush
|
| 145 |
+
Knock
|
| 146 |
+
Gargling
|
| 147 |
+
Snoring
|
| 148 |
+
Hammer
|
| 149 |
+
Gobble
|
| 150 |
+
Walk,_footsteps
|
| 151 |
+
Jackhammer
|
| 152 |
+
Filing_(rasp)
|
| 153 |
+
Snort
|
| 154 |
+
Narration,_monologue
|
| 155 |
+
Tire_squeal
|
| 156 |
+
Fire_alarm
|
| 157 |
+
Squeal
|
| 158 |
+
Meow
|
| 159 |
+
Caterwaul
|
| 160 |
+
Cutlery,_silverware
|
| 161 |
+
Mantra
|
| 162 |
+
Opera
|
| 163 |
+
Classical_music
|
| 164 |
+
Theremin
|
| 165 |
+
Burst,_pop
|
| 166 |
+
Drip
|
| 167 |
+
Tick
|
| 168 |
+
Children_shouting
|
| 169 |
+
Creak
|
| 170 |
+
Hiccup
|
| 171 |
+
Pigeon,_dove
|
| 172 |
+
Bicycle_bell
|
| 173 |
+
Baby_cry,_infant_cry
|
| 174 |
+
Duck
|
| 175 |
+
Fireworks
|
| 176 |
+
Tambourine
|
| 177 |
+
Rodents,_rats,_mice
|
| 178 |
+
Buzzer
|
| 179 |
+
Splinter
|
| 180 |
+
Writing
|
| 181 |
+
Goat
|
| 182 |
+
Sheep
|
| 183 |
+
Heavy_metal
|
| 184 |
+
Ska
|
| 185 |
+
Neigh,_whinny
|
| 186 |
+
Sizzle
|
| 187 |
+
Rowboat,_canoe,_kayak
|
| 188 |
+
Wood_block
|
| 189 |
+
Clang
|
| 190 |
+
Door
|
| 191 |
+
Female_singing
|
| 192 |
+
Stream
|
| 193 |
+
Chant
|
| 194 |
+
Vocal_music
|
| 195 |
+
Yodeling
|
| 196 |
+
Bee,_wasp,_etc.
|
| 197 |
+
Air_brake
|
| 198 |
+
Whir
|
| 199 |
+
Bird_flight,_flapping_wings
|
| 200 |
+
French_horn
|
| 201 |
+
Telephone_dialing,_DTMF
|
| 202 |
+
Squeak
|
| 203 |
+
Sitar
|
| 204 |
+
Smoke_detector,_smoke_alarm
|
| 205 |
+
Tick-tock
|
| 206 |
+
Gurgling
|
| 207 |
+
Bellow
|
| 208 |
+
Harmonic
|
| 209 |
+
Male_singing
|
| 210 |
+
Giggle
|
| 211 |
+
Bark
|
| 212 |
+
Vibration
|
| 213 |
+
Drill
|
| 214 |
+
Skidding
|
| 215 |
+
Scratch
|
| 216 |
+
Drawer_open_or_close
|
| 217 |
+
Chop
|
| 218 |
+
Drum_machine
|
| 219 |
+
Squish
|
| 220 |
+
Toilet_flush
|
| 221 |
+
Fart
|
| 222 |
+
Basketball_bounce
|
| 223 |
+
Electronic_tuner
|
| 224 |
+
Singing_bowl
|
| 225 |
+
Squawk
|
| 226 |
+
Conversation
|
| 227 |
+
Reggae
|
| 228 |
+
Funny_music
|
| 229 |
+
Scrape
|
| 230 |
+
Sewing_machine
|
| 231 |
+
Tender_music
|
| 232 |
+
Swing_music
|
| 233 |
+
Dishes,_pots,_and_pans
|
| 234 |
+
Sampler
|
| 235 |
+
Synthesizer
|
| 236 |
+
Clapping
|
| 237 |
+
Hubbub,_speech_noise,_speech_babble
|
| 238 |
+
Engine_knocking
|
| 239 |
+
Canidae,_dogs,_wolves
|
| 240 |
+
Chainsaw
|
| 241 |
+
Pour
|
| 242 |
+
Croak
|
| 243 |
+
Chewing,_mastication
|
| 244 |
+
Cowbell
|
| 245 |
+
Propeller,_airscrew
|
| 246 |
+
Didgeridoo
|
| 247 |
+
Ringtone
|
| 248 |
+
Rattle_(instrument)
|
| 249 |
+
Artillery_fire
|
| 250 |
+
Cash_register
|
| 251 |
+
Crack
|
| 252 |
+
Growling
|
| 253 |
+
Mosquito
|
| 254 |
+
Carnatic_music
|
| 255 |
+
Honk
|
| 256 |
+
Howl
|
| 257 |
+
Cacophony
|
| 258 |
+
Gospel_music
|
| 259 |
+
Firecracker
|
| 260 |
+
Strum
|
| 261 |
+
Motorboat,_speedboat
|
| 262 |
+
Clock
|
| 263 |
+
Dance_music
|
| 264 |
+
Microwave_oven
|
| 265 |
+
Country
|
| 266 |
+
Bluegrass
|
| 267 |
+
Rattle
|
| 268 |
+
Mallet_percussion
|
| 269 |
+
Computer_keyboard
|
| 270 |
+
Bass_guitar
|
| 271 |
+
Electric_shaver,_electric_razor
|
| 272 |
+
Sawing
|
| 273 |
+
Owl
|
| 274 |
+
Whip
|
| 275 |
+
White_noise
|
| 276 |
+
Chirp_tone
|
| 277 |
+
Boiling
|
| 278 |
+
Ship
|
| 279 |
+
Mouse
|
| 280 |
+
Breaking
|
| 281 |
+
Silence
|
| 282 |
+
Throat_clearing
|
| 283 |
+
Bleat
|
| 284 |
+
Salsa_music
|
| 285 |
+
Patter
|
| 286 |
+
Vibraphone
|
| 287 |
+
Flap
|
| 288 |
+
Typewriter
|
| 289 |
+
Change_ringing_(campanology)
|
| 290 |
+
Trickle,_dribble
|
| 291 |
+
Video_game_music
|
| 292 |
+
Glass
|
| 293 |
+
Dial_tone
|
| 294 |
+
Radio
|
| 295 |
+
Bell
|
| 296 |
+
Moo
|
| 297 |
+
Heart_murmur
|
| 298 |
+
Clatter
|
| 299 |
+
Sniff
|
| 300 |
+
Double_bass
|
| 301 |
+
Background_music
|
| 302 |
+
Lawn_mower
|
| 303 |
+
Printer
|
| 304 |
+
House_music
|
| 305 |
+
Tearing
|
| 306 |
+
Angry_music
|
| 307 |
+
Male_speech,_man_speaking
|
| 308 |
+
Wild_animals
|
| 309 |
+
Cupboard_open_or_close
|
| 310 |
+
Harpsichord
|
| 311 |
+
Light_engine_(high_frequency)
|
| 312 |
+
Child_singing
|
| 313 |
+
Zipper_(clothing)
|
| 314 |
+
Jazz
|
| 315 |
+
Belly_laugh
|
| 316 |
+
Roar
|
| 317 |
+
Motor_vehicle_(road)
|
| 318 |
+
Crowing,_cock-a-doodle-doo
|
| 319 |
+
Cluck
|
| 320 |
+
Sad_music
|
| 321 |
+
Hi-hat
|
| 322 |
+
Cough
|
| 323 |
+
Stomach_rumble
|
| 324 |
+
Alarm
|
| 325 |
+
String_section
|
| 326 |
+
Sonar
|
| 327 |
+
Keys_jangling
|
| 328 |
+
Synthetic_singing
|
| 329 |
+
Rapping
|
| 330 |
+
Sidetone
|
| 331 |
+
Orchestra
|
| 332 |
+
Throbbing
|
| 333 |
+
Whale_vocalization
|
| 334 |
+
Thunk
|
| 335 |
+
Children_playing
|
| 336 |
+
Snake
|
| 337 |
+
Chink,_clink
|
| 338 |
+
Chirp,_tweet
|
| 339 |
+
Boing
|
| 340 |
+
Shuffle
|
| 341 |
+
Pulse
|
| 342 |
+
Punk_rock
|
| 343 |
+
Crow
|
| 344 |
+
Caw
|
| 345 |
+
Static
|
| 346 |
+
Clicking
|
| 347 |
+
Snicker
|
| 348 |
+
Whispering
|
| 349 |
+
Pink_noise
|
| 350 |
+
Crushing
|
| 351 |
+
Wedding_music
|
| 352 |
+
Crumpling,_crinkling
|
| 353 |
+
Crackle
|
| 354 |
+
Whoop
|
| 355 |
+
Electric_toothbrush
|
| 356 |
+
Train_wheels_squealing
|
| 357 |
+
Yell
|
| 358 |
+
Wind_chime
|
| 359 |
+
Frying_(food)
|
| 360 |
+
Christmas_music
|
| 361 |
+
Fill_(with_liquid)
|
| 362 |
+
Reverberation
|
| 363 |
+
Beatboxing
|
| 364 |
+
Harmonica
|
| 365 |
+
Banjo
|
| 366 |
+
Sliding_door
|
| 367 |
+
Groan
|
| 368 |
+
Bagpipes
|
| 369 |
+
Spray
|
| 370 |
+
Stir
|
| 371 |
+
Acoustic_guitar
|
| 372 |
+
Tap
|
| 373 |
+
Chorus_effect
|
| 374 |
+
Noise
|
| 375 |
+
Crunch
|
| 376 |
+
Biting
|
| 377 |
+
Aircraft_engine
|
| 378 |
+
Busy_signal
|
| 379 |
+
Bang
|
| 380 |
+
Techno
|
| 381 |
+
Tuning_fork
|
| 382 |
+
Tapping_(guitar_technique)
|
| 383 |
+
Pig
|
| 384 |
+
Maraca
|
| 385 |
+
Vacuum_cleaner
|
| 386 |
+
Mandolin
|
| 387 |
+
Electronica
|
| 388 |
+
Theme_music
|
| 389 |
+
Yip
|
| 390 |
+
A_capella
|
| 391 |
+
Rustle
|
| 392 |
+
Chatter
|
| 393 |
+
Traditional_music
|
| 394 |
+
Soul_music
|
| 395 |
+
Rustling_leaves
|
| 396 |
+
Afrobeat
|
| 397 |
+
Hoot
|
| 398 |
+
Slosh
|
| 399 |
+
Roaring_cats_(lions,_tigers)
|
| 400 |
+
Chopping_(food)
|
| 401 |
+
Heavy_engine_(low_frequency)
|
| 402 |
+
Sine_wave
|
| 403 |
+
Speech_synthesizer
|
| 404 |
+
Middle_Eastern_music
|
| 405 |
+
Music_of_Latin_America
|
| 406 |
+
Arrow
|
| 407 |
+
Timpani
|
| 408 |
+
Eruption
|
| 409 |
+
Shofar
|
| 410 |
+
Jingle_bell
|
| 411 |
+
Humming
|
| 412 |
+
Sanding
|
| 413 |
+
Female_speech,_woman_speaking
|
| 414 |
+
Gong
|
| 415 |
+
Rain_on_surface
|
| 416 |
+
Pant
|
| 417 |
+
Dubstep
|
| 418 |
+
Clip-clop
|
| 419 |
+
Finger_snapping
|
| 420 |
+
Blender
|
| 421 |
+
Drum_and_bass
|
| 422 |
+
Bouncing
|
| 423 |
+
Vehicle_horn,_car_horn,_honking
|
| 424 |
+
Slam
|
| 425 |
+
Idling
|
| 426 |
+
Rhythm_and_blues
|
| 427 |
+
Race_car,_auto_racing
|
| 428 |
+
Single-lens_reflex_camera
|
| 429 |
+
Smash,_crash
|
| 430 |
+
Purr
|
| 431 |
+
Shatter
|
| 432 |
+
Steelpan
|
| 433 |
+
Whimper_(dog)
|
| 434 |
+
Power_windows,_electric_windows
|
| 435 |
+
Battle_cry
|
| 436 |
+
Scary_music
|
| 437 |
+
Hands
|
| 438 |
+
Echo
|
| 439 |
+
Truck
|
| 440 |
+
Buzz
|
| 441 |
+
Mechanical_fan
|
| 442 |
+
Plop
|
| 443 |
+
Run
|
| 444 |
+
Gasp
|
| 445 |
+
Psychedelic_rock
|
| 446 |
+
Grunt
|
| 447 |
+
Helicopter
|
| 448 |
+
Dental_drill,_dentist's_drill
|
| 449 |
+
Babbling
|
| 450 |
+
Zing
|
| 451 |
+
Oink
|
| 452 |
+
Soundtrack_music
|
| 453 |
+
Ambulance_(siren)
|
| 454 |
+
Exciting_music
|
| 455 |
+
Telephone
|
| 456 |
+
Jingle_(music)
|
| 457 |
+
Tubular_bells
|
| 458 |
+
Burping,_eructation
|
| 459 |
+
Baby_laughter
|
| 460 |
+
Ping
|
| 461 |
+
Bow-wow
|
| 462 |
+
Foghorn
|
| 463 |
+
Machine_gun
|
| 464 |
+
Ukulele
|
| 465 |
+
Telephone_bell_ringing
|
| 466 |
+
Pulleys
|
| 467 |
+
Gears
|
| 468 |
+
Sigh
|
| 469 |
+
Coin_(dropping)
|
| 470 |
+
Music_of_Africa
|
| 471 |
+
Scissors
|
| 472 |
+
Inside,_public_space
|
| 473 |
+
Trance_music
|
| 474 |
+
Roll
|
| 475 |
+
Thump,_thud
|
| 476 |
+
Air_conditioning
|
| 477 |
+
Ding-dong
|
| 478 |
+
Ratchet,_pawl
|
| 479 |
+
Hair_dryer
|
| 480 |
+
Shout
|
| 481 |
+
Ambient_music
|
| 482 |
+
Music_for_children
|
| 483 |
+
Toot
|
| 484 |
+
Bathtub_(filling_or_washing)
|
| 485 |
+
Slap,_smack
|
| 486 |
+
Chuckle,_chortle
|
| 487 |
+
Traffic_noise,_roadway_noise
|
| 488 |
+
Bicycle
|
| 489 |
+
Whimper
|
| 490 |
+
Doorbell
|
| 491 |
+
Wheeze
|
| 492 |
+
Sailboat,_sailing_ship
|
| 493 |
+
Cap_gun
|
| 494 |
+
Wail,_moan
|
| 495 |
+
Rock_and_roll
|
| 496 |
+
Jingle,_tinkle
|
| 497 |
+
Fire_engine,_fire_truck_(siren)
|
| 498 |
+
Funk
|
| 499 |
+
Lullaby
|
| 500 |
+
Field_recording
|
| 501 |
+
Skateboard
|
| 502 |
+
Steam
|
| 503 |
+
Rumble
|
| 504 |
+
Medium_engine_(mid_frequency)
|
| 505 |
+
Sound_effect
|
| 506 |
+
Flamenco
|
| 507 |
+
Shuffling_cards
|
| 508 |
+
Subway,_metro,_underground
|
| 509 |
+
Police_car_(siren)
|
| 510 |
+
Folk_music
|
| 511 |
+
Crying,_sobbing
|
| 512 |
+
New-age_music
|
| 513 |
+
Ice_cream_truck,_ice_cream_van
|
| 514 |
+
Music_of_Bollywood
|
| 515 |
+
Accelerating,_revving,_vroom
|
| 516 |
+
Screaming
|
| 517 |
+
Motorcycle
|
| 518 |
+
Engine_starting
|
| 519 |
+
Train_whistle
|
| 520 |
+
Car_passing_by
|
| 521 |
+
Bus
|
| 522 |
+
Sneeze
|
| 523 |
+
Train_horn
|
| 524 |
+
Air_horn,_truck_horn
|
| 525 |
+
Civil_defense_siren
|
| 526 |
+
Car_alarm
|
| 527 |
+
Reversing_beeps
|
| 528 |
+
<blank>
|
| 529 |
+
<unk>
|
work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/exp/audioset20k/cls_earbasei3/RESULTS.md
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<!-- Generated by scripts/utils/show_cls_result.sh -->
|
| 2 |
+
# RESULTS
|
| 3 |
+
## Environments
|
| 4 |
+
- date: `Fri Mar 21 05:05:28 CDT 2025`
|
| 5 |
+
- python version: `3.9.18 | packaged by conda-forge | (main, Dec 23 2023, 17:20:25) [GCC 12.3.0]`
|
| 6 |
+
- espnet version: `espnet 202412`
|
| 7 |
+
- pytorch version: `pytorch 2.6.0.dev20241210+cu124`
|
| 8 |
+
- Git hash: `ee8dd3d5da745a2c08c2bd6518bc0ba41ba5b224`
|
| 9 |
+
- Commit date: `Thu Mar 20 16:45:17 2025 -0500`
|
| 10 |
+
|
| 11 |
+
## cls_earbasei3
|
| 12 |
+
|Split|mean_acc|mAP|mean_auc|n_labels|n_instances|
|
| 13 |
+
|---|---|---|---|---|---|
|
| 14 |
+
cls_eval|47.76|32.09|95.67|527.00|20123.00
|
| 15 |
+
cls_val|45.63|36.62|94.72|527.00|2014.00
|
| 16 |
+
|
work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/exp/audioset20k/cls_earbasei3/config.yaml
ADDED
|
@@ -0,0 +1,731 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
config: /work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/exp/earbasei3/conf/ear_base/audioset20k.yaml
|
| 2 |
+
print_config: false
|
| 3 |
+
log_level: INFO
|
| 4 |
+
drop_last_iter: false
|
| 5 |
+
dry_run: false
|
| 6 |
+
iterator_type: sequence
|
| 7 |
+
valid_iterator_type: null
|
| 8 |
+
output_dir: /work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/exp/audioset20k/cls_earbasei3
|
| 9 |
+
ngpu: 0
|
| 10 |
+
seed: 0
|
| 11 |
+
num_workers: 2
|
| 12 |
+
num_att_plot: 0
|
| 13 |
+
dist_backend: nccl
|
| 14 |
+
dist_init_method: env://
|
| 15 |
+
dist_world_size: null
|
| 16 |
+
dist_rank: null
|
| 17 |
+
local_rank: null
|
| 18 |
+
dist_master_addr: null
|
| 19 |
+
dist_master_port: null
|
| 20 |
+
dist_launcher: null
|
| 21 |
+
multiprocessing_distributed: false
|
| 22 |
+
unused_parameters: true
|
| 23 |
+
sharded_ddp: false
|
| 24 |
+
use_deepspeed: false
|
| 25 |
+
deepspeed_config: null
|
| 26 |
+
gradient_as_bucket_view: true
|
| 27 |
+
ddp_comm_hook: null
|
| 28 |
+
cudnn_enabled: true
|
| 29 |
+
cudnn_benchmark: false
|
| 30 |
+
cudnn_deterministic: true
|
| 31 |
+
use_tf32: false
|
| 32 |
+
collect_stats: false
|
| 33 |
+
write_collected_feats: false
|
| 34 |
+
max_epoch: 160
|
| 35 |
+
patience: null
|
| 36 |
+
val_scheduler_criterion:
|
| 37 |
+
- valid
|
| 38 |
+
- loss
|
| 39 |
+
early_stopping_criterion:
|
| 40 |
+
- valid
|
| 41 |
+
- loss
|
| 42 |
+
- min
|
| 43 |
+
best_model_criterion:
|
| 44 |
+
- - valid
|
| 45 |
+
- epoch_mAP
|
| 46 |
+
- max
|
| 47 |
+
keep_nbest_models: 1
|
| 48 |
+
nbest_averaging_interval: 0
|
| 49 |
+
grad_clip: 1
|
| 50 |
+
grad_clip_type: 2.0
|
| 51 |
+
grad_noise: false
|
| 52 |
+
accum_grad: 1
|
| 53 |
+
no_forward_run: false
|
| 54 |
+
resume: true
|
| 55 |
+
train_dtype: float32
|
| 56 |
+
use_amp: false
|
| 57 |
+
log_interval: null
|
| 58 |
+
use_matplotlib: true
|
| 59 |
+
use_tensorboard: true
|
| 60 |
+
create_graph_in_tensorboard: false
|
| 61 |
+
use_wandb: true
|
| 62 |
+
wandb_project: audioverse
|
| 63 |
+
wandb_id: null
|
| 64 |
+
wandb_entity: shikhar
|
| 65 |
+
wandb_name: audioset20k.earbasei3
|
| 66 |
+
wandb_model_log_interval: -1
|
| 67 |
+
detect_anomaly: false
|
| 68 |
+
use_adapter: false
|
| 69 |
+
adapter: lora
|
| 70 |
+
save_strategy: all
|
| 71 |
+
adapter_conf: {}
|
| 72 |
+
pretrain_path: null
|
| 73 |
+
init_param: []
|
| 74 |
+
ignore_init_mismatch: false
|
| 75 |
+
freeze_param: []
|
| 76 |
+
num_iters_per_epoch: null
|
| 77 |
+
batch_size: 80
|
| 78 |
+
valid_batch_size: 1200
|
| 79 |
+
batch_bins: 1000000
|
| 80 |
+
valid_batch_bins: null
|
| 81 |
+
category_sample_size: 10
|
| 82 |
+
train_shape_file:
|
| 83 |
+
- /work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/exp/audioset20k/cls_stats_16k/train/speech_shape
|
| 84 |
+
- /work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/exp/audioset20k/cls_stats_16k/train/label_shape
|
| 85 |
+
valid_shape_file:
|
| 86 |
+
- /work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/exp/audioset20k/cls_stats_16k/valid/speech_shape
|
| 87 |
+
- /work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/exp/audioset20k/cls_stats_16k/valid/label_shape
|
| 88 |
+
batch_type: folded
|
| 89 |
+
valid_batch_type: null
|
| 90 |
+
fold_length:
|
| 91 |
+
- 160000
|
| 92 |
+
- 600
|
| 93 |
+
sort_in_batch: descending
|
| 94 |
+
shuffle_within_batch: false
|
| 95 |
+
sort_batch: descending
|
| 96 |
+
multiple_iterator: false
|
| 97 |
+
utt2weight_file: null
|
| 98 |
+
chunk_length: 500
|
| 99 |
+
chunk_shift_ratio: 0.5
|
| 100 |
+
num_cache_chunks: 1024
|
| 101 |
+
chunk_excluded_key_prefixes: []
|
| 102 |
+
chunk_default_fs: null
|
| 103 |
+
chunk_max_abs_length: null
|
| 104 |
+
chunk_discard_short_samples: true
|
| 105 |
+
train_data_path_and_name_and_type:
|
| 106 |
+
- - /work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/dump/audioset20k/train/wav.scp
|
| 107 |
+
- speech
|
| 108 |
+
- sound
|
| 109 |
+
- - /work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/dump/audioset20k/train/text
|
| 110 |
+
- label
|
| 111 |
+
- text
|
| 112 |
+
valid_data_path_and_name_and_type:
|
| 113 |
+
- - /work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/dump/audioset20k/val/wav.scp
|
| 114 |
+
- speech
|
| 115 |
+
- sound
|
| 116 |
+
- - /work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/dump/audioset20k/val/text
|
| 117 |
+
- label
|
| 118 |
+
- text
|
| 119 |
+
multi_task_dataset: false
|
| 120 |
+
allow_variable_data_keys: false
|
| 121 |
+
max_cache_size: 0.0
|
| 122 |
+
max_cache_fd: 32
|
| 123 |
+
allow_multi_rates: false
|
| 124 |
+
valid_max_cache_size: null
|
| 125 |
+
exclude_weight_decay: false
|
| 126 |
+
exclude_weight_decay_conf: {}
|
| 127 |
+
optim: adamw
|
| 128 |
+
optim_conf:
|
| 129 |
+
lr: 3.0e-05
|
| 130 |
+
weight_decay: 0.01
|
| 131 |
+
betas:
|
| 132 |
+
- 0.9
|
| 133 |
+
- 0.98
|
| 134 |
+
scheduler: cosineannealingwarmuprestarts
|
| 135 |
+
scheduler_conf:
|
| 136 |
+
first_cycle_steps: 95000
|
| 137 |
+
warmup_steps: 8000
|
| 138 |
+
max_lr: 3.0e-05
|
| 139 |
+
min_lr: 5.0e-06
|
| 140 |
+
lightning_conf:
|
| 141 |
+
log_every_n_steps: 250
|
| 142 |
+
max_epochs: 500
|
| 143 |
+
strategy: ddp
|
| 144 |
+
strategy_conf:
|
| 145 |
+
find_unused_parameters: true
|
| 146 |
+
best_model_criterion:
|
| 147 |
+
- - valid/epoch_mAP
|
| 148 |
+
- max
|
| 149 |
+
- 1
|
| 150 |
+
devices: 1
|
| 151 |
+
num_nodes: 1
|
| 152 |
+
default_root_dir: /work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/exp/audioset20k/cls_earbasei3
|
| 153 |
+
token_list:
|
| 154 |
+
- Music
|
| 155 |
+
- Speech
|
| 156 |
+
- Vehicle
|
| 157 |
+
- Inside,_small_room
|
| 158 |
+
- Animal
|
| 159 |
+
- Musical_instrument
|
| 160 |
+
- Singing
|
| 161 |
+
- Domestic_animals,_pets
|
| 162 |
+
- Guitar
|
| 163 |
+
- Plucked_string_instrument
|
| 164 |
+
- Water
|
| 165 |
+
- Car
|
| 166 |
+
- Dog
|
| 167 |
+
- Percussion
|
| 168 |
+
- Wind_instrument,_woodwind_instrument
|
| 169 |
+
- Outside,_urban_or_manmade
|
| 170 |
+
- Outside,_rural_or_natural
|
| 171 |
+
- Boat,_Water_vehicle
|
| 172 |
+
- Brass_instrument
|
| 173 |
+
- Fowl
|
| 174 |
+
- Drum
|
| 175 |
+
- Siren
|
| 176 |
+
- Engine
|
| 177 |
+
- Bird
|
| 178 |
+
- Insect
|
| 179 |
+
- Gunshot,_gunfire
|
| 180 |
+
- Wood
|
| 181 |
+
- Rail_transport
|
| 182 |
+
- Train
|
| 183 |
+
- Wind
|
| 184 |
+
- Inside,_large_room_or_hall
|
| 185 |
+
- Railroad_car,_train_wagon
|
| 186 |
+
- Child_speech,_kid_speaking
|
| 187 |
+
- Crowd
|
| 188 |
+
- Rub
|
| 189 |
+
- Keyboard_(musical)
|
| 190 |
+
- Wind_noise_(microphone)
|
| 191 |
+
- Pizzicato
|
| 192 |
+
- Emergency_vehicle
|
| 193 |
+
- Bird_vocalization,_bird_call,_bird_song
|
| 194 |
+
- Livestock,_farm_animals,_working_animals
|
| 195 |
+
- Cat
|
| 196 |
+
- Organ
|
| 197 |
+
- Fly,_housefly
|
| 198 |
+
- Mechanisms
|
| 199 |
+
- Bowed_string_instrument
|
| 200 |
+
- Rain
|
| 201 |
+
- Laughter
|
| 202 |
+
- Aircraft
|
| 203 |
+
- Electronic_music
|
| 204 |
+
- Effects_unit
|
| 205 |
+
- Hum
|
| 206 |
+
- Tools
|
| 207 |
+
- Drum_kit
|
| 208 |
+
- Snare_drum
|
| 209 |
+
- Hiss
|
| 210 |
+
- Piano
|
| 211 |
+
- Water_tap,_faucet
|
| 212 |
+
- Rimshot
|
| 213 |
+
- Bass_drum
|
| 214 |
+
- Chicken,_rooster
|
| 215 |
+
- Marimba,_xylophone
|
| 216 |
+
- Horse
|
| 217 |
+
- Song
|
| 218 |
+
- Quack
|
| 219 |
+
- Power_tool
|
| 220 |
+
- Heart_sounds,_heartbeat
|
| 221 |
+
- Goose
|
| 222 |
+
- Hammond_organ
|
| 223 |
+
- Rock_music
|
| 224 |
+
- Ocean
|
| 225 |
+
- Mains_hum
|
| 226 |
+
- Thunder
|
| 227 |
+
- Chime
|
| 228 |
+
- Electronic_dance_music
|
| 229 |
+
- Typing
|
| 230 |
+
- Sink_(filling_or_washing)
|
| 231 |
+
- Raindrop
|
| 232 |
+
- Cello
|
| 233 |
+
- Electric_guitar
|
| 234 |
+
- Cheering
|
| 235 |
+
- Church_bell
|
| 236 |
+
- Christian_music
|
| 237 |
+
- Drum_roll
|
| 238 |
+
- Trombone
|
| 239 |
+
- Glockenspiel
|
| 240 |
+
- Trumpet
|
| 241 |
+
- Cymbal
|
| 242 |
+
- Tabla
|
| 243 |
+
- Clickety-clack
|
| 244 |
+
- Cricket
|
| 245 |
+
- Steam_whistle
|
| 246 |
+
- Explosion
|
| 247 |
+
- Saxophone
|
| 248 |
+
- Thunderstorm
|
| 249 |
+
- Pop_music
|
| 250 |
+
- Zither
|
| 251 |
+
- Applause
|
| 252 |
+
- Choir
|
| 253 |
+
- Whack,_thwack
|
| 254 |
+
- Clarinet
|
| 255 |
+
- Camera
|
| 256 |
+
- Electric_piano
|
| 257 |
+
- Independent_music
|
| 258 |
+
- Fire
|
| 259 |
+
- Frog
|
| 260 |
+
- Jet_engine
|
| 261 |
+
- Music_of_Asia
|
| 262 |
+
- Ding
|
| 263 |
+
- Waves,_surf
|
| 264 |
+
- Cattle,_bovinae
|
| 265 |
+
- Turkey
|
| 266 |
+
- Television
|
| 267 |
+
- Coo
|
| 268 |
+
- Scratching_(performance_technique)
|
| 269 |
+
- Flute
|
| 270 |
+
- Liquid
|
| 271 |
+
- Harp
|
| 272 |
+
- Progressive_rock
|
| 273 |
+
- Happy_music
|
| 274 |
+
- Steel_guitar,_slide_guitar
|
| 275 |
+
- Whoosh,_swoosh,_swish
|
| 276 |
+
- Boom
|
| 277 |
+
- Breathing
|
| 278 |
+
- Electronic_organ
|
| 279 |
+
- Environmental_noise
|
| 280 |
+
- Distortion
|
| 281 |
+
- Alarm_clock
|
| 282 |
+
- Fixed-wing_aircraft,_airplane
|
| 283 |
+
- Violin,_fiddle
|
| 284 |
+
- Whistling
|
| 285 |
+
- Accordion
|
| 286 |
+
- Disco
|
| 287 |
+
- Pump_(liquid)
|
| 288 |
+
- Waterfall
|
| 289 |
+
- Beep,_bleep
|
| 290 |
+
- Blues
|
| 291 |
+
- Grunge
|
| 292 |
+
- Hip_hop_music
|
| 293 |
+
- Whistle
|
| 294 |
+
- Fusillade
|
| 295 |
+
- Splash,_splatter
|
| 296 |
+
- Gush
|
| 297 |
+
- Toothbrush
|
| 298 |
+
- Knock
|
| 299 |
+
- Gargling
|
| 300 |
+
- Snoring
|
| 301 |
+
- Hammer
|
| 302 |
+
- Gobble
|
| 303 |
+
- Walk,_footsteps
|
| 304 |
+
- Jackhammer
|
| 305 |
+
- Filing_(rasp)
|
| 306 |
+
- Snort
|
| 307 |
+
- Narration,_monologue
|
| 308 |
+
- Tire_squeal
|
| 309 |
+
- Fire_alarm
|
| 310 |
+
- Squeal
|
| 311 |
+
- Meow
|
| 312 |
+
- Caterwaul
|
| 313 |
+
- Cutlery,_silverware
|
| 314 |
+
- Mantra
|
| 315 |
+
- Opera
|
| 316 |
+
- Classical_music
|
| 317 |
+
- Theremin
|
| 318 |
+
- Burst,_pop
|
| 319 |
+
- Drip
|
| 320 |
+
- Tick
|
| 321 |
+
- Children_shouting
|
| 322 |
+
- Creak
|
| 323 |
+
- Hiccup
|
| 324 |
+
- Pigeon,_dove
|
| 325 |
+
- Bicycle_bell
|
| 326 |
+
- Baby_cry,_infant_cry
|
| 327 |
+
- Duck
|
| 328 |
+
- Fireworks
|
| 329 |
+
- Tambourine
|
| 330 |
+
- Rodents,_rats,_mice
|
| 331 |
+
- Buzzer
|
| 332 |
+
- Splinter
|
| 333 |
+
- Writing
|
| 334 |
+
- Goat
|
| 335 |
+
- Sheep
|
| 336 |
+
- Heavy_metal
|
| 337 |
+
- Ska
|
| 338 |
+
- Neigh,_whinny
|
| 339 |
+
- Sizzle
|
| 340 |
+
- Rowboat,_canoe,_kayak
|
| 341 |
+
- Wood_block
|
| 342 |
+
- Clang
|
| 343 |
+
- Door
|
| 344 |
+
- Female_singing
|
| 345 |
+
- Stream
|
| 346 |
+
- Chant
|
| 347 |
+
- Vocal_music
|
| 348 |
+
- Yodeling
|
| 349 |
+
- Bee,_wasp,_etc.
|
| 350 |
+
- Air_brake
|
| 351 |
+
- Whir
|
| 352 |
+
- Bird_flight,_flapping_wings
|
| 353 |
+
- French_horn
|
| 354 |
+
- Telephone_dialing,_DTMF
|
| 355 |
+
- Squeak
|
| 356 |
+
- Sitar
|
| 357 |
+
- Smoke_detector,_smoke_alarm
|
| 358 |
+
- Tick-tock
|
| 359 |
+
- Gurgling
|
| 360 |
+
- Bellow
|
| 361 |
+
- Harmonic
|
| 362 |
+
- Male_singing
|
| 363 |
+
- Giggle
|
| 364 |
+
- Bark
|
| 365 |
+
- Vibration
|
| 366 |
+
- Drill
|
| 367 |
+
- Skidding
|
| 368 |
+
- Scratch
|
| 369 |
+
- Drawer_open_or_close
|
| 370 |
+
- Chop
|
| 371 |
+
- Drum_machine
|
| 372 |
+
- Squish
|
| 373 |
+
- Toilet_flush
|
| 374 |
+
- Fart
|
| 375 |
+
- Basketball_bounce
|
| 376 |
+
- Electronic_tuner
|
| 377 |
+
- Singing_bowl
|
| 378 |
+
- Squawk
|
| 379 |
+
- Conversation
|
| 380 |
+
- Reggae
|
| 381 |
+
- Funny_music
|
| 382 |
+
- Scrape
|
| 383 |
+
- Sewing_machine
|
| 384 |
+
- Tender_music
|
| 385 |
+
- Swing_music
|
| 386 |
+
- Dishes,_pots,_and_pans
|
| 387 |
+
- Sampler
|
| 388 |
+
- Synthesizer
|
| 389 |
+
- Clapping
|
| 390 |
+
- Hubbub,_speech_noise,_speech_babble
|
| 391 |
+
- Engine_knocking
|
| 392 |
+
- Canidae,_dogs,_wolves
|
| 393 |
+
- Chainsaw
|
| 394 |
+
- Pour
|
| 395 |
+
- Croak
|
| 396 |
+
- Chewing,_mastication
|
| 397 |
+
- Cowbell
|
| 398 |
+
- Propeller,_airscrew
|
| 399 |
+
- Didgeridoo
|
| 400 |
+
- Ringtone
|
| 401 |
+
- Rattle_(instrument)
|
| 402 |
+
- Artillery_fire
|
| 403 |
+
- Cash_register
|
| 404 |
+
- Crack
|
| 405 |
+
- Growling
|
| 406 |
+
- Mosquito
|
| 407 |
+
- Carnatic_music
|
| 408 |
+
- Honk
|
| 409 |
+
- Howl
|
| 410 |
+
- Cacophony
|
| 411 |
+
- Gospel_music
|
| 412 |
+
- Firecracker
|
| 413 |
+
- Strum
|
| 414 |
+
- Motorboat,_speedboat
|
| 415 |
+
- Clock
|
| 416 |
+
- Dance_music
|
| 417 |
+
- Microwave_oven
|
| 418 |
+
- Country
|
| 419 |
+
- Bluegrass
|
| 420 |
+
- Rattle
|
| 421 |
+
- Mallet_percussion
|
| 422 |
+
- Computer_keyboard
|
| 423 |
+
- Bass_guitar
|
| 424 |
+
- Electric_shaver,_electric_razor
|
| 425 |
+
- Sawing
|
| 426 |
+
- Owl
|
| 427 |
+
- Whip
|
| 428 |
+
- White_noise
|
| 429 |
+
- Chirp_tone
|
| 430 |
+
- Boiling
|
| 431 |
+
- Ship
|
| 432 |
+
- Mouse
|
| 433 |
+
- Breaking
|
| 434 |
+
- Silence
|
| 435 |
+
- Throat_clearing
|
| 436 |
+
- Bleat
|
| 437 |
+
- Salsa_music
|
| 438 |
+
- Patter
|
| 439 |
+
- Vibraphone
|
| 440 |
+
- Flap
|
| 441 |
+
- Typewriter
|
| 442 |
+
- Change_ringing_(campanology)
|
| 443 |
+
- Trickle,_dribble
|
| 444 |
+
- Video_game_music
|
| 445 |
+
- Glass
|
| 446 |
+
- Dial_tone
|
| 447 |
+
- Radio
|
| 448 |
+
- Bell
|
| 449 |
+
- Moo
|
| 450 |
+
- Heart_murmur
|
| 451 |
+
- Clatter
|
| 452 |
+
- Sniff
|
| 453 |
+
- Double_bass
|
| 454 |
+
- Background_music
|
| 455 |
+
- Lawn_mower
|
| 456 |
+
- Printer
|
| 457 |
+
- House_music
|
| 458 |
+
- Tearing
|
| 459 |
+
- Angry_music
|
| 460 |
+
- Male_speech,_man_speaking
|
| 461 |
+
- Wild_animals
|
| 462 |
+
- Cupboard_open_or_close
|
| 463 |
+
- Harpsichord
|
| 464 |
+
- Light_engine_(high_frequency)
|
| 465 |
+
- Child_singing
|
| 466 |
+
- Zipper_(clothing)
|
| 467 |
+
- Jazz
|
| 468 |
+
- Belly_laugh
|
| 469 |
+
- Roar
|
| 470 |
+
- Motor_vehicle_(road)
|
| 471 |
+
- Crowing,_cock-a-doodle-doo
|
| 472 |
+
- Cluck
|
| 473 |
+
- Sad_music
|
| 474 |
+
- Hi-hat
|
| 475 |
+
- Cough
|
| 476 |
+
- Stomach_rumble
|
| 477 |
+
- Alarm
|
| 478 |
+
- String_section
|
| 479 |
+
- Sonar
|
| 480 |
+
- Keys_jangling
|
| 481 |
+
- Synthetic_singing
|
| 482 |
+
- Rapping
|
| 483 |
+
- Sidetone
|
| 484 |
+
- Orchestra
|
| 485 |
+
- Throbbing
|
| 486 |
+
- Whale_vocalization
|
| 487 |
+
- Thunk
|
| 488 |
+
- Children_playing
|
| 489 |
+
- Snake
|
| 490 |
+
- Chink,_clink
|
| 491 |
+
- Chirp,_tweet
|
| 492 |
+
- Boing
|
| 493 |
+
- Shuffle
|
| 494 |
+
- Pulse
|
| 495 |
+
- Punk_rock
|
| 496 |
+
- Crow
|
| 497 |
+
- Caw
|
| 498 |
+
- Static
|
| 499 |
+
- Clicking
|
| 500 |
+
- Snicker
|
| 501 |
+
- Whispering
|
| 502 |
+
- Pink_noise
|
| 503 |
+
- Crushing
|
| 504 |
+
- Wedding_music
|
| 505 |
+
- Crumpling,_crinkling
|
| 506 |
+
- Crackle
|
| 507 |
+
- Whoop
|
| 508 |
+
- Electric_toothbrush
|
| 509 |
+
- Train_wheels_squealing
|
| 510 |
+
- Yell
|
| 511 |
+
- Wind_chime
|
| 512 |
+
- Frying_(food)
|
| 513 |
+
- Christmas_music
|
| 514 |
+
- Fill_(with_liquid)
|
| 515 |
+
- Reverberation
|
| 516 |
+
- Beatboxing
|
| 517 |
+
- Harmonica
|
| 518 |
+
- Banjo
|
| 519 |
+
- Sliding_door
|
| 520 |
+
- Groan
|
| 521 |
+
- Bagpipes
|
| 522 |
+
- Spray
|
| 523 |
+
- Stir
|
| 524 |
+
- Acoustic_guitar
|
| 525 |
+
- Tap
|
| 526 |
+
- Chorus_effect
|
| 527 |
+
- Noise
|
| 528 |
+
- Crunch
|
| 529 |
+
- Biting
|
| 530 |
+
- Aircraft_engine
|
| 531 |
+
- Busy_signal
|
| 532 |
+
- Bang
|
| 533 |
+
- Techno
|
| 534 |
+
- Tuning_fork
|
| 535 |
+
- Tapping_(guitar_technique)
|
| 536 |
+
- Pig
|
| 537 |
+
- Maraca
|
| 538 |
+
- Vacuum_cleaner
|
| 539 |
+
- Mandolin
|
| 540 |
+
- Electronica
|
| 541 |
+
- Theme_music
|
| 542 |
+
- Yip
|
| 543 |
+
- A_capella
|
| 544 |
+
- Rustle
|
| 545 |
+
- Chatter
|
| 546 |
+
- Traditional_music
|
| 547 |
+
- Soul_music
|
| 548 |
+
- Rustling_leaves
|
| 549 |
+
- Afrobeat
|
| 550 |
+
- Hoot
|
| 551 |
+
- Slosh
|
| 552 |
+
- Roaring_cats_(lions,_tigers)
|
| 553 |
+
- Chopping_(food)
|
| 554 |
+
- Heavy_engine_(low_frequency)
|
| 555 |
+
- Sine_wave
|
| 556 |
+
- Speech_synthesizer
|
| 557 |
+
- Middle_Eastern_music
|
| 558 |
+
- Music_of_Latin_America
|
| 559 |
+
- Arrow
|
| 560 |
+
- Timpani
|
| 561 |
+
- Eruption
|
| 562 |
+
- Shofar
|
| 563 |
+
- Jingle_bell
|
| 564 |
+
- Humming
|
| 565 |
+
- Sanding
|
| 566 |
+
- Female_speech,_woman_speaking
|
| 567 |
+
- Gong
|
| 568 |
+
- Rain_on_surface
|
| 569 |
+
- Pant
|
| 570 |
+
- Dubstep
|
| 571 |
+
- Clip-clop
|
| 572 |
+
- Finger_snapping
|
| 573 |
+
- Blender
|
| 574 |
+
- Drum_and_bass
|
| 575 |
+
- Bouncing
|
| 576 |
+
- Vehicle_horn,_car_horn,_honking
|
| 577 |
+
- Slam
|
| 578 |
+
- Idling
|
| 579 |
+
- Rhythm_and_blues
|
| 580 |
+
- Race_car,_auto_racing
|
| 581 |
+
- Single-lens_reflex_camera
|
| 582 |
+
- Smash,_crash
|
| 583 |
+
- Purr
|
| 584 |
+
- Shatter
|
| 585 |
+
- Steelpan
|
| 586 |
+
- Whimper_(dog)
|
| 587 |
+
- Power_windows,_electric_windows
|
| 588 |
+
- Battle_cry
|
| 589 |
+
- Scary_music
|
| 590 |
+
- Hands
|
| 591 |
+
- Echo
|
| 592 |
+
- Truck
|
| 593 |
+
- Buzz
|
| 594 |
+
- Mechanical_fan
|
| 595 |
+
- Plop
|
| 596 |
+
- Run
|
| 597 |
+
- Gasp
|
| 598 |
+
- Psychedelic_rock
|
| 599 |
+
- Grunt
|
| 600 |
+
- Helicopter
|
| 601 |
+
- Dental_drill,_dentist's_drill
|
| 602 |
+
- Babbling
|
| 603 |
+
- Zing
|
| 604 |
+
- Oink
|
| 605 |
+
- Soundtrack_music
|
| 606 |
+
- Ambulance_(siren)
|
| 607 |
+
- Exciting_music
|
| 608 |
+
- Telephone
|
| 609 |
+
- Jingle_(music)
|
| 610 |
+
- Tubular_bells
|
| 611 |
+
- Burping,_eructation
|
| 612 |
+
- Baby_laughter
|
| 613 |
+
- Ping
|
| 614 |
+
- Bow-wow
|
| 615 |
+
- Foghorn
|
| 616 |
+
- Machine_gun
|
| 617 |
+
- Ukulele
|
| 618 |
+
- Telephone_bell_ringing
|
| 619 |
+
- Pulleys
|
| 620 |
+
- Gears
|
| 621 |
+
- Sigh
|
| 622 |
+
- Coin_(dropping)
|
| 623 |
+
- Music_of_Africa
|
| 624 |
+
- Scissors
|
| 625 |
+
- Inside,_public_space
|
| 626 |
+
- Trance_music
|
| 627 |
+
- Roll
|
| 628 |
+
- Thump,_thud
|
| 629 |
+
- Air_conditioning
|
| 630 |
+
- Ding-dong
|
| 631 |
+
- Ratchet,_pawl
|
| 632 |
+
- Hair_dryer
|
| 633 |
+
- Shout
|
| 634 |
+
- Ambient_music
|
| 635 |
+
- Music_for_children
|
| 636 |
+
- Toot
|
| 637 |
+
- Bathtub_(filling_or_washing)
|
| 638 |
+
- Slap,_smack
|
| 639 |
+
- Chuckle,_chortle
|
| 640 |
+
- Traffic_noise,_roadway_noise
|
| 641 |
+
- Bicycle
|
| 642 |
+
- Whimper
|
| 643 |
+
- Doorbell
|
| 644 |
+
- Wheeze
|
| 645 |
+
- Sailboat,_sailing_ship
|
| 646 |
+
- Cap_gun
|
| 647 |
+
- Wail,_moan
|
| 648 |
+
- Rock_and_roll
|
| 649 |
+
- Jingle,_tinkle
|
| 650 |
+
- Fire_engine,_fire_truck_(siren)
|
| 651 |
+
- Funk
|
| 652 |
+
- Lullaby
|
| 653 |
+
- Field_recording
|
| 654 |
+
- Skateboard
|
| 655 |
+
- Steam
|
| 656 |
+
- Rumble
|
| 657 |
+
- Medium_engine_(mid_frequency)
|
| 658 |
+
- Sound_effect
|
| 659 |
+
- Flamenco
|
| 660 |
+
- Shuffling_cards
|
| 661 |
+
- Subway,_metro,_underground
|
| 662 |
+
- Police_car_(siren)
|
| 663 |
+
- Folk_music
|
| 664 |
+
- Crying,_sobbing
|
| 665 |
+
- New-age_music
|
| 666 |
+
- Ice_cream_truck,_ice_cream_van
|
| 667 |
+
- Music_of_Bollywood
|
| 668 |
+
- Accelerating,_revving,_vroom
|
| 669 |
+
- Screaming
|
| 670 |
+
- Motorcycle
|
| 671 |
+
- Engine_starting
|
| 672 |
+
- Train_whistle
|
| 673 |
+
- Car_passing_by
|
| 674 |
+
- Bus
|
| 675 |
+
- Sneeze
|
| 676 |
+
- Train_horn
|
| 677 |
+
- Air_horn,_truck_horn
|
| 678 |
+
- Civil_defense_siren
|
| 679 |
+
- Car_alarm
|
| 680 |
+
- Reversing_beeps
|
| 681 |
+
- <blank>
|
| 682 |
+
- <unk>
|
| 683 |
+
text_token_list: null
|
| 684 |
+
text_bpemodel: null
|
| 685 |
+
init: xavier_normal
|
| 686 |
+
input_size: 1
|
| 687 |
+
use_preprocessor: true
|
| 688 |
+
frontend: null
|
| 689 |
+
frontend_conf: {}
|
| 690 |
+
specaug: null
|
| 691 |
+
specaug_conf: {}
|
| 692 |
+
normalize: null
|
| 693 |
+
normalize_conf: {}
|
| 694 |
+
preencoder: null
|
| 695 |
+
preencoder_conf: {}
|
| 696 |
+
encoder: beats
|
| 697 |
+
encoder_conf:
|
| 698 |
+
beats_ckpt_path: /work/nvme/bbjs/sbharadwaj/7Msounds/exp/beats_iter2_base2.tune_lr5e-4_warmup40000_bins1600000_totalsteps400000/epoch_latest.pt
|
| 699 |
+
beats_config:
|
| 700 |
+
layer_wise_gradient_decay_ratio: 0.3
|
| 701 |
+
encoder_layerdrop: 0.1
|
| 702 |
+
dropout: 0.0
|
| 703 |
+
use_weighted_representation: false
|
| 704 |
+
specaug_config:
|
| 705 |
+
apply_time_warp: true
|
| 706 |
+
apply_freq_mask: false
|
| 707 |
+
apply_time_mask: true
|
| 708 |
+
time_mask_width_ratio_range:
|
| 709 |
+
- 0
|
| 710 |
+
- 0.06
|
| 711 |
+
num_time_mask: 1
|
| 712 |
+
roll_augment: true
|
| 713 |
+
roll_interval: 1
|
| 714 |
+
text_encoder: null
|
| 715 |
+
text_encoder_conf: {}
|
| 716 |
+
embedding_fusion: null
|
| 717 |
+
embedding_fusion_conf: {}
|
| 718 |
+
decoder: linear
|
| 719 |
+
decoder_conf: {}
|
| 720 |
+
model: espnet
|
| 721 |
+
model_conf:
|
| 722 |
+
classification_type: multi-label
|
| 723 |
+
mixup_probability: 0.8
|
| 724 |
+
lsm_weight: 0.0
|
| 725 |
+
log_epoch_metrics: true
|
| 726 |
+
user_callbacks:
|
| 727 |
+
- mAP_logging
|
| 728 |
+
required:
|
| 729 |
+
- output_dir
|
| 730 |
+
- token_list
|
| 731 |
+
task: cls
|
work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/exp/audioset20k/cls_earbasei3/lightning_logs/version_0/events.out.tfevents.1742492588.gh130.hsn.cm.delta.internal.ncsa.edu.3586759.0
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:aca3b0dad08cf2f8f38bfd982441259e80d8b95377c492704e465a2e08c4ad2c
|
| 3 |
+
size 372371
|
work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/exp/audioset20k/cls_earbasei3/lightning_logs/version_0/hparams.yaml
ADDED
|
@@ -0,0 +1,208 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
args: !!python/object:argparse.Namespace
|
| 2 |
+
accum_grad: 1
|
| 3 |
+
adapter: lora
|
| 4 |
+
adapter_conf: {}
|
| 5 |
+
allow_multi_rates: false
|
| 6 |
+
allow_variable_data_keys: false
|
| 7 |
+
batch_bins: 1000000
|
| 8 |
+
batch_size: 80
|
| 9 |
+
batch_type: folded
|
| 10 |
+
best_model_criterion:
|
| 11 |
+
- - valid
|
| 12 |
+
- epoch_mAP
|
| 13 |
+
- max
|
| 14 |
+
category_sample_size: 10
|
| 15 |
+
chunk_default_fs: null
|
| 16 |
+
chunk_discard_short_samples: true
|
| 17 |
+
chunk_excluded_key_prefixes: []
|
| 18 |
+
chunk_length: 500
|
| 19 |
+
chunk_max_abs_length: null
|
| 20 |
+
chunk_shift_ratio: 0.5
|
| 21 |
+
collect_stats: false
|
| 22 |
+
config: /work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/exp/earbasei3/conf/ear_base/audioset20k.yaml
|
| 23 |
+
create_graph_in_tensorboard: false
|
| 24 |
+
cudnn_benchmark: false
|
| 25 |
+
cudnn_deterministic: true
|
| 26 |
+
cudnn_enabled: true
|
| 27 |
+
ddp_comm_hook: null
|
| 28 |
+
decoder: linear
|
| 29 |
+
decoder_conf: {}
|
| 30 |
+
deepspeed_config: null
|
| 31 |
+
detect_anomaly: false
|
| 32 |
+
dist_backend: nccl
|
| 33 |
+
dist_init_method: env://
|
| 34 |
+
dist_launcher: null
|
| 35 |
+
dist_master_addr: null
|
| 36 |
+
dist_master_port: null
|
| 37 |
+
dist_rank: null
|
| 38 |
+
dist_world_size: null
|
| 39 |
+
drop_last_iter: false
|
| 40 |
+
dry_run: false
|
| 41 |
+
early_stopping_criterion: !!python/tuple
|
| 42 |
+
- valid
|
| 43 |
+
- loss
|
| 44 |
+
- min
|
| 45 |
+
embedding_fusion: null
|
| 46 |
+
embedding_fusion_conf: {}
|
| 47 |
+
encoder: beats
|
| 48 |
+
encoder_conf:
|
| 49 |
+
beats_ckpt_path: /work/nvme/bbjs/sbharadwaj/7Msounds/exp/beats_iter2_base2.tune_lr5e-4_warmup40000_bins1600000_totalsteps400000/epoch_latest.pt
|
| 50 |
+
beats_config:
|
| 51 |
+
dropout: 0.0
|
| 52 |
+
encoder_layerdrop: 0.1
|
| 53 |
+
layer_wise_gradient_decay_ratio: 0.3
|
| 54 |
+
roll_augment: true
|
| 55 |
+
roll_interval: 1
|
| 56 |
+
specaug_config:
|
| 57 |
+
apply_freq_mask: false
|
| 58 |
+
apply_time_mask: true
|
| 59 |
+
apply_time_warp: true
|
| 60 |
+
num_time_mask: 1
|
| 61 |
+
time_mask_width_ratio_range:
|
| 62 |
+
- 0
|
| 63 |
+
- 0.06
|
| 64 |
+
use_weighted_representation: false
|
| 65 |
+
exclude_weight_decay: false
|
| 66 |
+
exclude_weight_decay_conf: {}
|
| 67 |
+
fold_length:
|
| 68 |
+
- 160000
|
| 69 |
+
- 600
|
| 70 |
+
freeze_param: []
|
| 71 |
+
frontend: null
|
| 72 |
+
frontend_conf:
|
| 73 |
+
fs: 16k
|
| 74 |
+
grad_clip: 1
|
| 75 |
+
grad_clip_type: 2.0
|
| 76 |
+
grad_noise: false
|
| 77 |
+
gradient_as_bucket_view: true
|
| 78 |
+
ignore_init_mismatch: false
|
| 79 |
+
init: xavier_normal
|
| 80 |
+
init_param: []
|
| 81 |
+
input_size: 1
|
| 82 |
+
iterator_type: sequence
|
| 83 |
+
keep_nbest_models: 1
|
| 84 |
+
lightning_conf:
|
| 85 |
+
best_model_criterion:
|
| 86 |
+
- - valid/epoch_mAP
|
| 87 |
+
- max
|
| 88 |
+
- 1
|
| 89 |
+
default_root_dir: /work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/exp/audioset20k/cls_earbasei3
|
| 90 |
+
devices: 1
|
| 91 |
+
log_every_n_steps: 250
|
| 92 |
+
max_epochs: 500
|
| 93 |
+
num_nodes: 1
|
| 94 |
+
strategy: ddp
|
| 95 |
+
strategy_conf:
|
| 96 |
+
find_unused_parameters: true
|
| 97 |
+
local_rank: null
|
| 98 |
+
log_interval: null
|
| 99 |
+
log_level: INFO
|
| 100 |
+
max_cache_fd: 32
|
| 101 |
+
max_cache_size: 0.0
|
| 102 |
+
max_epoch: 160
|
| 103 |
+
model: espnet
|
| 104 |
+
model_conf:
|
| 105 |
+
classification_type: multi-label
|
| 106 |
+
log_epoch_metrics: true
|
| 107 |
+
lsm_weight: 0.0
|
| 108 |
+
mixup_probability: 0.8
|
| 109 |
+
multi_task_dataset: false
|
| 110 |
+
multiple_iterator: false
|
| 111 |
+
multiprocessing_distributed: false
|
| 112 |
+
nbest_averaging_interval: 0
|
| 113 |
+
ngpu: 0
|
| 114 |
+
no_forward_run: false
|
| 115 |
+
normalize: null
|
| 116 |
+
normalize_conf: {}
|
| 117 |
+
num_att_plot: 0
|
| 118 |
+
num_cache_chunks: 1024
|
| 119 |
+
num_iters_per_epoch: null
|
| 120 |
+
num_workers: 2
|
| 121 |
+
optim: adamw
|
| 122 |
+
optim_conf:
|
| 123 |
+
betas:
|
| 124 |
+
- 0.9
|
| 125 |
+
- 0.98
|
| 126 |
+
lr: 3.0e-05
|
| 127 |
+
weight_decay: 0.01
|
| 128 |
+
output_dir: /work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/exp/audioset20k/cls_earbasei3
|
| 129 |
+
patience: null
|
| 130 |
+
preencoder: null
|
| 131 |
+
preencoder_conf: {}
|
| 132 |
+
pretrain_path: null
|
| 133 |
+
print_config: false
|
| 134 |
+
required:
|
| 135 |
+
- output_dir
|
| 136 |
+
- token_list
|
| 137 |
+
resume: true
|
| 138 |
+
save_strategy: all
|
| 139 |
+
scheduler: cosineannealingwarmuprestarts
|
| 140 |
+
scheduler_conf:
|
| 141 |
+
first_cycle_steps: 95000
|
| 142 |
+
max_lr: 3.0e-05
|
| 143 |
+
min_lr: 5.0e-06
|
| 144 |
+
warmup_steps: 8000
|
| 145 |
+
seed: 0
|
| 146 |
+
sharded_ddp: false
|
| 147 |
+
shuffle_within_batch: false
|
| 148 |
+
sort_batch: descending
|
| 149 |
+
sort_in_batch: descending
|
| 150 |
+
specaug: null
|
| 151 |
+
specaug_conf: {}
|
| 152 |
+
task: cls
|
| 153 |
+
text_bpemodel: null
|
| 154 |
+
text_encoder: null
|
| 155 |
+
text_encoder_conf: {}
|
| 156 |
+
text_token_list: null
|
| 157 |
+
token_list: /work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/data/audioset20k/token_list
|
| 158 |
+
train_data_path_and_name_and_type:
|
| 159 |
+
- !!python/tuple
|
| 160 |
+
- /work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/dump/audioset20k/train/wav.scp
|
| 161 |
+
- speech
|
| 162 |
+
- sound
|
| 163 |
+
- !!python/tuple
|
| 164 |
+
- /work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/dump/audioset20k/train/text
|
| 165 |
+
- label
|
| 166 |
+
- text
|
| 167 |
+
train_dtype: float32
|
| 168 |
+
train_shape_file:
|
| 169 |
+
- /work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/exp/audioset20k/cls_stats_16k/train/speech_shape
|
| 170 |
+
- /work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/exp/audioset20k/cls_stats_16k/train/label_shape
|
| 171 |
+
unused_parameters: true
|
| 172 |
+
use_adapter: false
|
| 173 |
+
use_amp: false
|
| 174 |
+
use_deepspeed: false
|
| 175 |
+
use_matplotlib: true
|
| 176 |
+
use_preprocessor: true
|
| 177 |
+
use_tensorboard: true
|
| 178 |
+
use_tf32: false
|
| 179 |
+
use_wandb: true
|
| 180 |
+
user_callbacks:
|
| 181 |
+
- mAP_logging
|
| 182 |
+
utt2weight_file: null
|
| 183 |
+
val_scheduler_criterion: !!python/tuple
|
| 184 |
+
- valid
|
| 185 |
+
- loss
|
| 186 |
+
valid_batch_bins: null
|
| 187 |
+
valid_batch_size: 1200
|
| 188 |
+
valid_batch_type: null
|
| 189 |
+
valid_data_path_and_name_and_type:
|
| 190 |
+
- !!python/tuple
|
| 191 |
+
- /work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/dump/audioset20k/val/wav.scp
|
| 192 |
+
- speech
|
| 193 |
+
- sound
|
| 194 |
+
- !!python/tuple
|
| 195 |
+
- /work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/dump/audioset20k/val/text
|
| 196 |
+
- label
|
| 197 |
+
- text
|
| 198 |
+
valid_iterator_type: null
|
| 199 |
+
valid_max_cache_size: null
|
| 200 |
+
valid_shape_file:
|
| 201 |
+
- /work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/exp/audioset20k/cls_stats_16k/valid/speech_shape
|
| 202 |
+
- /work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/exp/audioset20k/cls_stats_16k/valid/label_shape
|
| 203 |
+
wandb_entity: shikhar
|
| 204 |
+
wandb_id: null
|
| 205 |
+
wandb_model_log_interval: -1
|
| 206 |
+
wandb_name: audioset20k.earbasei3
|
| 207 |
+
wandb_project: audioverse
|
| 208 |
+
write_collected_feats: false
|
work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/exp/audioset20k/cls_earbasei3/valid.epoch_mAP.ave_1best.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:09cbf19b179115d8b0af485d8efe9bf889147393cc0a8ad2fa2d0506237147f2
|
| 3 |
+
size 363126298
|