|
|
--- |
|
|
tags: |
|
|
- espnet |
|
|
- audio |
|
|
- self-supervised-learning |
|
|
datasets: |
|
|
- as2m |
|
|
license: cc-by-4.0 |
|
|
--- |
|
|
|
|
|
## ESPnet2 SSL model |
|
|
|
|
|
### `shikhar7ssu/OpenBEATs-Large-i3` |
|
|
|
|
|
This model was trained by Shikhar Bharadwaj using the as2m recipe in [espnet](https://github.com/espnet/espnet/). |
|
|
|
|
|
## SSL config |
|
|
|
|
|
<details><summary>expand</summary> |
|
|
|
|
|
``` |
|
|
config: conf/ear_large.yaml |
|
|
print_config: false |
|
|
log_level: INFO |
|
|
drop_last_iter: false |
|
|
dry_run: false |
|
|
iterator_type: sequence |
|
|
valid_iterator_type: null |
|
|
output_dir: /work/nvme/bbjs/sbharadwaj/7Msounds/exp/beats_iter2_large2.tune_lr1.0e-4_warmup40000_bins1600000_totalsteps400000 |
|
|
ngpu: 1 |
|
|
seed: 0 |
|
|
num_workers: 4 |
|
|
num_att_plot: 0 |
|
|
dist_backend: nccl |
|
|
dist_init_method: env:// |
|
|
dist_world_size: 4 |
|
|
dist_rank: 0 |
|
|
local_rank: 0 |
|
|
dist_master_addr: localhost |
|
|
dist_master_port: 60649 |
|
|
dist_launcher: null |
|
|
multiprocessing_distributed: true |
|
|
unused_parameters: false |
|
|
sharded_ddp: false |
|
|
use_deepspeed: true |
|
|
deepspeed_config: ewogICJ0cmFpbl9taWNyb19iYXRjaF9zaXplX3Blcl9ncHUiOiAxLAogICJncmFkaWVudF9hY2N1bXVsYXRpb25fc3RlcHMiOiAxLAogICJncmFkaWVudF9jbGlwcGluZyI6IDEuMCwKICAiYmYxNiI6IHsKICAgICJlbmFibGVkIjogdHJ1ZQogIH0sCiAgIm9wdGltaXplciI6IHsKICAgICJ0eXBlIjogIkFkYW0iLAogICAgInBhcmFtcyI6IHsKICAgICAgImxyIjogMS4wZS00LAogICAgICAiYmV0YXMiOiBbMC45LCAwLjk4XSwKICAgICAgImVwcyI6IDFlLTEyLAogICAgICAid2VpZ2h0X2RlY2F5IjogMS4wZS0yLAogICAgICAiYWRhbV93X21vZGUiOiB0cnVlCiAgICB9CiAgfSwKICAic2NoZWR1bGVyIjogewogICAgInR5cGUiOiAiV2FybXVwRGVjYXlMUiIsCiAgICAicGFyYW1zIjogewogICAgICAid2FybXVwX3R5cGUiOiAibGluZWFyIiwKICAgICAgInRvdGFsX251bV9zdGVwcyI6IDQwMDAwMCwKICAgICAgIndhcm11cF9udW1fc3RlcHMiOiA0MDAwMCwKICAgICAgIndhcm11cF9tYXhfbHIiOiAxLjBlLTQsCiAgICAgICJ3YXJtdXBfbWluX2xyIjogMS4wZS02CiAgICB9CiAgfSwKICAid2FsbF9jbG9ja19icmVha2Rvd24iOiBmYWxzZSwKICAic3RlcHNfcGVyX3ByaW50IjogMzAwMAp9Cg== |
|
|
gradient_as_bucket_view: true |
|
|
ddp_comm_hook: null |
|
|
cudnn_enabled: true |
|
|
cudnn_benchmark: false |
|
|
cudnn_deterministic: true |
|
|
use_tf32: false |
|
|
collect_stats: false |
|
|
write_collected_feats: false |
|
|
max_epoch: 59 |
|
|
patience: null |
|
|
val_scheduler_criterion: |
|
|
- valid |
|
|
- loss |
|
|
early_stopping_criterion: |
|
|
- valid |
|
|
- loss |
|
|
- min |
|
|
best_model_criterion: |
|
|
- - train |
|
|
- loss |
|
|
- min |
|
|
- - valid |
|
|
- loss |
|
|
- min |
|
|
- - train |
|
|
- acc |
|
|
- max |
|
|
- - valid |
|
|
- acc |
|
|
- max |
|
|
keep_nbest_models: |
|
|
- 10 |
|
|
nbest_averaging_interval: 0 |
|
|
grad_clip: 5.0 |
|
|
grad_clip_type: 2.0 |
|
|
grad_noise: false |
|
|
accum_grad: 1 |
|
|
no_forward_run: false |
|
|
resume: true |
|
|
train_dtype: float32 |
|
|
use_amp: false |
|
|
log_interval: null |
|
|
use_matplotlib: true |
|
|
use_tensorboard: true |
|
|
create_graph_in_tensorboard: false |
|
|
use_wandb: true |
|
|
wandb_project: EARlarge.PT |
|
|
wandb_id: null |
|
|
wandb_entity: shikhar |
|
|
wandb_name: large2.tune_lr1.0e-4_warmup40000_bins1600000_totalsteps400000 |
|
|
wandb_model_log_interval: -1 |
|
|
detect_anomaly: false |
|
|
use_adapter: false |
|
|
adapter: lora |
|
|
save_strategy: all |
|
|
adapter_conf: {} |
|
|
pretrain_path: null |
|
|
init_param: [] |
|
|
ignore_init_mismatch: false |
|
|
freeze_param: [] |
|
|
num_iters_per_epoch: null |
|
|
batch_size: 20 |
|
|
valid_batch_size: null |
|
|
batch_bins: 1600000 |
|
|
valid_batch_bins: null |
|
|
category_sample_size: 10 |
|
|
train_shape_file: |
|
|
- /work/nvme/bbjs/sbharadwaj/7Msounds/exp/beats_stats_fbank/train/speech_shape |
|
|
- /work/nvme/bbjs/sbharadwaj/7Msounds/exp/beats_stats_fbank/train/target_shape.word |
|
|
valid_shape_file: |
|
|
- /work/nvme/bbjs/sbharadwaj/7Msounds/exp/beats_stats_fbank/valid/speech_shape |
|
|
- /work/nvme/bbjs/sbharadwaj/7Msounds/exp/beats_stats_fbank/valid/target_shape.word |
|
|
batch_type: length |
|
|
valid_batch_type: null |
|
|
fold_length: |
|
|
- 160000 |
|
|
- 600 |
|
|
sort_in_batch: descending |
|
|
shuffle_within_batch: false |
|
|
sort_batch: descending |
|
|
multiple_iterator: false |
|
|
utt2weight_file: null |
|
|
chunk_length: 500 |
|
|
chunk_shift_ratio: 0.5 |
|
|
num_cache_chunks: 1024 |
|
|
chunk_excluded_key_prefixes: [] |
|
|
chunk_default_fs: null |
|
|
chunk_max_abs_length: null |
|
|
chunk_discard_short_samples: true |
|
|
train_data_path_and_name_and_type: |
|
|
- - /work/nvme/bbjs/sbharadwaj/7Msounds/dump/fbank/train/feats.scp |
|
|
- speech |
|
|
- kaldi_ark |
|
|
- - /work/nvme/bbjs/sbharadwaj/7Msounds/dump/fbank/train/target_iter2_tokenizer_large_100k_steps |
|
|
- target |
|
|
- text |
|
|
valid_data_path_and_name_and_type: |
|
|
- - /work/nvme/bbjs/sbharadwaj/7Msounds/dump/fbank/eval/feats.scp |
|
|
- speech |
|
|
- kaldi_ark |
|
|
- - /work/nvme/bbjs/sbharadwaj/7Msounds/dump/fbank/eval/target_iter2_tokenizer_large_100k_steps |
|
|
- target |
|
|
- text |
|
|
multi_task_dataset: false |
|
|
allow_variable_data_keys: false |
|
|
max_cache_size: 0.0 |
|
|
max_cache_fd: 32 |
|
|
allow_multi_rates: false |
|
|
valid_max_cache_size: null |
|
|
exclude_weight_decay: false |
|
|
exclude_weight_decay_conf: {} |
|
|
optim: adadelta |
|
|
optim_conf: {} |
|
|
scheduler: null |
|
|
scheduler_conf: {} |
|
|
lightning_conf: {} |
|
|
token_list: |
|
|
- <unk> |
|
|
- '0' |
|
|
- '1' |
|
|
- '2' |
|
|
- '3' |
|
|
- '4' |
|
|
- '5' |
|
|
- '6' |
|
|
- '7' |
|
|
- '8' |
|
|
- '9' |
|
|
- '10' |
|
|
- '11' |
|
|
- '12' |
|
|
- '13' |
|
|
- '14' |
|
|
- '15' |
|
|
- '16' |
|
|
- '17' |
|
|
- '18' |
|
|
- '19' |
|
|
- '20' |
|
|
- '21' |
|
|
- '22' |
|
|
- '23' |
|
|
- '24' |
|
|
- '25' |
|
|
- '26' |
|
|
- '27' |
|
|
- '28' |
|
|
- '29' |
|
|
- '30' |
|
|
- '31' |
|
|
- '32' |
|
|
- '33' |
|
|
- '34' |
|
|
- '35' |
|
|
- '36' |
|
|
- '37' |
|
|
- '38' |
|
|
- '39' |
|
|
- '40' |
|
|
- '41' |
|
|
- '42' |
|
|
- '43' |
|
|
- '44' |
|
|
- '45' |
|
|
- '46' |
|
|
- '47' |
|
|
- '48' |
|
|
- '49' |
|
|
- '50' |
|
|
- '51' |
|
|
- '52' |
|
|
- '53' |
|
|
- '54' |
|
|
- '55' |
|
|
- '56' |
|
|
- '57' |
|
|
- '58' |
|
|
- '59' |
|
|
- '60' |
|
|
- '61' |
|
|
- '62' |
|
|
- '63' |
|
|
- '64' |
|
|
- '65' |
|
|
- '66' |
|
|
- '67' |
|
|
- '68' |
|
|
- '69' |
|
|
- '70' |
|
|
- '71' |
|
|
- '72' |
|
|
- '73' |
|
|
- '74' |
|
|
- '75' |
|
|
- '76' |
|
|
- '77' |
|
|
- '78' |
|
|
- '79' |
|
|
- '80' |
|
|
- '81' |
|
|
- '82' |
|
|
- '83' |
|
|
- '84' |
|
|
- '85' |
|
|
- '86' |
|
|
- '87' |
|
|
- '88' |
|
|
- '89' |
|
|
- '90' |
|
|
- '91' |
|
|
- '92' |
|
|
- '93' |
|
|
- '94' |
|
|
- '95' |
|
|
- '96' |
|
|
- '97' |
|
|
- '98' |
|
|
- '99' |
|
|
- '100' |
|
|
- '101' |
|
|
- '102' |
|
|
- '103' |
|
|
- '104' |
|
|
- '105' |
|
|
- '106' |
|
|
- '107' |
|
|
- '108' |
|
|
- '109' |
|
|
- '110' |
|
|
- '111' |
|
|
- '112' |
|
|
- '113' |
|
|
- '114' |
|
|
- '115' |
|
|
- '116' |
|
|
- '117' |
|
|
- '118' |
|
|
- '119' |
|
|
- '120' |
|
|
- '121' |
|
|
- '122' |
|
|
- '123' |
|
|
- '124' |
|
|
- '125' |
|
|
- '126' |
|
|
- '127' |
|
|
- '128' |
|
|
- '129' |
|
|
- '130' |
|
|
- '131' |
|
|
- '132' |
|
|
- '133' |
|
|
- '134' |
|
|
- '135' |
|
|
- '136' |
|
|
- '137' |
|
|
- '138' |
|
|
- '139' |
|
|
- '140' |
|
|
- '141' |
|
|
- '142' |
|
|
- '143' |
|
|
- '144' |
|
|
- '145' |
|
|
- '146' |
|
|
- '147' |
|
|
- '148' |
|
|
- '149' |
|
|
- '150' |
|
|
- '151' |
|
|
- '152' |
|
|
- '153' |
|
|
- '154' |
|
|
- '155' |
|
|
- '156' |
|
|
- '157' |
|
|
- '158' |
|
|
- '159' |
|
|
- '160' |
|
|
- '161' |
|
|
- '162' |
|
|
- '163' |
|
|
- '164' |
|
|
- '165' |
|
|
- '166' |
|
|
- '167' |
|
|
- '168' |
|
|
- '169' |
|
|
- '170' |
|
|
- '171' |
|
|
- '172' |
|
|
- '173' |
|
|
- '174' |
|
|
- '175' |
|
|
- '176' |
|
|
- '177' |
|
|
- '178' |
|
|
- '179' |
|
|
- '180' |
|
|
- '181' |
|
|
- '182' |
|
|
- '183' |
|
|
- '184' |
|
|
- '185' |
|
|
- '186' |
|
|
- '187' |
|
|
- '188' |
|
|
- '189' |
|
|
- '190' |
|
|
- '191' |
|
|
- '192' |
|
|
- '193' |
|
|
- '194' |
|
|
- '195' |
|
|
- '196' |
|
|
- '197' |
|
|
- '198' |
|
|
- '199' |
|
|
- '200' |
|
|
- '201' |
|
|
- '202' |
|
|
- '203' |
|
|
- '204' |
|
|
- '205' |
|
|
- '206' |
|
|
- '207' |
|
|
- '208' |
|
|
- '209' |
|
|
- '210' |
|
|
- '211' |
|
|
- '212' |
|
|
- '213' |
|
|
- '214' |
|
|
- '215' |
|
|
- '216' |
|
|
- '217' |
|
|
- '218' |
|
|
- '219' |
|
|
- '220' |
|
|
- '221' |
|
|
- '222' |
|
|
- '223' |
|
|
- '224' |
|
|
- '225' |
|
|
- '226' |
|
|
- '227' |
|
|
- '228' |
|
|
- '229' |
|
|
- '230' |
|
|
- '231' |
|
|
- '232' |
|
|
- '233' |
|
|
- '234' |
|
|
- '235' |
|
|
- '236' |
|
|
- '237' |
|
|
- '238' |
|
|
- '239' |
|
|
- '240' |
|
|
- '241' |
|
|
- '242' |
|
|
- '243' |
|
|
- '244' |
|
|
- '245' |
|
|
- '246' |
|
|
- '247' |
|
|
- '248' |
|
|
- '249' |
|
|
- '250' |
|
|
- '251' |
|
|
- '252' |
|
|
- '253' |
|
|
- '254' |
|
|
- '255' |
|
|
- '256' |
|
|
- '257' |
|
|
- '258' |
|
|
- '259' |
|
|
- '260' |
|
|
- '261' |
|
|
- '262' |
|
|
- '263' |
|
|
- '264' |
|
|
- '265' |
|
|
- '266' |
|
|
- '267' |
|
|
- '268' |
|
|
- '269' |
|
|
- '270' |
|
|
- '271' |
|
|
- '272' |
|
|
- '273' |
|
|
- '274' |
|
|
- '275' |
|
|
- '276' |
|
|
- '277' |
|
|
- '278' |
|
|
- '279' |
|
|
- '280' |
|
|
- '281' |
|
|
- '282' |
|
|
- '283' |
|
|
- '284' |
|
|
- '285' |
|
|
- '286' |
|
|
- '287' |
|
|
- '288' |
|
|
- '289' |
|
|
- '290' |
|
|
- '291' |
|
|
- '292' |
|
|
- '293' |
|
|
- '294' |
|
|
- '295' |
|
|
- '296' |
|
|
- '297' |
|
|
- '298' |
|
|
- '299' |
|
|
- '300' |
|
|
- '301' |
|
|
- '302' |
|
|
- '303' |
|
|
- '304' |
|
|
- '305' |
|
|
- '306' |
|
|
- '307' |
|
|
- '308' |
|
|
- '309' |
|
|
- '310' |
|
|
- '311' |
|
|
- '312' |
|
|
- '313' |
|
|
- '314' |
|
|
- '315' |
|
|
- '316' |
|
|
- '317' |
|
|
- '318' |
|
|
- '319' |
|
|
- '320' |
|
|
- '321' |
|
|
- '322' |
|
|
- '323' |
|
|
- '324' |
|
|
- '325' |
|
|
- '326' |
|
|
- '327' |
|
|
- '328' |
|
|
- '329' |
|
|
- '330' |
|
|
- '331' |
|
|
- '332' |
|
|
- '333' |
|
|
- '334' |
|
|
- '335' |
|
|
- '336' |
|
|
- '337' |
|
|
- '338' |
|
|
- '339' |
|
|
- '340' |
|
|
- '341' |
|
|
- '342' |
|
|
- '343' |
|
|
- '344' |
|
|
- '345' |
|
|
- '346' |
|
|
- '347' |
|
|
- '348' |
|
|
- '349' |
|
|
- '350' |
|
|
- '351' |
|
|
- '352' |
|
|
- '353' |
|
|
- '354' |
|
|
- '355' |
|
|
- '356' |
|
|
- '357' |
|
|
- '358' |
|
|
- '359' |
|
|
- '360' |
|
|
- '361' |
|
|
- '362' |
|
|
- '363' |
|
|
- '364' |
|
|
- '365' |
|
|
- '366' |
|
|
- '367' |
|
|
- '368' |
|
|
- '369' |
|
|
- '370' |
|
|
- '371' |
|
|
- '372' |
|
|
- '373' |
|
|
- '374' |
|
|
- '375' |
|
|
- '376' |
|
|
- '377' |
|
|
- '378' |
|
|
- '379' |
|
|
- '380' |
|
|
- '381' |
|
|
- '382' |
|
|
- '383' |
|
|
- '384' |
|
|
- '385' |
|
|
- '386' |
|
|
- '387' |
|
|
- '388' |
|
|
- '389' |
|
|
- '390' |
|
|
- '391' |
|
|
- '392' |
|
|
- '393' |
|
|
- '394' |
|
|
- '395' |
|
|
- '396' |
|
|
- '397' |
|
|
- '398' |
|
|
- '399' |
|
|
- '400' |
|
|
- '401' |
|
|
- '402' |
|
|
- '403' |
|
|
- '404' |
|
|
- '405' |
|
|
- '406' |
|
|
- '407' |
|
|
- '408' |
|
|
- '409' |
|
|
- '410' |
|
|
- '411' |
|
|
- '412' |
|
|
- '413' |
|
|
- '414' |
|
|
- '415' |
|
|
- '416' |
|
|
- '417' |
|
|
- '418' |
|
|
- '419' |
|
|
- '420' |
|
|
- '421' |
|
|
- '422' |
|
|
- '423' |
|
|
- '424' |
|
|
- '425' |
|
|
- '426' |
|
|
- '427' |
|
|
- '428' |
|
|
- '429' |
|
|
- '430' |
|
|
- '431' |
|
|
- '432' |
|
|
- '433' |
|
|
- '434' |
|
|
- '435' |
|
|
- '436' |
|
|
- '437' |
|
|
- '438' |
|
|
- '439' |
|
|
- '440' |
|
|
- '441' |
|
|
- '442' |
|
|
- '443' |
|
|
- '444' |
|
|
- '445' |
|
|
- '446' |
|
|
- '447' |
|
|
- '448' |
|
|
- '449' |
|
|
- '450' |
|
|
- '451' |
|
|
- '452' |
|
|
- '453' |
|
|
- '454' |
|
|
- '455' |
|
|
- '456' |
|
|
- '457' |
|
|
- '458' |
|
|
- '459' |
|
|
- '460' |
|
|
- '461' |
|
|
- '462' |
|
|
- '463' |
|
|
- '464' |
|
|
- '465' |
|
|
- '466' |
|
|
- '467' |
|
|
- '468' |
|
|
- '469' |
|
|
- '470' |
|
|
- '471' |
|
|
- '472' |
|
|
- '473' |
|
|
- '474' |
|
|
- '475' |
|
|
- '476' |
|
|
- '477' |
|
|
- '478' |
|
|
- '479' |
|
|
- '480' |
|
|
- '481' |
|
|
- '482' |
|
|
- '483' |
|
|
- '484' |
|
|
- '485' |
|
|
- '486' |
|
|
- '487' |
|
|
- '488' |
|
|
- '489' |
|
|
- '490' |
|
|
- '491' |
|
|
- '492' |
|
|
- '493' |
|
|
- '494' |
|
|
- '495' |
|
|
- '496' |
|
|
- '497' |
|
|
- '498' |
|
|
- '499' |
|
|
- '500' |
|
|
- '501' |
|
|
- '502' |
|
|
- '503' |
|
|
- '504' |
|
|
- '505' |
|
|
- '506' |
|
|
- '507' |
|
|
- '508' |
|
|
- '509' |
|
|
- '510' |
|
|
- '511' |
|
|
- '512' |
|
|
- '513' |
|
|
- '514' |
|
|
- '515' |
|
|
- '516' |
|
|
- '517' |
|
|
- '518' |
|
|
- '519' |
|
|
- '520' |
|
|
- '521' |
|
|
- '522' |
|
|
- '523' |
|
|
- '524' |
|
|
- '525' |
|
|
- '526' |
|
|
- '527' |
|
|
- '528' |
|
|
- '529' |
|
|
- '530' |
|
|
- '531' |
|
|
- '532' |
|
|
- '533' |
|
|
- '534' |
|
|
- '535' |
|
|
- '536' |
|
|
- '537' |
|
|
- '538' |
|
|
- '539' |
|
|
- '540' |
|
|
- '541' |
|
|
- '542' |
|
|
- '543' |
|
|
- '544' |
|
|
- '545' |
|
|
- '546' |
|
|
- '547' |
|
|
- '548' |
|
|
- '549' |
|
|
- '550' |
|
|
- '551' |
|
|
- '552' |
|
|
- '553' |
|
|
- '554' |
|
|
- '555' |
|
|
- '556' |
|
|
- '557' |
|
|
- '558' |
|
|
- '559' |
|
|
- '560' |
|
|
- '561' |
|
|
- '562' |
|
|
- '563' |
|
|
- '564' |
|
|
- '565' |
|
|
- '566' |
|
|
- '567' |
|
|
- '568' |
|
|
- '569' |
|
|
- '570' |
|
|
- '571' |
|
|
- '572' |
|
|
- '573' |
|
|
- '574' |
|
|
- '575' |
|
|
- '576' |
|
|
- '577' |
|
|
- '578' |
|
|
- '579' |
|
|
- '580' |
|
|
- '581' |
|
|
- '582' |
|
|
- '583' |
|
|
- '584' |
|
|
- '585' |
|
|
- '586' |
|
|
- '587' |
|
|
- '588' |
|
|
- '589' |
|
|
- '590' |
|
|
- '591' |
|
|
- '592' |
|
|
- '593' |
|
|
- '594' |
|
|
- '595' |
|
|
- '596' |
|
|
- '597' |
|
|
- '598' |
|
|
- '599' |
|
|
- '600' |
|
|
- '601' |
|
|
- '602' |
|
|
- '603' |
|
|
- '604' |
|
|
- '605' |
|
|
- '606' |
|
|
- '607' |
|
|
- '608' |
|
|
- '609' |
|
|
- '610' |
|
|
- '611' |
|
|
- '612' |
|
|
- '613' |
|
|
- '614' |
|
|
- '615' |
|
|
- '616' |
|
|
- '617' |
|
|
- '618' |
|
|
- '619' |
|
|
- '620' |
|
|
- '621' |
|
|
- '622' |
|
|
- '623' |
|
|
- '624' |
|
|
- '625' |
|
|
- '626' |
|
|
- '627' |
|
|
- '628' |
|
|
- '629' |
|
|
- '630' |
|
|
- '631' |
|
|
- '632' |
|
|
- '633' |
|
|
- '634' |
|
|
- '635' |
|
|
- '636' |
|
|
- '637' |
|
|
- '638' |
|
|
- '639' |
|
|
- '640' |
|
|
- '641' |
|
|
- '642' |
|
|
- '643' |
|
|
- '644' |
|
|
- '645' |
|
|
- '646' |
|
|
- '647' |
|
|
- '648' |
|
|
- '649' |
|
|
- '650' |
|
|
- '651' |
|
|
- '652' |
|
|
- '653' |
|
|
- '654' |
|
|
- '655' |
|
|
- '656' |
|
|
- '657' |
|
|
- '658' |
|
|
- '659' |
|
|
- '660' |
|
|
- '661' |
|
|
- '662' |
|
|
- '663' |
|
|
- '664' |
|
|
- '665' |
|
|
- '666' |
|
|
- '667' |
|
|
- '668' |
|
|
- '669' |
|
|
- '670' |
|
|
- '671' |
|
|
- '672' |
|
|
- '673' |
|
|
- '674' |
|
|
- '675' |
|
|
- '676' |
|
|
- '677' |
|
|
- '678' |
|
|
- '679' |
|
|
- '680' |
|
|
- '681' |
|
|
- '682' |
|
|
- '683' |
|
|
- '684' |
|
|
- '685' |
|
|
- '686' |
|
|
- '687' |
|
|
- '688' |
|
|
- '689' |
|
|
- '690' |
|
|
- '691' |
|
|
- '692' |
|
|
- '693' |
|
|
- '694' |
|
|
- '695' |
|
|
- '696' |
|
|
- '697' |
|
|
- '698' |
|
|
- '699' |
|
|
- '700' |
|
|
- '701' |
|
|
- '702' |
|
|
- '703' |
|
|
- '704' |
|
|
- '705' |
|
|
- '706' |
|
|
- '707' |
|
|
- '708' |
|
|
- '709' |
|
|
- '710' |
|
|
- '711' |
|
|
- '712' |
|
|
- '713' |
|
|
- '714' |
|
|
- '715' |
|
|
- '716' |
|
|
- '717' |
|
|
- '718' |
|
|
- '719' |
|
|
- '720' |
|
|
- '721' |
|
|
- '722' |
|
|
- '723' |
|
|
- '724' |
|
|
- '725' |
|
|
- '726' |
|
|
- '727' |
|
|
- '728' |
|
|
- '729' |
|
|
- '730' |
|
|
- '731' |
|
|
- '732' |
|
|
- '733' |
|
|
- '734' |
|
|
- '735' |
|
|
- '736' |
|
|
- '737' |
|
|
- '738' |
|
|
- '739' |
|
|
- '740' |
|
|
- '741' |
|
|
- '742' |
|
|
- '743' |
|
|
- '744' |
|
|
- '745' |
|
|
- '746' |
|
|
- '747' |
|
|
- '748' |
|
|
- '749' |
|
|
- '750' |
|
|
- '751' |
|
|
- '752' |
|
|
- '753' |
|
|
- '754' |
|
|
- '755' |
|
|
- '756' |
|
|
- '757' |
|
|
- '758' |
|
|
- '759' |
|
|
- '760' |
|
|
- '761' |
|
|
- '762' |
|
|
- '763' |
|
|
- '764' |
|
|
- '765' |
|
|
- '766' |
|
|
- '767' |
|
|
- '768' |
|
|
- '769' |
|
|
- '770' |
|
|
- '771' |
|
|
- '772' |
|
|
- '773' |
|
|
- '774' |
|
|
- '775' |
|
|
- '776' |
|
|
- '777' |
|
|
- '778' |
|
|
- '779' |
|
|
- '780' |
|
|
- '781' |
|
|
- '782' |
|
|
- '783' |
|
|
- '784' |
|
|
- '785' |
|
|
- '786' |
|
|
- '787' |
|
|
- '788' |
|
|
- '789' |
|
|
- '790' |
|
|
- '791' |
|
|
- '792' |
|
|
- '793' |
|
|
- '794' |
|
|
- '795' |
|
|
- '796' |
|
|
- '797' |
|
|
- '798' |
|
|
- '799' |
|
|
- '800' |
|
|
- '801' |
|
|
- '802' |
|
|
- '803' |
|
|
- '804' |
|
|
- '805' |
|
|
- '806' |
|
|
- '807' |
|
|
- '808' |
|
|
- '809' |
|
|
- '810' |
|
|
- '811' |
|
|
- '812' |
|
|
- '813' |
|
|
- '814' |
|
|
- '815' |
|
|
- '816' |
|
|
- '817' |
|
|
- '818' |
|
|
- '819' |
|
|
- '820' |
|
|
- '821' |
|
|
- '822' |
|
|
- '823' |
|
|
- '824' |
|
|
- '825' |
|
|
- '826' |
|
|
- '827' |
|
|
- '828' |
|
|
- '829' |
|
|
- '830' |
|
|
- '831' |
|
|
- '832' |
|
|
- '833' |
|
|
- '834' |
|
|
- '835' |
|
|
- '836' |
|
|
- '837' |
|
|
- '838' |
|
|
- '839' |
|
|
- '840' |
|
|
- '841' |
|
|
- '842' |
|
|
- '843' |
|
|
- '844' |
|
|
- '845' |
|
|
- '846' |
|
|
- '847' |
|
|
- '848' |
|
|
- '849' |
|
|
- '850' |
|
|
- '851' |
|
|
- '852' |
|
|
- '853' |
|
|
- '854' |
|
|
- '855' |
|
|
- '856' |
|
|
- '857' |
|
|
- '858' |
|
|
- '859' |
|
|
- '860' |
|
|
- '861' |
|
|
- '862' |
|
|
- '863' |
|
|
- '864' |
|
|
- '865' |
|
|
- '866' |
|
|
- '867' |
|
|
- '868' |
|
|
- '869' |
|
|
- '870' |
|
|
- '871' |
|
|
- '872' |
|
|
- '873' |
|
|
- '874' |
|
|
- '875' |
|
|
- '876' |
|
|
- '877' |
|
|
- '878' |
|
|
- '879' |
|
|
- '880' |
|
|
- '881' |
|
|
- '882' |
|
|
- '883' |
|
|
- '884' |
|
|
- '885' |
|
|
- '886' |
|
|
- '887' |
|
|
- '888' |
|
|
- '889' |
|
|
- '890' |
|
|
- '891' |
|
|
- '892' |
|
|
- '893' |
|
|
- '894' |
|
|
- '895' |
|
|
- '896' |
|
|
- '897' |
|
|
- '898' |
|
|
- '899' |
|
|
- '900' |
|
|
- '901' |
|
|
- '902' |
|
|
- '903' |
|
|
- '904' |
|
|
- '905' |
|
|
- '906' |
|
|
- '907' |
|
|
- '908' |
|
|
- '909' |
|
|
- '910' |
|
|
- '911' |
|
|
- '912' |
|
|
- '913' |
|
|
- '914' |
|
|
- '915' |
|
|
- '916' |
|
|
- '917' |
|
|
- '918' |
|
|
- '919' |
|
|
- '920' |
|
|
- '921' |
|
|
- '922' |
|
|
- '923' |
|
|
- '924' |
|
|
- '925' |
|
|
- '926' |
|
|
- '927' |
|
|
- '928' |
|
|
- '929' |
|
|
- '930' |
|
|
- '931' |
|
|
- '932' |
|
|
- '933' |
|
|
- '934' |
|
|
- '935' |
|
|
- '936' |
|
|
- '937' |
|
|
- '938' |
|
|
- '939' |
|
|
- '940' |
|
|
- '941' |
|
|
- '942' |
|
|
- '943' |
|
|
- '944' |
|
|
- '945' |
|
|
- '946' |
|
|
- '947' |
|
|
- '948' |
|
|
- '949' |
|
|
- '950' |
|
|
- '951' |
|
|
- '952' |
|
|
- '953' |
|
|
- '954' |
|
|
- '955' |
|
|
- '956' |
|
|
- '957' |
|
|
- '958' |
|
|
- '959' |
|
|
- '960' |
|
|
- '961' |
|
|
- '962' |
|
|
- '963' |
|
|
- '964' |
|
|
- '965' |
|
|
- '966' |
|
|
- '967' |
|
|
- '968' |
|
|
- '969' |
|
|
- '970' |
|
|
- '971' |
|
|
- '972' |
|
|
- '973' |
|
|
- '974' |
|
|
- '975' |
|
|
- '976' |
|
|
- '977' |
|
|
- '978' |
|
|
- '979' |
|
|
- '980' |
|
|
- '981' |
|
|
- '982' |
|
|
- '983' |
|
|
- '984' |
|
|
- '985' |
|
|
- '986' |
|
|
- '987' |
|
|
- '988' |
|
|
- '989' |
|
|
- '990' |
|
|
- '991' |
|
|
- '992' |
|
|
- '993' |
|
|
- '994' |
|
|
- '995' |
|
|
- '996' |
|
|
- '997' |
|
|
- '998' |
|
|
- '999' |
|
|
- '1000' |
|
|
- '1001' |
|
|
- '1002' |
|
|
- '1003' |
|
|
- '1004' |
|
|
- '1005' |
|
|
- '1006' |
|
|
- '1007' |
|
|
- '1008' |
|
|
- '1009' |
|
|
- '1010' |
|
|
- '1011' |
|
|
- '1012' |
|
|
- '1013' |
|
|
- '1014' |
|
|
- '1015' |
|
|
- '1016' |
|
|
- '1017' |
|
|
- '1018' |
|
|
- '1019' |
|
|
- '1020' |
|
|
- '1021' |
|
|
- '1022' |
|
|
- '1023' |
|
|
init: null |
|
|
collate_fn_conf: {} |
|
|
use_preprocessor: true |
|
|
waveform_input: false |
|
|
encoder: beats |
|
|
encoder_conf: |
|
|
use_weighted_representation: false |
|
|
is_pretraining: true |
|
|
beats_config: |
|
|
layer_wise_gradient_decay_ratio: 1.0 |
|
|
encoder_layerdrop: 0.0 |
|
|
dropout: 0.1 |
|
|
codebook_vocab_size: 1024 |
|
|
fbank_mean: 15.2913 |
|
|
fbank_std: 5.90532 |
|
|
decoder_layers: 3 |
|
|
deep_norm: true |
|
|
use_flash_attn: false |
|
|
relative_position_embedding: true |
|
|
num_buckets: 320 |
|
|
max_distance: 800 |
|
|
gru_rel_pos: true |
|
|
encoder_layers: 24 |
|
|
encoder_embed_dim: 1024 |
|
|
encoder_ffn_embed_dim: 4096 |
|
|
encoder_attention_heads: 16 |
|
|
decoder_embed_dim: 1024 |
|
|
decoder_attention_heads: 16 |
|
|
model: beats |
|
|
model_conf: |
|
|
ignore_id: -2 |
|
|
label_smoothing: 0.1 |
|
|
waveform_input: false |
|
|
mixup_probability: 0.0 |
|
|
required: |
|
|
- output_dir |
|
|
- token_list |
|
|
version: '202412' |
|
|
distributed: true |
|
|
``` |
|
|
|
|
|
</details> |
|
|
|
|
|
### Citing ESPnet |
|
|
|
|
|
```bibtex |
|
|
@inproceedings{watanabe2018espnet, |
|
|
author={Shinji Watanabe and Takaaki Hori and Shigeki Karita and Tomoki Hayashi and Jiro Nishitoba and Yuya Unno and Nelson Yalta and Jahn Heymann and Matthew Wiesner and Nanxin Chen and Adithya Renduchintala and Tsubasa Ochiai}, |
|
|
title={{ESPnet}: End-to-End Speech Processing Toolkit}, |
|
|
year={2018}, |
|
|
booktitle={Proceedings of Interspeech}, |
|
|
pages={2207--2211}, |
|
|
doi={10.21437/Interspeech.2018-1456}, |
|
|
url={http://dx.doi.org/10.21437/Interspeech.2018-1456} |
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
``` |
|
|
|
|
|
or arXiv: |
|
|
|
|
|
```bibtex |
|
|
@misc{watanabe2018espnet, |
|
|
title={ESPnet: End-to-End Speech Processing Toolkit}, |
|
|
author={Shinji Watanabe and Takaaki Hori and Shigeki Karita and Tomoki Hayashi and Jiro Nishitoba and Yuya Unno and Nelson Yalta and Jahn Heymann and Matthew Wiesner and Nanxin Chen and Adithya Renduchintala and Tsubasa Ochiai}, |
|
|
year={2018}, |
|
|
eprint={1804.00015}, |
|
|
archivePrefix={arXiv}, |
|
|
primaryClass={cs.CL} |
|
|
} |
|
|
``` |
|
|
|