Shikhar Bharadwaj committed on
Commit
814591e
·
1 Parent(s): de7db91

Update model

Browse files
README.md ADDED
@@ -0,0 +1,783 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ tags:
3
+ - espnet
4
+ - audio
5
+ - classification
6
+ datasets:
7
+ - as20k
8
+ license: cc-by-4.0
9
+ ---
10
+
11
+ ## ESPnet2 CLS model
12
+
13
+ ### `espnet/OpenBEATS-Base-i3-as20k`
14
+
15
+ This model was trained by Shikhar Bharadwaj using the as20k recipe in [espnet](https://github.com/espnet/espnet/).
16
+
17
+ ## CLS config
18
+
19
+ <details><summary>expand</summary>
20
+
21
+ ```
22
+ config: /work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/exp/earbasei3/conf/ear_base/audioset20k.yaml
23
+ print_config: false
24
+ log_level: INFO
25
+ drop_last_iter: false
26
+ dry_run: false
27
+ iterator_type: sequence
28
+ valid_iterator_type: null
29
+ output_dir: /work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/exp/audioset20k/cls_earbasei3
30
+ ngpu: 0
31
+ seed: 0
32
+ num_workers: 2
33
+ num_att_plot: 0
34
+ dist_backend: nccl
35
+ dist_init_method: env://
36
+ dist_world_size: null
37
+ dist_rank: null
38
+ local_rank: null
39
+ dist_master_addr: null
40
+ dist_master_port: null
41
+ dist_launcher: null
42
+ multiprocessing_distributed: false
43
+ unused_parameters: true
44
+ sharded_ddp: false
45
+ use_deepspeed: false
46
+ deepspeed_config: null
47
+ gradient_as_bucket_view: true
48
+ ddp_comm_hook: null
49
+ cudnn_enabled: true
50
+ cudnn_benchmark: false
51
+ cudnn_deterministic: true
52
+ use_tf32: false
53
+ collect_stats: false
54
+ write_collected_feats: false
55
+ max_epoch: 160
56
+ patience: null
57
+ val_scheduler_criterion:
58
+ - valid
59
+ - loss
60
+ early_stopping_criterion:
61
+ - valid
62
+ - loss
63
+ - min
64
+ best_model_criterion:
65
+ - - valid
66
+ - epoch_mAP
67
+ - max
68
+ keep_nbest_models: 1
69
+ nbest_averaging_interval: 0
70
+ grad_clip: 1
71
+ grad_clip_type: 2.0
72
+ grad_noise: false
73
+ accum_grad: 1
74
+ no_forward_run: false
75
+ resume: true
76
+ train_dtype: float32
77
+ use_amp: false
78
+ log_interval: null
79
+ use_matplotlib: true
80
+ use_tensorboard: true
81
+ create_graph_in_tensorboard: false
82
+ use_wandb: true
83
+ wandb_project: audioverse
84
+ wandb_id: null
85
+ wandb_entity: shikhar
86
+ wandb_name: audioset20k.earbasei3
87
+ wandb_model_log_interval: -1
88
+ detect_anomaly: false
89
+ use_adapter: false
90
+ adapter: lora
91
+ save_strategy: all
92
+ adapter_conf: {}
93
+ pretrain_path: null
94
+ init_param: []
95
+ ignore_init_mismatch: false
96
+ freeze_param: []
97
+ num_iters_per_epoch: null
98
+ batch_size: 80
99
+ valid_batch_size: 1200
100
+ batch_bins: 1000000
101
+ valid_batch_bins: null
102
+ category_sample_size: 10
103
+ train_shape_file:
104
+ - /work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/exp/audioset20k/cls_stats_16k/train/speech_shape
105
+ - /work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/exp/audioset20k/cls_stats_16k/train/label_shape
106
+ valid_shape_file:
107
+ - /work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/exp/audioset20k/cls_stats_16k/valid/speech_shape
108
+ - /work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/exp/audioset20k/cls_stats_16k/valid/label_shape
109
+ batch_type: folded
110
+ valid_batch_type: null
111
+ fold_length:
112
+ - 160000
113
+ - 600
114
+ sort_in_batch: descending
115
+ shuffle_within_batch: false
116
+ sort_batch: descending
117
+ multiple_iterator: false
118
+ utt2weight_file: null
119
+ chunk_length: 500
120
+ chunk_shift_ratio: 0.5
121
+ num_cache_chunks: 1024
122
+ chunk_excluded_key_prefixes: []
123
+ chunk_default_fs: null
124
+ chunk_max_abs_length: null
125
+ chunk_discard_short_samples: true
126
+ train_data_path_and_name_and_type:
127
+ - - /work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/dump/audioset20k/train/wav.scp
128
+ - speech
129
+ - sound
130
+ - - /work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/dump/audioset20k/train/text
131
+ - label
132
+ - text
133
+ valid_data_path_and_name_and_type:
134
+ - - /work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/dump/audioset20k/val/wav.scp
135
+ - speech
136
+ - sound
137
+ - - /work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/dump/audioset20k/val/text
138
+ - label
139
+ - text
140
+ multi_task_dataset: false
141
+ allow_variable_data_keys: false
142
+ max_cache_size: 0.0
143
+ max_cache_fd: 32
144
+ allow_multi_rates: false
145
+ valid_max_cache_size: null
146
+ exclude_weight_decay: false
147
+ exclude_weight_decay_conf: {}
148
+ optim: adamw
149
+ optim_conf:
150
+ lr: 3.0e-05
151
+ weight_decay: 0.01
152
+ betas:
153
+ - 0.9
154
+ - 0.98
155
+ scheduler: cosineannealingwarmuprestarts
156
+ scheduler_conf:
157
+ first_cycle_steps: 95000
158
+ warmup_steps: 8000
159
+ max_lr: 3.0e-05
160
+ min_lr: 5.0e-06
161
+ lightning_conf:
162
+ log_every_n_steps: 250
163
+ max_epochs: 500
164
+ strategy: ddp
165
+ strategy_conf:
166
+ find_unused_parameters: true
167
+ best_model_criterion:
168
+ - - valid/epoch_mAP
169
+ - max
170
+ - 1
171
+ devices: 1
172
+ num_nodes: 1
173
+ default_root_dir: /work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/exp/audioset20k/cls_earbasei3
174
+ token_list:
175
+ - Music
176
+ - Speech
177
+ - Vehicle
178
+ - Inside,_small_room
179
+ - Animal
180
+ - Musical_instrument
181
+ - Singing
182
+ - Domestic_animals,_pets
183
+ - Guitar
184
+ - Plucked_string_instrument
185
+ - Water
186
+ - Car
187
+ - Dog
188
+ - Percussion
189
+ - Wind_instrument,_woodwind_instrument
190
+ - Outside,_urban_or_manmade
191
+ - Outside,_rural_or_natural
192
+ - Boat,_Water_vehicle
193
+ - Brass_instrument
194
+ - Fowl
195
+ - Drum
196
+ - Siren
197
+ - Engine
198
+ - Bird
199
+ - Insect
200
+ - Gunshot,_gunfire
201
+ - Wood
202
+ - Rail_transport
203
+ - Train
204
+ - Wind
205
+ - Inside,_large_room_or_hall
206
+ - Railroad_car,_train_wagon
207
+ - Child_speech,_kid_speaking
208
+ - Crowd
209
+ - Rub
210
+ - Keyboard_(musical)
211
+ - Wind_noise_(microphone)
212
+ - Pizzicato
213
+ - Emergency_vehicle
214
+ - Bird_vocalization,_bird_call,_bird_song
215
+ - Livestock,_farm_animals,_working_animals
216
+ - Cat
217
+ - Organ
218
+ - Fly,_housefly
219
+ - Mechanisms
220
+ - Bowed_string_instrument
221
+ - Rain
222
+ - Laughter
223
+ - Aircraft
224
+ - Electronic_music
225
+ - Effects_unit
226
+ - Hum
227
+ - Tools
228
+ - Drum_kit
229
+ - Snare_drum
230
+ - Hiss
231
+ - Piano
232
+ - Water_tap,_faucet
233
+ - Rimshot
234
+ - Bass_drum
235
+ - Chicken,_rooster
236
+ - Marimba,_xylophone
237
+ - Horse
238
+ - Song
239
+ - Quack
240
+ - Power_tool
241
+ - Heart_sounds,_heartbeat
242
+ - Goose
243
+ - Hammond_organ
244
+ - Rock_music
245
+ - Ocean
246
+ - Mains_hum
247
+ - Thunder
248
+ - Chime
249
+ - Electronic_dance_music
250
+ - Typing
251
+ - Sink_(filling_or_washing)
252
+ - Raindrop
253
+ - Cello
254
+ - Electric_guitar
255
+ - Cheering
256
+ - Church_bell
257
+ - Christian_music
258
+ - Drum_roll
259
+ - Trombone
260
+ - Glockenspiel
261
+ - Trumpet
262
+ - Cymbal
263
+ - Tabla
264
+ - Clickety-clack
265
+ - Cricket
266
+ - Steam_whistle
267
+ - Explosion
268
+ - Saxophone
269
+ - Thunderstorm
270
+ - Pop_music
271
+ - Zither
272
+ - Applause
273
+ - Choir
274
+ - Whack,_thwack
275
+ - Clarinet
276
+ - Camera
277
+ - Electric_piano
278
+ - Independent_music
279
+ - Fire
280
+ - Frog
281
+ - Jet_engine
282
+ - Music_of_Asia
283
+ - Ding
284
+ - Waves,_surf
285
+ - Cattle,_bovinae
286
+ - Turkey
287
+ - Television
288
+ - Coo
289
+ - Scratching_(performance_technique)
290
+ - Flute
291
+ - Liquid
292
+ - Harp
293
+ - Progressive_rock
294
+ - Happy_music
295
+ - Steel_guitar,_slide_guitar
296
+ - Whoosh,_swoosh,_swish
297
+ - Boom
298
+ - Breathing
299
+ - Electronic_organ
300
+ - Environmental_noise
301
+ - Distortion
302
+ - Alarm_clock
303
+ - Fixed-wing_aircraft,_airplane
304
+ - Violin,_fiddle
305
+ - Whistling
306
+ - Accordion
307
+ - Disco
308
+ - Pump_(liquid)
309
+ - Waterfall
310
+ - Beep,_bleep
311
+ - Blues
312
+ - Grunge
313
+ - Hip_hop_music
314
+ - Whistle
315
+ - Fusillade
316
+ - Splash,_splatter
317
+ - Gush
318
+ - Toothbrush
319
+ - Knock
320
+ - Gargling
321
+ - Snoring
322
+ - Hammer
323
+ - Gobble
324
+ - Walk,_footsteps
325
+ - Jackhammer
326
+ - Filing_(rasp)
327
+ - Snort
328
+ - Narration,_monologue
329
+ - Tire_squeal
330
+ - Fire_alarm
331
+ - Squeal
332
+ - Meow
333
+ - Caterwaul
334
+ - Cutlery,_silverware
335
+ - Mantra
336
+ - Opera
337
+ - Classical_music
338
+ - Theremin
339
+ - Burst,_pop
340
+ - Drip
341
+ - Tick
342
+ - Children_shouting
343
+ - Creak
344
+ - Hiccup
345
+ - Pigeon,_dove
346
+ - Bicycle_bell
347
+ - Baby_cry,_infant_cry
348
+ - Duck
349
+ - Fireworks
350
+ - Tambourine
351
+ - Rodents,_rats,_mice
352
+ - Buzzer
353
+ - Splinter
354
+ - Writing
355
+ - Goat
356
+ - Sheep
357
+ - Heavy_metal
358
+ - Ska
359
+ - Neigh,_whinny
360
+ - Sizzle
361
+ - Rowboat,_canoe,_kayak
362
+ - Wood_block
363
+ - Clang
364
+ - Door
365
+ - Female_singing
366
+ - Stream
367
+ - Chant
368
+ - Vocal_music
369
+ - Yodeling
370
+ - Bee,_wasp,_etc.
371
+ - Air_brake
372
+ - Whir
373
+ - Bird_flight,_flapping_wings
374
+ - French_horn
375
+ - Telephone_dialing,_DTMF
376
+ - Squeak
377
+ - Sitar
378
+ - Smoke_detector,_smoke_alarm
379
+ - Tick-tock
380
+ - Gurgling
381
+ - Bellow
382
+ - Harmonic
383
+ - Male_singing
384
+ - Giggle
385
+ - Bark
386
+ - Vibration
387
+ - Drill
388
+ - Skidding
389
+ - Scratch
390
+ - Drawer_open_or_close
391
+ - Chop
392
+ - Drum_machine
393
+ - Squish
394
+ - Toilet_flush
395
+ - Fart
396
+ - Basketball_bounce
397
+ - Electronic_tuner
398
+ - Singing_bowl
399
+ - Squawk
400
+ - Conversation
401
+ - Reggae
402
+ - Funny_music
403
+ - Scrape
404
+ - Sewing_machine
405
+ - Tender_music
406
+ - Swing_music
407
+ - Dishes,_pots,_and_pans
408
+ - Sampler
409
+ - Synthesizer
410
+ - Clapping
411
+ - Hubbub,_speech_noise,_speech_babble
412
+ - Engine_knocking
413
+ - Canidae,_dogs,_wolves
414
+ - Chainsaw
415
+ - Pour
416
+ - Croak
417
+ - Chewing,_mastication
418
+ - Cowbell
419
+ - Propeller,_airscrew
420
+ - Didgeridoo
421
+ - Ringtone
422
+ - Rattle_(instrument)
423
+ - Artillery_fire
424
+ - Cash_register
425
+ - Crack
426
+ - Growling
427
+ - Mosquito
428
+ - Carnatic_music
429
+ - Honk
430
+ - Howl
431
+ - Cacophony
432
+ - Gospel_music
433
+ - Firecracker
434
+ - Strum
435
+ - Motorboat,_speedboat
436
+ - Clock
437
+ - Dance_music
438
+ - Microwave_oven
439
+ - Country
440
+ - Bluegrass
441
+ - Rattle
442
+ - Mallet_percussion
443
+ - Computer_keyboard
444
+ - Bass_guitar
445
+ - Electric_shaver,_electric_razor
446
+ - Sawing
447
+ - Owl
448
+ - Whip
449
+ - White_noise
450
+ - Chirp_tone
451
+ - Boiling
452
+ - Ship
453
+ - Mouse
454
+ - Breaking
455
+ - Silence
456
+ - Throat_clearing
457
+ - Bleat
458
+ - Salsa_music
459
+ - Patter
460
+ - Vibraphone
461
+ - Flap
462
+ - Typewriter
463
+ - Change_ringing_(campanology)
464
+ - Trickle,_dribble
465
+ - Video_game_music
466
+ - Glass
467
+ - Dial_tone
468
+ - Radio
469
+ - Bell
470
+ - Moo
471
+ - Heart_murmur
472
+ - Clatter
473
+ - Sniff
474
+ - Double_bass
475
+ - Background_music
476
+ - Lawn_mower
477
+ - Printer
478
+ - House_music
479
+ - Tearing
480
+ - Angry_music
481
+ - Male_speech,_man_speaking
482
+ - Wild_animals
483
+ - Cupboard_open_or_close
484
+ - Harpsichord
485
+ - Light_engine_(high_frequency)
486
+ - Child_singing
487
+ - Zipper_(clothing)
488
+ - Jazz
489
+ - Belly_laugh
490
+ - Roar
491
+ - Motor_vehicle_(road)
492
+ - Crowing,_cock-a-doodle-doo
493
+ - Cluck
494
+ - Sad_music
495
+ - Hi-hat
496
+ - Cough
497
+ - Stomach_rumble
498
+ - Alarm
499
+ - String_section
500
+ - Sonar
501
+ - Keys_jangling
502
+ - Synthetic_singing
503
+ - Rapping
504
+ - Sidetone
505
+ - Orchestra
506
+ - Throbbing
507
+ - Whale_vocalization
508
+ - Thunk
509
+ - Children_playing
510
+ - Snake
511
+ - Chink,_clink
512
+ - Chirp,_tweet
513
+ - Boing
514
+ - Shuffle
515
+ - Pulse
516
+ - Punk_rock
517
+ - Crow
518
+ - Caw
519
+ - Static
520
+ - Clicking
521
+ - Snicker
522
+ - Whispering
523
+ - Pink_noise
524
+ - Crushing
525
+ - Wedding_music
526
+ - Crumpling,_crinkling
527
+ - Crackle
528
+ - Whoop
529
+ - Electric_toothbrush
530
+ - Train_wheels_squealing
531
+ - Yell
532
+ - Wind_chime
533
+ - Frying_(food)
534
+ - Christmas_music
535
+ - Fill_(with_liquid)
536
+ - Reverberation
537
+ - Beatboxing
538
+ - Harmonica
539
+ - Banjo
540
+ - Sliding_door
541
+ - Groan
542
+ - Bagpipes
543
+ - Spray
544
+ - Stir
545
+ - Acoustic_guitar
546
+ - Tap
547
+ - Chorus_effect
548
+ - Noise
549
+ - Crunch
550
+ - Biting
551
+ - Aircraft_engine
552
+ - Busy_signal
553
+ - Bang
554
+ - Techno
555
+ - Tuning_fork
556
+ - Tapping_(guitar_technique)
557
+ - Pig
558
+ - Maraca
559
+ - Vacuum_cleaner
560
+ - Mandolin
561
+ - Electronica
562
+ - Theme_music
563
+ - Yip
564
+ - A_capella
565
+ - Rustle
566
+ - Chatter
567
+ - Traditional_music
568
+ - Soul_music
569
+ - Rustling_leaves
570
+ - Afrobeat
571
+ - Hoot
572
+ - Slosh
573
+ - Roaring_cats_(lions,_tigers)
574
+ - Chopping_(food)
575
+ - Heavy_engine_(low_frequency)
576
+ - Sine_wave
577
+ - Speech_synthesizer
578
+ - Middle_Eastern_music
579
+ - Music_of_Latin_America
580
+ - Arrow
581
+ - Timpani
582
+ - Eruption
583
+ - Shofar
584
+ - Jingle_bell
585
+ - Humming
586
+ - Sanding
587
+ - Female_speech,_woman_speaking
588
+ - Gong
589
+ - Rain_on_surface
590
+ - Pant
591
+ - Dubstep
592
+ - Clip-clop
593
+ - Finger_snapping
594
+ - Blender
595
+ - Drum_and_bass
596
+ - Bouncing
597
+ - Vehicle_horn,_car_horn,_honking
598
+ - Slam
599
+ - Idling
600
+ - Rhythm_and_blues
601
+ - Race_car,_auto_racing
602
+ - Single-lens_reflex_camera
603
+ - Smash,_crash
604
+ - Purr
605
+ - Shatter
606
+ - Steelpan
607
+ - Whimper_(dog)
608
+ - Power_windows,_electric_windows
609
+ - Battle_cry
610
+ - Scary_music
611
+ - Hands
612
+ - Echo
613
+ - Truck
614
+ - Buzz
615
+ - Mechanical_fan
616
+ - Plop
617
+ - Run
618
+ - Gasp
619
+ - Psychedelic_rock
620
+ - Grunt
621
+ - Helicopter
622
+ - Dental_drill,_dentist's_drill
623
+ - Babbling
624
+ - Zing
625
+ - Oink
626
+ - Soundtrack_music
627
+ - Ambulance_(siren)
628
+ - Exciting_music
629
+ - Telephone
630
+ - Jingle_(music)
631
+ - Tubular_bells
632
+ - Burping,_eructation
633
+ - Baby_laughter
634
+ - Ping
635
+ - Bow-wow
636
+ - Foghorn
637
+ - Machine_gun
638
+ - Ukulele
639
+ - Telephone_bell_ringing
640
+ - Pulleys
641
+ - Gears
642
+ - Sigh
643
+ - Coin_(dropping)
644
+ - Music_of_Africa
645
+ - Scissors
646
+ - Inside,_public_space
647
+ - Trance_music
648
+ - Roll
649
+ - Thump,_thud
650
+ - Air_conditioning
651
+ - Ding-dong
652
+ - Ratchet,_pawl
653
+ - Hair_dryer
654
+ - Shout
655
+ - Ambient_music
656
+ - Music_for_children
657
+ - Toot
658
+ - Bathtub_(filling_or_washing)
659
+ - Slap,_smack
660
+ - Chuckle,_chortle
661
+ - Traffic_noise,_roadway_noise
662
+ - Bicycle
663
+ - Whimper
664
+ - Doorbell
665
+ - Wheeze
666
+ - Sailboat,_sailing_ship
667
+ - Cap_gun
668
+ - Wail,_moan
669
+ - Rock_and_roll
670
+ - Jingle,_tinkle
671
+ - Fire_engine,_fire_truck_(siren)
672
+ - Funk
673
+ - Lullaby
674
+ - Field_recording
675
+ - Skateboard
676
+ - Steam
677
+ - Rumble
678
+ - Medium_engine_(mid_frequency)
679
+ - Sound_effect
680
+ - Flamenco
681
+ - Shuffling_cards
682
+ - Subway,_metro,_underground
683
+ - Police_car_(siren)
684
+ - Folk_music
685
+ - Crying,_sobbing
686
+ - New-age_music
687
+ - Ice_cream_truck,_ice_cream_van
688
+ - Music_of_Bollywood
689
+ - Accelerating,_revving,_vroom
690
+ - Screaming
691
+ - Motorcycle
692
+ - Engine_starting
693
+ - Train_whistle
694
+ - Car_passing_by
695
+ - Bus
696
+ - Sneeze
697
+ - Train_horn
698
+ - Air_horn,_truck_horn
699
+ - Civil_defense_siren
700
+ - Car_alarm
701
+ - Reversing_beeps
702
+ - <blank>
703
+ - <unk>
704
+ text_token_list: null
705
+ text_bpemodel: null
706
+ init: xavier_normal
707
+ input_size: 1
708
+ use_preprocessor: true
709
+ frontend: null
710
+ frontend_conf: {}
711
+ specaug: null
712
+ specaug_conf: {}
713
+ normalize: null
714
+ normalize_conf: {}
715
+ preencoder: null
716
+ preencoder_conf: {}
717
+ encoder: beats
718
+ encoder_conf:
719
+ beats_ckpt_path: /work/nvme/bbjs/sbharadwaj/7Msounds/exp/beats_iter2_base2.tune_lr5e-4_warmup40000_bins1600000_totalsteps400000/epoch_latest.pt
720
+ beats_config:
721
+ layer_wise_gradient_decay_ratio: 0.3
722
+ encoder_layerdrop: 0.1
723
+ dropout: 0.0
724
+ use_weighted_representation: false
725
+ specaug_config:
726
+ apply_time_warp: true
727
+ apply_freq_mask: false
728
+ apply_time_mask: true
729
+ time_mask_width_ratio_range:
730
+ - 0
731
+ - 0.06
732
+ num_time_mask: 1
733
+ roll_augment: true
734
+ roll_interval: 1
735
+ text_encoder: null
736
+ text_encoder_conf: {}
737
+ embedding_fusion: null
738
+ embedding_fusion_conf: {}
739
+ decoder: linear
740
+ decoder_conf: {}
741
+ model: espnet
742
+ model_conf:
743
+ classification_type: multi-label
744
+ mixup_probability: 0.8
745
+ lsm_weight: 0.0
746
+ log_epoch_metrics: true
747
+ user_callbacks:
748
+ - mAP_logging
749
+ required:
750
+ - output_dir
751
+ - token_list
752
+ task: cls
753
+ ```
754
+
755
+ </details>
756
+
757
+ ### Citations
758
+
759
+ ```BibTex
760
+
761
+ @article{bharadwaj2025openbeats,
762
+ title={OpenBEATs: A Fully Open-Source General-Purpose Audio Encoder},
763
+ author={Bharadwaj, Shikhar and Cornell, Samuele and Choi, Kwanghee and Fukayama, Satoru and Shim, Hye-jin and Deshmukh, Soham and Watanabe, Shinji},
764
+ journal={arXiv preprint arXiv:2507.14129},
765
+ year={2025}
766
+ }
767
+
768
+ @inproceedings{watanabe2018espnet,
769
+ author={Shinji Watanabe and Takaaki Hori and Shigeki Karita and Tomoki Hayashi and Jiro Nishitoba and Yuya Unno and Nelson Yalta and Jahn Heymann and Matthew Wiesner and Nanxin Chen and Adithya Renduchintala and Tsubasa Ochiai},
770
+ title={{ESPnet}: End-to-End Speech Processing Toolkit},
771
+ year={2018},
772
+ booktitle={Proceedings of Interspeech},
773
+ pages={2207--2211},
774
+ doi={10.21437/Interspeech.2018-1456},
775
+ url={http://dx.doi.org/10.21437/Interspeech.2018-1456}
776
+ }
777
+
778
+
779
+
780
+
781
+
782
+
783
+ ```
meta.yaml ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ espnet: '202503'
2
+ files:
3
+ classification_model_file: /work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/exp/audioset20k/cls_earbasei3/valid.epoch_mAP.ave_1best.pth
4
+ python: "3.9.18 | packaged by conda-forge | (main, Dec 23 2023, 17:20:25) \n[GCC 12.3.0]"
5
+ timestamp: 1763330905.304442
6
+ torch: 2.1.2
7
+ yaml_files:
8
+ classification_train_config: /work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/exp/audioset20k/cls_earbasei3/config.yaml
work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/data/audioset20k/token_list ADDED
@@ -0,0 +1,529 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Music
2
+ Speech
3
+ Vehicle
4
+ Inside,_small_room
5
+ Animal
6
+ Musical_instrument
7
+ Singing
8
+ Domestic_animals,_pets
9
+ Guitar
10
+ Plucked_string_instrument
11
+ Water
12
+ Car
13
+ Dog
14
+ Percussion
15
+ Wind_instrument,_woodwind_instrument
16
+ Outside,_urban_or_manmade
17
+ Outside,_rural_or_natural
18
+ Boat,_Water_vehicle
19
+ Brass_instrument
20
+ Fowl
21
+ Drum
22
+ Siren
23
+ Engine
24
+ Bird
25
+ Insect
26
+ Gunshot,_gunfire
27
+ Wood
28
+ Rail_transport
29
+ Train
30
+ Wind
31
+ Inside,_large_room_or_hall
32
+ Railroad_car,_train_wagon
33
+ Child_speech,_kid_speaking
34
+ Crowd
35
+ Rub
36
+ Keyboard_(musical)
37
+ Wind_noise_(microphone)
38
+ Pizzicato
39
+ Emergency_vehicle
40
+ Bird_vocalization,_bird_call,_bird_song
41
+ Livestock,_farm_animals,_working_animals
42
+ Cat
43
+ Organ
44
+ Fly,_housefly
45
+ Mechanisms
46
+ Bowed_string_instrument
47
+ Rain
48
+ Laughter
49
+ Aircraft
50
+ Electronic_music
51
+ Effects_unit
52
+ Hum
53
+ Tools
54
+ Drum_kit
55
+ Snare_drum
56
+ Hiss
57
+ Piano
58
+ Water_tap,_faucet
59
+ Rimshot
60
+ Bass_drum
61
+ Chicken,_rooster
62
+ Marimba,_xylophone
63
+ Horse
64
+ Song
65
+ Quack
66
+ Power_tool
67
+ Heart_sounds,_heartbeat
68
+ Goose
69
+ Hammond_organ
70
+ Rock_music
71
+ Ocean
72
+ Mains_hum
73
+ Thunder
74
+ Chime
75
+ Electronic_dance_music
76
+ Typing
77
+ Sink_(filling_or_washing)
78
+ Raindrop
79
+ Cello
80
+ Electric_guitar
81
+ Cheering
82
+ Church_bell
83
+ Christian_music
84
+ Drum_roll
85
+ Trombone
86
+ Glockenspiel
87
+ Trumpet
88
+ Cymbal
89
+ Tabla
90
+ Clickety-clack
91
+ Cricket
92
+ Steam_whistle
93
+ Explosion
94
+ Saxophone
95
+ Thunderstorm
96
+ Pop_music
97
+ Zither
98
+ Applause
99
+ Choir
100
+ Whack,_thwack
101
+ Clarinet
102
+ Camera
103
+ Electric_piano
104
+ Independent_music
105
+ Fire
106
+ Frog
107
+ Jet_engine
108
+ Music_of_Asia
109
+ Ding
110
+ Waves,_surf
111
+ Cattle,_bovinae
112
+ Turkey
113
+ Television
114
+ Coo
115
+ Scratching_(performance_technique)
116
+ Flute
117
+ Liquid
118
+ Harp
119
+ Progressive_rock
120
+ Happy_music
121
+ Steel_guitar,_slide_guitar
122
+ Whoosh,_swoosh,_swish
123
+ Boom
124
+ Breathing
125
+ Electronic_organ
126
+ Environmental_noise
127
+ Distortion
128
+ Alarm_clock
129
+ Fixed-wing_aircraft,_airplane
130
+ Violin,_fiddle
131
+ Whistling
132
+ Accordion
133
+ Disco
134
+ Pump_(liquid)
135
+ Waterfall
136
+ Beep,_bleep
137
+ Blues
138
+ Grunge
139
+ Hip_hop_music
140
+ Whistle
141
+ Fusillade
142
+ Splash,_splatter
143
+ Gush
144
+ Toothbrush
145
+ Knock
146
+ Gargling
147
+ Snoring
148
+ Hammer
149
+ Gobble
150
+ Walk,_footsteps
151
+ Jackhammer
152
+ Filing_(rasp)
153
+ Snort
154
+ Narration,_monologue
155
+ Tire_squeal
156
+ Fire_alarm
157
+ Squeal
158
+ Meow
159
+ Caterwaul
160
+ Cutlery,_silverware
161
+ Mantra
162
+ Opera
163
+ Classical_music
164
+ Theremin
165
+ Burst,_pop
166
+ Drip
167
+ Tick
168
+ Children_shouting
169
+ Creak
170
+ Hiccup
171
+ Pigeon,_dove
172
+ Bicycle_bell
173
+ Baby_cry,_infant_cry
174
+ Duck
175
+ Fireworks
176
+ Tambourine
177
+ Rodents,_rats,_mice
178
+ Buzzer
179
+ Splinter
180
+ Writing
181
+ Goat
182
+ Sheep
183
+ Heavy_metal
184
+ Ska
185
+ Neigh,_whinny
186
+ Sizzle
187
+ Rowboat,_canoe,_kayak
188
+ Wood_block
189
+ Clang
190
+ Door
191
+ Female_singing
192
+ Stream
193
+ Chant
194
+ Vocal_music
195
+ Yodeling
196
+ Bee,_wasp,_etc.
197
+ Air_brake
198
+ Whir
199
+ Bird_flight,_flapping_wings
200
+ French_horn
201
+ Telephone_dialing,_DTMF
202
+ Squeak
203
+ Sitar
204
+ Smoke_detector,_smoke_alarm
205
+ Tick-tock
206
+ Gurgling
207
+ Bellow
208
+ Harmonic
209
+ Male_singing
210
+ Giggle
211
+ Bark
212
+ Vibration
213
+ Drill
214
+ Skidding
215
+ Scratch
216
+ Drawer_open_or_close
217
+ Chop
218
+ Drum_machine
219
+ Squish
220
+ Toilet_flush
221
+ Fart
222
+ Basketball_bounce
223
+ Electronic_tuner
224
+ Singing_bowl
225
+ Squawk
226
+ Conversation
227
+ Reggae
228
+ Funny_music
229
+ Scrape
230
+ Sewing_machine
231
+ Tender_music
232
+ Swing_music
233
+ Dishes,_pots,_and_pans
234
+ Sampler
235
+ Synthesizer
236
+ Clapping
237
+ Hubbub,_speech_noise,_speech_babble
238
+ Engine_knocking
239
+ Canidae,_dogs,_wolves
240
+ Chainsaw
241
+ Pour
242
+ Croak
243
+ Chewing,_mastication
244
+ Cowbell
245
+ Propeller,_airscrew
246
+ Didgeridoo
247
+ Ringtone
248
+ Rattle_(instrument)
249
+ Artillery_fire
250
+ Cash_register
251
+ Crack
252
+ Growling
253
+ Mosquito
254
+ Carnatic_music
255
+ Honk
256
+ Howl
257
+ Cacophony
258
+ Gospel_music
259
+ Firecracker
260
+ Strum
261
+ Motorboat,_speedboat
262
+ Clock
263
+ Dance_music
264
+ Microwave_oven
265
+ Country
266
+ Bluegrass
267
+ Rattle
268
+ Mallet_percussion
269
+ Computer_keyboard
270
+ Bass_guitar
271
+ Electric_shaver,_electric_razor
272
+ Sawing
273
+ Owl
274
+ Whip
275
+ White_noise
276
+ Chirp_tone
277
+ Boiling
278
+ Ship
279
+ Mouse
280
+ Breaking
281
+ Silence
282
+ Throat_clearing
283
+ Bleat
284
+ Salsa_music
285
+ Patter
286
+ Vibraphone
287
+ Flap
288
+ Typewriter
289
+ Change_ringing_(campanology)
290
+ Trickle,_dribble
291
+ Video_game_music
292
+ Glass
293
+ Dial_tone
294
+ Radio
295
+ Bell
296
+ Moo
297
+ Heart_murmur
298
+ Clatter
299
+ Sniff
300
+ Double_bass
301
+ Background_music
302
+ Lawn_mower
303
+ Printer
304
+ House_music
305
+ Tearing
306
+ Angry_music
307
+ Male_speech,_man_speaking
308
+ Wild_animals
309
+ Cupboard_open_or_close
310
+ Harpsichord
311
+ Light_engine_(high_frequency)
312
+ Child_singing
313
+ Zipper_(clothing)
314
+ Jazz
315
+ Belly_laugh
316
+ Roar
317
+ Motor_vehicle_(road)
318
+ Crowing,_cock-a-doodle-doo
319
+ Cluck
320
+ Sad_music
321
+ Hi-hat
322
+ Cough
323
+ Stomach_rumble
324
+ Alarm
325
+ String_section
326
+ Sonar
327
+ Keys_jangling
328
+ Synthetic_singing
329
+ Rapping
330
+ Sidetone
331
+ Orchestra
332
+ Throbbing
333
+ Whale_vocalization
334
+ Thunk
335
+ Children_playing
336
+ Snake
337
+ Chink,_clink
338
+ Chirp,_tweet
339
+ Boing
340
+ Shuffle
341
+ Pulse
342
+ Punk_rock
343
+ Crow
344
+ Caw
345
+ Static
346
+ Clicking
347
+ Snicker
348
+ Whispering
349
+ Pink_noise
350
+ Crushing
351
+ Wedding_music
352
+ Crumpling,_crinkling
353
+ Crackle
354
+ Whoop
355
+ Electric_toothbrush
356
+ Train_wheels_squealing
357
+ Yell
358
+ Wind_chime
359
+ Frying_(food)
360
+ Christmas_music
361
+ Fill_(with_liquid)
362
+ Reverberation
363
+ Beatboxing
364
+ Harmonica
365
+ Banjo
366
+ Sliding_door
367
+ Groan
368
+ Bagpipes
369
+ Spray
370
+ Stir
371
+ Acoustic_guitar
372
+ Tap
373
+ Chorus_effect
374
+ Noise
375
+ Crunch
376
+ Biting
377
+ Aircraft_engine
378
+ Busy_signal
379
+ Bang
380
+ Techno
381
+ Tuning_fork
382
+ Tapping_(guitar_technique)
383
+ Pig
384
+ Maraca
385
+ Vacuum_cleaner
386
+ Mandolin
387
+ Electronica
388
+ Theme_music
389
+ Yip
390
+ A_capella
391
+ Rustle
392
+ Chatter
393
+ Traditional_music
394
+ Soul_music
395
+ Rustling_leaves
396
+ Afrobeat
397
+ Hoot
398
+ Slosh
399
+ Roaring_cats_(lions,_tigers)
400
+ Chopping_(food)
401
+ Heavy_engine_(low_frequency)
402
+ Sine_wave
403
+ Speech_synthesizer
404
+ Middle_Eastern_music
405
+ Music_of_Latin_America
406
+ Arrow
407
+ Timpani
408
+ Eruption
409
+ Shofar
410
+ Jingle_bell
411
+ Humming
412
+ Sanding
413
+ Female_speech,_woman_speaking
414
+ Gong
415
+ Rain_on_surface
416
+ Pant
417
+ Dubstep
418
+ Clip-clop
419
+ Finger_snapping
420
+ Blender
421
+ Drum_and_bass
422
+ Bouncing
423
+ Vehicle_horn,_car_horn,_honking
424
+ Slam
425
+ Idling
426
+ Rhythm_and_blues
427
+ Race_car,_auto_racing
428
+ Single-lens_reflex_camera
429
+ Smash,_crash
430
+ Purr
431
+ Shatter
432
+ Steelpan
433
+ Whimper_(dog)
434
+ Power_windows,_electric_windows
435
+ Battle_cry
436
+ Scary_music
437
+ Hands
438
+ Echo
439
+ Truck
440
+ Buzz
441
+ Mechanical_fan
442
+ Plop
443
+ Run
444
+ Gasp
445
+ Psychedelic_rock
446
+ Grunt
447
+ Helicopter
448
+ Dental_drill,_dentist's_drill
449
+ Babbling
450
+ Zing
451
+ Oink
452
+ Soundtrack_music
453
+ Ambulance_(siren)
454
+ Exciting_music
455
+ Telephone
456
+ Jingle_(music)
457
+ Tubular_bells
458
+ Burping,_eructation
459
+ Baby_laughter
460
+ Ping
461
+ Bow-wow
462
+ Foghorn
463
+ Machine_gun
464
+ Ukulele
465
+ Telephone_bell_ringing
466
+ Pulleys
467
+ Gears
468
+ Sigh
469
+ Coin_(dropping)
470
+ Music_of_Africa
471
+ Scissors
472
+ Inside,_public_space
473
+ Trance_music
474
+ Roll
475
+ Thump,_thud
476
+ Air_conditioning
477
+ Ding-dong
478
+ Ratchet,_pawl
479
+ Hair_dryer
480
+ Shout
481
+ Ambient_music
482
+ Music_for_children
483
+ Toot
484
+ Bathtub_(filling_or_washing)
485
+ Slap,_smack
486
+ Chuckle,_chortle
487
+ Traffic_noise,_roadway_noise
488
+ Bicycle
489
+ Whimper
490
+ Doorbell
491
+ Wheeze
492
+ Sailboat,_sailing_ship
493
+ Cap_gun
494
+ Wail,_moan
495
+ Rock_and_roll
496
+ Jingle,_tinkle
497
+ Fire_engine,_fire_truck_(siren)
498
+ Funk
499
+ Lullaby
500
+ Field_recording
501
+ Skateboard
502
+ Steam
503
+ Rumble
504
+ Medium_engine_(mid_frequency)
505
+ Sound_effect
506
+ Flamenco
507
+ Shuffling_cards
508
+ Subway,_metro,_underground
509
+ Police_car_(siren)
510
+ Folk_music
511
+ Crying,_sobbing
512
+ New-age_music
513
+ Ice_cream_truck,_ice_cream_van
514
+ Music_of_Bollywood
515
+ Accelerating,_revving,_vroom
516
+ Screaming
517
+ Motorcycle
518
+ Engine_starting
519
+ Train_whistle
520
+ Car_passing_by
521
+ Bus
522
+ Sneeze
523
+ Train_horn
524
+ Air_horn,_truck_horn
525
+ Civil_defense_siren
526
+ Car_alarm
527
+ Reversing_beeps
528
+ <blank>
529
+ <unk>
work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/exp/audioset20k/cls_earbasei3/RESULTS.md ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!-- Generated by scripts/utils/show_cls_result.sh -->
2
+ # RESULTS
3
+ ## Environments
4
+ - date: `Fri Mar 21 05:05:28 CDT 2025`
5
+ - python version: `3.9.18 | packaged by conda-forge | (main, Dec 23 2023, 17:20:25) [GCC 12.3.0]`
6
+ - espnet version: `espnet 202412`
7
+ - pytorch version: `pytorch 2.6.0.dev20241210+cu124`
8
+ - Git hash: `ee8dd3d5da745a2c08c2bd6518bc0ba41ba5b224`
9
+ - Commit date: `Thu Mar 20 16:45:17 2025 -0500`
10
+
11
+ ## cls_earbasei3
12
+ |Split|mean_acc|mAP|mean_auc|n_labels|n_instances|
13
+ |---|---|---|---|---|---|
14
+ cls_eval|47.76|32.09|95.67|527.00|20123.00
15
+ cls_val|45.63|36.62|94.72|527.00|2014.00
16
+
work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/exp/audioset20k/cls_earbasei3/config.yaml ADDED
@@ -0,0 +1,731 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ config: /work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/exp/earbasei3/conf/ear_base/audioset20k.yaml
2
+ print_config: false
3
+ log_level: INFO
4
+ drop_last_iter: false
5
+ dry_run: false
6
+ iterator_type: sequence
7
+ valid_iterator_type: null
8
+ output_dir: /work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/exp/audioset20k/cls_earbasei3
9
+ ngpu: 0
10
+ seed: 0
11
+ num_workers: 2
12
+ num_att_plot: 0
13
+ dist_backend: nccl
14
+ dist_init_method: env://
15
+ dist_world_size: null
16
+ dist_rank: null
17
+ local_rank: null
18
+ dist_master_addr: null
19
+ dist_master_port: null
20
+ dist_launcher: null
21
+ multiprocessing_distributed: false
22
+ unused_parameters: true
23
+ sharded_ddp: false
24
+ use_deepspeed: false
25
+ deepspeed_config: null
26
+ gradient_as_bucket_view: true
27
+ ddp_comm_hook: null
28
+ cudnn_enabled: true
29
+ cudnn_benchmark: false
30
+ cudnn_deterministic: true
31
+ use_tf32: false
32
+ collect_stats: false
33
+ write_collected_feats: false
34
+ max_epoch: 160
35
+ patience: null
36
+ val_scheduler_criterion:
37
+ - valid
38
+ - loss
39
+ early_stopping_criterion:
40
+ - valid
41
+ - loss
42
+ - min
43
+ best_model_criterion:
44
+ - - valid
45
+ - epoch_mAP
46
+ - max
47
+ keep_nbest_models: 1
48
+ nbest_averaging_interval: 0
49
+ grad_clip: 1
50
+ grad_clip_type: 2.0
51
+ grad_noise: false
52
+ accum_grad: 1
53
+ no_forward_run: false
54
+ resume: true
55
+ train_dtype: float32
56
+ use_amp: false
57
+ log_interval: null
58
+ use_matplotlib: true
59
+ use_tensorboard: true
60
+ create_graph_in_tensorboard: false
61
+ use_wandb: true
62
+ wandb_project: audioverse
63
+ wandb_id: null
64
+ wandb_entity: shikhar
65
+ wandb_name: audioset20k.earbasei3
66
+ wandb_model_log_interval: -1
67
+ detect_anomaly: false
68
+ use_adapter: false
69
+ adapter: lora
70
+ save_strategy: all
71
+ adapter_conf: {}
72
+ pretrain_path: null
73
+ init_param: []
74
+ ignore_init_mismatch: false
75
+ freeze_param: []
76
+ num_iters_per_epoch: null
77
+ batch_size: 80
78
+ valid_batch_size: 1200
79
+ batch_bins: 1000000
80
+ valid_batch_bins: null
81
+ category_sample_size: 10
82
+ train_shape_file:
83
+ - /work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/exp/audioset20k/cls_stats_16k/train/speech_shape
84
+ - /work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/exp/audioset20k/cls_stats_16k/train/label_shape
85
+ valid_shape_file:
86
+ - /work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/exp/audioset20k/cls_stats_16k/valid/speech_shape
87
+ - /work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/exp/audioset20k/cls_stats_16k/valid/label_shape
88
+ batch_type: folded
89
+ valid_batch_type: null
90
+ fold_length:
91
+ - 160000
92
+ - 600
93
+ sort_in_batch: descending
94
+ shuffle_within_batch: false
95
+ sort_batch: descending
96
+ multiple_iterator: false
97
+ utt2weight_file: null
98
+ chunk_length: 500
99
+ chunk_shift_ratio: 0.5
100
+ num_cache_chunks: 1024
101
+ chunk_excluded_key_prefixes: []
102
+ chunk_default_fs: null
103
+ chunk_max_abs_length: null
104
+ chunk_discard_short_samples: true
105
+ train_data_path_and_name_and_type:
106
+ - - /work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/dump/audioset20k/train/wav.scp
107
+ - speech
108
+ - sound
109
+ - - /work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/dump/audioset20k/train/text
110
+ - label
111
+ - text
112
+ valid_data_path_and_name_and_type:
113
+ - - /work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/dump/audioset20k/val/wav.scp
114
+ - speech
115
+ - sound
116
+ - - /work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/dump/audioset20k/val/text
117
+ - label
118
+ - text
119
+ multi_task_dataset: false
120
+ allow_variable_data_keys: false
121
+ max_cache_size: 0.0
122
+ max_cache_fd: 32
123
+ allow_multi_rates: false
124
+ valid_max_cache_size: null
125
+ exclude_weight_decay: false
126
+ exclude_weight_decay_conf: {}
127
+ optim: adamw
128
+ optim_conf:
129
+ lr: 3.0e-05
130
+ weight_decay: 0.01
131
+ betas:
132
+ - 0.9
133
+ - 0.98
134
+ scheduler: cosineannealingwarmuprestarts
135
+ scheduler_conf:
136
+ first_cycle_steps: 95000
137
+ warmup_steps: 8000
138
+ max_lr: 3.0e-05
139
+ min_lr: 5.0e-06
140
+ lightning_conf:
141
+ log_every_n_steps: 250
142
+ max_epochs: 500
143
+ strategy: ddp
144
+ strategy_conf:
145
+ find_unused_parameters: true
146
+ best_model_criterion:
147
+ - - valid/epoch_mAP
148
+ - max
149
+ - 1
150
+ devices: 1
151
+ num_nodes: 1
152
+ default_root_dir: /work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/exp/audioset20k/cls_earbasei3
153
+ token_list:
154
+ - Music
155
+ - Speech
156
+ - Vehicle
157
+ - Inside,_small_room
158
+ - Animal
159
+ - Musical_instrument
160
+ - Singing
161
+ - Domestic_animals,_pets
162
+ - Guitar
163
+ - Plucked_string_instrument
164
+ - Water
165
+ - Car
166
+ - Dog
167
+ - Percussion
168
+ - Wind_instrument,_woodwind_instrument
169
+ - Outside,_urban_or_manmade
170
+ - Outside,_rural_or_natural
171
+ - Boat,_Water_vehicle
172
+ - Brass_instrument
173
+ - Fowl
174
+ - Drum
175
+ - Siren
176
+ - Engine
177
+ - Bird
178
+ - Insect
179
+ - Gunshot,_gunfire
180
+ - Wood
181
+ - Rail_transport
182
+ - Train
183
+ - Wind
184
+ - Inside,_large_room_or_hall
185
+ - Railroad_car,_train_wagon
186
+ - Child_speech,_kid_speaking
187
+ - Crowd
188
+ - Rub
189
+ - Keyboard_(musical)
190
+ - Wind_noise_(microphone)
191
+ - Pizzicato
192
+ - Emergency_vehicle
193
+ - Bird_vocalization,_bird_call,_bird_song
194
+ - Livestock,_farm_animals,_working_animals
195
+ - Cat
196
+ - Organ
197
+ - Fly,_housefly
198
+ - Mechanisms
199
+ - Bowed_string_instrument
200
+ - Rain
201
+ - Laughter
202
+ - Aircraft
203
+ - Electronic_music
204
+ - Effects_unit
205
+ - Hum
206
+ - Tools
207
+ - Drum_kit
208
+ - Snare_drum
209
+ - Hiss
210
+ - Piano
211
+ - Water_tap,_faucet
212
+ - Rimshot
213
+ - Bass_drum
214
+ - Chicken,_rooster
215
+ - Marimba,_xylophone
216
+ - Horse
217
+ - Song
218
+ - Quack
219
+ - Power_tool
220
+ - Heart_sounds,_heartbeat
221
+ - Goose
222
+ - Hammond_organ
223
+ - Rock_music
224
+ - Ocean
225
+ - Mains_hum
226
+ - Thunder
227
+ - Chime
228
+ - Electronic_dance_music
229
+ - Typing
230
+ - Sink_(filling_or_washing)
231
+ - Raindrop
232
+ - Cello
233
+ - Electric_guitar
234
+ - Cheering
235
+ - Church_bell
236
+ - Christian_music
237
+ - Drum_roll
238
+ - Trombone
239
+ - Glockenspiel
240
+ - Trumpet
241
+ - Cymbal
242
+ - Tabla
243
+ - Clickety-clack
244
+ - Cricket
245
+ - Steam_whistle
246
+ - Explosion
247
+ - Saxophone
248
+ - Thunderstorm
249
+ - Pop_music
250
+ - Zither
251
+ - Applause
252
+ - Choir
253
+ - Whack,_thwack
254
+ - Clarinet
255
+ - Camera
256
+ - Electric_piano
257
+ - Independent_music
258
+ - Fire
259
+ - Frog
260
+ - Jet_engine
261
+ - Music_of_Asia
262
+ - Ding
263
+ - Waves,_surf
264
+ - Cattle,_bovinae
265
+ - Turkey
266
+ - Television
267
+ - Coo
268
+ - Scratching_(performance_technique)
269
+ - Flute
270
+ - Liquid
271
+ - Harp
272
+ - Progressive_rock
273
+ - Happy_music
274
+ - Steel_guitar,_slide_guitar
275
+ - Whoosh,_swoosh,_swish
276
+ - Boom
277
+ - Breathing
278
+ - Electronic_organ
279
+ - Environmental_noise
280
+ - Distortion
281
+ - Alarm_clock
282
+ - Fixed-wing_aircraft,_airplane
283
+ - Violin,_fiddle
284
+ - Whistling
285
+ - Accordion
286
+ - Disco
287
+ - Pump_(liquid)
288
+ - Waterfall
289
+ - Beep,_bleep
290
+ - Blues
291
+ - Grunge
292
+ - Hip_hop_music
293
+ - Whistle
294
+ - Fusillade
295
+ - Splash,_splatter
296
+ - Gush
297
+ - Toothbrush
298
+ - Knock
299
+ - Gargling
300
+ - Snoring
301
+ - Hammer
302
+ - Gobble
303
+ - Walk,_footsteps
304
+ - Jackhammer
305
+ - Filing_(rasp)
306
+ - Snort
307
+ - Narration,_monologue
308
+ - Tire_squeal
309
+ - Fire_alarm
310
+ - Squeal
311
+ - Meow
312
+ - Caterwaul
313
+ - Cutlery,_silverware
314
+ - Mantra
315
+ - Opera
316
+ - Classical_music
317
+ - Theremin
318
+ - Burst,_pop
319
+ - Drip
320
+ - Tick
321
+ - Children_shouting
322
+ - Creak
323
+ - Hiccup
324
+ - Pigeon,_dove
325
+ - Bicycle_bell
326
+ - Baby_cry,_infant_cry
327
+ - Duck
328
+ - Fireworks
329
+ - Tambourine
330
+ - Rodents,_rats,_mice
331
+ - Buzzer
332
+ - Splinter
333
+ - Writing
334
+ - Goat
335
+ - Sheep
336
+ - Heavy_metal
337
+ - Ska
338
+ - Neigh,_whinny
339
+ - Sizzle
340
+ - Rowboat,_canoe,_kayak
341
+ - Wood_block
342
+ - Clang
343
+ - Door
344
+ - Female_singing
345
+ - Stream
346
+ - Chant
347
+ - Vocal_music
348
+ - Yodeling
349
+ - Bee,_wasp,_etc.
350
+ - Air_brake
351
+ - Whir
352
+ - Bird_flight,_flapping_wings
353
+ - French_horn
354
+ - Telephone_dialing,_DTMF
355
+ - Squeak
356
+ - Sitar
357
+ - Smoke_detector,_smoke_alarm
358
+ - Tick-tock
359
+ - Gurgling
360
+ - Bellow
361
+ - Harmonic
362
+ - Male_singing
363
+ - Giggle
364
+ - Bark
365
+ - Vibration
366
+ - Drill
367
+ - Skidding
368
+ - Scratch
369
+ - Drawer_open_or_close
370
+ - Chop
371
+ - Drum_machine
372
+ - Squish
373
+ - Toilet_flush
374
+ - Fart
375
+ - Basketball_bounce
376
+ - Electronic_tuner
377
+ - Singing_bowl
378
+ - Squawk
379
+ - Conversation
380
+ - Reggae
381
+ - Funny_music
382
+ - Scrape
383
+ - Sewing_machine
384
+ - Tender_music
385
+ - Swing_music
386
+ - Dishes,_pots,_and_pans
387
+ - Sampler
388
+ - Synthesizer
389
+ - Clapping
390
+ - Hubbub,_speech_noise,_speech_babble
391
+ - Engine_knocking
392
+ - Canidae,_dogs,_wolves
393
+ - Chainsaw
394
+ - Pour
395
+ - Croak
396
+ - Chewing,_mastication
397
+ - Cowbell
398
+ - Propeller,_airscrew
399
+ - Didgeridoo
400
+ - Ringtone
401
+ - Rattle_(instrument)
402
+ - Artillery_fire
403
+ - Cash_register
404
+ - Crack
405
+ - Growling
406
+ - Mosquito
407
+ - Carnatic_music
408
+ - Honk
409
+ - Howl
410
+ - Cacophony
411
+ - Gospel_music
412
+ - Firecracker
413
+ - Strum
414
+ - Motorboat,_speedboat
415
+ - Clock
416
+ - Dance_music
417
+ - Microwave_oven
418
+ - Country
419
+ - Bluegrass
420
+ - Rattle
421
+ - Mallet_percussion
422
+ - Computer_keyboard
423
+ - Bass_guitar
424
+ - Electric_shaver,_electric_razor
425
+ - Sawing
426
+ - Owl
427
+ - Whip
428
+ - White_noise
429
+ - Chirp_tone
430
+ - Boiling
431
+ - Ship
432
+ - Mouse
433
+ - Breaking
434
+ - Silence
435
+ - Throat_clearing
436
+ - Bleat
437
+ - Salsa_music
438
+ - Patter
439
+ - Vibraphone
440
+ - Flap
441
+ - Typewriter
442
+ - Change_ringing_(campanology)
443
+ - Trickle,_dribble
444
+ - Video_game_music
445
+ - Glass
446
+ - Dial_tone
447
+ - Radio
448
+ - Bell
449
+ - Moo
450
+ - Heart_murmur
451
+ - Clatter
452
+ - Sniff
453
+ - Double_bass
454
+ - Background_music
455
+ - Lawn_mower
456
+ - Printer
457
+ - House_music
458
+ - Tearing
459
+ - Angry_music
460
+ - Male_speech,_man_speaking
461
+ - Wild_animals
462
+ - Cupboard_open_or_close
463
+ - Harpsichord
464
+ - Light_engine_(high_frequency)
465
+ - Child_singing
466
+ - Zipper_(clothing)
467
+ - Jazz
468
+ - Belly_laugh
469
+ - Roar
470
+ - Motor_vehicle_(road)
471
+ - Crowing,_cock-a-doodle-doo
472
+ - Cluck
473
+ - Sad_music
474
+ - Hi-hat
475
+ - Cough
476
+ - Stomach_rumble
477
+ - Alarm
478
+ - String_section
479
+ - Sonar
480
+ - Keys_jangling
481
+ - Synthetic_singing
482
+ - Rapping
483
+ - Sidetone
484
+ - Orchestra
485
+ - Throbbing
486
+ - Whale_vocalization
487
+ - Thunk
488
+ - Children_playing
489
+ - Snake
490
+ - Chink,_clink
491
+ - Chirp,_tweet
492
+ - Boing
493
+ - Shuffle
494
+ - Pulse
495
+ - Punk_rock
496
+ - Crow
497
+ - Caw
498
+ - Static
499
+ - Clicking
500
+ - Snicker
501
+ - Whispering
502
+ - Pink_noise
503
+ - Crushing
504
+ - Wedding_music
505
+ - Crumpling,_crinkling
506
+ - Crackle
507
+ - Whoop
508
+ - Electric_toothbrush
509
+ - Train_wheels_squealing
510
+ - Yell
511
+ - Wind_chime
512
+ - Frying_(food)
513
+ - Christmas_music
514
+ - Fill_(with_liquid)
515
+ - Reverberation
516
+ - Beatboxing
517
+ - Harmonica
518
+ - Banjo
519
+ - Sliding_door
520
+ - Groan
521
+ - Bagpipes
522
+ - Spray
523
+ - Stir
524
+ - Acoustic_guitar
525
+ - Tap
526
+ - Chorus_effect
527
+ - Noise
528
+ - Crunch
529
+ - Biting
530
+ - Aircraft_engine
531
+ - Busy_signal
532
+ - Bang
533
+ - Techno
534
+ - Tuning_fork
535
+ - Tapping_(guitar_technique)
536
+ - Pig
537
+ - Maraca
538
+ - Vacuum_cleaner
539
+ - Mandolin
540
+ - Electronica
541
+ - Theme_music
542
+ - Yip
543
+ - A_capella
544
+ - Rustle
545
+ - Chatter
546
+ - Traditional_music
547
+ - Soul_music
548
+ - Rustling_leaves
549
+ - Afrobeat
550
+ - Hoot
551
+ - Slosh
552
+ - Roaring_cats_(lions,_tigers)
553
+ - Chopping_(food)
554
+ - Heavy_engine_(low_frequency)
555
+ - Sine_wave
556
+ - Speech_synthesizer
557
+ - Middle_Eastern_music
558
+ - Music_of_Latin_America
559
+ - Arrow
560
+ - Timpani
561
+ - Eruption
562
+ - Shofar
563
+ - Jingle_bell
564
+ - Humming
565
+ - Sanding
566
+ - Female_speech,_woman_speaking
567
+ - Gong
568
+ - Rain_on_surface
569
+ - Pant
570
+ - Dubstep
571
+ - Clip-clop
572
+ - Finger_snapping
573
+ - Blender
574
+ - Drum_and_bass
575
+ - Bouncing
576
+ - Vehicle_horn,_car_horn,_honking
577
+ - Slam
578
+ - Idling
579
+ - Rhythm_and_blues
580
+ - Race_car,_auto_racing
581
+ - Single-lens_reflex_camera
582
+ - Smash,_crash
583
+ - Purr
584
+ - Shatter
585
+ - Steelpan
586
+ - Whimper_(dog)
587
+ - Power_windows,_electric_windows
588
+ - Battle_cry
589
+ - Scary_music
590
+ - Hands
591
+ - Echo
592
+ - Truck
593
+ - Buzz
594
+ - Mechanical_fan
595
+ - Plop
596
+ - Run
597
+ - Gasp
598
+ - Psychedelic_rock
599
+ - Grunt
600
+ - Helicopter
601
+ - Dental_drill,_dentist's_drill
602
+ - Babbling
603
+ - Zing
604
+ - Oink
605
+ - Soundtrack_music
606
+ - Ambulance_(siren)
607
+ - Exciting_music
608
+ - Telephone
609
+ - Jingle_(music)
610
+ - Tubular_bells
611
+ - Burping,_eructation
612
+ - Baby_laughter
613
+ - Ping
614
+ - Bow-wow
615
+ - Foghorn
616
+ - Machine_gun
617
+ - Ukulele
618
+ - Telephone_bell_ringing
619
+ - Pulleys
620
+ - Gears
621
+ - Sigh
622
+ - Coin_(dropping)
623
+ - Music_of_Africa
624
+ - Scissors
625
+ - Inside,_public_space
626
+ - Trance_music
627
+ - Roll
628
+ - Thump,_thud
629
+ - Air_conditioning
630
+ - Ding-dong
631
+ - Ratchet,_pawl
632
+ - Hair_dryer
633
+ - Shout
634
+ - Ambient_music
635
+ - Music_for_children
636
+ - Toot
637
+ - Bathtub_(filling_or_washing)
638
+ - Slap,_smack
639
+ - Chuckle,_chortle
640
+ - Traffic_noise,_roadway_noise
641
+ - Bicycle
642
+ - Whimper
643
+ - Doorbell
644
+ - Wheeze
645
+ - Sailboat,_sailing_ship
646
+ - Cap_gun
647
+ - Wail,_moan
648
+ - Rock_and_roll
649
+ - Jingle,_tinkle
650
+ - Fire_engine,_fire_truck_(siren)
651
+ - Funk
652
+ - Lullaby
653
+ - Field_recording
654
+ - Skateboard
655
+ - Steam
656
+ - Rumble
657
+ - Medium_engine_(mid_frequency)
658
+ - Sound_effect
659
+ - Flamenco
660
+ - Shuffling_cards
661
+ - Subway,_metro,_underground
662
+ - Police_car_(siren)
663
+ - Folk_music
664
+ - Crying,_sobbing
665
+ - New-age_music
666
+ - Ice_cream_truck,_ice_cream_van
667
+ - Music_of_Bollywood
668
+ - Accelerating,_revving,_vroom
669
+ - Screaming
670
+ - Motorcycle
671
+ - Engine_starting
672
+ - Train_whistle
673
+ - Car_passing_by
674
+ - Bus
675
+ - Sneeze
676
+ - Train_horn
677
+ - Air_horn,_truck_horn
678
+ - Civil_defense_siren
679
+ - Car_alarm
680
+ - Reversing_beeps
681
+ - <blank>
682
+ - <unk>
683
+ text_token_list: null
684
+ text_bpemodel: null
685
+ init: xavier_normal
686
+ input_size: 1
687
+ use_preprocessor: true
688
+ frontend: null
689
+ frontend_conf: {}
690
+ specaug: null
691
+ specaug_conf: {}
692
+ normalize: null
693
+ normalize_conf: {}
694
+ preencoder: null
695
+ preencoder_conf: {}
696
+ encoder: beats
697
+ encoder_conf:
698
+ beats_ckpt_path: /work/nvme/bbjs/sbharadwaj/7Msounds/exp/beats_iter2_base2.tune_lr5e-4_warmup40000_bins1600000_totalsteps400000/epoch_latest.pt
699
+ beats_config:
700
+ layer_wise_gradient_decay_ratio: 0.3
701
+ encoder_layerdrop: 0.1
702
+ dropout: 0.0
703
+ use_weighted_representation: false
704
+ specaug_config:
705
+ apply_time_warp: true
706
+ apply_freq_mask: false
707
+ apply_time_mask: true
708
+ time_mask_width_ratio_range:
709
+ - 0
710
+ - 0.06
711
+ num_time_mask: 1
712
+ roll_augment: true
713
+ roll_interval: 1
714
+ text_encoder: null
715
+ text_encoder_conf: {}
716
+ embedding_fusion: null
717
+ embedding_fusion_conf: {}
718
+ decoder: linear
719
+ decoder_conf: {}
720
+ model: espnet
721
+ model_conf:
722
+ classification_type: multi-label
723
+ mixup_probability: 0.8
724
+ lsm_weight: 0.0
725
+ log_epoch_metrics: true
726
+ user_callbacks:
727
+ - mAP_logging
728
+ required:
729
+ - output_dir
730
+ - token_list
731
+ task: cls
work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/exp/audioset20k/cls_earbasei3/lightning_logs/version_0/events.out.tfevents.1742492588.gh130.hsn.cm.delta.internal.ncsa.edu.3586759.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aca3b0dad08cf2f8f38bfd982441259e80d8b95377c492704e465a2e08c4ad2c
3
+ size 372371
work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/exp/audioset20k/cls_earbasei3/lightning_logs/version_0/hparams.yaml ADDED
@@ -0,0 +1,208 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ args: !!python/object:argparse.Namespace
2
+ accum_grad: 1
3
+ adapter: lora
4
+ adapter_conf: {}
5
+ allow_multi_rates: false
6
+ allow_variable_data_keys: false
7
+ batch_bins: 1000000
8
+ batch_size: 80
9
+ batch_type: folded
10
+ best_model_criterion:
11
+ - - valid
12
+ - epoch_mAP
13
+ - max
14
+ category_sample_size: 10
15
+ chunk_default_fs: null
16
+ chunk_discard_short_samples: true
17
+ chunk_excluded_key_prefixes: []
18
+ chunk_length: 500
19
+ chunk_max_abs_length: null
20
+ chunk_shift_ratio: 0.5
21
+ collect_stats: false
22
+ config: /work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/exp/earbasei3/conf/ear_base/audioset20k.yaml
23
+ create_graph_in_tensorboard: false
24
+ cudnn_benchmark: false
25
+ cudnn_deterministic: true
26
+ cudnn_enabled: true
27
+ ddp_comm_hook: null
28
+ decoder: linear
29
+ decoder_conf: {}
30
+ deepspeed_config: null
31
+ detect_anomaly: false
32
+ dist_backend: nccl
33
+ dist_init_method: env://
34
+ dist_launcher: null
35
+ dist_master_addr: null
36
+ dist_master_port: null
37
+ dist_rank: null
38
+ dist_world_size: null
39
+ drop_last_iter: false
40
+ dry_run: false
41
+ early_stopping_criterion: !!python/tuple
42
+ - valid
43
+ - loss
44
+ - min
45
+ embedding_fusion: null
46
+ embedding_fusion_conf: {}
47
+ encoder: beats
48
+ encoder_conf:
49
+ beats_ckpt_path: /work/nvme/bbjs/sbharadwaj/7Msounds/exp/beats_iter2_base2.tune_lr5e-4_warmup40000_bins1600000_totalsteps400000/epoch_latest.pt
50
+ beats_config:
51
+ dropout: 0.0
52
+ encoder_layerdrop: 0.1
53
+ layer_wise_gradient_decay_ratio: 0.3
54
+ roll_augment: true
55
+ roll_interval: 1
56
+ specaug_config:
57
+ apply_freq_mask: false
58
+ apply_time_mask: true
59
+ apply_time_warp: true
60
+ num_time_mask: 1
61
+ time_mask_width_ratio_range:
62
+ - 0
63
+ - 0.06
64
+ use_weighted_representation: false
65
+ exclude_weight_decay: false
66
+ exclude_weight_decay_conf: {}
67
+ fold_length:
68
+ - 160000
69
+ - 600
70
+ freeze_param: []
71
+ frontend: null
72
+ frontend_conf:
73
+ fs: 16k
74
+ grad_clip: 1
75
+ grad_clip_type: 2.0
76
+ grad_noise: false
77
+ gradient_as_bucket_view: true
78
+ ignore_init_mismatch: false
79
+ init: xavier_normal
80
+ init_param: []
81
+ input_size: 1
82
+ iterator_type: sequence
83
+ keep_nbest_models: 1
84
+ lightning_conf:
85
+ best_model_criterion:
86
+ - - valid/epoch_mAP
87
+ - max
88
+ - 1
89
+ default_root_dir: /work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/exp/audioset20k/cls_earbasei3
90
+ devices: 1
91
+ log_every_n_steps: 250
92
+ max_epochs: 500
93
+ num_nodes: 1
94
+ strategy: ddp
95
+ strategy_conf:
96
+ find_unused_parameters: true
97
+ local_rank: null
98
+ log_interval: null
99
+ log_level: INFO
100
+ max_cache_fd: 32
101
+ max_cache_size: 0.0
102
+ max_epoch: 160
103
+ model: espnet
104
+ model_conf:
105
+ classification_type: multi-label
106
+ log_epoch_metrics: true
107
+ lsm_weight: 0.0
108
+ mixup_probability: 0.8
109
+ multi_task_dataset: false
110
+ multiple_iterator: false
111
+ multiprocessing_distributed: false
112
+ nbest_averaging_interval: 0
113
+ ngpu: 0
114
+ no_forward_run: false
115
+ normalize: null
116
+ normalize_conf: {}
117
+ num_att_plot: 0
118
+ num_cache_chunks: 1024
119
+ num_iters_per_epoch: null
120
+ num_workers: 2
121
+ optim: adamw
122
+ optim_conf:
123
+ betas:
124
+ - 0.9
125
+ - 0.98
126
+ lr: 3.0e-05
127
+ weight_decay: 0.01
128
+ output_dir: /work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/exp/audioset20k/cls_earbasei3
129
+ patience: null
130
+ preencoder: null
131
+ preencoder_conf: {}
132
+ pretrain_path: null
133
+ print_config: false
134
+ required:
135
+ - output_dir
136
+ - token_list
137
+ resume: true
138
+ save_strategy: all
139
+ scheduler: cosineannealingwarmuprestarts
140
+ scheduler_conf:
141
+ first_cycle_steps: 95000
142
+ max_lr: 3.0e-05
143
+ min_lr: 5.0e-06
144
+ warmup_steps: 8000
145
+ seed: 0
146
+ sharded_ddp: false
147
+ shuffle_within_batch: false
148
+ sort_batch: descending
149
+ sort_in_batch: descending
150
+ specaug: null
151
+ specaug_conf: {}
152
+ task: cls
153
+ text_bpemodel: null
154
+ text_encoder: null
155
+ text_encoder_conf: {}
156
+ text_token_list: null
157
+ token_list: /work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/data/audioset20k/token_list
158
+ train_data_path_and_name_and_type:
159
+ - !!python/tuple
160
+ - /work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/dump/audioset20k/train/wav.scp
161
+ - speech
162
+ - sound
163
+ - !!python/tuple
164
+ - /work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/dump/audioset20k/train/text
165
+ - label
166
+ - text
167
+ train_dtype: float32
168
+ train_shape_file:
169
+ - /work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/exp/audioset20k/cls_stats_16k/train/speech_shape
170
+ - /work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/exp/audioset20k/cls_stats_16k/train/label_shape
171
+ unused_parameters: true
172
+ use_adapter: false
173
+ use_amp: false
174
+ use_deepspeed: false
175
+ use_matplotlib: true
176
+ use_preprocessor: true
177
+ use_tensorboard: true
178
+ use_tf32: false
179
+ use_wandb: true
180
+ user_callbacks:
181
+ - mAP_logging
182
+ utt2weight_file: null
183
+ val_scheduler_criterion: !!python/tuple
184
+ - valid
185
+ - loss
186
+ valid_batch_bins: null
187
+ valid_batch_size: 1200
188
+ valid_batch_type: null
189
+ valid_data_path_and_name_and_type:
190
+ - !!python/tuple
191
+ - /work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/dump/audioset20k/val/wav.scp
192
+ - speech
193
+ - sound
194
+ - !!python/tuple
195
+ - /work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/dump/audioset20k/val/text
196
+ - label
197
+ - text
198
+ valid_iterator_type: null
199
+ valid_max_cache_size: null
200
+ valid_shape_file:
201
+ - /work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/exp/audioset20k/cls_stats_16k/valid/speech_shape
202
+ - /work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/exp/audioset20k/cls_stats_16k/valid/label_shape
203
+ wandb_entity: shikhar
204
+ wandb_id: null
205
+ wandb_model_log_interval: -1
206
+ wandb_name: audioset20k.earbasei3
207
+ wandb_project: audioverse
208
+ write_collected_feats: false
work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/exp/audioset20k/cls_earbasei3/valid.epoch_mAP.ave_1best.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:09cbf19b179115d8b0af485d8efe9bf889147393cc0a8ad2fa2d0506237147f2
3
+ size 363126298