ESPnet
audio
classification
Shikhar Bharadwaj committed on
Commit
270cc08
·
1 Parent(s): b76bea4

Update model

Browse files
Files changed (19) hide show
  1. README.md +503 -0
  2. meta.yaml +8 -0
  3. work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/data/beans_cbi/token_list +266 -0
  4. work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/exp/beans_cbi/cls_earlarge1/217epoch.pth +3 -0
  5. work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/exp/beans_cbi/cls_earlarge1/RESULTS.md +16 -0
  6. work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/exp/beans_cbi/cls_earlarge1/config.yaml +451 -0
  7. work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/exp/beans_cbi/cls_earlarge1/images/acc.png +0 -0
  8. work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/exp/beans_cbi/cls_earlarge1/images/backward_time.png +0 -0
  9. work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/exp/beans_cbi/cls_earlarge1/images/clip.png +0 -0
  10. work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/exp/beans_cbi/cls_earlarge1/images/forward_time.png +0 -0
  11. work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/exp/beans_cbi/cls_earlarge1/images/gpu_max_cached_mem_GB.png +0 -0
  12. work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/exp/beans_cbi/cls_earlarge1/images/grad_norm.png +0 -0
  13. work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/exp/beans_cbi/cls_earlarge1/images/iter_time.png +0 -0
  14. work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/exp/beans_cbi/cls_earlarge1/images/loss.png +0 -0
  15. work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/exp/beans_cbi/cls_earlarge1/images/loss_scale.png +0 -0
  16. work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/exp/beans_cbi/cls_earlarge1/images/macro_precision.png +0 -0
  17. work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/exp/beans_cbi/cls_earlarge1/images/optim0_lr0.png +0 -0
  18. work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/exp/beans_cbi/cls_earlarge1/images/optim_step_time.png +0 -0
  19. work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/exp/beans_cbi/cls_earlarge1/images/train_time.png +0 -0
README.md ADDED
@@ -0,0 +1,503 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ tags:
3
+ - espnet
4
+ - audio
5
+ - classification
6
+ datasets:
7
+ - beans
8
+ license: cc-by-4.0
9
+ ---
10
+
11
+ ## ESPnet2 CLS model
12
+
13
+ ### `espnet/OpenBEATS-Large-i1-cbi`
14
+
15
+ This model was trained by Shikhar Bharadwaj using the beans recipe in [espnet](https://github.com/espnet/espnet/).
16
+
17
+ ## CLS config
18
+
19
+ <details><summary>expand</summary>
20
+
21
+ ```
22
+ config: /work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/exp/earlarge1/conf/ear/beans_cbi.yaml
23
+ print_config: false
24
+ log_level: INFO
25
+ drop_last_iter: false
26
+ dry_run: false
27
+ iterator_type: sequence
28
+ valid_iterator_type: null
29
+ output_dir: /work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/exp/beans_cbi/cls_earlarge1
30
+ ngpu: 1
31
+ seed: 0
32
+ num_workers: 2
33
+ num_att_plot: 0
34
+ dist_backend: nccl
35
+ dist_init_method: env://
36
+ dist_world_size: null
37
+ dist_rank: null
38
+ local_rank: 0
39
+ dist_master_addr: null
40
+ dist_master_port: null
41
+ dist_launcher: null
42
+ multiprocessing_distributed: false
43
+ unused_parameters: true
44
+ sharded_ddp: false
45
+ use_deepspeed: false
46
+ deepspeed_config: null
47
+ gradient_as_bucket_view: true
48
+ ddp_comm_hook: null
49
+ cudnn_enabled: true
50
+ cudnn_benchmark: false
51
+ cudnn_deterministic: true
52
+ use_tf32: false
53
+ collect_stats: false
54
+ write_collected_feats: false
55
+ max_epoch: 250
56
+ patience: null
57
+ val_scheduler_criterion:
58
+ - valid
59
+ - loss
60
+ early_stopping_criterion:
61
+ - valid
62
+ - loss
63
+ - min
64
+ best_model_criterion:
65
+ - - valid
66
+ - acc
67
+ - max
68
+ keep_nbest_models: 1
69
+ nbest_averaging_interval: 0
70
+ grad_clip: 1
71
+ grad_clip_type: 2.0
72
+ grad_noise: false
73
+ accum_grad: 1
74
+ no_forward_run: false
75
+ resume: true
76
+ train_dtype: float32
77
+ use_amp: false
78
+ log_interval: null
79
+ use_matplotlib: true
80
+ use_tensorboard: true
81
+ create_graph_in_tensorboard: false
82
+ use_wandb: false
83
+ wandb_project: null
84
+ wandb_id: null
85
+ wandb_entity: null
86
+ wandb_name: null
87
+ wandb_model_log_interval: -1
88
+ detect_anomaly: false
89
+ use_adapter: false
90
+ adapter: lora
91
+ save_strategy: all
92
+ adapter_conf: {}
93
+ pretrain_path: null
94
+ init_param: []
95
+ ignore_init_mismatch: false
96
+ freeze_param: []
97
+ num_iters_per_epoch: null
98
+ batch_size: 32
99
+ valid_batch_size: 32
100
+ batch_bins: 1000000
101
+ valid_batch_bins: null
102
+ category_sample_size: 10
103
+ train_shape_file:
104
+ - /work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/exp/beans_cbi/cls_stats_16k/train/speech_shape
105
+ - /work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/exp/beans_cbi/cls_stats_16k/train/label_shape
106
+ valid_shape_file:
107
+ - /work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/exp/beans_cbi/cls_stats_16k/valid/speech_shape
108
+ - /work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/exp/beans_cbi/cls_stats_16k/valid/label_shape
109
+ batch_type: folded
110
+ valid_batch_type: null
111
+ fold_length:
112
+ - 160000
113
+ - 5
114
+ sort_in_batch: descending
115
+ shuffle_within_batch: false
116
+ sort_batch: descending
117
+ multiple_iterator: false
118
+ chunk_length: 500
119
+ chunk_shift_ratio: 0.5
120
+ num_cache_chunks: 1024
121
+ chunk_excluded_key_prefixes: []
122
+ chunk_default_fs: null
123
+ chunk_max_abs_length: null
124
+ chunk_discard_short_samples: true
125
+ train_data_path_and_name_and_type:
126
+ - - /work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/dump/beans_cbi/cbi.train/wav.scp
127
+ - speech
128
+ - sound
129
+ - - /work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/dump/beans_cbi/cbi.train/text
130
+ - label
131
+ - text
132
+ valid_data_path_and_name_and_type:
133
+ - - /work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/dump/beans_cbi/cbi.dev/wav.scp
134
+ - speech
135
+ - sound
136
+ - - /work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/dump/beans_cbi/cbi.dev/text
137
+ - label
138
+ - text
139
+ multi_task_dataset: false
140
+ allow_variable_data_keys: false
141
+ max_cache_size: 0.0
142
+ max_cache_fd: 32
143
+ allow_multi_rates: false
144
+ valid_max_cache_size: null
145
+ exclude_weight_decay: false
146
+ exclude_weight_decay_conf: {}
147
+ optim: adamw
148
+ optim_conf:
149
+ lr: 3.0e-05
150
+ weight_decay: 0.01
151
+ betas:
152
+ - 0.9
153
+ - 0.98
154
+ scheduler: cosineannealingwarmuprestarts
155
+ scheduler_conf:
156
+ first_cycle_steps: 95000
157
+ warmup_steps: 8000
158
+ max_lr: 3.0e-05
159
+ min_lr: 5.0e-06
160
+ token_list:
161
+ - scoori
162
+ - bulori
163
+ - bushti
164
+ - blkpho
165
+ - brthum
166
+ - cacwre
167
+ - pasfly
168
+ - lesgol
169
+ - logshr
170
+ - macwar
171
+ - pinsis
172
+ - whbnut
173
+ - hamfly
174
+ - normoc
175
+ - grtgra
176
+ - houwre
177
+ - comyel
178
+ - grhowl
179
+ - houfin
180
+ - rocpig
181
+ - annhum
182
+ - astfly
183
+ - magwar
184
+ - wesmea
185
+ - wewpew
186
+ - spotow
187
+ - amerob
188
+ - daejun
189
+ - easmea
190
+ - greroa
191
+ - mouchi
192
+ - pilwoo
193
+ - comrav
194
+ - hoowar
195
+ - savspa
196
+ - warvir
197
+ - easblu
198
+ - gnttow
199
+ - ovenbi1
200
+ - rewbla
201
+ - robgro
202
+ - swathr
203
+ - tuftit
204
+ - westan
205
+ - winwre3
206
+ - btywar
207
+ - carwre
208
+ - herthr
209
+ - bewwre
210
+ - sora
211
+ - brdowl
212
+ - buggna
213
+ - casvir
214
+ - chispa
215
+ - fiespa
216
+ - aldfly
217
+ - killde
218
+ - moudov
219
+ - rebwoo
220
+ - bkpwar
221
+ - dowwoo
222
+ - greegr
223
+ - banswa
224
+ - orcwar
225
+ - plsvir
226
+ - y00475
227
+ - blugrb1
228
+ - gockin
229
+ - greyel
230
+ - larspa
231
+ - osprey
232
+ - sonspa
233
+ - yebfly
234
+ - blujay
235
+ - brnthr
236
+ - canwre
237
+ - clanut
238
+ - comred
239
+ - eastow
240
+ - haiwoo
241
+ - lesyel
242
+ - amepip
243
+ - easpho
244
+ - fiscro
245
+ - sposan
246
+ - wooscj2
247
+ - bkhgro
248
+ - labwoo
249
+ - lazbun
250
+ - marwre
251
+ - stejay
252
+ - weskin
253
+ - bkbwar
254
+ - buhvir
255
+ - cangoo
256
+ - canwar
257
+ - dusfly
258
+ - grcfly
259
+ - norcar
260
+ - wilsni1
261
+ - yerwar
262
+ - yetvir
263
+ - eucdov
264
+ - linspa
265
+ - norpar
266
+ - olsfly
267
+ - rebnut
268
+ - scatan
269
+ - bnhcow
270
+ - louwat
271
+ - norfli
272
+ - veery
273
+ - woothr
274
+ - btnwar
275
+ - cedwax
276
+ - chswar
277
+ - comgra
278
+ - indbun
279
+ - leabit
280
+ - leafly
281
+ - pinwar
282
+ - reevir1
283
+ - solsan
284
+ - bktspa
285
+ - foxspa
286
+ - houspa
287
+ - snobun
288
+ - vesspa
289
+ - yelwar
290
+ - brespa
291
+ - comgol
292
+ - coohaw
293
+ - gnwtea
294
+ - grbher3
295
+ - hergul
296
+ - mallar3
297
+ - swaspa
298
+ - brncre
299
+ - btbwar
300
+ - caster1
301
+ - eawpew
302
+ - rethaw
303
+ - rocwre
304
+ - ruckin
305
+ - semsan
306
+ - whtspa
307
+ - wlswar
308
+ - bkcchi
309
+ - bkchum
310
+ - amered
311
+ - norwat
312
+ - whcspa
313
+ - grycat
314
+ - balori
315
+ - purfin
316
+ - treswa
317
+ - wilfly
318
+ - comter
319
+ - belspa2
320
+ - juntit1
321
+ - comnig
322
+ - reshaw
323
+ - snogoo
324
+ - perfal
325
+ - gadwal
326
+ - grnher
327
+ - horlar
328
+ - lobdow
329
+ - bawwar
330
+ - amegfi
331
+ - commer
332
+ - ribgul
333
+ - casfin
334
+ - pibgre
335
+ - evegro
336
+ - pygnut
337
+ - brwhaw
338
+ - gryfly
339
+ - leasan
340
+ - barswa
341
+ - phaino
342
+ - amecro
343
+ - calqua
344
+ - amewoo
345
+ - pingro
346
+ - saypho
347
+ - semplo
348
+ - buwwar
349
+ - boboli
350
+ - amekes
351
+ - cowscj1
352
+ - amtspa
353
+ - lobcur
354
+ - belkin1
355
+ - pecsan
356
+ - prawar
357
+ - vigswa
358
+ - camwar
359
+ - easkin
360
+ - yebsap
361
+ - norsho
362
+ - gocspa
363
+ - rufhum
364
+ - baisan
365
+ - cliswa
366
+ - pinjay
367
+ - comloo
368
+ - baleag
369
+ - merlin
370
+ - yehbla
371
+ - calgul
372
+ - goleag
373
+ - nutwoo
374
+ - rusbla
375
+ - eursta
376
+ - ameavo
377
+ - lesnig
378
+ - palwar
379
+ - bkbmag1
380
+ - brebla
381
+ - sagthr
382
+ - bkbcuc
383
+ - wesgre
384
+ - redcro
385
+ - wiltur
386
+ - amebit
387
+ - sagspa1
388
+ - tunswa
389
+ - wooduc
390
+ - renpha
391
+ - whtswi
392
+ - bongul
393
+ - norhar2
394
+ - doccor
395
+ - lotduc
396
+ - chukar
397
+ - horgre
398
+ - nrwswa
399
+ - sheowl
400
+ - wesblu
401
+ - whfibi
402
+ - buwtea
403
+ - norpin
404
+ - eargre
405
+ - rebsap
406
+ - lewwoo
407
+ - rebmer
408
+ - wessan
409
+ - chiswi
410
+ - lecthr
411
+ - rthhum
412
+ - moublu
413
+ - amewig
414
+ - rinduc
415
+ - shshaw
416
+ - rufgro
417
+ - swahaw
418
+ - coshum
419
+ - truswa
420
+ - rudduc
421
+ - buffle
422
+ - hoomer
423
+ - gcrfin
424
+ - redhea
425
+ - <blank>
426
+ - <unk>
427
+ text_token_list: null
428
+ text_bpemodel: null
429
+ init: xavier_normal
430
+ input_size: 1
431
+ use_preprocessor: true
432
+ frontend: null
433
+ frontend_conf: {}
434
+ specaug: null
435
+ specaug_conf: {}
436
+ normalize: null
437
+ normalize_conf: {}
438
+ preencoder: null
439
+ preencoder_conf: {}
440
+ encoder: beats
441
+ encoder_conf:
442
+ beats_ckpt_path: /work/nvme/bbjs/sbharadwaj/model_checkpoints/ear_large/beats_iter0_large.tune_lr1.0e-4_warmup40000_bins1600000_totalsteps400000/epoch59.pt
443
+ beats_config:
444
+ layer_wise_gradient_decay_ratio: 0.3
445
+ encoder_layerdrop: 0.1
446
+ dropout: 0.0
447
+ use_weighted_representation: false
448
+ specaug_config:
449
+ apply_time_warp: true
450
+ apply_freq_mask: false
451
+ apply_time_mask: true
452
+ time_mask_width_ratio_range:
453
+ - 0
454
+ - 0.06
455
+ num_time_mask: 1
456
+ roll_augment: true
457
+ roll_interval: 1
458
+ text_encoder: null
459
+ text_encoder_conf: {}
460
+ embedding_fusion: null
461
+ embedding_fusion_conf: {}
462
+ decoder: linear
463
+ decoder_conf: {}
464
+ model: espnet
465
+ model_conf:
466
+ classification_type: multi-class
467
+ lsm_weight: 0.1
468
+ required:
469
+ - output_dir
470
+ - token_list
471
+ version: '202412'
472
+ distributed: false
473
+ ```
474
+
475
+ </details>
476
+
477
+ ### Citations
478
+
479
+ ```BibTex
480
+
481
+ @article{bharadwaj2025openbeats,
482
+ title={OpenBEATs: A Fully Open-Source General-Purpose Audio Encoder},
483
+ author={Bharadwaj, Shikhar and Cornell, Samuele and Choi, Kwanghee and Fukayama, Satoru and Shim, Hye-jin and Deshmukh, Soham and Watanabe, Shinji},
484
+ journal={arXiv preprint arXiv:2507.14129},
485
+ year={2025}
486
+ }
487
+
488
+ @inproceedings{watanabe2018espnet,
489
+ author={Shinji Watanabe and Takaaki Hori and Shigeki Karita and Tomoki Hayashi and Jiro Nishitoba and Yuya Unno and Nelson Yalta and Jahn Heymann and Matthew Wiesner and Nanxin Chen and Adithya Renduchintala and Tsubasa Ochiai},
490
+ title={{ESPnet}: End-to-End Speech Processing Toolkit},
491
+ year={2018},
492
+ booktitle={Proceedings of Interspeech},
493
+ pages={2207--2211},
494
+ doi={10.21437/Interspeech.2018-1456},
495
+ url={http://dx.doi.org/10.21437/Interspeech.2018-1456}
496
+ }
497
+
498
+
499
+
500
+
501
+
502
+
503
+ ```
meta.yaml ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ espnet: '202503'
2
+ files:
3
+ classification_model_file: /work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/exp/beans_cbi/cls_earlarge1/217epoch.pth
4
+ python: "3.9.18 | packaged by conda-forge | (main, Dec 23 2023, 17:20:25) \n[GCC 12.3.0]"
5
+ timestamp: 1763331456.582901
6
+ torch: 2.1.2
7
+ yaml_files:
8
+ classification_train_config: /work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/exp/beans_cbi/cls_earlarge1/config.yaml
work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/data/beans_cbi/token_list ADDED
@@ -0,0 +1,266 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ scoori
2
+ bulori
3
+ bushti
4
+ blkpho
5
+ brthum
6
+ cacwre
7
+ pasfly
8
+ lesgol
9
+ logshr
10
+ macwar
11
+ pinsis
12
+ whbnut
13
+ hamfly
14
+ normoc
15
+ grtgra
16
+ houwre
17
+ comyel
18
+ grhowl
19
+ houfin
20
+ rocpig
21
+ annhum
22
+ astfly
23
+ magwar
24
+ wesmea
25
+ wewpew
26
+ spotow
27
+ amerob
28
+ daejun
29
+ easmea
30
+ greroa
31
+ mouchi
32
+ pilwoo
33
+ comrav
34
+ hoowar
35
+ savspa
36
+ warvir
37
+ easblu
38
+ gnttow
39
+ ovenbi1
40
+ rewbla
41
+ robgro
42
+ swathr
43
+ tuftit
44
+ westan
45
+ winwre3
46
+ btywar
47
+ carwre
48
+ herthr
49
+ bewwre
50
+ sora
51
+ brdowl
52
+ buggna
53
+ casvir
54
+ chispa
55
+ fiespa
56
+ aldfly
57
+ killde
58
+ moudov
59
+ rebwoo
60
+ bkpwar
61
+ dowwoo
62
+ greegr
63
+ banswa
64
+ orcwar
65
+ plsvir
66
+ y00475
67
+ blugrb1
68
+ gockin
69
+ greyel
70
+ larspa
71
+ osprey
72
+ sonspa
73
+ yebfly
74
+ blujay
75
+ brnthr
76
+ canwre
77
+ clanut
78
+ comred
79
+ eastow
80
+ haiwoo
81
+ lesyel
82
+ amepip
83
+ easpho
84
+ fiscro
85
+ sposan
86
+ wooscj2
87
+ bkhgro
88
+ labwoo
89
+ lazbun
90
+ marwre
91
+ stejay
92
+ weskin
93
+ bkbwar
94
+ buhvir
95
+ cangoo
96
+ canwar
97
+ dusfly
98
+ grcfly
99
+ norcar
100
+ wilsni1
101
+ yerwar
102
+ yetvir
103
+ eucdov
104
+ linspa
105
+ norpar
106
+ olsfly
107
+ rebnut
108
+ scatan
109
+ bnhcow
110
+ louwat
111
+ norfli
112
+ veery
113
+ woothr
114
+ btnwar
115
+ cedwax
116
+ chswar
117
+ comgra
118
+ indbun
119
+ leabit
120
+ leafly
121
+ pinwar
122
+ reevir1
123
+ solsan
124
+ bktspa
125
+ foxspa
126
+ houspa
127
+ snobun
128
+ vesspa
129
+ yelwar
130
+ brespa
131
+ comgol
132
+ coohaw
133
+ gnwtea
134
+ grbher3
135
+ hergul
136
+ mallar3
137
+ swaspa
138
+ brncre
139
+ btbwar
140
+ caster1
141
+ eawpew
142
+ rethaw
143
+ rocwre
144
+ ruckin
145
+ semsan
146
+ whtspa
147
+ wlswar
148
+ bkcchi
149
+ bkchum
150
+ amered
151
+ norwat
152
+ whcspa
153
+ grycat
154
+ balori
155
+ purfin
156
+ treswa
157
+ wilfly
158
+ comter
159
+ belspa2
160
+ juntit1
161
+ comnig
162
+ reshaw
163
+ snogoo
164
+ perfal
165
+ gadwal
166
+ grnher
167
+ horlar
168
+ lobdow
169
+ bawwar
170
+ amegfi
171
+ commer
172
+ ribgul
173
+ casfin
174
+ pibgre
175
+ evegro
176
+ pygnut
177
+ brwhaw
178
+ gryfly
179
+ leasan
180
+ barswa
181
+ phaino
182
+ amecro
183
+ calqua
184
+ amewoo
185
+ pingro
186
+ saypho
187
+ semplo
188
+ buwwar
189
+ boboli
190
+ amekes
191
+ cowscj1
192
+ amtspa
193
+ lobcur
194
+ belkin1
195
+ pecsan
196
+ prawar
197
+ vigswa
198
+ camwar
199
+ easkin
200
+ yebsap
201
+ norsho
202
+ gocspa
203
+ rufhum
204
+ baisan
205
+ cliswa
206
+ pinjay
207
+ comloo
208
+ baleag
209
+ merlin
210
+ yehbla
211
+ calgul
212
+ goleag
213
+ nutwoo
214
+ rusbla
215
+ eursta
216
+ ameavo
217
+ lesnig
218
+ palwar
219
+ bkbmag1
220
+ brebla
221
+ sagthr
222
+ bkbcuc
223
+ wesgre
224
+ redcro
225
+ wiltur
226
+ amebit
227
+ sagspa1
228
+ tunswa
229
+ wooduc
230
+ renpha
231
+ whtswi
232
+ bongul
233
+ norhar2
234
+ doccor
235
+ lotduc
236
+ chukar
237
+ horgre
238
+ nrwswa
239
+ sheowl
240
+ wesblu
241
+ whfibi
242
+ buwtea
243
+ norpin
244
+ eargre
245
+ rebsap
246
+ lewwoo
247
+ rebmer
248
+ wessan
249
+ chiswi
250
+ lecthr
251
+ rthhum
252
+ moublu
253
+ amewig
254
+ rinduc
255
+ shshaw
256
+ rufgro
257
+ swahaw
258
+ coshum
259
+ truswa
260
+ rudduc
261
+ buffle
262
+ hoomer
263
+ gcrfin
264
+ redhea
265
+ <blank>
266
+ <unk>
work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/exp/beans_cbi/cls_earlarge1/217epoch.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a1ef2f712ad8a46672861aa87e32d2711e393f091903e0089bfc978ea90aa9a9
3
+ size 1246778335
work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/exp/beans_cbi/cls_earlarge1/RESULTS.md ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!-- Generated by scripts/utils/show_cls_result.sh -->
2
+ # RESULTS
3
+ ## Environments
4
+ - date: `Wed Mar 12 21:01:21 CDT 2025`
5
+ - python version: `3.9.18 | packaged by conda-forge | (main, Dec 23 2023, 17:20:25) [GCC 12.3.0]`
6
+ - espnet version: `espnet 202412`
7
+ - pytorch version: `pytorch 2.6.0.dev20241210+cu124`
8
+ - Git hash: `6a4d1394930044a7d083ffe4647dc0e709726ba2`
9
+ - Commit date: `Wed Mar 12 13:24:03 2025 -0500`
10
+
11
+ ## cls_earlarge1
12
+ |Split|mean_acc|mAP|mean_auc|n_labels|n_instances|
13
+ |---|---|---|---|---|---|
14
+ |cls_cbi.dev|70.60|70.31|94.26|264.00|3548.00|
15
+ |cls_cbi.test|67.79|71.00|96.07|264.00|3620.00|
16
+
work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/exp/beans_cbi/cls_earlarge1/config.yaml ADDED
@@ -0,0 +1,451 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ config: /work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/exp/earlarge1/conf/ear/beans_cbi.yaml
2
+ print_config: false
3
+ log_level: INFO
4
+ drop_last_iter: false
5
+ dry_run: false
6
+ iterator_type: sequence
7
+ valid_iterator_type: null
8
+ output_dir: /work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/exp/beans_cbi/cls_earlarge1
9
+ ngpu: 1
10
+ seed: 0
11
+ num_workers: 2
12
+ num_att_plot: 0
13
+ dist_backend: nccl
14
+ dist_init_method: env://
15
+ dist_world_size: null
16
+ dist_rank: null
17
+ local_rank: 0
18
+ dist_master_addr: null
19
+ dist_master_port: null
20
+ dist_launcher: null
21
+ multiprocessing_distributed: false
22
+ unused_parameters: true
23
+ sharded_ddp: false
24
+ use_deepspeed: false
25
+ deepspeed_config: null
26
+ gradient_as_bucket_view: true
27
+ ddp_comm_hook: null
28
+ cudnn_enabled: true
29
+ cudnn_benchmark: false
30
+ cudnn_deterministic: true
31
+ use_tf32: false
32
+ collect_stats: false
33
+ write_collected_feats: false
34
+ max_epoch: 250
35
+ patience: null
36
+ val_scheduler_criterion:
37
+ - valid
38
+ - loss
39
+ early_stopping_criterion:
40
+ - valid
41
+ - loss
42
+ - min
43
+ best_model_criterion:
44
+ - - valid
45
+ - acc
46
+ - max
47
+ keep_nbest_models: 1
48
+ nbest_averaging_interval: 0
49
+ grad_clip: 1
50
+ grad_clip_type: 2.0
51
+ grad_noise: false
52
+ accum_grad: 1
53
+ no_forward_run: false
54
+ resume: true
55
+ train_dtype: float32
56
+ use_amp: false
57
+ log_interval: null
58
+ use_matplotlib: true
59
+ use_tensorboard: true
60
+ create_graph_in_tensorboard: false
61
+ use_wandb: false
62
+ wandb_project: null
63
+ wandb_id: null
64
+ wandb_entity: null
65
+ wandb_name: null
66
+ wandb_model_log_interval: -1
67
+ detect_anomaly: false
68
+ use_adapter: false
69
+ adapter: lora
70
+ save_strategy: all
71
+ adapter_conf: {}
72
+ pretrain_path: null
73
+ init_param: []
74
+ ignore_init_mismatch: false
75
+ freeze_param: []
76
+ num_iters_per_epoch: null
77
+ batch_size: 32
78
+ valid_batch_size: 32
79
+ batch_bins: 1000000
80
+ valid_batch_bins: null
81
+ category_sample_size: 10
82
+ train_shape_file:
83
+ - /work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/exp/beans_cbi/cls_stats_16k/train/speech_shape
84
+ - /work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/exp/beans_cbi/cls_stats_16k/train/label_shape
85
+ valid_shape_file:
86
+ - /work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/exp/beans_cbi/cls_stats_16k/valid/speech_shape
87
+ - /work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/exp/beans_cbi/cls_stats_16k/valid/label_shape
88
+ batch_type: folded
89
+ valid_batch_type: null
90
+ fold_length:
91
+ - 160000
92
+ - 5
93
+ sort_in_batch: descending
94
+ shuffle_within_batch: false
95
+ sort_batch: descending
96
+ multiple_iterator: false
97
+ chunk_length: 500
98
+ chunk_shift_ratio: 0.5
99
+ num_cache_chunks: 1024
100
+ chunk_excluded_key_prefixes: []
101
+ chunk_default_fs: null
102
+ chunk_max_abs_length: null
103
+ chunk_discard_short_samples: true
104
+ train_data_path_and_name_and_type:
105
+ - - /work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/dump/beans_cbi/cbi.train/wav.scp
106
+ - speech
107
+ - sound
108
+ - - /work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/dump/beans_cbi/cbi.train/text
109
+ - label
110
+ - text
111
+ valid_data_path_and_name_and_type:
112
+ - - /work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/dump/beans_cbi/cbi.dev/wav.scp
113
+ - speech
114
+ - sound
115
+ - - /work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/dump/beans_cbi/cbi.dev/text
116
+ - label
117
+ - text
118
+ multi_task_dataset: false
119
+ allow_variable_data_keys: false
120
+ max_cache_size: 0.0
121
+ max_cache_fd: 32
122
+ allow_multi_rates: false
123
+ valid_max_cache_size: null
124
+ exclude_weight_decay: false
125
+ exclude_weight_decay_conf: {}
126
+ optim: adamw
127
+ optim_conf:
128
+ lr: 3.0e-05
129
+ weight_decay: 0.01
130
+ betas:
131
+ - 0.9
132
+ - 0.98
133
+ scheduler: cosineannealingwarmuprestarts
134
+ scheduler_conf:
135
+ first_cycle_steps: 95000
136
+ warmup_steps: 8000
137
+ max_lr: 3.0e-05
138
+ min_lr: 5.0e-06
139
+ token_list:
140
+ - scoori
141
+ - bulori
142
+ - bushti
143
+ - blkpho
144
+ - brthum
145
+ - cacwre
146
+ - pasfly
147
+ - lesgol
148
+ - logshr
149
+ - macwar
150
+ - pinsis
151
+ - whbnut
152
+ - hamfly
153
+ - normoc
154
+ - grtgra
155
+ - houwre
156
+ - comyel
157
+ - grhowl
158
+ - houfin
159
+ - rocpig
160
+ - annhum
161
+ - astfly
162
+ - magwar
163
+ - wesmea
164
+ - wewpew
165
+ - spotow
166
+ - amerob
167
+ - daejun
168
+ - easmea
169
+ - greroa
170
+ - mouchi
171
+ - pilwoo
172
+ - comrav
173
+ - hoowar
174
+ - savspa
175
+ - warvir
176
+ - easblu
177
+ - gnttow
178
+ - ovenbi1
179
+ - rewbla
180
+ - robgro
181
+ - swathr
182
+ - tuftit
183
+ - westan
184
+ - winwre3
185
+ - btywar
186
+ - carwre
187
+ - herthr
188
+ - bewwre
189
+ - sora
190
+ - brdowl
191
+ - buggna
192
+ - casvir
193
+ - chispa
194
+ - fiespa
195
+ - aldfly
196
+ - killde
197
+ - moudov
198
+ - rebwoo
199
+ - bkpwar
200
+ - dowwoo
201
+ - greegr
202
+ - banswa
203
+ - orcwar
204
+ - plsvir
205
+ - y00475
206
+ - blugrb1
207
+ - gockin
208
+ - greyel
209
+ - larspa
210
+ - osprey
211
+ - sonspa
212
+ - yebfly
213
+ - blujay
214
+ - brnthr
215
+ - canwre
216
+ - clanut
217
+ - comred
218
+ - eastow
219
+ - haiwoo
220
+ - lesyel
221
+ - amepip
222
+ - easpho
223
+ - fiscro
224
+ - sposan
225
+ - wooscj2
226
+ - bkhgro
227
+ - labwoo
228
+ - lazbun
229
+ - marwre
230
+ - stejay
231
+ - weskin
232
+ - bkbwar
233
+ - buhvir
234
+ - cangoo
235
+ - canwar
236
+ - dusfly
237
+ - grcfly
238
+ - norcar
239
+ - wilsni1
240
+ - yerwar
241
+ - yetvir
242
+ - eucdov
243
+ - linspa
244
+ - norpar
245
+ - olsfly
246
+ - rebnut
247
+ - scatan
248
+ - bnhcow
249
+ - louwat
250
+ - norfli
251
+ - veery
252
+ - woothr
253
+ - btnwar
254
+ - cedwax
255
+ - chswar
256
+ - comgra
257
+ - indbun
258
+ - leabit
259
+ - leafly
260
+ - pinwar
261
+ - reevir1
262
+ - solsan
263
+ - bktspa
264
+ - foxspa
265
+ - houspa
266
+ - snobun
267
+ - vesspa
268
+ - yelwar
269
+ - brespa
270
+ - comgol
271
+ - coohaw
272
+ - gnwtea
273
+ - grbher3
274
+ - hergul
275
+ - mallar3
276
+ - swaspa
277
+ - brncre
278
+ - btbwar
279
+ - caster1
280
+ - eawpew
281
+ - rethaw
282
+ - rocwre
283
+ - ruckin
284
+ - semsan
285
+ - whtspa
286
+ - wlswar
287
+ - bkcchi
288
+ - bkchum
289
+ - amered
290
+ - norwat
291
+ - whcspa
292
+ - grycat
293
+ - balori
294
+ - purfin
295
+ - treswa
296
+ - wilfly
297
+ - comter
298
+ - belspa2
299
+ - juntit1
300
+ - comnig
301
+ - reshaw
302
+ - snogoo
303
+ - perfal
304
+ - gadwal
305
+ - grnher
306
+ - horlar
307
+ - lobdow
308
+ - bawwar
309
+ - amegfi
310
+ - commer
311
+ - ribgul
312
+ - casfin
313
+ - pibgre
314
+ - evegro
315
+ - pygnut
316
+ - brwhaw
317
+ - gryfly
318
+ - leasan
319
+ - barswa
320
+ - phaino
321
+ - amecro
322
+ - calqua
323
+ - amewoo
324
+ - pingro
325
+ - saypho
326
+ - semplo
327
+ - buwwar
328
+ - boboli
329
+ - amekes
330
+ - cowscj1
331
+ - amtspa
332
+ - lobcur
333
+ - belkin1
334
+ - pecsan
335
+ - prawar
336
+ - vigswa
337
+ - camwar
338
+ - easkin
339
+ - yebsap
340
+ - norsho
341
+ - gocspa
342
+ - rufhum
343
+ - baisan
344
+ - cliswa
345
+ - pinjay
346
+ - comloo
347
+ - baleag
348
+ - merlin
349
+ - yehbla
350
+ - calgul
351
+ - goleag
352
+ - nutwoo
353
+ - rusbla
354
+ - eursta
355
+ - ameavo
356
+ - lesnig
357
+ - palwar
358
+ - bkbmag1
359
+ - brebla
360
+ - sagthr
361
+ - bkbcuc
362
+ - wesgre
363
+ - redcro
364
+ - wiltur
365
+ - amebit
366
+ - sagspa1
367
+ - tunswa
368
+ - wooduc
369
+ - renpha
370
+ - whtswi
371
+ - bongul
372
+ - norhar2
373
+ - doccor
374
+ - lotduc
375
+ - chukar
376
+ - horgre
377
+ - nrwswa
378
+ - sheowl
379
+ - wesblu
380
+ - whfibi
381
+ - buwtea
382
+ - norpin
383
+ - eargre
384
+ - rebsap
385
+ - lewwoo
386
+ - rebmer
387
+ - wessan
388
+ - chiswi
389
+ - lecthr
390
+ - rthhum
391
+ - moublu
392
+ - amewig
393
+ - rinduc
394
+ - shshaw
395
+ - rufgro
396
+ - swahaw
397
+ - coshum
398
+ - truswa
399
+ - rudduc
400
+ - buffle
401
+ - hoomer
402
+ - gcrfin
403
+ - redhea
404
+ - <blank>
405
+ - <unk>
406
+ text_token_list: null
407
+ text_bpemodel: null
408
+ init: xavier_normal
409
+ input_size: 1
410
+ use_preprocessor: true
411
+ frontend: null
412
+ frontend_conf: {}
413
+ specaug: null
414
+ specaug_conf: {}
415
+ normalize: null
416
+ normalize_conf: {}
417
+ preencoder: null
418
+ preencoder_conf: {}
419
+ encoder: beats
420
+ encoder_conf:
421
+ beats_ckpt_path: /work/nvme/bbjs/sbharadwaj/model_checkpoints/ear_large/beats_iter0_large.tune_lr1.0e-4_warmup40000_bins1600000_totalsteps400000/epoch59.pt
422
+ beats_config:
423
+ layer_wise_gradient_decay_ratio: 0.3
424
+ encoder_layerdrop: 0.1
425
+ dropout: 0.0
426
+ use_weighted_representation: false
427
+ specaug_config:
428
+ apply_time_warp: true
429
+ apply_freq_mask: false
430
+ apply_time_mask: true
431
+ time_mask_width_ratio_range:
432
+ - 0
433
+ - 0.06
434
+ num_time_mask: 1
435
+ roll_augment: true
436
+ roll_interval: 1
437
+ text_encoder: null
438
+ text_encoder_conf: {}
439
+ embedding_fusion: null
440
+ embedding_fusion_conf: {}
441
+ decoder: linear
442
+ decoder_conf: {}
443
+ model: espnet
444
+ model_conf:
445
+ classification_type: multi-class
446
+ lsm_weight: 0.1
447
+ required:
448
+ - output_dir
449
+ - token_list
450
+ version: '202412'
451
+ distributed: false
work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/exp/beans_cbi/cls_earlarge1/images/acc.png ADDED
work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/exp/beans_cbi/cls_earlarge1/images/backward_time.png ADDED
work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/exp/beans_cbi/cls_earlarge1/images/clip.png ADDED
work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/exp/beans_cbi/cls_earlarge1/images/forward_time.png ADDED
work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/exp/beans_cbi/cls_earlarge1/images/gpu_max_cached_mem_GB.png ADDED
work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/exp/beans_cbi/cls_earlarge1/images/grad_norm.png ADDED
work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/exp/beans_cbi/cls_earlarge1/images/iter_time.png ADDED
work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/exp/beans_cbi/cls_earlarge1/images/loss.png ADDED
work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/exp/beans_cbi/cls_earlarge1/images/loss_scale.png ADDED
work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/exp/beans_cbi/cls_earlarge1/images/macro_precision.png ADDED
work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/exp/beans_cbi/cls_earlarge1/images/optim0_lr0.png ADDED
work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/exp/beans_cbi/cls_earlarge1/images/optim_step_time.png ADDED
work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/exp/beans_cbi/cls_earlarge1/images/train_time.png ADDED