Shikhar Bharadwaj committed on
Commit
d66a16d
·
1 Parent(s): 9e37880

Update model

Browse files
README.md ADDED
@@ -0,0 +1,298 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ tags:
3
+ - espnet
4
+ - audio
5
+ - classification
6
+ datasets:
7
+ - beans
8
+ license: cc-by-4.0
9
+ ---
10
+
11
+ ## ESPnet2 CLS model
12
+
13
+ ### `espnet/OpenBEATS-Large-i2-enabirds`
14
+
15
+ This model was trained by Shikhar Bharadwaj using the beans recipe in [espnet](https://github.com/espnet/espnet/).
16
+
17
+ ## CLS config
18
+
19
+ <details><summary>expand</summary>
20
+
21
+ ```
22
+ config: /work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/exp/earlarge2/conf/ear_large/beans_enabirds.yaml
23
+ print_config: false
24
+ log_level: INFO
25
+ drop_last_iter: false
26
+ dry_run: false
27
+ iterator_type: sequence
28
+ valid_iterator_type: null
29
+ output_dir: /work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/exp/beans_enabirds/cls_earlarge2
30
+ ngpu: 0
31
+ seed: 0
32
+ num_workers: 2
33
+ num_att_plot: 0
34
+ dist_backend: nccl
35
+ dist_init_method: env://
36
+ dist_world_size: null
37
+ dist_rank: null
38
+ local_rank: null
39
+ dist_master_addr: null
40
+ dist_master_port: null
41
+ dist_launcher: null
42
+ multiprocessing_distributed: false
43
+ unused_parameters: true
44
+ sharded_ddp: false
45
+ use_deepspeed: false
46
+ deepspeed_config: null
47
+ gradient_as_bucket_view: true
48
+ ddp_comm_hook: null
49
+ cudnn_enabled: true
50
+ cudnn_benchmark: false
51
+ cudnn_deterministic: true
52
+ use_tf32: false
53
+ collect_stats: false
54
+ write_collected_feats: false
55
+ max_epoch: 250
56
+ patience: null
57
+ val_scheduler_criterion:
58
+ - valid
59
+ - loss
60
+ early_stopping_criterion:
61
+ - valid
62
+ - loss
63
+ - min
64
+ best_model_criterion:
65
+ - - valid
66
+ - epoch_mAP
67
+ - max
68
+ keep_nbest_models: 1
69
+ nbest_averaging_interval: 0
70
+ grad_clip: 1
71
+ grad_clip_type: 2.0
72
+ grad_noise: false
73
+ accum_grad: 1
74
+ no_forward_run: false
75
+ resume: true
76
+ train_dtype: float32
77
+ use_amp: false
78
+ log_interval: null
79
+ use_matplotlib: true
80
+ use_tensorboard: true
81
+ create_graph_in_tensorboard: false
82
+ use_wandb: true
83
+ wandb_project: audioverse
84
+ wandb_id: null
85
+ wandb_entity: shikhar
86
+ wandb_name: beans_enabirds.earlarge2
87
+ wandb_model_log_interval: -1
88
+ detect_anomaly: false
89
+ use_adapter: false
90
+ adapter: lora
91
+ save_strategy: all
92
+ adapter_conf: {}
93
+ pretrain_path: null
94
+ init_param: []
95
+ ignore_init_mismatch: false
96
+ freeze_param: []
97
+ num_iters_per_epoch: null
98
+ batch_size: 32
99
+ valid_batch_size: 32
100
+ batch_bins: 1000000
101
+ valid_batch_bins: null
102
+ category_sample_size: 10
103
+ train_shape_file:
104
+ - /work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/exp/beans_enabirds/cls_stats_16k/train/speech_shape
105
+ - /work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/exp/beans_enabirds/cls_stats_16k/train/label_shape
106
+ valid_shape_file:
107
+ - /work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/exp/beans_enabirds/cls_stats_16k/valid/speech_shape
108
+ - /work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/exp/beans_enabirds/cls_stats_16k/valid/label_shape
109
+ batch_type: folded
110
+ valid_batch_type: null
111
+ fold_length:
112
+ - 160000
113
+ - 35
114
+ sort_in_batch: descending
115
+ shuffle_within_batch: false
116
+ sort_batch: descending
117
+ multiple_iterator: false
118
+ utt2weight_file: null
119
+ chunk_length: 500
120
+ chunk_shift_ratio: 0.5
121
+ num_cache_chunks: 1024
122
+ chunk_excluded_key_prefixes: []
123
+ chunk_default_fs: null
124
+ chunk_max_abs_length: null
125
+ chunk_discard_short_samples: true
126
+ train_data_path_and_name_and_type:
127
+ - - /work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/dump/beans_enabirds/enabirds.train/wav.scp
128
+ - speech
129
+ - sound
130
+ - - /work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/dump/beans_enabirds/enabirds.train/text
131
+ - label
132
+ - text
133
+ valid_data_path_and_name_and_type:
134
+ - - /work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/dump/beans_enabirds/enabirds.dev/wav.scp
135
+ - speech
136
+ - sound
137
+ - - /work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/dump/beans_enabirds/enabirds.dev/text
138
+ - label
139
+ - text
140
+ multi_task_dataset: false
141
+ allow_variable_data_keys: false
142
+ max_cache_size: 0.0
143
+ max_cache_fd: 32
144
+ allow_multi_rates: false
145
+ valid_max_cache_size: null
146
+ exclude_weight_decay: false
147
+ exclude_weight_decay_conf: {}
148
+ optim: adamw
149
+ optim_conf:
150
+ lr: 3.0e-05
151
+ weight_decay: 0.01
152
+ betas:
153
+ - 0.9
154
+ - 0.98
155
+ scheduler: cosineannealingwarmuprestarts
156
+ scheduler_conf:
157
+ first_cycle_steps: 95000
158
+ warmup_steps: 8000
159
+ max_lr: 3.0e-05
160
+ min_lr: 5.0e-06
161
+ lightning_conf:
162
+ log_every_n_steps: 250
163
+ max_epochs: 250
164
+ strategy: ddp
165
+ strategy_conf:
166
+ find_unused_parameters: true
167
+ best_model_criterion:
168
+ - - valid/epoch_mAP
169
+ - max
170
+ - 1
171
+ devices: 1
172
+ num_nodes: 1
173
+ default_root_dir: /work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/exp/beans_enabirds/cls_earlarge2
174
+ token_list:
175
+ - eato
176
+ - amcr
177
+ - woth
178
+ - noca
179
+ - btnw
180
+ - bcch
181
+ - tuti
182
+ - oven
183
+ - revi
184
+ - coye
185
+ - blja
186
+ - scta
187
+ - amre
188
+ - bggn
189
+ - kewa
190
+ - bhco
191
+ - baww
192
+ - heth
193
+ - bhvi
194
+ - rbwo
195
+ - nofl
196
+ - howa
197
+ - ybcu
198
+ - bwwa
199
+ - carw
200
+ - amgo
201
+ - rbgr
202
+ - rcki
203
+ - amro
204
+ - lowa
205
+ - swth
206
+ - wbnu
207
+ - witu
208
+ - dowo
209
+ - cang
210
+ - hawo
211
+ - cswa
212
+ - rwbl
213
+ - rsha
214
+ - bbwa
215
+ - baor
216
+ - piwo
217
+ - brcr
218
+ - cora
219
+ - nawa
220
+ - <unk>
221
+ - <blank>
222
+ text_token_list: null
223
+ text_bpemodel: null
224
+ init: xavier_normal
225
+ input_size: 1
226
+ use_preprocessor: true
227
+ frontend: null
228
+ frontend_conf: {}
229
+ specaug: null
230
+ specaug_conf: {}
231
+ normalize: null
232
+ normalize_conf: {}
233
+ preencoder: null
234
+ preencoder_conf: {}
235
+ encoder: beats
236
+ encoder_conf:
237
+ beats_ckpt_path: /work/nvme/bbjs/sbharadwaj/7Msounds/exp/beats_iter1_large1.tune_lr1.0e-4_warmup40000_bins1600000_totalsteps400000/epoch_latest.pt
238
+ beats_config:
239
+ layer_wise_gradient_decay_ratio: 0.3
240
+ encoder_layerdrop: 0.1
241
+ dropout: 0.0
242
+ use_weighted_representation: false
243
+ specaug_config:
244
+ apply_time_warp: true
245
+ apply_freq_mask: false
246
+ apply_time_mask: true
247
+ time_mask_width_ratio_range:
248
+ - 0
249
+ - 0.06
250
+ num_time_mask: 1
251
+ roll_augment: false
252
+ text_encoder: null
253
+ text_encoder_conf: {}
254
+ embedding_fusion: null
255
+ embedding_fusion_conf: {}
256
+ decoder: linear
257
+ decoder_conf: {}
258
+ model: espnet
259
+ model_conf:
260
+ classification_type: multi-label
261
+ log_epoch_metrics: true
262
+ user_callbacks:
263
+ - mAP_logging
264
+ required:
265
+ - output_dir
266
+ - token_list
267
+ task: cls
268
+ ```
269
+
270
+ </details>
271
+
272
+ ### Citations
273
+
274
+ ```BibTex
275
+
276
+ @article{bharadwaj2025openbeats,
277
+ title={OpenBEATs: A Fully Open-Source General-Purpose Audio Encoder},
278
+ author={Bharadwaj, Shikhar and Cornell, Samuele and Choi, Kwanghee and Fukayama, Satoru and Shim, Hye-jin and Deshmukh, Soham and Watanabe, Shinji},
279
+ journal={arXiv preprint arXiv:2507.14129},
280
+ year={2025}
281
+ }
282
+
283
+ @inproceedings{watanabe2018espnet,
284
+ author={Shinji Watanabe and Takaaki Hori and Shigeki Karita and Tomoki Hayashi and Jiro Nishitoba and Yuya Unno and Nelson Yalta and Jahn Heymann and Matthew Wiesner and Nanxin Chen and Adithya Renduchintala and Tsubasa Ochiai},
285
+ title={{ESPnet}: End-to-End Speech Processing Toolkit},
286
+ year={2018},
287
+ booktitle={Proceedings of Interspeech},
288
+ pages={2207--2211},
289
+ doi={10.21437/Interspeech.2018-1456},
290
+ url={http://dx.doi.org/10.21437/Interspeech.2018-1456}
291
+ }
292
+
293
+
294
+
295
+
296
+
297
+
298
+ ```
meta.yaml ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ espnet: '202503'
2
+ files:
3
+ classification_model_file: /work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/exp/beans_enabirds/cls_earlarge2/valid.epoch_mAP.ave_1best.pth
4
+ python: "3.9.18 | packaged by conda-forge | (main, Dec 23 2023, 17:20:25) \n[GCC 12.3.0]"
5
+ timestamp: 1763331857.549773
6
+ torch: 2.1.2
7
+ yaml_files:
8
+ classification_train_config: /work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/exp/beans_enabirds/cls_earlarge2/config.yaml
work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/data/beans_enabirds/token_list ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ eato
2
+ amcr
3
+ woth
4
+ noca
5
+ btnw
6
+ bcch
7
+ tuti
8
+ oven
9
+ revi
10
+ coye
11
+ blja
12
+ scta
13
+ amre
14
+ bggn
15
+ kewa
16
+ bhco
17
+ baww
18
+ heth
19
+ bhvi
20
+ rbwo
21
+ nofl
22
+ howa
23
+ ybcu
24
+ bwwa
25
+ carw
26
+ amgo
27
+ rbgr
28
+ rcki
29
+ amro
30
+ lowa
31
+ swth
32
+ wbnu
33
+ witu
34
+ dowo
35
+ cang
36
+ hawo
37
+ cswa
38
+ rwbl
39
+ rsha
40
+ bbwa
41
+ baor
42
+ piwo
43
+ brcr
44
+ cora
45
+ nawa
46
+ <unk>
47
+ <blank>
work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/exp/beans_enabirds/cls_earlarge2/RESULTS.md ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!-- Generated by scripts/utils/show_cls_result.sh -->
2
+ # RESULTS
3
+ ## Environments
4
+ - date: `Sat Apr 5 23:44:33 CDT 2025`
5
+ - python version: `3.9.18 | packaged by conda-forge | (main, Dec 23 2023, 17:20:25) [GCC 12.3.0]`
6
+ - espnet version: `espnet 202412`
7
+ - pytorch version: `pytorch 2.6.0.dev20241210+cu124`
8
+ - Git hash: `c96433a43c5c3984889b81804becac6ebf10f7a7`
9
+ - Commit date: `Mon Mar 31 20:24:06 2025 -0500`
10
+
11
+ ## cls_earlarge2
12
+ |Split|mean_acc|mAP|mean_auc|n_labels|n_instances|
13
+ |---|---|---|---|---|---|
14
+ |cls_enabirds.dev|65.66|53.50|72.73|45.00|4543.00|
15
+ |cls_enabirds.test|64.43|53.43|76.68|45.00|4543.00|
16
+
work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/exp/beans_enabirds/cls_earlarge2/config.yaml ADDED
@@ -0,0 +1,246 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ config: /work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/exp/earlarge2/conf/ear_large/beans_enabirds.yaml
2
+ print_config: false
3
+ log_level: INFO
4
+ drop_last_iter: false
5
+ dry_run: false
6
+ iterator_type: sequence
7
+ valid_iterator_type: null
8
+ output_dir: /work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/exp/beans_enabirds/cls_earlarge2
9
+ ngpu: 0
10
+ seed: 0
11
+ num_workers: 2
12
+ num_att_plot: 0
13
+ dist_backend: nccl
14
+ dist_init_method: env://
15
+ dist_world_size: null
16
+ dist_rank: null
17
+ local_rank: null
18
+ dist_master_addr: null
19
+ dist_master_port: null
20
+ dist_launcher: null
21
+ multiprocessing_distributed: false
22
+ unused_parameters: true
23
+ sharded_ddp: false
24
+ use_deepspeed: false
25
+ deepspeed_config: null
26
+ gradient_as_bucket_view: true
27
+ ddp_comm_hook: null
28
+ cudnn_enabled: true
29
+ cudnn_benchmark: false
30
+ cudnn_deterministic: true
31
+ use_tf32: false
32
+ collect_stats: false
33
+ write_collected_feats: false
34
+ max_epoch: 250
35
+ patience: null
36
+ val_scheduler_criterion:
37
+ - valid
38
+ - loss
39
+ early_stopping_criterion:
40
+ - valid
41
+ - loss
42
+ - min
43
+ best_model_criterion:
44
+ - - valid
45
+ - epoch_mAP
46
+ - max
47
+ keep_nbest_models: 1
48
+ nbest_averaging_interval: 0
49
+ grad_clip: 1
50
+ grad_clip_type: 2.0
51
+ grad_noise: false
52
+ accum_grad: 1
53
+ no_forward_run: false
54
+ resume: true
55
+ train_dtype: float32
56
+ use_amp: false
57
+ log_interval: null
58
+ use_matplotlib: true
59
+ use_tensorboard: true
60
+ create_graph_in_tensorboard: false
61
+ use_wandb: true
62
+ wandb_project: audioverse
63
+ wandb_id: null
64
+ wandb_entity: shikhar
65
+ wandb_name: beans_enabirds.earlarge2
66
+ wandb_model_log_interval: -1
67
+ detect_anomaly: false
68
+ use_adapter: false
69
+ adapter: lora
70
+ save_strategy: all
71
+ adapter_conf: {}
72
+ pretrain_path: null
73
+ init_param: []
74
+ ignore_init_mismatch: false
75
+ freeze_param: []
76
+ num_iters_per_epoch: null
77
+ batch_size: 32
78
+ valid_batch_size: 32
79
+ batch_bins: 1000000
80
+ valid_batch_bins: null
81
+ category_sample_size: 10
82
+ train_shape_file:
83
+ - /work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/exp/beans_enabirds/cls_stats_16k/train/speech_shape
84
+ - /work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/exp/beans_enabirds/cls_stats_16k/train/label_shape
85
+ valid_shape_file:
86
+ - /work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/exp/beans_enabirds/cls_stats_16k/valid/speech_shape
87
+ - /work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/exp/beans_enabirds/cls_stats_16k/valid/label_shape
88
+ batch_type: folded
89
+ valid_batch_type: null
90
+ fold_length:
91
+ - 160000
92
+ - 35
93
+ sort_in_batch: descending
94
+ shuffle_within_batch: false
95
+ sort_batch: descending
96
+ multiple_iterator: false
97
+ utt2weight_file: null
98
+ chunk_length: 500
99
+ chunk_shift_ratio: 0.5
100
+ num_cache_chunks: 1024
101
+ chunk_excluded_key_prefixes: []
102
+ chunk_default_fs: null
103
+ chunk_max_abs_length: null
104
+ chunk_discard_short_samples: true
105
+ train_data_path_and_name_and_type:
106
+ - - /work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/dump/beans_enabirds/enabirds.train/wav.scp
107
+ - speech
108
+ - sound
109
+ - - /work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/dump/beans_enabirds/enabirds.train/text
110
+ - label
111
+ - text
112
+ valid_data_path_and_name_and_type:
113
+ - - /work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/dump/beans_enabirds/enabirds.dev/wav.scp
114
+ - speech
115
+ - sound
116
+ - - /work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/dump/beans_enabirds/enabirds.dev/text
117
+ - label
118
+ - text
119
+ multi_task_dataset: false
120
+ allow_variable_data_keys: false
121
+ max_cache_size: 0.0
122
+ max_cache_fd: 32
123
+ allow_multi_rates: false
124
+ valid_max_cache_size: null
125
+ exclude_weight_decay: false
126
+ exclude_weight_decay_conf: {}
127
+ optim: adamw
128
+ optim_conf:
129
+ lr: 3.0e-05
130
+ weight_decay: 0.01
131
+ betas:
132
+ - 0.9
133
+ - 0.98
134
+ scheduler: cosineannealingwarmuprestarts
135
+ scheduler_conf:
136
+ first_cycle_steps: 95000
137
+ warmup_steps: 8000
138
+ max_lr: 3.0e-05
139
+ min_lr: 5.0e-06
140
+ lightning_conf:
141
+ log_every_n_steps: 250
142
+ max_epochs: 250
143
+ strategy: ddp
144
+ strategy_conf:
145
+ find_unused_parameters: true
146
+ best_model_criterion:
147
+ - - valid/epoch_mAP
148
+ - max
149
+ - 1
150
+ devices: 1
151
+ num_nodes: 1
152
+ default_root_dir: /work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/exp/beans_enabirds/cls_earlarge2
153
+ token_list:
154
+ - eato
155
+ - amcr
156
+ - woth
157
+ - noca
158
+ - btnw
159
+ - bcch
160
+ - tuti
161
+ - oven
162
+ - revi
163
+ - coye
164
+ - blja
165
+ - scta
166
+ - amre
167
+ - bggn
168
+ - kewa
169
+ - bhco
170
+ - baww
171
+ - heth
172
+ - bhvi
173
+ - rbwo
174
+ - nofl
175
+ - howa
176
+ - ybcu
177
+ - bwwa
178
+ - carw
179
+ - amgo
180
+ - rbgr
181
+ - rcki
182
+ - amro
183
+ - lowa
184
+ - swth
185
+ - wbnu
186
+ - witu
187
+ - dowo
188
+ - cang
189
+ - hawo
190
+ - cswa
191
+ - rwbl
192
+ - rsha
193
+ - bbwa
194
+ - baor
195
+ - piwo
196
+ - brcr
197
+ - cora
198
+ - nawa
199
+ - <unk>
200
+ - <blank>
201
+ text_token_list: null
202
+ text_bpemodel: null
203
+ init: xavier_normal
204
+ input_size: 1
205
+ use_preprocessor: true
206
+ frontend: null
207
+ frontend_conf: {}
208
+ specaug: null
209
+ specaug_conf: {}
210
+ normalize: null
211
+ normalize_conf: {}
212
+ preencoder: null
213
+ preencoder_conf: {}
214
+ encoder: beats
215
+ encoder_conf:
216
+ beats_ckpt_path: /work/nvme/bbjs/sbharadwaj/7Msounds/exp/beats_iter1_large1.tune_lr1.0e-4_warmup40000_bins1600000_totalsteps400000/epoch_latest.pt
217
+ beats_config:
218
+ layer_wise_gradient_decay_ratio: 0.3
219
+ encoder_layerdrop: 0.1
220
+ dropout: 0.0
221
+ use_weighted_representation: false
222
+ specaug_config:
223
+ apply_time_warp: true
224
+ apply_freq_mask: false
225
+ apply_time_mask: true
226
+ time_mask_width_ratio_range:
227
+ - 0
228
+ - 0.06
229
+ num_time_mask: 1
230
+ roll_augment: false
231
+ text_encoder: null
232
+ text_encoder_conf: {}
233
+ embedding_fusion: null
234
+ embedding_fusion_conf: {}
235
+ decoder: linear
236
+ decoder_conf: {}
237
+ model: espnet
238
+ model_conf:
239
+ classification_type: multi-label
240
+ log_epoch_metrics: true
241
+ user_callbacks:
242
+ - mAP_logging
243
+ required:
244
+ - output_dir
245
+ - token_list
246
+ task: cls
work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/exp/beans_enabirds/cls_earlarge2/lightning_logs/version_0/events.out.tfevents.1743859446.gh109.hsn.cm.delta.internal.ncsa.edu.1472306.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fdccaa7057b27df08fdf3978e5533912d6063d724161d70048b6008c7a1c3a07
3
+ size 163365
work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/exp/beans_enabirds/cls_earlarge2/lightning_logs/version_0/hparams.yaml ADDED
@@ -0,0 +1,205 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ args: !!python/object:argparse.Namespace
2
+ accum_grad: 1
3
+ adapter: lora
4
+ adapter_conf: {}
5
+ allow_multi_rates: false
6
+ allow_variable_data_keys: false
7
+ batch_bins: 1000000
8
+ batch_size: 32
9
+ batch_type: folded
10
+ best_model_criterion:
11
+ - - valid
12
+ - epoch_mAP
13
+ - max
14
+ category_sample_size: 10
15
+ chunk_default_fs: null
16
+ chunk_discard_short_samples: true
17
+ chunk_excluded_key_prefixes: []
18
+ chunk_length: 500
19
+ chunk_max_abs_length: null
20
+ chunk_shift_ratio: 0.5
21
+ collect_stats: false
22
+ config: /work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/exp/earlarge2/conf/ear_large/beans_enabirds.yaml
23
+ create_graph_in_tensorboard: false
24
+ cudnn_benchmark: false
25
+ cudnn_deterministic: true
26
+ cudnn_enabled: true
27
+ ddp_comm_hook: null
28
+ decoder: linear
29
+ decoder_conf: {}
30
+ deepspeed_config: null
31
+ detect_anomaly: false
32
+ dist_backend: nccl
33
+ dist_init_method: env://
34
+ dist_launcher: null
35
+ dist_master_addr: null
36
+ dist_master_port: null
37
+ dist_rank: null
38
+ dist_world_size: null
39
+ drop_last_iter: false
40
+ dry_run: false
41
+ early_stopping_criterion: !!python/tuple
42
+ - valid
43
+ - loss
44
+ - min
45
+ embedding_fusion: null
46
+ embedding_fusion_conf: {}
47
+ encoder: beats
48
+ encoder_conf:
49
+ beats_ckpt_path: /work/nvme/bbjs/sbharadwaj/7Msounds/exp/beats_iter1_large1.tune_lr1.0e-4_warmup40000_bins1600000_totalsteps400000/epoch_latest.pt
50
+ beats_config:
51
+ dropout: 0.0
52
+ encoder_layerdrop: 0.1
53
+ layer_wise_gradient_decay_ratio: 0.3
54
+ roll_augment: false
55
+ specaug_config:
56
+ apply_freq_mask: false
57
+ apply_time_mask: true
58
+ apply_time_warp: true
59
+ num_time_mask: 1
60
+ time_mask_width_ratio_range:
61
+ - 0
62
+ - 0.06
63
+ use_weighted_representation: false
64
+ exclude_weight_decay: false
65
+ exclude_weight_decay_conf: {}
66
+ fold_length:
67
+ - 160000
68
+ - 35
69
+ freeze_param: []
70
+ frontend: null
71
+ frontend_conf:
72
+ fs: 16k
73
+ grad_clip: 1
74
+ grad_clip_type: 2.0
75
+ grad_noise: false
76
+ gradient_as_bucket_view: true
77
+ ignore_init_mismatch: false
78
+ init: xavier_normal
79
+ init_param: []
80
+ input_size: 1
81
+ iterator_type: sequence
82
+ keep_nbest_models: 1
83
+ lightning_conf:
84
+ best_model_criterion:
85
+ - - valid/epoch_mAP
86
+ - max
87
+ - 1
88
+ default_root_dir: /work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/exp/beans_enabirds/cls_earlarge2
89
+ devices: 1
90
+ log_every_n_steps: 250
91
+ max_epochs: 250
92
+ num_nodes: 1
93
+ strategy: ddp
94
+ strategy_conf:
95
+ find_unused_parameters: true
96
+ local_rank: null
97
+ log_interval: null
98
+ log_level: INFO
99
+ max_cache_fd: 32
100
+ max_cache_size: 0.0
101
+ max_epoch: 250
102
+ model: espnet
103
+ model_conf:
104
+ classification_type: multi-label
105
+ log_epoch_metrics: true
106
+ multi_task_dataset: false
107
+ multiple_iterator: false
108
+ multiprocessing_distributed: false
109
+ nbest_averaging_interval: 0
110
+ ngpu: 0
111
+ no_forward_run: false
112
+ normalize: null
113
+ normalize_conf: {}
114
+ num_att_plot: 0
115
+ num_cache_chunks: 1024
116
+ num_iters_per_epoch: null
117
+ num_workers: 2
118
+ optim: adamw
119
+ optim_conf:
120
+ betas:
121
+ - 0.9
122
+ - 0.98
123
+ lr: 3.0e-05
124
+ weight_decay: 0.01
125
+ output_dir: /work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/exp/beans_enabirds/cls_earlarge2
126
+ patience: null
127
+ preencoder: null
128
+ preencoder_conf: {}
129
+ pretrain_path: null
130
+ print_config: false
131
+ required:
132
+ - output_dir
133
+ - token_list
134
+ resume: true
135
+ save_strategy: all
136
+ scheduler: cosineannealingwarmuprestarts
137
+ scheduler_conf:
138
+ first_cycle_steps: 95000
139
+ max_lr: 3.0e-05
140
+ min_lr: 5.0e-06
141
+ warmup_steps: 8000
142
+ seed: 0
143
+ sharded_ddp: false
144
+ shuffle_within_batch: false
145
+ sort_batch: descending
146
+ sort_in_batch: descending
147
+ specaug: null
148
+ specaug_conf: {}
149
+ task: cls
150
+ text_bpemodel: null
151
+ text_encoder: null
152
+ text_encoder_conf: {}
153
+ text_token_list: null
154
+ token_list: /work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/data/beans_enabirds/token_list
155
+ train_data_path_and_name_and_type:
156
+ - !!python/tuple
157
+ - /work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/dump/beans_enabirds/enabirds.train/wav.scp
158
+ - speech
159
+ - sound
160
+ - !!python/tuple
161
+ - /work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/dump/beans_enabirds/enabirds.train/text
162
+ - label
163
+ - text
164
+ train_dtype: float32
165
+ train_shape_file:
166
+ - /work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/exp/beans_enabirds/cls_stats_16k/train/speech_shape
167
+ - /work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/exp/beans_enabirds/cls_stats_16k/train/label_shape
168
+ unused_parameters: true
169
+ use_adapter: false
170
+ use_amp: false
171
+ use_deepspeed: false
172
+ use_matplotlib: true
173
+ use_preprocessor: true
174
+ use_tensorboard: true
175
+ use_tf32: false
176
+ use_wandb: true
177
+ user_callbacks:
178
+ - mAP_logging
179
+ utt2weight_file: null
180
+ val_scheduler_criterion: !!python/tuple
181
+ - valid
182
+ - loss
183
+ valid_batch_bins: null
184
+ valid_batch_size: 32
185
+ valid_batch_type: null
186
+ valid_data_path_and_name_and_type:
187
+ - !!python/tuple
188
+ - /work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/dump/beans_enabirds/enabirds.dev/wav.scp
189
+ - speech
190
+ - sound
191
+ - !!python/tuple
192
+ - /work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/dump/beans_enabirds/enabirds.dev/text
193
+ - label
194
+ - text
195
+ valid_iterator_type: null
196
+ valid_max_cache_size: null
197
+ valid_shape_file:
198
+ - /work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/exp/beans_enabirds/cls_stats_16k/valid/speech_shape
199
+ - /work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/exp/beans_enabirds/cls_stats_16k/valid/label_shape
200
+ wandb_entity: shikhar
201
+ wandb_id: null
202
+ wandb_model_log_interval: -1
203
+ wandb_name: beans_enabirds.earlarge2
204
+ wandb_project: audioverse
205
+ write_collected_feats: false
work/nvme/bbjs/sbharadwaj/espnet/egs2/audioverse/v1/exp/beans_enabirds/cls_earlarge2/valid.epoch_mAP.ave_1best.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1f392872b8b5239c91654bde3fe73e9c56543327a381ab5acd9216961f09f5bb
3
+ size 1246332570