MIT-SLS
/

USAD-Large

Feature Extraction

automatic-speech-recognition

audio-classification

Model card Files Files and versions

USAD-Large / config.json

vectominist's picture

upload model and code

aab2435 11 months ago

history blame contribute delete

991 Bytes

	{
	"architectures": [
	"USADModel"
	],
	"attention_dropout_p": 0.1,
	"attention_type": "mhsa",
	"auto_map": {
	"AutoConfig": "configuration_usad.USADConfig",
	"AutoModel": "modeling_usad.USADModel"
	},
	"conv_dropout_p": 0.1,
	"conv_expansion_factor": 2,
	"conv_kernel_size": 31,
	"conv_pos": true,
	"conv_pos_depth": 5,
	"conv_pos_groups": 16,
	"conv_pos_width": 95,
	"conv_subsample_channels": 64,
	"conv_subsample_rate": 2,
	"encoder_dim": 1024,
	"feed_forward_dropout_p": 0.1,
	"feed_forward_expansion_factor": 4,
	"half_step_residual": true,
	"input_dim": 128,
	"input_dropout_p": 0.0,
	"mamba_bidirectional": false,
	"mamba_d_conv": 4,
	"mamba_d_state": 16,
	"mamba_expand": 2,
	"model_type": "usad",
	"num_attention_heads": 16,
	"num_layers": 24,
	"subsample_normalization": true,
	"torch_dtype": "float32",
	"transformer_style": true,
	"transformers_version": "4.52.4",
	"use_framewise_subsample": true,
	"use_patchwise_subsample": false
	}