File size: 3,294 Bytes

45e1a77

{
  "_name_or_path": "",
  "activation_dropout": 0.15,
  "adapter_attn_dim": 256,
  "adapter_kernel_size": 5,
  "adapter_stride": 2,
  "add_adapter": true,
  "apply_spec_augment": true,
  "architectures": [
    "Wav2Vec2ForHierarchicalClassification"
  ],
  "attention_dropout": 0.12,
  "bos_token_id": 1,
  "classifier_proj_size": 512,
  "codevector_dim": 384,
  "contrastive_logits_temperature": 0.07,
  "conv_bias": true,
  "conv_dim": [
    768,
    768,
    896,
    896,
    1024,
    1024,
    1024
  ],
  "conv_kernel": [
    10,
    5,
    5,
    3,
    3,
    2,
    2
  ],
  "conv_stride": [
    5,
    2,
    2,
    2,
    2,
    2,
    2
  ],
  "ctc_loss_reduction": "sum",
  "ctc_zero_infinity": true,
  "diversity_loss_weight": 0.15,
  "do_stable_layer_norm": true,
  "eos_token_id": 2,
  "feat_extract_activation": "mish",
  "feat_extract_norm": "layer",
  "feat_proj_dropout": 0.15,
  "feat_quantizer_dropout": 0.05,
  "final_dropout": 0.1,
  "freeze_feat_extract_train": false,
  "hidden_act": "quick_gelu",
  "hidden_dropout": 0.12,
  "hidden_size": 1024,
  "id2label": {
    "0": "synthetic",
    "1": "authentic"
  },
  "initializer_range": 0.02,
  "intermediate_size": 4096,
  "label2id": {
    "synthetic": 0,
    "authentic": 1
  },
  "layer_norm_eps": 1e-06,
  "layerdrop": 0.05,
  "mask_channel_length": 64,
  "mask_channel_min_space": 1,
  "mask_channel_other": 0.0,
  "mask_channel_prob": 0.1,
  "mask_channel_selection": "dynamic",
  "mask_feature_length": 64,
  "mask_feature_min_masks": 2,
  "mask_feature_prob": 0.1,
  "mask_time_length": 10,
  "mask_time_min_masks": 2,
  "mask_time_min_space": 2,
  "mask_time_other": 0.0,
  "mask_time_prob": 0.08,
  "mask_time_selection": "dynamic",
  "model_type": "wav2vec2",
  "no_mask_channel_overlap": true,
  "no_mask_time_overlap": true,
  "num_adapter_layers": 4,
  "num_attention_heads": 16,
  "num_codevector_groups": 4,
  "num_codevectors_per_group": 480,
  "num_conv_pos_embedding_groups": 32,
  "num_conv_pos_embeddings": 256,
  "num_feat_extract_layers": 7,
  "num_hidden_layers": 24,
  "num_negatives": 150,
  "output_hidden_size": 1024,
  "pad_token_id": 0,
  "proj_codevector_dim": 384,
  "tdnn_dilation": [
    1,
    2,
    3,
    4,
    1
  ],
  "tdnn_dim": [
    768,
    768,
    896,
    896,
    1500
  ],
  "tdnn_kernel": [
    5,
    3,
    3,
    3,
    1
  ],
  "torch_dtype": "float32",
  "transformers_version": "4.39.3",
  "use_weighted_layer_sum": true,
  "vocab_size": 32,
  "xvector_output_dim": 768,
  "advanced_config": {
    "attention_type": "multihead_relative",
    "positional_encoding": "rotary",
    "layer_norm_type": "apex",
    "activation_checkpointing": true,
    "gradient_checkpointing": true,
    "mixed_precision_training": true,
    "optimization": {
      "kernel_fusion": true,
      "memory_efficient_attention": true,
      "flash_attention": true,
      "activation_recomputation": true,
      "dynamic_padding": true
    },
    "regularization": {
      "stochastic_depth_rate": 0.1,
      "label_smoothing": 0.1,
      "mixup_alpha": 0.2,
      "gradient_clip_norm": 1.0
    },
    "training_dynamics": {
      "loss_scaling": "dynamic",
      "gradient_accumulation_steps": 4,
      "batch_size_scaling": true,
      "adaptive_learning_rate": true
    }
  }
}