albert-squad-qa / training_config.json
HariomSahu's picture
Training run v20250727_173457 - F1: 88.7676, EM: 80.4541
9fa2b06 verified
{
"squad_v2": false,
"model_checkpoint": "albert-base-v2",
"max_length": 512,
"doc_stride": 64,
"n_best_size": 100,
"max_answer_length": 64,
"use_data_augmentation": true,
"augment_probability": 0.15,
"context_dropout": 0.05,
"question_paraphrasing": true,
"negative_sampling": true,
"batch_size": 28,
"num_epochs": 6,
"learning_rate": 6e-05,
"weight_decay": 0.005,
"warmup_ratio": 0.08,
"gradient_accumulation_steps": 2,
"max_grad_norm": 0.5,
"optimizer_type": "adamw",
"optimizer_betas": [
0.9,
0.98
],
"optimizer_eps": 1e-07,
"scheduler_type": "cosine_with_restarts",
"scheduler_power": 0.5,
"scheduler_eta_min": 5e-07,
"scheduler_num_cycles": 0.5,
"early_stopping_patience": 4,
"early_stopping_threshold": 0.0002,
"early_stopping_metric": "f1",
"log_interval": 50,
"eval_steps": null,
"save_steps": null,
"save_total_limit": 3,
"wandb_project": "question-answering-enhanced",
"wandb_entity": null,
"use_wandb": true,
"wandb_tags": [
"question-answering",
"squad",
"multi-gpu"
],
"push_to_hub": true,
"hub_model_id": "HariomSahu/albert-squad-qa",
"hub_private": false,
"hub_model_name_max_length": 50,
"hub_versioning_strategy": "single_repo_versions",
"hub_base_model_name": "albert-squad-qa",
"use_label_smoothing": true,
"label_smoothing_factor": 0.1,
"use_focal_loss": false,
"focal_loss_alpha": 0.25,
"focal_loss_gamma": 2.0,
"use_curriculum_learning": true,
"curriculum_strategy": "length_based",
"dropout_rate": 0.1,
"attention_dropout": 0.1,
"use_mixup": false,
"mixup_alpha": 0.2,
"seed": 42,
"dataloader_num_workers": 0,
"dataloader_pin_memory": true
}