{
  "squad_v2": false,
  "model_checkpoint": "albert-base-v2",
  "max_length": 512,
  "doc_stride": 64,
  "n_best_size": 100,
  "max_answer_length": 64,
  "use_data_augmentation": true,
  "augment_probability": 0.15,
  "context_dropout": 0.05,
  "question_paraphrasing": true,
  "negative_sampling": true,
  "batch_size": 28,
  "num_epochs": 6,
  "learning_rate": 6e-05,
  "weight_decay": 0.005,
  "warmup_ratio": 0.08,
  "gradient_accumulation_steps": 2,
  "max_grad_norm": 0.5,
  "optimizer_type": "adamw",
  "optimizer_betas": [
    0.9,
    0.98
  ],
  "optimizer_eps": 1e-07,
  "scheduler_type": "cosine_with_restarts",
  "scheduler_power": 0.5,
  "scheduler_eta_min": 5e-07,
  "scheduler_num_cycles": 0.5,
  "early_stopping_patience": 4,
  "early_stopping_threshold": 0.0002,
  "early_stopping_metric": "f1",
  "log_interval": 50,
  "eval_steps": null,
  "save_steps": null,
  "save_total_limit": 3,
  "wandb_project": "question-answering-enhanced",
  "wandb_entity": null,
  "use_wandb": true,
  "wandb_tags": [
    "question-answering",
    "squad",
    "multi-gpu"
  ],
  "push_to_hub": true,
  "hub_model_id": "HariomSahu/albert-squad-qa",
  "hub_private": false,
  "hub_model_name_max_length": 50,
  "hub_versioning_strategy": "single_repo_versions",
  "hub_base_model_name": "albert-squad-qa",
  "use_label_smoothing": true,
  "label_smoothing_factor": 0.1,
  "use_focal_loss": false,
  "focal_loss_alpha": 0.25,
  "focal_loss_gamma": 2.0,
  "use_curriculum_learning": true,
  "curriculum_strategy": "length_based",
  "dropout_rate": 0.1,
  "attention_dropout": 0.1,
  "use_mixup": false,
  "mixup_alpha": 0.2,
  "seed": 42,
  "dataloader_num_workers": 0,
  "dataloader_pin_memory": true
}