albert-squad-qa / training_config.json

Training run v20250727_173457 - F1: 88.7676, EM: 80.4541

9fa2b06 verified 5 months ago

1.7 kB

	{
	"squad_v2": false,
	"model_checkpoint": "albert-base-v2",
	"max_length": 512,
	"doc_stride": 64,
	"n_best_size": 100,
	"max_answer_length": 64,
	"use_data_augmentation": true,
	"augment_probability": 0.15,
	"context_dropout": 0.05,
	"question_paraphrasing": true,
	"negative_sampling": true,
	"batch_size": 28,
	"num_epochs": 6,
	"learning_rate": 6e-05,
	"weight_decay": 0.005,
	"warmup_ratio": 0.08,
	"gradient_accumulation_steps": 2,
	"max_grad_norm": 0.5,
	"optimizer_type": "adamw",
	"optimizer_betas": [
	0.9,
	0.98
	],
	"optimizer_eps": 1e-07,
	"scheduler_type": "cosine_with_restarts",
	"scheduler_power": 0.5,
	"scheduler_eta_min": 5e-07,
	"scheduler_num_cycles": 0.5,
	"early_stopping_patience": 4,
	"early_stopping_threshold": 0.0002,
	"early_stopping_metric": "f1",
	"log_interval": 50,
	"eval_steps": null,
	"save_steps": null,
	"save_total_limit": 3,
	"wandb_project": "question-answering-enhanced",
	"wandb_entity": null,
	"use_wandb": true,
	"wandb_tags": [
	"question-answering",
	"squad",
	"multi-gpu"
	],
	"push_to_hub": true,
	"hub_model_id": "HariomSahu/albert-squad-qa",
	"hub_private": false,
	"hub_model_name_max_length": 50,
	"hub_versioning_strategy": "single_repo_versions",
	"hub_base_model_name": "albert-squad-qa",
	"use_label_smoothing": true,
	"label_smoothing_factor": 0.1,
	"use_focal_loss": false,
	"focal_loss_alpha": 0.25,
	"focal_loss_gamma": 2.0,
	"use_curriculum_learning": true,
	"curriculum_strategy": "length_based",
	"dropout_rate": 0.1,
	"attention_dropout": 0.1,
	"use_mixup": false,
	"mixup_alpha": 0.2,
	"seed": 42,
	"dataloader_num_workers": 0,
	"dataloader_pin_memory": true
	}