Fine-tuned distilbert-base-uncased on SQuAD - Best F1: 85.2786

Files changed (5)

README.md CHANGED

@@ -20,9 +20,9 @@ model-index:
       type: squad
     metrics:
     - type: exact_match
-      value: 76.32923368022706
-    - type: f1
       value: N/A
 ---
 # distilbert-base-uncased fine-tuned on SQuAD
@@ -36,19 +36,19 @@ This model is a fine-tuned version of [distilbert-base-uncased](https://huggingf
 - **Model**: distilbert-base-uncased
 - **Dataset**: SQuAD
 - **Optimizer**: adamw
-- **Learning Rate Scheduler**: linear
-- **Learning Rate**: 2e-05
 - **Batch Size**: 16 per device
 - **Total Batch Size**: 64
 - **Epochs**: 5 (with early stopping)
 - **Weight Decay**: 0.01
-- **Warmup Ratio**: 0.1
 - **Max Gradient Norm**: 1.0
 ### Early Stopping
-- **Patience**: 3
-- **Metric**: exact_match
 - **Best Epoch**: 3
 ## Usage
@@ -78,11 +78,11 @@ print(f"Answer: {answer}")
 The model achieved the following results on the evaluation set:
-- **Exact Match**: 76.2725
-- **F1 Score**: 84.5969
 ## Training Configuration Hash
-Config Hash: 57d14774
 This hash can be used to reproduce the exact training configuration.

       type: squad
     metrics:
     - type: exact_match
       value: N/A
+    - type: f1
+      value: 85.3016055407403
 ---
 # distilbert-base-uncased fine-tuned on SQuAD
 - **Model**: distilbert-base-uncased
 - **Dataset**: SQuAD
 - **Optimizer**: adamw
+- **Learning Rate Scheduler**: cosine_with_restarts
+- **Learning Rate**: 3e-05
 - **Batch Size**: 16 per device
 - **Total Batch Size**: 64
 - **Epochs**: 5 (with early stopping)
 - **Weight Decay**: 0.01
+- **Warmup Ratio**: 0.06
 - **Max Gradient Norm**: 1.0
 ### Early Stopping
+- **Patience**: 4
+- **Metric**: f1
 - **Best Epoch**: 3
 ## Usage
 The model achieved the following results on the evaluation set:
+- **Exact Match**: 76.9253
+- **F1 Score**: 85.2786
 ## Training Configuration Hash
+Config Hash: fe08f7bd
 This hash can be used to reproduce the exact training configuration.

eval_results.json CHANGED

@@ -1,4 +1,4 @@
 {
-  "exact_match": 76.32923368022706,
-  "f1": 84.43955609103683
 }

 {
+  "exact_match": 77.02932828760643,
+  "f1": 85.3016055407403
 }

model.safetensors CHANGED

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4511c010153b769d8e00e9b15d268e445e17f54c576492cbe1786fad692ebd17
 size 265470032

 version https://git-lfs.github.com/spec/v1
+oid sha256:239d6746ac8fbc72bd35d8f4cdb6a01df798e8f1dc8634eee4eaa623815e5af5
 size 265470032

tokenizer.json CHANGED

@@ -2,13 +2,13 @@
   "version": "1.0",
   "truncation": {
     "direction": "Right",
-    "max_length": 384,
     "strategy": "OnlySecond",
-    "stride": 128
   },
   "padding": {
     "strategy": {
-      "Fixed": 384
     },
     "direction": "Right",
     "pad_to_multiple_of": null,

   "version": "1.0",
   "truncation": {
     "direction": "Right",
+    "max_length": 512,
     "strategy": "OnlySecond",
+    "stride": 256
   },
   "padding": {
     "strategy": {
+      "Fixed": 512
     },
     "direction": "Right",
     "pad_to_multiple_of": null,

training_config.json CHANGED

@@ -1,15 +1,15 @@
 {
   "squad_v2": false,
   "model_checkpoint": "distilbert-base-uncased",
-  "max_length": 384,
-  "doc_stride": 128,
   "n_best_size": 20,
   "max_answer_length": 30,
   "batch_size": 16,
   "num_epochs": 5,
-  "learning_rate": 2e-05,
   "weight_decay": 0.01,
-  "warmup_ratio": 0.1,
   "gradient_accumulation_steps": 1,
   "max_grad_norm": 1.0,
   "optimizer_type": "adamw",
@@ -18,17 +18,17 @@
     0.999
   ],
   "optimizer_eps": 1e-08,
-  "scheduler_type": "linear",
   "scheduler_power": 1.0,
   "scheduler_eta_min": 0.0,
-  "early_stopping_patience": 3,
   "early_stopping_threshold": 0.001,
-  "early_stopping_metric": "exact_match",
   "log_interval": 50,
   "eval_steps": null,
   "save_steps": null,
   "save_total_limit": 3,
-  "wandb_project": "question-answering-distilbert-squad-qa",
   "wandb_entity": null,
   "use_wandb": true,
   "wandb_tags": [
@@ -40,7 +40,7 @@
   "hub_model_id": null,
   "hub_private": false,
   "hub_model_name_max_length": 50,
-  "hub_versioning_strategy": "single_repo_versions",
   "hub_base_model_name": "distilbert-squad-qa",
   "seed": 42,
   "dataloader_num_workers": 0,

 {
   "squad_v2": false,
   "model_checkpoint": "distilbert-base-uncased",
+  "max_length": 512,
+  "doc_stride": 256,
   "n_best_size": 20,
   "max_answer_length": 30,
   "batch_size": 16,
   "num_epochs": 5,
+  "learning_rate": 3e-05,
   "weight_decay": 0.01,
+  "warmup_ratio": 0.06,
   "gradient_accumulation_steps": 1,
   "max_grad_norm": 1.0,
   "optimizer_type": "adamw",
     0.999
   ],
   "optimizer_eps": 1e-08,
+  "scheduler_type": "cosine_with_restarts",
   "scheduler_power": 1.0,
   "scheduler_eta_min": 0.0,
+  "early_stopping_patience": 4,
   "early_stopping_threshold": 0.001,
+  "early_stopping_metric": "f1",
   "log_interval": 50,
   "eval_steps": null,
   "save_steps": null,
   "save_total_limit": 3,
+  "wandb_project": "question-answering-enhanced",
   "wandb_entity": null,
   "use_wandb": true,
   "wandb_tags": [
   "hub_model_id": null,
   "hub_private": false,
   "hub_model_name_max_length": 50,
+  "hub_versioning_strategy": "Single_repo_versions",
   "hub_base_model_name": "distilbert-squad-qa",
   "seed": 42,
   "dataloader_num_workers": 0,