Fine-tuned distilbert-base-uncased on SQuAD - Best F1: 85.2786
Browse files
- README.md +10 -10
- eval_results.json +2 -2
- model.safetensors +1 -1
- tokenizer.json +3 -3
- training_config.json +9 -9
README.md
CHANGED
|
@@ -20,9 +20,9 @@ model-index:
|
|
| 20 |
type: squad
|
| 21 |
metrics:
|
| 22 |
- type: exact_match
|
| 23 |
-
value: 76.32923368022706
|
| 24 |
-
- type: f1
|
| 25 |
value: N/A
|
|
|
|
|
|
|
| 26 |
---
|
| 27 |
|
| 28 |
# distilbert-base-uncased fine-tuned on SQuAD
|
|
@@ -36,19 +36,19 @@ This model is a fine-tuned version of [distilbert-base-uncased](https://huggingf
|
|
| 36 |
- **Model**: distilbert-base-uncased
|
| 37 |
- **Dataset**: SQuAD
|
| 38 |
- **Optimizer**: adamw
|
| 39 |
-
- **Learning Rate Scheduler**:
|
| 40 |
-
- **Learning Rate**:
|
| 41 |
- **Batch Size**: 16 per device
|
| 42 |
- **Total Batch Size**: 64
|
| 43 |
- **Epochs**: 5 (with early stopping)
|
| 44 |
- **Weight Decay**: 0.01
|
| 45 |
-
- **Warmup Ratio**: 0.
|
| 46 |
- **Max Gradient Norm**: 1.0
|
| 47 |
|
| 48 |
### Early Stopping
|
| 49 |
|
| 50 |
-
- **Patience**:
|
| 51 |
-
- **Metric**:
|
| 52 |
- **Best Epoch**: 3
|
| 53 |
|
| 54 |
## Usage
|
|
@@ -78,11 +78,11 @@ print(f"Answer: {answer}")
|
|
| 78 |
|
| 79 |
The model achieved the following results on the evaluation set:
|
| 80 |
|
| 81 |
-
- **Exact Match**: 76.
|
| 82 |
-
- **F1 Score**:
|
| 83 |
|
| 84 |
## Training Configuration Hash
|
| 85 |
|
| 86 |
-
Config Hash:
|
| 87 |
|
| 88 |
This hash can be used to reproduce the exact training configuration.
|
|
|
|
| 20 |
type: squad
|
| 21 |
metrics:
|
| 22 |
- type: exact_match
|
|
|
|
|
|
|
| 23 |
value: N/A
|
| 24 |
+
- type: f1
|
| 25 |
+
value: 85.3016055407403
|
| 26 |
---
|
| 27 |
|
| 28 |
# distilbert-base-uncased fine-tuned on SQuAD
|
|
|
|
| 36 |
- **Model**: distilbert-base-uncased
|
| 37 |
- **Dataset**: SQuAD
|
| 38 |
- **Optimizer**: adamw
|
| 39 |
+
- **Learning Rate Scheduler**: cosine_with_restarts
|
| 40 |
+
- **Learning Rate**: 3e-05
|
| 41 |
- **Batch Size**: 16 per device
|
| 42 |
- **Total Batch Size**: 64
|
| 43 |
- **Epochs**: 5 (with early stopping)
|
| 44 |
- **Weight Decay**: 0.01
|
| 45 |
+
- **Warmup Ratio**: 0.06
|
| 46 |
- **Max Gradient Norm**: 1.0
|
| 47 |
|
| 48 |
### Early Stopping
|
| 49 |
|
| 50 |
+
- **Patience**: 4
|
| 51 |
+
- **Metric**: f1
|
| 52 |
- **Best Epoch**: 3
|
| 53 |
|
| 54 |
## Usage
|
|
|
|
| 78 |
|
| 79 |
The model achieved the following results on the evaluation set:
|
| 80 |
|
| 81 |
+
- **Exact Match**: 76.9253
|
| 82 |
+
- **F1 Score**: 85.2786
|
| 83 |
|
| 84 |
## Training Configuration Hash
|
| 85 |
|
| 86 |
+
Config Hash: fe08f7bd
|
| 87 |
|
| 88 |
This hash can be used to reproduce the exact training configuration.
|
eval_results.json
CHANGED
|
@@ -1,4 +1,4 @@
|
|
| 1 |
{
|
| 2 |
-
"exact_match":
|
| 3 |
-
"f1":
|
| 4 |
}
|
|
|
|
| 1 |
{
|
| 2 |
+
"exact_match": 77.02932828760643,
|
| 3 |
+
"f1": 85.3016055407403
|
| 4 |
}
|
model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 265470032
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:239d6746ac8fbc72bd35d8f4cdb6a01df798e8f1dc8634eee4eaa623815e5af5
|
| 3 |
size 265470032
|
tokenizer.json
CHANGED
|
@@ -2,13 +2,13 @@
|
|
| 2 |
"version": "1.0",
|
| 3 |
"truncation": {
|
| 4 |
"direction": "Right",
|
| 5 |
-
"max_length":
|
| 6 |
"strategy": "OnlySecond",
|
| 7 |
-
"stride":
|
| 8 |
},
|
| 9 |
"padding": {
|
| 10 |
"strategy": {
|
| 11 |
-
"Fixed":
|
| 12 |
},
|
| 13 |
"direction": "Right",
|
| 14 |
"pad_to_multiple_of": null,
|
|
|
|
| 2 |
"version": "1.0",
|
| 3 |
"truncation": {
|
| 4 |
"direction": "Right",
|
| 5 |
+
"max_length": 512,
|
| 6 |
"strategy": "OnlySecond",
|
| 7 |
+
"stride": 256
|
| 8 |
},
|
| 9 |
"padding": {
|
| 10 |
"strategy": {
|
| 11 |
+
"Fixed": 512
|
| 12 |
},
|
| 13 |
"direction": "Right",
|
| 14 |
"pad_to_multiple_of": null,
|
training_config.json
CHANGED
|
@@ -1,15 +1,15 @@
|
|
| 1 |
{
|
| 2 |
"squad_v2": false,
|
| 3 |
"model_checkpoint": "distilbert-base-uncased",
|
| 4 |
-
"max_length":
|
| 5 |
-
"doc_stride":
|
| 6 |
"n_best_size": 20,
|
| 7 |
"max_answer_length": 30,
|
| 8 |
"batch_size": 16,
|
| 9 |
"num_epochs": 5,
|
| 10 |
-
"learning_rate":
|
| 11 |
"weight_decay": 0.01,
|
| 12 |
-
"warmup_ratio": 0.
|
| 13 |
"gradient_accumulation_steps": 1,
|
| 14 |
"max_grad_norm": 1.0,
|
| 15 |
"optimizer_type": "adamw",
|
|
@@ -18,17 +18,17 @@
|
|
| 18 |
0.999
|
| 19 |
],
|
| 20 |
"optimizer_eps": 1e-08,
|
| 21 |
-
"scheduler_type": "
|
| 22 |
"scheduler_power": 1.0,
|
| 23 |
"scheduler_eta_min": 0.0,
|
| 24 |
-
"early_stopping_patience":
|
| 25 |
"early_stopping_threshold": 0.001,
|
| 26 |
-
"early_stopping_metric": "
|
| 27 |
"log_interval": 50,
|
| 28 |
"eval_steps": null,
|
| 29 |
"save_steps": null,
|
| 30 |
"save_total_limit": 3,
|
| 31 |
-
"wandb_project": "question-answering-
|
| 32 |
"wandb_entity": null,
|
| 33 |
"use_wandb": true,
|
| 34 |
"wandb_tags": [
|
|
@@ -40,7 +40,7 @@
|
|
| 40 |
"hub_model_id": null,
|
| 41 |
"hub_private": false,
|
| 42 |
"hub_model_name_max_length": 50,
|
| 43 |
-
"hub_versioning_strategy": "
|
| 44 |
"hub_base_model_name": "distilbert-squad-qa",
|
| 45 |
"seed": 42,
|
| 46 |
"dataloader_num_workers": 0,
|
|
|
|
| 1 |
{
|
| 2 |
"squad_v2": false,
|
| 3 |
"model_checkpoint": "distilbert-base-uncased",
|
| 4 |
+
"max_length": 512,
|
| 5 |
+
"doc_stride": 256,
|
| 6 |
"n_best_size": 20,
|
| 7 |
"max_answer_length": 30,
|
| 8 |
"batch_size": 16,
|
| 9 |
"num_epochs": 5,
|
| 10 |
+
"learning_rate": 3e-05,
|
| 11 |
"weight_decay": 0.01,
|
| 12 |
+
"warmup_ratio": 0.06,
|
| 13 |
"gradient_accumulation_steps": 1,
|
| 14 |
"max_grad_norm": 1.0,
|
| 15 |
"optimizer_type": "adamw",
|
|
|
|
| 18 |
0.999
|
| 19 |
],
|
| 20 |
"optimizer_eps": 1e-08,
|
| 21 |
+
"scheduler_type": "cosine_with_restarts",
|
| 22 |
"scheduler_power": 1.0,
|
| 23 |
"scheduler_eta_min": 0.0,
|
| 24 |
+
"early_stopping_patience": 4,
|
| 25 |
"early_stopping_threshold": 0.001,
|
| 26 |
+
"early_stopping_metric": "f1",
|
| 27 |
"log_interval": 50,
|
| 28 |
"eval_steps": null,
|
| 29 |
"save_steps": null,
|
| 30 |
"save_total_limit": 3,
|
| 31 |
+
"wandb_project": "question-answering-enhanced",
|
| 32 |
"wandb_entity": null,
|
| 33 |
"use_wandb": true,
|
| 34 |
"wandb_tags": [
|
|
|
|
| 40 |
"hub_model_id": null,
|
| 41 |
"hub_private": false,
|
| 42 |
"hub_model_name_max_length": 50,
|
| 43 |
+
"hub_versioning_strategy": "Single_repo_versions",
|
| 44 |
"hub_base_model_name": "distilbert-squad-qa",
|
| 45 |
"seed": 42,
|
| 46 |
"dataloader_num_workers": 0,
|