HariomSahu committed on
Commit
d7aa003
verified
1 Parent(s): 052be9c

Fine-tuned distilbert-base-uncased on SQuAD - Best F1: 85.2786

Browse files
Files changed (5) hide show
  1. README.md +10 -10
  2. eval_results.json +2 -2
  3. model.safetensors +1 -1
  4. tokenizer.json +3 -3
  5. training_config.json +9 -9
README.md CHANGED
@@ -20,9 +20,9 @@ model-index:
20
  type: squad
21
  metrics:
22
  - type: exact_match
23
- value: 76.32923368022706
24
- - type: f1
25
  value: N/A
 
 
26
  ---
27
 
28
  # distilbert-base-uncased fine-tuned on SQuAD
@@ -36,19 +36,19 @@ This model is a fine-tuned version of [distilbert-base-uncased](https://huggingf
36
  - **Model**: distilbert-base-uncased
37
  - **Dataset**: SQuAD
38
  - **Optimizer**: adamw
39
- - **Learning Rate Scheduler**: linear
40
- - **Learning Rate**: 2e-05
41
  - **Batch Size**: 16 per device
42
  - **Total Batch Size**: 64
43
  - **Epochs**: 5 (with early stopping)
44
  - **Weight Decay**: 0.01
45
- - **Warmup Ratio**: 0.1
46
  - **Max Gradient Norm**: 1.0
47
 
48
  ### Early Stopping
49
 
50
- - **Patience**: 3
51
- - **Metric**: exact_match
52
  - **Best Epoch**: 3
53
 
54
  ## Usage
@@ -78,11 +78,11 @@ print(f"Answer: {answer}")
78
 
79
  The model achieved the following results on the evaluation set:
80
 
81
- - **Exact Match**: 76.2725
82
- - **F1 Score**: 84.5969
83
 
84
  ## Training Configuration Hash
85
 
86
- Config Hash: 57d14774
87
 
88
  This hash can be used to reproduce the exact training configuration.
 
20
  type: squad
21
  metrics:
22
  - type: exact_match
 
 
23
  value: N/A
24
+ - type: f1
25
+ value: 85.3016055407403
26
  ---
27
 
28
  # distilbert-base-uncased fine-tuned on SQuAD
 
36
  - **Model**: distilbert-base-uncased
37
  - **Dataset**: SQuAD
38
  - **Optimizer**: adamw
39
+ - **Learning Rate Scheduler**: cosine_with_restarts
40
+ - **Learning Rate**: 3e-05
41
  - **Batch Size**: 16 per device
42
  - **Total Batch Size**: 64
43
  - **Epochs**: 5 (with early stopping)
44
  - **Weight Decay**: 0.01
45
+ - **Warmup Ratio**: 0.06
46
  - **Max Gradient Norm**: 1.0
47
 
48
  ### Early Stopping
49
 
50
+ - **Patience**: 4
51
+ - **Metric**: f1
52
  - **Best Epoch**: 3
53
 
54
  ## Usage
 
78
 
79
  The model achieved the following results on the evaluation set:
80
 
81
+ - **Exact Match**: 76.9253
82
+ - **F1 Score**: 85.2786
83
 
84
  ## Training Configuration Hash
85
 
86
+ Config Hash: fe08f7bd
87
 
88
  This hash can be used to reproduce the exact training configuration.
eval_results.json CHANGED
@@ -1,4 +1,4 @@
1
  {
2
- "exact_match": 76.32923368022706,
3
- "f1": 84.43955609103683
4
  }
 
1
  {
2
+ "exact_match": 77.02932828760643,
3
+ "f1": 85.3016055407403
4
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4511c010153b769d8e00e9b15d268e445e17f54c576492cbe1786fad692ebd17
3
  size 265470032
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:239d6746ac8fbc72bd35d8f4cdb6a01df798e8f1dc8634eee4eaa623815e5af5
3
  size 265470032
tokenizer.json CHANGED
@@ -2,13 +2,13 @@
2
  "version": "1.0",
3
  "truncation": {
4
  "direction": "Right",
5
- "max_length": 384,
6
  "strategy": "OnlySecond",
7
- "stride": 128
8
  },
9
  "padding": {
10
  "strategy": {
11
- "Fixed": 384
12
  },
13
  "direction": "Right",
14
  "pad_to_multiple_of": null,
 
2
  "version": "1.0",
3
  "truncation": {
4
  "direction": "Right",
5
+ "max_length": 512,
6
  "strategy": "OnlySecond",
7
+ "stride": 256
8
  },
9
  "padding": {
10
  "strategy": {
11
+ "Fixed": 512
12
  },
13
  "direction": "Right",
14
  "pad_to_multiple_of": null,
training_config.json CHANGED
@@ -1,15 +1,15 @@
1
  {
2
  "squad_v2": false,
3
  "model_checkpoint": "distilbert-base-uncased",
4
- "max_length": 384,
5
- "doc_stride": 128,
6
  "n_best_size": 20,
7
  "max_answer_length": 30,
8
  "batch_size": 16,
9
  "num_epochs": 5,
10
- "learning_rate": 2e-05,
11
  "weight_decay": 0.01,
12
- "warmup_ratio": 0.1,
13
  "gradient_accumulation_steps": 1,
14
  "max_grad_norm": 1.0,
15
  "optimizer_type": "adamw",
@@ -18,17 +18,17 @@
18
  0.999
19
  ],
20
  "optimizer_eps": 1e-08,
21
- "scheduler_type": "linear",
22
  "scheduler_power": 1.0,
23
  "scheduler_eta_min": 0.0,
24
- "early_stopping_patience": 3,
25
  "early_stopping_threshold": 0.001,
26
- "early_stopping_metric": "exact_match",
27
  "log_interval": 50,
28
  "eval_steps": null,
29
  "save_steps": null,
30
  "save_total_limit": 3,
31
- "wandb_project": "question-answering-distilbert-squad-qa",
32
  "wandb_entity": null,
33
  "use_wandb": true,
34
  "wandb_tags": [
@@ -40,7 +40,7 @@
40
  "hub_model_id": null,
41
  "hub_private": false,
42
  "hub_model_name_max_length": 50,
43
- "hub_versioning_strategy": "single_repo_versions",
44
  "hub_base_model_name": "distilbert-squad-qa",
45
  "seed": 42,
46
  "dataloader_num_workers": 0,
 
1
  {
2
  "squad_v2": false,
3
  "model_checkpoint": "distilbert-base-uncased",
4
+ "max_length": 512,
5
+ "doc_stride": 256,
6
  "n_best_size": 20,
7
  "max_answer_length": 30,
8
  "batch_size": 16,
9
  "num_epochs": 5,
10
+ "learning_rate": 3e-05,
11
  "weight_decay": 0.01,
12
+ "warmup_ratio": 0.06,
13
  "gradient_accumulation_steps": 1,
14
  "max_grad_norm": 1.0,
15
  "optimizer_type": "adamw",
 
18
  0.999
19
  ],
20
  "optimizer_eps": 1e-08,
21
+ "scheduler_type": "cosine_with_restarts",
22
  "scheduler_power": 1.0,
23
  "scheduler_eta_min": 0.0,
24
+ "early_stopping_patience": 4,
25
  "early_stopping_threshold": 0.001,
26
+ "early_stopping_metric": "f1",
27
  "log_interval": 50,
28
  "eval_steps": null,
29
  "save_steps": null,
30
  "save_total_limit": 3,
31
+ "wandb_project": "question-answering-enhanced",
32
  "wandb_entity": null,
33
  "use_wandb": true,
34
  "wandb_tags": [
 
40
  "hub_model_id": null,
41
  "hub_private": false,
42
  "hub_model_name_max_length": 50,
43
+ "hub_versioning_strategy": "Single_repo_versions",
44
  "hub_base_model_name": "distilbert-squad-qa",
45
  "seed": 42,
46
  "dataloader_num_workers": 0,