gremid committed on
Commit
10a193d
·
verified ·
1 Parent(s): f75b0a8

Update spaCy pipeline

Browse files
README.md CHANGED
@@ -13,18 +13,18 @@ model-index:
13
  metrics:
14
  - name: NER Precision
15
  type: precision
16
- value: 0.8598145591
17
  - name: NER Recall
18
  type: recall
19
- value: 0.846751632
20
  - name: NER F Score
21
  type: f_score
22
- value: 0.8532331004
23
  ---
24
  | Feature | Description |
25
  | --- | --- |
26
  | **Name** | `de_ner_d_dist` |
27
- | **Version** | `0.0.8` |
28
  | **spaCy** | `>=3.8.7,<3.9.0` |
29
  | **Default Pipeline** | `transformer`, `ner` |
30
  | **Components** | `transformer`, `ner` |
@@ -49,33 +49,6 @@ model-index:
49
 
50
  | Type | Score |
51
  | --- | --- |
52
- | `ENTS_F` | 85.13 |
53
- | `ENTS_P` | 86.37 |
54
- | `ENTS_R` | 83.91 |
55
- | `TRANSFORMER_LOSS` | 220810.34 |
56
- | `NER_LOSS` | 377981.31 |
57
-
58
- ## Evaluation on Conll03 (updated version from 2006)
59
- | Type | Score |
60
- | --- | --- |
61
- | `ENTS_F` | 81.34 |
62
- | `ENTS_P` | 85.76 |
63
- | `ENTS_R` | 77.35 |
64
-
65
- ### Per Type
66
- | Type | F-Score |
67
- | --- | --- |
68
- | `ORG` | 69.86 |
69
- | `LOC` | 79.14 |
70
- | `PER` | 93.53|
71
- | `MISC` | 37.17 |
72
-
73
- ## Datasets used for training
74
- * D. Benikova, C. Biemann, M. Reznicek (2014). NoSta-D Named Entity Annotation for German: Guidelines and Dataset. Proceedings of LREC 2014, Reykjavik, Iceland.
75
- * M. Schiersch, V. Mironova, M. Schmitt, P. Thomas, A. Gabryszak, L. Hennig (2018). A German Corpus for Fine-Grained Named Entity Recognition and Relation Extraction of Traffic and Industry Events. Proceedings of LREC 2018, Miyazaki, Japan.
76
- * J. Zöllner, K. Sperfeld, C. Wick, R. Labahn (2021). Optimizing Small BERTs Trained for German NER. Information 2021, 12, 443.
77
- * M. Ehrmann, M. Romanello, A. Flückiger, and S. Clematide (2020). Extended Overview of CLEF HIPE 2020: Named Entity Processing on Historical Newspapers in Working Notes of CLEF 2020 - Conference and Labs of the Evaluation Forum, Thessaloniki, Greece, 2020, vol. 2696, p. 38. doi: 10.5281/zenodo.4117566.
78
- * L. Hennig, P. T. Truong, A. Gabryszak (2021). Mobie: A German Dataset for Named Entity Recognition, Entity Linking and Relation Extraction in the Mobility Domain. arXiv preprint arXiv:2108.06955.
79
- * A. Hamdi, E. Linhares Pontes, E. Boros, T. T. H. Nguyen, G. Hackl, J. G. Moreno, A. Doucet (2021). Multilingual Dataset for Named Entity Recognition, Entity Linking and Stance Detection in Historical Newspapers (V1.0) [Data set]. Zenodo. https://doi.org/10.5281/zenodo.4573313
80
- * J. Nothman, N. Ringland, W. Radford, T. Murphy, J. R. Curran (2013). Learning Multilingual Named Entity Recognition from Wikipedia. Artificial Intelligence, 194, 151-175.
81
- * S. Schweter (2025). HisGermaNER (Revision 83571b3). doi: 10.57967/hf/5770, https://huggingface.co/datasets/stefan-it/HisGermaNER.
 
13
  metrics:
14
  - name: NER Precision
15
  type: precision
16
+ value: 0.909263658
17
  - name: NER Recall
18
  type: recall
19
+ value: 0.8684210526
20
  - name: NER F Score
21
  type: f_score
22
+ value: 0.8883731724
23
  ---
24
  | Feature | Description |
25
  | --- | --- |
26
  | **Name** | `de_ner_d_dist` |
27
+ | **Version** | `2.0.0` |
28
  | **spaCy** | `>=3.8.7,<3.9.0` |
29
  | **Default Pipeline** | `transformer`, `ner` |
30
  | **Components** | `transformer`, `ner` |
 
49
 
50
  | Type | Score |
51
  | --- | --- |
52
+ | `ENTS_P` | 90.93 |
53
+ | `ENTS_R` | 86.84 |
54
+ | `ENTS_F` | 88.84 |
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
config.cfg CHANGED
@@ -1,6 +1,6 @@
1
  [paths]
2
- train = "corpus/train.spacy"
3
- dev = "corpus/dev.spacy"
4
  vectors = null
5
  init_tok2vec = null
6
 
 
1
  [paths]
2
+ train = "dataset/ner-d.train.spacy"
3
+ dev = "dataset/ner-d.dev.spacy"
4
  vectors = null
5
  init_tok2vec = null
6
 
de_ner_d_dist-any-py3-none-any.whl CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2f7c1e26e65312c76089387006bd96c1defff2d355c476d0088e69d47b1cbb54
3
- size 407601001
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:df1086959a4b0f37ca5e2c0eb04b59deb1ee9c42d974e915f4dca809a45d9dfa
3
+ size 407768587
meta.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
  "lang":"de",
3
  "name":"ner_d_dist",
4
- "version":"0.0.8",
5
  "description":"",
6
  "author":"",
7
  "email":"",
@@ -38,33 +38,36 @@
38
 
39
  ],
40
  "performance":{
41
- "ents_f":0.8532331004,
42
- "ents_p":0.8598145591,
43
- "ents_r":0.846751632,
 
 
 
 
44
  "ents_per_type":{
45
- "PER":{
46
- "p":0.8720152818,
47
- "r":0.9011350551,
48
- "f":0.886336057
49
- },
50
- "MISC":{
51
- "p":0.8093037585,
52
- "r":0.7234921509,
53
- "f":0.7639959285
54
  },
55
  "ORG":{
56
- "p":0.8100463302,
57
- "r":0.7707656613,
58
- "f":0.7899179646
59
  },
60
- "LOC":{
61
- "p":0.8846876067,
62
- "r":0.8846876067,
63
- "f":0.8846876067
 
 
 
 
 
64
  }
65
  },
66
- "transformer_loss":1085.9911612031,
67
- "ner_loss":1562.7839800269
68
  },
69
  "requirements":[
70
  "spacy-transformers>=1.3.9,<1.4.0"
 
1
  {
2
  "lang":"de",
3
  "name":"ner_d_dist",
4
+ "version":"2.0.0",
5
  "description":"",
6
  "author":"",
7
  "email":"",
 
38
 
39
  ],
40
  "performance":{
41
+ "token_acc":null,
42
+ "token_p":null,
43
+ "token_r":null,
44
+ "token_f":null,
45
+ "ents_p":0.909263658,
46
+ "ents_r":0.8684210526,
47
+ "ents_f":0.8883731724,
48
  "ents_per_type":{
49
+ "LOC":{
50
+ "p":0.9019607843,
51
+ "r":0.876747141,
52
+ "f":0.8891752577
 
 
 
 
 
53
  },
54
  "ORG":{
55
+ "p":0.8316582915,
56
+ "r":0.773364486,
57
+ "f":0.8014527845
58
  },
59
+ "PER":{
60
+ "p":0.9554794521,
61
+ "r":0.9500567537,
62
+ "f":0.952760387
63
+ },
64
+ "MISC":{
65
+ "p":0.8484848485,
66
+ "r":0.5185185185,
67
+ "f":0.6436781609
68
  }
69
  },
70
+ "speed":28155.5503559723
 
71
  },
72
  "requirements":[
73
  "spacy-transformers>=1.3.9,<1.4.0"
ner/model CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c305f2d865177cb86d7626a2686deb16e2736ec69619da6c19e372fc9de4e061
3
  size 225962
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c01412ea30d5595fce9344b6b3ae99c35f6d0299951c3b8cb65ebf34ba1f934d
3
  size 225962
ner/moves CHANGED
@@ -1 +1 @@
1
- ��moves��{"0":{},"1":{"LOC":123564,"PER":89901,"ORG":69200,"MISC":46310},"2":{"LOC":123564,"PER":89901,"ORG":69200,"MISC":46310},"3":{"LOC":123564,"PER":89901,"ORG":69200,"MISC":46310},"4":{"LOC":123564,"PER":89901,"ORG":69200,"MISC":46310,"":1},"5":{"":1}}�cfg��neg_key�
 
1
+ ��moves��{"0":{},"1":{"LOC":137997,"PER":96305,"ORG":74176,"MISC":49655},"2":{"LOC":137997,"PER":96305,"ORG":74176,"MISC":49655},"3":{"LOC":137997,"PER":96305,"ORG":74176,"MISC":49655},"4":{"LOC":137997,"PER":96305,"ORG":74176,"MISC":49655,"":1},"5":{"":1}}�cfg��neg_key�
transformer/model CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9442cf82455edafc40feaf4f4ee239c9f72ef89cfefa01f70c4fafc503b85786
3
  size 437388364
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:990fd546cca7afed0c785f0f292e4e20990c3b7c49f8c3b6a4f5e7297f19ab92
3
  size 437388364
vocab/strings.json CHANGED
The diff for this file is too large to render. See raw diff