Update spaCy pipeline
Browse files
- README.md +7 -34
- config.cfg +2 -2
- de_ner_d_dist-any-py3-none-any.whl +2 -2
- meta.json +25 -22
- ner/model +1 -1
- ner/moves +1 -1
- transformer/model +1 -1
- vocab/strings.json +0 -0
README.md
CHANGED
|
@@ -13,18 +13,18 @@ model-index:
|
|
| 13 |
metrics:
|
| 14 |
- name: NER Precision
|
| 15 |
type: precision
|
| 16 |
-
value: 0.
|
| 17 |
- name: NER Recall
|
| 18 |
type: recall
|
| 19 |
-
value: 0.
|
| 20 |
- name: NER F Score
|
| 21 |
type: f_score
|
| 22 |
-
value: 0.
|
| 23 |
---
|
| 24 |
| Feature | Description |
|
| 25 |
| --- | --- |
|
| 26 |
| **Name** | `de_ner_d_dist` |
|
| 27 |
-
| **Version** | `0.0
|
| 28 |
| **spaCy** | `>=3.8.7,<3.9.0` |
|
| 29 |
| **Default Pipeline** | `transformer`, `ner` |
|
| 30 |
| **Components** | `transformer`, `ner` |
|
|
@@ -49,33 +49,6 @@ model-index:
|
|
| 49 |
|
| 50 |
| Type | Score |
|
| 51 |
| --- | --- |
|
| 52 |
-
| `
|
| 53 |
-
| `
|
| 54 |
-
| `
|
| 55 |
-
| `TRANSFORMER_LOSS` | 220810.34 |
|
| 56 |
-
| `NER_LOSS` | 377981.31 |
|
| 57 |
-
|
| 58 |
-
## Evaluation on Conll03 (updated version from 2006)
|
| 59 |
-
| Type | Score |
|
| 60 |
-
| --- | --- |
|
| 61 |
-
| `ENTS_F` | 81.34 |
|
| 62 |
-
| `ENTS_P` | 85.76 |
|
| 63 |
-
| `ENTS_R` | 77.35 |
|
| 64 |
-
|
| 65 |
-
### Per Type
|
| 66 |
-
| Type | F-Score |
|
| 67 |
-
| --- | --- |
|
| 68 |
-
| `ORG` | 69.86 |
|
| 69 |
-
| `LOC` | 79.14 |
|
| 70 |
-
| `PER` | 93.53|
|
| 71 |
-
| `MISC` | 37.17 |
|
| 72 |
-
|
| 73 |
-
## Datasets used for training
|
| 74 |
-
* D. Benikova, C. Biemann, M. Reznicek (2014). NoSta-D Named Entity Annotation for German: Guidelines and Dataset. Proceedings of LREC 2014, Reykjavik, Iceland.
|
| 75 |
-
* M. Schiersch, V. Mironova, M. Schmitt, P. Thomas, A. Gabryszak, L. Hennig (2018). A German Corpus for Fine-Grained Named Entity Recognition and Relation Extraction of Traffic and Industry Events. Proceedings of LREC 2018, Miyazaki, Japan.
|
| 76 |
-
* J. Zöllner, K. Sperfeld, C. Wick, R. Labahn (2021). Optimizing Small BERTs Trained for German NER. Information 2021, 12, 443.
|
| 77 |
-
* M. Ehrmann, M. Romanello, A. Flückiger, and S. Clematide (2020). Extended Overview of CLEF HIPE 2020: Named Entity Processing on Historical Newspapers in Working Notes of CLEF 2020 - Conference and Labs of the Evaluation Forum, Thessaloniki, Greece, 2020, vol. 2696, p. 38. doi: 10.5281/zenodo.4117566.
|
| 78 |
-
* L. Hennig, P. T. Truong, A. Gabryszak (2021). Mobie: A German Dataset for Named Entity Recognition, Entity Linking and Relation Extraction in the Mobility Domain. arXiv preprint arXiv:2108.06955.
|
| 79 |
-
* A. Hamdi, E. Linhares Pontes, E. Boros, T. T. H. Nguyen, G. Hackl, J. G. Moreno, A. Doucet (2021). Multilingual Dataset for Named Entity Recognition, Entity Linking and Stance Detection in Historical Newspapers (V1.0) [Data set]. Zenodo. https://doi.org/10.5281/zenodo.4573313
|
| 80 |
-
* J. Nothman, N. Ringland, W. Radford, T. Murphy, J. R. Curran (2013). Learning Multilingual Named Entity Recognition from Wikipedia. Artificial Intelligence, 194, 151-175.
|
| 81 |
-
* S. Schweter (2025). HisGermaNER (Revision 83571b3). doi: 10.57967/hf/5770, https://huggingface.co/datasets/stefan-it/HisGermaNER.
|
|
|
|
| 13 |
metrics:
|
| 14 |
- name: NER Precision
|
| 15 |
type: precision
|
| 16 |
+
value: 0.909263658
|
| 17 |
- name: NER Recall
|
| 18 |
type: recall
|
| 19 |
+
value: 0.8684210526
|
| 20 |
- name: NER F Score
|
| 21 |
type: f_score
|
| 22 |
+
value: 0.8883731724
|
| 23 |
---
|
| 24 |
| Feature | Description |
|
| 25 |
| --- | --- |
|
| 26 |
| **Name** | `de_ner_d_dist` |
|
| 27 |
+
| **Version** | `2.0.0` |
|
| 28 |
| **spaCy** | `>=3.8.7,<3.9.0` |
|
| 29 |
| **Default Pipeline** | `transformer`, `ner` |
|
| 30 |
| **Components** | `transformer`, `ner` |
|
|
|
|
| 49 |
|
| 50 |
| Type | Score |
|
| 51 |
| --- | --- |
|
| 52 |
+
| `ENTS_P` | 90.93 |
|
| 53 |
+
| `ENTS_R` | 86.84 |
|
| 54 |
+
| `ENTS_F` | 88.84 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
config.cfg
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
[paths]
|
| 2 |
-
train = "
|
| 3 |
-
dev = "
|
| 4 |
vectors = null
|
| 5 |
init_tok2vec = null
|
| 6 |
|
|
|
|
| 1 |
[paths]
|
| 2 |
+
train = "dataset/ner-d.train.spacy"
|
| 3 |
+
dev = "dataset/ner-d.dev.spacy"
|
| 4 |
vectors = null
|
| 5 |
init_tok2vec = null
|
| 6 |
|
de_ner_d_dist-any-py3-none-any.whl
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:df1086959a4b0f37ca5e2c0eb04b59deb1ee9c42d974e915f4dca809a45d9dfa
|
| 3 |
+
size 407768587
|
meta.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
| 1 |
{
|
| 2 |
"lang":"de",
|
| 3 |
"name":"ner_d_dist",
|
| 4 |
-
"version":"0.0
|
| 5 |
"description":"",
|
| 6 |
"author":"",
|
| 7 |
"email":"",
|
|
@@ -38,33 +38,36 @@
|
|
| 38 |
|
| 39 |
],
|
| 40 |
"performance":{
|
| 41 |
-
"
|
| 42 |
-
"
|
| 43 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
| 44 |
"ents_per_type":{
|
| 45 |
-
"
|
| 46 |
-
"p":0.
|
| 47 |
-
"r":0.
|
| 48 |
-
"f":0.
|
| 49 |
-
},
|
| 50 |
-
"MISC":{
|
| 51 |
-
"p":0.8093037585,
|
| 52 |
-
"r":0.7234921509,
|
| 53 |
-
"f":0.7639959285
|
| 54 |
},
|
| 55 |
"ORG":{
|
| 56 |
-
"p":0.
|
| 57 |
-
"r":0.
|
| 58 |
-
"f":0.
|
| 59 |
},
|
| 60 |
-
"
|
| 61 |
-
"p":0.
|
| 62 |
-
"r":0.
|
| 63 |
-
"f":0.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 64 |
}
|
| 65 |
},
|
| 66 |
-
"
|
| 67 |
-
"ner_loss":1562.7839800269
|
| 68 |
},
|
| 69 |
"requirements":[
|
| 70 |
"spacy-transformers>=1.3.9,<1.4.0"
|
|
|
|
| 1 |
{
|
| 2 |
"lang":"de",
|
| 3 |
"name":"ner_d_dist",
|
| 4 |
+
"version":"2.0.0",
|
| 5 |
"description":"",
|
| 6 |
"author":"",
|
| 7 |
"email":"",
|
|
|
|
| 38 |
|
| 39 |
],
|
| 40 |
"performance":{
|
| 41 |
+
"token_acc":null,
|
| 42 |
+
"token_p":null,
|
| 43 |
+
"token_r":null,
|
| 44 |
+
"token_f":null,
|
| 45 |
+
"ents_p":0.909263658,
|
| 46 |
+
"ents_r":0.8684210526,
|
| 47 |
+
"ents_f":0.8883731724,
|
| 48 |
"ents_per_type":{
|
| 49 |
+
"LOC":{
|
| 50 |
+
"p":0.9019607843,
|
| 51 |
+
"r":0.876747141,
|
| 52 |
+
"f":0.8891752577
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 53 |
},
|
| 54 |
"ORG":{
|
| 55 |
+
"p":0.8316582915,
|
| 56 |
+
"r":0.773364486,
|
| 57 |
+
"f":0.8014527845
|
| 58 |
},
|
| 59 |
+
"PER":{
|
| 60 |
+
"p":0.9554794521,
|
| 61 |
+
"r":0.9500567537,
|
| 62 |
+
"f":0.952760387
|
| 63 |
+
},
|
| 64 |
+
"MISC":{
|
| 65 |
+
"p":0.8484848485,
|
| 66 |
+
"r":0.5185185185,
|
| 67 |
+
"f":0.6436781609
|
| 68 |
}
|
| 69 |
},
|
| 70 |
+
"speed":28155.5503559723
|
|
|
|
| 71 |
},
|
| 72 |
"requirements":[
|
| 73 |
"spacy-transformers>=1.3.9,<1.4.0"
|
ner/model
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 225962
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c01412ea30d5595fce9344b6b3ae99c35f6d0299951c3b8cb65ebf34ba1f934d
|
| 3 |
size 225962
|
ner/moves
CHANGED
|
@@ -1 +1 @@
|
|
| 1 |
-
��moves��{"0":{},"1":{"LOC":
|
|
|
|
| 1 |
+
��moves��{"0":{},"1":{"LOC":137997,"PER":96305,"ORG":74176,"MISC":49655},"2":{"LOC":137997,"PER":96305,"ORG":74176,"MISC":49655},"3":{"LOC":137997,"PER":96305,"ORG":74176,"MISC":49655},"4":{"LOC":137997,"PER":96305,"ORG":74176,"MISC":49655,"":1},"5":{"":1}}�cfg��neg_key�
|
transformer/model
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 437388364
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:990fd546cca7afed0c785f0f292e4e20990c3b7c49f8c3b6a4f5e7297f19ab92
|
| 3 |
size 437388364
|
vocab/strings.json
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|