---
library_name: transformers
license: cc-by-4.0
language:
- de
datasets:
- SinclairSchneider/trainset_political_party_big
base_model:
- ikim-uk-essen/geberta-xlarge
---
|
|
|
|
|
Parameters: 750M
|
|
|
|
|
Context Length: 512 |
|
|
|
|
|
F1: 0.76 |
|
|
|
|
|
Precision: 0.79 |
|
|
|
|
|
Recall: 0.74 |
|
|
|
|
|
**BibTeX:** |
|
|
|
|
|
```bibtex
@misc{schneider2024GerPolClass,
    author    = { Schneider, Sinclair A M },
    title     = { German Politics Party Text Classifier },
    year      = { 2024 },
    month     = { June },
    url       = { https://huggingface.co/SinclairSchneider/german_politic_direction_DeBERTa-large },
    publisher = { Hugging Face },
    note      = { DeBERTa transformer model }
}
```
|
|
|
|
|
# Ideology Prediction of German Political Texts based on DeBERTa-large (highly experimental) |
|
|
|
|
|
Predicts the ideology of German texts on a scale from -1 (left-wing) through 0 (liberal) to 1 (right-wing).
|
|
|
|
|
Simple example:
|
|
|
|
|
```python
|
|
from transformers import pipeline, DebertaV2ForSequenceClassification, AutoTokenizer |
|
|
import numpy as np |
|
|
import pandas as pd |
|
|
import torch |
|
|
|
|
|
# Hugging Face Hub identifier of the fine-tuned party classifier.
model_name = "SinclairSchneider/german_politic_direction_DeBERTa-large"

# Load the tokenizer and the model weights (in bfloat16), then wrap both in a
# text-classification pipeline. top_k=None makes the pipeline return a score
# for every label instead of only the best one; classify() needs all of them.
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = DebertaV2ForSequenceClassification.from_pretrained(model_name, dtype=torch.bfloat16)
pipe = pipeline("text-classification", model=model, tokenizer=tokenizer, top_k=None)

# One 2-D vector per party. classify() multiplies the party scores, sorted in
# its fixed label order, with this matrix, so the rows are expected to follow
# that same order (row 0 pairs with the first label, row 5 with the last).
vectors = np.array(
    [
        [-1, 0],
        [-9.99193435e-01, 4.01556900e-02],
        [-9.18323655e-01, 3.95830349e-01],
        [ 3.82683432e-01, 9.23879533e-01],
        [ 8.69790824e-01, 4.93420634e-01],
        [1, 0],
    ]
)
|
|
|
|
|
def classify(text):
    """Return a single ideology score for a German text.

    The text is run through the module-level ``pipe``; the per-party scores
    are put into a fixed party order, combined with the module-level
    ``vectors`` matrix into one 2-D point, and the angle of that point is
    rescaled by pi/2. According to the accompanying description the result
    lies on a scale from -1 (left-wing) to 1 (right-wing).

    Args:
        text: The text to classify.

    Returns:
        The rescaled angle as a Python float.
    """
    # Fixed ordering of the party labels; sorting by this rank lines the
    # scores up with the rows of `vectors`.
    party_rank = {
        'DIE LINKE': 0,
        'BÜNDNIS 90/DIE GRÜNEN': 1,
        'SPD': 2,
        'FDP': 3,
        'CDU/CSU': 4,
        'AfD': 5,
    }

    # pipe(text)[0] holds one {'label', 'score'} entry per party.
    predictions = pd.DataFrame(pipe(text)[0])
    ordered = predictions.sort_values(
        by=['label'],
        key=lambda labels: labels.map(party_rank),
    )
    scores = np.array(ordered['score'])

    # Score-weighted sum of the party vectors gives a single 2-D point.
    first, second = scores @ vectors

    # The first component is passed as arctan2's first argument, so the angle
    # is measured from the second axis: (-1, 0) maps to -1 and (1, 0) to 1
    # after dividing by pi/2.
    return float(np.arctan2(first, second) / (np.pi / 2))
|
|
|
|
|
# Example statements grouped by expected political leaning. The comment after
# each sentence is the score that classify() printed for it.

# Left
left_examples = [
    "Wir brauchen eine Vermögensteuer, um den Sozialstaat nachhaltig zu finanzieren.",  # -0.8840736055794486
    "Mietendeckel und mehr gemeinnütziger Wohnungsbau sollen Wohnen bezahlbar machen.",  # -0.9584728540548622
    "Die Energiewende muss mit massiven öffentlichen Investitionen beschleunigt werden.",  # -0.8996415250285207
]
for sentence in left_examples:
    print(classify(sentence))

# Centre
centre_examples = [
    "Die soziale Marktwirtschaft braucht moderne Regeln und weniger Bürokratie.",  # 0.2951027133966755
    "Gezielte Entlastungen für kleine und mittlere Einkommen stärken die Mitte.",  # -0.5463382000342903
    "Bildungsoffensive: Basiskompetenzen sichern, Weiterbildung im Beruf fördern.",  # 0.16923175427437903
]
for sentence in centre_examples:
    print(classify(sentence))

# Right
right_examples = [
    "Deutsche Leitkultur und Sprache stärker in öffentlichen Einrichtungen betonen.",  # 0.9907646874287308
    "Grenzschutz an EU-Außengrenzen verstärken, Sekundärmigration begrenzen.",  # 0.7533596283240895
    "Identitätspolitik an Schulen und Behörden zurückfahren, Fokus auf Leistungsprinzip.",  # 0.9748775694774731
]
for sentence in right_examples:
    print(classify(sentence))
|
|
```