from typing import List, Optional

try:
    from transformers import AutoModelForSequenceClassification, AutoTokenizer
    import torch
except ImportError:
    AutoModelForSequenceClassification = None
    AutoTokenizer = None
    torch = None


class ThreatModel:
    """
    Transformer wrapper. If transformers is not installed,
    falls back to dummy mode and returns empty probabilities.
    """
    def __init__(self, model_path: str, device: Optional[str] = None):
        self.available = AutoModelForSequenceClassification is not None and torch is not None
        self.model = None
        self.tokenizer = None
        self.device = "cpu"

        if not self.available:
            return

        # Prefer an explicitly requested device, otherwise use CUDA when available.
        self.device = device or ("cuda" if torch.cuda.is_available() else "cpu")
        self.tokenizer = AutoTokenizer.from_pretrained(model_path)
        self.model = AutoModelForSequenceClassification.from_pretrained(model_path)
        self.model.to(self.device)
        self.model.eval()  # inference only: disable dropout and other training-time behavior

    def predict_proba(self, text: str) -> List[float]:
        if not self.available or self.model is None or self.tokenizer is None:
            return []

        # Tokenize the input; truncate so long texts fit the model's max length.
        inputs = self.tokenizer(
            text,
            return_tensors="pt",
            truncation=True,
            padding=True
        ).to(self.device)

        # Forward pass without gradient tracking, then convert the logits to
        # class probabilities for the single input sequence.
        with torch.no_grad():
            outputs = self.model(**inputs)
            logits = outputs.logits
            probs = torch.softmax(logits, dim=-1).cpu().tolist()[0]

        return probs
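

# --- Usage sketch (illustration only; not part of the original module) ---
# The checkpoint name below is a placeholder assumption; substitute the
# project's own fine-tuned threat-classification model path.
if __name__ == "__main__":
    clf = ThreatModel("distilbert-base-uncased-finetuned-sst-2-english")
    scores = clf.predict_proba("example input text")
    # scores is [] when transformers/torch are unavailable, otherwise a list
    # of per-class probabilities that sums to roughly 1.0.
    print(scores)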