File size: 2,256 Bytes
c4b87d2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
import numpy as np
import torch
from scipy import signal
from statsmodels.tsa.stattools import acf


def lempel_ziv_complexity(binary_sequence: np.ndarray) -> int:
    """Count the distinct phrases found by an incremental dictionary parse.

    The sequence is scanned left to right. Starting at the current position,
    symbols are appended (via ``str``) to a growing phrase until that phrase
    has not been seen before; it is then recorded, counted, and the scan
    resumes right after it. If the end of the sequence is reached while every
    prefix of the remaining tail is already known, nothing is counted for that
    attempt and the scan restarts one symbol later.

    Args:
        binary_sequence: Indexable sequence of symbols; each element is
            converted with ``str`` before being appended to a phrase.

    Returns:
        The number of new phrases recorded (0 for an empty sequence).
    """
    seen_phrases = set()
    total = len(binary_sequence)
    phrase_count = 0
    start = 0

    while start < total:
        phrase = ""
        end = start
        found_new = False

        # Extend the candidate phrase one symbol at a time until it is novel
        # or the sequence runs out.
        while end < total:
            phrase += str(binary_sequence[end])
            end += 1
            if phrase not in seen_phrases:
                found_new = True
                break

        if found_new:
            seen_phrases.add(phrase)
            phrase_count += 1
            start = end  # continue right after the phrase just recorded
        else:
            # Whole tail is made of known prefixes: slide forward by one.
            start += 1

    return phrase_count


def is_low_quality(
    series: torch.Tensor,
    autocorr_threshold: float = 0.2,
    snr_threshold: float = 0.5,
    complexity_threshold: float = 0.4,
) -> bool:
    """
    Returns True if the series appears non-forecastable (noise-like).

    A series is flagged only when all three heuristics agree:
    - weak autocorrelation: the largest absolute autocorrelation of the
      linearly detrended series, over lags 1..min(n // 4, 40), is below
      ``autocorr_threshold``
    - low SNR proxy: variance of a moving-average smooth divided by the
      variance of the residual around it is below ``snr_threshold``
    - high normalized Lempel-Ziv complexity: phrase count of the detrended
      series binarized at its median, divided by its length, exceeds
      ``complexity_threshold``

    Degenerate inputs are reported as low quality without further analysis:
    fewer than 20 values, any non-finite value (NaN/inf), or variance
    below 1e-10.

    Args:
        series: Tensor holding one series; singleton dimensions are squeezed
            away before analysis.
        autocorr_threshold: Upper bound on autocorrelation strength for the
            series to count as uncorrelated.
        snr_threshold: Upper bound on the SNR proxy for the series to count
            as noisy.
        complexity_threshold: Lower bound on normalized complexity for the
            series to count as random-like.

    Returns:
        True when the series is judged low quality, False otherwise.

    Raises:
        ValueError: If the series passes the degenerate-input guards but is
            still not one-dimensional after squeezing.
    """
    x = series.squeeze().detach().cpu().numpy()
    if x.size < 20:
        return True
    # NaN/inf would slip past the variance guard (NaN comparisons are False)
    # and break or poison the detrending step, so reject them up front.
    if not np.all(np.isfinite(x)):
        return True
    if np.var(x) < 1e-10:
        return True
    if x.ndim != 1:
        raise ValueError(
            "is_low_quality expects a single series; got shape "
            f"{tuple(x.shape)} after squeeze()"
        )

    x_detrended = signal.detrend(x)

    # x has at least 20 samples here, so at least 5 lags are always available.
    max_lags = min(len(x_detrended) // 4, 40)
    try:
        # Drop lag 0, which is the series' correlation with itself.
        acf_vals = acf(x_detrended, nlags=max_lags, fft=True)[1:]
        autocorr_strength = float(np.max(np.abs(acf_vals)))
    except Exception:
        # Deliberate best effort: if the estimate cannot be computed, treat
        # the series as uncorrelated and let the other heuristics decide.
        autocorr_strength = 0.0

    # Trailing moving average as the "signal"; the residual of each sample
    # against the average of the window ending at it is the "noise".
    win_size = max(3, min(len(x) // 10, 15))
    signal_est = np.convolve(x, np.ones(win_size) / win_size, mode="valid")
    noise_est = x[win_size - 1 :] - signal_est
    var_signal = float(np.var(signal_est))
    var_noise = float(np.var(noise_est))
    # No measurable noise means an unbounded SNR; a finite placeholder would
    # wrongly read as "low SNR" for thresholds above it. The 1e-8 floor is an
    # absolute value, not relative to the scale of the series.
    snr_proxy = var_signal / var_noise if var_noise > 1e-8 else float("inf")

    # Binarize around the median so the complexity measure sees a 0/1 string.
    median_val = float(np.median(x_detrended))
    binary_seq = (x_detrended > median_val).astype(np.uint8)
    complexity_score = lempel_ziv_complexity(binary_seq)
    normalized_complexity = complexity_score / max(1, len(binary_seq))

    is_random_like = (snr_proxy < snr_threshold) and (
        normalized_complexity > complexity_threshold
    )
    is_uncorrelated = autocorr_strength < autocorr_threshold
    return bool(is_uncorrelated and is_random_like)