File size: 2,463 Bytes
4600161
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
{
  "agreement_threshold": 0.15,
  "best_eval_loss": 21.752999266554905,
  "eval_history_length": 4,
  "eval_metrics": {
    "avg_margin_violation": null,
    "contradiction_rate": null,
    "coverage_f1": 0.09190465965213657,
    "coverage_precision": 0.7157190635451505,
    "coverage_recall": 0.04910509407985315,
    "coverage_source": "logits",
    "coverage_support": 117723.0,
    "eval_loss": 70.75614915129924,
    "nli_pair_count": 0,
    "redundancy_index": 0.00012577735405615995,
    "redundancy_pair_count": 85866
  },
  "generated_at": "2025-12-07T15:14:33.566756+00:00",
  "global_step": 50000,
  "stage": 3,
  "train_history_length": 1100,
  "train_metrics": {
    "coverage_f1": {
      "count": 1000,
      "last": 0.0,
      "max": 0.5,
      "mean": 0.0034451770451770455,
      "min": 0.0
    },
    "coverage_precision": {
      "count": 1000,
      "last": 0.0,
      "max": 1.0,
      "mean": 0.0029482174688057043,
      "min": 0.0
    },
    "coverage_recall": {
      "count": 1000,
      "last": 0.0,
      "max": 1.0,
      "mean": 0.010866666666666667,
      "min": 0.0
    },
    "kd_ce_ratio": {
      "count": 1100,
      "last": 6.771889053472952e-07,
      "max": 1.993265237894144e-06,
      "mean": 2.7301640674022355e-07,
      "min": -2.044742390260858e-06
    },
    "loss": {
      "count": 1100,
      "last": 432.0,
      "max": 604.0336303710938,
      "mean": 405.4652520197088,
      "min": 216.2018280029297
    },
    "repair_error_rate": {
      "count": 3,
      "last": 0.0,
      "max": 0.0,
      "mean": 0.0,
      "min": 0.0
    },
    "repair_margin": {
      "count": 3,
      "last": 224.0,
      "max": 294.0,
      "mean": 261.3333333333333,
      "min": 224.0
    },
    "rollback_kl": {
      "count": 3,
      "last": 0.0,
      "max": 0.0,
      "mean": 0.0,
      "min": 0.0
    },
    "stability_error_rate": {
      "count": 3,
      "last": 0.0,
      "max": 0.0,
      "mean": 0.0,
      "min": 0.0
    },
    "stability_kl": {
      "count": 3,
      "last": 0.0,
      "max": 2.837623469531536e-09,
      "mean": -1.8531864043325186e-07,
      "min": -5.587935447692871e-07
    },
    "stability_margin": {
      "count": 3,
      "last": 235.0,
      "max": 258.0,
      "mean": 246.33333333333334,
      "min": 235.0
    },
    "stage": {
      "last": 3.0
    },
    "usage_loss": {
      "count": 1100,
      "last": 0.0,
      "max": 0.0,
      "mean": 0.0,
      "min": 0.0
    }
  }
}