Aratako committed
Commit 77d74df · verified · 1 Parent(s): 65b0774

Add files using upload-large-folder tool

.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
+ scatter.png filter=lfs diff=lfs merge=lfs -text
config.json ADDED
@@ -0,0 +1,162 @@
+ {
+   "architectures": [
+     "ModernBertForOrdinalAndRegression"
+   ],
+   "attention_bias": false,
+   "attention_dropout": 0.0,
+   "auto_map": {
+     "AutoModelForSequenceClassification": "modeling_modernbert_reward.ModernBertForOrdinalAndRegression"
+   },
+   "blend": 0.66,
+   "bos_token_id": 1,
+   "classifier_activation": "gelu",
+   "classifier_bias": false,
+   "classifier_dropout": 0.0,
+   "classifier_pooling": "mean",
+   "cls_token_id": 6,
+   "decoder_bias": true,
+   "deterministic_flash_attn": false,
+   "dtype": "float32",
+   "embedding_dropout": 0.0,
+   "eos_token_id": 2,
+   "gamma": 0.025,
+   "global_attn_every_n_layers": 3,
+   "global_rope_theta": 160000.0,
+   "gradient_checkpointing": false,
+   "hidden_activation": "gelu",
+   "hidden_size": 256,
+   "id2label": {
+     "0": "LABEL_0",
+     "1": "LABEL_1",
+     "2": "LABEL_2",
+     "3": "LABEL_3",
+     "4": "LABEL_4",
+     "5": "LABEL_5",
+     "6": "LABEL_6",
+     "7": "LABEL_7",
+     "8": "LABEL_8",
+     "9": "LABEL_9",
+     "10": "LABEL_10",
+     "11": "LABEL_11",
+     "12": "LABEL_12",
+     "13": "LABEL_13",
+     "14": "LABEL_14",
+     "15": "LABEL_15",
+     "16": "LABEL_16",
+     "17": "LABEL_17",
+     "18": "LABEL_18",
+     "19": "LABEL_19",
+     "20": "LABEL_20",
+     "21": "LABEL_21",
+     "22": "LABEL_22",
+     "23": "LABEL_23",
+     "24": "LABEL_24",
+     "25": "LABEL_25",
+     "26": "LABEL_26",
+     "27": "LABEL_27",
+     "28": "LABEL_28",
+     "29": "LABEL_29",
+     "30": "LABEL_30",
+     "31": "LABEL_31",
+     "32": "LABEL_32",
+     "33": "LABEL_33",
+     "34": "LABEL_34",
+     "35": "LABEL_35",
+     "36": "LABEL_36",
+     "37": "LABEL_37",
+     "38": "LABEL_38",
+     "39": "LABEL_39",
+     "40": "LABEL_40",
+     "41": "LABEL_41",
+     "42": "LABEL_42",
+     "43": "LABEL_43",
+     "44": "LABEL_44",
+     "45": "LABEL_45",
+     "46": "LABEL_46",
+     "47": "LABEL_47",
+     "48": "LABEL_48",
+     "49": "LABEL_49",
+     "50": "LABEL_50"
+   },
+   "initializer_cutoff_factor": 2.0,
+   "initializer_range": 0.02,
+   "intermediate_size": 1024,
+   "label2id": {
+     "LABEL_0": 0,
+     "LABEL_1": 1,
+     "LABEL_10": 10,
+     "LABEL_11": 11,
+     "LABEL_12": 12,
+     "LABEL_13": 13,
+     "LABEL_14": 14,
+     "LABEL_15": 15,
+     "LABEL_16": 16,
+     "LABEL_17": 17,
+     "LABEL_18": 18,
+     "LABEL_19": 19,
+     "LABEL_2": 2,
+     "LABEL_20": 20,
+     "LABEL_21": 21,
+     "LABEL_22": 22,
+     "LABEL_23": 23,
+     "LABEL_24": 24,
+     "LABEL_25": 25,
+     "LABEL_26": 26,
+     "LABEL_27": 27,
+     "LABEL_28": 28,
+     "LABEL_29": 29,
+     "LABEL_3": 3,
+     "LABEL_30": 30,
+     "LABEL_31": 31,
+     "LABEL_32": 32,
+     "LABEL_33": 33,
+     "LABEL_34": 34,
+     "LABEL_35": 35,
+     "LABEL_36": 36,
+     "LABEL_37": 37,
+     "LABEL_38": 38,
+     "LABEL_39": 39,
+     "LABEL_4": 4,
+     "LABEL_40": 40,
+     "LABEL_41": 41,
+     "LABEL_42": 42,
+     "LABEL_43": 43,
+     "LABEL_44": 44,
+     "LABEL_45": 45,
+     "LABEL_46": 46,
+     "LABEL_47": 47,
+     "LABEL_48": 48,
+     "LABEL_49": 49,
+     "LABEL_5": 5,
+     "LABEL_50": 50,
+     "LABEL_6": 6,
+     "LABEL_7": 7,
+     "LABEL_8": 8,
+     "LABEL_9": 9
+   },
+   "lambda_reg": 0.075,
+   "layer_norm_eps": 1e-05,
+   "local_attention": 128,
+   "local_rope_theta": 10000.0,
+   "max_position_embeddings": 8192,
+   "mlp_bias": false,
+   "mlp_dropout": 0.0,
+   "model_type": "modernbert",
+   "norm_bias": false,
+   "norm_eps": 1e-05,
+   "num_attention_heads": 4,
+   "num_hidden_layers": 10,
+   "pad_token_id": 3,
+   "position_embedding_type": "rope",
+   "problem_type": "regression",
+   "reg_eps": 0.0001,
+   "reg_temperature": 1.0,
+   "repad_logits_with_grad": false,
+   "score_max": 10.0,
+   "score_min": 0.0,
+   "sep_token_id": 4,
+   "sparse_pred_ignore_index": -100,
+   "sparse_prediction": false,
+   "transformers_version": "4.56.2",
+   "vocab_size": 102400
+ }
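
The `auto_map` entry routes `AutoModelForSequenceClassification` to the custom class in `modeling_modernbert_reward.py`, so the checkpoint must be loaded with `trust_remote_code=True`. A minimal loading sketch (the repo id below is a placeholder, not taken from this commit):

    from transformers import AutoModelForSequenceClassification, AutoTokenizer

    repo = "Aratako/<model-name>"  # placeholder: substitute the actual model id or a local path
    tokenizer = AutoTokenizer.from_pretrained(repo)
    model = AutoModelForSequenceClassification.from_pretrained(repo, trust_remote_code=True)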
errors_hist.png ADDED
model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:fd6e302b976075219ecaa4b5d38f3feb6c79c002f42ced39ee79bbcb7e583575
+ size 147094428
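
This is a Git LFS pointer, not the weights themselves; the oid is the SHA-256 of the real file. A small sketch to verify that a locally fetched file matches the pointer's oid and size (the local path is an assumption):

    import hashlib
    import os

    path = "model.safetensors"  # assumed location after `git lfs pull` or a hub download
    h = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):  # hash in 1 MiB chunks
            h.update(chunk)
    assert os.path.getsize(path) == 147094428
    assert h.hexdigest() == "fd6e302b976075219ecaa4b5d38f3feb6c79c002f42ced39ee79bbcb7e583575"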
modeling_modernbert_reward.py ADDED
@@ -0,0 +1,203 @@
+ # modeling_modernbert_reward.py
+ import math
+ from typing import Optional, Tuple, Union
+
+ import torch
+ import torch.nn as nn
+ import torch.nn.functional as F
+ from transformers import ModernBertPreTrainedModel
+ from transformers.modeling_outputs import SequenceClassifierOutput
+ from transformers.models.modernbert.modeling_modernbert import (
+     ModernBertModel,
+     ModernBertPredictionHead,
+ )
+
+
+ class ModernBertForOrdinalAndRegression(ModernBertPreTrainedModel):
+     """
+     Multi-objective reward model: a CORAL (ordinal) head plus a regression head
+     on top of the ModernBERT backbone.
+     - config.num_labels = K (e.g. 51 → steps of 0.2 on the 0–10 scale)
+     - Training: L = L_ordinal + lambda_reg * L_regression (both terms scaled by sample_weight)
+     - Inference: a blend (ensemble) of the ordinal and regression scores
+     """
+     def __init__(self, config):
+         super().__init__(config)
+         self.config = config
+         self.model = ModernBertModel(config)
+         self.head = ModernBertPredictionHead(config)
+         self.drop = nn.Dropout(config.classifier_dropout)
+
+         self.num_bins = int(getattr(config, "num_labels", 51))
+         self.lambda_reg = float(getattr(config, "lambda_reg", 0.3))
+         self.reg_temperature = float(getattr(config, "reg_temperature", 1.0))
+         self.reg_eps = float(getattr(config, "reg_eps", 1e-4))
+         self.gamma = float(getattr(config, "gamma", 0.05))
+         self.blend = float(getattr(config, "blend", 0.5))
+         self.score_min = float(getattr(config, "score_min", 0.0))
+         self.score_max = float(getattr(config, "score_max", 10.0))
+
+         # CORAL: one shared weight vector plus monotonically increasing thresholds
+         self.coral_fc = nn.Linear(config.hidden_size, 1, bias=False)
+         self.coral_bias_raw = nn.Parameter(torch.zeros(self.num_bins - 1))
+
+         # Regression head
+         self.reg_head = nn.Linear(config.hidden_size, 1)
+
+         self.config.problem_type = "regression"
+
+         self.post_init()
+
+     def _init_weights(self, module: nn.Module):
+         super()._init_weights(module)
+
+         cutoff_factor = self.config.initializer_cutoff_factor
+         if cutoff_factor is None:
+             cutoff_factor = 3
+
+         def init_weight(module: nn.Module, std: float):
+             nn.init.trunc_normal_(
+                 module.weight,
+                 mean=0.0,
+                 std=std,
+                 a=-cutoff_factor * std,
+                 b=cutoff_factor * std,
+             )
+
+         if isinstance(module, nn.Linear):
+             if module.bias is not None:
+                 nn.init.zeros_(module.bias)
+
+         stds = {
+             "in": self.config.initializer_range,
+             "out": self.config.initializer_range / math.sqrt(2.0 * self.config.num_hidden_layers),
+             "embedding": self.config.initializer_range,
+             "final_out": self.config.hidden_size**-0.5,
+         }
+         if isinstance(module, ModernBertForOrdinalAndRegression):
+             init_weight(module.coral_fc, stds["final_out"])
+             init_weight(module.reg_head, stds["final_out"])
+             # nn.init.zeros_ runs under no_grad, which a bare in-place zero_()
+             # on a leaf Parameter would not
+             nn.init.zeros_(module.coral_bias_raw)
+
+     def _thresholds(self) -> torch.Tensor:
+         # softplus yields positive increments; the cumulative sum keeps the
+         # K-1 thresholds monotonically increasing
+         return torch.cumsum(F.softplus(self.coral_bias_raw), dim=0)
+
+     def _pool(self, last_hidden, attention_mask) -> torch.Tensor:
+         pooling = getattr(self.config, "classifier_pooling", "cls")
+         if pooling == "mean":
+             mask = attention_mask.unsqueeze(-1).to(last_hidden.dtype)
+             return (last_hidden * mask).sum(dim=1) / mask.sum(dim=1).clamp_min(1e-6)
+         return last_hidden[:, 0]  # "cls"
+
+     def forward(
+         self,
+         input_ids: Optional[torch.LongTensor] = None,
+         attention_mask: Optional[torch.Tensor] = None,
+         sliding_window_mask: Optional[torch.Tensor] = None,
+         position_ids: Optional[torch.Tensor] = None,
+         inputs_embeds: Optional[torch.Tensor] = None,
+         labels: Optional[torch.Tensor] = None,         # unused
+         labels_cont: Optional[torch.Tensor] = None,    # [B], 0..10
+         labels_bin: Optional[torch.Tensor] = None,     # [B], 0..K-1
+         sample_weight: Optional[torch.Tensor] = None,  # [B]
+         indices: Optional[torch.Tensor] = None,
+         cu_seqlens: Optional[torch.Tensor] = None,
+         max_seqlen: Optional[int] = None,
+         batch_size: Optional[int] = None,
+         seq_len: Optional[int] = None,
+         output_attentions: Optional[bool] = None,
+         output_hidden_states: Optional[bool] = None,
+         return_dict: Optional[bool] = None,
+         **kwargs,
+     ) -> Union[Tuple, SequenceClassifierOutput]:
+
+         return_dict = return_dict if return_dict is not None else self.config.use_return_dict
+
+         outputs = self.model(
+             input_ids=input_ids,
+             attention_mask=attention_mask,
+             sliding_window_mask=sliding_window_mask,
+             position_ids=position_ids,
+             inputs_embeds=inputs_embeds,
+             indices=indices,
+             cu_seqlens=cu_seqlens,
+             max_seqlen=max_seqlen,
+             batch_size=batch_size,
+             seq_len=seq_len,
+             output_attentions=output_attentions,
+             output_hidden_states=output_hidden_states,
+             return_dict=True,
+         )
+         last_hidden = outputs.last_hidden_state
+         pooled = self.head(self._pool(last_hidden, attention_mask))
+         pooled = self.drop(pooled)
+
+         # ----- Ordinal (CORAL) -----
+         z = self.coral_fc(pooled).squeeze(-1)           # [B]
+         th = self._thresholds()                         # [K-1]
+         logits_ord = z.unsqueeze(-1) - th.unsqueeze(0)  # [B, K-1]
+         p_gt = torch.sigmoid(logits_ord)                # P(y > k)
+
+         ones = torch.ones(p_gt.size(0), 1, device=p_gt.device, dtype=p_gt.dtype)
+         zeros = torch.zeros(p_gt.size(0), 1, device=p_gt.device, dtype=p_gt.dtype)
+         p_left = torch.cat([ones, p_gt], dim=1)
+         p_right = torch.cat([p_gt, zeros], dim=1)
+         p_cls = (p_left - p_right).clamp_min(0.0)       # [B, K]
+         bins = torch.arange(self.num_bins, device=p_gt.device, dtype=p_gt.dtype).unsqueeze(0)
+         expected_bin = (p_cls * bins).sum(dim=-1)       # [B]
+         score_ord = self.score_min + (self.score_max - self.score_min) * (expected_bin / (self.num_bins - 1))
+
+         # ----- Regression -----
+         reg_raw = self.reg_head(pooled).squeeze(-1)     # [B]
+         p = torch.sigmoid(reg_raw / self.reg_temperature)
+         p = p.clamp(self.reg_eps, 1.0 - self.reg_eps)
+
+         score_reg = self.score_min + (self.score_max - self.score_min) * p  # [B]
+
+         # ----- Blend (final score) -----
+         score = (1.0 - self.blend) * score_reg + self.blend * score_ord  # [B]
+         logits = score.unsqueeze(-1)  # [B, 1], values in 0..10
+
+         # ----- Loss -----
+         loss = None
+         if (labels_cont is not None) or (labels_bin is not None):
+             if sample_weight is None:
+                 sample_weight = torch.ones_like(score)
+             sw = sample_weight.to(score.device).float()
+             sw = sw / (sw.mean() + 1e-12)
+
+             loss_total = 0.0
+
+             if labels_bin is not None:
+                 # CORAL loss: weighted BCE over the K-1 "y > k" threshold indicators
+                 y = labels_bin.to(logits_ord.device).long()
+                 Km1 = self.num_bins - 1
+                 thr = torch.arange(Km1, device=y.device).unsqueeze(0)
+                 target_ord = (y.unsqueeze(1) > thr).float()  # [B, K-1]
+                 bce = F.binary_cross_entropy_with_logits(logits_ord, target_ord, reduction="none").mean(dim=-1)
+                 loss_ord = (bce * sw).sum() / sw.sum()
+                 loss_total = loss_total + loss_ord
+
+             if labels_cont is not None and self.lambda_reg > 0.0:
+                 # Huber loss on the regression head, in pre-sigmoid (logit) space
+                 y_cont = labels_cont.to(score.device).float().clamp(self.score_min, self.score_max)
+                 pt = (y_cont - self.score_min) / (self.score_max - self.score_min)
+                 pt = pt.clamp(self.reg_eps, 1.0 - self.reg_eps)
+                 t = torch.log(pt) - torch.log1p(-pt)  # logit(pt)
+                 t = self.reg_temperature * t
+                 huber = F.smooth_l1_loss(reg_raw, t, reduction="none")
+                 loss_reg = (huber * sw).sum() / sw.sum()
+                 loss_total = loss_total + self.lambda_reg * loss_reg
+                 if self.gamma > 0:
+                     # auxiliary Huber term on the blended score itself
+                     loss_total += self.gamma * (F.smooth_l1_loss(score, y_cont, reduction="none") * sw).sum() / sw.sum()
+
+             loss = loss_total
+
+         if not return_dict:
+             out = (logits,)
+             return ((loss,) + out) if loss is not None else out
+
+         return SequenceClassifierOutput(
+             loss=loss,
+             logits=logits,
+             hidden_states=outputs.hidden_states,
+             attentions=outputs.attentions,
+         )
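
`forward` returns the blended 0–10 score as a `[B, 1]` logits tensor, so scoring is a plain sequence-classification call. A usage sketch, continuing from the loading snippet above (the `<|user|>`/`<|assistant|>` input format is an assumption based on the tokenizer's added tokens, not something this commit documents):

    import torch

    text = "<|user|>How do I sort a list in Python?<|assistant|>Use sorted(xs) or xs.sort()."
    inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=8192)
    with torch.no_grad():
        out = model(**inputs)
    score = out.logits.squeeze(-1).item()  # blended ordinal + regression score in [0, 10]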
scatter.png ADDED

Git LFS Details

  • SHA256: cf5df0d73a505aea096f126f7a7c349a3f3aee8ac1f86baf5bfc0aa381444855
  • Pointer size: 131 Bytes
  • Size of remote file: 478 kB
special_tokens_map.json ADDED
@@ -0,0 +1,51 @@
+ {
+   "bos_token": {
+     "content": "<s>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "cls_token": {
+     "content": "<cls>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "eos_token": {
+     "content": "</s>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "mask_token": {
+     "content": "<mask>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "pad_token": {
+     "content": "<pad>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "sep_token": {
+     "content": "<sep>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "unk_token": {
+     "content": "<unk>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   }
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer.model ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:008293028e1a9d9a1038d9b63d989a2319797dfeaa03f171093a57b33a3a8277
+ size 1831879
tokenizer_config.json ADDED
@@ -0,0 +1,171 @@
+ {
+   "add_bos_token": true,
+   "add_dummy_prefix_space": false,
+   "add_eos_token": true,
+   "add_prefix_space": false,
+   "added_tokens_decoder": {
+     "0": {
+       "content": "<unk>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "1": {
+       "content": "<s>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "2": {
+       "content": "</s>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "3": {
+       "content": "<pad>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "4": {
+       "content": "<sep>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "5": {
+       "content": "<mask>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "6": {
+       "content": "<cls>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "7": {
+       "content": "<|system|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": false
+     },
+     "8": {
+       "content": "<|assistant|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": false
+     },
+     "9": {
+       "content": "<|user|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": false
+     },
+     "10": {
+       "content": "<|available_tools|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": false
+     },
+     "11": {
+       "content": "<|tool_calls|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": false
+     },
+     "12": {
+       "content": "<|tool_results|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": false
+     },
+     "13": {
+       "content": "<|code|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": false
+     },
+     "14": {
+       "content": "<|file|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": false
+     },
+     "102397": {
+       "content": "<|prefix|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": false
+     },
+     "102398": {
+       "content": "<|suffix|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": false
+     },
+     "102399": {
+       "content": "<|middle|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": false
+     }
+   },
+   "bos_token": "<s>",
+   "clean_up_tokenization_spaces": false,
+   "cls_token": "<cls>",
+   "do_lower_case": false,
+   "eos_token": "</s>",
+   "extra_ids": 0,
+   "extra_special_tokens": {},
+   "keep_accents": true,
+   "legacy": false,
+   "mask_token": "<mask>",
+   "model_max_length": 8192,
+   "pad_token": "<pad>",
+   "padding_side": "right",
+   "sep_token": "<sep>",
+   "sp_model_kwargs": {},
+   "spaces_between_special_tokens": false,
+   "tokenizer_class": "LlamaTokenizer",
+   "unk_token": "<unk>",
+   "use_default_system_prompt": false
+ }
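
The special-token ids here should line up with `config.json` (`pad_token_id: 3`, `sep_token_id: 4`, `cls_token_id: 6`). A quick consistency check, assuming `tokenizer` and `model` were loaded as sketched earlier:

    ids = tokenizer.convert_tokens_to_ids(["<pad>", "<sep>", "<cls>"])
    assert ids == [model.config.pad_token_id, model.config.sep_token_id, model.config.cls_token_id]  # [3, 4, 6]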