tkhangg0910 committed on
Commit df0983e · verified · 1 Parent(s): ff861f0

Upload folder using huggingface_hub

__init__.py ADDED
@@ -0,0 +1,2 @@
+ from .configuration_viconbert import ViConBERTConfig
+ from .modeling_viconbert import ViConBERT
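
Because `viconbert` is a custom `model_type`, the two classes exported above are not known to the transformers Auto factories by default. A minimal registration sketch, assuming this folder is importable as a local package named `viconbert` (the package name is an assumption, not part of the commit):

```python
# Sketch only: the `viconbert` package name is an assumption for illustration.
from transformers import AutoConfig, AutoModel
from viconbert import ViConBERTConfig, ViConBERT

# Tell the Auto factories about the custom "viconbert" model_type so that
# AutoConfig / AutoModel can resolve this repo's classes.
AutoConfig.register("viconbert", ViConBERTConfig)
AutoModel.register(ViConBERTConfig, ViConBERT)
```

An alternative would be an `auto_map` entry in config.json so the classes can be loaded from the Hub with `trust_remote_code=True`; the config.json shipped in this commit does not include one.
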
bpe.codes ADDED
The diff for this file is too large to render. See raw diff
 
config.json ADDED
@@ -0,0 +1,12 @@
+ {
+   "model_type": "viconbert",
+   "base_model": "vinai/phobert-base",
+   "base_model_cache_dir": "embeddings/vinai/phobert-base",
+   "hidden_dim": 512,
+   "out_dim": 768,
+   "dropout": 0.3,
+   "num_layers": 1,
+   "num_head": 3,
+   "encoder_type": "attentive",
+   "context_window_size": 3
+ }
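
For reference, the values above can be read back into a `ViConBERTConfig` from a local clone of this repo. A minimal sketch, where the directory path and the `viconbert` package import are illustrative assumptions:

```python
# Illustrative: assumes a local clone at ./viconbert-repo and the package import above.
from viconbert import ViConBERTConfig

config = ViConBERTConfig.from_pretrained("./viconbert-repo")
print(config.base_model, config.out_dim, config.dropout)  # vinai/phobert-base 768 0.3
```
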
configuration_viconbert.py ADDED
@@ -0,0 +1,28 @@
+ from transformers import PretrainedConfig
+
+ class ViConBERTConfig(PretrainedConfig):
+     model_type = "viconbert"
+
+     def __init__(
+         self,
+         base_model="vinai/phobert-base",
+         base_model_cache_dir="embeddings/base_models",
+         hidden_dim=512,
+         out_dim=768,
+         dropout=0.1,
+         num_layers=1,
+         num_head=3,
+         encoder_type="attentive",
+         context_window_size=3,
+         **kwargs
+     ):
+         super().__init__(**kwargs)
+         self.base_model = base_model
+         self.base_model_cache_dir = base_model_cache_dir
+         self.hidden_dim = hidden_dim
+         self.out_dim = out_dim
+         self.dropout = dropout
+         self.num_layers = num_layers
+         self.num_head = num_head
+         self.encoder_type = encoder_type
+         self.context_window_size = context_window_size
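
Note that some constructor defaults (e.g. `dropout=0.1`, `base_model_cache_dir="embeddings/base_models"`) differ from the shipped config.json, so the JSON values win when loading from the repo. A short sketch of building and serializing a config explicitly, with the output directory chosen here for illustration:

```python
from viconbert import ViConBERTConfig

# Match the shipped config.json rather than the constructor defaults.
config = ViConBERTConfig(dropout=0.3, base_model_cache_dir="embeddings/vinai/phobert-base")
config.save_pretrained("./viconbert-config")  # writes a config.json (illustrative path)
```
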
modeling_viconbert.py ADDED
@@ -0,0 +1,83 @@
+ import torch
+ import torch.nn as nn
+ from transformers import PreTrainedModel, AutoModel
+ from .configuration_viconbert import ViConBERTConfig
+
+
+ class MLPBlock(nn.Module):
+     def __init__(self, input_dim, hidden_dim, output_dim,
+                  num_layers=2, dropout=0.3, activation=nn.GELU, use_residual=True):
+         super().__init__()
+         self.use_residual = use_residual
+         self.activation_fn = activation()
+
+         self.input_layer = nn.Linear(input_dim, hidden_dim)
+         self.hidden_layers = nn.ModuleList()
+         self.norms = nn.ModuleList()
+         self.dropouts = nn.ModuleList()
+         for _ in range(num_layers):
+             self.hidden_layers.append(nn.Linear(hidden_dim, hidden_dim))
+             self.norms.append(nn.LayerNorm(hidden_dim))
+             self.dropouts.append(nn.Dropout(dropout))
+         self.output_layer = nn.Linear(hidden_dim, output_dim)
+
+     def forward(self, x):
+         x = self.input_layer(x)
+         for layer, norm, dropout in zip(self.hidden_layers, self.norms, self.dropouts):
+             residual = x
+             x = layer(x)
+             x = norm(x)
+             x = dropout(x)
+             x = self.activation_fn(x)
+             if self.use_residual:
+                 x = x + residual
+         x = self.output_layer(x)
+         return x
+
+
+ class ViConBERT(PreTrainedModel):
+     config_class = ViConBERTConfig
+
+     def __init__(self, config):
+         super().__init__(config)
+         self.context_encoder = AutoModel.from_pretrained(
+             config.base_model, cache_dir=config.base_model_cache_dir
+         )
+         self.context_projection = MLPBlock(
+             self.context_encoder.config.hidden_size,
+             config.hidden_dim,
+             config.out_dim,
+             dropout=config.dropout,
+             num_layers=config.num_layers
+         )
+         self.context_attention = nn.MultiheadAttention(
+             self.context_encoder.config.hidden_size,
+             num_heads=config.num_head,
+             dropout=config.dropout
+         )
+         self.context_window_size = config.context_window_size
+         self.context_layer_weights = nn.Parameter(
+             torch.zeros(self.context_encoder.config.num_hidden_layers)
+         )
+         self.post_init()
+
+     def _encode_context_attentive(self, text, target_span):
+         outputs = self.context_encoder(**text)
+         hidden_states = outputs[0]
+         start_pos, end_pos = target_span[:, 0], target_span[:, 1]
+
+         positions = torch.arange(hidden_states.size(1), device=hidden_states.device)
+         mask = (positions >= start_pos.unsqueeze(1)) & (positions <= end_pos.unsqueeze(1))
+         masked_states = hidden_states * mask.unsqueeze(-1)
+         span_lengths = mask.sum(dim=1, keepdim=True).clamp(min=1)
+         pooled_embeddings = masked_states.sum(dim=1) / span_lengths
+
+         Q_value = pooled_embeddings.unsqueeze(0)
+         KV_value = hidden_states.permute(1, 0, 2)
+         context_emb, _ = self.context_attention(Q_value, KV_value, KV_value)
+         return context_emb
+
+     def forward(self, context, target_span):
+         context_emb = self._encode_context_attentive(context, target_span)
+         return self.context_projection(context_emb.squeeze(0))
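
`ViConBERT.forward` takes a batch of tokenized contexts plus a `(batch, 2)` tensor of start/end token indices marking the target word: it mean-pools the backbone states over that span, uses the pooled vector as a query attending over the full sequence, and projects the result to `out_dim`. A usage sketch, assuming `model` is an already-loaded ViConBERT and that the sentences and span indices below are placeholders:

```python
import torch
from transformers import AutoTokenizer

# Placeholder inputs; PhoBERT normally expects word-segmented Vietnamese.
tokenizer = AutoTokenizer.from_pretrained("vinai/phobert-base")
batch = tokenizer(
    ["Tôi ra bờ sông câu cá .", "Ngân_hàng tăng lãi_suất ."],
    padding=True, truncation=True, max_length=256, return_tensors="pt",
)
# (start, end) token indices of the target word in each tokenized sequence.
target_span = torch.tensor([[3, 3], [1, 1]])

model.eval()
with torch.no_grad():
    emb = model(batch, target_span)  # shape: (batch_size, config.out_dim) == (2, 768)
```
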
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:5fabd7f95c3cff17df969a861f6fec555cd54a3b340a3b208bfe3a91f7016d22
+ size 560246835
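
The checkpoint itself is stored as a Git LFS pointer (about 560 MB). With the configuration and modeling files importable, the fine-tuned weights can be restored from a local clone; a sketch, where the directory name is an assumption:

```python
from viconbert import ViConBERT

# __init__ first pulls the vinai/phobert-base backbone, then from_pretrained
# overwrites it (and the projection/attention heads) with this repo's weights.
model = ViConBERT.from_pretrained("./viconbert-repo")
model.eval()
```
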
special_tokens_map.json ADDED
@@ -0,0 +1,9 @@
+ {
+   "bos_token": "<s>",
+   "cls_token": "<s>",
+   "eos_token": "</s>",
+   "mask_token": "<mask>",
+   "pad_token": "<pad>",
+   "sep_token": "</s>",
+   "unk_token": "<unk>"
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,12 @@
+ {
+   "bos_token": "<s>",
+   "clean_up_tokenization_spaces": true,
+   "cls_token": "<s>",
+   "eos_token": "</s>",
+   "mask_token": "<mask>",
+   "model_max_length": 256,
+   "pad_token": "<pad>",
+   "sep_token": "</s>",
+   "tokenizer_class": "PhobertTokenizer",
+   "unk_token": "<unk>"
+ }
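
Together with bpe.codes, vocab.txt, and tokenizer.json, the two JSON files above describe a standard PhobertTokenizer capped at 256 tokens. A loading sketch, with the local directory name again illustrative:

```python
from transformers import AutoTokenizer

# "tokenizer_class": "PhobertTokenizer" in tokenizer_config.json selects the class.
tokenizer = AutoTokenizer.from_pretrained("./viconbert-repo")
print(tokenizer.model_max_length)  # 256
```
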
training_state.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:a5b63e158f11ea10dcac42c00968fc9d028c8800626b6c4b2847a8b46a61123d
+ size 1115763304
vocab.txt ADDED
The diff for this file is too large to render. See raw diff