# gptbert-sun-100steps-small / configuration_gpt_bert.py

from transformers import PretrainedConfig


class GPTBertConfig(PretrainedConfig):
    """Configuration class for GPT-BERT models (backbone, heads, and `auto_map`)."""

    model_type = 'gpt_bert'

    def __init__(self, **kwargs):
        # Dropout applied to attention probabilities and hidden states.
        self.attention_probs_dropout_prob = kwargs.pop('attention_probs_dropout_prob', 0.1)
        self.hidden_dropout_prob = kwargs.pop('hidden_dropout_prob', 0.1)

        # Backbone dimensions.
        self.hidden_size = kwargs.pop('hidden_size', 768)
        self.intermediate_size = kwargs.pop('intermediate_size', 2560)

        # Maximum sequence length and the number of relative-position buckets.
        self.max_position_embeddings = kwargs.pop('max_position_embeddings', 512)
        self.position_bucket_size = kwargs.pop('position_bucket_size', 32)

        self.num_attention_heads = kwargs.pop('num_attention_heads', 12)
        self.num_hidden_layers = kwargs.pop('num_hidden_layers', 12)
        self.vocab_size = kwargs.pop('vocab_size', 16384)
        self.layer_norm_eps = kwargs.pop('layer_norm_eps', 1e-5)

        # When True, a causal (GPT-style) attention mask is always applied
        # instead of a bidirectional (BERT-style) one.
        self.force_causal_mask = kwargs.pop('force_causal_mask', True)

        # Classification-head settings.
        self.classifier_dropout = kwargs.pop('classifier_dropout', 0.1)
        self.classifier_layer_norm_eps = kwargs.pop('classifier_layer_norm_eps', 1e-5)
        # Popped with explicit defaults, then forwarded to super().__init__()
        # below: assigning self.num_labels before calling the parent would let
        # PretrainedConfig silently reset it to its own default.
        num_labels = kwargs.pop('num_labels', 2)
        problem_type = kwargs.pop('problem_type', None)
        # Registers the custom classes for the Auto* factories (loaded with
        # trust_remote_code=True); plain AutoModel maps to the masked-LM class.
        self.auto_map = {
            'AutoConfig': 'configuration_gpt_bert.GPTBertConfig',
            'AutoModel': 'modeling_gpt_bert.GPTBertForMaskedLM',
            'AutoModelForCausalLM': 'modeling_gpt_bert.GPTBertForCausalLM',
            'AutoModelForMaskedLM': 'modeling_gpt_bert.GPTBertForMaskedLM',
            'AutoModelForSequenceClassification': 'modeling_gpt_bert.GPTBertForSequenceClassification',
        }
        super().__init__(num_labels=num_labels, problem_type=problem_type, **kwargs)
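
# ---------------------------------------------------------------------------
# Usage sketch (illustrative addition, not part of the original checkpoint):
# builds the config locally and shows how the `auto_map` above is meant to be
# used. The Hub repo id below is an assumption inferred from this file's
# location.
# ---------------------------------------------------------------------------
if __name__ == '__main__':
    # Instantiate with defaults, overriding a single hyperparameter.
    config = GPTBertConfig(num_hidden_layers=6)
    print(config.model_type)         # gpt_bert
    print(config.hidden_size)        # 768
    print(config.num_hidden_layers)  # 6
    print(config.num_labels)         # 2

    # Loading the full model resolves GPTBertForMaskedLM through `auto_map`;
    # this needs network access and trust_remote_code, so it is left
    # commented out here:
    #
    # from transformers import AutoModelForMaskedLM
    # model = AutoModelForMaskedLM.from_pretrained(
    #     'jumelet/gptbert-sun-100steps-small',  # assumed repo id
    #     trust_remote_code=True,
    # )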