# gptbert-zho-500steps-base / configuration_gpt_bert.py
# Uploaded by jumelet — "Add main & ema weights for zho" (commit 06615df, verified)
from transformers import PretrainedConfig
class GPTBertConfig(PretrainedConfig):
    """Configuration for the GPT-BERT model (``gpt_bert``).

    Stores the hyperparameters consumed by ``modeling_gpt_bert`` and wires up
    ``auto_map`` so the Hub's ``trust_remote_code`` auto-classes resolve to the
    custom implementations shipped alongside this file.

    Args:
        attention_probs_dropout_prob (float): Dropout on attention probabilities.
        hidden_dropout_prob (float): Dropout on hidden states.
        hidden_size (int): Dimensionality of embeddings and hidden layers.
        intermediate_size (int): Dimensionality of the feed-forward layer.
        max_position_embeddings (int): Maximum supported sequence length.
        position_bucket_size (int): Bucket count for relative position embeddings.
        num_attention_heads (int): Attention heads per layer.
        num_hidden_layers (int): Number of transformer layers.
        vocab_size (int): Size of the token vocabulary.
        layer_norm_eps (float): Epsilon used by layer normalization.
        **kwargs: Forwarded unchanged to ``PretrainedConfig.__init__``.
    """

    model_type = 'gpt_bert'

    def __init__(
        self,
        attention_probs_dropout_prob: float = 0.1,
        hidden_dropout_prob: float = 0.1,
        hidden_size: int = 768,
        intermediate_size: int = 2560,
        max_position_embeddings: int = 512,
        position_bucket_size: int = 32,
        num_attention_heads: int = 12,
        num_hidden_layers: int = 12,
        vocab_size: int = 16384,
        layer_norm_eps: float = 1e-5,
        **kwargs,
    ):
        # Explicit keyword parameters replace the original kwargs.pop(...)
        # pattern: same names, same defaults, so serialized configs and
        # keyword callers behave identically, but the configuration surface
        # is now visible in the signature.
        self.attention_probs_dropout_prob = attention_probs_dropout_prob
        self.hidden_dropout_prob = hidden_dropout_prob
        self.hidden_size = hidden_size
        self.intermediate_size = intermediate_size
        self.max_position_embeddings = max_position_embeddings
        self.position_bucket_size = position_bucket_size
        self.num_attention_heads = num_attention_heads
        self.num_hidden_layers = num_hidden_layers
        self.vocab_size = vocab_size
        self.layer_norm_eps = layer_norm_eps
        # Map the generic Auto* entry points to the custom classes in
        # modeling_gpt_bert so AutoModel.from_pretrained(..., trust_remote_code=True)
        # loads this repository's code.
        self.auto_map = {
            'AutoConfig': 'configuration_gpt_bert.GPTBertConfig',
            'AutoModel': 'modeling_gpt_bert.GPTBertForMaskedLM',
            'AutoModelForCausalLM': 'modeling_gpt_bert.GPTBertForCausalLM',
            'AutoModelForMaskedLM': 'modeling_gpt_bert.GPTBertForMaskedLM',
            'AutoModelForSequenceClassification': 'modeling_gpt_bert.GPTBertForSequenceClassification',
        }
        # Remaining kwargs (e.g. is_decoder, torch_dtype) go to the base class,
        # matching the original ordering: pops/assignments first, super() last.
        super().__init__(**kwargs)