{
  "architectures": ["BitMarModel"],
  "auto_map": {
    "AutoConfig": "modeling_bitmar.BitMarConfig",
    "AutoModel": "modeling_bitmar.BitMarModel"
  },
  "model_type": "bitmar",
  "vocab_size": 50257,
  "text_encoder_dim": 128,
  "text_encoder_layers": 4,
  "text_encoder_heads": 4,
  "text_decoder_dim": 128,
  "text_decoder_layers": 4,
  "text_decoder_heads": 4,
  "vision_encoder_dim": 768,
  "vision_latent_size": 128,
  "vision_hidden_size": 64,
  "vision_compression_method": "learned_compression",
  "vision_spatial_pooling": true,
  "vision_pool_size": 2,
  "fusion_hidden_size": 128,
  "fusion_num_heads": 4,
  "fusion_num_layers": 2,
  "memory_alpha": 0.2,
  "direct_writing": true,
  "memory_compression": true,
  "max_seq_len": 256,
  "dropout": 0.15,
  "torch_dtype": "float32",
  "transformers_version": "4.36.0",
  "use_cache": true,
  "tie_word_embeddings": true,
  "initializer_range": 0.02,
  "layer_norm_epsilon": 1e-5,
  "pad_token_id": 50256,
  "bos_token_id": 50256,
  "eos_token_id": 50256,
  "sep_token_id": null,
  "decoder_start_token_id": null
}