{
  "architecture": "vit_small_patch16_224",
  "num_classes": 0,
  "num_features": 384,
  "global_pool": "token",
  "pretrained_cfg": {
    "tag": "dino",
    "custom_load": false,
    "input_size": [
      3,
      224,
      224
    ],
    "fixed_input_size": true,
    "interpolation": "bicubic",
    "crop_pct": 0.9,
    "crop_mode": "center",
    "mean": [
      0.485,
      0.456,
      0.406
    ],
    "std": [
      0.229,
      0.224,
      0.225
    ],
    "num_classes": 0,
    "pool_size": null,
    "first_conv": "patch_embed.proj",
    "classifier": "head"
  },
  "architectures": [
    "ViTModel"
  ],
  "attention_probs_dropout_prob": 0.0,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.0,
  "hidden_size": 384,
  "image_size": 224,
  "initializer_range": 0.02,
  "intermediate_size": 1536,
  "layer_norm_eps": 1e-12,
  "model_type": "vit",
  "num_attention_heads": 6,
  "num_channels": 3,
  "num_hidden_layers": 12,
  "patch_size": 16,
  "torch_dtype": "float32",
  "transformers_version": "4.10.0.dev0"
}