{
  "_class_name": "AutoencoderKLMagvit",
  "_diffusers_version": "0.30.1",
  "act_fn": "silu",
  "block_out_channels": [
    128,
    256,
    512,
    512
  ],
  "cache_compression_vae": false,
  "cache_mag_vae": true,
  "ch": 128,
  "ch_mult": [
    1,
    2,
    4,
    4
  ],
  "down_block_types": [
    "SpatialDownBlock3D",
    "SpatialTemporalDownBlock3D",
    "SpatialTemporalDownBlock3D",
    "SpatialTemporalDownBlock3D"
  ],
  "force_upcast": true,
  "in_channels": 3,
  "latent_channels": 16,
  "layers_per_block": 2,
  "mid_block_attention_type": "spatial",
  "mid_block_num_attention_heads": 1,
  "mid_block_type": "MidBlock3D",
  "mid_block_use_attention": true,
  "mini_batch_decoder": 1,
  "mini_batch_encoder": 4,
  "norm_num_groups": 32,
  "num_attention_heads": 1,
  "out_channels": 3,
  "sample_size": 256,
  "scaling_factor": 0.7125,
  "slice_compression_vae": false,
  "slice_mag_vae": false,
  "spatial_group_norm": true,
  "tile_overlap_factor": 0.25,
  "tile_sample_min_size": 384,
  "up_block_types": [
    "SpatialUpBlock3D",
    "SpatialTemporalUpBlock3D",
    "SpatialTemporalUpBlock3D",
    "SpatialTemporalUpBlock3D"
  ],
  "upcast_vae": false,
  "use_gc_blocks": null,
  "use_tiling": false,
  "use_tiling_decoder": false,
  "use_tiling_encoder": false
}