init

Browse files

Files changed (9) hide show

can_bus_embedder/can_bus_embedder_model.bin +3 -0
controlnet/config.json +83 -0
controlnet/diffusion_pytorch_model.bin +3 -0
hydra/config.yaml +463 -0
hydra/hydra.yaml +164 -0
hydra/overrides.yaml +4 -0
scene_embedder/scene_embedder_model.bin +3 -0
unet/config.json +108 -0
unet/diffusion_pytorch_model.bin +3 -0

can_bus_embedder/can_bus_embedder_model.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9f78509a6b1dba2bbf3c094d2705c9ddcb2bcaff1ecf9ef0c4f1d1a78afa7678
+size 1206295

controlnet/config.json ADDED Viewed

	@@ -0,0 +1,83 @@

+{
+  "_class_name": "BEVControlNetModel",
+  "_diffusers_version": "0.17.1",
+  "act_fn": "silu",
+  "attention_head_dim": 8,
+  "bbox_embedder_cls": "dreamforge.networks.bbox_embedder.ContinuousBBoxWithTextEmbedding",
+  "bbox_embedder_param": {
+    "class_token_dim": 768,
+    "embedder_num_freq": 4,
+    "minmax_normalize": false,
+    "mode": "all-xyz",
+    "n_classes": 10,
+    "proj_dims": [
+      768,
+      512,
+      512,
+      768
+    ],
+    "trainable_class_token": false,
+    "use_text_encoder_init": true
+  },
+  "block_out_channels": [
+    320,
+    640,
+    1280,
+    1280
+  ],
+  "cam_embedder_param": {
+    "include_input": true,
+    "input_dims": 3,
+    "log_sampling": true,
+    "num_freqs": 4
+  },
+  "camera_in_dim": 189,
+  "camera_out_dim": 768,
+  "canvas_conditioning_channels": 14,
+  "class_embed_type": null,
+  "conditioning_embedding_out_channels": [
+    16,
+    32,
+    96,
+    256
+  ],
+  "controlnet_conditioning_channel_order": "rgb",
+  "cross_attention_dim": 768,
+  "down_block_types": [
+    "CrossAttnDownBlock2D",
+    "CrossAttnDownBlock2D",
+    "CrossAttnDownBlock2D",
+    "DownBlock2D"
+  ],
+  "downsample_padding": 1,
+  "drop_cam_num": 6,
+  "drop_cam_with_box": false,
+  "drop_cond_ratio": 0.25,
+  "flip_sin_to_cos": true,
+  "freq_shift": 0,
+  "global_pool_conditions": false,
+  "in_channels": 4,
+  "layers_per_block": 2,
+  "map_embedder_cls": null,
+  "map_embedder_param": null,
+  "map_size": [
+    4,
+    200,
+    200
+  ],
+  "mid_block_scale_factor": 1,
+  "norm_eps": 1e-05,
+  "norm_num_groups": 32,
+  "num_class_embeds": null,
+  "only_cross_attention": false,
+  "projection_class_embeddings_input_dim": null,
+  "resnet_time_scale_shift": "default",
+  "uncond_cam_in_dim": [
+    3,
+    7
+  ],
+  "upcast_attention": false,
+  "use_linear_projection": false,
+  "use_uncond_map": null,
+  "with_layout_canvas": true
+}

controlnet/diffusion_pytorch_model.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:34524c766706dcac5a2316f8cd2b3b89dd85739b8823be3c5b19144f6223c4e3
+size 1456676573

hydra/config.yaml ADDED Viewed

	@@ -0,0 +1,463 @@

+task_id: 2.0t_0.3.3_continue
+log_root_prefix: ./work_dirs/dreamforge-t-log
+projname: ${model.name}
+try_run: false
+debug: false
+log_root: ???
+init_method: env://
+seed: 42
+fix_seed_within_batch: false
+resume_from_checkpoint: ./work_dirs/dreamforge-t-log/SDv1.5mv-rawbox-t_2024-08-30_19-09_2.0t_0.3.3/checkpoint-150000/
+resume_reset_scheduler: false
+validation_only: false
+model:
+  name: SDv1.5mv-rawbox-t
+  pretrained_model_name_or_path: ./pretrained/stable-diffusion-v1-5/
+  bbox_mode: all-xyz
+  bbox_view_shared: false
+  crossview_attn_type: basic
+  train_with_same_noise: false
+  train_with_same_t: true
+  runner_module: dreamforge.runner.multiview_t_runner.MultiviewTRunner
+  pipe_module: dreamforge.pipeline.pipeline_bev_controlnet_t.StableDiffusionBEVControlNetTPipeline
+  unet_module: dreamforge.networks.unet_2d_condition_multiview_st.UNet2DConditionModelMultiviewSceneT
+  use_fp32_for_unet_trainable: true
+  unet_dir: unet
+  unet:
+    trainable_state: only_new
+    neighboring_view_pair: ${dataset.neighboring_view_pair}
+    neighboring_attn_type: add
+    zero_module_type: zero_linear
+    crossview_attn_type: ${..crossview_attn_type}
+    img_size: ${dataset.image_size}
+    video_length: ${..video_length}
+    temp_pos_emb: learnable
+    zero_module_type2: none
+    spatial_trainable: true
+    with_ref: true
+    ref_length: ${..ref_length}
+    with_can_bus: true
+    with_motion: true
+    transformer_type: _ff_last
+  model_module: dreamforge.networks.unet_addon_rawbox.BEVControlNetModel
+  controlnet_dir: controlnet
+  controlnet:
+    camera_in_dim: 189
+    camera_out_dim: 768
+    map_size:
+    - 4
+    - 200
+    - 200
+    conditioning_embedding_out_channels:
+    - 16
+    - 32
+    - 96
+    - 256
+    uncond_cam_in_dim:
+    - 3
+    - 7
+    use_uncond_map: null
+    drop_cond_ratio: 0.25
+    drop_cam_num: 6
+    drop_cam_with_box: false
+    cam_embedder_param:
+      input_dims: 3
+      num_freqs: 4
+      include_input: true
+      log_sampling: true
+    bbox_embedder_cls: dreamforge.networks.bbox_embedder.ContinuousBBoxWithTextEmbedding
+    bbox_embedder_param:
+      n_classes: 10
+      class_token_dim: 768
+      trainable_class_token: false
+      use_text_encoder_init: true
+      embedder_num_freq: 4
+      proj_dims:
+      - 768
+      - 512
+      - 512
+      - 768
+      mode: ${...bbox_mode}
+      minmax_normalize: false
+    with_layout_canvas: true
+    canvas_conditioning_channels: 14
+  load_pretrain_from: null
+  allow_partial_load: false
+  pretrained_dreamforge: ./pretrained/dreamforge-s
+  train_with_same_noise_t: false
+  video_length: 7
+  ref_length: 2
+  sc_attn_index:
+  - - 0
+    - 6
+    - 0
+  - - 0
+    - 6
+    - 0
+  - - 0
+    - 6
+    - 1
+  - - 0
+    - 6
+    - 2
+  - - 0
+    - 6
+    - 3
+  - - 0
+    - 6
+    - 4
+  - - 0
+    - 6
+    - 5
+  scene_embedder_cls: dreamforge.networks.scene_position_embedder.ScenePositionEmbedding
+  scene_embedder_dir: scene_embedder
+  scene_embedder:
+    embed_dims: 320
+    LID: false
+  can_bus_embedder_cls: dreamforge.networks.can_bus_embedder.CanbusEmbedding
+  can_bus_embedder_dir: can_bus_embedder
+  can_bus_embedder:
+    embed_dims: 768
+    input_channels: 9
+    can_bus_norm: true
+  fix_controlnet: true
+dataset:
+  dataset_type: NuScenesMapDataset
+  dataset_root: ./data/nuscenes
+  dataset_process_root: ./data/nuscenes_mmdet3d-12Hz_description/
+  dataset_cache_file_tag: 8x200x200_12Hz_interp
+  dataset_cache_dirname: nuscenes_map_aux_12Hz_interp
+  dataset_cache_file:
+  - ${..dataset_process_root}../${..dataset_cache_dirname}/train_${..dataset_cache_file_tag}.h5
+  - ${..dataset_process_root}../${..dataset_cache_dirname}/val_${..dataset_cache_file_tag}.h5
+  template: A driving scene image at {location}. {description}.
+  collect_meta_keys:
+  - camera_intrinsics
+  - lidar2ego
+  - lidar2camera
+  - camera2lidar
+  - lidar2image
+  - img_aug_matrix
+  - camera2ego
+  - ego2global
+  collect_meta_lis_keys:
+  - timeofday
+  - location
+  - description
+  - filename
+  - token
+  - ori_shape
+  image_size:
+  - 224
+  - 400
+  map_bound:
+    x:
+    - -50.0
+    - 50.0
+    - 0.5
+    'y':
+    - -50.0
+    - 50.0
+    - 0.5
+  view_order:
+  - CAM_FRONT_LEFT
+  - CAM_FRONT
+  - CAM_FRONT_RIGHT
+  - CAM_BACK_RIGHT
+  - CAM_BACK
+  - CAM_BACK_LEFT
+  neighboring_view_pair:
+    0:
+    - 5
+    - 1
+    1:
+    - 0
+    - 2
+    2:
+    - 1
+    - 3
+    3:
+    - 2
+    - 4
+    4:
+    - 3
+    - 5
+    5:
+    - 4
+    - 0
+  back_resize:
+  - 896
+  - 1600
+  back_pad:
+  - 0
+  - 4
+  - 0
+  - 0
+  augment2d:
+    resize:
+    - - 0.25
+      - 0.25
+    rotate: null
+  aux_data:
+  - visibility
+  - center_offset
+  - center_ohw
+  - height
+  augment3d:
+    scale:
+    - 1.0
+    - 1.0
+    rotate:
+    - 0.0
+    - 0.0
+    translate: 0
+    flip_ratio: 0.0
+    flip_direction: null
+  object_classes:
+  - car
+  - truck
+  - construction_vehicle
+  - bus
+  - trailer
+  - barrier
+  - motorcycle
+  - bicycle
+  - pedestrian
+  - traffic_cone
+  map_classes:
+  - drivable_area
+  - ped_crossing
+  - walkway
+  - stop_line
+  - carpark_area
+  - road_divider
+  - lane_divider
+  - road_block
+  input_modality:
+    use_lidar: false
+    use_camera: true
+    use_radar: false
+    use_map: false
+    use_external: false
+  train_pipeline:
+  - type: LoadMultiViewImageFromFiles
+    to_float32: true
+  - type: LoadAnnotations3D
+    with_bbox_3d: true
+    with_label_3d: true
+    with_attr_label: false
+  - type: ImageAug3D
+    final_dim: ${...image_size}
+    resize_lim: ${...augment2d.resize[0]}
+    bot_pct_lim:
+    - 0.0
+    - 0.0
+    rot_lim: ${...augment2d.rotate}
+    rand_flip: false
+    is_train: false
+  - type: GlobalRotScaleTrans
+    resize_lim: ${...augment3d.scale}
+    rot_lim: ${...augment3d.rotate}
+    trans_lim: ${...augment3d.translate}
+    is_train: true
+  - type: ObjectNameFilter
+    classes: ${...object_classes}
+  - type: LoadBEVSegmentation
+    dataset_root: ${...dataset_root}
+    xbound: ${...map_bound.x}
+    ybound: ${...map_bound.y}
+    classes: ${...map_classes}
+    object_classes: null
+    aux_data: null
+    cache_file: ${...dataset_cache_file.0}
+  - type: RandomFlip3DwithViews
+    flip_ratio: ${...augment3d.flip_ratio}
+    direction: ${...augment3d.flip_direction}
+  - type: ReorderMultiViewImages
+    order: ${...view_order}
+    safe: false
+  - type: ImageNormalize
+    mean:
+    - 0.5
+    - 0.5
+    - 0.5
+    std:
+    - 0.5
+    - 0.5
+    - 0.5
+  - type: DefaultFormatBundle3D
+    classes: ${...object_classes}
+  - type: Collect3D
+    keys:
+    - img
+    - gt_bboxes_3d
+    - gt_labels_3d
+    - gt_masks_bev
+    meta_keys: ${...collect_meta_keys}
+    meta_lis_keys: ${...collect_meta_lis_keys}
+  test_pipeline:
+  - type: LoadMultiViewImageFromFiles
+    to_float32: true
+  - type: LoadAnnotations3D
+    with_bbox_3d: true
+    with_label_3d: true
+    with_attr_label: false
+  - type: ImageAug3D
+    final_dim: ${...image_size}
+    resize_lim: ${...augment2d.resize[0]}
+    bot_pct_lim:
+    - 0.0
+    - 0.0
+    rot_lim:
+    - 0.0
+    - 0.0
+    rand_flip: false
+    is_train: false
+  - type: GlobalRotScaleTrans
+    resize_lim: ${...augment3d.scale}
+    rot_lim: ${...augment3d.rotate}
+    trans_lim: ${...augment3d.translate}
+    is_train: true
+  - type: ObjectNameFilter
+    classes: ${...object_classes}
+  - type: LoadBEVSegmentation
+    dataset_root: ${...dataset_root}
+    xbound: ${...map_bound.x}
+    ybound: ${...map_bound.y}
+    classes: ${...map_classes}
+    object_classes: null
+    aux_data: null
+    cache_file: ${...dataset_cache_file.1}
+  - type: ReorderMultiViewImages
+    order: ${...view_order}
+    safe: false
+  - type: ImageNormalize
+    mean:
+    - 0.5
+    - 0.5
+    - 0.5
+    std:
+    - 0.5
+    - 0.5
+    - 0.5
+  - type: DefaultFormatBundle3D
+    classes: ${...object_classes}
+  - type: Collect3D
+    keys:
+    - img
+    - gt_bboxes_3d
+    - gt_labels_3d
+    - gt_masks_bev
+    meta_keys: ${...collect_meta_keys}
+    meta_lis_keys: ${...collect_meta_lis_keys}
+  data:
+    train:
+      type: ${...dataset_type}
+      dataset_root: ${...dataset_root}
+      ann_file: ${...dataset_process_root}nuscenes_interp_12Hz_updated_description_train.pickle
+      pipeline: ${...train_pipeline}
+      object_classes: ${...object_classes}
+      map_classes: ${...map_classes}
+      modality: ${...input_modality}
+      test_mode: false
+      force_all_boxes: true
+      box_type_3d: LiDAR
+      filter_empty_gt: false
+      video_length: ${model.video_length}
+      start_on_keyframe: ${dataset.start_on_keyframe}
+      ref_length: ${model.ref_length}
+      candidate_length: 5
+    val:
+      type: ${...dataset_type}
+      dataset_root: ${...dataset_root}
+      ann_file: ${...dataset_process_root}nuscenes_interp_12Hz_updated_description_val.pickle
+      pipeline: ${...test_pipeline}
+      object_classes: ${...object_classes}
+      map_classes: ${...map_classes}
+      modality: ${...input_modality}
+      test_mode: false
+      force_all_boxes: true
+      box_type_3d: LiDAR
+      filter_empty_gt: false
+      video_length: ${model.video_length}
+      start_on_keyframe: ${dataset.start_on_keyframe}
+      ref_length: ${model.ref_length}
+      candidate_length: 5
+    test:
+      type: ${...dataset_type}
+      dataset_root: ${...dataset_root}
+      ann_file: ${...dataset_process_root}nuscenes_interp_12Hz_updated_description_val.pickle
+      pipeline: ${...test_pipeline}
+      object_classes: ${...object_classes}
+      map_classes: ${...map_classes}
+      modality: ${...input_modality}
+      test_mode: true
+      force_all_boxes: true
+      box_type_3d: LiDAR
+      filter_empty_gt: false
+      video_length: ${model.video_length}
+      start_on_keyframe: ${dataset.start_on_keyframe}
+      ref_length: ${model.ref_length}
+      candidate_length: 5
+  start_on_keyframe: true
+accelerator:
+  gradient_accumulation_steps: 1
+  mixed_precision: fp16
+  report_to: tensorboard
+runner:
+  foreground_loss_mode: null
+  foreground_loss_weight: 0.0
+  bbox_drop_ratio: 0
+  bbox_add_ratio: 0
+  bbox_add_num: 3
+  keyframe_rate: 1
+  num_train_epochs: 100
+  train_batch_size: 1
+  max_train_steps: null
+  num_workers: 4
+  prefetch_factor: 4
+  display_per_epoch: 40
+  display_per_n_min: 10
+  max_grad_norm: 1.0
+  set_grads_to_none: true
+  enable_xformers_memory_efficient_attention: true
+  unet_in_fp16: true
+  enable_unet_checkpointing: false
+  enable_controlnet_checkpointing: false
+  noise_offset: 0.0
+  train_with_same_offset: true
+  use_8bit_adam: false
+  adam_beta1: 0.9
+  adam_beta2: 0.999
+  adam_weight_decay: 0.01
+  adam_epsilon: 1.0e-08
+  learning_rate: 8.0e-05
+  lr_scheduler: constant_with_warmup
+  gradient_accumulation_steps: 1
+  lr_num_cycles: 1
+  lr_power: 1.0
+  lr_warmup_steps: 3000
+  checkpointing_steps: 10000
+  validation_steps: 5000
+  save_model_per_epoch: 10
+  validation_before_run: true
+  validation_index:
+  - 138
+  - 632
+  - 1301
+  - 2342
+  validation_times: 1
+  validation_batch_size: 1
+  validation_show_box: true
+  validation_seed_global: false
+  pipeline_param:
+    guidance_scale: 2
+    num_inference_steps: 20
+    eta: 0.0
+    controlnet_conditioning_scale: 1.0
+    guess_mode: false
+    use_zero_map_as_unconditional: false
+    bbox_max_length: null
+    init_noise: both
+    view_order: ${dataset.view_order}
+    keyframe_rate: 6

hydra/hydra.yaml ADDED Viewed

	@@ -0,0 +1,164 @@

+hydra:
+  run:
+    dir: ${log_root_prefix}/${projname}_${now:%Y-%m-%d}_${now:%H-%M}_${task_id}
+  sweep:
+    dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S}
+    subdir: ${hydra.job.num}
+  launcher:
+    _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher
+  sweeper:
+    _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper
+    max_batch_size: null
+    params: null
+  help:
+    app_name: ${hydra.job.name}
+    header: '${hydra.help.app_name} is powered by Hydra.
+
+      '
+    footer: 'Powered by Hydra (https://hydra.cc)
+
+      Use --hydra-help to view Hydra specific help
+
+      '
+    template: '${hydra.help.header}
+
+      == Configuration groups ==
+
+      Compose your configuration from those groups (group=option)
+
+
+      $APP_CONFIG_GROUPS
+
+
+      == Config ==
+
+      Override anything in the config (foo.bar=value)
+
+
+      $CONFIG
+
+
+      ${hydra.help.footer}
+
+      '
+  hydra_help:
+    template: 'Hydra (${hydra.runtime.version})
+
+      See https://hydra.cc for more info.
+
+
+      == Flags ==
+
+      $FLAGS_HELP
+
+
+      == Configuration groups ==
+
+      Compose your configuration from those groups (For example, append hydra/job_logging=disabled
+      to command line)
+
+
+      $HYDRA_CONFIG_GROUPS
+
+
+      Use ''--cfg hydra'' to Show the Hydra config.
+
+      '
+    hydra_help: ???
+  hydra_logging:
+    version: 1
+    formatters:
+      simple:
+        format: '[%(asctime)s][HYDRA] %(message)s'
+    handlers:
+      console:
+        class: logging.StreamHandler
+        formatter: simple
+        stream: ext://sys.stdout
+    root:
+      level: INFO
+      handlers:
+      - console
+    loggers:
+      logging_example:
+        level: DEBUG
+    disable_existing_loggers: false
+  job_logging:
+    version: 1
+    formatters:
+      simple:
+        format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s'
+    handlers:
+      console:
+        class: logging.StreamHandler
+        formatter: simple
+        stream: ext://sys.stdout
+      file:
+        class: logging.FileHandler
+        formatter: simple
+        filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log
+    root:
+      level: INFO
+      handlers:
+      - console
+      - file
+    disable_existing_loggers: false
+  env: {}
+  mode: RUN
+  searchpath: []
+  callbacks: {}
+  output_subdir: hydra
+  overrides:
+    hydra:
+    - hydra.mode=RUN
+    task:
+    - runner=8gpus_t
+    - +exp=dreamforge_t
+    - +resume_from_checkpoint=./work_dirs/dreamforge-t-log/SDv1.5mv-rawbox-t_2024-08-30_19-09_2.0t_0.3.3/checkpoint-150000/
+    - task_id=2.0t_0.3.3_continue
+  job:
+    name: train
+    chdir: null
+    override_dirname: +exp=dreamforge_t,+resume_from_checkpoint=./work_dirs/dreamforge-t-log/SDv1.5mv-rawbox-t_2024-08-30_19-09_2.0t_0.3.3/checkpoint-150000/,runner=8gpus_t,task_id=2.0t_0.3.3_continue
+    id: ???
+    num: ???
+    config_name: config_single
+    env_set: {}
+    env_copy: []
+    config:
+      override_dirname:
+        kv_sep: '='
+        item_sep: ','
+        exclude_keys: []
+  runtime:
+    version: 1.3.0
+    version_base: '1.3'
+    cwd: /path/to/DreamForge
+    config_sources:
+    - path: hydra.conf
+      schema: pkg
+      provider: hydra
+    - path: /path/to/DreamForge/configs
+      schema: file
+      provider: main
+    - path: ''
+      schema: structured
+      provider: schema
+    output_dir: /path/to/DreamForge/work_dirs/dreamforge-t-log/SDv1.5mv-rawbox-t_2024-09-03_10-42_2.0t_0.3.3_continue
+    choices:
+      exp: dreamforge_t
+      exp/model@model: ../../model/SDv1.5mv_rawbox_t
+      runner: 8gpus_t
+      accelerator: default
+      dataset: Nuscenes_cache
+      model: SDv1.5mv_rawbox
+      hydra/env: default
+      hydra/callbacks: null
+      hydra/job_logging: default
+      hydra/hydra_logging: default
+      hydra/hydra_help: default
+      hydra/help: default
+      hydra/sweeper: basic
+      hydra/launcher: basic
+      hydra/output: default
+  verbose: false

hydra/overrides.yaml ADDED Viewed

	@@ -0,0 +1,4 @@

+- runner=8gpus_t
+- +exp=dreamforge_t
+- +resume_from_checkpoint=./work_dirs/dreamforge-t-log/SDv1.5mv-rawbox-t_2024-08-30_19-09_2.0t_0.3.3/checkpoint-150000/
+- task_id=2.0t_0.3.3_continue

scene_embedder/scene_embedder_model.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:293016fa036b7d36c8c71e588c4946be12acdeef1bc772d8f309992b6c82684a
+size 9184513

unet/config.json ADDED Viewed

	@@ -0,0 +1,108 @@

+{
+  "_class_name": "UNet2DConditionModelMultiviewSceneT",
+  "_diffusers_version": "0.17.1",
+  "_name_or_path": "./pretrained/dreamforge-s",
+  "act_fn": "silu",
+  "addition_embed_type": null,
+  "addition_embed_type_num_heads": 64,
+  "attention_head_dim": 8,
+  "attn1_q_trainable": true,
+  "block_out_channels": [
+    320,
+    640,
+    1280,
+    1280
+  ],
+  "center_input_sample": false,
+  "class_embed_type": null,
+  "class_embeddings_concat": false,
+  "conv_in_kernel": 3,
+  "conv_out_kernel": 3,
+  "cross_attention_dim": 768,
+  "cross_attention_norm": null,
+  "crossview_attn_type": "basic",
+  "down_block_types": [
+    "CrossAttnDownBlock2D",
+    "CrossAttnDownBlock2D",
+    "CrossAttnDownBlock2D",
+    "DownBlock2D"
+  ],
+  "downsample_padding": 1,
+  "dual_cross_attention": false,
+  "encoder_hid_dim": null,
+  "encoder_hid_dim_type": null,
+  "epipolar_mask_type": "binary",
+  "flip_sin_to_cos": true,
+  "freq_shift": 0,
+  "img_size": [
+    224,
+    400
+  ],
+  "in_channels": 4,
+  "layers_per_block": 2,
+  "mid_block_only_cross_attention": null,
+  "mid_block_scale_factor": 1,
+  "mid_block_type": "UNetMidBlock2DCrossAttn",
+  "neighboring_attn_type": "add",
+  "neighboring_view_pair": {
+    "0": [
+      5,
+      1
+    ],
+    "1": [
+      0,
+      2
+    ],
+    "2": [
+      1,
+      3
+    ],
+    "3": [
+      2,
+      4
+    ],
+    "4": [
+      3,
+      5
+    ],
+    "5": [
+      4,
+      0
+    ]
+  },
+  "norm_eps": 1e-05,
+  "norm_num_groups": 32,
+  "num_class_embeds": null,
+  "only_cross_attention": false,
+  "out_channels": 4,
+  "projection_class_embeddings_input_dim": null,
+  "ref_length": 2,
+  "resnet_out_scale_factor": 1.0,
+  "resnet_skip_time_act": false,
+  "resnet_time_scale_shift": "default",
+  "sample_size": 64,
+  "scene_channels": 320,
+  "spatial_trainable": true,
+  "temp_pos_emb": "learnable",
+  "time_cond_proj_dim": null,
+  "time_embedding_act_fn": null,
+  "time_embedding_dim": null,
+  "time_embedding_type": "positional",
+  "timestep_post_act": null,
+  "trainable_state": "only_new",
+  "transformer_type": "_ff_last",
+  "up_block_types": [
+    "UpBlock2D",
+    "CrossAttnUpBlock2D",
+    "CrossAttnUpBlock2D",
+    "CrossAttnUpBlock2D"
+  ],
+  "upcast_attention": false,
+  "use_linear_projection": false,
+  "video_length": 7,
+  "with_can_bus": true,
+  "with_motion": true,
+  "with_ref": true,
+  "zero_module_type": "zero_linear",
+  "zero_module_type2": "none"
+}

unet/diffusion_pytorch_model.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e6e0a0d85014e8680666672102e71e030539138e5f4aacf2aae22568ccb1a577
+size 2770299339