| task_id: 2.0t_0.3.3_continue | |
| log_root_prefix: ./work_dirs/dreamforge-t-log | |
| projname: ${model.name} | |
| try_run: false | |
| debug: false | |
| log_root: ??? | |
| init_method: env:// | |
| seed: 42 | |
| fix_seed_within_batch: false | |
| resume_from_checkpoint: ./work_dirs/dreamforge-t-log/SDv1.5mv-rawbox-t_2024-08-30_19-09_2.0t_0.3.3/checkpoint-150000/ | |
| resume_reset_scheduler: false | |
| validation_only: false | |
| model: | |
| name: SDv1.5mv-rawbox-t | |
| pretrained_model_name_or_path: ./pretrained/stable-diffusion-v1-5/ | |
| bbox_mode: all-xyz | |
| bbox_view_shared: false | |
| crossview_attn_type: basic | |
| train_with_same_noise: false | |
| train_with_same_t: true | |
| runner_module: dreamforge.runner.multiview_t_runner.MultiviewTRunner | |
| pipe_module: dreamforge.pipeline.pipeline_bev_controlnet_t.StableDiffusionBEVControlNetTPipeline | |
| unet_module: dreamforge.networks.unet_2d_condition_multiview_st.UNet2DConditionModelMultiviewSceneT | |
| use_fp32_for_unet_trainable: true | |
| unet_dir: unet | |
| unet: | |
| trainable_state: only_new | |
| neighboring_view_pair: ${dataset.neighboring_view_pair} | |
| neighboring_attn_type: add | |
| zero_module_type: zero_linear | |
| crossview_attn_type: ${..crossview_attn_type} | |
| img_size: ${dataset.image_size} | |
| video_length: ${..video_length} | |
| temp_pos_emb: learnable | |
| zero_module_type2: none | |
| spatial_trainable: true | |
| with_ref: true | |
| ref_length: ${..ref_length} | |
| with_can_bus: true | |
| with_motion: true | |
| transformer_type: _ff_last | |
| model_module: dreamforge.networks.unet_addon_rawbox.BEVControlNetModel | |
| controlnet_dir: controlnet | |
| controlnet: | |
| camera_in_dim: 189 | |
| camera_out_dim: 768 | |
| map_size: | |
| - 4 | |
| - 200 | |
| - 200 | |
| conditioning_embedding_out_channels: | |
| - 16 | |
| - 32 | |
| - 96 | |
| - 256 | |
| uncond_cam_in_dim: | |
| - 3 | |
| - 7 | |
| use_uncond_map: null | |
| drop_cond_ratio: 0.25 | |
| drop_cam_num: 6 | |
| drop_cam_with_box: false | |
| cam_embedder_param: | |
| input_dims: 3 | |
| num_freqs: 4 | |
| include_input: true | |
| log_sampling: true | |
| bbox_embedder_cls: dreamforge.networks.bbox_embedder.ContinuousBBoxWithTextEmbedding | |
| bbox_embedder_param: | |
| n_classes: 10 | |
| class_token_dim: 768 | |
| trainable_class_token: false | |
| use_text_encoder_init: true | |
| embedder_num_freq: 4 | |
| proj_dims: | |
| - 768 | |
| - 512 | |
| - 512 | |
| - 768 | |
| mode: ${...bbox_mode} | |
| minmax_normalize: false | |
| with_layout_canvas: true | |
| canvas_conditioning_channels: 14 | |
| load_pretrain_from: null | |
| allow_partial_load: false | |
| pretrained_dreamforge: ./pretrained/dreamforge-s | |
| train_with_same_noise_t: false | |
| video_length: 7 | |
| ref_length: 2 | |
| sc_attn_index: | |
| - - 0 | |
| - 6 | |
| - 0 | |
| - - 0 | |
| - 6 | |
| - 0 | |
| - - 0 | |
| - 6 | |
| - 1 | |
| - - 0 | |
| - 6 | |
| - 2 | |
| - - 0 | |
| - 6 | |
| - 3 | |
| - - 0 | |
| - 6 | |
| - 4 | |
| - - 0 | |
| - 6 | |
| - 5 | |
| scene_embedder_cls: dreamforge.networks.scene_position_embedder.ScenePositionEmbedding | |
| scene_embedder_dir: scene_embedder | |
| scene_embedder: | |
| embed_dims: 320 | |
| LID: false | |
| can_bus_embedder_cls: dreamforge.networks.can_bus_embedder.CanbusEmbedding | |
| can_bus_embedder_dir: can_bus_embedder | |
| can_bus_embedder: | |
| embed_dims: 768 | |
| input_channels: 9 | |
| can_bus_norm: true | |
| fix_controlnet: true | |
| dataset: | |
| dataset_type: NuScenesMapDataset | |
| dataset_root: ./data/nuscenes | |
| dataset_process_root: ./data/nuscenes_mmdet3d-12Hz_description/ | |
| dataset_cache_file_tag: 8x200x200_12Hz_interp | |
| dataset_cache_dirname: nuscenes_map_aux_12Hz_interp | |
| dataset_cache_file: | |
| - ${..dataset_process_root}../${..dataset_cache_dirname}/train_${..dataset_cache_file_tag}.h5 | |
| - ${..dataset_process_root}../${..dataset_cache_dirname}/val_${..dataset_cache_file_tag}.h5 | |
| template: A driving scene image at {location}. {description}. | |
| collect_meta_keys: | |
| - camera_intrinsics | |
| - lidar2ego | |
| - lidar2camera | |
| - camera2lidar | |
| - lidar2image | |
| - img_aug_matrix | |
| - camera2ego | |
| - ego2global | |
| collect_meta_lis_keys: | |
| - timeofday | |
| - location | |
| - description | |
| - filename | |
| - token | |
| - ori_shape | |
| image_size: | |
| - 224 | |
| - 400 | |
| map_bound: | |
| x: | |
| - -50.0 | |
| - 50.0 | |
| - 0.5 | |
| 'y': | |
| - -50.0 | |
| - 50.0 | |
| - 0.5 | |
| view_order: | |
| - CAM_FRONT_LEFT | |
| - CAM_FRONT | |
| - CAM_FRONT_RIGHT | |
| - CAM_BACK_RIGHT | |
| - CAM_BACK | |
| - CAM_BACK_LEFT | |
| neighboring_view_pair: | |
| 0: | |
| - 5 | |
| - 1 | |
| 1: | |
| - 0 | |
| - 2 | |
| 2: | |
| - 1 | |
| - 3 | |
| 3: | |
| - 2 | |
| - 4 | |
| 4: | |
| - 3 | |
| - 5 | |
| 5: | |
| - 4 | |
| - 0 | |
| back_resize: | |
| - 896 | |
| - 1600 | |
| back_pad: | |
| - 0 | |
| - 4 | |
| - 0 | |
| - 0 | |
| augment2d: | |
| resize: | |
| - - 0.25 | |
| - 0.25 | |
| rotate: null | |
| aux_data: | |
| - visibility | |
| - center_offset | |
| - center_ohw | |
| - height | |
| augment3d: | |
| scale: | |
| - 1.0 | |
| - 1.0 | |
| rotate: | |
| - 0.0 | |
| - 0.0 | |
| translate: 0 | |
| flip_ratio: 0.0 | |
| flip_direction: null | |
| object_classes: | |
| - car | |
| - truck | |
| - construction_vehicle | |
| - bus | |
| - trailer | |
| - barrier | |
| - motorcycle | |
| - bicycle | |
| - pedestrian | |
| - traffic_cone | |
| map_classes: | |
| - drivable_area | |
| - ped_crossing | |
| - walkway | |
| - stop_line | |
| - carpark_area | |
| - road_divider | |
| - lane_divider | |
| - road_block | |
| input_modality: | |
| use_lidar: false | |
| use_camera: true | |
| use_radar: false | |
| use_map: false | |
| use_external: false | |
| train_pipeline: | |
| - type: LoadMultiViewImageFromFiles | |
| to_float32: true | |
| - type: LoadAnnotations3D | |
| with_bbox_3d: true | |
| with_label_3d: true | |
| with_attr_label: false | |
| - type: ImageAug3D | |
| final_dim: ${...image_size} | |
| resize_lim: ${...augment2d.resize[0]} | |
| bot_pct_lim: | |
| - 0.0 | |
| - 0.0 | |
| rot_lim: ${...augment2d.rotate} | |
| rand_flip: false | |
| is_train: false | |
| - type: GlobalRotScaleTrans | |
| resize_lim: ${...augment3d.scale} | |
| rot_lim: ${...augment3d.rotate} | |
| trans_lim: ${...augment3d.translate} | |
| is_train: true | |
| - type: ObjectNameFilter | |
| classes: ${...object_classes} | |
| - type: LoadBEVSegmentation | |
| dataset_root: ${...dataset_root} | |
| xbound: ${...map_bound.x} | |
| ybound: ${...map_bound.y} | |
| classes: ${...map_classes} | |
| object_classes: null | |
| aux_data: null | |
| cache_file: ${...dataset_cache_file.0} | |
| - type: RandomFlip3DwithViews | |
| flip_ratio: ${...augment3d.flip_ratio} | |
| direction: ${...augment3d.flip_direction} | |
| - type: ReorderMultiViewImages | |
| order: ${...view_order} | |
| safe: false | |
| - type: ImageNormalize | |
| mean: | |
| - 0.5 | |
| - 0.5 | |
| - 0.5 | |
| std: | |
| - 0.5 | |
| - 0.5 | |
| - 0.5 | |
| - type: DefaultFormatBundle3D | |
| classes: ${...object_classes} | |
| - type: Collect3D | |
| keys: | |
| - img | |
| - gt_bboxes_3d | |
| - gt_labels_3d | |
| - gt_masks_bev | |
| meta_keys: ${...collect_meta_keys} | |
| meta_lis_keys: ${...collect_meta_lis_keys} | |
| test_pipeline: | |
| - type: LoadMultiViewImageFromFiles | |
| to_float32: true | |
| - type: LoadAnnotations3D | |
| with_bbox_3d: true | |
| with_label_3d: true | |
| with_attr_label: false | |
| - type: ImageAug3D | |
| final_dim: ${...image_size} | |
| resize_lim: ${...augment2d.resize[0]} | |
| bot_pct_lim: | |
| - 0.0 | |
| - 0.0 | |
| rot_lim: | |
| - 0.0 | |
| - 0.0 | |
| rand_flip: false | |
| is_train: false | |
| - type: GlobalRotScaleTrans | |
| resize_lim: ${...augment3d.scale} | |
| rot_lim: ${...augment3d.rotate} | |
| trans_lim: ${...augment3d.translate} | |
| is_train: true | |
| - type: ObjectNameFilter | |
| classes: ${...object_classes} | |
| - type: LoadBEVSegmentation | |
| dataset_root: ${...dataset_root} | |
| xbound: ${...map_bound.x} | |
| ybound: ${...map_bound.y} | |
| classes: ${...map_classes} | |
| object_classes: null | |
| aux_data: null | |
| cache_file: ${...dataset_cache_file.1} | |
| - type: ReorderMultiViewImages | |
| order: ${...view_order} | |
| safe: false | |
| - type: ImageNormalize | |
| mean: | |
| - 0.5 | |
| - 0.5 | |
| - 0.5 | |
| std: | |
| - 0.5 | |
| - 0.5 | |
| - 0.5 | |
| - type: DefaultFormatBundle3D | |
| classes: ${...object_classes} | |
| - type: Collect3D | |
| keys: | |
| - img | |
| - gt_bboxes_3d | |
| - gt_labels_3d | |
| - gt_masks_bev | |
| meta_keys: ${...collect_meta_keys} | |
| meta_lis_keys: ${...collect_meta_lis_keys} | |
| data: | |
| train: | |
| type: ${...dataset_type} | |
| dataset_root: ${...dataset_root} | |
| ann_file: ${...dataset_process_root}nuscenes_interp_12Hz_updated_description_train.pickle | |
| pipeline: ${...train_pipeline} | |
| object_classes: ${...object_classes} | |
| map_classes: ${...map_classes} | |
| modality: ${...input_modality} | |
| test_mode: false | |
| force_all_boxes: true | |
| box_type_3d: LiDAR | |
| filter_empty_gt: false | |
| video_length: ${model.video_length} | |
| start_on_keyframe: ${dataset.start_on_keyframe} | |
| ref_length: ${model.ref_length} | |
| candidate_length: 5 | |
| val: | |
| type: ${...dataset_type} | |
| dataset_root: ${...dataset_root} | |
| ann_file: ${...dataset_process_root}nuscenes_interp_12Hz_updated_description_val.pickle | |
| pipeline: ${...test_pipeline} | |
| object_classes: ${...object_classes} | |
| map_classes: ${...map_classes} | |
| modality: ${...input_modality} | |
| test_mode: false | |
| force_all_boxes: true | |
| box_type_3d: LiDAR | |
| filter_empty_gt: false | |
| video_length: ${model.video_length} | |
| start_on_keyframe: ${dataset.start_on_keyframe} | |
| ref_length: ${model.ref_length} | |
| candidate_length: 5 | |
| test: | |
| type: ${...dataset_type} | |
| dataset_root: ${...dataset_root} | |
| ann_file: ${...dataset_process_root}nuscenes_interp_12Hz_updated_description_val.pickle | |
| pipeline: ${...test_pipeline} | |
| object_classes: ${...object_classes} | |
| map_classes: ${...map_classes} | |
| modality: ${...input_modality} | |
| test_mode: true | |
| force_all_boxes: true | |
| box_type_3d: LiDAR | |
| filter_empty_gt: false | |
| video_length: ${model.video_length} | |
| start_on_keyframe: ${dataset.start_on_keyframe} | |
| ref_length: ${model.ref_length} | |
| candidate_length: 5 | |
| start_on_keyframe: true | |
| accelerator: | |
| gradient_accumulation_steps: 1 | |
| mixed_precision: fp16 | |
| report_to: tensorboard | |
| runner: | |
| foreground_loss_mode: null | |
| foreground_loss_weight: 0.0 | |
| bbox_drop_ratio: 0 | |
| bbox_add_ratio: 0 | |
| bbox_add_num: 3 | |
| keyframe_rate: 1 | |
| num_train_epochs: 100 | |
| train_batch_size: 1 | |
| max_train_steps: null | |
| num_workers: 4 | |
| prefetch_factor: 4 | |
| display_per_epoch: 40 | |
| display_per_n_min: 10 | |
| max_grad_norm: 1.0 | |
| set_grads_to_none: true | |
| enable_xformers_memory_efficient_attention: true | |
| unet_in_fp16: true | |
| enable_unet_checkpointing: false | |
| enable_controlnet_checkpointing: false | |
| noise_offset: 0.0 | |
| train_with_same_offset: true | |
| use_8bit_adam: false | |
| adam_beta1: 0.9 | |
| adam_beta2: 0.999 | |
| adam_weight_decay: 0.01 | |
| adam_epsilon: 1.0e-08 | |
| learning_rate: 8.0e-05 | |
| lr_scheduler: constant_with_warmup | |
| gradient_accumulation_steps: 1 | |
| lr_num_cycles: 1 | |
| lr_power: 1.0 | |
| lr_warmup_steps: 3000 | |
| checkpointing_steps: 10000 | |
| validation_steps: 5000 | |
| save_model_per_epoch: 10 | |
| validation_before_run: true | |
| validation_index: | |
| - 138 | |
| - 632 | |
| - 1301 | |
| - 2342 | |
| validation_times: 1 | |
| validation_batch_size: 1 | |
| validation_show_box: true | |
| validation_seed_global: false | |
| pipeline_param: | |
| guidance_scale: 2 | |
| num_inference_steps: 20 | |
| eta: 0.0 | |
| controlnet_conditioning_scale: 1.0 | |
| guess_mode: false | |
| use_zero_map_as_unconditional: false | |
| bbox_max_length: null | |
| init_noise: both | |
| view_order: ${dataset.view_order} | |
| keyframe_rate: 6 | |