# init

# 0fa8de7 11 months ago

# 11.1 kB

	task_id: 2.0t_0.3.3_continue
	log_root_prefix: ./work_dirs/dreamforge-t-log
	projname: ${model.name}
	try_run: false
	debug: false
	log_root: ???
	init_method: env://
	seed: 42
	fix_seed_within_batch: false
	resume_from_checkpoint: ./work_dirs/dreamforge-t-log/SDv1.5mv-rawbox-t_2024-08-30_19-09_2.0t_0.3.3/checkpoint-150000/
	resume_reset_scheduler: false
	validation_only: false
	model:
	name: SDv1.5mv-rawbox-t
	pretrained_model_name_or_path: ./pretrained/stable-diffusion-v1-5/
	bbox_mode: all-xyz
	bbox_view_shared: false
	crossview_attn_type: basic
	train_with_same_noise: false
	train_with_same_t: true
	runner_module: dreamforge.runner.multiview_t_runner.MultiviewTRunner
	pipe_module: dreamforge.pipeline.pipeline_bev_controlnet_t.StableDiffusionBEVControlNetTPipeline
	unet_module: dreamforge.networks.unet_2d_condition_multiview_st.UNet2DConditionModelMultiviewSceneT
	use_fp32_for_unet_trainable: true
	unet_dir: unet
	unet:
	trainable_state: only_new
	neighboring_view_pair: ${dataset.neighboring_view_pair}
	neighboring_attn_type: add
	zero_module_type: zero_linear
	crossview_attn_type: ${..crossview_attn_type}
	img_size: ${dataset.image_size}
	video_length: ${..video_length}
	temp_pos_emb: learnable
	zero_module_type2: none
	spatial_trainable: true
	with_ref: true
	ref_length: ${..ref_length}
	with_can_bus: true
	with_motion: true
	transformer_type: _ff_last
	model_module: dreamforge.networks.unet_addon_rawbox.BEVControlNetModel
	controlnet_dir: controlnet
	controlnet:
	camera_in_dim: 189
	camera_out_dim: 768
	map_size:
	- 4
	- 200
	- 200
	conditioning_embedding_out_channels:
	- 16
	- 32
	- 96
	- 256
	uncond_cam_in_dim:
	- 3
	- 7
	use_uncond_map: null
	drop_cond_ratio: 0.25
	drop_cam_num: 6
	drop_cam_with_box: false
	cam_embedder_param:
	input_dims: 3
	num_freqs: 4
	include_input: true
	log_sampling: true
	bbox_embedder_cls: dreamforge.networks.bbox_embedder.ContinuousBBoxWithTextEmbedding
	bbox_embedder_param:
	n_classes: 10
	class_token_dim: 768
	trainable_class_token: false
	use_text_encoder_init: true
	embedder_num_freq: 4
	proj_dims:
	- 768
	- 512
	- 512
	- 768
	mode: ${...bbox_mode}
	minmax_normalize: false
	with_layout_canvas: true
	canvas_conditioning_channels: 14
	load_pretrain_from: null
	allow_partial_load: false
	pretrained_dreamforge: ./pretrained/dreamforge-s
	train_with_same_noise_t: false
	video_length: 7
	ref_length: 2
	sc_attn_index:
	- - 0
	- 6
	- 0
	- - 0
	- 6
	- 0
	- - 0
	- 6
	- 1
	- - 0
	- 6
	- 2
	- - 0
	- 6
	- 3
	- - 0
	- 6
	- 4
	- - 0
	- 6
	- 5
	scene_embedder_cls: dreamforge.networks.scene_position_embedder.ScenePositionEmbedding
	scene_embedder_dir: scene_embedder
	scene_embedder:
	embed_dims: 320
	LID: false
	can_bus_embedder_cls: dreamforge.networks.can_bus_embedder.CanbusEmbedding
	can_bus_embedder_dir: can_bus_embedder
	can_bus_embedder:
	embed_dims: 768
	input_channels: 9
	can_bus_norm: true
	fix_controlnet: true
	dataset:
	dataset_type: NuScenesMapDataset
	dataset_root: ./data/nuscenes
	dataset_process_root: ./data/nuscenes_mmdet3d-12Hz_description/
	dataset_cache_file_tag: 8x200x200_12Hz_interp
	dataset_cache_dirname: nuscenes_map_aux_12Hz_interp
	dataset_cache_file:
	- ${..dataset_process_root}../${..dataset_cache_dirname}/train_${..dataset_cache_file_tag}.h5
	- ${..dataset_process_root}../${..dataset_cache_dirname}/val_${..dataset_cache_file_tag}.h5
	template: A driving scene image at {location}. {description}.
	collect_meta_keys:
	- camera_intrinsics
	- lidar2ego
	- lidar2camera
	- camera2lidar
	- lidar2image
	- img_aug_matrix
	- camera2ego
	- ego2global
	collect_meta_lis_keys:
	- timeofday
	- location
	- description
	- filename
	- token
	- ori_shape
	image_size:
	- 224
	- 400
	map_bound:
	x:
	- -50.0
	- 50.0
	- 0.5
	'y':
	- -50.0
	- 50.0
	- 0.5
	view_order:
	- CAM_FRONT_LEFT
	- CAM_FRONT
	- CAM_FRONT_RIGHT
	- CAM_BACK_RIGHT
	- CAM_BACK
	- CAM_BACK_LEFT
	neighboring_view_pair:
	0:
	- 5
	- 1
	1:
	- 0
	- 2
	2:
	- 1
	- 3
	3:
	- 2
	- 4
	4:
	- 3
	- 5
	5:
	- 4
	- 0
	back_resize:
	- 896
	- 1600
	back_pad:
	- 0
	- 4
	- 0
	- 0
	augment2d:
	resize:
	- - 0.25
	- 0.25
	rotate: null
	aux_data:
	- visibility
	- center_offset
	- center_ohw
	- height
	augment3d:
	scale:
	- 1.0
	- 1.0
	rotate:
	- 0.0
	- 0.0
	translate: 0
	flip_ratio: 0.0
	flip_direction: null
	object_classes:
	- car
	- truck
	- construction_vehicle
	- bus
	- trailer
	- barrier
	- motorcycle
	- bicycle
	- pedestrian
	- traffic_cone
	map_classes:
	- drivable_area
	- ped_crossing
	- walkway
	- stop_line
	- carpark_area
	- road_divider
	- lane_divider
	- road_block
	input_modality:
	use_lidar: false
	use_camera: true
	use_radar: false
	use_map: false
	use_external: false
	train_pipeline:
	- type: LoadMultiViewImageFromFiles
	to_float32: true
	- type: LoadAnnotations3D
	with_bbox_3d: true
	with_label_3d: true
	with_attr_label: false
	- type: ImageAug3D
	final_dim: ${...image_size}
	resize_lim: ${...augment2d.resize[0]}
	bot_pct_lim:
	- 0.0
	- 0.0
	rot_lim: ${...augment2d.rotate}
	rand_flip: false
	is_train: false
	- type: GlobalRotScaleTrans
	resize_lim: ${...augment3d.scale}
	rot_lim: ${...augment3d.rotate}
	trans_lim: ${...augment3d.translate}
	is_train: true
	- type: ObjectNameFilter
	classes: ${...object_classes}
	- type: LoadBEVSegmentation
	dataset_root: ${...dataset_root}
	xbound: ${...map_bound.x}
	ybound: ${...map_bound.y}
	classes: ${...map_classes}
	object_classes: null
	aux_data: null
	cache_file: ${...dataset_cache_file.0}
	- type: RandomFlip3DwithViews
	flip_ratio: ${...augment3d.flip_ratio}
	direction: ${...augment3d.flip_direction}
	- type: ReorderMultiViewImages
	order: ${...view_order}
	safe: false
	- type: ImageNormalize
	mean:
	- 0.5
	- 0.5
	- 0.5
	std:
	- 0.5
	- 0.5
	- 0.5
	- type: DefaultFormatBundle3D
	classes: ${...object_classes}
	- type: Collect3D
	keys:
	- img
	- gt_bboxes_3d
	- gt_labels_3d
	- gt_masks_bev
	meta_keys: ${...collect_meta_keys}
	meta_lis_keys: ${...collect_meta_lis_keys}
	test_pipeline:
	- type: LoadMultiViewImageFromFiles
	to_float32: true
	- type: LoadAnnotations3D
	with_bbox_3d: true
	with_label_3d: true
	with_attr_label: false
	- type: ImageAug3D
	final_dim: ${...image_size}
	resize_lim: ${...augment2d.resize[0]}
	bot_pct_lim:
	- 0.0
	- 0.0
	rot_lim:
	- 0.0
	- 0.0
	rand_flip: false
	is_train: false
	- type: GlobalRotScaleTrans
	resize_lim: ${...augment3d.scale}
	rot_lim: ${...augment3d.rotate}
	trans_lim: ${...augment3d.translate}
	is_train: true
	- type: ObjectNameFilter
	classes: ${...object_classes}
	- type: LoadBEVSegmentation
	dataset_root: ${...dataset_root}
	xbound: ${...map_bound.x}
	ybound: ${...map_bound.y}
	classes: ${...map_classes}
	object_classes: null
	aux_data: null
	cache_file: ${...dataset_cache_file.1}
	- type: ReorderMultiViewImages
	order: ${...view_order}
	safe: false
	- type: ImageNormalize
	mean:
	- 0.5
	- 0.5
	- 0.5
	std:
	- 0.5
	- 0.5
	- 0.5
	- type: DefaultFormatBundle3D
	classes: ${...object_classes}
	- type: Collect3D
	keys:
	- img
	- gt_bboxes_3d
	- gt_labels_3d
	- gt_masks_bev
	meta_keys: ${...collect_meta_keys}
	meta_lis_keys: ${...collect_meta_lis_keys}
	data:
	train:
	type: ${...dataset_type}
	dataset_root: ${...dataset_root}
	ann_file: ${...dataset_process_root}nuscenes_interp_12Hz_updated_description_train.pickle
	pipeline: ${...train_pipeline}
	object_classes: ${...object_classes}
	map_classes: ${...map_classes}
	modality: ${...input_modality}
	test_mode: false
	force_all_boxes: true
	box_type_3d: LiDAR
	filter_empty_gt: false
	video_length: ${model.video_length}
	start_on_keyframe: ${dataset.start_on_keyframe}
	ref_length: ${model.ref_length}
	candidate_length: 5
	val:
	type: ${...dataset_type}
	dataset_root: ${...dataset_root}
	ann_file: ${...dataset_process_root}nuscenes_interp_12Hz_updated_description_val.pickle
	pipeline: ${...test_pipeline}
	object_classes: ${...object_classes}
	map_classes: ${...map_classes}
	modality: ${...input_modality}
	test_mode: false
	force_all_boxes: true
	box_type_3d: LiDAR
	filter_empty_gt: false
	video_length: ${model.video_length}
	start_on_keyframe: ${dataset.start_on_keyframe}
	ref_length: ${model.ref_length}
	candidate_length: 5
	test:
	type: ${...dataset_type}
	dataset_root: ${...dataset_root}
	ann_file: ${...dataset_process_root}nuscenes_interp_12Hz_updated_description_val.pickle
	pipeline: ${...test_pipeline}
	object_classes: ${...object_classes}
	map_classes: ${...map_classes}
	modality: ${...input_modality}
	test_mode: true
	force_all_boxes: true
	box_type_3d: LiDAR
	filter_empty_gt: false
	video_length: ${model.video_length}
	start_on_keyframe: ${dataset.start_on_keyframe}
	ref_length: ${model.ref_length}
	candidate_length: 5
	start_on_keyframe: true
	accelerator:
	gradient_accumulation_steps: 1
	mixed_precision: fp16
	report_to: tensorboard
	runner:
	foreground_loss_mode: null
	foreground_loss_weight: 0.0
	bbox_drop_ratio: 0
	bbox_add_ratio: 0
	bbox_add_num: 3
	keyframe_rate: 1
	num_train_epochs: 100
	train_batch_size: 1
	max_train_steps: null
	num_workers: 4
	prefetch_factor: 4
	display_per_epoch: 40
	display_per_n_min: 10
	max_grad_norm: 1.0
	set_grads_to_none: true
	enable_xformers_memory_efficient_attention: true
	unet_in_fp16: true
	enable_unet_checkpointing: false
	enable_controlnet_checkpointing: false
	noise_offset: 0.0
	train_with_same_offset: true
	use_8bit_adam: false
	adam_beta1: 0.9
	adam_beta2: 0.999
	adam_weight_decay: 0.01
	adam_epsilon: 1.0e-08
	learning_rate: 8.0e-05
	lr_scheduler: constant_with_warmup
	gradient_accumulation_steps: 1
	lr_num_cycles: 1
	lr_power: 1.0
	lr_warmup_steps: 3000
	checkpointing_steps: 10000
	validation_steps: 5000
	save_model_per_epoch: 10
	validation_before_run: true
	validation_index:
	- 138
	- 632
	- 1301
	- 2342
	validation_times: 1
	validation_batch_size: 1
	validation_show_box: true
	validation_seed_global: false
	pipeline_param:
	guidance_scale: 2
	num_inference_steps: 20
	eta: 0.0
	controlnet_conditioning_scale: 1.0
	guess_mode: false
	use_zero_map_as_unconditional: false
	bbox_max_length: null
	init_noise: both
	view_order: ${dataset.view_order}
	keyframe_rate: 6