| action_optimizer: | |
| _target_: mbrl.planning.CEMOptimizer | |
| alpha: 0.1 | |
| clipped_normal: false | |
| device: cpu | |
| elite_ratio: 0.1 | |
| lower_bound: ??? | |
| num_iterations: 5 | |
| population_size: 350 | |
| return_mean_elites: true | |
| upper_bound: ??? | |
| algorithm: | |
| agent: | |
| _target_: mbrl.third_party.pytorch_sac_pranz24.sac.SAC | |
| action_space: | |
| _target_: gym.env.Box | |
| high: | |
| - 1.0 | |
| - 1.0 | |
| - 1.0 | |
| low: | |
| - -1.0 | |
| - -1.0 | |
| - -1.0 | |
| shape: | |
| - 3 | |
| args: | |
| alpha: 0.2 | |
| automatic_entropy_tuning: false | |
| device: cpu | |
| gamma: 0.99 | |
| hidden_size: 512 | |
| lr: 0.0003 | |
| policy: Gaussian | |
| target_entropy: 1 | |
| target_update_interval: 4 | |
| tau: 0.005 | |
| num_inputs: 11 | |
| freq_train_model: 250 | |
| initial_exploration_steps: 5000 | |
| learned_rewards: true | |
| name: mbpo | |
| normalize: true | |
| normalize_double_precision: true | |
| num_eval_episodes: 1 | |
| random_initial_explore: false | |
| real_data_ratio: 0.0 | |
| sac_samples_action: true | |
| target_is_delta: true | |
| debug_mode: false | |
| device: cpu | |
| dynamics_model: | |
| _target_: mbrl.models.GaussianMLP | |
| activation_fn_cfg: | |
| _target_: torch.nn.SiLU | |
| deterministic: false | |
| device: cpu | |
| ensemble_size: 7 | |
| hid_size: 200 | |
| in_size: 14 | |
| learn_logvar_bounds: false | |
| num_layers: 4 | |
| out_size: 12 | |
| propagation_method: random_model | |
| experiment: default | |
| log_frequency_agent: 1000 | |
| overrides: | |
| cem_alpha: 0.1 | |
| cem_clipped_normal: false | |
| cem_elite_ratio: 0.1 | |
| cem_num_iters: 5 | |
| cem_population_size: 350 | |
| effective_model_rollouts_per_step: 400 | |
| env: gym___Hopper-v2 | |
| epoch_length: 1000 | |
| freq_train_model: 250 | |
| model_batch_size: 256 | |
| model_lr: 0.001 | |
| model_wd: 1.0e-05 | |
| num_elites: 5 | |
| num_epochs_to_retain_sac_buffer: 1 | |
| num_sac_updates_per_step: 40 | |
| num_steps: 125000 | |
| patience: 5 | |
| planning_horizon: 15 | |
| rollout_schedule: | |
| - 20 | |
| - 150 | |
| - 1 | |
| - 15 | |
| sac_alpha: 0.2 | |
| sac_automatic_entropy_tuning: false | |
| sac_batch_size: 256 | |
| sac_gamma: 0.99 | |
| sac_hidden_size: 512 | |
| sac_lr: 0.0003 | |
| sac_policy: Gaussian | |
| sac_target_entropy: 1 | |
| sac_target_update_interval: 4 | |
| sac_tau: 0.005 | |
| sac_updates_every_steps: 1 | |
| term_fn: hopper | |
| validation_ratio: 0.2 | |
| root_dir: ./logs | |
| save_video: false | |
| seed: 0 | |