# Copyright (c) 2020, Fabio Muratore, Honda Research Institute Europe GmbH, and
# Technical University of Darmstadt.
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
# 1. Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# 3. Neither the name of Fabio Muratore, Honda Research Institute Europe GmbH,
# or Technical University of Darmstadt, nor the names of its contributors may
# be used to endorse or promote products derived from this software without
# specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL FABIO MURATORE, HONDA RESEARCH INSTITUTE EUROPE GMBH,
# OR TECHNICAL UNIVERSITY OF DARMSTADT BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
# OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
# IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
from copy import deepcopy
import pytest
import torch as to
import pyrado
from pyrado.environments.pysim.ball_on_beam import BallOnBeamSim
from pyrado.environments.pysim.quanser_ball_balancer import QBallBalancerSim
from pyrado.environments.sim_base import SimEnv
from pyrado.exploration.stochastic_action import NormalActNoiseExplStrat
from pyrado.exploration.stochastic_params import NormalParamNoise
from pyrado.policies.base import Policy
@pytest.mark.parametrize(
    "env",
    [
        BallOnBeamSim(dt=0.02, max_steps=1),
        QBallBalancerSim(dt=0.02, max_steps=1),
    ],
    ids=["bob", "qbb"],
)
@pytest.mark.parametrize("policy", ["linear_policy", "fnn_policy"], ids=["lin", "fnn"], indirect=True)
def test_noise_on_act(env: SimEnv, policy: Policy):
    """
    Check that exploration noise in action space changes the action returned for an observation.

    In each of the 100 iterations, a fresh `NormalActNoiseExplStrat` is wrapped around the policy, its noise is
    adapted to a randomly drawn mean and standard deviation, and the action obtained through the strategy is
    required to differ from the action the bare policy computes for the same observation.

    :param env: simulation environment; only its observation and action spaces are used
    :param policy: policy under test, resolved from the fixture name via indirect parametrization
    """
    # Seed once up front so the random draws below are repeatable
    pyrado.set_seed(0)

    for _ in range(100):
        # Init the exploration strategy
        act_noise_strat = NormalActNoiseExplStrat(policy, std_init=0.5, train_mean=True)

        # Set new parameters for the exploration noise (one random scalar std shared by all action dimensions)
        std = to.ones(env.act_space.flat_dim) * to.rand(1)
        mean = to.rand(env.act_space.shape)
        act_noise_strat.noise.adapt(mean, std)
        assert (mean == act_noise_strat.noise.mean).all()

        # Sample a random observation from the environment and convert it to a tensor of the default dtype
        obs = to.from_numpy(env.obs_space.sample_uniform()).to(dtype=to.get_default_dtype())

        # Get a clean and a noisy action for the very same observation
        act = policy(obs)  # policy expects Tensors
        act_noisy = act_noise_strat(obs)  # the strategy is fed the same tensor observation as the policy
        assert isinstance(act, to.Tensor)
        assert not to.equal(act, act_noisy)
@pytest.mark.parametrize(
    "env",
    [
        BallOnBeamSim(dt=0.02, max_steps=1),
        QBallBalancerSim(dt=0.02, max_steps=1),
    ],
    ids=["bob", "qbb"],
)
@pytest.mark.parametrize("policy", ["linear_policy", "fnn_policy"], ids=["lin", "fnn"], indirect=True)
def test_noise_on_param(env: SimEnv, policy: Policy):
    """
    Check that exploration noise in parameter space changes the action returned for an observation.

    In each of the 5 iterations, a fresh `NormalParamNoise` with a full covariance is created, adapted to a random
    mean and an identity covariance, and reset again. A parameter set is then sampled around the policy's current
    parameters and loaded into a deep copy of the policy, whose action must differ from the original policy's
    action for the same observation.

    :param env: simulation environment; only its observation space is used
    :param policy: policy under test, resolved from the fixture name via indirect parametrization
    """
    # Seed once up front so the random draws below are repeatable (mirrors `test_noise_on_act`)
    pyrado.set_seed(0)

    for _ in range(5):
        # Init the exploration strategy
        param_noise_strat = NormalParamNoise(
            policy.num_param,
            full_cov=True,
            std_init=1.0,
            std_min=0.01,
            train_mean=True,
            use_cuda=policy.device != "cpu",  # request CUDA whenever the policy is not on the CPU
        )

        # Set new parameters for the exploration noise
        mean = to.rand(policy.num_param)
        cov = to.eye(policy.num_param)
        param_noise_strat.adapt(mean, cov)
        # NOTE(review): `assert_allclose` is deprecated in recent PyTorch releases in favor of `assert_close`;
        # consider migrating once the minimum supported torch version allows it.
        to.testing.assert_allclose(mean, param_noise_strat.noise.mean)

        # Reset exploration strategy
        param_noise_strat.reset_expl_params()

        # Sample a random observation from the environment and convert it to a tensor of the default dtype
        obs = to.from_numpy(env.obs_space.sample_uniform()).to(dtype=to.get_default_dtype())

        # Get a clean action from the unmodified policy
        act = policy(obs)  # policy expects Tensors

        # Get a noisy action from a copy of the policy that carries the sampled parameters
        sampled_param = param_noise_strat.sample_param_set(policy.param_values)
        new_policy = deepcopy(policy)  # copy so the policy under test keeps its parameters
        new_policy.param_values = sampled_param
        act_noisy = new_policy(obs)  # same tensor observation, perturbed parameters
        assert isinstance(act, to.Tensor)
        assert not to.equal(act, act_noisy)