# Source code for tests.test_exploration

# Copyright (c) 2020, Fabio Muratore, Honda Research Institute Europe GmbH, and
# Technical University of Darmstadt.
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
# 1. Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
# 3. Neither the name of Fabio Muratore, Honda Research Institute Europe GmbH,
#    or Technical University of Darmstadt, nor the names of its contributors may
#    be used to endorse or promote products derived from this software without
#    specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL FABIO MURATORE, HONDA RESEARCH INSTITUTE EUROPE GMBH,
# OR TECHNICAL UNIVERSITY OF DARMSTADT BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
# OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
# IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.

from copy import deepcopy

import pytest
import torch as to

import pyrado
from pyrado.environments.pysim.ball_on_beam import BallOnBeamSim
from pyrado.environments.pysim.quanser_ball_balancer import QBallBalancerSim
from pyrado.environments.sim_base import SimEnv
from pyrado.exploration.stochastic_action import NormalActNoiseExplStrat
from pyrado.exploration.stochastic_params import NormalParamNoise
from pyrado.policies.base import Policy


@pytest.mark.parametrize(
    "env",
    [
        BallOnBeamSim(dt=0.02, max_steps=1),
        QBallBalancerSim(dt=0.02, max_steps=1),
    ],
    ids=["bob", "qbb"],
)
@pytest.mark.parametrize("policy", ["linear_policy", "fnn_policy"], ids=["lin", "fnn"], indirect=True)
def test_noise_on_act(env: SimEnv, policy: Policy):
    """
    Check that action-space exploration noise changes the action returned for an observation.

    For 100 repetitions, a fresh `NormalActNoiseExplStrat` is wrapped around the policy, its noise is adapted to a
    random mean and a random (isotropic) standard deviation, and the action of the bare policy is compared with the
    action returned by the exploration strategy for the same random observation.

    :param env: environment providing the observation and action spaces
    :param policy: policy to explore with; passed indirectly, i.e. the parametrized name is handed to the `policy`
                   fixture (defined outside this file, presumably in the test configuration)
    """
    # Fix the seed such that the random draws below are reproducible
    pyrado.set_seed(0)

    for _ in range(100):
        # Init the exploration strategy
        act_noise_strat = NormalActNoiseExplStrat(policy, std_init=0.5, train_mean=True)

        # Set new parameters for the exploration noise (one random scalar std shared by all action dimensions)
        std = to.ones(env.act_space.flat_dim) * to.rand(1)
        mean = to.rand(env.act_space.shape)
        act_noise_strat.noise.adapt(mean, std)
        # The adapted mean must be stored unchanged
        assert (mean == act_noise_strat.noise.mean).all()

        # Sample a random observation from the environment and convert it to a tensor of the default dtype
        obs = to.from_numpy(env.obs_space.sample_uniform()).to(dtype=to.get_default_dtype())

        # Get a clean and a noisy action for the same observation
        act = policy(obs)  # policy expects Tensors
        act_noisy = act_noise_strat(obs)  # the exploration strategy is called with the same tensor observation
        assert isinstance(act, to.Tensor)
        # The exploration noise must have perturbed the action
        assert not to.equal(act, act_noisy)


@pytest.mark.parametrize(
    "env",
    [
        BallOnBeamSim(dt=0.02, max_steps=1),
        QBallBalancerSim(dt=0.02, max_steps=1),
    ],
    ids=["bob", "qbb"],
)
@pytest.mark.parametrize("policy", ["linear_policy", "fnn_policy"], ids=["lin", "fnn"], indirect=True)
def test_noise_on_param(env: SimEnv, policy: Policy):
    """
    Check that parameter-space exploration noise changes the action returned for an observation.

    For 5 repetitions, a fresh `NormalParamNoise` with a full covariance is created, adapted to a random mean and an
    identity covariance, and reset. Then a parameter set is sampled around the policy's current parameters, loaded
    into a copy of the policy, and the action of that copy is compared with the action of the original policy for
    the same random observation.

    :param env: environment providing the observation space
    :param policy: policy whose parameters are perturbed; passed indirectly, i.e. the parametrized name is handed to
                   the `policy` fixture (defined outside this file, presumably in the test configuration)
    """
    # Fix the seed such that the random draws below are reproducible (consistent with test_noise_on_act)
    pyrado.set_seed(0)

    for _ in range(5):
        # Init the exploration strategy; the CUDA flag is set whenever the policy's device is not "cpu"
        param_noise_strat = NormalParamNoise(
            policy.num_param,
            full_cov=True,
            std_init=1.0,
            std_min=0.01,
            train_mean=True,
            use_cuda=policy.device != "cpu",
        )

        # Set new parameters for the exploration noise
        mean = to.rand(policy.num_param)
        cov = to.eye(policy.num_param)
        param_noise_strat.adapt(mean, cov)
        # The adapted mean must be stored (up to numerical tolerance)
        to.testing.assert_allclose(mean, param_noise_strat.noise.mean)

        # Reset exploration strategy
        param_noise_strat.reset_expl_params()

        # Sample a random observation from the environment and convert it to a tensor of the default dtype
        obs = to.from_numpy(env.obs_space.sample_uniform()).to(dtype=to.get_default_dtype())

        # Get a clean action from the original policy
        act = policy(obs)  # policy expects Tensors

        # Get a noisy action from a copy of the policy carrying the sampled parameters; the copy keeps the original
        # policy untouched
        sampled_param = param_noise_strat.sample_param_set(policy.param_values)
        new_policy = deepcopy(policy)
        new_policy.param_values = sampled_param
        act_noisy = new_policy(obs)  # the perturbed policy receives the same tensor observation

        assert isinstance(act, to.Tensor)
        # The perturbed parameters must have changed the action
        assert not to.equal(act, act_noisy)