RcsPySim
A robot control and simulation library
RcsSimEnv.h
Go to the documentation of this file.
1 /*******************************************************************************
2  Copyright (c) 2020, Fabio Muratore, Honda Research Institute Europe GmbH, and
3  Technical University of Darmstadt.
4  All rights reserved.
5 
6  Redistribution and use in source and binary forms, with or without
7  modification, are permitted provided that the following conditions are met:
8  1. Redistributions of source code must retain the above copyright
9  notice, this list of conditions and the following disclaimer.
10  2. Redistributions in binary form must reproduce the above copyright
11  notice, this list of conditions and the following disclaimer in the
12  documentation and/or other materials provided with the distribution.
13  3. Neither the name of Fabio Muratore, Honda Research Institute Europe GmbH,
14  or Technical University of Darmstadt, nor the names of its contributors may
15  be used to endorse or promote products derived from this software without
16  specific prior written permission.
17 
18  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
19  ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
20  WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21  DISCLAIMED. IN NO EVENT SHALL FABIO MURATORE, HONDA RESEARCH INSTITUTE EUROPE GMBH,
22  OR TECHNICAL UNIVERSITY OF DARMSTADT BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24  PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
25  OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
26  IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
27  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
28  POSSIBILITY OF SUCH DAMAGE.
29 *******************************************************************************/
30 
31 #ifndef _RCSSIMENV_H_
32 #define _RCSSIMENV_H_
33 
34 #include "ExperimentConfig.h"
36 
37 #include "util/BoxSpace.h"
38 #include "util/nocopy.h"
39 
40 #include <mutex>
41 
42 namespace Rcs
43 {
44 
45 class Viewer;
46 
47 class HUD;
48 
49 /**
50  * Thrown if the joint limits are violated after applying the action.
51  */
52 class JointLimitException : public std::runtime_error
53 {
54  using runtime_error::runtime_error;
55 };
56 
57 /**
58  * Rcs-backed machine learning simulation environment.
59  * This class provides a user-driven update loop. It is started by reset(), and then updated by step().
60  */
61 class RcsSimEnv
62 {
63 public:
64  /**
65  * Create the environment from the given property source.
66  *
67  * @param propertySource configuration
68  */
69  explicit RcsSimEnv(PropertySource* propertySource);
70 
71  virtual ~RcsSimEnv();
72 
73  // not copy- or movable
75 
76  /**
77  * Reset the internal state in order to start a new rollout.
78  *
79  * @param domainParam physics params to use for this rollout
80  * @param initState initial state for this rollout
81  * @return observation of the initial state
82  */
83  virtual MatNd* reset(PropertySource* domainParam = PropertySource::empty(), const MatNd* initState = NULL);
84 
85  /**
86  * Perform one environment step.
87  *
88  * @param action action vector
89  * @param disturbance disturbance vector, e.g. a 3-dim force
90  * @return observation after the step was processed
91  * @throws JointLimitException if the joint limits were violated
92  */
93  virtual MatNd* step(const MatNd* action, const MatNd* disturbance = NULL);
94 
95  /**
96  * Render the current state of the simulation.
97  *
98  * Should be called after each step call.
99  * The RcsGraphics renderer runs mostly on it's own thread, so that isn't absolutely
100  * necessairy, but it is required to update the HUD.
101  *
102  * @param mode only 'human' is supported at the moment, is also the default
103  * @param close true to close the render window
104  */
105  virtual void render(std::string mode = "human", bool close = false);
106 
107  /** Start/stop video recording. */
108  void toggleVideoRecording();
109 
110  /**
111  * Observation space of this environment.
112  * All valid observation values fit inside this.
113  */
114  const BoxSpace* observationSpace();
115 
116  /**
117  * Action space of this environment.
118  * All valid action values fit inside this.
119  * The actions provided from Python are not projected to this space, i.e. this must be done on the Python side.
120  */
121  const BoxSpace* actionSpace();
122 
123  /**
124  * Initial state space of this environment.
125  * All valid initial state values fit inside this.
126  */
127  const BoxSpace* initStateSpace();
128 
129  /**
130  * Set the transition noise buffer.
131  * In order to avoid heavy stochastic computation in every step, the transition noise values are pregenerated.
132  * The buffer should have a row count equal to getInternalStateDim(). Every column is a set of noise values
133  * applied in one step. In the next step, the next column is used. If the last column is reached, the next step
134  * will use the first column again.
135  *
136  * @param tnb transition noise buffer
137  */
138  void setTransitionNoiseBuffer(const MatNd* tnb);
139 
140  /** Internal state dimension, required for the transition noise buffer. */
141  unsigned int getInternalStateDim();
142 
143  /** Configuration settings for the experiment */
145  {
146  return config;
147  }
148 
149  /** Physics parameter management */
151  {
152  return physicsManager;
153  }
154 
155  /** Observation from last step */
156  MatNd* getCurrentObservation() const
157  {
158  return currentObservation;
159  }
160 
161  /** Action from last step */
162  MatNd* getCurrentAction() const
163  {
164  return currentAction;
165  }
166 
167 private:
168  //! Guards for parallel access to graph (can happen from gui)
169  std::mutex graphLock;
170 
171  //! Experiment configuration
174 
175  //! Physics simulator factory
177  //! Physics simulator
178  PhysicsBase* physicsSim;
179  //! Disturbance force simulator
181 
182  //! Initial state setter
184 
185  //! Temporary matrices
186  MatNd* q_ctrl;
187  MatNd* qd_ctrl;
188  MatNd* T_ctrl;
189 
190  //! Counters
191  unsigned int currentStep;
192  double currentTime;
193 
194  //! Transition noise values (every column is one set of noise values for every state variable)
196  unsigned int tnbIndex;
197  bool transitionNoiseIncludeVelocity; // false if transition noise is only applied to state
198 
199  //! Observation and action at last step
202 
203  //! Visualization
204  Viewer* viewer;
205  bool usePhysicsNode; // use PhysicsNode (can not be reset) or GraphicsNode for the viewer
206  HUD* hud;
207 // int adWidgetHandle;
208 };
209 
210 } /* namespace Rcs */
211 
212 #endif /* _RCSSIMENV_H_ */
bool usePhysicsNode
Definition: RcsSimEnv.h:205
#define RCSPYSIM_NOCOPY_NOMOVE(cls)
Definition: nocopy.h:40
unsigned int currentStep
Counters.
Definition: RcsSimEnv.h:191
Viewer * viewer
Visualization.
Definition: RcsSimEnv.h:204
MatNd * getCurrentAction() const
Definition: RcsSimEnv.h:162
bool allJointsPosCtrl
Definition: RcsSimEnv.h:173
MatNd * currentObservation
Observation and action at last step.
Definition: RcsSimEnv.h:200
MatNd * T_ctrl
Definition: RcsSimEnv.h:188
PhysicsParameterManager * physicsManager
Physics simulator factory.
Definition: RcsSimEnv.h:176
MatNd * qd_ctrl
Definition: RcsSimEnv.h:187
PhysicsParameterManager * getPhysicsManager()
Definition: RcsSimEnv.h:150
static PropertySource * empty()
MatNd * currentAction
Definition: RcsSimEnv.h:201
ExperimentConfig * getConfig()
Definition: RcsSimEnv.h:144
unsigned int tnbIndex
Definition: RcsSimEnv.h:196
ForceDisturber * disturber
Disturbance force simulator.
Definition: RcsSimEnv.h:180
PhysicsBase * physicsSim
Physics simulator.
Definition: RcsSimEnv.h:178
MatNd * q_ctrl
Temporary matrices.
Definition: RcsSimEnv.h:186
MatNd * getCurrentObservation() const
Definition: RcsSimEnv.h:156
double currentTime
Definition: RcsSimEnv.h:192
MatNd * transitionNoiseBuffer
Transition noise values (every column is one set of noise values for every state variable) ...
Definition: RcsSimEnv.h:195
bool transitionNoiseIncludeVelocity
Definition: RcsSimEnv.h:197
InitStateSetter * initStateSetter
Initial state setter.
Definition: RcsSimEnv.h:183
ExperimentConfig * config
Experiment configuration.
Definition: RcsSimEnv.h:172
std::mutex graphLock
Guards for parallel access to graph (can happen from gui)
Definition: RcsSimEnv.h:169