RcsPySim
A robot control and simulation library
PolicyComponent.cpp
Go to the documentation of this file.
1 /*******************************************************************************
2  Copyright (c) 2020, Fabio Muratore, Honda Research Institute Europe GmbH, and
3  Technical University of Darmstadt.
4  All rights reserved.
5 
6  Redistribution and use in source and binary forms, with or without
7  modification, are permitted provided that the following conditions are met:
8  1. Redistributions of source code must retain the above copyright
9  notice, this list of conditions and the following disclaimer.
10  2. Redistributions in binary form must reproduce the above copyright
11  notice, this list of conditions and the following disclaimer in the
12  documentation and/or other materials provided with the distribution.
13  3. Neither the name of Fabio Muratore, Honda Research Institute Europe GmbH,
14  or Technical University of Darmstadt, nor the names of its contributors may
15  be used to endorse or promote products derived from this software without
16  specific prior written permission.
17 
18  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
19  ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
20  WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21  DISCLAIMED. IN NO EVENT SHALL FABIO MURATORE, HONDA RESEARCH INSTITUTE EUROPE GMBH,
22  OR TECHNICAL UNIVERSITY OF DARMSTADT BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24  PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
25  OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
26  IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
27  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
28  POSSIBILITY OF SUCH DAMAGE.
29 *******************************************************************************/
30 
31 #include "PolicyComponent.h"
32 #include "action/ActionModel.h"
33 #include "action/ActionModelIK.h"
37 #include "util/eigen_matnd.h"
38 #include "util/string_format.h"
39 
40 #include <EntityBase.h>
41 
42 #include <Rcs_macros.h>
43 #include <Rcs_typedef.h>
44 #include <Rcs_cmdLine.h>
45 
46 #include <memory>
47 
49  Rcs::EntityBase* entity,
50  Rcs::PropertySource* settings,
51  bool computeJointVelocities) :
52  ComponentBase(entity), computeJointVelocities(computeJointVelocities)
53 {
54  // Load experiment
56 
57  // The dt value from the settings should be propagated to the whole entity
58  entity->setDt(experiment->dt);
59 
60  // Try to load policy from command line args
61  CmdLineParser argP;
62  std::string policyFile;
63  if (argP.getArgument("-policy", &policyFile, "Policy file to use")) {
64  std::string policyType;
65  if (!argP.getArgument("-policyType", &policyType, "Type of policy to load")) {
66  // Try to determine from file
67  if (policyFile.size() >= 4 && policyFile.substr(policyFile.size() - 4) == ".pth") {
68  policyType = "torch";
69  }
70  else {
71  RFATAL("Cannot determine policy type from policy file %s.", policyFile.c_str());
72  }
73  }
74  RCHECK(!policyType.empty());
75  policy = Rcs::ControlPolicy::create(policyType.c_str(), policyFile.c_str());
76  }
77  else {
78  // Try to load from config
79  auto policyConfig = settings->getChild("policy");
80  if (policyConfig->exists()) {
81  // Load from config
82  policy = Rcs::ControlPolicy::create(policyConfig);
83  }
84  else {
85  RFATAL("No policy");
86  }
87  }
88 
89  // Start with inactive policy
90  policyActive = false;
91  eStop = false;
92  eRec = false;
93  goHome = false;
94  renderingInitialized = false;
95 
96  // Init temp matrices, making sure the initial command is identical to the initial state
99 
100  desiredGraph = RcsGraph_clone(experiment->graph);
101 
102  T_ctrl = MatNd_create(experiment->graph->dof, 1);
103 
104  // Copy collision model for desired graph
105  collisionMdl = RcsCollisionModel_clone(experiment->collisionMdl, desiredGraph);
106 
107  // Setup go home policy
108  std::unique_ptr<ActionModel> goHomeWrappedAM;
109  auto existingAMIK = experiment->actionModel->unwrap<ActionModelIK>();
110  if (existingAMIK) {
111  // Use existing IK definition
112  goHomeWrappedAM.reset(existingAMIK->clone(experiment->graph));
113  }
114  else {
115  // No predefined IK, use joint position control
116  RLOG(0, "Couldn't find IK definition in action model, using joint position control for go home.");
117  goHomeWrappedAM.reset(new AMJointControlPosition(experiment->graph));
118  }
119  goHomeWrappedAM->reset();
120 
121  // Extract home position
122  Eigen::VectorXd homePos;
123  homePos.setZero(goHomeWrappedAM->getDim());
124  MatNd hpMat = viewEigen2MatNd(homePos);
125  goHomeWrappedAM->getStableAction(&hpMat);
126 
127  // Setup go home DS
128  PropertySource* ghConfig = experiment->properties->getChild("goHomeDS");
129  if (ghConfig->exists()) {
130  // Read from config, but always set goal pos to home pos
131  goHomeDS = DynamicalSystem::create(ghConfig, goHomeWrappedAM->getDim());
132  goHomeDS->setGoal(homePos);
133  }
134  else {
135  // Create a simple linear ds
136  goHomeDS = new DSLinear(
137  Eigen::MatrixXd::Identity(goHomeWrappedAM->getDim(), goHomeWrappedAM->getDim()), homePos);
138  }
139  goHomeAM = new AMDynamicalSystemActivation(goHomeWrappedAM.release(), {goHomeDS});
140 
141  // Register subscribers
142  subscribe();
143 
144  // Reset models and policy
147  policy->reset();
148  goHomeAM->reset();
149 }
150 
152 {
153  unsubscribe();
154 
155  RcsCollisionModel_destroy(collisionMdl);
156 
157  delete goHomeAM;
158  // goHomeDS is owned by goHomeAM, must not delete separately
159 
160  // Delete temporary matrices
161  MatNd_destroy(T_ctrl);
162  RcsGraph_destroy(desiredGraph);
163 
164  MatNd_destroy(observation);
165  MatNd_destroy(action);
166 
167  delete policy;
168  delete experiment;
169 }
170 
172 {
173 #define SUBSCRIBE(name) ComponentBase::subscribe(#name, &PolicyComponent::on##name)
174  SUBSCRIBE(UpdatePolicy);
175  SUBSCRIBE(InitFromState);
176  SUBSCRIBE(EmergencyStop);
177  SUBSCRIBE(EmergencyRecover);
178  SUBSCRIBE(Render);
179  SUBSCRIBE(Print);
180  SUBSCRIBE(PolicyStart);
181  SUBSCRIBE(PolicyPause);
182  SUBSCRIBE(PolicyReset);
183  SUBSCRIBE(GoHome);
184 }
185 
186 void Rcs::PolicyComponent::onUpdatePolicy(const RcsGraph* state)
187 {
188  if (eStop) {
189  return;
190  }
191 
192  // Update current graph
194  if (eRec) {
195  // Don't compute velocity on emergency recovery to avoid instabilities
196  MatNd_setZero(experiment->graph->q_dot);
197  }
198  else {
199  // Compute velocity using finite differences.
200  // The last state is still stored in the experiment's graph, since we only update that later
201  MatNd_sub(experiment->graph->q_dot, state->q, experiment->graph->q);
202  MatNd_constMulSelf(experiment->graph->q_dot, 1/experiment->dt);
203  }
204  }
205  else {
206  // Use observed velocities
207  MatNd_copy(experiment->graph->q_dot, state->q_dot);
208  }
209 
210  // Update graph
211  MatNd_copy(experiment->graph->q, state->q);
212  RcsGraph_computeForwardKinematics(experiment->graph, NULL, NULL);
213 
214  // Reset observation and action model on recovery
215  if (eRec) {
218  goHomeAM->reset();
219  // Don't reset policy since we might want it to continue.
220  // Reset desired graph to current graph to avoid sending commands right away
221  RcsGraph_setState(desiredGraph, experiment->graph->q, experiment->graph->q_dot);
222  }
223 
224  // Compute observation
226 
227  // Compute action
228  if (policyActive) {
230 
231  // Run action through action model
233  // Update desired graph
234  RcsGraph_computeForwardKinematics(desiredGraph, NULL, NULL);
235  }
236  else if (goHome) {
237  // Invoke goHome MP
238  double goHomeAct = 1;
239  MatNd ghaMat = MatNd_fromPtr(1, 1, &goHomeAct);
241  // Update desired graph
242  RcsGraph_computeForwardKinematics(desiredGraph, NULL, NULL);
243  }
244 
245  // Joint limit check
246  if (this->jointLimitCheck) {
247  unsigned int aor = RcsGraph_numJointLimitsViolated(desiredGraph, true);
248  if (aor > 0) {
249  RLOG(0, "%d joint limit violations - triggering emergency stop", aor);
250  getEntity()->publish("EmergencyStop");
251  RcsGraph_printState(desiredGraph, desiredGraph->q);
252  }
253  }
254 
255  // Collision check
256  if (this->collisionCheck && collisionMdl) {
257  // Compute collisions
258  RcsCollisionModel_compute(collisionMdl);
259  // Check distance
260  const double distLimit = 0.001;
261  double minDist = RcsCollisionMdl_getMinDist(collisionMdl);
262  if (minDist < distLimit) {
263  RLOG(0, "Found collision distance of %f (must be >%f) - triggering emergency stop",
264  minDist, distLimit);
265  RcsCollisionModel_fprintCollisions(stdout, collisionMdl, distLimit);
266  getEntity()->publish("EmergencyStop");
267  }
268  }
269 
270  // Wherever we are, emergency recovery is done
271  eRec = false;
272 }
273 
274 void Rcs::PolicyComponent::onInitFromState(const RcsGraph* target)
275 {
276  RLOG(0, "PolicyComponent::onInitFromState()");
277  // Update current graph
278  MatNd_copy(experiment->graph->q, target->q);
280  // Set zero velocities on reset
281  MatNd_setZero(experiment->graph->q_dot);
282  }
283  else {
284  // Use observed velocities
285  MatNd_copy(experiment->graph->q_dot, target->q_dot);
286  }
287  RcsGraph_computeForwardKinematics(experiment->graph, NULL, NULL);
288 
289  // Reset action and activation model to enforce consistent state.
292 
293  // Update initial desired states
294  RcsGraph_setState(desiredGraph, experiment->graph->q, experiment->graph->q_dot);
295  MatNd_setZero(T_ctrl);
296 }
297 
299 {
300  eStop = true;
301  // Make sure that the policy is inactive
302  policyActive = false;
303  goHome = false;
304 }
305 
307 {
308  if (!eStop) {
309  RLOG(0, "Not in emergency stop, ignored.");
310  return;
311  }
312  eStop = false;
313  // Mark recovery for next update call
314  eRec = true;
315 }
316 
318 {
319  if (eStop) {
320  RLOG(0, "Emergency stop is active, cannot start policy.");
321  return;
322  }
323  goHome = false;
324  policyActive = true;
325 
326  // Reset action and observation model to make sure they're aware of the current position
329 
330  // Does not reset policy, to do that use explicit resetPolicy()
331 }
332 
334 {
335  policyActive = false;
336  goHome = false;
337 }
338 
340 {
341  policy->reset();
342 }
343 
345 {
346  if (eStop) {
347  RLOG(0, "Emergency stop is active, cannot start go home policy.");
348  return;
349  }
350  // Deactivate policy
351  policyActive = false;
352 
353  // Activate go home policy
354  goHome = true;
355 
356  // Reset go home policy to make sure it's aware of the current position
357  goHomeAM->reset();
358 }
359 
361 {
362  // Publish the IK graph
363  getEntity()->publish<std::string, const RcsGraph*>("RenderGraph", "IK", desiredGraph);
364  if (collisionMdl) {
365  getEntity()->publish<const MatNd*>("RenderLines", collisionMdl->cp);
366  }
367 
368  // Perform rendering initialization on first render call
369  if (!this->renderingInitialized) {
370  getEntity()->publish<std::string, std::string>("RenderCommand", "IK", "setGhostMode");
371 
372  this->renderingInitialized = true;
373  }
374 }
375 
377 {
378  if (eStop) {
379  return "PolicyComponent: emergency stop!";
380  }
381  else if (policyActive) {
382  return "PolicyComponent: policy active";
383  }
384  else if (goHome) {
385  return "PolicyComponent: go home";
386  }
387  else {
388  return "PolicyComponent: inactive";
389  }
390 }
391 
393 {
394  printf("Desired state from action model:%n");
395  RcsGraph_fprintModelState(stdout, desiredGraph, desiredGraph->q);
396 }
397 
398 Rcs::ExperimentConfig* Rcs::PolicyComponent::getExperiment() const
399 {
400  return experiment;
401 }
402 
404 {
405  return policy;
406 }
407 
409 {
410  return observation;
411 }
412 
414 {
415  return action;
416 }
417 
419 {
420  return desiredGraph->q;
421 }
422 
424 {
425  return desiredGraph;
426 }
427 
429 {
431 }
432 
434 {
436 }
ControlPolicy * getPolicy() const
void setCollisionCheck(bool collisionCheck)
MatNd * createValueMatrix() const
Definition: BoxSpace.cpp:197
std::string getStateText() const
virtual PropertySource * getChild(const char *prefix)=0
PolicyComponent(EntityBase *entity, PropertySource *settings, bool computeJointVelocities=false)
const BoxSpace * getSpace() const
double dt
The time step [s].
ControlPolicy * policy
virtual void computeCommand(MatNd *q_des, MatNd *q_dot_des, MatNd *T_des, const MatNd *action, double dt)=0
virtual void reset()
MatNd * computeObservation(const MatNd *currentAction, double dt) const
const MatNd * getObservation() const
virtual void computeAction(MatNd *action, const MatNd *observation)=0
ActionModel * goHomeAM
DynamicalSystem * goHomeDS
#define SUBSCRIBE(name)
RcsGraph * getDesiredGraph() const
static DynamicalSystem * create(PropertySource *properties, unsigned int innerTaskDim)
void onInitFromState(const RcsGraph *target)
virtual void setGoal(const Eigen::VectorXd &x_des)=0
RcsCollisionMdl * collisionMdl
static ExperimentConfig * create(PropertySource *properties)
const MatNd * getAction() const
ObservationModel * observationModel
Observation model (pluggable) used to create the observation which will be returned from step() ...
PropertySource * properties
Property source (owned)
const AM * unwrap() const
Definition: ActionModel.h:142
virtual void reset()
Definition: ActionModel.cpp:49
void setJointLimitCheck(bool jointLimitCheck)
virtual void reset()
static ControlPolicy * create(const char *name, const char *dataFile)
virtual bool exists()=0
ActionModel * actionModel
Action model (pluggable)
RcsCollisionMdl * collisionMdl
Collision model to use. Is based on the graph, so take care when using with IK etc.
const MatNd * getJointCommandPtr() const
ExperimentConfig * experiment
void onUpdatePolicy(const RcsGraph *state)
MatNd viewEigen2MatNd(M &src)
Definition: eigen_matnd.h:48
RcsGraph * graph
Graph description.