RcsPySim
A robot control and simulation library
RcsPyBot.cpp
Go to the documentation of this file.
1 /*******************************************************************************
2  Copyright (c) 2020, Fabio Muratore, Honda Research Institute Europe GmbH, and
3  Technical University of Darmstadt.
4  All rights reserved.
5 
6  Redistribution and use in source and binary forms, with or without
7  modification, are permitted provided that the following conditions are met:
8  1. Redistributions of source code must retain the above copyright
9  notice, this list of conditions and the following disclaimer.
10  2. Redistributions in binary form must reproduce the above copyright
11  notice, this list of conditions and the following disclaimer in the
12  documentation and/or other materials provided with the distribution.
13  3. Neither the name of Fabio Muratore, Honda Research Institute Europe GmbH,
14  or Technical University of Darmstadt, nor the names of its contributors may
15  be used to endorse or promote products derived from this software without
16  specific prior written permission.
17 
18  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
19  ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
20  WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21  DISCLAIMED. IN NO EVENT SHALL FABIO MURATORE, HONDA RESEARCH INSTITUTE EUROPE GMBH,
22  OR TECHNICAL UNIVERSITY OF DARMSTADT BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24  PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
25  OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
26  IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
27  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
28  POSSIBILITY OF SUCH DAMAGE.
29 *******************************************************************************/
30 
31 #include "RcsPyBot.h"
32 #include "action/ActionModel.h"
33 #include "action/ActionModelIK.h"
35 #include "control/ControlPolicy.h"
36 
37 #include <Rcs_typedef.h>
38 #include <Rcs_macros.h>
39 #include <ControllerBase.h>
40 
41 #include <cmath>
42 
43 namespace Rcs
44 {
45 
46 /*! A simple time-based policy
47  * Yields 2 actions: the first one is oscillating round 0 with am amplitude of 0.2 and the second one is constant 0.
48  */
50 {
51 private:
52  //! Internal clock
53  double t;
54 
55 public:
56  SimpleControlPolicy() { t = 0; }
57 
58  virtual void computeAction(MatNd* action, const MatNd* observation)
59  {
60  action->ele[0] = 0.2*std::cos(2.*M_PI*t)*(135*M_PI/180);
61  action->ele[1] = 0.0;
62  t += 0.01;
63  }
64 };
65 
67 {
68  // Load experiment config
69  config = ExperimentConfig::create(propertySource);
70 
71  // Check if all joints are position controlled for skipping a later inverse dynamics control (compliance)
72  unsigned int numPosCtrlJoints = 0;
73  RCSGRAPH_TRAVERSE_JOINTS(config->graph) {
74  if (JNT->jacobiIndex != -1) {
75  if (JNT->ctrlType == RCSJOINT_CTRL_TYPE::RCSJOINT_CTRL_POSITION) {
76  numPosCtrlJoints++;
77  }
78  }
79  }
80  allJointsPosCtrl = config->graph->nJ == numPosCtrlJoints;
81 
82  // Set MotionControlLayer members
83  currentGraph = config->graph;
84  desiredGraph = RcsGraph_clone(currentGraph);
85 
86  // Initialize the temporary matrices, making sure the initial command is identical to the initial state
87  q_ctrl = MatNd_clone(desiredGraph->q);
88  qd_ctrl = MatNd_clone(desiredGraph->q_dot);
89  T_ctrl = MatNd_create(desiredGraph->dof, 1);
90 
91  // Filter for going to the home pose, initialized with the current state
92  double tmc = 0.10;
93  homePoseFilt = new Rcs::SecondOrderLPFND(currentGraph->q->ele, tmc, config->dt, desiredGraph->q->m);
94  homePoseFilt->setTarget(currentGraph->q->ele);
95 
96  // Control policy is set later
97  controlPolicy = nullptr;
98 
99  observation = config->observationModel->getSpace()->createValueMatrix();
100  action = config->actionModel->getSpace()->createValueMatrix();
101 
102  // ActionModel and observationModel expect a reset() call before they are used
103  config->actionModel->reset();
104  config->observationModel->reset();
105 }
106 
108 {
109  // Delete temporary matrices
110  MatNd_destroy(q_ctrl);
111  MatNd_destroy(qd_ctrl);
112  MatNd_destroy(T_ctrl);
113 
114  MatNd_destroy(observation);
115  MatNd_destroy(action);
116 
117  // The hardware components also use currentGraph, so it may only be destroyed by the MotionControlLayer destructor.
118  // however, currentGraph is identical to config->graph, which is owned.
119  // to solve this, set config->graph to nullptr.
120  config->graph = nullptr;
121  // Also, desiredGraph is a clone, so it must be destroyed. Can't set MotionControlLayer::ownsDesiredGraph = true
122  // since it's private, so do it manually here.
123  RcsGraph_destroy(desiredGraph);
124 
125  // Delete experiment config
126  delete config;
127 }
128 
129 void RcsPyBot::setControlPolicy(ControlPolicy* controlPolicy, const MatNd* q_des)
130 {
131  if ((controlPolicy == nullptr && q_des == nullptr) || (controlPolicy != nullptr && q_des != nullptr)) {
132  throw std::invalid_argument("Either controlPolicy or q_des need to be != nullptr, not none or both!");
133  }
134  std::unique_lock<std::mutex> lock(controlPolicyMutex);
135  this->controlPolicy = controlPolicy;
136 
137  if (controlPolicy == nullptr) {
138  // Command a fixed pose
139  homePoseFilt->setTarget(q_des->ele);
140  MatNd_setZero(qd_ctrl);
141  MatNd_setZero(T_ctrl);
142  }
143 
144  // Reset model states
145  config->observationModel->reset();
146  config->actionModel->reset();
147 
148  // Freeze
149  getConfig()->actionModel->setGraph(RcsGraph_clone(getCurrentGraph()));
150  Rcs::ActionModelIK* amIK = dynamic_cast<Rcs::ActionModelIK*>(getConfig()->actionModel);
151  if (amIK) {
152  amIK->setGraph(RcsGraph_clone(getCurrentGraph()));
153  }
154 }
155 
157 {
158  // Aggressive locking here is ok, setControlPolicy doesn't take long
159  std::unique_lock<std::mutex> lock(controlPolicyMutex);
160 
161  // Read observation from current graph
162  config->observationModel->computeObservation(observation, action, config->dt);
163 
164  // Compute action
165  if (controlPolicy != nullptr) {
166  // Call the policy
167  controlPolicy->computeAction(action, observation);
168 
169  // Run action through action model
170  config->actionModel->computeCommand(q_ctrl, qd_ctrl, T_ctrl, action, getCallbackUpdatePeriod());
171  }
172 
173  // Inverse dynamics in joint space (compliance control)
174  if (!allJointsPosCtrl) {
175  Rcs::ControllerBase::computeInvDynJointSpace(T_ctrl, config->graph, q_ctrl, 1000.);
176  }
177 
178  if (controlPolicy == nullptr) {
179  // If no policy is playing, we are going back to the home pose. However, we do not want to do this in one step.
180  // Thus the target signal is filtered.
181  homePoseFilt->iterate();
182  homePoseFilt->getPosition(q_ctrl->ele);
183  homePoseFilt->getVelocity(qd_ctrl->ele);
184  }
185 
186  // Command action to hardware (and update desiredGraph)
187  setMotorCommand(q_ctrl, qd_ctrl, T_ctrl);
188 
189  // Can unlock now, lock only guards controlPolicy and ctrl vectors
190  lock.unlock();
191 
192  // Log data (the reward is only computed on the python side)
193  logger.record(observation, action);
194 }
195 
197 {
198  return observation;
199 }
200 
201 MatNd* RcsPyBot::getAction() const
202 {
203  return action;
204 }
205 
207 {
208  RcsGraph_setState(desiredGraph, currentGraph->q, currentGraph->q_dot);
209 
210  RcsGraph_setState(getConfig()->graph, currentGraph->q, currentGraph->q_dot);
211 
212  RcsGraph_setState(getConfig()->actionModel->getGraph(), currentGraph->q, currentGraph->q_dot);
213 
214  Rcs::ActionModelIK* amIK = dynamic_cast<Rcs::ActionModelIK*>(getConfig()->actionModel);
215  if (amIK) {
216  RcsGraph_setState(amIK->getDesiredGraph(), currentGraph->q, currentGraph->q_dot);
217  }
218 
219  // Init filter for joint commands after syncing the graphs
220  homePoseFilt->init(currentGraph->q->ele);
221 }
222 
223 
224 } /* namespace Rcs */
RcsPyBot(PropertySource *propertySource)
Definition: RcsPyBot.cpp:66
MatNd * getAction() const
Definition: RcsPyBot.cpp:201
void syncGraphsToCurrent()
Definition: RcsPyBot.cpp:206
virtual ~RcsPyBot()
Definition: RcsPyBot.cpp:107
MatNd * getObservation() const
Definition: RcsPyBot.cpp:196
void setGraph(RcsGraph *newGraph)
Definition: ActionModel.h:113
virtual void computeAction(MatNd *action, const MatNd *observation)
Definition: RcsPyBot.cpp:58
static ExperimentConfig * create(PropertySource *properties)
virtual void updateControl()
Definition: RcsPyBot.cpp:156
void setControlPolicy(ControlPolicy *controlPolicy, const MatNd *q_des=nullptr)
Definition: RcsPyBot.cpp:129
double t
Internal clock.
Definition: RcsPyBot.cpp:53