Merge pull request #5 from Shunichi09/develop

Develop
This commit is contained in:
Shunichi09 2020-04-08 17:51:19 +09:00 committed by GitHub
commit 1e768f75e1
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
13 changed files with 482 additions and 50 deletions

View File

@ -39,9 +39,9 @@ R = diag[0.1, 0.1]
X_g denote the goal states. X_g denote the goal states.
## [CatpoleEnv (Swing up)]((PythonLinearNonlinearControl/envs/cartpole.py)) ## [CatpoleEnv (Swing up)](PythonLinearNonlinearControl/envs/cartpole.py)
System equation. ## System equation.
<img src="assets/cartpole.png" width="600"> <img src="assets/cartpole.png" width="600">

View File

@ -10,7 +10,11 @@ class CartPoleConfigModule():
INPUT_SIZE = 1 INPUT_SIZE = 1
DT = 0.02 DT = 0.02
# cost parameters # cost parameters
R = np.diag([0.01]) R = np.diag([1.]) # 0.01 is worked for MPPI and CEM and MPPIWilliams
# 1. is worked for iLQR
Terminal_Weight = 1.
Q = None
Sf = None
# bounds # bounds
INPUT_LOWER_BOUND = np.array([-3.]) INPUT_LOWER_BOUND = np.array([-3.])
INPUT_UPPER_BOUND = np.array([3.]) INPUT_UPPER_BOUND = np.array([3.])
@ -128,12 +132,14 @@ class CartPoleConfigModule():
return (6. * (terminal_x[:, 0]**2) \ return (6. * (terminal_x[:, 0]**2) \
+ 12. * ((np.cos(terminal_x[:, 2]) + 1.)**2) \ + 12. * ((np.cos(terminal_x[:, 2]) + 1.)**2) \
+ 0.1 * (terminal_x[:, 1]**2) \ + 0.1 * (terminal_x[:, 1]**2) \
+ 0.1 * (terminal_x[:, 3]**2))[:, np.newaxis] + 0.1 * (terminal_x[:, 3]**2))[:, np.newaxis] \
* CartPoleConfigModule.Terminal_Weight
return 6. * (terminal_x[0]**2) \ return (6. * (terminal_x[0]**2) \
+ 12. * ((np.cos(terminal_x[2]) + 1.)**2) \ + 12. * ((np.cos(terminal_x[2]) + 1.)**2) \
+ 0.1 * (terminal_x[1]**2) \ + 0.1 * (terminal_x[1]**2) \
+ 0.1 * (terminal_x[3]**2) + 0.1 * (terminal_x[3]**2)) \
* CartPoleConfigModule.Terminal_Weight
@staticmethod @staticmethod
def gradient_cost_fn_with_state(x, g_x, terminal=False): def gradient_cost_fn_with_state(x, g_x, terminal=False):
@ -148,9 +154,21 @@ class CartPoleConfigModule():
or shape(1, state_size) or shape(1, state_size)
""" """
if not terminal: if not terminal:
return None cost_dx0 = 12. * x[:, 0]
cost_dx1 = 0.2 * x[:, 1]
cost_dx2 = 24. * (1 + np.cos(x[:, 2])) * -np.sin(x[:, 2])
cost_dx3 = 0.2 * x[:, 3]
cost_dx = np.stack((cost_dx0, cost_dx1,\
cost_dx2, cost_dx3), axis=1)
return cost_dx
return None cost_dx0 = 12. * x[0]
cost_dx1 = 0.2 * x[1]
cost_dx2 = 24. * (1 + np.cos(x[2])) * -np.sin(x[2])
cost_dx3 = 0.2 * x[3]
cost_dx = np.array([[cost_dx0, cost_dx1, cost_dx2, cost_dx3]])
return cost_dx * CartPoleConfigModule.Terminal_Weight
@staticmethod @staticmethod
def gradient_cost_fn_with_input(x, u): def gradient_cost_fn_with_input(x, u):
@ -163,7 +181,7 @@ class CartPoleConfigModule():
Returns: Returns:
l_u (numpy.ndarray): gradient of cost, shape(pred_len, input_size) l_u (numpy.ndarray): gradient of cost, shape(pred_len, input_size)
""" """
return None return 2. * u * np.diag(CartPoleConfigModule.R)
@staticmethod @staticmethod
def hessian_cost_fn_with_state(x, g_x, terminal=False): def hessian_cost_fn_with_state(x, g_x, terminal=False):
@ -179,10 +197,30 @@ class CartPoleConfigModule():
shape(1, state_size, state_size) or shape(1, state_size, state_size) or
""" """
if not terminal: if not terminal:
(pred_len, _) = x.shape (pred_len, state_size) = x.shape
return None hessian = np.eye(state_size)
hessian = np.tile(hessian, (pred_len, 1, 1))
hessian[:, 0, 0] = 12.
hessian[:, 1, 1] = 0.2
hessian[:, 2, 2] = 24. * -np.sin(x[:, 2]) \
* (-np.sin(x[:, 2])) \
+ 24. * (1. + np.cos(x[:, 2])) \
* -np.cos(x[:, 2])
hessian[:, 3, 3] = 0.2
return None return hessian
state_size = len(x)
hessian = np.eye(state_size)
hessian[0, 0] = 12.
hessian[1, 1] = 0.2
hessian[2, 2] = 24. * -np.sin(x[2]) \
* (-np.sin(x[2])) \
+ 24. * (1. + np.cos(x[2])) \
* -np.cos(x[2])
hessian[3, 3] = 0.2
return hessian[np.newaxis, :, :] * CartPoleConfigModule.Terminal_Weight
@staticmethod @staticmethod
def hessian_cost_fn_with_input(x, u): def hessian_cost_fn_with_input(x, u):
@ -198,7 +236,7 @@ class CartPoleConfigModule():
""" """
(pred_len, _) = u.shape (pred_len, _) = u.shape
return None return np.tile(2.*CartPoleConfigModule.R, (pred_len, 1, 1))
@staticmethod @staticmethod
def hessian_cost_fn_with_input_state(x, u): def hessian_cost_fn_with_input_state(x, u):

View File

@ -159,11 +159,9 @@ class FirstOrderLagConfigModule():
""" """
if not terminal: if not terminal:
(pred_len, _) = x.shape (pred_len, _) = x.shape
return -g_x[:, :, np.newaxis] \ return np.tile(2.*FirstOrderLagConfigModule.Q, (pred_len, 1, 1))
* np.tile(2.*FirstOrderLagConfigModule.Q, (pred_len, 1, 1))
return -g_x[:, np.newaxis] \ return np.tile(2.*FirstOrderLagConfigModule.Sf, (1, 1, 1))
* np.tile(2.*FirstOrderLagConfigModule.Sf, (1, 1, 1))
@staticmethod @staticmethod
def hessian_cost_fn_with_input(x, u): def hessian_cost_fn_with_input(x, u):

View File

@ -153,11 +153,9 @@ class TwoWheeledConfigModule():
""" """
if not terminal: if not terminal:
(pred_len, _) = x.shape (pred_len, _) = x.shape
return -g_x[:, :, np.newaxis] \ return np.tile(2.*TwoWheeledConfigModule.Q, (pred_len, 1, 1))
* np.tile(2.*TwoWheeledConfigModule.Q, (pred_len, 1, 1))
return -g_x[:, np.newaxis] \ return np.tile(2.*TwoWheeledConfigModule.Sf, (1, 1, 1))
* np.tile(2.*TwoWheeledConfigModule.Sf, (1, 1, 1))
@staticmethod @staticmethod
def hessian_cost_fn_with_input(x, u): def hessian_cost_fn_with_input(x, u):

View File

@ -50,11 +50,6 @@ class iLQR(Controller):
self.input_size = config.INPUT_SIZE self.input_size = config.INPUT_SIZE
self.dt = config.DT self.dt = config.DT
# cost parameters
self.Q = config.Q
self.R = config.R
self.Sf = config.Sf
# initialize # initialize
self.prev_sol = np.zeros((self.pred_len, self.input_size)) self.prev_sol = np.zeros((self.pred_len, self.input_size))

View File

@ -37,7 +37,8 @@ class CartPoleEnv(Env):
""" """
self.step_count = 0 self.step_count = 0
self.curr_x = np.array([0., 0., 0., 0.]) theta = np.random.randn(1)
self.curr_x = np.array([0., 0., theta[0], 0.])
if init_x is not None: if init_x is not None:
self.curr_x = init_x self.curr_x = init_x

View File

@ -90,8 +90,41 @@ class CartPoleModel(Model):
f_x = np.zeros((pred_len, state_size, state_size)) f_x = np.zeros((pred_len, state_size, state_size))
f_x[:, 0, 2] = -np.sin(xs[:, 2]) * us[:, 0] # f_x_dot
f_x[:, 1, 2] = np.cos(xs[:, 2]) * us[:, 0] f_x[:, 0, 1] = np.ones(pred_len)
# f_theta
tmp = ((self.mc + self.mp * np.sin(xs[:, 2])**2)**(-2)) \
* self.mp * 2. * np.sin(xs[:, 2]) * np.cos(xs[:, 2])
tmp2 = 1. / (self.mc + self.mp * (np.sin(xs[:, 2])**2))
f_x[:, 1, 2] = - us[:, 0] * tmp \
- tmp * (self.mp * np.sin(xs[:, 2]) \
* (self.l * xs[:, 3]**2 \
+ self.g * np.cos(xs[:, 2]))) \
+ tmp2 * (self.mp * np.cos(xs[:, 2]) * self.l \
* xs[:, 3]**2 \
+ self.mp * self.g * (np.cos(xs[:, 2])**2 \
- np.sin(xs[:, 2])**2))
f_x[:, 3, 2] = - 1. / self.l * tmp \
* (-us[:, 0] * np.cos(xs[:, 2]) \
- self.mp * self.l * (xs[:, 3]**2) \
* np.cos(xs[:, 2]) * np.sin(xs[:, 2]) \
- (self.mc + self.mp) * self.g * np.sin(xs[:, 2])) \
+ 1. / self.l * tmp2 \
* (us[:, 0] * np.sin(xs[:, 2]) \
- self.mp * self.l * xs[:, 3]**2 \
* (np.cos(xs[:, 2])**2 - np.sin(xs[:, 2])**2) \
- (self.mc + self.mp) \
* self.g * np.cos(xs[:, 2]))
# f_theta_dot
f_x[:, 1, 3] = tmp2 * (self.mp * np.sin(xs[:, 2]) \
* self.l * 2 * xs[:, 3])
f_x[:, 2, 3] = np.ones(pred_len)
f_x[:, 3, 3] = 1. / self.l * tmp2 \
* (-2. * self.mp * self.l * xs[:, 3] \
* np.cos(xs[:, 2]) * np.sin(xs[:, 2]))
return f_x * dt + np.eye(state_size) # to discrete form return f_x * dt + np.eye(state_size) # to discrete form
@ -139,10 +172,7 @@ class CartPoleModel(Model):
f_xx = np.zeros((pred_len, state_size, state_size, state_size)) f_xx = np.zeros((pred_len, state_size, state_size, state_size))
f_xx[:, 0, 2, 2] = -np.cos(xs[:, 2]) * us[:, 0] raise NotImplementedError
f_xx[:, 1, 2, 2] = -np.sin(xs[:, 2]) * us[:, 0]
return f_xx * dt
def calc_f_ux(self, xs, us, dt): def calc_f_ux(self, xs, us, dt):
""" hessian of model with respect to state and input in batch form """ hessian of model with respect to state and input in batch form
@ -161,10 +191,7 @@ class CartPoleModel(Model):
f_ux = np.zeros((pred_len, state_size, input_size, state_size)) f_ux = np.zeros((pred_len, state_size, input_size, state_size))
f_ux[:, 0, 0, 2] = -np.sin(xs[:, 2]) raise NotImplementedError
f_ux[:, 1, 0, 2] = np.cos(xs[:, 2])
return f_ux * dt
def calc_f_uu(self, xs, us, dt): def calc_f_uu(self, xs, us, dt):
""" hessian of model with respect to input in batch form """ hessian of model with respect to input in batch form
@ -183,4 +210,4 @@ class CartPoleModel(Model):
f_uu = np.zeros((pred_len, state_size, input_size, input_size)) f_uu = np.zeros((pred_len, state_size, input_size, input_size))
return f_uu * dt raise NotImplementedError

View File

@ -1,6 +1,6 @@
[![MIT licensed](https://img.shields.io/badge/license-MIT-blue.svg)](LICENSE) [![MIT licensed](https://img.shields.io/badge/license-MIT-blue.svg)](LICENSE)
[![Coverage Status](https://coveralls.io/repos/github/Shunichi09/PythonLinearNonlinearControl/badge.svg?branch=master)](https://coveralls.io/github/Shunichi09/PythonLinearNonlinearControl?branch=master) [![Coverage Status](https://coveralls.io/repos/github/Shunichi09/PythonLinearNonlinearControl/badge.svg?branch=master&service=github)](https://coveralls.io/github/Shunichi09/PythonLinearNonlinearControl?branch=master&service=github)
[![Build Status](https://travis-ci.org/Shunichi09/PythonLinearNonlinearControl.svg?branch=master)](https://travis-ci.org/Shunichi09/PythonLinearNonlinearControl) [![Build Status](https://travis-ci.org/Shunichi09/PythonLinearNonlinearControl.svg?branch=master&service=github)](https://travis-ci.org/Shunichi09/PythonLinearNonlinearControl)
# PythonLinearNonLinearControl # PythonLinearNonLinearControl
@ -116,21 +116,21 @@ It should be noted that **Model** and **Environment** are different. As mentione
<img src="assets/concept.png" width="500"> <img src="assets/concept.png" width="500">
## Model ## [Model](PythonLinearNonlinearControl/models/)
System model. For an instance, in the case that a model is linear, this model should have a form, "x[k+1] = Ax[k] + Bu[k]". System model. For an instance, in the case that a model is linear, this model should have a form, "x[k+1] = Ax[k] + Bu[k]".
If you use gradient based control method, you are preferred to implement the gradients of the model, other wise the controllers use numeric gradients. If you use gradient based control method, you are preferred to implement the gradients of the model, other wise the controllers use numeric gradients.
## Planner ## [Planner](PythonLinearNonlinearControl/planners/)
Planner make the goal states. Planner make the goal states.
## Controller ## [Controller](PythonLinearNonlinearControl/controllers/)
Controller calculate the optimal inputs by using the model by using the algorithms. Controller calculate the optimal inputs by using the model by using the algorithms.
## Runner ## [Runner](PythonLinearNonlinearControl/runners/)
Runner runs the simulation. Runner runs the simulation.

View File

@ -42,9 +42,9 @@ def run(args):
def main(): def main():
parser = argparse.ArgumentParser() parser = argparse.ArgumentParser()
parser.add_argument("--controller_type", type=str, default="CEM") parser.add_argument("--controller_type", type=str, default="DDP")
parser.add_argument("--planner_type", type=str, default="const") parser.add_argument("--planner_type", type=str, default="const")
parser.add_argument("--env", type=str, default="TwoWheeledConst") parser.add_argument("--env", type=str, default="CartPole")
parser.add_argument("--result_dir", type=str, default="./result") parser.add_argument("--result_dir", type=str, default="./result")
args = parser.parse_args() args = parser.parse_args()

View File

@ -29,3 +29,153 @@ class TestCalcCost():
g_xs[:, -1, :]) g_xs[:, -1, :])
assert costs.shape == (pop_size, 1) assert costs.shape == (pop_size, 1)
class TestGradient():
def test_state_gradient(self):
"""
"""
xs = np.ones((1, 4))
cost_grad = CartPoleConfigModule.gradient_cost_fn_with_state(xs, None)
# numeric grad
eps = 1e-4
expected_grad = np.zeros((1, 4))
for i in range(4):
tmp_x = xs.copy()
tmp_x[0, i] = xs[0, i] + eps
forward = \
CartPoleConfigModule.state_cost_fn(tmp_x, None)
tmp_x = xs.copy()
tmp_x[0, i] = xs[0, i] - eps
backward = \
CartPoleConfigModule.state_cost_fn(tmp_x, None)
expected_grad[0, i] = (forward - backward) / (2. * eps)
assert cost_grad == pytest.approx(expected_grad)
def test_terminal_state_gradient(self):
"""
"""
xs = np.ones(4)
cost_grad =\
CartPoleConfigModule.gradient_cost_fn_with_state(xs, None,
terminal=True)
# numeric grad
eps = 1e-4
expected_grad = np.zeros((1, 4))
for i in range(4):
tmp_x = xs.copy()
tmp_x[i] = xs[i] + eps
forward = \
CartPoleConfigModule.state_cost_fn(tmp_x, None)
tmp_x = xs.copy()
tmp_x[i] = xs[i] - eps
backward = \
CartPoleConfigModule.state_cost_fn(tmp_x, None)
expected_grad[0, i] = (forward - backward) / (2. * eps)
assert cost_grad == pytest.approx(expected_grad)
def test_input_gradient(self):
"""
"""
us = np.ones((1, 1))
cost_grad = CartPoleConfigModule.gradient_cost_fn_with_input(None, us)
# numeric grad
eps = 1e-4
expected_grad = np.zeros((1, 1))
for i in range(1):
tmp_u = us.copy()
tmp_u[0, i] = us[0, i] + eps
forward = \
CartPoleConfigModule.input_cost_fn(tmp_u)
tmp_u = us.copy()
tmp_u[0, i] = us[0, i] - eps
backward = \
CartPoleConfigModule.input_cost_fn(tmp_u)
expected_grad[0, i] = (forward - backward) / (2. * eps)
assert cost_grad == pytest.approx(expected_grad)
def test_state_hessian(self):
"""
"""
xs = np.ones((1, 4))
cost_hess = CartPoleConfigModule.hessian_cost_fn_with_state(xs, None)
# numeric grad
eps = 1e-4
expected_hess = np.zeros((1, 4, 4))
for i in range(4):
tmp_x = xs.copy()
tmp_x[0, i] = xs[0, i] + eps
forward = \
CartPoleConfigModule.gradient_cost_fn_with_state(
tmp_x, None, terminal=False)
tmp_x = xs.copy()
tmp_x[0, i] = xs[0, i] - eps
backward = \
CartPoleConfigModule.gradient_cost_fn_with_state(
tmp_x, None, terminal=False)
expected_hess[0, :, i] = (forward - backward) / (2. * eps)
assert cost_hess == pytest.approx(expected_hess)
def test_terminal_state_hessian(self):
"""
"""
xs = np.ones(4)
cost_hess =\
CartPoleConfigModule.hessian_cost_fn_with_state(xs, None,
terminal=True)
# numeric grad
eps = 1e-4
expected_hess = np.zeros((1, 4, 4))
for i in range(4):
tmp_x = xs.copy()
tmp_x[i] = xs[i] + eps
forward = \
CartPoleConfigModule.gradient_cost_fn_with_state(
tmp_x, None, terminal=True)
tmp_x = xs.copy()
tmp_x[i] = xs[i] - eps
backward = \
CartPoleConfigModule.gradient_cost_fn_with_state(
tmp_x, None, terminal=True)
expected_hess[0, :, i] = (forward - backward) / (2. * eps)
assert cost_hess == pytest.approx(expected_hess)
def test_input_hessian(self):
"""
"""
us = np.ones((1, 1))
xs = np.ones((1, 4))
cost_hess = CartPoleConfigModule.hessian_cost_fn_with_input(us, xs)
# numeric grad
eps = 1e-4
expected_hess = np.zeros((1, 1, 1))
for i in range(1):
tmp_u = us.copy()
tmp_u[0, i] = us[0, i] + eps
forward = \
CartPoleConfigModule.gradient_cost_fn_with_input(
xs, tmp_u)
tmp_u = us.copy()
tmp_u[0, i] = us[0, i] - eps
backward = \
CartPoleConfigModule.gradient_cost_fn_with_input(
xs, tmp_u)
expected_hess[0, :, i] = (forward - backward) / (2. * eps)
assert cost_hess == pytest.approx(expected_hess)

View File

@ -32,3 +32,110 @@ class TestCalcCost():
expected_costs = np.ones((pop_size, state_size))*0.5 expected_costs = np.ones((pop_size, state_size))*0.5
assert costs == pytest.approx(expected_costs**2 * np.diag(config.Sf)) assert costs == pytest.approx(expected_costs**2 * np.diag(config.Sf))
class TestGradient():
def test_state_gradient(self):
"""
"""
xs = np.ones((1, 3))
g_xs = np.ones((1, 3)) * 0.5
cost_grad =\
TwoWheeledConfigModule.gradient_cost_fn_with_state(xs, g_xs)
# numeric grad
eps = 1e-4
expected_grad = np.zeros((1, 3))
for i in range(3):
tmp_x = xs.copy()
tmp_x[0, i] = xs[0, i] + eps
forward = \
TwoWheeledConfigModule.state_cost_fn(tmp_x, g_xs)
forward = np.sum(forward)
tmp_x = xs.copy()
tmp_x[0, i] = xs[0, i] - eps
backward = \
TwoWheeledConfigModule.state_cost_fn(tmp_x, g_xs)
backward = np.sum(backward)
expected_grad[0, i] = (forward - backward) / (2. * eps)
assert cost_grad == pytest.approx(expected_grad)
def test_input_gradient(self):
"""
"""
us = np.ones((1, 2))
cost_grad =\
TwoWheeledConfigModule.gradient_cost_fn_with_input(None, us)
# numeric grad
eps = 1e-4
expected_grad = np.zeros((1, 2))
for i in range(2):
tmp_u = us.copy()
tmp_u[0, i] = us[0, i] + eps
forward = \
TwoWheeledConfigModule.input_cost_fn(tmp_u)
forward = np.sum(forward)
tmp_u = us.copy()
tmp_u[0, i] = us[0, i] - eps
backward = \
TwoWheeledConfigModule.input_cost_fn(tmp_u)
backward = np.sum(backward)
expected_grad[0, i] = (forward - backward) / (2. * eps)
assert cost_grad == pytest.approx(expected_grad)
def test_state_hessian(self):
"""
"""
g_xs = np.ones((1, 3)) * 0.5
xs = np.ones((1, 3))
cost_hess =\
TwoWheeledConfigModule.hessian_cost_fn_with_state(xs, g_xs)
# numeric grad
eps = 1e-4
expected_hess = np.zeros((1, 3, 3))
for i in range(3):
tmp_x = xs.copy()
tmp_x[0, i] = xs[0, i] + eps
forward = \
TwoWheeledConfigModule.gradient_cost_fn_with_state(
tmp_x, g_xs, terminal=False)
tmp_x = xs.copy()
tmp_x[0, i] = xs[0, i] - eps
backward = \
TwoWheeledConfigModule.gradient_cost_fn_with_state(
tmp_x, g_xs, terminal=False)
expected_hess[0, :, i] = (forward - backward) / (2. * eps)
assert cost_hess == pytest.approx(expected_hess)
def test_input_hessian(self):
"""
"""
us = np.ones((1, 2))
xs = np.ones((1, 3))
cost_hess = TwoWheeledConfigModule.hessian_cost_fn_with_input(us, xs)
# numeric grad
eps = 1e-4
expected_hess = np.zeros((1, 2, 2))
for i in range(2):
tmp_u = us.copy()
tmp_u[0, i] = us[0, i] + eps
forward = \
TwoWheeledConfigModule.gradient_cost_fn_with_input(
xs, tmp_u)
tmp_u = us.copy()
tmp_u[0, i] = us[0, i] - eps
backward = \
TwoWheeledConfigModule.gradient_cost_fn_with_input(
xs, tmp_u)
expected_hess[0, :, i] = (forward - backward) / (2. * eps)
assert cost_hess == pytest.approx(expected_hess)

View File

@ -55,3 +55,63 @@ class TestCartPoleModel():
pred_xs_alltogether = cartpole_model.predict_traj(curr_x, u)[0] pred_xs_alltogether = cartpole_model.predict_traj(curr_x, u)[0]
assert pred_xs_alltogether == pytest.approx(pred_xs) assert pred_xs_alltogether == pytest.approx(pred_xs)
def test_gradient_state(self):
config = CartPoleConfigModule()
cartpole_model = CartPoleModel(config)
xs = np.ones((1, config.STATE_SIZE)) \
* np.random.rand(1, config.STATE_SIZE)
xs[0, -1] = np.pi / 6.
us = np.ones((1, config.INPUT_SIZE))
grad = cartpole_model.calc_f_x(xs, us, config.DT)
# expected cost
expected_grad = np.zeros((1, config.STATE_SIZE, config.STATE_SIZE))
eps = 1e-4
for i in range(config.STATE_SIZE):
tmp_x = xs.copy()
tmp_x[0, i] = xs[0, i] + eps
forward = \
cartpole_model.predict_next_state(tmp_x[0], us[0])
tmp_x = xs.copy()
tmp_x[0, i] = xs[0, i] - eps
backward = \
cartpole_model.predict_next_state(tmp_x[0], us[0])
expected_grad[0, :, i] = (forward - backward) / (2. * eps)
assert grad == pytest.approx(expected_grad)
def test_gradient_input(self):
config = CartPoleConfigModule()
cartpole_model = CartPoleModel(config)
xs = np.ones((1, config.STATE_SIZE)) \
* np.random.rand(1, config.STATE_SIZE)
xs[0, -1] = np.pi / 6.
us = np.ones((1, config.INPUT_SIZE))
grad = cartpole_model.calc_f_u(xs, us, config.DT)
# expected cost
expected_grad = np.zeros((1, config.STATE_SIZE, config.INPUT_SIZE))
eps = 1e-4
for i in range(config.INPUT_SIZE):
tmp_u = us.copy()
tmp_u[0, i] = us[0, i] + eps
forward = \
cartpole_model.predict_next_state(xs[0], tmp_u[0])
tmp_u = us.copy()
tmp_u[0, i] = us[0, i] - eps
backward = \
cartpole_model.predict_next_state(xs[0], tmp_u[0])
expected_grad[0, :, i] = (forward - backward) / (2. * eps)
assert grad == pytest.approx(expected_grad)

View File

@ -40,3 +40,61 @@ class TestTwoWheeledModel():
pred_xs_alltogether = two_wheeled_model.predict_traj(curr_x, u)[0] pred_xs_alltogether = two_wheeled_model.predict_traj(curr_x, u)[0]
assert pred_xs_alltogether == pytest.approx(pred_xs) assert pred_xs_alltogether == pytest.approx(pred_xs)
def test_gradient_state(self):
config = TwoWheeledConfigModule()
two_wheeled_model = TwoWheeledModel(config)
xs = np.ones((1, config.STATE_SIZE))
xs[0, -1] = np.pi / 6.
us = np.ones((1, config.INPUT_SIZE))
grad = two_wheeled_model.calc_f_x(xs, us, config.DT)
# expected cost
expected_grad = np.zeros((1, config.STATE_SIZE, config.STATE_SIZE))
eps = 1e-4
for i in range(config.STATE_SIZE):
tmp_x = xs.copy()
tmp_x[0, i] = xs[0, i] + eps
forward = \
two_wheeled_model.predict_next_state(tmp_x[0], us[0])
tmp_x = xs.copy()
tmp_x[0, i] = xs[0, i] - eps
backward = \
two_wheeled_model.predict_next_state(tmp_x[0], us[0])
expected_grad[0, :, i] = (forward - backward) / (2. * eps)
assert grad == pytest.approx(expected_grad)
def test_gradient_input(self):
config = TwoWheeledConfigModule()
two_wheeled_model = TwoWheeledModel(config)
xs = np.ones((1, config.STATE_SIZE))
xs[0, -1] = np.pi / 6.
us = np.ones((1, config.INPUT_SIZE))
grad = two_wheeled_model.calc_f_u(xs, us, config.DT)
# expected cost
expected_grad = np.zeros((1, config.STATE_SIZE, config.INPUT_SIZE))
eps = 1e-4
for i in range(config.INPUT_SIZE):
tmp_u = us.copy()
tmp_u[0, i] = us[0, i] + eps
forward = \
two_wheeled_model.predict_next_state(xs[0], tmp_u[0])
tmp_u = us.copy()
tmp_u[0, i] = us[0, i] - eps
backward = \
two_wheeled_model.predict_next_state(xs[0], tmp_u[0])
expected_grad[0, :, i] = (forward - backward) / (2. * eps)
assert grad == pytest.approx(expected_grad)