diff --git a/Environments.md b/Environments.md
index 5d0e89c..412b41c 100644
--- a/Environments.md
+++ b/Environments.md
@@ -39,9 +39,9 @@ R = diag[0.1, 0.1]
X_g denote the goal states.
-## [CatpoleEnv (Swing up)]((PythonLinearNonlinearControl/envs/cartpole.py))
+## [CatpoleEnv (Swing up)](PythonLinearNonlinearControl/envs/cartpole.py)
-System equation.
+## System equation.
diff --git a/PythonLinearNonlinearControl/configs/cartpole.py b/PythonLinearNonlinearControl/configs/cartpole.py
index 64a78db..fff5cf5 100644
--- a/PythonLinearNonlinearControl/configs/cartpole.py
+++ b/PythonLinearNonlinearControl/configs/cartpole.py
@@ -10,7 +10,11 @@ class CartPoleConfigModule():
INPUT_SIZE = 1
DT = 0.02
# cost parameters
- R = np.diag([0.01])
+ R = np.diag([1.]) # 0.01 is worked for MPPI and CEM and MPPIWilliams
+ # 1. is worked for iLQR
+ Terminal_Weight = 1.
+ Q = None
+ Sf = None
# bounds
INPUT_LOWER_BOUND = np.array([-3.])
INPUT_UPPER_BOUND = np.array([3.])
@@ -128,12 +132,14 @@ class CartPoleConfigModule():
return (6. * (terminal_x[:, 0]**2) \
+ 12. * ((np.cos(terminal_x[:, 2]) + 1.)**2) \
+ 0.1 * (terminal_x[:, 1]**2) \
- + 0.1 * (terminal_x[:, 3]**2))[:, np.newaxis]
+ + 0.1 * (terminal_x[:, 3]**2))[:, np.newaxis] \
+ * CartPoleConfigModule.Terminal_Weight
- return 6. * (terminal_x[0]**2) \
+ return (6. * (terminal_x[0]**2) \
+ 12. * ((np.cos(terminal_x[2]) + 1.)**2) \
+ 0.1 * (terminal_x[1]**2) \
- + 0.1 * (terminal_x[3]**2)
+ + 0.1 * (terminal_x[3]**2)) \
+ * CartPoleConfigModule.Terminal_Weight
@staticmethod
def gradient_cost_fn_with_state(x, g_x, terminal=False):
@@ -148,9 +154,21 @@ class CartPoleConfigModule():
or shape(1, state_size)
"""
if not terminal:
- return None
+ cost_dx0 = 12. * x[:, 0]
+ cost_dx1 = 0.2 * x[:, 1]
+ cost_dx2 = 24. * (1 + np.cos(x[:, 2])) * -np.sin(x[:, 2])
+ cost_dx3 = 0.2 * x[:, 3]
+ cost_dx = np.stack((cost_dx0, cost_dx1,\
+ cost_dx2, cost_dx3), axis=1)
+ return cost_dx
- return None
+ cost_dx0 = 12. * x[0]
+ cost_dx1 = 0.2 * x[1]
+ cost_dx2 = 24. * (1 + np.cos(x[2])) * -np.sin(x[2])
+ cost_dx3 = 0.2 * x[3]
+ cost_dx = np.array([[cost_dx0, cost_dx1, cost_dx2, cost_dx3]])
+
+ return cost_dx * CartPoleConfigModule.Terminal_Weight
@staticmethod
def gradient_cost_fn_with_input(x, u):
@@ -163,7 +181,7 @@ class CartPoleConfigModule():
Returns:
l_u (numpy.ndarray): gradient of cost, shape(pred_len, input_size)
"""
- return None
+ return 2. * u * np.diag(CartPoleConfigModule.R)
@staticmethod
def hessian_cost_fn_with_state(x, g_x, terminal=False):
@@ -179,10 +197,30 @@ class CartPoleConfigModule():
shape(1, state_size, state_size) or
"""
if not terminal:
- (pred_len, _) = x.shape
- return None
+ (pred_len, state_size) = x.shape
+ hessian = np.eye(state_size)
+ hessian = np.tile(hessian, (pred_len, 1, 1))
+ hessian[:, 0, 0] = 12.
+ hessian[:, 1, 1] = 0.2
+ hessian[:, 2, 2] = 24. * -np.sin(x[:, 2]) \
+ * (-np.sin(x[:, 2])) \
+ + 24. * (1. + np.cos(x[:, 2])) \
+ * -np.cos(x[:, 2])
+ hessian[:, 3, 3] = 0.2
+
+ return hessian
- return None
+ state_size = len(x)
+ hessian = np.eye(state_size)
+ hessian[0, 0] = 12.
+ hessian[1, 1] = 0.2
+ hessian[2, 2] = 24. * -np.sin(x[2]) \
+ * (-np.sin(x[2])) \
+ + 24. * (1. + np.cos(x[2])) \
+ * -np.cos(x[2])
+ hessian[3, 3] = 0.2
+
+ return hessian[np.newaxis, :, :] * CartPoleConfigModule.Terminal_Weight
@staticmethod
def hessian_cost_fn_with_input(x, u):
@@ -198,7 +236,7 @@ class CartPoleConfigModule():
"""
(pred_len, _) = u.shape
- return None
+ return np.tile(2.*CartPoleConfigModule.R, (pred_len, 1, 1))
@staticmethod
def hessian_cost_fn_with_input_state(x, u):
diff --git a/PythonLinearNonlinearControl/configs/first_order_lag.py b/PythonLinearNonlinearControl/configs/first_order_lag.py
index 1ad59f6..2c9d268 100644
--- a/PythonLinearNonlinearControl/configs/first_order_lag.py
+++ b/PythonLinearNonlinearControl/configs/first_order_lag.py
@@ -159,11 +159,9 @@ class FirstOrderLagConfigModule():
"""
if not terminal:
(pred_len, _) = x.shape
- return -g_x[:, :, np.newaxis] \
- * np.tile(2.*FirstOrderLagConfigModule.Q, (pred_len, 1, 1))
+ return np.tile(2.*FirstOrderLagConfigModule.Q, (pred_len, 1, 1))
- return -g_x[:, np.newaxis] \
- * np.tile(2.*FirstOrderLagConfigModule.Sf, (1, 1, 1))
+ return np.tile(2.*FirstOrderLagConfigModule.Sf, (1, 1, 1))
@staticmethod
def hessian_cost_fn_with_input(x, u):
diff --git a/PythonLinearNonlinearControl/configs/two_wheeled.py b/PythonLinearNonlinearControl/configs/two_wheeled.py
index 27e9834..8b79c87 100644
--- a/PythonLinearNonlinearControl/configs/two_wheeled.py
+++ b/PythonLinearNonlinearControl/configs/two_wheeled.py
@@ -153,11 +153,9 @@ class TwoWheeledConfigModule():
"""
if not terminal:
(pred_len, _) = x.shape
- return -g_x[:, :, np.newaxis] \
- * np.tile(2.*TwoWheeledConfigModule.Q, (pred_len, 1, 1))
+ return np.tile(2.*TwoWheeledConfigModule.Q, (pred_len, 1, 1))
- return -g_x[:, np.newaxis] \
- * np.tile(2.*TwoWheeledConfigModule.Sf, (1, 1, 1))
+ return np.tile(2.*TwoWheeledConfigModule.Sf, (1, 1, 1))
@staticmethod
def hessian_cost_fn_with_input(x, u):
diff --git a/PythonLinearNonlinearControl/controllers/ilqr.py b/PythonLinearNonlinearControl/controllers/ilqr.py
index a676ade..9a80dfa 100644
--- a/PythonLinearNonlinearControl/controllers/ilqr.py
+++ b/PythonLinearNonlinearControl/controllers/ilqr.py
@@ -50,11 +50,6 @@ class iLQR(Controller):
self.input_size = config.INPUT_SIZE
self.dt = config.DT
- # cost parameters
- self.Q = config.Q
- self.R = config.R
- self.Sf = config.Sf
-
# initialize
self.prev_sol = np.zeros((self.pred_len, self.input_size))
diff --git a/PythonLinearNonlinearControl/envs/cartpole.py b/PythonLinearNonlinearControl/envs/cartpole.py
index de9becb..fb190e6 100644
--- a/PythonLinearNonlinearControl/envs/cartpole.py
+++ b/PythonLinearNonlinearControl/envs/cartpole.py
@@ -37,7 +37,8 @@ class CartPoleEnv(Env):
"""
self.step_count = 0
- self.curr_x = np.array([0., 0., 0., 0.])
+ theta = np.random.randn(1)
+ self.curr_x = np.array([0., 0., theta[0], 0.])
if init_x is not None:
self.curr_x = init_x
diff --git a/PythonLinearNonlinearControl/models/cartpole.py b/PythonLinearNonlinearControl/models/cartpole.py
index 42c6616..db8ef61 100644
--- a/PythonLinearNonlinearControl/models/cartpole.py
+++ b/PythonLinearNonlinearControl/models/cartpole.py
@@ -90,8 +90,41 @@ class CartPoleModel(Model):
f_x = np.zeros((pred_len, state_size, state_size))
- f_x[:, 0, 2] = -np.sin(xs[:, 2]) * us[:, 0]
- f_x[:, 1, 2] = np.cos(xs[:, 2]) * us[:, 0]
+ # f_x_dot
+ f_x[:, 0, 1] = np.ones(pred_len)
+
+ # f_theta
+ tmp = ((self.mc + self.mp * np.sin(xs[:, 2])**2)**(-2)) \
+ * self.mp * 2. * np.sin(xs[:, 2]) * np.cos(xs[:, 2])
+ tmp2 = 1. / (self.mc + self.mp * (np.sin(xs[:, 2])**2))
+
+ f_x[:, 1, 2] = - us[:, 0] * tmp \
+ - tmp * (self.mp * np.sin(xs[:, 2]) \
+ * (self.l * xs[:, 3]**2 \
+ + self.g * np.cos(xs[:, 2]))) \
+ + tmp2 * (self.mp * np.cos(xs[:, 2]) * self.l \
+ * xs[:, 3]**2 \
+ + self.mp * self.g * (np.cos(xs[:, 2])**2 \
+ - np.sin(xs[:, 2])**2))
+ f_x[:, 3, 2] = - 1. / self.l * tmp \
+ * (-us[:, 0] * np.cos(xs[:, 2]) \
+ - self.mp * self.l * (xs[:, 3]**2) \
+ * np.cos(xs[:, 2]) * np.sin(xs[:, 2]) \
+ - (self.mc + self.mp) * self.g * np.sin(xs[:, 2])) \
+ + 1. / self.l * tmp2 \
+ * (us[:, 0] * np.sin(xs[:, 2]) \
+ - self.mp * self.l * xs[:, 3]**2 \
+ * (np.cos(xs[:, 2])**2 - np.sin(xs[:, 2])**2) \
+ - (self.mc + self.mp) \
+ * self.g * np.cos(xs[:, 2]))
+
+ # f_theta_dot
+ f_x[:, 1, 3] = tmp2 * (self.mp * np.sin(xs[:, 2]) \
+ * self.l * 2 * xs[:, 3])
+ f_x[:, 2, 3] = np.ones(pred_len)
+ f_x[:, 3, 3] = 1. / self.l * tmp2 \
+ * (-2. * self.mp * self.l * xs[:, 3] \
+ * np.cos(xs[:, 2]) * np.sin(xs[:, 2]))
return f_x * dt + np.eye(state_size) # to discrete form
@@ -139,10 +172,7 @@ class CartPoleModel(Model):
f_xx = np.zeros((pred_len, state_size, state_size, state_size))
- f_xx[:, 0, 2, 2] = -np.cos(xs[:, 2]) * us[:, 0]
- f_xx[:, 1, 2, 2] = -np.sin(xs[:, 2]) * us[:, 0]
-
- return f_xx * dt
+ raise NotImplementedError
def calc_f_ux(self, xs, us, dt):
""" hessian of model with respect to state and input in batch form
@@ -161,11 +191,8 @@ class CartPoleModel(Model):
f_ux = np.zeros((pred_len, state_size, input_size, state_size))
- f_ux[:, 0, 0, 2] = -np.sin(xs[:, 2])
- f_ux[:, 1, 0, 2] = np.cos(xs[:, 2])
+ raise NotImplementedError
- return f_ux * dt
-
def calc_f_uu(self, xs, us, dt):
""" hessian of model with respect to input in batch form
@@ -183,4 +210,4 @@ class CartPoleModel(Model):
f_uu = np.zeros((pred_len, state_size, input_size, input_size))
- return f_uu * dt
\ No newline at end of file
+ raise NotImplementedError
\ No newline at end of file
diff --git a/README.md b/README.md
index b177720..7bb14b1 100644
--- a/README.md
+++ b/README.md
@@ -1,6 +1,6 @@
[](LICENSE)
-[](https://coveralls.io/github/Shunichi09/PythonLinearNonlinearControl?branch=master)
-[](https://travis-ci.org/Shunichi09/PythonLinearNonlinearControl)
+[](https://coveralls.io/github/Shunichi09/PythonLinearNonlinearControl?branch=master&service=github)
+[](https://travis-ci.org/Shunichi09/PythonLinearNonlinearControl)
# PythonLinearNonLinearControl
@@ -116,21 +116,21 @@ It should be noted that **Model** and **Environment** are different. As mentione
-## Model
+## [Model](PythonLinearNonlinearControl/models/)
System model. For an instance, in the case that a model is linear, this model should have a form, "x[k+1] = Ax[k] + Bu[k]".
If you use gradient based control method, you are preferred to implement the gradients of the model, other wise the controllers use numeric gradients.
-## Planner
+## [Planner](PythonLinearNonlinearControl/planners/)
Planner make the goal states.
-## Controller
+## [Controller](PythonLinearNonlinearControl/controllers/)
Controller calculate the optimal inputs by using the model by using the algorithms.
-## Runner
+## [Runner](PythonLinearNonlinearControl/runners/)
Runner runs the simulation.
diff --git a/scripts/simple_run.py b/scripts/simple_run.py
index 25f828c..f1d70fd 100644
--- a/scripts/simple_run.py
+++ b/scripts/simple_run.py
@@ -42,9 +42,9 @@ def run(args):
def main():
parser = argparse.ArgumentParser()
- parser.add_argument("--controller_type", type=str, default="CEM")
+ parser.add_argument("--controller_type", type=str, default="DDP")
parser.add_argument("--planner_type", type=str, default="const")
- parser.add_argument("--env", type=str, default="TwoWheeledConst")
+ parser.add_argument("--env", type=str, default="CartPole")
parser.add_argument("--result_dir", type=str, default="./result")
args = parser.parse_args()
diff --git a/tests/configs/test_cartpole.py b/tests/configs/test_cartpole.py
index 6f74321..877ba9e 100644
--- a/tests/configs/test_cartpole.py
+++ b/tests/configs/test_cartpole.py
@@ -28,4 +28,154 @@ class TestCalcCost():
costs = config.terminal_state_cost_fn(pred_xs[:, -1, :],\
g_xs[:, -1, :])
- assert costs.shape == (pop_size, 1)
\ No newline at end of file
+ assert costs.shape == (pop_size, 1)
+
+class TestGradient():
+ def test_state_gradient(self):
+ """
+ """
+ xs = np.ones((1, 4))
+ cost_grad = CartPoleConfigModule.gradient_cost_fn_with_state(xs, None)
+
+ # numeric grad
+ eps = 1e-4
+ expected_grad = np.zeros((1, 4))
+ for i in range(4):
+ tmp_x = xs.copy()
+ tmp_x[0, i] = xs[0, i] + eps
+ forward = \
+ CartPoleConfigModule.state_cost_fn(tmp_x, None)
+ tmp_x = xs.copy()
+ tmp_x[0, i] = xs[0, i] - eps
+ backward = \
+ CartPoleConfigModule.state_cost_fn(tmp_x, None)
+
+ expected_grad[0, i] = (forward - backward) / (2. * eps)
+
+ assert cost_grad == pytest.approx(expected_grad)
+
+ def test_terminal_state_gradient(self):
+ """
+ """
+ xs = np.ones(4)
+ cost_grad =\
+ CartPoleConfigModule.gradient_cost_fn_with_state(xs, None,
+ terminal=True)
+
+ # numeric grad
+ eps = 1e-4
+ expected_grad = np.zeros((1, 4))
+ for i in range(4):
+ tmp_x = xs.copy()
+ tmp_x[i] = xs[i] + eps
+ forward = \
+ CartPoleConfigModule.state_cost_fn(tmp_x, None)
+ tmp_x = xs.copy()
+ tmp_x[i] = xs[i] - eps
+ backward = \
+ CartPoleConfigModule.state_cost_fn(tmp_x, None)
+
+ expected_grad[0, i] = (forward - backward) / (2. * eps)
+
+ assert cost_grad == pytest.approx(expected_grad)
+
+ def test_input_gradient(self):
+ """
+ """
+ us = np.ones((1, 1))
+ cost_grad = CartPoleConfigModule.gradient_cost_fn_with_input(None, us)
+
+ # numeric grad
+ eps = 1e-4
+ expected_grad = np.zeros((1, 1))
+ for i in range(1):
+ tmp_u = us.copy()
+ tmp_u[0, i] = us[0, i] + eps
+ forward = \
+ CartPoleConfigModule.input_cost_fn(tmp_u)
+ tmp_u = us.copy()
+ tmp_u[0, i] = us[0, i] - eps
+ backward = \
+ CartPoleConfigModule.input_cost_fn(tmp_u)
+
+ expected_grad[0, i] = (forward - backward) / (2. * eps)
+
+ assert cost_grad == pytest.approx(expected_grad)
+
+ def test_state_hessian(self):
+ """
+ """
+ xs = np.ones((1, 4))
+ cost_hess = CartPoleConfigModule.hessian_cost_fn_with_state(xs, None)
+
+ # numeric grad
+ eps = 1e-4
+ expected_hess = np.zeros((1, 4, 4))
+ for i in range(4):
+ tmp_x = xs.copy()
+ tmp_x[0, i] = xs[0, i] + eps
+ forward = \
+ CartPoleConfigModule.gradient_cost_fn_with_state(
+ tmp_x, None, terminal=False)
+ tmp_x = xs.copy()
+ tmp_x[0, i] = xs[0, i] - eps
+ backward = \
+ CartPoleConfigModule.gradient_cost_fn_with_state(
+ tmp_x, None, terminal=False)
+
+ expected_hess[0, :, i] = (forward - backward) / (2. * eps)
+
+ assert cost_hess == pytest.approx(expected_hess)
+
+ def test_terminal_state_hessian(self):
+ """
+ """
+ xs = np.ones(4)
+ cost_hess =\
+ CartPoleConfigModule.hessian_cost_fn_with_state(xs, None,
+ terminal=True)
+
+ # numeric grad
+ eps = 1e-4
+ expected_hess = np.zeros((1, 4, 4))
+ for i in range(4):
+ tmp_x = xs.copy()
+ tmp_x[i] = xs[i] + eps
+ forward = \
+ CartPoleConfigModule.gradient_cost_fn_with_state(
+ tmp_x, None, terminal=True)
+ tmp_x = xs.copy()
+ tmp_x[i] = xs[i] - eps
+ backward = \
+ CartPoleConfigModule.gradient_cost_fn_with_state(
+ tmp_x, None, terminal=True)
+
+ expected_hess[0, :, i] = (forward - backward) / (2. * eps)
+
+ assert cost_hess == pytest.approx(expected_hess)
+
+ def test_input_hessian(self):
+ """
+ """
+ us = np.ones((1, 1))
+ xs = np.ones((1, 4))
+ cost_hess = CartPoleConfigModule.hessian_cost_fn_with_input(us, xs)
+
+ # numeric grad
+ eps = 1e-4
+ expected_hess = np.zeros((1, 1, 1))
+ for i in range(1):
+ tmp_u = us.copy()
+ tmp_u[0, i] = us[0, i] + eps
+ forward = \
+ CartPoleConfigModule.gradient_cost_fn_with_input(
+ xs, tmp_u)
+ tmp_u = us.copy()
+ tmp_u[0, i] = us[0, i] - eps
+ backward = \
+ CartPoleConfigModule.gradient_cost_fn_with_input(
+ xs, tmp_u)
+
+ expected_hess[0, :, i] = (forward - backward) / (2. * eps)
+
+ assert cost_hess == pytest.approx(expected_hess)
\ No newline at end of file
diff --git a/tests/configs/test_two_wheeled.py b/tests/configs/test_two_wheeled.py
index fb9cb7c..9ad0408 100644
--- a/tests/configs/test_two_wheeled.py
+++ b/tests/configs/test_two_wheeled.py
@@ -31,4 +31,111 @@ class TestCalcCost():
g_xs[:, -1, :])
expected_costs = np.ones((pop_size, state_size))*0.5
- assert costs == pytest.approx(expected_costs**2 * np.diag(config.Sf))
\ No newline at end of file
+ assert costs == pytest.approx(expected_costs**2 * np.diag(config.Sf))
+
+class TestGradient():
+ def test_state_gradient(self):
+ """
+ """
+ xs = np.ones((1, 3))
+ g_xs = np.ones((1, 3)) * 0.5
+ cost_grad =\
+ TwoWheeledConfigModule.gradient_cost_fn_with_state(xs, g_xs)
+
+ # numeric grad
+ eps = 1e-4
+ expected_grad = np.zeros((1, 3))
+ for i in range(3):
+ tmp_x = xs.copy()
+ tmp_x[0, i] = xs[0, i] + eps
+ forward = \
+ TwoWheeledConfigModule.state_cost_fn(tmp_x, g_xs)
+ forward = np.sum(forward)
+ tmp_x = xs.copy()
+ tmp_x[0, i] = xs[0, i] - eps
+ backward = \
+ TwoWheeledConfigModule.state_cost_fn(tmp_x, g_xs)
+ backward = np.sum(backward)
+
+ expected_grad[0, i] = (forward - backward) / (2. * eps)
+
+ assert cost_grad == pytest.approx(expected_grad)
+
+ def test_input_gradient(self):
+ """
+ """
+ us = np.ones((1, 2))
+ cost_grad =\
+ TwoWheeledConfigModule.gradient_cost_fn_with_input(None, us)
+
+ # numeric grad
+ eps = 1e-4
+ expected_grad = np.zeros((1, 2))
+ for i in range(2):
+ tmp_u = us.copy()
+ tmp_u[0, i] = us[0, i] + eps
+ forward = \
+ TwoWheeledConfigModule.input_cost_fn(tmp_u)
+ forward = np.sum(forward)
+ tmp_u = us.copy()
+ tmp_u[0, i] = us[0, i] - eps
+ backward = \
+ TwoWheeledConfigModule.input_cost_fn(tmp_u)
+ backward = np.sum(backward)
+
+ expected_grad[0, i] = (forward - backward) / (2. * eps)
+
+ assert cost_grad == pytest.approx(expected_grad)
+
+ def test_state_hessian(self):
+ """
+ """
+ g_xs = np.ones((1, 3)) * 0.5
+ xs = np.ones((1, 3))
+ cost_hess =\
+ TwoWheeledConfigModule.hessian_cost_fn_with_state(xs, g_xs)
+
+ # numeric grad
+ eps = 1e-4
+ expected_hess = np.zeros((1, 3, 3))
+ for i in range(3):
+ tmp_x = xs.copy()
+ tmp_x[0, i] = xs[0, i] + eps
+ forward = \
+ TwoWheeledConfigModule.gradient_cost_fn_with_state(
+ tmp_x, g_xs, terminal=False)
+ tmp_x = xs.copy()
+ tmp_x[0, i] = xs[0, i] - eps
+ backward = \
+ TwoWheeledConfigModule.gradient_cost_fn_with_state(
+ tmp_x, g_xs, terminal=False)
+
+ expected_hess[0, :, i] = (forward - backward) / (2. * eps)
+
+ assert cost_hess == pytest.approx(expected_hess)
+
+ def test_input_hessian(self):
+ """
+ """
+ us = np.ones((1, 2))
+ xs = np.ones((1, 3))
+ cost_hess = TwoWheeledConfigModule.hessian_cost_fn_with_input(us, xs)
+
+ # numeric grad
+ eps = 1e-4
+ expected_hess = np.zeros((1, 2, 2))
+ for i in range(2):
+ tmp_u = us.copy()
+ tmp_u[0, i] = us[0, i] + eps
+ forward = \
+ TwoWheeledConfigModule.gradient_cost_fn_with_input(
+ xs, tmp_u)
+ tmp_u = us.copy()
+ tmp_u[0, i] = us[0, i] - eps
+ backward = \
+ TwoWheeledConfigModule.gradient_cost_fn_with_input(
+ xs, tmp_u)
+
+ expected_hess[0, :, i] = (forward - backward) / (2. * eps)
+
+ assert cost_hess == pytest.approx(expected_hess)
\ No newline at end of file
diff --git a/tests/models/test_cartpole.py b/tests/models/test_cartpole.py
index f7241b8..a6ca700 100644
--- a/tests/models/test_cartpole.py
+++ b/tests/models/test_cartpole.py
@@ -54,4 +54,64 @@ class TestCartPoleModel():
u = np.tile(u, (2, 1, 1))
pred_xs_alltogether = cartpole_model.predict_traj(curr_x, u)[0]
- assert pred_xs_alltogether == pytest.approx(pred_xs)
\ No newline at end of file
+ assert pred_xs_alltogether == pytest.approx(pred_xs)
+
+ def test_gradient_state(self):
+
+ config = CartPoleConfigModule()
+ cartpole_model = CartPoleModel(config)
+
+ xs = np.ones((1, config.STATE_SIZE)) \
+ * np.random.rand(1, config.STATE_SIZE)
+ xs[0, -1] = np.pi / 6.
+ us = np.ones((1, config.INPUT_SIZE))
+
+ grad = cartpole_model.calc_f_x(xs, us, config.DT)
+
+ # expected cost
+ expected_grad = np.zeros((1, config.STATE_SIZE, config.STATE_SIZE))
+ eps = 1e-4
+
+ for i in range(config.STATE_SIZE):
+ tmp_x = xs.copy()
+ tmp_x[0, i] = xs[0, i] + eps
+ forward = \
+ cartpole_model.predict_next_state(tmp_x[0], us[0])
+ tmp_x = xs.copy()
+ tmp_x[0, i] = xs[0, i] - eps
+ backward = \
+ cartpole_model.predict_next_state(tmp_x[0], us[0])
+
+ expected_grad[0, :, i] = (forward - backward) / (2. * eps)
+
+ assert grad == pytest.approx(expected_grad)
+
+ def test_gradient_input(self):
+
+ config = CartPoleConfigModule()
+ cartpole_model = CartPoleModel(config)
+
+ xs = np.ones((1, config.STATE_SIZE)) \
+ * np.random.rand(1, config.STATE_SIZE)
+ xs[0, -1] = np.pi / 6.
+ us = np.ones((1, config.INPUT_SIZE))
+
+ grad = cartpole_model.calc_f_u(xs, us, config.DT)
+
+ # expected cost
+ expected_grad = np.zeros((1, config.STATE_SIZE, config.INPUT_SIZE))
+ eps = 1e-4
+
+ for i in range(config.INPUT_SIZE):
+ tmp_u = us.copy()
+ tmp_u[0, i] = us[0, i] + eps
+ forward = \
+ cartpole_model.predict_next_state(xs[0], tmp_u[0])
+ tmp_u = us.copy()
+ tmp_u[0, i] = us[0, i] - eps
+ backward = \
+ cartpole_model.predict_next_state(xs[0], tmp_u[0])
+
+ expected_grad[0, :, i] = (forward - backward) / (2. * eps)
+
+ assert grad == pytest.approx(expected_grad)
diff --git a/tests/models/test_two_wheeled.py b/tests/models/test_two_wheeled.py
index 4cdff5c..3ab1161 100644
--- a/tests/models/test_two_wheeled.py
+++ b/tests/models/test_two_wheeled.py
@@ -39,4 +39,62 @@ class TestTwoWheeledModel():
u = np.tile(u, (1, 1, 1))
pred_xs_alltogether = two_wheeled_model.predict_traj(curr_x, u)[0]
- assert pred_xs_alltogether == pytest.approx(pred_xs)
\ No newline at end of file
+ assert pred_xs_alltogether == pytest.approx(pred_xs)
+
+ def test_gradient_state(self):
+
+ config = TwoWheeledConfigModule()
+ two_wheeled_model = TwoWheeledModel(config)
+
+ xs = np.ones((1, config.STATE_SIZE))
+ xs[0, -1] = np.pi / 6.
+ us = np.ones((1, config.INPUT_SIZE))
+
+ grad = two_wheeled_model.calc_f_x(xs, us, config.DT)
+
+ # expected cost
+ expected_grad = np.zeros((1, config.STATE_SIZE, config.STATE_SIZE))
+ eps = 1e-4
+
+ for i in range(config.STATE_SIZE):
+ tmp_x = xs.copy()
+ tmp_x[0, i] = xs[0, i] + eps
+ forward = \
+ two_wheeled_model.predict_next_state(tmp_x[0], us[0])
+ tmp_x = xs.copy()
+ tmp_x[0, i] = xs[0, i] - eps
+ backward = \
+ two_wheeled_model.predict_next_state(tmp_x[0], us[0])
+
+ expected_grad[0, :, i] = (forward - backward) / (2. * eps)
+
+ assert grad == pytest.approx(expected_grad)
+
+ def test_gradient_input(self):
+
+ config = TwoWheeledConfigModule()
+ two_wheeled_model = TwoWheeledModel(config)
+
+ xs = np.ones((1, config.STATE_SIZE))
+ xs[0, -1] = np.pi / 6.
+ us = np.ones((1, config.INPUT_SIZE))
+
+ grad = two_wheeled_model.calc_f_u(xs, us, config.DT)
+
+ # expected cost
+ expected_grad = np.zeros((1, config.STATE_SIZE, config.INPUT_SIZE))
+ eps = 1e-4
+
+ for i in range(config.INPUT_SIZE):
+ tmp_u = us.copy()
+ tmp_u[0, i] = us[0, i] + eps
+ forward = \
+ two_wheeled_model.predict_next_state(xs[0], tmp_u[0])
+ tmp_u = us.copy()
+ tmp_u[0, i] = us[0, i] - eps
+ backward = \
+ two_wheeled_model.predict_next_state(xs[0], tmp_u[0])
+
+ expected_grad[0, :, i] = (forward - backward) / (2. * eps)
+
+ assert grad == pytest.approx(expected_grad)