Merge pull request #5 from Shunichi09/develop

Develop
2020-04-08 17:51:19 +09:00 · 2020-04-08 17:51:19 +09:00 · 1e768f75e1
parent b7efc581aa 1ed489cfc4
commit 1e768f75e1
13 changed files with 482 additions and 50 deletions
--- a/Environments.md
+++ b/Environments.md
@ -39,9 +39,9 @@ R = diag[0.1, 0.1]
 X_g denote the goal states.
-## [CatpoleEnv (Swing up)]((PythonLinearNonlinearControl/envs/cartpole.py))
+## [CatpoleEnv (Swing up)](PythonLinearNonlinearControl/envs/cartpole.py)
-System equation.
+## System equation.
 <img src="assets/cartpole.png" width="600">
--- a/PythonLinearNonlinearControl/configs/cartpole.py
+++ b/PythonLinearNonlinearControl/configs/cartpole.py
@ -10,7 +10,11 @@ class CartPoleConfigModule():
    INPUT_SIZE = 1
    DT = 0.02
    # cost parameters
-    R = np.diag([0.01])
+    R = np.diag([1.])  # 0.01 is worked for MPPI and CEM and MPPIWilliams
                       # 1. is worked for iLQR 
    Terminal_Weight = 1.
    Q = None
    Sf = None
    # bounds
    INPUT_LOWER_BOUND = np.array([-3.])
    INPUT_UPPER_BOUND = np.array([3.])
@ -128,12 +132,14 @@ class CartPoleConfigModule():
            return (6. * (terminal_x[:, 0]**2) \
                   + 12. * ((np.cos(terminal_x[:, 2]) + 1.)**2) \
                   + 0.1 * (terminal_x[:, 1]**2) \
-                   + 0.1 * (terminal_x[:, 3]**2))[:, np.newaxis]
+                   + 0.1 * (terminal_x[:, 3]**2))[:, np.newaxis] \
                    * CartPoleConfigModule.Terminal_Weight
-        return 6. * (terminal_x[0]**2) \
+        return (6. * (terminal_x[0]**2) \
               + 12. * ((np.cos(terminal_x[2]) + 1.)**2) \
               + 0.1 * (terminal_x[1]**2) \
-               + 0.1 * (terminal_x[3]**2)
+               + 0.1 * (terminal_x[3]**2)) \
                * CartPoleConfigModule.Terminal_Weight
    @staticmethod
    def gradient_cost_fn_with_state(x, g_x, terminal=False):
@ -148,9 +154,21 @@ class CartPoleConfigModule():
                or shape(1, state_size)
        """
        if not terminal:
-            return None
+            cost_dx0 = 12. * x[:, 0] 
            cost_dx1 = 0.2 * x[:, 1]
            cost_dx2 = 24. * (1 + np.cos(x[:, 2])) * -np.sin(x[:, 2])
            cost_dx3 = 0.2 * x[:, 3]
            cost_dx = np.stack((cost_dx0, cost_dx1,\
                                cost_dx2, cost_dx3), axis=1)
            return cost_dx
-        return None
+        cost_dx0 = 12. * x[0] 
        cost_dx1 = 0.2 * x[1]
        cost_dx2 = 24. * (1 + np.cos(x[2])) * -np.sin(x[2])
        cost_dx3 = 0.2 * x[3]
        cost_dx = np.array([[cost_dx0, cost_dx1, cost_dx2, cost_dx3]])
        return cost_dx * CartPoleConfigModule.Terminal_Weight
    @staticmethod
    def gradient_cost_fn_with_input(x, u):
@ -163,7 +181,7 @@ class CartPoleConfigModule():
        Returns:
            l_u (numpy.ndarray): gradient of cost, shape(pred_len, input_size)
        """
-        return None
+        return 2. * u * np.diag(CartPoleConfigModule.R)
    @staticmethod
    def hessian_cost_fn_with_state(x, g_x, terminal=False):
@ -179,10 +197,30 @@ class CartPoleConfigModule():
                shape(1, state_size, state_size) or
        """
        if not terminal:
-            (pred_len, _) = x.shape
+            (pred_len, state_size) = x.shape
-            return None              
+            hessian = np.eye(state_size)
            hessian = np.tile(hessian, (pred_len, 1, 1))
            hessian[:, 0, 0] = 12.
            hessian[:, 1, 1] = 0.2
            hessian[:, 2, 2] = 24. * -np.sin(x[:, 2]) \
                               * (-np.sin(x[:, 2])) \
                               + 24. * (1. + np.cos(x[:, 2])) \
                                 * -np.cos(x[:, 2])
            hessian[:, 3, 3] = 0.2
-        return None
+            return hessian
        state_size = len(x)
        hessian = np.eye(state_size)
        hessian[0, 0] = 12.
        hessian[1, 1] = 0.2
        hessian[2, 2] = 24. * -np.sin(x[2]) \
                         * (-np.sin(x[2])) \
                        + 24. * (1. + np.cos(x[2])) \
                          * -np.cos(x[2])
        hessian[3, 3] = 0.2
        return hessian[np.newaxis, :, :] * CartPoleConfigModule.Terminal_Weight
    @staticmethod
    def hessian_cost_fn_with_input(x, u):
@ -198,7 +236,7 @@ class CartPoleConfigModule():
        """
        (pred_len, _) = u.shape
-        return None
+        return np.tile(2.*CartPoleConfigModule.R, (pred_len, 1, 1))
    @staticmethod
    def hessian_cost_fn_with_input_state(x, u):
--- a/PythonLinearNonlinearControl/configs/first_order_lag.py
+++ b/PythonLinearNonlinearControl/configs/first_order_lag.py
@ -159,11 +159,9 @@ class FirstOrderLagConfigModule():
        """
        if not terminal:
            (pred_len, _) = x.shape
-            return -g_x[:, :, np.newaxis] \
+            return np.tile(2.*FirstOrderLagConfigModule.Q, (pred_len, 1, 1))               
                * np.tile(2.*FirstOrderLagConfigModule.Q, (pred_len, 1, 1))               
-        return -g_x[:, np.newaxis] \
+        return np.tile(2.*FirstOrderLagConfigModule.Sf, (1, 1, 1))    
            * np.tile(2.*FirstOrderLagConfigModule.Sf, (1, 1, 1))    
    @staticmethod
    def hessian_cost_fn_with_input(x, u):
--- a/PythonLinearNonlinearControl/configs/two_wheeled.py
+++ b/PythonLinearNonlinearControl/configs/two_wheeled.py
@ -153,11 +153,9 @@ class TwoWheeledConfigModule():
        """
        if not terminal:
            (pred_len, _) = x.shape
-            return -g_x[:, :, np.newaxis] \
+            return np.tile(2.*TwoWheeledConfigModule.Q, (pred_len, 1, 1))               
                * np.tile(2.*TwoWheeledConfigModule.Q, (pred_len, 1, 1))               
-        return -g_x[:, np.newaxis] \
+        return np.tile(2.*TwoWheeledConfigModule.Sf, (1, 1, 1))    
            * np.tile(2.*TwoWheeledConfigModule.Sf, (1, 1, 1))    
    @staticmethod
    def hessian_cost_fn_with_input(x, u):
--- a/PythonLinearNonlinearControl/controllers/ilqr.py
+++ b/PythonLinearNonlinearControl/controllers/ilqr.py
@ -50,11 +50,6 @@ class iLQR(Controller):
        self.input_size = config.INPUT_SIZE
        self.dt = config.DT
        # cost parameters
        self.Q = config.Q
        self.R = config.R
        self.Sf = config.Sf
        # initialize
        self.prev_sol = np.zeros((self.pred_len, self.input_size))
--- a/PythonLinearNonlinearControl/envs/cartpole.py
+++ b/PythonLinearNonlinearControl/envs/cartpole.py
@ -37,7 +37,8 @@ class CartPoleEnv(Env):
        """
        self.step_count = 0
-        self.curr_x = np.array([0., 0., 0., 0.])
+        theta = np.random.randn(1)
        self.curr_x = np.array([0., 0., theta[0], 0.])
        if init_x is not None:
            self.curr_x = init_x
--- a/PythonLinearNonlinearControl/models/cartpole.py
+++ b/PythonLinearNonlinearControl/models/cartpole.py
@ -90,8 +90,41 @@ class CartPoleModel(Model):
        f_x = np.zeros((pred_len, state_size, state_size))
-        f_x[:, 0, 2] = -np.sin(xs[:, 2]) * us[:, 0]
+        # f_x_dot
-        f_x[:, 1, 2] = np.cos(xs[:, 2]) * us[:, 0]
+        f_x[:, 0, 1] = np.ones(pred_len)
        # f_theta
        tmp = ((self.mc + self.mp * np.sin(xs[:, 2])**2)**(-2)) \
              * self.mp * 2. * np.sin(xs[:, 2]) * np.cos(xs[:, 2])
        tmp2 = 1. / (self.mc + self.mp * (np.sin(xs[:, 2])**2))
        f_x[:, 1, 2] = - us[:, 0] * tmp \
                       - tmp * (self.mp * np.sin(xs[:, 2]) \
                                * (self.l * xs[:, 3]**2 \
                                   + self.g * np.cos(xs[:, 2]))) \
                       + tmp2 * (self.mp * np.cos(xs[:, 2]) * self.l \
                                 * xs[:, 3]**2 \
                                 + self.mp * self.g * (np.cos(xs[:, 2])**2 \
                                                       - np.sin(xs[:, 2])**2))
        f_x[:, 3, 2] = - 1. / self.l * tmp \
                        * (-us[:, 0] * np.cos(xs[:, 2]) \
                           - self.mp * self.l * (xs[:, 3]**2) \
                           * np.cos(xs[:, 2]) * np.sin(xs[:, 2]) \
                           - (self.mc + self.mp) * self.g * np.sin(xs[:, 2])) \
                        +  1. / self.l * tmp2 \
                           * (us[:, 0] * np.sin(xs[:, 2]) \
                              - self.mp * self.l * xs[:, 3]**2 \
                                * (np.cos(xs[:, 2])**2 - np.sin(xs[:, 2])**2) \
                              - (self.mc + self.mp) \
                                 * self.g * np.cos(xs[:, 2]))
        # f_theta_dot
        f_x[:, 1, 3] = tmp2 * (self.mp * np.sin(xs[:, 2]) \
                       * self.l * 2 * xs[:, 3])
        f_x[:, 2, 3] = np.ones(pred_len)
        f_x[:, 3, 3] = 1. / self.l * tmp2 \
                        * (-2. * self.mp * self.l * xs[:, 3] \
                           * np.cos(xs[:, 2]) * np.sin(xs[:, 2]))
        return f_x * dt + np.eye(state_size)  # to discrete form
@ -139,10 +172,7 @@ class CartPoleModel(Model):
        f_xx = np.zeros((pred_len, state_size, state_size, state_size))
-        f_xx[:, 0, 2, 2] = -np.cos(xs[:, 2]) * us[:, 0]
+        raise NotImplementedError
        f_xx[:, 1, 2, 2] = -np.sin(xs[:, 2]) * us[:, 0]
        return f_xx * dt
    def calc_f_ux(self, xs, us, dt):
        """ hessian of model with respect to state and input in batch form
@ -161,10 +191,7 @@ class CartPoleModel(Model):
        f_ux = np.zeros((pred_len, state_size, input_size, state_size))
-        f_ux[:, 0, 0, 2] = -np.sin(xs[:, 2])
+        raise NotImplementedError
        f_ux[:, 1, 0, 2] = np.cos(xs[:, 2])
        return f_ux * dt
    def calc_f_uu(self, xs, us, dt):
        """ hessian of model with respect to input in batch form
@ -183,4 +210,4 @@ class CartPoleModel(Model):
        f_uu = np.zeros((pred_len, state_size, input_size, input_size))
-        return f_uu * dt
+        raise NotImplementedError
--- a/README.md
+++ b/README.md
@ -1,6 +1,6 @@
 [![MIT licensed](https://img.shields.io/badge/license-MIT-blue.svg)](LICENSE)
-[![Coverage Status](https://coveralls.io/repos/github/Shunichi09/PythonLinearNonlinearControl/badge.svg?branch=master)](https://coveralls.io/github/Shunichi09/PythonLinearNonlinearControl?branch=master)
+[![Coverage Status](https://coveralls.io/repos/github/Shunichi09/PythonLinearNonlinearControl/badge.svg?branch=master&service=github)](https://coveralls.io/github/Shunichi09/PythonLinearNonlinearControl?branch=master&service=github)
-[![Build Status](https://travis-ci.org/Shunichi09/PythonLinearNonlinearControl.svg?branch=master)](https://travis-ci.org/Shunichi09/PythonLinearNonlinearControl)
+[![Build Status](https://travis-ci.org/Shunichi09/PythonLinearNonlinearControl.svg?branch=master&service=github)](https://travis-ci.org/Shunichi09/PythonLinearNonlinearControl)
 # PythonLinearNonLinearControl
@ -116,21 +116,21 @@ It should be noted that **Model** and **Environment** are different. As mentione
 <img src="assets/concept.png" width="500">
-## Model
+## [Model](PythonLinearNonlinearControl/models/)
 System model. For an instance, in the case that a model is linear, this model should have a form, "x[k+1] = Ax[k] + Bu[k]".
 If you use gradient based control method, you are preferred to implement the gradients of the model, other wise the controllers use numeric gradients.
-## Planner
+## [Planner](PythonLinearNonlinearControl/planners/)
 Planner make the goal states.
-## Controller
+## [Controller](PythonLinearNonlinearControl/controllers/)
 Controller calculate the optimal inputs by using the model by using the algorithms.
-## Runner
+## [Runner](PythonLinearNonlinearControl/runners/)
 Runner runs the simulation.
--- a/scripts/simple_run.py
+++ b/scripts/simple_run.py
@ -42,9 +42,9 @@ def run(args):
 def main():
    parser = argparse.ArgumentParser()
-    parser.add_argument("--controller_type", type=str, default="CEM")
+    parser.add_argument("--controller_type", type=str, default="DDP")
    parser.add_argument("--planner_type", type=str, default="const")
-    parser.add_argument("--env", type=str, default="TwoWheeledConst")
+    parser.add_argument("--env", type=str, default="CartPole")
    parser.add_argument("--result_dir", type=str, default="./result")
    args = parser.parse_args()
--- a/tests/configs/test_cartpole.py
+++ b/tests/configs/test_cartpole.py
@ -29,3 +29,153 @@ class TestCalcCost():
                                              g_xs[:, -1, :])
        assert costs.shape == (pop_size, 1)
 class TestGradient():
    def test_state_gradient(self):
        """
        """
        xs = np.ones((1, 4))
        cost_grad = CartPoleConfigModule.gradient_cost_fn_with_state(xs, None)
        # numeric grad
        eps = 1e-4
        expected_grad = np.zeros((1, 4))
        for i in range(4): 
            tmp_x = xs.copy()
            tmp_x[0, i] = xs[0, i] + eps
            forward = \
                CartPoleConfigModule.state_cost_fn(tmp_x, None)
            tmp_x = xs.copy()
            tmp_x[0, i] = xs[0, i] - eps
            backward = \
                CartPoleConfigModule.state_cost_fn(tmp_x, None)
            expected_grad[0, i] = (forward - backward) / (2. * eps)
        assert cost_grad == pytest.approx(expected_grad)
    def test_terminal_state_gradient(self):
        """
        """
        xs = np.ones(4)
        cost_grad =\
            CartPoleConfigModule.gradient_cost_fn_with_state(xs, None,
                                                             terminal=True)
        # numeric grad
        eps = 1e-4
        expected_grad = np.zeros((1, 4))
        for i in range(4): 
            tmp_x = xs.copy()
            tmp_x[i] = xs[i] + eps
            forward = \
                CartPoleConfigModule.state_cost_fn(tmp_x, None)
            tmp_x = xs.copy()
            tmp_x[i] = xs[i] - eps
            backward = \
                CartPoleConfigModule.state_cost_fn(tmp_x, None)
            expected_grad[0, i] = (forward - backward) / (2. * eps)
        assert cost_grad == pytest.approx(expected_grad)
    def test_input_gradient(self):
        """
        """
        us = np.ones((1, 1))
        cost_grad = CartPoleConfigModule.gradient_cost_fn_with_input(None, us)
        # numeric grad
        eps = 1e-4
        expected_grad = np.zeros((1, 1))
        for i in range(1): 
            tmp_u = us.copy()
            tmp_u[0, i] = us[0, i] + eps
            forward = \
                CartPoleConfigModule.input_cost_fn(tmp_u)
            tmp_u = us.copy()
            tmp_u[0, i] = us[0, i] - eps
            backward = \
                CartPoleConfigModule.input_cost_fn(tmp_u)
            expected_grad[0, i] = (forward - backward) / (2. * eps)
        assert cost_grad == pytest.approx(expected_grad)
    def test_state_hessian(self):
        """
        """
        xs = np.ones((1, 4))
        cost_hess = CartPoleConfigModule.hessian_cost_fn_with_state(xs, None)
        # numeric grad
        eps = 1e-4
        expected_hess = np.zeros((1, 4, 4))
        for i in range(4): 
            tmp_x = xs.copy()
            tmp_x[0, i] = xs[0, i] + eps
            forward = \
                CartPoleConfigModule.gradient_cost_fn_with_state(
                    tmp_x, None, terminal=False)
            tmp_x = xs.copy()
            tmp_x[0, i] = xs[0, i] - eps
            backward = \
                CartPoleConfigModule.gradient_cost_fn_with_state(
                    tmp_x, None, terminal=False)
            expected_hess[0, :, i] = (forward - backward) / (2. * eps)
        assert cost_hess == pytest.approx(expected_hess)
    def test_terminal_state_hessian(self):
        """
        """
        xs = np.ones(4)
        cost_hess =\
            CartPoleConfigModule.hessian_cost_fn_with_state(xs, None,
                                                            terminal=True)
        # numeric grad
        eps = 1e-4
        expected_hess = np.zeros((1, 4, 4))
        for i in range(4): 
            tmp_x = xs.copy()
            tmp_x[i] = xs[i] + eps
            forward = \
                CartPoleConfigModule.gradient_cost_fn_with_state(
                    tmp_x, None, terminal=True)
            tmp_x = xs.copy()
            tmp_x[i] = xs[i] - eps
            backward = \
                CartPoleConfigModule.gradient_cost_fn_with_state(
                    tmp_x, None, terminal=True)
            expected_hess[0, :, i] = (forward - backward) / (2. * eps)
        assert cost_hess == pytest.approx(expected_hess)
    def test_input_hessian(self):
        """
        """
        us = np.ones((1, 1))
        xs = np.ones((1, 4))
        cost_hess = CartPoleConfigModule.hessian_cost_fn_with_input(us, xs)
        # numeric grad
        eps = 1e-4
        expected_hess = np.zeros((1, 1, 1))
        for i in range(1): 
            tmp_u = us.copy()
            tmp_u[0, i] = us[0, i] + eps
            forward = \
                CartPoleConfigModule.gradient_cost_fn_with_input(
                    xs, tmp_u)
            tmp_u = us.copy()
            tmp_u[0, i] = us[0, i] - eps
            backward = \
                CartPoleConfigModule.gradient_cost_fn_with_input(
                    xs, tmp_u)
            expected_hess[0, :, i] = (forward - backward) / (2. * eps)
        assert cost_hess == pytest.approx(expected_hess)
--- a/tests/configs/test_two_wheeled.py
+++ b/tests/configs/test_two_wheeled.py
@ -32,3 +32,110 @@ class TestCalcCost():
        expected_costs = np.ones((pop_size, state_size))*0.5
        assert costs == pytest.approx(expected_costs**2 * np.diag(config.Sf))
 class TestGradient():
    def test_state_gradient(self):
        """
        """
        xs = np.ones((1, 3))
        g_xs = np.ones((1, 3)) * 0.5
        cost_grad =\
            TwoWheeledConfigModule.gradient_cost_fn_with_state(xs, g_xs)
        # numeric grad
        eps = 1e-4
        expected_grad = np.zeros((1, 3))
        for i in range(3): 
            tmp_x = xs.copy()
            tmp_x[0, i] = xs[0, i] + eps
            forward = \
                TwoWheeledConfigModule.state_cost_fn(tmp_x, g_xs)
            forward = np.sum(forward)
            tmp_x = xs.copy()
            tmp_x[0, i] = xs[0, i] - eps
            backward = \
                TwoWheeledConfigModule.state_cost_fn(tmp_x, g_xs)
            backward = np.sum(backward)
            expected_grad[0, i] = (forward - backward) / (2. * eps)
        assert cost_grad == pytest.approx(expected_grad)
    def test_input_gradient(self):
        """
        """
        us = np.ones((1, 2))
        cost_grad =\
            TwoWheeledConfigModule.gradient_cost_fn_with_input(None, us)
        # numeric grad
        eps = 1e-4
        expected_grad = np.zeros((1, 2))
        for i in range(2): 
            tmp_u = us.copy()
            tmp_u[0, i] = us[0, i] + eps
            forward = \
                TwoWheeledConfigModule.input_cost_fn(tmp_u)
            forward = np.sum(forward)
            tmp_u = us.copy()
            tmp_u[0, i] = us[0, i] - eps
            backward = \
                TwoWheeledConfigModule.input_cost_fn(tmp_u)
            backward = np.sum(backward)
            expected_grad[0, i] = (forward - backward) / (2. * eps)
        assert cost_grad == pytest.approx(expected_grad)
    def test_state_hessian(self):
        """
        """
        g_xs = np.ones((1, 3)) * 0.5
        xs = np.ones((1, 3))
        cost_hess =\
            TwoWheeledConfigModule.hessian_cost_fn_with_state(xs, g_xs)
        # numeric grad
        eps = 1e-4
        expected_hess = np.zeros((1, 3, 3))
        for i in range(3): 
            tmp_x = xs.copy()
            tmp_x[0, i] = xs[0, i] + eps
            forward = \
                TwoWheeledConfigModule.gradient_cost_fn_with_state(
                    tmp_x, g_xs, terminal=False)
            tmp_x = xs.copy()
            tmp_x[0, i] = xs[0, i] - eps
            backward = \
                TwoWheeledConfigModule.gradient_cost_fn_with_state(
                    tmp_x, g_xs, terminal=False)
            expected_hess[0, :, i] = (forward - backward) / (2. * eps)
        assert cost_hess == pytest.approx(expected_hess)
    def test_input_hessian(self):
        """
        """
        us = np.ones((1, 2))
        xs = np.ones((1, 3))
        cost_hess = TwoWheeledConfigModule.hessian_cost_fn_with_input(us, xs)
        # numeric grad
        eps = 1e-4
        expected_hess = np.zeros((1, 2, 2))
        for i in range(2): 
            tmp_u = us.copy()
            tmp_u[0, i] = us[0, i] + eps
            forward = \
                TwoWheeledConfigModule.gradient_cost_fn_with_input(
                    xs, tmp_u)
            tmp_u = us.copy()
            tmp_u[0, i] = us[0, i] - eps
            backward = \
                TwoWheeledConfigModule.gradient_cost_fn_with_input(
                    xs, tmp_u)
            expected_hess[0, :, i] = (forward - backward) / (2. * eps)
        assert cost_hess == pytest.approx(expected_hess)
--- a/tests/models/test_cartpole.py
+++ b/tests/models/test_cartpole.py
@ -55,3 +55,63 @@ class TestCartPoleModel():
        pred_xs_alltogether = cartpole_model.predict_traj(curr_x, u)[0]
        assert pred_xs_alltogether == pytest.approx(pred_xs)
    def test_gradient_state(self):
        config = CartPoleConfigModule()
        cartpole_model =  CartPoleModel(config)
        xs = np.ones((1, config.STATE_SIZE)) \
             * np.random.rand(1, config.STATE_SIZE)
        xs[0, -1] = np.pi / 6.
        us = np.ones((1, config.INPUT_SIZE))
        grad = cartpole_model.calc_f_x(xs, us, config.DT)
        # expected cost
        expected_grad = np.zeros((1, config.STATE_SIZE, config.STATE_SIZE))
        eps = 1e-4
        for i in range(config.STATE_SIZE):
            tmp_x = xs.copy()
            tmp_x[0, i] = xs[0, i] + eps
            forward = \
                cartpole_model.predict_next_state(tmp_x[0], us[0])
            tmp_x = xs.copy()
            tmp_x[0, i] = xs[0, i] - eps
            backward = \
                cartpole_model.predict_next_state(tmp_x[0], us[0])
            expected_grad[0, :, i] = (forward - backward) / (2. * eps)
        assert grad == pytest.approx(expected_grad)
    def test_gradient_input(self):
        config = CartPoleConfigModule()
        cartpole_model =  CartPoleModel(config)
        xs = np.ones((1, config.STATE_SIZE)) \
             * np.random.rand(1, config.STATE_SIZE)
        xs[0, -1] = np.pi / 6.
        us = np.ones((1, config.INPUT_SIZE))
        grad = cartpole_model.calc_f_u(xs, us, config.DT)
        # expected cost
        expected_grad = np.zeros((1, config.STATE_SIZE, config.INPUT_SIZE))
        eps = 1e-4
        for i in range(config.INPUT_SIZE):
            tmp_u = us.copy()
            tmp_u[0, i] = us[0, i] + eps
            forward = \
                cartpole_model.predict_next_state(xs[0], tmp_u[0])
            tmp_u = us.copy()
            tmp_u[0, i] = us[0, i] - eps
            backward = \
                cartpole_model.predict_next_state(xs[0], tmp_u[0])
            expected_grad[0, :, i] = (forward - backward) / (2. * eps)
        assert grad == pytest.approx(expected_grad)
--- a/tests/models/test_two_wheeled.py
+++ b/tests/models/test_two_wheeled.py
@ -40,3 +40,61 @@ class TestTwoWheeledModel():
        pred_xs_alltogether = two_wheeled_model.predict_traj(curr_x, u)[0]
        assert pred_xs_alltogether == pytest.approx(pred_xs)
    def test_gradient_state(self):
        config = TwoWheeledConfigModule()
        two_wheeled_model = TwoWheeledModel(config)
        xs = np.ones((1, config.STATE_SIZE))
        xs[0, -1] = np.pi / 6.
        us = np.ones((1, config.INPUT_SIZE))
        grad = two_wheeled_model.calc_f_x(xs, us, config.DT)
        # expected cost
        expected_grad = np.zeros((1, config.STATE_SIZE, config.STATE_SIZE))
        eps = 1e-4
        for i in range(config.STATE_SIZE):
            tmp_x = xs.copy()
            tmp_x[0, i] = xs[0, i] + eps
            forward = \
                two_wheeled_model.predict_next_state(tmp_x[0], us[0])
            tmp_x = xs.copy()
            tmp_x[0, i] = xs[0, i] - eps
            backward = \
                two_wheeled_model.predict_next_state(tmp_x[0], us[0])
            expected_grad[0, :, i] = (forward - backward) / (2. * eps)
        assert grad == pytest.approx(expected_grad)
    def test_gradient_input(self):
        config = TwoWheeledConfigModule()
        two_wheeled_model = TwoWheeledModel(config)
        xs = np.ones((1, config.STATE_SIZE))
        xs[0, -1] = np.pi / 6.
        us = np.ones((1, config.INPUT_SIZE))
        grad = two_wheeled_model.calc_f_u(xs, us, config.DT)
        # expected cost
        expected_grad = np.zeros((1, config.STATE_SIZE, config.INPUT_SIZE))
        eps = 1e-4
        for i in range(config.INPUT_SIZE):
            tmp_u = us.copy()
            tmp_u[0, i] = us[0, i] + eps
            forward = \
                two_wheeled_model.predict_next_state(xs[0], tmp_u[0])
            tmp_u = us.copy()
            tmp_u[0, i] = us[0, i] - eps
            backward = \
                two_wheeled_model.predict_next_state(xs[0], tmp_u[0])
            expected_grad[0, :, i] = (forward - backward) / (2. * eps)
        assert grad == pytest.approx(expected_grad)