Add: catpole env

2020-04-07 17:29:55 +09:00 · 2020-04-07 17:29:55 +09:00 · 4627d6fc1e
parent 4e01264bd9
commit 4627d6fc1e
17 changed files with 723 additions and 26 deletions
--- a/Environments.md
+++ b/Environments.md
@ -9,21 +9,39 @@

 ## FistOrderLagEnv

-System equations.
+### System equation.

 <img src="assets/firstorderlag.png" width="550">

 You can set arbinatry time constant, tau. The default is 0.63 s

+### Cost.
+
+<img src="assets/quadratic_score.png" width="200">
+
+Q = diag[1., 1., 1., 1.], 
+R = diag[1., 1.]
+
+X_g denote the goal states.
+
 ## TwoWheeledEnv

-System equations.
+### System equation.

 <img src="assets/twowheeled.png" width="300">

+### Cost.
+
+<img src="assets/quadratic_score.png" width="200">
+
+Q = diag[5., 5., 1.], 
+R = diag[0.1, 0.1]
+
+X_g denote the goal states.
+
 ## CatpoleEnv (Swing up)

-System equations.
+System equation.

 <img src="assets/cartpole.png" width="600">

@ -31,4 +49,8 @@ You can set arbinatry parameters, mc, mp, l and g.

 Default settings are as follows:

-mc = 1, mp = 0.2, l = 0.5, g = 9.8
+mc = 1, mp = 0.2, l = 0.5, g = 9.81
+
+### Cost.
+
+<img src="assets/cartpole_score.png" width="300">
--- a/PythonLinearNonlinearControl/configs/cartpole.py
+++ b/PythonLinearNonlinearControl/configs/cartpole.py
@ -0,0 +1,218 @@
+import numpy as np
+
+class CartPoleConfigModule():
+    # parameters
+    ENV_NAME = "CartPole-v0"
+    TYPE = "Nonlinear"
+    TASK_HORIZON = 500
+    PRED_LEN = 50
+    STATE_SIZE = 4
+    INPUT_SIZE = 1
+    DT = 0.02
+    # cost parameters
+    R = np.diag([0.01])
+    # bounds
+    INPUT_LOWER_BOUND = np.array([-3.])
+    INPUT_UPPER_BOUND = np.array([3.])
+    # parameters
+    MP = 0.2
+    MC = 1.
+    L = 0.5
+    G = 9.81
+
+    def __init__(self):
+        """ 
+        """
+        # opt configs
+        self.opt_config = {
+            "Random": {
+                "popsize": 5000
+            },
+            "CEM": {
+                "popsize": 500,
+                "num_elites": 50,
+                "max_iters": 15,
+                "alpha": 0.3,
+                "init_var":9.,
+                "threshold":0.001
+            },
+            "MPPI":{
+                "beta" : 0.6,
+                "popsize": 5000,
+                "kappa": 0.9,
+                "noise_sigma": 0.5,
+            },
+            "MPPIWilliams":{
+                "popsize": 5000,
+                "lambda": 1.,
+                "noise_sigma": 0.9,
+            },
+           "iLQR":{
+                "max_iter": 500,
+                "init_mu": 1.,
+                "mu_min": 1e-6,
+                "mu_max": 1e10,
+                "init_delta": 2.,
+                "threshold": 1e-6,
+           },
+           "DDP":{
+                "max_iter": 500,
+                "init_mu": 1.,
+                "mu_min": 1e-6,
+                "mu_max": 1e10,
+                "init_delta": 2.,
+                "threshold": 1e-6,
+           },
+           "NMPC-CGMRES":{
+           },
+           "NMPC-Newton":{
+           },
+        } 
+
+    @staticmethod
+    def input_cost_fn(u):
+        """ input cost functions
+        Args:
+            u (numpy.ndarray): input, shape(pred_len, input_size)
+                or shape(pop_size, pred_len, input_size)
+        Returns:
+            cost (numpy.ndarray): cost of input, shape(pred_len, input_size) or
+                shape(pop_size, pred_len, input_size)
+        """
+        return (u**2) * np.diag(CartPoleConfigModule.R)
+    
+    @staticmethod
+    def state_cost_fn(x, g_x):
+        """ state cost function
+        Args:
+            x (numpy.ndarray): state, shape(pred_len, state_size)
+                or shape(pop_size, pred_len, state_size)
+            g_x (numpy.ndarray): goal state, shape(pred_len, state_size)
+                or shape(pop_size, pred_len, state_size)
+        Returns:
+            cost (numpy.ndarray): cost of state, shape(pred_len, 1) or
+                shape(pop_size, pred_len, 1)
+        """
+
+        if len(x.shape) > 2:
+            return (6. * (x[:, :, 0]**2) \
+                   + 12. * ((np.cos(x[:, :, 2]) + 1.)**2) \
+                   + 0.1 * (x[:, :, 1]**2) \
+                   + 0.1 *  (x[:, :, 3]**2))[:, :, np.newaxis]
+
+        elif len(x.shape) > 1:
+            return (6. * (x[:, 0]**2) \
+                   + 12. * ((np.cos(x[:, 2]) + 1.)**2) \
+                   + 0.1 * (x[:, 1]**2) \
+                   + 0.1 * (x[:, 3]**2))[:,  np.newaxis]
+            
+        return 6. * (x[0]**2) \
+               + 12. * ((np.cos(x[2]) + 1.)**2) \
+               + 0.1 * (x[1]**2) \
+               + 0.1 * (x[3]**2)
+
+    @staticmethod
+    def terminal_state_cost_fn(terminal_x, terminal_g_x):
+        """
+        Args:
+            terminal_x (numpy.ndarray): terminal state,
+                shape(state_size, ) or shape(pop_size, state_size)
+            terminal_g_x (numpy.ndarray): terminal goal state,
+                shape(state_size, ) or shape(pop_size, state_size)
+        Returns:
+            cost (numpy.ndarray): cost of state, shape(pred_len, ) or
+                shape(pop_size, pred_len)
+        """
+
+        if len(terminal_x.shape) > 1:
+            return (6. * (terminal_x[:, 0]**2) \
+                   + 12. * ((np.cos(terminal_x[:, 2]) + 1.)**2) \
+                   + 0.1 * (terminal_x[:, 1]**2) \
+                   + 0.1 * (terminal_x[:, 3]**2))[:, np.newaxis]
+            
+        return 6. * (terminal_x[0]**2) \
+               + 12. * ((np.cos(terminal_x[2]) + 1.)**2) \
+               + 0.1 * (terminal_x[1]**2) \
+               + 0.1 * (terminal_x[3]**2)
+    
+    @staticmethod
+    def gradient_cost_fn_with_state(x, g_x, terminal=False):
+        """ gradient of costs with respect to the state
+
+        Args:
+            x (numpy.ndarray): state, shape(pred_len, state_size)
+            g_x (numpy.ndarray): goal state, shape(pred_len, state_size)
+        
+        Returns:
+            l_x (numpy.ndarray): gradient of cost, shape(pred_len, state_size)
+                or shape(1, state_size)
+        """
+        if not terminal:
+            return None
+        
+        return None
+
+    @staticmethod
+    def gradient_cost_fn_with_input(x, u):
+        """ gradient of costs with respect to the input
+
+        Args:
+            x (numpy.ndarray): state, shape(pred_len, state_size)
+            u (numpy.ndarray): goal state, shape(pred_len, input_size)
+        
+        Returns:
+            l_u (numpy.ndarray): gradient of cost, shape(pred_len, input_size)
+        """
+        return None
+
+    @staticmethod
+    def hessian_cost_fn_with_state(x, g_x, terminal=False):
+        """ hessian costs with respect to the state
+
+        Args:
+            x (numpy.ndarray): state, shape(pred_len, state_size)
+            g_x (numpy.ndarray): goal state, shape(pred_len, state_size)
+        
+        Returns:
+            l_xx (numpy.ndarray): gradient of cost,
+                shape(pred_len, state_size, state_size) or
+                shape(1, state_size, state_size) or
+        """
+        if not terminal:
+            (pred_len, _) = x.shape
+            return None              
+        
+        return None
+
+    @staticmethod
+    def hessian_cost_fn_with_input(x, u):
+        """ hessian costs with respect to the input
+
+        Args:
+            x (numpy.ndarray): state, shape(pred_len, state_size)
+            u (numpy.ndarray): goal state, shape(pred_len, input_size)
+        
+        Returns:
+            l_uu (numpy.ndarray): gradient of cost,
+                shape(pred_len, input_size, input_size)
+        """
+        (pred_len, _) = u.shape
+
+        return None
+    
+    @staticmethod
+    def hessian_cost_fn_with_input_state(x, u):
+        """ hessian costs with respect to the state and input
+
+        Args:
+            x (numpy.ndarray): state, shape(pred_len, state_size)
+            u (numpy.ndarray): goal state, shape(pred_len, input_size)
+        
+        Returns:
+            l_ux (numpy.ndarray): gradient of cost ,
+                shape(pred_len, input_size, state_size)
+        """
+        (_, state_size) = x.shape
+        (pred_len, input_size) = u.shape
+
+        return np.zeros((pred_len, input_size, state_size))
--- a/PythonLinearNonlinearControl/configs/make_configs.py
+++ b/PythonLinearNonlinearControl/configs/make_configs.py
@ -1,5 +1,6 @@
 from .first_order_lag import FirstOrderLagConfigModule
 from .two_wheeled import TwoWheeledConfigModule
+from .cartpole import CartPoleConfigModule

 def make_config(args):
    """
@ -9,4 +10,6 @@ def make_config(args):
    if args.env == "FirstOrderLag":
        return FirstOrderLagConfigModule()
    elif args.env == "TwoWheeledConst" or args.env == "TwoWheeled":
-        return TwoWheeledConfigModule()
+        return TwoWheeledConfigModule()
+    elif args.env == "CartPole":
+        return CartPoleConfigModule()
--- a/PythonLinearNonlinearControl/envs/cartpole.py
+++ b/PythonLinearNonlinearControl/envs/cartpole.py
@ -14,12 +14,16 @@ class CartPoleEnv(Env):
    def __init__(self):
        """
        """
-        self.config = {"state_size" : 4,\
-                       "input_size" : 1,\
-                       "dt" : 0.02,\
-                       "max_step" : 1000,\
-                       "input_lower_bound": None,\
-                       "input_upper_bound": None,
+        self.config = {"state_size" : 4,
+                       "input_size" : 1,
+                       "dt" : 0.02,
+                       "max_step" : 500,
+                       "input_lower_bound": [-3.],
+                       "input_upper_bound": [3.],
+                       "mp": 0.2,
+                       "mc": 1.,
+                       "l": 0.5,
+                       "g": 9.81,
                       }

        super(CartPoleEnv, self).__init__(self.config)
@ -33,13 +37,13 @@ class CartPoleEnv(Env):
        """
        self.step_count = 0
        
-        self.curr_x = np.zeros(self.config["state_size"])
+        self.curr_x = np.array([0., 0., 0., 0.])

        if init_x is not None:
            self.curr_x = init_x

        # goal
-        self.g_x = np.array([0., 0., np.pi, 0.])
+        self.g_x = np.array([0., 0., -np.pi, 0.])
        
        # clear memory
        self.history_x = []
@ -65,20 +69,43 @@ class CartPoleEnv(Env):
                        self.config["input_upper_bound"])

        # step
-        next_x = np.zeros(self.config["state_size"])
+        # x
+        d_x0 = self.curr_x[1]
+        # v_x
+        d_x1 = (u[0] + self.config["mp"] * np.sin(self.curr_x[2]) \
+               * (self.config["l"] * (self.curr_x[3]**2) \
+                  + self.config["g"] * np.cos(self.curr_x[2]))) \
+               / (self.config["mc"] + self.config["mp"] \
+                  * (np.sin(self.curr_x[2])**2))
+        # theta
+        d_x2 = self.curr_x[3]
+        
+        # v_theta
+        d_x3 = (-u[0] * np.cos(self.curr_x[2]) \
+                - self.config["mp"] * self.config["l"] * (self.curr_x[3]**2) \
+                  * np.cos(self.curr_x[2]) * np.sin(self.curr_x[2]) \
+                - (self.config["mc"] + self.config["mp"]) * self.config["g"] \
+                   * np.sin(self.curr_x[2])) \
+               / (self.config["l"] * (self.config["mc"] + self.config["mp"] \
+                                      * (np.sin(self.curr_x[2])**2)))
+        
+        next_x = self.curr_x +\
+                 np.array([d_x0, d_x1, d_x2, d_x3]) * self.config["dt"] 

        # TODO: costs
        costs = 0.
        costs += 0.1 * np.sum(u**2)
-        costs += np.sum((self.curr_x - self.g_x)**2)
-
+        costs += 6. * self.curr_x[0]**2 \
+                 + 12. * (np.cos(self.curr_x[2]) + 1.)**2 \
+                 + 0.1 * self.curr_x[1]**2 \
+                 + 0.1 * self.curr_x[3]**2

        # save history
        self.history_x.append(next_x.flatten())
        self.history_g_x.append(self.g_x.flatten())
        
        # update
-        self.curr_x = next_x.flatten()
+        self.curr_x = next_x.flatten().copy()
        # update costs
        self.step_count += 1

--- a/PythonLinearNonlinearControl/envs/make_envs.py
+++ b/PythonLinearNonlinearControl/envs/make_envs.py
@ -1,6 +1,6 @@
 from .first_order_lag import FirstOrderLagEnv
 from .two_wheeled import TwoWheeledConstEnv
-from .cartpole import CartpoleEnv
+from .cartpole import CartPoleEnv

 def make_env(args):

@ -9,6 +9,6 @@ def make_env(args):
    elif args.env == "TwoWheeledConst":
        return TwoWheeledConstEnv()
    elif args.env == "CartPole":
-        return CartpoleEnv()
+        return CartPoleEnv()
    
    raise NotImplementedError("There is not {} Env".format(args.env))
--- a/PythonLinearNonlinearControl/envs/two_wheeled.py
+++ b/PythonLinearNonlinearControl/envs/two_wheeled.py
@ -86,7 +86,7 @@ class TwoWheeledConstEnv(Env):
        # TODO: costs
        costs = 0.
        costs += 0.1 * np.sum(u**2)
-        costs += np.sum((self.curr_x - self.g_x)**2)
+        costs += np.sum(((self.curr_x - self.g_x)**2) * np.array([5., 5., 1.]))

        # save history
        self.history_x.append(next_x.flatten())
--- a/PythonLinearNonlinearControl/models/cartpole.py
+++ b/PythonLinearNonlinearControl/models/cartpole.py
@ -0,0 +1,186 @@
+import numpy as np
+
+from .model import Model
+
+class CartPoleModel(Model):
+    """ cartpole model
+    """
+    def __init__(self, config):
+        """
+        """
+        super(CartPoleModel, self).__init__()
+        self.dt = config.DT
+        self.mc = config.MC
+        self.mp = config.MP
+        self.l = config.L
+        self.g = config.G
+
+    def predict_next_state(self, curr_x, u):
+        """ predict next state
+        
+        Args:
+            curr_x (numpy.ndarray): current state, shape(state_size, ) or
+                shape(pop_size, state_size)
+            u (numpy.ndarray): input, shape(input_size, ) or
+                shape(pop_size, input_size)
+        Returns:
+            next_x (numpy.ndarray): next state, shape(state_size, ) or
+                shape(pop_size, state_size)
+        """
+        if len(u.shape) == 1:
+            # x
+            d_x0 = curr_x[1]
+            # v_x
+            d_x1 = (u[0] + self.mp * np.sin(curr_x[2]) \
+                        * (self.l * (curr_x[3]**2) \
+                           + self.g * np.cos(curr_x[2]))) \
+                   / (self.mc + self.mp * (np.sin(curr_x[2])**2))
+            # theta
+            d_x2 = curr_x[3]
+            # v_theta
+            d_x3 = (-u[0] * np.cos(curr_x[2]) \
+                    - self.mp * self.l * (curr_x[3]**2) \
+                      * np.cos(curr_x[2]) * np.sin(curr_x[2]) \
+                    - (self.mc + self.mp) * self.g * np.sin(curr_x[2])) \
+                   / (self.l * (self.mc + self.mp * (np.sin(curr_x[2])**2)))
+        
+            next_x = curr_x +\
+                     np.array([d_x0, d_x1, d_x2, d_x3]) * self.dt 
+            
+            return next_x
+
+        elif len(u.shape) == 2:
+            # x
+            d_x0 = curr_x[:, 1]
+            # v_x
+            d_x1 = (u[:, 0] + self.mp * np.sin(curr_x[:, 2]) \
+                        * (self.l * (curr_x[:, 3]**2) \
+                           + self.g * np.cos(curr_x[:, 2]))) \
+                   / (self.mc + self.mp * (np.sin(curr_x[:, 2])**2))
+            # theta
+            d_x2 = curr_x[:, 3]
+            # v_theta
+            d_x3 = (-u[:, 0] * np.cos(curr_x[:, 2]) \
+                    - self.mp * self.l * (curr_x[:, 3]**2) \
+                      * np.cos(curr_x[:, 2]) * np.sin(curr_x[:, 2]) \
+                    - (self.mc + self.mp) * self.g * np.sin(curr_x[:, 2])) \
+                   / (self.l * (self.mc + self.mp * (np.sin(curr_x[:, 2])**2)))
+        
+            next_x = curr_x +\
+                     np.stack((d_x0, d_x1, d_x2, d_x3), axis=1) * self.dt 
+
+            return next_x
+    
+    def calc_f_x(self, xs, us, dt):
+        """ gradient of model with respect to the state in batch form
+        Args:
+            xs (numpy.ndarray): state, shape(pred_len+1, state_size)
+            us (numpy.ndarray): input, shape(pred_len, input_size,)
+        
+        Return:
+            f_x (numpy.ndarray): gradient of model with respect to x,
+                shape(pred_len, state_size, state_size)
+
+        Notes:
+            This should be discrete form !!
+        """ 
+        # get size
+        (_, state_size) = xs.shape
+        (pred_len, _) = us.shape
+
+        f_x = np.zeros((pred_len, state_size, state_size))
+
+        f_x[:, 0, 2] = -np.sin(xs[:, 2]) * us[:, 0]
+        f_x[:, 1, 2] = np.cos(xs[:, 2]) * us[:, 0]
+
+        return f_x * dt + np.eye(state_size)  # to discrete form
+
+    def calc_f_u(self, xs, us, dt):
+        """ gradient of model with respect to the input in batch form
+        Args:
+            xs (numpy.ndarray): state, shape(pred_len+1, state_size)
+            us (numpy.ndarray): input, shape(pred_len, input_size,)
+        
+        Return:
+            f_u (numpy.ndarray): gradient of model with respect to x,
+                shape(pred_len, state_size, input_size)
+
+        Notes:
+            This should be discrete form !!
+        """ 
+        # get size
+        (_, state_size) = xs.shape
+        (pred_len, input_size) = us.shape
+
+        f_u = np.zeros((pred_len, state_size, input_size))
+        
+        f_u[:, 1, 0] = 1. / (self.mc + self.mp * (np.sin(xs[:, 2])**2))
+        
+        f_u[:, 3, 0] = -np.cos(xs[:, 2]) \
+                       / (self.l * (self.mc \
+                                    + self.mp * (np.sin(xs[:, 2])**2)))
+
+        return f_u * dt  # to discrete form
+
+    def calc_f_xx(self, xs, us, dt):
+        """ hessian of model with respect to the state in batch form
+
+        Args:
+            xs (numpy.ndarray): state, shape(pred_len+1, state_size)
+            us (numpy.ndarray): input, shape(pred_len, input_size,)
+        
+        Return:
+            f_xx (numpy.ndarray): gradient of model with respect to x,
+                shape(pred_len, state_size, state_size, state_size)
+        """
+        # get size
+        (_, state_size) = xs.shape
+        (pred_len, _) = us.shape
+
+        f_xx = np.zeros((pred_len, state_size, state_size, state_size))
+
+        f_xx[:, 0, 2, 2] = -np.cos(xs[:, 2]) * us[:, 0]
+        f_xx[:, 1, 2, 2] = -np.sin(xs[:, 2]) * us[:, 0]
+
+        return f_xx * dt
+
+    def calc_f_ux(self, xs, us, dt):
+        """ hessian of model with respect to state and input in batch form
+
+        Args:
+            xs (numpy.ndarray): state, shape(pred_len+1, state_size)
+            us (numpy.ndarray): input, shape(pred_len, input_size,)
+        
+        Return:
+            f_ux (numpy.ndarray): gradient of model with respect to x,
+                shape(pred_len, state_size, input_size, state_size)
+        """
+        # get size
+        (_, state_size) = xs.shape
+        (pred_len, input_size) = us.shape
+
+        f_ux = np.zeros((pred_len, state_size, input_size, state_size))
+
+        f_ux[:, 0, 0, 2] = -np.sin(xs[:, 2])
+        f_ux[:, 1, 0, 2] = np.cos(xs[:, 2])
+
+        return f_ux * dt
+    
+    def calc_f_uu(self, xs, us, dt):
+        """ hessian of model with respect to input in batch form
+
+        Args:
+            xs (numpy.ndarray): state, shape(pred_len+1, state_size)
+            us (numpy.ndarray): input, shape(pred_len, input_size,)
+        
+        Return:
+            f_uu (numpy.ndarray): gradient of model with respect to x,
+                shape(pred_len, state_size, input_size, input_size)
+        """
+        # get size
+        (_, state_size) = xs.shape
+        (pred_len, input_size) = us.shape
+
+        f_uu = np.zeros((pred_len, state_size, input_size, input_size))
+
+        return f_uu * dt
--- a/PythonLinearNonlinearControl/models/make_models.py
+++ b/PythonLinearNonlinearControl/models/make_models.py
@ -1,5 +1,6 @@
 from .first_order_lag import FirstOrderLagModel
 from .two_wheeled import TwoWheeledModel
+from .cartpole import CartPoleModel

 def make_model(args, config):
    
@ -7,5 +8,7 @@ def make_model(args, config):
        return FirstOrderLagModel(config)
    elif args.env == "TwoWheeledConst" or args.env == "TwoWheeled":
        return TwoWheeledModel(config)
+    elif args.env == "CartPole":
+        return CartPoleModel(config)
    
-    raise NotImplementedError("There is not {} Model".format(args.env))
+    raise NotImplementedError("There is not {} Model".format(args.env))
--- a/README.md
+++ b/README.md
@ -15,7 +15,7 @@ PythonLinearNonLinearControl is a library implementing the linear and nonlinear
 | Linear Model Predictive Control (MPC) | ✓ | x | x | x | x |
 | Cross Entropy Method (CEM) | ✓ | ✓ | x | x | x |
 | Model Preidictive Path Integral Control of Nagabandi, A. (MPPI) | ✓ | ✓ | x | x | x |
-| Model Preidictive Path Integral Control of Williams (MPPIWilliams) | ✓ | ✓ | x | x | x |
+| Model Preidictive Path Integral Control of Williams, G. (MPPIWilliams) | ✓ | ✓ | x | x | x |
 | Random Shooting Method (Random) | ✓ | ✓ | x | x | x |
 | Iterative LQR (iLQR) | x | ✓ | x | ✓ | x |
 | Differential Dynamic Programming (DDP) | x | ✓ | x | ✓ | ✓ |
@ -34,7 +34,7 @@ Following algorithms are implemented in PythonLinearNonlinearControl
 - [Cross Entropy Method (CEM)](https://arxiv.org/abs/1805.12114)
  - Ref: Chua, K., Calandra, R., McAllister, R., & Levine, S. (2018). Deep reinforcement learning in a handful of trials using probabilistic dynamics models. In Advances in Neural Information Processing Systems (pp. 4754-4765)
    - [script](PythonLinearNonlinearControl/controllers/cem.py)
- [Model Preidictive Path Integral Control Nagabandi, A. (MPPI)](https://arxiv.org/abs/1909.11652)
+- [Model Preidictive Path Integral Control of Nagabandi, A. (MPPI)](https://arxiv.org/abs/1909.11652)
  - Ref: Nagabandi, A., Konoglie, K., Levine, S., & Kumar, V. (2019). Deep Dynamics Models for Learning Dexterous Manipulation. arXiv preprint arXiv:1909.11652.
    - [script](PythonLinearNonlinearControl/controllers/mppi.py)
 - [Model Preidictive Path Integral Control of Williams, G. (MPPIWilliams)](https://ieeexplore.ieee.org/abstract/document/7989202)
@ -71,7 +71,7 @@ Following algorithms are implemented in PythonLinearNonlinearControl
 All states and inputs of environments are continuous.
 **It should be noted that the algorithms for linear model could be applied to nonlinear enviroments if you have linealized the model of nonlinear environments.**

-You could know abount out environmets more in [Environments.md](Environments.md)
+You could know abount our environmets more in [Environments.md](Environments.md)

 # Usage 

--- a/assets/cartpole_score.png
+++ b/assets/cartpole_score.png
--- a/assets/quadratic_score.png
+++ b/assets/quadratic_score.png
--- a/scripts/simple_run.py
+++ b/scripts/simple_run.py
@ -42,9 +42,9 @@ def run(args):
 def main():
    parser = argparse.ArgumentParser()

-    parser.add_argument("--controller_type", type=str, default="MPPIWilliams")
+    parser.add_argument("--controller_type", type=str, default="CEM")
    parser.add_argument("--planner_type", type=str, default="const")
-    parser.add_argument("--env", type=str, default="FirstOrderLag")
+    parser.add_argument("--env", type=str, default="TwoWheeledConst")
    parser.add_argument("--result_dir", type=str, default="./result")

    args = parser.parse_args()
--- a/tests/configs/test_cartpole.py
+++ b/tests/configs/test_cartpole.py
@ -0,0 +1,31 @@
+import pytest
+import numpy as np
+
+from PythonLinearNonlinearControl.configs.cartpole \
+    import CartPoleConfigModule
+
+class TestCalcCost():
+    def test_calc_costs(self):
+        # make config
+        config = CartPoleConfigModule()
+        # set
+        pred_len = 5
+        state_size = 4
+        input_size = 1
+        pop_size = 2
+        pred_xs = np.ones((pop_size, pred_len, state_size))
+        g_xs = np.ones((pop_size, pred_len, state_size)) * 0.5
+        input_samples = np.ones((pop_size, pred_len, input_size)) * 0.5
+
+        costs = config.input_cost_fn(input_samples)
+
+        assert costs.shape == (pop_size, pred_len, input_size)
+
+        costs = config.state_cost_fn(pred_xs, g_xs)
+        
+        assert costs.shape == (pop_size, pred_len, 1)
+
+        costs = config.terminal_state_cost_fn(pred_xs[:, -1, :],\
+                                              g_xs[:, -1, :])
+        
+        assert costs.shape == (pop_size, 1)
--- a/tests/configs/test_two_wheeled.py
+++ b/tests/configs/test_two_wheeled.py
@ -0,0 +1,34 @@
+import pytest
+import numpy as np
+
+from PythonLinearNonlinearControl.configs.two_wheeled \
+    import TwoWheeledConfigModule
+
+class TestCalcCost():
+    def test_calc_costs(self):
+        # make config
+        config = TwoWheeledConfigModule()
+        # set
+        pred_len = 5
+        state_size = 3
+        input_size = 2
+        pop_size = 2
+        pred_xs = np.ones((pop_size, pred_len, state_size))
+        g_xs = np.ones((pop_size, pred_len, state_size)) * 0.5
+        input_samples = np.ones((pop_size, pred_len, input_size)) * 0.5
+
+        costs = config.input_cost_fn(input_samples)
+        expected_costs = np.ones((pop_size, pred_len, input_size))*0.5
+
+        assert costs == pytest.approx(expected_costs**2 * np.diag(config.R))
+
+        costs = config.state_cost_fn(pred_xs, g_xs)
+        expected_costs = np.ones((pop_size, pred_len, state_size))*0.5
+        
+        assert costs == pytest.approx(expected_costs**2 * np.diag(config.Q))
+
+        costs = config.terminal_state_cost_fn(pred_xs[:, -1, :],\
+                                              g_xs[:, -1, :])
+        expected_costs = np.ones((pop_size, state_size))*0.5
+        
+        assert costs == pytest.approx(expected_costs**2 * np.diag(config.Sf))
--- a/tests/env/test_cartpole.py
+++ b/tests/env/test_cartpole.py
@ -0,0 +1,73 @@
+import pytest
+import numpy as np
+
+from PythonLinearNonlinearControl.envs.cartpole import CartPoleEnv
+    
+class TestCartPoleEnv():
+    """
+    """
+    def test_step(self):
+        env = CartPoleEnv()
+
+        curr_x = np.ones(4)
+        curr_x[2] = np.pi / 6.
+
+        env.reset(init_x=curr_x)
+        
+        u = np.ones(1)
+
+        next_x, _, _, _ = env.step(u)
+
+        d_x0 = curr_x[1]
+        d_x1 = (1. + env.config["mp"] * np.sin(np.pi / 6.) \
+                     * (env.config["l"] * (1.**2) \
+                        + env.config["g"] * np.cos(np.pi / 6.))) \
+                / (env.config["mc"] + env.config["mp"] * np.sin(np.pi / 6.)**2)
+        d_x2 = curr_x[3]
+        d_x3 = (-1. * np.cos(np.pi / 6.) \
+                - env.config["mp"] * env.config["l"] * (1.**2) \
+                  * np.cos(np.pi / 6.) * np.sin(np.pi / 6.) \
+                - (env.config["mp"] + env.config["mc"]) * env.config["g"] \
+                   * np.sin(np.pi / 6.)) \
+                 / (env.config["l"] \
+                     * (env.config["mc"] \
+                        + env.config["mp"] * np.sin(np.pi / 6.)**2))
+
+        expected = np.array([d_x0, d_x1, d_x2, d_x3]) * env.config["dt"] \
+                   + curr_x
+
+        assert next_x == pytest.approx(expected, abs=1e-5) 
+    
+    def test_bound_step(self):
+        env = CartPoleEnv()
+
+        curr_x = np.ones(4)
+        curr_x[2] = np.pi / 6.
+
+        env.reset(init_x=curr_x)
+        
+        u = np.ones(1) * 1e3
+
+        next_x, _, _, _ = env.step(u)
+
+        u = env.config["input_upper_bound"][0]
+
+        d_x0 = curr_x[1]
+        d_x1 = (u + env.config["mp"] * np.sin(np.pi / 6.) \
+                     * (env.config["l"] * (1.**2) \
+                        + env.config["g"] * np.cos(np.pi / 6.))) \
+                / (env.config["mc"] + env.config["mp"] * np.sin(np.pi / 6.)**2)
+        d_x2 = curr_x[3]
+        d_x3 = (-u * np.cos(np.pi / 6.) \
+                - env.config["mp"] * env.config["l"] * (1.**2) \
+                  * np.cos(np.pi / 6.) * np.sin(np.pi / 6.) \
+                - (env.config["mp"] + env.config["mc"]) * env.config["g"] \
+                   * np.sin(np.pi / 6.)) \
+                 / (env.config["l"] \
+                     * (env.config["mc"] \
+                        + env.config["mp"] * np.sin(np.pi / 6.)**2))
+
+        expected = np.array([d_x0, d_x1, d_x2, d_x3]) * env.config["dt"] \
+                   + curr_x
+
+        assert next_x == pytest.approx(expected, abs=1e-5) 
--- a/tests/models/test_cartpole.py
+++ b/tests/models/test_cartpole.py
@ -0,0 +1,57 @@
+import pytest
+import numpy as np
+
+from PythonLinearNonlinearControl.models.cartpole import CartPoleModel
+from PythonLinearNonlinearControl.configs.cartpole \
+    import CartPoleConfigModule
+    
+class TestCartPoleModel():
+    """
+    """
+    def test_step(self):
+        config = CartPoleConfigModule()
+        cartpole_model = CartPoleModel(config)
+
+        curr_x = np.ones(4)
+        curr_x[2] = np.pi / 6.
+
+        us = np.ones((1, 1))
+
+        next_x = cartpole_model.predict_traj(curr_x, us)
+
+        d_x0 = curr_x[1]
+        d_x1 = (1. + config.MP * np.sin(np.pi / 6.) \
+                     * (config.L * (1.**2) \
+                        + config.G * np.cos(np.pi / 6.))) \
+                / (config.MC + config.MP * np.sin(np.pi / 6.)**2)
+        d_x2 = curr_x[3]
+        d_x3 = (-1. * np.cos(np.pi / 6.) \
+                - config.MP * config.L * (1.**2) \
+                  * np.cos(np.pi / 6.) * np.sin(np.pi / 6.) \
+                - (config.MP + config.MC) * config.G \
+                   * np.sin(np.pi / 6.)) \
+                 / (config.L \
+                     * (config.MC \
+                        + config.MP * np.sin(np.pi / 6.)**2))
+
+        expected = np.array([d_x0, d_x1, d_x2, d_x3]) * config.DT \
+                   + curr_x
+
+        expected = np.stack((curr_x, expected), axis=0)
+
+        assert next_x == pytest.approx(expected, abs=1e-5) 
+
+    def test_predict_traj(self):
+        config = CartPoleConfigModule()
+        cartpole_model =  CartPoleModel(config)
+
+        curr_x = np.ones(config.STATE_SIZE)
+        curr_x[-1] = np.pi / 6.
+        u = np.ones((1, config.INPUT_SIZE))
+
+        pred_xs = cartpole_model.predict_traj(curr_x, u)
+
+        u = np.tile(u, (2, 1, 1))
+        pred_xs_alltogether = cartpole_model.predict_traj(curr_x, u)[0]
+
+        assert pred_xs_alltogether == pytest.approx(pred_xs)
--- a/tests/models/test_first_order_lag.py
+++ b/tests/models/test_first_order_lag.py
@ -0,0 +1,43 @@
+import pytest
+import numpy as np
+
+from PythonLinearNonlinearControl.models.model \
+    import LinearModel
+from PythonLinearNonlinearControl.models.first_order_lag \
+    import FirstOrderLagModel
+from PythonLinearNonlinearControl.configs.first_order_lag \
+    import FirstOrderLagConfigModule
+
+from unittest.mock import patch
+from unittest.mock import Mock
+
+class TestFirstOrderLagModel():
+    """
+    """
+    def test_step(self):
+        config = FirstOrderLagConfigModule()
+        firstorderlag_model = FirstOrderLagModel(config)
+
+        curr_x = np.ones(config.STATE_SIZE)
+        u = np.ones((1, config.INPUT_SIZE))
+
+        with patch.object(LinearModel, "predict_traj") as mock_predict_traj:
+            firstorderlag_model.predict_traj(curr_x, u)
+            
+            mock_predict_traj.assert_called_once_with(curr_x, u)
+    
+    def test_predict_traj(self):
+
+        config = FirstOrderLagConfigModule()
+        firstorderlag_model = FirstOrderLagModel(config)
+
+        curr_x = np.ones(config.STATE_SIZE)
+        curr_x[-1] = np.pi / 6.
+        u = np.ones((1, config.INPUT_SIZE))
+
+        pred_xs = firstorderlag_model.predict_traj(curr_x, u)
+
+        u = np.tile(u, (1, 1, 1))
+        pred_xs_alltogether = firstorderlag_model.predict_traj(curr_x, u)[0]
+
+        assert pred_xs_alltogether == pytest.approx(pred_xs)