Add: two wheeled model and environment

Shunichi09 committed 2020-04-02 17:37:09 +09:00
parent cca357886b
commit ac7ab11fa0
14 changed files with 277 additions and 109 deletions

.travis.yml (new file)

@@ -0,0 +1,14 @@
language: python
python:
  - 3.7
install:
  - pip install --upgrade pip setuptools wheel
  - pip install coveralls
script:
  - coverage run --source=PythonLinearNonlinearControl setup.py test
after_success:
  - coveralls


@@ -23,8 +23,6 @@ class FirstOrderLagConfigModule():
     def __init__(self):
         """
-        Args:
-            save_dit (str): save directory
         """
         # opt configs
         self.opt_config = {


@@ -1,4 +1,5 @@
 from .first_order_lag import FirstOrderLagConfigModule
+from .two_wheeled import TwoWheeledConfigModule

 def make_config(args):
     """
@@ -7,3 +8,5 @@ def make_config(args):
     """
     if args.env == "FirstOrderLag":
         return FirstOrderLagConfigModule()
+    elif args.env == "TwoWheeledConst" or args.env == "TwoWheeled":
+        return TwoWheeledConfigModule()


@@ -0,0 +1,86 @@
import numpy as np


class TwoWheeledConfigModule():
    # parameters
    ENV_NAME = "TwoWheeled-v0"
    TYPE = "Nonlinear"
    TASK_HORIZON = 1000
    PRED_LEN = 10
    STATE_SIZE = 3
    INPUT_SIZE = 2
    DT = 0.01
    # cost parameters
    R = np.eye(INPUT_SIZE)
    Q = np.eye(STATE_SIZE)
    Sf = np.eye(STATE_SIZE)
    # bounds
    INPUT_LOWER_BOUND = np.array([-1.5, -3.14])
    INPUT_UPPER_BOUND = np.array([1.5, 3.14])

    def __init__(self):
        """
        """
        # opt configs
        self.opt_config = {
            "Random": {
                "popsize": 5000
            },
            "CEM": {
                "popsize": 500,
                "num_elites": 50,
                "max_iters": 15,
                "alpha": 0.3,
                "init_var": 1.,
                "threshold": 0.001
            },
            "MPPI": {
                "beta": 0.6,
                "popsize": 5000,
                "kappa": 0.9,
                "noise_sigma": 0.5,
            },
            "iLQR": {
            },
            "NMPC-CGMRES": {
            },
            "NMPC-Newton": {
            },
        }

    @staticmethod
    def input_cost_fn(u):
        """ input cost functions
        Args:
            u (numpy.ndarray): input, shape(input_size, )
                or shape(pop_size, input_size)
        Returns:
            cost (numpy.ndarray): cost of input, none or shape(pop_size, )
        """
        return (u**2) * np.diag(TwoWheeledConfigModule.R) * 0.1

    @staticmethod
    def state_cost_fn(x, g_x):
        """ state cost function
        Args:
            x (numpy.ndarray): state, shape(pred_len, state_size)
                or shape(pop_size, pred_len, state_size)
            g_x (numpy.ndarray): goal state, shape(state_size, )
                or shape(pop_size, state_size)
        Returns:
            cost (numpy.ndarray): cost of state, none or shape(pop_size, )
        """
        return ((x - g_x)**2) * np.diag(TwoWheeledConfigModule.Q)

    @staticmethod
    def terminal_state_cost_fn(terminal_x, terminal_g_x):
        """
        Args:
            terminal_x (numpy.ndarray): terminal state,
                shape(state_size, ) or shape(pop_size, state_size)
            terminal_g_x (numpy.ndarray): terminal goal state,
                shape(state_size, ) or shape(pop_size, state_size)
        Returns:
            cost (numpy.ndarray): cost of state, none or shape(pop_size, )
        """
        return ((terminal_x - terminal_g_x)**2) \
               * np.diag(TwoWheeledConfigModule.Sf)
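Below is a minimal sketch, not part of the commit, of how these vectorized cost functions could be combined over a batch of sampled trajectories. The import path matches the test file at the end of this commit; the trajectory shapes and goal values are illustrative.

```python
import numpy as np

from PythonLinearNonlinearControl.configs.two_wheeled import TwoWheeledConfigModule

config = TwoWheeledConfigModule()
pop_size, pred_len = 4, config.PRED_LEN

# fake sampled trajectories and a fixed goal (shapes follow the docstrings above)
xs = np.random.randn(pop_size, pred_len, config.STATE_SIZE)
us = np.random.randn(pop_size, pred_len, config.INPUT_SIZE)
g_x = np.array([5., 5., 0.])

# the cost functions are elementwise quadratics weighted by diag(Q), diag(R), diag(Sf);
# summing over time and state/input dimensions scores each sample in the population
state_cost = np.sum(config.state_cost_fn(xs[:, :-1, :], g_x), axis=(1, 2))
terminal_cost = np.sum(config.terminal_state_cost_fn(xs[:, -1, :], g_x), axis=1)
input_cost = np.sum(config.input_cost_fn(us), axis=(1, 2))

total_cost = state_cost + terminal_cost + input_cost   # shape (pop_size, )
```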


@@ -70,13 +70,13 @@ class FirstOrderLagEnv(Env):
             self.curr_x = init_x

         # goal
-        self.goal_state = np.array([0., 0, -2., 3.])
+        self.g_x = np.array([0., 0, -2., 3.])

         # clear memory
         self.history_x = []
         self.history_g_x = []

-        return self.curr_x, {"goal_state": self.goal_state}
+        return self.curr_x, {"goal_state": self.g_x}

     def step(self, u):
         """
@@ -99,15 +99,17 @@ class FirstOrderLagEnv(Env):
         # cost
         cost = 0
         cost = np.sum(u**2)
-        cost += np.sum((self.curr_x-g_x)**2)
+        cost += np.sum((self.curr_x - self.g_x)**2)

         # save history
         self.history_x.append(next_x.flatten())
-        self.history_g_x.append(self.goal_state.flatten())
+        self.history_g_x.append(self.g_x.flatten())

         # update
         self.curr_x = next_x.flatten()
         # update costs
         self.step_count += 1

-        return next_x.flatten(), cost, self.step_count > self.config["max_step"], {"goal_state" : self.goal_state}
+        return next_x.flatten(), cost, \
+               self.step_count > self.config["max_step"], \
+               {"goal_state" : self.g_x}


@@ -1,8 +1,11 @@
 from .first_order_lag import FirstOrderLagEnv
+from .two_wheeled import TwoWheeledConstEnv

 def make_env(args):
     if args.env == "FirstOrderLag":
         return FirstOrderLagEnv()
+    elif args.env == "TwoWheeledConst":
+        return TwoWheeledConstEnv()

-    raise NotImplementedError("There is not {} Env".format(name))
+    raise NotImplementedError("There is not {} Env".format(args.env))


@@ -1,8 +1,30 @@
 import numpy as np
+import scipy
+from scipy import integrate

 from .env import Env

+def step_two_wheeled_env(curr_x, u, dt, method="Oylar"):
+    """ step two wheeled environment
+    Args:
+        curr_x (numpy.ndarray): current state, shape(state_size, )
+        u (numpy.ndarray): input, shape(input_size, )
+        dt (float): sampling time
+    Returns:
+        next_x (numpy.ndarray): next state, shape(state_size, )
+    Notes:
+        TODO: deal with another method, like Runge-Kutta
+    """
+    B = np.array([[np.cos(curr_x[-1]), 0.],
+                  [np.sin(curr_x[-1]), 0.],
+                  [0., 1.]])
+
+    x_dot = np.matmul(B, u[:, np.newaxis])
+
+    next_x = x_dot.flatten() * dt + curr_x
+
+    return next_x
+
 class TwoWheeledConstEnv(Env):
     """ Two wheeled robot with constant goal Env
     """
@@ -12,15 +34,16 @@ class TwoWheeledConstEnv(Env):
         self.config = {"state_size" : 3,\
                        "input_size" : 2,\
                        "dt" : 0.01,\
-                       "max_step" : 500,\
+                       "max_step" : 1000,\
                        "input_lower_bound": [-1.5, -3.14],\
                        "input_upper_bound": [1.5, 3.14],
                        }

-        super(TwoWheeledEnv, self).__init__(self.config)
+        super(TwoWheeledConstEnv, self).__init__(self.config)

     def reset(self, init_x=None):
         """ reset state
         Returns:
             init_x (numpy.ndarray): initial state, shape(state_size, )
             info (dict): information
@@ -33,16 +56,17 @@ class TwoWheeledConstEnv(Env):
             self.curr_x = init_x

         # goal
-        self.goal_state = np.array([0., 0, -2., 3.])
+        self.g_x = np.array([5., 5., 0.])

         # clear memory
         self.history_x = []
         self.history_g_x = []

-        return self.curr_x, {"goal_state": self.goal_state}
+        return self.curr_x, {"goal_state": self.g_x}

     def step(self, u):
-        """
+        """ step environments
         Args:
             u (numpy.ndarray) : input, shape(input_size, )
         Returns:
@@ -54,92 +78,25 @@ class TwoWheeledConstEnv(Env):
         # clip action
         u = np.clip(u,
                     self.config["input_lower_bound"],
-                    self.config["input_lower_bound"])
+                    self.config["input_upper_bound"])

         # step
-        next_x = np.matmul(self.A, self.curr_x[:, np.newaxis]) \
-                 + np.matmul(self.B, u[:, np.newaxis])
+        next_x = step_two_wheeled_env(self.curr_x, u, self.config["dt"])

-        # TODO: implement costs
+        # TODO: costs
+        costs = 0.
+        costs += 0.1 * np.sum(u**2)
+        costs += np.sum((self.curr_x - self.g_x)**2)

         # save history
         self.history_x.append(next_x.flatten())
-        self.history_g_x.append(self.goal_state.flatten())
+        self.history_g_x.append(self.g_x.flatten())

         # update
         self.curr_x = next_x.flatten()
         # update costs
         self.step_count += 1

-        return next_x.flatten(), 0., self.step_count > self.config["max_step"], {"goal_state" : self.goal_state}
+        return next_x.flatten(), costs, \
+               self.step_count > self.config["max_step"], \
+               {"goal_state" : self.g_x}
-
-class TwoWheeledEnv(Env):
-    """ Two wheeled robot Env
-    """
-    def __init__(self):
-        """
-        """
-        self.config = {"state_size" : 3,\
-                       "input_size" : 2,\
-                       "dt" : 0.01,\
-                       "max_step" : 500,\
-                       "input_lower_bound": [-1.5, -3.14],\
-                       "input_upper_bound": [1.5, 3.14],
-                       }
-
-        super(TwoWheeledEnv, self).__init__(self.config)
-
-    def reset(self, init_x=None):
-        """ reset state
-        Returns:
-            init_x (numpy.ndarray): initial state, shape(state_size, )
-            info (dict): information
-        """
-        self.step_count = 0
-
-        self.curr_x = np.zeros(self.config["state_size"])
-
-        if init_x is not None:
-            self.curr_x = init_x
-
-        # goal
-        self.goal_state = np.array([0., 0, -2., 3.])
-
-        # clear memory
-        self.history_x = []
-        self.history_g_x = []
-
-        return self.curr_x, {"goal_state": self.goal_state}
-
-    def step(self, u):
-        """
-        Args:
-            u (numpy.ndarray) : input, shape(input_size, )
-        Returns:
-            next_x (numpy.ndarray): next state, shape(state_size, )
-            cost (float): costs
-            done (bool): end the simulation or not
-            info (dict): information
-        """
-        # clip action
-        u = np.clip(u,
-                    self.config["input_lower_bound"],
-                    self.config["input_lower_bound"])
-
-        # step
-        next_x = np.matmul(self.A, self.curr_x[:, np.newaxis]) \
-                 + np.matmul(self.B, u[:, np.newaxis])
-
-        # TODO: implement costs
-
-        # save history
-        self.history_x.append(next_x.flatten())
-        self.history_g_x.append(self.goal_state.flatten())
-
-        # update
-        self.curr_x = next_x.flatten()
-        # update costs
-        self.step_count += 1
-
-        return next_x.flatten(), 0., self.step_count > self.config["max_step"], {"goal_state" : self.goal_state}
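For orientation, a minimal sketch, not part of the commit, that rolls the Euler-integrated unicycle kinematics above forward with a constant input. The function body is duplicated from the diff so the snippet is self-contained; the input values and step count are illustrative.

```python
import numpy as np

def step_two_wheeled_env(curr_x, u, dt):
    # unicycle kinematics: x_dot = v*cos(yaw), y_dot = v*sin(yaw), yaw_dot = omega
    B = np.array([[np.cos(curr_x[-1]), 0.],
                  [np.sin(curr_x[-1]), 0.],
                  [0., 1.]])
    return np.matmul(B, u[:, np.newaxis]).flatten() * dt + curr_x

x = np.zeros(3)                 # state = [pos_x, pos_y, yaw]
u = np.array([1.0, 0.5])        # input = [linear velocity, angular velocity]
for _ in range(100):            # 100 steps of dt = 0.01, i.e. one second
    x = step_two_wheeled_env(x, u, dt=0.01)
print(x)                        # the robot has curved while driving forward
```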


@@ -1,8 +1,11 @@
 from .first_order_lag import FirstOrderLagModel
+from .two_wheeled import TwoWheeledModel

 def make_model(args, config):
     if args.env == "FirstOrderLag":
         return FirstOrderLagModel(config)
+    elif args.env == "TwoWheeledConst" or args.env == "TwoWheeled":
+        return TwoWheeledModel(config)

     raise NotImplementedError("There is not {} Model".format(args.env))


@@ -0,0 +1,53 @@
import numpy as np

from .model import Model


class TwoWheeledModel(Model):
    """ two wheeled model
    """
    def __init__(self, config):
        """
        """
        super(TwoWheeledModel, self).__init__()

        self.dt = config.DT

    def predict_next_state(self, curr_x, u):
        """ predict next state
        Args:
            curr_x (numpy.ndarray): current state, shape(state_size, ) or
                shape(pop_size, state_size)
            u (numpy.ndarray): input, shape(input_size, ) or
                shape(pop_size, input_size)
        Returns:
            next_x (numpy.ndarray): next state, shape(state_size, ) or
                shape(pop_size, state_size)
        """
        if len(u.shape) == 1:
            B = np.array([[np.cos(curr_x[-1]), 0.],
                          [np.sin(curr_x[-1]), 0.],
                          [0., 1.]])
            # calc dot
            x_dot = np.matmul(B, u[:, np.newaxis])
            # next state
            next_x = x_dot.flatten() * self.dt + curr_x

            return next_x

        elif len(u.shape) == 2:
            (pop_size, state_size) = curr_x.shape
            (_, input_size) = u.shape
            # B.shape = (pop_size, state_size, input_size)
            B = np.zeros((pop_size, state_size, input_size))
            # insert
            B[:, 0, 0] = np.cos(curr_x[:, -1])
            B[:, 1, 0] = np.sin(curr_x[:, -1])
            B[:, 2, 1] = np.ones(pop_size)
            # x_dot.shape = (pop_size, state_size, 1)
            x_dot = np.matmul(B, u[:, :, np.newaxis])
            # next state
            next_x = x_dot[:, :, 0] * self.dt + curr_x

            return next_x
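A short usage sketch, not part of the commit, exercising both branches of predict_next_state. The import paths are the ones used by the test file below; the sample states, inputs, and population size are illustrative.

```python
import numpy as np

from PythonLinearNonlinearControl.models.two_wheeled import TwoWheeledModel
from PythonLinearNonlinearControl.configs.two_wheeled import TwoWheeledConfigModule

config = TwoWheeledConfigModule()
model = TwoWheeledModel(config)

# single state / single input: shapes (3, ) and (2, )
x = np.array([0., 0., np.pi / 4.])
u = np.array([1.0, 0.1])
print(model.predict_next_state(x, u))           # next state, shape (3, )

# batched case for sampling-based controllers: shapes (pop_size, 3) and (pop_size, 2)
pop_size = 500
xs = np.tile(x, (pop_size, 1))
us = np.random.uniform(config.INPUT_LOWER_BOUND, config.INPUT_UPPER_BOUND,
                       size=(pop_size, config.INPUT_SIZE))
print(model.predict_next_state(xs, us).shape)   # (pop_size, 3)
```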


@@ -1,20 +1,25 @@
 [![MIT licensed](https://img.shields.io/badge/license-MIT-blue.svg)](LICENSE)
+[![Coverage Status](https://coveralls.io/repos/github/Shunichi09/PythonLinearNonlinearControl/badge.svg?branch=master)](https://coveralls.io/github/Shunichi09/PythonLinearNonlinearControl?branch=master)
+[![Build Status](https://travis-ci.org/Shunichi09/PythonLinearNonlinearControl.svg?branch=master)](https://travis-ci.org/Shunichi09/PythonLinearNonlinearControl)

 # PythonLinearNonLinearControl

 PythonLinearNonLinearControl is a library implementing linear and nonlinear control theories in python.

+![Concepts](assets/concepts.png)

 # Algorithms

 | Algorithm | Use Linear Model | Use Nonlinear Model | Need Gradient (Hamiltonian) | Need Gradient (Model) |
-|:----------|:---------------:|:----------------:|:----------------:|
+|:----------|:---------------:|:----------------:|:----------------:|:----------------:|
 | Linear Model Predictive Control (MPC) | ✓ | x | x | x |
 | Cross Entropy Method (CEM) | ✓ | ✓ | x | x |
 | Model Predictive Path Integral Control (MPPI) | ✓ | ✓ | x | x |
 | Random Shooting Method (Random) | ✓ | ✓ | x | x |
-| Iterative LQR (iLQR) | ✓ | ✓ | x | ✓ |
-| Nonlinear Model Predictive Control -CGMRES- (NMPC-CGMRES) | ✓ | ✓ | ✓ | x |
-| Nonlinear Model Predictive Control -Newton- (NMPC-Newton) | ✓ | ✓ | x | x |
+| Iterative LQR (iLQR) | x | ✓ | x | ✓ |
+| Unconstrained Nonlinear Model Predictive Control (NMPC) | x | ✓ | ✓ | x |
+| Constrained Nonlinear Model Predictive Control CGMRES (NMPC-CGMRES) | x | ✓ | ✓ | x |
+| Constrained Nonlinear Model Predictive Control Newton (NMPC-Newton) | x | ✓ | x | x |

 "Need Gradient" means that you have to implement the gradient of the model or the gradient of the Hamiltonian.
 This library is also easy to extend for your own situations.
@@ -35,24 +40,24 @@ Following algorithms are implemented in PythonLinearNonlinearControl
     - [script]()
 - [Iterative LQR (iLQR)](https://ieeexplore.ieee.org/document/6386025)
     - Ref: Tassa, Y., Erez, T., & Todorov, E. (2012, October). Synthesis and stabilization of complex behaviors through online trajectory optimization. In 2012 IEEE/RSJ International Conference on Intelligent Robots and Systems (pp. 4906-4913). IEEE. and [Study Wolf](https://github.com/studywolf/control)
-    - [script]()
+    - [script (Coming soon)]()
-- [Unconstrained Nonlinear Model Predictive Control](https://www.sciencedirect.com/science/article/pii/S0005109897000058)
+- [Unconstrained Nonlinear Model Predictive Control (NMPC)](https://www.sciencedirect.com/science/article/pii/S0005109897000058)
     - Ref: Ohtsuka, T., & Fujii, H. A. (1997). Real-time optimization algorithm for nonlinear receding-horizon control. Automatica, 33(6), 1147-1154.
-    - [script]()
+    - [script (Coming soon)]()
 - [Constrained Nonlinear Model Predictive Control -CGMRES- (NMPC-CGMRES)](https://www.sciencedirect.com/science/article/pii/S0005109897000058)
     - Ref: Ohtsuka, T., & Fujii, H. A. (1997). Real-time optimization algorithm for nonlinear receding-horizon control. Automatica, 33(6), 1147-1154.
-    - [script]()
+    - [script (Coming soon)]()
 - [Constrained Nonlinear Model Predictive Control -Newton- (NMPC-Newton)](https://www.sciencedirect.com/science/article/pii/S0005109897000058)
     - Ref: Ohtsuka, T., & Fujii, H. A. (1997). Real-time optimization algorithm for nonlinear receding-horizon control. Automatica, 33(6), 1147-1154.
-    - [script]()
+    - [script (Coming soon)]()

 # Environments

 | Name | Linear | Nonlinear | State Size | Input size |
-|:----------|:---------------:|:----------------:|:----------------:|
+|:----------|:---------------:|:----------------:|:----------------:|:----------------:|
 | First Order Lag System | ✓ | x | 4 | 2 |
-| Auto Cruse Control System | x | ✓ | 4 | 2 |
-| Two wheeled System | x | ✓ | 4 | 2 |
+| Two wheeled System (Constant Goal) | x | ✓ | 3 | 2 |
+| Two wheeled System (Moving Goal) (Coming soon) | x | ✓ | 3 | 2 |

 All environments are continuous.
 **It should be noted that the algorithms for linear models can be applied to nonlinear environments if you have linearized the model of the nonlinear environment.**
@@ -98,6 +103,8 @@ python scripts/simple_run.py --model "first-order_lag" --controller "CEM"

 When we design control systems, we should have **Model**, **Planner**, **Controller** and **Runner** as shown in the figure.
 It should be noted that **Model** and **Environment** are different. As mentioned before, the algorithms for linear models can be applied to nonlinear environments if you have a linearized model of the nonlinear environment. In addition, you can use a neural network or any nonlinear function as the model, although this library cannot deal with that yet.

+![Concepts](assets/concepts.png)

 ## Model

 System model. For instance, in the case that a model is linear, this model should have the form "x[k+1] = Ax[k] + Bu[k]".
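For illustration only (not part of the README diff), a minimal numeric sketch of that linear state update with made-up matrices:

```python
import numpy as np

# x[k+1] = A x[k] + B u[k], a discrete-time linear model (illustrative matrices)
A = np.array([[1.0, 0.1],
              [0.0, 1.0]])
B = np.array([[0.0],
              [0.1]])

x = np.array([1.0, 0.0])     # state at step k
u = np.array([0.5])          # input at step k
x_next = A @ x + B @ u       # state at step k + 1
```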

assets/concept.png (new binary file, 211 KiB, not shown)


@@ -40,9 +40,9 @@ def run(args):
 def main():
     parser = argparse.ArgumentParser()
-    parser.add_argument("--controller_type", type=str, default="MPC")
+    parser.add_argument("--controller_type", type=str, default="CEM")
     parser.add_argument("--planner_type", type=str, default="const")
-    parser.add_argument("--env", type=str, default="FirstOrderLag")
+    parser.add_argument("--env", type=str, default="TwoWheeledConst")
     parser.add_argument("--result_dir", type=str, default="./result")

     args = parser.parse_args()


@@ -1,14 +1,14 @@
 from setuptools import find_packages
 from setuptools import setup

-install_requires = ['numpy', 'matplotlib', 'cvxopt']
+install_requires = ['numpy', 'matplotlib', 'cvxopt', 'scipy']
 tests_require = ['pytest']
 setup_requires = ["pytest-runner"]

 setup(
     name='PythonLinearNonlinearControl',
     version='2.0',
-    description='Implementing linear and non-linear control method in python',
+    description='Implementing linear and nonlinear control method in python',
     author='Shunichi Sekiguchi',
     author_email='quick1st97of@gmail.com',
     install_requires=install_requires,


@@ -0,0 +1,42 @@
import pytest
import numpy as np

from PythonLinearNonlinearControl.models.two_wheeled import TwoWheeledModel
from PythonLinearNonlinearControl.configs.two_wheeled \
    import TwoWheeledConfigModule


class TestTwoWheeledModel():
    """
    """
    def test_step(self):
        config = TwoWheeledConfigModule()
        two_wheeled_model = TwoWheeledModel(config)

        curr_x = np.ones(config.STATE_SIZE)
        curr_x[-1] = np.pi / 6.

        u = np.ones((1, config.INPUT_SIZE))

        next_x = two_wheeled_model.predict_traj(curr_x, u)

        pos_x = np.cos(curr_x[-1]) * u[0, 0] * config.DT + curr_x[0]
        pos_y = np.sin(curr_x[-1]) * u[0, 0] * config.DT + curr_x[1]

        expected = np.array([[1., 1., np.pi / 6.],
                             [pos_x, pos_y, curr_x[-1] + u[0, 1] * config.DT]])

        assert next_x == pytest.approx(expected)

    def test_predict_traj(self):
        config = TwoWheeledConfigModule()
        two_wheeled_model = TwoWheeledModel(config)

        curr_x = np.ones(config.STATE_SIZE)
        curr_x[-1] = np.pi / 6.

        u = np.ones((1, config.INPUT_SIZE))

        pred_xs = two_wheeled_model.predict_traj(curr_x, u)

        u = np.tile(u, (1, 1, 1))
        pred_xs_alltogether = two_wheeled_model.predict_traj(curr_x, u)[0]

        assert pred_xs_alltogether == pytest.approx(pred_xs)
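predict_traj itself is not part of this commit (it presumably lives in the Model base class). Judging from the expected array in test_step, it appears to roll predict_next_state forward over the input sequence and stack the initial state on top. A minimal sketch under that assumption, for illustration only:

```python
import numpy as np

def predict_traj_sketch(model, curr_x, us):
    """ Hypothetical reimplementation for illustration; not the library's code.
    Args:
        curr_x (numpy.ndarray): initial state, shape(state_size, )
        us (numpy.ndarray): input sequence, shape(pred_len, input_size)
    Returns:
        pred_xs (numpy.ndarray): predicted states, shape(pred_len + 1, state_size)
    """
    pred_xs = [curr_x]
    x = curr_x
    for u in us:
        # one Euler step of the kinematics per input
        x = model.predict_next_state(x, u)
        pred_xs.append(x)
    return np.array(pred_xs)
```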