Add nonlinear sample system

2021-02-13 21:19:49 +09:00 · 2021-02-13 21:19:49 +09:00 · 8c28ff328a
parent 8f4ba9a12b
commit 8c28ff328a
13 changed files with 434 additions and 35 deletions
--- a/PythonLinearNonlinearControl/common/utils.py
+++ b/PythonLinearNonlinearControl/common/utils.py
@ -91,17 +91,17 @@ def update_state_with_Runge_Kutta(state, u, functions, dt=0.01, batch=True):
        for i, func in enumerate(functions):
            k3[i] = dt * func(state + k2, u)
-        return (k0 + 2. * k1 + 2. * k2 + k3) / 6.
+        return state + (k0 + 2. * k1 + 2. * k2 + k3) / 6.
    else:
        batch_size, state_size = state.shape
        assert state_size == len(functions), \
            "Invalid functions length, You need to give the state size functions"
-        k0 = np.zeros(batch_size, state_size)
+        k0 = np.zeros((batch_size, state_size))
-        k1 = np.zeros(batch_size, state_size)
+        k1 = np.zeros((batch_size, state_size))
-        k2 = np.zeros(batch_size, state_size)
+        k2 = np.zeros((batch_size, state_size))
-        k3 = np.zeros(batch_size, state_size)
+        k3 = np.zeros((batch_size, state_size))
        for i, func in enumerate(functions):
            k0[:, i] = dt * func(state, u)
@ -115,4 +115,4 @@ def update_state_with_Runge_Kutta(state, u, functions, dt=0.01, batch=True):
        for i, func in enumerate(functions):
            k3[:, i] = dt * func(state + k2, u)
-        return (k0 + 2. * k1 + 2. * k2 + k3) / 6.
+        return state + (k0 + 2. * k1 + 2. * k2 + k3) / 6.
--- a/PythonLinearNonlinearControl/configs/make_configs.py
+++ b/PythonLinearNonlinearControl/configs/make_configs.py
@ -1,6 +1,7 @@
 from .first_order_lag import FirstOrderLagConfigModule
 from .two_wheeled import TwoWheeledConfigModule
 from .cartpole import CartPoleConfigModule
 from .nonlinear_sample_system import NonlinearSampleSystemConfigModule
 def make_config(args):
@ -14,3 +15,5 @@ def make_config(args):
        return TwoWheeledConfigModule()
    elif args.env == "CartPole":
        return CartPoleConfigModule()
    elif args.env == "NonlinearSample":
        return NonlinearSampleSystemConfigModule()
--- a/PythonLinearNonlinearControl/configs/nonlinear_sample_system.py
+++ b/PythonLinearNonlinearControl/configs/nonlinear_sample_system.py
@ -0,0 +1,219 @@
 import numpy as np
 class NonlinearSampleSystemConfigModule():
    # parameters
    ENV_NAME = "NonlinearSampleSystem-v0"
    PLANNER_TYPE = "Const"
    TYPE = "Nonlinear"
    TASK_HORIZON = 2500
    PRED_LEN = 10
    STATE_SIZE = 2
    INPUT_SIZE = 1
    DT = 0.01
    R = np.diag([0.01])
    Q = None
    Sf = None
    # bounds
    INPUT_LOWER_BOUND = np.array([-0.5])
    INPUT_UPPER_BOUND = np.array([0.5])
    def __init__(self):
        """
        """
        # opt configs
        self.opt_config = {
            "Random": {
                "popsize": 5000
            },
            "CEM": {
                "popsize": 500,
                "num_elites": 50,
                "max_iters": 15,
                "alpha": 0.3,
                "init_var": 9.,
                "threshold": 0.001
            },
            "MPPI": {
                "beta": 0.6,
                "popsize": 5000,
                "kappa": 0.9,
                "noise_sigma": 0.5,
            },
            "MPPIWilliams": {
                "popsize": 5000,
                "lambda": 1.,
                "noise_sigma": 0.9,
            },
            "iLQR": {
                "max_iter": 500,
                "init_mu": 1.,
                "mu_min": 1e-6,
                "mu_max": 1e10,
                "init_delta": 2.,
                "threshold": 1e-6,
            },
            "DDP": {
                "max_iter": 500,
                "init_mu": 1.,
                "mu_min": 1e-6,
                "mu_max": 1e10,
                "init_delta": 2.,
                "threshold": 1e-6,
            },
            "NMPC-CGMRES": {
            },
            "NMPC-Newton": {
            },
        }
    @staticmethod
    def input_cost_fn(u):
        """ input cost functions
        Args:
            u (numpy.ndarray): input, shape(pred_len, input_size)
                or shape(pop_size, pred_len, input_size)
        Returns:
            cost (numpy.ndarray): cost of input, shape(pred_len, input_size) or
                shape(pop_size, pred_len, input_size)
        """
        return (u**2) * np.diag(NonlinearSampleSystemConfigModule.R)
    @staticmethod
    def state_cost_fn(x, g_x):
        """ state cost function
        Args:
            x (numpy.ndarray): state, shape(pred_len, state_size)
                or shape(pop_size, pred_len, state_size)
            g_x (numpy.ndarray): goal state, shape(pred_len, state_size)
                or shape(pop_size, pred_len, state_size)
        Returns:
            cost (numpy.ndarray): cost of state, shape(pred_len, 1) or
                shape(pop_size, pred_len, 1)
        """
        if len(x.shape) > 2:
            return (0.5 * (x[:, :, 0]**2) +
                    0.5 * (x[:, :, 1]**2))[:, :, np.newaxis]
        elif len(x.shape) > 1:
            return (0.5 * (x[:, 0]**2) + 0.5 * (x[:, 1]**2))[:, np.newaxis]
        return 0.5 * (x[0]**2) + 0.5 * (x[1]**2)
    @ staticmethod
    def terminal_state_cost_fn(terminal_x, terminal_g_x):
        """
        Args:
            terminal_x (numpy.ndarray): terminal state,
                shape(state_size, ) or shape(pop_size, state_size)
            terminal_g_x (numpy.ndarray): terminal goal state,
                shape(state_size, ) or shape(pop_size, state_size)
        Returns:
            cost (numpy.ndarray): cost of state, shape(pred_len, ) or
                shape(pop_size, pred_len)
        """
        if len(terminal_x.shape) > 1:
            return (0.5 * (terminal_x[:, 0]**2) +
                    0.5 * (terminal_x[:, 1]**2))[:, np.newaxis]
        return 0.5 * (terminal_x[0]**2) + 0.5 * (terminal_x[1]**2)
    @ staticmethod
    def gradient_cost_fn_with_state(x, g_x, terminal=False):
        """ gradient of costs with respect to the state
        Args:
            x (numpy.ndarray): state, shape(pred_len, state_size)
            g_x (numpy.ndarray): goal state, shape(pred_len, state_size)
        Returns:
            l_x (numpy.ndarray): gradient of cost, shape(pred_len, state_size)
                or shape(1, state_size)
        """
        if not terminal:
            cost_dx0 = x[:, 0]
            cost_dx1 = x[:, 1]
            cost_dx = np.stack((cost_dx0, cost_dx1), axis=1)
            return cost_dx
        cost_dx0 = x[0]
        cost_dx1 = x[1]
        cost_dx = np.array([[cost_dx0, cost_dx1]])
        return cost_dx
    @ staticmethod
    def gradient_cost_fn_with_input(x, u):
        """ gradient of costs with respect to the input
        Args:
            x (numpy.ndarray): state, shape(pred_len, state_size)
            u (numpy.ndarray): goal state, shape(pred_len, input_size)
        Returns:
            l_u (numpy.ndarray): gradient of cost, shape(pred_len, input_size)
        """
        return 2. * u * np.diag(NonlinearSampleSystemConfigModule.R)
    @ staticmethod
    def hessian_cost_fn_with_state(x, g_x, terminal=False):
        """ hessian costs with respect to the state
        Args:
            x (numpy.ndarray): state, shape(pred_len, state_size)
            g_x (numpy.ndarray): goal state, shape(pred_len, state_size)
        Returns:
            l_xx (numpy.ndarray): gradient of cost,
                shape(pred_len, state_size, state_size) or
                shape(1, state_size, state_size) or
        """
        if not terminal:
            (pred_len, state_size) = x.shape
            hessian = np.eye(state_size)
            hessian = np.tile(hessian, (pred_len, 1, 1))
            hessian[:, 0, 0] = 1.
            hessian[:, 1, 1] = 1.
            return hessian
        state_size = len(x)
        hessian = np.eye(state_size)
        hessian[0, 0] = 1.
        hessian[1, 1] = 1.
        return hessian[np.newaxis, :, :]
    @ staticmethod
    def hessian_cost_fn_with_input(x, u):
        """ hessian costs with respect to the input
        Args:
            x (numpy.ndarray): state, shape(pred_len, state_size)
            u (numpy.ndarray): goal state, shape(pred_len, input_size)
        Returns:
            l_uu (numpy.ndarray): gradient of cost,
                shape(pred_len, input_size, input_size)
        """
        (pred_len, _) = u.shape
        return np.tile(NonlinearSampleSystemConfigModule.R, (pred_len, 1, 1))
    @ staticmethod
    def hessian_cost_fn_with_input_state(x, u):
        """ hessian costs with respect to the state and input
        Args:
            x (numpy.ndarray): state, shape(pred_len, state_size)
            u (numpy.ndarray): goal state, shape(pred_len, input_size)
        Returns:
            l_ux (numpy.ndarray): gradient of cost ,
                shape(pred_len, input_size, state_size)
        """
        (_, state_size) = x.shape
        (pred_len, input_size) = u.shape
        return np.zeros((pred_len, input_size, state_size))
--- a/PythonLinearNonlinearControl/configs/nonlinear_system_sample.py
+++ b/PythonLinearNonlinearControl/configs/nonlinear_system_sample.py
--- a/PythonLinearNonlinearControl/envs/make_envs.py
+++ b/PythonLinearNonlinearControl/envs/make_envs.py
@ -2,6 +2,7 @@ from .first_order_lag import FirstOrderLagEnv
 from .two_wheeled import TwoWheeledConstEnv
 from .two_wheeled import TwoWheeledTrackEnv
 from .cartpole import CartPoleEnv
 from .nonlinear_sample_system import NonlinearSampleSystemEnv
 def make_env(args):
@ -14,5 +15,7 @@ def make_env(args):
        return TwoWheeledTrackEnv()
    elif args.env == "CartPole":
        return CartPoleEnv()
    elif args.env == "NonlinearSample":
        return NonlinearSampleSystemEnv()
    raise NotImplementedError("There is not {} Env".format(args.env))
--- a/PythonLinearNonlinearControl/envs/nonlinear_sample_system.py
+++ b/PythonLinearNonlinearControl/envs/nonlinear_sample_system.py
@ -5,7 +5,7 @@ from .env import Env
 from ..common.utils import update_state_with_Runge_Kutta
-class NonlinearSampleEnv(Env):
+class NonlinearSampleSystemEnv(Env):
    """ Nonlinear Sample Env
    """
@ -15,12 +15,12 @@ class NonlinearSampleEnv(Env):
        self.config = {"state_size": 2,
                       "input_size": 1,
                       "dt": 0.01,
-                       "max_step": 250,
+                       "max_step": 2000,
                       "input_lower_bound": [-0.5],
                       "input_upper_bound": [0.5],
                       }
-        super(NonlinearSampleEnv, self).__init__(self.config)
+        super(NonlinearSampleSystemEnv, self).__init__(self.config)
    def reset(self, init_x=np.array([2., 0.])):
        """ reset state
@ -62,7 +62,8 @@ class NonlinearSampleEnv(Env):
        functions = [self._func_x_1, self._func_x_2]
        next_x = update_state_with_Runge_Kutta(self.curr_x, u,
-                                               functions, self.config["dt"])
+                                               functions, self.config["dt"],
                                               batch=False)
        # cost
        cost = 0
@ -83,18 +84,14 @@ class NonlinearSampleEnv(Env):
            {"goal_state": self.g_x}
    def _func_x_1(self, x, u):
        """
        """
        x_dot = x[1]
        return x_dot
    def _func_x_2(self, x, u):
-        """
+        x_dot = (1. - x[0]**2 - x[1]**2) * x[1] - x[0] + u[0]
        """
        x_dot = (1. - x[0]**2 - x[1]**2) * x[1] - x[0] + u
        return x_dot
    def plot_func(self, to_plot, i=None, history_x=None, history_g_x=None):
        """
        """
-        raise ValueError("NonlinearSampleEnv does not have animation")
+        raise ValueError("NonlinearSampleSystemEnv does not have animation")
--- a/PythonLinearNonlinearControl/models/make_models.py
+++ b/PythonLinearNonlinearControl/models/make_models.py
@ -1,6 +1,7 @@
 from .first_order_lag import FirstOrderLagModel
 from .two_wheeled import TwoWheeledModel
 from .cartpole import CartPoleModel
 from .nonlinear_sample_system import NonlinearSampleSystemModel
 def make_model(args, config):
@ -11,5 +12,7 @@ def make_model(args, config):
        return TwoWheeledModel(config)
    elif args.env == "CartPole":
        return CartPoleModel(config)
    elif args.env == "NonlinearSample":
        return NonlinearSampleSystemModel(config)
    raise NotImplementedError("There is not {} Model".format(args.env))
--- a/PythonLinearNonlinearControl/models/nonlinear_sample_system.py
+++ b/PythonLinearNonlinearControl/models/nonlinear_sample_system.py
@ -0,0 +1,164 @@
 import numpy as np
 from .model import Model
 from ..common.utils import update_state_with_Runge_Kutta
 class NonlinearSampleSystemModel(Model):
    """ nonlinear sample system model
    """
    def __init__(self, config):
        """
        """
        super(NonlinearSampleSystemModel, self).__init__()
        self.dt = config.DT
    def predict_next_state(self, curr_x, u):
        """ predict next state
        Args:
            curr_x (numpy.ndarray): current state, shape(state_size, ) or
                shape(pop_size, state_size)
            u (numpy.ndarray): input, shape(input_size, ) or
                shape(pop_size, input_size)
        Returns:
            next_x (numpy.ndarray): next state, shape(state_size, ) or
                shape(pop_size, state_size)
        """
        if len(u.shape) == 1:
            func_1 = self._func_x_1
            func_2 = self._func_x_2
            functions = [func_1, func_2]
            next_x = update_state_with_Runge_Kutta(
                curr_x, u, functions, batch=False)
            return next_x
        elif len(u.shape) == 2:
            def func_1(xs, us): return self._func_x_1(xs, us, batch=True)
            def func_2(xs, us): return self._func_x_2(xs, us, batch=True)
            functions = [func_1, func_2]
            next_x = update_state_with_Runge_Kutta(
                curr_x, u, functions, batch=True)
            return next_x
    def _func_x_1(self, x, u, batch=False):
        if not batch:
            x_dot = x[1]
        else:
            x_dot = x[:, 1]
        return x_dot
    def _func_x_2(self, x, u, batch=False):
        if not batch:
            x_dot = (1. - x[0]**2 - x[1]**2) * x[1] - x[0] + u[0]
        else:
            x_dot = (1. - x[:, 0]**2 - x[:, 1]**2) * \
                x[:, 1] - x[:, 0] + u[:, 0]
        return x_dot
    def calc_f_x(self, xs, us, dt):
        """ gradient of model with respect to the state in batch form
        Args:
            xs (numpy.ndarray): state, shape(pred_len+1, state_size)
            us (numpy.ndarray): input, shape(pred_len, input_size,)
        Return:
            f_x (numpy.ndarray): gradient of model with respect to x,
                shape(pred_len, state_size, state_size)
        Notes:
            This should be discrete form !!
        """
        # get size
        (_, state_size) = xs.shape
        (pred_len, _) = us.shape
        f_x = np.zeros((pred_len, state_size, state_size))
        f_x[:, 0, 1] = 1.
        f_x[:, 1, 0] = 2. * xs[:, 0] * xs[:, 1] - 1.
        f_x[:, 1, 1] = - 2. * xs[:, 1] * xs[:, 1] + \
            (1. - xs[:, 0]**2 - xs[:, 1]**2)
        return f_x * dt + np.eye(state_size)  # to discrete form
    def calc_f_u(self, xs, us, dt):
        """ gradient of model with respect to the input in batch form
        Args:
            xs (numpy.ndarray): state, shape(pred_len+1, state_size)
            us (numpy.ndarray): input, shape(pred_len, input_size,)
        Return:
            f_u (numpy.ndarray): gradient of model with respect to x,
                shape(pred_len, state_size, input_size)
        Notes:
            This should be discrete form !!
        """
        # get size
        (_, state_size) = xs.shape
        (pred_len, input_size) = us.shape
        f_u = np.zeros((pred_len, state_size, input_size))
        f_u[:, 1, 0] = 1.
        return f_u * dt  # to discrete form
    def calc_f_xx(self, xs, us, dt):
        """ hessian of model with respect to the state in batch form
        Args:
            xs (numpy.ndarray): state, shape(pred_len+1, state_size)
            us (numpy.ndarray): input, shape(pred_len, input_size,)
        Return:
            f_xx (numpy.ndarray): gradient of model with respect to x,
                shape(pred_len, state_size, state_size, state_size)
        """
        # get size
        (_, state_size) = xs.shape
        (pred_len, _) = us.shape
        f_xx = np.zeros((pred_len, state_size, state_size, state_size))
        raise NotImplementedError
    def calc_f_ux(self, xs, us, dt):
        """ hessian of model with respect to state and input in batch form
        Args:
            xs (numpy.ndarray): state, shape(pred_len+1, state_size)
            us (numpy.ndarray): input, shape(pred_len, input_size,)
        Return:
            f_ux (numpy.ndarray): gradient of model with respect to x,
                shape(pred_len, state_size, input_size, state_size)
        """
        # get size
        (_, state_size) = xs.shape
        (pred_len, input_size) = us.shape
        f_ux = np.zeros((pred_len, state_size, input_size, state_size))
        raise NotImplementedError
    def calc_f_uu(self, xs, us, dt):
        """ hessian of model with respect to input in batch form
        Args:
            xs (numpy.ndarray): state, shape(pred_len+1, state_size)
            us (numpy.ndarray): input, shape(pred_len, input_size,)
        Return:
            f_uu (numpy.ndarray): gradient of model with respect to x,
                shape(pred_len, state_size, input_size, input_size)
        """
        # get size
        (_, state_size) = xs.shape
        (pred_len, input_size) = us.shape
        f_uu = np.zeros((pred_len, state_size, input_size, input_size))
        raise NotImplementedError
--- a/PythonLinearNonlinearControl/planners/make_planners.py
+++ b/PythonLinearNonlinearControl/planners/make_planners.py
@ -1,6 +1,7 @@
 from .const_planner import ConstantPlanner
 from .closest_point_planner import ClosestPointPlanner
 def make_planner(args, config):
    if args.env == "FirstOrderLag":
@ -11,5 +12,8 @@ def make_planner(args, config):
        return ClosestPointPlanner(config)
    elif args.env == "CartPole":
        return ConstantPlanner(config)
    elif args.env == "NonlinearSample":
        return ConstantPlanner(config)
-    raise NotImplementedError("There is not {} Planner".format(args.planner_type))
+    raise NotImplementedError(
        "There is not {} Planner".format(args.planner_type))
--- a/assets/nonlinear_sample_system.png
+++ b/assets/nonlinear_sample_system.png
--- a/assets/nonlinear_sample_system_score.png
+++ b/assets/nonlinear_sample_system_score.png
--- a/scripts/show_result.py
+++ b/scripts/show_result.py
@ -8,6 +8,7 @@ import matplotlib.pyplot as plt
 from PythonLinearNonlinearControl.plotters.plot_func import load_plot_data, \
    plot_multi_result
 def run(args):
    controllers = ["iLQR", "DDP", "CEM", "MPPI"]
@ -41,6 +42,7 @@ def run(args):
    plot_multi_result(history_us, histories_g=np.zeros_like(history_us),
                      labels=controllers, ylabel="u", name="input_history")
 def main():
    parser = argparse.ArgumentParser()
@ -51,5 +53,6 @@ def main():
    run(args)
 if __name__ == "__main__":
    main()
--- a/scripts/simple_run.py
+++ b/scripts/simple_run.py
@ -11,6 +11,7 @@ from PythonLinearNonlinearControl.plotters.plot_func import plot_results, \
    save_plot_data
 from PythonLinearNonlinearControl.plotters.animator import Animator
 def run(args):
    # logger
    make_logger(args.result_dir)
@ -44,17 +45,19 @@ def run(args):
        animator = Animator(env, args=args)
        animator.draw(history_x, history_g)
 def main():
    parser = argparse.ArgumentParser()
-    parser.add_argument("--controller_type", type=str, default="CEM")
+    parser.add_argument("--controller_type", type=str, default="DDP")
-    parser.add_argument("--env", type=str, default="TwoWheeledTrack")
+    parser.add_argument("--env", type=str, default="NonlinearSample")
-    parser.add_argument("--save_anim", type=bool_flag, default=1)
+    parser.add_argument("--save_anim", type=bool_flag, default=0)
    parser.add_argument("--result_dir", type=str, default="./result")
    args = parser.parse_args()
    run(args)
 if __name__ == "__main__":
    main()