diff --git a/PythonLinearNonlinearControl/common/utils.py b/PythonLinearNonlinearControl/common/utils.py index bcbde9f..e7f6d82 100644 --- a/PythonLinearNonlinearControl/common/utils.py +++ b/PythonLinearNonlinearControl/common/utils.py @@ -91,17 +91,17 @@ def update_state_with_Runge_Kutta(state, u, functions, dt=0.01, batch=True): for i, func in enumerate(functions): k3[i] = dt * func(state + k2, u) - return (k0 + 2. * k1 + 2. * k2 + k3) / 6. + return state + (k0 + 2. * k1 + 2. * k2 + k3) / 6. else: batch_size, state_size = state.shape assert state_size == len(functions), \ "Invalid functions length, You need to give the state size functions" - k0 = np.zeros(batch_size, state_size) - k1 = np.zeros(batch_size, state_size) - k2 = np.zeros(batch_size, state_size) - k3 = np.zeros(batch_size, state_size) + k0 = np.zeros((batch_size, state_size)) + k1 = np.zeros((batch_size, state_size)) + k2 = np.zeros((batch_size, state_size)) + k3 = np.zeros((batch_size, state_size)) for i, func in enumerate(functions): k0[:, i] = dt * func(state, u) @@ -115,4 +115,4 @@ def update_state_with_Runge_Kutta(state, u, functions, dt=0.01, batch=True): for i, func in enumerate(functions): k3[:, i] = dt * func(state + k2, u) - return (k0 + 2. * k1 + 2. * k2 + k3) / 6. + return state + (k0 + 2. * k1 + 2. * k2 + k3) / 6. diff --git a/PythonLinearNonlinearControl/configs/make_configs.py b/PythonLinearNonlinearControl/configs/make_configs.py index 4bb0873..1267470 100644 --- a/PythonLinearNonlinearControl/configs/make_configs.py +++ b/PythonLinearNonlinearControl/configs/make_configs.py @@ -1,6 +1,7 @@ from .first_order_lag import FirstOrderLagConfigModule from .two_wheeled import TwoWheeledConfigModule from .cartpole import CartPoleConfigModule +from .nonlinear_sample_system import NonlinearSampleSystemConfigModule def make_config(args): @@ -14,3 +15,5 @@ def make_config(args): return TwoWheeledConfigModule() elif args.env == "CartPole": return CartPoleConfigModule() + elif args.env == "NonlinearSample": + return NonlinearSampleSystemConfigModule() diff --git a/PythonLinearNonlinearControl/configs/nonlinear_sample_system.py b/PythonLinearNonlinearControl/configs/nonlinear_sample_system.py new file mode 100644 index 0000000..62d9c9f --- /dev/null +++ b/PythonLinearNonlinearControl/configs/nonlinear_sample_system.py @@ -0,0 +1,219 @@ +import numpy as np + + +class NonlinearSampleSystemConfigModule(): + # parameters + ENV_NAME = "NonlinearSampleSystem-v0" + PLANNER_TYPE = "Const" + TYPE = "Nonlinear" + TASK_HORIZON = 2500 + PRED_LEN = 10 + STATE_SIZE = 2 + INPUT_SIZE = 1 + DT = 0.01 + R = np.diag([0.01]) + Q = None + Sf = None + # bounds + INPUT_LOWER_BOUND = np.array([-0.5]) + INPUT_UPPER_BOUND = np.array([0.5]) + + def __init__(self): + """ + """ + # opt configs + self.opt_config = { + "Random": { + "popsize": 5000 + }, + "CEM": { + "popsize": 500, + "num_elites": 50, + "max_iters": 15, + "alpha": 0.3, + "init_var": 9., + "threshold": 0.001 + }, + "MPPI": { + "beta": 0.6, + "popsize": 5000, + "kappa": 0.9, + "noise_sigma": 0.5, + }, + "MPPIWilliams": { + "popsize": 5000, + "lambda": 1., + "noise_sigma": 0.9, + }, + "iLQR": { + "max_iter": 500, + "init_mu": 1., + "mu_min": 1e-6, + "mu_max": 1e10, + "init_delta": 2., + "threshold": 1e-6, + }, + "DDP": { + "max_iter": 500, + "init_mu": 1., + "mu_min": 1e-6, + "mu_max": 1e10, + "init_delta": 2., + "threshold": 1e-6, + }, + "NMPC-CGMRES": { + }, + "NMPC-Newton": { + }, + } + + @staticmethod + def input_cost_fn(u): + """ input cost functions + + Args: + u (numpy.ndarray): input, shape(pred_len, input_size) + or shape(pop_size, pred_len, input_size) + Returns: + cost (numpy.ndarray): cost of input, shape(pred_len, input_size) or + shape(pop_size, pred_len, input_size) + """ + return (u**2) * np.diag(NonlinearSampleSystemConfigModule.R) + + @staticmethod + def state_cost_fn(x, g_x): + """ state cost function + + Args: + x (numpy.ndarray): state, shape(pred_len, state_size) + or shape(pop_size, pred_len, state_size) + g_x (numpy.ndarray): goal state, shape(pred_len, state_size) + or shape(pop_size, pred_len, state_size) + Returns: + cost (numpy.ndarray): cost of state, shape(pred_len, 1) or + shape(pop_size, pred_len, 1) + """ + + if len(x.shape) > 2: + return (0.5 * (x[:, :, 0]**2) + + 0.5 * (x[:, :, 1]**2))[:, :, np.newaxis] + + elif len(x.shape) > 1: + return (0.5 * (x[:, 0]**2) + 0.5 * (x[:, 1]**2))[:, np.newaxis] + + return 0.5 * (x[0]**2) + 0.5 * (x[1]**2) + + @ staticmethod + def terminal_state_cost_fn(terminal_x, terminal_g_x): + """ + + Args: + terminal_x (numpy.ndarray): terminal state, + shape(state_size, ) or shape(pop_size, state_size) + terminal_g_x (numpy.ndarray): terminal goal state, + shape(state_size, ) or shape(pop_size, state_size) + Returns: + cost (numpy.ndarray): cost of state, shape(pred_len, ) or + shape(pop_size, pred_len) + """ + + if len(terminal_x.shape) > 1: + return (0.5 * (terminal_x[:, 0]**2) + + 0.5 * (terminal_x[:, 1]**2))[:, np.newaxis] + + return 0.5 * (terminal_x[0]**2) + 0.5 * (terminal_x[1]**2) + + @ staticmethod + def gradient_cost_fn_with_state(x, g_x, terminal=False): + """ gradient of costs with respect to the state + + Args: + x (numpy.ndarray): state, shape(pred_len, state_size) + g_x (numpy.ndarray): goal state, shape(pred_len, state_size) + + Returns: + l_x (numpy.ndarray): gradient of cost, shape(pred_len, state_size) + or shape(1, state_size) + """ + if not terminal: + cost_dx0 = x[:, 0] + cost_dx1 = x[:, 1] + cost_dx = np.stack((cost_dx0, cost_dx1), axis=1) + return cost_dx + + cost_dx0 = x[0] + cost_dx1 = x[1] + cost_dx = np.array([[cost_dx0, cost_dx1]]) + + return cost_dx + + @ staticmethod + def gradient_cost_fn_with_input(x, u): + """ gradient of costs with respect to the input + + Args: + x (numpy.ndarray): state, shape(pred_len, state_size) + u (numpy.ndarray): goal state, shape(pred_len, input_size) + Returns: + l_u (numpy.ndarray): gradient of cost, shape(pred_len, input_size) + """ + return 2. * u * np.diag(NonlinearSampleSystemConfigModule.R) + + @ staticmethod + def hessian_cost_fn_with_state(x, g_x, terminal=False): + """ hessian costs with respect to the state + + Args: + x (numpy.ndarray): state, shape(pred_len, state_size) + g_x (numpy.ndarray): goal state, shape(pred_len, state_size) + Returns: + l_xx (numpy.ndarray): gradient of cost, + shape(pred_len, state_size, state_size) or + shape(1, state_size, state_size) or + """ + if not terminal: + (pred_len, state_size) = x.shape + hessian = np.eye(state_size) + hessian = np.tile(hessian, (pred_len, 1, 1)) + hessian[:, 0, 0] = 1. + hessian[:, 1, 1] = 1. + + return hessian + + state_size = len(x) + hessian = np.eye(state_size) + hessian[0, 0] = 1. + hessian[1, 1] = 1. + + return hessian[np.newaxis, :, :] + + @ staticmethod + def hessian_cost_fn_with_input(x, u): + """ hessian costs with respect to the input + + Args: + x (numpy.ndarray): state, shape(pred_len, state_size) + u (numpy.ndarray): goal state, shape(pred_len, input_size) + Returns: + l_uu (numpy.ndarray): gradient of cost, + shape(pred_len, input_size, input_size) + """ + (pred_len, _) = u.shape + + return np.tile(NonlinearSampleSystemConfigModule.R, (pred_len, 1, 1)) + + @ staticmethod + def hessian_cost_fn_with_input_state(x, u): + """ hessian costs with respect to the state and input + + Args: + x (numpy.ndarray): state, shape(pred_len, state_size) + u (numpy.ndarray): goal state, shape(pred_len, input_size) + Returns: + l_ux (numpy.ndarray): gradient of cost , + shape(pred_len, input_size, state_size) + """ + (_, state_size) = x.shape + (pred_len, input_size) = u.shape + + return np.zeros((pred_len, input_size, state_size)) diff --git a/PythonLinearNonlinearControl/configs/nonlinear_system_sample.py b/PythonLinearNonlinearControl/configs/nonlinear_system_sample.py deleted file mode 100644 index e69de29..0000000 diff --git a/PythonLinearNonlinearControl/envs/make_envs.py b/PythonLinearNonlinearControl/envs/make_envs.py index c35e11a..5603b45 100644 --- a/PythonLinearNonlinearControl/envs/make_envs.py +++ b/PythonLinearNonlinearControl/envs/make_envs.py @@ -2,6 +2,7 @@ from .first_order_lag import FirstOrderLagEnv from .two_wheeled import TwoWheeledConstEnv from .two_wheeled import TwoWheeledTrackEnv from .cartpole import CartPoleEnv +from .nonlinear_sample_system import NonlinearSampleSystemEnv def make_env(args): @@ -14,5 +15,7 @@ def make_env(args): return TwoWheeledTrackEnv() elif args.env == "CartPole": return CartPoleEnv() + elif args.env == "NonlinearSample": + return NonlinearSampleSystemEnv() raise NotImplementedError("There is not {} Env".format(args.env)) diff --git a/PythonLinearNonlinearControl/envs/nonlinear_sample_system.py b/PythonLinearNonlinearControl/envs/nonlinear_sample_system.py index 01f7cd5..38a827f 100644 --- a/PythonLinearNonlinearControl/envs/nonlinear_sample_system.py +++ b/PythonLinearNonlinearControl/envs/nonlinear_sample_system.py @@ -5,7 +5,7 @@ from .env import Env from ..common.utils import update_state_with_Runge_Kutta -class NonlinearSampleEnv(Env): +class NonlinearSampleSystemEnv(Env): """ Nonlinear Sample Env """ @@ -15,12 +15,12 @@ class NonlinearSampleEnv(Env): self.config = {"state_size": 2, "input_size": 1, "dt": 0.01, - "max_step": 250, + "max_step": 2000, "input_lower_bound": [-0.5], "input_upper_bound": [0.5], } - super(NonlinearSampleEnv, self).__init__(self.config) + super(NonlinearSampleSystemEnv, self).__init__(self.config) def reset(self, init_x=np.array([2., 0.])): """ reset state @@ -62,7 +62,8 @@ class NonlinearSampleEnv(Env): functions = [self._func_x_1, self._func_x_2] next_x = update_state_with_Runge_Kutta(self.curr_x, u, - functions, self.config["dt"]) + functions, self.config["dt"], + batch=False) # cost cost = 0 @@ -83,18 +84,14 @@ class NonlinearSampleEnv(Env): {"goal_state": self.g_x} def _func_x_1(self, x, u): - """ - """ x_dot = x[1] return x_dot def _func_x_2(self, x, u): - """ - """ - x_dot = (1. - x[0]**2 - x[1]**2) * x[1] - x[0] + u + x_dot = (1. - x[0]**2 - x[1]**2) * x[1] - x[0] + u[0] return x_dot def plot_func(self, to_plot, i=None, history_x=None, history_g_x=None): """ """ - raise ValueError("NonlinearSampleEnv does not have animation") + raise ValueError("NonlinearSampleSystemEnv does not have animation") diff --git a/PythonLinearNonlinearControl/models/make_models.py b/PythonLinearNonlinearControl/models/make_models.py index cc42a45..50f11f0 100644 --- a/PythonLinearNonlinearControl/models/make_models.py +++ b/PythonLinearNonlinearControl/models/make_models.py @@ -1,6 +1,7 @@ from .first_order_lag import FirstOrderLagModel from .two_wheeled import TwoWheeledModel from .cartpole import CartPoleModel +from .nonlinear_sample_system import NonlinearSampleSystemModel def make_model(args, config): @@ -11,5 +12,7 @@ def make_model(args, config): return TwoWheeledModel(config) elif args.env == "CartPole": return CartPoleModel(config) + elif args.env == "NonlinearSample": + return NonlinearSampleSystemModel(config) raise NotImplementedError("There is not {} Model".format(args.env)) diff --git a/PythonLinearNonlinearControl/models/nonlinear_sample_system.py b/PythonLinearNonlinearControl/models/nonlinear_sample_system.py new file mode 100644 index 0000000..490d2ba --- /dev/null +++ b/PythonLinearNonlinearControl/models/nonlinear_sample_system.py @@ -0,0 +1,164 @@ +import numpy as np + +from .model import Model +from ..common.utils import update_state_with_Runge_Kutta + + +class NonlinearSampleSystemModel(Model): + """ nonlinear sample system model + """ + + def __init__(self, config): + """ + """ + super(NonlinearSampleSystemModel, self).__init__() + self.dt = config.DT + + def predict_next_state(self, curr_x, u): + """ predict next state + + Args: + curr_x (numpy.ndarray): current state, shape(state_size, ) or + shape(pop_size, state_size) + u (numpy.ndarray): input, shape(input_size, ) or + shape(pop_size, input_size) + Returns: + next_x (numpy.ndarray): next state, shape(state_size, ) or + shape(pop_size, state_size) + """ + if len(u.shape) == 1: + func_1 = self._func_x_1 + func_2 = self._func_x_2 + functions = [func_1, func_2] + next_x = update_state_with_Runge_Kutta( + curr_x, u, functions, batch=False) + return next_x + + elif len(u.shape) == 2: + def func_1(xs, us): return self._func_x_1(xs, us, batch=True) + def func_2(xs, us): return self._func_x_2(xs, us, batch=True) + functions = [func_1, func_2] + next_x = update_state_with_Runge_Kutta( + curr_x, u, functions, batch=True) + + return next_x + + def _func_x_1(self, x, u, batch=False): + if not batch: + x_dot = x[1] + else: + x_dot = x[:, 1] + return x_dot + + def _func_x_2(self, x, u, batch=False): + if not batch: + x_dot = (1. - x[0]**2 - x[1]**2) * x[1] - x[0] + u[0] + else: + x_dot = (1. - x[:, 0]**2 - x[:, 1]**2) * \ + x[:, 1] - x[:, 0] + u[:, 0] + return x_dot + + def calc_f_x(self, xs, us, dt): + """ gradient of model with respect to the state in batch form + Args: + xs (numpy.ndarray): state, shape(pred_len+1, state_size) + us (numpy.ndarray): input, shape(pred_len, input_size,) + + Return: + f_x (numpy.ndarray): gradient of model with respect to x, + shape(pred_len, state_size, state_size) + + Notes: + This should be discrete form !! + """ + # get size + (_, state_size) = xs.shape + (pred_len, _) = us.shape + + f_x = np.zeros((pred_len, state_size, state_size)) + f_x[:, 0, 1] = 1. + f_x[:, 1, 0] = 2. * xs[:, 0] * xs[:, 1] - 1. + f_x[:, 1, 1] = - 2. * xs[:, 1] * xs[:, 1] + \ + (1. - xs[:, 0]**2 - xs[:, 1]**2) + + return f_x * dt + np.eye(state_size) # to discrete form + + def calc_f_u(self, xs, us, dt): + """ gradient of model with respect to the input in batch form + Args: + xs (numpy.ndarray): state, shape(pred_len+1, state_size) + us (numpy.ndarray): input, shape(pred_len, input_size,) + + Return: + f_u (numpy.ndarray): gradient of model with respect to x, + shape(pred_len, state_size, input_size) + + Notes: + This should be discrete form !! + """ + # get size + (_, state_size) = xs.shape + (pred_len, input_size) = us.shape + + f_u = np.zeros((pred_len, state_size, input_size)) + + f_u[:, 1, 0] = 1. + + return f_u * dt # to discrete form + + def calc_f_xx(self, xs, us, dt): + """ hessian of model with respect to the state in batch form + + Args: + xs (numpy.ndarray): state, shape(pred_len+1, state_size) + us (numpy.ndarray): input, shape(pred_len, input_size,) + + Return: + f_xx (numpy.ndarray): gradient of model with respect to x, + shape(pred_len, state_size, state_size, state_size) + """ + # get size + (_, state_size) = xs.shape + (pred_len, _) = us.shape + + f_xx = np.zeros((pred_len, state_size, state_size, state_size)) + + raise NotImplementedError + + def calc_f_ux(self, xs, us, dt): + """ hessian of model with respect to state and input in batch form + + Args: + xs (numpy.ndarray): state, shape(pred_len+1, state_size) + us (numpy.ndarray): input, shape(pred_len, input_size,) + + Return: + f_ux (numpy.ndarray): gradient of model with respect to x, + shape(pred_len, state_size, input_size, state_size) + """ + # get size + (_, state_size) = xs.shape + (pred_len, input_size) = us.shape + + f_ux = np.zeros((pred_len, state_size, input_size, state_size)) + + raise NotImplementedError + + def calc_f_uu(self, xs, us, dt): + """ hessian of model with respect to input in batch form + + Args: + xs (numpy.ndarray): state, shape(pred_len+1, state_size) + us (numpy.ndarray): input, shape(pred_len, input_size,) + + Return: + f_uu (numpy.ndarray): gradient of model with respect to x, + shape(pred_len, state_size, input_size, input_size) + """ + # get size + (_, state_size) = xs.shape + (pred_len, input_size) = us.shape + + f_uu = np.zeros((pred_len, state_size, input_size, input_size)) + + raise NotImplementedError diff --git a/PythonLinearNonlinearControl/planners/make_planners.py b/PythonLinearNonlinearControl/planners/make_planners.py index 9f18e51..eb77082 100644 --- a/PythonLinearNonlinearControl/planners/make_planners.py +++ b/PythonLinearNonlinearControl/planners/make_planners.py @@ -1,8 +1,9 @@ from .const_planner import ConstantPlanner from .closest_point_planner import ClosestPointPlanner + def make_planner(args, config): - + if args.env == "FirstOrderLag": return ConstantPlanner(config) elif args.env == "TwoWheeledConst": @@ -11,5 +12,8 @@ def make_planner(args, config): return ClosestPointPlanner(config) elif args.env == "CartPole": return ConstantPlanner(config) - - raise NotImplementedError("There is not {} Planner".format(args.planner_type)) \ No newline at end of file + elif args.env == "NonlinearSample": + return ConstantPlanner(config) + + raise NotImplementedError( + "There is not {} Planner".format(args.planner_type)) diff --git a/assets/nonlinear_sample_system.png b/assets/nonlinear_sample_system.png new file mode 100644 index 0000000..95e0eb6 Binary files /dev/null and b/assets/nonlinear_sample_system.png differ diff --git a/assets/nonlinear_sample_system_score.png b/assets/nonlinear_sample_system_score.png new file mode 100644 index 0000000..26e8310 Binary files /dev/null and b/assets/nonlinear_sample_system_score.png differ diff --git a/scripts/show_result.py b/scripts/show_result.py index e54b9dc..f1fd300 100644 --- a/scripts/show_result.py +++ b/scripts/show_result.py @@ -6,7 +6,8 @@ import numpy as np import matplotlib.pyplot as plt from PythonLinearNonlinearControl.plotters.plot_func import load_plot_data, \ - plot_multi_result + plot_multi_result + def run(args): @@ -17,7 +18,7 @@ def run(args): history_gs = None # load data - for controller in controllers: + for controller in controllers: history_x, history_u, history_g = \ load_plot_data(args.env, controller, result_dir=args.result_dir) @@ -27,19 +28,20 @@ def run(args): history_us = history_u[np.newaxis, :] history_gs = history_g[np.newaxis, :] continue - + history_xs = np.concatenate((history_xs, history_x[np.newaxis, :]), axis=0) history_us = np.concatenate((history_us, history_u[np.newaxis, :]), axis=0) history_gs = np.concatenate((history_gs, history_g[np.newaxis, :]), axis=0) - + plot_multi_result(history_xs, histories_g=history_gs, labels=controllers, ylabel="x") plot_multi_result(history_us, histories_g=np.zeros_like(history_us), - labels=controllers, ylabel="u", name="input_history") + labels=controllers, ylabel="u", name="input_history") + def main(): parser = argparse.ArgumentParser() @@ -51,5 +53,6 @@ def main(): run(args) + if __name__ == "__main__": - main() \ No newline at end of file + main() diff --git a/scripts/simple_run.py b/scripts/simple_run.py index 2ef1fd8..d3dbffb 100644 --- a/scripts/simple_run.py +++ b/scripts/simple_run.py @@ -8,9 +8,10 @@ from PythonLinearNonlinearControl.models.make_models import make_model from PythonLinearNonlinearControl.envs.make_envs import make_env from PythonLinearNonlinearControl.runners.make_runners import make_runner from PythonLinearNonlinearControl.plotters.plot_func import plot_results, \ - save_plot_data + save_plot_data from PythonLinearNonlinearControl.plotters.animator import Animator + def run(args): # logger make_logger(args.result_dir) @@ -18,23 +19,23 @@ def run(args): # make envs env = make_env(args) - # make config + # make config config = make_config(args) # make planner planner = make_planner(args, config) - + # make model model = make_model(args, config) - + # make controller controller = make_controller(args, config, model) - + # make simulator runner = make_runner(args) # run experiment - history_x, history_u, history_g = runner.run(env, controller, planner) + history_x, history_u, history_g = runner.run(env, controller, planner) # plot results plot_results(history_x, history_u, history_g=history_g, args=args) @@ -44,17 +45,19 @@ def run(args): animator = Animator(env, args=args) animator.draw(history_x, history_g) + def main(): parser = argparse.ArgumentParser() - parser.add_argument("--controller_type", type=str, default="CEM") - parser.add_argument("--env", type=str, default="TwoWheeledTrack") - parser.add_argument("--save_anim", type=bool_flag, default=1) + parser.add_argument("--controller_type", type=str, default="DDP") + parser.add_argument("--env", type=str, default="NonlinearSample") + parser.add_argument("--save_anim", type=bool_flag, default=0) parser.add_argument("--result_dir", type=str, default="./result") args = parser.parse_args() run(args) + if __name__ == "__main__": - main() \ No newline at end of file + main()