diff --git a/Environments.md b/Environments.md new file mode 100644 index 0000000..7799db1 --- /dev/null +++ b/Environments.md @@ -0,0 +1,34 @@ +# Enviroments + +| Name | Linear | Nonlinear | State Size | Input size | +|:----------|:---------------:|:----------------:|:----------------:|:----------------:| +| First Order Lag System | ✓ | x | 4 | 2 | +| Two wheeled System (Constant Goal) | x | ✓ | 3 | 2 | +| Two wheeled System (Moving Goal) (Coming soon) | x | ✓ | 3 | 2 | +| Cartpole (Swing up) | x | ✓ | 4 | 1 | + +## FistOrderLagEnv + +System equations. + + + +You can set arbinatry time constant, tau. The default is 0.63 s + +## TwoWheeledEnv + +System equations. + + + +## CatpoleEnv (Swing up) + +System equations. + + + +You can set arbinatry parameters, mc, mp, l and g. + +Default settings are as follows: + +mc = 1, mp = 0.2, l = 0.5, g = 9.8 \ No newline at end of file diff --git a/PythonLinearNonlinearControl/common/utils.py b/PythonLinearNonlinearControl/common/utils.py index 07ff604..a22b22b 100644 --- a/PythonLinearNonlinearControl/common/utils.py +++ b/PythonLinearNonlinearControl/common/utils.py @@ -1,2 +1 @@ import numpy as np - diff --git a/PythonLinearNonlinearControl/configs/first_order_lag.py b/PythonLinearNonlinearControl/configs/first_order_lag.py index 7726a77..1ad59f6 100644 --- a/PythonLinearNonlinearControl/configs/first_order_lag.py +++ b/PythonLinearNonlinearControl/configs/first_order_lag.py @@ -5,7 +5,7 @@ class FirstOrderLagConfigModule(): ENV_NAME = "FirstOrderLag-v0" TYPE = "Linear" TASK_HORIZON = 1000 - PRED_LEN = 10 + PRED_LEN = 50 STATE_SIZE = 4 INPUT_SIZE = 2 DT = 0.05 @@ -43,8 +43,33 @@ class FirstOrderLagConfigModule(): "kappa": 0.9, "noise_sigma": 0.5, }, + "MPPIWilliams":{ + "popsize": 5000, + "lambda": 1., + "noise_sigma": 0.9, + }, "MPC":{ - } + }, + "iLQR":{ + "max_iter": 500, + "init_mu": 1., + "mu_min": 1e-6, + "mu_max": 1e10, + "init_delta": 2., + "threshold": 1e-6, + }, + "DDP":{ + "max_iter": 500, + "init_mu": 1., + "mu_min": 1e-6, + "mu_max": 1e10, + "init_delta": 2., + "threshold": 1e-6, + }, + "NMPC-CGMRES":{ + }, + "NMPC-Newton":{ + }, } @staticmethod @@ -86,4 +111,89 @@ class FirstOrderLagConfigModule(): shape(pop_size, pred_len) """ return ((terminal_x - terminal_g_x)**2) \ - * np.diag(FirstOrderLagConfigModule.Sf) \ No newline at end of file + * np.diag(FirstOrderLagConfigModule.Sf) + + @staticmethod + def gradient_cost_fn_with_state(x, g_x, terminal=False): + """ gradient of costs with respect to the state + + Args: + x (numpy.ndarray): state, shape(pred_len, state_size) + g_x (numpy.ndarray): goal state, shape(pred_len, state_size) + + Returns: + l_x (numpy.ndarray): gradient of cost, shape(pred_len, state_size) + or shape(1, state_size) + """ + if not terminal: + return 2. * (x - g_x) * np.diag(FirstOrderLagConfigModule.Q) + + return (2. * (x - g_x) \ + * np.diag(FirstOrderLagConfigModule.Sf))[np.newaxis, :] + + @staticmethod + def gradient_cost_fn_with_input(x, u): + """ gradient of costs with respect to the input + + Args: + x (numpy.ndarray): state, shape(pred_len, state_size) + u (numpy.ndarray): goal state, shape(pred_len, input_size) + + Returns: + l_u (numpy.ndarray): gradient of cost, shape(pred_len, input_size) + """ + return 2. * u * np.diag(FirstOrderLagConfigModule.R) + + @staticmethod + def hessian_cost_fn_with_state(x, g_x, terminal=False): + """ hessian costs with respect to the state + + Args: + x (numpy.ndarray): state, shape(pred_len, state_size) + g_x (numpy.ndarray): goal state, shape(pred_len, state_size) + + Returns: + l_xx (numpy.ndarray): gradient of cost, + shape(pred_len, state_size, state_size) or + shape(1, state_size, state_size) or + """ + if not terminal: + (pred_len, _) = x.shape + return -g_x[:, :, np.newaxis] \ + * np.tile(2.*FirstOrderLagConfigModule.Q, (pred_len, 1, 1)) + + return -g_x[:, np.newaxis] \ + * np.tile(2.*FirstOrderLagConfigModule.Sf, (1, 1, 1)) + + @staticmethod + def hessian_cost_fn_with_input(x, u): + """ hessian costs with respect to the input + + Args: + x (numpy.ndarray): state, shape(pred_len, state_size) + u (numpy.ndarray): goal state, shape(pred_len, input_size) + + Returns: + l_uu (numpy.ndarray): gradient of cost, + shape(pred_len, input_size, input_size) + """ + (pred_len, _) = u.shape + + return np.tile(2.*FirstOrderLagConfigModule.R, (pred_len, 1, 1)) + + @staticmethod + def hessian_cost_fn_with_input_state(x, u): + """ hessian costs with respect to the state and input + + Args: + x (numpy.ndarray): state, shape(pred_len, state_size) + u (numpy.ndarray): goal state, shape(pred_len, input_size) + + Returns: + l_ux (numpy.ndarray): gradient of cost , + shape(pred_len, input_size, state_size) + """ + (_, state_size) = x.shape + (pred_len, input_size) = u.shape + + return np.zeros((pred_len, input_size, state_size)) diff --git a/PythonLinearNonlinearControl/configs/two_wheeled.py b/PythonLinearNonlinearControl/configs/two_wheeled.py index 9167ca6..27e9834 100644 --- a/PythonLinearNonlinearControl/configs/two_wheeled.py +++ b/PythonLinearNonlinearControl/configs/two_wheeled.py @@ -39,6 +39,11 @@ class TwoWheeledConfigModule(): "kappa": 0.9, "noise_sigma": 0.5, }, + "MPPIWilliams":{ + "popsize": 5000, + "lambda": 1, + "noise_sigma": 1., + }, "iLQR":{ "max_iter": 500, "init_mu": 1., diff --git a/PythonLinearNonlinearControl/controllers/ddp.py b/PythonLinearNonlinearControl/controllers/ddp.py index ac04b7c..4abb229 100644 --- a/PythonLinearNonlinearControl/controllers/ddp.py +++ b/PythonLinearNonlinearControl/controllers/ddp.py @@ -23,10 +23,6 @@ class DDP(Controller): """ super(DDP, self).__init__(config, model) - if config.TYPE != "Nonlinear": - raise ValueError("{} could be not applied to \ - this controller".format(model)) - # model self.model = model @@ -296,6 +292,7 @@ class DDP(Controller): def backward(self, f_x, f_u, f_xx, f_ux, f_uu, l_x, l_xx, l_u, l_uu, l_ux): """ backward step of iLQR + Args: f_x (numpy.ndarray): gradient of model with respecto to state, shape(pred_len+1, state_size, state_size) @@ -317,7 +314,6 @@ class DDP(Controller): shape(pred_len, input_size, input_size) l_ux (numpy.ndarray): hessian of cost with respect to state and input, shape(pred_len, input_size, state_size) - Returns: k (numpy.ndarray): gain, shape(pred_len, input_size) K (numpy.ndarray): gain, shape(pred_len, input_size, state_size) @@ -353,7 +349,8 @@ class DDP(Controller): def _Q(self, f_x, f_u, f_xx, f_ux, f_uu, l_x, l_u, l_xx, l_ux, l_uu, V_x, V_xx): - """Computes second order expansion. + """ compute Q function valued + Args: f_x (numpy.ndarray): gradient of model with respecto to state, shape(state_size, state_size) diff --git a/PythonLinearNonlinearControl/controllers/ilqr.py b/PythonLinearNonlinearControl/controllers/ilqr.py index 40be9e9..a676ade 100644 --- a/PythonLinearNonlinearControl/controllers/ilqr.py +++ b/PythonLinearNonlinearControl/controllers/ilqr.py @@ -21,10 +21,6 @@ class iLQR(Controller): """ super(iLQR, self).__init__(config, model) - if config.TYPE != "Nonlinear": - raise ValueError("{} could be not applied to \ - this controller".format(model)) - # model self.model = model diff --git a/PythonLinearNonlinearControl/controllers/make_controllers.py b/PythonLinearNonlinearControl/controllers/make_controllers.py index 99653d9..74d048c 100644 --- a/PythonLinearNonlinearControl/controllers/make_controllers.py +++ b/PythonLinearNonlinearControl/controllers/make_controllers.py @@ -2,6 +2,7 @@ from .mpc import LinearMPC from .cem import CEM from .random import RandomShooting from .mppi import MPPI +from .mppi_williams import MPPIWilliams from .ilqr import iLQR from .ddp import DDP @@ -15,6 +16,8 @@ def make_controller(args, config, model): return RandomShooting(config, model) elif args.controller_type == "MPPI": return MPPI(config, model) + elif args.controller_type == "MPPIWilliams": + return MPPIWilliams(config, model) elif args.controller_type == "iLQR": return iLQR(config, model) elif args.controller_type == "DDP": diff --git a/PythonLinearNonlinearControl/controllers/mppi_williams.py b/PythonLinearNonlinearControl/controllers/mppi_williams.py new file mode 100644 index 0000000..1fd0102 --- /dev/null +++ b/PythonLinearNonlinearControl/controllers/mppi_williams.py @@ -0,0 +1,143 @@ +from logging import getLogger + +import numpy as np +import scipy.stats as stats + +from .controller import Controller +from ..envs.cost import calc_cost + +logger = getLogger(__name__) + +class MPPIWilliams(Controller): + """ Model Predictive Path Integral for linear and nonlinear method + + Attributes: + history_u (list[numpy.ndarray]): time history of optimal input + Ref: + G. Williams et al., "Information theoretic MPC + for model-based reinforcement learning," + 2017 IEEE International Conference on Robotics and Automation (ICRA), + Singapore, 2017, pp. 1714-1721. + """ + def __init__(self, config, model): + super(MPPIWilliams, self).__init__(config, model) + + # model + self.model = model + + # general parameters + self.pred_len = config.PRED_LEN + self.input_size = config.INPUT_SIZE + + # mppi parameters + self.pop_size = config.opt_config["MPPIWilliams"]["popsize"] + self.lam = config.opt_config["MPPIWilliams"]["lambda"] + self.noise_sigma = config.opt_config["MPPIWilliams"]["noise_sigma"] + self.opt_dim = self.input_size * self.pred_len + + # get bound + self.input_upper_bounds = np.tile(config.INPUT_UPPER_BOUND, + (self.pred_len, 1)) + self.input_lower_bounds = np.tile(config.INPUT_LOWER_BOUND, + (self.pred_len, 1)) + + # get cost func + self.state_cost_fn = config.state_cost_fn + self.terminal_state_cost_fn = config.terminal_state_cost_fn + self.input_cost_fn = config.input_cost_fn + + # init mean + self.prev_sol = np.tile((config.INPUT_UPPER_BOUND \ + + config.INPUT_LOWER_BOUND) / 2., + self.pred_len) + self.prev_sol = self.prev_sol.reshape(self.pred_len, self.input_size) + + # save + self.history_u = [np.zeros(self.input_size)] + + def clear_sol(self): + """ clear prev sol + """ + logger.debug("Clear Solution") + self.prev_sol = \ + (self.input_upper_bounds + self.input_lower_bounds) / 2. + self.prev_sol = self.prev_sol.reshape(self.pred_len, self.input_size) + + def calc_cost(self, curr_x, samples, g_xs): + """ calculate the cost of input samples by using MPPI's eq + + Args: + curr_x (numpy.ndarray): shape(state_size), + current robot position + samples (numpy.ndarray): shape(pop_size, opt_dim), + input samples + g_xs (numpy.ndarray): shape(pred_len, state_size), + goal states + Returns: + costs (numpy.ndarray): shape(pop_size, ) + """ + # get size + pop_size = samples.shape[0] + g_xs = np.tile(g_xs, (pop_size, 1, 1)) + + # calc cost, pred_xs.shape = (pop_size, pred_len+1, state_size) + pred_xs = self.model.predict_traj(curr_x, samples) + + # get particle cost + costs = calc_cost(pred_xs, samples, g_xs, + self.state_cost_fn, None, \ + self.terminal_state_cost_fn) + + return costs + + def obtain_sol(self, curr_x, g_xs): + """ calculate the optimal inputs + + Args: + curr_x (numpy.ndarray): current state, shape(state_size, ) + g_xs (numpy.ndarrya): goal trajectory, shape(plan_len, state_size) + Returns: + opt_input (numpy.ndarray): optimal input, shape(input_size, ) + """ + # get noised inputs + noise = np.random.normal( + loc=0, scale=1.0, size=(self.pop_size, self.pred_len, + self.input_size)) * self.noise_sigma + + noised_inputs = self.prev_sol + noise + + # clip actions + noised_inputs = np.clip( + noised_inputs, self.input_lower_bounds, self.input_upper_bounds) + + # calc cost + costs = self.calc_cost(curr_x, noised_inputs, g_xs) + + costs += np.sum(np.sum( + self.lam * self.prev_sol * noise / self.noise_sigma, + axis=-1), axis=-1) + + # mppi update + beta = np.min(costs) + eta = np.sum(np.exp(- 1. / self.lam * (costs - beta)), axis=0) \ + + 1e-10 + + # weight + # eta.shape = (pred_len, input_size) + weights = np.exp(- 1. / self.lam * (costs - beta)) / eta + + # update inputs + sol = self.prev_sol \ + + np.sum(weights[:, np.newaxis, np.newaxis] * noise, axis=0) + + # update + self.prev_sol[:-1] = sol[1:] + self.prev_sol[-1] = sol[-1] # last use the terminal input + + # log + self.history_u.append(sol[0]) + + return sol[0] + + def __str__(self): + return "MPPIWilliams" \ No newline at end of file diff --git a/PythonLinearNonlinearControl/envs/cartpole.py b/PythonLinearNonlinearControl/envs/cartpole.py new file mode 100644 index 0000000..cd84313 --- /dev/null +++ b/PythonLinearNonlinearControl/envs/cartpole.py @@ -0,0 +1,87 @@ +import numpy as np + +from .env import Env + +class CartPoleEnv(Env): + """ Cartpole Environment + + Ref : + https://ocw.mit.edu/courses/ + electrical-engineering-and-computer-science/ + 6-832-underactuated-robotics-spring-2009/readings/ + MIT6_832s09_read_ch03.pdf + """ + def __init__(self): + """ + """ + self.config = {"state_size" : 4,\ + "input_size" : 1,\ + "dt" : 0.02,\ + "max_step" : 1000,\ + "input_lower_bound": None,\ + "input_upper_bound": None, + } + + super(CartPoleEnv, self).__init__(self.config) + + def reset(self, init_x=None): + """ reset state + + Returns: + init_x (numpy.ndarray): initial state, shape(state_size, ) + info (dict): information + """ + self.step_count = 0 + + self.curr_x = np.zeros(self.config["state_size"]) + + if init_x is not None: + self.curr_x = init_x + + # goal + self.g_x = np.array([0., 0., np.pi, 0.]) + + # clear memory + self.history_x = [] + self.history_g_x = [] + + return self.curr_x, {"goal_state": self.g_x} + + def step(self, u): + """ step environments + + Args: + u (numpy.ndarray) : input, shape(input_size, ) + Returns: + next_x (numpy.ndarray): next state, shape(state_size, ) + cost (float): costs + done (bool): end the simulation or not + info (dict): information + """ + # clip action + if self.config["input_lower_bound"] is not None: + u = np.clip(u, + self.config["input_lower_bound"], + self.config["input_upper_bound"]) + + # step + next_x = np.zeros(self.config["state_size"]) + + # TODO: costs + costs = 0. + costs += 0.1 * np.sum(u**2) + costs += np.sum((self.curr_x - self.g_x)**2) + + + # save history + self.history_x.append(next_x.flatten()) + self.history_g_x.append(self.g_x.flatten()) + + # update + self.curr_x = next_x.flatten() + # update costs + self.step_count += 1 + + return next_x.flatten(), costs, \ + self.step_count > self.config["max_step"], \ + {"goal_state" : self.g_x} \ No newline at end of file diff --git a/PythonLinearNonlinearControl/envs/cost.py b/PythonLinearNonlinearControl/envs/cost.py index 5d10697..117d5f2 100644 --- a/PythonLinearNonlinearControl/envs/cost.py +++ b/PythonLinearNonlinearControl/envs/cost.py @@ -22,16 +22,22 @@ def calc_cost(pred_xs, input_sample, g_xs, cost (numpy.ndarray): cost of the input sample, shape(pop_size, ) """ # state cost - state_pred_par_cost = state_cost_fn(pred_xs[:, 1:-1, :], g_xs[:, 1:-1, :]) - state_cost = np.sum(np.sum(state_pred_par_cost, axis=-1), axis=-1) + state_cost = 0. + if state_cost_fn is not None: + state_pred_par_cost = state_cost_fn(pred_xs[:, 1:-1, :], g_xs[:, 1:-1, :]) + state_cost = np.sum(np.sum(state_pred_par_cost, axis=-1), axis=-1) # terminal cost - terminal_state_par_cost = terminal_state_cost_fn(pred_xs[:, -1, :], - g_xs[:, -1, :]) - terminal_state_cost = np.sum(terminal_state_par_cost, axis=-1) + terminal_state_cost = 0. + if terminal_state_cost_fn is not None: + terminal_state_par_cost = terminal_state_cost_fn(pred_xs[:, -1, :], + g_xs[:, -1, :]) + terminal_state_cost = np.sum(terminal_state_par_cost, axis=-1) # act cost - act_pred_par_cost = input_cost_fn(input_sample) - act_cost = np.sum(np.sum(act_pred_par_cost, axis=-1), axis=-1) + act_cost = 0. + if input_cost_fn is not None: + act_pred_par_cost = input_cost_fn(input_sample) + act_cost = np.sum(np.sum(act_pred_par_cost, axis=-1), axis=-1) return state_cost + terminal_state_cost + act_cost \ No newline at end of file diff --git a/PythonLinearNonlinearControl/envs/make_envs.py b/PythonLinearNonlinearControl/envs/make_envs.py index debbf29..fd3ea09 100644 --- a/PythonLinearNonlinearControl/envs/make_envs.py +++ b/PythonLinearNonlinearControl/envs/make_envs.py @@ -1,5 +1,6 @@ from .first_order_lag import FirstOrderLagEnv from .two_wheeled import TwoWheeledConstEnv +from .cartpole import CartpoleEnv def make_env(args): @@ -7,5 +8,7 @@ def make_env(args): return FirstOrderLagEnv() elif args.env == "TwoWheeledConst": return TwoWheeledConstEnv() + elif args.env == "CartPole": + return CartpoleEnv() raise NotImplementedError("There is not {} Env".format(args.env)) \ No newline at end of file diff --git a/PythonLinearNonlinearControl/models/model.py b/PythonLinearNonlinearControl/models/model.py index 58fe32e..5eb2cb7 100644 --- a/PythonLinearNonlinearControl/models/model.py +++ b/PythonLinearNonlinearControl/models/model.py @@ -211,3 +211,94 @@ class LinearModel(Model): next_x = np.matmul(curr_x, self.A.T) + np.matmul(u, self.B.T) return next_x + + def calc_f_x(self, xs, us, dt): + """ gradient of model with respect to the state in batch form + + Args: + xs (numpy.ndarray): state, shape(pred_len+1, state_size) + us (numpy.ndarray): input, shape(pred_len, input_size,) + Return: + f_x (numpy.ndarray): gradient of model with respect to x, + shape(pred_len, state_size, state_size) + Notes: + This should be discrete form !! + """ + # get size + (pred_len, _) = us.shape + + return np.tile(self.A, (pred_len, 1, 1)) + + def calc_f_u(self, xs, us, dt): + """ gradient of model with respect to the input in batch form + + Args: + xs (numpy.ndarray): state, shape(pred_len+1, state_size) + us (numpy.ndarray): input, shape(pred_len, input_size,) + Return: + f_u (numpy.ndarray): gradient of model with respect to x, + shape(pred_len, state_size, input_size) + Notes: + This should be discrete form !! + """ + # get size + (pred_len, input_size) = us.shape + + return np.tile(self.B, (pred_len, 1, 1)) + + @staticmethod + def calc_f_xx(xs, us, dt): + """ hessian of model with respect to the state in batch form + + Args: + xs (numpy.ndarray): state, shape(pred_len+1, state_size) + us (numpy.ndarray): input, shape(pred_len, input_size,) + Return: + f_xx (numpy.ndarray): gradient of model with respect to x, + shape(pred_len, state_size, state_size, state_size) + """ + # get size + (_, state_size) = xs.shape + (pred_len, _) = us.shape + + f_xx = np.zeros((pred_len, state_size, state_size, state_size)) + + return f_xx + + @staticmethod + def calc_f_ux(xs, us, dt): + """ hessian of model with respect to state and input in batch form + + Args: + xs (numpy.ndarray): state, shape(pred_len+1, state_size) + us (numpy.ndarray): input, shape(pred_len, input_size,) + Return: + f_ux (numpy.ndarray): gradient of model with respect to x, + shape(pred_len, state_size, input_size, state_size) + """ + # get size + (_, state_size) = xs.shape + (pred_len, input_size) = us.shape + + f_ux = np.zeros((pred_len, state_size, input_size, state_size)) + + return f_ux + + @staticmethod + def calc_f_uu(xs, us, dt): + """ hessian of model with respect to input in batch form + + Args: + xs (numpy.ndarray): state, shape(pred_len+1, state_size) + us (numpy.ndarray): input, shape(pred_len, input_size,) + Return: + f_uu (numpy.ndarray): gradient of model with respect to x, + shape(pred_len, state_size, input_size, input_size) + """ + # get size + (_, state_size) = xs.shape + (pred_len, input_size) = us.shape + + f_uu = np.zeros((pred_len, state_size, input_size, input_size)) + + return f_uu diff --git a/PythonLinearNonlinearControl/plotters/plot_func.py b/PythonLinearNonlinearControl/plotters/plot_func.py index 216788e..16866ea 100644 --- a/PythonLinearNonlinearControl/plotters/plot_func.py +++ b/PythonLinearNonlinearControl/plotters/plot_func.py @@ -3,6 +3,8 @@ import os import numpy as np import matplotlib.pyplot as plt +from ..helper import save_pickle, load_pickle + def plot_result(history, history_g=None, ylabel="x", save_dir="./result", name="state_history"): """ @@ -47,14 +49,108 @@ def plot_result(history, history_g=None, ylabel="x", def plot_results(args, history_x, history_u, history_g=None): """ + Args: history_x (numpy.ndarray): history of state, shape(iters, state_size) history_u (numpy.ndarray): history of state, shape(iters, input_size) Returns: + None """ plot_result(history_x, history_g=history_g, ylabel="x", - name="state_history", + name= args.env + "-state_history", save_dir="./result/" + args.controller_type) plot_result(history_u, history_g=np.zeros_like(history_u), ylabel="u", - name="input_history", - save_dir="./result/" + args.controller_type) \ No newline at end of file + name= args.env + "-input_history", + save_dir="./result/" + args.controller_type) + +def save_plot_data(args, history_x, history_u, history_g=None): + """ save plot data + + Args: + history_x (numpy.ndarray): history of state, shape(iters, state_size) + history_u (numpy.ndarray): history of state, shape(iters, input_size) + Returns: + None + """ + path = os.path.join("./result/" + args.controller_type, + args.env + "-history_x.pkl") + save_pickle(path, history_x) + + path = os.path.join("./result/" + args.controller_type, + args.env + "-history_u.pkl") + save_pickle(path, history_u) + + path = os.path.join("./result/" + args.controller_type, + args.env + "-history_g.pkl") + save_pickle(path, history_g) + +def load_plot_data(env, controller_type, result_dir="./result"): + """ + Args: + env (str): environments name + controller_type (str): controller type + result_dir (str): result directory + Returns: + history_x (numpy.ndarray): history of state, shape(iters, state_size) + history_u (numpy.ndarray): history of state, shape(iters, input_size) + history_g (numpy.ndarray): history of state, shape(iters, input_size) + """ + path = os.path.join("./result/" + controller_type, + env + "-history_x.pkl") + history_x = load_pickle(path) + + path = os.path.join("./result/" + controller_type, + env + "-history_u.pkl") + history_u = load_pickle(path) + + path = os.path.join("./result/" + controller_type, + env + "-history_g.pkl") + history_g = load_pickle(path) + + return history_x, history_u, history_g + +def plot_multi_result(histories, histories_g=None, labels=None, ylabel="x", + save_dir="./result", name="state_history"): + """ + Args: + history (numpy.ndarray): history, shape(iters, size) + """ + (_, iters, size) = histories.shape + + for i in range(0, size, 2): + + figure = plt.figure() + axis1 = figure.add_subplot(211) + axis2 = figure.add_subplot(212) + + axis1.set_ylabel(ylabel + "_{}".format(i)) + axis2.set_ylabel(ylabel + "_{}".format(i+1)) + axis2.set_xlabel("time steps") + + # gt + def plot(axis, history, history_g=None, label=""): + axis.plot(range(iters), history, + linewidth=3, label=label, alpha=0.7, linestyle="dashed") + if history_g is not None: + axis.plot(range(iters), history_g,\ + c="b", linewidth=3) + + if i < size: + for j, (history, history_g) \ + in enumerate(zip(histories, histories_g)): + plot(axis1, history[:, i], + history_g=history_g[:, i], label=labels[j]) + if i+1 < size: + for j, (history, history_g) in \ + enumerate(zip(histories, histories_g)): + plot(axis2, history[:, i+1], + history_g=history_g[:, i+1], label=labels[j]) + + # save + if save_dir is not None: + path = os.path.join(save_dir, name + "-{}".format(i)) + else: + path = name + + axis1.legend(ncol=3, bbox_to_anchor=(0., 1.02, 1., 0.102), loc=3) + figure.savefig(path, bbox_inches="tight", pad_inches=0.05) diff --git a/README.md b/README.md index 81bbae1..7c1e73d 100644 --- a/README.md +++ b/README.md @@ -14,7 +14,8 @@ PythonLinearNonLinearControl is a library implementing the linear and nonlinear |:----------|:---------------: |:----------------:|:----------------:|:----------------:|:----------------:| | Linear Model Predictive Control (MPC) | ✓ | x | x | x | x | | Cross Entropy Method (CEM) | ✓ | ✓ | x | x | x | -| Model Preidictive Path Integral Control (MPPI) | ✓ | ✓ | x | x | x | +| Model Preidictive Path Integral Control of Nagabandi, A. (MPPI) | ✓ | ✓ | x | x | x | +| Model Preidictive Path Integral Control of Williams (MPPIWilliams) | ✓ | ✓ | x | x | x | | Random Shooting Method (Random) | ✓ | ✓ | x | x | x | | Iterative LQR (iLQR) | x | ✓ | x | ✓ | x | | Differential Dynamic Programming (DDP) | x | ✓ | x | ✓ | ✓ | @@ -33,9 +34,12 @@ Following algorithms are implemented in PythonLinearNonlinearControl - [Cross Entropy Method (CEM)](https://arxiv.org/abs/1805.12114) - Ref: Chua, K., Calandra, R., McAllister, R., & Levine, S. (2018). Deep reinforcement learning in a handful of trials using probabilistic dynamics models. In Advances in Neural Information Processing Systems (pp. 4754-4765) - [script](PythonLinearNonlinearControl/controllers/cem.py) -- [Model Preidictive Path Integral Control (MPPI)](https://arxiv.org/abs/1909.11652) +- [Model Preidictive Path Integral Control Nagabandi, A. (MPPI)](https://arxiv.org/abs/1909.11652) - Ref: Nagabandi, A., Konoglie, K., Levine, S., & Kumar, V. (2019). Deep Dynamics Models for Learning Dexterous Manipulation. arXiv preprint arXiv:1909.11652. - [script](PythonLinearNonlinearControl/controllers/mppi.py) +- [Model Preidictive Path Integral Control of Williams, G. (MPPIWilliams)](https://ieeexplore.ieee.org/abstract/document/7989202) + - Ref: Williams, G., Wagener, N., Goldfain, B., Drews, P., Rehg, J. M., Boots, B., & Theodorou, E. A. (2017, May). Information theoretic MPC for model-based reinforcement learning. In 2017 IEEE International Conference on Robotics and Automation (ICRA) (pp. 1714-1721). IEEE. + - [script](PythonLinearNonlinearControl/controllers/mppi_williams.py) - [Random Shooting Method (Random)](https://arxiv.org/abs/1805.12114) - Ref: Chua, K., Calandra, R., McAllister, R., & Levine, S. (2018). Deep reinforcement learning in a handful of trials using probabilistic dynamics models. In Advances in Neural Information Processing Systems (pp. 4754-4765) - [script](PythonLinearNonlinearControl/controllers/random.py) @@ -62,10 +66,13 @@ Following algorithms are implemented in PythonLinearNonlinearControl | First Order Lag System | ✓ | x | 4 | 2 | | Two wheeled System (Constant Goal) | x | ✓ | 3 | 2 | | Two wheeled System (Moving Goal) (Coming soon) | x | ✓ | 3 | 2 | +| Cartpole (Swing up) | x | ✓ | 4 | 1 | -All environments are continuous. +All states and inputs of environments are continuous. **It should be noted that the algorithms for linear model could be applied to nonlinear enviroments if you have linealized the model of nonlinear environments.** +You could know abount out environmets more in [Environments.md](Environments.md) + # Usage ## To install this package diff --git a/assets/cartpole.png b/assets/cartpole.png new file mode 100644 index 0000000..53abb68 Binary files /dev/null and b/assets/cartpole.png differ diff --git a/assets/firstorderlag.png b/assets/firstorderlag.png new file mode 100644 index 0000000..ad0c9ff Binary files /dev/null and b/assets/firstorderlag.png differ diff --git a/assets/twowheeled.png b/assets/twowheeled.png new file mode 100644 index 0000000..921e111 Binary files /dev/null and b/assets/twowheeled.png differ diff --git a/scripts/show_result.py b/scripts/show_result.py new file mode 100644 index 0000000..e54b9dc --- /dev/null +++ b/scripts/show_result.py @@ -0,0 +1,55 @@ +import os + +import argparse +import pickle +import numpy as np +import matplotlib.pyplot as plt + +from PythonLinearNonlinearControl.plotters.plot_func import load_plot_data, \ + plot_multi_result + +def run(args): + + controllers = ["iLQR", "DDP", "CEM", "MPPI"] + + history_xs = None + history_us = None + history_gs = None + + # load data + for controller in controllers: + history_x, history_u, history_g = \ + load_plot_data(args.env, controller, + result_dir=args.result_dir) + + if history_xs is None: + history_xs = history_x[np.newaxis, :] + history_us = history_u[np.newaxis, :] + history_gs = history_g[np.newaxis, :] + continue + + history_xs = np.concatenate((history_xs, + history_x[np.newaxis, :]), axis=0) + history_us = np.concatenate((history_us, + history_u[np.newaxis, :]), axis=0) + history_gs = np.concatenate((history_gs, + history_g[np.newaxis, :]), axis=0) + + plot_multi_result(history_xs, histories_g=history_gs, labels=controllers, + ylabel="x") + + plot_multi_result(history_us, histories_g=np.zeros_like(history_us), + labels=controllers, ylabel="u", name="input_history") + +def main(): + parser = argparse.ArgumentParser() + + parser.add_argument("--env", type=str, default="FirstOrderLag") + parser.add_argument("--result_dir", type=str, default="./result") + + args = parser.parse_args() + + run(args) + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/scripts/simple_run.py b/scripts/simple_run.py index 0796266..b5af782 100644 --- a/scripts/simple_run.py +++ b/scripts/simple_run.py @@ -7,7 +7,8 @@ from PythonLinearNonlinearControl.configs.make_configs import make_config from PythonLinearNonlinearControl.models.make_models import make_model from PythonLinearNonlinearControl.envs.make_envs import make_env from PythonLinearNonlinearControl.runners.make_runners import make_runner -from PythonLinearNonlinearControl.plotters.plot_func import plot_results +from PythonLinearNonlinearControl.plotters.plot_func import plot_results, \ + save_plot_data def run(args): # logger @@ -36,13 +37,14 @@ def run(args): # plot results plot_results(args, history_x, history_u, history_g=history_g) + save_plot_data(args, history_x, history_u, history_g=history_g) def main(): parser = argparse.ArgumentParser() - parser.add_argument("--controller_type", type=str, default="DDP") + parser.add_argument("--controller_type", type=str, default="MPPIWilliams") parser.add_argument("--planner_type", type=str, default="const") - parser.add_argument("--env", type=str, default="TwoWheeledConst") + parser.add_argument("--env", type=str, default="FirstOrderLag") parser.add_argument("--result_dir", type=str, default="./result") args = parser.parse_args() diff --git a/setup.cfg b/setup.cfg new file mode 100644 index 0000000..6ac0b45 --- /dev/null +++ b/setup.cfg @@ -0,0 +1,5 @@ +[aliases] +test=pytest + +[tool:pytest] +addopts=-s \ No newline at end of file