From d64a799edab7c23b18280991c6feb115186da726 Mon Sep 17 00:00:00 2001 From: Shunichi09 Date: Sat, 13 Feb 2021 17:18:42 +0900 Subject: [PATCH] Fix format --- PythonLinearNonlinearControl/common/utils.py | 25 +-- .../configs/cartpole.py | 109 +++++------ .../configs/first_order_lag.py | 63 +++---- .../configs/make_configs.py | 3 +- .../configs/two_wheeled.py | 71 ++++---- .../controllers/cem.py | 32 ++-- .../controllers/controller.py | 18 +- .../controllers/ddp.py | 48 ++--- .../controllers/ilqr.py | 44 ++--- .../controllers/make_controllers.py | 5 +- .../controllers/mpc.py | 38 ++-- .../controllers/mppi.py | 36 ++-- .../controllers/mppi_williams.py | 28 +-- .../controllers/random.py | 12 +- PythonLinearNonlinearControl/envs/__init__.py | 2 +- PythonLinearNonlinearControl/envs/cartpole.py | 92 +++++----- PythonLinearNonlinearControl/envs/cost.py | 10 +- PythonLinearNonlinearControl/envs/env.py | 10 +- .../envs/first_order_lag.py | 46 ++--- .../envs/make_envs.py | 5 +- .../envs/nonlinear_sample_system.py | 34 ++-- .../envs/two_wheeled.py | 169 +++++++++--------- PythonLinearNonlinearControl/helper.py | 14 +- .../models/cartpole.py | 114 ++++++------ .../models/first_order_lag.py | 24 +-- .../models/make_models.py | 7 +- PythonLinearNonlinearControl/models/model.py | 60 ++++--- .../models/two_wheeled.py | 24 +-- .../planners/closest_point_planner.py | 8 +- .../planners/const_planner.py | 4 +- .../planners/planner.py | 4 +- .../plotters/animator.py | 10 +- .../plotters/plot_func.py | 21 ++- .../plotters/plot_objs.py | 10 +- .../runners/__init__.py | 2 +- .../runners/make_runners.py | 3 +- .../runners/runner.py | 6 +- 37 files changed, 648 insertions(+), 563 deletions(-) diff --git a/PythonLinearNonlinearControl/common/utils.py b/PythonLinearNonlinearControl/common/utils.py index 3645507..27d67ce 100644 --- a/PythonLinearNonlinearControl/common/utils.py +++ b/PythonLinearNonlinearControl/common/utils.py @@ -1,8 +1,9 @@ import numpy as np + def rotate_pos(pos, angle): """ Transformation the coordinate in the angle - + Args: pos (numpy.ndarray): local state, shape(data_size, 2) angle (float): rotate angle, in radians @@ -14,9 +15,10 @@ def rotate_pos(pos, angle): return np.dot(pos, rot_mat.T) + def fit_angle_in_range(angles, min_angle=-np.pi, max_angle=np.pi): """ Check angle range and correct the range - + Args: angle (numpy.ndarray): in radians min_angle (float): maximum of range in radians, default -pi @@ -29,7 +31,7 @@ def fit_angle_in_range(angles, min_angle=-np.pi, max_angle=np.pi): if (max_angle - min_angle) < 2.0 * np.pi: raise ValueError("difference between max_angle \ and min_angle must be greater than 2.0 * pi") - + output = np.array(angles) output_shape = output.shape @@ -43,6 +45,7 @@ def fit_angle_in_range(angles, min_angle=-np.pi, max_angle=np.pi): output = np.minimum(max_angle, np.maximum(min_angle, output)) return output.reshape(output_shape) + def update_state_with_Runge_Kutta(state, u, functions, dt=0.01): """ update state in Runge Kutta methods Args: @@ -52,23 +55,23 @@ def update_state_with_Runge_Kutta(state, u, functions, dt=0.01): each function will be called like func(*state, *u) We expect that this function returns differential of each state dt (float): float in seconds - + Returns: next_state (np.array): next state of system - + Notes: sample of function is as follows: def func_x(self, x_1, x_2, u): x_dot = (1. - x_1**2 - x_2**2) * x_2 - x_1 + u return x_dot - + Note that the function return x_dot. """ state_size = len(state) assert state_size == len(functions), \ "Invalid functions length, You need to give the state size functions" - + k0 = np.zeros(state_size) k1 = np.zeros(state_size) k2 = np.zeros(state_size) @@ -84,10 +87,10 @@ def update_state_with_Runge_Kutta(state, u, functions, dt=0.01): for i, func in enumerate(functions): k1[i] = dt * func(*inputs) - + add_state = state + k1 / 2. inputs = np.concatenate([add_state, u]) - + for i, func in enumerate(functions): k2[i] = dt * func(*inputs) @@ -95,6 +98,6 @@ def update_state_with_Runge_Kutta(state, u, functions, dt=0.01): inputs = np.concatenate([add_state, u]) for i, func in enumerate(functions): - k3[i] = dt * func(*inputs) + k3[i] = dt * func(*inputs) - return (k0 + 2. * k1 + 2. * k2 + k3) / 6. \ No newline at end of file + return (k0 + 2. * k1 + 2. * k2 + k3) / 6. diff --git a/PythonLinearNonlinearControl/configs/cartpole.py b/PythonLinearNonlinearControl/configs/cartpole.py index bbcf99b..e8d10a6 100644 --- a/PythonLinearNonlinearControl/configs/cartpole.py +++ b/PythonLinearNonlinearControl/configs/cartpole.py @@ -1,5 +1,6 @@ import numpy as np + class CartPoleConfigModule(): # parameters ENV_NAME = "CartPole-v0" @@ -12,7 +13,7 @@ class CartPoleConfigModule(): DT = 0.02 # cost parameters R = np.diag([0.01]) # 0.01 is worked for MPPI and CEM and MPPIWilliams - # 1. is worked for iLQR + # 1. is worked for iLQR TERMINAL_WEIGHT = 1. Q = None Sf = None @@ -39,41 +40,41 @@ class CartPoleConfigModule(): "num_elites": 50, "max_iters": 15, "alpha": 0.3, - "init_var":9., - "threshold":0.001 + "init_var": 9., + "threshold": 0.001 }, - "MPPI":{ - "beta" : 0.6, + "MPPI": { + "beta": 0.6, "popsize": 5000, "kappa": 0.9, "noise_sigma": 0.5, }, - "MPPIWilliams":{ + "MPPIWilliams": { "popsize": 5000, "lambda": 1., "noise_sigma": 0.9, }, - "iLQR":{ + "iLQR": { "max_iter": 500, "init_mu": 1., "mu_min": 1e-6, "mu_max": 1e10, "init_delta": 2., "threshold": 1e-6, - }, - "DDP":{ + }, + "DDP": { "max_iter": 500, "init_mu": 1., "mu_min": 1e-6, "mu_max": 1e10, "init_delta": 2., "threshold": 1e-6, - }, - "NMPC-CGMRES":{ - }, - "NMPC-Newton":{ - }, - } + }, + "NMPC-CGMRES": { + }, + "NMPC-Newton": { + }, + } @staticmethod def input_cost_fn(u): @@ -87,7 +88,7 @@ class CartPoleConfigModule(): shape(pop_size, pred_len, input_size) """ return (u**2) * np.diag(CartPoleConfigModule.R) - + @staticmethod def state_cost_fn(x, g_x): """ state cost function @@ -103,21 +104,21 @@ class CartPoleConfigModule(): """ if len(x.shape) > 2: - return (6. * (x[:, :, 0]**2) \ - + 12. * ((np.cos(x[:, :, 2]) + 1.)**2) \ - + 0.1 * (x[:, :, 1]**2) \ - + 0.1 * (x[:, :, 3]**2))[:, :, np.newaxis] + return (6. * (x[:, :, 0]**2) + + 12. * ((np.cos(x[:, :, 2]) + 1.)**2) + + 0.1 * (x[:, :, 1]**2) + + 0.1 * (x[:, :, 3]**2))[:, :, np.newaxis] elif len(x.shape) > 1: - return (6. * (x[:, 0]**2) \ - + 12. * ((np.cos(x[:, 2]) + 1.)**2) \ - + 0.1 * (x[:, 1]**2) \ - + 0.1 * (x[:, 3]**2))[:, np.newaxis] - + return (6. * (x[:, 0]**2) + + 12. * ((np.cos(x[:, 2]) + 1.)**2) + + 0.1 * (x[:, 1]**2) + + 0.1 * (x[:, 3]**2))[:, np.newaxis] + return 6. * (x[0]**2) \ - + 12. * ((np.cos(x[2]) + 1.)**2) \ - + 0.1 * (x[1]**2) \ - + 0.1 * (x[3]**2) + + 12. * ((np.cos(x[2]) + 1.)**2) \ + + 0.1 * (x[1]**2) \ + + 0.1 * (x[3]**2) @staticmethod def terminal_state_cost_fn(terminal_x, terminal_g_x): @@ -134,18 +135,18 @@ class CartPoleConfigModule(): """ if len(terminal_x.shape) > 1: - return (6. * (terminal_x[:, 0]**2) \ - + 12. * ((np.cos(terminal_x[:, 2]) + 1.)**2) \ - + 0.1 * (terminal_x[:, 1]**2) \ - + 0.1 * (terminal_x[:, 3]**2))[:, np.newaxis] \ - * CartPoleConfigModule.TERMINAL_WEIGHT - - return (6. * (terminal_x[0]**2) \ - + 12. * ((np.cos(terminal_x[2]) + 1.)**2) \ - + 0.1 * (terminal_x[1]**2) \ - + 0.1 * (terminal_x[3]**2)) \ + return (6. * (terminal_x[:, 0]**2) + + 12. * ((np.cos(terminal_x[:, 2]) + 1.)**2) + + 0.1 * (terminal_x[:, 1]**2) + + 0.1 * (terminal_x[:, 3]**2))[:, np.newaxis] \ * CartPoleConfigModule.TERMINAL_WEIGHT - + + return (6. * (terminal_x[0]**2) + + 12. * ((np.cos(terminal_x[2]) + 1.)**2) + + 0.1 * (terminal_x[1]**2) + + 0.1 * (terminal_x[3]**2)) \ + * CartPoleConfigModule.TERMINAL_WEIGHT + @staticmethod def gradient_cost_fn_with_state(x, g_x, terminal=False): """ gradient of costs with respect to the state @@ -153,26 +154,26 @@ class CartPoleConfigModule(): Args: x (numpy.ndarray): state, shape(pred_len, state_size) g_x (numpy.ndarray): goal state, shape(pred_len, state_size) - + Returns: l_x (numpy.ndarray): gradient of cost, shape(pred_len, state_size) or shape(1, state_size) """ if not terminal: - cost_dx0 = 12. * x[:, 0] + cost_dx0 = 12. * x[:, 0] cost_dx1 = 0.2 * x[:, 1] cost_dx2 = 24. * (1 + np.cos(x[:, 2])) * -np.sin(x[:, 2]) cost_dx3 = 0.2 * x[:, 3] - cost_dx = np.stack((cost_dx0, cost_dx1,\ + cost_dx = np.stack((cost_dx0, cost_dx1, cost_dx2, cost_dx3), axis=1) return cost_dx - - cost_dx0 = 12. * x[0] + + cost_dx0 = 12. * x[0] cost_dx1 = 0.2 * x[1] cost_dx2 = 24. * (1 + np.cos(x[2])) * -np.sin(x[2]) cost_dx3 = 0.2 * x[3] cost_dx = np.array([[cost_dx0, cost_dx1, cost_dx2, cost_dx3]]) - + return cost_dx * CartPoleConfigModule.TERMINAL_WEIGHT @staticmethod @@ -206,21 +207,21 @@ class CartPoleConfigModule(): hessian[:, 0, 0] = 12. hessian[:, 1, 1] = 0.2 hessian[:, 2, 2] = 24. * -np.sin(x[:, 2]) \ - * (-np.sin(x[:, 2])) \ - + 24. * (1. + np.cos(x[:, 2])) \ - * -np.cos(x[:, 2]) + * (-np.sin(x[:, 2])) \ + + 24. * (1. + np.cos(x[:, 2])) \ + * -np.cos(x[:, 2]) hessian[:, 3, 3] = 0.2 return hessian - + state_size = len(x) hessian = np.eye(state_size) hessian[0, 0] = 12. hessian[1, 1] = 0.2 hessian[2, 2] = 24. * -np.sin(x[2]) \ - * (-np.sin(x[2])) \ - + 24. * (1. + np.cos(x[2])) \ - * -np.cos(x[2]) + * (-np.sin(x[2])) \ + + 24. * (1. + np.cos(x[2])) \ + * -np.cos(x[2]) hessian[3, 3] = 0.2 return hessian[np.newaxis, :, :] * CartPoleConfigModule.TERMINAL_WEIGHT @@ -239,7 +240,7 @@ class CartPoleConfigModule(): (pred_len, _) = u.shape return np.tile(2.*CartPoleConfigModule.R, (pred_len, 1, 1)) - + @staticmethod def hessian_cost_fn_with_input_state(x, u): """ hessian costs with respect to the state and input @@ -254,4 +255,4 @@ class CartPoleConfigModule(): (_, state_size) = x.shape (pred_len, input_size) = u.shape - return np.zeros((pred_len, input_size, state_size)) \ No newline at end of file + return np.zeros((pred_len, input_size, state_size)) diff --git a/PythonLinearNonlinearControl/configs/first_order_lag.py b/PythonLinearNonlinearControl/configs/first_order_lag.py index 2c9d268..fef097d 100644 --- a/PythonLinearNonlinearControl/configs/first_order_lag.py +++ b/PythonLinearNonlinearControl/configs/first_order_lag.py @@ -1,5 +1,6 @@ import numpy as np + class FirstOrderLagConfigModule(): # parameters ENV_NAME = "FirstOrderLag-v0" @@ -34,43 +35,43 @@ class FirstOrderLagConfigModule(): "num_elites": 50, "max_iters": 15, "alpha": 0.3, - "init_var":1., - "threshold":0.001 + "init_var": 1., + "threshold": 0.001 }, - "MPPI":{ - "beta" : 0.6, + "MPPI": { + "beta": 0.6, "popsize": 5000, "kappa": 0.9, "noise_sigma": 0.5, }, - "MPPIWilliams":{ + "MPPIWilliams": { "popsize": 5000, "lambda": 1., "noise_sigma": 0.9, }, - "MPC":{ - }, - "iLQR":{ + "MPC": { + }, + "iLQR": { "max_iter": 500, "init_mu": 1., "mu_min": 1e-6, "mu_max": 1e10, "init_delta": 2., "threshold": 1e-6, - }, - "DDP":{ + }, + "DDP": { "max_iter": 500, "init_mu": 1., "mu_min": 1e-6, "mu_max": 1e10, "init_delta": 2., "threshold": 1e-6, - }, - "NMPC-CGMRES":{ - }, - "NMPC-Newton":{ - }, - } + }, + "NMPC-CGMRES": { + }, + "NMPC-Newton": { + }, + } @staticmethod def input_cost_fn(u): @@ -83,7 +84,7 @@ class FirstOrderLagConfigModule(): shape(pop_size, pred_len, input_size) """ return (u**2) * np.diag(FirstOrderLagConfigModule.R) - + @staticmethod def state_cost_fn(x, g_x): """ state cost function @@ -111,8 +112,8 @@ class FirstOrderLagConfigModule(): shape(pop_size, pred_len) """ return ((terminal_x - terminal_g_x)**2) \ - * np.diag(FirstOrderLagConfigModule.Sf) - + * np.diag(FirstOrderLagConfigModule.Sf) + @staticmethod def gradient_cost_fn_with_state(x, g_x, terminal=False): """ gradient of costs with respect to the state @@ -120,16 +121,16 @@ class FirstOrderLagConfigModule(): Args: x (numpy.ndarray): state, shape(pred_len, state_size) g_x (numpy.ndarray): goal state, shape(pred_len, state_size) - + Returns: l_x (numpy.ndarray): gradient of cost, shape(pred_len, state_size) or shape(1, state_size) """ if not terminal: return 2. * (x - g_x) * np.diag(FirstOrderLagConfigModule.Q) - - return (2. * (x - g_x) \ - * np.diag(FirstOrderLagConfigModule.Sf))[np.newaxis, :] + + return (2. * (x - g_x) + * np.diag(FirstOrderLagConfigModule.Sf))[np.newaxis, :] @staticmethod def gradient_cost_fn_with_input(x, u): @@ -138,7 +139,7 @@ class FirstOrderLagConfigModule(): Args: x (numpy.ndarray): state, shape(pred_len, state_size) u (numpy.ndarray): goal state, shape(pred_len, input_size) - + Returns: l_u (numpy.ndarray): gradient of cost, shape(pred_len, input_size) """ @@ -151,7 +152,7 @@ class FirstOrderLagConfigModule(): Args: x (numpy.ndarray): state, shape(pred_len, state_size) g_x (numpy.ndarray): goal state, shape(pred_len, state_size) - + Returns: l_xx (numpy.ndarray): gradient of cost, shape(pred_len, state_size, state_size) or @@ -159,9 +160,9 @@ class FirstOrderLagConfigModule(): """ if not terminal: (pred_len, _) = x.shape - return np.tile(2.*FirstOrderLagConfigModule.Q, (pred_len, 1, 1)) - - return np.tile(2.*FirstOrderLagConfigModule.Sf, (1, 1, 1)) + return np.tile(2.*FirstOrderLagConfigModule.Q, (pred_len, 1, 1)) + + return np.tile(2.*FirstOrderLagConfigModule.Sf, (1, 1, 1)) @staticmethod def hessian_cost_fn_with_input(x, u): @@ -170,7 +171,7 @@ class FirstOrderLagConfigModule(): Args: x (numpy.ndarray): state, shape(pred_len, state_size) u (numpy.ndarray): goal state, shape(pred_len, input_size) - + Returns: l_uu (numpy.ndarray): gradient of cost, shape(pred_len, input_size, input_size) @@ -178,7 +179,7 @@ class FirstOrderLagConfigModule(): (pred_len, _) = u.shape return np.tile(2.*FirstOrderLagConfigModule.R, (pred_len, 1, 1)) - + @staticmethod def hessian_cost_fn_with_input_state(x, u): """ hessian costs with respect to the state and input @@ -186,7 +187,7 @@ class FirstOrderLagConfigModule(): Args: x (numpy.ndarray): state, shape(pred_len, state_size) u (numpy.ndarray): goal state, shape(pred_len, input_size) - + Returns: l_ux (numpy.ndarray): gradient of cost , shape(pred_len, input_size, state_size) diff --git a/PythonLinearNonlinearControl/configs/make_configs.py b/PythonLinearNonlinearControl/configs/make_configs.py index a48aedc..4bb0873 100644 --- a/PythonLinearNonlinearControl/configs/make_configs.py +++ b/PythonLinearNonlinearControl/configs/make_configs.py @@ -2,6 +2,7 @@ from .first_order_lag import FirstOrderLagConfigModule from .two_wheeled import TwoWheeledConfigModule from .cartpole import CartPoleConfigModule + def make_config(args): """ Returns: @@ -12,4 +13,4 @@ def make_config(args): elif args.env == "TwoWheeledConst" or args.env == "TwoWheeledTrack": return TwoWheeledConfigModule() elif args.env == "CartPole": - return CartPoleConfigModule() \ No newline at end of file + return CartPoleConfigModule() diff --git a/PythonLinearNonlinearControl/configs/two_wheeled.py b/PythonLinearNonlinearControl/configs/two_wheeled.py index 93de72a..56de209 100644 --- a/PythonLinearNonlinearControl/configs/two_wheeled.py +++ b/PythonLinearNonlinearControl/configs/two_wheeled.py @@ -4,6 +4,7 @@ from matplotlib.axes import Axes from ..plotters.plot_objs import square_with_angle, square from ..common.utils import fit_angle_in_range + class TwoWheeledConfigModule(): # parameters ENV_NAME = "TwoWheeled-v0" @@ -25,7 +26,7 @@ class TwoWheeledConfigModule(): R = np.diag([0.01, 0.01]) Q = np.diag([2.5, 2.5, 0.01]) Sf = np.diag([2.5, 2.5, 0.01]) - + # bounds INPUT_LOWER_BOUND = np.array([-1.5, -3.14]) INPUT_UPPER_BOUND = np.array([1.5, 3.14]) @@ -46,41 +47,41 @@ class TwoWheeledConfigModule(): "num_elites": 50, "max_iters": 15, "alpha": 0.3, - "init_var":1., - "threshold":0.001 + "init_var": 1., + "threshold": 0.001 }, - "MPPI":{ - "beta" : 0.6, + "MPPI": { + "beta": 0.6, "popsize": 5000, "kappa": 0.9, "noise_sigma": 0.5, }, - "MPPIWilliams":{ + "MPPIWilliams": { "popsize": 5000, "lambda": 1, "noise_sigma": 1., }, - "iLQR":{ + "iLQR": { "max_iter": 500, "init_mu": 1., "mu_min": 1e-6, "mu_max": 1e10, "init_delta": 2., "threshold": 1e-6, - }, - "DDP":{ + }, + "DDP": { "max_iter": 500, "init_mu": 1., "mu_min": 1e-6, "mu_max": 1e10, "init_delta": 2., "threshold": 1e-6, - }, - "NMPC-CGMRES":{ - }, - "NMPC-Newton":{ - }, - } + }, + "NMPC-CGMRES": { + }, + "NMPC-Newton": { + }, + } @staticmethod def input_cost_fn(u): @@ -93,7 +94,7 @@ class TwoWheeledConfigModule(): shape(pop_size, pred_len, input_size) """ return (u**2) * np.diag(TwoWheeledConfigModule.R) - + @staticmethod def fit_diff_in_range(diff_x): """ fit difference state in range(angle) @@ -107,7 +108,7 @@ class TwoWheeledConfigModule(): fitted_diff_x (numpy.ndarray): same shape as diff_x """ if len(diff_x.shape) == 3: - diff_x[:, :, -1] = fit_angle_in_range(diff_x[:, :, -1]) + diff_x[:, :, -1] = fit_angle_in_range(diff_x[:, :, -1]) elif len(diff_x.shape) == 2: diff_x[:, -1] = fit_angle_in_range(diff_x[:, -1]) elif len(diff_x.shape) == 1: @@ -142,11 +143,11 @@ class TwoWheeledConfigModule(): cost (numpy.ndarray): cost of state, shape(pred_len, ) or shape(pop_size, pred_len) """ - terminal_diff = TwoWheeledConfigModule.fit_diff_in_range(terminal_x \ - - terminal_g_x) - + terminal_diff = TwoWheeledConfigModule.fit_diff_in_range(terminal_x + - terminal_g_x) + return ((terminal_diff)**2) * np.diag(TwoWheeledConfigModule.Sf) - + @staticmethod def gradient_cost_fn_with_state(x, g_x, terminal=False): """ gradient of costs with respect to the state @@ -154,18 +155,18 @@ class TwoWheeledConfigModule(): Args: x (numpy.ndarray): state, shape(pred_len, state_size) g_x (numpy.ndarray): goal state, shape(pred_len, state_size) - + Returns: l_x (numpy.ndarray): gradient of cost, shape(pred_len, state_size) or shape(1, state_size) """ diff = TwoWheeledConfigModule.fit_diff_in_range(x - g_x) - + if not terminal: return 2. * (diff) * np.diag(TwoWheeledConfigModule.Q) - - return (2. * (diff) \ - * np.diag(TwoWheeledConfigModule.Sf))[np.newaxis, :] + + return (2. * (diff) + * np.diag(TwoWheeledConfigModule.Sf))[np.newaxis, :] @staticmethod def gradient_cost_fn_with_input(x, u): @@ -174,7 +175,7 @@ class TwoWheeledConfigModule(): Args: x (numpy.ndarray): state, shape(pred_len, state_size) u (numpy.ndarray): goal state, shape(pred_len, input_size) - + Returns: l_u (numpy.ndarray): gradient of cost, shape(pred_len, input_size) """ @@ -187,7 +188,7 @@ class TwoWheeledConfigModule(): Args: x (numpy.ndarray): state, shape(pred_len, state_size) g_x (numpy.ndarray): goal state, shape(pred_len, state_size) - + Returns: l_xx (numpy.ndarray): gradient of cost, shape(pred_len, state_size, state_size) or @@ -195,9 +196,9 @@ class TwoWheeledConfigModule(): """ if not terminal: (pred_len, _) = x.shape - return np.tile(2.*TwoWheeledConfigModule.Q, (pred_len, 1, 1)) - - return np.tile(2.*TwoWheeledConfigModule.Sf, (1, 1, 1)) + return np.tile(2.*TwoWheeledConfigModule.Q, (pred_len, 1, 1)) + + return np.tile(2.*TwoWheeledConfigModule.Sf, (1, 1, 1)) @staticmethod def hessian_cost_fn_with_input(x, u): @@ -206,7 +207,7 @@ class TwoWheeledConfigModule(): Args: x (numpy.ndarray): state, shape(pred_len, state_size) u (numpy.ndarray): goal state, shape(pred_len, input_size) - + Returns: l_uu (numpy.ndarray): gradient of cost, shape(pred_len, input_size, input_size) @@ -214,7 +215,7 @@ class TwoWheeledConfigModule(): (pred_len, _) = u.shape return np.tile(2.*TwoWheeledConfigModule.R, (pred_len, 1, 1)) - + @staticmethod def hessian_cost_fn_with_input_state(x, u): """ hessian costs with respect to the state and input @@ -222,7 +223,7 @@ class TwoWheeledConfigModule(): Args: x (numpy.ndarray): state, shape(pred_len, state_size) u (numpy.ndarray): goal state, shape(pred_len, input_size) - + Returns: l_ux (numpy.ndarray): gradient of cost , shape(pred_len, input_size, state_size) @@ -230,4 +231,4 @@ class TwoWheeledConfigModule(): (_, state_size) = x.shape (pred_len, input_size) = u.shape - return np.zeros((pred_len, input_size, state_size)) \ No newline at end of file + return np.zeros((pred_len, input_size, state_size)) diff --git a/PythonLinearNonlinearControl/controllers/cem.py b/PythonLinearNonlinearControl/controllers/cem.py index 238ff39..176607e 100644 --- a/PythonLinearNonlinearControl/controllers/cem.py +++ b/PythonLinearNonlinearControl/controllers/cem.py @@ -8,6 +8,7 @@ from ..envs.cost import calc_cost logger = getLogger(__name__) + class CEM(Controller): """ Cross Entropy Method for linear and nonlinear method @@ -19,6 +20,7 @@ class CEM(Controller): using probabilistic dynamics models. In Advances in Neural Information Processing Systems (pp. 4754-4765). """ + def __init__(self, config, model): super(CEM, self).__init__(config, model) @@ -38,7 +40,7 @@ class CEM(Controller): self.init_var = config.opt_config["CEM"]["init_var"] self.opt_dim = self.input_size * self.pred_len - # get bound + # get bound self.input_upper_bounds = np.tile(config.INPUT_UPPER_BOUND, self.pred_len) self.input_lower_bounds = np.tile(config.INPUT_LOWER_BOUND, @@ -50,18 +52,18 @@ class CEM(Controller): self.input_cost_fn = config.input_cost_fn # init mean - self.init_mean = np.tile((config.INPUT_UPPER_BOUND \ + self.init_mean = np.tile((config.INPUT_UPPER_BOUND + config.INPUT_LOWER_BOUND) / 2., self.pred_len) self.prev_sol = self.init_mean.copy() # init variance var = np.ones_like(config.INPUT_UPPER_BOUND) \ - * config.opt_config["CEM"]["init_var"] + * config.opt_config["CEM"]["init_var"] self.init_var = np.tile(var, self.pred_len) # save self.history_u = [] - + def clear_sol(self): """ clear prev sol """ @@ -77,21 +79,21 @@ class CEM(Controller): Returns: opt_input (numpy.ndarray): optimal input, shape(input_size, ) """ - # initialize + # initialize opt_count = 0 # get configuration - mean = self.prev_sol.flatten().copy() + mean = self.prev_sol.flatten().copy() var = self.init_var.flatten().copy() - # make distribution + # make distribution X = stats.truncnorm(-1, 1, - loc=np.zeros_like(mean),\ + loc=np.zeros_like(mean), scale=np.ones_like(mean)) - + while (opt_count < self.max_iters) and np.max(var) > self.epsilon: # constrained - lb_dist = mean - self.input_lower_bounds + lb_dist = mean - self.input_lower_bounds ub_dist = self.input_upper_bounds - mean constrained_var = np.minimum(np.minimum(np.square(lb_dist), np.square(ub_dist)), @@ -99,15 +101,15 @@ class CEM(Controller): # sample samples = X.rvs(size=[self.pop_size, self.opt_dim]) \ - * np.sqrt(constrained_var) \ - + mean + * np.sqrt(constrained_var) \ + + mean # calc cost # samples.shape = (pop_size, opt_dim) costs = self.calc_cost(curr_x, samples.reshape(self.pop_size, - self.pred_len, - self.input_size), + self.pred_len, + self.input_size), g_xs) # sort cost @@ -124,7 +126,7 @@ class CEM(Controller): logger.debug("Var = {}".format(np.max(var))) logger.debug("Costs = {}".format(np.mean(costs))) opt_count += 1 - + sol = mean.copy() self.prev_sol = np.concatenate((mean[self.input_size:], np.zeros(self.input_size))) diff --git a/PythonLinearNonlinearControl/controllers/controller.py b/PythonLinearNonlinearControl/controllers/controller.py index 50abb01..35c2297 100644 --- a/PythonLinearNonlinearControl/controllers/controller.py +++ b/PythonLinearNonlinearControl/controllers/controller.py @@ -2,9 +2,11 @@ import numpy as np from ..envs.cost import calc_cost + class Controller(): """ Controller class """ + def __init__(self, config, model): """ """ @@ -15,7 +17,7 @@ class Controller(): self.state_cost_fn = config.state_cost_fn self.terminal_state_cost_fn = config.terminal_state_cost_fn self.input_cost_fn = config.input_cost_fn - + def obtain_sol(self, curr_x, g_xs): """ calculate the optimal inputs Args: @@ -26,7 +28,7 @@ class Controller(): """ raise NotImplementedError("Implement the algorithm to \ get optimal input") - + def calc_cost(self, curr_x, samples, g_xs): """ calculate the cost of input samples @@ -46,22 +48,24 @@ class Controller(): # calc cost, pred_xs.shape = (pop_size, pred_len+1, state_size) pred_xs = self.model.predict_traj(curr_x, samples) - + # get particle cost costs = calc_cost(pred_xs, samples, g_xs, - self.state_cost_fn, self.input_cost_fn, \ + self.state_cost_fn, self.input_cost_fn, self.terminal_state_cost_fn) - + return costs @staticmethod def gradient_hamiltonian_x(x, u, lam): """ gradient of hamitonian with respect to the state, """ - raise NotImplementedError("Implement gradient of hamitonian with respect to the state") + raise NotImplementedError( + "Implement gradient of hamitonian with respect to the state") @staticmethod def gradient_hamiltonian_u(x, u, lam): """ gradient of hamitonian with respect to the input """ - raise NotImplementedError("Implement gradient of hamitonian with respect to the input") \ No newline at end of file + raise NotImplementedError( + "Implement gradient of hamitonian with respect to the input") diff --git a/PythonLinearNonlinearControl/controllers/ddp.py b/PythonLinearNonlinearControl/controllers/ddp.py index 4abb229..2ef8099 100644 --- a/PythonLinearNonlinearControl/controllers/ddp.py +++ b/PythonLinearNonlinearControl/controllers/ddp.py @@ -8,6 +8,7 @@ from ..envs.cost import calc_cost logger = getLogger(__name__) + class DDP(Controller): """ Differential Dynamic Programming @@ -18,11 +19,12 @@ class DDP(Controller): https://github.com/studywolf/control, and https://github.com/anassinator/ilqr """ + def __init__(self, config, model): """ """ super(DDP, self).__init__(config, model) - + # model self.model = model @@ -56,7 +58,7 @@ class DDP(Controller): self.Q = config.Q self.R = config.R self.Sf = config.Sf - + # initialize self.prev_sol = np.zeros((self.pred_len, self.input_size)) @@ -65,7 +67,7 @@ class DDP(Controller): """ logger.debug("Clear Sol") self.prev_sol = np.zeros((self.pred_len, self.input_size)) - + def obtain_sol(self, curr_x, g_xs): """ calculate the optimal inputs @@ -89,26 +91,26 @@ class DDP(Controller): while opt_count < self.max_iter: accepted_sol = False - # forward + # forward if update_sol == True: pred_xs, cost, f_x, f_u, f_xx, f_ux, f_uu,\ - l_x, l_xx, l_u, l_uu, l_ux = \ + l_x, l_xx, l_u, l_uu, l_ux = \ self.forward(curr_x, g_xs, sol) update_sol = False - + try: # backward - k, K = self.backward(f_x, f_u, f_xx, f_ux, f_uu, \ + k, K = self.backward(f_x, f_u, f_xx, f_ux, f_uu, l_x, l_xx, l_u, l_uu, l_ux) # line search for alpha in alphas: new_pred_xs, new_sol = \ self.calc_input(k, K, pred_xs, sol, alpha) - + new_cost = calc_cost(new_pred_xs[np.newaxis, :, :], new_sol[np.newaxis, :, :], - g_xs[np.newaxis, :, :], + g_xs[np.newaxis, :, :], self.state_cost_fn, self.input_cost_fn, self.terminal_state_cost_fn) @@ -131,15 +133,15 @@ class DDP(Controller): # accept the solution accepted_sol = True break - + except np.linalg.LinAlgError as e: logger.debug("Non ans : {}".format(e)) - + if not accepted_sol: # increase regularization term. self.delta = max(1.0, self.delta) * self.init_delta self.mu = max(self.mu_min, self.mu * self.delta) - logger.debug("Update regularization term to {}"\ + logger.debug("Update regularization term to {}" .format(self.mu)) if self.mu >= self.mu_max: logger.debug("Reach Max regularization term") @@ -156,7 +158,7 @@ class DDP(Controller): self.prev_sol[-1] = sol[-1] # last use the terminal input return sol[0] - + def calc_input(self, k, K, pred_xs, sol, alpha): """ calc input trajectory by using k and K @@ -183,8 +185,8 @@ class DDP(Controller): for t in range(pred_len): new_sol[t] = sol[t] \ - + alpha * k[t] \ - + np.dot(K[t], (new_pred_xs[t] - pred_xs[t])) + + alpha * k[t] \ + + np.dot(K[t], (new_pred_xs[t] - pred_xs[t])) new_pred_xs[t+1] = self.model.predict_next_state(new_pred_xs[t], new_sol[t]) @@ -227,7 +229,7 @@ class DDP(Controller): g_xs) # calc gradinet in batch - f_x = self.model.calc_f_x(pred_xs[:-1], sol, self.dt) + f_x = self.model.calc_f_x(pred_xs[:-1], sol, self.dt) f_u = self.model.calc_f_u(pred_xs[:-1], sol, self.dt) # calc hessian in batch f_xx = self.model.calc_f_xx(pred_xs[:-1], sol, self.dt) @@ -237,13 +239,13 @@ class DDP(Controller): # gradint of costs l_x, l_xx, l_u, l_uu, l_ux = \ self._calc_gradient_hessian_cost(pred_xs, g_xs, sol) - + return pred_xs, cost, f_x, f_u, f_xx, f_ux, f_uu, \ l_x, l_xx, l_u, l_uu, l_ux def _calc_gradient_hessian_cost(self, pred_xs, g_x, sol): """ calculate gradient and hessian of model and cost fn - + Args: pred_xs (numpy.ndarray): predict traj, shape(pred_len+1, state_size) @@ -268,7 +270,7 @@ class DDP(Controller): self.gradient_cost_fn_with_state(pred_xs[-1], g_x[-1], terminal=True) - l_x = np.concatenate((l_x, terminal_l_x), axis=0) + l_x = np.concatenate((l_x, terminal_l_x), axis=0) # l_u.shape = (pred_len, input_size) l_u = self.gradient_cost_fn_with_input(pred_xs[:-1], sol) @@ -281,7 +283,7 @@ class DDP(Controller): g_x[-1], terminal=True) l_xx = np.concatenate((l_xx, terminal_l_xx), axis=0) - + # l_uu.shape = (pred_len, input_size, input_size) l_uu = self.hessian_cost_fn_with_input(pred_xs[:-1], sol) @@ -321,7 +323,7 @@ class DDP(Controller): # get size (_, state_size, _) = f_x.shape - # initialzie + # initialzie V_x = l_x[-1] V_xx = l_xx[-1] k = np.zeros((self.pred_len, self.input_size)) @@ -388,7 +390,7 @@ class DDP(Controller): """ # get size state_size = len(l_x) - + Q_x = l_x + np.dot(f_x.T, V_x) Q_u = l_u + np.dot(f_u.T, V_x) Q_xx = l_xx + np.dot(np.dot(f_x.T, V_xx), f_x) @@ -402,4 +404,4 @@ class DDP(Controller): Q_ux += np.tensordot(V_x, f_ux, axes=1) Q_uu += np.tensordot(V_x, f_uu, axes=1) - return Q_x, Q_u, Q_xx, Q_ux, Q_uu \ No newline at end of file + return Q_x, Q_u, Q_xx, Q_ux, Q_uu diff --git a/PythonLinearNonlinearControl/controllers/ilqr.py b/PythonLinearNonlinearControl/controllers/ilqr.py index 9a80dfa..7bb81cd 100644 --- a/PythonLinearNonlinearControl/controllers/ilqr.py +++ b/PythonLinearNonlinearControl/controllers/ilqr.py @@ -8,6 +8,7 @@ from ..envs.cost import calc_cost logger = getLogger(__name__) + class iLQR(Controller): """ iterative Liner Quadratique Regulator @@ -16,11 +17,12 @@ class iLQR(Controller): Intelligent Robots and Systems (pp. 4906-4913). and Study Wolf, https://github.com/studywolf/control """ + def __init__(self, config, model): """ """ super(iLQR, self).__init__(config, model) - + # model self.model = model @@ -58,7 +60,7 @@ class iLQR(Controller): """ logger.debug("Clear Sol") self.prev_sol = np.zeros((self.pred_len, self.input_size)) - + def obtain_sol(self, curr_x, g_xs): """ calculate the optimal inputs @@ -82,12 +84,12 @@ class iLQR(Controller): while opt_count < self.max_iter: accepted_sol = False - # forward + # forward if update_sol == True: pred_xs, cost, f_x, f_u, l_x, l_xx, l_u, l_uu, l_ux = \ self.forward(curr_x, g_xs, sol) update_sol = False - + try: # backward k, K = self.backward(f_x, f_u, l_x, l_xx, l_u, l_uu, l_ux) @@ -96,10 +98,10 @@ class iLQR(Controller): for alpha in alphas: new_pred_xs, new_sol = \ self.calc_input(k, K, pred_xs, sol, alpha) - + new_cost = calc_cost(new_pred_xs[np.newaxis, :, :], new_sol[np.newaxis, :, :], - g_xs[np.newaxis, :, :], + g_xs[np.newaxis, :, :], self.state_cost_fn, self.input_cost_fn, self.terminal_state_cost_fn) @@ -122,15 +124,15 @@ class iLQR(Controller): # accept the solution accepted_sol = True break - + except np.linalg.LinAlgError as e: logger.debug("Non ans : {}".format(e)) - + if not accepted_sol: # increase regularization term. self.delta = max(1.0, self.delta) * self.init_delta self.mu = max(self.mu_min, self.mu * self.delta) - logger.debug("Update regularization term to {}"\ + logger.debug("Update regularization term to {}" .format(self.mu)) if self.mu >= self.mu_max: logger.debug("Reach Max regularization term") @@ -147,7 +149,7 @@ class iLQR(Controller): self.prev_sol[-1] = sol[-1] # last use the terminal input return sol[0] - + def calc_input(self, k, K, pred_xs, sol, alpha): """ calc input trajectory by using k and K @@ -174,8 +176,8 @@ class iLQR(Controller): for t in range(pred_len): new_sol[t] = sol[t] \ - + alpha * k[t] \ - + np.dot(K[t], (new_pred_xs[t] - pred_xs[t])) + + alpha * k[t] \ + + np.dot(K[t], (new_pred_xs[t] - pred_xs[t])) new_pred_xs[t+1] = self.model.predict_next_state(new_pred_xs[t], new_sol[t]) @@ -212,18 +214,18 @@ class iLQR(Controller): g_xs) # calc gradinet in batch - f_x = self.model.calc_f_x(pred_xs[:-1], sol, self.dt) + f_x = self.model.calc_f_x(pred_xs[:-1], sol, self.dt) f_u = self.model.calc_f_u(pred_xs[:-1], sol, self.dt) # gradint of costs l_x, l_xx, l_u, l_uu, l_ux = \ self._calc_gradient_hessian_cost(pred_xs, g_xs, sol) - + return pred_xs, cost, f_x, f_u, l_x, l_xx, l_u, l_uu, l_ux def _calc_gradient_hessian_cost(self, pred_xs, g_x, sol): """ calculate gradient and hessian of model and cost fn - + Args: pred_xs (numpy.ndarray): predict traj, shape(pred_len+1, state_size) @@ -248,7 +250,7 @@ class iLQR(Controller): self.gradient_cost_fn_with_state(pred_xs[-1], g_x[-1], terminal=True) - l_x = np.concatenate((l_x, terminal_l_x), axis=0) + l_x = np.concatenate((l_x, terminal_l_x), axis=0) # l_u.shape = (pred_len, input_size) l_u = self.gradient_cost_fn_with_input(pred_xs[:-1], sol) @@ -261,7 +263,7 @@ class iLQR(Controller): g_x[-1], terminal=True) l_xx = np.concatenate((l_xx, terminal_l_xx), axis=0) - + # l_uu.shape = (pred_len, input_size, input_size) l_uu = self.hessian_cost_fn_with_input(pred_xs[:-1], sol) @@ -287,7 +289,7 @@ class iLQR(Controller): shape(pred_len, input_size, input_size) l_ux (numpy.ndarray): hessian of cost with respect to state and input, shape(pred_len, input_size, state_size) - + Returns: k (numpy.ndarray): gain, shape(pred_len, input_size) K (numpy.ndarray): gain, shape(pred_len, input_size, state_size) @@ -295,7 +297,7 @@ class iLQR(Controller): # get size (_, state_size, _) = f_x.shape - # initialzie + # initialzie V_x = l_x[-1] V_xx = l_xx[-1] k = np.zeros((self.pred_len, self.input_size)) @@ -352,7 +354,7 @@ class iLQR(Controller): """ # get size state_size = len(l_x) - + Q_x = l_x + np.dot(f_x.T, V_x) Q_u = l_u + np.dot(f_u.T, V_x) Q_xx = l_xx + np.dot(np.dot(f_x.T, V_xx), f_x) @@ -361,4 +363,4 @@ class iLQR(Controller): Q_ux = l_ux + np.dot(np.dot(f_u.T, (V_xx + reg)), f_x) Q_uu = l_uu + np.dot(np.dot(f_u.T, (V_xx + reg)), f_u) - return Q_x, Q_u, Q_xx, Q_ux, Q_uu \ No newline at end of file + return Q_x, Q_u, Q_xx, Q_ux, Q_uu diff --git a/PythonLinearNonlinearControl/controllers/make_controllers.py b/PythonLinearNonlinearControl/controllers/make_controllers.py index d5ec8df..1bc6138 100644 --- a/PythonLinearNonlinearControl/controllers/make_controllers.py +++ b/PythonLinearNonlinearControl/controllers/make_controllers.py @@ -6,6 +6,7 @@ from .mppi_williams import MPPIWilliams from .ilqr import iLQR from .ddp import DDP + def make_controller(args, config, model): if args.controller_type == "MPC": @@ -22,5 +23,5 @@ def make_controller(args, config, model): return iLQR(config, model) elif args.controller_type == "DDP": return DDP(config, model) - - raise ValueError("No controller: {}".format(args.controller_type)) \ No newline at end of file + + raise ValueError("No controller: {}".format(args.controller_type)) diff --git a/PythonLinearNonlinearControl/controllers/mpc.py b/PythonLinearNonlinearControl/controllers/mpc.py index ddf5840..c1e0db3 100644 --- a/PythonLinearNonlinearControl/controllers/mpc.py +++ b/PythonLinearNonlinearControl/controllers/mpc.py @@ -9,6 +9,7 @@ from ..envs.cost import calc_cost logger = getLogger(__name__) + class LinearMPC(Controller): """ Model Predictive Controller for linear model @@ -21,6 +22,7 @@ class LinearMPC(Controller): Ref: Maciejowski, J. M. (2002). Predictive control: with constraints. """ + def __init__(self, config, model): """ Args: @@ -55,7 +57,7 @@ class LinearMPC(Controller): self.dt_input_upper_bound = config.DT_INPUT_UPPER_BOUND self.input_lower_bound = config.INPUT_LOWER_BOUND self.input_upper_bound = config.INPUT_UPPER_BOUND - + # setup controllers self.W = None self.omega = None @@ -66,7 +68,7 @@ class LinearMPC(Controller): # history self.history_u = [np.zeros(self.input_size)] - + def setup(self): """ setup Model Predictive Control as a quadratic programming @@ -77,11 +79,11 @@ class LinearMPC(Controller): for _ in range(self.pred_len - 1): temp_mat = np.matmul(A_factorials[-1], self.A) self.phi_mat = np.vstack((self.phi_mat, temp_mat)) - A_factorials.append(temp_mat) # after we use this factorials - + A_factorials.append(temp_mat) # after we use this factorials + self.gamma_mat = self.B.copy() gammma_mat_temp = self.B.copy() - + for i in range(self.pred_len - 1): temp_1_mat = np.matmul(A_factorials[i], self.B) gammma_mat_temp = temp_1_mat + gammma_mat_temp @@ -91,8 +93,8 @@ class LinearMPC(Controller): for i in range(self.pred_len - 1): temp_mat = np.zeros_like(self.gamma_mat) - temp_mat[int((i + 1)*self.state_size): , :] =\ - self.gamma_mat[:-int((i + 1)*self.state_size) , :] + temp_mat[int((i + 1)*self.state_size):, :] =\ + self.gamma_mat[:-int((i + 1)*self.state_size), :] self.theta_mat = np.hstack((self.theta_mat, temp_mat)) @@ -114,12 +116,12 @@ class LinearMPC(Controller): for i in range(self.pred_len - 1): for j in range(self.input_size): - temp_F[j * 2: (j + 1) * 2,\ + temp_F[j * 2: (j + 1) * 2, ((i+1) * self.input_size) + j] = np.array([1., -1.]) self.F = np.vstack((self.F, temp_F)) self.F1 = self.F[:, :self.input_size] - + temp_f = [] for i in range(self.input_size): temp_f.append(-1 * self.input_upper_bound[i]) @@ -168,7 +170,7 @@ class LinearMPC(Controller): H = H * 0.5 # constraints - A = [] + A = [] b = [] if self.W is not None: @@ -187,14 +189,14 @@ class LinearMPC(Controller): # using cvxopt def optimized_func(dt_us): - return (np.dot(dt_us, np.dot(H, dt_us.reshape(-1, 1))) \ + return (np.dot(dt_us, np.dot(H, dt_us.reshape(-1, 1))) - np.dot(G.T, dt_us.reshape(-1, 1)))[0] # constraint lb = np.array([-np.inf for _ in range(len(ub))]) # one side cons cons = LinearConstraint(A, lb, ub) # solve - opt_sol = minimize(optimized_func, self.prev_sol.flatten(),\ + opt_sol = minimize(optimized_func, self.prev_sol.flatten(), constraints=[cons]) opt_dt_us = opt_sol.x @@ -213,21 +215,21 @@ class LinearMPC(Controller): """ # to dt form - opt_dt_u_seq = np.cumsum(opt_dt_us.reshape(self.pred_len,\ + opt_dt_u_seq = np.cumsum(opt_dt_us.reshape(self.pred_len, self.input_size), axis=0) self.prev_sol = opt_dt_u_seq.copy() - + opt_u_seq = opt_dt_u_seq + self.history_u[-1] - + # save self.history_u.append(opt_u_seq[0]) # check costs costs = self.calc_cost(curr_x, opt_u_seq.reshape(1, - self.pred_len, - self.input_size), + self.pred_len, + self.input_size), g_xs) logger.debug("Cost = {}".format(costs)) @@ -235,4 +237,4 @@ class LinearMPC(Controller): return opt_u_seq[0] def __str__(self): - return "LinearMPC" \ No newline at end of file + return "LinearMPC" diff --git a/PythonLinearNonlinearControl/controllers/mppi.py b/PythonLinearNonlinearControl/controllers/mppi.py index fc8d887..3b0be45 100644 --- a/PythonLinearNonlinearControl/controllers/mppi.py +++ b/PythonLinearNonlinearControl/controllers/mppi.py @@ -8,6 +8,7 @@ from ..envs.cost import calc_cost logger = getLogger(__name__) + class MPPI(Controller): """ Model Predictive Path Integral for linear and nonlinear method @@ -18,6 +19,7 @@ class MPPI(Controller): Deep Dynamics Models for Learning Dexterous Manipulation. arXiv preprint arXiv:1909.11652. """ + def __init__(self, config, model): super(MPPI, self).__init__(config, model) @@ -35,7 +37,7 @@ class MPPI(Controller): self.noise_sigma = config.opt_config["MPPI"]["noise_sigma"] self.opt_dim = self.input_size * self.pred_len - # get bound + # get bound self.input_upper_bounds = np.tile(config.INPUT_UPPER_BOUND, (self.pred_len, 1)) self.input_lower_bounds = np.tile(config.INPUT_LOWER_BOUND, @@ -47,14 +49,14 @@ class MPPI(Controller): self.input_cost_fn = config.input_cost_fn # init mean - self.prev_sol = np.tile((config.INPUT_UPPER_BOUND \ + self.prev_sol = np.tile((config.INPUT_UPPER_BOUND + config.INPUT_LOWER_BOUND) / 2., self.pred_len) self.prev_sol = self.prev_sol.reshape(self.pred_len, self.input_size) # save self.history_u = [np.zeros(self.input_size)] - + def clear_sol(self): """ clear prev sol """ @@ -74,24 +76,24 @@ class MPPI(Controller): """ # get noised inputs noise = np.random.normal( - loc=0, scale=1.0, size=(self.pop_size, self.pred_len, - self.input_size)) * self.noise_sigma + loc=0, scale=1.0, size=(self.pop_size, self.pred_len, + self.input_size)) * self.noise_sigma noised_inputs = noise.copy() - + for t in range(self.pred_len): if t > 0: noised_inputs[:, t, :] = self.beta \ - * (self.prev_sol[t, :] \ - + noise[:, t, :]) \ - + (1 - self.beta) \ - * noised_inputs[:, t-1, :] + * (self.prev_sol[t, :] + + noise[:, t, :]) \ + + (1 - self.beta) \ + * noised_inputs[:, t-1, :] else: noised_inputs[:, t, :] = self.beta \ - * (self.prev_sol[t, :] \ - + noise[:, t, :]) \ - + (1 - self.beta) \ - * self.history_u[-1] - + * (self.prev_sol[t, :] + + noise[:, t, :]) \ + + (1 - self.beta) \ + * self.history_u[-1] + # clip actions noised_inputs = np.clip( noised_inputs, self.input_lower_bounds, self.input_upper_bounds) @@ -108,7 +110,7 @@ class MPPI(Controller): # weight actions weighted_inputs = exp_rewards[:, np.newaxis, np.newaxis] \ - * noised_inputs + * noised_inputs sol = np.sum(weighted_inputs, 0) / denom # update @@ -121,4 +123,4 @@ class MPPI(Controller): return sol[0] def __str__(self): - return "MPPI" \ No newline at end of file + return "MPPI" diff --git a/PythonLinearNonlinearControl/controllers/mppi_williams.py b/PythonLinearNonlinearControl/controllers/mppi_williams.py index 1fd0102..3ffaaa1 100644 --- a/PythonLinearNonlinearControl/controllers/mppi_williams.py +++ b/PythonLinearNonlinearControl/controllers/mppi_williams.py @@ -8,6 +8,7 @@ from ..envs.cost import calc_cost logger = getLogger(__name__) + class MPPIWilliams(Controller): """ Model Predictive Path Integral for linear and nonlinear method @@ -19,6 +20,7 @@ class MPPIWilliams(Controller): 2017 IEEE International Conference on Robotics and Automation (ICRA), Singapore, 2017, pp. 1714-1721. """ + def __init__(self, config, model): super(MPPIWilliams, self).__init__(config, model) @@ -35,7 +37,7 @@ class MPPIWilliams(Controller): self.noise_sigma = config.opt_config["MPPIWilliams"]["noise_sigma"] self.opt_dim = self.input_size * self.pred_len - # get bound + # get bound self.input_upper_bounds = np.tile(config.INPUT_UPPER_BOUND, (self.pred_len, 1)) self.input_lower_bounds = np.tile(config.INPUT_LOWER_BOUND, @@ -47,14 +49,14 @@ class MPPIWilliams(Controller): self.input_cost_fn = config.input_cost_fn # init mean - self.prev_sol = np.tile((config.INPUT_UPPER_BOUND \ + self.prev_sol = np.tile((config.INPUT_UPPER_BOUND + config.INPUT_LOWER_BOUND) / 2., self.pred_len) self.prev_sol = self.prev_sol.reshape(self.pred_len, self.input_size) # save self.history_u = [np.zeros(self.input_size)] - + def clear_sol(self): """ clear prev sol """ @@ -62,7 +64,7 @@ class MPPIWilliams(Controller): self.prev_sol = \ (self.input_upper_bounds + self.input_lower_bounds) / 2. self.prev_sol = self.prev_sol.reshape(self.pred_len, self.input_size) - + def calc_cost(self, curr_x, samples, g_xs): """ calculate the cost of input samples by using MPPI's eq @@ -82,12 +84,12 @@ class MPPIWilliams(Controller): # calc cost, pred_xs.shape = (pop_size, pred_len+1, state_size) pred_xs = self.model.predict_traj(curr_x, samples) - + # get particle cost costs = calc_cost(pred_xs, samples, g_xs, - self.state_cost_fn, None, \ + self.state_cost_fn, None, self.terminal_state_cost_fn) - + return costs def obtain_sol(self, curr_x, g_xs): @@ -101,9 +103,9 @@ class MPPIWilliams(Controller): """ # get noised inputs noise = np.random.normal( - loc=0, scale=1.0, size=(self.pop_size, self.pred_len, - self.input_size)) * self.noise_sigma - + loc=0, scale=1.0, size=(self.pop_size, self.pred_len, + self.input_size)) * self.noise_sigma + noised_inputs = self.prev_sol + noise # clip actions @@ -120,7 +122,7 @@ class MPPIWilliams(Controller): # mppi update beta = np.min(costs) eta = np.sum(np.exp(- 1. / self.lam * (costs - beta)), axis=0) \ - + 1e-10 + + 1e-10 # weight # eta.shape = (pred_len, input_size) @@ -128,7 +130,7 @@ class MPPIWilliams(Controller): # update inputs sol = self.prev_sol \ - + np.sum(weights[:, np.newaxis, np.newaxis] * noise, axis=0) + + np.sum(weights[:, np.newaxis, np.newaxis] * noise, axis=0) # update self.prev_sol[:-1] = sol[1:] @@ -140,4 +142,4 @@ class MPPIWilliams(Controller): return sol[0] def __str__(self): - return "MPPIWilliams" \ No newline at end of file + return "MPPIWilliams" diff --git a/PythonLinearNonlinearControl/controllers/random.py b/PythonLinearNonlinearControl/controllers/random.py index 53a7622..e82fbdf 100644 --- a/PythonLinearNonlinearControl/controllers/random.py +++ b/PythonLinearNonlinearControl/controllers/random.py @@ -8,6 +8,7 @@ from ..envs.cost import calc_cost logger = getLogger(__name__) + class RandomShooting(Controller): """ Random Shooting Method for linear and nonlinear method @@ -19,6 +20,7 @@ class RandomShooting(Controller): using probabilistic dynamics models. In Advances in Neural Information Processing Systems (pp. 4754-4765). """ + def __init__(self, config, model): super(RandomShooting, self).__init__(config, model) @@ -33,7 +35,7 @@ class RandomShooting(Controller): self.pop_size = config.opt_config["Random"]["popsize"] self.opt_dim = self.input_size * self.pred_len - # get bound + # get bound self.input_upper_bounds = np.tile(config.INPUT_UPPER_BOUND, self.pred_len) self.input_lower_bounds = np.tile(config.INPUT_LOWER_BOUND, @@ -46,7 +48,7 @@ class RandomShooting(Controller): # save self.history_u = [] - + def obtain_sol(self, curr_x, g_xs): """ calculate the optimal inputs @@ -65,8 +67,8 @@ class RandomShooting(Controller): # calc cost costs = self.calc_cost(curr_x, samples.reshape(self.pop_size, - self.pred_len, - self.input_size), + self.pred_len, + self.input_size), g_xs) # solution sol = samples[np.argmin(costs)] @@ -74,4 +76,4 @@ class RandomShooting(Controller): return sol.reshape(self.pred_len, self.input_size).copy()[0] def __str__(self): - return "RandomShooting" \ No newline at end of file + return "RandomShooting" diff --git a/PythonLinearNonlinearControl/envs/__init__.py b/PythonLinearNonlinearControl/envs/__init__.py index 837b5d8..e649609 100644 --- a/PythonLinearNonlinearControl/envs/__init__.py +++ b/PythonLinearNonlinearControl/envs/__init__.py @@ -5,4 +5,4 @@ from PythonLinearNonlinearControl.envs.first_order_lag \ from PythonLinearNonlinearControl.envs.two_wheeled \ import TwoWheeledConstEnv # NOQA from PythonLinearNonlinearControl.envs.two_wheeled \ - import TwoWheeledTrackEnv # NOQA \ No newline at end of file + import TwoWheeledTrackEnv # NOQA diff --git a/PythonLinearNonlinearControl/envs/cartpole.py b/PythonLinearNonlinearControl/envs/cartpole.py index fd70d47..f64b0df 100644 --- a/PythonLinearNonlinearControl/envs/cartpole.py +++ b/PythonLinearNonlinearControl/envs/cartpole.py @@ -4,6 +4,7 @@ from matplotlib.axes import Axes from .env import Env from ..plotters.plot_objs import square + class CartPoleEnv(Env): """ Cartpole Environment @@ -13,13 +14,14 @@ class CartPoleEnv(Env): 6-832-underactuated-robotics-spring-2009/readings/ MIT6_832s09_read_ch03.pdf """ + def __init__(self): """ """ - self.config = {"state_size" : 4, - "input_size" : 1, - "dt" : 0.02, - "max_step" : 500, + self.config = {"state_size": 4, + "input_size": 1, + "dt": 0.02, + "max_step": 500, "input_lower_bound": [-3.], "input_upper_bound": [3.], "mp": 0.2, @@ -30,7 +32,7 @@ class CartPoleEnv(Env): } super(CartPoleEnv, self).__init__(self.config) - + def reset(self, init_x=None): """ reset state @@ -39,7 +41,7 @@ class CartPoleEnv(Env): info (dict): information """ self.step_count = 0 - + theta = np.random.randn(1) self.curr_x = np.array([0., 0., theta[0], 0.]) @@ -48,7 +50,7 @@ class CartPoleEnv(Env): # goal self.g_x = np.array([0., 0., -np.pi, 0.]) - + # clear memory self.history_x = [] self.history_g_x = [] @@ -76,50 +78,50 @@ class CartPoleEnv(Env): # x d_x0 = self.curr_x[1] # v_x - d_x1 = (u[0] + self.config["mp"] * np.sin(self.curr_x[2]) \ - * (self.config["l"] * (self.curr_x[3]**2) \ - + self.config["g"] * np.cos(self.curr_x[2]))) \ - / (self.config["mc"] + self.config["mp"] \ - * (np.sin(self.curr_x[2])**2)) + d_x1 = (u[0] + self.config["mp"] * np.sin(self.curr_x[2]) + * (self.config["l"] * (self.curr_x[3]**2) + + self.config["g"] * np.cos(self.curr_x[2]))) \ + / (self.config["mc"] + self.config["mp"] + * (np.sin(self.curr_x[2])**2)) # theta d_x2 = self.curr_x[3] - + # v_theta - d_x3 = (-u[0] * np.cos(self.curr_x[2]) \ - - self.config["mp"] * self.config["l"] * (self.curr_x[3]**2) \ - * np.cos(self.curr_x[2]) * np.sin(self.curr_x[2]) \ - - (self.config["mc"] + self.config["mp"]) * self.config["g"] \ - * np.sin(self.curr_x[2])) \ - / (self.config["l"] * (self.config["mc"] + self.config["mp"] \ - * (np.sin(self.curr_x[2])**2))) - + d_x3 = (-u[0] * np.cos(self.curr_x[2]) + - self.config["mp"] * self.config["l"] * (self.curr_x[3]**2) + * np.cos(self.curr_x[2]) * np.sin(self.curr_x[2]) + - (self.config["mc"] + self.config["mp"]) * self.config["g"] + * np.sin(self.curr_x[2])) \ + / (self.config["l"] * (self.config["mc"] + self.config["mp"] + * (np.sin(self.curr_x[2])**2))) + next_x = self.curr_x +\ - np.array([d_x0, d_x1, d_x2, d_x3]) * self.config["dt"] + np.array([d_x0, d_x1, d_x2, d_x3]) * self.config["dt"] # TODO: costs costs = 0. costs += 0.1 * np.sum(u**2) costs += 6. * self.curr_x[0]**2 \ - + 12. * (np.cos(self.curr_x[2]) + 1.)**2 \ - + 0.1 * self.curr_x[1]**2 \ - + 0.1 * self.curr_x[3]**2 + + 12. * (np.cos(self.curr_x[2]) + 1.)**2 \ + + 0.1 * self.curr_x[1]**2 \ + + 0.1 * self.curr_x[3]**2 # save history self.history_x.append(next_x.flatten()) self.history_g_x.append(self.g_x.flatten()) - + # update self.curr_x = next_x.flatten().copy() # update costs self.step_count += 1 return next_x.flatten(), costs, \ - self.step_count > self.config["max_step"], \ - {"goal_state" : self.g_x} - + self.step_count > self.config["max_step"], \ + {"goal_state": self.g_x} + def plot_func(self, to_plot, i=None, history_x=None, history_g_x=None): """ plot cartpole object function - + Args: to_plot (axis or imgs): plotted objects i (int): frame count @@ -131,29 +133,29 @@ class CartPoleEnv(Env): """ if isinstance(to_plot, Axes): imgs = {} # create new imgs - + imgs["cart"] = to_plot.plot([], [], c="k")[0] imgs["pole"] = to_plot.plot([], [], c="k", linewidth=5)[0] - imgs["center"] = to_plot.plot([], [], marker="o", c="k",\ + imgs["center"] = to_plot.plot([], [], marker="o", c="k", markersize=10)[0] # centerline - to_plot.plot(np.linspace(-1., 1., num=50), np.zeros(50),\ + to_plot.plot(np.linspace(-1., 1., num=50), np.zeros(50), c="k", linestyle="dashed") - + # set axis to_plot.set_xlim([-1., 1.]) to_plot.set_ylim([-0.55, 1.5]) - + return imgs # set imgs cart_x, cart_y, pole_x, pole_y = \ self._plot_cartpole(history_x[i]) - + to_plot["cart"].set_data(cart_x, cart_y) to_plot["pole"].set_data(pole_x, pole_y) to_plot["center"].set_data(history_x[i][0], 0.) - + def _plot_cartpole(self, curr_x): """ plot cartpole fucntions @@ -166,13 +168,13 @@ class CartPoleEnv(Env): pole_y (numpy.ndarray): y data of pole """ # cart - cart_x, cart_y = square(curr_x[0], 0.,\ + cart_x, cart_y = square(curr_x[0], 0., self.config["cart_size"], 0.) - - # pole - pole_x = np.array([curr_x[0], curr_x[0] + self.config["l"] \ - * np.cos(curr_x[2]-np.pi/2)]) - pole_y = np.array([0., self.config["l"] \ - * np.sin(curr_x[2]-np.pi/2)]) - return cart_x, cart_y, pole_x, pole_y \ No newline at end of file + # pole + pole_x = np.array([curr_x[0], curr_x[0] + self.config["l"] + * np.cos(curr_x[2]-np.pi/2)]) + pole_y = np.array([0., self.config["l"] + * np.sin(curr_x[2]-np.pi/2)]) + + return cart_x, cart_y, pole_x, pole_y diff --git a/PythonLinearNonlinearControl/envs/cost.py b/PythonLinearNonlinearControl/envs/cost.py index 117d5f2..3247dec 100644 --- a/PythonLinearNonlinearControl/envs/cost.py +++ b/PythonLinearNonlinearControl/envs/cost.py @@ -4,6 +4,7 @@ import numpy as np logger = getLogger(__name__) + def calc_cost(pred_xs, input_sample, g_xs, state_cost_fn, input_cost_fn, terminal_state_cost_fn): """ calculate the cost @@ -24,20 +25,21 @@ def calc_cost(pred_xs, input_sample, g_xs, # state cost state_cost = 0. if state_cost_fn is not None: - state_pred_par_cost = state_cost_fn(pred_xs[:, 1:-1, :], g_xs[:, 1:-1, :]) + state_pred_par_cost = state_cost_fn( + pred_xs[:, 1:-1, :], g_xs[:, 1:-1, :]) state_cost = np.sum(np.sum(state_pred_par_cost, axis=-1), axis=-1) # terminal cost terminal_state_cost = 0. if terminal_state_cost_fn is not None: terminal_state_par_cost = terminal_state_cost_fn(pred_xs[:, -1, :], - g_xs[:, -1, :]) + g_xs[:, -1, :]) terminal_state_cost = np.sum(terminal_state_par_cost, axis=-1) - + # act cost act_cost = 0. if input_cost_fn is not None: act_pred_par_cost = input_cost_fn(input_sample) act_cost = np.sum(np.sum(act_pred_par_cost, axis=-1), axis=-1) - return state_cost + terminal_state_cost + act_cost \ No newline at end of file + return state_cost + terminal_state_cost + act_cost diff --git a/PythonLinearNonlinearControl/envs/env.py b/PythonLinearNonlinearControl/envs/env.py index 4f63903..9f59141 100644 --- a/PythonLinearNonlinearControl/envs/env.py +++ b/PythonLinearNonlinearControl/envs/env.py @@ -1,13 +1,15 @@ import numpy as np + class Env(): """ Environments class Attributes: - + curr_x (numpy.ndarray): current state history_x (list[numpy.ndarray]): historty of state, shape(step_count*state_size) step_count (int): step count """ + def __init__(self, config): """ """ @@ -25,12 +27,12 @@ class Env(): info (dict): information """ self.step_count = 0 - + self.curr_x = np.zeros(self.config["state_size"]) if init_x is not None: self.curr_x = init_x - + # clear memory self.history_x = [] self.history_g_x = [] @@ -52,4 +54,4 @@ class Env(): def __repr__(self): """ """ - return self.config \ No newline at end of file + return self.config diff --git a/PythonLinearNonlinearControl/envs/first_order_lag.py b/PythonLinearNonlinearControl/envs/first_order_lag.py index 5597a07..f7f06bc 100644 --- a/PythonLinearNonlinearControl/envs/first_order_lag.py +++ b/PythonLinearNonlinearControl/envs/first_order_lag.py @@ -3,25 +3,27 @@ import scipy from scipy import integrate from .env import Env + class FirstOrderLagEnv(Env): """ First Order Lag System Env """ + def __init__(self, tau=0.63): """ """ - self.config = {"state_size" : 4,\ - "input_size" : 2,\ - "dt" : 0.05,\ - "max_step" : 500,\ - "input_lower_bound": [-0.5, -0.5],\ + self.config = {"state_size": 4, + "input_size": 2, + "dt": 0.05, + "max_step": 500, + "input_lower_bound": [-0.5, -0.5], "input_upper_bound": [0.5, 0.5], } super(FirstOrderLagEnv, self).__init__(self.config) # to get discrete system matrix - self.A, self.B = self._to_state_space(tau, dt=self.config["dt"]) - + self.A, self.B = self._to_state_space(tau, dt=self.config["dt"]) + @staticmethod def _to_state_space(tau, dt=0.05): """ @@ -34,13 +36,13 @@ class FirstOrderLagEnv(Env): """ # continuous Ac = np.array([[-1./tau, 0., 0., 0.], - [0., -1./tau, 0., 0.], - [1., 0., 0., 0.], - [0., 1., 0., 0.]]) + [0., -1./tau, 0., 0.], + [1., 0., 0., 0.], + [0., 1., 0., 0.]]) Bc = np.array([[1./tau, 0.], - [0., 1./tau], - [0., 0.], - [0., 0.]]) + [0., 1./tau], + [0., 0.], + [0., 0.]]) # to discrete system A = scipy.linalg.expm(dt*Ac) # B = np.matmul(np.matmul(scipy.linalg.expm(Ac*dt) - @@ -55,7 +57,7 @@ class FirstOrderLagEnv(Env): B[m, n] = sol[0] return A, B - + def reset(self, init_x=None): """ reset state Returns: @@ -63,7 +65,7 @@ class FirstOrderLagEnv(Env): info (dict): information """ self.step_count = 0 - + self.curr_x = np.zeros(self.config["state_size"]) if init_x is not None: @@ -71,7 +73,7 @@ class FirstOrderLagEnv(Env): # goal self.g_x = np.array([0., 0, -2., 3.]) - + # clear memory self.history_x = [] self.history_g_x = [] @@ -94,7 +96,7 @@ class FirstOrderLagEnv(Env): self.config["input_upper_bound"]) next_x = np.matmul(self.A, self.curr_x[:, np.newaxis]) \ - + np.matmul(self.B, u[:, np.newaxis]) + + np.matmul(self.B, u[:, np.newaxis]) # cost cost = 0 @@ -104,17 +106,17 @@ class FirstOrderLagEnv(Env): # save history self.history_x.append(next_x.flatten()) self.history_g_x.append(self.g_x.flatten()) - + # update self.curr_x = next_x.flatten() # update costs self.step_count += 1 return next_x.flatten(), cost, \ - self.step_count > self.config["max_step"], \ - {"goal_state" : self.g_x} - + self.step_count > self.config["max_step"], \ + {"goal_state": self.g_x} + def plot_func(self, to_plot, i=None, history_x=None, history_g_x=None): """ """ - raise ValueError("FirstOrderLag does not have animation") \ No newline at end of file + raise ValueError("FirstOrderLag does not have animation") diff --git a/PythonLinearNonlinearControl/envs/make_envs.py b/PythonLinearNonlinearControl/envs/make_envs.py index 253f3ed..c35e11a 100644 --- a/PythonLinearNonlinearControl/envs/make_envs.py +++ b/PythonLinearNonlinearControl/envs/make_envs.py @@ -3,6 +3,7 @@ from .two_wheeled import TwoWheeledConstEnv from .two_wheeled import TwoWheeledTrackEnv from .cartpole import CartPoleEnv + def make_env(args): if args.env == "FirstOrderLag": @@ -13,5 +14,5 @@ def make_env(args): return TwoWheeledTrackEnv() elif args.env == "CartPole": return CartPoleEnv() - - raise NotImplementedError("There is not {} Env".format(args.env)) \ No newline at end of file + + raise NotImplementedError("There is not {} Env".format(args.env)) diff --git a/PythonLinearNonlinearControl/envs/nonlinear_sample_system.py b/PythonLinearNonlinearControl/envs/nonlinear_sample_system.py index 9e66e6e..a17390b 100644 --- a/PythonLinearNonlinearControl/envs/nonlinear_sample_system.py +++ b/PythonLinearNonlinearControl/envs/nonlinear_sample_system.py @@ -4,22 +4,24 @@ from scipy import integrate from .env import Env from ..common.utils import update_state_with_Runge_Kutta + class NonlinearSampleEnv(Env): """ Nonlinear Sample Env """ + def __init__(self): """ """ - self.config = {"state_size" : 2,\ - "input_size" : 1,\ - "dt" : 0.01,\ - "max_step" : 250,\ - "input_lower_bound": [-0.5],\ + self.config = {"state_size": 2, + "input_size": 1, + "dt": 0.01, + "max_step": 250, + "input_lower_bound": [-0.5], "input_upper_bound": [0.5], } super(NonlinearSampleEnv, self).__init__(self.config) - + def reset(self, init_x=np.array([2., 0.])): """ reset state Returns: @@ -27,7 +29,7 @@ class NonlinearSampleEnv(Env): info (dict): information """ self.step_count = 0 - + self.curr_x = np.zeros(self.config["state_size"]) if init_x is not None: @@ -35,7 +37,7 @@ class NonlinearSampleEnv(Env): # goal self.g_x = np.array([0., 0.]) - + # clear memory self.history_x = [] self.history_g_x = [] @@ -60,7 +62,7 @@ class NonlinearSampleEnv(Env): funtions = [self._func_x_1, self._func_x_2] next_x = update_state_with_Runge_Kutta(self._curr_x, u, - functions, self.config["dt"]) + functions, self.config["dt"]) # cost cost = 0 @@ -70,29 +72,29 @@ class NonlinearSampleEnv(Env): # save history self.history_x.append(next_x.flatten()) self.history_g_x.append(self.g_x.flatten()) - + # update self.curr_x = next_x.flatten() # update costs self.step_count += 1 return next_x.flatten(), cost, \ - self.step_count > self.config["max_step"], \ - {"goal_state" : self.g_x} - + self.step_count > self.config["max_step"], \ + {"goal_state": self.g_x} + def _func_x_1(self, x_1, x_2, u): """ """ x_dot = x_2 return x_dot - + def _func_x_2(self, x_1, x_2, u): """ """ x_dot = (1. - x_1**2 - x_2**2) * x_2 - x_1 + u return x_dot - + def plot_func(self, to_plot, i=None, history_x=None, history_g_x=None): """ """ - raise ValueError("NonlinearSampleEnv does not have animation") \ No newline at end of file + raise ValueError("NonlinearSampleEnv does not have animation") diff --git a/PythonLinearNonlinearControl/envs/two_wheeled.py b/PythonLinearNonlinearControl/envs/two_wheeled.py index 43f5f0c..be59d3e 100644 --- a/PythonLinearNonlinearControl/envs/two_wheeled.py +++ b/PythonLinearNonlinearControl/envs/two_wheeled.py @@ -5,47 +5,50 @@ import matplotlib.pyplot as plt from .env import Env from ..plotters.plot_objs import circle_with_angle, square, circle + def step_two_wheeled_env(curr_x, u, dt, method="Oylar"): """ step two wheeled enviroment - + Args: curr_x (numpy.ndarray): current state, shape(state_size, ) u (numpy.ndarray): input, shape(input_size, ) dt (float): sampling time Returns: next_x (numpy.ndarray): next state, shape(state_size. ) - + Notes: TODO: deal with another method, like Runge Kutta """ B = np.array([[np.cos(curr_x[-1]), 0.], [np.sin(curr_x[-1]), 0.], [0., 1.]]) - + x_dot = np.matmul(B, u[:, np.newaxis]) next_x = x_dot.flatten() * dt + curr_x return next_x + class TwoWheeledConstEnv(Env): """ Two wheeled robot with constant goal Env """ + def __init__(self): """ """ - self.config = {"state_size" : 3,\ - "input_size" : 2,\ - "dt" : 0.01,\ - "max_step" : 500,\ - "input_lower_bound": (-1.5, -3.14),\ - "input_upper_bound": (1.5, 3.14),\ - "car_size": 0.2,\ + self.config = {"state_size": 3, + "input_size": 2, + "dt": 0.01, + "max_step": 500, + "input_lower_bound": (-1.5, -3.14), + "input_upper_bound": (1.5, 3.14), + "car_size": 0.2, "wheel_size": (0.075, 0.015) } super(TwoWheeledConstEnv, self).__init__(self.config) - + def reset(self, init_x=None): """ reset state @@ -54,7 +57,7 @@ class TwoWheeledConstEnv(Env): info (dict): information """ self.step_count = 0 - + self.curr_x = np.zeros(self.config["state_size"]) if init_x is not None: @@ -62,7 +65,7 @@ class TwoWheeledConstEnv(Env): # goal self.g_x = np.array([2.5, 2.5, 0.]) - + # clear memory self.history_x = [] self.history_g_x = [] @@ -96,32 +99,32 @@ class TwoWheeledConstEnv(Env): # save history self.history_x.append(next_x.flatten()) self.history_g_x.append(self.g_x.flatten()) - + # update self.curr_x = next_x.flatten() # update costs self.step_count += 1 return next_x.flatten(), costs, \ - self.step_count > self.config["max_step"], \ - {"goal_state" : self.g_x} - + self.step_count > self.config["max_step"], \ + {"goal_state": self.g_x} + def plot_func(self, to_plot, i=None, history_x=None, history_g_x=None): """ plot cartpole object function - + Args: to_plot (axis or imgs): plotted objects i (int): frame count history_x (numpy.ndarray): history of state, shape(iters, state) history_g_x (numpy.ndarray): history of goal state, shape(iters, state) - + Returns: None or imgs : imgs order is ["cart_img", "pole_img"] """ if isinstance(to_plot, Axes): imgs = {} # create new imgs - + imgs["car"] = to_plot.plot([], [], c="k")[0] imgs["car_angle"] = to_plot.plot([], [], c="k")[0] imgs["left_tire"] = to_plot.plot([], [], c="k", linewidth=5)[0] @@ -139,9 +142,9 @@ class TwoWheeledConstEnv(Env): # set imgs # car imgs car_x, car_y, car_angle_x, car_angle_y, \ - left_tire_x, left_tire_y, right_tire_x, right_tire_y = \ + left_tire_x, left_tire_y, right_tire_x, right_tire_y = \ self._plot_car(history_x[i]) - + to_plot["car"].set_data(car_x, car_y) to_plot["car_angle"].set_data(car_angle_x, car_angle_y) to_plot["left_tire"].set_data(left_tire_x, left_tire_y,) @@ -150,7 +153,7 @@ class TwoWheeledConstEnv(Env): # goal and trajs to_plot["goal"].set_data(history_g_x[i, 0], history_g_x[i, 1]) to_plot["traj"].set_data(history_x[:i, 0], history_x[:i, 1]) - + def _plot_car(self, curr_x): """ plot car fucntions """ @@ -158,53 +161,55 @@ class TwoWheeledConstEnv(Env): car_x, car_y, car_angle_x, car_angle_y = \ circle_with_angle(curr_x[0], curr_x[1], self.config["car_size"], curr_x[2]) - + # left tire - center_x = (self.config["car_size"] \ + center_x = (self.config["car_size"] + self.config["wheel_size"][1]) \ - * np.cos(curr_x[2]-np.pi/2.) + curr_x[0] - center_y = (self.config["car_size"] \ + * np.cos(curr_x[2]-np.pi/2.) + curr_x[0] + center_y = (self.config["car_size"] + self.config["wheel_size"][1]) \ - * np.sin(curr_x[2]-np.pi/2.) + curr_x[1] - + * np.sin(curr_x[2]-np.pi/2.) + curr_x[1] + left_tire_x, left_tire_y = \ - square(center_x, center_y, + square(center_x, center_y, self.config["wheel_size"], curr_x[2]) # right tire - center_x = (self.config["car_size"] \ + center_x = (self.config["car_size"] + self.config["wheel_size"][1]) \ - * np.cos(curr_x[2]+np.pi/2.) + curr_x[0] - center_y = (self.config["car_size"] \ + * np.cos(curr_x[2]+np.pi/2.) + curr_x[0] + center_y = (self.config["car_size"] + self.config["wheel_size"][1]) \ - * np.sin(curr_x[2]+np.pi/2.) + curr_x[1] - + * np.sin(curr_x[2]+np.pi/2.) + curr_x[1] + right_tire_x, right_tire_y = \ - square(center_x, center_y, + square(center_x, center_y, self.config["wheel_size"], curr_x[2]) - + return car_x, car_y, car_angle_x, car_angle_y,\ - left_tire_x, left_tire_y,\ - right_tire_x, right_tire_y + left_tire_x, left_tire_y,\ + right_tire_x, right_tire_y + class TwoWheeledTrackEnv(Env): """ Two wheeled robot with constant goal Env """ + def __init__(self): """ """ - self.config = {"state_size" : 3,\ - "input_size" : 2,\ - "dt" : 0.01,\ - "max_step" : 1000,\ - "input_lower_bound": (-1.5, -3.14),\ - "input_upper_bound": (1.5, 3.14),\ - "car_size": 0.2,\ + self.config = {"state_size": 3, + "input_size": 2, + "dt": 0.01, + "max_step": 1000, + "input_lower_bound": (-1.5, -3.14), + "input_upper_bound": (1.5, 3.14), + "car_size": 0.2, "wheel_size": (0.075, 0.015) } super(TwoWheeledTrackEnv, self).__init__(self.config) - + @staticmethod def make_road(linelength=3., circle_radius=1.): """ make track @@ -220,23 +225,23 @@ class TwoWheeledTrackEnv(Env): # circle circle_1_x, circle_1_y = circle(linelength/2., circle_radius, - circle_radius, start=-np.pi/2., end=np.pi/2., n_point=50) - circle_1 = np.stack((circle_1_x , circle_1_y), axis=1) - + circle_radius, start=-np.pi/2., end=np.pi/2., n_point=50) + circle_1 = np.stack((circle_1_x, circle_1_y), axis=1) + circle_2_x, circle_2_y = circle(-linelength/2., circle_radius, - circle_radius, start=np.pi/2., end=3*np.pi/2., n_point=50) - circle_2 = np.stack((circle_2_x , circle_2_y), axis=1) + circle_radius, start=np.pi/2., end=3*np.pi/2., n_point=50) + circle_2 = np.stack((circle_2_x, circle_2_y), axis=1) road_pos = np.concatenate((line_1, circle_1, line_2, circle_2), axis=0) # calc road angle road_diff = road_pos[1:] - road_pos[:-1] - road_angle = np.arctan2(road_diff[:, 1], road_diff[:, 0]) + road_angle = np.arctan2(road_diff[:, 1], road_diff[:, 0]) road_angle = np.concatenate((np.zeros(1), road_angle)) road = np.concatenate((road_pos, road_angle[:, np.newaxis]), axis=1) - return np.tile(road, (3, 1)) + return np.tile(road, (3, 1)) def reset(self, init_x=None): """ reset state @@ -246,7 +251,7 @@ class TwoWheeledTrackEnv(Env): info (dict): information """ self.step_count = 0 - + self.curr_x = np.zeros(self.config["state_size"]) if init_x is not None: @@ -254,7 +259,7 @@ class TwoWheeledTrackEnv(Env): # goal self.g_traj = self.make_road() - + # clear memory self.history_x = [] self.history_g_x = [] @@ -286,32 +291,32 @@ class TwoWheeledTrackEnv(Env): # save history self.history_x.append(next_x.flatten()) - + # update self.curr_x = next_x.flatten() # update costs self.step_count += 1 return next_x.flatten(), costs, \ - self.step_count > self.config["max_step"], \ - {"goal_state" : self.g_traj} - + self.step_count > self.config["max_step"], \ + {"goal_state": self.g_traj} + def plot_func(self, to_plot, i=None, history_x=None, history_g_x=None): """ plot cartpole object function - + Args: to_plot (axis or imgs): plotted objects i (int): frame count history_x (numpy.ndarray): history of state, shape(iters, state) history_g_x (numpy.ndarray): history of goal state, shape(iters, state) - + Returns: None or imgs : imgs order is ["cart_img", "pole_img"] """ if isinstance(to_plot, Axes): imgs = {} # create new imgs - + imgs["car"] = to_plot.plot([], [], c="k")[0] imgs["car_angle"] = to_plot.plot([], [], c="k")[0] imgs["left_tire"] = to_plot.plot([], [], c="k", linewidth=5)[0] @@ -333,9 +338,9 @@ class TwoWheeledTrackEnv(Env): # set imgs # car imgs car_x, car_y, car_angle_x, car_angle_y, \ - left_tire_x, left_tire_y, right_tire_x, right_tire_y = \ + left_tire_x, left_tire_y, right_tire_x, right_tire_y = \ self._plot_car(history_x[i]) - + to_plot["car"].set_data(car_x, car_y) to_plot["car_angle"].set_data(car_angle_x, car_angle_y) to_plot["left_tire"].set_data(left_tire_x, left_tire_y,) @@ -344,7 +349,7 @@ class TwoWheeledTrackEnv(Env): # goal and trajs to_plot["goal"].set_data(history_g_x[i, 0], history_g_x[i, 1]) to_plot["traj"].set_data(history_x[:i, 0], history_x[:i, 1]) - + def _plot_car(self, curr_x): """ plot car fucntions """ @@ -352,31 +357,31 @@ class TwoWheeledTrackEnv(Env): car_x, car_y, car_angle_x, car_angle_y = \ circle_with_angle(curr_x[0], curr_x[1], self.config["car_size"], curr_x[2]) - + # left tire - center_x = (self.config["car_size"] \ + center_x = (self.config["car_size"] + self.config["wheel_size"][1]) \ - * np.cos(curr_x[2]-np.pi/2.) + curr_x[0] - center_y = (self.config["car_size"] \ + * np.cos(curr_x[2]-np.pi/2.) + curr_x[0] + center_y = (self.config["car_size"] + self.config["wheel_size"][1]) \ - * np.sin(curr_x[2]-np.pi/2.) + curr_x[1] - + * np.sin(curr_x[2]-np.pi/2.) + curr_x[1] + left_tire_x, left_tire_y = \ - square(center_x, center_y, + square(center_x, center_y, self.config["wheel_size"], curr_x[2]) # right tire - center_x = (self.config["car_size"] \ + center_x = (self.config["car_size"] + self.config["wheel_size"][1]) \ - * np.cos(curr_x[2]+np.pi/2.) + curr_x[0] - center_y = (self.config["car_size"] \ + * np.cos(curr_x[2]+np.pi/2.) + curr_x[0] + center_y = (self.config["car_size"] + self.config["wheel_size"][1]) \ - * np.sin(curr_x[2]+np.pi/2.) + curr_x[1] - + * np.sin(curr_x[2]+np.pi/2.) + curr_x[1] + right_tire_x, right_tire_y = \ - square(center_x, center_y, + square(center_x, center_y, self.config["wheel_size"], curr_x[2]) - + return car_x, car_y, car_angle_x, car_angle_y,\ - left_tire_x, left_tire_y,\ - right_tire_x, right_tire_y \ No newline at end of file + left_tire_x, left_tire_y,\ + right_tire_x, right_tire_y diff --git a/PythonLinearNonlinearControl/helper.py b/PythonLinearNonlinearControl/helper.py index 7fa2058..26844ef 100644 --- a/PythonLinearNonlinearControl/helper.py +++ b/PythonLinearNonlinearControl/helper.py @@ -7,6 +7,7 @@ import six import pickle from logging import DEBUG, basicConfig, getLogger, FileHandler, StreamHandler, Formatter, Logger + def make_logger(save_dir): """ Args: @@ -21,7 +22,7 @@ def make_logger(save_dir): # mypackage log level logger = getLogger("PythonLinearNonlinearControl") logger.setLevel(DEBUG) - + # file handler log_path = os.path.join(save_dir, "log.txt") file_handler = FileHandler(log_path) @@ -33,6 +34,7 @@ def make_logger(save_dir): # sh_handler = StreamHandler() # logger.addHandler(sh_handler) + def int_tuple(s): """ transform str to tuple Args: @@ -42,6 +44,7 @@ def int_tuple(s): """ return tuple(int(i) for i in s.split(',')) + def bool_flag(s): """ transform str to bool flg Args: @@ -54,6 +57,7 @@ def bool_flag(s): msg = 'Invalid value "%s" for bool flag (should be 0 or 1)' raise ValueError(msg % s) + def file_exists(path): """ Check file existence on given path Args: @@ -63,6 +67,7 @@ def file_exists(path): """ return os.path.exists(path) + def create_dir_if_not_exist(outdir): """ Check directory existence and creates new directory if not exist Args: @@ -77,6 +82,7 @@ def create_dir_if_not_exist(outdir): return os.makedirs(outdir) + def write_text_to_file(file_path, data): """ Write given text data to file Args: @@ -86,6 +92,7 @@ def write_text_to_file(file_path, data): with open(file_path, 'w') as f: f.write(data) + def read_text_from_file(file_path): """ Read given file as text Args: @@ -96,6 +103,7 @@ def read_text_from_file(file_path): with open(file_path, 'r') as f: return f.read() + def save_pickle(file_path, data): """ pickle given data to file Args: @@ -105,6 +113,7 @@ def save_pickle(file_path, data): with open(file_path, 'wb') as f: pickle.dump(data, f) + def load_pickle(file_path): """ load pickled data from file Args: @@ -118,6 +127,7 @@ def load_pickle(file_path): else: return pickle.load(f, encoding='bytes') + def prepare_output_dir(base_dir, args, time_format='%Y-%m-%d-%H%M%S'): """ prepare a directory with current datetime as name. created directory contains the command and args when the script was called as text file. @@ -144,4 +154,4 @@ def prepare_output_dir(base_dir, args, time_format='%Y-%m-%d-%H%M%S'): argv = ' '.join(sys.argv) write_text_to_file(argv_file_path, argv) - return outdir \ No newline at end of file + return outdir diff --git a/PythonLinearNonlinearControl/models/cartpole.py b/PythonLinearNonlinearControl/models/cartpole.py index db8ef61..cc9bfda 100644 --- a/PythonLinearNonlinearControl/models/cartpole.py +++ b/PythonLinearNonlinearControl/models/cartpole.py @@ -2,9 +2,11 @@ import numpy as np from .model import Model + class CartPoleModel(Model): """ cartpole model """ + def __init__(self, config): """ """ @@ -17,7 +19,7 @@ class CartPoleModel(Model): def predict_next_state(self, curr_x, u): """ predict next state - + Args: curr_x (numpy.ndarray): current state, shape(state_size, ) or shape(pop_size, state_size) @@ -31,59 +33,59 @@ class CartPoleModel(Model): # x d_x0 = curr_x[1] # v_x - d_x1 = (u[0] + self.mp * np.sin(curr_x[2]) \ - * (self.l * (curr_x[3]**2) \ - + self.g * np.cos(curr_x[2]))) \ - / (self.mc + self.mp * (np.sin(curr_x[2])**2)) + d_x1 = (u[0] + self.mp * np.sin(curr_x[2]) + * (self.l * (curr_x[3]**2) + + self.g * np.cos(curr_x[2]))) \ + / (self.mc + self.mp * (np.sin(curr_x[2])**2)) # theta d_x2 = curr_x[3] # v_theta - d_x3 = (-u[0] * np.cos(curr_x[2]) \ - - self.mp * self.l * (curr_x[3]**2) \ - * np.cos(curr_x[2]) * np.sin(curr_x[2]) \ + d_x3 = (-u[0] * np.cos(curr_x[2]) + - self.mp * self.l * (curr_x[3]**2) + * np.cos(curr_x[2]) * np.sin(curr_x[2]) - (self.mc + self.mp) * self.g * np.sin(curr_x[2])) \ - / (self.l * (self.mc + self.mp * (np.sin(curr_x[2])**2))) - + / (self.l * (self.mc + self.mp * (np.sin(curr_x[2])**2))) + next_x = curr_x +\ - np.array([d_x0, d_x1, d_x2, d_x3]) * self.dt - + np.array([d_x0, d_x1, d_x2, d_x3]) * self.dt + return next_x elif len(u.shape) == 2: # x d_x0 = curr_x[:, 1] # v_x - d_x1 = (u[:, 0] + self.mp * np.sin(curr_x[:, 2]) \ - * (self.l * (curr_x[:, 3]**2) \ - + self.g * np.cos(curr_x[:, 2]))) \ - / (self.mc + self.mp * (np.sin(curr_x[:, 2])**2)) + d_x1 = (u[:, 0] + self.mp * np.sin(curr_x[:, 2]) + * (self.l * (curr_x[:, 3]**2) + + self.g * np.cos(curr_x[:, 2]))) \ + / (self.mc + self.mp * (np.sin(curr_x[:, 2])**2)) # theta d_x2 = curr_x[:, 3] # v_theta - d_x3 = (-u[:, 0] * np.cos(curr_x[:, 2]) \ - - self.mp * self.l * (curr_x[:, 3]**2) \ - * np.cos(curr_x[:, 2]) * np.sin(curr_x[:, 2]) \ + d_x3 = (-u[:, 0] * np.cos(curr_x[:, 2]) + - self.mp * self.l * (curr_x[:, 3]**2) + * np.cos(curr_x[:, 2]) * np.sin(curr_x[:, 2]) - (self.mc + self.mp) * self.g * np.sin(curr_x[:, 2])) \ - / (self.l * (self.mc + self.mp * (np.sin(curr_x[:, 2])**2))) - + / (self.l * (self.mc + self.mp * (np.sin(curr_x[:, 2])**2))) + next_x = curr_x +\ - np.stack((d_x0, d_x1, d_x2, d_x3), axis=1) * self.dt + np.stack((d_x0, d_x1, d_x2, d_x3), axis=1) * self.dt return next_x - + def calc_f_x(self, xs, us, dt): """ gradient of model with respect to the state in batch form Args: xs (numpy.ndarray): state, shape(pred_len+1, state_size) us (numpy.ndarray): input, shape(pred_len, input_size,) - + Return: f_x (numpy.ndarray): gradient of model with respect to x, shape(pred_len, state_size, state_size) Notes: This should be discrete form !! - """ + """ # get size (_, state_size) = xs.shape (pred_len, _) = us.shape @@ -95,36 +97,36 @@ class CartPoleModel(Model): # f_theta tmp = ((self.mc + self.mp * np.sin(xs[:, 2])**2)**(-2)) \ - * self.mp * 2. * np.sin(xs[:, 2]) * np.cos(xs[:, 2]) + * self.mp * 2. * np.sin(xs[:, 2]) * np.cos(xs[:, 2]) tmp2 = 1. / (self.mc + self.mp * (np.sin(xs[:, 2])**2)) f_x[:, 1, 2] = - us[:, 0] * tmp \ - - tmp * (self.mp * np.sin(xs[:, 2]) \ - * (self.l * xs[:, 3]**2 \ + - tmp * (self.mp * np.sin(xs[:, 2]) + * (self.l * xs[:, 3]**2 + self.g * np.cos(xs[:, 2]))) \ - + tmp2 * (self.mp * np.cos(xs[:, 2]) * self.l \ - * xs[:, 3]**2 \ - + self.mp * self.g * (np.cos(xs[:, 2])**2 \ - - np.sin(xs[:, 2])**2)) + + tmp2 * (self.mp * np.cos(xs[:, 2]) * self.l + * xs[:, 3]**2 + + self.mp * self.g * (np.cos(xs[:, 2])**2 + - np.sin(xs[:, 2])**2)) f_x[:, 3, 2] = - 1. / self.l * tmp \ - * (-us[:, 0] * np.cos(xs[:, 2]) \ - - self.mp * self.l * (xs[:, 3]**2) \ - * np.cos(xs[:, 2]) * np.sin(xs[:, 2]) \ - - (self.mc + self.mp) * self.g * np.sin(xs[:, 2])) \ - + 1. / self.l * tmp2 \ - * (us[:, 0] * np.sin(xs[:, 2]) \ - - self.mp * self.l * xs[:, 3]**2 \ - * (np.cos(xs[:, 2])**2 - np.sin(xs[:, 2])**2) \ - - (self.mc + self.mp) \ - * self.g * np.cos(xs[:, 2])) + * (-us[:, 0] * np.cos(xs[:, 2]) + - self.mp * self.l * (xs[:, 3]**2) + * np.cos(xs[:, 2]) * np.sin(xs[:, 2]) + - (self.mc + self.mp) * self.g * np.sin(xs[:, 2])) \ + + 1. / self.l * tmp2 \ + * (us[:, 0] * np.sin(xs[:, 2]) + - self.mp * self.l * xs[:, 3]**2 + * (np.cos(xs[:, 2])**2 - np.sin(xs[:, 2])**2) + - (self.mc + self.mp) + * self.g * np.cos(xs[:, 2])) # f_theta_dot - f_x[:, 1, 3] = tmp2 * (self.mp * np.sin(xs[:, 2]) \ - * self.l * 2 * xs[:, 3]) + f_x[:, 1, 3] = tmp2 * (self.mp * np.sin(xs[:, 2]) + * self.l * 2 * xs[:, 3]) f_x[:, 2, 3] = np.ones(pred_len) f_x[:, 3, 3] = 1. / self.l * tmp2 \ - * (-2. * self.mp * self.l * xs[:, 3] \ - * np.cos(xs[:, 2]) * np.sin(xs[:, 2])) + * (-2. * self.mp * self.l * xs[:, 3] + * np.cos(xs[:, 2]) * np.sin(xs[:, 2])) return f_x * dt + np.eye(state_size) # to discrete form @@ -133,25 +135,25 @@ class CartPoleModel(Model): Args: xs (numpy.ndarray): state, shape(pred_len+1, state_size) us (numpy.ndarray): input, shape(pred_len, input_size,) - + Return: f_u (numpy.ndarray): gradient of model with respect to x, shape(pred_len, state_size, input_size) Notes: This should be discrete form !! - """ + """ # get size (_, state_size) = xs.shape (pred_len, input_size) = us.shape f_u = np.zeros((pred_len, state_size, input_size)) - + f_u[:, 1, 0] = 1. / (self.mc + self.mp * (np.sin(xs[:, 2])**2)) - + f_u[:, 3, 0] = -np.cos(xs[:, 2]) \ - / (self.l * (self.mc \ - + self.mp * (np.sin(xs[:, 2])**2))) + / (self.l * (self.mc + + self.mp * (np.sin(xs[:, 2])**2))) return f_u * dt # to discrete form @@ -161,7 +163,7 @@ class CartPoleModel(Model): Args: xs (numpy.ndarray): state, shape(pred_len+1, state_size) us (numpy.ndarray): input, shape(pred_len, input_size,) - + Return: f_xx (numpy.ndarray): gradient of model with respect to x, shape(pred_len, state_size, state_size, state_size) @@ -180,7 +182,7 @@ class CartPoleModel(Model): Args: xs (numpy.ndarray): state, shape(pred_len+1, state_size) us (numpy.ndarray): input, shape(pred_len, input_size,) - + Return: f_ux (numpy.ndarray): gradient of model with respect to x, shape(pred_len, state_size, input_size, state_size) @@ -199,7 +201,7 @@ class CartPoleModel(Model): Args: xs (numpy.ndarray): state, shape(pred_len+1, state_size) us (numpy.ndarray): input, shape(pred_len, input_size,) - + Return: f_uu (numpy.ndarray): gradient of model with respect to x, shape(pred_len, state_size, input_size, input_size) @@ -210,4 +212,4 @@ class CartPoleModel(Model): f_uu = np.zeros((pred_len, state_size, input_size, input_size)) - raise NotImplementedError \ No newline at end of file + raise NotImplementedError diff --git a/PythonLinearNonlinearControl/models/first_order_lag.py b/PythonLinearNonlinearControl/models/first_order_lag.py index a4a97fb..f3ccec5 100644 --- a/PythonLinearNonlinearControl/models/first_order_lag.py +++ b/PythonLinearNonlinearControl/models/first_order_lag.py @@ -3,6 +3,7 @@ import scipy.linalg from scipy import integrate from .model import LinearModel + class FirstOrderLagModel(LinearModel): """ first order lag model Attributes: @@ -10,13 +11,15 @@ class FirstOrderLagModel(LinearModel): u (numpy.ndarray): history_pred_xs (numpy.ndarray): """ + def __init__(self, config, tau=0.63): """ Args: tau (float): time constant - """ + """ # param - self.A, self.B = self._to_state_space(tau, dt=config.DT) # discrete system + self.A, self.B = self._to_state_space( + tau, dt=config.DT) # discrete system super(FirstOrderLagModel, self).__init__(self.A, self.B) @staticmethod @@ -31,21 +34,22 @@ class FirstOrderLagModel(LinearModel): """ # continuous Ac = np.array([[-1./tau, 0., 0., 0.], - [0., -1./tau, 0., 0.], - [1., 0., 0., 0.], - [0., 1., 0., 0.]]) + [0., -1./tau, 0., 0.], + [1., 0., 0., 0.], + [0., 1., 0., 0.]]) Bc = np.array([[1./tau, 0.], - [0., 1./tau], - [0., 0.], - [0., 0.]]) + [0., 1./tau], + [0., 0.], + [0., 0.]]) # to discrete system A = scipy.linalg.expm(dt*Ac) # B = np.matmul(np.matmul(scipy.linalg.expm(Ac*dt)-scipy.linalg.expm(Ac*0.), np.linalg.inv(Ac)), Bc) B = np.zeros_like(Bc) for m in range(Bc.shape[0]): for n in range(Bc.shape[1]): - integrate_fn = lambda tau: np.matmul(scipy.linalg.expm(Ac*tau), Bc)[m, n] + def integrate_fn(tau): return np.matmul( + scipy.linalg.expm(Ac*tau), Bc)[m, n] sol = integrate.quad(integrate_fn, 0, dt) B[m, n] = sol[0] - return A, B \ No newline at end of file + return A, B diff --git a/PythonLinearNonlinearControl/models/make_models.py b/PythonLinearNonlinearControl/models/make_models.py index fb628ad..cc42a45 100644 --- a/PythonLinearNonlinearControl/models/make_models.py +++ b/PythonLinearNonlinearControl/models/make_models.py @@ -2,13 +2,14 @@ from .first_order_lag import FirstOrderLagModel from .two_wheeled import TwoWheeledModel from .cartpole import CartPoleModel + def make_model(args, config): - + if args.env == "FirstOrderLag": return FirstOrderLagModel(config) elif args.env == "TwoWheeledConst" or args.env == "TwoWheeledTrack": return TwoWheeledModel(config) elif args.env == "CartPole": return CartPoleModel(config) - - raise NotImplementedError("There is not {} Model".format(args.env)) \ No newline at end of file + + raise NotImplementedError("There is not {} Model".format(args.env)) diff --git a/PythonLinearNonlinearControl/models/model.py b/PythonLinearNonlinearControl/models/model.py index 5eb2cb7..7e43234 100644 --- a/PythonLinearNonlinearControl/models/model.py +++ b/PythonLinearNonlinearControl/models/model.py @@ -1,8 +1,10 @@ import numpy as np + class Model(): """ base class of model """ + def __init__(self): """ """ @@ -22,17 +24,17 @@ class Model(): or shape(pop_size, pred_len+1, state_size) """ if len(us.shape) == 3: - pred_xs =self._predict_traj_alltogether(curr_x, us) + pred_xs = self._predict_traj_alltogether(curr_x, us) elif len(us.shape) == 2: pred_xs = self._predict_traj(curr_x, us) else: raise ValueError("Invalid us") - + return pred_xs - + def _predict_traj(self, curr_x, us): """ predict trajectories - + Args: curr_x (numpy.ndarray): current state, shape(state_size, ) us (numpy.ndarray): inputs, shape(pred_len, input_size) @@ -53,10 +55,10 @@ class Model(): x = next_x return pred_xs - + def _predict_traj_alltogether(self, curr_x, us): """ predict trajectories for all samples - + Args: curr_x (numpy.ndarray): current state, shape(pop_size, state_size) us (numpy.ndarray): inputs, shape(pop_size, pred_len, input_size) @@ -75,12 +77,12 @@ class Model(): # next_x.shape = (pop_size, state_size) next_x = self.predict_next_state(x, us[t]) # update - pred_xs = np.concatenate((pred_xs, next_x[np.newaxis, :, :]),\ - axis=0) + pred_xs = np.concatenate((pred_xs, next_x[np.newaxis, :, :]), + axis=0) x = next_x return np.transpose(pred_xs, (1, 0, 2)) - + def predict_next_state(self, curr_x, u): """ predict next state """ @@ -99,23 +101,23 @@ class Model(): # get size (pred_len, input_size) = us.shape # pred final adjoint state - lam = self.predict_terminal_adjoint_state(xs[-1],\ + lam = self.predict_terminal_adjoint_state(xs[-1], terminal_g_x=g_xs[-1]) lams = lam[np.newaxis, :] - for t in range(pred_len-1, 0, -1): + for t in range(pred_len-1, 0, -1): prev_lam = \ - self.predict_adjoint_state(lam, xs[t], us[t],\ + self.predict_adjoint_state(lam, xs[t], us[t], goal=g_xs[t], t=t) # update lams = np.concatenate((prev_lam[np.newaxis, :], lams), axis=0) lam = prev_lam - + return lams def predict_adjoint_state(self, lam, x, u, goal=None, t=None): """ predict adjoint states - + Args: lam (numpy.ndarray): adjoint state, shape(state_size, ) x (numpy.ndarray): state, shape(state_size, ) @@ -129,7 +131,7 @@ class Model(): def predict_terminal_adjoint_state(self, terminal_x, terminal_g_x=None): """ predict terminal adjoint state - + Args: terminal_x (numpy.ndarray): terminal state, shape(state_size, ) terminal_g_x (numpy.ndarray): terminal goal state, @@ -143,7 +145,7 @@ class Model(): @staticmethod def calc_f_x(xs, us, dt): """ gradient of model with respect to the state in batch form - """ + """ raise NotImplementedError("Implement gradient of model \ with respect to the state") @@ -153,11 +155,11 @@ class Model(): """ raise NotImplementedError("Implement gradient of model \ with respect to the input") - + @staticmethod def calc_f_xx(xs, us, dt): """ hessian of model with respect to the state in batch form - """ + """ raise NotImplementedError("Implement hessian of model \ with respect to the state") @@ -171,27 +173,29 @@ class Model(): @staticmethod def calc_f_uu(xs, us, dt): """ hessian of model with respect to the state in batch form - """ + """ raise NotImplementedError("Implement hessian of model \ with respect to the input") + class LinearModel(Model): """ discrete linear model, x[k+1] = Ax[k] + Bu[k] - + Attributes: A (numpy.ndarray): shape(state_size, state_size) B (numpy.ndarray): shape(state_size, input_size) """ + def __init__(self, A, B): """ """ super(LinearModel, self).__init__() self.A = A self.B = B - + def predict_next_state(self, curr_x, u): """ predict next state - + Args: curr_x (numpy.ndarray): current state, shape(state_size, ) or shape(pop_size, state_size) @@ -203,7 +207,7 @@ class LinearModel(Model): """ if len(u.shape) == 1: next_x = np.matmul(self.A, curr_x[:, np.newaxis]) \ - + np.matmul(self.B, u[:, np.newaxis]) + + np.matmul(self.B, u[:, np.newaxis]) return next_x.flatten() @@ -211,7 +215,7 @@ class LinearModel(Model): next_x = np.matmul(curr_x, self.A.T) + np.matmul(u, self.B.T) return next_x - + def calc_f_x(self, xs, us, dt): """ gradient of model with respect to the state in batch form @@ -223,7 +227,7 @@ class LinearModel(Model): shape(pred_len, state_size, state_size) Notes: This should be discrete form !! - """ + """ # get size (pred_len, _) = us.shape @@ -240,7 +244,7 @@ class LinearModel(Model): shape(pred_len, state_size, input_size) Notes: This should be discrete form !! - """ + """ # get size (pred_len, input_size) = us.shape @@ -283,7 +287,7 @@ class LinearModel(Model): f_ux = np.zeros((pred_len, state_size, input_size, state_size)) return f_ux - + @staticmethod def calc_f_uu(xs, us, dt): """ hessian of model with respect to input in batch form @@ -301,4 +305,4 @@ class LinearModel(Model): f_uu = np.zeros((pred_len, state_size, input_size, input_size)) - return f_uu + return f_uu diff --git a/PythonLinearNonlinearControl/models/two_wheeled.py b/PythonLinearNonlinearControl/models/two_wheeled.py index 3bd60d7..3a81ff5 100644 --- a/PythonLinearNonlinearControl/models/two_wheeled.py +++ b/PythonLinearNonlinearControl/models/two_wheeled.py @@ -2,9 +2,11 @@ import numpy as np from .model import Model + class TwoWheeledModel(Model): """ two wheeled model """ + def __init__(self, config): """ """ @@ -13,7 +15,7 @@ class TwoWheeledModel(Model): def predict_next_state(self, curr_x, u): """ predict next state - + Args: curr_x (numpy.ndarray): current state, shape(state_size, ) or shape(pop_size, state_size) @@ -50,21 +52,21 @@ class TwoWheeledModel(Model): next_x = x_dot[:, :, 0] * self.dt + curr_x return next_x - + @staticmethod def calc_f_x(xs, us, dt): """ gradient of model with respect to the state in batch form Args: xs (numpy.ndarray): state, shape(pred_len+1, state_size) us (numpy.ndarray): input, shape(pred_len, input_size,) - + Return: f_x (numpy.ndarray): gradient of model with respect to x, shape(pred_len, state_size, state_size) Notes: This should be discrete form !! - """ + """ # get size (_, state_size) = xs.shape (pred_len, _) = us.shape @@ -81,14 +83,14 @@ class TwoWheeledModel(Model): Args: xs (numpy.ndarray): state, shape(pred_len+1, state_size) us (numpy.ndarray): input, shape(pred_len, input_size,) - + Return: f_u (numpy.ndarray): gradient of model with respect to x, shape(pred_len, state_size, input_size) Notes: This should be discrete form !! - """ + """ # get size (_, state_size) = xs.shape (pred_len, input_size) = us.shape @@ -107,7 +109,7 @@ class TwoWheeledModel(Model): Args: xs (numpy.ndarray): state, shape(pred_len+1, state_size) us (numpy.ndarray): input, shape(pred_len, input_size,) - + Return: f_xx (numpy.ndarray): gradient of model with respect to x, shape(pred_len, state_size, state_size, state_size) @@ -130,7 +132,7 @@ class TwoWheeledModel(Model): Args: xs (numpy.ndarray): state, shape(pred_len+1, state_size) us (numpy.ndarray): input, shape(pred_len, input_size,) - + Return: f_ux (numpy.ndarray): gradient of model with respect to x, shape(pred_len, state_size, input_size, state_size) @@ -145,7 +147,7 @@ class TwoWheeledModel(Model): f_ux[:, 1, 0, 2] = np.cos(xs[:, 2]) return f_ux * dt - + @staticmethod def calc_f_uu(xs, us, dt): """ hessian of model with respect to input in batch form @@ -153,7 +155,7 @@ class TwoWheeledModel(Model): Args: xs (numpy.ndarray): state, shape(pred_len+1, state_size) us (numpy.ndarray): input, shape(pred_len, input_size,) - + Return: f_uu (numpy.ndarray): gradient of model with respect to x, shape(pred_len, state_size, input_size, input_size) @@ -164,4 +166,4 @@ class TwoWheeledModel(Model): f_uu = np.zeros((pred_len, state_size, input_size, input_size)) - return f_uu * dt \ No newline at end of file + return f_uu * dt diff --git a/PythonLinearNonlinearControl/planners/closest_point_planner.py b/PythonLinearNonlinearControl/planners/closest_point_planner.py index 291aff3..cf24a97 100644 --- a/PythonLinearNonlinearControl/planners/closest_point_planner.py +++ b/PythonLinearNonlinearControl/planners/closest_point_planner.py @@ -1,9 +1,11 @@ import numpy as np from .planner import Planner + class ClosestPointPlanner(Planner): """ This planner make goal state according to goal path """ + def __init__(self, config): """ """ @@ -24,7 +26,7 @@ class ClosestPointPlanner(Planner): min_idx = np.argmin(np.linalg.norm(curr_x[:-1] - g_traj[:, :-1], axis=1)) - start = (min_idx+self.n_ahead) + start = (min_idx+self.n_ahead) if start > len(g_traj): start = len(g_traj) @@ -32,8 +34,8 @@ class ClosestPointPlanner(Planner): if (min_idx+self.n_ahead+self.pred_len+1) > len(g_traj): end = len(g_traj) - + if abs(start - end) != self.pred_len + 1: return np.tile(g_traj[-1], (self.pred_len+1, 1)) - return g_traj[start:end] \ No newline at end of file + return g_traj[start:end] diff --git a/PythonLinearNonlinearControl/planners/const_planner.py b/PythonLinearNonlinearControl/planners/const_planner.py index a0569a2..caa8110 100644 --- a/PythonLinearNonlinearControl/planners/const_planner.py +++ b/PythonLinearNonlinearControl/planners/const_planner.py @@ -1,9 +1,11 @@ import numpy as np from .planner import Planner + class ConstantPlanner(Planner): """ This planner make constant goal state """ + def __init__(self, config): """ """ @@ -20,4 +22,4 @@ class ConstantPlanner(Planner): Returns: g_xs (numpy.ndarrya): goal state, shape(pred_len, state_size) """ - return np.tile(g_x, (self.pred_len+1, 1)) \ No newline at end of file + return np.tile(g_x, (self.pred_len+1, 1)) diff --git a/PythonLinearNonlinearControl/planners/planner.py b/PythonLinearNonlinearControl/planners/planner.py index 7e20b4f..c524ea7 100644 --- a/PythonLinearNonlinearControl/planners/planner.py +++ b/PythonLinearNonlinearControl/planners/planner.py @@ -1,8 +1,10 @@ import numpy as np + class Planner(): """ """ + def __init__(self): """ """ @@ -15,4 +17,4 @@ class Planner(): Returns: g_xs (numpy.ndarrya): goal state, shape(pred_len, state_size) """ - raise NotImplementedError("Implement plan func") \ No newline at end of file + raise NotImplementedError("Implement plan func") diff --git a/PythonLinearNonlinearControl/plotters/animator.py b/PythonLinearNonlinearControl/plotters/animator.py index 2260707..49a9d05 100644 --- a/PythonLinearNonlinearControl/plotters/animator.py +++ b/PythonLinearNonlinearControl/plotters/animator.py @@ -8,9 +8,11 @@ import matplotlib.animation as animation logger = getLogger(__name__) + class Animator(): """ animation class """ + def __init__(self, env, args=None): """ """ @@ -34,7 +36,7 @@ class Animator(): # make fig self.anim_fig = plt.figure() - # axis + # axis self.axis = self.anim_fig.add_subplot(111) self.axis.set_aspect('equal', adjustable='box') @@ -65,12 +67,12 @@ class Animator(): """ # set up animation figures self._setup() - _update_img = lambda i: self._update_img(i, history_x, history_g_x) + def _update_img(i): return self._update_img(i, history_x, history_g_x) # Set up formatting for the movie files Writer = animation.writers['ffmpeg'] writer = Writer(fps=15, metadata=dict(artist='Me'), bitrate=1800) - + # call funcanimation ani = FuncAnimation( self.anim_fig, @@ -79,6 +81,6 @@ class Animator(): # save animation path = os.path.join(self.result_dir, self.controller_type, "animation-" + self.env_name + ".mp4") - logger.info("Saved Animation to {} ...".format(path)) + logger.info("Saved Animation to {} ...".format(path)) ani.save(path, writer=writer) diff --git a/PythonLinearNonlinearControl/plotters/plot_func.py b/PythonLinearNonlinearControl/plotters/plot_func.py index 3d2a7de..d25e1c5 100644 --- a/PythonLinearNonlinearControl/plotters/plot_func.py +++ b/PythonLinearNonlinearControl/plotters/plot_func.py @@ -5,6 +5,7 @@ import matplotlib.pyplot as plt from ..helper import save_pickle, load_pickle + def plot_result(history, history_g=None, ylabel="x", save_dir="./result", name="state_history"): """ @@ -28,14 +29,14 @@ def plot_result(history, history_g=None, ylabel="x", def plot(axis, history, history_g=None): axis.plot(range(iters), history, c="r", linewidth=3) if history_g is not None: - axis.plot(range(iters), history_g,\ + axis.plot(range(iters), history_g, c="b", linewidth=3, label="goal") if i < size: plot(axis1, history[:, i], history_g=history_g[:, i]) if i+1 < size: plot(axis2, history[:, i+1], history_g=history_g[:, i+1]) - if i+2 < size: + if i+2 < size: plot(axis3, history[:, i+2], history_g=history_g[:, i+2]) # save @@ -47,6 +48,7 @@ def plot_result(history, history_g=None, ylabel="x", axis1.legend(ncol=1, bbox_to_anchor=(0., 1.02, 1., 0.102), loc=3) figure.savefig(path, bbox_inches="tight", pad_inches=0.05) + def plot_results(history_x, history_u, history_g=None, args=None): """ @@ -64,12 +66,13 @@ def plot_results(history_x, history_u, history_g=None, args=None): controller_type = args.controller_type plot_result(history_x, history_g=history_g, ylabel="x", - name= env + "-state_history", + name=env + "-state_history", save_dir="./result/" + controller_type) plot_result(history_u, history_g=np.zeros_like(history_u), ylabel="u", - name= env + "-input_history", + name=env + "-input_history", save_dir="./result/" + controller_type) + def save_plot_data(history_x, history_u, history_g=None, args=None): """ save plot data @@ -98,6 +101,7 @@ def save_plot_data(history_x, history_u, history_g=None, args=None): env + "-history_g.pkl") save_pickle(path, history_g) + def load_plot_data(env, controller_type, result_dir="./result"): """ Args: @@ -123,6 +127,7 @@ def load_plot_data(env, controller_type, result_dir="./result"): return history_x, history_u, history_g + def plot_multi_result(histories, histories_g=None, labels=None, ylabel="x", save_dir="./result", name="state_history"): """ @@ -130,7 +135,7 @@ def plot_multi_result(histories, histories_g=None, labels=None, ylabel="x", history (numpy.ndarray): history, shape(iters, size) """ (_, iters, size) = histories.shape - + for i in range(0, size, 2): figure = plt.figure() @@ -146,17 +151,17 @@ def plot_multi_result(histories, histories_g=None, labels=None, ylabel="x", axis.plot(range(iters), history, linewidth=3, label=label, alpha=0.7, linestyle="dashed") if history_g is not None: - axis.plot(range(iters), history_g,\ + axis.plot(range(iters), history_g, c="b", linewidth=3) if i < size: for j, (history, history_g) \ - in enumerate(zip(histories, histories_g)): + in enumerate(zip(histories, histories_g)): plot(axis1, history[:, i], history_g=history_g[:, i], label=labels[j]) if i+1 < size: for j, (history, history_g) in \ - enumerate(zip(histories, histories_g)): + enumerate(zip(histories, histories_g)): plot(axis2, history[:, i+1], history_g=history_g[:, i+1], label=labels[j]) diff --git a/PythonLinearNonlinearControl/plotters/plot_objs.py b/PythonLinearNonlinearControl/plotters/plot_objs.py index 911cb5f..1e14bcb 100644 --- a/PythonLinearNonlinearControl/plotters/plot_objs.py +++ b/PythonLinearNonlinearControl/plotters/plot_objs.py @@ -5,9 +5,10 @@ import matplotlib.pyplot as plt from ..common.utils import rotate_pos + def circle(center_x, center_y, radius, start=0., end=2*np.pi, n_point=100): """ Create circle matrix - + Args: center_x (float): the center x position of the circle center_y (float): the center y position of the circle @@ -29,6 +30,7 @@ def circle(center_x, center_y, radius, start=0., end=2*np.pi, n_point=100): return np.array(circle_xs), np.array(circle_ys) + def circle_with_angle(center_x, center_y, radius, angle): """ Create circle matrix with angle line matrix @@ -50,6 +52,7 @@ def circle_with_angle(center_x, center_y, radius, angle): return circle_x, circle_y, angle_x, angle_y + def square(center_x, center_y, shape, angle): """ Create square @@ -74,9 +77,10 @@ def square(center_x, center_y, shape, angle): trans_points = rotate_pos(square_xy, angle) # translation trans_points += np.array([center_x, center_y]) - + return trans_points[:, 0], trans_points[:, 1] + def square_with_angle(center_x, center_y, shape, angle): """ Create square with angle line @@ -96,4 +100,4 @@ def square_with_angle(center_x, center_y, shape, angle): angle_x = np.array([center_x, center_x + np.cos(angle) * shape[0]]) angle_y = np.array([center_y, center_y + np.sin(angle) * shape[1]]) - return square_x, square_y, angle_x, angle_y \ No newline at end of file + return square_x, square_y, angle_x, angle_y diff --git a/PythonLinearNonlinearControl/runners/__init__.py b/PythonLinearNonlinearControl/runners/__init__.py index 8fd5521..0406fea 100644 --- a/PythonLinearNonlinearControl/runners/__init__.py +++ b/PythonLinearNonlinearControl/runners/__init__.py @@ -1,2 +1,2 @@ from PythonLinearNonlinearControl.runners.runner \ - import ExpRunner # NOQA \ No newline at end of file + import ExpRunner # NOQA diff --git a/PythonLinearNonlinearControl/runners/make_runners.py b/PythonLinearNonlinearControl/runners/make_runners.py index be08186..133ae06 100644 --- a/PythonLinearNonlinearControl/runners/make_runners.py +++ b/PythonLinearNonlinearControl/runners/make_runners.py @@ -1,4 +1,5 @@ from .runner import ExpRunner + def make_runner(args): - return ExpRunner() \ No newline at end of file + return ExpRunner() diff --git a/PythonLinearNonlinearControl/runners/runner.py b/PythonLinearNonlinearControl/runners/runner.py index 4ef0c6b..b83789d 100644 --- a/PythonLinearNonlinearControl/runners/runner.py +++ b/PythonLinearNonlinearControl/runners/runner.py @@ -4,9 +4,11 @@ import numpy as np logger = getLogger(__name__) + class ExpRunner(): """ experiment runner """ + def __init__(self): """ """ @@ -46,6 +48,6 @@ class ExpRunner(): score += cost step_count += 1 - logger.debug("Controller type = {}, Score = {}"\ + logger.debug("Controller type = {}, Score = {}" .format(controller, score)) - return np.array(history_x), np.array(history_u), np.array(history_g) \ No newline at end of file + return np.array(history_x), np.array(history_u), np.array(history_g)