diff --git a/Environments.md b/Environments.md index 412b41c..d1a010b 100644 --- a/Environments.md +++ b/Environments.md @@ -6,6 +6,8 @@ | Two wheeled System (Constant Goal) | x | ✓ | 3 | 2 | | Two wheeled System (Moving Goal) (Coming soon) | x | ✓ | 3 | 2 | | Cartpole (Swing up) | x | ✓ | 4 | 1 | +| Nonlinear Sample System Env | x | ✓ | 2 | 1 | + ## [FistOrderLagEnv](PythonLinearNonlinearControl/envs/first_order_lag.py) @@ -53,4 +55,14 @@ mc = 1, mp = 0.2, l = 0.5, g = 9.81 ### Cost. - \ No newline at end of file + + +## [Nonlinear Sample System Env](PythonLinearNonlinearControl/envs/nonlinear_sample_system.py) + +## System equation. + + + +### Cost. + + diff --git a/PythonLinearNonlinearControl/common/utils.py b/PythonLinearNonlinearControl/common/utils.py index 3645507..5fd2092 100644 --- a/PythonLinearNonlinearControl/common/utils.py +++ b/PythonLinearNonlinearControl/common/utils.py @@ -1,8 +1,9 @@ import numpy as np + def rotate_pos(pos, angle): """ Transformation the coordinate in the angle - + Args: pos (numpy.ndarray): local state, shape(data_size, 2) angle (float): rotate angle, in radians @@ -14,9 +15,10 @@ def rotate_pos(pos, angle): return np.dot(pos, rot_mat.T) + def fit_angle_in_range(angles, min_angle=-np.pi, max_angle=np.pi): """ Check angle range and correct the range - + Args: angle (numpy.ndarray): in radians min_angle (float): maximum of range in radians, default -pi @@ -29,7 +31,7 @@ def fit_angle_in_range(angles, min_angle=-np.pi, max_angle=np.pi): if (max_angle - min_angle) < 2.0 * np.pi: raise ValueError("difference between max_angle \ and min_angle must be greater than 2.0 * pi") - + output = np.array(angles) output_shape = output.shape @@ -43,58 +45,103 @@ def fit_angle_in_range(angles, min_angle=-np.pi, max_angle=np.pi): output = np.minimum(max_angle, np.maximum(min_angle, output)) return output.reshape(output_shape) -def update_state_with_Runge_Kutta(state, u, functions, dt=0.01): + +def update_state_with_Runge_Kutta(state, u, functions, dt=0.01, batch=True): """ update state in Runge Kutta methods Args: state (array-like): state of system u (array-like): input of system functions (list): update function of each state, - each function will be called like func(*state, *u) + each function will be called like func(state, u) We expect that this function returns differential of each state dt (float): float in seconds - + batch (bool): state and u is given by batch or not + Returns: next_state (np.array): next state of system - + Notes: sample of function is as follows: def func_x(self, x_1, x_2, u): x_dot = (1. - x_1**2 - x_2**2) * x_2 - x_1 + u return x_dot - + Note that the function return x_dot. """ - state_size = len(state) - assert state_size == len(functions), \ - "Invalid functions length, You need to give the state size functions" - - k0 = np.zeros(state_size) - k1 = np.zeros(state_size) - k2 = np.zeros(state_size) - k3 = np.zeros(state_size) + if not batch: + state_size = len(state) + assert state_size == len(functions), \ + "Invalid functions length, You need to give the state size functions" - inputs = np.concatenate([state, u]) + k0 = np.zeros(state_size) + k1 = np.zeros(state_size) + k2 = np.zeros(state_size) + k3 = np.zeros(state_size) - for i, func in enumerate(functions): - k0[i] = dt * func(*inputs) + for i, func in enumerate(functions): + k0[i] = dt * func(state, u) - add_state = state + k0 / 2. - inputs = np.concatenate([add_state, u]) + for i, func in enumerate(functions): + k1[i] = dt * func(state + k0 / 2., u) - for i, func in enumerate(functions): - k1[i] = dt * func(*inputs) - - add_state = state + k1 / 2. - inputs = np.concatenate([add_state, u]) - - for i, func in enumerate(functions): - k2[i] = dt * func(*inputs) + for i, func in enumerate(functions): + k2[i] = dt * func(state + k1 / 2., u) - add_state = state + k2 - inputs = np.concatenate([add_state, u]) + for i, func in enumerate(functions): + k3[i] = dt * func(state + k2, u) - for i, func in enumerate(functions): - k3[i] = dt * func(*inputs) + return state + (k0 + 2. * k1 + 2. * k2 + k3) / 6. - return (k0 + 2. * k1 + 2. * k2 + k3) / 6. \ No newline at end of file + else: + batch_size, state_size = state.shape + assert state_size == len(functions), \ + "Invalid functions length, You need to give the state size functions" + + k0 = np.zeros((batch_size, state_size)) + k1 = np.zeros((batch_size, state_size)) + k2 = np.zeros((batch_size, state_size)) + k3 = np.zeros((batch_size, state_size)) + + for i, func in enumerate(functions): + k0[:, i] = dt * func(state, u) + + for i, func in enumerate(functions): + k1[:, i] = dt * func(state + k0 / 2., u) + + for i, func in enumerate(functions): + k2[:, i] = dt * func(state + k1 / 2., u) + + for i, func in enumerate(functions): + k3[:, i] = dt * func(state + k2, u) + + return state + (k0 + 2. * k1 + 2. * k2 + k3) / 6. + + +def line_search(grad, sol, compute_eval_val, + init_alpha=0.001, max_iter=100, update_ratio=1.): + """ line search + Args: + grad (numpy.ndarray): gradient + sol (numpy.ndarray): sol + compute_eval_val (numpy.ndarray): function to compute evaluation value + + Returns: + alpha (float): result of line search + """ + assert grad.shape == sol.shape + base_val = np.inf + alpha = init_alpha + original_sol = sol.copy() + + for _ in range(max_iter): + updated_sol = original_sol - alpha * grad + eval_val = compute_eval_val(updated_sol) + + if eval_val < base_val: + alpha += init_alpha * update_ratio + base_val = eval_val + else: + break + + return alpha diff --git a/PythonLinearNonlinearControl/configs/cartpole.py b/PythonLinearNonlinearControl/configs/cartpole.py index bbcf99b..416f9a7 100644 --- a/PythonLinearNonlinearControl/configs/cartpole.py +++ b/PythonLinearNonlinearControl/configs/cartpole.py @@ -1,5 +1,6 @@ import numpy as np + class CartPoleConfigModule(): # parameters ENV_NAME = "CartPole-v0" @@ -12,7 +13,7 @@ class CartPoleConfigModule(): DT = 0.02 # cost parameters R = np.diag([0.01]) # 0.01 is worked for MPPI and CEM and MPPIWilliams - # 1. is worked for iLQR + # 1. is worked for iLQR TERMINAL_WEIGHT = 1. Q = None Sf = None @@ -39,41 +40,41 @@ class CartPoleConfigModule(): "num_elites": 50, "max_iters": 15, "alpha": 0.3, - "init_var":9., - "threshold":0.001 + "init_var": 9., + "threshold": 0.001 }, - "MPPI":{ - "beta" : 0.6, + "MPPI": { + "beta": 0.6, "popsize": 5000, "kappa": 0.9, "noise_sigma": 0.5, }, - "MPPIWilliams":{ + "MPPIWilliams": { "popsize": 5000, "lambda": 1., "noise_sigma": 0.9, }, - "iLQR":{ + "iLQR": { "max_iter": 500, "init_mu": 1., "mu_min": 1e-6, "mu_max": 1e10, "init_delta": 2., "threshold": 1e-6, - }, - "DDP":{ + }, + "DDP": { "max_iter": 500, "init_mu": 1., "mu_min": 1e-6, "mu_max": 1e10, "init_delta": 2., "threshold": 1e-6, - }, - "NMPC-CGMRES":{ - }, - "NMPC-Newton":{ - }, - } + }, + "NMPC-CGMRES": { + }, + "NMPC-Newton": { + }, + } @staticmethod def input_cost_fn(u): @@ -87,7 +88,7 @@ class CartPoleConfigModule(): shape(pop_size, pred_len, input_size) """ return (u**2) * np.diag(CartPoleConfigModule.R) - + @staticmethod def state_cost_fn(x, g_x): """ state cost function @@ -103,21 +104,21 @@ class CartPoleConfigModule(): """ if len(x.shape) > 2: - return (6. * (x[:, :, 0]**2) \ - + 12. * ((np.cos(x[:, :, 2]) + 1.)**2) \ - + 0.1 * (x[:, :, 1]**2) \ - + 0.1 * (x[:, :, 3]**2))[:, :, np.newaxis] + return (6. * (x[:, :, 0]**2) + + 12. * ((np.cos(x[:, :, 2]) + 1.)**2) + + 0.1 * (x[:, :, 1]**2) + + 0.1 * (x[:, :, 3]**2))[:, :, np.newaxis] elif len(x.shape) > 1: - return (6. * (x[:, 0]**2) \ - + 12. * ((np.cos(x[:, 2]) + 1.)**2) \ - + 0.1 * (x[:, 1]**2) \ - + 0.1 * (x[:, 3]**2))[:, np.newaxis] - + return (6. * (x[:, 0]**2) + + 12. * ((np.cos(x[:, 2]) + 1.)**2) + + 0.1 * (x[:, 1]**2) + + 0.1 * (x[:, 3]**2))[:, np.newaxis] + return 6. * (x[0]**2) \ - + 12. * ((np.cos(x[2]) + 1.)**2) \ - + 0.1 * (x[1]**2) \ - + 0.1 * (x[3]**2) + + 12. * ((np.cos(x[2]) + 1.)**2) \ + + 0.1 * (x[1]**2) \ + + 0.1 * (x[3]**2) @staticmethod def terminal_state_cost_fn(terminal_x, terminal_g_x): @@ -134,49 +135,49 @@ class CartPoleConfigModule(): """ if len(terminal_x.shape) > 1: - return (6. * (terminal_x[:, 0]**2) \ - + 12. * ((np.cos(terminal_x[:, 2]) + 1.)**2) \ - + 0.1 * (terminal_x[:, 1]**2) \ - + 0.1 * (terminal_x[:, 3]**2))[:, np.newaxis] \ - * CartPoleConfigModule.TERMINAL_WEIGHT - - return (6. * (terminal_x[0]**2) \ - + 12. * ((np.cos(terminal_x[2]) + 1.)**2) \ - + 0.1 * (terminal_x[1]**2) \ - + 0.1 * (terminal_x[3]**2)) \ + return (6. * (terminal_x[:, 0]**2) + + 12. * ((np.cos(terminal_x[:, 2]) + 1.)**2) + + 0.1 * (terminal_x[:, 1]**2) + + 0.1 * (terminal_x[:, 3]**2))[:, np.newaxis] \ * CartPoleConfigModule.TERMINAL_WEIGHT - + + return (6. * (terminal_x[0]**2) + + 12. * ((np.cos(terminal_x[2]) + 1.)**2) + + 0.1 * (terminal_x[1]**2) + + 0.1 * (terminal_x[3]**2)) \ + * CartPoleConfigModule.TERMINAL_WEIGHT + @staticmethod - def gradient_cost_fn_with_state(x, g_x, terminal=False): + def gradient_cost_fn_state(x, g_x, terminal=False): """ gradient of costs with respect to the state Args: x (numpy.ndarray): state, shape(pred_len, state_size) g_x (numpy.ndarray): goal state, shape(pred_len, state_size) - + Returns: l_x (numpy.ndarray): gradient of cost, shape(pred_len, state_size) or shape(1, state_size) """ if not terminal: - cost_dx0 = 12. * x[:, 0] + cost_dx0 = 12. * x[:, 0] cost_dx1 = 0.2 * x[:, 1] cost_dx2 = 24. * (1 + np.cos(x[:, 2])) * -np.sin(x[:, 2]) cost_dx3 = 0.2 * x[:, 3] - cost_dx = np.stack((cost_dx0, cost_dx1,\ + cost_dx = np.stack((cost_dx0, cost_dx1, cost_dx2, cost_dx3), axis=1) return cost_dx - - cost_dx0 = 12. * x[0] + + cost_dx0 = 12. * x[0] cost_dx1 = 0.2 * x[1] cost_dx2 = 24. * (1 + np.cos(x[2])) * -np.sin(x[2]) cost_dx3 = 0.2 * x[3] cost_dx = np.array([[cost_dx0, cost_dx1, cost_dx2, cost_dx3]]) - + return cost_dx * CartPoleConfigModule.TERMINAL_WEIGHT @staticmethod - def gradient_cost_fn_with_input(x, u): + def gradient_cost_fn_input(x, u): """ gradient of costs with respect to the input Args: @@ -188,7 +189,7 @@ class CartPoleConfigModule(): return 2. * u * np.diag(CartPoleConfigModule.R) @staticmethod - def hessian_cost_fn_with_state(x, g_x, terminal=False): + def hessian_cost_fn_state(x, g_x, terminal=False): """ hessian costs with respect to the state Args: @@ -206,27 +207,27 @@ class CartPoleConfigModule(): hessian[:, 0, 0] = 12. hessian[:, 1, 1] = 0.2 hessian[:, 2, 2] = 24. * -np.sin(x[:, 2]) \ - * (-np.sin(x[:, 2])) \ - + 24. * (1. + np.cos(x[:, 2])) \ - * -np.cos(x[:, 2]) + * (-np.sin(x[:, 2])) \ + + 24. * (1. + np.cos(x[:, 2])) \ + * -np.cos(x[:, 2]) hessian[:, 3, 3] = 0.2 return hessian - + state_size = len(x) hessian = np.eye(state_size) hessian[0, 0] = 12. hessian[1, 1] = 0.2 hessian[2, 2] = 24. * -np.sin(x[2]) \ - * (-np.sin(x[2])) \ - + 24. * (1. + np.cos(x[2])) \ - * -np.cos(x[2]) + * (-np.sin(x[2])) \ + + 24. * (1. + np.cos(x[2])) \ + * -np.cos(x[2]) hessian[3, 3] = 0.2 return hessian[np.newaxis, :, :] * CartPoleConfigModule.TERMINAL_WEIGHT @staticmethod - def hessian_cost_fn_with_input(x, u): + def hessian_cost_fn_input(x, u): """ hessian costs with respect to the input Args: @@ -239,9 +240,9 @@ class CartPoleConfigModule(): (pred_len, _) = u.shape return np.tile(2.*CartPoleConfigModule.R, (pred_len, 1, 1)) - + @staticmethod - def hessian_cost_fn_with_input_state(x, u): + def hessian_cost_fn_input_state(x, u): """ hessian costs with respect to the state and input Args: @@ -254,4 +255,4 @@ class CartPoleConfigModule(): (_, state_size) = x.shape (pred_len, input_size) = u.shape - return np.zeros((pred_len, input_size, state_size)) \ No newline at end of file + return np.zeros((pred_len, input_size, state_size)) diff --git a/PythonLinearNonlinearControl/configs/first_order_lag.py b/PythonLinearNonlinearControl/configs/first_order_lag.py index 2c9d268..9cf93b0 100644 --- a/PythonLinearNonlinearControl/configs/first_order_lag.py +++ b/PythonLinearNonlinearControl/configs/first_order_lag.py @@ -1,5 +1,6 @@ import numpy as np + class FirstOrderLagConfigModule(): # parameters ENV_NAME = "FirstOrderLag-v0" @@ -34,43 +35,43 @@ class FirstOrderLagConfigModule(): "num_elites": 50, "max_iters": 15, "alpha": 0.3, - "init_var":1., - "threshold":0.001 + "init_var": 1., + "threshold": 0.001 }, - "MPPI":{ - "beta" : 0.6, + "MPPI": { + "beta": 0.6, "popsize": 5000, "kappa": 0.9, "noise_sigma": 0.5, }, - "MPPIWilliams":{ + "MPPIWilliams": { "popsize": 5000, "lambda": 1., "noise_sigma": 0.9, }, - "MPC":{ - }, - "iLQR":{ - "max_iter": 500, + "MPC": { + }, + "iLQR": { + "max_iters": 500, "init_mu": 1., "mu_min": 1e-6, "mu_max": 1e10, "init_delta": 2., "threshold": 1e-6, - }, - "DDP":{ - "max_iter": 500, + }, + "DDP": { + "max_iters": 500, "init_mu": 1., "mu_min": 1e-6, "mu_max": 1e10, "init_delta": 2., "threshold": 1e-6, - }, - "NMPC-CGMRES":{ - }, - "NMPC-Newton":{ - }, - } + }, + "NMPC-CGMRES": { + }, + "NMPC-Newton": { + }, + } @staticmethod def input_cost_fn(u): @@ -83,7 +84,7 @@ class FirstOrderLagConfigModule(): shape(pop_size, pred_len, input_size) """ return (u**2) * np.diag(FirstOrderLagConfigModule.R) - + @staticmethod def state_cost_fn(x, g_x): """ state cost function @@ -111,47 +112,47 @@ class FirstOrderLagConfigModule(): shape(pop_size, pred_len) """ return ((terminal_x - terminal_g_x)**2) \ - * np.diag(FirstOrderLagConfigModule.Sf) - + * np.diag(FirstOrderLagConfigModule.Sf) + @staticmethod - def gradient_cost_fn_with_state(x, g_x, terminal=False): + def gradient_cost_fn_state(x, g_x, terminal=False): """ gradient of costs with respect to the state Args: x (numpy.ndarray): state, shape(pred_len, state_size) g_x (numpy.ndarray): goal state, shape(pred_len, state_size) - + Returns: l_x (numpy.ndarray): gradient of cost, shape(pred_len, state_size) or shape(1, state_size) """ if not terminal: return 2. * (x - g_x) * np.diag(FirstOrderLagConfigModule.Q) - - return (2. * (x - g_x) \ - * np.diag(FirstOrderLagConfigModule.Sf))[np.newaxis, :] + + return (2. * (x - g_x) + * np.diag(FirstOrderLagConfigModule.Sf))[np.newaxis, :] @staticmethod - def gradient_cost_fn_with_input(x, u): + def gradient_cost_fn_input(x, u): """ gradient of costs with respect to the input Args: x (numpy.ndarray): state, shape(pred_len, state_size) u (numpy.ndarray): goal state, shape(pred_len, input_size) - + Returns: l_u (numpy.ndarray): gradient of cost, shape(pred_len, input_size) """ return 2. * u * np.diag(FirstOrderLagConfigModule.R) @staticmethod - def hessian_cost_fn_with_state(x, g_x, terminal=False): + def hessian_cost_fn_state(x, g_x, terminal=False): """ hessian costs with respect to the state Args: x (numpy.ndarray): state, shape(pred_len, state_size) g_x (numpy.ndarray): goal state, shape(pred_len, state_size) - + Returns: l_xx (numpy.ndarray): gradient of cost, shape(pred_len, state_size, state_size) or @@ -159,18 +160,18 @@ class FirstOrderLagConfigModule(): """ if not terminal: (pred_len, _) = x.shape - return np.tile(2.*FirstOrderLagConfigModule.Q, (pred_len, 1, 1)) - - return np.tile(2.*FirstOrderLagConfigModule.Sf, (1, 1, 1)) + return np.tile(2.*FirstOrderLagConfigModule.Q, (pred_len, 1, 1)) + + return np.tile(2.*FirstOrderLagConfigModule.Sf, (1, 1, 1)) @staticmethod - def hessian_cost_fn_with_input(x, u): + def hessian_cost_fn_input(x, u): """ hessian costs with respect to the input Args: x (numpy.ndarray): state, shape(pred_len, state_size) u (numpy.ndarray): goal state, shape(pred_len, input_size) - + Returns: l_uu (numpy.ndarray): gradient of cost, shape(pred_len, input_size, input_size) @@ -178,15 +179,15 @@ class FirstOrderLagConfigModule(): (pred_len, _) = u.shape return np.tile(2.*FirstOrderLagConfigModule.R, (pred_len, 1, 1)) - + @staticmethod - def hessian_cost_fn_with_input_state(x, u): + def hessian_cost_fn_input_state(x, u): """ hessian costs with respect to the state and input Args: x (numpy.ndarray): state, shape(pred_len, state_size) u (numpy.ndarray): goal state, shape(pred_len, input_size) - + Returns: l_ux (numpy.ndarray): gradient of cost , shape(pred_len, input_size, state_size) diff --git a/PythonLinearNonlinearControl/configs/make_configs.py b/PythonLinearNonlinearControl/configs/make_configs.py index a48aedc..fefcaab 100644 --- a/PythonLinearNonlinearControl/configs/make_configs.py +++ b/PythonLinearNonlinearControl/configs/make_configs.py @@ -1,6 +1,8 @@ from .first_order_lag import FirstOrderLagConfigModule -from .two_wheeled import TwoWheeledConfigModule +from .two_wheeled import TwoWheeledConfigModule, TwoWheeledExtendConfigModule from .cartpole import CartPoleConfigModule +from .nonlinear_sample_system import NonlinearSampleSystemConfigModule, NonlinearSampleSystemExtendConfigModule + def make_config(args): """ @@ -10,6 +12,12 @@ def make_config(args): if args.env == "FirstOrderLag": return FirstOrderLagConfigModule() elif args.env == "TwoWheeledConst" or args.env == "TwoWheeledTrack": + if args.controller_type == "NMPCCGMRES": + return TwoWheeledExtendConfigModule() return TwoWheeledConfigModule() elif args.env == "CartPole": - return CartPoleConfigModule() \ No newline at end of file + return CartPoleConfigModule() + elif args.env == "NonlinearSample": + if args.controller_type == "NMPCCGMRES": + return NonlinearSampleSystemExtendConfigModule() + return NonlinearSampleSystemConfigModule() diff --git a/PythonLinearNonlinearControl/configs/nonlinear_sample_system.py b/PythonLinearNonlinearControl/configs/nonlinear_sample_system.py new file mode 100644 index 0000000..f4b9ff6 --- /dev/null +++ b/PythonLinearNonlinearControl/configs/nonlinear_sample_system.py @@ -0,0 +1,353 @@ +import numpy as np + + +class NonlinearSampleSystemConfigModule(): + # parameters + ENV_NAME = "NonlinearSampleSystem-v0" + PLANNER_TYPE = "Const" + TYPE = "Nonlinear" + TASK_HORIZON = 2000 + PRED_LEN = 10 + STATE_SIZE = 2 + INPUT_SIZE = 1 + DT = 0.01 + R = np.diag([1.]) + Q = None + Sf = None + # bounds + INPUT_LOWER_BOUND = np.array([-0.5]) + INPUT_UPPER_BOUND = np.array([0.5]) + + def __init__(self): + """ + """ + # opt configs + self.opt_config = { + "Random": { + "popsize": 5000 + }, + "CEM": { + "popsize": 500, + "num_elites": 50, + "max_iters": 15, + "alpha": 0.3, + "init_var": 9., + "threshold": 0.001 + }, + "MPPI": { + "beta": 0.6, + "popsize": 5000, + "kappa": 0.9, + "noise_sigma": 0.5, + }, + "MPPIWilliams": { + "popsize": 5000, + "lambda": 1., + "noise_sigma": 0.9, + }, + "iLQR": { + "max_iters": 500, + "init_mu": 1., + "mu_min": 1e-6, + "mu_max": 1e10, + "init_delta": 2., + "threshold": 1e-6, + }, + "DDP": { + "max_iters": 500, + "init_mu": 1., + "mu_min": 1e-6, + "mu_max": 1e10, + "init_delta": 2., + "threshold": 1e-6, + }, + "NMPC": { + "threshold": 0.01, + "max_iters": 5000, + "learning_rate": 0.01, + "optimizer_mode": "conjugate" + } + } + + @staticmethod + def input_cost_fn(u): + """ input cost functions + + Args: + u (numpy.ndarray): input, shape(pred_len, input_size) + or shape(pop_size, pred_len, input_size) + Returns: + cost (numpy.ndarray): cost of input, shape(pred_len, input_size) or + shape(pop_size, pred_len, input_size) + """ + return (u**2) * np.diag(NonlinearSampleSystemConfigModule.R) + + @staticmethod + def state_cost_fn(x, g_x): + """ state cost function + + Args: + x (numpy.ndarray): state, shape(pred_len, state_size) + or shape(pop_size, pred_len, state_size) + g_x (numpy.ndarray): goal state, shape(pred_len, state_size) + or shape(pop_size, pred_len, state_size) + Returns: + cost (numpy.ndarray): cost of state, shape(pred_len, 1) or + shape(pop_size, pred_len, 1) + """ + + if len(x.shape) > 2: + return (0.5 * (x[:, :, 0]**2) + + 0.5 * (x[:, :, 1]**2))[:, :, np.newaxis] + + elif len(x.shape) > 1: + return (0.5 * (x[:, 0]**2) + 0.5 * (x[:, 1]**2))[:, np.newaxis] + + return 0.5 * (x[0]**2) + 0.5 * (x[1]**2) + + @staticmethod + def terminal_state_cost_fn(terminal_x, terminal_g_x): + """ + + Args: + terminal_x (numpy.ndarray): terminal state, + shape(state_size, ) or shape(pop_size, state_size) + terminal_g_x (numpy.ndarray): terminal goal state, + shape(state_size, ) or shape(pop_size, state_size) + Returns: + cost (numpy.ndarray): cost of state, shape(pred_len, ) or + shape(pop_size, pred_len) + """ + + if len(terminal_x.shape) > 1: + return (0.5 * (terminal_x[:, 0]**2) + + 0.5 * (terminal_x[:, 1]**2))[:, np.newaxis] + + return 0.5 * (terminal_x[0]**2) + 0.5 * (terminal_x[1]**2) + + @staticmethod + def gradient_cost_fn_state(x, g_x, terminal=False): + """ gradient of costs with respect to the state + + Args: + x (numpy.ndarray): state, shape(pred_len, state_size) + g_x (numpy.ndarray): goal state, shape(pred_len, state_size) + + Returns: + l_x (numpy.ndarray): gradient of cost, shape(pred_len, state_size) + or shape(1, state_size) + """ + if not terminal: + cost_dx0 = x[:, 0] + cost_dx1 = x[:, 1] + cost_dx = np.stack((cost_dx0, cost_dx1), axis=1) + return cost_dx + + cost_dx0 = x[0] + cost_dx1 = x[1] + cost_dx = np.array([[cost_dx0, cost_dx1]]) + + return cost_dx + + @staticmethod + def gradient_cost_fn_input(x, u): + """ gradient of costs with respect to the input + + Args: + x (numpy.ndarray): state, shape(pred_len, state_size) + u (numpy.ndarray): goal state, shape(pred_len, input_size) + Returns: + l_u (numpy.ndarray): gradient of cost, shape(pred_len, input_size) + """ + return 2. * u * np.diag(NonlinearSampleSystemConfigModule.R) + + @staticmethod + def hessian_cost_fn_state(x, g_x, terminal=False): + """ hessian costs with respect to the state + + Args: + x (numpy.ndarray): state, shape(pred_len, state_size) + g_x (numpy.ndarray): goal state, shape(pred_len, state_size) + Returns: + l_xx (numpy.ndarray): gradient of cost, + shape(pred_len, state_size, state_size) or + shape(1, state_size, state_size) or + """ + if not terminal: + (pred_len, state_size) = x.shape + hessian = np.eye(state_size) + hessian = np.tile(hessian, (pred_len, 1, 1)) + hessian[:, 0, 0] = 1. + hessian[:, 1, 1] = 1. + + return hessian + + state_size = len(x) + hessian = np.eye(state_size) + hessian[0, 0] = 1. + hessian[1, 1] = 1. + + return hessian[np.newaxis, :, :] + + @staticmethod + def hessian_cost_fn_input(x, u): + """ hessian costs with respect to the input + + Args: + x (numpy.ndarray): state, shape(pred_len, state_size) + u (numpy.ndarray): goal state, shape(pred_len, input_size) + Returns: + l_uu (numpy.ndarray): gradient of cost, + shape(pred_len, input_size, input_size) + """ + (pred_len, _) = u.shape + + return np.tile(NonlinearSampleSystemConfigModule.R, (pred_len, 1, 1)) + + @staticmethod + def hessian_cost_fn_input_state(x, u): + """ hessian costs with respect to the state and input + + Args: + x (numpy.ndarray): state, shape(pred_len, state_size) + u (numpy.ndarray): goal state, shape(pred_len, input_size) + Returns: + l_ux (numpy.ndarray): gradient of cost , + shape(pred_len, input_size, state_size) + """ + (_, state_size) = x.shape + (pred_len, input_size) = u.shape + + return np.zeros((pred_len, input_size, state_size)) + + @staticmethod + def gradient_hamiltonian_input(x, lam, u, g_x): + """ + + Args: + x (numpy.ndarray): shape(pred_len+1, state_size) + lam (numpy.ndarray): shape(pred_len, state_size) + u (numpy.ndarray): shape(pred_len, input_size) + g_xs (numpy.ndarray): shape(pred_len, state_size) + + Returns: + F (numpy.ndarray), shape(pred_len, input_size) + """ + if len(x.shape) == 1: + input_size = u.shape[0] + F = np.zeros(input_size) + F[0] = u[0] + lam[1] + + return F + + elif len(x.shape) == 2: + pred_len, input_size = u.shape + F = np.zeros((pred_len, input_size)) + + for i in range(pred_len): + F[i, 0] = u[i, 0] + lam[i, 1] + + return F + + else: + raise NotImplementedError + + @staticmethod + def gradient_hamiltonian_state(x, lam, u, g_x): + """ + Args: + x (numpy.ndarray): shape(pred_len+1, state_size) + lam (numpy.ndarray): shape(pred_len, state_size) + u (numpy.ndarray): shape(pred_len, input_size) + g_xs (numpy.ndarray): shape(pred_len, state_size) + + Returns: + lam_dot (numpy.ndarray), shape(state_size, ) + """ + if len(lam.shape) == 1: + state_size = lam.shape[0] + lam_dot = np.zeros(state_size) + lam_dot[0] = x[0] - (2. * x[0] * x[1] + 1.) * lam[1] + lam_dot[1] = x[1] + lam[0] + \ + (-3. * (x[1]**2) - x[0]**2 + 1.) * lam[1] + + return lam_dot + + elif len(lam.shape) == 2: + pred_len, state_size = lam.shape + lam_dot = np.zeros((pred_len, state_size)) + + for i in range(pred_len): + lam_dot[i, 0] = x[i, 0] - \ + (2. * x[i, 0] * x[i, 1] + 1.) * lam[i, 1] + lam_dot[i, 1] = x[i, 1] + lam[i, 0] + \ + (-3. * (x[i, 1]**2) - x[i, 0]**2 + 1.) * lam[i, 1] + + return lam_dot + + else: + raise NotImplementedError + + +class NonlinearSampleSystemExtendConfigModule(NonlinearSampleSystemConfigModule): + def __init__(self): + super().__init__() + self.opt_config = { + "NMPCCGMRES": { + "threshold": 1e-3, + "zeta": 100., + "delta": 0.01, + "alpha": 0.5, + "tf": 1., + "constraint": True + }, + "NMPCNewton": { + "threshold": 1e-3, + "max_iteration": 500, + "learning_rate": 1e-3 + } + } + + @staticmethod + def gradient_hamiltonian_input_with_constraint(x, lam, u, g_x, dummy_u, raw): + """ + + Args: + x (numpy.ndarray): shape(pred_len+1, state_size) + lam (numpy.ndarray): shape(pred_len, state_size) + u (numpy.ndarray): shape(pred_len, input_size) + g_xs (numpy.ndarray): shape(pred_len, state_size) + dummy_u (numpy.ndarray): shape(pred_len, input_size) + raw (numpy.ndarray): shape(pred_len, input_size), Lagrangian for constraints + + Returns: + F (numpy.ndarray), shape(pred_len, 3) + """ + if len(x.shape) == 1: + vanilla_F = np.zeros(1) + extend_F = np.zeros(1) # 1 is the same as input size + extend_C = np.zeros(1) + + vanilla_F[0] = u[0] + lam[1] + 2. * raw[0] * u[0] + extend_F[0] = -0.01 + 2. * raw[0] * dummy_u[0] + extend_C[0] = u[0]**2 + dummy_u[0]**2 - \ + NonlinearSampleSystemConfigModule.INPUT_LOWER_BOUND**2 + + F = np.concatenate([vanilla_F, extend_F, extend_C]) + + elif len(x.shape) == 2: + pred_len, _ = u.shape + vanilla_F = np.zeros((pred_len, 1)) + extend_F = np.zeros((pred_len, 1)) # 1 is the same as input size + extend_C = np.zeros((pred_len, 1)) + + for i in range(pred_len): + vanilla_F[i, 0] = \ + u[i, 0] + lam[i, 1] + 2. * raw[i, 0] * u[i, 0] + extend_F[i, 0] = -0.01 + 2. * raw[i, 0] * dummy_u[i, 0] + extend_C[i, 0] = u[i, 0]**2 + dummy_u[i, 0]**2 - \ + NonlinearSampleSystemConfigModule.INPUT_LOWER_BOUND**2 + + F = np.concatenate([vanilla_F, extend_F, extend_C], axis=1) + + return F diff --git a/PythonLinearNonlinearControl/configs/nonlinear_system_sample.py b/PythonLinearNonlinearControl/configs/nonlinear_system_sample.py deleted file mode 100644 index e69de29..0000000 diff --git a/PythonLinearNonlinearControl/configs/two_wheeled.py b/PythonLinearNonlinearControl/configs/two_wheeled.py index 93de72a..070cffc 100644 --- a/PythonLinearNonlinearControl/configs/two_wheeled.py +++ b/PythonLinearNonlinearControl/configs/two_wheeled.py @@ -4,6 +4,7 @@ from matplotlib.axes import Axes from ..plotters.plot_objs import square_with_angle, square from ..common.utils import fit_angle_in_range + class TwoWheeledConfigModule(): # parameters ENV_NAME = "TwoWheeled-v0" @@ -22,10 +23,15 @@ class TwoWheeledConfigModule(): Sf = np.diag([5., 5., 1.]) """ # for track goal + """ R = np.diag([0.01, 0.01]) Q = np.diag([2.5, 2.5, 0.01]) Sf = np.diag([2.5, 2.5, 0.01]) - + """ + # for track goal to NMPC + R = np.diag([1., 1.]) + Q = np.diag([0.001, 0.001, 0.001]) + Sf = np.diag([1., 1., 0.001]) # bounds INPUT_LOWER_BOUND = np.array([-1.5, -3.14]) INPUT_UPPER_BOUND = np.array([1.5, 3.14]) @@ -46,41 +52,47 @@ class TwoWheeledConfigModule(): "num_elites": 50, "max_iters": 15, "alpha": 0.3, - "init_var":1., - "threshold":0.001 + "init_var": 1., + "threshold": 0.001 }, - "MPPI":{ - "beta" : 0.6, + "MPPI": { + "beta": 0.6, "popsize": 5000, "kappa": 0.9, "noise_sigma": 0.5, }, - "MPPIWilliams":{ + "MPPIWilliams": { "popsize": 5000, "lambda": 1, "noise_sigma": 1., }, - "iLQR":{ - "max_iter": 500, + "iLQR": { + "max_iters": 500, "init_mu": 1., "mu_min": 1e-6, "mu_max": 1e10, "init_delta": 2., "threshold": 1e-6, - }, - "DDP":{ - "max_iter": 500, + }, + "DDP": { + "max_iters": 500, "init_mu": 1., "mu_min": 1e-6, "mu_max": 1e10, "init_delta": 2., "threshold": 1e-6, - }, - "NMPC-CGMRES":{ - }, - "NMPC-Newton":{ - }, - } + }, + "NMPC": { + "threshold": 0.01, + "max_iters": 5000, + "learning_rate": 0.01, + "optimizer_mode": "conjugate" + }, + "NMPC-CGMRES": { + }, + "NMPC-Newton": { + }, + } @staticmethod def input_cost_fn(u): @@ -93,7 +105,7 @@ class TwoWheeledConfigModule(): shape(pop_size, pred_len, input_size) """ return (u**2) * np.diag(TwoWheeledConfigModule.R) - + @staticmethod def fit_diff_in_range(diff_x): """ fit difference state in range(angle) @@ -107,7 +119,7 @@ class TwoWheeledConfigModule(): fitted_diff_x (numpy.ndarray): same shape as diff_x """ if len(diff_x.shape) == 3: - diff_x[:, :, -1] = fit_angle_in_range(diff_x[:, :, -1]) + diff_x[:, :, -1] = fit_angle_in_range(diff_x[:, :, -1]) elif len(diff_x.shape) == 2: diff_x[:, -1] = fit_angle_in_range(diff_x[:, -1]) elif len(diff_x.shape) == 1: @@ -142,52 +154,52 @@ class TwoWheeledConfigModule(): cost (numpy.ndarray): cost of state, shape(pred_len, ) or shape(pop_size, pred_len) """ - terminal_diff = TwoWheeledConfigModule.fit_diff_in_range(terminal_x \ - - terminal_g_x) - + terminal_diff = TwoWheeledConfigModule.fit_diff_in_range(terminal_x + - terminal_g_x) + return ((terminal_diff)**2) * np.diag(TwoWheeledConfigModule.Sf) - + @staticmethod - def gradient_cost_fn_with_state(x, g_x, terminal=False): + def gradient_cost_fn_state(x, g_x, terminal=False): """ gradient of costs with respect to the state Args: x (numpy.ndarray): state, shape(pred_len, state_size) g_x (numpy.ndarray): goal state, shape(pred_len, state_size) - + Returns: l_x (numpy.ndarray): gradient of cost, shape(pred_len, state_size) or shape(1, state_size) """ diff = TwoWheeledConfigModule.fit_diff_in_range(x - g_x) - + if not terminal: return 2. * (diff) * np.diag(TwoWheeledConfigModule.Q) - - return (2. * (diff) \ - * np.diag(TwoWheeledConfigModule.Sf))[np.newaxis, :] + + return (2. * (diff) + * np.diag(TwoWheeledConfigModule.Sf))[np.newaxis, :] @staticmethod - def gradient_cost_fn_with_input(x, u): + def gradient_cost_fn_input(x, u): """ gradient of costs with respect to the input Args: x (numpy.ndarray): state, shape(pred_len, state_size) u (numpy.ndarray): goal state, shape(pred_len, input_size) - + Returns: l_u (numpy.ndarray): gradient of cost, shape(pred_len, input_size) """ return 2. * u * np.diag(TwoWheeledConfigModule.R) @staticmethod - def hessian_cost_fn_with_state(x, g_x, terminal=False): + def hessian_cost_fn_state(x, g_x, terminal=False): """ hessian costs with respect to the state Args: x (numpy.ndarray): state, shape(pred_len, state_size) g_x (numpy.ndarray): goal state, shape(pred_len, state_size) - + Returns: l_xx (numpy.ndarray): gradient of cost, shape(pred_len, state_size, state_size) or @@ -195,18 +207,18 @@ class TwoWheeledConfigModule(): """ if not terminal: (pred_len, _) = x.shape - return np.tile(2.*TwoWheeledConfigModule.Q, (pred_len, 1, 1)) - - return np.tile(2.*TwoWheeledConfigModule.Sf, (1, 1, 1)) + return np.tile(2.*TwoWheeledConfigModule.Q, (pred_len, 1, 1)) + + return np.tile(2.*TwoWheeledConfigModule.Sf, (1, 1, 1)) @staticmethod - def hessian_cost_fn_with_input(x, u): + def hessian_cost_fn_input(x, u): """ hessian costs with respect to the input Args: x (numpy.ndarray): state, shape(pred_len, state_size) u (numpy.ndarray): goal state, shape(pred_len, input_size) - + Returns: l_uu (numpy.ndarray): gradient of cost, shape(pred_len, input_size, input_size) @@ -214,15 +226,15 @@ class TwoWheeledConfigModule(): (pred_len, _) = u.shape return np.tile(2.*TwoWheeledConfigModule.R, (pred_len, 1, 1)) - + @staticmethod - def hessian_cost_fn_with_input_state(x, u): + def hessian_cost_fn_input_state(x, u): """ hessian costs with respect to the state and input Args: x (numpy.ndarray): state, shape(pred_len, state_size) u (numpy.ndarray): goal state, shape(pred_len, input_size) - + Returns: l_ux (numpy.ndarray): gradient of cost , shape(pred_len, input_size, state_size) @@ -230,4 +242,167 @@ class TwoWheeledConfigModule(): (_, state_size) = x.shape (pred_len, input_size) = u.shape - return np.zeros((pred_len, input_size, state_size)) \ No newline at end of file + return np.zeros((pred_len, input_size, state_size)) + + @staticmethod + def gradient_hamiltonian_input(x, lam, u, g_x): + """ + + Args: + x (numpy.ndarray): shape(pred_len+1, state_size) + lam (numpy.ndarray): shape(pred_len, state_size) + u (numpy.ndarray): shape(pred_len, input_size) + g_xs (numpy.ndarray): shape(pred_len, state_size) + + Returns: + F (numpy.ndarray), shape(pred_len, input_size) + """ + if len(x.shape) == 1: + input_size = u.shape[0] + F = np.zeros(input_size) + F[0] = u[0] * TwoWheeledConfigModule.R[0, 0] + \ + lam[0] * np.cos(x[2]) + lam[1] * np.sin(x[2]) + F[1] = u[1] * TwoWheeledConfigModule.R[1, 1] + lam[2] + + return F + + elif len(x.shape) == 2: + pred_len, input_size = u.shape + F = np.zeros((pred_len, input_size)) + + for i in range(pred_len): + F[i, 0] = u[i, 0] * TwoWheeledConfigModule.R[0, 0] + \ + lam[i, 0] * np.cos(x[i, 2]) + lam[i, 1] * np.sin(x[i, 2]) + F[i, 1] = u[i, 1] * TwoWheeledConfigModule.R[1, 1] + lam[i, 2] + + return F + else: + raise NotImplementedError + + @staticmethod + def gradient_hamiltonian_state(x, lam, u, g_x): + """ + Args: + x (numpy.ndarray): shape(pred_len+1, state_size) + lam (numpy.ndarray): shape(pred_len, state_size) + u (numpy.ndarray): shape(pred_len, input_size) + g_xs (numpy.ndarray): shape(pred_len, state_size) + + Returns: + lam_dot (numpy.ndarray), shape(state_size, ) + """ + if len(lam.shape) == 1: + state_size = lam.shape[0] + lam_dot = np.zeros(state_size) + lam_dot[0] = \ + (x[0] - g_x[0]) * TwoWheeledConfigModule.Q[0, 0] + lam_dot[1] = \ + (x[1] - g_x[1]) * TwoWheeledConfigModule.Q[1, 1] + + relative_angle = fit_angle_in_range(x[2] - g_x[2]) + lam_dot[2] = \ + relative_angle * TwoWheeledConfigModule.Q[2, 2] \ + - lam[0] * u[0] * np.sin(x[2]) \ + + lam[1] * u[0] * np.cos(x[2]) + + return lam_dot + + elif len(lam.shape) == 2: + pred_len, state_size = lam.shape + lam_dot = np.zeros((pred_len, state_size)) + + for i in range(pred_len): + lam_dot[i, 0] = \ + (x[i, 0] - g_x[i, 0]) * TwoWheeledConfigModule.Q[0, 0] + lam_dot[i, 1] = \ + (x[i, 1] - g_x[i, 1]) * TwoWheeledConfigModule.Q[1, 1] + + relative_angle = fit_angle_in_range(x[i, 2] - g_x[i, 2]) + lam_dot[i, 2] = \ + relative_angle * TwoWheeledConfigModule.Q[2, 2] \ + - lam[i, 0] * u[i, 0] * np.sin(x[i, 2]) \ + + lam[i, 1] * u[i, 0] * np.cos(x[i, 2]) + + return lam_dot + else: + raise NotImplementedError + + +class TwoWheeledExtendConfigModule(TwoWheeledConfigModule): + PRED_LEN = 20 + + def __init__(self): + super().__init__() + self.opt_config = { + "NMPCCGMRES": { + "threshold": 1e-3, + "zeta": 5., + "delta": 0.01, + "alpha": 0.5, + "tf": 1., + "constraint": True + }, + "NMPCNewton": { + "threshold": 1e-3, + "max_iteration": 500, + "learning_rate": 1e-3 + } + } + + @staticmethod + def gradient_hamiltonian_input_with_constraint(x, lam, u, g_x, dummy_u, raw): + """ + + Args: + x (numpy.ndarray): shape(pred_len+1, state_size) + lam (numpy.ndarray): shape(pred_len, state_size) + u (numpy.ndarray): shape(pred_len, input_size) + g_xs (numpy.ndarray): shape(pred_len, state_size) + dummy_u (numpy.ndarray): shape(pred_len, input_size) + raw (numpy.ndarray): shape(pred_len, input_size), Lagrangian for constraints + + Returns: + F (numpy.ndarray), shape(pred_len, 3) + """ + if len(x.shape) == 1: + vanilla_F = np.zeros(2) + extend_F = np.zeros(2) # 1 is the same as input size + extend_C = np.zeros(2) + + vanilla_F[0] = u[0] + lam[0] * \ + np.cos(x[2]) + lam[1] * np.sin(x[2]) + 2. * raw[0] * u[0] + vanilla_F[1] = u[1] + lam[2] + 2 * raw[1] * u[1] + + extend_F[0] = -0.01 + 2. * raw[0] * dummy_u[0] + extend_F[1] = -0.01 + 2. * raw[1] * dummy_u[1] + + extend_C[0] = u[0]**2 + dummy_u[0]**2 - \ + TwoWheeledConfigModule.INPUT_LOWER_BOUND[0]**2 + extend_C[1] = u[1]**2 + dummy_u[1]**2 - \ + TwoWheeledConfigModule.INPUT_LOWER_BOUND[1]**2 + + F = np.concatenate([vanilla_F, extend_F, extend_C]) + + elif len(x.shape) == 2: + pred_len, _ = u.shape + vanilla_F = np.zeros((pred_len, 2)) + extend_F = np.zeros((pred_len, 2)) # 1 is the same as input size + extend_C = np.zeros((pred_len, 2)) + + for i in range(pred_len): + vanilla_F[i, 0] = u[i, 0] + lam[i, 0] * \ + np.cos(x[i, 2]) + lam[i, 1] * \ + np.sin(x[i, 2]) + 2. * raw[i, 0] * u[i, 0] + vanilla_F[i, 1] = u[i, 1] + lam[i, 2] + 2 * raw[i, 1] * u[i, 1] + + extend_F[i, 0] = -0.01 + 2. * raw[i, 0] * dummy_u[i, 0] + extend_F[i, 1] = -0.01 + 2. * raw[i, 1] * dummy_u[i, 1] + + extend_C[i, 0] = u[i, 0]**2 + dummy_u[i, 0]**2 - \ + TwoWheeledConfigModule.INPUT_LOWER_BOUND[0]**2 + extend_C[i, 1] = u[i, 1]**2 + dummy_u[i, 1]**2 - \ + TwoWheeledConfigModule.INPUT_LOWER_BOUND[1]**2 + + F = np.concatenate([vanilla_F, extend_F, extend_C], axis=1) + + return F diff --git a/PythonLinearNonlinearControl/controllers/cem.py b/PythonLinearNonlinearControl/controllers/cem.py index 238ff39..176607e 100644 --- a/PythonLinearNonlinearControl/controllers/cem.py +++ b/PythonLinearNonlinearControl/controllers/cem.py @@ -8,6 +8,7 @@ from ..envs.cost import calc_cost logger = getLogger(__name__) + class CEM(Controller): """ Cross Entropy Method for linear and nonlinear method @@ -19,6 +20,7 @@ class CEM(Controller): using probabilistic dynamics models. In Advances in Neural Information Processing Systems (pp. 4754-4765). """ + def __init__(self, config, model): super(CEM, self).__init__(config, model) @@ -38,7 +40,7 @@ class CEM(Controller): self.init_var = config.opt_config["CEM"]["init_var"] self.opt_dim = self.input_size * self.pred_len - # get bound + # get bound self.input_upper_bounds = np.tile(config.INPUT_UPPER_BOUND, self.pred_len) self.input_lower_bounds = np.tile(config.INPUT_LOWER_BOUND, @@ -50,18 +52,18 @@ class CEM(Controller): self.input_cost_fn = config.input_cost_fn # init mean - self.init_mean = np.tile((config.INPUT_UPPER_BOUND \ + self.init_mean = np.tile((config.INPUT_UPPER_BOUND + config.INPUT_LOWER_BOUND) / 2., self.pred_len) self.prev_sol = self.init_mean.copy() # init variance var = np.ones_like(config.INPUT_UPPER_BOUND) \ - * config.opt_config["CEM"]["init_var"] + * config.opt_config["CEM"]["init_var"] self.init_var = np.tile(var, self.pred_len) # save self.history_u = [] - + def clear_sol(self): """ clear prev sol """ @@ -77,21 +79,21 @@ class CEM(Controller): Returns: opt_input (numpy.ndarray): optimal input, shape(input_size, ) """ - # initialize + # initialize opt_count = 0 # get configuration - mean = self.prev_sol.flatten().copy() + mean = self.prev_sol.flatten().copy() var = self.init_var.flatten().copy() - # make distribution + # make distribution X = stats.truncnorm(-1, 1, - loc=np.zeros_like(mean),\ + loc=np.zeros_like(mean), scale=np.ones_like(mean)) - + while (opt_count < self.max_iters) and np.max(var) > self.epsilon: # constrained - lb_dist = mean - self.input_lower_bounds + lb_dist = mean - self.input_lower_bounds ub_dist = self.input_upper_bounds - mean constrained_var = np.minimum(np.minimum(np.square(lb_dist), np.square(ub_dist)), @@ -99,15 +101,15 @@ class CEM(Controller): # sample samples = X.rvs(size=[self.pop_size, self.opt_dim]) \ - * np.sqrt(constrained_var) \ - + mean + * np.sqrt(constrained_var) \ + + mean # calc cost # samples.shape = (pop_size, opt_dim) costs = self.calc_cost(curr_x, samples.reshape(self.pop_size, - self.pred_len, - self.input_size), + self.pred_len, + self.input_size), g_xs) # sort cost @@ -124,7 +126,7 @@ class CEM(Controller): logger.debug("Var = {}".format(np.max(var))) logger.debug("Costs = {}".format(np.mean(costs))) opt_count += 1 - + sol = mean.copy() self.prev_sol = np.concatenate((mean[self.input_size:], np.zeros(self.input_size))) diff --git a/PythonLinearNonlinearControl/controllers/controller.py b/PythonLinearNonlinearControl/controllers/controller.py index 50abb01..9077912 100644 --- a/PythonLinearNonlinearControl/controllers/controller.py +++ b/PythonLinearNonlinearControl/controllers/controller.py @@ -2,9 +2,11 @@ import numpy as np from ..envs.cost import calc_cost + class Controller(): """ Controller class """ + def __init__(self, config, model): """ """ @@ -15,7 +17,7 @@ class Controller(): self.state_cost_fn = config.state_cost_fn self.terminal_state_cost_fn = config.terminal_state_cost_fn self.input_cost_fn = config.input_cost_fn - + def obtain_sol(self, curr_x, g_xs): """ calculate the optimal inputs Args: @@ -26,7 +28,7 @@ class Controller(): """ raise NotImplementedError("Implement the algorithm to \ get optimal input") - + def calc_cost(self, curr_x, samples, g_xs): """ calculate the cost of input samples @@ -46,22 +48,10 @@ class Controller(): # calc cost, pred_xs.shape = (pop_size, pred_len+1, state_size) pred_xs = self.model.predict_traj(curr_x, samples) - + # get particle cost costs = calc_cost(pred_xs, samples, g_xs, - self.state_cost_fn, self.input_cost_fn, \ + self.state_cost_fn, self.input_cost_fn, self.terminal_state_cost_fn) - + return costs - - @staticmethod - def gradient_hamiltonian_x(x, u, lam): - """ gradient of hamitonian with respect to the state, - """ - raise NotImplementedError("Implement gradient of hamitonian with respect to the state") - - @staticmethod - def gradient_hamiltonian_u(x, u, lam): - """ gradient of hamitonian with respect to the input - """ - raise NotImplementedError("Implement gradient of hamitonian with respect to the input") \ No newline at end of file diff --git a/PythonLinearNonlinearControl/controllers/ddp.py b/PythonLinearNonlinearControl/controllers/ddp.py index 4abb229..d4ece0f 100644 --- a/PythonLinearNonlinearControl/controllers/ddp.py +++ b/PythonLinearNonlinearControl/controllers/ddp.py @@ -8,6 +8,7 @@ from ..envs.cost import calc_cost logger = getLogger(__name__) + class DDP(Controller): """ Differential Dynamic Programming @@ -18,11 +19,12 @@ class DDP(Controller): https://github.com/studywolf/control, and https://github.com/anassinator/ilqr """ + def __init__(self, config, model): """ """ super(DDP, self).__init__(config, model) - + # model self.model = model @@ -30,15 +32,15 @@ class DDP(Controller): self.state_cost_fn = config.state_cost_fn self.terminal_state_cost_fn = config.terminal_state_cost_fn self.input_cost_fn = config.input_cost_fn - self.gradient_cost_fn_with_state = config.gradient_cost_fn_with_state - self.gradient_cost_fn_with_input = config.gradient_cost_fn_with_input - self.hessian_cost_fn_with_state = config.hessian_cost_fn_with_state - self.hessian_cost_fn_with_input = config.hessian_cost_fn_with_input - self.hessian_cost_fn_with_input_state = \ - config.hessian_cost_fn_with_input_state + self.gradient_cost_fn_state = config.gradient_cost_fn_state + self.gradient_cost_fn_input = config.gradient_cost_fn_input + self.hessian_cost_fn_state = config.hessian_cost_fn_state + self.hessian_cost_fn_input = config.hessian_cost_fn_input + self.hessian_cost_fn_input_state = \ + config.hessian_cost_fn_input_state # controller parameters - self.max_iter = config.opt_config["DDP"]["max_iter"] + self.max_iters = config.opt_config["DDP"]["max_iters"] self.init_mu = config.opt_config["DDP"]["init_mu"] self.mu = self.init_mu self.mu_min = config.opt_config["DDP"]["mu_min"] @@ -56,7 +58,7 @@ class DDP(Controller): self.Q = config.Q self.R = config.R self.Sf = config.Sf - + # initialize self.prev_sol = np.zeros((self.pred_len, self.input_size)) @@ -65,7 +67,7 @@ class DDP(Controller): """ logger.debug("Clear Sol") self.prev_sol = np.zeros((self.pred_len, self.input_size)) - + def obtain_sol(self, curr_x, g_xs): """ calculate the optimal inputs @@ -86,29 +88,29 @@ class DDP(Controller): # line search param alphas = 1.1**(-np.arange(10)**2) - while opt_count < self.max_iter: + while opt_count < self.max_iters: accepted_sol = False - # forward + # forward if update_sol == True: pred_xs, cost, f_x, f_u, f_xx, f_ux, f_uu,\ - l_x, l_xx, l_u, l_uu, l_ux = \ + l_x, l_xx, l_u, l_uu, l_ux = \ self.forward(curr_x, g_xs, sol) update_sol = False - + try: # backward - k, K = self.backward(f_x, f_u, f_xx, f_ux, f_uu, \ + k, K = self.backward(f_x, f_u, f_xx, f_ux, f_uu, l_x, l_xx, l_u, l_uu, l_ux) # line search for alpha in alphas: new_pred_xs, new_sol = \ self.calc_input(k, K, pred_xs, sol, alpha) - + new_cost = calc_cost(new_pred_xs[np.newaxis, :, :], new_sol[np.newaxis, :, :], - g_xs[np.newaxis, :, :], + g_xs[np.newaxis, :, :], self.state_cost_fn, self.input_cost_fn, self.terminal_state_cost_fn) @@ -131,15 +133,15 @@ class DDP(Controller): # accept the solution accepted_sol = True break - + except np.linalg.LinAlgError as e: logger.debug("Non ans : {}".format(e)) - + if not accepted_sol: # increase regularization term. self.delta = max(1.0, self.delta) * self.init_delta self.mu = max(self.mu_min, self.mu * self.delta) - logger.debug("Update regularization term to {}"\ + logger.debug("Update regularization term to {}" .format(self.mu)) if self.mu >= self.mu_max: logger.debug("Reach Max regularization term") @@ -156,7 +158,7 @@ class DDP(Controller): self.prev_sol[-1] = sol[-1] # last use the terminal input return sol[0] - + def calc_input(self, k, K, pred_xs, sol, alpha): """ calc input trajectory by using k and K @@ -183,8 +185,8 @@ class DDP(Controller): for t in range(pred_len): new_sol[t] = sol[t] \ - + alpha * k[t] \ - + np.dot(K[t], (new_pred_xs[t] - pred_xs[t])) + + alpha * k[t] \ + + np.dot(K[t], (new_pred_xs[t] - pred_xs[t])) new_pred_xs[t+1] = self.model.predict_next_state(new_pred_xs[t], new_sol[t]) @@ -227,7 +229,7 @@ class DDP(Controller): g_xs) # calc gradinet in batch - f_x = self.model.calc_f_x(pred_xs[:-1], sol, self.dt) + f_x = self.model.calc_f_x(pred_xs[:-1], sol, self.dt) f_u = self.model.calc_f_u(pred_xs[:-1], sol, self.dt) # calc hessian in batch f_xx = self.model.calc_f_xx(pred_xs[:-1], sol, self.dt) @@ -237,13 +239,13 @@ class DDP(Controller): # gradint of costs l_x, l_xx, l_u, l_uu, l_ux = \ self._calc_gradient_hessian_cost(pred_xs, g_xs, sol) - + return pred_xs, cost, f_x, f_u, f_xx, f_ux, f_uu, \ l_x, l_xx, l_u, l_uu, l_ux def _calc_gradient_hessian_cost(self, pred_xs, g_x, sol): """ calculate gradient and hessian of model and cost fn - + Args: pred_xs (numpy.ndarray): predict traj, shape(pred_len+1, state_size) @@ -262,31 +264,31 @@ class DDP(Controller): shape(pred_len, input_size, state_size) """ # l_x.shape = (pred_len+1, state_size) - l_x = self.gradient_cost_fn_with_state(pred_xs[:-1], - g_x[:-1], terminal=False) + l_x = self.gradient_cost_fn_state(pred_xs[:-1], + g_x[:-1], terminal=False) terminal_l_x = \ - self.gradient_cost_fn_with_state(pred_xs[-1], - g_x[-1], terminal=True) + self.gradient_cost_fn_state(pred_xs[-1], + g_x[-1], terminal=True) - l_x = np.concatenate((l_x, terminal_l_x), axis=0) + l_x = np.concatenate((l_x, terminal_l_x), axis=0) # l_u.shape = (pred_len, input_size) - l_u = self.gradient_cost_fn_with_input(pred_xs[:-1], sol) + l_u = self.gradient_cost_fn_input(pred_xs[:-1], sol) # l_xx.shape = (pred_len+1, state_size, state_size) - l_xx = self.hessian_cost_fn_with_state(pred_xs[:-1], - g_x[:-1], terminal=False) + l_xx = self.hessian_cost_fn_state(pred_xs[:-1], + g_x[:-1], terminal=False) terminal_l_xx = \ - self.hessian_cost_fn_with_state(pred_xs[-1], - g_x[-1], terminal=True) + self.hessian_cost_fn_state(pred_xs[-1], + g_x[-1], terminal=True) l_xx = np.concatenate((l_xx, terminal_l_xx), axis=0) - + # l_uu.shape = (pred_len, input_size, input_size) - l_uu = self.hessian_cost_fn_with_input(pred_xs[:-1], sol) + l_uu = self.hessian_cost_fn_input(pred_xs[:-1], sol) # l_ux.shape = (pred_len, input_size, state_size) - l_ux = self.hessian_cost_fn_with_input_state(pred_xs[:-1], sol) + l_ux = self.hessian_cost_fn_input_state(pred_xs[:-1], sol) return l_x, l_xx, l_u, l_uu, l_ux @@ -321,7 +323,7 @@ class DDP(Controller): # get size (_, state_size, _) = f_x.shape - # initialzie + # initialzie V_x = l_x[-1] V_xx = l_xx[-1] k = np.zeros((self.pred_len, self.input_size)) @@ -388,7 +390,7 @@ class DDP(Controller): """ # get size state_size = len(l_x) - + Q_x = l_x + np.dot(f_x.T, V_x) Q_u = l_u + np.dot(f_u.T, V_x) Q_xx = l_xx + np.dot(np.dot(f_x.T, V_xx), f_x) @@ -402,4 +404,4 @@ class DDP(Controller): Q_ux += np.tensordot(V_x, f_ux, axes=1) Q_uu += np.tensordot(V_x, f_uu, axes=1) - return Q_x, Q_u, Q_xx, Q_ux, Q_uu \ No newline at end of file + return Q_x, Q_u, Q_xx, Q_ux, Q_uu diff --git a/PythonLinearNonlinearControl/controllers/ilqr.py b/PythonLinearNonlinearControl/controllers/ilqr.py index 9a80dfa..d7bb34c 100644 --- a/PythonLinearNonlinearControl/controllers/ilqr.py +++ b/PythonLinearNonlinearControl/controllers/ilqr.py @@ -8,6 +8,7 @@ from ..envs.cost import calc_cost logger = getLogger(__name__) + class iLQR(Controller): """ iterative Liner Quadratique Regulator @@ -16,11 +17,12 @@ class iLQR(Controller): Intelligent Robots and Systems (pp. 4906-4913). and Study Wolf, https://github.com/studywolf/control """ + def __init__(self, config, model): """ """ super(iLQR, self).__init__(config, model) - + # model self.model = model @@ -28,15 +30,15 @@ class iLQR(Controller): self.state_cost_fn = config.state_cost_fn self.terminal_state_cost_fn = config.terminal_state_cost_fn self.input_cost_fn = config.input_cost_fn - self.gradient_cost_fn_with_state = config.gradient_cost_fn_with_state - self.gradient_cost_fn_with_input = config.gradient_cost_fn_with_input - self.hessian_cost_fn_with_state = config.hessian_cost_fn_with_state - self.hessian_cost_fn_with_input = config.hessian_cost_fn_with_input - self.hessian_cost_fn_with_input_state = \ - config.hessian_cost_fn_with_input_state + self.gradient_cost_fn_state = config.gradient_cost_fn_state + self.gradient_cost_fn_input = config.gradient_cost_fn_input + self.hessian_cost_fn_state = config.hessian_cost_fn_state + self.hessian_cost_fn_input = config.hessian_cost_fn_input + self.hessian_cost_fn_input_state = \ + config.hessian_cost_fn_input_state # controller parameters - self.max_iter = config.opt_config["iLQR"]["max_iter"] + self.max_iters = config.opt_config["iLQR"]["max_iters"] self.init_mu = config.opt_config["iLQR"]["init_mu"] self.mu = self.init_mu self.mu_min = config.opt_config["iLQR"]["mu_min"] @@ -58,7 +60,7 @@ class iLQR(Controller): """ logger.debug("Clear Sol") self.prev_sol = np.zeros((self.pred_len, self.input_size)) - + def obtain_sol(self, curr_x, g_xs): """ calculate the optimal inputs @@ -79,15 +81,15 @@ class iLQR(Controller): # line search param alphas = 1.1**(-np.arange(10)**2) - while opt_count < self.max_iter: + while opt_count < self.max_iters: accepted_sol = False - # forward + # forward if update_sol == True: pred_xs, cost, f_x, f_u, l_x, l_xx, l_u, l_uu, l_ux = \ self.forward(curr_x, g_xs, sol) update_sol = False - + try: # backward k, K = self.backward(f_x, f_u, l_x, l_xx, l_u, l_uu, l_ux) @@ -96,10 +98,10 @@ class iLQR(Controller): for alpha in alphas: new_pred_xs, new_sol = \ self.calc_input(k, K, pred_xs, sol, alpha) - + new_cost = calc_cost(new_pred_xs[np.newaxis, :, :], new_sol[np.newaxis, :, :], - g_xs[np.newaxis, :, :], + g_xs[np.newaxis, :, :], self.state_cost_fn, self.input_cost_fn, self.terminal_state_cost_fn) @@ -122,15 +124,15 @@ class iLQR(Controller): # accept the solution accepted_sol = True break - + except np.linalg.LinAlgError as e: logger.debug("Non ans : {}".format(e)) - + if not accepted_sol: # increase regularization term. self.delta = max(1.0, self.delta) * self.init_delta self.mu = max(self.mu_min, self.mu * self.delta) - logger.debug("Update regularization term to {}"\ + logger.debug("Update regularization term to {}" .format(self.mu)) if self.mu >= self.mu_max: logger.debug("Reach Max regularization term") @@ -147,7 +149,7 @@ class iLQR(Controller): self.prev_sol[-1] = sol[-1] # last use the terminal input return sol[0] - + def calc_input(self, k, K, pred_xs, sol, alpha): """ calc input trajectory by using k and K @@ -174,8 +176,8 @@ class iLQR(Controller): for t in range(pred_len): new_sol[t] = sol[t] \ - + alpha * k[t] \ - + np.dot(K[t], (new_pred_xs[t] - pred_xs[t])) + + alpha * k[t] \ + + np.dot(K[t], (new_pred_xs[t] - pred_xs[t])) new_pred_xs[t+1] = self.model.predict_next_state(new_pred_xs[t], new_sol[t]) @@ -212,18 +214,18 @@ class iLQR(Controller): g_xs) # calc gradinet in batch - f_x = self.model.calc_f_x(pred_xs[:-1], sol, self.dt) + f_x = self.model.calc_f_x(pred_xs[:-1], sol, self.dt) f_u = self.model.calc_f_u(pred_xs[:-1], sol, self.dt) # gradint of costs l_x, l_xx, l_u, l_uu, l_ux = \ self._calc_gradient_hessian_cost(pred_xs, g_xs, sol) - + return pred_xs, cost, f_x, f_u, l_x, l_xx, l_u, l_uu, l_ux def _calc_gradient_hessian_cost(self, pred_xs, g_x, sol): """ calculate gradient and hessian of model and cost fn - + Args: pred_xs (numpy.ndarray): predict traj, shape(pred_len+1, state_size) @@ -242,31 +244,31 @@ class iLQR(Controller): shape(pred_len, input_size, state_size) """ # l_x.shape = (pred_len+1, state_size) - l_x = self.gradient_cost_fn_with_state(pred_xs[:-1], - g_x[:-1], terminal=False) + l_x = self.gradient_cost_fn_state(pred_xs[:-1], + g_x[:-1], terminal=False) terminal_l_x = \ - self.gradient_cost_fn_with_state(pred_xs[-1], - g_x[-1], terminal=True) + self.gradient_cost_fn_state(pred_xs[-1], + g_x[-1], terminal=True) - l_x = np.concatenate((l_x, terminal_l_x), axis=0) + l_x = np.concatenate((l_x, terminal_l_x), axis=0) # l_u.shape = (pred_len, input_size) - l_u = self.gradient_cost_fn_with_input(pred_xs[:-1], sol) + l_u = self.gradient_cost_fn_input(pred_xs[:-1], sol) # l_xx.shape = (pred_len+1, state_size, state_size) - l_xx = self.hessian_cost_fn_with_state(pred_xs[:-1], - g_x[:-1], terminal=False) + l_xx = self.hessian_cost_fn_state(pred_xs[:-1], + g_x[:-1], terminal=False) terminal_l_xx = \ - self.hessian_cost_fn_with_state(pred_xs[-1], - g_x[-1], terminal=True) + self.hessian_cost_fn_state(pred_xs[-1], + g_x[-1], terminal=True) l_xx = np.concatenate((l_xx, terminal_l_xx), axis=0) - + # l_uu.shape = (pred_len, input_size, input_size) - l_uu = self.hessian_cost_fn_with_input(pred_xs[:-1], sol) + l_uu = self.hessian_cost_fn_input(pred_xs[:-1], sol) # l_ux.shape = (pred_len, input_size, state_size) - l_ux = self.hessian_cost_fn_with_input_state(pred_xs[:-1], sol) + l_ux = self.hessian_cost_fn_input_state(pred_xs[:-1], sol) return l_x, l_xx, l_u, l_uu, l_ux @@ -287,7 +289,7 @@ class iLQR(Controller): shape(pred_len, input_size, input_size) l_ux (numpy.ndarray): hessian of cost with respect to state and input, shape(pred_len, input_size, state_size) - + Returns: k (numpy.ndarray): gain, shape(pred_len, input_size) K (numpy.ndarray): gain, shape(pred_len, input_size, state_size) @@ -295,7 +297,7 @@ class iLQR(Controller): # get size (_, state_size, _) = f_x.shape - # initialzie + # initialzie V_x = l_x[-1] V_xx = l_xx[-1] k = np.zeros((self.pred_len, self.input_size)) @@ -352,7 +354,7 @@ class iLQR(Controller): """ # get size state_size = len(l_x) - + Q_x = l_x + np.dot(f_x.T, V_x) Q_u = l_u + np.dot(f_u.T, V_x) Q_xx = l_xx + np.dot(np.dot(f_x.T, V_xx), f_x) @@ -361,4 +363,4 @@ class iLQR(Controller): Q_ux = l_ux + np.dot(np.dot(f_u.T, (V_xx + reg)), f_x) Q_uu = l_uu + np.dot(np.dot(f_u.T, (V_xx + reg)), f_u) - return Q_x, Q_u, Q_xx, Q_ux, Q_uu \ No newline at end of file + return Q_x, Q_u, Q_xx, Q_ux, Q_uu diff --git a/PythonLinearNonlinearControl/controllers/make_controllers.py b/PythonLinearNonlinearControl/controllers/make_controllers.py index d5ec8df..194cbf7 100644 --- a/PythonLinearNonlinearControl/controllers/make_controllers.py +++ b/PythonLinearNonlinearControl/controllers/make_controllers.py @@ -5,6 +5,9 @@ from .mppi import MPPI from .mppi_williams import MPPIWilliams from .ilqr import iLQR from .ddp import DDP +from .nmpc import NMPC +from .nmpc_cgmres import NMPCCGMRES + def make_controller(args, config, model): @@ -22,5 +25,9 @@ def make_controller(args, config, model): return iLQR(config, model) elif args.controller_type == "DDP": return DDP(config, model) - - raise ValueError("No controller: {}".format(args.controller_type)) \ No newline at end of file + elif args.controller_type == "NMPC": + return NMPC(config, model) + elif args.controller_type == "NMPCCGMRES": + return NMPCCGMRES(config, model) + + raise ValueError("No controller: {}".format(args.controller_type)) diff --git a/PythonLinearNonlinearControl/controllers/mpc.py b/PythonLinearNonlinearControl/controllers/mpc.py index ddf5840..c1e0db3 100644 --- a/PythonLinearNonlinearControl/controllers/mpc.py +++ b/PythonLinearNonlinearControl/controllers/mpc.py @@ -9,6 +9,7 @@ from ..envs.cost import calc_cost logger = getLogger(__name__) + class LinearMPC(Controller): """ Model Predictive Controller for linear model @@ -21,6 +22,7 @@ class LinearMPC(Controller): Ref: Maciejowski, J. M. (2002). Predictive control: with constraints. """ + def __init__(self, config, model): """ Args: @@ -55,7 +57,7 @@ class LinearMPC(Controller): self.dt_input_upper_bound = config.DT_INPUT_UPPER_BOUND self.input_lower_bound = config.INPUT_LOWER_BOUND self.input_upper_bound = config.INPUT_UPPER_BOUND - + # setup controllers self.W = None self.omega = None @@ -66,7 +68,7 @@ class LinearMPC(Controller): # history self.history_u = [np.zeros(self.input_size)] - + def setup(self): """ setup Model Predictive Control as a quadratic programming @@ -77,11 +79,11 @@ class LinearMPC(Controller): for _ in range(self.pred_len - 1): temp_mat = np.matmul(A_factorials[-1], self.A) self.phi_mat = np.vstack((self.phi_mat, temp_mat)) - A_factorials.append(temp_mat) # after we use this factorials - + A_factorials.append(temp_mat) # after we use this factorials + self.gamma_mat = self.B.copy() gammma_mat_temp = self.B.copy() - + for i in range(self.pred_len - 1): temp_1_mat = np.matmul(A_factorials[i], self.B) gammma_mat_temp = temp_1_mat + gammma_mat_temp @@ -91,8 +93,8 @@ class LinearMPC(Controller): for i in range(self.pred_len - 1): temp_mat = np.zeros_like(self.gamma_mat) - temp_mat[int((i + 1)*self.state_size): , :] =\ - self.gamma_mat[:-int((i + 1)*self.state_size) , :] + temp_mat[int((i + 1)*self.state_size):, :] =\ + self.gamma_mat[:-int((i + 1)*self.state_size), :] self.theta_mat = np.hstack((self.theta_mat, temp_mat)) @@ -114,12 +116,12 @@ class LinearMPC(Controller): for i in range(self.pred_len - 1): for j in range(self.input_size): - temp_F[j * 2: (j + 1) * 2,\ + temp_F[j * 2: (j + 1) * 2, ((i+1) * self.input_size) + j] = np.array([1., -1.]) self.F = np.vstack((self.F, temp_F)) self.F1 = self.F[:, :self.input_size] - + temp_f = [] for i in range(self.input_size): temp_f.append(-1 * self.input_upper_bound[i]) @@ -168,7 +170,7 @@ class LinearMPC(Controller): H = H * 0.5 # constraints - A = [] + A = [] b = [] if self.W is not None: @@ -187,14 +189,14 @@ class LinearMPC(Controller): # using cvxopt def optimized_func(dt_us): - return (np.dot(dt_us, np.dot(H, dt_us.reshape(-1, 1))) \ + return (np.dot(dt_us, np.dot(H, dt_us.reshape(-1, 1))) - np.dot(G.T, dt_us.reshape(-1, 1)))[0] # constraint lb = np.array([-np.inf for _ in range(len(ub))]) # one side cons cons = LinearConstraint(A, lb, ub) # solve - opt_sol = minimize(optimized_func, self.prev_sol.flatten(),\ + opt_sol = minimize(optimized_func, self.prev_sol.flatten(), constraints=[cons]) opt_dt_us = opt_sol.x @@ -213,21 +215,21 @@ class LinearMPC(Controller): """ # to dt form - opt_dt_u_seq = np.cumsum(opt_dt_us.reshape(self.pred_len,\ + opt_dt_u_seq = np.cumsum(opt_dt_us.reshape(self.pred_len, self.input_size), axis=0) self.prev_sol = opt_dt_u_seq.copy() - + opt_u_seq = opt_dt_u_seq + self.history_u[-1] - + # save self.history_u.append(opt_u_seq[0]) # check costs costs = self.calc_cost(curr_x, opt_u_seq.reshape(1, - self.pred_len, - self.input_size), + self.pred_len, + self.input_size), g_xs) logger.debug("Cost = {}".format(costs)) @@ -235,4 +237,4 @@ class LinearMPC(Controller): return opt_u_seq[0] def __str__(self): - return "LinearMPC" \ No newline at end of file + return "LinearMPC" diff --git a/PythonLinearNonlinearControl/controllers/mppi.py b/PythonLinearNonlinearControl/controllers/mppi.py index fc8d887..3b0be45 100644 --- a/PythonLinearNonlinearControl/controllers/mppi.py +++ b/PythonLinearNonlinearControl/controllers/mppi.py @@ -8,6 +8,7 @@ from ..envs.cost import calc_cost logger = getLogger(__name__) + class MPPI(Controller): """ Model Predictive Path Integral for linear and nonlinear method @@ -18,6 +19,7 @@ class MPPI(Controller): Deep Dynamics Models for Learning Dexterous Manipulation. arXiv preprint arXiv:1909.11652. """ + def __init__(self, config, model): super(MPPI, self).__init__(config, model) @@ -35,7 +37,7 @@ class MPPI(Controller): self.noise_sigma = config.opt_config["MPPI"]["noise_sigma"] self.opt_dim = self.input_size * self.pred_len - # get bound + # get bound self.input_upper_bounds = np.tile(config.INPUT_UPPER_BOUND, (self.pred_len, 1)) self.input_lower_bounds = np.tile(config.INPUT_LOWER_BOUND, @@ -47,14 +49,14 @@ class MPPI(Controller): self.input_cost_fn = config.input_cost_fn # init mean - self.prev_sol = np.tile((config.INPUT_UPPER_BOUND \ + self.prev_sol = np.tile((config.INPUT_UPPER_BOUND + config.INPUT_LOWER_BOUND) / 2., self.pred_len) self.prev_sol = self.prev_sol.reshape(self.pred_len, self.input_size) # save self.history_u = [np.zeros(self.input_size)] - + def clear_sol(self): """ clear prev sol """ @@ -74,24 +76,24 @@ class MPPI(Controller): """ # get noised inputs noise = np.random.normal( - loc=0, scale=1.0, size=(self.pop_size, self.pred_len, - self.input_size)) * self.noise_sigma + loc=0, scale=1.0, size=(self.pop_size, self.pred_len, + self.input_size)) * self.noise_sigma noised_inputs = noise.copy() - + for t in range(self.pred_len): if t > 0: noised_inputs[:, t, :] = self.beta \ - * (self.prev_sol[t, :] \ - + noise[:, t, :]) \ - + (1 - self.beta) \ - * noised_inputs[:, t-1, :] + * (self.prev_sol[t, :] + + noise[:, t, :]) \ + + (1 - self.beta) \ + * noised_inputs[:, t-1, :] else: noised_inputs[:, t, :] = self.beta \ - * (self.prev_sol[t, :] \ - + noise[:, t, :]) \ - + (1 - self.beta) \ - * self.history_u[-1] - + * (self.prev_sol[t, :] + + noise[:, t, :]) \ + + (1 - self.beta) \ + * self.history_u[-1] + # clip actions noised_inputs = np.clip( noised_inputs, self.input_lower_bounds, self.input_upper_bounds) @@ -108,7 +110,7 @@ class MPPI(Controller): # weight actions weighted_inputs = exp_rewards[:, np.newaxis, np.newaxis] \ - * noised_inputs + * noised_inputs sol = np.sum(weighted_inputs, 0) / denom # update @@ -121,4 +123,4 @@ class MPPI(Controller): return sol[0] def __str__(self): - return "MPPI" \ No newline at end of file + return "MPPI" diff --git a/PythonLinearNonlinearControl/controllers/mppi_williams.py b/PythonLinearNonlinearControl/controllers/mppi_williams.py index 1fd0102..3ffaaa1 100644 --- a/PythonLinearNonlinearControl/controllers/mppi_williams.py +++ b/PythonLinearNonlinearControl/controllers/mppi_williams.py @@ -8,6 +8,7 @@ from ..envs.cost import calc_cost logger = getLogger(__name__) + class MPPIWilliams(Controller): """ Model Predictive Path Integral for linear and nonlinear method @@ -19,6 +20,7 @@ class MPPIWilliams(Controller): 2017 IEEE International Conference on Robotics and Automation (ICRA), Singapore, 2017, pp. 1714-1721. """ + def __init__(self, config, model): super(MPPIWilliams, self).__init__(config, model) @@ -35,7 +37,7 @@ class MPPIWilliams(Controller): self.noise_sigma = config.opt_config["MPPIWilliams"]["noise_sigma"] self.opt_dim = self.input_size * self.pred_len - # get bound + # get bound self.input_upper_bounds = np.tile(config.INPUT_UPPER_BOUND, (self.pred_len, 1)) self.input_lower_bounds = np.tile(config.INPUT_LOWER_BOUND, @@ -47,14 +49,14 @@ class MPPIWilliams(Controller): self.input_cost_fn = config.input_cost_fn # init mean - self.prev_sol = np.tile((config.INPUT_UPPER_BOUND \ + self.prev_sol = np.tile((config.INPUT_UPPER_BOUND + config.INPUT_LOWER_BOUND) / 2., self.pred_len) self.prev_sol = self.prev_sol.reshape(self.pred_len, self.input_size) # save self.history_u = [np.zeros(self.input_size)] - + def clear_sol(self): """ clear prev sol """ @@ -62,7 +64,7 @@ class MPPIWilliams(Controller): self.prev_sol = \ (self.input_upper_bounds + self.input_lower_bounds) / 2. self.prev_sol = self.prev_sol.reshape(self.pred_len, self.input_size) - + def calc_cost(self, curr_x, samples, g_xs): """ calculate the cost of input samples by using MPPI's eq @@ -82,12 +84,12 @@ class MPPIWilliams(Controller): # calc cost, pred_xs.shape = (pop_size, pred_len+1, state_size) pred_xs = self.model.predict_traj(curr_x, samples) - + # get particle cost costs = calc_cost(pred_xs, samples, g_xs, - self.state_cost_fn, None, \ + self.state_cost_fn, None, self.terminal_state_cost_fn) - + return costs def obtain_sol(self, curr_x, g_xs): @@ -101,9 +103,9 @@ class MPPIWilliams(Controller): """ # get noised inputs noise = np.random.normal( - loc=0, scale=1.0, size=(self.pop_size, self.pred_len, - self.input_size)) * self.noise_sigma - + loc=0, scale=1.0, size=(self.pop_size, self.pred_len, + self.input_size)) * self.noise_sigma + noised_inputs = self.prev_sol + noise # clip actions @@ -120,7 +122,7 @@ class MPPIWilliams(Controller): # mppi update beta = np.min(costs) eta = np.sum(np.exp(- 1. / self.lam * (costs - beta)), axis=0) \ - + 1e-10 + + 1e-10 # weight # eta.shape = (pred_len, input_size) @@ -128,7 +130,7 @@ class MPPIWilliams(Controller): # update inputs sol = self.prev_sol \ - + np.sum(weights[:, np.newaxis, np.newaxis] * noise, axis=0) + + np.sum(weights[:, np.newaxis, np.newaxis] * noise, axis=0) # update self.prev_sol[:-1] = sol[1:] @@ -140,4 +142,4 @@ class MPPIWilliams(Controller): return sol[0] def __str__(self): - return "MPPIWilliams" \ No newline at end of file + return "MPPIWilliams" diff --git a/PythonLinearNonlinearControl/controllers/nmpc.py b/PythonLinearNonlinearControl/controllers/nmpc.py new file mode 100644 index 0000000..e7ea44d --- /dev/null +++ b/PythonLinearNonlinearControl/controllers/nmpc.py @@ -0,0 +1,109 @@ +from logging import getLogger + +import numpy as np +import scipy.stats as stats + +from .controller import Controller +from ..envs.cost import calc_cost +from ..common.utils import line_search + +logger = getLogger(__name__) + + +class NMPC(Controller): + def __init__(self, config, model): + """ Nonlinear Model Predictive Control using pure gradient algorithm + """ + super(NMPC, self).__init__(config, model) + + # model + self.model = model + + # get cost func + self.state_cost_fn = config.state_cost_fn + self.terminal_state_cost_fn = config.terminal_state_cost_fn + self.input_cost_fn = config.input_cost_fn + + # controller parameters + self.threshold = config.opt_config["NMPC"]["threshold"] + self.max_iters = config.opt_config["NMPC"]["max_iters"] + self.learning_rate = config.opt_config["NMPC"]["learning_rate"] + self.optimizer_mode = config.opt_config["NMPC"]["optimizer_mode"] + + # general parameters + self.pred_len = config.PRED_LEN + self.input_size = config.INPUT_SIZE + self.dt = config.DT + + # initialize + self.prev_sol = np.zeros((self.pred_len, self.input_size)) + + def obtain_sol(self, curr_x, g_xs): + """ calculate the optimal inputs + Args: + curr_x (numpy.ndarray): current state, shape(state_size, ) + g_xs (numpy.ndarrya): goal trajectory, shape(plan_len, state_size) + Returns: + opt_input (numpy.ndarray): optimal input, shape(input_size, ) + """ + sol = self.prev_sol.copy() + count = 0 + # use for Conjugate method + conjugate_d = None + conjugate_prev_d = None + conjugate_s = None + conjugate_beta = None + + while True: + # shape(pred_len+1, state_size) + pred_xs = self.model.predict_traj(curr_x, sol) + # shape(pred_len, state_size) + pred_lams = self.model.predict_adjoint_traj(pred_xs, sol, g_xs) + + F_hat = self.config.gradient_hamiltonian_input( + pred_xs, pred_lams, sol, g_xs) + + if np.linalg.norm(F_hat) < self.threshold: + break + + if count > self.max_iters: + logger.debug(" break max iteartion at F : `{}".format( + np.linalg.norm(F_hat))) + break + + if self.optimizer_mode == "conjugate": + conjugate_d = F_hat.flatten() + + if conjugate_prev_d is None: # initial + conjugate_s = conjugate_d + conjugate_prev_d = conjugate_d + F_hat = conjugate_s.reshape(F_hat.shape) + else: + prev_d = np.dot(conjugate_prev_d, conjugate_prev_d) + d = np.dot(conjugate_d, conjugate_d - conjugate_prev_d) + conjugate_beta = (d + 1e-6) / (prev_d + 1e-6) + + conjugate_s = conjugate_d + conjugate_beta * conjugate_s + conjugate_prev_d = conjugate_d + F_hat = conjugate_s.reshape(F_hat.shape) + + def compute_eval_val(u): + pred_xs = self.model.predict_traj(curr_x, u) + state_cost = np.sum(self.config.state_cost_fn( + pred_xs[1:-1], g_xs[1:-1])) + input_cost = np.sum(self.config.input_cost_fn(u)) + terminal_cost = np.sum( + self.config.terminal_state_cost_fn(pred_xs[-1], g_xs[-1])) + return state_cost + input_cost + terminal_cost + + alpha = line_search(F_hat, sol, + compute_eval_val, init_alpha=self.learning_rate) + + sol -= alpha * F_hat + count += 1 + + # update us for next optimization + self.prev_sol = np.concatenate( + (sol[1:], np.zeros((1, self.input_size))), axis=0) + + return sol[0] diff --git a/PythonLinearNonlinearControl/controllers/nmpc_cgmres.py b/PythonLinearNonlinearControl/controllers/nmpc_cgmres.py new file mode 100644 index 0000000..70e722b --- /dev/null +++ b/PythonLinearNonlinearControl/controllers/nmpc_cgmres.py @@ -0,0 +1,167 @@ +from logging import getLogger + +import numpy as np +import scipy.stats as stats + +from .controller import Controller +from ..envs.cost import calc_cost +from ..common.utils import line_search + +logger = getLogger(__name__) + + +class NMPCCGMRES(Controller): + def __init__(self, config, model): + """ Nonlinear Model Predictive Control using cgmres + """ + super(NMPCCGMRES, self).__init__(config, model) + + # model + self.model = model + + # get cost func + self.state_cost_fn = config.state_cost_fn + self.terminal_state_cost_fn = config.terminal_state_cost_fn + self.input_cost_fn = config.input_cost_fn + + # general parameters + self.pred_len = config.PRED_LEN + self.input_size = config.INPUT_SIZE + self.dt = config.DT + + # controller parameters + self.threshold = config.opt_config["NMPCCGMRES"]["threshold"] + self.zeta = config.opt_config["NMPCCGMRES"]["zeta"] + self.delta = config.opt_config["NMPCCGMRES"]["delta"] + self.alpha = config.opt_config["NMPCCGMRES"]["alpha"] + self.tf = config.opt_config["NMPCCGMRES"]["tf"] + self.divide_num = config.PRED_LEN + self.with_constraint = config.opt_config["NMPCCGMRES"]["constraint"] + if not self.with_constraint: + raise NotImplementedError + # 3 means u, dummy_u, raw + self.max_iters = 3 * self.input_size * self.divide_num + + # initialize + self.prev_sol = np.zeros((self.pred_len, self.input_size)) + self.opt_count = 1 + # add smaller than constraints value + input_constraint = np.abs([config.INPUT_LOWER_BOUND]) + self.prev_dummy_sol = np.ones( + (self.pred_len, self.input_size)) * input_constraint - 1e-3 + # add bigger than 0.01 to avoid computational error + self.prev_raw = np.zeros( + (self.pred_len, self.input_size)) + 0.01 + 1e-3 + + def _compute_f(self, curr_x, sol, g_xs, dummy_sol=None, raw=None): + # shape(pred_len+1, state_size) + pred_xs = self.model.predict_traj(curr_x, sol) + # shape(pred_len, state_size) + pred_lams = self.model.predict_adjoint_traj(pred_xs, sol, g_xs) + + if self.with_constraint: + F = self.config.gradient_hamiltonian_input_with_constraint( + pred_xs, pred_lams, sol, g_xs, dummy_sol, raw) + + return F + else: + raise NotImplementedError + + def obtain_sol(self, curr_x, g_xs): + """ calculate the optimal inputs + Args: + curr_x (numpy.ndarray): current state, shape(state_size, ) + g_xs (numpy.ndarrya): goal trajectory, shape(plan_len, state_size) + Returns: + opt_input (numpy.ndarray): optimal input, shape(input_size, ) + """ + sol = self.prev_sol.copy() + dummy_sol = self.prev_dummy_sol.copy() + raw = self.prev_raw.copy() + + # compute delta t + time = self.dt * self.opt_count + dt = self.tf * (1. - np.exp(-self.alpha * time)) / \ + float(self.divide_num) + self.model.dt = dt + + # compute Fxt + x_dot = self.model.x_dot(curr_x, sol[0]) + dx = x_dot * self.delta + Fxt = self._compute_f(curr_x+dx, sol, g_xs, dummy_sol, raw).flatten() + + # compute F + F = self._compute_f(curr_x, sol, g_xs, dummy_sol, raw).flatten() + right = - self.zeta * F - ((Fxt - F) / self.delta) + + # compute Fuxt + du = sol * self.delta + ddummy_u = dummy_sol * self.delta + draw = raw * self.delta + Fuxt = self._compute_f(curr_x+dx, sol+du, g_xs, + dummy_sol+ddummy_u, raw+draw).flatten() + left = ((Fuxt - Fxt) / self.delta) + + r0 = right - left + r0_norm = np.linalg.norm(r0) + + vs = np.zeros((self.max_iters, self.max_iters + 1)) + vs[:, 0] = r0 / r0_norm + hs = np.zeros((self.max_iters + 1, self.max_iters + 1)) + e = np.zeros((self.max_iters + 1, 1)) + e[0] = 1. + + for i in range(self.max_iters): + reshaped_vs = vs.reshape( + (self.divide_num, 3, self.input_size, self.max_iters+1)) + du = reshaped_vs[:, 0, :, i] * self.delta + ddummy_u = reshaped_vs[:, 1, :, i] * self.delta + draw = reshaped_vs[:, 2, :, i] * self.delta + + Fuxt = self._compute_f( + curr_x+dx, sol+du, g_xs, dummy_sol+ddummy_u, raw+draw).flatten() + Av = ((Fuxt - Fxt) / self.delta) + + sum_Av = np.zeros(self.max_iters) + + for j in range(i + 1): + hs[j, i] = np.dot(Av, vs[:, j]) + sum_Av = sum_Av + hs[j, i] * vs[:, j] + + v_est = Av - sum_Av + + hs[i+1, i] = np.linalg.norm(v_est) + + vs[:, i+1] = v_est / hs[i+1, i] + + inv_hs = np.linalg.pinv(hs[:i+1, :i]) + ys = np.dot(inv_hs, r0_norm * e[:i+1]) + + judge_value = r0_norm * e[:i+1] - np.dot(hs[:i+1, :i], ys[:i]) + + if np.linalg.norm(judge_value) < self.threshold or i == self.max_iters-1: + update_value = np.dot(vs[:, :i-1], ys_pre[:i-1]).flatten() + + update_value = update_value.reshape( + (self.divide_num, 3, self.input_size)) + du_new = du + update_value[:, 0, :] + ddummy_u_new = ddummy_u + update_value[:, 1, :] + draw_new = draw + update_value[:, 2, :] + break + + ys_pre = ys + + sol += du_new * self.delta + dummy_sol += ddummy_u_new * self.delta + raw += draw_new * self.delta + + F = self._compute_f(curr_x, sol, g_xs, dummy_sol, raw) + logger.debug("check F = {0}".format(np.linalg.norm(F))) + + self.prev_sol = sol.copy() + self.prev_dummy_sol = dummy_sol.copy() + self.prev_raw = raw.copy() + + self.opt_count += 1 + + return sol[0] diff --git a/PythonLinearNonlinearControl/controllers/random.py b/PythonLinearNonlinearControl/controllers/random.py index 53a7622..e82fbdf 100644 --- a/PythonLinearNonlinearControl/controllers/random.py +++ b/PythonLinearNonlinearControl/controllers/random.py @@ -8,6 +8,7 @@ from ..envs.cost import calc_cost logger = getLogger(__name__) + class RandomShooting(Controller): """ Random Shooting Method for linear and nonlinear method @@ -19,6 +20,7 @@ class RandomShooting(Controller): using probabilistic dynamics models. In Advances in Neural Information Processing Systems (pp. 4754-4765). """ + def __init__(self, config, model): super(RandomShooting, self).__init__(config, model) @@ -33,7 +35,7 @@ class RandomShooting(Controller): self.pop_size = config.opt_config["Random"]["popsize"] self.opt_dim = self.input_size * self.pred_len - # get bound + # get bound self.input_upper_bounds = np.tile(config.INPUT_UPPER_BOUND, self.pred_len) self.input_lower_bounds = np.tile(config.INPUT_LOWER_BOUND, @@ -46,7 +48,7 @@ class RandomShooting(Controller): # save self.history_u = [] - + def obtain_sol(self, curr_x, g_xs): """ calculate the optimal inputs @@ -65,8 +67,8 @@ class RandomShooting(Controller): # calc cost costs = self.calc_cost(curr_x, samples.reshape(self.pop_size, - self.pred_len, - self.input_size), + self.pred_len, + self.input_size), g_xs) # solution sol = samples[np.argmin(costs)] @@ -74,4 +76,4 @@ class RandomShooting(Controller): return sol.reshape(self.pred_len, self.input_size).copy()[0] def __str__(self): - return "RandomShooting" \ No newline at end of file + return "RandomShooting" diff --git a/PythonLinearNonlinearControl/envs/__init__.py b/PythonLinearNonlinearControl/envs/__init__.py index 837b5d8..e649609 100644 --- a/PythonLinearNonlinearControl/envs/__init__.py +++ b/PythonLinearNonlinearControl/envs/__init__.py @@ -5,4 +5,4 @@ from PythonLinearNonlinearControl.envs.first_order_lag \ from PythonLinearNonlinearControl.envs.two_wheeled \ import TwoWheeledConstEnv # NOQA from PythonLinearNonlinearControl.envs.two_wheeled \ - import TwoWheeledTrackEnv # NOQA \ No newline at end of file + import TwoWheeledTrackEnv # NOQA diff --git a/PythonLinearNonlinearControl/envs/cartpole.py b/PythonLinearNonlinearControl/envs/cartpole.py index fd70d47..f64b0df 100644 --- a/PythonLinearNonlinearControl/envs/cartpole.py +++ b/PythonLinearNonlinearControl/envs/cartpole.py @@ -4,6 +4,7 @@ from matplotlib.axes import Axes from .env import Env from ..plotters.plot_objs import square + class CartPoleEnv(Env): """ Cartpole Environment @@ -13,13 +14,14 @@ class CartPoleEnv(Env): 6-832-underactuated-robotics-spring-2009/readings/ MIT6_832s09_read_ch03.pdf """ + def __init__(self): """ """ - self.config = {"state_size" : 4, - "input_size" : 1, - "dt" : 0.02, - "max_step" : 500, + self.config = {"state_size": 4, + "input_size": 1, + "dt": 0.02, + "max_step": 500, "input_lower_bound": [-3.], "input_upper_bound": [3.], "mp": 0.2, @@ -30,7 +32,7 @@ class CartPoleEnv(Env): } super(CartPoleEnv, self).__init__(self.config) - + def reset(self, init_x=None): """ reset state @@ -39,7 +41,7 @@ class CartPoleEnv(Env): info (dict): information """ self.step_count = 0 - + theta = np.random.randn(1) self.curr_x = np.array([0., 0., theta[0], 0.]) @@ -48,7 +50,7 @@ class CartPoleEnv(Env): # goal self.g_x = np.array([0., 0., -np.pi, 0.]) - + # clear memory self.history_x = [] self.history_g_x = [] @@ -76,50 +78,50 @@ class CartPoleEnv(Env): # x d_x0 = self.curr_x[1] # v_x - d_x1 = (u[0] + self.config["mp"] * np.sin(self.curr_x[2]) \ - * (self.config["l"] * (self.curr_x[3]**2) \ - + self.config["g"] * np.cos(self.curr_x[2]))) \ - / (self.config["mc"] + self.config["mp"] \ - * (np.sin(self.curr_x[2])**2)) + d_x1 = (u[0] + self.config["mp"] * np.sin(self.curr_x[2]) + * (self.config["l"] * (self.curr_x[3]**2) + + self.config["g"] * np.cos(self.curr_x[2]))) \ + / (self.config["mc"] + self.config["mp"] + * (np.sin(self.curr_x[2])**2)) # theta d_x2 = self.curr_x[3] - + # v_theta - d_x3 = (-u[0] * np.cos(self.curr_x[2]) \ - - self.config["mp"] * self.config["l"] * (self.curr_x[3]**2) \ - * np.cos(self.curr_x[2]) * np.sin(self.curr_x[2]) \ - - (self.config["mc"] + self.config["mp"]) * self.config["g"] \ - * np.sin(self.curr_x[2])) \ - / (self.config["l"] * (self.config["mc"] + self.config["mp"] \ - * (np.sin(self.curr_x[2])**2))) - + d_x3 = (-u[0] * np.cos(self.curr_x[2]) + - self.config["mp"] * self.config["l"] * (self.curr_x[3]**2) + * np.cos(self.curr_x[2]) * np.sin(self.curr_x[2]) + - (self.config["mc"] + self.config["mp"]) * self.config["g"] + * np.sin(self.curr_x[2])) \ + / (self.config["l"] * (self.config["mc"] + self.config["mp"] + * (np.sin(self.curr_x[2])**2))) + next_x = self.curr_x +\ - np.array([d_x0, d_x1, d_x2, d_x3]) * self.config["dt"] + np.array([d_x0, d_x1, d_x2, d_x3]) * self.config["dt"] # TODO: costs costs = 0. costs += 0.1 * np.sum(u**2) costs += 6. * self.curr_x[0]**2 \ - + 12. * (np.cos(self.curr_x[2]) + 1.)**2 \ - + 0.1 * self.curr_x[1]**2 \ - + 0.1 * self.curr_x[3]**2 + + 12. * (np.cos(self.curr_x[2]) + 1.)**2 \ + + 0.1 * self.curr_x[1]**2 \ + + 0.1 * self.curr_x[3]**2 # save history self.history_x.append(next_x.flatten()) self.history_g_x.append(self.g_x.flatten()) - + # update self.curr_x = next_x.flatten().copy() # update costs self.step_count += 1 return next_x.flatten(), costs, \ - self.step_count > self.config["max_step"], \ - {"goal_state" : self.g_x} - + self.step_count > self.config["max_step"], \ + {"goal_state": self.g_x} + def plot_func(self, to_plot, i=None, history_x=None, history_g_x=None): """ plot cartpole object function - + Args: to_plot (axis or imgs): plotted objects i (int): frame count @@ -131,29 +133,29 @@ class CartPoleEnv(Env): """ if isinstance(to_plot, Axes): imgs = {} # create new imgs - + imgs["cart"] = to_plot.plot([], [], c="k")[0] imgs["pole"] = to_plot.plot([], [], c="k", linewidth=5)[0] - imgs["center"] = to_plot.plot([], [], marker="o", c="k",\ + imgs["center"] = to_plot.plot([], [], marker="o", c="k", markersize=10)[0] # centerline - to_plot.plot(np.linspace(-1., 1., num=50), np.zeros(50),\ + to_plot.plot(np.linspace(-1., 1., num=50), np.zeros(50), c="k", linestyle="dashed") - + # set axis to_plot.set_xlim([-1., 1.]) to_plot.set_ylim([-0.55, 1.5]) - + return imgs # set imgs cart_x, cart_y, pole_x, pole_y = \ self._plot_cartpole(history_x[i]) - + to_plot["cart"].set_data(cart_x, cart_y) to_plot["pole"].set_data(pole_x, pole_y) to_plot["center"].set_data(history_x[i][0], 0.) - + def _plot_cartpole(self, curr_x): """ plot cartpole fucntions @@ -166,13 +168,13 @@ class CartPoleEnv(Env): pole_y (numpy.ndarray): y data of pole """ # cart - cart_x, cart_y = square(curr_x[0], 0.,\ + cart_x, cart_y = square(curr_x[0], 0., self.config["cart_size"], 0.) - - # pole - pole_x = np.array([curr_x[0], curr_x[0] + self.config["l"] \ - * np.cos(curr_x[2]-np.pi/2)]) - pole_y = np.array([0., self.config["l"] \ - * np.sin(curr_x[2]-np.pi/2)]) - return cart_x, cart_y, pole_x, pole_y \ No newline at end of file + # pole + pole_x = np.array([curr_x[0], curr_x[0] + self.config["l"] + * np.cos(curr_x[2]-np.pi/2)]) + pole_y = np.array([0., self.config["l"] + * np.sin(curr_x[2]-np.pi/2)]) + + return cart_x, cart_y, pole_x, pole_y diff --git a/PythonLinearNonlinearControl/envs/cost.py b/PythonLinearNonlinearControl/envs/cost.py index 117d5f2..3247dec 100644 --- a/PythonLinearNonlinearControl/envs/cost.py +++ b/PythonLinearNonlinearControl/envs/cost.py @@ -4,6 +4,7 @@ import numpy as np logger = getLogger(__name__) + def calc_cost(pred_xs, input_sample, g_xs, state_cost_fn, input_cost_fn, terminal_state_cost_fn): """ calculate the cost @@ -24,20 +25,21 @@ def calc_cost(pred_xs, input_sample, g_xs, # state cost state_cost = 0. if state_cost_fn is not None: - state_pred_par_cost = state_cost_fn(pred_xs[:, 1:-1, :], g_xs[:, 1:-1, :]) + state_pred_par_cost = state_cost_fn( + pred_xs[:, 1:-1, :], g_xs[:, 1:-1, :]) state_cost = np.sum(np.sum(state_pred_par_cost, axis=-1), axis=-1) # terminal cost terminal_state_cost = 0. if terminal_state_cost_fn is not None: terminal_state_par_cost = terminal_state_cost_fn(pred_xs[:, -1, :], - g_xs[:, -1, :]) + g_xs[:, -1, :]) terminal_state_cost = np.sum(terminal_state_par_cost, axis=-1) - + # act cost act_cost = 0. if input_cost_fn is not None: act_pred_par_cost = input_cost_fn(input_sample) act_cost = np.sum(np.sum(act_pred_par_cost, axis=-1), axis=-1) - return state_cost + terminal_state_cost + act_cost \ No newline at end of file + return state_cost + terminal_state_cost + act_cost diff --git a/PythonLinearNonlinearControl/envs/env.py b/PythonLinearNonlinearControl/envs/env.py index 4f63903..9f59141 100644 --- a/PythonLinearNonlinearControl/envs/env.py +++ b/PythonLinearNonlinearControl/envs/env.py @@ -1,13 +1,15 @@ import numpy as np + class Env(): """ Environments class Attributes: - + curr_x (numpy.ndarray): current state history_x (list[numpy.ndarray]): historty of state, shape(step_count*state_size) step_count (int): step count """ + def __init__(self, config): """ """ @@ -25,12 +27,12 @@ class Env(): info (dict): information """ self.step_count = 0 - + self.curr_x = np.zeros(self.config["state_size"]) if init_x is not None: self.curr_x = init_x - + # clear memory self.history_x = [] self.history_g_x = [] @@ -52,4 +54,4 @@ class Env(): def __repr__(self): """ """ - return self.config \ No newline at end of file + return self.config diff --git a/PythonLinearNonlinearControl/envs/first_order_lag.py b/PythonLinearNonlinearControl/envs/first_order_lag.py index 5597a07..f7f06bc 100644 --- a/PythonLinearNonlinearControl/envs/first_order_lag.py +++ b/PythonLinearNonlinearControl/envs/first_order_lag.py @@ -3,25 +3,27 @@ import scipy from scipy import integrate from .env import Env + class FirstOrderLagEnv(Env): """ First Order Lag System Env """ + def __init__(self, tau=0.63): """ """ - self.config = {"state_size" : 4,\ - "input_size" : 2,\ - "dt" : 0.05,\ - "max_step" : 500,\ - "input_lower_bound": [-0.5, -0.5],\ + self.config = {"state_size": 4, + "input_size": 2, + "dt": 0.05, + "max_step": 500, + "input_lower_bound": [-0.5, -0.5], "input_upper_bound": [0.5, 0.5], } super(FirstOrderLagEnv, self).__init__(self.config) # to get discrete system matrix - self.A, self.B = self._to_state_space(tau, dt=self.config["dt"]) - + self.A, self.B = self._to_state_space(tau, dt=self.config["dt"]) + @staticmethod def _to_state_space(tau, dt=0.05): """ @@ -34,13 +36,13 @@ class FirstOrderLagEnv(Env): """ # continuous Ac = np.array([[-1./tau, 0., 0., 0.], - [0., -1./tau, 0., 0.], - [1., 0., 0., 0.], - [0., 1., 0., 0.]]) + [0., -1./tau, 0., 0.], + [1., 0., 0., 0.], + [0., 1., 0., 0.]]) Bc = np.array([[1./tau, 0.], - [0., 1./tau], - [0., 0.], - [0., 0.]]) + [0., 1./tau], + [0., 0.], + [0., 0.]]) # to discrete system A = scipy.linalg.expm(dt*Ac) # B = np.matmul(np.matmul(scipy.linalg.expm(Ac*dt) - @@ -55,7 +57,7 @@ class FirstOrderLagEnv(Env): B[m, n] = sol[0] return A, B - + def reset(self, init_x=None): """ reset state Returns: @@ -63,7 +65,7 @@ class FirstOrderLagEnv(Env): info (dict): information """ self.step_count = 0 - + self.curr_x = np.zeros(self.config["state_size"]) if init_x is not None: @@ -71,7 +73,7 @@ class FirstOrderLagEnv(Env): # goal self.g_x = np.array([0., 0, -2., 3.]) - + # clear memory self.history_x = [] self.history_g_x = [] @@ -94,7 +96,7 @@ class FirstOrderLagEnv(Env): self.config["input_upper_bound"]) next_x = np.matmul(self.A, self.curr_x[:, np.newaxis]) \ - + np.matmul(self.B, u[:, np.newaxis]) + + np.matmul(self.B, u[:, np.newaxis]) # cost cost = 0 @@ -104,17 +106,17 @@ class FirstOrderLagEnv(Env): # save history self.history_x.append(next_x.flatten()) self.history_g_x.append(self.g_x.flatten()) - + # update self.curr_x = next_x.flatten() # update costs self.step_count += 1 return next_x.flatten(), cost, \ - self.step_count > self.config["max_step"], \ - {"goal_state" : self.g_x} - + self.step_count > self.config["max_step"], \ + {"goal_state": self.g_x} + def plot_func(self, to_plot, i=None, history_x=None, history_g_x=None): """ """ - raise ValueError("FirstOrderLag does not have animation") \ No newline at end of file + raise ValueError("FirstOrderLag does not have animation") diff --git a/PythonLinearNonlinearControl/envs/make_envs.py b/PythonLinearNonlinearControl/envs/make_envs.py index 253f3ed..5603b45 100644 --- a/PythonLinearNonlinearControl/envs/make_envs.py +++ b/PythonLinearNonlinearControl/envs/make_envs.py @@ -2,6 +2,8 @@ from .first_order_lag import FirstOrderLagEnv from .two_wheeled import TwoWheeledConstEnv from .two_wheeled import TwoWheeledTrackEnv from .cartpole import CartPoleEnv +from .nonlinear_sample_system import NonlinearSampleSystemEnv + def make_env(args): @@ -13,5 +15,7 @@ def make_env(args): return TwoWheeledTrackEnv() elif args.env == "CartPole": return CartPoleEnv() - - raise NotImplementedError("There is not {} Env".format(args.env)) \ No newline at end of file + elif args.env == "NonlinearSample": + return NonlinearSampleSystemEnv() + + raise NotImplementedError("There is not {} Env".format(args.env)) diff --git a/PythonLinearNonlinearControl/envs/nonlinear_sample_system.py b/PythonLinearNonlinearControl/envs/nonlinear_sample_system.py index 9e66e6e..38a827f 100644 --- a/PythonLinearNonlinearControl/envs/nonlinear_sample_system.py +++ b/PythonLinearNonlinearControl/envs/nonlinear_sample_system.py @@ -4,22 +4,24 @@ from scipy import integrate from .env import Env from ..common.utils import update_state_with_Runge_Kutta -class NonlinearSampleEnv(Env): + +class NonlinearSampleSystemEnv(Env): """ Nonlinear Sample Env """ + def __init__(self): """ """ - self.config = {"state_size" : 2,\ - "input_size" : 1,\ - "dt" : 0.01,\ - "max_step" : 250,\ - "input_lower_bound": [-0.5],\ + self.config = {"state_size": 2, + "input_size": 1, + "dt": 0.01, + "max_step": 2000, + "input_lower_bound": [-0.5], "input_upper_bound": [0.5], } - super(NonlinearSampleEnv, self).__init__(self.config) - + super(NonlinearSampleSystemEnv, self).__init__(self.config) + def reset(self, init_x=np.array([2., 0.])): """ reset state Returns: @@ -27,7 +29,7 @@ class NonlinearSampleEnv(Env): info (dict): information """ self.step_count = 0 - + self.curr_x = np.zeros(self.config["state_size"]) if init_x is not None: @@ -35,7 +37,7 @@ class NonlinearSampleEnv(Env): # goal self.g_x = np.array([0., 0.]) - + # clear memory self.history_x = [] self.history_g_x = [] @@ -57,10 +59,11 @@ class NonlinearSampleEnv(Env): self.config["input_lower_bound"], self.config["input_upper_bound"]) - funtions = [self._func_x_1, self._func_x_2] + functions = [self._func_x_1, self._func_x_2] - next_x = update_state_with_Runge_Kutta(self._curr_x, u, - functions, self.config["dt"]) + next_x = update_state_with_Runge_Kutta(self.curr_x, u, + functions, self.config["dt"], + batch=False) # cost cost = 0 @@ -70,29 +73,25 @@ class NonlinearSampleEnv(Env): # save history self.history_x.append(next_x.flatten()) self.history_g_x.append(self.g_x.flatten()) - + # update self.curr_x = next_x.flatten() # update costs self.step_count += 1 return next_x.flatten(), cost, \ - self.step_count > self.config["max_step"], \ - {"goal_state" : self.g_x} - - def _func_x_1(self, x_1, x_2, u): - """ - """ - x_dot = x_2 + self.step_count > self.config["max_step"], \ + {"goal_state": self.g_x} + + def _func_x_1(self, x, u): + x_dot = x[1] return x_dot - - def _func_x_2(self, x_1, x_2, u): - """ - """ - x_dot = (1. - x_1**2 - x_2**2) * x_2 - x_1 + u + + def _func_x_2(self, x, u): + x_dot = (1. - x[0]**2 - x[1]**2) * x[1] - x[0] + u[0] return x_dot - + def plot_func(self, to_plot, i=None, history_x=None, history_g_x=None): """ """ - raise ValueError("NonlinearSampleEnv does not have animation") \ No newline at end of file + raise ValueError("NonlinearSampleSystemEnv does not have animation") diff --git a/PythonLinearNonlinearControl/envs/two_wheeled.py b/PythonLinearNonlinearControl/envs/two_wheeled.py index 43f5f0c..07c87a1 100644 --- a/PythonLinearNonlinearControl/envs/two_wheeled.py +++ b/PythonLinearNonlinearControl/envs/two_wheeled.py @@ -5,47 +5,50 @@ import matplotlib.pyplot as plt from .env import Env from ..plotters.plot_objs import circle_with_angle, square, circle + def step_two_wheeled_env(curr_x, u, dt, method="Oylar"): """ step two wheeled enviroment - + Args: curr_x (numpy.ndarray): current state, shape(state_size, ) u (numpy.ndarray): input, shape(input_size, ) dt (float): sampling time Returns: next_x (numpy.ndarray): next state, shape(state_size. ) - + Notes: TODO: deal with another method, like Runge Kutta """ B = np.array([[np.cos(curr_x[-1]), 0.], [np.sin(curr_x[-1]), 0.], [0., 1.]]) - + x_dot = np.matmul(B, u[:, np.newaxis]) next_x = x_dot.flatten() * dt + curr_x return next_x + class TwoWheeledConstEnv(Env): """ Two wheeled robot with constant goal Env """ + def __init__(self): """ """ - self.config = {"state_size" : 3,\ - "input_size" : 2,\ - "dt" : 0.01,\ - "max_step" : 500,\ - "input_lower_bound": (-1.5, -3.14),\ - "input_upper_bound": (1.5, 3.14),\ - "car_size": 0.2,\ + self.config = {"state_size": 3, + "input_size": 2, + "dt": 0.01, + "max_step": 500, + "input_lower_bound": (-1.5, -3.14), + "input_upper_bound": (1.5, 3.14), + "car_size": 0.2, "wheel_size": (0.075, 0.015) } super(TwoWheeledConstEnv, self).__init__(self.config) - + def reset(self, init_x=None): """ reset state @@ -54,15 +57,17 @@ class TwoWheeledConstEnv(Env): info (dict): information """ self.step_count = 0 - - self.curr_x = np.zeros(self.config["state_size"]) + + noise = np.clip(np.random.randn(3), -0.1, 0.1) + noise *= 0.1 + self.curr_x = np.zeros(self.config["state_size"]) + noise if init_x is not None: self.curr_x = init_x # goal self.g_x = np.array([2.5, 2.5, 0.]) - + # clear memory self.history_x = [] self.history_g_x = [] @@ -96,32 +101,32 @@ class TwoWheeledConstEnv(Env): # save history self.history_x.append(next_x.flatten()) self.history_g_x.append(self.g_x.flatten()) - + # update self.curr_x = next_x.flatten() # update costs self.step_count += 1 return next_x.flatten(), costs, \ - self.step_count > self.config["max_step"], \ - {"goal_state" : self.g_x} - + self.step_count > self.config["max_step"], \ + {"goal_state": self.g_x} + def plot_func(self, to_plot, i=None, history_x=None, history_g_x=None): """ plot cartpole object function - + Args: to_plot (axis or imgs): plotted objects i (int): frame count history_x (numpy.ndarray): history of state, shape(iters, state) history_g_x (numpy.ndarray): history of goal state, shape(iters, state) - + Returns: None or imgs : imgs order is ["cart_img", "pole_img"] """ if isinstance(to_plot, Axes): imgs = {} # create new imgs - + imgs["car"] = to_plot.plot([], [], c="k")[0] imgs["car_angle"] = to_plot.plot([], [], c="k")[0] imgs["left_tire"] = to_plot.plot([], [], c="k", linewidth=5)[0] @@ -139,9 +144,9 @@ class TwoWheeledConstEnv(Env): # set imgs # car imgs car_x, car_y, car_angle_x, car_angle_y, \ - left_tire_x, left_tire_y, right_tire_x, right_tire_y = \ + left_tire_x, left_tire_y, right_tire_x, right_tire_y = \ self._plot_car(history_x[i]) - + to_plot["car"].set_data(car_x, car_y) to_plot["car_angle"].set_data(car_angle_x, car_angle_y) to_plot["left_tire"].set_data(left_tire_x, left_tire_y,) @@ -150,7 +155,7 @@ class TwoWheeledConstEnv(Env): # goal and trajs to_plot["goal"].set_data(history_g_x[i, 0], history_g_x[i, 1]) to_plot["traj"].set_data(history_x[:i, 0], history_x[:i, 1]) - + def _plot_car(self, curr_x): """ plot car fucntions """ @@ -158,53 +163,55 @@ class TwoWheeledConstEnv(Env): car_x, car_y, car_angle_x, car_angle_y = \ circle_with_angle(curr_x[0], curr_x[1], self.config["car_size"], curr_x[2]) - + # left tire - center_x = (self.config["car_size"] \ + center_x = (self.config["car_size"] + self.config["wheel_size"][1]) \ - * np.cos(curr_x[2]-np.pi/2.) + curr_x[0] - center_y = (self.config["car_size"] \ + * np.cos(curr_x[2]-np.pi/2.) + curr_x[0] + center_y = (self.config["car_size"] + self.config["wheel_size"][1]) \ - * np.sin(curr_x[2]-np.pi/2.) + curr_x[1] - + * np.sin(curr_x[2]-np.pi/2.) + curr_x[1] + left_tire_x, left_tire_y = \ - square(center_x, center_y, + square(center_x, center_y, self.config["wheel_size"], curr_x[2]) # right tire - center_x = (self.config["car_size"] \ + center_x = (self.config["car_size"] + self.config["wheel_size"][1]) \ - * np.cos(curr_x[2]+np.pi/2.) + curr_x[0] - center_y = (self.config["car_size"] \ + * np.cos(curr_x[2]+np.pi/2.) + curr_x[0] + center_y = (self.config["car_size"] + self.config["wheel_size"][1]) \ - * np.sin(curr_x[2]+np.pi/2.) + curr_x[1] - + * np.sin(curr_x[2]+np.pi/2.) + curr_x[1] + right_tire_x, right_tire_y = \ - square(center_x, center_y, + square(center_x, center_y, self.config["wheel_size"], curr_x[2]) - + return car_x, car_y, car_angle_x, car_angle_y,\ - left_tire_x, left_tire_y,\ - right_tire_x, right_tire_y + left_tire_x, left_tire_y,\ + right_tire_x, right_tire_y + class TwoWheeledTrackEnv(Env): """ Two wheeled robot with constant goal Env """ + def __init__(self): """ """ - self.config = {"state_size" : 3,\ - "input_size" : 2,\ - "dt" : 0.01,\ - "max_step" : 1000,\ - "input_lower_bound": (-1.5, -3.14),\ - "input_upper_bound": (1.5, 3.14),\ - "car_size": 0.2,\ + self.config = {"state_size": 3, + "input_size": 2, + "dt": 0.01, + "max_step": 1000, + "input_lower_bound": (-1.5, -3.14), + "input_upper_bound": (1.5, 3.14), + "car_size": 0.2, "wheel_size": (0.075, 0.015) } super(TwoWheeledTrackEnv, self).__init__(self.config) - + @staticmethod def make_road(linelength=3., circle_radius=1.): """ make track @@ -220,23 +227,23 @@ class TwoWheeledTrackEnv(Env): # circle circle_1_x, circle_1_y = circle(linelength/2., circle_radius, - circle_radius, start=-np.pi/2., end=np.pi/2., n_point=50) - circle_1 = np.stack((circle_1_x , circle_1_y), axis=1) - + circle_radius, start=-np.pi/2., end=np.pi/2., n_point=50) + circle_1 = np.stack((circle_1_x, circle_1_y), axis=1) + circle_2_x, circle_2_y = circle(-linelength/2., circle_radius, - circle_radius, start=np.pi/2., end=3*np.pi/2., n_point=50) - circle_2 = np.stack((circle_2_x , circle_2_y), axis=1) + circle_radius, start=np.pi/2., end=3*np.pi/2., n_point=50) + circle_2 = np.stack((circle_2_x, circle_2_y), axis=1) road_pos = np.concatenate((line_1, circle_1, line_2, circle_2), axis=0) # calc road angle road_diff = road_pos[1:] - road_pos[:-1] - road_angle = np.arctan2(road_diff[:, 1], road_diff[:, 0]) + road_angle = np.arctan2(road_diff[:, 1], road_diff[:, 0]) road_angle = np.concatenate((np.zeros(1), road_angle)) road = np.concatenate((road_pos, road_angle[:, np.newaxis]), axis=1) - return np.tile(road, (3, 1)) + return np.tile(road, (3, 1)) def reset(self, init_x=None): """ reset state @@ -246,15 +253,17 @@ class TwoWheeledTrackEnv(Env): info (dict): information """ self.step_count = 0 - - self.curr_x = np.zeros(self.config["state_size"]) + + noise = np.clip(np.random.randn(3), -0.1, 0.1) + noise *= 0.01 + self.curr_x = np.zeros(self.config["state_size"]) + noise if init_x is not None: self.curr_x = init_x # goal self.g_traj = self.make_road() - + # clear memory self.history_x = [] self.history_g_x = [] @@ -286,32 +295,32 @@ class TwoWheeledTrackEnv(Env): # save history self.history_x.append(next_x.flatten()) - + # update self.curr_x = next_x.flatten() # update costs self.step_count += 1 return next_x.flatten(), costs, \ - self.step_count > self.config["max_step"], \ - {"goal_state" : self.g_traj} - + self.step_count > self.config["max_step"], \ + {"goal_state": self.g_traj} + def plot_func(self, to_plot, i=None, history_x=None, history_g_x=None): """ plot cartpole object function - + Args: to_plot (axis or imgs): plotted objects i (int): frame count history_x (numpy.ndarray): history of state, shape(iters, state) history_g_x (numpy.ndarray): history of goal state, shape(iters, state) - + Returns: None or imgs : imgs order is ["cart_img", "pole_img"] """ if isinstance(to_plot, Axes): imgs = {} # create new imgs - + imgs["car"] = to_plot.plot([], [], c="k")[0] imgs["car_angle"] = to_plot.plot([], [], c="k")[0] imgs["left_tire"] = to_plot.plot([], [], c="k", linewidth=5)[0] @@ -333,9 +342,9 @@ class TwoWheeledTrackEnv(Env): # set imgs # car imgs car_x, car_y, car_angle_x, car_angle_y, \ - left_tire_x, left_tire_y, right_tire_x, right_tire_y = \ + left_tire_x, left_tire_y, right_tire_x, right_tire_y = \ self._plot_car(history_x[i]) - + to_plot["car"].set_data(car_x, car_y) to_plot["car_angle"].set_data(car_angle_x, car_angle_y) to_plot["left_tire"].set_data(left_tire_x, left_tire_y,) @@ -344,7 +353,7 @@ class TwoWheeledTrackEnv(Env): # goal and trajs to_plot["goal"].set_data(history_g_x[i, 0], history_g_x[i, 1]) to_plot["traj"].set_data(history_x[:i, 0], history_x[:i, 1]) - + def _plot_car(self, curr_x): """ plot car fucntions """ @@ -352,31 +361,31 @@ class TwoWheeledTrackEnv(Env): car_x, car_y, car_angle_x, car_angle_y = \ circle_with_angle(curr_x[0], curr_x[1], self.config["car_size"], curr_x[2]) - + # left tire - center_x = (self.config["car_size"] \ + center_x = (self.config["car_size"] + self.config["wheel_size"][1]) \ - * np.cos(curr_x[2]-np.pi/2.) + curr_x[0] - center_y = (self.config["car_size"] \ + * np.cos(curr_x[2]-np.pi/2.) + curr_x[0] + center_y = (self.config["car_size"] + self.config["wheel_size"][1]) \ - * np.sin(curr_x[2]-np.pi/2.) + curr_x[1] - + * np.sin(curr_x[2]-np.pi/2.) + curr_x[1] + left_tire_x, left_tire_y = \ - square(center_x, center_y, + square(center_x, center_y, self.config["wheel_size"], curr_x[2]) # right tire - center_x = (self.config["car_size"] \ + center_x = (self.config["car_size"] + self.config["wheel_size"][1]) \ - * np.cos(curr_x[2]+np.pi/2.) + curr_x[0] - center_y = (self.config["car_size"] \ + * np.cos(curr_x[2]+np.pi/2.) + curr_x[0] + center_y = (self.config["car_size"] + self.config["wheel_size"][1]) \ - * np.sin(curr_x[2]+np.pi/2.) + curr_x[1] - + * np.sin(curr_x[2]+np.pi/2.) + curr_x[1] + right_tire_x, right_tire_y = \ - square(center_x, center_y, + square(center_x, center_y, self.config["wheel_size"], curr_x[2]) - + return car_x, car_y, car_angle_x, car_angle_y,\ - left_tire_x, left_tire_y,\ - right_tire_x, right_tire_y \ No newline at end of file + left_tire_x, left_tire_y,\ + right_tire_x, right_tire_y diff --git a/PythonLinearNonlinearControl/helper.py b/PythonLinearNonlinearControl/helper.py index 7fa2058..26844ef 100644 --- a/PythonLinearNonlinearControl/helper.py +++ b/PythonLinearNonlinearControl/helper.py @@ -7,6 +7,7 @@ import six import pickle from logging import DEBUG, basicConfig, getLogger, FileHandler, StreamHandler, Formatter, Logger + def make_logger(save_dir): """ Args: @@ -21,7 +22,7 @@ def make_logger(save_dir): # mypackage log level logger = getLogger("PythonLinearNonlinearControl") logger.setLevel(DEBUG) - + # file handler log_path = os.path.join(save_dir, "log.txt") file_handler = FileHandler(log_path) @@ -33,6 +34,7 @@ def make_logger(save_dir): # sh_handler = StreamHandler() # logger.addHandler(sh_handler) + def int_tuple(s): """ transform str to tuple Args: @@ -42,6 +44,7 @@ def int_tuple(s): """ return tuple(int(i) for i in s.split(',')) + def bool_flag(s): """ transform str to bool flg Args: @@ -54,6 +57,7 @@ def bool_flag(s): msg = 'Invalid value "%s" for bool flag (should be 0 or 1)' raise ValueError(msg % s) + def file_exists(path): """ Check file existence on given path Args: @@ -63,6 +67,7 @@ def file_exists(path): """ return os.path.exists(path) + def create_dir_if_not_exist(outdir): """ Check directory existence and creates new directory if not exist Args: @@ -77,6 +82,7 @@ def create_dir_if_not_exist(outdir): return os.makedirs(outdir) + def write_text_to_file(file_path, data): """ Write given text data to file Args: @@ -86,6 +92,7 @@ def write_text_to_file(file_path, data): with open(file_path, 'w') as f: f.write(data) + def read_text_from_file(file_path): """ Read given file as text Args: @@ -96,6 +103,7 @@ def read_text_from_file(file_path): with open(file_path, 'r') as f: return f.read() + def save_pickle(file_path, data): """ pickle given data to file Args: @@ -105,6 +113,7 @@ def save_pickle(file_path, data): with open(file_path, 'wb') as f: pickle.dump(data, f) + def load_pickle(file_path): """ load pickled data from file Args: @@ -118,6 +127,7 @@ def load_pickle(file_path): else: return pickle.load(f, encoding='bytes') + def prepare_output_dir(base_dir, args, time_format='%Y-%m-%d-%H%M%S'): """ prepare a directory with current datetime as name. created directory contains the command and args when the script was called as text file. @@ -144,4 +154,4 @@ def prepare_output_dir(base_dir, args, time_format='%Y-%m-%d-%H%M%S'): argv = ' '.join(sys.argv) write_text_to_file(argv_file_path, argv) - return outdir \ No newline at end of file + return outdir diff --git a/PythonLinearNonlinearControl/models/cartpole.py b/PythonLinearNonlinearControl/models/cartpole.py index db8ef61..cc9bfda 100644 --- a/PythonLinearNonlinearControl/models/cartpole.py +++ b/PythonLinearNonlinearControl/models/cartpole.py @@ -2,9 +2,11 @@ import numpy as np from .model import Model + class CartPoleModel(Model): """ cartpole model """ + def __init__(self, config): """ """ @@ -17,7 +19,7 @@ class CartPoleModel(Model): def predict_next_state(self, curr_x, u): """ predict next state - + Args: curr_x (numpy.ndarray): current state, shape(state_size, ) or shape(pop_size, state_size) @@ -31,59 +33,59 @@ class CartPoleModel(Model): # x d_x0 = curr_x[1] # v_x - d_x1 = (u[0] + self.mp * np.sin(curr_x[2]) \ - * (self.l * (curr_x[3]**2) \ - + self.g * np.cos(curr_x[2]))) \ - / (self.mc + self.mp * (np.sin(curr_x[2])**2)) + d_x1 = (u[0] + self.mp * np.sin(curr_x[2]) + * (self.l * (curr_x[3]**2) + + self.g * np.cos(curr_x[2]))) \ + / (self.mc + self.mp * (np.sin(curr_x[2])**2)) # theta d_x2 = curr_x[3] # v_theta - d_x3 = (-u[0] * np.cos(curr_x[2]) \ - - self.mp * self.l * (curr_x[3]**2) \ - * np.cos(curr_x[2]) * np.sin(curr_x[2]) \ + d_x3 = (-u[0] * np.cos(curr_x[2]) + - self.mp * self.l * (curr_x[3]**2) + * np.cos(curr_x[2]) * np.sin(curr_x[2]) - (self.mc + self.mp) * self.g * np.sin(curr_x[2])) \ - / (self.l * (self.mc + self.mp * (np.sin(curr_x[2])**2))) - + / (self.l * (self.mc + self.mp * (np.sin(curr_x[2])**2))) + next_x = curr_x +\ - np.array([d_x0, d_x1, d_x2, d_x3]) * self.dt - + np.array([d_x0, d_x1, d_x2, d_x3]) * self.dt + return next_x elif len(u.shape) == 2: # x d_x0 = curr_x[:, 1] # v_x - d_x1 = (u[:, 0] + self.mp * np.sin(curr_x[:, 2]) \ - * (self.l * (curr_x[:, 3]**2) \ - + self.g * np.cos(curr_x[:, 2]))) \ - / (self.mc + self.mp * (np.sin(curr_x[:, 2])**2)) + d_x1 = (u[:, 0] + self.mp * np.sin(curr_x[:, 2]) + * (self.l * (curr_x[:, 3]**2) + + self.g * np.cos(curr_x[:, 2]))) \ + / (self.mc + self.mp * (np.sin(curr_x[:, 2])**2)) # theta d_x2 = curr_x[:, 3] # v_theta - d_x3 = (-u[:, 0] * np.cos(curr_x[:, 2]) \ - - self.mp * self.l * (curr_x[:, 3]**2) \ - * np.cos(curr_x[:, 2]) * np.sin(curr_x[:, 2]) \ + d_x3 = (-u[:, 0] * np.cos(curr_x[:, 2]) + - self.mp * self.l * (curr_x[:, 3]**2) + * np.cos(curr_x[:, 2]) * np.sin(curr_x[:, 2]) - (self.mc + self.mp) * self.g * np.sin(curr_x[:, 2])) \ - / (self.l * (self.mc + self.mp * (np.sin(curr_x[:, 2])**2))) - + / (self.l * (self.mc + self.mp * (np.sin(curr_x[:, 2])**2))) + next_x = curr_x +\ - np.stack((d_x0, d_x1, d_x2, d_x3), axis=1) * self.dt + np.stack((d_x0, d_x1, d_x2, d_x3), axis=1) * self.dt return next_x - + def calc_f_x(self, xs, us, dt): """ gradient of model with respect to the state in batch form Args: xs (numpy.ndarray): state, shape(pred_len+1, state_size) us (numpy.ndarray): input, shape(pred_len, input_size,) - + Return: f_x (numpy.ndarray): gradient of model with respect to x, shape(pred_len, state_size, state_size) Notes: This should be discrete form !! - """ + """ # get size (_, state_size) = xs.shape (pred_len, _) = us.shape @@ -95,36 +97,36 @@ class CartPoleModel(Model): # f_theta tmp = ((self.mc + self.mp * np.sin(xs[:, 2])**2)**(-2)) \ - * self.mp * 2. * np.sin(xs[:, 2]) * np.cos(xs[:, 2]) + * self.mp * 2. * np.sin(xs[:, 2]) * np.cos(xs[:, 2]) tmp2 = 1. / (self.mc + self.mp * (np.sin(xs[:, 2])**2)) f_x[:, 1, 2] = - us[:, 0] * tmp \ - - tmp * (self.mp * np.sin(xs[:, 2]) \ - * (self.l * xs[:, 3]**2 \ + - tmp * (self.mp * np.sin(xs[:, 2]) + * (self.l * xs[:, 3]**2 + self.g * np.cos(xs[:, 2]))) \ - + tmp2 * (self.mp * np.cos(xs[:, 2]) * self.l \ - * xs[:, 3]**2 \ - + self.mp * self.g * (np.cos(xs[:, 2])**2 \ - - np.sin(xs[:, 2])**2)) + + tmp2 * (self.mp * np.cos(xs[:, 2]) * self.l + * xs[:, 3]**2 + + self.mp * self.g * (np.cos(xs[:, 2])**2 + - np.sin(xs[:, 2])**2)) f_x[:, 3, 2] = - 1. / self.l * tmp \ - * (-us[:, 0] * np.cos(xs[:, 2]) \ - - self.mp * self.l * (xs[:, 3]**2) \ - * np.cos(xs[:, 2]) * np.sin(xs[:, 2]) \ - - (self.mc + self.mp) * self.g * np.sin(xs[:, 2])) \ - + 1. / self.l * tmp2 \ - * (us[:, 0] * np.sin(xs[:, 2]) \ - - self.mp * self.l * xs[:, 3]**2 \ - * (np.cos(xs[:, 2])**2 - np.sin(xs[:, 2])**2) \ - - (self.mc + self.mp) \ - * self.g * np.cos(xs[:, 2])) + * (-us[:, 0] * np.cos(xs[:, 2]) + - self.mp * self.l * (xs[:, 3]**2) + * np.cos(xs[:, 2]) * np.sin(xs[:, 2]) + - (self.mc + self.mp) * self.g * np.sin(xs[:, 2])) \ + + 1. / self.l * tmp2 \ + * (us[:, 0] * np.sin(xs[:, 2]) + - self.mp * self.l * xs[:, 3]**2 + * (np.cos(xs[:, 2])**2 - np.sin(xs[:, 2])**2) + - (self.mc + self.mp) + * self.g * np.cos(xs[:, 2])) # f_theta_dot - f_x[:, 1, 3] = tmp2 * (self.mp * np.sin(xs[:, 2]) \ - * self.l * 2 * xs[:, 3]) + f_x[:, 1, 3] = tmp2 * (self.mp * np.sin(xs[:, 2]) + * self.l * 2 * xs[:, 3]) f_x[:, 2, 3] = np.ones(pred_len) f_x[:, 3, 3] = 1. / self.l * tmp2 \ - * (-2. * self.mp * self.l * xs[:, 3] \ - * np.cos(xs[:, 2]) * np.sin(xs[:, 2])) + * (-2. * self.mp * self.l * xs[:, 3] + * np.cos(xs[:, 2]) * np.sin(xs[:, 2])) return f_x * dt + np.eye(state_size) # to discrete form @@ -133,25 +135,25 @@ class CartPoleModel(Model): Args: xs (numpy.ndarray): state, shape(pred_len+1, state_size) us (numpy.ndarray): input, shape(pred_len, input_size,) - + Return: f_u (numpy.ndarray): gradient of model with respect to x, shape(pred_len, state_size, input_size) Notes: This should be discrete form !! - """ + """ # get size (_, state_size) = xs.shape (pred_len, input_size) = us.shape f_u = np.zeros((pred_len, state_size, input_size)) - + f_u[:, 1, 0] = 1. / (self.mc + self.mp * (np.sin(xs[:, 2])**2)) - + f_u[:, 3, 0] = -np.cos(xs[:, 2]) \ - / (self.l * (self.mc \ - + self.mp * (np.sin(xs[:, 2])**2))) + / (self.l * (self.mc + + self.mp * (np.sin(xs[:, 2])**2))) return f_u * dt # to discrete form @@ -161,7 +163,7 @@ class CartPoleModel(Model): Args: xs (numpy.ndarray): state, shape(pred_len+1, state_size) us (numpy.ndarray): input, shape(pred_len, input_size,) - + Return: f_xx (numpy.ndarray): gradient of model with respect to x, shape(pred_len, state_size, state_size, state_size) @@ -180,7 +182,7 @@ class CartPoleModel(Model): Args: xs (numpy.ndarray): state, shape(pred_len+1, state_size) us (numpy.ndarray): input, shape(pred_len, input_size,) - + Return: f_ux (numpy.ndarray): gradient of model with respect to x, shape(pred_len, state_size, input_size, state_size) @@ -199,7 +201,7 @@ class CartPoleModel(Model): Args: xs (numpy.ndarray): state, shape(pred_len+1, state_size) us (numpy.ndarray): input, shape(pred_len, input_size,) - + Return: f_uu (numpy.ndarray): gradient of model with respect to x, shape(pred_len, state_size, input_size, input_size) @@ -210,4 +212,4 @@ class CartPoleModel(Model): f_uu = np.zeros((pred_len, state_size, input_size, input_size)) - raise NotImplementedError \ No newline at end of file + raise NotImplementedError diff --git a/PythonLinearNonlinearControl/models/first_order_lag.py b/PythonLinearNonlinearControl/models/first_order_lag.py index a4a97fb..f3ccec5 100644 --- a/PythonLinearNonlinearControl/models/first_order_lag.py +++ b/PythonLinearNonlinearControl/models/first_order_lag.py @@ -3,6 +3,7 @@ import scipy.linalg from scipy import integrate from .model import LinearModel + class FirstOrderLagModel(LinearModel): """ first order lag model Attributes: @@ -10,13 +11,15 @@ class FirstOrderLagModel(LinearModel): u (numpy.ndarray): history_pred_xs (numpy.ndarray): """ + def __init__(self, config, tau=0.63): """ Args: tau (float): time constant - """ + """ # param - self.A, self.B = self._to_state_space(tau, dt=config.DT) # discrete system + self.A, self.B = self._to_state_space( + tau, dt=config.DT) # discrete system super(FirstOrderLagModel, self).__init__(self.A, self.B) @staticmethod @@ -31,21 +34,22 @@ class FirstOrderLagModel(LinearModel): """ # continuous Ac = np.array([[-1./tau, 0., 0., 0.], - [0., -1./tau, 0., 0.], - [1., 0., 0., 0.], - [0., 1., 0., 0.]]) + [0., -1./tau, 0., 0.], + [1., 0., 0., 0.], + [0., 1., 0., 0.]]) Bc = np.array([[1./tau, 0.], - [0., 1./tau], - [0., 0.], - [0., 0.]]) + [0., 1./tau], + [0., 0.], + [0., 0.]]) # to discrete system A = scipy.linalg.expm(dt*Ac) # B = np.matmul(np.matmul(scipy.linalg.expm(Ac*dt)-scipy.linalg.expm(Ac*0.), np.linalg.inv(Ac)), Bc) B = np.zeros_like(Bc) for m in range(Bc.shape[0]): for n in range(Bc.shape[1]): - integrate_fn = lambda tau: np.matmul(scipy.linalg.expm(Ac*tau), Bc)[m, n] + def integrate_fn(tau): return np.matmul( + scipy.linalg.expm(Ac*tau), Bc)[m, n] sol = integrate.quad(integrate_fn, 0, dt) B[m, n] = sol[0] - return A, B \ No newline at end of file + return A, B diff --git a/PythonLinearNonlinearControl/models/make_models.py b/PythonLinearNonlinearControl/models/make_models.py index fb628ad..50f11f0 100644 --- a/PythonLinearNonlinearControl/models/make_models.py +++ b/PythonLinearNonlinearControl/models/make_models.py @@ -1,14 +1,18 @@ from .first_order_lag import FirstOrderLagModel from .two_wheeled import TwoWheeledModel from .cartpole import CartPoleModel +from .nonlinear_sample_system import NonlinearSampleSystemModel + def make_model(args, config): - + if args.env == "FirstOrderLag": return FirstOrderLagModel(config) elif args.env == "TwoWheeledConst" or args.env == "TwoWheeledTrack": return TwoWheeledModel(config) elif args.env == "CartPole": return CartPoleModel(config) - - raise NotImplementedError("There is not {} Model".format(args.env)) \ No newline at end of file + elif args.env == "NonlinearSample": + return NonlinearSampleSystemModel(config) + + raise NotImplementedError("There is not {} Model".format(args.env)) diff --git a/PythonLinearNonlinearControl/models/model.py b/PythonLinearNonlinearControl/models/model.py index 5eb2cb7..6f44028 100644 --- a/PythonLinearNonlinearControl/models/model.py +++ b/PythonLinearNonlinearControl/models/model.py @@ -1,8 +1,10 @@ import numpy as np + class Model(): """ base class of model """ + def __init__(self): """ """ @@ -22,17 +24,17 @@ class Model(): or shape(pop_size, pred_len+1, state_size) """ if len(us.shape) == 3: - pred_xs =self._predict_traj_alltogether(curr_x, us) + pred_xs = self._predict_traj_alltogether(curr_x, us) elif len(us.shape) == 2: pred_xs = self._predict_traj(curr_x, us) else: raise ValueError("Invalid us") - + return pred_xs - + def _predict_traj(self, curr_x, us): """ predict trajectories - + Args: curr_x (numpy.ndarray): current state, shape(state_size, ) us (numpy.ndarray): inputs, shape(pred_len, input_size) @@ -53,10 +55,10 @@ class Model(): x = next_x return pred_xs - + def _predict_traj_alltogether(self, curr_x, us): """ predict trajectories for all samples - + Args: curr_x (numpy.ndarray): current state, shape(pop_size, state_size) us (numpy.ndarray): inputs, shape(pop_size, pred_len, input_size) @@ -75,17 +77,22 @@ class Model(): # next_x.shape = (pop_size, state_size) next_x = self.predict_next_state(x, us[t]) # update - pred_xs = np.concatenate((pred_xs, next_x[np.newaxis, :, :]),\ - axis=0) + pred_xs = np.concatenate((pred_xs, next_x[np.newaxis, :, :]), + axis=0) x = next_x return np.transpose(pred_xs, (1, 0, 2)) - + def predict_next_state(self, curr_x, u): """ predict next state """ raise NotImplementedError("Implement the model") + def x_dot(self, curr_x, u): + """ compute x dot + """ + raise NotImplementedError("Implement the model") + def predict_adjoint_traj(self, xs, us, g_xs): """ Args: @@ -95,32 +102,35 @@ class Model(): Returns: lams (numpy.ndarray): adjoint state, shape(pred_len, state_size), adjoint size is the same as state_size + Notes: + Adjoint trajectory be computed by backward path. + Usually, we should -\dot{lam} but in backward path case, we can use \dot{lam} directry """ # get size (pred_len, input_size) = us.shape # pred final adjoint state - lam = self.predict_terminal_adjoint_state(xs[-1],\ + lam = self.predict_terminal_adjoint_state(xs[-1], terminal_g_x=g_xs[-1]) lams = lam[np.newaxis, :] - for t in range(pred_len-1, 0, -1): + for t in range(pred_len-1, 0, -1): prev_lam = \ - self.predict_adjoint_state(lam, xs[t], us[t],\ - goal=g_xs[t], t=t) + self.predict_adjoint_state(lam, xs[t], us[t], + g_x=g_xs[t]) # update lams = np.concatenate((prev_lam[np.newaxis, :], lams), axis=0) lam = prev_lam - + return lams - def predict_adjoint_state(self, lam, x, u, goal=None, t=None): + def predict_adjoint_state(self, lam, x, u, g_x=None, t=None): """ predict adjoint states - + Args: lam (numpy.ndarray): adjoint state, shape(state_size, ) x (numpy.ndarray): state, shape(state_size, ) u (numpy.ndarray): input, shape(input_size, ) - goal (numpy.ndarray): goal state, shape(state_size, ) + g_x (numpy.ndarray): goal state, shape(state_size, ) Returns: prev_lam (numpy.ndarrya): previous adjoint state, shape(state_size, ) @@ -129,7 +139,7 @@ class Model(): def predict_terminal_adjoint_state(self, terminal_x, terminal_g_x=None): """ predict terminal adjoint state - + Args: terminal_x (numpy.ndarray): terminal state, shape(state_size, ) terminal_g_x (numpy.ndarray): terminal goal state, @@ -143,7 +153,7 @@ class Model(): @staticmethod def calc_f_x(xs, us, dt): """ gradient of model with respect to the state in batch form - """ + """ raise NotImplementedError("Implement gradient of model \ with respect to the state") @@ -153,11 +163,11 @@ class Model(): """ raise NotImplementedError("Implement gradient of model \ with respect to the input") - + @staticmethod def calc_f_xx(xs, us, dt): """ hessian of model with respect to the state in batch form - """ + """ raise NotImplementedError("Implement hessian of model \ with respect to the state") @@ -171,27 +181,29 @@ class Model(): @staticmethod def calc_f_uu(xs, us, dt): """ hessian of model with respect to the state in batch form - """ + """ raise NotImplementedError("Implement hessian of model \ with respect to the input") + class LinearModel(Model): """ discrete linear model, x[k+1] = Ax[k] + Bu[k] - + Attributes: A (numpy.ndarray): shape(state_size, state_size) B (numpy.ndarray): shape(state_size, input_size) """ + def __init__(self, A, B): """ """ super(LinearModel, self).__init__() self.A = A self.B = B - + def predict_next_state(self, curr_x, u): """ predict next state - + Args: curr_x (numpy.ndarray): current state, shape(state_size, ) or shape(pop_size, state_size) @@ -203,7 +215,7 @@ class LinearModel(Model): """ if len(u.shape) == 1: next_x = np.matmul(self.A, curr_x[:, np.newaxis]) \ - + np.matmul(self.B, u[:, np.newaxis]) + + np.matmul(self.B, u[:, np.newaxis]) return next_x.flatten() @@ -211,7 +223,7 @@ class LinearModel(Model): next_x = np.matmul(curr_x, self.A.T) + np.matmul(u, self.B.T) return next_x - + def calc_f_x(self, xs, us, dt): """ gradient of model with respect to the state in batch form @@ -223,7 +235,7 @@ class LinearModel(Model): shape(pred_len, state_size, state_size) Notes: This should be discrete form !! - """ + """ # get size (pred_len, _) = us.shape @@ -240,7 +252,7 @@ class LinearModel(Model): shape(pred_len, state_size, input_size) Notes: This should be discrete form !! - """ + """ # get size (pred_len, input_size) = us.shape @@ -283,7 +295,7 @@ class LinearModel(Model): f_ux = np.zeros((pred_len, state_size, input_size, state_size)) return f_ux - + @staticmethod def calc_f_uu(xs, us, dt): """ hessian of model with respect to input in batch form @@ -301,4 +313,4 @@ class LinearModel(Model): f_uu = np.zeros((pred_len, state_size, input_size, input_size)) - return f_uu + return f_uu diff --git a/PythonLinearNonlinearControl/models/nonlinear_sample_system.py b/PythonLinearNonlinearControl/models/nonlinear_sample_system.py new file mode 100644 index 0000000..faf6379 --- /dev/null +++ b/PythonLinearNonlinearControl/models/nonlinear_sample_system.py @@ -0,0 +1,217 @@ +import numpy as np + +from .model import Model +from ..common.utils import update_state_with_Runge_Kutta + + +class NonlinearSampleSystemModel(Model): + """ nonlinear sample system model + """ + + def __init__(self, config): + """ + """ + super(NonlinearSampleSystemModel, self).__init__() + self.dt = config.DT + self.gradient_hamiltonian_state = config.gradient_hamiltonian_state + self.gradient_hamiltonian_input = config.gradient_hamiltonian_input + self.gradient_cost_fn_state = config.gradient_cost_fn_state + + def predict_next_state(self, curr_x, u): + """ predict next state + + Args: + curr_x (numpy.ndarray): current state, shape(state_size, ) or + shape(pop_size, state_size) + u (numpy.ndarray): input, shape(input_size, ) or + shape(pop_size, input_size) + Returns: + next_x (numpy.ndarray): next state, shape(state_size, ) or + shape(pop_size, state_size) + """ + if len(u.shape) == 1: + func_1 = self._func_x_1 + func_2 = self._func_x_2 + functions = [func_1, func_2] + next_x = update_state_with_Runge_Kutta( + curr_x, u, functions, batch=False, dt=self.dt) + return next_x + + elif len(u.shape) == 2: + def func_1(xs, us): return self._func_x_1(xs, us, batch=True) + def func_2(xs, us): return self._func_x_2(xs, us, batch=True) + functions = [func_1, func_2] + next_x = update_state_with_Runge_Kutta( + curr_x, u, functions, batch=True, dt=self.dt) + + return next_x + + def x_dot(self, curr_x, u): + """ + Args: + curr_x (numpy.ndarray): current state, shape(state_size, ) + u (numpy.ndarray): input, shape(input_size, ) + Returns: + x_dot (numpy.ndarray): next state, shape(state_size, ) + """ + state_size = curr_x.shape[0] + x_dot = np.zeros(state_size) + x_dot[0] = self._func_x_1(curr_x, u) + x_dot[1] = self._func_x_2(curr_x, u) + return x_dot + + def predict_adjoint_state(self, lam, x, u, g_x=None): + """ predict adjoint states + + Args: + lam (numpy.ndarray): adjoint state, shape(state_size, ) + x (numpy.ndarray): state, shape(state_size, ) + u (numpy.ndarray): input, shape(input_size, ) + goal (numpy.ndarray): goal state, shape(state_size, ) + Returns: + prev_lam (numpy.ndarrya): previous adjoint state, + shape(state_size, ) + """ + if len(u.shape) == 1: + delta_lam = self.dt * \ + self.gradient_hamiltonian_state(x, lam, u, g_x) + prev_lam = lam + delta_lam + return prev_lam + + elif len(u.shape) == 2: + raise ValueError + + def predict_terminal_adjoint_state(self, terminal_x, terminal_g_x=None): + """ predict terminal adjoint state + + Args: + terminal_x (numpy.ndarray): terminal state, shape(state_size, ) + terminal_g_x (numpy.ndarray): terminal goal state, + shape(state_size, ) + Returns: + terminal_lam (numpy.ndarray): terminal adjoint state, + shape(state_size, ) + """ + terminal_lam = self.gradient_cost_fn_state( + terminal_x, terminal_g_x, terminal=True) # return in shape[1, state_size] + return terminal_lam[0] + + def _func_x_1(self, x, u, batch=False): + if not batch: + x_dot = x[1] + else: + x_dot = x[:, 1] + return x_dot + + def _func_x_2(self, x, u, batch=False): + if not batch: + x_dot = (1. - x[0]**2 - x[1]**2) * x[1] - x[0] + u[0] + else: + x_dot = (1. - x[:, 0]**2 - x[:, 1]**2) * \ + x[:, 1] - x[:, 0] + u[:, 0] + return x_dot + + def calc_f_x(self, xs, us, dt): + """ gradient of model with respect to the state in batch form + Args: + xs (numpy.ndarray): state, shape(pred_len+1, state_size) + us (numpy.ndarray): input, shape(pred_len, input_size,) + + Return: + f_x (numpy.ndarray): gradient of model with respect to x, + shape(pred_len, state_size, state_size) + + Notes: + This should be discrete form !! + """ + # get size + (_, state_size) = xs.shape + (pred_len, _) = us.shape + + f_x = np.zeros((pred_len, state_size, state_size)) + f_x[:, 0, 1] = 1. + f_x[:, 1, 0] = 2. * xs[:, 0] * xs[:, 1] - 1. + f_x[:, 1, 1] = - 2. * xs[:, 1] * xs[:, 1] + \ + (1. - xs[:, 0]**2 - xs[:, 1]**2) + + return f_x * dt + np.eye(state_size) # to discrete form + + def calc_f_u(self, xs, us, dt): + """ gradient of model with respect to the input in batch form + Args: + xs (numpy.ndarray): state, shape(pred_len+1, state_size) + us (numpy.ndarray): input, shape(pred_len, input_size,) + + Return: + f_u (numpy.ndarray): gradient of model with respect to x, + shape(pred_len, state_size, input_size) + + Notes: + This should be discrete form !! + """ + # get size + (_, state_size) = xs.shape + (pred_len, input_size) = us.shape + + f_u = np.zeros((pred_len, state_size, input_size)) + + f_u[:, 1, 0] = 1. + + return f_u * dt # to discrete form + + def calc_f_xx(self, xs, us, dt): + """ hessian of model with respect to the state in batch form + + Args: + xs (numpy.ndarray): state, shape(pred_len+1, state_size) + us (numpy.ndarray): input, shape(pred_len, input_size,) + + Return: + f_xx (numpy.ndarray): gradient of model with respect to x, + shape(pred_len, state_size, state_size, state_size) + """ + # get size + (_, state_size) = xs.shape + (pred_len, _) = us.shape + + f_xx = np.zeros((pred_len, state_size, state_size, state_size)) + + raise NotImplementedError + + def calc_f_ux(self, xs, us, dt): + """ hessian of model with respect to state and input in batch form + + Args: + xs (numpy.ndarray): state, shape(pred_len+1, state_size) + us (numpy.ndarray): input, shape(pred_len, input_size,) + + Return: + f_ux (numpy.ndarray): gradient of model with respect to x, + shape(pred_len, state_size, input_size, state_size) + """ + # get size + (_, state_size) = xs.shape + (pred_len, input_size) = us.shape + + f_ux = np.zeros((pred_len, state_size, input_size, state_size)) + + raise NotImplementedError + + def calc_f_uu(self, xs, us, dt): + """ hessian of model with respect to input in batch form + + Args: + xs (numpy.ndarray): state, shape(pred_len+1, state_size) + us (numpy.ndarray): input, shape(pred_len, input_size,) + + Return: + f_uu (numpy.ndarray): gradient of model with respect to x, + shape(pred_len, state_size, input_size, input_size) + """ + # get size + (_, state_size) = xs.shape + (pred_len, input_size) = us.shape + + f_uu = np.zeros((pred_len, state_size, input_size, input_size)) + + raise NotImplementedError diff --git a/PythonLinearNonlinearControl/models/two_wheeled.py b/PythonLinearNonlinearControl/models/two_wheeled.py index 3bd60d7..2f1e0a5 100644 --- a/PythonLinearNonlinearControl/models/two_wheeled.py +++ b/PythonLinearNonlinearControl/models/two_wheeled.py @@ -2,18 +2,23 @@ import numpy as np from .model import Model + class TwoWheeledModel(Model): """ two wheeled model """ + def __init__(self, config): """ """ super(TwoWheeledModel, self).__init__() self.dt = config.DT + self.gradient_hamiltonian_state = config.gradient_hamiltonian_state + self.gradient_hamiltonian_input = config.gradient_hamiltonian_input + self.gradient_cost_fn_state = config.gradient_cost_fn_state def predict_next_state(self, curr_x, u): """ predict next state - + Args: curr_x (numpy.ndarray): current state, shape(state_size, ) or shape(pop_size, state_size) @@ -50,21 +55,71 @@ class TwoWheeledModel(Model): next_x = x_dot[:, :, 0] * self.dt + curr_x return next_x - + + def x_dot(self, curr_x, u): + """ compute x dot + Args: + curr_x (numpy.ndarray): current state, shape(state_size, ) + u (numpy.ndarray): input, shape(input_size, ) + Returns: + x_dot (numpy.ndarray): next state, shape(state_size, ) + """ + B = np.array([[np.cos(curr_x[-1]), 0.], + [np.sin(curr_x[-1]), 0.], + [0., 1.]]) + x_dot = np.matmul(B, u[:, np.newaxis]) + return x_dot.flatten() + + def predict_adjoint_state(self, lam, x, u, g_x=None, t=None): + """ predict adjoint states + + Args: + lam (numpy.ndarray): adjoint state, shape(state_size, ) + x (numpy.ndarray): state, shape(state_size, ) + u (numpy.ndarray): input, shape(input_size, ) + goal (numpy.ndarray): goal state, shape(state_size, ) + Returns: + prev_lam (numpy.ndarrya): previous adjoint state, + shape(state_size, ) + """ + if len(u.shape) == 1: + delta_lam = self.dt * \ + self.gradient_hamiltonian_state(x, lam, u, g_x) + prev_lam = lam + delta_lam + return prev_lam + + elif len(u.shape) == 2: + raise ValueError + + def predict_terminal_adjoint_state(self, terminal_x, terminal_g_x=None): + """ predict terminal adjoint state + + Args: + terminal_x (numpy.ndarray): terminal state, shape(state_size, ) + terminal_g_x (numpy.ndarray): terminal goal state, + shape(state_size, ) + Returns: + terminal_lam (numpy.ndarray): terminal adjoint state, + shape(state_size, ) + """ + terminal_lam = self.gradient_cost_fn_state( + terminal_x, terminal_g_x, terminal=True) # return in shape[1, state_size] + return terminal_lam[0] + @staticmethod def calc_f_x(xs, us, dt): """ gradient of model with respect to the state in batch form Args: xs (numpy.ndarray): state, shape(pred_len+1, state_size) us (numpy.ndarray): input, shape(pred_len, input_size,) - + Return: f_x (numpy.ndarray): gradient of model with respect to x, shape(pred_len, state_size, state_size) Notes: This should be discrete form !! - """ + """ # get size (_, state_size) = xs.shape (pred_len, _) = us.shape @@ -81,14 +136,14 @@ class TwoWheeledModel(Model): Args: xs (numpy.ndarray): state, shape(pred_len+1, state_size) us (numpy.ndarray): input, shape(pred_len, input_size,) - + Return: f_u (numpy.ndarray): gradient of model with respect to x, shape(pred_len, state_size, input_size) Notes: This should be discrete form !! - """ + """ # get size (_, state_size) = xs.shape (pred_len, input_size) = us.shape @@ -107,7 +162,7 @@ class TwoWheeledModel(Model): Args: xs (numpy.ndarray): state, shape(pred_len+1, state_size) us (numpy.ndarray): input, shape(pred_len, input_size,) - + Return: f_xx (numpy.ndarray): gradient of model with respect to x, shape(pred_len, state_size, state_size, state_size) @@ -130,7 +185,7 @@ class TwoWheeledModel(Model): Args: xs (numpy.ndarray): state, shape(pred_len+1, state_size) us (numpy.ndarray): input, shape(pred_len, input_size,) - + Return: f_ux (numpy.ndarray): gradient of model with respect to x, shape(pred_len, state_size, input_size, state_size) @@ -145,7 +200,7 @@ class TwoWheeledModel(Model): f_ux[:, 1, 0, 2] = np.cos(xs[:, 2]) return f_ux * dt - + @staticmethod def calc_f_uu(xs, us, dt): """ hessian of model with respect to input in batch form @@ -153,7 +208,7 @@ class TwoWheeledModel(Model): Args: xs (numpy.ndarray): state, shape(pred_len+1, state_size) us (numpy.ndarray): input, shape(pred_len, input_size,) - + Return: f_uu (numpy.ndarray): gradient of model with respect to x, shape(pred_len, state_size, input_size, input_size) @@ -164,4 +219,4 @@ class TwoWheeledModel(Model): f_uu = np.zeros((pred_len, state_size, input_size, input_size)) - return f_uu * dt \ No newline at end of file + return f_uu * dt diff --git a/PythonLinearNonlinearControl/planners/closest_point_planner.py b/PythonLinearNonlinearControl/planners/closest_point_planner.py index 291aff3..cf24a97 100644 --- a/PythonLinearNonlinearControl/planners/closest_point_planner.py +++ b/PythonLinearNonlinearControl/planners/closest_point_planner.py @@ -1,9 +1,11 @@ import numpy as np from .planner import Planner + class ClosestPointPlanner(Planner): """ This planner make goal state according to goal path """ + def __init__(self, config): """ """ @@ -24,7 +26,7 @@ class ClosestPointPlanner(Planner): min_idx = np.argmin(np.linalg.norm(curr_x[:-1] - g_traj[:, :-1], axis=1)) - start = (min_idx+self.n_ahead) + start = (min_idx+self.n_ahead) if start > len(g_traj): start = len(g_traj) @@ -32,8 +34,8 @@ class ClosestPointPlanner(Planner): if (min_idx+self.n_ahead+self.pred_len+1) > len(g_traj): end = len(g_traj) - + if abs(start - end) != self.pred_len + 1: return np.tile(g_traj[-1], (self.pred_len+1, 1)) - return g_traj[start:end] \ No newline at end of file + return g_traj[start:end] diff --git a/PythonLinearNonlinearControl/planners/const_planner.py b/PythonLinearNonlinearControl/planners/const_planner.py index a0569a2..caa8110 100644 --- a/PythonLinearNonlinearControl/planners/const_planner.py +++ b/PythonLinearNonlinearControl/planners/const_planner.py @@ -1,9 +1,11 @@ import numpy as np from .planner import Planner + class ConstantPlanner(Planner): """ This planner make constant goal state """ + def __init__(self, config): """ """ @@ -20,4 +22,4 @@ class ConstantPlanner(Planner): Returns: g_xs (numpy.ndarrya): goal state, shape(pred_len, state_size) """ - return np.tile(g_x, (self.pred_len+1, 1)) \ No newline at end of file + return np.tile(g_x, (self.pred_len+1, 1)) diff --git a/PythonLinearNonlinearControl/planners/make_planners.py b/PythonLinearNonlinearControl/planners/make_planners.py index 9f18e51..eb77082 100644 --- a/PythonLinearNonlinearControl/planners/make_planners.py +++ b/PythonLinearNonlinearControl/planners/make_planners.py @@ -1,8 +1,9 @@ from .const_planner import ConstantPlanner from .closest_point_planner import ClosestPointPlanner + def make_planner(args, config): - + if args.env == "FirstOrderLag": return ConstantPlanner(config) elif args.env == "TwoWheeledConst": @@ -11,5 +12,8 @@ def make_planner(args, config): return ClosestPointPlanner(config) elif args.env == "CartPole": return ConstantPlanner(config) - - raise NotImplementedError("There is not {} Planner".format(args.planner_type)) \ No newline at end of file + elif args.env == "NonlinearSample": + return ConstantPlanner(config) + + raise NotImplementedError( + "There is not {} Planner".format(args.planner_type)) diff --git a/PythonLinearNonlinearControl/planners/planner.py b/PythonLinearNonlinearControl/planners/planner.py index 7e20b4f..c524ea7 100644 --- a/PythonLinearNonlinearControl/planners/planner.py +++ b/PythonLinearNonlinearControl/planners/planner.py @@ -1,8 +1,10 @@ import numpy as np + class Planner(): """ """ + def __init__(self): """ """ @@ -15,4 +17,4 @@ class Planner(): Returns: g_xs (numpy.ndarrya): goal state, shape(pred_len, state_size) """ - raise NotImplementedError("Implement plan func") \ No newline at end of file + raise NotImplementedError("Implement plan func") diff --git a/PythonLinearNonlinearControl/plotters/animator.py b/PythonLinearNonlinearControl/plotters/animator.py index 2260707..49a9d05 100644 --- a/PythonLinearNonlinearControl/plotters/animator.py +++ b/PythonLinearNonlinearControl/plotters/animator.py @@ -8,9 +8,11 @@ import matplotlib.animation as animation logger = getLogger(__name__) + class Animator(): """ animation class """ + def __init__(self, env, args=None): """ """ @@ -34,7 +36,7 @@ class Animator(): # make fig self.anim_fig = plt.figure() - # axis + # axis self.axis = self.anim_fig.add_subplot(111) self.axis.set_aspect('equal', adjustable='box') @@ -65,12 +67,12 @@ class Animator(): """ # set up animation figures self._setup() - _update_img = lambda i: self._update_img(i, history_x, history_g_x) + def _update_img(i): return self._update_img(i, history_x, history_g_x) # Set up formatting for the movie files Writer = animation.writers['ffmpeg'] writer = Writer(fps=15, metadata=dict(artist='Me'), bitrate=1800) - + # call funcanimation ani = FuncAnimation( self.anim_fig, @@ -79,6 +81,6 @@ class Animator(): # save animation path = os.path.join(self.result_dir, self.controller_type, "animation-" + self.env_name + ".mp4") - logger.info("Saved Animation to {} ...".format(path)) + logger.info("Saved Animation to {} ...".format(path)) ani.save(path, writer=writer) diff --git a/PythonLinearNonlinearControl/plotters/plot_func.py b/PythonLinearNonlinearControl/plotters/plot_func.py index 3d2a7de..d25e1c5 100644 --- a/PythonLinearNonlinearControl/plotters/plot_func.py +++ b/PythonLinearNonlinearControl/plotters/plot_func.py @@ -5,6 +5,7 @@ import matplotlib.pyplot as plt from ..helper import save_pickle, load_pickle + def plot_result(history, history_g=None, ylabel="x", save_dir="./result", name="state_history"): """ @@ -28,14 +29,14 @@ def plot_result(history, history_g=None, ylabel="x", def plot(axis, history, history_g=None): axis.plot(range(iters), history, c="r", linewidth=3) if history_g is not None: - axis.plot(range(iters), history_g,\ + axis.plot(range(iters), history_g, c="b", linewidth=3, label="goal") if i < size: plot(axis1, history[:, i], history_g=history_g[:, i]) if i+1 < size: plot(axis2, history[:, i+1], history_g=history_g[:, i+1]) - if i+2 < size: + if i+2 < size: plot(axis3, history[:, i+2], history_g=history_g[:, i+2]) # save @@ -47,6 +48,7 @@ def plot_result(history, history_g=None, ylabel="x", axis1.legend(ncol=1, bbox_to_anchor=(0., 1.02, 1., 0.102), loc=3) figure.savefig(path, bbox_inches="tight", pad_inches=0.05) + def plot_results(history_x, history_u, history_g=None, args=None): """ @@ -64,12 +66,13 @@ def plot_results(history_x, history_u, history_g=None, args=None): controller_type = args.controller_type plot_result(history_x, history_g=history_g, ylabel="x", - name= env + "-state_history", + name=env + "-state_history", save_dir="./result/" + controller_type) plot_result(history_u, history_g=np.zeros_like(history_u), ylabel="u", - name= env + "-input_history", + name=env + "-input_history", save_dir="./result/" + controller_type) + def save_plot_data(history_x, history_u, history_g=None, args=None): """ save plot data @@ -98,6 +101,7 @@ def save_plot_data(history_x, history_u, history_g=None, args=None): env + "-history_g.pkl") save_pickle(path, history_g) + def load_plot_data(env, controller_type, result_dir="./result"): """ Args: @@ -123,6 +127,7 @@ def load_plot_data(env, controller_type, result_dir="./result"): return history_x, history_u, history_g + def plot_multi_result(histories, histories_g=None, labels=None, ylabel="x", save_dir="./result", name="state_history"): """ @@ -130,7 +135,7 @@ def plot_multi_result(histories, histories_g=None, labels=None, ylabel="x", history (numpy.ndarray): history, shape(iters, size) """ (_, iters, size) = histories.shape - + for i in range(0, size, 2): figure = plt.figure() @@ -146,17 +151,17 @@ def plot_multi_result(histories, histories_g=None, labels=None, ylabel="x", axis.plot(range(iters), history, linewidth=3, label=label, alpha=0.7, linestyle="dashed") if history_g is not None: - axis.plot(range(iters), history_g,\ + axis.plot(range(iters), history_g, c="b", linewidth=3) if i < size: for j, (history, history_g) \ - in enumerate(zip(histories, histories_g)): + in enumerate(zip(histories, histories_g)): plot(axis1, history[:, i], history_g=history_g[:, i], label=labels[j]) if i+1 < size: for j, (history, history_g) in \ - enumerate(zip(histories, histories_g)): + enumerate(zip(histories, histories_g)): plot(axis2, history[:, i+1], history_g=history_g[:, i+1], label=labels[j]) diff --git a/PythonLinearNonlinearControl/plotters/plot_objs.py b/PythonLinearNonlinearControl/plotters/plot_objs.py index 911cb5f..1e14bcb 100644 --- a/PythonLinearNonlinearControl/plotters/plot_objs.py +++ b/PythonLinearNonlinearControl/plotters/plot_objs.py @@ -5,9 +5,10 @@ import matplotlib.pyplot as plt from ..common.utils import rotate_pos + def circle(center_x, center_y, radius, start=0., end=2*np.pi, n_point=100): """ Create circle matrix - + Args: center_x (float): the center x position of the circle center_y (float): the center y position of the circle @@ -29,6 +30,7 @@ def circle(center_x, center_y, radius, start=0., end=2*np.pi, n_point=100): return np.array(circle_xs), np.array(circle_ys) + def circle_with_angle(center_x, center_y, radius, angle): """ Create circle matrix with angle line matrix @@ -50,6 +52,7 @@ def circle_with_angle(center_x, center_y, radius, angle): return circle_x, circle_y, angle_x, angle_y + def square(center_x, center_y, shape, angle): """ Create square @@ -74,9 +77,10 @@ def square(center_x, center_y, shape, angle): trans_points = rotate_pos(square_xy, angle) # translation trans_points += np.array([center_x, center_y]) - + return trans_points[:, 0], trans_points[:, 1] + def square_with_angle(center_x, center_y, shape, angle): """ Create square with angle line @@ -96,4 +100,4 @@ def square_with_angle(center_x, center_y, shape, angle): angle_x = np.array([center_x, center_x + np.cos(angle) * shape[0]]) angle_y = np.array([center_y, center_y + np.sin(angle) * shape[1]]) - return square_x, square_y, angle_x, angle_y \ No newline at end of file + return square_x, square_y, angle_x, angle_y diff --git a/PythonLinearNonlinearControl/runners/__init__.py b/PythonLinearNonlinearControl/runners/__init__.py index 8fd5521..0406fea 100644 --- a/PythonLinearNonlinearControl/runners/__init__.py +++ b/PythonLinearNonlinearControl/runners/__init__.py @@ -1,2 +1,2 @@ from PythonLinearNonlinearControl.runners.runner \ - import ExpRunner # NOQA \ No newline at end of file + import ExpRunner # NOQA diff --git a/PythonLinearNonlinearControl/runners/make_runners.py b/PythonLinearNonlinearControl/runners/make_runners.py index be08186..133ae06 100644 --- a/PythonLinearNonlinearControl/runners/make_runners.py +++ b/PythonLinearNonlinearControl/runners/make_runners.py @@ -1,4 +1,5 @@ from .runner import ExpRunner + def make_runner(args): - return ExpRunner() \ No newline at end of file + return ExpRunner() diff --git a/PythonLinearNonlinearControl/runners/runner.py b/PythonLinearNonlinearControl/runners/runner.py index 4ef0c6b..b83789d 100644 --- a/PythonLinearNonlinearControl/runners/runner.py +++ b/PythonLinearNonlinearControl/runners/runner.py @@ -4,9 +4,11 @@ import numpy as np logger = getLogger(__name__) + class ExpRunner(): """ experiment runner """ + def __init__(self): """ """ @@ -46,6 +48,6 @@ class ExpRunner(): score += cost step_count += 1 - logger.debug("Controller type = {}, Score = {}"\ + logger.debug("Controller type = {}, Score = {}" .format(controller, score)) - return np.array(history_x), np.array(history_u), np.array(history_g) \ No newline at end of file + return np.array(history_x), np.array(history_u), np.array(history_g) diff --git a/README.md b/README.md index d03d02b..6f35bfb 100644 --- a/README.md +++ b/README.md @@ -53,10 +53,10 @@ Following algorithms are implemented in PythonLinearNonlinearControl - [script](PythonLinearNonlinearControl/controllers/ddp.py) - [Unconstrained Nonlinear Model Predictive Control (NMPC)](https://www.sciencedirect.com/science/article/pii/S0005109897000058) - Ref: Ohtsuka, T., & Fujii, H. A. (1997). Real-time optimization algorithm for nonlinear receding-horizon control. Automatica, 33(6), 1147-1154. - - [script (Coming soon)]() + - [script](PythonLinearNonlinearControl/controllers/nmpc.py) - [Constrained Nonlinear Model Predictive Control -CGMRES- (NMPC-CGMRES)](https://www.sciencedirect.com/science/article/pii/S0005109897000058) - Ref: Ohtsuka, T., & Fujii, H. A. (1997). Real-time optimization algorithm for nonlinear receding-horizon control. Automatica, 33(6), 1147-1154. - - [script (Coming soon)]() + - [script](PythonLinearNonlinearControl/controllers/nmpc_cgmres.py) - [Constrained Nonlinear Model Predictive Control -Newton- (NMPC-Newton)](https://www.sciencedirect.com/science/article/pii/S0005109897000058) - Ref: Ohtsuka, T., & Fujii, H. A. (1997). Real-time optimization algorithm for nonlinear receding-horizon control. Automatica, 33(6), 1147-1154. - [script (Coming soon)]() @@ -71,6 +71,7 @@ There are 4 example environments, "FirstOrderLag", "TwoWheeledConst", "TwoWheele | Two wheeled System (Constant Goal) | x | ✓ | 3 | 2 | | Two wheeled System (Moving Goal) | x | ✓ | 3 | 2 | | Cartpole (Swing up) | x | ✓ | 4 | 1 | +| Nonlinear Sample System Env | x | ✓ | 2 | 1 | All states and inputs of environments are continuous. **It should be noted that the algorithms for linear model could be applied to nonlinear enviroments if you have linealized the model of nonlinear environments.** @@ -184,6 +185,8 @@ save_plot_data(history_x, history_u, history_g=history_g) animator = Animator(env) animator.draw(history_x, history_g) ``` +**It should be noted that the controller parameters like Q, R and Sf strongly affect the performence of the controller. +Please, check these parameters before you run the simulation.** ## Run Example Script diff --git a/assets/nonlinear_sample_system.png b/assets/nonlinear_sample_system.png new file mode 100644 index 0000000..95e0eb6 Binary files /dev/null and b/assets/nonlinear_sample_system.png differ diff --git a/assets/nonlinear_sample_system_score.png b/assets/nonlinear_sample_system_score.png new file mode 100644 index 0000000..26e8310 Binary files /dev/null and b/assets/nonlinear_sample_system_score.png differ diff --git a/scripts/show_result.py b/scripts/show_result.py index e54b9dc..f1fd300 100644 --- a/scripts/show_result.py +++ b/scripts/show_result.py @@ -6,7 +6,8 @@ import numpy as np import matplotlib.pyplot as plt from PythonLinearNonlinearControl.plotters.plot_func import load_plot_data, \ - plot_multi_result + plot_multi_result + def run(args): @@ -17,7 +18,7 @@ def run(args): history_gs = None # load data - for controller in controllers: + for controller in controllers: history_x, history_u, history_g = \ load_plot_data(args.env, controller, result_dir=args.result_dir) @@ -27,19 +28,20 @@ def run(args): history_us = history_u[np.newaxis, :] history_gs = history_g[np.newaxis, :] continue - + history_xs = np.concatenate((history_xs, history_x[np.newaxis, :]), axis=0) history_us = np.concatenate((history_us, history_u[np.newaxis, :]), axis=0) history_gs = np.concatenate((history_gs, history_g[np.newaxis, :]), axis=0) - + plot_multi_result(history_xs, histories_g=history_gs, labels=controllers, ylabel="x") plot_multi_result(history_us, histories_g=np.zeros_like(history_us), - labels=controllers, ylabel="u", name="input_history") + labels=controllers, ylabel="u", name="input_history") + def main(): parser = argparse.ArgumentParser() @@ -51,5 +53,6 @@ def main(): run(args) + if __name__ == "__main__": - main() \ No newline at end of file + main() diff --git a/scripts/simple_run.py b/scripts/simple_run.py index 2ef1fd8..8d12a0f 100644 --- a/scripts/simple_run.py +++ b/scripts/simple_run.py @@ -8,35 +8,27 @@ from PythonLinearNonlinearControl.models.make_models import make_model from PythonLinearNonlinearControl.envs.make_envs import make_env from PythonLinearNonlinearControl.runners.make_runners import make_runner from PythonLinearNonlinearControl.plotters.plot_func import plot_results, \ - save_plot_data + save_plot_data from PythonLinearNonlinearControl.plotters.animator import Animator + def run(args): - # logger make_logger(args.result_dir) - # make envs env = make_env(args) - # make config config = make_config(args) - # make planner planner = make_planner(args, config) - - # make model + model = make_model(args, config) - - # make controller + controller = make_controller(args, config, model) - - # make simulator + runner = make_runner(args) - # run experiment - history_x, history_u, history_g = runner.run(env, controller, planner) + history_x, history_u, history_g = runner.run(env, controller, planner) - # plot results plot_results(history_x, history_u, history_g=history_g, args=args) save_plot_data(history_x, history_u, history_g=history_g, args=args) @@ -44,17 +36,19 @@ def run(args): animator = Animator(env, args=args) animator.draw(history_x, history_g) + def main(): parser = argparse.ArgumentParser() - parser.add_argument("--controller_type", type=str, default="CEM") - parser.add_argument("--env", type=str, default="TwoWheeledTrack") - parser.add_argument("--save_anim", type=bool_flag, default=1) + parser.add_argument("--controller_type", type=str, default="NMPCCGMRES") + parser.add_argument("--env", type=str, default="TwoWheeledConst") + parser.add_argument("--save_anim", type=bool_flag, default=0) parser.add_argument("--result_dir", type=str, default="./result") args = parser.parse_args() run(args) + if __name__ == "__main__": - main() \ No newline at end of file + main() diff --git a/tests/configs/test_cartpole.py b/tests/configs/test_cartpole.py index 877ba9e..1b8908c 100644 --- a/tests/configs/test_cartpole.py +++ b/tests/configs/test_cartpole.py @@ -4,6 +4,7 @@ import numpy as np from PythonLinearNonlinearControl.configs.cartpole \ import CartPoleConfigModule + class TestCalcCost(): def test_calc_costs(self): # make config @@ -22,25 +23,26 @@ class TestCalcCost(): assert costs.shape == (pop_size, pred_len, input_size) costs = config.state_cost_fn(pred_xs, g_xs) - + assert costs.shape == (pop_size, pred_len, 1) - costs = config.terminal_state_cost_fn(pred_xs[:, -1, :],\ + costs = config.terminal_state_cost_fn(pred_xs[:, -1, :], g_xs[:, -1, :]) - + assert costs.shape == (pop_size, 1) + class TestGradient(): def test_state_gradient(self): """ """ xs = np.ones((1, 4)) - cost_grad = CartPoleConfigModule.gradient_cost_fn_with_state(xs, None) + cost_grad = CartPoleConfigModule.gradient_cost_fn_state(xs, None) # numeric grad eps = 1e-4 expected_grad = np.zeros((1, 4)) - for i in range(4): + for i in range(4): tmp_x = xs.copy() tmp_x[0, i] = xs[0, i] + eps forward = \ @@ -51,7 +53,7 @@ class TestGradient(): CartPoleConfigModule.state_cost_fn(tmp_x, None) expected_grad[0, i] = (forward - backward) / (2. * eps) - + assert cost_grad == pytest.approx(expected_grad) def test_terminal_state_gradient(self): @@ -59,13 +61,13 @@ class TestGradient(): """ xs = np.ones(4) cost_grad =\ - CartPoleConfigModule.gradient_cost_fn_with_state(xs, None, - terminal=True) + CartPoleConfigModule.gradient_cost_fn_state(xs, None, + terminal=True) # numeric grad eps = 1e-4 expected_grad = np.zeros((1, 4)) - for i in range(4): + for i in range(4): tmp_x = xs.copy() tmp_x[i] = xs[i] + eps forward = \ @@ -76,19 +78,19 @@ class TestGradient(): CartPoleConfigModule.state_cost_fn(tmp_x, None) expected_grad[0, i] = (forward - backward) / (2. * eps) - + assert cost_grad == pytest.approx(expected_grad) - + def test_input_gradient(self): """ """ us = np.ones((1, 1)) - cost_grad = CartPoleConfigModule.gradient_cost_fn_with_input(None, us) + cost_grad = CartPoleConfigModule.gradient_cost_fn_input(None, us) # numeric grad eps = 1e-4 expected_grad = np.zeros((1, 1)) - for i in range(1): + for i in range(1): tmp_u = us.copy() tmp_u[0, i] = us[0, i] + eps forward = \ @@ -99,32 +101,32 @@ class TestGradient(): CartPoleConfigModule.input_cost_fn(tmp_u) expected_grad[0, i] = (forward - backward) / (2. * eps) - + assert cost_grad == pytest.approx(expected_grad) - + def test_state_hessian(self): """ """ xs = np.ones((1, 4)) - cost_hess = CartPoleConfigModule.hessian_cost_fn_with_state(xs, None) + cost_hess = CartPoleConfigModule.hessian_cost_fn_state(xs, None) # numeric grad eps = 1e-4 expected_hess = np.zeros((1, 4, 4)) - for i in range(4): + for i in range(4): tmp_x = xs.copy() tmp_x[0, i] = xs[0, i] + eps forward = \ - CartPoleConfigModule.gradient_cost_fn_with_state( + CartPoleConfigModule.gradient_cost_fn_state( tmp_x, None, terminal=False) tmp_x = xs.copy() tmp_x[0, i] = xs[0, i] - eps backward = \ - CartPoleConfigModule.gradient_cost_fn_with_state( + CartPoleConfigModule.gradient_cost_fn_state( tmp_x, None, terminal=False) expected_hess[0, :, i] = (forward - backward) / (2. * eps) - + assert cost_hess == pytest.approx(expected_hess) def test_terminal_state_hessian(self): @@ -132,50 +134,50 @@ class TestGradient(): """ xs = np.ones(4) cost_hess =\ - CartPoleConfigModule.hessian_cost_fn_with_state(xs, None, - terminal=True) + CartPoleConfigModule.hessian_cost_fn_state(xs, None, + terminal=True) # numeric grad eps = 1e-4 expected_hess = np.zeros((1, 4, 4)) - for i in range(4): + for i in range(4): tmp_x = xs.copy() tmp_x[i] = xs[i] + eps forward = \ - CartPoleConfigModule.gradient_cost_fn_with_state( + CartPoleConfigModule.gradient_cost_fn_state( tmp_x, None, terminal=True) tmp_x = xs.copy() tmp_x[i] = xs[i] - eps backward = \ - CartPoleConfigModule.gradient_cost_fn_with_state( + CartPoleConfigModule.gradient_cost_fn_state( tmp_x, None, terminal=True) expected_hess[0, :, i] = (forward - backward) / (2. * eps) - + assert cost_hess == pytest.approx(expected_hess) - + def test_input_hessian(self): """ """ us = np.ones((1, 1)) xs = np.ones((1, 4)) - cost_hess = CartPoleConfigModule.hessian_cost_fn_with_input(us, xs) + cost_hess = CartPoleConfigModule.hessian_cost_fn_input(us, xs) # numeric grad eps = 1e-4 expected_hess = np.zeros((1, 1, 1)) - for i in range(1): + for i in range(1): tmp_u = us.copy() tmp_u[0, i] = us[0, i] + eps forward = \ - CartPoleConfigModule.gradient_cost_fn_with_input( + CartPoleConfigModule.gradient_cost_fn_input( xs, tmp_u) tmp_u = us.copy() tmp_u[0, i] = us[0, i] - eps backward = \ - CartPoleConfigModule.gradient_cost_fn_with_input( + CartPoleConfigModule.gradient_cost_fn_input( xs, tmp_u) expected_hess[0, :, i] = (forward - backward) / (2. * eps) - - assert cost_hess == pytest.approx(expected_hess) \ No newline at end of file + + assert cost_hess == pytest.approx(expected_hess) diff --git a/tests/configs/test_two_wheeled.py b/tests/configs/test_two_wheeled.py index 9ad0408..6e64fff 100644 --- a/tests/configs/test_two_wheeled.py +++ b/tests/configs/test_two_wheeled.py @@ -4,6 +4,7 @@ import numpy as np from PythonLinearNonlinearControl.configs.two_wheeled \ import TwoWheeledConfigModule + class TestCalcCost(): def test_calc_costs(self): # make config @@ -24,15 +25,16 @@ class TestCalcCost(): costs = config.state_cost_fn(pred_xs, g_xs) expected_costs = np.ones((pop_size, pred_len, state_size))*0.5 - + assert costs == pytest.approx(expected_costs**2 * np.diag(config.Q)) - costs = config.terminal_state_cost_fn(pred_xs[:, -1, :],\ + costs = config.terminal_state_cost_fn(pred_xs[:, -1, :], g_xs[:, -1, :]) expected_costs = np.ones((pop_size, state_size))*0.5 - + assert costs == pytest.approx(expected_costs**2 * np.diag(config.Sf)) + class TestGradient(): def test_state_gradient(self): """ @@ -40,12 +42,12 @@ class TestGradient(): xs = np.ones((1, 3)) g_xs = np.ones((1, 3)) * 0.5 cost_grad =\ - TwoWheeledConfigModule.gradient_cost_fn_with_state(xs, g_xs) + TwoWheeledConfigModule.gradient_cost_fn_state(xs, g_xs) # numeric grad eps = 1e-4 expected_grad = np.zeros((1, 3)) - for i in range(3): + for i in range(3): tmp_x = xs.copy() tmp_x[0, i] = xs[0, i] + eps forward = \ @@ -58,20 +60,20 @@ class TestGradient(): backward = np.sum(backward) expected_grad[0, i] = (forward - backward) / (2. * eps) - + assert cost_grad == pytest.approx(expected_grad) - + def test_input_gradient(self): """ """ us = np.ones((1, 2)) cost_grad =\ - TwoWheeledConfigModule.gradient_cost_fn_with_input(None, us) + TwoWheeledConfigModule.gradient_cost_fn_input(None, us) # numeric grad eps = 1e-4 expected_grad = np.zeros((1, 2)) - for i in range(2): + for i in range(2): tmp_u = us.copy() tmp_u[0, i] = us[0, i] + eps forward = \ @@ -84,58 +86,58 @@ class TestGradient(): backward = np.sum(backward) expected_grad[0, i] = (forward - backward) / (2. * eps) - + assert cost_grad == pytest.approx(expected_grad) - + def test_state_hessian(self): """ """ g_xs = np.ones((1, 3)) * 0.5 xs = np.ones((1, 3)) cost_hess =\ - TwoWheeledConfigModule.hessian_cost_fn_with_state(xs, g_xs) + TwoWheeledConfigModule.hessian_cost_fn_state(xs, g_xs) # numeric grad eps = 1e-4 expected_hess = np.zeros((1, 3, 3)) - for i in range(3): + for i in range(3): tmp_x = xs.copy() tmp_x[0, i] = xs[0, i] + eps forward = \ - TwoWheeledConfigModule.gradient_cost_fn_with_state( + TwoWheeledConfigModule.gradient_cost_fn_state( tmp_x, g_xs, terminal=False) tmp_x = xs.copy() tmp_x[0, i] = xs[0, i] - eps backward = \ - TwoWheeledConfigModule.gradient_cost_fn_with_state( + TwoWheeledConfigModule.gradient_cost_fn_state( tmp_x, g_xs, terminal=False) expected_hess[0, :, i] = (forward - backward) / (2. * eps) - + assert cost_hess == pytest.approx(expected_hess) - + def test_input_hessian(self): """ """ us = np.ones((1, 2)) xs = np.ones((1, 3)) - cost_hess = TwoWheeledConfigModule.hessian_cost_fn_with_input(us, xs) + cost_hess = TwoWheeledConfigModule.hessian_cost_fn_input(us, xs) # numeric grad eps = 1e-4 expected_hess = np.zeros((1, 2, 2)) - for i in range(2): + for i in range(2): tmp_u = us.copy() tmp_u[0, i] = us[0, i] + eps forward = \ - TwoWheeledConfigModule.gradient_cost_fn_with_input( + TwoWheeledConfigModule.gradient_cost_fn_input( xs, tmp_u) tmp_u = us.copy() tmp_u[0, i] = us[0, i] - eps backward = \ - TwoWheeledConfigModule.gradient_cost_fn_with_input( + TwoWheeledConfigModule.gradient_cost_fn_input( xs, tmp_u) expected_hess[0, :, i] = (forward - backward) / (2. * eps) - - assert cost_hess == pytest.approx(expected_hess) \ No newline at end of file + + assert cost_hess == pytest.approx(expected_hess)