From fb09e919dabf0472dbf7eac8d855732718837aad Mon Sep 17 00:00:00 2001 From: Shunichi09 Date: Sun, 14 Feb 2021 23:17:47 +0900 Subject: [PATCH] Add cgmres --- PythonLinearNonlinearControl/common/utils.py | 29 +++ .../configs/cartpole.py | 10 +- .../configs/first_order_lag.py | 10 +- .../configs/make_configs.py | 4 +- .../configs/nonlinear_sample_system.py | 93 ++++++++-- .../configs/two_wheeled.py | 10 +- .../controllers/controller.py | 14 -- .../controllers/ddp.py | 34 ++-- .../controllers/ilqr.py | 34 ++-- .../controllers/make_controllers.py | 3 + .../controllers/nmpc.py | 37 +++- .../controllers/nmpc_cgmres.py | 167 ++++++++++++++++++ PythonLinearNonlinearControl/models/model.py | 7 +- .../models/nonlinear_sample_system.py | 24 ++- .../models/two_wheeled.py | 4 +- scripts/simple_run.py | 4 +- tests/configs/test_cartpole.py | 68 +++---- tests/configs/test_two_wheeled.py | 48 ++--- 18 files changed, 452 insertions(+), 148 deletions(-) diff --git a/PythonLinearNonlinearControl/common/utils.py b/PythonLinearNonlinearControl/common/utils.py index e7f6d82..5fd2092 100644 --- a/PythonLinearNonlinearControl/common/utils.py +++ b/PythonLinearNonlinearControl/common/utils.py @@ -116,3 +116,32 @@ def update_state_with_Runge_Kutta(state, u, functions, dt=0.01, batch=True): k3[:, i] = dt * func(state + k2, u) return state + (k0 + 2. * k1 + 2. * k2 + k3) / 6. + + +def line_search(grad, sol, compute_eval_val, + init_alpha=0.001, max_iter=100, update_ratio=1.): + """ line search + Args: + grad (numpy.ndarray): gradient + sol (numpy.ndarray): sol + compute_eval_val (numpy.ndarray): function to compute evaluation value + + Returns: + alpha (float): result of line search + """ + assert grad.shape == sol.shape + base_val = np.inf + alpha = init_alpha + original_sol = sol.copy() + + for _ in range(max_iter): + updated_sol = original_sol - alpha * grad + eval_val = compute_eval_val(updated_sol) + + if eval_val < base_val: + alpha += init_alpha * update_ratio + base_val = eval_val + else: + break + + return alpha diff --git a/PythonLinearNonlinearControl/configs/cartpole.py b/PythonLinearNonlinearControl/configs/cartpole.py index e8d10a6..416f9a7 100644 --- a/PythonLinearNonlinearControl/configs/cartpole.py +++ b/PythonLinearNonlinearControl/configs/cartpole.py @@ -148,7 +148,7 @@ class CartPoleConfigModule(): * CartPoleConfigModule.TERMINAL_WEIGHT @staticmethod - def gradient_cost_fn_with_state(x, g_x, terminal=False): + def gradient_cost_fn_state(x, g_x, terminal=False): """ gradient of costs with respect to the state Args: @@ -177,7 +177,7 @@ class CartPoleConfigModule(): return cost_dx * CartPoleConfigModule.TERMINAL_WEIGHT @staticmethod - def gradient_cost_fn_with_input(x, u): + def gradient_cost_fn_input(x, u): """ gradient of costs with respect to the input Args: @@ -189,7 +189,7 @@ class CartPoleConfigModule(): return 2. * u * np.diag(CartPoleConfigModule.R) @staticmethod - def hessian_cost_fn_with_state(x, g_x, terminal=False): + def hessian_cost_fn_state(x, g_x, terminal=False): """ hessian costs with respect to the state Args: @@ -227,7 +227,7 @@ class CartPoleConfigModule(): return hessian[np.newaxis, :, :] * CartPoleConfigModule.TERMINAL_WEIGHT @staticmethod - def hessian_cost_fn_with_input(x, u): + def hessian_cost_fn_input(x, u): """ hessian costs with respect to the input Args: @@ -242,7 +242,7 @@ class CartPoleConfigModule(): return np.tile(2.*CartPoleConfigModule.R, (pred_len, 1, 1)) @staticmethod - def hessian_cost_fn_with_input_state(x, u): + def hessian_cost_fn_input_state(x, u): """ hessian costs with respect to the state and input Args: diff --git a/PythonLinearNonlinearControl/configs/first_order_lag.py b/PythonLinearNonlinearControl/configs/first_order_lag.py index 074d8db..9cf93b0 100644 --- a/PythonLinearNonlinearControl/configs/first_order_lag.py +++ b/PythonLinearNonlinearControl/configs/first_order_lag.py @@ -115,7 +115,7 @@ class FirstOrderLagConfigModule(): * np.diag(FirstOrderLagConfigModule.Sf) @staticmethod - def gradient_cost_fn_with_state(x, g_x, terminal=False): + def gradient_cost_fn_state(x, g_x, terminal=False): """ gradient of costs with respect to the state Args: @@ -133,7 +133,7 @@ class FirstOrderLagConfigModule(): * np.diag(FirstOrderLagConfigModule.Sf))[np.newaxis, :] @staticmethod - def gradient_cost_fn_with_input(x, u): + def gradient_cost_fn_input(x, u): """ gradient of costs with respect to the input Args: @@ -146,7 +146,7 @@ class FirstOrderLagConfigModule(): return 2. * u * np.diag(FirstOrderLagConfigModule.R) @staticmethod - def hessian_cost_fn_with_state(x, g_x, terminal=False): + def hessian_cost_fn_state(x, g_x, terminal=False): """ hessian costs with respect to the state Args: @@ -165,7 +165,7 @@ class FirstOrderLagConfigModule(): return np.tile(2.*FirstOrderLagConfigModule.Sf, (1, 1, 1)) @staticmethod - def hessian_cost_fn_with_input(x, u): + def hessian_cost_fn_input(x, u): """ hessian costs with respect to the input Args: @@ -181,7 +181,7 @@ class FirstOrderLagConfigModule(): return np.tile(2.*FirstOrderLagConfigModule.R, (pred_len, 1, 1)) @staticmethod - def hessian_cost_fn_with_input_state(x, u): + def hessian_cost_fn_input_state(x, u): """ hessian costs with respect to the state and input Args: diff --git a/PythonLinearNonlinearControl/configs/make_configs.py b/PythonLinearNonlinearControl/configs/make_configs.py index 1267470..7f07d20 100644 --- a/PythonLinearNonlinearControl/configs/make_configs.py +++ b/PythonLinearNonlinearControl/configs/make_configs.py @@ -1,7 +1,7 @@ from .first_order_lag import FirstOrderLagConfigModule from .two_wheeled import TwoWheeledConfigModule from .cartpole import CartPoleConfigModule -from .nonlinear_sample_system import NonlinearSampleSystemConfigModule +from .nonlinear_sample_system import NonlinearSampleSystemConfigModule, NonlinearSampleSystemExtendConfigModule def make_config(args): @@ -16,4 +16,6 @@ def make_config(args): elif args.env == "CartPole": return CartPoleConfigModule() elif args.env == "NonlinearSample": + if args.controller_type == "NMPCCGMRES": + return NonlinearSampleSystemExtendConfigModule() return NonlinearSampleSystemConfigModule() diff --git a/PythonLinearNonlinearControl/configs/nonlinear_sample_system.py b/PythonLinearNonlinearControl/configs/nonlinear_sample_system.py index 1f75097..6d74d49 100644 --- a/PythonLinearNonlinearControl/configs/nonlinear_sample_system.py +++ b/PythonLinearNonlinearControl/configs/nonlinear_sample_system.py @@ -62,18 +62,11 @@ class NonlinearSampleSystemConfigModule(): "threshold": 1e-6, }, "NMPC": { - "threshold": 1e-5, - "max_iters": 1000, - "learning_rate": 0.1 - }, - "NMPC-CGMRES": { - "threshold": 1e-3 - }, - "NMPC-Newton": { - "threshold": 1e-3, - "max_iteration": 500, - "learning_rate": 1e-3 - }, + "threshold": 0.01, + "max_iters": 5000, + "learning_rate": 0.01, + "optimizer_mode": "conjugate" + } } @staticmethod @@ -133,7 +126,7 @@ class NonlinearSampleSystemConfigModule(): return 0.5 * (terminal_x[0]**2) + 0.5 * (terminal_x[1]**2) @staticmethod - def gradient_cost_fn_with_state(x, g_x, terminal=False): + def gradient_cost_fn_state(x, g_x, terminal=False): """ gradient of costs with respect to the state Args: @@ -157,7 +150,7 @@ class NonlinearSampleSystemConfigModule(): return cost_dx @staticmethod - def gradient_cost_fn_with_input(x, u): + def gradient_cost_fn_input(x, u): """ gradient of costs with respect to the input Args: @@ -169,7 +162,7 @@ class NonlinearSampleSystemConfigModule(): return 2. * u * np.diag(NonlinearSampleSystemConfigModule.R) @staticmethod - def hessian_cost_fn_with_state(x, g_x, terminal=False): + def hessian_cost_fn_state(x, g_x, terminal=False): """ hessian costs with respect to the state Args: @@ -197,7 +190,7 @@ class NonlinearSampleSystemConfigModule(): return hessian[np.newaxis, :, :] @staticmethod - def hessian_cost_fn_with_input(x, u): + def hessian_cost_fn_input(x, u): """ hessian costs with respect to the input Args: @@ -212,7 +205,7 @@ class NonlinearSampleSystemConfigModule(): return np.tile(NonlinearSampleSystemConfigModule.R, (pred_len, 1, 1)) @staticmethod - def hessian_cost_fn_with_input_state(x, u): + def hessian_cost_fn_input_state(x, u): """ hessian costs with respect to the state and input Args: @@ -294,3 +287,69 @@ class NonlinearSampleSystemConfigModule(): else: raise NotImplementedError + + +class NonlinearSampleSystemExtendConfigModule(NonlinearSampleSystemConfigModule): + INPUT_SIZE = 1 # include dummy input + + def __init__(self): + super().__init__() + self.opt_config = { + "NMPCCGMRES": { + "threshold": 1e-3, + "zeta": 100., + "delta": 0.01, + "alpha": 0.5, + "tf": 1., + "constraint": True + }, + "NMPCNewton": { + "threshold": 1e-3, + "max_iteration": 500, + "learning_rate": 1e-3 + } + } + + @staticmethod + def gradient_hamiltonian_input_with_constraint(x, lam, u, g_x, dummy_u, raw): + """ + + Args: + x (numpy.ndarray): shape(pred_len+1, state_size) + lam (numpy.ndarray): shape(pred_len, state_size) + u (numpy.ndarray): shape(pred_len, input_size) + g_xs (numpy.ndarray): shape(pred_len, state_size) + dummy_u (numpy.ndarray): shape(pred_len, input_size) + raw (numpy.ndarray): shape(pred_len, input_size), Lagrangian for constraints + + Returns: + F (numpy.ndarray), shape(pred_len, 3) + """ + if len(x.shape) == 1: + vanilla_F = np.zeros(1) + extend_F = np.zeros(1) # 1 is the same as input size + extend_C = np.zeros(1) + + vanilla_F[0] = u[0] + lam[1] + 2. * raw[0] * u[0] + extend_F[0] = -0.01 + 2. * raw[0] * dummy_u[0] + extend_C[0] = u[0]**2 + dummy_u[0]**2 - \ + NonlinearSampleSystemConfigModule.INPUT_LOWER_BOUND**2 + + F = np.concatenate([vanilla_F, extend_F, extend_C]) + + elif len(x.shape) == 2: + pred_len, _ = u.shape + vanilla_F = np.zeros((pred_len, 1)) + extend_F = np.zeros((pred_len, 1)) # 1 is the same as input size + extend_C = np.zeros((pred_len, 1)) + + for i in range(pred_len): + vanilla_F[i, 0] = \ + u[i, 0] + lam[i, 1] + 2. * raw[i, 0] * u[i, 0] + extend_F[i, 0] = -0.01 + 2. * raw[i, 0] * dummy_u[i, 0] + extend_C[i, 0] = u[i, 0]**2 + dummy_u[i, 0]**2 - \ + NonlinearSampleSystemConfigModule.INPUT_LOWER_BOUND**2 + + F = np.concatenate([vanilla_F, extend_F, extend_C], axis=1) + + return F diff --git a/PythonLinearNonlinearControl/configs/two_wheeled.py b/PythonLinearNonlinearControl/configs/two_wheeled.py index dead8c4..b293318 100644 --- a/PythonLinearNonlinearControl/configs/two_wheeled.py +++ b/PythonLinearNonlinearControl/configs/two_wheeled.py @@ -160,7 +160,7 @@ class TwoWheeledConfigModule(): return ((terminal_diff)**2) * np.diag(TwoWheeledConfigModule.Sf) @staticmethod - def gradient_cost_fn_with_state(x, g_x, terminal=False): + def gradient_cost_fn_state(x, g_x, terminal=False): """ gradient of costs with respect to the state Args: @@ -180,7 +180,7 @@ class TwoWheeledConfigModule(): * np.diag(TwoWheeledConfigModule.Sf))[np.newaxis, :] @staticmethod - def gradient_cost_fn_with_input(x, u): + def gradient_cost_fn_input(x, u): """ gradient of costs with respect to the input Args: @@ -193,7 +193,7 @@ class TwoWheeledConfigModule(): return 2. * u * np.diag(TwoWheeledConfigModule.R) @staticmethod - def hessian_cost_fn_with_state(x, g_x, terminal=False): + def hessian_cost_fn_state(x, g_x, terminal=False): """ hessian costs with respect to the state Args: @@ -212,7 +212,7 @@ class TwoWheeledConfigModule(): return np.tile(2.*TwoWheeledConfigModule.Sf, (1, 1, 1)) @staticmethod - def hessian_cost_fn_with_input(x, u): + def hessian_cost_fn_input(x, u): """ hessian costs with respect to the input Args: @@ -228,7 +228,7 @@ class TwoWheeledConfigModule(): return np.tile(2.*TwoWheeledConfigModule.R, (pred_len, 1, 1)) @staticmethod - def hessian_cost_fn_with_input_state(x, u): + def hessian_cost_fn_input_state(x, u): """ hessian costs with respect to the state and input Args: diff --git a/PythonLinearNonlinearControl/controllers/controller.py b/PythonLinearNonlinearControl/controllers/controller.py index 35c2297..9077912 100644 --- a/PythonLinearNonlinearControl/controllers/controller.py +++ b/PythonLinearNonlinearControl/controllers/controller.py @@ -55,17 +55,3 @@ class Controller(): self.terminal_state_cost_fn) return costs - - @staticmethod - def gradient_hamiltonian_x(x, u, lam): - """ gradient of hamitonian with respect to the state, - """ - raise NotImplementedError( - "Implement gradient of hamitonian with respect to the state") - - @staticmethod - def gradient_hamiltonian_u(x, u, lam): - """ gradient of hamitonian with respect to the input - """ - raise NotImplementedError( - "Implement gradient of hamitonian with respect to the input") diff --git a/PythonLinearNonlinearControl/controllers/ddp.py b/PythonLinearNonlinearControl/controllers/ddp.py index 0743f81..d4ece0f 100644 --- a/PythonLinearNonlinearControl/controllers/ddp.py +++ b/PythonLinearNonlinearControl/controllers/ddp.py @@ -32,12 +32,12 @@ class DDP(Controller): self.state_cost_fn = config.state_cost_fn self.terminal_state_cost_fn = config.terminal_state_cost_fn self.input_cost_fn = config.input_cost_fn - self.gradient_cost_fn_with_state = config.gradient_cost_fn_with_state - self.gradient_cost_fn_with_input = config.gradient_cost_fn_with_input - self.hessian_cost_fn_with_state = config.hessian_cost_fn_with_state - self.hessian_cost_fn_with_input = config.hessian_cost_fn_with_input - self.hessian_cost_fn_with_input_state = \ - config.hessian_cost_fn_with_input_state + self.gradient_cost_fn_state = config.gradient_cost_fn_state + self.gradient_cost_fn_input = config.gradient_cost_fn_input + self.hessian_cost_fn_state = config.hessian_cost_fn_state + self.hessian_cost_fn_input = config.hessian_cost_fn_input + self.hessian_cost_fn_input_state = \ + config.hessian_cost_fn_input_state # controller parameters self.max_iters = config.opt_config["DDP"]["max_iters"] @@ -264,31 +264,31 @@ class DDP(Controller): shape(pred_len, input_size, state_size) """ # l_x.shape = (pred_len+1, state_size) - l_x = self.gradient_cost_fn_with_state(pred_xs[:-1], - g_x[:-1], terminal=False) + l_x = self.gradient_cost_fn_state(pred_xs[:-1], + g_x[:-1], terminal=False) terminal_l_x = \ - self.gradient_cost_fn_with_state(pred_xs[-1], - g_x[-1], terminal=True) + self.gradient_cost_fn_state(pred_xs[-1], + g_x[-1], terminal=True) l_x = np.concatenate((l_x, terminal_l_x), axis=0) # l_u.shape = (pred_len, input_size) - l_u = self.gradient_cost_fn_with_input(pred_xs[:-1], sol) + l_u = self.gradient_cost_fn_input(pred_xs[:-1], sol) # l_xx.shape = (pred_len+1, state_size, state_size) - l_xx = self.hessian_cost_fn_with_state(pred_xs[:-1], - g_x[:-1], terminal=False) + l_xx = self.hessian_cost_fn_state(pred_xs[:-1], + g_x[:-1], terminal=False) terminal_l_xx = \ - self.hessian_cost_fn_with_state(pred_xs[-1], - g_x[-1], terminal=True) + self.hessian_cost_fn_state(pred_xs[-1], + g_x[-1], terminal=True) l_xx = np.concatenate((l_xx, terminal_l_xx), axis=0) # l_uu.shape = (pred_len, input_size, input_size) - l_uu = self.hessian_cost_fn_with_input(pred_xs[:-1], sol) + l_uu = self.hessian_cost_fn_input(pred_xs[:-1], sol) # l_ux.shape = (pred_len, input_size, state_size) - l_ux = self.hessian_cost_fn_with_input_state(pred_xs[:-1], sol) + l_ux = self.hessian_cost_fn_input_state(pred_xs[:-1], sol) return l_x, l_xx, l_u, l_uu, l_ux diff --git a/PythonLinearNonlinearControl/controllers/ilqr.py b/PythonLinearNonlinearControl/controllers/ilqr.py index ea4a0c1..d7bb34c 100644 --- a/PythonLinearNonlinearControl/controllers/ilqr.py +++ b/PythonLinearNonlinearControl/controllers/ilqr.py @@ -30,12 +30,12 @@ class iLQR(Controller): self.state_cost_fn = config.state_cost_fn self.terminal_state_cost_fn = config.terminal_state_cost_fn self.input_cost_fn = config.input_cost_fn - self.gradient_cost_fn_with_state = config.gradient_cost_fn_with_state - self.gradient_cost_fn_with_input = config.gradient_cost_fn_with_input - self.hessian_cost_fn_with_state = config.hessian_cost_fn_with_state - self.hessian_cost_fn_with_input = config.hessian_cost_fn_with_input - self.hessian_cost_fn_with_input_state = \ - config.hessian_cost_fn_with_input_state + self.gradient_cost_fn_state = config.gradient_cost_fn_state + self.gradient_cost_fn_input = config.gradient_cost_fn_input + self.hessian_cost_fn_state = config.hessian_cost_fn_state + self.hessian_cost_fn_input = config.hessian_cost_fn_input + self.hessian_cost_fn_input_state = \ + config.hessian_cost_fn_input_state # controller parameters self.max_iters = config.opt_config["iLQR"]["max_iters"] @@ -244,31 +244,31 @@ class iLQR(Controller): shape(pred_len, input_size, state_size) """ # l_x.shape = (pred_len+1, state_size) - l_x = self.gradient_cost_fn_with_state(pred_xs[:-1], - g_x[:-1], terminal=False) + l_x = self.gradient_cost_fn_state(pred_xs[:-1], + g_x[:-1], terminal=False) terminal_l_x = \ - self.gradient_cost_fn_with_state(pred_xs[-1], - g_x[-1], terminal=True) + self.gradient_cost_fn_state(pred_xs[-1], + g_x[-1], terminal=True) l_x = np.concatenate((l_x, terminal_l_x), axis=0) # l_u.shape = (pred_len, input_size) - l_u = self.gradient_cost_fn_with_input(pred_xs[:-1], sol) + l_u = self.gradient_cost_fn_input(pred_xs[:-1], sol) # l_xx.shape = (pred_len+1, state_size, state_size) - l_xx = self.hessian_cost_fn_with_state(pred_xs[:-1], - g_x[:-1], terminal=False) + l_xx = self.hessian_cost_fn_state(pred_xs[:-1], + g_x[:-1], terminal=False) terminal_l_xx = \ - self.hessian_cost_fn_with_state(pred_xs[-1], - g_x[-1], terminal=True) + self.hessian_cost_fn_state(pred_xs[-1], + g_x[-1], terminal=True) l_xx = np.concatenate((l_xx, terminal_l_xx), axis=0) # l_uu.shape = (pred_len, input_size, input_size) - l_uu = self.hessian_cost_fn_with_input(pred_xs[:-1], sol) + l_uu = self.hessian_cost_fn_input(pred_xs[:-1], sol) # l_ux.shape = (pred_len, input_size, state_size) - l_ux = self.hessian_cost_fn_with_input_state(pred_xs[:-1], sol) + l_ux = self.hessian_cost_fn_input_state(pred_xs[:-1], sol) return l_x, l_xx, l_u, l_uu, l_ux diff --git a/PythonLinearNonlinearControl/controllers/make_controllers.py b/PythonLinearNonlinearControl/controllers/make_controllers.py index 488db02..194cbf7 100644 --- a/PythonLinearNonlinearControl/controllers/make_controllers.py +++ b/PythonLinearNonlinearControl/controllers/make_controllers.py @@ -6,6 +6,7 @@ from .mppi_williams import MPPIWilliams from .ilqr import iLQR from .ddp import DDP from .nmpc import NMPC +from .nmpc_cgmres import NMPCCGMRES def make_controller(args, config, model): @@ -26,5 +27,7 @@ def make_controller(args, config, model): return DDP(config, model) elif args.controller_type == "NMPC": return NMPC(config, model) + elif args.controller_type == "NMPCCGMRES": + return NMPCCGMRES(config, model) raise ValueError("No controller: {}".format(args.controller_type)) diff --git a/PythonLinearNonlinearControl/controllers/nmpc.py b/PythonLinearNonlinearControl/controllers/nmpc.py index 6eee931..e7ea44d 100644 --- a/PythonLinearNonlinearControl/controllers/nmpc.py +++ b/PythonLinearNonlinearControl/controllers/nmpc.py @@ -5,6 +5,7 @@ import scipy.stats as stats from .controller import Controller from ..envs.cost import calc_cost +from ..common.utils import line_search logger = getLogger(__name__) @@ -27,6 +28,7 @@ class NMPC(Controller): self.threshold = config.opt_config["NMPC"]["threshold"] self.max_iters = config.opt_config["NMPC"]["max_iters"] self.learning_rate = config.opt_config["NMPC"]["learning_rate"] + self.optimizer_mode = config.opt_config["NMPC"]["optimizer_mode"] # general parameters self.pred_len = config.PRED_LEN @@ -46,6 +48,11 @@ class NMPC(Controller): """ sol = self.prev_sol.copy() count = 0 + # use for Conjugate method + conjugate_d = None + conjugate_prev_d = None + conjugate_s = None + conjugate_beta = None while True: # shape(pred_len+1, state_size) @@ -64,7 +71,35 @@ class NMPC(Controller): np.linalg.norm(F_hat))) break - sol -= self.learning_rate * F_hat + if self.optimizer_mode == "conjugate": + conjugate_d = F_hat.flatten() + + if conjugate_prev_d is None: # initial + conjugate_s = conjugate_d + conjugate_prev_d = conjugate_d + F_hat = conjugate_s.reshape(F_hat.shape) + else: + prev_d = np.dot(conjugate_prev_d, conjugate_prev_d) + d = np.dot(conjugate_d, conjugate_d - conjugate_prev_d) + conjugate_beta = (d + 1e-6) / (prev_d + 1e-6) + + conjugate_s = conjugate_d + conjugate_beta * conjugate_s + conjugate_prev_d = conjugate_d + F_hat = conjugate_s.reshape(F_hat.shape) + + def compute_eval_val(u): + pred_xs = self.model.predict_traj(curr_x, u) + state_cost = np.sum(self.config.state_cost_fn( + pred_xs[1:-1], g_xs[1:-1])) + input_cost = np.sum(self.config.input_cost_fn(u)) + terminal_cost = np.sum( + self.config.terminal_state_cost_fn(pred_xs[-1], g_xs[-1])) + return state_cost + input_cost + terminal_cost + + alpha = line_search(F_hat, sol, + compute_eval_val, init_alpha=self.learning_rate) + + sol -= alpha * F_hat count += 1 # update us for next optimization diff --git a/PythonLinearNonlinearControl/controllers/nmpc_cgmres.py b/PythonLinearNonlinearControl/controllers/nmpc_cgmres.py index e69de29..70e722b 100644 --- a/PythonLinearNonlinearControl/controllers/nmpc_cgmres.py +++ b/PythonLinearNonlinearControl/controllers/nmpc_cgmres.py @@ -0,0 +1,167 @@ +from logging import getLogger + +import numpy as np +import scipy.stats as stats + +from .controller import Controller +from ..envs.cost import calc_cost +from ..common.utils import line_search + +logger = getLogger(__name__) + + +class NMPCCGMRES(Controller): + def __init__(self, config, model): + """ Nonlinear Model Predictive Control using cgmres + """ + super(NMPCCGMRES, self).__init__(config, model) + + # model + self.model = model + + # get cost func + self.state_cost_fn = config.state_cost_fn + self.terminal_state_cost_fn = config.terminal_state_cost_fn + self.input_cost_fn = config.input_cost_fn + + # general parameters + self.pred_len = config.PRED_LEN + self.input_size = config.INPUT_SIZE + self.dt = config.DT + + # controller parameters + self.threshold = config.opt_config["NMPCCGMRES"]["threshold"] + self.zeta = config.opt_config["NMPCCGMRES"]["zeta"] + self.delta = config.opt_config["NMPCCGMRES"]["delta"] + self.alpha = config.opt_config["NMPCCGMRES"]["alpha"] + self.tf = config.opt_config["NMPCCGMRES"]["tf"] + self.divide_num = config.PRED_LEN + self.with_constraint = config.opt_config["NMPCCGMRES"]["constraint"] + if not self.with_constraint: + raise NotImplementedError + # 3 means u, dummy_u, raw + self.max_iters = 3 * self.input_size * self.divide_num + + # initialize + self.prev_sol = np.zeros((self.pred_len, self.input_size)) + self.opt_count = 1 + # add smaller than constraints value + input_constraint = np.abs([config.INPUT_LOWER_BOUND]) + self.prev_dummy_sol = np.ones( + (self.pred_len, self.input_size)) * input_constraint - 1e-3 + # add bigger than 0.01 to avoid computational error + self.prev_raw = np.zeros( + (self.pred_len, self.input_size)) + 0.01 + 1e-3 + + def _compute_f(self, curr_x, sol, g_xs, dummy_sol=None, raw=None): + # shape(pred_len+1, state_size) + pred_xs = self.model.predict_traj(curr_x, sol) + # shape(pred_len, state_size) + pred_lams = self.model.predict_adjoint_traj(pred_xs, sol, g_xs) + + if self.with_constraint: + F = self.config.gradient_hamiltonian_input_with_constraint( + pred_xs, pred_lams, sol, g_xs, dummy_sol, raw) + + return F + else: + raise NotImplementedError + + def obtain_sol(self, curr_x, g_xs): + """ calculate the optimal inputs + Args: + curr_x (numpy.ndarray): current state, shape(state_size, ) + g_xs (numpy.ndarrya): goal trajectory, shape(plan_len, state_size) + Returns: + opt_input (numpy.ndarray): optimal input, shape(input_size, ) + """ + sol = self.prev_sol.copy() + dummy_sol = self.prev_dummy_sol.copy() + raw = self.prev_raw.copy() + + # compute delta t + time = self.dt * self.opt_count + dt = self.tf * (1. - np.exp(-self.alpha * time)) / \ + float(self.divide_num) + self.model.dt = dt + + # compute Fxt + x_dot = self.model.x_dot(curr_x, sol[0]) + dx = x_dot * self.delta + Fxt = self._compute_f(curr_x+dx, sol, g_xs, dummy_sol, raw).flatten() + + # compute F + F = self._compute_f(curr_x, sol, g_xs, dummy_sol, raw).flatten() + right = - self.zeta * F - ((Fxt - F) / self.delta) + + # compute Fuxt + du = sol * self.delta + ddummy_u = dummy_sol * self.delta + draw = raw * self.delta + Fuxt = self._compute_f(curr_x+dx, sol+du, g_xs, + dummy_sol+ddummy_u, raw+draw).flatten() + left = ((Fuxt - Fxt) / self.delta) + + r0 = right - left + r0_norm = np.linalg.norm(r0) + + vs = np.zeros((self.max_iters, self.max_iters + 1)) + vs[:, 0] = r0 / r0_norm + hs = np.zeros((self.max_iters + 1, self.max_iters + 1)) + e = np.zeros((self.max_iters + 1, 1)) + e[0] = 1. + + for i in range(self.max_iters): + reshaped_vs = vs.reshape( + (self.divide_num, 3, self.input_size, self.max_iters+1)) + du = reshaped_vs[:, 0, :, i] * self.delta + ddummy_u = reshaped_vs[:, 1, :, i] * self.delta + draw = reshaped_vs[:, 2, :, i] * self.delta + + Fuxt = self._compute_f( + curr_x+dx, sol+du, g_xs, dummy_sol+ddummy_u, raw+draw).flatten() + Av = ((Fuxt - Fxt) / self.delta) + + sum_Av = np.zeros(self.max_iters) + + for j in range(i + 1): + hs[j, i] = np.dot(Av, vs[:, j]) + sum_Av = sum_Av + hs[j, i] * vs[:, j] + + v_est = Av - sum_Av + + hs[i+1, i] = np.linalg.norm(v_est) + + vs[:, i+1] = v_est / hs[i+1, i] + + inv_hs = np.linalg.pinv(hs[:i+1, :i]) + ys = np.dot(inv_hs, r0_norm * e[:i+1]) + + judge_value = r0_norm * e[:i+1] - np.dot(hs[:i+1, :i], ys[:i]) + + if np.linalg.norm(judge_value) < self.threshold or i == self.max_iters-1: + update_value = np.dot(vs[:, :i-1], ys_pre[:i-1]).flatten() + + update_value = update_value.reshape( + (self.divide_num, 3, self.input_size)) + du_new = du + update_value[:, 0, :] + ddummy_u_new = ddummy_u + update_value[:, 1, :] + draw_new = draw + update_value[:, 2, :] + break + + ys_pre = ys + + sol += du_new * self.delta + dummy_sol += ddummy_u_new * self.delta + raw += draw_new * self.delta + + F = self._compute_f(curr_x, sol, g_xs, dummy_sol, raw) + logger.debug("check F = {0}".format(np.linalg.norm(F))) + + self.prev_sol = sol.copy() + self.prev_dummy_sol = dummy_sol.copy() + self.prev_raw = raw.copy() + + self.opt_count += 1 + + return sol[0] diff --git a/PythonLinearNonlinearControl/models/model.py b/PythonLinearNonlinearControl/models/model.py index 0d37913..6f44028 100644 --- a/PythonLinearNonlinearControl/models/model.py +++ b/PythonLinearNonlinearControl/models/model.py @@ -88,6 +88,11 @@ class Model(): """ raise NotImplementedError("Implement the model") + def x_dot(self, curr_x, u): + """ compute x dot + """ + raise NotImplementedError("Implement the model") + def predict_adjoint_traj(self, xs, us, g_xs): """ Args: @@ -111,7 +116,7 @@ class Model(): for t in range(pred_len-1, 0, -1): prev_lam = \ self.predict_adjoint_state(lam, xs[t], us[t], - g_x=g_xs[t], t=t) + g_x=g_xs[t]) # update lams = np.concatenate((prev_lam[np.newaxis, :], lams), axis=0) lam = prev_lam diff --git a/PythonLinearNonlinearControl/models/nonlinear_sample_system.py b/PythonLinearNonlinearControl/models/nonlinear_sample_system.py index 1cb703f..faf6379 100644 --- a/PythonLinearNonlinearControl/models/nonlinear_sample_system.py +++ b/PythonLinearNonlinearControl/models/nonlinear_sample_system.py @@ -15,7 +15,7 @@ class NonlinearSampleSystemModel(Model): self.dt = config.DT self.gradient_hamiltonian_state = config.gradient_hamiltonian_state self.gradient_hamiltonian_input = config.gradient_hamiltonian_input - self.gradient_cost_fn_with_state = config.gradient_cost_fn_with_state + self.gradient_cost_fn_state = config.gradient_cost_fn_state def predict_next_state(self, curr_x, u): """ predict next state @@ -34,7 +34,7 @@ class NonlinearSampleSystemModel(Model): func_2 = self._func_x_2 functions = [func_1, func_2] next_x = update_state_with_Runge_Kutta( - curr_x, u, functions, batch=False) + curr_x, u, functions, batch=False, dt=self.dt) return next_x elif len(u.shape) == 2: @@ -42,11 +42,25 @@ class NonlinearSampleSystemModel(Model): def func_2(xs, us): return self._func_x_2(xs, us, batch=True) functions = [func_1, func_2] next_x = update_state_with_Runge_Kutta( - curr_x, u, functions, batch=True) + curr_x, u, functions, batch=True, dt=self.dt) return next_x - def predict_adjoint_state(self, lam, x, u, g_x=None, t=None): + def x_dot(self, curr_x, u): + """ + Args: + curr_x (numpy.ndarray): current state, shape(state_size, ) + u (numpy.ndarray): input, shape(input_size, ) + Returns: + x_dot (numpy.ndarray): next state, shape(state_size, ) + """ + state_size = curr_x.shape[0] + x_dot = np.zeros(state_size) + x_dot[0] = self._func_x_1(curr_x, u) + x_dot[1] = self._func_x_2(curr_x, u) + return x_dot + + def predict_adjoint_state(self, lam, x, u, g_x=None): """ predict adjoint states Args: @@ -78,7 +92,7 @@ class NonlinearSampleSystemModel(Model): terminal_lam (numpy.ndarray): terminal adjoint state, shape(state_size, ) """ - terminal_lam = self.gradient_cost_fn_with_state( + terminal_lam = self.gradient_cost_fn_state( terminal_x, terminal_g_x, terminal=True) # return in shape[1, state_size] return terminal_lam[0] diff --git a/PythonLinearNonlinearControl/models/two_wheeled.py b/PythonLinearNonlinearControl/models/two_wheeled.py index ff6f26f..0a89860 100644 --- a/PythonLinearNonlinearControl/models/two_wheeled.py +++ b/PythonLinearNonlinearControl/models/two_wheeled.py @@ -14,7 +14,7 @@ class TwoWheeledModel(Model): self.dt = config.DT self.gradient_hamiltonian_state = config.gradient_hamiltonian_state self.gradient_hamiltonian_input = config.gradient_hamiltonian_input - self.gradient_cost_fn_with_state = config.gradient_cost_fn_with_state + self.gradient_cost_fn_state = config.gradient_cost_fn_state def predict_next_state(self, curr_x, u): """ predict next state @@ -88,7 +88,7 @@ class TwoWheeledModel(Model): terminal_lam (numpy.ndarray): terminal adjoint state, shape(state_size, ) """ - terminal_lam = self.gradient_cost_fn_with_state( + terminal_lam = self.gradient_cost_fn_state( terminal_x, terminal_g_x, terminal=True) # return in shape[1, state_size] return terminal_lam[0] diff --git a/scripts/simple_run.py b/scripts/simple_run.py index 1109abc..29b89f2 100644 --- a/scripts/simple_run.py +++ b/scripts/simple_run.py @@ -40,8 +40,8 @@ def run(args): def main(): parser = argparse.ArgumentParser() - parser.add_argument("--controller_type", type=str, default="NMPC") - parser.add_argument("--env", type=str, default="TwoWheeledTrack") + parser.add_argument("--controller_type", type=str, default="NMPCCGMRES") + parser.add_argument("--env", type=str, default="NonlinearSample") parser.add_argument("--save_anim", type=bool_flag, default=0) parser.add_argument("--result_dir", type=str, default="./result") diff --git a/tests/configs/test_cartpole.py b/tests/configs/test_cartpole.py index 877ba9e..1b8908c 100644 --- a/tests/configs/test_cartpole.py +++ b/tests/configs/test_cartpole.py @@ -4,6 +4,7 @@ import numpy as np from PythonLinearNonlinearControl.configs.cartpole \ import CartPoleConfigModule + class TestCalcCost(): def test_calc_costs(self): # make config @@ -22,25 +23,26 @@ class TestCalcCost(): assert costs.shape == (pop_size, pred_len, input_size) costs = config.state_cost_fn(pred_xs, g_xs) - + assert costs.shape == (pop_size, pred_len, 1) - costs = config.terminal_state_cost_fn(pred_xs[:, -1, :],\ + costs = config.terminal_state_cost_fn(pred_xs[:, -1, :], g_xs[:, -1, :]) - + assert costs.shape == (pop_size, 1) + class TestGradient(): def test_state_gradient(self): """ """ xs = np.ones((1, 4)) - cost_grad = CartPoleConfigModule.gradient_cost_fn_with_state(xs, None) + cost_grad = CartPoleConfigModule.gradient_cost_fn_state(xs, None) # numeric grad eps = 1e-4 expected_grad = np.zeros((1, 4)) - for i in range(4): + for i in range(4): tmp_x = xs.copy() tmp_x[0, i] = xs[0, i] + eps forward = \ @@ -51,7 +53,7 @@ class TestGradient(): CartPoleConfigModule.state_cost_fn(tmp_x, None) expected_grad[0, i] = (forward - backward) / (2. * eps) - + assert cost_grad == pytest.approx(expected_grad) def test_terminal_state_gradient(self): @@ -59,13 +61,13 @@ class TestGradient(): """ xs = np.ones(4) cost_grad =\ - CartPoleConfigModule.gradient_cost_fn_with_state(xs, None, - terminal=True) + CartPoleConfigModule.gradient_cost_fn_state(xs, None, + terminal=True) # numeric grad eps = 1e-4 expected_grad = np.zeros((1, 4)) - for i in range(4): + for i in range(4): tmp_x = xs.copy() tmp_x[i] = xs[i] + eps forward = \ @@ -76,19 +78,19 @@ class TestGradient(): CartPoleConfigModule.state_cost_fn(tmp_x, None) expected_grad[0, i] = (forward - backward) / (2. * eps) - + assert cost_grad == pytest.approx(expected_grad) - + def test_input_gradient(self): """ """ us = np.ones((1, 1)) - cost_grad = CartPoleConfigModule.gradient_cost_fn_with_input(None, us) + cost_grad = CartPoleConfigModule.gradient_cost_fn_input(None, us) # numeric grad eps = 1e-4 expected_grad = np.zeros((1, 1)) - for i in range(1): + for i in range(1): tmp_u = us.copy() tmp_u[0, i] = us[0, i] + eps forward = \ @@ -99,32 +101,32 @@ class TestGradient(): CartPoleConfigModule.input_cost_fn(tmp_u) expected_grad[0, i] = (forward - backward) / (2. * eps) - + assert cost_grad == pytest.approx(expected_grad) - + def test_state_hessian(self): """ """ xs = np.ones((1, 4)) - cost_hess = CartPoleConfigModule.hessian_cost_fn_with_state(xs, None) + cost_hess = CartPoleConfigModule.hessian_cost_fn_state(xs, None) # numeric grad eps = 1e-4 expected_hess = np.zeros((1, 4, 4)) - for i in range(4): + for i in range(4): tmp_x = xs.copy() tmp_x[0, i] = xs[0, i] + eps forward = \ - CartPoleConfigModule.gradient_cost_fn_with_state( + CartPoleConfigModule.gradient_cost_fn_state( tmp_x, None, terminal=False) tmp_x = xs.copy() tmp_x[0, i] = xs[0, i] - eps backward = \ - CartPoleConfigModule.gradient_cost_fn_with_state( + CartPoleConfigModule.gradient_cost_fn_state( tmp_x, None, terminal=False) expected_hess[0, :, i] = (forward - backward) / (2. * eps) - + assert cost_hess == pytest.approx(expected_hess) def test_terminal_state_hessian(self): @@ -132,50 +134,50 @@ class TestGradient(): """ xs = np.ones(4) cost_hess =\ - CartPoleConfigModule.hessian_cost_fn_with_state(xs, None, - terminal=True) + CartPoleConfigModule.hessian_cost_fn_state(xs, None, + terminal=True) # numeric grad eps = 1e-4 expected_hess = np.zeros((1, 4, 4)) - for i in range(4): + for i in range(4): tmp_x = xs.copy() tmp_x[i] = xs[i] + eps forward = \ - CartPoleConfigModule.gradient_cost_fn_with_state( + CartPoleConfigModule.gradient_cost_fn_state( tmp_x, None, terminal=True) tmp_x = xs.copy() tmp_x[i] = xs[i] - eps backward = \ - CartPoleConfigModule.gradient_cost_fn_with_state( + CartPoleConfigModule.gradient_cost_fn_state( tmp_x, None, terminal=True) expected_hess[0, :, i] = (forward - backward) / (2. * eps) - + assert cost_hess == pytest.approx(expected_hess) - + def test_input_hessian(self): """ """ us = np.ones((1, 1)) xs = np.ones((1, 4)) - cost_hess = CartPoleConfigModule.hessian_cost_fn_with_input(us, xs) + cost_hess = CartPoleConfigModule.hessian_cost_fn_input(us, xs) # numeric grad eps = 1e-4 expected_hess = np.zeros((1, 1, 1)) - for i in range(1): + for i in range(1): tmp_u = us.copy() tmp_u[0, i] = us[0, i] + eps forward = \ - CartPoleConfigModule.gradient_cost_fn_with_input( + CartPoleConfigModule.gradient_cost_fn_input( xs, tmp_u) tmp_u = us.copy() tmp_u[0, i] = us[0, i] - eps backward = \ - CartPoleConfigModule.gradient_cost_fn_with_input( + CartPoleConfigModule.gradient_cost_fn_input( xs, tmp_u) expected_hess[0, :, i] = (forward - backward) / (2. * eps) - - assert cost_hess == pytest.approx(expected_hess) \ No newline at end of file + + assert cost_hess == pytest.approx(expected_hess) diff --git a/tests/configs/test_two_wheeled.py b/tests/configs/test_two_wheeled.py index 9ad0408..6e64fff 100644 --- a/tests/configs/test_two_wheeled.py +++ b/tests/configs/test_two_wheeled.py @@ -4,6 +4,7 @@ import numpy as np from PythonLinearNonlinearControl.configs.two_wheeled \ import TwoWheeledConfigModule + class TestCalcCost(): def test_calc_costs(self): # make config @@ -24,15 +25,16 @@ class TestCalcCost(): costs = config.state_cost_fn(pred_xs, g_xs) expected_costs = np.ones((pop_size, pred_len, state_size))*0.5 - + assert costs == pytest.approx(expected_costs**2 * np.diag(config.Q)) - costs = config.terminal_state_cost_fn(pred_xs[:, -1, :],\ + costs = config.terminal_state_cost_fn(pred_xs[:, -1, :], g_xs[:, -1, :]) expected_costs = np.ones((pop_size, state_size))*0.5 - + assert costs == pytest.approx(expected_costs**2 * np.diag(config.Sf)) + class TestGradient(): def test_state_gradient(self): """ @@ -40,12 +42,12 @@ class TestGradient(): xs = np.ones((1, 3)) g_xs = np.ones((1, 3)) * 0.5 cost_grad =\ - TwoWheeledConfigModule.gradient_cost_fn_with_state(xs, g_xs) + TwoWheeledConfigModule.gradient_cost_fn_state(xs, g_xs) # numeric grad eps = 1e-4 expected_grad = np.zeros((1, 3)) - for i in range(3): + for i in range(3): tmp_x = xs.copy() tmp_x[0, i] = xs[0, i] + eps forward = \ @@ -58,20 +60,20 @@ class TestGradient(): backward = np.sum(backward) expected_grad[0, i] = (forward - backward) / (2. * eps) - + assert cost_grad == pytest.approx(expected_grad) - + def test_input_gradient(self): """ """ us = np.ones((1, 2)) cost_grad =\ - TwoWheeledConfigModule.gradient_cost_fn_with_input(None, us) + TwoWheeledConfigModule.gradient_cost_fn_input(None, us) # numeric grad eps = 1e-4 expected_grad = np.zeros((1, 2)) - for i in range(2): + for i in range(2): tmp_u = us.copy() tmp_u[0, i] = us[0, i] + eps forward = \ @@ -84,58 +86,58 @@ class TestGradient(): backward = np.sum(backward) expected_grad[0, i] = (forward - backward) / (2. * eps) - + assert cost_grad == pytest.approx(expected_grad) - + def test_state_hessian(self): """ """ g_xs = np.ones((1, 3)) * 0.5 xs = np.ones((1, 3)) cost_hess =\ - TwoWheeledConfigModule.hessian_cost_fn_with_state(xs, g_xs) + TwoWheeledConfigModule.hessian_cost_fn_state(xs, g_xs) # numeric grad eps = 1e-4 expected_hess = np.zeros((1, 3, 3)) - for i in range(3): + for i in range(3): tmp_x = xs.copy() tmp_x[0, i] = xs[0, i] + eps forward = \ - TwoWheeledConfigModule.gradient_cost_fn_with_state( + TwoWheeledConfigModule.gradient_cost_fn_state( tmp_x, g_xs, terminal=False) tmp_x = xs.copy() tmp_x[0, i] = xs[0, i] - eps backward = \ - TwoWheeledConfigModule.gradient_cost_fn_with_state( + TwoWheeledConfigModule.gradient_cost_fn_state( tmp_x, g_xs, terminal=False) expected_hess[0, :, i] = (forward - backward) / (2. * eps) - + assert cost_hess == pytest.approx(expected_hess) - + def test_input_hessian(self): """ """ us = np.ones((1, 2)) xs = np.ones((1, 3)) - cost_hess = TwoWheeledConfigModule.hessian_cost_fn_with_input(us, xs) + cost_hess = TwoWheeledConfigModule.hessian_cost_fn_input(us, xs) # numeric grad eps = 1e-4 expected_hess = np.zeros((1, 2, 2)) - for i in range(2): + for i in range(2): tmp_u = us.copy() tmp_u[0, i] = us[0, i] + eps forward = \ - TwoWheeledConfigModule.gradient_cost_fn_with_input( + TwoWheeledConfigModule.gradient_cost_fn_input( xs, tmp_u) tmp_u = us.copy() tmp_u[0, i] = us[0, i] - eps backward = \ - TwoWheeledConfigModule.gradient_cost_fn_with_input( + TwoWheeledConfigModule.gradient_cost_fn_input( xs, tmp_u) expected_hess[0, :, i] = (forward - backward) / (2. * eps) - - assert cost_hess == pytest.approx(expected_hess) \ No newline at end of file + + assert cost_hess == pytest.approx(expected_hess)