Add cgmres

Shunichi09 2021-02-14 23:17:47 +09:00
parent 6e0f1209cf
commit fb09e919da
18 changed files with 452 additions and 148 deletions

View File

@@ -116,3 +116,32 @@ def update_state_with_Runge_Kutta(state, u, functions, dt=0.01, batch=True):
             k3[:, i] = dt * func(state + k2, u)
 
     return state + (k0 + 2. * k1 + 2. * k2 + k3) / 6.
+
+
+def line_search(grad, sol, compute_eval_val,
+                init_alpha=0.001, max_iter=100, update_ratio=1.):
+    """ line search
+    Args:
+        grad (numpy.ndarray): gradient
+        sol (numpy.ndarray): current solution
+        compute_eval_val (callable): function to compute evaluation value
+
+    Returns:
+        alpha (float): result of line search
+    """
+    assert grad.shape == sol.shape
+    base_val = np.inf
+    alpha = init_alpha
+    original_sol = sol.copy()
+
+    for _ in range(max_iter):
+        updated_sol = original_sol - alpha * grad
+        eval_val = compute_eval_val(updated_sol)
+
+        if eval_val < base_val:
+            alpha += init_alpha * update_ratio
+            base_val = eval_val
+        else:
+            break
+
+    return alpha
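
Editor's note (not part of the commit): a minimal usage sketch of the new line_search helper on the stand-in objective f(x) = x^2; compute_eval_val and the values below are made up for illustration.

    import numpy as np
    from PythonLinearNonlinearControl.common.utils import line_search

    def compute_eval_val(sol):
        # stand-in objective: sum of squares
        return np.sum(sol ** 2)

    sol = np.array([2.0])
    grad = 2.0 * sol  # analytic gradient of f at sol
    alpha = line_search(grad, sol, compute_eval_val, init_alpha=0.001)
    # the returned step strictly reduces the objective
    assert compute_eval_val(sol - alpha * grad) < compute_eval_val(sol)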

View File

@@ -148,7 +148,7 @@ class CartPoleConfigModule():
             * CartPoleConfigModule.TERMINAL_WEIGHT
 
     @staticmethod
-    def gradient_cost_fn_with_state(x, g_x, terminal=False):
+    def gradient_cost_fn_state(x, g_x, terminal=False):
         """ gradient of costs with respect to the state
 
         Args:
@@ -177,7 +177,7 @@ class CartPoleConfigModule():
         return cost_dx * CartPoleConfigModule.TERMINAL_WEIGHT
 
     @staticmethod
-    def gradient_cost_fn_with_input(x, u):
+    def gradient_cost_fn_input(x, u):
         """ gradient of costs with respect to the input
 
         Args:
@@ -189,7 +189,7 @@ class CartPoleConfigModule():
         return 2. * u * np.diag(CartPoleConfigModule.R)
 
     @staticmethod
-    def hessian_cost_fn_with_state(x, g_x, terminal=False):
+    def hessian_cost_fn_state(x, g_x, terminal=False):
         """ hessian costs with respect to the state
 
         Args:
@@ -227,7 +227,7 @@ class CartPoleConfigModule():
         return hessian[np.newaxis, :, :] * CartPoleConfigModule.TERMINAL_WEIGHT
 
     @staticmethod
-    def hessian_cost_fn_with_input(x, u):
+    def hessian_cost_fn_input(x, u):
         """ hessian costs with respect to the input
 
         Args:
@@ -242,7 +242,7 @@ class CartPoleConfigModule():
         return np.tile(2.*CartPoleConfigModule.R, (pred_len, 1, 1))
 
     @staticmethod
-    def hessian_cost_fn_with_input_state(x, u):
+    def hessian_cost_fn_input_state(x, u):
         """ hessian costs with respect to the state and input
 
         Args:

View File

@@ -115,7 +115,7 @@ class FirstOrderLagConfigModule():
             * np.diag(FirstOrderLagConfigModule.Sf)
 
     @staticmethod
-    def gradient_cost_fn_with_state(x, g_x, terminal=False):
+    def gradient_cost_fn_state(x, g_x, terminal=False):
         """ gradient of costs with respect to the state
 
         Args:
@@ -133,7 +133,7 @@ class FirstOrderLagConfigModule():
                 * np.diag(FirstOrderLagConfigModule.Sf))[np.newaxis, :]
 
     @staticmethod
-    def gradient_cost_fn_with_input(x, u):
+    def gradient_cost_fn_input(x, u):
         """ gradient of costs with respect to the input
 
         Args:
@@ -146,7 +146,7 @@ class FirstOrderLagConfigModule():
         return 2. * u * np.diag(FirstOrderLagConfigModule.R)
 
     @staticmethod
-    def hessian_cost_fn_with_state(x, g_x, terminal=False):
+    def hessian_cost_fn_state(x, g_x, terminal=False):
         """ hessian costs with respect to the state
 
         Args:
@@ -165,7 +165,7 @@ class FirstOrderLagConfigModule():
         return np.tile(2.*FirstOrderLagConfigModule.Sf, (1, 1, 1))
 
     @staticmethod
-    def hessian_cost_fn_with_input(x, u):
+    def hessian_cost_fn_input(x, u):
         """ hessian costs with respect to the input
 
         Args:
@@ -181,7 +181,7 @@ class FirstOrderLagConfigModule():
         return np.tile(2.*FirstOrderLagConfigModule.R, (pred_len, 1, 1))
 
     @staticmethod
-    def hessian_cost_fn_with_input_state(x, u):
+    def hessian_cost_fn_input_state(x, u):
         """ hessian costs with respect to the state and input
 
         Args:

View File

@@ -1,7 +1,7 @@
 from .first_order_lag import FirstOrderLagConfigModule
 from .two_wheeled import TwoWheeledConfigModule
 from .cartpole import CartPoleConfigModule
-from .nonlinear_sample_system import NonlinearSampleSystemConfigModule
+from .nonlinear_sample_system import NonlinearSampleSystemConfigModule, NonlinearSampleSystemExtendConfigModule
 
 
 def make_config(args):
@@ -16,4 +16,6 @@ def make_config(args):
     elif args.env == "CartPole":
         return CartPoleConfigModule()
     elif args.env == "NonlinearSample":
+        if args.controller_type == "NMPCCGMRES":
+            return NonlinearSampleSystemExtendConfigModule()
         return NonlinearSampleSystemConfigModule()

View File

@@ -62,18 +62,11 @@ class NonlinearSampleSystemConfigModule():
             "threshold": 1e-6,
         },
         "NMPC": {
-            "threshold": 1e-5,
-            "max_iters": 1000,
-            "learning_rate": 0.1
-        },
-        "NMPC-CGMRES": {
-            "threshold": 1e-3
-        },
-        "NMPC-Newton": {
-            "threshold": 1e-3,
-            "max_iteration": 500,
-            "learning_rate": 1e-3
-        },
+            "threshold": 0.01,
+            "max_iters": 5000,
+            "learning_rate": 0.01,
+            "optimizer_mode": "conjugate"
+        },
     }
 
     @staticmethod
@@ -133,7 +126,7 @@ class NonlinearSampleSystemConfigModule():
         return 0.5 * (terminal_x[0]**2) + 0.5 * (terminal_x[1]**2)
 
     @staticmethod
-    def gradient_cost_fn_with_state(x, g_x, terminal=False):
+    def gradient_cost_fn_state(x, g_x, terminal=False):
         """ gradient of costs with respect to the state
 
         Args:
@@ -157,7 +150,7 @@ class NonlinearSampleSystemConfigModule():
         return cost_dx
 
     @staticmethod
-    def gradient_cost_fn_with_input(x, u):
+    def gradient_cost_fn_input(x, u):
         """ gradient of costs with respect to the input
 
         Args:
@@ -169,7 +162,7 @@ class NonlinearSampleSystemConfigModule():
         return 2. * u * np.diag(NonlinearSampleSystemConfigModule.R)
 
     @staticmethod
-    def hessian_cost_fn_with_state(x, g_x, terminal=False):
+    def hessian_cost_fn_state(x, g_x, terminal=False):
         """ hessian costs with respect to the state
 
         Args:
@@ -197,7 +190,7 @@ class NonlinearSampleSystemConfigModule():
         return hessian[np.newaxis, :, :]
 
     @staticmethod
-    def hessian_cost_fn_with_input(x, u):
+    def hessian_cost_fn_input(x, u):
         """ hessian costs with respect to the input
 
         Args:
@@ -212,7 +205,7 @@ class NonlinearSampleSystemConfigModule():
         return np.tile(NonlinearSampleSystemConfigModule.R, (pred_len, 1, 1))
 
     @staticmethod
-    def hessian_cost_fn_with_input_state(x, u):
+    def hessian_cost_fn_input_state(x, u):
         """ hessian costs with respect to the state and input
 
         Args:
@@ -294,3 +287,69 @@ class NonlinearSampleSystemConfigModule():
         else:
             raise NotImplementedError
+
+
+class NonlinearSampleSystemExtendConfigModule(NonlinearSampleSystemConfigModule):
+    INPUT_SIZE = 1  # include dummy input
+
+    def __init__(self):
+        super().__init__()
+        self.opt_config = {
+            "NMPCCGMRES": {
+                "threshold": 1e-3,
+                "zeta": 100.,
+                "delta": 0.01,
+                "alpha": 0.5,
+                "tf": 1.,
+                "constraint": True
+            },
+            "NMPCNewton": {
+                "threshold": 1e-3,
+                "max_iteration": 500,
+                "learning_rate": 1e-3
+            }
+        }
+
+    @staticmethod
+    def gradient_hamiltonian_input_with_constraint(x, lam, u, g_x, dummy_u, raw):
+        """
+        Args:
+            x (numpy.ndarray): shape(pred_len+1, state_size)
+            lam (numpy.ndarray): shape(pred_len, state_size)
+            u (numpy.ndarray): shape(pred_len, input_size)
+            g_x (numpy.ndarray): shape(pred_len, state_size)
+            dummy_u (numpy.ndarray): shape(pred_len, input_size)
+            raw (numpy.ndarray): shape(pred_len, input_size),
+                Lagrange multipliers for the constraints
+        Returns:
+            F (numpy.ndarray), shape(pred_len, 3)
+        """
+        if len(x.shape) == 1:
+            vanilla_F = np.zeros(1)
+            extend_F = np.zeros(1)  # 1 is the same as input size
+            extend_C = np.zeros(1)
+
+            vanilla_F[0] = u[0] + lam[1] + 2. * raw[0] * u[0]
+            extend_F[0] = -0.01 + 2. * raw[0] * dummy_u[0]
+            extend_C[0] = u[0]**2 + dummy_u[0]**2 - \
+                NonlinearSampleSystemConfigModule.INPUT_LOWER_BOUND**2
+
+            F = np.concatenate([vanilla_F, extend_F, extend_C])
+
+        elif len(x.shape) == 2:
+            pred_len, _ = u.shape
+            vanilla_F = np.zeros((pred_len, 1))
+            extend_F = np.zeros((pred_len, 1))  # 1 is the same as input size
+            extend_C = np.zeros((pred_len, 1))
+
+            for i in range(pred_len):
+                vanilla_F[i, 0] = \
+                    u[i, 0] + lam[i, 1] + 2. * raw[i, 0] * u[i, 0]
+                extend_F[i, 0] = -0.01 + 2. * raw[i, 0] * dummy_u[i, 0]
+                extend_C[i, 0] = u[i, 0]**2 + dummy_u[i, 0]**2 - \
+                    NonlinearSampleSystemConfigModule.INPUT_LOWER_BOUND**2
+
+            F = np.concatenate([vanilla_F, extend_F, extend_C], axis=1)
+
+        return F
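
Editor's note (not part of the commit): the dummy input here implements the usual C/GMRES device of turning the inequality constraint |u| <= u_max into the equality u^2 + dummy_u^2 = u_max^2, with raw the Lagrange multiplier of that equality; the constant -0.01 in extend_F is a small reward that keeps the dummy input positive. A shape-level sketch of the batch branch's contract, with made-up values:

    import numpy as np

    pred_len, state_size, input_size = 10, 2, 1  # hypothetical sizes
    x = np.zeros((pred_len + 1, state_size))
    lam = np.ones((pred_len, state_size))
    u = np.ones((pred_len, input_size)) * 0.3
    dummy_u = np.ones((pred_len, input_size)) * 0.4
    raw = np.ones((pred_len, input_size)) * 0.01

    F = NonlinearSampleSystemExtendConfigModule.\
        gradient_hamiltonian_input_with_constraint(x, lam, u, None, dummy_u, raw)
    # one row per step: [dH/du, dH/ddummy_u, constraint residual]
    assert F.shape == (pred_len, 3)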

View File

@@ -160,7 +160,7 @@ class TwoWheeledConfigModule():
         return ((terminal_diff)**2) * np.diag(TwoWheeledConfigModule.Sf)
 
     @staticmethod
-    def gradient_cost_fn_with_state(x, g_x, terminal=False):
+    def gradient_cost_fn_state(x, g_x, terminal=False):
         """ gradient of costs with respect to the state
 
         Args:
@@ -180,7 +180,7 @@ class TwoWheeledConfigModule():
                 * np.diag(TwoWheeledConfigModule.Sf))[np.newaxis, :]
 
     @staticmethod
-    def gradient_cost_fn_with_input(x, u):
+    def gradient_cost_fn_input(x, u):
         """ gradient of costs with respect to the input
 
         Args:
@@ -193,7 +193,7 @@ class TwoWheeledConfigModule():
         return 2. * u * np.diag(TwoWheeledConfigModule.R)
 
     @staticmethod
-    def hessian_cost_fn_with_state(x, g_x, terminal=False):
+    def hessian_cost_fn_state(x, g_x, terminal=False):
         """ hessian costs with respect to the state
 
         Args:
@@ -212,7 +212,7 @@ class TwoWheeledConfigModule():
         return np.tile(2.*TwoWheeledConfigModule.Sf, (1, 1, 1))
 
     @staticmethod
-    def hessian_cost_fn_with_input(x, u):
+    def hessian_cost_fn_input(x, u):
         """ hessian costs with respect to the input
 
         Args:
@@ -228,7 +228,7 @@ class TwoWheeledConfigModule():
         return np.tile(2.*TwoWheeledConfigModule.R, (pred_len, 1, 1))
 
     @staticmethod
-    def hessian_cost_fn_with_input_state(x, u):
+    def hessian_cost_fn_input_state(x, u):
         """ hessian costs with respect to the state and input
 
         Args:

View File

@@ -55,17 +55,3 @@ class Controller():
                                          self.terminal_state_cost_fn)
         return costs
-
-    @staticmethod
-    def gradient_hamiltonian_x(x, u, lam):
-        """ gradient of hamiltonian with respect to the state
-        """
-        raise NotImplementedError(
-            "Implement gradient of hamiltonian with respect to the state")
-
-    @staticmethod
-    def gradient_hamiltonian_u(x, u, lam):
-        """ gradient of hamiltonian with respect to the input
-        """
-        raise NotImplementedError(
-            "Implement gradient of hamiltonian with respect to the input")

View File

@@ -32,12 +32,12 @@ class DDP(Controller):
         self.state_cost_fn = config.state_cost_fn
         self.terminal_state_cost_fn = config.terminal_state_cost_fn
         self.input_cost_fn = config.input_cost_fn
-        self.gradient_cost_fn_with_state = config.gradient_cost_fn_with_state
-        self.gradient_cost_fn_with_input = config.gradient_cost_fn_with_input
-        self.hessian_cost_fn_with_state = config.hessian_cost_fn_with_state
-        self.hessian_cost_fn_with_input = config.hessian_cost_fn_with_input
-        self.hessian_cost_fn_with_input_state = \
-            config.hessian_cost_fn_with_input_state
+        self.gradient_cost_fn_state = config.gradient_cost_fn_state
+        self.gradient_cost_fn_input = config.gradient_cost_fn_input
+        self.hessian_cost_fn_state = config.hessian_cost_fn_state
+        self.hessian_cost_fn_input = config.hessian_cost_fn_input
+        self.hessian_cost_fn_input_state = \
+            config.hessian_cost_fn_input_state
 
         # controller parameters
         self.max_iters = config.opt_config["DDP"]["max_iters"]
@@ -264,31 +264,31 @@ class DDP(Controller):
                 shape(pred_len, input_size, state_size)
         """
         # l_x.shape = (pred_len+1, state_size)
-        l_x = self.gradient_cost_fn_with_state(pred_xs[:-1],
-                                               g_x[:-1], terminal=False)
+        l_x = self.gradient_cost_fn_state(pred_xs[:-1],
+                                          g_x[:-1], terminal=False)
         terminal_l_x = \
-            self.gradient_cost_fn_with_state(pred_xs[-1],
-                                             g_x[-1], terminal=True)
+            self.gradient_cost_fn_state(pred_xs[-1],
+                                        g_x[-1], terminal=True)
 
         l_x = np.concatenate((l_x, terminal_l_x), axis=0)
 
         # l_u.shape = (pred_len, input_size)
-        l_u = self.gradient_cost_fn_with_input(pred_xs[:-1], sol)
+        l_u = self.gradient_cost_fn_input(pred_xs[:-1], sol)
 
         # l_xx.shape = (pred_len+1, state_size, state_size)
-        l_xx = self.hessian_cost_fn_with_state(pred_xs[:-1],
-                                               g_x[:-1], terminal=False)
+        l_xx = self.hessian_cost_fn_state(pred_xs[:-1],
+                                          g_x[:-1], terminal=False)
         terminal_l_xx = \
-            self.hessian_cost_fn_with_state(pred_xs[-1],
-                                            g_x[-1], terminal=True)
+            self.hessian_cost_fn_state(pred_xs[-1],
+                                       g_x[-1], terminal=True)
 
         l_xx = np.concatenate((l_xx, terminal_l_xx), axis=0)
 
         # l_uu.shape = (pred_len, input_size, input_size)
-        l_uu = self.hessian_cost_fn_with_input(pred_xs[:-1], sol)
+        l_uu = self.hessian_cost_fn_input(pred_xs[:-1], sol)
 
         # l_ux.shape = (pred_len, input_size, state_size)
-        l_ux = self.hessian_cost_fn_with_input_state(pred_xs[:-1], sol)
+        l_ux = self.hessian_cost_fn_input_state(pred_xs[:-1], sol)
 
         return l_x, l_xx, l_u, l_uu, l_ux

View File

@@ -30,12 +30,12 @@ class iLQR(Controller):
         self.state_cost_fn = config.state_cost_fn
         self.terminal_state_cost_fn = config.terminal_state_cost_fn
         self.input_cost_fn = config.input_cost_fn
-        self.gradient_cost_fn_with_state = config.gradient_cost_fn_with_state
-        self.gradient_cost_fn_with_input = config.gradient_cost_fn_with_input
-        self.hessian_cost_fn_with_state = config.hessian_cost_fn_with_state
-        self.hessian_cost_fn_with_input = config.hessian_cost_fn_with_input
-        self.hessian_cost_fn_with_input_state = \
-            config.hessian_cost_fn_with_input_state
+        self.gradient_cost_fn_state = config.gradient_cost_fn_state
+        self.gradient_cost_fn_input = config.gradient_cost_fn_input
+        self.hessian_cost_fn_state = config.hessian_cost_fn_state
+        self.hessian_cost_fn_input = config.hessian_cost_fn_input
+        self.hessian_cost_fn_input_state = \
+            config.hessian_cost_fn_input_state
 
         # controller parameters
         self.max_iters = config.opt_config["iLQR"]["max_iters"]
@@ -244,31 +244,31 @@ class iLQR(Controller):
                 shape(pred_len, input_size, state_size)
         """
        # l_x.shape = (pred_len+1, state_size)
-        l_x = self.gradient_cost_fn_with_state(pred_xs[:-1],
-                                               g_x[:-1], terminal=False)
+        l_x = self.gradient_cost_fn_state(pred_xs[:-1],
+                                          g_x[:-1], terminal=False)
         terminal_l_x = \
-            self.gradient_cost_fn_with_state(pred_xs[-1],
-                                             g_x[-1], terminal=True)
+            self.gradient_cost_fn_state(pred_xs[-1],
+                                        g_x[-1], terminal=True)
 
         l_x = np.concatenate((l_x, terminal_l_x), axis=0)
 
         # l_u.shape = (pred_len, input_size)
-        l_u = self.gradient_cost_fn_with_input(pred_xs[:-1], sol)
+        l_u = self.gradient_cost_fn_input(pred_xs[:-1], sol)
 
         # l_xx.shape = (pred_len+1, state_size, state_size)
-        l_xx = self.hessian_cost_fn_with_state(pred_xs[:-1],
-                                               g_x[:-1], terminal=False)
+        l_xx = self.hessian_cost_fn_state(pred_xs[:-1],
+                                          g_x[:-1], terminal=False)
         terminal_l_xx = \
-            self.hessian_cost_fn_with_state(pred_xs[-1],
-                                            g_x[-1], terminal=True)
+            self.hessian_cost_fn_state(pred_xs[-1],
+                                       g_x[-1], terminal=True)
 
         l_xx = np.concatenate((l_xx, terminal_l_xx), axis=0)
 
         # l_uu.shape = (pred_len, input_size, input_size)
-        l_uu = self.hessian_cost_fn_with_input(pred_xs[:-1], sol)
+        l_uu = self.hessian_cost_fn_input(pred_xs[:-1], sol)
 
         # l_ux.shape = (pred_len, input_size, state_size)
-        l_ux = self.hessian_cost_fn_with_input_state(pred_xs[:-1], sol)
+        l_ux = self.hessian_cost_fn_input_state(pred_xs[:-1], sol)
 
         return l_x, l_xx, l_u, l_uu, l_ux

View File

@@ -6,6 +6,7 @@ from .mppi_williams import MPPIWilliams
 from .ilqr import iLQR
 from .ddp import DDP
 from .nmpc import NMPC
+from .nmpc_cgmres import NMPCCGMRES
 
 
 def make_controller(args, config, model):
@@ -26,5 +27,7 @@ def make_controller(args, config, model):
         return DDP(config, model)
     elif args.controller_type == "NMPC":
         return NMPC(config, model)
+    elif args.controller_type == "NMPCCGMRES":
+        return NMPCCGMRES(config, model)
 
     raise ValueError("No controller: {}".format(args.controller_type))

View File

@@ -5,6 +5,7 @@ import scipy.stats as stats
 
 from .controller import Controller
 from ..envs.cost import calc_cost
+from ..common.utils import line_search
 
 logger = getLogger(__name__)
 
@@ -27,6 +28,7 @@ class NMPC(Controller):
         self.threshold = config.opt_config["NMPC"]["threshold"]
         self.max_iters = config.opt_config["NMPC"]["max_iters"]
         self.learning_rate = config.opt_config["NMPC"]["learning_rate"]
+        self.optimizer_mode = config.opt_config["NMPC"]["optimizer_mode"]
 
         # general parameters
         self.pred_len = config.PRED_LEN
@@ -46,6 +48,11 @@ class NMPC(Controller):
         """
         sol = self.prev_sol.copy()
         count = 0
+        # used by the conjugate gradient method
+        conjugate_d = None
+        conjugate_prev_d = None
+        conjugate_s = None
+        conjugate_beta = None
 
         while True:
             # shape(pred_len+1, state_size)
@@ -64,7 +71,35 @@ class NMPC(Controller):
                                 np.linalg.norm(F_hat)))
                 break
 
-            sol -= self.learning_rate * F_hat
+            if self.optimizer_mode == "conjugate":
+                conjugate_d = F_hat.flatten()
+
+                if conjugate_prev_d is None:  # initial iteration
+                    conjugate_s = conjugate_d
+                    conjugate_prev_d = conjugate_d
+                    F_hat = conjugate_s.reshape(F_hat.shape)
+                else:
+                    prev_d = np.dot(conjugate_prev_d, conjugate_prev_d)
+                    d = np.dot(conjugate_d, conjugate_d - conjugate_prev_d)
+                    conjugate_beta = (d + 1e-6) / (prev_d + 1e-6)
+
+                    conjugate_s = conjugate_d + conjugate_beta * conjugate_s
+                    conjugate_prev_d = conjugate_d
+                    F_hat = conjugate_s.reshape(F_hat.shape)
+
+            def compute_eval_val(u):
+                pred_xs = self.model.predict_traj(curr_x, u)
+                state_cost = np.sum(self.config.state_cost_fn(
+                    pred_xs[1:-1], g_xs[1:-1]))
+                input_cost = np.sum(self.config.input_cost_fn(u))
+                terminal_cost = np.sum(
+                    self.config.terminal_state_cost_fn(pred_xs[-1], g_xs[-1]))
+                return state_cost + input_cost + terminal_cost
+
+            alpha = line_search(F_hat, sol,
+                                compute_eval_val, init_alpha=self.learning_rate)
+
+            sol -= alpha * F_hat
             count += 1
 
         # update us for next optimization
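
Editor's note (not part of the commit): the conjugate_beta expression above is the Polak-Ribiere formula beta = d.(d - d_prev) / (d_prev.d_prev), regularized with 1e-6 in both numerator and denominator, and the search direction then becomes s = d + beta * s. The same update, factored out as a standalone sketch:

    import numpy as np

    def conjugate_direction(d, prev_d, s, eps=1e-6):
        """Polak-Ribiere direction update mirroring the loop above."""
        if prev_d is None:  # first iteration falls back to steepest descent
            return d
        beta = (np.dot(d, d - prev_d) + eps) / (np.dot(prev_d, prev_d) + eps)
        return d + beta * s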

View File

@@ -0,0 +1,167 @@
+from logging import getLogger
+
+import numpy as np
+import scipy.stats as stats
+
+from .controller import Controller
+from ..envs.cost import calc_cost
+from ..common.utils import line_search
+
+logger = getLogger(__name__)
+
+
+class NMPCCGMRES(Controller):
+    def __init__(self, config, model):
+        """ Nonlinear Model Predictive Control using C/GMRES
+        """
+        super(NMPCCGMRES, self).__init__(config, model)
+
+        # model
+        self.model = model
+
+        # get cost functions
+        self.state_cost_fn = config.state_cost_fn
+        self.terminal_state_cost_fn = config.terminal_state_cost_fn
+        self.input_cost_fn = config.input_cost_fn
+
+        # general parameters
+        self.pred_len = config.PRED_LEN
+        self.input_size = config.INPUT_SIZE
+        self.dt = config.DT
+
+        # controller parameters
+        self.threshold = config.opt_config["NMPCCGMRES"]["threshold"]
+        self.zeta = config.opt_config["NMPCCGMRES"]["zeta"]
+        self.delta = config.opt_config["NMPCCGMRES"]["delta"]
+        self.alpha = config.opt_config["NMPCCGMRES"]["alpha"]
+        self.tf = config.opt_config["NMPCCGMRES"]["tf"]
+        self.divide_num = config.PRED_LEN
+        self.with_constraint = config.opt_config["NMPCCGMRES"]["constraint"]
+        if not self.with_constraint:
+            raise NotImplementedError
+        # 3 stands for u, dummy_u and raw
+        self.max_iters = 3 * self.input_size * self.divide_num
+
+        # initialize
+        self.prev_sol = np.zeros((self.pred_len, self.input_size))
+        self.opt_count = 1
+        # start slightly below the constraint value
+        input_constraint = np.abs([config.INPUT_LOWER_BOUND])
+        self.prev_dummy_sol = np.ones(
+            (self.pred_len, self.input_size)) * input_constraint - 1e-3
+        # start slightly above 0.01 to avoid numerical errors
+        self.prev_raw = np.zeros(
+            (self.pred_len, self.input_size)) + 0.01 + 1e-3
+
+    def _compute_f(self, curr_x, sol, g_xs, dummy_sol=None, raw=None):
+        # shape(pred_len+1, state_size)
+        pred_xs = self.model.predict_traj(curr_x, sol)
+        # shape(pred_len, state_size)
+        pred_lams = self.model.predict_adjoint_traj(pred_xs, sol, g_xs)
+
+        if self.with_constraint:
+            F = self.config.gradient_hamiltonian_input_with_constraint(
+                pred_xs, pred_lams, sol, g_xs, dummy_sol, raw)
+            return F
+        else:
+            raise NotImplementedError
+
+    def obtain_sol(self, curr_x, g_xs):
+        """ calculate the optimal inputs
+        Args:
+            curr_x (numpy.ndarray): current state, shape(state_size, )
+            g_xs (numpy.ndarray): goal trajectory, shape(plan_len, state_size)
+        Returns:
+            opt_input (numpy.ndarray): optimal input, shape(input_size, )
+        """
+        sol = self.prev_sol.copy()
+        dummy_sol = self.prev_dummy_sol.copy()
+        raw = self.prev_raw.copy()
+
+        # compute delta t
+        time = self.dt * self.opt_count
+        dt = self.tf * (1. - np.exp(-self.alpha * time)) / \
+            float(self.divide_num)
+        self.model.dt = dt
+
+        # compute Fxt
+        x_dot = self.model.x_dot(curr_x, sol[0])
+        dx = x_dot * self.delta
+        Fxt = self._compute_f(curr_x+dx, sol, g_xs, dummy_sol, raw).flatten()
+
+        # compute F
+        F = self._compute_f(curr_x, sol, g_xs, dummy_sol, raw).flatten()
+        right = -self.zeta * F - ((Fxt - F) / self.delta)
+
+        # compute Fuxt
+        du = sol * self.delta
+        ddummy_u = dummy_sol * self.delta
+        draw = raw * self.delta
+        Fuxt = self._compute_f(curr_x+dx, sol+du, g_xs,
+                               dummy_sol+ddummy_u, raw+draw).flatten()
+        left = ((Fuxt - Fxt) / self.delta)
+
+        r0 = right - left
+        r0_norm = np.linalg.norm(r0)
+
+        vs = np.zeros((self.max_iters, self.max_iters + 1))
+        vs[:, 0] = r0 / r0_norm
+
+        hs = np.zeros((self.max_iters + 1, self.max_iters + 1))
+
+        e = np.zeros((self.max_iters + 1, 1))
+        e[0] = 1.
+
+        for i in range(self.max_iters):
+            reshaped_vs = vs.reshape(
+                (self.divide_num, 3, self.input_size, self.max_iters+1))
+            du = reshaped_vs[:, 0, :, i] * self.delta
+            ddummy_u = reshaped_vs[:, 1, :, i] * self.delta
+            draw = reshaped_vs[:, 2, :, i] * self.delta
+
+            Fuxt = self._compute_f(
+                curr_x+dx, sol+du, g_xs, dummy_sol+ddummy_u, raw+draw).flatten()
+            Av = ((Fuxt - Fxt) / self.delta)
+
+            sum_Av = np.zeros(self.max_iters)
+
+            for j in range(i + 1):
+                hs[j, i] = np.dot(Av, vs[:, j])
+                sum_Av = sum_Av + hs[j, i] * vs[:, j]
+
+            v_est = Av - sum_Av
+
+            hs[i+1, i] = np.linalg.norm(v_est)
+            vs[:, i+1] = v_est / hs[i+1, i]
+
+            inv_hs = np.linalg.pinv(hs[:i+1, :i])
+            ys = np.dot(inv_hs, r0_norm * e[:i+1])
+
+            judge_value = r0_norm * e[:i+1] - np.dot(hs[:i+1, :i], ys[:i])
+
+            if np.linalg.norm(judge_value) < self.threshold or i == self.max_iters-1:
+                update_value = np.dot(vs[:, :i-1], ys_pre[:i-1]).flatten()
+                update_value = update_value.reshape(
+                    (self.divide_num, 3, self.input_size))
+                du_new = du + update_value[:, 0, :]
+                ddummy_u_new = ddummy_u + update_value[:, 1, :]
+                draw_new = draw + update_value[:, 2, :]
+                break
+
+            ys_pre = ys
+
+        sol += du_new * self.delta
+        dummy_sol += ddummy_u_new * self.delta
+        raw += draw_new * self.delta
+
+        F = self._compute_f(curr_x, sol, g_xs, dummy_sol, raw)
+        logger.debug("check F = {0}".format(np.linalg.norm(F)))
+
+        self.prev_sol = sol.copy()
+        self.prev_dummy_sol = dummy_sol.copy()
+        self.prev_raw = raw.copy()
+
+        self.opt_count += 1
+
+        return sol[0]
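
Editor's note (not part of the commit): rather than solving the optimality condition F(U, x, t) = 0 from scratch at each step, C/GMRES imposes dF/dt = -zeta * F and solves the resulting linear system for the input update with matrix-free GMRES, which is why every Jacobian-vector product above is a forward-difference call to _compute_f with perturbed arguments. That core approximation, isolated as a sketch (compute_F stands in for the flattened _compute_f):

    import numpy as np

    def approx_jacobian_vector_product(compute_F, U, v, delta=0.01):
        # forward-difference estimate of (dF/dU) @ v, the only access to the
        # Jacobian that the GMRES iteration above ever needs
        return (compute_F(U + delta * v) - compute_F(U)) / delta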

View File

@@ -88,6 +88,11 @@ class Model():
         """
         raise NotImplementedError("Implement the model")
 
+    def x_dot(self, curr_x, u):
+        """ compute x dot
+        """
+        raise NotImplementedError("Implement the model")
+
     def predict_adjoint_traj(self, xs, us, g_xs):
         """
         Args:
@@ -111,7 +116,7 @@ class Model():
         for t in range(pred_len-1, 0, -1):
             prev_lam = \
                 self.predict_adjoint_state(lam, xs[t], us[t],
-                                           g_x=g_xs[t], t=t)
+                                           g_x=g_xs[t])
             # update
             lams = np.concatenate((prev_lam[np.newaxis, :], lams), axis=0)
             lam = prev_lam
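
Editor's note (not part of the commit): predict_adjoint_traj integrates the costate backward from the terminal condition, one step per call to predict_adjoint_state. A single backward step would look roughly like the sketch below; the gradient_hamiltonian_state signature here is an assumption for illustration, not the repository's exact API.

    import numpy as np

    def adjoint_step(lam, x, u, g_x, gradient_hamiltonian_state, dt):
        # costate dynamics: lam_dot = -dH/dx, integrated backward in time,
        # so stepping from t+dt back to t adds dt * dH/dx
        return lam + dt * gradient_hamiltonian_state(x, lam, u, g_x)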

View File

@@ -15,7 +15,7 @@ class NonlinearSampleSystemModel(Model):
         self.dt = config.DT
         self.gradient_hamiltonian_state = config.gradient_hamiltonian_state
         self.gradient_hamiltonian_input = config.gradient_hamiltonian_input
-        self.gradient_cost_fn_with_state = config.gradient_cost_fn_with_state
+        self.gradient_cost_fn_state = config.gradient_cost_fn_state
 
     def predict_next_state(self, curr_x, u):
         """ predict next state
@@ -34,7 +34,7 @@ class NonlinearSampleSystemModel(Model):
             func_2 = self._func_x_2
             functions = [func_1, func_2]
             next_x = update_state_with_Runge_Kutta(
-                curr_x, u, functions, batch=False)
+                curr_x, u, functions, batch=False, dt=self.dt)
             return next_x
 
         elif len(u.shape) == 2:
@@ -42,11 +42,25 @@ class NonlinearSampleSystemModel(Model):
             def func_2(xs, us): return self._func_x_2(xs, us, batch=True)
             functions = [func_1, func_2]
             next_x = update_state_with_Runge_Kutta(
-                curr_x, u, functions, batch=True)
+                curr_x, u, functions, batch=True, dt=self.dt)
             return next_x
 
-    def predict_adjoint_state(self, lam, x, u, g_x=None, t=None):
+    def x_dot(self, curr_x, u):
+        """
+        Args:
+            curr_x (numpy.ndarray): current state, shape(state_size, )
+            u (numpy.ndarray): input, shape(input_size, )
+        Returns:
+            x_dot (numpy.ndarray): time derivative of the state,
+                shape(state_size, )
+        """
+        state_size = curr_x.shape[0]
+        x_dot = np.zeros(state_size)
+        x_dot[0] = self._func_x_1(curr_x, u)
+        x_dot[1] = self._func_x_2(curr_x, u)
+        return x_dot
+
+    def predict_adjoint_state(self, lam, x, u, g_x=None):
         """ predict adjoint states
 
         Args:
@@ -78,7 +92,7 @@ class NonlinearSampleSystemModel(Model):
             terminal_lam (numpy.ndarray): terminal adjoint state,
                 shape(state_size, )
         """
-        terminal_lam = self.gradient_cost_fn_with_state(
+        terminal_lam = self.gradient_cost_fn_state(
             terminal_x, terminal_g_x, terminal=True)  # return in shape[1, state_size]
         return terminal_lam[0]

View File

@@ -14,7 +14,7 @@ class TwoWheeledModel(Model):
         self.dt = config.DT
         self.gradient_hamiltonian_state = config.gradient_hamiltonian_state
         self.gradient_hamiltonian_input = config.gradient_hamiltonian_input
-        self.gradient_cost_fn_with_state = config.gradient_cost_fn_with_state
+        self.gradient_cost_fn_state = config.gradient_cost_fn_state
 
     def predict_next_state(self, curr_x, u):
         """ predict next state
@@ -88,7 +88,7 @@ class TwoWheeledModel(Model):
             terminal_lam (numpy.ndarray): terminal adjoint state,
                 shape(state_size, )
         """
-        terminal_lam = self.gradient_cost_fn_with_state(
+        terminal_lam = self.gradient_cost_fn_state(
             terminal_x, terminal_g_x, terminal=True)  # return in shape[1, state_size]
         return terminal_lam[0]

View File

@@ -40,8 +40,8 @@ def run(args):
 
 def main():
     parser = argparse.ArgumentParser()
-    parser.add_argument("--controller_type", type=str, default="NMPC")
-    parser.add_argument("--env", type=str, default="TwoWheeledTrack")
+    parser.add_argument("--controller_type", type=str, default="NMPCCGMRES")
+    parser.add_argument("--env", type=str, default="NonlinearSample")
     parser.add_argument("--save_anim", type=bool_flag, default=0)
     parser.add_argument("--result_dir", type=str, default="./result")

View File

@@ -4,6 +4,7 @@ import numpy as np
 from PythonLinearNonlinearControl.configs.cartpole \
     import CartPoleConfigModule
 
+
 class TestCalcCost():
     def test_calc_costs(self):
         # make config
@@ -25,17 +26,18 @@ class TestCalcCost():
         assert costs.shape == (pop_size, pred_len, 1)
 
-        costs = config.terminal_state_cost_fn(pred_xs[:, -1, :],\
+        costs = config.terminal_state_cost_fn(pred_xs[:, -1, :],
                                               g_xs[:, -1, :])
 
         assert costs.shape == (pop_size, 1)
 
+
 class TestGradient():
     def test_state_gradient(self):
         """
         """
         xs = np.ones((1, 4))
-        cost_grad = CartPoleConfigModule.gradient_cost_fn_with_state(xs, None)
+        cost_grad = CartPoleConfigModule.gradient_cost_fn_state(xs, None)
 
         # numeric grad
         eps = 1e-4
@@ -59,8 +61,8 @@ class TestGradient():
         """
         xs = np.ones(4)
         cost_grad =\
-            CartPoleConfigModule.gradient_cost_fn_with_state(xs, None,
-                                                             terminal=True)
+            CartPoleConfigModule.gradient_cost_fn_state(xs, None,
+                                                        terminal=True)
 
         # numeric grad
         eps = 1e-4
@@ -83,7 +85,7 @@ class TestGradient():
         """
         """
         us = np.ones((1, 1))
-        cost_grad = CartPoleConfigModule.gradient_cost_fn_with_input(None, us)
+        cost_grad = CartPoleConfigModule.gradient_cost_fn_input(None, us)
 
         # numeric grad
         eps = 1e-4
@@ -106,7 +108,7 @@ class TestGradient():
         """
         """
         xs = np.ones((1, 4))
-        cost_hess = CartPoleConfigModule.hessian_cost_fn_with_state(xs, None)
+        cost_hess = CartPoleConfigModule.hessian_cost_fn_state(xs, None)
 
         # numeric grad
         eps = 1e-4
@@ -115,12 +117,12 @@ class TestGradient():
             tmp_x = xs.copy()
             tmp_x[0, i] = xs[0, i] + eps
             forward = \
-                CartPoleConfigModule.gradient_cost_fn_with_state(
+                CartPoleConfigModule.gradient_cost_fn_state(
                     tmp_x, None, terminal=False)
             tmp_x = xs.copy()
             tmp_x[0, i] = xs[0, i] - eps
             backward = \
-                CartPoleConfigModule.gradient_cost_fn_with_state(
+                CartPoleConfigModule.gradient_cost_fn_state(
                     tmp_x, None, terminal=False)
 
             expected_hess[0, :, i] = (forward - backward) / (2. * eps)
@@ -132,8 +134,8 @@ class TestGradient():
         """
         xs = np.ones(4)
         cost_hess =\
-            CartPoleConfigModule.hessian_cost_fn_with_state(xs, None,
-                                                            terminal=True)
+            CartPoleConfigModule.hessian_cost_fn_state(xs, None,
+                                                       terminal=True)
 
         # numeric grad
         eps = 1e-4
@@ -142,12 +144,12 @@ class TestGradient():
            tmp_x = xs.copy()
             tmp_x[i] = xs[i] + eps
             forward = \
-                CartPoleConfigModule.gradient_cost_fn_with_state(
+                CartPoleConfigModule.gradient_cost_fn_state(
                     tmp_x, None, terminal=True)
             tmp_x = xs.copy()
             tmp_x[i] = xs[i] - eps
             backward = \
-                CartPoleConfigModule.gradient_cost_fn_with_state(
+                CartPoleConfigModule.gradient_cost_fn_state(
                     tmp_x, None, terminal=True)
 
             expected_hess[0, :, i] = (forward - backward) / (2. * eps)
@@ -159,7 +161,7 @@ class TestGradient():
         """
         us = np.ones((1, 1))
         xs = np.ones((1, 4))
-        cost_hess = CartPoleConfigModule.hessian_cost_fn_with_input(us, xs)
+        cost_hess = CartPoleConfigModule.hessian_cost_fn_input(us, xs)
 
         # numeric grad
         eps = 1e-4
@@ -168,12 +170,12 @@ class TestGradient():
             tmp_u = us.copy()
             tmp_u[0, i] = us[0, i] + eps
             forward = \
-                CartPoleConfigModule.gradient_cost_fn_with_input(
+                CartPoleConfigModule.gradient_cost_fn_input(
                     xs, tmp_u)
             tmp_u = us.copy()
             tmp_u[0, i] = us[0, i] - eps
             backward = \
-                CartPoleConfigModule.gradient_cost_fn_with_input(
+                CartPoleConfigModule.gradient_cost_fn_input(
                     xs, tmp_u)
 
             expected_hess[0, :, i] = (forward - backward) / (2. * eps)

View File

@@ -4,6 +4,7 @@ import numpy as np
 from PythonLinearNonlinearControl.configs.two_wheeled \
     import TwoWheeledConfigModule
 
+
 class TestCalcCost():
     def test_calc_costs(self):
         # make config
@@ -27,12 +28,13 @@ class TestCalcCost():
         assert costs == pytest.approx(expected_costs**2 * np.diag(config.Q))
 
-        costs = config.terminal_state_cost_fn(pred_xs[:, -1, :],\
+        costs = config.terminal_state_cost_fn(pred_xs[:, -1, :],
                                               g_xs[:, -1, :])
 
         expected_costs = np.ones((pop_size, state_size))*0.5
         assert costs == pytest.approx(expected_costs**2 * np.diag(config.Sf))
 
+
 class TestGradient():
     def test_state_gradient(self):
         """
@@ -40,7 +42,7 @@ class TestGradient():
         xs = np.ones((1, 3))
         g_xs = np.ones((1, 3)) * 0.5
         cost_grad =\
-            TwoWheeledConfigModule.gradient_cost_fn_with_state(xs, g_xs)
+            TwoWheeledConfigModule.gradient_cost_fn_state(xs, g_xs)
 
         # numeric grad
         eps = 1e-4
@@ -66,7 +68,7 @@ class TestGradient():
         """
         us = np.ones((1, 2))
         cost_grad =\
-            TwoWheeledConfigModule.gradient_cost_fn_with_input(None, us)
+            TwoWheeledConfigModule.gradient_cost_fn_input(None, us)
 
         # numeric grad
         eps = 1e-4
@@ -93,7 +95,7 @@ class TestGradient():
         g_xs = np.ones((1, 3)) * 0.5
         xs = np.ones((1, 3))
         cost_hess =\
-            TwoWheeledConfigModule.hessian_cost_fn_with_state(xs, g_xs)
+            TwoWheeledConfigModule.hessian_cost_fn_state(xs, g_xs)
 
         # numeric grad
         eps = 1e-4
@@ -102,12 +104,12 @@ class TestGradient():
             tmp_x = xs.copy()
             tmp_x[0, i] = xs[0, i] + eps
             forward = \
-                TwoWheeledConfigModule.gradient_cost_fn_with_state(
+                TwoWheeledConfigModule.gradient_cost_fn_state(
                     tmp_x, g_xs, terminal=False)
             tmp_x = xs.copy()
             tmp_x[0, i] = xs[0, i] - eps
             backward = \
-                TwoWheeledConfigModule.gradient_cost_fn_with_state(
+                TwoWheeledConfigModule.gradient_cost_fn_state(
                     tmp_x, g_xs, terminal=False)
 
             expected_hess[0, :, i] = (forward - backward) / (2. * eps)
@@ -119,7 +121,7 @@ class TestGradient():
         """
         us = np.ones((1, 2))
         xs = np.ones((1, 3))
-        cost_hess = TwoWheeledConfigModule.hessian_cost_fn_with_input(us, xs)
+        cost_hess = TwoWheeledConfigModule.hessian_cost_fn_input(us, xs)
 
         # numeric grad
         eps = 1e-4
@@ -128,12 +130,12 @@ class TestGradient():
             tmp_u = us.copy()
             tmp_u[0, i] = us[0, i] + eps
             forward = \
-                TwoWheeledConfigModule.gradient_cost_fn_with_input(
+                TwoWheeledConfigModule.gradient_cost_fn_input(
                     xs, tmp_u)
             tmp_u = us.copy()
             tmp_u[0, i] = us[0, i] - eps
             backward = \
-                TwoWheeledConfigModule.gradient_cost_fn_with_input(
+                TwoWheeledConfigModule.gradient_cost_fn_input(
                     xs, tmp_u)
 
             expected_hess[0, :, i] = (forward - backward) / (2. * eps)