259 lines
8.2 KiB
Python
259 lines
8.2 KiB
Python
import numpy as np
|
|
|
|
|
|
class CartPoleConfigModule():
|
|
# parameters
|
|
ENV_NAME = "CartPole-v0"
|
|
PLANNER_TYPE = "Const"
|
|
TYPE = "Nonlinear"
|
|
TASK_HORIZON = 500
|
|
PRED_LEN = 50
|
|
STATE_SIZE = 4
|
|
INPUT_SIZE = 1
|
|
DT = 0.02
|
|
# cost parameters
|
|
R = np.diag([0.01]) # 0.01 is worked for MPPI and CEM and MPPIWilliams
|
|
# 1. is worked for iLQR
|
|
TERMINAL_WEIGHT = 1.
|
|
Q = None
|
|
Sf = None
|
|
# bounds
|
|
INPUT_LOWER_BOUND = np.array([-3.])
|
|
INPUT_UPPER_BOUND = np.array([3.])
|
|
# parameters
|
|
MP = 0.2
|
|
MC = 1.
|
|
L = 0.5
|
|
G = 9.81
|
|
CART_SIZE = (0.15, 0.1)
|
|
|
|
def __init__(self):
|
|
"""
|
|
"""
|
|
# opt configs
|
|
self.opt_config = {
|
|
"Random": {
|
|
"popsize": 5000
|
|
},
|
|
"CEM": {
|
|
"popsize": 500,
|
|
"num_elites": 50,
|
|
"max_iters": 15,
|
|
"alpha": 0.3,
|
|
"init_var": 9.,
|
|
"threshold": 0.001
|
|
},
|
|
"MPPI": {
|
|
"beta": 0.6,
|
|
"popsize": 5000,
|
|
"kappa": 0.9,
|
|
"noise_sigma": 0.5,
|
|
},
|
|
"MPPIWilliams": {
|
|
"popsize": 5000,
|
|
"lambda": 1.,
|
|
"noise_sigma": 0.9,
|
|
},
|
|
"iLQR": {
|
|
"max_iter": 500,
|
|
"init_mu": 1.,
|
|
"mu_min": 1e-6,
|
|
"mu_max": 1e10,
|
|
"init_delta": 2.,
|
|
"threshold": 1e-6,
|
|
},
|
|
"DDP": {
|
|
"max_iter": 500,
|
|
"init_mu": 1.,
|
|
"mu_min": 1e-6,
|
|
"mu_max": 1e10,
|
|
"init_delta": 2.,
|
|
"threshold": 1e-6,
|
|
},
|
|
"NMPC-CGMRES": {
|
|
},
|
|
"NMPC-Newton": {
|
|
},
|
|
}
|
|
|
|
@staticmethod
|
|
def input_cost_fn(u):
|
|
""" input cost functions
|
|
|
|
Args:
|
|
u (numpy.ndarray): input, shape(pred_len, input_size)
|
|
or shape(pop_size, pred_len, input_size)
|
|
Returns:
|
|
cost (numpy.ndarray): cost of input, shape(pred_len, input_size) or
|
|
shape(pop_size, pred_len, input_size)
|
|
"""
|
|
return (u**2) * np.diag(CartPoleConfigModule.R)
|
|
|
|
@staticmethod
|
|
def state_cost_fn(x, g_x):
|
|
""" state cost function
|
|
|
|
Args:
|
|
x (numpy.ndarray): state, shape(pred_len, state_size)
|
|
or shape(pop_size, pred_len, state_size)
|
|
g_x (numpy.ndarray): goal state, shape(pred_len, state_size)
|
|
or shape(pop_size, pred_len, state_size)
|
|
Returns:
|
|
cost (numpy.ndarray): cost of state, shape(pred_len, 1) or
|
|
shape(pop_size, pred_len, 1)
|
|
"""
|
|
|
|
if len(x.shape) > 2:
|
|
return (6. * (x[:, :, 0]**2)
|
|
+ 12. * ((np.cos(x[:, :, 2]) + 1.)**2)
|
|
+ 0.1 * (x[:, :, 1]**2)
|
|
+ 0.1 * (x[:, :, 3]**2))[:, :, np.newaxis]
|
|
|
|
elif len(x.shape) > 1:
|
|
return (6. * (x[:, 0]**2)
|
|
+ 12. * ((np.cos(x[:, 2]) + 1.)**2)
|
|
+ 0.1 * (x[:, 1]**2)
|
|
+ 0.1 * (x[:, 3]**2))[:, np.newaxis]
|
|
|
|
return 6. * (x[0]**2) \
|
|
+ 12. * ((np.cos(x[2]) + 1.)**2) \
|
|
+ 0.1 * (x[1]**2) \
|
|
+ 0.1 * (x[3]**2)
|
|
|
|
@staticmethod
|
|
def terminal_state_cost_fn(terminal_x, terminal_g_x):
|
|
"""
|
|
|
|
Args:
|
|
terminal_x (numpy.ndarray): terminal state,
|
|
shape(state_size, ) or shape(pop_size, state_size)
|
|
terminal_g_x (numpy.ndarray): terminal goal state,
|
|
shape(state_size, ) or shape(pop_size, state_size)
|
|
Returns:
|
|
cost (numpy.ndarray): cost of state, shape(pred_len, ) or
|
|
shape(pop_size, pred_len)
|
|
"""
|
|
|
|
if len(terminal_x.shape) > 1:
|
|
return (6. * (terminal_x[:, 0]**2)
|
|
+ 12. * ((np.cos(terminal_x[:, 2]) + 1.)**2)
|
|
+ 0.1 * (terminal_x[:, 1]**2)
|
|
+ 0.1 * (terminal_x[:, 3]**2))[:, np.newaxis] \
|
|
* CartPoleConfigModule.TERMINAL_WEIGHT
|
|
|
|
return (6. * (terminal_x[0]**2)
|
|
+ 12. * ((np.cos(terminal_x[2]) + 1.)**2)
|
|
+ 0.1 * (terminal_x[1]**2)
|
|
+ 0.1 * (terminal_x[3]**2)) \
|
|
* CartPoleConfigModule.TERMINAL_WEIGHT
|
|
|
|
@staticmethod
|
|
def gradient_cost_fn_state(x, g_x, terminal=False):
|
|
""" gradient of costs with respect to the state
|
|
|
|
Args:
|
|
x (numpy.ndarray): state, shape(pred_len, state_size)
|
|
g_x (numpy.ndarray): goal state, shape(pred_len, state_size)
|
|
|
|
Returns:
|
|
l_x (numpy.ndarray): gradient of cost, shape(pred_len, state_size)
|
|
or shape(1, state_size)
|
|
"""
|
|
if not terminal:
|
|
cost_dx0 = 12. * x[:, 0]
|
|
cost_dx1 = 0.2 * x[:, 1]
|
|
cost_dx2 = 24. * (1 + np.cos(x[:, 2])) * -np.sin(x[:, 2])
|
|
cost_dx3 = 0.2 * x[:, 3]
|
|
cost_dx = np.stack((cost_dx0, cost_dx1,
|
|
cost_dx2, cost_dx3), axis=1)
|
|
return cost_dx
|
|
|
|
cost_dx0 = 12. * x[0]
|
|
cost_dx1 = 0.2 * x[1]
|
|
cost_dx2 = 24. * (1 + np.cos(x[2])) * -np.sin(x[2])
|
|
cost_dx3 = 0.2 * x[3]
|
|
cost_dx = np.array([[cost_dx0, cost_dx1, cost_dx2, cost_dx3]])
|
|
|
|
return cost_dx * CartPoleConfigModule.TERMINAL_WEIGHT
|
|
|
|
@staticmethod
|
|
def gradient_cost_fn_input(x, u):
|
|
""" gradient of costs with respect to the input
|
|
|
|
Args:
|
|
x (numpy.ndarray): state, shape(pred_len, state_size)
|
|
u (numpy.ndarray): goal state, shape(pred_len, input_size)
|
|
Returns:
|
|
l_u (numpy.ndarray): gradient of cost, shape(pred_len, input_size)
|
|
"""
|
|
return 2. * u * np.diag(CartPoleConfigModule.R)
|
|
|
|
@staticmethod
|
|
def hessian_cost_fn_state(x, g_x, terminal=False):
|
|
""" hessian costs with respect to the state
|
|
|
|
Args:
|
|
x (numpy.ndarray): state, shape(pred_len, state_size)
|
|
g_x (numpy.ndarray): goal state, shape(pred_len, state_size)
|
|
Returns:
|
|
l_xx (numpy.ndarray): gradient of cost,
|
|
shape(pred_len, state_size, state_size) or
|
|
shape(1, state_size, state_size) or
|
|
"""
|
|
if not terminal:
|
|
(pred_len, state_size) = x.shape
|
|
hessian = np.eye(state_size)
|
|
hessian = np.tile(hessian, (pred_len, 1, 1))
|
|
hessian[:, 0, 0] = 12.
|
|
hessian[:, 1, 1] = 0.2
|
|
hessian[:, 2, 2] = 24. * -np.sin(x[:, 2]) \
|
|
* (-np.sin(x[:, 2])) \
|
|
+ 24. * (1. + np.cos(x[:, 2])) \
|
|
* -np.cos(x[:, 2])
|
|
hessian[:, 3, 3] = 0.2
|
|
|
|
return hessian
|
|
|
|
state_size = len(x)
|
|
hessian = np.eye(state_size)
|
|
hessian[0, 0] = 12.
|
|
hessian[1, 1] = 0.2
|
|
hessian[2, 2] = 24. * -np.sin(x[2]) \
|
|
* (-np.sin(x[2])) \
|
|
+ 24. * (1. + np.cos(x[2])) \
|
|
* -np.cos(x[2])
|
|
hessian[3, 3] = 0.2
|
|
|
|
return hessian[np.newaxis, :, :] * CartPoleConfigModule.TERMINAL_WEIGHT
|
|
|
|
@staticmethod
|
|
def hessian_cost_fn_input(x, u):
|
|
""" hessian costs with respect to the input
|
|
|
|
Args:
|
|
x (numpy.ndarray): state, shape(pred_len, state_size)
|
|
u (numpy.ndarray): goal state, shape(pred_len, input_size)
|
|
Returns:
|
|
l_uu (numpy.ndarray): gradient of cost,
|
|
shape(pred_len, input_size, input_size)
|
|
"""
|
|
(pred_len, _) = u.shape
|
|
|
|
return np.tile(2.*CartPoleConfigModule.R, (pred_len, 1, 1))
|
|
|
|
@staticmethod
|
|
def hessian_cost_fn_input_state(x, u):
|
|
""" hessian costs with respect to the state and input
|
|
|
|
Args:
|
|
x (numpy.ndarray): state, shape(pred_len, state_size)
|
|
u (numpy.ndarray): goal state, shape(pred_len, input_size)
|
|
Returns:
|
|
l_ux (numpy.ndarray): gradient of cost ,
|
|
shape(pred_len, input_size, state_size)
|
|
"""
|
|
(_, state_size) = x.shape
|
|
(pred_len, input_size) = u.shape
|
|
|
|
return np.zeros((pred_len, input_size, state_size))
|