Fix format

This commit is contained in:
Shunichi09 2021-02-13 17:18:42 +09:00
parent f49ed382a4
commit d64a799eda
37 changed files with 648 additions and 563 deletions

View File

@ -1,8 +1,9 @@
import numpy as np
def rotate_pos(pos, angle):
""" Transformation the coordinate in the angle
Args:
pos (numpy.ndarray): local state, shape(data_size, 2)
angle (float): rotate angle, in radians
@ -14,9 +15,10 @@ def rotate_pos(pos, angle):
return np.dot(pos, rot_mat.T)
def fit_angle_in_range(angles, min_angle=-np.pi, max_angle=np.pi):
""" Check angle range and correct the range
Args:
angle (numpy.ndarray): in radians
min_angle (float): maximum of range in radians, default -pi
@ -29,7 +31,7 @@ def fit_angle_in_range(angles, min_angle=-np.pi, max_angle=np.pi):
if (max_angle - min_angle) < 2.0 * np.pi:
raise ValueError("difference between max_angle \
and min_angle must be greater than 2.0 * pi")
output = np.array(angles)
output_shape = output.shape
@ -43,6 +45,7 @@ def fit_angle_in_range(angles, min_angle=-np.pi, max_angle=np.pi):
output = np.minimum(max_angle, np.maximum(min_angle, output))
return output.reshape(output_shape)
def update_state_with_Runge_Kutta(state, u, functions, dt=0.01):
""" update state in Runge Kutta methods
Args:
@ -52,23 +55,23 @@ def update_state_with_Runge_Kutta(state, u, functions, dt=0.01):
each function will be called like func(*state, *u)
We expect that this function returns differential of each state
dt (float): float in seconds
Returns:
next_state (np.array): next state of system
Notes:
sample of function is as follows:
def func_x(self, x_1, x_2, u):
x_dot = (1. - x_1**2 - x_2**2) * x_2 - x_1 + u
return x_dot
Note that the function return x_dot.
"""
state_size = len(state)
assert state_size == len(functions), \
"Invalid functions length, You need to give the state size functions"
k0 = np.zeros(state_size)
k1 = np.zeros(state_size)
k2 = np.zeros(state_size)
@ -84,10 +87,10 @@ def update_state_with_Runge_Kutta(state, u, functions, dt=0.01):
for i, func in enumerate(functions):
k1[i] = dt * func(*inputs)
add_state = state + k1 / 2.
inputs = np.concatenate([add_state, u])
for i, func in enumerate(functions):
k2[i] = dt * func(*inputs)
@ -95,6 +98,6 @@ def update_state_with_Runge_Kutta(state, u, functions, dt=0.01):
inputs = np.concatenate([add_state, u])
for i, func in enumerate(functions):
k3[i] = dt * func(*inputs)
k3[i] = dt * func(*inputs)
return (k0 + 2. * k1 + 2. * k2 + k3) / 6.
return (k0 + 2. * k1 + 2. * k2 + k3) / 6.

View File

@ -1,5 +1,6 @@
import numpy as np
class CartPoleConfigModule():
# parameters
ENV_NAME = "CartPole-v0"
@ -12,7 +13,7 @@ class CartPoleConfigModule():
DT = 0.02
# cost parameters
R = np.diag([0.01]) # 0.01 is worked for MPPI and CEM and MPPIWilliams
# 1. is worked for iLQR
# 1. is worked for iLQR
TERMINAL_WEIGHT = 1.
Q = None
Sf = None
@ -39,41 +40,41 @@ class CartPoleConfigModule():
"num_elites": 50,
"max_iters": 15,
"alpha": 0.3,
"init_var":9.,
"threshold":0.001
"init_var": 9.,
"threshold": 0.001
},
"MPPI":{
"beta" : 0.6,
"MPPI": {
"beta": 0.6,
"popsize": 5000,
"kappa": 0.9,
"noise_sigma": 0.5,
},
"MPPIWilliams":{
"MPPIWilliams": {
"popsize": 5000,
"lambda": 1.,
"noise_sigma": 0.9,
},
"iLQR":{
"iLQR": {
"max_iter": 500,
"init_mu": 1.,
"mu_min": 1e-6,
"mu_max": 1e10,
"init_delta": 2.,
"threshold": 1e-6,
},
"DDP":{
},
"DDP": {
"max_iter": 500,
"init_mu": 1.,
"mu_min": 1e-6,
"mu_max": 1e10,
"init_delta": 2.,
"threshold": 1e-6,
},
"NMPC-CGMRES":{
},
"NMPC-Newton":{
},
}
},
"NMPC-CGMRES": {
},
"NMPC-Newton": {
},
}
@staticmethod
def input_cost_fn(u):
@ -87,7 +88,7 @@ class CartPoleConfigModule():
shape(pop_size, pred_len, input_size)
"""
return (u**2) * np.diag(CartPoleConfigModule.R)
@staticmethod
def state_cost_fn(x, g_x):
""" state cost function
@ -103,21 +104,21 @@ class CartPoleConfigModule():
"""
if len(x.shape) > 2:
return (6. * (x[:, :, 0]**2) \
+ 12. * ((np.cos(x[:, :, 2]) + 1.)**2) \
+ 0.1 * (x[:, :, 1]**2) \
+ 0.1 * (x[:, :, 3]**2))[:, :, np.newaxis]
return (6. * (x[:, :, 0]**2)
+ 12. * ((np.cos(x[:, :, 2]) + 1.)**2)
+ 0.1 * (x[:, :, 1]**2)
+ 0.1 * (x[:, :, 3]**2))[:, :, np.newaxis]
elif len(x.shape) > 1:
return (6. * (x[:, 0]**2) \
+ 12. * ((np.cos(x[:, 2]) + 1.)**2) \
+ 0.1 * (x[:, 1]**2) \
+ 0.1 * (x[:, 3]**2))[:, np.newaxis]
return (6. * (x[:, 0]**2)
+ 12. * ((np.cos(x[:, 2]) + 1.)**2)
+ 0.1 * (x[:, 1]**2)
+ 0.1 * (x[:, 3]**2))[:, np.newaxis]
return 6. * (x[0]**2) \
+ 12. * ((np.cos(x[2]) + 1.)**2) \
+ 0.1 * (x[1]**2) \
+ 0.1 * (x[3]**2)
+ 12. * ((np.cos(x[2]) + 1.)**2) \
+ 0.1 * (x[1]**2) \
+ 0.1 * (x[3]**2)
@staticmethod
def terminal_state_cost_fn(terminal_x, terminal_g_x):
@ -134,18 +135,18 @@ class CartPoleConfigModule():
"""
if len(terminal_x.shape) > 1:
return (6. * (terminal_x[:, 0]**2) \
+ 12. * ((np.cos(terminal_x[:, 2]) + 1.)**2) \
+ 0.1 * (terminal_x[:, 1]**2) \
+ 0.1 * (terminal_x[:, 3]**2))[:, np.newaxis] \
* CartPoleConfigModule.TERMINAL_WEIGHT
return (6. * (terminal_x[0]**2) \
+ 12. * ((np.cos(terminal_x[2]) + 1.)**2) \
+ 0.1 * (terminal_x[1]**2) \
+ 0.1 * (terminal_x[3]**2)) \
return (6. * (terminal_x[:, 0]**2)
+ 12. * ((np.cos(terminal_x[:, 2]) + 1.)**2)
+ 0.1 * (terminal_x[:, 1]**2)
+ 0.1 * (terminal_x[:, 3]**2))[:, np.newaxis] \
* CartPoleConfigModule.TERMINAL_WEIGHT
return (6. * (terminal_x[0]**2)
+ 12. * ((np.cos(terminal_x[2]) + 1.)**2)
+ 0.1 * (terminal_x[1]**2)
+ 0.1 * (terminal_x[3]**2)) \
* CartPoleConfigModule.TERMINAL_WEIGHT
@staticmethod
def gradient_cost_fn_with_state(x, g_x, terminal=False):
""" gradient of costs with respect to the state
@ -153,26 +154,26 @@ class CartPoleConfigModule():
Args:
x (numpy.ndarray): state, shape(pred_len, state_size)
g_x (numpy.ndarray): goal state, shape(pred_len, state_size)
Returns:
l_x (numpy.ndarray): gradient of cost, shape(pred_len, state_size)
or shape(1, state_size)
"""
if not terminal:
cost_dx0 = 12. * x[:, 0]
cost_dx0 = 12. * x[:, 0]
cost_dx1 = 0.2 * x[:, 1]
cost_dx2 = 24. * (1 + np.cos(x[:, 2])) * -np.sin(x[:, 2])
cost_dx3 = 0.2 * x[:, 3]
cost_dx = np.stack((cost_dx0, cost_dx1,\
cost_dx = np.stack((cost_dx0, cost_dx1,
cost_dx2, cost_dx3), axis=1)
return cost_dx
cost_dx0 = 12. * x[0]
cost_dx0 = 12. * x[0]
cost_dx1 = 0.2 * x[1]
cost_dx2 = 24. * (1 + np.cos(x[2])) * -np.sin(x[2])
cost_dx3 = 0.2 * x[3]
cost_dx = np.array([[cost_dx0, cost_dx1, cost_dx2, cost_dx3]])
return cost_dx * CartPoleConfigModule.TERMINAL_WEIGHT
@staticmethod
@ -206,21 +207,21 @@ class CartPoleConfigModule():
hessian[:, 0, 0] = 12.
hessian[:, 1, 1] = 0.2
hessian[:, 2, 2] = 24. * -np.sin(x[:, 2]) \
* (-np.sin(x[:, 2])) \
+ 24. * (1. + np.cos(x[:, 2])) \
* -np.cos(x[:, 2])
* (-np.sin(x[:, 2])) \
+ 24. * (1. + np.cos(x[:, 2])) \
* -np.cos(x[:, 2])
hessian[:, 3, 3] = 0.2
return hessian
state_size = len(x)
hessian = np.eye(state_size)
hessian[0, 0] = 12.
hessian[1, 1] = 0.2
hessian[2, 2] = 24. * -np.sin(x[2]) \
* (-np.sin(x[2])) \
+ 24. * (1. + np.cos(x[2])) \
* -np.cos(x[2])
* (-np.sin(x[2])) \
+ 24. * (1. + np.cos(x[2])) \
* -np.cos(x[2])
hessian[3, 3] = 0.2
return hessian[np.newaxis, :, :] * CartPoleConfigModule.TERMINAL_WEIGHT
@ -239,7 +240,7 @@ class CartPoleConfigModule():
(pred_len, _) = u.shape
return np.tile(2.*CartPoleConfigModule.R, (pred_len, 1, 1))
@staticmethod
def hessian_cost_fn_with_input_state(x, u):
""" hessian costs with respect to the state and input
@ -254,4 +255,4 @@ class CartPoleConfigModule():
(_, state_size) = x.shape
(pred_len, input_size) = u.shape
return np.zeros((pred_len, input_size, state_size))
return np.zeros((pred_len, input_size, state_size))

View File

@ -1,5 +1,6 @@
import numpy as np
class FirstOrderLagConfigModule():
# parameters
ENV_NAME = "FirstOrderLag-v0"
@ -34,43 +35,43 @@ class FirstOrderLagConfigModule():
"num_elites": 50,
"max_iters": 15,
"alpha": 0.3,
"init_var":1.,
"threshold":0.001
"init_var": 1.,
"threshold": 0.001
},
"MPPI":{
"beta" : 0.6,
"MPPI": {
"beta": 0.6,
"popsize": 5000,
"kappa": 0.9,
"noise_sigma": 0.5,
},
"MPPIWilliams":{
"MPPIWilliams": {
"popsize": 5000,
"lambda": 1.,
"noise_sigma": 0.9,
},
"MPC":{
},
"iLQR":{
"MPC": {
},
"iLQR": {
"max_iter": 500,
"init_mu": 1.,
"mu_min": 1e-6,
"mu_max": 1e10,
"init_delta": 2.,
"threshold": 1e-6,
},
"DDP":{
},
"DDP": {
"max_iter": 500,
"init_mu": 1.,
"mu_min": 1e-6,
"mu_max": 1e10,
"init_delta": 2.,
"threshold": 1e-6,
},
"NMPC-CGMRES":{
},
"NMPC-Newton":{
},
}
},
"NMPC-CGMRES": {
},
"NMPC-Newton": {
},
}
@staticmethod
def input_cost_fn(u):
@ -83,7 +84,7 @@ class FirstOrderLagConfigModule():
shape(pop_size, pred_len, input_size)
"""
return (u**2) * np.diag(FirstOrderLagConfigModule.R)
@staticmethod
def state_cost_fn(x, g_x):
""" state cost function
@ -111,8 +112,8 @@ class FirstOrderLagConfigModule():
shape(pop_size, pred_len)
"""
return ((terminal_x - terminal_g_x)**2) \
* np.diag(FirstOrderLagConfigModule.Sf)
* np.diag(FirstOrderLagConfigModule.Sf)
@staticmethod
def gradient_cost_fn_with_state(x, g_x, terminal=False):
""" gradient of costs with respect to the state
@ -120,16 +121,16 @@ class FirstOrderLagConfigModule():
Args:
x (numpy.ndarray): state, shape(pred_len, state_size)
g_x (numpy.ndarray): goal state, shape(pred_len, state_size)
Returns:
l_x (numpy.ndarray): gradient of cost, shape(pred_len, state_size)
or shape(1, state_size)
"""
if not terminal:
return 2. * (x - g_x) * np.diag(FirstOrderLagConfigModule.Q)
return (2. * (x - g_x) \
* np.diag(FirstOrderLagConfigModule.Sf))[np.newaxis, :]
return (2. * (x - g_x)
* np.diag(FirstOrderLagConfigModule.Sf))[np.newaxis, :]
@staticmethod
def gradient_cost_fn_with_input(x, u):
@ -138,7 +139,7 @@ class FirstOrderLagConfigModule():
Args:
x (numpy.ndarray): state, shape(pred_len, state_size)
u (numpy.ndarray): goal state, shape(pred_len, input_size)
Returns:
l_u (numpy.ndarray): gradient of cost, shape(pred_len, input_size)
"""
@ -151,7 +152,7 @@ class FirstOrderLagConfigModule():
Args:
x (numpy.ndarray): state, shape(pred_len, state_size)
g_x (numpy.ndarray): goal state, shape(pred_len, state_size)
Returns:
l_xx (numpy.ndarray): gradient of cost,
shape(pred_len, state_size, state_size) or
@ -159,9 +160,9 @@ class FirstOrderLagConfigModule():
"""
if not terminal:
(pred_len, _) = x.shape
return np.tile(2.*FirstOrderLagConfigModule.Q, (pred_len, 1, 1))
return np.tile(2.*FirstOrderLagConfigModule.Sf, (1, 1, 1))
return np.tile(2.*FirstOrderLagConfigModule.Q, (pred_len, 1, 1))
return np.tile(2.*FirstOrderLagConfigModule.Sf, (1, 1, 1))
@staticmethod
def hessian_cost_fn_with_input(x, u):
@ -170,7 +171,7 @@ class FirstOrderLagConfigModule():
Args:
x (numpy.ndarray): state, shape(pred_len, state_size)
u (numpy.ndarray): goal state, shape(pred_len, input_size)
Returns:
l_uu (numpy.ndarray): gradient of cost,
shape(pred_len, input_size, input_size)
@ -178,7 +179,7 @@ class FirstOrderLagConfigModule():
(pred_len, _) = u.shape
return np.tile(2.*FirstOrderLagConfigModule.R, (pred_len, 1, 1))
@staticmethod
def hessian_cost_fn_with_input_state(x, u):
""" hessian costs with respect to the state and input
@ -186,7 +187,7 @@ class FirstOrderLagConfigModule():
Args:
x (numpy.ndarray): state, shape(pred_len, state_size)
u (numpy.ndarray): goal state, shape(pred_len, input_size)
Returns:
l_ux (numpy.ndarray): gradient of cost ,
shape(pred_len, input_size, state_size)

View File

@ -2,6 +2,7 @@ from .first_order_lag import FirstOrderLagConfigModule
from .two_wheeled import TwoWheeledConfigModule
from .cartpole import CartPoleConfigModule
def make_config(args):
"""
Returns:
@ -12,4 +13,4 @@ def make_config(args):
elif args.env == "TwoWheeledConst" or args.env == "TwoWheeledTrack":
return TwoWheeledConfigModule()
elif args.env == "CartPole":
return CartPoleConfigModule()
return CartPoleConfigModule()

View File

@ -4,6 +4,7 @@ from matplotlib.axes import Axes
from ..plotters.plot_objs import square_with_angle, square
from ..common.utils import fit_angle_in_range
class TwoWheeledConfigModule():
# parameters
ENV_NAME = "TwoWheeled-v0"
@ -25,7 +26,7 @@ class TwoWheeledConfigModule():
R = np.diag([0.01, 0.01])
Q = np.diag([2.5, 2.5, 0.01])
Sf = np.diag([2.5, 2.5, 0.01])
# bounds
INPUT_LOWER_BOUND = np.array([-1.5, -3.14])
INPUT_UPPER_BOUND = np.array([1.5, 3.14])
@ -46,41 +47,41 @@ class TwoWheeledConfigModule():
"num_elites": 50,
"max_iters": 15,
"alpha": 0.3,
"init_var":1.,
"threshold":0.001
"init_var": 1.,
"threshold": 0.001
},
"MPPI":{
"beta" : 0.6,
"MPPI": {
"beta": 0.6,
"popsize": 5000,
"kappa": 0.9,
"noise_sigma": 0.5,
},
"MPPIWilliams":{
"MPPIWilliams": {
"popsize": 5000,
"lambda": 1,
"noise_sigma": 1.,
},
"iLQR":{
"iLQR": {
"max_iter": 500,
"init_mu": 1.,
"mu_min": 1e-6,
"mu_max": 1e10,
"init_delta": 2.,
"threshold": 1e-6,
},
"DDP":{
},
"DDP": {
"max_iter": 500,
"init_mu": 1.,
"mu_min": 1e-6,
"mu_max": 1e10,
"init_delta": 2.,
"threshold": 1e-6,
},
"NMPC-CGMRES":{
},
"NMPC-Newton":{
},
}
},
"NMPC-CGMRES": {
},
"NMPC-Newton": {
},
}
@staticmethod
def input_cost_fn(u):
@ -93,7 +94,7 @@ class TwoWheeledConfigModule():
shape(pop_size, pred_len, input_size)
"""
return (u**2) * np.diag(TwoWheeledConfigModule.R)
@staticmethod
def fit_diff_in_range(diff_x):
""" fit difference state in range(angle)
@ -107,7 +108,7 @@ class TwoWheeledConfigModule():
fitted_diff_x (numpy.ndarray): same shape as diff_x
"""
if len(diff_x.shape) == 3:
diff_x[:, :, -1] = fit_angle_in_range(diff_x[:, :, -1])
diff_x[:, :, -1] = fit_angle_in_range(diff_x[:, :, -1])
elif len(diff_x.shape) == 2:
diff_x[:, -1] = fit_angle_in_range(diff_x[:, -1])
elif len(diff_x.shape) == 1:
@ -142,11 +143,11 @@ class TwoWheeledConfigModule():
cost (numpy.ndarray): cost of state, shape(pred_len, ) or
shape(pop_size, pred_len)
"""
terminal_diff = TwoWheeledConfigModule.fit_diff_in_range(terminal_x \
- terminal_g_x)
terminal_diff = TwoWheeledConfigModule.fit_diff_in_range(terminal_x
- terminal_g_x)
return ((terminal_diff)**2) * np.diag(TwoWheeledConfigModule.Sf)
@staticmethod
def gradient_cost_fn_with_state(x, g_x, terminal=False):
""" gradient of costs with respect to the state
@ -154,18 +155,18 @@ class TwoWheeledConfigModule():
Args:
x (numpy.ndarray): state, shape(pred_len, state_size)
g_x (numpy.ndarray): goal state, shape(pred_len, state_size)
Returns:
l_x (numpy.ndarray): gradient of cost, shape(pred_len, state_size)
or shape(1, state_size)
"""
diff = TwoWheeledConfigModule.fit_diff_in_range(x - g_x)
if not terminal:
return 2. * (diff) * np.diag(TwoWheeledConfigModule.Q)
return (2. * (diff) \
* np.diag(TwoWheeledConfigModule.Sf))[np.newaxis, :]
return (2. * (diff)
* np.diag(TwoWheeledConfigModule.Sf))[np.newaxis, :]
@staticmethod
def gradient_cost_fn_with_input(x, u):
@ -174,7 +175,7 @@ class TwoWheeledConfigModule():
Args:
x (numpy.ndarray): state, shape(pred_len, state_size)
u (numpy.ndarray): goal state, shape(pred_len, input_size)
Returns:
l_u (numpy.ndarray): gradient of cost, shape(pred_len, input_size)
"""
@ -187,7 +188,7 @@ class TwoWheeledConfigModule():
Args:
x (numpy.ndarray): state, shape(pred_len, state_size)
g_x (numpy.ndarray): goal state, shape(pred_len, state_size)
Returns:
l_xx (numpy.ndarray): gradient of cost,
shape(pred_len, state_size, state_size) or
@ -195,9 +196,9 @@ class TwoWheeledConfigModule():
"""
if not terminal:
(pred_len, _) = x.shape
return np.tile(2.*TwoWheeledConfigModule.Q, (pred_len, 1, 1))
return np.tile(2.*TwoWheeledConfigModule.Sf, (1, 1, 1))
return np.tile(2.*TwoWheeledConfigModule.Q, (pred_len, 1, 1))
return np.tile(2.*TwoWheeledConfigModule.Sf, (1, 1, 1))
@staticmethod
def hessian_cost_fn_with_input(x, u):
@ -206,7 +207,7 @@ class TwoWheeledConfigModule():
Args:
x (numpy.ndarray): state, shape(pred_len, state_size)
u (numpy.ndarray): goal state, shape(pred_len, input_size)
Returns:
l_uu (numpy.ndarray): gradient of cost,
shape(pred_len, input_size, input_size)
@ -214,7 +215,7 @@ class TwoWheeledConfigModule():
(pred_len, _) = u.shape
return np.tile(2.*TwoWheeledConfigModule.R, (pred_len, 1, 1))
@staticmethod
def hessian_cost_fn_with_input_state(x, u):
""" hessian costs with respect to the state and input
@ -222,7 +223,7 @@ class TwoWheeledConfigModule():
Args:
x (numpy.ndarray): state, shape(pred_len, state_size)
u (numpy.ndarray): goal state, shape(pred_len, input_size)
Returns:
l_ux (numpy.ndarray): gradient of cost ,
shape(pred_len, input_size, state_size)
@ -230,4 +231,4 @@ class TwoWheeledConfigModule():
(_, state_size) = x.shape
(pred_len, input_size) = u.shape
return np.zeros((pred_len, input_size, state_size))
return np.zeros((pred_len, input_size, state_size))

View File

@ -8,6 +8,7 @@ from ..envs.cost import calc_cost
logger = getLogger(__name__)
class CEM(Controller):
""" Cross Entropy Method for linear and nonlinear method
@ -19,6 +20,7 @@ class CEM(Controller):
using probabilistic dynamics models.
In Advances in Neural Information Processing Systems (pp. 4754-4765).
"""
def __init__(self, config, model):
super(CEM, self).__init__(config, model)
@ -38,7 +40,7 @@ class CEM(Controller):
self.init_var = config.opt_config["CEM"]["init_var"]
self.opt_dim = self.input_size * self.pred_len
# get bound
# get bound
self.input_upper_bounds = np.tile(config.INPUT_UPPER_BOUND,
self.pred_len)
self.input_lower_bounds = np.tile(config.INPUT_LOWER_BOUND,
@ -50,18 +52,18 @@ class CEM(Controller):
self.input_cost_fn = config.input_cost_fn
# init mean
self.init_mean = np.tile((config.INPUT_UPPER_BOUND \
self.init_mean = np.tile((config.INPUT_UPPER_BOUND
+ config.INPUT_LOWER_BOUND) / 2.,
self.pred_len)
self.prev_sol = self.init_mean.copy()
# init variance
var = np.ones_like(config.INPUT_UPPER_BOUND) \
* config.opt_config["CEM"]["init_var"]
* config.opt_config["CEM"]["init_var"]
self.init_var = np.tile(var, self.pred_len)
# save
self.history_u = []
def clear_sol(self):
""" clear prev sol
"""
@ -77,21 +79,21 @@ class CEM(Controller):
Returns:
opt_input (numpy.ndarray): optimal input, shape(input_size, )
"""
# initialize
# initialize
opt_count = 0
# get configuration
mean = self.prev_sol.flatten().copy()
mean = self.prev_sol.flatten().copy()
var = self.init_var.flatten().copy()
# make distribution
# make distribution
X = stats.truncnorm(-1, 1,
loc=np.zeros_like(mean),\
loc=np.zeros_like(mean),
scale=np.ones_like(mean))
while (opt_count < self.max_iters) and np.max(var) > self.epsilon:
# constrained
lb_dist = mean - self.input_lower_bounds
lb_dist = mean - self.input_lower_bounds
ub_dist = self.input_upper_bounds - mean
constrained_var = np.minimum(np.minimum(np.square(lb_dist),
np.square(ub_dist)),
@ -99,15 +101,15 @@ class CEM(Controller):
# sample
samples = X.rvs(size=[self.pop_size, self.opt_dim]) \
* np.sqrt(constrained_var) \
+ mean
* np.sqrt(constrained_var) \
+ mean
# calc cost
# samples.shape = (pop_size, opt_dim)
costs = self.calc_cost(curr_x,
samples.reshape(self.pop_size,
self.pred_len,
self.input_size),
self.pred_len,
self.input_size),
g_xs)
# sort cost
@ -124,7 +126,7 @@ class CEM(Controller):
logger.debug("Var = {}".format(np.max(var)))
logger.debug("Costs = {}".format(np.mean(costs)))
opt_count += 1
sol = mean.copy()
self.prev_sol = np.concatenate((mean[self.input_size:],
np.zeros(self.input_size)))

View File

@ -2,9 +2,11 @@ import numpy as np
from ..envs.cost import calc_cost
class Controller():
""" Controller class
"""
def __init__(self, config, model):
"""
"""
@ -15,7 +17,7 @@ class Controller():
self.state_cost_fn = config.state_cost_fn
self.terminal_state_cost_fn = config.terminal_state_cost_fn
self.input_cost_fn = config.input_cost_fn
def obtain_sol(self, curr_x, g_xs):
""" calculate the optimal inputs
Args:
@ -26,7 +28,7 @@ class Controller():
"""
raise NotImplementedError("Implement the algorithm to \
get optimal input")
def calc_cost(self, curr_x, samples, g_xs):
""" calculate the cost of input samples
@ -46,22 +48,24 @@ class Controller():
# calc cost, pred_xs.shape = (pop_size, pred_len+1, state_size)
pred_xs = self.model.predict_traj(curr_x, samples)
# get particle cost
costs = calc_cost(pred_xs, samples, g_xs,
self.state_cost_fn, self.input_cost_fn, \
self.state_cost_fn, self.input_cost_fn,
self.terminal_state_cost_fn)
return costs
@staticmethod
def gradient_hamiltonian_x(x, u, lam):
""" gradient of hamitonian with respect to the state,
"""
raise NotImplementedError("Implement gradient of hamitonian with respect to the state")
raise NotImplementedError(
"Implement gradient of hamitonian with respect to the state")
@staticmethod
def gradient_hamiltonian_u(x, u, lam):
""" gradient of hamitonian with respect to the input
"""
raise NotImplementedError("Implement gradient of hamitonian with respect to the input")
raise NotImplementedError(
"Implement gradient of hamitonian with respect to the input")

View File

@ -8,6 +8,7 @@ from ..envs.cost import calc_cost
logger = getLogger(__name__)
class DDP(Controller):
""" Differential Dynamic Programming
@ -18,11 +19,12 @@ class DDP(Controller):
https://github.com/studywolf/control, and
https://github.com/anassinator/ilqr
"""
def __init__(self, config, model):
"""
"""
super(DDP, self).__init__(config, model)
# model
self.model = model
@ -56,7 +58,7 @@ class DDP(Controller):
self.Q = config.Q
self.R = config.R
self.Sf = config.Sf
# initialize
self.prev_sol = np.zeros((self.pred_len, self.input_size))
@ -65,7 +67,7 @@ class DDP(Controller):
"""
logger.debug("Clear Sol")
self.prev_sol = np.zeros((self.pred_len, self.input_size))
def obtain_sol(self, curr_x, g_xs):
""" calculate the optimal inputs
@ -89,26 +91,26 @@ class DDP(Controller):
while opt_count < self.max_iter:
accepted_sol = False
# forward
# forward
if update_sol == True:
pred_xs, cost, f_x, f_u, f_xx, f_ux, f_uu,\
l_x, l_xx, l_u, l_uu, l_ux = \
l_x, l_xx, l_u, l_uu, l_ux = \
self.forward(curr_x, g_xs, sol)
update_sol = False
try:
# backward
k, K = self.backward(f_x, f_u, f_xx, f_ux, f_uu, \
k, K = self.backward(f_x, f_u, f_xx, f_ux, f_uu,
l_x, l_xx, l_u, l_uu, l_ux)
# line search
for alpha in alphas:
new_pred_xs, new_sol = \
self.calc_input(k, K, pred_xs, sol, alpha)
new_cost = calc_cost(new_pred_xs[np.newaxis, :, :],
new_sol[np.newaxis, :, :],
g_xs[np.newaxis, :, :],
g_xs[np.newaxis, :, :],
self.state_cost_fn,
self.input_cost_fn,
self.terminal_state_cost_fn)
@ -131,15 +133,15 @@ class DDP(Controller):
# accept the solution
accepted_sol = True
break
except np.linalg.LinAlgError as e:
logger.debug("Non ans : {}".format(e))
if not accepted_sol:
# increase regularization term.
self.delta = max(1.0, self.delta) * self.init_delta
self.mu = max(self.mu_min, self.mu * self.delta)
logger.debug("Update regularization term to {}"\
logger.debug("Update regularization term to {}"
.format(self.mu))
if self.mu >= self.mu_max:
logger.debug("Reach Max regularization term")
@ -156,7 +158,7 @@ class DDP(Controller):
self.prev_sol[-1] = sol[-1] # last use the terminal input
return sol[0]
def calc_input(self, k, K, pred_xs, sol, alpha):
""" calc input trajectory by using k and K
@ -183,8 +185,8 @@ class DDP(Controller):
for t in range(pred_len):
new_sol[t] = sol[t] \
+ alpha * k[t] \
+ np.dot(K[t], (new_pred_xs[t] - pred_xs[t]))
+ alpha * k[t] \
+ np.dot(K[t], (new_pred_xs[t] - pred_xs[t]))
new_pred_xs[t+1] = self.model.predict_next_state(new_pred_xs[t],
new_sol[t])
@ -227,7 +229,7 @@ class DDP(Controller):
g_xs)
# calc gradinet in batch
f_x = self.model.calc_f_x(pred_xs[:-1], sol, self.dt)
f_x = self.model.calc_f_x(pred_xs[:-1], sol, self.dt)
f_u = self.model.calc_f_u(pred_xs[:-1], sol, self.dt)
# calc hessian in batch
f_xx = self.model.calc_f_xx(pred_xs[:-1], sol, self.dt)
@ -237,13 +239,13 @@ class DDP(Controller):
# gradint of costs
l_x, l_xx, l_u, l_uu, l_ux = \
self._calc_gradient_hessian_cost(pred_xs, g_xs, sol)
return pred_xs, cost, f_x, f_u, f_xx, f_ux, f_uu, \
l_x, l_xx, l_u, l_uu, l_ux
def _calc_gradient_hessian_cost(self, pred_xs, g_x, sol):
""" calculate gradient and hessian of model and cost fn
Args:
pred_xs (numpy.ndarray): predict traj,
shape(pred_len+1, state_size)
@ -268,7 +270,7 @@ class DDP(Controller):
self.gradient_cost_fn_with_state(pred_xs[-1],
g_x[-1], terminal=True)
l_x = np.concatenate((l_x, terminal_l_x), axis=0)
l_x = np.concatenate((l_x, terminal_l_x), axis=0)
# l_u.shape = (pred_len, input_size)
l_u = self.gradient_cost_fn_with_input(pred_xs[:-1], sol)
@ -281,7 +283,7 @@ class DDP(Controller):
g_x[-1], terminal=True)
l_xx = np.concatenate((l_xx, terminal_l_xx), axis=0)
# l_uu.shape = (pred_len, input_size, input_size)
l_uu = self.hessian_cost_fn_with_input(pred_xs[:-1], sol)
@ -321,7 +323,7 @@ class DDP(Controller):
# get size
(_, state_size, _) = f_x.shape
# initialzie
# initialzie
V_x = l_x[-1]
V_xx = l_xx[-1]
k = np.zeros((self.pred_len, self.input_size))
@ -388,7 +390,7 @@ class DDP(Controller):
"""
# get size
state_size = len(l_x)
Q_x = l_x + np.dot(f_x.T, V_x)
Q_u = l_u + np.dot(f_u.T, V_x)
Q_xx = l_xx + np.dot(np.dot(f_x.T, V_xx), f_x)
@ -402,4 +404,4 @@ class DDP(Controller):
Q_ux += np.tensordot(V_x, f_ux, axes=1)
Q_uu += np.tensordot(V_x, f_uu, axes=1)
return Q_x, Q_u, Q_xx, Q_ux, Q_uu
return Q_x, Q_u, Q_xx, Q_ux, Q_uu

View File

@ -8,6 +8,7 @@ from ..envs.cost import calc_cost
logger = getLogger(__name__)
class iLQR(Controller):
""" iterative Liner Quadratique Regulator
@ -16,11 +17,12 @@ class iLQR(Controller):
Intelligent Robots and Systems (pp. 4906-4913). and Study Wolf,
https://github.com/studywolf/control
"""
def __init__(self, config, model):
"""
"""
super(iLQR, self).__init__(config, model)
# model
self.model = model
@ -58,7 +60,7 @@ class iLQR(Controller):
"""
logger.debug("Clear Sol")
self.prev_sol = np.zeros((self.pred_len, self.input_size))
def obtain_sol(self, curr_x, g_xs):
""" calculate the optimal inputs
@ -82,12 +84,12 @@ class iLQR(Controller):
while opt_count < self.max_iter:
accepted_sol = False
# forward
# forward
if update_sol == True:
pred_xs, cost, f_x, f_u, l_x, l_xx, l_u, l_uu, l_ux = \
self.forward(curr_x, g_xs, sol)
update_sol = False
try:
# backward
k, K = self.backward(f_x, f_u, l_x, l_xx, l_u, l_uu, l_ux)
@ -96,10 +98,10 @@ class iLQR(Controller):
for alpha in alphas:
new_pred_xs, new_sol = \
self.calc_input(k, K, pred_xs, sol, alpha)
new_cost = calc_cost(new_pred_xs[np.newaxis, :, :],
new_sol[np.newaxis, :, :],
g_xs[np.newaxis, :, :],
g_xs[np.newaxis, :, :],
self.state_cost_fn,
self.input_cost_fn,
self.terminal_state_cost_fn)
@ -122,15 +124,15 @@ class iLQR(Controller):
# accept the solution
accepted_sol = True
break
except np.linalg.LinAlgError as e:
logger.debug("Non ans : {}".format(e))
if not accepted_sol:
# increase regularization term.
self.delta = max(1.0, self.delta) * self.init_delta
self.mu = max(self.mu_min, self.mu * self.delta)
logger.debug("Update regularization term to {}"\
logger.debug("Update regularization term to {}"
.format(self.mu))
if self.mu >= self.mu_max:
logger.debug("Reach Max regularization term")
@ -147,7 +149,7 @@ class iLQR(Controller):
self.prev_sol[-1] = sol[-1] # last use the terminal input
return sol[0]
def calc_input(self, k, K, pred_xs, sol, alpha):
""" calc input trajectory by using k and K
@ -174,8 +176,8 @@ class iLQR(Controller):
for t in range(pred_len):
new_sol[t] = sol[t] \
+ alpha * k[t] \
+ np.dot(K[t], (new_pred_xs[t] - pred_xs[t]))
+ alpha * k[t] \
+ np.dot(K[t], (new_pred_xs[t] - pred_xs[t]))
new_pred_xs[t+1] = self.model.predict_next_state(new_pred_xs[t],
new_sol[t])
@ -212,18 +214,18 @@ class iLQR(Controller):
g_xs)
# calc gradinet in batch
f_x = self.model.calc_f_x(pred_xs[:-1], sol, self.dt)
f_x = self.model.calc_f_x(pred_xs[:-1], sol, self.dt)
f_u = self.model.calc_f_u(pred_xs[:-1], sol, self.dt)
# gradint of costs
l_x, l_xx, l_u, l_uu, l_ux = \
self._calc_gradient_hessian_cost(pred_xs, g_xs, sol)
return pred_xs, cost, f_x, f_u, l_x, l_xx, l_u, l_uu, l_ux
def _calc_gradient_hessian_cost(self, pred_xs, g_x, sol):
""" calculate gradient and hessian of model and cost fn
Args:
pred_xs (numpy.ndarray): predict traj,
shape(pred_len+1, state_size)
@ -248,7 +250,7 @@ class iLQR(Controller):
self.gradient_cost_fn_with_state(pred_xs[-1],
g_x[-1], terminal=True)
l_x = np.concatenate((l_x, terminal_l_x), axis=0)
l_x = np.concatenate((l_x, terminal_l_x), axis=0)
# l_u.shape = (pred_len, input_size)
l_u = self.gradient_cost_fn_with_input(pred_xs[:-1], sol)
@ -261,7 +263,7 @@ class iLQR(Controller):
g_x[-1], terminal=True)
l_xx = np.concatenate((l_xx, terminal_l_xx), axis=0)
# l_uu.shape = (pred_len, input_size, input_size)
l_uu = self.hessian_cost_fn_with_input(pred_xs[:-1], sol)
@ -287,7 +289,7 @@ class iLQR(Controller):
shape(pred_len, input_size, input_size)
l_ux (numpy.ndarray): hessian of cost with respect
to state and input, shape(pred_len, input_size, state_size)
Returns:
k (numpy.ndarray): gain, shape(pred_len, input_size)
K (numpy.ndarray): gain, shape(pred_len, input_size, state_size)
@ -295,7 +297,7 @@ class iLQR(Controller):
# get size
(_, state_size, _) = f_x.shape
# initialzie
# initialzie
V_x = l_x[-1]
V_xx = l_xx[-1]
k = np.zeros((self.pred_len, self.input_size))
@ -352,7 +354,7 @@ class iLQR(Controller):
"""
# get size
state_size = len(l_x)
Q_x = l_x + np.dot(f_x.T, V_x)
Q_u = l_u + np.dot(f_u.T, V_x)
Q_xx = l_xx + np.dot(np.dot(f_x.T, V_xx), f_x)
@ -361,4 +363,4 @@ class iLQR(Controller):
Q_ux = l_ux + np.dot(np.dot(f_u.T, (V_xx + reg)), f_x)
Q_uu = l_uu + np.dot(np.dot(f_u.T, (V_xx + reg)), f_u)
return Q_x, Q_u, Q_xx, Q_ux, Q_uu
return Q_x, Q_u, Q_xx, Q_ux, Q_uu

View File

@ -6,6 +6,7 @@ from .mppi_williams import MPPIWilliams
from .ilqr import iLQR
from .ddp import DDP
def make_controller(args, config, model):
if args.controller_type == "MPC":
@ -22,5 +23,5 @@ def make_controller(args, config, model):
return iLQR(config, model)
elif args.controller_type == "DDP":
return DDP(config, model)
raise ValueError("No controller: {}".format(args.controller_type))
raise ValueError("No controller: {}".format(args.controller_type))

View File

@ -9,6 +9,7 @@ from ..envs.cost import calc_cost
logger = getLogger(__name__)
class LinearMPC(Controller):
""" Model Predictive Controller for linear model
@ -21,6 +22,7 @@ class LinearMPC(Controller):
Ref:
Maciejowski, J. M. (2002). Predictive control: with constraints.
"""
def __init__(self, config, model):
"""
Args:
@ -55,7 +57,7 @@ class LinearMPC(Controller):
self.dt_input_upper_bound = config.DT_INPUT_UPPER_BOUND
self.input_lower_bound = config.INPUT_LOWER_BOUND
self.input_upper_bound = config.INPUT_UPPER_BOUND
# setup controllers
self.W = None
self.omega = None
@ -66,7 +68,7 @@ class LinearMPC(Controller):
# history
self.history_u = [np.zeros(self.input_size)]
def setup(self):
"""
setup Model Predictive Control as a quadratic programming
@ -77,11 +79,11 @@ class LinearMPC(Controller):
for _ in range(self.pred_len - 1):
temp_mat = np.matmul(A_factorials[-1], self.A)
self.phi_mat = np.vstack((self.phi_mat, temp_mat))
A_factorials.append(temp_mat) # after we use this factorials
A_factorials.append(temp_mat) # after we use this factorials
self.gamma_mat = self.B.copy()
gammma_mat_temp = self.B.copy()
for i in range(self.pred_len - 1):
temp_1_mat = np.matmul(A_factorials[i], self.B)
gammma_mat_temp = temp_1_mat + gammma_mat_temp
@ -91,8 +93,8 @@ class LinearMPC(Controller):
for i in range(self.pred_len - 1):
temp_mat = np.zeros_like(self.gamma_mat)
temp_mat[int((i + 1)*self.state_size): , :] =\
self.gamma_mat[:-int((i + 1)*self.state_size) , :]
temp_mat[int((i + 1)*self.state_size):, :] =\
self.gamma_mat[:-int((i + 1)*self.state_size), :]
self.theta_mat = np.hstack((self.theta_mat, temp_mat))
@ -114,12 +116,12 @@ class LinearMPC(Controller):
for i in range(self.pred_len - 1):
for j in range(self.input_size):
temp_F[j * 2: (j + 1) * 2,\
temp_F[j * 2: (j + 1) * 2,
((i+1) * self.input_size) + j] = np.array([1., -1.])
self.F = np.vstack((self.F, temp_F))
self.F1 = self.F[:, :self.input_size]
temp_f = []
for i in range(self.input_size):
temp_f.append(-1 * self.input_upper_bound[i])
@ -168,7 +170,7 @@ class LinearMPC(Controller):
H = H * 0.5
# constraints
A = []
A = []
b = []
if self.W is not None:
@ -187,14 +189,14 @@ class LinearMPC(Controller):
# using cvxopt
def optimized_func(dt_us):
return (np.dot(dt_us, np.dot(H, dt_us.reshape(-1, 1))) \
return (np.dot(dt_us, np.dot(H, dt_us.reshape(-1, 1)))
- np.dot(G.T, dt_us.reshape(-1, 1)))[0]
# constraint
lb = np.array([-np.inf for _ in range(len(ub))]) # one side cons
cons = LinearConstraint(A, lb, ub)
# solve
opt_sol = minimize(optimized_func, self.prev_sol.flatten(),\
opt_sol = minimize(optimized_func, self.prev_sol.flatten(),
constraints=[cons])
opt_dt_us = opt_sol.x
@ -213,21 +215,21 @@ class LinearMPC(Controller):
"""
# to dt form
opt_dt_u_seq = np.cumsum(opt_dt_us.reshape(self.pred_len,\
opt_dt_u_seq = np.cumsum(opt_dt_us.reshape(self.pred_len,
self.input_size),
axis=0)
self.prev_sol = opt_dt_u_seq.copy()
opt_u_seq = opt_dt_u_seq + self.history_u[-1]
# save
self.history_u.append(opt_u_seq[0])
# check costs
costs = self.calc_cost(curr_x,
opt_u_seq.reshape(1,
self.pred_len,
self.input_size),
self.pred_len,
self.input_size),
g_xs)
logger.debug("Cost = {}".format(costs))
@ -235,4 +237,4 @@ class LinearMPC(Controller):
return opt_u_seq[0]
def __str__(self):
return "LinearMPC"
return "LinearMPC"

View File

@ -8,6 +8,7 @@ from ..envs.cost import calc_cost
logger = getLogger(__name__)
class MPPI(Controller):
""" Model Predictive Path Integral for linear and nonlinear method
@ -18,6 +19,7 @@ class MPPI(Controller):
Deep Dynamics Models for Learning Dexterous Manipulation.
arXiv preprint arXiv:1909.11652.
"""
def __init__(self, config, model):
super(MPPI, self).__init__(config, model)
@ -35,7 +37,7 @@ class MPPI(Controller):
self.noise_sigma = config.opt_config["MPPI"]["noise_sigma"]
self.opt_dim = self.input_size * self.pred_len
# get bound
# get bound
self.input_upper_bounds = np.tile(config.INPUT_UPPER_BOUND,
(self.pred_len, 1))
self.input_lower_bounds = np.tile(config.INPUT_LOWER_BOUND,
@ -47,14 +49,14 @@ class MPPI(Controller):
self.input_cost_fn = config.input_cost_fn
# init mean
self.prev_sol = np.tile((config.INPUT_UPPER_BOUND \
self.prev_sol = np.tile((config.INPUT_UPPER_BOUND
+ config.INPUT_LOWER_BOUND) / 2.,
self.pred_len)
self.prev_sol = self.prev_sol.reshape(self.pred_len, self.input_size)
# save
self.history_u = [np.zeros(self.input_size)]
def clear_sol(self):
""" clear prev sol
"""
@ -74,24 +76,24 @@ class MPPI(Controller):
"""
# get noised inputs
noise = np.random.normal(
loc=0, scale=1.0, size=(self.pop_size, self.pred_len,
self.input_size)) * self.noise_sigma
loc=0, scale=1.0, size=(self.pop_size, self.pred_len,
self.input_size)) * self.noise_sigma
noised_inputs = noise.copy()
for t in range(self.pred_len):
if t > 0:
noised_inputs[:, t, :] = self.beta \
* (self.prev_sol[t, :] \
+ noise[:, t, :]) \
+ (1 - self.beta) \
* noised_inputs[:, t-1, :]
* (self.prev_sol[t, :]
+ noise[:, t, :]) \
+ (1 - self.beta) \
* noised_inputs[:, t-1, :]
else:
noised_inputs[:, t, :] = self.beta \
* (self.prev_sol[t, :] \
+ noise[:, t, :]) \
+ (1 - self.beta) \
* self.history_u[-1]
* (self.prev_sol[t, :]
+ noise[:, t, :]) \
+ (1 - self.beta) \
* self.history_u[-1]
# clip actions
noised_inputs = np.clip(
noised_inputs, self.input_lower_bounds, self.input_upper_bounds)
@ -108,7 +110,7 @@ class MPPI(Controller):
# weight actions
weighted_inputs = exp_rewards[:, np.newaxis, np.newaxis] \
* noised_inputs
* noised_inputs
sol = np.sum(weighted_inputs, 0) / denom
# update
@ -121,4 +123,4 @@ class MPPI(Controller):
return sol[0]
def __str__(self):
return "MPPI"
return "MPPI"

View File

@ -8,6 +8,7 @@ from ..envs.cost import calc_cost
logger = getLogger(__name__)
class MPPIWilliams(Controller):
""" Model Predictive Path Integral for linear and nonlinear method
@ -19,6 +20,7 @@ class MPPIWilliams(Controller):
2017 IEEE International Conference on Robotics and Automation (ICRA),
Singapore, 2017, pp. 1714-1721.
"""
def __init__(self, config, model):
super(MPPIWilliams, self).__init__(config, model)
@ -35,7 +37,7 @@ class MPPIWilliams(Controller):
self.noise_sigma = config.opt_config["MPPIWilliams"]["noise_sigma"]
self.opt_dim = self.input_size * self.pred_len
# get bound
# get bound
self.input_upper_bounds = np.tile(config.INPUT_UPPER_BOUND,
(self.pred_len, 1))
self.input_lower_bounds = np.tile(config.INPUT_LOWER_BOUND,
@ -47,14 +49,14 @@ class MPPIWilliams(Controller):
self.input_cost_fn = config.input_cost_fn
# init mean
self.prev_sol = np.tile((config.INPUT_UPPER_BOUND \
self.prev_sol = np.tile((config.INPUT_UPPER_BOUND
+ config.INPUT_LOWER_BOUND) / 2.,
self.pred_len)
self.prev_sol = self.prev_sol.reshape(self.pred_len, self.input_size)
# save
self.history_u = [np.zeros(self.input_size)]
def clear_sol(self):
""" clear prev sol
"""
@ -62,7 +64,7 @@ class MPPIWilliams(Controller):
self.prev_sol = \
(self.input_upper_bounds + self.input_lower_bounds) / 2.
self.prev_sol = self.prev_sol.reshape(self.pred_len, self.input_size)
def calc_cost(self, curr_x, samples, g_xs):
""" calculate the cost of input samples by using MPPI's eq
@ -82,12 +84,12 @@ class MPPIWilliams(Controller):
# calc cost, pred_xs.shape = (pop_size, pred_len+1, state_size)
pred_xs = self.model.predict_traj(curr_x, samples)
# get particle cost
costs = calc_cost(pred_xs, samples, g_xs,
self.state_cost_fn, None, \
self.state_cost_fn, None,
self.terminal_state_cost_fn)
return costs
def obtain_sol(self, curr_x, g_xs):
@ -101,9 +103,9 @@ class MPPIWilliams(Controller):
"""
# get noised inputs
noise = np.random.normal(
loc=0, scale=1.0, size=(self.pop_size, self.pred_len,
self.input_size)) * self.noise_sigma
loc=0, scale=1.0, size=(self.pop_size, self.pred_len,
self.input_size)) * self.noise_sigma
noised_inputs = self.prev_sol + noise
# clip actions
@ -120,7 +122,7 @@ class MPPIWilliams(Controller):
# mppi update
beta = np.min(costs)
eta = np.sum(np.exp(- 1. / self.lam * (costs - beta)), axis=0) \
+ 1e-10
+ 1e-10
# weight
# eta.shape = (pred_len, input_size)
@ -128,7 +130,7 @@ class MPPIWilliams(Controller):
# update inputs
sol = self.prev_sol \
+ np.sum(weights[:, np.newaxis, np.newaxis] * noise, axis=0)
+ np.sum(weights[:, np.newaxis, np.newaxis] * noise, axis=0)
# update
self.prev_sol[:-1] = sol[1:]
@ -140,4 +142,4 @@ class MPPIWilliams(Controller):
return sol[0]
def __str__(self):
return "MPPIWilliams"
return "MPPIWilliams"

View File

@ -8,6 +8,7 @@ from ..envs.cost import calc_cost
logger = getLogger(__name__)
class RandomShooting(Controller):
""" Random Shooting Method for linear and nonlinear method
@ -19,6 +20,7 @@ class RandomShooting(Controller):
using probabilistic dynamics models.
In Advances in Neural Information Processing Systems (pp. 4754-4765).
"""
def __init__(self, config, model):
super(RandomShooting, self).__init__(config, model)
@ -33,7 +35,7 @@ class RandomShooting(Controller):
self.pop_size = config.opt_config["Random"]["popsize"]
self.opt_dim = self.input_size * self.pred_len
# get bound
# get bound
self.input_upper_bounds = np.tile(config.INPUT_UPPER_BOUND,
self.pred_len)
self.input_lower_bounds = np.tile(config.INPUT_LOWER_BOUND,
@ -46,7 +48,7 @@ class RandomShooting(Controller):
# save
self.history_u = []
def obtain_sol(self, curr_x, g_xs):
""" calculate the optimal inputs
@ -65,8 +67,8 @@ class RandomShooting(Controller):
# calc cost
costs = self.calc_cost(curr_x,
samples.reshape(self.pop_size,
self.pred_len,
self.input_size),
self.pred_len,
self.input_size),
g_xs)
# solution
sol = samples[np.argmin(costs)]
@ -74,4 +76,4 @@ class RandomShooting(Controller):
return sol.reshape(self.pred_len, self.input_size).copy()[0]
def __str__(self):
return "RandomShooting"
return "RandomShooting"

View File

@ -5,4 +5,4 @@ from PythonLinearNonlinearControl.envs.first_order_lag \
from PythonLinearNonlinearControl.envs.two_wheeled \
import TwoWheeledConstEnv # NOQA
from PythonLinearNonlinearControl.envs.two_wheeled \
import TwoWheeledTrackEnv # NOQA
import TwoWheeledTrackEnv # NOQA

View File

@ -4,6 +4,7 @@ from matplotlib.axes import Axes
from .env import Env
from ..plotters.plot_objs import square
class CartPoleEnv(Env):
""" Cartpole Environment
@ -13,13 +14,14 @@ class CartPoleEnv(Env):
6-832-underactuated-robotics-spring-2009/readings/
MIT6_832s09_read_ch03.pdf
"""
def __init__(self):
"""
"""
self.config = {"state_size" : 4,
"input_size" : 1,
"dt" : 0.02,
"max_step" : 500,
self.config = {"state_size": 4,
"input_size": 1,
"dt": 0.02,
"max_step": 500,
"input_lower_bound": [-3.],
"input_upper_bound": [3.],
"mp": 0.2,
@ -30,7 +32,7 @@ class CartPoleEnv(Env):
}
super(CartPoleEnv, self).__init__(self.config)
def reset(self, init_x=None):
""" reset state
@ -39,7 +41,7 @@ class CartPoleEnv(Env):
info (dict): information
"""
self.step_count = 0
theta = np.random.randn(1)
self.curr_x = np.array([0., 0., theta[0], 0.])
@ -48,7 +50,7 @@ class CartPoleEnv(Env):
# goal
self.g_x = np.array([0., 0., -np.pi, 0.])
# clear memory
self.history_x = []
self.history_g_x = []
@ -76,50 +78,50 @@ class CartPoleEnv(Env):
# x
d_x0 = self.curr_x[1]
# v_x
d_x1 = (u[0] + self.config["mp"] * np.sin(self.curr_x[2]) \
* (self.config["l"] * (self.curr_x[3]**2) \
+ self.config["g"] * np.cos(self.curr_x[2]))) \
/ (self.config["mc"] + self.config["mp"] \
* (np.sin(self.curr_x[2])**2))
d_x1 = (u[0] + self.config["mp"] * np.sin(self.curr_x[2])
* (self.config["l"] * (self.curr_x[3]**2)
+ self.config["g"] * np.cos(self.curr_x[2]))) \
/ (self.config["mc"] + self.config["mp"]
* (np.sin(self.curr_x[2])**2))
# theta
d_x2 = self.curr_x[3]
# v_theta
d_x3 = (-u[0] * np.cos(self.curr_x[2]) \
- self.config["mp"] * self.config["l"] * (self.curr_x[3]**2) \
* np.cos(self.curr_x[2]) * np.sin(self.curr_x[2]) \
- (self.config["mc"] + self.config["mp"]) * self.config["g"] \
* np.sin(self.curr_x[2])) \
/ (self.config["l"] * (self.config["mc"] + self.config["mp"] \
* (np.sin(self.curr_x[2])**2)))
d_x3 = (-u[0] * np.cos(self.curr_x[2])
- self.config["mp"] * self.config["l"] * (self.curr_x[3]**2)
* np.cos(self.curr_x[2]) * np.sin(self.curr_x[2])
- (self.config["mc"] + self.config["mp"]) * self.config["g"]
* np.sin(self.curr_x[2])) \
/ (self.config["l"] * (self.config["mc"] + self.config["mp"]
* (np.sin(self.curr_x[2])**2)))
next_x = self.curr_x +\
np.array([d_x0, d_x1, d_x2, d_x3]) * self.config["dt"]
np.array([d_x0, d_x1, d_x2, d_x3]) * self.config["dt"]
# TODO: costs
costs = 0.
costs += 0.1 * np.sum(u**2)
costs += 6. * self.curr_x[0]**2 \
+ 12. * (np.cos(self.curr_x[2]) + 1.)**2 \
+ 0.1 * self.curr_x[1]**2 \
+ 0.1 * self.curr_x[3]**2
+ 12. * (np.cos(self.curr_x[2]) + 1.)**2 \
+ 0.1 * self.curr_x[1]**2 \
+ 0.1 * self.curr_x[3]**2
# save history
self.history_x.append(next_x.flatten())
self.history_g_x.append(self.g_x.flatten())
# update
self.curr_x = next_x.flatten().copy()
# update costs
self.step_count += 1
return next_x.flatten(), costs, \
self.step_count > self.config["max_step"], \
{"goal_state" : self.g_x}
self.step_count > self.config["max_step"], \
{"goal_state": self.g_x}
def plot_func(self, to_plot, i=None, history_x=None, history_g_x=None):
""" plot cartpole object function
Args:
to_plot (axis or imgs): plotted objects
i (int): frame count
@ -131,29 +133,29 @@ class CartPoleEnv(Env):
"""
if isinstance(to_plot, Axes):
imgs = {} # create new imgs
imgs["cart"] = to_plot.plot([], [], c="k")[0]
imgs["pole"] = to_plot.plot([], [], c="k", linewidth=5)[0]
imgs["center"] = to_plot.plot([], [], marker="o", c="k",\
imgs["center"] = to_plot.plot([], [], marker="o", c="k",
markersize=10)[0]
# centerline
to_plot.plot(np.linspace(-1., 1., num=50), np.zeros(50),\
to_plot.plot(np.linspace(-1., 1., num=50), np.zeros(50),
c="k", linestyle="dashed")
# set axis
to_plot.set_xlim([-1., 1.])
to_plot.set_ylim([-0.55, 1.5])
return imgs
# set imgs
cart_x, cart_y, pole_x, pole_y = \
self._plot_cartpole(history_x[i])
to_plot["cart"].set_data(cart_x, cart_y)
to_plot["pole"].set_data(pole_x, pole_y)
to_plot["center"].set_data(history_x[i][0], 0.)
def _plot_cartpole(self, curr_x):
""" plot cartpole fucntions
@ -166,13 +168,13 @@ class CartPoleEnv(Env):
pole_y (numpy.ndarray): y data of pole
"""
# cart
cart_x, cart_y = square(curr_x[0], 0.,\
cart_x, cart_y = square(curr_x[0], 0.,
self.config["cart_size"], 0.)
# pole
pole_x = np.array([curr_x[0], curr_x[0] + self.config["l"] \
* np.cos(curr_x[2]-np.pi/2)])
pole_y = np.array([0., self.config["l"] \
* np.sin(curr_x[2]-np.pi/2)])
return cart_x, cart_y, pole_x, pole_y
# pole
pole_x = np.array([curr_x[0], curr_x[0] + self.config["l"]
* np.cos(curr_x[2]-np.pi/2)])
pole_y = np.array([0., self.config["l"]
* np.sin(curr_x[2]-np.pi/2)])
return cart_x, cart_y, pole_x, pole_y

View File

@ -4,6 +4,7 @@ import numpy as np
logger = getLogger(__name__)
def calc_cost(pred_xs, input_sample, g_xs,
state_cost_fn, input_cost_fn, terminal_state_cost_fn):
""" calculate the cost
@ -24,20 +25,21 @@ def calc_cost(pred_xs, input_sample, g_xs,
# state cost
state_cost = 0.
if state_cost_fn is not None:
state_pred_par_cost = state_cost_fn(pred_xs[:, 1:-1, :], g_xs[:, 1:-1, :])
state_pred_par_cost = state_cost_fn(
pred_xs[:, 1:-1, :], g_xs[:, 1:-1, :])
state_cost = np.sum(np.sum(state_pred_par_cost, axis=-1), axis=-1)
# terminal cost
terminal_state_cost = 0.
if terminal_state_cost_fn is not None:
terminal_state_par_cost = terminal_state_cost_fn(pred_xs[:, -1, :],
g_xs[:, -1, :])
g_xs[:, -1, :])
terminal_state_cost = np.sum(terminal_state_par_cost, axis=-1)
# act cost
act_cost = 0.
if input_cost_fn is not None:
act_pred_par_cost = input_cost_fn(input_sample)
act_cost = np.sum(np.sum(act_pred_par_cost, axis=-1), axis=-1)
return state_cost + terminal_state_cost + act_cost
return state_cost + terminal_state_cost + act_cost

View File

@ -1,13 +1,15 @@
import numpy as np
class Env():
""" Environments class
Attributes:
curr_x (numpy.ndarray): current state
history_x (list[numpy.ndarray]): historty of state, shape(step_count*state_size)
step_count (int): step count
"""
def __init__(self, config):
"""
"""
@ -25,12 +27,12 @@ class Env():
info (dict): information
"""
self.step_count = 0
self.curr_x = np.zeros(self.config["state_size"])
if init_x is not None:
self.curr_x = init_x
# clear memory
self.history_x = []
self.history_g_x = []
@ -52,4 +54,4 @@ class Env():
def __repr__(self):
"""
"""
return self.config
return self.config

View File

@ -3,25 +3,27 @@ import scipy
from scipy import integrate
from .env import Env
class FirstOrderLagEnv(Env):
""" First Order Lag System Env
"""
def __init__(self, tau=0.63):
"""
"""
self.config = {"state_size" : 4,\
"input_size" : 2,\
"dt" : 0.05,\
"max_step" : 500,\
"input_lower_bound": [-0.5, -0.5],\
self.config = {"state_size": 4,
"input_size": 2,
"dt": 0.05,
"max_step": 500,
"input_lower_bound": [-0.5, -0.5],
"input_upper_bound": [0.5, 0.5],
}
super(FirstOrderLagEnv, self).__init__(self.config)
# to get discrete system matrix
self.A, self.B = self._to_state_space(tau, dt=self.config["dt"])
self.A, self.B = self._to_state_space(tau, dt=self.config["dt"])
@staticmethod
def _to_state_space(tau, dt=0.05):
"""
@ -34,13 +36,13 @@ class FirstOrderLagEnv(Env):
"""
# continuous
Ac = np.array([[-1./tau, 0., 0., 0.],
[0., -1./tau, 0., 0.],
[1., 0., 0., 0.],
[0., 1., 0., 0.]])
[0., -1./tau, 0., 0.],
[1., 0., 0., 0.],
[0., 1., 0., 0.]])
Bc = np.array([[1./tau, 0.],
[0., 1./tau],
[0., 0.],
[0., 0.]])
[0., 1./tau],
[0., 0.],
[0., 0.]])
# to discrete system
A = scipy.linalg.expm(dt*Ac)
# B = np.matmul(np.matmul(scipy.linalg.expm(Ac*dt) -
@ -55,7 +57,7 @@ class FirstOrderLagEnv(Env):
B[m, n] = sol[0]
return A, B
def reset(self, init_x=None):
""" reset state
Returns:
@ -63,7 +65,7 @@ class FirstOrderLagEnv(Env):
info (dict): information
"""
self.step_count = 0
self.curr_x = np.zeros(self.config["state_size"])
if init_x is not None:
@ -71,7 +73,7 @@ class FirstOrderLagEnv(Env):
# goal
self.g_x = np.array([0., 0, -2., 3.])
# clear memory
self.history_x = []
self.history_g_x = []
@ -94,7 +96,7 @@ class FirstOrderLagEnv(Env):
self.config["input_upper_bound"])
next_x = np.matmul(self.A, self.curr_x[:, np.newaxis]) \
+ np.matmul(self.B, u[:, np.newaxis])
+ np.matmul(self.B, u[:, np.newaxis])
# cost
cost = 0
@ -104,17 +106,17 @@ class FirstOrderLagEnv(Env):
# save history
self.history_x.append(next_x.flatten())
self.history_g_x.append(self.g_x.flatten())
# update
self.curr_x = next_x.flatten()
# update costs
self.step_count += 1
return next_x.flatten(), cost, \
self.step_count > self.config["max_step"], \
{"goal_state" : self.g_x}
self.step_count > self.config["max_step"], \
{"goal_state": self.g_x}
def plot_func(self, to_plot, i=None, history_x=None, history_g_x=None):
"""
"""
raise ValueError("FirstOrderLag does not have animation")
raise ValueError("FirstOrderLag does not have animation")

View File

@ -3,6 +3,7 @@ from .two_wheeled import TwoWheeledConstEnv
from .two_wheeled import TwoWheeledTrackEnv
from .cartpole import CartPoleEnv
def make_env(args):
if args.env == "FirstOrderLag":
@ -13,5 +14,5 @@ def make_env(args):
return TwoWheeledTrackEnv()
elif args.env == "CartPole":
return CartPoleEnv()
raise NotImplementedError("There is not {} Env".format(args.env))
raise NotImplementedError("There is not {} Env".format(args.env))

View File

@ -4,22 +4,24 @@ from scipy import integrate
from .env import Env
from ..common.utils import update_state_with_Runge_Kutta
class NonlinearSampleEnv(Env):
""" Nonlinear Sample Env
"""
def __init__(self):
"""
"""
self.config = {"state_size" : 2,\
"input_size" : 1,\
"dt" : 0.01,\
"max_step" : 250,\
"input_lower_bound": [-0.5],\
self.config = {"state_size": 2,
"input_size": 1,
"dt": 0.01,
"max_step": 250,
"input_lower_bound": [-0.5],
"input_upper_bound": [0.5],
}
super(NonlinearSampleEnv, self).__init__(self.config)
def reset(self, init_x=np.array([2., 0.])):
""" reset state
Returns:
@ -27,7 +29,7 @@ class NonlinearSampleEnv(Env):
info (dict): information
"""
self.step_count = 0
self.curr_x = np.zeros(self.config["state_size"])
if init_x is not None:
@ -35,7 +37,7 @@ class NonlinearSampleEnv(Env):
# goal
self.g_x = np.array([0., 0.])
# clear memory
self.history_x = []
self.history_g_x = []
@ -60,7 +62,7 @@ class NonlinearSampleEnv(Env):
funtions = [self._func_x_1, self._func_x_2]
next_x = update_state_with_Runge_Kutta(self._curr_x, u,
functions, self.config["dt"])
functions, self.config["dt"])
# cost
cost = 0
@ -70,29 +72,29 @@ class NonlinearSampleEnv(Env):
# save history
self.history_x.append(next_x.flatten())
self.history_g_x.append(self.g_x.flatten())
# update
self.curr_x = next_x.flatten()
# update costs
self.step_count += 1
return next_x.flatten(), cost, \
self.step_count > self.config["max_step"], \
{"goal_state" : self.g_x}
self.step_count > self.config["max_step"], \
{"goal_state": self.g_x}
def _func_x_1(self, x_1, x_2, u):
"""
"""
x_dot = x_2
return x_dot
def _func_x_2(self, x_1, x_2, u):
"""
"""
x_dot = (1. - x_1**2 - x_2**2) * x_2 - x_1 + u
return x_dot
def plot_func(self, to_plot, i=None, history_x=None, history_g_x=None):
"""
"""
raise ValueError("NonlinearSampleEnv does not have animation")
raise ValueError("NonlinearSampleEnv does not have animation")

View File

@ -5,47 +5,50 @@ import matplotlib.pyplot as plt
from .env import Env
from ..plotters.plot_objs import circle_with_angle, square, circle
def step_two_wheeled_env(curr_x, u, dt, method="Oylar"):
""" step two wheeled enviroment
Args:
curr_x (numpy.ndarray): current state, shape(state_size, )
u (numpy.ndarray): input, shape(input_size, )
dt (float): sampling time
Returns:
next_x (numpy.ndarray): next state, shape(state_size. )
Notes:
TODO: deal with another method, like Runge Kutta
"""
B = np.array([[np.cos(curr_x[-1]), 0.],
[np.sin(curr_x[-1]), 0.],
[0., 1.]])
x_dot = np.matmul(B, u[:, np.newaxis])
next_x = x_dot.flatten() * dt + curr_x
return next_x
class TwoWheeledConstEnv(Env):
""" Two wheeled robot with constant goal Env
"""
def __init__(self):
"""
"""
self.config = {"state_size" : 3,\
"input_size" : 2,\
"dt" : 0.01,\
"max_step" : 500,\
"input_lower_bound": (-1.5, -3.14),\
"input_upper_bound": (1.5, 3.14),\
"car_size": 0.2,\
self.config = {"state_size": 3,
"input_size": 2,
"dt": 0.01,
"max_step": 500,
"input_lower_bound": (-1.5, -3.14),
"input_upper_bound": (1.5, 3.14),
"car_size": 0.2,
"wheel_size": (0.075, 0.015)
}
super(TwoWheeledConstEnv, self).__init__(self.config)
def reset(self, init_x=None):
""" reset state
@ -54,7 +57,7 @@ class TwoWheeledConstEnv(Env):
info (dict): information
"""
self.step_count = 0
self.curr_x = np.zeros(self.config["state_size"])
if init_x is not None:
@ -62,7 +65,7 @@ class TwoWheeledConstEnv(Env):
# goal
self.g_x = np.array([2.5, 2.5, 0.])
# clear memory
self.history_x = []
self.history_g_x = []
@ -96,32 +99,32 @@ class TwoWheeledConstEnv(Env):
# save history
self.history_x.append(next_x.flatten())
self.history_g_x.append(self.g_x.flatten())
# update
self.curr_x = next_x.flatten()
# update costs
self.step_count += 1
return next_x.flatten(), costs, \
self.step_count > self.config["max_step"], \
{"goal_state" : self.g_x}
self.step_count > self.config["max_step"], \
{"goal_state": self.g_x}
def plot_func(self, to_plot, i=None, history_x=None, history_g_x=None):
""" plot cartpole object function
Args:
to_plot (axis or imgs): plotted objects
i (int): frame count
history_x (numpy.ndarray): history of state, shape(iters, state)
history_g_x (numpy.ndarray): history of goal state,
shape(iters, state)
Returns:
None or imgs : imgs order is ["cart_img", "pole_img"]
"""
if isinstance(to_plot, Axes):
imgs = {} # create new imgs
imgs["car"] = to_plot.plot([], [], c="k")[0]
imgs["car_angle"] = to_plot.plot([], [], c="k")[0]
imgs["left_tire"] = to_plot.plot([], [], c="k", linewidth=5)[0]
@ -139,9 +142,9 @@ class TwoWheeledConstEnv(Env):
# set imgs
# car imgs
car_x, car_y, car_angle_x, car_angle_y, \
left_tire_x, left_tire_y, right_tire_x, right_tire_y = \
left_tire_x, left_tire_y, right_tire_x, right_tire_y = \
self._plot_car(history_x[i])
to_plot["car"].set_data(car_x, car_y)
to_plot["car_angle"].set_data(car_angle_x, car_angle_y)
to_plot["left_tire"].set_data(left_tire_x, left_tire_y,)
@ -150,7 +153,7 @@ class TwoWheeledConstEnv(Env):
# goal and trajs
to_plot["goal"].set_data(history_g_x[i, 0], history_g_x[i, 1])
to_plot["traj"].set_data(history_x[:i, 0], history_x[:i, 1])
def _plot_car(self, curr_x):
""" plot car fucntions
"""
@ -158,53 +161,55 @@ class TwoWheeledConstEnv(Env):
car_x, car_y, car_angle_x, car_angle_y = \
circle_with_angle(curr_x[0], curr_x[1],
self.config["car_size"], curr_x[2])
# left tire
center_x = (self.config["car_size"] \
center_x = (self.config["car_size"]
+ self.config["wheel_size"][1]) \
* np.cos(curr_x[2]-np.pi/2.) + curr_x[0]
center_y = (self.config["car_size"] \
* np.cos(curr_x[2]-np.pi/2.) + curr_x[0]
center_y = (self.config["car_size"]
+ self.config["wheel_size"][1]) \
* np.sin(curr_x[2]-np.pi/2.) + curr_x[1]
* np.sin(curr_x[2]-np.pi/2.) + curr_x[1]
left_tire_x, left_tire_y = \
square(center_x, center_y,
square(center_x, center_y,
self.config["wheel_size"], curr_x[2])
# right tire
center_x = (self.config["car_size"] \
center_x = (self.config["car_size"]
+ self.config["wheel_size"][1]) \
* np.cos(curr_x[2]+np.pi/2.) + curr_x[0]
center_y = (self.config["car_size"] \
* np.cos(curr_x[2]+np.pi/2.) + curr_x[0]
center_y = (self.config["car_size"]
+ self.config["wheel_size"][1]) \
* np.sin(curr_x[2]+np.pi/2.) + curr_x[1]
* np.sin(curr_x[2]+np.pi/2.) + curr_x[1]
right_tire_x, right_tire_y = \
square(center_x, center_y,
square(center_x, center_y,
self.config["wheel_size"], curr_x[2])
return car_x, car_y, car_angle_x, car_angle_y,\
left_tire_x, left_tire_y,\
right_tire_x, right_tire_y
left_tire_x, left_tire_y,\
right_tire_x, right_tire_y
class TwoWheeledTrackEnv(Env):
""" Two wheeled robot with constant goal Env
"""
def __init__(self):
"""
"""
self.config = {"state_size" : 3,\
"input_size" : 2,\
"dt" : 0.01,\
"max_step" : 1000,\
"input_lower_bound": (-1.5, -3.14),\
"input_upper_bound": (1.5, 3.14),\
"car_size": 0.2,\
self.config = {"state_size": 3,
"input_size": 2,
"dt": 0.01,
"max_step": 1000,
"input_lower_bound": (-1.5, -3.14),
"input_upper_bound": (1.5, 3.14),
"car_size": 0.2,
"wheel_size": (0.075, 0.015)
}
super(TwoWheeledTrackEnv, self).__init__(self.config)
@staticmethod
def make_road(linelength=3., circle_radius=1.):
""" make track
@ -220,23 +225,23 @@ class TwoWheeledTrackEnv(Env):
# circle
circle_1_x, circle_1_y = circle(linelength/2., circle_radius,
circle_radius, start=-np.pi/2., end=np.pi/2., n_point=50)
circle_1 = np.stack((circle_1_x , circle_1_y), axis=1)
circle_radius, start=-np.pi/2., end=np.pi/2., n_point=50)
circle_1 = np.stack((circle_1_x, circle_1_y), axis=1)
circle_2_x, circle_2_y = circle(-linelength/2., circle_radius,
circle_radius, start=np.pi/2., end=3*np.pi/2., n_point=50)
circle_2 = np.stack((circle_2_x , circle_2_y), axis=1)
circle_radius, start=np.pi/2., end=3*np.pi/2., n_point=50)
circle_2 = np.stack((circle_2_x, circle_2_y), axis=1)
road_pos = np.concatenate((line_1, circle_1, line_2, circle_2), axis=0)
# calc road angle
road_diff = road_pos[1:] - road_pos[:-1]
road_angle = np.arctan2(road_diff[:, 1], road_diff[:, 0])
road_angle = np.arctan2(road_diff[:, 1], road_diff[:, 0])
road_angle = np.concatenate((np.zeros(1), road_angle))
road = np.concatenate((road_pos, road_angle[:, np.newaxis]), axis=1)
return np.tile(road, (3, 1))
return np.tile(road, (3, 1))
def reset(self, init_x=None):
""" reset state
@ -246,7 +251,7 @@ class TwoWheeledTrackEnv(Env):
info (dict): information
"""
self.step_count = 0
self.curr_x = np.zeros(self.config["state_size"])
if init_x is not None:
@ -254,7 +259,7 @@ class TwoWheeledTrackEnv(Env):
# goal
self.g_traj = self.make_road()
# clear memory
self.history_x = []
self.history_g_x = []
@ -286,32 +291,32 @@ class TwoWheeledTrackEnv(Env):
# save history
self.history_x.append(next_x.flatten())
# update
self.curr_x = next_x.flatten()
# update costs
self.step_count += 1
return next_x.flatten(), costs, \
self.step_count > self.config["max_step"], \
{"goal_state" : self.g_traj}
self.step_count > self.config["max_step"], \
{"goal_state": self.g_traj}
def plot_func(self, to_plot, i=None, history_x=None, history_g_x=None):
""" plot cartpole object function
Args:
to_plot (axis or imgs): plotted objects
i (int): frame count
history_x (numpy.ndarray): history of state, shape(iters, state)
history_g_x (numpy.ndarray): history of goal state,
shape(iters, state)
Returns:
None or imgs : imgs order is ["cart_img", "pole_img"]
"""
if isinstance(to_plot, Axes):
imgs = {} # create new imgs
imgs["car"] = to_plot.plot([], [], c="k")[0]
imgs["car_angle"] = to_plot.plot([], [], c="k")[0]
imgs["left_tire"] = to_plot.plot([], [], c="k", linewidth=5)[0]
@ -333,9 +338,9 @@ class TwoWheeledTrackEnv(Env):
# set imgs
# car imgs
car_x, car_y, car_angle_x, car_angle_y, \
left_tire_x, left_tire_y, right_tire_x, right_tire_y = \
left_tire_x, left_tire_y, right_tire_x, right_tire_y = \
self._plot_car(history_x[i])
to_plot["car"].set_data(car_x, car_y)
to_plot["car_angle"].set_data(car_angle_x, car_angle_y)
to_plot["left_tire"].set_data(left_tire_x, left_tire_y,)
@ -344,7 +349,7 @@ class TwoWheeledTrackEnv(Env):
# goal and trajs
to_plot["goal"].set_data(history_g_x[i, 0], history_g_x[i, 1])
to_plot["traj"].set_data(history_x[:i, 0], history_x[:i, 1])
def _plot_car(self, curr_x):
""" plot car fucntions
"""
@ -352,31 +357,31 @@ class TwoWheeledTrackEnv(Env):
car_x, car_y, car_angle_x, car_angle_y = \
circle_with_angle(curr_x[0], curr_x[1],
self.config["car_size"], curr_x[2])
# left tire
center_x = (self.config["car_size"] \
center_x = (self.config["car_size"]
+ self.config["wheel_size"][1]) \
* np.cos(curr_x[2]-np.pi/2.) + curr_x[0]
center_y = (self.config["car_size"] \
* np.cos(curr_x[2]-np.pi/2.) + curr_x[0]
center_y = (self.config["car_size"]
+ self.config["wheel_size"][1]) \
* np.sin(curr_x[2]-np.pi/2.) + curr_x[1]
* np.sin(curr_x[2]-np.pi/2.) + curr_x[1]
left_tire_x, left_tire_y = \
square(center_x, center_y,
square(center_x, center_y,
self.config["wheel_size"], curr_x[2])
# right tire
center_x = (self.config["car_size"] \
center_x = (self.config["car_size"]
+ self.config["wheel_size"][1]) \
* np.cos(curr_x[2]+np.pi/2.) + curr_x[0]
center_y = (self.config["car_size"] \
* np.cos(curr_x[2]+np.pi/2.) + curr_x[0]
center_y = (self.config["car_size"]
+ self.config["wheel_size"][1]) \
* np.sin(curr_x[2]+np.pi/2.) + curr_x[1]
* np.sin(curr_x[2]+np.pi/2.) + curr_x[1]
right_tire_x, right_tire_y = \
square(center_x, center_y,
square(center_x, center_y,
self.config["wheel_size"], curr_x[2])
return car_x, car_y, car_angle_x, car_angle_y,\
left_tire_x, left_tire_y,\
right_tire_x, right_tire_y
left_tire_x, left_tire_y,\
right_tire_x, right_tire_y

View File

@ -7,6 +7,7 @@ import six
import pickle
from logging import DEBUG, basicConfig, getLogger, FileHandler, StreamHandler, Formatter, Logger
def make_logger(save_dir):
"""
Args:
@ -21,7 +22,7 @@ def make_logger(save_dir):
# mypackage log level
logger = getLogger("PythonLinearNonlinearControl")
logger.setLevel(DEBUG)
# file handler
log_path = os.path.join(save_dir, "log.txt")
file_handler = FileHandler(log_path)
@ -33,6 +34,7 @@ def make_logger(save_dir):
# sh_handler = StreamHandler()
# logger.addHandler(sh_handler)
def int_tuple(s):
""" transform str to tuple
Args:
@ -42,6 +44,7 @@ def int_tuple(s):
"""
return tuple(int(i) for i in s.split(','))
def bool_flag(s):
""" transform str to bool flg
Args:
@ -54,6 +57,7 @@ def bool_flag(s):
msg = 'Invalid value "%s" for bool flag (should be 0 or 1)'
raise ValueError(msg % s)
def file_exists(path):
""" Check file existence on given path
Args:
@ -63,6 +67,7 @@ def file_exists(path):
"""
return os.path.exists(path)
def create_dir_if_not_exist(outdir):
""" Check directory existence and creates new directory if not exist
Args:
@ -77,6 +82,7 @@ def create_dir_if_not_exist(outdir):
return
os.makedirs(outdir)
def write_text_to_file(file_path, data):
""" Write given text data to file
Args:
@ -86,6 +92,7 @@ def write_text_to_file(file_path, data):
with open(file_path, 'w') as f:
f.write(data)
def read_text_from_file(file_path):
""" Read given file as text
Args:
@ -96,6 +103,7 @@ def read_text_from_file(file_path):
with open(file_path, 'r') as f:
return f.read()
def save_pickle(file_path, data):
""" pickle given data to file
Args:
@ -105,6 +113,7 @@ def save_pickle(file_path, data):
with open(file_path, 'wb') as f:
pickle.dump(data, f)
def load_pickle(file_path):
""" load pickled data from file
Args:
@ -118,6 +127,7 @@ def load_pickle(file_path):
else:
return pickle.load(f, encoding='bytes')
def prepare_output_dir(base_dir, args, time_format='%Y-%m-%d-%H%M%S'):
""" prepare a directory with current datetime as name.
created directory contains the command and args when the script was called as text file.
@ -144,4 +154,4 @@ def prepare_output_dir(base_dir, args, time_format='%Y-%m-%d-%H%M%S'):
argv = ' '.join(sys.argv)
write_text_to_file(argv_file_path, argv)
return outdir
return outdir

View File

@ -2,9 +2,11 @@ import numpy as np
from .model import Model
class CartPoleModel(Model):
""" cartpole model
"""
def __init__(self, config):
"""
"""
@ -17,7 +19,7 @@ class CartPoleModel(Model):
def predict_next_state(self, curr_x, u):
""" predict next state
Args:
curr_x (numpy.ndarray): current state, shape(state_size, ) or
shape(pop_size, state_size)
@ -31,59 +33,59 @@ class CartPoleModel(Model):
# x
d_x0 = curr_x[1]
# v_x
d_x1 = (u[0] + self.mp * np.sin(curr_x[2]) \
* (self.l * (curr_x[3]**2) \
+ self.g * np.cos(curr_x[2]))) \
/ (self.mc + self.mp * (np.sin(curr_x[2])**2))
d_x1 = (u[0] + self.mp * np.sin(curr_x[2])
* (self.l * (curr_x[3]**2)
+ self.g * np.cos(curr_x[2]))) \
/ (self.mc + self.mp * (np.sin(curr_x[2])**2))
# theta
d_x2 = curr_x[3]
# v_theta
d_x3 = (-u[0] * np.cos(curr_x[2]) \
- self.mp * self.l * (curr_x[3]**2) \
* np.cos(curr_x[2]) * np.sin(curr_x[2]) \
d_x3 = (-u[0] * np.cos(curr_x[2])
- self.mp * self.l * (curr_x[3]**2)
* np.cos(curr_x[2]) * np.sin(curr_x[2])
- (self.mc + self.mp) * self.g * np.sin(curr_x[2])) \
/ (self.l * (self.mc + self.mp * (np.sin(curr_x[2])**2)))
/ (self.l * (self.mc + self.mp * (np.sin(curr_x[2])**2)))
next_x = curr_x +\
np.array([d_x0, d_x1, d_x2, d_x3]) * self.dt
np.array([d_x0, d_x1, d_x2, d_x3]) * self.dt
return next_x
elif len(u.shape) == 2:
# x
d_x0 = curr_x[:, 1]
# v_x
d_x1 = (u[:, 0] + self.mp * np.sin(curr_x[:, 2]) \
* (self.l * (curr_x[:, 3]**2) \
+ self.g * np.cos(curr_x[:, 2]))) \
/ (self.mc + self.mp * (np.sin(curr_x[:, 2])**2))
d_x1 = (u[:, 0] + self.mp * np.sin(curr_x[:, 2])
* (self.l * (curr_x[:, 3]**2)
+ self.g * np.cos(curr_x[:, 2]))) \
/ (self.mc + self.mp * (np.sin(curr_x[:, 2])**2))
# theta
d_x2 = curr_x[:, 3]
# v_theta
d_x3 = (-u[:, 0] * np.cos(curr_x[:, 2]) \
- self.mp * self.l * (curr_x[:, 3]**2) \
* np.cos(curr_x[:, 2]) * np.sin(curr_x[:, 2]) \
d_x3 = (-u[:, 0] * np.cos(curr_x[:, 2])
- self.mp * self.l * (curr_x[:, 3]**2)
* np.cos(curr_x[:, 2]) * np.sin(curr_x[:, 2])
- (self.mc + self.mp) * self.g * np.sin(curr_x[:, 2])) \
/ (self.l * (self.mc + self.mp * (np.sin(curr_x[:, 2])**2)))
/ (self.l * (self.mc + self.mp * (np.sin(curr_x[:, 2])**2)))
next_x = curr_x +\
np.stack((d_x0, d_x1, d_x2, d_x3), axis=1) * self.dt
np.stack((d_x0, d_x1, d_x2, d_x3), axis=1) * self.dt
return next_x
def calc_f_x(self, xs, us, dt):
""" gradient of model with respect to the state in batch form
Args:
xs (numpy.ndarray): state, shape(pred_len+1, state_size)
us (numpy.ndarray): input, shape(pred_len, input_size,)
Return:
f_x (numpy.ndarray): gradient of model with respect to x,
shape(pred_len, state_size, state_size)
Notes:
This should be discrete form !!
"""
"""
# get size
(_, state_size) = xs.shape
(pred_len, _) = us.shape
@ -95,36 +97,36 @@ class CartPoleModel(Model):
# f_theta
tmp = ((self.mc + self.mp * np.sin(xs[:, 2])**2)**(-2)) \
* self.mp * 2. * np.sin(xs[:, 2]) * np.cos(xs[:, 2])
* self.mp * 2. * np.sin(xs[:, 2]) * np.cos(xs[:, 2])
tmp2 = 1. / (self.mc + self.mp * (np.sin(xs[:, 2])**2))
f_x[:, 1, 2] = - us[:, 0] * tmp \
- tmp * (self.mp * np.sin(xs[:, 2]) \
* (self.l * xs[:, 3]**2 \
- tmp * (self.mp * np.sin(xs[:, 2])
* (self.l * xs[:, 3]**2
+ self.g * np.cos(xs[:, 2]))) \
+ tmp2 * (self.mp * np.cos(xs[:, 2]) * self.l \
* xs[:, 3]**2 \
+ self.mp * self.g * (np.cos(xs[:, 2])**2 \
- np.sin(xs[:, 2])**2))
+ tmp2 * (self.mp * np.cos(xs[:, 2]) * self.l
* xs[:, 3]**2
+ self.mp * self.g * (np.cos(xs[:, 2])**2
- np.sin(xs[:, 2])**2))
f_x[:, 3, 2] = - 1. / self.l * tmp \
* (-us[:, 0] * np.cos(xs[:, 2]) \
- self.mp * self.l * (xs[:, 3]**2) \
* np.cos(xs[:, 2]) * np.sin(xs[:, 2]) \
- (self.mc + self.mp) * self.g * np.sin(xs[:, 2])) \
+ 1. / self.l * tmp2 \
* (us[:, 0] * np.sin(xs[:, 2]) \
- self.mp * self.l * xs[:, 3]**2 \
* (np.cos(xs[:, 2])**2 - np.sin(xs[:, 2])**2) \
- (self.mc + self.mp) \
* self.g * np.cos(xs[:, 2]))
* (-us[:, 0] * np.cos(xs[:, 2])
- self.mp * self.l * (xs[:, 3]**2)
* np.cos(xs[:, 2]) * np.sin(xs[:, 2])
- (self.mc + self.mp) * self.g * np.sin(xs[:, 2])) \
+ 1. / self.l * tmp2 \
* (us[:, 0] * np.sin(xs[:, 2])
- self.mp * self.l * xs[:, 3]**2
* (np.cos(xs[:, 2])**2 - np.sin(xs[:, 2])**2)
- (self.mc + self.mp)
* self.g * np.cos(xs[:, 2]))
# f_theta_dot
f_x[:, 1, 3] = tmp2 * (self.mp * np.sin(xs[:, 2]) \
* self.l * 2 * xs[:, 3])
f_x[:, 1, 3] = tmp2 * (self.mp * np.sin(xs[:, 2])
* self.l * 2 * xs[:, 3])
f_x[:, 2, 3] = np.ones(pred_len)
f_x[:, 3, 3] = 1. / self.l * tmp2 \
* (-2. * self.mp * self.l * xs[:, 3] \
* np.cos(xs[:, 2]) * np.sin(xs[:, 2]))
* (-2. * self.mp * self.l * xs[:, 3]
* np.cos(xs[:, 2]) * np.sin(xs[:, 2]))
return f_x * dt + np.eye(state_size) # to discrete form
@ -133,25 +135,25 @@ class CartPoleModel(Model):
Args:
xs (numpy.ndarray): state, shape(pred_len+1, state_size)
us (numpy.ndarray): input, shape(pred_len, input_size,)
Return:
f_u (numpy.ndarray): gradient of model with respect to x,
shape(pred_len, state_size, input_size)
Notes:
This should be discrete form !!
"""
"""
# get size
(_, state_size) = xs.shape
(pred_len, input_size) = us.shape
f_u = np.zeros((pred_len, state_size, input_size))
f_u[:, 1, 0] = 1. / (self.mc + self.mp * (np.sin(xs[:, 2])**2))
f_u[:, 3, 0] = -np.cos(xs[:, 2]) \
/ (self.l * (self.mc \
+ self.mp * (np.sin(xs[:, 2])**2)))
/ (self.l * (self.mc
+ self.mp * (np.sin(xs[:, 2])**2)))
return f_u * dt # to discrete form
@ -161,7 +163,7 @@ class CartPoleModel(Model):
Args:
xs (numpy.ndarray): state, shape(pred_len+1, state_size)
us (numpy.ndarray): input, shape(pred_len, input_size,)
Return:
f_xx (numpy.ndarray): gradient of model with respect to x,
shape(pred_len, state_size, state_size, state_size)
@ -180,7 +182,7 @@ class CartPoleModel(Model):
Args:
xs (numpy.ndarray): state, shape(pred_len+1, state_size)
us (numpy.ndarray): input, shape(pred_len, input_size,)
Return:
f_ux (numpy.ndarray): gradient of model with respect to x,
shape(pred_len, state_size, input_size, state_size)
@ -199,7 +201,7 @@ class CartPoleModel(Model):
Args:
xs (numpy.ndarray): state, shape(pred_len+1, state_size)
us (numpy.ndarray): input, shape(pred_len, input_size,)
Return:
f_uu (numpy.ndarray): gradient of model with respect to x,
shape(pred_len, state_size, input_size, input_size)
@ -210,4 +212,4 @@ class CartPoleModel(Model):
f_uu = np.zeros((pred_len, state_size, input_size, input_size))
raise NotImplementedError
raise NotImplementedError

View File

@ -3,6 +3,7 @@ import scipy.linalg
from scipy import integrate
from .model import LinearModel
class FirstOrderLagModel(LinearModel):
""" first order lag model
Attributes:
@ -10,13 +11,15 @@ class FirstOrderLagModel(LinearModel):
u (numpy.ndarray):
history_pred_xs (numpy.ndarray):
"""
def __init__(self, config, tau=0.63):
"""
Args:
tau (float): time constant
"""
"""
# param
self.A, self.B = self._to_state_space(tau, dt=config.DT) # discrete system
self.A, self.B = self._to_state_space(
tau, dt=config.DT) # discrete system
super(FirstOrderLagModel, self).__init__(self.A, self.B)
@staticmethod
@ -31,21 +34,22 @@ class FirstOrderLagModel(LinearModel):
"""
# continuous
Ac = np.array([[-1./tau, 0., 0., 0.],
[0., -1./tau, 0., 0.],
[1., 0., 0., 0.],
[0., 1., 0., 0.]])
[0., -1./tau, 0., 0.],
[1., 0., 0., 0.],
[0., 1., 0., 0.]])
Bc = np.array([[1./tau, 0.],
[0., 1./tau],
[0., 0.],
[0., 0.]])
[0., 1./tau],
[0., 0.],
[0., 0.]])
# to discrete system
A = scipy.linalg.expm(dt*Ac)
# B = np.matmul(np.matmul(scipy.linalg.expm(Ac*dt)-scipy.linalg.expm(Ac*0.), np.linalg.inv(Ac)), Bc)
B = np.zeros_like(Bc)
for m in range(Bc.shape[0]):
for n in range(Bc.shape[1]):
integrate_fn = lambda tau: np.matmul(scipy.linalg.expm(Ac*tau), Bc)[m, n]
def integrate_fn(tau): return np.matmul(
scipy.linalg.expm(Ac*tau), Bc)[m, n]
sol = integrate.quad(integrate_fn, 0, dt)
B[m, n] = sol[0]
return A, B
return A, B

View File

@ -2,13 +2,14 @@ from .first_order_lag import FirstOrderLagModel
from .two_wheeled import TwoWheeledModel
from .cartpole import CartPoleModel
def make_model(args, config):
if args.env == "FirstOrderLag":
return FirstOrderLagModel(config)
elif args.env == "TwoWheeledConst" or args.env == "TwoWheeledTrack":
return TwoWheeledModel(config)
elif args.env == "CartPole":
return CartPoleModel(config)
raise NotImplementedError("There is not {} Model".format(args.env))
raise NotImplementedError("There is not {} Model".format(args.env))

View File

@ -1,8 +1,10 @@
import numpy as np
class Model():
""" base class of model
"""
def __init__(self):
"""
"""
@ -22,17 +24,17 @@ class Model():
or shape(pop_size, pred_len+1, state_size)
"""
if len(us.shape) == 3:
pred_xs =self._predict_traj_alltogether(curr_x, us)
pred_xs = self._predict_traj_alltogether(curr_x, us)
elif len(us.shape) == 2:
pred_xs = self._predict_traj(curr_x, us)
else:
raise ValueError("Invalid us")
return pred_xs
def _predict_traj(self, curr_x, us):
""" predict trajectories
Args:
curr_x (numpy.ndarray): current state, shape(state_size, )
us (numpy.ndarray): inputs, shape(pred_len, input_size)
@ -53,10 +55,10 @@ class Model():
x = next_x
return pred_xs
def _predict_traj_alltogether(self, curr_x, us):
""" predict trajectories for all samples
Args:
curr_x (numpy.ndarray): current state, shape(pop_size, state_size)
us (numpy.ndarray): inputs, shape(pop_size, pred_len, input_size)
@ -75,12 +77,12 @@ class Model():
# next_x.shape = (pop_size, state_size)
next_x = self.predict_next_state(x, us[t])
# update
pred_xs = np.concatenate((pred_xs, next_x[np.newaxis, :, :]),\
axis=0)
pred_xs = np.concatenate((pred_xs, next_x[np.newaxis, :, :]),
axis=0)
x = next_x
return np.transpose(pred_xs, (1, 0, 2))
def predict_next_state(self, curr_x, u):
""" predict next state
"""
@ -99,23 +101,23 @@ class Model():
# get size
(pred_len, input_size) = us.shape
# pred final adjoint state
lam = self.predict_terminal_adjoint_state(xs[-1],\
lam = self.predict_terminal_adjoint_state(xs[-1],
terminal_g_x=g_xs[-1])
lams = lam[np.newaxis, :]
for t in range(pred_len-1, 0, -1):
for t in range(pred_len-1, 0, -1):
prev_lam = \
self.predict_adjoint_state(lam, xs[t], us[t],\
self.predict_adjoint_state(lam, xs[t], us[t],
goal=g_xs[t], t=t)
# update
lams = np.concatenate((prev_lam[np.newaxis, :], lams), axis=0)
lam = prev_lam
return lams
def predict_adjoint_state(self, lam, x, u, goal=None, t=None):
""" predict adjoint states
Args:
lam (numpy.ndarray): adjoint state, shape(state_size, )
x (numpy.ndarray): state, shape(state_size, )
@ -129,7 +131,7 @@ class Model():
def predict_terminal_adjoint_state(self, terminal_x, terminal_g_x=None):
""" predict terminal adjoint state
Args:
terminal_x (numpy.ndarray): terminal state, shape(state_size, )
terminal_g_x (numpy.ndarray): terminal goal state,
@ -143,7 +145,7 @@ class Model():
@staticmethod
def calc_f_x(xs, us, dt):
""" gradient of model with respect to the state in batch form
"""
"""
raise NotImplementedError("Implement gradient of model \
with respect to the state")
@ -153,11 +155,11 @@ class Model():
"""
raise NotImplementedError("Implement gradient of model \
with respect to the input")
@staticmethod
def calc_f_xx(xs, us, dt):
""" hessian of model with respect to the state in batch form
"""
"""
raise NotImplementedError("Implement hessian of model \
with respect to the state")
@ -171,27 +173,29 @@ class Model():
@staticmethod
def calc_f_uu(xs, us, dt):
""" hessian of model with respect to the state in batch form
"""
"""
raise NotImplementedError("Implement hessian of model \
with respect to the input")
class LinearModel(Model):
""" discrete linear model, x[k+1] = Ax[k] + Bu[k]
Attributes:
A (numpy.ndarray): shape(state_size, state_size)
B (numpy.ndarray): shape(state_size, input_size)
"""
def __init__(self, A, B):
"""
"""
super(LinearModel, self).__init__()
self.A = A
self.B = B
def predict_next_state(self, curr_x, u):
""" predict next state
Args:
curr_x (numpy.ndarray): current state, shape(state_size, ) or
shape(pop_size, state_size)
@ -203,7 +207,7 @@ class LinearModel(Model):
"""
if len(u.shape) == 1:
next_x = np.matmul(self.A, curr_x[:, np.newaxis]) \
+ np.matmul(self.B, u[:, np.newaxis])
+ np.matmul(self.B, u[:, np.newaxis])
return next_x.flatten()
@ -211,7 +215,7 @@ class LinearModel(Model):
next_x = np.matmul(curr_x, self.A.T) + np.matmul(u, self.B.T)
return next_x
def calc_f_x(self, xs, us, dt):
""" gradient of model with respect to the state in batch form
@ -223,7 +227,7 @@ class LinearModel(Model):
shape(pred_len, state_size, state_size)
Notes:
This should be discrete form !!
"""
"""
# get size
(pred_len, _) = us.shape
@ -240,7 +244,7 @@ class LinearModel(Model):
shape(pred_len, state_size, input_size)
Notes:
This should be discrete form !!
"""
"""
# get size
(pred_len, input_size) = us.shape
@ -283,7 +287,7 @@ class LinearModel(Model):
f_ux = np.zeros((pred_len, state_size, input_size, state_size))
return f_ux
@staticmethod
def calc_f_uu(xs, us, dt):
""" hessian of model with respect to input in batch form
@ -301,4 +305,4 @@ class LinearModel(Model):
f_uu = np.zeros((pred_len, state_size, input_size, input_size))
return f_uu
return f_uu

View File

@ -2,9 +2,11 @@ import numpy as np
from .model import Model
class TwoWheeledModel(Model):
""" two wheeled model
"""
def __init__(self, config):
"""
"""
@ -13,7 +15,7 @@ class TwoWheeledModel(Model):
def predict_next_state(self, curr_x, u):
""" predict next state
Args:
curr_x (numpy.ndarray): current state, shape(state_size, ) or
shape(pop_size, state_size)
@ -50,21 +52,21 @@ class TwoWheeledModel(Model):
next_x = x_dot[:, :, 0] * self.dt + curr_x
return next_x
@staticmethod
def calc_f_x(xs, us, dt):
""" gradient of model with respect to the state in batch form
Args:
xs (numpy.ndarray): state, shape(pred_len+1, state_size)
us (numpy.ndarray): input, shape(pred_len, input_size,)
Return:
f_x (numpy.ndarray): gradient of model with respect to x,
shape(pred_len, state_size, state_size)
Notes:
This should be discrete form !!
"""
"""
# get size
(_, state_size) = xs.shape
(pred_len, _) = us.shape
@ -81,14 +83,14 @@ class TwoWheeledModel(Model):
Args:
xs (numpy.ndarray): state, shape(pred_len+1, state_size)
us (numpy.ndarray): input, shape(pred_len, input_size,)
Return:
f_u (numpy.ndarray): gradient of model with respect to x,
shape(pred_len, state_size, input_size)
Notes:
This should be discrete form !!
"""
"""
# get size
(_, state_size) = xs.shape
(pred_len, input_size) = us.shape
@ -107,7 +109,7 @@ class TwoWheeledModel(Model):
Args:
xs (numpy.ndarray): state, shape(pred_len+1, state_size)
us (numpy.ndarray): input, shape(pred_len, input_size,)
Return:
f_xx (numpy.ndarray): gradient of model with respect to x,
shape(pred_len, state_size, state_size, state_size)
@ -130,7 +132,7 @@ class TwoWheeledModel(Model):
Args:
xs (numpy.ndarray): state, shape(pred_len+1, state_size)
us (numpy.ndarray): input, shape(pred_len, input_size,)
Return:
f_ux (numpy.ndarray): gradient of model with respect to x,
shape(pred_len, state_size, input_size, state_size)
@ -145,7 +147,7 @@ class TwoWheeledModel(Model):
f_ux[:, 1, 0, 2] = np.cos(xs[:, 2])
return f_ux * dt
@staticmethod
def calc_f_uu(xs, us, dt):
""" hessian of model with respect to input in batch form
@ -153,7 +155,7 @@ class TwoWheeledModel(Model):
Args:
xs (numpy.ndarray): state, shape(pred_len+1, state_size)
us (numpy.ndarray): input, shape(pred_len, input_size,)
Return:
f_uu (numpy.ndarray): gradient of model with respect to x,
shape(pred_len, state_size, input_size, input_size)
@ -164,4 +166,4 @@ class TwoWheeledModel(Model):
f_uu = np.zeros((pred_len, state_size, input_size, input_size))
return f_uu * dt
return f_uu * dt

View File

@ -1,9 +1,11 @@
import numpy as np
from .planner import Planner
class ClosestPointPlanner(Planner):
""" This planner make goal state according to goal path
"""
def __init__(self, config):
"""
"""
@ -24,7 +26,7 @@ class ClosestPointPlanner(Planner):
min_idx = np.argmin(np.linalg.norm(curr_x[:-1] - g_traj[:, :-1],
axis=1))
start = (min_idx+self.n_ahead)
start = (min_idx+self.n_ahead)
if start > len(g_traj):
start = len(g_traj)
@ -32,8 +34,8 @@ class ClosestPointPlanner(Planner):
if (min_idx+self.n_ahead+self.pred_len+1) > len(g_traj):
end = len(g_traj)
if abs(start - end) != self.pred_len + 1:
return np.tile(g_traj[-1], (self.pred_len+1, 1))
return g_traj[start:end]
return g_traj[start:end]

View File

@ -1,9 +1,11 @@
import numpy as np
from .planner import Planner
class ConstantPlanner(Planner):
""" This planner make constant goal state
"""
def __init__(self, config):
"""
"""
@ -20,4 +22,4 @@ class ConstantPlanner(Planner):
Returns:
g_xs (numpy.ndarrya): goal state, shape(pred_len, state_size)
"""
return np.tile(g_x, (self.pred_len+1, 1))
return np.tile(g_x, (self.pred_len+1, 1))

View File

@ -1,8 +1,10 @@
import numpy as np
class Planner():
"""
"""
def __init__(self):
"""
"""
@ -15,4 +17,4 @@ class Planner():
Returns:
g_xs (numpy.ndarrya): goal state, shape(pred_len, state_size)
"""
raise NotImplementedError("Implement plan func")
raise NotImplementedError("Implement plan func")

View File

@ -8,9 +8,11 @@ import matplotlib.animation as animation
logger = getLogger(__name__)
class Animator():
""" animation class
"""
def __init__(self, env, args=None):
"""
"""
@ -34,7 +36,7 @@ class Animator():
# make fig
self.anim_fig = plt.figure()
# axis
# axis
self.axis = self.anim_fig.add_subplot(111)
self.axis.set_aspect('equal', adjustable='box')
@ -65,12 +67,12 @@ class Animator():
"""
# set up animation figures
self._setup()
_update_img = lambda i: self._update_img(i, history_x, history_g_x)
def _update_img(i): return self._update_img(i, history_x, history_g_x)
# Set up formatting for the movie files
Writer = animation.writers['ffmpeg']
writer = Writer(fps=15, metadata=dict(artist='Me'), bitrate=1800)
# call funcanimation
ani = FuncAnimation(
self.anim_fig,
@ -79,6 +81,6 @@ class Animator():
# save animation
path = os.path.join(self.result_dir, self.controller_type,
"animation-" + self.env_name + ".mp4")
logger.info("Saved Animation to {} ...".format(path))
logger.info("Saved Animation to {} ...".format(path))
ani.save(path, writer=writer)

View File

@ -5,6 +5,7 @@ import matplotlib.pyplot as plt
from ..helper import save_pickle, load_pickle
def plot_result(history, history_g=None, ylabel="x",
save_dir="./result", name="state_history"):
"""
@ -28,14 +29,14 @@ def plot_result(history, history_g=None, ylabel="x",
def plot(axis, history, history_g=None):
axis.plot(range(iters), history, c="r", linewidth=3)
if history_g is not None:
axis.plot(range(iters), history_g,\
axis.plot(range(iters), history_g,
c="b", linewidth=3, label="goal")
if i < size:
plot(axis1, history[:, i], history_g=history_g[:, i])
if i+1 < size:
plot(axis2, history[:, i+1], history_g=history_g[:, i+1])
if i+2 < size:
if i+2 < size:
plot(axis3, history[:, i+2], history_g=history_g[:, i+2])
# save
@ -47,6 +48,7 @@ def plot_result(history, history_g=None, ylabel="x",
axis1.legend(ncol=1, bbox_to_anchor=(0., 1.02, 1., 0.102), loc=3)
figure.savefig(path, bbox_inches="tight", pad_inches=0.05)
def plot_results(history_x, history_u, history_g=None, args=None):
"""
@ -64,12 +66,13 @@ def plot_results(history_x, history_u, history_g=None, args=None):
controller_type = args.controller_type
plot_result(history_x, history_g=history_g, ylabel="x",
name= env + "-state_history",
name=env + "-state_history",
save_dir="./result/" + controller_type)
plot_result(history_u, history_g=np.zeros_like(history_u), ylabel="u",
name= env + "-input_history",
name=env + "-input_history",
save_dir="./result/" + controller_type)
def save_plot_data(history_x, history_u, history_g=None, args=None):
""" save plot data
@ -98,6 +101,7 @@ def save_plot_data(history_x, history_u, history_g=None, args=None):
env + "-history_g.pkl")
save_pickle(path, history_g)
def load_plot_data(env, controller_type, result_dir="./result"):
"""
Args:
@ -123,6 +127,7 @@ def load_plot_data(env, controller_type, result_dir="./result"):
return history_x, history_u, history_g
def plot_multi_result(histories, histories_g=None, labels=None, ylabel="x",
save_dir="./result", name="state_history"):
"""
@ -130,7 +135,7 @@ def plot_multi_result(histories, histories_g=None, labels=None, ylabel="x",
history (numpy.ndarray): history, shape(iters, size)
"""
(_, iters, size) = histories.shape
for i in range(0, size, 2):
figure = plt.figure()
@ -146,17 +151,17 @@ def plot_multi_result(histories, histories_g=None, labels=None, ylabel="x",
axis.plot(range(iters), history,
linewidth=3, label=label, alpha=0.7, linestyle="dashed")
if history_g is not None:
axis.plot(range(iters), history_g,\
axis.plot(range(iters), history_g,
c="b", linewidth=3)
if i < size:
for j, (history, history_g) \
in enumerate(zip(histories, histories_g)):
in enumerate(zip(histories, histories_g)):
plot(axis1, history[:, i],
history_g=history_g[:, i], label=labels[j])
if i+1 < size:
for j, (history, history_g) in \
enumerate(zip(histories, histories_g)):
enumerate(zip(histories, histories_g)):
plot(axis2, history[:, i+1],
history_g=history_g[:, i+1], label=labels[j])

View File

@ -5,9 +5,10 @@ import matplotlib.pyplot as plt
from ..common.utils import rotate_pos
def circle(center_x, center_y, radius, start=0., end=2*np.pi, n_point=100):
""" Create circle matrix
Args:
center_x (float): the center x position of the circle
center_y (float): the center y position of the circle
@ -29,6 +30,7 @@ def circle(center_x, center_y, radius, start=0., end=2*np.pi, n_point=100):
return np.array(circle_xs), np.array(circle_ys)
def circle_with_angle(center_x, center_y, radius, angle):
""" Create circle matrix with angle line matrix
@ -50,6 +52,7 @@ def circle_with_angle(center_x, center_y, radius, angle):
return circle_x, circle_y, angle_x, angle_y
def square(center_x, center_y, shape, angle):
""" Create square
@ -74,9 +77,10 @@ def square(center_x, center_y, shape, angle):
trans_points = rotate_pos(square_xy, angle)
# translation
trans_points += np.array([center_x, center_y])
return trans_points[:, 0], trans_points[:, 1]
def square_with_angle(center_x, center_y, shape, angle):
""" Create square with angle line
@ -96,4 +100,4 @@ def square_with_angle(center_x, center_y, shape, angle):
angle_x = np.array([center_x, center_x + np.cos(angle) * shape[0]])
angle_y = np.array([center_y, center_y + np.sin(angle) * shape[1]])
return square_x, square_y, angle_x, angle_y
return square_x, square_y, angle_x, angle_y

View File

@ -1,2 +1,2 @@
from PythonLinearNonlinearControl.runners.runner \
import ExpRunner # NOQA
import ExpRunner # NOQA

View File

@ -1,4 +1,5 @@
from .runner import ExpRunner
def make_runner(args):
return ExpRunner()
return ExpRunner()

View File

@ -4,9 +4,11 @@ import numpy as np
logger = getLogger(__name__)
class ExpRunner():
""" experiment runner
"""
def __init__(self):
"""
"""
@ -46,6 +48,6 @@ class ExpRunner():
score += cost
step_count += 1
logger.debug("Controller type = {}, Score = {}"\
logger.debug("Controller type = {}, Score = {}"
.format(controller, score))
return np.array(history_x), np.array(history_u), np.array(history_g)
return np.array(history_x), np.array(history_u), np.array(history_g)