Fix format

This commit is contained in:
Shunichi09 2021-02-13 17:18:42 +09:00
parent f49ed382a4
commit d64a799eda
37 changed files with 648 additions and 563 deletions

View File

@ -1,8 +1,9 @@
import numpy as np import numpy as np
def rotate_pos(pos, angle): def rotate_pos(pos, angle):
""" Transformation the coordinate in the angle """ Transformation the coordinate in the angle
Args: Args:
pos (numpy.ndarray): local state, shape(data_size, 2) pos (numpy.ndarray): local state, shape(data_size, 2)
angle (float): rotate angle, in radians angle (float): rotate angle, in radians
@ -14,9 +15,10 @@ def rotate_pos(pos, angle):
return np.dot(pos, rot_mat.T) return np.dot(pos, rot_mat.T)
def fit_angle_in_range(angles, min_angle=-np.pi, max_angle=np.pi): def fit_angle_in_range(angles, min_angle=-np.pi, max_angle=np.pi):
""" Check angle range and correct the range """ Check angle range and correct the range
Args: Args:
angle (numpy.ndarray): in radians angle (numpy.ndarray): in radians
min_angle (float): maximum of range in radians, default -pi min_angle (float): maximum of range in radians, default -pi
@ -29,7 +31,7 @@ def fit_angle_in_range(angles, min_angle=-np.pi, max_angle=np.pi):
if (max_angle - min_angle) < 2.0 * np.pi: if (max_angle - min_angle) < 2.0 * np.pi:
raise ValueError("difference between max_angle \ raise ValueError("difference between max_angle \
and min_angle must be greater than 2.0 * pi") and min_angle must be greater than 2.0 * pi")
output = np.array(angles) output = np.array(angles)
output_shape = output.shape output_shape = output.shape
@ -43,6 +45,7 @@ def fit_angle_in_range(angles, min_angle=-np.pi, max_angle=np.pi):
output = np.minimum(max_angle, np.maximum(min_angle, output)) output = np.minimum(max_angle, np.maximum(min_angle, output))
return output.reshape(output_shape) return output.reshape(output_shape)
def update_state_with_Runge_Kutta(state, u, functions, dt=0.01): def update_state_with_Runge_Kutta(state, u, functions, dt=0.01):
""" update state in Runge Kutta methods """ update state in Runge Kutta methods
Args: Args:
@ -52,23 +55,23 @@ def update_state_with_Runge_Kutta(state, u, functions, dt=0.01):
each function will be called like func(*state, *u) each function will be called like func(*state, *u)
We expect that this function returns differential of each state We expect that this function returns differential of each state
dt (float): float in seconds dt (float): float in seconds
Returns: Returns:
next_state (np.array): next state of system next_state (np.array): next state of system
Notes: Notes:
sample of function is as follows: sample of function is as follows:
def func_x(self, x_1, x_2, u): def func_x(self, x_1, x_2, u):
x_dot = (1. - x_1**2 - x_2**2) * x_2 - x_1 + u x_dot = (1. - x_1**2 - x_2**2) * x_2 - x_1 + u
return x_dot return x_dot
Note that the function return x_dot. Note that the function return x_dot.
""" """
state_size = len(state) state_size = len(state)
assert state_size == len(functions), \ assert state_size == len(functions), \
"Invalid functions length, You need to give the state size functions" "Invalid functions length, You need to give the state size functions"
k0 = np.zeros(state_size) k0 = np.zeros(state_size)
k1 = np.zeros(state_size) k1 = np.zeros(state_size)
k2 = np.zeros(state_size) k2 = np.zeros(state_size)
@ -84,10 +87,10 @@ def update_state_with_Runge_Kutta(state, u, functions, dt=0.01):
for i, func in enumerate(functions): for i, func in enumerate(functions):
k1[i] = dt * func(*inputs) k1[i] = dt * func(*inputs)
add_state = state + k1 / 2. add_state = state + k1 / 2.
inputs = np.concatenate([add_state, u]) inputs = np.concatenate([add_state, u])
for i, func in enumerate(functions): for i, func in enumerate(functions):
k2[i] = dt * func(*inputs) k2[i] = dt * func(*inputs)
@ -95,6 +98,6 @@ def update_state_with_Runge_Kutta(state, u, functions, dt=0.01):
inputs = np.concatenate([add_state, u]) inputs = np.concatenate([add_state, u])
for i, func in enumerate(functions): for i, func in enumerate(functions):
k3[i] = dt * func(*inputs) k3[i] = dt * func(*inputs)
return (k0 + 2. * k1 + 2. * k2 + k3) / 6. return (k0 + 2. * k1 + 2. * k2 + k3) / 6.

View File

@ -1,5 +1,6 @@
import numpy as np import numpy as np
class CartPoleConfigModule(): class CartPoleConfigModule():
# parameters # parameters
ENV_NAME = "CartPole-v0" ENV_NAME = "CartPole-v0"
@ -12,7 +13,7 @@ class CartPoleConfigModule():
DT = 0.02 DT = 0.02
# cost parameters # cost parameters
R = np.diag([0.01]) # 0.01 is worked for MPPI and CEM and MPPIWilliams R = np.diag([0.01]) # 0.01 is worked for MPPI and CEM and MPPIWilliams
# 1. is worked for iLQR # 1. is worked for iLQR
TERMINAL_WEIGHT = 1. TERMINAL_WEIGHT = 1.
Q = None Q = None
Sf = None Sf = None
@ -39,41 +40,41 @@ class CartPoleConfigModule():
"num_elites": 50, "num_elites": 50,
"max_iters": 15, "max_iters": 15,
"alpha": 0.3, "alpha": 0.3,
"init_var":9., "init_var": 9.,
"threshold":0.001 "threshold": 0.001
}, },
"MPPI":{ "MPPI": {
"beta" : 0.6, "beta": 0.6,
"popsize": 5000, "popsize": 5000,
"kappa": 0.9, "kappa": 0.9,
"noise_sigma": 0.5, "noise_sigma": 0.5,
}, },
"MPPIWilliams":{ "MPPIWilliams": {
"popsize": 5000, "popsize": 5000,
"lambda": 1., "lambda": 1.,
"noise_sigma": 0.9, "noise_sigma": 0.9,
}, },
"iLQR":{ "iLQR": {
"max_iter": 500, "max_iter": 500,
"init_mu": 1., "init_mu": 1.,
"mu_min": 1e-6, "mu_min": 1e-6,
"mu_max": 1e10, "mu_max": 1e10,
"init_delta": 2., "init_delta": 2.,
"threshold": 1e-6, "threshold": 1e-6,
}, },
"DDP":{ "DDP": {
"max_iter": 500, "max_iter": 500,
"init_mu": 1., "init_mu": 1.,
"mu_min": 1e-6, "mu_min": 1e-6,
"mu_max": 1e10, "mu_max": 1e10,
"init_delta": 2., "init_delta": 2.,
"threshold": 1e-6, "threshold": 1e-6,
}, },
"NMPC-CGMRES":{ "NMPC-CGMRES": {
}, },
"NMPC-Newton":{ "NMPC-Newton": {
}, },
} }
@staticmethod @staticmethod
def input_cost_fn(u): def input_cost_fn(u):
@ -87,7 +88,7 @@ class CartPoleConfigModule():
shape(pop_size, pred_len, input_size) shape(pop_size, pred_len, input_size)
""" """
return (u**2) * np.diag(CartPoleConfigModule.R) return (u**2) * np.diag(CartPoleConfigModule.R)
@staticmethod @staticmethod
def state_cost_fn(x, g_x): def state_cost_fn(x, g_x):
""" state cost function """ state cost function
@ -103,21 +104,21 @@ class CartPoleConfigModule():
""" """
if len(x.shape) > 2: if len(x.shape) > 2:
return (6. * (x[:, :, 0]**2) \ return (6. * (x[:, :, 0]**2)
+ 12. * ((np.cos(x[:, :, 2]) + 1.)**2) \ + 12. * ((np.cos(x[:, :, 2]) + 1.)**2)
+ 0.1 * (x[:, :, 1]**2) \ + 0.1 * (x[:, :, 1]**2)
+ 0.1 * (x[:, :, 3]**2))[:, :, np.newaxis] + 0.1 * (x[:, :, 3]**2))[:, :, np.newaxis]
elif len(x.shape) > 1: elif len(x.shape) > 1:
return (6. * (x[:, 0]**2) \ return (6. * (x[:, 0]**2)
+ 12. * ((np.cos(x[:, 2]) + 1.)**2) \ + 12. * ((np.cos(x[:, 2]) + 1.)**2)
+ 0.1 * (x[:, 1]**2) \ + 0.1 * (x[:, 1]**2)
+ 0.1 * (x[:, 3]**2))[:, np.newaxis] + 0.1 * (x[:, 3]**2))[:, np.newaxis]
return 6. * (x[0]**2) \ return 6. * (x[0]**2) \
+ 12. * ((np.cos(x[2]) + 1.)**2) \ + 12. * ((np.cos(x[2]) + 1.)**2) \
+ 0.1 * (x[1]**2) \ + 0.1 * (x[1]**2) \
+ 0.1 * (x[3]**2) + 0.1 * (x[3]**2)
@staticmethod @staticmethod
def terminal_state_cost_fn(terminal_x, terminal_g_x): def terminal_state_cost_fn(terminal_x, terminal_g_x):
@ -134,18 +135,18 @@ class CartPoleConfigModule():
""" """
if len(terminal_x.shape) > 1: if len(terminal_x.shape) > 1:
return (6. * (terminal_x[:, 0]**2) \ return (6. * (terminal_x[:, 0]**2)
+ 12. * ((np.cos(terminal_x[:, 2]) + 1.)**2) \ + 12. * ((np.cos(terminal_x[:, 2]) + 1.)**2)
+ 0.1 * (terminal_x[:, 1]**2) \ + 0.1 * (terminal_x[:, 1]**2)
+ 0.1 * (terminal_x[:, 3]**2))[:, np.newaxis] \ + 0.1 * (terminal_x[:, 3]**2))[:, np.newaxis] \
* CartPoleConfigModule.TERMINAL_WEIGHT
return (6. * (terminal_x[0]**2) \
+ 12. * ((np.cos(terminal_x[2]) + 1.)**2) \
+ 0.1 * (terminal_x[1]**2) \
+ 0.1 * (terminal_x[3]**2)) \
* CartPoleConfigModule.TERMINAL_WEIGHT * CartPoleConfigModule.TERMINAL_WEIGHT
return (6. * (terminal_x[0]**2)
+ 12. * ((np.cos(terminal_x[2]) + 1.)**2)
+ 0.1 * (terminal_x[1]**2)
+ 0.1 * (terminal_x[3]**2)) \
* CartPoleConfigModule.TERMINAL_WEIGHT
@staticmethod @staticmethod
def gradient_cost_fn_with_state(x, g_x, terminal=False): def gradient_cost_fn_with_state(x, g_x, terminal=False):
""" gradient of costs with respect to the state """ gradient of costs with respect to the state
@ -153,26 +154,26 @@ class CartPoleConfigModule():
Args: Args:
x (numpy.ndarray): state, shape(pred_len, state_size) x (numpy.ndarray): state, shape(pred_len, state_size)
g_x (numpy.ndarray): goal state, shape(pred_len, state_size) g_x (numpy.ndarray): goal state, shape(pred_len, state_size)
Returns: Returns:
l_x (numpy.ndarray): gradient of cost, shape(pred_len, state_size) l_x (numpy.ndarray): gradient of cost, shape(pred_len, state_size)
or shape(1, state_size) or shape(1, state_size)
""" """
if not terminal: if not terminal:
cost_dx0 = 12. * x[:, 0] cost_dx0 = 12. * x[:, 0]
cost_dx1 = 0.2 * x[:, 1] cost_dx1 = 0.2 * x[:, 1]
cost_dx2 = 24. * (1 + np.cos(x[:, 2])) * -np.sin(x[:, 2]) cost_dx2 = 24. * (1 + np.cos(x[:, 2])) * -np.sin(x[:, 2])
cost_dx3 = 0.2 * x[:, 3] cost_dx3 = 0.2 * x[:, 3]
cost_dx = np.stack((cost_dx0, cost_dx1,\ cost_dx = np.stack((cost_dx0, cost_dx1,
cost_dx2, cost_dx3), axis=1) cost_dx2, cost_dx3), axis=1)
return cost_dx return cost_dx
cost_dx0 = 12. * x[0] cost_dx0 = 12. * x[0]
cost_dx1 = 0.2 * x[1] cost_dx1 = 0.2 * x[1]
cost_dx2 = 24. * (1 + np.cos(x[2])) * -np.sin(x[2]) cost_dx2 = 24. * (1 + np.cos(x[2])) * -np.sin(x[2])
cost_dx3 = 0.2 * x[3] cost_dx3 = 0.2 * x[3]
cost_dx = np.array([[cost_dx0, cost_dx1, cost_dx2, cost_dx3]]) cost_dx = np.array([[cost_dx0, cost_dx1, cost_dx2, cost_dx3]])
return cost_dx * CartPoleConfigModule.TERMINAL_WEIGHT return cost_dx * CartPoleConfigModule.TERMINAL_WEIGHT
@staticmethod @staticmethod
@ -206,21 +207,21 @@ class CartPoleConfigModule():
hessian[:, 0, 0] = 12. hessian[:, 0, 0] = 12.
hessian[:, 1, 1] = 0.2 hessian[:, 1, 1] = 0.2
hessian[:, 2, 2] = 24. * -np.sin(x[:, 2]) \ hessian[:, 2, 2] = 24. * -np.sin(x[:, 2]) \
* (-np.sin(x[:, 2])) \ * (-np.sin(x[:, 2])) \
+ 24. * (1. + np.cos(x[:, 2])) \ + 24. * (1. + np.cos(x[:, 2])) \
* -np.cos(x[:, 2]) * -np.cos(x[:, 2])
hessian[:, 3, 3] = 0.2 hessian[:, 3, 3] = 0.2
return hessian return hessian
state_size = len(x) state_size = len(x)
hessian = np.eye(state_size) hessian = np.eye(state_size)
hessian[0, 0] = 12. hessian[0, 0] = 12.
hessian[1, 1] = 0.2 hessian[1, 1] = 0.2
hessian[2, 2] = 24. * -np.sin(x[2]) \ hessian[2, 2] = 24. * -np.sin(x[2]) \
* (-np.sin(x[2])) \ * (-np.sin(x[2])) \
+ 24. * (1. + np.cos(x[2])) \ + 24. * (1. + np.cos(x[2])) \
* -np.cos(x[2]) * -np.cos(x[2])
hessian[3, 3] = 0.2 hessian[3, 3] = 0.2
return hessian[np.newaxis, :, :] * CartPoleConfigModule.TERMINAL_WEIGHT return hessian[np.newaxis, :, :] * CartPoleConfigModule.TERMINAL_WEIGHT
@ -239,7 +240,7 @@ class CartPoleConfigModule():
(pred_len, _) = u.shape (pred_len, _) = u.shape
return np.tile(2.*CartPoleConfigModule.R, (pred_len, 1, 1)) return np.tile(2.*CartPoleConfigModule.R, (pred_len, 1, 1))
@staticmethod @staticmethod
def hessian_cost_fn_with_input_state(x, u): def hessian_cost_fn_with_input_state(x, u):
""" hessian costs with respect to the state and input """ hessian costs with respect to the state and input
@ -254,4 +255,4 @@ class CartPoleConfigModule():
(_, state_size) = x.shape (_, state_size) = x.shape
(pred_len, input_size) = u.shape (pred_len, input_size) = u.shape
return np.zeros((pred_len, input_size, state_size)) return np.zeros((pred_len, input_size, state_size))

View File

@ -1,5 +1,6 @@
import numpy as np import numpy as np
class FirstOrderLagConfigModule(): class FirstOrderLagConfigModule():
# parameters # parameters
ENV_NAME = "FirstOrderLag-v0" ENV_NAME = "FirstOrderLag-v0"
@ -34,43 +35,43 @@ class FirstOrderLagConfigModule():
"num_elites": 50, "num_elites": 50,
"max_iters": 15, "max_iters": 15,
"alpha": 0.3, "alpha": 0.3,
"init_var":1., "init_var": 1.,
"threshold":0.001 "threshold": 0.001
}, },
"MPPI":{ "MPPI": {
"beta" : 0.6, "beta": 0.6,
"popsize": 5000, "popsize": 5000,
"kappa": 0.9, "kappa": 0.9,
"noise_sigma": 0.5, "noise_sigma": 0.5,
}, },
"MPPIWilliams":{ "MPPIWilliams": {
"popsize": 5000, "popsize": 5000,
"lambda": 1., "lambda": 1.,
"noise_sigma": 0.9, "noise_sigma": 0.9,
}, },
"MPC":{ "MPC": {
}, },
"iLQR":{ "iLQR": {
"max_iter": 500, "max_iter": 500,
"init_mu": 1., "init_mu": 1.,
"mu_min": 1e-6, "mu_min": 1e-6,
"mu_max": 1e10, "mu_max": 1e10,
"init_delta": 2., "init_delta": 2.,
"threshold": 1e-6, "threshold": 1e-6,
}, },
"DDP":{ "DDP": {
"max_iter": 500, "max_iter": 500,
"init_mu": 1., "init_mu": 1.,
"mu_min": 1e-6, "mu_min": 1e-6,
"mu_max": 1e10, "mu_max": 1e10,
"init_delta": 2., "init_delta": 2.,
"threshold": 1e-6, "threshold": 1e-6,
}, },
"NMPC-CGMRES":{ "NMPC-CGMRES": {
}, },
"NMPC-Newton":{ "NMPC-Newton": {
}, },
} }
@staticmethod @staticmethod
def input_cost_fn(u): def input_cost_fn(u):
@ -83,7 +84,7 @@ class FirstOrderLagConfigModule():
shape(pop_size, pred_len, input_size) shape(pop_size, pred_len, input_size)
""" """
return (u**2) * np.diag(FirstOrderLagConfigModule.R) return (u**2) * np.diag(FirstOrderLagConfigModule.R)
@staticmethod @staticmethod
def state_cost_fn(x, g_x): def state_cost_fn(x, g_x):
""" state cost function """ state cost function
@ -111,8 +112,8 @@ class FirstOrderLagConfigModule():
shape(pop_size, pred_len) shape(pop_size, pred_len)
""" """
return ((terminal_x - terminal_g_x)**2) \ return ((terminal_x - terminal_g_x)**2) \
* np.diag(FirstOrderLagConfigModule.Sf) * np.diag(FirstOrderLagConfigModule.Sf)
@staticmethod @staticmethod
def gradient_cost_fn_with_state(x, g_x, terminal=False): def gradient_cost_fn_with_state(x, g_x, terminal=False):
""" gradient of costs with respect to the state """ gradient of costs with respect to the state
@ -120,16 +121,16 @@ class FirstOrderLagConfigModule():
Args: Args:
x (numpy.ndarray): state, shape(pred_len, state_size) x (numpy.ndarray): state, shape(pred_len, state_size)
g_x (numpy.ndarray): goal state, shape(pred_len, state_size) g_x (numpy.ndarray): goal state, shape(pred_len, state_size)
Returns: Returns:
l_x (numpy.ndarray): gradient of cost, shape(pred_len, state_size) l_x (numpy.ndarray): gradient of cost, shape(pred_len, state_size)
or shape(1, state_size) or shape(1, state_size)
""" """
if not terminal: if not terminal:
return 2. * (x - g_x) * np.diag(FirstOrderLagConfigModule.Q) return 2. * (x - g_x) * np.diag(FirstOrderLagConfigModule.Q)
return (2. * (x - g_x) \ return (2. * (x - g_x)
* np.diag(FirstOrderLagConfigModule.Sf))[np.newaxis, :] * np.diag(FirstOrderLagConfigModule.Sf))[np.newaxis, :]
@staticmethod @staticmethod
def gradient_cost_fn_with_input(x, u): def gradient_cost_fn_with_input(x, u):
@ -138,7 +139,7 @@ class FirstOrderLagConfigModule():
Args: Args:
x (numpy.ndarray): state, shape(pred_len, state_size) x (numpy.ndarray): state, shape(pred_len, state_size)
u (numpy.ndarray): goal state, shape(pred_len, input_size) u (numpy.ndarray): goal state, shape(pred_len, input_size)
Returns: Returns:
l_u (numpy.ndarray): gradient of cost, shape(pred_len, input_size) l_u (numpy.ndarray): gradient of cost, shape(pred_len, input_size)
""" """
@ -151,7 +152,7 @@ class FirstOrderLagConfigModule():
Args: Args:
x (numpy.ndarray): state, shape(pred_len, state_size) x (numpy.ndarray): state, shape(pred_len, state_size)
g_x (numpy.ndarray): goal state, shape(pred_len, state_size) g_x (numpy.ndarray): goal state, shape(pred_len, state_size)
Returns: Returns:
l_xx (numpy.ndarray): gradient of cost, l_xx (numpy.ndarray): gradient of cost,
shape(pred_len, state_size, state_size) or shape(pred_len, state_size, state_size) or
@ -159,9 +160,9 @@ class FirstOrderLagConfigModule():
""" """
if not terminal: if not terminal:
(pred_len, _) = x.shape (pred_len, _) = x.shape
return np.tile(2.*FirstOrderLagConfigModule.Q, (pred_len, 1, 1)) return np.tile(2.*FirstOrderLagConfigModule.Q, (pred_len, 1, 1))
return np.tile(2.*FirstOrderLagConfigModule.Sf, (1, 1, 1)) return np.tile(2.*FirstOrderLagConfigModule.Sf, (1, 1, 1))
@staticmethod @staticmethod
def hessian_cost_fn_with_input(x, u): def hessian_cost_fn_with_input(x, u):
@ -170,7 +171,7 @@ class FirstOrderLagConfigModule():
Args: Args:
x (numpy.ndarray): state, shape(pred_len, state_size) x (numpy.ndarray): state, shape(pred_len, state_size)
u (numpy.ndarray): goal state, shape(pred_len, input_size) u (numpy.ndarray): goal state, shape(pred_len, input_size)
Returns: Returns:
l_uu (numpy.ndarray): gradient of cost, l_uu (numpy.ndarray): gradient of cost,
shape(pred_len, input_size, input_size) shape(pred_len, input_size, input_size)
@ -178,7 +179,7 @@ class FirstOrderLagConfigModule():
(pred_len, _) = u.shape (pred_len, _) = u.shape
return np.tile(2.*FirstOrderLagConfigModule.R, (pred_len, 1, 1)) return np.tile(2.*FirstOrderLagConfigModule.R, (pred_len, 1, 1))
@staticmethod @staticmethod
def hessian_cost_fn_with_input_state(x, u): def hessian_cost_fn_with_input_state(x, u):
""" hessian costs with respect to the state and input """ hessian costs with respect to the state and input
@ -186,7 +187,7 @@ class FirstOrderLagConfigModule():
Args: Args:
x (numpy.ndarray): state, shape(pred_len, state_size) x (numpy.ndarray): state, shape(pred_len, state_size)
u (numpy.ndarray): goal state, shape(pred_len, input_size) u (numpy.ndarray): goal state, shape(pred_len, input_size)
Returns: Returns:
l_ux (numpy.ndarray): gradient of cost , l_ux (numpy.ndarray): gradient of cost ,
shape(pred_len, input_size, state_size) shape(pred_len, input_size, state_size)

View File

@ -2,6 +2,7 @@ from .first_order_lag import FirstOrderLagConfigModule
from .two_wheeled import TwoWheeledConfigModule from .two_wheeled import TwoWheeledConfigModule
from .cartpole import CartPoleConfigModule from .cartpole import CartPoleConfigModule
def make_config(args): def make_config(args):
""" """
Returns: Returns:
@ -12,4 +13,4 @@ def make_config(args):
elif args.env == "TwoWheeledConst" or args.env == "TwoWheeledTrack": elif args.env == "TwoWheeledConst" or args.env == "TwoWheeledTrack":
return TwoWheeledConfigModule() return TwoWheeledConfigModule()
elif args.env == "CartPole": elif args.env == "CartPole":
return CartPoleConfigModule() return CartPoleConfigModule()

View File

@ -4,6 +4,7 @@ from matplotlib.axes import Axes
from ..plotters.plot_objs import square_with_angle, square from ..plotters.plot_objs import square_with_angle, square
from ..common.utils import fit_angle_in_range from ..common.utils import fit_angle_in_range
class TwoWheeledConfigModule(): class TwoWheeledConfigModule():
# parameters # parameters
ENV_NAME = "TwoWheeled-v0" ENV_NAME = "TwoWheeled-v0"
@ -25,7 +26,7 @@ class TwoWheeledConfigModule():
R = np.diag([0.01, 0.01]) R = np.diag([0.01, 0.01])
Q = np.diag([2.5, 2.5, 0.01]) Q = np.diag([2.5, 2.5, 0.01])
Sf = np.diag([2.5, 2.5, 0.01]) Sf = np.diag([2.5, 2.5, 0.01])
# bounds # bounds
INPUT_LOWER_BOUND = np.array([-1.5, -3.14]) INPUT_LOWER_BOUND = np.array([-1.5, -3.14])
INPUT_UPPER_BOUND = np.array([1.5, 3.14]) INPUT_UPPER_BOUND = np.array([1.5, 3.14])
@ -46,41 +47,41 @@ class TwoWheeledConfigModule():
"num_elites": 50, "num_elites": 50,
"max_iters": 15, "max_iters": 15,
"alpha": 0.3, "alpha": 0.3,
"init_var":1., "init_var": 1.,
"threshold":0.001 "threshold": 0.001
}, },
"MPPI":{ "MPPI": {
"beta" : 0.6, "beta": 0.6,
"popsize": 5000, "popsize": 5000,
"kappa": 0.9, "kappa": 0.9,
"noise_sigma": 0.5, "noise_sigma": 0.5,
}, },
"MPPIWilliams":{ "MPPIWilliams": {
"popsize": 5000, "popsize": 5000,
"lambda": 1, "lambda": 1,
"noise_sigma": 1., "noise_sigma": 1.,
}, },
"iLQR":{ "iLQR": {
"max_iter": 500, "max_iter": 500,
"init_mu": 1., "init_mu": 1.,
"mu_min": 1e-6, "mu_min": 1e-6,
"mu_max": 1e10, "mu_max": 1e10,
"init_delta": 2., "init_delta": 2.,
"threshold": 1e-6, "threshold": 1e-6,
}, },
"DDP":{ "DDP": {
"max_iter": 500, "max_iter": 500,
"init_mu": 1., "init_mu": 1.,
"mu_min": 1e-6, "mu_min": 1e-6,
"mu_max": 1e10, "mu_max": 1e10,
"init_delta": 2., "init_delta": 2.,
"threshold": 1e-6, "threshold": 1e-6,
}, },
"NMPC-CGMRES":{ "NMPC-CGMRES": {
}, },
"NMPC-Newton":{ "NMPC-Newton": {
}, },
} }
@staticmethod @staticmethod
def input_cost_fn(u): def input_cost_fn(u):
@ -93,7 +94,7 @@ class TwoWheeledConfigModule():
shape(pop_size, pred_len, input_size) shape(pop_size, pred_len, input_size)
""" """
return (u**2) * np.diag(TwoWheeledConfigModule.R) return (u**2) * np.diag(TwoWheeledConfigModule.R)
@staticmethod @staticmethod
def fit_diff_in_range(diff_x): def fit_diff_in_range(diff_x):
""" fit difference state in range(angle) """ fit difference state in range(angle)
@ -107,7 +108,7 @@ class TwoWheeledConfigModule():
fitted_diff_x (numpy.ndarray): same shape as diff_x fitted_diff_x (numpy.ndarray): same shape as diff_x
""" """
if len(diff_x.shape) == 3: if len(diff_x.shape) == 3:
diff_x[:, :, -1] = fit_angle_in_range(diff_x[:, :, -1]) diff_x[:, :, -1] = fit_angle_in_range(diff_x[:, :, -1])
elif len(diff_x.shape) == 2: elif len(diff_x.shape) == 2:
diff_x[:, -1] = fit_angle_in_range(diff_x[:, -1]) diff_x[:, -1] = fit_angle_in_range(diff_x[:, -1])
elif len(diff_x.shape) == 1: elif len(diff_x.shape) == 1:
@ -142,11 +143,11 @@ class TwoWheeledConfigModule():
cost (numpy.ndarray): cost of state, shape(pred_len, ) or cost (numpy.ndarray): cost of state, shape(pred_len, ) or
shape(pop_size, pred_len) shape(pop_size, pred_len)
""" """
terminal_diff = TwoWheeledConfigModule.fit_diff_in_range(terminal_x \ terminal_diff = TwoWheeledConfigModule.fit_diff_in_range(terminal_x
- terminal_g_x) - terminal_g_x)
return ((terminal_diff)**2) * np.diag(TwoWheeledConfigModule.Sf) return ((terminal_diff)**2) * np.diag(TwoWheeledConfigModule.Sf)
@staticmethod @staticmethod
def gradient_cost_fn_with_state(x, g_x, terminal=False): def gradient_cost_fn_with_state(x, g_x, terminal=False):
""" gradient of costs with respect to the state """ gradient of costs with respect to the state
@ -154,18 +155,18 @@ class TwoWheeledConfigModule():
Args: Args:
x (numpy.ndarray): state, shape(pred_len, state_size) x (numpy.ndarray): state, shape(pred_len, state_size)
g_x (numpy.ndarray): goal state, shape(pred_len, state_size) g_x (numpy.ndarray): goal state, shape(pred_len, state_size)
Returns: Returns:
l_x (numpy.ndarray): gradient of cost, shape(pred_len, state_size) l_x (numpy.ndarray): gradient of cost, shape(pred_len, state_size)
or shape(1, state_size) or shape(1, state_size)
""" """
diff = TwoWheeledConfigModule.fit_diff_in_range(x - g_x) diff = TwoWheeledConfigModule.fit_diff_in_range(x - g_x)
if not terminal: if not terminal:
return 2. * (diff) * np.diag(TwoWheeledConfigModule.Q) return 2. * (diff) * np.diag(TwoWheeledConfigModule.Q)
return (2. * (diff) \ return (2. * (diff)
* np.diag(TwoWheeledConfigModule.Sf))[np.newaxis, :] * np.diag(TwoWheeledConfigModule.Sf))[np.newaxis, :]
@staticmethod @staticmethod
def gradient_cost_fn_with_input(x, u): def gradient_cost_fn_with_input(x, u):
@ -174,7 +175,7 @@ class TwoWheeledConfigModule():
Args: Args:
x (numpy.ndarray): state, shape(pred_len, state_size) x (numpy.ndarray): state, shape(pred_len, state_size)
u (numpy.ndarray): goal state, shape(pred_len, input_size) u (numpy.ndarray): goal state, shape(pred_len, input_size)
Returns: Returns:
l_u (numpy.ndarray): gradient of cost, shape(pred_len, input_size) l_u (numpy.ndarray): gradient of cost, shape(pred_len, input_size)
""" """
@ -187,7 +188,7 @@ class TwoWheeledConfigModule():
Args: Args:
x (numpy.ndarray): state, shape(pred_len, state_size) x (numpy.ndarray): state, shape(pred_len, state_size)
g_x (numpy.ndarray): goal state, shape(pred_len, state_size) g_x (numpy.ndarray): goal state, shape(pred_len, state_size)
Returns: Returns:
l_xx (numpy.ndarray): gradient of cost, l_xx (numpy.ndarray): gradient of cost,
shape(pred_len, state_size, state_size) or shape(pred_len, state_size, state_size) or
@ -195,9 +196,9 @@ class TwoWheeledConfigModule():
""" """
if not terminal: if not terminal:
(pred_len, _) = x.shape (pred_len, _) = x.shape
return np.tile(2.*TwoWheeledConfigModule.Q, (pred_len, 1, 1)) return np.tile(2.*TwoWheeledConfigModule.Q, (pred_len, 1, 1))
return np.tile(2.*TwoWheeledConfigModule.Sf, (1, 1, 1)) return np.tile(2.*TwoWheeledConfigModule.Sf, (1, 1, 1))
@staticmethod @staticmethod
def hessian_cost_fn_with_input(x, u): def hessian_cost_fn_with_input(x, u):
@ -206,7 +207,7 @@ class TwoWheeledConfigModule():
Args: Args:
x (numpy.ndarray): state, shape(pred_len, state_size) x (numpy.ndarray): state, shape(pred_len, state_size)
u (numpy.ndarray): goal state, shape(pred_len, input_size) u (numpy.ndarray): goal state, shape(pred_len, input_size)
Returns: Returns:
l_uu (numpy.ndarray): gradient of cost, l_uu (numpy.ndarray): gradient of cost,
shape(pred_len, input_size, input_size) shape(pred_len, input_size, input_size)
@ -214,7 +215,7 @@ class TwoWheeledConfigModule():
(pred_len, _) = u.shape (pred_len, _) = u.shape
return np.tile(2.*TwoWheeledConfigModule.R, (pred_len, 1, 1)) return np.tile(2.*TwoWheeledConfigModule.R, (pred_len, 1, 1))
@staticmethod @staticmethod
def hessian_cost_fn_with_input_state(x, u): def hessian_cost_fn_with_input_state(x, u):
""" hessian costs with respect to the state and input """ hessian costs with respect to the state and input
@ -222,7 +223,7 @@ class TwoWheeledConfigModule():
Args: Args:
x (numpy.ndarray): state, shape(pred_len, state_size) x (numpy.ndarray): state, shape(pred_len, state_size)
u (numpy.ndarray): goal state, shape(pred_len, input_size) u (numpy.ndarray): goal state, shape(pred_len, input_size)
Returns: Returns:
l_ux (numpy.ndarray): gradient of cost , l_ux (numpy.ndarray): gradient of cost ,
shape(pred_len, input_size, state_size) shape(pred_len, input_size, state_size)
@ -230,4 +231,4 @@ class TwoWheeledConfigModule():
(_, state_size) = x.shape (_, state_size) = x.shape
(pred_len, input_size) = u.shape (pred_len, input_size) = u.shape
return np.zeros((pred_len, input_size, state_size)) return np.zeros((pred_len, input_size, state_size))

View File

@ -8,6 +8,7 @@ from ..envs.cost import calc_cost
logger = getLogger(__name__) logger = getLogger(__name__)
class CEM(Controller): class CEM(Controller):
""" Cross Entropy Method for linear and nonlinear method """ Cross Entropy Method for linear and nonlinear method
@ -19,6 +20,7 @@ class CEM(Controller):
using probabilistic dynamics models. using probabilistic dynamics models.
In Advances in Neural Information Processing Systems (pp. 4754-4765). In Advances in Neural Information Processing Systems (pp. 4754-4765).
""" """
def __init__(self, config, model): def __init__(self, config, model):
super(CEM, self).__init__(config, model) super(CEM, self).__init__(config, model)
@ -38,7 +40,7 @@ class CEM(Controller):
self.init_var = config.opt_config["CEM"]["init_var"] self.init_var = config.opt_config["CEM"]["init_var"]
self.opt_dim = self.input_size * self.pred_len self.opt_dim = self.input_size * self.pred_len
# get bound # get bound
self.input_upper_bounds = np.tile(config.INPUT_UPPER_BOUND, self.input_upper_bounds = np.tile(config.INPUT_UPPER_BOUND,
self.pred_len) self.pred_len)
self.input_lower_bounds = np.tile(config.INPUT_LOWER_BOUND, self.input_lower_bounds = np.tile(config.INPUT_LOWER_BOUND,
@ -50,18 +52,18 @@ class CEM(Controller):
self.input_cost_fn = config.input_cost_fn self.input_cost_fn = config.input_cost_fn
# init mean # init mean
self.init_mean = np.tile((config.INPUT_UPPER_BOUND \ self.init_mean = np.tile((config.INPUT_UPPER_BOUND
+ config.INPUT_LOWER_BOUND) / 2., + config.INPUT_LOWER_BOUND) / 2.,
self.pred_len) self.pred_len)
self.prev_sol = self.init_mean.copy() self.prev_sol = self.init_mean.copy()
# init variance # init variance
var = np.ones_like(config.INPUT_UPPER_BOUND) \ var = np.ones_like(config.INPUT_UPPER_BOUND) \
* config.opt_config["CEM"]["init_var"] * config.opt_config["CEM"]["init_var"]
self.init_var = np.tile(var, self.pred_len) self.init_var = np.tile(var, self.pred_len)
# save # save
self.history_u = [] self.history_u = []
def clear_sol(self): def clear_sol(self):
""" clear prev sol """ clear prev sol
""" """
@ -77,21 +79,21 @@ class CEM(Controller):
Returns: Returns:
opt_input (numpy.ndarray): optimal input, shape(input_size, ) opt_input (numpy.ndarray): optimal input, shape(input_size, )
""" """
# initialize # initialize
opt_count = 0 opt_count = 0
# get configuration # get configuration
mean = self.prev_sol.flatten().copy() mean = self.prev_sol.flatten().copy()
var = self.init_var.flatten().copy() var = self.init_var.flatten().copy()
# make distribution # make distribution
X = stats.truncnorm(-1, 1, X = stats.truncnorm(-1, 1,
loc=np.zeros_like(mean),\ loc=np.zeros_like(mean),
scale=np.ones_like(mean)) scale=np.ones_like(mean))
while (opt_count < self.max_iters) and np.max(var) > self.epsilon: while (opt_count < self.max_iters) and np.max(var) > self.epsilon:
# constrained # constrained
lb_dist = mean - self.input_lower_bounds lb_dist = mean - self.input_lower_bounds
ub_dist = self.input_upper_bounds - mean ub_dist = self.input_upper_bounds - mean
constrained_var = np.minimum(np.minimum(np.square(lb_dist), constrained_var = np.minimum(np.minimum(np.square(lb_dist),
np.square(ub_dist)), np.square(ub_dist)),
@ -99,15 +101,15 @@ class CEM(Controller):
# sample # sample
samples = X.rvs(size=[self.pop_size, self.opt_dim]) \ samples = X.rvs(size=[self.pop_size, self.opt_dim]) \
* np.sqrt(constrained_var) \ * np.sqrt(constrained_var) \
+ mean + mean
# calc cost # calc cost
# samples.shape = (pop_size, opt_dim) # samples.shape = (pop_size, opt_dim)
costs = self.calc_cost(curr_x, costs = self.calc_cost(curr_x,
samples.reshape(self.pop_size, samples.reshape(self.pop_size,
self.pred_len, self.pred_len,
self.input_size), self.input_size),
g_xs) g_xs)
# sort cost # sort cost
@ -124,7 +126,7 @@ class CEM(Controller):
logger.debug("Var = {}".format(np.max(var))) logger.debug("Var = {}".format(np.max(var)))
logger.debug("Costs = {}".format(np.mean(costs))) logger.debug("Costs = {}".format(np.mean(costs)))
opt_count += 1 opt_count += 1
sol = mean.copy() sol = mean.copy()
self.prev_sol = np.concatenate((mean[self.input_size:], self.prev_sol = np.concatenate((mean[self.input_size:],
np.zeros(self.input_size))) np.zeros(self.input_size)))

View File

@ -2,9 +2,11 @@ import numpy as np
from ..envs.cost import calc_cost from ..envs.cost import calc_cost
class Controller(): class Controller():
""" Controller class """ Controller class
""" """
def __init__(self, config, model): def __init__(self, config, model):
""" """
""" """
@ -15,7 +17,7 @@ class Controller():
self.state_cost_fn = config.state_cost_fn self.state_cost_fn = config.state_cost_fn
self.terminal_state_cost_fn = config.terminal_state_cost_fn self.terminal_state_cost_fn = config.terminal_state_cost_fn
self.input_cost_fn = config.input_cost_fn self.input_cost_fn = config.input_cost_fn
def obtain_sol(self, curr_x, g_xs): def obtain_sol(self, curr_x, g_xs):
""" calculate the optimal inputs """ calculate the optimal inputs
Args: Args:
@ -26,7 +28,7 @@ class Controller():
""" """
raise NotImplementedError("Implement the algorithm to \ raise NotImplementedError("Implement the algorithm to \
get optimal input") get optimal input")
def calc_cost(self, curr_x, samples, g_xs): def calc_cost(self, curr_x, samples, g_xs):
""" calculate the cost of input samples """ calculate the cost of input samples
@ -46,22 +48,24 @@ class Controller():
# calc cost, pred_xs.shape = (pop_size, pred_len+1, state_size) # calc cost, pred_xs.shape = (pop_size, pred_len+1, state_size)
pred_xs = self.model.predict_traj(curr_x, samples) pred_xs = self.model.predict_traj(curr_x, samples)
# get particle cost # get particle cost
costs = calc_cost(pred_xs, samples, g_xs, costs = calc_cost(pred_xs, samples, g_xs,
self.state_cost_fn, self.input_cost_fn, \ self.state_cost_fn, self.input_cost_fn,
self.terminal_state_cost_fn) self.terminal_state_cost_fn)
return costs return costs
@staticmethod @staticmethod
def gradient_hamiltonian_x(x, u, lam): def gradient_hamiltonian_x(x, u, lam):
""" gradient of hamitonian with respect to the state, """ gradient of hamitonian with respect to the state,
""" """
raise NotImplementedError("Implement gradient of hamitonian with respect to the state") raise NotImplementedError(
"Implement gradient of hamitonian with respect to the state")
@staticmethod @staticmethod
def gradient_hamiltonian_u(x, u, lam): def gradient_hamiltonian_u(x, u, lam):
""" gradient of hamitonian with respect to the input """ gradient of hamitonian with respect to the input
""" """
raise NotImplementedError("Implement gradient of hamitonian with respect to the input") raise NotImplementedError(
"Implement gradient of hamitonian with respect to the input")

View File

@ -8,6 +8,7 @@ from ..envs.cost import calc_cost
logger = getLogger(__name__) logger = getLogger(__name__)
class DDP(Controller): class DDP(Controller):
""" Differential Dynamic Programming """ Differential Dynamic Programming
@ -18,11 +19,12 @@ class DDP(Controller):
https://github.com/studywolf/control, and https://github.com/studywolf/control, and
https://github.com/anassinator/ilqr https://github.com/anassinator/ilqr
""" """
def __init__(self, config, model): def __init__(self, config, model):
""" """
""" """
super(DDP, self).__init__(config, model) super(DDP, self).__init__(config, model)
# model # model
self.model = model self.model = model
@ -56,7 +58,7 @@ class DDP(Controller):
self.Q = config.Q self.Q = config.Q
self.R = config.R self.R = config.R
self.Sf = config.Sf self.Sf = config.Sf
# initialize # initialize
self.prev_sol = np.zeros((self.pred_len, self.input_size)) self.prev_sol = np.zeros((self.pred_len, self.input_size))
@ -65,7 +67,7 @@ class DDP(Controller):
""" """
logger.debug("Clear Sol") logger.debug("Clear Sol")
self.prev_sol = np.zeros((self.pred_len, self.input_size)) self.prev_sol = np.zeros((self.pred_len, self.input_size))
def obtain_sol(self, curr_x, g_xs): def obtain_sol(self, curr_x, g_xs):
""" calculate the optimal inputs """ calculate the optimal inputs
@ -89,26 +91,26 @@ class DDP(Controller):
while opt_count < self.max_iter: while opt_count < self.max_iter:
accepted_sol = False accepted_sol = False
# forward # forward
if update_sol == True: if update_sol == True:
pred_xs, cost, f_x, f_u, f_xx, f_ux, f_uu,\ pred_xs, cost, f_x, f_u, f_xx, f_ux, f_uu,\
l_x, l_xx, l_u, l_uu, l_ux = \ l_x, l_xx, l_u, l_uu, l_ux = \
self.forward(curr_x, g_xs, sol) self.forward(curr_x, g_xs, sol)
update_sol = False update_sol = False
try: try:
# backward # backward
k, K = self.backward(f_x, f_u, f_xx, f_ux, f_uu, \ k, K = self.backward(f_x, f_u, f_xx, f_ux, f_uu,
l_x, l_xx, l_u, l_uu, l_ux) l_x, l_xx, l_u, l_uu, l_ux)
# line search # line search
for alpha in alphas: for alpha in alphas:
new_pred_xs, new_sol = \ new_pred_xs, new_sol = \
self.calc_input(k, K, pred_xs, sol, alpha) self.calc_input(k, K, pred_xs, sol, alpha)
new_cost = calc_cost(new_pred_xs[np.newaxis, :, :], new_cost = calc_cost(new_pred_xs[np.newaxis, :, :],
new_sol[np.newaxis, :, :], new_sol[np.newaxis, :, :],
g_xs[np.newaxis, :, :], g_xs[np.newaxis, :, :],
self.state_cost_fn, self.state_cost_fn,
self.input_cost_fn, self.input_cost_fn,
self.terminal_state_cost_fn) self.terminal_state_cost_fn)
@ -131,15 +133,15 @@ class DDP(Controller):
# accept the solution # accept the solution
accepted_sol = True accepted_sol = True
break break
except np.linalg.LinAlgError as e: except np.linalg.LinAlgError as e:
logger.debug("Non ans : {}".format(e)) logger.debug("Non ans : {}".format(e))
if not accepted_sol: if not accepted_sol:
# increase regularization term. # increase regularization term.
self.delta = max(1.0, self.delta) * self.init_delta self.delta = max(1.0, self.delta) * self.init_delta
self.mu = max(self.mu_min, self.mu * self.delta) self.mu = max(self.mu_min, self.mu * self.delta)
logger.debug("Update regularization term to {}"\ logger.debug("Update regularization term to {}"
.format(self.mu)) .format(self.mu))
if self.mu >= self.mu_max: if self.mu >= self.mu_max:
logger.debug("Reach Max regularization term") logger.debug("Reach Max regularization term")
@ -156,7 +158,7 @@ class DDP(Controller):
self.prev_sol[-1] = sol[-1] # last use the terminal input self.prev_sol[-1] = sol[-1] # last use the terminal input
return sol[0] return sol[0]
def calc_input(self, k, K, pred_xs, sol, alpha): def calc_input(self, k, K, pred_xs, sol, alpha):
""" calc input trajectory by using k and K """ calc input trajectory by using k and K
@ -183,8 +185,8 @@ class DDP(Controller):
for t in range(pred_len): for t in range(pred_len):
new_sol[t] = sol[t] \ new_sol[t] = sol[t] \
+ alpha * k[t] \ + alpha * k[t] \
+ np.dot(K[t], (new_pred_xs[t] - pred_xs[t])) + np.dot(K[t], (new_pred_xs[t] - pred_xs[t]))
new_pred_xs[t+1] = self.model.predict_next_state(new_pred_xs[t], new_pred_xs[t+1] = self.model.predict_next_state(new_pred_xs[t],
new_sol[t]) new_sol[t])
@ -227,7 +229,7 @@ class DDP(Controller):
g_xs) g_xs)
# calc gradinet in batch # calc gradinet in batch
f_x = self.model.calc_f_x(pred_xs[:-1], sol, self.dt) f_x = self.model.calc_f_x(pred_xs[:-1], sol, self.dt)
f_u = self.model.calc_f_u(pred_xs[:-1], sol, self.dt) f_u = self.model.calc_f_u(pred_xs[:-1], sol, self.dt)
# calc hessian in batch # calc hessian in batch
f_xx = self.model.calc_f_xx(pred_xs[:-1], sol, self.dt) f_xx = self.model.calc_f_xx(pred_xs[:-1], sol, self.dt)
@ -237,13 +239,13 @@ class DDP(Controller):
# gradint of costs # gradint of costs
l_x, l_xx, l_u, l_uu, l_ux = \ l_x, l_xx, l_u, l_uu, l_ux = \
self._calc_gradient_hessian_cost(pred_xs, g_xs, sol) self._calc_gradient_hessian_cost(pred_xs, g_xs, sol)
return pred_xs, cost, f_x, f_u, f_xx, f_ux, f_uu, \ return pred_xs, cost, f_x, f_u, f_xx, f_ux, f_uu, \
l_x, l_xx, l_u, l_uu, l_ux l_x, l_xx, l_u, l_uu, l_ux
def _calc_gradient_hessian_cost(self, pred_xs, g_x, sol): def _calc_gradient_hessian_cost(self, pred_xs, g_x, sol):
""" calculate gradient and hessian of model and cost fn """ calculate gradient and hessian of model and cost fn
Args: Args:
pred_xs (numpy.ndarray): predict traj, pred_xs (numpy.ndarray): predict traj,
shape(pred_len+1, state_size) shape(pred_len+1, state_size)
@ -268,7 +270,7 @@ class DDP(Controller):
self.gradient_cost_fn_with_state(pred_xs[-1], self.gradient_cost_fn_with_state(pred_xs[-1],
g_x[-1], terminal=True) g_x[-1], terminal=True)
l_x = np.concatenate((l_x, terminal_l_x), axis=0) l_x = np.concatenate((l_x, terminal_l_x), axis=0)
# l_u.shape = (pred_len, input_size) # l_u.shape = (pred_len, input_size)
l_u = self.gradient_cost_fn_with_input(pred_xs[:-1], sol) l_u = self.gradient_cost_fn_with_input(pred_xs[:-1], sol)
@ -281,7 +283,7 @@ class DDP(Controller):
g_x[-1], terminal=True) g_x[-1], terminal=True)
l_xx = np.concatenate((l_xx, terminal_l_xx), axis=0) l_xx = np.concatenate((l_xx, terminal_l_xx), axis=0)
# l_uu.shape = (pred_len, input_size, input_size) # l_uu.shape = (pred_len, input_size, input_size)
l_uu = self.hessian_cost_fn_with_input(pred_xs[:-1], sol) l_uu = self.hessian_cost_fn_with_input(pred_xs[:-1], sol)
@ -321,7 +323,7 @@ class DDP(Controller):
# get size # get size
(_, state_size, _) = f_x.shape (_, state_size, _) = f_x.shape
# initialzie # initialzie
V_x = l_x[-1] V_x = l_x[-1]
V_xx = l_xx[-1] V_xx = l_xx[-1]
k = np.zeros((self.pred_len, self.input_size)) k = np.zeros((self.pred_len, self.input_size))
@ -388,7 +390,7 @@ class DDP(Controller):
""" """
# get size # get size
state_size = len(l_x) state_size = len(l_x)
Q_x = l_x + np.dot(f_x.T, V_x) Q_x = l_x + np.dot(f_x.T, V_x)
Q_u = l_u + np.dot(f_u.T, V_x) Q_u = l_u + np.dot(f_u.T, V_x)
Q_xx = l_xx + np.dot(np.dot(f_x.T, V_xx), f_x) Q_xx = l_xx + np.dot(np.dot(f_x.T, V_xx), f_x)
@ -402,4 +404,4 @@ class DDP(Controller):
Q_ux += np.tensordot(V_x, f_ux, axes=1) Q_ux += np.tensordot(V_x, f_ux, axes=1)
Q_uu += np.tensordot(V_x, f_uu, axes=1) Q_uu += np.tensordot(V_x, f_uu, axes=1)
return Q_x, Q_u, Q_xx, Q_ux, Q_uu return Q_x, Q_u, Q_xx, Q_ux, Q_uu

View File

@ -8,6 +8,7 @@ from ..envs.cost import calc_cost
logger = getLogger(__name__) logger = getLogger(__name__)
class iLQR(Controller): class iLQR(Controller):
""" iterative Liner Quadratique Regulator """ iterative Liner Quadratique Regulator
@ -16,11 +17,12 @@ class iLQR(Controller):
Intelligent Robots and Systems (pp. 4906-4913). and Study Wolf, Intelligent Robots and Systems (pp. 4906-4913). and Study Wolf,
https://github.com/studywolf/control https://github.com/studywolf/control
""" """
def __init__(self, config, model): def __init__(self, config, model):
""" """
""" """
super(iLQR, self).__init__(config, model) super(iLQR, self).__init__(config, model)
# model # model
self.model = model self.model = model
@ -58,7 +60,7 @@ class iLQR(Controller):
""" """
logger.debug("Clear Sol") logger.debug("Clear Sol")
self.prev_sol = np.zeros((self.pred_len, self.input_size)) self.prev_sol = np.zeros((self.pred_len, self.input_size))
def obtain_sol(self, curr_x, g_xs): def obtain_sol(self, curr_x, g_xs):
""" calculate the optimal inputs """ calculate the optimal inputs
@ -82,12 +84,12 @@ class iLQR(Controller):
while opt_count < self.max_iter: while opt_count < self.max_iter:
accepted_sol = False accepted_sol = False
# forward # forward
if update_sol == True: if update_sol == True:
pred_xs, cost, f_x, f_u, l_x, l_xx, l_u, l_uu, l_ux = \ pred_xs, cost, f_x, f_u, l_x, l_xx, l_u, l_uu, l_ux = \
self.forward(curr_x, g_xs, sol) self.forward(curr_x, g_xs, sol)
update_sol = False update_sol = False
try: try:
# backward # backward
k, K = self.backward(f_x, f_u, l_x, l_xx, l_u, l_uu, l_ux) k, K = self.backward(f_x, f_u, l_x, l_xx, l_u, l_uu, l_ux)
@ -96,10 +98,10 @@ class iLQR(Controller):
for alpha in alphas: for alpha in alphas:
new_pred_xs, new_sol = \ new_pred_xs, new_sol = \
self.calc_input(k, K, pred_xs, sol, alpha) self.calc_input(k, K, pred_xs, sol, alpha)
new_cost = calc_cost(new_pred_xs[np.newaxis, :, :], new_cost = calc_cost(new_pred_xs[np.newaxis, :, :],
new_sol[np.newaxis, :, :], new_sol[np.newaxis, :, :],
g_xs[np.newaxis, :, :], g_xs[np.newaxis, :, :],
self.state_cost_fn, self.state_cost_fn,
self.input_cost_fn, self.input_cost_fn,
self.terminal_state_cost_fn) self.terminal_state_cost_fn)
@ -122,15 +124,15 @@ class iLQR(Controller):
# accept the solution # accept the solution
accepted_sol = True accepted_sol = True
break break
except np.linalg.LinAlgError as e: except np.linalg.LinAlgError as e:
logger.debug("Non ans : {}".format(e)) logger.debug("Non ans : {}".format(e))
if not accepted_sol: if not accepted_sol:
# increase regularization term. # increase regularization term.
self.delta = max(1.0, self.delta) * self.init_delta self.delta = max(1.0, self.delta) * self.init_delta
self.mu = max(self.mu_min, self.mu * self.delta) self.mu = max(self.mu_min, self.mu * self.delta)
logger.debug("Update regularization term to {}"\ logger.debug("Update regularization term to {}"
.format(self.mu)) .format(self.mu))
if self.mu >= self.mu_max: if self.mu >= self.mu_max:
logger.debug("Reach Max regularization term") logger.debug("Reach Max regularization term")
@ -147,7 +149,7 @@ class iLQR(Controller):
self.prev_sol[-1] = sol[-1] # last use the terminal input self.prev_sol[-1] = sol[-1] # last use the terminal input
return sol[0] return sol[0]
def calc_input(self, k, K, pred_xs, sol, alpha): def calc_input(self, k, K, pred_xs, sol, alpha):
""" calc input trajectory by using k and K """ calc input trajectory by using k and K
@ -174,8 +176,8 @@ class iLQR(Controller):
for t in range(pred_len): for t in range(pred_len):
new_sol[t] = sol[t] \ new_sol[t] = sol[t] \
+ alpha * k[t] \ + alpha * k[t] \
+ np.dot(K[t], (new_pred_xs[t] - pred_xs[t])) + np.dot(K[t], (new_pred_xs[t] - pred_xs[t]))
new_pred_xs[t+1] = self.model.predict_next_state(new_pred_xs[t], new_pred_xs[t+1] = self.model.predict_next_state(new_pred_xs[t],
new_sol[t]) new_sol[t])
@ -212,18 +214,18 @@ class iLQR(Controller):
g_xs) g_xs)
# calc gradinet in batch # calc gradinet in batch
f_x = self.model.calc_f_x(pred_xs[:-1], sol, self.dt) f_x = self.model.calc_f_x(pred_xs[:-1], sol, self.dt)
f_u = self.model.calc_f_u(pred_xs[:-1], sol, self.dt) f_u = self.model.calc_f_u(pred_xs[:-1], sol, self.dt)
# gradint of costs # gradint of costs
l_x, l_xx, l_u, l_uu, l_ux = \ l_x, l_xx, l_u, l_uu, l_ux = \
self._calc_gradient_hessian_cost(pred_xs, g_xs, sol) self._calc_gradient_hessian_cost(pred_xs, g_xs, sol)
return pred_xs, cost, f_x, f_u, l_x, l_xx, l_u, l_uu, l_ux return pred_xs, cost, f_x, f_u, l_x, l_xx, l_u, l_uu, l_ux
def _calc_gradient_hessian_cost(self, pred_xs, g_x, sol): def _calc_gradient_hessian_cost(self, pred_xs, g_x, sol):
""" calculate gradient and hessian of model and cost fn """ calculate gradient and hessian of model and cost fn
Args: Args:
pred_xs (numpy.ndarray): predict traj, pred_xs (numpy.ndarray): predict traj,
shape(pred_len+1, state_size) shape(pred_len+1, state_size)
@ -248,7 +250,7 @@ class iLQR(Controller):
self.gradient_cost_fn_with_state(pred_xs[-1], self.gradient_cost_fn_with_state(pred_xs[-1],
g_x[-1], terminal=True) g_x[-1], terminal=True)
l_x = np.concatenate((l_x, terminal_l_x), axis=0) l_x = np.concatenate((l_x, terminal_l_x), axis=0)
# l_u.shape = (pred_len, input_size) # l_u.shape = (pred_len, input_size)
l_u = self.gradient_cost_fn_with_input(pred_xs[:-1], sol) l_u = self.gradient_cost_fn_with_input(pred_xs[:-1], sol)
@ -261,7 +263,7 @@ class iLQR(Controller):
g_x[-1], terminal=True) g_x[-1], terminal=True)
l_xx = np.concatenate((l_xx, terminal_l_xx), axis=0) l_xx = np.concatenate((l_xx, terminal_l_xx), axis=0)
# l_uu.shape = (pred_len, input_size, input_size) # l_uu.shape = (pred_len, input_size, input_size)
l_uu = self.hessian_cost_fn_with_input(pred_xs[:-1], sol) l_uu = self.hessian_cost_fn_with_input(pred_xs[:-1], sol)
@ -287,7 +289,7 @@ class iLQR(Controller):
shape(pred_len, input_size, input_size) shape(pred_len, input_size, input_size)
l_ux (numpy.ndarray): hessian of cost with respect l_ux (numpy.ndarray): hessian of cost with respect
to state and input, shape(pred_len, input_size, state_size) to state and input, shape(pred_len, input_size, state_size)
Returns: Returns:
k (numpy.ndarray): gain, shape(pred_len, input_size) k (numpy.ndarray): gain, shape(pred_len, input_size)
K (numpy.ndarray): gain, shape(pred_len, input_size, state_size) K (numpy.ndarray): gain, shape(pred_len, input_size, state_size)
@ -295,7 +297,7 @@ class iLQR(Controller):
# get size # get size
(_, state_size, _) = f_x.shape (_, state_size, _) = f_x.shape
# initialzie # initialzie
V_x = l_x[-1] V_x = l_x[-1]
V_xx = l_xx[-1] V_xx = l_xx[-1]
k = np.zeros((self.pred_len, self.input_size)) k = np.zeros((self.pred_len, self.input_size))
@ -352,7 +354,7 @@ class iLQR(Controller):
""" """
# get size # get size
state_size = len(l_x) state_size = len(l_x)
Q_x = l_x + np.dot(f_x.T, V_x) Q_x = l_x + np.dot(f_x.T, V_x)
Q_u = l_u + np.dot(f_u.T, V_x) Q_u = l_u + np.dot(f_u.T, V_x)
Q_xx = l_xx + np.dot(np.dot(f_x.T, V_xx), f_x) Q_xx = l_xx + np.dot(np.dot(f_x.T, V_xx), f_x)
@ -361,4 +363,4 @@ class iLQR(Controller):
Q_ux = l_ux + np.dot(np.dot(f_u.T, (V_xx + reg)), f_x) Q_ux = l_ux + np.dot(np.dot(f_u.T, (V_xx + reg)), f_x)
Q_uu = l_uu + np.dot(np.dot(f_u.T, (V_xx + reg)), f_u) Q_uu = l_uu + np.dot(np.dot(f_u.T, (V_xx + reg)), f_u)
return Q_x, Q_u, Q_xx, Q_ux, Q_uu return Q_x, Q_u, Q_xx, Q_ux, Q_uu

View File

@ -6,6 +6,7 @@ from .mppi_williams import MPPIWilliams
from .ilqr import iLQR from .ilqr import iLQR
from .ddp import DDP from .ddp import DDP
def make_controller(args, config, model): def make_controller(args, config, model):
if args.controller_type == "MPC": if args.controller_type == "MPC":
@ -22,5 +23,5 @@ def make_controller(args, config, model):
return iLQR(config, model) return iLQR(config, model)
elif args.controller_type == "DDP": elif args.controller_type == "DDP":
return DDP(config, model) return DDP(config, model)
raise ValueError("No controller: {}".format(args.controller_type)) raise ValueError("No controller: {}".format(args.controller_type))

View File

@ -9,6 +9,7 @@ from ..envs.cost import calc_cost
logger = getLogger(__name__) logger = getLogger(__name__)
class LinearMPC(Controller): class LinearMPC(Controller):
""" Model Predictive Controller for linear model """ Model Predictive Controller for linear model
@ -21,6 +22,7 @@ class LinearMPC(Controller):
Ref: Ref:
Maciejowski, J. M. (2002). Predictive control: with constraints. Maciejowski, J. M. (2002). Predictive control: with constraints.
""" """
def __init__(self, config, model): def __init__(self, config, model):
""" """
Args: Args:
@ -55,7 +57,7 @@ class LinearMPC(Controller):
self.dt_input_upper_bound = config.DT_INPUT_UPPER_BOUND self.dt_input_upper_bound = config.DT_INPUT_UPPER_BOUND
self.input_lower_bound = config.INPUT_LOWER_BOUND self.input_lower_bound = config.INPUT_LOWER_BOUND
self.input_upper_bound = config.INPUT_UPPER_BOUND self.input_upper_bound = config.INPUT_UPPER_BOUND
# setup controllers # setup controllers
self.W = None self.W = None
self.omega = None self.omega = None
@ -66,7 +68,7 @@ class LinearMPC(Controller):
# history # history
self.history_u = [np.zeros(self.input_size)] self.history_u = [np.zeros(self.input_size)]
def setup(self): def setup(self):
""" """
setup Model Predictive Control as a quadratic programming setup Model Predictive Control as a quadratic programming
@ -77,11 +79,11 @@ class LinearMPC(Controller):
for _ in range(self.pred_len - 1): for _ in range(self.pred_len - 1):
temp_mat = np.matmul(A_factorials[-1], self.A) temp_mat = np.matmul(A_factorials[-1], self.A)
self.phi_mat = np.vstack((self.phi_mat, temp_mat)) self.phi_mat = np.vstack((self.phi_mat, temp_mat))
A_factorials.append(temp_mat) # after we use this factorials A_factorials.append(temp_mat) # after we use this factorials
self.gamma_mat = self.B.copy() self.gamma_mat = self.B.copy()
gammma_mat_temp = self.B.copy() gammma_mat_temp = self.B.copy()
for i in range(self.pred_len - 1): for i in range(self.pred_len - 1):
temp_1_mat = np.matmul(A_factorials[i], self.B) temp_1_mat = np.matmul(A_factorials[i], self.B)
gammma_mat_temp = temp_1_mat + gammma_mat_temp gammma_mat_temp = temp_1_mat + gammma_mat_temp
@ -91,8 +93,8 @@ class LinearMPC(Controller):
for i in range(self.pred_len - 1): for i in range(self.pred_len - 1):
temp_mat = np.zeros_like(self.gamma_mat) temp_mat = np.zeros_like(self.gamma_mat)
temp_mat[int((i + 1)*self.state_size): , :] =\ temp_mat[int((i + 1)*self.state_size):, :] =\
self.gamma_mat[:-int((i + 1)*self.state_size) , :] self.gamma_mat[:-int((i + 1)*self.state_size), :]
self.theta_mat = np.hstack((self.theta_mat, temp_mat)) self.theta_mat = np.hstack((self.theta_mat, temp_mat))
@ -114,12 +116,12 @@ class LinearMPC(Controller):
for i in range(self.pred_len - 1): for i in range(self.pred_len - 1):
for j in range(self.input_size): for j in range(self.input_size):
temp_F[j * 2: (j + 1) * 2,\ temp_F[j * 2: (j + 1) * 2,
((i+1) * self.input_size) + j] = np.array([1., -1.]) ((i+1) * self.input_size) + j] = np.array([1., -1.])
self.F = np.vstack((self.F, temp_F)) self.F = np.vstack((self.F, temp_F))
self.F1 = self.F[:, :self.input_size] self.F1 = self.F[:, :self.input_size]
temp_f = [] temp_f = []
for i in range(self.input_size): for i in range(self.input_size):
temp_f.append(-1 * self.input_upper_bound[i]) temp_f.append(-1 * self.input_upper_bound[i])
@ -168,7 +170,7 @@ class LinearMPC(Controller):
H = H * 0.5 H = H * 0.5
# constraints # constraints
A = [] A = []
b = [] b = []
if self.W is not None: if self.W is not None:
@ -187,14 +189,14 @@ class LinearMPC(Controller):
# using cvxopt # using cvxopt
def optimized_func(dt_us): def optimized_func(dt_us):
return (np.dot(dt_us, np.dot(H, dt_us.reshape(-1, 1))) \ return (np.dot(dt_us, np.dot(H, dt_us.reshape(-1, 1)))
- np.dot(G.T, dt_us.reshape(-1, 1)))[0] - np.dot(G.T, dt_us.reshape(-1, 1)))[0]
# constraint # constraint
lb = np.array([-np.inf for _ in range(len(ub))]) # one side cons lb = np.array([-np.inf for _ in range(len(ub))]) # one side cons
cons = LinearConstraint(A, lb, ub) cons = LinearConstraint(A, lb, ub)
# solve # solve
opt_sol = minimize(optimized_func, self.prev_sol.flatten(),\ opt_sol = minimize(optimized_func, self.prev_sol.flatten(),
constraints=[cons]) constraints=[cons])
opt_dt_us = opt_sol.x opt_dt_us = opt_sol.x
@ -213,21 +215,21 @@ class LinearMPC(Controller):
""" """
# to dt form # to dt form
opt_dt_u_seq = np.cumsum(opt_dt_us.reshape(self.pred_len,\ opt_dt_u_seq = np.cumsum(opt_dt_us.reshape(self.pred_len,
self.input_size), self.input_size),
axis=0) axis=0)
self.prev_sol = opt_dt_u_seq.copy() self.prev_sol = opt_dt_u_seq.copy()
opt_u_seq = opt_dt_u_seq + self.history_u[-1] opt_u_seq = opt_dt_u_seq + self.history_u[-1]
# save # save
self.history_u.append(opt_u_seq[0]) self.history_u.append(opt_u_seq[0])
# check costs # check costs
costs = self.calc_cost(curr_x, costs = self.calc_cost(curr_x,
opt_u_seq.reshape(1, opt_u_seq.reshape(1,
self.pred_len, self.pred_len,
self.input_size), self.input_size),
g_xs) g_xs)
logger.debug("Cost = {}".format(costs)) logger.debug("Cost = {}".format(costs))
@ -235,4 +237,4 @@ class LinearMPC(Controller):
return opt_u_seq[0] return opt_u_seq[0]
def __str__(self): def __str__(self):
return "LinearMPC" return "LinearMPC"

View File

@ -8,6 +8,7 @@ from ..envs.cost import calc_cost
logger = getLogger(__name__) logger = getLogger(__name__)
class MPPI(Controller): class MPPI(Controller):
""" Model Predictive Path Integral for linear and nonlinear method """ Model Predictive Path Integral for linear and nonlinear method
@ -18,6 +19,7 @@ class MPPI(Controller):
Deep Dynamics Models for Learning Dexterous Manipulation. Deep Dynamics Models for Learning Dexterous Manipulation.
arXiv preprint arXiv:1909.11652. arXiv preprint arXiv:1909.11652.
""" """
def __init__(self, config, model): def __init__(self, config, model):
super(MPPI, self).__init__(config, model) super(MPPI, self).__init__(config, model)
@ -35,7 +37,7 @@ class MPPI(Controller):
self.noise_sigma = config.opt_config["MPPI"]["noise_sigma"] self.noise_sigma = config.opt_config["MPPI"]["noise_sigma"]
self.opt_dim = self.input_size * self.pred_len self.opt_dim = self.input_size * self.pred_len
# get bound # get bound
self.input_upper_bounds = np.tile(config.INPUT_UPPER_BOUND, self.input_upper_bounds = np.tile(config.INPUT_UPPER_BOUND,
(self.pred_len, 1)) (self.pred_len, 1))
self.input_lower_bounds = np.tile(config.INPUT_LOWER_BOUND, self.input_lower_bounds = np.tile(config.INPUT_LOWER_BOUND,
@ -47,14 +49,14 @@ class MPPI(Controller):
self.input_cost_fn = config.input_cost_fn self.input_cost_fn = config.input_cost_fn
# init mean # init mean
self.prev_sol = np.tile((config.INPUT_UPPER_BOUND \ self.prev_sol = np.tile((config.INPUT_UPPER_BOUND
+ config.INPUT_LOWER_BOUND) / 2., + config.INPUT_LOWER_BOUND) / 2.,
self.pred_len) self.pred_len)
self.prev_sol = self.prev_sol.reshape(self.pred_len, self.input_size) self.prev_sol = self.prev_sol.reshape(self.pred_len, self.input_size)
# save # save
self.history_u = [np.zeros(self.input_size)] self.history_u = [np.zeros(self.input_size)]
def clear_sol(self): def clear_sol(self):
""" clear prev sol """ clear prev sol
""" """
@ -74,24 +76,24 @@ class MPPI(Controller):
""" """
# get noised inputs # get noised inputs
noise = np.random.normal( noise = np.random.normal(
loc=0, scale=1.0, size=(self.pop_size, self.pred_len, loc=0, scale=1.0, size=(self.pop_size, self.pred_len,
self.input_size)) * self.noise_sigma self.input_size)) * self.noise_sigma
noised_inputs = noise.copy() noised_inputs = noise.copy()
for t in range(self.pred_len): for t in range(self.pred_len):
if t > 0: if t > 0:
noised_inputs[:, t, :] = self.beta \ noised_inputs[:, t, :] = self.beta \
* (self.prev_sol[t, :] \ * (self.prev_sol[t, :]
+ noise[:, t, :]) \ + noise[:, t, :]) \
+ (1 - self.beta) \ + (1 - self.beta) \
* noised_inputs[:, t-1, :] * noised_inputs[:, t-1, :]
else: else:
noised_inputs[:, t, :] = self.beta \ noised_inputs[:, t, :] = self.beta \
* (self.prev_sol[t, :] \ * (self.prev_sol[t, :]
+ noise[:, t, :]) \ + noise[:, t, :]) \
+ (1 - self.beta) \ + (1 - self.beta) \
* self.history_u[-1] * self.history_u[-1]
# clip actions # clip actions
noised_inputs = np.clip( noised_inputs = np.clip(
noised_inputs, self.input_lower_bounds, self.input_upper_bounds) noised_inputs, self.input_lower_bounds, self.input_upper_bounds)
@ -108,7 +110,7 @@ class MPPI(Controller):
# weight actions # weight actions
weighted_inputs = exp_rewards[:, np.newaxis, np.newaxis] \ weighted_inputs = exp_rewards[:, np.newaxis, np.newaxis] \
* noised_inputs * noised_inputs
sol = np.sum(weighted_inputs, 0) / denom sol = np.sum(weighted_inputs, 0) / denom
# update # update
@ -121,4 +123,4 @@ class MPPI(Controller):
return sol[0] return sol[0]
def __str__(self): def __str__(self):
return "MPPI" return "MPPI"

View File

@ -8,6 +8,7 @@ from ..envs.cost import calc_cost
logger = getLogger(__name__) logger = getLogger(__name__)
class MPPIWilliams(Controller): class MPPIWilliams(Controller):
""" Model Predictive Path Integral for linear and nonlinear method """ Model Predictive Path Integral for linear and nonlinear method
@ -19,6 +20,7 @@ class MPPIWilliams(Controller):
2017 IEEE International Conference on Robotics and Automation (ICRA), 2017 IEEE International Conference on Robotics and Automation (ICRA),
Singapore, 2017, pp. 1714-1721. Singapore, 2017, pp. 1714-1721.
""" """
def __init__(self, config, model): def __init__(self, config, model):
super(MPPIWilliams, self).__init__(config, model) super(MPPIWilliams, self).__init__(config, model)
@ -35,7 +37,7 @@ class MPPIWilliams(Controller):
self.noise_sigma = config.opt_config["MPPIWilliams"]["noise_sigma"] self.noise_sigma = config.opt_config["MPPIWilliams"]["noise_sigma"]
self.opt_dim = self.input_size * self.pred_len self.opt_dim = self.input_size * self.pred_len
# get bound # get bound
self.input_upper_bounds = np.tile(config.INPUT_UPPER_BOUND, self.input_upper_bounds = np.tile(config.INPUT_UPPER_BOUND,
(self.pred_len, 1)) (self.pred_len, 1))
self.input_lower_bounds = np.tile(config.INPUT_LOWER_BOUND, self.input_lower_bounds = np.tile(config.INPUT_LOWER_BOUND,
@ -47,14 +49,14 @@ class MPPIWilliams(Controller):
self.input_cost_fn = config.input_cost_fn self.input_cost_fn = config.input_cost_fn
# init mean # init mean
self.prev_sol = np.tile((config.INPUT_UPPER_BOUND \ self.prev_sol = np.tile((config.INPUT_UPPER_BOUND
+ config.INPUT_LOWER_BOUND) / 2., + config.INPUT_LOWER_BOUND) / 2.,
self.pred_len) self.pred_len)
self.prev_sol = self.prev_sol.reshape(self.pred_len, self.input_size) self.prev_sol = self.prev_sol.reshape(self.pred_len, self.input_size)
# save # save
self.history_u = [np.zeros(self.input_size)] self.history_u = [np.zeros(self.input_size)]
def clear_sol(self): def clear_sol(self):
""" clear prev sol """ clear prev sol
""" """
@ -62,7 +64,7 @@ class MPPIWilliams(Controller):
self.prev_sol = \ self.prev_sol = \
(self.input_upper_bounds + self.input_lower_bounds) / 2. (self.input_upper_bounds + self.input_lower_bounds) / 2.
self.prev_sol = self.prev_sol.reshape(self.pred_len, self.input_size) self.prev_sol = self.prev_sol.reshape(self.pred_len, self.input_size)
def calc_cost(self, curr_x, samples, g_xs): def calc_cost(self, curr_x, samples, g_xs):
""" calculate the cost of input samples by using MPPI's eq """ calculate the cost of input samples by using MPPI's eq
@ -82,12 +84,12 @@ class MPPIWilliams(Controller):
# calc cost, pred_xs.shape = (pop_size, pred_len+1, state_size) # calc cost, pred_xs.shape = (pop_size, pred_len+1, state_size)
pred_xs = self.model.predict_traj(curr_x, samples) pred_xs = self.model.predict_traj(curr_x, samples)
# get particle cost # get particle cost
costs = calc_cost(pred_xs, samples, g_xs, costs = calc_cost(pred_xs, samples, g_xs,
self.state_cost_fn, None, \ self.state_cost_fn, None,
self.terminal_state_cost_fn) self.terminal_state_cost_fn)
return costs return costs
def obtain_sol(self, curr_x, g_xs): def obtain_sol(self, curr_x, g_xs):
@ -101,9 +103,9 @@ class MPPIWilliams(Controller):
""" """
# get noised inputs # get noised inputs
noise = np.random.normal( noise = np.random.normal(
loc=0, scale=1.0, size=(self.pop_size, self.pred_len, loc=0, scale=1.0, size=(self.pop_size, self.pred_len,
self.input_size)) * self.noise_sigma self.input_size)) * self.noise_sigma
noised_inputs = self.prev_sol + noise noised_inputs = self.prev_sol + noise
# clip actions # clip actions
@ -120,7 +122,7 @@ class MPPIWilliams(Controller):
# mppi update # mppi update
beta = np.min(costs) beta = np.min(costs)
eta = np.sum(np.exp(- 1. / self.lam * (costs - beta)), axis=0) \ eta = np.sum(np.exp(- 1. / self.lam * (costs - beta)), axis=0) \
+ 1e-10 + 1e-10
# weight # weight
# eta.shape = (pred_len, input_size) # eta.shape = (pred_len, input_size)
@ -128,7 +130,7 @@ class MPPIWilliams(Controller):
# update inputs # update inputs
sol = self.prev_sol \ sol = self.prev_sol \
+ np.sum(weights[:, np.newaxis, np.newaxis] * noise, axis=0) + np.sum(weights[:, np.newaxis, np.newaxis] * noise, axis=0)
# update # update
self.prev_sol[:-1] = sol[1:] self.prev_sol[:-1] = sol[1:]
@ -140,4 +142,4 @@ class MPPIWilliams(Controller):
return sol[0] return sol[0]
def __str__(self): def __str__(self):
return "MPPIWilliams" return "MPPIWilliams"

View File

@ -8,6 +8,7 @@ from ..envs.cost import calc_cost
logger = getLogger(__name__) logger = getLogger(__name__)
class RandomShooting(Controller): class RandomShooting(Controller):
""" Random Shooting Method for linear and nonlinear method """ Random Shooting Method for linear and nonlinear method
@ -19,6 +20,7 @@ class RandomShooting(Controller):
using probabilistic dynamics models. using probabilistic dynamics models.
In Advances in Neural Information Processing Systems (pp. 4754-4765). In Advances in Neural Information Processing Systems (pp. 4754-4765).
""" """
def __init__(self, config, model): def __init__(self, config, model):
super(RandomShooting, self).__init__(config, model) super(RandomShooting, self).__init__(config, model)
@ -33,7 +35,7 @@ class RandomShooting(Controller):
self.pop_size = config.opt_config["Random"]["popsize"] self.pop_size = config.opt_config["Random"]["popsize"]
self.opt_dim = self.input_size * self.pred_len self.opt_dim = self.input_size * self.pred_len
# get bound # get bound
self.input_upper_bounds = np.tile(config.INPUT_UPPER_BOUND, self.input_upper_bounds = np.tile(config.INPUT_UPPER_BOUND,
self.pred_len) self.pred_len)
self.input_lower_bounds = np.tile(config.INPUT_LOWER_BOUND, self.input_lower_bounds = np.tile(config.INPUT_LOWER_BOUND,
@ -46,7 +48,7 @@ class RandomShooting(Controller):
# save # save
self.history_u = [] self.history_u = []
def obtain_sol(self, curr_x, g_xs): def obtain_sol(self, curr_x, g_xs):
""" calculate the optimal inputs """ calculate the optimal inputs
@ -65,8 +67,8 @@ class RandomShooting(Controller):
# calc cost # calc cost
costs = self.calc_cost(curr_x, costs = self.calc_cost(curr_x,
samples.reshape(self.pop_size, samples.reshape(self.pop_size,
self.pred_len, self.pred_len,
self.input_size), self.input_size),
g_xs) g_xs)
# solution # solution
sol = samples[np.argmin(costs)] sol = samples[np.argmin(costs)]
@ -74,4 +76,4 @@ class RandomShooting(Controller):
return sol.reshape(self.pred_len, self.input_size).copy()[0] return sol.reshape(self.pred_len, self.input_size).copy()[0]
def __str__(self): def __str__(self):
return "RandomShooting" return "RandomShooting"

View File

@ -5,4 +5,4 @@ from PythonLinearNonlinearControl.envs.first_order_lag \
from PythonLinearNonlinearControl.envs.two_wheeled \ from PythonLinearNonlinearControl.envs.two_wheeled \
import TwoWheeledConstEnv # NOQA import TwoWheeledConstEnv # NOQA
from PythonLinearNonlinearControl.envs.two_wheeled \ from PythonLinearNonlinearControl.envs.two_wheeled \
import TwoWheeledTrackEnv # NOQA import TwoWheeledTrackEnv # NOQA

View File

@ -4,6 +4,7 @@ from matplotlib.axes import Axes
from .env import Env from .env import Env
from ..plotters.plot_objs import square from ..plotters.plot_objs import square
class CartPoleEnv(Env): class CartPoleEnv(Env):
""" Cartpole Environment """ Cartpole Environment
@ -13,13 +14,14 @@ class CartPoleEnv(Env):
6-832-underactuated-robotics-spring-2009/readings/ 6-832-underactuated-robotics-spring-2009/readings/
MIT6_832s09_read_ch03.pdf MIT6_832s09_read_ch03.pdf
""" """
def __init__(self): def __init__(self):
""" """
""" """
self.config = {"state_size" : 4, self.config = {"state_size": 4,
"input_size" : 1, "input_size": 1,
"dt" : 0.02, "dt": 0.02,
"max_step" : 500, "max_step": 500,
"input_lower_bound": [-3.], "input_lower_bound": [-3.],
"input_upper_bound": [3.], "input_upper_bound": [3.],
"mp": 0.2, "mp": 0.2,
@ -30,7 +32,7 @@ class CartPoleEnv(Env):
} }
super(CartPoleEnv, self).__init__(self.config) super(CartPoleEnv, self).__init__(self.config)
def reset(self, init_x=None): def reset(self, init_x=None):
""" reset state """ reset state
@ -39,7 +41,7 @@ class CartPoleEnv(Env):
info (dict): information info (dict): information
""" """
self.step_count = 0 self.step_count = 0
theta = np.random.randn(1) theta = np.random.randn(1)
self.curr_x = np.array([0., 0., theta[0], 0.]) self.curr_x = np.array([0., 0., theta[0], 0.])
@ -48,7 +50,7 @@ class CartPoleEnv(Env):
# goal # goal
self.g_x = np.array([0., 0., -np.pi, 0.]) self.g_x = np.array([0., 0., -np.pi, 0.])
# clear memory # clear memory
self.history_x = [] self.history_x = []
self.history_g_x = [] self.history_g_x = []
@ -76,50 +78,50 @@ class CartPoleEnv(Env):
# x # x
d_x0 = self.curr_x[1] d_x0 = self.curr_x[1]
# v_x # v_x
d_x1 = (u[0] + self.config["mp"] * np.sin(self.curr_x[2]) \ d_x1 = (u[0] + self.config["mp"] * np.sin(self.curr_x[2])
* (self.config["l"] * (self.curr_x[3]**2) \ * (self.config["l"] * (self.curr_x[3]**2)
+ self.config["g"] * np.cos(self.curr_x[2]))) \ + self.config["g"] * np.cos(self.curr_x[2]))) \
/ (self.config["mc"] + self.config["mp"] \ / (self.config["mc"] + self.config["mp"]
* (np.sin(self.curr_x[2])**2)) * (np.sin(self.curr_x[2])**2))
# theta # theta
d_x2 = self.curr_x[3] d_x2 = self.curr_x[3]
# v_theta # v_theta
d_x3 = (-u[0] * np.cos(self.curr_x[2]) \ d_x3 = (-u[0] * np.cos(self.curr_x[2])
- self.config["mp"] * self.config["l"] * (self.curr_x[3]**2) \ - self.config["mp"] * self.config["l"] * (self.curr_x[3]**2)
* np.cos(self.curr_x[2]) * np.sin(self.curr_x[2]) \ * np.cos(self.curr_x[2]) * np.sin(self.curr_x[2])
- (self.config["mc"] + self.config["mp"]) * self.config["g"] \ - (self.config["mc"] + self.config["mp"]) * self.config["g"]
* np.sin(self.curr_x[2])) \ * np.sin(self.curr_x[2])) \
/ (self.config["l"] * (self.config["mc"] + self.config["mp"] \ / (self.config["l"] * (self.config["mc"] + self.config["mp"]
* (np.sin(self.curr_x[2])**2))) * (np.sin(self.curr_x[2])**2)))
next_x = self.curr_x +\ next_x = self.curr_x +\
np.array([d_x0, d_x1, d_x2, d_x3]) * self.config["dt"] np.array([d_x0, d_x1, d_x2, d_x3]) * self.config["dt"]
# TODO: costs # TODO: costs
costs = 0. costs = 0.
costs += 0.1 * np.sum(u**2) costs += 0.1 * np.sum(u**2)
costs += 6. * self.curr_x[0]**2 \ costs += 6. * self.curr_x[0]**2 \
+ 12. * (np.cos(self.curr_x[2]) + 1.)**2 \ + 12. * (np.cos(self.curr_x[2]) + 1.)**2 \
+ 0.1 * self.curr_x[1]**2 \ + 0.1 * self.curr_x[1]**2 \
+ 0.1 * self.curr_x[3]**2 + 0.1 * self.curr_x[3]**2
# save history # save history
self.history_x.append(next_x.flatten()) self.history_x.append(next_x.flatten())
self.history_g_x.append(self.g_x.flatten()) self.history_g_x.append(self.g_x.flatten())
# update # update
self.curr_x = next_x.flatten().copy() self.curr_x = next_x.flatten().copy()
# update costs # update costs
self.step_count += 1 self.step_count += 1
return next_x.flatten(), costs, \ return next_x.flatten(), costs, \
self.step_count > self.config["max_step"], \ self.step_count > self.config["max_step"], \
{"goal_state" : self.g_x} {"goal_state": self.g_x}
def plot_func(self, to_plot, i=None, history_x=None, history_g_x=None): def plot_func(self, to_plot, i=None, history_x=None, history_g_x=None):
""" plot cartpole object function """ plot cartpole object function
Args: Args:
to_plot (axis or imgs): plotted objects to_plot (axis or imgs): plotted objects
i (int): frame count i (int): frame count
@ -131,29 +133,29 @@ class CartPoleEnv(Env):
""" """
if isinstance(to_plot, Axes): if isinstance(to_plot, Axes):
imgs = {} # create new imgs imgs = {} # create new imgs
imgs["cart"] = to_plot.plot([], [], c="k")[0] imgs["cart"] = to_plot.plot([], [], c="k")[0]
imgs["pole"] = to_plot.plot([], [], c="k", linewidth=5)[0] imgs["pole"] = to_plot.plot([], [], c="k", linewidth=5)[0]
imgs["center"] = to_plot.plot([], [], marker="o", c="k",\ imgs["center"] = to_plot.plot([], [], marker="o", c="k",
markersize=10)[0] markersize=10)[0]
# centerline # centerline
to_plot.plot(np.linspace(-1., 1., num=50), np.zeros(50),\ to_plot.plot(np.linspace(-1., 1., num=50), np.zeros(50),
c="k", linestyle="dashed") c="k", linestyle="dashed")
# set axis # set axis
to_plot.set_xlim([-1., 1.]) to_plot.set_xlim([-1., 1.])
to_plot.set_ylim([-0.55, 1.5]) to_plot.set_ylim([-0.55, 1.5])
return imgs return imgs
# set imgs # set imgs
cart_x, cart_y, pole_x, pole_y = \ cart_x, cart_y, pole_x, pole_y = \
self._plot_cartpole(history_x[i]) self._plot_cartpole(history_x[i])
to_plot["cart"].set_data(cart_x, cart_y) to_plot["cart"].set_data(cart_x, cart_y)
to_plot["pole"].set_data(pole_x, pole_y) to_plot["pole"].set_data(pole_x, pole_y)
to_plot["center"].set_data(history_x[i][0], 0.) to_plot["center"].set_data(history_x[i][0], 0.)
def _plot_cartpole(self, curr_x): def _plot_cartpole(self, curr_x):
""" plot cartpole fucntions """ plot cartpole fucntions
@ -166,13 +168,13 @@ class CartPoleEnv(Env):
pole_y (numpy.ndarray): y data of pole pole_y (numpy.ndarray): y data of pole
""" """
# cart # cart
cart_x, cart_y = square(curr_x[0], 0.,\ cart_x, cart_y = square(curr_x[0], 0.,
self.config["cart_size"], 0.) self.config["cart_size"], 0.)
# pole
pole_x = np.array([curr_x[0], curr_x[0] + self.config["l"] \
* np.cos(curr_x[2]-np.pi/2)])
pole_y = np.array([0., self.config["l"] \
* np.sin(curr_x[2]-np.pi/2)])
return cart_x, cart_y, pole_x, pole_y # pole
pole_x = np.array([curr_x[0], curr_x[0] + self.config["l"]
* np.cos(curr_x[2]-np.pi/2)])
pole_y = np.array([0., self.config["l"]
* np.sin(curr_x[2]-np.pi/2)])
return cart_x, cart_y, pole_x, pole_y

View File

@ -4,6 +4,7 @@ import numpy as np
logger = getLogger(__name__) logger = getLogger(__name__)
def calc_cost(pred_xs, input_sample, g_xs, def calc_cost(pred_xs, input_sample, g_xs,
state_cost_fn, input_cost_fn, terminal_state_cost_fn): state_cost_fn, input_cost_fn, terminal_state_cost_fn):
""" calculate the cost """ calculate the cost
@ -24,20 +25,21 @@ def calc_cost(pred_xs, input_sample, g_xs,
# state cost # state cost
state_cost = 0. state_cost = 0.
if state_cost_fn is not None: if state_cost_fn is not None:
state_pred_par_cost = state_cost_fn(pred_xs[:, 1:-1, :], g_xs[:, 1:-1, :]) state_pred_par_cost = state_cost_fn(
pred_xs[:, 1:-1, :], g_xs[:, 1:-1, :])
state_cost = np.sum(np.sum(state_pred_par_cost, axis=-1), axis=-1) state_cost = np.sum(np.sum(state_pred_par_cost, axis=-1), axis=-1)
# terminal cost # terminal cost
terminal_state_cost = 0. terminal_state_cost = 0.
if terminal_state_cost_fn is not None: if terminal_state_cost_fn is not None:
terminal_state_par_cost = terminal_state_cost_fn(pred_xs[:, -1, :], terminal_state_par_cost = terminal_state_cost_fn(pred_xs[:, -1, :],
g_xs[:, -1, :]) g_xs[:, -1, :])
terminal_state_cost = np.sum(terminal_state_par_cost, axis=-1) terminal_state_cost = np.sum(terminal_state_par_cost, axis=-1)
# act cost # act cost
act_cost = 0. act_cost = 0.
if input_cost_fn is not None: if input_cost_fn is not None:
act_pred_par_cost = input_cost_fn(input_sample) act_pred_par_cost = input_cost_fn(input_sample)
act_cost = np.sum(np.sum(act_pred_par_cost, axis=-1), axis=-1) act_cost = np.sum(np.sum(act_pred_par_cost, axis=-1), axis=-1)
return state_cost + terminal_state_cost + act_cost return state_cost + terminal_state_cost + act_cost

View File

@ -1,13 +1,15 @@
import numpy as np import numpy as np
class Env(): class Env():
""" Environments class """ Environments class
Attributes: Attributes:
curr_x (numpy.ndarray): current state curr_x (numpy.ndarray): current state
history_x (list[numpy.ndarray]): historty of state, shape(step_count*state_size) history_x (list[numpy.ndarray]): historty of state, shape(step_count*state_size)
step_count (int): step count step_count (int): step count
""" """
def __init__(self, config): def __init__(self, config):
""" """
""" """
@ -25,12 +27,12 @@ class Env():
info (dict): information info (dict): information
""" """
self.step_count = 0 self.step_count = 0
self.curr_x = np.zeros(self.config["state_size"]) self.curr_x = np.zeros(self.config["state_size"])
if init_x is not None: if init_x is not None:
self.curr_x = init_x self.curr_x = init_x
# clear memory # clear memory
self.history_x = [] self.history_x = []
self.history_g_x = [] self.history_g_x = []
@ -52,4 +54,4 @@ class Env():
def __repr__(self): def __repr__(self):
""" """
""" """
return self.config return self.config

View File

@ -3,25 +3,27 @@ import scipy
from scipy import integrate from scipy import integrate
from .env import Env from .env import Env
class FirstOrderLagEnv(Env): class FirstOrderLagEnv(Env):
""" First Order Lag System Env """ First Order Lag System Env
""" """
def __init__(self, tau=0.63): def __init__(self, tau=0.63):
""" """
""" """
self.config = {"state_size" : 4,\ self.config = {"state_size": 4,
"input_size" : 2,\ "input_size": 2,
"dt" : 0.05,\ "dt": 0.05,
"max_step" : 500,\ "max_step": 500,
"input_lower_bound": [-0.5, -0.5],\ "input_lower_bound": [-0.5, -0.5],
"input_upper_bound": [0.5, 0.5], "input_upper_bound": [0.5, 0.5],
} }
super(FirstOrderLagEnv, self).__init__(self.config) super(FirstOrderLagEnv, self).__init__(self.config)
# to get discrete system matrix # to get discrete system matrix
self.A, self.B = self._to_state_space(tau, dt=self.config["dt"]) self.A, self.B = self._to_state_space(tau, dt=self.config["dt"])
@staticmethod @staticmethod
def _to_state_space(tau, dt=0.05): def _to_state_space(tau, dt=0.05):
""" """
@ -34,13 +36,13 @@ class FirstOrderLagEnv(Env):
""" """
# continuous # continuous
Ac = np.array([[-1./tau, 0., 0., 0.], Ac = np.array([[-1./tau, 0., 0., 0.],
[0., -1./tau, 0., 0.], [0., -1./tau, 0., 0.],
[1., 0., 0., 0.], [1., 0., 0., 0.],
[0., 1., 0., 0.]]) [0., 1., 0., 0.]])
Bc = np.array([[1./tau, 0.], Bc = np.array([[1./tau, 0.],
[0., 1./tau], [0., 1./tau],
[0., 0.], [0., 0.],
[0., 0.]]) [0., 0.]])
# to discrete system # to discrete system
A = scipy.linalg.expm(dt*Ac) A = scipy.linalg.expm(dt*Ac)
# B = np.matmul(np.matmul(scipy.linalg.expm(Ac*dt) - # B = np.matmul(np.matmul(scipy.linalg.expm(Ac*dt) -
@ -55,7 +57,7 @@ class FirstOrderLagEnv(Env):
B[m, n] = sol[0] B[m, n] = sol[0]
return A, B return A, B
def reset(self, init_x=None): def reset(self, init_x=None):
""" reset state """ reset state
Returns: Returns:
@ -63,7 +65,7 @@ class FirstOrderLagEnv(Env):
info (dict): information info (dict): information
""" """
self.step_count = 0 self.step_count = 0
self.curr_x = np.zeros(self.config["state_size"]) self.curr_x = np.zeros(self.config["state_size"])
if init_x is not None: if init_x is not None:
@ -71,7 +73,7 @@ class FirstOrderLagEnv(Env):
# goal # goal
self.g_x = np.array([0., 0, -2., 3.]) self.g_x = np.array([0., 0, -2., 3.])
# clear memory # clear memory
self.history_x = [] self.history_x = []
self.history_g_x = [] self.history_g_x = []
@ -94,7 +96,7 @@ class FirstOrderLagEnv(Env):
self.config["input_upper_bound"]) self.config["input_upper_bound"])
next_x = np.matmul(self.A, self.curr_x[:, np.newaxis]) \ next_x = np.matmul(self.A, self.curr_x[:, np.newaxis]) \
+ np.matmul(self.B, u[:, np.newaxis]) + np.matmul(self.B, u[:, np.newaxis])
# cost # cost
cost = 0 cost = 0
@ -104,17 +106,17 @@ class FirstOrderLagEnv(Env):
# save history # save history
self.history_x.append(next_x.flatten()) self.history_x.append(next_x.flatten())
self.history_g_x.append(self.g_x.flatten()) self.history_g_x.append(self.g_x.flatten())
# update # update
self.curr_x = next_x.flatten() self.curr_x = next_x.flatten()
# update costs # update costs
self.step_count += 1 self.step_count += 1
return next_x.flatten(), cost, \ return next_x.flatten(), cost, \
self.step_count > self.config["max_step"], \ self.step_count > self.config["max_step"], \
{"goal_state" : self.g_x} {"goal_state": self.g_x}
def plot_func(self, to_plot, i=None, history_x=None, history_g_x=None): def plot_func(self, to_plot, i=None, history_x=None, history_g_x=None):
""" """
""" """
raise ValueError("FirstOrderLag does not have animation") raise ValueError("FirstOrderLag does not have animation")

View File

@ -3,6 +3,7 @@ from .two_wheeled import TwoWheeledConstEnv
from .two_wheeled import TwoWheeledTrackEnv from .two_wheeled import TwoWheeledTrackEnv
from .cartpole import CartPoleEnv from .cartpole import CartPoleEnv
def make_env(args): def make_env(args):
if args.env == "FirstOrderLag": if args.env == "FirstOrderLag":
@ -13,5 +14,5 @@ def make_env(args):
return TwoWheeledTrackEnv() return TwoWheeledTrackEnv()
elif args.env == "CartPole": elif args.env == "CartPole":
return CartPoleEnv() return CartPoleEnv()
raise NotImplementedError("There is not {} Env".format(args.env)) raise NotImplementedError("There is not {} Env".format(args.env))

View File

@ -4,22 +4,24 @@ from scipy import integrate
from .env import Env from .env import Env
from ..common.utils import update_state_with_Runge_Kutta from ..common.utils import update_state_with_Runge_Kutta
class NonlinearSampleEnv(Env): class NonlinearSampleEnv(Env):
""" Nonlinear Sample Env """ Nonlinear Sample Env
""" """
def __init__(self): def __init__(self):
""" """
""" """
self.config = {"state_size" : 2,\ self.config = {"state_size": 2,
"input_size" : 1,\ "input_size": 1,
"dt" : 0.01,\ "dt": 0.01,
"max_step" : 250,\ "max_step": 250,
"input_lower_bound": [-0.5],\ "input_lower_bound": [-0.5],
"input_upper_bound": [0.5], "input_upper_bound": [0.5],
} }
super(NonlinearSampleEnv, self).__init__(self.config) super(NonlinearSampleEnv, self).__init__(self.config)
def reset(self, init_x=np.array([2., 0.])): def reset(self, init_x=np.array([2., 0.])):
""" reset state """ reset state
Returns: Returns:
@ -27,7 +29,7 @@ class NonlinearSampleEnv(Env):
info (dict): information info (dict): information
""" """
self.step_count = 0 self.step_count = 0
self.curr_x = np.zeros(self.config["state_size"]) self.curr_x = np.zeros(self.config["state_size"])
if init_x is not None: if init_x is not None:
@ -35,7 +37,7 @@ class NonlinearSampleEnv(Env):
# goal # goal
self.g_x = np.array([0., 0.]) self.g_x = np.array([0., 0.])
# clear memory # clear memory
self.history_x = [] self.history_x = []
self.history_g_x = [] self.history_g_x = []
@ -60,7 +62,7 @@ class NonlinearSampleEnv(Env):
funtions = [self._func_x_1, self._func_x_2] funtions = [self._func_x_1, self._func_x_2]
next_x = update_state_with_Runge_Kutta(self._curr_x, u, next_x = update_state_with_Runge_Kutta(self._curr_x, u,
functions, self.config["dt"]) functions, self.config["dt"])
# cost # cost
cost = 0 cost = 0
@ -70,29 +72,29 @@ class NonlinearSampleEnv(Env):
# save history # save history
self.history_x.append(next_x.flatten()) self.history_x.append(next_x.flatten())
self.history_g_x.append(self.g_x.flatten()) self.history_g_x.append(self.g_x.flatten())
# update # update
self.curr_x = next_x.flatten() self.curr_x = next_x.flatten()
# update costs # update costs
self.step_count += 1 self.step_count += 1
return next_x.flatten(), cost, \ return next_x.flatten(), cost, \
self.step_count > self.config["max_step"], \ self.step_count > self.config["max_step"], \
{"goal_state" : self.g_x} {"goal_state": self.g_x}
def _func_x_1(self, x_1, x_2, u): def _func_x_1(self, x_1, x_2, u):
""" """
""" """
x_dot = x_2 x_dot = x_2
return x_dot return x_dot
def _func_x_2(self, x_1, x_2, u): def _func_x_2(self, x_1, x_2, u):
""" """
""" """
x_dot = (1. - x_1**2 - x_2**2) * x_2 - x_1 + u x_dot = (1. - x_1**2 - x_2**2) * x_2 - x_1 + u
return x_dot return x_dot
def plot_func(self, to_plot, i=None, history_x=None, history_g_x=None): def plot_func(self, to_plot, i=None, history_x=None, history_g_x=None):
""" """
""" """
raise ValueError("NonlinearSampleEnv does not have animation") raise ValueError("NonlinearSampleEnv does not have animation")

View File

@ -5,47 +5,50 @@ import matplotlib.pyplot as plt
from .env import Env from .env import Env
from ..plotters.plot_objs import circle_with_angle, square, circle from ..plotters.plot_objs import circle_with_angle, square, circle
def step_two_wheeled_env(curr_x, u, dt, method="Oylar"): def step_two_wheeled_env(curr_x, u, dt, method="Oylar"):
""" step two wheeled enviroment """ step two wheeled enviroment
Args: Args:
curr_x (numpy.ndarray): current state, shape(state_size, ) curr_x (numpy.ndarray): current state, shape(state_size, )
u (numpy.ndarray): input, shape(input_size, ) u (numpy.ndarray): input, shape(input_size, )
dt (float): sampling time dt (float): sampling time
Returns: Returns:
next_x (numpy.ndarray): next state, shape(state_size. ) next_x (numpy.ndarray): next state, shape(state_size. )
Notes: Notes:
TODO: deal with another method, like Runge Kutta TODO: deal with another method, like Runge Kutta
""" """
B = np.array([[np.cos(curr_x[-1]), 0.], B = np.array([[np.cos(curr_x[-1]), 0.],
[np.sin(curr_x[-1]), 0.], [np.sin(curr_x[-1]), 0.],
[0., 1.]]) [0., 1.]])
x_dot = np.matmul(B, u[:, np.newaxis]) x_dot = np.matmul(B, u[:, np.newaxis])
next_x = x_dot.flatten() * dt + curr_x next_x = x_dot.flatten() * dt + curr_x
return next_x return next_x
class TwoWheeledConstEnv(Env): class TwoWheeledConstEnv(Env):
""" Two wheeled robot with constant goal Env """ Two wheeled robot with constant goal Env
""" """
def __init__(self): def __init__(self):
""" """
""" """
self.config = {"state_size" : 3,\ self.config = {"state_size": 3,
"input_size" : 2,\ "input_size": 2,
"dt" : 0.01,\ "dt": 0.01,
"max_step" : 500,\ "max_step": 500,
"input_lower_bound": (-1.5, -3.14),\ "input_lower_bound": (-1.5, -3.14),
"input_upper_bound": (1.5, 3.14),\ "input_upper_bound": (1.5, 3.14),
"car_size": 0.2,\ "car_size": 0.2,
"wheel_size": (0.075, 0.015) "wheel_size": (0.075, 0.015)
} }
super(TwoWheeledConstEnv, self).__init__(self.config) super(TwoWheeledConstEnv, self).__init__(self.config)
def reset(self, init_x=None): def reset(self, init_x=None):
""" reset state """ reset state
@ -54,7 +57,7 @@ class TwoWheeledConstEnv(Env):
info (dict): information info (dict): information
""" """
self.step_count = 0 self.step_count = 0
self.curr_x = np.zeros(self.config["state_size"]) self.curr_x = np.zeros(self.config["state_size"])
if init_x is not None: if init_x is not None:
@ -62,7 +65,7 @@ class TwoWheeledConstEnv(Env):
# goal # goal
self.g_x = np.array([2.5, 2.5, 0.]) self.g_x = np.array([2.5, 2.5, 0.])
# clear memory # clear memory
self.history_x = [] self.history_x = []
self.history_g_x = [] self.history_g_x = []
@ -96,32 +99,32 @@ class TwoWheeledConstEnv(Env):
# save history # save history
self.history_x.append(next_x.flatten()) self.history_x.append(next_x.flatten())
self.history_g_x.append(self.g_x.flatten()) self.history_g_x.append(self.g_x.flatten())
# update # update
self.curr_x = next_x.flatten() self.curr_x = next_x.flatten()
# update costs # update costs
self.step_count += 1 self.step_count += 1
return next_x.flatten(), costs, \ return next_x.flatten(), costs, \
self.step_count > self.config["max_step"], \ self.step_count > self.config["max_step"], \
{"goal_state" : self.g_x} {"goal_state": self.g_x}
def plot_func(self, to_plot, i=None, history_x=None, history_g_x=None): def plot_func(self, to_plot, i=None, history_x=None, history_g_x=None):
""" plot cartpole object function """ plot cartpole object function
Args: Args:
to_plot (axis or imgs): plotted objects to_plot (axis or imgs): plotted objects
i (int): frame count i (int): frame count
history_x (numpy.ndarray): history of state, shape(iters, state) history_x (numpy.ndarray): history of state, shape(iters, state)
history_g_x (numpy.ndarray): history of goal state, history_g_x (numpy.ndarray): history of goal state,
shape(iters, state) shape(iters, state)
Returns: Returns:
None or imgs : imgs order is ["cart_img", "pole_img"] None or imgs : imgs order is ["cart_img", "pole_img"]
""" """
if isinstance(to_plot, Axes): if isinstance(to_plot, Axes):
imgs = {} # create new imgs imgs = {} # create new imgs
imgs["car"] = to_plot.plot([], [], c="k")[0] imgs["car"] = to_plot.plot([], [], c="k")[0]
imgs["car_angle"] = to_plot.plot([], [], c="k")[0] imgs["car_angle"] = to_plot.plot([], [], c="k")[0]
imgs["left_tire"] = to_plot.plot([], [], c="k", linewidth=5)[0] imgs["left_tire"] = to_plot.plot([], [], c="k", linewidth=5)[0]
@ -139,9 +142,9 @@ class TwoWheeledConstEnv(Env):
# set imgs # set imgs
# car imgs # car imgs
car_x, car_y, car_angle_x, car_angle_y, \ car_x, car_y, car_angle_x, car_angle_y, \
left_tire_x, left_tire_y, right_tire_x, right_tire_y = \ left_tire_x, left_tire_y, right_tire_x, right_tire_y = \
self._plot_car(history_x[i]) self._plot_car(history_x[i])
to_plot["car"].set_data(car_x, car_y) to_plot["car"].set_data(car_x, car_y)
to_plot["car_angle"].set_data(car_angle_x, car_angle_y) to_plot["car_angle"].set_data(car_angle_x, car_angle_y)
to_plot["left_tire"].set_data(left_tire_x, left_tire_y,) to_plot["left_tire"].set_data(left_tire_x, left_tire_y,)
@ -150,7 +153,7 @@ class TwoWheeledConstEnv(Env):
# goal and trajs # goal and trajs
to_plot["goal"].set_data(history_g_x[i, 0], history_g_x[i, 1]) to_plot["goal"].set_data(history_g_x[i, 0], history_g_x[i, 1])
to_plot["traj"].set_data(history_x[:i, 0], history_x[:i, 1]) to_plot["traj"].set_data(history_x[:i, 0], history_x[:i, 1])
def _plot_car(self, curr_x): def _plot_car(self, curr_x):
""" plot car fucntions """ plot car fucntions
""" """
@ -158,53 +161,55 @@ class TwoWheeledConstEnv(Env):
car_x, car_y, car_angle_x, car_angle_y = \ car_x, car_y, car_angle_x, car_angle_y = \
circle_with_angle(curr_x[0], curr_x[1], circle_with_angle(curr_x[0], curr_x[1],
self.config["car_size"], curr_x[2]) self.config["car_size"], curr_x[2])
# left tire # left tire
center_x = (self.config["car_size"] \ center_x = (self.config["car_size"]
+ self.config["wheel_size"][1]) \ + self.config["wheel_size"][1]) \
* np.cos(curr_x[2]-np.pi/2.) + curr_x[0] * np.cos(curr_x[2]-np.pi/2.) + curr_x[0]
center_y = (self.config["car_size"] \ center_y = (self.config["car_size"]
+ self.config["wheel_size"][1]) \ + self.config["wheel_size"][1]) \
* np.sin(curr_x[2]-np.pi/2.) + curr_x[1] * np.sin(curr_x[2]-np.pi/2.) + curr_x[1]
left_tire_x, left_tire_y = \ left_tire_x, left_tire_y = \
square(center_x, center_y, square(center_x, center_y,
self.config["wheel_size"], curr_x[2]) self.config["wheel_size"], curr_x[2])
# right tire # right tire
center_x = (self.config["car_size"] \ center_x = (self.config["car_size"]
+ self.config["wheel_size"][1]) \ + self.config["wheel_size"][1]) \
* np.cos(curr_x[2]+np.pi/2.) + curr_x[0] * np.cos(curr_x[2]+np.pi/2.) + curr_x[0]
center_y = (self.config["car_size"] \ center_y = (self.config["car_size"]
+ self.config["wheel_size"][1]) \ + self.config["wheel_size"][1]) \
* np.sin(curr_x[2]+np.pi/2.) + curr_x[1] * np.sin(curr_x[2]+np.pi/2.) + curr_x[1]
right_tire_x, right_tire_y = \ right_tire_x, right_tire_y = \
square(center_x, center_y, square(center_x, center_y,
self.config["wheel_size"], curr_x[2]) self.config["wheel_size"], curr_x[2])
return car_x, car_y, car_angle_x, car_angle_y,\ return car_x, car_y, car_angle_x, car_angle_y,\
left_tire_x, left_tire_y,\ left_tire_x, left_tire_y,\
right_tire_x, right_tire_y right_tire_x, right_tire_y
class TwoWheeledTrackEnv(Env): class TwoWheeledTrackEnv(Env):
""" Two wheeled robot with constant goal Env """ Two wheeled robot with constant goal Env
""" """
def __init__(self): def __init__(self):
""" """
""" """
self.config = {"state_size" : 3,\ self.config = {"state_size": 3,
"input_size" : 2,\ "input_size": 2,
"dt" : 0.01,\ "dt": 0.01,
"max_step" : 1000,\ "max_step": 1000,
"input_lower_bound": (-1.5, -3.14),\ "input_lower_bound": (-1.5, -3.14),
"input_upper_bound": (1.5, 3.14),\ "input_upper_bound": (1.5, 3.14),
"car_size": 0.2,\ "car_size": 0.2,
"wheel_size": (0.075, 0.015) "wheel_size": (0.075, 0.015)
} }
super(TwoWheeledTrackEnv, self).__init__(self.config) super(TwoWheeledTrackEnv, self).__init__(self.config)
@staticmethod @staticmethod
def make_road(linelength=3., circle_radius=1.): def make_road(linelength=3., circle_radius=1.):
""" make track """ make track
@ -220,23 +225,23 @@ class TwoWheeledTrackEnv(Env):
# circle # circle
circle_1_x, circle_1_y = circle(linelength/2., circle_radius, circle_1_x, circle_1_y = circle(linelength/2., circle_radius,
circle_radius, start=-np.pi/2., end=np.pi/2., n_point=50) circle_radius, start=-np.pi/2., end=np.pi/2., n_point=50)
circle_1 = np.stack((circle_1_x , circle_1_y), axis=1) circle_1 = np.stack((circle_1_x, circle_1_y), axis=1)
circle_2_x, circle_2_y = circle(-linelength/2., circle_radius, circle_2_x, circle_2_y = circle(-linelength/2., circle_radius,
circle_radius, start=np.pi/2., end=3*np.pi/2., n_point=50) circle_radius, start=np.pi/2., end=3*np.pi/2., n_point=50)
circle_2 = np.stack((circle_2_x , circle_2_y), axis=1) circle_2 = np.stack((circle_2_x, circle_2_y), axis=1)
road_pos = np.concatenate((line_1, circle_1, line_2, circle_2), axis=0) road_pos = np.concatenate((line_1, circle_1, line_2, circle_2), axis=0)
# calc road angle # calc road angle
road_diff = road_pos[1:] - road_pos[:-1] road_diff = road_pos[1:] - road_pos[:-1]
road_angle = np.arctan2(road_diff[:, 1], road_diff[:, 0]) road_angle = np.arctan2(road_diff[:, 1], road_diff[:, 0])
road_angle = np.concatenate((np.zeros(1), road_angle)) road_angle = np.concatenate((np.zeros(1), road_angle))
road = np.concatenate((road_pos, road_angle[:, np.newaxis]), axis=1) road = np.concatenate((road_pos, road_angle[:, np.newaxis]), axis=1)
return np.tile(road, (3, 1)) return np.tile(road, (3, 1))
def reset(self, init_x=None): def reset(self, init_x=None):
""" reset state """ reset state
@ -246,7 +251,7 @@ class TwoWheeledTrackEnv(Env):
info (dict): information info (dict): information
""" """
self.step_count = 0 self.step_count = 0
self.curr_x = np.zeros(self.config["state_size"]) self.curr_x = np.zeros(self.config["state_size"])
if init_x is not None: if init_x is not None:
@ -254,7 +259,7 @@ class TwoWheeledTrackEnv(Env):
# goal # goal
self.g_traj = self.make_road() self.g_traj = self.make_road()
# clear memory # clear memory
self.history_x = [] self.history_x = []
self.history_g_x = [] self.history_g_x = []
@ -286,32 +291,32 @@ class TwoWheeledTrackEnv(Env):
# save history # save history
self.history_x.append(next_x.flatten()) self.history_x.append(next_x.flatten())
# update # update
self.curr_x = next_x.flatten() self.curr_x = next_x.flatten()
# update costs # update costs
self.step_count += 1 self.step_count += 1
return next_x.flatten(), costs, \ return next_x.flatten(), costs, \
self.step_count > self.config["max_step"], \ self.step_count > self.config["max_step"], \
{"goal_state" : self.g_traj} {"goal_state": self.g_traj}
def plot_func(self, to_plot, i=None, history_x=None, history_g_x=None): def plot_func(self, to_plot, i=None, history_x=None, history_g_x=None):
""" plot cartpole object function """ plot cartpole object function
Args: Args:
to_plot (axis or imgs): plotted objects to_plot (axis or imgs): plotted objects
i (int): frame count i (int): frame count
history_x (numpy.ndarray): history of state, shape(iters, state) history_x (numpy.ndarray): history of state, shape(iters, state)
history_g_x (numpy.ndarray): history of goal state, history_g_x (numpy.ndarray): history of goal state,
shape(iters, state) shape(iters, state)
Returns: Returns:
None or imgs : imgs order is ["cart_img", "pole_img"] None or imgs : imgs order is ["cart_img", "pole_img"]
""" """
if isinstance(to_plot, Axes): if isinstance(to_plot, Axes):
imgs = {} # create new imgs imgs = {} # create new imgs
imgs["car"] = to_plot.plot([], [], c="k")[0] imgs["car"] = to_plot.plot([], [], c="k")[0]
imgs["car_angle"] = to_plot.plot([], [], c="k")[0] imgs["car_angle"] = to_plot.plot([], [], c="k")[0]
imgs["left_tire"] = to_plot.plot([], [], c="k", linewidth=5)[0] imgs["left_tire"] = to_plot.plot([], [], c="k", linewidth=5)[0]
@ -333,9 +338,9 @@ class TwoWheeledTrackEnv(Env):
# set imgs # set imgs
# car imgs # car imgs
car_x, car_y, car_angle_x, car_angle_y, \ car_x, car_y, car_angle_x, car_angle_y, \
left_tire_x, left_tire_y, right_tire_x, right_tire_y = \ left_tire_x, left_tire_y, right_tire_x, right_tire_y = \
self._plot_car(history_x[i]) self._plot_car(history_x[i])
to_plot["car"].set_data(car_x, car_y) to_plot["car"].set_data(car_x, car_y)
to_plot["car_angle"].set_data(car_angle_x, car_angle_y) to_plot["car_angle"].set_data(car_angle_x, car_angle_y)
to_plot["left_tire"].set_data(left_tire_x, left_tire_y,) to_plot["left_tire"].set_data(left_tire_x, left_tire_y,)
@ -344,7 +349,7 @@ class TwoWheeledTrackEnv(Env):
# goal and trajs # goal and trajs
to_plot["goal"].set_data(history_g_x[i, 0], history_g_x[i, 1]) to_plot["goal"].set_data(history_g_x[i, 0], history_g_x[i, 1])
to_plot["traj"].set_data(history_x[:i, 0], history_x[:i, 1]) to_plot["traj"].set_data(history_x[:i, 0], history_x[:i, 1])
def _plot_car(self, curr_x): def _plot_car(self, curr_x):
""" plot car fucntions """ plot car fucntions
""" """
@ -352,31 +357,31 @@ class TwoWheeledTrackEnv(Env):
car_x, car_y, car_angle_x, car_angle_y = \ car_x, car_y, car_angle_x, car_angle_y = \
circle_with_angle(curr_x[0], curr_x[1], circle_with_angle(curr_x[0], curr_x[1],
self.config["car_size"], curr_x[2]) self.config["car_size"], curr_x[2])
# left tire # left tire
center_x = (self.config["car_size"] \ center_x = (self.config["car_size"]
+ self.config["wheel_size"][1]) \ + self.config["wheel_size"][1]) \
* np.cos(curr_x[2]-np.pi/2.) + curr_x[0] * np.cos(curr_x[2]-np.pi/2.) + curr_x[0]
center_y = (self.config["car_size"] \ center_y = (self.config["car_size"]
+ self.config["wheel_size"][1]) \ + self.config["wheel_size"][1]) \
* np.sin(curr_x[2]-np.pi/2.) + curr_x[1] * np.sin(curr_x[2]-np.pi/2.) + curr_x[1]
left_tire_x, left_tire_y = \ left_tire_x, left_tire_y = \
square(center_x, center_y, square(center_x, center_y,
self.config["wheel_size"], curr_x[2]) self.config["wheel_size"], curr_x[2])
# right tire # right tire
center_x = (self.config["car_size"] \ center_x = (self.config["car_size"]
+ self.config["wheel_size"][1]) \ + self.config["wheel_size"][1]) \
* np.cos(curr_x[2]+np.pi/2.) + curr_x[0] * np.cos(curr_x[2]+np.pi/2.) + curr_x[0]
center_y = (self.config["car_size"] \ center_y = (self.config["car_size"]
+ self.config["wheel_size"][1]) \ + self.config["wheel_size"][1]) \
* np.sin(curr_x[2]+np.pi/2.) + curr_x[1] * np.sin(curr_x[2]+np.pi/2.) + curr_x[1]
right_tire_x, right_tire_y = \ right_tire_x, right_tire_y = \
square(center_x, center_y, square(center_x, center_y,
self.config["wheel_size"], curr_x[2]) self.config["wheel_size"], curr_x[2])
return car_x, car_y, car_angle_x, car_angle_y,\ return car_x, car_y, car_angle_x, car_angle_y,\
left_tire_x, left_tire_y,\ left_tire_x, left_tire_y,\
right_tire_x, right_tire_y right_tire_x, right_tire_y

View File

@ -7,6 +7,7 @@ import six
import pickle import pickle
from logging import DEBUG, basicConfig, getLogger, FileHandler, StreamHandler, Formatter, Logger from logging import DEBUG, basicConfig, getLogger, FileHandler, StreamHandler, Formatter, Logger
def make_logger(save_dir): def make_logger(save_dir):
""" """
Args: Args:
@ -21,7 +22,7 @@ def make_logger(save_dir):
# mypackage log level # mypackage log level
logger = getLogger("PythonLinearNonlinearControl") logger = getLogger("PythonLinearNonlinearControl")
logger.setLevel(DEBUG) logger.setLevel(DEBUG)
# file handler # file handler
log_path = os.path.join(save_dir, "log.txt") log_path = os.path.join(save_dir, "log.txt")
file_handler = FileHandler(log_path) file_handler = FileHandler(log_path)
@ -33,6 +34,7 @@ def make_logger(save_dir):
# sh_handler = StreamHandler() # sh_handler = StreamHandler()
# logger.addHandler(sh_handler) # logger.addHandler(sh_handler)
def int_tuple(s): def int_tuple(s):
""" transform str to tuple """ transform str to tuple
Args: Args:
@ -42,6 +44,7 @@ def int_tuple(s):
""" """
return tuple(int(i) for i in s.split(',')) return tuple(int(i) for i in s.split(','))
def bool_flag(s): def bool_flag(s):
""" transform str to bool flg """ transform str to bool flg
Args: Args:
@ -54,6 +57,7 @@ def bool_flag(s):
msg = 'Invalid value "%s" for bool flag (should be 0 or 1)' msg = 'Invalid value "%s" for bool flag (should be 0 or 1)'
raise ValueError(msg % s) raise ValueError(msg % s)
def file_exists(path): def file_exists(path):
""" Check file existence on given path """ Check file existence on given path
Args: Args:
@ -63,6 +67,7 @@ def file_exists(path):
""" """
return os.path.exists(path) return os.path.exists(path)
def create_dir_if_not_exist(outdir): def create_dir_if_not_exist(outdir):
""" Check directory existence and creates new directory if not exist """ Check directory existence and creates new directory if not exist
Args: Args:
@ -77,6 +82,7 @@ def create_dir_if_not_exist(outdir):
return return
os.makedirs(outdir) os.makedirs(outdir)
def write_text_to_file(file_path, data): def write_text_to_file(file_path, data):
""" Write given text data to file """ Write given text data to file
Args: Args:
@ -86,6 +92,7 @@ def write_text_to_file(file_path, data):
with open(file_path, 'w') as f: with open(file_path, 'w') as f:
f.write(data) f.write(data)
def read_text_from_file(file_path): def read_text_from_file(file_path):
""" Read given file as text """ Read given file as text
Args: Args:
@ -96,6 +103,7 @@ def read_text_from_file(file_path):
with open(file_path, 'r') as f: with open(file_path, 'r') as f:
return f.read() return f.read()
def save_pickle(file_path, data): def save_pickle(file_path, data):
""" pickle given data to file """ pickle given data to file
Args: Args:
@ -105,6 +113,7 @@ def save_pickle(file_path, data):
with open(file_path, 'wb') as f: with open(file_path, 'wb') as f:
pickle.dump(data, f) pickle.dump(data, f)
def load_pickle(file_path): def load_pickle(file_path):
""" load pickled data from file """ load pickled data from file
Args: Args:
@ -118,6 +127,7 @@ def load_pickle(file_path):
else: else:
return pickle.load(f, encoding='bytes') return pickle.load(f, encoding='bytes')
def prepare_output_dir(base_dir, args, time_format='%Y-%m-%d-%H%M%S'): def prepare_output_dir(base_dir, args, time_format='%Y-%m-%d-%H%M%S'):
""" prepare a directory with current datetime as name. """ prepare a directory with current datetime as name.
created directory contains the command and args when the script was called as text file. created directory contains the command and args when the script was called as text file.
@ -144,4 +154,4 @@ def prepare_output_dir(base_dir, args, time_format='%Y-%m-%d-%H%M%S'):
argv = ' '.join(sys.argv) argv = ' '.join(sys.argv)
write_text_to_file(argv_file_path, argv) write_text_to_file(argv_file_path, argv)
return outdir return outdir

View File

@ -2,9 +2,11 @@ import numpy as np
from .model import Model from .model import Model
class CartPoleModel(Model): class CartPoleModel(Model):
""" cartpole model """ cartpole model
""" """
def __init__(self, config): def __init__(self, config):
""" """
""" """
@ -17,7 +19,7 @@ class CartPoleModel(Model):
def predict_next_state(self, curr_x, u): def predict_next_state(self, curr_x, u):
""" predict next state """ predict next state
Args: Args:
curr_x (numpy.ndarray): current state, shape(state_size, ) or curr_x (numpy.ndarray): current state, shape(state_size, ) or
shape(pop_size, state_size) shape(pop_size, state_size)
@ -31,59 +33,59 @@ class CartPoleModel(Model):
# x # x
d_x0 = curr_x[1] d_x0 = curr_x[1]
# v_x # v_x
d_x1 = (u[0] + self.mp * np.sin(curr_x[2]) \ d_x1 = (u[0] + self.mp * np.sin(curr_x[2])
* (self.l * (curr_x[3]**2) \ * (self.l * (curr_x[3]**2)
+ self.g * np.cos(curr_x[2]))) \ + self.g * np.cos(curr_x[2]))) \
/ (self.mc + self.mp * (np.sin(curr_x[2])**2)) / (self.mc + self.mp * (np.sin(curr_x[2])**2))
# theta # theta
d_x2 = curr_x[3] d_x2 = curr_x[3]
# v_theta # v_theta
d_x3 = (-u[0] * np.cos(curr_x[2]) \ d_x3 = (-u[0] * np.cos(curr_x[2])
- self.mp * self.l * (curr_x[3]**2) \ - self.mp * self.l * (curr_x[3]**2)
* np.cos(curr_x[2]) * np.sin(curr_x[2]) \ * np.cos(curr_x[2]) * np.sin(curr_x[2])
- (self.mc + self.mp) * self.g * np.sin(curr_x[2])) \ - (self.mc + self.mp) * self.g * np.sin(curr_x[2])) \
/ (self.l * (self.mc + self.mp * (np.sin(curr_x[2])**2))) / (self.l * (self.mc + self.mp * (np.sin(curr_x[2])**2)))
next_x = curr_x +\ next_x = curr_x +\
np.array([d_x0, d_x1, d_x2, d_x3]) * self.dt np.array([d_x0, d_x1, d_x2, d_x3]) * self.dt
return next_x return next_x
elif len(u.shape) == 2: elif len(u.shape) == 2:
# x # x
d_x0 = curr_x[:, 1] d_x0 = curr_x[:, 1]
# v_x # v_x
d_x1 = (u[:, 0] + self.mp * np.sin(curr_x[:, 2]) \ d_x1 = (u[:, 0] + self.mp * np.sin(curr_x[:, 2])
* (self.l * (curr_x[:, 3]**2) \ * (self.l * (curr_x[:, 3]**2)
+ self.g * np.cos(curr_x[:, 2]))) \ + self.g * np.cos(curr_x[:, 2]))) \
/ (self.mc + self.mp * (np.sin(curr_x[:, 2])**2)) / (self.mc + self.mp * (np.sin(curr_x[:, 2])**2))
# theta # theta
d_x2 = curr_x[:, 3] d_x2 = curr_x[:, 3]
# v_theta # v_theta
d_x3 = (-u[:, 0] * np.cos(curr_x[:, 2]) \ d_x3 = (-u[:, 0] * np.cos(curr_x[:, 2])
- self.mp * self.l * (curr_x[:, 3]**2) \ - self.mp * self.l * (curr_x[:, 3]**2)
* np.cos(curr_x[:, 2]) * np.sin(curr_x[:, 2]) \ * np.cos(curr_x[:, 2]) * np.sin(curr_x[:, 2])
- (self.mc + self.mp) * self.g * np.sin(curr_x[:, 2])) \ - (self.mc + self.mp) * self.g * np.sin(curr_x[:, 2])) \
/ (self.l * (self.mc + self.mp * (np.sin(curr_x[:, 2])**2))) / (self.l * (self.mc + self.mp * (np.sin(curr_x[:, 2])**2)))
next_x = curr_x +\ next_x = curr_x +\
np.stack((d_x0, d_x1, d_x2, d_x3), axis=1) * self.dt np.stack((d_x0, d_x1, d_x2, d_x3), axis=1) * self.dt
return next_x return next_x
def calc_f_x(self, xs, us, dt): def calc_f_x(self, xs, us, dt):
""" gradient of model with respect to the state in batch form """ gradient of model with respect to the state in batch form
Args: Args:
xs (numpy.ndarray): state, shape(pred_len+1, state_size) xs (numpy.ndarray): state, shape(pred_len+1, state_size)
us (numpy.ndarray): input, shape(pred_len, input_size,) us (numpy.ndarray): input, shape(pred_len, input_size,)
Return: Return:
f_x (numpy.ndarray): gradient of model with respect to x, f_x (numpy.ndarray): gradient of model with respect to x,
shape(pred_len, state_size, state_size) shape(pred_len, state_size, state_size)
Notes: Notes:
This should be discrete form !! This should be discrete form !!
""" """
# get size # get size
(_, state_size) = xs.shape (_, state_size) = xs.shape
(pred_len, _) = us.shape (pred_len, _) = us.shape
@ -95,36 +97,36 @@ class CartPoleModel(Model):
# f_theta # f_theta
tmp = ((self.mc + self.mp * np.sin(xs[:, 2])**2)**(-2)) \ tmp = ((self.mc + self.mp * np.sin(xs[:, 2])**2)**(-2)) \
* self.mp * 2. * np.sin(xs[:, 2]) * np.cos(xs[:, 2]) * self.mp * 2. * np.sin(xs[:, 2]) * np.cos(xs[:, 2])
tmp2 = 1. / (self.mc + self.mp * (np.sin(xs[:, 2])**2)) tmp2 = 1. / (self.mc + self.mp * (np.sin(xs[:, 2])**2))
f_x[:, 1, 2] = - us[:, 0] * tmp \ f_x[:, 1, 2] = - us[:, 0] * tmp \
- tmp * (self.mp * np.sin(xs[:, 2]) \ - tmp * (self.mp * np.sin(xs[:, 2])
* (self.l * xs[:, 3]**2 \ * (self.l * xs[:, 3]**2
+ self.g * np.cos(xs[:, 2]))) \ + self.g * np.cos(xs[:, 2]))) \
+ tmp2 * (self.mp * np.cos(xs[:, 2]) * self.l \ + tmp2 * (self.mp * np.cos(xs[:, 2]) * self.l
* xs[:, 3]**2 \ * xs[:, 3]**2
+ self.mp * self.g * (np.cos(xs[:, 2])**2 \ + self.mp * self.g * (np.cos(xs[:, 2])**2
- np.sin(xs[:, 2])**2)) - np.sin(xs[:, 2])**2))
f_x[:, 3, 2] = - 1. / self.l * tmp \ f_x[:, 3, 2] = - 1. / self.l * tmp \
* (-us[:, 0] * np.cos(xs[:, 2]) \ * (-us[:, 0] * np.cos(xs[:, 2])
- self.mp * self.l * (xs[:, 3]**2) \ - self.mp * self.l * (xs[:, 3]**2)
* np.cos(xs[:, 2]) * np.sin(xs[:, 2]) \ * np.cos(xs[:, 2]) * np.sin(xs[:, 2])
- (self.mc + self.mp) * self.g * np.sin(xs[:, 2])) \ - (self.mc + self.mp) * self.g * np.sin(xs[:, 2])) \
+ 1. / self.l * tmp2 \ + 1. / self.l * tmp2 \
* (us[:, 0] * np.sin(xs[:, 2]) \ * (us[:, 0] * np.sin(xs[:, 2])
- self.mp * self.l * xs[:, 3]**2 \ - self.mp * self.l * xs[:, 3]**2
* (np.cos(xs[:, 2])**2 - np.sin(xs[:, 2])**2) \ * (np.cos(xs[:, 2])**2 - np.sin(xs[:, 2])**2)
- (self.mc + self.mp) \ - (self.mc + self.mp)
* self.g * np.cos(xs[:, 2])) * self.g * np.cos(xs[:, 2]))
# f_theta_dot # f_theta_dot
f_x[:, 1, 3] = tmp2 * (self.mp * np.sin(xs[:, 2]) \ f_x[:, 1, 3] = tmp2 * (self.mp * np.sin(xs[:, 2])
* self.l * 2 * xs[:, 3]) * self.l * 2 * xs[:, 3])
f_x[:, 2, 3] = np.ones(pred_len) f_x[:, 2, 3] = np.ones(pred_len)
f_x[:, 3, 3] = 1. / self.l * tmp2 \ f_x[:, 3, 3] = 1. / self.l * tmp2 \
* (-2. * self.mp * self.l * xs[:, 3] \ * (-2. * self.mp * self.l * xs[:, 3]
* np.cos(xs[:, 2]) * np.sin(xs[:, 2])) * np.cos(xs[:, 2]) * np.sin(xs[:, 2]))
return f_x * dt + np.eye(state_size) # to discrete form return f_x * dt + np.eye(state_size) # to discrete form
@ -133,25 +135,25 @@ class CartPoleModel(Model):
Args: Args:
xs (numpy.ndarray): state, shape(pred_len+1, state_size) xs (numpy.ndarray): state, shape(pred_len+1, state_size)
us (numpy.ndarray): input, shape(pred_len, input_size,) us (numpy.ndarray): input, shape(pred_len, input_size,)
Return: Return:
f_u (numpy.ndarray): gradient of model with respect to x, f_u (numpy.ndarray): gradient of model with respect to x,
shape(pred_len, state_size, input_size) shape(pred_len, state_size, input_size)
Notes: Notes:
This should be discrete form !! This should be discrete form !!
""" """
# get size # get size
(_, state_size) = xs.shape (_, state_size) = xs.shape
(pred_len, input_size) = us.shape (pred_len, input_size) = us.shape
f_u = np.zeros((pred_len, state_size, input_size)) f_u = np.zeros((pred_len, state_size, input_size))
f_u[:, 1, 0] = 1. / (self.mc + self.mp * (np.sin(xs[:, 2])**2)) f_u[:, 1, 0] = 1. / (self.mc + self.mp * (np.sin(xs[:, 2])**2))
f_u[:, 3, 0] = -np.cos(xs[:, 2]) \ f_u[:, 3, 0] = -np.cos(xs[:, 2]) \
/ (self.l * (self.mc \ / (self.l * (self.mc
+ self.mp * (np.sin(xs[:, 2])**2))) + self.mp * (np.sin(xs[:, 2])**2)))
return f_u * dt # to discrete form return f_u * dt # to discrete form
@ -161,7 +163,7 @@ class CartPoleModel(Model):
Args: Args:
xs (numpy.ndarray): state, shape(pred_len+1, state_size) xs (numpy.ndarray): state, shape(pred_len+1, state_size)
us (numpy.ndarray): input, shape(pred_len, input_size,) us (numpy.ndarray): input, shape(pred_len, input_size,)
Return: Return:
f_xx (numpy.ndarray): gradient of model with respect to x, f_xx (numpy.ndarray): gradient of model with respect to x,
shape(pred_len, state_size, state_size, state_size) shape(pred_len, state_size, state_size, state_size)
@ -180,7 +182,7 @@ class CartPoleModel(Model):
Args: Args:
xs (numpy.ndarray): state, shape(pred_len+1, state_size) xs (numpy.ndarray): state, shape(pred_len+1, state_size)
us (numpy.ndarray): input, shape(pred_len, input_size,) us (numpy.ndarray): input, shape(pred_len, input_size,)
Return: Return:
f_ux (numpy.ndarray): gradient of model with respect to x, f_ux (numpy.ndarray): gradient of model with respect to x,
shape(pred_len, state_size, input_size, state_size) shape(pred_len, state_size, input_size, state_size)
@ -199,7 +201,7 @@ class CartPoleModel(Model):
Args: Args:
xs (numpy.ndarray): state, shape(pred_len+1, state_size) xs (numpy.ndarray): state, shape(pred_len+1, state_size)
us (numpy.ndarray): input, shape(pred_len, input_size,) us (numpy.ndarray): input, shape(pred_len, input_size,)
Return: Return:
f_uu (numpy.ndarray): gradient of model with respect to x, f_uu (numpy.ndarray): gradient of model with respect to x,
shape(pred_len, state_size, input_size, input_size) shape(pred_len, state_size, input_size, input_size)
@ -210,4 +212,4 @@ class CartPoleModel(Model):
f_uu = np.zeros((pred_len, state_size, input_size, input_size)) f_uu = np.zeros((pred_len, state_size, input_size, input_size))
raise NotImplementedError raise NotImplementedError

View File

@ -3,6 +3,7 @@ import scipy.linalg
from scipy import integrate from scipy import integrate
from .model import LinearModel from .model import LinearModel
class FirstOrderLagModel(LinearModel): class FirstOrderLagModel(LinearModel):
""" first order lag model """ first order lag model
Attributes: Attributes:
@ -10,13 +11,15 @@ class FirstOrderLagModel(LinearModel):
u (numpy.ndarray): u (numpy.ndarray):
history_pred_xs (numpy.ndarray): history_pred_xs (numpy.ndarray):
""" """
def __init__(self, config, tau=0.63): def __init__(self, config, tau=0.63):
""" """
Args: Args:
tau (float): time constant tau (float): time constant
""" """
# param # param
self.A, self.B = self._to_state_space(tau, dt=config.DT) # discrete system self.A, self.B = self._to_state_space(
tau, dt=config.DT) # discrete system
super(FirstOrderLagModel, self).__init__(self.A, self.B) super(FirstOrderLagModel, self).__init__(self.A, self.B)
@staticmethod @staticmethod
@ -31,21 +34,22 @@ class FirstOrderLagModel(LinearModel):
""" """
# continuous # continuous
Ac = np.array([[-1./tau, 0., 0., 0.], Ac = np.array([[-1./tau, 0., 0., 0.],
[0., -1./tau, 0., 0.], [0., -1./tau, 0., 0.],
[1., 0., 0., 0.], [1., 0., 0., 0.],
[0., 1., 0., 0.]]) [0., 1., 0., 0.]])
Bc = np.array([[1./tau, 0.], Bc = np.array([[1./tau, 0.],
[0., 1./tau], [0., 1./tau],
[0., 0.], [0., 0.],
[0., 0.]]) [0., 0.]])
# to discrete system # to discrete system
A = scipy.linalg.expm(dt*Ac) A = scipy.linalg.expm(dt*Ac)
# B = np.matmul(np.matmul(scipy.linalg.expm(Ac*dt)-scipy.linalg.expm(Ac*0.), np.linalg.inv(Ac)), Bc) # B = np.matmul(np.matmul(scipy.linalg.expm(Ac*dt)-scipy.linalg.expm(Ac*0.), np.linalg.inv(Ac)), Bc)
B = np.zeros_like(Bc) B = np.zeros_like(Bc)
for m in range(Bc.shape[0]): for m in range(Bc.shape[0]):
for n in range(Bc.shape[1]): for n in range(Bc.shape[1]):
integrate_fn = lambda tau: np.matmul(scipy.linalg.expm(Ac*tau), Bc)[m, n] def integrate_fn(tau): return np.matmul(
scipy.linalg.expm(Ac*tau), Bc)[m, n]
sol = integrate.quad(integrate_fn, 0, dt) sol = integrate.quad(integrate_fn, 0, dt)
B[m, n] = sol[0] B[m, n] = sol[0]
return A, B return A, B

View File

@ -2,13 +2,14 @@ from .first_order_lag import FirstOrderLagModel
from .two_wheeled import TwoWheeledModel from .two_wheeled import TwoWheeledModel
from .cartpole import CartPoleModel from .cartpole import CartPoleModel
def make_model(args, config): def make_model(args, config):
if args.env == "FirstOrderLag": if args.env == "FirstOrderLag":
return FirstOrderLagModel(config) return FirstOrderLagModel(config)
elif args.env == "TwoWheeledConst" or args.env == "TwoWheeledTrack": elif args.env == "TwoWheeledConst" or args.env == "TwoWheeledTrack":
return TwoWheeledModel(config) return TwoWheeledModel(config)
elif args.env == "CartPole": elif args.env == "CartPole":
return CartPoleModel(config) return CartPoleModel(config)
raise NotImplementedError("There is not {} Model".format(args.env)) raise NotImplementedError("There is not {} Model".format(args.env))

View File

@ -1,8 +1,10 @@
import numpy as np import numpy as np
class Model(): class Model():
""" base class of model """ base class of model
""" """
def __init__(self): def __init__(self):
""" """
""" """
@ -22,17 +24,17 @@ class Model():
or shape(pop_size, pred_len+1, state_size) or shape(pop_size, pred_len+1, state_size)
""" """
if len(us.shape) == 3: if len(us.shape) == 3:
pred_xs =self._predict_traj_alltogether(curr_x, us) pred_xs = self._predict_traj_alltogether(curr_x, us)
elif len(us.shape) == 2: elif len(us.shape) == 2:
pred_xs = self._predict_traj(curr_x, us) pred_xs = self._predict_traj(curr_x, us)
else: else:
raise ValueError("Invalid us") raise ValueError("Invalid us")
return pred_xs return pred_xs
def _predict_traj(self, curr_x, us): def _predict_traj(self, curr_x, us):
""" predict trajectories """ predict trajectories
Args: Args:
curr_x (numpy.ndarray): current state, shape(state_size, ) curr_x (numpy.ndarray): current state, shape(state_size, )
us (numpy.ndarray): inputs, shape(pred_len, input_size) us (numpy.ndarray): inputs, shape(pred_len, input_size)
@ -53,10 +55,10 @@ class Model():
x = next_x x = next_x
return pred_xs return pred_xs
def _predict_traj_alltogether(self, curr_x, us): def _predict_traj_alltogether(self, curr_x, us):
""" predict trajectories for all samples """ predict trajectories for all samples
Args: Args:
curr_x (numpy.ndarray): current state, shape(pop_size, state_size) curr_x (numpy.ndarray): current state, shape(pop_size, state_size)
us (numpy.ndarray): inputs, shape(pop_size, pred_len, input_size) us (numpy.ndarray): inputs, shape(pop_size, pred_len, input_size)
@ -75,12 +77,12 @@ class Model():
# next_x.shape = (pop_size, state_size) # next_x.shape = (pop_size, state_size)
next_x = self.predict_next_state(x, us[t]) next_x = self.predict_next_state(x, us[t])
# update # update
pred_xs = np.concatenate((pred_xs, next_x[np.newaxis, :, :]),\ pred_xs = np.concatenate((pred_xs, next_x[np.newaxis, :, :]),
axis=0) axis=0)
x = next_x x = next_x
return np.transpose(pred_xs, (1, 0, 2)) return np.transpose(pred_xs, (1, 0, 2))
def predict_next_state(self, curr_x, u): def predict_next_state(self, curr_x, u):
""" predict next state """ predict next state
""" """
@ -99,23 +101,23 @@ class Model():
# get size # get size
(pred_len, input_size) = us.shape (pred_len, input_size) = us.shape
# pred final adjoint state # pred final adjoint state
lam = self.predict_terminal_adjoint_state(xs[-1],\ lam = self.predict_terminal_adjoint_state(xs[-1],
terminal_g_x=g_xs[-1]) terminal_g_x=g_xs[-1])
lams = lam[np.newaxis, :] lams = lam[np.newaxis, :]
for t in range(pred_len-1, 0, -1): for t in range(pred_len-1, 0, -1):
prev_lam = \ prev_lam = \
self.predict_adjoint_state(lam, xs[t], us[t],\ self.predict_adjoint_state(lam, xs[t], us[t],
goal=g_xs[t], t=t) goal=g_xs[t], t=t)
# update # update
lams = np.concatenate((prev_lam[np.newaxis, :], lams), axis=0) lams = np.concatenate((prev_lam[np.newaxis, :], lams), axis=0)
lam = prev_lam lam = prev_lam
return lams return lams
def predict_adjoint_state(self, lam, x, u, goal=None, t=None): def predict_adjoint_state(self, lam, x, u, goal=None, t=None):
""" predict adjoint states """ predict adjoint states
Args: Args:
lam (numpy.ndarray): adjoint state, shape(state_size, ) lam (numpy.ndarray): adjoint state, shape(state_size, )
x (numpy.ndarray): state, shape(state_size, ) x (numpy.ndarray): state, shape(state_size, )
@ -129,7 +131,7 @@ class Model():
def predict_terminal_adjoint_state(self, terminal_x, terminal_g_x=None): def predict_terminal_adjoint_state(self, terminal_x, terminal_g_x=None):
""" predict terminal adjoint state """ predict terminal adjoint state
Args: Args:
terminal_x (numpy.ndarray): terminal state, shape(state_size, ) terminal_x (numpy.ndarray): terminal state, shape(state_size, )
terminal_g_x (numpy.ndarray): terminal goal state, terminal_g_x (numpy.ndarray): terminal goal state,
@ -143,7 +145,7 @@ class Model():
@staticmethod @staticmethod
def calc_f_x(xs, us, dt): def calc_f_x(xs, us, dt):
""" gradient of model with respect to the state in batch form """ gradient of model with respect to the state in batch form
""" """
raise NotImplementedError("Implement gradient of model \ raise NotImplementedError("Implement gradient of model \
with respect to the state") with respect to the state")
@ -153,11 +155,11 @@ class Model():
""" """
raise NotImplementedError("Implement gradient of model \ raise NotImplementedError("Implement gradient of model \
with respect to the input") with respect to the input")
@staticmethod @staticmethod
def calc_f_xx(xs, us, dt): def calc_f_xx(xs, us, dt):
""" hessian of model with respect to the state in batch form """ hessian of model with respect to the state in batch form
""" """
raise NotImplementedError("Implement hessian of model \ raise NotImplementedError("Implement hessian of model \
with respect to the state") with respect to the state")
@ -171,27 +173,29 @@ class Model():
@staticmethod @staticmethod
def calc_f_uu(xs, us, dt): def calc_f_uu(xs, us, dt):
""" hessian of model with respect to the state in batch form """ hessian of model with respect to the state in batch form
""" """
raise NotImplementedError("Implement hessian of model \ raise NotImplementedError("Implement hessian of model \
with respect to the input") with respect to the input")
class LinearModel(Model): class LinearModel(Model):
""" discrete linear model, x[k+1] = Ax[k] + Bu[k] """ discrete linear model, x[k+1] = Ax[k] + Bu[k]
Attributes: Attributes:
A (numpy.ndarray): shape(state_size, state_size) A (numpy.ndarray): shape(state_size, state_size)
B (numpy.ndarray): shape(state_size, input_size) B (numpy.ndarray): shape(state_size, input_size)
""" """
def __init__(self, A, B): def __init__(self, A, B):
""" """
""" """
super(LinearModel, self).__init__() super(LinearModel, self).__init__()
self.A = A self.A = A
self.B = B self.B = B
def predict_next_state(self, curr_x, u): def predict_next_state(self, curr_x, u):
""" predict next state """ predict next state
Args: Args:
curr_x (numpy.ndarray): current state, shape(state_size, ) or curr_x (numpy.ndarray): current state, shape(state_size, ) or
shape(pop_size, state_size) shape(pop_size, state_size)
@ -203,7 +207,7 @@ class LinearModel(Model):
""" """
if len(u.shape) == 1: if len(u.shape) == 1:
next_x = np.matmul(self.A, curr_x[:, np.newaxis]) \ next_x = np.matmul(self.A, curr_x[:, np.newaxis]) \
+ np.matmul(self.B, u[:, np.newaxis]) + np.matmul(self.B, u[:, np.newaxis])
return next_x.flatten() return next_x.flatten()
@ -211,7 +215,7 @@ class LinearModel(Model):
next_x = np.matmul(curr_x, self.A.T) + np.matmul(u, self.B.T) next_x = np.matmul(curr_x, self.A.T) + np.matmul(u, self.B.T)
return next_x return next_x
def calc_f_x(self, xs, us, dt): def calc_f_x(self, xs, us, dt):
""" gradient of model with respect to the state in batch form """ gradient of model with respect to the state in batch form
@ -223,7 +227,7 @@ class LinearModel(Model):
shape(pred_len, state_size, state_size) shape(pred_len, state_size, state_size)
Notes: Notes:
This should be discrete form !! This should be discrete form !!
""" """
# get size # get size
(pred_len, _) = us.shape (pred_len, _) = us.shape
@ -240,7 +244,7 @@ class LinearModel(Model):
shape(pred_len, state_size, input_size) shape(pred_len, state_size, input_size)
Notes: Notes:
This should be discrete form !! This should be discrete form !!
""" """
# get size # get size
(pred_len, input_size) = us.shape (pred_len, input_size) = us.shape
@ -283,7 +287,7 @@ class LinearModel(Model):
f_ux = np.zeros((pred_len, state_size, input_size, state_size)) f_ux = np.zeros((pred_len, state_size, input_size, state_size))
return f_ux return f_ux
@staticmethod @staticmethod
def calc_f_uu(xs, us, dt): def calc_f_uu(xs, us, dt):
""" hessian of model with respect to input in batch form """ hessian of model with respect to input in batch form
@ -301,4 +305,4 @@ class LinearModel(Model):
f_uu = np.zeros((pred_len, state_size, input_size, input_size)) f_uu = np.zeros((pred_len, state_size, input_size, input_size))
return f_uu return f_uu

View File

@ -2,9 +2,11 @@ import numpy as np
from .model import Model from .model import Model
class TwoWheeledModel(Model): class TwoWheeledModel(Model):
""" two wheeled model """ two wheeled model
""" """
def __init__(self, config): def __init__(self, config):
""" """
""" """
@ -13,7 +15,7 @@ class TwoWheeledModel(Model):
def predict_next_state(self, curr_x, u): def predict_next_state(self, curr_x, u):
""" predict next state """ predict next state
Args: Args:
curr_x (numpy.ndarray): current state, shape(state_size, ) or curr_x (numpy.ndarray): current state, shape(state_size, ) or
shape(pop_size, state_size) shape(pop_size, state_size)
@ -50,21 +52,21 @@ class TwoWheeledModel(Model):
next_x = x_dot[:, :, 0] * self.dt + curr_x next_x = x_dot[:, :, 0] * self.dt + curr_x
return next_x return next_x
@staticmethod @staticmethod
def calc_f_x(xs, us, dt): def calc_f_x(xs, us, dt):
""" gradient of model with respect to the state in batch form """ gradient of model with respect to the state in batch form
Args: Args:
xs (numpy.ndarray): state, shape(pred_len+1, state_size) xs (numpy.ndarray): state, shape(pred_len+1, state_size)
us (numpy.ndarray): input, shape(pred_len, input_size,) us (numpy.ndarray): input, shape(pred_len, input_size,)
Return: Return:
f_x (numpy.ndarray): gradient of model with respect to x, f_x (numpy.ndarray): gradient of model with respect to x,
shape(pred_len, state_size, state_size) shape(pred_len, state_size, state_size)
Notes: Notes:
This should be discrete form !! This should be discrete form !!
""" """
# get size # get size
(_, state_size) = xs.shape (_, state_size) = xs.shape
(pred_len, _) = us.shape (pred_len, _) = us.shape
@ -81,14 +83,14 @@ class TwoWheeledModel(Model):
Args: Args:
xs (numpy.ndarray): state, shape(pred_len+1, state_size) xs (numpy.ndarray): state, shape(pred_len+1, state_size)
us (numpy.ndarray): input, shape(pred_len, input_size,) us (numpy.ndarray): input, shape(pred_len, input_size,)
Return: Return:
f_u (numpy.ndarray): gradient of model with respect to x, f_u (numpy.ndarray): gradient of model with respect to x,
shape(pred_len, state_size, input_size) shape(pred_len, state_size, input_size)
Notes: Notes:
This should be discrete form !! This should be discrete form !!
""" """
# get size # get size
(_, state_size) = xs.shape (_, state_size) = xs.shape
(pred_len, input_size) = us.shape (pred_len, input_size) = us.shape
@ -107,7 +109,7 @@ class TwoWheeledModel(Model):
Args: Args:
xs (numpy.ndarray): state, shape(pred_len+1, state_size) xs (numpy.ndarray): state, shape(pred_len+1, state_size)
us (numpy.ndarray): input, shape(pred_len, input_size,) us (numpy.ndarray): input, shape(pred_len, input_size,)
Return: Return:
f_xx (numpy.ndarray): gradient of model with respect to x, f_xx (numpy.ndarray): gradient of model with respect to x,
shape(pred_len, state_size, state_size, state_size) shape(pred_len, state_size, state_size, state_size)
@ -130,7 +132,7 @@ class TwoWheeledModel(Model):
Args: Args:
xs (numpy.ndarray): state, shape(pred_len+1, state_size) xs (numpy.ndarray): state, shape(pred_len+1, state_size)
us (numpy.ndarray): input, shape(pred_len, input_size,) us (numpy.ndarray): input, shape(pred_len, input_size,)
Return: Return:
f_ux (numpy.ndarray): gradient of model with respect to x, f_ux (numpy.ndarray): gradient of model with respect to x,
shape(pred_len, state_size, input_size, state_size) shape(pred_len, state_size, input_size, state_size)
@ -145,7 +147,7 @@ class TwoWheeledModel(Model):
f_ux[:, 1, 0, 2] = np.cos(xs[:, 2]) f_ux[:, 1, 0, 2] = np.cos(xs[:, 2])
return f_ux * dt return f_ux * dt
@staticmethod @staticmethod
def calc_f_uu(xs, us, dt): def calc_f_uu(xs, us, dt):
""" hessian of model with respect to input in batch form """ hessian of model with respect to input in batch form
@ -153,7 +155,7 @@ class TwoWheeledModel(Model):
Args: Args:
xs (numpy.ndarray): state, shape(pred_len+1, state_size) xs (numpy.ndarray): state, shape(pred_len+1, state_size)
us (numpy.ndarray): input, shape(pred_len, input_size,) us (numpy.ndarray): input, shape(pred_len, input_size,)
Return: Return:
f_uu (numpy.ndarray): gradient of model with respect to x, f_uu (numpy.ndarray): gradient of model with respect to x,
shape(pred_len, state_size, input_size, input_size) shape(pred_len, state_size, input_size, input_size)
@ -164,4 +166,4 @@ class TwoWheeledModel(Model):
f_uu = np.zeros((pred_len, state_size, input_size, input_size)) f_uu = np.zeros((pred_len, state_size, input_size, input_size))
return f_uu * dt return f_uu * dt

View File

@ -1,9 +1,11 @@
import numpy as np import numpy as np
from .planner import Planner from .planner import Planner
class ClosestPointPlanner(Planner): class ClosestPointPlanner(Planner):
""" This planner make goal state according to goal path """ This planner make goal state according to goal path
""" """
def __init__(self, config): def __init__(self, config):
""" """
""" """
@ -24,7 +26,7 @@ class ClosestPointPlanner(Planner):
min_idx = np.argmin(np.linalg.norm(curr_x[:-1] - g_traj[:, :-1], min_idx = np.argmin(np.linalg.norm(curr_x[:-1] - g_traj[:, :-1],
axis=1)) axis=1))
start = (min_idx+self.n_ahead) start = (min_idx+self.n_ahead)
if start > len(g_traj): if start > len(g_traj):
start = len(g_traj) start = len(g_traj)
@ -32,8 +34,8 @@ class ClosestPointPlanner(Planner):
if (min_idx+self.n_ahead+self.pred_len+1) > len(g_traj): if (min_idx+self.n_ahead+self.pred_len+1) > len(g_traj):
end = len(g_traj) end = len(g_traj)
if abs(start - end) != self.pred_len + 1: if abs(start - end) != self.pred_len + 1:
return np.tile(g_traj[-1], (self.pred_len+1, 1)) return np.tile(g_traj[-1], (self.pred_len+1, 1))
return g_traj[start:end] return g_traj[start:end]

View File

@ -1,9 +1,11 @@
import numpy as np import numpy as np
from .planner import Planner from .planner import Planner
class ConstantPlanner(Planner): class ConstantPlanner(Planner):
""" This planner make constant goal state """ This planner make constant goal state
""" """
def __init__(self, config): def __init__(self, config):
""" """
""" """
@ -20,4 +22,4 @@ class ConstantPlanner(Planner):
Returns: Returns:
g_xs (numpy.ndarrya): goal state, shape(pred_len, state_size) g_xs (numpy.ndarrya): goal state, shape(pred_len, state_size)
""" """
return np.tile(g_x, (self.pred_len+1, 1)) return np.tile(g_x, (self.pred_len+1, 1))

View File

@ -1,8 +1,10 @@
import numpy as np import numpy as np
class Planner(): class Planner():
""" """
""" """
def __init__(self): def __init__(self):
""" """
""" """
@ -15,4 +17,4 @@ class Planner():
Returns: Returns:
g_xs (numpy.ndarrya): goal state, shape(pred_len, state_size) g_xs (numpy.ndarrya): goal state, shape(pred_len, state_size)
""" """
raise NotImplementedError("Implement plan func") raise NotImplementedError("Implement plan func")

View File

@ -8,9 +8,11 @@ import matplotlib.animation as animation
logger = getLogger(__name__) logger = getLogger(__name__)
class Animator(): class Animator():
""" animation class """ animation class
""" """
def __init__(self, env, args=None): def __init__(self, env, args=None):
""" """
""" """
@ -34,7 +36,7 @@ class Animator():
# make fig # make fig
self.anim_fig = plt.figure() self.anim_fig = plt.figure()
# axis # axis
self.axis = self.anim_fig.add_subplot(111) self.axis = self.anim_fig.add_subplot(111)
self.axis.set_aspect('equal', adjustable='box') self.axis.set_aspect('equal', adjustable='box')
@ -65,12 +67,12 @@ class Animator():
""" """
# set up animation figures # set up animation figures
self._setup() self._setup()
_update_img = lambda i: self._update_img(i, history_x, history_g_x) def _update_img(i): return self._update_img(i, history_x, history_g_x)
# Set up formatting for the movie files # Set up formatting for the movie files
Writer = animation.writers['ffmpeg'] Writer = animation.writers['ffmpeg']
writer = Writer(fps=15, metadata=dict(artist='Me'), bitrate=1800) writer = Writer(fps=15, metadata=dict(artist='Me'), bitrate=1800)
# call funcanimation # call funcanimation
ani = FuncAnimation( ani = FuncAnimation(
self.anim_fig, self.anim_fig,
@ -79,6 +81,6 @@ class Animator():
# save animation # save animation
path = os.path.join(self.result_dir, self.controller_type, path = os.path.join(self.result_dir, self.controller_type,
"animation-" + self.env_name + ".mp4") "animation-" + self.env_name + ".mp4")
logger.info("Saved Animation to {} ...".format(path)) logger.info("Saved Animation to {} ...".format(path))
ani.save(path, writer=writer) ani.save(path, writer=writer)

View File

@ -5,6 +5,7 @@ import matplotlib.pyplot as plt
from ..helper import save_pickle, load_pickle from ..helper import save_pickle, load_pickle
def plot_result(history, history_g=None, ylabel="x", def plot_result(history, history_g=None, ylabel="x",
save_dir="./result", name="state_history"): save_dir="./result", name="state_history"):
""" """
@ -28,14 +29,14 @@ def plot_result(history, history_g=None, ylabel="x",
def plot(axis, history, history_g=None): def plot(axis, history, history_g=None):
axis.plot(range(iters), history, c="r", linewidth=3) axis.plot(range(iters), history, c="r", linewidth=3)
if history_g is not None: if history_g is not None:
axis.plot(range(iters), history_g,\ axis.plot(range(iters), history_g,
c="b", linewidth=3, label="goal") c="b", linewidth=3, label="goal")
if i < size: if i < size:
plot(axis1, history[:, i], history_g=history_g[:, i]) plot(axis1, history[:, i], history_g=history_g[:, i])
if i+1 < size: if i+1 < size:
plot(axis2, history[:, i+1], history_g=history_g[:, i+1]) plot(axis2, history[:, i+1], history_g=history_g[:, i+1])
if i+2 < size: if i+2 < size:
plot(axis3, history[:, i+2], history_g=history_g[:, i+2]) plot(axis3, history[:, i+2], history_g=history_g[:, i+2])
# save # save
@ -47,6 +48,7 @@ def plot_result(history, history_g=None, ylabel="x",
axis1.legend(ncol=1, bbox_to_anchor=(0., 1.02, 1., 0.102), loc=3) axis1.legend(ncol=1, bbox_to_anchor=(0., 1.02, 1., 0.102), loc=3)
figure.savefig(path, bbox_inches="tight", pad_inches=0.05) figure.savefig(path, bbox_inches="tight", pad_inches=0.05)
def plot_results(history_x, history_u, history_g=None, args=None): def plot_results(history_x, history_u, history_g=None, args=None):
""" """
@ -64,12 +66,13 @@ def plot_results(history_x, history_u, history_g=None, args=None):
controller_type = args.controller_type controller_type = args.controller_type
plot_result(history_x, history_g=history_g, ylabel="x", plot_result(history_x, history_g=history_g, ylabel="x",
name= env + "-state_history", name=env + "-state_history",
save_dir="./result/" + controller_type) save_dir="./result/" + controller_type)
plot_result(history_u, history_g=np.zeros_like(history_u), ylabel="u", plot_result(history_u, history_g=np.zeros_like(history_u), ylabel="u",
name= env + "-input_history", name=env + "-input_history",
save_dir="./result/" + controller_type) save_dir="./result/" + controller_type)
def save_plot_data(history_x, history_u, history_g=None, args=None): def save_plot_data(history_x, history_u, history_g=None, args=None):
""" save plot data """ save plot data
@ -98,6 +101,7 @@ def save_plot_data(history_x, history_u, history_g=None, args=None):
env + "-history_g.pkl") env + "-history_g.pkl")
save_pickle(path, history_g) save_pickle(path, history_g)
def load_plot_data(env, controller_type, result_dir="./result"): def load_plot_data(env, controller_type, result_dir="./result"):
""" """
Args: Args:
@ -123,6 +127,7 @@ def load_plot_data(env, controller_type, result_dir="./result"):
return history_x, history_u, history_g return history_x, history_u, history_g
def plot_multi_result(histories, histories_g=None, labels=None, ylabel="x", def plot_multi_result(histories, histories_g=None, labels=None, ylabel="x",
save_dir="./result", name="state_history"): save_dir="./result", name="state_history"):
""" """
@ -130,7 +135,7 @@ def plot_multi_result(histories, histories_g=None, labels=None, ylabel="x",
history (numpy.ndarray): history, shape(iters, size) history (numpy.ndarray): history, shape(iters, size)
""" """
(_, iters, size) = histories.shape (_, iters, size) = histories.shape
for i in range(0, size, 2): for i in range(0, size, 2):
figure = plt.figure() figure = plt.figure()
@ -146,17 +151,17 @@ def plot_multi_result(histories, histories_g=None, labels=None, ylabel="x",
axis.plot(range(iters), history, axis.plot(range(iters), history,
linewidth=3, label=label, alpha=0.7, linestyle="dashed") linewidth=3, label=label, alpha=0.7, linestyle="dashed")
if history_g is not None: if history_g is not None:
axis.plot(range(iters), history_g,\ axis.plot(range(iters), history_g,
c="b", linewidth=3) c="b", linewidth=3)
if i < size: if i < size:
for j, (history, history_g) \ for j, (history, history_g) \
in enumerate(zip(histories, histories_g)): in enumerate(zip(histories, histories_g)):
plot(axis1, history[:, i], plot(axis1, history[:, i],
history_g=history_g[:, i], label=labels[j]) history_g=history_g[:, i], label=labels[j])
if i+1 < size: if i+1 < size:
for j, (history, history_g) in \ for j, (history, history_g) in \
enumerate(zip(histories, histories_g)): enumerate(zip(histories, histories_g)):
plot(axis2, history[:, i+1], plot(axis2, history[:, i+1],
history_g=history_g[:, i+1], label=labels[j]) history_g=history_g[:, i+1], label=labels[j])

View File

@ -5,9 +5,10 @@ import matplotlib.pyplot as plt
from ..common.utils import rotate_pos from ..common.utils import rotate_pos
def circle(center_x, center_y, radius, start=0., end=2*np.pi, n_point=100): def circle(center_x, center_y, radius, start=0., end=2*np.pi, n_point=100):
""" Create circle matrix """ Create circle matrix
Args: Args:
center_x (float): the center x position of the circle center_x (float): the center x position of the circle
center_y (float): the center y position of the circle center_y (float): the center y position of the circle
@ -29,6 +30,7 @@ def circle(center_x, center_y, radius, start=0., end=2*np.pi, n_point=100):
return np.array(circle_xs), np.array(circle_ys) return np.array(circle_xs), np.array(circle_ys)
def circle_with_angle(center_x, center_y, radius, angle): def circle_with_angle(center_x, center_y, radius, angle):
""" Create circle matrix with angle line matrix """ Create circle matrix with angle line matrix
@ -50,6 +52,7 @@ def circle_with_angle(center_x, center_y, radius, angle):
return circle_x, circle_y, angle_x, angle_y return circle_x, circle_y, angle_x, angle_y
def square(center_x, center_y, shape, angle): def square(center_x, center_y, shape, angle):
""" Create square """ Create square
@ -74,9 +77,10 @@ def square(center_x, center_y, shape, angle):
trans_points = rotate_pos(square_xy, angle) trans_points = rotate_pos(square_xy, angle)
# translation # translation
trans_points += np.array([center_x, center_y]) trans_points += np.array([center_x, center_y])
return trans_points[:, 0], trans_points[:, 1] return trans_points[:, 0], trans_points[:, 1]
def square_with_angle(center_x, center_y, shape, angle): def square_with_angle(center_x, center_y, shape, angle):
""" Create square with angle line """ Create square with angle line
@ -96,4 +100,4 @@ def square_with_angle(center_x, center_y, shape, angle):
angle_x = np.array([center_x, center_x + np.cos(angle) * shape[0]]) angle_x = np.array([center_x, center_x + np.cos(angle) * shape[0]])
angle_y = np.array([center_y, center_y + np.sin(angle) * shape[1]]) angle_y = np.array([center_y, center_y + np.sin(angle) * shape[1]])
return square_x, square_y, angle_x, angle_y return square_x, square_y, angle_x, angle_y

View File

@ -1,2 +1,2 @@
from PythonLinearNonlinearControl.runners.runner \ from PythonLinearNonlinearControl.runners.runner \
import ExpRunner # NOQA import ExpRunner # NOQA

View File

@ -1,4 +1,5 @@
from .runner import ExpRunner from .runner import ExpRunner
def make_runner(args): def make_runner(args):
return ExpRunner() return ExpRunner()

View File

@ -4,9 +4,11 @@ import numpy as np
logger = getLogger(__name__) logger = getLogger(__name__)
class ExpRunner(): class ExpRunner():
""" experiment runner """ experiment runner
""" """
def __init__(self): def __init__(self):
""" """
""" """
@ -46,6 +48,6 @@ class ExpRunner():
score += cost score += cost
step_count += 1 step_count += 1
logger.debug("Controller type = {}, Score = {}"\ logger.debug("Controller type = {}, Score = {}"
.format(controller, score)) .format(controller, score))
return np.array(history_x), np.array(history_u), np.array(history_g) return np.array(history_x), np.array(history_u), np.array(history_g)