Merge pull request #11 from Shunichi09/develop

Add nonlinear sample Env
This commit is contained in:
Shunichi09 2021-02-13 21:21:30 +09:00 committed by GitHub
commit 0d443f7ed5
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
46 changed files with 1226 additions and 560 deletions

View File

@ -6,6 +6,8 @@
| Two wheeled System (Constant Goal) | x | ✓ | 3 | 2 |
| Two wheeled System (Moving Goal) (Coming soon) | x | ✓ | 3 | 2 |
| Cartpole (Swing up) | x | ✓ | 4 | 1 |
| Nonlinear Sample System Env | x | ✓ | 2 | 1 |
## [FistOrderLagEnv](PythonLinearNonlinearControl/envs/first_order_lag.py)
@ -54,3 +56,13 @@ mc = 1, mp = 0.2, l = 0.5, g = 9.81
### Cost.
<img src="assets/cartpole_score.png" width="300">
## [Nonlinear Sample System Env](PythonLinearNonlinearControl/envs/nonlinear_sample_system.py)
## System equation.
<img src="assets/nonlinear_sample_system.png" width="400">
### Cost.
<img src="assets/nonlinear_sample_system_score.png" width="400">

View File

@ -1,5 +1,6 @@
import numpy as np
def rotate_pos(pos, angle):
""" Transformation the coordinate in the angle
@ -14,6 +15,7 @@ def rotate_pos(pos, angle):
return np.dot(pos, rot_mat.T)
def fit_angle_in_range(angles, min_angle=-np.pi, max_angle=np.pi):
""" Check angle range and correct the range
@ -42,3 +44,75 @@ def fit_angle_in_range(angles, min_angle=-np.pi, max_angle=np.pi):
output = np.minimum(max_angle, np.maximum(min_angle, output))
return output.reshape(output_shape)
def update_state_with_Runge_Kutta(state, u, functions, dt=0.01, batch=True):
""" update state in Runge Kutta methods
Args:
state (array-like): state of system
u (array-like): input of system
functions (list): update function of each state,
each function will be called like func(state, u)
We expect that this function returns differential of each state
dt (float): float in seconds
batch (bool): state and u is given by batch or not
Returns:
next_state (np.array): next state of system
Notes:
sample of function is as follows:
def func_x(self, x_1, x_2, u):
x_dot = (1. - x_1**2 - x_2**2) * x_2 - x_1 + u
return x_dot
Note that the function return x_dot.
"""
if not batch:
state_size = len(state)
assert state_size == len(functions), \
"Invalid functions length, You need to give the state size functions"
k0 = np.zeros(state_size)
k1 = np.zeros(state_size)
k2 = np.zeros(state_size)
k3 = np.zeros(state_size)
for i, func in enumerate(functions):
k0[i] = dt * func(state, u)
for i, func in enumerate(functions):
k1[i] = dt * func(state + k0 / 2., u)
for i, func in enumerate(functions):
k2[i] = dt * func(state + k1 / 2., u)
for i, func in enumerate(functions):
k3[i] = dt * func(state + k2, u)
return state + (k0 + 2. * k1 + 2. * k2 + k3) / 6.
else:
batch_size, state_size = state.shape
assert state_size == len(functions), \
"Invalid functions length, You need to give the state size functions"
k0 = np.zeros((batch_size, state_size))
k1 = np.zeros((batch_size, state_size))
k2 = np.zeros((batch_size, state_size))
k3 = np.zeros((batch_size, state_size))
for i, func in enumerate(functions):
k0[:, i] = dt * func(state, u)
for i, func in enumerate(functions):
k1[:, i] = dt * func(state + k0 / 2., u)
for i, func in enumerate(functions):
k2[:, i] = dt * func(state + k1 / 2., u)
for i, func in enumerate(functions):
k3[:, i] = dt * func(state + k2, u)
return state + (k0 + 2. * k1 + 2. * k2 + k3) / 6.

View File

@ -1,5 +1,6 @@
import numpy as np
class CartPoleConfigModule():
# parameters
ENV_NAME = "CartPole-v0"
@ -12,7 +13,7 @@ class CartPoleConfigModule():
DT = 0.02
# cost parameters
R = np.diag([0.01]) # 0.01 is worked for MPPI and CEM and MPPIWilliams
# 1. is worked for iLQR
# 1. is worked for iLQR
TERMINAL_WEIGHT = 1.
Q = None
Sf = None
@ -39,40 +40,40 @@ class CartPoleConfigModule():
"num_elites": 50,
"max_iters": 15,
"alpha": 0.3,
"init_var":9.,
"threshold":0.001
"init_var": 9.,
"threshold": 0.001
},
"MPPI":{
"beta" : 0.6,
"MPPI": {
"beta": 0.6,
"popsize": 5000,
"kappa": 0.9,
"noise_sigma": 0.5,
},
"MPPIWilliams":{
"MPPIWilliams": {
"popsize": 5000,
"lambda": 1.,
"noise_sigma": 0.9,
},
"iLQR":{
"iLQR": {
"max_iter": 500,
"init_mu": 1.,
"mu_min": 1e-6,
"mu_max": 1e10,
"init_delta": 2.,
"threshold": 1e-6,
},
"DDP":{
},
"DDP": {
"max_iter": 500,
"init_mu": 1.,
"mu_min": 1e-6,
"mu_max": 1e10,
"init_delta": 2.,
"threshold": 1e-6,
},
"NMPC-CGMRES":{
},
"NMPC-Newton":{
},
},
"NMPC-CGMRES": {
},
"NMPC-Newton": {
},
}
@staticmethod
@ -103,21 +104,21 @@ class CartPoleConfigModule():
"""
if len(x.shape) > 2:
return (6. * (x[:, :, 0]**2) \
+ 12. * ((np.cos(x[:, :, 2]) + 1.)**2) \
+ 0.1 * (x[:, :, 1]**2) \
+ 0.1 * (x[:, :, 3]**2))[:, :, np.newaxis]
return (6. * (x[:, :, 0]**2)
+ 12. * ((np.cos(x[:, :, 2]) + 1.)**2)
+ 0.1 * (x[:, :, 1]**2)
+ 0.1 * (x[:, :, 3]**2))[:, :, np.newaxis]
elif len(x.shape) > 1:
return (6. * (x[:, 0]**2) \
+ 12. * ((np.cos(x[:, 2]) + 1.)**2) \
+ 0.1 * (x[:, 1]**2) \
+ 0.1 * (x[:, 3]**2))[:, np.newaxis]
return (6. * (x[:, 0]**2)
+ 12. * ((np.cos(x[:, 2]) + 1.)**2)
+ 0.1 * (x[:, 1]**2)
+ 0.1 * (x[:, 3]**2))[:, np.newaxis]
return 6. * (x[0]**2) \
+ 12. * ((np.cos(x[2]) + 1.)**2) \
+ 0.1 * (x[1]**2) \
+ 0.1 * (x[3]**2)
+ 12. * ((np.cos(x[2]) + 1.)**2) \
+ 0.1 * (x[1]**2) \
+ 0.1 * (x[3]**2)
@staticmethod
def terminal_state_cost_fn(terminal_x, terminal_g_x):
@ -134,18 +135,18 @@ class CartPoleConfigModule():
"""
if len(terminal_x.shape) > 1:
return (6. * (terminal_x[:, 0]**2) \
+ 12. * ((np.cos(terminal_x[:, 2]) + 1.)**2) \
+ 0.1 * (terminal_x[:, 1]**2) \
+ 0.1 * (terminal_x[:, 3]**2))[:, np.newaxis] \
* CartPoleConfigModule.TERMINAL_WEIGHT
return (6. * (terminal_x[0]**2) \
+ 12. * ((np.cos(terminal_x[2]) + 1.)**2) \
+ 0.1 * (terminal_x[1]**2) \
+ 0.1 * (terminal_x[3]**2)) \
return (6. * (terminal_x[:, 0]**2)
+ 12. * ((np.cos(terminal_x[:, 2]) + 1.)**2)
+ 0.1 * (terminal_x[:, 1]**2)
+ 0.1 * (terminal_x[:, 3]**2))[:, np.newaxis] \
* CartPoleConfigModule.TERMINAL_WEIGHT
return (6. * (terminal_x[0]**2)
+ 12. * ((np.cos(terminal_x[2]) + 1.)**2)
+ 0.1 * (terminal_x[1]**2)
+ 0.1 * (terminal_x[3]**2)) \
* CartPoleConfigModule.TERMINAL_WEIGHT
@staticmethod
def gradient_cost_fn_with_state(x, g_x, terminal=False):
""" gradient of costs with respect to the state
@ -163,7 +164,7 @@ class CartPoleConfigModule():
cost_dx1 = 0.2 * x[:, 1]
cost_dx2 = 24. * (1 + np.cos(x[:, 2])) * -np.sin(x[:, 2])
cost_dx3 = 0.2 * x[:, 3]
cost_dx = np.stack((cost_dx0, cost_dx1,\
cost_dx = np.stack((cost_dx0, cost_dx1,
cost_dx2, cost_dx3), axis=1)
return cost_dx
@ -206,9 +207,9 @@ class CartPoleConfigModule():
hessian[:, 0, 0] = 12.
hessian[:, 1, 1] = 0.2
hessian[:, 2, 2] = 24. * -np.sin(x[:, 2]) \
* (-np.sin(x[:, 2])) \
+ 24. * (1. + np.cos(x[:, 2])) \
* -np.cos(x[:, 2])
* (-np.sin(x[:, 2])) \
+ 24. * (1. + np.cos(x[:, 2])) \
* -np.cos(x[:, 2])
hessian[:, 3, 3] = 0.2
return hessian
@ -218,9 +219,9 @@ class CartPoleConfigModule():
hessian[0, 0] = 12.
hessian[1, 1] = 0.2
hessian[2, 2] = 24. * -np.sin(x[2]) \
* (-np.sin(x[2])) \
+ 24. * (1. + np.cos(x[2])) \
* -np.cos(x[2])
* (-np.sin(x[2])) \
+ 24. * (1. + np.cos(x[2])) \
* -np.cos(x[2])
hessian[3, 3] = 0.2
return hessian[np.newaxis, :, :] * CartPoleConfigModule.TERMINAL_WEIGHT

View File

@ -1,5 +1,6 @@
import numpy as np
class FirstOrderLagConfigModule():
# parameters
ENV_NAME = "FirstOrderLag-v0"
@ -34,42 +35,42 @@ class FirstOrderLagConfigModule():
"num_elites": 50,
"max_iters": 15,
"alpha": 0.3,
"init_var":1.,
"threshold":0.001
"init_var": 1.,
"threshold": 0.001
},
"MPPI":{
"beta" : 0.6,
"MPPI": {
"beta": 0.6,
"popsize": 5000,
"kappa": 0.9,
"noise_sigma": 0.5,
},
"MPPIWilliams":{
"MPPIWilliams": {
"popsize": 5000,
"lambda": 1.,
"noise_sigma": 0.9,
},
"MPC":{
},
"iLQR":{
"MPC": {
},
"iLQR": {
"max_iter": 500,
"init_mu": 1.,
"mu_min": 1e-6,
"mu_max": 1e10,
"init_delta": 2.,
"threshold": 1e-6,
},
"DDP":{
},
"DDP": {
"max_iter": 500,
"init_mu": 1.,
"mu_min": 1e-6,
"mu_max": 1e10,
"init_delta": 2.,
"threshold": 1e-6,
},
"NMPC-CGMRES":{
},
"NMPC-Newton":{
},
},
"NMPC-CGMRES": {
},
"NMPC-Newton": {
},
}
@staticmethod
@ -111,7 +112,7 @@ class FirstOrderLagConfigModule():
shape(pop_size, pred_len)
"""
return ((terminal_x - terminal_g_x)**2) \
* np.diag(FirstOrderLagConfigModule.Sf)
* np.diag(FirstOrderLagConfigModule.Sf)
@staticmethod
def gradient_cost_fn_with_state(x, g_x, terminal=False):
@ -128,8 +129,8 @@ class FirstOrderLagConfigModule():
if not terminal:
return 2. * (x - g_x) * np.diag(FirstOrderLagConfigModule.Q)
return (2. * (x - g_x) \
* np.diag(FirstOrderLagConfigModule.Sf))[np.newaxis, :]
return (2. * (x - g_x)
* np.diag(FirstOrderLagConfigModule.Sf))[np.newaxis, :]
@staticmethod
def gradient_cost_fn_with_input(x, u):

View File

@ -1,6 +1,8 @@
from .first_order_lag import FirstOrderLagConfigModule
from .two_wheeled import TwoWheeledConfigModule
from .cartpole import CartPoleConfigModule
from .nonlinear_sample_system import NonlinearSampleSystemConfigModule
def make_config(args):
"""
@ -13,3 +15,5 @@ def make_config(args):
return TwoWheeledConfigModule()
elif args.env == "CartPole":
return CartPoleConfigModule()
elif args.env == "NonlinearSample":
return NonlinearSampleSystemConfigModule()

View File

@ -0,0 +1,219 @@
import numpy as np
class NonlinearSampleSystemConfigModule():
# parameters
ENV_NAME = "NonlinearSampleSystem-v0"
PLANNER_TYPE = "Const"
TYPE = "Nonlinear"
TASK_HORIZON = 2500
PRED_LEN = 10
STATE_SIZE = 2
INPUT_SIZE = 1
DT = 0.01
R = np.diag([0.01])
Q = None
Sf = None
# bounds
INPUT_LOWER_BOUND = np.array([-0.5])
INPUT_UPPER_BOUND = np.array([0.5])
def __init__(self):
"""
"""
# opt configs
self.opt_config = {
"Random": {
"popsize": 5000
},
"CEM": {
"popsize": 500,
"num_elites": 50,
"max_iters": 15,
"alpha": 0.3,
"init_var": 9.,
"threshold": 0.001
},
"MPPI": {
"beta": 0.6,
"popsize": 5000,
"kappa": 0.9,
"noise_sigma": 0.5,
},
"MPPIWilliams": {
"popsize": 5000,
"lambda": 1.,
"noise_sigma": 0.9,
},
"iLQR": {
"max_iter": 500,
"init_mu": 1.,
"mu_min": 1e-6,
"mu_max": 1e10,
"init_delta": 2.,
"threshold": 1e-6,
},
"DDP": {
"max_iter": 500,
"init_mu": 1.,
"mu_min": 1e-6,
"mu_max": 1e10,
"init_delta": 2.,
"threshold": 1e-6,
},
"NMPC-CGMRES": {
},
"NMPC-Newton": {
},
}
@staticmethod
def input_cost_fn(u):
""" input cost functions
Args:
u (numpy.ndarray): input, shape(pred_len, input_size)
or shape(pop_size, pred_len, input_size)
Returns:
cost (numpy.ndarray): cost of input, shape(pred_len, input_size) or
shape(pop_size, pred_len, input_size)
"""
return (u**2) * np.diag(NonlinearSampleSystemConfigModule.R)
@staticmethod
def state_cost_fn(x, g_x):
""" state cost function
Args:
x (numpy.ndarray): state, shape(pred_len, state_size)
or shape(pop_size, pred_len, state_size)
g_x (numpy.ndarray): goal state, shape(pred_len, state_size)
or shape(pop_size, pred_len, state_size)
Returns:
cost (numpy.ndarray): cost of state, shape(pred_len, 1) or
shape(pop_size, pred_len, 1)
"""
if len(x.shape) > 2:
return (0.5 * (x[:, :, 0]**2) +
0.5 * (x[:, :, 1]**2))[:, :, np.newaxis]
elif len(x.shape) > 1:
return (0.5 * (x[:, 0]**2) + 0.5 * (x[:, 1]**2))[:, np.newaxis]
return 0.5 * (x[0]**2) + 0.5 * (x[1]**2)
@ staticmethod
def terminal_state_cost_fn(terminal_x, terminal_g_x):
"""
Args:
terminal_x (numpy.ndarray): terminal state,
shape(state_size, ) or shape(pop_size, state_size)
terminal_g_x (numpy.ndarray): terminal goal state,
shape(state_size, ) or shape(pop_size, state_size)
Returns:
cost (numpy.ndarray): cost of state, shape(pred_len, ) or
shape(pop_size, pred_len)
"""
if len(terminal_x.shape) > 1:
return (0.5 * (terminal_x[:, 0]**2) +
0.5 * (terminal_x[:, 1]**2))[:, np.newaxis]
return 0.5 * (terminal_x[0]**2) + 0.5 * (terminal_x[1]**2)
@ staticmethod
def gradient_cost_fn_with_state(x, g_x, terminal=False):
""" gradient of costs with respect to the state
Args:
x (numpy.ndarray): state, shape(pred_len, state_size)
g_x (numpy.ndarray): goal state, shape(pred_len, state_size)
Returns:
l_x (numpy.ndarray): gradient of cost, shape(pred_len, state_size)
or shape(1, state_size)
"""
if not terminal:
cost_dx0 = x[:, 0]
cost_dx1 = x[:, 1]
cost_dx = np.stack((cost_dx0, cost_dx1), axis=1)
return cost_dx
cost_dx0 = x[0]
cost_dx1 = x[1]
cost_dx = np.array([[cost_dx0, cost_dx1]])
return cost_dx
@ staticmethod
def gradient_cost_fn_with_input(x, u):
""" gradient of costs with respect to the input
Args:
x (numpy.ndarray): state, shape(pred_len, state_size)
u (numpy.ndarray): goal state, shape(pred_len, input_size)
Returns:
l_u (numpy.ndarray): gradient of cost, shape(pred_len, input_size)
"""
return 2. * u * np.diag(NonlinearSampleSystemConfigModule.R)
@ staticmethod
def hessian_cost_fn_with_state(x, g_x, terminal=False):
""" hessian costs with respect to the state
Args:
x (numpy.ndarray): state, shape(pred_len, state_size)
g_x (numpy.ndarray): goal state, shape(pred_len, state_size)
Returns:
l_xx (numpy.ndarray): gradient of cost,
shape(pred_len, state_size, state_size) or
shape(1, state_size, state_size) or
"""
if not terminal:
(pred_len, state_size) = x.shape
hessian = np.eye(state_size)
hessian = np.tile(hessian, (pred_len, 1, 1))
hessian[:, 0, 0] = 1.
hessian[:, 1, 1] = 1.
return hessian
state_size = len(x)
hessian = np.eye(state_size)
hessian[0, 0] = 1.
hessian[1, 1] = 1.
return hessian[np.newaxis, :, :]
@ staticmethod
def hessian_cost_fn_with_input(x, u):
""" hessian costs with respect to the input
Args:
x (numpy.ndarray): state, shape(pred_len, state_size)
u (numpy.ndarray): goal state, shape(pred_len, input_size)
Returns:
l_uu (numpy.ndarray): gradient of cost,
shape(pred_len, input_size, input_size)
"""
(pred_len, _) = u.shape
return np.tile(NonlinearSampleSystemConfigModule.R, (pred_len, 1, 1))
@ staticmethod
def hessian_cost_fn_with_input_state(x, u):
""" hessian costs with respect to the state and input
Args:
x (numpy.ndarray): state, shape(pred_len, state_size)
u (numpy.ndarray): goal state, shape(pred_len, input_size)
Returns:
l_ux (numpy.ndarray): gradient of cost ,
shape(pred_len, input_size, state_size)
"""
(_, state_size) = x.shape
(pred_len, input_size) = u.shape
return np.zeros((pred_len, input_size, state_size))

View File

@ -4,6 +4,7 @@ from matplotlib.axes import Axes
from ..plotters.plot_objs import square_with_angle, square
from ..common.utils import fit_angle_in_range
class TwoWheeledConfigModule():
# parameters
ENV_NAME = "TwoWheeled-v0"
@ -46,40 +47,40 @@ class TwoWheeledConfigModule():
"num_elites": 50,
"max_iters": 15,
"alpha": 0.3,
"init_var":1.,
"threshold":0.001
"init_var": 1.,
"threshold": 0.001
},
"MPPI":{
"beta" : 0.6,
"MPPI": {
"beta": 0.6,
"popsize": 5000,
"kappa": 0.9,
"noise_sigma": 0.5,
},
"MPPIWilliams":{
"MPPIWilliams": {
"popsize": 5000,
"lambda": 1,
"noise_sigma": 1.,
},
"iLQR":{
"iLQR": {
"max_iter": 500,
"init_mu": 1.,
"mu_min": 1e-6,
"mu_max": 1e10,
"init_delta": 2.,
"threshold": 1e-6,
},
"DDP":{
},
"DDP": {
"max_iter": 500,
"init_mu": 1.,
"mu_min": 1e-6,
"mu_max": 1e10,
"init_delta": 2.,
"threshold": 1e-6,
},
"NMPC-CGMRES":{
},
"NMPC-Newton":{
},
},
"NMPC-CGMRES": {
},
"NMPC-Newton": {
},
}
@staticmethod
@ -142,8 +143,8 @@ class TwoWheeledConfigModule():
cost (numpy.ndarray): cost of state, shape(pred_len, ) or
shape(pop_size, pred_len)
"""
terminal_diff = TwoWheeledConfigModule.fit_diff_in_range(terminal_x \
- terminal_g_x)
terminal_diff = TwoWheeledConfigModule.fit_diff_in_range(terminal_x
- terminal_g_x)
return ((terminal_diff)**2) * np.diag(TwoWheeledConfigModule.Sf)
@ -164,8 +165,8 @@ class TwoWheeledConfigModule():
if not terminal:
return 2. * (diff) * np.diag(TwoWheeledConfigModule.Q)
return (2. * (diff) \
* np.diag(TwoWheeledConfigModule.Sf))[np.newaxis, :]
return (2. * (diff)
* np.diag(TwoWheeledConfigModule.Sf))[np.newaxis, :]
@staticmethod
def gradient_cost_fn_with_input(x, u):

View File

@ -8,6 +8,7 @@ from ..envs.cost import calc_cost
logger = getLogger(__name__)
class CEM(Controller):
""" Cross Entropy Method for linear and nonlinear method
@ -19,6 +20,7 @@ class CEM(Controller):
using probabilistic dynamics models.
In Advances in Neural Information Processing Systems (pp. 4754-4765).
"""
def __init__(self, config, model):
super(CEM, self).__init__(config, model)
@ -50,13 +52,13 @@ class CEM(Controller):
self.input_cost_fn = config.input_cost_fn
# init mean
self.init_mean = np.tile((config.INPUT_UPPER_BOUND \
self.init_mean = np.tile((config.INPUT_UPPER_BOUND
+ config.INPUT_LOWER_BOUND) / 2.,
self.pred_len)
self.prev_sol = self.init_mean.copy()
# init variance
var = np.ones_like(config.INPUT_UPPER_BOUND) \
* config.opt_config["CEM"]["init_var"]
* config.opt_config["CEM"]["init_var"]
self.init_var = np.tile(var, self.pred_len)
# save
@ -86,7 +88,7 @@ class CEM(Controller):
# make distribution
X = stats.truncnorm(-1, 1,
loc=np.zeros_like(mean),\
loc=np.zeros_like(mean),
scale=np.ones_like(mean))
while (opt_count < self.max_iters) and np.max(var) > self.epsilon:
@ -99,15 +101,15 @@ class CEM(Controller):
# sample
samples = X.rvs(size=[self.pop_size, self.opt_dim]) \
* np.sqrt(constrained_var) \
+ mean
* np.sqrt(constrained_var) \
+ mean
# calc cost
# samples.shape = (pop_size, opt_dim)
costs = self.calc_cost(curr_x,
samples.reshape(self.pop_size,
self.pred_len,
self.input_size),
self.pred_len,
self.input_size),
g_xs)
# sort cost

View File

@ -2,9 +2,11 @@ import numpy as np
from ..envs.cost import calc_cost
class Controller():
""" Controller class
"""
def __init__(self, config, model):
"""
"""
@ -49,7 +51,7 @@ class Controller():
# get particle cost
costs = calc_cost(pred_xs, samples, g_xs,
self.state_cost_fn, self.input_cost_fn, \
self.state_cost_fn, self.input_cost_fn,
self.terminal_state_cost_fn)
return costs
@ -58,10 +60,12 @@ class Controller():
def gradient_hamiltonian_x(x, u, lam):
""" gradient of hamitonian with respect to the state,
"""
raise NotImplementedError("Implement gradient of hamitonian with respect to the state")
raise NotImplementedError(
"Implement gradient of hamitonian with respect to the state")
@staticmethod
def gradient_hamiltonian_u(x, u, lam):
""" gradient of hamitonian with respect to the input
"""
raise NotImplementedError("Implement gradient of hamitonian with respect to the input")
raise NotImplementedError(
"Implement gradient of hamitonian with respect to the input")

View File

@ -8,6 +8,7 @@ from ..envs.cost import calc_cost
logger = getLogger(__name__)
class DDP(Controller):
""" Differential Dynamic Programming
@ -18,6 +19,7 @@ class DDP(Controller):
https://github.com/studywolf/control, and
https://github.com/anassinator/ilqr
"""
def __init__(self, config, model):
"""
"""
@ -92,13 +94,13 @@ class DDP(Controller):
# forward
if update_sol == True:
pred_xs, cost, f_x, f_u, f_xx, f_ux, f_uu,\
l_x, l_xx, l_u, l_uu, l_ux = \
l_x, l_xx, l_u, l_uu, l_ux = \
self.forward(curr_x, g_xs, sol)
update_sol = False
try:
# backward
k, K = self.backward(f_x, f_u, f_xx, f_ux, f_uu, \
k, K = self.backward(f_x, f_u, f_xx, f_ux, f_uu,
l_x, l_xx, l_u, l_uu, l_ux)
# line search
@ -139,7 +141,7 @@ class DDP(Controller):
# increase regularization term.
self.delta = max(1.0, self.delta) * self.init_delta
self.mu = max(self.mu_min, self.mu * self.delta)
logger.debug("Update regularization term to {}"\
logger.debug("Update regularization term to {}"
.format(self.mu))
if self.mu >= self.mu_max:
logger.debug("Reach Max regularization term")
@ -183,8 +185,8 @@ class DDP(Controller):
for t in range(pred_len):
new_sol[t] = sol[t] \
+ alpha * k[t] \
+ np.dot(K[t], (new_pred_xs[t] - pred_xs[t]))
+ alpha * k[t] \
+ np.dot(K[t], (new_pred_xs[t] - pred_xs[t]))
new_pred_xs[t+1] = self.model.predict_next_state(new_pred_xs[t],
new_sol[t])

View File

@ -8,6 +8,7 @@ from ..envs.cost import calc_cost
logger = getLogger(__name__)
class iLQR(Controller):
""" iterative Liner Quadratique Regulator
@ -16,6 +17,7 @@ class iLQR(Controller):
Intelligent Robots and Systems (pp. 4906-4913). and Study Wolf,
https://github.com/studywolf/control
"""
def __init__(self, config, model):
"""
"""
@ -130,7 +132,7 @@ class iLQR(Controller):
# increase regularization term.
self.delta = max(1.0, self.delta) * self.init_delta
self.mu = max(self.mu_min, self.mu * self.delta)
logger.debug("Update regularization term to {}"\
logger.debug("Update regularization term to {}"
.format(self.mu))
if self.mu >= self.mu_max:
logger.debug("Reach Max regularization term")
@ -174,8 +176,8 @@ class iLQR(Controller):
for t in range(pred_len):
new_sol[t] = sol[t] \
+ alpha * k[t] \
+ np.dot(K[t], (new_pred_xs[t] - pred_xs[t]))
+ alpha * k[t] \
+ np.dot(K[t], (new_pred_xs[t] - pred_xs[t]))
new_pred_xs[t+1] = self.model.predict_next_state(new_pred_xs[t],
new_sol[t])

View File

@ -6,6 +6,7 @@ from .mppi_williams import MPPIWilliams
from .ilqr import iLQR
from .ddp import DDP
def make_controller(args, config, model):
if args.controller_type == "MPC":

View File

@ -9,6 +9,7 @@ from ..envs.cost import calc_cost
logger = getLogger(__name__)
class LinearMPC(Controller):
""" Model Predictive Controller for linear model
@ -21,6 +22,7 @@ class LinearMPC(Controller):
Ref:
Maciejowski, J. M. (2002). Predictive control: with constraints.
"""
def __init__(self, config, model):
"""
Args:
@ -77,7 +79,7 @@ class LinearMPC(Controller):
for _ in range(self.pred_len - 1):
temp_mat = np.matmul(A_factorials[-1], self.A)
self.phi_mat = np.vstack((self.phi_mat, temp_mat))
A_factorials.append(temp_mat) # after we use this factorials
A_factorials.append(temp_mat) # after we use this factorials
self.gamma_mat = self.B.copy()
gammma_mat_temp = self.B.copy()
@ -91,8 +93,8 @@ class LinearMPC(Controller):
for i in range(self.pred_len - 1):
temp_mat = np.zeros_like(self.gamma_mat)
temp_mat[int((i + 1)*self.state_size): , :] =\
self.gamma_mat[:-int((i + 1)*self.state_size) , :]
temp_mat[int((i + 1)*self.state_size):, :] =\
self.gamma_mat[:-int((i + 1)*self.state_size), :]
self.theta_mat = np.hstack((self.theta_mat, temp_mat))
@ -114,7 +116,7 @@ class LinearMPC(Controller):
for i in range(self.pred_len - 1):
for j in range(self.input_size):
temp_F[j * 2: (j + 1) * 2,\
temp_F[j * 2: (j + 1) * 2,
((i+1) * self.input_size) + j] = np.array([1., -1.])
self.F = np.vstack((self.F, temp_F))
@ -187,14 +189,14 @@ class LinearMPC(Controller):
# using cvxopt
def optimized_func(dt_us):
return (np.dot(dt_us, np.dot(H, dt_us.reshape(-1, 1))) \
return (np.dot(dt_us, np.dot(H, dt_us.reshape(-1, 1)))
- np.dot(G.T, dt_us.reshape(-1, 1)))[0]
# constraint
lb = np.array([-np.inf for _ in range(len(ub))]) # one side cons
cons = LinearConstraint(A, lb, ub)
# solve
opt_sol = minimize(optimized_func, self.prev_sol.flatten(),\
opt_sol = minimize(optimized_func, self.prev_sol.flatten(),
constraints=[cons])
opt_dt_us = opt_sol.x
@ -213,7 +215,7 @@ class LinearMPC(Controller):
"""
# to dt form
opt_dt_u_seq = np.cumsum(opt_dt_us.reshape(self.pred_len,\
opt_dt_u_seq = np.cumsum(opt_dt_us.reshape(self.pred_len,
self.input_size),
axis=0)
self.prev_sol = opt_dt_u_seq.copy()
@ -226,8 +228,8 @@ class LinearMPC(Controller):
# check costs
costs = self.calc_cost(curr_x,
opt_u_seq.reshape(1,
self.pred_len,
self.input_size),
self.pred_len,
self.input_size),
g_xs)
logger.debug("Cost = {}".format(costs))

View File

@ -8,6 +8,7 @@ from ..envs.cost import calc_cost
logger = getLogger(__name__)
class MPPI(Controller):
""" Model Predictive Path Integral for linear and nonlinear method
@ -18,6 +19,7 @@ class MPPI(Controller):
Deep Dynamics Models for Learning Dexterous Manipulation.
arXiv preprint arXiv:1909.11652.
"""
def __init__(self, config, model):
super(MPPI, self).__init__(config, model)
@ -47,7 +49,7 @@ class MPPI(Controller):
self.input_cost_fn = config.input_cost_fn
# init mean
self.prev_sol = np.tile((config.INPUT_UPPER_BOUND \
self.prev_sol = np.tile((config.INPUT_UPPER_BOUND
+ config.INPUT_LOWER_BOUND) / 2.,
self.pred_len)
self.prev_sol = self.prev_sol.reshape(self.pred_len, self.input_size)
@ -74,23 +76,23 @@ class MPPI(Controller):
"""
# get noised inputs
noise = np.random.normal(
loc=0, scale=1.0, size=(self.pop_size, self.pred_len,
self.input_size)) * self.noise_sigma
loc=0, scale=1.0, size=(self.pop_size, self.pred_len,
self.input_size)) * self.noise_sigma
noised_inputs = noise.copy()
for t in range(self.pred_len):
if t > 0:
noised_inputs[:, t, :] = self.beta \
* (self.prev_sol[t, :] \
+ noise[:, t, :]) \
+ (1 - self.beta) \
* noised_inputs[:, t-1, :]
* (self.prev_sol[t, :]
+ noise[:, t, :]) \
+ (1 - self.beta) \
* noised_inputs[:, t-1, :]
else:
noised_inputs[:, t, :] = self.beta \
* (self.prev_sol[t, :] \
+ noise[:, t, :]) \
+ (1 - self.beta) \
* self.history_u[-1]
* (self.prev_sol[t, :]
+ noise[:, t, :]) \
+ (1 - self.beta) \
* self.history_u[-1]
# clip actions
noised_inputs = np.clip(
@ -108,7 +110,7 @@ class MPPI(Controller):
# weight actions
weighted_inputs = exp_rewards[:, np.newaxis, np.newaxis] \
* noised_inputs
* noised_inputs
sol = np.sum(weighted_inputs, 0) / denom
# update

View File

@ -8,6 +8,7 @@ from ..envs.cost import calc_cost
logger = getLogger(__name__)
class MPPIWilliams(Controller):
""" Model Predictive Path Integral for linear and nonlinear method
@ -19,6 +20,7 @@ class MPPIWilliams(Controller):
2017 IEEE International Conference on Robotics and Automation (ICRA),
Singapore, 2017, pp. 1714-1721.
"""
def __init__(self, config, model):
super(MPPIWilliams, self).__init__(config, model)
@ -47,7 +49,7 @@ class MPPIWilliams(Controller):
self.input_cost_fn = config.input_cost_fn
# init mean
self.prev_sol = np.tile((config.INPUT_UPPER_BOUND \
self.prev_sol = np.tile((config.INPUT_UPPER_BOUND
+ config.INPUT_LOWER_BOUND) / 2.,
self.pred_len)
self.prev_sol = self.prev_sol.reshape(self.pred_len, self.input_size)
@ -85,7 +87,7 @@ class MPPIWilliams(Controller):
# get particle cost
costs = calc_cost(pred_xs, samples, g_xs,
self.state_cost_fn, None, \
self.state_cost_fn, None,
self.terminal_state_cost_fn)
return costs
@ -101,8 +103,8 @@ class MPPIWilliams(Controller):
"""
# get noised inputs
noise = np.random.normal(
loc=0, scale=1.0, size=(self.pop_size, self.pred_len,
self.input_size)) * self.noise_sigma
loc=0, scale=1.0, size=(self.pop_size, self.pred_len,
self.input_size)) * self.noise_sigma
noised_inputs = self.prev_sol + noise
@ -120,7 +122,7 @@ class MPPIWilliams(Controller):
# mppi update
beta = np.min(costs)
eta = np.sum(np.exp(- 1. / self.lam * (costs - beta)), axis=0) \
+ 1e-10
+ 1e-10
# weight
# eta.shape = (pred_len, input_size)
@ -128,7 +130,7 @@ class MPPIWilliams(Controller):
# update inputs
sol = self.prev_sol \
+ np.sum(weights[:, np.newaxis, np.newaxis] * noise, axis=0)
+ np.sum(weights[:, np.newaxis, np.newaxis] * noise, axis=0)
# update
self.prev_sol[:-1] = sol[1:]

View File

@ -8,6 +8,7 @@ from ..envs.cost import calc_cost
logger = getLogger(__name__)
class RandomShooting(Controller):
""" Random Shooting Method for linear and nonlinear method
@ -19,6 +20,7 @@ class RandomShooting(Controller):
using probabilistic dynamics models.
In Advances in Neural Information Processing Systems (pp. 4754-4765).
"""
def __init__(self, config, model):
super(RandomShooting, self).__init__(config, model)
@ -65,8 +67,8 @@ class RandomShooting(Controller):
# calc cost
costs = self.calc_cost(curr_x,
samples.reshape(self.pop_size,
self.pred_len,
self.input_size),
self.pred_len,
self.input_size),
g_xs)
# solution
sol = samples[np.argmin(costs)]

View File

@ -4,6 +4,7 @@ from matplotlib.axes import Axes
from .env import Env
from ..plotters.plot_objs import square
class CartPoleEnv(Env):
""" Cartpole Environment
@ -13,13 +14,14 @@ class CartPoleEnv(Env):
6-832-underactuated-robotics-spring-2009/readings/
MIT6_832s09_read_ch03.pdf
"""
def __init__(self):
"""
"""
self.config = {"state_size" : 4,
"input_size" : 1,
"dt" : 0.02,
"max_step" : 500,
self.config = {"state_size": 4,
"input_size": 1,
"dt": 0.02,
"max_step": 500,
"input_lower_bound": [-3.],
"input_upper_bound": [3.],
"mp": 0.2,
@ -76,33 +78,33 @@ class CartPoleEnv(Env):
# x
d_x0 = self.curr_x[1]
# v_x
d_x1 = (u[0] + self.config["mp"] * np.sin(self.curr_x[2]) \
* (self.config["l"] * (self.curr_x[3]**2) \
+ self.config["g"] * np.cos(self.curr_x[2]))) \
/ (self.config["mc"] + self.config["mp"] \
* (np.sin(self.curr_x[2])**2))
d_x1 = (u[0] + self.config["mp"] * np.sin(self.curr_x[2])
* (self.config["l"] * (self.curr_x[3]**2)
+ self.config["g"] * np.cos(self.curr_x[2]))) \
/ (self.config["mc"] + self.config["mp"]
* (np.sin(self.curr_x[2])**2))
# theta
d_x2 = self.curr_x[3]
# v_theta
d_x3 = (-u[0] * np.cos(self.curr_x[2]) \
- self.config["mp"] * self.config["l"] * (self.curr_x[3]**2) \
* np.cos(self.curr_x[2]) * np.sin(self.curr_x[2]) \
- (self.config["mc"] + self.config["mp"]) * self.config["g"] \
* np.sin(self.curr_x[2])) \
/ (self.config["l"] * (self.config["mc"] + self.config["mp"] \
* (np.sin(self.curr_x[2])**2)))
d_x3 = (-u[0] * np.cos(self.curr_x[2])
- self.config["mp"] * self.config["l"] * (self.curr_x[3]**2)
* np.cos(self.curr_x[2]) * np.sin(self.curr_x[2])
- (self.config["mc"] + self.config["mp"]) * self.config["g"]
* np.sin(self.curr_x[2])) \
/ (self.config["l"] * (self.config["mc"] + self.config["mp"]
* (np.sin(self.curr_x[2])**2)))
next_x = self.curr_x +\
np.array([d_x0, d_x1, d_x2, d_x3]) * self.config["dt"]
np.array([d_x0, d_x1, d_x2, d_x3]) * self.config["dt"]
# TODO: costs
costs = 0.
costs += 0.1 * np.sum(u**2)
costs += 6. * self.curr_x[0]**2 \
+ 12. * (np.cos(self.curr_x[2]) + 1.)**2 \
+ 0.1 * self.curr_x[1]**2 \
+ 0.1 * self.curr_x[3]**2
+ 12. * (np.cos(self.curr_x[2]) + 1.)**2 \
+ 0.1 * self.curr_x[1]**2 \
+ 0.1 * self.curr_x[3]**2
# save history
self.history_x.append(next_x.flatten())
@ -114,8 +116,8 @@ class CartPoleEnv(Env):
self.step_count += 1
return next_x.flatten(), costs, \
self.step_count > self.config["max_step"], \
{"goal_state" : self.g_x}
self.step_count > self.config["max_step"], \
{"goal_state": self.g_x}
def plot_func(self, to_plot, i=None, history_x=None, history_g_x=None):
""" plot cartpole object function
@ -134,10 +136,10 @@ class CartPoleEnv(Env):
imgs["cart"] = to_plot.plot([], [], c="k")[0]
imgs["pole"] = to_plot.plot([], [], c="k", linewidth=5)[0]
imgs["center"] = to_plot.plot([], [], marker="o", c="k",\
imgs["center"] = to_plot.plot([], [], marker="o", c="k",
markersize=10)[0]
# centerline
to_plot.plot(np.linspace(-1., 1., num=50), np.zeros(50),\
to_plot.plot(np.linspace(-1., 1., num=50), np.zeros(50),
c="k", linestyle="dashed")
# set axis
@ -166,13 +168,13 @@ class CartPoleEnv(Env):
pole_y (numpy.ndarray): y data of pole
"""
# cart
cart_x, cart_y = square(curr_x[0], 0.,\
cart_x, cart_y = square(curr_x[0], 0.,
self.config["cart_size"], 0.)
# pole
pole_x = np.array([curr_x[0], curr_x[0] + self.config["l"] \
* np.cos(curr_x[2]-np.pi/2)])
pole_y = np.array([0., self.config["l"] \
* np.sin(curr_x[2]-np.pi/2)])
pole_x = np.array([curr_x[0], curr_x[0] + self.config["l"]
* np.cos(curr_x[2]-np.pi/2)])
pole_y = np.array([0., self.config["l"]
* np.sin(curr_x[2]-np.pi/2)])
return cart_x, cart_y, pole_x, pole_y

View File

@ -4,6 +4,7 @@ import numpy as np
logger = getLogger(__name__)
def calc_cost(pred_xs, input_sample, g_xs,
state_cost_fn, input_cost_fn, terminal_state_cost_fn):
""" calculate the cost
@ -24,14 +25,15 @@ def calc_cost(pred_xs, input_sample, g_xs,
# state cost
state_cost = 0.
if state_cost_fn is not None:
state_pred_par_cost = state_cost_fn(pred_xs[:, 1:-1, :], g_xs[:, 1:-1, :])
state_pred_par_cost = state_cost_fn(
pred_xs[:, 1:-1, :], g_xs[:, 1:-1, :])
state_cost = np.sum(np.sum(state_pred_par_cost, axis=-1), axis=-1)
# terminal cost
terminal_state_cost = 0.
if terminal_state_cost_fn is not None:
terminal_state_par_cost = terminal_state_cost_fn(pred_xs[:, -1, :],
g_xs[:, -1, :])
g_xs[:, -1, :])
terminal_state_cost = np.sum(terminal_state_par_cost, axis=-1)
# act cost

View File

@ -1,5 +1,6 @@
import numpy as np
class Env():
""" Environments class
Attributes:
@ -8,6 +9,7 @@ class Env():
history_x (list[numpy.ndarray]): historty of state, shape(step_count*state_size)
step_count (int): step count
"""
def __init__(self, config):
"""
"""

View File

@ -3,17 +3,19 @@ import scipy
from scipy import integrate
from .env import Env
class FirstOrderLagEnv(Env):
""" First Order Lag System Env
"""
def __init__(self, tau=0.63):
"""
"""
self.config = {"state_size" : 4,\
"input_size" : 2,\
"dt" : 0.05,\
"max_step" : 500,\
"input_lower_bound": [-0.5, -0.5],\
self.config = {"state_size": 4,
"input_size": 2,
"dt": 0.05,
"max_step": 500,
"input_lower_bound": [-0.5, -0.5],
"input_upper_bound": [0.5, 0.5],
}
@ -34,13 +36,13 @@ class FirstOrderLagEnv(Env):
"""
# continuous
Ac = np.array([[-1./tau, 0., 0., 0.],
[0., -1./tau, 0., 0.],
[1., 0., 0., 0.],
[0., 1., 0., 0.]])
[0., -1./tau, 0., 0.],
[1., 0., 0., 0.],
[0., 1., 0., 0.]])
Bc = np.array([[1./tau, 0.],
[0., 1./tau],
[0., 0.],
[0., 0.]])
[0., 1./tau],
[0., 0.],
[0., 0.]])
# to discrete system
A = scipy.linalg.expm(dt*Ac)
# B = np.matmul(np.matmul(scipy.linalg.expm(Ac*dt) -
@ -94,7 +96,7 @@ class FirstOrderLagEnv(Env):
self.config["input_upper_bound"])
next_x = np.matmul(self.A, self.curr_x[:, np.newaxis]) \
+ np.matmul(self.B, u[:, np.newaxis])
+ np.matmul(self.B, u[:, np.newaxis])
# cost
cost = 0
@ -111,8 +113,8 @@ class FirstOrderLagEnv(Env):
self.step_count += 1
return next_x.flatten(), cost, \
self.step_count > self.config["max_step"], \
{"goal_state" : self.g_x}
self.step_count > self.config["max_step"], \
{"goal_state": self.g_x}
def plot_func(self, to_plot, i=None, history_x=None, history_g_x=None):
"""

View File

@ -2,6 +2,8 @@ from .first_order_lag import FirstOrderLagEnv
from .two_wheeled import TwoWheeledConstEnv
from .two_wheeled import TwoWheeledTrackEnv
from .cartpole import CartPoleEnv
from .nonlinear_sample_system import NonlinearSampleSystemEnv
def make_env(args):
@ -13,5 +15,7 @@ def make_env(args):
return TwoWheeledTrackEnv()
elif args.env == "CartPole":
return CartPoleEnv()
elif args.env == "NonlinearSample":
return NonlinearSampleSystemEnv()
raise NotImplementedError("There is not {} Env".format(args.env))

View File

@ -0,0 +1,97 @@
import numpy as np
import scipy
from scipy import integrate
from .env import Env
from ..common.utils import update_state_with_Runge_Kutta
class NonlinearSampleSystemEnv(Env):
""" Nonlinear Sample Env
"""
def __init__(self):
"""
"""
self.config = {"state_size": 2,
"input_size": 1,
"dt": 0.01,
"max_step": 2000,
"input_lower_bound": [-0.5],
"input_upper_bound": [0.5],
}
super(NonlinearSampleSystemEnv, self).__init__(self.config)
def reset(self, init_x=np.array([2., 0.])):
""" reset state
Returns:
init_x (numpy.ndarray): initial state, shape(state_size, )
info (dict): information
"""
self.step_count = 0
self.curr_x = np.zeros(self.config["state_size"])
if init_x is not None:
self.curr_x = init_x
# goal
self.g_x = np.array([0., 0.])
# clear memory
self.history_x = []
self.history_g_x = []
return self.curr_x, {"goal_state": self.g_x}
def step(self, u):
"""
Args:
u (numpy.ndarray) : input, shape(input_size, )
Returns:
next_x (numpy.ndarray): next state, shape(state_size, )
cost (float): costs
done (bool): end the simulation or not
info (dict): information
"""
# clip action
u = np.clip(u,
self.config["input_lower_bound"],
self.config["input_upper_bound"])
functions = [self._func_x_1, self._func_x_2]
next_x = update_state_with_Runge_Kutta(self.curr_x, u,
functions, self.config["dt"],
batch=False)
# cost
cost = 0
cost = np.sum(u**2)
cost += np.sum((self.curr_x - self.g_x)**2)
# save history
self.history_x.append(next_x.flatten())
self.history_g_x.append(self.g_x.flatten())
# update
self.curr_x = next_x.flatten()
# update costs
self.step_count += 1
return next_x.flatten(), cost, \
self.step_count > self.config["max_step"], \
{"goal_state": self.g_x}
def _func_x_1(self, x, u):
x_dot = x[1]
return x_dot
def _func_x_2(self, x, u):
x_dot = (1. - x[0]**2 - x[1]**2) * x[1] - x[0] + u[0]
return x_dot
def plot_func(self, to_plot, i=None, history_x=None, history_g_x=None):
"""
"""
raise ValueError("NonlinearSampleSystemEnv does not have animation")

View File

@ -5,6 +5,7 @@ import matplotlib.pyplot as plt
from .env import Env
from ..plotters.plot_objs import circle_with_angle, square, circle
def step_two_wheeled_env(curr_x, u, dt, method="Oylar"):
""" step two wheeled enviroment
@ -28,19 +29,21 @@ def step_two_wheeled_env(curr_x, u, dt, method="Oylar"):
return next_x
class TwoWheeledConstEnv(Env):
""" Two wheeled robot with constant goal Env
"""
def __init__(self):
"""
"""
self.config = {"state_size" : 3,\
"input_size" : 2,\
"dt" : 0.01,\
"max_step" : 500,\
"input_lower_bound": (-1.5, -3.14),\
"input_upper_bound": (1.5, 3.14),\
"car_size": 0.2,\
self.config = {"state_size": 3,
"input_size": 2,
"dt": 0.01,
"max_step": 500,
"input_lower_bound": (-1.5, -3.14),
"input_upper_bound": (1.5, 3.14),
"car_size": 0.2,
"wheel_size": (0.075, 0.015)
}
@ -103,8 +106,8 @@ class TwoWheeledConstEnv(Env):
self.step_count += 1
return next_x.flatten(), costs, \
self.step_count > self.config["max_step"], \
{"goal_state" : self.g_x}
self.step_count > self.config["max_step"], \
{"goal_state": self.g_x}
def plot_func(self, to_plot, i=None, history_x=None, history_g_x=None):
""" plot cartpole object function
@ -139,7 +142,7 @@ class TwoWheeledConstEnv(Env):
# set imgs
# car imgs
car_x, car_y, car_angle_x, car_angle_y, \
left_tire_x, left_tire_y, right_tire_x, right_tire_y = \
left_tire_x, left_tire_y, right_tire_x, right_tire_y = \
self._plot_car(history_x[i])
to_plot["car"].set_data(car_x, car_y)
@ -160,46 +163,48 @@ class TwoWheeledConstEnv(Env):
self.config["car_size"], curr_x[2])
# left tire
center_x = (self.config["car_size"] \
center_x = (self.config["car_size"]
+ self.config["wheel_size"][1]) \
* np.cos(curr_x[2]-np.pi/2.) + curr_x[0]
center_y = (self.config["car_size"] \
* np.cos(curr_x[2]-np.pi/2.) + curr_x[0]
center_y = (self.config["car_size"]
+ self.config["wheel_size"][1]) \
* np.sin(curr_x[2]-np.pi/2.) + curr_x[1]
* np.sin(curr_x[2]-np.pi/2.) + curr_x[1]
left_tire_x, left_tire_y = \
square(center_x, center_y,
self.config["wheel_size"], curr_x[2])
# right tire
center_x = (self.config["car_size"] \
center_x = (self.config["car_size"]
+ self.config["wheel_size"][1]) \
* np.cos(curr_x[2]+np.pi/2.) + curr_x[0]
center_y = (self.config["car_size"] \
* np.cos(curr_x[2]+np.pi/2.) + curr_x[0]
center_y = (self.config["car_size"]
+ self.config["wheel_size"][1]) \
* np.sin(curr_x[2]+np.pi/2.) + curr_x[1]
* np.sin(curr_x[2]+np.pi/2.) + curr_x[1]
right_tire_x, right_tire_y = \
square(center_x, center_y,
self.config["wheel_size"], curr_x[2])
return car_x, car_y, car_angle_x, car_angle_y,\
left_tire_x, left_tire_y,\
right_tire_x, right_tire_y
left_tire_x, left_tire_y,\
right_tire_x, right_tire_y
class TwoWheeledTrackEnv(Env):
""" Two wheeled robot with constant goal Env
"""
def __init__(self):
"""
"""
self.config = {"state_size" : 3,\
"input_size" : 2,\
"dt" : 0.01,\
"max_step" : 1000,\
"input_lower_bound": (-1.5, -3.14),\
"input_upper_bound": (1.5, 3.14),\
"car_size": 0.2,\
self.config = {"state_size": 3,
"input_size": 2,
"dt": 0.01,
"max_step": 1000,
"input_lower_bound": (-1.5, -3.14),
"input_upper_bound": (1.5, 3.14),
"car_size": 0.2,
"wheel_size": (0.075, 0.015)
}
@ -220,12 +225,12 @@ class TwoWheeledTrackEnv(Env):
# circle
circle_1_x, circle_1_y = circle(linelength/2., circle_radius,
circle_radius, start=-np.pi/2., end=np.pi/2., n_point=50)
circle_1 = np.stack((circle_1_x , circle_1_y), axis=1)
circle_radius, start=-np.pi/2., end=np.pi/2., n_point=50)
circle_1 = np.stack((circle_1_x, circle_1_y), axis=1)
circle_2_x, circle_2_y = circle(-linelength/2., circle_radius,
circle_radius, start=np.pi/2., end=3*np.pi/2., n_point=50)
circle_2 = np.stack((circle_2_x , circle_2_y), axis=1)
circle_radius, start=np.pi/2., end=3*np.pi/2., n_point=50)
circle_2 = np.stack((circle_2_x, circle_2_y), axis=1)
road_pos = np.concatenate((line_1, circle_1, line_2, circle_2), axis=0)
@ -293,8 +298,8 @@ class TwoWheeledTrackEnv(Env):
self.step_count += 1
return next_x.flatten(), costs, \
self.step_count > self.config["max_step"], \
{"goal_state" : self.g_traj}
self.step_count > self.config["max_step"], \
{"goal_state": self.g_traj}
def plot_func(self, to_plot, i=None, history_x=None, history_g_x=None):
""" plot cartpole object function
@ -333,7 +338,7 @@ class TwoWheeledTrackEnv(Env):
# set imgs
# car imgs
car_x, car_y, car_angle_x, car_angle_y, \
left_tire_x, left_tire_y, right_tire_x, right_tire_y = \
left_tire_x, left_tire_y, right_tire_x, right_tire_y = \
self._plot_car(history_x[i])
to_plot["car"].set_data(car_x, car_y)
@ -354,29 +359,29 @@ class TwoWheeledTrackEnv(Env):
self.config["car_size"], curr_x[2])
# left tire
center_x = (self.config["car_size"] \
center_x = (self.config["car_size"]
+ self.config["wheel_size"][1]) \
* np.cos(curr_x[2]-np.pi/2.) + curr_x[0]
center_y = (self.config["car_size"] \
* np.cos(curr_x[2]-np.pi/2.) + curr_x[0]
center_y = (self.config["car_size"]
+ self.config["wheel_size"][1]) \
* np.sin(curr_x[2]-np.pi/2.) + curr_x[1]
* np.sin(curr_x[2]-np.pi/2.) + curr_x[1]
left_tire_x, left_tire_y = \
square(center_x, center_y,
self.config["wheel_size"], curr_x[2])
# right tire
center_x = (self.config["car_size"] \
center_x = (self.config["car_size"]
+ self.config["wheel_size"][1]) \
* np.cos(curr_x[2]+np.pi/2.) + curr_x[0]
center_y = (self.config["car_size"] \
* np.cos(curr_x[2]+np.pi/2.) + curr_x[0]
center_y = (self.config["car_size"]
+ self.config["wheel_size"][1]) \
* np.sin(curr_x[2]+np.pi/2.) + curr_x[1]
* np.sin(curr_x[2]+np.pi/2.) + curr_x[1]
right_tire_x, right_tire_y = \
square(center_x, center_y,
self.config["wheel_size"], curr_x[2])
return car_x, car_y, car_angle_x, car_angle_y,\
left_tire_x, left_tire_y,\
right_tire_x, right_tire_y
left_tire_x, left_tire_y,\
right_tire_x, right_tire_y

View File

@ -7,6 +7,7 @@ import six
import pickle
from logging import DEBUG, basicConfig, getLogger, FileHandler, StreamHandler, Formatter, Logger
def make_logger(save_dir):
"""
Args:
@ -33,6 +34,7 @@ def make_logger(save_dir):
# sh_handler = StreamHandler()
# logger.addHandler(sh_handler)
def int_tuple(s):
""" transform str to tuple
Args:
@ -42,6 +44,7 @@ def int_tuple(s):
"""
return tuple(int(i) for i in s.split(','))
def bool_flag(s):
""" transform str to bool flg
Args:
@ -54,6 +57,7 @@ def bool_flag(s):
msg = 'Invalid value "%s" for bool flag (should be 0 or 1)'
raise ValueError(msg % s)
def file_exists(path):
""" Check file existence on given path
Args:
@ -63,6 +67,7 @@ def file_exists(path):
"""
return os.path.exists(path)
def create_dir_if_not_exist(outdir):
""" Check directory existence and creates new directory if not exist
Args:
@ -77,6 +82,7 @@ def create_dir_if_not_exist(outdir):
return
os.makedirs(outdir)
def write_text_to_file(file_path, data):
""" Write given text data to file
Args:
@ -86,6 +92,7 @@ def write_text_to_file(file_path, data):
with open(file_path, 'w') as f:
f.write(data)
def read_text_from_file(file_path):
""" Read given file as text
Args:
@ -96,6 +103,7 @@ def read_text_from_file(file_path):
with open(file_path, 'r') as f:
return f.read()
def save_pickle(file_path, data):
""" pickle given data to file
Args:
@ -105,6 +113,7 @@ def save_pickle(file_path, data):
with open(file_path, 'wb') as f:
pickle.dump(data, f)
def load_pickle(file_path):
""" load pickled data from file
Args:
@ -118,6 +127,7 @@ def load_pickle(file_path):
else:
return pickle.load(f, encoding='bytes')
def prepare_output_dir(base_dir, args, time_format='%Y-%m-%d-%H%M%S'):
""" prepare a directory with current datetime as name.
created directory contains the command and args when the script was called as text file.

View File

@ -2,9 +2,11 @@ import numpy as np
from .model import Model
class CartPoleModel(Model):
""" cartpole model
"""
def __init__(self, config):
"""
"""
@ -31,21 +33,21 @@ class CartPoleModel(Model):
# x
d_x0 = curr_x[1]
# v_x
d_x1 = (u[0] + self.mp * np.sin(curr_x[2]) \
* (self.l * (curr_x[3]**2) \
+ self.g * np.cos(curr_x[2]))) \
/ (self.mc + self.mp * (np.sin(curr_x[2])**2))
d_x1 = (u[0] + self.mp * np.sin(curr_x[2])
* (self.l * (curr_x[3]**2)
+ self.g * np.cos(curr_x[2]))) \
/ (self.mc + self.mp * (np.sin(curr_x[2])**2))
# theta
d_x2 = curr_x[3]
# v_theta
d_x3 = (-u[0] * np.cos(curr_x[2]) \
- self.mp * self.l * (curr_x[3]**2) \
* np.cos(curr_x[2]) * np.sin(curr_x[2]) \
d_x3 = (-u[0] * np.cos(curr_x[2])
- self.mp * self.l * (curr_x[3]**2)
* np.cos(curr_x[2]) * np.sin(curr_x[2])
- (self.mc + self.mp) * self.g * np.sin(curr_x[2])) \
/ (self.l * (self.mc + self.mp * (np.sin(curr_x[2])**2)))
/ (self.l * (self.mc + self.mp * (np.sin(curr_x[2])**2)))
next_x = curr_x +\
np.array([d_x0, d_x1, d_x2, d_x3]) * self.dt
np.array([d_x0, d_x1, d_x2, d_x3]) * self.dt
return next_x
@ -53,21 +55,21 @@ class CartPoleModel(Model):
# x
d_x0 = curr_x[:, 1]
# v_x
d_x1 = (u[:, 0] + self.mp * np.sin(curr_x[:, 2]) \
* (self.l * (curr_x[:, 3]**2) \
+ self.g * np.cos(curr_x[:, 2]))) \
/ (self.mc + self.mp * (np.sin(curr_x[:, 2])**2))
d_x1 = (u[:, 0] + self.mp * np.sin(curr_x[:, 2])
* (self.l * (curr_x[:, 3]**2)
+ self.g * np.cos(curr_x[:, 2]))) \
/ (self.mc + self.mp * (np.sin(curr_x[:, 2])**2))
# theta
d_x2 = curr_x[:, 3]
# v_theta
d_x3 = (-u[:, 0] * np.cos(curr_x[:, 2]) \
- self.mp * self.l * (curr_x[:, 3]**2) \
* np.cos(curr_x[:, 2]) * np.sin(curr_x[:, 2]) \
d_x3 = (-u[:, 0] * np.cos(curr_x[:, 2])
- self.mp * self.l * (curr_x[:, 3]**2)
* np.cos(curr_x[:, 2]) * np.sin(curr_x[:, 2])
- (self.mc + self.mp) * self.g * np.sin(curr_x[:, 2])) \
/ (self.l * (self.mc + self.mp * (np.sin(curr_x[:, 2])**2)))
/ (self.l * (self.mc + self.mp * (np.sin(curr_x[:, 2])**2)))
next_x = curr_x +\
np.stack((d_x0, d_x1, d_x2, d_x3), axis=1) * self.dt
np.stack((d_x0, d_x1, d_x2, d_x3), axis=1) * self.dt
return next_x
@ -95,36 +97,36 @@ class CartPoleModel(Model):
# f_theta
tmp = ((self.mc + self.mp * np.sin(xs[:, 2])**2)**(-2)) \
* self.mp * 2. * np.sin(xs[:, 2]) * np.cos(xs[:, 2])
* self.mp * 2. * np.sin(xs[:, 2]) * np.cos(xs[:, 2])
tmp2 = 1. / (self.mc + self.mp * (np.sin(xs[:, 2])**2))
f_x[:, 1, 2] = - us[:, 0] * tmp \
- tmp * (self.mp * np.sin(xs[:, 2]) \
* (self.l * xs[:, 3]**2 \
- tmp * (self.mp * np.sin(xs[:, 2])
* (self.l * xs[:, 3]**2
+ self.g * np.cos(xs[:, 2]))) \
+ tmp2 * (self.mp * np.cos(xs[:, 2]) * self.l \
* xs[:, 3]**2 \
+ self.mp * self.g * (np.cos(xs[:, 2])**2 \
- np.sin(xs[:, 2])**2))
+ tmp2 * (self.mp * np.cos(xs[:, 2]) * self.l
* xs[:, 3]**2
+ self.mp * self.g * (np.cos(xs[:, 2])**2
- np.sin(xs[:, 2])**2))
f_x[:, 3, 2] = - 1. / self.l * tmp \
* (-us[:, 0] * np.cos(xs[:, 2]) \
- self.mp * self.l * (xs[:, 3]**2) \
* np.cos(xs[:, 2]) * np.sin(xs[:, 2]) \
- (self.mc + self.mp) * self.g * np.sin(xs[:, 2])) \
+ 1. / self.l * tmp2 \
* (us[:, 0] * np.sin(xs[:, 2]) \
- self.mp * self.l * xs[:, 3]**2 \
* (np.cos(xs[:, 2])**2 - np.sin(xs[:, 2])**2) \
- (self.mc + self.mp) \
* self.g * np.cos(xs[:, 2]))
* (-us[:, 0] * np.cos(xs[:, 2])
- self.mp * self.l * (xs[:, 3]**2)
* np.cos(xs[:, 2]) * np.sin(xs[:, 2])
- (self.mc + self.mp) * self.g * np.sin(xs[:, 2])) \
+ 1. / self.l * tmp2 \
* (us[:, 0] * np.sin(xs[:, 2])
- self.mp * self.l * xs[:, 3]**2
* (np.cos(xs[:, 2])**2 - np.sin(xs[:, 2])**2)
- (self.mc + self.mp)
* self.g * np.cos(xs[:, 2]))
# f_theta_dot
f_x[:, 1, 3] = tmp2 * (self.mp * np.sin(xs[:, 2]) \
* self.l * 2 * xs[:, 3])
f_x[:, 1, 3] = tmp2 * (self.mp * np.sin(xs[:, 2])
* self.l * 2 * xs[:, 3])
f_x[:, 2, 3] = np.ones(pred_len)
f_x[:, 3, 3] = 1. / self.l * tmp2 \
* (-2. * self.mp * self.l * xs[:, 3] \
* np.cos(xs[:, 2]) * np.sin(xs[:, 2]))
* (-2. * self.mp * self.l * xs[:, 3]
* np.cos(xs[:, 2]) * np.sin(xs[:, 2]))
return f_x * dt + np.eye(state_size) # to discrete form
@ -150,8 +152,8 @@ class CartPoleModel(Model):
f_u[:, 1, 0] = 1. / (self.mc + self.mp * (np.sin(xs[:, 2])**2))
f_u[:, 3, 0] = -np.cos(xs[:, 2]) \
/ (self.l * (self.mc \
+ self.mp * (np.sin(xs[:, 2])**2)))
/ (self.l * (self.mc
+ self.mp * (np.sin(xs[:, 2])**2)))
return f_u * dt # to discrete form

View File

@ -3,6 +3,7 @@ import scipy.linalg
from scipy import integrate
from .model import LinearModel
class FirstOrderLagModel(LinearModel):
""" first order lag model
Attributes:
@ -10,13 +11,15 @@ class FirstOrderLagModel(LinearModel):
u (numpy.ndarray):
history_pred_xs (numpy.ndarray):
"""
def __init__(self, config, tau=0.63):
"""
Args:
tau (float): time constant
"""
# param
self.A, self.B = self._to_state_space(tau, dt=config.DT) # discrete system
self.A, self.B = self._to_state_space(
tau, dt=config.DT) # discrete system
super(FirstOrderLagModel, self).__init__(self.A, self.B)
@staticmethod
@ -31,20 +34,21 @@ class FirstOrderLagModel(LinearModel):
"""
# continuous
Ac = np.array([[-1./tau, 0., 0., 0.],
[0., -1./tau, 0., 0.],
[1., 0., 0., 0.],
[0., 1., 0., 0.]])
[0., -1./tau, 0., 0.],
[1., 0., 0., 0.],
[0., 1., 0., 0.]])
Bc = np.array([[1./tau, 0.],
[0., 1./tau],
[0., 0.],
[0., 0.]])
[0., 1./tau],
[0., 0.],
[0., 0.]])
# to discrete system
A = scipy.linalg.expm(dt*Ac)
# B = np.matmul(np.matmul(scipy.linalg.expm(Ac*dt)-scipy.linalg.expm(Ac*0.), np.linalg.inv(Ac)), Bc)
B = np.zeros_like(Bc)
for m in range(Bc.shape[0]):
for n in range(Bc.shape[1]):
integrate_fn = lambda tau: np.matmul(scipy.linalg.expm(Ac*tau), Bc)[m, n]
def integrate_fn(tau): return np.matmul(
scipy.linalg.expm(Ac*tau), Bc)[m, n]
sol = integrate.quad(integrate_fn, 0, dt)
B[m, n] = sol[0]

View File

@ -1,6 +1,8 @@
from .first_order_lag import FirstOrderLagModel
from .two_wheeled import TwoWheeledModel
from .cartpole import CartPoleModel
from .nonlinear_sample_system import NonlinearSampleSystemModel
def make_model(args, config):
@ -10,5 +12,7 @@ def make_model(args, config):
return TwoWheeledModel(config)
elif args.env == "CartPole":
return CartPoleModel(config)
elif args.env == "NonlinearSample":
return NonlinearSampleSystemModel(config)
raise NotImplementedError("There is not {} Model".format(args.env))

View File

@ -1,8 +1,10 @@
import numpy as np
class Model():
""" base class of model
"""
def __init__(self):
"""
"""
@ -22,7 +24,7 @@ class Model():
or shape(pop_size, pred_len+1, state_size)
"""
if len(us.shape) == 3:
pred_xs =self._predict_traj_alltogether(curr_x, us)
pred_xs = self._predict_traj_alltogether(curr_x, us)
elif len(us.shape) == 2:
pred_xs = self._predict_traj(curr_x, us)
else:
@ -75,8 +77,8 @@ class Model():
# next_x.shape = (pop_size, state_size)
next_x = self.predict_next_state(x, us[t])
# update
pred_xs = np.concatenate((pred_xs, next_x[np.newaxis, :, :]),\
axis=0)
pred_xs = np.concatenate((pred_xs, next_x[np.newaxis, :, :]),
axis=0)
x = next_x
return np.transpose(pred_xs, (1, 0, 2))
@ -99,13 +101,13 @@ class Model():
# get size
(pred_len, input_size) = us.shape
# pred final adjoint state
lam = self.predict_terminal_adjoint_state(xs[-1],\
lam = self.predict_terminal_adjoint_state(xs[-1],
terminal_g_x=g_xs[-1])
lams = lam[np.newaxis, :]
for t in range(pred_len-1, 0, -1):
prev_lam = \
self.predict_adjoint_state(lam, xs[t], us[t],\
self.predict_adjoint_state(lam, xs[t], us[t],
goal=g_xs[t], t=t)
# update
lams = np.concatenate((prev_lam[np.newaxis, :], lams), axis=0)
@ -175,6 +177,7 @@ class Model():
raise NotImplementedError("Implement hessian of model \
with respect to the input")
class LinearModel(Model):
""" discrete linear model, x[k+1] = Ax[k] + Bu[k]
@ -182,6 +185,7 @@ class LinearModel(Model):
A (numpy.ndarray): shape(state_size, state_size)
B (numpy.ndarray): shape(state_size, input_size)
"""
def __init__(self, A, B):
"""
"""
@ -203,7 +207,7 @@ class LinearModel(Model):
"""
if len(u.shape) == 1:
next_x = np.matmul(self.A, curr_x[:, np.newaxis]) \
+ np.matmul(self.B, u[:, np.newaxis])
+ np.matmul(self.B, u[:, np.newaxis])
return next_x.flatten()

View File

@ -0,0 +1,164 @@
import numpy as np
from .model import Model
from ..common.utils import update_state_with_Runge_Kutta
class NonlinearSampleSystemModel(Model):
""" nonlinear sample system model
"""
def __init__(self, config):
"""
"""
super(NonlinearSampleSystemModel, self).__init__()
self.dt = config.DT
def predict_next_state(self, curr_x, u):
""" predict next state
Args:
curr_x (numpy.ndarray): current state, shape(state_size, ) or
shape(pop_size, state_size)
u (numpy.ndarray): input, shape(input_size, ) or
shape(pop_size, input_size)
Returns:
next_x (numpy.ndarray): next state, shape(state_size, ) or
shape(pop_size, state_size)
"""
if len(u.shape) == 1:
func_1 = self._func_x_1
func_2 = self._func_x_2
functions = [func_1, func_2]
next_x = update_state_with_Runge_Kutta(
curr_x, u, functions, batch=False)
return next_x
elif len(u.shape) == 2:
def func_1(xs, us): return self._func_x_1(xs, us, batch=True)
def func_2(xs, us): return self._func_x_2(xs, us, batch=True)
functions = [func_1, func_2]
next_x = update_state_with_Runge_Kutta(
curr_x, u, functions, batch=True)
return next_x
def _func_x_1(self, x, u, batch=False):
if not batch:
x_dot = x[1]
else:
x_dot = x[:, 1]
return x_dot
def _func_x_2(self, x, u, batch=False):
if not batch:
x_dot = (1. - x[0]**2 - x[1]**2) * x[1] - x[0] + u[0]
else:
x_dot = (1. - x[:, 0]**2 - x[:, 1]**2) * \
x[:, 1] - x[:, 0] + u[:, 0]
return x_dot
def calc_f_x(self, xs, us, dt):
""" gradient of model with respect to the state in batch form
Args:
xs (numpy.ndarray): state, shape(pred_len+1, state_size)
us (numpy.ndarray): input, shape(pred_len, input_size,)
Return:
f_x (numpy.ndarray): gradient of model with respect to x,
shape(pred_len, state_size, state_size)
Notes:
This should be discrete form !!
"""
# get size
(_, state_size) = xs.shape
(pred_len, _) = us.shape
f_x = np.zeros((pred_len, state_size, state_size))
f_x[:, 0, 1] = 1.
f_x[:, 1, 0] = 2. * xs[:, 0] * xs[:, 1] - 1.
f_x[:, 1, 1] = - 2. * xs[:, 1] * xs[:, 1] + \
(1. - xs[:, 0]**2 - xs[:, 1]**2)
return f_x * dt + np.eye(state_size) # to discrete form
def calc_f_u(self, xs, us, dt):
""" gradient of model with respect to the input in batch form
Args:
xs (numpy.ndarray): state, shape(pred_len+1, state_size)
us (numpy.ndarray): input, shape(pred_len, input_size,)
Return:
f_u (numpy.ndarray): gradient of model with respect to x,
shape(pred_len, state_size, input_size)
Notes:
This should be discrete form !!
"""
# get size
(_, state_size) = xs.shape
(pred_len, input_size) = us.shape
f_u = np.zeros((pred_len, state_size, input_size))
f_u[:, 1, 0] = 1.
return f_u * dt # to discrete form
def calc_f_xx(self, xs, us, dt):
""" hessian of model with respect to the state in batch form
Args:
xs (numpy.ndarray): state, shape(pred_len+1, state_size)
us (numpy.ndarray): input, shape(pred_len, input_size,)
Return:
f_xx (numpy.ndarray): gradient of model with respect to x,
shape(pred_len, state_size, state_size, state_size)
"""
# get size
(_, state_size) = xs.shape
(pred_len, _) = us.shape
f_xx = np.zeros((pred_len, state_size, state_size, state_size))
raise NotImplementedError
def calc_f_ux(self, xs, us, dt):
""" hessian of model with respect to state and input in batch form
Args:
xs (numpy.ndarray): state, shape(pred_len+1, state_size)
us (numpy.ndarray): input, shape(pred_len, input_size,)
Return:
f_ux (numpy.ndarray): gradient of model with respect to x,
shape(pred_len, state_size, input_size, state_size)
"""
# get size
(_, state_size) = xs.shape
(pred_len, input_size) = us.shape
f_ux = np.zeros((pred_len, state_size, input_size, state_size))
raise NotImplementedError
def calc_f_uu(self, xs, us, dt):
""" hessian of model with respect to input in batch form
Args:
xs (numpy.ndarray): state, shape(pred_len+1, state_size)
us (numpy.ndarray): input, shape(pred_len, input_size,)
Return:
f_uu (numpy.ndarray): gradient of model with respect to x,
shape(pred_len, state_size, input_size, input_size)
"""
# get size
(_, state_size) = xs.shape
(pred_len, input_size) = us.shape
f_uu = np.zeros((pred_len, state_size, input_size, input_size))
raise NotImplementedError

View File

@ -2,9 +2,11 @@ import numpy as np
from .model import Model
class TwoWheeledModel(Model):
""" two wheeled model
"""
def __init__(self, config):
"""
"""

View File

@ -1,9 +1,11 @@
import numpy as np
from .planner import Planner
class ClosestPointPlanner(Planner):
""" This planner make goal state according to goal path
"""
def __init__(self, config):
"""
"""

View File

@ -1,9 +1,11 @@
import numpy as np
from .planner import Planner
class ConstantPlanner(Planner):
""" This planner make constant goal state
"""
def __init__(self, config):
"""
"""

View File

@ -1,6 +1,7 @@
from .const_planner import ConstantPlanner
from .closest_point_planner import ClosestPointPlanner
def make_planner(args, config):
if args.env == "FirstOrderLag":
@ -11,5 +12,8 @@ def make_planner(args, config):
return ClosestPointPlanner(config)
elif args.env == "CartPole":
return ConstantPlanner(config)
elif args.env == "NonlinearSample":
return ConstantPlanner(config)
raise NotImplementedError("There is not {} Planner".format(args.planner_type))
raise NotImplementedError(
"There is not {} Planner".format(args.planner_type))

View File

@ -1,8 +1,10 @@
import numpy as np
class Planner():
"""
"""
def __init__(self):
"""
"""

View File

@ -8,9 +8,11 @@ import matplotlib.animation as animation
logger = getLogger(__name__)
class Animator():
""" animation class
"""
def __init__(self, env, args=None):
"""
"""
@ -65,7 +67,7 @@ class Animator():
"""
# set up animation figures
self._setup()
_update_img = lambda i: self._update_img(i, history_x, history_g_x)
def _update_img(i): return self._update_img(i, history_x, history_g_x)
# Set up formatting for the movie files
Writer = animation.writers['ffmpeg']

View File

@ -5,6 +5,7 @@ import matplotlib.pyplot as plt
from ..helper import save_pickle, load_pickle
def plot_result(history, history_g=None, ylabel="x",
save_dir="./result", name="state_history"):
"""
@ -28,7 +29,7 @@ def plot_result(history, history_g=None, ylabel="x",
def plot(axis, history, history_g=None):
axis.plot(range(iters), history, c="r", linewidth=3)
if history_g is not None:
axis.plot(range(iters), history_g,\
axis.plot(range(iters), history_g,
c="b", linewidth=3, label="goal")
if i < size:
@ -47,6 +48,7 @@ def plot_result(history, history_g=None, ylabel="x",
axis1.legend(ncol=1, bbox_to_anchor=(0., 1.02, 1., 0.102), loc=3)
figure.savefig(path, bbox_inches="tight", pad_inches=0.05)
def plot_results(history_x, history_u, history_g=None, args=None):
"""
@ -64,12 +66,13 @@ def plot_results(history_x, history_u, history_g=None, args=None):
controller_type = args.controller_type
plot_result(history_x, history_g=history_g, ylabel="x",
name= env + "-state_history",
name=env + "-state_history",
save_dir="./result/" + controller_type)
plot_result(history_u, history_g=np.zeros_like(history_u), ylabel="u",
name= env + "-input_history",
name=env + "-input_history",
save_dir="./result/" + controller_type)
def save_plot_data(history_x, history_u, history_g=None, args=None):
""" save plot data
@ -98,6 +101,7 @@ def save_plot_data(history_x, history_u, history_g=None, args=None):
env + "-history_g.pkl")
save_pickle(path, history_g)
def load_plot_data(env, controller_type, result_dir="./result"):
"""
Args:
@ -123,6 +127,7 @@ def load_plot_data(env, controller_type, result_dir="./result"):
return history_x, history_u, history_g
def plot_multi_result(histories, histories_g=None, labels=None, ylabel="x",
save_dir="./result", name="state_history"):
"""
@ -146,17 +151,17 @@ def plot_multi_result(histories, histories_g=None, labels=None, ylabel="x",
axis.plot(range(iters), history,
linewidth=3, label=label, alpha=0.7, linestyle="dashed")
if history_g is not None:
axis.plot(range(iters), history_g,\
axis.plot(range(iters), history_g,
c="b", linewidth=3)
if i < size:
for j, (history, history_g) \
in enumerate(zip(histories, histories_g)):
in enumerate(zip(histories, histories_g)):
plot(axis1, history[:, i],
history_g=history_g[:, i], label=labels[j])
if i+1 < size:
for j, (history, history_g) in \
enumerate(zip(histories, histories_g)):
enumerate(zip(histories, histories_g)):
plot(axis2, history[:, i+1],
history_g=history_g[:, i+1], label=labels[j])

View File

@ -5,6 +5,7 @@ import matplotlib.pyplot as plt
from ..common.utils import rotate_pos
def circle(center_x, center_y, radius, start=0., end=2*np.pi, n_point=100):
""" Create circle matrix
@ -29,6 +30,7 @@ def circle(center_x, center_y, radius, start=0., end=2*np.pi, n_point=100):
return np.array(circle_xs), np.array(circle_ys)
def circle_with_angle(center_x, center_y, radius, angle):
""" Create circle matrix with angle line matrix
@ -50,6 +52,7 @@ def circle_with_angle(center_x, center_y, radius, angle):
return circle_x, circle_y, angle_x, angle_y
def square(center_x, center_y, shape, angle):
""" Create square
@ -77,6 +80,7 @@ def square(center_x, center_y, shape, angle):
return trans_points[:, 0], trans_points[:, 1]
def square_with_angle(center_x, center_y, shape, angle):
""" Create square with angle line

View File

@ -1,4 +1,5 @@
from .runner import ExpRunner
def make_runner(args):
return ExpRunner()

View File

@ -4,9 +4,11 @@ import numpy as np
logger = getLogger(__name__)
class ExpRunner():
""" experiment runner
"""
def __init__(self):
"""
"""
@ -46,6 +48,6 @@ class ExpRunner():
score += cost
step_count += 1
logger.debug("Controller type = {}, Score = {}"\
logger.debug("Controller type = {}, Score = {}"
.format(controller, score))
return np.array(history_x), np.array(history_u), np.array(history_g)

View File

@ -71,6 +71,7 @@ There are 4 example environments, "FirstOrderLag", "TwoWheeledConst", "TwoWheele
| Two wheeled System (Constant Goal) | x | ✓ | 3 | 2 |
| Two wheeled System (Moving Goal) | x | ✓ | 3 | 2 |
| Cartpole (Swing up) | x | ✓ | 4 | 1 |
| Nonlinear Sample System Env | x | ✓ | 2 | 1 |
All states and inputs of environments are continuous.
**It should be noted that the algorithms for linear model could be applied to nonlinear enviroments if you have linealized the model of nonlinear environments.**

Binary file not shown.

After

Width:  |  Height:  |  Size: 20 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 28 KiB

View File

@ -6,7 +6,8 @@ import numpy as np
import matplotlib.pyplot as plt
from PythonLinearNonlinearControl.plotters.plot_func import load_plot_data, \
plot_multi_result
plot_multi_result
def run(args):
@ -39,7 +40,8 @@ def run(args):
ylabel="x")
plot_multi_result(history_us, histories_g=np.zeros_like(history_us),
labels=controllers, ylabel="u", name="input_history")
labels=controllers, ylabel="u", name="input_history")
def main():
parser = argparse.ArgumentParser()
@ -51,5 +53,6 @@ def main():
run(args)
if __name__ == "__main__":
main()

View File

@ -8,9 +8,10 @@ from PythonLinearNonlinearControl.models.make_models import make_model
from PythonLinearNonlinearControl.envs.make_envs import make_env
from PythonLinearNonlinearControl.runners.make_runners import make_runner
from PythonLinearNonlinearControl.plotters.plot_func import plot_results, \
save_plot_data
save_plot_data
from PythonLinearNonlinearControl.plotters.animator import Animator
def run(args):
# logger
make_logger(args.result_dir)
@ -44,17 +45,19 @@ def run(args):
animator = Animator(env, args=args)
animator.draw(history_x, history_g)
def main():
parser = argparse.ArgumentParser()
parser.add_argument("--controller_type", type=str, default="CEM")
parser.add_argument("--env", type=str, default="TwoWheeledTrack")
parser.add_argument("--save_anim", type=bool_flag, default=1)
parser.add_argument("--controller_type", type=str, default="DDP")
parser.add_argument("--env", type=str, default="NonlinearSample")
parser.add_argument("--save_anim", type=bool_flag, default=0)
parser.add_argument("--result_dir", type=str, default="./result")
args = parser.parse_args()
run(args)
if __name__ == "__main__":
main()