Merge pull request #11 from Shunichi09/develop

Add nonlinear sample Env
This commit is contained in:
Shunichi09 2021-02-13 21:21:30 +09:00 committed by GitHub
commit 0d443f7ed5
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
46 changed files with 1226 additions and 560 deletions

View File

@ -6,6 +6,8 @@
| Two wheeled System (Constant Goal) | x | ✓ | 3 | 2 | | Two wheeled System (Constant Goal) | x | ✓ | 3 | 2 |
| Two wheeled System (Moving Goal) (Coming soon) | x | ✓ | 3 | 2 | | Two wheeled System (Moving Goal) (Coming soon) | x | ✓ | 3 | 2 |
| Cartpole (Swing up) | x | ✓ | 4 | 1 | | Cartpole (Swing up) | x | ✓ | 4 | 1 |
| Nonlinear Sample System Env | x | ✓ | 2 | 1 |
## [FistOrderLagEnv](PythonLinearNonlinearControl/envs/first_order_lag.py) ## [FistOrderLagEnv](PythonLinearNonlinearControl/envs/first_order_lag.py)
@ -54,3 +56,13 @@ mc = 1, mp = 0.2, l = 0.5, g = 9.81
### Cost. ### Cost.
<img src="assets/cartpole_score.png" width="300"> <img src="assets/cartpole_score.png" width="300">
## [Nonlinear Sample System Env](PythonLinearNonlinearControl/envs/nonlinear_sample_system.py)
## System equation.
<img src="assets/nonlinear_sample_system.png" width="400">
### Cost.
<img src="assets/nonlinear_sample_system_score.png" width="400">

View File

@ -1,5 +1,6 @@
import numpy as np import numpy as np
def rotate_pos(pos, angle): def rotate_pos(pos, angle):
""" Transformation the coordinate in the angle """ Transformation the coordinate in the angle
@ -14,6 +15,7 @@ def rotate_pos(pos, angle):
return np.dot(pos, rot_mat.T) return np.dot(pos, rot_mat.T)
def fit_angle_in_range(angles, min_angle=-np.pi, max_angle=np.pi): def fit_angle_in_range(angles, min_angle=-np.pi, max_angle=np.pi):
""" Check angle range and correct the range """ Check angle range and correct the range
@ -42,3 +44,75 @@ def fit_angle_in_range(angles, min_angle=-np.pi, max_angle=np.pi):
output = np.minimum(max_angle, np.maximum(min_angle, output)) output = np.minimum(max_angle, np.maximum(min_angle, output))
return output.reshape(output_shape) return output.reshape(output_shape)
def update_state_with_Runge_Kutta(state, u, functions, dt=0.01, batch=True):
""" update state in Runge Kutta methods
Args:
state (array-like): state of system
u (array-like): input of system
functions (list): update function of each state,
each function will be called like func(state, u)
We expect that this function returns differential of each state
dt (float): float in seconds
batch (bool): state and u is given by batch or not
Returns:
next_state (np.array): next state of system
Notes:
sample of function is as follows:
def func_x(self, x_1, x_2, u):
x_dot = (1. - x_1**2 - x_2**2) * x_2 - x_1 + u
return x_dot
Note that the function return x_dot.
"""
if not batch:
state_size = len(state)
assert state_size == len(functions), \
"Invalid functions length, You need to give the state size functions"
k0 = np.zeros(state_size)
k1 = np.zeros(state_size)
k2 = np.zeros(state_size)
k3 = np.zeros(state_size)
for i, func in enumerate(functions):
k0[i] = dt * func(state, u)
for i, func in enumerate(functions):
k1[i] = dt * func(state + k0 / 2., u)
for i, func in enumerate(functions):
k2[i] = dt * func(state + k1 / 2., u)
for i, func in enumerate(functions):
k3[i] = dt * func(state + k2, u)
return state + (k0 + 2. * k1 + 2. * k2 + k3) / 6.
else:
batch_size, state_size = state.shape
assert state_size == len(functions), \
"Invalid functions length, You need to give the state size functions"
k0 = np.zeros((batch_size, state_size))
k1 = np.zeros((batch_size, state_size))
k2 = np.zeros((batch_size, state_size))
k3 = np.zeros((batch_size, state_size))
for i, func in enumerate(functions):
k0[:, i] = dt * func(state, u)
for i, func in enumerate(functions):
k1[:, i] = dt * func(state + k0 / 2., u)
for i, func in enumerate(functions):
k2[:, i] = dt * func(state + k1 / 2., u)
for i, func in enumerate(functions):
k3[:, i] = dt * func(state + k2, u)
return state + (k0 + 2. * k1 + 2. * k2 + k3) / 6.

View File

@ -1,5 +1,6 @@
import numpy as np import numpy as np
class CartPoleConfigModule(): class CartPoleConfigModule():
# parameters # parameters
ENV_NAME = "CartPole-v0" ENV_NAME = "CartPole-v0"
@ -12,7 +13,7 @@ class CartPoleConfigModule():
DT = 0.02 DT = 0.02
# cost parameters # cost parameters
R = np.diag([0.01]) # 0.01 is worked for MPPI and CEM and MPPIWilliams R = np.diag([0.01]) # 0.01 is worked for MPPI and CEM and MPPIWilliams
# 1. is worked for iLQR # 1. is worked for iLQR
TERMINAL_WEIGHT = 1. TERMINAL_WEIGHT = 1.
Q = None Q = None
Sf = None Sf = None
@ -39,40 +40,40 @@ class CartPoleConfigModule():
"num_elites": 50, "num_elites": 50,
"max_iters": 15, "max_iters": 15,
"alpha": 0.3, "alpha": 0.3,
"init_var":9., "init_var": 9.,
"threshold":0.001 "threshold": 0.001
}, },
"MPPI":{ "MPPI": {
"beta" : 0.6, "beta": 0.6,
"popsize": 5000, "popsize": 5000,
"kappa": 0.9, "kappa": 0.9,
"noise_sigma": 0.5, "noise_sigma": 0.5,
}, },
"MPPIWilliams":{ "MPPIWilliams": {
"popsize": 5000, "popsize": 5000,
"lambda": 1., "lambda": 1.,
"noise_sigma": 0.9, "noise_sigma": 0.9,
}, },
"iLQR":{ "iLQR": {
"max_iter": 500, "max_iter": 500,
"init_mu": 1., "init_mu": 1.,
"mu_min": 1e-6, "mu_min": 1e-6,
"mu_max": 1e10, "mu_max": 1e10,
"init_delta": 2., "init_delta": 2.,
"threshold": 1e-6, "threshold": 1e-6,
}, },
"DDP":{ "DDP": {
"max_iter": 500, "max_iter": 500,
"init_mu": 1., "init_mu": 1.,
"mu_min": 1e-6, "mu_min": 1e-6,
"mu_max": 1e10, "mu_max": 1e10,
"init_delta": 2., "init_delta": 2.,
"threshold": 1e-6, "threshold": 1e-6,
}, },
"NMPC-CGMRES":{ "NMPC-CGMRES": {
}, },
"NMPC-Newton":{ "NMPC-Newton": {
}, },
} }
@staticmethod @staticmethod
@ -103,21 +104,21 @@ class CartPoleConfigModule():
""" """
if len(x.shape) > 2: if len(x.shape) > 2:
return (6. * (x[:, :, 0]**2) \ return (6. * (x[:, :, 0]**2)
+ 12. * ((np.cos(x[:, :, 2]) + 1.)**2) \ + 12. * ((np.cos(x[:, :, 2]) + 1.)**2)
+ 0.1 * (x[:, :, 1]**2) \ + 0.1 * (x[:, :, 1]**2)
+ 0.1 * (x[:, :, 3]**2))[:, :, np.newaxis] + 0.1 * (x[:, :, 3]**2))[:, :, np.newaxis]
elif len(x.shape) > 1: elif len(x.shape) > 1:
return (6. * (x[:, 0]**2) \ return (6. * (x[:, 0]**2)
+ 12. * ((np.cos(x[:, 2]) + 1.)**2) \ + 12. * ((np.cos(x[:, 2]) + 1.)**2)
+ 0.1 * (x[:, 1]**2) \ + 0.1 * (x[:, 1]**2)
+ 0.1 * (x[:, 3]**2))[:, np.newaxis] + 0.1 * (x[:, 3]**2))[:, np.newaxis]
return 6. * (x[0]**2) \ return 6. * (x[0]**2) \
+ 12. * ((np.cos(x[2]) + 1.)**2) \ + 12. * ((np.cos(x[2]) + 1.)**2) \
+ 0.1 * (x[1]**2) \ + 0.1 * (x[1]**2) \
+ 0.1 * (x[3]**2) + 0.1 * (x[3]**2)
@staticmethod @staticmethod
def terminal_state_cost_fn(terminal_x, terminal_g_x): def terminal_state_cost_fn(terminal_x, terminal_g_x):
@ -134,18 +135,18 @@ class CartPoleConfigModule():
""" """
if len(terminal_x.shape) > 1: if len(terminal_x.shape) > 1:
return (6. * (terminal_x[:, 0]**2) \ return (6. * (terminal_x[:, 0]**2)
+ 12. * ((np.cos(terminal_x[:, 2]) + 1.)**2) \ + 12. * ((np.cos(terminal_x[:, 2]) + 1.)**2)
+ 0.1 * (terminal_x[:, 1]**2) \ + 0.1 * (terminal_x[:, 1]**2)
+ 0.1 * (terminal_x[:, 3]**2))[:, np.newaxis] \ + 0.1 * (terminal_x[:, 3]**2))[:, np.newaxis] \
* CartPoleConfigModule.TERMINAL_WEIGHT
return (6. * (terminal_x[0]**2) \
+ 12. * ((np.cos(terminal_x[2]) + 1.)**2) \
+ 0.1 * (terminal_x[1]**2) \
+ 0.1 * (terminal_x[3]**2)) \
* CartPoleConfigModule.TERMINAL_WEIGHT * CartPoleConfigModule.TERMINAL_WEIGHT
return (6. * (terminal_x[0]**2)
+ 12. * ((np.cos(terminal_x[2]) + 1.)**2)
+ 0.1 * (terminal_x[1]**2)
+ 0.1 * (terminal_x[3]**2)) \
* CartPoleConfigModule.TERMINAL_WEIGHT
@staticmethod @staticmethod
def gradient_cost_fn_with_state(x, g_x, terminal=False): def gradient_cost_fn_with_state(x, g_x, terminal=False):
""" gradient of costs with respect to the state """ gradient of costs with respect to the state
@ -163,7 +164,7 @@ class CartPoleConfigModule():
cost_dx1 = 0.2 * x[:, 1] cost_dx1 = 0.2 * x[:, 1]
cost_dx2 = 24. * (1 + np.cos(x[:, 2])) * -np.sin(x[:, 2]) cost_dx2 = 24. * (1 + np.cos(x[:, 2])) * -np.sin(x[:, 2])
cost_dx3 = 0.2 * x[:, 3] cost_dx3 = 0.2 * x[:, 3]
cost_dx = np.stack((cost_dx0, cost_dx1,\ cost_dx = np.stack((cost_dx0, cost_dx1,
cost_dx2, cost_dx3), axis=1) cost_dx2, cost_dx3), axis=1)
return cost_dx return cost_dx
@ -206,9 +207,9 @@ class CartPoleConfigModule():
hessian[:, 0, 0] = 12. hessian[:, 0, 0] = 12.
hessian[:, 1, 1] = 0.2 hessian[:, 1, 1] = 0.2
hessian[:, 2, 2] = 24. * -np.sin(x[:, 2]) \ hessian[:, 2, 2] = 24. * -np.sin(x[:, 2]) \
* (-np.sin(x[:, 2])) \ * (-np.sin(x[:, 2])) \
+ 24. * (1. + np.cos(x[:, 2])) \ + 24. * (1. + np.cos(x[:, 2])) \
* -np.cos(x[:, 2]) * -np.cos(x[:, 2])
hessian[:, 3, 3] = 0.2 hessian[:, 3, 3] = 0.2
return hessian return hessian
@ -218,9 +219,9 @@ class CartPoleConfigModule():
hessian[0, 0] = 12. hessian[0, 0] = 12.
hessian[1, 1] = 0.2 hessian[1, 1] = 0.2
hessian[2, 2] = 24. * -np.sin(x[2]) \ hessian[2, 2] = 24. * -np.sin(x[2]) \
* (-np.sin(x[2])) \ * (-np.sin(x[2])) \
+ 24. * (1. + np.cos(x[2])) \ + 24. * (1. + np.cos(x[2])) \
* -np.cos(x[2]) * -np.cos(x[2])
hessian[3, 3] = 0.2 hessian[3, 3] = 0.2
return hessian[np.newaxis, :, :] * CartPoleConfigModule.TERMINAL_WEIGHT return hessian[np.newaxis, :, :] * CartPoleConfigModule.TERMINAL_WEIGHT

View File

@ -1,5 +1,6 @@
import numpy as np import numpy as np
class FirstOrderLagConfigModule(): class FirstOrderLagConfigModule():
# parameters # parameters
ENV_NAME = "FirstOrderLag-v0" ENV_NAME = "FirstOrderLag-v0"
@ -34,42 +35,42 @@ class FirstOrderLagConfigModule():
"num_elites": 50, "num_elites": 50,
"max_iters": 15, "max_iters": 15,
"alpha": 0.3, "alpha": 0.3,
"init_var":1., "init_var": 1.,
"threshold":0.001 "threshold": 0.001
}, },
"MPPI":{ "MPPI": {
"beta" : 0.6, "beta": 0.6,
"popsize": 5000, "popsize": 5000,
"kappa": 0.9, "kappa": 0.9,
"noise_sigma": 0.5, "noise_sigma": 0.5,
}, },
"MPPIWilliams":{ "MPPIWilliams": {
"popsize": 5000, "popsize": 5000,
"lambda": 1., "lambda": 1.,
"noise_sigma": 0.9, "noise_sigma": 0.9,
}, },
"MPC":{ "MPC": {
}, },
"iLQR":{ "iLQR": {
"max_iter": 500, "max_iter": 500,
"init_mu": 1., "init_mu": 1.,
"mu_min": 1e-6, "mu_min": 1e-6,
"mu_max": 1e10, "mu_max": 1e10,
"init_delta": 2., "init_delta": 2.,
"threshold": 1e-6, "threshold": 1e-6,
}, },
"DDP":{ "DDP": {
"max_iter": 500, "max_iter": 500,
"init_mu": 1., "init_mu": 1.,
"mu_min": 1e-6, "mu_min": 1e-6,
"mu_max": 1e10, "mu_max": 1e10,
"init_delta": 2., "init_delta": 2.,
"threshold": 1e-6, "threshold": 1e-6,
}, },
"NMPC-CGMRES":{ "NMPC-CGMRES": {
}, },
"NMPC-Newton":{ "NMPC-Newton": {
}, },
} }
@staticmethod @staticmethod
@ -111,7 +112,7 @@ class FirstOrderLagConfigModule():
shape(pop_size, pred_len) shape(pop_size, pred_len)
""" """
return ((terminal_x - terminal_g_x)**2) \ return ((terminal_x - terminal_g_x)**2) \
* np.diag(FirstOrderLagConfigModule.Sf) * np.diag(FirstOrderLagConfigModule.Sf)
@staticmethod @staticmethod
def gradient_cost_fn_with_state(x, g_x, terminal=False): def gradient_cost_fn_with_state(x, g_x, terminal=False):
@ -128,8 +129,8 @@ class FirstOrderLagConfigModule():
if not terminal: if not terminal:
return 2. * (x - g_x) * np.diag(FirstOrderLagConfigModule.Q) return 2. * (x - g_x) * np.diag(FirstOrderLagConfigModule.Q)
return (2. * (x - g_x) \ return (2. * (x - g_x)
* np.diag(FirstOrderLagConfigModule.Sf))[np.newaxis, :] * np.diag(FirstOrderLagConfigModule.Sf))[np.newaxis, :]
@staticmethod @staticmethod
def gradient_cost_fn_with_input(x, u): def gradient_cost_fn_with_input(x, u):

View File

@ -1,6 +1,8 @@
from .first_order_lag import FirstOrderLagConfigModule from .first_order_lag import FirstOrderLagConfigModule
from .two_wheeled import TwoWheeledConfigModule from .two_wheeled import TwoWheeledConfigModule
from .cartpole import CartPoleConfigModule from .cartpole import CartPoleConfigModule
from .nonlinear_sample_system import NonlinearSampleSystemConfigModule
def make_config(args): def make_config(args):
""" """
@ -13,3 +15,5 @@ def make_config(args):
return TwoWheeledConfigModule() return TwoWheeledConfigModule()
elif args.env == "CartPole": elif args.env == "CartPole":
return CartPoleConfigModule() return CartPoleConfigModule()
elif args.env == "NonlinearSample":
return NonlinearSampleSystemConfigModule()

View File

@ -0,0 +1,219 @@
import numpy as np
class NonlinearSampleSystemConfigModule():
# parameters
ENV_NAME = "NonlinearSampleSystem-v0"
PLANNER_TYPE = "Const"
TYPE = "Nonlinear"
TASK_HORIZON = 2500
PRED_LEN = 10
STATE_SIZE = 2
INPUT_SIZE = 1
DT = 0.01
R = np.diag([0.01])
Q = None
Sf = None
# bounds
INPUT_LOWER_BOUND = np.array([-0.5])
INPUT_UPPER_BOUND = np.array([0.5])
def __init__(self):
"""
"""
# opt configs
self.opt_config = {
"Random": {
"popsize": 5000
},
"CEM": {
"popsize": 500,
"num_elites": 50,
"max_iters": 15,
"alpha": 0.3,
"init_var": 9.,
"threshold": 0.001
},
"MPPI": {
"beta": 0.6,
"popsize": 5000,
"kappa": 0.9,
"noise_sigma": 0.5,
},
"MPPIWilliams": {
"popsize": 5000,
"lambda": 1.,
"noise_sigma": 0.9,
},
"iLQR": {
"max_iter": 500,
"init_mu": 1.,
"mu_min": 1e-6,
"mu_max": 1e10,
"init_delta": 2.,
"threshold": 1e-6,
},
"DDP": {
"max_iter": 500,
"init_mu": 1.,
"mu_min": 1e-6,
"mu_max": 1e10,
"init_delta": 2.,
"threshold": 1e-6,
},
"NMPC-CGMRES": {
},
"NMPC-Newton": {
},
}
@staticmethod
def input_cost_fn(u):
""" input cost functions
Args:
u (numpy.ndarray): input, shape(pred_len, input_size)
or shape(pop_size, pred_len, input_size)
Returns:
cost (numpy.ndarray): cost of input, shape(pred_len, input_size) or
shape(pop_size, pred_len, input_size)
"""
return (u**2) * np.diag(NonlinearSampleSystemConfigModule.R)
@staticmethod
def state_cost_fn(x, g_x):
""" state cost function
Args:
x (numpy.ndarray): state, shape(pred_len, state_size)
or shape(pop_size, pred_len, state_size)
g_x (numpy.ndarray): goal state, shape(pred_len, state_size)
or shape(pop_size, pred_len, state_size)
Returns:
cost (numpy.ndarray): cost of state, shape(pred_len, 1) or
shape(pop_size, pred_len, 1)
"""
if len(x.shape) > 2:
return (0.5 * (x[:, :, 0]**2) +
0.5 * (x[:, :, 1]**2))[:, :, np.newaxis]
elif len(x.shape) > 1:
return (0.5 * (x[:, 0]**2) + 0.5 * (x[:, 1]**2))[:, np.newaxis]
return 0.5 * (x[0]**2) + 0.5 * (x[1]**2)
@ staticmethod
def terminal_state_cost_fn(terminal_x, terminal_g_x):
"""
Args:
terminal_x (numpy.ndarray): terminal state,
shape(state_size, ) or shape(pop_size, state_size)
terminal_g_x (numpy.ndarray): terminal goal state,
shape(state_size, ) or shape(pop_size, state_size)
Returns:
cost (numpy.ndarray): cost of state, shape(pred_len, ) or
shape(pop_size, pred_len)
"""
if len(terminal_x.shape) > 1:
return (0.5 * (terminal_x[:, 0]**2) +
0.5 * (terminal_x[:, 1]**2))[:, np.newaxis]
return 0.5 * (terminal_x[0]**2) + 0.5 * (terminal_x[1]**2)
@ staticmethod
def gradient_cost_fn_with_state(x, g_x, terminal=False):
""" gradient of costs with respect to the state
Args:
x (numpy.ndarray): state, shape(pred_len, state_size)
g_x (numpy.ndarray): goal state, shape(pred_len, state_size)
Returns:
l_x (numpy.ndarray): gradient of cost, shape(pred_len, state_size)
or shape(1, state_size)
"""
if not terminal:
cost_dx0 = x[:, 0]
cost_dx1 = x[:, 1]
cost_dx = np.stack((cost_dx0, cost_dx1), axis=1)
return cost_dx
cost_dx0 = x[0]
cost_dx1 = x[1]
cost_dx = np.array([[cost_dx0, cost_dx1]])
return cost_dx
@ staticmethod
def gradient_cost_fn_with_input(x, u):
""" gradient of costs with respect to the input
Args:
x (numpy.ndarray): state, shape(pred_len, state_size)
u (numpy.ndarray): goal state, shape(pred_len, input_size)
Returns:
l_u (numpy.ndarray): gradient of cost, shape(pred_len, input_size)
"""
return 2. * u * np.diag(NonlinearSampleSystemConfigModule.R)
@ staticmethod
def hessian_cost_fn_with_state(x, g_x, terminal=False):
""" hessian costs with respect to the state
Args:
x (numpy.ndarray): state, shape(pred_len, state_size)
g_x (numpy.ndarray): goal state, shape(pred_len, state_size)
Returns:
l_xx (numpy.ndarray): gradient of cost,
shape(pred_len, state_size, state_size) or
shape(1, state_size, state_size) or
"""
if not terminal:
(pred_len, state_size) = x.shape
hessian = np.eye(state_size)
hessian = np.tile(hessian, (pred_len, 1, 1))
hessian[:, 0, 0] = 1.
hessian[:, 1, 1] = 1.
return hessian
state_size = len(x)
hessian = np.eye(state_size)
hessian[0, 0] = 1.
hessian[1, 1] = 1.
return hessian[np.newaxis, :, :]
@ staticmethod
def hessian_cost_fn_with_input(x, u):
""" hessian costs with respect to the input
Args:
x (numpy.ndarray): state, shape(pred_len, state_size)
u (numpy.ndarray): goal state, shape(pred_len, input_size)
Returns:
l_uu (numpy.ndarray): gradient of cost,
shape(pred_len, input_size, input_size)
"""
(pred_len, _) = u.shape
return np.tile(NonlinearSampleSystemConfigModule.R, (pred_len, 1, 1))
@ staticmethod
def hessian_cost_fn_with_input_state(x, u):
""" hessian costs with respect to the state and input
Args:
x (numpy.ndarray): state, shape(pred_len, state_size)
u (numpy.ndarray): goal state, shape(pred_len, input_size)
Returns:
l_ux (numpy.ndarray): gradient of cost ,
shape(pred_len, input_size, state_size)
"""
(_, state_size) = x.shape
(pred_len, input_size) = u.shape
return np.zeros((pred_len, input_size, state_size))

View File

@ -4,6 +4,7 @@ from matplotlib.axes import Axes
from ..plotters.plot_objs import square_with_angle, square from ..plotters.plot_objs import square_with_angle, square
from ..common.utils import fit_angle_in_range from ..common.utils import fit_angle_in_range
class TwoWheeledConfigModule(): class TwoWheeledConfigModule():
# parameters # parameters
ENV_NAME = "TwoWheeled-v0" ENV_NAME = "TwoWheeled-v0"
@ -46,40 +47,40 @@ class TwoWheeledConfigModule():
"num_elites": 50, "num_elites": 50,
"max_iters": 15, "max_iters": 15,
"alpha": 0.3, "alpha": 0.3,
"init_var":1., "init_var": 1.,
"threshold":0.001 "threshold": 0.001
}, },
"MPPI":{ "MPPI": {
"beta" : 0.6, "beta": 0.6,
"popsize": 5000, "popsize": 5000,
"kappa": 0.9, "kappa": 0.9,
"noise_sigma": 0.5, "noise_sigma": 0.5,
}, },
"MPPIWilliams":{ "MPPIWilliams": {
"popsize": 5000, "popsize": 5000,
"lambda": 1, "lambda": 1,
"noise_sigma": 1., "noise_sigma": 1.,
}, },
"iLQR":{ "iLQR": {
"max_iter": 500, "max_iter": 500,
"init_mu": 1., "init_mu": 1.,
"mu_min": 1e-6, "mu_min": 1e-6,
"mu_max": 1e10, "mu_max": 1e10,
"init_delta": 2., "init_delta": 2.,
"threshold": 1e-6, "threshold": 1e-6,
}, },
"DDP":{ "DDP": {
"max_iter": 500, "max_iter": 500,
"init_mu": 1., "init_mu": 1.,
"mu_min": 1e-6, "mu_min": 1e-6,
"mu_max": 1e10, "mu_max": 1e10,
"init_delta": 2., "init_delta": 2.,
"threshold": 1e-6, "threshold": 1e-6,
}, },
"NMPC-CGMRES":{ "NMPC-CGMRES": {
}, },
"NMPC-Newton":{ "NMPC-Newton": {
}, },
} }
@staticmethod @staticmethod
@ -142,8 +143,8 @@ class TwoWheeledConfigModule():
cost (numpy.ndarray): cost of state, shape(pred_len, ) or cost (numpy.ndarray): cost of state, shape(pred_len, ) or
shape(pop_size, pred_len) shape(pop_size, pred_len)
""" """
terminal_diff = TwoWheeledConfigModule.fit_diff_in_range(terminal_x \ terminal_diff = TwoWheeledConfigModule.fit_diff_in_range(terminal_x
- terminal_g_x) - terminal_g_x)
return ((terminal_diff)**2) * np.diag(TwoWheeledConfigModule.Sf) return ((terminal_diff)**2) * np.diag(TwoWheeledConfigModule.Sf)
@ -164,8 +165,8 @@ class TwoWheeledConfigModule():
if not terminal: if not terminal:
return 2. * (diff) * np.diag(TwoWheeledConfigModule.Q) return 2. * (diff) * np.diag(TwoWheeledConfigModule.Q)
return (2. * (diff) \ return (2. * (diff)
* np.diag(TwoWheeledConfigModule.Sf))[np.newaxis, :] * np.diag(TwoWheeledConfigModule.Sf))[np.newaxis, :]
@staticmethod @staticmethod
def gradient_cost_fn_with_input(x, u): def gradient_cost_fn_with_input(x, u):

View File

@ -8,6 +8,7 @@ from ..envs.cost import calc_cost
logger = getLogger(__name__) logger = getLogger(__name__)
class CEM(Controller): class CEM(Controller):
""" Cross Entropy Method for linear and nonlinear method """ Cross Entropy Method for linear and nonlinear method
@ -19,6 +20,7 @@ class CEM(Controller):
using probabilistic dynamics models. using probabilistic dynamics models.
In Advances in Neural Information Processing Systems (pp. 4754-4765). In Advances in Neural Information Processing Systems (pp. 4754-4765).
""" """
def __init__(self, config, model): def __init__(self, config, model):
super(CEM, self).__init__(config, model) super(CEM, self).__init__(config, model)
@ -50,13 +52,13 @@ class CEM(Controller):
self.input_cost_fn = config.input_cost_fn self.input_cost_fn = config.input_cost_fn
# init mean # init mean
self.init_mean = np.tile((config.INPUT_UPPER_BOUND \ self.init_mean = np.tile((config.INPUT_UPPER_BOUND
+ config.INPUT_LOWER_BOUND) / 2., + config.INPUT_LOWER_BOUND) / 2.,
self.pred_len) self.pred_len)
self.prev_sol = self.init_mean.copy() self.prev_sol = self.init_mean.copy()
# init variance # init variance
var = np.ones_like(config.INPUT_UPPER_BOUND) \ var = np.ones_like(config.INPUT_UPPER_BOUND) \
* config.opt_config["CEM"]["init_var"] * config.opt_config["CEM"]["init_var"]
self.init_var = np.tile(var, self.pred_len) self.init_var = np.tile(var, self.pred_len)
# save # save
@ -86,7 +88,7 @@ class CEM(Controller):
# make distribution # make distribution
X = stats.truncnorm(-1, 1, X = stats.truncnorm(-1, 1,
loc=np.zeros_like(mean),\ loc=np.zeros_like(mean),
scale=np.ones_like(mean)) scale=np.ones_like(mean))
while (opt_count < self.max_iters) and np.max(var) > self.epsilon: while (opt_count < self.max_iters) and np.max(var) > self.epsilon:
@ -99,15 +101,15 @@ class CEM(Controller):
# sample # sample
samples = X.rvs(size=[self.pop_size, self.opt_dim]) \ samples = X.rvs(size=[self.pop_size, self.opt_dim]) \
* np.sqrt(constrained_var) \ * np.sqrt(constrained_var) \
+ mean + mean
# calc cost # calc cost
# samples.shape = (pop_size, opt_dim) # samples.shape = (pop_size, opt_dim)
costs = self.calc_cost(curr_x, costs = self.calc_cost(curr_x,
samples.reshape(self.pop_size, samples.reshape(self.pop_size,
self.pred_len, self.pred_len,
self.input_size), self.input_size),
g_xs) g_xs)
# sort cost # sort cost

View File

@ -2,9 +2,11 @@ import numpy as np
from ..envs.cost import calc_cost from ..envs.cost import calc_cost
class Controller(): class Controller():
""" Controller class """ Controller class
""" """
def __init__(self, config, model): def __init__(self, config, model):
""" """
""" """
@ -49,7 +51,7 @@ class Controller():
# get particle cost # get particle cost
costs = calc_cost(pred_xs, samples, g_xs, costs = calc_cost(pred_xs, samples, g_xs,
self.state_cost_fn, self.input_cost_fn, \ self.state_cost_fn, self.input_cost_fn,
self.terminal_state_cost_fn) self.terminal_state_cost_fn)
return costs return costs
@ -58,10 +60,12 @@ class Controller():
def gradient_hamiltonian_x(x, u, lam): def gradient_hamiltonian_x(x, u, lam):
""" gradient of hamitonian with respect to the state, """ gradient of hamitonian with respect to the state,
""" """
raise NotImplementedError("Implement gradient of hamitonian with respect to the state") raise NotImplementedError(
"Implement gradient of hamitonian with respect to the state")
@staticmethod @staticmethod
def gradient_hamiltonian_u(x, u, lam): def gradient_hamiltonian_u(x, u, lam):
""" gradient of hamitonian with respect to the input """ gradient of hamitonian with respect to the input
""" """
raise NotImplementedError("Implement gradient of hamitonian with respect to the input") raise NotImplementedError(
"Implement gradient of hamitonian with respect to the input")

View File

@ -8,6 +8,7 @@ from ..envs.cost import calc_cost
logger = getLogger(__name__) logger = getLogger(__name__)
class DDP(Controller): class DDP(Controller):
""" Differential Dynamic Programming """ Differential Dynamic Programming
@ -18,6 +19,7 @@ class DDP(Controller):
https://github.com/studywolf/control, and https://github.com/studywolf/control, and
https://github.com/anassinator/ilqr https://github.com/anassinator/ilqr
""" """
def __init__(self, config, model): def __init__(self, config, model):
""" """
""" """
@ -92,13 +94,13 @@ class DDP(Controller):
# forward # forward
if update_sol == True: if update_sol == True:
pred_xs, cost, f_x, f_u, f_xx, f_ux, f_uu,\ pred_xs, cost, f_x, f_u, f_xx, f_ux, f_uu,\
l_x, l_xx, l_u, l_uu, l_ux = \ l_x, l_xx, l_u, l_uu, l_ux = \
self.forward(curr_x, g_xs, sol) self.forward(curr_x, g_xs, sol)
update_sol = False update_sol = False
try: try:
# backward # backward
k, K = self.backward(f_x, f_u, f_xx, f_ux, f_uu, \ k, K = self.backward(f_x, f_u, f_xx, f_ux, f_uu,
l_x, l_xx, l_u, l_uu, l_ux) l_x, l_xx, l_u, l_uu, l_ux)
# line search # line search
@ -139,7 +141,7 @@ class DDP(Controller):
# increase regularization term. # increase regularization term.
self.delta = max(1.0, self.delta) * self.init_delta self.delta = max(1.0, self.delta) * self.init_delta
self.mu = max(self.mu_min, self.mu * self.delta) self.mu = max(self.mu_min, self.mu * self.delta)
logger.debug("Update regularization term to {}"\ logger.debug("Update regularization term to {}"
.format(self.mu)) .format(self.mu))
if self.mu >= self.mu_max: if self.mu >= self.mu_max:
logger.debug("Reach Max regularization term") logger.debug("Reach Max regularization term")
@ -183,8 +185,8 @@ class DDP(Controller):
for t in range(pred_len): for t in range(pred_len):
new_sol[t] = sol[t] \ new_sol[t] = sol[t] \
+ alpha * k[t] \ + alpha * k[t] \
+ np.dot(K[t], (new_pred_xs[t] - pred_xs[t])) + np.dot(K[t], (new_pred_xs[t] - pred_xs[t]))
new_pred_xs[t+1] = self.model.predict_next_state(new_pred_xs[t], new_pred_xs[t+1] = self.model.predict_next_state(new_pred_xs[t],
new_sol[t]) new_sol[t])

View File

@ -8,6 +8,7 @@ from ..envs.cost import calc_cost
logger = getLogger(__name__) logger = getLogger(__name__)
class iLQR(Controller): class iLQR(Controller):
""" iterative Liner Quadratique Regulator """ iterative Liner Quadratique Regulator
@ -16,6 +17,7 @@ class iLQR(Controller):
Intelligent Robots and Systems (pp. 4906-4913). and Study Wolf, Intelligent Robots and Systems (pp. 4906-4913). and Study Wolf,
https://github.com/studywolf/control https://github.com/studywolf/control
""" """
def __init__(self, config, model): def __init__(self, config, model):
""" """
""" """
@ -130,7 +132,7 @@ class iLQR(Controller):
# increase regularization term. # increase regularization term.
self.delta = max(1.0, self.delta) * self.init_delta self.delta = max(1.0, self.delta) * self.init_delta
self.mu = max(self.mu_min, self.mu * self.delta) self.mu = max(self.mu_min, self.mu * self.delta)
logger.debug("Update regularization term to {}"\ logger.debug("Update regularization term to {}"
.format(self.mu)) .format(self.mu))
if self.mu >= self.mu_max: if self.mu >= self.mu_max:
logger.debug("Reach Max regularization term") logger.debug("Reach Max regularization term")
@ -174,8 +176,8 @@ class iLQR(Controller):
for t in range(pred_len): for t in range(pred_len):
new_sol[t] = sol[t] \ new_sol[t] = sol[t] \
+ alpha * k[t] \ + alpha * k[t] \
+ np.dot(K[t], (new_pred_xs[t] - pred_xs[t])) + np.dot(K[t], (new_pred_xs[t] - pred_xs[t]))
new_pred_xs[t+1] = self.model.predict_next_state(new_pred_xs[t], new_pred_xs[t+1] = self.model.predict_next_state(new_pred_xs[t],
new_sol[t]) new_sol[t])

View File

@ -6,6 +6,7 @@ from .mppi_williams import MPPIWilliams
from .ilqr import iLQR from .ilqr import iLQR
from .ddp import DDP from .ddp import DDP
def make_controller(args, config, model): def make_controller(args, config, model):
if args.controller_type == "MPC": if args.controller_type == "MPC":

View File

@ -9,6 +9,7 @@ from ..envs.cost import calc_cost
logger = getLogger(__name__) logger = getLogger(__name__)
class LinearMPC(Controller): class LinearMPC(Controller):
""" Model Predictive Controller for linear model """ Model Predictive Controller for linear model
@ -21,6 +22,7 @@ class LinearMPC(Controller):
Ref: Ref:
Maciejowski, J. M. (2002). Predictive control: with constraints. Maciejowski, J. M. (2002). Predictive control: with constraints.
""" """
def __init__(self, config, model): def __init__(self, config, model):
""" """
Args: Args:
@ -77,7 +79,7 @@ class LinearMPC(Controller):
for _ in range(self.pred_len - 1): for _ in range(self.pred_len - 1):
temp_mat = np.matmul(A_factorials[-1], self.A) temp_mat = np.matmul(A_factorials[-1], self.A)
self.phi_mat = np.vstack((self.phi_mat, temp_mat)) self.phi_mat = np.vstack((self.phi_mat, temp_mat))
A_factorials.append(temp_mat) # after we use this factorials A_factorials.append(temp_mat) # after we use this factorials
self.gamma_mat = self.B.copy() self.gamma_mat = self.B.copy()
gammma_mat_temp = self.B.copy() gammma_mat_temp = self.B.copy()
@ -91,8 +93,8 @@ class LinearMPC(Controller):
for i in range(self.pred_len - 1): for i in range(self.pred_len - 1):
temp_mat = np.zeros_like(self.gamma_mat) temp_mat = np.zeros_like(self.gamma_mat)
temp_mat[int((i + 1)*self.state_size): , :] =\ temp_mat[int((i + 1)*self.state_size):, :] =\
self.gamma_mat[:-int((i + 1)*self.state_size) , :] self.gamma_mat[:-int((i + 1)*self.state_size), :]
self.theta_mat = np.hstack((self.theta_mat, temp_mat)) self.theta_mat = np.hstack((self.theta_mat, temp_mat))
@ -114,7 +116,7 @@ class LinearMPC(Controller):
for i in range(self.pred_len - 1): for i in range(self.pred_len - 1):
for j in range(self.input_size): for j in range(self.input_size):
temp_F[j * 2: (j + 1) * 2,\ temp_F[j * 2: (j + 1) * 2,
((i+1) * self.input_size) + j] = np.array([1., -1.]) ((i+1) * self.input_size) + j] = np.array([1., -1.])
self.F = np.vstack((self.F, temp_F)) self.F = np.vstack((self.F, temp_F))
@ -187,14 +189,14 @@ class LinearMPC(Controller):
# using cvxopt # using cvxopt
def optimized_func(dt_us): def optimized_func(dt_us):
return (np.dot(dt_us, np.dot(H, dt_us.reshape(-1, 1))) \ return (np.dot(dt_us, np.dot(H, dt_us.reshape(-1, 1)))
- np.dot(G.T, dt_us.reshape(-1, 1)))[0] - np.dot(G.T, dt_us.reshape(-1, 1)))[0]
# constraint # constraint
lb = np.array([-np.inf for _ in range(len(ub))]) # one side cons lb = np.array([-np.inf for _ in range(len(ub))]) # one side cons
cons = LinearConstraint(A, lb, ub) cons = LinearConstraint(A, lb, ub)
# solve # solve
opt_sol = minimize(optimized_func, self.prev_sol.flatten(),\ opt_sol = minimize(optimized_func, self.prev_sol.flatten(),
constraints=[cons]) constraints=[cons])
opt_dt_us = opt_sol.x opt_dt_us = opt_sol.x
@ -213,7 +215,7 @@ class LinearMPC(Controller):
""" """
# to dt form # to dt form
opt_dt_u_seq = np.cumsum(opt_dt_us.reshape(self.pred_len,\ opt_dt_u_seq = np.cumsum(opt_dt_us.reshape(self.pred_len,
self.input_size), self.input_size),
axis=0) axis=0)
self.prev_sol = opt_dt_u_seq.copy() self.prev_sol = opt_dt_u_seq.copy()
@ -226,8 +228,8 @@ class LinearMPC(Controller):
# check costs # check costs
costs = self.calc_cost(curr_x, costs = self.calc_cost(curr_x,
opt_u_seq.reshape(1, opt_u_seq.reshape(1,
self.pred_len, self.pred_len,
self.input_size), self.input_size),
g_xs) g_xs)
logger.debug("Cost = {}".format(costs)) logger.debug("Cost = {}".format(costs))

View File

@ -8,6 +8,7 @@ from ..envs.cost import calc_cost
logger = getLogger(__name__) logger = getLogger(__name__)
class MPPI(Controller): class MPPI(Controller):
""" Model Predictive Path Integral for linear and nonlinear method """ Model Predictive Path Integral for linear and nonlinear method
@ -18,6 +19,7 @@ class MPPI(Controller):
Deep Dynamics Models for Learning Dexterous Manipulation. Deep Dynamics Models for Learning Dexterous Manipulation.
arXiv preprint arXiv:1909.11652. arXiv preprint arXiv:1909.11652.
""" """
def __init__(self, config, model): def __init__(self, config, model):
super(MPPI, self).__init__(config, model) super(MPPI, self).__init__(config, model)
@ -47,7 +49,7 @@ class MPPI(Controller):
self.input_cost_fn = config.input_cost_fn self.input_cost_fn = config.input_cost_fn
# init mean # init mean
self.prev_sol = np.tile((config.INPUT_UPPER_BOUND \ self.prev_sol = np.tile((config.INPUT_UPPER_BOUND
+ config.INPUT_LOWER_BOUND) / 2., + config.INPUT_LOWER_BOUND) / 2.,
self.pred_len) self.pred_len)
self.prev_sol = self.prev_sol.reshape(self.pred_len, self.input_size) self.prev_sol = self.prev_sol.reshape(self.pred_len, self.input_size)
@ -74,23 +76,23 @@ class MPPI(Controller):
""" """
# get noised inputs # get noised inputs
noise = np.random.normal( noise = np.random.normal(
loc=0, scale=1.0, size=(self.pop_size, self.pred_len, loc=0, scale=1.0, size=(self.pop_size, self.pred_len,
self.input_size)) * self.noise_sigma self.input_size)) * self.noise_sigma
noised_inputs = noise.copy() noised_inputs = noise.copy()
for t in range(self.pred_len): for t in range(self.pred_len):
if t > 0: if t > 0:
noised_inputs[:, t, :] = self.beta \ noised_inputs[:, t, :] = self.beta \
* (self.prev_sol[t, :] \ * (self.prev_sol[t, :]
+ noise[:, t, :]) \ + noise[:, t, :]) \
+ (1 - self.beta) \ + (1 - self.beta) \
* noised_inputs[:, t-1, :] * noised_inputs[:, t-1, :]
else: else:
noised_inputs[:, t, :] = self.beta \ noised_inputs[:, t, :] = self.beta \
* (self.prev_sol[t, :] \ * (self.prev_sol[t, :]
+ noise[:, t, :]) \ + noise[:, t, :]) \
+ (1 - self.beta) \ + (1 - self.beta) \
* self.history_u[-1] * self.history_u[-1]
# clip actions # clip actions
noised_inputs = np.clip( noised_inputs = np.clip(
@ -108,7 +110,7 @@ class MPPI(Controller):
# weight actions # weight actions
weighted_inputs = exp_rewards[:, np.newaxis, np.newaxis] \ weighted_inputs = exp_rewards[:, np.newaxis, np.newaxis] \
* noised_inputs * noised_inputs
sol = np.sum(weighted_inputs, 0) / denom sol = np.sum(weighted_inputs, 0) / denom
# update # update

View File

@ -8,6 +8,7 @@ from ..envs.cost import calc_cost
logger = getLogger(__name__) logger = getLogger(__name__)
class MPPIWilliams(Controller): class MPPIWilliams(Controller):
""" Model Predictive Path Integral for linear and nonlinear method """ Model Predictive Path Integral for linear and nonlinear method
@ -19,6 +20,7 @@ class MPPIWilliams(Controller):
2017 IEEE International Conference on Robotics and Automation (ICRA), 2017 IEEE International Conference on Robotics and Automation (ICRA),
Singapore, 2017, pp. 1714-1721. Singapore, 2017, pp. 1714-1721.
""" """
def __init__(self, config, model): def __init__(self, config, model):
super(MPPIWilliams, self).__init__(config, model) super(MPPIWilliams, self).__init__(config, model)
@ -47,7 +49,7 @@ class MPPIWilliams(Controller):
self.input_cost_fn = config.input_cost_fn self.input_cost_fn = config.input_cost_fn
# init mean # init mean
self.prev_sol = np.tile((config.INPUT_UPPER_BOUND \ self.prev_sol = np.tile((config.INPUT_UPPER_BOUND
+ config.INPUT_LOWER_BOUND) / 2., + config.INPUT_LOWER_BOUND) / 2.,
self.pred_len) self.pred_len)
self.prev_sol = self.prev_sol.reshape(self.pred_len, self.input_size) self.prev_sol = self.prev_sol.reshape(self.pred_len, self.input_size)
@ -85,7 +87,7 @@ class MPPIWilliams(Controller):
# get particle cost # get particle cost
costs = calc_cost(pred_xs, samples, g_xs, costs = calc_cost(pred_xs, samples, g_xs,
self.state_cost_fn, None, \ self.state_cost_fn, None,
self.terminal_state_cost_fn) self.terminal_state_cost_fn)
return costs return costs
@ -101,8 +103,8 @@ class MPPIWilliams(Controller):
""" """
# get noised inputs # get noised inputs
noise = np.random.normal( noise = np.random.normal(
loc=0, scale=1.0, size=(self.pop_size, self.pred_len, loc=0, scale=1.0, size=(self.pop_size, self.pred_len,
self.input_size)) * self.noise_sigma self.input_size)) * self.noise_sigma
noised_inputs = self.prev_sol + noise noised_inputs = self.prev_sol + noise
@ -120,7 +122,7 @@ class MPPIWilliams(Controller):
# mppi update # mppi update
beta = np.min(costs) beta = np.min(costs)
eta = np.sum(np.exp(- 1. / self.lam * (costs - beta)), axis=0) \ eta = np.sum(np.exp(- 1. / self.lam * (costs - beta)), axis=0) \
+ 1e-10 + 1e-10
# weight # weight
# eta.shape = (pred_len, input_size) # eta.shape = (pred_len, input_size)
@ -128,7 +130,7 @@ class MPPIWilliams(Controller):
# update inputs # update inputs
sol = self.prev_sol \ sol = self.prev_sol \
+ np.sum(weights[:, np.newaxis, np.newaxis] * noise, axis=0) + np.sum(weights[:, np.newaxis, np.newaxis] * noise, axis=0)
# update # update
self.prev_sol[:-1] = sol[1:] self.prev_sol[:-1] = sol[1:]

View File

@ -8,6 +8,7 @@ from ..envs.cost import calc_cost
logger = getLogger(__name__) logger = getLogger(__name__)
class RandomShooting(Controller): class RandomShooting(Controller):
""" Random Shooting Method for linear and nonlinear method """ Random Shooting Method for linear and nonlinear method
@ -19,6 +20,7 @@ class RandomShooting(Controller):
using probabilistic dynamics models. using probabilistic dynamics models.
In Advances in Neural Information Processing Systems (pp. 4754-4765). In Advances in Neural Information Processing Systems (pp. 4754-4765).
""" """
def __init__(self, config, model): def __init__(self, config, model):
super(RandomShooting, self).__init__(config, model) super(RandomShooting, self).__init__(config, model)
@ -65,8 +67,8 @@ class RandomShooting(Controller):
# calc cost # calc cost
costs = self.calc_cost(curr_x, costs = self.calc_cost(curr_x,
samples.reshape(self.pop_size, samples.reshape(self.pop_size,
self.pred_len, self.pred_len,
self.input_size), self.input_size),
g_xs) g_xs)
# solution # solution
sol = samples[np.argmin(costs)] sol = samples[np.argmin(costs)]

View File

@ -4,6 +4,7 @@ from matplotlib.axes import Axes
from .env import Env from .env import Env
from ..plotters.plot_objs import square from ..plotters.plot_objs import square
class CartPoleEnv(Env): class CartPoleEnv(Env):
""" Cartpole Environment """ Cartpole Environment
@ -13,13 +14,14 @@ class CartPoleEnv(Env):
6-832-underactuated-robotics-spring-2009/readings/ 6-832-underactuated-robotics-spring-2009/readings/
MIT6_832s09_read_ch03.pdf MIT6_832s09_read_ch03.pdf
""" """
def __init__(self): def __init__(self):
""" """
""" """
self.config = {"state_size" : 4, self.config = {"state_size": 4,
"input_size" : 1, "input_size": 1,
"dt" : 0.02, "dt": 0.02,
"max_step" : 500, "max_step": 500,
"input_lower_bound": [-3.], "input_lower_bound": [-3.],
"input_upper_bound": [3.], "input_upper_bound": [3.],
"mp": 0.2, "mp": 0.2,
@ -76,33 +78,33 @@ class CartPoleEnv(Env):
# x # x
d_x0 = self.curr_x[1] d_x0 = self.curr_x[1]
# v_x # v_x
d_x1 = (u[0] + self.config["mp"] * np.sin(self.curr_x[2]) \ d_x1 = (u[0] + self.config["mp"] * np.sin(self.curr_x[2])
* (self.config["l"] * (self.curr_x[3]**2) \ * (self.config["l"] * (self.curr_x[3]**2)
+ self.config["g"] * np.cos(self.curr_x[2]))) \ + self.config["g"] * np.cos(self.curr_x[2]))) \
/ (self.config["mc"] + self.config["mp"] \ / (self.config["mc"] + self.config["mp"]
* (np.sin(self.curr_x[2])**2)) * (np.sin(self.curr_x[2])**2))
# theta # theta
d_x2 = self.curr_x[3] d_x2 = self.curr_x[3]
# v_theta # v_theta
d_x3 = (-u[0] * np.cos(self.curr_x[2]) \ d_x3 = (-u[0] * np.cos(self.curr_x[2])
- self.config["mp"] * self.config["l"] * (self.curr_x[3]**2) \ - self.config["mp"] * self.config["l"] * (self.curr_x[3]**2)
* np.cos(self.curr_x[2]) * np.sin(self.curr_x[2]) \ * np.cos(self.curr_x[2]) * np.sin(self.curr_x[2])
- (self.config["mc"] + self.config["mp"]) * self.config["g"] \ - (self.config["mc"] + self.config["mp"]) * self.config["g"]
* np.sin(self.curr_x[2])) \ * np.sin(self.curr_x[2])) \
/ (self.config["l"] * (self.config["mc"] + self.config["mp"] \ / (self.config["l"] * (self.config["mc"] + self.config["mp"]
* (np.sin(self.curr_x[2])**2))) * (np.sin(self.curr_x[2])**2)))
next_x = self.curr_x +\ next_x = self.curr_x +\
np.array([d_x0, d_x1, d_x2, d_x3]) * self.config["dt"] np.array([d_x0, d_x1, d_x2, d_x3]) * self.config["dt"]
# TODO: costs # TODO: costs
costs = 0. costs = 0.
costs += 0.1 * np.sum(u**2) costs += 0.1 * np.sum(u**2)
costs += 6. * self.curr_x[0]**2 \ costs += 6. * self.curr_x[0]**2 \
+ 12. * (np.cos(self.curr_x[2]) + 1.)**2 \ + 12. * (np.cos(self.curr_x[2]) + 1.)**2 \
+ 0.1 * self.curr_x[1]**2 \ + 0.1 * self.curr_x[1]**2 \
+ 0.1 * self.curr_x[3]**2 + 0.1 * self.curr_x[3]**2
# save history # save history
self.history_x.append(next_x.flatten()) self.history_x.append(next_x.flatten())
@ -114,8 +116,8 @@ class CartPoleEnv(Env):
self.step_count += 1 self.step_count += 1
return next_x.flatten(), costs, \ return next_x.flatten(), costs, \
self.step_count > self.config["max_step"], \ self.step_count > self.config["max_step"], \
{"goal_state" : self.g_x} {"goal_state": self.g_x}
def plot_func(self, to_plot, i=None, history_x=None, history_g_x=None): def plot_func(self, to_plot, i=None, history_x=None, history_g_x=None):
""" plot cartpole object function """ plot cartpole object function
@ -134,10 +136,10 @@ class CartPoleEnv(Env):
imgs["cart"] = to_plot.plot([], [], c="k")[0] imgs["cart"] = to_plot.plot([], [], c="k")[0]
imgs["pole"] = to_plot.plot([], [], c="k", linewidth=5)[0] imgs["pole"] = to_plot.plot([], [], c="k", linewidth=5)[0]
imgs["center"] = to_plot.plot([], [], marker="o", c="k",\ imgs["center"] = to_plot.plot([], [], marker="o", c="k",
markersize=10)[0] markersize=10)[0]
# centerline # centerline
to_plot.plot(np.linspace(-1., 1., num=50), np.zeros(50),\ to_plot.plot(np.linspace(-1., 1., num=50), np.zeros(50),
c="k", linestyle="dashed") c="k", linestyle="dashed")
# set axis # set axis
@ -166,13 +168,13 @@ class CartPoleEnv(Env):
pole_y (numpy.ndarray): y data of pole pole_y (numpy.ndarray): y data of pole
""" """
# cart # cart
cart_x, cart_y = square(curr_x[0], 0.,\ cart_x, cart_y = square(curr_x[0], 0.,
self.config["cart_size"], 0.) self.config["cart_size"], 0.)
# pole # pole
pole_x = np.array([curr_x[0], curr_x[0] + self.config["l"] \ pole_x = np.array([curr_x[0], curr_x[0] + self.config["l"]
* np.cos(curr_x[2]-np.pi/2)]) * np.cos(curr_x[2]-np.pi/2)])
pole_y = np.array([0., self.config["l"] \ pole_y = np.array([0., self.config["l"]
* np.sin(curr_x[2]-np.pi/2)]) * np.sin(curr_x[2]-np.pi/2)])
return cart_x, cart_y, pole_x, pole_y return cart_x, cart_y, pole_x, pole_y

View File

@ -4,6 +4,7 @@ import numpy as np
logger = getLogger(__name__) logger = getLogger(__name__)
def calc_cost(pred_xs, input_sample, g_xs, def calc_cost(pred_xs, input_sample, g_xs,
state_cost_fn, input_cost_fn, terminal_state_cost_fn): state_cost_fn, input_cost_fn, terminal_state_cost_fn):
""" calculate the cost """ calculate the cost
@ -24,14 +25,15 @@ def calc_cost(pred_xs, input_sample, g_xs,
# state cost # state cost
state_cost = 0. state_cost = 0.
if state_cost_fn is not None: if state_cost_fn is not None:
state_pred_par_cost = state_cost_fn(pred_xs[:, 1:-1, :], g_xs[:, 1:-1, :]) state_pred_par_cost = state_cost_fn(
pred_xs[:, 1:-1, :], g_xs[:, 1:-1, :])
state_cost = np.sum(np.sum(state_pred_par_cost, axis=-1), axis=-1) state_cost = np.sum(np.sum(state_pred_par_cost, axis=-1), axis=-1)
# terminal cost # terminal cost
terminal_state_cost = 0. terminal_state_cost = 0.
if terminal_state_cost_fn is not None: if terminal_state_cost_fn is not None:
terminal_state_par_cost = terminal_state_cost_fn(pred_xs[:, -1, :], terminal_state_par_cost = terminal_state_cost_fn(pred_xs[:, -1, :],
g_xs[:, -1, :]) g_xs[:, -1, :])
terminal_state_cost = np.sum(terminal_state_par_cost, axis=-1) terminal_state_cost = np.sum(terminal_state_par_cost, axis=-1)
# act cost # act cost

View File

@ -1,5 +1,6 @@
import numpy as np import numpy as np
class Env(): class Env():
""" Environments class """ Environments class
Attributes: Attributes:
@ -8,6 +9,7 @@ class Env():
history_x (list[numpy.ndarray]): historty of state, shape(step_count*state_size) history_x (list[numpy.ndarray]): historty of state, shape(step_count*state_size)
step_count (int): step count step_count (int): step count
""" """
def __init__(self, config): def __init__(self, config):
""" """
""" """

View File

@ -3,17 +3,19 @@ import scipy
from scipy import integrate from scipy import integrate
from .env import Env from .env import Env
class FirstOrderLagEnv(Env): class FirstOrderLagEnv(Env):
""" First Order Lag System Env """ First Order Lag System Env
""" """
def __init__(self, tau=0.63): def __init__(self, tau=0.63):
""" """
""" """
self.config = {"state_size" : 4,\ self.config = {"state_size": 4,
"input_size" : 2,\ "input_size": 2,
"dt" : 0.05,\ "dt": 0.05,
"max_step" : 500,\ "max_step": 500,
"input_lower_bound": [-0.5, -0.5],\ "input_lower_bound": [-0.5, -0.5],
"input_upper_bound": [0.5, 0.5], "input_upper_bound": [0.5, 0.5],
} }
@ -34,13 +36,13 @@ class FirstOrderLagEnv(Env):
""" """
# continuous # continuous
Ac = np.array([[-1./tau, 0., 0., 0.], Ac = np.array([[-1./tau, 0., 0., 0.],
[0., -1./tau, 0., 0.], [0., -1./tau, 0., 0.],
[1., 0., 0., 0.], [1., 0., 0., 0.],
[0., 1., 0., 0.]]) [0., 1., 0., 0.]])
Bc = np.array([[1./tau, 0.], Bc = np.array([[1./tau, 0.],
[0., 1./tau], [0., 1./tau],
[0., 0.], [0., 0.],
[0., 0.]]) [0., 0.]])
# to discrete system # to discrete system
A = scipy.linalg.expm(dt*Ac) A = scipy.linalg.expm(dt*Ac)
# B = np.matmul(np.matmul(scipy.linalg.expm(Ac*dt) - # B = np.matmul(np.matmul(scipy.linalg.expm(Ac*dt) -
@ -94,7 +96,7 @@ class FirstOrderLagEnv(Env):
self.config["input_upper_bound"]) self.config["input_upper_bound"])
next_x = np.matmul(self.A, self.curr_x[:, np.newaxis]) \ next_x = np.matmul(self.A, self.curr_x[:, np.newaxis]) \
+ np.matmul(self.B, u[:, np.newaxis]) + np.matmul(self.B, u[:, np.newaxis])
# cost # cost
cost = 0 cost = 0
@ -111,8 +113,8 @@ class FirstOrderLagEnv(Env):
self.step_count += 1 self.step_count += 1
return next_x.flatten(), cost, \ return next_x.flatten(), cost, \
self.step_count > self.config["max_step"], \ self.step_count > self.config["max_step"], \
{"goal_state" : self.g_x} {"goal_state": self.g_x}
def plot_func(self, to_plot, i=None, history_x=None, history_g_x=None): def plot_func(self, to_plot, i=None, history_x=None, history_g_x=None):
""" """

View File

@ -2,6 +2,8 @@ from .first_order_lag import FirstOrderLagEnv
from .two_wheeled import TwoWheeledConstEnv from .two_wheeled import TwoWheeledConstEnv
from .two_wheeled import TwoWheeledTrackEnv from .two_wheeled import TwoWheeledTrackEnv
from .cartpole import CartPoleEnv from .cartpole import CartPoleEnv
from .nonlinear_sample_system import NonlinearSampleSystemEnv
def make_env(args): def make_env(args):
@ -13,5 +15,7 @@ def make_env(args):
return TwoWheeledTrackEnv() return TwoWheeledTrackEnv()
elif args.env == "CartPole": elif args.env == "CartPole":
return CartPoleEnv() return CartPoleEnv()
elif args.env == "NonlinearSample":
return NonlinearSampleSystemEnv()
raise NotImplementedError("There is not {} Env".format(args.env)) raise NotImplementedError("There is not {} Env".format(args.env))

View File

@ -0,0 +1,97 @@
import numpy as np
import scipy
from scipy import integrate
from .env import Env
from ..common.utils import update_state_with_Runge_Kutta
class NonlinearSampleSystemEnv(Env):
""" Nonlinear Sample Env
"""
def __init__(self):
"""
"""
self.config = {"state_size": 2,
"input_size": 1,
"dt": 0.01,
"max_step": 2000,
"input_lower_bound": [-0.5],
"input_upper_bound": [0.5],
}
super(NonlinearSampleSystemEnv, self).__init__(self.config)
def reset(self, init_x=np.array([2., 0.])):
""" reset state
Returns:
init_x (numpy.ndarray): initial state, shape(state_size, )
info (dict): information
"""
self.step_count = 0
self.curr_x = np.zeros(self.config["state_size"])
if init_x is not None:
self.curr_x = init_x
# goal
self.g_x = np.array([0., 0.])
# clear memory
self.history_x = []
self.history_g_x = []
return self.curr_x, {"goal_state": self.g_x}
def step(self, u):
"""
Args:
u (numpy.ndarray) : input, shape(input_size, )
Returns:
next_x (numpy.ndarray): next state, shape(state_size, )
cost (float): costs
done (bool): end the simulation or not
info (dict): information
"""
# clip action
u = np.clip(u,
self.config["input_lower_bound"],
self.config["input_upper_bound"])
functions = [self._func_x_1, self._func_x_2]
next_x = update_state_with_Runge_Kutta(self.curr_x, u,
functions, self.config["dt"],
batch=False)
# cost
cost = 0
cost = np.sum(u**2)
cost += np.sum((self.curr_x - self.g_x)**2)
# save history
self.history_x.append(next_x.flatten())
self.history_g_x.append(self.g_x.flatten())
# update
self.curr_x = next_x.flatten()
# update costs
self.step_count += 1
return next_x.flatten(), cost, \
self.step_count > self.config["max_step"], \
{"goal_state": self.g_x}
def _func_x_1(self, x, u):
x_dot = x[1]
return x_dot
def _func_x_2(self, x, u):
x_dot = (1. - x[0]**2 - x[1]**2) * x[1] - x[0] + u[0]
return x_dot
def plot_func(self, to_plot, i=None, history_x=None, history_g_x=None):
"""
"""
raise ValueError("NonlinearSampleSystemEnv does not have animation")

View File

@ -5,6 +5,7 @@ import matplotlib.pyplot as plt
from .env import Env from .env import Env
from ..plotters.plot_objs import circle_with_angle, square, circle from ..plotters.plot_objs import circle_with_angle, square, circle
def step_two_wheeled_env(curr_x, u, dt, method="Oylar"): def step_two_wheeled_env(curr_x, u, dt, method="Oylar"):
""" step two wheeled enviroment """ step two wheeled enviroment
@ -28,19 +29,21 @@ def step_two_wheeled_env(curr_x, u, dt, method="Oylar"):
return next_x return next_x
class TwoWheeledConstEnv(Env): class TwoWheeledConstEnv(Env):
""" Two wheeled robot with constant goal Env """ Two wheeled robot with constant goal Env
""" """
def __init__(self): def __init__(self):
""" """
""" """
self.config = {"state_size" : 3,\ self.config = {"state_size": 3,
"input_size" : 2,\ "input_size": 2,
"dt" : 0.01,\ "dt": 0.01,
"max_step" : 500,\ "max_step": 500,
"input_lower_bound": (-1.5, -3.14),\ "input_lower_bound": (-1.5, -3.14),
"input_upper_bound": (1.5, 3.14),\ "input_upper_bound": (1.5, 3.14),
"car_size": 0.2,\ "car_size": 0.2,
"wheel_size": (0.075, 0.015) "wheel_size": (0.075, 0.015)
} }
@ -103,8 +106,8 @@ class TwoWheeledConstEnv(Env):
self.step_count += 1 self.step_count += 1
return next_x.flatten(), costs, \ return next_x.flatten(), costs, \
self.step_count > self.config["max_step"], \ self.step_count > self.config["max_step"], \
{"goal_state" : self.g_x} {"goal_state": self.g_x}
def plot_func(self, to_plot, i=None, history_x=None, history_g_x=None): def plot_func(self, to_plot, i=None, history_x=None, history_g_x=None):
""" plot cartpole object function """ plot cartpole object function
@ -139,7 +142,7 @@ class TwoWheeledConstEnv(Env):
# set imgs # set imgs
# car imgs # car imgs
car_x, car_y, car_angle_x, car_angle_y, \ car_x, car_y, car_angle_x, car_angle_y, \
left_tire_x, left_tire_y, right_tire_x, right_tire_y = \ left_tire_x, left_tire_y, right_tire_x, right_tire_y = \
self._plot_car(history_x[i]) self._plot_car(history_x[i])
to_plot["car"].set_data(car_x, car_y) to_plot["car"].set_data(car_x, car_y)
@ -160,46 +163,48 @@ class TwoWheeledConstEnv(Env):
self.config["car_size"], curr_x[2]) self.config["car_size"], curr_x[2])
# left tire # left tire
center_x = (self.config["car_size"] \ center_x = (self.config["car_size"]
+ self.config["wheel_size"][1]) \ + self.config["wheel_size"][1]) \
* np.cos(curr_x[2]-np.pi/2.) + curr_x[0] * np.cos(curr_x[2]-np.pi/2.) + curr_x[0]
center_y = (self.config["car_size"] \ center_y = (self.config["car_size"]
+ self.config["wheel_size"][1]) \ + self.config["wheel_size"][1]) \
* np.sin(curr_x[2]-np.pi/2.) + curr_x[1] * np.sin(curr_x[2]-np.pi/2.) + curr_x[1]
left_tire_x, left_tire_y = \ left_tire_x, left_tire_y = \
square(center_x, center_y, square(center_x, center_y,
self.config["wheel_size"], curr_x[2]) self.config["wheel_size"], curr_x[2])
# right tire # right tire
center_x = (self.config["car_size"] \ center_x = (self.config["car_size"]
+ self.config["wheel_size"][1]) \ + self.config["wheel_size"][1]) \
* np.cos(curr_x[2]+np.pi/2.) + curr_x[0] * np.cos(curr_x[2]+np.pi/2.) + curr_x[0]
center_y = (self.config["car_size"] \ center_y = (self.config["car_size"]
+ self.config["wheel_size"][1]) \ + self.config["wheel_size"][1]) \
* np.sin(curr_x[2]+np.pi/2.) + curr_x[1] * np.sin(curr_x[2]+np.pi/2.) + curr_x[1]
right_tire_x, right_tire_y = \ right_tire_x, right_tire_y = \
square(center_x, center_y, square(center_x, center_y,
self.config["wheel_size"], curr_x[2]) self.config["wheel_size"], curr_x[2])
return car_x, car_y, car_angle_x, car_angle_y,\ return car_x, car_y, car_angle_x, car_angle_y,\
left_tire_x, left_tire_y,\ left_tire_x, left_tire_y,\
right_tire_x, right_tire_y right_tire_x, right_tire_y
class TwoWheeledTrackEnv(Env): class TwoWheeledTrackEnv(Env):
""" Two wheeled robot with constant goal Env """ Two wheeled robot with constant goal Env
""" """
def __init__(self): def __init__(self):
""" """
""" """
self.config = {"state_size" : 3,\ self.config = {"state_size": 3,
"input_size" : 2,\ "input_size": 2,
"dt" : 0.01,\ "dt": 0.01,
"max_step" : 1000,\ "max_step": 1000,
"input_lower_bound": (-1.5, -3.14),\ "input_lower_bound": (-1.5, -3.14),
"input_upper_bound": (1.5, 3.14),\ "input_upper_bound": (1.5, 3.14),
"car_size": 0.2,\ "car_size": 0.2,
"wheel_size": (0.075, 0.015) "wheel_size": (0.075, 0.015)
} }
@ -220,12 +225,12 @@ class TwoWheeledTrackEnv(Env):
# circle # circle
circle_1_x, circle_1_y = circle(linelength/2., circle_radius, circle_1_x, circle_1_y = circle(linelength/2., circle_radius,
circle_radius, start=-np.pi/2., end=np.pi/2., n_point=50) circle_radius, start=-np.pi/2., end=np.pi/2., n_point=50)
circle_1 = np.stack((circle_1_x , circle_1_y), axis=1) circle_1 = np.stack((circle_1_x, circle_1_y), axis=1)
circle_2_x, circle_2_y = circle(-linelength/2., circle_radius, circle_2_x, circle_2_y = circle(-linelength/2., circle_radius,
circle_radius, start=np.pi/2., end=3*np.pi/2., n_point=50) circle_radius, start=np.pi/2., end=3*np.pi/2., n_point=50)
circle_2 = np.stack((circle_2_x , circle_2_y), axis=1) circle_2 = np.stack((circle_2_x, circle_2_y), axis=1)
road_pos = np.concatenate((line_1, circle_1, line_2, circle_2), axis=0) road_pos = np.concatenate((line_1, circle_1, line_2, circle_2), axis=0)
@ -293,8 +298,8 @@ class TwoWheeledTrackEnv(Env):
self.step_count += 1 self.step_count += 1
return next_x.flatten(), costs, \ return next_x.flatten(), costs, \
self.step_count > self.config["max_step"], \ self.step_count > self.config["max_step"], \
{"goal_state" : self.g_traj} {"goal_state": self.g_traj}
def plot_func(self, to_plot, i=None, history_x=None, history_g_x=None): def plot_func(self, to_plot, i=None, history_x=None, history_g_x=None):
""" plot cartpole object function """ plot cartpole object function
@ -333,7 +338,7 @@ class TwoWheeledTrackEnv(Env):
# set imgs # set imgs
# car imgs # car imgs
car_x, car_y, car_angle_x, car_angle_y, \ car_x, car_y, car_angle_x, car_angle_y, \
left_tire_x, left_tire_y, right_tire_x, right_tire_y = \ left_tire_x, left_tire_y, right_tire_x, right_tire_y = \
self._plot_car(history_x[i]) self._plot_car(history_x[i])
to_plot["car"].set_data(car_x, car_y) to_plot["car"].set_data(car_x, car_y)
@ -354,29 +359,29 @@ class TwoWheeledTrackEnv(Env):
self.config["car_size"], curr_x[2]) self.config["car_size"], curr_x[2])
# left tire # left tire
center_x = (self.config["car_size"] \ center_x = (self.config["car_size"]
+ self.config["wheel_size"][1]) \ + self.config["wheel_size"][1]) \
* np.cos(curr_x[2]-np.pi/2.) + curr_x[0] * np.cos(curr_x[2]-np.pi/2.) + curr_x[0]
center_y = (self.config["car_size"] \ center_y = (self.config["car_size"]
+ self.config["wheel_size"][1]) \ + self.config["wheel_size"][1]) \
* np.sin(curr_x[2]-np.pi/2.) + curr_x[1] * np.sin(curr_x[2]-np.pi/2.) + curr_x[1]
left_tire_x, left_tire_y = \ left_tire_x, left_tire_y = \
square(center_x, center_y, square(center_x, center_y,
self.config["wheel_size"], curr_x[2]) self.config["wheel_size"], curr_x[2])
# right tire # right tire
center_x = (self.config["car_size"] \ center_x = (self.config["car_size"]
+ self.config["wheel_size"][1]) \ + self.config["wheel_size"][1]) \
* np.cos(curr_x[2]+np.pi/2.) + curr_x[0] * np.cos(curr_x[2]+np.pi/2.) + curr_x[0]
center_y = (self.config["car_size"] \ center_y = (self.config["car_size"]
+ self.config["wheel_size"][1]) \ + self.config["wheel_size"][1]) \
* np.sin(curr_x[2]+np.pi/2.) + curr_x[1] * np.sin(curr_x[2]+np.pi/2.) + curr_x[1]
right_tire_x, right_tire_y = \ right_tire_x, right_tire_y = \
square(center_x, center_y, square(center_x, center_y,
self.config["wheel_size"], curr_x[2]) self.config["wheel_size"], curr_x[2])
return car_x, car_y, car_angle_x, car_angle_y,\ return car_x, car_y, car_angle_x, car_angle_y,\
left_tire_x, left_tire_y,\ left_tire_x, left_tire_y,\
right_tire_x, right_tire_y right_tire_x, right_tire_y

View File

@ -7,6 +7,7 @@ import six
import pickle import pickle
from logging import DEBUG, basicConfig, getLogger, FileHandler, StreamHandler, Formatter, Logger from logging import DEBUG, basicConfig, getLogger, FileHandler, StreamHandler, Formatter, Logger
def make_logger(save_dir): def make_logger(save_dir):
""" """
Args: Args:
@ -33,6 +34,7 @@ def make_logger(save_dir):
# sh_handler = StreamHandler() # sh_handler = StreamHandler()
# logger.addHandler(sh_handler) # logger.addHandler(sh_handler)
def int_tuple(s): def int_tuple(s):
""" transform str to tuple """ transform str to tuple
Args: Args:
@ -42,6 +44,7 @@ def int_tuple(s):
""" """
return tuple(int(i) for i in s.split(',')) return tuple(int(i) for i in s.split(','))
def bool_flag(s): def bool_flag(s):
""" transform str to bool flg """ transform str to bool flg
Args: Args:
@ -54,6 +57,7 @@ def bool_flag(s):
msg = 'Invalid value "%s" for bool flag (should be 0 or 1)' msg = 'Invalid value "%s" for bool flag (should be 0 or 1)'
raise ValueError(msg % s) raise ValueError(msg % s)
def file_exists(path): def file_exists(path):
""" Check file existence on given path """ Check file existence on given path
Args: Args:
@ -63,6 +67,7 @@ def file_exists(path):
""" """
return os.path.exists(path) return os.path.exists(path)
def create_dir_if_not_exist(outdir): def create_dir_if_not_exist(outdir):
""" Check directory existence and creates new directory if not exist """ Check directory existence and creates new directory if not exist
Args: Args:
@ -77,6 +82,7 @@ def create_dir_if_not_exist(outdir):
return return
os.makedirs(outdir) os.makedirs(outdir)
def write_text_to_file(file_path, data): def write_text_to_file(file_path, data):
""" Write given text data to file """ Write given text data to file
Args: Args:
@ -86,6 +92,7 @@ def write_text_to_file(file_path, data):
with open(file_path, 'w') as f: with open(file_path, 'w') as f:
f.write(data) f.write(data)
def read_text_from_file(file_path): def read_text_from_file(file_path):
""" Read given file as text """ Read given file as text
Args: Args:
@ -96,6 +103,7 @@ def read_text_from_file(file_path):
with open(file_path, 'r') as f: with open(file_path, 'r') as f:
return f.read() return f.read()
def save_pickle(file_path, data): def save_pickle(file_path, data):
""" pickle given data to file """ pickle given data to file
Args: Args:
@ -105,6 +113,7 @@ def save_pickle(file_path, data):
with open(file_path, 'wb') as f: with open(file_path, 'wb') as f:
pickle.dump(data, f) pickle.dump(data, f)
def load_pickle(file_path): def load_pickle(file_path):
""" load pickled data from file """ load pickled data from file
Args: Args:
@ -118,6 +127,7 @@ def load_pickle(file_path):
else: else:
return pickle.load(f, encoding='bytes') return pickle.load(f, encoding='bytes')
def prepare_output_dir(base_dir, args, time_format='%Y-%m-%d-%H%M%S'): def prepare_output_dir(base_dir, args, time_format='%Y-%m-%d-%H%M%S'):
""" prepare a directory with current datetime as name. """ prepare a directory with current datetime as name.
created directory contains the command and args when the script was called as text file. created directory contains the command and args when the script was called as text file.

View File

@ -2,9 +2,11 @@ import numpy as np
from .model import Model from .model import Model
class CartPoleModel(Model): class CartPoleModel(Model):
""" cartpole model """ cartpole model
""" """
def __init__(self, config): def __init__(self, config):
""" """
""" """
@ -31,21 +33,21 @@ class CartPoleModel(Model):
# x # x
d_x0 = curr_x[1] d_x0 = curr_x[1]
# v_x # v_x
d_x1 = (u[0] + self.mp * np.sin(curr_x[2]) \ d_x1 = (u[0] + self.mp * np.sin(curr_x[2])
* (self.l * (curr_x[3]**2) \ * (self.l * (curr_x[3]**2)
+ self.g * np.cos(curr_x[2]))) \ + self.g * np.cos(curr_x[2]))) \
/ (self.mc + self.mp * (np.sin(curr_x[2])**2)) / (self.mc + self.mp * (np.sin(curr_x[2])**2))
# theta # theta
d_x2 = curr_x[3] d_x2 = curr_x[3]
# v_theta # v_theta
d_x3 = (-u[0] * np.cos(curr_x[2]) \ d_x3 = (-u[0] * np.cos(curr_x[2])
- self.mp * self.l * (curr_x[3]**2) \ - self.mp * self.l * (curr_x[3]**2)
* np.cos(curr_x[2]) * np.sin(curr_x[2]) \ * np.cos(curr_x[2]) * np.sin(curr_x[2])
- (self.mc + self.mp) * self.g * np.sin(curr_x[2])) \ - (self.mc + self.mp) * self.g * np.sin(curr_x[2])) \
/ (self.l * (self.mc + self.mp * (np.sin(curr_x[2])**2))) / (self.l * (self.mc + self.mp * (np.sin(curr_x[2])**2)))
next_x = curr_x +\ next_x = curr_x +\
np.array([d_x0, d_x1, d_x2, d_x3]) * self.dt np.array([d_x0, d_x1, d_x2, d_x3]) * self.dt
return next_x return next_x
@ -53,21 +55,21 @@ class CartPoleModel(Model):
# x # x
d_x0 = curr_x[:, 1] d_x0 = curr_x[:, 1]
# v_x # v_x
d_x1 = (u[:, 0] + self.mp * np.sin(curr_x[:, 2]) \ d_x1 = (u[:, 0] + self.mp * np.sin(curr_x[:, 2])
* (self.l * (curr_x[:, 3]**2) \ * (self.l * (curr_x[:, 3]**2)
+ self.g * np.cos(curr_x[:, 2]))) \ + self.g * np.cos(curr_x[:, 2]))) \
/ (self.mc + self.mp * (np.sin(curr_x[:, 2])**2)) / (self.mc + self.mp * (np.sin(curr_x[:, 2])**2))
# theta # theta
d_x2 = curr_x[:, 3] d_x2 = curr_x[:, 3]
# v_theta # v_theta
d_x3 = (-u[:, 0] * np.cos(curr_x[:, 2]) \ d_x3 = (-u[:, 0] * np.cos(curr_x[:, 2])
- self.mp * self.l * (curr_x[:, 3]**2) \ - self.mp * self.l * (curr_x[:, 3]**2)
* np.cos(curr_x[:, 2]) * np.sin(curr_x[:, 2]) \ * np.cos(curr_x[:, 2]) * np.sin(curr_x[:, 2])
- (self.mc + self.mp) * self.g * np.sin(curr_x[:, 2])) \ - (self.mc + self.mp) * self.g * np.sin(curr_x[:, 2])) \
/ (self.l * (self.mc + self.mp * (np.sin(curr_x[:, 2])**2))) / (self.l * (self.mc + self.mp * (np.sin(curr_x[:, 2])**2)))
next_x = curr_x +\ next_x = curr_x +\
np.stack((d_x0, d_x1, d_x2, d_x3), axis=1) * self.dt np.stack((d_x0, d_x1, d_x2, d_x3), axis=1) * self.dt
return next_x return next_x
@ -95,36 +97,36 @@ class CartPoleModel(Model):
# f_theta # f_theta
tmp = ((self.mc + self.mp * np.sin(xs[:, 2])**2)**(-2)) \ tmp = ((self.mc + self.mp * np.sin(xs[:, 2])**2)**(-2)) \
* self.mp * 2. * np.sin(xs[:, 2]) * np.cos(xs[:, 2]) * self.mp * 2. * np.sin(xs[:, 2]) * np.cos(xs[:, 2])
tmp2 = 1. / (self.mc + self.mp * (np.sin(xs[:, 2])**2)) tmp2 = 1. / (self.mc + self.mp * (np.sin(xs[:, 2])**2))
f_x[:, 1, 2] = - us[:, 0] * tmp \ f_x[:, 1, 2] = - us[:, 0] * tmp \
- tmp * (self.mp * np.sin(xs[:, 2]) \ - tmp * (self.mp * np.sin(xs[:, 2])
* (self.l * xs[:, 3]**2 \ * (self.l * xs[:, 3]**2
+ self.g * np.cos(xs[:, 2]))) \ + self.g * np.cos(xs[:, 2]))) \
+ tmp2 * (self.mp * np.cos(xs[:, 2]) * self.l \ + tmp2 * (self.mp * np.cos(xs[:, 2]) * self.l
* xs[:, 3]**2 \ * xs[:, 3]**2
+ self.mp * self.g * (np.cos(xs[:, 2])**2 \ + self.mp * self.g * (np.cos(xs[:, 2])**2
- np.sin(xs[:, 2])**2)) - np.sin(xs[:, 2])**2))
f_x[:, 3, 2] = - 1. / self.l * tmp \ f_x[:, 3, 2] = - 1. / self.l * tmp \
* (-us[:, 0] * np.cos(xs[:, 2]) \ * (-us[:, 0] * np.cos(xs[:, 2])
- self.mp * self.l * (xs[:, 3]**2) \ - self.mp * self.l * (xs[:, 3]**2)
* np.cos(xs[:, 2]) * np.sin(xs[:, 2]) \ * np.cos(xs[:, 2]) * np.sin(xs[:, 2])
- (self.mc + self.mp) * self.g * np.sin(xs[:, 2])) \ - (self.mc + self.mp) * self.g * np.sin(xs[:, 2])) \
+ 1. / self.l * tmp2 \ + 1. / self.l * tmp2 \
* (us[:, 0] * np.sin(xs[:, 2]) \ * (us[:, 0] * np.sin(xs[:, 2])
- self.mp * self.l * xs[:, 3]**2 \ - self.mp * self.l * xs[:, 3]**2
* (np.cos(xs[:, 2])**2 - np.sin(xs[:, 2])**2) \ * (np.cos(xs[:, 2])**2 - np.sin(xs[:, 2])**2)
- (self.mc + self.mp) \ - (self.mc + self.mp)
* self.g * np.cos(xs[:, 2])) * self.g * np.cos(xs[:, 2]))
# f_theta_dot # f_theta_dot
f_x[:, 1, 3] = tmp2 * (self.mp * np.sin(xs[:, 2]) \ f_x[:, 1, 3] = tmp2 * (self.mp * np.sin(xs[:, 2])
* self.l * 2 * xs[:, 3]) * self.l * 2 * xs[:, 3])
f_x[:, 2, 3] = np.ones(pred_len) f_x[:, 2, 3] = np.ones(pred_len)
f_x[:, 3, 3] = 1. / self.l * tmp2 \ f_x[:, 3, 3] = 1. / self.l * tmp2 \
* (-2. * self.mp * self.l * xs[:, 3] \ * (-2. * self.mp * self.l * xs[:, 3]
* np.cos(xs[:, 2]) * np.sin(xs[:, 2])) * np.cos(xs[:, 2]) * np.sin(xs[:, 2]))
return f_x * dt + np.eye(state_size) # to discrete form return f_x * dt + np.eye(state_size) # to discrete form
@ -150,8 +152,8 @@ class CartPoleModel(Model):
f_u[:, 1, 0] = 1. / (self.mc + self.mp * (np.sin(xs[:, 2])**2)) f_u[:, 1, 0] = 1. / (self.mc + self.mp * (np.sin(xs[:, 2])**2))
f_u[:, 3, 0] = -np.cos(xs[:, 2]) \ f_u[:, 3, 0] = -np.cos(xs[:, 2]) \
/ (self.l * (self.mc \ / (self.l * (self.mc
+ self.mp * (np.sin(xs[:, 2])**2))) + self.mp * (np.sin(xs[:, 2])**2)))
return f_u * dt # to discrete form return f_u * dt # to discrete form

View File

@ -3,6 +3,7 @@ import scipy.linalg
from scipy import integrate from scipy import integrate
from .model import LinearModel from .model import LinearModel
class FirstOrderLagModel(LinearModel): class FirstOrderLagModel(LinearModel):
""" first order lag model """ first order lag model
Attributes: Attributes:
@ -10,13 +11,15 @@ class FirstOrderLagModel(LinearModel):
u (numpy.ndarray): u (numpy.ndarray):
history_pred_xs (numpy.ndarray): history_pred_xs (numpy.ndarray):
""" """
def __init__(self, config, tau=0.63): def __init__(self, config, tau=0.63):
""" """
Args: Args:
tau (float): time constant tau (float): time constant
""" """
# param # param
self.A, self.B = self._to_state_space(tau, dt=config.DT) # discrete system self.A, self.B = self._to_state_space(
tau, dt=config.DT) # discrete system
super(FirstOrderLagModel, self).__init__(self.A, self.B) super(FirstOrderLagModel, self).__init__(self.A, self.B)
@staticmethod @staticmethod
@ -31,20 +34,21 @@ class FirstOrderLagModel(LinearModel):
""" """
# continuous # continuous
Ac = np.array([[-1./tau, 0., 0., 0.], Ac = np.array([[-1./tau, 0., 0., 0.],
[0., -1./tau, 0., 0.], [0., -1./tau, 0., 0.],
[1., 0., 0., 0.], [1., 0., 0., 0.],
[0., 1., 0., 0.]]) [0., 1., 0., 0.]])
Bc = np.array([[1./tau, 0.], Bc = np.array([[1./tau, 0.],
[0., 1./tau], [0., 1./tau],
[0., 0.], [0., 0.],
[0., 0.]]) [0., 0.]])
# to discrete system # to discrete system
A = scipy.linalg.expm(dt*Ac) A = scipy.linalg.expm(dt*Ac)
# B = np.matmul(np.matmul(scipy.linalg.expm(Ac*dt)-scipy.linalg.expm(Ac*0.), np.linalg.inv(Ac)), Bc) # B = np.matmul(np.matmul(scipy.linalg.expm(Ac*dt)-scipy.linalg.expm(Ac*0.), np.linalg.inv(Ac)), Bc)
B = np.zeros_like(Bc) B = np.zeros_like(Bc)
for m in range(Bc.shape[0]): for m in range(Bc.shape[0]):
for n in range(Bc.shape[1]): for n in range(Bc.shape[1]):
integrate_fn = lambda tau: np.matmul(scipy.linalg.expm(Ac*tau), Bc)[m, n] def integrate_fn(tau): return np.matmul(
scipy.linalg.expm(Ac*tau), Bc)[m, n]
sol = integrate.quad(integrate_fn, 0, dt) sol = integrate.quad(integrate_fn, 0, dt)
B[m, n] = sol[0] B[m, n] = sol[0]

View File

@ -1,6 +1,8 @@
from .first_order_lag import FirstOrderLagModel from .first_order_lag import FirstOrderLagModel
from .two_wheeled import TwoWheeledModel from .two_wheeled import TwoWheeledModel
from .cartpole import CartPoleModel from .cartpole import CartPoleModel
from .nonlinear_sample_system import NonlinearSampleSystemModel
def make_model(args, config): def make_model(args, config):
@ -10,5 +12,7 @@ def make_model(args, config):
return TwoWheeledModel(config) return TwoWheeledModel(config)
elif args.env == "CartPole": elif args.env == "CartPole":
return CartPoleModel(config) return CartPoleModel(config)
elif args.env == "NonlinearSample":
return NonlinearSampleSystemModel(config)
raise NotImplementedError("There is not {} Model".format(args.env)) raise NotImplementedError("There is not {} Model".format(args.env))

View File

@ -1,8 +1,10 @@
import numpy as np import numpy as np
class Model(): class Model():
""" base class of model """ base class of model
""" """
def __init__(self): def __init__(self):
""" """
""" """
@ -22,7 +24,7 @@ class Model():
or shape(pop_size, pred_len+1, state_size) or shape(pop_size, pred_len+1, state_size)
""" """
if len(us.shape) == 3: if len(us.shape) == 3:
pred_xs =self._predict_traj_alltogether(curr_x, us) pred_xs = self._predict_traj_alltogether(curr_x, us)
elif len(us.shape) == 2: elif len(us.shape) == 2:
pred_xs = self._predict_traj(curr_x, us) pred_xs = self._predict_traj(curr_x, us)
else: else:
@ -75,8 +77,8 @@ class Model():
# next_x.shape = (pop_size, state_size) # next_x.shape = (pop_size, state_size)
next_x = self.predict_next_state(x, us[t]) next_x = self.predict_next_state(x, us[t])
# update # update
pred_xs = np.concatenate((pred_xs, next_x[np.newaxis, :, :]),\ pred_xs = np.concatenate((pred_xs, next_x[np.newaxis, :, :]),
axis=0) axis=0)
x = next_x x = next_x
return np.transpose(pred_xs, (1, 0, 2)) return np.transpose(pred_xs, (1, 0, 2))
@ -99,13 +101,13 @@ class Model():
# get size # get size
(pred_len, input_size) = us.shape (pred_len, input_size) = us.shape
# pred final adjoint state # pred final adjoint state
lam = self.predict_terminal_adjoint_state(xs[-1],\ lam = self.predict_terminal_adjoint_state(xs[-1],
terminal_g_x=g_xs[-1]) terminal_g_x=g_xs[-1])
lams = lam[np.newaxis, :] lams = lam[np.newaxis, :]
for t in range(pred_len-1, 0, -1): for t in range(pred_len-1, 0, -1):
prev_lam = \ prev_lam = \
self.predict_adjoint_state(lam, xs[t], us[t],\ self.predict_adjoint_state(lam, xs[t], us[t],
goal=g_xs[t], t=t) goal=g_xs[t], t=t)
# update # update
lams = np.concatenate((prev_lam[np.newaxis, :], lams), axis=0) lams = np.concatenate((prev_lam[np.newaxis, :], lams), axis=0)
@ -175,6 +177,7 @@ class Model():
raise NotImplementedError("Implement hessian of model \ raise NotImplementedError("Implement hessian of model \
with respect to the input") with respect to the input")
class LinearModel(Model): class LinearModel(Model):
""" discrete linear model, x[k+1] = Ax[k] + Bu[k] """ discrete linear model, x[k+1] = Ax[k] + Bu[k]
@ -182,6 +185,7 @@ class LinearModel(Model):
A (numpy.ndarray): shape(state_size, state_size) A (numpy.ndarray): shape(state_size, state_size)
B (numpy.ndarray): shape(state_size, input_size) B (numpy.ndarray): shape(state_size, input_size)
""" """
def __init__(self, A, B): def __init__(self, A, B):
""" """
""" """
@ -203,7 +207,7 @@ class LinearModel(Model):
""" """
if len(u.shape) == 1: if len(u.shape) == 1:
next_x = np.matmul(self.A, curr_x[:, np.newaxis]) \ next_x = np.matmul(self.A, curr_x[:, np.newaxis]) \
+ np.matmul(self.B, u[:, np.newaxis]) + np.matmul(self.B, u[:, np.newaxis])
return next_x.flatten() return next_x.flatten()

View File

@ -0,0 +1,164 @@
import numpy as np
from .model import Model
from ..common.utils import update_state_with_Runge_Kutta
class NonlinearSampleSystemModel(Model):
""" nonlinear sample system model
"""
def __init__(self, config):
"""
"""
super(NonlinearSampleSystemModel, self).__init__()
self.dt = config.DT
def predict_next_state(self, curr_x, u):
""" predict next state
Args:
curr_x (numpy.ndarray): current state, shape(state_size, ) or
shape(pop_size, state_size)
u (numpy.ndarray): input, shape(input_size, ) or
shape(pop_size, input_size)
Returns:
next_x (numpy.ndarray): next state, shape(state_size, ) or
shape(pop_size, state_size)
"""
if len(u.shape) == 1:
func_1 = self._func_x_1
func_2 = self._func_x_2
functions = [func_1, func_2]
next_x = update_state_with_Runge_Kutta(
curr_x, u, functions, batch=False)
return next_x
elif len(u.shape) == 2:
def func_1(xs, us): return self._func_x_1(xs, us, batch=True)
def func_2(xs, us): return self._func_x_2(xs, us, batch=True)
functions = [func_1, func_2]
next_x = update_state_with_Runge_Kutta(
curr_x, u, functions, batch=True)
return next_x
def _func_x_1(self, x, u, batch=False):
if not batch:
x_dot = x[1]
else:
x_dot = x[:, 1]
return x_dot
def _func_x_2(self, x, u, batch=False):
if not batch:
x_dot = (1. - x[0]**2 - x[1]**2) * x[1] - x[0] + u[0]
else:
x_dot = (1. - x[:, 0]**2 - x[:, 1]**2) * \
x[:, 1] - x[:, 0] + u[:, 0]
return x_dot
def calc_f_x(self, xs, us, dt):
""" gradient of model with respect to the state in batch form
Args:
xs (numpy.ndarray): state, shape(pred_len+1, state_size)
us (numpy.ndarray): input, shape(pred_len, input_size,)
Return:
f_x (numpy.ndarray): gradient of model with respect to x,
shape(pred_len, state_size, state_size)
Notes:
This should be discrete form !!
"""
# get size
(_, state_size) = xs.shape
(pred_len, _) = us.shape
f_x = np.zeros((pred_len, state_size, state_size))
f_x[:, 0, 1] = 1.
f_x[:, 1, 0] = 2. * xs[:, 0] * xs[:, 1] - 1.
f_x[:, 1, 1] = - 2. * xs[:, 1] * xs[:, 1] + \
(1. - xs[:, 0]**2 - xs[:, 1]**2)
return f_x * dt + np.eye(state_size) # to discrete form
def calc_f_u(self, xs, us, dt):
""" gradient of model with respect to the input in batch form
Args:
xs (numpy.ndarray): state, shape(pred_len+1, state_size)
us (numpy.ndarray): input, shape(pred_len, input_size,)
Return:
f_u (numpy.ndarray): gradient of model with respect to x,
shape(pred_len, state_size, input_size)
Notes:
This should be discrete form !!
"""
# get size
(_, state_size) = xs.shape
(pred_len, input_size) = us.shape
f_u = np.zeros((pred_len, state_size, input_size))
f_u[:, 1, 0] = 1.
return f_u * dt # to discrete form
def calc_f_xx(self, xs, us, dt):
""" hessian of model with respect to the state in batch form
Args:
xs (numpy.ndarray): state, shape(pred_len+1, state_size)
us (numpy.ndarray): input, shape(pred_len, input_size,)
Return:
f_xx (numpy.ndarray): gradient of model with respect to x,
shape(pred_len, state_size, state_size, state_size)
"""
# get size
(_, state_size) = xs.shape
(pred_len, _) = us.shape
f_xx = np.zeros((pred_len, state_size, state_size, state_size))
raise NotImplementedError
def calc_f_ux(self, xs, us, dt):
""" hessian of model with respect to state and input in batch form
Args:
xs (numpy.ndarray): state, shape(pred_len+1, state_size)
us (numpy.ndarray): input, shape(pred_len, input_size,)
Return:
f_ux (numpy.ndarray): gradient of model with respect to x,
shape(pred_len, state_size, input_size, state_size)
"""
# get size
(_, state_size) = xs.shape
(pred_len, input_size) = us.shape
f_ux = np.zeros((pred_len, state_size, input_size, state_size))
raise NotImplementedError
def calc_f_uu(self, xs, us, dt):
""" hessian of model with respect to input in batch form
Args:
xs (numpy.ndarray): state, shape(pred_len+1, state_size)
us (numpy.ndarray): input, shape(pred_len, input_size,)
Return:
f_uu (numpy.ndarray): gradient of model with respect to x,
shape(pred_len, state_size, input_size, input_size)
"""
# get size
(_, state_size) = xs.shape
(pred_len, input_size) = us.shape
f_uu = np.zeros((pred_len, state_size, input_size, input_size))
raise NotImplementedError

View File

@ -2,9 +2,11 @@ import numpy as np
from .model import Model from .model import Model
class TwoWheeledModel(Model): class TwoWheeledModel(Model):
""" two wheeled model """ two wheeled model
""" """
def __init__(self, config): def __init__(self, config):
""" """
""" """

View File

@ -1,9 +1,11 @@
import numpy as np import numpy as np
from .planner import Planner from .planner import Planner
class ClosestPointPlanner(Planner): class ClosestPointPlanner(Planner):
""" This planner make goal state according to goal path """ This planner make goal state according to goal path
""" """
def __init__(self, config): def __init__(self, config):
""" """
""" """

View File

@ -1,9 +1,11 @@
import numpy as np import numpy as np
from .planner import Planner from .planner import Planner
class ConstantPlanner(Planner): class ConstantPlanner(Planner):
""" This planner make constant goal state """ This planner make constant goal state
""" """
def __init__(self, config): def __init__(self, config):
""" """
""" """

View File

@ -1,6 +1,7 @@
from .const_planner import ConstantPlanner from .const_planner import ConstantPlanner
from .closest_point_planner import ClosestPointPlanner from .closest_point_planner import ClosestPointPlanner
def make_planner(args, config): def make_planner(args, config):
if args.env == "FirstOrderLag": if args.env == "FirstOrderLag":
@ -11,5 +12,8 @@ def make_planner(args, config):
return ClosestPointPlanner(config) return ClosestPointPlanner(config)
elif args.env == "CartPole": elif args.env == "CartPole":
return ConstantPlanner(config) return ConstantPlanner(config)
elif args.env == "NonlinearSample":
return ConstantPlanner(config)
raise NotImplementedError("There is not {} Planner".format(args.planner_type)) raise NotImplementedError(
"There is not {} Planner".format(args.planner_type))

View File

@ -1,8 +1,10 @@
import numpy as np import numpy as np
class Planner(): class Planner():
""" """
""" """
def __init__(self): def __init__(self):
""" """
""" """

View File

@ -8,9 +8,11 @@ import matplotlib.animation as animation
logger = getLogger(__name__) logger = getLogger(__name__)
class Animator(): class Animator():
""" animation class """ animation class
""" """
def __init__(self, env, args=None): def __init__(self, env, args=None):
""" """
""" """
@ -65,7 +67,7 @@ class Animator():
""" """
# set up animation figures # set up animation figures
self._setup() self._setup()
_update_img = lambda i: self._update_img(i, history_x, history_g_x) def _update_img(i): return self._update_img(i, history_x, history_g_x)
# Set up formatting for the movie files # Set up formatting for the movie files
Writer = animation.writers['ffmpeg'] Writer = animation.writers['ffmpeg']

View File

@ -5,6 +5,7 @@ import matplotlib.pyplot as plt
from ..helper import save_pickle, load_pickle from ..helper import save_pickle, load_pickle
def plot_result(history, history_g=None, ylabel="x", def plot_result(history, history_g=None, ylabel="x",
save_dir="./result", name="state_history"): save_dir="./result", name="state_history"):
""" """
@ -28,7 +29,7 @@ def plot_result(history, history_g=None, ylabel="x",
def plot(axis, history, history_g=None): def plot(axis, history, history_g=None):
axis.plot(range(iters), history, c="r", linewidth=3) axis.plot(range(iters), history, c="r", linewidth=3)
if history_g is not None: if history_g is not None:
axis.plot(range(iters), history_g,\ axis.plot(range(iters), history_g,
c="b", linewidth=3, label="goal") c="b", linewidth=3, label="goal")
if i < size: if i < size:
@ -47,6 +48,7 @@ def plot_result(history, history_g=None, ylabel="x",
axis1.legend(ncol=1, bbox_to_anchor=(0., 1.02, 1., 0.102), loc=3) axis1.legend(ncol=1, bbox_to_anchor=(0., 1.02, 1., 0.102), loc=3)
figure.savefig(path, bbox_inches="tight", pad_inches=0.05) figure.savefig(path, bbox_inches="tight", pad_inches=0.05)
def plot_results(history_x, history_u, history_g=None, args=None): def plot_results(history_x, history_u, history_g=None, args=None):
""" """
@ -64,12 +66,13 @@ def plot_results(history_x, history_u, history_g=None, args=None):
controller_type = args.controller_type controller_type = args.controller_type
plot_result(history_x, history_g=history_g, ylabel="x", plot_result(history_x, history_g=history_g, ylabel="x",
name= env + "-state_history", name=env + "-state_history",
save_dir="./result/" + controller_type) save_dir="./result/" + controller_type)
plot_result(history_u, history_g=np.zeros_like(history_u), ylabel="u", plot_result(history_u, history_g=np.zeros_like(history_u), ylabel="u",
name= env + "-input_history", name=env + "-input_history",
save_dir="./result/" + controller_type) save_dir="./result/" + controller_type)
def save_plot_data(history_x, history_u, history_g=None, args=None): def save_plot_data(history_x, history_u, history_g=None, args=None):
""" save plot data """ save plot data
@ -98,6 +101,7 @@ def save_plot_data(history_x, history_u, history_g=None, args=None):
env + "-history_g.pkl") env + "-history_g.pkl")
save_pickle(path, history_g) save_pickle(path, history_g)
def load_plot_data(env, controller_type, result_dir="./result"): def load_plot_data(env, controller_type, result_dir="./result"):
""" """
Args: Args:
@ -123,6 +127,7 @@ def load_plot_data(env, controller_type, result_dir="./result"):
return history_x, history_u, history_g return history_x, history_u, history_g
def plot_multi_result(histories, histories_g=None, labels=None, ylabel="x", def plot_multi_result(histories, histories_g=None, labels=None, ylabel="x",
save_dir="./result", name="state_history"): save_dir="./result", name="state_history"):
""" """
@ -146,17 +151,17 @@ def plot_multi_result(histories, histories_g=None, labels=None, ylabel="x",
axis.plot(range(iters), history, axis.plot(range(iters), history,
linewidth=3, label=label, alpha=0.7, linestyle="dashed") linewidth=3, label=label, alpha=0.7, linestyle="dashed")
if history_g is not None: if history_g is not None:
axis.plot(range(iters), history_g,\ axis.plot(range(iters), history_g,
c="b", linewidth=3) c="b", linewidth=3)
if i < size: if i < size:
for j, (history, history_g) \ for j, (history, history_g) \
in enumerate(zip(histories, histories_g)): in enumerate(zip(histories, histories_g)):
plot(axis1, history[:, i], plot(axis1, history[:, i],
history_g=history_g[:, i], label=labels[j]) history_g=history_g[:, i], label=labels[j])
if i+1 < size: if i+1 < size:
for j, (history, history_g) in \ for j, (history, history_g) in \
enumerate(zip(histories, histories_g)): enumerate(zip(histories, histories_g)):
plot(axis2, history[:, i+1], plot(axis2, history[:, i+1],
history_g=history_g[:, i+1], label=labels[j]) history_g=history_g[:, i+1], label=labels[j])

View File

@ -5,6 +5,7 @@ import matplotlib.pyplot as plt
from ..common.utils import rotate_pos from ..common.utils import rotate_pos
def circle(center_x, center_y, radius, start=0., end=2*np.pi, n_point=100): def circle(center_x, center_y, radius, start=0., end=2*np.pi, n_point=100):
""" Create circle matrix """ Create circle matrix
@ -29,6 +30,7 @@ def circle(center_x, center_y, radius, start=0., end=2*np.pi, n_point=100):
return np.array(circle_xs), np.array(circle_ys) return np.array(circle_xs), np.array(circle_ys)
def circle_with_angle(center_x, center_y, radius, angle): def circle_with_angle(center_x, center_y, radius, angle):
""" Create circle matrix with angle line matrix """ Create circle matrix with angle line matrix
@ -50,6 +52,7 @@ def circle_with_angle(center_x, center_y, radius, angle):
return circle_x, circle_y, angle_x, angle_y return circle_x, circle_y, angle_x, angle_y
def square(center_x, center_y, shape, angle): def square(center_x, center_y, shape, angle):
""" Create square """ Create square
@ -77,6 +80,7 @@ def square(center_x, center_y, shape, angle):
return trans_points[:, 0], trans_points[:, 1] return trans_points[:, 0], trans_points[:, 1]
def square_with_angle(center_x, center_y, shape, angle): def square_with_angle(center_x, center_y, shape, angle):
""" Create square with angle line """ Create square with angle line

View File

@ -1,4 +1,5 @@
from .runner import ExpRunner from .runner import ExpRunner
def make_runner(args): def make_runner(args):
return ExpRunner() return ExpRunner()

View File

@ -4,9 +4,11 @@ import numpy as np
logger = getLogger(__name__) logger = getLogger(__name__)
class ExpRunner(): class ExpRunner():
""" experiment runner """ experiment runner
""" """
def __init__(self): def __init__(self):
""" """
""" """
@ -46,6 +48,6 @@ class ExpRunner():
score += cost score += cost
step_count += 1 step_count += 1
logger.debug("Controller type = {}, Score = {}"\ logger.debug("Controller type = {}, Score = {}"
.format(controller, score)) .format(controller, score))
return np.array(history_x), np.array(history_u), np.array(history_g) return np.array(history_x), np.array(history_u), np.array(history_g)

View File

@ -71,6 +71,7 @@ There are 4 example environments, "FirstOrderLag", "TwoWheeledConst", "TwoWheele
| Two wheeled System (Constant Goal) | x | ✓ | 3 | 2 | | Two wheeled System (Constant Goal) | x | ✓ | 3 | 2 |
| Two wheeled System (Moving Goal) | x | ✓ | 3 | 2 | | Two wheeled System (Moving Goal) | x | ✓ | 3 | 2 |
| Cartpole (Swing up) | x | ✓ | 4 | 1 | | Cartpole (Swing up) | x | ✓ | 4 | 1 |
| Nonlinear Sample System Env | x | ✓ | 2 | 1 |
All states and inputs of environments are continuous. All states and inputs of environments are continuous.
**It should be noted that the algorithms for linear model could be applied to nonlinear enviroments if you have linealized the model of nonlinear environments.** **It should be noted that the algorithms for linear model could be applied to nonlinear enviroments if you have linealized the model of nonlinear environments.**

Binary file not shown.

After

Width:  |  Height:  |  Size: 20 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 28 KiB

View File

@ -6,7 +6,8 @@ import numpy as np
import matplotlib.pyplot as plt import matplotlib.pyplot as plt
from PythonLinearNonlinearControl.plotters.plot_func import load_plot_data, \ from PythonLinearNonlinearControl.plotters.plot_func import load_plot_data, \
plot_multi_result plot_multi_result
def run(args): def run(args):
@ -39,7 +40,8 @@ def run(args):
ylabel="x") ylabel="x")
plot_multi_result(history_us, histories_g=np.zeros_like(history_us), plot_multi_result(history_us, histories_g=np.zeros_like(history_us),
labels=controllers, ylabel="u", name="input_history") labels=controllers, ylabel="u", name="input_history")
def main(): def main():
parser = argparse.ArgumentParser() parser = argparse.ArgumentParser()
@ -51,5 +53,6 @@ def main():
run(args) run(args)
if __name__ == "__main__": if __name__ == "__main__":
main() main()

View File

@ -8,9 +8,10 @@ from PythonLinearNonlinearControl.models.make_models import make_model
from PythonLinearNonlinearControl.envs.make_envs import make_env from PythonLinearNonlinearControl.envs.make_envs import make_env
from PythonLinearNonlinearControl.runners.make_runners import make_runner from PythonLinearNonlinearControl.runners.make_runners import make_runner
from PythonLinearNonlinearControl.plotters.plot_func import plot_results, \ from PythonLinearNonlinearControl.plotters.plot_func import plot_results, \
save_plot_data save_plot_data
from PythonLinearNonlinearControl.plotters.animator import Animator from PythonLinearNonlinearControl.plotters.animator import Animator
def run(args): def run(args):
# logger # logger
make_logger(args.result_dir) make_logger(args.result_dir)
@ -44,17 +45,19 @@ def run(args):
animator = Animator(env, args=args) animator = Animator(env, args=args)
animator.draw(history_x, history_g) animator.draw(history_x, history_g)
def main(): def main():
parser = argparse.ArgumentParser() parser = argparse.ArgumentParser()
parser.add_argument("--controller_type", type=str, default="CEM") parser.add_argument("--controller_type", type=str, default="DDP")
parser.add_argument("--env", type=str, default="TwoWheeledTrack") parser.add_argument("--env", type=str, default="NonlinearSample")
parser.add_argument("--save_anim", type=bool_flag, default=1) parser.add_argument("--save_anim", type=bool_flag, default=0)
parser.add_argument("--result_dir", type=str, default="./result") parser.add_argument("--result_dir", type=str, default="./result")
args = parser.parse_args() args = parser.parse_args()
run(args) run(args)
if __name__ == "__main__": if __name__ == "__main__":
main() main()