Merge pull request #11 from Shunichi09/develop
Add nonlinear sample Env
This commit is contained in:
commit
0d443f7ed5
|
@ -6,6 +6,8 @@
|
|||
| Two wheeled System (Constant Goal) | x | ✓ | 3 | 2 |
|
||||
| Two wheeled System (Moving Goal) (Coming soon) | x | ✓ | 3 | 2 |
|
||||
| Cartpole (Swing up) | x | ✓ | 4 | 1 |
|
||||
| Nonlinear Sample System Env | x | ✓ | 2 | 1 |
|
||||
|
||||
|
||||
## [FistOrderLagEnv](PythonLinearNonlinearControl/envs/first_order_lag.py)
|
||||
|
||||
|
@ -54,3 +56,13 @@ mc = 1, mp = 0.2, l = 0.5, g = 9.81
|
|||
### Cost.
|
||||
|
||||
<img src="assets/cartpole_score.png" width="300">
|
||||
|
||||
## [Nonlinear Sample System Env](PythonLinearNonlinearControl/envs/nonlinear_sample_system.py)
|
||||
|
||||
## System equation.
|
||||
|
||||
<img src="assets/nonlinear_sample_system.png" width="400">
|
||||
|
||||
### Cost.
|
||||
|
||||
<img src="assets/nonlinear_sample_system_score.png" width="400">
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
import numpy as np
|
||||
|
||||
|
||||
def rotate_pos(pos, angle):
|
||||
""" Transformation the coordinate in the angle
|
||||
|
||||
|
@ -14,6 +15,7 @@ def rotate_pos(pos, angle):
|
|||
|
||||
return np.dot(pos, rot_mat.T)
|
||||
|
||||
|
||||
def fit_angle_in_range(angles, min_angle=-np.pi, max_angle=np.pi):
|
||||
""" Check angle range and correct the range
|
||||
|
||||
|
@ -42,3 +44,75 @@ def fit_angle_in_range(angles, min_angle=-np.pi, max_angle=np.pi):
|
|||
|
||||
output = np.minimum(max_angle, np.maximum(min_angle, output))
|
||||
return output.reshape(output_shape)
|
||||
|
||||
|
||||
def update_state_with_Runge_Kutta(state, u, functions, dt=0.01, batch=True):
|
||||
""" update state in Runge Kutta methods
|
||||
Args:
|
||||
state (array-like): state of system
|
||||
u (array-like): input of system
|
||||
functions (list): update function of each state,
|
||||
each function will be called like func(state, u)
|
||||
We expect that this function returns differential of each state
|
||||
dt (float): float in seconds
|
||||
batch (bool): state and u is given by batch or not
|
||||
|
||||
Returns:
|
||||
next_state (np.array): next state of system
|
||||
|
||||
Notes:
|
||||
sample of function is as follows:
|
||||
|
||||
def func_x(self, x_1, x_2, u):
|
||||
x_dot = (1. - x_1**2 - x_2**2) * x_2 - x_1 + u
|
||||
return x_dot
|
||||
|
||||
Note that the function return x_dot.
|
||||
"""
|
||||
if not batch:
|
||||
state_size = len(state)
|
||||
assert state_size == len(functions), \
|
||||
"Invalid functions length, You need to give the state size functions"
|
||||
|
||||
k0 = np.zeros(state_size)
|
||||
k1 = np.zeros(state_size)
|
||||
k2 = np.zeros(state_size)
|
||||
k3 = np.zeros(state_size)
|
||||
|
||||
for i, func in enumerate(functions):
|
||||
k0[i] = dt * func(state, u)
|
||||
|
||||
for i, func in enumerate(functions):
|
||||
k1[i] = dt * func(state + k0 / 2., u)
|
||||
|
||||
for i, func in enumerate(functions):
|
||||
k2[i] = dt * func(state + k1 / 2., u)
|
||||
|
||||
for i, func in enumerate(functions):
|
||||
k3[i] = dt * func(state + k2, u)
|
||||
|
||||
return state + (k0 + 2. * k1 + 2. * k2 + k3) / 6.
|
||||
|
||||
else:
|
||||
batch_size, state_size = state.shape
|
||||
assert state_size == len(functions), \
|
||||
"Invalid functions length, You need to give the state size functions"
|
||||
|
||||
k0 = np.zeros((batch_size, state_size))
|
||||
k1 = np.zeros((batch_size, state_size))
|
||||
k2 = np.zeros((batch_size, state_size))
|
||||
k3 = np.zeros((batch_size, state_size))
|
||||
|
||||
for i, func in enumerate(functions):
|
||||
k0[:, i] = dt * func(state, u)
|
||||
|
||||
for i, func in enumerate(functions):
|
||||
k1[:, i] = dt * func(state + k0 / 2., u)
|
||||
|
||||
for i, func in enumerate(functions):
|
||||
k2[:, i] = dt * func(state + k1 / 2., u)
|
||||
|
||||
for i, func in enumerate(functions):
|
||||
k3[:, i] = dt * func(state + k2, u)
|
||||
|
||||
return state + (k0 + 2. * k1 + 2. * k2 + k3) / 6.
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
import numpy as np
|
||||
|
||||
|
||||
class CartPoleConfigModule():
|
||||
# parameters
|
||||
ENV_NAME = "CartPole-v0"
|
||||
|
@ -12,7 +13,7 @@ class CartPoleConfigModule():
|
|||
DT = 0.02
|
||||
# cost parameters
|
||||
R = np.diag([0.01]) # 0.01 is worked for MPPI and CEM and MPPIWilliams
|
||||
# 1. is worked for iLQR
|
||||
# 1. is worked for iLQR
|
||||
TERMINAL_WEIGHT = 1.
|
||||
Q = None
|
||||
Sf = None
|
||||
|
@ -39,40 +40,40 @@ class CartPoleConfigModule():
|
|||
"num_elites": 50,
|
||||
"max_iters": 15,
|
||||
"alpha": 0.3,
|
||||
"init_var":9.,
|
||||
"threshold":0.001
|
||||
"init_var": 9.,
|
||||
"threshold": 0.001
|
||||
},
|
||||
"MPPI":{
|
||||
"beta" : 0.6,
|
||||
"MPPI": {
|
||||
"beta": 0.6,
|
||||
"popsize": 5000,
|
||||
"kappa": 0.9,
|
||||
"noise_sigma": 0.5,
|
||||
},
|
||||
"MPPIWilliams":{
|
||||
"MPPIWilliams": {
|
||||
"popsize": 5000,
|
||||
"lambda": 1.,
|
||||
"noise_sigma": 0.9,
|
||||
},
|
||||
"iLQR":{
|
||||
"iLQR": {
|
||||
"max_iter": 500,
|
||||
"init_mu": 1.,
|
||||
"mu_min": 1e-6,
|
||||
"mu_max": 1e10,
|
||||
"init_delta": 2.,
|
||||
"threshold": 1e-6,
|
||||
},
|
||||
"DDP":{
|
||||
},
|
||||
"DDP": {
|
||||
"max_iter": 500,
|
||||
"init_mu": 1.,
|
||||
"mu_min": 1e-6,
|
||||
"mu_max": 1e10,
|
||||
"init_delta": 2.,
|
||||
"threshold": 1e-6,
|
||||
},
|
||||
"NMPC-CGMRES":{
|
||||
},
|
||||
"NMPC-Newton":{
|
||||
},
|
||||
},
|
||||
"NMPC-CGMRES": {
|
||||
},
|
||||
"NMPC-Newton": {
|
||||
},
|
||||
}
|
||||
|
||||
@staticmethod
|
||||
|
@ -103,21 +104,21 @@ class CartPoleConfigModule():
|
|||
"""
|
||||
|
||||
if len(x.shape) > 2:
|
||||
return (6. * (x[:, :, 0]**2) \
|
||||
+ 12. * ((np.cos(x[:, :, 2]) + 1.)**2) \
|
||||
+ 0.1 * (x[:, :, 1]**2) \
|
||||
+ 0.1 * (x[:, :, 3]**2))[:, :, np.newaxis]
|
||||
return (6. * (x[:, :, 0]**2)
|
||||
+ 12. * ((np.cos(x[:, :, 2]) + 1.)**2)
|
||||
+ 0.1 * (x[:, :, 1]**2)
|
||||
+ 0.1 * (x[:, :, 3]**2))[:, :, np.newaxis]
|
||||
|
||||
elif len(x.shape) > 1:
|
||||
return (6. * (x[:, 0]**2) \
|
||||
+ 12. * ((np.cos(x[:, 2]) + 1.)**2) \
|
||||
+ 0.1 * (x[:, 1]**2) \
|
||||
+ 0.1 * (x[:, 3]**2))[:, np.newaxis]
|
||||
return (6. * (x[:, 0]**2)
|
||||
+ 12. * ((np.cos(x[:, 2]) + 1.)**2)
|
||||
+ 0.1 * (x[:, 1]**2)
|
||||
+ 0.1 * (x[:, 3]**2))[:, np.newaxis]
|
||||
|
||||
return 6. * (x[0]**2) \
|
||||
+ 12. * ((np.cos(x[2]) + 1.)**2) \
|
||||
+ 0.1 * (x[1]**2) \
|
||||
+ 0.1 * (x[3]**2)
|
||||
+ 12. * ((np.cos(x[2]) + 1.)**2) \
|
||||
+ 0.1 * (x[1]**2) \
|
||||
+ 0.1 * (x[3]**2)
|
||||
|
||||
@staticmethod
|
||||
def terminal_state_cost_fn(terminal_x, terminal_g_x):
|
||||
|
@ -134,18 +135,18 @@ class CartPoleConfigModule():
|
|||
"""
|
||||
|
||||
if len(terminal_x.shape) > 1:
|
||||
return (6. * (terminal_x[:, 0]**2) \
|
||||
+ 12. * ((np.cos(terminal_x[:, 2]) + 1.)**2) \
|
||||
+ 0.1 * (terminal_x[:, 1]**2) \
|
||||
+ 0.1 * (terminal_x[:, 3]**2))[:, np.newaxis] \
|
||||
* CartPoleConfigModule.TERMINAL_WEIGHT
|
||||
|
||||
return (6. * (terminal_x[0]**2) \
|
||||
+ 12. * ((np.cos(terminal_x[2]) + 1.)**2) \
|
||||
+ 0.1 * (terminal_x[1]**2) \
|
||||
+ 0.1 * (terminal_x[3]**2)) \
|
||||
return (6. * (terminal_x[:, 0]**2)
|
||||
+ 12. * ((np.cos(terminal_x[:, 2]) + 1.)**2)
|
||||
+ 0.1 * (terminal_x[:, 1]**2)
|
||||
+ 0.1 * (terminal_x[:, 3]**2))[:, np.newaxis] \
|
||||
* CartPoleConfigModule.TERMINAL_WEIGHT
|
||||
|
||||
return (6. * (terminal_x[0]**2)
|
||||
+ 12. * ((np.cos(terminal_x[2]) + 1.)**2)
|
||||
+ 0.1 * (terminal_x[1]**2)
|
||||
+ 0.1 * (terminal_x[3]**2)) \
|
||||
* CartPoleConfigModule.TERMINAL_WEIGHT
|
||||
|
||||
@staticmethod
|
||||
def gradient_cost_fn_with_state(x, g_x, terminal=False):
|
||||
""" gradient of costs with respect to the state
|
||||
|
@ -163,7 +164,7 @@ class CartPoleConfigModule():
|
|||
cost_dx1 = 0.2 * x[:, 1]
|
||||
cost_dx2 = 24. * (1 + np.cos(x[:, 2])) * -np.sin(x[:, 2])
|
||||
cost_dx3 = 0.2 * x[:, 3]
|
||||
cost_dx = np.stack((cost_dx0, cost_dx1,\
|
||||
cost_dx = np.stack((cost_dx0, cost_dx1,
|
||||
cost_dx2, cost_dx3), axis=1)
|
||||
return cost_dx
|
||||
|
||||
|
@ -206,9 +207,9 @@ class CartPoleConfigModule():
|
|||
hessian[:, 0, 0] = 12.
|
||||
hessian[:, 1, 1] = 0.2
|
||||
hessian[:, 2, 2] = 24. * -np.sin(x[:, 2]) \
|
||||
* (-np.sin(x[:, 2])) \
|
||||
+ 24. * (1. + np.cos(x[:, 2])) \
|
||||
* -np.cos(x[:, 2])
|
||||
* (-np.sin(x[:, 2])) \
|
||||
+ 24. * (1. + np.cos(x[:, 2])) \
|
||||
* -np.cos(x[:, 2])
|
||||
hessian[:, 3, 3] = 0.2
|
||||
|
||||
return hessian
|
||||
|
@ -218,9 +219,9 @@ class CartPoleConfigModule():
|
|||
hessian[0, 0] = 12.
|
||||
hessian[1, 1] = 0.2
|
||||
hessian[2, 2] = 24. * -np.sin(x[2]) \
|
||||
* (-np.sin(x[2])) \
|
||||
+ 24. * (1. + np.cos(x[2])) \
|
||||
* -np.cos(x[2])
|
||||
* (-np.sin(x[2])) \
|
||||
+ 24. * (1. + np.cos(x[2])) \
|
||||
* -np.cos(x[2])
|
||||
hessian[3, 3] = 0.2
|
||||
|
||||
return hessian[np.newaxis, :, :] * CartPoleConfigModule.TERMINAL_WEIGHT
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
import numpy as np
|
||||
|
||||
|
||||
class FirstOrderLagConfigModule():
|
||||
# parameters
|
||||
ENV_NAME = "FirstOrderLag-v0"
|
||||
|
@ -34,42 +35,42 @@ class FirstOrderLagConfigModule():
|
|||
"num_elites": 50,
|
||||
"max_iters": 15,
|
||||
"alpha": 0.3,
|
||||
"init_var":1.,
|
||||
"threshold":0.001
|
||||
"init_var": 1.,
|
||||
"threshold": 0.001
|
||||
},
|
||||
"MPPI":{
|
||||
"beta" : 0.6,
|
||||
"MPPI": {
|
||||
"beta": 0.6,
|
||||
"popsize": 5000,
|
||||
"kappa": 0.9,
|
||||
"noise_sigma": 0.5,
|
||||
},
|
||||
"MPPIWilliams":{
|
||||
"MPPIWilliams": {
|
||||
"popsize": 5000,
|
||||
"lambda": 1.,
|
||||
"noise_sigma": 0.9,
|
||||
},
|
||||
"MPC":{
|
||||
},
|
||||
"iLQR":{
|
||||
"MPC": {
|
||||
},
|
||||
"iLQR": {
|
||||
"max_iter": 500,
|
||||
"init_mu": 1.,
|
||||
"mu_min": 1e-6,
|
||||
"mu_max": 1e10,
|
||||
"init_delta": 2.,
|
||||
"threshold": 1e-6,
|
||||
},
|
||||
"DDP":{
|
||||
},
|
||||
"DDP": {
|
||||
"max_iter": 500,
|
||||
"init_mu": 1.,
|
||||
"mu_min": 1e-6,
|
||||
"mu_max": 1e10,
|
||||
"init_delta": 2.,
|
||||
"threshold": 1e-6,
|
||||
},
|
||||
"NMPC-CGMRES":{
|
||||
},
|
||||
"NMPC-Newton":{
|
||||
},
|
||||
},
|
||||
"NMPC-CGMRES": {
|
||||
},
|
||||
"NMPC-Newton": {
|
||||
},
|
||||
}
|
||||
|
||||
@staticmethod
|
||||
|
@ -111,7 +112,7 @@ class FirstOrderLagConfigModule():
|
|||
shape(pop_size, pred_len)
|
||||
"""
|
||||
return ((terminal_x - terminal_g_x)**2) \
|
||||
* np.diag(FirstOrderLagConfigModule.Sf)
|
||||
* np.diag(FirstOrderLagConfigModule.Sf)
|
||||
|
||||
@staticmethod
|
||||
def gradient_cost_fn_with_state(x, g_x, terminal=False):
|
||||
|
@ -128,8 +129,8 @@ class FirstOrderLagConfigModule():
|
|||
if not terminal:
|
||||
return 2. * (x - g_x) * np.diag(FirstOrderLagConfigModule.Q)
|
||||
|
||||
return (2. * (x - g_x) \
|
||||
* np.diag(FirstOrderLagConfigModule.Sf))[np.newaxis, :]
|
||||
return (2. * (x - g_x)
|
||||
* np.diag(FirstOrderLagConfigModule.Sf))[np.newaxis, :]
|
||||
|
||||
@staticmethod
|
||||
def gradient_cost_fn_with_input(x, u):
|
||||
|
|
|
@ -1,6 +1,8 @@
|
|||
from .first_order_lag import FirstOrderLagConfigModule
|
||||
from .two_wheeled import TwoWheeledConfigModule
|
||||
from .cartpole import CartPoleConfigModule
|
||||
from .nonlinear_sample_system import NonlinearSampleSystemConfigModule
|
||||
|
||||
|
||||
def make_config(args):
|
||||
"""
|
||||
|
@ -13,3 +15,5 @@ def make_config(args):
|
|||
return TwoWheeledConfigModule()
|
||||
elif args.env == "CartPole":
|
||||
return CartPoleConfigModule()
|
||||
elif args.env == "NonlinearSample":
|
||||
return NonlinearSampleSystemConfigModule()
|
||||
|
|
|
@ -0,0 +1,219 @@
|
|||
import numpy as np
|
||||
|
||||
|
||||
class NonlinearSampleSystemConfigModule():
|
||||
# parameters
|
||||
ENV_NAME = "NonlinearSampleSystem-v0"
|
||||
PLANNER_TYPE = "Const"
|
||||
TYPE = "Nonlinear"
|
||||
TASK_HORIZON = 2500
|
||||
PRED_LEN = 10
|
||||
STATE_SIZE = 2
|
||||
INPUT_SIZE = 1
|
||||
DT = 0.01
|
||||
R = np.diag([0.01])
|
||||
Q = None
|
||||
Sf = None
|
||||
# bounds
|
||||
INPUT_LOWER_BOUND = np.array([-0.5])
|
||||
INPUT_UPPER_BOUND = np.array([0.5])
|
||||
|
||||
def __init__(self):
|
||||
"""
|
||||
"""
|
||||
# opt configs
|
||||
self.opt_config = {
|
||||
"Random": {
|
||||
"popsize": 5000
|
||||
},
|
||||
"CEM": {
|
||||
"popsize": 500,
|
||||
"num_elites": 50,
|
||||
"max_iters": 15,
|
||||
"alpha": 0.3,
|
||||
"init_var": 9.,
|
||||
"threshold": 0.001
|
||||
},
|
||||
"MPPI": {
|
||||
"beta": 0.6,
|
||||
"popsize": 5000,
|
||||
"kappa": 0.9,
|
||||
"noise_sigma": 0.5,
|
||||
},
|
||||
"MPPIWilliams": {
|
||||
"popsize": 5000,
|
||||
"lambda": 1.,
|
||||
"noise_sigma": 0.9,
|
||||
},
|
||||
"iLQR": {
|
||||
"max_iter": 500,
|
||||
"init_mu": 1.,
|
||||
"mu_min": 1e-6,
|
||||
"mu_max": 1e10,
|
||||
"init_delta": 2.,
|
||||
"threshold": 1e-6,
|
||||
},
|
||||
"DDP": {
|
||||
"max_iter": 500,
|
||||
"init_mu": 1.,
|
||||
"mu_min": 1e-6,
|
||||
"mu_max": 1e10,
|
||||
"init_delta": 2.,
|
||||
"threshold": 1e-6,
|
||||
},
|
||||
"NMPC-CGMRES": {
|
||||
},
|
||||
"NMPC-Newton": {
|
||||
},
|
||||
}
|
||||
|
||||
@staticmethod
|
||||
def input_cost_fn(u):
|
||||
""" input cost functions
|
||||
|
||||
Args:
|
||||
u (numpy.ndarray): input, shape(pred_len, input_size)
|
||||
or shape(pop_size, pred_len, input_size)
|
||||
Returns:
|
||||
cost (numpy.ndarray): cost of input, shape(pred_len, input_size) or
|
||||
shape(pop_size, pred_len, input_size)
|
||||
"""
|
||||
return (u**2) * np.diag(NonlinearSampleSystemConfigModule.R)
|
||||
|
||||
@staticmethod
|
||||
def state_cost_fn(x, g_x):
|
||||
""" state cost function
|
||||
|
||||
Args:
|
||||
x (numpy.ndarray): state, shape(pred_len, state_size)
|
||||
or shape(pop_size, pred_len, state_size)
|
||||
g_x (numpy.ndarray): goal state, shape(pred_len, state_size)
|
||||
or shape(pop_size, pred_len, state_size)
|
||||
Returns:
|
||||
cost (numpy.ndarray): cost of state, shape(pred_len, 1) or
|
||||
shape(pop_size, pred_len, 1)
|
||||
"""
|
||||
|
||||
if len(x.shape) > 2:
|
||||
return (0.5 * (x[:, :, 0]**2) +
|
||||
0.5 * (x[:, :, 1]**2))[:, :, np.newaxis]
|
||||
|
||||
elif len(x.shape) > 1:
|
||||
return (0.5 * (x[:, 0]**2) + 0.5 * (x[:, 1]**2))[:, np.newaxis]
|
||||
|
||||
return 0.5 * (x[0]**2) + 0.5 * (x[1]**2)
|
||||
|
||||
@ staticmethod
|
||||
def terminal_state_cost_fn(terminal_x, terminal_g_x):
|
||||
"""
|
||||
|
||||
Args:
|
||||
terminal_x (numpy.ndarray): terminal state,
|
||||
shape(state_size, ) or shape(pop_size, state_size)
|
||||
terminal_g_x (numpy.ndarray): terminal goal state,
|
||||
shape(state_size, ) or shape(pop_size, state_size)
|
||||
Returns:
|
||||
cost (numpy.ndarray): cost of state, shape(pred_len, ) or
|
||||
shape(pop_size, pred_len)
|
||||
"""
|
||||
|
||||
if len(terminal_x.shape) > 1:
|
||||
return (0.5 * (terminal_x[:, 0]**2) +
|
||||
0.5 * (terminal_x[:, 1]**2))[:, np.newaxis]
|
||||
|
||||
return 0.5 * (terminal_x[0]**2) + 0.5 * (terminal_x[1]**2)
|
||||
|
||||
@ staticmethod
|
||||
def gradient_cost_fn_with_state(x, g_x, terminal=False):
|
||||
""" gradient of costs with respect to the state
|
||||
|
||||
Args:
|
||||
x (numpy.ndarray): state, shape(pred_len, state_size)
|
||||
g_x (numpy.ndarray): goal state, shape(pred_len, state_size)
|
||||
|
||||
Returns:
|
||||
l_x (numpy.ndarray): gradient of cost, shape(pred_len, state_size)
|
||||
or shape(1, state_size)
|
||||
"""
|
||||
if not terminal:
|
||||
cost_dx0 = x[:, 0]
|
||||
cost_dx1 = x[:, 1]
|
||||
cost_dx = np.stack((cost_dx0, cost_dx1), axis=1)
|
||||
return cost_dx
|
||||
|
||||
cost_dx0 = x[0]
|
||||
cost_dx1 = x[1]
|
||||
cost_dx = np.array([[cost_dx0, cost_dx1]])
|
||||
|
||||
return cost_dx
|
||||
|
||||
@ staticmethod
|
||||
def gradient_cost_fn_with_input(x, u):
|
||||
""" gradient of costs with respect to the input
|
||||
|
||||
Args:
|
||||
x (numpy.ndarray): state, shape(pred_len, state_size)
|
||||
u (numpy.ndarray): goal state, shape(pred_len, input_size)
|
||||
Returns:
|
||||
l_u (numpy.ndarray): gradient of cost, shape(pred_len, input_size)
|
||||
"""
|
||||
return 2. * u * np.diag(NonlinearSampleSystemConfigModule.R)
|
||||
|
||||
@ staticmethod
|
||||
def hessian_cost_fn_with_state(x, g_x, terminal=False):
|
||||
""" hessian costs with respect to the state
|
||||
|
||||
Args:
|
||||
x (numpy.ndarray): state, shape(pred_len, state_size)
|
||||
g_x (numpy.ndarray): goal state, shape(pred_len, state_size)
|
||||
Returns:
|
||||
l_xx (numpy.ndarray): gradient of cost,
|
||||
shape(pred_len, state_size, state_size) or
|
||||
shape(1, state_size, state_size) or
|
||||
"""
|
||||
if not terminal:
|
||||
(pred_len, state_size) = x.shape
|
||||
hessian = np.eye(state_size)
|
||||
hessian = np.tile(hessian, (pred_len, 1, 1))
|
||||
hessian[:, 0, 0] = 1.
|
||||
hessian[:, 1, 1] = 1.
|
||||
|
||||
return hessian
|
||||
|
||||
state_size = len(x)
|
||||
hessian = np.eye(state_size)
|
||||
hessian[0, 0] = 1.
|
||||
hessian[1, 1] = 1.
|
||||
|
||||
return hessian[np.newaxis, :, :]
|
||||
|
||||
@ staticmethod
|
||||
def hessian_cost_fn_with_input(x, u):
|
||||
""" hessian costs with respect to the input
|
||||
|
||||
Args:
|
||||
x (numpy.ndarray): state, shape(pred_len, state_size)
|
||||
u (numpy.ndarray): goal state, shape(pred_len, input_size)
|
||||
Returns:
|
||||
l_uu (numpy.ndarray): gradient of cost,
|
||||
shape(pred_len, input_size, input_size)
|
||||
"""
|
||||
(pred_len, _) = u.shape
|
||||
|
||||
return np.tile(NonlinearSampleSystemConfigModule.R, (pred_len, 1, 1))
|
||||
|
||||
@ staticmethod
|
||||
def hessian_cost_fn_with_input_state(x, u):
|
||||
""" hessian costs with respect to the state and input
|
||||
|
||||
Args:
|
||||
x (numpy.ndarray): state, shape(pred_len, state_size)
|
||||
u (numpy.ndarray): goal state, shape(pred_len, input_size)
|
||||
Returns:
|
||||
l_ux (numpy.ndarray): gradient of cost ,
|
||||
shape(pred_len, input_size, state_size)
|
||||
"""
|
||||
(_, state_size) = x.shape
|
||||
(pred_len, input_size) = u.shape
|
||||
|
||||
return np.zeros((pred_len, input_size, state_size))
|
|
@ -4,6 +4,7 @@ from matplotlib.axes import Axes
|
|||
from ..plotters.plot_objs import square_with_angle, square
|
||||
from ..common.utils import fit_angle_in_range
|
||||
|
||||
|
||||
class TwoWheeledConfigModule():
|
||||
# parameters
|
||||
ENV_NAME = "TwoWheeled-v0"
|
||||
|
@ -46,40 +47,40 @@ class TwoWheeledConfigModule():
|
|||
"num_elites": 50,
|
||||
"max_iters": 15,
|
||||
"alpha": 0.3,
|
||||
"init_var":1.,
|
||||
"threshold":0.001
|
||||
"init_var": 1.,
|
||||
"threshold": 0.001
|
||||
},
|
||||
"MPPI":{
|
||||
"beta" : 0.6,
|
||||
"MPPI": {
|
||||
"beta": 0.6,
|
||||
"popsize": 5000,
|
||||
"kappa": 0.9,
|
||||
"noise_sigma": 0.5,
|
||||
},
|
||||
"MPPIWilliams":{
|
||||
"MPPIWilliams": {
|
||||
"popsize": 5000,
|
||||
"lambda": 1,
|
||||
"noise_sigma": 1.,
|
||||
},
|
||||
"iLQR":{
|
||||
"iLQR": {
|
||||
"max_iter": 500,
|
||||
"init_mu": 1.,
|
||||
"mu_min": 1e-6,
|
||||
"mu_max": 1e10,
|
||||
"init_delta": 2.,
|
||||
"threshold": 1e-6,
|
||||
},
|
||||
"DDP":{
|
||||
},
|
||||
"DDP": {
|
||||
"max_iter": 500,
|
||||
"init_mu": 1.,
|
||||
"mu_min": 1e-6,
|
||||
"mu_max": 1e10,
|
||||
"init_delta": 2.,
|
||||
"threshold": 1e-6,
|
||||
},
|
||||
"NMPC-CGMRES":{
|
||||
},
|
||||
"NMPC-Newton":{
|
||||
},
|
||||
},
|
||||
"NMPC-CGMRES": {
|
||||
},
|
||||
"NMPC-Newton": {
|
||||
},
|
||||
}
|
||||
|
||||
@staticmethod
|
||||
|
@ -142,8 +143,8 @@ class TwoWheeledConfigModule():
|
|||
cost (numpy.ndarray): cost of state, shape(pred_len, ) or
|
||||
shape(pop_size, pred_len)
|
||||
"""
|
||||
terminal_diff = TwoWheeledConfigModule.fit_diff_in_range(terminal_x \
|
||||
- terminal_g_x)
|
||||
terminal_diff = TwoWheeledConfigModule.fit_diff_in_range(terminal_x
|
||||
- terminal_g_x)
|
||||
|
||||
return ((terminal_diff)**2) * np.diag(TwoWheeledConfigModule.Sf)
|
||||
|
||||
|
@ -164,8 +165,8 @@ class TwoWheeledConfigModule():
|
|||
if not terminal:
|
||||
return 2. * (diff) * np.diag(TwoWheeledConfigModule.Q)
|
||||
|
||||
return (2. * (diff) \
|
||||
* np.diag(TwoWheeledConfigModule.Sf))[np.newaxis, :]
|
||||
return (2. * (diff)
|
||||
* np.diag(TwoWheeledConfigModule.Sf))[np.newaxis, :]
|
||||
|
||||
@staticmethod
|
||||
def gradient_cost_fn_with_input(x, u):
|
||||
|
|
|
@ -8,6 +8,7 @@ from ..envs.cost import calc_cost
|
|||
|
||||
logger = getLogger(__name__)
|
||||
|
||||
|
||||
class CEM(Controller):
|
||||
""" Cross Entropy Method for linear and nonlinear method
|
||||
|
||||
|
@ -19,6 +20,7 @@ class CEM(Controller):
|
|||
using probabilistic dynamics models.
|
||||
In Advances in Neural Information Processing Systems (pp. 4754-4765).
|
||||
"""
|
||||
|
||||
def __init__(self, config, model):
|
||||
super(CEM, self).__init__(config, model)
|
||||
|
||||
|
@ -50,13 +52,13 @@ class CEM(Controller):
|
|||
self.input_cost_fn = config.input_cost_fn
|
||||
|
||||
# init mean
|
||||
self.init_mean = np.tile((config.INPUT_UPPER_BOUND \
|
||||
self.init_mean = np.tile((config.INPUT_UPPER_BOUND
|
||||
+ config.INPUT_LOWER_BOUND) / 2.,
|
||||
self.pred_len)
|
||||
self.prev_sol = self.init_mean.copy()
|
||||
# init variance
|
||||
var = np.ones_like(config.INPUT_UPPER_BOUND) \
|
||||
* config.opt_config["CEM"]["init_var"]
|
||||
* config.opt_config["CEM"]["init_var"]
|
||||
self.init_var = np.tile(var, self.pred_len)
|
||||
|
||||
# save
|
||||
|
@ -86,7 +88,7 @@ class CEM(Controller):
|
|||
|
||||
# make distribution
|
||||
X = stats.truncnorm(-1, 1,
|
||||
loc=np.zeros_like(mean),\
|
||||
loc=np.zeros_like(mean),
|
||||
scale=np.ones_like(mean))
|
||||
|
||||
while (opt_count < self.max_iters) and np.max(var) > self.epsilon:
|
||||
|
@ -99,15 +101,15 @@ class CEM(Controller):
|
|||
|
||||
# sample
|
||||
samples = X.rvs(size=[self.pop_size, self.opt_dim]) \
|
||||
* np.sqrt(constrained_var) \
|
||||
+ mean
|
||||
* np.sqrt(constrained_var) \
|
||||
+ mean
|
||||
|
||||
# calc cost
|
||||
# samples.shape = (pop_size, opt_dim)
|
||||
costs = self.calc_cost(curr_x,
|
||||
samples.reshape(self.pop_size,
|
||||
self.pred_len,
|
||||
self.input_size),
|
||||
self.pred_len,
|
||||
self.input_size),
|
||||
g_xs)
|
||||
|
||||
# sort cost
|
||||
|
|
|
@ -2,9 +2,11 @@ import numpy as np
|
|||
|
||||
from ..envs.cost import calc_cost
|
||||
|
||||
|
||||
class Controller():
|
||||
""" Controller class
|
||||
"""
|
||||
|
||||
def __init__(self, config, model):
|
||||
"""
|
||||
"""
|
||||
|
@ -49,7 +51,7 @@ class Controller():
|
|||
|
||||
# get particle cost
|
||||
costs = calc_cost(pred_xs, samples, g_xs,
|
||||
self.state_cost_fn, self.input_cost_fn, \
|
||||
self.state_cost_fn, self.input_cost_fn,
|
||||
self.terminal_state_cost_fn)
|
||||
|
||||
return costs
|
||||
|
@ -58,10 +60,12 @@ class Controller():
|
|||
def gradient_hamiltonian_x(x, u, lam):
|
||||
""" gradient of hamitonian with respect to the state,
|
||||
"""
|
||||
raise NotImplementedError("Implement gradient of hamitonian with respect to the state")
|
||||
raise NotImplementedError(
|
||||
"Implement gradient of hamitonian with respect to the state")
|
||||
|
||||
@staticmethod
|
||||
def gradient_hamiltonian_u(x, u, lam):
|
||||
""" gradient of hamitonian with respect to the input
|
||||
"""
|
||||
raise NotImplementedError("Implement gradient of hamitonian with respect to the input")
|
||||
raise NotImplementedError(
|
||||
"Implement gradient of hamitonian with respect to the input")
|
||||
|
|
|
@ -8,6 +8,7 @@ from ..envs.cost import calc_cost
|
|||
|
||||
logger = getLogger(__name__)
|
||||
|
||||
|
||||
class DDP(Controller):
|
||||
""" Differential Dynamic Programming
|
||||
|
||||
|
@ -18,6 +19,7 @@ class DDP(Controller):
|
|||
https://github.com/studywolf/control, and
|
||||
https://github.com/anassinator/ilqr
|
||||
"""
|
||||
|
||||
def __init__(self, config, model):
|
||||
"""
|
||||
"""
|
||||
|
@ -92,13 +94,13 @@ class DDP(Controller):
|
|||
# forward
|
||||
if update_sol == True:
|
||||
pred_xs, cost, f_x, f_u, f_xx, f_ux, f_uu,\
|
||||
l_x, l_xx, l_u, l_uu, l_ux = \
|
||||
l_x, l_xx, l_u, l_uu, l_ux = \
|
||||
self.forward(curr_x, g_xs, sol)
|
||||
update_sol = False
|
||||
|
||||
try:
|
||||
# backward
|
||||
k, K = self.backward(f_x, f_u, f_xx, f_ux, f_uu, \
|
||||
k, K = self.backward(f_x, f_u, f_xx, f_ux, f_uu,
|
||||
l_x, l_xx, l_u, l_uu, l_ux)
|
||||
|
||||
# line search
|
||||
|
@ -139,7 +141,7 @@ class DDP(Controller):
|
|||
# increase regularization term.
|
||||
self.delta = max(1.0, self.delta) * self.init_delta
|
||||
self.mu = max(self.mu_min, self.mu * self.delta)
|
||||
logger.debug("Update regularization term to {}"\
|
||||
logger.debug("Update regularization term to {}"
|
||||
.format(self.mu))
|
||||
if self.mu >= self.mu_max:
|
||||
logger.debug("Reach Max regularization term")
|
||||
|
@ -183,8 +185,8 @@ class DDP(Controller):
|
|||
|
||||
for t in range(pred_len):
|
||||
new_sol[t] = sol[t] \
|
||||
+ alpha * k[t] \
|
||||
+ np.dot(K[t], (new_pred_xs[t] - pred_xs[t]))
|
||||
+ alpha * k[t] \
|
||||
+ np.dot(K[t], (new_pred_xs[t] - pred_xs[t]))
|
||||
new_pred_xs[t+1] = self.model.predict_next_state(new_pred_xs[t],
|
||||
new_sol[t])
|
||||
|
||||
|
|
|
@ -8,6 +8,7 @@ from ..envs.cost import calc_cost
|
|||
|
||||
logger = getLogger(__name__)
|
||||
|
||||
|
||||
class iLQR(Controller):
|
||||
""" iterative Liner Quadratique Regulator
|
||||
|
||||
|
@ -16,6 +17,7 @@ class iLQR(Controller):
|
|||
Intelligent Robots and Systems (pp. 4906-4913). and Study Wolf,
|
||||
https://github.com/studywolf/control
|
||||
"""
|
||||
|
||||
def __init__(self, config, model):
|
||||
"""
|
||||
"""
|
||||
|
@ -130,7 +132,7 @@ class iLQR(Controller):
|
|||
# increase regularization term.
|
||||
self.delta = max(1.0, self.delta) * self.init_delta
|
||||
self.mu = max(self.mu_min, self.mu * self.delta)
|
||||
logger.debug("Update regularization term to {}"\
|
||||
logger.debug("Update regularization term to {}"
|
||||
.format(self.mu))
|
||||
if self.mu >= self.mu_max:
|
||||
logger.debug("Reach Max regularization term")
|
||||
|
@ -174,8 +176,8 @@ class iLQR(Controller):
|
|||
|
||||
for t in range(pred_len):
|
||||
new_sol[t] = sol[t] \
|
||||
+ alpha * k[t] \
|
||||
+ np.dot(K[t], (new_pred_xs[t] - pred_xs[t]))
|
||||
+ alpha * k[t] \
|
||||
+ np.dot(K[t], (new_pred_xs[t] - pred_xs[t]))
|
||||
new_pred_xs[t+1] = self.model.predict_next_state(new_pred_xs[t],
|
||||
new_sol[t])
|
||||
|
||||
|
|
|
@ -6,6 +6,7 @@ from .mppi_williams import MPPIWilliams
|
|||
from .ilqr import iLQR
|
||||
from .ddp import DDP
|
||||
|
||||
|
||||
def make_controller(args, config, model):
|
||||
|
||||
if args.controller_type == "MPC":
|
||||
|
|
|
@ -9,6 +9,7 @@ from ..envs.cost import calc_cost
|
|||
|
||||
logger = getLogger(__name__)
|
||||
|
||||
|
||||
class LinearMPC(Controller):
|
||||
""" Model Predictive Controller for linear model
|
||||
|
||||
|
@ -21,6 +22,7 @@ class LinearMPC(Controller):
|
|||
Ref:
|
||||
Maciejowski, J. M. (2002). Predictive control: with constraints.
|
||||
"""
|
||||
|
||||
def __init__(self, config, model):
|
||||
"""
|
||||
Args:
|
||||
|
@ -77,7 +79,7 @@ class LinearMPC(Controller):
|
|||
for _ in range(self.pred_len - 1):
|
||||
temp_mat = np.matmul(A_factorials[-1], self.A)
|
||||
self.phi_mat = np.vstack((self.phi_mat, temp_mat))
|
||||
A_factorials.append(temp_mat) # after we use this factorials
|
||||
A_factorials.append(temp_mat) # after we use this factorials
|
||||
|
||||
self.gamma_mat = self.B.copy()
|
||||
gammma_mat_temp = self.B.copy()
|
||||
|
@ -91,8 +93,8 @@ class LinearMPC(Controller):
|
|||
|
||||
for i in range(self.pred_len - 1):
|
||||
temp_mat = np.zeros_like(self.gamma_mat)
|
||||
temp_mat[int((i + 1)*self.state_size): , :] =\
|
||||
self.gamma_mat[:-int((i + 1)*self.state_size) , :]
|
||||
temp_mat[int((i + 1)*self.state_size):, :] =\
|
||||
self.gamma_mat[:-int((i + 1)*self.state_size), :]
|
||||
|
||||
self.theta_mat = np.hstack((self.theta_mat, temp_mat))
|
||||
|
||||
|
@ -114,7 +116,7 @@ class LinearMPC(Controller):
|
|||
|
||||
for i in range(self.pred_len - 1):
|
||||
for j in range(self.input_size):
|
||||
temp_F[j * 2: (j + 1) * 2,\
|
||||
temp_F[j * 2: (j + 1) * 2,
|
||||
((i+1) * self.input_size) + j] = np.array([1., -1.])
|
||||
self.F = np.vstack((self.F, temp_F))
|
||||
|
||||
|
@ -187,14 +189,14 @@ class LinearMPC(Controller):
|
|||
|
||||
# using cvxopt
|
||||
def optimized_func(dt_us):
|
||||
return (np.dot(dt_us, np.dot(H, dt_us.reshape(-1, 1))) \
|
||||
return (np.dot(dt_us, np.dot(H, dt_us.reshape(-1, 1)))
|
||||
- np.dot(G.T, dt_us.reshape(-1, 1)))[0]
|
||||
|
||||
# constraint
|
||||
lb = np.array([-np.inf for _ in range(len(ub))]) # one side cons
|
||||
cons = LinearConstraint(A, lb, ub)
|
||||
# solve
|
||||
opt_sol = minimize(optimized_func, self.prev_sol.flatten(),\
|
||||
opt_sol = minimize(optimized_func, self.prev_sol.flatten(),
|
||||
constraints=[cons])
|
||||
opt_dt_us = opt_sol.x
|
||||
|
||||
|
@ -213,7 +215,7 @@ class LinearMPC(Controller):
|
|||
"""
|
||||
|
||||
# to dt form
|
||||
opt_dt_u_seq = np.cumsum(opt_dt_us.reshape(self.pred_len,\
|
||||
opt_dt_u_seq = np.cumsum(opt_dt_us.reshape(self.pred_len,
|
||||
self.input_size),
|
||||
axis=0)
|
||||
self.prev_sol = opt_dt_u_seq.copy()
|
||||
|
@ -226,8 +228,8 @@ class LinearMPC(Controller):
|
|||
# check costs
|
||||
costs = self.calc_cost(curr_x,
|
||||
opt_u_seq.reshape(1,
|
||||
self.pred_len,
|
||||
self.input_size),
|
||||
self.pred_len,
|
||||
self.input_size),
|
||||
g_xs)
|
||||
|
||||
logger.debug("Cost = {}".format(costs))
|
||||
|
|
|
@ -8,6 +8,7 @@ from ..envs.cost import calc_cost
|
|||
|
||||
logger = getLogger(__name__)
|
||||
|
||||
|
||||
class MPPI(Controller):
|
||||
""" Model Predictive Path Integral for linear and nonlinear method
|
||||
|
||||
|
@ -18,6 +19,7 @@ class MPPI(Controller):
|
|||
Deep Dynamics Models for Learning Dexterous Manipulation.
|
||||
arXiv preprint arXiv:1909.11652.
|
||||
"""
|
||||
|
||||
def __init__(self, config, model):
|
||||
super(MPPI, self).__init__(config, model)
|
||||
|
||||
|
@ -47,7 +49,7 @@ class MPPI(Controller):
|
|||
self.input_cost_fn = config.input_cost_fn
|
||||
|
||||
# init mean
|
||||
self.prev_sol = np.tile((config.INPUT_UPPER_BOUND \
|
||||
self.prev_sol = np.tile((config.INPUT_UPPER_BOUND
|
||||
+ config.INPUT_LOWER_BOUND) / 2.,
|
||||
self.pred_len)
|
||||
self.prev_sol = self.prev_sol.reshape(self.pred_len, self.input_size)
|
||||
|
@ -74,23 +76,23 @@ class MPPI(Controller):
|
|||
"""
|
||||
# get noised inputs
|
||||
noise = np.random.normal(
|
||||
loc=0, scale=1.0, size=(self.pop_size, self.pred_len,
|
||||
self.input_size)) * self.noise_sigma
|
||||
loc=0, scale=1.0, size=(self.pop_size, self.pred_len,
|
||||
self.input_size)) * self.noise_sigma
|
||||
noised_inputs = noise.copy()
|
||||
|
||||
for t in range(self.pred_len):
|
||||
if t > 0:
|
||||
noised_inputs[:, t, :] = self.beta \
|
||||
* (self.prev_sol[t, :] \
|
||||
+ noise[:, t, :]) \
|
||||
+ (1 - self.beta) \
|
||||
* noised_inputs[:, t-1, :]
|
||||
* (self.prev_sol[t, :]
|
||||
+ noise[:, t, :]) \
|
||||
+ (1 - self.beta) \
|
||||
* noised_inputs[:, t-1, :]
|
||||
else:
|
||||
noised_inputs[:, t, :] = self.beta \
|
||||
* (self.prev_sol[t, :] \
|
||||
+ noise[:, t, :]) \
|
||||
+ (1 - self.beta) \
|
||||
* self.history_u[-1]
|
||||
* (self.prev_sol[t, :]
|
||||
+ noise[:, t, :]) \
|
||||
+ (1 - self.beta) \
|
||||
* self.history_u[-1]
|
||||
|
||||
# clip actions
|
||||
noised_inputs = np.clip(
|
||||
|
@ -108,7 +110,7 @@ class MPPI(Controller):
|
|||
|
||||
# weight actions
|
||||
weighted_inputs = exp_rewards[:, np.newaxis, np.newaxis] \
|
||||
* noised_inputs
|
||||
* noised_inputs
|
||||
sol = np.sum(weighted_inputs, 0) / denom
|
||||
|
||||
# update
|
||||
|
|
|
@ -8,6 +8,7 @@ from ..envs.cost import calc_cost
|
|||
|
||||
logger = getLogger(__name__)
|
||||
|
||||
|
||||
class MPPIWilliams(Controller):
|
||||
""" Model Predictive Path Integral for linear and nonlinear method
|
||||
|
||||
|
@ -19,6 +20,7 @@ class MPPIWilliams(Controller):
|
|||
2017 IEEE International Conference on Robotics and Automation (ICRA),
|
||||
Singapore, 2017, pp. 1714-1721.
|
||||
"""
|
||||
|
||||
def __init__(self, config, model):
|
||||
super(MPPIWilliams, self).__init__(config, model)
|
||||
|
||||
|
@ -47,7 +49,7 @@ class MPPIWilliams(Controller):
|
|||
self.input_cost_fn = config.input_cost_fn
|
||||
|
||||
# init mean
|
||||
self.prev_sol = np.tile((config.INPUT_UPPER_BOUND \
|
||||
self.prev_sol = np.tile((config.INPUT_UPPER_BOUND
|
||||
+ config.INPUT_LOWER_BOUND) / 2.,
|
||||
self.pred_len)
|
||||
self.prev_sol = self.prev_sol.reshape(self.pred_len, self.input_size)
|
||||
|
@ -85,7 +87,7 @@ class MPPIWilliams(Controller):
|
|||
|
||||
# get particle cost
|
||||
costs = calc_cost(pred_xs, samples, g_xs,
|
||||
self.state_cost_fn, None, \
|
||||
self.state_cost_fn, None,
|
||||
self.terminal_state_cost_fn)
|
||||
|
||||
return costs
|
||||
|
@ -101,8 +103,8 @@ class MPPIWilliams(Controller):
|
|||
"""
|
||||
# get noised inputs
|
||||
noise = np.random.normal(
|
||||
loc=0, scale=1.0, size=(self.pop_size, self.pred_len,
|
||||
self.input_size)) * self.noise_sigma
|
||||
loc=0, scale=1.0, size=(self.pop_size, self.pred_len,
|
||||
self.input_size)) * self.noise_sigma
|
||||
|
||||
noised_inputs = self.prev_sol + noise
|
||||
|
||||
|
@ -120,7 +122,7 @@ class MPPIWilliams(Controller):
|
|||
# mppi update
|
||||
beta = np.min(costs)
|
||||
eta = np.sum(np.exp(- 1. / self.lam * (costs - beta)), axis=0) \
|
||||
+ 1e-10
|
||||
+ 1e-10
|
||||
|
||||
# weight
|
||||
# eta.shape = (pred_len, input_size)
|
||||
|
@ -128,7 +130,7 @@ class MPPIWilliams(Controller):
|
|||
|
||||
# update inputs
|
||||
sol = self.prev_sol \
|
||||
+ np.sum(weights[:, np.newaxis, np.newaxis] * noise, axis=0)
|
||||
+ np.sum(weights[:, np.newaxis, np.newaxis] * noise, axis=0)
|
||||
|
||||
# update
|
||||
self.prev_sol[:-1] = sol[1:]
|
||||
|
|
|
@ -8,6 +8,7 @@ from ..envs.cost import calc_cost
|
|||
|
||||
logger = getLogger(__name__)
|
||||
|
||||
|
||||
class RandomShooting(Controller):
|
||||
""" Random Shooting Method for linear and nonlinear method
|
||||
|
||||
|
@ -19,6 +20,7 @@ class RandomShooting(Controller):
|
|||
using probabilistic dynamics models.
|
||||
In Advances in Neural Information Processing Systems (pp. 4754-4765).
|
||||
"""
|
||||
|
||||
def __init__(self, config, model):
|
||||
super(RandomShooting, self).__init__(config, model)
|
||||
|
||||
|
@ -65,8 +67,8 @@ class RandomShooting(Controller):
|
|||
# calc cost
|
||||
costs = self.calc_cost(curr_x,
|
||||
samples.reshape(self.pop_size,
|
||||
self.pred_len,
|
||||
self.input_size),
|
||||
self.pred_len,
|
||||
self.input_size),
|
||||
g_xs)
|
||||
# solution
|
||||
sol = samples[np.argmin(costs)]
|
||||
|
|
|
@ -4,6 +4,7 @@ from matplotlib.axes import Axes
|
|||
from .env import Env
|
||||
from ..plotters.plot_objs import square
|
||||
|
||||
|
||||
class CartPoleEnv(Env):
|
||||
""" Cartpole Environment
|
||||
|
||||
|
@ -13,13 +14,14 @@ class CartPoleEnv(Env):
|
|||
6-832-underactuated-robotics-spring-2009/readings/
|
||||
MIT6_832s09_read_ch03.pdf
|
||||
"""
|
||||
|
||||
def __init__(self):
|
||||
"""
|
||||
"""
|
||||
self.config = {"state_size" : 4,
|
||||
"input_size" : 1,
|
||||
"dt" : 0.02,
|
||||
"max_step" : 500,
|
||||
self.config = {"state_size": 4,
|
||||
"input_size": 1,
|
||||
"dt": 0.02,
|
||||
"max_step": 500,
|
||||
"input_lower_bound": [-3.],
|
||||
"input_upper_bound": [3.],
|
||||
"mp": 0.2,
|
||||
|
@ -76,33 +78,33 @@ class CartPoleEnv(Env):
|
|||
# x
|
||||
d_x0 = self.curr_x[1]
|
||||
# v_x
|
||||
d_x1 = (u[0] + self.config["mp"] * np.sin(self.curr_x[2]) \
|
||||
* (self.config["l"] * (self.curr_x[3]**2) \
|
||||
+ self.config["g"] * np.cos(self.curr_x[2]))) \
|
||||
/ (self.config["mc"] + self.config["mp"] \
|
||||
* (np.sin(self.curr_x[2])**2))
|
||||
d_x1 = (u[0] + self.config["mp"] * np.sin(self.curr_x[2])
|
||||
* (self.config["l"] * (self.curr_x[3]**2)
|
||||
+ self.config["g"] * np.cos(self.curr_x[2]))) \
|
||||
/ (self.config["mc"] + self.config["mp"]
|
||||
* (np.sin(self.curr_x[2])**2))
|
||||
# theta
|
||||
d_x2 = self.curr_x[3]
|
||||
|
||||
# v_theta
|
||||
d_x3 = (-u[0] * np.cos(self.curr_x[2]) \
|
||||
- self.config["mp"] * self.config["l"] * (self.curr_x[3]**2) \
|
||||
* np.cos(self.curr_x[2]) * np.sin(self.curr_x[2]) \
|
||||
- (self.config["mc"] + self.config["mp"]) * self.config["g"] \
|
||||
* np.sin(self.curr_x[2])) \
|
||||
/ (self.config["l"] * (self.config["mc"] + self.config["mp"] \
|
||||
* (np.sin(self.curr_x[2])**2)))
|
||||
d_x3 = (-u[0] * np.cos(self.curr_x[2])
|
||||
- self.config["mp"] * self.config["l"] * (self.curr_x[3]**2)
|
||||
* np.cos(self.curr_x[2]) * np.sin(self.curr_x[2])
|
||||
- (self.config["mc"] + self.config["mp"]) * self.config["g"]
|
||||
* np.sin(self.curr_x[2])) \
|
||||
/ (self.config["l"] * (self.config["mc"] + self.config["mp"]
|
||||
* (np.sin(self.curr_x[2])**2)))
|
||||
|
||||
next_x = self.curr_x +\
|
||||
np.array([d_x0, d_x1, d_x2, d_x3]) * self.config["dt"]
|
||||
np.array([d_x0, d_x1, d_x2, d_x3]) * self.config["dt"]
|
||||
|
||||
# TODO: costs
|
||||
costs = 0.
|
||||
costs += 0.1 * np.sum(u**2)
|
||||
costs += 6. * self.curr_x[0]**2 \
|
||||
+ 12. * (np.cos(self.curr_x[2]) + 1.)**2 \
|
||||
+ 0.1 * self.curr_x[1]**2 \
|
||||
+ 0.1 * self.curr_x[3]**2
|
||||
+ 12. * (np.cos(self.curr_x[2]) + 1.)**2 \
|
||||
+ 0.1 * self.curr_x[1]**2 \
|
||||
+ 0.1 * self.curr_x[3]**2
|
||||
|
||||
# save history
|
||||
self.history_x.append(next_x.flatten())
|
||||
|
@ -114,8 +116,8 @@ class CartPoleEnv(Env):
|
|||
self.step_count += 1
|
||||
|
||||
return next_x.flatten(), costs, \
|
||||
self.step_count > self.config["max_step"], \
|
||||
{"goal_state" : self.g_x}
|
||||
self.step_count > self.config["max_step"], \
|
||||
{"goal_state": self.g_x}
|
||||
|
||||
def plot_func(self, to_plot, i=None, history_x=None, history_g_x=None):
|
||||
""" plot cartpole object function
|
||||
|
@ -134,10 +136,10 @@ class CartPoleEnv(Env):
|
|||
|
||||
imgs["cart"] = to_plot.plot([], [], c="k")[0]
|
||||
imgs["pole"] = to_plot.plot([], [], c="k", linewidth=5)[0]
|
||||
imgs["center"] = to_plot.plot([], [], marker="o", c="k",\
|
||||
imgs["center"] = to_plot.plot([], [], marker="o", c="k",
|
||||
markersize=10)[0]
|
||||
# centerline
|
||||
to_plot.plot(np.linspace(-1., 1., num=50), np.zeros(50),\
|
||||
to_plot.plot(np.linspace(-1., 1., num=50), np.zeros(50),
|
||||
c="k", linestyle="dashed")
|
||||
|
||||
# set axis
|
||||
|
@ -166,13 +168,13 @@ class CartPoleEnv(Env):
|
|||
pole_y (numpy.ndarray): y data of pole
|
||||
"""
|
||||
# cart
|
||||
cart_x, cart_y = square(curr_x[0], 0.,\
|
||||
cart_x, cart_y = square(curr_x[0], 0.,
|
||||
self.config["cart_size"], 0.)
|
||||
|
||||
# pole
|
||||
pole_x = np.array([curr_x[0], curr_x[0] + self.config["l"] \
|
||||
* np.cos(curr_x[2]-np.pi/2)])
|
||||
pole_y = np.array([0., self.config["l"] \
|
||||
* np.sin(curr_x[2]-np.pi/2)])
|
||||
pole_x = np.array([curr_x[0], curr_x[0] + self.config["l"]
|
||||
* np.cos(curr_x[2]-np.pi/2)])
|
||||
pole_y = np.array([0., self.config["l"]
|
||||
* np.sin(curr_x[2]-np.pi/2)])
|
||||
|
||||
return cart_x, cart_y, pole_x, pole_y
|
|
@ -4,6 +4,7 @@ import numpy as np
|
|||
|
||||
logger = getLogger(__name__)
|
||||
|
||||
|
||||
def calc_cost(pred_xs, input_sample, g_xs,
|
||||
state_cost_fn, input_cost_fn, terminal_state_cost_fn):
|
||||
""" calculate the cost
|
||||
|
@ -24,14 +25,15 @@ def calc_cost(pred_xs, input_sample, g_xs,
|
|||
# state cost
|
||||
state_cost = 0.
|
||||
if state_cost_fn is not None:
|
||||
state_pred_par_cost = state_cost_fn(pred_xs[:, 1:-1, :], g_xs[:, 1:-1, :])
|
||||
state_pred_par_cost = state_cost_fn(
|
||||
pred_xs[:, 1:-1, :], g_xs[:, 1:-1, :])
|
||||
state_cost = np.sum(np.sum(state_pred_par_cost, axis=-1), axis=-1)
|
||||
|
||||
# terminal cost
|
||||
terminal_state_cost = 0.
|
||||
if terminal_state_cost_fn is not None:
|
||||
terminal_state_par_cost = terminal_state_cost_fn(pred_xs[:, -1, :],
|
||||
g_xs[:, -1, :])
|
||||
g_xs[:, -1, :])
|
||||
terminal_state_cost = np.sum(terminal_state_par_cost, axis=-1)
|
||||
|
||||
# act cost
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
import numpy as np
|
||||
|
||||
|
||||
class Env():
|
||||
""" Environments class
|
||||
Attributes:
|
||||
|
@ -8,6 +9,7 @@ class Env():
|
|||
history_x (list[numpy.ndarray]): historty of state, shape(step_count*state_size)
|
||||
step_count (int): step count
|
||||
"""
|
||||
|
||||
def __init__(self, config):
|
||||
"""
|
||||
"""
|
||||
|
|
|
@ -3,17 +3,19 @@ import scipy
|
|||
from scipy import integrate
|
||||
from .env import Env
|
||||
|
||||
|
||||
class FirstOrderLagEnv(Env):
|
||||
""" First Order Lag System Env
|
||||
"""
|
||||
|
||||
def __init__(self, tau=0.63):
|
||||
"""
|
||||
"""
|
||||
self.config = {"state_size" : 4,\
|
||||
"input_size" : 2,\
|
||||
"dt" : 0.05,\
|
||||
"max_step" : 500,\
|
||||
"input_lower_bound": [-0.5, -0.5],\
|
||||
self.config = {"state_size": 4,
|
||||
"input_size": 2,
|
||||
"dt": 0.05,
|
||||
"max_step": 500,
|
||||
"input_lower_bound": [-0.5, -0.5],
|
||||
"input_upper_bound": [0.5, 0.5],
|
||||
}
|
||||
|
||||
|
@ -34,13 +36,13 @@ class FirstOrderLagEnv(Env):
|
|||
"""
|
||||
# continuous
|
||||
Ac = np.array([[-1./tau, 0., 0., 0.],
|
||||
[0., -1./tau, 0., 0.],
|
||||
[1., 0., 0., 0.],
|
||||
[0., 1., 0., 0.]])
|
||||
[0., -1./tau, 0., 0.],
|
||||
[1., 0., 0., 0.],
|
||||
[0., 1., 0., 0.]])
|
||||
Bc = np.array([[1./tau, 0.],
|
||||
[0., 1./tau],
|
||||
[0., 0.],
|
||||
[0., 0.]])
|
||||
[0., 1./tau],
|
||||
[0., 0.],
|
||||
[0., 0.]])
|
||||
# to discrete system
|
||||
A = scipy.linalg.expm(dt*Ac)
|
||||
# B = np.matmul(np.matmul(scipy.linalg.expm(Ac*dt) -
|
||||
|
@ -94,7 +96,7 @@ class FirstOrderLagEnv(Env):
|
|||
self.config["input_upper_bound"])
|
||||
|
||||
next_x = np.matmul(self.A, self.curr_x[:, np.newaxis]) \
|
||||
+ np.matmul(self.B, u[:, np.newaxis])
|
||||
+ np.matmul(self.B, u[:, np.newaxis])
|
||||
|
||||
# cost
|
||||
cost = 0
|
||||
|
@ -111,8 +113,8 @@ class FirstOrderLagEnv(Env):
|
|||
self.step_count += 1
|
||||
|
||||
return next_x.flatten(), cost, \
|
||||
self.step_count > self.config["max_step"], \
|
||||
{"goal_state" : self.g_x}
|
||||
self.step_count > self.config["max_step"], \
|
||||
{"goal_state": self.g_x}
|
||||
|
||||
def plot_func(self, to_plot, i=None, history_x=None, history_g_x=None):
|
||||
"""
|
||||
|
|
|
@ -2,6 +2,8 @@ from .first_order_lag import FirstOrderLagEnv
|
|||
from .two_wheeled import TwoWheeledConstEnv
|
||||
from .two_wheeled import TwoWheeledTrackEnv
|
||||
from .cartpole import CartPoleEnv
|
||||
from .nonlinear_sample_system import NonlinearSampleSystemEnv
|
||||
|
||||
|
||||
def make_env(args):
|
||||
|
||||
|
@ -13,5 +15,7 @@ def make_env(args):
|
|||
return TwoWheeledTrackEnv()
|
||||
elif args.env == "CartPole":
|
||||
return CartPoleEnv()
|
||||
elif args.env == "NonlinearSample":
|
||||
return NonlinearSampleSystemEnv()
|
||||
|
||||
raise NotImplementedError("There is not {} Env".format(args.env))
|
|
@ -0,0 +1,97 @@
|
|||
import numpy as np
|
||||
import scipy
|
||||
from scipy import integrate
|
||||
from .env import Env
|
||||
from ..common.utils import update_state_with_Runge_Kutta
|
||||
|
||||
|
||||
class NonlinearSampleSystemEnv(Env):
|
||||
""" Nonlinear Sample Env
|
||||
"""
|
||||
|
||||
def __init__(self):
|
||||
"""
|
||||
"""
|
||||
self.config = {"state_size": 2,
|
||||
"input_size": 1,
|
||||
"dt": 0.01,
|
||||
"max_step": 2000,
|
||||
"input_lower_bound": [-0.5],
|
||||
"input_upper_bound": [0.5],
|
||||
}
|
||||
|
||||
super(NonlinearSampleSystemEnv, self).__init__(self.config)
|
||||
|
||||
def reset(self, init_x=np.array([2., 0.])):
|
||||
""" reset state
|
||||
Returns:
|
||||
init_x (numpy.ndarray): initial state, shape(state_size, )
|
||||
info (dict): information
|
||||
"""
|
||||
self.step_count = 0
|
||||
|
||||
self.curr_x = np.zeros(self.config["state_size"])
|
||||
|
||||
if init_x is not None:
|
||||
self.curr_x = init_x
|
||||
|
||||
# goal
|
||||
self.g_x = np.array([0., 0.])
|
||||
|
||||
# clear memory
|
||||
self.history_x = []
|
||||
self.history_g_x = []
|
||||
|
||||
return self.curr_x, {"goal_state": self.g_x}
|
||||
|
||||
def step(self, u):
|
||||
"""
|
||||
Args:
|
||||
u (numpy.ndarray) : input, shape(input_size, )
|
||||
Returns:
|
||||
next_x (numpy.ndarray): next state, shape(state_size, )
|
||||
cost (float): costs
|
||||
done (bool): end the simulation or not
|
||||
info (dict): information
|
||||
"""
|
||||
# clip action
|
||||
u = np.clip(u,
|
||||
self.config["input_lower_bound"],
|
||||
self.config["input_upper_bound"])
|
||||
|
||||
functions = [self._func_x_1, self._func_x_2]
|
||||
|
||||
next_x = update_state_with_Runge_Kutta(self.curr_x, u,
|
||||
functions, self.config["dt"],
|
||||
batch=False)
|
||||
|
||||
# cost
|
||||
cost = 0
|
||||
cost = np.sum(u**2)
|
||||
cost += np.sum((self.curr_x - self.g_x)**2)
|
||||
|
||||
# save history
|
||||
self.history_x.append(next_x.flatten())
|
||||
self.history_g_x.append(self.g_x.flatten())
|
||||
|
||||
# update
|
||||
self.curr_x = next_x.flatten()
|
||||
# update costs
|
||||
self.step_count += 1
|
||||
|
||||
return next_x.flatten(), cost, \
|
||||
self.step_count > self.config["max_step"], \
|
||||
{"goal_state": self.g_x}
|
||||
|
||||
def _func_x_1(self, x, u):
|
||||
x_dot = x[1]
|
||||
return x_dot
|
||||
|
||||
def _func_x_2(self, x, u):
|
||||
x_dot = (1. - x[0]**2 - x[1]**2) * x[1] - x[0] + u[0]
|
||||
return x_dot
|
||||
|
||||
def plot_func(self, to_plot, i=None, history_x=None, history_g_x=None):
|
||||
"""
|
||||
"""
|
||||
raise ValueError("NonlinearSampleSystemEnv does not have animation")
|
|
@ -5,6 +5,7 @@ import matplotlib.pyplot as plt
|
|||
from .env import Env
|
||||
from ..plotters.plot_objs import circle_with_angle, square, circle
|
||||
|
||||
|
||||
def step_two_wheeled_env(curr_x, u, dt, method="Oylar"):
|
||||
""" step two wheeled enviroment
|
||||
|
||||
|
@ -28,19 +29,21 @@ def step_two_wheeled_env(curr_x, u, dt, method="Oylar"):
|
|||
|
||||
return next_x
|
||||
|
||||
|
||||
class TwoWheeledConstEnv(Env):
|
||||
""" Two wheeled robot with constant goal Env
|
||||
"""
|
||||
|
||||
def __init__(self):
|
||||
"""
|
||||
"""
|
||||
self.config = {"state_size" : 3,\
|
||||
"input_size" : 2,\
|
||||
"dt" : 0.01,\
|
||||
"max_step" : 500,\
|
||||
"input_lower_bound": (-1.5, -3.14),\
|
||||
"input_upper_bound": (1.5, 3.14),\
|
||||
"car_size": 0.2,\
|
||||
self.config = {"state_size": 3,
|
||||
"input_size": 2,
|
||||
"dt": 0.01,
|
||||
"max_step": 500,
|
||||
"input_lower_bound": (-1.5, -3.14),
|
||||
"input_upper_bound": (1.5, 3.14),
|
||||
"car_size": 0.2,
|
||||
"wheel_size": (0.075, 0.015)
|
||||
}
|
||||
|
||||
|
@ -103,8 +106,8 @@ class TwoWheeledConstEnv(Env):
|
|||
self.step_count += 1
|
||||
|
||||
return next_x.flatten(), costs, \
|
||||
self.step_count > self.config["max_step"], \
|
||||
{"goal_state" : self.g_x}
|
||||
self.step_count > self.config["max_step"], \
|
||||
{"goal_state": self.g_x}
|
||||
|
||||
def plot_func(self, to_plot, i=None, history_x=None, history_g_x=None):
|
||||
""" plot cartpole object function
|
||||
|
@ -139,7 +142,7 @@ class TwoWheeledConstEnv(Env):
|
|||
# set imgs
|
||||
# car imgs
|
||||
car_x, car_y, car_angle_x, car_angle_y, \
|
||||
left_tire_x, left_tire_y, right_tire_x, right_tire_y = \
|
||||
left_tire_x, left_tire_y, right_tire_x, right_tire_y = \
|
||||
self._plot_car(history_x[i])
|
||||
|
||||
to_plot["car"].set_data(car_x, car_y)
|
||||
|
@ -160,46 +163,48 @@ class TwoWheeledConstEnv(Env):
|
|||
self.config["car_size"], curr_x[2])
|
||||
|
||||
# left tire
|
||||
center_x = (self.config["car_size"] \
|
||||
center_x = (self.config["car_size"]
|
||||
+ self.config["wheel_size"][1]) \
|
||||
* np.cos(curr_x[2]-np.pi/2.) + curr_x[0]
|
||||
center_y = (self.config["car_size"] \
|
||||
* np.cos(curr_x[2]-np.pi/2.) + curr_x[0]
|
||||
center_y = (self.config["car_size"]
|
||||
+ self.config["wheel_size"][1]) \
|
||||
* np.sin(curr_x[2]-np.pi/2.) + curr_x[1]
|
||||
* np.sin(curr_x[2]-np.pi/2.) + curr_x[1]
|
||||
|
||||
left_tire_x, left_tire_y = \
|
||||
square(center_x, center_y,
|
||||
self.config["wheel_size"], curr_x[2])
|
||||
|
||||
# right tire
|
||||
center_x = (self.config["car_size"] \
|
||||
center_x = (self.config["car_size"]
|
||||
+ self.config["wheel_size"][1]) \
|
||||
* np.cos(curr_x[2]+np.pi/2.) + curr_x[0]
|
||||
center_y = (self.config["car_size"] \
|
||||
* np.cos(curr_x[2]+np.pi/2.) + curr_x[0]
|
||||
center_y = (self.config["car_size"]
|
||||
+ self.config["wheel_size"][1]) \
|
||||
* np.sin(curr_x[2]+np.pi/2.) + curr_x[1]
|
||||
* np.sin(curr_x[2]+np.pi/2.) + curr_x[1]
|
||||
|
||||
right_tire_x, right_tire_y = \
|
||||
square(center_x, center_y,
|
||||
self.config["wheel_size"], curr_x[2])
|
||||
|
||||
return car_x, car_y, car_angle_x, car_angle_y,\
|
||||
left_tire_x, left_tire_y,\
|
||||
right_tire_x, right_tire_y
|
||||
left_tire_x, left_tire_y,\
|
||||
right_tire_x, right_tire_y
|
||||
|
||||
|
||||
class TwoWheeledTrackEnv(Env):
|
||||
""" Two wheeled robot with constant goal Env
|
||||
"""
|
||||
|
||||
def __init__(self):
|
||||
"""
|
||||
"""
|
||||
self.config = {"state_size" : 3,\
|
||||
"input_size" : 2,\
|
||||
"dt" : 0.01,\
|
||||
"max_step" : 1000,\
|
||||
"input_lower_bound": (-1.5, -3.14),\
|
||||
"input_upper_bound": (1.5, 3.14),\
|
||||
"car_size": 0.2,\
|
||||
self.config = {"state_size": 3,
|
||||
"input_size": 2,
|
||||
"dt": 0.01,
|
||||
"max_step": 1000,
|
||||
"input_lower_bound": (-1.5, -3.14),
|
||||
"input_upper_bound": (1.5, 3.14),
|
||||
"car_size": 0.2,
|
||||
"wheel_size": (0.075, 0.015)
|
||||
}
|
||||
|
||||
|
@ -220,12 +225,12 @@ class TwoWheeledTrackEnv(Env):
|
|||
|
||||
# circle
|
||||
circle_1_x, circle_1_y = circle(linelength/2., circle_radius,
|
||||
circle_radius, start=-np.pi/2., end=np.pi/2., n_point=50)
|
||||
circle_1 = np.stack((circle_1_x , circle_1_y), axis=1)
|
||||
circle_radius, start=-np.pi/2., end=np.pi/2., n_point=50)
|
||||
circle_1 = np.stack((circle_1_x, circle_1_y), axis=1)
|
||||
|
||||
circle_2_x, circle_2_y = circle(-linelength/2., circle_radius,
|
||||
circle_radius, start=np.pi/2., end=3*np.pi/2., n_point=50)
|
||||
circle_2 = np.stack((circle_2_x , circle_2_y), axis=1)
|
||||
circle_radius, start=np.pi/2., end=3*np.pi/2., n_point=50)
|
||||
circle_2 = np.stack((circle_2_x, circle_2_y), axis=1)
|
||||
|
||||
road_pos = np.concatenate((line_1, circle_1, line_2, circle_2), axis=0)
|
||||
|
||||
|
@ -293,8 +298,8 @@ class TwoWheeledTrackEnv(Env):
|
|||
self.step_count += 1
|
||||
|
||||
return next_x.flatten(), costs, \
|
||||
self.step_count > self.config["max_step"], \
|
||||
{"goal_state" : self.g_traj}
|
||||
self.step_count > self.config["max_step"], \
|
||||
{"goal_state": self.g_traj}
|
||||
|
||||
def plot_func(self, to_plot, i=None, history_x=None, history_g_x=None):
|
||||
""" plot cartpole object function
|
||||
|
@ -333,7 +338,7 @@ class TwoWheeledTrackEnv(Env):
|
|||
# set imgs
|
||||
# car imgs
|
||||
car_x, car_y, car_angle_x, car_angle_y, \
|
||||
left_tire_x, left_tire_y, right_tire_x, right_tire_y = \
|
||||
left_tire_x, left_tire_y, right_tire_x, right_tire_y = \
|
||||
self._plot_car(history_x[i])
|
||||
|
||||
to_plot["car"].set_data(car_x, car_y)
|
||||
|
@ -354,29 +359,29 @@ class TwoWheeledTrackEnv(Env):
|
|||
self.config["car_size"], curr_x[2])
|
||||
|
||||
# left tire
|
||||
center_x = (self.config["car_size"] \
|
||||
center_x = (self.config["car_size"]
|
||||
+ self.config["wheel_size"][1]) \
|
||||
* np.cos(curr_x[2]-np.pi/2.) + curr_x[0]
|
||||
center_y = (self.config["car_size"] \
|
||||
* np.cos(curr_x[2]-np.pi/2.) + curr_x[0]
|
||||
center_y = (self.config["car_size"]
|
||||
+ self.config["wheel_size"][1]) \
|
||||
* np.sin(curr_x[2]-np.pi/2.) + curr_x[1]
|
||||
* np.sin(curr_x[2]-np.pi/2.) + curr_x[1]
|
||||
|
||||
left_tire_x, left_tire_y = \
|
||||
square(center_x, center_y,
|
||||
self.config["wheel_size"], curr_x[2])
|
||||
|
||||
# right tire
|
||||
center_x = (self.config["car_size"] \
|
||||
center_x = (self.config["car_size"]
|
||||
+ self.config["wheel_size"][1]) \
|
||||
* np.cos(curr_x[2]+np.pi/2.) + curr_x[0]
|
||||
center_y = (self.config["car_size"] \
|
||||
* np.cos(curr_x[2]+np.pi/2.) + curr_x[0]
|
||||
center_y = (self.config["car_size"]
|
||||
+ self.config["wheel_size"][1]) \
|
||||
* np.sin(curr_x[2]+np.pi/2.) + curr_x[1]
|
||||
* np.sin(curr_x[2]+np.pi/2.) + curr_x[1]
|
||||
|
||||
right_tire_x, right_tire_y = \
|
||||
square(center_x, center_y,
|
||||
self.config["wheel_size"], curr_x[2])
|
||||
|
||||
return car_x, car_y, car_angle_x, car_angle_y,\
|
||||
left_tire_x, left_tire_y,\
|
||||
right_tire_x, right_tire_y
|
||||
left_tire_x, left_tire_y,\
|
||||
right_tire_x, right_tire_y
|
||||
|
|
|
@ -7,6 +7,7 @@ import six
|
|||
import pickle
|
||||
from logging import DEBUG, basicConfig, getLogger, FileHandler, StreamHandler, Formatter, Logger
|
||||
|
||||
|
||||
def make_logger(save_dir):
|
||||
"""
|
||||
Args:
|
||||
|
@ -33,6 +34,7 @@ def make_logger(save_dir):
|
|||
# sh_handler = StreamHandler()
|
||||
# logger.addHandler(sh_handler)
|
||||
|
||||
|
||||
def int_tuple(s):
|
||||
""" transform str to tuple
|
||||
Args:
|
||||
|
@ -42,6 +44,7 @@ def int_tuple(s):
|
|||
"""
|
||||
return tuple(int(i) for i in s.split(','))
|
||||
|
||||
|
||||
def bool_flag(s):
|
||||
""" transform str to bool flg
|
||||
Args:
|
||||
|
@ -54,6 +57,7 @@ def bool_flag(s):
|
|||
msg = 'Invalid value "%s" for bool flag (should be 0 or 1)'
|
||||
raise ValueError(msg % s)
|
||||
|
||||
|
||||
def file_exists(path):
|
||||
""" Check file existence on given path
|
||||
Args:
|
||||
|
@ -63,6 +67,7 @@ def file_exists(path):
|
|||
"""
|
||||
return os.path.exists(path)
|
||||
|
||||
|
||||
def create_dir_if_not_exist(outdir):
|
||||
""" Check directory existence and creates new directory if not exist
|
||||
Args:
|
||||
|
@ -77,6 +82,7 @@ def create_dir_if_not_exist(outdir):
|
|||
return
|
||||
os.makedirs(outdir)
|
||||
|
||||
|
||||
def write_text_to_file(file_path, data):
|
||||
""" Write given text data to file
|
||||
Args:
|
||||
|
@ -86,6 +92,7 @@ def write_text_to_file(file_path, data):
|
|||
with open(file_path, 'w') as f:
|
||||
f.write(data)
|
||||
|
||||
|
||||
def read_text_from_file(file_path):
|
||||
""" Read given file as text
|
||||
Args:
|
||||
|
@ -96,6 +103,7 @@ def read_text_from_file(file_path):
|
|||
with open(file_path, 'r') as f:
|
||||
return f.read()
|
||||
|
||||
|
||||
def save_pickle(file_path, data):
|
||||
""" pickle given data to file
|
||||
Args:
|
||||
|
@ -105,6 +113,7 @@ def save_pickle(file_path, data):
|
|||
with open(file_path, 'wb') as f:
|
||||
pickle.dump(data, f)
|
||||
|
||||
|
||||
def load_pickle(file_path):
|
||||
""" load pickled data from file
|
||||
Args:
|
||||
|
@ -118,6 +127,7 @@ def load_pickle(file_path):
|
|||
else:
|
||||
return pickle.load(f, encoding='bytes')
|
||||
|
||||
|
||||
def prepare_output_dir(base_dir, args, time_format='%Y-%m-%d-%H%M%S'):
|
||||
""" prepare a directory with current datetime as name.
|
||||
created directory contains the command and args when the script was called as text file.
|
||||
|
|
|
@ -2,9 +2,11 @@ import numpy as np
|
|||
|
||||
from .model import Model
|
||||
|
||||
|
||||
class CartPoleModel(Model):
|
||||
""" cartpole model
|
||||
"""
|
||||
|
||||
def __init__(self, config):
|
||||
"""
|
||||
"""
|
||||
|
@ -31,21 +33,21 @@ class CartPoleModel(Model):
|
|||
# x
|
||||
d_x0 = curr_x[1]
|
||||
# v_x
|
||||
d_x1 = (u[0] + self.mp * np.sin(curr_x[2]) \
|
||||
* (self.l * (curr_x[3]**2) \
|
||||
+ self.g * np.cos(curr_x[2]))) \
|
||||
/ (self.mc + self.mp * (np.sin(curr_x[2])**2))
|
||||
d_x1 = (u[0] + self.mp * np.sin(curr_x[2])
|
||||
* (self.l * (curr_x[3]**2)
|
||||
+ self.g * np.cos(curr_x[2]))) \
|
||||
/ (self.mc + self.mp * (np.sin(curr_x[2])**2))
|
||||
# theta
|
||||
d_x2 = curr_x[3]
|
||||
# v_theta
|
||||
d_x3 = (-u[0] * np.cos(curr_x[2]) \
|
||||
- self.mp * self.l * (curr_x[3]**2) \
|
||||
* np.cos(curr_x[2]) * np.sin(curr_x[2]) \
|
||||
d_x3 = (-u[0] * np.cos(curr_x[2])
|
||||
- self.mp * self.l * (curr_x[3]**2)
|
||||
* np.cos(curr_x[2]) * np.sin(curr_x[2])
|
||||
- (self.mc + self.mp) * self.g * np.sin(curr_x[2])) \
|
||||
/ (self.l * (self.mc + self.mp * (np.sin(curr_x[2])**2)))
|
||||
/ (self.l * (self.mc + self.mp * (np.sin(curr_x[2])**2)))
|
||||
|
||||
next_x = curr_x +\
|
||||
np.array([d_x0, d_x1, d_x2, d_x3]) * self.dt
|
||||
np.array([d_x0, d_x1, d_x2, d_x3]) * self.dt
|
||||
|
||||
return next_x
|
||||
|
||||
|
@ -53,21 +55,21 @@ class CartPoleModel(Model):
|
|||
# x
|
||||
d_x0 = curr_x[:, 1]
|
||||
# v_x
|
||||
d_x1 = (u[:, 0] + self.mp * np.sin(curr_x[:, 2]) \
|
||||
* (self.l * (curr_x[:, 3]**2) \
|
||||
+ self.g * np.cos(curr_x[:, 2]))) \
|
||||
/ (self.mc + self.mp * (np.sin(curr_x[:, 2])**2))
|
||||
d_x1 = (u[:, 0] + self.mp * np.sin(curr_x[:, 2])
|
||||
* (self.l * (curr_x[:, 3]**2)
|
||||
+ self.g * np.cos(curr_x[:, 2]))) \
|
||||
/ (self.mc + self.mp * (np.sin(curr_x[:, 2])**2))
|
||||
# theta
|
||||
d_x2 = curr_x[:, 3]
|
||||
# v_theta
|
||||
d_x3 = (-u[:, 0] * np.cos(curr_x[:, 2]) \
|
||||
- self.mp * self.l * (curr_x[:, 3]**2) \
|
||||
* np.cos(curr_x[:, 2]) * np.sin(curr_x[:, 2]) \
|
||||
d_x3 = (-u[:, 0] * np.cos(curr_x[:, 2])
|
||||
- self.mp * self.l * (curr_x[:, 3]**2)
|
||||
* np.cos(curr_x[:, 2]) * np.sin(curr_x[:, 2])
|
||||
- (self.mc + self.mp) * self.g * np.sin(curr_x[:, 2])) \
|
||||
/ (self.l * (self.mc + self.mp * (np.sin(curr_x[:, 2])**2)))
|
||||
/ (self.l * (self.mc + self.mp * (np.sin(curr_x[:, 2])**2)))
|
||||
|
||||
next_x = curr_x +\
|
||||
np.stack((d_x0, d_x1, d_x2, d_x3), axis=1) * self.dt
|
||||
np.stack((d_x0, d_x1, d_x2, d_x3), axis=1) * self.dt
|
||||
|
||||
return next_x
|
||||
|
||||
|
@ -95,36 +97,36 @@ class CartPoleModel(Model):
|
|||
|
||||
# f_theta
|
||||
tmp = ((self.mc + self.mp * np.sin(xs[:, 2])**2)**(-2)) \
|
||||
* self.mp * 2. * np.sin(xs[:, 2]) * np.cos(xs[:, 2])
|
||||
* self.mp * 2. * np.sin(xs[:, 2]) * np.cos(xs[:, 2])
|
||||
tmp2 = 1. / (self.mc + self.mp * (np.sin(xs[:, 2])**2))
|
||||
|
||||
f_x[:, 1, 2] = - us[:, 0] * tmp \
|
||||
- tmp * (self.mp * np.sin(xs[:, 2]) \
|
||||
* (self.l * xs[:, 3]**2 \
|
||||
- tmp * (self.mp * np.sin(xs[:, 2])
|
||||
* (self.l * xs[:, 3]**2
|
||||
+ self.g * np.cos(xs[:, 2]))) \
|
||||
+ tmp2 * (self.mp * np.cos(xs[:, 2]) * self.l \
|
||||
* xs[:, 3]**2 \
|
||||
+ self.mp * self.g * (np.cos(xs[:, 2])**2 \
|
||||
- np.sin(xs[:, 2])**2))
|
||||
+ tmp2 * (self.mp * np.cos(xs[:, 2]) * self.l
|
||||
* xs[:, 3]**2
|
||||
+ self.mp * self.g * (np.cos(xs[:, 2])**2
|
||||
- np.sin(xs[:, 2])**2))
|
||||
f_x[:, 3, 2] = - 1. / self.l * tmp \
|
||||
* (-us[:, 0] * np.cos(xs[:, 2]) \
|
||||
- self.mp * self.l * (xs[:, 3]**2) \
|
||||
* np.cos(xs[:, 2]) * np.sin(xs[:, 2]) \
|
||||
- (self.mc + self.mp) * self.g * np.sin(xs[:, 2])) \
|
||||
+ 1. / self.l * tmp2 \
|
||||
* (us[:, 0] * np.sin(xs[:, 2]) \
|
||||
- self.mp * self.l * xs[:, 3]**2 \
|
||||
* (np.cos(xs[:, 2])**2 - np.sin(xs[:, 2])**2) \
|
||||
- (self.mc + self.mp) \
|
||||
* self.g * np.cos(xs[:, 2]))
|
||||
* (-us[:, 0] * np.cos(xs[:, 2])
|
||||
- self.mp * self.l * (xs[:, 3]**2)
|
||||
* np.cos(xs[:, 2]) * np.sin(xs[:, 2])
|
||||
- (self.mc + self.mp) * self.g * np.sin(xs[:, 2])) \
|
||||
+ 1. / self.l * tmp2 \
|
||||
* (us[:, 0] * np.sin(xs[:, 2])
|
||||
- self.mp * self.l * xs[:, 3]**2
|
||||
* (np.cos(xs[:, 2])**2 - np.sin(xs[:, 2])**2)
|
||||
- (self.mc + self.mp)
|
||||
* self.g * np.cos(xs[:, 2]))
|
||||
|
||||
# f_theta_dot
|
||||
f_x[:, 1, 3] = tmp2 * (self.mp * np.sin(xs[:, 2]) \
|
||||
* self.l * 2 * xs[:, 3])
|
||||
f_x[:, 1, 3] = tmp2 * (self.mp * np.sin(xs[:, 2])
|
||||
* self.l * 2 * xs[:, 3])
|
||||
f_x[:, 2, 3] = np.ones(pred_len)
|
||||
f_x[:, 3, 3] = 1. / self.l * tmp2 \
|
||||
* (-2. * self.mp * self.l * xs[:, 3] \
|
||||
* np.cos(xs[:, 2]) * np.sin(xs[:, 2]))
|
||||
* (-2. * self.mp * self.l * xs[:, 3]
|
||||
* np.cos(xs[:, 2]) * np.sin(xs[:, 2]))
|
||||
|
||||
return f_x * dt + np.eye(state_size) # to discrete form
|
||||
|
||||
|
@ -150,8 +152,8 @@ class CartPoleModel(Model):
|
|||
f_u[:, 1, 0] = 1. / (self.mc + self.mp * (np.sin(xs[:, 2])**2))
|
||||
|
||||
f_u[:, 3, 0] = -np.cos(xs[:, 2]) \
|
||||
/ (self.l * (self.mc \
|
||||
+ self.mp * (np.sin(xs[:, 2])**2)))
|
||||
/ (self.l * (self.mc
|
||||
+ self.mp * (np.sin(xs[:, 2])**2)))
|
||||
|
||||
return f_u * dt # to discrete form
|
||||
|
||||
|
|
|
@ -3,6 +3,7 @@ import scipy.linalg
|
|||
from scipy import integrate
|
||||
from .model import LinearModel
|
||||
|
||||
|
||||
class FirstOrderLagModel(LinearModel):
|
||||
""" first order lag model
|
||||
Attributes:
|
||||
|
@ -10,13 +11,15 @@ class FirstOrderLagModel(LinearModel):
|
|||
u (numpy.ndarray):
|
||||
history_pred_xs (numpy.ndarray):
|
||||
"""
|
||||
|
||||
def __init__(self, config, tau=0.63):
|
||||
"""
|
||||
Args:
|
||||
tau (float): time constant
|
||||
"""
|
||||
# param
|
||||
self.A, self.B = self._to_state_space(tau, dt=config.DT) # discrete system
|
||||
self.A, self.B = self._to_state_space(
|
||||
tau, dt=config.DT) # discrete system
|
||||
super(FirstOrderLagModel, self).__init__(self.A, self.B)
|
||||
|
||||
@staticmethod
|
||||
|
@ -31,20 +34,21 @@ class FirstOrderLagModel(LinearModel):
|
|||
"""
|
||||
# continuous
|
||||
Ac = np.array([[-1./tau, 0., 0., 0.],
|
||||
[0., -1./tau, 0., 0.],
|
||||
[1., 0., 0., 0.],
|
||||
[0., 1., 0., 0.]])
|
||||
[0., -1./tau, 0., 0.],
|
||||
[1., 0., 0., 0.],
|
||||
[0., 1., 0., 0.]])
|
||||
Bc = np.array([[1./tau, 0.],
|
||||
[0., 1./tau],
|
||||
[0., 0.],
|
||||
[0., 0.]])
|
||||
[0., 1./tau],
|
||||
[0., 0.],
|
||||
[0., 0.]])
|
||||
# to discrete system
|
||||
A = scipy.linalg.expm(dt*Ac)
|
||||
# B = np.matmul(np.matmul(scipy.linalg.expm(Ac*dt)-scipy.linalg.expm(Ac*0.), np.linalg.inv(Ac)), Bc)
|
||||
B = np.zeros_like(Bc)
|
||||
for m in range(Bc.shape[0]):
|
||||
for n in range(Bc.shape[1]):
|
||||
integrate_fn = lambda tau: np.matmul(scipy.linalg.expm(Ac*tau), Bc)[m, n]
|
||||
def integrate_fn(tau): return np.matmul(
|
||||
scipy.linalg.expm(Ac*tau), Bc)[m, n]
|
||||
sol = integrate.quad(integrate_fn, 0, dt)
|
||||
B[m, n] = sol[0]
|
||||
|
||||
|
|
|
@ -1,6 +1,8 @@
|
|||
from .first_order_lag import FirstOrderLagModel
|
||||
from .two_wheeled import TwoWheeledModel
|
||||
from .cartpole import CartPoleModel
|
||||
from .nonlinear_sample_system import NonlinearSampleSystemModel
|
||||
|
||||
|
||||
def make_model(args, config):
|
||||
|
||||
|
@ -10,5 +12,7 @@ def make_model(args, config):
|
|||
return TwoWheeledModel(config)
|
||||
elif args.env == "CartPole":
|
||||
return CartPoleModel(config)
|
||||
elif args.env == "NonlinearSample":
|
||||
return NonlinearSampleSystemModel(config)
|
||||
|
||||
raise NotImplementedError("There is not {} Model".format(args.env))
|
|
@ -1,8 +1,10 @@
|
|||
import numpy as np
|
||||
|
||||
|
||||
class Model():
|
||||
""" base class of model
|
||||
"""
|
||||
|
||||
def __init__(self):
|
||||
"""
|
||||
"""
|
||||
|
@ -22,7 +24,7 @@ class Model():
|
|||
or shape(pop_size, pred_len+1, state_size)
|
||||
"""
|
||||
if len(us.shape) == 3:
|
||||
pred_xs =self._predict_traj_alltogether(curr_x, us)
|
||||
pred_xs = self._predict_traj_alltogether(curr_x, us)
|
||||
elif len(us.shape) == 2:
|
||||
pred_xs = self._predict_traj(curr_x, us)
|
||||
else:
|
||||
|
@ -75,8 +77,8 @@ class Model():
|
|||
# next_x.shape = (pop_size, state_size)
|
||||
next_x = self.predict_next_state(x, us[t])
|
||||
# update
|
||||
pred_xs = np.concatenate((pred_xs, next_x[np.newaxis, :, :]),\
|
||||
axis=0)
|
||||
pred_xs = np.concatenate((pred_xs, next_x[np.newaxis, :, :]),
|
||||
axis=0)
|
||||
x = next_x
|
||||
|
||||
return np.transpose(pred_xs, (1, 0, 2))
|
||||
|
@ -99,13 +101,13 @@ class Model():
|
|||
# get size
|
||||
(pred_len, input_size) = us.shape
|
||||
# pred final adjoint state
|
||||
lam = self.predict_terminal_adjoint_state(xs[-1],\
|
||||
lam = self.predict_terminal_adjoint_state(xs[-1],
|
||||
terminal_g_x=g_xs[-1])
|
||||
lams = lam[np.newaxis, :]
|
||||
|
||||
for t in range(pred_len-1, 0, -1):
|
||||
prev_lam = \
|
||||
self.predict_adjoint_state(lam, xs[t], us[t],\
|
||||
self.predict_adjoint_state(lam, xs[t], us[t],
|
||||
goal=g_xs[t], t=t)
|
||||
# update
|
||||
lams = np.concatenate((prev_lam[np.newaxis, :], lams), axis=0)
|
||||
|
@ -175,6 +177,7 @@ class Model():
|
|||
raise NotImplementedError("Implement hessian of model \
|
||||
with respect to the input")
|
||||
|
||||
|
||||
class LinearModel(Model):
|
||||
""" discrete linear model, x[k+1] = Ax[k] + Bu[k]
|
||||
|
||||
|
@ -182,6 +185,7 @@ class LinearModel(Model):
|
|||
A (numpy.ndarray): shape(state_size, state_size)
|
||||
B (numpy.ndarray): shape(state_size, input_size)
|
||||
"""
|
||||
|
||||
def __init__(self, A, B):
|
||||
"""
|
||||
"""
|
||||
|
@ -203,7 +207,7 @@ class LinearModel(Model):
|
|||
"""
|
||||
if len(u.shape) == 1:
|
||||
next_x = np.matmul(self.A, curr_x[:, np.newaxis]) \
|
||||
+ np.matmul(self.B, u[:, np.newaxis])
|
||||
+ np.matmul(self.B, u[:, np.newaxis])
|
||||
|
||||
return next_x.flatten()
|
||||
|
||||
|
|
|
@ -0,0 +1,164 @@
|
|||
import numpy as np
|
||||
|
||||
from .model import Model
|
||||
from ..common.utils import update_state_with_Runge_Kutta
|
||||
|
||||
|
||||
class NonlinearSampleSystemModel(Model):
|
||||
""" nonlinear sample system model
|
||||
"""
|
||||
|
||||
def __init__(self, config):
|
||||
"""
|
||||
"""
|
||||
super(NonlinearSampleSystemModel, self).__init__()
|
||||
self.dt = config.DT
|
||||
|
||||
def predict_next_state(self, curr_x, u):
|
||||
""" predict next state
|
||||
|
||||
Args:
|
||||
curr_x (numpy.ndarray): current state, shape(state_size, ) or
|
||||
shape(pop_size, state_size)
|
||||
u (numpy.ndarray): input, shape(input_size, ) or
|
||||
shape(pop_size, input_size)
|
||||
Returns:
|
||||
next_x (numpy.ndarray): next state, shape(state_size, ) or
|
||||
shape(pop_size, state_size)
|
||||
"""
|
||||
if len(u.shape) == 1:
|
||||
func_1 = self._func_x_1
|
||||
func_2 = self._func_x_2
|
||||
functions = [func_1, func_2]
|
||||
next_x = update_state_with_Runge_Kutta(
|
||||
curr_x, u, functions, batch=False)
|
||||
return next_x
|
||||
|
||||
elif len(u.shape) == 2:
|
||||
def func_1(xs, us): return self._func_x_1(xs, us, batch=True)
|
||||
def func_2(xs, us): return self._func_x_2(xs, us, batch=True)
|
||||
functions = [func_1, func_2]
|
||||
next_x = update_state_with_Runge_Kutta(
|
||||
curr_x, u, functions, batch=True)
|
||||
|
||||
return next_x
|
||||
|
||||
def _func_x_1(self, x, u, batch=False):
|
||||
if not batch:
|
||||
x_dot = x[1]
|
||||
else:
|
||||
x_dot = x[:, 1]
|
||||
return x_dot
|
||||
|
||||
def _func_x_2(self, x, u, batch=False):
|
||||
if not batch:
|
||||
x_dot = (1. - x[0]**2 - x[1]**2) * x[1] - x[0] + u[0]
|
||||
else:
|
||||
x_dot = (1. - x[:, 0]**2 - x[:, 1]**2) * \
|
||||
x[:, 1] - x[:, 0] + u[:, 0]
|
||||
return x_dot
|
||||
|
||||
def calc_f_x(self, xs, us, dt):
|
||||
""" gradient of model with respect to the state in batch form
|
||||
Args:
|
||||
xs (numpy.ndarray): state, shape(pred_len+1, state_size)
|
||||
us (numpy.ndarray): input, shape(pred_len, input_size,)
|
||||
|
||||
Return:
|
||||
f_x (numpy.ndarray): gradient of model with respect to x,
|
||||
shape(pred_len, state_size, state_size)
|
||||
|
||||
Notes:
|
||||
This should be discrete form !!
|
||||
"""
|
||||
# get size
|
||||
(_, state_size) = xs.shape
|
||||
(pred_len, _) = us.shape
|
||||
|
||||
f_x = np.zeros((pred_len, state_size, state_size))
|
||||
f_x[:, 0, 1] = 1.
|
||||
f_x[:, 1, 0] = 2. * xs[:, 0] * xs[:, 1] - 1.
|
||||
f_x[:, 1, 1] = - 2. * xs[:, 1] * xs[:, 1] + \
|
||||
(1. - xs[:, 0]**2 - xs[:, 1]**2)
|
||||
|
||||
return f_x * dt + np.eye(state_size) # to discrete form
|
||||
|
||||
def calc_f_u(self, xs, us, dt):
|
||||
""" gradient of model with respect to the input in batch form
|
||||
Args:
|
||||
xs (numpy.ndarray): state, shape(pred_len+1, state_size)
|
||||
us (numpy.ndarray): input, shape(pred_len, input_size,)
|
||||
|
||||
Return:
|
||||
f_u (numpy.ndarray): gradient of model with respect to x,
|
||||
shape(pred_len, state_size, input_size)
|
||||
|
||||
Notes:
|
||||
This should be discrete form !!
|
||||
"""
|
||||
# get size
|
||||
(_, state_size) = xs.shape
|
||||
(pred_len, input_size) = us.shape
|
||||
|
||||
f_u = np.zeros((pred_len, state_size, input_size))
|
||||
|
||||
f_u[:, 1, 0] = 1.
|
||||
|
||||
return f_u * dt # to discrete form
|
||||
|
||||
def calc_f_xx(self, xs, us, dt):
|
||||
""" hessian of model with respect to the state in batch form
|
||||
|
||||
Args:
|
||||
xs (numpy.ndarray): state, shape(pred_len+1, state_size)
|
||||
us (numpy.ndarray): input, shape(pred_len, input_size,)
|
||||
|
||||
Return:
|
||||
f_xx (numpy.ndarray): gradient of model with respect to x,
|
||||
shape(pred_len, state_size, state_size, state_size)
|
||||
"""
|
||||
# get size
|
||||
(_, state_size) = xs.shape
|
||||
(pred_len, _) = us.shape
|
||||
|
||||
f_xx = np.zeros((pred_len, state_size, state_size, state_size))
|
||||
|
||||
raise NotImplementedError
|
||||
|
||||
def calc_f_ux(self, xs, us, dt):
|
||||
""" hessian of model with respect to state and input in batch form
|
||||
|
||||
Args:
|
||||
xs (numpy.ndarray): state, shape(pred_len+1, state_size)
|
||||
us (numpy.ndarray): input, shape(pred_len, input_size,)
|
||||
|
||||
Return:
|
||||
f_ux (numpy.ndarray): gradient of model with respect to x,
|
||||
shape(pred_len, state_size, input_size, state_size)
|
||||
"""
|
||||
# get size
|
||||
(_, state_size) = xs.shape
|
||||
(pred_len, input_size) = us.shape
|
||||
|
||||
f_ux = np.zeros((pred_len, state_size, input_size, state_size))
|
||||
|
||||
raise NotImplementedError
|
||||
|
||||
def calc_f_uu(self, xs, us, dt):
|
||||
""" hessian of model with respect to input in batch form
|
||||
|
||||
Args:
|
||||
xs (numpy.ndarray): state, shape(pred_len+1, state_size)
|
||||
us (numpy.ndarray): input, shape(pred_len, input_size,)
|
||||
|
||||
Return:
|
||||
f_uu (numpy.ndarray): gradient of model with respect to x,
|
||||
shape(pred_len, state_size, input_size, input_size)
|
||||
"""
|
||||
# get size
|
||||
(_, state_size) = xs.shape
|
||||
(pred_len, input_size) = us.shape
|
||||
|
||||
f_uu = np.zeros((pred_len, state_size, input_size, input_size))
|
||||
|
||||
raise NotImplementedError
|
|
@ -2,9 +2,11 @@ import numpy as np
|
|||
|
||||
from .model import Model
|
||||
|
||||
|
||||
class TwoWheeledModel(Model):
|
||||
""" two wheeled model
|
||||
"""
|
||||
|
||||
def __init__(self, config):
|
||||
"""
|
||||
"""
|
||||
|
|
|
@ -1,9 +1,11 @@
|
|||
import numpy as np
|
||||
from .planner import Planner
|
||||
|
||||
|
||||
class ClosestPointPlanner(Planner):
|
||||
""" This planner make goal state according to goal path
|
||||
"""
|
||||
|
||||
def __init__(self, config):
|
||||
"""
|
||||
"""
|
||||
|
|
|
@ -1,9 +1,11 @@
|
|||
import numpy as np
|
||||
from .planner import Planner
|
||||
|
||||
|
||||
class ConstantPlanner(Planner):
|
||||
""" This planner make constant goal state
|
||||
"""
|
||||
|
||||
def __init__(self, config):
|
||||
"""
|
||||
"""
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
from .const_planner import ConstantPlanner
|
||||
from .closest_point_planner import ClosestPointPlanner
|
||||
|
||||
|
||||
def make_planner(args, config):
|
||||
|
||||
if args.env == "FirstOrderLag":
|
||||
|
@ -11,5 +12,8 @@ def make_planner(args, config):
|
|||
return ClosestPointPlanner(config)
|
||||
elif args.env == "CartPole":
|
||||
return ConstantPlanner(config)
|
||||
elif args.env == "NonlinearSample":
|
||||
return ConstantPlanner(config)
|
||||
|
||||
raise NotImplementedError("There is not {} Planner".format(args.planner_type))
|
||||
raise NotImplementedError(
|
||||
"There is not {} Planner".format(args.planner_type))
|
||||
|
|
|
@ -1,8 +1,10 @@
|
|||
import numpy as np
|
||||
|
||||
|
||||
class Planner():
|
||||
"""
|
||||
"""
|
||||
|
||||
def __init__(self):
|
||||
"""
|
||||
"""
|
||||
|
|
|
@ -8,9 +8,11 @@ import matplotlib.animation as animation
|
|||
|
||||
logger = getLogger(__name__)
|
||||
|
||||
|
||||
class Animator():
|
||||
""" animation class
|
||||
"""
|
||||
|
||||
def __init__(self, env, args=None):
|
||||
"""
|
||||
"""
|
||||
|
@ -65,7 +67,7 @@ class Animator():
|
|||
"""
|
||||
# set up animation figures
|
||||
self._setup()
|
||||
_update_img = lambda i: self._update_img(i, history_x, history_g_x)
|
||||
def _update_img(i): return self._update_img(i, history_x, history_g_x)
|
||||
|
||||
# Set up formatting for the movie files
|
||||
Writer = animation.writers['ffmpeg']
|
||||
|
|
|
@ -5,6 +5,7 @@ import matplotlib.pyplot as plt
|
|||
|
||||
from ..helper import save_pickle, load_pickle
|
||||
|
||||
|
||||
def plot_result(history, history_g=None, ylabel="x",
|
||||
save_dir="./result", name="state_history"):
|
||||
"""
|
||||
|
@ -28,7 +29,7 @@ def plot_result(history, history_g=None, ylabel="x",
|
|||
def plot(axis, history, history_g=None):
|
||||
axis.plot(range(iters), history, c="r", linewidth=3)
|
||||
if history_g is not None:
|
||||
axis.plot(range(iters), history_g,\
|
||||
axis.plot(range(iters), history_g,
|
||||
c="b", linewidth=3, label="goal")
|
||||
|
||||
if i < size:
|
||||
|
@ -47,6 +48,7 @@ def plot_result(history, history_g=None, ylabel="x",
|
|||
axis1.legend(ncol=1, bbox_to_anchor=(0., 1.02, 1., 0.102), loc=3)
|
||||
figure.savefig(path, bbox_inches="tight", pad_inches=0.05)
|
||||
|
||||
|
||||
def plot_results(history_x, history_u, history_g=None, args=None):
|
||||
"""
|
||||
|
||||
|
@ -64,12 +66,13 @@ def plot_results(history_x, history_u, history_g=None, args=None):
|
|||
controller_type = args.controller_type
|
||||
|
||||
plot_result(history_x, history_g=history_g, ylabel="x",
|
||||
name= env + "-state_history",
|
||||
name=env + "-state_history",
|
||||
save_dir="./result/" + controller_type)
|
||||
plot_result(history_u, history_g=np.zeros_like(history_u), ylabel="u",
|
||||
name= env + "-input_history",
|
||||
name=env + "-input_history",
|
||||
save_dir="./result/" + controller_type)
|
||||
|
||||
|
||||
def save_plot_data(history_x, history_u, history_g=None, args=None):
|
||||
""" save plot data
|
||||
|
||||
|
@ -98,6 +101,7 @@ def save_plot_data(history_x, history_u, history_g=None, args=None):
|
|||
env + "-history_g.pkl")
|
||||
save_pickle(path, history_g)
|
||||
|
||||
|
||||
def load_plot_data(env, controller_type, result_dir="./result"):
|
||||
"""
|
||||
Args:
|
||||
|
@ -123,6 +127,7 @@ def load_plot_data(env, controller_type, result_dir="./result"):
|
|||
|
||||
return history_x, history_u, history_g
|
||||
|
||||
|
||||
def plot_multi_result(histories, histories_g=None, labels=None, ylabel="x",
|
||||
save_dir="./result", name="state_history"):
|
||||
"""
|
||||
|
@ -146,17 +151,17 @@ def plot_multi_result(histories, histories_g=None, labels=None, ylabel="x",
|
|||
axis.plot(range(iters), history,
|
||||
linewidth=3, label=label, alpha=0.7, linestyle="dashed")
|
||||
if history_g is not None:
|
||||
axis.plot(range(iters), history_g,\
|
||||
axis.plot(range(iters), history_g,
|
||||
c="b", linewidth=3)
|
||||
|
||||
if i < size:
|
||||
for j, (history, history_g) \
|
||||
in enumerate(zip(histories, histories_g)):
|
||||
in enumerate(zip(histories, histories_g)):
|
||||
plot(axis1, history[:, i],
|
||||
history_g=history_g[:, i], label=labels[j])
|
||||
if i+1 < size:
|
||||
for j, (history, history_g) in \
|
||||
enumerate(zip(histories, histories_g)):
|
||||
enumerate(zip(histories, histories_g)):
|
||||
plot(axis2, history[:, i+1],
|
||||
history_g=history_g[:, i+1], label=labels[j])
|
||||
|
||||
|
|
|
@ -5,6 +5,7 @@ import matplotlib.pyplot as plt
|
|||
|
||||
from ..common.utils import rotate_pos
|
||||
|
||||
|
||||
def circle(center_x, center_y, radius, start=0., end=2*np.pi, n_point=100):
|
||||
""" Create circle matrix
|
||||
|
||||
|
@ -29,6 +30,7 @@ def circle(center_x, center_y, radius, start=0., end=2*np.pi, n_point=100):
|
|||
|
||||
return np.array(circle_xs), np.array(circle_ys)
|
||||
|
||||
|
||||
def circle_with_angle(center_x, center_y, radius, angle):
|
||||
""" Create circle matrix with angle line matrix
|
||||
|
||||
|
@ -50,6 +52,7 @@ def circle_with_angle(center_x, center_y, radius, angle):
|
|||
|
||||
return circle_x, circle_y, angle_x, angle_y
|
||||
|
||||
|
||||
def square(center_x, center_y, shape, angle):
|
||||
""" Create square
|
||||
|
||||
|
@ -77,6 +80,7 @@ def square(center_x, center_y, shape, angle):
|
|||
|
||||
return trans_points[:, 0], trans_points[:, 1]
|
||||
|
||||
|
||||
def square_with_angle(center_x, center_y, shape, angle):
|
||||
""" Create square with angle line
|
||||
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
from .runner import ExpRunner
|
||||
|
||||
|
||||
def make_runner(args):
|
||||
return ExpRunner()
|
|
@ -4,9 +4,11 @@ import numpy as np
|
|||
|
||||
logger = getLogger(__name__)
|
||||
|
||||
|
||||
class ExpRunner():
|
||||
""" experiment runner
|
||||
"""
|
||||
|
||||
def __init__(self):
|
||||
"""
|
||||
"""
|
||||
|
@ -46,6 +48,6 @@ class ExpRunner():
|
|||
score += cost
|
||||
step_count += 1
|
||||
|
||||
logger.debug("Controller type = {}, Score = {}"\
|
||||
logger.debug("Controller type = {}, Score = {}"
|
||||
.format(controller, score))
|
||||
return np.array(history_x), np.array(history_u), np.array(history_g)
|
|
@ -71,6 +71,7 @@ There are 4 example environments, "FirstOrderLag", "TwoWheeledConst", "TwoWheele
|
|||
| Two wheeled System (Constant Goal) | x | ✓ | 3 | 2 |
|
||||
| Two wheeled System (Moving Goal) | x | ✓ | 3 | 2 |
|
||||
| Cartpole (Swing up) | x | ✓ | 4 | 1 |
|
||||
| Nonlinear Sample System Env | x | ✓ | 2 | 1 |
|
||||
|
||||
All states and inputs of environments are continuous.
|
||||
**It should be noted that the algorithms for linear model could be applied to nonlinear enviroments if you have linealized the model of nonlinear environments.**
|
||||
|
|
Binary file not shown.
After Width: | Height: | Size: 20 KiB |
Binary file not shown.
After Width: | Height: | Size: 28 KiB |
|
@ -6,7 +6,8 @@ import numpy as np
|
|||
import matplotlib.pyplot as plt
|
||||
|
||||
from PythonLinearNonlinearControl.plotters.plot_func import load_plot_data, \
|
||||
plot_multi_result
|
||||
plot_multi_result
|
||||
|
||||
|
||||
def run(args):
|
||||
|
||||
|
@ -39,7 +40,8 @@ def run(args):
|
|||
ylabel="x")
|
||||
|
||||
plot_multi_result(history_us, histories_g=np.zeros_like(history_us),
|
||||
labels=controllers, ylabel="u", name="input_history")
|
||||
labels=controllers, ylabel="u", name="input_history")
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser()
|
||||
|
@ -51,5 +53,6 @@ def main():
|
|||
|
||||
run(args)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
|
@ -8,9 +8,10 @@ from PythonLinearNonlinearControl.models.make_models import make_model
|
|||
from PythonLinearNonlinearControl.envs.make_envs import make_env
|
||||
from PythonLinearNonlinearControl.runners.make_runners import make_runner
|
||||
from PythonLinearNonlinearControl.plotters.plot_func import plot_results, \
|
||||
save_plot_data
|
||||
save_plot_data
|
||||
from PythonLinearNonlinearControl.plotters.animator import Animator
|
||||
|
||||
|
||||
def run(args):
|
||||
# logger
|
||||
make_logger(args.result_dir)
|
||||
|
@ -44,17 +45,19 @@ def run(args):
|
|||
animator = Animator(env, args=args)
|
||||
animator.draw(history_x, history_g)
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser()
|
||||
|
||||
parser.add_argument("--controller_type", type=str, default="CEM")
|
||||
parser.add_argument("--env", type=str, default="TwoWheeledTrack")
|
||||
parser.add_argument("--save_anim", type=bool_flag, default=1)
|
||||
parser.add_argument("--controller_type", type=str, default="DDP")
|
||||
parser.add_argument("--env", type=str, default="NonlinearSample")
|
||||
parser.add_argument("--save_anim", type=bool_flag, default=0)
|
||||
parser.add_argument("--result_dir", type=str, default="./result")
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
run(args)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
Loading…
Reference in New Issue