Add: catpole env
This commit is contained in:
parent
4e01264bd9
commit
4627d6fc1e
|
@ -9,21 +9,39 @@
|
|||
|
||||
## FistOrderLagEnv
|
||||
|
||||
System equations.
|
||||
### System equation.
|
||||
|
||||
<img src="assets/firstorderlag.png" width="550">
|
||||
|
||||
You can set arbinatry time constant, tau. The default is 0.63 s
|
||||
|
||||
### Cost.
|
||||
|
||||
<img src="assets/quadratic_score.png" width="200">
|
||||
|
||||
Q = diag[1., 1., 1., 1.],
|
||||
R = diag[1., 1.]
|
||||
|
||||
X_g denote the goal states.
|
||||
|
||||
## TwoWheeledEnv
|
||||
|
||||
System equations.
|
||||
### System equation.
|
||||
|
||||
<img src="assets/twowheeled.png" width="300">
|
||||
|
||||
### Cost.
|
||||
|
||||
<img src="assets/quadratic_score.png" width="200">
|
||||
|
||||
Q = diag[5., 5., 1.],
|
||||
R = diag[0.1, 0.1]
|
||||
|
||||
X_g denote the goal states.
|
||||
|
||||
## CatpoleEnv (Swing up)
|
||||
|
||||
System equations.
|
||||
System equation.
|
||||
|
||||
<img src="assets/cartpole.png" width="600">
|
||||
|
||||
|
@ -31,4 +49,8 @@ You can set arbinatry parameters, mc, mp, l and g.
|
|||
|
||||
Default settings are as follows:
|
||||
|
||||
mc = 1, mp = 0.2, l = 0.5, g = 9.8
|
||||
mc = 1, mp = 0.2, l = 0.5, g = 9.81
|
||||
|
||||
### Cost.
|
||||
|
||||
<img src="assets/cartpole_score.png" width="300">
|
|
@ -0,0 +1,218 @@
|
|||
import numpy as np
|
||||
|
||||
class CartPoleConfigModule():
|
||||
# parameters
|
||||
ENV_NAME = "CartPole-v0"
|
||||
TYPE = "Nonlinear"
|
||||
TASK_HORIZON = 500
|
||||
PRED_LEN = 50
|
||||
STATE_SIZE = 4
|
||||
INPUT_SIZE = 1
|
||||
DT = 0.02
|
||||
# cost parameters
|
||||
R = np.diag([0.01])
|
||||
# bounds
|
||||
INPUT_LOWER_BOUND = np.array([-3.])
|
||||
INPUT_UPPER_BOUND = np.array([3.])
|
||||
# parameters
|
||||
MP = 0.2
|
||||
MC = 1.
|
||||
L = 0.5
|
||||
G = 9.81
|
||||
|
||||
def __init__(self):
|
||||
"""
|
||||
"""
|
||||
# opt configs
|
||||
self.opt_config = {
|
||||
"Random": {
|
||||
"popsize": 5000
|
||||
},
|
||||
"CEM": {
|
||||
"popsize": 500,
|
||||
"num_elites": 50,
|
||||
"max_iters": 15,
|
||||
"alpha": 0.3,
|
||||
"init_var":9.,
|
||||
"threshold":0.001
|
||||
},
|
||||
"MPPI":{
|
||||
"beta" : 0.6,
|
||||
"popsize": 5000,
|
||||
"kappa": 0.9,
|
||||
"noise_sigma": 0.5,
|
||||
},
|
||||
"MPPIWilliams":{
|
||||
"popsize": 5000,
|
||||
"lambda": 1.,
|
||||
"noise_sigma": 0.9,
|
||||
},
|
||||
"iLQR":{
|
||||
"max_iter": 500,
|
||||
"init_mu": 1.,
|
||||
"mu_min": 1e-6,
|
||||
"mu_max": 1e10,
|
||||
"init_delta": 2.,
|
||||
"threshold": 1e-6,
|
||||
},
|
||||
"DDP":{
|
||||
"max_iter": 500,
|
||||
"init_mu": 1.,
|
||||
"mu_min": 1e-6,
|
||||
"mu_max": 1e10,
|
||||
"init_delta": 2.,
|
||||
"threshold": 1e-6,
|
||||
},
|
||||
"NMPC-CGMRES":{
|
||||
},
|
||||
"NMPC-Newton":{
|
||||
},
|
||||
}
|
||||
|
||||
@staticmethod
|
||||
def input_cost_fn(u):
|
||||
""" input cost functions
|
||||
Args:
|
||||
u (numpy.ndarray): input, shape(pred_len, input_size)
|
||||
or shape(pop_size, pred_len, input_size)
|
||||
Returns:
|
||||
cost (numpy.ndarray): cost of input, shape(pred_len, input_size) or
|
||||
shape(pop_size, pred_len, input_size)
|
||||
"""
|
||||
return (u**2) * np.diag(CartPoleConfigModule.R)
|
||||
|
||||
@staticmethod
|
||||
def state_cost_fn(x, g_x):
|
||||
""" state cost function
|
||||
Args:
|
||||
x (numpy.ndarray): state, shape(pred_len, state_size)
|
||||
or shape(pop_size, pred_len, state_size)
|
||||
g_x (numpy.ndarray): goal state, shape(pred_len, state_size)
|
||||
or shape(pop_size, pred_len, state_size)
|
||||
Returns:
|
||||
cost (numpy.ndarray): cost of state, shape(pred_len, 1) or
|
||||
shape(pop_size, pred_len, 1)
|
||||
"""
|
||||
|
||||
if len(x.shape) > 2:
|
||||
return (6. * (x[:, :, 0]**2) \
|
||||
+ 12. * ((np.cos(x[:, :, 2]) + 1.)**2) \
|
||||
+ 0.1 * (x[:, :, 1]**2) \
|
||||
+ 0.1 * (x[:, :, 3]**2))[:, :, np.newaxis]
|
||||
|
||||
elif len(x.shape) > 1:
|
||||
return (6. * (x[:, 0]**2) \
|
||||
+ 12. * ((np.cos(x[:, 2]) + 1.)**2) \
|
||||
+ 0.1 * (x[:, 1]**2) \
|
||||
+ 0.1 * (x[:, 3]**2))[:, np.newaxis]
|
||||
|
||||
return 6. * (x[0]**2) \
|
||||
+ 12. * ((np.cos(x[2]) + 1.)**2) \
|
||||
+ 0.1 * (x[1]**2) \
|
||||
+ 0.1 * (x[3]**2)
|
||||
|
||||
@staticmethod
|
||||
def terminal_state_cost_fn(terminal_x, terminal_g_x):
|
||||
"""
|
||||
Args:
|
||||
terminal_x (numpy.ndarray): terminal state,
|
||||
shape(state_size, ) or shape(pop_size, state_size)
|
||||
terminal_g_x (numpy.ndarray): terminal goal state,
|
||||
shape(state_size, ) or shape(pop_size, state_size)
|
||||
Returns:
|
||||
cost (numpy.ndarray): cost of state, shape(pred_len, ) or
|
||||
shape(pop_size, pred_len)
|
||||
"""
|
||||
|
||||
if len(terminal_x.shape) > 1:
|
||||
return (6. * (terminal_x[:, 0]**2) \
|
||||
+ 12. * ((np.cos(terminal_x[:, 2]) + 1.)**2) \
|
||||
+ 0.1 * (terminal_x[:, 1]**2) \
|
||||
+ 0.1 * (terminal_x[:, 3]**2))[:, np.newaxis]
|
||||
|
||||
return 6. * (terminal_x[0]**2) \
|
||||
+ 12. * ((np.cos(terminal_x[2]) + 1.)**2) \
|
||||
+ 0.1 * (terminal_x[1]**2) \
|
||||
+ 0.1 * (terminal_x[3]**2)
|
||||
|
||||
@staticmethod
|
||||
def gradient_cost_fn_with_state(x, g_x, terminal=False):
|
||||
""" gradient of costs with respect to the state
|
||||
|
||||
Args:
|
||||
x (numpy.ndarray): state, shape(pred_len, state_size)
|
||||
g_x (numpy.ndarray): goal state, shape(pred_len, state_size)
|
||||
|
||||
Returns:
|
||||
l_x (numpy.ndarray): gradient of cost, shape(pred_len, state_size)
|
||||
or shape(1, state_size)
|
||||
"""
|
||||
if not terminal:
|
||||
return None
|
||||
|
||||
return None
|
||||
|
||||
@staticmethod
|
||||
def gradient_cost_fn_with_input(x, u):
|
||||
""" gradient of costs with respect to the input
|
||||
|
||||
Args:
|
||||
x (numpy.ndarray): state, shape(pred_len, state_size)
|
||||
u (numpy.ndarray): goal state, shape(pred_len, input_size)
|
||||
|
||||
Returns:
|
||||
l_u (numpy.ndarray): gradient of cost, shape(pred_len, input_size)
|
||||
"""
|
||||
return None
|
||||
|
||||
@staticmethod
|
||||
def hessian_cost_fn_with_state(x, g_x, terminal=False):
|
||||
""" hessian costs with respect to the state
|
||||
|
||||
Args:
|
||||
x (numpy.ndarray): state, shape(pred_len, state_size)
|
||||
g_x (numpy.ndarray): goal state, shape(pred_len, state_size)
|
||||
|
||||
Returns:
|
||||
l_xx (numpy.ndarray): gradient of cost,
|
||||
shape(pred_len, state_size, state_size) or
|
||||
shape(1, state_size, state_size) or
|
||||
"""
|
||||
if not terminal:
|
||||
(pred_len, _) = x.shape
|
||||
return None
|
||||
|
||||
return None
|
||||
|
||||
@staticmethod
|
||||
def hessian_cost_fn_with_input(x, u):
|
||||
""" hessian costs with respect to the input
|
||||
|
||||
Args:
|
||||
x (numpy.ndarray): state, shape(pred_len, state_size)
|
||||
u (numpy.ndarray): goal state, shape(pred_len, input_size)
|
||||
|
||||
Returns:
|
||||
l_uu (numpy.ndarray): gradient of cost,
|
||||
shape(pred_len, input_size, input_size)
|
||||
"""
|
||||
(pred_len, _) = u.shape
|
||||
|
||||
return None
|
||||
|
||||
@staticmethod
|
||||
def hessian_cost_fn_with_input_state(x, u):
|
||||
""" hessian costs with respect to the state and input
|
||||
|
||||
Args:
|
||||
x (numpy.ndarray): state, shape(pred_len, state_size)
|
||||
u (numpy.ndarray): goal state, shape(pred_len, input_size)
|
||||
|
||||
Returns:
|
||||
l_ux (numpy.ndarray): gradient of cost ,
|
||||
shape(pred_len, input_size, state_size)
|
||||
"""
|
||||
(_, state_size) = x.shape
|
||||
(pred_len, input_size) = u.shape
|
||||
|
||||
return np.zeros((pred_len, input_size, state_size))
|
|
@ -1,5 +1,6 @@
|
|||
from .first_order_lag import FirstOrderLagConfigModule
|
||||
from .two_wheeled import TwoWheeledConfigModule
|
||||
from .cartpole import CartPoleConfigModule
|
||||
|
||||
def make_config(args):
|
||||
"""
|
||||
|
@ -10,3 +11,5 @@ def make_config(args):
|
|||
return FirstOrderLagConfigModule()
|
||||
elif args.env == "TwoWheeledConst" or args.env == "TwoWheeled":
|
||||
return TwoWheeledConfigModule()
|
||||
elif args.env == "CartPole":
|
||||
return CartPoleConfigModule()
|
|
@ -14,12 +14,16 @@ class CartPoleEnv(Env):
|
|||
def __init__(self):
|
||||
"""
|
||||
"""
|
||||
self.config = {"state_size" : 4,\
|
||||
"input_size" : 1,\
|
||||
"dt" : 0.02,\
|
||||
"max_step" : 1000,\
|
||||
"input_lower_bound": None,\
|
||||
"input_upper_bound": None,
|
||||
self.config = {"state_size" : 4,
|
||||
"input_size" : 1,
|
||||
"dt" : 0.02,
|
||||
"max_step" : 500,
|
||||
"input_lower_bound": [-3.],
|
||||
"input_upper_bound": [3.],
|
||||
"mp": 0.2,
|
||||
"mc": 1.,
|
||||
"l": 0.5,
|
||||
"g": 9.81,
|
||||
}
|
||||
|
||||
super(CartPoleEnv, self).__init__(self.config)
|
||||
|
@ -33,13 +37,13 @@ class CartPoleEnv(Env):
|
|||
"""
|
||||
self.step_count = 0
|
||||
|
||||
self.curr_x = np.zeros(self.config["state_size"])
|
||||
self.curr_x = np.array([0., 0., 0., 0.])
|
||||
|
||||
if init_x is not None:
|
||||
self.curr_x = init_x
|
||||
|
||||
# goal
|
||||
self.g_x = np.array([0., 0., np.pi, 0.])
|
||||
self.g_x = np.array([0., 0., -np.pi, 0.])
|
||||
|
||||
# clear memory
|
||||
self.history_x = []
|
||||
|
@ -65,20 +69,43 @@ class CartPoleEnv(Env):
|
|||
self.config["input_upper_bound"])
|
||||
|
||||
# step
|
||||
next_x = np.zeros(self.config["state_size"])
|
||||
# x
|
||||
d_x0 = self.curr_x[1]
|
||||
# v_x
|
||||
d_x1 = (u[0] + self.config["mp"] * np.sin(self.curr_x[2]) \
|
||||
* (self.config["l"] * (self.curr_x[3]**2) \
|
||||
+ self.config["g"] * np.cos(self.curr_x[2]))) \
|
||||
/ (self.config["mc"] + self.config["mp"] \
|
||||
* (np.sin(self.curr_x[2])**2))
|
||||
# theta
|
||||
d_x2 = self.curr_x[3]
|
||||
|
||||
# v_theta
|
||||
d_x3 = (-u[0] * np.cos(self.curr_x[2]) \
|
||||
- self.config["mp"] * self.config["l"] * (self.curr_x[3]**2) \
|
||||
* np.cos(self.curr_x[2]) * np.sin(self.curr_x[2]) \
|
||||
- (self.config["mc"] + self.config["mp"]) * self.config["g"] \
|
||||
* np.sin(self.curr_x[2])) \
|
||||
/ (self.config["l"] * (self.config["mc"] + self.config["mp"] \
|
||||
* (np.sin(self.curr_x[2])**2)))
|
||||
|
||||
next_x = self.curr_x +\
|
||||
np.array([d_x0, d_x1, d_x2, d_x3]) * self.config["dt"]
|
||||
|
||||
# TODO: costs
|
||||
costs = 0.
|
||||
costs += 0.1 * np.sum(u**2)
|
||||
costs += np.sum((self.curr_x - self.g_x)**2)
|
||||
|
||||
costs += 6. * self.curr_x[0]**2 \
|
||||
+ 12. * (np.cos(self.curr_x[2]) + 1.)**2 \
|
||||
+ 0.1 * self.curr_x[1]**2 \
|
||||
+ 0.1 * self.curr_x[3]**2
|
||||
|
||||
# save history
|
||||
self.history_x.append(next_x.flatten())
|
||||
self.history_g_x.append(self.g_x.flatten())
|
||||
|
||||
# update
|
||||
self.curr_x = next_x.flatten()
|
||||
self.curr_x = next_x.flatten().copy()
|
||||
# update costs
|
||||
self.step_count += 1
|
||||
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
from .first_order_lag import FirstOrderLagEnv
|
||||
from .two_wheeled import TwoWheeledConstEnv
|
||||
from .cartpole import CartpoleEnv
|
||||
from .cartpole import CartPoleEnv
|
||||
|
||||
def make_env(args):
|
||||
|
||||
|
@ -9,6 +9,6 @@ def make_env(args):
|
|||
elif args.env == "TwoWheeledConst":
|
||||
return TwoWheeledConstEnv()
|
||||
elif args.env == "CartPole":
|
||||
return CartpoleEnv()
|
||||
return CartPoleEnv()
|
||||
|
||||
raise NotImplementedError("There is not {} Env".format(args.env))
|
|
@ -86,7 +86,7 @@ class TwoWheeledConstEnv(Env):
|
|||
# TODO: costs
|
||||
costs = 0.
|
||||
costs += 0.1 * np.sum(u**2)
|
||||
costs += np.sum((self.curr_x - self.g_x)**2)
|
||||
costs += np.sum(((self.curr_x - self.g_x)**2) * np.array([5., 5., 1.]))
|
||||
|
||||
# save history
|
||||
self.history_x.append(next_x.flatten())
|
||||
|
|
|
@ -0,0 +1,186 @@
|
|||
import numpy as np
|
||||
|
||||
from .model import Model
|
||||
|
||||
class CartPoleModel(Model):
|
||||
""" cartpole model
|
||||
"""
|
||||
def __init__(self, config):
|
||||
"""
|
||||
"""
|
||||
super(CartPoleModel, self).__init__()
|
||||
self.dt = config.DT
|
||||
self.mc = config.MC
|
||||
self.mp = config.MP
|
||||
self.l = config.L
|
||||
self.g = config.G
|
||||
|
||||
def predict_next_state(self, curr_x, u):
|
||||
""" predict next state
|
||||
|
||||
Args:
|
||||
curr_x (numpy.ndarray): current state, shape(state_size, ) or
|
||||
shape(pop_size, state_size)
|
||||
u (numpy.ndarray): input, shape(input_size, ) or
|
||||
shape(pop_size, input_size)
|
||||
Returns:
|
||||
next_x (numpy.ndarray): next state, shape(state_size, ) or
|
||||
shape(pop_size, state_size)
|
||||
"""
|
||||
if len(u.shape) == 1:
|
||||
# x
|
||||
d_x0 = curr_x[1]
|
||||
# v_x
|
||||
d_x1 = (u[0] + self.mp * np.sin(curr_x[2]) \
|
||||
* (self.l * (curr_x[3]**2) \
|
||||
+ self.g * np.cos(curr_x[2]))) \
|
||||
/ (self.mc + self.mp * (np.sin(curr_x[2])**2))
|
||||
# theta
|
||||
d_x2 = curr_x[3]
|
||||
# v_theta
|
||||
d_x3 = (-u[0] * np.cos(curr_x[2]) \
|
||||
- self.mp * self.l * (curr_x[3]**2) \
|
||||
* np.cos(curr_x[2]) * np.sin(curr_x[2]) \
|
||||
- (self.mc + self.mp) * self.g * np.sin(curr_x[2])) \
|
||||
/ (self.l * (self.mc + self.mp * (np.sin(curr_x[2])**2)))
|
||||
|
||||
next_x = curr_x +\
|
||||
np.array([d_x0, d_x1, d_x2, d_x3]) * self.dt
|
||||
|
||||
return next_x
|
||||
|
||||
elif len(u.shape) == 2:
|
||||
# x
|
||||
d_x0 = curr_x[:, 1]
|
||||
# v_x
|
||||
d_x1 = (u[:, 0] + self.mp * np.sin(curr_x[:, 2]) \
|
||||
* (self.l * (curr_x[:, 3]**2) \
|
||||
+ self.g * np.cos(curr_x[:, 2]))) \
|
||||
/ (self.mc + self.mp * (np.sin(curr_x[:, 2])**2))
|
||||
# theta
|
||||
d_x2 = curr_x[:, 3]
|
||||
# v_theta
|
||||
d_x3 = (-u[:, 0] * np.cos(curr_x[:, 2]) \
|
||||
- self.mp * self.l * (curr_x[:, 3]**2) \
|
||||
* np.cos(curr_x[:, 2]) * np.sin(curr_x[:, 2]) \
|
||||
- (self.mc + self.mp) * self.g * np.sin(curr_x[:, 2])) \
|
||||
/ (self.l * (self.mc + self.mp * (np.sin(curr_x[:, 2])**2)))
|
||||
|
||||
next_x = curr_x +\
|
||||
np.stack((d_x0, d_x1, d_x2, d_x3), axis=1) * self.dt
|
||||
|
||||
return next_x
|
||||
|
||||
def calc_f_x(self, xs, us, dt):
|
||||
""" gradient of model with respect to the state in batch form
|
||||
Args:
|
||||
xs (numpy.ndarray): state, shape(pred_len+1, state_size)
|
||||
us (numpy.ndarray): input, shape(pred_len, input_size,)
|
||||
|
||||
Return:
|
||||
f_x (numpy.ndarray): gradient of model with respect to x,
|
||||
shape(pred_len, state_size, state_size)
|
||||
|
||||
Notes:
|
||||
This should be discrete form !!
|
||||
"""
|
||||
# get size
|
||||
(_, state_size) = xs.shape
|
||||
(pred_len, _) = us.shape
|
||||
|
||||
f_x = np.zeros((pred_len, state_size, state_size))
|
||||
|
||||
f_x[:, 0, 2] = -np.sin(xs[:, 2]) * us[:, 0]
|
||||
f_x[:, 1, 2] = np.cos(xs[:, 2]) * us[:, 0]
|
||||
|
||||
return f_x * dt + np.eye(state_size) # to discrete form
|
||||
|
||||
def calc_f_u(self, xs, us, dt):
|
||||
""" gradient of model with respect to the input in batch form
|
||||
Args:
|
||||
xs (numpy.ndarray): state, shape(pred_len+1, state_size)
|
||||
us (numpy.ndarray): input, shape(pred_len, input_size,)
|
||||
|
||||
Return:
|
||||
f_u (numpy.ndarray): gradient of model with respect to x,
|
||||
shape(pred_len, state_size, input_size)
|
||||
|
||||
Notes:
|
||||
This should be discrete form !!
|
||||
"""
|
||||
# get size
|
||||
(_, state_size) = xs.shape
|
||||
(pred_len, input_size) = us.shape
|
||||
|
||||
f_u = np.zeros((pred_len, state_size, input_size))
|
||||
|
||||
f_u[:, 1, 0] = 1. / (self.mc + self.mp * (np.sin(xs[:, 2])**2))
|
||||
|
||||
f_u[:, 3, 0] = -np.cos(xs[:, 2]) \
|
||||
/ (self.l * (self.mc \
|
||||
+ self.mp * (np.sin(xs[:, 2])**2)))
|
||||
|
||||
return f_u * dt # to discrete form
|
||||
|
||||
def calc_f_xx(self, xs, us, dt):
|
||||
""" hessian of model with respect to the state in batch form
|
||||
|
||||
Args:
|
||||
xs (numpy.ndarray): state, shape(pred_len+1, state_size)
|
||||
us (numpy.ndarray): input, shape(pred_len, input_size,)
|
||||
|
||||
Return:
|
||||
f_xx (numpy.ndarray): gradient of model with respect to x,
|
||||
shape(pred_len, state_size, state_size, state_size)
|
||||
"""
|
||||
# get size
|
||||
(_, state_size) = xs.shape
|
||||
(pred_len, _) = us.shape
|
||||
|
||||
f_xx = np.zeros((pred_len, state_size, state_size, state_size))
|
||||
|
||||
f_xx[:, 0, 2, 2] = -np.cos(xs[:, 2]) * us[:, 0]
|
||||
f_xx[:, 1, 2, 2] = -np.sin(xs[:, 2]) * us[:, 0]
|
||||
|
||||
return f_xx * dt
|
||||
|
||||
def calc_f_ux(self, xs, us, dt):
|
||||
""" hessian of model with respect to state and input in batch form
|
||||
|
||||
Args:
|
||||
xs (numpy.ndarray): state, shape(pred_len+1, state_size)
|
||||
us (numpy.ndarray): input, shape(pred_len, input_size,)
|
||||
|
||||
Return:
|
||||
f_ux (numpy.ndarray): gradient of model with respect to x,
|
||||
shape(pred_len, state_size, input_size, state_size)
|
||||
"""
|
||||
# get size
|
||||
(_, state_size) = xs.shape
|
||||
(pred_len, input_size) = us.shape
|
||||
|
||||
f_ux = np.zeros((pred_len, state_size, input_size, state_size))
|
||||
|
||||
f_ux[:, 0, 0, 2] = -np.sin(xs[:, 2])
|
||||
f_ux[:, 1, 0, 2] = np.cos(xs[:, 2])
|
||||
|
||||
return f_ux * dt
|
||||
|
||||
def calc_f_uu(self, xs, us, dt):
|
||||
""" hessian of model with respect to input in batch form
|
||||
|
||||
Args:
|
||||
xs (numpy.ndarray): state, shape(pred_len+1, state_size)
|
||||
us (numpy.ndarray): input, shape(pred_len, input_size,)
|
||||
|
||||
Return:
|
||||
f_uu (numpy.ndarray): gradient of model with respect to x,
|
||||
shape(pred_len, state_size, input_size, input_size)
|
||||
"""
|
||||
# get size
|
||||
(_, state_size) = xs.shape
|
||||
(pred_len, input_size) = us.shape
|
||||
|
||||
f_uu = np.zeros((pred_len, state_size, input_size, input_size))
|
||||
|
||||
return f_uu * dt
|
|
@ -1,5 +1,6 @@
|
|||
from .first_order_lag import FirstOrderLagModel
|
||||
from .two_wheeled import TwoWheeledModel
|
||||
from .cartpole import CartPoleModel
|
||||
|
||||
def make_model(args, config):
|
||||
|
||||
|
@ -7,5 +8,7 @@ def make_model(args, config):
|
|||
return FirstOrderLagModel(config)
|
||||
elif args.env == "TwoWheeledConst" or args.env == "TwoWheeled":
|
||||
return TwoWheeledModel(config)
|
||||
elif args.env == "CartPole":
|
||||
return CartPoleModel(config)
|
||||
|
||||
raise NotImplementedError("There is not {} Model".format(args.env))
|
|
@ -15,7 +15,7 @@ PythonLinearNonLinearControl is a library implementing the linear and nonlinear
|
|||
| Linear Model Predictive Control (MPC) | ✓ | x | x | x | x |
|
||||
| Cross Entropy Method (CEM) | ✓ | ✓ | x | x | x |
|
||||
| Model Preidictive Path Integral Control of Nagabandi, A. (MPPI) | ✓ | ✓ | x | x | x |
|
||||
| Model Preidictive Path Integral Control of Williams (MPPIWilliams) | ✓ | ✓ | x | x | x |
|
||||
| Model Preidictive Path Integral Control of Williams, G. (MPPIWilliams) | ✓ | ✓ | x | x | x |
|
||||
| Random Shooting Method (Random) | ✓ | ✓ | x | x | x |
|
||||
| Iterative LQR (iLQR) | x | ✓ | x | ✓ | x |
|
||||
| Differential Dynamic Programming (DDP) | x | ✓ | x | ✓ | ✓ |
|
||||
|
@ -34,7 +34,7 @@ Following algorithms are implemented in PythonLinearNonlinearControl
|
|||
- [Cross Entropy Method (CEM)](https://arxiv.org/abs/1805.12114)
|
||||
- Ref: Chua, K., Calandra, R., McAllister, R., & Levine, S. (2018). Deep reinforcement learning in a handful of trials using probabilistic dynamics models. In Advances in Neural Information Processing Systems (pp. 4754-4765)
|
||||
- [script](PythonLinearNonlinearControl/controllers/cem.py)
|
||||
- [Model Preidictive Path Integral Control Nagabandi, A. (MPPI)](https://arxiv.org/abs/1909.11652)
|
||||
- [Model Preidictive Path Integral Control of Nagabandi, A. (MPPI)](https://arxiv.org/abs/1909.11652)
|
||||
- Ref: Nagabandi, A., Konoglie, K., Levine, S., & Kumar, V. (2019). Deep Dynamics Models for Learning Dexterous Manipulation. arXiv preprint arXiv:1909.11652.
|
||||
- [script](PythonLinearNonlinearControl/controllers/mppi.py)
|
||||
- [Model Preidictive Path Integral Control of Williams, G. (MPPIWilliams)](https://ieeexplore.ieee.org/abstract/document/7989202)
|
||||
|
@ -71,7 +71,7 @@ Following algorithms are implemented in PythonLinearNonlinearControl
|
|||
All states and inputs of environments are continuous.
|
||||
**It should be noted that the algorithms for linear model could be applied to nonlinear enviroments if you have linealized the model of nonlinear environments.**
|
||||
|
||||
You could know abount out environmets more in [Environments.md](Environments.md)
|
||||
You could know abount our environmets more in [Environments.md](Environments.md)
|
||||
|
||||
# Usage
|
||||
|
||||
|
|
Binary file not shown.
After Width: | Height: | Size: 23 KiB |
Binary file not shown.
After Width: | Height: | Size: 22 KiB |
|
@ -42,9 +42,9 @@ def run(args):
|
|||
def main():
|
||||
parser = argparse.ArgumentParser()
|
||||
|
||||
parser.add_argument("--controller_type", type=str, default="MPPIWilliams")
|
||||
parser.add_argument("--controller_type", type=str, default="CEM")
|
||||
parser.add_argument("--planner_type", type=str, default="const")
|
||||
parser.add_argument("--env", type=str, default="FirstOrderLag")
|
||||
parser.add_argument("--env", type=str, default="TwoWheeledConst")
|
||||
parser.add_argument("--result_dir", type=str, default="./result")
|
||||
|
||||
args = parser.parse_args()
|
||||
|
|
|
@ -0,0 +1,31 @@
|
|||
import pytest
|
||||
import numpy as np
|
||||
|
||||
from PythonLinearNonlinearControl.configs.cartpole \
|
||||
import CartPoleConfigModule
|
||||
|
||||
class TestCalcCost():
|
||||
def test_calc_costs(self):
|
||||
# make config
|
||||
config = CartPoleConfigModule()
|
||||
# set
|
||||
pred_len = 5
|
||||
state_size = 4
|
||||
input_size = 1
|
||||
pop_size = 2
|
||||
pred_xs = np.ones((pop_size, pred_len, state_size))
|
||||
g_xs = np.ones((pop_size, pred_len, state_size)) * 0.5
|
||||
input_samples = np.ones((pop_size, pred_len, input_size)) * 0.5
|
||||
|
||||
costs = config.input_cost_fn(input_samples)
|
||||
|
||||
assert costs.shape == (pop_size, pred_len, input_size)
|
||||
|
||||
costs = config.state_cost_fn(pred_xs, g_xs)
|
||||
|
||||
assert costs.shape == (pop_size, pred_len, 1)
|
||||
|
||||
costs = config.terminal_state_cost_fn(pred_xs[:, -1, :],\
|
||||
g_xs[:, -1, :])
|
||||
|
||||
assert costs.shape == (pop_size, 1)
|
|
@ -0,0 +1,34 @@
|
|||
import pytest
|
||||
import numpy as np
|
||||
|
||||
from PythonLinearNonlinearControl.configs.two_wheeled \
|
||||
import TwoWheeledConfigModule
|
||||
|
||||
class TestCalcCost():
|
||||
def test_calc_costs(self):
|
||||
# make config
|
||||
config = TwoWheeledConfigModule()
|
||||
# set
|
||||
pred_len = 5
|
||||
state_size = 3
|
||||
input_size = 2
|
||||
pop_size = 2
|
||||
pred_xs = np.ones((pop_size, pred_len, state_size))
|
||||
g_xs = np.ones((pop_size, pred_len, state_size)) * 0.5
|
||||
input_samples = np.ones((pop_size, pred_len, input_size)) * 0.5
|
||||
|
||||
costs = config.input_cost_fn(input_samples)
|
||||
expected_costs = np.ones((pop_size, pred_len, input_size))*0.5
|
||||
|
||||
assert costs == pytest.approx(expected_costs**2 * np.diag(config.R))
|
||||
|
||||
costs = config.state_cost_fn(pred_xs, g_xs)
|
||||
expected_costs = np.ones((pop_size, pred_len, state_size))*0.5
|
||||
|
||||
assert costs == pytest.approx(expected_costs**2 * np.diag(config.Q))
|
||||
|
||||
costs = config.terminal_state_cost_fn(pred_xs[:, -1, :],\
|
||||
g_xs[:, -1, :])
|
||||
expected_costs = np.ones((pop_size, state_size))*0.5
|
||||
|
||||
assert costs == pytest.approx(expected_costs**2 * np.diag(config.Sf))
|
|
@ -0,0 +1,73 @@
|
|||
import pytest
|
||||
import numpy as np
|
||||
|
||||
from PythonLinearNonlinearControl.envs.cartpole import CartPoleEnv
|
||||
|
||||
class TestCartPoleEnv():
|
||||
"""
|
||||
"""
|
||||
def test_step(self):
|
||||
env = CartPoleEnv()
|
||||
|
||||
curr_x = np.ones(4)
|
||||
curr_x[2] = np.pi / 6.
|
||||
|
||||
env.reset(init_x=curr_x)
|
||||
|
||||
u = np.ones(1)
|
||||
|
||||
next_x, _, _, _ = env.step(u)
|
||||
|
||||
d_x0 = curr_x[1]
|
||||
d_x1 = (1. + env.config["mp"] * np.sin(np.pi / 6.) \
|
||||
* (env.config["l"] * (1.**2) \
|
||||
+ env.config["g"] * np.cos(np.pi / 6.))) \
|
||||
/ (env.config["mc"] + env.config["mp"] * np.sin(np.pi / 6.)**2)
|
||||
d_x2 = curr_x[3]
|
||||
d_x3 = (-1. * np.cos(np.pi / 6.) \
|
||||
- env.config["mp"] * env.config["l"] * (1.**2) \
|
||||
* np.cos(np.pi / 6.) * np.sin(np.pi / 6.) \
|
||||
- (env.config["mp"] + env.config["mc"]) * env.config["g"] \
|
||||
* np.sin(np.pi / 6.)) \
|
||||
/ (env.config["l"] \
|
||||
* (env.config["mc"] \
|
||||
+ env.config["mp"] * np.sin(np.pi / 6.)**2))
|
||||
|
||||
expected = np.array([d_x0, d_x1, d_x2, d_x3]) * env.config["dt"] \
|
||||
+ curr_x
|
||||
|
||||
assert next_x == pytest.approx(expected, abs=1e-5)
|
||||
|
||||
def test_bound_step(self):
|
||||
env = CartPoleEnv()
|
||||
|
||||
curr_x = np.ones(4)
|
||||
curr_x[2] = np.pi / 6.
|
||||
|
||||
env.reset(init_x=curr_x)
|
||||
|
||||
u = np.ones(1) * 1e3
|
||||
|
||||
next_x, _, _, _ = env.step(u)
|
||||
|
||||
u = env.config["input_upper_bound"][0]
|
||||
|
||||
d_x0 = curr_x[1]
|
||||
d_x1 = (u + env.config["mp"] * np.sin(np.pi / 6.) \
|
||||
* (env.config["l"] * (1.**2) \
|
||||
+ env.config["g"] * np.cos(np.pi / 6.))) \
|
||||
/ (env.config["mc"] + env.config["mp"] * np.sin(np.pi / 6.)**2)
|
||||
d_x2 = curr_x[3]
|
||||
d_x3 = (-u * np.cos(np.pi / 6.) \
|
||||
- env.config["mp"] * env.config["l"] * (1.**2) \
|
||||
* np.cos(np.pi / 6.) * np.sin(np.pi / 6.) \
|
||||
- (env.config["mp"] + env.config["mc"]) * env.config["g"] \
|
||||
* np.sin(np.pi / 6.)) \
|
||||
/ (env.config["l"] \
|
||||
* (env.config["mc"] \
|
||||
+ env.config["mp"] * np.sin(np.pi / 6.)**2))
|
||||
|
||||
expected = np.array([d_x0, d_x1, d_x2, d_x3]) * env.config["dt"] \
|
||||
+ curr_x
|
||||
|
||||
assert next_x == pytest.approx(expected, abs=1e-5)
|
|
@ -0,0 +1,57 @@
|
|||
import pytest
|
||||
import numpy as np
|
||||
|
||||
from PythonLinearNonlinearControl.models.cartpole import CartPoleModel
|
||||
from PythonLinearNonlinearControl.configs.cartpole \
|
||||
import CartPoleConfigModule
|
||||
|
||||
class TestCartPoleModel():
|
||||
"""
|
||||
"""
|
||||
def test_step(self):
|
||||
config = CartPoleConfigModule()
|
||||
cartpole_model = CartPoleModel(config)
|
||||
|
||||
curr_x = np.ones(4)
|
||||
curr_x[2] = np.pi / 6.
|
||||
|
||||
us = np.ones((1, 1))
|
||||
|
||||
next_x = cartpole_model.predict_traj(curr_x, us)
|
||||
|
||||
d_x0 = curr_x[1]
|
||||
d_x1 = (1. + config.MP * np.sin(np.pi / 6.) \
|
||||
* (config.L * (1.**2) \
|
||||
+ config.G * np.cos(np.pi / 6.))) \
|
||||
/ (config.MC + config.MP * np.sin(np.pi / 6.)**2)
|
||||
d_x2 = curr_x[3]
|
||||
d_x3 = (-1. * np.cos(np.pi / 6.) \
|
||||
- config.MP * config.L * (1.**2) \
|
||||
* np.cos(np.pi / 6.) * np.sin(np.pi / 6.) \
|
||||
- (config.MP + config.MC) * config.G \
|
||||
* np.sin(np.pi / 6.)) \
|
||||
/ (config.L \
|
||||
* (config.MC \
|
||||
+ config.MP * np.sin(np.pi / 6.)**2))
|
||||
|
||||
expected = np.array([d_x0, d_x1, d_x2, d_x3]) * config.DT \
|
||||
+ curr_x
|
||||
|
||||
expected = np.stack((curr_x, expected), axis=0)
|
||||
|
||||
assert next_x == pytest.approx(expected, abs=1e-5)
|
||||
|
||||
def test_predict_traj(self):
|
||||
config = CartPoleConfigModule()
|
||||
cartpole_model = CartPoleModel(config)
|
||||
|
||||
curr_x = np.ones(config.STATE_SIZE)
|
||||
curr_x[-1] = np.pi / 6.
|
||||
u = np.ones((1, config.INPUT_SIZE))
|
||||
|
||||
pred_xs = cartpole_model.predict_traj(curr_x, u)
|
||||
|
||||
u = np.tile(u, (2, 1, 1))
|
||||
pred_xs_alltogether = cartpole_model.predict_traj(curr_x, u)[0]
|
||||
|
||||
assert pred_xs_alltogether == pytest.approx(pred_xs)
|
|
@ -0,0 +1,43 @@
|
|||
import pytest
|
||||
import numpy as np
|
||||
|
||||
from PythonLinearNonlinearControl.models.model \
|
||||
import LinearModel
|
||||
from PythonLinearNonlinearControl.models.first_order_lag \
|
||||
import FirstOrderLagModel
|
||||
from PythonLinearNonlinearControl.configs.first_order_lag \
|
||||
import FirstOrderLagConfigModule
|
||||
|
||||
from unittest.mock import patch
|
||||
from unittest.mock import Mock
|
||||
|
||||
class TestFirstOrderLagModel():
|
||||
"""
|
||||
"""
|
||||
def test_step(self):
|
||||
config = FirstOrderLagConfigModule()
|
||||
firstorderlag_model = FirstOrderLagModel(config)
|
||||
|
||||
curr_x = np.ones(config.STATE_SIZE)
|
||||
u = np.ones((1, config.INPUT_SIZE))
|
||||
|
||||
with patch.object(LinearModel, "predict_traj") as mock_predict_traj:
|
||||
firstorderlag_model.predict_traj(curr_x, u)
|
||||
|
||||
mock_predict_traj.assert_called_once_with(curr_x, u)
|
||||
|
||||
def test_predict_traj(self):
|
||||
|
||||
config = FirstOrderLagConfigModule()
|
||||
firstorderlag_model = FirstOrderLagModel(config)
|
||||
|
||||
curr_x = np.ones(config.STATE_SIZE)
|
||||
curr_x[-1] = np.pi / 6.
|
||||
u = np.ones((1, config.INPUT_SIZE))
|
||||
|
||||
pred_xs = firstorderlag_model.predict_traj(curr_x, u)
|
||||
|
||||
u = np.tile(u, (1, 1, 1))
|
||||
pred_xs_alltogether = firstorderlag_model.predict_traj(curr_x, u)[0]
|
||||
|
||||
assert pred_xs_alltogether == pytest.approx(pred_xs)
|
Loading…
Reference in New Issue