Add: Environments.md and MPPIWilliams

This commit is contained in:
Shunichi09 2020-04-06 17:20:17 +09:00
parent a36a8bc9c1
commit 4e01264bd9
20 changed files with 669 additions and 30 deletions

Environments.md Normal file

@ -0,0 +1,34 @@
# Environments
| Name | Linear | Nonlinear | State Size | Input size |
|:----------|:---------------:|:----------------:|:----------------:|:----------------:|
| First Order Lag System | ✓ | x | 4 | 2 |
| Two wheeled System (Constant Goal) | x | ✓ | 3 | 2 |
| Two wheeled System (Moving Goal) (Coming soon) | x | ✓ | 3 | 2 |
| Cartpole (Swing up) | x | ✓ | 4 | 1 |
## FirstOrderLagEnv
System equations.
<img src="assets/firstorderlag.png" width="550">
You can set an arbitrary time constant, tau. The default is 0.63 s.
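For intuition, here is a minimal sketch of a single lag channel (an assumption for illustration: a unit-gain first-order lag discretized with forward Euler; the library's actual A and B matrices follow the system equations pictured above):

```python
import numpy as np

# Hedged sketch: per-channel first-order lag x_dot = (u - x) / tau,
# discretized with forward Euler. tau = 0.63 s is the documented
# default; dt = 0.05 s matches the config. Illustration only.
def first_order_lag_step(x, u, tau=0.63, dt=0.05):
    return x + dt * (u - x) / tau

x, u = np.zeros(2), np.ones(2)
for _ in range(200):
    x = first_order_lag_step(x, u)
print(x)  # settles toward u as the lag decays
```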
## TwoWheeledEnv
System equations.
<img src="assets/twowheeled.png" width="300">
## CartPoleEnv (Swing up)
System equations.
<img src="assets/cartpole.png" width="600">
You can set arbitrary parameters: mc, mp, l, and g.
Default settings are as follows:
mc = 1, mp = 0.2, l = 0.5, g = 9.8
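As a sketch of one common form of these dynamics (the sign conventions and the state ordering (x, x_dot, theta, theta_dot) are assumptions; the image above is authoritative):

```python
import numpy as np

# Hedged sketch of cartpole accelerations with the documented defaults
# mc = 1, mp = 0.2, l = 0.5, g = 9.8; f is the force applied to the cart.
def cartpole_accelerations(state, f, mc=1.0, mp=0.2, l=0.5, g=9.8):
    _, _, theta, theta_dot = state
    s, c = np.sin(theta), np.cos(theta)
    denom = mc + mp * s**2
    x_acc = (f + mp * s * (l * theta_dot**2 + g * c)) / denom
    theta_acc = (-f * c - mp * l * theta_dot**2 * c * s
                 - (mc + mp) * g * s) / (l * denom)
    return x_acc, theta_acc
```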


@ -1,2 +1 @@
import numpy as np


@ -5,7 +5,7 @@ class FirstOrderLagConfigModule():
ENV_NAME = "FirstOrderLag-v0"
TYPE = "Linear"
TASK_HORIZON = 1000
PRED_LEN = 10
PRED_LEN = 50
STATE_SIZE = 4
INPUT_SIZE = 2
DT = 0.05
@ -43,8 +43,33 @@ class FirstOrderLagConfigModule():
"kappa": 0.9,
"noise_sigma": 0.5,
},
"MPPIWilliams":{
"popsize": 5000,
"lambda": 1.,
"noise_sigma": 0.9,
},
"MPC":{
}
},
"iLQR":{
"max_iter": 500,
"init_mu": 1.,
"mu_min": 1e-6,
"mu_max": 1e10,
"init_delta": 2.,
"threshold": 1e-6,
},
"DDP":{
"max_iter": 500,
"init_mu": 1.,
"mu_min": 1e-6,
"mu_max": 1e10,
"init_delta": 2.,
"threshold": 1e-6,
},
"NMPC-CGMRES":{
},
"NMPC-Newton":{
},
}
@staticmethod
@ -86,4 +111,89 @@ class FirstOrderLagConfigModule():
shape(pop_size, pred_len)
"""
return ((terminal_x - terminal_g_x)**2) \
* np.diag(FirstOrderLagConfigModule.Sf)
@staticmethod
def gradient_cost_fn_with_state(x, g_x, terminal=False):
""" gradient of costs with respect to the state
Args:
x (numpy.ndarray): state, shape(pred_len, state_size)
g_x (numpy.ndarray): goal state, shape(pred_len, state_size)
Returns:
l_x (numpy.ndarray): gradient of cost, shape(pred_len, state_size)
or shape(1, state_size)
"""
if not terminal:
return 2. * (x - g_x) * np.diag(FirstOrderLagConfigModule.Q)
return (2. * (x - g_x) \
* np.diag(FirstOrderLagConfigModule.Sf))[np.newaxis, :]
@staticmethod
def gradient_cost_fn_with_input(x, u):
""" gradient of costs with respect to the input
Args:
x (numpy.ndarray): state, shape(pred_len, state_size)
u (numpy.ndarray): input, shape(pred_len, input_size)
Returns:
l_u (numpy.ndarray): gradient of cost, shape(pred_len, input_size)
"""
return 2. * u * np.diag(FirstOrderLagConfigModule.R)
@staticmethod
def hessian_cost_fn_with_state(x, g_x, terminal=False):
""" hessian costs with respect to the state
Args:
x (numpy.ndarray): state, shape(pred_len, state_size)
g_x (numpy.ndarray): goal state, shape(pred_len, state_size)
Returns:
l_xx (numpy.ndarray): hessian of cost,
shape(pred_len, state_size, state_size) or
shape(1, state_size, state_size)
"""
if not terminal:
(pred_len, _) = x.shape
return np.tile(2.*FirstOrderLagConfigModule.Q, (pred_len, 1, 1))
return np.tile(2.*FirstOrderLagConfigModule.Sf, (1, 1, 1))
@staticmethod
def hessian_cost_fn_with_input(x, u):
""" hessian costs with respect to the input
Args:
x (numpy.ndarray): state, shape(pred_len, state_size)
u (numpy.ndarray): input, shape(pred_len, input_size)
Returns:
l_uu (numpy.ndarray): hessian of cost,
shape(pred_len, input_size, input_size)
"""
(pred_len, _) = u.shape
return np.tile(2.*FirstOrderLagConfigModule.R, (pred_len, 1, 1))
@staticmethod
def hessian_cost_fn_with_input_state(x, u):
""" hessian costs with respect to the state and input
Args:
x (numpy.ndarray): state, shape(pred_len, state_size)
u (numpy.ndarray): input, shape(pred_len, input_size)
Returns:
l_ux (numpy.ndarray): hessian of cost,
shape(pred_len, input_size, state_size)
"""
(_, state_size) = x.shape
(pred_len, input_size) = u.shape
return np.zeros((pred_len, input_size, state_size))
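# Hypothetical helper (illustration only, not part of the library):
# analytic gradients like the ones above are easy to get subtly wrong,
# so a central-difference cross-check is cheap insurance.
def _check_gradient(cost_fn, grad_fn, x, eps=1e-6):
    """ compare an analytic gradient against central differences """
    num_grad = np.zeros_like(x)
    for i in range(x.size):
        dx = np.zeros_like(x)
        dx[i] = eps
        num_grad[i] = (cost_fn(x + dx) - cost_fn(x - dx)) / (2. * eps)
    return np.max(np.abs(num_grad - grad_fn(x)))

# e.g. with the diagonal stage cost used by this module:
#   g_x = np.zeros(4)
#   cost = lambda x: np.sum((x - g_x)**2 * np.diag(FirstOrderLagConfigModule.Q))
#   grad = lambda x: 2. * (x - g_x) * np.diag(FirstOrderLagConfigModule.Q)
#   assert _check_gradient(cost, grad, np.random.randn(4)) < 1e-5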


@ -39,6 +39,11 @@ class TwoWheeledConfigModule():
"kappa": 0.9,
"noise_sigma": 0.5,
},
"MPPIWilliams":{
"popsize": 5000,
"lambda": 1,
"noise_sigma": 1.,
},
"iLQR":{
"max_iter": 500,
"init_mu": 1.,


@ -23,10 +23,6 @@ class DDP(Controller):
"""
super(DDP, self).__init__(config, model)
if config.TYPE != "Nonlinear":
raise ValueError("{} cannot be applied to \
this controller".format(model))
# model
self.model = model
@ -296,6 +292,7 @@ class DDP(Controller):
def backward(self, f_x, f_u, f_xx, f_ux, f_uu, l_x, l_xx, l_u, l_uu, l_ux):
""" backward step of iLQR
Args:
f_x (numpy.ndarray): gradient of model with respect to the state,
shape(pred_len+1, state_size, state_size)
@ -317,7 +314,6 @@ class DDP(Controller):
shape(pred_len, input_size, input_size)
l_ux (numpy.ndarray): hessian of cost with respect
to state and input, shape(pred_len, input_size, state_size)
Returns:
k (numpy.ndarray): gain, shape(pred_len, input_size)
K (numpy.ndarray): gain, shape(pred_len, input_size, state_size)
@ -353,7 +349,8 @@ class DDP(Controller):
def _Q(self, f_x, f_u, f_xx, f_ux, f_uu,
l_x, l_u, l_xx, l_ux, l_uu, V_x, V_xx):
"""Computes second order expansion.
""" compute Q function valued
Args:
f_x (numpy.ndarray): gradient of model with respect to the state,
shape(state_size, state_size)


@ -21,10 +21,6 @@ class iLQR(Controller):
"""
super(iLQR, self).__init__(config, model)
if config.TYPE != "Nonlinear":
raise ValueError("{} cannot be applied to \
this controller".format(model))
# model
self.model = model


@ -2,6 +2,7 @@ from .mpc import LinearMPC
from .cem import CEM
from .random import RandomShooting
from .mppi import MPPI
from .mppi_williams import MPPIWilliams
from .ilqr import iLQR
from .ddp import DDP
@ -15,6 +16,8 @@ def make_controller(args, config, model):
return RandomShooting(config, model)
elif args.controller_type == "MPPI":
return MPPI(config, model)
elif args.controller_type == "MPPIWilliams":
return MPPIWilliams(config, model)
elif args.controller_type == "iLQR":
return iLQR(config, model)
elif args.controller_type == "DDP":


@ -0,0 +1,143 @@
from logging import getLogger
import numpy as np
import scipy.stats as stats
from .controller import Controller
from ..envs.cost import calc_cost
logger = getLogger(__name__)
class MPPIWilliams(Controller):
""" Model Predictive Path Integral for linear and nonlinear method
Attributes:
history_u (list[numpy.ndarray]): time history of optimal input
Ref:
G. Williams et al., "Information theoretic MPC
for model-based reinforcement learning,"
2017 IEEE International Conference on Robotics and Automation (ICRA),
Singapore, 2017, pp. 1714-1721.
"""
def __init__(self, config, model):
super(MPPIWilliams, self).__init__(config, model)
# model
self.model = model
# general parameters
self.pred_len = config.PRED_LEN
self.input_size = config.INPUT_SIZE
# mppi parameters
self.pop_size = config.opt_config["MPPIWilliams"]["popsize"]
self.lam = config.opt_config["MPPIWilliams"]["lambda"]
self.noise_sigma = config.opt_config["MPPIWilliams"]["noise_sigma"]
self.opt_dim = self.input_size * self.pred_len
# get bound
self.input_upper_bounds = np.tile(config.INPUT_UPPER_BOUND,
(self.pred_len, 1))
self.input_lower_bounds = np.tile(config.INPUT_LOWER_BOUND,
(self.pred_len, 1))
# get cost func
self.state_cost_fn = config.state_cost_fn
self.terminal_state_cost_fn = config.terminal_state_cost_fn
self.input_cost_fn = config.input_cost_fn
# init mean
self.prev_sol = np.tile((config.INPUT_UPPER_BOUND \
+ config.INPUT_LOWER_BOUND) / 2.,
self.pred_len)
self.prev_sol = self.prev_sol.reshape(self.pred_len, self.input_size)
# save
self.history_u = [np.zeros(self.input_size)]
def clear_sol(self):
""" clear prev sol
"""
logger.debug("Clear Solution")
self.prev_sol = \
(self.input_upper_bounds + self.input_lower_bounds) / 2.
self.prev_sol = self.prev_sol.reshape(self.pred_len, self.input_size)
def calc_cost(self, curr_x, samples, g_xs):
""" calculate the cost of input samples by using MPPI's eq
Args:
curr_x (numpy.ndarray): shape(state_size),
current robot position
samples (numpy.ndarray): shape(pop_size, opt_dim),
input samples
g_xs (numpy.ndarray): shape(pred_len, state_size),
goal states
Returns:
costs (numpy.ndarray): shape(pop_size, )
"""
# get size
pop_size = samples.shape[0]
g_xs = np.tile(g_xs, (pop_size, 1, 1))
# calc cost, pred_xs.shape = (pop_size, pred_len+1, state_size)
pred_xs = self.model.predict_traj(curr_x, samples)
# get particle cost
costs = calc_cost(pred_xs, samples, g_xs,
self.state_cost_fn, None, \
self.terminal_state_cost_fn)
return costs
def obtain_sol(self, curr_x, g_xs):
""" calculate the optimal inputs
Args:
curr_x (numpy.ndarray): current state, shape(state_size, )
g_xs (numpy.ndarray): goal trajectory, shape(plan_len, state_size)
Returns:
opt_input (numpy.ndarray): optimal input, shape(input_size, )
"""
# get noised inputs
noise = np.random.normal(
loc=0, scale=1.0, size=(self.pop_size, self.pred_len,
self.input_size)) * self.noise_sigma
noised_inputs = self.prev_sol + noise
# clip actions
noised_inputs = np.clip(
noised_inputs, self.input_lower_bounds, self.input_upper_bounds)
# calc cost
costs = self.calc_cost(curr_x, noised_inputs, g_xs)
costs += np.sum(np.sum(
self.lam * self.prev_sol * noise / self.noise_sigma,
axis=-1), axis=-1)
# mppi update
beta = np.min(costs)
eta = np.sum(np.exp(- 1. / self.lam * (costs - beta)), axis=0) \
+ 1e-10
# weight
# weights.shape = (pop_size, )
weights = np.exp(- 1. / self.lam * (costs - beta)) / eta
# update inputs
sol = self.prev_sol \
+ np.sum(weights[:, np.newaxis, np.newaxis] * noise, axis=0)
# update
self.prev_sol[:-1] = sol[1:]
self.prev_sol[-1] = sol[-1] # repeat the terminal input for the last step
# log
self.history_u.append(sol[0])
return sol[0]
def __str__(self):
return "MPPIWilliams"


@ -0,0 +1,87 @@
import numpy as np
from .env import Env
class CartPoleEnv(Env):
""" Cartpole Environment
Ref :
https://ocw.mit.edu/courses/
electrical-engineering-and-computer-science/
6-832-underactuated-robotics-spring-2009/readings/
MIT6_832s09_read_ch03.pdf
"""
def __init__(self):
"""
"""
self.config = {"state_size" : 4,\
"input_size" : 1,\
"dt" : 0.02,\
"max_step" : 1000,\
"input_lower_bound": None,\
"input_upper_bound": None,
}
super(CartPoleEnv, self).__init__(self.config)
def reset(self, init_x=None):
""" reset state
Returns:
init_x (numpy.ndarray): initial state, shape(state_size, )
info (dict): information
"""
self.step_count = 0
self.curr_x = np.zeros(self.config["state_size"])
if init_x is not None:
self.curr_x = init_x
# goal
self.g_x = np.array([0., 0., np.pi, 0.])
# clear memory
self.history_x = []
self.history_g_x = []
return self.curr_x, {"goal_state": self.g_x}
def step(self, u):
""" step environments
Args:
u (numpy.ndarray) : input, shape(input_size, )
Returns:
next_x (numpy.ndarray): next state, shape(state_size, )
cost (float): costs
done (bool): end the simulation or not
info (dict): information
"""
# clip action
if self.config["input_lower_bound"] is not None:
u = np.clip(u,
self.config["input_lower_bound"],
self.config["input_upper_bound"])
# step
# TODO: implement the cartpole dynamics; next_x is a placeholder
next_x = np.zeros(self.config["state_size"])
# costs
costs = 0.
costs += 0.1 * np.sum(u**2)
costs += np.sum((self.curr_x - self.g_x)**2)
# save history
self.history_x.append(next_x.flatten())
self.history_g_x.append(self.g_x.flatten())
# update
self.curr_x = next_x.flatten()
# update costs
self.step_count += 1
return next_x.flatten(), costs, \
self.step_count > self.config["max_step"], \
{"goal_state" : self.g_x}


@ -22,16 +22,22 @@ def calc_cost(pred_xs, input_sample, g_xs,
cost (numpy.ndarray): cost of the input sample, shape(pop_size, )
"""
# state cost
state_pred_par_cost = state_cost_fn(pred_xs[:, 1:-1, :], g_xs[:, 1:-1, :])
state_cost = np.sum(np.sum(state_pred_par_cost, axis=-1), axis=-1)
state_cost = 0.
if state_cost_fn is not None:
state_pred_par_cost = state_cost_fn(pred_xs[:, 1:-1, :], g_xs[:, 1:-1, :])
state_cost = np.sum(np.sum(state_pred_par_cost, axis=-1), axis=-1)
# terminal cost
terminal_state_par_cost = terminal_state_cost_fn(pred_xs[:, -1, :],
g_xs[:, -1, :])
terminal_state_cost = np.sum(terminal_state_par_cost, axis=-1)
terminal_state_cost = 0.
if terminal_state_cost_fn is not None:
terminal_state_par_cost = terminal_state_cost_fn(pred_xs[:, -1, :],
g_xs[:, -1, :])
terminal_state_cost = np.sum(terminal_state_par_cost, axis=-1)
# act cost
act_pred_par_cost = input_cost_fn(input_sample)
act_cost = np.sum(np.sum(act_pred_par_cost, axis=-1), axis=-1)
act_cost = 0.
if input_cost_fn is not None:
act_pred_par_cost = input_cost_fn(input_sample)
act_cost = np.sum(np.sum(act_pred_par_cost, axis=-1), axis=-1)
return state_cost + terminal_state_cost + act_cost
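# Hedged usage note: MPPIWilliams relies on the None guards above by
# passing input_cost_fn=None, e.g.
#
#   costs = calc_cost(pred_xs, samples, g_xs,
#                     state_cost_fn, None, terminal_state_cost_fn)
#
# with pred_xs of shape (pop_size, pred_len+1, state_size) and
# samples of shape (pop_size, pred_len, input_size).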


@ -1,5 +1,6 @@
from .first_order_lag import FirstOrderLagEnv
from .two_wheeled import TwoWheeledConstEnv
from .cartpole import CartPoleEnv
def make_env(args):
@ -7,5 +8,7 @@ def make_env(args):
return FirstOrderLagEnv()
elif args.env == "TwoWheeledConst":
return TwoWheeledConstEnv()
elif args.env == "CartPole":
return CartPoleEnv()
raise NotImplementedError("There is no {} Env".format(args.env))


@ -211,3 +211,94 @@ class LinearModel(Model):
next_x = np.matmul(curr_x, self.A.T) + np.matmul(u, self.B.T)
return next_x
def calc_f_x(self, xs, us, dt):
""" gradient of model with respect to the state in batch form
Args:
xs (numpy.ndarray): state, shape(pred_len+1, state_size)
us (numpy.ndarray): input, shape(pred_len, input_size,)
Return:
f_x (numpy.ndarray): gradient of model with respect to x,
shape(pred_len, state_size, state_size)
Notes:
This should be discrete form !!
"""
# get size
(pred_len, _) = us.shape
return np.tile(self.A, (pred_len, 1, 1))
def calc_f_u(self, xs, us, dt):
""" gradient of model with respect to the input in batch form
Args:
xs (numpy.ndarray): state, shape(pred_len+1, state_size)
us (numpy.ndarray): input, shape(pred_len, input_size,)
Return:
f_u (numpy.ndarray): gradient of model with respect to u,
shape(pred_len, state_size, input_size)
Notes:
This should be discrete form !!
"""
# get size
(pred_len, input_size) = us.shape
return np.tile(self.B, (pred_len, 1, 1))
@staticmethod
def calc_f_xx(xs, us, dt):
""" hessian of model with respect to the state in batch form
Args:
xs (numpy.ndarray): state, shape(pred_len+1, state_size)
us (numpy.ndarray): input, shape(pred_len, input_size,)
Return:
f_xx (numpy.ndarray): hessian of model with respect to x,
shape(pred_len, state_size, state_size, state_size)
"""
# get size
(_, state_size) = xs.shape
(pred_len, _) = us.shape
f_xx = np.zeros((pred_len, state_size, state_size, state_size))
return f_xx
@staticmethod
def calc_f_ux(xs, us, dt):
""" hessian of model with respect to state and input in batch form
Args:
xs (numpy.ndarray): state, shape(pred_len+1, state_size)
us (numpy.ndarray): input, shape(pred_len, input_size,)
Return:
f_ux (numpy.ndarray): hessian of model with respect to u and x,
shape(pred_len, state_size, input_size, state_size)
"""
# get size
(_, state_size) = xs.shape
(pred_len, input_size) = us.shape
f_ux = np.zeros((pred_len, state_size, input_size, state_size))
return f_ux
@staticmethod
def calc_f_uu(xs, us, dt):
""" hessian of model with respect to input in batch form
Args:
xs (numpy.ndarray): state, shape(pred_len+1, state_size)
us (numpy.ndarray): input, shape(pred_len, input_size,)
Return:
f_uu (numpy.ndarray): hessian of model with respect to u,
shape(pred_len, state_size, input_size, input_size)
"""
# get size
(_, state_size) = xs.shape
(pred_len, input_size) = us.shape
f_uu = np.zeros((pred_len, state_size, input_size, input_size))
return f_uu
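# Hypothetical sanity check (illustration only, not part of the library):
# for x_next = A x + B u, central differences recover A and B, matching
# the tiled Jacobians returned above.
if __name__ == "__main__":
    A = np.array([[0.9, 0.1], [0., 0.8]])
    B = np.array([[0.1], [0.2]])
    f = lambda x, u: A.dot(x) + B.dot(u)
    x0, u0, eps = np.zeros(2), np.zeros(1), 1e-6
    f_x = np.column_stack([(f(x0 + eps * e, u0) - f(x0 - eps * e, u0))
                           / (2. * eps) for e in np.eye(2)])
    f_u = (f(x0, u0 + eps) - f(x0, u0 - eps)) / (2. * eps)
    assert np.allclose(f_x, A) and np.allclose(f_u.reshape(2, 1), B)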


@ -3,6 +3,8 @@ import os
import numpy as np
import matplotlib.pyplot as plt
from ..helper import save_pickle, load_pickle
def plot_result(history, history_g=None, ylabel="x",
save_dir="./result", name="state_history"):
"""
@ -47,14 +49,108 @@ def plot_result(history, history_g=None, ylabel="x",
def plot_results(args, history_x, history_u, history_g=None):
"""
Args:
history_x (numpy.ndarray): history of state, shape(iters, state_size)
history_u (numpy.ndarray): history of input, shape(iters, input_size)
Returns:
None
"""
plot_result(history_x, history_g=history_g, ylabel="x",
name="state_history",
name= args.env + "-state_history",
save_dir="./result/" + args.controller_type)
plot_result(history_u, history_g=np.zeros_like(history_u), ylabel="u",
name="input_history",
save_dir="./result/" + args.controller_type)
name= args.env + "-input_history",
save_dir="./result/" + args.controller_type)
def save_plot_data(args, history_x, history_u, history_g=None):
""" save plot data
Args:
history_x (numpy.ndarray): history of state, shape(iters, state_size)
history_u (numpy.ndarray): history of input, shape(iters, input_size)
Returns:
None
"""
path = os.path.join("./result/" + args.controller_type,
args.env + "-history_x.pkl")
save_pickle(path, history_x)
path = os.path.join("./result/" + args.controller_type,
args.env + "-history_u.pkl")
save_pickle(path, history_u)
path = os.path.join("./result/" + args.controller_type,
args.env + "-history_g.pkl")
save_pickle(path, history_g)
def load_plot_data(env, controller_type, result_dir="./result"):
"""
Args:
env (str): environments name
controller_type (str): controller type
result_dir (str): result directory
Returns:
history_x (numpy.ndarray): history of state, shape(iters, state_size)
history_u (numpy.ndarray): history of input, shape(iters, input_size)
history_g (numpy.ndarray): history of goal state, shape(iters, state_size)
"""
path = os.path.join("./result/" + controller_type,
env + "-history_x.pkl")
history_x = load_pickle(path)
path = os.path.join("./result/" + controller_type,
env + "-history_u.pkl")
history_u = load_pickle(path)
path = os.path.join("./result/" + controller_type,
env + "-history_g.pkl")
history_g = load_pickle(path)
return history_x, history_u, history_g
def plot_multi_result(histories, histories_g=None, labels=None, ylabel="x",
save_dir="./result", name="state_history"):
"""
Args:
histories (numpy.ndarray): histories, shape(n, iters, size)
"""
(_, iters, size) = histories.shape
if histories_g is None:
histories_g = np.zeros_like(histories)
for i in range(0, size, 2):
figure = plt.figure()
axis1 = figure.add_subplot(211)
axis2 = figure.add_subplot(212)
axis1.set_ylabel(ylabel + "_{}".format(i))
axis2.set_ylabel(ylabel + "_{}".format(i+1))
axis2.set_xlabel("time steps")
# gt
def plot(axis, history, history_g=None, label=""):
axis.plot(range(iters), history,
linewidth=3, label=label, alpha=0.7, linestyle="dashed")
if history_g is not None:
axis.plot(range(iters), history_g,\
c="b", linewidth=3)
if i < size:
for j, (history, history_g) \
in enumerate(zip(histories, histories_g)):
plot(axis1, history[:, i],
history_g=history_g[:, i], label=labels[j])
if i+1 < size:
for j, (history, history_g) in \
enumerate(zip(histories, histories_g)):
plot(axis2, history[:, i+1],
history_g=history_g[:, i+1], label=labels[j])
# save
if save_dir is not None:
path = os.path.join(save_dir, name + "-{}".format(i))
else:
path = name
axis1.legend(ncol=3, bbox_to_anchor=(0., 1.02, 1., 0.102), loc=3)
figure.savefig(path, bbox_inches="tight", pad_inches=0.05)


@ -14,7 +14,8 @@ PythonLinearNonLinearControl is a library implementing the linear and nonlinear
|:----------|:---------------: |:----------------:|:----------------:|:----------------:|:----------------:|
| Linear Model Predictive Control (MPC) | ✓ | x | x | x | x |
| Cross Entropy Method (CEM) | ✓ | ✓ | x | x | x |
| Model Predictive Path Integral Control (MPPI) | ✓ | ✓ | x | x | x |
| Model Predictive Path Integral Control of Nagabandi, A. (MPPI) | ✓ | ✓ | x | x | x |
| Model Predictive Path Integral Control of Williams (MPPIWilliams) | ✓ | ✓ | x | x | x |
| Random Shooting Method (Random) | ✓ | ✓ | x | x | x |
| Iterative LQR (iLQR) | x | ✓ | x | ✓ | x |
| Differential Dynamic Programming (DDP) | x | ✓ | x | ✓ | ✓ |
@ -33,9 +34,12 @@ Following algorithms are implemented in PythonLinearNonlinearControl
- [Cross Entropy Method (CEM)](https://arxiv.org/abs/1805.12114)
- Ref: Chua, K., Calandra, R., McAllister, R., & Levine, S. (2018). Deep reinforcement learning in a handful of trials using probabilistic dynamics models. In Advances in Neural Information Processing Systems (pp. 4754-4765)
- [script](PythonLinearNonlinearControl/controllers/cem.py)
- [Model Predictive Path Integral Control (MPPI)](https://arxiv.org/abs/1909.11652)
- [Model Predictive Path Integral Control of Nagabandi, A. (MPPI)](https://arxiv.org/abs/1909.11652)
- Ref: Nagabandi, A., Konolige, K., Levine, S., & Kumar, V. (2019). Deep Dynamics Models for Learning Dexterous Manipulation. arXiv preprint arXiv:1909.11652.
- [script](PythonLinearNonlinearControl/controllers/mppi.py)
- [Model Predictive Path Integral Control of Williams, G. (MPPIWilliams)](https://ieeexplore.ieee.org/abstract/document/7989202)
- Ref: Williams, G., Wagener, N., Goldfain, B., Drews, P., Rehg, J. M., Boots, B., & Theodorou, E. A. (2017, May). Information theoretic MPC for model-based reinforcement learning. In 2017 IEEE International Conference on Robotics and Automation (ICRA) (pp. 1714-1721). IEEE.
- [script](PythonLinearNonlinearControl/controllers/mppi_williams.py)
- [Random Shooting Method (Random)](https://arxiv.org/abs/1805.12114)
- Ref: Chua, K., Calandra, R., McAllister, R., & Levine, S. (2018). Deep reinforcement learning in a handful of trials using probabilistic dynamics models. In Advances in Neural Information Processing Systems (pp. 4754-4765)
- [script](PythonLinearNonlinearControl/controllers/random.py)
@ -62,10 +66,13 @@ Following algorithms are implemented in PythonLinearNonlinearControl
| First Order Lag System | ✓ | x | 4 | 2 |
| Two wheeled System (Constant Goal) | x | ✓ | 3 | 2 |
| Two wheeled System (Moving Goal) (Coming soon) | x | ✓ | 3 | 2 |
| Cartpole (Swing up) | x | ✓ | 4 | 1 |
All environments are continuous.
All states and inputs of environments are continuous.
**Note that the algorithms for linear models can be applied to nonlinear environments if you linearize the model of the nonlinear environment.**
You can learn more about the environments in [Environments.md](Environments.md)
# Usage
## To install this package

BIN assets/cartpole.png Normal file (60 KiB, not shown)

BIN assets/firstorderlag.png Normal file (37 KiB, not shown)

BIN assets/twowheeled.png Normal file (27 KiB, not shown)

scripts/show_result.py Normal file

@ -0,0 +1,55 @@
import os
import argparse
import pickle
import numpy as np
import matplotlib.pyplot as plt
from PythonLinearNonlinearControl.plotters.plot_func import load_plot_data, \
plot_multi_result
def run(args):
controllers = ["iLQR", "DDP", "CEM", "MPPI"]
history_xs = None
history_us = None
history_gs = None
# load data
for controller in controllers:
history_x, history_u, history_g = \
load_plot_data(args.env, controller,
result_dir=args.result_dir)
if history_xs is None:
history_xs = history_x[np.newaxis, :]
history_us = history_u[np.newaxis, :]
history_gs = history_g[np.newaxis, :]
continue
history_xs = np.concatenate((history_xs,
history_x[np.newaxis, :]), axis=0)
history_us = np.concatenate((history_us,
history_u[np.newaxis, :]), axis=0)
history_gs = np.concatenate((history_gs,
history_g[np.newaxis, :]), axis=0)
plot_multi_result(history_xs, histories_g=history_gs, labels=controllers,
ylabel="x")
plot_multi_result(history_us, histories_g=np.zeros_like(history_us),
labels=controllers, ylabel="u", name="input_history")
def main():
parser = argparse.ArgumentParser()
parser.add_argument("--env", type=str, default="FirstOrderLag")
parser.add_argument("--result_dir", type=str, default="./result")
args = parser.parse_args()
run(args)
if __name__ == "__main__":
main()


@ -7,7 +7,8 @@ from PythonLinearNonlinearControl.configs.make_configs import make_config
from PythonLinearNonlinearControl.models.make_models import make_model
from PythonLinearNonlinearControl.envs.make_envs import make_env
from PythonLinearNonlinearControl.runners.make_runners import make_runner
from PythonLinearNonlinearControl.plotters.plot_func import plot_results
from PythonLinearNonlinearControl.plotters.plot_func import plot_results, \
save_plot_data
def run(args):
# logger
@ -36,13 +37,14 @@ def run(args):
# plot results
plot_results(args, history_x, history_u, history_g=history_g)
save_plot_data(args, history_x, history_u, history_g=history_g)
def main():
parser = argparse.ArgumentParser()
parser.add_argument("--controller_type", type=str, default="DDP")
parser.add_argument("--controller_type", type=str, default="MPPIWilliams")
parser.add_argument("--planner_type", type=str, default="const")
parser.add_argument("--env", type=str, default="TwoWheeledConst")
parser.add_argument("--env", type=str, default="FirstOrderLag")
parser.add_argument("--result_dir", type=str, default="./result")
args = parser.parse_args()

setup.cfg Normal file

@ -0,0 +1,5 @@
[aliases]
test=pytest
[tool:pytest]
addopts=-s