This commit is contained in:
Shunichi09 2019-02-24 22:28:21 +09:00
parent 80e8a112cd
commit 3beff53d83
5 changed files with 922 additions and 125 deletions

View File

@ -1,135 +1,16 @@
# Model Predictive Control Basic Tool # Iterative Linear Quadratic Regulator
This program is about template, generic function of linear model predictive control This program is about iLQR (Iteratice Linear Quadratic Regulator)
# Documentation of the MPC function # Problem Formulation
Linear model predicitive control should have state equation.
So if you want to use this function, you should model the plant as state equation.
Therefore, the parameters of this class are as following
**class MpcController()**
Attributes :
- A : numpy.ndarray
- system matrix
- B : numpy.ndarray
- input matrix
- Q : numpy.ndarray
- evaluation function weight for states
- Qs : numpy.ndarray
- concatenated evaluation function weight for states
- R : numpy.ndarray
- evaluation function weight for inputs
- Rs : numpy.ndarray
- concatenated evaluation function weight for inputs
- pre_step : int
- prediction step
- state_size : int
- state size of the plant
- input_size : int
- input size of the plant
- dt_input_upper : numpy.ndarray, shape(input_size, ), optional
- constraints of input dt, default is None
- dt_input_lower : numpy.ndarray, shape(input_size, ), optional
- constraints of input dt, default is None
- input_upper : numpy.ndarray, shape(input_size, ), optional
- constraints of input, default is None
- input_lower : numpy.ndarray, shape(input_size, ), optional
- constraints of input, default is None
Methods:
- initialize_controller() initialize the controller
- calc_input(states, references) calculating optimal input
More details, please look the **mpc_func_with_scipy.py** and **mpc_func_with_cvxopt.py**
We have two function, mpc_func_with_cvxopt.py and mpc_func_with_scipy.py
Both functions have same variable and member function. However the solver is different.
Plese choose the right method for your environment.
- example of import
```py
from mpc_func_with_scipy import MpcController as MpcController_scipy
from mpc_func_with_cvxopt import MpcController as MpcController_cvxopt
```
# Examples
## Problem Formulation
- **first order system**
<a href="https://www.codecogs.com/eqnedit.php?latex=\frac{d}{dt}&space;\boldsymbol{X}&space;=&space;\begin{bmatrix}&space;-1/&space;\tau&space;&&space;0&space;&&space;0&space;&&space;0\\&space;0&space;&&space;-1/&space;\tau&space;&&space;0&space;&&space;0\\&space;1&space;&&space;0&space;&&space;0&space;&&space;0\\&space;0&space;&&space;1&space;&&space;0&space;&&space;0\\&space;\end{bmatrix}&space;\begin{bmatrix}&space;v_x&space;\\&space;v_y&space;\\&space;x&space;\\&space;y&space;\end{bmatrix}&space;&plus;&space;\begin{bmatrix}&space;1/&space;\tau&space;&&space;0&space;\\&space;0&space;&&space;1/&space;\tau&space;\\&space;0&space;&&space;0&space;\\&space;0&space;&&space;0&space;\\&space;\end{bmatrix}&space;\begin{bmatrix}&space;u_x&space;\\&space;u_y&space;\\&space;\end{bmatrix}&space;=&space;\boldsymbol{A}\boldsymbol{X}&space;&plus;&space;\boldsymbol{B}\boldsymbol{U}" target="_blank"><img src="https://latex.codecogs.com/gif.latex?\frac{d}{dt}&space;\boldsymbol{X}&space;=&space;\begin{bmatrix}&space;-1/&space;\tau&space;&&space;0&space;&&space;0&space;&&space;0\\&space;0&space;&&space;-1/&space;\tau&space;&&space;0&space;&&space;0\\&space;1&space;&&space;0&space;&&space;0&space;&&space;0\\&space;0&space;&&space;1&space;&&space;0&space;&&space;0\\&space;\end{bmatrix}&space;\begin{bmatrix}&space;v_x&space;\\&space;v_y&space;\\&space;x&space;\\&space;y&space;\end{bmatrix}&space;&plus;&space;\begin{bmatrix}&space;1/&space;\tau&space;&&space;0&space;\\&space;0&space;&&space;1/&space;\tau&space;\\&space;0&space;&&space;0&space;\\&space;0&space;&&space;0&space;\\&space;\end{bmatrix}&space;\begin{bmatrix}&space;u_x&space;\\&space;u_y&space;\\&space;\end{bmatrix}&space;=&space;\boldsymbol{A}\boldsymbol{X}&space;&plus;&space;\boldsymbol{B}\boldsymbol{U}" title="\frac{d}{dt} \boldsymbol{X} = \begin{bmatrix} -1/ \tau & 0 & 0 & 0\\ 0 & -1/ \tau & 0 & 0\\ 1 & 0 & 0 & 0\\ 0 & 1 & 0 & 0\\ \end{bmatrix} \begin{bmatrix} v_x \\ v_y \\ x \\ y \end{bmatrix} + \begin{bmatrix} 1/ \tau & 0 \\ 0 & 1/ \tau \\ 0 & 0 \\ 0 & 0 \\ \end{bmatrix} \begin{bmatrix} u_x \\ u_y \\ \end{bmatrix} = \boldsymbol{A}\boldsymbol{X} + \boldsymbol{B}\boldsymbol{U}" /></a>
- **ACC (Adaptive cruise control)**
The two wheeled model are expressed the following equation.
<a href="https://www.codecogs.com/eqnedit.php?latex=\frac{d}{dt}&space;\boldsymbol{X}=&space;\frac{d}{dt}&space;\begin{bmatrix}&space;x&space;\\&space;y&space;\\&space;\theta&space;\end{bmatrix}&space;=&space;\begin{bmatrix}&space;\cos(\theta)&space;&&space;0&space;\\&space;\sin(\theta)&space;&&space;0&space;\\&space;0&space;&&space;1&space;\\&space;\end{bmatrix}&space;\begin{bmatrix}&space;u_v&space;\\&space;u_\omega&space;\\&space;\end{bmatrix}&space;=&space;\boldsymbol{B}\boldsymbol{U}" target="_blank"><img src="https://latex.codecogs.com/gif.latex?\frac{d}{dt}&space;\boldsymbol{X}=&space;\frac{d}{dt}&space;\begin{bmatrix}&space;x&space;\\&space;y&space;\\&space;\theta&space;\end{bmatrix}&space;=&space;\begin{bmatrix}&space;\cos(\theta)&space;&&space;0&space;\\&space;\sin(\theta)&space;&&space;0&space;\\&space;0&space;&&space;1&space;\\&space;\end{bmatrix}&space;\begin{bmatrix}&space;u_v&space;\\&space;u_\omega&space;\\&space;\end{bmatrix}&space;=&space;\boldsymbol{B}\boldsymbol{U}" title="\frac{d}{dt} \boldsymbol{X}= \frac{d}{dt} \begin{bmatrix} x \\ y \\ \theta \end{bmatrix} = \begin{bmatrix} \cos(\theta) & 0 \\ \sin(\theta) & 0 \\ 0 & 1 \\ \end{bmatrix} \begin{bmatrix} u_v \\ u_\omega \\ \end{bmatrix} = \boldsymbol{B}\boldsymbol{U}" /></a>
However, if we assume the velocity are constant, we can approximate the equation as following,
<a href="https://www.codecogs.com/eqnedit.php?latex=\frac{d}{dt}&space;\boldsymbol{X}=&space;\frac{d}{dt}&space;\begin{bmatrix}&space;y&space;\\&space;\theta&space;\end{bmatrix}&space;=&space;\begin{bmatrix}&space;0&space;&&space;V&space;\\&space;0&space;&&space;0&space;\\&space;\end{bmatrix}&space;\begin{bmatrix}&space;y&space;\\&space;\theta&space;\end{bmatrix}&space;&plus;&space;\begin{bmatrix}&space;0&space;\\&space;1&space;\end{bmatrix}&space;\begin{bmatrix}&space;u_\omega&space;\\&space;\end{bmatrix}&space;=&space;\boldsymbol{A}\boldsymbol{X}&space;&plus;&space;\boldsymbol{B}\boldsymbol{U}" target="_blank"><img src="https://latex.codecogs.com/gif.latex?\frac{d}{dt}&space;\boldsymbol{X}=&space;\frac{d}{dt}&space;\begin{bmatrix}&space;y&space;\\&space;\theta&space;\end{bmatrix}&space;=&space;\begin{bmatrix}&space;0&space;&&space;V&space;\\&space;0&space;&&space;0&space;\\&space;\end{bmatrix}&space;\begin{bmatrix}&space;y&space;\\&space;\theta&space;\end{bmatrix}&space;&plus;&space;\begin{bmatrix}&space;0&space;\\&space;1&space;\end{bmatrix}&space;\begin{bmatrix}&space;u_\omega&space;\\&space;\end{bmatrix}&space;=&space;\boldsymbol{A}\boldsymbol{X}&space;&plus;&space;\boldsymbol{B}\boldsymbol{U}" title="\frac{d}{dt} \boldsymbol{X}= \frac{d}{dt} \begin{bmatrix} y \\ \theta \end{bmatrix} = \begin{bmatrix} 0 & V \\ 0 & 0 \\ \end{bmatrix} \begin{bmatrix} y \\ \theta \end{bmatrix} + \begin{bmatrix} 0 \\ 1 \end{bmatrix} \begin{bmatrix} u_\omega \\ \end{bmatrix} = \boldsymbol{A}\boldsymbol{X} + \boldsymbol{B}\boldsymbol{U}" /></a>
then we can apply this model to linear mpc, we should give the model reference V although.
- **evaluation function**
the both examples have same evaluation function form as following equation.
<a href="https://www.codecogs.com/eqnedit.php?latex=J&space;=&space;\sum_{i&space;=&space;0}^{prestep}||\boldsymbol{\hat{X}}(k&plus;i|k)-\boldsymbol{r}(k&plus;i|k)&space;||^2_{{\boldsymbol{Q}}(i)}&space;&plus;&space;||\boldsymbol{\Delta&space;{U}}(k&plus;i|k)||^2_{{\boldsymbol{R}}(i)}" target="_blank"><img src="https://latex.codecogs.com/gif.latex?J&space;=&space;\sum_{i&space;=&space;0}^{prestep}||\boldsymbol{\hat{X}}(k&plus;i|k)-\boldsymbol{r}(k&plus;i|k)&space;||^2_{{\boldsymbol{Q}}(i)}&space;&plus;&space;||\boldsymbol{\Delta&space;{U}}(k&plus;i|k)||^2_{{\boldsymbol{R}}(i)}" title="J = \sum_{i = 0}^{prestep}||\boldsymbol{\hat{X}}(k+i|k)-\boldsymbol{r}(k+i|k) ||^2_{{\boldsymbol{Q}}(i)} + ||\boldsymbol{\Delta {U}}(k+i|k)||^2_{{\boldsymbol{R}}(i)}" /></a>
- <a href="https://www.codecogs.com/eqnedit.php?latex=\boldsymbol{\hat{X}}" target="_blank"><img src="https://latex.codecogs.com/gif.latex?\boldsymbol{\hat{X}}" title="\boldsymbol{\hat{X}}" /></a> is predicit state by using predict input
- <a href="https://www.codecogs.com/eqnedit.php?latex=\boldsymbol{{r}}" target="_blank"><img src="https://latex.codecogs.com/gif.latex?\boldsymbol{{r}}" title="\boldsymbol{{r}}" /></a> is reference state
- <a href="https://www.codecogs.com/eqnedit.php?latex=\boldsymbol{\Delta&space;\boldsymbol{U}}" target="_blank"><img src="https://latex.codecogs.com/gif.latex?\boldsymbol{\Delta&space;\boldsymbol{U}}" title="\boldsymbol{\Delta \boldsymbol{U}}" /></a> is predict amount of change of input
- <a href="https://www.codecogs.com/eqnedit.php?latex=\boldsymbol{\boldsymbol{R}},&space;\boldsymbol{\boldsymbol{Q}}" target="_blank"><img src="https://latex.codecogs.com/gif.latex?\boldsymbol{\boldsymbol{R}},&space;\boldsymbol{\boldsymbol{Q}}" title="\boldsymbol{\boldsymbol{R}}, \boldsymbol{\boldsymbol{Q}}" /></a> are evaluation function weights
## Expected Results
- first order system
- time history
<img src = https://github.com/Shunichi09/linear_nonlinear_control/blob/demo_gif/mpc/basic/first_order_states.png width = 70%>
- input
<img src = https://github.com/Shunichi09/linear_nonlinear_control/blob/demo_gif/mpc/basic/first_order_input.png width = 70%>
- ACC (Adaptive cruise control)
- time history of states
<img src = https://github.com/Shunichi09/linear_nonlinear_control/blob/demo_gif/mpc/basic/ACC_states.png width = 70%>
- animation
<img src = https://github.com/Shunichi09/linear_nonlinear_control/blob/demo_gif/mpc/basic/ACC.gif width = 70%>
# Usage # Usage
- for example(first order system)
```
$ python main_example.py
```
- for example(ACC (Adaptive cruise control)) # Expected Results
```
$ python main_ACC.py
```
- for comparing two methods of optimization solvers
```
$ python test_compare_methods.py
```
# Requirement # Requirement
@ -141,6 +22,13 @@ $ python test_compare_methods.py
- python-control - python-control
# Reference # Reference
I`m sorry that main references are written in Japanese
- モデル予測制御―制約のもとでの最適制御 著Jan M. Maciejowski 足立修一 東京電機大学出版局 - study wolf
https://github.com/studywolf/control
- Sergey Levine's lecture
http://rail.eecs.berkeley.edu/deeprlcourse/
- Tassa, Y., Erez, T., & Todorov, E. (2012). Synthesis and stabilization of complex behaviors through online trajectory optimization. IEEE International Conference on Intelligent Robots and Systems, 49064913. https://doi.org/10.1109/IROS.2012.6386025
- Li, W., & Todorov, E. (n.d.). Iterative Linear Quadratic Regulator Design for Nonlinear Biological Movement Systems. Retrieved from https://homes.cs.washington.edu/~todorov/papers/LiICINCO04.pdf

294
iLQR/animation.py Normal file
View File

@ -0,0 +1,294 @@
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.animation as ani
import matplotlib.font_manager as fon
import sys
import math
# default setting of figures
plt.rcParams["mathtext.fontset"] = 'stix' # math fonts
plt.rcParams['xtick.direction'] = 'in' # x axis in
plt.rcParams['ytick.direction'] = 'in' # y axis in
plt.rcParams["font.size"] = 10
plt.rcParams['axes.linewidth'] = 1.0 # axis line width
plt.rcParams['axes.grid'] = True # make grid
def coordinate_transformation_in_angle(positions, base_angle):
'''
Transformation the coordinate in the angle
Parameters
-------
positions : numpy.ndarray
this parameter is composed of xs, ys
should have (2, N) shape
base_angle : float [rad]
Returns
-------
traslated_positions : numpy.ndarray
the shape is (2, N)
'''
if positions.shape[0] != 2:
raise ValueError('the input data should have (2, N)')
positions = np.array(positions)
positions = positions.reshape(2, -1)
rot_matrix = [[np.cos(base_angle), np.sin(base_angle)],
[-1*np.sin(base_angle), np.cos(base_angle)]]
rot_matrix = np.array(rot_matrix)
translated_positions = np.dot(rot_matrix, positions)
return translated_positions
def square_make_with_angles(center_x, center_y, size, angle):
'''
Create square matrix with angle line matrix(2D)
Parameters
-------
center_x : float in meters
the center x position of the square
center_y : float in meters
the center y position of the square
size : float in meters
the square's half-size
angle : float in radians
Returns
-------
square xs : numpy.ndarray
lenght is 5 (counterclockwise from right-up)
square ys : numpy.ndarray
length is 5 (counterclockwise from right-up)
angle line xs : numpy.ndarray
angle line ys : numpy.ndarray
'''
# start with the up right points
# create point in counterclockwise
square_xys = np.array([[size, 0.5 * size], [-size, 0.5 * size], [-size, -0.5 * size], [size, -0.5 * size], [size, 0.5 * size]])
trans_points = coordinate_transformation_in_angle(square_xys.T, -angle) # this is inverse type
trans_points += np.array([[center_x], [center_y]])
square_xs = trans_points[0, :]
square_ys = trans_points[1, :]
angle_line_xs = [center_x, center_x + math.cos(angle) * size]
angle_line_ys = [center_y, center_y + math.sin(angle) * size]
return square_xs, square_ys, np.array(angle_line_xs), np.array(angle_line_ys)
def circle_make_with_angles(center_x, center_y, radius, angle):
'''
Create circle matrix with angle line matrix
Parameters
-------
center_x : float
the center x position of the circle
center_y : float
the center y position of the circle
radius : float
angle : float [rad]
Returns
-------
circle xs : numpy.ndarray
circle ys : numpy.ndarray
angle line xs : numpy.ndarray
angle line ys : numpy.ndarray
'''
point_num = 100 # 分解能
circle_xs = []
circle_ys = []
for i in range(point_num + 1):
circle_xs.append(center_x + radius * math.cos(i*2*math.pi/point_num))
circle_ys.append(center_y + radius * math.sin(i*2*math.pi/point_num))
angle_line_xs = [center_x, center_x + math.cos(angle) * radius]
angle_line_ys = [center_y, center_y + math.sin(angle) * radius]
return np.array(circle_xs), np.array(circle_ys), np.array(angle_line_xs), np.array(angle_line_ys)
class AnimDrawer():
"""create animation of path and robot
Attributes
------------
cars :
anim_fig : figure of matplotlib
axis : axis of matplotlib
"""
def __init__(self, objects):
"""
Parameters
------------
objects : list of objects
Notes
---------
lead_history_states, lead_history_predict_states, traj_ref, history_traj_ref, history_angle_ref
"""
self.car_history_state = objects[0]
self.traget = objects[1]
self.history_xs = [self.car_history_state[:, 0]]
self.history_ys = [self.car_history_state[:, 1]]
self.history_ths = [self.car_history_state[:, 2]]
# setting up figure
self.anim_fig = plt.figure(dpi=150)
self.axis = self.anim_fig.add_subplot(111)
# imgs
self.car_imgs = []
self.traj_imgs = []
def draw_anim(self, interval=50):
"""draw the animation and save
Parameteres
-------------
interval : int, optional
animation's interval time, you should link the sampling time of systems
default is 50 [ms]
"""
self._set_axis()
self._set_img()
self.skip_num = 1
frame_num = int((len(self.history_xs[0])-1) / self.skip_num)
animation = ani.FuncAnimation(self.anim_fig, self._update_anim, interval=interval, frames=frame_num)
# self.axis.legend()
print('save_animation?')
shuold_save_animation = int(input())
if shuold_save_animation:
print('animation_number?')
num = int(input())
animation.save('animation_{0}.mp4'.format(num), writer='ffmpeg')
# animation.save("Sample.gif", writer = 'imagemagick') # gif保存
plt.show()
def _set_axis(self):
""" initialize the animation axies
"""
# (1) set the axis name
self.axis.set_xlabel(r'$\it{x}$ [m]')
self.axis.set_ylabel(r'$\it{y}$ [m]')
self.axis.set_aspect('equal', adjustable='box')
LOW_MARGIN = 5
HIGH_MARGIN = 5
self.axis.set_xlim(np.min(self.history_xs) - LOW_MARGIN, np.max(self.history_xs) + HIGH_MARGIN)
self.axis.set_ylim(np.min(self.history_ys) - LOW_MARGIN, np.max(self.history_ys) + HIGH_MARGIN)
def _set_img(self):
""" initialize the imgs of animation
this private function execute the make initial imgs for animation
"""
# object imgs
obj_color_list = ["k", "k", "m", "m"]
obj_styles = ["solid", "solid", "solid", "solid"]
for i in range(len(obj_color_list)):
temp_img, = self.axis.plot([], [], color=obj_color_list[i], linestyle=obj_styles[i])
self.car_imgs.append(temp_img)
traj_color_list = ["k", "b"]
for i in range(len(traj_color_list)):
temp_img, = self.axis.plot([],[], color=traj_color_list[i], linestyle="dashed")
self.traj_imgs.append(temp_img)
temp_img, = self.axis.plot([],[], "*", color="b")
self.traj_imgs.append(temp_img)
def _update_anim(self, i):
"""the update animation
this function should be used in the animation functions
Parameters
------------
i : int
time step of the animation
the sampling time should be related to the sampling time of system
Returns
-----------
object_imgs : list of img
traj_imgs : list of img
"""
i = int(i * self.skip_num)
# self._draw_set_axis(i)
self._draw_car(i)
self._draw_traj(i)
# self._draw_prediction(i)
return self.car_imgs, self.traj_imgs,
def _draw_set_axis(self, i):
"""
"""
# (2) set the xlim and ylim
LOW_MARGIN = 20
HIGH_MARGIN = 20
OVER_LOOK = 50
self.axis.set_xlim(np.min(self.history_xs[0][i : i + OVER_LOOK]) - LOW_MARGIN, np.max(self.history_xs[0][i : i + OVER_LOOK]) + HIGH_MARGIN)
self.axis.set_ylim(np.min(self.history_ys[0][i : i + OVER_LOOK]) - LOW_MARGIN, np.max(self.history_ys[0][i : i + OVER_LOOK]) + HIGH_MARGIN)
def _draw_car(self, i):
"""
This private function is just divided thing of
the _update_anim to see the code more clear
Parameters
------------
i : int
time step of the animation
the sampling time should be related to the sampling time of system
"""
# cars
object_x, object_y, angle_x, angle_y = square_make_with_angles(self.history_xs[0][i],
self.history_ys[0][i],
1.0,
self.history_ths[0][i])
self.car_imgs[0].set_data([object_x, object_y])
self.car_imgs[1].set_data([angle_x, angle_y])
def _draw_traj(self, i):
"""
This private function is just divided thing of
the _update_anim to see the code more clear
Parameters
------------
i : int
time step of the animation
the sampling time should be related to the sampling time of system
"""
# car
self.traj_imgs[0].set_data(self.history_xs[0][:i], self.history_ys[0][:i])
# goal
self.traj_imgs[-1].set_data(self.traget[0], self.traget[1])
# traj_ref
# self.traj_imgs[2].set_data(self.history_traj_ref[i][0, :], self.history_traj_ref[i][1, :])

380
iLQR/ilqr.py Normal file
View File

@ -0,0 +1,380 @@
import numpy as np
from copy import copy, deepcopy
from model import TwoWheeledCar
class iLQRController():
"""
A controller that implements iterative Linear Quadratic Gaussian control.
Controls the (x, y, th) of the two wheeled car
"""
def __init__(self, N=100, max_iter=100, dt=0.016):
'''
n int: length of the control sequence
max_iter int: limit on number of optimization iterations
'''
self.old_target = [None, None]
self.tN = N # number of timesteps
self.STATE_SIZE = 3
self.INPUT_SIZE = 2
self.dt = dt
self.max_iter = max_iter
self.lamb_factor = 10
self.lamb_max = 1e4
self.eps_converge = 0.001 # exit if relative improvement below threshold
def calc_input(self, car, x_target, changed=False):
"""Generates a control signal to move the
arm to the specified target.
car : the arm model being controlled NOTE:これが実際にコントロールされるやつ
des list : the desired system position
x_des np.array: desired task-space force,
irrelevant here.
"""
# if the target has changed, reset things and re-optimize
# for this movement、目標が変わっている場合があるので確認
if changed:
self.reset(x_target)
# Reset k if at the end of the sequence
if self.t >= self.tN - 1: # 最初のSTEPのみ計算
self.t = 0
# Compute the optimization
"""
NOTE : ここに条件を追加してもいいかもしれない何サイクルも回す必要ないし理想軌道とずれたらとか
"""
if self.t % 1 == 0:
x0 = np.zeros(self.STATE_SIZE) # 初期化、速度は0
self.simulator, x0 = self.initialize_simulator(car) # 前の時刻のものを確保
U = np.copy(self.U[self.t:]) # 初期入力かなこれ
self.X, self.U[self.t:], cost = self.ilqr(x0, U) # 入力列が入ってくる
self.u = self.U[self.t]
# move us a step forward in our control sequence
self.t += 1
return self.u
def initialize_simulator(self, car):
""" make a copy of the car model, to make sure that the
actual car model isn't affected during the iLQR process
"""
# need to make a copy the real car
simulator = TwoWheeledCar(deepcopy(car.xs))
return simulator, deepcopy(simulator.xs)
def cost(self, xs, us):
""" the immediate state cost function
Parameters
------------
xs : shape(STATE_SIZE, tN + 1)
us : shape(STATE_SIZE, tN)
"""
"""
NOTE : 拡張する説ありますがとりあえず飛ばします
"""
# total cost
l = np.sum(us**2)
# compute derivatives of cost
l_x = np.zeros(self.STATE_SIZE)
l_xx = np.zeros((self.STATE_SIZE, self.STATE_SIZE))
# quadratic のもののみ計算
R = 0.01
l_u = 2. * us * R
l_uu = 2. * np.eye(self.INPUT_SIZE) * R
l_ux = np.zeros((self.INPUT_SIZE, self.STATE_SIZE))
# returned in an array for easy multiplication by time step
return l, l_x, l_xx, l_u, l_uu, l_ux
def cost_final(self, x):
""" the final state cost function
Parameters
-------------
xs : shape(STATE_SIZE,)
Notes :
---------
l_x = np.zeros((self.STATE_SIZE))
l_xx = np.zeros((self.STATE_SIZE, self.STATE_SIZE))
"""
Q_11 = 1.e4 # terminal x cost weight
Q_22 = 1.e4 # terminal y cost weight
Q_33 = 1.e-1 # terminal theta cost weight
l = np.sum((self.simulator.xs - self.target)**2)
# about L_x
l_x1 = 2. * (x[0] - self.target[0]) * Q_11
l_x2 = 2. * (x[1] - self.target[1]) * Q_22
l_x3 = 2. * (x[2] -self.target[2]) * Q_33
l_x = np.array([l_x1, l_x2, l_x3])
# about l_xx
l_xx = 2. * np.diag([Q_11, Q_22, Q_33])
# Final cost only requires these three values
return l, l_x, l_xx
def finite_differences(self, x, u):
""" calculate gradient of plant dynamics using finite differences
x np.array: the state of the system
u np.array: the control signal
"""
A = np.zeros((self.STATE_SIZE, self.STATE_SIZE))
B = np.zeros((self.STATE_SIZE, self.INPUT_SIZE))
eps = 1e-4 # finite differences epsilon
for ii in range(self.STATE_SIZE):
# calculate partial differential w.r.t. x
inc_x = x.copy()
inc_x[ii] += eps
state_inc,_ = self.plant_dynamics(inc_x, u.copy())
dec_x = x.copy()
dec_x[ii] -= eps
state_dec,_ = self.plant_dynamics(dec_x, u.copy())
A[:, ii] = (state_inc - state_dec) / (2 * eps)
for ii in range(self.INPUT_SIZE):
# calculate partial differential w.r.t. u
inc_u = u.copy()
inc_u[ii] += eps
state_inc,_ = self.plant_dynamics(x.copy(), inc_u)
dec_u = u.copy()
dec_u[ii] -= eps
state_dec,_ = self.plant_dynamics(x.copy(), dec_u)
B[:, ii] = (state_inc - state_dec) / (2 * eps)
return A, B
def ilqr(self, x0, U=None):
""" use iterative linear quadratic regulation to find a control
sequence that minimizes the cost function
x0 np.array: the initial state of the system
U np.array: the initial control trajectory dimensions = [dof, time]
"""
U = self.U if U is None else U
lamb = 1.0 # regularization parameter これが正規化項の1つ
sim_new_trajectory = True
tN = U.shape[0] # number of time steps
for ii in range(self.max_iter):
if sim_new_trajectory == True:
# simulate forward using the current control trajectory
"""
NOTE: 単純な計算でpredictする
"""
X, cost = self.simulate(x0, U)
oldcost = np.copy(cost) # copy for exit condition check
# now we linearly approximate the dynamics, and quadratically
# approximate the cost function so we can use LQR methods
# for storing linearized dynamics
# x(t+1) = f(x(t), u(t))
"""
NOTE: Gradiantの取得
"""
f_x = np.zeros((tN, self.STATE_SIZE, self.STATE_SIZE)) # df / dx
f_u = np.zeros((tN, self.STATE_SIZE, self.INPUT_SIZE)) # df / du
# for storing quadratized cost function
l = np.zeros((tN,1)) # immediate state cost
l_x = np.zeros((tN, self.STATE_SIZE)) # dl / dx
l_xx = np.zeros((tN, self.STATE_SIZE, self.STATE_SIZE)) # d^2 l / dx^2
l_u = np.zeros((tN, self.INPUT_SIZE)) # dl / du
l_uu = np.zeros((tN, self.INPUT_SIZE, self.INPUT_SIZE)) # d^2 l / du^2
l_ux = np.zeros((tN, self.INPUT_SIZE, self.STATE_SIZE)) # d^2 l / du / dx
# for everything except final state
for t in range(tN-1):
# x(t+1) = f(x(t), u(t)) = x(t) + dx(t) * dt
# linearized dx(t) = np.dot(A(t), x(t)) + np.dot(B(t), u(t))
# f_x = np.eye + A(t)
# f_u = B(t)
A, B = self.finite_differences(X[t], U[t])
f_x[t] = np.eye(self.STATE_SIZE) + A * self.dt
f_u[t] = B * self.dt
(l[t], l_x[t], l_xx[t], l_u[t], l_uu[t], l_ux[t]) = self.cost(X[t], U[t])
l[t] *= self.dt
l_x[t] *= self.dt
l_xx[t] *= self.dt
l_u[t] *= self.dt
l_uu[t] *= self.dt
l_ux[t] *= self.dt
# and for final state
l[-1], l_x[-1], l_xx[-1] = self.cost_final(X[-1])
sim_new_trajectory = False
# optimize things!
# initialize Vs with final state cost and set up k, K
V = l[-1].copy() # value function
V_x = l_x[-1].copy() # dV / dx
V_xx = l_xx[-1].copy() # d^2 V / dx^2
k = np.zeros((tN, self.INPUT_SIZE)) # feedforward modification
K = np.zeros((tN, self.INPUT_SIZE, self.STATE_SIZE)) # feedback gain
# NOTE: they use V' to denote the value at the next timestep,
# they have this redundant in their notation making it a
# function of f(x + dx, u + du) and using the ', but it makes for
# convenient shorthand when you drop function dependencies
# work backwards to solve for V, Q, k, and K
for t in range(self.tN-2, -1, -1):
# NOTE: we're working backwards, so V_x = V_x[t+1] = V'_x
# 4a) Q_x = l_x + np.dot(f_x^T, V'_x)
Q_x = l_x[t] + np.dot(f_x[t].T, V_x)
# 4b) Q_u = l_u + np.dot(f_u^T, V'_x)
Q_u = l_u[t] + np.dot(f_u[t].T, V_x)
# NOTE: last term for Q_xx, Q_uu, and Q_ux is vector / tensor product
# but also note f_xx = f_uu = f_ux = 0 so they're all 0 anyways.
# 4c) Q_xx = l_xx + np.dot(f_x^T, np.dot(V'_xx, f_x)) + np.einsum(V'_x, f_xx)
Q_xx = l_xx[t] + np.dot(f_x[t].T, np.dot(V_xx, f_x[t]))
# 4d) Q_ux = l_ux + np.dot(f_u^T, np.dot(V'_xx, f_x)) + np.einsum(V'_x, f_ux)
Q_ux = l_ux[t] + np.dot(f_u[t].T, np.dot(V_xx, f_x[t]))
# 4e) Q_uu = l_uu + np.dot(f_u^T, np.dot(V'_xx, f_u)) + np.einsum(V'_x, f_uu)
Q_uu = l_uu[t] + np.dot(f_u[t].T, np.dot(V_xx, f_u[t]))
# Calculate Q_uu^-1 with regularization term set by
# Levenberg-Marquardt heuristic (at end of this loop)
Q_uu_evals, Q_uu_evecs = np.linalg.eig(Q_uu)
Q_uu_evals[Q_uu_evals < 0] = 0.0
Q_uu_evals += lamb
Q_uu_inv = np.dot(Q_uu_evecs, np.dot(np.diag(1.0/Q_uu_evals), Q_uu_evecs.T))
# 5b) k = -np.dot(Q_uu^-1, Q_u)
k[t] = -np.dot(Q_uu_inv, Q_u)
# 5b) K = -np.dot(Q_uu^-1, Q_ux)
K[t] = -np.dot(Q_uu_inv, Q_ux)
# 6a) DV = -.5 np.dot(k^T, np.dot(Q_uu, k))
# 6b) V_x = Q_x - np.dot(K^T, np.dot(Q_uu, k))
V_x = Q_x - np.dot(K[t].T, np.dot(Q_uu, k[t]))
# 6c) V_xx = Q_xx - np.dot(-K^T, np.dot(Q_uu, K))
V_xx = Q_xx - np.dot(K[t].T, np.dot(Q_uu, K[t]))
U_new = np.zeros((tN, self.INPUT_SIZE))
# calculate the optimal change to the control trajectory
x_new = x0.copy() # 7a)
for t in range(tN - 1):
# use feedforward (k) and feedback (K) gain matrices
# calculated from our value function approximation
# to take a stab at the optimal control signal
U_new[t] = U[t] + k[t] + np.dot(K[t], x_new - X[t]) # 7b)
# given this u, find our next state
_,x_new = self.plant_dynamics(x_new, U_new[t]) # 7c)
# evaluate the new trajectory
X_new, cost_new = self.simulate(x0, U_new)
# Levenberg-Marquardt heuristic
if cost_new < cost:
# decrease lambda (get closer to Newton's method)
lamb /= self.lamb_factor
X = np.copy(X_new) # update trajectory
U = np.copy(U_new) # update control signal
oldcost = np.copy(cost)
cost = np.copy(cost_new)
sim_new_trajectory = True # do another rollout
# print("iteration = %d; Cost = %.4f;"%(ii, costnew) +
# " logLambda = %.1f"%np.log(lamb))
# check to see if update is small enough to exit
if ii > 0 and ((abs(oldcost-cost)/cost) < self.eps_converge):
print("Converged at iteration = %d; Cost = %.4f;"%(ii,cost_new) +
" logLambda = %.1f"%np.log(lamb))
break
else:
# increase lambda (get closer to gradient descent)
lamb *= self.lamb_factor
# print("cost: %.4f, increasing lambda to %.4f")%(cost, lamb)
if lamb > self.lamb_max:
print("lambda > max_lambda at iteration = %d;"%ii +
" Cost = %.4f; logLambda = %.1f"%(cost,
np.log(lamb)))
break
return X, U, cost
def plant_dynamics(self, x, u):
""" simulate a single time step of the plant, from
initial state x and applying control signal u
x np.array: the state of the system
u np.array: the control signal
"""
# set the arm position to x
self.simulator.initialize_state(x)
# apply the control signal
x_next = self.simulator.update_state(u, self.dt)
# calculate the change in state
xdot = ((x_next - x) / self.dt).squeeze()
return xdot, x_next
def reset(self, target):
""" reset the state of the system """
# Index along current control sequence
self.t = 0
self.U = np.zeros((self.tN, self.INPUT_SIZE))
self.target = target.copy()
def simulate(self, x0, U):
""" do a rollout of the system, starting at x0 and
applying the control sequence U
x0 np.array: the initial state of the system
U np.array: the control sequence to apply
"""
tN = U.shape[0]
X = np.zeros((tN, self.STATE_SIZE))
X[0] = x0
cost = 0
# Run simulation with substeps
for t in range(tN-1):
_,X[t+1] = self.plant_dynamics(X[t], U[t])
l, _ , _, _ , _ , _ = self.cost(X[t], U[t])
cost = cost + self.dt * l
# Adjust for final cost, subsample trajectory
l_f, _, _ = self.cost_final(X[-1])
cost = cost + l_f
return X, cost

58
iLQR/main.py Normal file
View File

@ -0,0 +1,58 @@
import numpy as np
import matplotlib.pyplot as plt
from model import TwoWheeledCar
from ilqr import iLQRController
from animation import AnimDrawer
def main():
"""
"""
# iteration parameters
NUM_ITERARIONS = 1000
dt = 0.01
# make plant
init_x = np.array([0., 0., 0.])
car = TwoWheeledCar(init_x)
# make goal
target = np.array([5., 3., 0.])
# controller
controller = iLQRController()
for iteration in range(NUM_ITERARIONS):
print("iteration num = {} / {}".format(iteration, NUM_ITERARIONS))
if iteration == 0:
changed = True
u = controller.calc_input(car, target, changed=changed)
car.update_state(u, dt)
# figures and animation
history_states = np.array(car.history_xs)
time_fig = plt.figure()
x_fig = time_fig.add_subplot(311)
y_fig = time_fig.add_subplot(312)
th_fig = time_fig.add_subplot(313)
time = len(history_states)
x_fig.plot(np.arange(time), history_states[:, 0])
y_fig.plot(np.arange(time), history_states[:, 1])
th_fig.plot(np.arange(time), history_states[:, 2])
plt.show()
history_states = np.array(car.history_xs)
animdrawer = AnimDrawer([history_states, target])
animdrawer.draw_anim()
if __name__ == "__main__":
main()

177
iLQR/model.py Normal file
View File

@ -0,0 +1,177 @@
import numpy as np
import matplotlib.pyplot as plt
import math
import copy
"""
このWheeled modelはコントローラー用
ホントはbase作って継承すべきですが省略
"""
class TwoWheeledCar():
"""SampleSystem, this is the simulator
Attributes
-----------
xs : numpy.ndarray
system states, [x, y, theta]
history_xs : list
time history of state
"""
def __init__(self, init_states=None):
"""
Palameters
-----------
init_state : float, optional, shape(3, )
initial state of system default is None
"""
self.STATE_SIZE = 3
self.INPUT_SIZE = 2
self.xs = np.zeros(3)
if init_states is not None:
self.xs = copy.deepcopy(init_states)
self.history_xs = [init_states]
self.history_predict_xs = []
def update_state(self, us, dt):
"""
Palameters
------------
us : numpy.ndarray
inputs of system in some cases this means the reference
dt : float in seconds, optional
sampling time of simulation, default is 0.01 [s]
"""
# for theta 1, theta 1 dot, theta 2, theta 2 dot
k0 = [0.0 for _ in range(3)]
k1 = [0.0 for _ in range(3)]
k2 = [0.0 for _ in range(3)]
k3 = [0.0 for _ in range(3)]
functions = [self._func_x_1, self._func_x_2, self._func_x_3]
# solve Runge-Kutta
for i, func in enumerate(functions):
k0[i] = dt * func(self.xs[0], self.xs[1], self.xs[2], us[0], us[1])
for i, func in enumerate(functions):
k1[i] = dt * func(self.xs[0] + k0[0]/2., self.xs[1] + k0[1]/2., self.xs[2] + k0[2]/2., us[0], us[1])
for i, func in enumerate(functions):
k2[i] = dt * func(self.xs[0] + k0[0]/2., self.xs[1] + k0[1]/2., self.xs[2] + k0[2]/2., us[0], us[1])
for i, func in enumerate(functions):
k3[i] = dt * func(self.xs[0] + k2[0], self.xs[1] + k2[1], self.xs[2] + k2[2], us[0], us[1])
self.xs[0] += (k0[0] + 2. * k1[0] + 2. * k2[0] + k3[0]) / 6.
self.xs[1] += (k0[1] + 2. * k1[1] + 2. * k2[1] + k3[1]) / 6.
self.xs[2] += (k0[2] + 2. * k1[2] + 2. * k2[2] + k3[2]) / 6.
# save
save_states = copy.deepcopy(self.xs)
self.history_xs.append(save_states)
return self.xs.copy()
def predict_state(self, init_xs, us, dt=0.01):
"""make predict state by using optimal input made by MPC
Paramaters
-----------
us : array-like, shape(2, N)
optimal input made by MPC
dt : float in seconds, optional
sampling time of simulation, default is 0.01 [s]
"""
## test
# assert us.shape[0] == 2 and us.shape[1] == 15, "wrong shape"
xs = copy.deepcopy(init_xs)
predict_xs = [copy.deepcopy(xs)]
for i in range(us.shape[1]):
k0 = [0.0 for _ in range(self.NUM_STATE)]
k1 = [0.0 for _ in range(self.NUM_STATE)]
k2 = [0.0 for _ in range(self.NUM_STATE)]
k3 = [0.0 for _ in range(self.NUM_STATE)]
functions = [self._func_x_1, self._func_x_2, self._func_x_3]
# solve Runge-Kutta
for i, func in enumerate(functions):
k0[i] = dt * func(xs[0], xs[1], xs[2], us[0, i], us[1, i])
for i, func in enumerate(functions):
k1[i] = dt * func(xs[0] + k0[0]/2., xs[1] + k0[1]/2., xs[2] + k0[2]/2., us[0, i], us[1, i])
for i, func in enumerate(functions):
k2[i] = dt * func(xs[0] + k1[0]/2., xs[1] + k1[1]/2., xs[2] + k1[2]/2., us[0, i], us[1, i])
for i, func in enumerate(functions):
k3[i] = dt * func(xs[0] + k2[0], xs[1] + k2[1], xs[2] + k2[2], us[0, i], us[1, i])
xs[0] += (k0[0] + 2. * k1[0] + 2. * k2[0] + k3[0]) / 6.
xs[1] += (k0[1] + 2. * k1[1] + 2. * k2[1] + k3[1]) / 6.
xs[2] += (k0[2] + 2. * k1[2] + 2. * k2[2] + k3[2]) / 6.
predict_xs.append(copy.deepcopy(xs))
self.history_predict_xs.append(np.array(predict_xs))
return np.array(predict_xs)
def initialize_state(self, init_xs):
"""
initialize the state
Parameters
------------
init_xs : numpy.ndarray
"""
self.xs = init_xs.flatten()
def _func_x_1(self, y_1, y_2, y_3, u_1, u_2):
"""
Parameters
------------
y_1 : float
y_2 : float
y_3 : float
u_1 : float
system input
u_2 : float
system input
"""
y_dot = math.cos(y_3) * u_1
return y_dot
def _func_x_2(self, y_1, y_2, y_3, u_1, u_2):
"""
Parameters
------------
y_1 : float
y_2 : float
y_3 : float
u_1 : float
system input
u_2 : float
system input
"""
y_dot = math.sin(y_3) * u_1
return y_dot
def _func_x_3(self, y_1, y_2, y_3, u_1, u_2):
"""
Parameters
------------
y_1 : float
y_2 : float
y_3 : float
u_1 : float
system input
u_2 : float
system input
"""
y_dot = u_2
return y_dot