PythonLinearNonlinearControl/ilqr/ilqr.py

466 lines
16 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import numpy as np
from copy import copy, deepcopy
from model import TwoWheeledCar
class iLQRController():
"""
A controller that implements iterative Linear Quadratic control.
Controls the (x, y, th) of the two wheeled car
Attributes:
------------
tN : int
number of time step
STATE_SIZE : int
system state size
INPUT_SIZE : int
system input size
dt : float
sampling time
max_iter : int
number of max iteration
lamb_factor : int
lambda factor is that the adding value to make the matrix of Q _uu be positive
lamb_max : float
maximum lambda value to make the matrix of Q _uu be positive
eps_converge : float
threshold value of the iteration
"""
def __init__(self, N=100, max_iter=400, dt=0.01):
"""
Parameters
----------
N : int, optional
number of time step, default is 100
max_iter : int, optional
number of max iteration, default is 400
dt : float, optional
sampling time, default is 0.01
"""
self.tN = N
self.STATE_SIZE = 3
self.INPUT_SIZE = 2
self.dt = dt
self.max_iter = max_iter
self.lamb_factor = 10
self.lamb_max = 1e4
self.eps_converge = 0.001
def calc_input(self, car, x_target):
"""main loop of iterative LQR
Parameters
-------------
car : model class
should have initialize state and update state
x_target : numpy.ndarray, shape(STATE_SIZE, )
target state
Returns
-----------
u : numpy.ndarray, shape(INPUT_SIZE, )
See also
----------
model.py
"""
# initialize
self.reset(x_target)
# Compute the optimization
x0 = np.zeros(self.STATE_SIZE)
self.simulator, x0 = self.initialize_simulator(car)
U = np.copy(self.U)
self.X, self.U, cost = self.ilqr(x0, U)
self.u = self.U[self.t] # use only one time step (like MPC)
return self.u
def initialize_simulator(self, car):
""" make copy for controller
Parameters
-------------
car : model class
should have initialize state and update state
Returns
----------
simulator : model class
should have initialize state and update state
x0 : numpy.ndarray, shape(STATE_SIZE)
initial state of the simulator
"""
# copy
simulator = TwoWheeledCar(deepcopy(car.xs))
return simulator, deepcopy(simulator.xs)
def cost(self, xs, us):
""" the immediate state cost function
Parameters
------------
xs : shape(STATE_SIZE, tN + 1)
predict state of the system
us : shape(STATE_SIZE, tN)
predict input of the system
Returns
----------
l : float
stage cost
l_x : numpy.ndarray, shape(STATE_SIZE, )
differential of stage cost by x
l_xx : numpy.ndarray, shape(STATE_SIZE, STATE_SIZE)
second order differential of stage cost by x
l_u : numpy.ndarray, shape(INPUT_SIZE, )
differential of stage cost by u
l_uu : numpy.ndarray, shape(INPUT_SIZE, INPUT_SIZE)
second order differential of stage cost by uu
l_ux numpy.ndarray, shape(INPUT_SIZE, STATE_SIZE)
second order differential of stage cost by ux
"""
# total cost
R_11 = 1e-4 # terminal u_v cost weight
R_22 = 1e-4 # terminal u_th cost weight
l = np.dot(us.T, np.dot(np.diag([R_11, R_22]), us))
# compute derivatives of cost
l_x = np.zeros(self.STATE_SIZE)
l_xx = np.zeros((self.STATE_SIZE, self.STATE_SIZE))
l_u1 = 2. * us[0] * R_11
l_u2 = 2. * us[1] * R_22
l_u = np.array([l_u1, l_u2])
l_uu = 2. * np.diag([R_11, R_22])
l_ux = np.zeros((self.INPUT_SIZE, self.STATE_SIZE))
return l, l_x, l_xx, l_u, l_uu, l_ux
def cost_final(self, x):
""" the final state cost function
Parameters
-------------
x : numpy.ndarray, shape(STATE_SIZE,)
predict state of the system
Returns
---------
l : float
terminal cost
l_x : numpy.ndarray, shape(STATE_SIZE, )
differential of stage cost by x
l_xx : numpy.ndarray, shape(STATE_SIZE, STATE_SIZE)
second order differential of stage cost by x
"""
Q_11 = 10. # terminal x cost weight
Q_22 = 10. # terminal y cost weight
Q_33 = 0.1 # terminal theta cost weight
error = self.simulator.xs - self.target
l = np.dot(error.T, np.dot(np.diag([Q_11, Q_22, Q_33]), error))
# about L_x
l_x1 = 2. * (x[0] - self.target[0]) * Q_11
l_x2 = 2. * (x[1] - self.target[1]) * Q_22
l_x3 = 2. * (x[2] -self.target[2]) * Q_33
l_x = np.array([l_x1, l_x2, l_x3])
# about l_xx
l_xx = 2. * np.diag([Q_11, Q_22, Q_33])
# Final cost only requires these three values
return l, l_x, l_xx
def finite_differences(self, x, u):
""" calculate gradient of plant dynamics using finite differences
Parameters
--------------
x : numpy.ndarray, shape(STATE_SIZE,)
the state of the system
u : numpy.ndarray, shape(INPUT_SIZE,)
the control input
Returns
------------
A : numpy.ndarray, shape(STATE_SIZE, STATE_SIZE)
differential of the model /alpha X
B : numpy.ndarray, shape(STATE_SIZE, INPUT_SIZE)
differential of the model /alpha U
Notes
-------
in this case, I pre-calculated the differential of the model because the tow-wheeled model is not difficult to calculate the gradient.
If you dont or cannot do that, you can use the numerical differentiation.
However, sometimes the the numerical differentiation affect the accuracy of calculations.
"""
A = np.zeros((self.STATE_SIZE, self.STATE_SIZE))
A_ideal = np.zeros((self.STATE_SIZE, self.STATE_SIZE))
B = np.zeros((self.STATE_SIZE, self.INPUT_SIZE))
B_ideal = np.zeros((self.STATE_SIZE, self.INPUT_SIZE))
# if you want to use the numerical differentiation, please comment out this code
"""
eps = 1e-4 # finite differences epsilon
for ii in range(self.STATE_SIZE):
# calculate partial differential w.r.t. x
inc_x = x.copy()
inc_x[ii] += eps
state_inc,_ = self.plant_dynamics(inc_x, u.copy())
dec_x = x.copy()
dec_x[ii] -= eps
state_dec,_ = self.plant_dynamics(dec_x, u.copy())
A[:, ii] = (state_inc - state_dec) / (2 * eps)
"""
A_ideal[0, 2] = -np.sin(x[2]) * u[0]
A_ideal[1, 2] = np.cos(x[2]) * u[0]
# if you want to use the numerical differentiation, please comment out this code
"""
for ii in range(self.INPUT_SIZE):
# calculate partial differential w.r.t. u
inc_u = u.copy()
inc_u[ii] += eps
state_inc,_ = self.plant_dynamics(x.copy(), inc_u)
dec_u = u.copy()
dec_u[ii] -= eps
state_dec,_ = self.plant_dynamics(x.copy(), dec_u)
B[:, ii] = (state_inc - state_dec) / (2 * eps)
"""
B_ideal[0, 0] = np.cos(x[2])
B_ideal[1, 0] = np.sin(x[2])
B_ideal[2, 1] = 1.
return A_ideal, B_ideal
def ilqr(self, x0, U=None):
""" use iterative linear quadratic regulation to find a control
sequence that minimizes the cost function
Parameters
--------------
x0 : numpy.ndarray, shape(STATE_SIZE, )
the initial state of the system
U : numpy.ndarray(TIME, INPUT_SIZE)
the initial control trajectory dimension
"""
U = self.U if U is None else U
lamb = 1.0 # regularization parameter
sim_new_trajectory = True
tN = U.shape[0] # number of time steps
for ii in range(self.max_iter):
if sim_new_trajectory == True:
# simulate forward using the current control trajectory
X, cost = self.simulate(x0, U)
oldcost = np.copy(cost) # copy for exit condition check
#
f_x = np.zeros((tN, self.STATE_SIZE, self.STATE_SIZE)) # df / dx
f_u = np.zeros((tN, self.STATE_SIZE, self.INPUT_SIZE)) # df / du
# for storing quadratized cost function
l = np.zeros((tN,1)) # immediate state cost
l_x = np.zeros((tN, self.STATE_SIZE)) # dl / dx
l_xx = np.zeros((tN, self.STATE_SIZE, self.STATE_SIZE)) # d^2 l / dx^2
l_u = np.zeros((tN, self.INPUT_SIZE)) # dl / du
l_uu = np.zeros((tN, self.INPUT_SIZE, self.INPUT_SIZE)) # d^2 l / du^2
l_ux = np.zeros((tN, self.INPUT_SIZE, self.STATE_SIZE)) # d^2 l / du / dx
# for everything except final state
for t in range(tN-1):
# x(t+1) = f(x(t), u(t)) = x(t) + dx(t) * dt
# linearized dx(t) = np.dot(A(t), x(t)) + np.dot(B(t), u(t))
# f_x = (np.eye + A(t)) * dt
# f_u = (B(t)) * dt
# continuous --> discrete
A, B = self.finite_differences(X[t], U[t])
f_x[t] = np.eye(self.STATE_SIZE) + A * self.dt
f_u[t] = B * self.dt
""" NOTE: why multiply dt in original program ??
So the dt multiplication and + I is because were actually taking the derivative of the state with respect to the previous state. Which is not
dx = Ax + Bu,
but rather
x(t) = x(t-1) + (Ax(t-1) + Bu(t-1))*dt
And thats where the identity matrix and dt come from!
So that parts in the comments of the code, but the *dt on all the cost function stuff is not commented on at all!
So here the dt lets you normalize behaviour for different time steps (assuming you also scale the number of steps in the sequence).
So if you have a time step of .01 or .001 youre not racking up 10 times as much cost function in the latter case.
And if you run the code with 50 steps in the sequence and dt=.01 and 500 steps in the sequence
and dt=.001 youll see that you get the same results, which is not the case at all when you dont take dt into account in the cost function!
"""
(l[t], l_x[t], l_xx[t], l_u[t], l_uu[t], l_ux[t]) = self.cost(X[t], U[t])
""" # we consider this part in cost function.
l[t] *= self.dt
l_x[t] *= self.dt
l_xx[t] *= self.dt
l_u[t] *= self.dt
l_uu[t] *= self.dt
l_ux[t] *= self.dt
"""
# and for final state
l[-1], l_x[-1], l_xx[-1] = self.cost_final(X[-1])
sim_new_trajectory = False
V = l[-1].copy() # value function
V_x = l_x[-1].copy() # dV / dx
V_xx = l_xx[-1].copy() # d^2 V / dx^2
k = np.zeros((tN, self.INPUT_SIZE)) # feedforward modification
K = np.zeros((tN, self.INPUT_SIZE, self.STATE_SIZE)) # feedback gain
# work backwards to solve for V, Q, k, and K
for t in range(self.tN-2, -1, -1):
Q_x = l_x[t] + np.dot(f_x[t].T, V_x)
Q_u = l_u[t] + np.dot(f_u[t].T, V_x)
Q_xx = l_xx[t] + np.dot(f_x[t].T, np.dot(V_xx, f_x[t]))
Q_ux = l_ux[t] + np.dot(f_u[t].T, np.dot(V_xx, f_x[t]))
Q_uu = l_uu[t] + np.dot(f_u[t].T, np.dot(V_xx, f_u[t]))
Q_uu_evals, Q_uu_evecs = np.linalg.eig(Q_uu)
Q_uu_evals[Q_uu_evals < 0] = 0.0
Q_uu_evals += lamb
Q_uu_inv = np.dot(Q_uu_evecs, np.dot(np.diag(1.0/Q_uu_evals), Q_uu_evecs.T))
k[t] = -1. * np.dot(Q_uu_inv, Q_u)
K[t] = -1. * np.dot(Q_uu_inv, Q_ux)
V_x = Q_x - np.dot(K[t].T, np.dot(Q_uu, k[t]))
V_xx = Q_xx - np.dot(K[t].T, np.dot(Q_uu, K[t]))
U_new = np.zeros((tN, self.INPUT_SIZE))
x_new = x0.copy()
for t in range(tN - 1):
# use feedforward (k) and feedback (K) gain matrices
# calculated from our value function approximation
U_new[t] = U[t] + k[t] + np.dot(K[t], x_new - X[t])
_,x_new = self.plant_dynamics(x_new, U_new[t])
# evaluate the new trajectory
X_new, cost_new = self.simulate(x0, U_new)
if cost_new < cost:
# decrease lambda (get closer to Newton's method)
lamb /= self.lamb_factor
X = np.copy(X_new) # update trajectory
U = np.copy(U_new) # update control signal
oldcost = np.copy(cost)
cost = np.copy(cost_new)
sim_new_trajectory = True # do another rollout
# check to see if update is small enough to exit
if ii > 0 and ((abs(oldcost-cost)/cost) < self.eps_converge):
print("Converged at iteration = %d; Cost = %.4f;"%(ii,cost_new) +
" logLambda = %.1f"%np.log(lamb))
break
else:
# increase lambda (get closer to gradient descent)
lamb *= self.lamb_factor
# print("cost: %.4f, increasing lambda to %.4f")%(cost, lamb)
if lamb > self.lamb_max:
print("lambda > max_lambda at iteration = %d;"%ii +
" Cost = %.4f; logLambda = %.1f"%(cost,
np.log(lamb)))
break
return X, U, cost
def plant_dynamics(self, x, u):
""" simulate a single time step of the plant, from
initial state x and applying control signal u
Parameters
--------------
x : numpy.ndarray, shape(STATE_SIZE, )
the state of the system
u : numpy.ndarray, shape(INPUT_SIZE, )
the control signal
Returns
-----------
xdot : numpy.ndarray, shape(STATE_SIZE, )
the gradient of x
x_next : numpy.ndarray, shape(STATE_SIZE, )
next state of x
"""
self.simulator.initialize_state(x)
# apply the control signal
x_next = self.simulator.update_state(u, self.dt)
# calculate the change in state
xdot = ((x_next - x) / self.dt).squeeze()
return xdot, x_next
def reset(self, target):
""" reset the state of the system """
# Index along current control sequence
self.t = 0
self.U = np.zeros((self.tN, self.INPUT_SIZE))
self.target = target.copy()
def simulate(self, x0, U):
""" do a rollout of the system, starting at x0 and
applying the control sequence U
Parameters
----------
x0 : numpy.ndarray, shape(STATE_SIZE, )
the initial state of the system
U : numpy.ndarray, shape(tN, INPUT_SIZE)
the control sequence to apply
Returns
---------
X : numpy.ndarray, shape(tN, STATE_SIZE)
the state sequence
cost : float
cost
"""
tN = U.shape[0]
X = np.zeros((tN, self.STATE_SIZE))
X[0] = x0
cost = 0
# Run simulation with substeps
for t in range(tN-1):
_,X[t+1] = self.plant_dynamics(X[t], U[t])
l, _ , _, _ , _ , _ = self.cost(X[t], U[t])
cost = cost + self.dt * l
# Adjust for final cost, subsample trajectory
l_f, _, _ = self.cost_final(X[-1])
cost = cost + l_f
return X, cost