add LQR

2019-04-05 17:15:54 +09:00 · 2019-04-05 17:15:54 +09:00 · 0b6cdc5778
parent f1fc57a219
commit 0b6cdc5778
5 changed files with 999 additions and 0 deletions
--- a/iLQR/README.md
+++ b/iLQR/README.md
@ -0,0 +1,61 @@
 # Iterative Linear Quadratic Regulator
 This program is about iLQR (Iteratice Linear Quadratic Regulator)
 # Problem Formulation
 Two wheeled robot is expressed by the following equation.
 It is nonlinear and nonholonomic system. Sometimes, it's extremely difficult to control the 
 steering(or angular velocity) and velocity of the wheeled robot. Therefore, many methods control only steering, like purepersuit, Linear MPC.
 However, sometimes we should consider the velocity and steering simultaneously when the car or robots move fast.
 To solve the problem, we should apply the control methods which can treat the nonlinear system.
 <a href="https://www.codecogs.com/eqnedit.php?latex=\frac{d}{dt}&space;\boldsymbol{X}=&space;\frac{d}{dt}&space;\begin{bmatrix}&space;x&space;\\&space;y&space;\\&space;\theta&space;\end{bmatrix}&space;=&space;\begin{bmatrix}&space;\cos(\theta)&space;&&space;0&space;\\&space;\sin(\theta)&space;&&space;0&space;\\&space;0&space;&&space;1&space;\\&space;\end{bmatrix}&space;\begin{bmatrix}&space;u_v&space;\\&space;u_\omega&space;\\&space;\end{bmatrix}&space;=&space;\boldsymbol{B}\boldsymbol{U}" target="_blank"><img src="https://latex.codecogs.com/gif.latex?\frac{d}{dt}&space;\boldsymbol{X}=&space;\frac{d}{dt}&space;\begin{bmatrix}&space;x&space;\\&space;y&space;\\&space;\theta&space;\end{bmatrix}&space;=&space;\begin{bmatrix}&space;\cos(\theta)&space;&&space;0&space;\\&space;\sin(\theta)&space;&&space;0&space;\\&space;0&space;&&space;1&space;\\&space;\end{bmatrix}&space;\begin{bmatrix}&space;u_v&space;\\&space;u_\omega&space;\\&space;\end{bmatrix}&space;=&space;\boldsymbol{B}\boldsymbol{U}" title="\frac{d}{dt} \boldsymbol{X}= \frac{d}{dt} \begin{bmatrix} x \\ y \\ \theta \end{bmatrix} = \begin{bmatrix} \cos(\theta) & 0 \\ \sin(\theta) & 0 \\ 0 & 1 \\ \end{bmatrix} \begin{bmatrix} u_v \\ u_\omega \\ \end{bmatrix} = \boldsymbol{B}\boldsymbol{U}" /></a>
 Nonliner Model Predictive Control is one of the famous methods, so I applied the method in the folder of this repository.
 (if you are interested, please look it)
 NMPC is very effecitive method to solve nonlinear optimal control problem but it is a handcraft method.
 This program is about one more other methods to solve the nonlinear optimal control problem.
 The method is iterative LQR.
 Iterative LQR is one of the DDP(differential dynamic programming) method.
 Recently, this method is used in IRL(inverse reinforcement learning), such as GPS(guided policy search)
 If you want to know more about the iLQR, please look the references.
 The paper and website is great.
 # Usage
 ```
 ```
 # Expected Results
 - static goal
 - track the goal
 # Applied other model
 # Requirement
 - python3.5 or more
 - numpy
 - matplotlib
 # Reference
 - study wolf
 https://github.com/studywolf/control
 - Sergey Levine's lecture
 http://rail.eecs.berkeley.edu/deeprlcourse/
 - Tassa, Y., Erez, T., & Todorov, E. (2012). Synthesis and stabilization of complex behaviors through online trajectory optimization. IEEE International Conference on Intelligent Robots and Systems, 4906–4913. https://doi.org/10.1109/IROS.2012.6386025
 - Li, W., & Todorov, E. (n.d.). Iterative Linear Quadratic Regulator Design for Nonlinear Biological Movement Systems. Retrieved from https://homes.cs.washington.edu/~todorov/papers/LiICINCO04.pdf
--- a/iLQR/animation.py
+++ b/iLQR/animation.py
@ -0,0 +1,294 @@
 import numpy as np
 import matplotlib.pyplot as plt
 import matplotlib.animation as ani
 import matplotlib.font_manager as fon
 import sys
 import math
 # default setting of figures
 plt.rcParams["mathtext.fontset"] = 'stix' # math fonts
 plt.rcParams['xtick.direction'] = 'in' # x axis in
 plt.rcParams['ytick.direction'] = 'in' # y axis in 
 plt.rcParams["font.size"] = 10
 plt.rcParams['axes.linewidth'] = 1.0 # axis line width
 plt.rcParams['axes.grid'] = True # make grid
 def coordinate_transformation_in_angle(positions, base_angle):
    '''
    Transformation the coordinate in the angle
    Parameters
    -------
    positions : numpy.ndarray
        this parameter is composed of xs, ys 
        should have (2, N) shape 
    base_angle : float [rad]
    Returns
    -------
    traslated_positions : numpy.ndarray
        the shape is (2, N)
    '''
    if positions.shape[0] != 2:
        raise ValueError('the input data should have (2, N)')
    positions = np.array(positions)
    positions = positions.reshape(2, -1)
    rot_matrix = [[np.cos(base_angle), np.sin(base_angle)],
                  [-1*np.sin(base_angle), np.cos(base_angle)]]
    rot_matrix = np.array(rot_matrix)
    translated_positions = np.dot(rot_matrix, positions)
    return translated_positions
 def square_make_with_angles(center_x, center_y, size, angle):
    '''
    Create square matrix with angle line matrix(2D)
    Parameters
    -------
    center_x : float in meters
        the center x position of the square
    center_y : float in meters
        the center y position of the square
    size : float in meters
        the square's half-size
    angle : float in radians
    Returns
    -------
    square xs : numpy.ndarray
        lenght is 5 (counterclockwise from right-up)
    square ys : numpy.ndarray
        length is 5 (counterclockwise from right-up)
    angle line xs : numpy.ndarray
    angle line ys : numpy.ndarray
    '''
    # start with the up right points
    # create point in counterclockwise
    square_xys = np.array([[size, 0.5 * size], [-size, 0.5 * size], [-size, -0.5 * size], [size, -0.5 * size], [size, 0.5 * size]])
    trans_points = coordinate_transformation_in_angle(square_xys.T, -angle) # this is inverse type
    trans_points += np.array([[center_x], [center_y]])
    square_xs = trans_points[0, :]
    square_ys = trans_points[1, :]
    angle_line_xs = [center_x, center_x + math.cos(angle) * size]
    angle_line_ys = [center_y, center_y + math.sin(angle) * size]
    return square_xs, square_ys, np.array(angle_line_xs), np.array(angle_line_ys)
 def circle_make_with_angles(center_x, center_y, radius, angle):
    '''
    Create circle matrix with angle line matrix
    Parameters
    -------
    center_x : float
        the center x position of the circle
    center_y : float
        the center y position of the circle
    radius : float
    angle : float [rad]
    Returns
    -------
    circle xs : numpy.ndarray
    circle ys : numpy.ndarray
    angle line xs : numpy.ndarray
    angle line ys : numpy.ndarray
    '''
    point_num = 100 # 分解能
    circle_xs = []
    circle_ys = []
    for i in range(point_num + 1):
        circle_xs.append(center_x + radius * math.cos(i*2*math.pi/point_num))
        circle_ys.append(center_y + radius * math.sin(i*2*math.pi/point_num))
    angle_line_xs = [center_x, center_x + math.cos(angle) * radius]
    angle_line_ys = [center_y, center_y + math.sin(angle) * radius]
    return np.array(circle_xs), np.array(circle_ys), np.array(angle_line_xs), np.array(angle_line_ys)
 class AnimDrawer():
    """create animation of path and robot
    Attributes
    ------------
    cars : 
    anim_fig : figure of matplotlib
    axis : axis of matplotlib
    """
    def __init__(self, objects):
        """
        Parameters
        ------------
        objects : list of objects
        Notes
        ---------
        lead_history_states, lead_history_predict_states, traj_ref, history_traj_ref, history_angle_ref
        """
        self.car_history_state = objects[0]
        self.traget = objects[1]
        self.history_xs = [self.car_history_state[:, 0]]
        self.history_ys = [self.car_history_state[:, 1]]
        self.history_ths = [self.car_history_state[:, 2]]
        # setting up figure
        self.anim_fig = plt.figure(dpi=150)
        self.axis = self.anim_fig.add_subplot(111)
        # imgs
        self.car_imgs = []
        self.traj_imgs = []
    def draw_anim(self, interval=50):
        """draw the animation and save
        Parameteres
        -------------
        interval : int, optional
            animation's interval time, you should link the sampling time of systems
            default is 50 [ms]
        """
        self._set_axis()
        self._set_img()
        self.skip_num = 1
        frame_num = int((len(self.history_xs[0])-1) / self.skip_num)
        animation = ani.FuncAnimation(self.anim_fig, self._update_anim, interval=interval, frames=frame_num)
        # self.axis.legend()
        print('save_animation?')
        shuold_save_animation = int(input())
        if shuold_save_animation: 
            print('animation_number?')
            num = int(input())
            animation.save('animation_{0}.mp4'.format(num), writer='ffmpeg')
            # animation.save("Sample.gif", writer = 'imagemagick') # gif保存
        plt.show()
    def _set_axis(self):
        """ initialize the animation axies
        """
        # (1) set the axis name
        self.axis.set_xlabel(r'$\it{x}$ [m]')
        self.axis.set_ylabel(r'$\it{y}$ [m]')
        self.axis.set_aspect('equal', adjustable='box')
        LOW_MARGIN = 5
        HIGH_MARGIN = 5
        self.axis.set_xlim(np.min(self.history_xs) - LOW_MARGIN, np.max(self.history_xs) + HIGH_MARGIN)
        self.axis.set_ylim(np.min(self.history_ys) - LOW_MARGIN, np.max(self.history_ys) + HIGH_MARGIN)
    def _set_img(self):
        """ initialize the imgs of animation
            this private function execute the make initial imgs for animation
        """
        # object imgs
        obj_color_list = ["k", "k", "m", "m"]
        obj_styles = ["solid", "solid", "solid", "solid"]
        for i in range(len(obj_color_list)):
            temp_img, = self.axis.plot([], [], color=obj_color_list[i], linestyle=obj_styles[i])
            self.car_imgs.append(temp_img)
        traj_color_list = ["k", "b"]
        for i in range(len(traj_color_list)):
            temp_img, = self.axis.plot([],[], color=traj_color_list[i], linestyle="dashed")
            self.traj_imgs.append(temp_img)
        temp_img, = self.axis.plot([],[], "*", color="b")
        self.traj_imgs.append(temp_img)
    def _update_anim(self, i):
        """the update animation
        this function should be used in the animation functions
        Parameters
        ------------
        i : int
            time step of the animation
            the sampling time should be related to the sampling time of system
        Returns
        -----------
        object_imgs : list of img
        traj_imgs : list of img
        """
        i = int(i * self.skip_num)
        # self._draw_set_axis(i)
        self._draw_car(i)
        self._draw_traj(i)
        # self._draw_prediction(i)
        return self.car_imgs, self.traj_imgs, 
    def _draw_set_axis(self, i):
        """
        """
        # (2) set the xlim and ylim
        LOW_MARGIN = 20
        HIGH_MARGIN = 20
        OVER_LOOK = 50
        self.axis.set_xlim(np.min(self.history_xs[0][i : i + OVER_LOOK]) - LOW_MARGIN, np.max(self.history_xs[0][i : i + OVER_LOOK]) + HIGH_MARGIN)
        self.axis.set_ylim(np.min(self.history_ys[0][i : i + OVER_LOOK]) - LOW_MARGIN, np.max(self.history_ys[0][i : i + OVER_LOOK]) + HIGH_MARGIN)         
    def _draw_car(self, i):
        """
        This private function is just divided thing of
        the _update_anim to see the code more clear
        Parameters
        ------------
        i : int
            time step of the animation
            the sampling time should be related to the sampling time of system
        """
        # cars
        object_x, object_y, angle_x, angle_y = square_make_with_angles(self.history_xs[0][i],
                                                                        self.history_ys[0][i], 
                                                                        1.0,
                                                                        self.history_ths[0][i])
        self.car_imgs[0].set_data([object_x, object_y])
        self.car_imgs[1].set_data([angle_x, angle_y])
    def _draw_traj(self, i):
        """
        This private function is just divided thing of
        the _update_anim to see the code more clear
        Parameters
        ------------
        i : int
            time step of the animation
            the sampling time should be related to the sampling time of system
        """
        # car
        self.traj_imgs[0].set_data(self.history_xs[0][:i], self.history_ys[0][:i])
        # goal
        self.traj_imgs[-1].set_data(self.traget[0], self.traget[1])
        # traj_ref
        # self.traj_imgs[2].set_data(self.history_traj_ref[i][0, :], self.history_traj_ref[i][1, :])
--- a/iLQR/ilqr.py
+++ b/iLQR/ilqr.py
@ -0,0 +1,408 @@
 import numpy as np
 from copy import copy, deepcopy
 from model import TwoWheeledCar
 class iLQRController():
    """
    A controller that implements iterative Linear Quadratic control.
    Controls the (x, y, th) of the two wheeled car
    Attributes:
    ------------
    """
    def __init__(self, N=100, max_iter=400, dt=0.016): 
        '''
        n int: length of the control sequence
        max_iter int: limit on number of optimization iterations
        '''
        self.old_target = [None, None]
        self.tN = N # number of timesteps
        self.STATE_SIZE = 3
        self.INPUT_SIZE = 2
        self.dt = dt
        self.max_iter = max_iter
        self.lamb_factor = 10
        self.lamb_max = 1e4
        self.eps_converge = 0.001 # exit if relative improvement below threshold
    def calc_input(self, car, x_target, changed=False):
        """Generates a control signal to move the 
        arm to the specified target.
        car : the arm model being controlled NOTE:これが実際にコントロールされるやつ
        des list : the desired system position
        x_des np.array: desired task-space force, 
                        irrelevant here.
        """
        # if the target has changed, reset things and re-optimize 
        # for this movement、目標が変わっている場合があるので確認
        if changed:
            self.reset(x_target)
        # Reset k if at the end of the sequence
        if self.t >= self.tN - 1: # 最初のSTEPのみ計算
            self.t = 0
        # Compute the optimization
        """
        NOTE : ここに条件を追加してもいいかもしれない、何サイクルも回す必要ないし、理想軌道とずれたらとか
        """
        if self.t % 1 == 0:
            x0 = np.zeros(self.STATE_SIZE) # 初期化、速度は0
            self.simulator, x0 = self.initialize_simulator(car) # 前の時刻のものを確保
            U = np.copy(self.U[self.t:]) # 初期入力かなこれ
            self.X, self.U[self.t:], cost = self.ilqr(x0, U) # 入力列が入ってくる
        self.u = self.U[self.t] 
        # move us a step forward in our control sequence
        self.t += 1
        return self.u
    def initialize_simulator(self, car):
        """ make a copy of the car model, to make sure that the
        actual car model isn't affected during the iLQR process
        """ 
        # need to make a copy the real car
        simulator = TwoWheeledCar(deepcopy(car.xs))
        return simulator, deepcopy(simulator.xs)
    def cost(self, xs, us):
        """ the immediate state cost function
        Parameters
        ------------
        xs : shape(STATE_SIZE, tN + 1)
        us : shape(STATE_SIZE, tN)
        """
        """
        NOTE : 拡張する説ありますがとりあえず飛ばします
        """
        # total cost
        # quadratic のもののみ計算
        R_11 = 0.01 # terminal u thorottle cost weight
        R_22 = 0.01 # terminal u steering cost weight
        l = np.dot(us.T, np.dot(np.diag([R_11, R_22]), us))
        # compute derivatives of cost
        l_x = np.zeros(self.STATE_SIZE)
        l_xx = np.zeros((self.STATE_SIZE, self.STATE_SIZE))
        l_u1 = 2. * us[0] * R_11
        l_u2 = 2. * us[1] * R_22
        l_u = np.array([l_u1, l_u2])
        l_uu = 2. * np.diag([R_11, R_22])
        l_ux = np.zeros((self.INPUT_SIZE, self.STATE_SIZE))
        # returned in an array for easy multiplication by time step 
        return l, l_x, l_xx, l_u, l_uu, l_ux
    def cost_final(self, x):
        """ the final state cost function
        Parameters
        -------------
        xs : numpy.ndarray, shape(STATE_SIZE,)
        Notes : 
        ---------
        l_x = np.zeros((self.STATE_SIZE))
        l_xx = np.zeros((self.STATE_SIZE, self.STATE_SIZE))
        """
        Q_11 = 1. # terminal x cost weight
        Q_22 = 1. # terminal y cost weight
        Q_33 = 0.01 # terminal theta cost weight
        error = self.simulator.xs - self.target
        l = np.dot(error.T, np.dot(np.diag([Q_11, Q_22, Q_33]), error))
        # about L_x
        l_x1 = 2. * (x[0] - self.target[0]) * Q_11
        l_x2 = 2. * (x[1] - self.target[1]) * Q_22
        l_x3 = 2. * (x[2] -self.target[2]) * Q_33
        l_x = np.array([l_x1, l_x2, l_x3])
        # about l_xx
        l_xx =  2. * np.diag([Q_11, Q_22, Q_33])
        # Final cost only requires these three values
        return l, l_x, l_xx
    def finite_differences(self, x, u): 
        """ calculate gradient of plant dynamics using finite differences
        Parameters
        --------------
        x : numpy.ndarray, shape(STATE_SIZE,)
            the state of the system
        u : numpy.ndarray, shape(INPUT_SIZE,)
            the control input
        Returns
        ------------
        A : numpy.ndarray, shape(STATE_SIZE, STATE_SIZE)
            differential of the model /alpha X
        B : numpy.ndarray, shape(STATE_SIZE, INPUT_SIZE)
            differential of the model /alpha U
        """  
        A = np.zeros((self.STATE_SIZE, self.STATE_SIZE))
        A_ideal = np.zeros((self.STATE_SIZE, self.STATE_SIZE))
        B = np.zeros((self.STATE_SIZE, self.INPUT_SIZE))
        B_ideal = np.zeros((self.STATE_SIZE, self.INPUT_SIZE))
        eps = 1e-4 # finite differences epsilon
        for ii in range(self.STATE_SIZE):
            # calculate partial differential w.r.t. x
            inc_x = x.copy()
            inc_x[ii] += eps
            state_inc,_ = self.plant_dynamics(inc_x, u.copy())
            dec_x = x.copy()
            dec_x[ii] -= eps
            state_dec,_ = self.plant_dynamics(dec_x, u.copy())
            A[:, ii] = (state_inc - state_dec) / (2 * eps)
        A_ideal[0, 2] = -np.sin(x[2]) * u[0]
        A_ideal[1, 2] = np.cos(x[2]) * u[0]
        for ii in range(self.INPUT_SIZE):
            # calculate partial differential w.r.t. u
            inc_u = u.copy()
            inc_u[ii] += eps
            state_inc,_ = self.plant_dynamics(x.copy(), inc_u)
            dec_u = u.copy()
            dec_u[ii] -= eps
            state_dec,_ = self.plant_dynamics(x.copy(), dec_u)
            B[:, ii] = (state_inc - state_dec) / (2 * eps)
        # calc by hand
        B_ideal[0, 0] = np.cos(x[2])
        B_ideal[1, 0] = np.sin(x[2])
        B_ideal[2, 1] = 1.
        return A_ideal, B_ideal
    def ilqr(self, x0, U=None): 
        """ use iterative linear quadratic regulation to find a control 
        sequence that minimizes the cost function 
        Parameters
        --------------
        x0 : numpy.ndarray, shape(STATE_SIZE, )
            the initial state of the system
        U : numpy.ndarray(TIME, INPUT_SIZE)
            the initial control trajectory dimension
        """
        U = self.U if U is None else U
        lamb = 1.0 # regularization parameter
        sim_new_trajectory = True
        tN = U.shape[0] # number of time steps
        for ii in range(self.max_iter):
            if sim_new_trajectory == True: 
                # simulate forward using the current control trajectory
                X, cost = self.simulate(x0, U)
                oldcost = np.copy(cost) # copy for exit condition check
                # 
                f_x = np.zeros((tN, self.STATE_SIZE, self.STATE_SIZE)) # df / dx
                f_u = np.zeros((tN, self.STATE_SIZE, self.INPUT_SIZE)) # df / du
                # for storing quadratized cost function 
                l = np.zeros((tN,1)) # immediate state cost 
                l_x = np.zeros((tN, self.STATE_SIZE)) # dl / dx
                l_xx = np.zeros((tN, self.STATE_SIZE, self.STATE_SIZE)) # d^2 l / dx^2
                l_u = np.zeros((tN, self.INPUT_SIZE)) # dl / du
                l_uu = np.zeros((tN, self.INPUT_SIZE, self.INPUT_SIZE)) # d^2 l / du^2
                l_ux = np.zeros((tN, self.INPUT_SIZE, self.STATE_SIZE)) # d^2 l / du / dx
                # for everything except final state
                for t in range(tN-1):
                    # x(t+1) = f(x(t), u(t)) = x(t) + dx(t) * dt
                    # linearized dx(t) = np.dot(A(t), x(t)) + np.dot(B(t), u(t))
                    # f_x = np.eye + A(t)
                    # f_u = B(t)
                    A, B = self.finite_differences(X[t], U[t])
                    f_x[t] = np.eye(self.STATE_SIZE) + A * self.dt
                    f_u[t] = B * self.dt
                    (l[t], l_x[t], l_xx[t], l_u[t], l_uu[t], l_ux[t]) = self.cost(X[t], U[t])
                    l[t] *= self.dt
                    l_x[t] *= self.dt
                    l_xx[t] *= self.dt
                    l_u[t] *= self.dt
                    l_uu[t] *= self.dt
                    l_ux[t] *= self.dt
                # and for final state
                l[-1], l_x[-1], l_xx[-1] = self.cost_final(X[-1])
                sim_new_trajectory = False
            # optimize things! 
            # initialize Vs with final state cost and set up k, K 
            V = l[-1].copy() # value function
            V_x = l_x[-1].copy() # dV / dx
            V_xx = l_xx[-1].copy() # d^2 V / dx^2
            k = np.zeros((tN, self.INPUT_SIZE)) # feedforward modification
            K = np.zeros((tN, self.INPUT_SIZE, self.STATE_SIZE)) # feedback gain
            # NOTE: they use V' to denote the value at the next timestep, 
            # they have this redundant in their notation making it a 
            # function of f(x + dx, u + du) and using the ', but it makes for 
            # convenient shorthand when you drop function dependencies
            # work backwards to solve for V, Q, k, and K
            for t in range(self.tN-2, -1, -1):
                # NOTE: we're working backwards, so V_x = V_x[t+1] = V'_x
                # 4a) Q_x = l_x + np.dot(f_x^T, V'_x)
                Q_x = l_x[t] + np.dot(f_x[t].T, V_x) 
                # 4b) Q_u = l_u + np.dot(f_u^T, V'_x)
                Q_u = l_u[t] + np.dot(f_u[t].T, V_x)
                # NOTE: last term for Q_xx, Q_uu, and Q_ux is vector / tensor product
                # but also note f_xx = f_uu = f_ux = 0 so they're all 0 anyways.
                # 4c) Q_xx = l_xx + np.dot(f_x^T, np.dot(V'_xx, f_x)) + np.einsum(V'_x, f_xx)
                Q_xx = l_xx[t] + np.dot(f_x[t].T, np.dot(V_xx, f_x[t])) 
                # 4d) Q_ux = l_ux + np.dot(f_u^T, np.dot(V'_xx, f_x)) + np.einsum(V'_x, f_ux)
                Q_ux = l_ux[t] + np.dot(f_u[t].T, np.dot(V_xx, f_x[t]))
                # 4e) Q_uu = l_uu + np.dot(f_u^T, np.dot(V'_xx, f_u)) + np.einsum(V'_x, f_uu)
                Q_uu = l_uu[t] + np.dot(f_u[t].T, np.dot(V_xx, f_u[t]))
                # Calculate Q_uu^-1 with regularization term set by 
                # Levenberg-Marquardt heuristic (at end of this loop)
                Q_uu_evals, Q_uu_evecs = np.linalg.eig(Q_uu)
                Q_uu_evals[Q_uu_evals < 0] = 0.0
                Q_uu_evals += lamb
                Q_uu_inv = np.dot(Q_uu_evecs, np.dot(np.diag(1.0/Q_uu_evals), Q_uu_evecs.T))
                # 5b) k = -np.dot(Q_uu^-1, Q_u)
                k[t] = -np.dot(Q_uu_inv, Q_u)
                # 5b) K = -np.dot(Q_uu^-1, Q_ux)
                K[t] = -np.dot(Q_uu_inv, Q_ux)
                # 6a) DV = -.5 np.dot(k^T, np.dot(Q_uu, k))
                # 6b) V_x = Q_x - np.dot(K^T, np.dot(Q_uu, k))
                V_x = Q_x - np.dot(K[t].T, np.dot(Q_uu, k[t]))
                # 6c) V_xx = Q_xx - np.dot(-K^T, np.dot(Q_uu, K))
                V_xx = Q_xx - np.dot(K[t].T, np.dot(Q_uu, K[t]))
            U_new = np.zeros((tN, self.INPUT_SIZE))
            # calculate the optimal change to the control trajectory
            x_new = x0.copy() # 7a)
            for t in range(tN - 1): 
                # use feedforward (k) and feedback (K) gain matrices 
                # calculated from our value function approximation
                # to take a stab at the optimal control signal
                U_new[t] = U[t] + k[t] + np.dot(K[t], x_new - X[t]) # 7b)
                # given this u, find our next state
                _,x_new = self.plant_dynamics(x_new, U_new[t]) # 7c)
            # evaluate the new trajectory 
            X_new, cost_new = self.simulate(x0, U_new)
            # Levenberg-Marquardt heuristic
            if cost_new < cost: 
                # decrease lambda (get closer to Newton's method)
                lamb /= self.lamb_factor
                X = np.copy(X_new) # update trajectory 
                U = np.copy(U_new) # update control signal
                oldcost = np.copy(cost)
                cost = np.copy(cost_new)
                sim_new_trajectory = True # do another rollout
                # print("iteration = %d; Cost = %.4f;"%(ii, costnew) + 
                #         " logLambda = %.1f"%np.log(lamb))
                # check to see if update is small enough to exit
                if ii > 0 and ((abs(oldcost-cost)/cost) < self.eps_converge):
                    print("Converged at iteration = %d; Cost = %.4f;"%(ii,cost_new) + 
                            " logLambda = %.1f"%np.log(lamb))
                    break
            else: 
                # increase lambda (get closer to gradient descent)
                lamb *= self.lamb_factor
                # print("cost: %.4f, increasing lambda to %.4f")%(cost, lamb)
                if lamb > self.lamb_max: 
                    print("lambda > max_lambda at iteration = %d;"%ii + 
                        " Cost = %.4f; logLambda = %.1f"%(cost, 
                                                          np.log(lamb)))
                    break
        return X, U, cost
    def plant_dynamics(self, x, u):
        """ simulate a single time step of the plant, from 
        initial state x and applying control signal u
        x np.array: the state of the system
        u np.array: the control signal
        """ 
        # set the arm position to x
        self.simulator.initialize_state(x)
        # apply the control signal
        x_next = self.simulator.update_state(u, self.dt)
        # calculate the change in state
        xdot = ((x_next - x) / self.dt).squeeze()
        return xdot, x_next
    def reset(self, target):
        """ reset the state of the system """
        # Index along current control sequence
        self.t = 0
        self.U = np.zeros((self.tN, self.INPUT_SIZE))
        self.target = target.copy()
    def simulate(self, x0, U):
        """ do a rollout of the system, starting at x0 and 
        applying the control sequence U
        x0 np.array: the initial state of the system
        U np.array: the control sequence to apply
        """ 
        tN = U.shape[0]
        X = np.zeros((tN, self.STATE_SIZE))
        X[0] = x0
        cost = 0
        # Run simulation with substeps
        for t in range(tN-1):
            _,X[t+1] = self.plant_dynamics(X[t], U[t])
            l, _ , _, _ , _ , _  = self.cost(X[t], U[t])
            cost = cost + self.dt * l
        # Adjust for final cost, subsample trajectory
        l_f, _, _ = self.cost_final(X[-1])
        cost = cost + l_f
        return X, cost
--- a/iLQR/main.py
+++ b/iLQR/main.py
@ -0,0 +1,59 @@
 import numpy as np
 import matplotlib.pyplot as plt
 import math
 from model import TwoWheeledCar
 from ilqr import iLQRController
 from animation import AnimDrawer
 def main():
    """
    """
    # iteration parameters
    NUM_ITERARIONS = 500
    dt = 0.01
    # make plant
    init_x = np.array([0., 0., 0.5*math.pi])
    car = TwoWheeledCar(init_x)
    # make goal
    target = np.array([5., 3., 0.])
    # controller
    controller = iLQRController()
    for iteration in range(NUM_ITERARIONS):
        print("iteration num = {} / {}".format(iteration, NUM_ITERARIONS))
        if iteration == 0:
            changed = True
        u = controller.calc_input(car, target, changed=changed)
        car.update_state(u, dt)
    # figures and animation
    history_states = np.array(car.history_xs)
    time_fig = plt.figure()
    x_fig = time_fig.add_subplot(311)
    y_fig = time_fig.add_subplot(312)
    th_fig = time_fig.add_subplot(313)
    time = len(history_states)
    x_fig.plot(np.arange(time), history_states[:, 0])
    y_fig.plot(np.arange(time), history_states[:, 1])
    th_fig.plot(np.arange(time), history_states[:, 2])
    plt.show()
    history_states = np.array(car.history_xs)
    animdrawer = AnimDrawer([history_states, target])
    animdrawer.draw_anim()
 if __name__ == "__main__":
    main()
--- a/iLQR/model.py
+++ b/iLQR/model.py
@ -0,0 +1,177 @@
 import numpy as np
 import matplotlib.pyplot as plt
 import math
 import copy
 """
 このWheeled modelはコントローラー用
 ホントはbase作って、継承すべきですが省略
 """
 class TwoWheeledCar():
    """SampleSystem, this is the simulator
    Attributes
    -----------
    xs : numpy.ndarray
        system states, [x, y, theta]
    history_xs : list
        time history of state
    """
    def __init__(self, init_states=None):
        """
        Palameters
        -----------
        init_state : float, optional, shape(3, )
            initial state of system default is None
        """
        self.STATE_SIZE = 3
        self.INPUT_SIZE = 2
        self.xs = np.zeros(3)
        if init_states is not None:
            self.xs = copy.deepcopy(init_states)
        self.history_xs = [init_states]
        self.history_predict_xs = []
    def update_state(self, us, dt):
        """
        Palameters
        ------------
        us : numpy.ndarray
            inputs of system in some cases this means the reference
        dt : float in seconds, optional
            sampling time of simulation, default is 0.01 [s]
        """
        # for theta 1, theta 1 dot, theta 2, theta 2 dot
        k0 = [0.0 for _ in range(3)]
        k1 = [0.0 for _ in range(3)]
        k2 = [0.0 for _ in range(3)]
        k3 = [0.0 for _ in range(3)]
        functions = [self._func_x_1, self._func_x_2, self._func_x_3]
        # solve Runge-Kutta
        for i, func in enumerate(functions):
            k0[i] = dt * func(self.xs[0], self.xs[1], self.xs[2], us[0], us[1])
        for i, func in enumerate(functions):
            k1[i] = dt * func(self.xs[0] + k0[0]/2., self.xs[1] + k0[1]/2., self.xs[2] + k0[2]/2., us[0], us[1])
        for i, func in enumerate(functions):
            k2[i] = dt * func(self.xs[0] + k0[0]/2., self.xs[1] + k0[1]/2., self.xs[2] + k0[2]/2., us[0], us[1])
        for i, func in enumerate(functions):
            k3[i] =  dt * func(self.xs[0] + k2[0], self.xs[1] + k2[1], self.xs[2] + k2[2], us[0], us[1])
        self.xs[0] += (k0[0] + 2. * k1[0] + 2. * k2[0] + k3[0]) / 6.
        self.xs[1] += (k0[1] + 2. * k1[1] + 2. * k2[1] + k3[1]) / 6.
        self.xs[2] += (k0[2] + 2. * k1[2] + 2. * k2[2] + k3[2]) / 6.
        # save
        save_states = copy.deepcopy(self.xs)
        self.history_xs.append(save_states)
        return self.xs.copy()
    def predict_state(self, init_xs, us, dt=0.01):
        """make predict state by using optimal input made by MPC
        Paramaters
        -----------
        us : array-like, shape(2, N)
            optimal input made by MPC
        dt : float in seconds, optional
            sampling time of simulation, default is 0.01 [s]
        """
        ## test
        # assert us.shape[0] == 2 and us.shape[1] == 15, "wrong shape"
        xs = copy.deepcopy(init_xs)
        predict_xs = [copy.deepcopy(xs)]
        for i in range(us.shape[1]):
            k0 = [0.0 for _ in range(self.NUM_STATE)]
            k1 = [0.0 for _ in range(self.NUM_STATE)]
            k2 = [0.0 for _ in range(self.NUM_STATE)]
            k3 = [0.0 for _ in range(self.NUM_STATE)]
            functions = [self._func_x_1, self._func_x_2, self._func_x_3]
            # solve Runge-Kutta
            for i, func in enumerate(functions):
                k0[i] = dt * func(xs[0], xs[1], xs[2], us[0, i], us[1, i])
            for i, func in enumerate(functions):
                k1[i] = dt * func(xs[0] + k0[0]/2., xs[1] + k0[1]/2., xs[2] + k0[2]/2., us[0, i], us[1, i])
            for i, func in enumerate(functions):
                k2[i] = dt * func(xs[0] + k1[0]/2., xs[1] + k1[1]/2., xs[2] + k1[2]/2., us[0, i], us[1, i])
            for i, func in enumerate(functions):
                k3[i] =  dt * func(xs[0] + k2[0], xs[1] + k2[1], xs[2] + k2[2], us[0, i], us[1, i])
            xs[0] += (k0[0] + 2. * k1[0] + 2. * k2[0] + k3[0]) / 6.
            xs[1] += (k0[1] + 2. * k1[1] + 2. * k2[1] + k3[1]) / 6.
            xs[2] += (k0[2] + 2. * k1[2] + 2. * k2[2] + k3[2]) / 6.
            predict_xs.append(copy.deepcopy(xs))
        self.history_predict_xs.append(np.array(predict_xs))
        return np.array(predict_xs)
    def initialize_state(self, init_xs):
        """
        initialize the state
        Parameters
        ------------
        init_xs : numpy.ndarray
        """
        self.xs = init_xs.flatten()
    def _func_x_1(self, y_1, y_2, y_3, u_1, u_2):
        """
        Parameters
        ------------
        y_1 : float
        y_2 : float
        y_3 : float
        u_1 : float
            system input
        u_2 : float
            system input
        """
        y_dot = math.cos(y_3) * u_1
        return y_dot
    def _func_x_2(self, y_1, y_2, y_3, u_1, u_2):
        """
        Parameters
        ------------
        y_1 : float
        y_2 : float
        y_3 : float
        u_1 : float
            system input
        u_2 : float
            system input
        """
        y_dot = math.sin(y_3) * u_1
        return y_dot
    def _func_x_3(self, y_1, y_2, y_3, u_1, u_2):
        """
        Parameters
        ------------
        y_1 : float
        y_2 : float
        y_3 : float
        u_1 : float
            system input
        u_2 : float
            system input
        """
        y_dot = u_2
        return y_dot