From 0b6cdc5778e6db45b6a3c39c3a080ee1aa8baac1 Mon Sep 17 00:00:00 2001
From: Shunichi09
Date: Fri, 5 Apr 2019 17:15:54 +0900
Subject: [PATCH] add iLQR

---
 iLQR/README.md    |  61 +++++++
 iLQR/animation.py | 294 +++++++++++++++++++++++++++++
 iLQR/ilqr.py      | 408 ++++++++++++++++++++++++++++++++++++++++++++++
 iLQR/main.py      |  59 +++++++
 iLQR/model.py     | 177 ++++++++++++++++++++
 5 files changed, 999 insertions(+)
 create mode 100644 iLQR/README.md
 create mode 100644 iLQR/animation.py
 create mode 100644 iLQR/ilqr.py
 create mode 100644 iLQR/main.py
 create mode 100644 iLQR/model.py

diff --git a/iLQR/README.md b/iLQR/README.md
new file mode 100644
index 0000000..d893b05
--- /dev/null
+++ b/iLQR/README.md
@@ -0,0 +1,61 @@
+# Iterative Linear Quadratic Regulator
+This program implements iLQR (Iterative Linear Quadratic Regulator).
+
+# Problem Formulation
+
+The two-wheeled robot is described by the following kinematic model (see `model.py`),
+with state (x, y, theta), velocity input u_1 and angular-velocity input u_2:
+
+dx/dt = u_1 cos(theta),  dy/dt = u_1 sin(theta),  dtheta/dt = u_2
+
+The system is nonlinear and nonholonomic, and it is often very difficult to control the
+steering (angular velocity) and the velocity of the robot at the same time. Therefore, many methods control only the steering, e.g. pure pursuit or linear MPC.
+However, when the car or robot moves fast, velocity and steering should be considered simultaneously.
+To handle this, we need control methods that can deal with the nonlinear system directly.
+
+Nonlinear Model Predictive Control (NMPC) is one well-known such method, and it is implemented in another folder of this repository.
+(If you are interested, please have a look at it.)
+
+NMPC is an effective way to solve nonlinear optimal control problems, but it requires a fair amount of hand tuning.
+This program covers another method for the same class of problems: iterative LQR.
+
+Iterative LQR (iLQR) is a variant of DDP (differential dynamic programming).
+Recently it has also been used in IRL (inverse reinforcement learning), for example in GPS (guided policy search).
+
+If you want to know more about iLQR, please see the references below.
+The paper and the website are great.
+
+# Usage
+
+```
+$ python main.py
+```
+
+# Expected Results
+
+- static goal
+
+- tracking a moving goal
+
+# Applying other models
+
+# Requirements
+
+- python3.5 or more
+- numpy
+- matplotlib
+
+# References
+
+- studywolf
+  https://github.com/studywolf/control
+
+- Sergey Levine's lecture
+  http://rail.eecs.berkeley.edu/deeprlcourse/
+
+- Tassa, Y., Erez, T., & Todorov, E. (2012). Synthesis and stabilization of complex behaviors through online trajectory optimization. IEEE International Conference on Intelligent Robots and Systems, 4906–4913. https://doi.org/10.1109/IROS.2012.6386025
+
+- Li, W., & Todorov, E. (n.d.). Iterative Linear Quadratic Regulator Design for Nonlinear Biological Movement Systems.
Retrieved from https://homes.cs.washington.edu/~todorov/papers/LiICINCO04.pdf \ No newline at end of file diff --git a/iLQR/animation.py b/iLQR/animation.py new file mode 100644 index 0000000..d6970d7 --- /dev/null +++ b/iLQR/animation.py @@ -0,0 +1,294 @@ +import numpy as np +import matplotlib.pyplot as plt +import matplotlib.animation as ani +import matplotlib.font_manager as fon +import sys +import math + +# default setting of figures +plt.rcParams["mathtext.fontset"] = 'stix' # math fonts +plt.rcParams['xtick.direction'] = 'in' # x axis in +plt.rcParams['ytick.direction'] = 'in' # y axis in +plt.rcParams["font.size"] = 10 +plt.rcParams['axes.linewidth'] = 1.0 # axis line width +plt.rcParams['axes.grid'] = True # make grid + +def coordinate_transformation_in_angle(positions, base_angle): + ''' + Transformation the coordinate in the angle + + Parameters + ------- + positions : numpy.ndarray + this parameter is composed of xs, ys + should have (2, N) shape + base_angle : float [rad] + + Returns + ------- + traslated_positions : numpy.ndarray + the shape is (2, N) + + ''' + if positions.shape[0] != 2: + raise ValueError('the input data should have (2, N)') + + positions = np.array(positions) + positions = positions.reshape(2, -1) + + rot_matrix = [[np.cos(base_angle), np.sin(base_angle)], + [-1*np.sin(base_angle), np.cos(base_angle)]] + + rot_matrix = np.array(rot_matrix) + + translated_positions = np.dot(rot_matrix, positions) + + return translated_positions + +def square_make_with_angles(center_x, center_y, size, angle): + ''' + Create square matrix with angle line matrix(2D) + + Parameters + ------- + center_x : float in meters + the center x position of the square + center_y : float in meters + the center y position of the square + size : float in meters + the square's half-size + angle : float in radians + + Returns + ------- + square xs : numpy.ndarray + lenght is 5 (counterclockwise from right-up) + square ys : numpy.ndarray + length is 5 (counterclockwise from right-up) + angle line xs : numpy.ndarray + angle line ys : numpy.ndarray + ''' + + # start with the up right points + # create point in counterclockwise + square_xys = np.array([[size, 0.5 * size], [-size, 0.5 * size], [-size, -0.5 * size], [size, -0.5 * size], [size, 0.5 * size]]) + trans_points = coordinate_transformation_in_angle(square_xys.T, -angle) # this is inverse type + trans_points += np.array([[center_x], [center_y]]) + + square_xs = trans_points[0, :] + square_ys = trans_points[1, :] + + angle_line_xs = [center_x, center_x + math.cos(angle) * size] + angle_line_ys = [center_y, center_y + math.sin(angle) * size] + + return square_xs, square_ys, np.array(angle_line_xs), np.array(angle_line_ys) + + +def circle_make_with_angles(center_x, center_y, radius, angle): + ''' + Create circle matrix with angle line matrix + + Parameters + ------- + center_x : float + the center x position of the circle + center_y : float + the center y position of the circle + radius : float + angle : float [rad] + + Returns + ------- + circle xs : numpy.ndarray + circle ys : numpy.ndarray + angle line xs : numpy.ndarray + angle line ys : numpy.ndarray + ''' + + point_num = 100 # 分解能 + + circle_xs = [] + circle_ys = [] + + for i in range(point_num + 1): + circle_xs.append(center_x + radius * math.cos(i*2*math.pi/point_num)) + circle_ys.append(center_y + radius * math.sin(i*2*math.pi/point_num)) + + angle_line_xs = [center_x, center_x + math.cos(angle) * radius] + angle_line_ys = [center_y, center_y + math.sin(angle) * radius] + + return 
np.array(circle_xs), np.array(circle_ys), np.array(angle_line_xs), np.array(angle_line_ys) + + +class AnimDrawer(): + """create animation of path and robot + + Attributes + ------------ + cars : + anim_fig : figure of matplotlib + axis : axis of matplotlib + + """ + def __init__(self, objects): + """ + Parameters + ------------ + objects : list of objects + + Notes + --------- + lead_history_states, lead_history_predict_states, traj_ref, history_traj_ref, history_angle_ref + """ + self.car_history_state = objects[0] + self.traget = objects[1] + + self.history_xs = [self.car_history_state[:, 0]] + self.history_ys = [self.car_history_state[:, 1]] + self.history_ths = [self.car_history_state[:, 2]] + + # setting up figure + self.anim_fig = plt.figure(dpi=150) + self.axis = self.anim_fig.add_subplot(111) + + # imgs + self.car_imgs = [] + self.traj_imgs = [] + + def draw_anim(self, interval=50): + """draw the animation and save + + Parameteres + ------------- + interval : int, optional + animation's interval time, you should link the sampling time of systems + default is 50 [ms] + """ + self._set_axis() + self._set_img() + + self.skip_num = 1 + frame_num = int((len(self.history_xs[0])-1) / self.skip_num) + + animation = ani.FuncAnimation(self.anim_fig, self._update_anim, interval=interval, frames=frame_num) + + # self.axis.legend() + print('save_animation?') + shuold_save_animation = int(input()) + + if shuold_save_animation: + print('animation_number?') + num = int(input()) + animation.save('animation_{0}.mp4'.format(num), writer='ffmpeg') + # animation.save("Sample.gif", writer = 'imagemagick') # gif保存 + + plt.show() + + def _set_axis(self): + """ initialize the animation axies + """ + # (1) set the axis name + self.axis.set_xlabel(r'$\it{x}$ [m]') + self.axis.set_ylabel(r'$\it{y}$ [m]') + self.axis.set_aspect('equal', adjustable='box') + + LOW_MARGIN = 5 + HIGH_MARGIN = 5 + + self.axis.set_xlim(np.min(self.history_xs) - LOW_MARGIN, np.max(self.history_xs) + HIGH_MARGIN) + self.axis.set_ylim(np.min(self.history_ys) - LOW_MARGIN, np.max(self.history_ys) + HIGH_MARGIN) + + def _set_img(self): + """ initialize the imgs of animation + this private function execute the make initial imgs for animation + """ + # object imgs + obj_color_list = ["k", "k", "m", "m"] + obj_styles = ["solid", "solid", "solid", "solid"] + + for i in range(len(obj_color_list)): + temp_img, = self.axis.plot([], [], color=obj_color_list[i], linestyle=obj_styles[i]) + self.car_imgs.append(temp_img) + + traj_color_list = ["k", "b"] + + for i in range(len(traj_color_list)): + temp_img, = self.axis.plot([],[], color=traj_color_list[i], linestyle="dashed") + self.traj_imgs.append(temp_img) + + temp_img, = self.axis.plot([],[], "*", color="b") + self.traj_imgs.append(temp_img) + + def _update_anim(self, i): + """the update animation + this function should be used in the animation functions + + Parameters + ------------ + i : int + time step of the animation + the sampling time should be related to the sampling time of system + + Returns + ----------- + object_imgs : list of img + traj_imgs : list of img + """ + i = int(i * self.skip_num) + + # self._draw_set_axis(i) + self._draw_car(i) + self._draw_traj(i) + # self._draw_prediction(i) + + return self.car_imgs, self.traj_imgs, + + def _draw_set_axis(self, i): + """ + """ + # (2) set the xlim and ylim + LOW_MARGIN = 20 + HIGH_MARGIN = 20 + OVER_LOOK = 50 + self.axis.set_xlim(np.min(self.history_xs[0][i : i + OVER_LOOK]) - LOW_MARGIN, np.max(self.history_xs[0][i : i + OVER_LOOK]) + 
HIGH_MARGIN) + self.axis.set_ylim(np.min(self.history_ys[0][i : i + OVER_LOOK]) - LOW_MARGIN, np.max(self.history_ys[0][i : i + OVER_LOOK]) + HIGH_MARGIN) + + def _draw_car(self, i): + """ + This private function is just divided thing of + the _update_anim to see the code more clear + + Parameters + ------------ + i : int + time step of the animation + the sampling time should be related to the sampling time of system + """ + # cars + object_x, object_y, angle_x, angle_y = square_make_with_angles(self.history_xs[0][i], + self.history_ys[0][i], + 1.0, + self.history_ths[0][i]) + + self.car_imgs[0].set_data([object_x, object_y]) + self.car_imgs[1].set_data([angle_x, angle_y]) + + def _draw_traj(self, i): + """ + This private function is just divided thing of + the _update_anim to see the code more clear + + Parameters + ------------ + i : int + time step of the animation + the sampling time should be related to the sampling time of system + """ + # car + self.traj_imgs[0].set_data(self.history_xs[0][:i], self.history_ys[0][:i]) + + # goal + self.traj_imgs[-1].set_data(self.traget[0], self.traget[1]) + + # traj_ref + # self.traj_imgs[2].set_data(self.history_traj_ref[i][0, :], self.history_traj_ref[i][1, :]) \ No newline at end of file diff --git a/iLQR/ilqr.py b/iLQR/ilqr.py new file mode 100644 index 0000000..882c4cd --- /dev/null +++ b/iLQR/ilqr.py @@ -0,0 +1,408 @@ +import numpy as np +from copy import copy, deepcopy + +from model import TwoWheeledCar + +class iLQRController(): + """ + A controller that implements iterative Linear Quadratic control. + Controls the (x, y, th) of the two wheeled car + + Attributes: + ------------ + + """ + + def __init__(self, N=100, max_iter=400, dt=0.016): + ''' + n int: length of the control sequence + max_iter int: limit on number of optimization iterations + ''' + self.old_target = [None, None] + + self.tN = N # number of timesteps + self.STATE_SIZE = 3 + self.INPUT_SIZE = 2 + self.dt = dt + + self.max_iter = max_iter + self.lamb_factor = 10 + self.lamb_max = 1e4 + self.eps_converge = 0.001 # exit if relative improvement below threshold + + def calc_input(self, car, x_target, changed=False): + """Generates a control signal to move the + arm to the specified target. + + car : the arm model being controlled NOTE:これが実際にコントロールされるやつ + des list : the desired system position + x_des np.array: desired task-space force, + irrelevant here. 
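+
+        Notes
+        -------
+        Every call re-optimizes the remaining input sequence from the
+        current car state (receding-horizon style) and returns the input
+        for the current time step of that sequence.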
+        """
+
+        # if the target has changed, reset things and re-optimize
+        # for this movement (check because the target may have changed)
+        if changed:
+            self.reset(x_target)
+
+        # Reset t if at the end of the sequence
+        if self.t >= self.tN - 1: # start over from the first step
+            self.t = 0
+
+        # Compute the optimization
+        """
+        NOTE : a condition could be added here so that the optimization is not
+        run every cycle, e.g. only when the state deviates from the reference
+        trajectory
+        """
+        if self.t % 1 == 0:
+            x0 = np.zeros(self.STATE_SIZE) # initialization (velocity is zero)
+
+            self.simulator, x0 = self.initialize_simulator(car) # keep the state of the previous time step
+
+            U = np.copy(self.U[self.t:]) # initial guess for the input sequence
+
+            self.X, self.U[self.t:], cost = self.ilqr(x0, U) # optimized input sequence comes back
+
+        self.u = self.U[self.t]
+
+        # move us a step forward in our control sequence
+        self.t += 1
+
+        return self.u
+
+    def initialize_simulator(self, car):
+        """ make a copy of the car model, to make sure that the
+        actual car model isn't affected during the iLQR process
+        """
+        # need to make a copy of the real car
+        simulator = TwoWheeledCar(deepcopy(car.xs))
+
+        return simulator, deepcopy(simulator.xs)
+
+    def cost(self, xs, us):
+        """ the immediate state cost function
+
+        Parameters
+        ------------
+        xs : numpy.ndarray, shape(STATE_SIZE, )
+        us : numpy.ndarray, shape(INPUT_SIZE, )
+        """
+
+        """
+        NOTE : this cost could be extended (e.g. with a running state cost),
+        but that is skipped for now
+        """
+        # total cost
+        # only the quadratic input terms are computed
+        R_11 = 0.01 # u throttle (velocity) cost weight
+        R_22 = 0.01 # u steering (angular velocity) cost weight
+
+        l = np.dot(us.T, np.dot(np.diag([R_11, R_22]), us))
+
+        # compute derivatives of cost
+        l_x = np.zeros(self.STATE_SIZE)
+        l_xx = np.zeros((self.STATE_SIZE, self.STATE_SIZE))
+
+        l_u1 = 2. * us[0] * R_11
+        l_u2 = 2. * us[1] * R_22
+
+        l_u = np.array([l_u1, l_u2])
+
+        l_uu = 2. * np.diag([R_11, R_22])
+
+        l_ux = np.zeros((self.INPUT_SIZE, self.STATE_SIZE))
+
+        # returned in an array for easy multiplication by time step
+        return l, l_x, l_xx, l_u, l_uu, l_ux
+
+    def cost_final(self, x):
+        """ the final state cost function
+
+        Parameters
+        -------------
+        x : numpy.ndarray, shape(STATE_SIZE,)
+
+        Notes :
+        ---------
+        l_x = np.zeros((self.STATE_SIZE))
+        l_xx = np.zeros((self.STATE_SIZE, self.STATE_SIZE))
+        """
+        Q_11 = 1. # terminal x cost weight
+        Q_22 = 1. # terminal y cost weight
+        Q_33 = 0.01 # terminal theta cost weight
+
+        error = self.simulator.xs - self.target
+
+        l = np.dot(error.T, np.dot(np.diag([Q_11, Q_22, Q_33]), error))
+
+        # about l_x
+        l_x1 = 2. * (x[0] - self.target[0]) * Q_11
+        l_x2 = 2. * (x[1] - self.target[1]) * Q_22
+        l_x3 = 2. * (x[2] - self.target[2]) * Q_33
+        l_x = np.array([l_x1, l_x2, l_x3])
+
+        # about l_xx
+        l_xx = 2. * np.diag([Q_11, Q_22, Q_33])
+
+        # Final cost only requires these three values
+        return l, l_x, l_xx
+
+    def finite_differences(self, x, u):
+        """ calculate gradient of plant dynamics using finite differences
+
+        Parameters
+        --------------
+        x : numpy.ndarray, shape(STATE_SIZE,)
+            the state of the system
+        u : numpy.ndarray, shape(INPUT_SIZE,)
+            the control input
+
+        Returns
+        ------------
+        A : numpy.ndarray, shape(STATE_SIZE, STATE_SIZE)
+            partial derivative of the model with respect to x
+        B : numpy.ndarray, shape(STATE_SIZE, INPUT_SIZE)
+            partial derivative of the model with respect to u
+
+        Notes
+        ------------
+        The analytically derived Jacobians (A_ideal, B_ideal) are returned;
+        the finite-difference versions are computed for comparison.
+        """
+
+        A = np.zeros((self.STATE_SIZE, self.STATE_SIZE))
+        A_ideal = np.zeros((self.STATE_SIZE, self.STATE_SIZE))
+
+        B = np.zeros((self.STATE_SIZE, self.INPUT_SIZE))
+        B_ideal = np.zeros((self.STATE_SIZE, self.INPUT_SIZE))
+
+        eps = 1e-4 # finite differences epsilon
+
+        for ii in range(self.STATE_SIZE):
+            # calculate partial differential w.r.t.
x + inc_x = x.copy() + inc_x[ii] += eps + state_inc,_ = self.plant_dynamics(inc_x, u.copy()) + dec_x = x.copy() + dec_x[ii] -= eps + state_dec,_ = self.plant_dynamics(dec_x, u.copy()) + A[:, ii] = (state_inc - state_dec) / (2 * eps) + + A_ideal[0, 2] = -np.sin(x[2]) * u[0] + A_ideal[1, 2] = np.cos(x[2]) * u[0] + + for ii in range(self.INPUT_SIZE): + # calculate partial differential w.r.t. u + inc_u = u.copy() + inc_u[ii] += eps + state_inc,_ = self.plant_dynamics(x.copy(), inc_u) + dec_u = u.copy() + dec_u[ii] -= eps + state_dec,_ = self.plant_dynamics(x.copy(), dec_u) + B[:, ii] = (state_inc - state_dec) / (2 * eps) + + # calc by hand + B_ideal[0, 0] = np.cos(x[2]) + B_ideal[1, 0] = np.sin(x[2]) + B_ideal[2, 1] = 1. + + return A_ideal, B_ideal + + def ilqr(self, x0, U=None): + """ use iterative linear quadratic regulation to find a control + sequence that minimizes the cost function + + Parameters + -------------- + x0 : numpy.ndarray, shape(STATE_SIZE, ) + the initial state of the system + U : numpy.ndarray(TIME, INPUT_SIZE) + the initial control trajectory dimension + """ + U = self.U if U is None else U + + lamb = 1.0 # regularization parameter + sim_new_trajectory = True + tN = U.shape[0] # number of time steps + + for ii in range(self.max_iter): + + if sim_new_trajectory == True: + # simulate forward using the current control trajectory + X, cost = self.simulate(x0, U) + oldcost = np.copy(cost) # copy for exit condition check + + # + f_x = np.zeros((tN, self.STATE_SIZE, self.STATE_SIZE)) # df / dx + f_u = np.zeros((tN, self.STATE_SIZE, self.INPUT_SIZE)) # df / du + # for storing quadratized cost function + + l = np.zeros((tN,1)) # immediate state cost + l_x = np.zeros((tN, self.STATE_SIZE)) # dl / dx + l_xx = np.zeros((tN, self.STATE_SIZE, self.STATE_SIZE)) # d^2 l / dx^2 + l_u = np.zeros((tN, self.INPUT_SIZE)) # dl / du + l_uu = np.zeros((tN, self.INPUT_SIZE, self.INPUT_SIZE)) # d^2 l / du^2 + l_ux = np.zeros((tN, self.INPUT_SIZE, self.STATE_SIZE)) # d^2 l / du / dx + # for everything except final state + for t in range(tN-1): + # x(t+1) = f(x(t), u(t)) = x(t) + dx(t) * dt + # linearized dx(t) = np.dot(A(t), x(t)) + np.dot(B(t), u(t)) + # f_x = np.eye + A(t) + # f_u = B(t) + A, B = self.finite_differences(X[t], U[t]) + f_x[t] = np.eye(self.STATE_SIZE) + A * self.dt + f_u[t] = B * self.dt + + (l[t], l_x[t], l_xx[t], l_u[t], l_uu[t], l_ux[t]) = self.cost(X[t], U[t]) + l[t] *= self.dt + l_x[t] *= self.dt + l_xx[t] *= self.dt + l_u[t] *= self.dt + l_uu[t] *= self.dt + l_ux[t] *= self.dt + + # and for final state + l[-1], l_x[-1], l_xx[-1] = self.cost_final(X[-1]) + + sim_new_trajectory = False + + # optimize things! 
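+            # Backward pass: starting from the final-state cost, recursively
+            # compute the local value function and the affine control update
+            # U_new[t] = U[t] + k[t] + K[t] (x_new - X[t]); Q_uu is regularized
+            # with the Levenberg-Marquardt parameter lamb before inversion.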
+ # initialize Vs with final state cost and set up k, K + V = l[-1].copy() # value function + V_x = l_x[-1].copy() # dV / dx + V_xx = l_xx[-1].copy() # d^2 V / dx^2 + k = np.zeros((tN, self.INPUT_SIZE)) # feedforward modification + K = np.zeros((tN, self.INPUT_SIZE, self.STATE_SIZE)) # feedback gain + + # NOTE: they use V' to denote the value at the next timestep, + # they have this redundant in their notation making it a + # function of f(x + dx, u + du) and using the ', but it makes for + # convenient shorthand when you drop function dependencies + + # work backwards to solve for V, Q, k, and K + for t in range(self.tN-2, -1, -1): + + # NOTE: we're working backwards, so V_x = V_x[t+1] = V'_x + + # 4a) Q_x = l_x + np.dot(f_x^T, V'_x) + Q_x = l_x[t] + np.dot(f_x[t].T, V_x) + # 4b) Q_u = l_u + np.dot(f_u^T, V'_x) + Q_u = l_u[t] + np.dot(f_u[t].T, V_x) + + # NOTE: last term for Q_xx, Q_uu, and Q_ux is vector / tensor product + # but also note f_xx = f_uu = f_ux = 0 so they're all 0 anyways. + + # 4c) Q_xx = l_xx + np.dot(f_x^T, np.dot(V'_xx, f_x)) + np.einsum(V'_x, f_xx) + Q_xx = l_xx[t] + np.dot(f_x[t].T, np.dot(V_xx, f_x[t])) + # 4d) Q_ux = l_ux + np.dot(f_u^T, np.dot(V'_xx, f_x)) + np.einsum(V'_x, f_ux) + Q_ux = l_ux[t] + np.dot(f_u[t].T, np.dot(V_xx, f_x[t])) + # 4e) Q_uu = l_uu + np.dot(f_u^T, np.dot(V'_xx, f_u)) + np.einsum(V'_x, f_uu) + Q_uu = l_uu[t] + np.dot(f_u[t].T, np.dot(V_xx, f_u[t])) + + # Calculate Q_uu^-1 with regularization term set by + # Levenberg-Marquardt heuristic (at end of this loop) + Q_uu_evals, Q_uu_evecs = np.linalg.eig(Q_uu) + Q_uu_evals[Q_uu_evals < 0] = 0.0 + Q_uu_evals += lamb + Q_uu_inv = np.dot(Q_uu_evecs, np.dot(np.diag(1.0/Q_uu_evals), Q_uu_evecs.T)) + + # 5b) k = -np.dot(Q_uu^-1, Q_u) + k[t] = -np.dot(Q_uu_inv, Q_u) + # 5b) K = -np.dot(Q_uu^-1, Q_ux) + K[t] = -np.dot(Q_uu_inv, Q_ux) + + # 6a) DV = -.5 np.dot(k^T, np.dot(Q_uu, k)) + # 6b) V_x = Q_x - np.dot(K^T, np.dot(Q_uu, k)) + V_x = Q_x - np.dot(K[t].T, np.dot(Q_uu, k[t])) + # 6c) V_xx = Q_xx - np.dot(-K^T, np.dot(Q_uu, K)) + V_xx = Q_xx - np.dot(K[t].T, np.dot(Q_uu, K[t])) + + U_new = np.zeros((tN, self.INPUT_SIZE)) + # calculate the optimal change to the control trajectory + x_new = x0.copy() # 7a) + for t in range(tN - 1): + # use feedforward (k) and feedback (K) gain matrices + # calculated from our value function approximation + # to take a stab at the optimal control signal + U_new[t] = U[t] + k[t] + np.dot(K[t], x_new - X[t]) # 7b) + # given this u, find our next state + _,x_new = self.plant_dynamics(x_new, U_new[t]) # 7c) + + # evaluate the new trajectory + X_new, cost_new = self.simulate(x0, U_new) + + # Levenberg-Marquardt heuristic + if cost_new < cost: + # decrease lambda (get closer to Newton's method) + lamb /= self.lamb_factor + + X = np.copy(X_new) # update trajectory + U = np.copy(U_new) # update control signal + oldcost = np.copy(cost) + cost = np.copy(cost_new) + + sim_new_trajectory = True # do another rollout + + # print("iteration = %d; Cost = %.4f;"%(ii, costnew) + + # " logLambda = %.1f"%np.log(lamb)) + # check to see if update is small enough to exit + if ii > 0 and ((abs(oldcost-cost)/cost) < self.eps_converge): + print("Converged at iteration = %d; Cost = %.4f;"%(ii,cost_new) + + " logLambda = %.1f"%np.log(lamb)) + break + + else: + # increase lambda (get closer to gradient descent) + lamb *= self.lamb_factor + # print("cost: %.4f, increasing lambda to %.4f")%(cost, lamb) + if lamb > self.lamb_max: + print("lambda > max_lambda at iteration = %d;"%ii + + " Cost = %.4f; 
logLambda = %.1f"%(cost,
+                                                            np.log(lamb)))
+                    break
+
+        return X, U, cost
+
+    def plant_dynamics(self, x, u):
+        """ simulate a single time step of the plant, from
+        initial state x and applying control signal u
+
+        x np.array: the state of the system
+        u np.array: the control signal
+        """
+
+        # set the car state to x
+        self.simulator.initialize_state(x)
+
+        # apply the control signal
+        x_next = self.simulator.update_state(u, self.dt)
+
+        # calculate the change in state
+        xdot = ((x_next - x) / self.dt).squeeze()
+
+        return xdot, x_next
+
+    def reset(self, target):
+        """ reset the state of the controller """
+
+        # Index along current control sequence
+        self.t = 0
+        self.U = np.zeros((self.tN, self.INPUT_SIZE))
+        self.target = target.copy()
+
+    def simulate(self, x0, U):
+        """ do a rollout of the system, starting at x0 and
+        applying the control sequence U
+
+        x0 np.array: the initial state of the system
+        U np.array: the control sequence to apply
+        """
+        tN = U.shape[0]
+        X = np.zeros((tN, self.STATE_SIZE))
+        X[0] = x0
+        cost = 0
+
+        # Run simulation
+        for t in range(tN-1):
+            _, X[t+1] = self.plant_dynamics(X[t], U[t])
+            l, _, _, _, _, _ = self.cost(X[t], U[t])
+            cost = cost + self.dt * l
+
+        # Add the final state cost
+        l_f, _, _ = self.cost_final(X[-1])
+        cost = cost + l_f
+
+        return X, cost
diff --git a/iLQR/main.py b/iLQR/main.py
new file mode 100644
index 0000000..8414fed
--- /dev/null
+++ b/iLQR/main.py
@@ -0,0 +1,59 @@
+import numpy as np
+import matplotlib.pyplot as plt
+import math
+
+from model import TwoWheeledCar
+from ilqr import iLQRController
+from animation import AnimDrawer
+
+def main():
+    """run the iLQR controller on the two wheeled car model
+    """
+    # iteration parameters
+    NUM_ITERATIONS = 500
+    dt = 0.01
+
+    # make plant
+    init_x = np.array([0., 0., 0.5*math.pi])
+    car = TwoWheeledCar(init_x)
+
+    # make goal
+    target = np.array([5., 3., 0.])
+
+    # controller
+    controller = iLQRController()
+
+    for iteration in range(NUM_ITERATIONS):
+        print("iteration num = {} / {}".format(iteration, NUM_ITERATIONS))
+
+        # reset (and re-plan from scratch) only on the first step; afterwards
+        # the controller warm-starts from the previous input sequence
+        if iteration == 0:
+            changed = True
+        else:
+            changed = False
+
+        u = controller.calc_input(car, target, changed=changed)
+
+        car.update_state(u, dt)
+
+    # figures and animation
+    history_states = np.array(car.history_xs)
+
+    time_fig = plt.figure()
+
+    x_fig = time_fig.add_subplot(311)
+    y_fig = time_fig.add_subplot(312)
+    th_fig = time_fig.add_subplot(313)
+
+    time = len(history_states)
+    x_fig.plot(np.arange(time), history_states[:, 0])
+    y_fig.plot(np.arange(time), history_states[:, 1])
+    th_fig.plot(np.arange(time), history_states[:, 2])
+
+    plt.show()
+
+    history_states = np.array(car.history_xs)
+
+    animdrawer = AnimDrawer([history_states, target])
+    animdrawer.draw_anim()
+
+if __name__ == "__main__":
+    main()
\ No newline at end of file
diff --git a/iLQR/model.py b/iLQR/model.py
new file mode 100644
index 0000000..a09fd40
--- /dev/null
+++ b/iLQR/model.py
@@ -0,0 +1,177 @@
+import numpy as np
+import matplotlib.pyplot as plt
+import math
+import copy
+
+
+"""
+This two wheeled model is used by the controller.
+Ideally a base class should be defined and inherited from, but that is omitted here.
+"""
+class TwoWheeledCar():
+    """SampleSystem, this is the simulator
+    Attributes
+    -----------
+    xs : numpy.ndarray
+        system states, [x, y, theta]
+    history_xs : list
+        time history of the states
+    """
+    def __init__(self, init_states=None):
+        """
+        Parameters
+        -----------
+        init_states : numpy.ndarray, optional, shape(3, )
+            initial state of the system, default is None
+        """
+        self.STATE_SIZE = 3
+        self.INPUT_SIZE = 2
+
+        self.xs = np.zeros(3)
+
+        if init_states is not None:
+            self.xs = copy.deepcopy(init_states)
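+
+        # keep the full time history of states (and of predicted trajectories)
+        # for later plotting and animation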
+        self.history_xs = [init_states]
+        self.history_predict_xs = []
+
+    def update_state(self, us, dt):
+        """
+        Parameters
+        ------------
+        us : numpy.ndarray
+            inputs of the system, in some cases this means the reference
+        dt : float in seconds
+            sampling time of the simulation
+        """
+        # Runge-Kutta slopes for the states (x, y, theta)
+        k0 = [0.0 for _ in range(3)]
+        k1 = [0.0 for _ in range(3)]
+        k2 = [0.0 for _ in range(3)]
+        k3 = [0.0 for _ in range(3)]
+
+        functions = [self._func_x_1, self._func_x_2, self._func_x_3]
+
+        # solve Runge-Kutta
+        for i, func in enumerate(functions):
+            k0[i] = dt * func(self.xs[0], self.xs[1], self.xs[2], us[0], us[1])
+
+        for i, func in enumerate(functions):
+            k1[i] = dt * func(self.xs[0] + k0[0]/2., self.xs[1] + k0[1]/2., self.xs[2] + k0[2]/2., us[0], us[1])
+
+        for i, func in enumerate(functions):
+            # use k1 (not k0) for the second intermediate slope of RK4
+            k2[i] = dt * func(self.xs[0] + k1[0]/2., self.xs[1] + k1[1]/2., self.xs[2] + k1[2]/2., us[0], us[1])
+
+        for i, func in enumerate(functions):
+            k3[i] = dt * func(self.xs[0] + k2[0], self.xs[1] + k2[1], self.xs[2] + k2[2], us[0], us[1])
+
+        self.xs[0] += (k0[0] + 2. * k1[0] + 2. * k2[0] + k3[0]) / 6.
+        self.xs[1] += (k0[1] + 2. * k1[1] + 2. * k2[1] + k3[1]) / 6.
+        self.xs[2] += (k0[2] + 2. * k1[2] + 2. * k2[2] + k3[2]) / 6.
+
+        # save
+        save_states = copy.deepcopy(self.xs)
+        self.history_xs.append(save_states)
+
+        return self.xs.copy()
+
+    def predict_state(self, init_xs, us, dt=0.01):
+        """predict future states by applying the optimal inputs made by MPC
+        Parameters
+        -----------
+        init_xs : numpy.ndarray, shape(3, )
+            initial state of the prediction
+        us : array-like, shape(2, N)
+            optimal inputs made by MPC
+        dt : float in seconds, optional
+            sampling time of the simulation, default is 0.01 [s]
+        """
+        ## test
+        # assert us.shape[0] == 2 and us.shape[1] == 15, "wrong shape"
+
+        xs = copy.deepcopy(init_xs)
+        predict_xs = [copy.deepcopy(xs)]
+
+        for t in range(us.shape[1]):
+            k0 = [0.0 for _ in range(self.STATE_SIZE)]
+            k1 = [0.0 for _ in range(self.STATE_SIZE)]
+            k2 = [0.0 for _ in range(self.STATE_SIZE)]
+            k3 = [0.0 for _ in range(self.STATE_SIZE)]
+
+            functions = [self._func_x_1, self._func_x_2, self._func_x_3]
+
+            # solve Runge-Kutta (the time-step index t must not be shadowed
+            # by the inner loop variable i)
+            for i, func in enumerate(functions):
+                k0[i] = dt * func(xs[0], xs[1], xs[2], us[0, t], us[1, t])
+
+            for i, func in enumerate(functions):
+                k1[i] = dt * func(xs[0] + k0[0]/2., xs[1] + k0[1]/2., xs[2] + k0[2]/2., us[0, t], us[1, t])
+
+            for i, func in enumerate(functions):
+                k2[i] = dt * func(xs[0] + k1[0]/2., xs[1] + k1[1]/2., xs[2] + k1[2]/2., us[0, t], us[1, t])
+
+            for i, func in enumerate(functions):
+                k3[i] = dt * func(xs[0] + k2[0], xs[1] + k2[1], xs[2] + k2[2], us[0, t], us[1, t])
+
+            xs[0] += (k0[0] + 2. * k1[0] + 2. * k2[0] + k3[0]) / 6.
+            xs[1] += (k0[1] + 2. * k1[1] + 2. * k2[1] + k3[1]) / 6.
+            xs[2] += (k0[2] + 2. * k1[2] + 2. * k2[2] + k3[2]) / 6.
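+            # xs has now been advanced by one RK4 step for this prediction step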
+ + predict_xs.append(copy.deepcopy(xs)) + + self.history_predict_xs.append(np.array(predict_xs)) + + return np.array(predict_xs) + + def initialize_state(self, init_xs): + """ + initialize the state + + Parameters + ------------ + init_xs : numpy.ndarray + """ + self.xs = init_xs.flatten() + + def _func_x_1(self, y_1, y_2, y_3, u_1, u_2): + """ + Parameters + ------------ + y_1 : float + y_2 : float + y_3 : float + u_1 : float + system input + u_2 : float + system input + """ + y_dot = math.cos(y_3) * u_1 + return y_dot + + def _func_x_2(self, y_1, y_2, y_3, u_1, u_2): + """ + Parameters + ------------ + y_1 : float + y_2 : float + y_3 : float + u_1 : float + system input + u_2 : float + system input + """ + y_dot = math.sin(y_3) * u_1 + return y_dot + + def _func_x_3(self, y_1, y_2, y_3, u_1, u_2): + """ + Parameters + ------------ + y_1 : float + y_2 : float + y_3 : float + u_1 : float + system input + u_2 : float + system input + """ + y_dot = u_2 + return y_dot \ No newline at end of file
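
For reference, the following is a minimal sketch (not part of the patch) that checks the hand-derived Jacobians used in `finite_differences` (`A_ideal`, `B_ideal`) against a central finite-difference approximation of the kinematic model. The local function `f` re-implements the dynamics from `model.py`, and the test point and tolerances are assumptions.

```python
import numpy as np

def f(x, u):
    """Continuous-time kinematics of the two wheeled car: xdot = f(x, u)."""
    return np.array([u[0] * np.cos(x[2]),
                     u[0] * np.sin(x[2]),
                     u[1]])

def numeric_jacobians(x, u, eps=1e-4):
    """Central finite differences of f with respect to x and u."""
    A = np.zeros((3, 3))
    B = np.zeros((3, 2))
    for i in range(3):
        dx = np.zeros(3)
        dx[i] = eps
        A[:, i] = (f(x + dx, u) - f(x - dx, u)) / (2. * eps)
    for i in range(2):
        du = np.zeros(2)
        du[i] = eps
        B[:, i] = (f(x, u + du) - f(x, u - du)) / (2. * eps)
    return A, B

def analytic_jacobians(x, u):
    """Hand-derived Jacobians, matching A_ideal / B_ideal in ilqr.py."""
    A = np.zeros((3, 3))
    A[0, 2] = -np.sin(x[2]) * u[0]
    A[1, 2] = np.cos(x[2]) * u[0]
    B = np.array([[np.cos(x[2]), 0.],
                  [np.sin(x[2]), 0.],
                  [0., 1.]])
    return A, B

if __name__ == "__main__":
    x = np.array([1., -2., 0.3])   # assumed test state (x, y, theta)
    u = np.array([0.8, 0.5])       # assumed test input (velocity, angular velocity)
    A_num, B_num = numeric_jacobians(x, u)
    A_an, B_an = analytic_jacobians(x, u)
    assert np.allclose(A_num, A_an, atol=1e-6)
    assert np.allclose(B_num, B_an, atol=1e-6)
    print("analytic Jacobians match the finite differences")
```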