From 0b6cdc5778e6db45b6a3c39c3a080ee1aa8baac1 Mon Sep 17 00:00:00 2001
From: Shunichi09
Date: Fri, 5 Apr 2019 17:15:54 +0900
Subject: [PATCH] add iLQR

---
 iLQR/README.md    |  61 +++++++
 iLQR/animation.py | 294 +++++++++++++++++++++++++++++
 iLQR/ilqr.py      | 408 ++++++++++++++++++++++++++++++++++++++++++++++
 iLQR/main.py      |  59 +++++++
 iLQR/model.py     | 177 ++++++++++++++++++++
 5 files changed, 999 insertions(+)
 create mode 100644 iLQR/README.md
 create mode 100644 iLQR/animation.py
 create mode 100644 iLQR/ilqr.py
 create mode 100644 iLQR/main.py
 create mode 100644 iLQR/model.py

diff --git a/iLQR/README.md b/iLQR/README.md
new file mode 100644
index 0000000..d893b05
--- /dev/null
+++ b/iLQR/README.md
@@ -0,0 +1,61 @@
+# Iterative Linear Quadratic Regulator
+This program implements iLQR (Iterative Linear Quadratic Regulator).
+
+# Problem Formulation
+
+The two-wheeled robot is described by the following kinematic model (see `model.py`),
+with state (x, y, theta), velocity input u_1 and angular-velocity input u_2:
+
+dx/dt = u_1 cos(theta),  dy/dt = u_1 sin(theta),  dtheta/dt = u_2
+
+The system is nonlinear and nonholonomic, and it is often very difficult to control the
+steering (angular velocity) and the velocity of the robot at the same time. Therefore, many methods control only the steering, e.g. pure pursuit or linear MPC.
+However, when the car or robot moves fast, velocity and steering should be considered simultaneously.
+To handle this, we need control methods that can deal with the nonlinear system directly.
+
+Nonlinear Model Predictive Control (NMPC) is one well-known such method, and it is implemented in another folder of this repository.
+(If you are interested, please have a look at it.)
+
+NMPC is an effective way to solve nonlinear optimal control problems, but it requires a fair amount of hand tuning.
+This program covers another method for the same class of problems: iterative LQR.
+
+Iterative LQR (iLQR) is a variant of DDP (differential dynamic programming).
+Recently it has also been used in IRL (inverse reinforcement learning), for example in GPS (guided policy search).
+
+If you want to know more about iLQR, please see the references below.
+The paper and the website are great.
+
+# Usage
+
+```
+$ python main.py
+```
+
+# Expected Results
+
+- static goal
+
+- tracking a moving goal
+
+# Applying other models
+
+# Requirements
+
+- python3.5 or more
+- numpy
+- matplotlib
+
+# References
+
+- studywolf
+  https://github.com/studywolf/control
+
+- Sergey Levine's lecture
+  http://rail.eecs.berkeley.edu/deeprlcourse/
+
+- Tassa, Y., Erez, T., & Todorov, E. (2012). Synthesis and stabilization of complex behaviors through online trajectory optimization. IEEE International Conference on Intelligent Robots and Systems, 4906–4913. https://doi.org/10.1109/IROS.2012.6386025
+
+- Li, W., & Todorov, E. (n.d.). Iterative Linear Quadratic Regulator Design for Nonlinear Biological Movement Systems.
Retrieved from https://homes.cs.washington.edu/~todorov/papers/LiICINCO04.pdf \ No newline at end of file diff --git a/iLQR/animation.py b/iLQR/animation.py new file mode 100644 index 0000000..d6970d7 --- /dev/null +++ b/iLQR/animation.py @@ -0,0 +1,294 @@ +import numpy as np +import matplotlib.pyplot as plt +import matplotlib.animation as ani +import matplotlib.font_manager as fon +import sys +import math + +# default setting of figures +plt.rcParams["mathtext.fontset"] = 'stix' # math fonts +plt.rcParams['xtick.direction'] = 'in' # x axis in +plt.rcParams['ytick.direction'] = 'in' # y axis in +plt.rcParams["font.size"] = 10 +plt.rcParams['axes.linewidth'] = 1.0 # axis line width +plt.rcParams['axes.grid'] = True # make grid + +def coordinate_transformation_in_angle(positions, base_angle): + ''' + Transformation the coordinate in the angle + + Parameters + ------- + positions : numpy.ndarray + this parameter is composed of xs, ys + should have (2, N) shape + base_angle : float [rad] + + Returns + ------- + traslated_positions : numpy.ndarray + the shape is (2, N) + + ''' + if positions.shape[0] != 2: + raise ValueError('the input data should have (2, N)') + + positions = np.array(positions) + positions = positions.reshape(2, -1) + + rot_matrix = [[np.cos(base_angle), np.sin(base_angle)], + [-1*np.sin(base_angle), np.cos(base_angle)]] + + rot_matrix = np.array(rot_matrix) + + translated_positions = np.dot(rot_matrix, positions) + + return translated_positions + +def square_make_with_angles(center_x, center_y, size, angle): + ''' + Create square matrix with angle line matrix(2D) + + Parameters + ------- + center_x : float in meters + the center x position of the square + center_y : float in meters + the center y position of the square + size : float in meters + the square's half-size + angle : float in radians + + Returns + ------- + square xs : numpy.ndarray + lenght is 5 (counterclockwise from right-up) + square ys : numpy.ndarray + length is 5 (counterclockwise from right-up) + angle line xs : numpy.ndarray + angle line ys : numpy.ndarray + ''' + + # start with the up right points + # create point in counterclockwise + square_xys = np.array([[size, 0.5 * size], [-size, 0.5 * size], [-size, -0.5 * size], [size, -0.5 * size], [size, 0.5 * size]]) + trans_points = coordinate_transformation_in_angle(square_xys.T, -angle) # this is inverse type + trans_points += np.array([[center_x], [center_y]]) + + square_xs = trans_points[0, :] + square_ys = trans_points[1, :] + + angle_line_xs = [center_x, center_x + math.cos(angle) * size] + angle_line_ys = [center_y, center_y + math.sin(angle) * size] + + return square_xs, square_ys, np.array(angle_line_xs), np.array(angle_line_ys) + + +def circle_make_with_angles(center_x, center_y, radius, angle): + ''' + Create circle matrix with angle line matrix + + Parameters + ------- + center_x : float + the center x position of the circle + center_y : float + the center y position of the circle + radius : float + angle : float [rad] + + Returns + ------- + circle xs : numpy.ndarray + circle ys : numpy.ndarray + angle line xs : numpy.ndarray + angle line ys : numpy.ndarray + ''' + + point_num = 100 # 分解能 + + circle_xs = [] + circle_ys = [] + + for i in range(point_num + 1): + circle_xs.append(center_x + radius * math.cos(i*2*math.pi/point_num)) + circle_ys.append(center_y + radius * math.sin(i*2*math.pi/point_num)) + + angle_line_xs = [center_x, center_x + math.cos(angle) * radius] + angle_line_ys = [center_y, center_y + math.sin(angle) * radius] + + return 
np.array(circle_xs), np.array(circle_ys), np.array(angle_line_xs), np.array(angle_line_ys) + + +class AnimDrawer(): + """create animation of path and robot + + Attributes + ------------ + cars : + anim_fig : figure of matplotlib + axis : axis of matplotlib + + """ + def __init__(self, objects): + """ + Parameters + ------------ + objects : list of objects + + Notes + --------- + lead_history_states, lead_history_predict_states, traj_ref, history_traj_ref, history_angle_ref + """ + self.car_history_state = objects[0] + self.traget = objects[1] + + self.history_xs = [self.car_history_state[:, 0]] + self.history_ys = [self.car_history_state[:, 1]] + self.history_ths = [self.car_history_state[:, 2]] + + # setting up figure + self.anim_fig = plt.figure(dpi=150) + self.axis = self.anim_fig.add_subplot(111) + + # imgs + self.car_imgs = [] + self.traj_imgs = [] + + def draw_anim(self, interval=50): + """draw the animation and save + + Parameteres + ------------- + interval : int, optional + animation's interval time, you should link the sampling time of systems + default is 50 [ms] + """ + self._set_axis() + self._set_img() + + self.skip_num = 1 + frame_num = int((len(self.history_xs[0])-1) / self.skip_num) + + animation = ani.FuncAnimation(self.anim_fig, self._update_anim, interval=interval, frames=frame_num) + + # self.axis.legend() + print('save_animation?') + shuold_save_animation = int(input()) + + if shuold_save_animation: + print('animation_number?') + num = int(input()) + animation.save('animation_{0}.mp4'.format(num), writer='ffmpeg') + # animation.save("Sample.gif", writer = 'imagemagick') # gif保存 + + plt.show() + + def _set_axis(self): + """ initialize the animation axies + """ + # (1) set the axis name + self.axis.set_xlabel(r'$\it{x}$ [m]') + self.axis.set_ylabel(r'$\it{y}$ [m]') + self.axis.set_aspect('equal', adjustable='box') + + LOW_MARGIN = 5 + HIGH_MARGIN = 5 + + self.axis.set_xlim(np.min(self.history_xs) - LOW_MARGIN, np.max(self.history_xs) + HIGH_MARGIN) + self.axis.set_ylim(np.min(self.history_ys) - LOW_MARGIN, np.max(self.history_ys) + HIGH_MARGIN) + + def _set_img(self): + """ initialize the imgs of animation + this private function execute the make initial imgs for animation + """ + # object imgs + obj_color_list = ["k", "k", "m", "m"] + obj_styles = ["solid", "solid", "solid", "solid"] + + for i in range(len(obj_color_list)): + temp_img, = self.axis.plot([], [], color=obj_color_list[i], linestyle=obj_styles[i]) + self.car_imgs.append(temp_img) + + traj_color_list = ["k", "b"] + + for i in range(len(traj_color_list)): + temp_img, = self.axis.plot([],[], color=traj_color_list[i], linestyle="dashed") + self.traj_imgs.append(temp_img) + + temp_img, = self.axis.plot([],[], "*", color="b") + self.traj_imgs.append(temp_img) + + def _update_anim(self, i): + """the update animation + this function should be used in the animation functions + + Parameters + ------------ + i : int + time step of the animation + the sampling time should be related to the sampling time of system + + Returns + ----------- + object_imgs : list of img + traj_imgs : list of img + """ + i = int(i * self.skip_num) + + # self._draw_set_axis(i) + self._draw_car(i) + self._draw_traj(i) + # self._draw_prediction(i) + + return self.car_imgs, self.traj_imgs, + + def _draw_set_axis(self, i): + """ + """ + # (2) set the xlim and ylim + LOW_MARGIN = 20 + HIGH_MARGIN = 20 + OVER_LOOK = 50 + self.axis.set_xlim(np.min(self.history_xs[0][i : i + OVER_LOOK]) - LOW_MARGIN, np.max(self.history_xs[0][i : i + OVER_LOOK]) + 
HIGH_MARGIN) + self.axis.set_ylim(np.min(self.history_ys[0][i : i + OVER_LOOK]) - LOW_MARGIN, np.max(self.history_ys[0][i : i + OVER_LOOK]) + HIGH_MARGIN) + + def _draw_car(self, i): + """ + This private function is just divided thing of + the _update_anim to see the code more clear + + Parameters + ------------ + i : int + time step of the animation + the sampling time should be related to the sampling time of system + """ + # cars + object_x, object_y, angle_x, angle_y = square_make_with_angles(self.history_xs[0][i], + self.history_ys[0][i], + 1.0, + self.history_ths[0][i]) + + self.car_imgs[0].set_data([object_x, object_y]) + self.car_imgs[1].set_data([angle_x, angle_y]) + + def _draw_traj(self, i): + """ + This private function is just divided thing of + the _update_anim to see the code more clear + + Parameters + ------------ + i : int + time step of the animation + the sampling time should be related to the sampling time of system + """ + # car + self.traj_imgs[0].set_data(self.history_xs[0][:i], self.history_ys[0][:i]) + + # goal + self.traj_imgs[-1].set_data(self.traget[0], self.traget[1]) + + # traj_ref + # self.traj_imgs[2].set_data(self.history_traj_ref[i][0, :], self.history_traj_ref[i][1, :]) \ No newline at end of file diff --git a/iLQR/ilqr.py b/iLQR/ilqr.py new file mode 100644 index 0000000..882c4cd --- /dev/null +++ b/iLQR/ilqr.py @@ -0,0 +1,408 @@ +import numpy as np +from copy import copy, deepcopy + +from model import TwoWheeledCar + +class iLQRController(): + """ + A controller that implements iterative Linear Quadratic control. + Controls the (x, y, th) of the two wheeled car + + Attributes: + ------------ + + """ + + def __init__(self, N=100, max_iter=400, dt=0.016): + ''' + n int: length of the control sequence + max_iter int: limit on number of optimization iterations + ''' + self.old_target = [None, None] + + self.tN = N # number of timesteps + self.STATE_SIZE = 3 + self.INPUT_SIZE = 2 + self.dt = dt + + self.max_iter = max_iter + self.lamb_factor = 10 + self.lamb_max = 1e4 + self.eps_converge = 0.001 # exit if relative improvement below threshold + + def calc_input(self, car, x_target, changed=False): + """Generates a control signal to move the + arm to the specified target. + + car : the arm model being controlled NOTE:これが実際にコントロールされるやつ + des list : the desired system position + x_des np.array: desired task-space force, + irrelevant here. 
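+
+        Notes
+        -------
+        Every call re-optimizes the remaining input sequence from the
+        current car state (receding-horizon style) and returns the input
+        for the current time step of that sequence.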
+        """
+
+        # if the target has changed, reset things and re-optimize
+        # for this movement (check because the target may have changed)
+        if changed:
+            self.reset(x_target)
+
+        # Reset t if at the end of the sequence
+        if self.t >= self.tN - 1: # start over from the first step
+            self.t = 0
+
+        # Compute the optimization
+        """
+        NOTE : a condition could be added here so that the optimization is not
+        run every cycle, e.g. only when the state deviates from the reference
+        trajectory
+        """
+        if self.t % 1 == 0:
+            x0 = np.zeros(self.STATE_SIZE) # initialization (velocity is zero)
+
+            self.simulator, x0 = self.initialize_simulator(car) # keep the state of the previous time step
+
+            U = np.copy(self.U[self.t:]) # initial guess for the input sequence
+
+            self.X, self.U[self.t:], cost = self.ilqr(x0, U) # optimized input sequence comes back
+
+        self.u = self.U[self.t]
+
+        # move us a step forward in our control sequence
+        self.t += 1
+
+        return self.u
+
+    def initialize_simulator(self, car):
+        """ make a copy of the car model, to make sure that the
+        actual car model isn't affected during the iLQR process
+        """
+        # need to make a copy of the real car
+        simulator = TwoWheeledCar(deepcopy(car.xs))
+
+        return simulator, deepcopy(simulator.xs)
+
+    def cost(self, xs, us):
+        """ the immediate state cost function
+
+        Parameters
+        ------------
+        xs : numpy.ndarray, shape(STATE_SIZE, )
+        us : numpy.ndarray, shape(INPUT_SIZE, )
+        """
+
+        """
+        NOTE : this cost could be extended (e.g. with a running state cost),
+        but that is skipped for now
+        """
+        # total cost
+        # only the quadratic input terms are computed
+        R_11 = 0.01 # u throttle (velocity) cost weight
+        R_22 = 0.01 # u steering (angular velocity) cost weight
+
+        l = np.dot(us.T, np.dot(np.diag([R_11, R_22]), us))
+
+        # compute derivatives of cost
+        l_x = np.zeros(self.STATE_SIZE)
+        l_xx = np.zeros((self.STATE_SIZE, self.STATE_SIZE))
+
+        l_u1 = 2. * us[0] * R_11
+        l_u2 = 2. * us[1] * R_22
+
+        l_u = np.array([l_u1, l_u2])
+
+        l_uu = 2. * np.diag([R_11, R_22])
+
+        l_ux = np.zeros((self.INPUT_SIZE, self.STATE_SIZE))
+
+        # returned in an array for easy multiplication by time step
+        return l, l_x, l_xx, l_u, l_uu, l_ux
+
+    def cost_final(self, x):
+        """ the final state cost function
+
+        Parameters
+        -------------
+        x : numpy.ndarray, shape(STATE_SIZE,)
+
+        Notes :
+        ---------
+        l_x = np.zeros((self.STATE_SIZE))
+        l_xx = np.zeros((self.STATE_SIZE, self.STATE_SIZE))
+        """
+        Q_11 = 1. # terminal x cost weight
+        Q_22 = 1. # terminal y cost weight
+        Q_33 = 0.01 # terminal theta cost weight
+
+        error = self.simulator.xs - self.target
+
+        l = np.dot(error.T, np.dot(np.diag([Q_11, Q_22, Q_33]), error))
+
+        # about l_x
+        l_x1 = 2. * (x[0] - self.target[0]) * Q_11
+        l_x2 = 2. * (x[1] - self.target[1]) * Q_22
+        l_x3 = 2. * (x[2] - self.target[2]) * Q_33
+        l_x = np.array([l_x1, l_x2, l_x3])
+
+        # about l_xx
+        l_xx = 2. * np.diag([Q_11, Q_22, Q_33])
+
+        # Final cost only requires these three values
+        return l, l_x, l_xx
+
+    def finite_differences(self, x, u):
+        """ calculate gradient of plant dynamics using finite differences
+
+        Parameters
+        --------------
+        x : numpy.ndarray, shape(STATE_SIZE,)
+            the state of the system
+        u : numpy.ndarray, shape(INPUT_SIZE,)
+            the control input
+
+        Returns
+        ------------
+        A : numpy.ndarray, shape(STATE_SIZE, STATE_SIZE)
+            partial derivative of the model with respect to x
+        B : numpy.ndarray, shape(STATE_SIZE, INPUT_SIZE)
+            partial derivative of the model with respect to u
+
+        Notes
+        ------------
+        The analytically derived Jacobians (A_ideal, B_ideal) are returned;
+        the finite-difference versions are computed for comparison.
+        """
+
+        A = np.zeros((self.STATE_SIZE, self.STATE_SIZE))
+        A_ideal = np.zeros((self.STATE_SIZE, self.STATE_SIZE))
+
+        B = np.zeros((self.STATE_SIZE, self.INPUT_SIZE))
+        B_ideal = np.zeros((self.STATE_SIZE, self.INPUT_SIZE))
+
+        eps = 1e-4 # finite differences epsilon
+
+        for ii in range(self.STATE_SIZE):
+            # calculate partial differential w.r.t.
x + inc_x = x.copy() + inc_x[ii] += eps + state_inc,_ = self.plant_dynamics(inc_x, u.copy()) + dec_x = x.copy() + dec_x[ii] -= eps + state_dec,_ = self.plant_dynamics(dec_x, u.copy()) + A[:, ii] = (state_inc - state_dec) / (2 * eps) + + A_ideal[0, 2] = -np.sin(x[2]) * u[0] + A_ideal[1, 2] = np.cos(x[2]) * u[0] + + for ii in range(self.INPUT_SIZE): + # calculate partial differential w.r.t. u + inc_u = u.copy() + inc_u[ii] += eps + state_inc,_ = self.plant_dynamics(x.copy(), inc_u) + dec_u = u.copy() + dec_u[ii] -= eps + state_dec,_ = self.plant_dynamics(x.copy(), dec_u) + B[:, ii] = (state_inc - state_dec) / (2 * eps) + + # calc by hand + B_ideal[0, 0] = np.cos(x[2]) + B_ideal[1, 0] = np.sin(x[2]) + B_ideal[2, 1] = 1. + + return A_ideal, B_ideal + + def ilqr(self, x0, U=None): + """ use iterative linear quadratic regulation to find a control + sequence that minimizes the cost function + + Parameters + -------------- + x0 : numpy.ndarray, shape(STATE_SIZE, ) + the initial state of the system + U : numpy.ndarray(TIME, INPUT_SIZE) + the initial control trajectory dimension + """ + U = self.U if U is None else U + + lamb = 1.0 # regularization parameter + sim_new_trajectory = True + tN = U.shape[0] # number of time steps + + for ii in range(self.max_iter): + + if sim_new_trajectory == True: + # simulate forward using the current control trajectory + X, cost = self.simulate(x0, U) + oldcost = np.copy(cost) # copy for exit condition check + + # + f_x = np.zeros((tN, self.STATE_SIZE, self.STATE_SIZE)) # df / dx + f_u = np.zeros((tN, self.STATE_SIZE, self.INPUT_SIZE)) # df / du + # for storing quadratized cost function + + l = np.zeros((tN,1)) # immediate state cost + l_x = np.zeros((tN, self.STATE_SIZE)) # dl / dx + l_xx = np.zeros((tN, self.STATE_SIZE, self.STATE_SIZE)) # d^2 l / dx^2 + l_u = np.zeros((tN, self.INPUT_SIZE)) # dl / du + l_uu = np.zeros((tN, self.INPUT_SIZE, self.INPUT_SIZE)) # d^2 l / du^2 + l_ux = np.zeros((tN, self.INPUT_SIZE, self.STATE_SIZE)) # d^2 l / du / dx + # for everything except final state + for t in range(tN-1): + # x(t+1) = f(x(t), u(t)) = x(t) + dx(t) * dt + # linearized dx(t) = np.dot(A(t), x(t)) + np.dot(B(t), u(t)) + # f_x = np.eye + A(t) + # f_u = B(t) + A, B = self.finite_differences(X[t], U[t]) + f_x[t] = np.eye(self.STATE_SIZE) + A * self.dt + f_u[t] = B * self.dt + + (l[t], l_x[t], l_xx[t], l_u[t], l_uu[t], l_ux[t]) = self.cost(X[t], U[t]) + l[t] *= self.dt + l_x[t] *= self.dt + l_xx[t] *= self.dt + l_u[t] *= self.dt + l_uu[t] *= self.dt + l_ux[t] *= self.dt + + # and for final state + l[-1], l_x[-1], l_xx[-1] = self.cost_final(X[-1]) + + sim_new_trajectory = False + + # optimize things! 
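+            # Backward pass: starting from the final-state cost, recursively
+            # compute the local value function and the affine control update
+            # U_new[t] = U[t] + k[t] + K[t] (x_new - X[t]); Q_uu is regularized
+            # with the Levenberg-Marquardt parameter lamb before inversion.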
+ # initialize Vs with final state cost and set up k, K + V = l[-1].copy() # value function + V_x = l_x[-1].copy() # dV / dx + V_xx = l_xx[-1].copy() # d^2 V / dx^2 + k = np.zeros((tN, self.INPUT_SIZE)) # feedforward modification + K = np.zeros((tN, self.INPUT_SIZE, self.STATE_SIZE)) # feedback gain + + # NOTE: they use V' to denote the value at the next timestep, + # they have this redundant in their notation making it a + # function of f(x + dx, u + du) and using the ', but it makes for + # convenient shorthand when you drop function dependencies + + # work backwards to solve for V, Q, k, and K + for t in range(self.tN-2, -1, -1): + + # NOTE: we're working backwards, so V_x = V_x[t+1] = V'_x + + # 4a) Q_x = l_x + np.dot(f_x^T, V'_x) + Q_x = l_x[t] + np.dot(f_x[t].T, V_x) + # 4b) Q_u = l_u + np.dot(f_u^T, V'_x) + Q_u = l_u[t] + np.dot(f_u[t].T, V_x) + + # NOTE: last term for Q_xx, Q_uu, and Q_ux is vector / tensor product + # but also note f_xx = f_uu = f_ux = 0 so they're all 0 anyways. + + # 4c) Q_xx = l_xx + np.dot(f_x^T, np.dot(V'_xx, f_x)) + np.einsum(V'_x, f_xx) + Q_xx = l_xx[t] + np.dot(f_x[t].T, np.dot(V_xx, f_x[t])) + # 4d) Q_ux = l_ux + np.dot(f_u^T, np.dot(V'_xx, f_x)) + np.einsum(V'_x, f_ux) + Q_ux = l_ux[t] + np.dot(f_u[t].T, np.dot(V_xx, f_x[t])) + # 4e) Q_uu = l_uu + np.dot(f_u^T, np.dot(V'_xx, f_u)) + np.einsum(V'_x, f_uu) + Q_uu = l_uu[t] + np.dot(f_u[t].T, np.dot(V_xx, f_u[t])) + + # Calculate Q_uu^-1 with regularization term set by + # Levenberg-Marquardt heuristic (at end of this loop) + Q_uu_evals, Q_uu_evecs = np.linalg.eig(Q_uu) + Q_uu_evals[Q_uu_evals < 0] = 0.0 + Q_uu_evals += lamb + Q_uu_inv = np.dot(Q_uu_evecs, np.dot(np.diag(1.0/Q_uu_evals), Q_uu_evecs.T)) + + # 5b) k = -np.dot(Q_uu^-1, Q_u) + k[t] = -np.dot(Q_uu_inv, Q_u) + # 5b) K = -np.dot(Q_uu^-1, Q_ux) + K[t] = -np.dot(Q_uu_inv, Q_ux) + + # 6a) DV = -.5 np.dot(k^T, np.dot(Q_uu, k)) + # 6b) V_x = Q_x - np.dot(K^T, np.dot(Q_uu, k)) + V_x = Q_x - np.dot(K[t].T, np.dot(Q_uu, k[t])) + # 6c) V_xx = Q_xx - np.dot(-K^T, np.dot(Q_uu, K)) + V_xx = Q_xx - np.dot(K[t].T, np.dot(Q_uu, K[t])) + + U_new = np.zeros((tN, self.INPUT_SIZE)) + # calculate the optimal change to the control trajectory + x_new = x0.copy() # 7a) + for t in range(tN - 1): + # use feedforward (k) and feedback (K) gain matrices + # calculated from our value function approximation + # to take a stab at the optimal control signal + U_new[t] = U[t] + k[t] + np.dot(K[t], x_new - X[t]) # 7b) + # given this u, find our next state + _,x_new = self.plant_dynamics(x_new, U_new[t]) # 7c) + + # evaluate the new trajectory + X_new, cost_new = self.simulate(x0, U_new) + + # Levenberg-Marquardt heuristic + if cost_new < cost: + # decrease lambda (get closer to Newton's method) + lamb /= self.lamb_factor + + X = np.copy(X_new) # update trajectory + U = np.copy(U_new) # update control signal + oldcost = np.copy(cost) + cost = np.copy(cost_new) + + sim_new_trajectory = True # do another rollout + + # print("iteration = %d; Cost = %.4f;"%(ii, costnew) + + # " logLambda = %.1f"%np.log(lamb)) + # check to see if update is small enough to exit + if ii > 0 and ((abs(oldcost-cost)/cost) < self.eps_converge): + print("Converged at iteration = %d; Cost = %.4f;"%(ii,cost_new) + + " logLambda = %.1f"%np.log(lamb)) + break + + else: + # increase lambda (get closer to gradient descent) + lamb *= self.lamb_factor + # print("cost: %.4f, increasing lambda to %.4f")%(cost, lamb) + if lamb > self.lamb_max: + print("lambda > max_lambda at iteration = %d;"%ii + + " Cost = %.4f; 
logLambda = %.1f"%(cost,
+                                                            np.log(lamb)))
+                    break
+
+        return X, U, cost
+
+    def plant_dynamics(self, x, u):
+        """ simulate a single time step of the plant, from
+        initial state x and applying control signal u
+
+        x np.array: the state of the system
+        u np.array: the control signal
+        """
+
+        # set the car state to x
+        self.simulator.initialize_state(x)
+
+        # apply the control signal
+        x_next = self.simulator.update_state(u, self.dt)
+
+        # calculate the change in state
+        xdot = ((x_next - x) / self.dt).squeeze()
+
+        return xdot, x_next
+
+    def reset(self, target):
+        """ reset the state of the controller """
+
+        # Index along current control sequence
+        self.t = 0
+        self.U = np.zeros((self.tN, self.INPUT_SIZE))
+        self.target = target.copy()
+
+    def simulate(self, x0, U):
+        """ do a rollout of the system, starting at x0 and
+        applying the control sequence U
+
+        x0 np.array: the initial state of the system
+        U np.array: the control sequence to apply
+        """
+        tN = U.shape[0]
+        X = np.zeros((tN, self.STATE_SIZE))
+        X[0] = x0
+        cost = 0
+
+        # Run simulation
+        for t in range(tN-1):
+            _, X[t+1] = self.plant_dynamics(X[t], U[t])
+            l, _, _, _, _, _ = self.cost(X[t], U[t])
+            cost = cost + self.dt * l
+
+        # Add the final state cost
+        l_f, _, _ = self.cost_final(X[-1])
+        cost = cost + l_f
+
+        return X, cost
diff --git a/iLQR/main.py b/iLQR/main.py
new file mode 100644
index 0000000..8414fed
--- /dev/null
+++ b/iLQR/main.py
@@ -0,0 +1,59 @@
+import numpy as np
+import matplotlib.pyplot as plt
+import math
+
+from model import TwoWheeledCar
+from ilqr import iLQRController
+from animation import AnimDrawer
+
+def main():
+    """run the iLQR controller on the two wheeled car model
+    """
+    # iteration parameters
+    NUM_ITERATIONS = 500
+    dt = 0.01
+
+    # make plant
+    init_x = np.array([0., 0., 0.5*math.pi])
+    car = TwoWheeledCar(init_x)
+
+    # make goal
+    target = np.array([5., 3., 0.])
+
+    # controller
+    controller = iLQRController()
+
+    for iteration in range(NUM_ITERATIONS):
+        print("iteration num = {} / {}".format(iteration, NUM_ITERATIONS))
+
+        # reset (and re-plan from scratch) only on the first step; afterwards
+        # the controller warm-starts from the previous input sequence
+        if iteration == 0:
+            changed = True
+        else:
+            changed = False
+
+        u = controller.calc_input(car, target, changed=changed)
+
+        car.update_state(u, dt)
+
+    # figures and animation
+    history_states = np.array(car.history_xs)
+
+    time_fig = plt.figure()
+
+    x_fig = time_fig.add_subplot(311)
+    y_fig = time_fig.add_subplot(312)
+    th_fig = time_fig.add_subplot(313)
+
+    time = len(history_states)
+    x_fig.plot(np.arange(time), history_states[:, 0])
+    y_fig.plot(np.arange(time), history_states[:, 1])
+    th_fig.plot(np.arange(time), history_states[:, 2])
+
+    plt.show()
+
+    history_states = np.array(car.history_xs)
+
+    animdrawer = AnimDrawer([history_states, target])
+    animdrawer.draw_anim()
+
+if __name__ == "__main__":
+    main()
\ No newline at end of file
diff --git a/iLQR/model.py b/iLQR/model.py
new file mode 100644
index 0000000..a09fd40
--- /dev/null
+++ b/iLQR/model.py
@@ -0,0 +1,177 @@
+import numpy as np
+import matplotlib.pyplot as plt
+import math
+import copy
+
+
+"""
+This two wheeled model is used by the controller.
+Ideally a base class should be defined and inherited from, but that is omitted here.
+"""
+class TwoWheeledCar():
+    """SampleSystem, this is the simulator
+    Attributes
+    -----------
+    xs : numpy.ndarray
+        system states, [x, y, theta]
+    history_xs : list
+        time history of the states
+    """
+    def __init__(self, init_states=None):
+        """
+        Parameters
+        -----------
+        init_states : numpy.ndarray, optional, shape(3, )
+            initial state of the system, default is None
+        """
+        self.STATE_SIZE = 3
+        self.INPUT_SIZE = 2
+
+        self.xs = np.zeros(3)
+
+        if init_states is not None:
+            self.xs = copy.deepcopy(init_states)
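+
+        # keep the full time history of states (and of predicted trajectories)
+        # for later plotting and animation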
+        self.history_xs = [init_states]
+        self.history_predict_xs = []
+
+    def update_state(self, us, dt):
+        """
+        Parameters
+        ------------
+        us : numpy.ndarray
+            inputs of the system, in some cases this means the reference
+        dt : float in seconds
+            sampling time of the simulation
+        """
+        # Runge-Kutta slopes for the states (x, y, theta)
+        k0 = [0.0 for _ in range(3)]
+        k1 = [0.0 for _ in range(3)]
+        k2 = [0.0 for _ in range(3)]
+        k3 = [0.0 for _ in range(3)]
+
+        functions = [self._func_x_1, self._func_x_2, self._func_x_3]
+
+        # solve Runge-Kutta
+        for i, func in enumerate(functions):
+            k0[i] = dt * func(self.xs[0], self.xs[1], self.xs[2], us[0], us[1])
+
+        for i, func in enumerate(functions):
+            k1[i] = dt * func(self.xs[0] + k0[0]/2., self.xs[1] + k0[1]/2., self.xs[2] + k0[2]/2., us[0], us[1])
+
+        for i, func in enumerate(functions):
+            # use k1 (not k0) for the second intermediate slope of RK4
+            k2[i] = dt * func(self.xs[0] + k1[0]/2., self.xs[1] + k1[1]/2., self.xs[2] + k1[2]/2., us[0], us[1])
+
+        for i, func in enumerate(functions):
+            k3[i] = dt * func(self.xs[0] + k2[0], self.xs[1] + k2[1], self.xs[2] + k2[2], us[0], us[1])
+
+        self.xs[0] += (k0[0] + 2. * k1[0] + 2. * k2[0] + k3[0]) / 6.
+        self.xs[1] += (k0[1] + 2. * k1[1] + 2. * k2[1] + k3[1]) / 6.
+        self.xs[2] += (k0[2] + 2. * k1[2] + 2. * k2[2] + k3[2]) / 6.
+
+        # save
+        save_states = copy.deepcopy(self.xs)
+        self.history_xs.append(save_states)
+
+        return self.xs.copy()
+
+    def predict_state(self, init_xs, us, dt=0.01):
+        """predict future states by applying the optimal inputs made by MPC
+        Parameters
+        -----------
+        init_xs : numpy.ndarray, shape(3, )
+            initial state of the prediction
+        us : array-like, shape(2, N)
+            optimal inputs made by MPC
+        dt : float in seconds, optional
+            sampling time of the simulation, default is 0.01 [s]
+        """
+        ## test
+        # assert us.shape[0] == 2 and us.shape[1] == 15, "wrong shape"
+
+        xs = copy.deepcopy(init_xs)
+        predict_xs = [copy.deepcopy(xs)]
+
+        for t in range(us.shape[1]):
+            k0 = [0.0 for _ in range(self.STATE_SIZE)]
+            k1 = [0.0 for _ in range(self.STATE_SIZE)]
+            k2 = [0.0 for _ in range(self.STATE_SIZE)]
+            k3 = [0.0 for _ in range(self.STATE_SIZE)]
+
+            functions = [self._func_x_1, self._func_x_2, self._func_x_3]
+
+            # solve Runge-Kutta (the time-step index t must not be shadowed
+            # by the inner loop variable i)
+            for i, func in enumerate(functions):
+                k0[i] = dt * func(xs[0], xs[1], xs[2], us[0, t], us[1, t])
+
+            for i, func in enumerate(functions):
+                k1[i] = dt * func(xs[0] + k0[0]/2., xs[1] + k0[1]/2., xs[2] + k0[2]/2., us[0, t], us[1, t])
+
+            for i, func in enumerate(functions):
+                k2[i] = dt * func(xs[0] + k1[0]/2., xs[1] + k1[1]/2., xs[2] + k1[2]/2., us[0, t], us[1, t])
+
+            for i, func in enumerate(functions):
+                k3[i] = dt * func(xs[0] + k2[0], xs[1] + k2[1], xs[2] + k2[2], us[0, t], us[1, t])
+
+            xs[0] += (k0[0] + 2. * k1[0] + 2. * k2[0] + k3[0]) / 6.
+            xs[1] += (k0[1] + 2. * k1[1] + 2. * k2[1] + k3[1]) / 6.
+            xs[2] += (k0[2] + 2. * k1[2] + 2. * k2[2] + k3[2]) / 6.
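+            # xs has now been advanced by one RK4 step for this prediction step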
+ + predict_xs.append(copy.deepcopy(xs)) + + self.history_predict_xs.append(np.array(predict_xs)) + + return np.array(predict_xs) + + def initialize_state(self, init_xs): + """ + initialize the state + + Parameters + ------------ + init_xs : numpy.ndarray + """ + self.xs = init_xs.flatten() + + def _func_x_1(self, y_1, y_2, y_3, u_1, u_2): + """ + Parameters + ------------ + y_1 : float + y_2 : float + y_3 : float + u_1 : float + system input + u_2 : float + system input + """ + y_dot = math.cos(y_3) * u_1 + return y_dot + + def _func_x_2(self, y_1, y_2, y_3, u_1, u_2): + """ + Parameters + ------------ + y_1 : float + y_2 : float + y_3 : float + u_1 : float + system input + u_2 : float + system input + """ + y_dot = math.sin(y_3) * u_1 + return y_dot + + def _func_x_3(self, y_1, y_2, y_3, u_1, u_2): + """ + Parameters + ------------ + y_1 : float + y_2 : float + y_3 : float + u_1 : float + system input + u_2 : float + system input + """ + y_dot = u_2 + return y_dot \ No newline at end of file
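
For reference, the following is a minimal sketch (not part of the patch) that checks the hand-derived Jacobians used in `finite_differences` (`A_ideal`, `B_ideal`) against a central finite-difference approximation of the kinematic model. The local function `f` re-implements the dynamics from `model.py`, and the test point and tolerances are assumptions.

```python
import numpy as np

def f(x, u):
    """Continuous-time kinematics of the two wheeled car: xdot = f(x, u)."""
    return np.array([u[0] * np.cos(x[2]),
                     u[0] * np.sin(x[2]),
                     u[1]])

def numeric_jacobians(x, u, eps=1e-4):
    """Central finite differences of f with respect to x and u."""
    A = np.zeros((3, 3))
    B = np.zeros((3, 2))
    for i in range(3):
        dx = np.zeros(3)
        dx[i] = eps
        A[:, i] = (f(x + dx, u) - f(x - dx, u)) / (2. * eps)
    for i in range(2):
        du = np.zeros(2)
        du[i] = eps
        B[:, i] = (f(x, u + du) - f(x, u - du)) / (2. * eps)
    return A, B

def analytic_jacobians(x, u):
    """Hand-derived Jacobians, matching A_ideal / B_ideal in ilqr.py."""
    A = np.zeros((3, 3))
    A[0, 2] = -np.sin(x[2]) * u[0]
    A[1, 2] = np.cos(x[2]) * u[0]
    B = np.array([[np.cos(x[2]), 0.],
                  [np.sin(x[2]), 0.],
                  [0., 1.]])
    return A, B

if __name__ == "__main__":
    x = np.array([1., -2., 0.3])   # assumed test state (x, y, theta)
    u = np.array([0.8, 0.5])       # assumed test input (velocity, angular velocity)
    A_num, B_num = numeric_jacobians(x, u)
    A_an, B_an = analytic_jacobians(x, u)
    assert np.allclose(A_num, A_an, atol=1e-6)
    assert np.allclose(B_num, B_an, atol=1e-6)
    print("analytic Jacobians match the finite differences")
```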