diff --git a/README.md b/README.md index e35db85..6e97399 100644 --- a/README.md +++ b/README.md @@ -9,7 +9,6 @@ Now you can see the examples of control theories as following - **Nonlinear Model Predictive Control with CGMRES** - **Linear Model Predictive Control**(as generic function such as matlab tool box) - **Extended Linear Model Predictive Control for vehicle model** -- **Iterative LQR (linear quadratic regulator)** # Usage and Details you can see the usage in each folder diff --git a/iLQR/README.md b/iLQR/README.md deleted file mode 100644 index d893b05..0000000 --- a/iLQR/README.md +++ /dev/null @@ -1,61 +0,0 @@ -# Iterative Linear Quadratic Regulator -This program is about iLQR (Iteratice Linear Quadratic Regulator) - -# Problem Formulation - -Two wheeled robot is expressed by the following equation. -It is nonlinear and nonholonomic system. Sometimes, it's extremely difficult to control the -steering(or angular velocity) and velocity of the wheeled robot. Therefore, many methods control only steering, like purepersuit, Linear MPC. -However, sometimes we should consider the velocity and steering simultaneously when the car or robots move fast. -To solve the problem, we should apply the control methods which can treat the nonlinear system. - - - -Nonliner Model Predictive Control is one of the famous methods, so I applied the method in the folder of this repository. -(if you are interested, please look it) - -NMPC is very effecitive method to solve nonlinear optimal control problem but it is a handcraft method. -This program is about one more other methods to solve the nonlinear optimal control problem. - -The method is iterative LQR. -Iterative LQR is one of the DDP(differential dynamic programming) method. -Recently, this method is used in IRL(inverse reinforcement learning), such as GPS(guided policy search) - -If you want to know more about the iLQR, please look the references. -The paper and website is great. - -# Usage - -``` - -``` - -# Expected Results - -- static goal - - -- track the goal - - -# Applied other model - - - -# Requirement - -- python3.5 or more -- numpy -- matplotlib - -# Reference - -- study wolf -https://github.com/studywolf/control - -- Sergey Levine's lecture -http://rail.eecs.berkeley.edu/deeprlcourse/ - -- Tassa, Y., Erez, T., & Todorov, E. (2012). Synthesis and stabilization of complex behaviors through online trajectory optimization. IEEE International Conference on Intelligent Robots and Systems, 4906–4913. https://doi.org/10.1109/IROS.2012.6386025 - -- Li, W., & Todorov, E. (n.d.). Iterative Linear Quadratic Regulator Design for Nonlinear Biological Movement Systems. Retrieved from https://homes.cs.washington.edu/~todorov/papers/LiICINCO04.pdf \ No newline at end of file diff --git a/iLQR/animation.py b/iLQR/animation.py deleted file mode 100644 index d6970d7..0000000 --- a/iLQR/animation.py +++ /dev/null @@ -1,294 +0,0 @@ -import numpy as np -import matplotlib.pyplot as plt -import matplotlib.animation as ani -import matplotlib.font_manager as fon -import sys -import math - -# default setting of figures -plt.rcParams["mathtext.fontset"] = 'stix' # math fonts -plt.rcParams['xtick.direction'] = 'in' # x axis in -plt.rcParams['ytick.direction'] = 'in' # y axis in -plt.rcParams["font.size"] = 10 -plt.rcParams['axes.linewidth'] = 1.0 # axis line width -plt.rcParams['axes.grid'] = True # make grid - -def coordinate_transformation_in_angle(positions, base_angle): - ''' - Transformation the coordinate in the angle - - Parameters - ------- - positions : numpy.ndarray - this parameter is composed of xs, ys - should have (2, N) shape - base_angle : float [rad] - - Returns - ------- - traslated_positions : numpy.ndarray - the shape is (2, N) - - ''' - if positions.shape[0] != 2: - raise ValueError('the input data should have (2, N)') - - positions = np.array(positions) - positions = positions.reshape(2, -1) - - rot_matrix = [[np.cos(base_angle), np.sin(base_angle)], - [-1*np.sin(base_angle), np.cos(base_angle)]] - - rot_matrix = np.array(rot_matrix) - - translated_positions = np.dot(rot_matrix, positions) - - return translated_positions - -def square_make_with_angles(center_x, center_y, size, angle): - ''' - Create square matrix with angle line matrix(2D) - - Parameters - ------- - center_x : float in meters - the center x position of the square - center_y : float in meters - the center y position of the square - size : float in meters - the square's half-size - angle : float in radians - - Returns - ------- - square xs : numpy.ndarray - lenght is 5 (counterclockwise from right-up) - square ys : numpy.ndarray - length is 5 (counterclockwise from right-up) - angle line xs : numpy.ndarray - angle line ys : numpy.ndarray - ''' - - # start with the up right points - # create point in counterclockwise - square_xys = np.array([[size, 0.5 * size], [-size, 0.5 * size], [-size, -0.5 * size], [size, -0.5 * size], [size, 0.5 * size]]) - trans_points = coordinate_transformation_in_angle(square_xys.T, -angle) # this is inverse type - trans_points += np.array([[center_x], [center_y]]) - - square_xs = trans_points[0, :] - square_ys = trans_points[1, :] - - angle_line_xs = [center_x, center_x + math.cos(angle) * size] - angle_line_ys = [center_y, center_y + math.sin(angle) * size] - - return square_xs, square_ys, np.array(angle_line_xs), np.array(angle_line_ys) - - -def circle_make_with_angles(center_x, center_y, radius, angle): - ''' - Create circle matrix with angle line matrix - - Parameters - ------- - center_x : float - the center x position of the circle - center_y : float - the center y position of the circle - radius : float - angle : float [rad] - - Returns - ------- - circle xs : numpy.ndarray - circle ys : numpy.ndarray - angle line xs : numpy.ndarray - angle line ys : numpy.ndarray - ''' - - point_num = 100 # 分解能 - - circle_xs = [] - circle_ys = [] - - for i in range(point_num + 1): - circle_xs.append(center_x + radius * math.cos(i*2*math.pi/point_num)) - circle_ys.append(center_y + radius * math.sin(i*2*math.pi/point_num)) - - angle_line_xs = [center_x, center_x + math.cos(angle) * radius] - angle_line_ys = [center_y, center_y + math.sin(angle) * radius] - - return np.array(circle_xs), np.array(circle_ys), np.array(angle_line_xs), np.array(angle_line_ys) - - -class AnimDrawer(): - """create animation of path and robot - - Attributes - ------------ - cars : - anim_fig : figure of matplotlib - axis : axis of matplotlib - - """ - def __init__(self, objects): - """ - Parameters - ------------ - objects : list of objects - - Notes - --------- - lead_history_states, lead_history_predict_states, traj_ref, history_traj_ref, history_angle_ref - """ - self.car_history_state = objects[0] - self.traget = objects[1] - - self.history_xs = [self.car_history_state[:, 0]] - self.history_ys = [self.car_history_state[:, 1]] - self.history_ths = [self.car_history_state[:, 2]] - - # setting up figure - self.anim_fig = plt.figure(dpi=150) - self.axis = self.anim_fig.add_subplot(111) - - # imgs - self.car_imgs = [] - self.traj_imgs = [] - - def draw_anim(self, interval=50): - """draw the animation and save - - Parameteres - ------------- - interval : int, optional - animation's interval time, you should link the sampling time of systems - default is 50 [ms] - """ - self._set_axis() - self._set_img() - - self.skip_num = 1 - frame_num = int((len(self.history_xs[0])-1) / self.skip_num) - - animation = ani.FuncAnimation(self.anim_fig, self._update_anim, interval=interval, frames=frame_num) - - # self.axis.legend() - print('save_animation?') - shuold_save_animation = int(input()) - - if shuold_save_animation: - print('animation_number?') - num = int(input()) - animation.save('animation_{0}.mp4'.format(num), writer='ffmpeg') - # animation.save("Sample.gif", writer = 'imagemagick') # gif保存 - - plt.show() - - def _set_axis(self): - """ initialize the animation axies - """ - # (1) set the axis name - self.axis.set_xlabel(r'$\it{x}$ [m]') - self.axis.set_ylabel(r'$\it{y}$ [m]') - self.axis.set_aspect('equal', adjustable='box') - - LOW_MARGIN = 5 - HIGH_MARGIN = 5 - - self.axis.set_xlim(np.min(self.history_xs) - LOW_MARGIN, np.max(self.history_xs) + HIGH_MARGIN) - self.axis.set_ylim(np.min(self.history_ys) - LOW_MARGIN, np.max(self.history_ys) + HIGH_MARGIN) - - def _set_img(self): - """ initialize the imgs of animation - this private function execute the make initial imgs for animation - """ - # object imgs - obj_color_list = ["k", "k", "m", "m"] - obj_styles = ["solid", "solid", "solid", "solid"] - - for i in range(len(obj_color_list)): - temp_img, = self.axis.plot([], [], color=obj_color_list[i], linestyle=obj_styles[i]) - self.car_imgs.append(temp_img) - - traj_color_list = ["k", "b"] - - for i in range(len(traj_color_list)): - temp_img, = self.axis.plot([],[], color=traj_color_list[i], linestyle="dashed") - self.traj_imgs.append(temp_img) - - temp_img, = self.axis.plot([],[], "*", color="b") - self.traj_imgs.append(temp_img) - - def _update_anim(self, i): - """the update animation - this function should be used in the animation functions - - Parameters - ------------ - i : int - time step of the animation - the sampling time should be related to the sampling time of system - - Returns - ----------- - object_imgs : list of img - traj_imgs : list of img - """ - i = int(i * self.skip_num) - - # self._draw_set_axis(i) - self._draw_car(i) - self._draw_traj(i) - # self._draw_prediction(i) - - return self.car_imgs, self.traj_imgs, - - def _draw_set_axis(self, i): - """ - """ - # (2) set the xlim and ylim - LOW_MARGIN = 20 - HIGH_MARGIN = 20 - OVER_LOOK = 50 - self.axis.set_xlim(np.min(self.history_xs[0][i : i + OVER_LOOK]) - LOW_MARGIN, np.max(self.history_xs[0][i : i + OVER_LOOK]) + HIGH_MARGIN) - self.axis.set_ylim(np.min(self.history_ys[0][i : i + OVER_LOOK]) - LOW_MARGIN, np.max(self.history_ys[0][i : i + OVER_LOOK]) + HIGH_MARGIN) - - def _draw_car(self, i): - """ - This private function is just divided thing of - the _update_anim to see the code more clear - - Parameters - ------------ - i : int - time step of the animation - the sampling time should be related to the sampling time of system - """ - # cars - object_x, object_y, angle_x, angle_y = square_make_with_angles(self.history_xs[0][i], - self.history_ys[0][i], - 1.0, - self.history_ths[0][i]) - - self.car_imgs[0].set_data([object_x, object_y]) - self.car_imgs[1].set_data([angle_x, angle_y]) - - def _draw_traj(self, i): - """ - This private function is just divided thing of - the _update_anim to see the code more clear - - Parameters - ------------ - i : int - time step of the animation - the sampling time should be related to the sampling time of system - """ - # car - self.traj_imgs[0].set_data(self.history_xs[0][:i], self.history_ys[0][:i]) - - # goal - self.traj_imgs[-1].set_data(self.traget[0], self.traget[1]) - - # traj_ref - # self.traj_imgs[2].set_data(self.history_traj_ref[i][0, :], self.history_traj_ref[i][1, :]) \ No newline at end of file diff --git a/iLQR/ilqr.py b/iLQR/ilqr.py deleted file mode 100644 index e4edf26..0000000 --- a/iLQR/ilqr.py +++ /dev/null @@ -1,410 +0,0 @@ -import numpy as np -from copy import copy, deepcopy - -from model import TwoWheeledCar - -class iLQRController(): - """ - A controller that implements iterative Linear Quadratic Gaussian control. - Controls the (x, y, th) of the two wheeled car - """ - - def __init__(self, N=100, max_iter=100, dt=0.016): - ''' - n int: length of the control sequence - max_iter int: limit on number of optimization iterations - ''' - self.old_target = [None, None] - - self.tN = N # number of timesteps - self.STATE_SIZE = 3 - self.INPUT_SIZE = 2 - self.dt = dt - - self.max_iter = max_iter - self.lamb_factor = 10 - self.lamb_max = 1e4 - self.eps_converge = 0.001 # exit if relative improvement below threshold - - def calc_input(self, car, x_target, changed=False): - """Generates a control signal to move the - arm to the specified target. - - car : the arm model being controlled NOTE:これが実際にコントロールされるやつ - des list : the desired system position - x_des np.array: desired task-space force, - irrelevant here. - """ - - # if the target has changed, reset things and re-optimize - # for this movement、目標が変わっている場合があるので確認 - if changed: - self.reset(x_target) - - # Reset k if at the end of the sequence - if self.t >= self.tN - 1: # 最初のSTEPのみ計算 - self.t = 0 - - # Compute the optimization - """ - NOTE : ここに条件を追加してもいいかもしれない、何サイクルも回す必要ないし、理想軌道とずれたらとか - """ - if self.t % 1 == 0: - x0 = np.zeros(self.STATE_SIZE) # 初期化、速度は0 - - self.simulator, x0 = self.initialize_simulator(car) # 前の時刻のものを確保 - - U = np.copy(self.U[self.t:]) # 初期入力かなこれ - - self.X, self.U[self.t:], cost = self.ilqr(x0, U) # 入力列が入ってくる - - self.u = self.U[self.t] - - # move us a step forward in our control sequence - self.t += 1 - - return self.u - - def initialize_simulator(self, car): - """ make a copy of the car model, to make sure that the - actual car model isn't affected during the iLQR process - """ - # need to make a copy the real car - simulator = TwoWheeledCar(deepcopy(car.xs)) - - return simulator, deepcopy(simulator.xs) - - def cost(self, xs, us): - """ the immediate state cost function - - Parameters - ------------ - xs : shape(STATE_SIZE, tN + 1) - us : shape(STATE_SIZE, tN) - """ - - """ - NOTE : 拡張する説ありますがとりあえず飛ばします - """ - # total cost - # quadratic のもののみ計算 - R_11 = 1. # terminal u thorottle cost weight - R_22 = 0.01 # terminal u steering cost weight - - l = np.dot(us.T, np.dot(np.diag([R_11, R_22]), us)) - - # compute derivatives of cost - l_x = np.zeros(self.STATE_SIZE) - l_xx = np.zeros((self.STATE_SIZE, self.STATE_SIZE)) - - l_u1 = 2. * us[0] * R_11 - l_u2 = 2. * us[1] * R_22 - - l_u = np.array([l_u1, l_u2]) - - l_uu = 2. * np.diag([R_11, R_22]) - - l_ux = np.zeros((self.INPUT_SIZE, self.STATE_SIZE)) - - # returned in an array for easy multiplication by time step - return l, l_x, l_xx, l_u, l_uu, l_ux - - def cost_final(self, x): - """ the final state cost function - - Parameters - ------------- - xs : shape(STATE_SIZE,) - - Notes : - --------- - l_x = np.zeros((self.STATE_SIZE)) - l_xx = np.zeros((self.STATE_SIZE, self.STATE_SIZE)) - """ - Q_11 = 10. # terminal x cost weight - Q_22 = 10. # terminal y cost weight - Q_33 = 0.0001 # terminal theta cost weight - - error = self.simulator.xs - self.target - - l = np.dot(error.T, np.dot(np.diag([Q_11, Q_22, Q_33]), error)) - - # about L_x - l_x1 = 2. * (x[0] - self.target[0]) * Q_11 - l_x2 = 2. * (x[1] - self.target[1]) * Q_22 - l_x3 = 2. * (x[2] -self.target[2]) * Q_33 - l_x = np.array([l_x1, l_x2, l_x3]) - - # about l_xx - l_xx = 2. * np.diag([Q_11, Q_22, Q_33]) - - # Final cost only requires these three values - return l, l_x, l_xx - - def finite_differences(self, x, u): - """ calculate gradient of plant dynamics using finite differences - - x np.array: the state of the system - u np.array: the control signal - """ - - A = np.zeros((self.STATE_SIZE, self.STATE_SIZE)) - A_ideal = np.zeros((self.STATE_SIZE, self.STATE_SIZE)) - - B = np.zeros((self.STATE_SIZE, self.INPUT_SIZE)) - B_ideal = np.zeros((self.STATE_SIZE, self.INPUT_SIZE)) - - - eps = 1e-4 # finite differences epsilon - - for ii in range(self.STATE_SIZE): - # calculate partial differential w.r.t. x - inc_x = x.copy() - inc_x[ii] += eps - state_inc,_ = self.plant_dynamics(inc_x, u.copy()) - dec_x = x.copy() - dec_x[ii] -= eps - state_dec,_ = self.plant_dynamics(dec_x, u.copy()) - A[:, ii] = (state_inc - state_dec) / (2 * eps) - - - A_ideal[0, 2] = -np.sin(x[2]) * u[1] - A_ideal[1, 2] = np.cos(x[2]) * u[1] - - # print("A = \n{}".format(A)) - # print("ideal A = \n{}".format(A_ideal)) - - - for ii in range(self.INPUT_SIZE): - # calculate partial differential w.r.t. u - inc_u = u.copy() - inc_u[ii] += eps - state_inc,_ = self.plant_dynamics(x.copy(), inc_u) - dec_u = u.copy() - dec_u[ii] -= eps - state_dec,_ = self.plant_dynamics(x.copy(), dec_u) - B[:, ii] = (state_inc - state_dec) / (2 * eps) - - B_ideal[0, 0] = np.cos(x[2]) - B_ideal[1, 0] = np.sin(x[2]) - B_ideal[2, 1] = 1. - - # print("B = \n{}".format(B)) - # print("ideal B = \n{}".format(B_ideal)) - - # input() - - return A_ideal, B_ideal - - def ilqr(self, x0, U=None): - """ use iterative linear quadratic regulation to find a control - sequence that minimizes the cost function - - x0 np.array: the initial state of the system - U np.array: the initial control trajectory dimensions = [dof, time] - """ - - U = self.U if U is None else U - - lamb = 1.0 # regularization parameter これが正規化項の1つ - sim_new_trajectory = True - tN = U.shape[0] # number of time steps - - for ii in range(self.max_iter): - - if sim_new_trajectory == True: - # simulate forward using the current control trajectory - """ - NOTE: 単純な計算でpredictする - """ - X, cost = self.simulate(x0, U) - oldcost = np.copy(cost) # copy for exit condition check - - # now we linearly approximate the dynamics, and quadratically - # approximate the cost function so we can use LQR methods - - # for storing linearized dynamics - # x(t+1) = f(x(t), u(t)) - """ - NOTE: Gradiantの取得 - """ - f_x = np.zeros((tN, self.STATE_SIZE, self.STATE_SIZE)) # df / dx - f_u = np.zeros((tN, self.STATE_SIZE, self.INPUT_SIZE)) # df / du - # for storing quadratized cost function - - l = np.zeros((tN,1)) # immediate state cost - l_x = np.zeros((tN, self.STATE_SIZE)) # dl / dx - l_xx = np.zeros((tN, self.STATE_SIZE, self.STATE_SIZE)) # d^2 l / dx^2 - l_u = np.zeros((tN, self.INPUT_SIZE)) # dl / du - l_uu = np.zeros((tN, self.INPUT_SIZE, self.INPUT_SIZE)) # d^2 l / du^2 - l_ux = np.zeros((tN, self.INPUT_SIZE, self.STATE_SIZE)) # d^2 l / du / dx - # for everything except final state - for t in range(tN-1): - # x(t+1) = f(x(t), u(t)) = x(t) + dx(t) * dt - # linearized dx(t) = np.dot(A(t), x(t)) + np.dot(B(t), u(t)) - # f_x = np.eye + A(t) - # f_u = B(t) - A, B = self.finite_differences(X[t], U[t]) - f_x[t] = np.eye(self.STATE_SIZE) + A * self.dt - f_u[t] = B * self.dt - - (l[t], l_x[t], l_xx[t], l_u[t], l_uu[t], l_ux[t]) = self.cost(X[t], U[t]) - l[t] *= self.dt - l_x[t] *= self.dt - l_xx[t] *= self.dt - l_u[t] *= self.dt - l_uu[t] *= self.dt - l_ux[t] *= self.dt - - # and for final state - l[-1], l_x[-1], l_xx[-1] = self.cost_final(X[-1]) - - sim_new_trajectory = False - - # optimize things! - # initialize Vs with final state cost and set up k, K - V = l[-1].copy() # value function - V_x = l_x[-1].copy() # dV / dx - V_xx = l_xx[-1].copy() # d^2 V / dx^2 - k = np.zeros((tN, self.INPUT_SIZE)) # feedforward modification - K = np.zeros((tN, self.INPUT_SIZE, self.STATE_SIZE)) # feedback gain - - # NOTE: they use V' to denote the value at the next timestep, - # they have this redundant in their notation making it a - # function of f(x + dx, u + du) and using the ', but it makes for - # convenient shorthand when you drop function dependencies - - # work backwards to solve for V, Q, k, and K - for t in range(self.tN-2, -1, -1): - - # NOTE: we're working backwards, so V_x = V_x[t+1] = V'_x - - # 4a) Q_x = l_x + np.dot(f_x^T, V'_x) - Q_x = l_x[t] + np.dot(f_x[t].T, V_x) - # 4b) Q_u = l_u + np.dot(f_u^T, V'_x) - Q_u = l_u[t] + np.dot(f_u[t].T, V_x) - - # NOTE: last term for Q_xx, Q_uu, and Q_ux is vector / tensor product - # but also note f_xx = f_uu = f_ux = 0 so they're all 0 anyways. - - # 4c) Q_xx = l_xx + np.dot(f_x^T, np.dot(V'_xx, f_x)) + np.einsum(V'_x, f_xx) - Q_xx = l_xx[t] + np.dot(f_x[t].T, np.dot(V_xx, f_x[t])) - # 4d) Q_ux = l_ux + np.dot(f_u^T, np.dot(V'_xx, f_x)) + np.einsum(V'_x, f_ux) - Q_ux = l_ux[t] + np.dot(f_u[t].T, np.dot(V_xx, f_x[t])) - # 4e) Q_uu = l_uu + np.dot(f_u^T, np.dot(V'_xx, f_u)) + np.einsum(V'_x, f_uu) - Q_uu = l_uu[t] + np.dot(f_u[t].T, np.dot(V_xx, f_u[t])) - - # Calculate Q_uu^-1 with regularization term set by - # Levenberg-Marquardt heuristic (at end of this loop) - Q_uu_evals, Q_uu_evecs = np.linalg.eig(Q_uu) - Q_uu_evals[Q_uu_evals < 0] = 0.0 - Q_uu_evals += lamb - Q_uu_inv = np.dot(Q_uu_evecs, np.dot(np.diag(1.0/Q_uu_evals), Q_uu_evecs.T)) - - # 5b) k = -np.dot(Q_uu^-1, Q_u) - k[t] = -np.dot(Q_uu_inv, Q_u) - # 5b) K = -np.dot(Q_uu^-1, Q_ux) - K[t] = -np.dot(Q_uu_inv, Q_ux) - - # 6a) DV = -.5 np.dot(k^T, np.dot(Q_uu, k)) - # 6b) V_x = Q_x - np.dot(K^T, np.dot(Q_uu, k)) - V_x = Q_x - np.dot(K[t].T, np.dot(Q_uu, k[t])) - # 6c) V_xx = Q_xx - np.dot(-K^T, np.dot(Q_uu, K)) - V_xx = Q_xx - np.dot(K[t].T, np.dot(Q_uu, K[t])) - - U_new = np.zeros((tN, self.INPUT_SIZE)) - # calculate the optimal change to the control trajectory - x_new = x0.copy() # 7a) - for t in range(tN - 1): - # use feedforward (k) and feedback (K) gain matrices - # calculated from our value function approximation - # to take a stab at the optimal control signal - U_new[t] = U[t] + k[t] + np.dot(K[t], x_new - X[t]) # 7b) - # given this u, find our next state - _,x_new = self.plant_dynamics(x_new, U_new[t]) # 7c) - - # evaluate the new trajectory - X_new, cost_new = self.simulate(x0, U_new) - - # Levenberg-Marquardt heuristic - if cost_new < cost: - # decrease lambda (get closer to Newton's method) - lamb /= self.lamb_factor - - X = np.copy(X_new) # update trajectory - U = np.copy(U_new) # update control signal - oldcost = np.copy(cost) - cost = np.copy(cost_new) - - sim_new_trajectory = True # do another rollout - - # print("iteration = %d; Cost = %.4f;"%(ii, costnew) + - # " logLambda = %.1f"%np.log(lamb)) - # check to see if update is small enough to exit - if ii > 0 and ((abs(oldcost-cost)/cost) < self.eps_converge): - print("Converged at iteration = %d; Cost = %.4f;"%(ii,cost_new) + - " logLambda = %.1f"%np.log(lamb)) - break - - else: - # increase lambda (get closer to gradient descent) - lamb *= self.lamb_factor - # print("cost: %.4f, increasing lambda to %.4f")%(cost, lamb) - if lamb > self.lamb_max: - print("lambda > max_lambda at iteration = %d;"%ii + - " Cost = %.4f; logLambda = %.1f"%(cost, - np.log(lamb))) - break - - return X, U, cost - - def plant_dynamics(self, x, u): - """ simulate a single time step of the plant, from - initial state x and applying control signal u - - x np.array: the state of the system - u np.array: the control signal - """ - - # set the arm position to x - self.simulator.initialize_state(x) - - # apply the control signal - x_next = self.simulator.update_state(u, self.dt) - - # calculate the change in state - xdot = ((x_next - x) / self.dt).squeeze() - - return xdot, x_next - - def reset(self, target): - """ reset the state of the system """ - - # Index along current control sequence - self.t = 0 - self.U = np.zeros((self.tN, self.INPUT_SIZE)) - self.target = target.copy() - - def simulate(self, x0, U): - """ do a rollout of the system, starting at x0 and - applying the control sequence U - - x0 np.array: the initial state of the system - U np.array: the control sequence to apply - """ - tN = U.shape[0] - X = np.zeros((tN, self.STATE_SIZE)) - X[0] = x0 - cost = 0 - - # Run simulation with substeps - for t in range(tN-1): - _,X[t+1] = self.plant_dynamics(X[t], U[t]) - l, _ , _, _ , _ , _ = self.cost(X[t], U[t]) - cost = cost + self.dt * l - - # Adjust for final cost, subsample trajectory - l_f, _, _ = self.cost_final(X[-1]) - cost = cost + l_f - - return X, cost diff --git a/iLQR/main.py b/iLQR/main.py deleted file mode 100644 index 65a0f96..0000000 --- a/iLQR/main.py +++ /dev/null @@ -1,58 +0,0 @@ -import numpy as np -import matplotlib.pyplot as plt - -from model import TwoWheeledCar -from ilqr import iLQRController -from animation import AnimDrawer - -def main(): - """ - """ - # iteration parameters - NUM_ITERARIONS = 1000 - dt = 0.01 - - # make plant - init_x = np.array([0., 0., 0.]) - car = TwoWheeledCar(init_x) - - # make goal - target = np.array([5., 3., 0.]) - - # controller - controller = iLQRController() - - - for iteration in range(NUM_ITERARIONS): - print("iteration num = {} / {}".format(iteration, NUM_ITERARIONS)) - - if iteration == 0: - changed = True - - u = controller.calc_input(car, target, changed=changed) - - car.update_state(u, dt) - - # figures and animation - history_states = np.array(car.history_xs) - - time_fig = plt.figure() - - x_fig = time_fig.add_subplot(311) - y_fig = time_fig.add_subplot(312) - th_fig = time_fig.add_subplot(313) - - time = len(history_states) - x_fig.plot(np.arange(time), history_states[:, 0]) - y_fig.plot(np.arange(time), history_states[:, 1]) - th_fig.plot(np.arange(time), history_states[:, 2]) - - plt.show() - - history_states = np.array(car.history_xs) - - animdrawer = AnimDrawer([history_states, target]) - animdrawer.draw_anim() - -if __name__ == "__main__": - main() \ No newline at end of file diff --git a/iLQR/model.py b/iLQR/model.py deleted file mode 100644 index a09fd40..0000000 --- a/iLQR/model.py +++ /dev/null @@ -1,177 +0,0 @@ -import numpy as np -import matplotlib.pyplot as plt -import math -import copy - - -""" -このWheeled modelはコントローラー用 -ホントはbase作って、継承すべきですが省略 -""" -class TwoWheeledCar(): - """SampleSystem, this is the simulator - Attributes - ----------- - xs : numpy.ndarray - system states, [x, y, theta] - history_xs : list - time history of state - """ - def __init__(self, init_states=None): - """ - Palameters - ----------- - init_state : float, optional, shape(3, ) - initial state of system default is None - """ - self.STATE_SIZE = 3 - self.INPUT_SIZE = 2 - - self.xs = np.zeros(3) - - if init_states is not None: - self.xs = copy.deepcopy(init_states) - - self.history_xs = [init_states] - self.history_predict_xs = [] - - def update_state(self, us, dt): - """ - Palameters - ------------ - us : numpy.ndarray - inputs of system in some cases this means the reference - dt : float in seconds, optional - sampling time of simulation, default is 0.01 [s] - """ - # for theta 1, theta 1 dot, theta 2, theta 2 dot - k0 = [0.0 for _ in range(3)] - k1 = [0.0 for _ in range(3)] - k2 = [0.0 for _ in range(3)] - k3 = [0.0 for _ in range(3)] - - functions = [self._func_x_1, self._func_x_2, self._func_x_3] - - # solve Runge-Kutta - for i, func in enumerate(functions): - k0[i] = dt * func(self.xs[0], self.xs[1], self.xs[2], us[0], us[1]) - - for i, func in enumerate(functions): - k1[i] = dt * func(self.xs[0] + k0[0]/2., self.xs[1] + k0[1]/2., self.xs[2] + k0[2]/2., us[0], us[1]) - - for i, func in enumerate(functions): - k2[i] = dt * func(self.xs[0] + k0[0]/2., self.xs[1] + k0[1]/2., self.xs[2] + k0[2]/2., us[0], us[1]) - - for i, func in enumerate(functions): - k3[i] = dt * func(self.xs[0] + k2[0], self.xs[1] + k2[1], self.xs[2] + k2[2], us[0], us[1]) - - self.xs[0] += (k0[0] + 2. * k1[0] + 2. * k2[0] + k3[0]) / 6. - self.xs[1] += (k0[1] + 2. * k1[1] + 2. * k2[1] + k3[1]) / 6. - self.xs[2] += (k0[2] + 2. * k1[2] + 2. * k2[2] + k3[2]) / 6. - - # save - save_states = copy.deepcopy(self.xs) - self.history_xs.append(save_states) - - return self.xs.copy() - - def predict_state(self, init_xs, us, dt=0.01): - """make predict state by using optimal input made by MPC - Paramaters - ----------- - us : array-like, shape(2, N) - optimal input made by MPC - dt : float in seconds, optional - sampling time of simulation, default is 0.01 [s] - """ - ## test - # assert us.shape[0] == 2 and us.shape[1] == 15, "wrong shape" - - xs = copy.deepcopy(init_xs) - predict_xs = [copy.deepcopy(xs)] - - for i in range(us.shape[1]): - k0 = [0.0 for _ in range(self.NUM_STATE)] - k1 = [0.0 for _ in range(self.NUM_STATE)] - k2 = [0.0 for _ in range(self.NUM_STATE)] - k3 = [0.0 for _ in range(self.NUM_STATE)] - - functions = [self._func_x_1, self._func_x_2, self._func_x_3] - - # solve Runge-Kutta - for i, func in enumerate(functions): - k0[i] = dt * func(xs[0], xs[1], xs[2], us[0, i], us[1, i]) - - for i, func in enumerate(functions): - k1[i] = dt * func(xs[0] + k0[0]/2., xs[1] + k0[1]/2., xs[2] + k0[2]/2., us[0, i], us[1, i]) - - for i, func in enumerate(functions): - k2[i] = dt * func(xs[0] + k1[0]/2., xs[1] + k1[1]/2., xs[2] + k1[2]/2., us[0, i], us[1, i]) - - for i, func in enumerate(functions): - k3[i] = dt * func(xs[0] + k2[0], xs[1] + k2[1], xs[2] + k2[2], us[0, i], us[1, i]) - - xs[0] += (k0[0] + 2. * k1[0] + 2. * k2[0] + k3[0]) / 6. - xs[1] += (k0[1] + 2. * k1[1] + 2. * k2[1] + k3[1]) / 6. - xs[2] += (k0[2] + 2. * k1[2] + 2. * k2[2] + k3[2]) / 6. - - predict_xs.append(copy.deepcopy(xs)) - - self.history_predict_xs.append(np.array(predict_xs)) - - return np.array(predict_xs) - - def initialize_state(self, init_xs): - """ - initialize the state - - Parameters - ------------ - init_xs : numpy.ndarray - """ - self.xs = init_xs.flatten() - - def _func_x_1(self, y_1, y_2, y_3, u_1, u_2): - """ - Parameters - ------------ - y_1 : float - y_2 : float - y_3 : float - u_1 : float - system input - u_2 : float - system input - """ - y_dot = math.cos(y_3) * u_1 - return y_dot - - def _func_x_2(self, y_1, y_2, y_3, u_1, u_2): - """ - Parameters - ------------ - y_1 : float - y_2 : float - y_3 : float - u_1 : float - system input - u_2 : float - system input - """ - y_dot = math.sin(y_3) * u_1 - return y_dot - - def _func_x_3(self, y_1, y_2, y_3, u_1, u_2): - """ - Parameters - ------------ - y_1 : float - y_2 : float - y_3 : float - u_1 : float - system input - u_2 : float - system input - """ - y_dot = u_2 - return y_dot \ No newline at end of file