add goal maker and dynamic main type

parent ce1f9f7973
commit 44da5d3515
@ -11,23 +11,33 @@ To solve the problem, we should apply the control methods which can treat the no
<a href="https://www.codecogs.com/eqnedit.php?latex=\frac{d}{dt}&space;\boldsymbol{X}=&space;\frac{d}{dt}&space;\begin{bmatrix}&space;x&space;\\&space;y&space;\\&space;\theta&space;\end{bmatrix}&space;=&space;\begin{bmatrix}&space;\cos(\theta)&space;&&space;0&space;\\&space;\sin(\theta)&space;&&space;0&space;\\&space;0&space;&&space;1&space;\\&space;\end{bmatrix}&space;\begin{bmatrix}&space;u_v&space;\\&space;u_\omega&space;\\&space;\end{bmatrix}&space;=&space;\boldsymbol{B}\boldsymbol{U}" target="_blank"><img src="https://latex.codecogs.com/gif.latex?\frac{d}{dt}&space;\boldsymbol{X}=&space;\frac{d}{dt}&space;\begin{bmatrix}&space;x&space;\\&space;y&space;\\&space;\theta&space;\end{bmatrix}&space;=&space;\begin{bmatrix}&space;\cos(\theta)&space;&&space;0&space;\\&space;\sin(\theta)&space;&&space;0&space;\\&space;0&space;&&space;1&space;\\&space;\end{bmatrix}&space;\begin{bmatrix}&space;u_v&space;\\&space;u_\omega&space;\\&space;\end{bmatrix}&space;=&space;\boldsymbol{B}\boldsymbol{U}" title="\frac{d}{dt} \boldsymbol{X}= \frac{d}{dt} \begin{bmatrix} x \\ y \\ \theta \end{bmatrix} = \begin{bmatrix} \cos(\theta) & 0 \\ \sin(\theta) & 0 \\ 0 & 1 \\ \end{bmatrix} \begin{bmatrix} u_v \\ u_\omega \\ \end{bmatrix} = \boldsymbol{B}\boldsymbol{U}" /></a>
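To make the model concrete, here is a small Python sketch of the kinematics above (illustrative only, not code from this repository); the state is [x, y, θ] and the input is [u_v, u_ω]:

```
import numpy as np

def two_wheeled_dynamics(x, u):
    """continuous-time kinematics dX/dt = B(theta) U for the two-wheeled robot
    x : [x, y, theta], u : [u_v, u_omega]
    """
    B = np.array([[np.cos(x[2]), 0.],
                  [np.sin(x[2]), 0.],
                  [0.,           1.]])
    return B.dot(u)

# one Euler step over a sampling period dt (illustration only)
def euler_step(x, u, dt=0.01):
    return x + dt * two_wheeled_dynamics(x, u)
```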

Nonlinear Model Predictive Control is one of the well-known methods, so I applied it to the two-wheeled robot included in this repository.
(if you are interested, please go to the nmpc/ folder of this repository)

NMPC is a very effective method for solving nonlinear optimal control problems, but it is a handcrafted method.
This program covers one more method for solving the nonlinear optimal control problem.

The method is iterative LQR.
Iterative LQR is one of the DDP (differential dynamic programming) methods.
Recently, this method has been used in model-based RL (reinforcement learning).
Although this method cannot guarantee a globally optimal answer, we can apply it to any model, such as a nonlinear model, a time-varying model, or even a model expressed by a neural network.
(Still, we can only get an approximately optimal answer.)

If you want to know more about iLQR, please see the references.
The paper and website are great.
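As a quick orientation, here is a highly condensed sketch of what one iLQR sweep does (rollout, backward pass, forward pass). This is illustrative Python, not the repository's implementation (see iLQR/ilqr.py for the real one); the callables dynamics, linearize, stage_cost_derivs and final_cost_derivs are assumptions of the sketch, and the simple lamb * I regularization stands in for the eigenvalue-based Levenberg-Marquardt step used in the actual code.

```
import numpy as np

def ilqr_sweep(x0, U, dynamics, linearize, stage_cost_derivs, final_cost_derivs, lamb=1.0):
    """one simplified iLQR sweep: rollout, backward pass, forward pass.

    The callables are assumptions of this sketch:
      dynamics(x, u)            -> next state
      linearize(x, u)           -> (A, B), discrete-time Jacobians
      stage_cost_derivs(x, u)   -> (l_x, l_u, l_xx, l_uu, l_ux)
      final_cost_derivs(x)      -> (l_x, l_xx)
    """
    tN, m = U.shape
    n = x0.shape[0]

    # 1) rollout of the current input sequence
    X = np.zeros((tN + 1, n))
    X[0] = x0
    for t in range(tN):
        X[t + 1] = dynamics(X[t], U[t])

    # 2) backward pass: propagate the value function and compute gains
    V_x, V_xx = final_cost_derivs(X[-1])
    k = np.zeros((tN, m))
    K = np.zeros((tN, m, n))
    for t in reversed(range(tN)):
        A, B = linearize(X[t], U[t])
        l_x, l_u, l_xx, l_uu, l_ux = stage_cost_derivs(X[t], U[t])
        Q_x = l_x + np.dot(A.T, V_x)
        Q_u = l_u + np.dot(B.T, V_x)
        Q_xx = l_xx + np.dot(A.T, np.dot(V_xx, A))
        Q_ux = l_ux + np.dot(B.T, np.dot(V_xx, A))
        Q_uu = l_uu + np.dot(B.T, np.dot(V_xx, B)) + lamb * np.eye(m)  # simple regularization
        Q_uu_inv = np.linalg.inv(Q_uu)
        k[t] = -np.dot(Q_uu_inv, Q_u)
        K[t] = -np.dot(Q_uu_inv, Q_ux)
        V_x = Q_x - np.dot(K[t].T, np.dot(Q_uu, k[t]))
        V_xx = Q_xx - np.dot(K[t].T, np.dot(Q_uu, K[t]))

    # 3) forward pass: apply the feedforward and feedback corrections
    U_new = np.zeros_like(U)
    x = x0.copy()
    for t in range(tN):
        U_new[t] = U[t] + k[t] + np.dot(K[t], x - X[t])
        x = dynamics(x, U_new[t])
    return U_new
```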
# Usage

## static goal

```
$ python3 main_static.py
```

## dynamic goal

```
$ python3 main_dynamic.py
```

# Expected Results

@ -35,10 +45,11 @@ The paper and website is great.

- static goal

- track the goal

# Applied other model

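As a hedged sketch for this section (not code from this repository), a custom model class mainly needs the same small interface that TwoWheeledCar exposes to the controller and the main scripts: initialize_state, update_state, an xs state vector and a history_xs list. Note that iLQRController also hand-codes the two-wheeled Jacobians and copies a TwoWheeledCar internally, so those parts would need to be adapted as well; the dynamics below are only a placeholder.

```
import numpy as np

class MyModel():
    """minimal model interface sketch (illustrative, not part of the repository)"""

    def __init__(self, init_xs):
        self.NUM_STATE = len(init_xs)
        self.xs = np.array(init_xs, dtype=float)
        self.history_xs = [self.xs.copy()]

    def initialize_state(self, init_xs):
        # the controller's internal simulator resets its state before each rollout
        self.xs = np.array(init_xs, dtype=float)

    def update_state(self, u, dt=0.01):
        # placeholder dynamics: replace with your own model (here a simple integrator)
        self.xs = self.xs + dt * np.asarray(u, dtype=float)
        self.history_xs.append(self.xs.copy())
        return self.xs.copy()
```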
@ -0,0 +1,70 @@
import numpy as np
import math
import matplotlib.pyplot as plt


def make_trajectory(goal_type, N):
    """
    Parameters
    -------------
    goal_type : str
        goal type
    N : int
        length of reference trajectory

    Returns
    -----------
    ref_trajectory : numpy.ndarray, shape(N, STATE_SIZE)

    Notes
    ---------
    this function can only deal with the implemented goal types ("const" and "sin")
    """
    if goal_type == "const":
        # a single constant goal state [x, y, theta]
        ref_trajectory = np.array([[5., 3., 0.]])

        return ref_trajectory

    if goal_type == "sin":
        # the sinusoidal reference is not implemented in this commit yet
        raise NotImplementedError("sin goal type is not implemented yet")


class GoalMaker():
    """
    Attributes
    -----------
    N : int
        length of reference trajectory
    goal_type : str
        goal type
    dt : float
        sampling time
    ref_traj : numpy.ndarray, shape(N, STATE_SIZE)
        reference trajectory made by make_trajectory
    """

    def __init__(self, goal_type="const", N=500, dt=0.01):
        """
        Parameters
        -----------
        goal_type : str, optional
            goal type, default is "const"
        N : int, optional
            length of reference trajectory, default is 500
        dt : float, optional
            sampling time, default is 0.01
        """
        self.N = N
        self.goal_type = goal_type
        self.dt = dt

        self.ref_traj = None

    def preprocess(self):
        """preprocess of making the goal
        """
        goal_type_list = ["const", "sin"]

        if self.goal_type not in goal_type_list:
            raise ValueError("{0} is not an implemented goal type. please implement it!!".format(self.goal_type))

        self.ref_traj = make_trajectory(self.goal_type, self.N)

    def calc_goal(self, x):
        """calculate the goal for the current state

        Parameters
        -----------
        x : numpy.ndarray, shape(STATE_SIZE, )
            current state of the system

        Returns
        ----------
        goal : numpy.ndarray, shape(STATE_SIZE, )
        """
        # NOTE: the original commit leaves this method unfinished;
        # as a minimal placeholder we return the last reference state
        goal = self.ref_traj[-1]

        return goal
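A short usage sketch of the new GoalMaker class (illustrative, not part of the commit; the module name goal_maker is an assumption, and calc_goal here returns the placeholder constant goal):

```
import numpy as np
from goal_maker import GoalMaker  # NOTE: module name assumed for the new file

goal_maker = GoalMaker(goal_type="const", N=500, dt=0.01)
goal_maker.preprocess()                    # builds the reference trajectory, raises on unknown goal types
goal = goal_maker.calc_goal(np.zeros(3))   # goal for the current state
print(goal)                                # -> [5. 3. 0.] for the "const" goal
```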
230 iLQR/ilqr.py

@ -10,17 +10,36 @@ class iLQRController():
    Attributes
    ------------
    tN : int
        number of time steps
    STATE_SIZE : int
        system state size
    INPUT_SIZE : int
        system input size
    dt : float
        sampling time
    max_iter : int
        maximum number of iterations
    lamb_factor : int
        factor used to increase or decrease lambda so that the matrix Q_uu stays positive definite
    lamb_max : float
        maximum lambda value allowed while regularizing Q_uu
    eps_converge : float
        convergence threshold for stopping the iteration
    """

    def __init__(self, N=100, max_iter=400, dt=0.01):
        """
        Parameters
        ----------
        N : int, optional
            number of time steps, default is 100
        max_iter : int, optional
            maximum number of iterations, default is 400
        dt : float, optional
            sampling time, default is 0.01
        """
        self.tN = N
        self.STATE_SIZE = 3
        self.INPUT_SIZE = 2
        self.dt = dt
@ -28,52 +47,55 @@ class iLQRController():
        self.max_iter = max_iter
        self.lamb_factor = 10
        self.lamb_max = 1e4
        self.eps_converge = 0.001

    def calc_input(self, car, x_target):
        """main loop of iterative LQR

        Parameters
        -------------
        car : model class
            should have initialize state and update state
        x_target : numpy.ndarray, shape(STATE_SIZE, )
            target state

        Returns
        -----------
        u : numpy.ndarray, shape(INPUT_SIZE, )
            input of the system

        See also
        ----------
        model.py
        """
        # initialize
        self.reset(x_target)

        # Compute the optimization
        x0 = np.zeros(self.STATE_SIZE)
        self.simulator, x0 = self.initialize_simulator(car)
        U = np.copy(self.U)
        self.X, self.U, cost = self.ilqr(x0, U)

        self.u = self.U[self.t] # use only one time step (like MPC)

        return self.u

    def initialize_simulator(self, car):
        """make a copy of the car model for the controller
        so that the actual car model is not affected during the iLQR process

        Parameters
        -------------
        car : model class
            should have initialize state and update state

        Returns
        ----------
        simulator : model class
            should have initialize state and update state
        x0 : numpy.ndarray, shape(STATE_SIZE, )
            initial state of the simulator
        """
        # copy the real car so that the iLQR rollouts do not disturb it
        simulator = TwoWheeledCar(deepcopy(car.xs))

        return simulator, deepcopy(simulator.xs)

@ -84,16 +106,29 @@ class iLQRController():
        Parameters
        ------------
        xs : shape(STATE_SIZE, tN + 1)
            predicted state of the system
        us : shape(INPUT_SIZE, tN)
            predicted input of the system

        Returns
        ----------
        l : float
            stage cost
        l_x : numpy.ndarray, shape(STATE_SIZE, )
            differential of stage cost by x
        l_xx : numpy.ndarray, shape(STATE_SIZE, STATE_SIZE)
            second order differential of stage cost by x
        l_u : numpy.ndarray, shape(INPUT_SIZE, )
            differential of stage cost by u
        l_uu : numpy.ndarray, shape(INPUT_SIZE, INPUT_SIZE)
            second order differential of stage cost by u
        l_ux : numpy.ndarray, shape(INPUT_SIZE, STATE_SIZE)
            second order differential of stage cost by u and x
        """
        # total cost
        R_11 = 0.01 # u_v cost weight
        R_22 = 0.01 # u_th cost weight

        l = np.dot(us.T, np.dot(np.diag([R_11, R_22]), us))

@ -110,7 +145,6 @@ class iLQRController():
        l_ux = np.zeros((self.INPUT_SIZE, self.STATE_SIZE))

        # returned in an array for easy multiplication by time step
        return l, l_x, l_xx, l_u, l_uu, l_ux

    def cost_final(self, x):
@ -118,12 +152,17 @@ class iLQRController():
        Parameters
        -------------
        x : numpy.ndarray, shape(STATE_SIZE,)
            predicted state of the system

        Returns
        ---------
        l : float
            terminal cost
        l_x : numpy.ndarray, shape(STATE_SIZE, )
            differential of terminal cost by x
        l_xx : numpy.ndarray, shape(STATE_SIZE, STATE_SIZE)
            second order differential of terminal cost by x
        """
        Q_11 = 1. # terminal x cost weight
        Q_22 = 1. # terminal y cost weight

@ -161,6 +200,12 @@ class iLQRController():
            differential of the model with respect to X
        B : numpy.ndarray, shape(STATE_SIZE, INPUT_SIZE)
            differential of the model with respect to U

        Notes
        -------
        in this case, I pre-calculated the differential of the model by hand because the gradient of the two-wheeled model is not difficult to derive.
        If you do not or cannot do that, you can use numerical differentiation.
        However, numerical differentiation can sometimes affect the accuracy of the calculations.
        """

        A = np.zeros((self.STATE_SIZE, self.STATE_SIZE))

@ -169,6 +214,8 @@ class iLQRController():
        B = np.zeros((self.STATE_SIZE, self.INPUT_SIZE))
        B_ideal = np.zeros((self.STATE_SIZE, self.INPUT_SIZE))

        # if you want to use numerical differentiation, please uncomment this block (remove the triple quotes)
        """
        eps = 1e-4 # finite differences epsilon

        for ii in range(self.STATE_SIZE):
@ -180,10 +227,13 @@ class iLQRController():
            dec_x[ii] -= eps
            state_dec,_ = self.plant_dynamics(dec_x, u.copy())
            A[:, ii] = (state_inc - state_dec) / (2 * eps)
        """

        A_ideal[0, 2] = -np.sin(x[2]) * u[0]
        A_ideal[1, 2] = np.cos(x[2]) * u[0]

        # if you want to use numerical differentiation, please uncomment this block (remove the triple quotes)
        """
        for ii in range(self.INPUT_SIZE):
            # calculate partial differential w.r.t. u
            inc_u = u.copy()
@ -193,8 +243,8 @@ class iLQRController():
            dec_u[ii] -= eps
            state_dec,_ = self.plant_dynamics(x.copy(), dec_u)
            B[:, ii] = (state_inc - state_dec) / (2 * eps)
        """

        # calc by hand
        B_ideal[0, 0] = np.cos(x[2])
        B_ideal[1, 0] = np.sin(x[2])
        B_ideal[2, 1] = 1.

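As an aside illustrating the Notes in the docstring above, a generic central-difference helper can be used to sanity-check the hand-derived Jacobians. This is an illustrative sketch, not part of ilqr.py; the helper name and the sample state and input are assumptions.

```
import numpy as np

def numerical_jacobian(f, x, eps=1e-4):
    """central-difference Jacobian of f at x (illustrative helper)"""
    fx = np.asarray(f(x))
    J = np.zeros((fx.shape[0], x.shape[0]))
    for ii in range(x.shape[0]):
        inc, dec = x.copy(), x.copy()
        inc[ii] += eps
        dec[ii] -= eps
        J[:, ii] = (np.asarray(f(inc)) - np.asarray(f(dec))) / (2 * eps)
    return J

# example: check d f / d x of the two-wheeled kinematics at a sample state and input
u = np.array([1.0, 0.1])
x = np.array([0.0, 0.0, 0.3])
f_x = lambda x_: np.array([np.cos(x_[2]) * u[0], np.sin(x_[2]) * u[0], u[1]])
A_num = numerical_jacobian(f_x, x)            # numerical
A_hand = np.zeros((3, 3))
A_hand[0, 2] = -np.sin(x[2]) * u[0]           # hand-derived, as in the file above
A_hand[1, 2] = np.cos(x[2]) * u[0]
print(np.allclose(A_num, A_hand, atol=1e-6))  # -> True
```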
@ -259,72 +309,43 @@ class iLQRController():
                sim_new_trajectory = False

            # optimize things!
            # initialize Vs with final state cost and set up k, K
            V = l[-1].copy() # value function
            V_x = l_x[-1].copy() # dV / dx
            V_xx = l_xx[-1].copy() # d^2 V / dx^2
            k = np.zeros((tN, self.INPUT_SIZE)) # feedforward modification
            K = np.zeros((tN, self.INPUT_SIZE, self.STATE_SIZE)) # feedback gain

            # NOTE: they use V' to denote the value at the next timestep,
            # they have this redundant in their notation making it a
            # function of f(x + dx, u + du) and using the ', but it makes for
            # convenient shorthand when you drop function dependencies

            # work backwards to solve for V, Q, k, and K
            for t in range(self.tN-2, -1, -1):

                # NOTE: we're working backwards, so V_x = V_x[t+1] = V'_x

                # 4a) Q_x = l_x + np.dot(f_x^T, V'_x)
                Q_x = l_x[t] + np.dot(f_x[t].T, V_x)
                # 4b) Q_u = l_u + np.dot(f_u^T, V'_x)
                Q_u = l_u[t] + np.dot(f_u[t].T, V_x)

                # NOTE: last term for Q_xx, Q_uu, and Q_ux is vector / tensor product
                # but also note f_xx = f_uu = f_ux = 0 so they're all 0 anyways.

                # 4c) Q_xx = l_xx + np.dot(f_x^T, np.dot(V'_xx, f_x)) + np.einsum(V'_x, f_xx)
                Q_xx = l_xx[t] + np.dot(f_x[t].T, np.dot(V_xx, f_x[t]))
                # 4d) Q_ux = l_ux + np.dot(f_u^T, np.dot(V'_xx, f_x)) + np.einsum(V'_x, f_ux)
                Q_ux = l_ux[t] + np.dot(f_u[t].T, np.dot(V_xx, f_x[t]))
                # 4e) Q_uu = l_uu + np.dot(f_u^T, np.dot(V'_xx, f_u)) + np.einsum(V'_x, f_uu)
                Q_uu = l_uu[t] + np.dot(f_u[t].T, np.dot(V_xx, f_u[t]))

                # Calculate Q_uu^-1 with regularization term set by
                # Levenberg-Marquardt heuristic (at end of this loop)
                Q_uu_evals, Q_uu_evecs = np.linalg.eig(Q_uu)
                Q_uu_evals[Q_uu_evals < 0] = 0.0
                Q_uu_evals += lamb
                Q_uu_inv = np.dot(Q_uu_evecs, np.dot(np.diag(1.0/Q_uu_evals), Q_uu_evecs.T))

                # 5b) k = -np.dot(Q_uu^-1, Q_u)
                k[t] = -np.dot(Q_uu_inv, Q_u)
                # 5b) K = -np.dot(Q_uu^-1, Q_ux)
                K[t] = -np.dot(Q_uu_inv, Q_ux)

                # 6a) DV = -.5 np.dot(k^T, np.dot(Q_uu, k))
                # 6b) V_x = Q_x - np.dot(K^T, np.dot(Q_uu, k))
                V_x = Q_x - np.dot(K[t].T, np.dot(Q_uu, k[t]))
                # 6c) V_xx = Q_xx - np.dot(-K^T, np.dot(Q_uu, K))
                V_xx = Q_xx - np.dot(K[t].T, np.dot(Q_uu, K[t]))

            U_new = np.zeros((tN, self.INPUT_SIZE))
            # calculate the optimal change to the control trajectory
            x_new = x0.copy()
            for t in range(tN - 1):
                # use feedforward (k) and feedback (K) gain matrices
                # calculated from our value function approximation
                # to take a stab at the optimal control signal
                U_new[t] = U[t] + k[t] + np.dot(K[t], x_new - X[t])
                # given this u, find our next state
                _,x_new = self.plant_dynamics(x_new, U_new[t])

            # evaluate the new trajectory
            X_new, cost_new = self.simulate(x0, U_new)

            # Levenberg-Marquardt heuristic
            if cost_new < cost:
                # decrease lambda (get closer to Newton's method)
                lamb /= self.lamb_factor

@ -336,8 +357,6 @@ class iLQRController():
                sim_new_trajectory = True # do another rollout

                # check to see if update is small enough to exit
                if ii > 0 and ((abs(oldcost-cost)/cost) < self.eps_converge):
                    print("Converged at iteration = %d; Cost = %.4f;"%(ii,cost_new) +
@ -360,11 +379,20 @@ class iLQRController():
        """simulate a single time step of the plant, from
        initial state x and applying control signal u

        Parameters
        --------------
        x : numpy.ndarray, shape(STATE_SIZE, )
            the state of the system
        u : numpy.ndarray, shape(INPUT_SIZE, )
            the control signal

        Returns
        -----------
        xdot : numpy.ndarray, shape(STATE_SIZE, )
            the gradient of x
        x_next : numpy.ndarray, shape(STATE_SIZE, )
            next state of x
        """
        self.simulator.initialize_state(x)

        # apply the control signal

@ -387,9 +415,21 @@ class iLQRController():
        """do a rollout of the system, starting at x0 and
        applying the control sequence U

        Parameters
        ----------
        x0 : numpy.ndarray, shape(STATE_SIZE, )
            the initial state of the system
        U : numpy.ndarray, shape(tN, INPUT_SIZE)
            the control sequence to apply

        Returns
        ---------
        X : numpy.ndarray, shape(tN, STATE_SIZE)
            the state sequence
        cost : float
            total cost of the rollout
        """
        tN = U.shape[0]
        X = np.zeros((tN, self.STATE_SIZE))
        X[0] = x0

@ -0,0 +1,56 @@
import numpy as np
import matplotlib.pyplot as plt
import math

from model import TwoWheeledCar
from ilqr import iLQRController
from animation import AnimDrawer
from goal_maker import GoalMaker  # NOTE: module name assumed for the new GoalMaker file


def main():
    """run iLQR control of the two-wheeled car with a goal made by GoalMaker
    """
    # iteration parameters
    NUM_ITERATIONS = 500
    dt = 0.01

    # make plant
    init_x = np.array([0., 0., 0.5*math.pi])
    car = TwoWheeledCar(init_x)

    # make goal
    # NOTE: the original commit leaves this line unfinished ("goal_maker =");
    # the call below is a provisional completion using GoalMaker's defaults
    goal_maker = GoalMaker(goal_type="const", N=NUM_ITERATIONS, dt=dt)
    goal_maker.preprocess()

    # controller
    controller = iLQRController()

    for iteration in range(NUM_ITERATIONS):
        print("iteration num = {} / {}".format(iteration, NUM_ITERATIONS))

        # NOTE: provisional completion, the target is taken from the goal maker
        target = goal_maker.calc_goal(car.xs)

        u = controller.calc_input(car, target)
        car.update_state(u, dt) # update state

    # figures and animation
    history_states = np.array(car.history_xs)

    time_fig = plt.figure(figsize=(3, 4))

    x_fig = time_fig.add_subplot(311)
    y_fig = time_fig.add_subplot(312)
    th_fig = time_fig.add_subplot(313)

    time = len(history_states)
    x_fig.plot(np.arange(time), history_states[:, 0])
    y_fig.plot(np.arange(time), history_states[:, 1])
    th_fig.plot(np.arange(time), history_states[:, 2])

    plt.show()

    history_states = np.array(car.history_xs)

    animdrawer = AnimDrawer([history_states, target])
    animdrawer.draw_anim()


if __name__ == "__main__":
    main()

@ -10,7 +10,7 @@ def main():
    """
    """
    # iteration parameters
    NUM_ITERATIONS = 500
    dt = 0.01

    # make plant
@ -24,20 +24,16 @@ def main():
    controller = iLQRController()


    for iteration in range(NUM_ITERATIONS):
        print("iteration num = {} / {}".format(iteration, NUM_ITERATIONS))

        u = controller.calc_input(car, target)
        car.update_state(u, dt) # update state

    # figures and animation
    history_states = np.array(car.history_xs)

    time_fig = plt.figure(figsize=(3, 4))

    x_fig = time_fig.add_subplot(311)
    y_fig = time_fig.add_subplot(312)

@ -75,52 +75,6 @@ class TwoWheeledCar():
        return self.xs.copy()

    def predict_state(self, init_xs, us, dt=0.01):
        """make predicted states by using the optimal input made by MPC

        Parameters
        -----------
        us : array-like, shape(2, N)
            optimal input made by MPC
        dt : float in seconds, optional
            sampling time of simulation, default is 0.01 [s]
        """
        ## test
        # assert us.shape[0] == 2 and us.shape[1] == 15, "wrong shape"

        xs = copy.deepcopy(init_xs)
        predict_xs = [copy.deepcopy(xs)]

        for t in range(us.shape[1]):  # renamed from "i" to avoid shadowing the inner loop index
            k0 = [0.0 for _ in range(self.NUM_STATE)]
            k1 = [0.0 for _ in range(self.NUM_STATE)]
            k2 = [0.0 for _ in range(self.NUM_STATE)]
            k3 = [0.0 for _ in range(self.NUM_STATE)]

            functions = [self._func_x_1, self._func_x_2, self._func_x_3]

            # solve with the fourth-order Runge-Kutta method
            for i, func in enumerate(functions):
                k0[i] = dt * func(xs[0], xs[1], xs[2], us[0, t], us[1, t])

            for i, func in enumerate(functions):
                k1[i] = dt * func(xs[0] + k0[0]/2., xs[1] + k0[1]/2., xs[2] + k0[2]/2., us[0, t], us[1, t])

            for i, func in enumerate(functions):
                k2[i] = dt * func(xs[0] + k1[0]/2., xs[1] + k1[1]/2., xs[2] + k1[2]/2., us[0, t], us[1, t])

            for i, func in enumerate(functions):
                k3[i] = dt * func(xs[0] + k2[0], xs[1] + k2[1], xs[2] + k2[2], us[0, t], us[1, t])

            xs[0] += (k0[0] + 2. * k1[0] + 2. * k2[0] + k3[0]) / 6.
            xs[1] += (k0[1] + 2. * k1[1] + 2. * k2[1] + k3[1]) / 6.
            xs[2] += (k0[2] + 2. * k1[2] + 2. * k2[2] + k3[2]) / 6.

            predict_xs.append(copy.deepcopy(xs))

        self.history_predict_xs.append(np.array(predict_xs))

        return np.array(predict_xs)

    def initialize_state(self, init_xs):
        """
        initialize the state