add cost

2019-05-25 17:06:35 +09:00 · 2019-05-25 17:06:35 +09:00 · 73bb2b6240
parent 17250480d7
commit 73bb2b6240
7 changed files with 25 additions and 8 deletions
--- a/ilqr/README.md
+++ b/ilqr/README.md
--- a/ilqr/animation.py
+++ b/ilqr/animation.py
--- a/ilqr/goal_maker.py
+++ b/ilqr/goal_maker.py
--- a/ilqr/ilqr.py
+++ b/ilqr/ilqr.py
@ -164,9 +164,9 @@ class iLQRController():
        l_xx : numpy.ndarray, shape(STATE_SIZE, STATE_SIZE) 
            second order differential of stage cost by x
        """
-        Q_11 = 1. # terminal x cost weight
+        Q_11 = 10. # terminal x cost weight
-        Q_22 = 1. # terminal y cost weight
+        Q_22 = 10. # terminal y cost weight
-        Q_33 = 0.01 # terminal theta cost weight
+        Q_33 = 0.1 # terminal theta cost weight
        error = self.simulator.xs - self.target
@ -290,19 +290,36 @@ class iLQRController():
                for t in range(tN-1):
                    # x(t+1) = f(x(t), u(t)) = x(t) + dx(t) * dt
                    # linearized dx(t) = np.dot(A(t), x(t)) + np.dot(B(t), u(t))
-                    # f_x = np.eye + A(t)
+                    # f_x = (np.eye + A(t)) * dt
-                    # f_u = B(t)
+                    # f_u = (B(t)) * dt
                    # continuous --> discrete
                    A, B = self.finite_differences(X[t], U[t])
                    f_x[t] = np.eye(self.STATE_SIZE) + A * self.dt
                    f_u[t] = B * self.dt
                    """ NOTE: why multiply dt in original program ??  
                    So the dt multiplication and + I is because we’re actually taking the derivative of the state with respect to the previous state. Which is not
                    dx = Ax + Bu,
                    but rather
                    x(t) = x(t-1) + (Ax(t-1) + Bu(t-1))*dt
                    And that’s where the identity matrix and dt come from!
                    So that part’s in the comments of the code, but the *dt on all the cost function stuff is not commented on at all!
                    So here the dt lets you normalize behaviour for different time steps (assuming you also scale the number of steps in the sequence).
                    So if you have a time step of .01 or .001 you’re not racking up 10 times as much cost function in the latter case.
                    And if you run the code with 50 steps in the sequence and dt=.01 and 500 steps in the sequence
                    and dt=.001 you’ll see that you get the same results, which is not the case at all when you don’t take dt into account in the cost function!
                    """
                    (l[t], l_x[t], l_xx[t], l_u[t], l_uu[t], l_ux[t]) = self.cost(X[t], U[t])
                    """ # we consider this part in cost function.
                    l[t] *= self.dt
                    l_x[t] *= self.dt
                    l_xx[t] *= self.dt
                    l_u[t] *= self.dt
                    l_uu[t] *= self.dt
                    l_ux[t] *= self.dt
                    """
                # and for final state
                l[-1], l_x[-1], l_xx[-1] = self.cost_final(X[-1])
@ -329,8 +346,8 @@ class iLQRController():
                Q_uu_evals += lamb
                Q_uu_inv = np.dot(Q_uu_evecs, np.dot(np.diag(1.0/Q_uu_evals), Q_uu_evecs.T))
-                k[t] = -np.dot(Q_uu_inv, Q_u)
+                k[t] = -1. * np.dot(Q_uu_inv, Q_u)
-                K[t] = -np.dot(Q_uu_inv, Q_ux)
+                K[t] = -1. * np.dot(Q_uu_inv, Q_ux)
                V_x = Q_x - np.dot(K[t].T, np.dot(Q_uu, k[t]))
                V_xx = Q_xx - np.dot(K[t].T, np.dot(Q_uu, K[t]))
--- a/ilqr/main_dynamic.py
+++ b/ilqr/main_dynamic.py
--- a/ilqr/main_static.py
+++ b/ilqr/main_static.py
--- a/ilqr/model.py
+++ b/ilqr/model.py