Update: fix ilqr and ddp, models

2020-04-05 17:52:02 +09:00 · 2020-04-05 17:52:02 +09:00 · a36a8bc9c1
parent bdb8225145
commit a36a8bc9c1
6 changed files with 22 additions and 14 deletions
--- a/PythonLinearNonlinearControl/configs/two_wheeled.py
+++ b/PythonLinearNonlinearControl/configs/two_wheeled.py
@@ -10,9 +10,9 @@ class TwoWheeledConfigModule():
    INPUT_SIZE = 2
    DT = 0.01
    # cost parameters
-    R = np.eye(INPUT_SIZE) * 0.1
-    Q = np.eye(STATE_SIZE) * 0.5
-    Sf = np.eye(STATE_SIZE)
+    R = np.diag([0.1, 0.1])
+    Q = np.diag([1., 1., 0.01])
+    Sf = np.diag([5., 5., 1.])
    # bounds
    INPUT_LOWER_BOUND = np.array([-1.5, 3.14])
    INPUT_UPPER_BOUND = np.array([1.5, 3.14])
@@ -41,7 +41,7 @@ class TwoWheeledConfigModule():
            },
           "iLQR":{
                "max_iter": 500,
-                "mu": 1.,
+                "init_mu": 1.,
                "mu_min": 1e-6,
                "mu_max": 1e10,
                "init_delta": 2.,
@@ -49,7 +49,7 @@ class TwoWheeledConfigModule():
           },
           "DDP":{
                "max_iter": 500,
-                "mu": 1.,
+                "init_mu": 1.,
                "mu_min": 1e-6,
                "mu_max": 1e10,
                "init_delta": 2.,
--- a/PythonLinearNonlinearControl/controllers/ddp.py
+++ b/PythonLinearNonlinearControl/controllers/ddp.py
@@ -12,9 +12,11 @@ class DDP(Controller):
    """ Differential Dynamic Programming

    Ref:
-        Tassa, Y., Erez, T., & Todorov, E. (2012). . In 2012 IEEE/RSJ International Conference on
+        Tassa, Y., Erez, T., & Todorov, E. (2012). Synthesis and stabilization of complex behaviors through online trajectory optimization.
+        In 2012 IEEE/RSJ International Conference on
        Intelligent Robots and Systems (pp. 4906-4913). and Study Wolf,
-        https://github.com/studywolf/control
+        https://github.com/studywolf/control, and
+        https://github.com/anassinator/ilqr
    """
    def __init__(self, config, model):
        """
@@ -41,7 +43,8 @@ class DDP(Controller):

        # controller parameters
        self.max_iter = config.opt_config["DDP"]["max_iter"]
-        self.mu = config.opt_config["DDP"]["mu"]
+        self.init_mu = config.opt_config["DDP"]["init_mu"]
+        self.mu = self.init_mu
        self.mu_min = config.opt_config["DDP"]["mu_min"]
        self.mu_max = config.opt_config["DDP"]["mu_max"]
        self.init_delta = config.opt_config["DDP"]["init_delta"]
@@ -81,6 +84,8 @@ class DDP(Controller):
        sol = self.prev_sol.copy()
        converged_sol = False
        update_sol = True
+        self.mu = self.init_mu
+        self.delta = self.init_delta

        # line search param
        alphas = 1.1**(-np.arange(10)**2)
--- a/PythonLinearNonlinearControl/controllers/ilqr.py
+++ b/PythonLinearNonlinearControl/controllers/ilqr.py
@@ -41,7 +41,8 @@ class iLQR(Controller):

        # controller parameters
        self.max_iter = config.opt_config["iLQR"]["max_iter"]
-        self.mu = config.opt_config["iLQR"]["mu"]
+        self.init_mu = config.opt_config["iLQR"]["init_mu"]
+        self.mu = self.init_mu
        self.mu_min = config.opt_config["iLQR"]["mu_min"]
        self.mu_max = config.opt_config["iLQR"]["mu_max"]
        self.init_delta = config.opt_config["iLQR"]["init_delta"]
@@ -81,6 +82,8 @@ class iLQR(Controller):
        sol = self.prev_sol.copy()
        converged_sol = False
        update_sol = True
+        self.mu = self.init_mu
+        self.delta = self.init_delta

        # line search param
        alphas = 1.1**(-np.arange(10)**2)
--- a/PythonLinearNonlinearControl/controllers/make_controllers.py
+++ b/PythonLinearNonlinearControl/controllers/make_controllers.py
@@ -18,4 +18,4 @@ def make_controller(args, config, model):
    elif args.controller_type == "iLQR":
        return iLQR(config, model)
    elif args.controller_type == "DDP":
-        return iLQR(config, model)
+        return DDP(config, model)
--- a/PythonLinearNonlinearControl/envs/two_wheeled.py
+++ b/PythonLinearNonlinearControl/envs/two_wheeled.py
@@ -56,7 +56,7 @@ class TwoWheeledConstEnv(Env):
            self.curr_x = init_x

        # goal
-        self.g_x = np.array([5., 5., 0.])
+        self.g_x = np.array([2.5, 2.5, 0.])
        
        # clear memory
        self.history_x = []
--- a/PythonLinearNonlinearControl/models/two_wheeled.py
+++ b/PythonLinearNonlinearControl/models/two_wheeled.py
@@ -121,7 +121,7 @@ class TwoWheeledModel(Model):
        f_xx[:, 0, 2, 2] = -np.cos(xs[:, 2]) * us[:, 0]
        f_xx[:, 1, 2, 2] = -np.sin(xs[:, 2]) * us[:, 0]

-        return f_xx
+        return f_xx * dt

    @staticmethod
    def calc_f_ux(xs, us, dt):
@@ -144,7 +144,7 @@ class TwoWheeledModel(Model):
        f_ux[:, 0, 0, 2] = -np.sin(xs[:, 2])
        f_ux[:, 1, 0, 2] = np.cos(xs[:, 2])

-        return f_ux
+        return f_ux * dt
    
    @staticmethod
    def calc_f_uu(xs, us, dt):
@@ -164,4 +164,4 @@ class TwoWheeledModel(Model):

        f_uu = np.zeros((pred_len, state_size, input_size, input_size))

-        return f_uu
+        return f_uu * dt