diff --git a/Environments.md b/Environments.md index 5d0e89c..412b41c 100644 --- a/Environments.md +++ b/Environments.md @@ -39,9 +39,9 @@ R = diag[0.1, 0.1] X_g denote the goal states. -## [CatpoleEnv (Swing up)]((PythonLinearNonlinearControl/envs/cartpole.py)) +## [CatpoleEnv (Swing up)](PythonLinearNonlinearControl/envs/cartpole.py) -System equation. +## System equation. diff --git a/PythonLinearNonlinearControl/configs/cartpole.py b/PythonLinearNonlinearControl/configs/cartpole.py index 64a78db..fff5cf5 100644 --- a/PythonLinearNonlinearControl/configs/cartpole.py +++ b/PythonLinearNonlinearControl/configs/cartpole.py @@ -10,7 +10,11 @@ class CartPoleConfigModule(): INPUT_SIZE = 1 DT = 0.02 # cost parameters - R = np.diag([0.01]) + R = np.diag([1.]) # 0.01 is worked for MPPI and CEM and MPPIWilliams + # 1. is worked for iLQR + Terminal_Weight = 1. + Q = None + Sf = None # bounds INPUT_LOWER_BOUND = np.array([-3.]) INPUT_UPPER_BOUND = np.array([3.]) @@ -128,12 +132,14 @@ class CartPoleConfigModule(): return (6. * (terminal_x[:, 0]**2) \ + 12. * ((np.cos(terminal_x[:, 2]) + 1.)**2) \ + 0.1 * (terminal_x[:, 1]**2) \ - + 0.1 * (terminal_x[:, 3]**2))[:, np.newaxis] + + 0.1 * (terminal_x[:, 3]**2))[:, np.newaxis] \ + * CartPoleConfigModule.Terminal_Weight - return 6. * (terminal_x[0]**2) \ + return (6. * (terminal_x[0]**2) \ + 12. * ((np.cos(terminal_x[2]) + 1.)**2) \ + 0.1 * (terminal_x[1]**2) \ - + 0.1 * (terminal_x[3]**2) + + 0.1 * (terminal_x[3]**2)) \ + * CartPoleConfigModule.Terminal_Weight @staticmethod def gradient_cost_fn_with_state(x, g_x, terminal=False): @@ -148,9 +154,21 @@ class CartPoleConfigModule(): or shape(1, state_size) """ if not terminal: - return None + cost_dx0 = 12. * x[:, 0] + cost_dx1 = 0.2 * x[:, 1] + cost_dx2 = 24. * (1 + np.cos(x[:, 2])) * -np.sin(x[:, 2]) + cost_dx3 = 0.2 * x[:, 3] + cost_dx = np.stack((cost_dx0, cost_dx1,\ + cost_dx2, cost_dx3), axis=1) + return cost_dx - return None + cost_dx0 = 12. * x[0] + cost_dx1 = 0.2 * x[1] + cost_dx2 = 24. * (1 + np.cos(x[2])) * -np.sin(x[2]) + cost_dx3 = 0.2 * x[3] + cost_dx = np.array([[cost_dx0, cost_dx1, cost_dx2, cost_dx3]]) + + return cost_dx * CartPoleConfigModule.Terminal_Weight @staticmethod def gradient_cost_fn_with_input(x, u): @@ -163,7 +181,7 @@ class CartPoleConfigModule(): Returns: l_u (numpy.ndarray): gradient of cost, shape(pred_len, input_size) """ - return None + return 2. * u * np.diag(CartPoleConfigModule.R) @staticmethod def hessian_cost_fn_with_state(x, g_x, terminal=False): @@ -179,10 +197,30 @@ class CartPoleConfigModule(): shape(1, state_size, state_size) or """ if not terminal: - (pred_len, _) = x.shape - return None + (pred_len, state_size) = x.shape + hessian = np.eye(state_size) + hessian = np.tile(hessian, (pred_len, 1, 1)) + hessian[:, 0, 0] = 12. + hessian[:, 1, 1] = 0.2 + hessian[:, 2, 2] = 24. * -np.sin(x[:, 2]) \ + * (-np.sin(x[:, 2])) \ + + 24. * (1. + np.cos(x[:, 2])) \ + * -np.cos(x[:, 2]) + hessian[:, 3, 3] = 0.2 + + return hessian - return None + state_size = len(x) + hessian = np.eye(state_size) + hessian[0, 0] = 12. + hessian[1, 1] = 0.2 + hessian[2, 2] = 24. * -np.sin(x[2]) \ + * (-np.sin(x[2])) \ + + 24. * (1. + np.cos(x[2])) \ + * -np.cos(x[2]) + hessian[3, 3] = 0.2 + + return hessian[np.newaxis, :, :] * CartPoleConfigModule.Terminal_Weight @staticmethod def hessian_cost_fn_with_input(x, u): @@ -198,7 +236,7 @@ class CartPoleConfigModule(): """ (pred_len, _) = u.shape - return None + return np.tile(2.*CartPoleConfigModule.R, (pred_len, 1, 1)) @staticmethod def hessian_cost_fn_with_input_state(x, u): diff --git a/PythonLinearNonlinearControl/configs/first_order_lag.py b/PythonLinearNonlinearControl/configs/first_order_lag.py index 1ad59f6..2c9d268 100644 --- a/PythonLinearNonlinearControl/configs/first_order_lag.py +++ b/PythonLinearNonlinearControl/configs/first_order_lag.py @@ -159,11 +159,9 @@ class FirstOrderLagConfigModule(): """ if not terminal: (pred_len, _) = x.shape - return -g_x[:, :, np.newaxis] \ - * np.tile(2.*FirstOrderLagConfigModule.Q, (pred_len, 1, 1)) + return np.tile(2.*FirstOrderLagConfigModule.Q, (pred_len, 1, 1)) - return -g_x[:, np.newaxis] \ - * np.tile(2.*FirstOrderLagConfigModule.Sf, (1, 1, 1)) + return np.tile(2.*FirstOrderLagConfigModule.Sf, (1, 1, 1)) @staticmethod def hessian_cost_fn_with_input(x, u): diff --git a/PythonLinearNonlinearControl/configs/two_wheeled.py b/PythonLinearNonlinearControl/configs/two_wheeled.py index 27e9834..8b79c87 100644 --- a/PythonLinearNonlinearControl/configs/two_wheeled.py +++ b/PythonLinearNonlinearControl/configs/two_wheeled.py @@ -153,11 +153,9 @@ class TwoWheeledConfigModule(): """ if not terminal: (pred_len, _) = x.shape - return -g_x[:, :, np.newaxis] \ - * np.tile(2.*TwoWheeledConfigModule.Q, (pred_len, 1, 1)) + return np.tile(2.*TwoWheeledConfigModule.Q, (pred_len, 1, 1)) - return -g_x[:, np.newaxis] \ - * np.tile(2.*TwoWheeledConfigModule.Sf, (1, 1, 1)) + return np.tile(2.*TwoWheeledConfigModule.Sf, (1, 1, 1)) @staticmethod def hessian_cost_fn_with_input(x, u): diff --git a/PythonLinearNonlinearControl/controllers/ilqr.py b/PythonLinearNonlinearControl/controllers/ilqr.py index a676ade..9a80dfa 100644 --- a/PythonLinearNonlinearControl/controllers/ilqr.py +++ b/PythonLinearNonlinearControl/controllers/ilqr.py @@ -50,11 +50,6 @@ class iLQR(Controller): self.input_size = config.INPUT_SIZE self.dt = config.DT - # cost parameters - self.Q = config.Q - self.R = config.R - self.Sf = config.Sf - # initialize self.prev_sol = np.zeros((self.pred_len, self.input_size)) diff --git a/PythonLinearNonlinearControl/envs/cartpole.py b/PythonLinearNonlinearControl/envs/cartpole.py index de9becb..fb190e6 100644 --- a/PythonLinearNonlinearControl/envs/cartpole.py +++ b/PythonLinearNonlinearControl/envs/cartpole.py @@ -37,7 +37,8 @@ class CartPoleEnv(Env): """ self.step_count = 0 - self.curr_x = np.array([0., 0., 0., 0.]) + theta = np.random.randn(1) + self.curr_x = np.array([0., 0., theta[0], 0.]) if init_x is not None: self.curr_x = init_x diff --git a/PythonLinearNonlinearControl/models/cartpole.py b/PythonLinearNonlinearControl/models/cartpole.py index 42c6616..db8ef61 100644 --- a/PythonLinearNonlinearControl/models/cartpole.py +++ b/PythonLinearNonlinearControl/models/cartpole.py @@ -90,8 +90,41 @@ class CartPoleModel(Model): f_x = np.zeros((pred_len, state_size, state_size)) - f_x[:, 0, 2] = -np.sin(xs[:, 2]) * us[:, 0] - f_x[:, 1, 2] = np.cos(xs[:, 2]) * us[:, 0] + # f_x_dot + f_x[:, 0, 1] = np.ones(pred_len) + + # f_theta + tmp = ((self.mc + self.mp * np.sin(xs[:, 2])**2)**(-2)) \ + * self.mp * 2. * np.sin(xs[:, 2]) * np.cos(xs[:, 2]) + tmp2 = 1. / (self.mc + self.mp * (np.sin(xs[:, 2])**2)) + + f_x[:, 1, 2] = - us[:, 0] * tmp \ + - tmp * (self.mp * np.sin(xs[:, 2]) \ + * (self.l * xs[:, 3]**2 \ + + self.g * np.cos(xs[:, 2]))) \ + + tmp2 * (self.mp * np.cos(xs[:, 2]) * self.l \ + * xs[:, 3]**2 \ + + self.mp * self.g * (np.cos(xs[:, 2])**2 \ + - np.sin(xs[:, 2])**2)) + f_x[:, 3, 2] = - 1. / self.l * tmp \ + * (-us[:, 0] * np.cos(xs[:, 2]) \ + - self.mp * self.l * (xs[:, 3]**2) \ + * np.cos(xs[:, 2]) * np.sin(xs[:, 2]) \ + - (self.mc + self.mp) * self.g * np.sin(xs[:, 2])) \ + + 1. / self.l * tmp2 \ + * (us[:, 0] * np.sin(xs[:, 2]) \ + - self.mp * self.l * xs[:, 3]**2 \ + * (np.cos(xs[:, 2])**2 - np.sin(xs[:, 2])**2) \ + - (self.mc + self.mp) \ + * self.g * np.cos(xs[:, 2])) + + # f_theta_dot + f_x[:, 1, 3] = tmp2 * (self.mp * np.sin(xs[:, 2]) \ + * self.l * 2 * xs[:, 3]) + f_x[:, 2, 3] = np.ones(pred_len) + f_x[:, 3, 3] = 1. / self.l * tmp2 \ + * (-2. * self.mp * self.l * xs[:, 3] \ + * np.cos(xs[:, 2]) * np.sin(xs[:, 2])) return f_x * dt + np.eye(state_size) # to discrete form @@ -139,10 +172,7 @@ class CartPoleModel(Model): f_xx = np.zeros((pred_len, state_size, state_size, state_size)) - f_xx[:, 0, 2, 2] = -np.cos(xs[:, 2]) * us[:, 0] - f_xx[:, 1, 2, 2] = -np.sin(xs[:, 2]) * us[:, 0] - - return f_xx * dt + raise NotImplementedError def calc_f_ux(self, xs, us, dt): """ hessian of model with respect to state and input in batch form @@ -161,11 +191,8 @@ class CartPoleModel(Model): f_ux = np.zeros((pred_len, state_size, input_size, state_size)) - f_ux[:, 0, 0, 2] = -np.sin(xs[:, 2]) - f_ux[:, 1, 0, 2] = np.cos(xs[:, 2]) + raise NotImplementedError - return f_ux * dt - def calc_f_uu(self, xs, us, dt): """ hessian of model with respect to input in batch form @@ -183,4 +210,4 @@ class CartPoleModel(Model): f_uu = np.zeros((pred_len, state_size, input_size, input_size)) - return f_uu * dt \ No newline at end of file + raise NotImplementedError \ No newline at end of file diff --git a/README.md b/README.md index b177720..7bb14b1 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ [![MIT licensed](https://img.shields.io/badge/license-MIT-blue.svg)](LICENSE) -[![Coverage Status](https://coveralls.io/repos/github/Shunichi09/PythonLinearNonlinearControl/badge.svg?branch=master)](https://coveralls.io/github/Shunichi09/PythonLinearNonlinearControl?branch=master) -[![Build Status](https://travis-ci.org/Shunichi09/PythonLinearNonlinearControl.svg?branch=master)](https://travis-ci.org/Shunichi09/PythonLinearNonlinearControl) +[![Coverage Status](https://coveralls.io/repos/github/Shunichi09/PythonLinearNonlinearControl/badge.svg?branch=master&service=github)](https://coveralls.io/github/Shunichi09/PythonLinearNonlinearControl?branch=master&service=github) +[![Build Status](https://travis-ci.org/Shunichi09/PythonLinearNonlinearControl.svg?branch=master&service=github)](https://travis-ci.org/Shunichi09/PythonLinearNonlinearControl) # PythonLinearNonLinearControl @@ -116,21 +116,21 @@ It should be noted that **Model** and **Environment** are different. As mentione -## Model +## [Model](PythonLinearNonlinearControl/models/) System model. For an instance, in the case that a model is linear, this model should have a form, "x[k+1] = Ax[k] + Bu[k]". If you use gradient based control method, you are preferred to implement the gradients of the model, other wise the controllers use numeric gradients. -## Planner +## [Planner](PythonLinearNonlinearControl/planners/) Planner make the goal states. -## Controller +## [Controller](PythonLinearNonlinearControl/controllers/) Controller calculate the optimal inputs by using the model by using the algorithms. -## Runner +## [Runner](PythonLinearNonlinearControl/runners/) Runner runs the simulation. diff --git a/scripts/simple_run.py b/scripts/simple_run.py index 25f828c..f1d70fd 100644 --- a/scripts/simple_run.py +++ b/scripts/simple_run.py @@ -42,9 +42,9 @@ def run(args): def main(): parser = argparse.ArgumentParser() - parser.add_argument("--controller_type", type=str, default="CEM") + parser.add_argument("--controller_type", type=str, default="DDP") parser.add_argument("--planner_type", type=str, default="const") - parser.add_argument("--env", type=str, default="TwoWheeledConst") + parser.add_argument("--env", type=str, default="CartPole") parser.add_argument("--result_dir", type=str, default="./result") args = parser.parse_args() diff --git a/tests/configs/test_cartpole.py b/tests/configs/test_cartpole.py index 6f74321..877ba9e 100644 --- a/tests/configs/test_cartpole.py +++ b/tests/configs/test_cartpole.py @@ -28,4 +28,154 @@ class TestCalcCost(): costs = config.terminal_state_cost_fn(pred_xs[:, -1, :],\ g_xs[:, -1, :]) - assert costs.shape == (pop_size, 1) \ No newline at end of file + assert costs.shape == (pop_size, 1) + +class TestGradient(): + def test_state_gradient(self): + """ + """ + xs = np.ones((1, 4)) + cost_grad = CartPoleConfigModule.gradient_cost_fn_with_state(xs, None) + + # numeric grad + eps = 1e-4 + expected_grad = np.zeros((1, 4)) + for i in range(4): + tmp_x = xs.copy() + tmp_x[0, i] = xs[0, i] + eps + forward = \ + CartPoleConfigModule.state_cost_fn(tmp_x, None) + tmp_x = xs.copy() + tmp_x[0, i] = xs[0, i] - eps + backward = \ + CartPoleConfigModule.state_cost_fn(tmp_x, None) + + expected_grad[0, i] = (forward - backward) / (2. * eps) + + assert cost_grad == pytest.approx(expected_grad) + + def test_terminal_state_gradient(self): + """ + """ + xs = np.ones(4) + cost_grad =\ + CartPoleConfigModule.gradient_cost_fn_with_state(xs, None, + terminal=True) + + # numeric grad + eps = 1e-4 + expected_grad = np.zeros((1, 4)) + for i in range(4): + tmp_x = xs.copy() + tmp_x[i] = xs[i] + eps + forward = \ + CartPoleConfigModule.state_cost_fn(tmp_x, None) + tmp_x = xs.copy() + tmp_x[i] = xs[i] - eps + backward = \ + CartPoleConfigModule.state_cost_fn(tmp_x, None) + + expected_grad[0, i] = (forward - backward) / (2. * eps) + + assert cost_grad == pytest.approx(expected_grad) + + def test_input_gradient(self): + """ + """ + us = np.ones((1, 1)) + cost_grad = CartPoleConfigModule.gradient_cost_fn_with_input(None, us) + + # numeric grad + eps = 1e-4 + expected_grad = np.zeros((1, 1)) + for i in range(1): + tmp_u = us.copy() + tmp_u[0, i] = us[0, i] + eps + forward = \ + CartPoleConfigModule.input_cost_fn(tmp_u) + tmp_u = us.copy() + tmp_u[0, i] = us[0, i] - eps + backward = \ + CartPoleConfigModule.input_cost_fn(tmp_u) + + expected_grad[0, i] = (forward - backward) / (2. * eps) + + assert cost_grad == pytest.approx(expected_grad) + + def test_state_hessian(self): + """ + """ + xs = np.ones((1, 4)) + cost_hess = CartPoleConfigModule.hessian_cost_fn_with_state(xs, None) + + # numeric grad + eps = 1e-4 + expected_hess = np.zeros((1, 4, 4)) + for i in range(4): + tmp_x = xs.copy() + tmp_x[0, i] = xs[0, i] + eps + forward = \ + CartPoleConfigModule.gradient_cost_fn_with_state( + tmp_x, None, terminal=False) + tmp_x = xs.copy() + tmp_x[0, i] = xs[0, i] - eps + backward = \ + CartPoleConfigModule.gradient_cost_fn_with_state( + tmp_x, None, terminal=False) + + expected_hess[0, :, i] = (forward - backward) / (2. * eps) + + assert cost_hess == pytest.approx(expected_hess) + + def test_terminal_state_hessian(self): + """ + """ + xs = np.ones(4) + cost_hess =\ + CartPoleConfigModule.hessian_cost_fn_with_state(xs, None, + terminal=True) + + # numeric grad + eps = 1e-4 + expected_hess = np.zeros((1, 4, 4)) + for i in range(4): + tmp_x = xs.copy() + tmp_x[i] = xs[i] + eps + forward = \ + CartPoleConfigModule.gradient_cost_fn_with_state( + tmp_x, None, terminal=True) + tmp_x = xs.copy() + tmp_x[i] = xs[i] - eps + backward = \ + CartPoleConfigModule.gradient_cost_fn_with_state( + tmp_x, None, terminal=True) + + expected_hess[0, :, i] = (forward - backward) / (2. * eps) + + assert cost_hess == pytest.approx(expected_hess) + + def test_input_hessian(self): + """ + """ + us = np.ones((1, 1)) + xs = np.ones((1, 4)) + cost_hess = CartPoleConfigModule.hessian_cost_fn_with_input(us, xs) + + # numeric grad + eps = 1e-4 + expected_hess = np.zeros((1, 1, 1)) + for i in range(1): + tmp_u = us.copy() + tmp_u[0, i] = us[0, i] + eps + forward = \ + CartPoleConfigModule.gradient_cost_fn_with_input( + xs, tmp_u) + tmp_u = us.copy() + tmp_u[0, i] = us[0, i] - eps + backward = \ + CartPoleConfigModule.gradient_cost_fn_with_input( + xs, tmp_u) + + expected_hess[0, :, i] = (forward - backward) / (2. * eps) + + assert cost_hess == pytest.approx(expected_hess) \ No newline at end of file diff --git a/tests/configs/test_two_wheeled.py b/tests/configs/test_two_wheeled.py index fb9cb7c..9ad0408 100644 --- a/tests/configs/test_two_wheeled.py +++ b/tests/configs/test_two_wheeled.py @@ -31,4 +31,111 @@ class TestCalcCost(): g_xs[:, -1, :]) expected_costs = np.ones((pop_size, state_size))*0.5 - assert costs == pytest.approx(expected_costs**2 * np.diag(config.Sf)) \ No newline at end of file + assert costs == pytest.approx(expected_costs**2 * np.diag(config.Sf)) + +class TestGradient(): + def test_state_gradient(self): + """ + """ + xs = np.ones((1, 3)) + g_xs = np.ones((1, 3)) * 0.5 + cost_grad =\ + TwoWheeledConfigModule.gradient_cost_fn_with_state(xs, g_xs) + + # numeric grad + eps = 1e-4 + expected_grad = np.zeros((1, 3)) + for i in range(3): + tmp_x = xs.copy() + tmp_x[0, i] = xs[0, i] + eps + forward = \ + TwoWheeledConfigModule.state_cost_fn(tmp_x, g_xs) + forward = np.sum(forward) + tmp_x = xs.copy() + tmp_x[0, i] = xs[0, i] - eps + backward = \ + TwoWheeledConfigModule.state_cost_fn(tmp_x, g_xs) + backward = np.sum(backward) + + expected_grad[0, i] = (forward - backward) / (2. * eps) + + assert cost_grad == pytest.approx(expected_grad) + + def test_input_gradient(self): + """ + """ + us = np.ones((1, 2)) + cost_grad =\ + TwoWheeledConfigModule.gradient_cost_fn_with_input(None, us) + + # numeric grad + eps = 1e-4 + expected_grad = np.zeros((1, 2)) + for i in range(2): + tmp_u = us.copy() + tmp_u[0, i] = us[0, i] + eps + forward = \ + TwoWheeledConfigModule.input_cost_fn(tmp_u) + forward = np.sum(forward) + tmp_u = us.copy() + tmp_u[0, i] = us[0, i] - eps + backward = \ + TwoWheeledConfigModule.input_cost_fn(tmp_u) + backward = np.sum(backward) + + expected_grad[0, i] = (forward - backward) / (2. * eps) + + assert cost_grad == pytest.approx(expected_grad) + + def test_state_hessian(self): + """ + """ + g_xs = np.ones((1, 3)) * 0.5 + xs = np.ones((1, 3)) + cost_hess =\ + TwoWheeledConfigModule.hessian_cost_fn_with_state(xs, g_xs) + + # numeric grad + eps = 1e-4 + expected_hess = np.zeros((1, 3, 3)) + for i in range(3): + tmp_x = xs.copy() + tmp_x[0, i] = xs[0, i] + eps + forward = \ + TwoWheeledConfigModule.gradient_cost_fn_with_state( + tmp_x, g_xs, terminal=False) + tmp_x = xs.copy() + tmp_x[0, i] = xs[0, i] - eps + backward = \ + TwoWheeledConfigModule.gradient_cost_fn_with_state( + tmp_x, g_xs, terminal=False) + + expected_hess[0, :, i] = (forward - backward) / (2. * eps) + + assert cost_hess == pytest.approx(expected_hess) + + def test_input_hessian(self): + """ + """ + us = np.ones((1, 2)) + xs = np.ones((1, 3)) + cost_hess = TwoWheeledConfigModule.hessian_cost_fn_with_input(us, xs) + + # numeric grad + eps = 1e-4 + expected_hess = np.zeros((1, 2, 2)) + for i in range(2): + tmp_u = us.copy() + tmp_u[0, i] = us[0, i] + eps + forward = \ + TwoWheeledConfigModule.gradient_cost_fn_with_input( + xs, tmp_u) + tmp_u = us.copy() + tmp_u[0, i] = us[0, i] - eps + backward = \ + TwoWheeledConfigModule.gradient_cost_fn_with_input( + xs, tmp_u) + + expected_hess[0, :, i] = (forward - backward) / (2. * eps) + + assert cost_hess == pytest.approx(expected_hess) \ No newline at end of file diff --git a/tests/models/test_cartpole.py b/tests/models/test_cartpole.py index f7241b8..a6ca700 100644 --- a/tests/models/test_cartpole.py +++ b/tests/models/test_cartpole.py @@ -54,4 +54,64 @@ class TestCartPoleModel(): u = np.tile(u, (2, 1, 1)) pred_xs_alltogether = cartpole_model.predict_traj(curr_x, u)[0] - assert pred_xs_alltogether == pytest.approx(pred_xs) \ No newline at end of file + assert pred_xs_alltogether == pytest.approx(pred_xs) + + def test_gradient_state(self): + + config = CartPoleConfigModule() + cartpole_model = CartPoleModel(config) + + xs = np.ones((1, config.STATE_SIZE)) \ + * np.random.rand(1, config.STATE_SIZE) + xs[0, -1] = np.pi / 6. + us = np.ones((1, config.INPUT_SIZE)) + + grad = cartpole_model.calc_f_x(xs, us, config.DT) + + # expected cost + expected_grad = np.zeros((1, config.STATE_SIZE, config.STATE_SIZE)) + eps = 1e-4 + + for i in range(config.STATE_SIZE): + tmp_x = xs.copy() + tmp_x[0, i] = xs[0, i] + eps + forward = \ + cartpole_model.predict_next_state(tmp_x[0], us[0]) + tmp_x = xs.copy() + tmp_x[0, i] = xs[0, i] - eps + backward = \ + cartpole_model.predict_next_state(tmp_x[0], us[0]) + + expected_grad[0, :, i] = (forward - backward) / (2. * eps) + + assert grad == pytest.approx(expected_grad) + + def test_gradient_input(self): + + config = CartPoleConfigModule() + cartpole_model = CartPoleModel(config) + + xs = np.ones((1, config.STATE_SIZE)) \ + * np.random.rand(1, config.STATE_SIZE) + xs[0, -1] = np.pi / 6. + us = np.ones((1, config.INPUT_SIZE)) + + grad = cartpole_model.calc_f_u(xs, us, config.DT) + + # expected cost + expected_grad = np.zeros((1, config.STATE_SIZE, config.INPUT_SIZE)) + eps = 1e-4 + + for i in range(config.INPUT_SIZE): + tmp_u = us.copy() + tmp_u[0, i] = us[0, i] + eps + forward = \ + cartpole_model.predict_next_state(xs[0], tmp_u[0]) + tmp_u = us.copy() + tmp_u[0, i] = us[0, i] - eps + backward = \ + cartpole_model.predict_next_state(xs[0], tmp_u[0]) + + expected_grad[0, :, i] = (forward - backward) / (2. * eps) + + assert grad == pytest.approx(expected_grad) diff --git a/tests/models/test_two_wheeled.py b/tests/models/test_two_wheeled.py index 4cdff5c..3ab1161 100644 --- a/tests/models/test_two_wheeled.py +++ b/tests/models/test_two_wheeled.py @@ -39,4 +39,62 @@ class TestTwoWheeledModel(): u = np.tile(u, (1, 1, 1)) pred_xs_alltogether = two_wheeled_model.predict_traj(curr_x, u)[0] - assert pred_xs_alltogether == pytest.approx(pred_xs) \ No newline at end of file + assert pred_xs_alltogether == pytest.approx(pred_xs) + + def test_gradient_state(self): + + config = TwoWheeledConfigModule() + two_wheeled_model = TwoWheeledModel(config) + + xs = np.ones((1, config.STATE_SIZE)) + xs[0, -1] = np.pi / 6. + us = np.ones((1, config.INPUT_SIZE)) + + grad = two_wheeled_model.calc_f_x(xs, us, config.DT) + + # expected cost + expected_grad = np.zeros((1, config.STATE_SIZE, config.STATE_SIZE)) + eps = 1e-4 + + for i in range(config.STATE_SIZE): + tmp_x = xs.copy() + tmp_x[0, i] = xs[0, i] + eps + forward = \ + two_wheeled_model.predict_next_state(tmp_x[0], us[0]) + tmp_x = xs.copy() + tmp_x[0, i] = xs[0, i] - eps + backward = \ + two_wheeled_model.predict_next_state(tmp_x[0], us[0]) + + expected_grad[0, :, i] = (forward - backward) / (2. * eps) + + assert grad == pytest.approx(expected_grad) + + def test_gradient_input(self): + + config = TwoWheeledConfigModule() + two_wheeled_model = TwoWheeledModel(config) + + xs = np.ones((1, config.STATE_SIZE)) + xs[0, -1] = np.pi / 6. + us = np.ones((1, config.INPUT_SIZE)) + + grad = two_wheeled_model.calc_f_u(xs, us, config.DT) + + # expected cost + expected_grad = np.zeros((1, config.STATE_SIZE, config.INPUT_SIZE)) + eps = 1e-4 + + for i in range(config.INPUT_SIZE): + tmp_u = us.copy() + tmp_u[0, i] = us[0, i] + eps + forward = \ + two_wheeled_model.predict_next_state(xs[0], tmp_u[0]) + tmp_u = us.copy() + tmp_u[0, i] = us[0, i] - eps + backward = \ + two_wheeled_model.predict_next_state(xs[0], tmp_u[0]) + + expected_grad[0, :, i] = (forward - backward) / (2. * eps) + + assert grad == pytest.approx(expected_grad)