diff --git a/Environments.md b/Environments.md index 7799db1..70e8bcf 100644 --- a/Environments.md +++ b/Environments.md @@ -9,21 +9,39 @@ ## FistOrderLagEnv -System equations. +### System equation. You can set arbinatry time constant, tau. The default is 0.63 s +### Cost. + + + +Q = diag[1., 1., 1., 1.], +R = diag[1., 1.] + +X_g denote the goal states. + ## TwoWheeledEnv -System equations. +### System equation. +### Cost. + + + +Q = diag[5., 5., 1.], +R = diag[0.1, 0.1] + +X_g denote the goal states. + ## CatpoleEnv (Swing up) -System equations. +System equation. @@ -31,4 +49,8 @@ You can set arbinatry parameters, mc, mp, l and g. Default settings are as follows: -mc = 1, mp = 0.2, l = 0.5, g = 9.8 \ No newline at end of file +mc = 1, mp = 0.2, l = 0.5, g = 9.81 + +### Cost. + + \ No newline at end of file diff --git a/PythonLinearNonlinearControl/configs/cartpole.py b/PythonLinearNonlinearControl/configs/cartpole.py new file mode 100644 index 0000000..64a78db --- /dev/null +++ b/PythonLinearNonlinearControl/configs/cartpole.py @@ -0,0 +1,218 @@ +import numpy as np + +class CartPoleConfigModule(): + # parameters + ENV_NAME = "CartPole-v0" + TYPE = "Nonlinear" + TASK_HORIZON = 500 + PRED_LEN = 50 + STATE_SIZE = 4 + INPUT_SIZE = 1 + DT = 0.02 + # cost parameters + R = np.diag([0.01]) + # bounds + INPUT_LOWER_BOUND = np.array([-3.]) + INPUT_UPPER_BOUND = np.array([3.]) + # parameters + MP = 0.2 + MC = 1. + L = 0.5 + G = 9.81 + + def __init__(self): + """ + """ + # opt configs + self.opt_config = { + "Random": { + "popsize": 5000 + }, + "CEM": { + "popsize": 500, + "num_elites": 50, + "max_iters": 15, + "alpha": 0.3, + "init_var":9., + "threshold":0.001 + }, + "MPPI":{ + "beta" : 0.6, + "popsize": 5000, + "kappa": 0.9, + "noise_sigma": 0.5, + }, + "MPPIWilliams":{ + "popsize": 5000, + "lambda": 1., + "noise_sigma": 0.9, + }, + "iLQR":{ + "max_iter": 500, + "init_mu": 1., + "mu_min": 1e-6, + "mu_max": 1e10, + "init_delta": 2., + "threshold": 1e-6, + }, + "DDP":{ + "max_iter": 500, + "init_mu": 1., + "mu_min": 1e-6, + "mu_max": 1e10, + "init_delta": 2., + "threshold": 1e-6, + }, + "NMPC-CGMRES":{ + }, + "NMPC-Newton":{ + }, + } + + @staticmethod + def input_cost_fn(u): + """ input cost functions + Args: + u (numpy.ndarray): input, shape(pred_len, input_size) + or shape(pop_size, pred_len, input_size) + Returns: + cost (numpy.ndarray): cost of input, shape(pred_len, input_size) or + shape(pop_size, pred_len, input_size) + """ + return (u**2) * np.diag(CartPoleConfigModule.R) + + @staticmethod + def state_cost_fn(x, g_x): + """ state cost function + Args: + x (numpy.ndarray): state, shape(pred_len, state_size) + or shape(pop_size, pred_len, state_size) + g_x (numpy.ndarray): goal state, shape(pred_len, state_size) + or shape(pop_size, pred_len, state_size) + Returns: + cost (numpy.ndarray): cost of state, shape(pred_len, 1) or + shape(pop_size, pred_len, 1) + """ + + if len(x.shape) > 2: + return (6. * (x[:, :, 0]**2) \ + + 12. * ((np.cos(x[:, :, 2]) + 1.)**2) \ + + 0.1 * (x[:, :, 1]**2) \ + + 0.1 * (x[:, :, 3]**2))[:, :, np.newaxis] + + elif len(x.shape) > 1: + return (6. * (x[:, 0]**2) \ + + 12. * ((np.cos(x[:, 2]) + 1.)**2) \ + + 0.1 * (x[:, 1]**2) \ + + 0.1 * (x[:, 3]**2))[:, np.newaxis] + + return 6. * (x[0]**2) \ + + 12. * ((np.cos(x[2]) + 1.)**2) \ + + 0.1 * (x[1]**2) \ + + 0.1 * (x[3]**2) + + @staticmethod + def terminal_state_cost_fn(terminal_x, terminal_g_x): + """ + Args: + terminal_x (numpy.ndarray): terminal state, + shape(state_size, ) or shape(pop_size, state_size) + terminal_g_x (numpy.ndarray): terminal goal state, + shape(state_size, ) or shape(pop_size, state_size) + Returns: + cost (numpy.ndarray): cost of state, shape(pred_len, ) or + shape(pop_size, pred_len) + """ + + if len(terminal_x.shape) > 1: + return (6. * (terminal_x[:, 0]**2) \ + + 12. * ((np.cos(terminal_x[:, 2]) + 1.)**2) \ + + 0.1 * (terminal_x[:, 1]**2) \ + + 0.1 * (terminal_x[:, 3]**2))[:, np.newaxis] + + return 6. * (terminal_x[0]**2) \ + + 12. * ((np.cos(terminal_x[2]) + 1.)**2) \ + + 0.1 * (terminal_x[1]**2) \ + + 0.1 * (terminal_x[3]**2) + + @staticmethod + def gradient_cost_fn_with_state(x, g_x, terminal=False): + """ gradient of costs with respect to the state + + Args: + x (numpy.ndarray): state, shape(pred_len, state_size) + g_x (numpy.ndarray): goal state, shape(pred_len, state_size) + + Returns: + l_x (numpy.ndarray): gradient of cost, shape(pred_len, state_size) + or shape(1, state_size) + """ + if not terminal: + return None + + return None + + @staticmethod + def gradient_cost_fn_with_input(x, u): + """ gradient of costs with respect to the input + + Args: + x (numpy.ndarray): state, shape(pred_len, state_size) + u (numpy.ndarray): goal state, shape(pred_len, input_size) + + Returns: + l_u (numpy.ndarray): gradient of cost, shape(pred_len, input_size) + """ + return None + + @staticmethod + def hessian_cost_fn_with_state(x, g_x, terminal=False): + """ hessian costs with respect to the state + + Args: + x (numpy.ndarray): state, shape(pred_len, state_size) + g_x (numpy.ndarray): goal state, shape(pred_len, state_size) + + Returns: + l_xx (numpy.ndarray): gradient of cost, + shape(pred_len, state_size, state_size) or + shape(1, state_size, state_size) or + """ + if not terminal: + (pred_len, _) = x.shape + return None + + return None + + @staticmethod + def hessian_cost_fn_with_input(x, u): + """ hessian costs with respect to the input + + Args: + x (numpy.ndarray): state, shape(pred_len, state_size) + u (numpy.ndarray): goal state, shape(pred_len, input_size) + + Returns: + l_uu (numpy.ndarray): gradient of cost, + shape(pred_len, input_size, input_size) + """ + (pred_len, _) = u.shape + + return None + + @staticmethod + def hessian_cost_fn_with_input_state(x, u): + """ hessian costs with respect to the state and input + + Args: + x (numpy.ndarray): state, shape(pred_len, state_size) + u (numpy.ndarray): goal state, shape(pred_len, input_size) + + Returns: + l_ux (numpy.ndarray): gradient of cost , + shape(pred_len, input_size, state_size) + """ + (_, state_size) = x.shape + (pred_len, input_size) = u.shape + + return np.zeros((pred_len, input_size, state_size)) \ No newline at end of file diff --git a/PythonLinearNonlinearControl/configs/make_configs.py b/PythonLinearNonlinearControl/configs/make_configs.py index 87e3709..984df94 100644 --- a/PythonLinearNonlinearControl/configs/make_configs.py +++ b/PythonLinearNonlinearControl/configs/make_configs.py @@ -1,5 +1,6 @@ from .first_order_lag import FirstOrderLagConfigModule from .two_wheeled import TwoWheeledConfigModule +from .cartpole import CartPoleConfigModule def make_config(args): """ @@ -9,4 +10,6 @@ def make_config(args): if args.env == "FirstOrderLag": return FirstOrderLagConfigModule() elif args.env == "TwoWheeledConst" or args.env == "TwoWheeled": - return TwoWheeledConfigModule() \ No newline at end of file + return TwoWheeledConfigModule() + elif args.env == "CartPole": + return CartPoleConfigModule() \ No newline at end of file diff --git a/PythonLinearNonlinearControl/envs/cartpole.py b/PythonLinearNonlinearControl/envs/cartpole.py index cd84313..de9becb 100644 --- a/PythonLinearNonlinearControl/envs/cartpole.py +++ b/PythonLinearNonlinearControl/envs/cartpole.py @@ -14,12 +14,16 @@ class CartPoleEnv(Env): def __init__(self): """ """ - self.config = {"state_size" : 4,\ - "input_size" : 1,\ - "dt" : 0.02,\ - "max_step" : 1000,\ - "input_lower_bound": None,\ - "input_upper_bound": None, + self.config = {"state_size" : 4, + "input_size" : 1, + "dt" : 0.02, + "max_step" : 500, + "input_lower_bound": [-3.], + "input_upper_bound": [3.], + "mp": 0.2, + "mc": 1., + "l": 0.5, + "g": 9.81, } super(CartPoleEnv, self).__init__(self.config) @@ -33,13 +37,13 @@ class CartPoleEnv(Env): """ self.step_count = 0 - self.curr_x = np.zeros(self.config["state_size"]) + self.curr_x = np.array([0., 0., 0., 0.]) if init_x is not None: self.curr_x = init_x # goal - self.g_x = np.array([0., 0., np.pi, 0.]) + self.g_x = np.array([0., 0., -np.pi, 0.]) # clear memory self.history_x = [] @@ -65,20 +69,43 @@ class CartPoleEnv(Env): self.config["input_upper_bound"]) # step - next_x = np.zeros(self.config["state_size"]) + # x + d_x0 = self.curr_x[1] + # v_x + d_x1 = (u[0] + self.config["mp"] * np.sin(self.curr_x[2]) \ + * (self.config["l"] * (self.curr_x[3]**2) \ + + self.config["g"] * np.cos(self.curr_x[2]))) \ + / (self.config["mc"] + self.config["mp"] \ + * (np.sin(self.curr_x[2])**2)) + # theta + d_x2 = self.curr_x[3] + + # v_theta + d_x3 = (-u[0] * np.cos(self.curr_x[2]) \ + - self.config["mp"] * self.config["l"] * (self.curr_x[3]**2) \ + * np.cos(self.curr_x[2]) * np.sin(self.curr_x[2]) \ + - (self.config["mc"] + self.config["mp"]) * self.config["g"] \ + * np.sin(self.curr_x[2])) \ + / (self.config["l"] * (self.config["mc"] + self.config["mp"] \ + * (np.sin(self.curr_x[2])**2))) + + next_x = self.curr_x +\ + np.array([d_x0, d_x1, d_x2, d_x3]) * self.config["dt"] # TODO: costs costs = 0. costs += 0.1 * np.sum(u**2) - costs += np.sum((self.curr_x - self.g_x)**2) - + costs += 6. * self.curr_x[0]**2 \ + + 12. * (np.cos(self.curr_x[2]) + 1.)**2 \ + + 0.1 * self.curr_x[1]**2 \ + + 0.1 * self.curr_x[3]**2 # save history self.history_x.append(next_x.flatten()) self.history_g_x.append(self.g_x.flatten()) # update - self.curr_x = next_x.flatten() + self.curr_x = next_x.flatten().copy() # update costs self.step_count += 1 diff --git a/PythonLinearNonlinearControl/envs/make_envs.py b/PythonLinearNonlinearControl/envs/make_envs.py index fd3ea09..4b1adf7 100644 --- a/PythonLinearNonlinearControl/envs/make_envs.py +++ b/PythonLinearNonlinearControl/envs/make_envs.py @@ -1,6 +1,6 @@ from .first_order_lag import FirstOrderLagEnv from .two_wheeled import TwoWheeledConstEnv -from .cartpole import CartpoleEnv +from .cartpole import CartPoleEnv def make_env(args): @@ -9,6 +9,6 @@ def make_env(args): elif args.env == "TwoWheeledConst": return TwoWheeledConstEnv() elif args.env == "CartPole": - return CartpoleEnv() + return CartPoleEnv() raise NotImplementedError("There is not {} Env".format(args.env)) \ No newline at end of file diff --git a/PythonLinearNonlinearControl/envs/two_wheeled.py b/PythonLinearNonlinearControl/envs/two_wheeled.py index c5194cd..8be0d36 100644 --- a/PythonLinearNonlinearControl/envs/two_wheeled.py +++ b/PythonLinearNonlinearControl/envs/two_wheeled.py @@ -86,7 +86,7 @@ class TwoWheeledConstEnv(Env): # TODO: costs costs = 0. costs += 0.1 * np.sum(u**2) - costs += np.sum((self.curr_x - self.g_x)**2) + costs += np.sum(((self.curr_x - self.g_x)**2) * np.array([5., 5., 1.])) # save history self.history_x.append(next_x.flatten()) diff --git a/PythonLinearNonlinearControl/models/cartpole.py b/PythonLinearNonlinearControl/models/cartpole.py new file mode 100644 index 0000000..42c6616 --- /dev/null +++ b/PythonLinearNonlinearControl/models/cartpole.py @@ -0,0 +1,186 @@ +import numpy as np + +from .model import Model + +class CartPoleModel(Model): + """ cartpole model + """ + def __init__(self, config): + """ + """ + super(CartPoleModel, self).__init__() + self.dt = config.DT + self.mc = config.MC + self.mp = config.MP + self.l = config.L + self.g = config.G + + def predict_next_state(self, curr_x, u): + """ predict next state + + Args: + curr_x (numpy.ndarray): current state, shape(state_size, ) or + shape(pop_size, state_size) + u (numpy.ndarray): input, shape(input_size, ) or + shape(pop_size, input_size) + Returns: + next_x (numpy.ndarray): next state, shape(state_size, ) or + shape(pop_size, state_size) + """ + if len(u.shape) == 1: + # x + d_x0 = curr_x[1] + # v_x + d_x1 = (u[0] + self.mp * np.sin(curr_x[2]) \ + * (self.l * (curr_x[3]**2) \ + + self.g * np.cos(curr_x[2]))) \ + / (self.mc + self.mp * (np.sin(curr_x[2])**2)) + # theta + d_x2 = curr_x[3] + # v_theta + d_x3 = (-u[0] * np.cos(curr_x[2]) \ + - self.mp * self.l * (curr_x[3]**2) \ + * np.cos(curr_x[2]) * np.sin(curr_x[2]) \ + - (self.mc + self.mp) * self.g * np.sin(curr_x[2])) \ + / (self.l * (self.mc + self.mp * (np.sin(curr_x[2])**2))) + + next_x = curr_x +\ + np.array([d_x0, d_x1, d_x2, d_x3]) * self.dt + + return next_x + + elif len(u.shape) == 2: + # x + d_x0 = curr_x[:, 1] + # v_x + d_x1 = (u[:, 0] + self.mp * np.sin(curr_x[:, 2]) \ + * (self.l * (curr_x[:, 3]**2) \ + + self.g * np.cos(curr_x[:, 2]))) \ + / (self.mc + self.mp * (np.sin(curr_x[:, 2])**2)) + # theta + d_x2 = curr_x[:, 3] + # v_theta + d_x3 = (-u[:, 0] * np.cos(curr_x[:, 2]) \ + - self.mp * self.l * (curr_x[:, 3]**2) \ + * np.cos(curr_x[:, 2]) * np.sin(curr_x[:, 2]) \ + - (self.mc + self.mp) * self.g * np.sin(curr_x[:, 2])) \ + / (self.l * (self.mc + self.mp * (np.sin(curr_x[:, 2])**2))) + + next_x = curr_x +\ + np.stack((d_x0, d_x1, d_x2, d_x3), axis=1) * self.dt + + return next_x + + def calc_f_x(self, xs, us, dt): + """ gradient of model with respect to the state in batch form + Args: + xs (numpy.ndarray): state, shape(pred_len+1, state_size) + us (numpy.ndarray): input, shape(pred_len, input_size,) + + Return: + f_x (numpy.ndarray): gradient of model with respect to x, + shape(pred_len, state_size, state_size) + + Notes: + This should be discrete form !! + """ + # get size + (_, state_size) = xs.shape + (pred_len, _) = us.shape + + f_x = np.zeros((pred_len, state_size, state_size)) + + f_x[:, 0, 2] = -np.sin(xs[:, 2]) * us[:, 0] + f_x[:, 1, 2] = np.cos(xs[:, 2]) * us[:, 0] + + return f_x * dt + np.eye(state_size) # to discrete form + + def calc_f_u(self, xs, us, dt): + """ gradient of model with respect to the input in batch form + Args: + xs (numpy.ndarray): state, shape(pred_len+1, state_size) + us (numpy.ndarray): input, shape(pred_len, input_size,) + + Return: + f_u (numpy.ndarray): gradient of model with respect to x, + shape(pred_len, state_size, input_size) + + Notes: + This should be discrete form !! + """ + # get size + (_, state_size) = xs.shape + (pred_len, input_size) = us.shape + + f_u = np.zeros((pred_len, state_size, input_size)) + + f_u[:, 1, 0] = 1. / (self.mc + self.mp * (np.sin(xs[:, 2])**2)) + + f_u[:, 3, 0] = -np.cos(xs[:, 2]) \ + / (self.l * (self.mc \ + + self.mp * (np.sin(xs[:, 2])**2))) + + return f_u * dt # to discrete form + + def calc_f_xx(self, xs, us, dt): + """ hessian of model with respect to the state in batch form + + Args: + xs (numpy.ndarray): state, shape(pred_len+1, state_size) + us (numpy.ndarray): input, shape(pred_len, input_size,) + + Return: + f_xx (numpy.ndarray): gradient of model with respect to x, + shape(pred_len, state_size, state_size, state_size) + """ + # get size + (_, state_size) = xs.shape + (pred_len, _) = us.shape + + f_xx = np.zeros((pred_len, state_size, state_size, state_size)) + + f_xx[:, 0, 2, 2] = -np.cos(xs[:, 2]) * us[:, 0] + f_xx[:, 1, 2, 2] = -np.sin(xs[:, 2]) * us[:, 0] + + return f_xx * dt + + def calc_f_ux(self, xs, us, dt): + """ hessian of model with respect to state and input in batch form + + Args: + xs (numpy.ndarray): state, shape(pred_len+1, state_size) + us (numpy.ndarray): input, shape(pred_len, input_size,) + + Return: + f_ux (numpy.ndarray): gradient of model with respect to x, + shape(pred_len, state_size, input_size, state_size) + """ + # get size + (_, state_size) = xs.shape + (pred_len, input_size) = us.shape + + f_ux = np.zeros((pred_len, state_size, input_size, state_size)) + + f_ux[:, 0, 0, 2] = -np.sin(xs[:, 2]) + f_ux[:, 1, 0, 2] = np.cos(xs[:, 2]) + + return f_ux * dt + + def calc_f_uu(self, xs, us, dt): + """ hessian of model with respect to input in batch form + + Args: + xs (numpy.ndarray): state, shape(pred_len+1, state_size) + us (numpy.ndarray): input, shape(pred_len, input_size,) + + Return: + f_uu (numpy.ndarray): gradient of model with respect to x, + shape(pred_len, state_size, input_size, input_size) + """ + # get size + (_, state_size) = xs.shape + (pred_len, input_size) = us.shape + + f_uu = np.zeros((pred_len, state_size, input_size, input_size)) + + return f_uu * dt \ No newline at end of file diff --git a/PythonLinearNonlinearControl/models/make_models.py b/PythonLinearNonlinearControl/models/make_models.py index 7688f93..fcb29ae 100644 --- a/PythonLinearNonlinearControl/models/make_models.py +++ b/PythonLinearNonlinearControl/models/make_models.py @@ -1,5 +1,6 @@ from .first_order_lag import FirstOrderLagModel from .two_wheeled import TwoWheeledModel +from .cartpole import CartPoleModel def make_model(args, config): @@ -7,5 +8,7 @@ def make_model(args, config): return FirstOrderLagModel(config) elif args.env == "TwoWheeledConst" or args.env == "TwoWheeled": return TwoWheeledModel(config) + elif args.env == "CartPole": + return CartPoleModel(config) - raise NotImplementedError("There is not {} Model".format(args.env)) + raise NotImplementedError("There is not {} Model".format(args.env)) \ No newline at end of file diff --git a/README.md b/README.md index 7c1e73d..b177720 100644 --- a/README.md +++ b/README.md @@ -15,7 +15,7 @@ PythonLinearNonLinearControl is a library implementing the linear and nonlinear | Linear Model Predictive Control (MPC) | ✓ | x | x | x | x | | Cross Entropy Method (CEM) | ✓ | ✓ | x | x | x | | Model Preidictive Path Integral Control of Nagabandi, A. (MPPI) | ✓ | ✓ | x | x | x | -| Model Preidictive Path Integral Control of Williams (MPPIWilliams) | ✓ | ✓ | x | x | x | +| Model Preidictive Path Integral Control of Williams, G. (MPPIWilliams) | ✓ | ✓ | x | x | x | | Random Shooting Method (Random) | ✓ | ✓ | x | x | x | | Iterative LQR (iLQR) | x | ✓ | x | ✓ | x | | Differential Dynamic Programming (DDP) | x | ✓ | x | ✓ | ✓ | @@ -34,7 +34,7 @@ Following algorithms are implemented in PythonLinearNonlinearControl - [Cross Entropy Method (CEM)](https://arxiv.org/abs/1805.12114) - Ref: Chua, K., Calandra, R., McAllister, R., & Levine, S. (2018). Deep reinforcement learning in a handful of trials using probabilistic dynamics models. In Advances in Neural Information Processing Systems (pp. 4754-4765) - [script](PythonLinearNonlinearControl/controllers/cem.py) -- [Model Preidictive Path Integral Control Nagabandi, A. (MPPI)](https://arxiv.org/abs/1909.11652) +- [Model Preidictive Path Integral Control of Nagabandi, A. (MPPI)](https://arxiv.org/abs/1909.11652) - Ref: Nagabandi, A., Konoglie, K., Levine, S., & Kumar, V. (2019). Deep Dynamics Models for Learning Dexterous Manipulation. arXiv preprint arXiv:1909.11652. - [script](PythonLinearNonlinearControl/controllers/mppi.py) - [Model Preidictive Path Integral Control of Williams, G. (MPPIWilliams)](https://ieeexplore.ieee.org/abstract/document/7989202) @@ -71,7 +71,7 @@ Following algorithms are implemented in PythonLinearNonlinearControl All states and inputs of environments are continuous. **It should be noted that the algorithms for linear model could be applied to nonlinear enviroments if you have linealized the model of nonlinear environments.** -You could know abount out environmets more in [Environments.md](Environments.md) +You could know abount our environmets more in [Environments.md](Environments.md) # Usage diff --git a/assets/cartpole_score.png b/assets/cartpole_score.png new file mode 100644 index 0000000..ef4d286 Binary files /dev/null and b/assets/cartpole_score.png differ diff --git a/assets/quadratic_score.png b/assets/quadratic_score.png new file mode 100644 index 0000000..7202879 Binary files /dev/null and b/assets/quadratic_score.png differ diff --git a/scripts/simple_run.py b/scripts/simple_run.py index b5af782..25f828c 100644 --- a/scripts/simple_run.py +++ b/scripts/simple_run.py @@ -42,9 +42,9 @@ def run(args): def main(): parser = argparse.ArgumentParser() - parser.add_argument("--controller_type", type=str, default="MPPIWilliams") + parser.add_argument("--controller_type", type=str, default="CEM") parser.add_argument("--planner_type", type=str, default="const") - parser.add_argument("--env", type=str, default="FirstOrderLag") + parser.add_argument("--env", type=str, default="TwoWheeledConst") parser.add_argument("--result_dir", type=str, default="./result") args = parser.parse_args() diff --git a/tests/configs/test_cartpole.py b/tests/configs/test_cartpole.py new file mode 100644 index 0000000..6f74321 --- /dev/null +++ b/tests/configs/test_cartpole.py @@ -0,0 +1,31 @@ +import pytest +import numpy as np + +from PythonLinearNonlinearControl.configs.cartpole \ + import CartPoleConfigModule + +class TestCalcCost(): + def test_calc_costs(self): + # make config + config = CartPoleConfigModule() + # set + pred_len = 5 + state_size = 4 + input_size = 1 + pop_size = 2 + pred_xs = np.ones((pop_size, pred_len, state_size)) + g_xs = np.ones((pop_size, pred_len, state_size)) * 0.5 + input_samples = np.ones((pop_size, pred_len, input_size)) * 0.5 + + costs = config.input_cost_fn(input_samples) + + assert costs.shape == (pop_size, pred_len, input_size) + + costs = config.state_cost_fn(pred_xs, g_xs) + + assert costs.shape == (pop_size, pred_len, 1) + + costs = config.terminal_state_cost_fn(pred_xs[:, -1, :],\ + g_xs[:, -1, :]) + + assert costs.shape == (pop_size, 1) \ No newline at end of file diff --git a/tests/configs/test_two_wheeled.py b/tests/configs/test_two_wheeled.py new file mode 100644 index 0000000..fb9cb7c --- /dev/null +++ b/tests/configs/test_two_wheeled.py @@ -0,0 +1,34 @@ +import pytest +import numpy as np + +from PythonLinearNonlinearControl.configs.two_wheeled \ + import TwoWheeledConfigModule + +class TestCalcCost(): + def test_calc_costs(self): + # make config + config = TwoWheeledConfigModule() + # set + pred_len = 5 + state_size = 3 + input_size = 2 + pop_size = 2 + pred_xs = np.ones((pop_size, pred_len, state_size)) + g_xs = np.ones((pop_size, pred_len, state_size)) * 0.5 + input_samples = np.ones((pop_size, pred_len, input_size)) * 0.5 + + costs = config.input_cost_fn(input_samples) + expected_costs = np.ones((pop_size, pred_len, input_size))*0.5 + + assert costs == pytest.approx(expected_costs**2 * np.diag(config.R)) + + costs = config.state_cost_fn(pred_xs, g_xs) + expected_costs = np.ones((pop_size, pred_len, state_size))*0.5 + + assert costs == pytest.approx(expected_costs**2 * np.diag(config.Q)) + + costs = config.terminal_state_cost_fn(pred_xs[:, -1, :],\ + g_xs[:, -1, :]) + expected_costs = np.ones((pop_size, state_size))*0.5 + + assert costs == pytest.approx(expected_costs**2 * np.diag(config.Sf)) \ No newline at end of file diff --git a/tests/env/test_cartpole.py b/tests/env/test_cartpole.py new file mode 100644 index 0000000..7b726bc --- /dev/null +++ b/tests/env/test_cartpole.py @@ -0,0 +1,73 @@ +import pytest +import numpy as np + +from PythonLinearNonlinearControl.envs.cartpole import CartPoleEnv + +class TestCartPoleEnv(): + """ + """ + def test_step(self): + env = CartPoleEnv() + + curr_x = np.ones(4) + curr_x[2] = np.pi / 6. + + env.reset(init_x=curr_x) + + u = np.ones(1) + + next_x, _, _, _ = env.step(u) + + d_x0 = curr_x[1] + d_x1 = (1. + env.config["mp"] * np.sin(np.pi / 6.) \ + * (env.config["l"] * (1.**2) \ + + env.config["g"] * np.cos(np.pi / 6.))) \ + / (env.config["mc"] + env.config["mp"] * np.sin(np.pi / 6.)**2) + d_x2 = curr_x[3] + d_x3 = (-1. * np.cos(np.pi / 6.) \ + - env.config["mp"] * env.config["l"] * (1.**2) \ + * np.cos(np.pi / 6.) * np.sin(np.pi / 6.) \ + - (env.config["mp"] + env.config["mc"]) * env.config["g"] \ + * np.sin(np.pi / 6.)) \ + / (env.config["l"] \ + * (env.config["mc"] \ + + env.config["mp"] * np.sin(np.pi / 6.)**2)) + + expected = np.array([d_x0, d_x1, d_x2, d_x3]) * env.config["dt"] \ + + curr_x + + assert next_x == pytest.approx(expected, abs=1e-5) + + def test_bound_step(self): + env = CartPoleEnv() + + curr_x = np.ones(4) + curr_x[2] = np.pi / 6. + + env.reset(init_x=curr_x) + + u = np.ones(1) * 1e3 + + next_x, _, _, _ = env.step(u) + + u = env.config["input_upper_bound"][0] + + d_x0 = curr_x[1] + d_x1 = (u + env.config["mp"] * np.sin(np.pi / 6.) \ + * (env.config["l"] * (1.**2) \ + + env.config["g"] * np.cos(np.pi / 6.))) \ + / (env.config["mc"] + env.config["mp"] * np.sin(np.pi / 6.)**2) + d_x2 = curr_x[3] + d_x3 = (-u * np.cos(np.pi / 6.) \ + - env.config["mp"] * env.config["l"] * (1.**2) \ + * np.cos(np.pi / 6.) * np.sin(np.pi / 6.) \ + - (env.config["mp"] + env.config["mc"]) * env.config["g"] \ + * np.sin(np.pi / 6.)) \ + / (env.config["l"] \ + * (env.config["mc"] \ + + env.config["mp"] * np.sin(np.pi / 6.)**2)) + + expected = np.array([d_x0, d_x1, d_x2, d_x3]) * env.config["dt"] \ + + curr_x + + assert next_x == pytest.approx(expected, abs=1e-5) \ No newline at end of file diff --git a/tests/models/test_cartpole.py b/tests/models/test_cartpole.py new file mode 100644 index 0000000..f7241b8 --- /dev/null +++ b/tests/models/test_cartpole.py @@ -0,0 +1,57 @@ +import pytest +import numpy as np + +from PythonLinearNonlinearControl.models.cartpole import CartPoleModel +from PythonLinearNonlinearControl.configs.cartpole \ + import CartPoleConfigModule + +class TestCartPoleModel(): + """ + """ + def test_step(self): + config = CartPoleConfigModule() + cartpole_model = CartPoleModel(config) + + curr_x = np.ones(4) + curr_x[2] = np.pi / 6. + + us = np.ones((1, 1)) + + next_x = cartpole_model.predict_traj(curr_x, us) + + d_x0 = curr_x[1] + d_x1 = (1. + config.MP * np.sin(np.pi / 6.) \ + * (config.L * (1.**2) \ + + config.G * np.cos(np.pi / 6.))) \ + / (config.MC + config.MP * np.sin(np.pi / 6.)**2) + d_x2 = curr_x[3] + d_x3 = (-1. * np.cos(np.pi / 6.) \ + - config.MP * config.L * (1.**2) \ + * np.cos(np.pi / 6.) * np.sin(np.pi / 6.) \ + - (config.MP + config.MC) * config.G \ + * np.sin(np.pi / 6.)) \ + / (config.L \ + * (config.MC \ + + config.MP * np.sin(np.pi / 6.)**2)) + + expected = np.array([d_x0, d_x1, d_x2, d_x3]) * config.DT \ + + curr_x + + expected = np.stack((curr_x, expected), axis=0) + + assert next_x == pytest.approx(expected, abs=1e-5) + + def test_predict_traj(self): + config = CartPoleConfigModule() + cartpole_model = CartPoleModel(config) + + curr_x = np.ones(config.STATE_SIZE) + curr_x[-1] = np.pi / 6. + u = np.ones((1, config.INPUT_SIZE)) + + pred_xs = cartpole_model.predict_traj(curr_x, u) + + u = np.tile(u, (2, 1, 1)) + pred_xs_alltogether = cartpole_model.predict_traj(curr_x, u)[0] + + assert pred_xs_alltogether == pytest.approx(pred_xs) \ No newline at end of file diff --git a/tests/models/test_first_order_lag.py b/tests/models/test_first_order_lag.py new file mode 100644 index 0000000..3f1790c --- /dev/null +++ b/tests/models/test_first_order_lag.py @@ -0,0 +1,43 @@ +import pytest +import numpy as np + +from PythonLinearNonlinearControl.models.model \ + import LinearModel +from PythonLinearNonlinearControl.models.first_order_lag \ + import FirstOrderLagModel +from PythonLinearNonlinearControl.configs.first_order_lag \ + import FirstOrderLagConfigModule + +from unittest.mock import patch +from unittest.mock import Mock + +class TestFirstOrderLagModel(): + """ + """ + def test_step(self): + config = FirstOrderLagConfigModule() + firstorderlag_model = FirstOrderLagModel(config) + + curr_x = np.ones(config.STATE_SIZE) + u = np.ones((1, config.INPUT_SIZE)) + + with patch.object(LinearModel, "predict_traj") as mock_predict_traj: + firstorderlag_model.predict_traj(curr_x, u) + + mock_predict_traj.assert_called_once_with(curr_x, u) + + def test_predict_traj(self): + + config = FirstOrderLagConfigModule() + firstorderlag_model = FirstOrderLagModel(config) + + curr_x = np.ones(config.STATE_SIZE) + curr_x[-1] = np.pi / 6. + u = np.ones((1, config.INPUT_SIZE)) + + pred_xs = firstorderlag_model.predict_traj(curr_x, u) + + u = np.tile(u, (1, 1, 1)) + pred_xs_alltogether = firstorderlag_model.predict_traj(curr_x, u)[0] + + assert pred_xs_alltogether == pytest.approx(pred_xs) \ No newline at end of file