Update: Cartpole Env and Cartpole models
This commit is contained in:
parent
e5237201f0
commit
1ed489cfc4
|
@ -41,7 +41,7 @@ X_g denote the goal states.
|
||||||
|
|
||||||
## [CatpoleEnv (Swing up)](PythonLinearNonlinearControl/envs/cartpole.py)
|
## [CatpoleEnv (Swing up)](PythonLinearNonlinearControl/envs/cartpole.py)
|
||||||
|
|
||||||
System equation.
|
## System equation.
|
||||||
|
|
||||||
<img src="assets/cartpole.png" width="600">
|
<img src="assets/cartpole.png" width="600">
|
||||||
|
|
||||||
|
|
|
@ -10,7 +10,11 @@ class CartPoleConfigModule():
|
||||||
INPUT_SIZE = 1
|
INPUT_SIZE = 1
|
||||||
DT = 0.02
|
DT = 0.02
|
||||||
# cost parameters
|
# cost parameters
|
||||||
R = np.diag([0.01])
|
R = np.diag([1.]) # 0.01 is worked for MPPI and CEM and MPPIWilliams
|
||||||
|
# 1. is worked for iLQR
|
||||||
|
Terminal_Weight = 1.
|
||||||
|
Q = None
|
||||||
|
Sf = None
|
||||||
# bounds
|
# bounds
|
||||||
INPUT_LOWER_BOUND = np.array([-3.])
|
INPUT_LOWER_BOUND = np.array([-3.])
|
||||||
INPUT_UPPER_BOUND = np.array([3.])
|
INPUT_UPPER_BOUND = np.array([3.])
|
||||||
|
@ -128,12 +132,14 @@ class CartPoleConfigModule():
|
||||||
return (6. * (terminal_x[:, 0]**2) \
|
return (6. * (terminal_x[:, 0]**2) \
|
||||||
+ 12. * ((np.cos(terminal_x[:, 2]) + 1.)**2) \
|
+ 12. * ((np.cos(terminal_x[:, 2]) + 1.)**2) \
|
||||||
+ 0.1 * (terminal_x[:, 1]**2) \
|
+ 0.1 * (terminal_x[:, 1]**2) \
|
||||||
+ 0.1 * (terminal_x[:, 3]**2))[:, np.newaxis]
|
+ 0.1 * (terminal_x[:, 3]**2))[:, np.newaxis] \
|
||||||
|
* CartPoleConfigModule.Terminal_Weight
|
||||||
|
|
||||||
return 6. * (terminal_x[0]**2) \
|
return (6. * (terminal_x[0]**2) \
|
||||||
+ 12. * ((np.cos(terminal_x[2]) + 1.)**2) \
|
+ 12. * ((np.cos(terminal_x[2]) + 1.)**2) \
|
||||||
+ 0.1 * (terminal_x[1]**2) \
|
+ 0.1 * (terminal_x[1]**2) \
|
||||||
+ 0.1 * (terminal_x[3]**2)
|
+ 0.1 * (terminal_x[3]**2)) \
|
||||||
|
* CartPoleConfigModule.Terminal_Weight
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def gradient_cost_fn_with_state(x, g_x, terminal=False):
|
def gradient_cost_fn_with_state(x, g_x, terminal=False):
|
||||||
|
@ -148,9 +154,21 @@ class CartPoleConfigModule():
|
||||||
or shape(1, state_size)
|
or shape(1, state_size)
|
||||||
"""
|
"""
|
||||||
if not terminal:
|
if not terminal:
|
||||||
return None
|
cost_dx0 = 12. * x[:, 0]
|
||||||
|
cost_dx1 = 0.2 * x[:, 1]
|
||||||
|
cost_dx2 = 24. * (1 + np.cos(x[:, 2])) * -np.sin(x[:, 2])
|
||||||
|
cost_dx3 = 0.2 * x[:, 3]
|
||||||
|
cost_dx = np.stack((cost_dx0, cost_dx1,\
|
||||||
|
cost_dx2, cost_dx3), axis=1)
|
||||||
|
return cost_dx
|
||||||
|
|
||||||
return None
|
cost_dx0 = 12. * x[0]
|
||||||
|
cost_dx1 = 0.2 * x[1]
|
||||||
|
cost_dx2 = 24. * (1 + np.cos(x[2])) * -np.sin(x[2])
|
||||||
|
cost_dx3 = 0.2 * x[3]
|
||||||
|
cost_dx = np.array([[cost_dx0, cost_dx1, cost_dx2, cost_dx3]])
|
||||||
|
|
||||||
|
return cost_dx * CartPoleConfigModule.Terminal_Weight
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def gradient_cost_fn_with_input(x, u):
|
def gradient_cost_fn_with_input(x, u):
|
||||||
|
@ -163,7 +181,7 @@ class CartPoleConfigModule():
|
||||||
Returns:
|
Returns:
|
||||||
l_u (numpy.ndarray): gradient of cost, shape(pred_len, input_size)
|
l_u (numpy.ndarray): gradient of cost, shape(pred_len, input_size)
|
||||||
"""
|
"""
|
||||||
return None
|
return 2. * u * np.diag(CartPoleConfigModule.R)
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def hessian_cost_fn_with_state(x, g_x, terminal=False):
|
def hessian_cost_fn_with_state(x, g_x, terminal=False):
|
||||||
|
@ -179,10 +197,30 @@ class CartPoleConfigModule():
|
||||||
shape(1, state_size, state_size) or
|
shape(1, state_size, state_size) or
|
||||||
"""
|
"""
|
||||||
if not terminal:
|
if not terminal:
|
||||||
(pred_len, _) = x.shape
|
(pred_len, state_size) = x.shape
|
||||||
return None
|
hessian = np.eye(state_size)
|
||||||
|
hessian = np.tile(hessian, (pred_len, 1, 1))
|
||||||
|
hessian[:, 0, 0] = 12.
|
||||||
|
hessian[:, 1, 1] = 0.2
|
||||||
|
hessian[:, 2, 2] = 24. * -np.sin(x[:, 2]) \
|
||||||
|
* (-np.sin(x[:, 2])) \
|
||||||
|
+ 24. * (1. + np.cos(x[:, 2])) \
|
||||||
|
* -np.cos(x[:, 2])
|
||||||
|
hessian[:, 3, 3] = 0.2
|
||||||
|
|
||||||
return None
|
return hessian
|
||||||
|
|
||||||
|
state_size = len(x)
|
||||||
|
hessian = np.eye(state_size)
|
||||||
|
hessian[0, 0] = 12.
|
||||||
|
hessian[1, 1] = 0.2
|
||||||
|
hessian[2, 2] = 24. * -np.sin(x[2]) \
|
||||||
|
* (-np.sin(x[2])) \
|
||||||
|
+ 24. * (1. + np.cos(x[2])) \
|
||||||
|
* -np.cos(x[2])
|
||||||
|
hessian[3, 3] = 0.2
|
||||||
|
|
||||||
|
return hessian[np.newaxis, :, :] * CartPoleConfigModule.Terminal_Weight
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def hessian_cost_fn_with_input(x, u):
|
def hessian_cost_fn_with_input(x, u):
|
||||||
|
@ -198,7 +236,7 @@ class CartPoleConfigModule():
|
||||||
"""
|
"""
|
||||||
(pred_len, _) = u.shape
|
(pred_len, _) = u.shape
|
||||||
|
|
||||||
return None
|
return np.tile(2.*CartPoleConfigModule.R, (pred_len, 1, 1))
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def hessian_cost_fn_with_input_state(x, u):
|
def hessian_cost_fn_with_input_state(x, u):
|
||||||
|
|
|
@ -159,11 +159,9 @@ class FirstOrderLagConfigModule():
|
||||||
"""
|
"""
|
||||||
if not terminal:
|
if not terminal:
|
||||||
(pred_len, _) = x.shape
|
(pred_len, _) = x.shape
|
||||||
return -g_x[:, :, np.newaxis] \
|
return np.tile(2.*FirstOrderLagConfigModule.Q, (pred_len, 1, 1))
|
||||||
* np.tile(2.*FirstOrderLagConfigModule.Q, (pred_len, 1, 1))
|
|
||||||
|
|
||||||
return -g_x[:, np.newaxis] \
|
return np.tile(2.*FirstOrderLagConfigModule.Sf, (1, 1, 1))
|
||||||
* np.tile(2.*FirstOrderLagConfigModule.Sf, (1, 1, 1))
|
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def hessian_cost_fn_with_input(x, u):
|
def hessian_cost_fn_with_input(x, u):
|
||||||
|
|
|
@ -153,11 +153,9 @@ class TwoWheeledConfigModule():
|
||||||
"""
|
"""
|
||||||
if not terminal:
|
if not terminal:
|
||||||
(pred_len, _) = x.shape
|
(pred_len, _) = x.shape
|
||||||
return -g_x[:, :, np.newaxis] \
|
return np.tile(2.*TwoWheeledConfigModule.Q, (pred_len, 1, 1))
|
||||||
* np.tile(2.*TwoWheeledConfigModule.Q, (pred_len, 1, 1))
|
|
||||||
|
|
||||||
return -g_x[:, np.newaxis] \
|
return np.tile(2.*TwoWheeledConfigModule.Sf, (1, 1, 1))
|
||||||
* np.tile(2.*TwoWheeledConfigModule.Sf, (1, 1, 1))
|
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def hessian_cost_fn_with_input(x, u):
|
def hessian_cost_fn_with_input(x, u):
|
||||||
|
|
|
@ -50,11 +50,6 @@ class iLQR(Controller):
|
||||||
self.input_size = config.INPUT_SIZE
|
self.input_size = config.INPUT_SIZE
|
||||||
self.dt = config.DT
|
self.dt = config.DT
|
||||||
|
|
||||||
# cost parameters
|
|
||||||
self.Q = config.Q
|
|
||||||
self.R = config.R
|
|
||||||
self.Sf = config.Sf
|
|
||||||
|
|
||||||
# initialize
|
# initialize
|
||||||
self.prev_sol = np.zeros((self.pred_len, self.input_size))
|
self.prev_sol = np.zeros((self.pred_len, self.input_size))
|
||||||
|
|
||||||
|
|
|
@ -37,7 +37,8 @@ class CartPoleEnv(Env):
|
||||||
"""
|
"""
|
||||||
self.step_count = 0
|
self.step_count = 0
|
||||||
|
|
||||||
self.curr_x = np.array([0., 0., 0., 0.])
|
theta = np.random.randn(1)
|
||||||
|
self.curr_x = np.array([0., 0., theta[0], 0.])
|
||||||
|
|
||||||
if init_x is not None:
|
if init_x is not None:
|
||||||
self.curr_x = init_x
|
self.curr_x = init_x
|
||||||
|
|
|
@ -90,8 +90,41 @@ class CartPoleModel(Model):
|
||||||
|
|
||||||
f_x = np.zeros((pred_len, state_size, state_size))
|
f_x = np.zeros((pred_len, state_size, state_size))
|
||||||
|
|
||||||
f_x[:, 0, 2] = -np.sin(xs[:, 2]) * us[:, 0]
|
# f_x_dot
|
||||||
f_x[:, 1, 2] = np.cos(xs[:, 2]) * us[:, 0]
|
f_x[:, 0, 1] = np.ones(pred_len)
|
||||||
|
|
||||||
|
# f_theta
|
||||||
|
tmp = ((self.mc + self.mp * np.sin(xs[:, 2])**2)**(-2)) \
|
||||||
|
* self.mp * 2. * np.sin(xs[:, 2]) * np.cos(xs[:, 2])
|
||||||
|
tmp2 = 1. / (self.mc + self.mp * (np.sin(xs[:, 2])**2))
|
||||||
|
|
||||||
|
f_x[:, 1, 2] = - us[:, 0] * tmp \
|
||||||
|
- tmp * (self.mp * np.sin(xs[:, 2]) \
|
||||||
|
* (self.l * xs[:, 3]**2 \
|
||||||
|
+ self.g * np.cos(xs[:, 2]))) \
|
||||||
|
+ tmp2 * (self.mp * np.cos(xs[:, 2]) * self.l \
|
||||||
|
* xs[:, 3]**2 \
|
||||||
|
+ self.mp * self.g * (np.cos(xs[:, 2])**2 \
|
||||||
|
- np.sin(xs[:, 2])**2))
|
||||||
|
f_x[:, 3, 2] = - 1. / self.l * tmp \
|
||||||
|
* (-us[:, 0] * np.cos(xs[:, 2]) \
|
||||||
|
- self.mp * self.l * (xs[:, 3]**2) \
|
||||||
|
* np.cos(xs[:, 2]) * np.sin(xs[:, 2]) \
|
||||||
|
- (self.mc + self.mp) * self.g * np.sin(xs[:, 2])) \
|
||||||
|
+ 1. / self.l * tmp2 \
|
||||||
|
* (us[:, 0] * np.sin(xs[:, 2]) \
|
||||||
|
- self.mp * self.l * xs[:, 3]**2 \
|
||||||
|
* (np.cos(xs[:, 2])**2 - np.sin(xs[:, 2])**2) \
|
||||||
|
- (self.mc + self.mp) \
|
||||||
|
* self.g * np.cos(xs[:, 2]))
|
||||||
|
|
||||||
|
# f_theta_dot
|
||||||
|
f_x[:, 1, 3] = tmp2 * (self.mp * np.sin(xs[:, 2]) \
|
||||||
|
* self.l * 2 * xs[:, 3])
|
||||||
|
f_x[:, 2, 3] = np.ones(pred_len)
|
||||||
|
f_x[:, 3, 3] = 1. / self.l * tmp2 \
|
||||||
|
* (-2. * self.mp * self.l * xs[:, 3] \
|
||||||
|
* np.cos(xs[:, 2]) * np.sin(xs[:, 2]))
|
||||||
|
|
||||||
return f_x * dt + np.eye(state_size) # to discrete form
|
return f_x * dt + np.eye(state_size) # to discrete form
|
||||||
|
|
||||||
|
@ -139,10 +172,7 @@ class CartPoleModel(Model):
|
||||||
|
|
||||||
f_xx = np.zeros((pred_len, state_size, state_size, state_size))
|
f_xx = np.zeros((pred_len, state_size, state_size, state_size))
|
||||||
|
|
||||||
f_xx[:, 0, 2, 2] = -np.cos(xs[:, 2]) * us[:, 0]
|
raise NotImplementedError
|
||||||
f_xx[:, 1, 2, 2] = -np.sin(xs[:, 2]) * us[:, 0]
|
|
||||||
|
|
||||||
return f_xx * dt
|
|
||||||
|
|
||||||
def calc_f_ux(self, xs, us, dt):
|
def calc_f_ux(self, xs, us, dt):
|
||||||
""" hessian of model with respect to state and input in batch form
|
""" hessian of model with respect to state and input in batch form
|
||||||
|
@ -161,10 +191,7 @@ class CartPoleModel(Model):
|
||||||
|
|
||||||
f_ux = np.zeros((pred_len, state_size, input_size, state_size))
|
f_ux = np.zeros((pred_len, state_size, input_size, state_size))
|
||||||
|
|
||||||
f_ux[:, 0, 0, 2] = -np.sin(xs[:, 2])
|
raise NotImplementedError
|
||||||
f_ux[:, 1, 0, 2] = np.cos(xs[:, 2])
|
|
||||||
|
|
||||||
return f_ux * dt
|
|
||||||
|
|
||||||
def calc_f_uu(self, xs, us, dt):
|
def calc_f_uu(self, xs, us, dt):
|
||||||
""" hessian of model with respect to input in batch form
|
""" hessian of model with respect to input in batch form
|
||||||
|
@ -183,4 +210,4 @@ class CartPoleModel(Model):
|
||||||
|
|
||||||
f_uu = np.zeros((pred_len, state_size, input_size, input_size))
|
f_uu = np.zeros((pred_len, state_size, input_size, input_size))
|
||||||
|
|
||||||
return f_uu * dt
|
raise NotImplementedError
|
|
@ -42,9 +42,9 @@ def run(args):
|
||||||
def main():
|
def main():
|
||||||
parser = argparse.ArgumentParser()
|
parser = argparse.ArgumentParser()
|
||||||
|
|
||||||
parser.add_argument("--controller_type", type=str, default="CEM")
|
parser.add_argument("--controller_type", type=str, default="DDP")
|
||||||
parser.add_argument("--planner_type", type=str, default="const")
|
parser.add_argument("--planner_type", type=str, default="const")
|
||||||
parser.add_argument("--env", type=str, default="TwoWheeledConst")
|
parser.add_argument("--env", type=str, default="CartPole")
|
||||||
parser.add_argument("--result_dir", type=str, default="./result")
|
parser.add_argument("--result_dir", type=str, default="./result")
|
||||||
|
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
|
|
|
@ -29,3 +29,153 @@ class TestCalcCost():
|
||||||
g_xs[:, -1, :])
|
g_xs[:, -1, :])
|
||||||
|
|
||||||
assert costs.shape == (pop_size, 1)
|
assert costs.shape == (pop_size, 1)
|
||||||
|
|
||||||
|
class TestGradient():
|
||||||
|
def test_state_gradient(self):
|
||||||
|
"""
|
||||||
|
"""
|
||||||
|
xs = np.ones((1, 4))
|
||||||
|
cost_grad = CartPoleConfigModule.gradient_cost_fn_with_state(xs, None)
|
||||||
|
|
||||||
|
# numeric grad
|
||||||
|
eps = 1e-4
|
||||||
|
expected_grad = np.zeros((1, 4))
|
||||||
|
for i in range(4):
|
||||||
|
tmp_x = xs.copy()
|
||||||
|
tmp_x[0, i] = xs[0, i] + eps
|
||||||
|
forward = \
|
||||||
|
CartPoleConfigModule.state_cost_fn(tmp_x, None)
|
||||||
|
tmp_x = xs.copy()
|
||||||
|
tmp_x[0, i] = xs[0, i] - eps
|
||||||
|
backward = \
|
||||||
|
CartPoleConfigModule.state_cost_fn(tmp_x, None)
|
||||||
|
|
||||||
|
expected_grad[0, i] = (forward - backward) / (2. * eps)
|
||||||
|
|
||||||
|
assert cost_grad == pytest.approx(expected_grad)
|
||||||
|
|
||||||
|
def test_terminal_state_gradient(self):
|
||||||
|
"""
|
||||||
|
"""
|
||||||
|
xs = np.ones(4)
|
||||||
|
cost_grad =\
|
||||||
|
CartPoleConfigModule.gradient_cost_fn_with_state(xs, None,
|
||||||
|
terminal=True)
|
||||||
|
|
||||||
|
# numeric grad
|
||||||
|
eps = 1e-4
|
||||||
|
expected_grad = np.zeros((1, 4))
|
||||||
|
for i in range(4):
|
||||||
|
tmp_x = xs.copy()
|
||||||
|
tmp_x[i] = xs[i] + eps
|
||||||
|
forward = \
|
||||||
|
CartPoleConfigModule.state_cost_fn(tmp_x, None)
|
||||||
|
tmp_x = xs.copy()
|
||||||
|
tmp_x[i] = xs[i] - eps
|
||||||
|
backward = \
|
||||||
|
CartPoleConfigModule.state_cost_fn(tmp_x, None)
|
||||||
|
|
||||||
|
expected_grad[0, i] = (forward - backward) / (2. * eps)
|
||||||
|
|
||||||
|
assert cost_grad == pytest.approx(expected_grad)
|
||||||
|
|
||||||
|
def test_input_gradient(self):
|
||||||
|
"""
|
||||||
|
"""
|
||||||
|
us = np.ones((1, 1))
|
||||||
|
cost_grad = CartPoleConfigModule.gradient_cost_fn_with_input(None, us)
|
||||||
|
|
||||||
|
# numeric grad
|
||||||
|
eps = 1e-4
|
||||||
|
expected_grad = np.zeros((1, 1))
|
||||||
|
for i in range(1):
|
||||||
|
tmp_u = us.copy()
|
||||||
|
tmp_u[0, i] = us[0, i] + eps
|
||||||
|
forward = \
|
||||||
|
CartPoleConfigModule.input_cost_fn(tmp_u)
|
||||||
|
tmp_u = us.copy()
|
||||||
|
tmp_u[0, i] = us[0, i] - eps
|
||||||
|
backward = \
|
||||||
|
CartPoleConfigModule.input_cost_fn(tmp_u)
|
||||||
|
|
||||||
|
expected_grad[0, i] = (forward - backward) / (2. * eps)
|
||||||
|
|
||||||
|
assert cost_grad == pytest.approx(expected_grad)
|
||||||
|
|
||||||
|
def test_state_hessian(self):
|
||||||
|
"""
|
||||||
|
"""
|
||||||
|
xs = np.ones((1, 4))
|
||||||
|
cost_hess = CartPoleConfigModule.hessian_cost_fn_with_state(xs, None)
|
||||||
|
|
||||||
|
# numeric grad
|
||||||
|
eps = 1e-4
|
||||||
|
expected_hess = np.zeros((1, 4, 4))
|
||||||
|
for i in range(4):
|
||||||
|
tmp_x = xs.copy()
|
||||||
|
tmp_x[0, i] = xs[0, i] + eps
|
||||||
|
forward = \
|
||||||
|
CartPoleConfigModule.gradient_cost_fn_with_state(
|
||||||
|
tmp_x, None, terminal=False)
|
||||||
|
tmp_x = xs.copy()
|
||||||
|
tmp_x[0, i] = xs[0, i] - eps
|
||||||
|
backward = \
|
||||||
|
CartPoleConfigModule.gradient_cost_fn_with_state(
|
||||||
|
tmp_x, None, terminal=False)
|
||||||
|
|
||||||
|
expected_hess[0, :, i] = (forward - backward) / (2. * eps)
|
||||||
|
|
||||||
|
assert cost_hess == pytest.approx(expected_hess)
|
||||||
|
|
||||||
|
def test_terminal_state_hessian(self):
|
||||||
|
"""
|
||||||
|
"""
|
||||||
|
xs = np.ones(4)
|
||||||
|
cost_hess =\
|
||||||
|
CartPoleConfigModule.hessian_cost_fn_with_state(xs, None,
|
||||||
|
terminal=True)
|
||||||
|
|
||||||
|
# numeric grad
|
||||||
|
eps = 1e-4
|
||||||
|
expected_hess = np.zeros((1, 4, 4))
|
||||||
|
for i in range(4):
|
||||||
|
tmp_x = xs.copy()
|
||||||
|
tmp_x[i] = xs[i] + eps
|
||||||
|
forward = \
|
||||||
|
CartPoleConfigModule.gradient_cost_fn_with_state(
|
||||||
|
tmp_x, None, terminal=True)
|
||||||
|
tmp_x = xs.copy()
|
||||||
|
tmp_x[i] = xs[i] - eps
|
||||||
|
backward = \
|
||||||
|
CartPoleConfigModule.gradient_cost_fn_with_state(
|
||||||
|
tmp_x, None, terminal=True)
|
||||||
|
|
||||||
|
expected_hess[0, :, i] = (forward - backward) / (2. * eps)
|
||||||
|
|
||||||
|
assert cost_hess == pytest.approx(expected_hess)
|
||||||
|
|
||||||
|
def test_input_hessian(self):
|
||||||
|
"""
|
||||||
|
"""
|
||||||
|
us = np.ones((1, 1))
|
||||||
|
xs = np.ones((1, 4))
|
||||||
|
cost_hess = CartPoleConfigModule.hessian_cost_fn_with_input(us, xs)
|
||||||
|
|
||||||
|
# numeric grad
|
||||||
|
eps = 1e-4
|
||||||
|
expected_hess = np.zeros((1, 1, 1))
|
||||||
|
for i in range(1):
|
||||||
|
tmp_u = us.copy()
|
||||||
|
tmp_u[0, i] = us[0, i] + eps
|
||||||
|
forward = \
|
||||||
|
CartPoleConfigModule.gradient_cost_fn_with_input(
|
||||||
|
xs, tmp_u)
|
||||||
|
tmp_u = us.copy()
|
||||||
|
tmp_u[0, i] = us[0, i] - eps
|
||||||
|
backward = \
|
||||||
|
CartPoleConfigModule.gradient_cost_fn_with_input(
|
||||||
|
xs, tmp_u)
|
||||||
|
|
||||||
|
expected_hess[0, :, i] = (forward - backward) / (2. * eps)
|
||||||
|
|
||||||
|
assert cost_hess == pytest.approx(expected_hess)
|
|
@ -32,3 +32,110 @@ class TestCalcCost():
|
||||||
expected_costs = np.ones((pop_size, state_size))*0.5
|
expected_costs = np.ones((pop_size, state_size))*0.5
|
||||||
|
|
||||||
assert costs == pytest.approx(expected_costs**2 * np.diag(config.Sf))
|
assert costs == pytest.approx(expected_costs**2 * np.diag(config.Sf))
|
||||||
|
|
||||||
|
class TestGradient():
|
||||||
|
def test_state_gradient(self):
|
||||||
|
"""
|
||||||
|
"""
|
||||||
|
xs = np.ones((1, 3))
|
||||||
|
g_xs = np.ones((1, 3)) * 0.5
|
||||||
|
cost_grad =\
|
||||||
|
TwoWheeledConfigModule.gradient_cost_fn_with_state(xs, g_xs)
|
||||||
|
|
||||||
|
# numeric grad
|
||||||
|
eps = 1e-4
|
||||||
|
expected_grad = np.zeros((1, 3))
|
||||||
|
for i in range(3):
|
||||||
|
tmp_x = xs.copy()
|
||||||
|
tmp_x[0, i] = xs[0, i] + eps
|
||||||
|
forward = \
|
||||||
|
TwoWheeledConfigModule.state_cost_fn(tmp_x, g_xs)
|
||||||
|
forward = np.sum(forward)
|
||||||
|
tmp_x = xs.copy()
|
||||||
|
tmp_x[0, i] = xs[0, i] - eps
|
||||||
|
backward = \
|
||||||
|
TwoWheeledConfigModule.state_cost_fn(tmp_x, g_xs)
|
||||||
|
backward = np.sum(backward)
|
||||||
|
|
||||||
|
expected_grad[0, i] = (forward - backward) / (2. * eps)
|
||||||
|
|
||||||
|
assert cost_grad == pytest.approx(expected_grad)
|
||||||
|
|
||||||
|
def test_input_gradient(self):
|
||||||
|
"""
|
||||||
|
"""
|
||||||
|
us = np.ones((1, 2))
|
||||||
|
cost_grad =\
|
||||||
|
TwoWheeledConfigModule.gradient_cost_fn_with_input(None, us)
|
||||||
|
|
||||||
|
# numeric grad
|
||||||
|
eps = 1e-4
|
||||||
|
expected_grad = np.zeros((1, 2))
|
||||||
|
for i in range(2):
|
||||||
|
tmp_u = us.copy()
|
||||||
|
tmp_u[0, i] = us[0, i] + eps
|
||||||
|
forward = \
|
||||||
|
TwoWheeledConfigModule.input_cost_fn(tmp_u)
|
||||||
|
forward = np.sum(forward)
|
||||||
|
tmp_u = us.copy()
|
||||||
|
tmp_u[0, i] = us[0, i] - eps
|
||||||
|
backward = \
|
||||||
|
TwoWheeledConfigModule.input_cost_fn(tmp_u)
|
||||||
|
backward = np.sum(backward)
|
||||||
|
|
||||||
|
expected_grad[0, i] = (forward - backward) / (2. * eps)
|
||||||
|
|
||||||
|
assert cost_grad == pytest.approx(expected_grad)
|
||||||
|
|
||||||
|
def test_state_hessian(self):
|
||||||
|
"""
|
||||||
|
"""
|
||||||
|
g_xs = np.ones((1, 3)) * 0.5
|
||||||
|
xs = np.ones((1, 3))
|
||||||
|
cost_hess =\
|
||||||
|
TwoWheeledConfigModule.hessian_cost_fn_with_state(xs, g_xs)
|
||||||
|
|
||||||
|
# numeric grad
|
||||||
|
eps = 1e-4
|
||||||
|
expected_hess = np.zeros((1, 3, 3))
|
||||||
|
for i in range(3):
|
||||||
|
tmp_x = xs.copy()
|
||||||
|
tmp_x[0, i] = xs[0, i] + eps
|
||||||
|
forward = \
|
||||||
|
TwoWheeledConfigModule.gradient_cost_fn_with_state(
|
||||||
|
tmp_x, g_xs, terminal=False)
|
||||||
|
tmp_x = xs.copy()
|
||||||
|
tmp_x[0, i] = xs[0, i] - eps
|
||||||
|
backward = \
|
||||||
|
TwoWheeledConfigModule.gradient_cost_fn_with_state(
|
||||||
|
tmp_x, g_xs, terminal=False)
|
||||||
|
|
||||||
|
expected_hess[0, :, i] = (forward - backward) / (2. * eps)
|
||||||
|
|
||||||
|
assert cost_hess == pytest.approx(expected_hess)
|
||||||
|
|
||||||
|
def test_input_hessian(self):
|
||||||
|
"""
|
||||||
|
"""
|
||||||
|
us = np.ones((1, 2))
|
||||||
|
xs = np.ones((1, 3))
|
||||||
|
cost_hess = TwoWheeledConfigModule.hessian_cost_fn_with_input(us, xs)
|
||||||
|
|
||||||
|
# numeric grad
|
||||||
|
eps = 1e-4
|
||||||
|
expected_hess = np.zeros((1, 2, 2))
|
||||||
|
for i in range(2):
|
||||||
|
tmp_u = us.copy()
|
||||||
|
tmp_u[0, i] = us[0, i] + eps
|
||||||
|
forward = \
|
||||||
|
TwoWheeledConfigModule.gradient_cost_fn_with_input(
|
||||||
|
xs, tmp_u)
|
||||||
|
tmp_u = us.copy()
|
||||||
|
tmp_u[0, i] = us[0, i] - eps
|
||||||
|
backward = \
|
||||||
|
TwoWheeledConfigModule.gradient_cost_fn_with_input(
|
||||||
|
xs, tmp_u)
|
||||||
|
|
||||||
|
expected_hess[0, :, i] = (forward - backward) / (2. * eps)
|
||||||
|
|
||||||
|
assert cost_hess == pytest.approx(expected_hess)
|
|
@ -55,3 +55,63 @@ class TestCartPoleModel():
|
||||||
pred_xs_alltogether = cartpole_model.predict_traj(curr_x, u)[0]
|
pred_xs_alltogether = cartpole_model.predict_traj(curr_x, u)[0]
|
||||||
|
|
||||||
assert pred_xs_alltogether == pytest.approx(pred_xs)
|
assert pred_xs_alltogether == pytest.approx(pred_xs)
|
||||||
|
|
||||||
|
def test_gradient_state(self):
|
||||||
|
|
||||||
|
config = CartPoleConfigModule()
|
||||||
|
cartpole_model = CartPoleModel(config)
|
||||||
|
|
||||||
|
xs = np.ones((1, config.STATE_SIZE)) \
|
||||||
|
* np.random.rand(1, config.STATE_SIZE)
|
||||||
|
xs[0, -1] = np.pi / 6.
|
||||||
|
us = np.ones((1, config.INPUT_SIZE))
|
||||||
|
|
||||||
|
grad = cartpole_model.calc_f_x(xs, us, config.DT)
|
||||||
|
|
||||||
|
# expected cost
|
||||||
|
expected_grad = np.zeros((1, config.STATE_SIZE, config.STATE_SIZE))
|
||||||
|
eps = 1e-4
|
||||||
|
|
||||||
|
for i in range(config.STATE_SIZE):
|
||||||
|
tmp_x = xs.copy()
|
||||||
|
tmp_x[0, i] = xs[0, i] + eps
|
||||||
|
forward = \
|
||||||
|
cartpole_model.predict_next_state(tmp_x[0], us[0])
|
||||||
|
tmp_x = xs.copy()
|
||||||
|
tmp_x[0, i] = xs[0, i] - eps
|
||||||
|
backward = \
|
||||||
|
cartpole_model.predict_next_state(tmp_x[0], us[0])
|
||||||
|
|
||||||
|
expected_grad[0, :, i] = (forward - backward) / (2. * eps)
|
||||||
|
|
||||||
|
assert grad == pytest.approx(expected_grad)
|
||||||
|
|
||||||
|
def test_gradient_input(self):
|
||||||
|
|
||||||
|
config = CartPoleConfigModule()
|
||||||
|
cartpole_model = CartPoleModel(config)
|
||||||
|
|
||||||
|
xs = np.ones((1, config.STATE_SIZE)) \
|
||||||
|
* np.random.rand(1, config.STATE_SIZE)
|
||||||
|
xs[0, -1] = np.pi / 6.
|
||||||
|
us = np.ones((1, config.INPUT_SIZE))
|
||||||
|
|
||||||
|
grad = cartpole_model.calc_f_u(xs, us, config.DT)
|
||||||
|
|
||||||
|
# expected cost
|
||||||
|
expected_grad = np.zeros((1, config.STATE_SIZE, config.INPUT_SIZE))
|
||||||
|
eps = 1e-4
|
||||||
|
|
||||||
|
for i in range(config.INPUT_SIZE):
|
||||||
|
tmp_u = us.copy()
|
||||||
|
tmp_u[0, i] = us[0, i] + eps
|
||||||
|
forward = \
|
||||||
|
cartpole_model.predict_next_state(xs[0], tmp_u[0])
|
||||||
|
tmp_u = us.copy()
|
||||||
|
tmp_u[0, i] = us[0, i] - eps
|
||||||
|
backward = \
|
||||||
|
cartpole_model.predict_next_state(xs[0], tmp_u[0])
|
||||||
|
|
||||||
|
expected_grad[0, :, i] = (forward - backward) / (2. * eps)
|
||||||
|
|
||||||
|
assert grad == pytest.approx(expected_grad)
|
||||||
|
|
|
@ -40,3 +40,61 @@ class TestTwoWheeledModel():
|
||||||
pred_xs_alltogether = two_wheeled_model.predict_traj(curr_x, u)[0]
|
pred_xs_alltogether = two_wheeled_model.predict_traj(curr_x, u)[0]
|
||||||
|
|
||||||
assert pred_xs_alltogether == pytest.approx(pred_xs)
|
assert pred_xs_alltogether == pytest.approx(pred_xs)
|
||||||
|
|
||||||
|
def test_gradient_state(self):
|
||||||
|
|
||||||
|
config = TwoWheeledConfigModule()
|
||||||
|
two_wheeled_model = TwoWheeledModel(config)
|
||||||
|
|
||||||
|
xs = np.ones((1, config.STATE_SIZE))
|
||||||
|
xs[0, -1] = np.pi / 6.
|
||||||
|
us = np.ones((1, config.INPUT_SIZE))
|
||||||
|
|
||||||
|
grad = two_wheeled_model.calc_f_x(xs, us, config.DT)
|
||||||
|
|
||||||
|
# expected cost
|
||||||
|
expected_grad = np.zeros((1, config.STATE_SIZE, config.STATE_SIZE))
|
||||||
|
eps = 1e-4
|
||||||
|
|
||||||
|
for i in range(config.STATE_SIZE):
|
||||||
|
tmp_x = xs.copy()
|
||||||
|
tmp_x[0, i] = xs[0, i] + eps
|
||||||
|
forward = \
|
||||||
|
two_wheeled_model.predict_next_state(tmp_x[0], us[0])
|
||||||
|
tmp_x = xs.copy()
|
||||||
|
tmp_x[0, i] = xs[0, i] - eps
|
||||||
|
backward = \
|
||||||
|
two_wheeled_model.predict_next_state(tmp_x[0], us[0])
|
||||||
|
|
||||||
|
expected_grad[0, :, i] = (forward - backward) / (2. * eps)
|
||||||
|
|
||||||
|
assert grad == pytest.approx(expected_grad)
|
||||||
|
|
||||||
|
def test_gradient_input(self):
|
||||||
|
|
||||||
|
config = TwoWheeledConfigModule()
|
||||||
|
two_wheeled_model = TwoWheeledModel(config)
|
||||||
|
|
||||||
|
xs = np.ones((1, config.STATE_SIZE))
|
||||||
|
xs[0, -1] = np.pi / 6.
|
||||||
|
us = np.ones((1, config.INPUT_SIZE))
|
||||||
|
|
||||||
|
grad = two_wheeled_model.calc_f_u(xs, us, config.DT)
|
||||||
|
|
||||||
|
# expected cost
|
||||||
|
expected_grad = np.zeros((1, config.STATE_SIZE, config.INPUT_SIZE))
|
||||||
|
eps = 1e-4
|
||||||
|
|
||||||
|
for i in range(config.INPUT_SIZE):
|
||||||
|
tmp_u = us.copy()
|
||||||
|
tmp_u[0, i] = us[0, i] + eps
|
||||||
|
forward = \
|
||||||
|
two_wheeled_model.predict_next_state(xs[0], tmp_u[0])
|
||||||
|
tmp_u = us.copy()
|
||||||
|
tmp_u[0, i] = us[0, i] - eps
|
||||||
|
backward = \
|
||||||
|
two_wheeled_model.predict_next_state(xs[0], tmp_u[0])
|
||||||
|
|
||||||
|
expected_grad[0, :, i] = (forward - backward) / (2. * eps)
|
||||||
|
|
||||||
|
assert grad == pytest.approx(expected_grad)
|
||||||
|
|
Loading…
Reference in New Issue