1d4823c0ec446e93d00df8ca654db4b45b63b3d4,rllib/models/tests/test_distributions.py,TestDistributions,test_squashed_gaussian,#TestDistributions#,102
Before Change
def test_squashed_gaussian(self):
Tests the SquashedGaussian ActionDistribution (tf-eager only).
with eager_mode():
input_space = Box(-1.0, 1.0, shape=(200, 10))
low, high = -2.0, 1.0
After Change
def test_squashed_gaussian(self):
    """Tests the SquashedGaussian ActionDistribution (tf graph and eager).

    For each framework yielded by `framework_iterator` this checks that:
    - deterministic samples equal the tanh-squashed, rescaled means,
    - stochastic samples lie strictly inside (low, high) and their mean
      is roughly the expected squashed mean,
    - `logp()` matches a reference log-likelihood computed with numpy
      (and `norm.pdf`, presumably `scipy.stats.norm`), both for sampled
      values and for a small hand-written batch.
    """
    # Bounds of the squashed action range; constant across frameworks.
    low, high = -2.0, 1.0

    def reference_logp(values, means, stds):
        """Returns the expected log-likelihood of (squashed) `values`."""
        # Unsquash values, then get log-llh from regular gaussian.
        unsquashed_values = np.arctanh(
            (values - low) / (high - low) * 2.0 - 1.0)
        log_prob_unsquashed = np.sum(
            np.log(norm.pdf(unsquashed_values, means, stds)), -1)
        # Correct for the tanh change of variables.
        return log_prob_unsquashed - np.sum(
            np.log(1 - np.tanh(unsquashed_values) ** 2), axis=-1)

    for _, sess in framework_iterator(
            frameworks=["tf", "eager"], session=True):
        input_space = Box(-1.0, 1.0, shape=(200, 10))

        # Batch of size=n and deterministic.
        inputs = input_space.sample()
        means, _ = np.split(inputs, 2, axis=-1)
        squashed_distribution = SquashedGaussian(
            inputs, {}, low=low, high=high)
        expected = ((np.tanh(means) + 1.0) / 2.0) * (high - low) + low
        # Sample n times, expect always mean value (deterministic draw).
        out = squashed_distribution.deterministic_sample()
        # Evaluate explicitly when a session is given, consistent with
        # every other tensor evaluated in this test.
        if sess:
            out = sess.run(out)
        check(out, expected)

        # Batch of size=n and non-deterministic -> expect roughly the mean.
        inputs = input_space.sample()
        means, log_stds = np.split(inputs, 2, axis=-1)
        squashed_distribution = SquashedGaussian(
            inputs, {}, low=low, high=high)
        expected = ((np.tanh(means) + 1.0) / 2.0) * (high - low) + low
        values = squashed_distribution.sample()
        if sess:
            values = sess.run(values)
        self.assertTrue(np.max(values) < high)
        self.assertTrue(np.min(values) > low)
        check(np.mean(values), expected.mean(), decimals=1)

        # Test log-likelihood outputs.
        sampled_action_logp = squashed_distribution.logp(values)
        if sess:
            sampled_action_logp = sess.run(sampled_action_logp)
        # Convert to parameters for distr.
        stds = np.exp(
            np.clip(log_stds, MIN_LOG_NN_OUTPUT, MAX_LOG_NN_OUTPUT))
        log_prob = reference_logp(values, means, stds)
        check(np.mean(sampled_action_logp), np.mean(log_prob), rtol=0.01)

        # NN output.
        means = np.array([[0.1, 0.2, 0.3, 0.4, 50.0],
                          [-0.1, -0.2, -0.3, -0.4, -1.0]])
        log_stds = np.array([[0.8, -0.2, 0.3, -1.0, 2.0],
                             [0.7, -0.3, 0.4, -0.9, 2.0]])
        squashed_distribution = SquashedGaussian(
            np.concatenate([means, log_stds], axis=-1), {},
            low=low,
            high=high)
        # Convert to parameters for distr.
        stds = np.exp(log_stds)
        # Values to get log-likelihoods for.
        values = np.array([[0.9, 0.2, 0.4, -0.1, -1.05],
                           [-0.9, -0.2, 0.4, -0.1, -1.05]])
        log_prob = reference_logp(values, means, stds)
        outs = squashed_distribution.logp(values)
        if sess:
            outs = sess.run(outs)
        check(outs, log_prob)
def test_gumbel_softmax(self):
Tests the GumbelSoftmax ActionDistribution (tf graph and eager modes).
for fw, sess in framework_iterator(
frameworks=["tf", "eager"], session=True):
In pattern: SUPERPATTERN
Frequency: 3
Non-data size: 5
Instances
Project Name: ray-project/ray
Commit Name: 1d4823c0ec446e93d00df8ca654db4b45b63b3d4
Time: 2020-04-03
Author: sven@anyscale.io
File Name: rllib/models/tests/test_distributions.py
Class Name: TestDistributions
Method Name: test_squashed_gaussian
Project Name: ray-project/ray
Commit Name: 1d4823c0ec446e93d00df8ca654db4b45b63b3d4
Time: 2020-04-03
Author: sven@anyscale.io
File Name: rllib/agents/dqn/tests/test_dqn.py
Class Name: TestDQN
Method Name: test_dqn_compilation
Project Name: ray-project/ray
Commit Name: 1d4823c0ec446e93d00df8ca654db4b45b63b3d4
Time: 2020-04-03
Author: sven@anyscale.io
File Name: rllib/models/tests/test_distributions.py
Class Name: TestDistributions
Method Name: test_gumbel_softmax