num_time_steps = layer_input.shape[0]
num_sequences = layer_input.shape[1]
num_samples = self.network.num_noise_samples
num_classes = self.network.vocabulary.num_classes()
# Sample k noise words from unigram distribution. These are shared
# across mini-batch. We need to repeat the distribution as many times as
# we want samples, because multinomial() does not yet use the size
# argument.
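As a concrete illustration of this workaround, here is a minimal numpy sketch, with numpy's default_rng standing in for Theano's symbolic random stream and made-up values for noise_probs and num_samples: tiling the distribution gives one row per desired draw, and argmax turns each one-hot count vector back into a class index.

import numpy

rng = numpy.random.default_rng(0)
noise_probs = numpy.array([0.5, 0.3, 0.2])  # hypothetical unigram distribution
num_samples = 4

# multinomial() draws one count vector per row of pvals, so the
# distribution is repeated once per desired sample.
class_probs = numpy.tile(noise_probs[None, :], (num_samples, 1))
one_hot = numpy.stack([rng.multinomial(1, p) for p in class_probs])
sample = one_hot.argmax(1)  # class index of the single count in each row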
After Change
num_time_steps = layer_input.shape[0]
num_sequences = layer_input.shape[1]
num_samples = self.network.num_noise_samples
num_classes = numpy.int64(self.network.vocabulary.num_classes())
random = self.network.random
# Sample k noise words from unigram distribution. These are shared
# across mini-batch. We need to repeat the distribution as many times as
# we want samples, because multinomial() does not yet use the size
# argument.
if self.network.noise_probs is None:
    # The upper bound is exclusive, so this always creates samples that
    # are < num_classes.
    sample = random.uniform((num_samples,)) * num_classes
    sample = sample.astype("int64")
else:
    class_probs = self.network.noise_probs[None, :]
    class_probs = tensor.tile(class_probs, [num_samples, 1])
    sample = random.multinomial(pvals=class_probs)
    sample = sample.argmax(1)
self.shared_sample_logprobs = \
    self._get_target_list_preact(layer_input, sample)
self.shared_sample = sample
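A minimal numpy check of the scaling-and-cast trick used in the uniform branch above (num_classes and num_samples are made-up values):

import numpy

rng = numpy.random.default_rng(1)
num_classes = 10  # made-up vocabulary size
num_samples = 5

# uniform() draws from [0, 1), so scaling by num_classes gives values in
# [0, num_classes); the int64 cast truncates toward zero, keeping every
# sampled index strictly below num_classes.
sample = (rng.uniform(size=num_samples) * num_classes).astype(numpy.int64)
assert sample.min() >= 0 and sample.max() < num_classes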
# Sample k noise words per training word from unigram distribution.
# multinomial() is only implemented for dimension <= 2, so we'll create
# a 2-dimensional probability distribution and then reshape the result.
num_batch_samples = num_time_steps * num_sequences * num_samples
if self.network.noise_probs is None:
    # The upper bound is exclusive, so this always creates samples that
    # are < num_classes.
    sample = random.uniform((num_batch_samples,)) * num_classes
    sample = sample.astype("int64")
else:
    class_probs = self.network.noise_probs[None, :]
    class_probs = tensor.tile(class_probs, [num_batch_samples, 1])
    sample = random.multinomial(pvals=class_probs)
    sample = sample.argmax(1)
sample = sample.reshape([num_time_steps, num_sequences, num_samples])
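To make the reshape step concrete, a short numpy sketch with made-up dimensions, using the uniform path for brevity:

import numpy

rng = numpy.random.default_rng(2)
num_time_steps, num_sequences, num_samples = 3, 2, 4  # made-up dimensions
num_classes = 10

num_batch_samples = num_time_steps * num_sequences * num_samples
flat = (rng.uniform(size=num_batch_samples) * num_classes).astype(numpy.int64)

# k noise samples for every (time step, sequence) position, recovered by
# reshaping the flat draw.
sample = flat.reshape(num_time_steps, num_sequences, num_samples)
assert sample.shape == (num_time_steps, num_sequences, num_samples)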