# NumPy has an np.argpartition() method, however log(1000) is so small that
# sorting the whole array is simpler and fast enough.
# Select the indices of the top-k scores for each user (one user per row),
# ordered from highest to lowest score.
top_indices = np.argsort(predicted_scores_by_user, axis=1)[:, -_TOP_K:]
top_indices = np.flip(top_indices, axis=1)

# Both HR and NDCG vectorized computation takes advantage of the fact that if
# the positive example for a user is not in the top k, that index does not
# appear. That is to say: hit_ind.shape[0] <= num_users
# NOTE(review): this assumes column 0 of each user's row holds the positive
# example (np.equal(..., 0) tests for index 0) — confirm against the eval
# input pipeline.
hit_ind = np.argwhere(np.equal(top_indices, 0))

# Hit rate: fraction of users whose positive item appeared in the top k.
hr = hit_ind.shape[0] / ncf_dataset.num_users
# NDCG: each hit contributes log(2) / log(rank + 2), i.e. 1 / log2(rank + 2),
# where hit_ind[:, 1] is the zero-based rank of the hit within the top k.
ndcg = np.sum(np.log(2) / np.log(hit_ind[:, 1] + 2)) / ncf_dataset.num_users

global_step = estimator.get_variable_value(tf.GraphKeys.GLOBAL_STEP)
eval_results = {
_HR_KEY: hr,
# ---- After Change ----
# Get predictions. yield_single_examples=False makes the estimator yield one
# dict per batch rather than one dict per example.
predictions = estimator.predict(input_fn=pred_input_fn,
                                yield_single_examples=False)
predictions = list(predictions)
prediction_batches = [p[movielens.RATING_COLUMN] for p in predictions]
item_batches = [p[movielens.ITEM_COLUMN] for p in predictions]

# Reshape the predicted scores and items. Each user takes one row: the
# positive example plus NUM_EVAL_NEGATIVES negatives. Anything past
# num_users * (1 + NUM_EVAL_NEGATIVES) is batch padding appended by the
# input pipeline and is sliced off before reshaping.
num_eval_rows = ncf_dataset.num_users * (1 + rconst.NUM_EVAL_NEGATIVES)

prediction_with_padding = np.concatenate(prediction_batches, axis=0)
predicted_scores_by_user = prediction_with_padding[:num_eval_rows].reshape(
    ncf_dataset.num_users, -1)

item_with_padding = np.concatenate(item_batches, axis=0)
items_by_user = item_with_padding[:num_eval_rows].reshape(
    ncf_dataset.num_users, -1)