        num_seq_probs = sum(x is not None for x in merged_logprobs)
        num_probs += num_seq_probs
        # number of <unk>'s (just for reporting)
        num_unks += len(merged_logprobs) - num_seq_probs
        # number of sequences
        num_sentences += 1

        if word_level:
After Change
num_words = 0
num_probs = 0
num_unks = 0
num_zeroprobs = 0
for word_ids, words, mask in scoring_iter:
    class_ids, membership_probs = vocabulary.get_class_memberships(word_ids)
    logprobs = scorer.score_batch(word_ids, class_ids, membership_probs,
                                  mask)
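    # score_batch evaluates the whole mini-batch at once. logprobs is
    # indexed by sequence, while word_ids and mask are (time step,
    # sequence) matrices, which is why each sentence is picked out
    # column-wise below.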
    for seq_index, seq_logprobs in enumerate(logprobs):
        seq_word_ids = word_ids[:, seq_index]
        seq_mask = mask[:, seq_index]
        seq_word_ids = seq_word_ids[seq_mask == 1]
        seq_words = words[seq_index]
        merged_words, merged_logprobs = _merge_subwords(seq_words,
                                                        seq_logprobs,
                                                        subword_marking)
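        # By convention in the counters below, a None entry in
        # merged_logprobs means no probability was computed for the word
        # (an <unk>), while -inf marks a word that received zero
        # probability.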
        # total logprob of this sequence
        seq_logprob = sum(lp for lp in merged_logprobs
                          if (lp is not None) and (not numpy.isneginf(lp)))
        # total logprob of all sequences
        total_logprob += seq_logprob
        # number of tokens, which may be subwords, including <unk>'s
        num_tokens += len(seq_word_ids)
        # number of words, including <s>'s and <unk>'s
        num_words += len(merged_words)
        # number of word probabilities computed (may not include <unk>'s)
        num_seq_probs = sum((lp is not None) and (not numpy.isneginf(lp))
                            for lp in merged_logprobs)
        num_probs += num_seq_probs
        # number of unks and zeroprobs (just for reporting); guard the
        # isneginf call, since it raises a TypeError on None entries
        num_unks += sum(lp is None for lp in merged_logprobs)
        num_zeroprobs += sum((lp is not None) and numpy.isneginf(lp)
                             for lp in merged_logprobs)
        # number of sequences
        num_sentences += 1

        if word_level:
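
As a sanity check, the counting expressions above can be exercised on a
hand-built merged_logprobs list. This snippet is not part of the change
itself; the values are made up for illustration and numpy is the only
dependency.

import numpy

# One sentence: two scored words, one <unk> (None), and one word with
# zero probability (-inf). The logprobs are exactly representable in
# binary floating point so the assertions below hold.
merged_logprobs = [-1.5, None, -0.75, -numpy.inf]

# Same expressions as in the loop above.
seq_logprob = sum(lp for lp in merged_logprobs
                  if (lp is not None) and (not numpy.isneginf(lp)))
num_probs = sum((lp is not None) and (not numpy.isneginf(lp))
                for lp in merged_logprobs)
num_unks = sum(lp is None for lp in merged_logprobs)
num_zeroprobs = sum((lp is not None) and numpy.isneginf(lp)
                    for lp in merged_logprobs)

assert seq_logprob == -2.25
assert (num_probs, num_unks, num_zeroprobs) == (2, 1, 1)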