# Apply corpus_parser to doc_preprocessor, then check the Document and
# Sentence row counts visible through the session.
corpus_parser.apply(doc_preprocessor, parallelism=PARALLEL)
assert session.query(Document).count() == max_docs
assert session.query(Sentence).count() == 828
sents = session.query(Sentence).all()
# Doesn't matter which sentence we grab, since we are overwriting the
# relevant attribs
sent = sents[0]
# Overwrite the sentence with a single word starting at offset 0.
sent.text = "New-Text"
sent.words = ["New-Text"]
sent.char_offsets = [0]
sent.abs_char_offsets = [0]
After Change
# When a text ends with a split_token.
sent.text = "New-"
sent.words = ["New-"]
result = list(ngrams.apply(sent))
# Two spans are expected: the whole word "New-" and "New" without the
# trailing hyphen.
assert len(result) == 2
assert result[0].get_span() == "New-"
assert result[1].get_span() == "New"