# Run the corpus parser over the preprocessed documents, then sanity-check
# how many Document and Sentence rows ended up in the session.
corpus_parser.apply(doc_preprocessor, parallelism=PARALLEL)
assert session.query(Document).count() == max_docs
assert session.query(Sentence).count() == 828
sents = session.query(Sentence).all()

# Doesn't matter which sentence we grab, since we are overwriting the
# relevant attribs
sent = sents[0]

# Replace the sentence content with a single hyphenated token.
sent.text = "New-Text"
sent.words = ["New-Text"]
# One offset entry for the single word above.
sent.char_offsets = [0]
sent.abs_char_offsets = [0]
# After Change
# When a text ends with a split_token.
sent.text = "New-"
sent.words = ["New-"]
result = list(ngrams.apply(sent))

# Exactly two spans are expected: the full token and the piece before the
# trailing split token.
assert len(result) == 2
assert result[0].get_span() == "New-"
assert result[1].get_span() == "New"