estnltk.estner.ner.Document
An NER document.
estnerdoc = Document()
for json_sent in jsondoc.split_by_sentences():
snt = Sentence()
zipped = list(zip(
json_sent.word_texts,
json_sent.lemmas,
json_sent.root_tokens,
json_sent.forms,
json_sent.endings,
json_sent.postags))
json_toks = [{TEXT: text, LEMMA: lemma, ROOT_TOKENS: root_tokens, FORM: form, ENDING: ending, POSTAG: postag}
for text, lemma, root_tokens, form, ending, postag in zipped]
# add labels, if they are present
for tok, word in zip(json_toks, json_sent.words):
if LABEL in word:
tok[LABEL] = word[LABEL]
for json_tok in json_toks:
token = json_token_to_estner_token(json_tok)
snt.append(token)
estnerdoc.tokens.append(token)
if snt:
for i in range(1, len(snt)):
snt[i-1].next = snt[i]
snt[i].prew = snt[i-1]
estnerdoc.snts.append(snt)
return estnerdoc
After Change
snt[i - 1].next = snt[i]
snt[i].prew = snt[i - 1]
sentences.append(snt)
return Document(sentences=sentences)
def json_token_to_estner_token(json_token):
Convert a JSON-style word token to an estner token.