f44cb644187ca69159fd79fb917077520c3ea031,estnltk/examples/ex01.py,,,#,18

Before Change



corp_path = os.path.join(AA_PATH, "tea_AA_03_1.tasak.xml")

corp = json.loads(json.dumps(parse_tei_corpus(corp_path)))

from estnltk.corpus import Corpus
from estnltk.morf import PyVabamorfAnalyzer
from estnltk.ner import NerTagger

analyzer = PyVabamorfAnalyzer()
tagger = NerTagger()

analyzer(corp, inplace=True)
corp = tagger(corp, inplace=False)

corp = Corpus.construct(corp)
pprint (zip(corp.lemmas, corp.labels))

After Change


// tokenize it using default tokenizer
from estnltk import Tokenizer
tokenizer = Tokenizer()
document = tokenizer.tokenize(text)

// tokenized results
print (document.word_texts)
print (document.sentence_texts)
print (document.paragraph_texts)
print (document.text)

// start and end positions of words, sentences and paragraphs
from pprint import pprint
pprint (list(zip(document.word_texts, document.word_spans)))
Italian Trulli
In pattern: SUPERPATTERN

Frequency: 3

Non-data size: 5

Instances


Project Name: estnltk/estnltk
Commit Name: f44cb644187ca69159fd79fb917077520c3ea031
Time: 2014-11-28
Author: brainscauseminds@gmail.com
File Name: estnltk/examples/ex01.py
Class Name:
Method Name:


Project Name: tensorflow/tensorboard
Commit Name: 21491193a9943e93c94a5bf85bf608d43585eb86
Time: 2019-07-17
Author: wchargin@gmail.com
File Name: tensorboard/plugins/text/text_plugin.py
Class Name: TextPlugin
Method Name: index_impl


Project Name: chakki-works/doccano
Commit Name: 49d41416e440926f0a9a8243b4d77f6f5468efe9
Time: 2019-03-12
Author: light.tree.1.13@gmail.com
File Name: app/server/utils.py
Class Name: JsonHandler
Method Name: parse