f44cb644187ca69159fd79fb917077520c3ea031,estnltk/examples/ex01.py,,,#,18

Before Change



corp_path = os.path.join(AA_PATH, "tea_AA_03_1.tasak.xml")

corp = json.loads(json.dumps(parse_tei_corpus(corp_path)))

from estnltk.corpus import Corpus
from estnltk.morf import PyVabamorfAnalyzer
from estnltk.ner import NerTagger

analyzer = PyVabamorfAnalyzer()
tagger = NerTagger()

analyzer(corp, inplace=True)
corp = tagger(corp, inplace=False)

corp = Corpus.construct(corp)
pprint (zip(corp.lemmas, corp.labels))

After Change


from __future__ import unicode_literals, print_function

# Let's define a sample document
text = """Keeletehnoloogia on arvutilingvistika praktiline pool.
Keeletehnoloogid kasutavad arvutilingvistikas välja töötatud 
teooriaid, et luua rakendusi (nt arvutiprogramme), 
mis võimaldavad inimkeelt arvuti abil töödelda ja mõista. 

Tänapäeval on keeletehnoloogia tuntumateks valdkondadeks 
masintõlge, arvutileksikoloogia, dialoogisüsteemid, 
kõneanalüüs ja kõnesüntees.
"""

# tokenize it using default tokenizer
from estnltk import Tokenizer
tokenizer = Tokenizer()
document = tokenizer.tokenize(text)

# tokenized results
print (document.word_texts)
print (document.sentence_texts)
print (document.paragraph_texts)
print (document.text)

# start and end positions of words, sentences and paragraphs
from pprint import pprint
pprint (list(zip(document.word_texts, document.word_spans)))
Italian Trulli
In pattern: SUPERPATTERN

Frequency: 3

Non-data size: 4

Instances


Project Name: estnltk/estnltk
Commit Name: f44cb644187ca69159fd79fb917077520c3ea031
Time: 2014-11-28
Author: brainscauseminds@gmail.com
File Name: estnltk/examples/ex01.py
Class Name:
Method Name:


Project Name: chakki-works/doccano
Commit Name: 49d41416e440926f0a9a8243b4d77f6f5468efe9
Time: 2019-03-12
Author: light.tree.1.13@gmail.com
File Name: app/server/utils.py
Class Name: JsonHandler
Method Name: parse


Project Name: azavea/raster-vision
Commit Name: 221617f6e125bf6ab920bf7fc9b8c23c3aa9d147
Time: 2019-03-18
Author: lewfish@gmail.com
File Name: rastervision/data/label_store/chip_classification_geojson_store.py
Class Name: ChipClassificationGeoJSONStore
Method Name: save