f44cb644187ca69159fd79fb917077520c3ea031,estnltk/examples/ex01.py,,,#,18
Before Change
corp_path = os.path.join(AA_PATH, "tea_AA_03_1.tasak.xml")
corp = json.loads(json.dumps(parse_tei_corpus(corp_path)))
from estnltk.corpus import Corpus
from estnltk.morf import PyVabamorfAnalyzer
from estnltk.ner import NerTagger
analyzer = PyVabamorfAnalyzer()
tagger = NerTagger()
analyzer(corp, inplace=True)
corp = tagger(corp, inplace=False)
corp = Corpus.construct(corp)
pprint (zip(corp.lemmas, corp.labels))
After Change
from __future__ import unicode_literals, print_function
# Let's define a sample document
text = """Keeletehnoloogia on arvutilingvistika praktiline pool.
Keeletehnoloogid kasutavad arvutilingvistikas välja töötatud
teooriaid, et luua rakendusi (nt arvutiprogramme),
mis võimaldavad inimkeelt arvuti abil töödelda ja mõista.
Tänapäeval on keeletehnoloogia tuntumateks valdkondadeks
masintõlge, arvutileksikoloogia, dialoogisüsteemid,
kõneanalüüs ja kõnesüntees.
"""
# Tokenize it using the default tokenizer
from estnltk import Tokenizer
tokenizer = Tokenizer()
document = tokenizer.tokenize(text)
# Print the tokenized results
print (document.word_texts)
print (document.sentence_texts)
print (document.paragraph_texts)
print (document.text)
# Start and end positions of words, sentences and paragraphs
from pprint import pprint
pprint (list(zip(document.word_texts, document.word_spans)))
In pattern: SUPERPATTERN
Frequency: 3
Non-data size: 4
Instances
Project Name: estnltk/estnltk
Commit Name: f44cb644187ca69159fd79fb917077520c3ea031
Time: 2014-11-28
Author: brainscauseminds@gmail.com
File Name: estnltk/examples/ex01.py
Class Name:
Method Name:
Project Name: chakki-works/doccano
Commit Name: 49d41416e440926f0a9a8243b4d77f6f5468efe9
Time: 2019-03-12
Author: light.tree.1.13@gmail.com
File Name: app/server/utils.py
Class Name: JsonHandler
Method Name: parse
Project Name: azavea/raster-vision
Commit Name: 221617f6e125bf6ab920bf7fc9b8c23c3aa9d147
Time: 2019-03-18
Author: lewfish@gmail.com
File Name: rastervision/data/label_store/chip_classification_geojson_store.py
Class Name: ChipClassificationGeoJSONStore
Method Name: save