b0c57631bfc686c9f5572681750c305cfc9754af,snippets/ch05/ner.py,,,#,5

Before Change


import os

import nltk
from nltk.corpus.reader.plaintext import CategorizedPlaintextCorpusReader

# Corpus root: a "debates" directory under the current working directory.
# Requires `import os` at the top of the file.
path = os.path.join(os.getcwd(), "debates")

# File-id pattern: one directory component (word characters/whitespace, not
# starting with a dot) followed by "/" and a ".txt" file name made of word
# characters, whitespace, digits and hyphens.
DOC_PATTERN = r"(?!\.)[\w_\s]+/[\w\s\d\-]+\.txt"
# Category pattern: captures the leading directory component of a file id.
# It is handed to the reader as `cat_pattern`; presumably the captured group
# becomes the document's category -- confirm against the NLTK reader docs.
CAT_PATTERN = r"([\w_\s]+)/.*"

# Module-level reader over the "debates" directory, built at import time.
corpus = CategorizedPlaintextCorpusReader(
    path, DOC_PATTERN, cat_pattern=CAT_PATTERN)


def tag_corpus(corpus):
    """Return a list holding the result of ``nltk.pos_tag`` for each sentence.

    ``corpus`` must expose a ``sents()`` method; every item it yields is
    passed to ``nltk.pos_tag`` unchanged, and the results are collected in
    iteration order.
    """
    tagged_sentences = []
    for sentence in corpus.sents():
        tagged_sentences.append(nltk.pos_tag(sentence))
    return tagged_sentences

After Change


from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Immutable set of entity labels this module treats as "good" (per the name;
# the code that consumes it is not visible in this excerpt).
# GPE is Geo-Political Entity, GSP is Geo-Socio-Political group.
GOODLABELS = frozenset(["PERSON", "ORGANIZATION", "FACILITY", "GPE", "GSP"])

Italian Trulli
In pattern: SUPERPATTERN

Frequency: 3

Non-data size: 3

Instances


Project Name: foxbook/atap
Commit Name: b0c57631bfc686c9f5572681750c305cfc9754af
Time: 2018-03-08
Author: bilbro@gmail.com
File Name: snippets/ch05/ner.py
Class Name:
Method Name:


Project Name: epfl-lts2/pygsp
Commit Name: 65429420b0a4c75b0fa57073a5644e09706bf0f0
Time: 2017-08-11
Author: michael.defferrard@epfl.ch
File Name: doc/conf.py
Class Name:
Method Name:


Project Name: GoogleCloudPlatform/python-docs-samples
Commit Name: 3397e6bfd55584c62d64911ecdaf588ca3469b79
Time: 2020-04-01
Author: 31518063+kurtisvg@users.noreply.github.com
File Name: noxfile-template.py
Class Name:
Method Name: