b0c57631bfc686c9f5572681750c305cfc9754af,snippets/ch05/ner.py,,,#,5
Before Change
import nltk
from nltk.corpus.reader.plaintext import CategorizedPlaintextCorpusReader
# Corpus root: the "debates" directory under the current working directory.
path = os.path.join(os.getcwd(), "debates")
# File-id pattern: "<directory>/<name>.txt". The negative lookahead rejects
# ids that begin with a dot.
DOC_PATTERN = r"(?!\.)[\w_\s]+/[\w\s\d\-]+\.txt"
# Category pattern: the single group captures the run of word/whitespace
# characters that precedes a slash (the directory part of a file id).
CAT_PATTERN = r"([\w_\s]+)/.*"
# Reader over the files under `path` selected by DOC_PATTERN.
# NOTE(review): presumably the group captured by cat_pattern becomes each
# document's category -- confirm against the NLTK corpus reader docs.
corpus = CategorizedPlaintextCorpusReader(
path, DOC_PATTERN, cat_pattern=CAT_PATTERN)
def tag_corpus(corpus):
    """Part-of-speech tag every sentence of *corpus*.

    Args:
        corpus: An object exposing a ``sents()`` method; each item it
            yields is handed unchanged to ``nltk.pos_tag``.

    Returns:
        A list holding one ``nltk.pos_tag`` result per sentence, in the
        order ``corpus.sents()`` yields them.
    """
    return [nltk.pos_tag(sent) for sent in corpus.sents()]
After Change
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
# Named-entity labels to keep.
# GPE is Geo-Political Entity, GSP is Geo-Socio-Political group.
GOODLABELS = frozenset(["PERSON", "ORGANIZATION", "FACILITY", "GPE", "GSP"])
In pattern: SUPERPATTERN
Frequency: 3
Non-data size: 3
Instances
Project Name: foxbook/atap
Commit Name: b0c57631bfc686c9f5572681750c305cfc9754af
Time: 2018-03-08
Author: bilbro@gmail.com
File Name: snippets/ch05/ner.py
Class Name:
Method Name:
Project Name: epfl-lts2/pygsp
Commit Name: 65429420b0a4c75b0fa57073a5644e09706bf0f0
Time: 2017-08-11
Author: michael.defferrard@epfl.ch
File Name: doc/conf.py
Class Name:
Method Name:
Project Name: GoogleCloudPlatform/python-docs-samples
Commit Name: 3397e6bfd55584c62d64911ecdaf588ca3469b79
Time: 2020-04-01
Author: 31518063+kurtisvg@users.noreply.github.com
File Name: noxfile-template.py
Class Name:
Method Name: