1b43ea3d9f7db02075536d9578891af06e324b9a,scattertext/WhitespaceNLP.py,,_toks_from_sentence,#Any#Any#Any#,62

Before Change


	toks = []
	for tok in re.split(r"(\W)", doc):
		pos = "WORD"
		if tok.strip() == "":
			pos = "SPACE"
		elif re.match("^\W+$", tok):
			pos = "PUNCT"
		toks.append(Tok(pos,
		                tok[:2].lower(),
		                tok.lower(),
		                ent_type="" if entity_type is None else entity_type.get(tok, ""),

After Change


def _toks_from_sentence(doc, entity_type, tag_type):
	toks = []
	for tok in re.split(r"(\W)", doc):
		if len(tok) > 0:
			toks.append(Tok(_get_pos_tag(tok),
			                tok[:2].lower(),
			                tok.lower(),
			                ent_type="" if entity_type is None else entity_type.get(tok, ""),
			                tag="" if tag_type is None else tag_type.get(tok, "")))
	return toks


def tweet_tokenzier_factory(tweet_tokenizer):
Italian Trulli
In pattern: SUPERPATTERN

Frequency: 3

Non-data size: 6

Instances


Project Name: JasonKessler/scattertext
Commit Name: 1b43ea3d9f7db02075536d9578891af06e324b9a
Time: 2017-12-04
Author: jason.kessler@gmail.com
File Name: scattertext/WhitespaceNLP.py
Class Name:
Method Name: _toks_from_sentence


Project Name: streamlit/streamlit
Commit Name: 30d53141553f9514d2d5474963abe1f1ad084ffa
Time: 2019-10-08
Author: thiago@streamlit.io
File Name: lib/streamlit/util.py
Class Name:
Method Name: open_browser


Project Name: JasonKessler/scattertext
Commit Name: 1b43ea3d9f7db02075536d9578891af06e324b9a
Time: 2017-12-04
Author: jason.kessler@gmail.com
File Name: scattertext/WhitespaceNLP.py
Class Name:
Method Name: whitespace_nlp_with_sentences