2fdabb14fd48534243b2e472995f4bec432ed493,textacy/constants.py,,,#,10

Before Change


# Email address: a local part of word chars / "+" / "-" with single (never
# doubled) dots, an "@", a domain name, and one to three dotted alphabetic
# suffixes of at least two letters. Matching is case-insensitive.
EMAIL_REGEX = re.compile(
    r"(?:^|(?<=[^\w@.)]))"              # start of text, or after a char that is not \w, "@", "." or ")"
    r"([\w+-](\.(?!\.))?)*?[\w+-]@"     # local part (no consecutive dots), then "@"
    r"(?:\w-?)*?\w+"                    # domain name; hyphens allowed between word chars
    r"(\.([a-z]{2,})){1,3}"             # 1-3 suffixes such as ".com" or ".co.uk"
    r"(?:$|(?=\b))",                    # end of text, or a word boundary
    flags=re.UNICODE | re.IGNORECASE)

# Phone number: optional leading "1" country code, optional 3-digit area code
# (parentheses allowed), a 3+4 digit number, and an optional extension.
PHONE_REGEX = re.compile(
    r"(?:^|(?<=[^\w)]))"                    # start of text, or after a char that is not \w or ")"
    r"(\+?1[ .-]?)?"                        # optional "+1" / "1" prefix
    r"(\(?\d{3}\)?[ .-]?)?"                 # optional area code
    r"\d{3}[ .-]?\d{4}"                     # seven-digit number with optional separator
    r"(\s?(?:ext\.?|[//x-])\s?\d{2,6})?"    # optional extension of 2-6 digits
    r"(?:$|(?=\W))")                        # end of text, or followed by a non-word char

# Number: optional sign, then one of four alternatives tried in order.
NUMBERS_REGEX = re.compile(
    r"(?:^|(?<=[^\w,.]))"                       # start of text, or after a char that is not \w, "," or "."
    r"[+–-]?"                                   # optional plus, en dash, or hyphen-minus
    r"("
    r"([1-9]\d{0,2}(,\d{3})+(\.\d*)?)"          # comma-grouped thousands, "." decimals
    r"|([1-9]\d{0,2}([ .]\d{3})+(,\d*)?)"       # space- or dot-grouped thousands, "," decimals
    r"|(\d*?[.,]\d+)"                           # ungrouped number with a decimal part
    r"|\d+"                                     # plain run of digits
    r")"
    r"(?:$|(?=\b))")                            # end of text, or a word boundary

# One or more consecutive characters drawn from ``string.punctuation``.
PUNCT_REGEX = re.compile("[" + re.escape(string.punctuation) + "]+")
# One or more consecutive characters drawn from the keys of CURRENCIES.
# The keys are concatenated straight into a character class, so each
# character of every key becomes an individual class member.
CURRENCY_REGEX = re.compile(
    "[{0}]+".format("".join(CURRENCIES)))
# One or more consecutive line breaks: "\r\n" pairs, bare "\n", or vertical tabs.
LINEBREAK_REGEX = re.compile(r"((\r\n)|[\n\v])+")
# One or more consecutive whitespace characters, none of which is "\n".
# The negative lookahead is applied to *every* character in the run; guarding
# only the first one (``(?!\n)\s+``) lets a run such as " \n " match as a
# whole, so substituting a single space would silently swallow the newline.
NONBREAKING_SPACE_REGEX = re.compile(r"(?:(?!\n)\s)+")

After Change


           "VP": r"<AUX>* <ADV>* <VERB>"}
    }

# Maps the code point of every character whose Unicode general category
# starts with "P" (punctuation) to None.
# NOTE(review): range(sys.maxunicode) stops one short of sys.maxunicode itself.
PUNCT_TRANSLATE_UNICODE = {
    codepoint: None
    for codepoint in range(sys.maxunicode)
    if unicodedata.category(chr_(codepoint)).startswith("P")
}
# UTF-8 encodings of all of the punctuation characters above, concatenated
# into a single byte string.
PUNCT_TRANSLATE_BYTES = b"".join(
    chr_(codepoint).encode("utf8") for codepoint in PUNCT_TRANSLATE_UNICODE)

# Acronym: either runs of capitals (optionally dotted, optionally joined by a
# lowercase letter, digit, "&", "/" or "-") ending in a capital or digit, or a
# digit followed by one or more optionally hyphenated capitals.
ACRONYM_REGEX = re.compile(
    r"(?:^|(?<=\W))"                                                    # start of text, or after a non-word char
    r"(?:"
    r"(?:(?:(?:[A-Z]\.?)+[a-z0-9&/-]?)+(?:[A-Z][s.]?|[0-9]s?))"         # capital-led form
    r"|(?:[0-9](?:\-?[A-Z])+)"                                          # digit-led form
    r")"
    r"(?:$|(?=\W))",                                                    # end of text, or before a non-word char
    flags=re.UNICODE)

# Email address: a local part of word chars / "+" / "-" with single (never
# doubled) dots, an "@", a domain name, and one to three dotted alphabetic
# suffixes of at least two letters. Matching is case-insensitive.
EMAIL_REGEX = re.compile(
    r"(?:^|(?<=[^\w@.)]))"              # start of text, or after a char that is not \w, "@", "." or ")"
    r"([\w+-](\.(?!\.))?)*?[\w+-]@"     # local part (no consecutive dots), then "@"
    r"(?:\w-?)*?\w+"                    # domain name; hyphens allowed between word chars
    r"(\.([a-z]{2,})){1,3}"             # 1-3 suffixes such as ".com" or ".co.uk"
    r"(?:$|(?=\b))",                    # end of text, or a word boundary
    flags=re.UNICODE | re.IGNORECASE)

# Phone number: optional leading "1" country code, optional 3-digit area code
# (parentheses allowed), a 3+4 digit number, and an optional extension.
PHONE_REGEX = re.compile(
    r"(?:^|(?<=[^\w)]))"                    # start of text, or after a char that is not \w or ")"
    r"(\+?1[ .-]?)?"                        # optional "+1" / "1" prefix
    r"(\(?\d{3}\)?[ .-]?)?"                 # optional area code
    r"\d{3}[ .-]?\d{4}"                     # seven-digit number with optional separator
    r"(\s?(?:ext\.?|[//x-])\s?\d{2,6})?"    # optional extension of 2-6 digits
    r"(?:$|(?=\W))")                        # end of text, or followed by a non-word char
Italian Trulli
In pattern: SUPERPATTERN

Frequency: 3

Non-data size: 4

Instances


Project Name: chartbeat-labs/textacy
Commit Name: 2fdabb14fd48534243b2e472995f4bec432ed493
Time: 2017-05-07
Author: burton@chartbeat.com
File Name: textacy/constants.py
Class Name:
Method Name:


Project Name: nipunsadvilkar/pySBD
Commit Name: 026aefb394b2cffbf8b77cbb580adadf4513258d
Time: 2019-11-11
Author: daniel@allenai.org
File Name: pysbd/abbreviation_replacer.py
Class Name:
Method Name: replace_abbreviation_as_sentence_boundary


Project Name: hellohaptik/chatbot_ner
Commit Name: 178424dffc5608f527b05e00d977a10dbdf6d125
Time: 2018-12-13
Author: amans.rlx@gmail.com
File Name: ner_v2/detectors/numeral/number/standard_number_detector.py
Class Name: BaseNumberDetector
Method Name: _detect_number_from_digit