aee018f39c813bb3ff12f67f19022d5c584f629b,vendor/readability/encoding.py,,get_encoding,#Any#,4

Before Change



def get_encoding(page):
    # Regex for XML and HTML meta charset declarations
    charset_re = re.compile(r"<meta.*?charset=["\"]*(.+?)["\">]", flags=re.I)
    pragma_re = re.compile(r"<meta.*?content=["\"]*;?charset=(.+?)["\">]", flags=re.I)
    xml_re = re.compile(r"^<\?xml.*?encoding=["\"]*(.+?)["\">]")

    declared_encodings = (charset_re.findall(page) +

After Change



    # Fall back to chardet if the declared encodings fail.
    # Remove all HTML tags and leave only the text for chardet.
    text = re.sub(b"(\s*</?[^>]*>)+\s*", b" ", page).strip()
    enc = "utf-8"
    if len(text) < 10:
        return enc  # can't guess
Italian Trulli
In pattern: SUPERPATTERN

Frequency: 3

Non-data size: 3

Instances


Project Name: samuelclay/NewsBlur
Commit Name: aee018f39c813bb3ff12f67f19022d5c584f629b
Time: 2017-01-25
Author: samuel@ofbrooklyn.com
File Name: vendor/readability/encoding.py
Class Name:
Method Name: get_encoding


Project Name: estnltk/estnltk
Commit Name: 74b2ba903ecf195c2f6abf1432b8fa7240be0f08
Time: 2015-07-23
Author: amatsin@gmail.com
File Name: estnltk/wiki/infoBox.py
Class Name:
Method Name: infoBoxParser


Project Name: studioml/studio
Commit Name: 912ed846e7f0099335773635fa6868d30b50eccd
Time: 2017-07-14
Author: peter.zhokhov@sentient.ai
File Name: studio/runner.py
Class Name:
Method Name: add_packages