bd334ef20fdccb74d310ca00b1134388645ba0a5,vendor/readability/encoding.py,,get_encoding,#Any#,4

Before Change


    if not text.strip() or len(text) < 10:
        return enc # can't guess
    try:
        diff = text.decode(enc, "ignore").encode(enc)
        sizes = len(diff), len(text)
        if abs(len(text) - len(diff)) < max(sizes) * 0.01: # 99% of utf-8
            return enc
    except UnicodeDecodeError:
        pass
    res = chardet.detect(text)
    enc = res["encoding"]

After Change


    pragma_re = re.compile(r"<meta.*?content=["\"]*;?charset=(.+?)["\">]", flags=re.I)
    xml_re = re.compile(r"^<\?xml.*?encoding=["\"]*(.+?)["\">]")

    declared_encodings = (charset_re.findall(page) +
            pragma_re.findall(page) +
            xml_re.findall(page))

    # Try any declared encodings
    if len(declared_encodings) > 0:
        for declared_encoding in declared_encodings:
            try:
                page.decode(custom_decode(declared_encoding))
Italian Trulli
In pattern: SUPERPATTERN

Frequency: 3

Non-data size: 6

Instances


Project Name: samuelclay/NewsBlur
Commit Name: bd334ef20fdccb74d310ca00b1134388645ba0a5
Time: 2014-07-21
Author: samuel@ofbrooklyn.com
File Name: vendor/readability/encoding.py
Class Name:
Method Name: get_encoding


Project Name: chakki-works/doccano
Commit Name: 49d41416e440926f0a9a8243b4d77f6f5468efe9
Time: 2019-03-12
Author: light.tree.1.13@gmail.com
File Name: app/server/utils.py
Class Name: CoNLLHandler
Method Name: parse


Project Name: sony/nnabla
Commit Name: 86bcfb42aaa66a4a4da18fbba062bcda23d2b135
Time: 2021-03-08
Author: Yoshiyuki.Kobayashi@sony.com
File Name: python/src/nnabla/utils/data_source_implements.py
Class Name: CsvDataSource
Method Name: __init__