def get_encoding(page):
// Regex for XML and HTML Meta charset declaration
charset_re = re.compile(r"<meta.*?charset=["\"]*(.+?)["\">]", flags=re.I)
pragma_re = re.compile(r"<meta.*?content=["\"]*;?charset=(.+?)["\">]", flags=re.I)
xml_re = re.compile(r"^<\?xml.*?encoding=["\"]*(.+?)["\">]")
declared_encodings = (charset_re.findall(page) +
pragma_re.findall(page) +
After Change
// Now let's decode the page
page.decode()
// It worked!
return encoding
except UnicodeDecodeError:
pass
// Fallback to chardet if declared encodings fail