# NOTE(review): indentation was lost in this excerpt. The `raise` appears to be
# the body of the `if` above it, and the `for` loop body continues past the
# last visible line.
# Parse the PageXML input; `fp` is presumably an open file object -- confirm
# against the enclosing function.
doc = etree.parse(fp)
# First Page element at any depth; the `{*}` prefix matches any XML namespace.
image = doc.find(".//{*}Page")
# Reject input that has no Page element or whose Page lacks an imageFilename
# attribute.
if image is None or image.get("imageFilename") is None:
raise KrakenInputException("No valid filename found in PageXML file")
# Every TextLine element in the document, regardless of nesting depth.
lines = doc.findall(".//{*}TextLine")
# Result record: the image path is the imageFilename attribute joined onto
# base_dir; "lines" starts out empty.
data = {"image": os.path.join(base_dir, image.get("imageFilename")), "lines": []}
for line in lines:
# Coords is looked up as a direct child of the TextLine (`./` step).
pol = line.find("./{*}Coords")
After Change
# NOTE(review): indentation was lost in this excerpt. The first `raise` looks
# like the tail of an exception handler whose `try`/`except` header (and the
# binding of `e`) is outside the visible lines -- confirm against the full
# function. The `for` loop body also continues past the last visible line.
# Report the failure together with the offending file name and the value of `e`.
raise KrakenInputException("Parsing {} failed: {}".format(filename, e))
# First Page element at any depth; the `{*}` prefix matches any XML namespace.
image = doc.find(".//{*}Page")
# Reject input that has no Page element or whose Page lacks an imageFilename
# attribute; the message names the file so the bad input can be identified.
if image is None or image.get("imageFilename") is None:
raise KrakenInputException("No valid image filename found in PageXML file {}".format(filename))
# Every TextLine element in the document, regardless of nesting depth.
lines = doc.findall(".//{*}TextLine")
# Result record: the image path is the imageFilename attribute joined onto
# base_dir; "lines" starts out empty.
data = {"image": os.path.join(base_dir, image.get("imageFilename")), "lines": []}
for line in lines:
# Coords is looked up as a direct child of the TextLine (`./` step).
pol = line.find("./{*}Coords")