8b5a8bedaeb5e8bc35878d7ae39430a5c285c9ec,calamari_ocr/ocr/datasets/abbyy_dataset/dataset.py,AbbyyDataSet,_load_sample,#AbbyyDataSet#Any#Any#,68

Before Change


            ly, lx = img.shape

            // Cut the Image
            img = img[line.rect.top: -ly + line.rect.bottom, line.rect.left: -lx + line.rect.right]

            // add padding as required from normal files
            img = np.pad(img, ((3, 3), (0, 0)), mode="constant", constant_values=img.max())

            Binarize Image
            if self.binary:
                img = img > 0.9

        return img, text

    def store_text(self, sentence, sample, output_dir, extension):

After Change


            if self.binary:
                img = img > 0.9
        else:
            img = None

        for p, page in enumerate(book.pages):
            for l, line in enumerate(page.getLines()):
                for f, fo in enumerate(line.formats):
                    text = None
                    cut_img = None
                    if self.mode == DataSetMode.EVAL or self.mode == DataSetMode.TRAIN:
                        text = fo.text

                    if text_only:
                        yield cut_img, text

                    else:
                        if self.mode == DataSetMode.TRAIN or self.mode == DataSetMode.PREDICT:
                            ly, lx = img.shape
Italian Trulli
In pattern: SUPERPATTERN

Frequency: 3

Non-data size: 6

Instances


Project Name: Calamari-OCR/calamari
Commit Name: 8b5a8bedaeb5e8bc35878d7ae39430a5c285c9ec
Time: 2019-04-08
Author: wick.chr.info@gmail.com
File Name: calamari_ocr/ocr/datasets/abbyy_dataset/dataset.py
Class Name: AbbyyDataSet
Method Name: _load_sample


Project Name: IndicoDataSolutions/finetune
Commit Name: 42e1e47ff386be77b740b64771644ea7aa875880
Time: 2020-02-26
Author: benlt@hotmail.co.uk
File Name: finetune/target_models/masked_language_model.py
Class Name: MaskedLanguageModelPipeline
Method Name: text_to_tokens_mask


Project Name: fgnt/nara_wpe
Commit Name: 09204a3dce2b419cd1416504903c41c724fa8de8
Time: 2018-06-27
Author: mail@lukas-drude.de
File Name: nara_wpe/wpe.py
Class Name:
Method Name: segment_axis