8b5a8bedaeb5e8bc35878d7ae39430a5c285c9ec,calamari_ocr/ocr/datasets/abbyy_dataset/dataset.py,AbbyyDataSet,_load_sample,#AbbyyDataSet#Any#Any#,68

Before Change


        if self.mode == DataSetMode.EVAL or self.mode == DataSetMode.TRAIN:
            text = sample["format"].text

        if text_only:
            return img, text

        if self.mode == DataSetMode.TRAIN or self.mode == DataSetMode.PREDICT:
            img = np.array(Image.open(image_path))

            ly, lx = img.shape

            # Cut the Image
            img = img[line.rect.top: -ly + line.rect.bottom, line.rect.left: -lx + line.rect.right]

            # add padding as required from normal files
            img = np.pad(img, ((3, 3), (0, 0)), mode="constant", constant_values=img.max())

            # Binarize Image
            if self.binary:
                img = img > 0.9

        return img, text

    def store_text(self, sentence, sample, output_dir, extension):

After Change


                    if text_only:
                        yield cut_img, text

                    else:
                        if self.mode == DataSetMode.TRAIN or self.mode == DataSetMode.PREDICT:
                            ly, lx = img.shape

                            # Cut the Image
[Image: "Italian Trulli"]
In pattern: SUPERPATTERN

Frequency: 3

Non-data size: 4

Instances


Project Name: Calamari-OCR/calamari
Commit Name: 8b5a8bedaeb5e8bc35878d7ae39430a5c285c9ec
Time: 2019-04-08
Author: wick.chr.info@gmail.com
File Name: calamari_ocr/ocr/datasets/abbyy_dataset/dataset.py
Class Name: AbbyyDataSet
Method Name: _load_sample


Project Name: etal/cnvkit
Commit Name: 203ed6d1fda8de579c2bf311e5890dca05c14985
Time: 2016-12-10
Author: eric.talevich@gmail.com
File Name: cnvlib/target.py
Class Name:
Method Name: shorten_labels


Project Name: shubhomoydas/ad_examples
Commit Name: 1799d797fa39516d94208137e586c09c003a4763
Time: 2018-04-02
Author: smd.shubhomoydas@gmail.com
File Name: python/common/timeseries_datasets.py
Class Name: TSeries
Method Name: get_shingles