0b9336333a3d37c097849ec1218f845a399499d4,python/baseline/reader.py,CONLLSeqReader,build_vocab,#CONLLSeqReader#Any#,279

Before Change


        print("Max sentence length %d" % self.max_sentence_length)
        print("Max word length %d" % self.max_word_length)

        return {"char": vocab_ch, "word": vocab_word }

    def read_lines(self, tsfile):

        txts = []

After Change


        vocabs = {}
        keys = self.extended_features.keys()
        for key in keys:
            vocabs[key] = Counter()

        maxw = 0
        maxs = 0
        for file in files:
            if file is None:
                continue

            sl = 0
            with codecs.open(file, encoding="utf-8", mode="r") as f:
                for line in f:

                    line = line.strip()
                    if line == "":
                        maxs = max(maxs, sl)
                        sl = 0

                    else:
                        states = re.split("\s", line)
                        sl += 1
                        w = states[0]
                        vocab_word[self.cleanup_fn(w)] += 1
                        maxw = max(maxw, len(w))
                        for k in w:
                            vocab_ch[k] += 1
                        for key, index in self.extended_features.items():
                            vocabs[key][states[index]] += 1

        self.max_word_length = min(maxw, self.max_word_length) if self.max_word_length > 0 else maxw
        self.max_sentence_length = min(maxs, self.max_sentence_length) if self.max_sentence_length > 0 else maxs
        print("Max sentence length %d" % self.max_sentence_length)
        print("Max word length %d" % self.max_word_length)
In pattern: SUPERPATTERN

Frequency: 3

Non-data size: 6

Instances


Project Name: dpressel/mead-baseline
Commit Name: 0b9336333a3d37c097849ec1218f845a399499d4
Time: 2018-01-10
Author: dpressel@gmail.com
File Name: python/baseline/reader.py
Class Name: CONLLSeqReader
Method Name: build_vocab


Project Name: dpressel/mead-baseline
Commit Name: babd47e5ba6aece8a54c52b0ecefeb662ac05fa2
Time: 2016-12-01
Author: dpressel@gmail.com
File Name: tag/tf/data.py
Class Name: (none)
Method Name: conllBuildVocab


Project Name: JasonKessler/scattertext
Commit Name: fd0c8e2d5b5e778f5804028c010f78f6b404fdd1
Time: 2018-02-14
Author: jason.kessler@gmail.com
File Name: scattertext/features/PhraseMachinePhrases.py
Class Name: PhraseMachinePhrases
Method Name: get_feats