6e8898b82f4591fe2256ca56be3c98836fe94b88,simhash/__init__.py,Simhash,__init__,#Simhash#Any#Any#Any#Any#,25

Before Change


            self.value = value.value
        elif isinstance(value, basestring):
            self.build_by_text(unicode(value))
        elif isinstance(value, collections.Iterable):
            self.build_by_features(value)
        elif isinstance(value, long):
            self.value = value
        else:
            raise Exception("Bad parameter with type {}".format(type(value)))

    def _slide(self, content, width=4):
        return [content[i:i + width] for i in range(max(len(content) - width + 1, 1))]

After Change


        count = len(objs)

        if log is None:
            self.log = logging.getLogger("simhash")
        else:
            self.log = log

        self.log.info("Initializing %s data.", count)

        self.bucket = collections.defaultdict(set)

        for i, q in enumerate(objs):
            if i % 10000 == 0 or i == count - 1:
                self.log.info("%s/%s", i + 1, count)

            self.add(*q)

    def get_near_dups(self, simhash):
        
Italian Trulli
In pattern: SUPERPATTERN

Frequency: 3

Non-data size: 6

Instances


Project Name: leonsim/simhash
Commit Name: 6e8898b82f4591fe2256ca56be3c98836fe94b88
Time: 2017-10-31
Author: akellne@users.noreply.github.com
File Name: simhash/__init__.py
Class Name: Simhash
Method Name: __init__


Project Name: neurosynth/neurosynth
Commit Name: edb300280cd7854bb58e023d01ac24160bf6d98d
Time: 2014-11-07
Author: tyarkoni@gmail.com
File Name: neurosynth/analysis/cluster.py
Class Name: Clusterer
Method Name: __init__


Project Name: biolab/orange3
Commit Name: 976f9ea013348f6885114f0dae092e03dd0d1022
Time: 2018-08-02
Author: rafael@irgolic.com
File Name: Orange/canvas/document/schemeedit.py
Class Name: SchemeEditWidget
Method Name: __nodeInsert