3e2ebf76a530eca5305d1feba6be9c531ee33d71,gensim/corpora/hashdictionary.py,HashDictionary,filter_extremes,#HashDictionary#Any#Any#Any#,172

Before Change


        no_above_abs = int(no_above * self.num_docs) # convert fractional threshold to absolute threshold

        # determine which ids to keep
        good_ids = (hash_id for hash_id in self.keys() if no_below <= self.dfs.get(hash_id, 0) <= no_above_abs)

        if keep_n is not None:
            good_ids = sorted(good_ids, key=lambda item: self.dfs.get(item, 0), reverse=True)
            good_ids = good_ids[:keep_n]
        good_ids = set(good_ids)

        self.id2token = dict((tokenid, freq) for tokenid, freq in self.id2token.iteritems() if tokenid in good_ids)

After Change


        
        no_above_abs = int(no_above * self.num_docs) # convert fractional threshold to absolute threshold

        self.dfs_debug = dict((word, freq) for word, freq in self.dfs_debug.iteritems() if no_below <= freq <= no_above_abs)
        self.token2id = dict((token, tokenid) for token, tokenid in self.token2id.iteritems() if token in self.dfs_debug)
        self.id2token = dict((tokenid, set(token for token in tokens if token in self.dfs_debug)) for tokenid, tokens in self.id2token.iteritems())
        self.dfs = dict((tokenid, freq) for tokenid, freq in self.dfs.iteritems() if self.id2token.get(tokenid, set()))
Italian Trulli
In pattern: SUPERPATTERN

Frequency: 3

Non-data size: 8

Instances


Project Name: RaRe-Technologies/gensim
Commit Name: 3e2ebf76a530eca5305d1feba6be9c531ee33d71
Time: 2012-08-19
Author: radimrehurek@seznam.cz
File Name: gensim/corpora/hashdictionary.py
Class Name: HashDictionary
Method Name: filter_extremes


Project Name: sentinel-hub/eo-learn
Commit Name: e33574dcf176ecb0839cd6ffa2f0133083f5bf59
Time: 2020-02-05
Author: jovan.visnjic@sinergise.com
File Name: io/eolearn/io/processing_api.py
Class Name: SentinelHubInputTask
Method Name: __init__


Project Name: matplotlib/matplotlib
Commit Name: 5608ac37feb7fad002c0e030ba30f4924e8c1fc1
Time: 2018-09-24
Author: anntzer.lee@gmail.com
File Name: examples/statistics/barchart_demo.py
Class Name:
Method Name: