c5fff8cf4240310765d98172821ed008a9acb8d7,mindsdb/libs/phases/stats_generator/stats_generator.py,StatsGenerator,duplicates_score,#StatsGenerator#Any#Any#Any#,205
Before Change
# Measure how many entries of column `col_name` repeat an already-present value.
# NOTE(review): this is an excerpt -- the visible lines build `data` but do not
# show it being returned or stored, and `stats` is not used in them.
def duplicates_score(self, stats, columns, col_name):
# Number of surplus entries: total entries minus the number of distinct values.
duplicates = len(columns[col_name]) - len(set(columns[col_name]))
data = {
"duplicates": duplicates
# Surplus entries as a percentage of all entries in the column; the division
# raises ZeroDivisionError when the column is empty.
,"duplicates_percentage": duplicates*100/len(columns[col_name])
}
After Change
# duplicate_score, a quality score based on said percentage.
# Tally how many times each distinct value appears in the column.
occurances = Counter(columns[col_name])
# Keep the values seen at least twice (count > 1). Materialised as a list so
# the result can be iterated more than once, unlike a lazy filter object.
values_that_occur_twice_or_more = [
    val for val, count in occurances.items() if count > 1
]
# Per-value occurrence counts for those repeated values.
nr_of_occurances = [occurances[val] for val in values_that_occur_twice_or_more]
# Total number of entries whose value is shared with at least one other entry;
# this sums the counts, not the values themselves.
nr_duplicates = sum(nr_of_occurances)
total_entries = len(columns[col_name])
data = {
    "nr_duplicates": nr_duplicates,
    # An empty column contains no duplicates; guard the division by zero.
    "duplicates_percentage": (
        nr_duplicates * 100 / total_entries if total_entries else 0
    ),
}
In pattern: SUPERPATTERN
Frequency: 3
Non-data size: 8
Instances
Project Name: mindsdb/mindsdb
Commit Name: c5fff8cf4240310765d98172821ed008a9acb8d7
Time: 2019-02-08
Author: george@cerebralab.com
File Name: mindsdb/libs/phases/stats_generator/stats_generator.py
Class Name: StatsGenerator
Method Name: duplicates_score
Project Name: pgmpy/pgmpy
Commit Name: ca09dcecea454e6552a7f30e57aef1dc4f2f295e
Time: 2020-08-11
Author: tristandeleu@users.noreply.github.com
File Name: pgmpy/estimators/StructureScore.py
Class Name: BDeuScore
Method Name: local_score
Project Name: tensorflow/cleverhans
Commit Name: 68fe96add85bd842df23569dde490e1694d256fe
Time: 2019-06-21
Author: siarheisiniak@yahoo.com
File Name: cleverhans/model_zoo/deep_k_nearest_neighbors/dknn.py
Class Name: DkNNModel
Method Name: find_train_knns