06e94610dc2d71997ca567b45df02f5ea7613a81,apps/analyzer/classifier.py,Classifier,get_features,#Classifier#Any#,15
Before Change
self.feed = feed
def get_features(self, doc):
    """Return the distinct lower-cased words of ``doc`` as a ``{word: 1}`` dict.

    ``doc`` is split on runs of non-word characters. Only tokens whose
    length is strictly between 2 and 20 characters are kept, and each kept
    token is lower-cased and mapped to ``1``, so repeated words collapse
    into a single key.
    """
    # Split on one or more non-word characters. The earlier pattern,
    # "\W*", can match the empty string; since Python 3.7 re.split() also
    # splits on empty matches, which chops the text into single characters
    # so that every token is rejected by the length filter below.
    splitter = re.compile(r"\W+")
    # The length filter is applied to the raw token, before lower-casing.
    return {
        token.lower(): 1
        for token in splitter.split(doc)
        if 2 < len(token) < 20
    }
def increment_feature(self, feature, category):
count = self.feature_count(feature,category)
After Change
def get_features(self, doc):
    """Tally the entries of ``self.phrases`` that are contained in ``doc``.

    Each entry of ``self.phrases`` is checked against ``doc`` with the
    ``in`` operator. An entry that matches is counted once per time it is
    listed in ``self.phrases``; how often it occurs inside ``doc`` is not
    counted. Entries that do not match are absent from the result.
    """
    matches = {}
    for candidate in self.phrases:
        if candidate not in doc:
            continue
        # A first sighting starts from 0; a repeated listing adds one more.
        matches[candidate] = matches.get(candidate, 0) + 1
    return matches
def increment_feature(self, feature, category):
count = self.feature_count(feature,category)
In pattern: SUPERPATTERN
Frequency: 3
Non-data size: 6
Instances Project Name: samuelclay/NewsBlur
Commit Name: 06e94610dc2d71997ca567b45df02f5ea7613a81
Time: 2009-11-15
Author: samuel@ofbrooklyn.com
File Name: apps/analyzer/classifier.py
Class Name: Classifier
Method Name: get_features
Project Name: pantsbuild/pants
Commit Name: 085475ea0ac7c9689b9530e7c40196fcd907a181
Time: 2013-02-26
Author: ryan.blake.williams@gmail.com
File Name: src/python/twitter/pants/base/parse_context.py
Class Name: ParseContext
Method Name: parse
Project Name: pantsbuild/pants
Commit Name: aaa4c0467fbc0ea9bdc52b432c65499fc2f4901e
Time: 2013-03-04
Author: benjyw@gmail.com
File Name: src/python/twitter/pants/base/parse_context.py
Class Name: ParseContext
Method Name: parse