Term Frequency times Inverse Document Frequency
// TODO: extend support to other vectorization schemes, e.g. feature hashing
// Convert the raw documents into a TF-IDF feature matrix
# Collect the vectorizer settings from the caller-supplied kwargs. Each option
# is read with a plain lookup, so a missing key raises KeyError.
vectorizer_options = {
    option: kwargs[option]
    for option in (
        "sublinear_tf",
        "max_df",
        "stop_words",
        "smooth_idf",
        "ngram_range",
    )
}
tfidf_vec = TfidfVectorizer(**vectorizer_options)

# Fit the vectorizer on `data` and keep the transformed result as X.
# NOTE(review): presumably `data` is an iterable of raw text documents and
# TfidfVectorizer is scikit-learn's -- confirm against the caller/imports.
X = tfidf_vec.fit_transform(data)