4d6e349e05de71d7473ad2ed853b4f9b40746409,scattertext/TermDocMatrix.py,TermDocMatrix,get_metadata_freq_df,#TermDocMatrix#Any#,188
Before Change
-------
pd.DataFrame indexed on metadata, with columns giving frequencies for each category
"""
row = self._row_category_ids_for_meta()
newX = csr_matrix((self._mX.data, (row, self._mX.indices)))
return self._metadata_freq_df_from_matrix(newX, label_append)
def _row_category_ids(self):
row = self._X.tocoo().row
for i, cat in enumerate(self._y):
After Change
dtype=self.get_metadata_doc_mat().dtype)
for cat_i in range(self.get_num_categories()):
freq_mat[:, cat_i] = self._mX[self._y == cat_i, :].sum(axis=0)
return pd.DataFrame(freq_mat,
index=pd.Series(self.get_metadata(), name="term"),
columns=[c + label_append for c in self.get_categories()])
def _row_category_ids(self):
row = self._X.tocoo().row
for i, cat in enumerate(self._y):
In pattern: SUPERPATTERN
Frequency: 3
Non-data size: 7
Instances
Project Name: JasonKessler/scattertext
Commit Name: 4d6e349e05de71d7473ad2ed853b4f9b40746409
Time: 2020-02-22
Author: JasonKessler@users.noreply.github.com
File Name: scattertext/TermDocMatrix.py
Class Name: TermDocMatrix
Method Name: get_metadata_freq_df
Project Name: JasonKessler/scattertext
Commit Name: 6e6178652fe86ad00cd96bfcd97e613ff50816fa
Time: 2018-03-13
Author: Jason.Kessler@cdk.com
File Name: scattertext/termranking/OncePerDocFrequencyRanker.py
Class Name: OncePerDocFrequencyRanker
Method Name: get_ranks
Project Name: JasonKessler/scattertext
Commit Name: 810364662754ff3d11a9a573657fb572ae7135f7
Time: 2018-12-05
Author: JasonKessler@users.noreply.github.com
File Name: scattertext/termranking/DocLengthNormalizedFrequencyRanker.py
Class Name: DocLengthNormalizedFrequencyRanker
Method Name: get_ranks