4d6e349e05de71d7473ad2ed853b4f9b40746409,scattertext/TermDocMatrix.py,TermDocMatrix,get_metadata_freq_df,#TermDocMatrix#Any#,188

Before Change


        -------
        pd.DataFrame indexed on metadata, with columns giving frequencies for each category
        """
        row = self._row_category_ids_for_meta()
        newX = csr_matrix((self._mX.data, (row, self._mX.indices)))
        return self._metadata_freq_df_from_matrix(newX, label_append)

    def _row_category_ids(self):
        row = self._X.tocoo().row
        for i, cat in enumerate(self._y):

After Change


                            dtype=self.get_metadata_doc_mat().dtype)
        for cat_i in range(self.get_num_categories()):
            freq_mat[:, cat_i] = self._mX[self._y == cat_i, :].sum(axis=0)
        return pd.DataFrame(freq_mat,
                            index=pd.Series(self.get_metadata(), name="term"),
                            columns=[c + label_append for c in self.get_categories()])

    def _row_category_ids(self):
        row = self._X.tocoo().row
        for i, cat in enumerate(self._y):
[image alt text: Italian Trulli]
In pattern: SUPERPATTERN

Frequency: 3

Non-data size: 7

Instances


Project Name: JasonKessler/scattertext
Commit Name: 4d6e349e05de71d7473ad2ed853b4f9b40746409
Time: 2020-02-22
Author: JasonKessler@users.noreply.github.com
File Name: scattertext/TermDocMatrix.py
Class Name: TermDocMatrix
Method Name: get_metadata_freq_df


Project Name: JasonKessler/scattertext
Commit Name: 6e6178652fe86ad00cd96bfcd97e613ff50816fa
Time: 2018-03-13
Author: Jason.Kessler@cdk.com
File Name: scattertext/termranking/OncePerDocFrequencyRanker.py
Class Name: OncePerDocFrequencyRanker
Method Name: get_ranks


Project Name: JasonKessler/scattertext
Commit Name: 810364662754ff3d11a9a573657fb572ae7135f7
Time: 2018-12-05
Author: JasonKessler@users.noreply.github.com
File Name: scattertext/termranking/DocLengthNormalizedFrequencyRanker.py
Class Name: DocLengthNormalizedFrequencyRanker
Method Name: get_ranks