0ed3e9e8b5383390506772029f1afa9f1f90819f,demo_log_odds_ratio_prior.py,,,#,9

Before Change


                          nlp=nlp).build()

term_freq_df = corpus.get_term_freq_df()
frequencies_scaled = scale(np.log(term_freq_df.sum(axis=1).values))
zeta_i_j = (LogOddsRatioUninformativeDirichletPrior()
            .get_zeta_i_j_given_separate_counts(term_freq_df["democrat freq"],
                                                term_freq_df["republican freq"]))
zeta_scaled_for_charting = scale_neg_1_to_1_with_zero_mean_abs_max(zeta_i_j)

After Change


                                    y_label="Log Odds Ratio w/ Prior (a_w=0.01)")
"""

bg_df = (corpus
	.get_term_and_background_counts()
	.where(lambda x: x.corpus > 0).dropna()
)
bg_df.background += bg_df.corpus
corpus_bg = corpus.remove_terms(set(corpus.get_terms()) - set(bg_df.index))
priors = (corpus_bg
	.get_term_and_background_counts()
	.reindex(corpus_bg.get_terms())["background"]
)
term_scorer = LogOddsRatioInformativeDirichletPrior(priors.values, 10)


tooltip_context = """(function(d) {
	return d.term+"<br/>Count ratio (per 25k): "+d.cat25k+":"+d.ncat25k+"<br/>Z-score: "+ Number(Math.round(d.os+"e3")+"e-3");
})"""

html = produce_fightin_words_explorer(corpus_bg,
                                      category="democrat",
                                      category_name="Democratic",
                                      not_category_name="Republican",
                                      minimum_term_frequency=5,
                                      get_tooltip_content = tooltip_context,
                                      term_scorer=term_scorer)

file_name = "demo_log_odds_ratio_prior_10.html"
open(file_name, "wb").write(html.encode("utf-8"))
print("Open %s in Chrome or Firefox." % file_name)
Italian Trulli
In pattern: SUPERPATTERN

Frequency: 3

Non-data size: 5

Instances


Project Name: JasonKessler/scattertext
Commit Name: 0ed3e9e8b5383390506772029f1afa9f1f90819f
Time: 2018-01-19
Author: jason.kessler@gmail.com
File Name: demo_log_odds_ratio_prior.py
Class Name:
Method Name:


Project Name: datascienceinc/Skater
Commit Name: da7adc91ee264f933edda8e0b42ac3188a44bc59
Time: 2017-04-03
Author: aikramer2@gmail.com
File Name: pyinterpret/core/global_interpretation/feature_importance.py
Class Name: FeatureImportance
Method Name: feature_importance


Project Name: adalca/neuron
Commit Name: 2019b3cae62bdb7740e93b0820fb25692c955cd6
Time: 2017-03-16
Author: adalca@mit.edu
File Name: src/pytools/metrics.py
Class Name: weighted_categorical_crossentropy
Method Name: loss