0ed3e9e8b5383390506772029f1afa9f1f90819f,demo_log_odds_ratio_prior.py,,,#,9

Before Change


                          nlp=nlp).build()

term_freq_df = corpus.get_term_freq_df()
frequencies_scaled = scale(np.log(term_freq_df.sum(axis=1).values))
zeta_i_j = (LogOddsRatioUninformativeDirichletPrior()
            .get_zeta_i_j_given_separate_counts(term_freq_df["democrat freq"],
                                                term_freq_df["republican freq"]))
zeta_scaled_for_charting = scale_neg_1_to_1_with_zero_mean_abs_max(zeta_i_j)

html = produce_scattertext_explorer(corpus,
                                    category="democrat",
                                    category_name="Democratic",
                                    not_category_name="Republican",
                                    minimum_term_frequency=5,
                                    width_in_pixels=1000,
                                    x_coords=frequencies_scaled,
                                    y_coords=zeta_scaled_for_charting,
                                    scores=zeta_i_j,
                                    sort_by_dist=False,
                                    metadata=convention_df["speaker"],
                                    x_label="Log Frequency",
                                    y_label="Log Odds Ratio w/ Prior (a_w=0.01)")
file_name = "demo_log_odds_ratio_prior.html"
open(file_name, "wb").write(html.encode("utf-8"))
print("Open %s in Chrome or Firefox." % file_name)

After Change


	.where(lambda x: x.corpus > 0).dropna()
)
bg_df.background += bg_df.corpus
corpus_bg = corpus.remove_terms(set(corpus.get_terms()) - set(bg_df.index))
priors = (corpus_bg
	.get_term_and_background_counts()
	.reindex(corpus_bg.get_terms())["background"]
)
term_scorer = LogOddsRatioInformativeDirichletPrior(priors.values, 10)


tooltip_context = """(function(d) {
	return d.term+"<br/>Count ratio (per 25k): "+d.cat25k+":"+d.ncat25k+"<br/>Z-score: "+ Number(Math.round(d.os+"e3")+"e-3");
Italian Trulli
In pattern: SUPERPATTERN

Frequency: 4

Non-data size: 4

Instances


Project Name: JasonKessler/scattertext
Commit Name: 0ed3e9e8b5383390506772029f1afa9f1f90819f
Time: 2018-01-19
Author: jason.kessler@gmail.com
File Name: demo_log_odds_ratio_prior.py
Class Name:
Method Name:


Project Name: automl/SMAC3
Commit Name: 2402c8a3c28f5c6faf9931fe97b6516b7c426edb
Time: 2016-02-12
Author: lindauer@cs.uni-freiburg.de
File Name: smac/smbo/intensification.py
Class Name: Intensifier
Method Name: intensify


Project Name: etal/cnvkit
Commit Name: 2654a34e8654f199e7d5f86d7115f3cb4233f2dc
Time: 2016-12-01
Author: eric.talevich@gmail.com
File Name: cnvlib/tabio/vcfio.py
Class Name:
Method Name: _extract_genotype


Project Name: bokeh/bokeh
Commit Name: f1bc5b054e29f6d8e948fe7222936a2b0a975535
Time: 2019-10-07
Author: mattpap@gmail.com
File Name: bokeh/util/compiler.py
Class Name:
Method Name: _bundle_models