0d768b06d5aea91bd6f8755e605f2c706a9373f8,conceptnet5/vectors/transforms.py,,standardize_row_labels,#Any#Any#,6

Before Change


    standardized URI get combined, with earlier rows given more weight.
    
    # Re-label the DataFrame with standardized, non-unique row labels
    frame.index = pd.Series(
        [standardized_uri(language, label) for label in frame.index],
        name="term"
    )

    # Assign row n a weight of 1/(n+1) for weighted averaging
    nrows = frame.shape[0]
    weights = 1.0 / np.arange(1, nrows + 1)
    label_weights = pd.Series(weights, index=frame.index)

    # groupby(level=0).sum() means to add rows that have the same label
    relabeled = frame.mul(weights, axis="rows").sort_index().groupby(level=0).sum()
    combined_weights = label_weights.sort_index().groupby(level=0).sum()
    return relabeled.div(combined_weights, axis="rows")


def l1_normalize_columns(frame):
    

After Change


    # groupby(level=0).sum() means to add rows that have the same label
    relabeled = frame.mul(weights, axis="rows").sort_index().groupby(level=0).sum()
    combined_weights = label_weights.sort_index().groupby(level=0).sum()
    scaled = relabeled.div(combined_weights, axis="rows")

    # Rearrange the items in descending order of weight, similar to the order
    # we get them in from word2vec and GloVe
    combined_weights.sort(ascending=False)
    result = scaled.loc[combined_weights.index.drop_duplicates()]
    assert not result.index.has_duplicates
    return result


def l1_normalize_columns(frame):
    
Italian Trulli
In pattern: SUPERPATTERN

Frequency: 4

Non-data size: 4

Instances


Project Name: commonsense/conceptnet5
Commit Name: 0d768b06d5aea91bd6f8755e605f2c706a9373f8
Time: 2016-02-24
Author: rob@luminoso.com
File Name: conceptnet5/vectors/transforms.py
Class Name:
Method Name: standardize_row_labels


Project Name: JasonKessler/scattertext
Commit Name: 810364662754ff3d11a9a573657fb572ae7135f7
Time: 2018-12-05
Author: JasonKessler@users.noreply.github.com
File Name: scattertext/termranking/OncePerDocFrequencyRanker.py
Class Name: OncePerDocFrequencyRanker
Method Name: get_ranks


Project Name: cesium-ml/cesium
Commit Name: 09bfc1b3ef8494cf17a192bbe83df576ccdac86f
Time: 2016-06-29
Author: brettnaul@gmail.com
File Name: cesium/predict.py
Class Name:
Method Name: model_predictions


Project Name: J535D165/recordlinkage
Commit Name: edccaa798d72a33648d6c9893a8f04d58ef4d1a2
Time: 2016-02-06
Author: jonathandebruinhome@gmail.com
File Name: recordlinkage/classifier.py
Class Name: LogisticRegressionClassifier
Method Name: predict