4a24e4e36ee175aef54b92eb03e08a2be7811a96,recordlinkage/datasets/febrl.py,,_febrl_links,#Any#,27

Before Change


def _febrl_links(df):
    """Get the links of a FEBRL dataset.

    Records are linked when they share the numeric key found in their
    record identifier (the digits matched by ``rec-(\\d+)``). Each record
    whose identifier ends in ``"org"`` is paired with every other record
    carrying the same key.

    Parameters
    ----------
    df : pandas.DataFrame
        Records whose index is named ``rec_id`` and holds string
        identifiers. The columns are not used.

    Returns
    -------
    pandas.MultiIndex
        Two-level index of ``(org rec_id, other rec_id)`` pairs with
        unnamed levels.
    """
    # Only the identifiers matter: drop every column and turn the index
    # into a regular "rec_id" column.
    df_empty = df[[]].reset_index()
    df_empty["key"] = df_empty["rec_id"].str.extract(
        r"rec-(\d+)", expand=True
    )[0]

    # split the dataframe (org and dup)
    org_bool = df_empty["rec_id"].str.endswith("org")

    # merge the two frames and make MultiIndex; merging on "key" yields
    # the suffixed columns "rec_id_x" (org side) and "rec_id_y" (other side).
    pairs = df_empty[org_bool].merge(df_empty[~org_bool], on="key")
    pairs_mi = pairs.set_index(["rec_id_x", "rec_id_y"]).index
    # Strip the merge-generated level names from the result.
    pairs_mi.names = [None, None]

    return pairs_mi

After Change



    index_int = numpy.arange(len(df))

    df_helper = pandas.DataFrame({
        "key": keys,
        "index": index_int
    })

    // merge the two frame and make MultiIndex.
    pairs_df = df_helper.merge(
        df_helper, on="key"
    )[["index_x", "index_y"]]
    pairs_df = pairs_df[pairs_df["index_x"] > pairs_df["index_y"]]

    return pandas.MultiIndex(
        levels=[df.index.values, df.index.values],
Italian Trulli
In pattern: SUPERPATTERN

Frequency: 3

Non-data size: 5

Instances


Project Name: J535D165/recordlinkage
Commit Name: 4a24e4e36ee175aef54b92eb03e08a2be7811a96
Time: 2018-03-10
Author: jonathandebruinhome@gmail.com
File Name: recordlinkage/datasets/febrl.py
Class Name:
Method Name: _febrl_links


Project Name: NTMC-Community/MatchZoo
Commit Name: a871536bcbb38b9ca03b0bc777712d8c0a79ad90
Time: 2018-12-13
Author: i@uduse.com
File Name: matchzoo/data_pack/pack.py
Class Name:
Method Name: pack


Project Name: streamlit/streamlit
Commit Name: aa8a7efd83e323856ca48742cb44a2fcb84b6bea
Time: 2018-04-24
Author: adrien.g.treuille@gmail.com
File Name: examples/scratchpad.py
Class Name:
Method Name: