da6fb45ee7a00f5051ec47bf15247045774c3604,mimic3benchmark/scripts/create_decompensation.py,,process_partition,#Any#Any#Any#Any#Any#Any#,13

Before Change



    xty_triples = []
    patients = list(filter(str.isdigit, os.listdir(os.path.join(args.root_path, partition))))
    for (patient_index, patient) in enumerate(patients):
        patient_folder = os.path.join(args.root_path, partition, patient)
        patient_ts_files = list(filter(lambda x: x.find("timeseries") != -1, os.listdir(patient_folder)))
        stays_df = pd.read_csv(os.path.join(patient_folder, "stays.csv"))

        for ts_filename in patient_ts_files:
            with open(os.path.join(patient_folder, ts_filename)) as tsfile:
                lb_filename = ts_filename.replace("_timeseries", "")
                label_df = pd.read_csv(os.path.join(patient_folder, lb_filename))

                # empty label file
                if label_df.shape[0] == 0:
                    continue

                mortality = int(label_df.iloc[0]["Mortality"])

                los = 24.0 * label_df.iloc[0]["Length of Stay"]  # in hours
                if pd.isnull(los):
                    print("(length of stay is missing)", patient, ts_filename)
                    continue

                stay = stays_df[stays_df.ICUSTAY_ID == label_df.iloc[0]["Icustay"]]
                deathtime = stay["DEATHTIME"].iloc[0]
                intime = stay["INTIME"].iloc[0]
                if pd.isnull(deathtime):
                    lived_time = 1e18
                else:
                    lived_time = (datetime.strptime(deathtime, "%Y-%m-%d %H:%M:%S") -
                                  datetime.strptime(intime, "%Y-%m-%d %H:%M:%S")).total_seconds() / 3600.0

                ts_lines = tsfile.readlines()
                header = ts_lines[0]
                ts_lines = ts_lines[1:]
                event_times = [float(line.split(",")[0]) for line in ts_lines]

                ts_lines = [line for (line, t) in zip(ts_lines, event_times)
                            if -eps < t < los + eps]
                event_times = [t for t in event_times
                               if -eps < t < los + eps]

                # no measurements in ICU
                if len(ts_lines) == 0:
                    print("(no events in ICU) ", patient, ts_filename)
                    continue

                sample_times = np.arange(0.0, min(los, lived_time) + eps, sample_rate)

                sample_times = list(filter(lambda x: x > shortest_length, sample_times))

                # At least one measurement
                sample_times = list(filter(lambda x: x > event_times[0], sample_times))

                output_ts_filename = patient + "_" + ts_filename
                with open(os.path.join(output_dir, output_ts_filename), "w") as outfile:
                    outfile.write(header)
                    for line in ts_lines:
                        outfile.write(line)

                for t in sample_times:
                    if mortality == 0:
                        cur_mortality = 0
                    else:
                        cur_mortality = int(lived_time - t < future_time_interval)
                    xty_triples.append((output_ts_filename, t, cur_mortality))

        if (patient_index + 1) % 100 == 0:
            print("processed {} / {} patients".format(patient_index + 1, len(patients)), end="\r")

    print(len(xty_triples))
    if partition == "train":
        random.shuffle(xty_triples)
    if partition == "test":

After Change


        os.mkdir(output_dir)

    xty_triples = []
    patients = list(filter(str.isdigit, os.listdir(os.path.join(args.root_path, partition))))
    for patient in tqdm(patients, desc="Iterating over patients in {}".format(partition)):
        patient_folder = os.path.join(args.root_path, partition, patient)
        patient_ts_files = list(filter(lambda x: x.find("timeseries") != -1, os.listdir(patient_folder)))
        stays_df = pd.read_csv(os.path.join(patient_folder, "stays.csv"))
In pattern: SUPERPATTERN

Frequency: 4

Non-data size: 11

Instances


Project Name: YerevaNN/mimic3-benchmarks
Commit Name: da6fb45ee7a00f5051ec47bf15247045774c3604
Time: 2020-03-26
Author: harhro@gmail.com
File Name: mimic3benchmark/scripts/create_decompensation.py
Class Name:
Method Name: process_partition


Project Name: YerevaNN/mimic3-benchmarks
Commit Name: da6fb45ee7a00f5051ec47bf15247045774c3604
Time: 2020-03-26
Author: harhro@gmail.com
File Name: mimic3benchmark/scripts/create_length_of_stay.py
Class Name:
Method Name: process_partition


Project Name: YerevaNN/mimic3-benchmarks
Commit Name: da6fb45ee7a00f5051ec47bf15247045774c3604
Time: 2020-03-26
Author: harhro@gmail.com
File Name: mimic3benchmark/scripts/create_in_hospital_mortality.py
Class Name:
Method Name: process_partition


Project Name: YerevaNN/mimic3-benchmarks
Commit Name: da6fb45ee7a00f5051ec47bf15247045774c3604
Time: 2020-03-26
Author: harhro@gmail.com
File Name: mimic3benchmark/scripts/create_phenotyping.py
Class Name:
Method Name: process_partition