for name, value in self.column_defaults.items()
if name not in existing
}
known = known.assign(**defaults_required)
// now verify that all of the columns needed are there (that is, the defaults have filled out appropriately)
require_dataframe_has_columns(
self.name(type_name), known, self.selected_columns
)
for column, transform in self.transform_columns.items():
known[column] = transform(known[column])
if self.allow_features:
// to_numpy returns an unspecified order but it's Fortran in practice. Row-level bulk
// operations are more common (e.g. slicing out a couple of rows, when sampling a few
After Change
ids = data.index
// split the dataframe based on the columns we know about
missing_columns = []
def select_column(old_name):
if old_name in data.columns:
column = data[old_name].to_numpy()