def read_data(self, filename, table):
    """Fill the preallocated arrays of ``table`` from a whitespace-separated file.

    The first three lines of the file are skipped. Every following
    non-empty line is split on whitespace and its values are converted by
    the ``(column, reader)`` pairs in ``self.attribute_columns``,
    ``self.classvar_columns`` and ``self.meta_columns``, then stored in the
    corresponding row of ``table._X``, ``table._Y`` and ``table._metas``.
    Lines with fewer than ``self.n_columns`` values are padded with empty
    strings before conversion.

    Args:
        filename: Path of the file to read; passed to ``open``.
        table: Object providing the arrays ``_X``, ``_Y`` and ``_W``.
            ``table._metas`` is replaced by a new object array with
            ``len(table._X)`` rows, and ``table._Xsparse`` is set when
            ``self.basket_column >= 0``.

    Raises:
        ValueError: If a line holds more than ``self.n_columns`` values.
    """
    _X, _Y = table._X, table._Y
    # Weights are skipped entirely when table._W has a zero-length last axis.
    _W = table._W if table._W.shape[-1] else None
    # The context manager closes the file on normal exit and when a reader
    # or the column check raises.
    with open(filename) as f:
        # Skip the three leading lines.
        # NOTE(review): presumably a three-line header -- confirm.
        for _ in range(3):
            f.readline()
        padding = [""] * self.n_columns
        if self.basket_column >= 0:
            # lil_matrix takes its shape as a single tuple argument.
            # TODO how many columns?!
            table._Xsparse = sparse.lil_matrix((len(_X), 100))
        table._metas = _metas = np.empty(
            (len(_X), len(self.meta_columns)), dtype=object)
        line_count = 0
        _Xr = None  # To be able to delete it below even when there are no attributes
        for lne in f:
            values = lne.strip().split()
            if not values:
                # Blank lines do not produce a row.
                continue
            if len(values) > self.n_columns:
                # 4 = three skipped lines + 1-based numbering.
                raise ValueError(
                    "Too many columns in line {}".format(4 + line_count))
            elif len(values) < self.n_columns:
                # Appending n_columns empty strings guarantees every column
                # index up to n_columns - 1 exists.
                values += padding
            if self.attribute_columns:
                _Xr = _X[line_count]
                for i, (col, reader) in enumerate(self.attribute_columns):
                    _Xr[i] = reader(values[col])
            for i, (col, reader) in enumerate(self.classvar_columns):
                _Y[line_count, i] = reader(values[col])
            if _W is not None:
                _W[line_count] = float(values[self.weight_column])
            for i, (col, reader) in enumerate(self.meta_columns):
                _metas[line_count, i] = reader(values[col])
            line_count += 1
    if line_count != len(_X):
        # Fewer (non-empty) rows were read than were allocated: drop the
        # local references to the table's arrays.
        # NOTE(review): presumably so the table can be resized afterwards
        # without outstanding references -- no resize is visible here; confirm.
        del _Xr, _X, _Y, _W, _metas
After Change
def read_data(self, f, table):
_X, _Y = table._X, table._Y
_W = table._W if table._W.shape[-1] else None
f.seek(0)
f.readline(); f.readline(); f.readline()
padding = [""] * self.n_columns
if self.basket_column >= 0:
table._Xsparse = _Xsparse = sparse.lil_matrix(len(_X), 100) // TODO how many columns?!