def process(
    self, sparse_data: List[Dict[int, float]]
) -> Tuple[torch.Tensor, torch.Tensor]:
    """Convert a batch of sparse feature maps into dense tensors.

    Args:
        sparse_data: One dict per row, mapping feature id to value. Feature
            ids that are not keys of ``self.feature_id_to_index`` are
            ignored.

    Returns:
        A ``(dense_data, dense_presence)`` tuple. Both tensors have shape
        ``[len(sparse_data), len(self.feature_id_to_index)]``.
        ``dense_data`` holds the supplied values at the column given by
        ``self.feature_id_to_index``. ``dense_presence`` is a uint8 mask
        with 1 wherever a value was written; when
        ``self.set_missing_value_to_zero`` is set it is instead computed as
        ``dense_data != 0.0``.

    Raises:
        AssertionError: If any row of ``sparse_data`` is ``None``.
    """
    feature_id_to_index = self.feature_id_to_index
    shape = [len(sparse_data), len(feature_id_to_index)]

    # Missing cells must really be 0 when the caller asked for that;
    # otherwise keep the historical fill of 1.0.
    # NOTE(review): confirm whether the non-zero fill should be a
    # project-wide missing-value sentinel rather than 1.0.
    make_filled = torch.zeros if self.set_missing_value_to_zero else torch.ones
    dense_data = make_filled(shape)
    dense_presence = torch.zeros(shape).byte()

    for i, feature_map in enumerate(sparse_data):
        assert (
            feature_map is not None
        ), f"Please make sure that features are not NULL; row {i}"
        for feature_id, value in feature_map.items():
            j_index = feature_id_to_index.get(feature_id)
            if j_index is None:
                # Feature is not part of the dense layout; drop it.
                continue
            # Index both dimensions at once instead of materialising a row
            # view for every single assignment.
            dense_data[i, j_index] = value
            dense_presence[i, j_index] = 1

    if self.set_missing_value_to_zero:
        # When we set missing values to 0, we don't know what is and isn't
        # missing, so presence is derived from the values themselves.
        dense_presence = dense_data != 0.0
    return (dense_data, dense_presence)
After Change
# Add columns identified by normalization, but not present in batch
for col in self.sorted_features:
if col not in state_features_df.columns:
state_features_df[col] = missing_value
values = torch.from_numpy(
state_features_df[self.sorted_features].values
).float()
if self.set_missing_value_to_zero:
# When we set missing values to 0, we don't know what is and isn't missing
presence = torch.ones_like(values, dtype=torch.bool)
else:
presence = values != missing_value
return values, presence