# Keep only the first of the equivalent names stored for each gene.
# NOTE(review): the previous call was `gn.split("")`, which always raises
# `ValueError: empty separator`. ";" is assumed to be the delimiter between
# the equivalent names -- confirm against the data file.
genenames = np.array([gn.split(";")[0] for gn in genenames])
# print("the first 10 truncated gene names are \n", genenames[:10])
# Boolean mask marking which genes are informative. The explicit dtype keeps
# the mask usable as a boolean index even when `genenames` is empty.
infogenes_idcs = np.array(
    [gn in infogenenames for gn in genenames], dtype=bool
)
# restrict data array to the 3451 informative genes
# (columns of X are indexed by the mask, so each column is taken to be a gene)
X = X[:, infogenes_idcs]
genenames = genenames[infogenes_idcs]
After Change
# Wrap the raw data dictionary in an AnnData object.
adata = AnnData(ddata)
# the data has to be transposed (in the hdf5 and R files, each row
# corresponds to one gene, we use the opposite convention)
adata = adata.transpose()
# cluster associations identified by Paul et al.
# groups = sc.read(filename, "cluster.id")["X"]
# Names of the informative genes, read from the same file.
infogenes_names = sc.read(filename, "info.genes_strings")["X"]
# just keep the first of the two equivalent names per gene