# Join the per-sample frames collected in the gct_df list column-wise.
gct_df = pd.concat(gct_df, axis=1)
# Keep "Description" first and sort the remaining columns by sample_id.
# NOTE(review): assumes "Description" is the first column after the concat.
gct_df = gct_df[["Description"] + sorted(gct_df.columns[1:])]
output_path = os.path.join(args.output_dir, args.output_prefix + ".gct.gz")
# "wt" opens the gzip stream in text mode so str can be written directly.
with gzip.open(output_path, "wt") as f:
    # GCT version line; the format requires it to be exactly "#1.2".
    f.write("#1.2\n")
    # Dimensions line: row count, then column count minus the "Description"
    # column.
    f.write("{0:d}\t{1:d}\n".format(gct_df.shape[0], gct_df.shape[1] - 1))
After Change
# Resolve the list of input paths. A single argument whose name does not
# contain ".gct" is read as a text file listing one path per line; otherwise
# the arguments themselves are used as the paths.
if len(args.input_files) == 1 and ".gct" not in args.input_files[0]:
    with open(args.input_files[0]) as f:
        # Strip every line and drop blank ones so that stray whitespace,
        # "\r" line endings or empty lines do not become bogus paths.
        paths = [line.strip() for line in f if line.strip()]
else:
    paths = args.input_files
if not paths:
    # Fail early with a clear message instead of an obscure read error below.
    raise ValueError("No input GCT files were provided.")

# The sample ID is the file name up to its first ".".
sample_ids = np.array([os.path.basename(path).split(".")[0] for path in paths])
# sort by sample_id
order = np.argsort(sample_ids)
sample_ids = sample_ids[order]
paths = np.array(paths)[order]

# The first file supplies the "Name" index, the "Description" column and its
# own value column, which is named after its sample ID. The first three lines
# of every file are skipped (presumably the GCT version, dimension and column
# header lines -- confirm against the input format).
gct_df = [
    pd.read_csv(
        paths[0],
        sep="\t",
        skiprows=3,
        header=None,
        index_col=0,
        names=["Name", "Description", sample_ids[0]],
    )
]
# Every other file contributes only columns 0 and 2 (usecols), i.e. the index
# and its value column, so "Description" is not loaded again.
gct_df += [
    pd.read_csv(
        path,
        sep="\t",
        skiprows=3,
        header=None,
        usecols=[0, 2],
        index_col=0,
        names=["Name", "Description", sample_id],
    )
    for path, sample_id in zip(paths[1:], sample_ids[1:])
]