target_bed,
tuple(sorted(target_chroms.keys())[:3])))
# Accessible chromosomes that carry no targets are candidates for removal
# (typically alternative contigs and the mitochondrial genome).
untgt_chroms = set(access_chroms).difference(target_chroms)
# Canonical names: optional "chr" prefix, then a number (autosome) or X/Y.
is_canonical = re.compile(r"(chr)?(\d+|[XYxy])$")
if any(map(is_canonical.match, target_chroms)):
    # At least one target has a canonical name, so every untargeted
    # chromosome whose name is not canonical gets dropped.
    chroms_to_skip = [name for name in untgt_chroms
                      if is_canonical.match(name) is None]
else:
    # No canonical names to go by. Alternative contigs have long names, so
    # drop anything named longer than the longest targeted chromosome.
    max_tgt_chr_name_len = max(len(name) for name in target_chroms)
    chroms_to_skip = [name for name in untgt_chroms
                      if len(name) > max_tgt_chr_name_len]
for untgt_chr in chroms_to_skip:
    logging.info("Skipping untargeted chromosome %s", untgt_chr)
    del access_chroms[untgt_chr]
else:
# Chromosomes' accessible sequence regions are not known -- use heuristics
# (chromosome length is the endpoint of the last probe; skip the initial
# <magic number> of bases, which are probably telomeric)
After Change
if access_bed:
# Chromosomes' accessible sequence regions are given -- use them
accessible = tabio.read_auto(access_bed)
access_chroms = set(accessible.chromosome.unique())
if access_chroms and access_chroms.isdisjoint(target_chroms):
raise ValueError("Chromosome names in the accessible regions file "
"%s %r do not match those in targets %s %r"
% (access_bed, tuple(sorted(access_chroms)[:3]),