Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

WIP: Pangenome Outgroups #1043

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion src/cactus/refmap/cactus_pangenome.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,8 @@ def main():
parser.add_argument("seqFile", help = "Seq file (will be modified if necessary to include graph Fasta sequence)")
parser.add_argument("--outDir", help = "Output directory", required=True)
parser.add_argument("--outName", help = "Output name (without extension)", required=True)
parser.add_argument("--reference", required=True, nargs='+', type=str, help = "Reference event name(s). The first will be the \"true\" reference and will be left unclipped and uncollapsed. It also should have been used with --reference in all upstream commands. Other names will be promoted to reference paths in vg")
parser.add_argument("--reference", required=True, nargs='+', type=str, help = "Reference event name(s). The first will be the \"true\" reference and will be left unclipped and uncollapsed. It also should have been used with --reference in all upstream commands. Other names will be promoted to reference paths in vg")
parser.add_argument("--outgroup", type=str, nargs='+', help = "Use given genome as outgroup. Multiple allowed")

# cactus-minigraph options
parser.add_argument("--mgCores", type=int, help = "Number of cores for minigraph construction (defaults to the same as --maxCores).")
Expand Down
11 changes: 8 additions & 3 deletions src/cactus/setup/cactus_align.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@ def main():
parser.add_argument("--outGFA", action="store_true", help = "export pangenome grpah in GFA (.gfa.gz) in addition to HAL")
parser.add_argument("--batch", action="store_true", help = "Launch batch of alignments. Input seqfile is expected to be chromfile as generated by cactus-graphmap-slit. IMPORTANT: paffile argument should bot be specified when using this option")
parser.add_argument("--reference", type=str, help = "Ensure that given genome is acyclic by deleting all paralogy edges in postprocessing, also do not mask its PAF mappings")
parser.add_argument("--outgroup", type=str, nargs='+', help = "Use given genome as outgroup. Only works with --pangenome. Multiple allowed")

#Progressive Cactus Options
parser.add_argument("--configFile", dest="configFile",
Expand Down Expand Up @@ -130,7 +131,10 @@ def main():
raise RuntimeError('--consCores required for non single_machine batch systems')
if options.maxCores is not None and options.consCores > int(options.maxCores):
raise RuntimeError('--consCores must be <= --maxCores')


if options.outgroup and not options.pangenome:
raise RuntimeError('--outgroup can only be used with --pangenome')

options.buildHal = True
options.buildFasta = True

Expand Down Expand Up @@ -261,6 +265,9 @@ def make_align_job(options, toil, config_wrapper=None, chrom_name=None):
if options.reference and options.pangenome:
# validate the sample names
check_sample_names(input_seq_map.keys(), options.reference)

if options.outgroup and options.pangenome:
og_map[ options.root if options.root else mc_tree.getRootName()] = options.outgroup

# apply path overrides. this was necessary for WDL, which doesn't take kindly to
# text files of local paths (i.e. the seqfile). one way to fix would be to add support
Expand Down Expand Up @@ -311,8 +318,6 @@ def make_align_job(options, toil, config_wrapper=None, chrom_name=None):
cafNode.attrib["runMapQFiltering"] = "0"
# more iterations here help quite a bit to reduce underalignment
cafNode.attrib["maxRecoverableChainsIterations"] = "50"
# turn down minimum block degree to get a fat ancestor
barNode.attrib["minimumBlockDegree"] = "1"
# turn off POA seeding
poaNode.attrib["partialOrderAlignmentDisableSeeding"] = "1"

Expand Down