diff --git a/src/align/include/align_parameters.hpp b/src/align/include/align_parameters.hpp index e8bdee0a..45bdb0e7 100644 --- a/src/align/include/align_parameters.hpp +++ b/src/align/include/align_parameters.hpp @@ -33,6 +33,8 @@ struct Parameters { int wflign_gap_opening_score; int wflign_gap_extension_score; float wflign_max_mash_dist; + int wflign_min_wavefront_length; + int wflign_max_distance_threshold; uint64_t wflign_max_len_major; uint64_t wflign_max_len_minor; int wflign_erode_k; diff --git a/src/align/include/computeAlignments.hpp b/src/align/include/computeAlignments.hpp index cf6b809d..89f57da2 100644 --- a/src/align/include/computeAlignments.hpp +++ b/src/align/include/computeAlignments.hpp @@ -496,6 +496,8 @@ namespace align param.wflign_gap_opening_score, param.wflign_gap_extension_score, param.wflign_max_mash_dist, + param.wflign_min_wavefront_length, + param.wflign_max_distance_threshold, param.wflign_max_len_major, param.wflign_max_len_minor, param.wflign_erode_k); diff --git a/src/common/wflign/src/wflign.cpp b/src/common/wflign/src/wflign.cpp index 1d3a541e..1d1df50d 100644 --- a/src/common/wflign/src/wflign.cpp +++ b/src/common/wflign/src/wflign.cpp @@ -50,6 +50,8 @@ WFlign::WFlign( const int wflign_gap_opening_score, const int wflign_gap_extension_score, const float wflign_max_mash_dist, + const int wflign_min_wavefront_length, + const int wflign_max_distance_threshold, const uint64_t wflign_max_len_major, const uint64_t wflign_max_len_minor, const int erode_k) { @@ -64,6 +66,8 @@ WFlign::WFlign( this->wflign_gap_opening_score = wflign_gap_opening_score; this->wflign_gap_extension_score = wflign_gap_extension_score; this->wflign_max_mash_dist = wflign_max_mash_dist; + this->wflign_min_wavefront_length = wflign_min_wavefront_length; + this->wflign_max_distance_threshold = wflign_max_distance_threshold; this->wflign_max_len_major = wflign_max_len_major; this->wflign_max_len_minor = wflign_max_len_minor; this->erode_k = erode_k; @@ -556,7 +560,12 @@ void WFlign::wflign_affine_wavefront( wflambda_affine_penalties.gap_extension, wfa::WFAligner::Alignment, wfa::WFAligner::MemoryUltralow); - wflambda_aligner->setHeuristicNone(); + wflambda_aligner->setHeuristicNone(); // It should help + if (wflign_max_distance_threshold <= 0) { + wflambda_aligner->setHeuristicWFmash(wflign_min_wavefront_length, (int) (2048.0 / (mashmap_estimated_identity*mashmap_estimated_identity))); + } else { + wflambda_aligner->setHeuristicWFmash(wflign_min_wavefront_length, wflign_max_distance_threshold); + } // Save computed alignments in a pair-indexed map robin_hood::unordered_flat_map alignments; diff --git a/src/common/wflign/src/wflign.hpp b/src/common/wflign/src/wflign.hpp index 1eb7c3b4..e260712b 100644 --- a/src/common/wflign/src/wflign.hpp +++ b/src/common/wflign/src/wflign.hpp @@ -49,6 +49,8 @@ namespace wflign { int wflign_gap_opening_score; int wflign_gap_extension_score; float wflign_max_mash_dist; + int wflign_min_wavefront_length; + int wflign_max_distance_threshold; uint64_t wflign_max_len_major; uint64_t wflign_max_len_minor; int erode_k; @@ -89,6 +91,8 @@ namespace wflign { const int wflign_gap_opening_score, const int wflign_gap_extension_score, const float wflign_max_mash_dist, + const int wflign_min_wavefront_length, + const int wflign_max_distance_threshold, const uint64_t wflign_max_len_major, const uint64_t wflign_max_len_minor, const int erode_k); diff --git a/src/interface/parse_args.hpp b/src/interface/parse_args.hpp index 73f30d19..496b77b4 100644 --- a/src/interface/parse_args.hpp +++ b/src/interface/parse_args.hpp @@ -79,7 +79,7 @@ void parse_args(int argc, args::ValueFlag skip_prefix(mapping_opts, "C", "skip mappings when the query and target have the same prefix before the last occurrence of the given character C", {'Y', "skip-prefix"}); args::Flag approx_mapping(mapping_opts, "approx-map", "skip base-level alignment, producing an approximate mapping in PAF", {'m',"approx-map"}); args::Flag no_split(mapping_opts, "no-split", "disable splitting of input sequences during mapping [default: enabled]", {'N',"no-split"}); - args::ValueFlag chain_gap(mapping_opts, "N", "chain mappings closer than this distance in query and target, retaining mappings in best chain [default: 100k]", {'c', "chain-gap"}); + args::ValueFlag chain_gap(mapping_opts, "N", "chain mappings closer than this distance in query and target, retaining mappings in best chain [default: 50k]", {'c', "chain-gap"}); args::Flag drop_low_map_pct_identity(mapping_opts, "K", "drop mappings with estimated identity below --map-pct-id=%", {'K', "drop-low-map-id"}); args::Flag no_filter(mapping_opts, "MODE", "disable mapping filtering", {'f', "no-filter"}); args::ValueFlag map_sparsification(mapping_opts, "FACTOR", "keep this fraction of mappings", {'x', "sparsify-mappings"}); @@ -103,6 +103,9 @@ void parse_args(int argc, "score parameters for the wflign alignment (affine); match score is fixed at 0 [default: adaptive with respect to the estimated identity]",//, if 4 then gaps are affine, if 6 then gaps are convex [default: 1,4,6,2,26,1]", {'G', "wflign-params"}); args::ValueFlag wflign_max_mash_dist(alignment_opts, "N", "maximum mash distance to perform the alignment in a wflambda segment [default: adaptive with respect to the estimated identity]", {'b', "max-mash-dist"}); + args::ValueFlag wflign_min_wavefront_length(alignment_opts, "N", "min wavefront length for heuristic WFlign [default: 1024]", {'j', "wflign-min-wf-len"}); + args::ValueFlag wflign_max_distance_threshold(alignment_opts, "N", "max distance threshold for heuristic WFlign [default: 2048/(estimated_identity^2)]", {'q', "wflign-max-disttance"}); + // patching parameter args::ValueFlag wflign_max_len_major(alignment_opts, "N", "maximum length to patch in the major axis [default: 512*segment-length]", {'C', "max-patch-major"}); args::ValueFlag wflign_max_len_minor(alignment_opts, "N", "maximum length to patch in the minor axis [default: 128*segment-length]", {'F', "max-patch-minor"}); @@ -291,6 +294,26 @@ void parse_args(int argc, align_parameters.wflign_max_mash_dist = -1; } + if (wflign_min_wavefront_length) { + if (args::get(wflign_min_wavefront_length) <= 0) { + std::cerr << "[wfmash] ERROR, skch::parseandSave, min wavefront length for heuristic WFlign must be greater than 0." << std::endl; + exit(1); + } + align_parameters.wflign_min_wavefront_length = args::get(wflign_min_wavefront_length); + } else { + align_parameters.wflign_min_wavefront_length = 1024; + } + + if (wflign_max_distance_threshold) { + if (args::get(wflign_max_distance_threshold) <= 0) { + std::cerr << "[wfmash] ERROR, skch::parseandSave, max distance threshold for heuristic WFlign must be greater than 0." << std::endl; + exit(1); + } + align_parameters.wflign_max_distance_threshold = args::get(wflign_max_distance_threshold); + } else { + align_parameters.wflign_max_distance_threshold = -1; + } + align_parameters.emit_md_tag = args::get(emit_md_tag); align_parameters.sam_format = args::get(sam_format); align_parameters.no_seq_in_sam = args::get(no_seq_in_sam); @@ -350,7 +373,7 @@ void parse_args(int argc, } map_parameters.chain_gap = l; } else { - map_parameters.chain_gap = 100000; + map_parameters.chain_gap = 50000; } if (drop_low_map_pct_identity) {