From 492e9a13389768d500c13206920e4f77e0cec070 Mon Sep 17 00:00:00 2001 From: AndreaGuarracino Date: Mon, 12 Jun 2023 22:41:55 +0200 Subject: [PATCH 1/7] heuristic BiWFlambda --- src/common/wflign/src/wflign.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/common/wflign/src/wflign.cpp b/src/common/wflign/src/wflign.cpp index 0ad10868..39094fb5 100644 --- a/src/common/wflign/src/wflign.cpp +++ b/src/common/wflign/src/wflign.cpp @@ -555,7 +555,7 @@ void WFlign::wflign_affine_wavefront( wflambda_affine_penalties.gap_extension, wfa::WFAligner::Alignment, wfa::WFAligner::MemoryUltralow); - wflambda_aligner->setHeuristicNone(); + wflambda_aligner->setHeuristicWFmash(MIN_WF_LENGTH, 512); // Save computed alignments in a pair-indexed map robin_hood::unordered_flat_map alignments; From 3992ac46c90d85080ca2ee6c5c60278f0a1c0130 Mon Sep 17 00:00:00 2001 From: AndreaGuarracino Date: Tue, 13 Jun 2023 16:14:32 +0200 Subject: [PATCH 2/7] expose WFlign's reduction parameters --- src/align/include/align_parameters.hpp | 2 ++ src/align/include/computeAlignments.hpp | 2 ++ src/common/wflign/src/wflign.cpp | 6 +++++- src/common/wflign/src/wflign.hpp | 4 ++++ src/interface/parse_args.hpp | 23 +++++++++++++++++++++++ 5 files changed, 36 insertions(+), 1 deletion(-) diff --git a/src/align/include/align_parameters.hpp b/src/align/include/align_parameters.hpp index e8bdee0a..45bdb0e7 100644 --- a/src/align/include/align_parameters.hpp +++ b/src/align/include/align_parameters.hpp @@ -33,6 +33,8 @@ struct Parameters { int wflign_gap_opening_score; int wflign_gap_extension_score; float wflign_max_mash_dist; + int wflign_min_wavefront_length; + int wflign_max_distance_threshold; uint64_t wflign_max_len_major; uint64_t wflign_max_len_minor; int wflign_erode_k; diff --git a/src/align/include/computeAlignments.hpp b/src/align/include/computeAlignments.hpp index cf6b809d..89f57da2 100644 --- a/src/align/include/computeAlignments.hpp +++ b/src/align/include/computeAlignments.hpp @@ -496,6 +496,8 @@ namespace align param.wflign_gap_opening_score, param.wflign_gap_extension_score, param.wflign_max_mash_dist, + param.wflign_min_wavefront_length, + param.wflign_max_distance_threshold, param.wflign_max_len_major, param.wflign_max_len_minor, param.wflign_erode_k); diff --git a/src/common/wflign/src/wflign.cpp b/src/common/wflign/src/wflign.cpp index 39094fb5..4f1aae80 100644 --- a/src/common/wflign/src/wflign.cpp +++ b/src/common/wflign/src/wflign.cpp @@ -49,6 +49,8 @@ WFlign::WFlign( const int wflign_gap_opening_score, const int wflign_gap_extension_score, const float wflign_max_mash_dist, + const int wflign_min_wavefront_length, + const int wflign_max_distance_threshold, const uint64_t wflign_max_len_major, const uint64_t wflign_max_len_minor, const int erode_k) { @@ -63,6 +65,8 @@ WFlign::WFlign( this->wflign_gap_opening_score = wflign_gap_opening_score; this->wflign_gap_extension_score = wflign_gap_extension_score; this->wflign_max_mash_dist = wflign_max_mash_dist; + this->wflign_min_wavefront_length = wflign_min_wavefront_length; + this->wflign_max_distance_threshold = wflign_max_distance_threshold; this->wflign_max_len_major = wflign_max_len_major; this->wflign_max_len_minor = wflign_max_len_minor; this->erode_k = erode_k; @@ -555,7 +559,7 @@ void WFlign::wflign_affine_wavefront( wflambda_affine_penalties.gap_extension, wfa::WFAligner::Alignment, wfa::WFAligner::MemoryUltralow); - wflambda_aligner->setHeuristicWFmash(MIN_WF_LENGTH, 512); + wflambda_aligner->setHeuristicWFmash(wflign_min_wavefront_length, wflign_max_distance_threshold); // Save computed alignments in a pair-indexed map robin_hood::unordered_flat_map alignments; diff --git a/src/common/wflign/src/wflign.hpp b/src/common/wflign/src/wflign.hpp index 1eb7c3b4..e260712b 100644 --- a/src/common/wflign/src/wflign.hpp +++ b/src/common/wflign/src/wflign.hpp @@ -49,6 +49,8 @@ namespace wflign { int wflign_gap_opening_score; int wflign_gap_extension_score; float wflign_max_mash_dist; + int wflign_min_wavefront_length; + int wflign_max_distance_threshold; uint64_t wflign_max_len_major; uint64_t wflign_max_len_minor; int erode_k; @@ -89,6 +91,8 @@ namespace wflign { const int wflign_gap_opening_score, const int wflign_gap_extension_score, const float wflign_max_mash_dist, + const int wflign_min_wavefront_length, + const int wflign_max_distance_threshold, const uint64_t wflign_max_len_major, const uint64_t wflign_max_len_minor, const int erode_k); diff --git a/src/interface/parse_args.hpp b/src/interface/parse_args.hpp index 73f30d19..f5698980 100644 --- a/src/interface/parse_args.hpp +++ b/src/interface/parse_args.hpp @@ -103,6 +103,9 @@ void parse_args(int argc, "score parameters for the wflign alignment (affine); match score is fixed at 0 [default: adaptive with respect to the estimated identity]",//, if 4 then gaps are affine, if 6 then gaps are convex [default: 1,4,6,2,26,1]", {'G', "wflign-params"}); args::ValueFlag wflign_max_mash_dist(alignment_opts, "N", "maximum mash distance to perform the alignment in a wflambda segment [default: adaptive with respect to the estimated identity]", {'b', "max-mash-dist"}); + args::ValueFlag wflign_min_wavefront_length(alignment_opts, "N", "min wavefront length for heuristic WFlign [default: 256]", {'j', "wflign-min-wf-len"}); + args::ValueFlag wflign_max_distance_threshold(alignment_opts, "N", "max distance threshold for heuristic WFlign [default: 512]", {'q', "wflign-max-disttance"}); + // patching parameter args::ValueFlag wflign_max_len_major(alignment_opts, "N", "maximum length to patch in the major axis [default: 512*segment-length]", {'C', "max-patch-major"}); args::ValueFlag wflign_max_len_minor(alignment_opts, "N", "maximum length to patch in the minor axis [default: 128*segment-length]", {'F', "max-patch-minor"}); @@ -291,6 +294,26 @@ void parse_args(int argc, align_parameters.wflign_max_mash_dist = -1; } + if (wflign_min_wavefront_length) { + if (args::get(wflign_min_wavefront_length) <= 0) { + std::cerr << "[wfmash] ERROR, skch::parseandSave, min wavefront length for heuristic WFlign must be greater than 0." << std::endl; + exit(1); + } + align_parameters.wflign_min_wavefront_length = args::get(wflign_min_wavefront_length); + } else { + align_parameters.wflign_min_wavefront_length = 256; + } + + if (wflign_max_distance_threshold) { + if (args::get(wflign_max_distance_threshold) <= 0) { + std::cerr << "[wfmash] ERROR, skch::parseandSave, max distance threshold for heuristic WFlign must be greater than 0." << std::endl; + exit(1); + } + align_parameters.wflign_max_distance_threshold = args::get(wflign_max_distance_threshold); + } else { + align_parameters.wflign_max_distance_threshold = 512; + } + align_parameters.emit_md_tag = args::get(emit_md_tag); align_parameters.sam_format = args::get(sam_format); align_parameters.no_seq_in_sam = args::get(no_seq_in_sam); From e9af72a5fbe70f498bd4b826c3f1b41eb7f57918 Mon Sep 17 00:00:00 2001 From: AndreaGuarracino Date: Thu, 15 Jun 2023 10:11:22 +0200 Subject: [PATCH 3/7] Fixed heuristic-mode in WFA2-lib --- .../wflign/deps/WFA2-lib/wavefront/wavefront_heuristic.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/common/wflign/deps/WFA2-lib/wavefront/wavefront_heuristic.c b/src/common/wflign/deps/WFA2-lib/wavefront/wavefront_heuristic.c index ae76256a..d72d7a3d 100644 --- a/src/common/wflign/deps/WFA2-lib/wavefront/wavefront_heuristic.c +++ b/src/common/wflign/deps/WFA2-lib/wavefront/wavefront_heuristic.c @@ -520,6 +520,9 @@ void wavefront_heuristic_cufoff( if (mwavefront == NULL || mwavefront->lo > mwavefront->hi) return; // Decrease wait steps --(wf_heuristic->steps_wait); + // Save lo/hi base + const int hi_base = mwavefront->hi; + const int lo_base = mwavefront->lo; // Select heuristic (WF-Adaptive) if (wf_heuristic->strategy & wf_heuristic_wfadaptive) { wavefront_heuristic_wfadaptive(wf_aligner,mwavefront,false); @@ -539,6 +542,7 @@ void wavefront_heuristic_cufoff( wavefront_heuristic_banded_adaptive(wf_aligner,mwavefront); } // Check wavefront length + if (lo_base == mwavefront->lo && hi_base == mwavefront->hi) return; // No wavefronts pruned if (mwavefront->lo > mwavefront->hi) mwavefront->null = true; // DEBUG // const int wf_length_base = hi_base-lo_base+1; From a111b92bc3ca751a2bff8e9531ed26749fba5f0b Mon Sep 17 00:00:00 2001 From: AndreaGuarracino Date: Wed, 21 Jun 2023 18:38:10 +0200 Subject: [PATCH 4/7] use an adaptive heuristic threshold --- src/common/wflign/src/wflign.cpp | 9 ++++++++- src/interface/parse_args.hpp | 4 ++-- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/src/common/wflign/src/wflign.cpp b/src/common/wflign/src/wflign.cpp index b732d0db..b678278a 100644 --- a/src/common/wflign/src/wflign.cpp +++ b/src/common/wflign/src/wflign.cpp @@ -560,7 +560,14 @@ void WFlign::wflign_affine_wavefront( wflambda_affine_penalties.gap_extension, wfa::WFAligner::Alignment, wfa::WFAligner::MemoryUltralow); - wflambda_aligner->setHeuristicWFmash(wflign_min_wavefront_length, wflign_max_distance_threshold); + wflambda_aligner->setHeuristicNone(); // It should help + if (wflign_max_distance_threshold < -1) { + wflambda_aligner->setHeuristicWFmash(wflign_min_wavefront_length, (int) (512.0 / (mashmap_estimated_identity*mashmap_estimated_identity))); + } else { + // TODO: to restore + //wflambda_aligner->setHeuristicWFmash(wflign_min_wavefront_length, wflign_max_distance_threshold); + wflambda_aligner->setHeuristicWFmash(wflign_min_wavefront_length, (int) ((float) wflign_max_distance_threshold / (mashmap_estimated_identity*mashmap_estimated_identity))); + } // Save computed alignments in a pair-indexed map robin_hood::unordered_flat_map alignments; diff --git a/src/interface/parse_args.hpp b/src/interface/parse_args.hpp index f5698980..626a7134 100644 --- a/src/interface/parse_args.hpp +++ b/src/interface/parse_args.hpp @@ -104,7 +104,7 @@ void parse_args(int argc, {'G', "wflign-params"}); args::ValueFlag wflign_max_mash_dist(alignment_opts, "N", "maximum mash distance to perform the alignment in a wflambda segment [default: adaptive with respect to the estimated identity]", {'b', "max-mash-dist"}); args::ValueFlag wflign_min_wavefront_length(alignment_opts, "N", "min wavefront length for heuristic WFlign [default: 256]", {'j', "wflign-min-wf-len"}); - args::ValueFlag wflign_max_distance_threshold(alignment_opts, "N", "max distance threshold for heuristic WFlign [default: 512]", {'q', "wflign-max-disttance"}); + args::ValueFlag wflign_max_distance_threshold(alignment_opts, "N", "max distance threshold for heuristic WFlign [default: 512/(estimated_identity^2)]", {'q', "wflign-max-disttance"}); // patching parameter args::ValueFlag wflign_max_len_major(alignment_opts, "N", "maximum length to patch in the major axis [default: 512*segment-length]", {'C', "max-patch-major"}); @@ -311,7 +311,7 @@ void parse_args(int argc, } align_parameters.wflign_max_distance_threshold = args::get(wflign_max_distance_threshold); } else { - align_parameters.wflign_max_distance_threshold = 512; + align_parameters.wflign_max_distance_threshold = -1; } align_parameters.emit_md_tag = args::get(emit_md_tag); From ccdf3a32eeff799aebeb238c55d427d6c9695e5b Mon Sep 17 00:00:00 2001 From: AndreaGuarracino Date: Fri, 23 Jun 2023 12:06:13 +0200 Subject: [PATCH 5/7] reduce chaining from 100k to 50k --- src/interface/parse_args.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/interface/parse_args.hpp b/src/interface/parse_args.hpp index 626a7134..05be72e1 100644 --- a/src/interface/parse_args.hpp +++ b/src/interface/parse_args.hpp @@ -79,7 +79,7 @@ void parse_args(int argc, args::ValueFlag skip_prefix(mapping_opts, "C", "skip mappings when the query and target have the same prefix before the last occurrence of the given character C", {'Y', "skip-prefix"}); args::Flag approx_mapping(mapping_opts, "approx-map", "skip base-level alignment, producing an approximate mapping in PAF", {'m',"approx-map"}); args::Flag no_split(mapping_opts, "no-split", "disable splitting of input sequences during mapping [default: enabled]", {'N',"no-split"}); - args::ValueFlag chain_gap(mapping_opts, "N", "chain mappings closer than this distance in query and target, retaining mappings in best chain [default: 100k]", {'c', "chain-gap"}); + args::ValueFlag chain_gap(mapping_opts, "N", "chain mappings closer than this distance in query and target, retaining mappings in best chain [default: 50k]", {'c', "chain-gap"}); args::Flag drop_low_map_pct_identity(mapping_opts, "K", "drop mappings with estimated identity below --map-pct-id=%", {'K', "drop-low-map-id"}); args::Flag no_filter(mapping_opts, "MODE", "disable mapping filtering", {'f', "no-filter"}); args::ValueFlag map_sparsification(mapping_opts, "FACTOR", "keep this fraction of mappings", {'x', "sparsify-mappings"}); @@ -373,7 +373,7 @@ void parse_args(int argc, } map_parameters.chain_gap = l; } else { - map_parameters.chain_gap = 100000; + map_parameters.chain_gap = 50000; } if (drop_low_map_pct_identity) { From 6e5977bf67e4db31dd5aaf7da6f4916be2d84ac9 Mon Sep 17 00:00:00 2001 From: AndreaGuarracino Date: Sat, 24 Jun 2023 17:49:01 +0200 Subject: [PATCH 6/7] update heuristic defaults --- src/common/wflign/src/wflign.cpp | 6 ++---- src/interface/parse_args.hpp | 6 +++--- 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/src/common/wflign/src/wflign.cpp b/src/common/wflign/src/wflign.cpp index b678278a..b858be2d 100644 --- a/src/common/wflign/src/wflign.cpp +++ b/src/common/wflign/src/wflign.cpp @@ -562,11 +562,9 @@ void WFlign::wflign_affine_wavefront( wfa::WFAligner::MemoryUltralow); wflambda_aligner->setHeuristicNone(); // It should help if (wflign_max_distance_threshold < -1) { - wflambda_aligner->setHeuristicWFmash(wflign_min_wavefront_length, (int) (512.0 / (mashmap_estimated_identity*mashmap_estimated_identity))); + wflambda_aligner->setHeuristicWFmash(wflign_min_wavefront_length, (int) (2048.0 / (mashmap_estimated_identity*mashmap_estimated_identity))); } else { - // TODO: to restore - //wflambda_aligner->setHeuristicWFmash(wflign_min_wavefront_length, wflign_max_distance_threshold); - wflambda_aligner->setHeuristicWFmash(wflign_min_wavefront_length, (int) ((float) wflign_max_distance_threshold / (mashmap_estimated_identity*mashmap_estimated_identity))); + wflambda_aligner->setHeuristicWFmash(wflign_min_wavefront_length, wflign_max_distance_threshold); } // Save computed alignments in a pair-indexed map diff --git a/src/interface/parse_args.hpp b/src/interface/parse_args.hpp index 05be72e1..496b77b4 100644 --- a/src/interface/parse_args.hpp +++ b/src/interface/parse_args.hpp @@ -103,8 +103,8 @@ void parse_args(int argc, "score parameters for the wflign alignment (affine); match score is fixed at 0 [default: adaptive with respect to the estimated identity]",//, if 4 then gaps are affine, if 6 then gaps are convex [default: 1,4,6,2,26,1]", {'G', "wflign-params"}); args::ValueFlag wflign_max_mash_dist(alignment_opts, "N", "maximum mash distance to perform the alignment in a wflambda segment [default: adaptive with respect to the estimated identity]", {'b', "max-mash-dist"}); - args::ValueFlag wflign_min_wavefront_length(alignment_opts, "N", "min wavefront length for heuristic WFlign [default: 256]", {'j', "wflign-min-wf-len"}); - args::ValueFlag wflign_max_distance_threshold(alignment_opts, "N", "max distance threshold for heuristic WFlign [default: 512/(estimated_identity^2)]", {'q', "wflign-max-disttance"}); + args::ValueFlag wflign_min_wavefront_length(alignment_opts, "N", "min wavefront length for heuristic WFlign [default: 1024]", {'j', "wflign-min-wf-len"}); + args::ValueFlag wflign_max_distance_threshold(alignment_opts, "N", "max distance threshold for heuristic WFlign [default: 2048/(estimated_identity^2)]", {'q', "wflign-max-disttance"}); // patching parameter args::ValueFlag wflign_max_len_major(alignment_opts, "N", "maximum length to patch in the major axis [default: 512*segment-length]", {'C', "max-patch-major"}); @@ -301,7 +301,7 @@ void parse_args(int argc, } align_parameters.wflign_min_wavefront_length = args::get(wflign_min_wavefront_length); } else { - align_parameters.wflign_min_wavefront_length = 256; + align_parameters.wflign_min_wavefront_length = 1024; } if (wflign_max_distance_threshold) { From e27f85ac9455655293cd92aec051815973a6cdb8 Mon Sep 17 00:00:00 2001 From: AndreaGuarracino Date: Mon, 26 Jun 2023 12:03:06 +0200 Subject: [PATCH 7/7] fix condition --- src/common/wflign/src/wflign.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/common/wflign/src/wflign.cpp b/src/common/wflign/src/wflign.cpp index b858be2d..1d1df50d 100644 --- a/src/common/wflign/src/wflign.cpp +++ b/src/common/wflign/src/wflign.cpp @@ -561,7 +561,7 @@ void WFlign::wflign_affine_wavefront( wfa::WFAligner::Alignment, wfa::WFAligner::MemoryUltralow); wflambda_aligner->setHeuristicNone(); // It should help - if (wflign_max_distance_threshold < -1) { + if (wflign_max_distance_threshold <= 0) { wflambda_aligner->setHeuristicWFmash(wflign_min_wavefront_length, (int) (2048.0 / (mashmap_estimated_identity*mashmap_estimated_identity))); } else { wflambda_aligner->setHeuristicWFmash(wflign_min_wavefront_length, wflign_max_distance_threshold);