Skip to content

Commit

Permalink
Merge pull request #176 from waveygang/heuristic_biwfa
Browse files Browse the repository at this point in the history
Heuristic BiWFλ and less aggressive mapping chaining
  • Loading branch information
AndreaGuarracino authored Jun 27, 2023
2 parents 36b875e + e27f85a commit 00fed86
Show file tree
Hide file tree
Showing 5 changed files with 43 additions and 3 deletions.
2 changes: 2 additions & 0 deletions src/align/include/align_parameters.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,8 @@ struct Parameters {
int wflign_gap_opening_score;
int wflign_gap_extension_score;
float wflign_max_mash_dist;
int wflign_min_wavefront_length;
int wflign_max_distance_threshold;
uint64_t wflign_max_len_major;
uint64_t wflign_max_len_minor;
int wflign_erode_k;
Expand Down
2 changes: 2 additions & 0 deletions src/align/include/computeAlignments.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -496,6 +496,8 @@ namespace align
param.wflign_gap_opening_score,
param.wflign_gap_extension_score,
param.wflign_max_mash_dist,
param.wflign_min_wavefront_length,
param.wflign_max_distance_threshold,
param.wflign_max_len_major,
param.wflign_max_len_minor,
param.wflign_erode_k);
Expand Down
11 changes: 10 additions & 1 deletion src/common/wflign/src/wflign.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,8 @@ WFlign::WFlign(
const int wflign_gap_opening_score,
const int wflign_gap_extension_score,
const float wflign_max_mash_dist,
const int wflign_min_wavefront_length,
const int wflign_max_distance_threshold,
const uint64_t wflign_max_len_major,
const uint64_t wflign_max_len_minor,
const int erode_k) {
Expand All @@ -64,6 +66,8 @@ WFlign::WFlign(
this->wflign_gap_opening_score = wflign_gap_opening_score;
this->wflign_gap_extension_score = wflign_gap_extension_score;
this->wflign_max_mash_dist = wflign_max_mash_dist;
this->wflign_min_wavefront_length = wflign_min_wavefront_length;
this->wflign_max_distance_threshold = wflign_max_distance_threshold;
this->wflign_max_len_major = wflign_max_len_major;
this->wflign_max_len_minor = wflign_max_len_minor;
this->erode_k = erode_k;
Expand Down Expand Up @@ -556,7 +560,12 @@ void WFlign::wflign_affine_wavefront(
wflambda_affine_penalties.gap_extension,
wfa::WFAligner::Alignment,
wfa::WFAligner::MemoryUltralow);
wflambda_aligner->setHeuristicNone();
wflambda_aligner->setHeuristicNone(); // It should help
if (wflign_max_distance_threshold <= 0) {
wflambda_aligner->setHeuristicWFmash(wflign_min_wavefront_length, (int) (2048.0 / (mashmap_estimated_identity*mashmap_estimated_identity)));
} else {
wflambda_aligner->setHeuristicWFmash(wflign_min_wavefront_length, wflign_max_distance_threshold);
}

// Save computed alignments in a pair-indexed map
robin_hood::unordered_flat_map<uint64_t,alignment_t*> alignments;
Expand Down
4 changes: 4 additions & 0 deletions src/common/wflign/src/wflign.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,8 @@ namespace wflign {
int wflign_gap_opening_score;
int wflign_gap_extension_score;
float wflign_max_mash_dist;
int wflign_min_wavefront_length;
int wflign_max_distance_threshold;
uint64_t wflign_max_len_major;
uint64_t wflign_max_len_minor;
int erode_k;
Expand Down Expand Up @@ -89,6 +91,8 @@ namespace wflign {
const int wflign_gap_opening_score,
const int wflign_gap_extension_score,
const float wflign_max_mash_dist,
const int wflign_min_wavefront_length,
const int wflign_max_distance_threshold,
const uint64_t wflign_max_len_major,
const uint64_t wflign_max_len_minor,
const int erode_k);
Expand Down
27 changes: 25 additions & 2 deletions src/interface/parse_args.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ void parse_args(int argc,
args::ValueFlag<char> skip_prefix(mapping_opts, "C", "skip mappings when the query and target have the same prefix before the last occurrence of the given character C", {'Y', "skip-prefix"});
args::Flag approx_mapping(mapping_opts, "approx-map", "skip base-level alignment, producing an approximate mapping in PAF", {'m',"approx-map"});
args::Flag no_split(mapping_opts, "no-split", "disable splitting of input sequences during mapping [default: enabled]", {'N',"no-split"});
args::ValueFlag<std::string> chain_gap(mapping_opts, "N", "chain mappings closer than this distance in query and target, retaining mappings in best chain [default: 100k]", {'c', "chain-gap"});
args::ValueFlag<std::string> chain_gap(mapping_opts, "N", "chain mappings closer than this distance in query and target, retaining mappings in best chain [default: 50k]", {'c', "chain-gap"});
args::Flag drop_low_map_pct_identity(mapping_opts, "K", "drop mappings with estimated identity below --map-pct-id=%", {'K', "drop-low-map-id"});
args::Flag no_filter(mapping_opts, "MODE", "disable mapping filtering", {'f', "no-filter"});
args::ValueFlag<double> map_sparsification(mapping_opts, "FACTOR", "keep this fraction of mappings", {'x', "sparsify-mappings"});
Expand All @@ -103,6 +103,9 @@ void parse_args(int argc,
"score parameters for the wflign alignment (affine); match score is fixed at 0 [default: adaptive with respect to the estimated identity]",//, if 4 then gaps are affine, if 6 then gaps are convex [default: 1,4,6,2,26,1]",
{'G', "wflign-params"});
args::ValueFlag<float> wflign_max_mash_dist(alignment_opts, "N", "maximum mash distance to perform the alignment in a wflambda segment [default: adaptive with respect to the estimated identity]", {'b', "max-mash-dist"});
args::ValueFlag<int> wflign_min_wavefront_length(alignment_opts, "N", "min wavefront length for heuristic WFlign [default: 1024]", {'j', "wflign-min-wf-len"});
args::ValueFlag<int> wflign_max_distance_threshold(alignment_opts, "N", "max distance threshold for heuristic WFlign [default: 2048/(estimated_identity^2)]", {'q', "wflign-max-disttance"});

// patching parameter
args::ValueFlag<std::string> wflign_max_len_major(alignment_opts, "N", "maximum length to patch in the major axis [default: 512*segment-length]", {'C', "max-patch-major"});
args::ValueFlag<std::string> wflign_max_len_minor(alignment_opts, "N", "maximum length to patch in the minor axis [default: 128*segment-length]", {'F', "max-patch-minor"});
Expand Down Expand Up @@ -291,6 +294,26 @@ void parse_args(int argc,
align_parameters.wflign_max_mash_dist = -1;
}

if (wflign_min_wavefront_length) {
if (args::get(wflign_min_wavefront_length) <= 0) {
std::cerr << "[wfmash] ERROR, skch::parseandSave, min wavefront length for heuristic WFlign must be greater than 0." << std::endl;
exit(1);
}
align_parameters.wflign_min_wavefront_length = args::get(wflign_min_wavefront_length);
} else {
align_parameters.wflign_min_wavefront_length = 1024;
}

if (wflign_max_distance_threshold) {
if (args::get(wflign_max_distance_threshold) <= 0) {
std::cerr << "[wfmash] ERROR, skch::parseandSave, max distance threshold for heuristic WFlign must be greater than 0." << std::endl;
exit(1);
}
align_parameters.wflign_max_distance_threshold = args::get(wflign_max_distance_threshold);
} else {
align_parameters.wflign_max_distance_threshold = -1;
}

align_parameters.emit_md_tag = args::get(emit_md_tag);
align_parameters.sam_format = args::get(sam_format);
align_parameters.no_seq_in_sam = args::get(no_seq_in_sam);
Expand Down Expand Up @@ -350,7 +373,7 @@ void parse_args(int argc,
}
map_parameters.chain_gap = l;
} else {
map_parameters.chain_gap = 100000;
map_parameters.chain_gap = 50000;
}

if (drop_low_map_pct_identity) {
Expand Down

0 comments on commit 00fed86

Please sign in to comment.