diff --git a/src/common/wflign/deps/WFA2-lib/CMakeLists.txt b/src/common/wflign/deps/WFA2-lib/CMakeLists.txt
new file mode 100644
index 00000000..f575ff94
--- /dev/null
+++ b/src/common/wflign/deps/WFA2-lib/CMakeLists.txt
@@ -0,0 +1,225 @@
+# For Debian currently with
+#
+#   cd build
+#   cmake -DCMAKE_BUILD_TYPE=RelWithDebInfo ..
+#   make
+#   make test
+#   make install
+# See below option statements and the README for build information
+
+cmake_minimum_required(VERSION 3.16)
+project(wfa2lib)
+
+set(CMAKE_CXX_STANDARD 17)
+
+include(FeatureSummary)
+include(GNUInstallDirs)
+
+# ---- Options
+
+option(OPENMP "Enable OpenMP" OFF) # enables WFA_PARALLEL
+option(PROFILING "Enable profiling" OFF)
+option(GPROF "Instrument the build for gprof (-pg)" OFF)
+option(ASAN "Use address sanitiser" OFF)
+# EXTRA_FLAGS holds a string of compiler flags, not a boolean, so it is declared
+# as a STRING cache entry; the empty default still evaluates as false below.
+set(EXTRA_FLAGS "" CACHE STRING "Add optimization flags for C/C++ compiler")
+
+# include(CheckIPOSupported) # adds lto
+# check_ipo_supported(RESULT ipo_supported OUTPUT output)
+
+# ---- Dependencies
+
+if(OPENMP)
+  # REQUIRED: fail at configure time rather than define WFA_PARALLEL without
+  # any OpenMP compiler flags.
+  find_package(OpenMP REQUIRED)
+  set(OPTIMIZE_FLAGS "-DWFA_PARALLEL")
+endif()
+
+if(EXTRA_FLAGS)
+  set(OPTIMIZE_FLAGS "${OPTIMIZE_FLAGS} ${EXTRA_FLAGS}")
+endif()
+
+find_package(Threads)
+set_package_properties(Threads PROPERTIES TYPE REQUIRED)
+
+# Evaluated only after every find_package() call above; run any earlier and no
+# package has been recorded yet, so a missing one would never be reported.
+feature_summary(
+  FATAL_ON_MISSING_REQUIRED_PACKAGES
+  WHAT REQUIRED_PACKAGES_NOT_FOUND)
+
+# ---- Build switches
+set(CMAKE_POSITION_INDEPENDENT_CODE ON)
+# set(CMAKE_INTERPROCEDURAL_OPTIMIZATION ${ipo_supported})
+
+if(NOT CMAKE_BUILD_TYPE)
+  set(CMAKE_BUILD_TYPE Release CACHE STRING
+      "Choose the type of build, options are: Release|Debug|RelWithDebInfo (for distros)."
+      FORCE)
+endif()
+
+# Quoted so the value is compared literally instead of being re-expanded as a
+# variable name by if().
+if("${CMAKE_BUILD_TYPE}" MATCHES "Release")
+  set(OPTIMIZE_FLAGS "${OPTIMIZE_FLAGS} -march=native -D_FILE_OFFSET_BITS=64")
+endif()
+
+# Applied for every build type: Debug used to drop the OpenMP flags while still
+# defining WFA_PARALLEL, and other build types lost OPTIMIZE_FLAGS entirely.
+set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS} ${OPTIMIZE_FLAGS}")
+set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS} ${OPTIMIZE_FLAGS}")
+
+if("${CMAKE_BUILD_TYPE}" MATCHES "Debug")
+  # -Wfatal-errors is a compiler option, not a preprocessor definition.
+  add_compile_options(-Wfatal-errors)
+endif()
+
+if(ASAN)
+  set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fsanitize=address -fno-omit-frame-pointer -fno-common")
+  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=address -fno-omit-frame-pointer -fno-common")
+endif()
+
+# The core library is C, so profiling flags must reach the C compiler too.
+if(PROFILING)
+  set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -g")
+  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -g")
+endif()
+
+if(GPROF)
+  set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -pg")
+  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pg")
+endif()
+
+# ---- Include files
+
+file(GLOB INCLUDES
+  wavefront/*.h*
+  )
+file(GLOB UTILS_INCLUDES
+  utils/*.h*
+  )
+file(GLOB ALIGNMENT_INCLUDES
+  alignment/*.h*
+  )
+file(GLOB SYSTEM_INCLUDES
+  system/*.h*
+  )
+
+set(wfa2lib_SOURCE
+  wavefront/wavefront_align.c
+  wavefront/wavefront_aligner.c
+  wavefront/wavefront_attributes.c
+  wavefront/wavefront_backtrace_buffer.c
+  wavefront/wavefront_backtrace.c
+  wavefront/wavefront_backtrace_offload.c
+  wavefront/wavefront_bialign.c
+  wavefront/wavefront_bialigner.c
+  wavefront/wavefront.c
+  wavefront/wavefront_components.c
+  wavefront/wavefront_compute_affine2p.c
+  wavefront/wavefront_compute_affine.c
+  wavefront/wavefront_compute.c
+  wavefront/wavefront_compute_edit.c
+  wavefront/wavefront_compute_linear.c
+  wavefront/wavefront_debug.c
+  wavefront/wavefront_display.c
+  wavefront/wavefront_extend.c
+  wavefront/wavefront_heuristic.c
+  wavefront/wavefront_pcigar.c
+  wavefront/wavefront_penalties.c
+  wavefront/wavefront_plot.c
+  wavefront/wavefront_sequences.c
+  wavefront/wavefront_slab.c
+  wavefront/wavefront_unialign.c
+  system/mm_stack.c
+  system/mm_allocator.c
+  system/profiler_counter.c
+  system/profiler_timer.c
+  utils/bitmap.c
+  utils/dna_text.c
+  utils/sequence_buffer.c
+  utils/vector.c
+  utils/commons.c
+  utils/heatmap.c
+  alignment/affine2p_penalties.c
+  alignment/affine_penalties.c
+  alignment/cigar.c
+  alignment/score_matrix.c
+)
+
+# STATIC is explicit: without it -DBUILD_SHARED_LIBS=ON turns this target into
+# a second shared library with the same output name as wfa2.
+add_library(wfa2_static STATIC ${wfa2lib_SOURCE})
+add_library(wfa2 SHARED ${wfa2lib_SOURCE})
+set_target_properties(wfa2_static PROPERTIES OUTPUT_NAME wfa2)
+set_target_properties(wfa2 PROPERTIES SOVERSION 0)
+target_include_directories(wfa2 PUBLIC . wavefront utils)
+target_include_directories(wfa2_static PUBLIC . wavefront utils)
+add_library(wfa2::wfa2 ALIAS wfa2)
+add_library(wfa2::wfa2_static ALIAS wfa2_static)
+
+# ---- C++ binding library
+
+set(wfa2cpp_SOURCE
+  bindings/cpp/WFAligner.cpp
+)
+file(GLOB CPP_INCLUDES
+  bindings/cpp/*.h*
+  )
+add_library(wfa2cpp_static STATIC ${wfa2cpp_SOURCE})
+add_library(wfa2cpp SHARED ${wfa2cpp_SOURCE})
+set_target_properties(wfa2cpp PROPERTIES SOVERSION 0)
+set_target_properties(wfa2cpp_static PROPERTIES OUTPUT_NAME wfa2cpp)
+target_link_libraries(wfa2cpp PUBLIC wfa2)
+# The static binding pairs with the static core library.
+target_link_libraries(wfa2cpp_static PUBLIC wfa2_static)
+add_library(wfa2::wfa2cpp ALIAS wfa2cpp)
+add_library(wfa2::wfa2cpp_static ALIAS wfa2cpp_static)
+
+# ---- Get version
+
+file (STRINGS "VERSION" BUILD_NUMBER)
+add_definitions(-DWFA2LIB_VERSION="${BUILD_NUMBER}")
+add_definitions(-DVERSION="${BUILD_NUMBER}")
+
+set(wfa2lib_LIBS
+)
+
+# add_dependencies(wfa2lib ${wfa2lib_DEPS})
+
+# ---- Build all
+
+# ---- Test
+
+enable_testing()
+
+
+function(add_wfa_test)
+  add_test(
+    NAME wfa2lib
+    COMMAND ./tests/wfa.utest.sh
+    WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
+    )
+endfunction()
+
+add_wfa_test()
+
+# ---- Install
+
+# Do not install anything when used with FetchContent
+if(CMAKE_CURRENT_SOURCE_DIR STREQUAL CMAKE_SOURCE_DIR)
+  # One rule for all four libraries, with a destination for every artifact
+  # kind (static archive, shared library, Windows DLL).
+  install(TARGETS wfa2 wfa2_static wfa2cpp wfa2cpp_static
+    ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
+    LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
+    RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR})
+
+  install(FILES ${INCLUDES} DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/wfa2lib/wavefront)
+  install(FILES ${UTILS_INCLUDES} DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/wfa2lib/utils)
+  install(FILES ${ALIGNMENT_INCLUDES} DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/wfa2lib/alignment)
+  install(FILES ${SYSTEM_INCLUDES} DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/wfa2lib/system)
+  install(FILES ${CPP_INCLUDES} DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/wfa2lib/bindings/cpp)
+endif()
diff --git a/src/common/wflign/deps/WFA2-lib/Makefile b/src/common/wflign/deps/WFA2-lib/Makefile
index 634689bb..86a6c72a 100644
--- a/src/common/wflign/deps/WFA2-lib/Makefile
+++ b/src/common/wflign/deps/WFA2-lib/Makefile
@@ -18,10 +18,10 @@ AR=ar
 AR_FLAGS=-rsc
 
 ifndef BUILD_EXAMPLES
-BUILD_EXAMPLES=0
+BUILD_EXAMPLES=1
 endif
 ifndef BUILD_TOOLS
-BUILD_TOOLS=0
+BUILD_TOOLS=1
 endif
 ifndef BUILD_WFA_PARALLEL
 BUILD_WFA_PARALLEL=0
diff --git a/src/common/wflign/deps/WFA2-lib/README.md b/src/common/wflign/deps/WFA2-lib/README.md
index b41e4008..4d682e7e 100644
--- a/src/common/wflign/deps/WFA2-lib/README.md
+++ b/src/common/wflign/deps/WFA2-lib/README.md
@@ -16,14 +16,37 @@ The WFA2 library implements the WFA algorithm for different distance metrics and
 
 ### 1.3 Getting started
 
-Git clone and compile the library, tools, and examples.
+Git clone and compile the library, tools, and examples. By default use cmake:
 
 ```
-$> git clone https://github.com/smarco/WFA2-lib
-$> cd WFA2-lib
-$> make clean all
+git clone https://github.com/smarco/WFA2-lib
+cd WFA2-lib
+mkdir build && cd build
+cmake .. -DCMAKE_BUILD_TYPE=Release
+cmake --build . --verbose
+ctest . --verbose
 ```
 
+There are some flags that can be used:
+
+```
+cmake ..
-DOPENMP=TRUE +``` + +To add vector optimization try + +``` +cmake .. -DCMAKE_BUILD_TYPE=Release -DEXTRA_FLAGS="-ftree-vectorize -msse2 -mfpmath=sse -ftree-vectorizer-verbose=5" +``` + +To build a shared library (static is the default) + +``` +cmake .. -DBUILD_SHARED_LIBS=ON +``` + +It is possible to build WFA2-lib in a GNU Guix container, for more information see [guix.scm](./guix.scm). + ### 1.4 Contents (where to go from here) Section [WFA2-lib features](#wfa2.features) explores the most relevant options and features of the library. Then, the folder [tools/](tools/README.md) contains tools that can be used to execute and understand the WFA2 library capabilities. Additionally, the folder [examples/](examples/README.md) contains simple examples illustrating how to integrate the WFA2 code into any tool. @@ -40,7 +63,7 @@ Section [WFA2-lib features](#wfa2.features) explores the most relevant options a * [Technical notes](#wfa2.other.notes) * [Reporting Bugs and Feature Request](#wfa2.complains) * [License](#wfa2.licence) -* [Citation](#wfa2.cite) +* [Citation](#wfa2.cite) ### 1.5 Important notes and clarifications @@ -62,7 +85,7 @@ This simple example illustrates how to align two sequences using the WFA2 librar #include "wavefront/wavefront_align.h" ``` -Next, create and configure the WFA alignment object. The following example uses the defaults configuration and sets custom `gap_affine` penalties. Note that mismatch, gap-opening, and gap-extension must be positive values. +Next, create and configure the WFA alignment object. The following example uses the default configuration and sets custom `gap_affine` penalties. Note that mismatch, gap-opening, and gap-extension must be positive values. ```C // Configure alignment attributes @@ -161,7 +184,7 @@ An example of how to use them is [here](./bindings/rust/example.rs). The WFA2 library implements the wavefront algorithm for the most widely used distance metrics. 
The practical alignment time can change depending on the distance function, although the computational complexity always remains proportional to the alignment score or distance. The WFA2 library offers the following distance metrics or functions: -- **Indel (or LCS).** Produces alignments allowing matches, insertions, and deletions with unitary cost (i.e., {M,I,D} = {0,1,1}) but not mismatches. Also known as the longest common subsequence (LCS) problem. The LCS is defined as the longest subsequence common to both sequences, provided that the characters of the subsequence are not required to occupy consecutive positions within the original sequences. +- **Indel (or LCS).** Produces alignments allowing matches, insertions, and deletions with unitary cost (i.e., {M,I,D} = {0,1,1}) but not mismatches. Also known as the longest common subsequence (LCS) problem. The LCS is defined as the longest subsequence common to both sequences, provided that the characters of the subsequence are not required to occupy consecutive positions within the original sequences. ``` PATTERN A-GCTA-GTGTC--AATGGCTACT-T-T-TCAGGTCCT @@ -204,7 +227,7 @@ The WFA2 library implements the wavefront algorithm for the most widely used dis // Configuration wavefront_aligner_attr_t attributes = wavefront_aligner_attr_default; attributes.distance_metric = gap_linear; - attributes.linear_penalties.mismatch = 6; // X > 0 + attributes.linear_penalties.mismatch = 6; // X > 0 attributes.linear_penalties.indel = 2; // I > 0 ``` @@ -240,9 +263,9 @@ The WFA2 library implements the wavefront algorithm for the most widely used dis ### 3.2 Alignment Scope -Depending on the use case, it is often the case that an application is only required to compute the alignment score, not the complete alignment (i.e., CIGAR). As it happens with traditional dynamic programming algorithms, the WFA algorithm requires less memory (i.e., `O(s)`) to compute the alignment score. 
In turn, this results in slighter faster alignment executions. For this reason, the WFA2 library implements two different modes depending on the alignment scope: score-only and full-CIGAR alignment. +Depending on the use case, it is often the case that an application is only required to compute the alignment score, not the complete alignment (i.e., CIGAR). As it happens with traditional dynamic programming algorithms, the WFA algorithm requires less memory (i.e., `O(s)`) to compute the alignment score. In turn, this results in slightly faster alignment executions. For this reason, the WFA2 library implements two different modes depending on the alignment scope: score-only and full-CIGAR alignment. -The ** score-only alignment ** mode computes only the alignment score. This mode utilizes only the front-wavefronts of the WFA algorithm to keep track of the optimal alignment score. As a result, it requires `O(s)` memory and, in practice, performs slighter faster than the standard full-CIGAR mode. +The **score-only alignment** mode computes only the alignment score. This mode utilizes only the front-wavefronts of the WFA algorithm to keep track of the optimal alignment score. As a result, it requires `O(s)` memory and, in practice, performs slightly faster than the standard full-CIGAR mode. ```C wavefront_aligner_attr_t attributes = wavefront_aligner_attr_default; @@ -278,7 +301,7 @@ The WFA2 library allows computing alignments with different spans or shapes. Alt ``` PATTERN AATTAATTTAAGTCTAGGCTACTTTCGGTACTTTGTTCTT - |||||||||||||||||||||||||||||| || + |||||||||||||||||||||||||||||| || TEXT ----AATTTAAGTCTAGGCTACTTTCGGTACTTTCTT--- ``` @@ -300,7 +323,7 @@ The WFA2 library allows computing alignments with different spans or shapes. 
Alt ``` PATTERN -------------AATTTAAGTCTAGGCTACTTTC--------------- - ||||||||| |||||||||||| + ||||||||| |||||||||||| TEXT ACGACTACTACGAAATTTAAGTATAGGCTACTTTCCGTACGTACGTACGT ``` @@ -329,9 +352,9 @@ The WFA2 library allows computing alignments with different spans or shapes. Alt attributes.alignment_form.pattern_end_free = pattern_end_free; attributes.alignment_form.text_begin_free = 0; attributes.alignment_form.text_end_free = text_end_free; - + PATTERN AATTTAAGTCTG-CTACTTTCACGCA-GCT---------- - ||||| |||||| ||||||||||| | | | + ||||| |||||| ||||||||||| | | | TEXT AATTTCAGTCTGGCTACTTTCACGTACGATGACAGACTCT ``` @@ -343,7 +366,7 @@ The WFA2 library allows computing alignments with different spans or shapes. Alt attributes.alignment_form.pattern_end_free = 0; attributes.alignment_form.text_begin_free = text_begin_free; attributes.alignment_form.text_end_free = 0; - + PATTERN -------------AAACTTTCACGTACG-TGACAGTCTCT ||||||||||||| |||||| |||| TEXT AATTTCAGTCTGGCTACTTTCACGTACGATGACAGACTCT @@ -355,7 +378,7 @@ The WFA2 library allows computing alignments with different spans or shapes. Alt
Overlapped alignment

-- **Overlapped alignment (a.k.a. dovetail).** +- **Overlapped alignment (a.k.a. dovetail).** ```C // Overlapped (Right-Left) @@ -365,9 +388,9 @@ The WFA2 library allows computing alignments with different spans or shapes. Alt attributes.alignment_form.pattern_end_free = 0; attributes.alignment_form.text_begin_free = 0; attributes.alignment_form.text_end_free = text_end_free; - + PATTERN ACGCGTCTGACTGACTGACTAAACTTTCATGTAC-TGACA----------------- - ||||||||| |||| ||||| + ||||||||| |||| ||||| TEXT --------------------AAACTTTCACGTACGTGACATATAGCGATCGATGACT ``` @@ -381,7 +404,7 @@ The WFA2 library allows computing alignments with different spans or shapes. Alt attributes.alignment_form.text_end_free = 0; PATTERN ----------------------ACGCGTCTGACTGACTACGACTACGACTGACTAGCAT - ||||||||| || || + ||||||||| || || TEXT ACATGCATCGATCAGACTGACTACGCGTCTG-CTAAC---------------------- ``` @@ -403,7 +426,7 @@ The WFA algorithm can be used combined with many heuristics to reduce the alignm WFA2's heuristics are classified into the following categories: ['wf-adaptive'](#wfa2.wfadaptive), ['drops'](#wfa2.drops), and ['bands'](#wfa2.bands). It is possible to combine a maximum of one heuristic from each category (OR-ing the strategy values or using the API). In the case of using multiple heuristics, these will applied in cascade, starting with 'wf-adaptive', then 'drops', and finally 'bands'. -- **None (for comparison)**. If no heuristic is used, the WFA behaves exploring cells of the DP-matrix in increasing score order (increasing scores correspond to colours from blue to red). +- **None (for comparison)**. If no heuristic is used, the WFA behaves exploring cells of the DP-matrix in increasing score order (increasing scores correspond to colours from blue to red).

@@ -446,7 +469,7 @@ WFA2's heuristics are classified into the following categories: ['wf-adaptive'](

-- **Heuristic drops.** This heuristic compares the maximum score computed so far with the score of the last computed cells. Depending on the score difference, these heuristic strategies can reduce the size of the wavefront computed or even abandon the alignment process. In the case of zero-match alignment, $M=1$ will be assumed just for computation of the score drop. Also note that this heuristic is not compatible with distances 'edit' or 'indel'. In this category, WFA2 implements 'X-drop' and 'Z-drop'. +- **Heuristic drops.** This heuristic compares the maximum score computed so far with the score of the last computed cells. Depending on the score difference, these heuristic strategies can reduce the size of the wavefront computed or even abandon the alignment process. In the case of zero-match alignment, $M=1$ will be assumed just for computation of the score drop. Also note that this heuristic is not compatible with distances 'edit' or 'indel'. In this category, WFA2 implements 'X-drop' and 'Z-drop'.         **X-drop** implements the classical X-drop heuristic. For each diagonal $k$, the X-drop heuristic compares the current score $sw_k$ with the maximum observed score so far $sw_{max}$. If the difference drops more than the $xdrop$ parameter (i.e., $sw_{max} - sw_k > xdrop$), the heuristic prunes the diagonal $k$ as it is unlikely to lead to the optimum alignment. If all the diagonals are pruned under this criteria, the alignment process is abandoned. @@ -458,7 +481,7 @@ WFA2's heuristics are classified into the following categories: ['wf-adaptive']( ```         **Z-drop** implements the Z-drop heuristic (as described in Minimap2). This heuristic halts the alignment process if the score drops too fast in the diagonal direction. Let $sw_{max}$ be the maximum observed score so far, computed at cell ($i'$,$j'$). Then, let $sw$ be the maximum score found in the last computed wavefront, computed at cell ($i$,$j$). 
The Z-drop heuristic stops the alignment process if $sw_{max} - sw > zdrop + gap_e·|(i-i')-(j-j')|$, being $gap_e$ the gap-extension penalty and $zdrop$ a parameter of the heuristic. - + ```C wavefront_aligner_attr_t attributes = wavefront_aligner_attr_default; @@ -468,7 +491,7 @@ WFA2's heuristics are classified into the following categories: ['wf-adaptive']( ```         **Graphical examples:** - +

@@ -534,7 +557,7 @@ WFA2's heuristics are classified into the following categories: ['wf-adaptive']( - Thanks to Eizenga's formulation, WFA2-lib can operate with any match score. Although, in practice, M=0 is still the most efficient choice. -- Note that edit and LCS are distance metrics and, thus, the score computed is always positive. However, weighted distances, like gap-linear and gap-affine, have the sign of the computed alignment evaluated under the selected penalties. If WFA2-lib is executed using $M=0$, the final score is expected to be negative. +- Note that edit and LCS are distance metrics and, thus, the score computed is always positive. However, weighted distances, like gap-linear and gap-affine, have the sign of the computed alignment evaluated under the selected penalties. If WFA2-lib is executed using $M=0$, the final score is expected to be negative. - All WFA2-lib algorithms/variants are stable. That is, for alignments having the same score, the library always resolves ties (between M, X, I,and D) using the same criteria: M (highest prio) > X > D > I (lowest prio). Nevertheless, the memory mode `ultralow` (BiWFA) is optimal (always reports the best alignment) but not stable. @@ -557,6 +580,8 @@ WFA2-lib is distributed under MIT licence. [Andrea Guarracino](https://github.com/AndreaGuarracino) and [Erik Garrison](https://github.com/ekg) have contributed to the design of new features and intensive testing of the library. +[Pjotr Prins](https://thebird.nl/) contributed the CMake build system, the prevention of variables leaking through include headers, and other tweaks. + Miquel Moretó has contributed with fruitful technical discussions and tireless efforts seeking funding, so we could keep working on this project. ## 7. ACKNOWLEDGEMENTS @@ -567,7 +592,6 @@ Miquel Moretó has contributed with fruitful technical discussions and tireless ## 8. CITATION -**Santiago Marco-Sola, Juan Carlos Moure, Miquel Moreto, Antonio Espinosa**. 
["Fast gap-affine pairwise alignment using the wavefront algorithm."](https://doi.org/10.1093/bioinformatics/btaa777) Bioinformatics, 2020. - -**Santiago Marco-Sola, Jordan M Eizenga, Andrea Guarracino, Benedict Paten, Erik Garrison, Miquel Moreto**. Optimal gap-affine alignment in O(s) space. _bioRxiv_ (2022). DOI [2022.04.14.488380](https://doi.org/10.1101/2022.04.14.488380) +**Santiago Marco-Sola, Juan Carlos Moure, Miquel Moreto, Antonio Espinosa**. ["Fast gap-affine pairwise alignment using the wavefront algorithm."](https://doi.org/10.1093/bioinformatics/btaa777). Bioinformatics, 2020. +**Santiago Marco-Sola, Jordan M Eizenga, Andrea Guarracino, Benedict Paten, Erik Garrison, Miquel Moreto**. ["Optimal gap-affine alignment in O(s) space"](https://doi.org/10.1093/bioinformatics/btad074). Bioinformatics, 2023. diff --git a/src/common/wflign/deps/WFA2-lib/alignment/affine2p_penalties.h b/src/common/wflign/deps/WFA2-lib/alignment/affine2p_penalties.h index 8b660718..50e0b182 100644 --- a/src/common/wflign/deps/WFA2-lib/alignment/affine2p_penalties.h +++ b/src/common/wflign/deps/WFA2-lib/alignment/affine2p_penalties.h @@ -32,8 +32,6 @@ #ifndef AFFINE2P_PENALTIES_H_ #define AFFINE2P_PENALTIES_H_ -#include "utils/commons.h" - /* * Affine 2-piece penalties */ diff --git a/src/common/wflign/deps/WFA2-lib/alignment/affine_penalties.h b/src/common/wflign/deps/WFA2-lib/alignment/affine_penalties.h index 3d0f6382..1306a62d 100644 --- a/src/common/wflign/deps/WFA2-lib/alignment/affine_penalties.h +++ b/src/common/wflign/deps/WFA2-lib/alignment/affine_penalties.h @@ -32,8 +32,6 @@ #ifndef AFFINE_PENALTIES_H_ #define AFFINE_PENALTIES_H_ -#include "utils/commons.h" - /* * Affine penalties */ diff --git a/src/common/wflign/deps/WFA2-lib/alignment/cigar.c b/src/common/wflign/deps/WFA2-lib/alignment/cigar.c index 555af458..77b3c630 100644 --- a/src/common/wflign/deps/WFA2-lib/alignment/cigar.c +++ b/src/common/wflign/deps/WFA2-lib/alignment/cigar.c @@ -29,6 +29,7 @@ * 
DESCRIPTION: Edit cigar data-structure (match/mismatch/insertion/deletion) */ +#include "utils/commons.h" #include "cigar.h" /* @@ -225,6 +226,10 @@ int cigar_score_gap_affine2p( /* * Utils */ +bool cigar_is_null( + cigar_t* const cigar) { + return (cigar->begin_offset >= cigar->end_offset); +} int cigar_cmp( cigar_t* const cigar_a, cigar_t* const cigar_b) { diff --git a/src/common/wflign/deps/WFA2-lib/alignment/cigar.h b/src/common/wflign/deps/WFA2-lib/alignment/cigar.h index 8d83d8a0..748e9244 100644 --- a/src/common/wflign/deps/WFA2-lib/alignment/cigar.h +++ b/src/common/wflign/deps/WFA2-lib/alignment/cigar.h @@ -32,7 +32,6 @@ #ifndef CIGAR_H_ #define CIGAR_H_ -#include "utils/commons.h" #include "system/mm_allocator.h" #include "alignment/linear_penalties.h" #include "alignment/affine_penalties.h" @@ -97,6 +96,9 @@ int cigar_score_gap_affine2p( /* * Utils */ +bool cigar_is_null( + cigar_t* const cigar); + int cigar_cmp( cigar_t* const cigar_a, cigar_t* const cigar_b); diff --git a/src/common/wflign/deps/WFA2-lib/alignment/score_matrix.c b/src/common/wflign/deps/WFA2-lib/alignment/score_matrix.c index e6b7caee..beae531e 100644 --- a/src/common/wflign/deps/WFA2-lib/alignment/score_matrix.c +++ b/src/common/wflign/deps/WFA2-lib/alignment/score_matrix.c @@ -29,6 +29,7 @@ * DESCRIPTION: Score matrix for alignment using dynamic programming */ +#include "utils/commons.h" #include "score_matrix.h" /* @@ -114,7 +115,3 @@ void score_matrix_print( } fprintf(stream,"\n"); } - - - - diff --git a/src/common/wflign/deps/WFA2-lib/alignment/score_matrix.h b/src/common/wflign/deps/WFA2-lib/alignment/score_matrix.h index b6d4633e..8abaf996 100644 --- a/src/common/wflign/deps/WFA2-lib/alignment/score_matrix.h +++ b/src/common/wflign/deps/WFA2-lib/alignment/score_matrix.h @@ -32,7 +32,6 @@ #ifndef SCORE_MATRIX_H_ #define SCORE_MATRIX_H_ -#include "utils/commons.h" #include "system/mm_allocator.h" #include "alignment/cigar.h" diff --git 
a/src/common/wflign/deps/WFA2-lib/bindings/cpp/WFAligner.cpp b/src/common/wflign/deps/WFA2-lib/bindings/cpp/WFAligner.cpp index 89582582..8ca72c23 100644 --- a/src/common/wflign/deps/WFA2-lib/bindings/cpp/WFAligner.cpp +++ b/src/common/wflign/deps/WFA2-lib/bindings/cpp/WFAligner.cpp @@ -64,21 +64,6 @@ WFAligner::~WFAligner() { /* * Align End-to-end */ -WFAligner::AlignmentStatus WFAligner::alignEnd2EndLambda( - const int patternLength, - const int textLength) { - // Configure - wavefront_aligner_set_alignment_end_to_end(wfAligner); - // Align (using custom matching function) - int* pattern_lambda = (int*)mm_allocator_malloc(wfAligner->mm_allocator,patternLength*sizeof(int)); - for (int i=0;imm_allocator,textLength*sizeof(int)); - for (int i=0;imm_allocator,pattern_lambda); - mm_allocator_free(wfAligner->mm_allocator,text_lambda); - return (WFAligner::AlignmentStatus) status; -} WFAligner::AlignmentStatus WFAligner::alignEnd2End( const char* const pattern, const int patternLength, @@ -87,7 +72,7 @@ WFAligner::AlignmentStatus WFAligner::alignEnd2End( // Configure wavefront_aligner_set_alignment_end_to_end(wfAligner); // Align - return (WFAligner::AlignmentStatus) wavefront_align(wfAligner,pattern,NULL,patternLength,text,NULL,textLength); + return (WFAligner::AlignmentStatus) wavefront_align(wfAligner,pattern,patternLength,text,textLength); } WFAligner::AlignmentStatus WFAligner::alignEnd2End( std::string& pattern, @@ -95,29 +80,20 @@ WFAligner::AlignmentStatus WFAligner::alignEnd2End( // Delegate return alignEnd2End(pattern.c_str(),pattern.length(),text.c_str(),text.length()); } -/* - * Align Ends-free - */ -WFAligner::AlignmentStatus WFAligner::alignEndsFreeLambda( +WFAligner::AlignmentStatus WFAligner::alignEnd2EndLambda( + int (*matchFunct)(int,int,void*), + void* matchFunctArguments, const int patternLength, - const int patternBeginFree, - const int patternEndFree, - const int textLength, - const int textBeginFree, - const int textEndFree) { + const int 
textLength) { // Configure - wavefront_aligner_set_alignment_free_ends(wfAligner, - patternBeginFree,patternEndFree, - textBeginFree,textEndFree); + wavefront_aligner_set_alignment_end_to_end(wfAligner); // Align (using custom matching function) - int* pattern_lambda = (int*)mm_allocator_malloc(wfAligner->mm_allocator,patternLength*sizeof(int)); - for (int i=0;imm_allocator,textLength*sizeof(int)); - for (int i=0;imm_allocator,pattern_lambda); - return (WFAligner::AlignmentStatus) status; + return (WFAligner::AlignmentStatus) + wavefront_align_lambda(wfAligner,matchFunct,matchFunctArguments,patternLength,textLength); } +/* + * Align Ends-free + */ WFAligner::AlignmentStatus WFAligner::alignEndsFree( const char* const pattern, const int patternLength, @@ -132,7 +108,7 @@ WFAligner::AlignmentStatus WFAligner::alignEndsFree( patternBeginFree,patternEndFree, textBeginFree,textEndFree); // Align - return (WFAligner::AlignmentStatus) wavefront_align(wfAligner,pattern,NULL,patternLength,text,NULL,textLength); + return (WFAligner::AlignmentStatus) wavefront_align(wfAligner,pattern,patternLength,text,textLength); } WFAligner::AlignmentStatus WFAligner::alignEndsFree( std::string& pattern, @@ -148,13 +124,30 @@ WFAligner::AlignmentStatus WFAligner::alignEndsFree( text.c_str(),text.length(), textBeginFree,textEndFree); } -/* - * Alignment resume - */ -WFAligner::AlignmentStatus WFAligner::alignResume() { - // Resume alignment - return (WFAligner::AlignmentStatus) wavefront_align_resume(wfAligner); -} +WFAligner::AlignmentStatus WFAligner::alignEndsFreeLambda( + int (*matchFunct)(int,int,void*), + void* matchFunctArguments, + const int patternLength, + const int patternBeginFree, + const int patternEndFree, + const int textLength, + const int textBeginFree, + const int textEndFree) { + // Configure + wavefront_aligner_set_alignment_free_ends(wfAligner, + patternBeginFree,patternEndFree, + textBeginFree,textEndFree); + // Align (using custom matching function) + return 
(WFAligner::AlignmentStatus) + wavefront_align_lambda(wfAligner,matchFunct,matchFunctArguments,patternLength,textLength); +} +///* +// * Alignment resume +// */ +//WFAligner::AlignmentStatus WFAligner::alignResume() { +// // Resume alignment +// return (WFAligner::AlignmentStatus) wavefront_align_resume(wfAligner); +//} /* * Heuristics */ @@ -202,23 +195,6 @@ void WFAligner::setHeuristicZDrop( wavefront_aligner_set_heuristic_zdrop( wfAligner,zdrop,steps_between_cutoffs); } -/* - * Custom extend-match function (lambda) - */ -void WFAligner::setMatchFunct( - int (*matchFunct)(int,int,void*), - void* matchFunctArguments) { - wavefront_aligner_set_match_funct(wfAligner,matchFunct,matchFunctArguments); -} -/* - * Bidirectional - */ -void WFAligner::getLastBreakpoint( - int *v, - int *h) { - *h = WAVEFRONT_H(wfAligner->last_breakpoint.k_forward,wfAligner->last_breakpoint.offset_forward); - *v = WAVEFRONT_V(wfAligner->last_breakpoint.k_forward,wfAligner->last_breakpoint.offset_forward); -} /* * Limits */ @@ -234,12 +210,12 @@ void WFAligner::setMaxMemory( } // Parallelization void WFAligner::setMaxNumThreads( - const int maxNumThreads) { - wavefront_aligner_set_max_num_threads(wfAligner, maxNumThreads); + const int maxNumThreads) { + wavefront_aligner_set_max_num_threads(wfAligner, maxNumThreads); } void WFAligner::setMinOffsetsPerThread( - const int minOffsetsPerThread) { - wavefront_aligner_set_min_offsets_per_thread(wfAligner, minOffsetsPerThread); + const int minOffsetsPerThread) { + wavefront_aligner_set_min_offsets_per_thread(wfAligner, minOffsetsPerThread); } /* * Accessors @@ -271,9 +247,14 @@ char* WFAligner::strError( const int wfErrorCode) { return wavefront_align_strerror(wfErrorCode); } -void WFAligner::setVerbose( - const int verbose) { - wfAligner->system.verbose = verbose; +void WFAligner::debugAddTag( + char* const debugTag) { + wfAligner->align_mode_tag = debugTag; + if (wfAligner->bialigner != NULL) { + wfAligner->bialigner->alg_forward->align_mode_tag 
= debugTag; + wfAligner->bialigner->alg_reverse->align_mode_tag = debugTag; + wfAligner->bialigner->alg_subsidiary->align_mode_tag = debugTag; + } } /* * Indel Aligner (a.k.a Longest Common Subsequence - LCS) diff --git a/src/common/wflign/deps/WFA2-lib/bindings/cpp/WFAligner.hpp b/src/common/wflign/deps/WFA2-lib/bindings/cpp/WFAligner.hpp index 9da739c5..23481d83 100644 --- a/src/common/wflign/deps/WFA2-lib/bindings/cpp/WFAligner.hpp +++ b/src/common/wflign/deps/WFA2-lib/bindings/cpp/WFAligner.hpp @@ -34,9 +34,7 @@ #include -extern "C" { - #include "../../wavefront/wavefront_aligner.h" -} +#include "../../wavefront/wfa.hpp" /* * Namespace @@ -66,9 +64,6 @@ class WFAligner { StatusOOM = WF_STATUS_OOM, }; // Align End-to-end - AlignmentStatus alignEnd2EndLambda( - const int patternLength, - const int textLength); AlignmentStatus alignEnd2End( const char* const pattern, const int patternLength, @@ -77,14 +72,12 @@ class WFAligner { AlignmentStatus alignEnd2End( std::string& pattern, std::string& text); - // Align Ends-free - AlignmentStatus alignEndsFreeLambda( + AlignmentStatus alignEnd2EndLambda( + int (*matchFunct)(int,int,void*), + void* matchFunctArguments, const int patternLength, - const int patternBeginFree, - const int patternEndFree, - const int textLength, - const int textBeginFree, - const int textEndFree); + const int textLength); + // Align Ends-free AlignmentStatus alignEndsFree( const char* const pattern, const int patternLength, @@ -101,8 +94,15 @@ class WFAligner { std::string& text, const int textBeginFree, const int textEndFree); - // Alignment resume - AlignmentStatus alignResume(); + AlignmentStatus alignEndsFreeLambda( + int (*matchFunct)(int,int,void*), + void* matchFunctArguments, + const int patternLength, + const int patternBeginFree, + const int patternEndFree, + const int textLength, + const int textBeginFree, + const int textEndFree); // Heuristics void setHeuristicNone(); void setHeuristicBandedStatic( @@ -126,14 +126,6 @@ class 
WFAligner { void setHeuristicZDrop( const int zdrop, const int steps_between_cutoffs = 1); - // Custom extend-match function (lambda) - void setMatchFunct( - int (*matchFunct)(int,int,void*), - void* matchFunctArguments); - // Bidirectional - void getLastBreakpoint( - int *v, - int *h); // Limits void setMaxAlignmentScore( const int maxAlignmentScore); @@ -155,8 +147,8 @@ class WFAligner { // Misc char* strError( const int wfErrorCode); - void setVerbose( - const int verbose); + void debugAddTag( + char* const debugTag); protected: wavefront_aligner_attr_t attributes; wavefront_aligner_t* wfAligner; diff --git a/src/common/wflign/deps/WFA2-lib/examples/wfa_adapt.c b/src/common/wflign/deps/WFA2-lib/examples/wfa_adapt.c index 3e875527..2766a206 100644 --- a/src/common/wflign/deps/WFA2-lib/examples/wfa_adapt.c +++ b/src/common/wflign/deps/WFA2-lib/examples/wfa_adapt.c @@ -29,6 +29,7 @@ * DESCRIPTION: WFA Sample-Code */ +#include "utils/commons.h" #include "wavefront/wavefront_align.h" int main(int argc,char* argv[]) { @@ -50,7 +51,7 @@ int main(int argc,char* argv[]) { // Initialize Wavefront Aligner wavefront_aligner_t* const wf_aligner = wavefront_aligner_new(&attributes); // Align - wavefront_align(wf_aligner,pattern,NULL,strlen(pattern),text,NULL,strlen(text)); + wavefront_align(wf_aligner,pattern,strlen(pattern),text,strlen(text)); fprintf(stderr,"WFA-Alignment returns score %d\n",wf_aligner->cigar->score); // Count mismatches, deletions, and insertions int i, misms=0, ins=0, del=0; diff --git a/src/common/wflign/deps/WFA2-lib/examples/wfa_basic.c b/src/common/wflign/deps/WFA2-lib/examples/wfa_basic.c index 1c8f51bd..1b3751ff 100644 --- a/src/common/wflign/deps/WFA2-lib/examples/wfa_basic.c +++ b/src/common/wflign/deps/WFA2-lib/examples/wfa_basic.c @@ -29,6 +29,7 @@ * DESCRIPTION: WFA Sample-Code */ +#include "utils/commons.h" #include "wavefront/wavefront_align.h" int main(int argc,char* argv[]) { @@ -45,7 +46,7 @@ int main(int argc,char* argv[]) { // Initialize 
Wavefront Aligner wavefront_aligner_t* const wf_aligner = wavefront_aligner_new(&attributes); // Align - wavefront_align(wf_aligner,pattern,NULL,strlen(pattern),text,NULL,strlen(text)); + wavefront_align(wf_aligner,pattern,strlen(pattern),text,strlen(text)); fprintf(stderr,"WFA-Alignment returns score %d\n",wf_aligner->cigar->score); // Display alignment fprintf(stderr," PATTERN %s\n",pattern); diff --git a/src/common/wflign/deps/WFA2-lib/examples/wfa_lambda.cpp b/src/common/wflign/deps/WFA2-lib/examples/wfa_lambda.cpp index 0fc60095..d2e2fb7d 100644 --- a/src/common/wflign/deps/WFA2-lib/examples/wfa_lambda.cpp +++ b/src/common/wflign/deps/WFA2-lib/examples/wfa_lambda.cpp @@ -56,9 +56,8 @@ int match_function( int main(int argc,char* argv[]) { // Create a WFAligner WFAlignerGapAffine aligner(1,0,1,WFAligner::Alignment,WFAligner::MemoryHigh); - aligner.setMatchFunct(match_function,NULL); // Align - aligner.alignEnd2EndLambda(patternLength,textLength); + aligner.alignEnd2EndLambda(match_function,NULL,patternLength,textLength); cout << "WFA-Alignment returns score " << aligner.getAlignmentScore() << endl; // Print CIGAR diff --git a/src/common/wflign/deps/WFA2-lib/examples/wfa_repeated.c b/src/common/wflign/deps/WFA2-lib/examples/wfa_repeated.c index 5e95ed5d..e0efe0d7 100644 --- a/src/common/wflign/deps/WFA2-lib/examples/wfa_repeated.c +++ b/src/common/wflign/deps/WFA2-lib/examples/wfa_repeated.c @@ -29,6 +29,7 @@ * DESCRIPTION: WFA Sample-Code */ +#include "utils/commons.h" #include "wavefront/wavefront_align.h" int main(int argc,char* argv[]) { @@ -48,7 +49,7 @@ int main(int argc,char* argv[]) { int i; for (i=0;i<100000;++i) { // Align - wavefront_align(wf_aligner,pattern,NULL,strlen(pattern),text,NULL,strlen(text)); + wavefront_align(wf_aligner,pattern,strlen(pattern),text,strlen(text)); // Report if ((i%1000) == 0) { fprintf(stderr,"... 
done %d alignments\n",i); diff --git a/src/common/wflign/deps/WFA2-lib/guix.scm b/src/common/wflign/deps/WFA2-lib/guix.scm new file mode 100644 index 00000000..b356f5f8 --- /dev/null +++ b/src/common/wflign/deps/WFA2-lib/guix.scm @@ -0,0 +1,72 @@ +;; Set up build environment using GNU Guix packages +;; +;; CC0 license, Pjotr Prins (c) 2022-2023 +;; +;; To use this file to build HEAD: +;; +;; guix build -f guix.scm +;; +;; To get a development container (emacs shell will work) +;; +;; guix shell -C -D -f guix.scm +;; +;; For the tests you need /bin/bash. In a container create it with +;; +;; mkdir -p /bin ; ln -s $GUIX_ENVIRONMENT/bin/bash /bin/bash +;; +;; To find tools +;; +;; cd build +;; cmake .. -DCMAKE_MAKE_PROGRAM=make -DCMAKE_C_COMPILER=gcc +;; cmake --build . --verbose +;; +;; and run the tests with +;; +;; env CC=gcc make +;; ./tests/wfa.utest.sh + +(use-modules + ((guix licenses) #:prefix license:) + (guix gexp) + (guix packages) + (guix git-download) + (guix build-system cmake) + (gnu packages algebra) + (gnu packages autotools) + (gnu packages base) + (gnu packages bash) + (gnu packages compression) + (gnu packages build-tools) + (gnu packages check) + (gnu packages curl) + (gnu packages gcc) + (gnu packages gdb) + (gnu packages llvm) + (gnu packages parallel) + (gnu packages pkg-config) + (srfi srfi-1) + (ice-9 popen) + (ice-9 rdelim)) + +(define %source-dir (dirname (current-filename))) + +(define %git-commit + (read-string (open-pipe "git show HEAD | head -1 | cut -d ' ' -f 2" OPEN_READ))) + +(define-public wfa2-lib-git + (package + (name "wfa2-lib-git") + (version (git-version "1.3" "HEAD" %git-commit)) + (source (local-file %source-dir #:recursive? 
#f)) + (build-system cmake-build-system) + (inputs + `(("bash" ,bash) + ("gdb" ,gdb))) + (native-inputs + `(("pkg-config" ,pkg-config))) + (home-page "https://github.com/smarco/WFA2-lib/") + (synopsis "Library for wavefront aligner") + (description "The wavefront alignment (WFA) algorithm is an **exact** gap-affine algorithm that takes advantage of homologous regions between the sequences to accelerate the alignment process. Unlike to traditional dynamic programming algorithms that run in quadratic time, the WFA runs in time `O(ns+s^2)`, proportional to the sequence length `n` and the alignment score `s`, using `O(s^2)` memory (or `O(s)` using the ultralow/BiWFA mode).") + (license license:expat))) ;; MIT license + +wfa2-lib-git diff --git a/src/common/wflign/deps/WFA2-lib/img/wfa.endsfree.png b/src/common/wflign/deps/WFA2-lib/img/wfa.endsfree.png new file mode 100644 index 00000000..5eac1ee1 Binary files /dev/null and b/src/common/wflign/deps/WFA2-lib/img/wfa.endsfree.png differ diff --git a/src/common/wflign/deps/WFA2-lib/system/mm_allocator.c b/src/common/wflign/deps/WFA2-lib/system/mm_allocator.c index 3c5ff571..96831937 100644 --- a/src/common/wflign/deps/WFA2-lib/system/mm_allocator.c +++ b/src/common/wflign/deps/WFA2-lib/system/mm_allocator.c @@ -32,6 +32,7 @@ * and dispatching memory segments in order. */ +#include "utils/commons.h" #include "mm_allocator.h" /* diff --git a/src/common/wflign/deps/WFA2-lib/system/mm_stack.c b/src/common/wflign/deps/WFA2-lib/system/mm_stack.c index eb57a5f3..abb86d21 100644 --- a/src/common/wflign/deps/WFA2-lib/system/mm_stack.c +++ b/src/common/wflign/deps/WFA2-lib/system/mm_stack.c @@ -32,6 +32,7 @@ * requested at once. 
*/ +#include "utils/commons.h" #include "mm_stack.h" /* diff --git a/src/common/wflign/deps/WFA2-lib/system/profiler_counter.c b/src/common/wflign/deps/WFA2-lib/system/profiler_counter.c index 465d3167..52264ff5 100644 --- a/src/common/wflign/deps/WFA2-lib/system/profiler_counter.c +++ b/src/common/wflign/deps/WFA2-lib/system/profiler_counter.c @@ -30,6 +30,7 @@ * DESCRIPTION: Simple profile counter */ +#include "utils/commons.h" #include "profiler_counter.h" /* diff --git a/src/common/wflign/deps/WFA2-lib/system/profiler_counter.h b/src/common/wflign/deps/WFA2-lib/system/profiler_counter.h index d7ce69db..55b1b98a 100644 --- a/src/common/wflign/deps/WFA2-lib/system/profiler_counter.h +++ b/src/common/wflign/deps/WFA2-lib/system/profiler_counter.h @@ -33,7 +33,8 @@ #ifndef PROFILER_COUNTER_H_ #define PROFILER_COUNTER_H_ -#include "utils/commons.h" +#include +#include /* * Counters diff --git a/src/common/wflign/deps/WFA2-lib/system/profiler_timer.c b/src/common/wflign/deps/WFA2-lib/system/profiler_timer.c index e179f68f..eb25853d 100644 --- a/src/common/wflign/deps/WFA2-lib/system/profiler_timer.c +++ b/src/common/wflign/deps/WFA2-lib/system/profiler_timer.c @@ -30,6 +30,7 @@ * DESCRIPTION: Simple time profiler */ +#include "utils/commons.h" #include "profiler_timer.h" #ifdef __MACH__ @@ -51,7 +52,7 @@ void timer_get_system_time(struct timespec *ts) { ts->tv_sec = mts.tv_sec; ts->tv_nsec = mts.tv_nsec; #else - //clock_gettime(CLOCK_REALTIME,ts); + clock_gettime(CLOCK_REALTIME,ts); #endif } /* diff --git a/src/common/wflign/deps/WFA2-lib/system/profiler_timer.h b/src/common/wflign/deps/WFA2-lib/system/profiler_timer.h index 52c84e07..d3aee5e9 100644 --- a/src/common/wflign/deps/WFA2-lib/system/profiler_timer.h +++ b/src/common/wflign/deps/WFA2-lib/system/profiler_timer.h @@ -33,7 +33,7 @@ #ifndef PROFILER_TIMER_H #define PROFILER_TIMER_H -#include "utils/commons.h" +#include #include "profiler_counter.h" /* diff --git 
a/src/common/wflign/deps/WFA2-lib/tests/wfa.utest.cmp.sh b/src/common/wflign/deps/WFA2-lib/tests/wfa.utest.cmp.sh index 707b0dc5..0aa9c5ef 100755 --- a/src/common/wflign/deps/WFA2-lib/tests/wfa.utest.cmp.sh +++ b/src/common/wflign/deps/WFA2-lib/tests/wfa.utest.cmp.sh @@ -8,6 +8,14 @@ # Parameters FOLDER1=$1 FOLDER2=$2 +CMP_PERF=0 +if [[ "$3" == "--cmp-performance" ]] +then + CMP_PERF=1 +fi + +# Init status OK +STATUS=0 echo "> Comparing $FOLDER1 vs $FOLDER2" for FILE_ALG1 in $FOLDER1/*.alg @@ -21,6 +29,7 @@ do # Check existence if [[ ! -f "$FILE_ALG2" ]] then + STATUS=1 echo "$FILE_ALG2 doesn't exist." continue fi @@ -29,11 +38,27 @@ do then if [[ $(diff <(awk '{if ($1<0) print -$1; else print $1}' $FILE_ALG1) <(awk '{if ($1<0) print -$1; else print $1}' $FILE_ALG2)) ]] then + STATUS=1 echo "Error" + continue else - echo "ok" # Only score + STATUS=1 + echo -n "ok" # Only score fi else - echo "OK" + echo -n "OK" + fi + # Stats + if [[ $CMP_PERF == 1 ]] + then + T1=$(grep -m1 "Time.Alignment" $FOLDER1/$PREFIX.log | awk '{print $3" "$4}') + T2=$(grep -m1 "Time.Alignment" $FOLDER2/$PREFIX.log | awk '{print $3" "$4}') + M1=$(grep -m1 "Maximum resident set size" $FOLDER1/$PREFIX.log | tr -d "(:)" | awk '{print $6" "$5}') + M2=$(grep -m1 "Maximum resident set size" $FOLDER2/$PREFIX.log | tr -d "(:)" | awk '{print $6" "$5}') + echo -e "\tTIME($T1,$T2)\tMEM($M1,$M2)" + else + echo fi done + +exit $STATUS diff --git a/src/common/wflign/deps/WFA2-lib/tests/wfa.utest.performance.cmp.sh b/src/common/wflign/deps/WFA2-lib/tests/wfa.utest.performance.cmp.sh deleted file mode 100755 index 60b9f08e..00000000 --- a/src/common/wflign/deps/WFA2-lib/tests/wfa.utest.performance.cmp.sh +++ /dev/null @@ -1,46 +0,0 @@ -#!/bin/bash -# PROJECT: Wavefront Alignments Algorithms (Unitary Tests) -# LICENCE: MIT License -# AUTHOR(S): Santiago Marco-Sola -# DESCRIPTION: Compares alignments (*.alg) from two folders -# USAGE: ./wfa.utest.performance.cmp.sh wfa_results_folder_1 wfa_results_folder_2 -
-# Parameters -FOLDER1=$1 -FOLDER2=$2 - -echo "> Comparing $FOLDER1 vs $FOLDER2" -for FILE_ALG1 in $FOLDER1/*.alg -do - FILENAME=$(basename -- "$FILE_ALG1") - PREFIX=${FILENAME%.*} - FILE_ALG2="$FOLDER2/$FILENAME" - echo -ne "[UTest::$PREFIX]" - if [[ ${#PREFIX} < 15 ]]; then echo -ne " "; fi - echo -ne "\t" - # Check existence - if [[ ! -f "$FILE_ALG2" ]] - then - echo "$FILE_ALG2 doesn't exist." - continue - fi - # Check diff - if [[ $(diff $FILE_ALG1 $FILE_ALG2) ]] - then - if [[ $(diff <(awk '{if ($1<0) print -$1; else print $1}' $FILE_ALG1) <(awk '{if ($1<0) print -$1; else print $1}' $FILE_ALG2)) ]] - then - echo "Error" - continue - else - echo -n "OK" # Only score - fi - else - echo -n "OK" - fi - # Stats - T1=$(grep -m1 "Time.Alignment" $FOLDER1/$PREFIX.log | awk '{print $3" "$4}') - T2=$(grep -m1 "Time.Alignment" $FOLDER2/$PREFIX.log | awk '{print $3" "$4}') - M1=$(grep -m1 "Maximum resident set size" $FOLDER1/$PREFIX.log | tr -d "(:)" | awk '{print $6" "$5}') - M2=$(grep -m1 "Maximum resident set size" $FOLDER2/$PREFIX.log | tr -d "(:)" | awk '{print $6" "$5}') - echo -e "\tTIME($T1,$T2)\tMEM($M1,$M2)" -done diff --git a/src/common/wflign/deps/WFA2-lib/tests/wfa.utest.performance.sh b/src/common/wflign/deps/WFA2-lib/tests/wfa.utest.performance.sh index 2ee4fbaa..eba3b0d7 100755 --- a/src/common/wflign/deps/WFA2-lib/tests/wfa.utest.performance.sh +++ b/src/common/wflign/deps/WFA2-lib/tests/wfa.utest.performance.sh @@ -38,4 +38,4 @@ rm $OUTPUT/*.log $OUTPUT/*.alg \time -v ./bin/align_benchmark -a $ALGORITHM -i ../data/sim.l100K.n1.e10.seq -o $OUTPUT/sim.l100K.e10.Wb.alg $BIWFA &> $OUTPUT/sim.l100K.e10.Wb.log # Run the check -./tests/wfa.utest.performance.cmp.sh tests/ tests/wfa.utest.performance.check/ \ No newline at end of file +./tests/wfa.utest.cmp.sh tests/ tests/wfa.utest.performance.check/ --cmp-performance \ No newline at end of file diff --git a/src/common/wflign/deps/WFA2-lib/tests/wfa.utest.sh b/src/common/wflign/deps/WFA2-lib/tests/wfa.utest.sh 
index 652ca4dd..016795fe 100755 --- a/src/common/wflign/deps/WFA2-lib/tests/wfa.utest.sh +++ b/src/common/wflign/deps/WFA2-lib/tests/wfa.utest.sh @@ -1,4 +1,4 @@ -#!/bin/bash -x +#!/bin/bash # PROJECT: Wavefront Alignments Algorithms (Unitary Tests) # LICENCE: MIT License # AUTHOR(S): Santiago Marco-Sola @@ -12,7 +12,7 @@ OUTPUT="./tests" LOG="./tests/wfa.utest.log" # Clear -rm $OUTPUT/*.alg $OUTPUT/*.log* +rm $OUTPUT/*.alg $OUTPUT/*.log* &> /dev/null # Run tests for opt in "--check=correct","test" \ @@ -72,16 +72,30 @@ diff tests/wfa.utest.check/test.affine.wfapt1.alg tests/wfa.utest.check/test.pb. ./scripts/wfa.alg.cmp.score.sh tests/wfa.utest.check/test.affine.wfapt0.alg tests/wfa.utest.check/test.biwfa.affine.wfapt0.alg >> $LOG.correct 2>&1 ./scripts/wfa.alg.cmp.score.sh tests/wfa.utest.check/test.affine.wfapt1.alg tests/wfa.utest.check/test.biwfa.affine.wfapt1.alg >> $LOG.correct 2>&1 -# Summary tests +# Summary tests (*.correct,*.time,*.mem) grep "Alignments.Correct" $LOG >> $LOG.correct grep "Time.Alignment" $LOG | awk '{if ($4 != "ms") print $3" "$4}' | sort -n > $LOG.time grep "Maximum resident set size" $LOG | awk '{print $6}' | sort -n > $LOG.mem -echo -n ">>> Correct: " -tail -n 4 $OUTPUT/wfa.utest.log.time $OUTPUT/wfa.utest.check/wfa.utest.log.time -tail -n 4 $OUTPUT/wfa.utest.log.mem $OUTPUT/wfa.utest.check/wfa.utest.log.mem +# Display performance +echo ">>> Performance Time (s): " +paste <(tail -n 4 $OUTPUT/wfa.utest.log.time) <(tail -n 4 $OUTPUT/wfa.utest.check/wfa.utest.log.time) +echo ">>> Performance Mem (KB): " +paste <(tail -n 4 $OUTPUT/wfa.utest.log.mem) <(tail -n 4 $OUTPUT/wfa.utest.check/wfa.utest.log.mem) + +# Display correct ./tests/wfa.utest.cmp.sh $OUTPUT $OUTPUT/wfa.utest.check -grep "Exit status:" $LOG | sort | uniq -c -grep "Command terminated by signal" $LOG | sort | uniq -c -cat $OUTPUT/wfa.utest.log.correct | awk '{print $5$6}' | sort | uniq -c +STATUS=$? 
+STATUS_EXIT=$(grep "Exit status:" $LOG | grep -v "Exit status: 0" | sort | uniq -c | tr '\n' ' ') +STATUS_SIGNAL=$(grep "Command terminated by signal" $LOG | sort | uniq -c | tr '\n' ' ') +STATUS_CORRECT=$(cat $OUTPUT/wfa.utest.log.correct | awk '{print $5$6}' | sort | uniq -c | tr '\n' ' ') +echo ">>> Correct: ExitStatus($STATUS_EXIT) Signal($STATUS_SIGNAL) Correct($STATUS_CORRECT)" +if [[ $STATUS -eq 0 && -z "$STATUS_EXIT" && -z "$STATUS_SIGNAL" ]] +then + echo -e ">>>\n>>> ALL GOOD!\n>>>" +else + echo -e ">>>\n>>> ERROR\n>>>" +fi + +exit $STATUS diff --git a/src/common/wflign/deps/WFA2-lib/tools/align_benchmark/align_benchmark.c b/src/common/wflign/deps/WFA2-lib/tools/align_benchmark/align_benchmark.c index 22ab61f1..de4894de 100644 --- a/src/common/wflign/deps/WFA2-lib/tools/align_benchmark/align_benchmark.c +++ b/src/common/wflign/deps/WFA2-lib/tools/align_benchmark/align_benchmark.c @@ -51,6 +51,24 @@ #include "benchmark/benchmark_gap_affine.h" #include "benchmark/benchmark_gap_affine2p.h" +/* + * WFA lambda (custom match function) + */ +typedef struct { + char* pattern; + int pattern_length; + char* text; + int text_length; +} match_function_params_t; +match_function_params_t lambda_params; +// Simplest Extend-matching function (for testing purposes) +int lambda_function(int v,int h,void* arguments) { + // Extract parameters + match_function_params_t* const match_arguments = (match_function_params_t*)arguments; + // Check match + if (v >= match_arguments->pattern_length || h >= match_arguments->text_length) return 0; + return (match_arguments->pattern[v] == match_arguments->text[h]); +} /* * Algorithms */ @@ -113,7 +131,7 @@ void align_pairwise_test() { wavefront_aligner_t* const wf_aligner = wavefront_aligner_new(&attributes); // Align wavefront_align(wf_aligner, - pattern,NULL,strlen(pattern),text,NULL,strlen(text)); + pattern,strlen(pattern),text,strlen(text)); // CIGAR fprintf(stderr,">> WFA2"); cigar_print_pretty(stderr, @@ -203,10 +221,6 @@
wavefront_aligner_t* align_input_configure_wavefront( // Select alignment form attributes.alignment_form.span = (parameters.endsfree) ? alignment_endsfree : alignment_end2end; // Misc - if (parameters.wfa_match_funct_arguments != NULL) { - attributes.match_funct = parameters.wfa_match_funct; - attributes.match_funct_arguments = parameters.wfa_match_funct_arguments; - } attributes.plot.enabled = (parameters.plot != 0); attributes.plot.align_level = (parameters.plot < 0) ? -1 : parameters.plot - 1; attributes.system.verbose = parameters.verbose; @@ -233,6 +247,10 @@ void align_input_configure_global( align_input->mm_allocator = mm_allocator_new(BUFFER_SIZE_1M); // WFA if (align_benchmark_is_wavefront(parameters.algorithm)) { + if (parameters.wfa_lambda) { + align_input->wfa_match_funct = lambda_function; + align_input->wfa_match_funct_arguments = &lambda_params; + } align_input->wf_aligner = align_input_configure_wavefront(align_input); } else { align_input->wf_aligner = NULL; @@ -269,11 +287,11 @@ void align_input_configure_local( } } // Custom extend-match function - if (parameters.wfa_match_funct != NULL) { - match_function_params.pattern = align_input->pattern; - match_function_params.pattern_length = align_input->pattern_length; - match_function_params.text = align_input->text; - match_function_params.text_length = align_input->text_length; + if (parameters.wfa_lambda) { + lambda_params.pattern = align_input->pattern; + lambda_params.pattern_length = align_input->pattern_length; + lambda_params.text = align_input->text; + lambda_params.text_length = align_input->text_length; } } void align_benchmark_free( @@ -306,7 +324,9 @@ bool align_benchmark_read_input( align_input->pattern[align_input->pattern_length] = '\0'; align_input->text = *line2 + 1; align_input->text_length = line2_length - 2; - align_input->text[align_input->text_length] = '\0'; + if (align_input->text[align_input->text_length] == '\n') { + align_input->text[align_input->text_length] = '\0'; + } 
return true; } /* diff --git a/src/common/wflign/deps/WFA2-lib/tools/align_benchmark/align_benchmark_params.c b/src/common/wflign/deps/WFA2-lib/tools/align_benchmark/align_benchmark_params.c index 89a48558..e15a532e 100644 --- a/src/common/wflign/deps/WFA2-lib/tools/align_benchmark/align_benchmark_params.c +++ b/src/common/wflign/deps/WFA2-lib/tools/align_benchmark/align_benchmark_params.c @@ -80,11 +80,10 @@ align_bench_params_t parameters = { .wfa_heuristic_p2 = -1, .wfa_heuristic_p3 = -1, .wfa_memory_mode = wavefront_memory_high, - .wfa_match_funct = NULL, - .wfa_match_funct_arguments = NULL, .wfa_max_memory = UINT64_MAX, .wfa_max_score = INT_MAX, .wfa_max_threads = 1, + .wfa_lambda = false, // Other algorithms parameters .bandwidth = -1, // Misc @@ -101,16 +100,6 @@ align_bench_params_t parameters = { .progress = 100000, .verbose = 0, }; -/* - * Simplest Extend-matching function (for testing purposes) - */ -int match_function(int v,int h,void* arguments) { - // Extract parameters - match_function_params_t* match_arguments = (match_function_params_t*)arguments; - // Check match - if (v > match_arguments->pattern_length || h > match_arguments->text_length) return 0; - return (match_arguments->pattern[v] == match_arguments->text[h]); -} /* * Menu */ @@ -208,10 +197,10 @@ void parse_arguments( { "wfa-memory-mode", required_argument, 0, 1001 }, { "wfa-heuristic", required_argument, 0, 1002 }, { "wfa-heuristic-parameters", required_argument, 0, 1003 }, - { "wfa-custom-match-funct", no_argument, 0, 1004 }, { "wfa-max-memory", required_argument, 0, 1005 }, { "wfa-max-score", required_argument, 0, 1006 }, { "wfa-max-threads", required_argument, 0, 1007 }, + { "wfa-lambda", no_argument, 0, 1008 }, /* Other alignment parameters */ { "bandwidth", required_argument, 0, 2000 }, /* Misc */ @@ -397,10 +386,6 @@ void parse_arguments( } break; } - case 1004: // --wfa-custom-match-funct - parameters.wfa_match_funct = match_function; - parameters.wfa_match_funct_arguments = 
&match_function_params; - break; case 1005: // --wfa-max-memory parameters.wfa_max_memory = atol(optarg); break; @@ -410,6 +395,9 @@ void parse_arguments( case 1007: // --wfa-max-threads parameters.wfa_max_threads = atoi(optarg); break; + case 1008: // --wfa-lambda + parameters.wfa_lambda = true; + break; /* * Other alignment parameters */ diff --git a/src/common/wflign/deps/WFA2-lib/tools/align_benchmark/align_benchmark_params.h b/src/common/wflign/deps/WFA2-lib/tools/align_benchmark/align_benchmark_params.h index c3e17003..0602688b 100644 --- a/src/common/wflign/deps/WFA2-lib/tools/align_benchmark/align_benchmark_params.h +++ b/src/common/wflign/deps/WFA2-lib/tools/align_benchmark/align_benchmark_params.h @@ -60,17 +60,6 @@ typedef enum { alignment_gap_affine2p_wavefront, } alignment_algorithm_type; -/* - * Match function - */ -typedef struct { - char* pattern; - int pattern_length; - char* text; - int text_length; -} match_function_params_t; -match_function_params_t match_function_params; - /* * Align-benchmark Parameters */ @@ -105,11 +94,10 @@ typedef struct { int wfa_heuristic_p2; int wfa_heuristic_p3; wavefront_memory_t wfa_memory_mode; - alignment_match_funct_t wfa_match_funct; - void* wfa_match_funct_arguments; uint64_t wfa_max_memory; int wfa_max_score; int wfa_max_threads; + bool wfa_lambda; // Other algorithms parameters int bandwidth; // Misc diff --git a/src/common/wflign/deps/WFA2-lib/tools/align_benchmark/benchmark/benchmark_edit.c b/src/common/wflign/deps/WFA2-lib/tools/align_benchmark/benchmark/benchmark_edit.c index e3180a5b..6a56055d 100644 --- a/src/common/wflign/deps/WFA2-lib/tools/align_benchmark/benchmark/benchmark_edit.c +++ b/src/common/wflign/deps/WFA2-lib/tools/align_benchmark/benchmark/benchmark_edit.c @@ -137,9 +137,15 @@ void benchmark_edit_wavefront( wavefront_aligner_t* const wf_aligner = align_input->wf_aligner; // Align timer_start(&align_input->timer); - wavefront_align(wf_aligner, - 
align_input->pattern,NULL,align_input->pattern_length, - align_input->text,NULL,align_input->text_length); + if (align_input->wfa_match_funct == NULL) { + wavefront_align(wf_aligner, + align_input->pattern,align_input->pattern_length, + align_input->text,align_input->text_length); + } else { + wavefront_align_lambda(wf_aligner, + align_input->wfa_match_funct,align_input->wfa_match_funct_arguments, + align_input->pattern_length,align_input->text_length); + } timer_stop(&align_input->timer); // DEBUG if (align_input->debug_flags) { diff --git a/src/common/wflign/deps/WFA2-lib/tools/align_benchmark/benchmark/benchmark_gap_affine.c b/src/common/wflign/deps/WFA2-lib/tools/align_benchmark/benchmark/benchmark_gap_affine.c index 375822ef..faf53812 100644 --- a/src/common/wflign/deps/WFA2-lib/tools/align_benchmark/benchmark/benchmark_gap_affine.c +++ b/src/common/wflign/deps/WFA2-lib/tools/align_benchmark/benchmark/benchmark_gap_affine.c @@ -135,9 +135,15 @@ void benchmark_gap_affine_wavefront( wavefront_aligner_t* const wf_aligner = align_input->wf_aligner; // Align timer_start(&align_input->timer); - wavefront_align(wf_aligner, - align_input->pattern,NULL,align_input->pattern_length, - align_input->text,NULL,align_input->text_length); + if (align_input->wfa_match_funct == NULL) { + wavefront_align(wf_aligner, + align_input->pattern,align_input->pattern_length, + align_input->text,align_input->text_length); + } else { + wavefront_align_lambda(wf_aligner, + align_input->wfa_match_funct,align_input->wfa_match_funct_arguments, + align_input->pattern_length,align_input->text_length); + } timer_stop(&align_input->timer); // DEBUG if (align_input->debug_flags) { diff --git a/src/common/wflign/deps/WFA2-lib/tools/align_benchmark/benchmark/benchmark_gap_affine2p.c b/src/common/wflign/deps/WFA2-lib/tools/align_benchmark/benchmark/benchmark_gap_affine2p.c index fad0107e..ba6daba4 100644 --- a/src/common/wflign/deps/WFA2-lib/tools/align_benchmark/benchmark/benchmark_gap_affine2p.c 
+++ b/src/common/wflign/deps/WFA2-lib/tools/align_benchmark/benchmark/benchmark_gap_affine2p.c @@ -74,9 +74,15 @@ void benchmark_gap_affine2p_wavefront( wavefront_aligner_t* const wf_aligner = align_input->wf_aligner; // Align timer_start(&align_input->timer); - wavefront_align(wf_aligner, - align_input->pattern,NULL,align_input->pattern_length, - align_input->text,NULL,align_input->text_length); + if (align_input->wfa_match_funct == NULL) { + wavefront_align(wf_aligner, + align_input->pattern,align_input->pattern_length, + align_input->text,align_input->text_length); + } else { + wavefront_align_lambda(wf_aligner, + align_input->wfa_match_funct,align_input->wfa_match_funct_arguments, + align_input->pattern_length,align_input->text_length); + } timer_stop(&align_input->timer); // DEBUG if (align_input->debug_flags) { diff --git a/src/common/wflign/deps/WFA2-lib/tools/align_benchmark/benchmark/benchmark_gap_linear.c b/src/common/wflign/deps/WFA2-lib/tools/align_benchmark/benchmark/benchmark_gap_linear.c index 600d806c..41f679f9 100644 --- a/src/common/wflign/deps/WFA2-lib/tools/align_benchmark/benchmark/benchmark_gap_linear.c +++ b/src/common/wflign/deps/WFA2-lib/tools/align_benchmark/benchmark/benchmark_gap_linear.c @@ -73,9 +73,15 @@ void benchmark_gap_linear_wavefront( wavefront_aligner_t* const wf_aligner = align_input->wf_aligner; // Align timer_start(&align_input->timer); - wavefront_align(wf_aligner, - align_input->pattern,NULL,align_input->pattern_length, - align_input->text,NULL,align_input->text_length); + if (align_input->wfa_match_funct == NULL) { + wavefront_align(wf_aligner, + align_input->pattern,align_input->pattern_length, + align_input->text,align_input->text_length); + } else { + wavefront_align_lambda(wf_aligner, + align_input->wfa_match_funct,align_input->wfa_match_funct_arguments, + align_input->pattern_length,align_input->text_length); + } timer_stop(&align_input->timer); // DEBUG if (align_input->debug_flags) { diff --git 
a/src/common/wflign/deps/WFA2-lib/tools/align_benchmark/benchmark/benchmark_indel.c b/src/common/wflign/deps/WFA2-lib/tools/align_benchmark/benchmark/benchmark_indel.c index 2f624a88..44522db6 100644 --- a/src/common/wflign/deps/WFA2-lib/tools/align_benchmark/benchmark/benchmark_indel.c +++ b/src/common/wflign/deps/WFA2-lib/tools/align_benchmark/benchmark/benchmark_indel.c @@ -42,9 +42,15 @@ void benchmark_indel_wavefront( wavefront_aligner_t* const wf_aligner = align_input->wf_aligner; // Align timer_start(&align_input->timer); - wavefront_align(wf_aligner, - align_input->pattern,NULL,align_input->pattern_length, - align_input->text,NULL,align_input->text_length); + if (align_input->wfa_match_funct == NULL) { + wavefront_align(wf_aligner, + align_input->pattern,align_input->pattern_length, + align_input->text,align_input->text_length); + } else { + wavefront_align_lambda(wf_aligner, + align_input->wfa_match_funct,align_input->wfa_match_funct_arguments, + align_input->pattern_length,align_input->text_length); + } timer_stop(&align_input->timer); // DEBUG if (align_input->debug_flags) { diff --git a/src/common/wflign/deps/WFA2-lib/tools/align_benchmark/benchmark/benchmark_utils.c b/src/common/wflign/deps/WFA2-lib/tools/align_benchmark/benchmark/benchmark_utils.c index ccbd915e..6c1ec4b4 100644 --- a/src/common/wflign/deps/WFA2-lib/tools/align_benchmark/benchmark/benchmark_utils.c +++ b/src/common/wflign/deps/WFA2-lib/tools/align_benchmark/benchmark/benchmark_utils.c @@ -47,6 +47,8 @@ void benchmark_align_input_clear( align_input->text_begin_free = 0; align_input->pattern_end_free = 0; align_input->text_end_free = 0; + align_input->wfa_match_funct = NULL; + align_input->wfa_match_funct_arguments = NULL; // Output align_input->output_file = NULL; align_input->output_full = false; diff --git a/src/common/wflign/deps/WFA2-lib/tools/align_benchmark/benchmark/benchmark_utils.h b/src/common/wflign/deps/WFA2-lib/tools/align_benchmark/benchmark/benchmark_utils.h index 
b501d057..2d04c4b3 100644 --- a/src/common/wflign/deps/WFA2-lib/tools/align_benchmark/benchmark/benchmark_utils.h +++ b/src/common/wflign/deps/WFA2-lib/tools/align_benchmark/benchmark/benchmark_utils.h @@ -62,6 +62,8 @@ typedef struct { int pattern_length; char* text; int text_length; + alignment_match_funct_t wfa_match_funct; + void* wfa_match_funct_arguments; // Penalties linear_penalties_t linear_penalties; affine_penalties_t affine_penalties; diff --git a/src/common/wflign/deps/WFA2-lib/tools/align_benchmark/edit/edit_bpm.c b/src/common/wflign/deps/WFA2-lib/tools/align_benchmark/edit/edit_bpm.c index dd974c47..194ce3c6 100644 --- a/src/common/wflign/deps/WFA2-lib/tools/align_benchmark/edit/edit_bpm.c +++ b/src/common/wflign/deps/WFA2-lib/tools/align_benchmark/edit/edit_bpm.c @@ -22,6 +22,8 @@ * DESCRIPTION: Edit-Distance based BPM alignment algorithm */ +#include "utils/commons.h" +#include "system/mm_allocator.h" #include "edit/edit_bpm.h" #include "utils/dna_text.h" diff --git a/src/common/wflign/deps/WFA2-lib/tools/align_benchmark/edit/edit_bpm.h b/src/common/wflign/deps/WFA2-lib/tools/align_benchmark/edit/edit_bpm.h index fc21e0c0..b81686cb 100644 --- a/src/common/wflign/deps/WFA2-lib/tools/align_benchmark/edit/edit_bpm.h +++ b/src/common/wflign/deps/WFA2-lib/tools/align_benchmark/edit/edit_bpm.h @@ -25,7 +25,6 @@ #ifndef EDIT_BPM_H_ #define EDIT_BPM_H_ -#include "utils/commons.h" #include "alignment/cigar.h" #include "system/mm_allocator.h" diff --git a/src/common/wflign/deps/WFA2-lib/tools/align_benchmark/edit/edit_dp.c b/src/common/wflign/deps/WFA2-lib/tools/align_benchmark/edit/edit_dp.c index 5ca62004..a7ff0e73 100644 --- a/src/common/wflign/deps/WFA2-lib/tools/align_benchmark/edit/edit_dp.c +++ b/src/common/wflign/deps/WFA2-lib/tools/align_benchmark/edit/edit_dp.c @@ -29,6 +29,8 @@ * DESCRIPTION: Dynamic-programming algorithm to compute Levenshtein alignment (edit) */ +#include "utils/commons.h" +#include "system/mm_allocator.h" #include "edit_dp.h" 
/* diff --git a/src/common/wflign/deps/WFA2-lib/tools/align_benchmark/edit/edit_dp.h b/src/common/wflign/deps/WFA2-lib/tools/align_benchmark/edit/edit_dp.h index 806d80ae..ccb3e356 100644 --- a/src/common/wflign/deps/WFA2-lib/tools/align_benchmark/edit/edit_dp.h +++ b/src/common/wflign/deps/WFA2-lib/tools/align_benchmark/edit/edit_dp.h @@ -32,7 +32,6 @@ #ifndef EDIT_DP_H_ #define EDIT_DP_H_ -#include "utils/commons.h" #include "alignment/score_matrix.h" /* diff --git a/src/common/wflign/deps/WFA2-lib/tools/align_benchmark/gap_affine/affine_matrix.c b/src/common/wflign/deps/WFA2-lib/tools/align_benchmark/gap_affine/affine_matrix.c index a194fc63..16e94e76 100644 --- a/src/common/wflign/deps/WFA2-lib/tools/align_benchmark/gap_affine/affine_matrix.c +++ b/src/common/wflign/deps/WFA2-lib/tools/align_benchmark/gap_affine/affine_matrix.c @@ -29,6 +29,8 @@ * DESCRIPTION: Gap-affine Matrix */ +#include "utils/commons.h" +#include "system/mm_allocator.h" #include "gap_affine/affine_matrix.h" /* diff --git a/src/common/wflign/deps/WFA2-lib/tools/align_benchmark/gap_affine/affine_matrix.h b/src/common/wflign/deps/WFA2-lib/tools/align_benchmark/gap_affine/affine_matrix.h index 74d532f9..32311743 100644 --- a/src/common/wflign/deps/WFA2-lib/tools/align_benchmark/gap_affine/affine_matrix.h +++ b/src/common/wflign/deps/WFA2-lib/tools/align_benchmark/gap_affine/affine_matrix.h @@ -32,7 +32,6 @@ #ifndef AFFINE_MATRIX_H_ #define AFFINE_MATRIX_H_ -#include "utils/commons.h" #include "alignment/cigar.h" /* diff --git a/src/common/wflign/deps/WFA2-lib/tools/align_benchmark/gap_affine/swg.c b/src/common/wflign/deps/WFA2-lib/tools/align_benchmark/gap_affine/swg.c index 24c93277..7be9557c 100644 --- a/src/common/wflign/deps/WFA2-lib/tools/align_benchmark/gap_affine/swg.c +++ b/src/common/wflign/deps/WFA2-lib/tools/align_benchmark/gap_affine/swg.c @@ -30,6 +30,8 @@ * pairwise alignment (Smith-Waterman-Gotoh - SWG) */ +#include "utils/commons.h" +#include "system/mm_allocator.h" #include 
"gap_affine/swg.h" /* diff --git a/src/common/wflign/deps/WFA2-lib/tools/align_benchmark/gap_affine/swg.h b/src/common/wflign/deps/WFA2-lib/tools/align_benchmark/gap_affine/swg.h index a3a13c2b..9d64c887 100644 --- a/src/common/wflign/deps/WFA2-lib/tools/align_benchmark/gap_affine/swg.h +++ b/src/common/wflign/deps/WFA2-lib/tools/align_benchmark/gap_affine/swg.h @@ -33,7 +33,6 @@ #ifndef SWG_H_ #define SWG_H_ -#include "utils/commons.h" #include "gap_affine/affine_matrix.h" /* diff --git a/src/common/wflign/deps/WFA2-lib/utils/Makefile b/src/common/wflign/deps/WFA2-lib/utils/Makefile index 79d36bf0..01665c2e 100644 --- a/src/common/wflign/deps/WFA2-lib/utils/Makefile +++ b/src/common/wflign/deps/WFA2-lib/utils/Makefile @@ -12,7 +12,6 @@ MODULES=bitmap \ dna_text \ heatmap \ sequence_buffer \ - string_padded \ vector SRCS=$(addsuffix .c, $(MODULES)) diff --git a/src/common/wflign/deps/WFA2-lib/utils/bitmap.c b/src/common/wflign/deps/WFA2-lib/utils/bitmap.c index b4270b83..1b791331 100644 --- a/src/common/wflign/deps/WFA2-lib/utils/bitmap.c +++ b/src/common/wflign/deps/WFA2-lib/utils/bitmap.c @@ -29,6 +29,7 @@ * DESCRIPTION: Basic bitmap datastructure (static) */ +#include "utils/commons.h" #include "utils/bitmap.h" #include "system/mm_allocator.h" @@ -122,6 +123,3 @@ uint64_t bitmap_erank( const uint64_t bitmap_count = POPCOUNT_64(bitmap_masked); return bitmap_block->counter + bitmap_count; } - - - diff --git a/src/common/wflign/deps/WFA2-lib/utils/bitmap.h b/src/common/wflign/deps/WFA2-lib/utils/bitmap.h index 3fc53b04..3db59834 100644 --- a/src/common/wflign/deps/WFA2-lib/utils/bitmap.h +++ b/src/common/wflign/deps/WFA2-lib/utils/bitmap.h @@ -35,7 +35,6 @@ /* * Includes */ -#include "utils/commons.h" #include "system/mm_allocator.h" #define BITMAP_BLOCK_ELEMENTS 64 diff --git a/src/common/wflign/deps/WFA2-lib/utils/commons.h b/src/common/wflign/deps/WFA2-lib/utils/commons.h index 528801b7..a1f0132f 100644 --- a/src/common/wflign/deps/WFA2-lib/utils/commons.h +++ 
b/src/common/wflign/deps/WFA2-lib/utils/commons.h @@ -29,8 +29,7 @@ * DESCRIPTION: Common functions/utilities and headers for C development */ -#ifndef COMMONS_H_ -#define COMMONS_H_ +#pragma once #include #include @@ -278,5 +277,3 @@ uint64_t nominal_prop_u64(const uint64_t base,const double factor); int i; \ for (i=0;i */ +#include "utils/commons.h" #include "heatmap.h" /* @@ -168,6 +169,3 @@ void heatmap_print( fprintf(stream,"\n"); } } - - - diff --git a/src/common/wflign/deps/WFA2-lib/utils/heatmap.h b/src/common/wflign/deps/WFA2-lib/utils/heatmap.h index ff904c5d..fddadd16 100644 --- a/src/common/wflign/deps/WFA2-lib/utils/heatmap.h +++ b/src/common/wflign/deps/WFA2-lib/utils/heatmap.h @@ -31,7 +31,7 @@ #ifndef HEATMAP_H_ #define HEATMAP_H_ -#include "utils/commons.h" +#include /* * Heatmap diff --git a/src/common/wflign/deps/WFA2-lib/utils/sequence_buffer.c b/src/common/wflign/deps/WFA2-lib/utils/sequence_buffer.c index 2b7f750d..a7126c6c 100644 --- a/src/common/wflign/deps/WFA2-lib/utils/sequence_buffer.c +++ b/src/common/wflign/deps/WFA2-lib/utils/sequence_buffer.c @@ -29,6 +29,7 @@ * DESCRIPTION: Simple linear vector for generic type elements */ +#include "utils/commons.h" #include "utils/sequence_buffer.h" /* @@ -131,4 +132,3 @@ void sequence_buffer_add_pair( sequence_buffer->max_pattern_length = MAX(sequence_buffer->max_pattern_length,pattern_length); sequence_buffer->max_text_length = MAX(sequence_buffer->max_text_length,text_length); } - diff --git a/src/common/wflign/deps/WFA2-lib/utils/sequence_buffer.h b/src/common/wflign/deps/WFA2-lib/utils/sequence_buffer.h index 5a35ba65..961196bd 100644 --- a/src/common/wflign/deps/WFA2-lib/utils/sequence_buffer.h +++ b/src/common/wflign/deps/WFA2-lib/utils/sequence_buffer.h @@ -31,7 +31,6 @@ #ifndef SEQUENCE_BUFFER_H_ #define SEQUENCE_BUFFER_H_ -#include "utils/commons.h" #include "system/mm_allocator.h" typedef struct { diff --git a/src/common/wflign/deps/WFA2-lib/utils/string_padded.c 
b/src/common/wflign/deps/WFA2-lib/utils/string_padded.c deleted file mode 100644 index 67e5222c..00000000 --- a/src/common/wflign/deps/WFA2-lib/utils/string_padded.c +++ /dev/null @@ -1,192 +0,0 @@ -/* - * The MIT License - * - * Wavefront Alignment Algorithms - * Copyright (c) 2017 by Santiago Marco-Sola - * - * This file is part of Wavefront Alignment Algorithms. - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- * - * PROJECT: Wavefront Alignment Algorithms - * AUTHOR(S): Santiago Marco-Sola - * DESCRIPTION: Padded string module to avoid handling corner conditions - */ - -#include "utils/string_padded.h" -#include "system/mm_allocator.h" - -/* - * Strings (text/pattern) padded - */ -void strings_padded_add_padding( - const char* const buffer, - const int buffer_length, - const int begin_padding_length, - const int end_padding_length, - const char padding_value, - char** const buffer_padded, - char** const buffer_padded_begin, - const bool reverse_sequence, - mm_allocator_t* const mm_allocator) { - // Allocate - const int buffer_padded_length = begin_padding_length + buffer_length + end_padding_length; - *buffer_padded = mm_allocator_malloc(mm_allocator,buffer_padded_length); - // Add begin padding - memset(*buffer_padded,padding_value,begin_padding_length); - // Copy buffer - *buffer_padded_begin = *buffer_padded + begin_padding_length; - if (reverse_sequence) { - int i; - for (i=0;imm_allocator = mm_allocator; - // Compute padding dimensions - const int pattern_begin_padding_length = 0; - const int pattern_end_padding_length = padding_length; - const int text_begin_padding_length = 0; - const int text_end_padding_length = padding_length; - // Add padding - strings_padded_add_padding( - pattern,pattern_length, - pattern_begin_padding_length,pattern_end_padding_length,'?', - &(strings_padded->pattern_padded_buffer), - &(strings_padded->pattern_padded), - reverse_sequences,mm_allocator); - strings_padded_add_padding( - text,text_length, - text_begin_padding_length,text_end_padding_length,'!', - &(strings_padded->text_padded_buffer), - &(strings_padded->text_padded), - reverse_sequences,mm_allocator); - // Return - return strings_padded; -} -strings_padded_t* strings_padded_new_rhomb( - const char* const pattern, - const int* const pattern_lambda, - const int pattern_length, - const char* const text, - const int* const text_lambda, - const int text_length, - const int 
padding_length, - const bool reverse_sequences, - mm_allocator_t* const mm_allocator) { - // Allocate - strings_padded_t* const strings_padded = - mm_allocator_alloc(mm_allocator,strings_padded_t); - strings_padded->mm_allocator = mm_allocator; - // Compute padding dimensions - const int pattern_begin_padding_length = text_length + padding_length; - const int pattern_end_padding_length = pattern_length + text_length + padding_length; - const int text_begin_padding_length = padding_length; - const int text_end_padding_length = text_length + padding_length; - // Add padding - if (pattern != NULL && text != NULL) { - strings_padded_add_padding( - pattern,pattern_length, - pattern_begin_padding_length,pattern_end_padding_length,'?', - &(strings_padded->pattern_padded_buffer), - &(strings_padded->pattern_padded), - reverse_sequences,mm_allocator); - strings_padded_add_padding( - text,text_length, - text_begin_padding_length,text_end_padding_length,'!', - &(strings_padded->text_padded_buffer), - &(strings_padded->text_padded), - reverse_sequences,mm_allocator); - } else if (pattern_lambda != NULL && text_lambda != NULL) { - strings_padded_add_padding_lambda( - pattern_lambda,pattern_length, - pattern_begin_padding_length,pattern_end_padding_length,-1, - &(strings_padded->pattern_lambda_padded_buffer), - &(strings_padded->pattern_lambda_padded), - reverse_sequences,mm_allocator); - strings_padded_add_padding_lambda( - text_lambda,text_length, - text_begin_padding_length,text_end_padding_length,-1, - &(strings_padded->text_lambda_padded_buffer), - &(strings_padded->text_lambda_padded), - reverse_sequences,mm_allocator); - } - // Set lengths - strings_padded->pattern_length = pattern_length; - strings_padded->text_length = text_length; - // Return - return strings_padded; -} -void strings_padded_delete(strings_padded_t* const strings_padded) { - mm_allocator_free(strings_padded->mm_allocator,strings_padded->pattern_padded_buffer); - 
mm_allocator_free(strings_padded->mm_allocator,strings_padded->text_padded_buffer); - mm_allocator_free(strings_padded->mm_allocator,strings_padded); -} -void strings_padded_delete_lambda(strings_padded_t* const strings_padded) { - mm_allocator_free(strings_padded->mm_allocator,strings_padded->pattern_lambda_padded_buffer); - mm_allocator_free(strings_padded->mm_allocator,strings_padded->text_lambda_padded_buffer); - mm_allocator_free(strings_padded->mm_allocator,strings_padded); -} diff --git a/src/common/wflign/deps/WFA2-lib/utils/vector.c b/src/common/wflign/deps/WFA2-lib/utils/vector.c index 5870dac8..5a9109f0 100644 --- a/src/common/wflign/deps/WFA2-lib/utils/vector.c +++ b/src/common/wflign/deps/WFA2-lib/utils/vector.c @@ -30,6 +30,7 @@ * DESCRIPTION: Simple linear vector (generic type elements) */ +#include "utils/commons.h" #include "vector.h" /* @@ -122,4 +123,3 @@ vector_t* vector_dup( memcpy(vector_cpy->memory,vector_src->memory,vector_src->used*vector_src->element_size); return vector_cpy; } - diff --git a/src/common/wflign/deps/WFA2-lib/utils/vector.h b/src/common/wflign/deps/WFA2-lib/utils/vector.h index c6327476..2c5dcf06 100644 --- a/src/common/wflign/deps/WFA2-lib/utils/vector.h +++ b/src/common/wflign/deps/WFA2-lib/utils/vector.h @@ -33,8 +33,6 @@ #ifndef VECTOR_H_ #define VECTOR_H_ -#include "commons.h" - /* * Checkers */ diff --git a/src/common/wflign/deps/WFA2-lib/wavefront/Makefile b/src/common/wflign/deps/WFA2-lib/wavefront/Makefile index c30efdde..1b372eeb 100644 --- a/src/common/wflign/deps/WFA2-lib/wavefront/Makefile +++ b/src/common/wflign/deps/WFA2-lib/wavefront/Makefile @@ -24,11 +24,15 @@ MODULES=wavefront_align \ wavefront_debug \ wavefront_display \ wavefront_extend \ + wavefront_extend_kernels_avx \ + wavefront_extend_kernels \ wavefront_heuristic \ wavefront_pcigar \ wavefront_penalties \ + wavefront_sequences \ wavefront_plot \ wavefront_slab \ + wavefront_termination \ wavefront_unialign \ wavefront diff --git 
a/src/common/wflign/deps/WFA2-lib/wavefront/wavefront.c b/src/common/wflign/deps/WFA2-lib/wavefront/wavefront.c index 8c59dbfd..97ccd274 100644 --- a/src/common/wflign/deps/WFA2-lib/wavefront/wavefront.c +++ b/src/common/wflign/deps/WFA2-lib/wavefront/wavefront.c @@ -29,6 +29,8 @@ * DESCRIPTION: Individual WaveFront data structure */ +#include "utils/commons.h" +#include "system/mm_allocator.h" #include "wavefront.h" /* @@ -170,7 +172,3 @@ uint64_t wavefront_get_size( } return total_size; } - - - - diff --git a/src/common/wflign/deps/WFA2-lib/wavefront/wavefront.h b/src/common/wflign/deps/WFA2-lib/wavefront/wavefront.h index 44f1829f..67c0483b 100644 --- a/src/common/wflign/deps/WFA2-lib/wavefront/wavefront.h +++ b/src/common/wflign/deps/WFA2-lib/wavefront/wavefront.h @@ -32,7 +32,6 @@ #ifndef WAVEFRONT_H_ #define WAVEFRONT_H_ -#include "utils/commons.h" #include "system/mm_allocator.h" #include "wavefront_offset.h" #include "wavefront_backtrace_buffer.h" diff --git a/src/common/wflign/deps/WFA2-lib/wavefront/wavefront_align.c b/src/common/wflign/deps/WFA2-lib/wavefront/wavefront_align.c index a1dc7c6c..f35d023c 100644 --- a/src/common/wflign/deps/WFA2-lib/wavefront/wavefront_align.c +++ b/src/common/wflign/deps/WFA2-lib/wavefront/wavefront_align.c @@ -29,6 +29,7 @@ * DESCRIPTION: WaveFront alignment module for sequence pairwise alignment */ +#include "utils/commons.h" #include "wavefront_align.h" #include "wavefront_unialign.h" #include "wavefront_bialign.h" @@ -107,76 +108,122 @@ void wavefront_align_unidirectional_cleanup( } } void wavefront_align_unidirectional( - wavefront_aligner_t* const wf_aligner, - const char* const pattern, - const int* const pattern_lambda, - const int pattern_length, - const char* const text, - const int* const text_lambda, - const int text_length) { - // Prepare alignment - wavefront_unialign_init( - wf_aligner,pattern,pattern_lambda,pattern_length,text,text_lambda,text_length, - affine2p_matrix_M,affine2p_matrix_M); - // DEBUG - 
wavefront_debug_prologue(wf_aligner,pattern,pattern_length,text,text_length); + wavefront_aligner_t* const wf_aligner) { // Wavefront align sequences - wavefront_unialign(wf_aligner); + wavefront_unialign_init(wf_aligner,affine2p_matrix_M,affine2p_matrix_M); // Init + wavefront_unialign(wf_aligner); // Align // Finish if (wf_aligner->align_status.status == WF_STATUS_MAX_SCORE_REACHED) return; // Alignment paused wavefront_align_unidirectional_cleanup(wf_aligner); - // DEBUG - wavefront_debug_epilogue(wf_aligner); - wavefront_debug_check_correct(wf_aligner); } /* * Wavefront Alignment Bidirectional */ void wavefront_align_bidirectional( + wavefront_aligner_t* const wf_aligner) { + // Bidirectional alignment + wavefront_bialign(wf_aligner); // Align + // Finish + wf_aligner->align_status.memory_used = wavefront_aligner_get_size(wf_aligner); +} +/* + * Wavefront Alignment Dispatcher + */ +int wavefront_align_lambda( wavefront_aligner_t* const wf_aligner, - const char* const pattern, - const int* const pattern_lambda, + alignment_match_funct_t match_funct, + void* match_funct_arguments, const int pattern_length, - const char* const text, - const int* const text_lambda, const int text_length) { + // Checks + wavefront_align_checks(wf_aligner,pattern_length,text_length); + wavefront_debug_begin(wf_aligner); + // Plot + if (wf_aligner->plot != NULL) wavefront_plot_resize(wf_aligner->plot,pattern_length,text_length); + // Dispatcher + if (wf_aligner->bialigner == NULL) { + // Prepare Sequences + wavefront_sequences_init_lambda(&wf_aligner->sequences, + match_funct,match_funct_arguments, + pattern_length,text_length,false); + wavefront_align_unidirectional(wf_aligner); + } else { + // Prepare Sequences + wavefront_bialigner_set_sequences_lambda(wf_aligner->bialigner, + match_funct,match_funct_arguments, + pattern_length,text_length); + // Align + wavefront_align_bidirectional(wf_aligner); + } // DEBUG - 
wavefront_debug_prologue(wf_aligner,pattern,pattern_length,text,text_length); - // Bidirectional alignment - wavefront_bialign(wf_aligner,pattern,pattern_lambda,pattern_length,text,text_lambda,text_length); - // Finish - const uint64_t memory_used = wavefront_aligner_get_size(wf_aligner); - wf_aligner->align_status.memory_used = memory_used; + wavefront_debug_end(wf_aligner); + wavefront_debug_check_correct(wf_aligner); + // Return + return wf_aligner->align_status.status; +} +int wavefront_align_packed2bits( + wavefront_aligner_t* const wf_aligner, + const uint8_t* const pattern, + const int pattern_length, + const uint8_t* const text, + const int text_length) { + // Checks + wavefront_align_checks(wf_aligner,pattern_length,text_length); + wavefront_debug_begin(wf_aligner); + // Plot + if (wf_aligner->plot != NULL) wavefront_plot_resize(wf_aligner->plot,pattern_length,text_length); + // Dispatcher + if (wf_aligner->bialigner == NULL) { + // Prepare Sequences + wavefront_sequences_init_packed2bits(&wf_aligner->sequences, + pattern,pattern_length,text,text_length,false); + wavefront_align_unidirectional(wf_aligner); + } else { + // Prepare Sequences + wavefront_bialigner_set_sequences_packed2bits(wf_aligner->bialigner, + pattern,pattern_length,text,text_length); + // Align + wavefront_align_bidirectional(wf_aligner); + } // DEBUG - wavefront_debug_epilogue(wf_aligner); + wavefront_debug_end(wf_aligner); wavefront_debug_check_correct(wf_aligner); + // Return + return wf_aligner->align_status.status; } -/* - * Wavefront Alignment Dispatcher - */ int wavefront_align( wavefront_aligner_t* const wf_aligner, const char* const pattern, - int* const pattern_lambda, const int pattern_length, const char* const text, - int* const text_lambda, const int text_length) { // Checks wavefront_align_checks(wf_aligner,pattern_length,text_length); + wavefront_debug_begin(wf_aligner); // Plot - if (wf_aligner->plot != NULL) { - 
wavefront_plot_resize(wf_aligner->plot,pattern_length,text_length); - } + if (wf_aligner->plot != NULL) wavefront_plot_resize(wf_aligner->plot,pattern_length,text_length); // Dispatcher - if (wf_aligner->bialigner != NULL) { - wavefront_align_bidirectional(wf_aligner,pattern,pattern_lambda,pattern_length,text,text_lambda,text_length); + if (wf_aligner->bialigner == NULL) { + // Prepare Sequences + wavefront_sequences_init_ascii(&wf_aligner->sequences, + pattern,pattern_length,text,text_length,false); + wavefront_align_unidirectional(wf_aligner); } else { - wavefront_align_unidirectional(wf_aligner,pattern,pattern_lambda,pattern_length,text,text_lambda,text_length); + // Prepare Sequences + wavefront_bialigner_set_sequences_ascii(wf_aligner->bialigner, + pattern,pattern_length,text,text_length); + // Align + wavefront_align_bidirectional(wf_aligner); } + // DEBUG + wavefront_debug_end(wf_aligner); + wavefront_debug_check_correct(wf_aligner); // Return return wf_aligner->align_status.status; } +/* + * Wavefront Alignment Resume (Experimental) + */ int wavefront_align_resume( wavefront_aligner_t* const wf_aligner) { // Parameters @@ -195,9 +242,7 @@ int wavefront_align_resume( } wavefront_align_unidirectional_cleanup(wf_aligner); // DEBUG - wavefront_debug_epilogue(wf_aligner); wavefront_debug_check_correct(wf_aligner); // Return return align_status->status; } - diff --git a/src/common/wflign/deps/WFA2-lib/wavefront/wavefront_align.h b/src/common/wflign/deps/WFA2-lib/wavefront/wavefront_align.h index a2ffa9df..85180add 100644 --- a/src/common/wflign/deps/WFA2-lib/wavefront/wavefront_align.h +++ b/src/common/wflign/deps/WFA2-lib/wavefront/wavefront_align.h @@ -29,8 +29,7 @@ * DESCRIPTION: WaveFront alignment module for sequence pairwise alignment */ -#ifndef WAVEFRONT_ALIGN_H_ -#define WAVEFRONT_ALIGN_H_ +#pragma once #include "wavefront_aligner.h" @@ -40,12 +39,18 @@ int wavefront_align( wavefront_aligner_t* const wf_aligner, const char* const pattern, - int* const 
pattern_lambda, const int pattern_length, const char* const text, - int* const text_lambda, const int text_length); -int wavefront_align_resume( - wavefront_aligner_t* const wf_aligner); - -#endif /* WAVEFRONT_ALIGN_H_ */ +int wavefront_align_lambda( + wavefront_aligner_t* const wf_aligner, + alignment_match_funct_t const match_funct, + void* match_funct_arguments, + const int pattern_length, + const int text_length); +int wavefront_align_packed2bits( + wavefront_aligner_t* const wf_aligner, + const uint8_t* const pattern, + const int pattern_length, + const uint8_t* const text, + const int text_length); diff --git a/src/common/wflign/deps/WFA2-lib/wavefront/wavefront_aligner.c b/src/common/wflign/deps/WFA2-lib/wavefront/wavefront_aligner.c index 82741a98..a2273578 100644 --- a/src/common/wflign/deps/WFA2-lib/wavefront/wavefront_aligner.c +++ b/src/common/wflign/deps/WFA2-lib/wavefront/wavefront_aligner.c @@ -29,10 +29,13 @@ * DESCRIPTION: WaveFront aligner data structure */ +#include "utils/commons.h" #include "wavefront_aligner.h" #include "wavefront_components.h" #include "wavefront_heuristic.h" #include "wavefront_plot.h" +#include "wavefront_compute.h" +#include "wavefront_sequences.h" /* * Configuration @@ -50,6 +53,13 @@ char* wf_error_msg[] = /* WF_STATUS_UNFEASIBLE == -1 */ "[WFA] Alignment unfeasible (possible due to heuristic parameters)", /* WF_STATUS_SUCCESSFUL == 0 */ "[WFA] Alignment finished successfully", }; +char* wf_error_msg_short[] = +{ + /* WF_STATUS_OOM == -3 */ "OOM", + /* WF_STATUS_MAX_SCORE_REACHED == -2 */ "MaxScore", + /* WF_STATUS_UNFEASIBLE == -1 */ "Unfeasible", + /* WF_STATUS_SUCCESSFUL == 0 */ "OK", +}; char* wavefront_align_strerror(const int error_code) { if (error_code > 0) { fprintf(stderr,"[WFA] Internal alignment error code (%d)",error_code); @@ -57,6 +67,9 @@ char* wavefront_align_strerror(const int error_code) { } return wf_error_msg[error_code+3]; } +char* wavefront_align_strerror_short(const int error_code) { + return 
wf_error_msg_short[error_code+3]; +} /* * Setup */ @@ -172,9 +185,6 @@ void wavefront_aligner_init_alignment( // Memory mode wf_aligner->memory_mode = attributes->memory_mode; wavefront_aligner_init_heuristic(wf_aligner,attributes); - // Custom matching functions - wf_aligner->match_funct = attributes->match_funct; - wf_aligner->match_funct_arguments = attributes->match_funct_arguments; } wavefront_aligner_t* wavefront_aligner_new( wavefront_aligner_attr_t* attributes) { @@ -210,7 +220,7 @@ wavefront_aligner_t* wavefront_aligner_new( wf_aligner->mm_allocator); } // Sequences - wf_aligner->sequences = NULL; + wavefront_sequences_allocate(&wf_aligner->sequences); // CIGAR const int cigar_length = (score_only) ? 10 : 2*(PATTERN_LENGTH_INIT+TEXT_LENGTH_INIT); wf_aligner->cigar = cigar_new(cigar_length,wf_aligner->mm_allocator); @@ -221,15 +231,6 @@ wavefront_aligner_t* wavefront_aligner_new( } void wavefront_aligner_reap( wavefront_aligner_t* const wf_aligner) { - // Padded sequences - if (wf_aligner->sequences != NULL) { - if (wf_aligner->match_funct == NULL) { - strings_padded_delete(wf_aligner->sequences); - } else { - strings_padded_delete_lambda(wf_aligner->sequences); - } - wf_aligner->sequences = NULL; - } // Select alignment mode if (wf_aligner->bialigner != NULL) { wavefront_bialigner_reap(wf_aligner->bialigner); @@ -245,14 +246,8 @@ void wavefront_aligner_delete( // Parameters mm_allocator_t* const mm_allocator = wf_aligner->mm_allocator; const bool mm_allocator_own = wf_aligner->mm_allocator_own; - // Padded sequences - if (wf_aligner->sequences != NULL) { - if (wf_aligner->match_funct == NULL) { - strings_padded_delete(wf_aligner->sequences); - } else { - strings_padded_delete_lambda(wf_aligner->sequences); - } - } + // Sequences + wavefront_sequences_free(&wf_aligner->sequences); // Select alignment mode if (wf_aligner->bialigner != NULL) { wavefront_bialigner_delete(wf_aligner->bialigner); @@ -271,7 +266,143 @@ void wavefront_aligner_delete( // MM 
mm_allocator_free(mm_allocator,wf_aligner); if (mm_allocator_own) { - mm_allocator_delete(wf_aligner->mm_allocator); + mm_allocator_delete(mm_allocator); + } +} +/* + * Initialize wf-alignment conditions + */ +void wavefront_aligner_init_wf_m( + wavefront_aligner_t* const wf_aligner) { + // Parameters + wavefront_slab_t* const wavefront_slab = wf_aligner->wavefront_slab; + wavefront_components_t* const wf_components = &wf_aligner->wf_components; + const distance_metric_t distance_metric = wf_aligner->penalties.distance_metric; + wavefront_penalties_t* const penalties = &wf_aligner->penalties; + alignment_form_t* const form = &wf_aligner->alignment_form; + // Consider ends-free + const int hi = (penalties->match==0) ? form->text_begin_free : 0; + const int lo = (penalties->match==0) ? -form->pattern_begin_free : 0; + // Compute dimensions + int effective_lo, effective_hi; + wavefront_compute_limits_output(wf_aligner,lo,hi,&effective_lo,&effective_hi); + // Initialize end2end (wavefront zero) + wf_components->mwavefronts[0] = wavefront_slab_allocate(wavefront_slab,effective_lo,effective_hi); + wf_components->mwavefronts[0]->offsets[0] = 0; + wf_components->mwavefronts[0]->lo = lo; + wf_components->mwavefronts[0]->hi = hi; + // Store initial BT-piggypack element + if (wf_components->bt_piggyback) { + const bt_block_idx_t block_idx = wf_backtrace_buffer_init_block(wf_components->bt_buffer,0,0); + wf_components->mwavefronts[0]->bt_pcigar[0] = 0; + wf_components->mwavefronts[0]->bt_prev[0] = block_idx; + } + // Initialize ends-free + if (form->span == alignment_endsfree && penalties->match == 0) { + // Text begin-free + const int text_begin_free = form->text_begin_free; + int h; + for (h=1;h<=text_begin_free;++h) { + const int k = DPMATRIX_DIAGONAL(h,0); + wf_components->mwavefronts[0]->offsets[k] = DPMATRIX_OFFSET(h,0); + if (wf_components->bt_piggyback) { + const bt_block_idx_t block_idx = wf_backtrace_buffer_init_block(wf_components->bt_buffer,0,h); + 
wf_components->mwavefronts[0]->bt_pcigar[k] = 0; + wf_components->mwavefronts[0]->bt_prev[k] = block_idx; + } + } + // Pattern begin-free + const int pattern_begin_free = form->pattern_begin_free; + int v; + for (v=1;v<=pattern_begin_free;++v) { + const int k = DPMATRIX_DIAGONAL(0,v); + wf_components->mwavefronts[0]->offsets[k] = DPMATRIX_OFFSET(0,v); + if (wf_components->bt_piggyback) { + const bt_block_idx_t block_idx = wf_backtrace_buffer_init_block(wf_components->bt_buffer,v,0); + wf_components->mwavefronts[0]->bt_pcigar[k] = 0; + wf_components->mwavefronts[0]->bt_prev[k] = block_idx; + } + } + } + // Nullify unused WFs + if (distance_metric <= gap_linear) return; + wf_components->d1wavefronts[0] = NULL; + wf_components->i1wavefronts[0] = NULL; + if (distance_metric==gap_affine) return; + wf_components->d2wavefronts[0] = NULL; + wf_components->i2wavefronts[0] = NULL; +} +void wavefront_aligner_init_wf( + wavefront_aligner_t* const wf_aligner) { + // Parameters + wavefront_slab_t* const wavefront_slab = wf_aligner->wavefront_slab; + wavefront_components_t* const wf_components = &wf_aligner->wf_components; + const distance_metric_t distance_metric = wf_aligner->penalties.distance_metric; + // Init wavefronts + if (wf_aligner->component_begin == affine2p_matrix_M) { + // Initialize + wavefront_aligner_init_wf_m(wf_aligner); + // Nullify unused WFs + if (distance_metric <= gap_linear) return; + wf_components->i1wavefronts[0] = NULL; + wf_components->d1wavefronts[0] = NULL; + if (distance_metric == gap_affine) return; + wf_components->i2wavefronts[0] = NULL; + wf_components->d2wavefronts[0] = NULL; + } else { + // Compute dimensions + int effective_lo, effective_hi; // Effective lo/hi + wavefront_compute_limits_output(wf_aligner,0,0,&effective_lo,&effective_hi); + wavefront_t* const wavefront = wavefront_slab_allocate(wavefront_slab,effective_lo,effective_hi); + // Initialize + switch (wf_aligner->component_begin) { + case affine2p_matrix_I1: + 
wf_components->mwavefronts[0] = NULL; + wf_components->i1wavefronts[0] = wavefront; + wf_components->i1wavefronts[0]->offsets[0] = 0; + wf_components->i1wavefronts[0]->lo = 0; + wf_components->i1wavefronts[0]->hi = 0; + wf_components->d1wavefronts[0] = NULL; + // Nullify unused WFs + if (distance_metric==gap_affine) return; + wf_components->i2wavefronts[0] = NULL; + wf_components->d2wavefronts[0] = NULL; + break; + case affine2p_matrix_I2: + wf_components->mwavefronts[0] = NULL; + wf_components->i1wavefronts[0] = NULL; + wf_components->d1wavefronts[0] = NULL; + wf_components->i2wavefronts[0] = wavefront; + wf_components->i2wavefronts[0]->offsets[0] = 0; + wf_components->i2wavefronts[0]->lo = 0; + wf_components->i2wavefronts[0]->hi = 0; + wf_components->d2wavefronts[0] = NULL; + break; + case affine2p_matrix_D1: + wf_components->mwavefronts[0] = NULL; + wf_components->i1wavefronts[0] = NULL; + wf_components->d1wavefronts[0] = wavefront; + wf_components->d1wavefronts[0]->offsets[0] = 0; + wf_components->d1wavefronts[0]->lo = 0; + wf_components->d1wavefronts[0]->hi = 0; + // Nullify unused WFs + if (distance_metric==gap_affine) return; + wf_components->i2wavefronts[0] = NULL; + wf_components->d2wavefronts[0] = NULL; + break; + case affine2p_matrix_D2: + wf_components->mwavefronts[0] = NULL; + wf_components->i1wavefronts[0] = NULL; + wf_components->d1wavefronts[0] = NULL; + wf_components->i2wavefronts[0] = NULL; + wf_components->d2wavefronts[0] = wavefront; + wf_components->d2wavefronts[0]->offsets[0] = 0; + wf_components->d2wavefronts[0]->lo = 0; + wf_components->d2wavefronts[0]->hi = 0; + break; + default: + break; + } } } /* @@ -365,20 +496,6 @@ void wavefront_aligner_set_heuristic_zdrop( wavefront_bialigner_set_heuristic(wf_aligner->bialigner,&wf_aligner->heuristic); } } -/* - * Match-funct configuration - */ -void wavefront_aligner_set_match_funct( - wavefront_aligner_t* const wf_aligner, - int (*match_funct)(int,int,void*), - void* const match_funct_arguments) { 
- wf_aligner->match_funct = match_funct; - wf_aligner->match_funct_arguments = match_funct_arguments; - if (wf_aligner->bialigner != NULL) { - wavefront_bialigner_set_match_funct( - wf_aligner->bialigner,match_funct,match_funct_arguments); - } -} /* * System configuration */ @@ -403,22 +520,22 @@ void wavefront_aligner_set_max_memory( } } void wavefront_aligner_set_max_num_threads( - wavefront_aligner_t* const wf_aligner, - const int max_num_threads) { - wf_aligner->system.max_num_threads = max_num_threads; - if (wf_aligner->bialigner != NULL) { - wavefront_bialigner_set_max_num_threads( - wf_aligner->bialigner,max_num_threads); - } + wavefront_aligner_t* const wf_aligner, + const int max_num_threads) { + wf_aligner->system.max_num_threads = max_num_threads; + if (wf_aligner->bialigner != NULL) { + wavefront_bialigner_set_max_num_threads( + wf_aligner->bialigner,max_num_threads); + } } void wavefront_aligner_set_min_offsets_per_thread( - wavefront_aligner_t* const wf_aligner, - const int min_offsets_per_thread) { - wf_aligner->system.min_offsets_per_thread = min_offsets_per_thread; - if (wf_aligner->bialigner != NULL) { - wavefront_bialigner_set_min_offsets_per_thread( - wf_aligner->bialigner,min_offsets_per_thread); - } + wavefront_aligner_t* const wf_aligner, + const int min_offsets_per_thread) { + wf_aligner->system.min_offsets_per_thread = min_offsets_per_thread; + if (wf_aligner->bialigner != NULL) { + wavefront_bialigner_set_min_offsets_per_thread( + wf_aligner->bialigner,min_offsets_per_thread); + } } /* * Utils @@ -443,29 +560,28 @@ uint64_t wavefront_aligner_get_size( /* * Display */ -void wavefront_aligner_print_type( +void wavefront_aligner_print_mode( FILE* const stream, wavefront_aligner_t* const wf_aligner) { - if (wf_aligner->align_mode_tag == NULL) { - switch (wf_aligner->align_mode) { - case wf_align_biwfa: - fprintf(stream,"BiWFA"); - break; - case wf_align_biwfa_breakpoint_forward: - fprintf(stream,"BiWFA::Forward"); - break; - case 
wf_align_biwfa_breakpoint_reverse: - fprintf(stream,"BiWFA::Reverse"); - break; - case wf_align_biwfa_subsidiary: - fprintf(stream,"BiWFA::SubWFA"); - break; - default: - fprintf(stream,"WFA"); - break; - } - } else { - fprintf(stream,"%s",wf_aligner->align_mode_tag); + if (wf_aligner->align_mode_tag != NULL) { + fprintf(stream,"%s::",wf_aligner->align_mode_tag); + } + switch (wf_aligner->align_mode) { + case wf_align_biwfa: + fprintf(stream,"BiWFA"); + break; + case wf_align_biwfa_breakpoint_forward: + fprintf(stream,"BiWFA::Forward"); + break; + case wf_align_biwfa_breakpoint_reverse: + fprintf(stream,"BiWFA::Reverse"); + break; + case wf_align_biwfa_subsidiary: + fprintf(stream,"BiWFA::SubWFA"); + break; + default: + fprintf(stream,"WFA"); + break; } } void wavefront_aligner_print_scope( @@ -484,15 +600,19 @@ void wavefront_aligner_print_scope( wf_aligner->alignment_form.text_end_free); } } -void wavefront_aligner_print_mode( +void wavefront_aligner_print_conf( FILE* const stream, wavefront_aligner_t* const wf_aligner) { - fprintf(stream,"(%s,",(wf_aligner->alignment_scope==compute_score)?"Score":"Alg"); + fprintf(stream,"("); switch (wf_aligner->memory_mode) { - case wavefront_memory_high: fprintf(stream,"MHigh)"); break; - case wavefront_memory_med: fprintf(stream,"MMed)"); break; - case wavefront_memory_low: fprintf(stream,"MLow)"); break; - case wavefront_memory_ultralow: fprintf(stream,"BiWFA)"); break; + case wavefront_memory_high: fprintf(stream,"MHigh"); break; + case wavefront_memory_med: fprintf(stream,"MMed"); break; + case wavefront_memory_low: fprintf(stream,"MLow"); break; + case wavefront_memory_ultralow: fprintf(stream,"BiWFA"); break; + } + if (wf_aligner->system.max_alignment_score == INT_MAX) { + fprintf(stream,",inf)"); + } else { + fprintf(stream,",%d)",wf_aligner->system.max_alignment_score); } } - diff --git a/src/common/wflign/deps/WFA2-lib/wavefront/wavefront_aligner.h b/src/common/wflign/deps/WFA2-lib/wavefront/wavefront_aligner.h index 
fe8cc6fe..e5b32112 100644 --- a/src/common/wflign/deps/WFA2-lib/wavefront/wavefront_aligner.h +++ b/src/common/wflign/deps/WFA2-lib/wavefront/wavefront_aligner.h @@ -29,206 +29,11 @@ * DESCRIPTION: WaveFront aligner data structure */ -#ifndef WAVEFRONT_ALIGNER_H_ -#define WAVEFRONT_ALIGNER_H_ +#pragma once -#include "utils/commons.h" #include "utils/heatmap.h" -#include "utils/string_padded.h" #include "system/profiler_counter.h" #include "system/profiler_timer.h" -#include "system/mm_allocator.h" #include "system/mm_stack.h" #include "alignment/cigar.h" -#include "wavefront_slab.h" -#include "wavefront_penalties.h" -#include "wavefront_attributes.h" -#include "wavefront_components.h" -#include "wavefront_bialigner.h" - -/* - * Error codes & messages - */ -// Success -#define WF_STATUS_SUCCESSFUL 0 -// Errors -#define WF_STATUS_UNFEASIBLE -1 -#define WF_STATUS_MAX_SCORE_REACHED -2 -#define WF_STATUS_OOM -3 -// Internal -#define WF_STATUS_END_REACHED 1 -// Error messages -extern char* wf_error_msg[5]; -char* wavefront_align_strerror(const int error_code); - -/* - * Alignment status - */ -typedef struct _wavefront_aligner_t wavefront_aligner_t; -typedef struct { - // Status - int status; // Status code - int score; // Current WF-alignment score - int num_null_steps; // Total contiguous null-steps performed - uint64_t memory_used; // Total memory used - // Wavefront alignment functions - void (*wf_align_compute)(wavefront_aligner_t* const,const int); // WF Compute function - int (*wf_align_extend)(wavefront_aligner_t* const,const int); // WF Extend function - int (*wf_align_extend_max)(wavefront_aligner_t* const,const int,int* const); // WF Extend function (for BiWFA) -} wavefront_align_status_t; - -/* - * Alignment type - */ -typedef enum { - wf_align_regular = 0, - wf_align_biwfa = 1, - wf_align_biwfa_breakpoint_forward = 2, - wf_align_biwfa_breakpoint_reverse = 3, - wf_align_biwfa_subsidiary = 4 -} wavefront_align_mode_t; - -/* - * Wavefront Aligner - */ -typedef 
struct _wavefront_aligner_t { - // Mode and status - wavefront_align_mode_t align_mode; // WFA alignment mode - char* align_mode_tag; // WFA mode tag - wavefront_align_status_t align_status; // Current alignment status - // Sequences - strings_padded_t* sequences; // Padded sequences - char* pattern; // Pattern sequence (padded) - int* pattern_lambda; // Pattern sequence (padded) - int pattern_length; // Pattern length - char* text; // Text sequence (padded) - int* text_lambda; // Text sequence (padded) - int text_length; // Text length - // Custom function to compare sequences - alignment_match_funct_t match_funct; // Custom matching function (match(v,h,args)) - void* match_funct_arguments; // Generic arguments passed to matching function (args) - // Alignment Attributes - alignment_scope_t alignment_scope; // Alignment scope (score only or full-CIGAR) - alignment_form_t alignment_form; // Alignment form (end-to-end/ends-free) - wavefront_penalties_t penalties; // Alignment penalties - wavefront_heuristic_t heuristic; // Heuristic's parameters - wavefront_memory_t memory_mode; // Wavefront memory strategy (modular wavefronts and piggyback) - // Wavefront components - wavefront_components_t wf_components; // Wavefront components - affine2p_matrix_type component_begin; // Alignment begin component - affine2p_matrix_type component_end; // Alignment end component - wavefront_pos_t alignment_end_pos; // Alignment end position - // Bidirectional Alignment - wavefront_bialigner_t* bialigner; // BiWFA aligner - wf_bialign_breakpoint_t last_breakpoint; // Last identified breakpoint - // CIGAR - cigar_t* cigar; // Alignment CIGAR - // MM - bool mm_allocator_own; // Ownership of MM-Allocator - mm_allocator_t* mm_allocator; // MM-Allocator - wavefront_slab_t* wavefront_slab; // MM-Wavefront-Slab (Allocates/Reuses the individual wavefronts) - // Display - wavefront_plot_t* plot; // Wavefront plot - // System - alignment_system_t system; // System related parameters -} 
wavefront_aligner_t; - -/* - * Setup - */ -wavefront_aligner_t* wavefront_aligner_new( - wavefront_aligner_attr_t* attributes); -void wavefront_aligner_reap( - wavefront_aligner_t* const wf_aligner); -void wavefront_aligner_delete( - wavefront_aligner_t* const wf_aligner); - -/* - * Span configuration - */ -void wavefront_aligner_set_alignment_end_to_end( - wavefront_aligner_t* const wf_aligner); -void wavefront_aligner_set_alignment_free_ends( - wavefront_aligner_t* const wf_aligner, - const int pattern_begin_free, - const int pattern_end_free, - const int text_begin_free, - const int text_end_free); - -/* - * Heuristic configuration - */ -void wavefront_aligner_set_heuristic_none( - wavefront_aligner_t* const wf_aligner); -void wavefront_aligner_set_heuristic_wfadaptive( - wavefront_aligner_t* const wf_aligner, - const int min_wavefront_length, - const int max_distance_threshold, - const int score_steps); -void wavefront_aligner_set_heuristic_wfmash( - wavefront_aligner_t* const wf_aligner, - const int min_wavefront_length, - const int max_distance_threshold, - const int score_steps); -void wavefront_aligner_set_heuristic_xdrop( - wavefront_aligner_t* const wf_aligner, - const int xdrop, - const int score_steps); -void wavefront_aligner_set_heuristic_zdrop( - wavefront_aligner_t* const wf_aligner, - const int ydrop, - const int score_steps); -void wavefront_aligner_set_heuristic_banded_static( - wavefront_aligner_t* const wf_aligner, - const int band_min_k, - const int band_max_k); -void wavefront_aligner_set_heuristic_banded_adaptive( - wavefront_aligner_t* const wf_aligner, - const int band_min_k, - const int band_max_k, - const int score_steps); - -/* - * Match-funct configuration - */ -void wavefront_aligner_set_match_funct( - wavefront_aligner_t* const wf_aligner, - int (*match_funct)(int,int,void*), - void* const match_funct_arguments); - -/* - * System configuration - */ -void wavefront_aligner_set_max_alignment_score( - wavefront_aligner_t* const 
wf_aligner, - const int max_alignment_score); -void wavefront_aligner_set_max_memory( - wavefront_aligner_t* const wf_aligner, - const uint64_t max_memory_resident, - const uint64_t max_memory_abort); -void wavefront_aligner_set_max_num_threads( - wavefront_aligner_t* const wf_aligner, - const int max_num_threads); -void wavefront_aligner_set_min_offsets_per_thread( - wavefront_aligner_t* const wf_aligner, - const int min_offsets_per_thread); -/* - * Utils - */ -uint64_t wavefront_aligner_get_size( - wavefront_aligner_t* const wf_aligner); - -/* - * Display - */ -void wavefront_aligner_print_type( - FILE* const stream, - wavefront_aligner_t* const wf_aligner); -void wavefront_aligner_print_scope( - FILE* const stream, - wavefront_aligner_t* const wf_aligner); -void wavefront_aligner_print_mode( - FILE* const stream, - wavefront_aligner_t* const wf_aligner); - -#endif /* WAVEFRONT_ALIGNER_H_ */ +#include "wfa.h" diff --git a/src/common/wflign/deps/WFA2-lib/wavefront/wavefront_attributes.c b/src/common/wflign/deps/WFA2-lib/wavefront/wavefront_attributes.c index cafb93db..5869d214 100644 --- a/src/common/wflign/deps/WFA2-lib/wavefront/wavefront_attributes.c +++ b/src/common/wflign/deps/WFA2-lib/wavefront/wavefront_attributes.c @@ -29,6 +29,7 @@ * DESCRIPTION: WaveFront aligner data structure attributes */ +#include "utils/commons.h" #include "wavefront_attributes.h" /* @@ -45,9 +46,6 @@ wavefront_aligner_attr_t wavefront_aligner_attr_default = { .text_begin_free = 0, .text_end_free = 0, }, - // Custom matching functions - .match_funct = NULL, // Use default match-compare function - .match_funct_arguments = NULL, // No arguments // Penalties .linear_penalties = { .match = 0, diff --git a/src/common/wflign/deps/WFA2-lib/wavefront/wavefront_attributes.h b/src/common/wflign/deps/WFA2-lib/wavefront/wavefront_attributes.h index 7a8ef140..413144d0 100644 --- a/src/common/wflign/deps/WFA2-lib/wavefront/wavefront_attributes.h +++ 
b/src/common/wflign/deps/WFA2-lib/wavefront/wavefront_attributes.h @@ -66,26 +66,6 @@ typedef struct { int text_end_free; // Allow free-gap at the end of the text } alignment_form_t; -/* - * Custom extend-match function, e.g.: - * - * typedef struct { - * char* pattern; - * int pattern_length; - * char* text; - * int text_length; - * } match_function_params_t; - * - * int match_function(int v,int h,void* arguments) { - * // Extract parameters - * match_function_params_t* match_arguments = (match_function_params_t*)arguments; - * // Check match - * if (v > match_arguments->pattern_length || h > match_arguments->text_length) return 0; - * return (match_arguments->pattern[v] == match_arguments->text[h]); - * } - */ -typedef int (*alignment_match_funct_t)(int,int,void*); - /* * Alignment system configuration */ @@ -142,9 +122,6 @@ typedef struct { wavefront_heuristic_t heuristic; // Wavefront heuristic // Memory model wavefront_memory_t memory_mode; // Wavefront memory strategy (modular wavefronts and piggyback) - // Custom function to compare sequences - alignment_match_funct_t match_funct; // Custom matching function (match(v,h,args)) - void* match_funct_arguments; // Generic arguments passed to matching function (args) // External MM (instead of allocating one inside) mm_allocator_t* mm_allocator; // MM-Allocator // Display diff --git a/src/common/wflign/deps/WFA2-lib/wavefront/wavefront_backtrace.c b/src/common/wflign/deps/WFA2-lib/wavefront/wavefront_backtrace.c index 3be9a0ab..962cfa33 100644 --- a/src/common/wflign/deps/WFA2-lib/wavefront/wavefront_backtrace.c +++ b/src/common/wflign/deps/WFA2-lib/wavefront/wavefront_backtrace.c @@ -29,6 +29,7 @@ * DESCRIPTION: WaveFront-Alignment module for backtracing alignments */ +#include "utils/commons.h" #include "wavefront_backtrace.h" /* @@ -225,8 +226,9 @@ void wavefront_backtrace_linear( const int alignment_k, const wf_offset_t alignment_offset) { // Parameters - const int pattern_length = wf_aligner->pattern_length; 
- const int text_length = wf_aligner->text_length; + wavefront_sequences_t* const sequences = &wf_aligner->sequences; + const int pattern_length = sequences->pattern_length; + const int text_length = sequences->text_length; const wavefront_penalties_t* const penalties = &wf_aligner->penalties; const distance_metric_t distance_metric = penalties->distance_metric; // Prepare cigar @@ -322,8 +324,9 @@ void wavefront_backtrace_affine( const int alignment_k, const wf_offset_t alignment_offset) { // Parameters - const int pattern_length = wf_aligner->pattern_length; - const int text_length = wf_aligner->text_length; + wavefront_sequences_t* const sequences = &wf_aligner->sequences; + const int pattern_length = sequences->pattern_length; + const int text_length = sequences->text_length; const wavefront_penalties_t* const penalties = &wf_aligner->penalties; const distance_metric_t distance_metric = penalties->distance_metric; // Prepare cigar @@ -513,8 +516,8 @@ void wavefront_backtrace_affine( // DEBUG if (v != 0 || h != 0 || (score != 0 && penalties->match == 0)) { fprintf(stderr,"[WFA::Backtrace] I?/D?-Beginning backtrace error\n"); - fprintf(stderr,">%.*s\n",pattern_length,wf_aligner->pattern); - fprintf(stderr,"<%.*s\n",text_length,wf_aligner->text); + fprintf(stderr,">%.*s\n",pattern_length,sequences->pattern); + fprintf(stderr,"<%.*s\n",text_length,sequences->text); exit(-1); } } @@ -549,18 +552,12 @@ void wavefront_backtrace_pcigar( const int end_v = WAVEFRONT_V(alignment_k,alignment_offset); const int end_h = WAVEFRONT_H(alignment_k,alignment_offset); if (wf_aligner->penalties.distance_metric <= gap_linear) { - wf_backtrace_buffer_unpack_cigar_linear(bt_buffer, - wf_aligner->pattern,wf_aligner->pattern_length, - wf_aligner->text,wf_aligner->text_length, - wf_aligner->match_funct, - wf_aligner->match_funct_arguments, + wf_backtrace_buffer_unpack_cigar_linear( + bt_buffer,&wf_aligner->sequences, begin_v,begin_h,end_v,end_h,wf_aligner->cigar); } else { - 
wf_backtrace_buffer_unpack_cigar_affine(bt_buffer, - wf_aligner->pattern,wf_aligner->pattern_length, - wf_aligner->text,wf_aligner->text_length, - wf_aligner->match_funct, - wf_aligner->match_funct_arguments, + wf_backtrace_buffer_unpack_cigar_affine( + bt_buffer,&wf_aligner->sequences, begin_v,begin_h,end_v,end_h,wf_aligner->cigar); } } diff --git a/src/common/wflign/deps/WFA2-lib/wavefront/wavefront_backtrace_buffer.c b/src/common/wflign/deps/WFA2-lib/wavefront/wavefront_backtrace_buffer.c index 098f20b0..e8363740 100644 --- a/src/common/wflign/deps/WFA2-lib/wavefront/wavefront_backtrace_buffer.c +++ b/src/common/wflign/deps/WFA2-lib/wavefront/wavefront_backtrace_buffer.c @@ -29,7 +29,9 @@ * DESCRIPTION: WaveFront backtrace buffer to store bactrace-blocks */ +#include "utils/commons.h" #include "wavefront_backtrace_buffer.h" +#include "wavefront_sequences.h" /* * Config @@ -265,17 +267,15 @@ bt_block_t* wf_backtrace_buffer_traceback_pcigar( } void wf_backtrace_buffer_unpack_cigar_linear( wf_backtrace_buffer_t* const bt_buffer, - const char* const pattern, - const int pattern_length, - const char* const text, - const int text_length, - alignment_match_funct_t const match_funct, - void* const match_funct_arguments, + wavefront_sequences_t* const sequences, const int begin_v, const int begin_h, const int end_v, const int end_h, cigar_t* const cigar) { + // Parameters + const int pattern_length = sequences->pattern_length; + const int text_length = sequences->text_length; // Clear cigar char* cigar_buffer = cigar->operations; cigar->begin_offset = 0; @@ -292,9 +292,7 @@ void wf_backtrace_buffer_unpack_cigar_linear( // Unpack block int cigar_block_length = 0; pcigar_unpack_linear( - palignment_blocks[i], - pattern,pattern_length,text,text_length, - match_funct,match_funct_arguments,&v,&h, + palignment_blocks[i],sequences,&v,&h, cigar_buffer,&cigar_block_length); // Update CIGAR cigar_buffer += cigar_block_length; @@ -313,17 +311,15 @@ void 
wf_backtrace_buffer_unpack_cigar_linear( } void wf_backtrace_buffer_unpack_cigar_affine( wf_backtrace_buffer_t* const bt_buffer, - const char* const pattern, - const int pattern_length, - const char* const text, - const int text_length, - alignment_match_funct_t const match_funct, - void* const match_funct_arguments, + wavefront_sequences_t* const sequences, const int begin_v, const int begin_h, const int end_v, const int end_h, cigar_t* const cigar) { + // Parameters + const int pattern_length = sequences->pattern_length; + const int text_length = sequences->text_length; // Clear cigar char* cigar_buffer = cigar->operations; cigar->begin_offset = 0; @@ -341,9 +337,7 @@ void wf_backtrace_buffer_unpack_cigar_affine( // Unpack block int cigar_block_length = 0; pcigar_unpack_affine( - palignment_blocks[i], - pattern,pattern_length,text,text_length, - match_funct,match_funct_arguments,&v,&h, + palignment_blocks[i],sequences,&v,&h, cigar_buffer,&cigar_block_length,&current_matrix_type); // Update CIGAR cigar_buffer += cigar_block_length; @@ -398,7 +392,7 @@ void wf_backtrace_buffer_mark_backtrace_batch( while (active_blocks < max_batch_size && next_idx < num_block_idxs) { // Check NULL const bt_block_idx_t block_idx = bt_block_idxs[next_idx]; - if (offsets[next_idx] >= 0 && + if (offsets[next_idx] >= 0 && block_idx >= num_compacted_blocks) { // NOTE block_idx != BT_BLOCK_IDX_NULL // Prefetch (bt-block and bt_block) BITMAP_PREFETCH_BLOCK(bitmap,block_idx); @@ -526,6 +520,3 @@ bt_block_idx_t wf_backtrace_buffer_compact_marked( // Return last index return write_global_pos - 1; } - - - diff --git a/src/common/wflign/deps/WFA2-lib/wavefront/wavefront_backtrace_buffer.h b/src/common/wflign/deps/WFA2-lib/wavefront/wavefront_backtrace_buffer.h index 2cd7b6ca..3b2f8084 100644 --- a/src/common/wflign/deps/WFA2-lib/wavefront/wavefront_backtrace_buffer.h +++ b/src/common/wflign/deps/WFA2-lib/wavefront/wavefront_backtrace_buffer.h @@ -33,7 +33,6 @@ #define WAVEFRONT_BACKTRACE_BUFFER_H_ 
#include "alignment/cigar.h" -#include "utils/commons.h" #include "utils/vector.h" #include "utils/bitmap.h" #include "system/mm_allocator.h" @@ -120,12 +119,7 @@ bt_block_t* wf_backtrace_buffer_traceback_pcigar( bt_block_t* bt_block); void wf_backtrace_buffer_unpack_cigar_linear( wf_backtrace_buffer_t* const bt_buffer, - const char* const pattern, - const int pattern_length, - const char* const text, - const int text_length, - alignment_match_funct_t const match_funct, - void* const match_funct_arguments, + wavefront_sequences_t* const sequences, const int begin_v, const int begin_h, const int end_v, @@ -133,12 +127,7 @@ void wf_backtrace_buffer_unpack_cigar_linear( cigar_t* const cigar); void wf_backtrace_buffer_unpack_cigar_affine( wf_backtrace_buffer_t* const bt_buffer, - const char* const pattern, - const int pattern_length, - const char* const text, - const int text_length, - alignment_match_funct_t const match_funct, - void* const match_funct_arguments, + wavefront_sequences_t* const sequences, const int begin_v, const int begin_h, const int end_v, diff --git a/src/common/wflign/deps/WFA2-lib/wavefront/wavefront_backtrace_offload.c b/src/common/wflign/deps/WFA2-lib/wavefront/wavefront_backtrace_offload.c index 4109f927..fc8c0690 100644 --- a/src/common/wflign/deps/WFA2-lib/wavefront/wavefront_backtrace_offload.c +++ b/src/common/wflign/deps/WFA2-lib/wavefront/wavefront_backtrace_offload.c @@ -29,7 +29,8 @@ * DESCRIPTION: WaveFront alignment module for offloading partial backtraces */ -#include "utils/string_padded.h" +#include "utils/commons.h" +#include "wfa.h" #include "wavefront_backtrace_offload.h" /* @@ -285,4 +286,3 @@ void wavefront_backtrace_offload_affine( wf_aligner,out_d2,out_d2_bt_pcigar,out_d2_bt_prev,lo,hi); } } - diff --git a/src/common/wflign/deps/WFA2-lib/wavefront/wavefront_bialign.c b/src/common/wflign/deps/WFA2-lib/wavefront/wavefront_bialign.c index 863af63d..c862216e 100644 --- 
a/src/common/wflign/deps/WFA2-lib/wavefront/wavefront_bialign.c +++ b/src/common/wflign/deps/WFA2-lib/wavefront/wavefront_bialign.c @@ -28,6 +28,7 @@ * AUTHOR(S): Santiago Marco-Sola */ +#include "utils/commons.h" #include "wavefront_bialign.h" #include "wavefront_unialign.h" #include "wavefront_bialigner.h" @@ -71,6 +72,81 @@ void wavefront_bialign_debug( } fprintf(stderr,")\n"); } +/* + * Init + */ +void wavefront_bialign_init( + wavefront_bialigner_t* const bialigner, + const distance_metric_t distance_metric, + alignment_form_t* const form, + const affine2p_matrix_type component_begin, + const affine2p_matrix_type component_end, + const int verbose) { + // Parameters + wavefront_aligner_t* const alg_forward = bialigner->alg_forward; + wavefront_aligner_t* const alg_reverse = bialigner->alg_reverse; + // Resize wavefront aligner + wavefront_unialign_resize(alg_forward); + wavefront_unialign_resize(alg_reverse); + // Configure WF-compute function (global) + switch (distance_metric) { + case indel: + case edit: + bialigner->wf_align_compute = &wavefront_compute_edit; + break; + case gap_linear: + bialigner->wf_align_compute = &wavefront_compute_linear; + break; + case gap_affine: + bialigner->wf_align_compute = &wavefront_compute_affine; + break; + case gap_affine_2p: + bialigner->wf_align_compute = &wavefront_compute_affine2p; + break; + default: + fprintf(stderr,"[WFA] Distance function not implemented\n"); + exit(1); + break; + } + // Configure form forward and reverse + alignment_span_t span_forward = + (form->pattern_begin_free > 0 || form->text_begin_free > 0) ? + alignment_endsfree : alignment_end2end; + alignment_form_t form_forward = { + .span = span_forward, + .pattern_begin_free = form->pattern_begin_free, + .pattern_end_free = 0, + .text_begin_free = form->text_begin_free, + .text_end_free = 0, + }; + alignment_span_t span_reverse = + (form->pattern_end_free > 0 || form->text_end_free > 0) ? 
+ alignment_endsfree : alignment_end2end; + alignment_form_t form_reverse = { + .span = span_reverse, + .pattern_begin_free = form->pattern_end_free, + .pattern_end_free = 0, + .text_begin_free = form->text_end_free, + .text_end_free = 0, + }; + // Initialize wavefront (forward) + alg_forward->align_status.num_null_steps = 0; + alg_forward->alignment_form = form_forward; + alg_forward->component_begin = component_begin; + alg_forward->component_end = component_end; + wavefront_aligner_init_wf(alg_forward); + // Initialize wavefront (reverse) + alg_reverse->align_status.num_null_steps = 0; + alg_reverse->alignment_form = form_reverse; + alg_reverse->component_begin = component_end; + alg_reverse->component_end = component_begin; + wavefront_aligner_init_wf(alg_reverse); + // DEBUG + if (verbose >= 2) { + wavefront_debug_begin(alg_forward); + wavefront_debug_begin(alg_reverse); + } +} /* * Bidirectional check breakpoints */ @@ -84,8 +160,9 @@ void wavefront_bialign_breakpoint_indel2indel( const affine2p_matrix_type component, wf_bialign_breakpoint_t* const breakpoint) { // Parameters - const int text_length = wf_aligner->text_length; - const int pattern_length = wf_aligner->pattern_length; + wavefront_sequences_t* const sequences = &wf_aligner->sequences; + const int text_length = sequences->text_length; + const int pattern_length = sequences->pattern_length; const int gap_open = (component==affine2p_matrix_I1 || component==affine2p_matrix_D1) ? 
wf_aligner->penalties.gap_opening1 : wf_aligner->penalties.gap_opening2; @@ -140,8 +217,9 @@ void wavefront_bialign_breakpoint_m2m( wavefront_t* const mwf_1, wf_bialign_breakpoint_t* const breakpoint) { // Parameters - const int text_length = wf_aligner->text_length; - const int pattern_length = wf_aligner->pattern_length; + wavefront_sequences_t* const sequences = &wf_aligner->sequences; + const int text_length = sequences->text_length; + const int pattern_length = sequences->pattern_length; // Check wavefronts overlapping const int lo_0 = mwf_0->lo; const int hi_0 = mwf_0->hi; @@ -269,87 +347,6 @@ void wavefront_bialign_overlap( /* * Bidirectional breakpoint detection */ -void wavefront_bialign_find_breakpoint_init( - wavefront_aligner_t* const alg_forward, - wavefront_aligner_t* const alg_reverse, - const char* const pattern, - const int* const pattern_lambda, - const int pattern_length, - const char* const text, - const int* const text_lambda, - const int text_length, - const distance_metric_t distance_metric, - alignment_form_t* const form, - const affine2p_matrix_type component_begin, - const affine2p_matrix_type component_end) { - // Resize wavefront aligner - wavefront_unialign_resize(alg_forward,pattern,pattern_lambda,pattern_length,text,text_lambda,text_length,false); - wavefront_unialign_resize(alg_reverse,pattern,pattern_lambda,pattern_length,text,text_lambda,text_length,true); - // Configure form forward and reverse - alignment_span_t span_forward = - (form->pattern_begin_free > 0 || form->text_begin_free > 0) ? alignment_endsfree : alignment_end2end; - alignment_form_t form_forward = { - .span = span_forward, - .pattern_begin_free = form->pattern_begin_free, - .pattern_end_free = 0, - .text_begin_free = form->text_begin_free, - .text_end_free = 0, - }; - alignment_span_t span_reverse = - (form->pattern_end_free > 0 || form->text_end_free > 0) ? 
alignment_endsfree : alignment_end2end; - alignment_form_t form_reverse = { - .span = span_reverse, - .pattern_begin_free = form->pattern_end_free, - .pattern_end_free = 0, - .text_begin_free = form->text_end_free, - .text_end_free = 0, - }; - // Configure WF-compute function (global) - switch (distance_metric) { - case indel: - case edit: - alg_forward->align_status.wf_align_compute = &wavefront_compute_edit; - break; - case gap_linear: - alg_forward->align_status.wf_align_compute = &wavefront_compute_linear; - break; - case gap_affine: - alg_forward->align_status.wf_align_compute = &wavefront_compute_affine; - break; - case gap_affine_2p: - alg_forward->align_status.wf_align_compute = &wavefront_compute_affine2p; - break; - default: - fprintf(stderr,"[WFA] Distance function not implemented\n"); - exit(1); - break; - } - // Configure WF-extend function - wavefront_align_status_t* const align_status = &alg_forward->align_status; - const bool end2end = (form_forward.span == alignment_end2end); - if (alg_forward->match_funct != NULL) { - align_status->wf_align_extend = &wavefront_extend_custom; - align_status->wf_align_extend_max = &wavefront_extend_custom_max; - } else if (end2end) { - align_status->wf_align_extend = &wavefront_extend_end2end; - align_status->wf_align_extend_max = &wavefront_extend_end2end_max; - } else { - fprintf(stderr,"[WFA] BiWFA and ends-free is not supported yet\n"); - exit(1); - } - // Initialize wavefront (forward) - alg_forward->align_status.num_null_steps = 0; - alg_forward->alignment_form = form_forward; - alg_forward->component_begin = component_begin; - alg_forward->component_end = component_end; - wavefront_unialign_initialize_wavefronts(alg_forward,pattern_length,text_length); - // Initialize wavefront (reverse) - alg_reverse->align_status.num_null_steps = 0; - alg_reverse->alignment_form = form_reverse; - alg_reverse->component_begin = component_end; - alg_reverse->component_end = component_begin; - 
wavefront_unialign_initialize_wavefronts(alg_reverse,pattern_length,text_length); -} int wavefront_bialign_overlap_gopen_adjust( wavefront_aligner_t* const wf_aligner, const distance_metric_t distance_metric) { @@ -367,12 +364,6 @@ int wavefront_bialign_overlap_gopen_adjust( } int wavefront_bialign_find_breakpoint( wavefront_bialigner_t* const bialigner, - const char* const pattern, - const int* const pattern_lambda, - const int pattern_length, - const char* const text, - const int* const text_lambda, - const int text_length, const distance_metric_t distance_metric, alignment_form_t* const form, const affine2p_matrix_type component_begin, @@ -382,24 +373,19 @@ int wavefront_bialign_find_breakpoint( // Parameters wavefront_aligner_t* const alg_forward = bialigner->alg_forward; wavefront_aligner_t* const alg_reverse = bialigner->alg_reverse; - // Init bialignment - wavefront_bialign_find_breakpoint_init( - alg_forward,alg_reverse, - pattern,pattern_lambda,pattern_length,text,text_lambda,text_length, - distance_metric,form,component_begin,component_end); - // DEBUG alignment_system_t* const system = &alg_forward->system; const int verbose = system->verbose; - if (verbose >= 2) { - wavefront_debug_prologue(alg_forward,pattern,pattern_length,text,text_length); - wavefront_debug_prologue(alg_reverse,pattern,pattern_length,text,text_length); - } + // Init bialignment + wavefront_bialign_init(bialigner,distance_metric,form,component_begin,component_end,verbose); + // Sequences + wavefront_sequences_t* const sequences = &alg_forward->sequences; + const int text_length = sequences->text_length; + const int pattern_length = sequences->pattern_length; + // Operators + void (*wf_align_compute)(wavefront_aligner_t* const,const int) = bialigner->wf_align_compute; // Parameters const int max_alignment_score = alg_forward->system.max_alignment_score; const int max_antidiagonal = DPMATRIX_ANTIDIAGONAL(pattern_length,text_length) - 1; // Note: Even removing -1 - void 
(*wf_align_compute)(wavefront_aligner_t* const,const int) = alg_forward->align_status.wf_align_compute; - int (*wf_align_extend)(wavefront_aligner_t* const,const int) = alg_forward->align_status.wf_align_extend; - int (*wf_align_extend_max)(wavefront_aligner_t* const,const int,int* const) = alg_forward->align_status.wf_align_extend_max; int score_forward = 0, score_reverse = 0, forward_max_ak = 0, reverse_max_ak = 0; bool end_reached; // Plot @@ -410,13 +396,13 @@ int wavefront_bialign_find_breakpoint( } // Prepare and perform first bialignment step breakpoint->score = INT_MAX; - end_reached = (*wf_align_extend_max)(alg_forward,score_forward,&forward_max_ak); + end_reached = wavefront_extend_end2end_max(alg_forward,score_forward,&forward_max_ak); if (end_reached) return alg_forward->align_status.status; - end_reached = (*wf_align_extend_max)(alg_reverse,score_reverse,&reverse_max_ak); + end_reached = wavefront_extend_end2end_max(alg_reverse,score_reverse,&reverse_max_ak); if (end_reached) return alg_reverse->align_status.status; // Compute wavefronts of increasing score until both wavefronts overlap int max_ak = 0; - bool last_wf_forward; + bool last_wf_forward = false; while (true) { // Check close-to-collision if (forward_max_ak + reverse_max_ak >= max_antidiagonal) break; @@ -427,7 +413,7 @@ int wavefront_bialign_find_breakpoint( (*wf_align_compute)(alg_forward,score_forward); if (plot_enabled) wavefront_plot(alg_forward,score_forward,align_level); // Plot // Extend - end_reached = (*wf_align_extend_max)(alg_forward,score_forward,&max_ak); + end_reached = wavefront_extend_end2end_max(alg_forward,score_forward,&max_ak); if (forward_max_ak < max_ak) forward_max_ak = max_ak; last_wf_forward = true; // Check end-reached and close-to-collision @@ -440,7 +426,7 @@ int wavefront_bialign_find_breakpoint( (*wf_align_compute)(alg_reverse,score_reverse); if (plot_enabled) wavefront_plot(alg_reverse,score_reverse,align_level); // Plot // Extend - end_reached = 
(*wf_align_extend_max)(alg_reverse,score_reverse,&max_ak); + end_reached = wavefront_extend_end2end_max(alg_reverse,score_reverse,&max_ak); if (reverse_max_ak < max_ak) reverse_max_ak = max_ak; last_wf_forward = false; // Check end-reached and max-score-reached @@ -467,7 +453,7 @@ int wavefront_bialign_find_breakpoint( (*wf_align_compute)(alg_reverse,score_reverse); if (plot_enabled) wavefront_plot(alg_reverse,score_reverse,align_level); // Plot // Extend & check end-reached - end_reached = (*wf_align_extend)(alg_reverse,score_reverse); + end_reached = wavefront_extend_end2end(alg_reverse,score_reverse); if (end_reached) return alg_reverse->align_status.status; } // Check overlapping wavefronts @@ -481,7 +467,7 @@ int wavefront_bialign_find_breakpoint( (*wf_align_compute)(alg_forward,score_forward); if (plot_enabled) wavefront_plot(alg_forward,score_forward,align_level); // Plot // Extend & check end-reached/max-score-reached - end_reached = (*wf_align_extend)(alg_forward,score_forward); + end_reached = wavefront_extend_end2end(alg_forward,score_forward); if (end_reached) return alg_forward->align_status.status; if (score_reverse + score_forward >= max_alignment_score) return WF_STATUS_MAX_SCORE_REACHED; // Enable always @@ -495,12 +481,6 @@ int wavefront_bialign_find_breakpoint( */ void wavefront_bialign_base( wavefront_aligner_t* const wf_aligner, - const char* const pattern, - const int* const pattern_lambda, - const int pattern_length, - const char* const text, - const int* const text_lambda, - const int text_length, alignment_form_t* const form, const affine2p_matrix_type component_begin, const affine2p_matrix_type component_end, @@ -510,21 +490,15 @@ void wavefront_bialign_base( const int verbose = wf_aligner->system.verbose; // Configure alg_subsidiary->alignment_form = *form; - wavefront_unialign_init( - alg_subsidiary, - pattern,pattern_lambda,pattern_length, - text,text_lambda,text_length, - component_begin,component_end); + 
wavefront_unialign_init(alg_subsidiary,component_begin,component_end); // DEBUG - if (verbose >= 2) { - wavefront_debug_prologue(alg_subsidiary,pattern,pattern_length,text,text_length); - } + if (verbose >= 2) wavefront_debug_begin(alg_subsidiary); // Wavefront align sequences wavefront_unialign(alg_subsidiary); wf_aligner->align_status.status = alg_subsidiary->align_status.status; // DEBUG if (verbose >= 2) { - wavefront_debug_epilogue(alg_subsidiary); + wavefront_debug_end(alg_subsidiary); wavefront_debug_check_correct(wf_aligner); } // Append CIGAR @@ -533,12 +507,6 @@ void wavefront_bialign_base( } void wavefront_bialign_exception( wavefront_aligner_t* const wf_aligner, - const char* const pattern, - const int* const pattern_lambda, - const int pattern_length, - const char* const text, - const int* const text_lambda, - const int text_length, alignment_form_t* const form, const affine2p_matrix_type component_begin, const affine2p_matrix_type component_end, @@ -548,6 +516,9 @@ void wavefront_bialign_exception( if (align_status == WF_STATUS_MAX_SCORE_REACHED || align_status == WF_STATUS_UNFEASIBLE) { wf_aligner->align_status.status = align_status; + if (align_status == WF_STATUS_MAX_SCORE_REACHED) { + wf_aligner->cigar->score = -wf_aligner->system.max_alignment_score; + } return; } // Check end reached @@ -563,9 +534,7 @@ void wavefront_bialign_exception( } // Fallback if possible if (score_reached <= WF_BIALIGN_FALLBACK_MIN_SCORE) { - wavefront_bialign_base( - wf_aligner,pattern,pattern_lambda,pattern_length,text,text_lambda,text_length, - form,component_begin,component_end,align_level); + wavefront_bialign_base(wf_aligner,form,component_begin,component_end,align_level); } else { wf_aligner->align_status.status = WF_STATUS_UNFEASIBLE; } @@ -608,20 +577,17 @@ void wavefront_bialign_init_half_1( } void wavefront_bialign_alignment( wavefront_aligner_t* const wf_aligner, - const char* const pattern, - const int* const pattern_lambda, - const int pattern_begin, - 
const int pattern_end, - const char* const text, - const int* const text_lambda, - const int text_begin, - const int text_end, alignment_form_t* const form, const affine2p_matrix_type component_begin, const affine2p_matrix_type component_end, const int score_remaining, const int align_level) { // Parameters + wavefront_sequences_t* const sequences = &wf_aligner->bialigner->alg_forward->sequences; + const int pattern_begin = sequences->pattern_begin; + const int pattern_end = sequences->pattern_begin + sequences->pattern_length; + const int text_begin = sequences->text_begin; + const int text_end = sequences->text_begin + sequences->text_length; const int pattern_length = pattern_end - pattern_begin; const int text_length = text_end - text_begin; // Trivial cases @@ -634,36 +600,27 @@ void wavefront_bialign_alignment( } // Fall back to regular WFA if (score_remaining <= WF_BIALIGN_FALLBACK_MIN_SCORE) { - wavefront_bialign_base(wf_aligner, - pattern+pattern_begin,pattern_lambda+pattern_begin,pattern_length, - text+text_begin,text_lambda+text_begin,text_length, - form,component_begin,component_end,align_level); + wavefront_bialign_base(wf_aligner,form,component_begin,component_end,align_level); return; } // Find breakpoint in the alignment wf_bialign_breakpoint_t breakpoint; const int align_status = wavefront_bialign_find_breakpoint( - wf_aligner->bialigner, - pattern+pattern_begin,pattern_lambda+pattern_begin,pattern_length, - text+text_begin,text_lambda+text_begin,text_length, - wf_aligner->penalties.distance_metric, - form,component_begin,component_end, - &breakpoint,align_level); + wf_aligner->bialigner,wf_aligner->penalties.distance_metric, + form,component_begin,component_end,&breakpoint,align_level); // DEBUG if (wf_aligner->system.verbose >= 2) { - wavefront_debug_epilogue(wf_aligner->bialigner->alg_forward); - wavefront_debug_epilogue(wf_aligner->bialigner->alg_reverse); + wf_aligner->bialigner->alg_forward->align_status.status = align_status; + 
wf_aligner->bialigner->alg_reverse->align_status.status = align_status; + wavefront_debug_end(wf_aligner->bialigner->alg_forward); + wavefront_debug_end(wf_aligner->bialigner->alg_reverse); } // Check status if (align_status != WF_STATUS_SUCCESSFUL) { - wavefront_bialign_exception(wf_aligner, - pattern+pattern_begin,pattern_lambda+pattern_begin,pattern_length, - text+text_begin,text_lambda+text_begin,text_length, - form,component_begin,component_end,align_level,align_status); + wavefront_bialign_exception(wf_aligner,form,component_begin,component_end,align_level,align_status); return; } // Breakpoint found - wf_aligner->last_breakpoint = breakpoint; // Save it const int breakpoint_h = WAVEFRONT_H(breakpoint.k_forward,breakpoint.offset_forward); const int breakpoint_v = WAVEFRONT_V(breakpoint.k_forward,breakpoint.offset_forward); // DEBUG @@ -676,10 +633,11 @@ void wavefront_bialign_alignment( plot->offset_v = pattern_begin; plot->offset_h = text_begin; } + wavefront_bialigner_set_sequences_bounds(wf_aligner->bialigner, + pattern_begin,pattern_begin+breakpoint_v, + text_begin,text_begin+breakpoint_h); wavefront_bialign_init_half_0(form,&form_0); wavefront_bialign_alignment(wf_aligner, - pattern,pattern_lambda,pattern_begin,pattern_begin+breakpoint_v, - text,text_lambda,text_begin,text_begin+breakpoint_h, &form_0,component_begin,breakpoint.component, breakpoint.score_forward,align_level+1); if (wf_aligner->align_status.status != WF_STATUS_SUCCESSFUL) return; @@ -689,10 +647,11 @@ void wavefront_bialign_alignment( plot->offset_v = pattern_begin + breakpoint_v; plot->offset_h = text_begin + breakpoint_h; } + wavefront_bialigner_set_sequences_bounds(wf_aligner->bialigner, + pattern_begin+breakpoint_v,pattern_end, + text_begin+breakpoint_h,text_end); wavefront_bialign_init_half_1(form,&form_1); wavefront_bialign_alignment(wf_aligner, - pattern,pattern_lambda,pattern_begin+breakpoint_v,pattern_end, - text,text_lambda,text_begin+breakpoint_h,text_end, 
&form_1,breakpoint.component,component_end, breakpoint.score_reverse,align_level+1); if (wf_aligner->align_status.status != WF_STATUS_SUCCESSFUL) return; @@ -704,27 +663,28 @@ void wavefront_bialign_alignment( * Bidirectional Score-only */ void wavefront_bialign_compute_score( - wavefront_aligner_t* const wf_aligner, - const char* const pattern, - const int* const pattern_lambda, - const int pattern_length, - const char* const text, - const int* const text_lambda, - const int text_length) { + wavefront_aligner_t* const wf_aligner) { + // Parameters + wavefront_sequences_t* const sequences = &wf_aligner->bialigner->alg_forward->sequences; + const int text_length = sequences->text_length; + const int pattern_length = sequences->pattern_length; // Find breakpoint in the alignment wf_bialign_breakpoint_t breakpoint; - const int align_status = wavefront_bialign_find_breakpoint( - wf_aligner->bialigner,pattern,pattern_lambda,pattern_length,text,text_lambda,text_length, + const int align_status = wavefront_bialign_find_breakpoint(wf_aligner->bialigner, wf_aligner->penalties.distance_metric,&wf_aligner->alignment_form, affine_matrix_M,affine_matrix_M,&breakpoint,0); // DEBUG if (wf_aligner->system.verbose >= 2) { - wavefront_debug_epilogue(wf_aligner->bialigner->alg_forward); - wavefront_debug_epilogue(wf_aligner->bialigner->alg_reverse); + wavefront_debug_end(wf_aligner->bialigner->alg_forward); + wavefront_debug_end(wf_aligner->bialigner->alg_reverse); } // Check status - if (align_status == WF_STATUS_MAX_SCORE_REACHED || align_status == WF_STATUS_UNFEASIBLE) { + if (align_status == WF_STATUS_MAX_SCORE_REACHED || + align_status == WF_STATUS_UNFEASIBLE) { wf_aligner->align_status.status = align_status; + if (align_status == WF_STATUS_MAX_SCORE_REACHED) { + wf_aligner->cigar->score = -wf_aligner->system.max_alignment_score; + } return; } if (align_status == WF_STATUS_END_REACHED) { @@ -746,33 +706,23 @@ void wavefront_bialign_compute_score( * Bidirectional dispatcher */ 
void wavefront_bialign( - wavefront_aligner_t* const wf_aligner, - const char* const pattern, - const int* const pattern_lambda, - const int pattern_length, - const char* const text, - const int* const text_lambda, - const int text_length) { + wavefront_aligner_t* const wf_aligner) { // Init wf_aligner->align_status.status = WF_STATUS_SUCCESSFUL; // Init OK - // Just for outputting info at plot - wf_aligner->pattern = (char*)pattern; - wf_aligner->pattern_length = pattern_length; - wf_aligner->text = (char*)text; - wf_aligner->text_length = text_length; // Select scope if (wf_aligner->alignment_scope == compute_score) { - wavefront_bialign_compute_score(wf_aligner,pattern,pattern_lambda,pattern_length,text,text_lambda,text_length); + wavefront_bialign_compute_score(wf_aligner); } else { + // Resize CIGAR + wavefront_sequences_t* const sequences = &wf_aligner->bialigner->alg_forward->sequences; + const int text_length = sequences->text_length; + const int pattern_length = sequences->pattern_length; cigar_resize(wf_aligner->cigar,2*(pattern_length+text_length)); // Bidirectional alignment const bool min_length = MAX(pattern_length,text_length) <= WF_BIALIGN_FALLBACK_MIN_LENGTH; wavefront_bialign_alignment(wf_aligner, - pattern,pattern_lambda,0,pattern_length, - text,text_lambda,0,text_length, &wf_aligner->alignment_form, affine_matrix_M,affine_matrix_M, min_length ? 
0 : INT_MAX,0); } } - diff --git a/src/common/wflign/deps/WFA2-lib/wavefront/wavefront_bialign.h b/src/common/wflign/deps/WFA2-lib/wavefront/wavefront_bialign.h index 86f2e89b..7a96a0d6 100644 --- a/src/common/wflign/deps/WFA2-lib/wavefront/wavefront_bialign.h +++ b/src/common/wflign/deps/WFA2-lib/wavefront/wavefront_bialign.h @@ -31,19 +31,12 @@ #ifndef WAVEFRONT_BIALIGN_H_ #define WAVEFRONT_BIALIGN_H_ -#include "utils/commons.h" #include "wavefront_aligner.h" /* * Bidirectional WFA */ void wavefront_bialign( - wavefront_aligner_t* const wf_aligner, - const char* const pattern, - const int* const pattern_lambda, - const int pattern_length, - const char* const text, - const int* const text_lambda, - const int text_length); + wavefront_aligner_t* const wf_aligner); #endif /* WAVEFRONT_BIALIGN_H_ */ diff --git a/src/common/wflign/deps/WFA2-lib/wavefront/wavefront_bialigner.c b/src/common/wflign/deps/WFA2-lib/wavefront/wavefront_bialigner.c index f77dd467..dedc16ec 100644 --- a/src/common/wflign/deps/WFA2-lib/wavefront/wavefront_bialigner.c +++ b/src/common/wflign/deps/WFA2-lib/wavefront/wavefront_bialigner.c @@ -28,6 +28,7 @@ * AUTHOR(S): Santiago Marco-Sola */ +#include "utils/commons.h" #include "wavefront_bialigner.h" #include "wavefront_aligner.h" #include "wavefront_attributes.h" @@ -48,8 +49,6 @@ wavefront_bialigner_t* wavefront_bialigner_new( subsidiary_attr.linear_penalties = attributes->linear_penalties; subsidiary_attr.affine_penalties = attributes->affine_penalties; subsidiary_attr.affine2p_penalties = attributes->affine2p_penalties; - subsidiary_attr.match_funct = attributes->match_funct; - subsidiary_attr.match_funct_arguments = attributes->match_funct_arguments; // Set specifics for subsidiary aligners subsidiary_attr.heuristic = attributes->heuristic; // Inherit same heuristic subsidiary_attr.memory_mode = wavefront_memory_high; // Classic WFA @@ -84,6 +83,70 @@ void wavefront_bialigner_delete( wavefront_aligner_delete(wf_bialigner->alg_subsidiary); 
free(wf_bialigner); } +/* + * Sequences + */ +void wavefront_bialigner_set_sequences_ascii( + wavefront_bialigner_t* const wf_bialigner, + const char* const pattern, + const int pattern_length, + const char* const text, + const int text_length) { + wavefront_sequences_init_ascii( + &wf_bialigner->alg_forward->sequences, + pattern,pattern_length,text,text_length,false); + wavefront_sequences_init_ascii( + &wf_bialigner->alg_reverse->sequences, + pattern,pattern_length,text,text_length,true); + wavefront_sequences_init_ascii( + &wf_bialigner->alg_subsidiary->sequences, + pattern,pattern_length,text,text_length,false); +} +void wavefront_bialigner_set_sequences_lambda( + wavefront_bialigner_t* const wf_bialigner, + alignment_match_funct_t match_funct, + void* match_funct_arguments, + const int pattern_length, + const int text_length) { + wavefront_sequences_init_lambda(&wf_bialigner->alg_forward->sequences, + match_funct,match_funct_arguments,pattern_length,text_length,false); + wavefront_sequences_init_lambda(&wf_bialigner->alg_reverse->sequences, + match_funct,match_funct_arguments,pattern_length,text_length,true); + wavefront_sequences_init_lambda(&wf_bialigner->alg_subsidiary->sequences, + match_funct,match_funct_arguments,pattern_length,text_length,false); +} +void wavefront_bialigner_set_sequences_packed2bits( + wavefront_bialigner_t* const wf_bialigner, + const uint8_t* const pattern, + const int pattern_length, + const uint8_t* const text, + const int text_length) { + wavefront_sequences_init_packed2bits( + &wf_bialigner->alg_forward->sequences, + pattern,pattern_length,text,text_length,false); + wavefront_sequences_init_packed2bits( + &wf_bialigner->alg_reverse->sequences, + pattern,pattern_length,text,text_length,true); + wavefront_sequences_init_packed2bits( + &wf_bialigner->alg_subsidiary->sequences, + pattern,pattern_length,text,text_length,false); +} +void wavefront_bialigner_set_sequences_bounds( + wavefront_bialigner_t* const wf_bialigner, + const int 
pattern_begin, + const int pattern_end, + const int text_begin, + const int text_end) { + wavefront_sequences_set_bounds( + &wf_bialigner->alg_forward->sequences, + pattern_begin,pattern_end,text_begin,text_end); + wavefront_sequences_set_bounds( + &wf_bialigner->alg_reverse->sequences, + pattern_begin,pattern_end,text_begin,text_end); + wavefront_sequences_set_bounds( + &wf_bialigner->alg_subsidiary->sequences, + pattern_begin,pattern_end,text_begin,text_end); +} /* * Accessors */ @@ -100,17 +163,6 @@ void wavefront_bialigner_set_heuristic( wf_bialigner->alg_reverse->heuristic = *heuristic; wf_bialigner->alg_subsidiary->heuristic = *heuristic; } -void wavefront_bialigner_set_match_funct( - wavefront_bialigner_t* const wf_bialigner, - int (*match_funct)(int,int,void*), - void* const match_funct_arguments) { - wf_bialigner->alg_forward->match_funct = match_funct; - wf_bialigner->alg_forward->match_funct_arguments = match_funct_arguments; - wf_bialigner->alg_reverse->match_funct = match_funct; - wf_bialigner->alg_reverse->match_funct_arguments = match_funct_arguments; - wf_bialigner->alg_subsidiary->match_funct = match_funct; - wf_bialigner->alg_subsidiary->match_funct_arguments = match_funct_arguments; -} void wavefront_bialigner_set_max_alignment_score( wavefront_bialigner_t* const wf_bialigner, const int max_alignment_score) { @@ -130,16 +182,16 @@ void wavefront_bialigner_set_max_memory( wf_bialigner->alg_subsidiary->system.max_memory_abort = max_memory_abort; } void wavefront_bialigner_set_max_num_threads( - wavefront_bialigner_t* const wf_bialigner, - const int max_num_threads) { - wf_bialigner->alg_forward->system.max_num_threads = max_num_threads; - wf_bialigner->alg_reverse->system.max_num_threads = max_num_threads; - wf_bialigner->alg_subsidiary->system.max_num_threads = max_num_threads; + wavefront_bialigner_t* const wf_bialigner, + const int max_num_threads) { + wf_bialigner->alg_forward->system.max_num_threads = max_num_threads; + 
wf_bialigner->alg_reverse->system.max_num_threads = max_num_threads; + wf_bialigner->alg_subsidiary->system.max_num_threads = max_num_threads; } void wavefront_bialigner_set_min_offsets_per_thread( - wavefront_bialigner_t* const wf_bialigner, - const int min_offsets_per_thread) { - wf_bialigner->alg_forward->system.min_offsets_per_thread = min_offsets_per_thread; - wf_bialigner->alg_reverse->system.min_offsets_per_thread = min_offsets_per_thread; - wf_bialigner->alg_subsidiary->system.min_offsets_per_thread = min_offsets_per_thread; + wavefront_bialigner_t* const wf_bialigner, + const int min_offsets_per_thread) { + wf_bialigner->alg_forward->system.min_offsets_per_thread = min_offsets_per_thread; + wf_bialigner->alg_reverse->system.min_offsets_per_thread = min_offsets_per_thread; + wf_bialigner->alg_subsidiary->system.min_offsets_per_thread = min_offsets_per_thread; } diff --git a/src/common/wflign/deps/WFA2-lib/wavefront/wavefront_bialigner.h b/src/common/wflign/deps/WFA2-lib/wavefront/wavefront_bialigner.h index f787a042..81601a34 100644 --- a/src/common/wflign/deps/WFA2-lib/wavefront/wavefront_bialigner.h +++ b/src/common/wflign/deps/WFA2-lib/wavefront/wavefront_bialigner.h @@ -31,11 +31,11 @@ #ifndef WAVEFRONT_BIALIGNER_H_ #define WAVEFRONT_BIALIGNER_H_ -#include "utils/commons.h" #include "wavefront_penalties.h" #include "wavefront_attributes.h" #include "wavefront_heuristic.h" #include "wavefront_offset.h" +#include "wavefront_sequences.h" // Wavefront ahead definition typedef struct _wavefront_aligner_t wavefront_aligner_t; @@ -54,9 +54,12 @@ typedef struct { } wf_bialign_breakpoint_t; typedef struct { + // Wavefronts wavefront_aligner_t* alg_forward; // Forward aligner wavefront_aligner_t* alg_reverse; // Reverse aligner wavefront_aligner_t* alg_subsidiary; // Subsidiary aligner + // Operators + void (*wf_align_compute)(wavefront_aligner_t* const,const int); } wavefront_bialigner_t; /* @@ -70,6 +73,34 @@ void wavefront_bialigner_reap( void 
wavefront_bialigner_delete( wavefront_bialigner_t* const wf_bialigner); +/* + * Sequences + */ +void wavefront_bialigner_set_sequences_ascii( + wavefront_bialigner_t* const wf_bialigner, + const char* const pattern, + const int pattern_length, + const char* const text, + const int text_length); +void wavefront_bialigner_set_sequences_lambda( + wavefront_bialigner_t* const wf_bialigner, + alignment_match_funct_t match_funct, + void* match_funct_arguments, + const int pattern_length, + const int text_length); +void wavefront_bialigner_set_sequences_packed2bits( + wavefront_bialigner_t* const wf_bialigner, + const uint8_t* const pattern, + const int pattern_length, + const uint8_t* const text, + const int text_length); +void wavefront_bialigner_set_sequences_bounds( + wavefront_bialigner_t* const wf_bialigner, + const int pattern_begin, + const int pattern_end, + const int text_begin, + const int text_end); + /* * Accessors */ @@ -78,10 +109,6 @@ uint64_t wavefront_bialigner_get_size( void wavefront_bialigner_set_heuristic( wavefront_bialigner_t* const wf_bialigner, wavefront_heuristic_t* const heuristic); -void wavefront_bialigner_set_match_funct( - wavefront_bialigner_t* const wf_bialigner, - int (*match_funct)(int,int,void*), - void* const match_funct_arguments); void wavefront_bialigner_set_max_alignment_score( wavefront_bialigner_t* const wf_bialigner, const int max_alignment_score); @@ -93,6 +120,6 @@ void wavefront_bialigner_set_max_num_threads( wavefront_bialigner_t* const wf_bialigner, const int max_num_threads); void wavefront_bialigner_set_min_offsets_per_thread( - wavefront_bialigner_t* const wf_bialigner, - const int min_offsets_per_thread); + wavefront_bialigner_t* const wf_bialigner, + const int min_offsets_per_thread); #endif /* WAVEFRONT_BIALIGNER_H_ */ diff --git a/src/common/wflign/deps/WFA2-lib/wavefront/wavefront_components.c b/src/common/wflign/deps/WFA2-lib/wavefront/wavefront_components.c index 7a087d87..9512be47 100644 --- 
a/src/common/wflign/deps/WFA2-lib/wavefront/wavefront_components.c +++ b/src/common/wflign/deps/WFA2-lib/wavefront/wavefront_components.c @@ -29,6 +29,7 @@ * DESCRIPTION: WaveFront aligner components */ +#include "utils/commons.h" #include "wavefront_components.h" #include "utils/bitmap.h" #include "system/profiler_timer.h" @@ -459,4 +460,3 @@ void wavefront_components_compact_bt_buffer( fprintf(stderr,"]\n"); } } - diff --git a/src/common/wflign/deps/WFA2-lib/wavefront/wavefront_components.h b/src/common/wflign/deps/WFA2-lib/wavefront/wavefront_components.h index 227759c5..4d1856ef 100644 --- a/src/common/wflign/deps/WFA2-lib/wavefront/wavefront_components.h +++ b/src/common/wflign/deps/WFA2-lib/wavefront/wavefront_components.h @@ -32,7 +32,6 @@ #ifndef WAVEFRONT_WAVEFRONT_COMPONENTS_H_ #define WAVEFRONT_WAVEFRONT_COMPONENTS_H_ -#include "utils/commons.h" #include "wavefront/wavefront.h" #include "wavefront/wavefront_backtrace_buffer.h" #include "wavefront/wavefront_penalties.h" diff --git a/src/common/wflign/deps/WFA2-lib/wavefront/wavefront_compute.c b/src/common/wflign/deps/WFA2-lib/wavefront/wavefront_compute.c index 7c86d7d4..45a5f1a1 100644 --- a/src/common/wflign/deps/WFA2-lib/wavefront/wavefront_compute.c +++ b/src/common/wflign/deps/WFA2-lib/wavefront/wavefront_compute.c @@ -29,7 +29,8 @@ * DESCRIPTION: WaveFront alignment module for computing wavefronts */ -#include "utils/string_padded.h" +#include "utils/commons.h" +#include "system/mm_allocator.h" #include "alignment/affine2p_penalties.h" #include "wavefront_compute.h" @@ -569,8 +570,9 @@ void wavefront_compute_trim_ends( wavefront_aligner_t* const wf_aligner, wavefront_t* const wavefront) { // Parameters - const int pattern_length = wf_aligner->pattern_length; - const int text_length = wf_aligner->text_length; + wavefront_sequences_t* const sequences = &wf_aligner->sequences; + const int pattern_length = sequences->pattern_length; + const int text_length = sequences->text_length; wf_offset_t* const 
offsets = wavefront->offsets; // Trim from hi int k; diff --git a/src/common/wflign/deps/WFA2-lib/wavefront/wavefront_compute_affine.c b/src/common/wflign/deps/WFA2-lib/wavefront/wavefront_compute_affine.c index 7315648c..c093a4e6 100644 --- a/src/common/wflign/deps/WFA2-lib/wavefront/wavefront_compute_affine.c +++ b/src/common/wflign/deps/WFA2-lib/wavefront/wavefront_compute_affine.c @@ -29,7 +29,8 @@ * DESCRIPTION: WaveFront alignment module for computing wavefronts (gap-affine) */ -#include "utils/string_padded.h" +#include "utils/commons.h" +#include "system/mm_allocator.h" #include "wavefront_compute.h" #include "wavefront_backtrace_offload.h" @@ -46,8 +47,9 @@ void wavefront_compute_affine_idm( const int lo, const int hi) { // Parameters - const int pattern_length = wf_aligner->pattern_length; - const int text_length = wf_aligner->text_length; + wavefront_sequences_t* const sequences = &wf_aligner->sequences; + const int pattern_length = sequences->pattern_length; + const int text_length = sequences->text_length; // In Offsets const wf_offset_t* const m_misms = wavefront_set->in_mwavefront_misms->offsets; const wf_offset_t* const m_open1 = wavefront_set->in_mwavefront_open1->offsets; @@ -91,8 +93,9 @@ void wavefront_compute_affine_idm_piggyback( const int lo, const int hi) { // Parameters - const int pattern_length = wf_aligner->pattern_length; - const int text_length = wf_aligner->text_length; + wavefront_sequences_t* const sequences = &wf_aligner->sequences; + const int pattern_length = sequences->pattern_length; + const int text_length = sequences->text_length; // In Offsets const wf_offset_t* const m_misms = wavefront_set->in_mwavefront_misms->offsets; const wf_offset_t* const m_open1 = wavefront_set->in_mwavefront_open1->offsets; diff --git a/src/common/wflign/deps/WFA2-lib/wavefront/wavefront_compute_affine2p.c b/src/common/wflign/deps/WFA2-lib/wavefront/wavefront_compute_affine2p.c index 86b5da88..14b4bb1b 100644 --- 
a/src/common/wflign/deps/WFA2-lib/wavefront/wavefront_compute_affine2p.c +++ b/src/common/wflign/deps/WFA2-lib/wavefront/wavefront_compute_affine2p.c @@ -29,7 +29,8 @@ * DESCRIPTION: WaveFront alignment module for computing wavefronts (gap-affine-2p) */ -#include "utils/string_padded.h" +#include "utils/commons.h" +#include "system/mm_allocator.h" #include "wavefront_compute.h" #include "wavefront_compute_affine.h" #include "wavefront_backtrace_offload.h" @@ -47,8 +48,9 @@ void wavefront_compute_affine2p_idm( const int lo, const int hi) { // Parameters - const int pattern_length = wf_aligner->pattern_length; - const int text_length = wf_aligner->text_length; + wavefront_sequences_t* const sequences = &wf_aligner->sequences; + const int pattern_length = sequences->pattern_length; + const int text_length = sequences->text_length; // In Offsets const wf_offset_t* const m_misms = wavefront_set->in_mwavefront_misms->offsets; const wf_offset_t* const m_open1 = wavefront_set->in_mwavefront_open1->offsets; @@ -111,8 +113,9 @@ void wavefront_compute_affine2p_idm_piggyback( const int lo, const int hi) { // Parameters - const int pattern_length = wf_aligner->pattern_length; - const int text_length = wf_aligner->text_length; + wavefront_sequences_t* const sequences = &wf_aligner->sequences; + const int pattern_length = sequences->pattern_length; + const int text_length = sequences->text_length; // In Offsets const wf_offset_t* const m_misms = wavefront_set->in_mwavefront_misms->offsets; const wf_offset_t* const m_open1 = wavefront_set->in_mwavefront_open1->offsets; diff --git a/src/common/wflign/deps/WFA2-lib/wavefront/wavefront_compute_edit.c b/src/common/wflign/deps/WFA2-lib/wavefront/wavefront_compute_edit.c index d694f705..13202a05 100644 --- a/src/common/wflign/deps/WFA2-lib/wavefront/wavefront_compute_edit.c +++ b/src/common/wflign/deps/WFA2-lib/wavefront/wavefront_compute_edit.c @@ -29,7 +29,8 @@ * DESCRIPTION: WaveFront alignment module for computing wavefronts 
(edit/indel) */ -#include "utils/string_padded.h" +#include "utils/commons.h" +#include "system/mm_allocator.h" #include "wavefront_compute.h" #include "wavefront_backtrace_offload.h" @@ -47,8 +48,9 @@ void wavefront_compute_indel_idm( const int lo, const int hi) { // Parameters - const int pattern_length = wf_aligner->pattern_length; - const int text_length = wf_aligner->text_length; + wavefront_sequences_t* const sequences = &wf_aligner->sequences; + const int pattern_length = sequences->pattern_length; + const int text_length = sequences->text_length; const wf_offset_t* const prev_offsets = wf_prev->offsets; wf_offset_t* const curr_offsets = wf_curr->offsets; // Compute-Next kernel loop @@ -74,8 +76,9 @@ void wavefront_compute_edit_idm( const int lo, const int hi) { // Parameters - const int pattern_length = wf_aligner->pattern_length; - const int text_length = wf_aligner->text_length; + wavefront_sequences_t* const sequences = &wf_aligner->sequences; + const int pattern_length = sequences->pattern_length; + const int text_length = sequences->text_length; const wf_offset_t* const prev_offsets = wf_prev->offsets; wf_offset_t* const curr_offsets = wf_curr->offsets; // Compute-Next kernel loop @@ -106,8 +109,9 @@ void wavefront_compute_indel_idm_piggyback( const int hi, const int score) { // Parameters - const int pattern_length = wf_aligner->pattern_length; - const int text_length = wf_aligner->text_length; + wavefront_sequences_t* const sequences = &wf_aligner->sequences; + const int pattern_length = sequences->pattern_length; + const int text_length = sequences->text_length; // Previous WF const wf_offset_t* const prev_offsets = wf_prev->offsets; const pcigar_t* const prev_pcigar = wf_prev->bt_pcigar; @@ -148,8 +152,9 @@ void wavefront_compute_edit_idm_piggyback( const int hi, const int score) { // Parameters - const int pattern_length = wf_aligner->pattern_length; - const int text_length = wf_aligner->text_length; + wavefront_sequences_t* const sequences = 
&wf_aligner->sequences; + const int pattern_length = sequences->pattern_length; + const int text_length = sequences->text_length; // Previous WF const wf_offset_t* const prev_offsets = wf_prev->offsets; const pcigar_t* const prev_pcigar = wf_prev->bt_pcigar; @@ -215,8 +220,9 @@ void wavefront_compute_edit_exact_prune( wavefront_aligner_t* const wf_aligner, wavefront_t* const wavefront) { // Parameters - const int plen = wf_aligner->pattern_length; - const int tlen = wf_aligner->text_length; + wavefront_sequences_t* const sequences = &wf_aligner->sequences; + const int plen = sequences->pattern_length; + const int tlen = sequences->text_length; wf_offset_t* const offsets = wavefront->offsets; const int lo = wavefront->lo; const int hi = wavefront->hi; @@ -366,5 +372,3 @@ void wavefront_compute_edit( wavefront_compute_edit_exact_prune(wf_aligner,wf_curr); } } - - diff --git a/src/common/wflign/deps/WFA2-lib/wavefront/wavefront_compute_linear.c b/src/common/wflign/deps/WFA2-lib/wavefront/wavefront_compute_linear.c index e78db054..d9c1aa7b 100644 --- a/src/common/wflign/deps/WFA2-lib/wavefront/wavefront_compute_linear.c +++ b/src/common/wflign/deps/WFA2-lib/wavefront/wavefront_compute_linear.c @@ -29,7 +29,8 @@ * DESCRIPTION: WaveFront alignment module for computing wavefronts (gap-linear) */ -#include "utils/string_padded.h" +#include "utils/commons.h" +#include "system/mm_allocator.h" #include "wavefront_compute.h" #include "wavefront_backtrace_offload.h" @@ -46,8 +47,9 @@ void wavefront_compute_linear_idm( const int lo, const int hi) { // Parameters - const int pattern_length = wf_aligner->pattern_length; - const int text_length = wf_aligner->text_length; + wavefront_sequences_t* const sequences = &wf_aligner->sequences; + const int pattern_length = sequences->pattern_length; + const int text_length = sequences->text_length; // In Offsets const wf_offset_t* const m_misms = wavefront_set->in_mwavefront_misms->offsets; const wf_offset_t* const m_open1 = 
wavefront_set->in_mwavefront_open1->offsets; @@ -79,8 +81,9 @@ void wavefront_compute_linear_idm_piggyback( const int lo, const int hi) { // Parameters - const int pattern_length = wf_aligner->pattern_length; - const int text_length = wf_aligner->text_length; + wavefront_sequences_t* const sequences = &wf_aligner->sequences; + const int pattern_length = sequences->pattern_length; + const int text_length = sequences->text_length; // In M const wf_offset_t* const m_misms = wavefront_set->in_mwavefront_misms->offsets; const pcigar_t* const m_misms_bt_pcigar = wavefront_set->in_mwavefront_misms->bt_pcigar; diff --git a/src/common/wflign/deps/WFA2-lib/wavefront/wavefront_debug.c b/src/common/wflign/deps/WFA2-lib/wavefront/wavefront_debug.c index 41425a8d..8782b868 100644 --- a/src/common/wflign/deps/WFA2-lib/wavefront/wavefront_debug.c +++ b/src/common/wflign/deps/WFA2-lib/wavefront/wavefront_debug.c @@ -41,13 +41,12 @@ bool wavefront_check_alignment( FILE* const stream, wavefront_aligner_t* const wf_aligner) { // Parameters - const char* const pattern = wf_aligner->pattern; - const int pattern_length = wf_aligner->pattern_length; - const char* const text = wf_aligner->text; - const int text_length = wf_aligner->text_length; - // Custom function to compare sequences - alignment_match_funct_t match_funct = wf_aligner->match_funct; - void* match_funct_arguments = wf_aligner->match_funct_arguments; + wavefront_sequences_t* const sequences = (wf_aligner->bialigner==NULL) ? 
+ &wf_aligner->sequences : &wf_aligner->bialigner->alg_forward->sequences; + const char* const pattern = sequences->pattern_buffer; + const int pattern_length = sequences->pattern_buffer_length; + const char* const text = sequences->text_buffer; + const int text_length = sequences->text_buffer_length; // CIGAR cigar_t* const cigar = wf_aligner->cigar; char* const operations = cigar->operations; @@ -60,14 +59,15 @@ bool wavefront_check_alignment( switch (operations[i]) { case 'M': { // Check match - const bool is_match = (match_funct!=NULL) ? - match_funct(pattern_pos,text_pos,match_funct_arguments) : - pattern[pattern_pos] == text[text_pos]; - if (!is_match) { - fprintf(stream,"[WFA::Check] Alignment not matching (pattern[%d]=%c != text[%d]=%c)\n", - pattern_pos,pattern[pattern_pos],text_pos,text[text_pos]); - alignment_correct = false; - break; + if (sequences->mode != wf_sequences_lambda) { + const bool is_match = (pattern[pattern_pos]==text[text_pos]); + if (!is_match) { + fprintf(stream,"[WFA::Check] Alignment not matching (pattern[%d]=%c != text[%d]=%c)\n", + pattern_pos,pattern[pattern_pos], + text_pos,text[text_pos]); + alignment_correct = false; + break; + } } ++pattern_pos; ++text_pos; @@ -75,14 +75,15 @@ bool wavefront_check_alignment( } case 'X': { // Check mismatch - const bool is_match = (match_funct!=NULL) ? 
- match_funct(pattern_pos,text_pos,match_funct_arguments) : - pattern[pattern_pos] == text[text_pos]; - if (is_match) { - fprintf(stream,"[WFA::Check] Alignment not mismatching (pattern[%d]=%c == text[%d]=%c)\n", - pattern_pos,pattern[pattern_pos],text_pos,text[text_pos]); - alignment_correct = false; - break; + if (sequences->mode != wf_sequences_lambda) { + const bool is_match = (pattern[pattern_pos]==text[text_pos]); + if (is_match) { + fprintf(stream,"[WFA::Check] Alignment not mismatching (pattern[%d]=%c == text[%d]=%c)\n", + pattern_pos,pattern[pattern_pos], + text_pos,text[text_pos]); + alignment_correct = false; + break; + } } ++pattern_pos; ++text_pos; @@ -123,121 +124,80 @@ void wavefront_report_lite( FILE* const stream, wavefront_aligner_t* const wf_aligner) { // Parameters - const char* const pattern = wf_aligner->pattern; - const int pattern_length = wf_aligner->pattern_length; - const char* const text = wf_aligner->text; - const int text_length = wf_aligner->text_length; + wavefront_sequences_t* const sequences = (wf_aligner->bialigner==NULL) ? + &wf_aligner->sequences : &wf_aligner->bialigner->alg_subsidiary->sequences; + const char* const pattern = sequences->pattern; + const int pattern_length = sequences->pattern_length; + const char* const text = sequences->text; + const int text_length = sequences->text_length; const int status = wf_aligner->align_status.status; const uint64_t memory_used = wf_aligner->align_status.memory_used; - // Banner + // BANNER (#0) fprintf(stream,"[WFA::Debug]"); - // Sequences - const int score = wavefront_compute_classic_score( - wf_aligner,wf_aligner->pattern_length, - wf_aligner->text_length,wf_aligner->cigar->score); - fprintf(stream,"\t%d",score); - fprintf(stream,"\t%d\t%d",pattern_length,text_length); - fprintf(stream,"\t%s",(status==0) ? 
"OK" : "FAIL"); + // SCORE (#1) + // const int score = wavefront_compute_classic_score( + // wf_aligner,pattern_length,text_length,wf_aligner->cigar->score); + const int score = wf_aligner->cigar->score; + fprintf(stream,"\t%d",(score==INT32_MIN) ? -1 : score); + // PATTERN_LENGTH (#2) + fprintf(stream,"\t%d",pattern_length); + // TEXT_LENGTH (#3) + fprintf(stream,"\t%d",text_length); + // STATUS (#4) + fprintf(stream,"\t%s",wavefront_align_strerror_short(status)); + // TIME (#5) fprintf(stream,"\t%2.3f",TIMER_GET_TOTAL_MS(&wf_aligner->system.timer)); + // MEMORY (#6) fprintf(stream,"\t%luMB\t",CONVERT_B_TO_MB(memory_used)); + // ATTRIBUTES (#7) fprintf(stream,"["); - wavefront_aligner_print_type(stream,wf_aligner); - fprintf(stream,","); + fprintf(stream,"%d",wf_aligner->align_status.status); + fprintf(stream,";"); + wavefront_aligner_print_mode(stream,wf_aligner); + fprintf(stream,";"); wavefront_aligner_print_scope(stream,wf_aligner); - fprintf(stream,","); + fprintf(stream,";"); wavefront_penalties_print(stream,&wf_aligner->penalties); + fprintf(stream,";"); + wavefront_aligner_print_conf(stream,wf_aligner); + fprintf(stream,";"); + wavefront_heuristic_print(stream,&wf_aligner->heuristic); + fprintf(stream,";"); + fprintf(stream,"(%d,%d,%d)", + wf_aligner->wf_components.num_wavefronts, + wf_aligner->wf_components.historic_min_lo, + wf_aligner->wf_components.historic_max_hi); fprintf(stream,"]\t"); - cigar_print(stream,wf_aligner->cigar,true); - if (wf_aligner->match_funct != NULL) { + // CIGAR (#8) + if (cigar_is_null(wf_aligner->cigar)) { + fprintf(stream,"-"); + } else { + cigar_print(stream,wf_aligner->cigar,true); + } + // SEQUENCES (#9 #10) + if (sequences->mode == wf_sequences_lambda) { fprintf(stream,"\t-\t-"); } else { fprintf(stream,"\t%.*s\t%.*s",pattern_length,pattern,text_length,text); } fprintf(stream,"\n"); } -void wavefront_report_verbose_begin( - FILE* const stream, - wavefront_aligner_t* const wf_aligner, - const char* const pattern, - const 
int pattern_length, - const char* const text, - const int text_length) { - // Input sequences - fprintf(stream,"[WFA::Report::Begin] ["); - wavefront_aligner_print_type(stream,wf_aligner); - fprintf(stream,"]-Alignment (obj=%p)\n",wf_aligner); - if (wf_aligner->match_funct != NULL) { - fprintf(stream,"[WFA::Report]\tPattern\t%d\tcustom-funct()\n",pattern_length); - fprintf(stream,"[WFA::Report]\tText\t%d\tcustom-funct()\n",text_length); - } else { - fprintf(stream,"[WFA::Report]\tPattern\t%d\t%.*s\n",pattern_length,pattern_length,pattern); - fprintf(stream,"[WFA::Report]\tText\t%d\t%.*s\n",text_length,text_length,text); - } - // Alignment scope/form - fprintf(stream,"[WFA::Report]\tScope="); - wavefront_aligner_print_scope(stream,wf_aligner); - fprintf(stream," Max-score=%d", - wf_aligner->system.max_alignment_score); - // Penalties - fprintf(stream," Penalties="); - wavefront_penalties_print(stream,&wf_aligner->penalties); - // Heuristic - fprintf(stream," Heuristic="); - wavefront_heuristic_print(stream,&wf_aligner->heuristic); - // Memory mode - fprintf(stream," Memory.mode=(%d,%luMB,%luMB,%luMB)\n", - wf_aligner->memory_mode, - CONVERT_B_TO_MB(wf_aligner->system.max_memory_compact), - CONVERT_B_TO_MB(wf_aligner->system.max_memory_resident), - CONVERT_B_TO_MB(wf_aligner->system.max_memory_abort)); -} -void wavefront_report_verbose_end( - FILE* const stream, - wavefront_aligner_t* const wf_aligner) { - // Finish report - fprintf(stream,"[WFA::Report::End]\tFinish.status=%d",wf_aligner->align_status.status); - fprintf(stream," Time.taken="); - timer_print_total(stream,&wf_aligner->system.timer); - fprintf(stream," Memory.used=%luMB", - CONVERT_B_TO_MB(wf_aligner->align_status.memory_used)); - fprintf(stream," WFA.components=(wfs=%d,maxlo=%d,maxhi=%d)", - wf_aligner->wf_components.num_wavefronts, - wf_aligner->wf_components.historic_min_lo, - wf_aligner->wf_components.historic_max_hi); - const int score = wavefront_compute_classic_score( - 
wf_aligner,wf_aligner->pattern_length, - wf_aligner->text_length,wf_aligner->cigar->score); - fprintf(stream," WFA.score=%d",score); - fprintf(stream," WFA.cigar="); - cigar_print(stream,wf_aligner->cigar,true); - fprintf(stream,"\n"); -} /* * Debug */ -void wavefront_debug_prologue( - wavefront_aligner_t* const wf_aligner, - const char* const pattern, - const int pattern_length, - const char* const text, - const int text_length) { +void wavefront_debug_begin( + wavefront_aligner_t* const wf_aligner) { // Check verbose level if (wf_aligner->system.verbose >= 1) { + timer_reset(&wf_aligner->system.timer); timer_start(&wf_aligner->system.timer); - if (wf_aligner->system.verbose >= 4) { - wavefront_report_verbose_begin(stderr,wf_aligner, - pattern,pattern_length,text,text_length); - } } } -void wavefront_debug_epilogue( +void wavefront_debug_end( wavefront_aligner_t* const wf_aligner) { // Print Summary if (wf_aligner->system.verbose >= 1) { timer_stop(&wf_aligner->system.timer); - if (wf_aligner->system.verbose >= 4) { - wavefront_report_verbose_end(stderr,wf_aligner); - } wavefront_report_lite(stderr,wf_aligner); } } diff --git a/src/common/wflign/deps/WFA2-lib/wavefront/wavefront_debug.h b/src/common/wflign/deps/WFA2-lib/wavefront/wavefront_debug.h index 1bdb8532..828343af 100644 --- a/src/common/wflign/deps/WFA2-lib/wavefront/wavefront_debug.h +++ b/src/common/wflign/deps/WFA2-lib/wavefront/wavefront_debug.h @@ -37,13 +37,9 @@ /* * Debug */ -void wavefront_debug_prologue( - wavefront_aligner_t* const wf_aligner, - const char* const pattern, - const int pattern_length, - const char* const text, - const int text_length); -void wavefront_debug_epilogue( +void wavefront_debug_begin( + wavefront_aligner_t* const wf_aligner); +void wavefront_debug_end( wavefront_aligner_t* const wf_aligner); /* diff --git a/src/common/wflign/deps/WFA2-lib/wavefront/wavefront_display.c b/src/common/wflign/deps/WFA2-lib/wavefront/wavefront_display.c index e7bc747e..a735686f 100644 --- 
a/src/common/wflign/deps/WFA2-lib/wavefront/wavefront_display.c +++ b/src/common/wflign/deps/WFA2-lib/wavefront/wavefront_display.c @@ -29,6 +29,8 @@ * DESCRIPTION: WaveFront-Alignment module for display and report */ +#include "utils/commons.h" +#include "system/mm_allocator.h" #include "wavefront_display.h" #include "wavefront_aligner.h" #include "wavefront_compute.h" diff --git a/src/common/wflign/deps/WFA2-lib/wavefront/wavefront_display.h b/src/common/wflign/deps/WFA2-lib/wavefront/wavefront_display.h index ab0c092e..38c65ef8 100644 --- a/src/common/wflign/deps/WFA2-lib/wavefront/wavefront_display.h +++ b/src/common/wflign/deps/WFA2-lib/wavefront/wavefront_display.h @@ -32,8 +32,6 @@ #ifndef WAVEFRONT_DISPLAY_H_ #define WAVEFRONT_DISPLAY_H_ -#include "utils/commons.h" - // Wavefront ahead definition typedef struct _wavefront_aligner_t wavefront_aligner_t; diff --git a/src/common/wflign/deps/WFA2-lib/wavefront/wavefront_extend.c b/src/common/wflign/deps/WFA2-lib/wavefront/wavefront_extend.c index 914f2896..21e7b3f2 100644 --- a/src/common/wflign/deps/WFA2-lib/wavefront/wavefront_extend.c +++ b/src/common/wflign/deps/WFA2-lib/wavefront/wavefront_extend.c @@ -26,317 +26,69 @@ * * PROJECT: Wavefront Alignment Algorithms * AUTHOR(S): Santiago Marco-Sola - * DESCRIPTION: WaveFront-Alignment module for the "extension" of exact matches + * DESCRIPTION: WFA module for the "extension" of exact matches */ -#include "utils/string_padded.h" +#include "utils/commons.h" +#include "system/mm_allocator.h" #include "wavefront_extend.h" -#include "wavefront_align.h" +#include "wavefront_extend_kernels.h" #include "wavefront_compute.h" -#include "wavefront_heuristic.h" +#include "wavefront_termination.h" #ifdef WFA_PARALLEL #include #endif /* - * Termination (detect end of alignment) + * Wavefront Extension (End-to-end) */ -bool wavefront_extend_end2end_check_termination( - wavefront_aligner_t* const wf_aligner, - wavefront_t* const mwavefront, - const int score, - const int 
score_mod) { - // Parameters - const int pattern_length = wf_aligner->pattern_length; - const int text_length = wf_aligner->text_length; - const affine2p_matrix_type component_end = wf_aligner->component_end; - const int alignment_k = DPMATRIX_DIAGONAL(text_length,pattern_length); - const wf_offset_t alignment_offset = DPMATRIX_OFFSET(text_length,pattern_length); - // Select end component - switch (component_end) { - case affine2p_matrix_M: { - // Check diagonal/offset - if (mwavefront->lo > alignment_k || alignment_k > mwavefront->hi) return false; // Not done - const wf_offset_t moffset = mwavefront->offsets[alignment_k]; - if (moffset < alignment_offset) return false; // Not done - // We are done - wf_aligner->alignment_end_pos.score = score; - wf_aligner->alignment_end_pos.k = alignment_k; - wf_aligner->alignment_end_pos.offset = alignment_offset; - return true; - } - case affine2p_matrix_I1: { - // Fetch I1-wavefront & check diagonal/offset - wavefront_t* const i1wavefront = wf_aligner->wf_components.i1wavefronts[score_mod]; - if (i1wavefront == NULL || i1wavefront->lo > alignment_k || alignment_k > i1wavefront->hi) return false; // Not done - const wf_offset_t i1offset = i1wavefront->offsets[alignment_k]; - if (i1offset < alignment_offset) return false; // Not done - // We are done - wf_aligner->alignment_end_pos.score = score; - wf_aligner->alignment_end_pos.k = alignment_k; - wf_aligner->alignment_end_pos.offset = alignment_offset; - return true; - } - case affine2p_matrix_I2: { - // Fetch I2-wavefront & check diagonal/offset - wavefront_t* const i2wavefront = wf_aligner->wf_components.i2wavefronts[score_mod]; - if (i2wavefront == NULL || i2wavefront->lo > alignment_k || alignment_k > i2wavefront->hi) return false; // Not done - const wf_offset_t i2offset = i2wavefront->offsets[alignment_k]; - if (i2offset < alignment_offset) return false; // Not done - // We are done - wf_aligner->alignment_end_pos.score = score; - wf_aligner->alignment_end_pos.k = 
alignment_k; - wf_aligner->alignment_end_pos.offset = alignment_offset; - return true; - } - case affine2p_matrix_D1: { - // Fetch D1-wavefront & check diagonal/offset - wavefront_t* const d1wavefront = wf_aligner->wf_components.d1wavefronts[score_mod]; - if (d1wavefront == NULL || d1wavefront->lo > alignment_k || alignment_k > d1wavefront->hi) return false; // Not done - const wf_offset_t d1offset = d1wavefront->offsets[alignment_k]; - if (d1offset < alignment_offset) return false; // Not done - // We are done - wf_aligner->alignment_end_pos.score = score; - wf_aligner->alignment_end_pos.k = alignment_k; - wf_aligner->alignment_end_pos.offset = alignment_offset; - return true; - } - case affine2p_matrix_D2: { - // Fetch D2-wavefront & check diagonal/offset - wavefront_t* const d2wavefront = wf_aligner->wf_components.d2wavefronts[score_mod]; - if (d2wavefront == NULL || d2wavefront->lo > alignment_k || alignment_k > d2wavefront->hi) return false; // Not done - const wf_offset_t d2offset = d2wavefront->offsets[alignment_k]; - if (d2offset < alignment_offset) return false; // Not done - // We are done - wf_aligner->alignment_end_pos.score = score; - wf_aligner->alignment_end_pos.k = alignment_k; - wf_aligner->alignment_end_pos.offset = alignment_offset; - return true; - } - default: - break; - } - return false; -} -bool wavefront_extend_endsfree_check_termination( - wavefront_aligner_t* const wf_aligner, - wavefront_t* const mwavefront, - const int score, - const int k, - const wf_offset_t offset) { - // Parameters - const int pattern_length = wf_aligner->pattern_length; - const int text_length = wf_aligner->text_length; - // Check ends-free reaching boundaries - const int h_pos = WAVEFRONT_H(k,offset); - const int v_pos = WAVEFRONT_V(k,offset); - if (h_pos >= text_length) { // Text is aligned - // Is Pattern end-free? 
- const int pattern_left = pattern_length - v_pos; - const int pattern_end_free = wf_aligner->alignment_form.pattern_end_free; - if (pattern_left <= pattern_end_free) { - #ifdef WFA_PARALLEL - #pragma omp critical - #endif - { - wf_aligner->alignment_end_pos.score = score; - wf_aligner->alignment_end_pos.k = k; - wf_aligner->alignment_end_pos.offset = offset; - } - return true; // Quit (we are done) - } - } - if (v_pos >= pattern_length) { // Pattern is aligned - // Is text end-free? - const int text_left = text_length - h_pos; - const int text_end_free = wf_aligner->alignment_form.text_end_free; - if (text_left <= text_end_free) { - #ifdef WFA_PARALLEL - #pragma omp critical - #endif - { - wf_aligner->alignment_end_pos.score = score; - wf_aligner->alignment_end_pos.k = k; - wf_aligner->alignment_end_pos.offset = offset; - } - return true; // Quit (we are done) - } - } - // Not done - return false; -} -/* - * Extend kernel - */ -FORCE_INLINE wf_offset_t wavefront_extend_matches_packed_kernel( - wavefront_aligner_t* const wf_aligner, - const int k, - wf_offset_t offset) { - // Fetch pattern/text blocks - uint64_t* pattern_blocks = (uint64_t*)(wf_aligner->pattern+WAVEFRONT_V(k,offset)); - uint64_t* text_blocks = (uint64_t*)(wf_aligner->text+WAVEFRONT_H(k,offset)); - // Compare 64-bits blocks - uint64_t cmp = *pattern_blocks ^ *text_blocks; - while (__builtin_expect(cmp==0,0)) { - // Increment offset (full block) - offset += 8; - // Next blocks - ++pattern_blocks; - ++text_blocks; - // Compare - cmp = *pattern_blocks ^ *text_blocks; - } - // Count equal characters - const int equal_right_bits = __builtin_ctzl(cmp); - const int equal_chars = DIV_FLOOR(equal_right_bits,8); - offset += equal_chars; - // Return extended offset - return offset; -} -/* - * Wavefront offset extension comparing characters - * Remember: - * - No offset is out of boundaries !(h>tlen,v>plen) - * - if (h==tlen,v==plen) extension won't increment (sentinels) - */ -FORCE_NO_INLINE void 
wavefront_extend_matches_packed_end2end( - wavefront_aligner_t* const wf_aligner, - wavefront_t* const mwavefront, - const int lo, - const int hi) { - wf_offset_t* const offsets = mwavefront->offsets; - int k; - for (k=lo;k<=hi;++k) { - // Fetch offset - const wf_offset_t offset = offsets[k]; - if (offset == WAVEFRONT_OFFSET_NULL) continue; - // Extend offset - offsets[k] = wavefront_extend_matches_packed_kernel(wf_aligner,k,offset); - } -} -FORCE_NO_INLINE wf_offset_t wavefront_extend_matches_packed_max( - wavefront_aligner_t* const wf_aligner, - wavefront_t* const mwavefront, - const int lo, - const int hi) { - wf_offset_t* const offsets = mwavefront->offsets; - wf_offset_t max_antidiag = 0; - int k; - for (k=lo;k<=hi;++k) { - // Fetch offset - const wf_offset_t offset = offsets[k]; - if (offset == WAVEFRONT_OFFSET_NULL) continue; - // Extend offset - offsets[k] = wavefront_extend_matches_packed_kernel(wf_aligner,k,offset); - // Compute max - const wf_offset_t antidiag = WAVEFRONT_ANTIDIAGONAL(k,offsets[k]); - if (max_antidiag < antidiag) max_antidiag = antidiag; - } - return max_antidiag; -} -FORCE_NO_INLINE bool wavefront_extend_matches_packed_endsfree( +void wavefront_extend_end2end_dispatcher_seq( wavefront_aligner_t* const wf_aligner, wavefront_t* const mwavefront, const int score, const int lo, const int hi) { - wf_offset_t* const offsets = mwavefront->offsets; - int k; - for (k=lo;k<=hi;++k) { - // Fetch offset - wf_offset_t offset = offsets[k]; - if (offset == WAVEFRONT_OFFSET_NULL) continue; - // Extend offset - offset = wavefront_extend_matches_packed_kernel(wf_aligner,k,offset); - offsets[k] = offset; - // Check ends-free reaching boundaries - if (wavefront_extend_endsfree_check_termination(wf_aligner,mwavefront,score,k,offset)) { - return true; // Quit (we are done) - } + // Parameters + wavefront_sequences_t* const seqs = &wf_aligner->sequences; + // Check the sequence mode + if (seqs->mode == wf_sequences_ascii) { + 
wavefront_extend_matches_packed_end2end(wf_aligner,mwavefront,lo,hi); + } else { + wf_offset_t dummy; + wavefront_extend_matches_custom(wf_aligner,mwavefront,score,lo,hi,false,&dummy); } - // Alignment not finished - return false; } -bool wavefront_extend_matches_custom( +void wavefront_extend_end2end_dispatcher_threads( wavefront_aligner_t* const wf_aligner, wavefront_t* const mwavefront, - const int score, - const int lo, - const int hi, - const bool endsfree) { - // Parameters (custom matching function) - alignment_match_funct_t match_funct = wf_aligner->match_funct; - void* const func_arguments = wf_aligner->match_funct_arguments; - // Extend diagonally each wavefront point - wf_offset_t* const offsets = mwavefront->offsets; - int k; - for (k=lo;k<=hi;++k) { - // Check offset - wf_offset_t offset = offsets[k]; - if (offset == WAVEFRONT_OFFSET_NULL) continue; - // Count equal characters - int* pattern_lambda = wf_aligner->pattern_lambda; - int* text_lambda = wf_aligner->text_lambda; - int v = WAVEFRONT_V(k,offset); - int h = WAVEFRONT_H(k,offset); - while (match_funct(pattern_lambda[v],text_lambda[h],func_arguments)) { - h++; v++; offset++; - } - // Update offset - offsets[k] = offset; - // Check ends-free reaching boundaries - if (endsfree && wavefront_extend_endsfree_check_termination(wf_aligner,mwavefront,score,k,offset)) { - return true; // Quit (we are done) + const int score) { + // Parameters + const int lo = mwavefront->lo; + const int hi = mwavefront->hi; + const int num_threads = wavefront_compute_num_threads(wf_aligner,lo,hi); + if (num_threads == 1) { + // Extend wavefront single-thread + wavefront_extend_end2end_dispatcher_seq(wf_aligner,mwavefront,score,lo,hi); + } else { +#ifdef WFA_PARALLEL + // Extend wavefront in parallel + #pragma omp parallel num_threads(num_threads) + { + int t_lo, t_hi; + wavefront_compute_thread_limits(omp_get_thread_num(),omp_get_num_threads(),lo,hi,&t_lo,&t_hi); + 
wavefront_extend_end2end_dispatcher_seq(wf_aligner,mwavefront,score,t_lo,t_hi); } +#endif } - // Alignment not finished - return false; } -wf_offset_t wavefront_extend_matches_custom_max( - wavefront_aligner_t* const wf_aligner, - wavefront_t* const mwavefront, - const int lo, - const int hi) { - // Parameters (custom matching function) - alignment_match_funct_t match_funct = wf_aligner->match_funct; - void* const func_arguments = wf_aligner->match_funct_arguments; - // Extend diagonally each wavefront point - wf_offset_t* const offsets = mwavefront->offsets; - wf_offset_t max_antidiag = 0; - int k; - for (k=lo;k<=hi;++k) { - // Check offset - wf_offset_t offset = offsets[k]; - if (offset == WAVEFRONT_OFFSET_NULL) continue; - // Count equal characters - int* pattern_lambda = wf_aligner->pattern_lambda; - int* text_lambda = wf_aligner->text_lambda; - int v = WAVEFRONT_V(k,offset); - int h = WAVEFRONT_H(k,offset); - while (match_funct(pattern_lambda[v],text_lambda[h],func_arguments)) { - h++; v++; offset++; - } - // Update offset - offsets[k] = offset; - // Compute max - const wf_offset_t antidiag = WAVEFRONT_ANTIDIAGONAL(k,offsets[k]); - if (max_antidiag < antidiag) max_antidiag = antidiag; - } - return max_antidiag; -} -/* - * Wavefront exact "extension" - */ -int wavefront_extend_end2end_max( +int wavefront_extend_end2end( wavefront_aligner_t* const wf_aligner, - const int score, - int* const max_antidiagonal) { + const int score) { // Compute score const bool memory_modular = wf_aligner->wf_components.memory_modular; const int max_score_scope = wf_aligner->wf_components.max_score_scope; const int score_mod = (memory_modular) ? 
score % max_score_scope : score; - *max_antidiagonal = 0; // Init // Fetch m-wavefront wavefront_t* const mwavefront = wf_aligner->wf_components.mwavefronts[score_mod]; if (mwavefront == NULL) { @@ -348,23 +100,61 @@ int wavefront_extend_end2end_max( } return 0; // Not done } - // Multithreading dispatcher + // Extend (dispatcher) + wavefront_extend_end2end_dispatcher_threads(wf_aligner,mwavefront,score); + const bool end_reached = wavefront_termination_end2end(wf_aligner,mwavefront,score,score_mod); + if (end_reached) { + wf_aligner->align_status.status = WF_STATUS_END_REACHED; + wf_aligner->align_status.score = score; + return 1; // Done + } + // Cut-off wavefront heuristically + if (wf_aligner->heuristic.strategy != wf_heuristic_none) { + wavefront_heuristic_cufoff(wf_aligner,score,score_mod); + } + return 0; // Not done +} +/* + * Wavefront Extension (End-to-end + MAX-antidiagonal) + */ +wf_offset_t wavefront_extend_end2end_max_dispatcher_seq( + wavefront_aligner_t* const wf_aligner, + wavefront_t* const mwavefront, + const int score, + const int lo, + const int hi) { + // Parameters + wavefront_sequences_t* const seqs = &wf_aligner->sequences; + // Check the sequence mode + if (seqs->mode == wf_sequences_ascii) { + return wavefront_extend_matches_packed_end2end_max(wf_aligner,mwavefront,lo,hi); + } else { + wf_offset_t max_antidiag; + wavefront_extend_matches_custom(wf_aligner,mwavefront,score,lo,hi,false,&max_antidiag); + return max_antidiag; + } +} +wf_offset_t wavefront_extend_end2end_max_dispatcher_threads( + wavefront_aligner_t* const wf_aligner, + wavefront_t* const mwavefront, + const int score) { + // Parameters const int lo = mwavefront->lo; const int hi = mwavefront->hi; wf_offset_t max_antidiag = 0; + // Select number of threads const int num_threads = wavefront_compute_num_threads(wf_aligner,lo,hi); if (num_threads == 1) { - // Extend wavefront - max_antidiag = wavefront_extend_matches_packed_max(wf_aligner,mwavefront,lo,hi); + // Extend wavefront 
single-thread + max_antidiag = wavefront_extend_end2end_max_dispatcher_seq(wf_aligner,mwavefront,score,lo,hi); } else { -#ifdef WFA_PARALLEL // Extend wavefront in parallel +#ifdef WFA_PARALLEL #pragma omp parallel num_threads(num_threads) { int t_lo, t_hi; - wavefront_compute_thread_limits( - omp_get_thread_num(),omp_get_num_threads(),lo,hi,&t_lo,&t_hi); - wf_offset_t t_max_antidiag = wavefront_extend_matches_packed_max(wf_aligner,mwavefront,t_lo,t_hi); + wavefront_compute_thread_limits(omp_get_thread_num(),omp_get_num_threads(),lo,hi,&t_lo,&t_hi); + wf_offset_t t_max_antidiag = wavefront_extend_end2end_max_dispatcher_seq(wf_aligner,mwavefront,score,t_lo,t_hi); #pragma omp critical { if (t_max_antidiag > max_antidiag) max_antidiag = t_max_antidiag; @@ -372,31 +162,22 @@ int wavefront_extend_end2end_max( } #endif } - // Check end-to-end finished - const bool end_reached = wavefront_extend_end2end_check_termination(wf_aligner,mwavefront,score,score_mod); - if (end_reached) { - wf_aligner->align_status.status = WF_STATUS_END_REACHED; - wf_aligner->align_status.score = score; - return 1; // Done - } - // Cut-off wavefront heuristically - if (wf_aligner->heuristic.strategy != wf_heuristic_none) { - wavefront_heuristic_cufoff(wf_aligner,score,score_mod); - } - *max_antidiagonal = max_antidiag; - return 0; // Not done + // Return maximum antidiagonal + return max_antidiag; } -int wavefront_extend_end2end( +int wavefront_extend_end2end_max( wavefront_aligner_t* const wf_aligner, - const int score) { + const int score, + int* const max_antidiagonal) { // Compute score const bool memory_modular = wf_aligner->wf_components.memory_modular; const int max_score_scope = wf_aligner->wf_components.max_score_scope; const int score_mod = (memory_modular) ? 
score % max_score_scope : score; + *max_antidiagonal = 0; // Init // Fetch m-wavefront wavefront_t* const mwavefront = wf_aligner->wf_components.mwavefronts[score_mod]; if (mwavefront == NULL) { - // Check alignment feasibility (for heuristic variants that can lead to no solution) + // Check alignment feasibility (heuristic variants that can lead to no solution) if (wf_aligner->align_status.num_null_steps > wf_aligner->wf_components.max_score_scope) { wf_aligner->align_status.status = WF_STATUS_UNFEASIBLE; wf_aligner->align_status.score = score; @@ -404,28 +185,9 @@ int wavefront_extend_end2end( } return 0; // Not done } - // Multithreading dispatcher - const int lo = mwavefront->lo; - const int hi = mwavefront->hi; - bool end_reached = false; - const int num_threads = wavefront_compute_num_threads(wf_aligner,lo,hi); - if (num_threads == 1) { - // Extend wavefront - wavefront_extend_matches_packed_end2end(wf_aligner,mwavefront,lo,hi); - } else { -#ifdef WFA_PARALLEL - // Extend wavefront in parallel - #pragma omp parallel num_threads(num_threads) - { - int t_lo, t_hi; - wavefront_compute_thread_limits( - omp_get_thread_num(),omp_get_num_threads(),lo,hi,&t_lo,&t_hi); - wavefront_extend_matches_packed_end2end(wf_aligner,mwavefront,t_lo,t_hi); - } -#endif - } - // Check end-to-end finished - end_reached = wavefront_extend_end2end_check_termination(wf_aligner,mwavefront,score,score_mod); + // Extend (dispatcher) + const wf_offset_t max_ak = wavefront_extend_end2end_max_dispatcher_threads(wf_aligner,mwavefront,score); + const bool end_reached = wavefront_termination_end2end(wf_aligner,mwavefront,score,score_mod); if (end_reached) { wf_aligner->align_status.status = WF_STATUS_END_REACHED; wf_aligner->align_status.score = score; @@ -435,70 +197,67 @@ int wavefront_extend_end2end( if (wf_aligner->heuristic.strategy != wf_heuristic_none) { wavefront_heuristic_cufoff(wf_aligner,score,score_mod); } + *max_antidiagonal = max_ak; return 0; // Not done } -int 
wavefront_extend_endsfree( +/* + * Wavefront Extension (Ends-free) + */ +bool wavefront_extend_endsfree_dispatcher_seq( wavefront_aligner_t* const wf_aligner, - const int score) { - // Modular wavefront - const bool memory_modular = wf_aligner->wf_components.memory_modular; - const int max_score_scope = wf_aligner->wf_components.max_score_scope; - const int score_mod = (memory_modular) ? score % max_score_scope : score; - // Fetch m-wavefront - wavefront_t* const mwavefront = wf_aligner->wf_components.mwavefronts[score_mod]; - if (mwavefront == NULL) { - // Check alignment feasibility (for heuristic variants that can lead to no solution) - if (wf_aligner->align_status.num_null_steps > wf_aligner->wf_components.max_score_scope) { - wf_aligner->align_status.status = WF_STATUS_UNFEASIBLE; - wf_aligner->align_status.score = score; - return 1; // Done - } - return 0; // Not done + wavefront_t* const mwavefront, + const int score, + const int lo, + const int hi) { + // Parameters + wavefront_sequences_t* const seqs = &wf_aligner->sequences; + // Check the sequence mode + if (seqs->mode == wf_sequences_ascii) { + return wavefront_extend_matches_packed_endsfree(wf_aligner,mwavefront,score,lo,hi); + } else { + wf_offset_t dummy; + return wavefront_extend_matches_custom(wf_aligner,mwavefront,score,lo,hi,true,&dummy); } - // Multithreading dispatcher +} +bool wavefront_extend_endsfree_dispatcher_threads( + wavefront_aligner_t* const wf_aligner, + wavefront_t* const mwavefront, + const int score) { + // Parameters const int lo = mwavefront->lo; const int hi = mwavefront->hi; bool end_reached = false; const int num_threads = wavefront_compute_num_threads(wf_aligner,lo,hi); if (num_threads == 1) { - // Extend wavefront - end_reached = wavefront_extend_matches_packed_endsfree(wf_aligner,mwavefront,score,lo,hi); + // Extend wavefront single-thread + end_reached = wavefront_extend_endsfree_dispatcher_seq(wf_aligner,mwavefront,score,lo,hi); } else { #ifdef WFA_PARALLEL // Extend 
wavefront in parallel #pragma omp parallel num_threads(num_threads) { int t_lo, t_hi; - wavefront_compute_thread_limits( - omp_get_thread_num(),omp_get_num_threads(),lo,hi,&t_lo,&t_hi); - if (wavefront_extend_matches_packed_endsfree(wf_aligner,mwavefront,score,t_lo,t_hi)) { + wavefront_compute_thread_limits(omp_get_thread_num(),omp_get_num_threads(),lo,hi,&t_lo,&t_hi); + if (wavefront_extend_endsfree_dispatcher_seq(wf_aligner,mwavefront,score,t_lo,t_hi)) { end_reached = true; } } #endif } - if (end_reached) { - wf_aligner->align_status.status = WF_STATUS_END_REACHED; - wf_aligner->align_status.score = score; - return 1; // Done - } - // Cut-off wavefront heuristically - if (wf_aligner->heuristic.strategy != wf_heuristic_none) { - wavefront_heuristic_cufoff(wf_aligner,score,score_mod); - } - return 0; // Not done + // Return end-reached + return end_reached; } -int wavefront_extend_custom( +int wavefront_extend_endsfree( wavefront_aligner_t* const wf_aligner, const int score) { - // Compute score + // Modular wavefront const bool memory_modular = wf_aligner->wf_components.memory_modular; const int max_score_scope = wf_aligner->wf_components.max_score_scope; const int score_mod = (memory_modular) ? 
score % max_score_scope : score; // Fetch m-wavefront wavefront_t* const mwavefront = wf_aligner->wf_components.mwavefronts[score_mod]; if (mwavefront == NULL) { - // Check alignment feasibility (for heuristic variants that can lead to no solution) + // Check alignment feasibility (heuristic variants that can lead to no solution) if (wf_aligner->align_status.num_null_steps > wf_aligner->wf_components.max_score_scope) { wf_aligner->align_status.status = WF_STATUS_UNFEASIBLE; wf_aligner->align_status.score = score; @@ -506,33 +265,8 @@ int wavefront_extend_custom( } return 0; // Not done } - // Multithreading dispatcher - const bool endsfree = (wf_aligner->alignment_form.span == alignment_endsfree); - const int lo = mwavefront->lo; - const int hi = mwavefront->hi; - bool end_reached = false; - const int num_threads = wavefront_compute_num_threads(wf_aligner,lo,hi); - if (num_threads == 1) { - // Extend wavefront - end_reached = wavefront_extend_matches_custom(wf_aligner,mwavefront,score,lo,hi,endsfree); - } else { -#ifdef WFA_PARALLEL - // Extend wavefront in parallel - #pragma omp parallel num_threads(num_threads) - { - int t_lo, t_hi; - wavefront_compute_thread_limits( - omp_get_thread_num(),omp_get_num_threads(),lo,hi,&t_lo,&t_hi); - if (wavefront_extend_matches_custom(wf_aligner,mwavefront,score,t_lo,t_hi,endsfree)) { - end_reached = true; - } - } -#endif - } - // Check end-to-end finished - if (!endsfree) { - end_reached = wavefront_extend_end2end_check_termination(wf_aligner,mwavefront,score,score_mod); - } + // Extend (dispatcher) + const bool end_reached = wavefront_extend_endsfree_dispatcher_threads(wf_aligner,mwavefront,score); if (end_reached) { wf_aligner->align_status.status = WF_STATUS_END_REACHED; wf_aligner->align_status.score = score; @@ -544,61 +278,3 @@ int wavefront_extend_custom( } return 0; // Not done } -int wavefront_extend_custom_max( - wavefront_aligner_t* const wf_aligner, - const int score, - int* const max_antidiagonal) { - // Compute 
score - const bool memory_modular = wf_aligner->wf_components.memory_modular; - const int max_score_scope = wf_aligner->wf_components.max_score_scope; - const int score_mod = (memory_modular) ? score % max_score_scope : score; - *max_antidiagonal = 0; // Init - // Fetch m-wavefront - wavefront_t* const mwavefront = wf_aligner->wf_components.mwavefronts[score_mod]; - if (mwavefront == NULL) { - // Check alignment feasibility (for heuristic variants that can lead to no solution) - if (wf_aligner->align_status.num_null_steps > wf_aligner->wf_components.max_score_scope) { - wf_aligner->align_status.status = WF_STATUS_UNFEASIBLE; - wf_aligner->align_status.score = score; - return 1; // Done - } - return 0; // Not done - } - // Multithreading dispatcher - const int lo = mwavefront->lo; - const int hi = mwavefront->hi; - wf_offset_t max_antidiag = 0; - const int num_threads = wavefront_compute_num_threads(wf_aligner,lo,hi); - if (num_threads == 1) { - // Extend wavefront - max_antidiag = wavefront_extend_matches_custom_max(wf_aligner,mwavefront,lo,hi); - } else { -#ifdef WFA_PARALLEL - // Extend wavefront in parallel - #pragma omp parallel num_threads(num_threads) - { - int t_lo, t_hi; - wavefront_compute_thread_limits( - omp_get_thread_num(),omp_get_num_threads(),lo,hi,&t_lo,&t_hi); - wf_offset_t t_max_antidiag = wavefront_extend_matches_custom_max(wf_aligner,mwavefront,t_lo,t_hi); - #pragma omp critical - { - if (t_max_antidiag > max_antidiag) max_antidiag = t_max_antidiag; - } - } -#endif - } - // Check end-to-end finished - const bool end_reached = wavefront_extend_end2end_check_termination(wf_aligner,mwavefront,score,score_mod); - if (end_reached) { - wf_aligner->align_status.status = WF_STATUS_END_REACHED; - wf_aligner->align_status.score = score; - return 1; // Done - } - // Cut-off wavefront heuristically - if (wf_aligner->heuristic.strategy != wf_heuristic_none) { - wavefront_heuristic_cufoff(wf_aligner,score,score_mod); - } - *max_antidiagonal = max_antidiag; - 
return 0; // Not done -} diff --git a/src/common/wflign/deps/WFA2-lib/wavefront/wavefront_extend.h b/src/common/wflign/deps/WFA2-lib/wavefront/wavefront_extend.h index 703a4b3a..24851d71 100644 --- a/src/common/wflign/deps/WFA2-lib/wavefront/wavefront_extend.h +++ b/src/common/wflign/deps/WFA2-lib/wavefront/wavefront_extend.h @@ -26,7 +26,7 @@ * * PROJECT: Wavefront Alignment Algorithms * AUTHOR(S): Santiago Marco-Sola - * DESCRIPTION: WaveFront-Alignment module for the "extension" of exact matches + * DESCRIPTION: WFA module for the "extension" of exact matches */ #ifndef WAVEFRONT_EXTEND_H_ @@ -35,23 +35,18 @@ #include "wavefront_aligner.h" /* - * Wavefront exact "extension" + * Wavefront extension */ int wavefront_extend_end2end( wavefront_aligner_t* const wf_aligner, const int score); -int wavefront_extend_end2end_max( - wavefront_aligner_t* const wf_aligner, - const int score, - int* const max_antidiagonal); int wavefront_extend_endsfree( wavefront_aligner_t* const wf_aligner, const int score); -int wavefront_extend_custom( - wavefront_aligner_t* const wf_aligner, - const int score); -int wavefront_extend_custom_max( + +int wavefront_extend_end2end_max( wavefront_aligner_t* const wf_aligner, const int score, int* const max_antidiagonal); + #endif /* WAVEFRONT_EXTEND_H_ */ diff --git a/src/common/wflign/deps/WFA2-lib/wavefront/wavefront_extend_kernels.c b/src/common/wflign/deps/WFA2-lib/wavefront/wavefront_extend_kernels.c new file mode 100644 index 00000000..fe478693 --- /dev/null +++ b/src/common/wflign/deps/WFA2-lib/wavefront/wavefront_extend_kernels.c @@ -0,0 +1,177 @@ +/* + * The MIT License + * + * Wavefront Alignment Algorithms + * Copyright (c) 2017 by Santiago Marco-Sola + * + * This file is part of Wavefront Alignment Algorithms. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * PROJECT: Wavefront Alignment Algorithms + * AUTHOR(S): Santiago Marco-Sola + * DESCRIPTION: WFA module for the "extension" of exact matches + */ + +#include "wavefront_extend_kernels.h" +#include "wavefront_termination.h" + +/* + * Inner-most extend kernel (blockwise comparisons): XORs 8-byte pattern/text blocks starting at (k,offset) and returns offset advanced by 8 per fully equal block plus the number of equal low-order bytes of the first differing block + */ +FORCE_INLINE wf_offset_t wavefront_extend_matches_packed_kernel( + wavefront_aligner_t* const wf_aligner, + const int k, + wf_offset_t offset) { + // Fetch pattern/text blocks + uint64_t* pattern_blocks = (uint64_t*)(wf_aligner->sequences.pattern+WAVEFRONT_V(k,offset)); + uint64_t* text_blocks = (uint64_t*)(wf_aligner->sequences.text+WAVEFRONT_H(k,offset)); + // Compare 64-bits blocks (no bounds check here: the loop only stops at the first differing block) + uint64_t cmp = *pattern_blocks ^ *text_blocks; + while (__builtin_expect(cmp==0,0)) { + // Increment offset (full block) + offset += 8; + // Next blocks + ++pattern_blocks; + ++text_blocks; + // Compare + cmp = *pattern_blocks ^ *text_blocks; + } + // Count equal characters (ctzll operates on unsigned long long, which always covers the 64-bit cmp; the unsigned long taken by ctzl is only 32 bits on LLP64 targets) + const int equal_right_bits = __builtin_ctzll(cmp); + const int equal_chars = DIV_FLOOR(equal_right_bits,8); + offset += equal_chars; + // Return extended offset + return offset; +} +/* + * Wavefront-Extend Inner Kernels + * Wavefront offset extension comparing characters + * Remember: + * - No offset is out of boundaries !(h>tlen,v>plen) + * - if (h==tlen,v==plen) extension won't increment (sentinels) + */ +FORCE_NO_INLINE void wavefront_extend_matches_packed_end2end( + wavefront_aligner_t* const wf_aligner, + wavefront_t* const mwavefront, + const int lo, + const int hi) { + wf_offset_t* const offsets = mwavefront->offsets; + int k; + for (k=lo;k<=hi;++k) { + // Fetch offset + const wf_offset_t offset = offsets[k]; + if (offset == WAVEFRONT_OFFSET_NULL) continue; + // Extend offset + offsets[k] = wavefront_extend_matches_packed_kernel(wf_aligner,k,offset); + } +} +FORCE_NO_INLINE wf_offset_t wavefront_extend_matches_packed_end2end_max( + wavefront_aligner_t* const wf_aligner, + wavefront_t* const mwavefront, + const int 
lo, + const int hi) { + wf_offset_t* const offsets = mwavefront->offsets; + wf_offset_t max_antidiag = 0; + int k; + for (k=lo;k<=hi;++k) { + // Fetch offset + const wf_offset_t offset = offsets[k]; + if (offset == WAVEFRONT_OFFSET_NULL) continue; + // Extend offset + offsets[k] = wavefront_extend_matches_packed_kernel(wf_aligner,k,offset); + // Compute max + const wf_offset_t antidiag = WAVEFRONT_ANTIDIAGONAL(k,offsets[k]); + if (max_antidiag < antidiag) max_antidiag = antidiag; + } + return max_antidiag; +} +FORCE_NO_INLINE bool wavefront_extend_matches_packed_endsfree( + wavefront_aligner_t* const wf_aligner, + wavefront_t* const mwavefront, + const int score, + const int lo, + const int hi) { + // Parameters + wf_offset_t* const offsets = mwavefront->offsets; + int k; + for (k=lo;k<=hi;++k) { + // Fetch offset + wf_offset_t offset = offsets[k]; + if (offset == WAVEFRONT_OFFSET_NULL) continue; + // Extend offset + offset = wavefront_extend_matches_packed_kernel(wf_aligner,k,offset); + offsets[k] = offset; + // Check ends-free reaching boundaries + if (wavefront_termination_endsfree(wf_aligner,mwavefront,score,k,offset)) { + return true; // Quit (we are done) + } + /* + * TODO + const int h_pos = WAVEFRONT_H(k,offset); + const int v_pos = WAVEFRONT_V(k,offset); + if (h_pos >= text_length || v_pos >= pattern_length) { // FIXME Use wherever necessary + if (wavefront_extend_endsfree_check_termination(wf_aligner,mwavefront,score,k,offset)) { + return true; // Quit (we are done) + } + */ + } + // Alignment not finished + return false; +} +/* + * Wavefront-Extend Inner Kernel (Custom match function) + */ +bool wavefront_extend_matches_custom( + wavefront_aligner_t* const wf_aligner, + wavefront_t* const mwavefront, + const int score, + const int lo, + const int hi, + const bool endsfree, + wf_offset_t* const max_antidiag) { + // Parameters + wavefront_sequences_t* const seqs = &wf_aligner->sequences; + // Extend diagonally each wavefront point + wf_offset_t* const 
offsets = mwavefront->offsets; + *max_antidiag = 0; + int k; + for (k=lo;k<=hi;++k) { + // Check offset + wf_offset_t offset = offsets[k]; + if (offset == WAVEFRONT_OFFSET_NULL) continue; + // Count equal characters + int v = WAVEFRONT_V(k,offset); + int h = WAVEFRONT_H(k,offset); + while (wavefront_sequences_cmp(seqs,v,h)) { + h++; v++; offset++; + } + // Update offset + offsets[k] = offset; + // Compute max + const wf_offset_t antidiag = WAVEFRONT_ANTIDIAGONAL(k,offset); + if (*max_antidiag < antidiag) *max_antidiag = antidiag; + // Check ends-free reaching boundaries + if (endsfree && wavefront_termination_endsfree(wf_aligner,mwavefront,score,k,offset)) { + return true; // Quit (we are done) + } + } + // Alignment not finished + return false; +} diff --git a/src/common/wflign/deps/WFA2-lib/utils/string_padded.h b/src/common/wflign/deps/WFA2-lib/wavefront/wavefront_extend_kernels.h similarity index 50% rename from src/common/wflign/deps/WFA2-lib/utils/string_padded.h rename to src/common/wflign/deps/WFA2-lib/wavefront/wavefront_extend_kernels.h index f2d0dbb4..d0a28eb0 100644 --- a/src/common/wflign/deps/WFA2-lib/utils/string_padded.h +++ b/src/common/wflign/deps/WFA2-lib/wavefront/wavefront_extend_kernels.h @@ -26,61 +26,44 @@ * * PROJECT: Wavefront Alignment Algorithms * AUTHOR(S): Santiago Marco-Sola - * DESCRIPTION: Padded string module to avoid handling corner conditions + * DESCRIPTION: WFA module for the "extension" of exact matches */ -#ifndef STRING_PADDED_H_ -#define STRING_PADDED_H_ +#ifndef WAVEFRONT_EXTEND_KERNELS_H_ +#define WAVEFRONT_EXTEND_KERNELS_H_ -/* - * Includes - */ -#include "utils/commons.h" -#include "system/mm_allocator.h" +#include "wavefront_aligner.h" /* - * Strings Padded + * Wavefront-Extend Inner Kernels */ -typedef struct { - // Dimensions - int pattern_length; - int text_length; - // Padded strings - char* pattern_padded; - int* pattern_lambda_padded; - char* text_padded; - int* text_lambda_padded; - // MM - char* 
pattern_padded_buffer; - int* pattern_lambda_padded_buffer; - char* text_padded_buffer; - int* text_lambda_padded_buffer; - mm_allocator_t* mm_allocator; -} strings_padded_t; +void wavefront_extend_matches_packed_end2end( + wavefront_aligner_t* const wf_aligner, + wavefront_t* const mwavefront, + const int lo, + const int hi); +wf_offset_t wavefront_extend_matches_packed_end2end_max( + wavefront_aligner_t* const wf_aligner, + wavefront_t* const mwavefront, + const int lo, + const int hi); +bool wavefront_extend_matches_packed_endsfree( + wavefront_aligner_t* const wf_aligner, + wavefront_t* const mwavefront, + const int score, + const int lo, + const int hi); /* - * Strings (text/pattern) padded + * Wavefront-Extend Inner Kernel (Custom match function) */ -strings_padded_t* strings_padded_new( - const char* const pattern, - const int pattern_length, - const char* const text, - const int text_length, - const int padding_length, - const bool reverse_sequences, - mm_allocator_t* const mm_allocator); -strings_padded_t* strings_padded_new_rhomb( - const char* const pattern, - const int* const pattern_lambda, - const int pattern_length, - const char* const text, - const int* const text_lambda, - const int text_length, - const int padding_length, - const bool reverse_sequences, - mm_allocator_t* const mm_allocator); -void strings_padded_delete( - strings_padded_t* const strings_padded); -void strings_padded_delete_lambda( - strings_padded_t* const strings_padded); -#endif /* STRING_PADDED_H_ */ +bool wavefront_extend_matches_custom( + wavefront_aligner_t* const wf_aligner, + wavefront_t* const mwavefront, + const int score, + const int lo, + const int hi, + const bool endsfree, + wf_offset_t* const max_antidiag); + +#endif /* WAVEFRONT_EXTEND_KERNELS_H_ */ diff --git a/src/common/wflign/deps/WFA2-lib/wavefront/wavefront_extend_kernels_avx.c b/src/common/wflign/deps/WFA2-lib/wavefront/wavefront_extend_kernels_avx.c new file mode 100644 index 00000000..0b5278a0 --- 
/dev/null +++ b/src/common/wflign/deps/WFA2-lib/wavefront/wavefront_extend_kernels_avx.c @@ -0,0 +1,35 @@ +/* + * The MIT License + * + * Wavefront Alignment Algorithms + * Copyright (c) 2017 by Santiago Marco-Sola + * + * This file is part of Wavefront Alignment Algorithms. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * PROJECT: Wavefront Alignment Algorithms + * AUTHOR(S): Santiago Marco-Sola + * DESCRIPTION: WaveFront-Alignment module for the "extension" of exact matches + */ + +#include "wavefront_extend.h" +#include "wavefront_align.h" +#include "wavefront_compute.h" +#include "wavefront_heuristic.h" diff --git a/src/common/wflign/deps/WFA2-lib/wavefront/wavefront_extend_kernels_avx.h b/src/common/wflign/deps/WFA2-lib/wavefront/wavefront_extend_kernels_avx.h new file mode 100644 index 00000000..907f5698 --- /dev/null +++ b/src/common/wflign/deps/WFA2-lib/wavefront/wavefront_extend_kernels_avx.h @@ -0,0 +1,37 @@ +/* + * The MIT License + * + * Wavefront Alignment Algorithms + * Copyright (c) 2017 by Santiago Marco-Sola + * + * This file is part of Wavefront Alignment Algorithms. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * PROJECT: Wavefront Alignment Algorithms + * AUTHOR(S): Santiago Marco-Sola + * DESCRIPTION: WaveFront-Alignment module for the "extension" of exact matches + */ + +#ifndef WAVEFRONT_EXTEND_KERNELS_AVX_H_ +#define WAVEFRONT_EXTEND_KERNELS_AVX_H_ + +#include "wavefront_aligner.h" + +#endif /* WAVEFRONT_EXTEND_KERNELS_AVX_H_ */ diff --git a/src/common/wflign/deps/WFA2-lib/wavefront/wavefront_heuristic.c b/src/common/wflign/deps/WFA2-lib/wavefront/wavefront_heuristic.c index 0eb6409e..ae76256a 100644 --- a/src/common/wflign/deps/WFA2-lib/wavefront/wavefront_heuristic.c +++ b/src/common/wflign/deps/WFA2-lib/wavefront/wavefront_heuristic.c @@ -29,6 +29,8 @@ * DESCRIPTION: Support functions for wavefront heuristic strategies */ +#include "utils/commons.h" +#include "system/mm_allocator.h" #include "wavefront_heuristic.h" #include "wavefront_aligner.h" @@ -257,8 +259,9 @@ void wavefront_heuristic_wfadaptive( wavefront_t* const wavefront, const bool wfmash_mode) { // Parameters - const int pattern_length = wf_aligner->pattern_length; - const int text_length = wf_aligner->text_length; + wavefront_sequences_t* const sequences = &wf_aligner->sequences; + const int pattern_length = sequences->pattern_length; + const int text_length = sequences->text_length; const int min_wavefront_length = wf_aligner->heuristic.min_wavefront_length; const int max_distance_threshold = wf_aligner->heuristic.max_distance_threshold; wavefront_heuristic_t* const wf_heuristic = &wf_aligner->heuristic; @@ -462,8 +465,9 @@ void wavefront_heuristic_banded_adaptive( wavefront_aligner_t* const wf_aligner, wavefront_t* const wavefront) { // Parameters - const int pattern_length = wf_aligner->pattern_length; - const int text_length = wf_aligner->text_length; + wavefront_sequences_t* const sequences = &wf_aligner->sequences; + const int pattern_length = sequences->pattern_length; + const int text_length = sequences->text_length; wavefront_heuristic_t* const wf_heuristic = &wf_aligner->heuristic; // Check steps if 
(wf_heuristic->steps_wait > 0) return; diff --git a/src/common/wflign/deps/WFA2-lib/wavefront/wavefront_heuristic.h b/src/common/wflign/deps/WFA2-lib/wavefront/wavefront_heuristic.h index 27ec491e..f59634ef 100644 --- a/src/common/wflign/deps/WFA2-lib/wavefront/wavefront_heuristic.h +++ b/src/common/wflign/deps/WFA2-lib/wavefront/wavefront_heuristic.h @@ -32,8 +32,6 @@ #ifndef WAVEFRONT_HEURISTIC_H_ #define WAVEFRONT_HEURISTIC_H_ -#include "utils/commons.h" - // Wavefront ahead definition typedef struct _wavefront_aligner_t wavefront_aligner_t; diff --git a/src/common/wflign/deps/WFA2-lib/wavefront/wavefront_offset.h b/src/common/wflign/deps/WFA2-lib/wavefront/wavefront_offset.h index 42d59b72..d1ee5c19 100644 --- a/src/common/wflign/deps/WFA2-lib/wavefront/wavefront_offset.h +++ b/src/common/wflign/deps/WFA2-lib/wavefront/wavefront_offset.h @@ -32,8 +32,6 @@ #ifndef WAVEFRONT_OFFSET_H_ #define WAVEFRONT_OFFSET_H_ -#include "utils/commons.h" - /* * Wavefront Offset */ diff --git a/src/common/wflign/deps/WFA2-lib/wavefront/wavefront_pcigar.c b/src/common/wflign/deps/WFA2-lib/wavefront/wavefront_pcigar.c index a66e066a..4f6964ca 100644 --- a/src/common/wflign/deps/WFA2-lib/wavefront/wavefront_pcigar.c +++ b/src/common/wflign/deps/WFA2-lib/wavefront/wavefront_pcigar.c @@ -29,6 +29,8 @@ * DESCRIPTION: Packed CIGAR (Alignment operations in 2-bits) */ +#include "utils/commons.h" +#include "system/mm_allocator.h" #include "wavefront_pcigar.h" /* @@ -150,17 +152,16 @@ int pcigar_unpack_extend_custom( */ void pcigar_unpack_linear( pcigar_t pcigar, - const char* const pattern, - const int pattern_length, - const char* const text, - const int text_length, - alignment_match_funct_t const match_funct, - void* const match_funct_arguments, + wavefront_sequences_t* const sequences, int* const v_pos, int* const h_pos, char* cigar_buffer, int* const cigar_length) { // Parameters + char* const pattern = sequences->pattern; + const int pattern_length = sequences->pattern_length; + 
char* const text = sequences->text; + const int text_length = sequences->text_length; char* const cigar_buffer_base = cigar_buffer; // Compute pcigar length and shift to the end of the word int pcigar_length = PCIGAR_MAX_LENGTH; @@ -174,10 +175,10 @@ void pcigar_unpack_linear( for (i=0;imode == wf_sequences_lambda) { // Custom extend-match function num_matches = pcigar_unpack_extend_custom( - pattern_length,text_length, - match_funct,match_funct_arguments,v,h,cigar_buffer); + pattern_length,text_length,sequences->match_funct, + sequences->match_funct_arguments,v,h,cigar_buffer); } else { num_matches = pcigar_unpack_extend( pattern,pattern_length,text,text_length,v,h,cigar_buffer); @@ -202,18 +203,17 @@ void pcigar_unpack_linear( } void pcigar_unpack_affine( pcigar_t pcigar, - const char* const pattern, - const int pattern_length, - const char* const text, - const int text_length, - alignment_match_funct_t const match_funct, - void* const match_funct_arguments, + wavefront_sequences_t* const sequences, int* const v_pos, int* const h_pos, char* cigar_buffer, int* const cigar_length, affine_matrix_type* const current_matrix_type) { // Parameters + char* const pattern = sequences->pattern; + const int pattern_length = sequences->pattern_length; + char* const text = sequences->text; + const int text_length = sequences->text_length; char* const cigar_buffer_base = cigar_buffer; // Compute pcigar length and shift to the end of the word int pcigar_length = PCIGAR_MAX_LENGTH; @@ -229,10 +229,10 @@ void pcigar_unpack_affine( // Extend exact-matches if (matrix_type == affine_matrix_M) { // Extend only on the M-wavefront int num_matches; - if (match_funct != NULL) { // Custom extend-match function + if (sequences->mode == wf_sequences_lambda) { // Custom extend-match function num_matches = pcigar_unpack_extend_custom( - pattern_length,text_length, - match_funct,match_funct_arguments,v,h,cigar_buffer); + pattern_length,text_length,sequences->match_funct, + 
sequences->match_funct_arguments,v,h,cigar_buffer); } else { num_matches = pcigar_unpack_extend( pattern,pattern_length,text,text_length,v,h,cigar_buffer); diff --git a/src/common/wflign/deps/WFA2-lib/wavefront/wavefront_pcigar.h b/src/common/wflign/deps/WFA2-lib/wavefront/wavefront_pcigar.h index f194c292..0f2edb7e 100644 --- a/src/common/wflign/deps/WFA2-lib/wavefront/wavefront_pcigar.h +++ b/src/common/wflign/deps/WFA2-lib/wavefront/wavefront_pcigar.h @@ -32,8 +32,8 @@ #ifndef WAVEFRONT_PACKED_CIGAR_H_ #define WAVEFRONT_PACKED_CIGAR_H_ -#include "utils/commons.h" #include "wavefront_attributes.h" +#include "wavefront_sequences.h" /* * Configuration @@ -91,24 +91,14 @@ int pcigar_unpack( */ void pcigar_unpack_linear( pcigar_t pcigar, - const char* const pattern, - const int pattern_length, - const char* const text, - const int text_length, - alignment_match_funct_t const match_funct, - void* const match_funct_arguments, + wavefront_sequences_t* const sequences, int* const v_pos, int* const h_pos, char* cigar_buffer, int* const cigar_length); void pcigar_unpack_affine( pcigar_t pcigar, - const char* const pattern, - const int pattern_length, - const char* const text, - const int text_length, - alignment_match_funct_t const match_funct, - void* const match_funct_arguments, + wavefront_sequences_t* const sequences, int* const v_pos, int* const h_pos, char* cigar_buffer, diff --git a/src/common/wflign/deps/WFA2-lib/wavefront/wavefront_penalties.c b/src/common/wflign/deps/WFA2-lib/wavefront/wavefront_penalties.c index 8e4b012e..45b933e1 100644 --- a/src/common/wflign/deps/WFA2-lib/wavefront/wavefront_penalties.c +++ b/src/common/wflign/deps/WFA2-lib/wavefront/wavefront_penalties.c @@ -29,6 +29,8 @@ * DESCRIPTION: WaveFront penalties handling module */ +#include "utils/commons.h" +#include "system/mm_allocator.h" #include "wavefront_penalties.h" /* @@ -197,5 +199,3 @@ void wavefront_penalties_print( break; } } - - diff --git 
a/src/common/wflign/deps/WFA2-lib/wavefront/wavefront_plot.c b/src/common/wflign/deps/WFA2-lib/wavefront/wavefront_plot.c index bd40297c..28c64c00 100644 --- a/src/common/wflign/deps/WFA2-lib/wavefront/wavefront_plot.c +++ b/src/common/wflign/deps/WFA2-lib/wavefront/wavefront_plot.c @@ -29,6 +29,8 @@ * DESCRIPTION: Wavefront alignment module for plot */ +#include "utils/commons.h" +#include "system/mm_allocator.h" #include "wavefront_plot.h" #include "wavefront_aligner.h" @@ -134,10 +136,11 @@ void wavefront_plot_component( // Check wavefront if (wavefront == NULL) return; // Parameters - const int pattern_length = wf_aligner->pattern_length; - const int text_length = wf_aligner->text_length; - const char* const pattern = wf_aligner->pattern; - const char* const text = wf_aligner->text; + wavefront_sequences_t* const sequences = &wf_aligner->sequences; + const int pattern_length = sequences->pattern_length; + const int text_length = sequences->text_length; + const char* const pattern = sequences->pattern; + const char* const text = sequences->text; wavefront_plot_t* const plot = wf_aligner->plot; const bool reverse = (wf_aligner->align_mode == wf_align_biwfa_breakpoint_reverse); // Traverse all offsets @@ -252,24 +255,26 @@ void wavefront_plot_print( // Parameters const distance_metric_t distance_metric = wf_aligner->penalties.distance_metric; wavefront_plot_t* const wf_plot = wf_aligner->plot; + wavefront_sequences_t* const sequences = &wf_aligner->sequences; + const int pattern_length = sequences->pattern_length; + const int text_length = sequences->text_length; // Metadata - if (wf_aligner->match_funct != NULL) { - fprintf(stream,"# PatternLength 0\n"); - fprintf(stream,"# TextLength 0\n"); + if (sequences->mode == wf_sequences_lambda) { + fprintf(stream,"# PatternLength %d\n",pattern_length); + fprintf(stream,"# TextLength %d\n",text_length); fprintf(stream,"# Pattern -\n"); fprintf(stream,"# Text -\n"); } else { - fprintf(stream,"# PatternLength 
%d\n",wf_aligner->pattern_length); - fprintf(stream,"# Pattern %.*s\n",wf_aligner->pattern_length,wf_aligner->pattern); - fprintf(stream,"# TextLength %d\n",wf_aligner->text_length); - fprintf(stream,"# Text %.*s\n",wf_aligner->text_length,wf_aligner->text); + fprintf(stream,"# PatternLength %d\n",pattern_length); + fprintf(stream,"# Pattern %.*s\n",pattern_length,sequences->pattern); + fprintf(stream,"# TextLength %d\n",text_length); + fprintf(stream,"# Text %.*s\n",text_length,sequences->text); } fprintf(stream,"# Penalties "); wavefront_penalties_print(stream,&wf_aligner->penalties); fprintf(stream,"\n"); // Alignment mode fprintf(stream,"# WFAMode "); - wavefront_aligner_print_mode(stream,wf_aligner); wavefront_heuristic_t* const wf_heuristic = &wf_aligner->heuristic; if (wf_heuristic->strategy != wf_heuristic_none) { wavefront_heuristic_print(stream,wf_heuristic); diff --git a/src/common/wflign/deps/WFA2-lib/wavefront/wavefront_plot.h b/src/common/wflign/deps/WFA2-lib/wavefront/wavefront_plot.h index c8fc944f..fb0cf11a 100644 --- a/src/common/wflign/deps/WFA2-lib/wavefront/wavefront_plot.h +++ b/src/common/wflign/deps/WFA2-lib/wavefront/wavefront_plot.h @@ -32,7 +32,6 @@ #ifndef WAVEFRONT_PLOT_H_ #define WAVEFRONT_PLOT_H_ -#include "utils/commons.h" #include "utils/heatmap.h" #include "alignment/score_matrix.h" #include "wavefront/wavefront_penalties.h" diff --git a/src/common/wflign/deps/WFA2-lib/wavefront/wavefront_sequences.c b/src/common/wflign/deps/WFA2-lib/wavefront/wavefront_sequences.c new file mode 100644 index 00000000..4551537d --- /dev/null +++ b/src/common/wflign/deps/WFA2-lib/wavefront/wavefront_sequences.c @@ -0,0 +1,310 @@ +/* + * The MIT License + * + * Wavefront Alignment Algorithms + * Copyright (c) 2017 by Santiago Marco-Sola + * + * This file is part of Wavefront Alignment Algorithms. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * PROJECT: Wavefront Alignment Algorithms + * AUTHOR(S): Santiago Marco-Sola + * DESCRIPTION: WFA module to encapsulate the input sequences + */ + +#include "wavefront_sequences.h" + +/* + * Configuration + */ +#define WF_SEQUENCES_PADDING 64 +#define WF_SEQUENCES_PATTERN_EOS '!' +#define WF_SEQUENCES_TEXT_EOS '?' 
+ +/* + * Setup + */ +void wavefront_sequences_allocate( + wavefront_sequences_t* const wf_sequences) { + // Mode + wf_sequences->mode = wf_sequences_ascii; + wf_sequences->reverse = false; + // Source sequences + wf_sequences->seq_buffer = NULL; + wf_sequences->seq_buffer_allocated = 0; + // Current state + wf_sequences->pattern = NULL; + wf_sequences->text = NULL; +} +void wavefront_sequences_free( + wavefront_sequences_t* const wf_sequences) { + // Free internal buffers + if (wf_sequences->seq_buffer != NULL) free(wf_sequences->seq_buffer); +} +/* + * Init Sequences + */ +void wavefront_sequences_init_allocate( + wavefront_sequences_t* const wf_sequences, + const int pattern_length, + const int text_length) { + // Compute dimensions + const int buffer_size = pattern_length + text_length + 3*WF_SEQUENCES_PADDING; + // Check internal buffer allocated + if (wf_sequences->seq_buffer_allocated < buffer_size) { + // Free + if (wf_sequences->seq_buffer != NULL) free(wf_sequences->seq_buffer); + // Allocate + const int proposed_size = buffer_size + buffer_size/2; + wf_sequences->seq_buffer = malloc(proposed_size); + wf_sequences->seq_buffer_allocated = proposed_size; + } + // Assign memory + wf_sequences->pattern_buffer = wf_sequences->seq_buffer + WF_SEQUENCES_PADDING; + wf_sequences->text_buffer = wf_sequences->seq_buffer + WF_SEQUENCES_PADDING + pattern_length + WF_SEQUENCES_PADDING; +} +void wavefront_sequences_init_copy( + char* const buffer_dst, + const char* const sequence, + const int sequence_length, + const int padding_length, + const char padding_value, + const bool reverse) { + // Copy sequence + if (reverse) { + int i; + for (i=0;i>2 & 3)]; + const char letter2 = dna_packed2bits_decode[(word>>4 & 3)]; + const char letter3 = dna_packed2bits_decode[(word>>6 & 3)]; + if (reverse) { + buffer_dst[buffer_pos ] = letter0; + buffer_dst[buffer_pos-1] = letter1; + buffer_dst[buffer_pos-2] = letter2; + buffer_dst[buffer_pos-3] = letter3; + buffer_pos -= 4; + } else { 
+ buffer_dst[buffer_pos ] = letter0; + buffer_dst[buffer_pos+1] = letter1; + buffer_dst[buffer_pos+2] = letter2; + buffer_dst[buffer_pos+3] = letter3; + buffer_pos += 4; + } + } + // Add end padding + buffer_dst[sequence_length] = padding_value; +} +void wavefront_sequences_init_ascii( + wavefront_sequences_t* const wf_sequences, + const char* const pattern, + const int pattern_length, + const char* const text, + const int text_length, + const bool reverse) { + // Mode + wf_sequences->mode = wf_sequences_ascii; + wf_sequences->reverse = reverse; + // Allocate buffers + wavefront_sequences_init_allocate(wf_sequences,pattern_length,text_length); + // Copy internal sequences + wavefront_sequences_init_copy(wf_sequences->pattern_buffer, + pattern,pattern_length,WF_SEQUENCES_PADDING,WF_SEQUENCES_PATTERN_EOS,reverse); + wf_sequences->pattern_buffer_length = pattern_length; + wavefront_sequences_init_copy(wf_sequences->text_buffer, + text,text_length,WF_SEQUENCES_PADDING,WF_SEQUENCES_TEXT_EOS,reverse); + wf_sequences->text_buffer_length = text_length; + // Set pattern + wf_sequences->pattern = wf_sequences->pattern_buffer; + wf_sequences->pattern_begin = 0; + wf_sequences->pattern_length = pattern_length; + wf_sequences->pattern_eos = wf_sequences->pattern[pattern_length]; + // Set text + wf_sequences->text = wf_sequences->text_buffer; + wf_sequences->text_begin = 0; + wf_sequences->text_length = text_length; + wf_sequences->text_eos = wf_sequences->text[text_length]; +} +void wavefront_sequences_init_lambda( + wavefront_sequences_t* const wf_sequences, + alignment_match_funct_t match_funct, + void* match_funct_arguments, + const int pattern_length, + const int text_length, + const bool reverse) { + // Mode + wf_sequences->mode = wf_sequences_lambda; + wf_sequences->reverse = reverse; + // Set sequences' length + wf_sequences->pattern = NULL; + wf_sequences->text = NULL; + wf_sequences->pattern_begin = 0; + wf_sequences->pattern_length = pattern_length; + 
wf_sequences->text_begin = 0; + wf_sequences->text_length = text_length; + // Internals + wf_sequences->match_funct = match_funct; + wf_sequences->match_funct_arguments = match_funct_arguments; +} +void wavefront_sequences_init_packed2bits( + wavefront_sequences_t* const wf_sequences, + const uint8_t* const pattern, + const int pattern_length, + const uint8_t* const text, + const int text_length, + const bool reverse) { + // Mode + wf_sequences->mode = wf_sequences_ascii; + wf_sequences->reverse = reverse; + // Allocate buffers + wavefront_sequences_init_allocate(wf_sequences,pattern_length,text_length); + // Copy internal sequences + wavefront_sequences_init_decode2bits(wf_sequences->pattern_buffer, + pattern,pattern_length,WF_SEQUENCES_PADDING,WF_SEQUENCES_PATTERN_EOS,reverse); + wf_sequences->pattern_buffer_length = pattern_length; + wavefront_sequences_init_decode2bits(wf_sequences->text_buffer, + text,text_length,WF_SEQUENCES_PADDING,WF_SEQUENCES_TEXT_EOS,reverse); + wf_sequences->text_buffer_length = text_length; + // Set pattern + wf_sequences->pattern = wf_sequences->pattern_buffer; + wf_sequences->pattern_begin = 0; + wf_sequences->pattern_length = pattern_length; + wf_sequences->pattern_eos = wf_sequences->pattern[pattern_length]; + // Set text + wf_sequences->text = wf_sequences->text_buffer; + wf_sequences->text_begin = 0; + wf_sequences->text_length = text_length; + wf_sequences->text_eos = wf_sequences->text[text_length]; +} +/* + * Accessors + */ +bool wavefront_sequences_cmp( + wavefront_sequences_t* const wf_sequences, + const int pattern_pos, + const int text_pos) { + // Select mode + if (wf_sequences->mode == wf_sequences_lambda) { + // Custom function to compare sequences + alignment_match_funct_t match_funct = wf_sequences->match_funct; + void* match_funct_arguments = wf_sequences->match_funct_arguments; + // Check coordinates (EOS) + const int pattern_length = wf_sequences->pattern_length; + const int text_length = wf_sequences->text_length; + 
if (pattern_pos >= pattern_length || text_pos >= text_length) return false; + // Compare using lambda (given coordinates) + const int pattern_begin = wf_sequences->pattern_begin; + const int text_begin = wf_sequences->text_begin; + if (wf_sequences->reverse) { + const int pattern_end = pattern_begin + pattern_length - 1; + const int text_end = text_begin + text_length - 1; + return match_funct(pattern_end-pattern_pos,text_end-text_pos,match_funct_arguments); + } else { + return match_funct(pattern_begin+pattern_pos,text_begin+text_pos,match_funct_arguments); + } + } else { + // Compare regular strings + return wf_sequences->pattern[pattern_pos] == wf_sequences->text[text_pos]; + } +} +char wavefront_sequences_get_pattern( + wavefront_sequences_t* const wf_sequences, + const int position) { + if (wf_sequences->mode == wf_sequences_lambda) { + return '-'; + } else { + return wf_sequences->pattern[position]; + } +} +char wavefront_sequences_get_text( + wavefront_sequences_t* const wf_sequences, + const int position) { + if (wf_sequences->mode == wf_sequences_lambda) { + return '-'; + } else { + return wf_sequences->text[position]; + } +} +/* + * Resize/Update + */ +void wavefront_sequences_set_bounds( + wavefront_sequences_t* const wf_sequences, + const int pattern_begin, + const int pattern_end, + const int text_begin, + const int text_end) { + // Select mode + if (wf_sequences->mode != wf_sequences_lambda) { + // Restore previous EOS char + const int pattern_length_old = wf_sequences->pattern_length; + const int text_length_old = wf_sequences->text_length; + wf_sequences->pattern[pattern_length_old] = wf_sequences->pattern_eos; + wf_sequences->text[text_length_old] = wf_sequences->text_eos; + // Focus on the new section of the sequences + if (wf_sequences->reverse) { + // Compare given coordinates + wf_sequences->pattern = wf_sequences->pattern_buffer + (wf_sequences->pattern_buffer_length - pattern_end); + wf_sequences->text = wf_sequences->text_buffer + 
(wf_sequences->text_buffer_length - text_end); + } else { + wf_sequences->pattern = wf_sequences->pattern_buffer + pattern_begin; + wf_sequences->text = wf_sequences->text_buffer + text_begin; + } + // Save EOS char and truncate sequence + const int pattern_length_new = pattern_end - pattern_begin; + const int text_length_new = text_end - text_begin; + wf_sequences->pattern_eos = wf_sequences->pattern[pattern_length_new]; + wf_sequences->text_eos = wf_sequences->text[text_length_new]; + wf_sequences->pattern[pattern_length_new] = WF_SEQUENCES_PATTERN_EOS; + wf_sequences->text[text_length_new] = WF_SEQUENCES_TEXT_EOS; + } + // Set bounds + wf_sequences->pattern_begin = pattern_begin; + wf_sequences->pattern_length = pattern_end - pattern_begin; + wf_sequences->text_begin = text_begin; + wf_sequences->text_length = text_end - text_begin; +} + diff --git a/src/common/wflign/deps/WFA2-lib/wavefront/wavefront_sequences.h b/src/common/wflign/deps/WFA2-lib/wavefront/wavefront_sequences.h new file mode 100644 index 00000000..7e047c3c --- /dev/null +++ b/src/common/wflign/deps/WFA2-lib/wavefront/wavefront_sequences.h @@ -0,0 +1,148 @@ +/* + * The MIT License + * + * Wavefront Alignment Algorithms + * Copyright (c) 2017 by Santiago Marco-Sola + * + * This file is part of Wavefront Alignment Algorithms. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * PROJECT: Wavefront Alignment Algorithms + * AUTHOR(S): Santiago Marco-Sola + * DESCRIPTION: WFA module to encapsulate the input sequences + */ + +#ifndef WAVEFRONT_SEQUENCES_H_ +#define WAVEFRONT_SEQUENCES_H_ + +#include "utils/commons.h" + +/* + * Custom extend-match function, e.g.: + * + * typedef struct { + * char* pattern; + * int pattern_length; + * char* text; + * int text_length; + * } match_function_params_t; + * + * int match_function(int v,int h,void* arguments) { + * // Extract parameters + * match_function_params_t* match_arguments = (match_function_params_t*)arguments; + * // Check match + * if (v >= match_arguments->pattern_length || h >= match_arguments->text_length) return 0; + * return (match_arguments->pattern[v] == match_arguments->text[h]); + * } + */ +typedef int (*alignment_match_funct_t)(int,int,void*); + +/* + * Wavefront Sequences + */ +typedef enum { + wf_sequences_ascii = 0, + wf_sequences_lambda = 1, + wf_sequences_packed2bits = 2, +} wf_sequences_mode_t; +typedef struct { + // Mode + wf_sequences_mode_t mode; // Sequences mode + bool reverse; // Reverse sequences + // Current sequences & bounds + char* pattern; // Pointer to current pattern sequence (padded) + char* text; // Pointer to current text sequence (padded) + int pattern_begin; // Pattern begin offset + int pattern_length; // Pattern length + int text_begin; // Text begin offset + int text_length; // Text length + // Lambda Sequence + alignment_match_funct_t match_funct; // Custom
matching function (match(v,h,args)) + void* match_funct_arguments; // Generic arguments passed to matching function (args) + // Internal buffers (ASCII encoded) + char* seq_buffer; // Internal buffer + int seq_buffer_allocated; // Internal buffer allocated + char* pattern_buffer; // Source pattern sequence + char* text_buffer; // Source text sequence + int pattern_buffer_length; // Source pattern length + int text_buffer_length; // Source text length + char pattern_eos; // Source pattern char at EOS + char text_eos; // Source text char at EOS +} wavefront_sequences_t; + +/* + * Setup + */ +void wavefront_sequences_allocate( + wavefront_sequences_t* const wf_sequences); +void wavefront_sequences_free( + wavefront_sequences_t* const wf_sequences); + +/* + * Init Sequences + */ +void wavefront_sequences_init_ascii( + wavefront_sequences_t* const wf_sequences, + const char* const pattern, + const int pattern_length, + const char* const text, + const int text_length, + const bool reverse); +void wavefront_sequences_init_lambda( + wavefront_sequences_t* const wf_sequences, + alignment_match_funct_t match_funct, + void* match_funct_arguments, + const int pattern_length, + const int text_length, + const bool reverse); +void wavefront_sequences_init_packed2bits( + wavefront_sequences_t* const wf_sequences, + const uint8_t* const pattern, + const int pattern_length, + const uint8_t* const text, + const int text_length, + const bool reverse); + + +/* + * Accessors + */ +bool wavefront_sequences_cmp( + wavefront_sequences_t* const wf_sequences, + const int pattern_pos, + const int text_pos); +char wavefront_sequences_get_pattern( + wavefront_sequences_t* const wf_sequences, + const int position); +char wavefront_sequences_get_text( + wavefront_sequences_t* const wf_sequences, + const int position); + +/* + * Resize/Update + */ +void wavefront_sequences_set_bounds( + wavefront_sequences_t* const wf_sequences, + const int pattern_begin, + const int pattern_end, + const int
text_begin, + const int text_end); + +#endif /* WAVEFRONT_SEQUENCES_H_ */ diff --git a/src/common/wflign/deps/WFA2-lib/wavefront/wavefront_slab.c b/src/common/wflign/deps/WFA2-lib/wavefront/wavefront_slab.c index 89a897b5..98a4f3f9 100644 --- a/src/common/wflign/deps/WFA2-lib/wavefront/wavefront_slab.c +++ b/src/common/wflign/deps/WFA2-lib/wavefront/wavefront_slab.c @@ -29,6 +29,8 @@ * DESCRIPTION: WaveFront Slab for fast pre-allocated wavefronts' memory handling */ +#include "utils/commons.h" +#include "system/mm_allocator.h" #include "wavefront_slab.h" /* @@ -286,6 +288,3 @@ uint64_t wavefront_slab_get_size( wavefront_slab_t* const wavefront_slab) { return wavefront_slab->memory_used; } - - - diff --git a/src/common/wflign/deps/WFA2-lib/wavefront/wavefront_slab.h b/src/common/wflign/deps/WFA2-lib/wavefront/wavefront_slab.h index 90f6bb63..9e1d2456 100644 --- a/src/common/wflign/deps/WFA2-lib/wavefront/wavefront_slab.h +++ b/src/common/wflign/deps/WFA2-lib/wavefront/wavefront_slab.h @@ -32,7 +32,6 @@ #ifndef WAVEFRONT_SLAB_H_ #define WAVEFRONT_SLAB_H_ -#include "utils/commons.h" #include "utils/vector.h" #include "system/mm_allocator.h" #include "wavefront.h" diff --git a/src/common/wflign/deps/WFA2-lib/wavefront/wavefront_termination.c b/src/common/wflign/deps/WFA2-lib/wavefront/wavefront_termination.c new file mode 100644 index 00000000..e9e91bb8 --- /dev/null +++ b/src/common/wflign/deps/WFA2-lib/wavefront/wavefront_termination.c @@ -0,0 +1,162 @@ +/* + * The MIT License + * + * Wavefront Alignment Algorithms + * Copyright (c) 2017 by Santiago Marco-Sola + * + * This file is part of Wavefront Alignment Algorithms. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * PROJECT: Wavefront Alignment Algorithms + * AUTHOR(S): Santiago Marco-Sola + * DESCRIPTION: WFA module to check for the termination of an alignment + */ + +#include "wavefront_termination.h" + +/* + * Detect alignment termination (end of alignment) + */ +FORCE_NO_INLINE bool wavefront_termination_end2end( + wavefront_aligner_t* const wf_aligner, + wavefront_t* const mwavefront, + const int score, + const int score_mod) { + // Parameters + wavefront_sequences_t* const sequences = &wf_aligner->sequences; + const int pattern_length = sequences->pattern_length; + const int text_length = sequences->text_length; + const affine2p_matrix_type component_end = wf_aligner->component_end; + const int alignment_k = DPMATRIX_DIAGONAL(text_length,pattern_length); + const wf_offset_t alignment_offset = DPMATRIX_OFFSET(text_length,pattern_length); + // Select end component + switch (component_end) { + case affine2p_matrix_M: { + // Check diagonal/offset + if (mwavefront->lo > alignment_k || alignment_k > mwavefront->hi) return false; // Not done + const wf_offset_t moffset = mwavefront->offsets[alignment_k]; + if (moffset < alignment_offset) return false; // Not done + // We are done + wf_aligner->alignment_end_pos.score = score; + wf_aligner->alignment_end_pos.k = alignment_k; + wf_aligner->alignment_end_pos.offset = alignment_offset; + return true; + } + case affine2p_matrix_I1: { + // Fetch I1-wavefront & check diagonal/offset + wavefront_t* const i1wavefront = wf_aligner->wf_components.i1wavefronts[score_mod]; + if (i1wavefront == NULL || i1wavefront->lo > alignment_k || alignment_k > i1wavefront->hi) return false; // Not done + const wf_offset_t i1offset = i1wavefront->offsets[alignment_k]; + if (i1offset < alignment_offset) return false; // Not done + // We are done + wf_aligner->alignment_end_pos.score = score; + wf_aligner->alignment_end_pos.k = alignment_k; + wf_aligner->alignment_end_pos.offset = alignment_offset; + return true; + } + case affine2p_matrix_I2: { + 
// Fetch I2-wavefront & check diagonal/offset + wavefront_t* const i2wavefront = wf_aligner->wf_components.i2wavefronts[score_mod]; + if (i2wavefront == NULL || i2wavefront->lo > alignment_k || alignment_k > i2wavefront->hi) return false; // Not done + const wf_offset_t i2offset = i2wavefront->offsets[alignment_k]; + if (i2offset < alignment_offset) return false; // Not done + // We are done + wf_aligner->alignment_end_pos.score = score; + wf_aligner->alignment_end_pos.k = alignment_k; + wf_aligner->alignment_end_pos.offset = alignment_offset; + return true; + } + case affine2p_matrix_D1: { + // Fetch D1-wavefront & check diagonal/offset + wavefront_t* const d1wavefront = wf_aligner->wf_components.d1wavefronts[score_mod]; + if (d1wavefront == NULL || d1wavefront->lo > alignment_k || alignment_k > d1wavefront->hi) return false; // Not done + const wf_offset_t d1offset = d1wavefront->offsets[alignment_k]; + if (d1offset < alignment_offset) return false; // Not done + // We are done + wf_aligner->alignment_end_pos.score = score; + wf_aligner->alignment_end_pos.k = alignment_k; + wf_aligner->alignment_end_pos.offset = alignment_offset; + return true; + } + case affine2p_matrix_D2: { + // Fetch D2-wavefront & check diagonal/offset + wavefront_t* const d2wavefront = wf_aligner->wf_components.d2wavefronts[score_mod]; + if (d2wavefront == NULL || d2wavefront->lo > alignment_k || alignment_k > d2wavefront->hi) return false; // Not done + const wf_offset_t d2offset = d2wavefront->offsets[alignment_k]; + if (d2offset < alignment_offset) return false; // Not done + // We are done + wf_aligner->alignment_end_pos.score = score; + wf_aligner->alignment_end_pos.k = alignment_k; + wf_aligner->alignment_end_pos.offset = alignment_offset; + return true; + } + default: + break; + } + return false; +} +FORCE_NO_INLINE bool wavefront_termination_endsfree( + wavefront_aligner_t* const wf_aligner, + wavefront_t* const mwavefront, + const int score, + const int k, + const wf_offset_t 
offset) { + // Parameters + wavefront_sequences_t* const sequences = &wf_aligner->sequences; + const int pattern_length = sequences->pattern_length; + const int text_length = sequences->text_length; + // Check ends-free reaching boundaries + const int h_pos = WAVEFRONT_H(k,offset); + const int v_pos = WAVEFRONT_V(k,offset); + if (h_pos >= text_length) { // Text is aligned + // Is Pattern end-free? + const int pattern_left = pattern_length - v_pos; + const int pattern_end_free = wf_aligner->alignment_form.pattern_end_free; + if (pattern_left <= pattern_end_free) { + #ifdef WFA_PARALLEL + #pragma omp critical + #endif + { + wf_aligner->alignment_end_pos.score = score; + wf_aligner->alignment_end_pos.k = k; + wf_aligner->alignment_end_pos.offset = offset; + } + return true; // Quit (we are done) + } + } + if (v_pos >= pattern_length) { // Pattern is aligned + // Is text end-free? + const int text_left = text_length - h_pos; + const int text_end_free = wf_aligner->alignment_form.text_end_free; + if (text_left <= text_end_free) { + #ifdef WFA_PARALLEL + #pragma omp critical + #endif + { + wf_aligner->alignment_end_pos.score = score; + wf_aligner->alignment_end_pos.k = k; + wf_aligner->alignment_end_pos.offset = offset; + } + return true; // Quit (we are done) + } + } + // Not done + return false; +} diff --git a/src/common/wflign/deps/WFA2-lib/wavefront/wavefront_termination.h b/src/common/wflign/deps/WFA2-lib/wavefront/wavefront_termination.h new file mode 100644 index 00000000..05085ba8 --- /dev/null +++ b/src/common/wflign/deps/WFA2-lib/wavefront/wavefront_termination.h @@ -0,0 +1,57 @@ +/* + * The MIT License + * + * Wavefront Alignment Algorithms + * Copyright (c) 2017 by Santiago Marco-Sola + * + * This file is part of Wavefront Alignment Algorithms. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * PROJECT: Wavefront Alignment Algorithms + * AUTHOR(S): Santiago Marco-Sola + * DESCRIPTION: WFA module to check for the termination of an alignment + */ + +#ifndef WAVEFRONT_TERMINATION_H_ +#define WAVEFRONT_TERMINATION_H_ + +#include "wavefront_aligner.h" + +/* + * Necessary condition for ends-free termination + */ +#define WF_TERMINATION_ENDSFREE(h,v) ((h >= text_length) || (v >= pattern_length)) + +/* + * Detect alignment termination (end of alignment) + */ +bool wavefront_termination_end2end( + wavefront_aligner_t* const wf_aligner, + wavefront_t* const mwavefront, + const int score, + const int score_mod); +bool wavefront_termination_endsfree( + wavefront_aligner_t* const wf_aligner, + wavefront_t* const mwavefront, + const int score, + const int k, + const wf_offset_t offset); + +#endif /* WAVEFRONT_TERMINATION_H_ */ diff --git a/src/common/wflign/deps/WFA2-lib/wavefront/wavefront_unialign.c b/src/common/wflign/deps/WFA2-lib/wavefront/wavefront_unialign.c index 4eb05052..3ccaf3af 100644 --- a/src/common/wflign/deps/WFA2-lib/wavefront/wavefront_unialign.c +++ b/src/common/wflign/deps/WFA2-lib/wavefront/wavefront_unialign.c @@ -28,6 +28,8 @@ * AUTHOR(S): Santiago Marco-Sola */ +#include "utils/commons.h" +#include "system/mm_allocator.h" #include "wavefront_unialign.h" #include "wavefront.h" #include "wavefront_attributes.h" @@ -46,11 +48,6 @@ #include "wavefront_backtrace.h" #include "wavefront_backtrace_buffer.h" -/* - * Configuration - */ -#define SEQUENCES_PADDING 10 - /* * Setup */ @@ -74,47 +71,17 @@ void wavefront_unialigner_system_clear( default: break; } - // Profile - timer_reset(&wf_aligner->system.timer); } /* * Resize */ void wavefront_unialign_resize( - wavefront_aligner_t* const wf_aligner, - const char* const pattern, - const int* const pattern_lambda, - const int pattern_length, - const char* const text, - const int* const text_lambda, - const int text_length, - const bool reverse_sequences) { - // Configure sequences and status - 
wf_aligner->pattern_length = pattern_length; - wf_aligner->text_length = text_length; - if (wf_aligner->match_funct == NULL) { - if (wf_aligner->sequences != NULL) strings_padded_delete(wf_aligner->sequences); - wf_aligner->sequences = strings_padded_new_rhomb( - pattern,NULL,pattern_length, - text,NULL,text_length, - SEQUENCES_PADDING,reverse_sequences, - wf_aligner->mm_allocator); - wf_aligner->pattern = wf_aligner->sequences->pattern_padded; - wf_aligner->text = wf_aligner->sequences->text_padded; - wf_aligner->pattern_lambda = NULL; - wf_aligner->text_lambda = NULL; - } else { - if (wf_aligner->sequences != NULL) strings_padded_delete_lambda(wf_aligner->sequences); - wf_aligner->sequences = strings_padded_new_rhomb( - NULL,pattern_lambda,pattern_length, - NULL,text_lambda,text_length, - SEQUENCES_PADDING,reverse_sequences, - wf_aligner->mm_allocator); - wf_aligner->pattern = NULL; - wf_aligner->text = NULL; - wf_aligner->pattern_lambda = wf_aligner->sequences->pattern_lambda_padded; - wf_aligner->text_lambda = wf_aligner->sequences->text_lambda_padded; - } + wavefront_aligner_t* const wf_aligner) { + // Parameters + wavefront_sequences_t* const sequences = &wf_aligner->sequences; + const int pattern_length = sequences->pattern_length; + const int text_length = sequences->text_length; + // Configure status wavefront_unialign_status_clear(&wf_aligner->align_status); // Heuristics clear wavefront_heuristic_clear(&wf_aligner->heuristic); @@ -133,157 +100,14 @@ void wavefront_unialign_resize( /* * Initialize alignment */ -void wavefront_unialign_initialize_wavefront_m( - wavefront_aligner_t* const wf_aligner, - const int pattern_length, - const int text_length) { - // Parameters - wavefront_slab_t* const wavefront_slab = wf_aligner->wavefront_slab; - wavefront_components_t* const wf_components = &wf_aligner->wf_components; - const distance_metric_t distance_metric = wf_aligner->penalties.distance_metric; - wavefront_penalties_t* const penalties = 
&wf_aligner->penalties; - alignment_form_t* const form = &wf_aligner->alignment_form; - // Consider ends-free - const int hi = (penalties->match==0) ? form->text_begin_free : 0; - const int lo = (penalties->match==0) ? -form->pattern_begin_free : 0; - // Compute dimensions - int effective_lo, effective_hi; - wavefront_compute_limits_output(wf_aligner,lo,hi,&effective_lo,&effective_hi); - // Initialize end2end (wavefront zero) - wf_components->mwavefronts[0] = wavefront_slab_allocate(wavefront_slab,effective_lo,effective_hi); - wf_components->mwavefronts[0]->offsets[0] = 0; - wf_components->mwavefronts[0]->lo = lo; - wf_components->mwavefronts[0]->hi = hi; - // Store initial BT-piggypack element - if (wf_components->bt_piggyback) { - const bt_block_idx_t block_idx = wf_backtrace_buffer_init_block(wf_components->bt_buffer,0,0); - wf_components->mwavefronts[0]->bt_pcigar[0] = 0; - wf_components->mwavefronts[0]->bt_prev[0] = block_idx; - } - // Initialize ends-free - if (form->span == alignment_endsfree && penalties->match == 0) { - // Text begin-free - const int text_begin_free = form->text_begin_free; - int h; - for (h=1;h<=text_begin_free;++h) { - const int k = DPMATRIX_DIAGONAL(h,0); - wf_components->mwavefronts[0]->offsets[k] = DPMATRIX_OFFSET(h,0); - if (wf_components->bt_piggyback) { - const bt_block_idx_t block_idx = wf_backtrace_buffer_init_block(wf_components->bt_buffer,0,h); - wf_components->mwavefronts[0]->bt_pcigar[k] = 0; - wf_components->mwavefronts[0]->bt_prev[k] = block_idx; - } - } - // Pattern begin-free - const int pattern_begin_free = form->pattern_begin_free; - int v; - for (v=1;v<=pattern_begin_free;++v) { - const int k = DPMATRIX_DIAGONAL(0,v); - wf_components->mwavefronts[0]->offsets[k] = DPMATRIX_OFFSET(0,v); - if (wf_components->bt_piggyback) { - const bt_block_idx_t block_idx = wf_backtrace_buffer_init_block(wf_components->bt_buffer,v,0); - wf_components->mwavefronts[0]->bt_pcigar[k] = 0; - wf_components->mwavefronts[0]->bt_prev[k] = 
block_idx; - } - } - } - // Nullify unused WFs - if (distance_metric <= gap_linear) return; - wf_components->d1wavefronts[0] = NULL; - wf_components->i1wavefronts[0] = NULL; - if (distance_metric==gap_affine) return; - wf_components->d2wavefronts[0] = NULL; - wf_components->i2wavefronts[0] = NULL; -} -void wavefront_unialign_initialize_wavefronts( - wavefront_aligner_t* const wf_aligner, - const int pattern_length, - const int text_length) { - // Parameters - wavefront_slab_t* const wavefront_slab = wf_aligner->wavefront_slab; - wavefront_components_t* const wf_components = &wf_aligner->wf_components; - const distance_metric_t distance_metric = wf_aligner->penalties.distance_metric; - // Init wavefronts - if (wf_aligner->component_begin == affine2p_matrix_M) { - // Initialize - wavefront_unialign_initialize_wavefront_m(wf_aligner,pattern_length,text_length); - // Nullify unused WFs - if (distance_metric <= gap_linear) return; - wf_components->i1wavefronts[0] = NULL; - wf_components->d1wavefronts[0] = NULL; - if (distance_metric==gap_affine) return; - wf_components->i2wavefronts[0] = NULL; - wf_components->d2wavefronts[0] = NULL; - } else { - // Compute dimensions - int effective_lo, effective_hi; // Effective lo/hi - wavefront_compute_limits_output(wf_aligner,0,0,&effective_lo,&effective_hi); - wavefront_t* const wavefront = wavefront_slab_allocate(wavefront_slab,effective_lo,effective_hi); - // Initialize - switch (wf_aligner->component_begin) { - case affine2p_matrix_I1: - wf_components->mwavefronts[0] = NULL; - wf_components->i1wavefronts[0] = wavefront; - wf_components->i1wavefronts[0]->offsets[0] = 0; - wf_components->i1wavefronts[0]->lo = 0; - wf_components->i1wavefronts[0]->hi = 0; - wf_components->d1wavefronts[0] = NULL; - // Nullify unused WFs - if (distance_metric==gap_affine) return; - wf_components->i2wavefronts[0] = NULL; - wf_components->d2wavefronts[0] = NULL; - break; - case affine2p_matrix_I2: - wf_components->mwavefronts[0] = NULL; - 
wf_components->i1wavefronts[0] = NULL; - wf_components->d1wavefronts[0] = NULL; - wf_components->i2wavefronts[0] = wavefront; - wf_components->i2wavefronts[0]->offsets[0] = 0; - wf_components->i2wavefronts[0]->lo = 0; - wf_components->i2wavefronts[0]->hi = 0; - wf_components->d2wavefronts[0] = NULL; - break; - case affine2p_matrix_D1: - wf_components->mwavefronts[0] = NULL; - wf_components->i1wavefronts[0] = NULL; - wf_components->d1wavefronts[0] = wavefront; - wf_components->d1wavefronts[0]->offsets[0] = 0; - wf_components->d1wavefronts[0]->lo = 0; - wf_components->d1wavefronts[0]->hi = 0; - // Nullify unused WFs - if (distance_metric==gap_affine) return; - wf_components->i2wavefronts[0] = NULL; - wf_components->d2wavefronts[0] = NULL; - break; - case affine2p_matrix_D2: - wf_components->mwavefronts[0] = NULL; - wf_components->i1wavefronts[0] = NULL; - wf_components->d1wavefronts[0] = NULL; - wf_components->i2wavefronts[0] = NULL; - wf_components->d2wavefronts[0] = wavefront; - wf_components->d2wavefronts[0]->offsets[0] = 0; - wf_components->d2wavefronts[0]->lo = 0; - wf_components->d2wavefronts[0]->hi = 0; - break; - default: - break; - } - } -} void wavefront_unialign_init( wavefront_aligner_t* const wf_aligner, - const char* const pattern, - const int* const pattern_lambda, - const int pattern_length, - const char* const text, - const int* const text_lambda, - const int text_length, const affine2p_matrix_type component_begin, const affine2p_matrix_type component_end) { // Parameters wavefront_align_status_t* const align_status = &wf_aligner->align_status; // Resize wavefront aligner - wavefront_unialign_resize(wf_aligner,pattern,pattern_lambda,pattern_length,text,text_lambda,text_length,false); + wavefront_unialign_resize(wf_aligner); // Configure WF-compute function switch (wf_aligner->penalties.distance_metric) { case indel: @@ -306,19 +130,18 @@ void wavefront_unialign_init( } // Configure WF-extend function const bool end2end = 
(wf_aligner->alignment_form.span == alignment_end2end); - if (wf_aligner->match_funct != NULL) { - align_status->wf_align_extend = &wavefront_extend_custom; - } else if (end2end) { + if (end2end) { align_status->wf_align_extend = &wavefront_extend_end2end; } else { align_status->wf_align_extend = &wavefront_extend_endsfree; } // Initialize wavefront + align_status->num_null_steps = 0; wf_aligner->alignment_end_pos.score = -1; // Not aligned wf_aligner->alignment_end_pos.k = DPMATRIX_DIAGONAL_NULL; wf_aligner->component_begin = component_begin; wf_aligner->component_end = component_end; - wavefront_unialign_initialize_wavefronts(wf_aligner,pattern_length,text_length); + wavefront_aligner_init_wf(wf_aligner); // Plot (WF_0) if (wf_aligner->plot != NULL) wavefront_plot(wf_aligner,0,0); } @@ -330,7 +153,7 @@ bool wavefront_unialign_reached_limits( const int score) { // Check alignment-score limit if (score >= wf_aligner->system.max_alignment_score) { - wf_aligner->cigar->score = wf_aligner->system.max_alignment_score; + wf_aligner->cigar->score = -wf_aligner->system.max_alignment_score; wf_aligner->align_status.status = WF_STATUS_MAX_SCORE_REACHED; wf_aligner->align_status.score = score; return true; // Stop @@ -378,8 +201,9 @@ void wavefront_unialign_terminate( wavefront_aligner_t* const wf_aligner, const int score) { // Parameters - const int pattern_length = wf_aligner->pattern_length; - const int text_length = wf_aligner->text_length; + wavefront_sequences_t* const sequences = &wf_aligner->sequences; + const int pattern_length = sequences->pattern_length; + const int text_length = sequences->text_length; // Retrieve alignment if (wf_aligner->alignment_scope == compute_score) { cigar_clear(wf_aligner->cigar); @@ -429,7 +253,6 @@ int wavefront_unialign( void (*wf_align_compute)(wavefront_aligner_t* const,const int) = align_status->wf_align_compute; int (*wf_align_extend)(wavefront_aligner_t* const,const int) = align_status->wf_align_extend; // Compute wavefronts of 
increasing score - align_status->num_null_steps = 0; int score = align_status->score; while (true) { // Exact extend s-wavefront @@ -466,8 +289,11 @@ void wavefront_unialign_print_status( const int score) { // Parameters wavefront_components_t* const wf_components = &wf_aligner->wf_components; + wavefront_sequences_t* const sequences = &wf_aligner->sequences; + const int pattern_length = sequences->pattern_length; + const int text_length = sequences->text_length; // Approximate progress - const int dist_total = MAX(wf_aligner->text_length,wf_aligner->pattern_length); + const int dist_total = MAX(text_length,pattern_length); int s = (wf_components->memory_modular) ? score%wf_components->max_score_scope : score; wavefront_t* wavefront = wf_components->mwavefronts[s]; if (wavefront==NULL && s>0) { @@ -492,12 +318,16 @@ void wavefront_unialign_print_status( const float million_offsets = (wf_len>=0) ? (float)wf_len/1000000.0f : -1.0f; // Print one-line status fprintf(stream,"["); - wavefront_aligner_print_type(stream,wf_aligner); + wavefront_aligner_print_mode(stream,wf_aligner); fprintf(stream, "] SequenceLength=(%d,%d) Score %d (~ %2.3f%% aligned). " "MemoryUsed(WF-Slab,BT-buffer)=(%lu MB,%lu MB). 
" "Wavefronts ~ %2.3f Moffsets\n", - wf_aligner->pattern_length,wf_aligner->text_length,score,aligned_progress, - CONVERT_B_TO_MB(slab_size),CONVERT_B_TO_MB(bt_buffer_used),million_offsets); + pattern_length, + text_length, + score, + aligned_progress, + CONVERT_B_TO_MB(slab_size), + CONVERT_B_TO_MB(bt_buffer_used), + million_offsets); } - diff --git a/src/common/wflign/deps/WFA2-lib/wavefront/wavefront_unialign.h b/src/common/wflign/deps/WFA2-lib/wavefront/wavefront_unialign.h index e28a21cd..b039ee5c 100644 --- a/src/common/wflign/deps/WFA2-lib/wavefront/wavefront_unialign.h +++ b/src/common/wflign/deps/WFA2-lib/wavefront/wavefront_unialign.h @@ -31,37 +31,19 @@ #ifndef WAVEFRONT_UNIALIGN_H_ #define WAVEFRONT_UNIALIGN_H_ -#include "utils/commons.h" #include "wavefront_aligner.h" /* * Resize */ void wavefront_unialign_resize( - wavefront_aligner_t* const wf_aligner, - const char* const pattern, - const int* const pattern_lambda, - const int pattern_length, - const char* const text, - const int* const text_lambda, - const int text_length, - const bool reverse_sequences); + wavefront_aligner_t* const wf_aligner); /* * Initialize alignment */ -void wavefront_unialign_initialize_wavefronts( - wavefront_aligner_t* const wf_aligner, - const int pattern_length, - const int text_length); void wavefront_unialign_init( wavefront_aligner_t* const wf_aligner, - const char* const pattern, - const int* const pattern_lambda, - const int pattern_length, - const char* const text, - const int* const text_lambda, - const int text_length, const affine2p_matrix_type component_begin, const affine2p_matrix_type component_end); diff --git a/src/common/wflign/deps/WFA2-lib/wavefront/wfa.h b/src/common/wflign/deps/WFA2-lib/wavefront/wfa.h new file mode 100644 index 00000000..e29c0eb1 --- /dev/null +++ b/src/common/wflign/deps/WFA2-lib/wavefront/wfa.h @@ -0,0 +1,235 @@ +/* + * The MIT License + * + * Wavefront Alignment Algorithms + * Copyright (c) 2017 by Santiago Marco-Sola + * + * This 
file is part of Wavefront Alignment Algorithms. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * PROJECT: Wavefront Alignment Algorithms + * AUTHOR(S): Santiago Marco-Sola + * DESCRIPTION: WaveFront aligner external C-ABI functions with accompanying data structures + */ + +#pragma once + +#include "system/mm_allocator.h" +#include "wavefront_slab.h" +#include "wavefront_penalties.h" +#include "wavefront_attributes.h" +#include "wavefront_components.h" +#include "wavefront_sequences.h" +#include "wavefront_bialigner.h" + +/* + * Error codes & messages + */ +// Success +#define WF_STATUS_SUCCESSFUL 0 +// Errors (negative status codes) +#define WF_STATUS_UNFEASIBLE -1 +#define WF_STATUS_MAX_SCORE_REACHED -2 +#define WF_STATUS_OOM -3 +// Internal (positive status code) +#define WF_STATUS_END_REACHED 1 +// Error messages (wf_error_msg has 5 entries, the same count as the status codes above; NOTE(review): confirm the code-to-index mapping in the definition) +extern char* wf_error_msg[5]; +char* wavefront_align_strerror(const int error_code); +char* wavefront_align_strerror_short(const int error_code); + +/* + * Alignment status (wavefront_aligner_t is forward-declared first so the function pointers in the status struct can take it) + */ +typedef struct _wavefront_aligner_t wavefront_aligner_t; +typedef struct { + // Status + int status; // Status code + int score; // Current WF-alignment score + int num_null_steps; // Total contiguous null-steps performed + uint64_t memory_used; // Total memory used + // Wavefront alignment functions + void (*wf_align_compute)(wavefront_aligner_t* const,const int); // WF Compute function + int (*wf_align_extend)(wavefront_aligner_t* const,const int); // WF Extend function +} wavefront_align_status_t; + +/* + * Alignment type (stored in wavefront_aligner_t.align_mode) + */ +typedef enum { + wf_align_regular = 0, + wf_align_biwfa = 1, + wf_align_biwfa_breakpoint_forward = 2, + wf_align_biwfa_breakpoint_reverse = 3, + wf_align_biwfa_subsidiary = 4 +} wavefront_align_mode_t; + +/* + * Wavefront Aligner (completes the wavefront_aligner_t type forward-declared above) + */ +typedef struct _wavefront_aligner_t { + // Mode and status + wavefront_align_mode_t align_mode; // WFA alignment mode + char* align_mode_tag; // WFA mode tag + wavefront_align_status_t align_status; // Current alignment status + // Sequences + wavefront_sequences_t sequences; // Input sequences + // Alignment Attributes + alignment_scope_t 
alignment_scope; // Alignment scope (score only or full-CIGAR) + alignment_form_t alignment_form; // Alignment form (end-to-end/ends-free) + wavefront_penalties_t penalties; // Alignment penalties + wavefront_heuristic_t heuristic; // Heuristic's parameters + wavefront_memory_t memory_mode; // Wavefront memory strategy (modular wavefronts and piggyback) + // Wavefront components + wavefront_components_t wf_components; // Wavefront components + affine2p_matrix_type component_begin; // Alignment begin component + affine2p_matrix_type component_end; // Alignment end component + wavefront_pos_t alignment_end_pos; // Alignment end position + // Bidirectional Alignment + wavefront_bialigner_t* bialigner; // BiWFA aligner + // CIGAR + cigar_t* cigar; // Alignment CIGAR + // MM + bool mm_allocator_own; // Ownership of MM-Allocator + mm_allocator_t* mm_allocator; // MM-Allocator + wavefront_slab_t* wavefront_slab; // MM-Wavefront-Slab (Allocates/Reuses the individual wavefronts) + // Display + wavefront_plot_t* plot; // Wavefront plot + // System + alignment_system_t system; // System related parameters +} wavefront_aligner_t; + +/* + * Setup + */ +wavefront_aligner_t* wavefront_aligner_new( + wavefront_aligner_attr_t* attributes); +void wavefront_aligner_reap( + wavefront_aligner_t* const wf_aligner); +void wavefront_aligner_delete( + wavefront_aligner_t* const wf_aligner); + +/* + * Initialize wf-alignment conditions + */ +void wavefront_aligner_init_wf( + wavefront_aligner_t* const wf_aligner); + +/* + * Span configuration (end-to-end or ends-free alignment) + */ +void wavefront_aligner_set_alignment_end_to_end( + wavefront_aligner_t* const wf_aligner); +void wavefront_aligner_set_alignment_free_ends( + wavefront_aligner_t* const wf_aligner, + const int pattern_begin_free, + const int pattern_end_free, + const int text_begin_free, + const int text_end_free); + +/* + * Heuristic configuration + */ +void wavefront_aligner_set_heuristic_none( + wavefront_aligner_t* const wf_aligner); +void 
wavefront_aligner_set_heuristic_wfadaptive( + wavefront_aligner_t* const wf_aligner, + const int min_wavefront_length, + const int max_distance_threshold, + const int score_steps); +void wavefront_aligner_set_heuristic_wfmash( + wavefront_aligner_t* const wf_aligner, + const int min_wavefront_length, + const int max_distance_threshold, + const int score_steps); +void wavefront_aligner_set_heuristic_xdrop( + wavefront_aligner_t* const wf_aligner, + const int xdrop, + const int score_steps); +void wavefront_aligner_set_heuristic_zdrop( + wavefront_aligner_t* const wf_aligner, + const int ydrop, /* NOTE(review): parameter is named "ydrop" although this is the zdrop setter; confirm against the definition */ + const int score_steps); +void wavefront_aligner_set_heuristic_banded_static( + wavefront_aligner_t* const wf_aligner, + const int band_min_k, + const int band_max_k); +void wavefront_aligner_set_heuristic_banded_adaptive( + wavefront_aligner_t* const wf_aligner, + const int band_min_k, + const int band_max_k, + const int score_steps); + +/* + * System configuration + */ +void wavefront_aligner_set_max_alignment_score( + wavefront_aligner_t* const wf_aligner, + const int max_alignment_score); +void wavefront_aligner_set_max_memory( + wavefront_aligner_t* const wf_aligner, + const uint64_t max_memory_resident, + const uint64_t max_memory_abort); +void wavefront_aligner_set_max_num_threads( + wavefront_aligner_t* const wf_aligner, + const int max_num_threads); +void wavefront_aligner_set_min_offsets_per_thread( + wavefront_aligner_t* const wf_aligner, + const int min_offsets_per_thread); +/* + * Utils + */ +uint64_t wavefront_aligner_get_size( + wavefront_aligner_t* const wf_aligner); + +/* + * Display + */ +void wavefront_aligner_print_mode( + FILE* const stream, + wavefront_aligner_t* const wf_aligner); +void wavefront_aligner_print_scope( + FILE* const stream, + wavefront_aligner_t* const wf_aligner); +void wavefront_aligner_print_conf( + FILE* const stream, + wavefront_aligner_t* const wf_aligner); + +/* + * Wavefront Align (three entry points that differ in how the sequences are supplied: char buffers, a match callback with an opaque argument, or uint8_t buffers; each returns an int, presumably a WF_STATUS code from the top of this header; NOTE(review): confirm) + */ +int wavefront_align( + wavefront_aligner_t* 
const wf_aligner, + const char* const pattern, + const int pattern_length, + const char* const text, + const int text_length); +int wavefront_align_lambda( + wavefront_aligner_t* const wf_aligner, + alignment_match_funct_t const match_funct, + void* match_funct_arguments, + const int pattern_length, + const int text_length); +int wavefront_align_packed2bits( + wavefront_aligner_t* const wf_aligner, + const uint8_t* const pattern, + const int pattern_length, + const uint8_t* const text, + const int text_length); diff --git a/src/common/wflign/deps/WFA2-lib/wavefront/wfa.hpp b/src/common/wflign/deps/WFA2-lib/wavefront/wfa.hpp new file mode 100644 index 00000000..7908b974 --- /dev/null +++ b/src/common/wflign/deps/WFA2-lib/wavefront/wfa.hpp @@ -0,0 +1,36 @@ +/* + * The MIT License + * + * Wavefront Alignment Algorithms + * Copyright (c) 2017 by Santiago Marco-Sola + * + * This file is part of Wavefront Alignment Algorithms. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * PROJECT: Wavefront Alignment Algorithms + * AUTHOR(S): Santiago Marco-Sola + * DESCRIPTION: WaveFront aligner external C-ABI functions with accompanying data structures + */ + +#pragma once + +extern "C" { /* C++ wrapper: includes the C header wfa.h inside an extern "C" block so its declarations get C linkage */ + #include "wfa.h" +} diff --git a/src/common/wflign/src/wflign.cpp b/src/common/wflign/src/wflign.cpp index 2d6d0502..0ad10868 100644 --- a/src/common/wflign/src/wflign.cpp +++ b/src/common/wflign/src/wflign.cpp @@ -604,10 +604,11 @@ void WFlign::wflign_affine_wavefront( extend_data.emit_png = !prefix_wavefront_plot_in_png->empty() && wfplot_max_size > 0; extend_data.high_order_dp_matrix_mismatch = &high_order_dp_matrix_mismatch; #endif - wflambda_aligner->setMatchFunct(wflambda_extend_match,(void*)&extend_data); // Align - wflambda_aligner->alignEnd2EndLambda(pattern_length,text_length); + wflambda_aligner->alignEnd2EndLambda( + wflambda_extend_match,(void*)&extend_data, + pattern_length,text_length); // Extract the trace if (wflambda_aligner->getAlignmentStatus() == WF_STATUS_SUCCESSFUL) {