From bec861285d7c560af59913f3ce88d4770988d1f1 Mon Sep 17 00:00:00 2001 From: hisakatha Date: Sat, 7 Aug 2021 09:30:22 +0900 Subject: [PATCH] Add makewindows initial window/step feature Add a feature to makewindows that allows the initial window size and the initial step size to be specified. Now makewindows can treat the first window specially, which is useful, for example, for tiling chromosomes by windows representing rounded coordinates. For example: chr1 0 500 chr1 500 1500 chr1 1500 2500 ... --- src/windowMaker/windowMaker.cpp | 34 ++++++++--- src/windowMaker/windowMaker.h | 4 +- src/windowMaker/windowMakerMain.cpp | 57 +++++++++++++++++- test/makewindows/test-makewindows.sh | 86 ++++++++++++++++++++++++++++ 4 files changed, 171 insertions(+), 10 deletions(-) diff --git a/src/windowMaker/windowMaker.cpp b/src/windowMaker/windowMaker.cpp index c4b07e2d1..05350e635 100644 --- a/src/windowMaker/windowMaker.cpp +++ b/src/windowMaker/windowMaker.cpp @@ -11,13 +11,15 @@ Licenced under the GNU General Public License 2.0 license. 
******************************************************************************/ #include "windowMaker.h" -WindowMaker::WindowMaker(string &fileName, ID_METHOD id_method, INPUT_FILE_TYPE input_file_type, uint32_t size, uint32_t step, bool reverse) +WindowMaker::WindowMaker(string &fileName, ID_METHOD id_method, INPUT_FILE_TYPE input_file_type, uint32_t size, uint32_t step, uint32_t initialSize, uint32_t initialStep, bool reverse) : _size(size) , _step(step) , _count(0) , _reverse(reverse) , _window_method(FIXED_WINDOW_SIZE) , _id_method(id_method) +, _initialSize(initialSize) +, _initialStep(initialStep) { if (input_file_type==GENOME_FILE) MakeWindowsFromGenome(fileName); @@ -32,6 +34,8 @@ WindowMaker::WindowMaker(string &fileName, ID_METHOD id_method, INPUT_FILE_TYPE , _reverse(reverse) , _window_method(FIXED_WINDOW_COUNT) , _id_method(id_method) + , _initialSize(0) + , _initialStep(0) { if (input_file_type==GENOME_FILE) MakeWindowsFromGenome(fileName); @@ -79,24 +83,38 @@ void WindowMaker::MakeBEDWindow(const BED& interval) } uint32_t WindowMaker::CalculateWindows(const BED& interval) { - uint32_t num_windows = (interval.end - interval.start) / _step; - if ((interval.end - interval.start) % _step > 0) { - // add 1 to num_windows if the last window is less than _step - num_windows += 1; + uint32_t num_windows = 0; + if (interval.end - interval.start > 0) { + num_windows = 1; + } + CHRPOS rest_interval = interval.end - interval.start - (CHRPOS)_initialStep; + // need to avoid adding a negative number, which may result in a huge unsigned positive number + if (rest_interval > 0) { + num_windows += rest_interval / _step; + if (rest_interval % _step > 0) { + // add 1 to num_windows if the last window is less than _step + num_windows += 1; + } } return num_windows; } + void WindowMaker::MakeFixedSizeWindow(const BED& interval) { uint32_t i=1; + uint32_t current_size = _initialSize; + uint32_t current_step = _initialStep; uint32_t num_windows = CalculateWindows(interval); 
- for (uint32_t start = interval.start; start <= interval.end; start += _step, ++i) { + for (uint32_t start = interval.start; start <= interval.end; ++i) { string name = GenerateID(interval, i, num_windows, _reverse); - if ((start + _size) <= interval.end) { - cout << interval.chrom << "\t" << start << "\t" << start + _size << name << endl; + if ((start + current_size) <= interval.end) { + cout << interval.chrom << "\t" << start << "\t" << start + current_size << name << endl; } else if (start < interval.end) { cout << interval.chrom << "\t" << start << "\t" << interval.end << name << endl; } + start += current_step; + current_step = _step; // for i >= 2 + current_size = _size; // for i >= 2 } } diff --git a/src/windowMaker/windowMaker.h b/src/windowMaker/windowMaker.h index c823958c7..c48bf5a26 100644 --- a/src/windowMaker/windowMaker.h +++ b/src/windowMaker/windowMaker.h @@ -38,7 +38,7 @@ class WindowMaker { // constructor WindowMaker(string &fileName, ID_METHOD id_method, INPUT_FILE_TYPE input_file_type, uint32_t count, bool reverse); - WindowMaker(string &fileName, ID_METHOD id_method, INPUT_FILE_TYPE input_file_type, uint32_t size, uint32_t step, bool reverse); + WindowMaker(string &fileName, ID_METHOD id_method, INPUT_FILE_TYPE input_file_type, uint32_t size, uint32_t step, uint32_t initialSize, uint32_t initialStep, bool reverse); // destructor ~WindowMaker(void); @@ -53,6 +53,8 @@ class WindowMaker { bool _reverse; // should window numbering be reversed? 
WINDOW_METHOD _window_method; ID_METHOD _id_method; + uint32_t _initialSize; + uint32_t _initialStep; void MakeBEDWindow(const BED& interval); diff --git a/src/windowMaker/windowMakerMain.cpp b/src/windowMaker/windowMakerMain.cpp index c388efd95..264fe5451 100644 --- a/src/windowMaker/windowMakerMain.cpp +++ b/src/windowMaker/windowMakerMain.cpp @@ -38,6 +38,8 @@ int windowmaker_main(int argc, char* argv[]) { uint32_t size = 0; uint32_t step = 0; uint32_t count = 0; + uint32_t initialSize = 0; + uint32_t initialStep = 0; bool haveGenome = false; bool haveBed = false; @@ -45,6 +47,8 @@ int windowmaker_main(int argc, char* argv[]) { bool haveSize = false; bool haveCount = false; bool reverse = false; + bool haveInitialSize = false; + bool haveInitialStep = false; for(int i = 1; i < argc; i++) { int parameterLength = (int)strlen(argv[i]); @@ -117,6 +121,30 @@ int windowmaker_main(int argc, char* argv[]) { else if(PARAMETER_CHECK("-reverse", 8, parameterLength)) { reverse = true; } + else if(PARAMETER_CHECK("-iw", 3, parameterLength)) { + if ((i+1) < argc) { + haveInitialSize = true; + int tmp = atoi(argv[i + 1]); + if (tmp <= 0) { + cerr << endl << "*****" << endl << "*****ERROR: The initial window size (-iw) option must be greater than zero." << endl << "*****" << endl; + showHelp = true; + } + initialSize = tmp; + i++; + } + } + else if(PARAMETER_CHECK("-is", 3, parameterLength)) { + if ((i+1) < argc) { + haveInitialStep = true; + int tmp = atoi(argv[i + 1]); + if (tmp <= 0) { + cerr << endl << "*****" << endl << "*****ERROR: The initial step size (-is) option must be greater than zero." << endl << "*****" << endl; + showHelp = true; + } + initialStep = tmp; + i++; + } + } else { cerr << endl << "*****ERROR: Unrecognized parameter: " << argv[i] << " *****" << endl << endl; showHelp = true; @@ -151,6 +179,24 @@ int windowmaker_main(int argc, char* argv[]) { cerr << endl << "*****" << endl << "*****ERROR: The step (-s) option must be greater than zero. 
" << endl << "*****" << endl; showHelp = true; } + if (haveInitialSize && !haveSize) { + // imply that initialSize cannot be combined with count + cerr << endl << "*****" << endl << "*****ERROR: Need -w (window size) to use -iw (initial window size)." << endl << "*****" << endl; + showHelp = true; + } + if (haveInitialStep && !haveStep) { + // imply that initialStep cannot be combined with count + cerr << endl << "*****" << endl << "*****ERROR: Need -s (step size) to use -is (initial step size)." << endl << "*****" << endl; + showHelp = true; + } + if (!haveInitialSize) { + initialSize = size; + if (!haveInitialStep) { + initialStep = step; + } + } else if (!haveInitialStep) { + initialStep = initialSize; + } if (!showHelp) { WindowMaker *wm = NULL; @@ -159,7 +205,7 @@ int windowmaker_main(int argc, char* argv[]) { inputFileType, count, reverse); if (haveSize) wm = new WindowMaker(inputFile, idMethod, - inputFileType, size, step, reverse); + inputFileType, size, step, initialSize, initialStep, reverse); delete wm; } else { @@ -199,6 +245,15 @@ void windowmaker_help(void) { cerr << "\t\tcreating a new window. Used to create \"sliding\" windows." << endl; cerr << "\t\t- Defaults to window size (non-sliding windows)." << endl << endl; + cerr << "\t-iw " << endl; + cerr << "\t\tWindow size for the first window. Cannot be used without -w " << endl; + cerr << "\t\t- Defaults to window size (i.e., the first window is treated equally)." << endl << endl; + + cerr << "\t-is " << endl; + cerr << "\t\tStep size for the first window. Cannot be used without -s " << endl; + cerr << "\t\t- Defaults to the initial window size if -iw is present." << endl; + cerr << "\t\t Otherwise, defaults to step size." << endl << endl; + cerr << "\t-n " << endl; cerr << "\t\tDivide each input interval (either a chromosome or a BED interval)" << endl; cerr << "\t\tto fixed number of windows (i.e. 
same number of windows, with" << endl; diff --git a/test/makewindows/test-makewindows.sh b/test/makewindows/test-makewindows.sh index 730a36780..f5d798a30 100644 --- a/test/makewindows/test-makewindows.sh +++ b/test/makewindows/test-makewindows.sh @@ -178,4 +178,90 @@ echo \ $BT makewindows -b a.19bp.bed -n 10 -i srcwinnum 2> obs check obs exp rm obs exp + +########################################################### +# Test window + initial window size +########################################################### +echo -e " makewindows.t10...\c" +echo \ +"chr5 60000 62000 3 +chr5 62000 66000 2 +chr5 66000 70000 1 +chr5 73000 75000 5 +chr5 75000 79000 4 +chr5 79000 83000 3 +chr5 83000 87000 2 +chr5 87000 90000 1 +chr5 100000 101000 1" > exp +$BT makewindows -b input.bed -w 4000 -iw 2000 -i winnum -reverse > obs +check obs exp +rm obs exp + +########################################################### +# Test window + step + initial step size +########################################################### +echo -e " makewindows.t11...\c" +echo \ +"chr5 60000 64000 3 +chr5 62000 66000 2 +chr5 66000 70000 1 +chr5 73000 77000 5 +chr5 75000 79000 4 +chr5 79000 83000 3 +chr5 83000 87000 2 +chr5 87000 90000 1 +chr5 100000 101000 1" > exp +$BT makewindows -b input.bed -w 4000 -s 4000 -is 2000 -i winnum -reverse > obs +check obs exp +rm obs exp + +########################################################### +# Test window + initial window + step + initial step size +########################################################### +echo -e " makewindows.t12...\c" +echo \ +"chr5 60000 62000 3 +chr5 63000 67000 2 +chr5 68000 70000 1 +chr5 73000 75000 4 +chr5 76000 80000 3 +chr5 81000 85000 2 +chr5 86000 90000 1 +chr5 100000 101000 1" > exp +$BT makewindows -b input.bed -w 4000 -iw 2000 -s 5000 -is 3000 -i winnum -reverse > obs +check obs exp +rm obs exp + +########################################################### +# Test window + initial window + step + initial step (forward numbering) 
+########################################################### +echo -e " makewindows.t13...\c" +echo \ +"chr5 60000 62000 1 +chr5 63000 67000 2 +chr5 68000 70000 3 +chr5 73000 75000 1 +chr5 76000 80000 2 +chr5 81000 85000 3 +chr5 86000 90000 4 +chr5 100000 101000 1" > exp +$BT makewindows -b input.bed -w 4000 -iw 2000 -s 5000 -is 3000 -i winnum > obs +check obs exp +rm obs exp + +########################################################### +# Test window + initial window + step + initial step (large initial sizes) +########################################################### +echo -e " makewindows.t14...\c" +echo \ +"chr5 60000 68000 1 +chr5 73000 81000 4 +chr5 83000 85000 3 +chr5 86000 88000 2 +chr5 89000 90000 1 +chr5 100000 101000 1" > exp +$BT makewindows -b input.bed -w 2000 -iw 8000 -s 3000 -is 10000 -i winnum -reverse > obs +check obs exp +rm obs exp + [[ $FAILURES -eq 0 ]] || exit 1;