-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathbedbin.py
executable file
·73 lines (61 loc) · 2.14 KB
/
bedbin.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#@Date: 2019-12-04 16:33:42
#@Author: runsheng, [email protected]
from __future__ import print_function
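# Generate fixed-size bins (windows) over a genome in BED format,
# e.g. to build a bin-sized coverage histogram.
# The output is a bed file like:
#
# I\t0\t1000\n
# I\t1000\t2000\n
# to use as the bed.a,
# then run `bedtools coverage -a bed.a -b bam` to get the per-interval coverage (wig) file.
# Example of the chromosome size input: the chromosome lengths of the C. elegans genome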
"""
chrI 15072423
chrII 15279345
chrIII 13783700
chrIV 17493793
chrV 20924149
chrX 17718866
"""
def get_chrlist(chrsize_txt):
    """
    Read a chromosome size file into a list of (name, length) tuples.

    Each non-blank line must hold the chromosome name in the first column
    and its integer length in the second column; any further columns
    (e.g. the extra fields of a samtools .fai index) are ignored.
    Columns are tab separated; a line without any tab is split on
    arbitrary whitespace instead.

    :param chrsize_txt: path of the chromosome size file
    :return: list of (chro, length) tuples in file order
    :raises ValueError: if a non-blank line has fewer than two columns or
                        the length column is not an integer
    """
    chr_list=[]
    with open(chrsize_txt, "r") as f:
        # iterate the file lazily instead of loading every line with readlines()
        for lineno, line in enumerate(f, 1):
            line=line.rstrip("\r\n")
            # blank lines (typically a trailing newline at the end of the
            # file) carry no record and used to crash the parser
            if not line.strip():
                continue
            fields=line.split("\t") if "\t" in line else line.split()
            if len(fields) < 2:
                raise ValueError(
                    "line {n} of {path} needs at least two columns "
                    "(name, length): {line!r}".format(
                        n=lineno, path=chrsize_txt, line=line))
            chro=fields[0]
            # int() tolerates surrounding whitespace and raises ValueError
            # when the column is not a number
            length=int(fields[1])
            chr_list.append((chro,length))
    return chr_list
def bed_generate(chr_list,binsize=50, step=0):
    """
    Print fixed-size bins for every chromosome in bed format to stdout.

    The output still uses 0 based [start end) coding. Each chromosome is
    tiled with consecutive bins of ``binsize`` bases; the last bin is
    truncated at the chromosome end. No zero-length interval is written,
    so a chromosome of length 0 produces no line.

    When ``step`` > 0, every full-size bin is directly followed by a second
    window shifted right by ``step`` bases. The shifted window is clipped to
    the chromosome end and dropped when it would start at or after it.

    :param chr_list: iterable of (chro, length) tuples, length as integer
    :param binsize: bin width in bases, must be a positive integer
    :param step: offset of the additional shifted window, 0 to disable
    :return: 0
    :raises ValueError: if binsize is not positive
    """
    if binsize <= 0:
        raise ValueError("binsize must be a positive integer")

    row="{chro}\t{start}\t{end}"
    for chro,length in chr_list:
        ### for one chro
        # range() with a stride only yields starts below the chromosome
        # length; this avoids the float produced by "/" on Python 3 and the
        # empty "length..length" bin when length is a multiple of binsize
        for start in range(0, length, binsize):
            end=start+binsize
            if end <= length:
                print(row.format(chro=chro, start=start, end=end))
                if step > 0 and start+step < length:
                    # keep the shifted window inside the chromosome
                    print(row.format(chro=chro, start=start+step,
                                     end=min(end+step, length)))
            else:
                # last, partial bin: stop at the chromosome end; print()
                # already terminates the line, so no extra "\n" is added
                print(row.format(chro=chro, start=start, end=length))
    return 0
if __name__=="__main__":
    import argparse
    # sys is needed for the sys.argv check below and is not imported at the
    # top of the file, so import it here next to argparse
    import sys

    parser=argparse.ArgumentParser(
        description="print fixed-size bins of a genome in bed format")
    parser.add_argument("-f", "--fa_size", required=True,
                        help="the size file for the fasta")
    parser.add_argument("-b", "--bin", type=int, default=50,
                        help="the bin size used")
    parser.add_argument("-s", "--step", type=int, default=0,
                        help="offset of an additional shifted window written "
                             "after each full bin, 0 to disable")
    # show the help text instead of an error when called without arguments
    args = parser.parse_args(args=None if sys.argv[1:] else ['--help'])

    # read the chromosome sizes and write the bins to stdout
    chr_list=get_chrlist(chrsize_txt=args.fa_size)
    bed_generate(chr_list=chr_list, binsize=int(args.bin), step=int(args.step))