-
Notifications
You must be signed in to change notification settings - Fork 25
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
5143629
commit 4fdd994
Showing
11 changed files
with
103 additions
and
17 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -33,6 +33,8 @@ | |
import datetime | ||
|
||
from math import fsum | ||
|
||
|
||
def average(x): | ||
return fsum(x)/float(len(x)) if x else 0 | ||
# | ||
|
@@ -91,6 +93,13 @@ def ATCGmapStatCov (fn, filetype = 'png', prefix = '', fW=8.0, fH=1.0): | |
GlobalCov = (float(sum([i*j for i,j in CovDict.items()]))/sum([i for i in CovDict.values()])) | ||
print("OverAllCov\tglobal\t%.4f" % GlobalCov ) | ||
ChrCov = [0] * len(ChrLst) | ||
# | ||
# works for python2 and python3 | ||
try: | ||
xrange | ||
except NameError: | ||
xrange = range | ||
# | ||
for chr_idx in xrange(len(ChrLst)): | ||
ChrCov[chr_idx] = float(sum([i * j for i, j in ChrCovDict[chr_idx].items()])) / sum([i for i in ChrCovDict[chr_idx].values()]) | ||
print("OverAllCov\t%s\t%.4f" % (ChrLst[chr_idx], ChrCov[chr_idx] ) ) | ||
|
@@ -142,7 +151,7 @@ def main(): | |
" (aka ATCGmapStatCov)\n" \ | ||
"Description: Get the distribution of overall coverages.\n" \ | ||
"Contact: Guo, Weilong; [email protected];\n" \ | ||
"Last Update: 2018-01-02\n" \ | ||
"Last Update: 2018-05-02\n" \ | ||
"Output Ex:\n" \ | ||
" OverAllCov global 47.0395\n" \ | ||
" OverAllCov chr1 45.3157\n" \ | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -124,13 +124,16 @@ def main(): | |
" (aka BismarkToCGmap.py)\n" \ | ||
"Description: Generate bismark output file to CGmap.\n" \ | ||
"Contact: Guo, Weilong; [email protected]\n" \ | ||
"Last Update: 2018-01-02" | ||
"Last Update: 2018-12-14\n" \ | ||
"Example of input file (generated by bismark):\n" \ | ||
"chr10 3107007 - 6 3 CG CGT\n" \ | ||
"chr10 3107024 - 9 3 CHH CTT\n" | ||
# | ||
parser = OptionParser(usage) | ||
# | ||
parser.add_option("-i", dest="Bismark", default=None, help="The output file of Bismark" | ||
"Input file name end with .gz is considered as compressed, " | ||
"use STDIN when not specified", metavar="FILE") | ||
"use STDIN when not specified.", metavar="FILE") | ||
parser.add_option("-o", "--CGmap", dest="CGmap", default=None, help="Output in CGmap format, " | ||
"use STDOUT if omitted (gzipped if end with \'.gz\')", metavar="FILE") | ||
# | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -78,6 +78,13 @@ def CGmapFillIndex(index, CGmap_list, tag_list, min_cov=1, max_cov=200) : | |
if index : | ||
INDEX.close() | ||
# | ||
# | ||
# works for python2 and python3 | ||
try: | ||
xrange | ||
except NameError: | ||
xrange = range | ||
# | ||
# Read all the CGmap files | ||
for i in xrange(N) : | ||
fn = CGmap_lst[i] | ||
|
@@ -122,7 +129,7 @@ def main(): | |
" (aka CGmapFillIndex)\n" \ | ||
"Description: Fill methylation levels according to the Index file for CGmap files in list.\n" \ | ||
"Contact: Guo, Weilong; [email protected];\n" \ | ||
"Last Updated: 2018-01-02\n" \ | ||
"Last Updated: 2018-05-02\n" \ | ||
"Index format Ex:\n" \ | ||
" chr10 100005504\n" \ | ||
"Output format Ex:\n" \ | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -142,6 +142,13 @@ def CGmapStatCov (fn, CTX, filetype = 'png', prefix = '', fH=1.0, fW=11.0): | |
GlobalCovSum = 0 | ||
print("MethEffectCov\tglobal\tNaN") | ||
# | ||
# | ||
# works for python2 and python3 | ||
try: | ||
xrange | ||
except NameError: | ||
xrange = range | ||
# | ||
ChrCov = [0] * len(ChrLst) | ||
for chr_idx in xrange(len(ChrLst)): | ||
ChrCov[chr_idx] = float(sum([i * j for i, j in ChrCovDict[chr_idx].items()])) / sum([i for i in ChrCovDict[chr_idx].values()]) | ||
|
@@ -186,8 +193,14 @@ def CGmapStatCov (fn, CTX, filetype = 'png', prefix = '', fH=1.0, fW=11.0): | |
plt.title('Cumulative Effective Coverage', fontsize=10) | ||
if prefix != "" : | ||
prefix = prefix + "." | ||
# | ||
plt.savefig(prefix + "MethEffectCove." + filetype, format=filetype) | ||
plt.clf() | ||
# === | ||
|
||
# === | ||
# | ||
# | ||
|
||
from optparse import OptionParser | ||
|
||
|
@@ -197,7 +210,7 @@ def main(): | |
" (aka CGmapStatCov)\n" \ | ||
"Description: Get the distribution of methylation-effective coverages.\n" \ | ||
"Contact: Guo, Weilong; [email protected]\n" \ | ||
"Last Update: 2018-01-02\n" \ | ||
"Last Update: 2018-05-02\n" \ | ||
"Output Ex:\n" \ | ||
" MethEffectCove global 47.0395\n" \ | ||
" MethEffectCove chr1 45.3157\n" \ | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -75,6 +75,13 @@ def CGmapStatMeth (fn, coverage = 10, filetype = "png", prefix = "", title="", f | |
sum_mC_byChr = {} | ||
count_mC_byChr = {} | ||
NQuant = 5 | ||
# | ||
# works for python2 and python3 | ||
try: | ||
xrange | ||
except NameError: | ||
xrange = range | ||
# | ||
quant_mC = [ [0]*(NQuant+1) for i in xrange(len(context_lst)) ] | ||
# | ||
# ================== | ||
|
@@ -253,7 +260,7 @@ def main(): | |
" (aka CGmapStatMeth)\n" \ | ||
"Description: Generate the bulk methylation.\n" \ | ||
"Contact: Guo, Weilong; [email protected]\n" \ | ||
"Last Update: 2018-01-02\n" \ | ||
"Last Update: 2018-05-02\n" \ | ||
"Output Ex:\n" \ | ||
" MethStat context C CG CHG CHH CA CC CT CH CW\n" \ | ||
" mean_mC global 0.0798 0.3719 0.0465 0.0403 0.0891 0.0071 0.0241 0.0419 0.0559\n" \ | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -40,6 +40,7 @@ def Get_key (str) : | |
chr = int(match.group(1)) | ||
else : | ||
chr = str | ||
# | ||
return [chr, str] | ||
#re.match(r"^chr(\d+)", "chr019_random").group(1) | ||
# | ||
|
@@ -63,6 +64,7 @@ def CGmapToRegion (CGmap_fn, region_fn): | |
REGION = gzip.open(region_fn, 'rb') | ||
else : | ||
REGION = open(region_fn, 'r') | ||
# | ||
except IOError: | ||
print ("\n[Error]:\n\t File cannot be open: %s" % region_fn) | ||
exit(-1) | ||
|
@@ -89,6 +91,7 @@ def CGmapToRegion (CGmap_fn, region_fn): | |
except ValueError : | ||
print("\n[Error]:\n\t File [ %s ] may have wrong number of columns." % CGmap_fn) | ||
exit(-1) | ||
# | ||
try : | ||
chr_r, pos_r1, pos_r2 = line_r.strip().split()[0:3] | ||
except ValueError : | ||
|
@@ -135,15 +138,20 @@ def CGmapToRegion (CGmap_fn, region_fn): | |
count = count + 1 | ||
NmC = NmC + int(NmC_c) | ||
NC = NC + int(NC_c) | ||
# | ||
line_c = CGMAP.readline() | ||
# | ||
# | ||
# End for reading files | ||
# | ||
if count > 0 : | ||
print ("%s\t%s\t%s\t%.2f\t%d\t%.2f\t%d" % (chr_r, pos_r1, pos_r2, mC/count, count, float(NmC)/NC, NC) ) | ||
else : | ||
print ("%s\t%s\t%s\tNA\t%d\tNA\t%d" % (chr_r, pos_r1, pos_r2, count, NC) ) | ||
while line_r : | ||
if count > 0 : | ||
print ("%s\t%s\t%s\t%.2f\t%d\t%.2f\t%d" % (chr_r, pos_r1, pos_r2, mC/count, count, float(NmC)/NC, NC) ) | ||
else : | ||
print ("%s\t%s\t%s\tNA\t%d\tNA\t%d" % (chr_r, pos_r1, pos_r2, count, NC) ) | ||
# | ||
# do the next | ||
line_r = REGION.readline() | ||
# | ||
REGION.close() | ||
if CGMAP is not sys.stdin: | ||
|
@@ -161,7 +169,7 @@ def main(): | |
" (aka CGmapToRegion)\n" \ | ||
"Description: Calculated the methylation levels in regions in two ways.\n" \ | ||
"Contact: Guo, Weilong; [email protected]\n" \ | ||
"Last Update: 2018-01-02\n" \ | ||
"Last Update: 2018-07-05\n" \ | ||
"Format of Region file:\n" \ | ||
" #chr start_pos end_pos\n" \ | ||
" chr1 8275 8429\n" \ | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -60,6 +60,13 @@ def CGmapMethylInBins (fn_lst_str, tag_lst_str, coverage, coverageXY, step, CTX | |
print("[Error]: tag list and file list are not same in lengths.") | ||
exit(-1) | ||
# | ||
# | ||
# works for python2 and python3 | ||
try: | ||
xrange | ||
except NameError: | ||
xrange = range | ||
# | ||
for fn_id in xrange( len(fn_lst) ) : | ||
fn = fn_lst[fn_id] | ||
try: | ||
|
@@ -192,7 +199,7 @@ def main(): | |
" (aka CGmapsMethInBins)\n" \ | ||
"Description: Generate the methylation in Bins.\n" \ | ||
"Contact: Guo, Weilong; [email protected]\n" \ | ||
"Last Update: 2018-01-02\n" \ | ||
"Last Update: 2018-05-02\n" \ | ||
"Output Ex:\n" \ | ||
" chr1 1 5000 0.0000\n" \ | ||
" chr1 5001 10000 0.0396\n" \ | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -42,7 +42,7 @@ def main(): | |
" (aka FragRegFromBED)\n" \ | ||
"Description: Generate fragmented regions from BED file.\n" \ | ||
"Contact: Guo, Weilong; [email protected]\n" \ | ||
"Last Update: 2018-01-02\n" \ | ||
"Last Update: 2018-05-02\n" \ | ||
" Split input region into N bins, get fragments from 5' end and 3' end.\n" \ | ||
"Input Ex:\n" \ | ||
" chr1 1000 2000 +\n"\ | ||
|
@@ -88,6 +88,13 @@ def main(): | |
sys.stdout = open(options.outfile, 'w') | ||
# | ||
# | ||
# | ||
# works for python2 and python3 | ||
try: | ||
xrange | ||
except NameError: | ||
xrange = range | ||
# | ||
# FiveMerEND | ||
if options.FiveMerEnd == "" : | ||
FiveMerLst=[] | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -47,6 +47,12 @@ def qbinom(p, size, prob, lower_tail = True): | |
'''Binomial quantiles''' | ||
if not lower_tail : | ||
prob = 1 - prob | ||
# # | ||
# works for python2 and python3 | ||
try: | ||
xrange | ||
except NameError: | ||
xrange = range | ||
# | ||
sum = 0 | ||
for i in xrange(size) : | ||
|
@@ -361,6 +367,13 @@ def PredictNT_bayes ( W_A, W_T, W_C, W_G, C_A, C_T, C_C, C_G, nuc="N" ): | |
#global p_value | ||
#global error_rate | ||
#global dynamicP | ||
# | ||
# works for python2 and python3 | ||
try: | ||
xrange | ||
except NameError: | ||
xrange = range | ||
# | ||
global bayes_options | ||
p_value = bayes_options['pv'] | ||
error_rate = bayes_options['er'] | ||
|
@@ -684,7 +697,7 @@ def main(): | |
" (aka SNVFromATCGmap)\n" \ | ||
"Description: Predict the SNV from ATCGmap file.\n" \ | ||
"Contact: Guo, Weilong; [email protected]\n" \ | ||
"Last update: 2018-01-02\n" \ | ||
"Last update: 2018-05-02\n" \ | ||
"Output format example:\n" \ | ||
" #chr nuc pos ATCG_watson ATCG_crick predicted_nuc p_value\n" \ | ||
" chr1 G 4752 17,0,0,69 0,0,0,0 A,G 9.3e-07\n" \ | ||
|