Skip to content

Commit

Permalink
v0.1.2
Browse files Browse the repository at this point in the history
  • Loading branch information
guoweilong committed Dec 14, 2018
1 parent 5143629 commit 4fdd994
Show file tree
Hide file tree
Showing 11 changed files with 103 additions and 17 deletions.
3 changes: 2 additions & 1 deletion cgmaptools
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,8 @@ import subprocess
argv_len = len(sys.argv)

def PrintVersion() :
print("Version: 0.1.1")
print("Version: 0.1.2")
print("Updated on: Dec. 14th, 2018")
#

if (argv_len) == 1 or sys.argv[1] in ["-h", "-H", "--help"]:
Expand Down
11 changes: 10 additions & 1 deletion src/ATCGmapStatCov.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,8 @@
import datetime

from math import fsum


def average(x):
return fsum(x)/float(len(x)) if x else 0
#
Expand Down Expand Up @@ -91,6 +93,13 @@ def ATCGmapStatCov (fn, filetype = 'png', prefix = '', fW=8.0, fH=1.0):
GlobalCov = (float(sum([i*j for i,j in CovDict.items()]))/sum([i for i in CovDict.values()]))
print("OverAllCov\tglobal\t%.4f" % GlobalCov )
ChrCov = [0] * len(ChrLst)
#
# works for python2 and python3
try:
xrange
except NameError:
xrange = range
#
for chr_idx in xrange(len(ChrLst)):
ChrCov[chr_idx] = float(sum([i * j for i, j in ChrCovDict[chr_idx].items()])) / sum([i for i in ChrCovDict[chr_idx].values()])
print("OverAllCov\t%s\t%.4f" % (ChrLst[chr_idx], ChrCov[chr_idx] ) )
Expand Down Expand Up @@ -142,7 +151,7 @@ def main():
" (aka ATCGmapStatCov)\n" \
"Description: Get the distribution of overall coverages.\n" \
"Contact: Guo, Weilong; [email protected];\n" \
"Last Update: 2018-01-02\n" \
"Last Update: 2018-05-02\n" \
"Output Ex:\n" \
" OverAllCov global 47.0395\n" \
" OverAllCov chr1 45.3157\n" \
Expand Down
7 changes: 5 additions & 2 deletions src/BismarkToCGmap.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,13 +124,16 @@ def main():
" (aka BismarkToCGmap.py)\n" \
"Description: Generate bismark output file to CGmap.\n" \
"Contact: Guo, Weilong; [email protected]\n" \
"Last Update: 2018-01-02"
"Last Update: 2018-12-14\n" \
"Example of input file (generated by bismark):\n" \
"chr10 3107007 - 6 3 CG CGT\n" \
"chr10 3107024 - 9 3 CHH CTT\n"
#
parser = OptionParser(usage)
#
parser.add_option("-i", dest="Bismark", default=None, help="The output file of Bismark"
"Input file name end with .gz is considered as compressed, "
"use STDIN when not specified", metavar="FILE")
"use STDIN when not specified.", metavar="FILE")
parser.add_option("-o", "--CGmap", dest="CGmap", default=None, help="Output in CGmap format, "
"use STDOUT if omitted (gzipped if end with \'.gz\')", metavar="FILE")
#
Expand Down
9 changes: 8 additions & 1 deletion src/CGmapFillIndex.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,13 @@ def CGmapFillIndex(index, CGmap_list, tag_list, min_cov=1, max_cov=200) :
if index :
INDEX.close()
#
#
# works for python2 and python3
try:
xrange
except NameError:
xrange = range
#
# Read all the CGmap files
for i in xrange(N) :
fn = CGmap_lst[i]
Expand Down Expand Up @@ -122,7 +129,7 @@ def main():
" (aka CGmapFillIndex)\n" \
"Description: Fill methylation levels according to the Index file for CGmap files in list.\n" \
"Contact: Guo, Weilong; [email protected];\n" \
"Last Updated: 2018-01-02\n" \
"Last Updated: 2018-05-02\n" \
"Index format Ex:\n" \
" chr10 100005504\n" \
"Output format Ex:\n" \
Expand Down
15 changes: 13 additions & 2 deletions src/CGmapFromBAM.c
Original file line number Diff line number Diff line change
Expand Up @@ -491,19 +491,27 @@ void load_chromosome(int tid, infoholder_t *info){
if (info->current_chrom_seq != NULL) {
free(info->current_chrom_seq);
}
//printf("DEBUG | load_chr 1\n");
char genome_filename[MAX_PATH] = {0};
if (is_dir(info->genome_filename)) {
sprintf(genome_filename,"%s/%s.fa", info->genome_filename, chrom_name);
} else {
strcpy(genome_filename, info->genome_filename);
}
faidx_t *genome_idx = fai_load(genome_filename);

//printf("DEBUG | load_chr 2\n");
if (genome_idx == NULL) {
fprintf(stderr, "Fail to open index for genome file: %s\n", genome_filename);
exit(1);
}
//printf("DEBUG | load_chr 2.5\n");
//printf("DEBUG | %s | %s | %s \n", genome_idx, chrom_name, info->current_chrom_length );
info->current_chrom_seq = fai_fetch(genome_idx, chrom_name, &info->current_chrom_length);
//printf("DEBUG | load_chr 2.6\n");
info->current_tid = tid;

//printf("DEBUG | load_chr 3\n");
if (info->current_chrom_seq == NULL) {
fprintf(stderr, "Failed to load chromosome: %s\n", chrom_name);
exit(1);
Expand Down Expand Up @@ -688,7 +696,7 @@ static int pileup_func(uint32_t tid, uint32_t pos, int n, const bam_pileup1_t *p
//printf("%s\n", aln_qname);
//printf("DEBUG 1\n");
if ( dict_get(Dict, aln_qname) == NULL ) {
//printf("DEBUG | First: %s\n", aln_qname);
//printf("DEBUG | First: %s\n", aln_qname);
dict_set(Dict, aln_qname, NULL);
// Redudent region A(1)
if (!pl->indel){
Expand Down Expand Up @@ -741,7 +749,7 @@ static int pileup_func(uint32_t tid, uint32_t pos, int n, const bam_pileup1_t *p
char nuc;
context_calling(info->current_chrom_seq, info->current_chrom_length, pos,
&nuc, &context, subcontext);
// printf("DEBUG | pileup_func: after <context_call>\n");
// printf("DEBUG | pileup_func: after <context_call>\n");
double meth_level = 0;
int meth_level_is_available = 0;
int meth_cytosines = 0;
Expand Down Expand Up @@ -938,6 +946,9 @@ int main(int argc, char **argv){
$ ../bin/CGmapFromBAM
open: No such file or directory
Fail to open BAM file
$ potential bugs
If the input bam names is not in list of fasta genome
related with fai_fetch function
*/


Expand Down
15 changes: 14 additions & 1 deletion src/CGmapStatCov.py
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,13 @@ def CGmapStatCov (fn, CTX, filetype = 'png', prefix = '', fH=1.0, fW=11.0):
GlobalCovSum = 0
print("MethEffectCov\tglobal\tNaN")
#
#
# works for python2 and python3
try:
xrange
except NameError:
xrange = range
#
ChrCov = [0] * len(ChrLst)
for chr_idx in xrange(len(ChrLst)):
ChrCov[chr_idx] = float(sum([i * j for i, j in ChrCovDict[chr_idx].items()])) / sum([i for i in ChrCovDict[chr_idx].values()])
Expand Down Expand Up @@ -186,8 +193,14 @@ def CGmapStatCov (fn, CTX, filetype = 'png', prefix = '', fH=1.0, fW=11.0):
plt.title('Cumulative Effective Coverage', fontsize=10)
if prefix != "" :
prefix = prefix + "."
#
plt.savefig(prefix + "MethEffectCove." + filetype, format=filetype)
plt.clf()
# ===

# ===
#
#

from optparse import OptionParser

Expand All @@ -197,7 +210,7 @@ def main():
" (aka CGmapStatCov)\n" \
"Description: Get the distribution of methylation-effective coverages.\n" \
"Contact: Guo, Weilong; [email protected]\n" \
"Last Update: 2018-01-02\n" \
"Last Update: 2018-05-02\n" \
"Output Ex:\n" \
" MethEffectCove global 47.0395\n" \
" MethEffectCove chr1 45.3157\n" \
Expand Down
9 changes: 8 additions & 1 deletion src/CGmapStatMeth.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,13 @@ def CGmapStatMeth (fn, coverage = 10, filetype = "png", prefix = "", title="", f
sum_mC_byChr = {}
count_mC_byChr = {}
NQuant = 5
#
# works for python2 and python3
try:
xrange
except NameError:
xrange = range
#
quant_mC = [ [0]*(NQuant+1) for i in xrange(len(context_lst)) ]
#
# ==================
Expand Down Expand Up @@ -253,7 +260,7 @@ def main():
" (aka CGmapStatMeth)\n" \
"Description: Generate the bulk methylation.\n" \
"Contact: Guo, Weilong; [email protected]\n" \
"Last Update: 2018-01-02\n" \
"Last Update: 2018-05-02\n" \
"Output Ex:\n" \
" MethStat context C CG CHG CHH CA CC CT CH CW\n" \
" mean_mC global 0.0798 0.3719 0.0465 0.0403 0.0891 0.0071 0.0241 0.0419 0.0559\n" \
Expand Down
18 changes: 13 additions & 5 deletions src/CGmapToRegion.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ def Get_key (str) :
chr = int(match.group(1))
else :
chr = str
#
return [chr, str]
#re.match(r"^chr(\d+)", "chr019_random").group(1)
#
Expand All @@ -63,6 +64,7 @@ def CGmapToRegion (CGmap_fn, region_fn):
REGION = gzip.open(region_fn, 'rb')
else :
REGION = open(region_fn, 'r')
#
except IOError:
print ("\n[Error]:\n\t File cannot be open: %s" % region_fn)
exit(-1)
Expand All @@ -89,6 +91,7 @@ def CGmapToRegion (CGmap_fn, region_fn):
except ValueError :
print("\n[Error]:\n\t File [ %s ] may have wrong number of columns." % CGmap_fn)
exit(-1)
#
try :
chr_r, pos_r1, pos_r2 = line_r.strip().split()[0:3]
except ValueError :
Expand Down Expand Up @@ -135,15 +138,20 @@ def CGmapToRegion (CGmap_fn, region_fn):
count = count + 1
NmC = NmC + int(NmC_c)
NC = NC + int(NC_c)
#
line_c = CGMAP.readline()
#
#
# End for reading files
#
if count > 0 :
print ("%s\t%s\t%s\t%.2f\t%d\t%.2f\t%d" % (chr_r, pos_r1, pos_r2, mC/count, count, float(NmC)/NC, NC) )
else :
print ("%s\t%s\t%s\tNA\t%d\tNA\t%d" % (chr_r, pos_r1, pos_r2, count, NC) )
while line_r :
if count > 0 :
print ("%s\t%s\t%s\t%.2f\t%d\t%.2f\t%d" % (chr_r, pos_r1, pos_r2, mC/count, count, float(NmC)/NC, NC) )
else :
print ("%s\t%s\t%s\tNA\t%d\tNA\t%d" % (chr_r, pos_r1, pos_r2, count, NC) )
#
# do the next
line_r = REGION.readline()
#
REGION.close()
if CGMAP is not sys.stdin:
Expand All @@ -161,7 +169,7 @@ def main():
" (aka CGmapToRegion)\n" \
"Description: Calculated the methylation levels in regions in two ways.\n" \
"Contact: Guo, Weilong; [email protected]\n" \
"Last Update: 2018-01-02\n" \
"Last Update: 2018-07-05\n" \
"Format of Region file:\n" \
" #chr start_pos end_pos\n" \
" chr1 8275 8429\n" \
Expand Down
9 changes: 8 additions & 1 deletion src/CGmapsMethInBins.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,13 @@ def CGmapMethylInBins (fn_lst_str, tag_lst_str, coverage, coverageXY, step, CTX
print("[Error]: tag list and file list are not same in lengths.")
exit(-1)
#
#
# works for python2 and python3
try:
xrange
except NameError:
xrange = range
#
for fn_id in xrange( len(fn_lst) ) :
fn = fn_lst[fn_id]
try:
Expand Down Expand Up @@ -192,7 +199,7 @@ def main():
" (aka CGmapsMethInBins)\n" \
"Description: Generate the methylation in Bins.\n" \
"Contact: Guo, Weilong; [email protected]\n" \
"Last Update: 2018-01-02\n" \
"Last Update: 2018-05-02\n" \
"Output Ex:\n" \
" chr1 1 5000 0.0000\n" \
" chr1 5001 10000 0.0396\n" \
Expand Down
9 changes: 8 additions & 1 deletion src/FragRegFromBED.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ def main():
" (aka FragRegFromBED)\n" \
"Description: Generate fragmented regions from BED file.\n" \
"Contact: Guo, Weilong; [email protected]\n" \
"Last Update: 2018-01-02\n" \
"Last Update: 2018-05-02\n" \
" Split input region into N bins, get fragments from 5' end and 3' end.\n" \
"Input Ex:\n" \
" chr1 1000 2000 +\n"\
Expand Down Expand Up @@ -88,6 +88,13 @@ def main():
sys.stdout = open(options.outfile, 'w')
#
#
#
# works for python2 and python3
try:
xrange
except NameError:
xrange = range
#
# FiveMerEND
if options.FiveMerEnd == "" :
FiveMerLst=[]
Expand Down
15 changes: 14 additions & 1 deletion src/SNVFromATCGmap.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,12 @@ def qbinom(p, size, prob, lower_tail = True):
'''Binomial quantiles'''
if not lower_tail :
prob = 1 - prob
# #
# works for python2 and python3
try:
xrange
except NameError:
xrange = range
#
sum = 0
for i in xrange(size) :
Expand Down Expand Up @@ -361,6 +367,13 @@ def PredictNT_bayes ( W_A, W_T, W_C, W_G, C_A, C_T, C_C, C_G, nuc="N" ):
#global p_value
#global error_rate
#global dynamicP
#
# works for python2 and python3
try:
xrange
except NameError:
xrange = range
#
global bayes_options
p_value = bayes_options['pv']
error_rate = bayes_options['er']
Expand Down Expand Up @@ -684,7 +697,7 @@ def main():
" (aka SNVFromATCGmap)\n" \
"Description: Predict the SNV from ATCGmap file.\n" \
"Contact: Guo, Weilong; [email protected]\n" \
"Last update: 2018-01-02\n" \
"Last update: 2018-05-02\n" \
"Output format example:\n" \
" #chr nuc pos ATCG_watson ATCG_crick predicted_nuc p_value\n" \
" chr1 G 4752 17,0,0,69 0,0,0,0 A,G 9.3e-07\n" \
Expand Down

0 comments on commit 4fdd994

Please sign in to comment.