-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathgraph_global_transc.py
139 lines (109 loc) · 6.52 KB
/
graph_global_transc.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
#!/usr/bin/env python
import pandas as pd
import matplotlib.pyplot as plt
# read the data from different files into one data frame
'''
path = r'/Users/mashrurhaidernew/Desktop/NKI/data' # use your path
all_files = glob.glob(os.path.join(path, "*.counts")) # advisable to use os.path.join as this makes concatenation OS independent
df_from_each_file = (pd.read_csv(f, sep='\t') for f in all_files)
data = pd.concat((pd.read_csv(f, sep='\t') for f in all_files))
data = pd.DataFrame(data)
'''
columns = ['transcript', 'gene', 'sample', 'pos_perc', 'cds_norm_count', 'alpha']
# opens each individual file
data_0 = pd.read_csv('/Users/mashrurhaidernew/Downloads/Skeletal_1/26-32-0TX.profile.percentage.filtered.tsv', sep='\t', skip_blank_lines=True, names=columns, header=None)
data_15 = pd.read_csv('/Users/mashrurhaidernew/Downloads/Skeletal_1/26-32-15TX.profile.percentage.filtered.tsv', sep='\t', skip_blank_lines=True, names=columns, header=None)
data_30 = pd.read_csv('/Users/mashrurhaidernew/Downloads/Skeletal_1/26-32-30TX.profile.percentage.filtered.tsv', sep='\t', skip_blank_lines=True, names=columns, header=None)
data_45 = pd.read_csv('/Users/mashrurhaidernew/Downloads/Skeletal_1/26-32-45TX.profile.percentage.filtered.tsv', sep='\t', skip_blank_lines=True, names=columns, header=None)
data_60 = pd.read_csv('/Users/mashrurhaidernew/Downloads/Skeletal_1/26-32-60TX.profile.percentage.filtered.tsv', sep='\t', skip_blank_lines=True, names=columns, header=None)
data_300 = pd.read_csv('/Users/mashrurhaidernew/Downloads/Skeletal_1/26-32-300TX.profile.percentage.filtered.tsv', sep='\t', skip_blank_lines=True, names=columns, header=None)
# Formatting data and calculating mean for each transcript
ax_0 = pd.pivot_table(data_0.reset_index(), index=['pos_perc', 'transcript'], columns='sample',
values='cds_norm_count', fill_value=0)
ax_15 = pd.pivot_table(data_15.reset_index(), index=['pos_perc', 'transcript'], columns='sample',
values='cds_norm_count', fill_value=0)
ax_30 = pd.pivot_table(data_30.reset_index(), index=['pos_perc', 'transcript'], columns='sample',
values='cds_norm_count', fill_value=0)
ax_45 = pd.pivot_table(data_45.reset_index(), index=['pos_perc', 'transcript'], columns='sample',
values='cds_norm_count', fill_value=0)
ax_60 = pd.pivot_table(data_60.reset_index(), index=['pos_perc', 'transcript'], columns='sample',
values='cds_norm_count', fill_value=0)
ax_300 = pd.pivot_table(data_300.reset_index(), index=['pos_perc', 'transcript'], columns='sample',
values='cds_norm_count', fill_value=0)
# mean with zero elements and adding a new column
ax_0['avg_cds_norm_count'] = ax_0.mean(axis=1, skipna=False)
ax_0['Time Point'] = 0
ax_0 = ax_0.reset_index()
ax_15['avg_cds_norm_count'] = ax_15.mean(axis=1, skipna=False)
ax_15['Time Point'] = 15
ax_15 = ax_15.reset_index()
ax_30['avg_cds_norm_count'] = ax_30.mean(axis=1, skipna=False)
ax_30['Time Point'] = 30
ax_30 = ax_30.reset_index()
ax_45['avg_cds_norm_count'] = ax_45.mean(axis=1, skipna=False)
ax_45['Time Point'] = 45
ax_45 = ax_45.reset_index()
ax_60['avg_cds_norm_count'] = ax_60.mean(axis=1, skipna=False)
ax_60['Time Point'] = 60
ax_60 = ax_60.reset_index()
ax_300['avg_cds_norm_count'] = ax_300.mean(axis=1, skipna=False)
ax_300['Time Point'] = 300
ax_300 = ax_300.reset_index()
print(ax_0)
'''
#calculating cumulative avg_cds_norm_count
ax_0['cum_sum_cds_count'] = ax_0['avg_cds_norm_count'].cumsum()
ax_15['cum_sum_cds_count'] = ax_15['avg_cds_norm_count'].cumsum()
ax_30['cum_sum_cds_count'] = ax_30['avg_cds_norm_count'].cumsum()
ax_45['cum_sum_cds_count'] = ax_45['avg_cds_norm_count'].cumsum()
ax_60['cum_sum_cds_count'] = ax_60['avg_cds_norm_count'].cumsum()
ax_300['cum_sum_cds_count'] = ax_300['avg_cds_norm_count'].cumsum()
ax_0 = ax_0.reset_index()
ax_15 = ax_15.reset_index()
ax_30 = ax_30.reset_index()
ax_45 = ax_45.reset_index()
ax_60 = ax_60.reset_index()
ax_300 = ax_300.reset_index()
'''
#creating one DF with the new column and create a new csv file
df = ax_0.append([ax_15, ax_30, ax_45, ax_60, ax_300], sort=True).fillna(value=0)
df = pd.DataFrame(df)
df.to_csv(r'/Users/mashrurhaidernew/Downloads/Skeletal_1/Skeletal_Proc_new.csv', encoding='utf-8-sig')
df_new = pd.read_csv('/Users/mashrurhaidernew/Downloads/Skeletal_1/Skeletal_Proc_new.csv', sep=',', skip_blank_lines=True)
print(df_new)
'''
#subplot according to timepoint
fig, axes = plt.subplots(nrows=2, ncols=3)
plot1_a = ax_0.plot(x='position', y='avg_cds_norm_count', ax=axes[0, 0], label='0', legend=False, color="red", sharex=True, sharey=True, xlim=(0,2100), ylim=(0,0.004))
plot2_b = ax_15.plot(x='position', y='avg_cds_norm_count', ax=axes[0, 1], label='15', legend=False, color="green", sharex=True, sharey=True, xlim=(0,2100), ylim=(0,0.004))
plot3_c = ax_30.plot(x='position', y='avg_cds_norm_count', ax=axes[0, 2], label='30', legend=False, color="blue", sharex=True, sharey=True, xlim=(0,2100), ylim=(0,0.004))
plot4_d = ax_45.plot(x='position', y='avg_cds_norm_count', ax=axes[1, 0], label='45', legend=False, color="orange",sharex=True, sharey=True, xlim=(0,2100), ylim=(0,0.004))
plot5_e = ax_60.plot(x='position', y='avg_cds_norm_count', ax=axes[1, 1], label='60', legend=False, color="magenta", sharex=True, sharey=True, xlim=(0,2100), ylim=(0,0.004))
plot6_f = ax_300.plot(x='position', y='avg_cds_norm_count', ax=axes[1, 2], label='300', legend=False, color="purple", sharex=True, sharey=True, xlim=(0,2100), ylim=(0,0.004))
line_labels = ["Time 0", "Time 15", "Time 30", "Time 45", "Time 60", "Time 300"]
fig.legend([plot1_a, plot2_b, plot3_c, plot4_d, plot5_e, plot6_f], labels=line_labels, loc="center right", borderaxespad=0.1)
plt.subplots_adjust(right=0.85, wspace=0.4, hspace=0.4)
plt.savefig('/Volumes/Haider/Data_haider_NKI/RiboSAK_results/Liver_prof_count/Liver_graph.pdf')
#cumulative plot
x_0 = ax_0['position']
y_0 = ax_0['cum_sum_cds_count']
x_15 = ax_15['position']
y_15 = ax_15['cum_sum_cds_count']
x_30 = ax_30['position']
y_30 = ax_30['cum_sum_cds_count']
x_45 = ax_45['position']
y_45 = ax_45['cum_sum_cds_count']
x_60 = ax_60['position']
y_60 = ax_60['cum_sum_cds_count']
x_300 = ax_300['position']
y_300 = ax_300['cum_sum_cds_count']
plt.plot(x_0, y_0)
plt.plot(x_15, y_15)
plt.plot(x_30, y_30)
plt.plot(x_45, y_45)
plt.plot(x_60, y_60)
plt.plot(x_300, y_300)
plt.legend(['time = 0', 'time = 15', 'time = 30', 'time = 45', 'time = 60', 'time = 300'], loc='upper right')
plt.show()
plt.savefig('/Volumes/Haider/Data_haider_NKI/RiboSAK_results/Liver_prof_count/Liver_graph_cumsum.pdf')
'''