Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

improve options for low count filters and add data dashboard module #64

Merged
merged 27 commits into from
May 23, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions docs/source/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@
"polars",
"biobear",
"numba",
"bokeh",
"pydeseq2",
"watermark"
]
Expand Down
1 change: 1 addition & 0 deletions environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ dependencies:
- matplotlib<3.7
- seaborn
- pyarrow
- bokeh
- ipykernel
- mscorefonts
- rust>=1.72
Expand Down
7 changes: 5 additions & 2 deletions screenpro/__init__.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,14 @@
from . import plotting as pl
from . import phenoscore as ps

from . import utils
from . import ngs
from . import load
from . import dashboard

from .ngs import Counter
from .assays import PooledScreens, GImaps

__version__ = "0.3.2"
__version__ = "0.3.3"
__author__ = "Abe Arab"
__email__ = '[email protected]' # "[email protected]"
__email__ = '[email protected]' # "[email protected]"
89 changes: 86 additions & 3 deletions screenpro/assays.py
Original file line number Diff line number Diff line change
Expand Up @@ -190,7 +190,7 @@ def calculateFlowBasedScreen(self, low_bin, high_bin, score_level, run_name=None
# save phenotype name for reference
self._add_phenotype_results(f'delta:{delta_name}')

def getPhenotypeScores(self, run_name, score_name, threshold=5, ctrl_label='negCtrl', target_col='target',pvalue_column='ttest pvalue', score_column='score'):
def getPhenotypeScores(self, score_name, run_name='auto', threshold=5, ctrl_label='negCtrl', target_col='target',pvalue_column='ttest pvalue', score_column='score'):
"""
Get phenotype scores for a given score level

Expand All @@ -203,19 +203,102 @@ def getPhenotypeScores(self, run_name, score_name, threshold=5, ctrl_label='negC
pvalue_column (str): column name for the p-value, default is 'ttest pvalue'
score_column (str): column name for the score, default is 'score'
"""
hit_dict = {
'gamma':{
'up_hit':'up_hit',
'down_hit':'essential_hit'
},
'tau':{
'up_hit':'up_hit',
'down_hit':'down_hit'
},
'rho':{
'up_hit':'resistance_hit',
'down_hit':'sensitivity_hit'
}
}

if run_name == 'auto':
if len(list(self.phenotypes.keys())) == 1:
run_name = list(self.phenotypes.keys())[0]
else:
raise ValueError(
'Multiple phenotype calculation runs found.'
'Please specify run_name. Available runs: '
'' + ', '.join(self.phenotypes.keys())
)

if score_name not in self.phenotype_names:
raise ValueError(f"Phenotype '{score_name}' not found in self.phenotype_names")

keep_col = [target_col, score_column, pvalue_column]

keep_col = [target_col, score_column, pvalue_column]
score_tag = score_name.split(':')[0]
out = ann_score_df(
self.phenotypes[run_name][score_name].loc[:,keep_col],
ctrl_label=ctrl_label,
up_hit=hit_dict[score_tag]['up_hit'],
down_hit=hit_dict[score_tag]['down_hit'],
threshold=threshold
)

return out

def getAnnotatedTable(self, run_name='auto', threshold=5, ctrl_label='negCtrl', target_col='target',pvalue_column='ttest pvalue', score_column='score'):
hit_dict = {
'gamma':{
'up_hit':'up_hit',
'down_hit':'essential_hit'
},
'tau':{
'up_hit':'up_hit',
'down_hit':'down_hit'
},
'rho':{
'up_hit':'resistance_hit',
'down_hit':'sensitivity_hit'
}
}

if run_name == 'auto':
if len(list(self.phenotypes.keys())) == 1:
run_name = list(self.phenotypes.keys())[0]
else:
raise ValueError(
'Multiple phenotype calculation runs found.'
'Please specify run_name. Available runs: '
'' + ', '.join(self.phenotypes.keys())
)

keep_col = [target_col, score_column, pvalue_column]

score_names = {s for s, col in self.phenotypes[run_name].columns}
sort_var = self.adata.var.sort_values(['targetType','target']).index.to_list()

df_list = {}
for score_name in score_names:
score_tag = score_name.split(':')[0]
# get label
df_label = ann_score_df(
self.phenotypes[run_name][score_name].loc[:,keep_col],
up_hit=hit_dict[score_tag]['up_hit'],
down_hit=hit_dict[score_tag]['down_hit'],
ctrl_label=ctrl_label,
threshold=threshold
)['label']
# get replicate phe
df_phe_reps = self.pdata[self.pdata.obs.score.eq(score_tag)].to_df().T

# make table
df = pd.concat([
self.phenotypes['compare_reps'][score_name], df_phe_reps, df_label
],axis=1).loc[sort_var,:]

df_list.update({score_name:df})

out = pd.concat(df_list,axis=1)

return out


class GImaps:
    """Placeholder for genetic-interaction map assays; currently empty."""
Loading
Loading