From 59f89159055f69a9ddf88817d5de5a330b631e56 Mon Sep 17 00:00:00 2001 From: abearab Date: Sat, 29 Jun 2024 14:44:22 -0700 Subject: [PATCH 01/37] refactor phenoscore module --- screenpro/{phenoscore.py => phenoscore/__init__.py} | 0 screenpro/{ => phenoscore}/phenostat.py | 0 2 files changed, 0 insertions(+), 0 deletions(-) rename screenpro/{phenoscore.py => phenoscore/__init__.py} (100%) rename screenpro/{ => phenoscore}/phenostat.py (100%) diff --git a/screenpro/phenoscore.py b/screenpro/phenoscore/__init__.py similarity index 100% rename from screenpro/phenoscore.py rename to screenpro/phenoscore/__init__.py diff --git a/screenpro/phenostat.py b/screenpro/phenoscore/phenostat.py similarity index 100% rename from screenpro/phenostat.py rename to screenpro/phenoscore/phenostat.py From 20b53a06ee8d8850b17e57ec99d707be2c8e479f Mon Sep 17 00:00:00 2001 From: abearab Date: Sat, 29 Jun 2024 14:44:34 -0700 Subject: [PATCH 02/37] draft deseq script --- screenpro/phenoscore/deseq.py | 37 +++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) create mode 100644 screenpro/phenoscore/deseq.py diff --git a/screenpro/phenoscore/deseq.py b/screenpro/phenoscore/deseq.py new file mode 100644 index 0000000..de60824 --- /dev/null +++ b/screenpro/phenoscore/deseq.py @@ -0,0 +1,37 @@ +""" +deseq module: adapt pyDESeq2 for use in ScreenPro2 package +""" + +import numpy as np +import pandas as pd +import anndata as ad + +from pydeseq2.dds import DeseqDataSet +from pydeseq2.default_inference import DefaultInference +from pydeseq2.ds import DeseqStats + + +def runDESeq(adata, design, tested_level, ref_level, n_cpus=8): + + inference = DefaultInference(n_cpus=n_cpus) + + dds = DeseqDataSet( + counts=adata.to_df().astype(int), + metadata=adata.obs, + design_factors=design, # compare samples based on the "condition" + refit_cooks=True, + inference=inference, + ) + + dds.deseq2() + + stat_res = DeseqStats( + dds, + contrast=[design, tested_level, ref_level], + 
inference=inference + ) + stat_res.summary() + + df = stat_res.results_df + + return df From cafd6a9287d89593c36969c0689cf0e112e633cc Mon Sep 17 00:00:00 2001 From: abearab Date: Sat, 29 Jun 2024 14:47:10 -0700 Subject: [PATCH 03/37] import `runDESeq` --- screenpro/phenoscore/__init__.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/screenpro/phenoscore/__init__.py b/screenpro/phenoscore/__init__.py index a06384b..4b6941b 100644 --- a/screenpro/phenoscore/__init__.py +++ b/screenpro/phenoscore/__init__.py @@ -5,6 +5,8 @@ import numpy as np import anndata as ad import pandas as pd + +from deseq import runDESeq from .phenostat import matrixStat, getFDR From 83e29bb194ee9830f7533f2dfe0ea1ed2742188b Mon Sep 17 00:00:00 2001 From: abearab Date: Sat, 29 Jun 2024 14:48:51 -0700 Subject: [PATCH 04/37] mend --- screenpro/phenoscore/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/screenpro/phenoscore/__init__.py b/screenpro/phenoscore/__init__.py index 4b6941b..77c5bfe 100644 --- a/screenpro/phenoscore/__init__.py +++ b/screenpro/phenoscore/__init__.py @@ -6,7 +6,7 @@ import anndata as ad import pandas as pd -from deseq import runDESeq +from .deseq import runDESeq from .phenostat import matrixStat, getFDR From 079632122d786cbf3c6b51c9e4a98846f05eef9f Mon Sep 17 00:00:00 2001 From: abearab Date: Sat, 29 Jun 2024 14:53:56 -0700 Subject: [PATCH 05/37] add `quiet=True` --- screenpro/phenoscore/deseq.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/screenpro/phenoscore/deseq.py b/screenpro/phenoscore/deseq.py index de60824..c6d8694 100644 --- a/screenpro/phenoscore/deseq.py +++ b/screenpro/phenoscore/deseq.py @@ -21,6 +21,7 @@ def runDESeq(adata, design, tested_level, ref_level, n_cpus=8): design_factors=design, # compare samples based on the "condition" refit_cooks=True, inference=inference, + quiet=True ) dds.deseq2() @@ -28,7 +29,7 @@ def runDESeq(adata, design, tested_level, ref_level, n_cpus=8): stat_res = DeseqStats( 
dds, contrast=[design, tested_level, ref_level], - inference=inference + inference=inference, ) stat_res.summary() From 03cdd9b55785edb76b0223c5946a67f71beaaec0 Mon Sep 17 00:00:00 2001 From: abearab Date: Sat, 29 Jun 2024 14:56:31 -0700 Subject: [PATCH 06/37] add `quiet=True` --- screenpro/phenoscore/deseq.py | 1 + 1 file changed, 1 insertion(+) diff --git a/screenpro/phenoscore/deseq.py b/screenpro/phenoscore/deseq.py index c6d8694..01e725f 100644 --- a/screenpro/phenoscore/deseq.py +++ b/screenpro/phenoscore/deseq.py @@ -30,6 +30,7 @@ def runDESeq(adata, design, tested_level, ref_level, n_cpus=8): dds, contrast=[design, tested_level, ref_level], inference=inference, + quiet=True ) stat_res.summary() From d557f1e991f6149355ab0627ed230fe04c024bab Mon Sep 17 00:00:00 2001 From: abearab Date: Sat, 29 Jun 2024 15:01:03 -0700 Subject: [PATCH 07/37] avoid print --- screenpro/phenoscore/deseq.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/screenpro/phenoscore/deseq.py b/screenpro/phenoscore/deseq.py index 01e725f..d0bb52b 100644 --- a/screenpro/phenoscore/deseq.py +++ b/screenpro/phenoscore/deseq.py @@ -2,6 +2,7 @@ deseq module: adapt pyDESeq2 for use in ScreenPro2 package """ +import sys import numpy as np import pandas as pd import anndata as ad @@ -32,7 +33,8 @@ def runDESeq(adata, design, tested_level, ref_level, n_cpus=8): inference=inference, quiet=True ) - stat_res.summary() + + sys.stdout = open(stat_res.summary(), 'w') df = stat_res.results_df From 27b44ed85f9f8669a9b335d39c6b78dba71bcffd Mon Sep 17 00:00:00 2001 From: abearab Date: Sat, 29 Jun 2024 15:01:54 -0700 Subject: [PATCH 08/37] mend --- screenpro/phenoscore/deseq.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/screenpro/phenoscore/deseq.py b/screenpro/phenoscore/deseq.py index d0bb52b..77f3b68 100644 --- a/screenpro/phenoscore/deseq.py +++ b/screenpro/phenoscore/deseq.py @@ -12,7 +12,7 @@ from pydeseq2.ds import DeseqStats -def runDESeq(adata, design, 
tested_level, ref_level, n_cpus=8): +def runDESeq(adata, design, tested_level, ref_level, n_cpus=8,quiet=False): inference = DefaultInference(n_cpus=n_cpus) @@ -22,7 +22,7 @@ def runDESeq(adata, design, tested_level, ref_level, n_cpus=8): design_factors=design, # compare samples based on the "condition" refit_cooks=True, inference=inference, - quiet=True + quiet=quiet ) dds.deseq2() @@ -31,7 +31,7 @@ def runDESeq(adata, design, tested_level, ref_level, n_cpus=8): dds, contrast=[design, tested_level, ref_level], inference=inference, - quiet=True + quiet=quiet ) sys.stdout = open(stat_res.summary(), 'w') From 92d8741ac1bcd622314a6a416b319f6cb41b5006 Mon Sep 17 00:00:00 2001 From: abearab Date: Sat, 29 Jun 2024 15:06:29 -0700 Subject: [PATCH 09/37] mend --- screenpro/phenoscore/deseq.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/screenpro/phenoscore/deseq.py b/screenpro/phenoscore/deseq.py index 77f3b68..7bb2c62 100644 --- a/screenpro/phenoscore/deseq.py +++ b/screenpro/phenoscore/deseq.py @@ -2,10 +2,10 @@ deseq module: adapt pyDESeq2 for use in ScreenPro2 package """ -import sys import numpy as np import pandas as pd import anndata as ad +import os, contextlib from pydeseq2.dds import DeseqDataSet from pydeseq2.default_inference import DefaultInference @@ -34,7 +34,10 @@ def runDESeq(adata, design, tested_level, ref_level, n_cpus=8,quiet=False): quiet=quiet ) - sys.stdout = open(stat_res.summary(), 'w') + + with open(os.devnull, 'w') as devnull: + with contextlib.redirect_stdout(devnull): + stat_res.summary() df = stat_res.results_df From 58efc393f85f0d092590d610ae20882e3947d152 Mon Sep 17 00:00:00 2001 From: abearab Date: Thu, 4 Jul 2024 01:45:22 -0700 Subject: [PATCH 10/37] update log scale --- screenpro/plotting.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/screenpro/plotting.py b/screenpro/plotting.py index 84cccb0..8d75b58 100644 --- a/screenpro/plotting.py +++ b/screenpro/plotting.py @@ -65,14 +65,17 
@@ def draw_threshold(x, threshold, pseudo_sd): ## Scatter plot of replicates -def plotReplicateScatter(ax, adat_in, x, y, title, min_val=None, max_val=None, log_transform=True, **args): +def plotReplicateScatter(ax, adat_in, x, y, title, min_val=None, max_val=None, log_transform='log10(x+1)', **args): adat = adat_in[[x, y], :].copy() adat.obs.index = [f'Replicate {str(r)}' for r in adat.obs.replicate.to_list()] x_lab, y_lab = [f'Replicate {str(r)}' for r in adat.obs.replicate.to_list()] if log_transform: - sc.pp.log1p(adat) + if log_transform == 'log1p': + adat = sc.pp.log1p(adat) + elif log_transform == 'log10(x+1)': + adat.X = np.log10(adat.X+1) if min_val is None: min_val = min([adat.to_df().loc[x_lab,:].min(), adat.to_df().loc[y_lab,:].min()]) From c0ef0f51a8c209a18cd5ce27d7a896f3f9f73be9 Mon Sep 17 00:00:00 2001 From: abearab Date: Thu, 4 Jul 2024 01:47:51 -0700 Subject: [PATCH 11/37] add a ` adata.copy()` line --- screenpro/assays.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/screenpro/assays.py b/screenpro/assays.py index 0f11ed4..a6e6970 100644 --- a/screenpro/assays.py +++ b/screenpro/assays.py @@ -25,7 +25,7 @@ def __init__(self, adata, fc_transformation='log2(x+1)', test='ttest', n_reps=3) fc_transformation (str): fold change transformation to apply for calculating phenotype scores test (str): statistical test to use for calculating phenotype scores """ - self.adata = adata + self.adata = adata.copy() self.pdata = None self.fc_transformation = fc_transformation self.test = test From 132e6e4b020c6ad066ac52138f8e403b24675835 Mon Sep 17 00:00:00 2001 From: abearab Date: Thu, 4 Jul 2024 03:29:18 -0700 Subject: [PATCH 12/37] split function --- screenpro/phenoscore/deseq.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/screenpro/phenoscore/deseq.py b/screenpro/phenoscore/deseq.py index 7bb2c62..0d7be39 100644 --- a/screenpro/phenoscore/deseq.py +++ b/screenpro/phenoscore/deseq.py @@ -12,7 +12,7 @@ from 
pydeseq2.ds import DeseqStats -def runDESeq(adata, design, tested_level, ref_level, n_cpus=8,quiet=False): +def runDESeq(adata, design, n_cpus=8,quiet=False): inference = DefaultInference(n_cpus=n_cpus) @@ -27,6 +27,13 @@ def runDESeq(adata, design, tested_level, ref_level, n_cpus=8,quiet=False): dds.deseq2() + return dds + + +def extractDESeqResults(dds, design, tested_level, ref_level, n_cpus=8, quiet=False): + + inference = DefaultInference(n_cpus=n_cpus) + stat_res = DeseqStats( dds, contrast=[design, tested_level, ref_level], From eb160c683b4410971973b4bfb673bff365f3f2e2 Mon Sep 17 00:00:00 2001 From: abearab Date: Thu, 4 Jul 2024 03:42:06 -0700 Subject: [PATCH 13/37] mend --- screenpro/phenoscore/deseq.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/screenpro/phenoscore/deseq.py b/screenpro/phenoscore/deseq.py index 0d7be39..3702ea1 100644 --- a/screenpro/phenoscore/deseq.py +++ b/screenpro/phenoscore/deseq.py @@ -34,6 +34,8 @@ def extractDESeqResults(dds, design, tested_level, ref_level, n_cpus=8, quiet=Fa inference = DefaultInference(n_cpus=n_cpus) + name = f'{tested_level}_vs_{ref_level}' + stat_res = DeseqStats( dds, contrast=[design, tested_level, ref_level], @@ -48,4 +50,4 @@ def extractDESeqResults(dds, design, tested_level, ref_level, n_cpus=8, quiet=Fa df = stat_res.results_df - return df + return name, df \ No newline at end of file From 668c88c340712c384c3fd206a352c5d6e5d44ec0 Mon Sep 17 00:00:00 2001 From: abearab Date: Thu, 4 Jul 2024 03:42:27 -0700 Subject: [PATCH 14/37] add `calculateDrugScreenDESeq` function --- screenpro/assays.py | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/screenpro/assays.py b/screenpro/assays.py index a6e6970..7482e3c 100644 --- a/screenpro/assays.py +++ b/screenpro/assays.py @@ -8,6 +8,7 @@ import scanpy as sc from pydeseq2 import preprocessing +from .phenoscore.deseq import runDESeq, extractDESeqResults from .phenoscore import runPhenoScore, 
runPhenoScoreForReplicate from .utils import ann_score_df from copy import copy @@ -90,6 +91,36 @@ def countNormalization(self): self.adata.layers['seq_depth_norm'] = norm_counts self.adata.X = self.adata.layers['seq_depth_norm'] + def calculateDrugScreenDESeq(self, t0, untreated, treated, run_name=None, **kwargs): + """ + Calculate DESeq2 results for a given drug screen dataset. + + Args: + design (str): design matrix for DESeq2 + run_name (str): name for the DESeq2 calculation run + **kwargs: additional arguments to pass to runDESeq + """ + dds = runDESeq(self.adata, 'condition', **kwargs) + + # Calculate `gamma`, `rho`, and `tau` phenotype scores + gamma_name, gamma = extractDESeqResults( + dds, 'condition', t0, untreated, **kwargs + ) + + tau_name, tau = extractDESeqResults( + dds, 'condition', t0, treated, **kwargs + ) + + rho_name, rho = extractDESeqResults( + dds, 'condition', untreated, treated, **kwargs + ) + + if not run_name: run_name = 'pyDESeq2' + + self.phenotypes[run_name] = pd.concat({ + f'gamma:{gamma_name}': gamma, f'tau:{tau_name}': tau, f'rho:{rho_name}': rho + }, axis=1) + def calculateDrugScreen(self, t0, untreated, treated, db_untreated, db_treated, score_level, db_rate_col='pop_doublings', run_name=None, **kwargs): """ Calculate `gamma`, `rho`, and `tau` phenotype scores for a drug screen dataset in a given `score_level`. 
From 4682bfc5b34cfb1cd98632e80021214b54eaf2f4 Mon Sep 17 00:00:00 2001 From: abearab Date: Thu, 4 Jul 2024 03:49:53 -0700 Subject: [PATCH 15/37] add a print line --- screenpro/phenoscore/deseq.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/screenpro/phenoscore/deseq.py b/screenpro/phenoscore/deseq.py index 3702ea1..7ba3ae1 100644 --- a/screenpro/phenoscore/deseq.py +++ b/screenpro/phenoscore/deseq.py @@ -34,7 +34,9 @@ def extractDESeqResults(dds, design, tested_level, ref_level, n_cpus=8, quiet=Fa inference = DefaultInference(n_cpus=n_cpus) - name = f'{tested_level}_vs_{ref_level}' + result_name = f'{tested_level}_vs_{ref_level}' + + print(f'\t{tested_level}_vs_{ref_level}') stat_res = DeseqStats( dds, @@ -48,6 +50,6 @@ def extractDESeqResults(dds, design, tested_level, ref_level, n_cpus=8, quiet=Fa with contextlib.redirect_stdout(devnull): stat_res.summary() - df = stat_res.results_df + results = stat_res.results_df - return name, df \ No newline at end of file + return result_name, results From b39113252bb3f49f04dd8c2ee8bed6f656fcae2d Mon Sep 17 00:00:00 2001 From: abearab Date: Thu, 4 Jul 2024 03:54:00 -0700 Subject: [PATCH 16/37] mend --- screenpro/phenoscore/deseq.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/screenpro/phenoscore/deseq.py b/screenpro/phenoscore/deseq.py index 7ba3ae1..ff7be3a 100644 --- a/screenpro/phenoscore/deseq.py +++ b/screenpro/phenoscore/deseq.py @@ -15,7 +15,7 @@ def runDESeq(adata, design, n_cpus=8,quiet=False): inference = DefaultInference(n_cpus=n_cpus) - + dds = DeseqDataSet( counts=adata.to_df().astype(int), metadata=adata.obs, @@ -36,8 +36,6 @@ def extractDESeqResults(dds, design, tested_level, ref_level, n_cpus=8, quiet=Fa result_name = f'{tested_level}_vs_{ref_level}' - print(f'\t{tested_level}_vs_{ref_level}') - stat_res = DeseqStats( dds, contrast=[design, tested_level, ref_level], @@ -45,7 +43,6 @@ def extractDESeqResults(dds, design, tested_level, ref_level, n_cpus=8, 
quiet=Fa quiet=quiet ) - with open(os.devnull, 'w') as devnull: with contextlib.redirect_stdout(devnull): stat_res.summary() From 30e3b289d5008efa7531f9b193d437a7ca086329 Mon Sep 17 00:00:00 2001 From: abearab Date: Thu, 4 Jul 2024 03:54:58 -0700 Subject: [PATCH 17/37] mend --- screenpro/phenoscore/deseq.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/screenpro/phenoscore/deseq.py b/screenpro/phenoscore/deseq.py index ff7be3a..c8f5600 100644 --- a/screenpro/phenoscore/deseq.py +++ b/screenpro/phenoscore/deseq.py @@ -16,6 +16,8 @@ def runDESeq(adata, design, n_cpus=8,quiet=False): inference = DefaultInference(n_cpus=n_cpus) + print(f'\tcreating `dds` object...') + dds = DeseqDataSet( counts=adata.to_df().astype(int), metadata=adata.obs, @@ -36,6 +38,8 @@ def extractDESeqResults(dds, design, tested_level, ref_level, n_cpus=8, quiet=Fa result_name = f'{tested_level}_vs_{ref_level}' + print(f'\t{tested_level}_vs_{ref_level}') + stat_res = DeseqStats( dds, contrast=[design, tested_level, ref_level], From fdcc519993ded790ac8a12c92f44b34aa3620e9c Mon Sep 17 00:00:00 2001 From: abearab Date: Thu, 4 Jul 2024 03:57:12 -0700 Subject: [PATCH 18/37] mend --- screenpro/phenoscore/deseq.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/screenpro/phenoscore/deseq.py b/screenpro/phenoscore/deseq.py index c8f5600..8399633 100644 --- a/screenpro/phenoscore/deseq.py +++ b/screenpro/phenoscore/deseq.py @@ -32,7 +32,7 @@ def runDESeq(adata, design, n_cpus=8,quiet=False): return dds -def extractDESeqResults(dds, design, tested_level, ref_level, n_cpus=8, quiet=False): +def extractDESeqResults(dds, design, ref_level, tested_level, n_cpus=8, quiet=False): inference = DefaultInference(n_cpus=n_cpus) From 6f0a7a612e458f8346efe288aa2f095788b4a376 Mon Sep 17 00:00:00 2001 From: abearab Date: Wed, 10 Jul 2024 01:21:17 -0700 Subject: [PATCH 19/37] mend --- screenpro/plotting.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git 
a/screenpro/plotting.py b/screenpro/plotting.py index 8d75b58..5393a94 100644 --- a/screenpro/plotting.py +++ b/screenpro/plotting.py @@ -65,17 +65,14 @@ def draw_threshold(x, threshold, pseudo_sd): ## Scatter plot of replicates -def plotReplicateScatter(ax, adat_in, x, y, title, min_val=None, max_val=None, log_transform='log10(x+1)', **args): +def plotReplicateScatter(ax, adat_in, x, y, title, min_val=None, max_val=None, log_transform=False, **args): adat = adat_in[[x, y], :].copy() adat.obs.index = [f'Replicate {str(r)}' for r in adat.obs.replicate.to_list()] x_lab, y_lab = [f'Replicate {str(r)}' for r in adat.obs.replicate.to_list()] if log_transform: - if log_transform == 'log1p': - adat = sc.pp.log1p(adat) - elif log_transform == 'log10(x+1)': - adat.X = np.log10(adat.X+1) + adat.X = np.log10(adat.X+1) if min_val is None: min_val = min([adat.to_df().loc[x_lab,:].min(), adat.to_df().loc[y_lab,:].min()]) From 1c42c5cf926f4cffc890df901e60f12a98210a56 Mon Sep 17 00:00:00 2001 From: abearab Date: Wed, 10 Jul 2024 01:24:13 -0700 Subject: [PATCH 20/37] mend --- screenpro/plotting.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/screenpro/plotting.py b/screenpro/plotting.py index 5393a94..0cd958e 100644 --- a/screenpro/plotting.py +++ b/screenpro/plotting.py @@ -65,7 +65,7 @@ def draw_threshold(x, threshold, pseudo_sd): ## Scatter plot of replicates -def plotReplicateScatter(ax, adat_in, x, y, title, min_val=None, max_val=None, log_transform=False, **args): +def plotReplicateScatter(ax, adat_in, x, y, title, min_val=None, max_val=None, log_transform=True, **args): adat = adat_in[[x, y], :].copy() adat.obs.index = [f'Replicate {str(r)}' for r in adat.obs.replicate.to_list()] From e527eca1b243d1f596fa283ce55ebbc7910b3ab5 Mon Sep 17 00:00:00 2001 From: abearab Date: Fri, 12 Jul 2024 00:10:46 -0700 Subject: [PATCH 21/37] add and update files `visualize` module --- .../{plotting.py => visualize/__init__.py} | 65 ++------------- screenpro/{ => 
visualize}/dashboard.py | 5 -- screenpro/visualize/utils.py | 80 +++++++++++++++++++ 3 files changed, 87 insertions(+), 63 deletions(-) rename screenpro/{plotting.py => visualize/__init__.py} (85%) rename screenpro/{ => visualize}/dashboard.py (98%) create mode 100644 screenpro/visualize/utils.py diff --git a/screenpro/plotting.py b/screenpro/visualize/__init__.py similarity index 85% rename from screenpro/plotting.py rename to screenpro/visualize/__init__.py index 0cd958e..7dd300e 100644 --- a/screenpro/plotting.py +++ b/screenpro/visualize/__init__.py @@ -1,67 +1,16 @@ +## Copyright (c) 2022-2024 ScreenPro2 Development Team. +## All rights reserved. +## Gilbart Lab, UCSF / Arc Institute. +## Multi-Omics Tech Center, Arc Insititue. + import pandas as pd import numpy as np import matplotlib.pyplot as plt import matplotlib -from .utils import ann_score_df +from ..ngs.utils import ann_score_df +from .utils import almost_black import scanpy as sc -# variables -almost_black = '#111111' -dark2 = ['#1b9e77', - '#d95f02', - '#7570b3', - '#e7298a', - '#66a61e', - '#e6ab02', - '#a6761d', - '#666666'] -blue_yellow = matplotlib.colors.LinearSegmentedColormap.from_list( - 'BuYl', [(0, '#ffff00'), (.49, '#000000'), (.51, '#000000'), (1, '#0000ff')]) -blue_yellow.set_bad('#999999', 1) -yellow_blue = matplotlib.colors.LinearSegmentedColormap.from_list( - 'YlBu', [(0, '#0000ff'), (.49, '#000000'), (.51, '#000000'), (1, '#ffff00')]) -yellow_blue.set_bad('#999999', 1) - -# plt.rcParams['font.sans-serif'] = [ -# 'Helvetica', 'Arial', 'Verdana', 'Bitstream Vera Sans' -# ] -# plt.rcParams['font.size'] = 8 -# plt.rcParams['font.weight'] = 'regular' -# plt.rcParams['text.color'] = almost_black -# -# axisLineWidth = .5 -# plt.rcParams['axes.linewidth'] = axisLineWidth -# plt.rcParams['lines.linewidth'] = 1.5 -# -# plt.rcParams['axes.facecolor'] = 'white' -# plt.rcParams['axes.edgecolor'] = almost_black -# plt.rcParams['axes.labelcolor'] = almost_black -# # plt.rcParams['axes.color_cycle'] 
= dark2_all -# -# plt.rcParams['patch.edgecolor'] = 'none' -# plt.rcParams['patch.linewidth'] = .25 -# # plt.rcParams['patch.facecolor'] = dark2_all[0] -# -# plt.rcParams['savefig.dpi'] = 1000 -# plt.rcParams['savefig.format'] = 'svg' -# -# plt.rcParams['legend.frameon'] = False -# plt.rcParams['legend.handletextpad'] = .25 -# plt.rcParams['legend.fontsize'] = 8 -# plt.rcParams['legend.numpoints'] = 1 -# plt.rcParams['legend.scatterpoints'] = 1 -# -# plt.rcParams['ytick.direction'] = 'out' -# plt.rcParams['ytick.color'] = almost_black -# plt.rcParams['ytick.major.width'] = axisLineWidth -# plt.rcParams['xtick.direction'] = 'out' -# plt.rcParams['xtick.color'] = almost_black -# plt.rcParams['xtick.major.width'] = axisLineWidth - - -def draw_threshold(x, threshold, pseudo_sd): - return threshold * pseudo_sd * (1 if x > 0 else -1) / abs(x) - ## Scatter plot of replicates diff --git a/screenpro/dashboard.py b/screenpro/visualize/dashboard.py similarity index 98% rename from screenpro/dashboard.py rename to screenpro/visualize/dashboard.py index d709e37..44f1c62 100644 --- a/screenpro/dashboard.py +++ b/screenpro/visualize/dashboard.py @@ -1,8 +1,3 @@ -## Copyright (c) 2022-2024 ScreenPro2 Development Team. -## All rights reserved. -## Gilbart Lab, UCSF / Arc Institute. -## Multi-Omics Tech Center, Arc Insititue. 
- import numpy as np import pandas as pd import bokeh diff --git a/screenpro/visualize/utils.py b/screenpro/visualize/utils.py new file mode 100644 index 0000000..84b9c7e --- /dev/null +++ b/screenpro/visualize/utils.py @@ -0,0 +1,80 @@ +import matplotlib.pyplot as plt +import matplotlib +import numpy as np +import pandas as pd + +# variables +almost_black = '#111111' +dark2 = ['#1b9e77', + '#d95f02', + '#7570b3', + '#e7298a', + '#66a61e', + '#e6ab02', + '#a6761d', + '#666666'] +blue_yellow = matplotlib.colors.LinearSegmentedColormap.from_list( + 'BuYl', [(0, '#ffff00'), (.49, '#000000'), (.51, '#000000'), (1, '#0000ff')]) +blue_yellow.set_bad('#999999', 1) +yellow_blue = matplotlib.colors.LinearSegmentedColormap.from_list( + 'YlBu', [(0, '#0000ff'), (.49, '#000000'), (.51, '#000000'), (1, '#ffff00')]) +yellow_blue.set_bad('#999999', 1) + +# plt.rcParams['font.sans-serif'] = [ +# 'Helvetica', 'Arial', 'Verdana', 'Bitstream Vera Sans' +# ] +# plt.rcParams['font.size'] = 8 +# plt.rcParams['font.weight'] = 'regular' +# plt.rcParams['text.color'] = almost_black +# +# axisLineWidth = .5 +# plt.rcParams['axes.linewidth'] = axisLineWidth +# plt.rcParams['lines.linewidth'] = 1.5 +# +# plt.rcParams['axes.facecolor'] = 'white' +# plt.rcParams['axes.edgecolor'] = almost_black +# plt.rcParams['axes.labelcolor'] = almost_black +# # plt.rcParams['axes.color_cycle'] = dark2_all +# +# plt.rcParams['patch.edgecolor'] = 'none' +# plt.rcParams['patch.linewidth'] = .25 +# # plt.rcParams['patch.facecolor'] = dark2_all[0] +# +# plt.rcParams['savefig.dpi'] = 1000 +# plt.rcParams['savefig.format'] = 'svg' +# +# plt.rcParams['legend.frameon'] = False +# plt.rcParams['legend.handletextpad'] = .25 +# plt.rcParams['legend.fontsize'] = 8 +# plt.rcParams['legend.numpoints'] = 1 +# plt.rcParams['legend.scatterpoints'] = 1 +# +# plt.rcParams['ytick.direction'] = 'out' +# plt.rcParams['ytick.color'] = almost_black +# plt.rcParams['ytick.major.width'] = axisLineWidth +# 
plt.rcParams['xtick.direction'] = 'out' +# plt.rcParams['xtick.color'] = almost_black +# plt.rcParams['xtick.major.width'] = axisLineWidth + + +def cleanAxes(ax, top=False, right=False, bottom=True, left=True): + ax.grid('off') + ax.spines['top'].set_visible(top) + ax.spines['right'].set_visible(right) + ax.spines['left'].set_visible(left) + ax.spines['bottom'].set_visible(bottom) + + #turn off all ticks + ax.yaxis.set_ticks_position('none') + ax.xaxis.set_ticks_position('none') + + #now re-enable visibles + if top: + ax.xaxis.tick_top() + if bottom: + ax.xaxis.tick_bottom() + if left: + ax.yaxis.tick_left() + if right: + ax.yaxis.tick_right() + From e69c8bb984b27f50d286c1de18d81e089e0371f3 Mon Sep 17 00:00:00 2001 From: abearab Date: Fri, 12 Jul 2024 00:12:23 -0700 Subject: [PATCH 22/37] update files `ngs` module --- screenpro/ngs/__init__.py | 6 ++++ screenpro/ngs/utils.py | 60 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 66 insertions(+) create mode 100644 screenpro/ngs/utils.py diff --git a/screenpro/ngs/__init__.py b/screenpro/ngs/__init__.py index 3e3f052..490148d 100644 --- a/screenpro/ngs/__init__.py +++ b/screenpro/ngs/__init__.py @@ -40,6 +40,7 @@ from . import cas9 from . import cas12 +from .utils import find_low_counts, addPseudoCount from ..load import load_cas9_sgRNA_library from simple_colors import green @@ -384,3 +385,8 @@ def build_counts_anndata(self, source='library', verbose=False): return rdata else: raise ValueError("Invalid source argument. Please choose from 'mapped', 'recombinant' or 'library'. 
Note: 'mapped' and 'library' act the same way.") + + def filter_low_counts(self, filter_type='either', minimum_reads=50, verbose=False): + '''Filter low counts from count matrix + ''' + raise NotImplementedError("This method is not yet implemented.") \ No newline at end of file diff --git a/screenpro/ngs/utils.py b/screenpro/ngs/utils.py new file mode 100644 index 0000000..2f9e8ce --- /dev/null +++ b/screenpro/ngs/utils.py @@ -0,0 +1,60 @@ +import pandas as pd +import numpy as np + + +def find_low_counts(adata, filter_type='either', minimum_reads=50): + """ + Label variables with low counts in either or all samples. + + Parameters: + adata (AnnData): AnnData object + filter_type (str): either or all + minimum_reads (int): minimum number of reads + + Returns: + None + """ + count_bin = adata.X >= minimum_reads + if filter_type == 'either': + out = adata[:, ~(~count_bin.all(axis=0))].copy() + elif filter_type == 'all': + out = adata[:, count_bin.all(axis=0)].copy() + elif filter_type == 'sum': + out = adata[:, adata.to_df().sum(axis=0) >= minimum_reads].copy() + else: + raise ValueError(f'filter_type "{filter_type}" not recognized. Use "either", "all", or "sum".') + + # print the number of removed variables + n_removed = adata.shape[1] - out.shape[1] + print( + f"{n_removed} variables with less than {minimum_reads} reads (filter_type: '{filter_type}')" + ) + + adata.var['low_count'] = ~adata.var.index.isin(out.var.index.to_list()) + + +def addPseudoCount(counts, pseudocountBehavior, pseudocountValue): + pass + + ## possible pseudocount behaviors + # 1. remove 0 + # 2. add pseudocount + # 3. 
impute 0 (it's hard) + + # # pseudocount + # if pseudocountBehavior == 'default' or pseudocountBehavior == 'zeros only': + # def defaultBehavior(row): + # return row if min( + # row) != 0 else row + pseudocountValue + + # combinedCountsPseudo = combinedCounts.apply(defaultBehavior, axis=1) + # elif pseudocountBehavior == 'all values': + # combinedCountsPseudo = combinedCounts.apply( + # lambda row: row + pseudocountValue, axis=1) + # elif pseudocountBehavior == 'filter out': + # combinedCountsPseudo = combinedCounts.copy() + # zeroRows = combinedCounts.apply(lambda row: min(row) <= 0, axis=1) + # combinedCountsPseudo.loc[zeroRows, :] = np.nan + # else: + # raise ValueError( + # 'Pseudocount behavior not recognized or not implemented') \ No newline at end of file From 73a86bdc2b2f2a24e69b7aeac3aab8d7a5ddf9eb Mon Sep 17 00:00:00 2001 From: abearab Date: Fri, 12 Jul 2024 00:13:01 -0700 Subject: [PATCH 23/37] update files `phenoscore` module --- .../{utils.py => phenoscore/annotate.py} | 52 +------------------ 1 file changed, 1 insertion(+), 51 deletions(-) rename screenpro/{utils.py => phenoscore/annotate.py} (50%) diff --git a/screenpro/utils.py b/screenpro/phenoscore/annotate.py similarity index 50% rename from screenpro/utils.py rename to screenpro/phenoscore/annotate.py index c86bc95..07e612b 100644 --- a/screenpro/utils.py +++ b/screenpro/phenoscore/annotate.py @@ -1,55 +1,5 @@ -import pandas as pd import numpy as np - - -def check_protospacer_length(library, protospacer_col): - lengths = list(set(library[protospacer_col].str.len())) - if len(lengths) > 1: - raise ValueError(f"Protospacer lengths are not uniform: {lengths}") - else: - length = lengths[0] - return length - - -def trim_protospacer(library, protospacer_col, trim_side, trim_len): - if trim_side == '5prime': - library[protospacer_col] = library[protospacer_col].str[trim_len:].str.upper() - - elif trim_side == '3prime': - library[protospacer_col] = library[protospacer_col].str[:-trim_len].str.upper() - 
- return library - - -def find_low_counts(adata, filter_type='either', minimum_reads=50): - """ - Label variables with low counts in either or all samples. - - Parameters: - adata (AnnData): AnnData object - filter_type (str): either or all - minimum_reads (int): minimum number of reads - - Returns: - None - """ - count_bin = adata.X >= minimum_reads - if filter_type == 'either': - out = adata[:, ~(~count_bin.all(axis=0))].copy() - elif filter_type == 'all': - out = adata[:, count_bin.all(axis=0)].copy() - elif filter_type == 'sum': - out = adata[:, adata.to_df().sum(axis=0) >= minimum_reads].copy() - else: - raise ValueError(f'filter_type "{filter_type}" not recognized. Use "either", "all", or "sum".') - - # print the number of removed variables - n_removed = adata.shape[1] - out.shape[1] - print( - f"{n_removed} variables with less than {minimum_reads} reads (filter_type: '{filter_type}')" - ) - - adata.var['low_count'] = ~adata.var.index.isin(out.var.index.to_list()) +import pandas as pd def ann_score_df(df_in, up_hit='resistance_hit', down_hit='sensitivity_hit', ctrl_label='control', threshold=10): From 6b9ba7d56e77429d5c508bc19ae02251c952118b Mon Sep 17 00:00:00 2001 From: abearab Date: Fri, 12 Jul 2024 00:16:00 -0700 Subject: [PATCH 24/37] mend --- screenpro/visualize/__init__.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/screenpro/visualize/__init__.py b/screenpro/visualize/__init__.py index 7dd300e..3a0e9d7 100644 --- a/screenpro/visualize/__init__.py +++ b/screenpro/visualize/__init__.py @@ -3,13 +3,9 @@ ## Gilbart Lab, UCSF / Arc Institute. ## Multi-Omics Tech Center, Arc Insititue. 
-import pandas as pd import numpy as np -import matplotlib.pyplot as plt -import matplotlib -from ..ngs.utils import ann_score_df -from .utils import almost_black import scanpy as sc +from .utils import almost_black ## Scatter plot of replicates From 1630e44c4c6d61db5864ebff2b00c947e9a44df0 Mon Sep 17 00:00:00 2001 From: abearab Date: Fri, 12 Jul 2024 00:42:58 -0700 Subject: [PATCH 25/37] update docs files --- docs/source/history.rst | 3 +- docs/source/index.rst | 1 - docs/source/phenotype.md | 55 ++++++++++++++++++++++++++++++++++ docs/source/phenotype.rst | 62 --------------------------------------- docs/source/plotting.rst | 9 ------ 5 files changed, 57 insertions(+), 73 deletions(-) create mode 100644 docs/source/phenotype.md delete mode 100644 docs/source/phenotype.rst delete mode 100644 docs/source/plotting.rst diff --git a/docs/source/history.rst b/docs/source/history.rst index a3149e4..a58a937 100644 --- a/docs/source/history.rst +++ b/docs/source/history.rst @@ -2,9 +2,10 @@ History ======= -0.4.0 (coming soon) +0.3.1 - 0.4.0 (June 2024 - July 2024) ~~~~~~~~~~~~~~~~~~~ * add command line interface +* major bug fixes and improvements in code formatting 0.2.11 - 0.3.0 (Apr 2024 - May 2024) ~~~~~~~~~~~~~~~~~ diff --git a/docs/source/index.rst b/docs/source/index.rst index 4d9dd0f..d188efb 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -15,7 +15,6 @@ Welcome to ScreenPro2's documentation! assays ngs phenotype - plotting load .. toctree:: diff --git a/docs/source/phenotype.md b/docs/source/phenotype.md new file mode 100644 index 0000000..eac5e98 --- /dev/null +++ b/docs/source/phenotype.md @@ -0,0 +1,55 @@ +# Phenotype calculation modules + +Log ratio of $y$ vs $x$: + +$$\Delta=\log(\frac{\begin{bmatrix}{N_{y}}\end{bmatrix}_{(a,b)} + 1}{\begin{bmatrix}{N_{x}}\end{bmatrix}_{(a,b)} + 1})$$ + +- $y \rightarrow$ condition $y$ (e.g. treated samples) +- $x \rightarrow$ condition $x$ (e.g.
$t_{0}$ samples) +- $a \rightarrow$ number of library elements with sgRNAs targeting $T$ +- $b \rightarrow$ number of biological replicates, $R$ (e.g. 2 or 3) +- $N_{x}$ \| $N_{y} \rightarrow$ read counts normalized for sequencing + depth in condition $x$ or $y$ + +Here is a formula for V3 library with single library element per gene +(i.e. dual sgRNAs in one construct targeting same gene). + +Phenotype score for each $T$ comparing $y$ vs $x$: + +$$\text{PhenoScore}(T,x,y) = +\left( +\frac{ +\overline{\Delta_{(x,y)}} +}{ +\text{median}( {\overline{\Delta_{(x_{ctrl},y_{ctrl})}}} ) +} +\right) +\times \frac{ 1 }{d_{growth}}$$ + +- $\overline{\Delta(x,y)} \rightarrow$ log ratio averaged across + replicates +- $T \rightarrow$ library elements with sgRNAs targeting $T$ +- $d_{growth} \rightarrow$ growth factor to normalize the phenotype + score. + +Statistical test comparing $y$ vs $x$ per each target, $T$: + +$$\text{p-value}(T,x,y) = \text{t-test} \left( +\begin{bmatrix}{N_{x}}\end{bmatrix}_{(a,b)}, +\begin{bmatrix}{N_{y}}\end{bmatrix}_{(a,b)} +\right)$$ + +(see this wikipedia page: [Dependent t-test for paired +samples](https://en.wikipedia.org/wiki/Student%27s_t-test#Dependent_t-test_for_paired_samples)) + +(see the link to the implemented tool: [ttest_rel, a scipy +module](https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.ttest_rel.html)) + +> This is a test for the null hypothesis that two related or repeated +> samples have identical average (expected) values. + +------------------------------------------------------------------------ + +::: {.automodule members="" show-inheritance=""} +screenpro.phenoscore +::: diff --git a/docs/source/phenotype.rst b/docs/source/phenotype.rst deleted file mode 100644 index c462937..0000000 --- a/docs/source/phenotype.rst +++ /dev/null @@ -1,62 +0,0 @@ -Phenotype calculation modules -======================= - -Log ratio of :math:`y` vs :math:`x`: - -.. 
math:: \Delta=\log(\frac{\begin{bmatrix}{N_{y}}\end{bmatrix}_{(a,b)} + 1}{\begin{bmatrix}{N_{x}}\end{bmatrix}_{(a,b)} + 1}) - -- :math:`y \rightarrow` condition :math:`x` (e.g. treated samples) -- :math:`x \rightarrow` condition :math:`y` (e.g. :math:`t_{0}` samples) -- :math:`a \rightarrow` number of library elements with sgRNAs targeting :math:`T` -- :math:`b \rightarrow` number of biological replicates, :math:`R` (e.g. 2 or 3) -- :math:`N_{x}` | :math:`N_{y} \rightarrow` read counts normalized for sequencing depth in condition :math:`x` or :math:`y` - - -Here is a formula for V3 library with single library element per gene (i.e. dual sgRNAs in one construct targeting same gene). - -Phenotype score for each :math:`T` comparing :math:`y` vs :math:`x`: - -.. math:: - \text{PhenoScore}(T,x,y) = - \left( - \frac{ - \overline{\Delta_{(x,y)}} - }{ - \text{median}( {\overline{\Delta_{(x_{ctrl},y_{ctrl})}}} ) - } - \right) - \times \frac{ 1 }{d_{growth}} - -- :math:`\overline{\Delta(x,y)} \rightarrow` log ratio averaged across replicates -- :math:`T \rightarrow` library elements with sgRNAs targeting :math:`T` -- :math:`d_{growth} \rightarrow` growth factor to normalize the phenotype score. - -Statistical test comparing :math:`y` vs :math:`x` per each target, :math:`T`: - -.. math:: - \text{p-value}(T,x,y) = \text{t-test} \left( - \begin{bmatrix}{N_{x}}\end{bmatrix}_{(a,b)}, - \begin{bmatrix}{N_{y}}\end{bmatrix}_{(a,b)} - \right) - - -(see this wikipedia page: `Dependent t-test for paired samples`_) - -(see the link to the implemented tool: `ttest_rel, a scipy module`_) - - This is a test for the null hypothesis that two related or repeated samples have identical average (expected) values. - ------------------------ - -.. automodule:: screenpro.phenoscore - :members: - :undoc-members: - :show-inheritance: - -.. automodule:: screenpro.phenostats - :members: - :undoc-members: - :show-inheritance: - -.. 
_`Dependent t-test for paired samples`: https://en.wikipedia.org/wiki/Student%27s_t-test#Dependent_t-test_for_paired_samples -.. _`ttest_rel, a scipy module`: https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.ttest_rel.html diff --git a/docs/source/plotting.rst b/docs/source/plotting.rst deleted file mode 100644 index 98d85e3..0000000 --- a/docs/source/plotting.rst +++ /dev/null @@ -1,9 +0,0 @@ -Plotting and visualization modules -======================= - - -.. automodule:: screenpro.plotting - :members: - :undoc-members: - :show-inheritance: - From 00988262d11f960a6ae56c27ec020c337fba7052 Mon Sep 17 00:00:00 2001 From: abearab Date: Fri, 12 Jul 2024 00:43:13 -0700 Subject: [PATCH 26/37] mend --- screenpro/phenoscore/__init__.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/screenpro/phenoscore/__init__.py b/screenpro/phenoscore/__init__.py index 77c5bfe..696ba9f 100644 --- a/screenpro/phenoscore/__init__.py +++ b/screenpro/phenoscore/__init__.py @@ -1,5 +1,12 @@ -""" -phenoscore module +## Copyright (c) 2022-2024 ScreenPro2 Development Team. +## All rights reserved. +## Gilbart Lab, UCSF / Arc Institute. +## Multi-Omics Tech Center, Arc Insititue. + +"""phenoscore module + +This module contains functions for calculating relative phenotypes from CRISPR screens +datasets. """ import numpy as np From 67989e0ac83a4c6b7ec3175111b05212f73cf551 Mon Sep 17 00:00:00 2001 From: abearab Date: Fri, 12 Jul 2024 00:43:34 -0700 Subject: [PATCH 27/37] update files main modules --- screenpro/__init__.py | 25 ++++++++++++++++++++++--- screenpro/__main__.py | 5 +++++ screenpro/assays.py | 11 ++++++++--- screenpro/load.py | 43 +++++++++++++++++++++++++++++++++---------- screenpro/main.py | 1 + 5 files changed, 69 insertions(+), 16 deletions(-) diff --git a/screenpro/__init__.py b/screenpro/__init__.py index 036f773..1c2da6f 100644 --- a/screenpro/__init__.py +++ b/screenpro/__init__.py @@ -1,10 +1,29 @@ -from . 
import plotting as pl +## Copyright (c) 2022-2024 ScreenPro2 Development Team. +## All rights reserved. +## Gilbart Lab, UCSF / Arc Institute. +## Multi-Omics Tech Center, Arc Insititue. + +'''ScreenPro2: A Python package for pooled CRISPR screens analysis + +This package contains several modules, including: + +**Main modules:** +- ngs: tools for generating counts from NGS data +- phenoscore: tools for calculating phenoscores +- assays: wrappers for analyzing CRISPR screens data from standard assays + +**Additional modules:** +- load: tools for loading and saving data +- visualize: tools for visualizing data +- datasets: API for accessing pre-processed datasets +''' + from . import phenoscore as ps -from . import utils from . import ngs +from . import assays from . import load -from . import dashboard +from . import visualize as viz from .ngs import GuideCounter from .assays import PooledScreens, GImaps diff --git a/screenpro/__main__.py b/screenpro/__main__.py index 5d6a810..a0fad73 100644 --- a/screenpro/__main__.py +++ b/screenpro/__main__.py @@ -1,3 +1,8 @@ +## Copyright (c) 2022-2024 ScreenPro2 Development Team. +## All rights reserved. +## Gilbart Lab, UCSF / Arc Institute. +## Multi-Omics Tech Center, Arc Insititue. + from .main import main main() diff --git a/screenpro/assays.py b/screenpro/assays.py index 7482e3c..42b6b2d 100644 --- a/screenpro/assays.py +++ b/screenpro/assays.py @@ -1,5 +1,10 @@ -""" -Assays module +## Copyright (c) 2022-2024 ScreenPro2 Development Team. +## All rights reserved. +## Gilbart Lab, UCSF / Arc Institute. +## Multi-Omics Tech Center, Arc Insititue. 
+ +"""Assays module + """ import numpy as np @@ -10,7 +15,7 @@ from pydeseq2 import preprocessing from .phenoscore.deseq import runDESeq, extractDESeqResults from .phenoscore import runPhenoScore, runPhenoScoreForReplicate -from .utils import ann_score_df +from .ngs.utils import ann_score_df from copy import copy diff --git a/screenpro/load.py b/screenpro/load.py index 4eb629d..0ded2e6 100644 --- a/screenpro/load.py +++ b/screenpro/load.py @@ -1,12 +1,16 @@ -""" -Module for loading screen datasets +## Copyright (c) 2022-2024 ScreenPro2 Development Team. +## All rights reserved. +## Gilbart Lab, UCSF / Arc Institute. +## Multi-Omics Tech Center, Arc Insititue. + +"""Load module + +Functions to load screen datasets and sgRNA library tables. """ import pickle import pandas as pd -from .utils import check_protospacer_length, trim_protospacer - def load_cas9_sgRNA_library(library_path, library_type, sep='\t', index_col=0, protospacer_length=19, verbose=True, **args): '''Load Cas9 sgRNA library table for single or dual guide design. @@ -38,12 +42,12 @@ def load_cas9_sgRNA_library(library_path, library_type, sep='\t', index_col=0, p library['protospacer'] = library['protospacer'].str.upper() protospacer_col = 'protospacer' - in_length = check_protospacer_length(library, 'protospacer') + in_length = _check_protospacer_length(library, 'protospacer') if in_length == protospacer_length: pass elif in_length > protospacer_length: if verbose: print(f"Trimming protospacer sequences in '{protospacer_col}' column.") - library = trim_protospacer( + library = _trim_protospacer( library, protospacer_col, '5prime', in_length - protospacer_length @@ -82,12 +86,12 @@ def load_cas9_sgRNA_library(library_path, library_type, sep='\t', index_col=0, p # # TODO: Enable trimming of protospacer sequences through command line arguments. 
for protospacer_col in ['protospacer_A', 'protospacer_B']: - in_length = check_protospacer_length(library, protospacer_col) + in_length = _check_protospacer_length(library, protospacer_col) if in_length == protospacer_length: pass elif in_length > protospacer_length: if verbose: print(f"Trimming protospacer sequences in '{protospacer_col}' column.") - library = trim_protospacer( + library = _trim_protospacer( library, protospacer_col, '5prime', in_length - protospacer_length @@ -189,7 +193,26 @@ def loadScreenProcessingData(experimentName, collapsedToTranscripts=True, premer return dataDict -def write_screen_pkl(screen, name): +def _check_protospacer_length(library, protospacer_col): + lengths = list(set(library[protospacer_col].str.len())) + if len(lengths) > 1: + raise ValueError(f"Protospacer lengths are not uniform: {lengths}") + else: + length = lengths[0] + return length + + +def _trim_protospacer(library, protospacer_col, trim_side, trim_len): + if trim_side == '5prime': + library[protospacer_col] = library[protospacer_col].str[trim_len:].str.upper() + + elif trim_side == '3prime': + library[protospacer_col] = library[protospacer_col].str[:-trim_len].str.upper() + + return library + + +def _write_screen_pkl(screen, name): """ Write AnnData object to a pickle file @@ -203,7 +226,7 @@ def write_screen_pkl(screen, name): print(f'Object successfully saved to "{file_name}"') -def read_screen_pkl(name): +def _read_screen_pkl(name): """ Read ScreenPro object from a pickle file diff --git a/screenpro/main.py b/screenpro/main.py index 0eb34c2..66d6c43 100644 --- a/screenpro/main.py +++ b/screenpro/main.py @@ -8,6 +8,7 @@ from .__init__ import __version__ from . import ngs + def add_counter_parser(parent_subparsers, parent): name = "guidecounter" desc = "Process FASTQ files to count sgRNA sequences." 
From 593a4795748a24009537a62f4946399a564728a5 Mon Sep 17 00:00:00 2001 From: abearab Date: Fri, 12 Jul 2024 00:50:52 -0700 Subject: [PATCH 28/37] change file format --- docs/source/ngs.md | 27 +++++++++++++++++++++++++++ docs/source/ngs.rst | 30 ------------------------------ 2 files changed, 27 insertions(+), 30 deletions(-) create mode 100644 docs/source/ngs.md delete mode 100644 docs/source/ngs.rst diff --git a/docs/source/ngs.md b/docs/source/ngs.md new file mode 100644 index 0000000..5e22e9e --- /dev/null +++ b/docs/source/ngs.md @@ -0,0 +1,27 @@ +# NGS screen processing module + +`GuideCounter` class is a wrapper to run the functions for a +CRISPR screen experiment. + +This module contains a set of python functions to process and analyze +NGS files from CRISPR screens. Based on the type of CRISPR-Cas system +used for the screen, the functions are divided into two classes: +`Cas9` and `Cas12`. + +------------------------------------------------------------------------ + +::: {.automodule members="" undoc-members="" show-inheritance=""} +screenpro.ngs +::: + +::: {.automodule members="" undoc-members="" show-inheritance=""} +screenpro.ngs.GuideCounter +::: + +::: {.automodule members="" undoc-members="" show-inheritance=""} +screenpro.ngs.cas9 +::: + +::: {.automodule members="" undoc-members="" show-inheritance=""} +screenpro.ngs.cas12 +::: diff --git a/docs/source/ngs.rst b/docs/source/ngs.rst deleted file mode 100644 index 148c29a..0000000 --- a/docs/source/ngs.rst +++ /dev/null @@ -1,30 +0,0 @@ -NGS screen processing module -======================= - -`Counter` class is a wrapper to run the functions for a CRISPR screen experiment. - -This module contains a set of python functions to process and analyze NGS files from CRISPR screens. -Based on the type of CRISPR-Cas system used for the screen, the functions are divided into -two classes: `Cas9` and `Cas12`. - ------------------------ - -.. 
automodule:: screenpro.ngs - :members: - :undoc-members: - :show-inheritance: - -.. automodule:: screenpro.ngs.counter - :members: - :undoc-members: - :show-inheritance: - -.. automodule:: screenpro.ngs.cas9 - :members: - :undoc-members: - :show-inheritance: - -.. automodule:: screenpro.ngs.cas12 - :members: - :undoc-members: - :show-inheritance: From 5bf67efa4bed58d85e4f8814e18b3f012c91ebd5 Mon Sep 17 00:00:00 2001 From: abearab Date: Fri, 12 Jul 2024 00:51:40 -0700 Subject: [PATCH 29/37] fix import --- screenpro/assays.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/screenpro/assays.py b/screenpro/assays.py index 42b6b2d..c292fed 100644 --- a/screenpro/assays.py +++ b/screenpro/assays.py @@ -15,7 +15,7 @@ from pydeseq2 import preprocessing from .phenoscore.deseq import runDESeq, extractDESeqResults from .phenoscore import runPhenoScore, runPhenoScoreForReplicate -from .ngs.utils import ann_score_df +from .phenoscore.annotate import ann_score_df from copy import copy From 7b0aa65a4be76294560ead09c751a05685826e97 Mon Sep 17 00:00:00 2001 From: abearab Date: Fri, 12 Jul 2024 01:57:33 -0700 Subject: [PATCH 30/37] fix format https://stackoverflow.com/questions/58945322/how-can-i-use-automodule-or-autoclass-for-sphinx-in-a-markdown-file --- docs/source/ngs.md | 21 +++++---------------- 1 file changed, 5 insertions(+), 16 deletions(-) diff --git a/docs/source/ngs.md b/docs/source/ngs.md index 5e22e9e..37764f6 100644 --- a/docs/source/ngs.md +++ b/docs/source/ngs.md @@ -9,19 +9,8 @@ used for the screen, the functions are divided into two classes: `Cas9` and `Cas12`. 
------------------------------------------------------------------------ - -::: {.automodule members="" undoc-members="" show-inheritance=""} -screenpro.ngs -::: - -::: {.automodule members="" undoc-members="" show-inheritance=""} -screenpro.ngs.GuideCounter -::: - -::: {.automodule members="" undoc-members="" show-inheritance=""} -screenpro.ngs.cas9 -::: - -::: {.automodule members="" undoc-members="" show-inheritance=""} -screenpro.ngs.cas12 -::: +```{eval-rst} +.. automodule:: screenpro.ngs + :members: + :show-inheritance: +``` \ No newline at end of file From bc485b4fbb25e7e40f3452b24213e3a55d137835 Mon Sep 17 00:00:00 2001 From: abearab Date: Fri, 12 Jul 2024 02:26:00 -0700 Subject: [PATCH 31/37] mend --- docs/source/ngs.md | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/docs/source/ngs.md b/docs/source/ngs.md index 37764f6..df58ec4 100644 --- a/docs/source/ngs.md +++ b/docs/source/ngs.md @@ -13,4 +13,18 @@ used for the screen, the functions are divided into two classes: .. automodule:: screenpro.ngs :members: :show-inheritance: -``` \ No newline at end of file +``` + +### Cas9 CRISPR-Cas system (single or dual sgRNA libraries) +```{eval-rst} +.. automodule:: screenpro.ngs.cas9 + :members: + :show-inheritance: +``` + +### Cas12 CRISPR-Cas system (multiplexed crRNA libraries) +```{eval-rst} +.. 
automodule:: screenpro.ngs.cas12 + :members: + :show-inheritance: +``` From 24499ec20609f6998600ea8bfb04bb705a7faa4e Mon Sep 17 00:00:00 2001 From: abearab Date: Fri, 12 Jul 2024 02:42:25 -0700 Subject: [PATCH 32/37] mend --- docs/source/assays.md | 8 ++++++++ docs/source/phenotype.md | 18 +++++++++++++----- docs/source/references.md | 20 ++++++++++++++++++++ docs/source/references.rst | 27 --------------------------- 4 files changed, 41 insertions(+), 32 deletions(-) create mode 100644 docs/source/assays.md create mode 100644 docs/source/references.md delete mode 100644 docs/source/references.rst diff --git a/docs/source/assays.md b/docs/source/assays.md new file mode 100644 index 0000000..5524eff --- /dev/null +++ b/docs/source/assays.md @@ -0,0 +1,8 @@ +# CRISPR Screen Assays + +```{eval-rst} +.. automodule:: screenpro.assays + :members: + :undoc-members: + :show-inheritance: +``` \ No newline at end of file diff --git a/docs/source/phenotype.md b/docs/source/phenotype.md index eac5e98..f50aacd 100644 --- a/docs/source/phenotype.md +++ b/docs/source/phenotype.md @@ -48,8 +48,16 @@ module](https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.ttest_r > This is a test for the null hypothesis that two related or repeated > samples have identical average (expected) values. ------------------------------------------------------------------------- - -::: {.automodule members="" show-inheritance=""} -screenpro.phenoscore -::: +___ + +```{eval-rst} +.. automodule:: screenpro.phenoscore + :members: + :undoc-members: + :show-inheritance: + +.. 
automodule:: screenpro.phenoscore.phenostat + :members: + :undoc-members: + :show-inheritance: +``` \ No newline at end of file diff --git a/docs/source/references.md b/docs/source/references.md new file mode 100644 index 0000000..63a27cc --- /dev/null +++ b/docs/source/references.md @@ -0,0 +1,20 @@ +# References + +- @Herken2023 *bioRxiv* - Gilbert Lab +- @Hsiung2023 *bioRxiv* - Gilbert Lab +- @Han2020 *Nature* - Bassik Lab +- @Han2017 *Nature Biotechnology* - Bassik Lab +- @Tian2019 *Neuron* - Kampmann Lab +- @Horlbeck2016 *eLife* - Weissman Lab +- @Gilbert2014 *Cell* - Weissman Lab +- @Kampmann2014 *Nature Protocols* - Weissman Lab +- @Kampmann2013 *PNAS* - Weissman Lab +- @Bassik2013 *Cell* - Weissman Lab + +___ + +```{eval-rst} +.. bibliography:: + :all: + +``` diff --git a/docs/source/references.rst b/docs/source/references.rst deleted file mode 100644 index 14fc766..0000000 --- a/docs/source/references.rst +++ /dev/null @@ -1,27 +0,0 @@ -References ----------- -* :cite:t:`Herken2023` *bioRxiv* - Gilbert Lab - -* :cite:t:`Hsiung2023` *bioRxiv* - Gilbert Lab - -* :cite:t:`Han2020` *Nature* - Bassik Lab - -* :cite:t:`Han2017` *Nature Biotechnology* - Bassik Lab - -* :cite:t:`Tian2019` *Neuron* - Kampmann Lab - -* :cite:t:`Horlbeck2016` *eLife* - Weissman Lab - -* :cite:t:`Gilbert2014` *Cell* - Weissman Lab - -* :cite:t:`Kampmann2014` *Nature Protocols* - Weissman Lab - -* :cite:t:`Kampmann2013` *PNAS* - Weissman Lab - -* :cite:t:`Bassik2013` *Cell* - Weissman Lab - - -============= - -.. 
bibliography:: - :all: From 5bf65e484fb0fb3a3759439840c28995c93fe091 Mon Sep 17 00:00:00 2001 From: abearab Date: Fri, 12 Jul 2024 03:21:43 -0700 Subject: [PATCH 33/37] clear --- docs/source/assays.rst | 7 ------- 1 file changed, 7 deletions(-) delete mode 100644 docs/source/assays.rst diff --git a/docs/source/assays.rst b/docs/source/assays.rst deleted file mode 100644 index 0de12cb..0000000 --- a/docs/source/assays.rst +++ /dev/null @@ -1,7 +0,0 @@ -CRISPR Screen Assays -====================== - -.. automodule:: screenpro.assays - :members: - :undoc-members: - :show-inheritance: \ No newline at end of file From 3ec885eb1d4c2e9e4696fb12fce12f88f79fe983 Mon Sep 17 00:00:00 2001 From: abearab Date: Fri, 12 Jul 2024 03:38:15 -0700 Subject: [PATCH 34/37] update docs files --- docs/source/history.rst | 11 ++++++++--- docs/source/index.rst | 1 + docs/source/load.md | 13 +++++++++++++ docs/source/load.rst | 14 -------------- docs/source/visualize.md | 12 ++++++++++++ 5 files changed, 34 insertions(+), 17 deletions(-) create mode 100644 docs/source/load.md delete mode 100644 docs/source/load.rst create mode 100644 docs/source/visualize.md diff --git a/docs/source/history.rst b/docs/source/history.rst index a58a937..7277db6 100644 --- a/docs/source/history.rst +++ b/docs/source/history.rst @@ -2,12 +2,17 @@ History ======= -0.3.1 - 0.4.0 (June 2024 - July 2024) +1.0.0 +~~~~~ +* coming soon! + +0.4.0 - after (June 2024 - July 2024) ~~~~~~~~~~~~~~~~~~~ -* add command line interface +* add command line interface, i.e. 
`screenpro --help` +* rename `Counter` class to `GuideCounter` for code clarity * major bug fixes and improvements in code formatting -0.2.11 - 0.3.0 (Apr 2024 - May 2024) +0.2.11 - 0.3.5 (Apr 2024 - June 2024) ~~~~~~~~~~~~~~~~~ * introduce `Counter` class as wrapper for `ngs` module * improve core functionalities for CLI diff --git a/docs/source/index.rst b/docs/source/index.rst index d188efb..ed19189 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -15,6 +15,7 @@ Welcome to ScreenPro2's documentation! assays ngs phenotype + visualize load .. toctree:: diff --git a/docs/source/load.md b/docs/source/load.md new file mode 100644 index 0000000..bc73d89 --- /dev/null +++ b/docs/source/load.md @@ -0,0 +1,13 @@ +# Load module + +Set of python functions to read, write, and integrate CRISPR screening +datasets from different platforms. + +___ + +```{eval-rst} +.. automodule:: screenpro.load + :members: + :undoc-members: + :show-inheritance: +``` \ No newline at end of file diff --git a/docs/source/load.rst b/docs/source/load.rst deleted file mode 100644 index acda05e..0000000 --- a/docs/source/load.rst +++ /dev/null @@ -1,14 +0,0 @@ -Load module -======================= - -Set of python functions to read, write, and integrate CRISPR screening datasets from different platforms. -Notably, `loadScreenProcessingData` function can load `ScreenProcessing`_ outputs. - ------------------------ - -.. automodule:: screenpro.load - :members: - :undoc-members: - :show-inheritance: - -.. _`ScreenProcessing`: https://github.com/mhorlbeck/ScreenProcessing diff --git a/docs/source/visualize.md b/docs/source/visualize.md new file mode 100644 index 0000000..c2febe3 --- /dev/null +++ b/docs/source/visualize.md @@ -0,0 +1,12 @@ +# Visualize module + +Set of python classes and functions to enable visualizing CRISPR screening datasets. + +___ + +```{eval-rst} +.. 
automodule:: screenpro.visualize + :members: + :undoc-members: + :show-inheritance: +``` \ No newline at end of file From cd59d95f35917494240af681568642c9b54e023f Mon Sep 17 00:00:00 2001 From: abearab Date: Fri, 12 Jul 2024 17:10:14 -0700 Subject: [PATCH 35/37] update default `fc_transformation` parameter --- screenpro/assays.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/screenpro/assays.py b/screenpro/assays.py index c292fed..ece57ce 100644 --- a/screenpro/assays.py +++ b/screenpro/assays.py @@ -24,7 +24,7 @@ class PooledScreens(object): pooledScreens class for processing CRISPR screen datasets """ - def __init__(self, adata, fc_transformation='log2(x+1)', test='ttest', n_reps=3): + def __init__(self, adata, fc_transformation='log2', test='ttest', n_reps=3): """ Args: adata (AnnData): AnnData object with adata.X as a matrix of sgRNA counts From 52769e8737dd2e952411a5e5ddb0af6b2eff6bfe Mon Sep 17 00:00:00 2001 From: abearab Date: Fri, 12 Jul 2024 19:48:09 -0700 Subject: [PATCH 36/37] update docs --- docs/source/load.md | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/docs/source/load.md b/docs/source/load.md index bc73d89..569e11f 100644 --- a/docs/source/load.md +++ b/docs/source/load.md @@ -1,7 +1,8 @@ -# Load module +# Data loading module Set of python functions to read, write, and integrate CRISPR screening -datasets from different platforms. +resources (e.g. guide RNA libraries) and datasets from different platforms +and analysis tools. 
___ From 6fe2cf266aef28355d53bbaae1df519cbab56938 Mon Sep 17 00:00:00 2001 From: abearab Date: Fri, 12 Jul 2024 19:50:37 -0700 Subject: [PATCH 37/37] bump version 0.4.1 --- screenpro/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/screenpro/__init__.py b/screenpro/__init__.py index 1c2da6f..96f91f5 100644 --- a/screenpro/__init__.py +++ b/screenpro/__init__.py @@ -28,6 +28,6 @@ from .ngs import GuideCounter from .assays import PooledScreens, GImaps -__version__ = "0.4.0" +__version__ = "0.4.1" __author__ = "Abe Arab" __email__ = 'abea@arcinstitute.org' # "abarbiology@gmail.com"