generated from openproblems-bio/task_template
-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
25 changed files
with
1,323 additions
and
125 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,52 @@ | ||
__merge__: /src/api/comp_method.yaml | ||
name: bbknn | ||
label: BBKNN | ||
summary: BBKNN creates k nearest neighbours graph by identifying neighbours within | ||
batches, then combining and processing them with UMAP for visualization. | ||
description: | | ||
BBKNN (batch balanced k nearest neighbours) builds a graph for each cell by | ||
identifying its k nearest neighbours within each defined batch separately, | ||
creating independent neighbour sets for each cell in each batch. These sets | ||
are then combined and processed with the UMAP algorithm for visualisation. | ||
references: | ||
doi: 10.1093/bioinformatics/btz625 | ||
links: | ||
repository: https://github.com/Teichlab/bbknn | ||
documentation: https://github.com/Teichlab/bbknn#readme | ||
info: | ||
method_types: [graph] | ||
preferred_normalization: log_cp10k | ||
variants: | ||
bbknn_full_unscaled: | ||
bbknn_full_scaled: | ||
preferred_normalization: log_cp10k_scaled | ||
arguments: | ||
- name: --annoy_n_trees | ||
type: integer | ||
default: 10 | ||
description: Number of trees to use in the annoy forest. | ||
- name: --neighbors_within_batch | ||
type: integer | ||
default: 3 | ||
description: Number of neighbors to report within each batch. | ||
- name: --n_hvg | ||
type: integer | ||
default: 2000 | ||
description: Number of highly variable genes to use. | ||
resources: | ||
- type: python_script | ||
path: script.py | ||
- type: python_script | ||
path: /src/utils/read_anndata_partial.py | ||
engines: | ||
- type: docker | ||
image: openproblems/base_python:1.0.0 | ||
setup: | ||
- type: python | ||
pypi: | ||
- bbknn | ||
runners: | ||
- type: executable | ||
- type: nextflow | ||
directives: | ||
label: [midtime, midmem, lowcpu] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,63 @@ | ||
import sys | ||
import anndata as ad | ||
import scanpy as sc | ||
import bbknn | ||
|
||
## VIASH START
# Placeholder parameters for running this script directly during development.
par = {
    'input': 'resources_test/batch_integration/pancreas/unintegrated.h5ad',
    'output': 'output.h5ad',
    'annoy_n_trees': 10,
    'neighbors_within_batch': 3,
    'n_hvg': 2000,
}
meta = {
    'name': 'foo',
    'config': 'bar',
    # Needed by the sys.path.append() below. The component config lists the
    # helper as /src/utils/read_anndata_partial.py, so this assumes the script
    # is launched from the repository root -- adjust if run from elsewhere.
    'resources_dir': 'src/utils',
}
## VIASH END

# Make the shared helper module importable before importing from it.
sys.path.append(meta["resources_dir"])
from read_anndata_partial import read_anndata

print('Read input', flush=True)
# Only the normalized layer (as X), obs, var and uns are requested.
adata = read_anndata(
    par['input'],
    X='layers/normalized',
    obs='obs',
    var='var',
    uns='uns'
)

n_hvg = par['n_hvg']
if n_hvg:
    print(f"Select top {par['n_hvg']} high variable genes", flush=True)
    # Indices of the genes with the highest 'hvg_score', best first.
    hvg_order = adata.var['hvg_score'].to_numpy().argsort()[::-1]
    adata = adata[:, hvg_order[:n_hvg]].copy()

# The PCA is computed regardless of whether genes were subset: no obsm was
# loaded above, and bbknn builds its graph from a precomputed embedding
# (by default the one stored by sc.pp.pca).
sc.pp.pca(adata)

print('Run BBKNN', flush=True)
ad_bbknn = bbknn.bbknn(
    adata,
    batch_key='batch',
    copy=True,
    annoy_n_trees=par['annoy_n_trees'],
    neighbors_within_batch=par['neighbors_within_batch'],
)

print("Store output", flush=True)
# Keep only the obs/var indices (no columns) plus the integrated graph.
graph = {
    'connectivities': ad_bbknn.obsp['connectivities'],
    'distances': ad_bbknn.obsp['distances'],
}
metadata = {
    'dataset_id': adata.uns['dataset_id'],
    'normalization_id': adata.uns['normalization_id'],
    'method_id': meta['name'],
    'neighbors': ad_bbknn.uns['neighbors']
}
output = ad.AnnData(
    obs=adata.obs[[]],
    var=adata.var[[]],
    obsp=graph,
    uns=metadata
)

print("Store outputs", flush=True)
output.write_h5ad(par['output'], compression='gzip')
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,42 @@ | ||
__merge__: /src/api/comp_method.yaml | ||
name: combat | ||
label: Combat | ||
summary: Adjusting batch effects in microarray expression data using empirical Bayes | ||
methods | ||
description: | | ||
An Empirical Bayes (EB) approach to correct for batch effects. It | ||
estimates batch-specific parameters by pooling information across genes in | ||
each batch and shrinks the estimates towards the overall mean of the batch | ||
effect estimates across all genes. These parameters are then used to adjust | ||
the data for batch effects, leading to more accurate and reproducible | ||
results. | ||
references: | ||
doi: 10.1093/biostatistics/kxj037 | ||
links: | ||
repository: https://scanpy.readthedocs.io/en/stable/api/scanpy.pp.combat.html | ||
documentation: https://scanpy.readthedocs.io/en/stable/api/scanpy.pp.combat.html | ||
info: | ||
method_types: [feature] | ||
preferred_normalization: log_cp10k | ||
variants: | ||
combat_full_unscaled: | ||
combat_full_scaled: | ||
preferred_normalization: log_cp10k_scaled | ||
arguments: | ||
- name: --n_hvg | ||
type: integer | ||
default: 2000 | ||
description: Number of highly variable genes to use. | ||
resources: | ||
- type: python_script | ||
path: script.py | ||
- type: python_script | ||
path: /src/utils/read_anndata_partial.py | ||
engines: | ||
- type: docker | ||
image: openproblems/base_python:1.0.0 | ||
runners: | ||
- type: executable | ||
- type: nextflow | ||
directives: | ||
label: [midtime, highmem, lowcpu] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,56 @@ | ||
import sys | ||
import scanpy as sc | ||
from scipy.sparse import csr_matrix | ||
|
||
## VIASH START
# Placeholder parameters for running this script directly during development.
par = {
    'input': 'resources_test/batch_integration/pancreas/unintegrated.h5ad',
    'output': 'output.h5ad',
    'n_hvg': 2000,
}

meta = {
    'name': 'foo',
    'config': 'bar',
    # Needed by the sys.path.append() below. The component config lists the
    # helper as /src/utils/read_anndata_partial.py, so this assumes the script
    # is launched from the repository root -- adjust if run from elsewhere.
    'resources_dir': 'src/utils',
}

## VIASH END

# Make the shared helper module importable before importing from it.
sys.path.append(meta["resources_dir"])
from read_anndata_partial import read_anndata

print('Read input', flush=True)
# Only the normalized layer (as X), obs, var and uns are requested.
adata = read_anndata(
    par['input'],
    X='layers/normalized',
    obs='obs',
    var='var',
    uns='uns'
)

n_hvg = par['n_hvg']
if n_hvg:
    print(f"Select top {par['n_hvg']} high variable genes", flush=True)
    # Indices of the genes with the highest 'hvg_score', best first.
    hvg_order = adata.var['hvg_score'].to_numpy().argsort()[::-1]
    adata = adata[:, hvg_order[:n_hvg]].copy()

print('Run Combat', flush=True)
# inplace=False makes combat return the corrected matrix, which then
# replaces X on the (possibly gene-subset) object.
corrected = sc.pp.combat(adata, key='batch', inplace=False)
adata.X = corrected

print("Store output", flush=True)
# Keep only the obs/var indices (no columns) plus the corrected matrix,
# stored as a sparse 'corrected_counts' layer.
metadata = {
    'dataset_id': adata.uns['dataset_id'],
    'normalization_id': adata.uns['normalization_id'],
    'method_id': meta['name'],
}
output = sc.AnnData(
    obs=adata.obs[[]],
    var=adata.var[[]],
    uns=metadata,
    layers={'corrected_counts': csr_matrix(adata.X)}
)

print("Store outputs", flush=True)
output.write_h5ad(par['output'], compression='gzip')
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,34 @@ | ||
__merge__: /src/api/comp_method.yaml | ||
name: fastmnn | ||
label: fastMNN | ||
summary: A simpler version of the original mnnCorrect algorithm. | ||
description: | | ||
The fastMNN() approach is much simpler than the original mnnCorrect() algorithm, and proceeds in several steps. | ||
1. Perform a multi-sample PCA on the (cosine-)normalized expression values to reduce dimensionality. | ||
2. Identify MNN pairs in the low-dimensional space between a reference batch and a target batch. | ||
3. Remove variation along the average batch vector in both reference and target batches. | ||
4. Correct the cells in the target batch towards the reference, using locally weighted correction vectors. | ||
5. Merge the corrected target batch with the reference, and repeat with the next target batch. | ||
references: | ||
doi: 10.1038/nbt.4091 | ||
links: | ||
repository: https://code.bioconductor.org/browse/batchelor/ | ||
documentation: https://bioconductor.org/packages/batchelor/ | ||
info: | ||
method_types: [feature, embedding] | ||
preferred_normalization: log_cp10k | ||
resources: | ||
- type: r_script | ||
path: script.R | ||
engines: | ||
- type: docker | ||
image: openproblems/base_r:1.0.0 | ||
setup: | ||
- type: r | ||
bioc: batchelor | ||
runners: | ||
- type: executable | ||
- type: nextflow | ||
directives: | ||
label: [midtime, lowcpu, highmem] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,50 @@ | ||
cat("Loading dependencies\n") | ||
suppressPackageStartupMessages({ | ||
requireNamespace("anndata", quietly = TRUE) | ||
library(Matrix, warn.conflicts = FALSE) | ||
requireNamespace("batchelor", quietly = TRUE) | ||
library(SingleCellExperiment, warn.conflicts = FALSE) | ||
}) | ||
|
||
## VIASH START
# Placeholder parameters for running this script directly during development.
par <- list(
  input = "resources_test/batch_integration/pancreas/unintegrated.h5ad",
  output = "output.h5ad"
)
meta <- list(
  name = "fastmnn"
)
## VIASH END

cat("Read input\n")
adata <- anndata::read_h5ad(par$input)

# TODO: pass output of 'multiBatchNorm' to fastMNN

cat("Run mnn\n")
# The normalized layer is transposed before being handed to fastMNN, and the
# results are transposed back below when the output object is assembled.
normalized_t <- t(adata$layers[["normalized"]])
out <- suppressWarnings(batchelor::fastMNN(
  normalized_t,
  batch = adata$obs[["batch"]]
))

# Corrected expression values and the corrected low-dimensional embedding.
layer <- as(SummarizedExperiment::assay(out, "reconstructed"), "sparseMatrix")
obsm <- SingleCellExperiment::reducedDim(out, "corrected")

cat("Reformat output\n")
# Zero-column obs/var frames carry over the cell and gene names of the input,
# so the output can be matched back to the dataset by name rather than only
# by position.
output <- anndata::AnnData(
  layers = list(
    corrected_counts = t(layer)
  ),
  obsm = list(
    X_emb = obsm
  ),
  obs = adata$obs[, c(), drop = FALSE],
  var = adata$var[, c(), drop = FALSE],
  shape = adata$shape,
  uns = list(
    dataset_id = adata$uns[["dataset_id"]],
    normalization_id = adata$uns[["normalization_id"]],
    method_id = meta$name
  )
)

cat("Write output to file\n")
# The return value is captured only to keep it from being printed.
zzz <- output$write_h5ad(par$output, compression = "gzip")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,34 @@ | ||
__merge__: /src/api/comp_method.yaml | ||
name: liger | ||
label: LIGER | ||
summary: Linked Inference of Genomic Experimental Relationships | ||
description: | | ||
LIGER (linked inference of genomic experimental relationships) uses | ||
integrative non-negative matrix factorization (iNMF), implementing a novel | ||
coordinate descent algorithm to perform the factorization efficiently. Joint | ||
clustering is then performed and the factor loadings are normalised. | ||
references: | ||
doi: 10.1016/j.cell.2019.05.006 | ||
links: | ||
repository: https://github.com/welch-lab/liger | ||
documentation: https://github.com/welch-lab/liger | ||
info: | ||
method_types: [embedding] | ||
preferred_normalization: log_cp10k | ||
resources: | ||
- type: r_script | ||
path: script.R | ||
engines: | ||
- type: docker | ||
image: openproblems/base_r:1.0.0 | ||
setup: | ||
- type: apt | ||
packages: cmake | ||
- type: r | ||
cran: rliger | ||
github: welch-lab/RcppPlanc | ||
runners: | ||
- type: executable | ||
- type: nextflow | ||
directives: | ||
label: [lowcpu, highmem, midtime] |
Oops, something went wrong.