Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Scenic + RNA-only GRN inference methods #4

Merged
merged 11 commits into from
Aug 12, 2024
Merged
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -103,8 +103,8 @@ approaches to assess both accuracy and comprehensiveness.
| name | roles |
|:-------------------|:-------|
| Jalil Nourisa | author |
| Robrecht Cannoodt | author |
| Antoine Passemiers | author |
| Robrecht Cannoodt | author |

## API

Expand Down
14 changes: 14 additions & 0 deletions dockerfiles/pidc/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# Anaconda base image extended with anndata, Julia 1.9.3, and a pinned set of
# Julia packages (InformationMeasures, PyPlot, LightGraphs, GraphPlot,
# NetworkInference).
FROM continuumio/anaconda3:2024.02-1

# Install Python packages (no pip cache, so the layer stays small)
RUN pip install --no-cache-dir anndata

# Install Julia.
# Download, unpack and clean up inside one layer so the tarball does not
# remain in the final image.
RUN wget https://julialang-s3.julialang.org/bin/linux/x64/1.9/julia-1.9.3-linux-x86_64.tar.gz && \
    tar -xzf julia-1.9.3-linux-x86_64.tar.gz -C /opt/ && \
    rm julia-1.9.3-linux-x86_64.tar.gz && \
    ln -s /opt/julia-1.9.3/bin/julia /usr/local/bin/julia

# Install Julia packages (every version is pinned)
RUN julia -e 'using Pkg; Pkg.resolve(); \
    Pkg.add(PackageSpec(name="InformationMeasures", version="0.3.0")); \
    Pkg.add(PackageSpec(name="PyPlot", version="2.8.0")); \
    Pkg.add(PackageSpec(name="LightGraphs", version="1.2.0")); \
    Pkg.add(PackageSpec(name="GraphPlot", version="0.3.1")); \
    Pkg.add(PackageSpec(name="NetworkInference", version="0.1.0"))'
17 changes: 0 additions & 17 deletions dockerfiles/pycistopic/Dockerfile

This file was deleted.

24 changes: 24 additions & 0 deletions dockerfiles/scenicplus/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# Extends the janursa/scenicplus base image with HTSlib (built from source),
# OpenJDK 17, and scenicplus installed from a pinned commit.
FROM janursa/scenicplus:19-08-2024

# Install HTSlib and tabix
# NOTE(review): this clones the default branch of htslib without pinning a
# tag or commit, so rebuilds are not reproducible — consider pinning.
RUN git clone https://github.com/samtools/htslib.git
WORKDIR htslib/
# Fetch the bundled submodules before generating the configure script, which
# is the order documented by htslib for git checkouts.
RUN git submodule update --init --recursive
RUN autoreconf -i
RUN ./configure
RUN make
RUN make install
WORKDIR ..

# Install Java
# `update` and `install` share one layer so that a cached, stale package index
# is never reused by a later rebuild of the install step.
RUN apt-get update && \
    apt-get install -y openjdk-17-jdk && \
    rm -rf /var/lib/apt/lists/*
ENV JAVA_HOME=/usr/lib/jvm/java-17-openjdk-amd64/

# Re-install scenicplus from a pinned commit
RUN git clone https://github.com/aertslab/scenicplus
WORKDIR scenicplus/
RUN git checkout fa55dae55203951c0c42e359b65a28010cd544f6
RUN pip install .
WORKDIR ..
8 changes: 6 additions & 2 deletions scripts/run_evaluation.sh → scripts/run_evaluation_reg2.sh
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
grn=""
sample="200" # Default value for sample
reg_type="ridge"
score="out/score.csv"
score="output/score.csv"

# Parse arguments
while [[ "$#" -gt 0 ]]; do
Expand Down Expand Up @@ -42,6 +42,10 @@ bin/regression_1/regression_1 --perturbation_data resources/grn-benchmark/pertur

# Run regression analysis 2
echo "Regression 2"
if [ ! -f resources/grn-benchmark/consensus-num-regulators.json ]; then
viash build src/metrics/regression_2/consensus/config.vsh.yaml --platform docker -o bin/regression_2/consensus
bin/regression_2/consensus/consensus_for_regression_2 --perturbation_data resources/grn-benchmark/perturbation_data.h5ad --output resources/grn-benchmark/consensus-num-regulators.json --grn_folder resources/grn-benchmark/grn_models/ --grns ananse.csv,celloracle.csv,figr.csv,granie.csv,scenicplus.csv,scglue.csv
fi
mkdir -p bin/regression_2
viash build src/metrics/regression_2/config.vsh.yaml -p docker -o bin/regression_2
bin/regression_2/regression_2 --perturbation_data resources/grn-benchmark/perturbation_data.h5ad --reg_type $reg_type --prediction $grn --score $score
bin/regression_2/regression_2 --perturbation_data resources/grn-benchmark/perturbation_data.h5ad --consensus resources/grn-benchmark/consensus-num-regulators.json --layer scgen_pearson --reg_type $reg_type --prediction $grn --score $score
58 changes: 58 additions & 0 deletions src/methods/ennet/config.vsh.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
# Viash component configuration for the ENNET GRN inference method.
functionality:
  name: ennet
  info:
    label: ennet
    summary: "GRN inference using ENNET"
    description: |
      GRN inference using ENNET.
    documentation_url: https://doi.org/10.1186/1752-0509-7-106
  arguments:
    # Input expression data.
    - name: --multiomics_rna
      type: file
      example: resources/grn-benchmark/multiomics_rna.h5ad
      info:
        label: multiomics_rna
        summary: "Multiomics RNA data"
        # The input is an .h5ad file (see `example` above), not an .rds file.
        file_type: h5ad
        columns:
          - name: dummpy
            type: string
            required: false
      required: true
      must_exist: true
    # Output file; the remaining fields are merged in from the shared API spec.
    - name: --prediction
      __merge__: ../../api/file_prediction.yaml
      required: true
      direction: output
    # Maximum number of links kept in the output.
    - name: --max_n_links
      type: integer
      default: 50000
    - name: --temp_dir
      type: file
      direction: output
      default: 'output/ennet'
    - name: --num_workers
      type: integer
      direction: input
      default: 4
  resources:
    - type: r_script
      path: script.R

platforms:
  - type: docker
    image: janursa/figr:19-08-2024
    setup:
      - type: r
        packages: [foreach, plyr, anndata, dplyr]
      # ennet is built and installed from the GitHub source archive.
      # The archive is fetched over HTTPS rather than plain HTTP.
      - type: docker
        run: |
          wget https://github.com/slawekj/ennet/archive/master.tar.gz -O ennet.tar.gz && \
          tar -xvzf ennet.tar.gz && \
          cd ennet-master && \
          R CMD build ennet && \
          R CMD INSTALL ennet
  - type: native
  - type: nextflow
    directives:
      label: [midtime, midmem, midcpu]
38 changes: 38 additions & 0 deletions src/methods/ennet/script.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
library(ennet)
library(anndata)
library(dplyr)

## VIASH START
# Default parameter values for this script.
# List entries must be bound with `=`; `:` is R's sequence operator and makes
# the list fail to evaluate when applied to these values.
par <- list(
  "multiomics_rna" = 'resources/resources_test/grn-benchmark/multiomics_rna.h5ad',
  "prediction" = 'output/ennet/prediction.csv',
  "temp_dir" = 'output/ennet',
  "max_n_links" = 50000
)
## VIASH END

# input expression data
ad <- anndata::read_h5ad(par$multiomics_rna)
X <- ad$X

# Run GRN inference method
# K is an all-zero matrix with the same dimensions as X, and Tf lists every
# column index of X.
# NOTE(review): presumably this means "no knockout information" and "every
# gene is a candidate regulator" — confirm against the ennet documentation.
K <- matrix(0, nrow(X), ncol(X))
Tf <- seq_len(ncol(X))
grn <- ennet(E = X, K = K, Tf = Tf)

# Re-format output: one row per (source, target) pair, drop zero weights,
# and sort by decreasing weight
df <- as.data.frame(as.table(grn))
colnames(df) <- c("source", "target", "weight")
df <- df[df$weight != 0,]
df <- df[order(-df$weight),]

# Add index as extra column
# seq_len() yields an empty index when no links remain, whereas 1:nrow(df)
# would yield c(1, 0) and make cbind() fail.
df <- cbind(index = seq_len(nrow(df)), df)

# Keep top links
df <- head(df, par$max_n_links)

# Save results
write.table(df, par$prediction, sep = ",", quote = FALSE, row.names = FALSE)

print("Finished.")
2 changes: 2 additions & 0 deletions src/methods/ennet/test.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# Build the ennet component for the docker platform and, if the build
# succeeds, run it on the resources_test dataset.
viash build src/methods/ennet/config.vsh.yaml -p docker -o bin/ennet \
  && bin/ennet/ennet \
    --multiomics_rna resources/resources_test/grn-benchmark/multiomics_rna.h5ad \
    --prediction output/ennet/prediction.csv
#viash run src/methods/ennet/config.vsh.yaml -p docker -- ---setup build && bin/ennet/ennet --multiomics_rna resources/resources_test/grn-benchmark/multiomics_rna.h5ad --prediction output/ennet/prediction.csv
54 changes: 54 additions & 0 deletions src/methods/genie3/config.vsh.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
# Viash component configuration for the GENIE3 GRN inference method.
functionality:
  name: genie3
  info:
    label: genie3
    summary: "GRN inference using GENIE3"
    description: |
      GRN inference using GENIE3.
    documentation_url: https://www.bioconductor.org/packages/release/bioc/html/GENIE3.html
  arguments:
    # Input expression data.
    - name: --multiomics_rna
      type: file
      example: resources/grn-benchmark/multiomics_rna.h5ad
      info:
        label: multiomics_rna
        summary: "Multiomics RNA data"
        # The input is an .h5ad file (see `example` above), not an .rds file.
        file_type: h5ad
        columns:
          - name: dummpy
            type: string
            required: false
      required: true
      must_exist: true
    # Output file; the remaining fields are merged in from the shared API spec.
    - name: --prediction
      __merge__: ../../api/file_prediction.yaml
      required: true
      direction: output
    # Maximum number of links kept in the output.
    - name: --max_n_links
      type: integer
      default: 50000
    - name: --temp_dir
      type: file
      direction: output
      default: 'output/genie3'
    - name: --num_workers
      type: integer
      direction: input
      default: 4
  resources:
    - type: python_script
      path: script.py

platforms:
  - type: docker
    image: continuumio/anaconda3:2024.02-1
    setup:
      # arboreto is installed from the bioconda channel.
      - type: docker
        run: |
          conda install -y -c bioconda arboreto pandas
      - type: python
        packages: [anndata]
  - type: native
  - type: nextflow
    directives:
      label: [midtime, midmem, midcpu]
38 changes: 38 additions & 0 deletions src/methods/genie3/script.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
import os

import anndata
import numpy as np
import pandas as pd
from arboreto.algo import genie3
from distributed import Client


## VIASH START
# Default parameter values for this script.
par = {
    'multiomics_rna': 'resources/resources_test/grn-benchmark/multiomics_rna.h5ad',
    'prediction': 'output/genie3/prediction.csv',
    'max_n_links': 50000
}
## VIASH END


# Load scRNA-seq data
adata_rna = anndata.read_h5ad(par['multiomics_rna'])
# Gene names are the index of `var`; reading them through `var_names` avoids
# depending on a `gene_ids` column being present.
gene_names = adata_rna.var_names.to_numpy()
X = adata_rna.X

# GRN inference
# The context manager closes the Dask client once inference has finished,
# instead of leaving it open until the interpreter exits.
with Client(processes=False) as client:
    network = genie3(X, client_or_address=client, gene_names=gene_names)

# Keep only top links
network = network.head(par['max_n_links'])

# Rename columns and reset the index.
# Non-inplace calls avoid mutating the slice returned by `head`.
network = network.rename(columns={'TF': 'source', 'target': 'target', 'importance': 'weight'})
network = network.reset_index(drop=True)

# Save inferred GRN, creating the output directory first if it is missing
output_dir = os.path.dirname(par['prediction'])
if output_dir:
    os.makedirs(output_dir, exist_ok=True)
network.to_csv(par['prediction'], sep=',')

print('Finished.')
2 changes: 2 additions & 0 deletions src/methods/genie3/test.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# Build the genie3 component for the docker platform and, if the build
# succeeds, run it on the resources_test dataset.
viash build src/methods/genie3/config.vsh.yaml -p docker -o bin/genie3 \
  && bin/genie3/genie3 \
    --multiomics_rna resources/resources_test/grn-benchmark/multiomics_rna.h5ad \
    --prediction output/genie3/prediction.csv
#viash run src/methods/genie3/config.vsh.yaml -p docker -- ---setup build && bin/genie3/genie3 --multiomics_rna resources/resources_test/grn-benchmark/multiomics_rna.h5ad --prediction output/genie3/prediction.csv
54 changes: 54 additions & 0 deletions src/methods/grnboost2/config.vsh.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
# Viash component configuration for the GRNBoost2 GRN inference method.
functionality:
  name: grnboost2
  info:
    label: grnboost2
    summary: "GRN inference using GRNBoost2"
    description: |
      GRN inference using GRNBoost2.
    documentation_url: https://arboreto.readthedocs.io/en/latest/algorithms.html#grnboost2
  arguments:
    # Input expression data.
    - name: --multiomics_rna
      type: file
      example: resources/grn-benchmark/multiomics_rna.h5ad
      info:
        label: multiomics_rna
        summary: "Multiomics RNA data"
        # The input is an .h5ad file (see `example` above), not an .rds file.
        file_type: h5ad
        columns:
          - name: dummpy
            type: string
            required: false
      required: true
      must_exist: true
    # Output file; the remaining fields are merged in from the shared API spec.
    - name: --prediction
      __merge__: ../../api/file_prediction.yaml
      required: true
      direction: output
    # Maximum number of links kept in the output.
    - name: --max_n_links
      type: integer
      default: 50000
    - name: --temp_dir
      type: file
      direction: output
      default: 'output/grnboost2'
    - name: --num_workers
      type: integer
      direction: input
      default: 4
  resources:
    - type: python_script
      path: script.py

platforms:
  - type: docker
    image: continuumio/anaconda3:2024.02-1
    setup:
      # arboreto is installed from the bioconda channel.
      - type: docker
        run: |
          conda install -y -c bioconda arboreto pandas
      - type: python
        packages: [anndata]
  - type: native
  - type: nextflow
    directives:
      label: [midtime, midmem, midcpu]
38 changes: 38 additions & 0 deletions src/methods/grnboost2/script.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
import os

import anndata
import numpy as np
import pandas as pd
from arboreto.algo import grnboost2
from distributed import Client


## VIASH START
# Default parameter values for this script.
par = {
    'multiomics_rna': 'resources/resources_test/grn-benchmark/multiomics_rna.h5ad',
    'prediction': 'output/grnboost2/prediction.csv',
    'max_n_links': 50000
}
## VIASH END


# Load scRNA-seq data
adata_rna = anndata.read_h5ad(par['multiomics_rna'])
# Gene names are the index of `var`; reading them through `var_names` avoids
# depending on a `gene_ids` column being present.
gene_names = adata_rna.var_names.to_numpy()
X = adata_rna.X

# GRN inference
# The context manager closes the Dask client once inference has finished,
# instead of leaving it open until the interpreter exits.
with Client(processes=False) as client:
    network = grnboost2(X, client_or_address=client, gene_names=gene_names)

# Keep only top links
network = network.head(par['max_n_links'])

# Rename columns and reset the index.
# Non-inplace calls avoid mutating the slice returned by `head`.
network = network.rename(columns={'TF': 'source', 'target': 'target', 'importance': 'weight'})
network = network.reset_index(drop=True)

# Save inferred GRN, creating the output directory first if it is missing
output_dir = os.path.dirname(par['prediction'])
if output_dir:
    os.makedirs(output_dir, exist_ok=True)
network.to_csv(par['prediction'], sep=',')

print('Finished.')
1 change: 1 addition & 0 deletions src/methods/grnboost2/test.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
# Build the grnboost2 component for the docker platform and, if the build
# succeeds, run it on the resources_test dataset.
viash build src/methods/grnboost2/config.vsh.yaml -p docker -o bin/grnboost2 \
  && bin/grnboost2/grnboost2 \
    --multiomics_rna resources/resources_test/grn-benchmark/multiomics_rna.h5ad \
    --prediction output/grnboost2/prediction.csv
Loading