diff --git a/Dockerfile b/Dockerfile
index 460d08c..832f558 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,29 +1,44 @@
-FROM python:3.11-slim
+# Stage 1: Build environment
+FROM python:3.11-slim AS build-stage
-RUN pip install poetry
+# Install build tools and Poetry; apt lists are removed in the same layer that created them
+RUN apt-get update && apt-get install -y --no-install-recommends build-essential \
+    && rm -rf /var/lib/apt/lists/* && pip install --no-cache-dir poetry
-# Set the working directory
WORKDIR /app
-# Install build tools for Snakemake (gcc, make, etc.)
-RUN apt-get update && apt-get install -y build-essential
+# Install dependencies only; --no-root because the project source is not copied yet
+COPY pyproject.toml poetry.lock /app/
+RUN poetry config virtualenvs.create false \
+    && poetry install --no-root --no-interaction --no-ansi
-# Copy the pyproject.toml file
-COPY pyproject.toml /app/
+# Copy the source and install the application itself
+COPY . /app
+RUN poetry install --no-interaction --no-ansi
-# Install the dependencies
-RUN poetry install --no-root
+# Stage 2: Snakemake runtime environment (pinned tag for reproducible builds; bump deliberately)
+FROM snakemake/snakemake:v8.16.0
-# Copy the rest of the application files
-COPY . /app
+# Install Poetry plus the export plugin (Poetry 2.x no longer bundles `poetry export`)
+RUN pip install --no-cache-dir poetry poetry-plugin-export
-# Install the package
-RUN poetry install
+WORKDIR /app
+
+# Copy the application from the build stage
+COPY --from=build-stage /app /app
+
+# Install dependencies via a temporary requirements file; `<(...)` is bash-only and fails under the default /bin/sh
+RUN poetry export --format requirements.txt --without-hashes --output /tmp/requirements.txt \
+    && pip install --no-cache-dir -r /tmp/requirements.txt && pip install --no-cache-dir -e . && rm /tmp/requirements.txt
+
+# Set up non-root user
+RUN groupadd -r snakemake && useradd -r -g snakemake snakemake \
+    && chown -R snakemake:snakemake /app
-# Install Snakemake using Poetry
-RUN poetry add snakemake
+USER snakemake
-# Set the entry point for the container
-ENTRYPOINT ["poetry", "run"]
+# Configure Python path; append the previous value only when it is set (avoids a trailing empty entry)
+ENV PYTHONPATH="/app${PYTHONPATH:+:${PYTHONPATH}}"
-CMD ["help"]
+# Set the entry point
+ENTRYPOINT ["snakemake"]
\ No newline at end of file
diff --git a/Snakefile b/Snakefile
index bdf07b4..4089745 100644
--- a/Snakefile
+++ b/Snakefile
@@ -1,20 +1,13 @@
-# TODO - Refactor to input args to the Snakemake file
-WORKFLOW_IDS = range(1,11)
+from snakemake.io import directory  # directory() flag used for the create_ro_crate output
+
VERSIONS = ['1']
OUTPUT_DIRS = "data"
MERGED_FILE = "merged.ttl"
-
-
-def list_expected_files():
- files = []
- for wf_id in WORKFLOW_IDS:
- for ver in VERSIONS:
- files.append(f"{OUTPUT_DIRS}/{wf_id}_{ver}_ro-crate-metadata.json")
- return files
+ro_crate_metadata_dir = "ro-crate-metadata/"  # RO-Crate output directory (same path as the create_ro_crate rule's output)
rule all:
    input:
-        MERGED_FILE
+        ro_crate_metadata_dir  # final target: the exact path create_ro_crate declares as its output
rule source_ro_crates:
output:
@@ -23,12 +16,20 @@ rule source_ro_crates:
"""
# Create the output directory if it doesn't exist:
mkdir -p {OUTPUT_DIRS}
+
+ # Add the current directory to PYTHONPATH, creating it if it doesn't exist
+ export PYTHONPATH="${{PYTHONPATH:+$PYTHONPATH:}}$(pwd)"
- # Run the source_crates script to download the RO Crate metadata:
- python workflowhub_graph/source_crates.py --workflow-ids 1-10 --prod --all-versions
-
- # After sourcing, check which files were actually created:
- python workflowhub_graph/check_outputs.py --workflow-ids 1-10 --versions {VERSIONS} --output-dir {OUTPUT_DIRS}
+ # Run the source_crates script to download the RO Crate metadata,
+ # then check the output files and generate created_files.json:
+
+ # - all versions of all workflows:
+ # python workflowhub_graph/source_crates.py --prod --all-versions
+ # python workflowhub_graph/check_outputs.py --versions {VERSIONS} --output-dir {OUTPUT_DIRS}
+
+ # - all versions of first 20 workflows:
+ python workflowhub_graph/source_crates.py --workflow-ids 1-20 --prod --all-versions
+ python workflowhub_graph/check_outputs.py --workflow-ids 1-20 --versions {VERSIONS} --output-dir {OUTPUT_DIRS}
"""
rule report_created_files:
@@ -65,3 +66,36 @@ rule merge_files:
shell(f"""
python workflowhub_graph/merge.py {output[0]} -p "data/*.json"
""")
+
+rule create_ro_crate:
+    input:
+        MERGED_FILE
+    params:
+        workflow_file = "Snakefile"
+    output:
+        directory(ro_crate_metadata_dir)
+    shell:
+        """
+        # Create a throwaway virtual environment for the RO-Crate tooling
+        python -m venv rocrate_env
+
+        # Always remove the environment on exit, including when a step below fails
+        trap 'rm -rf rocrate_env' EXIT
+
+        # Activate the virtual environment
+        source rocrate_env/bin/activate
+
+        # Upgrade pip to avoid any potential issues
+        pip install --upgrade pip
+
+        # pip uninstall urllib3
+
+        # Install required packages
+        pip install requests urllib3 rocrate rocrate-zenodo
+
+        # Run the create_ro_crate script: <merged graph> <workflow file> <output directory>
+        python workflowhub_graph/create_ro_crate.py {input} {params.workflow_file} {output}
+
+        # Deactivate the virtual environment (the EXIT trap deletes it afterwards)
+        deactivate
+        """
diff --git a/merged.ttl b/merged.ttl
index 7e5b935..c040502 100644
--- a/merged.ttl
+++ b/merged.ttl
@@ -1,868 +1,76848 @@
@prefix dct: .
@prefix ns1: .
+@prefix ns2: .
+@prefix rel: .
@prefix schema1: .
@prefix xsd: .
- a schema1:CreativeWork ;
+ a schema1:CreativeWork ;
dct:conformsTo ;
- schema1:about .
+ schema1:about .
- a schema1:CreativeWork ;
- schema1:about .
+ a schema1:CreativeWork ;
+ schema1:about .
- a schema1:CreativeWork ;
+ a schema1:CreativeWork ;
dct:conformsTo ;
- schema1:about .
+ schema1:about .
- a schema1:CreativeWork ;
- schema1:about .
+ a schema1:CreativeWork ;
+ schema1:about .
- a schema1:CreativeWork ;
+ a schema1:CreativeWork ;
dct:conformsTo ;
- schema1:about .
+ schema1:about .
- a schema1:CreativeWork ;
- schema1:about .
+ a schema1:CreativeWork ;
+ schema1:about .
- a schema1:CreativeWork ;
+ a schema1:CreativeWork ;
dct:conformsTo ;
- schema1:about .
+ schema1:about .
- a schema1:CreativeWork ;
- schema1:about .
+ a schema1:CreativeWork ;
+ schema1:about .
- a schema1:CreativeWork ;
+ a schema1:CreativeWork ;
dct:conformsTo ;
- schema1:about .
+ schema1:about .
- a schema1:CreativeWork ;
- schema1:about .
+ a schema1:CreativeWork ;
+ schema1:about .
- a schema1:CreativeWork ;
+ a schema1:CreativeWork ;
dct:conformsTo ;
- schema1:about .
+ schema1:about .
- a schema1:CreativeWork ;
- schema1:about .
+ a schema1:CreativeWork ;
+ schema1:about .
- a schema1:CreativeWork ;
+ a schema1:CreativeWork ;
dct:conformsTo ;
- schema1:about .
+ schema1:about .
- a schema1:CreativeWork ;
- schema1:about .
+ a schema1:CreativeWork ;
+ schema1:about .
- a schema1:CreativeWork ;
- dct:conformsTo ;
- schema1:about .
+ a ;
+ dct:conformsTo "https://bioschemas.org/profiles/FormalParameter/1.0-RELEASE/" ;
+ schema1:name "#main/MaxMTpc" .
- a schema1:CreativeWork ;
- schema1:about .
+ a ;
+ dct:conformsTo "https://bioschemas.org/profiles/FormalParameter/1.0-RELEASE/" ;
+ schema1:name "#main/MinCountPerCell" .
- a schema1:CreativeWork ;
- dct:conformsTo ;
- schema1:about .
+ a ;
+ dct:conformsTo "https://bioschemas.org/profiles/FormalParameter/1.0-RELEASE/" ;
+ schema1:name "#main/MinGenesPerCell" .
- a schema1:CreativeWork ;
- schema1:about .
+ a ;
+ dct:conformsTo "https://bioschemas.org/profiles/FormalParameter/1.0-RELEASE/" ;
+ schema1:name "#main/genecount_qc_plot" .
- a schema1:Person ;
- schema1:name "Dannon Baker" .
+ a ;
+ dct:conformsTo "https://bioschemas.org/profiles/FormalParameter/1.0-RELEASE/" ;
+ schema1:name "#main/mito_qc_plot" .
- a schema1:Person ;
- schema1:name "Björn Grüning" .
+ a ;
+ dct:conformsTo "https://bioschemas.org/profiles/FormalParameter/1.0-RELEASE/" ;
+ schema1:name "#main/qc_anndata_object" .
- a schema1:Person ;
- schema1:name "Delphine Larivière" .
+ a ;
+ dct:conformsTo "https://bioschemas.org/profiles/FormalParameter/1.0-RELEASE/" ;
+ schema1:name "#main/top_genes_plot" .
- a schema1:Person ;
- schema1:name "Gildas Le Corguillé" .
+ a schema1:CreativeWork ;
+ dct:conformsTo ;
+ schema1:about .
- a schema1:Person ;
- schema1:name "Andrew Lonie" .
+ a schema1:CreativeWork ;
+ schema1:about .
- a schema1:Person ;
- schema1:name "Nicholas Keener" .
+ a schema1:CreativeWork ;
+ dct:conformsTo ;
+ schema1:about .
- a schema1:Person ;
- schema1:name "Sergei Kosakovsky Pond" .
+ a schema1:CreativeWork ;
+ schema1:about .
- a schema1:Person ;
- schema1:name "Wolfgang Maier" .
+ a schema1:CreativeWork ;
+ dct:conformsTo ,
+ ;
+ schema1:about .
- a schema1:Person ;
- schema1:name "Anton Nekrutenko" .
+ a schema1:CreativeWork ;
+ schema1:about .
- a schema1:Person ;
- schema1:name "James Taylor" .
+ a schema1:CreativeWork ;
+ dct:conformsTo ;
+ schema1:about .
- a schema1:Person ;
- schema1:name "Steven Weaver" .
+ a schema1:CreativeWork ;
+ schema1:about .
- a schema1:Person ;
- schema1:name "Marius van den Beek" .
+ a schema1:CreativeWork ;
+ dct:conformsTo ;
+ schema1:about .
- a schema1:Person ;
- schema1:name "Dave Bouvier" .
+ a schema1:CreativeWork ;
+ schema1:about .
- a schema1:Person ;
- schema1:name "John Chilton" .
+ a schema1:Person ;
+ schema1:name "Anna Syme" .
- a schema1:Person ;
- schema1:name "Nate Coraor" .
+ a schema1:CreativeWork ;
+ dct:conformsTo ,
+ ;
+ schema1:about .
- a schema1:Person ;
- schema1:name "Frederik Coppens" .
+ a schema1:CreativeWork ;
+ schema1:about .
- a schema1:Person ;
- schema1:name "Bert Droesbeke" .
+ a schema1:CreativeWork ;
+ dct:conformsTo ;
+ schema1:about .
- a schema1:Person ;
- schema1:name "Ignacio Eguinoa" .
+ a schema1:CreativeWork ;
+ schema1:about .
- a schema1:Person ;
- schema1:name "Simon Gladman" .
+ a schema1:CreativeWork ;
+ dct:conformsTo ;
+ schema1:about .
- a schema1:ComputerLanguage ;
- schema1:alternateName "CWL" ;
- schema1:identifier ;
- schema1:name "Common Workflow Language" ;
- schema1:url .
+ a schema1:CreativeWork ;
+ schema1:about .
- a schema1:ComputerLanguage ;
- schema1:identifier ;
- schema1:name "Galaxy" ;
- schema1:url .
+ a schema1:CreativeWork ;
+ dct:conformsTo ;
+ schema1:about .
- a schema1:Person ;
- schema1:name "Dannon Baker" .
+ a schema1:CreativeWork ;
+ schema1:about .
- a schema1:Person ;
- schema1:name "Björn Grüning" .
+ a schema1:CreativeWork ;
+ dct:conformsTo ;
+ schema1:about .
- a schema1:Person ;
- schema1:name "Delphine Larivière" .
+ a schema1:CreativeWork ;
+ schema1:about .
- a schema1:Person ;
- schema1:name "Gildas Le Corguillé" .
+ a schema1:CreativeWork ;
+ dct:conformsTo ;
+ schema1:about .
- a schema1:Person ;
- schema1:name "Andrew Lonie" .
+ a schema1:CreativeWork ;
+ schema1:about .
- a schema1:Person ;
- schema1:name "Nicholas Keener" .
+ a schema1:CreativeWork ;
+ dct:conformsTo ;
+ schema1:about .
- a schema1:Person ;
- schema1:name "Sergei Kosakovsky Pond" .
+ a schema1:CreativeWork ;
+ schema1:about .
- a schema1:Person ;
- schema1:name "Wolfgang Maier" .
+ a schema1:CreativeWork ;
+ dct:conformsTo ;
+ schema1:about .
- a schema1:Person ;
- schema1:name "Anton Nekrutenko" .
+ a schema1:CreativeWork ;
+ schema1:about .
- a schema1:Person ;
- schema1:name "James Taylor" .
+ a schema1:CreativeWork ;
+ dct:conformsTo ;
+ schema1:about .
- a schema1:Person ;
- schema1:name "Steven Weaver" .
+ a schema1:CreativeWork ;
+ schema1:about .
- a schema1:Person ;
- schema1:name "Marius van den Beek" .
+ a schema1:CreativeWork ;
+ dct:conformsTo ;
+ schema1:about .
- a schema1:Person ;
- schema1:name "Dave Bouvier" .
+ a schema1:CreativeWork ;
+ schema1:about .
- a schema1:Person ;
- schema1:name "John Chilton" .
+ a schema1:CreativeWork ;
+ dct:conformsTo ;
+ schema1:about .
- a schema1:Person ;
- schema1:name "Nate Coraor" .
+ a schema1:CreativeWork ;
+ schema1:about .
- a schema1:Person ;
- schema1:name "Frederik Coppens" .
+ a schema1:CreativeWork ;
+ dct:conformsTo ;
+ schema1:about .
- a schema1:Person ;
- schema1:name "Bert Droesbeke" .
+ a schema1:CreativeWork ;
+ schema1:about .
- a schema1:Person ;
- schema1:name "Ignacio Eguinoa" .
+ a schema1:CreativeWork ;
+ dct:conformsTo ;
+ schema1:about .
- a schema1:Person ;
- schema1:name "Simon Gladman" .
+ a schema1:CreativeWork ;
+ schema1:about .
- a schema1:ComputerLanguage ;
- schema1:alternateName "CWL" ;
- schema1:identifier ;
- schema1:name "Common Workflow Language" ;
- schema1:url .
+ a schema1:CreativeWork ;
+ dct:conformsTo ;
+ schema1:about .
- a schema1:ComputerLanguage ;
- schema1:identifier ;
- schema1:name "Galaxy" ;
- schema1:url .
+ a schema1:CreativeWork ;
+ schema1:about .
- a ;
- dct:conformsTo "https://bioschemas.org/profiles/FormalParameter/1.0-RELEASE/" ;
- schema1:name "NC_045512" .
+ a schema1:CreativeWork ;
+ dct:conformsTo ;
+ schema1:about .
- a schema1:Person ;
- schema1:name "Dannon Baker" .
+ a schema1:CreativeWork ;
+ schema1:about .
- a schema1:Person ;
- schema1:name "Björn Grüning" .
+ a schema1:CreativeWork ;
+ dct:conformsTo ;
+ schema1:about .
- a schema1:Person ;
- schema1:name "Delphine Larivière" .
+ a schema1:CreativeWork ;
+ schema1:about .
- a schema1:Person ;
- schema1:name "Gildas Le Corguillé" .
+ a schema1:CreativeWork ;
+ dct:conformsTo ;
+ schema1:about .
- a schema1:Person ;
- schema1:name "Andrew Lonie" .
+ a schema1:CreativeWork ;
+ schema1:about .
- a schema1:Person ;
- schema1:name "Nicholas Keener" .
+ a schema1:CreativeWork ;
+ dct:conformsTo ;
+ schema1:about .
- a schema1:Person ;
- schema1:name "Sergei Kosakovsky Pond" .
+ a schema1:CreativeWork ;
+ schema1:about .
- a schema1:Person ;
- schema1:name "Wolfgang Maier" .
+ a schema1:CreativeWork ;
+ dct:conformsTo ;
+ schema1:about .
- a schema1:Person ;
- schema1:name "Anton Nekrutenko" .
+ a schema1:CreativeWork ;
+ schema1:about .
- a schema1:Person ;
- schema1:name "James Taylor" .
+ a schema1:CreativeWork ;
+ dct:conformsTo ;
+ schema1:about .
- a schema1:Person ;
- schema1:name "Steven Weaver" .
+ a schema1:CreativeWork ;
+ schema1:about .
- a schema1:Person ;
- schema1:name "Marius van den Beek" .
+ a schema1:CreativeWork ;
+ dct:conformsTo ;
+ schema1:about .
- a schema1:Person ;
- schema1:name "Dave Bouvier" .
+ a schema1:CreativeWork ;
+ schema1:about .
- a schema1:Person ;
- schema1:name "John Chilton" .
+ a schema1:CreativeWork ;
+ dct:conformsTo ;
+ schema1:about .
- a schema1:Person ;
- schema1:name "Nate Coraor" .
+ a schema1:CreativeWork ;
+ schema1:about .
- a schema1:Person ;
- schema1:name "Frederik Coppens" .
+ a schema1:CreativeWork ;
+ dct:conformsTo ;
+ schema1:about .
- a schema1:Person ;
- schema1:name "Bert Droesbeke" .
+ a schema1:CreativeWork ;
+ schema1:about .
- a schema1:Person ;
- schema1:name "Ignacio Eguinoa" .
+ a schema1:CreativeWork ;
+ dct:conformsTo ;
+ schema1:about .
- a schema1:Person ;
- schema1:name "Simon Gladman" .
+ a schema1:CreativeWork ;
+ schema1:about .
- a schema1:ComputerLanguage ;
- schema1:alternateName "CWL" ;
- schema1:identifier ;
- schema1:name "Common Workflow Language" ;
- schema1:url .
+ a schema1:CreativeWork ;
+ dct:conformsTo ;
+ schema1:about .
- a schema1:ComputerLanguage ;
- schema1:identifier ;
- schema1:name "Galaxy" ;
- schema1:url .
+ a schema1:CreativeWork ;
+ schema1:about .
- a schema1:Person ;
- schema1:name "Dannon Baker" .
+ a schema1:CreativeWork ;
+ dct:conformsTo ;
+ schema1:about .
- a schema1:Person ;
- schema1:name "Björn Grüning" .
+ a schema1:CreativeWork ;
+ schema1:about .
- a schema1:Person ;
- schema1:name "Delphine Larivière" .
+ a schema1:CreativeWork ;
+ dct:conformsTo ;
+ schema1:about .
- a schema1:Person ;
- schema1:name "Gildas Le Corguillé" .
+ a schema1:CreativeWork ;
+ schema1:about .
- a schema1:Person ;
- schema1:name "Andrew Lonie" .
+ a schema1:CreativeWork ;
+ dct:conformsTo ,
+ ;
+ schema1:about .
- a schema1:Person ;
- schema1:name "Nicholas Keener" .
+ a schema1:CreativeWork ;
+ schema1:about .
- a schema1:Person ;
- schema1:name "Sergei Kosakovsky Pond" .
+ a schema1:CreativeWork ;
+ dct:conformsTo ;
+ schema1:about .
- a schema1:Person ;
- schema1:name "Wolfgang Maier" .
+ a schema1:CreativeWork ;
+ schema1:about .
- a schema1:Person ;
- schema1:name "Anton Nekrutenko" .
+ a schema1:CreativeWork ;
+ dct:conformsTo ;
+ schema1:about .
- a schema1:Person ;
- schema1:name "James Taylor" .
+ a schema1:CreativeWork ;
+ schema1:about .
- a schema1:Person ;
- schema1:name "Steven Weaver" .
+ a schema1:CreativeWork ;
+ dct:conformsTo ,
+ ;
+ schema1:about .
- a schema1:Person ;
- schema1:name "Marius van den Beek" .
+ a schema1:CreativeWork ;
+ schema1:about .
- a schema1:Person ;
- schema1:name "Dave Bouvier" .
+ a schema1:CreativeWork ;
+ dct:conformsTo ;
+ schema1:about .
- a schema1:Person ;
- schema1:name "John Chilton" .
+ a schema1:CreativeWork ;
+ schema1:about