diff --git a/CHANGELOG.md b/CHANGELOG.md
index 8a023889596..9f981cca443 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -119,6 +119,8 @@
 
 * `metadata/copy_obs` component: Added a component to copy an .obs column from a MuData object to another (PR #874).
 
+* `workflows/annotation/scanorama_knn` workflow: Cell-type annotation based on scanorama integration with KNN label transfer (PR #884).
+
 ## MINOR CHANGES
 
 * `resources_test_scripts/cellranger_atac_tiny_bcl.sh` script: generate counts from fastq files using CellRanger atac count (PR #726).
diff --git a/src/workflows/annotation/scanorama_knn/config.vsh.yaml b/src/workflows/annotation/scanorama_knn/config.vsh.yaml
new file mode 100644
index 00000000000..82fa61b0df4
--- /dev/null
+++ b/src/workflows/annotation/scanorama_knn/config.vsh.yaml
@@ -0,0 +1,165 @@
+name: "scanorama_knn"
+namespace: "workflows/annotation"
+description: "Cell type annotation workflow that performs scanorama integration of the reference and query datasets, followed by KNN label transfer."
+authors:
+  - __merge__: /src/authors/dorien_roosen.yaml
+    roles: [ author, maintainer ]
+  - __merge__: /src/authors/weiwei_schultz.yaml
+    roles: [ contributor ]
+
+argument_groups:
+  - name: Query Input
+    arguments:
+      - name: "--id"
+        required: true
+        type: string
+        description: ID of the sample.
+        example: foo
+      - name: "--input"
+        required: true
+        type: file
+        description: Input dataset consisting of the (unlabeled) query observations. The dataset is expected to be pre-processed in the same way as --reference.
+        example: input.h5mu
+      - name: "--modality"
+        description: Which modality to process. Should match the modality of the --reference dataset.
+        type: string
+        default: "rna"
+        required: false
+      - name: "--input_obsm_embedding"
+        example: "X_pca"
+        type: string
+        description: Embedding .obsm column to use as input for integration. Should match the embedding .obsm column of the --reference dataset.
+      - name: "--input_obs_batch_label"
+        type: string
+        description: "The .obs field in the input (query) dataset containing the batch labels."
+        example: "sample"
+        required: true
+
+  - name: Reference input
+    arguments:
+      - name: "--reference"
+        required: true
+        type: file
+        description: Reference dataset consisting of the labeled observations to train the KNN classifier on. The dataset is expected to be pre-processed in the same way as the --input query dataset.
+        example: reference.h5mu
+      - name: "--reference_obs_targets"
+        type: string
+        example: [ ann_level_1, ann_level_2, ann_level_3, ann_level_4, ann_level_5, ann_finest_level ]
+        required: true
+        multiple: true
+        description: The `.obs` key(s) of the target labels to transfer.
+      - name: "--reference_obs_batch_label"
+        type: string
+        description: "The .obs field in the reference dataset containing the batch labels."
+        example: "sample"
+        required: true
+
+  - name: Scanorama integration options
+    arguments:
+      - name: "--knn"
+        type: integer
+        description: "Number of nearest neighbors to use for matching during scanorama integration."
+        default: 20
+      - name: "--batch_size"
+        type: integer
+        description: "The batch size used in the alignment vector computation. Useful when integrating very large (>100k samples) datasets. Set to a large value that still runs within available memory."
+        default: 5000
+      - name: "--sigma"
+        type: double
+        description: "Correction smoothing parameter on the Gaussian kernel."
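
For context on the classifier arguments above (--weights, --n_neighbors, --reference_obs_targets and the *_pred / *_probability outputs), here is a minimal sketch of what the label-transfer step amounts to, using scikit-learn's KNeighborsClassifier as a stand-in for the pynndescent_knn component. The file paths and the "cell_type" target are hypothetical placeholders, not values from this PR:

    # Minimal sketch, NOT the pynndescent_knn implementation: KNN label transfer
    # on a shared integrated embedding. Paths and "cell_type" are placeholders.
    import mudata as md
    from sklearn.neighbors import KNeighborsClassifier

    ref = md.read_h5mu("reference.h5mu").mod["rna"]  # labeled reference
    qry = md.read_h5mu("input.h5mu").mod["rna"]      # unlabeled query

    # Train on the integrated embedding, mirroring --weights / --n_neighbors.
    clf = KNeighborsClassifier(n_neighbors=15, weights="uniform")
    clf.fit(ref.obsm["X_integrated_scanorama"], ref.obs["cell_type"])

    # Predicted label plus the probability of that prediction, mirroring the
    # *_pred / *_probability output .obs columns.
    qry.obs["cell_type_pred"] = clf.predict(qry.obsm["X_integrated_scanorama"])
    qry.obs["cell_type_probability"] = clf.predict_proba(
        qry.obsm["X_integrated_scanorama"]
    ).max(axis=1)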
+        default: 15
+      - name: "--approx"
+        type: boolean
+        description: "Use approximate nearest neighbors with Python annoy; greatly speeds up matching runtime."
+        default: true
+      - name: "--alpha"
+        type: double
+        description: "Minimum alignment score cutoff."
+        default: 0.1
+
+  - name: Leiden clustering options
+    arguments:
+      - name: "--leiden_resolution"
+        type: double
+        description: Controls the coarseness of the clustering. Higher values lead to more clusters.
+        min: 0
+        default: [1]
+        multiple: true
+
+  - name: Neighbor classifier arguments
+    arguments:
+      - name: "--weights"
+        type: string
+        default: "uniform"
+        choices: ["uniform", "distance"]
+        description: |
+          Weight function used in prediction. Possible values are:
+          `uniform` (all points in each neighborhood are weighted equally) or
+          `distance` (weight points by the inverse of their distance).
+      - name: "--n_neighbors"
+        type: integer
+        default: 15
+        required: false
+        description: |
+          The number of neighbors to use in the k-neighbor graph structure used for fast approximate nearest neighbor search with PyNNDescent.
+          Larger values result in more accurate search results at the cost of computation time.
+
+  - name: "Outputs"
+    arguments:
+      - name: "--output"
+        type: file
+        required: true
+        direction: output
+        description: The query data in .h5mu format, with cell labels predicted by the classifier trained on the reference.
+        example: output.h5mu
+      - name: "--output_obs_predictions"
+        type: string
+        required: false
+        multiple: true
+        description: |
+          In which `.obs` slots to store the predicted cell labels.
+          If provided, must have the same length as `--reference_obs_targets`.
+          If empty, will default to the `reference_obs_targets` combined with the `"_pred"` suffix.
+      - name: "--output_obs_probability"
+        type: string
+        required: false
+        multiple: true
+        description: |
+          In which `.obs` slots to store the probability of the predictions.
+          If provided, must have the same length as `--reference_obs_targets`.
+          If empty, will default to the `reference_obs_targets` combined with the `"_probability"` suffix.
+      - name: "--output_obsm_integrated"
+        type: string
+        default: "X_integrated_scanorama"
+        required: false
+        description: "In which .obsm slot to store the integrated embedding."
+      - name: "--output_compression"
+        type: string
+        description: |
+          The compression format to be used on the output h5mu object.
+        choices: ["gzip", "lzf"]
+        required: false
+        example: "gzip"
+
+dependencies:
+  - name: workflows/integration/scanorama_leiden
+    alias: scanorama_leiden_workflow
+  - name: labels_transfer/pynndescent_knn
+  - name: dataflow/split_h5mu
+  - name: dataflow/concatenate_h5mu
+  - name: metadata/add_id
+  - name: metadata/copy_obs
+
+resources:
+  - type: nextflow_script
+    path: main.nf
+    entrypoint: run_wf
+
+test_resources:
+  - type: nextflow_script
+    path: test.nf
+    entrypoint: test_wf
+  - path: /resources_test/pbmc_1k_protein_v3
+  - path: /resources_test/annotation_test_data
+
+runners:
+  - type: nextflow
diff --git a/src/workflows/annotation/scanorama_knn/integration_test.sh b/src/workflows/annotation/scanorama_knn/integration_test.sh
new file mode 100755
index 00000000000..ce567527124
--- /dev/null
+++ b/src/workflows/annotation/scanorama_knn/integration_test.sh
@@ -0,0 +1,21 @@
+#!/bin/bash
+
+# get the root of the repository
+REPO_ROOT=$(git rev-parse --show-toplevel)
+
+# ensure that the commands below are run from the root of the repository
+cd "$REPO_ROOT"
+
+export NXF_VER=21.10.6
+
+viash ns build -q scanorama_knn
+
+nextflow \
+  run . \
+  -main-script src/workflows/annotation/scanorama_knn/test.nf \
+  -entry test_wf \
+  -resume \
+  -profile no_publish \
+  -c src/workflows/utils/labels_ci.config \
+  -c src/workflows/utils/integration_tests.config \
+  -with-trace work/trace.txt
diff --git a/src/workflows/annotation/scanorama_knn/main.nf b/src/workflows/annotation/scanorama_knn/main.nf
new file mode 100644
index 00000000000..03330da8a38
--- /dev/null
+++ b/src/workflows/annotation/scanorama_knn/main.nf
@@ -0,0 +1,164 @@
+workflow run_wf {
+  take:
+    input_ch
+
+  main:
+    output_ch = input_ch
+      // Set aside the output for this workflow to avoid conflicts
+      | map {id, state ->
+        def new_state = state + ["workflow_output": state.output]
+        [id, new_state]
+      }
+      // Add id as _meta join id, to be able to merge with the source channel at the end of the workflow
+      | map{ id, state ->
+        def new_state = state + ["_meta": ["join_id": id]]
+        [id, new_state]
+      }
+      | view {"After adding join_id: $it"}
+      // Add 'query' id to .obs columns of query dataset
+      | add_id.run(
+        fromState: [
+          "input": "input",
+        ],
+        args: [
+          "input_id": "query",
+          "obs_output": "dataset",
+        ],
+        toState: ["input": "output"])
+      // Add 'reference' id to .obs columns of reference dataset
+      | add_id.run(
+        fromState: [
+          "input": "reference",
+        ],
+        args: [
+          "input_id": "reference",
+          "obs_output": "dataset"
+        ],
+        toState: ["reference": "output"])
+      // Make sure that the query and reference datasets have batch information in the same .obs column,
+      // by copying the respective .obs columns to the .obs column "batch_label"
+      | copy_obs.run(
+        fromState: [
+          "input": "input",
+          "modality": "modality",
+          "input_obs_key": "input_obs_batch_label",
+        ],
+        args: [
+          "output_obs_key": "batch_label"
+        ],
+        toState: [
+          "input": "output"
+        ]
+      )
+      | copy_obs.run(
+        fromState: [
+          "input": "reference",
+          "modality": "modality",
+          "input_obs_key": "reference_obs_batch_label",
+        ],
+        args: [
+          "output_obs_key": "batch_label"
+        ],
+        toState: [
+          "reference": "output"
+        ]
+      )
+      // Concatenate query and reference datasets prior to integration
+      | concatenate_h5mu.run(
+        fromState: { id, state -> [
+            "input": [state.input, state.reference]
+          ]
+        },
+        args: [
+          "input_id": ["query", "reference"],
+          "other_axis_mode": "move"
+        ],
+        toState: ["input": "output"]
+      )
+      | view {"After concatenation: $it"}
+      // Run scanorama integration with leiden clustering
+      | scanorama_leiden_workflow.run(
+        fromState: { id, state ->
+          [
+            "id": id,
+            "input": state.input,
+            "modality": state.modality,
+            "obsm_input": state.input_obsm_embedding,
+            "obsm_output": state.output_obsm_integrated,
+            "leiden_resolution": state.leiden_resolution,
+            "knn": state.knn,
+            "batch_size": state.batch_size,
+            "sigma": state.sigma,
+            "approx": state.approx,
+            "alpha": state.alpha
+          ]},
+        args: [
+          "uns_neighbors": "scanorama_integration_neighbors",
+          "obsp_neighbor_distances": "scanorama_integration_distances",
+          "obsp_neighbor_connectivities": "scanorama_integration_connectivities",
+          "obs_cluster": "scanorama_integration_leiden",
+          "obsm_umap": "X_leiden_scanorama_umap",
+          "obs_batch": "batch_label"
+        ],
+        toState: ["input": "output"]
+      )
+      | view {"After integration: $it"}
+      // Split integrated dataset back into a separate reference and query dataset
+      | split_h5mu.run(
+        fromState: [
+          "input": "input",
+          "modality": "modality"
+        ],
+        args: [
+          "obs_feature": "dataset",
+          "output_files": "sample_files.csv",
+          "drop_obs_nan": "true",
+          "output": "ref_query"
+        ],
+        toState: [
+          "output": "output",
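
The add_id -> copy_obs -> concatenate -> integrate -> split chain in main.nf keys everything on the "dataset" .obs column. Roughly equivalent logic in plain anndata, continuing the ref/qry objects from the earlier sketch (an assumed equivalence for illustration; the workflow itself does this through the concatenate_h5mu and split_h5mu components):

    # Sketch of the concatenate/split round trip via anndata.concat; the dict
    # keys become the values of the "dataset" label column, as add_id does here.
    import anndata as ad

    combined = ad.concat({"query": qry, "reference": ref}, label="dataset")
    # ... scanorama integration would fill combined.obsm["X_integrated_scanorama"] ...
    qry_int = combined[combined.obs["dataset"] == "query"].copy()
    ref_int = combined[combined.obs["dataset"] == "reference"].copy()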
"output_files": "output_files" + ], + auto: [ publish: true ] + ) + | view {"After sample splitting: $it"} + // map the integrated query and reference datasets back to the state + | map {id, state -> + def outputDir = state.output + def files = readCsv(state.output_files.toUriString()) + def query_file = files.findAll{ dat -> dat.name == 'query' } + assert query_file.size() == 1, 'there should only be one query file' + def reference_file = files.findAll{ dat -> dat.name == 'reference' } + assert reference_file.size() == 1, 'there should only be one reference file' + def integrated_query = outputDir.resolve(query_file.filename) + def integrated_reference = outputDir.resolve(reference_file.filename) + def newKeys = ["integrated_query": integrated_query, "integrated_reference": integrated_reference] + [id, state + newKeys] + } + | view {"After splitting query: $it"} + // Perform KNN label transfer from integrated reference to integrated query + | pynndescent_knn.run( + fromState: [ + "input": "integrated_query", + "modality": "modality", + "input_obsm_features": "output_obsm_integrated", + "reference": "integrated_reference", + "reference_obsm_features": "output_obsm_integrated", + "reference_obs_targets": "reference_obs_targets", + "output_obs_predictions": "output_obs_predictions", + "output_obs_probability": "output_obs_probability", + "output_compression": "output_compression", + "weights": "weights", + "n_neighbors": "n_neighbors", + "output": "workflow_output" + ], + toState: {id, output, state -> ["output": output.output]}, + auto: [ publish: true ] + ) + + emit: + output_ch +} diff --git a/src/workflows/annotation/scanorama_knn/nextflow.config b/src/workflows/annotation/scanorama_knn/nextflow.config new file mode 100644 index 00000000000..059100c489c --- /dev/null +++ b/src/workflows/annotation/scanorama_knn/nextflow.config @@ -0,0 +1,10 @@ +manifest { + nextflowVersion = '!>=20.12.1-edge' +} + +params { + rootDir = java.nio.file.Paths.get("$projectDir/../../../../").toAbsolutePath().normalize().toString() +} + +// include common settings +includeConfig("${params.rootDir}/src/workflows/utils/labels.config") diff --git a/src/workflows/annotation/scanorama_knn/test.nf b/src/workflows/annotation/scanorama_knn/test.nf new file mode 100644 index 00000000000..c814ff51fc6 --- /dev/null +++ b/src/workflows/annotation/scanorama_knn/test.nf @@ -0,0 +1,59 @@ +nextflow.enable.dsl=2 + +include {scanorama_knn } from params.rootDir + "/target/nextflow/workflows/annotation/scanorama_knn/main.nf" +include { scanorama_knn_test } from params.rootDir + "/target/nextflow/test_workflows/annotation/scanorama_knn_test/main.nf" + +workflow test_wf { + // allow changing the resources_test dir + resources_test = file("${params.rootDir}/resources_test") + + output_ch = Channel.fromList( + [ + [ + id: "simple_execution_test", + input: resources_test.resolve("pbmc_1k_protein_v3/pbmc_1k_protein_v3_mms.h5mu"), + reference: resources_test.resolve("annotation_test_data/TS_Blood_filtered.h5mu"), + input_obs_batch_label: "sample_id", + reference_obs_batch_label: "donor_assay", + reference_obs_targets: "cell_type", + leiden_resolution: [1.0, 0.25] + ], + [ + id: "no_leiden_resolutions_test", + input: resources_test.resolve("pbmc_1k_protein_v3/pbmc_1k_protein_v3_mms.h5mu"), + reference: resources_test.resolve("annotation_test_data/TS_Blood_filtered.h5mu"), + input_obs_batch_label: "sample_id", + reference_obs_batch_label: "donor_assay", + reference_obs_targets: "cell_type", + leiden_resolution: [] + ] + ]) + | map{ state 
+    | scanorama_knn
+    | view { output ->
+      assert output.size() == 2 : "Outputs should contain two elements; [id, state]"
+
+      // check id
+      def id = output[0]
+      assert id.endsWith("_test") : "Output ID should be same as input ID"
+
+      // check output
+      def state = output[1]
+      assert state instanceof Map : "State should be a map. Found: ${state}"
+      assert state.containsKey("output") : "State should contain key 'output'."
+      assert state.output.isFile() : "'output' should be a file."
+      assert state.output.toString().endsWith(".h5mu") : "Output file should end with '.h5mu'. Found: ${state.output}"
+
+      "Output: $output"
+    }
+    | scanorama_knn_test.run(
+      fromState: [
+        "input": "output"
+      ]
+    )
+    | toSortedList({a, b -> a[0] <=> b[0]})
+    | map { output_list ->
+      assert output_list.size() == 2 : "output channel should contain 2 events"
+      assert output_list.collect{it[0]} == ["no_leiden_resolutions_test", "simple_execution_test"]
+    }
+}
diff --git a/src/workflows/test_workflows/annotation/scanorama_knn/config.vsh.yaml b/src/workflows/test_workflows/annotation/scanorama_knn/config.vsh.yaml
new file mode 100644
index 00000000000..c7a2072d673
--- /dev/null
+++ b/src/workflows/test_workflows/annotation/scanorama_knn/config.vsh.yaml
@@ -0,0 +1,35 @@
+name: "scanorama_knn_test"
+namespace: "test_workflows/annotation"
+description: "This component tests the output of the scanorama_knn annotation workflow."
+authors:
+  - __merge__: /src/authors/dorien_roosen.yaml
+argument_groups:
+  - name: Inputs
+    arguments:
+      - name: "--input"
+        type: file
+        required: true
+        description: Path to h5mu output.
+        example: foo.final.h5mu
+resources:
+  - type: python_script
+    path: script.py
+  - path: /src/utils/setup_logger.py
+  - path: /src/base/openpipelinetestutils
+    dest: openpipelinetestutils
+engines:
+  - type: docker
+    image: python:3.12-slim
+    setup:
+      - type: docker
+        copy: ["openpipelinetestutils /opt/openpipelinetestutils"]
+      - type: apt
+        packages:
+          - procps
+      - type: python
+        packages: /opt/openpipelinetestutils
+      - type: python
+        __merge__: [/src/base/requirements/anndata_mudata.yaml, /src/base/requirements/viashpy.yaml, .]
+runners:
+  - type: executable
+  - type: nextflow
diff --git a/src/workflows/test_workflows/annotation/scanorama_knn/script.py b/src/workflows/test_workflows/annotation/scanorama_knn/script.py
new file mode 100644
index 00000000000..8e5e77edfdb
--- /dev/null
+++ b/src/workflows/test_workflows/annotation/scanorama_knn/script.py
@@ -0,0 +1,35 @@
+from mudata import read_h5mu
+import shutil
+import os
+import sys
+from pathlib import Path
+import pytest
+
+##VIASH START
+par = {
+    "input": "scanorama_knn/output.h5mu"
+}
+
+meta = {
+    "resources_dir": "resources_test"
+}
+##VIASH END
+
+
+def test_run():
+    input_mudata = read_h5mu(par["input"])
+    expected_obsm = ["X_integrated_scanorama", "X_leiden_scanorama_umap"]
+    expected_obs = ["cell_type_pred", "cell_type_probability"]
+    expected_obsp = ["scanorama_integration_connectivities", "scanorama_integration_distances"]
+
+    assert "rna" in list(input_mudata.mod.keys()), "Input should contain rna modality."
+    assert all(key in list(input_mudata.mod["rna"].obsm) for key in expected_obsm), f"Input mod['rna'] obsm columns should be: {expected_obsm}, found: {input_mudata.mod['rna'].obsm.keys()}."
+    assert all(key in list(input_mudata.mod["rna"].obs) for key in expected_obs), f"Input mod['rna'] obs columns should be: {expected_obs}, found: {input_mudata.mod['rna'].obs.keys()}."
+    assert all(key in list(input_mudata.mod["rna"].obsp) for key in expected_obsp), f"Input mod['rna'] obsp columns should be: {expected_obsp}, found: {input_mudata.mod['rna'].obsp.keys()}."
+
+
+if __name__ == "__main__":
+    HERE_DIR = Path(__file__).resolve().parent
+    shutil.copyfile(
+        os.path.join(meta["resources_dir"], "openpipelinetestutils", "conftest.py"),
+        os.path.join(HERE_DIR, "conftest.py"),
+    )
+    sys.exit(pytest.main(["--import-mode=importlib"]))
\ No newline at end of file
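
For a quick manual spot-check of a file produced by this workflow (the path below is a placeholder, not something defined in this PR), the same keys asserted by the test component can be inspected directly:

    # Spot-check of a workflow output; "output.h5mu" is a placeholder path.
    import mudata as md

    out = md.read_h5mu("output.h5mu").mod["rna"]
    print(list(out.obsm))  # expect X_integrated_scanorama, X_leiden_scanorama_umap
    print(list(out.obsp))  # expect scanorama_integration_{connectivities,distances}
    print(out.obs[["cell_type_pred", "cell_type_probability"]].head())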