diff --git a/CHANGELOG.md b/CHANGELOG.md
index 156515c5d41..573303e942a 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -10,6 +10,10 @@
 
 * The `transfer/publish` component is deprecated and will be removed in a future major release (PR #941).
 
+# NEW FUNCTIONALITY 
+
+* `workflows/annotation/harmony_knn` workflow: Cell-type annotation based on harmony integration with KNN label transfer (PR #836).
+
 # MINOR CHANGES
 
 * Several workflows: refactor neighbors, leiden and UMAP in a separate subworkflow (PR #942 and PR #949). 
diff --git a/resources_test_scripts/annotation_test_data.sh b/resources_test_scripts/annotation_test_data.sh
index 083058ba0d5..1d163270e58 100755
--- a/resources_test_scripts/annotation_test_data.sh
+++ b/resources_test_scripts/annotation_test_data.sh
@@ -33,15 +33,28 @@ wget "https://zenodo.org/record/7580707/files/pretrained_models_Blood_ts.tar.gz?
 
 # Process Tabula Sapiens Blood reference h5ad
 # (Select one individual and 100 cells per cell type)
+# normalize and log1p transform data
 python <<HEREDOC
 import anndata as ad
+import scanpy as sc
 ref_adata = ad.read_h5ad("${OUT}/tmp_TS_Blood_filtered.h5ad")
 sub_ref_adata = ref_adata[ref_adata.obs["donor_assay"] == "TSP14_10x 3' v3"] 
 n=100
 s=sub_ref_adata.obs.groupby('cell_ontology_class').cell_ontology_class.transform('count')
 sub_ref_adata_final = sub_ref_adata[sub_ref_adata.obs[s>=n].groupby('cell_ontology_class').head(n).index]
 # assert sub_ref_adata_final.shape == (500, 58870)
+data_for_scanpy = ad.AnnData(X=sub_ref_adata_final.X)
+sc.pp.normalize_total(data_for_scanpy, target_sum=10000)
+sc.pp.log1p(
+    data_for_scanpy,
+    base=None,
+    layer=None,
+    copy=False,
+)  
+sub_ref_adata_final.layers["log_normalized"] = data_for_scanpy.X
+
 sub_ref_adata_final.write("${OUT}/TS_Blood_filtered.h5ad", compression='gzip')
+
 HEREDOC
 
 
diff --git a/src/feature_annotation/highly_variable_features_scanpy/script.py b/src/feature_annotation/highly_variable_features_scanpy/script.py
index 089f9c1f089..19aab20c27d 100644
--- a/src/feature_annotation/highly_variable_features_scanpy/script.py
+++ b/src/feature_annotation/highly_variable_features_scanpy/script.py
@@ -140,7 +140,7 @@
 try:
     out = sc.pp.highly_variable_genes(**hvg_args)
     if par["var_input"] is not None:
-        out.index = data[:, data.var[par["var_input"]]].var.index
+        out.index = input_anndata.var.index
         out = out.reindex(index=data.var.index, method=None)
         out.highly_variable = out.highly_variable.fillna(False)
         assert (
diff --git a/src/integrate/harmony/config.vsh.yaml b/src/integrate/harmony/config.vsh.yaml
index fb835ddcc51..85972f7ca78 100644
--- a/src/integrate/harmony/config.vsh.yaml
+++ b/src/integrate/harmony/config.vsh.yaml
@@ -41,7 +41,7 @@ arguments:
     required: false
     description: "In which .obsm slot to store the resulting integrated embedding."
   - name: "--theta"
-    description: "Diversity clustering penalty parameter. Specify for each variable in group.by.vars. theta=0 does not encourage any diversity. Larger values of theta result in more diverse clusters."
+    description: "Diversity clustering penalty parameter. Can be set as a single value for all batch observations or as multiple values, one for each observation in the batches defined by --obs_covariates. theta=0 does not encourage any diversity. Larger values of theta result in more diverse clusters."
     type: double
     default: 2
     multiple: true
diff --git a/src/integrate/harmonypy/config.vsh.yaml b/src/integrate/harmonypy/config.vsh.yaml
index 7f2f4d06af7..2dbd7479f56 100644
--- a/src/integrate/harmonypy/config.vsh.yaml
+++ b/src/integrate/harmonypy/config.vsh.yaml
@@ -41,7 +41,7 @@ arguments:
     required: false
     description: "In which .obsm slot to store the resulting integrated embedding."
   - name: "--theta"
-    description: "Diversity clustering penalty parameter. Specify for each variable in group.by.vars. theta=0 does not encourage any diversity. Larger values of theta result in more diverse clusters."
+    description: "Diversity clustering penalty parameter. Can be set as a single value for all batch observations or as multiple values, one for each observation in the batches defined by --obs_covariates. theta=0 does not encourage any diversity. Larger values of theta result in more diverse clusters."
     type: double
     default: 2
     multiple: true
diff --git a/src/utils/subset_vars.py b/src/utils/subset_vars.py
index d8cb05b2d3d..6a68435fc0c 100644
--- a/src/utils/subset_vars.py
+++ b/src/utils/subset_vars.py
@@ -18,4 +18,14 @@ def subset_vars(adata, subset_col):
             f"Requested to use .var column '{subset_col}' as a selection of genes, but the column is not available."
         )
 
+    if adata.var[subset_col].dtype == "boolean":
+        assert (
+            adata.var[subset_col].isna().sum() == 0
+        ), f"The .var column `{subset_col}` contains NaN values. Can not subset data."
+        adata.var[subset_col] = adata.var[subset_col].astype("bool")
+
+    assert (
+        adata.var[subset_col].dtype == "bool"
+    ), f"Expected dtype of .var column '{subset_col}' to be `bool`, but found {adata.var[subset_col].dtype}. Can not subset data."
+
     return adata[:, adata.var[subset_col]].copy()
diff --git a/src/workflows/annotation/harmony_knn/config.vsh.yaml b/src/workflows/annotation/harmony_knn/config.vsh.yaml
new file mode 100644
index 00000000000..427d26b4319
--- /dev/null
+++ b/src/workflows/annotation/harmony_knn/config.vsh.yaml
@@ -0,0 +1,205 @@
+name: "harmony_knn"
+namespace: "workflows/annotation"
+description: "Cell type annotation workflow by performing harmony integration of reference and query dataset followed by KNN label transfer."
+info:
+  name: "Harmony integration followed by KNN label transfer"
+  test_dependencies:
+    - name: harmony_knn_test
+      namespace: test_workflows/annotation
+authors:
+  - __merge__: /src/authors/dorien_roosen.yaml
+    roles: [ author, maintainer ]
+  - __merge__: /src/authors/weiwei_schultz.yaml
+    roles: [ contributor ]
+
+argument_groups:
+  - name: Query Input
+    arguments:
+      - name: "--id"
+        required: true
+        type: string
+        description: ID of the sample.
+        example: foo
+      - name: "--input"
+        required: true
+        type: file
+        description: Input dataset consisting of the (unlabeled) query observations. The dataset is expected to be pre-processed in the same way as --reference.
+        example: input.h5mu
+      - name: "--modality"
+        description: Which modality to process. Should match the modality of the --reference dataset.
+        type: string
+        default: "rna"
+        required: false
+      - name: "--input_layer"
+        description: The layer of the input dataset to process if .X is not to be used. Should contain log normalized counts.
+        required: false
+        type: string
+      - name: "--input_obs_batch_label"
+        type: string
+        description: "The .obs field in the input (query) dataset containing the batch labels."
+        example: "sample"
+        required: true
+      - name: "--input_var_gene_names"
+        type: string
+        required: false
+        description: |
+          The .var field in the input (query) dataset containing gene names; if not provided, the .var index will be used.
+      - name: "--input_reference_gene_overlap"
+        type: integer
+        default: 100
+        min: 1
+        description: | 
+          The minimum number of genes present in both the reference and query datasets.
+      - name: "--overwrite_existing_key"
+        type: boolean_true
+        description: If provided, will overwrite existing fields in the input dataset when data are copied during the reference alignment process.
+
+  - name: Reference input
+    arguments:
+      - name: "--reference"
+        required: true
+        type: file
+        description: Reference dataset consisting of the labeled observations to train the KNN classifier on. The dataset is expected to be pre-processed in the same way as the --input query dataset.
+        example: reference.h5mu
+      - name: "--reference_layer"
+        description: The layer of the reference dataset to process if .X is not to be used. Should contain log normalized counts.
+        required: false
+        type: string
+      - name: "--reference_obs_target"
+        type: string
+        example: cell_type
+        required: true
+        description: The `.obs` key of the target cell type labels to transfer.
+      - name: "--reference_var_gene_names"
+        type: string
+        required: false
+        description: |
+          The .var field in the reference dataset containing gene names; if not provided, the .var index will be used.
+      - name: "--reference_obs_batch_label"
+        type: string
+        description:  "The .obs field in the reference dataset containing the batch labels."
+        example: "sample"
+        required: true
+
+  - name: "Highly Variable Genes calculation options"
+    arguments:
+      - name: "--hvg_flavor"
+        alternatives: ["--filter_with_hvg_flavor"]
+        type: string
+        default: "seurat"
+        choices: ["seurat", "cell_ranger", "seurat_v3"]
+        description: |
+          Choose the flavor for identifying highly variable features. For the dispersion based methods
+          in their default workflows, Seurat passes the cutoffs whereas Cell Ranger passes n_top_features.
+      - name: "--hvg_n_top_features"
+        alternatives: ["--filter_with_hvg_n_top_genes"]
+        required: false
+        type: integer
+        description: Number of highly-variable features to keep. Mandatory if filter_with_hvg_flavor is set to 'seurat_v3'.
+
+  - name: PCA options
+    arguments:
+      - name: "--pca_num_components"
+        type: integer
+        example: 25
+        description: Number of principal components to compute. Defaults to 50, or 1 - minimum dimension size of selected representation.
+
+  - name: Harmony integration options
+    arguments:
+      - name: "--harmony_theta"
+        type: double
+        description: |
+          Diversity clustering penalty parameter. Can be set as a single value for all batch observations or as multiple values, one for each observation in the batches defined by --input_obs_batch_label. theta=0 does not encourage any diversity. Larger values of theta result in more diverse clusters."
+        min: 0
+        default: [2]
+        multiple: true
+
+  - name: Leiden clustering options
+    arguments:
+      - name: "--leiden_resolution"
+        type: double
+        description: Control the coarseness of the clustering. Higher values lead to more clusters.
+        min: 0
+        default: [1]
+        multiple: true
+
+  - name: Neighbor classifier arguments
+    arguments:
+      - name: "--knn_weights"
+        type: string
+        default: "uniform"
+        choices: ["uniform", "distance"]
+        description: |
+          Weight function used in prediction. Possible values are:
+          `uniform` (all points in each neighborhood are weighted equally) or 
+          `distance` (weight points by the inverse of their distance)
+      - name: "--knn_n_neighbors"
+        type: integer
+        default: 15
+        min: 5
+        required: false
+        description: |
+          The number of neighbors to use in k-neighbor graph structure used for fast approximate nearest neighbor search with PyNNDescent. 
+          Larger values will result in more accurate search results at the cost of computation time.
+
+  - name: "Outputs"
+    arguments:
+      - name: "--output"
+        type: file
+        required: true
+        direction: output
+        description: The query data in .h5mu format with predicted labels predicted from the classifier trained on the reference.
+        example: output.h5mu
+      - name: "--output_obs_predictions"
+        type: string
+        required: false
+        multiple: true
+        description: |
+          In which `.obs` slots to store the predicted cell labels.
+          If provided, must have the same length as `--reference_obs_targets`.
+          If empty, will default to the `reference_obs_targets` combined with the `"_pred"` suffix.
+      - name: "--output_obs_probability"
+        type: string
+        required: false
+        multiple: true
+        description: |
+          In which `.obs` slots to store the probability of the predictions.
+          If provided, must have the same length as `--reference_obs_targets`.
+          If empty, will default to the `reference_obs_targets` combined with the `"_probability"` suffix.
+      - name: "--output_obsm_integrated"
+        type: string
+        default: "X_integrated_harmony"
+        required: false
+        description: "In which .obsm slot to store the integrated embedding."
+      - name: "--output_compression"
+        type: string
+        description: |
+          The compression format to be used on the output h5mu object.
+        choices: ["gzip", "lzf"]
+        required: false
+        example: "gzip"
+
+dependencies:
+  - name: workflows/integration/harmony_leiden
+    alias: harmony_leiden_workflow
+  - name: labels_transfer/knn
+  - name: dataflow/split_h5mu
+  - name: dataflow/concatenate_h5mu
+  - name: dimred/pca
+  - name: feature_annotation/align_query_reference
+  - name: feature_annotation/highly_variable_features_scanpy
+
+resources:
+  - type: nextflow_script
+    path: main.nf
+    entrypoint: run_wf
+
+test_resources:
+  - type: nextflow_script
+    path: test.nf
+    entrypoint: test_wf
+  - path: /resources_test/pbmc_1k_protein_v3/pbmc_1k_protein_v3_mms.h5mu
+  - path: /resources_test/annotation_test_data/TS_Blood_filtered.h5mu
+
+runners:
+  - type: nextflow
diff --git a/src/workflows/annotation/harmony_knn/integration_test.sh b/src/workflows/annotation/harmony_knn/integration_test.sh
new file mode 100755
index 00000000000..829d005b158
--- /dev/null
+++ b/src/workflows/annotation/harmony_knn/integration_test.sh
@@ -0,0 +1,16 @@
+#!/bin/bash
+
+# get the root of the directory
+REPO_ROOT=$(git rev-parse --show-toplevel)
+
+# ensure that the command below is run from the root of the repository
+cd "$REPO_ROOT"
+
+nextflow \
+  run . \
+  -main-script src/workflows/annotation/harmony_knn/test.nf \
+  -entry test_wf \
+  -resume \
+  -profile docker,no_publish \
+  -c src/workflows/utils/labels_ci.config \
+  -c src/workflows/utils/integration_tests.config \
diff --git a/src/workflows/annotation/harmony_knn/main.nf b/src/workflows/annotation/harmony_knn/main.nf
new file mode 100644
index 00000000000..8c10adffb12
--- /dev/null
+++ b/src/workflows/annotation/harmony_knn/main.nf
@@ -0,0 +1,168 @@
+workflow run_wf {
+  take:
+    input_ch
+
+  main:
+    
+    output_ch = input_ch
+        // Set aside the output for this workflow to avoid conflicts
+        | map {id, state -> 
+            def new_state = state + ["workflow_output": state.output]
+            [id, new_state]
+        }
+        // Align query and reference datasets
+        | align_query_reference.run(
+            fromState: [
+                "input": "input",
+                "modality": "modality",
+                "input_layer": "input_layer",
+                "input_obs_batch": "input_obs_batch_label",
+                "input_var_gene_names": "input_var_gene_names",
+                "reference": "reference",
+                "reference_layer": "reference_layer",
+                "reference_obs_batch": "reference_obs_batch_label",
+                "reference_obs_label": "reference_obs_target",
+                "reference_var_gene_names": "reference_var_gene_names",
+                "input_reference_gene_overlap": "input_reference_gene_overlap",
+                "overwrite_existing_key": "overwrite_existing_key"
+            ],
+            args: [
+                "input_id": "query",
+                "reference_id": "reference",
+                "output_layer": "_counts",
+                "output_var_gene_names": "_gene_names",
+                "output_obs_batch": "_sample_id",
+                "output_obs_label": "_cell_type",
+                "output_obs_id": "_dataset",
+                "output_var_common_genes": "_common_vars"
+            ],
+            toState: [
+                "input": "output_query",
+                "reference": "output_reference"
+            ]
+        )
+        // Concatenate query and reference datasets prior to integration
+        | concatenate_h5mu.run(
+            fromState: { id, state -> [
+                "input": [state.input, state.reference]
+                ]
+            },
+            args: [
+                "input_id": ["query", "reference"],
+                "other_axis_mode": "move"
+            ],
+            toState: ["input": "output"]
+        )
+        | view {"After concatenation: $it"}
+        | highly_variable_features_scanpy.run(
+            fromState: [
+                "input": "input",
+                "modality": "modality",
+                "flavor": "hvg_flavor",
+                "n_top_features": "hvg_n_top_features",
+            ],
+            args: [
+                "layer": "_counts",
+                "var_input": "_common_vars",
+                "var_name_filter": "_common_hvg",
+                "obs_batch_key": "_sample_id"
+            ],
+            toState: [
+                "input": "output"
+            ]
+        )
+        | pca.run(
+            fromState: [
+                "input": "input",
+                "modality": "modality",
+                "overwrite": "overwrite_existing_key",
+                "num_compontents": "pca_num_components"
+            ],
+            args: [
+                "layer": "_counts",
+                "var_input": "_common_hvg",
+                "obsm_output": "X_pca_harmony",
+                "varm_output": "pca_loadings_harmony",
+                "uns_output": "pca_variance_harmony",
+            ],
+            toState: [
+                "input": "output"
+            ]
+        )
+        // Run harmony integration with leiden clustering
+        | harmony_leiden_workflow.run(
+            fromState: { id, state -> [
+                "id": id,
+                "input": state.input,
+                "modality": state.modality,
+                "obsm_integrated": state.output_obsm_integrated,
+                "theta": state.harmony_theta,
+                "leiden_resolution": state.leiden_resolution,
+            ]},
+            args: [
+                "embedding": "X_pca_harmony",
+                "uns_neighbors": "harmonypy_integration_neighbors",
+                "obsp_neighbor_distances": "harmonypy_integration_distances",
+                "obsp_neighbor_connectivities": "harmonypy_integration_connectivities",
+                "obs_cluster": "harmony_integration_leiden",
+                "obsm_umap": "X_leiden_harmony_umap",
+                "obs_covariates": "_sample_id"
+            ],
+            toState: ["input": "output"]
+        )
+        | view {"After integration: $it"}
+        // Split integrated dataset back into a separate reference and query dataset
+        | split_h5mu.run(
+            fromState: [
+                "input": "input",
+                "modality": "modality"
+            ],
+            args: [
+                "obs_feature": "_dataset",
+                "output_files": "sample_files.csv",
+                "drop_obs_nan": "true",
+                "output": "ref_query"
+            ],
+            toState: [ 
+                "output": "output", 
+                "output_files": "output_files" 
+            ],
+            auto: [ publish: true ]
+        )
+        | view {"After sample splitting: $it"}
+        // map the integrated query and reference datasets back to the state
+        | map {id, state ->
+            def outputDir = state.output
+            def files = readCsv(state.output_files.toUriString())
+            def query_file = files.findAll{ dat -> dat.name == 'query' }
+            assert query_file.size() == 1, 'there should only be one query file'
+            def reference_file = files.findAll{ dat -> dat.name == 'reference' }
+            assert reference_file.size() == 1, 'there should only be one reference file'
+            def integrated_query = outputDir.resolve(query_file.filename)
+            def integrated_reference = outputDir.resolve(reference_file.filename)
+            def newKeys = ["integrated_query": integrated_query, "integrated_reference": integrated_reference]
+            [id, state + newKeys]
+        }
+        | view {"After splitting query: $it"}
+        // Perform KNN label transfer from integrated reference to integrated query
+        | knn.run(
+            fromState: [
+                "input": "integrated_query",
+                "modality": "modality",
+                "input_obsm_features": "output_obsm_integrated",
+                "reference": "integrated_reference",
+                "reference_obsm_features": "output_obsm_integrated",
+                "reference_obs_targets": "reference_obs_target",
+                "output_obs_predictions": "output_obs_predictions",
+                "output_obs_probability": "output_obs_probability",
+                "output_compression": "output_compression",
+                "weights": "knn_weights",
+                "n_neighbors": "knn_n_neighbors",
+                "output": "workflow_output"
+            ],
+            toState: {id, output, state -> ["output": output.output]},
+        )
+    
+  emit:
+    output_ch
+}
diff --git a/src/workflows/annotation/harmony_knn/nextflow.config b/src/workflows/annotation/harmony_knn/nextflow.config
new file mode 100644
index 00000000000..059100c489c
--- /dev/null
+++ b/src/workflows/annotation/harmony_knn/nextflow.config
@@ -0,0 +1,10 @@
+manifest {
+  nextflowVersion = '!>=20.12.1-edge'
+}
+
+params {
+  rootDir = java.nio.file.Paths.get("$projectDir/../../../../").toAbsolutePath().normalize().toString()
+}
+
+// include common settings
+includeConfig("${params.rootDir}/src/workflows/utils/labels.config")
diff --git a/src/workflows/annotation/harmony_knn/test.nf b/src/workflows/annotation/harmony_knn/test.nf
new file mode 100644
index 00000000000..cdb62f052a0
--- /dev/null
+++ b/src/workflows/annotation/harmony_knn/test.nf
@@ -0,0 +1,66 @@
+nextflow.enable.dsl=2
+
+include { harmony_knn } from params.rootDir + "/target/nextflow/workflows/annotation/harmony_knn/main.nf"
+include { harmony_knn_test } from params.rootDir + "/target/nextflow/test_workflows/annotation/harmony_knn_test/main.nf"
+params.resources_test = params.rootDir + "/resources_test"
+
+workflow test_wf {
+  // allow changing the resources_test dir
+  resources_test = file(params.resources_test)
+
+  output_ch = Channel.fromList(
+    [
+      [
+        id: "simple_execution_test",
+        input: resources_test.resolve("pbmc_1k_protein_v3/pbmc_1k_protein_v3_mms.h5mu"),
+        input_layer: "log_normalized",
+        reference: resources_test.resolve("annotation_test_data/TS_Blood_filtered.h5mu"),
+        reference_var_gene_names: "ensemblid",
+        input_obs_batch_label: "sample_id",
+        reference_layer: "log_normalized",
+        reference_obs_batch_label: "donor_assay",
+        reference_obs_target: "cell_type",
+        leiden_resolution: [1.0, 0.25]
+      ],
+      [
+        id: "no_leiden_resolutions_test",
+        input: resources_test.resolve("pbmc_1k_protein_v3/pbmc_1k_protein_v3_mms.h5mu"),
+        input_layer: "log_normalized",
+        reference: resources_test.resolve("annotation_test_data/TS_Blood_filtered.h5mu"),
+        reference_var_gene_names: "ensemblid",
+        input_obs_batch_label: "sample_id",
+        reference_layer: "log_normalized",
+        reference_obs_batch_label: "donor_assay",
+        reference_obs_target: "cell_type",
+        leiden_resolution: []
+      ]
+    ])
+    | map{ state -> [state.id, state] }
+    | harmony_knn 
+    | view { output ->
+      assert output.size() == 2 : "Outputs should contain two elements; [id, state]"
+
+      // check id
+      def id = output[0]
+      assert id.endsWith("_test") : "Output ID should be same as input ID"
+
+      // check output
+      def state = output[1]
+      assert state instanceof Map : "State should be a map. Found: ${state}"
+      assert state.containsKey("output") : "Output should contain key 'output'."
+      assert state.output.isFile() : "'output' should be a file."
+      assert state.output.toString().endsWith(".h5mu") : "Output file should end with '.h5mu'. Found: ${state.output}"
+    
+    "Output: $output"
+    }
+    | harmony_knn_test.run(
+        fromState: [
+          "input": "output"
+        ]
+    )
+    | toSortedList({a, b -> a[0] <=> b[0]})
+    | map { output_list ->
+      assert output_list.size() == 2 : "output channel should contain 2 events"
+      assert output_list.collect{it[0]} == ["no_leiden_resolutions_test", "simple_execution_test"]
+    }
+    }
\ No newline at end of file
diff --git a/src/workflows/integration/harmony_leiden/config.vsh.yaml b/src/workflows/integration/harmony_leiden/config.vsh.yaml
index 058375735c3..11e9d3d0d1d 100644
--- a/src/workflows/integration/harmony_leiden/config.vsh.yaml
+++ b/src/workflows/integration/harmony_leiden/config.vsh.yaml
@@ -71,8 +71,7 @@ argument_groups:
       - name: "--theta"
         type: double
         description: |
-          Diversity clustering penalty parameter. Specify for each variable in group.by.vars. 
-          theta=0 does not encourage any diversity. Larger values of theta
+          Diversity clustering penalty parameter. Can be set as a single value for all batch observations or as multiple values, one for each observation in the batches defined by --obs_covariates. theta=0 does not encourage any diversity. Larger values of theta
           result in more diverse clusters."
         default: 2
         multiple: true
diff --git a/src/workflows/test_workflows/annotation/harmony_knn/config.vsh.yaml b/src/workflows/test_workflows/annotation/harmony_knn/config.vsh.yaml
new file mode 100644
index 00000000000..f5fab1c5efc
--- /dev/null
+++ b/src/workflows/test_workflows/annotation/harmony_knn/config.vsh.yaml
@@ -0,0 +1,35 @@
+name: "harmony_knn_test"
+namespace: "test_workflows/annotation"
+description: "This component tests the output of the annotation of the harmony_knn of workflow."
+authors:
+  - __merge__: /src/authors/dorien_roosen.yaml
+argument_groups:
+  - name: Inputs
+    arguments:
+      - name: "--input"
+        type: file
+        required: true
+        description: Path to h5mu output.
+        example: foo.final.h5mu
+resources:
+  - type: python_script
+    path: script.py
+  - path: /src/utils/setup_logger.py
+  - path: /src/base/openpipelinetestutils
+    dest: openpipelinetestutils
+engines:
+  - type: docker
+    image: python:3.12-slim
+    setup:
+      - type: docker
+        copy: ["openpipelinetestutils /opt/openpipelinetestutils"]
+      - type: apt
+        packages: 
+          - procps
+      - type: python
+        packages: /opt/openpipelinetestutils
+      - type: python
+        __merge__: [/src/base/requirements/anndata_mudata.yaml, /src/base/requirements/viashpy.yaml, .]
+runners:
+  - type: executable
+  - type: nextflow
\ No newline at end of file
diff --git a/src/workflows/test_workflows/annotation/harmony_knn/script.py b/src/workflows/test_workflows/annotation/harmony_knn/script.py
new file mode 100644
index 00000000000..cf20808a735
--- /dev/null
+++ b/src/workflows/test_workflows/annotation/harmony_knn/script.py
@@ -0,0 +1,38 @@
+from mudata import read_h5mu
+import shutil
+import os
+import sys
+from pathlib import Path
+import pytest
+
+##VIASH START
+par = {"input": "harmony_knn/output.h5mu"}
+
+meta = {"resources_dir": "resources_test"}
+##VIASH END
+
+
+def test_run():
+    input_mudata = read_h5mu(par["input"])
+    expected_obsm = ["X_integrated_harmony", "X_leiden_harmony_umap"]
+    expected_obs = ["cell_type_pred", "cell_type_probability"]
+
+    assert "rna" in list(input_mudata.mod.keys()), "Input should contain rna modality."
+    assert all(
+        key in list(input_mudata.mod["rna"].obsm) for key in expected_obsm
+    ), f"Input mod['rna'] obs columns should be: {expected_obsm}, found: {input_mudata.mod['rna'].obsm.keys()}."
+    assert all(
+        key in list(input_mudata.mod["rna"].obs) for key in expected_obs
+    ), f"Input mod['rna'] obs columns should be: {expected_obs}, found: {input_mudata.mod['rna'].obs.keys()}."
+
+    assert input_mudata.mod["rna"].obs["cell_type_pred"].dtype == "category"
+    assert input_mudata.mod["rna"].obs["cell_type_probability"].dtype == "float64"
+
+
+if __name__ == "__main__":
+    HERE_DIR = Path(__file__).resolve().parent
+    shutil.copyfile(
+        os.path.join(meta["resources_dir"], "openpipelinetestutils", "conftest.py"),
+        os.path.join(HERE_DIR, "conftest.py"),
+    )
+    sys.exit(pytest.main(["--import-mode=importlib"]))