From 95d52dbacae64b5a1fc5a22f51bdd5b1ccc48fc3 Mon Sep 17 00:00:00 2001
From: antonylebechec <antony.lebechec@gmail.com>
Date: Fri, 8 Mar 2024 00:48:53 +0100
Subject: [PATCH] Add annotation strategy (update, append) #165, add docs #4

---
 docs/help.html                                |  10 ++
 docs/help.md                                  |  16 +++
 docs/help.param.html                          |  16 +--
 docs/help.param.json                          |  42 +++++--
 docs/help.param.md                            |  50 +++++---
 howard/objects/variants.py                    |  25 ++--
 howard/tools/annotation.py                    |  12 ++
 howard/tools/tools.py                         |  24 +++-
 tests/data/example.nci60_1.vcf                |  63 ++++++++++
 ...{example.nci60.vcf => example.nci60_2.vcf} |   2 +-
 tests/test_variants_annotations.py            | 112 ++++++++++++++++--
 11 files changed, 320 insertions(+), 52 deletions(-)
 create mode 100644 tests/data/example.nci60_1.vcf
 rename tests/data/{example.nci60.vcf => example.nci60_2.vcf} (96%)
diff --git a/docs/help.html b/docs/help.html
index d3dac82..3b51717 100644
--- a/docs/help.html
+++ b/docs/help.html
@@ -150,6 +150,16 @@ <H1>HOWARD Help</h1>
 Default assembly
 Default: &#x27;hg19&#x27;
 
+</pre><pre>--annotations_update
+Update option for annotation (Only for Parquet annotation).
+If True, annotation fields will be removed and re-annotated.
+These options will be applied to all annotation databases. Default: False
+
+</pre><pre>--annotations_append
+Append option for annotation (Only for Parquet annotation).
+If True, annotation fields will be annotated only if no annotation exists for the variant.
+These options will be applied to all annotation databases. Default: False
+
 </pre><H2>CALCULATION</H2>
 <p>Calculation processes variants information to generate new information, such as: identify variation type (VarType), harmonizes allele frequency (VAF) and calculate sttistics (VAF_stats), extracts Nomen (transcript, cNomen, pNomen...) from an HGVS field (e.g. snpEff, Annovar) with an optional list of personalized transcripts, generates VaRank format barcode, identify trio inheritance.</p>Usage examples:<br>&nbsp;&nbsp;&nbsp;howard calculation --input=tests/data/example.full.vcf --output=/tmp/example.calculation.tsv --calculations='vartype' <br>&nbsp;&nbsp;&nbsp;howard calculation --input=tests/data/example.ann.vcf.gz --output=/tmp/example.calculated.tsv --calculations='snpeff_hgvs,NOMEN' --hgvs_field=snpeff_hgvs --transcripts=tests/data/transcripts.tsv <br>&nbsp;&nbsp;&nbsp;howard calculation --show_calculations <br><H3>Main options</H3>
 <pre>--input=&lt;input&gt;
diff --git a/docs/help.md b/docs/help.md
index 178d01f..660dde4 100644
--- a/docs/help.md
+++ b/docs/help.md
@@ -347,6 +347,22 @@ Default assembly
 Default: 'hg19'
 ```
 
+```
+--annotations_update
+
+Update option for annotation (Only for Parquet annotation).
+If True, annotation fields will be removed and re-annotated.
+These options will be applied to all annotation databases. Default: False
+```
+
+```
+--annotations_append
+
+Append option for annotation (Only for Parquet annotation).
+If True, annotation fields will be annotated only if no annotation exists for the variant.
+These options will be applied to all annotation databases. Default: False
+```
+
 
 
 ## CALCULATION tool
diff --git a/docs/help.param.html b/docs/help.param.html
index 02e538e..de4b36b 100644
--- a/docs/help.param.html
+++ b/docs/help.param.html
@@ -1,6 +1,6 @@
 <H1 id='HOWARD Parameters'>HOWARD Parameters</H1>HOWARD Parameters JSON file defined parameters to process annotations, prioritization, calculations, convertions and queries.<br>
                     <H2>Table of contents</H1>
-                    - <a href='#HOWARD Parameters'>HOWARD Parameters</a><BR>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;- <a href='#annotations'>annotations</a><BR>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;- <a href='#annotations::parquet'>parquet</a><BR>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;- <a href='#annotations::parquet::annotations'>annotations</a><BR>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;- <a href='#annotations::parquet::update'>update</a><BR>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;- <a href='#annotations::bcftools'>bcftools</a><BR>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;- <a href='#annotations::bcftools::annotations'>annotations</a><BR>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;- <a href='#annotations::annovar'>annovar</a><BR>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;- <a href='#annotations::annovar::annotations'>annotations</a><BR>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;- <a href='#annotations::annovar::options'>options</a><BR>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;- <a href='#annotations::snpeff'>snpeff</a><BR>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;- <a href='#annotations::snpeff::options'>options</a><BR>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;- <a href='#annotations::exomiser'>exomiser</a><BR>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;- <a href='#annotations::exomiser::release'>release</a><BR>
+                    - <a href='#HOWARD Parameters'>HOWARD Parameters</a><BR>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;- <a href='#annotations'>annotations</a><BR>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;- <a href='#annotations::parquet'>parquet</a><BR>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;- <a href='#annotations::parquet::annotations'>annotations</a><BR>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;- <a href='#annotations::bcftools'>bcftools</a><BR>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;- <a href='#annotations::bcftools::annotations'>annotations</a><BR>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;- <a href='#annotations::annovar'>annovar</a><BR>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;- <a href='#annotations::annovar::annotations'>annotations</a><BR>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;- <a href='#annotations::annovar::options'>options</a><BR>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;- <a href='#annotations::snpeff'>snpeff</a><BR>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;- <a href='#annotations::snpeff::options'>options</a><BR>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;- <a href='#annotations::exomiser'>exomiser</a><BR>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;- <a href='#annotations::exomiser::release'>release</a><BR>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;- <a 
href='#annotations::options'>options</a><BR>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;- <a href='#annotations::options::update'>update</a><BR>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;- <a href='#annotations::options::append'>append</a><BR>
                     <br>
                     <H2 id='annotations'>annotations</H2>Annotation process using HOWARD algorithms or external tools.<br>For HOWARD Parquet algorithm, specify the list of database files available (formats such as Parquet, VCF, TSV, duckDB, JSON). This parameter enables users to select specific database fields and optionally rename them. Use 'INFO' keyword to select all fields within the database. If a full path is not provided, the system will automatically detect files within database folders (see Configuration doc) and assembly (see Parameter option).<br>For external tools, such as Annovar, snpEff and Exomiser, specify parameters such as annotation keywords (Annovar) and options (depending on the tool).<br>Examples: <xmp># Annotation with multiple tools in multiple formats with multiple options
 "annotation": {
@@ -63,8 +63,7 @@ <H2 id='annotations'>annotations</H2>Annotation process using HOWARD algorithms
       "REVEL_rankscore": null
    }
 }
-</xmp><br><H4 id='annotations::parquet::update'>annotations::parquet::update</H4>Update option for Parquet annotation. If True, annotation fields will be updated if exists. If False, annotation fields will not change if it already exists. These options will be applied to all annotation databases.<br>Examples: <xmp># Apply update on all annotation fields for all databases.
-"update": True</xmp><br><H3 id='annotations::bcftools'>annotations::bcftools</H3>Annotation process using BCFTools. Provide a list of database files and annotation fields.<br>Examples: <xmp># Annotation with multiple databases in multiple formats
+</xmp><br><H3 id='annotations::bcftools'>annotations::bcftools</H3>Annotation process using BCFTools. Provide a list of database files and annotation fields.<br>Examples: <xmp># Annotation with multiple databases in multiple formats
 "parquet": {
    "bcftools": {
       "/path/to/database1.vcf.gz": {
@@ -146,12 +145,15 @@ <H2 id='annotations'>annotations</H2>Annotation process using HOWARD algorithms
       " -hgvs -noShiftHgvs -spliceSiteSize 3 -lof -oicr "}
    }
 }</xmp><br><H4 id='annotations::snpeff::options'>annotations::snpeff::options</H4>String (as command line) of options available such as:<br> - filters on variants (regions filter, specific changes as intronic or downstream)<br> - annotation (e.g. HGVS, loss of function) <br> - database (e.g. only protein coding transcripts, splice sites size)<br>Examples: <xmp># Annotation with snpEff databases, with options to generate HGVS annotation, specify to not shift variants according to HGVS notation, define splice sites size to 3, add loss of function (LOF), Nonsense mediated decay and OICR tags.
-"options": {
-   " -hgvs -noShiftHgvs -spliceSiteSize 3 -lof -oicr "}
-}</xmp><br><H3 id='annotations::exomiser'>annotations::exomiser</H3>Annotation process using Exomiser tool and options (see [Exomiser website documentation](https://www.sanger.ac.uk/tool/exomiser/)).<br>Examples: <xmp># Annotation with Exomiser, using database relse '2109', transcripts source as UCSC and a list of HPO terms.
+"options": " -hgvs -noShiftHgvs -spliceSiteSize 3 -lof -oicr "</xmp><br><H3 id='annotations::exomiser'>annotations::exomiser</H3>Annotation process using Exomiser tool and options (see [Exomiser website documentation](https://www.sanger.ac.uk/tool/exomiser/)).<br>Examples: <xmp># Annotation with Exomiser, using database relse '2109', transcripts source as UCSC and a list of HPO terms.
 "exomiser": {
    "release": "2109"
    "transcript_source": "refseq"
    "hpo": ['HP:0001156', 'HP:0001363', 'HP:0011304', 'HP:0010055']
 }</xmp><br><H4 id='annotations::exomiser::release'>annotations::exomiser::release</H4>Release of Exomiser database. This option replace the release variable in 'application.properties' file (see 'exomiser_application_properties' option). The release will be downloaded if it is not available locally. <br>Examples: <xmp># Annotation with release '2109' of Exomiser database.
-"release": "2109"</xmp><br>
\ No newline at end of file
+"release": "2109"</xmp><br><H3 id='annotations::options'>annotations::options</H3>Options for annotations, such as annotation strategy (skip if exists, update, append)<br>Examples: <xmp># Annotation with Parquet databases, with update annotation strategy.
+"options": {
+   "update": True
+}</xmp><br><H4 id='annotations::options::update'>annotations::options::update</H4>Update option for annotation (only for Parquet annotation). If True, annotation fields will be removed and re-annotated. These options will be applied to all annotation databases.<br>Examples: <xmp># Apply update on all annotation fields for all databases.
+"update": True</xmp><br><H4 id='annotations::options::append'>annotations::options::append</H4>Append option for annotation (only for Parquet annotation). If True, annotation fields will be annotated only if no annotation exists for the variant. These options will be applied to all annotation databases.<br>Examples: <xmp># Apply append on all annotation fields for all databases.
+"append": True</xmp><br>
\ No newline at end of file
diff --git a/docs/help.param.json b/docs/help.param.json
index 738777d..aeea936 100644
--- a/docs/help.param.json
+++ b/docs/help.param.json
@@ -86,15 +86,6 @@
           "}",
           ""
         ]
-      },
-      "update": {
-        "__help": [
-          "Update option for Parquet annotation. If True, annotation fields will be updated if exists. If False, annotation fields will not change if it already exists. These options will be applied to all annotation databases."
-        ],
-        "__examples_code": [
-          "# Apply update on all annotation fields for all databases.",
-          "\"update\": True"
-        ]
       }
     },
     "bcftools": {
@@ -234,9 +225,7 @@
         ],
         "__examples_code": [
           "# Annotation with snpEff databases, with options to generate HGVS annotation, specify to not shift variants according to HGVS notation, define splice sites size to 3, add loss of function (LOF), Nonsense mediated decay and OICR tags.",
-          "\"options\": {",
-          "   \" -hgvs -noShiftHgvs -spliceSiteSize 3 -lof -oicr \"}",
-          "}"
+          "\"options\": \" -hgvs -noShiftHgvs -spliceSiteSize 3 -lof -oicr \""
         ]
       }
     },
@@ -261,6 +250,35 @@
           "\"release\": \"2109\""
         ]
       }
+    },
+    "options": {
+      "__help": [
+        "Options for annotations, such as annotation strategy (skip if exists, update, append)"
+      ],
+      "__examples_code": [
+        "# Annotation with Parquet databases, with update annotation strategy.",
+        "\"options\": {",
+        "   \"update\": True",
+        "}"
+      ],
+      "update": {
+        "__help": [
+          "Update option for annotation (only for Parquet annotation). If True, annotation fields will be removed and re-annotated. These options will be applied to all annotation databases."
+        ],
+        "__examples_code": [
+          "# Apply update on all annotation fields for all databases.",
+          "\"update\": True"
+        ]
+      },
+      "append": {
+        "__help": [
+          "Append option for annotation (only for Parquet annotation). If True, annotation fields will be annotated only if no annotation exists for the variant. These options will be applied to all annotation databases."
+        ],
+        "__examples_code": [
+          "# Apply append on all annotation fields for all databases.",
+          "\"append\": True"
+        ]
+      }
     }
   }
 }
diff --git a/docs/help.param.md b/docs/help.param.md
index b2f529b..a73b239 100644
--- a/docs/help.param.md
+++ b/docs/help.param.md
@@ -8,7 +8,6 @@ HOWARD Parameters JSON file defined parameters to process annotations, prioritiz
    - [annotations](#annotations)
       - [parquet](#annotationsparquet)
          - [annotations](#annotationsparquetannotations)
-         - [update](#annotationsparquetupdate)
       - [bcftools](#annotationsbcftools)
          - [annotations](#annotationsbcftoolsannotations)
       - [annovar](#annotationsannovar)
@@ -18,6 +17,9 @@ HOWARD Parameters JSON file defined parameters to process annotations, prioritiz
          - [options](#annotationssnpeffoptions)
       - [exomiser](#annotationsexomiser)
          - [release](#annotationsexomiserrelease)
+      - [options](#annotationsoptions)
+         - [update](#annotationsoptionsupdate)
+         - [append](#annotationsoptionsappend)
 
 
 ## annotations
@@ -112,16 +114,6 @@ Examples:
 
 ```
 
-#### annotations::parquet::update
-
-Update option for Parquet annotation. If True, annotation fields will be updated if exists. If False, annotation fields will not change if it already exists. These options will be applied to all annotation databases.
-
-Examples: 
-```
-# Apply update on all annotation fields for all databases.
-"update": True
-```
-
 ### annotations::bcftools
 
 Annotation process using BCFTools. Provide a list of database files and annotation fields.
@@ -270,9 +262,7 @@ String (as command line) of options available such as:
 Examples: 
 ```
 # Annotation with snpEff databases, with options to generate HGVS annotation, specify to not shift variants according to HGVS notation, define splice sites size to 3, add loss of function (LOF), Nonsense mediated decay and OICR tags.
-"options": {
-   " -hgvs -noShiftHgvs -spliceSiteSize 3 -lof -oicr "}
-}
+"options": " -hgvs -noShiftHgvs -spliceSiteSize 3 -lof -oicr "
 ```
 
 ### annotations::exomiser
@@ -299,3 +289,35 @@ Examples:
 "release": "2109"
 ```
 
+### annotations::options
+
+Options for annotations, such as annotation strategy (skip if exists, update, append)
+
+Examples: 
+```
+# Annotation with Parquet databases, with update annotation strategy.
+"options": {
+   "update": True
+}
+```
+
+#### annotations::options::update
+
+Update option for annotation (only for Parquet annotation). If True, annotation fields will be removed and re-annotated. These options will be applied to all annotation databases.
+
+Examples: 
+```
+# Apply update on all annotation fields for all databases.
+"update": True
+```
+
+#### annotations::options::append
+
+Append option for annotation (only for Parquet annotation). If True, annotation fields will be annotated only if no annotation exists for the variant. These options will be applied to all annotation databases.
+
+Examples: 
+```
+# Apply append on all annotation fields for all databases.
+"append": True
+```
+
diff --git a/howard/objects/variants.py b/howard/objects/variants.py
index 6697336..00c7aea 100644
--- a/howard/objects/variants.py
+++ b/howard/objects/variants.py
@@ -4143,8 +4143,10 @@ def annotation_parquet(self, threads: int = None) -> None:
         assembly = self.get_param().get("assembly", self.get_config().get("assembly", DEFAULT_ASSEMBLY))
 
         # Force Update Annotation
-        force_update_annotation = self.get_param().get("annotation", {}).get("parquet", {}).get("update", False)
+        force_update_annotation = self.get_param().get("annotation", {}).get("options", {}).get("update", False)
         log.debug(f"force_update_annotation={force_update_annotation}")
+        force_append_annotation = self.get_param().get("annotation", {}).get("options", {}).get("append", False)
+        log.debug(f"force_append_annotation={force_append_annotation}")
 
         # Data
         table_variants = self.get_table_variants()
@@ -4301,7 +4303,9 @@ def annotation_parquet(self, threads: int = None) -> None:
 
                         # To annotate
                         #force_update_annotation = True
-                        if annotation_field in parquet_hdr_vcf_header_infos and (force_update_annotation or (annotation_fields_new_name not in self.get_header().infos)):
+                        #force_append_annotation = True
+                        #if annotation_field in parquet_hdr_vcf_header_infos and (force_update_annotation or (annotation_fields_new_name not in self.get_header().infos)):
+                        if annotation_field in parquet_hdr_vcf_header_infos and (force_update_annotation or force_append_annotation or (annotation_fields_new_name not in self.get_header().infos)):
                             
                             # Add field to annotation to process list
                             annotation_fields_processed.append(
@@ -4354,11 +4358,17 @@ def annotation_parquet(self, threads: int = None) -> None:
                                 self.code_type_map[parquet_hdr_vcf_header_infos_type]
                             )
 
+                            # Append
+                            if force_append_annotation:
+                                query_case_when_append = f""" AND REGEXP_EXTRACT(concat(';', table_variants.INFO), ';{annotation_fields_new_name}=([^;]*)',1) IN ('','.') """
+                            else:
+                                query_case_when_append = ""
+
                             # Annotation/Update query fields
                             # Found in INFO column
                             if annotation_field_column == "INFO" and "INFO" in parquet_hdr_vcf_header_columns:
                                 sql_query_annotation_update_info_sets.append(f"""
-                                CASE WHEN REGEXP_EXTRACT(concat(';', table_parquet.INFO), ';{annotation_field}=([^;]*)',1) NOT IN ('','.')
+                                CASE WHEN REGEXP_EXTRACT(concat(';', table_parquet.INFO), ';{annotation_field}=([^;]*)',1) NOT IN ('','.') {query_case_when_append}
                                         THEN concat('{annotation_field_sep}', '{annotation_fields_new_name}=', REGEXP_EXTRACT(concat(';', table_parquet.INFO), ';{annotation_field}=([^;]*)',1))
                                         ELSE ''
                                     END
@@ -4366,7 +4376,7 @@ def annotation_parquet(self, threads: int = None) -> None:
                             # Found in a specific column
                             else:
                                 sql_query_annotation_update_info_sets.append(f"""
-                                CASE WHEN table_parquet."{annotation_field_column}" NOT IN ('','.')
+                                CASE WHEN table_parquet."{annotation_field_column}" NOT IN ('','.') {query_case_when_append}
                                         THEN concat('{annotation_field_sep}', '{annotation_fields_new_name}=', replace(table_parquet."{annotation_field_column}", ';', ','))
                                         ELSE ''
                                     END
@@ -4389,7 +4399,8 @@ def annotation_parquet(self, threads: int = None) -> None:
                                     f"Annotation '{annotation_name}' - '{annotation_fields_new_name}' [{nb_annotation_field}] - already exists in header ({annotation_message})")
 
                     # Check if ALL fields have to be annotated. Thus concat all INFO field
-                    allow_annotation_full_info = True
+                    #allow_annotation_full_info = True
+                    allow_annotation_full_info = not force_append_annotation
                     
                     if parquet_type in ["regions"]:
                         allow_annotation_full_info = False
@@ -4557,8 +4568,8 @@ def annotation_parquet(self, threads: int = None) -> None:
                                     # Add update query to dict
                                     query_dict[f"{chrom}:{sql_query_interval_start}-{sql_query_interval_stop}"] = sql_query_annotation_chrom_interval_pos
 
-                                    # log.debug(
-                                    #     "Create SQL query: " + str(sql_query_annotation_chrom_interval_pos))
+                                    log.debug(
+                                        "Create SQL query: " + str(sql_query_annotation_chrom_interval_pos))
 
                                     # Interval Start/Stop
                                     sql_query_interval_start = sql_query_interval_stop
diff --git a/howard/tools/annotation.py b/howard/tools/annotation.py
index 97e8995..ba5f6d0 100644
--- a/howard/tools/annotation.py
+++ b/howard/tools/annotation.py
@@ -47,6 +47,12 @@ def annotation(args:argparse) -> None:
 
         params = vcfdata_obj.get_param()
 
+        # Prepare annotation dict
+        if not params.get("annotation", None):
+            params["annotation"] = {}
+        if not params.get("annotation", {}).get("options", None):
+            params["annotation"]["options"] = {}
+
         # Quick Annotation
         if args.annotations:
             annotation_file_list = [value for value in args.annotations.split(',')]
@@ -56,6 +62,12 @@ def annotation(args:argparse) -> None:
                 param_quick_annotations[annotation_file] = {"INFO": None}
             params["annotations"] = param_quick_annotations
         
+        if args.annotations_update:
+            params["annotation"]["options"]["update"] = True
+
+        if args.annotations_append:
+            params["annotation"]["options"]["append"] = True
+
         vcfdata_obj.set_param(params)
         
         # Load data from input file
diff --git a/howard/tools/tools.py b/howard/tools/tools.py
index ec613e4..d6d2da7 100644
--- a/howard/tools/tools.py
+++ b/howard/tools/tools.py
@@ -98,6 +98,8 @@
                 "widget": "FileSaver"
             }
         },
+
+        # Annotations
         "annotations": {
             "metavar": "annotations",
             "help": """Annotation with databases files, or with tools\n"""
@@ -113,6 +115,24 @@
                 "widget": "MultiFileChooser"
             }
         },
+        "annotations_update": {
+            "help": """Update option for annotation (Only for Parquet annotation).\n"""
+                    """If True, annotation fields will be removed and re-annotated.\n"""
+                    """These options will be applied to all annotation databases."""
+                    """default: False""",
+            "action": "store_true",
+            "default": False
+        },
+        "annotations_append": {
+            "help": """Append option for annotation (Only for Parquet annotation).\n"""
+                    """If True, annotation fields will be annotated only if not annotation exists for the variant.\n"""
+                    """These options will be applied to all annotation databases."""
+                    """default: False""",
+            "action": "store_true",
+            "default": False
+        },
+
+        # Calculations
         "calculations": {
             "metavar": "operations",
             "help": """Calculations on genetic variants information and genotype information\n"""
@@ -1162,7 +1182,9 @@
                 "input": True,
                 "output": True,
                 "annotations": True,
-                "assembly": False
+                "assembly": False,
+                "annotations_update": False,
+                "annotations_append": False
             }
         }
     },
diff --git a/tests/data/example.nci60_1.vcf b/tests/data/example.nci60_1.vcf
new file mode 100644
index 0000000..9dd06b5
--- /dev/null
+++ b/tests/data/example.nci60_1.vcf
@@ -0,0 +1,63 @@
+##fileformat=VCFv4.1
+##FILTER=<ID=PASS,Description="All filters passed">
+##fileDate=20140624
+##source=./export.pl release 1.5
+##reference=IRC
+##phasing=unknown
+##INFO=<ID=NS,Number=1,Type=Integer,Description="Number of Samples With Data">
+##INFO=<ID=DP,Number=1,Type=Integer,Description="Total Depth">
+##INFO=<ID=AA,Number=1,Type=String,Description="Ancestral Allele">
+##INFO=<ID=nci60,Number=1,Type=Float,Description="NCI60 annotation old release and value">
+##FILTER=<ID=FSFilter,Description="FS > 200.0">
+##FILTER=<ID=LowQual,Description="Low quality">
+##FILTER=<ID=QDFilter,Description="QD < 2.0">
+##FILTER=<ID=ReadPosFilter,Description="ReadPosRankSum < -20.0">
+##FILTER=<ID=TruthSensitivityTranche99.00to99.90,Description="Truth sensitivity tranche level at VSQ Lod: -3.9813 <= x < 3.448">
+##FILTER=<ID=TruthSensitivityTranche99.90to100.00+,Description="Truth sensitivity tranche level at VQS Lod < -29586.8217">
+##FILTER=<ID=TruthSensitivityTranche99.90to100.00,Description="Truth sensitivity tranche level at VSQ Lod: -29586.8217 <= x < -3.9813">
+##FORMAT=<ID=HQ,Number=2,Type=Integer,Description="Haplotype Quality">
+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
+##FORMAT=<ID=GQ,Number=1,Type=Float,Description="Genotype Quality">
+##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read Depth">
+##FORMAT=<ID=AD,Number=2,Type=Integer,Description="Allelic Depth">
+##FORMAT=<ID=FILTER,Number=.,Type=String,Description="Filter quality">
+##FORMAT=<ID=QUAL,Number=1,Type=Integer,Description="Filter quality value">
+##INFO=<ID=CLNSIG,Number=1,Type=String,Description="CLNSIG">
+##contig=<ID=chr1,length=249250621,assembly=hg19>
+##contig=<ID=chr7,length=159138663,assembly=hg19>
+##contig=<ID=1,length=249250621,assembly=hg19>
+##contig=<ID=10,length=135534747,assembly=hg19>
+##contig=<ID=11,length=135006516,assembly=hg19>
+##contig=<ID=12,length=133851895,assembly=hg19>
+##contig=<ID=13,length=115169878,assembly=hg19>
+##contig=<ID=14,length=107349540,assembly=hg19>
+##contig=<ID=15,length=102531392,assembly=hg19>
+##contig=<ID=16,length=90354753,assembly=hg19>
+##contig=<ID=17,length=81195210,assembly=hg19>
+##contig=<ID=18,length=78077248,assembly=hg19>
+##contig=<ID=19,length=59128983,assembly=hg19>
+##contig=<ID=2,length=243199373,assembly=hg19>
+##contig=<ID=20,length=63025520,assembly=hg19>
+##contig=<ID=21,length=48129895,assembly=hg19>
+##contig=<ID=22,length=51304566,assembly=hg19>
+##contig=<ID=3,length=198022430,assembly=hg19>
+##contig=<ID=4,length=191154276,assembly=hg19>
+##contig=<ID=5,length=180915260,assembly=hg19>
+##contig=<ID=6,length=171115067,assembly=hg19>
+##contig=<ID=7,length=159138663,assembly=hg19>
+##contig=<ID=8,length=146364022,assembly=hg19>
+##contig=<ID=9,length=141213431,assembly=hg19>
+##contig=<ID=M,length=16571,assembly=hg19>
+##contig=<ID=X,length=155270560,assembly=hg19>
+##contig=<ID=Y,length=59373566,assembly=hg19>
+##INFO=<ID=SIFT,Number=.,Type=String,Description="Annotation 'SIFT'">
+##bcftools_viewVersion=1.15.1+htslib-1.15.1
+##bcftools_viewCommand=view tests/data/example.vcf.gz; Date=Fri Mar 10 21:25:44 2023
+#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	sample1	sample2	sample3	sample4
+chr1	28736	.	A	C	100	PASS	CLNSIG=pathogenic	GT:AD:DP:GQ	0/1:525,204:729:99	0/1:12659,4994:17664:99	1/1:12658,4995:17663:99	1/1:401,175:576:99
+chr1	35144	.	A	C	100	PASS	CLNSIG=non-pathogenic	GT:AD:DP:GQ	./.:.:.:.	0/1:12659,4994:17664:99	0/1:12658,4995:17663:99	0/1:401,175:576:99
+chr1	69101	.	A	G	100	PASS	DP=50	GT:AD:DP:GQ	0/1:525,204:729:99	./.:.:.:.	0/1:12658,4995:17663:99	0/1:401,175:576:99
+chr1	768251	.	A	G	100	PASS	.	GT:AD:DP:GQ	0/1:525,204:729:99	./.:.:.:.	0/1:12658,4995:17663:99	0/1:401,175:576:99
+chr1	768252	.	A	G	100	PASS	.	GT:AD:DP:GQ	0/1:525,204:729:99	./.:.:.:.	0/1:12658,4995:17663:99	0/1:401,175:576:99
+chr1	768253	.	A	G	100	PASS	nci60=0.321	GT:AD:DP:GQ	0/1:525,204:729:99	./.:.:.:.	0/1:12658,4995:17663:99	0/1:401,175:576:99
+chr7	55249063	rs1050171	G	A	5777	PASS	DP=125	GT:AD:DP:GQ	0/1:525,204:729:99	0/1:12659,4994:17664:99	./.:.:.:.	0/1:401,175:576:99
diff --git a/tests/data/example.nci60.vcf b/tests/data/example.nci60_2.vcf
similarity index 96%
rename from tests/data/example.nci60.vcf
rename to tests/data/example.nci60_2.vcf
index 6ce426f..1acff86 100644
--- a/tests/data/example.nci60.vcf
+++ b/tests/data/example.nci60_2.vcf
@@ -59,5 +59,5 @@ chr1	35144	.	A	C	100	PASS	CLNSIG=non-pathogenic	GT:AD:DP:GQ	./.:.:.:.	0/1:12659,
 chr1	69101	.	A	G	100	PASS	DP=50	GT:AD:DP:GQ	0/1:525,204:729:99	./.:.:.:.	0/1:12658,4995:17663:99	0/1:401,175:576:99
 chr1	768251	.	A	G	100	PASS	.	GT:AD:DP:GQ	0/1:525,204:729:99	./.:.:.:.	0/1:12658,4995:17663:99	0/1:401,175:576:99
 chr1	768252	.	A	G	100	PASS	.	GT:AD:DP:GQ	0/1:525,204:729:99	./.:.:.:.	0/1:12658,4995:17663:99	0/1:401,175:576:99
-chr1	768253	.	A	G	100	PASS	.	GT:AD:DP:GQ	0/1:525,204:729:99	./.:.:.:.	0/1:12658,4995:17663:99	0/1:401,175:576:99
+chr1	768253	.	A	G	100	PASS	nci60=0.321	GT:AD:DP:GQ	0/1:525,204:729:99	./.:.:.:.	0/1:12658,4995:17663:99	0/1:401,175:576:99
 chr7	55249063	rs1050171	G	A	5777	PASS	DP=125;nci60=0.123	GT:AD:DP:GQ	0/1:525,204:729:99	0/1:12659,4994:17664:99	./.:.:.:.	0/1:401,175:576:99
diff --git a/tests/test_variants_annotations.py b/tests/test_variants_annotations.py
index e43f3ad..386fe72 100644
--- a/tests/test_variants_annotations.py
+++ b/tests/test_variants_annotations.py
@@ -27,16 +27,102 @@
 
 
 
+
+def test_annotation_parquet_append():
+    """
+    Check the 'append' option of Parquet annotation.
+
+    The input VCF carries nci60=0.321 on chr1:768253 A>G and no nci60 value on
+    chr7:55249063 G>A. With 'append' set to False the annotation run must leave
+    both variants as they are; with 'append' set to True the existing value is
+    kept and the chr7 variant is expected to receive nci60=0.66.
+
+    The exported output is finally parsed with pyVCF to make sure it is still a
+    readable VCF file.
+    """
+
+    with TemporaryDirectory(dir=tests_folder) as tmp_dir:
+
+        # Init files
+        input_vcf = tests_data_folder + "/example.nci60_1.vcf"
+        annotation1 = os.path.join(tests_annotations_folder, "nci60.parquet")
+        output_vcf = f"{tmp_dir}/output.vcf.gz"
+
+        # Param dicts: identical annotation request, only 'append' differs
+        param = {
+            "annotation": {
+                "parquet": {"annotations": {annotation1: {"nci60": "nci60"}}},
+                "options": {"append": False},
+            }
+        }
+        param_append = {
+            "annotation": {
+                "parquet": {"annotations": {annotation1: {"nci60": "nci60"}}},
+                "options": {"append": True},
+            }
+        }
+        log.debug(f"param={param}")
+        log.debug(f"param_append={param_append}")
+
+        # Queries: value already in the input VCF / value expected from Parquet
+        query_existing = (
+            "SELECT 1 AS count FROM variants "
+            "WHERE \"#CHROM\" = 'chr1' AND POS = 768253 AND REF = 'A' AND ALT = 'G' "
+            "AND INFO LIKE '%nci60=0.321%'"
+        )
+        query_appended = (
+            "SELECT 1 AS count FROM variants "
+            "WHERE \"#CHROM\" = 'chr7' AND POS = 55249063 AND REF = 'G' AND ALT = 'A' "
+            "AND INFO LIKE '%nci60=0.66%'"
+        )
+
+        # Create object
+        variants = Variants(
+            conn=None, input=input_vcf, output=output_vcf, param=param, load=True
+        )
+
+        # Remove if output file exists
+        remove_if_exists([output_vcf])
+
+        # Annotation without append
+        variants.annotation()
+
+        # Check annotation not changed
+        log.debug(variants.get_query_to_df("SELECT INFO FROM variants"))
+        assert len(variants.get_query_to_df(query_existing)) == 1
+        assert len(variants.get_query_to_df(query_appended)) == 0
+
+        # Annotation with append
+        variants.set_param(param=param_append)
+        variants.annotation()
+
+        # Check annotation changed (existing kept, one annotation added)
+        log.debug(variants.get_query_to_df("SELECT INFO FROM variants"))
+        assert len(variants.get_query_to_df(query_existing)) == 1
+        assert len(variants.get_query_to_df(query_appended)) == 1
+
+        # Check if VCF is in correct format with pyVCF
+        variants.export_output()
+        try:
+            vcf.Reader(filename=output_vcf)
+        except Exception as exc:
+            # Raise explicitly: a bare `assert False` is stripped under -O and
+            # hides the parsing error that made the exported file invalid
+            raise AssertionError(
+                f"Exported VCF is not readable by pyVCF: {output_vcf}"
+            ) from exc
+
+
 def test_annotation_parquet_update():
     """
-    This function tests if a field already present in a VCF file is not changed during annotation with a
-    Parquet file.
+    The function `test_annotation_parquet_update` tests the updating functionality of annotations in a
+    VCF file using Parquet format.
     """
 
     with TemporaryDirectory(dir=tests_folder) as tmp_dir:
 
         # Init files
-        input_vcf = tests_data_folder + "/example.nci60.vcf"
+        input_vcf = tests_data_folder + "/example.nci60_2.vcf"
         annotation1 = os.path.join(tests_annotations_folder, "nci60.parquet")
         output_vcf = f"{tmp_dir}/output.vcf.gz"
 
@@ -49,6 +135,8 @@ def test_annotation_parquet_update():
                                     "nci60": "nci60"
                                 }
                             },
+                        },
+                        'options': {
                             'update': False
                         }
                     }
@@ -61,6 +149,8 @@ def test_annotation_parquet_update():
                                     "nci60": "nci60"
                                 }
                             },
+                        },
+                        'options': {
                             'update': True
                         }
                     }
@@ -78,17 +168,19 @@ def test_annotation_parquet_update():
         variants.annotation()
 
         # Check annotation not changed
-        result = variants.get_query_to_df("SELECT 1 AS count FROM variants WHERE \"#CHROM\" = 'chr7' AND POS = 55249063 AND REF = 'G' AND ALT = 'A' AND INFO LIKE '%nci60=0.123%'")
-        log.debug(result)
-        assert len(result) == 1
+        result1 = variants.get_query_to_df("SELECT 1 AS count FROM variants WHERE \"#CHROM\" = 'chr1' AND POS = 768253 AND REF = 'A' AND ALT = 'G' AND INFO LIKE '%nci60=0.321%'")
+        result2 = variants.get_query_to_df("SELECT 1 AS count FROM variants WHERE \"#CHROM\" = 'chr7' AND POS = 55249063 AND REF = 'G' AND ALT = 'A' AND INFO LIKE '%nci60=0.123%'")
+        assert len(result1) == 1
+        assert len(result2) == 1
 
         variants.set_param(param=param_update)
         variants.annotation()
 
-        # Check annotation changed
-        result = variants.get_query_to_df("SELECT 1 AS count FROM variants WHERE \"#CHROM\" = 'chr7' AND POS = 55249063 AND REF = 'G' AND ALT = 'A' AND INFO LIKE '%nci60=0.66%'")
-        log.debug(result)
-        assert len(result) == 1
+        # Check annotation changed (all removed, but one added)
+        result1 = variants.get_query_to_df("SELECT 1 AS count FROM variants WHERE \"#CHROM\" = 'chr1' AND POS = 768253 AND REF = 'A' AND ALT = 'G' AND INFO LIKE '%nci60=0.321%'")
+        result2 = variants.get_query_to_df("SELECT 1 AS count FROM variants WHERE \"#CHROM\" = 'chr7' AND POS = 55249063 AND REF = 'G' AND ALT = 'A' AND INFO LIKE '%nci60=0.66%'")
+        assert len(result1) == 0
+        assert len(result2) == 1
 
         # Check if VCF is in correct format with pyVCF
         variants.export_output()