diff --git a/README.pdf b/README.pdf index 391ee6d..0fa2842 100644 Binary files a/README.pdf and b/README.pdf differ diff --git a/RELEASE_NOTES.html b/RELEASE_NOTES.html index 4a3dfc3..a510c22 100644 --- a/RELEASE_NOTES.html +++ b/RELEASE_NOTES.html @@ -205,6 +205,7 @@

New tools:
  • Plugins:
      diff --git a/RELEASE_NOTES.md b/RELEASE_NOTES.md index a2a47b8..11d9500 100644 --- a/RELEASE_NOTES.md +++ b/RELEASE_NOTES.md @@ -22,6 +22,7 @@ and transcripts mapping. (Integer, Float, String) with list type (defined in VCF header) - New tools: - Tool 'filter' to filter variants in SQL format and samples + - Tool 'sort' to sort variants from contig order - Plugins: - 'to_excel': Convert VCF to Excel '.xlsx' format diff --git a/RELEASE_NOTES.pdf b/RELEASE_NOTES.pdf index d3dad35..f4c9270 100644 Binary files a/RELEASE_NOTES.pdf and b/RELEASE_NOTES.pdf differ diff --git a/docs/docs.pdf b/docs/docs.pdf index 3363b14..cfeb738 100644 Binary files a/docs/docs.pdf and b/docs/docs.pdf differ diff --git a/docs/help.configuration.calculation.pdf b/docs/help.configuration.calculation.pdf index 7289104..471559f 100644 Binary files a/docs/help.configuration.calculation.pdf and b/docs/help.configuration.calculation.pdf differ diff --git a/docs/help.configuration.pdf b/docs/help.configuration.pdf index d4d9db5..f5cdd02 100644 Binary files a/docs/help.configuration.pdf and b/docs/help.configuration.pdf differ diff --git a/docs/help.configuration.prioritization.pdf b/docs/help.configuration.prioritization.pdf index 10767ac..9a9c623 100644 Binary files a/docs/help.configuration.prioritization.pdf and b/docs/help.configuration.prioritization.pdf differ diff --git a/docs/help.html b/docs/help.html index c6ab2e3..f9f43b7 100644 --- a/docs/help.html +++ b/docs/help.html @@ -199,171 +199,179 @@

      Contents

    • 3.3 Export
  • -
  • 4 STATS tool +
  • 4 SORT tool
  • -
  • 5 CONVERT tool +
  • 5 STATS tool
  • -
  • 6 HGVS tool +
  • 6 CONVERT tool
  • -
  • 7 ANNOTATION tool +
  • 7 HGVS tool
  • -
  • 8 CALCULATION tool +
  • 8 ANNOTATION tool
  • +
  • 9 CALCULATION tool +
  • 9 PRIORITIZATION tool +class="toc-section-number">10 PRIORITIZATION tool
  • 10 PROCESS tool +class="toc-section-number">11 PROCESS tool
  • 11 DATABASES tool +class="toc-section-number">12 DATABASES tool
  • 12 GUI tool
  • +class="toc-section-number">13
    GUI tool
  • 13 HELP tool +class="toc-section-number">14 HELP tool
  • 14 UPDATE_DATABASE tool +class="toc-section-number">15 UPDATE_DATABASE tool
  • 15 TO_EXCEL tool +class="toc-section-number">16 TO_EXCEL tool
  • 16 TRANSCRIPTS_CHECK tool +class="toc-section-number">17 TRANSCRIPTS_CHECK tool
  • 17 GENEBE tool +class="toc-section-number">18 GENEBE tool
  • 18 MINIMALIZE tool +class="toc-section-number">19 MINIMALIZE tool
  • 19 Shared arguments
  • +class="toc-section-number">20 Shared arguments

    -

    4 STATS tool

    +

    4 SORT tool

    +

    Sort genetic variations from contig order. Data can be loaded into +‘variants’ table from various formats (e.g. VCF, TSV, Parquet…). SQL +filter can also use external data within the request, such as a Parquet +file(s).

    +

    Usage examples:

    +
    +

    howard sort –input=tests/data/example.vcf.gz +–output=/tmp/example.sorted.vcf.gz

    +
    +
    + +
    +

    4.1 Main options

    +

    +
    +
        --input=<input> | required
    +
    +Input file path.
    +Format file must be either VCF, Parquet, TSV, CSV, PSV or duckDB.
    +Files can be compressesd (e.g. vcf.gz, tsv.gz).
    +
    +

    +

    +
    +
        --output=<output> | required
    +
    +Output file path.
    +Format file must be either VCF, Parquet, TSV, CSV, PSV or duckDB.
    +Files can be compressesd (e.g. vcf.gz, tsv.gz).
    +
    +

    +

    4.2 Export

    +

    +
    +
        --include_header
    +
    +Include header (in VCF format) in output file.
    +Only for compatible formats (tab-delimiter format as TSV or BED).
    +
    +

    +

    +
    +
        --parquet_partitions=<parquet partitions>
    +
    +Parquet partitioning using hive (available for any format).
    +This option is faster parallel writing, but memory consuming.
    +Use 'None' (string) for NO partition but split parquet files into a folder.
    +Examples: '#CHROM', '#CHROM,REF', 'None'.
    +
    +

    +

    5 STATS tool

    Statistics on genetic variations, such as: number of variants, number of samples, statistics by chromosome, genotypes by samples…

    Usage examples:

    @@ -651,8 +713,8 @@

    -

    4.1 Main options

    +

    5.1 Main options

        --input=<input> | required
    @@ -670,8 +732,8 @@ 

    -

    4.2 Stats

    +

    5.2 Stats

        --stats_md=<stats markdown>
    @@ -686,8 +748,8 @@ 

    -

    5 CONVERT tool

    +

    6 CONVERT tool

    Convert genetic variations file to another format. Multiple format are available, such as usual and official VCF and BCF format, but also other formats such as TSV, CSV, PSV and Parquet/duckDB. These formats @@ -722,8 +784,8 @@

    -

    5.1 Main options

    +

    6.1 Main options

        --input=<input> | required
    @@ -750,8 +812,8 @@ 

    -

    5.2 Explode

    +

    6.2 Explode

        --explode_infos
    @@ -781,8 +843,8 @@ 

    -

    5.3 Export

    +

    6.3 Export

        --include_header
    @@ -812,8 +874,8 @@ 

    -

    6 HGVS tool

    +

    7 HGVS tool

    HGVS annotation using HUGO HGVS internation Sequence Variant Nomenclature (http://varnomen.hgvs.org/). Annotation refere to refGene and genome to generate HGVS nomenclature for all available transcripts. @@ -834,8 +896,8 @@

    -

    6.1 Main options

    +

    7.1 Main options

        --input=<input> | required
    @@ -881,8 +943,8 @@ 

    -

    6.2 HGVS

    +

    7.2 HGVS

        --use_gene
    @@ -952,8 +1014,8 @@ 

    -

    7 ANNOTATION tool

    +

    8 ANNOTATION tool

    Annotation is mainly based on a build-in Parquet annotation method, and tools such as BCFTOOLS, Annovar and snpEff. It uses available databases (see Annovar and snpEff) and homemade databases. Format of @@ -1018,8 +1080,8 @@

    -

    7.1 Main options

    +

    8.1 Main options

        --input=<input> | required
    @@ -1138,8 +1200,8 @@ 

    -

    7.2 Annotation

    +

    8.2 Annotation

        --annotations_update
    @@ -1158,8 +1220,8 @@ 

    -

    8 CALCULATION tool

    +

    9 CALCULATION tool

    Calculation processes variants information to generate new information, such as: identify variation type (VarType), harmonizes allele frequency (VAF) and calculate sttistics (VAF_stats), extracts @@ -1202,8 +1264,8 @@

    -

    8.1 Main options

    +

    9.1 Main options

        --input=<input>
    @@ -1241,8 +1303,8 @@ 

    -

    8.2 Calculation

    +

    9.2 Calculation

        --calculation_config=<calculation config>
    @@ -1257,8 +1319,8 @@ 

    -

    8.3 NOMEN

    +

    9.3 NOMEN

        --hgvs_field=<HGVS field> (default: hgvs)
    @@ -1274,8 +1336,8 @@ 

    -

    8.4 TRIO

    +

    9.4 TRIO

        --trio_pedigree=<trio pedigree>
    @@ -1286,8 +1348,8 @@ 

    -

    8.5 BARCODEFAMILY

    +

    9.5 BARCODEFAMILY

        --family_pedigree=<family pedigree>
    @@ -1298,8 +1360,8 @@ 

    -

    9 PRIORITIZATION tool

    +

    10 PRIORITIZATION tool

    Prioritization algorithm uses profiles to flag variants (as passed or filtered), calculate a prioritization score, and automatically generate a comment for each variants (example: ‘polymorphism identified in dbSNP. @@ -1331,8 +1393,8 @@

    -

    9.1 Main options

    +

    10.1 Main options

        --input=<input> | required
    @@ -1368,8 +1430,8 @@ 

    -

    9.2 Prioritization

    +

    10.2 Prioritization

        --default_profile=<default profile>
    @@ -1401,8 +1463,8 @@ 

    -

    10 PROCESS tool

    +

    11 PROCESS tool

    howard process tool manage genetic variations to:

    • annotates genetic variants with multiple annotation @@ -1442,8 +1504,8 @@

      -

      10.1 Main options

      +

      11.1 Main options

          --input=<input> | required
      @@ -1535,8 +1597,8 @@ 

      -

      10.2 HGVS

      +

      11.2 HGVS

          --use_gene
      @@ -1606,8 +1668,8 @@ 

      -

      10.3 Annotation

      +

      11.3 Annotation

          --annotations_update
      @@ -1626,8 +1688,8 @@ 

      -

      10.4 Calculation

      +

      11.4 Calculation

          --calculation_config=<calculation config>
      @@ -1635,8 +1697,8 @@ 

      -

      10.5 Prioritization

      +

      11.5 Prioritization

          --default_profile=<default profile>
      @@ -1668,8 +1730,8 @@ 

      -

      10.6 Query

      +

      11.6 Query

          --query=<query>
      @@ -1693,8 +1755,8 @@ 

      -

      10.7 Explode

      +

      11.7 Explode

          --explode_infos
      @@ -1724,8 +1786,8 @@ 

      -

      10.8 Export

      +

      11.8 Export

          --include_header
      @@ -1755,8 +1817,8 @@ 

      -

      11 DATABASES tool

      +

      12 DATABASES tool

      Download databases and needed files for howard and associated tools

      Usage examples:

      @@ -1843,8 +1905,8 @@

      -

      11.1 Main options

      +

      12.1 Main options

          --assembly=<assembly> (default: hg19)
      @@ -1875,8 +1937,8 @@ 

      -

      11.2 Genomes

      +

      12.2 Genomes

          --download-genomes=<genomes>
      @@ -1903,8 +1965,8 @@ 

      -

      11.3 snpEff

      +

      12.3 snpEff

          --download-snpeff=<snpEff>
      @@ -1912,8 +1974,8 @@ 

      -

      11.4 Annovar

      +

      12.4 Annovar

          --download-annovar=<Annovar>
      @@ -1940,8 +2002,8 @@ 

      -

      11.5 refSeq

      +

      12.5 refSeq

          --download-refseq=<refSeq>
      @@ -2023,8 +2085,8 @@ 

      -

      11.6 dbNSFP

      +

      12.6 dbNSFP

          --download-dbnsfp=<dbNSFP>
      @@ -2129,8 +2191,8 @@ 

      -

      11.7 AlphaMissense

      +

      12.7 AlphaMissense

          --download-alphamissense=<AlphaMissense>
      @@ -2146,8 +2208,8 @@ 

      -

      11.8 Exomiser

      +

      12.8 Exomiser

          --download-exomiser=<Exomiser>
      @@ -2243,8 +2305,8 @@ 

      -

      11.9 dbSNP

      +

      12.9 dbSNP

          --download-dbsnp=<dnSNP>
      @@ -2321,8 +2383,8 @@ 

      -

      11.10 HGMD

      +

      12.10 HGMD

          --convert-hgmd=<HGMD>
      @@ -2351,8 +2413,8 @@ 

      -

      11.11 from_Annovar

      +

      12.11 from_Annovar

          --input_annovar=<input annovar>
      @@ -2400,8 +2462,8 @@ 

      -

      11.12 from_extann

      +

      12.12 from_extann

          --input_extann=<input extann>
      @@ -2453,8 +2515,8 @@ 

      -

      11.13 Parameters

      +

      12.13 Parameters

          --generate-param=<param>
      @@ -2497,15 +2559,15 @@ 

      -

      12 GUI tool

      +

      13 GUI tool

      Graphical User Interface tools

      Usage examples:

      howard gui

      -

      13 HELP tool

      +

      14 HELP tool

      Help tools

      Usage examples:

      @@ -2537,8 +2599,8 @@

      -

      13.1 Main options

      +

      14.1 Main options

          --help_md=<help markdown>
      @@ -2589,8 +2651,8 @@ 

      -

      14 UPDATE_DATABASE tool

      +

      15 UPDATE_DATABASE tool

      Update HOWARD database

      Usage examples:

      @@ -2600,8 +2662,8 @@

      -

      14.1 Main options

      +

      15.1 Main options

          --param=<param> (default: {})
      @@ -2610,8 +2672,8 @@ 

      -

      14.2 Update_database

      +

      15.2 Update_database

          --databases_folder=<databases_folder> (default: ~/howard/databases)
      @@ -2640,8 +2702,8 @@ 

      -

      14.3 Options

      +

      15.3 Options

          --show=<show>
      @@ -2656,8 +2718,8 @@ 

      -

      15 TO_EXCEL tool

      +

      16 TO_EXCEL tool

      Convert VCF file to Excel ‘.xlsx’ format.

      Usage examples:

      @@ -2667,8 +2729,8 @@

      -

      15.1 Main options

      +

      16.1 Main options

          --input=<input> | required
      @@ -2687,8 +2749,8 @@ 

      -

      15.2 Add

      +

      16.2 Add

          --add_variants_view
      @@ -2703,8 +2765,8 @@ 

      -

      16 TRANSCRIPTS_CHECK tool

      +

      17 TRANSCRIPTS_CHECK tool

      Check if a transcript list is present in a generated transcript table from a input VCF file.

      Usage examples:

      @@ -2719,8 +2781,8 @@

      -

      16.1 Main options

      +

      17.1 Main options

          --input=<input> | required
      @@ -2759,8 +2821,8 @@ 

      -

      17 GENEBE tool

      +

      18 GENEBE tool

      GeneBe annotation using REST API (see https://genebe.net/).

      Usage examples:

      @@ -2770,8 +2832,8 @@

      -

      17.1 Main options

      +

      18.1 Main options

          --input=<input> | required
      @@ -2805,8 +2867,8 @@ 

      -

      17.2 GeneBe

      +

      18.2 GeneBe

          --genebe_use_refseq
      @@ -2828,8 +2890,8 @@ 

      -

      17.3 Explode

      +

      18.3 Explode

          --explode_infos
      @@ -2859,8 +2921,8 @@ 

      -

      17.4 Export

      +

      18.4 Export

          --include_header
      @@ -2890,8 +2952,8 @@ 

      -

      18 MINIMALIZE tool

      +

      19 MINIMALIZE tool

      Minimalize a VCF file consists in put missing value (‘.’) on INFO/Tags, ID, QUAL or FILTER fields. Options can also minimalize samples (keep only GT) or remove all samples. INFO/tags can by exploded @@ -2911,8 +2973,8 @@

      -

      18.1 Main options

      +

      19.1 Main options

          --input=<input> | required
      @@ -2939,8 +3001,8 @@ 

      -

      18.2 Minimalize

      +

      19.2 Minimalize

          --minimalize_info
      @@ -2983,8 +3045,8 @@ 

      -

      18.3 Explode

      +

      19.3 Explode

          --explode_infos
      @@ -3014,8 +3076,8 @@ 

      -

      18.4 Export

      +

      19.4 Export

          --include_header
      @@ -3045,8 +3107,8 @@ 

      -

      19 Shared arguments

      +

      20 Shared arguments

          --config=<config> (default: {})
      diff --git a/docs/help.md b/docs/help.md
      index fdd1bdd..26786eb 100644
      --- a/docs/help.md
      +++ b/docs/help.md
      @@ -15,111 +15,115 @@ title: HOWARD Help
           options](#main-options-1)
         - [3.2 Filters](#filters)
         - [3.3 Export](#export-1)
      -- [4 STATS tool](#stats-tool)
      +- [4 SORT tool](#sort-tool)
         - [4.1 Main
           options](#main-options-2)
      -  - [4.2 Stats](#stats)
      -- [5 CONVERT
      -  tool](#convert-tool)
      +  - [4.2 Export](#export-2)
      +- [5 STATS tool](#stats-tool)
         - [5.1 Main
           options](#main-options-3)
      -  - [5.2 Explode](#explode-1)
      -  - [5.3 Export](#export-2)
      -- [6 HGVS tool](#hgvs-tool)
      +  - [5.2 Stats](#stats)
      +- [6 CONVERT
      +  tool](#convert-tool)
         - [6.1 Main
           options](#main-options-4)
      -  - [6.2 HGVS](#hgvs)
      -- [7 ANNOTATION
      -  tool](#annotation-tool)
      +  - [6.2 Explode](#explode-1)
      +  - [6.3 Export](#export-3)
      +- [7 HGVS tool](#hgvs-tool)
         - [7.1 Main
           options](#main-options-5)
      -  - [7.2
      -    Annotation](#annotation)
      -- [8 CALCULATION
      -  tool](#calculation-tool)
      +  - [7.2 HGVS](#hgvs)
      +- [8 ANNOTATION
      +  tool](#annotation-tool)
         - [8.1 Main
           options](#main-options-6)
         - [8.2
      -    Calculation](#calculation)
      -  - [8.3 NOMEN](#nomen)
      -  - [8.4 TRIO](#trio)
      -  - [8.5
      -    BARCODEFAMILY](#barcodefamily)
      -- [9 PRIORITIZATION
      -  tool](#prioritization-tool)
      +    Annotation](#annotation)
      +- [9 CALCULATION
      +  tool](#calculation-tool)
         - [9.1 Main
           options](#main-options-7)
         - [9.2
      -    Prioritization](#prioritization)
      -- [10 PROCESS
      -  tool](#process-tool)
      +    Calculation](#calculation)
      +  - [9.3 NOMEN](#nomen)
      +  - [9.4 TRIO](#trio)
      +  - [9.5
      +    BARCODEFAMILY](#barcodefamily)
      +- [10 PRIORITIZATION
      +  tool](#prioritization-tool)
         - [10.1 Main
           options](#main-options-8)
      -  - [10.2 HGVS](#hgvs-1)
      -  - [10.3
      +  - [10.2
      +    Prioritization](#prioritization)
      +- [11 PROCESS
      +  tool](#process-tool)
      +  - [11.1 Main
      +    options](#main-options-9)
      +  - [11.2 HGVS](#hgvs-1)
      +  - [11.3
           Annotation](#annotation-1)
      -  - [10.4
      +  - [11.4
           Calculation](#calculation-1)
      -  - [10.5
      +  - [11.5
           Prioritization](#prioritization-1)
      -  - [10.6 Query](#query-1)
      -  - [10.7 Explode](#explode-2)
      -  - [10.8 Export](#export-3)
      -- [11 DATABASES
      +  - [11.6 Query](#query-1)
      +  - [11.7 Explode](#explode-2)
      +  - [11.8 Export](#export-4)
      +- [12 DATABASES
         tool](#databases-tool)
      -  - [11.1 Main
      -    options](#main-options-9)
      -  - [11.2 Genomes](#genomes)
      -  - [11.3 snpEff](#snpeff)
      -  - [11.4 Annovar](#annovar)
      -  - [11.5 refSeq](#refseq)
      -  - [11.6 dbNSFP](#dbnsfp)
      -  - [11.7
      +  - [12.1 Main
      +    options](#main-options-10)
      +  - [12.2 Genomes](#genomes)
      +  - [12.3 snpEff](#snpeff)
      +  - [12.4 Annovar](#annovar)
      +  - [12.5 refSeq](#refseq)
      +  - [12.6 dbNSFP](#dbnsfp)
      +  - [12.7
           AlphaMissense](#alphamissense)
      -  - [11.8 Exomiser](#exomiser)
      -  - [11.9 dbSNP](#dbsnp)
      -  - [11.10 HGMD](#hgmd)
      -  - [11.11
      +  - [12.8 Exomiser](#exomiser)
      +  - [12.9 dbSNP](#dbsnp)
      +  - [12.10 HGMD](#hgmd)
      +  - [12.11
           from_Annovar](#from_annovar)
      -  - [11.12
      +  - [12.12
           from_extann](#from_extann)
      -  - [11.13
      +  - [12.13
           Parameters](#parameters)
      -- [12 GUI tool](#gui-tool)
      -- [13 HELP tool](#help-tool)
      -  - [13.1 Main
      -    options](#main-options-10)
      -- [14 UPDATE_DATABASE
      -  tool](#update_database-tool)
      +- [13 GUI tool](#gui-tool)
      +- [14 HELP tool](#help-tool)
         - [14.1 Main
           options](#main-options-11)
      -  - [14.2
      -    Update_database](#update_database)
      -  - [14.3 Options](#options)
      -- [15 TO_EXCEL
      -  tool](#to_excel-tool)
      +- [15 UPDATE_DATABASE
      +  tool](#update_database-tool)
         - [15.1 Main
           options](#main-options-12)
      -  - [15.2 Add](#add)
      -- [16 TRANSCRIPTS_CHECK
      -  tool](#transcripts_check-tool)
      +  - [15.2
      +    Update_database](#update_database)
      +  - [15.3 Options](#options)
      +- [16 TO_EXCEL
      +  tool](#to_excel-tool)
         - [16.1 Main
           options](#main-options-13)
      -- [17 GENEBE tool](#genebe-tool)
      +  - [16.2 Add](#add)
      +- [17 TRANSCRIPTS_CHECK
      +  tool](#transcripts_check-tool)
         - [17.1 Main
           options](#main-options-14)
      -  - [17.2 GeneBe](#genebe)
      -  - [17.3 Explode](#explode-3)
      -  - [17.4 Export](#export-4)
      -- [18 MINIMALIZE
      -  tool](#minimalize-tool)
      +- [18 GENEBE tool](#genebe-tool)
         - [18.1 Main
           options](#main-options-15)
      -  - [18.2
      -    Minimalize](#minimalize)
      -  - [18.3 Explode](#explode-4)
      +  - [18.2 GeneBe](#genebe)
      +  - [18.3 Explode](#explode-3)
         - [18.4 Export](#export-5)
      -- [19 Shared
      +- [19 MINIMALIZE
      +  tool](#minimalize-tool)
      +  - [19.1 Main
      +    options](#main-options-16)
      +  - [19.2
      +    Minimalize](#minimalize)
      +  - [19.3 Explode](#explode-4)
      +  - [19.4 Export](#export-6)
      +- [20 Shared
         arguments](#shared-arguments)
       
       # Introduction
      @@ -392,6 +396,64 @@ Usage examples:
       
       
       
      +# SORT tool
      +
      +Sort genetic variations from contig order. Data can be loaded into
      +'variants' table from various formats (e.g. VCF, TSV, Parquet...). SQL
      +filter can also use external data within the request, such as a Parquet
      +file(s).
      +
      +Usage examples:
      +
      +> howard sort --input=tests/data/example.vcf.gz
      +> --output=/tmp/example.sorted.vcf.gz
      +
      +> 
      +
      +## Main options
      +
      +
      +
      +>     --input= | required
      +>
      +>     Input file path.
      +>     Format file must be either VCF, Parquet, TSV, CSV, PSV or duckDB.
      +>     Files can be compressesd (e.g. vcf.gz, tsv.gz).
      +
      +
      +
      +
      +
      +>     --output= | required
      +>
      +>     Output file path.
      +>     Format file must be either VCF, Parquet, TSV, CSV, PSV or duckDB.
      +>     Files can be compressesd (e.g. vcf.gz, tsv.gz).
      +
      +
      +
      +## Export
      +
      +
      +
      +>     --include_header
      +>
      +>     Include header (in VCF format) in output file.
      +>     Only for compatible formats (tab-delimiter format as TSV or BED).
      +
      +
      +
      +
      +
      +>     --parquet_partitions=
      +>
      +>     Parquet partitioning using hive (available for any format).
      +>     This option is faster parallel writing, but memory consuming.
      +>     Use 'None' (string) for NO partition but split parquet files into a folder.
      +>     Examples: '#CHROM', '#CHROM,REF', 'None'.
      +
      +
      +
       # STATS tool
       
       Statistics on genetic variations, such as: number of variants, number of
      diff --git a/docs/help.parameters.databases.pdf b/docs/help.parameters.databases.pdf
      index c0144ce..bca5569 100644
      Binary files a/docs/help.parameters.databases.pdf and b/docs/help.parameters.databases.pdf differ
      diff --git a/docs/help.parameters.pdf b/docs/help.parameters.pdf
      index 9c50931..89ca3f7 100644
      Binary files a/docs/help.parameters.pdf and b/docs/help.parameters.pdf differ
      diff --git a/docs/help.pdf b/docs/help.pdf
      index 7d0cb7a..fcd449e 100644
      Binary files a/docs/help.pdf and b/docs/help.pdf differ
      diff --git a/docs/pdoc/howard/functions/commons.html b/docs/pdoc/howard/functions/commons.html
      index 6034bc3..abf4143 100644
      --- a/docs/pdoc/howard/functions/commons.html
      +++ b/docs/pdoc/howard/functions/commons.html
      @@ -429,6 +429,9 @@ 

      API Documentation

    • docker_automount
    • +
    • + sort_contigs +
    @@ -4804,6 +4807,63 @@

    4352 if "sock" not in volume.get("Source") and "tmp" not in volume.get("Source"): 4353 mounts_new += f" -v {volume.get('Source')}:{volume.get ('Destination')}:{volume.get('Mode')}" 4354 return mounts_new +4355 +4356 +4357def sort_contigs(vcf_reader): +4358 """ +4359 Function that sort contigs in VCF header +4360 +4361 Args: +4362 vcf_reader (vcf): VCF object from VCF package +4363 +4364 Returns: +4365 vcf:VCF object from VCF package +4366 """ +4367 +4368 from collections import OrderedDict +4369 +4370 # inf +4371 inf = 100000000 +4372 +4373 # Extract contigs from header +4374 contigs = list(vcf_reader.contigs.keys()) +4375 +4376 # Sort function +4377 def contig_sort_key(contig): +4378 +4379 # Remove 'chr' from contig +4380 contig_clean = re.sub(r"^chr", "", contig) +4381 +4382 # Special cases: X, Y, M/MT +4383 if contig_clean == "X": +4384 return (float(inf) - 3, contig) +4385 elif contig_clean == "Y": +4386 return (float(inf) - 2, contig) +4387 elif contig_clean in ["M", "MT"]: +4388 return (float(inf) - 1, contig) +4389 +4390 # Contig as integer +4391 try: +4392 return (int(contig_clean), contig) +4393 except ValueError: +4394 # Contig as on-numeric +4395 return (float(inf), contig_clean) +4396 +4397 # Sort contigs +4398 sorted_contigs = sorted(contigs, key=contig_sort_key) +4399 +4400 # Create new contgis OrderedDict +4401 ordered_contigs = OrderedDict() +4402 +4403 # Add contigs +4404 for contig in sorted_contigs: +4405 ordered_contigs[contig] = vcf_reader.contigs[contig] +4406 +4407 # Replace contigs +4408 vcf_reader.contigs = ordered_contigs +4409 +4410 # Return +4411 return vcf_reader @@ -12426,6 +12486,86 @@

    Returns
    + +
    + +
    + + def + sort_contigs(vcf_reader): + + + +
    + +
    4358def sort_contigs(vcf_reader):
    +4359    """
    +4360    Function that sort contigs in VCF header
    +4361
    +4362    Args:
    +4363        vcf_reader (vcf): VCF object from VCF package
    +4364
    +4365    Returns:
    +4366        vcf:VCF object from VCF package
    +4367    """
    +4368
    +4369    from collections import OrderedDict
    +4370
    +4371    # inf
    +4372    inf = 100000000
    +4373
    +4374    # Extract contigs from header
    +4375    contigs = list(vcf_reader.contigs.keys())
    +4376
    +4377    # Sort function
    +4378    def contig_sort_key(contig):
    +4379
    +4380        # Remove 'chr' from contig
    +4381        contig_clean = re.sub(r"^chr", "", contig)
    +4382
    +4383        # Special cases: X, Y, M/MT
    +4384        if contig_clean == "X":
    +4385            return (float(inf) - 3, contig)
    +4386        elif contig_clean == "Y":
    +4387            return (float(inf) - 2, contig)
    +4388        elif contig_clean in ["M", "MT"]:
    +4389            return (float(inf) - 1, contig)
    +4390
    +4391        # Contig as integer
    +4392        try:
    +4393            return (int(contig_clean), contig)
    +4394        except ValueError:
    +4395            # Contig as on-numeric
    +4396            return (float(inf), contig_clean)
    +4397
    +4398    # Sort contigs
    +4399    sorted_contigs = sorted(contigs, key=contig_sort_key)
    +4400
    +4401    # Create new contgis OrderedDict
    +4402    ordered_contigs = OrderedDict()
    +4403
    +4404    # Add contigs
    +4405    for contig in sorted_contigs:
    +4406        ordered_contigs[contig] = vcf_reader.contigs[contig]
    +4407
    +4408    # Replace contigs
    +4409    vcf_reader.contigs = ordered_contigs
    +4410
    +4411    # Return
    +4412    return vcf_reader
    +
    + + +

    Function that sort contigs in VCF header

    + +

    Args: + vcf_reader (vcf): VCF object from VCF package

    + +

    Returns: + vcf:VCF object from VCF package

    +
    + +
    + \ No newline at end of file diff --git a/docs/pdoc/howard/tools/tools.html b/docs/pdoc/howard/tools/tools.html index 9d10001..0696b13 100644 --- a/docs/pdoc/howard/tools/tools.html +++ b/docs/pdoc/howard/tools/tools.html @@ -107,2218 +107,2234 @@

    33from howard.tools.prioritization import prioritization 34from howard.tools.query import query 35from howard.tools.filter import filter - 36from howard.tools.stats import stats - 37from howard.tools.convert import convert - 38from howard.tools.databases import databases - 39from howard.tools.help import help - 40 + 36from howard.tools.sort import sort + 37from howard.tools.stats import stats + 38from howard.tools.convert import convert + 39from howard.tools.databases import databases + 40from howard.tools.help import help 41 - 42# Import gui only if gooey and wx is installed - 43try: - 44 check_gooey = importlib.util.find_spec("gooey") - 45 check_wx = importlib.util.find_spec("wx") - 46 tool_gui_enable = check_gooey and check_wx - 47except ImportError: - 48 tool_gui_enable = False - 49 - 50if tool_gui_enable: - 51 from howard.tools.gui import gui - 52 + 42 + 43# Import gui only if gooey and wx is installed + 44try: + 45 check_gooey = importlib.util.find_spec("gooey") + 46 check_wx = importlib.util.find_spec("wx") + 47 tool_gui_enable = check_gooey and check_wx + 48except ImportError: + 49 tool_gui_enable = False + 50 + 51if tool_gui_enable: + 52 from howard.tools.gui import gui 53 - 54class PathType(object): - 55 - 56 def __init__(self, exists=True, type="file", dash_ok=True): - 57 """exists: - 58 True: a path that does exist - 59 False: a path that does not exist, in a valid parent directory - 60 None: don't care - 61 type: file, dir, symlink, None, or a function returning True for valid paths - 62 None: don't care - 63 dash_ok: whether to allow "-" as stdin/stdout""" - 64 - 65 self.__name__ = "Path" - 66 - 67 assert exists in (True, False, None) - 68 assert type in ("file", "dir", "symlink", None) or hasattr(type, "__call__") - 69 - 70 self._exists = exists - 71 self._type = type - 72 self._dash_ok = dash_ok - 73 - 74 def __call__(self, string): - 75 - 76 # Full path if not a JSON string - 77 try: - 78 json.loads(string) - 79 except: - 80 string = 
full_path(string) - 81 - 82 if string == "-": - 83 # the special argument "-" means sys.std{in,out} - 84 if self._type == "dir": - 85 raise ValueError( - 86 "standard input/output (-) not allowed as directory path" - 87 ) - 88 elif self._type == "symlink": - 89 raise ValueError( - 90 "standard input/output (-) not allowed as symlink path" - 91 ) - 92 elif not self._dash_ok: - 93 raise ValueError("standard input/output (-) not allowed") - 94 else: - 95 e = os.path.exists(string) - 96 if self._exists == True: - 97 if not e: - 98 raise ValueError("path does not exist: '%s'" % string) - 99 - 100 if self._type is None: - 101 pass - 102 elif self._type == "file": - 103 if not os.path.isfile(string): - 104 raise ValueError("path is not a file: '%s'" % string) - 105 elif self._type == "symlink": - 106 if not os.path.symlink(string): - 107 raise ValueError("path is not a symlink: '%s'" % string) - 108 elif self._type == "dir": - 109 if not os.path.isdir(string): - 110 raise ValueError("path is not a directory: '%s'" % string) - 111 elif not self._type(string): - 112 raise ValueError("path not valid: '%s'" % string) - 113 else: - 114 if self._exists == False and e: - 115 raise ValueError("path exists: '%s'" % string) - 116 - 117 return string - 118 + 54 + 55class PathType(object): + 56 + 57 def __init__(self, exists=True, type="file", dash_ok=True): + 58 """exists: + 59 True: a path that does exist + 60 False: a path that does not exist, in a valid parent directory + 61 None: don't care + 62 type: file, dir, symlink, None, or a function returning True for valid paths + 63 None: don't care + 64 dash_ok: whether to allow "-" as stdin/stdout""" + 65 + 66 self.__name__ = "Path" + 67 + 68 assert exists in (True, False, None) + 69 assert type in ("file", "dir", "symlink", None) or hasattr(type, "__call__") + 70 + 71 self._exists = exists + 72 self._type = type + 73 self._dash_ok = dash_ok + 74 + 75 def __call__(self, string): + 76 + 77 # Full path if not a JSON string + 78 try: + 
79 json.loads(string) + 80 except: + 81 string = full_path(string) + 82 + 83 if string == "-": + 84 # the special argument "-" means sys.std{in,out} + 85 if self._type == "dir": + 86 raise ValueError( + 87 "standard input/output (-) not allowed as directory path" + 88 ) + 89 elif self._type == "symlink": + 90 raise ValueError( + 91 "standard input/output (-) not allowed as symlink path" + 92 ) + 93 elif not self._dash_ok: + 94 raise ValueError("standard input/output (-) not allowed") + 95 else: + 96 e = os.path.exists(string) + 97 if self._exists == True: + 98 if not e: + 99 raise ValueError("path does not exist: '%s'" % string) + 100 + 101 if self._type is None: + 102 pass + 103 elif self._type == "file": + 104 if not os.path.isfile(string): + 105 raise ValueError("path is not a file: '%s'" % string) + 106 elif self._type == "symlink": + 107 if not os.path.symlink(string): + 108 raise ValueError("path is not a symlink: '%s'" % string) + 109 elif self._type == "dir": + 110 if not os.path.isdir(string): + 111 raise ValueError("path is not a directory: '%s'" % string) + 112 elif not self._type(string): + 113 raise ValueError("path not valid: '%s'" % string) + 114 else: + 115 if self._exists == False and e: + 116 raise ValueError("path exists: '%s'" % string) + 117 + 118 return string 119 - 120# Arguments dict - 121arguments = { - 122 # Process & other - 123 "input": { - 124 "metavar": "input", - 125 "help": """Input file path.\n""" - 126 """Format file must be either VCF, Parquet, TSV, CSV, PSV or duckDB.\n""" - 127 """Files can be compressesd (e.g. 
vcf.gz, tsv.gz).\n""", - 128 "required": False, - 129 "default": None, - 130 "type": PathType(exists=True, type=None), - 131 "gooey": { - 132 "widget": "FileChooser", - 133 "options": { - 134 "wildcard": "Parquet file (*.parquet)|*.parquet|" "All files (*)|*" - 135 }, - 136 }, - 137 }, - 138 "output": { - 139 "metavar": "output", - 140 "help": """Output file path.\n""" - 141 """Format file must be either VCF, Parquet, TSV, CSV, PSV or duckDB.\n""" - 142 """Files can be compressesd (e.g. vcf.gz, tsv.gz).\n""", - 143 "required": False, - 144 "default": None, - 145 "type": PathType(exists=None, type=None), - 146 "gooey": {"widget": "FileSaver"}, - 147 }, - 148 "param": { - 149 "metavar": "param", - 150 "help": """Parameters JSON file (or string) defines parameters to process \n""" - 151 """annotations, calculations, prioritizations, convertions and queries.\n""", - 152 "default": "{}", - 153 "type": PathType(exists=None, type=None), - 154 "gooey": { - 155 "widget": "FileChooser", - 156 "options": { - 157 "initial_value": "", - 158 "wildcard": "JSON file (*.json)|*.json|" "All files (*)|*", - 159 }, - 160 }, - 161 }, - 162 "query": { - 163 "metavar": "query", - 164 "help": """Query in SQL format\n""" - 165 """(e.g. 'SELECT * FROM variants LIMIT 50').\n""", - 166 "default": None, - 167 "type": str, - 168 "gooey": { - 169 "widget": "Textarea", - 170 "options": {"initial_value": "SELECT * FROM variants"}, - 171 }, - 172 "extra": {"param_section": "query"}, - 173 }, - 174 "filter": { - 175 "metavar": "filter", - 176 "help": """Filter variant using SQL format\n""" """(e.g. 'POS < 100000').\n""", - 177 "default": None, - 178 "type": str, - 179 "gooey": { - 180 "widget": "Textarea", - 181 "options": {"initial_value": ""}, - 182 }, - 183 # "extra": {"param_section": "filter"}, - 184 }, - 185 "samples": { - 186 "metavar": "samples", - 187 "help": """List of samples\n""" """(e.g. 
'sample1,sample2').\n""", - 188 "default": None, - 189 "type": str, - 190 "gooey": { - 191 "widget": "Textarea", - 192 "options": {"initial_value": ""}, - 193 }, - 194 # "extra": {"param_section": "filter"}, - 195 }, - 196 "output_query": { - 197 "metavar": "output", - 198 "help": """Output Query file.\n""" - 199 """Format file must be either VCF, Parquet, TSV, CSV, PSV or duckDB.\n""", - 200 "default": None, - 201 "type": PathType(exists=None, type=None), - 202 "gooey": { - 203 "widget": "FileSaver", - 204 "options": { - 205 "wildcard": "All files (*)|*", - 206 }, - 207 }, - 208 }, - 209 # Annotations - 210 "annotations": { - 211 "metavar": "annotations", - 212 "help": """Annotation with databases files, or with tools,\n""" - 213 """as a list of files in Parquet, VCF, BED, or keywords\n""" - 214 """ (e.g. 'file.parquet,bcftools:file2.vcf.gz,annovar:refGene,snpeff').\n""" - 215 """- For a Parquet/VCF/BED, use file paths\n""" - 216 """ (e.g. 'file1.parquet,file2.vcf.gz').\n""" - 217 """- For BCFTools annotation, use keyword 'bcftools' with file paths\n""" - 218 """ (e.g. 'bcftools:file.vcf.gz:file.bed.gz').\n""" - 219 """- For Parquet annotation, use keyword 'parquet' with file paths\n""" - 220 """ (e.g. 'parquet:file.parquet').\n""" - 221 """- For Annovar annotation, use keyword 'annovar' with annovar code\n""" - 222 """ (e.g. 'annovar:refGene', 'annovar:refGene:cosmic70').\n""" - 223 """- For snpeff annotation, use keyword 'snpeff' with options\n""" - 224 """ (e.g. 'snpeff', 'snpeff:-hgvs -noShiftHgvs -spliceSiteSize 3').\n""" - 225 """- For snpSift annotation, use keyword 'snpsift' with file paths\n""" - 226 """ (e.g. 'snpsift:file.vcf.gz:file.bed.gz').\n""" - 227 """- For Exomiser annotation, use keyword 'exomiser' with options as key=value\n""" - 228 """ (e.g. 'exomiser:preset=exome:transcript_source=refseq').\n""" - 229 """- For add all availalbe databases files, use 'ALL' keyword,\n""" - 230 """ with filters on format (e.g. 'parquet', 'vcf') and release (e.g. 
'current', 'devel')\n""" - 231 """ (e.g. 'ALL', ALL:format=parquet', 'ALL:format=parquet:release=current', 'ALL:format=parquet+vcf:release=current+devel').\n""", - 232 "default": None, - 233 "type": str, - 234 "extra": { - 235 "format": "DB[,DB]*[,bcftools:DB[:DB]*][,annovar:KEY[:KEY]*][,snpeff][,exomiser[:var=val]*]", - 236 "examples": { - 237 "Parquet method annotation with 2 Parquet files": '"annotations": "/path/to/database1.parquet,/path/to/database2.parquet"', - 238 "Parquet method annotation with multiple file formats": '"annotations": "/path/to/database1.parquet,/path/to/database2.vcf.gz,/path/to/database2.bed.gz"', - 239 "Parquet method annotation with available Parquet databases in current release (check databases in production)": '"annotations": "ALL:parquet:current"', - 240 "Parquet method annotation with available Parquet databases in latest release (check databases before production)": '"annotations": "ALL:parquet:latest"', - 241 "Annotation with BCFTools": '"annotations": "bcftools:/path/to/database2.vcf.gz:/path/to/database2.bed.gz"', - 242 "Annotation with Annovar (refGene with hgvs and Cosmic)": '"annotations": "annovar:refGene:cosmic70"', - 243 "Annotation with snpEff (default options)": '"annotations": "snpeff"', - 244 "Annotation with snpEff (with options)": '"annotations": "snpeff:-hgvs -noShiftHgvs -spliceSiteSize 3"', - 245 "Annotation with snpSift": '"annotations": "snpsift:/path/to/database2.vcf.gz:/path/to/database2.bed.gz"', - 246 "Annotation with Exomiser with options": '"annotations": "exomiser:preset=exome:hpo=0001156+0001363+0011304+0010055:transcript_source=refseq:release=2109"', - 247 "Multiple tools annotations (Parquet method, BCFTools, Annovar, snpEff and Exomiser)": '"annotations": "/path/to/database1.parquet,bcftools:/path/to/database2.vcf.gz,annovar:refGene:cosmic70,snpeff,exomiser:preset=exome:transcript_source=refseq"', - 248 }, - 249 }, - 250 }, - 251 # Annotations Parquet - 252 "annotation_parquet": { - 253 "metavar": 
"annotation parquet", - 254 "help": """Annotation with Parquet method, as a list of files in Parquet, VCF or BED\n""" - 255 """ (e.g. 'file1.parquet,file2.vcf.gz').\n""" - 256 """For add all availalbe databases files, use 'ALL' keyword,\n""" - 257 """ with filters on type and release\n""" - 258 """ (e.g. 'ALL', 'ALL:parquet:current', 'ALL:parquet,vcf:current,devel').\n""", - 259 "default": None, - 260 "type": str, - 261 "nargs": "+", - 262 "gooey": { - 263 "widget": "MultiFileChooser", - 264 "options": { - 265 "default_dir": DEFAULT_ANNOTATIONS_FOLDER, - 266 "message": "Database files", - 267 }, - 268 }, - 269 "extra": { - 270 "format": "DB[,DB]*", - 271 "examples": { - 272 "Parquet method annotation with 2 Parquet files": '"annotation_parquet": "/path/to/database1.parquet,/path/to/database2.parquet"', - 273 "Parquet method annotation with multiple file formats": '"annotation_parquet": "/path/to/database1.parquet,/path/to/database2.vcf.gz,/path/to/database2.bed.gz"', - 274 "Parquet method annotation with available Parquet databases in current release (check databases in production)": '"annotation_parquet": "ALL:parquet:current"', - 275 "Parquet method annotation with available Parquet databases in latest release (check databases before production)": '"annotation_parquet": "ALL:parquet:latest"', - 276 }, - 277 }, - 278 }, - 279 # Annotations BCFTools - 280 "annotation_bcftools": { - 281 "metavar": "annotation BCFTools", - 282 "help": """Annotation with BCFTools, as a list of files VCF or BED\n""" - 283 """ (e.g. 
'file.vcf.gz,file.bed.gz').\n""", - 284 "default": None, - 285 "type": str, - 286 "nargs": "+", - 287 "gooey": { - 288 "widget": "MultiFileChooser", - 289 "options": { - 290 "default_dir": DEFAULT_ANNOTATIONS_FOLDER, - 291 "message": "Database files", - 292 }, - 293 }, - 294 "extra": { - 295 "format": "DB[,DB]*", - 296 "examples": { - 297 "Annovation with BCFTools": '"annotation_bcftools": "/path/to/database2.vcf.gz,/path/to/database2.bed.gz"', - 298 }, - 299 }, - 300 }, - 301 # Annotations snpeff - 302 "annotation_snpeff": { - 303 "metavar": "annotation snpEff", - 304 "help": """Annotation with snpEff, with options\n""" - 305 """ (e.g. '', '-hgvs -noShiftHgvs -spliceSiteSize 3').\n""", - 306 "default": None, - 307 "type": str, - 308 "extra": { - 309 "format": "options", - 310 "examples": { - 311 "Annotation with snpEff (default options)": '"annotation_snpeff": ""', - 312 "Annotation with snpEff (with options)": '"annotation_snpeff": "-hgvs -noShiftHgvs -spliceSiteSize 3"', - 313 }, - 314 }, - 315 }, - 316 # Annotations snpSift - 317 "annotation_snpsift": { - 318 "metavar": "annotation snpSift", - 319 "help": """Annotation with snpSift, as a list of files VCF\n""" - 320 """ (e.g. 'file.vcf.gz,file.bed.gz').\n""", - 321 "default": None, - 322 "type": str, - 323 "nargs": "+", - 324 "gooey": { - 325 "widget": "MultiFileChooser", - 326 "options": { - 327 "default_dir": DEFAULT_ANNOTATIONS_FOLDER, - 328 "message": "Database files", - 329 }, - 330 }, - 331 "extra": { - 332 "format": "DB[,DB]*", - 333 "examples": { - 334 "Annovation with snpSift": '"annotation_snpsift": "/path/to/database2.vcf.gz,/path/to/database2.bed.gz"', - 335 }, - 336 }, - 337 }, - 338 # Annotations Annovar - 339 "annotation_annovar": { - 340 "metavar": "annotation Annovar", - 341 "help": """Annotation with Annovar, as a list of database keywords\n""" - 342 """ (e.g. 
'refGene', 'refGene:cosmic70').\n""", - 343 "default": None, - 344 "type": str, - 345 "extra": { - 346 "format": "keyword[:keyword]*", - 347 "examples": { - 348 "Annotation with Annovar (refGene with hgvs and Cosmic)": '"annotation_annovar": "refGene:cosmic70"', - 349 }, - 350 }, - 351 }, - 352 # Annotations Exomiser - 353 "annotation_exomiser": { - 354 "metavar": "annotation Exomiser", - 355 "help": """Annotation with Exomiser, as a list of options\n""" - 356 """ (e.g. 'preset=exome:transcript_source=refseq').\n""", - 357 "default": None, - 358 "type": str, - 359 "extra": { - 360 "format": "option=value[:option=value]", - 361 "examples": { - 362 "Annotation with Exomiser with options": '"annotation_exomiser": "preset=exome:hpo=0001156+0001363+0011304+0010055:transcript_source=refseq:release=2109"', - 363 }, - 364 }, - 365 }, - 366 # Annotations Splice - 367 "annotation_splice": { - 368 "metavar": "annotation Splice", - 369 "help": """Annotation with Splice, as a list of options\n""" - 370 """ (e.g. 
'split_mode=one:spliceai_distance=500:spliceai_mask=1').\n""", - 371 "default": None, - 372 "type": str, - 373 "extra": { - 374 "format": "option=value[:option=value]", - 375 "examples": { - 376 "Annotation with Splice with options": '"annotation_splice": "split_mode=one:spliceai_distance=500:spliceai_mask=1"', - 377 }, - 378 }, - 379 }, - 380 # Update annotation - 381 "annotations_update": { - 382 "help": """Update option for annotation (Only for Parquet annotation).\n""" - 383 """If True, annotation fields will be removed and re-annotated.\n""" - 384 """These options will be applied to all annotation databases.\n""", - 385 "action": "store_true", - 386 "default": False, - 387 "gooey": { - 388 "widget": "BlockCheckbox", - 389 "options": {"checkbox_label": "Update annotation method"}, - 390 }, - 391 "extra": {"param_section": "annotation:options"}, - 392 }, - 393 # Append annotation - 394 "annotations_append": { - 395 "help": """Append option for annotation (Only for Parquet annotation).\n""" - 396 """If True, annotation fields will be annotated only if not annotation exists for the variant.\n""" - 397 """These options will be applied to all annotation databases.\n""", - 398 "action": "store_true", - 399 "default": False, - 400 "gooey": { - 401 "widget": "BlockCheckbox", - 402 "options": {"checkbox_label": "Append annotation method"}, - 403 }, - 404 "extra": {"param_section": "annotation:options"}, - 405 }, - 406 # Calculations - 407 "calculations": { - 408 "metavar": "operations", - 409 "help": """Quick calculations on genetic variants information and genotype information,\n""" - 410 """as a list of operations (e.g. 
'VARTYPE,variant_id').\n""" - 411 """List of available calculations by default\n""" - 412 """ (unsensitive case, see doc for more information):\n""" - 413 """ VARTYPE """ - 414 """ snpeff_hgvs """ - 415 """ FINDBYPIPELINE """ - 416 """ GENOTYPECONCORDANCE """ - 417 """ BARCODE """ - 418 """ TRIO """ - 419 """ VAF """ - 420 """ VAF_STATS """ - 421 """ DP_STATS """ - 422 """\n""", - 423 "default": None, - 424 "type": str, - 425 }, - 426 # Prioritizations - 427 "prioritizations": { - 428 "metavar": "prioritisations", - 429 "help": """List of prioritization profiles to process (based on Prioritization JSON file),\n""" - 430 """such as 'default', 'rare variants', 'low allele frequency', 'GERMLINE'.\n""" - 431 """By default, all profiles available will be processed.\n""", - 432 "default": None, - 433 "type": str, - 434 "extra": { - 435 # "param_section": "prioritization", - 436 "examples": { - 437 "Prioritization profile by default": """"prioritization": "default" """, - 438 "Prioritization profile by default and GERMLINE from Configuration JSON file": """"prioritization": "default,GERMLINE" """, - 439 } - 440 }, - 441 }, - 442 # Prioritization config - 443 "prioritization_config": { - 444 "metavar": "prioritization config", - 445 "help": """Prioritization configuration JSON file (defines profiles, see doc).\n""", - 446 "default": None, - 447 "type": PathType(exists=True, type="file"), - 448 "gooey": { - 449 "widget": "FileChooser", - 450 "options": {"wildcard": "JSON file (*.json)|*.json|" "All files (*)|*"}, - 451 }, - 452 "extra": { - 453 "param_section": "prioritization", - 454 "examples": { - 455 "Prioritization configuration JSON file as an option": """"prioritization_config": "prioritization_config.json" """ - 456 }, - 457 }, - 458 }, - 459 "profiles": { - 460 "metavar": "profiles", - 461 "help": """List of prioritization profiles to process (based on Prioritization JSON file),\n""" - 462 """such as 'default', 'rare variants', 'low allele frequency', 
'GERMLINE'.\n""" - 463 """By default, all profiles available will be processed.\n""", - 464 "default": None, - 465 "type": str, - 466 }, - 467 "default_profile": { - 468 "metavar": "default profile", - 469 "help": """Prioritization profile by default (see doc).\n""" - 470 """Default is the first profile in the list of prioritization profiles.\n""", - 471 "default": None, - 472 "type": str, - 473 }, - 474 "pzfields": { - 475 "metavar": "pzfields", - 476 "help": """Prioritization fields to provide (see doc).\n""" - 477 """Available: PZScore, PZFlag, PZTags, PZComment, PZInfos\n""", - 478 "default": "PZScore,PZFlag", - 479 "type": str, - 480 }, - 481 "prioritization_score_mode": { - 482 "metavar": "prioritization score mode", - 483 "help": """Prioritization Score mode (see doc).\n""" - 484 """Available: HOWARD (increment score), VaRank (max score)\n""", - 485 "default": "HOWARD", - 486 "type": str, - 487 "choices": ["HOWARD", "VaRank"], - 488 "gooey": {"widget": "Dropdown", "options": {}}, - 489 }, - 490 # Query print options - 491 "query_limit": { - 492 "metavar": "query limit", - 493 "help": """Limit of number of row for query (only for print result, not output).\n""", - 494 "default": 10, - 495 "type": int, - 496 "gooey": { - 497 "widget": "IntegerField", - 498 "options": {"min": 1, "max": 10000, "increment": 10}, - 499 }, - 500 }, - 501 "query_print_mode": { - 502 "metavar": "print mode", - 503 "help": """Print mode of query result (only for print result, not output).\n""" - 504 """Either None (native), 'markdown', 'tabulate' or disabled.\n""", - 505 "choices": [None, "markdown", "tabulate", "disabled"], - 506 "default": None, - 507 "type": str, - 508 "gooey": {"widget": "Dropdown", "options": {}}, - 509 }, - 510 # Explode infos - 511 "explode_infos": { - 512 "help": """Explode VCF INFO/Tag into 'variants' table columns.\n""", - 513 "action": "store_true", - 514 "default": False, - 515 }, - 516 "explode_infos_prefix": { - 517 "metavar": "explode infos prefix", - 
518 "help": """Explode VCF INFO/Tag with a specific prefix.\n""", - 519 "default": "", - 520 "type": str, - 521 }, - 522 "explode_infos_fields": { - 523 "metavar": "explode infos list", - 524 "help": """Explode VCF INFO/Tag specific fields/tags.\n""" - 525 """Keyword `*` specify all available fields, except those already specified.\n""" - 526 """Pattern (regex) can be used, such as `.*_score` for fields named with '_score' at the end.\n""" - 527 """Examples:\n""" - 528 """- 'HGVS,SIFT,Clinvar' (list of fields)\n""" - 529 """- 'HGVS,*,Clinvar' (list of fields with all other fields at the end)\n""" - 530 """- 'HGVS,.*_score,Clinvar' (list of 2 fields with all scores in the middle)\n""" - 531 """- 'HGVS,.*_score,*' (1 field, scores, all other fields)\n""" - 532 """- 'HGVS,*,.*_score' (1 field, all other fields, all scores)\n""", - 533 "default": "*", - 534 "type": str, - 535 }, - 536 # Include header - 537 "include_header": { - 538 "help": """Include header (in VCF format) in output file.\n""" - 539 """Only for compatible formats (tab-delimiter format as TSV or BED).\n""", - 540 "action": "store_true", - 541 "default": False, - 542 }, - 543 # Sort By - 544 "order_by": { - 545 "metavar": "order by", - 546 "help": """List of columns to sort the result-set in ascending or descending order.\n""" - 547 """Use SQL format, and keywords ASC (ascending) and DESC (descending).\n""" - 548 """If a column is not available, order will not be considered.\n""" - 549 """Order is enable only for compatible format (e.g. 
TSV, CSV, JSON).\n""" - 550 """Examples: 'ACMG_score DESC', 'PZFlag DESC, PZScore DESC'.\n""", - 551 "default": "", - 552 "type": str, - 553 "extra": { - 554 "examples": { - 555 "Order by ACMG score in descending order": """"order_by": "ACMG_score DESC" """, - 556 "Order by PZFlag and PZScore in descending order": """"order_by": "PZFlag DESC, PZScore DESC" """, - 557 } - 558 }, - 559 }, - 560 # Parquet partition - 561 "parquet_partitions": { - 562 "metavar": "parquet partitions", - 563 "help": """Parquet partitioning using hive (available for any format).\n""" - 564 """This option is faster parallel writing, but memory consuming.\n""" - 565 """Use 'None' (string) for NO partition but split parquet files into a folder.\n""" - 566 """Examples: '#CHROM', '#CHROM,REF', 'None'.\n""", - 567 "default": None, - 568 "type": str, - 569 }, - 570 # From annovar - 571 "input_annovar": { - 572 "metavar": "input annovar", - 573 "help": """Input Annovar file path.\n""" - 574 """Format file must be a Annovar TXT file, associated with '.idx'.\n""", - 575 "required": False, - 576 "default": None, - 577 "type": PathType(exists=True, type=None), - 578 "gooey": { - 579 "widget": "FileChooser", - 580 "options": { - 581 "wildcard": "Parquet file (*.parquet)|*.parquet|" "All files (*)|*" - 582 }, - 583 }, - 584 }, - 585 "output_annovar": { - 586 "metavar": "output annovar", - 587 "help": """Output Annovar file path.\n""" - 588 """Format file must be either VCF compressesd file '.vcf.gz'.\n""", - 589 "required": False, - 590 "default": None, - 591 "type": PathType(exists=None, type=None), - 592 "gooey": {"widget": "FileSaver"}, - 593 }, - 594 # From Annovar - 595 "annovar_code": { - 596 "metavar": "Annovar code", - 597 "help": """Annovar code, or database name.\n""" - 598 """Usefull to name databases columns.\n""", - 599 "required": False, - 600 "default": None, - 601 "type": str, - 602 }, - 603 "annovar_to_parquet": { - 604 "metavar": "to parquet", - 605 "help": """Parquet file 
conversion.\n""", - 606 "required": False, - 607 "default": None, - 608 "type": PathType(exists=None, type=None), - 609 "gooey": { - 610 "widget": "FileSaver", - 611 "options": { - 612 "wildcard": "HTML file (*.parquet)|*.parquet", - 613 }, - 614 }, - 615 }, - 616 # "multi_variant": { - 617 # "metavar": "multi variant", - 618 # "help": """Variant with multiple annotation lines.\n""" - 619 # """Either 'auto' (auto-detection), 'enable' or 'disable'.\n""", - 620 # "default": "auto", - 621 # "type": str, - 622 # "choices": ["auto", "enable", "disable"], - 623 # "gooey": { - 624 # "widget": "Dropdown", - 625 # "options": {} - 626 # } - 627 # }, - 628 # "reduce_memory": { - 629 # "metavar": "reduce memory", - 630 # "help": """Reduce memory option,\n""" - 631 # """either 'auto' (auto-detection), 'enable' or 'disable'.\n""", - 632 # "default": "auto", - 633 # "type": str, - 634 # "choices": ["auto", "enable", "disable"], - 635 # "gooey": { - 636 # "widget": "Dropdown", - 637 # "options": {} - 638 # } - 639 # }, - 640 "annovar_multi_variant": { - 641 "metavar": "Annovar multi variant", - 642 "help": """Variant with multiple annotation lines on Annovar file.\n""" - 643 """Either 'auto' (auto-detection), 'enable' or 'disable'.\n""", - 644 "default": "auto", - 645 "type": str, - 646 "choices": ["auto", "enable", "disable"], - 647 "gooey": {"widget": "Dropdown", "options": {}}, - 648 }, - 649 "annovar_reduce_memory": { - 650 "metavar": "reduce memory", - 651 "help": """Reduce memory option for Annovar convert,\n""" - 652 """either 'auto' (auto-detection), 'enable' or 'disable'.\n""", - 653 "default": "auto", - 654 "type": str, - 655 "choices": ["auto", "enable", "disable"], - 656 "gooey": {"widget": "Dropdown", "options": {}}, - 657 }, - 658 # From Extann - 659 "input_extann": { - 660 "metavar": "input extann", - 661 "help": """Input Extann file path.\n""" - 662 """Format file must be a Extann TXT file or TSV file.\n""" - 663 """File need to have at least the genes 
column.\n""", - 664 "required": False, - 665 "default": None, - 666 "type": PathType(exists=True, type=None), - 667 "gooey": { - 668 "widget": "FileChooser", - 669 "options": { - 670 "wildcard": "VCF, Parquet, TSV, CSV, PSV or duckDB|*.*|" - 671 "All files (*)|*" - 672 }, - 673 }, - 674 }, - 675 "output_extann": { - 676 "metavar": "output extann", - 677 "help": """Output Extann file path.\n""" - 678 """Output extann file, should be BED or BED.gz.\n""", - 679 "required": False, - 680 "default": None, - 681 "type": PathType(exists=None, type=None), - 682 "gooey": {"widget": "FileSaver"}, - 683 }, - 684 "mode_extann": { - 685 "metavar": "mode extann", - 686 "help": """Mode extann selection.\n""" - 687 """How to pick transcript from ncbi, keep all,\n""" - 688 """keep the longest, or keep the chosen one (transcript_extann).\n""", - 689 "required": False, - 690 "default": "longest", - 691 "choices": ["all", "longest", "chosen"], - 692 "type": str, - 693 }, - 694 "param_extann": { - 695 "metavar": "param extann", - 696 "help": """Param extann file path.\n""" - 697 """Param containing configuration, options to replace chars and\n""" - 698 """bedlike header description, conf vcf specs.\n""" - 699 """(e.g. 
'~/howard/config/param.extann.json')\n""", - 700 "required": False, - 701 "default": None, - 702 "type": PathType(exists=True, type=None), - 703 "gooey": { - 704 "widget": "FileChooser", - 705 "options": {"wildcard": "TSV file format|*.tsv|"}, - 706 }, - 707 }, - 708 # Calculation - 709 "calculation_config": { - 710 "metavar": "calculation config", - 711 "help": """Calculation configuration JSON file.\n""", - 712 "default": None, - 713 "type": PathType(exists=True, type="file"), - 714 "gooey": { - 715 "widget": "FileChooser", - 716 "options": {"wildcard": "JSON file (*.json)|*.json|" "All files (*)|*"}, - 717 }, - 718 "extra": { - 719 "param_section": "calculation", - 720 "examples": { - 721 "Calculation configuration JSON file as an option": """"calculation_config": "calculation_config.json" """ - 722 }, - 723 }, - 724 }, - 725 "show_calculations": { - 726 "help": """Show available calculation operations.\n""", - 727 "action": "store_true", - 728 "default": False, - 729 }, - 730 "hgvs_field": { - 731 "metavar": "HGVS field", - 732 "help": """HGVS INFO/tag containing a list o HGVS annotations.\n""", - 733 "default": "hgvs", - 734 "type": str, - 735 "extra": {"param_section": "calculation:calculations:NOMEN:options"}, - 736 }, - 737 "transcripts": { - 738 "metavar": "transcripts", - 739 "help": """Transcripts TSV file,\n""" - 740 """with Transcript in first column, optional Gene in second column.\n""", - 741 "default": None, - 742 "type": PathType(exists=True, type="file"), - 743 "gooey": { - 744 "widget": "FileChooser", - 745 "options": {"wildcard": "TSV file (*.tsv)|*.tsv|" "All files (*)|*"}, - 746 }, - 747 "extra": {"param_section": "calculation:calculations:NOMEN:options"}, - 748 }, - 749 "trio_pedigree": { - 750 "metavar": "trio pedigree", - 751 "help": """Pedigree Trio for trio inheritance calculation.\n""" - 752 """Either a JSON file or JSON string or a list of samples\n""" - 753 """(e.g. 
'sample1,sample2,sample3' for father, mother and child,\n""" - 754 """ '{"father": "sample1", "mother": "sample2", "child": "sample3"}').\n""", - 755 "default": None, - 756 "gooey": { - 757 "widget": "FileChooser", - 758 "options": {"wildcard": "JSON file (*.json)|*.json|" "All files (*)|*"}, - 759 }, - 760 "extra": {"param_section": "calculation:calculations:TRIO"}, - 761 }, - 762 "family_pedigree": { - 763 "metavar": "family pedigree", - 764 "help": """Pedigree family for barcode calculation on genotype.\n""" - 765 """Either a JSON file or JSON string or a list of samples\n""" - 766 """(e.g. 'sample1,sample2,sample3,sample4',\n""" - 767 """ '{"father": "sample1", "mother": "sample2", "child1": "sample3", "child2": "sample3"}').\n""", - 768 "default": None, - 769 "gooey": { - 770 "widget": "FileChooser", - 771 "options": {"wildcard": "JSON file (*.json)|*.json|" "All files (*)|*"}, - 772 }, - 773 "extra": {"param_section": "calculation:calculations:BARCODEFAMILY"}, - 774 }, - 775 # Stats - 776 "stats_md": { - 777 "metavar": "stats markdown", - 778 "help": """Stats Output file in MarkDown format.\n""", - 779 "required": False, - 780 "default": None, - 781 "type": PathType(exists=None, type="file"), - 782 "gooey": { - 783 "widget": "FileSaver", - 784 "options": {"wildcard": "Markdown file (*.md)|*.md"}, - 785 }, - 786 "extra": { - 787 "examples": { - 788 "Export statistics in Markdown format": """"stats_md": "/tmp/stats.md" """ - 789 } - 790 }, - 791 }, - 792 "stats_json": { - 793 "metavar": "stats json", - 794 "help": """Stats Output file in JSON format.\n""", - 795 "required": False, - 796 "default": None, - 797 "type": PathType(exists=None, type="file"), - 798 "gooey": { - 799 "widget": "FileSaver", - 800 "options": {"wildcard": "JSON file (*.json)|*.json"}, - 801 }, - 802 "extra": { - 803 "examples": { - 804 "Export statistics in JSON format": """"stats_json": "/tmp/stats.json" """ - 805 } - 806 }, - 807 }, - 808 # Assembly and Genome - 809 "assembly": { - 810 
"metavar": "assembly", - 811 "help": """Genome Assembly (e.g. 'hg19', 'hg38').\n""", - 812 "required": False, - 813 "default": DEFAULT_ASSEMBLY, - 814 "type": str, - 815 "extra": { - 816 "examples": { - 817 "Default assembly for all analysis tools": """"assembly": "hg19" """, - 818 "List of assemblies for databases download tool": """"assembly": "hg19,hg38" """, - 819 } - 820 }, - 821 }, - 822 "genome": { - 823 "metavar": "genome", - 824 "help": """Genome file in fasta format (e.g. 'hg19.fa', 'hg38.fa').\n""", - 825 "required": False, - 826 "default": f"{DEFAULT_GENOME_FOLDER}/{DEFAULT_ASSEMBLY}/{DEFAULT_ASSEMBLY}.fa", - 827 "type": PathType(exists=None, type="file"), - 828 "gooey": {"widget": "FileChooser", "options": {"wildcard": "All files (*)|*"}}, - 829 }, - 830 # HGVS - 831 "hgvs_options": { - 832 "metavar": "HGVS options", - 833 "help": """Quick HGVS annotation options.\n""" - 834 """This option will skip all other hgvs options.\n""" - 835 """Examples:\n""" - 836 """- 'default' (for default options)\n""" - 837 """- 'full_format' (for full format HGVS annotation)\n""" - 838 """- 'use_gene=True:add_protein=true:codon_type=FULL'\n""", - 839 "required": False, - 840 "default": None, - 841 "type": str, - 842 }, - 843 "use_gene": { - 844 "help": """Use Gene information to generate HGVS annotation\n""" - 845 """(e.g. 'NM_152232(TAS1R2):c.231T>C')""", - 846 "action": "store_true", - 847 "default": False, - 848 }, - 849 "use_exon": { - 850 "help": """Use Exon information to generate HGVS annotation\n""" - 851 """(e.g. 'NM_152232(exon2):c.231T>C').\n""" - 852 """Only if 'use_gene' is not enabled.\n""", - 853 "action": "store_true", - 854 "default": False, - 855 }, - 856 "use_protein": { - 857 "help": """Use Protein level to generate HGVS annotation\n""" - 858 """(e.g. 
'NP_689418:p.Cys77Arg').\n""" - 859 """Can be used with 'use_exon' or 'use_gene'.\n""", - 860 "action": "store_true", - 861 "default": False, - 862 }, - 863 "add_protein": { - 864 "help": """Add Protein level to DNA HGVS annotation """ - 865 """(e.g 'NM_152232:c.231T>C,NP_689418:p.Cys77Arg').\n""", - 866 "action": "store_true", - 867 "default": False, - 868 }, - 869 "full_format": { - 870 "help": """Generates HGVS annotation in a full format\n""" - 871 """by using all information to generates an exhaustive annotation\n""" - 872 """(non-standard, e.g. 'TAS1R2:NM_152232:NP_689418:c.231T>C:p.Cys77Arg').\n""" - 873 """Use 'use_exon' to add exon information\n""" - 874 """(e.g 'TAS1R2:NM_152232:NP_689418:exon2:c.231T>C:p.Cys77Arg').\n""", - 875 "action": "store_true", - 876 "default": False, - 877 }, - 878 "use_version": { - 879 "help": """Generates HGVS annotation with transcript version\n""" - 880 """(e.g. 'NM_152232.1:c.231T>C').\n""", - 881 "action": "store_true", - 882 "default": False, - 883 }, - 884 "codon_type": { - 885 "metavar": "Codon type", - 886 "help": """Amino Acide Codon format type to use to generate HGVS annotation.\n""" - 887 """Available:\n""" - 888 """- '1': codon in 1 character (e.g. 'C', 'R')\n""" - 889 """- '3': codon in 3 character (e.g. 'Cys', 'Arg')\n""" - 890 """-'FULL': codon in full name (e.g. 
'Cysteine', 'Arginine')\n""", - 891 "required": False, - 892 "default": "3", - 893 "type": str, - 894 "choices": ["1", "3", "FULL"], - 895 "gooey": {"widget": "Dropdown", "options": {}}, - 896 }, - 897 "refgene": { - 898 "metavar": "refGene", - 899 "help": """Path to refGene annotation file.\n""", - 900 "required": False, - 901 "default": None, - 902 "type": PathType(exists=True, type="file"), - 903 "gooey": { - 904 "widget": "FileChooser", - 905 "options": { - 906 "wildcard": "All files (*)|*", - 907 "default_dir": DEFAULT_REFSEQ_FOLDER, - 908 "default_file": "ncbiRefSeq.txt", - 909 "message": "Path to refGene annotation file", - 910 }, - 911 }, - 912 }, - 913 "refseqlink": { - 914 "metavar": "refSeqLink", - 915 "help": """Path to refSeqLink annotation file.\n""", - 916 "required": False, - 917 "default": None, - 918 "type": PathType(exists=True, type="file"), - 919 "gooey": { - 920 "widget": "FileChooser", - 921 "options": { - 922 "wildcard": "All files (*)|*", - 923 "default_dir": DEFAULT_REFSEQ_FOLDER, - 924 "default_file": "ncbiRefSeq.txt", - 925 "message": "Path to refGeneLink annotation file", - 926 }, - 927 }, - 928 }, - 929 "refseq-folder": { - 930 "metavar": "refseq folder", - 931 "help": """Folder containing refSeq files.\n""", - 932 "required": False, - 933 "default": DEFAULT_REFSEQ_FOLDER, - 934 "type": PathType(exists=True, type="dir"), - 935 "gooey": { - 936 "widget": "DirChooser", - 937 "options": { - 938 "default_dir": DEFAULT_REFSEQ_FOLDER, - 939 "message": "Path to refGenefolder", - 940 }, - 941 }, - 942 }, - 943 # Databases - 944 # Genome - 945 "download-genomes": { - 946 "metavar": "genomes", - 947 "help": """Path to genomes folder\n""" - 948 """with Fasta files, indexes,\n""" - 949 """and all files generated by pygenome module.\n""" - 950 f"""(e.g. 
'{DEFAULT_GENOME_FOLDER}').\n""", - 951 "required": False, - 952 "default": None, - 953 "type": PathType(exists=None, type="dir"), - 954 "gooey": { - 955 "widget": "DirChooser", - 956 "options": { - 957 "default_dir": DEFAULT_DATABASE_FOLDER, - 958 "message": "Path to genomes folder", - 959 }, - 960 }, - 961 }, - 962 "download-genomes-provider": { - 963 "metavar": "genomes provider", - 964 "help": """Download Genome from an external provider.\n""" - 965 """Available: GENCODE, Ensembl, UCSC, NCBI.\n""", - 966 "required": False, - 967 "default": "UCSC", - 968 "type": str, - 969 "choices": ["GENCODE", "Ensembl", "UCSC", "NCBI"], - 970 "gooey": {"widget": "Dropdown", "options": {}}, - 971 }, - 972 "download-genomes-contig-regex": { - 973 "metavar": "genomes contig regex", - 974 "help": """Regular expression to select specific chromosome\n""" - 975 """(e.g 'chr[0-9XYM]+$').\n""", - 976 "required": False, - 977 "default": None, - 978 "type": str, - 979 }, - 980 # Annovar - 981 "download-annovar": { - 982 "metavar": "Annovar", - 983 "help": """Path to Annovar databases\n""" - 984 f"""(e.g. '{DEFAULT_ANNOVAR_FOLDER}').\n""", - 985 "required": False, - 986 "type": PathType(exists=None, type="dir"), - 987 "default": None, - 988 "gooey": { - 989 "widget": "DirChooser", - 990 "options": { - 991 "default_dir": DEFAULT_DATABASE_FOLDER, - 992 "message": "Path to Annovar databases folder", - 993 }, - 994 }, - 995 }, - 996 "download-annovar-files": { - 997 "metavar": "Annovar code", - 998 "help": """Download Annovar databases for a list of Annovar file code (see Annovar Doc).\n""" - 999 """Use None to donwload all available files,\n""" -1000 """or Annovar keyword (e.g. 
'refGene', 'cosmic70', 'clinvar_202*').\n""" -1001 """Note that refGene will at least be downloaded,\n""" -1002 """and only files that not already exist or changed will be downloaded.\n""", -1003 "required": False, -1004 "default": None, -1005 "type": str, -1006 }, -1007 "download-annovar-url": { -1008 "metavar": "Annovar url", -1009 "help": """Annovar databases URL (see Annovar Doc).\n""", -1010 "required": False, -1011 "default": DEFAULT_ANNOVAR_URL, -1012 "type": str, -1013 }, -1014 # snpEff -1015 "download-snpeff": { -1016 "metavar": "snpEff", -1017 "help": """Download snpEff databases within snpEff folder""", -1018 "required": False, -1019 "default": None, -1020 "type": PathType(exists=None, type="dir"), -1021 "gooey": { -1022 "widget": "DirChooser", -1023 "options": { -1024 "default_dir": DEFAULT_DATABASE_FOLDER, -1025 "message": "Path to snpEff databases folder", -1026 }, -1027 }, -1028 }, -1029 # refSeq -1030 "download-refseq": { -1031 "metavar": "refSeq", -1032 "help": """Path to refSeq databases\n""" -1033 f"""(e.g. '{DEFAULT_REFSEQ_FOLDER}').\n""", -1034 "required": False, -1035 "default": None, -1036 "type": PathType(exists=None, type="dir"), -1037 "gooey": { -1038 "widget": "DirChooser", -1039 "options": { -1040 "default_dir": DEFAULT_DATABASE_FOLDER, -1041 "message": "Path to refGene files folder", -1042 }, -1043 }, -1044 }, -1045 "download-refseq-url": { -1046 "metavar": "refSeq url", -1047 "help": """refSeq databases URL (see refSeq WebSite)\n""" -1048 f"""(e.g. 
'{DEFAULT_REFSEQ_URL}')•/n""", -1049 "required": False, -1050 "default": DEFAULT_REFSEQ_URL, -1051 "type": str, -1052 }, -1053 "download-refseq-prefix": { -1054 "metavar": "refSeq prefix", -1055 "help": """Check existing refSeq files in refSeq folder.\n""", -1056 "required": False, -1057 "default": "ncbiRefSeq", -1058 "type": str, -1059 }, -1060 "download-refseq-files": { -1061 "metavar": "refSeq files", -1062 "help": """List of refSeq files to download.\n""", -1063 "required": False, -1064 "default": "ncbiRefSeq.txt,ncbiRefSeqLink.txt", -1065 "type": str, -1066 }, -1067 "download-refseq-format-file": { -1068 "metavar": "refSeq format file", -1069 "help": """Name of refSeq file to convert in BED format\n""" -1070 """(e.g. 'ncbiRefSeq.txt').\n""" -1071 """Process only if not None.\n""", -1072 "required": False, -1073 "default": None, -1074 "type": str, -1075 }, -1076 "download-refseq-include-utr5": { -1077 "help": """Formating BED refSeq file including 5'UTR.\n""", -1078 "action": "store_true", -1079 "default": False, -1080 }, -1081 "download-refseq-include-utr3": { -1082 "help": """Formating BED refSeq file including 3'UTR.\n""", -1083 "action": "store_true", -1084 "default": False, -1085 }, -1086 "download-refseq-include-chrM": { -1087 "help": """Formating BED refSeq file including Mitochondiral chromosome 'chrM' or 'chrMT'.\n""", -1088 "action": "store_true", -1089 "default": False, -1090 }, -1091 "download-refseq-include-non-canonical-chr": { -1092 "help": """Formating BED refSeq file including non canonical chromosomes.\n""", -1093 "action": "store_true", -1094 "default": False, -1095 }, -1096 "download-refseq-include-non-coding-transcripts": { -1097 "help": """Formating BED refSeq file including non coding transcripts.\n""", -1098 "action": "store_true", -1099 "default": False, -1100 }, -1101 "download-refseq-include-transcript-version": { -1102 "help": """Formating BED refSeq file including transcript version.\n""", -1103 "action": "store_true", -1104 
"default": False, -1105 }, -1106 # dbNSFP -1107 "download-dbnsfp": { -1108 "metavar": "dbNSFP", -1109 "help": """Download dbNSFP databases within dbNSFP folder""" -1110 f"""(e.g. '{DEFAULT_DATABASE_FOLDER}').\n""", -1111 "required": False, -1112 "default": None, -1113 "type": PathType(exists=None, type="dir"), -1114 "gooey": { -1115 "widget": "DirChooser", -1116 "options": { -1117 "default_dir": DEFAULT_DATABASE_FOLDER, -1118 "message": "Path to dbNSFP databases folder", -1119 }, -1120 }, -1121 }, -1122 "download-dbnsfp-url": { -1123 "metavar": "dbNSFP url", -1124 "help": """Download dbNSFP databases URL (see dbNSFP website)\n""" -1125 f"""(e.g. {DEFAULT_DBNSFP_URL}').\n""", -1126 "required": False, -1127 "default": DEFAULT_DBNSFP_URL, -1128 "type": str, -1129 }, -1130 "download-dbnsfp-release": { -1131 "metavar": "dnNSFP release", -1132 "help": """Release of dbNSFP to download (see dbNSFP website)\n""" -1133 """(e.g. '4.4a').\n""", -1134 "required": False, -1135 "default": "4.4a", -1136 }, -1137 "download-dbnsfp-parquet-size": { -1138 "metavar": "dbNSFP parquet size", -1139 "help": """Maximum size (Mb) of data files in Parquet folder.\n""" -1140 """Parquet folder are partitioned (hive) by chromosome (sub-folder),\n""" -1141 """which contain N data files.\n""", -1142 "required": False, -1143 "default": 100, -1144 "type": int, -1145 "gooey": { -1146 "widget": "IntegerField", -1147 "options": {"min": 1, "max": 100000, "increment": 10}, -1148 }, -1149 }, -1150 "download-dbnsfp-subdatabases": { -1151 "help": """Generate dbNSFP sub-databases.\n""" -1152 """dbNSFP provides multiple databases which are split onto multiple columns.\n""" -1153 """This option create a Parquet folder for each sub-database (based on columns names).\n""", -1154 "action": "store_true", -1155 "default": False, -1156 }, -1157 "download-dbnsfp-parquet": { -1158 "help": """Generate a Parquet file for each Parquet folder.\n""", -1159 "action": "store_true", -1160 "default": False, -1161 }, -1162 
"download-dbnsfp-vcf": { -1163 "help": """Generate a VCF file for each Parquet folder.\n""" -1164 """Need genome FASTA file (see --download-genome).\n""", -1165 "action": "store_true", -1166 "default": False, -1167 }, -1168 "download-dbnsfp-no-files-all": { -1169 "help": """Not generate database Parquet/VCF file for the entire database ('ALL').\n""" -1170 """Only sub-databases files will be generated.\n""" -1171 """(see '--download-dbnsfp-subdatabases').\n""", -1172 "action": "store_true", -1173 "default": False, -1174 }, -1175 "download-dbnsfp-add-info": { -1176 "help": """Add INFO column (VCF format) in Parquet folder and file.\n""" -1177 """Useful for speed up full annotation (all available columns).\n""" -1178 """Increase memory and space during generation of files.\n""", -1179 "action": "store_true", -1180 "default": False, -1181 }, -1182 "download-dbnsfp-only-info": { -1183 "help": """Add only INFO column (VCF format) in Parquet folder and file.\n""" -1184 """Useful for speed up full annotation (all available columns).\n""" -1185 """Decrease memory and space during generation of files.\n""" -1186 """Increase time for partial annotation (some available columns).\n""", -1187 "action": "store_true", -1188 "default": False, -1189 }, -1190 "download-dbnsfp-uniquify": { -1191 "help": """Uniquify values within column\n""" -1192 """(e.g. "D,D" to "D", "D,.,T" to "D,T").\n""" -1193 """Remove transcripts information details.\n""" -1194 """Usefull to reduce size of the database.\n""" -1195 """Increase memory and space during generation of files.\n""", -1196 "action": "store_true", -1197 "default": False, -1198 }, -1199 "download-dbnsfp-row-group-size": { -1200 "metavar": "dnNSFP row grooup size", -1201 "help": """Minimum number of rows in a parquet row group (see duckDB doc).\n""" -1202 """Lower can reduce memory usage and slightly increase space during generation,\n""" -1203 """speed up highly selective queries, slow down whole file queries (e.g. 
aggregations).\n""", -1204 "required": False, -1205 "default": 100000, -1206 "type": int, -1207 "gooey": { -1208 "widget": "IntegerField", -1209 "options": {"min": 1, "max": 100000000000, "increment": 10000}, -1210 }, -1211 }, -1212 # AlphaMissense -1213 "download-alphamissense": { -1214 "metavar": "AlphaMissense", -1215 "help": "Path to AlphaMissense databases", -1216 "required": False, -1217 "default": None, -1218 "type": PathType(exists=None, type="dir"), -1219 "gooey": { -1220 "widget": "DirChooser", -1221 "options": { -1222 "default_dir": DEFAULT_DATABASE_FOLDER, -1223 "message": "Path to Alphamissense databases folder", -1224 }, -1225 }, -1226 }, -1227 "download-alphamissense-url": { -1228 "metavar": "AlphaMissense url", -1229 "help": """Download AlphaMissense databases URL (see AlphaMissense website)\n""" -1230 f"""(e.g. '{DEFAULT_ALPHAMISSENSE_URL}').\n""", -1231 "required": False, -1232 "default": DEFAULT_ALPHAMISSENSE_URL, -1233 "type": str, -1234 }, -1235 # Exomiser -1236 "download-exomiser": { -1237 "metavar": "Exomiser", -1238 "help": """Path to Exomiser databases\n""" -1239 f"""(e.g. 
{DEFAULT_EXOMISER_FOLDER}).\n""", -1240 "required": False, -1241 "default": None, -1242 "type": PathType(exists=None, type="dir"), -1243 "gooey": { -1244 "widget": "DirChooser", -1245 "options": { -1246 "default_dir": DEFAULT_DATABASE_FOLDER, -1247 "message": "Path to Exomiser databases folder", -1248 }, -1249 }, -1250 }, -1251 "download-exomiser-application-properties": { -1252 "metavar": "Exomiser application properties", -1253 "help": """Exomiser Application Properties configuration file (see Exomiser website).\n""" -1254 """This file contains configuration settings for the Exomiser tool.\n""" -1255 """If this parameter is not provided, the function will attempt to locate\n""" -1256 """the application properties file automatically based on the Exomiser.\n""" -1257 """Configuration information will be used to download expected releases (if no other parameters).\n""" -1258 """CADD and REMM will be downloaded only if 'path' are provided.\n""", -1259 "required": False, -1260 "default": None, -1261 "type": PathType(exists=True, type="file"), -1262 "gooey": { -1263 "widget": "FileChooser", -1264 "options": { -1265 "wildcard": "All files (*)|*", -1266 "options": { -1267 "default_dir": DEFAULT_EXOMISER_FOLDER, -1268 "message": "Path to Exomiser application properties file", -1269 }, -1270 }, -1271 }, -1272 }, -1273 "download-exomiser-url": { -1274 "metavar": "Exomiser url", -1275 "help": """URL where Exomiser database files can be downloaded from\n""" -1276 f"""(e.g. 
'{DEFAULT_EXOMISER_URL}').\n""", -1277 "required": False, -1278 "default": DEFAULT_EXOMISER_URL, -1279 "type": str, -1280 }, -1281 "download-exomiser-release": { -1282 "metavar": "Exomiser release", -1283 "help": """Release of Exomiser data to download.\n""" -1284 """If "default", "auto", or "config", retrieve from Application Properties file.\n""" -1285 """If not provided (None), from Application Properties file (Exomiser data-version) \n""" -1286 """or default '2109'.\n""", -1287 "required": False, -1288 "default": None, -1289 "type": str, -1290 }, -1291 "download-exomiser-phenotype-release": { -1292 "metavar": "Exomiser phenoptye release", -1293 "help": """Release of Exomiser phenotype to download.\n""" -1294 """If not provided (None), from Application Properties file (Exomiser Phenotype data-version)\n""" -1295 """or Exomiser release.\n""", -1296 "required": False, -1297 "default": None, -1298 "type": str, -1299 }, -1300 "download-exomiser-remm-release": { -1301 "metavar": "Exomiser remm release", -1302 "help": """Release of ReMM (Regulatory Mendelian Mutation) database to download.\n""" -1303 """If "default", "auto", or "config", retrieve from Application Properties file.\n""", -1304 "required": False, -1305 "default": None, -1306 "type": str, -1307 }, -1308 "download-exomiser-remm-url": { -1309 "metavar": "Exomiser remm url", -1310 "help": """URL where ReMM (Regulatory Mendelian Mutation) database files can be downloaded from\n""" -1311 f"""(e.g. 
'{DEFAULT_EXOMISER_REMM_URL}').\n""", -1312 "required": False, -1313 "default": DEFAULT_EXOMISER_REMM_URL, -1314 "type": str, -1315 }, -1316 "download-exomiser-cadd-release": { -1317 "metavar": "Exomiser cadd release", -1318 "help": """Release of CADD (Combined Annotation Dependent Depletion) database to download.\n""" -1319 """If "default", "auto", or "config", retrieve from Application Properties file.\n""", -1320 "required": False, -1321 "default": None, -1322 "type": str, -1323 }, -1324 "download-exomiser-cadd-url": { -1325 "metavar": "Exomiser cadd url", -1326 "help": """URL where CADD (Combined Annotation Dependent Depletion) database files can be downloaded from\n""" -1327 f"""(e.g. '{DEFAULT_EXOMISER_CADD_URL}').\n""", -1328 "required": False, -1329 "default": DEFAULT_EXOMISER_CADD_URL, -1330 "type": str, -1331 }, -1332 "download-exomiser-cadd-url-snv-file": { -1333 "metavar": "Exomiser url snv file", -1334 "help": """Name of the file containing the SNV (Single Nucleotide Variant) data\n""" -1335 """for the CADD (Combined Annotation Dependent Depletion) database.\n""", -1336 "required": False, -1337 "default": "whole_genome_SNVs.tsv.gz", -1338 "type": str, -1339 }, -1340 "download-exomiser-cadd-url-indel-file": { -1341 "metavar": "Exomiser cadd url indel", -1342 "help": """Name of the file containing the INDEL (Insertion-Deletion) data\n""" -1343 """for the CADD (Combined Annotation Dependent Depletion) database.\n""", -1344 "required": False, -1345 "default": "InDels.tsv.gz", -1346 "type": str, -1347 }, -1348 # dbSNP -1349 "download-dbsnp": { -1350 "metavar": "dnSNP", -1351 "help": """Path to dbSNP databases\n""" -1352 f"""(e.g. 
'{DEFAULT_DBSNP_FOLDER}').\n""", -1353 "required": False, -1354 "default": None, -1355 "type": PathType(exists=None, type="dir"), -1356 "gooey": { -1357 "widget": "DirChooser", -1358 "options": { -1359 "default_dir": DEFAULT_DATABASE_FOLDER, -1360 "message": "Path to dbSNP databases folder", -1361 }, -1362 }, -1363 }, -1364 "download-dbsnp-releases": { -1365 "metavar": "dnSNP releases", -1366 "help": """Release of dbSNP to download\n""" -1367 """(e.g. 'b152', 'b152,b156').\n""", -1368 "required": False, -1369 "default": "b156", -1370 "type": str, -1371 }, -1372 "download-dbsnp-release-default": { -1373 "metavar": "dnSNP release default", -1374 "help": """Default Release of dbSNP ('default' symlink)\n""" -1375 """(e.g. 'b156').\n""" -1376 """If None, first release to download will be assigned as default\n""" -1377 """only if it does not exists.\n""", -1378 "required": False, -1379 "default": None, -1380 "type": str, -1381 }, -1382 "download-dbsnp-url": { -1383 "metavar": "dbSNP url", -1384 "help": """URL where dbSNP database files can be downloaded from.\n""" -1385 f"""(e.g. 
'{DEFAULT_DBSNP_URL}').\n""", -1386 "required": False, -1387 "default": DEFAULT_DBSNP_URL, -1388 "type": str, -1389 }, -1390 "download-dbsnp-url-files": { -1391 "metavar": "dbSNP url files", -1392 "help": """Dictionary that maps assembly names to specific dbSNP URL files.\n""" -1393 """It allows you to provide custom dbSNP URL files for specific assemblies\n""" -1394 """instead of using the default file naming convention.\n""", -1395 "required": False, -1396 "default": None, -1397 "type": str, -1398 }, -1399 "download-dbsnp-url-files-prefix": { -1400 "metavar": "dbSNP url files prefix", -1401 "help": """String that represents the prefix of the dbSNP file name for a specific assembly.\n""" -1402 """It is used to construct the full URL of the dbSNP file to be downloaded.\n""", -1403 "required": False, -1404 "default": "GCF_000001405", -1405 "type": str, -1406 }, -1407 "download-dbsnp-assemblies-map": { -1408 "metavar": "dbSNP assemblies map", -1409 "help": """dictionary that maps assembly names to their corresponding dbSNP versions.\n""" -1410 """It is used to construct the dbSNP file name based on the assembly name.\n""", -1411 "required": False, -1412 "default": {"hg19": "25", "hg38": "40"}, -1413 "type": str, -1414 "gooey": {"options": {"initial_value": '{"hg19": "25", "hg38": "40"}'}}, -1415 }, -1416 "download-dbsnp-vcf": { -1417 "help": """Generate well-formatted VCF from downloaded file:\n""" -1418 """- Add and filter contigs associated to assembly\n""" -1419 """- Normalize by splitting multiallelics\n""" -1420 """- Need genome (see --download-genome)\n""", -1421 "action": "store_true", -1422 "default": False, -1423 }, -1424 "download-dbsnp-parquet": { -1425 "help": """Generate Parquet file from VCF.\n""", -1426 "action": "store_true", -1427 "default": False, -1428 }, -1429 # HGMD -1430 "convert-hgmd": { -1431 "metavar": "HGMD", -1432 "help": """Convert HGMD databases.\n""" -1433 """Folder where the HGMD databases will be stored.\n""" -1434 """Fields in VCF, 
Parquet and TSV will be generated.\n""" -1435 """If the folder does not exist, it will be created.\n""", -1436 "required": False, -1437 "default": None, -1438 "type": PathType(exists=None, type="dir"), -1439 "gooey": {"widget": "DirChooser"}, -1440 }, -1441 "convert-hgmd-file": { -1442 "metavar": "HGMD file", -1443 "help": """File from HGMD.\n""" -1444 """Name format 'HGMD_Pro_<release>_<assembly>.vcf.gz'.\n""", -1445 "required": False, -1446 "default": None, -1447 "type": PathType(exists=True, type="file"), -1448 "gooey": {"widget": "FileChooser"}, -1449 }, -1450 "convert-hgmd-basename": { -1451 "metavar": "HGMD basename", -1452 "help": """File output basename.\n""" -1453 """Generated files will be prefixed by basename\n""" -1454 """(e.g. 'HGMD_Pro_MY_RELEASE')\n""" -1455 """By default (None), input file name without '.vcf.gz'.\n""", -1456 "required": False, -1457 "default": None, -1458 "type": str, -1459 }, -1460 # Databases parameters -1461 "generate-param": { -1462 "metavar": "param", -1463 "help": """Parameter file (JSON) with all databases found.\n""" -1464 """Databases folders scanned are defined in config file.\n""" -1465 """Structure of databases follow this structure (see doc):\n""" -1466 """.../<database>/<release>/<assembly>/*.[parquet|vcf.gz|...]\n""", -1467 "required": False, -1468 "default": None, -1469 "type": PathType(exists=None, type=None), -1470 "gooey": { -1471 "widget": "FileSaver", -1472 "options": {"wildcard": "JSON file (*.json)|*.json"}, -1473 }, -1474 }, -1475 "generate-param-description": { -1476 "metavar": "param description", -1477 "help": """Description file (JSON) with all databases found.\n""" -1478 """Contains all databases with description of format, assembly, fields...\n""", -1479 "required": False, -1480 "default": None, -1481 "type": PathType(exists=None, type=None), -1482 "gooey": { -1483 "widget": "FileSaver", -1484 "options": {"wildcard": "JSON file (*.json)|*.json"}, -1485 }, -1486 }, -1487 "generate-param-releases": { 
-1488 "metavar": "param release", -1489 "help": """List of database folder releases to check\n""" -1490 """(e.g. 'current', 'latest').\n""", -1491 "required": False, -1492 "default": "current", -1493 "type": str, -1494 }, -1495 "generate-param-formats": { -1496 "metavar": "param formats", -1497 "help": """List of database formats to check\n""" -1498 """(e.g. 'parquet', 'parquet,vcf,bed,tsv').\n""", -1499 "required": False, -1500 "default": "parquet", -1501 "type": str, -1502 }, -1503 "generate-param-bcftools": { -1504 "help": """Generate parameter JSON file with BCFTools annotation for allowed formats\n""" -1505 """(i.e. 'vcf', 'bed').\n""", -1506 "action": "store_true", -1507 "default": False, -1508 }, -1509 # Help -1510 "help_md": { -1511 "metavar": "help markdown", -1512 "help": """Help Output file in MarkDown format.\n""", -1513 "required": False, -1514 "default": None, -1515 "type": PathType(exists=None, type=None), -1516 "gooey": { -1517 "widget": "FileSaver", -1518 "options": { -1519 "wildcard": "HTML file (*.md)|*.md", -1520 }, -1521 }, -1522 }, -1523 "help_html": { -1524 "metavar": "help html", -1525 "help": """Help Output file in HTML format.\n""", -1526 "required": False, -1527 "default": None, -1528 "type": PathType(exists=None, type=None), -1529 "gooey": { -1530 "widget": "FileSaver", -1531 "options": { -1532 "wildcard": "HTML file (*.html)|*.html", -1533 }, -1534 }, -1535 }, -1536 "help_pdf": { -1537 "metavar": "help pdf", -1538 "help": """Help Output file in PDF format.\n""", -1539 "required": False, -1540 "default": None, -1541 "type": PathType(exists=None, type=None), -1542 "gooey": { -1543 "widget": "FileSaver", -1544 "options": { -1545 "wildcard": "PDF file (*.pdf)|*.pdf", -1546 }, -1547 }, -1548 }, -1549 "help_json_input": { -1550 "metavar": "help JSON input", -1551 "help": """Help input file in JSON format.\n""", -1552 "required": False, -1553 "default": None, -1554 "type": PathType(exists=True, type="file"), -1555 "gooey": { -1556 "widget": 
"FileChooser", -1557 "options": { -1558 "wildcard": "JSON file (*.json)|*.json|" "All files (*)|*", -1559 }, -1560 }, -1561 }, -1562 "help_md_input": { -1563 "metavar": "help MarkDown input", -1564 "help": """Help input file in MarkDown format.\n""", -1565 "required": False, -1566 "default": None, -1567 "type": PathType(exists=True, type="file"), -1568 "gooey": { -1569 "widget": "FileChooser", -1570 "options": { -1571 "wildcard": "MarkDown file (*.md)|*.md|" "All files (*)|*", -1572 }, -1573 }, -1574 }, -1575 "code_type": { -1576 "metavar": "example code type", -1577 "help": """Help example code type for input JSON format\n""" -1578 """(e.g. 'json', 'bash').\n""", -1579 "required": False, -1580 "default": "", -1581 "type": str, -1582 }, -1583 "help_json_input_title": { -1584 "metavar": "help JSON input title", -1585 "help": """Help JSON input title.\n""", -1586 "required": False, -1587 "default": "Help", -1588 "type": str, -1589 }, -1590 # Common -1591 "genomes-folder": { -1592 "metavar": "genomes", -1593 "help": """Folder containing genomes.\n""" -1594 f"""(e.g. '{DEFAULT_GENOME_FOLDER}'""", -1595 "required": False, -1596 "default": DEFAULT_GENOME_FOLDER, -1597 "type": PathType(exists=None, type="dir"), -1598 "gooey": { -1599 "widget": "DirChooser", -1600 "options": { -1601 "default_dir": DEFAULT_GENOME_FOLDER, -1602 "message": "Path to genomes databases folder", -1603 }, -1604 }, -1605 }, -1606 # Shared -1607 "config": { -1608 "metavar": "config", -1609 "help": """Configuration JSON file defined default configuration regarding \n""" -1610 """resources (e.g. threads, memory),\n""" -1611 """settings (e.g. verbosity, temporary files),\n""" -1612 """default folders (e.g. 
for databases)\n""" -1613 """and paths to external tools.\n""", -1614 "required": False, -1615 "default": "{}", -1616 "type": str, -1617 "gooey": {"widget": "FileChooser", "options": {"initial_value": "{}"}}, -1618 }, -1619 "threads": { -1620 "metavar": "threads", -1621 "help": """Specify the number of threads to use for processing HOWARD.\n""" -1622 """It determines the level of parallelism,\n""" -1623 """either on python scripts, duckdb engine and external tools.\n""" -1624 """It and can help speed up the process/tool.\n""" -1625 """Use -1 to use all available CPU/cores.\n""" -1626 """Either non valid value is 1 CPU/core.\n""", -1627 "required": False, -1628 "type": int, -1629 "default": -1, -1630 "gooey": { -1631 "widget": "IntegerField", -1632 "options": {"min": -1, "max": 1000, "increment": 1}, -1633 }, -1634 "extra": { -1635 "examples": { -1636 "# Automatically detect all available CPU/cores": '"threads": -1', -1637 "# Define 8 CPU/cores": '"threads": 8', -1638 } -1639 }, -1640 }, -1641 "memory": { -1642 "metavar": "memory", -1643 "help": """Specify the memory to use in format FLOAT[kMG]\n""" -1644 """(e.g. 
'8G', '12.42G', '1024M').\n""" -1645 """It determines the amount of memory for duckDB engine and external tools\n""" -1646 """(especially for JAR programs).\n""" -1647 """It can help to prevent 'out of memory' failures.\n""" -1648 """By default (None) is 80%% of RAM (for duckDB).\n""", -1649 "required": False, -1650 "type": str, -1651 "default": None, -1652 "extra": { -1653 "format": "FLOAT[kMG]", -1654 "examples": { -1655 "# Automatically detect all available CPU/cores": '"threads": -1', -1656 "# Define 8 CPU/cores": '"threads": 8', -1657 }, -1658 }, -1659 }, -1660 "chunk_size": { -1661 "metavar": "chunk size", -1662 "help": """Number of records in batch to export output file.\n""" -1663 """The lower the chunk size, the less memory consumption.\n""" -1664 """For Parquet partitioning, files size will depend on the chunk size.\n""", -1665 "required": False, -1666 "default": 1000000, -1667 "type": int, -1668 "gooey": { -1669 "widget": "IntegerField", -1670 "options": {"min": 1, "max": 100000000000, "increment": 10000}, -1671 }, -1672 "extra": { -1673 "examples": { -1674 "Chunk size of 1.000.000 by default": '"chunk_size": 1000000', -1675 "Smaller chunk size to reduce Parquet file size and memory usage": '"chunk_size": 100000', -1676 } -1677 }, -1678 }, -1679 "tmp": { -1680 "metavar": "Temporary folder", -1681 "help": """Temporary folder (e.g. 
'/tmp').\n""" -1682 """By default, '.tmp' for duckDB (see doc),""" -1683 """external tools and python scripts.\n""", -1684 "required": False, -1685 "default": None, -1686 "type": PathType(exists=True, type="dir"), -1687 "gooey": {"widget": "DirChooser"}, -1688 "extra": { -1689 "examples": { -1690 "# System temporary folder": '"tmp": "/tmp"', -1691 "# HOWARD work directory": '"tmp": "~/howard/tmp"', -1692 "# Current work directory": '"tmp": ".tmp"', -1693 } -1694 }, -1695 }, -1696 "duckdb_settings": { -1697 "metavar": "duckDB settings", -1698 "help": """DuckDB settings (see duckDB doc) as JSON (string or file).\n""" -1699 """These settings have priority (see options 'threads', 'tmp'...).\n""" -1700 """Examples: '{"TimeZone": "GMT", "temp_directory": "/tmp/duckdb", "threads": 8}'.\n""", -1701 "required": False, -1702 "default": None, -1703 "type": PathType(exists=True, type="file"), -1704 "gooey": { -1705 "widget": "FileChooser", -1706 "options": { -1707 "wildcard": "JSON file (*.json)|*.json|" "All files (*)|*", -1708 }, -1709 }, -1710 "extra": { -1711 "examples": { -1712 "DuckDB settings JSON file": '"duckdb_settings": "/path/to/duckdb_config.json"', -1713 "JSON string for Time zone, temporary directory and threads for duckDB": """\"duckdb_settings\": {\n""" -1714 """ \"TimeZone\": \"GMT\",\n""" -1715 """ \"temp_directory\": \"/tmp/duckdb\",\n""" -1716 """ \"threads\": 8\n""" -1717 """}""", -1718 } -1719 }, -1720 }, -1721 "verbosity": { -1722 "metavar": "verbosity", -1723 "help": """Verbosity level\n""" -1724 """Available: CRITICAL, ERROR, WARNING, INFO, DEBUG or NOTSET\n""" -1725 """- DEBUG: Detailed information, typically of interest only when diagnosing problems.\n""" -1726 """- INFO: Confirmation that things are working as expected.\n""" -1727 """- WARNING: An indication that something unexpected happened.\n""" -1728 """- ERROR: Due to a more serious problem.\n""" -1729 """- CRITICAL: A serious error.\n""" -1730 """- FATAL: A fatal error.\n""" -1731 """- 
NOTSET: All messages.\n""", -1732 "required": False, -1733 "choices": [ -1734 "CRITICAL", -1735 "ERROR", -1736 "WARNING", -1737 "INFO", -1738 "DEBUG", -1739 "NOTSET", -1740 "WARN", -1741 "FATAL", -1742 ], -1743 "default": "INFO", -1744 "type": str, -1745 "gooey": {"widget": "Dropdown", "options": {}}, -1746 "extra": { -1747 "examples": { -1748 "Default verbosity": '"verbosity": "INFO"', -1749 "ERROR level (quiet mode)": '"verbosity": "ERROR"', -1750 "For debug": '"verbosity": "DEBUG"', -1751 } -1752 }, -1753 }, -1754 "access": { -1755 "metavar": "access mode", -1756 "help": """Access mode to variants file or database.\n""" -1757 """Either 'RW' for Read and Write, or 'RO' for Read Only.\n""", -1758 "default": "RW", -1759 "type": str, -1760 "choices": ["RW", "RO"], -1761 "gooey": {"widget": "Dropdown", "options": {}}, -1762 "extra": { -1763 "examples": { -1764 "Read and Write mode": '"access": "RW"', -1765 "Read only mode": '"access": "RO"', -1766 } -1767 }, -1768 }, -1769 "log": { -1770 "metavar": "log", -1771 "help": """Logs file\n""" """(e.g. 
'my.log').\n""", -1772 "required": False, -1773 "default": None, -1774 "type": PathType(exists=None, type="file"), -1775 "gooey": {"widget": "FileSaver"}, -1776 "extra": { -1777 "examples": { -1778 "Relative path to log file": '"log": "my.log"', -1779 "# HOWARD work directory": '"log": "~/howard/log"', -1780 "Full path to log file": '"log": "/tmp/my.log"', -1781 } -1782 }, -1783 }, -1784 # Interactivity -1785 "interactive": { -1786 "help": """Interative mose..\n""", -1787 "action": "store_true", -1788 "default": False, -1789 }, -1790 # Verbosity -1791 "quiet": {"help": argparse.SUPPRESS, "action": "store_true", "default": False}, -1792 "verbose": {"help": argparse.SUPPRESS, "action": "store_true", "default": False}, -1793 "debug": {"help": argparse.SUPPRESS, "action": "store_true", "default": False}, -1794 # Only for HELP -1795} -1796 + 120 + 121# Arguments dict + 122arguments = { + 123 # Process & other + 124 "input": { + 125 "metavar": "input", + 126 "help": """Input file path.\n""" + 127 """Format file must be either VCF, Parquet, TSV, CSV, PSV or duckDB.\n""" + 128 """Files can be compressesd (e.g. vcf.gz, tsv.gz).\n""", + 129 "required": False, + 130 "default": None, + 131 "type": PathType(exists=True, type=None), + 132 "gooey": { + 133 "widget": "FileChooser", + 134 "options": { + 135 "wildcard": "Parquet file (*.parquet)|*.parquet|" "All files (*)|*" + 136 }, + 137 }, + 138 }, + 139 "output": { + 140 "metavar": "output", + 141 "help": """Output file path.\n""" + 142 """Format file must be either VCF, Parquet, TSV, CSV, PSV or duckDB.\n""" + 143 """Files can be compressesd (e.g. 
vcf.gz, tsv.gz).\n""", + 144 "required": False, + 145 "default": None, + 146 "type": PathType(exists=None, type=None), + 147 "gooey": {"widget": "FileSaver"}, + 148 }, + 149 "param": { + 150 "metavar": "param", + 151 "help": """Parameters JSON file (or string) defines parameters to process \n""" + 152 """annotations, calculations, prioritizations, convertions and queries.\n""", + 153 "default": "{}", + 154 "type": PathType(exists=None, type=None), + 155 "gooey": { + 156 "widget": "FileChooser", + 157 "options": { + 158 "initial_value": "", + 159 "wildcard": "JSON file (*.json)|*.json|" "All files (*)|*", + 160 }, + 161 }, + 162 }, + 163 "query": { + 164 "metavar": "query", + 165 "help": """Query in SQL format\n""" + 166 """(e.g. 'SELECT * FROM variants LIMIT 50').\n""", + 167 "default": None, + 168 "type": str, + 169 "gooey": { + 170 "widget": "Textarea", + 171 "options": {"initial_value": "SELECT * FROM variants"}, + 172 }, + 173 "extra": {"param_section": "query"}, + 174 }, + 175 "filter": { + 176 "metavar": "filter", + 177 "help": """Filter variant using SQL format\n""" """(e.g. 'POS < 100000').\n""", + 178 "default": None, + 179 "type": str, + 180 "gooey": { + 181 "widget": "Textarea", + 182 "options": {"initial_value": ""}, + 183 }, + 184 # "extra": {"param_section": "filter"}, + 185 }, + 186 "samples": { + 187 "metavar": "samples", + 188 "help": """List of samples\n""" """(e.g. 
'sample1,sample2').\n""", + 189 "default": None, + 190 "type": str, + 191 "gooey": { + 192 "widget": "Textarea", + 193 "options": {"initial_value": ""}, + 194 }, + 195 # "extra": {"param_section": "filter"}, + 196 }, + 197 "output_query": { + 198 "metavar": "output", + 199 "help": """Output Query file.\n""" + 200 """Format file must be either VCF, Parquet, TSV, CSV, PSV or duckDB.\n""", + 201 "default": None, + 202 "type": PathType(exists=None, type=None), + 203 "gooey": { + 204 "widget": "FileSaver", + 205 "options": { + 206 "wildcard": "All files (*)|*", + 207 }, + 208 }, + 209 }, + 210 # Annotations + 211 "annotations": { + 212 "metavar": "annotations", + 213 "help": """Annotation with databases files, or with tools,\n""" + 214 """as a list of files in Parquet, VCF, BED, or keywords\n""" + 215 """ (e.g. 'file.parquet,bcftools:file2.vcf.gz,annovar:refGene,snpeff').\n""" + 216 """- For a Parquet/VCF/BED, use file paths\n""" + 217 """ (e.g. 'file1.parquet,file2.vcf.gz').\n""" + 218 """- For BCFTools annotation, use keyword 'bcftools' with file paths\n""" + 219 """ (e.g. 'bcftools:file.vcf.gz:file.bed.gz').\n""" + 220 """- For Parquet annotation, use keyword 'parquet' with file paths\n""" + 221 """ (e.g. 'parquet:file.parquet').\n""" + 222 """- For Annovar annotation, use keyword 'annovar' with annovar code\n""" + 223 """ (e.g. 'annovar:refGene', 'annovar:refGene:cosmic70').\n""" + 224 """- For snpeff annotation, use keyword 'snpeff' with options\n""" + 225 """ (e.g. 'snpeff', 'snpeff:-hgvs -noShiftHgvs -spliceSiteSize 3').\n""" + 226 """- For snpSift annotation, use keyword 'snpsift' with file paths\n""" + 227 """ (e.g. 'snpsift:file.vcf.gz:file.bed.gz').\n""" + 228 """- For Exomiser annotation, use keyword 'exomiser' with options as key=value\n""" + 229 """ (e.g. 'exomiser:preset=exome:transcript_source=refseq').\n""" + 230 """- For add all availalbe databases files, use 'ALL' keyword,\n""" + 231 """ with filters on format (e.g. 'parquet', 'vcf') and release (e.g. 
'current', 'devel')\n""" + 232 """ (e.g. 'ALL', ALL:format=parquet', 'ALL:format=parquet:release=current', 'ALL:format=parquet+vcf:release=current+devel').\n""", + 233 "default": None, + 234 "type": str, + 235 "extra": { + 236 "format": "DB[,DB]*[,bcftools:DB[:DB]*][,annovar:KEY[:KEY]*][,snpeff][,exomiser[:var=val]*]", + 237 "examples": { + 238 "Parquet method annotation with 2 Parquet files": '"annotations": "/path/to/database1.parquet,/path/to/database2.parquet"', + 239 "Parquet method annotation with multiple file formats": '"annotations": "/path/to/database1.parquet,/path/to/database2.vcf.gz,/path/to/database2.bed.gz"', + 240 "Parquet method annotation with available Parquet databases in current release (check databases in production)": '"annotations": "ALL:parquet:current"', + 241 "Parquet method annotation with available Parquet databases in latest release (check databases before production)": '"annotations": "ALL:parquet:latest"', + 242 "Annotation with BCFTools": '"annotations": "bcftools:/path/to/database2.vcf.gz:/path/to/database2.bed.gz"', + 243 "Annotation with Annovar (refGene with hgvs and Cosmic)": '"annotations": "annovar:refGene:cosmic70"', + 244 "Annotation with snpEff (default options)": '"annotations": "snpeff"', + 245 "Annotation with snpEff (with options)": '"annotations": "snpeff:-hgvs -noShiftHgvs -spliceSiteSize 3"', + 246 "Annotation with snpSift": '"annotations": "snpsift:/path/to/database2.vcf.gz:/path/to/database2.bed.gz"', + 247 "Annotation with Exomiser with options": '"annotations": "exomiser:preset=exome:hpo=0001156+0001363+0011304+0010055:transcript_source=refseq:release=2109"', + 248 "Multiple tools annotations (Parquet method, BCFTools, Annovar, snpEff and Exomiser)": '"annotations": "/path/to/database1.parquet,bcftools:/path/to/database2.vcf.gz,annovar:refGene:cosmic70,snpeff,exomiser:preset=exome:transcript_source=refseq"', + 249 }, + 250 }, + 251 }, + 252 # Annotations Parquet + 253 "annotation_parquet": { + 254 "metavar": 
"annotation parquet", + 255 "help": """Annotation with Parquet method, as a list of files in Parquet, VCF or BED\n""" + 256 """ (e.g. 'file1.parquet,file2.vcf.gz').\n""" + 257 """For add all availalbe databases files, use 'ALL' keyword,\n""" + 258 """ with filters on type and release\n""" + 259 """ (e.g. 'ALL', 'ALL:parquet:current', 'ALL:parquet,vcf:current,devel').\n""", + 260 "default": None, + 261 "type": str, + 262 "nargs": "+", + 263 "gooey": { + 264 "widget": "MultiFileChooser", + 265 "options": { + 266 "default_dir": DEFAULT_ANNOTATIONS_FOLDER, + 267 "message": "Database files", + 268 }, + 269 }, + 270 "extra": { + 271 "format": "DB[,DB]*", + 272 "examples": { + 273 "Parquet method annotation with 2 Parquet files": '"annotation_parquet": "/path/to/database1.parquet,/path/to/database2.parquet"', + 274 "Parquet method annotation with multiple file formats": '"annotation_parquet": "/path/to/database1.parquet,/path/to/database2.vcf.gz,/path/to/database2.bed.gz"', + 275 "Parquet method annotation with available Parquet databases in current release (check databases in production)": '"annotation_parquet": "ALL:parquet:current"', + 276 "Parquet method annotation with available Parquet databases in latest release (check databases before production)": '"annotation_parquet": "ALL:parquet:latest"', + 277 }, + 278 }, + 279 }, + 280 # Annotations BCFTools + 281 "annotation_bcftools": { + 282 "metavar": "annotation BCFTools", + 283 "help": """Annotation with BCFTools, as a list of files VCF or BED\n""" + 284 """ (e.g. 
'file.vcf.gz,file.bed.gz').\n""", + 285 "default": None, + 286 "type": str, + 287 "nargs": "+", + 288 "gooey": { + 289 "widget": "MultiFileChooser", + 290 "options": { + 291 "default_dir": DEFAULT_ANNOTATIONS_FOLDER, + 292 "message": "Database files", + 293 }, + 294 }, + 295 "extra": { + 296 "format": "DB[,DB]*", + 297 "examples": { + 298 "Annovation with BCFTools": '"annotation_bcftools": "/path/to/database2.vcf.gz,/path/to/database2.bed.gz"', + 299 }, + 300 }, + 301 }, + 302 # Annotations snpeff + 303 "annotation_snpeff": { + 304 "metavar": "annotation snpEff", + 305 "help": """Annotation with snpEff, with options\n""" + 306 """ (e.g. '', '-hgvs -noShiftHgvs -spliceSiteSize 3').\n""", + 307 "default": None, + 308 "type": str, + 309 "extra": { + 310 "format": "options", + 311 "examples": { + 312 "Annotation with snpEff (default options)": '"annotation_snpeff": ""', + 313 "Annotation with snpEff (with options)": '"annotation_snpeff": "-hgvs -noShiftHgvs -spliceSiteSize 3"', + 314 }, + 315 }, + 316 }, + 317 # Annotations snpSift + 318 "annotation_snpsift": { + 319 "metavar": "annotation snpSift", + 320 "help": """Annotation with snpSift, as a list of files VCF\n""" + 321 """ (e.g. 'file.vcf.gz,file.bed.gz').\n""", + 322 "default": None, + 323 "type": str, + 324 "nargs": "+", + 325 "gooey": { + 326 "widget": "MultiFileChooser", + 327 "options": { + 328 "default_dir": DEFAULT_ANNOTATIONS_FOLDER, + 329 "message": "Database files", + 330 }, + 331 }, + 332 "extra": { + 333 "format": "DB[,DB]*", + 334 "examples": { + 335 "Annovation with snpSift": '"annotation_snpsift": "/path/to/database2.vcf.gz,/path/to/database2.bed.gz"', + 336 }, + 337 }, + 338 }, + 339 # Annotations Annovar + 340 "annotation_annovar": { + 341 "metavar": "annotation Annovar", + 342 "help": """Annotation with Annovar, as a list of database keywords\n""" + 343 """ (e.g. 
'refGene', 'refGene:cosmic70').\n""", + 344 "default": None, + 345 "type": str, + 346 "extra": { + 347 "format": "keyword[:keyword]*", + 348 "examples": { + 349 "Annotation with Annovar (refGene with hgvs and Cosmic)": '"annotation_annovar": "refGene:cosmic70"', + 350 }, + 351 }, + 352 }, + 353 # Annotations Exomiser + 354 "annotation_exomiser": { + 355 "metavar": "annotation Exomiser", + 356 "help": """Annotation with Exomiser, as a list of options\n""" + 357 """ (e.g. 'preset=exome:transcript_source=refseq').\n""", + 358 "default": None, + 359 "type": str, + 360 "extra": { + 361 "format": "option=value[:option=value]", + 362 "examples": { + 363 "Annotation with Exomiser with options": '"annotation_exomiser": "preset=exome:hpo=0001156+0001363+0011304+0010055:transcript_source=refseq:release=2109"', + 364 }, + 365 }, + 366 }, + 367 # Annotations Splice + 368 "annotation_splice": { + 369 "metavar": "annotation Splice", + 370 "help": """Annotation with Splice, as a list of options\n""" + 371 """ (e.g. 
'split_mode=one:spliceai_distance=500:spliceai_mask=1').\n""", + 372 "default": None, + 373 "type": str, + 374 "extra": { + 375 "format": "option=value[:option=value]", + 376 "examples": { + 377 "Annotation with Splice with options": '"annotation_splice": "split_mode=one:spliceai_distance=500:spliceai_mask=1"', + 378 }, + 379 }, + 380 }, + 381 # Update annotation + 382 "annotations_update": { + 383 "help": """Update option for annotation (Only for Parquet annotation).\n""" + 384 """If True, annotation fields will be removed and re-annotated.\n""" + 385 """These options will be applied to all annotation databases.\n""", + 386 "action": "store_true", + 387 "default": False, + 388 "gooey": { + 389 "widget": "BlockCheckbox", + 390 "options": {"checkbox_label": "Update annotation method"}, + 391 }, + 392 "extra": {"param_section": "annotation:options"}, + 393 }, + 394 # Append annotation + 395 "annotations_append": { + 396 "help": """Append option for annotation (Only for Parquet annotation).\n""" + 397 """If True, annotation fields will be annotated only if not annotation exists for the variant.\n""" + 398 """These options will be applied to all annotation databases.\n""", + 399 "action": "store_true", + 400 "default": False, + 401 "gooey": { + 402 "widget": "BlockCheckbox", + 403 "options": {"checkbox_label": "Append annotation method"}, + 404 }, + 405 "extra": {"param_section": "annotation:options"}, + 406 }, + 407 # Calculations + 408 "calculations": { + 409 "metavar": "operations", + 410 "help": """Quick calculations on genetic variants information and genotype information,\n""" + 411 """as a list of operations (e.g. 
'VARTYPE,variant_id').\n""" + 412 """List of available calculations by default\n""" + 413 """ (unsensitive case, see doc for more information):\n""" + 414 """ VARTYPE """ + 415 """ snpeff_hgvs """ + 416 """ FINDBYPIPELINE """ + 417 """ GENOTYPECONCORDANCE """ + 418 """ BARCODE """ + 419 """ TRIO """ + 420 """ VAF """ + 421 """ VAF_STATS """ + 422 """ DP_STATS """ + 423 """\n""", + 424 "default": None, + 425 "type": str, + 426 }, + 427 # Prioritizations + 428 "prioritizations": { + 429 "metavar": "prioritisations", + 430 "help": """List of prioritization profiles to process (based on Prioritization JSON file),\n""" + 431 """such as 'default', 'rare variants', 'low allele frequency', 'GERMLINE'.\n""" + 432 """By default, all profiles available will be processed.\n""", + 433 "default": None, + 434 "type": str, + 435 "extra": { + 436 # "param_section": "prioritization", + 437 "examples": { + 438 "Prioritization profile by default": """"prioritization": "default" """, + 439 "Prioritization profile by default and GERMLINE from Configuration JSON file": """"prioritization": "default,GERMLINE" """, + 440 } + 441 }, + 442 }, + 443 # Prioritization config + 444 "prioritization_config": { + 445 "metavar": "prioritization config", + 446 "help": """Prioritization configuration JSON file (defines profiles, see doc).\n""", + 447 "default": None, + 448 "type": PathType(exists=True, type="file"), + 449 "gooey": { + 450 "widget": "FileChooser", + 451 "options": {"wildcard": "JSON file (*.json)|*.json|" "All files (*)|*"}, + 452 }, + 453 "extra": { + 454 "param_section": "prioritization", + 455 "examples": { + 456 "Prioritization configuration JSON file as an option": """"prioritization_config": "prioritization_config.json" """ + 457 }, + 458 }, + 459 }, + 460 "profiles": { + 461 "metavar": "profiles", + 462 "help": """List of prioritization profiles to process (based on Prioritization JSON file),\n""" + 463 """such as 'default', 'rare variants', 'low allele frequency', 
'GERMLINE'.\n""" + 464 """By default, all profiles available will be processed.\n""", + 465 "default": None, + 466 "type": str, + 467 }, + 468 "default_profile": { + 469 "metavar": "default profile", + 470 "help": """Prioritization profile by default (see doc).\n""" + 471 """Default is the first profile in the list of prioritization profiles.\n""", + 472 "default": None, + 473 "type": str, + 474 }, + 475 "pzfields": { + 476 "metavar": "pzfields", + 477 "help": """Prioritization fields to provide (see doc).\n""" + 478 """Available: PZScore, PZFlag, PZTags, PZComment, PZInfos\n""", + 479 "default": "PZScore,PZFlag", + 480 "type": str, + 481 }, + 482 "prioritization_score_mode": { + 483 "metavar": "prioritization score mode", + 484 "help": """Prioritization Score mode (see doc).\n""" + 485 """Available: HOWARD (increment score), VaRank (max score)\n""", + 486 "default": "HOWARD", + 487 "type": str, + 488 "choices": ["HOWARD", "VaRank"], + 489 "gooey": {"widget": "Dropdown", "options": {}}, + 490 }, + 491 # Query print options + 492 "query_limit": { + 493 "metavar": "query limit", + 494 "help": """Limit of number of row for query (only for print result, not output).\n""", + 495 "default": 10, + 496 "type": int, + 497 "gooey": { + 498 "widget": "IntegerField", + 499 "options": {"min": 1, "max": 10000, "increment": 10}, + 500 }, + 501 }, + 502 "query_print_mode": { + 503 "metavar": "print mode", + 504 "help": """Print mode of query result (only for print result, not output).\n""" + 505 """Either None (native), 'markdown', 'tabulate' or disabled.\n""", + 506 "choices": [None, "markdown", "tabulate", "disabled"], + 507 "default": None, + 508 "type": str, + 509 "gooey": {"widget": "Dropdown", "options": {}}, + 510 }, + 511 # Explode infos + 512 "explode_infos": { + 513 "help": """Explode VCF INFO/Tag into 'variants' table columns.\n""", + 514 "action": "store_true", + 515 "default": False, + 516 }, + 517 "explode_infos_prefix": { + 518 "metavar": "explode infos prefix", + 
519 "help": """Explode VCF INFO/Tag with a specific prefix.\n""", + 520 "default": "", + 521 "type": str, + 522 }, + 523 "explode_infos_fields": { + 524 "metavar": "explode infos list", + 525 "help": """Explode VCF INFO/Tag specific fields/tags.\n""" + 526 """Keyword `*` specify all available fields, except those already specified.\n""" + 527 """Pattern (regex) can be used, such as `.*_score` for fields named with '_score' at the end.\n""" + 528 """Examples:\n""" + 529 """- 'HGVS,SIFT,Clinvar' (list of fields)\n""" + 530 """- 'HGVS,*,Clinvar' (list of fields with all other fields at the end)\n""" + 531 """- 'HGVS,.*_score,Clinvar' (list of 2 fields with all scores in the middle)\n""" + 532 """- 'HGVS,.*_score,*' (1 field, scores, all other fields)\n""" + 533 """- 'HGVS,*,.*_score' (1 field, all other fields, all scores)\n""", + 534 "default": "*", + 535 "type": str, + 536 }, + 537 # Include header + 538 "include_header": { + 539 "help": """Include header (in VCF format) in output file.\n""" + 540 """Only for compatible formats (tab-delimiter format as TSV or BED).\n""", + 541 "action": "store_true", + 542 "default": False, + 543 }, + 544 # Sort By + 545 "order_by": { + 546 "metavar": "order by", + 547 "help": """List of columns to sort the result-set in ascending or descending order.\n""" + 548 """Use SQL format, and keywords ASC (ascending) and DESC (descending).\n""" + 549 """If a column is not available, order will not be considered.\n""" + 550 """Order is enable only for compatible format (e.g. 
TSV, CSV, JSON).\n""" + 551 """Examples: 'ACMG_score DESC', 'PZFlag DESC, PZScore DESC'.\n""", + 552 "default": "", + 553 "type": str, + 554 "extra": { + 555 "examples": { + 556 "Order by ACMG score in descending order": """"order_by": "ACMG_score DESC" """, + 557 "Order by PZFlag and PZScore in descending order": """"order_by": "PZFlag DESC, PZScore DESC" """, + 558 } + 559 }, + 560 }, + 561 # Parquet partition + 562 "parquet_partitions": { + 563 "metavar": "parquet partitions", + 564 "help": """Parquet partitioning using hive (available for any format).\n""" + 565 """This option is faster parallel writing, but memory consuming.\n""" + 566 """Use 'None' (string) for NO partition but split parquet files into a folder.\n""" + 567 """Examples: '#CHROM', '#CHROM,REF', 'None'.\n""", + 568 "default": None, + 569 "type": str, + 570 }, + 571 # From annovar + 572 "input_annovar": { + 573 "metavar": "input annovar", + 574 "help": """Input Annovar file path.\n""" + 575 """Format file must be a Annovar TXT file, associated with '.idx'.\n""", + 576 "required": False, + 577 "default": None, + 578 "type": PathType(exists=True, type=None), + 579 "gooey": { + 580 "widget": "FileChooser", + 581 "options": { + 582 "wildcard": "Parquet file (*.parquet)|*.parquet|" "All files (*)|*" + 583 }, + 584 }, + 585 }, + 586 "output_annovar": { + 587 "metavar": "output annovar", + 588 "help": """Output Annovar file path.\n""" + 589 """Format file must be either VCF compressesd file '.vcf.gz'.\n""", + 590 "required": False, + 591 "default": None, + 592 "type": PathType(exists=None, type=None), + 593 "gooey": {"widget": "FileSaver"}, + 594 }, + 595 # From Annovar + 596 "annovar_code": { + 597 "metavar": "Annovar code", + 598 "help": """Annovar code, or database name.\n""" + 599 """Usefull to name databases columns.\n""", + 600 "required": False, + 601 "default": None, + 602 "type": str, + 603 }, + 604 "annovar_to_parquet": { + 605 "metavar": "to parquet", + 606 "help": """Parquet file 
conversion.\n""", + 607 "required": False, + 608 "default": None, + 609 "type": PathType(exists=None, type=None), + 610 "gooey": { + 611 "widget": "FileSaver", + 612 "options": { + 613 "wildcard": "HTML file (*.parquet)|*.parquet", + 614 }, + 615 }, + 616 }, + 617 # "multi_variant": { + 618 # "metavar": "multi variant", + 619 # "help": """Variant with multiple annotation lines.\n""" + 620 # """Either 'auto' (auto-detection), 'enable' or 'disable'.\n""", + 621 # "default": "auto", + 622 # "type": str, + 623 # "choices": ["auto", "enable", "disable"], + 624 # "gooey": { + 625 # "widget": "Dropdown", + 626 # "options": {} + 627 # } + 628 # }, + 629 # "reduce_memory": { + 630 # "metavar": "reduce memory", + 631 # "help": """Reduce memory option,\n""" + 632 # """either 'auto' (auto-detection), 'enable' or 'disable'.\n""", + 633 # "default": "auto", + 634 # "type": str, + 635 # "choices": ["auto", "enable", "disable"], + 636 # "gooey": { + 637 # "widget": "Dropdown", + 638 # "options": {} + 639 # } + 640 # }, + 641 "annovar_multi_variant": { + 642 "metavar": "Annovar multi variant", + 643 "help": """Variant with multiple annotation lines on Annovar file.\n""" + 644 """Either 'auto' (auto-detection), 'enable' or 'disable'.\n""", + 645 "default": "auto", + 646 "type": str, + 647 "choices": ["auto", "enable", "disable"], + 648 "gooey": {"widget": "Dropdown", "options": {}}, + 649 }, + 650 "annovar_reduce_memory": { + 651 "metavar": "reduce memory", + 652 "help": """Reduce memory option for Annovar convert,\n""" + 653 """either 'auto' (auto-detection), 'enable' or 'disable'.\n""", + 654 "default": "auto", + 655 "type": str, + 656 "choices": ["auto", "enable", "disable"], + 657 "gooey": {"widget": "Dropdown", "options": {}}, + 658 }, + 659 # From Extann + 660 "input_extann": { + 661 "metavar": "input extann", + 662 "help": """Input Extann file path.\n""" + 663 """Format file must be a Extann TXT file or TSV file.\n""" + 664 """File need to have at least the genes 
column.\n""", + 665 "required": False, + 666 "default": None, + 667 "type": PathType(exists=True, type=None), + 668 "gooey": { + 669 "widget": "FileChooser", + 670 "options": { + 671 "wildcard": "VCF, Parquet, TSV, CSV, PSV or duckDB|*.*|" + 672 "All files (*)|*" + 673 }, + 674 }, + 675 }, + 676 "output_extann": { + 677 "metavar": "output extann", + 678 "help": """Output Extann file path.\n""" + 679 """Output extann file, should be BED or BED.gz.\n""", + 680 "required": False, + 681 "default": None, + 682 "type": PathType(exists=None, type=None), + 683 "gooey": {"widget": "FileSaver"}, + 684 }, + 685 "mode_extann": { + 686 "metavar": "mode extann", + 687 "help": """Mode extann selection.\n""" + 688 """How to pick transcript from ncbi, keep all,\n""" + 689 """keep the longest, or keep the chosen one (transcript_extann).\n""", + 690 "required": False, + 691 "default": "longest", + 692 "choices": ["all", "longest", "chosen"], + 693 "type": str, + 694 }, + 695 "param_extann": { + 696 "metavar": "param extann", + 697 "help": """Param extann file path.\n""" + 698 """Param containing configuration, options to replace chars and\n""" + 699 """bedlike header description, conf vcf specs.\n""" + 700 """(e.g. 
'~/howard/config/param.extann.json')\n""", + 701 "required": False, + 702 "default": None, + 703 "type": PathType(exists=True, type=None), + 704 "gooey": { + 705 "widget": "FileChooser", + 706 "options": {"wildcard": "TSV file format|*.tsv|"}, + 707 }, + 708 }, + 709 # Calculation + 710 "calculation_config": { + 711 "metavar": "calculation config", + 712 "help": """Calculation configuration JSON file.\n""", + 713 "default": None, + 714 "type": PathType(exists=True, type="file"), + 715 "gooey": { + 716 "widget": "FileChooser", + 717 "options": {"wildcard": "JSON file (*.json)|*.json|" "All files (*)|*"}, + 718 }, + 719 "extra": { + 720 "param_section": "calculation", + 721 "examples": { + 722 "Calculation configuration JSON file as an option": """"calculation_config": "calculation_config.json" """ + 723 }, + 724 }, + 725 }, + 726 "show_calculations": { + 727 "help": """Show available calculation operations.\n""", + 728 "action": "store_true", + 729 "default": False, + 730 }, + 731 "hgvs_field": { + 732 "metavar": "HGVS field", + 733 "help": """HGVS INFO/tag containing a list o HGVS annotations.\n""", + 734 "default": "hgvs", + 735 "type": str, + 736 "extra": {"param_section": "calculation:calculations:NOMEN:options"}, + 737 }, + 738 "transcripts": { + 739 "metavar": "transcripts", + 740 "help": """Transcripts TSV file,\n""" + 741 """with Transcript in first column, optional Gene in second column.\n""", + 742 "default": None, + 743 "type": PathType(exists=True, type="file"), + 744 "gooey": { + 745 "widget": "FileChooser", + 746 "options": {"wildcard": "TSV file (*.tsv)|*.tsv|" "All files (*)|*"}, + 747 }, + 748 "extra": {"param_section": "calculation:calculations:NOMEN:options"}, + 749 }, + 750 "trio_pedigree": { + 751 "metavar": "trio pedigree", + 752 "help": """Pedigree Trio for trio inheritance calculation.\n""" + 753 """Either a JSON file or JSON string or a list of samples\n""" + 754 """(e.g. 
'sample1,sample2,sample3' for father, mother and child,\n""" + 755 """ '{"father": "sample1", "mother": "sample2", "child": "sample3"}').\n""", + 756 "default": None, + 757 "gooey": { + 758 "widget": "FileChooser", + 759 "options": {"wildcard": "JSON file (*.json)|*.json|" "All files (*)|*"}, + 760 }, + 761 "extra": {"param_section": "calculation:calculations:TRIO"}, + 762 }, + 763 "family_pedigree": { + 764 "metavar": "family pedigree", + 765 "help": """Pedigree family for barcode calculation on genotype.\n""" + 766 """Either a JSON file or JSON string or a list of samples\n""" + 767 """(e.g. 'sample1,sample2,sample3,sample4',\n""" + 768 """ '{"father": "sample1", "mother": "sample2", "child1": "sample3", "child2": "sample3"}').\n""", + 769 "default": None, + 770 "gooey": { + 771 "widget": "FileChooser", + 772 "options": {"wildcard": "JSON file (*.json)|*.json|" "All files (*)|*"}, + 773 }, + 774 "extra": {"param_section": "calculation:calculations:BARCODEFAMILY"}, + 775 }, + 776 # Stats + 777 "stats_md": { + 778 "metavar": "stats markdown", + 779 "help": """Stats Output file in MarkDown format.\n""", + 780 "required": False, + 781 "default": None, + 782 "type": PathType(exists=None, type="file"), + 783 "gooey": { + 784 "widget": "FileSaver", + 785 "options": {"wildcard": "Markdown file (*.md)|*.md"}, + 786 }, + 787 "extra": { + 788 "examples": { + 789 "Export statistics in Markdown format": """"stats_md": "/tmp/stats.md" """ + 790 } + 791 }, + 792 }, + 793 "stats_json": { + 794 "metavar": "stats json", + 795 "help": """Stats Output file in JSON format.\n""", + 796 "required": False, + 797 "default": None, + 798 "type": PathType(exists=None, type="file"), + 799 "gooey": { + 800 "widget": "FileSaver", + 801 "options": {"wildcard": "JSON file (*.json)|*.json"}, + 802 }, + 803 "extra": { + 804 "examples": { + 805 "Export statistics in JSON format": """"stats_json": "/tmp/stats.json" """ + 806 } + 807 }, + 808 }, + 809 # Assembly and Genome + 810 "assembly": { + 811 
"metavar": "assembly", + 812 "help": """Genome Assembly (e.g. 'hg19', 'hg38').\n""", + 813 "required": False, + 814 "default": DEFAULT_ASSEMBLY, + 815 "type": str, + 816 "extra": { + 817 "examples": { + 818 "Default assembly for all analysis tools": """"assembly": "hg19" """, + 819 "List of assemblies for databases download tool": """"assembly": "hg19,hg38" """, + 820 } + 821 }, + 822 }, + 823 "genome": { + 824 "metavar": "genome", + 825 "help": """Genome file in fasta format (e.g. 'hg19.fa', 'hg38.fa').\n""", + 826 "required": False, + 827 "default": f"{DEFAULT_GENOME_FOLDER}/{DEFAULT_ASSEMBLY}/{DEFAULT_ASSEMBLY}.fa", + 828 "type": PathType(exists=None, type="file"), + 829 "gooey": {"widget": "FileChooser", "options": {"wildcard": "All files (*)|*"}}, + 830 }, + 831 # HGVS + 832 "hgvs_options": { + 833 "metavar": "HGVS options", + 834 "help": """Quick HGVS annotation options.\n""" + 835 """This option will skip all other hgvs options.\n""" + 836 """Examples:\n""" + 837 """- 'default' (for default options)\n""" + 838 """- 'full_format' (for full format HGVS annotation)\n""" + 839 """- 'use_gene=True:add_protein=true:codon_type=FULL'\n""", + 840 "required": False, + 841 "default": None, + 842 "type": str, + 843 }, + 844 "use_gene": { + 845 "help": """Use Gene information to generate HGVS annotation\n""" + 846 """(e.g. 'NM_152232(TAS1R2):c.231T>C')""", + 847 "action": "store_true", + 848 "default": False, + 849 }, + 850 "use_exon": { + 851 "help": """Use Exon information to generate HGVS annotation\n""" + 852 """(e.g. 'NM_152232(exon2):c.231T>C').\n""" + 853 """Only if 'use_gene' is not enabled.\n""", + 854 "action": "store_true", + 855 "default": False, + 856 }, + 857 "use_protein": { + 858 "help": """Use Protein level to generate HGVS annotation\n""" + 859 """(e.g. 
'NP_689418:p.Cys77Arg').\n""" + 860 """Can be used with 'use_exon' or 'use_gene'.\n""", + 861 "action": "store_true", + 862 "default": False, + 863 }, + 864 "add_protein": { + 865 "help": """Add Protein level to DNA HGVS annotation """ + 866 """(e.g 'NM_152232:c.231T>C,NP_689418:p.Cys77Arg').\n""", + 867 "action": "store_true", + 868 "default": False, + 869 }, + 870 "full_format": { + 871 "help": """Generates HGVS annotation in a full format\n""" + 872 """by using all information to generates an exhaustive annotation\n""" + 873 """(non-standard, e.g. 'TAS1R2:NM_152232:NP_689418:c.231T>C:p.Cys77Arg').\n""" + 874 """Use 'use_exon' to add exon information\n""" + 875 """(e.g 'TAS1R2:NM_152232:NP_689418:exon2:c.231T>C:p.Cys77Arg').\n""", + 876 "action": "store_true", + 877 "default": False, + 878 }, + 879 "use_version": { + 880 "help": """Generates HGVS annotation with transcript version\n""" + 881 """(e.g. 'NM_152232.1:c.231T>C').\n""", + 882 "action": "store_true", + 883 "default": False, + 884 }, + 885 "codon_type": { + 886 "metavar": "Codon type", + 887 "help": """Amino Acide Codon format type to use to generate HGVS annotation.\n""" + 888 """Available:\n""" + 889 """- '1': codon in 1 character (e.g. 'C', 'R')\n""" + 890 """- '3': codon in 3 character (e.g. 'Cys', 'Arg')\n""" + 891 """-'FULL': codon in full name (e.g. 
'Cysteine', 'Arginine')\n""", + 892 "required": False, + 893 "default": "3", + 894 "type": str, + 895 "choices": ["1", "3", "FULL"], + 896 "gooey": {"widget": "Dropdown", "options": {}}, + 897 }, + 898 "refgene": { + 899 "metavar": "refGene", + 900 "help": """Path to refGene annotation file.\n""", + 901 "required": False, + 902 "default": None, + 903 "type": PathType(exists=True, type="file"), + 904 "gooey": { + 905 "widget": "FileChooser", + 906 "options": { + 907 "wildcard": "All files (*)|*", + 908 "default_dir": DEFAULT_REFSEQ_FOLDER, + 909 "default_file": "ncbiRefSeq.txt", + 910 "message": "Path to refGene annotation file", + 911 }, + 912 }, + 913 }, + 914 "refseqlink": { + 915 "metavar": "refSeqLink", + 916 "help": """Path to refSeqLink annotation file.\n""", + 917 "required": False, + 918 "default": None, + 919 "type": PathType(exists=True, type="file"), + 920 "gooey": { + 921 "widget": "FileChooser", + 922 "options": { + 923 "wildcard": "All files (*)|*", + 924 "default_dir": DEFAULT_REFSEQ_FOLDER, + 925 "default_file": "ncbiRefSeq.txt", + 926 "message": "Path to refGeneLink annotation file", + 927 }, + 928 }, + 929 }, + 930 "refseq-folder": { + 931 "metavar": "refseq folder", + 932 "help": """Folder containing refSeq files.\n""", + 933 "required": False, + 934 "default": DEFAULT_REFSEQ_FOLDER, + 935 "type": PathType(exists=True, type="dir"), + 936 "gooey": { + 937 "widget": "DirChooser", + 938 "options": { + 939 "default_dir": DEFAULT_REFSEQ_FOLDER, + 940 "message": "Path to refGenefolder", + 941 }, + 942 }, + 943 }, + 944 # Databases + 945 # Genome + 946 "download-genomes": { + 947 "metavar": "genomes", + 948 "help": """Path to genomes folder\n""" + 949 """with Fasta files, indexes,\n""" + 950 """and all files generated by pygenome module.\n""" + 951 f"""(e.g. 
'{DEFAULT_GENOME_FOLDER}').\n""", + 952 "required": False, + 953 "default": None, + 954 "type": PathType(exists=None, type="dir"), + 955 "gooey": { + 956 "widget": "DirChooser", + 957 "options": { + 958 "default_dir": DEFAULT_DATABASE_FOLDER, + 959 "message": "Path to genomes folder", + 960 }, + 961 }, + 962 }, + 963 "download-genomes-provider": { + 964 "metavar": "genomes provider", + 965 "help": """Download Genome from an external provider.\n""" + 966 """Available: GENCODE, Ensembl, UCSC, NCBI.\n""", + 967 "required": False, + 968 "default": "UCSC", + 969 "type": str, + 970 "choices": ["GENCODE", "Ensembl", "UCSC", "NCBI"], + 971 "gooey": {"widget": "Dropdown", "options": {}}, + 972 }, + 973 "download-genomes-contig-regex": { + 974 "metavar": "genomes contig regex", + 975 "help": """Regular expression to select specific chromosome\n""" + 976 """(e.g 'chr[0-9XYM]+$').\n""", + 977 "required": False, + 978 "default": None, + 979 "type": str, + 980 }, + 981 # Annovar + 982 "download-annovar": { + 983 "metavar": "Annovar", + 984 "help": """Path to Annovar databases\n""" + 985 f"""(e.g. '{DEFAULT_ANNOVAR_FOLDER}').\n""", + 986 "required": False, + 987 "type": PathType(exists=None, type="dir"), + 988 "default": None, + 989 "gooey": { + 990 "widget": "DirChooser", + 991 "options": { + 992 "default_dir": DEFAULT_DATABASE_FOLDER, + 993 "message": "Path to Annovar databases folder", + 994 }, + 995 }, + 996 }, + 997 "download-annovar-files": { + 998 "metavar": "Annovar code", + 999 "help": """Download Annovar databases for a list of Annovar file code (see Annovar Doc).\n""" +1000 """Use None to donwload all available files,\n""" +1001 """or Annovar keyword (e.g. 
'refGene', 'cosmic70', 'clinvar_202*').\n""" +1002 """Note that refGene will at least be downloaded,\n""" +1003 """and only files that not already exist or changed will be downloaded.\n""", +1004 "required": False, +1005 "default": None, +1006 "type": str, +1007 }, +1008 "download-annovar-url": { +1009 "metavar": "Annovar url", +1010 "help": """Annovar databases URL (see Annovar Doc).\n""", +1011 "required": False, +1012 "default": DEFAULT_ANNOVAR_URL, +1013 "type": str, +1014 }, +1015 # snpEff +1016 "download-snpeff": { +1017 "metavar": "snpEff", +1018 "help": """Download snpEff databases within snpEff folder""", +1019 "required": False, +1020 "default": None, +1021 "type": PathType(exists=None, type="dir"), +1022 "gooey": { +1023 "widget": "DirChooser", +1024 "options": { +1025 "default_dir": DEFAULT_DATABASE_FOLDER, +1026 "message": "Path to snpEff databases folder", +1027 }, +1028 }, +1029 }, +1030 # refSeq +1031 "download-refseq": { +1032 "metavar": "refSeq", +1033 "help": """Path to refSeq databases\n""" +1034 f"""(e.g. '{DEFAULT_REFSEQ_FOLDER}').\n""", +1035 "required": False, +1036 "default": None, +1037 "type": PathType(exists=None, type="dir"), +1038 "gooey": { +1039 "widget": "DirChooser", +1040 "options": { +1041 "default_dir": DEFAULT_DATABASE_FOLDER, +1042 "message": "Path to refGene files folder", +1043 }, +1044 }, +1045 }, +1046 "download-refseq-url": { +1047 "metavar": "refSeq url", +1048 "help": """refSeq databases URL (see refSeq WebSite)\n""" +1049 f"""(e.g. 
'{DEFAULT_REFSEQ_URL}')•/n""", +1050 "required": False, +1051 "default": DEFAULT_REFSEQ_URL, +1052 "type": str, +1053 }, +1054 "download-refseq-prefix": { +1055 "metavar": "refSeq prefix", +1056 "help": """Check existing refSeq files in refSeq folder.\n""", +1057 "required": False, +1058 "default": "ncbiRefSeq", +1059 "type": str, +1060 }, +1061 "download-refseq-files": { +1062 "metavar": "refSeq files", +1063 "help": """List of refSeq files to download.\n""", +1064 "required": False, +1065 "default": "ncbiRefSeq.txt,ncbiRefSeqLink.txt", +1066 "type": str, +1067 }, +1068 "download-refseq-format-file": { +1069 "metavar": "refSeq format file", +1070 "help": """Name of refSeq file to convert in BED format\n""" +1071 """(e.g. 'ncbiRefSeq.txt').\n""" +1072 """Process only if not None.\n""", +1073 "required": False, +1074 "default": None, +1075 "type": str, +1076 }, +1077 "download-refseq-include-utr5": { +1078 "help": """Formating BED refSeq file including 5'UTR.\n""", +1079 "action": "store_true", +1080 "default": False, +1081 }, +1082 "download-refseq-include-utr3": { +1083 "help": """Formating BED refSeq file including 3'UTR.\n""", +1084 "action": "store_true", +1085 "default": False, +1086 }, +1087 "download-refseq-include-chrM": { +1088 "help": """Formating BED refSeq file including Mitochondiral chromosome 'chrM' or 'chrMT'.\n""", +1089 "action": "store_true", +1090 "default": False, +1091 }, +1092 "download-refseq-include-non-canonical-chr": { +1093 "help": """Formating BED refSeq file including non canonical chromosomes.\n""", +1094 "action": "store_true", +1095 "default": False, +1096 }, +1097 "download-refseq-include-non-coding-transcripts": { +1098 "help": """Formating BED refSeq file including non coding transcripts.\n""", +1099 "action": "store_true", +1100 "default": False, +1101 }, +1102 "download-refseq-include-transcript-version": { +1103 "help": """Formating BED refSeq file including transcript version.\n""", +1104 "action": "store_true", +1105 
"default": False, +1106 }, +1107 # dbNSFP +1108 "download-dbnsfp": { +1109 "metavar": "dbNSFP", +1110 "help": """Download dbNSFP databases within dbNSFP folder""" +1111 f"""(e.g. '{DEFAULT_DATABASE_FOLDER}').\n""", +1112 "required": False, +1113 "default": None, +1114 "type": PathType(exists=None, type="dir"), +1115 "gooey": { +1116 "widget": "DirChooser", +1117 "options": { +1118 "default_dir": DEFAULT_DATABASE_FOLDER, +1119 "message": "Path to dbNSFP databases folder", +1120 }, +1121 }, +1122 }, +1123 "download-dbnsfp-url": { +1124 "metavar": "dbNSFP url", +1125 "help": """Download dbNSFP databases URL (see dbNSFP website)\n""" +1126 f"""(e.g. {DEFAULT_DBNSFP_URL}').\n""", +1127 "required": False, +1128 "default": DEFAULT_DBNSFP_URL, +1129 "type": str, +1130 }, +1131 "download-dbnsfp-release": { +1132 "metavar": "dnNSFP release", +1133 "help": """Release of dbNSFP to download (see dbNSFP website)\n""" +1134 """(e.g. '4.4a').\n""", +1135 "required": False, +1136 "default": "4.4a", +1137 }, +1138 "download-dbnsfp-parquet-size": { +1139 "metavar": "dbNSFP parquet size", +1140 "help": """Maximum size (Mb) of data files in Parquet folder.\n""" +1141 """Parquet folder are partitioned (hive) by chromosome (sub-folder),\n""" +1142 """which contain N data files.\n""", +1143 "required": False, +1144 "default": 100, +1145 "type": int, +1146 "gooey": { +1147 "widget": "IntegerField", +1148 "options": {"min": 1, "max": 100000, "increment": 10}, +1149 }, +1150 }, +1151 "download-dbnsfp-subdatabases": { +1152 "help": """Generate dbNSFP sub-databases.\n""" +1153 """dbNSFP provides multiple databases which are split onto multiple columns.\n""" +1154 """This option create a Parquet folder for each sub-database (based on columns names).\n""", +1155 "action": "store_true", +1156 "default": False, +1157 }, +1158 "download-dbnsfp-parquet": { +1159 "help": """Generate a Parquet file for each Parquet folder.\n""", +1160 "action": "store_true", +1161 "default": False, +1162 }, +1163 
"download-dbnsfp-vcf": { +1164 "help": """Generate a VCF file for each Parquet folder.\n""" +1165 """Need genome FASTA file (see --download-genome).\n""", +1166 "action": "store_true", +1167 "default": False, +1168 }, +1169 "download-dbnsfp-no-files-all": { +1170 "help": """Not generate database Parquet/VCF file for the entire database ('ALL').\n""" +1171 """Only sub-databases files will be generated.\n""" +1172 """(see '--download-dbnsfp-subdatabases').\n""", +1173 "action": "store_true", +1174 "default": False, +1175 }, +1176 "download-dbnsfp-add-info": { +1177 "help": """Add INFO column (VCF format) in Parquet folder and file.\n""" +1178 """Useful for speed up full annotation (all available columns).\n""" +1179 """Increase memory and space during generation of files.\n""", +1180 "action": "store_true", +1181 "default": False, +1182 }, +1183 "download-dbnsfp-only-info": { +1184 "help": """Add only INFO column (VCF format) in Parquet folder and file.\n""" +1185 """Useful for speed up full annotation (all available columns).\n""" +1186 """Decrease memory and space during generation of files.\n""" +1187 """Increase time for partial annotation (some available columns).\n""", +1188 "action": "store_true", +1189 "default": False, +1190 }, +1191 "download-dbnsfp-uniquify": { +1192 "help": """Uniquify values within column\n""" +1193 """(e.g. "D,D" to "D", "D,.,T" to "D,T").\n""" +1194 """Remove transcripts information details.\n""" +1195 """Usefull to reduce size of the database.\n""" +1196 """Increase memory and space during generation of files.\n""", +1197 "action": "store_true", +1198 "default": False, +1199 }, +1200 "download-dbnsfp-row-group-size": { +1201 "metavar": "dnNSFP row grooup size", +1202 "help": """Minimum number of rows in a parquet row group (see duckDB doc).\n""" +1203 """Lower can reduce memory usage and slightly increase space during generation,\n""" +1204 """speed up highly selective queries, slow down whole file queries (e.g. 
aggregations).\n""", +1205 "required": False, +1206 "default": 100000, +1207 "type": int, +1208 "gooey": { +1209 "widget": "IntegerField", +1210 "options": {"min": 1, "max": 100000000000, "increment": 10000}, +1211 }, +1212 }, +1213 # AlphaMissense +1214 "download-alphamissense": { +1215 "metavar": "AlphaMissense", +1216 "help": "Path to AlphaMissense databases", +1217 "required": False, +1218 "default": None, +1219 "type": PathType(exists=None, type="dir"), +1220 "gooey": { +1221 "widget": "DirChooser", +1222 "options": { +1223 "default_dir": DEFAULT_DATABASE_FOLDER, +1224 "message": "Path to Alphamissense databases folder", +1225 }, +1226 }, +1227 }, +1228 "download-alphamissense-url": { +1229 "metavar": "AlphaMissense url", +1230 "help": """Download AlphaMissense databases URL (see AlphaMissense website)\n""" +1231 f"""(e.g. '{DEFAULT_ALPHAMISSENSE_URL}').\n""", +1232 "required": False, +1233 "default": DEFAULT_ALPHAMISSENSE_URL, +1234 "type": str, +1235 }, +1236 # Exomiser +1237 "download-exomiser": { +1238 "metavar": "Exomiser", +1239 "help": """Path to Exomiser databases\n""" +1240 f"""(e.g. 
{DEFAULT_EXOMISER_FOLDER}).\n""", +1241 "required": False, +1242 "default": None, +1243 "type": PathType(exists=None, type="dir"), +1244 "gooey": { +1245 "widget": "DirChooser", +1246 "options": { +1247 "default_dir": DEFAULT_DATABASE_FOLDER, +1248 "message": "Path to Exomiser databases folder", +1249 }, +1250 }, +1251 }, +1252 "download-exomiser-application-properties": { +1253 "metavar": "Exomiser application properties", +1254 "help": """Exomiser Application Properties configuration file (see Exomiser website).\n""" +1255 """This file contains configuration settings for the Exomiser tool.\n""" +1256 """If this parameter is not provided, the function will attempt to locate\n""" +1257 """the application properties file automatically based on the Exomiser.\n""" +1258 """Configuration information will be used to download expected releases (if no other parameters).\n""" +1259 """CADD and REMM will be downloaded only if 'path' are provided.\n""", +1260 "required": False, +1261 "default": None, +1262 "type": PathType(exists=True, type="file"), +1263 "gooey": { +1264 "widget": "FileChooser", +1265 "options": { +1266 "wildcard": "All files (*)|*", +1267 "options": { +1268 "default_dir": DEFAULT_EXOMISER_FOLDER, +1269 "message": "Path to Exomiser application properties file", +1270 }, +1271 }, +1272 }, +1273 }, +1274 "download-exomiser-url": { +1275 "metavar": "Exomiser url", +1276 "help": """URL where Exomiser database files can be downloaded from\n""" +1277 f"""(e.g. 
'{DEFAULT_EXOMISER_URL}').\n""", +1278 "required": False, +1279 "default": DEFAULT_EXOMISER_URL, +1280 "type": str, +1281 }, +1282 "download-exomiser-release": { +1283 "metavar": "Exomiser release", +1284 "help": """Release of Exomiser data to download.\n""" +1285 """If "default", "auto", or "config", retrieve from Application Properties file.\n""" +1286 """If not provided (None), from Application Properties file (Exomiser data-version) \n""" +1287 """or default '2109'.\n""", +1288 "required": False, +1289 "default": None, +1290 "type": str, +1291 }, +1292 "download-exomiser-phenotype-release": { +1293 "metavar": "Exomiser phenoptye release", +1294 "help": """Release of Exomiser phenotype to download.\n""" +1295 """If not provided (None), from Application Properties file (Exomiser Phenotype data-version)\n""" +1296 """or Exomiser release.\n""", +1297 "required": False, +1298 "default": None, +1299 "type": str, +1300 }, +1301 "download-exomiser-remm-release": { +1302 "metavar": "Exomiser remm release", +1303 "help": """Release of ReMM (Regulatory Mendelian Mutation) database to download.\n""" +1304 """If "default", "auto", or "config", retrieve from Application Properties file.\n""", +1305 "required": False, +1306 "default": None, +1307 "type": str, +1308 }, +1309 "download-exomiser-remm-url": { +1310 "metavar": "Exomiser remm url", +1311 "help": """URL where ReMM (Regulatory Mendelian Mutation) database files can be downloaded from\n""" +1312 f"""(e.g. 
'{DEFAULT_EXOMISER_REMM_URL}').\n""", +1313 "required": False, +1314 "default": DEFAULT_EXOMISER_REMM_URL, +1315 "type": str, +1316 }, +1317 "download-exomiser-cadd-release": { +1318 "metavar": "Exomiser cadd release", +1319 "help": """Release of CADD (Combined Annotation Dependent Depletion) database to download.\n""" +1320 """If "default", "auto", or "config", retrieve from Application Properties file.\n""", +1321 "required": False, +1322 "default": None, +1323 "type": str, +1324 }, +1325 "download-exomiser-cadd-url": { +1326 "metavar": "Exomiser cadd url", +1327 "help": """URL where CADD (Combined Annotation Dependent Depletion) database files can be downloaded from\n""" +1328 f"""(e.g. '{DEFAULT_EXOMISER_CADD_URL}').\n""", +1329 "required": False, +1330 "default": DEFAULT_EXOMISER_CADD_URL, +1331 "type": str, +1332 }, +1333 "download-exomiser-cadd-url-snv-file": { +1334 "metavar": "Exomiser url snv file", +1335 "help": """Name of the file containing the SNV (Single Nucleotide Variant) data\n""" +1336 """for the CADD (Combined Annotation Dependent Depletion) database.\n""", +1337 "required": False, +1338 "default": "whole_genome_SNVs.tsv.gz", +1339 "type": str, +1340 }, +1341 "download-exomiser-cadd-url-indel-file": { +1342 "metavar": "Exomiser cadd url indel", +1343 "help": """Name of the file containing the INDEL (Insertion-Deletion) data\n""" +1344 """for the CADD (Combined Annotation Dependent Depletion) database.\n""", +1345 "required": False, +1346 "default": "InDels.tsv.gz", +1347 "type": str, +1348 }, +1349 # dbSNP +1350 "download-dbsnp": { +1351 "metavar": "dnSNP", +1352 "help": """Path to dbSNP databases\n""" +1353 f"""(e.g. 
'{DEFAULT_DBSNP_FOLDER}').\n""", +1354 "required": False, +1355 "default": None, +1356 "type": PathType(exists=None, type="dir"), +1357 "gooey": { +1358 "widget": "DirChooser", +1359 "options": { +1360 "default_dir": DEFAULT_DATABASE_FOLDER, +1361 "message": "Path to dbSNP databases folder", +1362 }, +1363 }, +1364 }, +1365 "download-dbsnp-releases": { +1366 "metavar": "dnSNP releases", +1367 "help": """Release of dbSNP to download\n""" +1368 """(e.g. 'b152', 'b152,b156').\n""", +1369 "required": False, +1370 "default": "b156", +1371 "type": str, +1372 }, +1373 "download-dbsnp-release-default": { +1374 "metavar": "dnSNP release default", +1375 "help": """Default Release of dbSNP ('default' symlink)\n""" +1376 """(e.g. 'b156').\n""" +1377 """If None, first release to download will be assigned as default\n""" +1378 """only if it does not exists.\n""", +1379 "required": False, +1380 "default": None, +1381 "type": str, +1382 }, +1383 "download-dbsnp-url": { +1384 "metavar": "dbSNP url", +1385 "help": """URL where dbSNP database files can be downloaded from.\n""" +1386 f"""(e.g. 
'{DEFAULT_DBSNP_URL}').\n""", +1387 "required": False, +1388 "default": DEFAULT_DBSNP_URL, +1389 "type": str, +1390 }, +1391 "download-dbsnp-url-files": { +1392 "metavar": "dbSNP url files", +1393 "help": """Dictionary that maps assembly names to specific dbSNP URL files.\n""" +1394 """It allows you to provide custom dbSNP URL files for specific assemblies\n""" +1395 """instead of using the default file naming convention.\n""", +1396 "required": False, +1397 "default": None, +1398 "type": str, +1399 }, +1400 "download-dbsnp-url-files-prefix": { +1401 "metavar": "dbSNP url files prefix", +1402 "help": """String that represents the prefix of the dbSNP file name for a specific assembly.\n""" +1403 """It is used to construct the full URL of the dbSNP file to be downloaded.\n""", +1404 "required": False, +1405 "default": "GCF_000001405", +1406 "type": str, +1407 }, +1408 "download-dbsnp-assemblies-map": { +1409 "metavar": "dbSNP assemblies map", +1410 "help": """dictionary that maps assembly names to their corresponding dbSNP versions.\n""" +1411 """It is used to construct the dbSNP file name based on the assembly name.\n""", +1412 "required": False, +1413 "default": {"hg19": "25", "hg38": "40"}, +1414 "type": str, +1415 "gooey": {"options": {"initial_value": '{"hg19": "25", "hg38": "40"}'}}, +1416 }, +1417 "download-dbsnp-vcf": { +1418 "help": """Generate well-formatted VCF from downloaded file:\n""" +1419 """- Add and filter contigs associated to assembly\n""" +1420 """- Normalize by splitting multiallelics\n""" +1421 """- Need genome (see --download-genome)\n""", +1422 "action": "store_true", +1423 "default": False, +1424 }, +1425 "download-dbsnp-parquet": { +1426 "help": """Generate Parquet file from VCF.\n""", +1427 "action": "store_true", +1428 "default": False, +1429 }, +1430 # HGMD +1431 "convert-hgmd": { +1432 "metavar": "HGMD", +1433 "help": """Convert HGMD databases.\n""" +1434 """Folder where the HGMD databases will be stored.\n""" +1435 """Fields in VCF, 
Parquet and TSV will be generated.\n""" +1436 """If the folder does not exist, it will be created.\n""", +1437 "required": False, +1438 "default": None, +1439 "type": PathType(exists=None, type="dir"), +1440 "gooey": {"widget": "DirChooser"}, +1441 }, +1442 "convert-hgmd-file": { +1443 "metavar": "HGMD file", +1444 "help": """File from HGMD.\n""" +1445 """Name format 'HGMD_Pro_<release>_<assembly>.vcf.gz'.\n""", +1446 "required": False, +1447 "default": None, +1448 "type": PathType(exists=True, type="file"), +1449 "gooey": {"widget": "FileChooser"}, +1450 }, +1451 "convert-hgmd-basename": { +1452 "metavar": "HGMD basename", +1453 "help": """File output basename.\n""" +1454 """Generated files will be prefixed by basename\n""" +1455 """(e.g. 'HGMD_Pro_MY_RELEASE')\n""" +1456 """By default (None), input file name without '.vcf.gz'.\n""", +1457 "required": False, +1458 "default": None, +1459 "type": str, +1460 }, +1461 # Databases parameters +1462 "generate-param": { +1463 "metavar": "param", +1464 "help": """Parameter file (JSON) with all databases found.\n""" +1465 """Databases folders scanned are defined in config file.\n""" +1466 """Structure of databases follow this structure (see doc):\n""" +1467 """.../<database>/<release>/<assembly>/*.[parquet|vcf.gz|...]\n""", +1468 "required": False, +1469 "default": None, +1470 "type": PathType(exists=None, type=None), +1471 "gooey": { +1472 "widget": "FileSaver", +1473 "options": {"wildcard": "JSON file (*.json)|*.json"}, +1474 }, +1475 }, +1476 "generate-param-description": { +1477 "metavar": "param description", +1478 "help": """Description file (JSON) with all databases found.\n""" +1479 """Contains all databases with description of format, assembly, fields...\n""", +1480 "required": False, +1481 "default": None, +1482 "type": PathType(exists=None, type=None), +1483 "gooey": { +1484 "widget": "FileSaver", +1485 "options": {"wildcard": "JSON file (*.json)|*.json"}, +1486 }, +1487 }, +1488 "generate-param-releases": { 
+1489 "metavar": "param release", +1490 "help": """List of database folder releases to check\n""" +1491 """(e.g. 'current', 'latest').\n""", +1492 "required": False, +1493 "default": "current", +1494 "type": str, +1495 }, +1496 "generate-param-formats": { +1497 "metavar": "param formats", +1498 "help": """List of database formats to check\n""" +1499 """(e.g. 'parquet', 'parquet,vcf,bed,tsv').\n""", +1500 "required": False, +1501 "default": "parquet", +1502 "type": str, +1503 }, +1504 "generate-param-bcftools": { +1505 "help": """Generate parameter JSON file with BCFTools annotation for allowed formats\n""" +1506 """(i.e. 'vcf', 'bed').\n""", +1507 "action": "store_true", +1508 "default": False, +1509 }, +1510 # Help +1511 "help_md": { +1512 "metavar": "help markdown", +1513 "help": """Help Output file in MarkDown format.\n""", +1514 "required": False, +1515 "default": None, +1516 "type": PathType(exists=None, type=None), +1517 "gooey": { +1518 "widget": "FileSaver", +1519 "options": { +1520 "wildcard": "HTML file (*.md)|*.md", +1521 }, +1522 }, +1523 }, +1524 "help_html": { +1525 "metavar": "help html", +1526 "help": """Help Output file in HTML format.\n""", +1527 "required": False, +1528 "default": None, +1529 "type": PathType(exists=None, type=None), +1530 "gooey": { +1531 "widget": "FileSaver", +1532 "options": { +1533 "wildcard": "HTML file (*.html)|*.html", +1534 }, +1535 }, +1536 }, +1537 "help_pdf": { +1538 "metavar": "help pdf", +1539 "help": """Help Output file in PDF format.\n""", +1540 "required": False, +1541 "default": None, +1542 "type": PathType(exists=None, type=None), +1543 "gooey": { +1544 "widget": "FileSaver", +1545 "options": { +1546 "wildcard": "PDF file (*.pdf)|*.pdf", +1547 }, +1548 }, +1549 }, +1550 "help_json_input": { +1551 "metavar": "help JSON input", +1552 "help": """Help input file in JSON format.\n""", +1553 "required": False, +1554 "default": None, +1555 "type": PathType(exists=True, type="file"), +1556 "gooey": { +1557 "widget": 
"FileChooser", +1558 "options": { +1559 "wildcard": "JSON file (*.json)|*.json|" "All files (*)|*", +1560 }, +1561 }, +1562 }, +1563 "help_md_input": { +1564 "metavar": "help MarkDown input", +1565 "help": """Help input file in MarkDown format.\n""", +1566 "required": False, +1567 "default": None, +1568 "type": PathType(exists=True, type="file"), +1569 "gooey": { +1570 "widget": "FileChooser", +1571 "options": { +1572 "wildcard": "MarkDown file (*.md)|*.md|" "All files (*)|*", +1573 }, +1574 }, +1575 }, +1576 "code_type": { +1577 "metavar": "example code type", +1578 "help": """Help example code type for input JSON format\n""" +1579 """(e.g. 'json', 'bash').\n""", +1580 "required": False, +1581 "default": "", +1582 "type": str, +1583 }, +1584 "help_json_input_title": { +1585 "metavar": "help JSON input title", +1586 "help": """Help JSON input title.\n""", +1587 "required": False, +1588 "default": "Help", +1589 "type": str, +1590 }, +1591 # Common +1592 "genomes-folder": { +1593 "metavar": "genomes", +1594 "help": """Folder containing genomes.\n""" +1595 f"""(e.g. '{DEFAULT_GENOME_FOLDER}'""", +1596 "required": False, +1597 "default": DEFAULT_GENOME_FOLDER, +1598 "type": PathType(exists=None, type="dir"), +1599 "gooey": { +1600 "widget": "DirChooser", +1601 "options": { +1602 "default_dir": DEFAULT_GENOME_FOLDER, +1603 "message": "Path to genomes databases folder", +1604 }, +1605 }, +1606 }, +1607 # Shared +1608 "config": { +1609 "metavar": "config", +1610 "help": """Configuration JSON file defined default configuration regarding \n""" +1611 """resources (e.g. threads, memory),\n""" +1612 """settings (e.g. verbosity, temporary files),\n""" +1613 """default folders (e.g. 
for databases)\n""" +1614 """and paths to external tools.\n""", +1615 "required": False, +1616 "default": "{}", +1617 "type": str, +1618 "gooey": {"widget": "FileChooser", "options": {"initial_value": "{}"}}, +1619 }, +1620 "threads": { +1621 "metavar": "threads", +1622 "help": """Specify the number of threads to use for processing HOWARD.\n""" +1623 """It determines the level of parallelism,\n""" +1624 """either on python scripts, duckdb engine and external tools.\n""" +1625 """It and can help speed up the process/tool.\n""" +1626 """Use -1 to use all available CPU/cores.\n""" +1627 """Either non valid value is 1 CPU/core.\n""", +1628 "required": False, +1629 "type": int, +1630 "default": -1, +1631 "gooey": { +1632 "widget": "IntegerField", +1633 "options": {"min": -1, "max": 1000, "increment": 1}, +1634 }, +1635 "extra": { +1636 "examples": { +1637 "# Automatically detect all available CPU/cores": '"threads": -1', +1638 "# Define 8 CPU/cores": '"threads": 8', +1639 } +1640 }, +1641 }, +1642 "memory": { +1643 "metavar": "memory", +1644 "help": """Specify the memory to use in format FLOAT[kMG]\n""" +1645 """(e.g. 
'8G', '12.42G', '1024M').\n""" +1646 """It determines the amount of memory for duckDB engine and external tools\n""" +1647 """(especially for JAR programs).\n""" +1648 """It can help to prevent 'out of memory' failures.\n""" +1649 """By default (None) is 80%% of RAM (for duckDB).\n""", +1650 "required": False, +1651 "type": str, +1652 "default": None, +1653 "extra": { +1654 "format": "FLOAT[kMG]", +1655 "examples": { +1656 "# Automatically detect all available CPU/cores": '"threads": -1', +1657 "# Define 8 CPU/cores": '"threads": 8', +1658 }, +1659 }, +1660 }, +1661 "chunk_size": { +1662 "metavar": "chunk size", +1663 "help": """Number of records in batch to export output file.\n""" +1664 """The lower the chunk size, the less memory consumption.\n""" +1665 """For Parquet partitioning, files size will depend on the chunk size.\n""", +1666 "required": False, +1667 "default": 1000000, +1668 "type": int, +1669 "gooey": { +1670 "widget": "IntegerField", +1671 "options": {"min": 1, "max": 100000000000, "increment": 10000}, +1672 }, +1673 "extra": { +1674 "examples": { +1675 "Chunk size of 1.000.000 by default": '"chunk_size": 1000000', +1676 "Smaller chunk size to reduce Parquet file size and memory usage": '"chunk_size": 100000', +1677 } +1678 }, +1679 }, +1680 "tmp": { +1681 "metavar": "Temporary folder", +1682 "help": """Temporary folder (e.g. 
'/tmp').\n""" +1683 """By default, '.tmp' for duckDB (see doc),""" +1684 """external tools and python scripts.\n""", +1685 "required": False, +1686 "default": None, +1687 "type": PathType(exists=True, type="dir"), +1688 "gooey": {"widget": "DirChooser"}, +1689 "extra": { +1690 "examples": { +1691 "# System temporary folder": '"tmp": "/tmp"', +1692 "# HOWARD work directory": '"tmp": "~/howard/tmp"', +1693 "# Current work directory": '"tmp": ".tmp"', +1694 } +1695 }, +1696 }, +1697 "duckdb_settings": { +1698 "metavar": "duckDB settings", +1699 "help": """DuckDB settings (see duckDB doc) as JSON (string or file).\n""" +1700 """These settings have priority (see options 'threads', 'tmp'...).\n""" +1701 """Examples: '{"TimeZone": "GMT", "temp_directory": "/tmp/duckdb", "threads": 8}'.\n""", +1702 "required": False, +1703 "default": None, +1704 "type": PathType(exists=True, type="file"), +1705 "gooey": { +1706 "widget": "FileChooser", +1707 "options": { +1708 "wildcard": "JSON file (*.json)|*.json|" "All files (*)|*", +1709 }, +1710 }, +1711 "extra": { +1712 "examples": { +1713 "DuckDB settings JSON file": '"duckdb_settings": "/path/to/duckdb_config.json"', +1714 "JSON string for Time zone, temporary directory and threads for duckDB": """\"duckdb_settings\": {\n""" +1715 """ \"TimeZone\": \"GMT\",\n""" +1716 """ \"temp_directory\": \"/tmp/duckdb\",\n""" +1717 """ \"threads\": 8\n""" +1718 """}""", +1719 } +1720 }, +1721 }, +1722 "verbosity": { +1723 "metavar": "verbosity", +1724 "help": """Verbosity level\n""" +1725 """Available: CRITICAL, ERROR, WARNING, INFO, DEBUG or NOTSET\n""" +1726 """- DEBUG: Detailed information, typically of interest only when diagnosing problems.\n""" +1727 """- INFO: Confirmation that things are working as expected.\n""" +1728 """- WARNING: An indication that something unexpected happened.\n""" +1729 """- ERROR: Due to a more serious problem.\n""" +1730 """- CRITICAL: A serious error.\n""" +1731 """- FATAL: A fatal error.\n""" +1732 """- 
NOTSET: All messages.\n""", +1733 "required": False, +1734 "choices": [ +1735 "CRITICAL", +1736 "ERROR", +1737 "WARNING", +1738 "INFO", +1739 "DEBUG", +1740 "NOTSET", +1741 "WARN", +1742 "FATAL", +1743 ], +1744 "default": "INFO", +1745 "type": str, +1746 "gooey": {"widget": "Dropdown", "options": {}}, +1747 "extra": { +1748 "examples": { +1749 "Default verbosity": '"verbosity": "INFO"', +1750 "ERROR level (quiet mode)": '"verbosity": "ERROR"', +1751 "For debug": '"verbosity": "DEBUG"', +1752 } +1753 }, +1754 }, +1755 "access": { +1756 "metavar": "access mode", +1757 "help": """Access mode to variants file or database.\n""" +1758 """Either 'RW' for Read and Write, or 'RO' for Read Only.\n""", +1759 "default": "RW", +1760 "type": str, +1761 "choices": ["RW", "RO"], +1762 "gooey": {"widget": "Dropdown", "options": {}}, +1763 "extra": { +1764 "examples": { +1765 "Read and Write mode": '"access": "RW"', +1766 "Read only mode": '"access": "RO"', +1767 } +1768 }, +1769 }, +1770 "log": { +1771 "metavar": "log", +1772 "help": """Logs file\n""" """(e.g. 
'my.log').\n""", +1773 "required": False, +1774 "default": None, +1775 "type": PathType(exists=None, type="file"), +1776 "gooey": {"widget": "FileSaver"}, +1777 "extra": { +1778 "examples": { +1779 "Relative path to log file": '"log": "my.log"', +1780 "# HOWARD work directory": '"log": "~/howard/log"', +1781 "Full path to log file": '"log": "/tmp/my.log"', +1782 } +1783 }, +1784 }, +1785 # Interactivity +1786 "interactive": { +1787 "help": """Interative mose..\n""", +1788 "action": "store_true", +1789 "default": False, +1790 }, +1791 # Verbosity +1792 "quiet": {"help": argparse.SUPPRESS, "action": "store_true", "default": False}, +1793 "verbose": {"help": argparse.SUPPRESS, "action": "store_true", "default": False}, +1794 "debug": {"help": argparse.SUPPRESS, "action": "store_true", "default": False}, +1795 # Only for HELP +1796} 1797 -1798# Shared arguments -1799shared_arguments = [ -1800 "config", -1801 "threads", -1802 "memory", -1803 "chunk_size", -1804 "tmp", -1805 "duckdb_settings", -1806 "interactive", -1807 "verbosity", -1808 "log", -1809 "quiet", -1810 "verbose", -1811 "debug", -1812] -1813 -1814# Command dict -1815commands_arguments = { -1816 "query": { -1817 "function": "query", -1818 "description": """Query genetic variations in SQL format. Data can be loaded into 'variants' table from various formats (e.g. VCF, TSV, Parquet...). Using --explode_infos allow query on INFO/tag annotations. SQL query can also use external data within the request, such as a Parquet file(s). 
""", -1819 "help": "Query genetic variations file in SQL format.", -1820 "epilog": """Usage examples:\n""" -1821 """ howard query --input=tests/data/example.vcf.gz --query="SELECT * FROM variants WHERE REF = 'A' AND POS < 100000" \n""" -1822 """ howard query --input=tests/data/example.vcf.gz --explode_infos --query='SELECT "#CHROM", POS, REF, ALT, DP, CLNSIG, sample2, sample3 FROM variants WHERE DP >= 50 OR CLNSIG NOT NULL ORDER BY DP DESC' \n""" -1823 """ howard query --query="SELECT \\\"#CHROM\\\", POS, REF, ALT, \\\"INFO/Interpro_domain\\\" FROM 'tests/databases/annotations/current/hg19/dbnsfp42a.parquet' WHERE \\\"INFO/Interpro_domain\\\" NOT NULL ORDER BY \\\"INFO/SiPhy_29way_logOdds_rankscore\\\" DESC LIMIT 10" \n""" -1824 """ howard query --explode_infos --explode_infos_prefix='INFO/' --query="SELECT \\\"#CHROM\\\", POS, REF, ALT, STRING_AGG(INFO, ';') AS INFO FROM 'tests/databases/annotations/current/hg19/*.parquet' GROUP BY \\\"#CHROM\\\", POS, REF, ALT" --output=/tmp/full_annotation.tsv && head -n2 /tmp/full_annotation.tsv \n""" -1825 """ howard query --input=tests/data/example.vcf.gz --param=config/param.json \n""" -1826 """ \n""", -1827 "groups": { -1828 "main": {"input": False, "output": False, "param": False, "query": False}, -1829 "Explode": { -1830 "explode_infos": False, -1831 "explode_infos_prefix": False, -1832 "explode_infos_fields": False, -1833 }, -1834 "Query": {"query_limit": False, "query_print_mode": False}, -1835 "Export": {"include_header": False, "parquet_partitions": False}, -1836 }, -1837 }, -1838 "filter": { -1839 "function": "filter", -1840 "description": """Filter genetic variations in SQL format. Data can be loaded into 'variants' table from various formats (e.g. VCF, TSV, Parquet...). SQL filter can also use external data within the request, such as a Parquet file(s). 
""", -1841 "help": "Filter genetic variations file in SQL format.", -1842 "epilog": """Usage examples:\n""" -1843 """ howard filter --input=tests/data/example.vcf.gz --output=/tmp/example.filter.vcf.gz --filter="REF = 'A' AND POS < 100000" \n""" -1844 """ howard filter --input=tests/data/example.vcf.gz --output=/tmp/example.filter.vcf.gz --filter="REF = 'A' AND POS < 100000" --samples="sample1,sample2" \n""" -1845 """ howard filter --input=tests/data/example.vcf.gz --output=/tmp/example.filter.vcf.gz --filter="INFOS.CLNSIG LIKE 'pathogenic'" --samples="sample1,sample2" \n""" -1846 """ howard filter --input=tests/data/example.vcf.gz --output=/tmp/example.filter.vcf.gz --filter="QUAL > 100 AND SAMPLES.sample2.GT != './.'" --samples="sample2" \n""" -1847 """ \n""", -1848 "groups": { -1849 "main": { -1850 "input": True, -1851 "output": True, -1852 }, -1853 "Filters": { -1854 "filter": False, -1855 "samples": False, -1856 }, -1857 "Export": {"include_header": False, "parquet_partitions": False}, -1858 }, -1859 }, -1860 "stats": { -1861 "function": "stats", -1862 "description": """Statistics on genetic variations, such as: number of variants, number of samples, statistics by chromosome, genotypes by samples...""", -1863 "help": "Statistics on genetic variations file.", -1864 "epilog": """Usage examples:\n""" -1865 """ howard stats --input=tests/data/example.vcf.gz \n""" -1866 """ howard stats --input=tests/data/example.vcf.gz --stats_md=/tmp/stats.md \n""" -1867 """ howard stats --input=tests/data/example.vcf.gz --param=config/param.json \n""" -1868 """ \n""", -1869 "groups": { -1870 "main": {"input": True, "param": False}, -1871 "Stats": {"stats_md": False, "stats_json": False}, -1872 }, -1873 }, -1874 "convert": { -1875 "function": "convert", -1876 "description": """Convert genetic variations file to another format. Multiple format are available, such as usual and official VCF and BCF format, but also other formats such as TSV, CSV, PSV and Parquet/duckDB. 
These formats need a header '.hdr' file to take advantage of the power of howard (especially through INFO/tag definition), and using howard convert tool automatically generate header file fo futher use. """, -1877 "help": "Convert genetic variations file to another format.", -1878 "epilog": """Usage examples:\n""" -1879 """ howard convert --input=tests/data/example.vcf.gz --output=/tmp/example.tsv \n""" -1880 """ howard convert --input=tests/data/example.vcf.gz --output=/tmp/example.parquet \n""" -1881 """ howard convert --input=tests/data/example.vcf.gz --output=/tmp/example.tsv --explode_infos --explode_infos_fields='CLNSIG,SIFT,DP' --order_by='CLNSIG DESC, DP DESC' \n""" -1882 """ howard convert --input=tests/data/example.vcf.gz --output=/tmp/example.tsv --explode_infos --explode_infos_prefix='INFO/' --explode_infos_fields='CLNSIG,SIFT,DP,*' --order_by='"INFO/CLNSIG" DESC, "INFO/DP" DESC' --include_header \n""" -1883 """ howard convert --input=tests/data/example.vcf.gz --output=/tmp/example.tsv --param=config/param.json \n""" +1798 +1799# Shared arguments +1800shared_arguments = [ +1801 "config", +1802 "threads", +1803 "memory", +1804 "chunk_size", +1805 "tmp", +1806 "duckdb_settings", +1807 "interactive", +1808 "verbosity", +1809 "log", +1810 "quiet", +1811 "verbose", +1812 "debug", +1813] +1814 +1815# Command dict +1816commands_arguments = { +1817 "query": { +1818 "function": "query", +1819 "description": """Query genetic variations in SQL format. Data can be loaded into 'variants' table from various formats (e.g. VCF, TSV, Parquet...). Using --explode_infos allow query on INFO/tag annotations. SQL query can also use external data within the request, such as a Parquet file(s). 
""", +1820 "help": "Query genetic variations file in SQL format.", +1821 "epilog": """Usage examples:\n""" +1822 """ howard query --input=tests/data/example.vcf.gz --query="SELECT * FROM variants WHERE REF = 'A' AND POS < 100000" \n""" +1823 """ howard query --input=tests/data/example.vcf.gz --explode_infos --query='SELECT "#CHROM", POS, REF, ALT, DP, CLNSIG, sample2, sample3 FROM variants WHERE DP >= 50 OR CLNSIG NOT NULL ORDER BY DP DESC' \n""" +1824 """ howard query --query="SELECT \\\"#CHROM\\\", POS, REF, ALT, \\\"INFO/Interpro_domain\\\" FROM 'tests/databases/annotations/current/hg19/dbnsfp42a.parquet' WHERE \\\"INFO/Interpro_domain\\\" NOT NULL ORDER BY \\\"INFO/SiPhy_29way_logOdds_rankscore\\\" DESC LIMIT 10" \n""" +1825 """ howard query --explode_infos --explode_infos_prefix='INFO/' --query="SELECT \\\"#CHROM\\\", POS, REF, ALT, STRING_AGG(INFO, ';') AS INFO FROM 'tests/databases/annotations/current/hg19/*.parquet' GROUP BY \\\"#CHROM\\\", POS, REF, ALT" --output=/tmp/full_annotation.tsv && head -n2 /tmp/full_annotation.tsv \n""" +1826 """ howard query --input=tests/data/example.vcf.gz --param=config/param.json \n""" +1827 """ \n""", +1828 "groups": { +1829 "main": {"input": False, "output": False, "param": False, "query": False}, +1830 "Explode": { +1831 "explode_infos": False, +1832 "explode_infos_prefix": False, +1833 "explode_infos_fields": False, +1834 }, +1835 "Query": {"query_limit": False, "query_print_mode": False}, +1836 "Export": {"include_header": False, "parquet_partitions": False}, +1837 }, +1838 }, +1839 "filter": { +1840 "function": "filter", +1841 "description": """Filter genetic variations in SQL format. Data can be loaded into 'variants' table from various formats (e.g. VCF, TSV, Parquet...). SQL filter can also use external data within the request, such as a Parquet file(s). 
""", +1842 "help": "Filter genetic variations file in SQL format.", +1843 "epilog": """Usage examples:\n""" +1844 """ howard filter --input=tests/data/example.vcf.gz --output=/tmp/example.filter.vcf.gz --filter="REF = 'A' AND POS < 100000" \n""" +1845 """ howard filter --input=tests/data/example.vcf.gz --output=/tmp/example.filter.vcf.gz --filter="REF = 'A' AND POS < 100000" --samples="sample1,sample2" \n""" +1846 """ howard filter --input=tests/data/example.vcf.gz --output=/tmp/example.filter.vcf.gz --filter="INFOS.CLNSIG LIKE 'pathogenic'" --samples="sample1,sample2" \n""" +1847 """ howard filter --input=tests/data/example.vcf.gz --output=/tmp/example.filter.vcf.gz --filter="QUAL > 100 AND SAMPLES.sample2.GT != './.'" --samples="sample2" \n""" +1848 """ \n""", +1849 "groups": { +1850 "main": { +1851 "input": True, +1852 "output": True, +1853 }, +1854 "Filters": { +1855 "filter": False, +1856 "samples": False, +1857 }, +1858 "Export": {"include_header": False, "parquet_partitions": False}, +1859 }, +1860 }, +1861 "sort": { +1862 "function": "sort", +1863 "description": """Sort genetic variations from contig order. Data can be loaded into 'variants' table from various formats (e.g. VCF, TSV, Parquet...). SQL filter can also use external data within the request, such as a Parquet file(s). 
""", +1864 "help": "Sort genetic variations file from contig order.", +1865 "epilog": """Usage examples:\n""" +1866 """ howard sort --input=tests/data/example.vcf.gz --output=/tmp/example.sorted.vcf.gz \n""" +1867 """ \n""", +1868 "groups": { +1869 "main": { +1870 "input": True, +1871 "output": True, +1872 }, +1873 "Export": {"include_header": False, "parquet_partitions": False}, +1874 }, +1875 }, +1876 "stats": { +1877 "function": "stats", +1878 "description": """Statistics on genetic variations, such as: number of variants, number of samples, statistics by chromosome, genotypes by samples...""", +1879 "help": "Statistics on genetic variations file.", +1880 "epilog": """Usage examples:\n""" +1881 """ howard stats --input=tests/data/example.vcf.gz \n""" +1882 """ howard stats --input=tests/data/example.vcf.gz --stats_md=/tmp/stats.md \n""" +1883 """ howard stats --input=tests/data/example.vcf.gz --param=config/param.json \n""" 1884 """ \n""", 1885 "groups": { -1886 "main": {"input": True, "output": True, "param": False}, -1887 "Explode": { -1888 "explode_infos": False, -1889 "explode_infos_prefix": False, -1890 "explode_infos_fields": False, -1891 }, -1892 "Export": { -1893 "include_header": False, -1894 "order_by": False, -1895 "parquet_partitions": False, -1896 }, -1897 }, -1898 }, -1899 "hgvs": { -1900 "function": "hgvs", -1901 "description": """HGVS annotation using HUGO HGVS internation Sequence Variant Nomenclature (http://varnomen.hgvs.org/). Annotation refere to refGene and genome to generate HGVS nomenclature for all available transcripts. 
This annotation add 'hgvs' field into VCF INFO column of a VCF file.""", -1902 "help": """HGVS annotation (HUGO internation nomenclature) using refGene, genome and transcripts list.\n""", -1903 "epilog": """Usage examples:\n""" -1904 """ howard hgvs --input=tests/data/example.full.vcf --output=/tmp/example.hgvs.vcf \n""" -1905 """ howard hgvs --input=tests/data/example.full.vcf --output=/tmp/example.hgvs.tsv --param=config/param.json \n""" -1906 """ howard hgvs --input=tests/data/example.full.vcf --output=/tmp/example.hgvs.vcf --full_format --use_exon \n""" -1907 """ \n""", -1908 "groups": { -1909 "main": { -1910 "input": True, -1911 "output": True, -1912 "param": False, -1913 "hgvs_options": False, -1914 "assembly": False, -1915 }, -1916 "HGVS": { -1917 "use_gene": False, -1918 "use_exon": False, -1919 "use_protein": False, -1920 "add_protein": False, -1921 "full_format": False, -1922 "codon_type": False, -1923 "refgene": False, -1924 "refseqlink": False, -1925 }, -1926 # "Databases": { -1927 # "refseq-folder": False, -1928 # "genomes-folder": False -1929 # } -1930 }, -1931 }, -1932 "annotation": { -1933 "function": "annotation", -1934 "description": """Annotation is mainly based on a build-in Parquet annotation method, and tools such as BCFTOOLS, Annovar and snpEff. It uses available databases (see Annovar and snpEff) and homemade databases. Format of databases are: parquet, duckdb, vcf, bed, Annovar and snpEff (Annovar and snpEff databases are automatically downloaded, see howard databases tool). 
""", -1935 "help": """Annotation of genetic variations file using databases/files and tools.""", -1936 "epilog": """Usage examples:\n""" -1937 """ howard annotation --input=tests/data/example.vcf.gz --output=/tmp/example.howard.vcf.gz --annotations='tests/databases/annotations/current/hg19/avsnp150.parquet,tests/databases/annotations/current/hg19/dbnsfp42a.parquet,tests/databases/annotations/current/hg19/gnomad211_genome.parquet' \n""" -1938 """ howard annotation --input=tests/data/example.vcf.gz --output=/tmp/example.howard.tsv --assembly=hg19 --annotations='annovar:refGene,annovar:cosmic70,snpeff,tests/databases/annotations/current/hg19/clinvar_20210123.parquet' \n""" -1939 """ howard annotation --input=tests/data/example.vcf.gz --output=/tmp/example.howard.tsv --assembly=hg19 --annotation_parquet='tests/databases/annotations/current/hg19/avsnp150.parquet,tests/databases/annotations/current/hg19/dbnsfp42a.parquet' \n""" -1940 """ howard annotation --input=tests/data/example.vcf.gz --output=/tmp/example.howard.tsv --assembly=hg19 --annotation_bcftools='tests/databases/annotations/current/hg19/nci60.vcf.gz,tests/databases/annotations/current/hg19/dbnsfp42a.vcf.gz' \n""" -1941 """ howard annotation --input=tests/data/example.vcf.gz --output=/tmp/example.howard.tsv --assembly=hg19 --annotation_snpsift='tests/databases/annotations/current/hg19/nci60.vcf.gz,tests/databases/annotations/current/hg19/dbnsfp42a.vcf.gz' \n""" -1942 """ howard annotation --input=tests/data/example.vcf.gz --output=/tmp/example.howard.tsv --assembly=hg19 --annotation_annovar='nci60:cosmic70' \n""" -1943 """ howard annotation --input=tests/data/example.vcf.gz --output=/tmp/example.howard.tsv --assembly=hg19 --annotation_snpeff='-hgvs' \n""" -1944 """ howard annotation --input=tests/data/example.vcf.gz --output=/tmp/example.howard.tsv --assembly=hg19 --annotation_exomiser='preset=exome:transcript_source=refseq' \n""" -1945 """ howard annotation --input=tests/data/example.vcf.gz 
--output=/tmp/example.howard.tsv --assembly=hg19 --annotation_splice='split_mode=one:spliceai_distance=500:spliceai_mask=1' \n""" -1946 """ howard annotation --input=tests/data/example.vcf.gz --output=/tmp/example.howard.tsv --assembly=hg19 --annotations='ALL:parquet' \n""" -1947 """ howard annotation --input=tests/data/example.vcf.gz --output=/tmp/example.howard.tsv --param=config/param.json \n""" -1948 """ \n""", -1949 "groups": { -1950 "main": { -1951 "input": True, -1952 "output": True, -1953 "param": False, -1954 "annotations": False, -1955 "annotation_parquet": False, -1956 "annotation_bcftools": False, -1957 "annotation_annovar": False, -1958 "annotation_snpeff": False, -1959 "annotation_snpsift": False, -1960 "annotation_exomiser": False, -1961 "annotation_splice": False, -1962 "assembly": False, -1963 }, -1964 "Annotation": { -1965 "annotations_update": False, -1966 "annotations_append": False, -1967 }, -1968 }, -1969 }, -1970 "calculation": { -1971 "function": "calculation", -1972 "description": """Calculation processes variants information to generate new information, such as: identify variation type (VarType), harmonizes allele frequency (VAF) and calculate sttistics (VAF_stats), extracts Nomen (transcript, cNomen, pNomen...) from an HGVS field (e.g. 
snpEff, Annovar) with an optional list of personalized transcripts, generates VaRank format barcode, identify trio inheritance.""", -1973 "help": """Calculation operations on genetic variations file and genotype information.\n""", -1974 "epilog": """Usage examples:\n""" -1975 """ howard calculation --input=tests/data/example.full.vcf --output=/tmp/example.calculation.tsv --calculations='vartype' \n""" -1976 """ howard calculation --input=tests/data/example.ann.vcf.gz --output=/tmp/example.calculated.tsv --calculations='snpeff_hgvs,NOMEN' --hgvs_field=snpeff_hgvs --transcripts=tests/data/transcripts.tsv \n""" -1977 """ howard calculation --input=tests/data/example.vcf.gz --output=/tmp/example.calculated.tsv --calculations='TRIO' --trio_pedigree='sample1,sample2,sample4' \n""" -1978 """ howard calculation --input=tests/data/example.vcf.gz --output=/tmp/example.calculated.tsv --calculations='BARCODEFAMILY' --family_pedigree='sample1,sample2,sample4' \n""" -1979 """ howard calculation --input=tests/data/example.ann.transcripts.vcf.gz --output=/tmp/example.calculation.transcripts.tsv --param=config/param.transcripts.json --calculations='TRANSCRIPTS_ANNOTATIONS,TRANSCRIPTS_PRIORITIZATION,TRANSCRIPTS_EXPORT' \n""" -1980 """ howard calculation --input=tests/data/example.ann.vcf.gz --output=/tmp/example.ann.tsv --param=config/param.json \n""" -1981 """ howard calculation --show_calculations \n""" -1982 """ \n""", -1983 "groups": { -1984 "main": { -1985 "input": False, -1986 "output": False, -1987 "param": False, -1988 "calculations": False, -1989 }, -1990 "Calculation": {"calculation_config": False, "show_calculations": False}, -1991 "NOMEN": {"hgvs_field": False, "transcripts": False}, -1992 "TRIO": {"trio_pedigree": False}, -1993 "BARCODEFAMILY": {"family_pedigree": False}, -1994 }, -1995 }, -1996 "prioritization": { -1997 "function": "prioritization", -1998 "description": """Prioritization algorithm uses profiles to flag variants (as passed or filtered), calculate a 
prioritization score, and automatically generate a comment for each variants (example: 'polymorphism identified in dbSNP. associated to Lung Cancer. Found in ClinVar database'). Prioritization profiles are defined in a configuration file in JSON format. A profile is defined as a list of annotation/value, using wildcards and comparison options (contains, lower than, greater than, equal...). Annotations fields may be quality values (usually from callers, such as 'DP') or other annotations fields provided by annotations tools, such as HOWARD itself (example: COSMIC, Clinvar, 1000genomes, PolyPhen, SIFT). Multiple profiles can be used simultaneously, which is useful to define multiple validation/prioritization levels (example: 'standard', 'stringent', 'rare variants', 'low allele frequency').\n""", -1999 "help": "Prioritization of genetic variations based on annotations criteria (profiles).", -2000 "epilog": """Usage examples:\n""" -2001 """ howard prioritization --input=tests/data/example.vcf.gz --output=/tmp/example.prioritized.vcf.gz --prioritizations='default' \n""" -2002 """ howard prioritization --input=tests/data/example.vcf.gz --output=/tmp/example.prioritized.vcf.gz --prioritizations='default,GERMLINE' --prioritization_config=config/prioritization_profiles.json \n""" -2003 """ howard prioritization --input=tests/data/example.vcf.gz --output=/tmp/example.prioritized.tsv --param=config/param.json \n""" -2004 """ \n""", -2005 "groups": { -2006 "main": { -2007 "input": True, -2008 "output": True, -2009 "param": False, -2010 "prioritizations": False, -2011 }, -2012 "Prioritization": { -2013 "default_profile": False, -2014 "pzfields": False, -2015 "prioritization_score_mode": False, -2016 "prioritization_config": False, -2017 }, -2018 }, -2019 }, -2020 "process": { -2021 "function": "process", -2022 "description": """howard process tool manage genetic variations to:\n""" -2023 """- annotates genetic variants with multiple annotation databases/files and tools\n""" 
-2024 """- calculates and normalizes annotations\n""" -2025 """- prioritizes variants with profiles (list of citeria) to calculate scores and flags\n""" -2026 """- translates into various formats\n""" -2027 """- query genetic variants and annotations\n""" -2028 """- generates variants statistics""", -2029 "help": """Full genetic variations process: annotation, calculation, prioritization, format, query, filter...""", -2030 "epilog": """Usage examples:\n""" -2031 """ howard process --input=tests/data/example.vcf.gz --output=/tmp/example.annotated.vcf.gz --param=config/param.json \n""" -2032 """ howard process --input=tests/data/example.vcf.gz --annotations='snpeff' --calculations='snpeff_hgvs' --prioritizations='default' --explode_infos --output=/tmp/example.annotated.tsv --query='SELECT "#CHROM", POS, ALT, REF, snpeff_hgvs FROM variants' \n""" -2033 """ howard process --input=tests/data/example.vcf.gz --hgvs_options='full_format,use_exon' --explode_infos --output=/tmp/example.annotated.tsv --query='SELECT "#CHROM", POS, ALT, REF, hgvs FROM variants' \n""" -2034 """ howard process --input=tests/data/example.vcf.gz --output=/tmp/example.howard.vcf.gz --hgvs='full_format,use_exon' --annotations='tests/databases/annotations/current/hg19/avsnp150.parquet,tests/databases/annotations/current/hg19/dbnsfp42a.parquet,tests/databases/annotations/current/hg19/gnomad211_genome.parquet' --calculations='NOMEN' --explode_infos --query='SELECT NOMEN, REVEL_score, SIFT_score, AF AS 'gnomad_AF', ClinPred_score, ClinPred_pred FROM variants' \n""" -2035 """ \n""", -2036 "groups": { -2037 "main": { -2038 "input": True, -2039 "output": True, -2040 "param": False, -2041 "hgvs_options": False, -2042 "annotations": False, -2043 "calculations": False, -2044 "prioritizations": False, -2045 "assembly": False, -2046 }, -2047 "HGVS": { -2048 "use_gene": False, -2049 "use_exon": False, -2050 "use_protein": False, -2051 "add_protein": False, -2052 "full_format": False, -2053 "codon_type": False, 
-2054 "refgene": False, -2055 "refseqlink": False, -2056 }, -2057 "Annotation": { -2058 "annotations_update": False, -2059 "annotations_append": False, -2060 }, -2061 "Calculation": { -2062 "calculation_config": False, -2063 }, -2064 "Prioritization": { -2065 "default_profile": False, -2066 "pzfields": False, -2067 "prioritization_score_mode": False, -2068 "prioritization_config": False, -2069 }, -2070 "Query": { -2071 "query": False, -2072 "query_limit": False, -2073 "query_print_mode": False, -2074 }, -2075 "Explode": { -2076 "explode_infos": False, -2077 "explode_infos_prefix": False, -2078 "explode_infos_fields": False, +1886 "main": {"input": True, "param": False}, +1887 "Stats": {"stats_md": False, "stats_json": False}, +1888 }, +1889 }, +1890 "convert": { +1891 "function": "convert", +1892 "description": """Convert genetic variations file to another format. Multiple format are available, such as usual and official VCF and BCF format, but also other formats such as TSV, CSV, PSV and Parquet/duckDB. These formats need a header '.hdr' file to take advantage of the power of howard (especially through INFO/tag definition), and using howard convert tool automatically generate header file fo futher use. 
""", +1893 "help": "Convert genetic variations file to another format.", +1894 "epilog": """Usage examples:\n""" +1895 """ howard convert --input=tests/data/example.vcf.gz --output=/tmp/example.tsv \n""" +1896 """ howard convert --input=tests/data/example.vcf.gz --output=/tmp/example.parquet \n""" +1897 """ howard convert --input=tests/data/example.vcf.gz --output=/tmp/example.tsv --explode_infos --explode_infos_fields='CLNSIG,SIFT,DP' --order_by='CLNSIG DESC, DP DESC' \n""" +1898 """ howard convert --input=tests/data/example.vcf.gz --output=/tmp/example.tsv --explode_infos --explode_infos_prefix='INFO/' --explode_infos_fields='CLNSIG,SIFT,DP,*' --order_by='"INFO/CLNSIG" DESC, "INFO/DP" DESC' --include_header \n""" +1899 """ howard convert --input=tests/data/example.vcf.gz --output=/tmp/example.tsv --param=config/param.json \n""" +1900 """ \n""", +1901 "groups": { +1902 "main": {"input": True, "output": True, "param": False}, +1903 "Explode": { +1904 "explode_infos": False, +1905 "explode_infos_prefix": False, +1906 "explode_infos_fields": False, +1907 }, +1908 "Export": { +1909 "include_header": False, +1910 "order_by": False, +1911 "parquet_partitions": False, +1912 }, +1913 }, +1914 }, +1915 "hgvs": { +1916 "function": "hgvs", +1917 "description": """HGVS annotation using HUGO HGVS internation Sequence Variant Nomenclature (http://varnomen.hgvs.org/). Annotation refere to refGene and genome to generate HGVS nomenclature for all available transcripts. 
This annotation adds 'hgvs' field into VCF INFO column of a VCF file.""", +1918 "help": """HGVS annotation (HUGO international nomenclature) using refGene, genome and transcripts list.\n""", +1919 "epilog": """Usage examples:\n""" +1920 """ howard hgvs --input=tests/data/example.full.vcf --output=/tmp/example.hgvs.vcf \n""" +1921 """ howard hgvs --input=tests/data/example.full.vcf --output=/tmp/example.hgvs.tsv --param=config/param.json \n""" +1922 """ howard hgvs --input=tests/data/example.full.vcf --output=/tmp/example.hgvs.vcf --full_format --use_exon \n""" +1923 """ \n""", +1924 "groups": { +1925 "main": { +1926 "input": True, +1927 "output": True, +1928 "param": False, +1929 "hgvs_options": False, +1930 "assembly": False, +1931 }, +1932 "HGVS": { +1933 "use_gene": False, +1934 "use_exon": False, +1935 "use_protein": False, +1936 "add_protein": False, +1937 "full_format": False, +1938 "codon_type": False, +1939 "refgene": False, +1940 "refseqlink": False, +1941 }, +1942 # "Databases": { +1943 # "refseq-folder": False, +1944 # "genomes-folder": False +1945 # } +1946 }, +1947 }, +1948 "annotation": { +1949 "function": "annotation", +1950 "description": """Annotation is mainly based on a built-in Parquet annotation method, and tools such as BCFTOOLS, Annovar and snpEff. It uses available databases (see Annovar and snpEff) and homemade databases. Format of databases are: parquet, duckdb, vcf, bed, Annovar and snpEff (Annovar and snpEff databases are automatically downloaded, see howard databases tool). 
""", +1951 "help": """Annotation of genetic variations file using databases/files and tools.""", +1952 "epilog": """Usage examples:\n""" +1953 """ howard annotation --input=tests/data/example.vcf.gz --output=/tmp/example.howard.vcf.gz --annotations='tests/databases/annotations/current/hg19/avsnp150.parquet,tests/databases/annotations/current/hg19/dbnsfp42a.parquet,tests/databases/annotations/current/hg19/gnomad211_genome.parquet' \n""" +1954 """ howard annotation --input=tests/data/example.vcf.gz --output=/tmp/example.howard.tsv --assembly=hg19 --annotations='annovar:refGene,annovar:cosmic70,snpeff,tests/databases/annotations/current/hg19/clinvar_20210123.parquet' \n""" +1955 """ howard annotation --input=tests/data/example.vcf.gz --output=/tmp/example.howard.tsv --assembly=hg19 --annotation_parquet='tests/databases/annotations/current/hg19/avsnp150.parquet,tests/databases/annotations/current/hg19/dbnsfp42a.parquet' \n""" +1956 """ howard annotation --input=tests/data/example.vcf.gz --output=/tmp/example.howard.tsv --assembly=hg19 --annotation_bcftools='tests/databases/annotations/current/hg19/nci60.vcf.gz,tests/databases/annotations/current/hg19/dbnsfp42a.vcf.gz' \n""" +1957 """ howard annotation --input=tests/data/example.vcf.gz --output=/tmp/example.howard.tsv --assembly=hg19 --annotation_snpsift='tests/databases/annotations/current/hg19/nci60.vcf.gz,tests/databases/annotations/current/hg19/dbnsfp42a.vcf.gz' \n""" +1958 """ howard annotation --input=tests/data/example.vcf.gz --output=/tmp/example.howard.tsv --assembly=hg19 --annotation_annovar='nci60:cosmic70' \n""" +1959 """ howard annotation --input=tests/data/example.vcf.gz --output=/tmp/example.howard.tsv --assembly=hg19 --annotation_snpeff='-hgvs' \n""" +1960 """ howard annotation --input=tests/data/example.vcf.gz --output=/tmp/example.howard.tsv --assembly=hg19 --annotation_exomiser='preset=exome:transcript_source=refseq' \n""" +1961 """ howard annotation --input=tests/data/example.vcf.gz 
--output=/tmp/example.howard.tsv --assembly=hg19 --annotation_splice='split_mode=one:spliceai_distance=500:spliceai_mask=1' \n""" +1962 """ howard annotation --input=tests/data/example.vcf.gz --output=/tmp/example.howard.tsv --assembly=hg19 --annotations='ALL:parquet' \n""" +1963 """ howard annotation --input=tests/data/example.vcf.gz --output=/tmp/example.howard.tsv --param=config/param.json \n""" +1964 """ \n""", +1965 "groups": { +1966 "main": { +1967 "input": True, +1968 "output": True, +1969 "param": False, +1970 "annotations": False, +1971 "annotation_parquet": False, +1972 "annotation_bcftools": False, +1973 "annotation_annovar": False, +1974 "annotation_snpeff": False, +1975 "annotation_snpsift": False, +1976 "annotation_exomiser": False, +1977 "annotation_splice": False, +1978 "assembly": False, +1979 }, +1980 "Annotation": { +1981 "annotations_update": False, +1982 "annotations_append": False, +1983 }, +1984 }, +1985 }, +1986 "calculation": { +1987 "function": "calculation", +1988 "description": """Calculation processes variants information to generate new information, such as: identify variation type (VarType), harmonizes allele frequency (VAF) and calculate statistics (VAF_stats), extracts Nomen (transcript, cNomen, pNomen...) from an HGVS field (e.g. 
snpEff, Annovar) with an optional list of personalized transcripts, generates VaRank format barcode, identify trio inheritance.""", +1989 "help": """Calculation operations on genetic variations file and genotype information.\n""", +1990 "epilog": """Usage examples:\n""" +1991 """ howard calculation --input=tests/data/example.full.vcf --output=/tmp/example.calculation.tsv --calculations='vartype' \n""" +1992 """ howard calculation --input=tests/data/example.ann.vcf.gz --output=/tmp/example.calculated.tsv --calculations='snpeff_hgvs,NOMEN' --hgvs_field=snpeff_hgvs --transcripts=tests/data/transcripts.tsv \n""" +1993 """ howard calculation --input=tests/data/example.vcf.gz --output=/tmp/example.calculated.tsv --calculations='TRIO' --trio_pedigree='sample1,sample2,sample4' \n""" +1994 """ howard calculation --input=tests/data/example.vcf.gz --output=/tmp/example.calculated.tsv --calculations='BARCODEFAMILY' --family_pedigree='sample1,sample2,sample4' \n""" +1995 """ howard calculation --input=tests/data/example.ann.transcripts.vcf.gz --output=/tmp/example.calculation.transcripts.tsv --param=config/param.transcripts.json --calculations='TRANSCRIPTS_ANNOTATIONS,TRANSCRIPTS_PRIORITIZATION,TRANSCRIPTS_EXPORT' \n""" +1996 """ howard calculation --input=tests/data/example.ann.vcf.gz --output=/tmp/example.ann.tsv --param=config/param.json \n""" +1997 """ howard calculation --show_calculations \n""" +1998 """ \n""", +1999 "groups": { +2000 "main": { +2001 "input": False, +2002 "output": False, +2003 "param": False, +2004 "calculations": False, +2005 }, +2006 "Calculation": {"calculation_config": False, "show_calculations": False}, +2007 "NOMEN": {"hgvs_field": False, "transcripts": False}, +2008 "TRIO": {"trio_pedigree": False}, +2009 "BARCODEFAMILY": {"family_pedigree": False}, +2010 }, +2011 }, +2012 "prioritization": { +2013 "function": "prioritization", +2014 "description": """Prioritization algorithm uses profiles to flag variants (as passed or filtered), calculate a 
prioritization score, and automatically generate a comment for each variants (example: 'polymorphism identified in dbSNP. associated to Lung Cancer. Found in ClinVar database'). Prioritization profiles are defined in a configuration file in JSON format. A profile is defined as a list of annotation/value, using wildcards and comparison options (contains, lower than, greater than, equal...). Annotations fields may be quality values (usually from callers, such as 'DP') or other annotations fields provided by annotations tools, such as HOWARD itself (example: COSMIC, Clinvar, 1000genomes, PolyPhen, SIFT). Multiple profiles can be used simultaneously, which is useful to define multiple validation/prioritization levels (example: 'standard', 'stringent', 'rare variants', 'low allele frequency').\n""", +2015 "help": "Prioritization of genetic variations based on annotations criteria (profiles).", +2016 "epilog": """Usage examples:\n""" +2017 """ howard prioritization --input=tests/data/example.vcf.gz --output=/tmp/example.prioritized.vcf.gz --prioritizations='default' \n""" +2018 """ howard prioritization --input=tests/data/example.vcf.gz --output=/tmp/example.prioritized.vcf.gz --prioritizations='default,GERMLINE' --prioritization_config=config/prioritization_profiles.json \n""" +2019 """ howard prioritization --input=tests/data/example.vcf.gz --output=/tmp/example.prioritized.tsv --param=config/param.json \n""" +2020 """ \n""", +2021 "groups": { +2022 "main": { +2023 "input": True, +2024 "output": True, +2025 "param": False, +2026 "prioritizations": False, +2027 }, +2028 "Prioritization": { +2029 "default_profile": False, +2030 "pzfields": False, +2031 "prioritization_score_mode": False, +2032 "prioritization_config": False, +2033 }, +2034 }, +2035 }, +2036 "process": { +2037 "function": "process", +2038 "description": """howard process tool manage genetic variations to:\n""" +2039 """- annotates genetic variants with multiple annotation databases/files and tools\n""" 
+2040 """- calculates and normalizes annotations\n""" +2041 """- prioritizes variants with profiles (list of citeria) to calculate scores and flags\n""" +2042 """- translates into various formats\n""" +2043 """- query genetic variants and annotations\n""" +2044 """- generates variants statistics""", +2045 "help": """Full genetic variations process: annotation, calculation, prioritization, format, query, filter...""", +2046 "epilog": """Usage examples:\n""" +2047 """ howard process --input=tests/data/example.vcf.gz --output=/tmp/example.annotated.vcf.gz --param=config/param.json \n""" +2048 """ howard process --input=tests/data/example.vcf.gz --annotations='snpeff' --calculations='snpeff_hgvs' --prioritizations='default' --explode_infos --output=/tmp/example.annotated.tsv --query='SELECT "#CHROM", POS, ALT, REF, snpeff_hgvs FROM variants' \n""" +2049 """ howard process --input=tests/data/example.vcf.gz --hgvs_options='full_format,use_exon' --explode_infos --output=/tmp/example.annotated.tsv --query='SELECT "#CHROM", POS, ALT, REF, hgvs FROM variants' \n""" +2050 """ howard process --input=tests/data/example.vcf.gz --output=/tmp/example.howard.vcf.gz --hgvs='full_format,use_exon' --annotations='tests/databases/annotations/current/hg19/avsnp150.parquet,tests/databases/annotations/current/hg19/dbnsfp42a.parquet,tests/databases/annotations/current/hg19/gnomad211_genome.parquet' --calculations='NOMEN' --explode_infos --query='SELECT NOMEN, REVEL_score, SIFT_score, AF AS 'gnomad_AF', ClinPred_score, ClinPred_pred FROM variants' \n""" +2051 """ \n""", +2052 "groups": { +2053 "main": { +2054 "input": True, +2055 "output": True, +2056 "param": False, +2057 "hgvs_options": False, +2058 "annotations": False, +2059 "calculations": False, +2060 "prioritizations": False, +2061 "assembly": False, +2062 }, +2063 "HGVS": { +2064 "use_gene": False, +2065 "use_exon": False, +2066 "use_protein": False, +2067 "add_protein": False, +2068 "full_format": False, +2069 "codon_type": False, 
+2070 "refgene": False, +2071 "refseqlink": False, +2072 }, +2073 "Annotation": { +2074 "annotations_update": False, +2075 "annotations_append": False, +2076 }, +2077 "Calculation": { +2078 "calculation_config": False, 2079 }, -2080 "Export": { -2081 "include_header": False, -2082 "order_by": False, -2083 "parquet_partitions": False, -2084 }, -2085 }, -2086 }, -2087 "databases": { -2088 "function": "databases", -2089 "description": """Download databases and needed files for howard and associated tools""", -2090 "help": """Download databases and needed files for howard and associated tools""", -2091 "epilog": """Usage examples:\n""" -2092 """ howard databases --assembly=hg19 --download-genomes=~/howard/databases/genomes/current --download-genomes-provider=UCSC --download-genomes-contig-regex='chr[0-9XYM]+$' \n""" -2093 """ howard databases --assembly=hg19 --download-annovar=~/howard/databases/annovar/current --download-annovar-files='refGene,cosmic70,nci60' \n""" -2094 """ howard databases --assembly=hg19 --download-snpeff=~/howard/databases/snpeff/current \n""" -2095 """ howard databases --assembly=hg19 --download-refseq=~/howard/databases/refseq/current --download-refseq-format-file='ncbiRefSeq.txt' \n""" -2096 """ howard databases --assembly=hg19 --download-dbnsfp=~/howard/databases/dbnsfp/current --download-dbnsfp-release='4.4a' --download-dbnsfp-subdatabases \n""" -2097 """ howard databases --assembly=hg19 --download-alphamissense=~/howard/databases/alphamissense/current \n""" -2098 """ howard databases --assembly=hg19 --download-exomiser=~/howard/databases/exomiser/current \n""" -2099 """ howard databases --assembly=hg19 --download-dbsnp=~/howard/databases/dbsnp/current --download-dbsnp-vcf \n""" -2100 """ cd ~/howard/databases && howard databases --assembly=hg19 --download-genomes=genomes/current --download-genomes-provider=UCSC --download-genomes-contig-regex='chr[0-9XYM]+$' --download-annovar=annovar/current --download-annovar-files='refGene,cosmic70,nci60' 
--download-snpeff=snpeff/current --download-refseq=refseq/current --download-refseq-format-file='ncbiRefSeq.txt' --download-dbnsfp=dbnsfp/current --download-dbnsfp-release='4.4a' --download-dbnsfp-subdatabases --download-alphamissense=alphamissense/current --download-exomiser=exomiser/current --download-dbsnp=dbsnp/current --download-dbsnp-vcf --threads=8 \n""" -2101 """ howard databases --generate-param=/tmp/param.json --generate-param-description=/tmp/test.description.json --generate-param-formats=parquet \n""" -2102 """ howard databases --input_annovar=tests/databases/others/hg19_nci60.txt --output_annovar=/tmp/nci60.from_annovar.vcf.gz --annovar_to_parquet=/tmp/nci60.from_annovar.parquet --annovar_code=nci60 --genome=~/howard/databases/genomes/current/hg19.fa \n""" -2103 """\n""" -2104 """Notes:\n""" -2105 """ - Downloading databases can take a while, depending on network, threads and memory\n""" -2106 """ - Proxy: Beware of network and proxy configuration\n""" -2107 """ - dbNSFP download: More threads, more memory usage (8 threads ~ 16Gb, 24 threads ~ 32Gb)\n""" -2108 """ \n""", -2109 "groups": { -2110 "main": { -2111 "assembly": False, -2112 "genomes-folder": False, -2113 "genome": False, -2114 "param": False, -2115 }, -2116 "Genomes": { -2117 "download-genomes": False, -2118 "download-genomes-provider": False, -2119 "download-genomes-contig-regex": False, -2120 }, -2121 "snpEff": {"download-snpeff": False}, -2122 "Annovar": { -2123 "download-annovar": False, -2124 "download-annovar-files": False, -2125 "download-annovar-url": False, -2126 }, -2127 "refSeq": { -2128 "download-refseq": False, -2129 "download-refseq-url": False, -2130 "download-refseq-prefix": False, -2131 "download-refseq-files": False, -2132 "download-refseq-format-file": False, -2133 "download-refseq-include-utr5": False, -2134 "download-refseq-include-utr3": False, -2135 "download-refseq-include-chrM": False, -2136 "download-refseq-include-non-canonical-chr": False, -2137 
"download-refseq-include-non-coding-transcripts": False, -2138 "download-refseq-include-transcript-version": False, -2139 }, -2140 "dbNSFP": { -2141 "download-dbnsfp": False, -2142 "download-dbnsfp-url": False, -2143 "download-dbnsfp-release": False, -2144 "download-dbnsfp-parquet-size": False, -2145 "download-dbnsfp-subdatabases": False, -2146 "download-dbnsfp-parquet": False, -2147 "download-dbnsfp-vcf": False, -2148 "download-dbnsfp-no-files-all": False, -2149 "download-dbnsfp-add-info": False, -2150 "download-dbnsfp-only-info": False, -2151 "download-dbnsfp-uniquify": False, -2152 "download-dbnsfp-row-group-size": False, -2153 }, -2154 "AlphaMissense": { -2155 "download-alphamissense": False, -2156 "download-alphamissense-url": False, -2157 }, -2158 "Exomiser": { -2159 "download-exomiser": False, -2160 "download-exomiser-application-properties": False, -2161 "download-exomiser-url": False, -2162 "download-exomiser-release": False, -2163 "download-exomiser-phenotype-release": False, -2164 "download-exomiser-remm-release": False, -2165 "download-exomiser-remm-url": False, -2166 "download-exomiser-cadd-release": False, -2167 "download-exomiser-cadd-url": False, -2168 "download-exomiser-cadd-url-snv-file": False, -2169 "download-exomiser-cadd-url-indel-file": False, -2170 }, -2171 "dbSNP": { -2172 "download-dbsnp": False, -2173 "download-dbsnp-releases": False, -2174 "download-dbsnp-release-default": False, -2175 "download-dbsnp-url": False, -2176 "download-dbsnp-url-files": False, -2177 "download-dbsnp-url-files-prefix": False, -2178 "download-dbsnp-assemblies-map": False, -2179 "download-dbsnp-vcf": False, -2180 "download-dbsnp-parquet": False, -2181 }, -2182 "HGMD": { -2183 "convert-hgmd": False, -2184 "convert-hgmd-file": False, -2185 "convert-hgmd-basename": False, +2080 "Prioritization": { +2081 "default_profile": False, +2082 "pzfields": False, +2083 "prioritization_score_mode": False, +2084 "prioritization_config": False, +2085 }, +2086 "Query": { +2087 
"query": False, +2088 "query_limit": False, +2089 "query_print_mode": False, +2090 }, +2091 "Explode": { +2092 "explode_infos": False, +2093 "explode_infos_prefix": False, +2094 "explode_infos_fields": False, +2095 }, +2096 "Export": { +2097 "include_header": False, +2098 "order_by": False, +2099 "parquet_partitions": False, +2100 }, +2101 }, +2102 }, +2103 "databases": { +2104 "function": "databases", +2105 "description": """Download databases and needed files for howard and associated tools""", +2106 "help": """Download databases and needed files for howard and associated tools""", +2107 "epilog": """Usage examples:\n""" +2108 """ howard databases --assembly=hg19 --download-genomes=~/howard/databases/genomes/current --download-genomes-provider=UCSC --download-genomes-contig-regex='chr[0-9XYM]+$' \n""" +2109 """ howard databases --assembly=hg19 --download-annovar=~/howard/databases/annovar/current --download-annovar-files='refGene,cosmic70,nci60' \n""" +2110 """ howard databases --assembly=hg19 --download-snpeff=~/howard/databases/snpeff/current \n""" +2111 """ howard databases --assembly=hg19 --download-refseq=~/howard/databases/refseq/current --download-refseq-format-file='ncbiRefSeq.txt' \n""" +2112 """ howard databases --assembly=hg19 --download-dbnsfp=~/howard/databases/dbnsfp/current --download-dbnsfp-release='4.4a' --download-dbnsfp-subdatabases \n""" +2113 """ howard databases --assembly=hg19 --download-alphamissense=~/howard/databases/alphamissense/current \n""" +2114 """ howard databases --assembly=hg19 --download-exomiser=~/howard/databases/exomiser/current \n""" +2115 """ howard databases --assembly=hg19 --download-dbsnp=~/howard/databases/dbsnp/current --download-dbsnp-vcf \n""" +2116 """ cd ~/howard/databases && howard databases --assembly=hg19 --download-genomes=genomes/current --download-genomes-provider=UCSC --download-genomes-contig-regex='chr[0-9XYM]+$' --download-annovar=annovar/current --download-annovar-files='refGene,cosmic70,nci60' 
--download-snpeff=snpeff/current --download-refseq=refseq/current --download-refseq-format-file='ncbiRefSeq.txt' --download-dbnsfp=dbnsfp/current --download-dbnsfp-release='4.4a' --download-dbnsfp-subdatabases --download-alphamissense=alphamissense/current --download-exomiser=exomiser/current --download-dbsnp=dbsnp/current --download-dbsnp-vcf --threads=8 \n""" +2117 """ howard databases --generate-param=/tmp/param.json --generate-param-description=/tmp/test.description.json --generate-param-formats=parquet \n""" +2118 """ howard databases --input_annovar=tests/databases/others/hg19_nci60.txt --output_annovar=/tmp/nci60.from_annovar.vcf.gz --annovar_to_parquet=/tmp/nci60.from_annovar.parquet --annovar_code=nci60 --genome=~/howard/databases/genomes/current/hg19.fa \n""" +2119 """\n""" +2120 """Notes:\n""" +2121 """ - Downloading databases can take a while, depending on network, threads and memory\n""" +2122 """ - Proxy: Beware of network and proxy configuration\n""" +2123 """ - dbNSFP download: More threads, more memory usage (8 threads ~ 16Gb, 24 threads ~ 32Gb)\n""" +2124 """ \n""", +2125 "groups": { +2126 "main": { +2127 "assembly": False, +2128 "genomes-folder": False, +2129 "genome": False, +2130 "param": False, +2131 }, +2132 "Genomes": { +2133 "download-genomes": False, +2134 "download-genomes-provider": False, +2135 "download-genomes-contig-regex": False, +2136 }, +2137 "snpEff": {"download-snpeff": False}, +2138 "Annovar": { +2139 "download-annovar": False, +2140 "download-annovar-files": False, +2141 "download-annovar-url": False, +2142 }, +2143 "refSeq": { +2144 "download-refseq": False, +2145 "download-refseq-url": False, +2146 "download-refseq-prefix": False, +2147 "download-refseq-files": False, +2148 "download-refseq-format-file": False, +2149 "download-refseq-include-utr5": False, +2150 "download-refseq-include-utr3": False, +2151 "download-refseq-include-chrM": False, +2152 "download-refseq-include-non-canonical-chr": False, +2153 
"download-refseq-include-non-coding-transcripts": False, +2154 "download-refseq-include-transcript-version": False, +2155 }, +2156 "dbNSFP": { +2157 "download-dbnsfp": False, +2158 "download-dbnsfp-url": False, +2159 "download-dbnsfp-release": False, +2160 "download-dbnsfp-parquet-size": False, +2161 "download-dbnsfp-subdatabases": False, +2162 "download-dbnsfp-parquet": False, +2163 "download-dbnsfp-vcf": False, +2164 "download-dbnsfp-no-files-all": False, +2165 "download-dbnsfp-add-info": False, +2166 "download-dbnsfp-only-info": False, +2167 "download-dbnsfp-uniquify": False, +2168 "download-dbnsfp-row-group-size": False, +2169 }, +2170 "AlphaMissense": { +2171 "download-alphamissense": False, +2172 "download-alphamissense-url": False, +2173 }, +2174 "Exomiser": { +2175 "download-exomiser": False, +2176 "download-exomiser-application-properties": False, +2177 "download-exomiser-url": False, +2178 "download-exomiser-release": False, +2179 "download-exomiser-phenotype-release": False, +2180 "download-exomiser-remm-release": False, +2181 "download-exomiser-remm-url": False, +2182 "download-exomiser-cadd-release": False, +2183 "download-exomiser-cadd-url": False, +2184 "download-exomiser-cadd-url-snv-file": False, +2185 "download-exomiser-cadd-url-indel-file": False, 2186 }, -2187 "from_Annovar": { -2188 "input_annovar": False, -2189 "output_annovar": False, -2190 "annovar_code": False, -2191 "annovar_to_parquet": False, -2192 "annovar_reduce_memory": False, -2193 "annovar_multi_variant": False, -2194 }, -2195 "from_extann": { -2196 "input_extann": False, -2197 "output_extann": False, -2198 "refgene": False, -2199 "transcripts": False, -2200 "param_extann": False, -2201 "mode_extann": False, +2187 "dbSNP": { +2188 "download-dbsnp": False, +2189 "download-dbsnp-releases": False, +2190 "download-dbsnp-release-default": False, +2191 "download-dbsnp-url": False, +2192 "download-dbsnp-url-files": False, +2193 "download-dbsnp-url-files-prefix": False, +2194 
"download-dbsnp-assemblies-map": False, +2195 "download-dbsnp-vcf": False, +2196 "download-dbsnp-parquet": False, +2197 }, +2198 "HGMD": { +2199 "convert-hgmd": False, +2200 "convert-hgmd-file": False, +2201 "convert-hgmd-basename": False, 2202 }, -2203 "Parameters": { -2204 "generate-param": False, -2205 "generate-param-description": False, -2206 "generate-param-releases": False, -2207 "generate-param-formats": False, -2208 "generate-param-bcftools": False, -2209 }, -2210 }, -2211 }, -2212 "gui": { -2213 "function": "gui", -2214 "description": """Graphical User Interface tools""", -2215 "help": """Graphical User Interface tools""", -2216 "epilog": """Usage examples:\n""" """ howard gui """, -2217 "groups": {}, -2218 }, -2219 "help": { -2220 "function": "help", -2221 "description": """Help tools""", -2222 "help": """Help tools""", -2223 "epilog": """Usage examples:\n""" -2224 """ howard help --help_md=docs/help.md --help_html=docs/html/help.html --help_pdf=docs/pdf/help.pdf\n""" -2225 """ howard help --help_json_input=docs/json/help.configuration.json --help_json_input_title='HOWARD Configuration' --help_md=docs/help.configuration.md --help_html=docs/html/help.configuration.html --help_pdf=docs/pdf/help.configuration.pdf --code_type='json'\n""" -2226 """ howard help --help_json_input=docs/json/help.parameteres.json --help_json_input_title='HOWARD Parameters' --help_md=docs/help.parameteres.md --help_html=docs/html/help.parameteres.html --help_pdf=docs/pdf/help.parameteres.pdf --code_type='json' \n""" -2227 """ howard help --help_json_input=docs/json/help.parameteres.databases.json --help_json_input_title='HOWARD Parameters Databases' --help_md=docs/help.parameteres.databases.md --help_html=docs/html/help.parameteres.databases.html --help_pdf=docs/pdf/help.parameteres.databases.pdf --code_type='json' \n""" -2228 """ \n""", -2229 "groups": { -2230 "main": { -2231 "help_md": False, -2232 "help_html": False, -2233 "help_pdf": False, -2234 "help_md_input": False, -2235 
"help_json_input": False, -2236 "help_json_input_title": False, -2237 "code_type": False, -2238 } -2239 }, -2240 }, -2241} -2242 -2243arguments_dict = { -2244 "arguments": arguments, -2245 "commands_arguments": commands_arguments, -2246 "shared_arguments": shared_arguments, -2247} +2203 "from_Annovar": { +2204 "input_annovar": False, +2205 "output_annovar": False, +2206 "annovar_code": False, +2207 "annovar_to_parquet": False, +2208 "annovar_reduce_memory": False, +2209 "annovar_multi_variant": False, +2210 }, +2211 "from_extann": { +2212 "input_extann": False, +2213 "output_extann": False, +2214 "refgene": False, +2215 "transcripts": False, +2216 "param_extann": False, +2217 "mode_extann": False, +2218 }, +2219 "Parameters": { +2220 "generate-param": False, +2221 "generate-param-description": False, +2222 "generate-param-releases": False, +2223 "generate-param-formats": False, +2224 "generate-param-bcftools": False, +2225 }, +2226 }, +2227 }, +2228 "gui": { +2229 "function": "gui", +2230 "description": """Graphical User Interface tools""", +2231 "help": """Graphical User Interface tools""", +2232 "epilog": """Usage examples:\n""" """ howard gui """, +2233 "groups": {}, +2234 }, +2235 "help": { +2236 "function": "help", +2237 "description": """Help tools""", +2238 "help": """Help tools""", +2239 "epilog": """Usage examples:\n""" +2240 """ howard help --help_md=docs/help.md --help_html=docs/html/help.html --help_pdf=docs/pdf/help.pdf\n""" +2241 """ howard help --help_json_input=docs/json/help.configuration.json --help_json_input_title='HOWARD Configuration' --help_md=docs/help.configuration.md --help_html=docs/html/help.configuration.html --help_pdf=docs/pdf/help.configuration.pdf --code_type='json'\n""" +2242 """ howard help --help_json_input=docs/json/help.parameteres.json --help_json_input_title='HOWARD Parameters' --help_md=docs/help.parameteres.md --help_html=docs/html/help.parameteres.html --help_pdf=docs/pdf/help.parameteres.pdf --code_type='json' \n""" +2243 
""" howard help --help_json_input=docs/json/help.parameteres.databases.json --help_json_input_title='HOWARD Parameters Databases' --help_md=docs/help.parameteres.databases.md --help_html=docs/html/help.parameteres.databases.html --help_pdf=docs/pdf/help.parameteres.databases.pdf --code_type='json' \n""" +2244 """ \n""", +2245 "groups": { +2246 "main": { +2247 "help_md": False, +2248 "help_html": False, +2249 "help_pdf": False, +2250 "help_md_input": False, +2251 "help_json_input": False, +2252 "help_json_input_title": False, +2253 "code_type": False, +2254 } +2255 }, +2256 }, +2257} +2258 +2259arguments_dict = { +2260 "arguments": arguments, +2261 "commands_arguments": commands_arguments, +2262 "shared_arguments": shared_arguments, +2263} @@ -2334,70 +2350,70 @@

    -
     55class PathType(object):
    - 56
    - 57    def __init__(self, exists=True, type="file", dash_ok=True):
    - 58        """exists:
    - 59             True: a path that does exist
    - 60             False: a path that does not exist, in a valid parent directory
    - 61             None: don't care
    - 62        type: file, dir, symlink, None, or a function returning True for valid paths
    - 63             None: don't care
    - 64        dash_ok: whether to allow "-" as stdin/stdout"""
    - 65
    - 66        self.__name__ = "Path"
    - 67
    - 68        assert exists in (True, False, None)
    - 69        assert type in ("file", "dir", "symlink", None) or hasattr(type, "__call__")
    - 70
    - 71        self._exists = exists
    - 72        self._type = type
    - 73        self._dash_ok = dash_ok
    - 74
    - 75    def __call__(self, string):
    - 76
    - 77        # Full path if not a JSON string
    - 78        try:
    - 79            json.loads(string)
    - 80        except:
    - 81            string = full_path(string)
    - 82
    - 83        if string == "-":
    - 84            # the special argument "-" means sys.std{in,out}
    - 85            if self._type == "dir":
    - 86                raise ValueError(
    - 87                    "standard input/output (-) not allowed as directory path"
    - 88                )
    - 89            elif self._type == "symlink":
    - 90                raise ValueError(
    - 91                    "standard input/output (-) not allowed as symlink path"
    - 92                )
    - 93            elif not self._dash_ok:
    - 94                raise ValueError("standard input/output (-) not allowed")
    - 95        else:
    - 96            e = os.path.exists(string)
    - 97            if self._exists == True:
    - 98                if not e:
    - 99                    raise ValueError("path does not exist: '%s'" % string)
    -100
    -101                if self._type is None:
    -102                    pass
    -103                elif self._type == "file":
    -104                    if not os.path.isfile(string):
    -105                        raise ValueError("path is not a file: '%s'" % string)
    -106                elif self._type == "symlink":
    -107                    if not os.path.symlink(string):
    -108                        raise ValueError("path is not a symlink: '%s'" % string)
    -109                elif self._type == "dir":
    -110                    if not os.path.isdir(string):
    -111                        raise ValueError("path is not a directory: '%s'" % string)
    -112                elif not self._type(string):
    -113                    raise ValueError("path not valid: '%s'" % string)
    -114            else:
    -115                if self._exists == False and e:
    -116                    raise ValueError("path exists: '%s'" % string)
    -117
    -118        return string
    +            
     56class PathType(object):
    + 57
    + 58    def __init__(self, exists=True, type="file", dash_ok=True):
    + 59        """exists:
    + 60             True: a path that does exist
    + 61             False: a path that does not exist, in a valid parent directory
    + 62             None: don't care
    + 63        type: file, dir, symlink, None, or a function returning True for valid paths
    + 64             None: don't care
    + 65        dash_ok: whether to allow "-" as stdin/stdout"""
    + 66
    + 67        self.__name__ = "Path"
    + 68
    + 69        assert exists in (True, False, None)
    + 70        assert type in ("file", "dir", "symlink", None) or hasattr(type, "__call__")
    + 71
    + 72        self._exists = exists
    + 73        self._type = type
    + 74        self._dash_ok = dash_ok
    + 75
    + 76    def __call__(self, string):
    + 77
    + 78        # Full path if not a JSON string
    + 79        try:
    + 80            json.loads(string)
    + 81        except:
    + 82            string = full_path(string)
    + 83
    + 84        if string == "-":
    + 85            # the special argument "-" means sys.std{in,out}
    + 86            if self._type == "dir":
    + 87                raise ValueError(
    + 88                    "standard input/output (-) not allowed as directory path"
    + 89                )
    + 90            elif self._type == "symlink":
    + 91                raise ValueError(
    + 92                    "standard input/output (-) not allowed as symlink path"
    + 93                )
    + 94            elif not self._dash_ok:
    + 95                raise ValueError("standard input/output (-) not allowed")
    + 96        else:
    + 97            e = os.path.exists(string)
    + 98            if self._exists == True:
    + 99                if not e:
    +100                    raise ValueError("path does not exist: '%s'" % string)
    +101
    +102                if self._type is None:
    +103                    pass
    +104                elif self._type == "file":
    +105                    if not os.path.isfile(string):
    +106                        raise ValueError("path is not a file: '%s'" % string)
    +107                elif self._type == "symlink":
    +108                    if not os.path.symlink(string):
    +109                        raise ValueError("path is not a symlink: '%s'" % string)
    +110                elif self._type == "dir":
    +111                    if not os.path.isdir(string):
    +112                        raise ValueError("path is not a directory: '%s'" % string)
    +113                elif not self._type(string):
    +114                    raise ValueError("path not valid: '%s'" % string)
    +115            else:
    +116                if self._exists == False and e:
    +117                    raise ValueError("path exists: '%s'" % string)
    +118
    +119        return string
     
    @@ -2413,23 +2429,23 @@

    -
    57    def __init__(self, exists=True, type="file", dash_ok=True):
    -58        """exists:
    -59             True: a path that does exist
    -60             False: a path that does not exist, in a valid parent directory
    -61             None: don't care
    -62        type: file, dir, symlink, None, or a function returning True for valid paths
    -63             None: don't care
    -64        dash_ok: whether to allow "-" as stdin/stdout"""
    -65
    -66        self.__name__ = "Path"
    -67
    -68        assert exists in (True, False, None)
    -69        assert type in ("file", "dir", "symlink", None) or hasattr(type, "__call__")
    -70
    -71        self._exists = exists
    -72        self._type = type
    -73        self._dash_ok = dash_ok
    +            
    58    def __init__(self, exists=True, type="file", dash_ok=True):
    +59        """exists:
    +60             True: a path that does exist
    +61             False: a path that does not exist, in a valid parent directory
    +62             None: don't care
    +63        type: file, dir, symlink, None, or a function returning True for valid paths
    +64             None: don't care
    +65        dash_ok: whether to allow "-" as stdin/stdout"""
    +66
    +67        self.__name__ = "Path"
    +68
    +69        assert exists in (True, False, None)
    +70        assert type in ("file", "dir", "symlink", None) or hasattr(type, "__call__")
    +71
    +72        self._exists = exists
    +73        self._type = type
    +74        self._dash_ok = dash_ok
     
    @@ -2475,7 +2491,7 @@

    commands_arguments = - {'query': {'function': 'query', 'description': "Query genetic variations in SQL format. Data can be loaded into 'variants' table from various formats (e.g. VCF, TSV, Parquet...). Using --explode_infos allow query on INFO/tag annotations. SQL query can also use external data within the request, such as a Parquet file(s). ", 'help': 'Query genetic variations file in SQL format.', 'epilog': 'Usage examples:\n howard query --input=tests/data/example.vcf.gz --query="SELECT * FROM variants WHERE REF = \'A\' AND POS < 100000" \n howard query --input=tests/data/example.vcf.gz --explode_infos --query=\'SELECT "#CHROM", POS, REF, ALT, DP, CLNSIG, sample2, sample3 FROM variants WHERE DP >= 50 OR CLNSIG NOT NULL ORDER BY DP DESC\' \n howard query --query="SELECT \\"#CHROM\\", POS, REF, ALT, \\"INFO/Interpro_domain\\" FROM \'tests/databases/annotations/current/hg19/dbnsfp42a.parquet\' WHERE \\"INFO/Interpro_domain\\" NOT NULL ORDER BY \\"INFO/SiPhy_29way_logOdds_rankscore\\" DESC LIMIT 10" \n howard query --explode_infos --explode_infos_prefix=\'INFO/\' --query="SELECT \\"#CHROM\\", POS, REF, ALT, STRING_AGG(INFO, \';\') AS INFO FROM \'tests/databases/annotations/current/hg19/*.parquet\' GROUP BY \\"#CHROM\\", POS, REF, ALT" --output=/tmp/full_annotation.tsv && head -n2 /tmp/full_annotation.tsv \n howard query --input=tests/data/example.vcf.gz --param=config/param.json \n \n', 'groups': {'main': {'input': False, 'output': False, 'param': False, 'query': False}, 'Explode': {'explode_infos': False, 'explode_infos_prefix': False, 'explode_infos_fields': False}, 'Query': {'query_limit': False, 'query_print_mode': False}, 'Export': {'include_header': False, 'parquet_partitions': False}}}, 'filter': {'function': 'filter', 'description': "Filter genetic variations in SQL format. Data can be loaded into 'variants' table from various formats (e.g. VCF, TSV, Parquet...). SQL filter can also use external data within the request, such as a Parquet file(s). 
", 'help': 'Filter genetic variations file in SQL format.', 'epilog': 'Usage examples:\n howard filter --input=tests/data/example.vcf.gz --output=/tmp/example.filter.vcf.gz --filter="REF = \'A\' AND POS < 100000" \n howard filter --input=tests/data/example.vcf.gz --output=/tmp/example.filter.vcf.gz --filter="REF = \'A\' AND POS < 100000" --samples="sample1,sample2" \n howard filter --input=tests/data/example.vcf.gz --output=/tmp/example.filter.vcf.gz --filter="INFOS.CLNSIG LIKE \'pathogenic\'" --samples="sample1,sample2" \n howard filter --input=tests/data/example.vcf.gz --output=/tmp/example.filter.vcf.gz --filter="QUAL > 100 AND SAMPLES.sample2.GT != \'./.\'" --samples="sample2" \n \n', 'groups': {'main': {'input': True, 'output': True}, 'Filters': {'filter': False, 'samples': False}, 'Export': {'include_header': False, 'parquet_partitions': False}}}, 'stats': {'function': 'stats', 'description': 'Statistics on genetic variations, such as: number of variants, number of samples, statistics by chromosome, genotypes by samples...', 'help': 'Statistics on genetic variations file.', 'epilog': 'Usage examples:\n howard stats --input=tests/data/example.vcf.gz \n howard stats --input=tests/data/example.vcf.gz --stats_md=/tmp/stats.md \n howard stats --input=tests/data/example.vcf.gz --param=config/param.json \n \n', 'groups': {'main': {'input': True, 'param': False}, 'Stats': {'stats_md': False, 'stats_json': False}}}, 'convert': {'function': 'convert', 'description': "Convert genetic variations file to another format. Multiple format are available, such as usual and official VCF and BCF format, but also other formats such as TSV, CSV, PSV and Parquet/duckDB. These formats need a header '.hdr' file to take advantage of the power of howard (especially through INFO/tag definition), and using howard convert tool automatically generate header file fo futher use. 
", 'help': 'Convert genetic variations file to another format.', 'epilog': 'Usage examples:\n howard convert --input=tests/data/example.vcf.gz --output=/tmp/example.tsv \n howard convert --input=tests/data/example.vcf.gz --output=/tmp/example.parquet \n howard convert --input=tests/data/example.vcf.gz --output=/tmp/example.tsv --explode_infos --explode_infos_fields=\'CLNSIG,SIFT,DP\' --order_by=\'CLNSIG DESC, DP DESC\' \n howard convert --input=tests/data/example.vcf.gz --output=/tmp/example.tsv --explode_infos --explode_infos_prefix=\'INFO/\' --explode_infos_fields=\'CLNSIG,SIFT,DP,*\' --order_by=\'"INFO/CLNSIG" DESC, "INFO/DP" DESC\' --include_header \n howard convert --input=tests/data/example.vcf.gz --output=/tmp/example.tsv --param=config/param.json \n \n', 'groups': {'main': {'input': True, 'output': True, 'param': False}, 'Explode': {'explode_infos': False, 'explode_infos_prefix': False, 'explode_infos_fields': False}, 'Export': {'include_header': False, 'order_by': False, 'parquet_partitions': False}}}, 'hgvs': {'function': 'hgvs', 'description': "HGVS annotation using HUGO HGVS internation Sequence Variant Nomenclature (http://varnomenhoward.tools.hgvs.org/). Annotation refere to refGene and genome to generate HGVS nomenclature for all available transcripts. 
This annotation add 'hgvs' field into VCF INFO column of a VCF file.", 'help': 'HGVS annotation (HUGO internation nomenclature) using refGene, genome and transcripts list.\n', 'epilog': 'Usage examples:\n howard hgvs --input=tests/data/example.full.vcf --output=/tmp/example.hgvs.vcf \n howard hgvs --input=tests/data/example.full.vcf --output=/tmp/example.hgvs.tsv --param=config/param.json \n howard hgvs --input=tests/data/example.full.vcf --output=/tmp/example.hgvs.vcf --full_format --use_exon \n \n', 'groups': {'main': {'input': True, 'output': True, 'param': False, 'hgvs_options': False, 'assembly': False}, 'HGVS': {'use_gene': False, 'use_exon': False, 'use_protein': False, 'add_protein': False, 'full_format': False, 'codon_type': False, 'refgene': False, 'refseqlink': False}}}, 'annotation': {'function': 'annotation', 'description': 'Annotation is mainly based on a build-in Parquet annotation method, and tools such as BCFTOOLS, Annovar and snpEff. It uses available databases (see Annovar and snpEff) and homemade databases. Format of databases are: parquet, duckdb, vcf, bed, Annovar and snpEff (Annovar and snpEff databases are automatically downloaded, see howard databases tool). 
', 'help': 'Annotation of genetic variations file using databases/files and tools.', 'epilog': "Usage examples:\n howard annotation --input=tests/data/example.vcf.gz --output=/tmp/example.howard.vcf.gz --annotations='tests/databases/annotations/current/hg19/avsnp150.parquet,tests/databases/annotations/current/hg19/dbnsfp42a.parquet,tests/databases/annotations/current/hg19/gnomad211_genome.parquet' \n howard annotation --input=tests/data/example.vcf.gz --output=/tmp/example.howard.tsv --assembly=hg19 --annotations='annovar:refGene,annovar:cosmic70,snpeff,tests/databases/annotations/current/hg19/clinvar_20210123.parquet' \n howard annotation --input=tests/data/example.vcf.gz --output=/tmp/example.howard.tsv --assembly=hg19 --annotation_parquet='tests/databases/annotations/current/hg19/avsnp150.parquet,tests/databases/annotations/current/hg19/dbnsfp42a.parquet' \n howard annotation --input=tests/data/example.vcf.gz --output=/tmp/example.howard.tsv --assembly=hg19 --annotation_bcftools='tests/databases/annotations/current/hg19/nci60.vcf.gz,tests/databases/annotations/current/hg19/dbnsfp42a.vcf.gz' \n howard annotation --input=tests/data/example.vcf.gz --output=/tmp/example.howard.tsv --assembly=hg19 --annotation_snpsift='tests/databases/annotations/current/hg19/nci60.vcf.gz,tests/databases/annotations/current/hg19/dbnsfp42a.vcf.gz' \n howard annotation --input=tests/data/example.vcf.gz --output=/tmp/example.howard.tsv --assembly=hg19 --annotation_annovar='nci60:cosmic70' \n howard annotation --input=tests/data/example.vcf.gz --output=/tmp/example.howard.tsv --assembly=hg19 --annotation_snpeff='-hgvs' \n howard annotation --input=tests/data/example.vcf.gz --output=/tmp/example.howard.tsv --assembly=hg19 --annotation_exomiser='preset=exome:transcript_source=refseq' \n howard annotation --input=tests/data/example.vcf.gz --output=/tmp/example.howard.tsv --assembly=hg19 --annotation_splice='split_mode=one:spliceai_distance=500:spliceai_mask=1' \n howard annotation 
--input=tests/data/example.vcf.gz --output=/tmp/example.howard.tsv --assembly=hg19 --annotations='ALL:parquet' \n howard annotation --input=tests/data/example.vcf.gz --output=/tmp/example.howard.tsv --param=config/param.json \n \n", 'groups': {'main': {'input': True, 'output': True, 'param': False, 'annotations': False, 'annotation_parquet': False, 'annotation_bcftools': False, 'annotation_annovar': False, 'annotation_snpeff': False, 'annotation_snpsift': False, 'annotation_exomiser': False, 'annotation_splice': False, 'assembly': False}, 'Annotation': {'annotations_update': False, 'annotations_append': False}}}, 'calculation': {'function': 'calculation', 'description': 'Calculation processes variants information to generate new information, such as: identify variation type (VarType), harmonizes allele frequency (VAF) and calculate sttistics (VAF_stats), extracts Nomen (transcript, cNomen, pNomen...) from an HGVS field (e.g. snpEff, Annovar) with an optional list of personalized transcripts, generates VaRank format barcode, identify trio inheritance.', 'help': 'Calculation operations on genetic variations file and genotype information.\n', 'epilog': "Usage examples:\n howard calculation --input=tests/data/example.full.vcf --output=/tmp/example.calculation.tsv --calculations='vartype' \n howard calculation --input=tests/data/example.ann.vcf.gz --output=/tmp/example.calculated.tsv --calculations='snpeff_hgvs,NOMEN' --hgvs_field=snpeff_hgvs --transcripts=tests/data/transcripts.tsv \n howard calculation --input=tests/data/example.vcf.gz --output=/tmp/example.calculated.tsv --calculations='TRIO' --trio_pedigree='sample1,sample2,sample4' \n howard calculation --input=tests/data/example.vcf.gz --output=/tmp/example.calculated.tsv --calculations='BARCODEFAMILY' --family_pedigree='sample1,sample2,sample4' \n howard calculation --input=tests/data/example.ann.transcripts.vcf.gz --output=/tmp/example.calculation.transcripts.tsv --param=config/param.transcripts.json 
--calculations='TRANSCRIPTS_ANNOTATIONS,TRANSCRIPTS_PRIORITIZATION,TRANSCRIPTS_EXPORT' \n howard calculation --input=tests/data/example.ann.vcf.gz --output=/tmp/example.ann.tsv --param=config/param.json \n howard calculation --show_calculations \n \n", 'groups': {'main': {'input': False, 'output': False, 'param': False, 'calculations': False}, 'Calculation': {'calculation_config': False, 'show_calculations': False}, 'NOMEN': {'hgvs_field': False, 'transcripts': False}, 'TRIO': {'trio_pedigree': False}, 'BARCODEFAMILY': {'family_pedigree': False}}}, 'prioritization': {'function': 'prioritization', 'description': "Prioritization algorithm uses profiles to flag variants (as passed or filtered), calculate a prioritization score, and automatically generate a comment for each variants (example: 'polymorphism identified in dbSNP. associated to Lung Cancer. Found in ClinVar database'). Prioritization profiles are defined in a configuration file in JSON format. A profile is defined as a list of annotation/value, using wildcards and comparison options (contains, lower than, greater than, equal...). Annotations fields may be quality values (usually from callers, such as 'DP') or other annotations fields provided by annotations tools, such as HOWARD itself (example: COSMIC, Clinvar, 1000genomes, PolyPhen, SIFT). 
Multiple profiles can be used simultaneously, which is useful to define multiple validation/prioritization levels (example: 'standard', 'stringent', 'rare variants', 'low allele frequency').\n", 'help': 'Prioritization of genetic variations based on annotations criteria (profiles).', 'epilog': "Usage examples:\n howard prioritization --input=tests/data/example.vcf.gz --output=/tmp/example.prioritized.vcf.gz --prioritizations='default' \n howard prioritization --input=tests/data/example.vcf.gz --output=/tmp/example.prioritized.vcf.gz --prioritizations='default,GERMLINE' --prioritization_config=config/prioritization_profiles.json \n howard prioritization --input=tests/data/example.vcf.gz --output=/tmp/example.prioritized.tsv --param=config/param.json \n \n", 'groups': {'main': {'input': True, 'output': True, 'param': False, 'prioritizations': False}, 'Prioritization': {'default_profile': False, 'pzfields': False, 'prioritization_score_mode': False, 'prioritization_config': False}}}, 'process': {'function': 'process', 'description': 'howard process tool manage genetic variations to:\n- annotates genetic variants with multiple annotation databases/files and tools\n- calculates and normalizes annotations\n- prioritizes variants with profiles (list of citeria) to calculate scores and flags\n- translates into various formats\n- query genetic variants and annotations\n- generates variants statistics', 'help': 'Full genetic variations process: annotation, calculation, prioritization, format, query, filter...', 'epilog': 'Usage examples:\n howard process --input=tests/data/example.vcf.gz --output=/tmp/example.annotated.vcf.gz --param=config/param.json \n howard process --input=tests/data/example.vcf.gz --annotations=\'snpeff\' --calculations=\'snpeff_hgvs\' --prioritizations=\'default\' --explode_infos --output=/tmp/example.annotated.tsv --query=\'SELECT "#CHROM", POS, ALT, REF, snpeff_hgvs FROM variants\' \n howard process --input=tests/data/example.vcf.gz 
--hgvs_options=\'full_format,use_exon\' --explode_infos --output=/tmp/example.annotated.tsv --query=\'SELECT "#CHROM", POS, ALT, REF, hgvs FROM variants\' \n howard process --input=tests/data/example.vcf.gz --output=/tmp/example.howard.vcf.gz --hgvs=\'full_format,use_exon\' --annotations=\'tests/databases/annotations/current/hg19/avsnp150.parquet,tests/databases/annotations/current/hg19/dbnsfp42a.parquet,tests/databases/annotations/current/hg19/gnomad211_genome.parquet\' --calculations=\'NOMEN\' --explode_infos --query=\'SELECT NOMEN, REVEL_score, SIFT_score, AF AS \'gnomad_AF\', ClinPred_score, ClinPred_pred FROM variants\' \n \n', 'groups': {'main': {'input': True, 'output': True, 'param': False, 'hgvs_options': False, 'annotations': False, 'calculations': False, 'prioritizations': False, 'assembly': False}, 'HGVS': {'use_gene': False, 'use_exon': False, 'use_protein': False, 'add_protein': False, 'full_format': False, 'codon_type': False, 'refgene': False, 'refseqlink': False}, 'Annotation': {'annotations_update': False, 'annotations_append': False}, 'Calculation': {'calculation_config': False}, 'Prioritization': {'default_profile': False, 'pzfields': False, 'prioritization_score_mode': False, 'prioritization_config': False}, 'Query': {'query': False, 'query_limit': False, 'query_print_mode': False}, 'Explode': {'explode_infos': False, 'explode_infos_prefix': False, 'explode_infos_fields': False}, 'Export': {'include_header': False, 'order_by': False, 'parquet_partitions': False}}}, 'databases': {'function': 'databases', 'description': 'Download databases and needed files for howard and associated tools', 'help': 'Download databases and needed files for howard and associated tools', 'epilog': "Usage examples:\n howard databases --assembly=hg19 --download-genomes=~/howard/databases/genomes/current --download-genomes-provider=UCSC --download-genomes-contig-regex='chr[0-9XYM]+$' \n howard databases --assembly=hg19 
--download-annovar=~/howard/databases/annovar/current --download-annovar-files='refGene,cosmic70,nci60' \n howard databases --assembly=hg19 --download-snpeff=~/howard/databases/snpeff/current \n howard databases --assembly=hg19 --download-refseq=~/howard/databases/refseq/current --download-refseq-format-file='ncbiRefSeq.txt' \n howard databases --assembly=hg19 --download-dbnsfp=~/howard/databases/dbnsfp/current --download-dbnsfp-release='4.4a' --download-dbnsfp-subdatabases \n howard databases --assembly=hg19 --download-alphamissense=~/howard/databases/alphamissense/current \n howard databases --assembly=hg19 --download-exomiser=~/howard/databases/exomiser/current \n howard databases --assembly=hg19 --download-dbsnp=~/howard/databases/dbsnp/current --download-dbsnp-vcf \n cd ~/howard/databases && howard databases --assembly=hg19 --download-genomes=genomes/current --download-genomes-provider=UCSC --download-genomes-contig-regex='chr[0-9XYM]+$' --download-annovar=annovar/current --download-annovar-files='refGene,cosmic70,nci60' --download-snpeff=snpeff/current --download-refseq=refseq/current --download-refseq-format-file='ncbiRefSeq.txt' --download-dbnsfp=dbnsfp/current --download-dbnsfp-release='4.4a' --download-dbnsfp-subdatabases --download-alphamissense=alphamissense/current --download-exomiser=exomiser/current --download-dbsnp=dbsnp/current --download-dbsnp-vcf --threads=8 \n howard databases --generate-param=/tmp/param.json --generate-param-description=/tmp/test.description.json --generate-param-formats=parquet \n howard databases --input_annovar=tests/databases/others/hg19_nci60.txt --output_annovar=/tmp/nci60.from_annovar.vcf.gz --annovar_to_parquet=/tmp/nci60.from_annovar.parquet --annovar_code=nci60 --genome=~/howard/databases/genomes/current/hg19.fa \n\nNotes:\n - Downloading databases can take a while, depending on network, threads and memory\n - Proxy: Beware of network and proxy configuration\n - dbNSFP download: More threads, more memory usage (8 
threads ~ 16Gb, 24 threads ~ 32Gb)\n \n", 'groups': {'main': {'assembly': False, 'genomes-folder': False, 'genome': False, 'param': False}, 'Genomes': {'download-genomes': False, 'download-genomes-provider': False, 'download-genomes-contig-regex': False}, 'snpEff': {'download-snpeff': False}, 'Annovar': {'download-annovar': False, 'download-annovar-files': False, 'download-annovar-url': False}, 'refSeq': {'download-refseq': False, 'download-refseq-url': False, 'download-refseq-prefix': False, 'download-refseq-files': False, 'download-refseq-format-file': False, 'download-refseq-include-utr5': False, 'download-refseq-include-utr3': False, 'download-refseq-include-chrM': False, 'download-refseq-include-non-canonical-chr': False, 'download-refseq-include-non-coding-transcripts': False, 'download-refseq-include-transcript-version': False}, 'dbNSFP': {'download-dbnsfp': False, 'download-dbnsfp-url': False, 'download-dbnsfp-release': False, 'download-dbnsfp-parquet-size': False, 'download-dbnsfp-subdatabases': False, 'download-dbnsfp-parquet': False, 'download-dbnsfp-vcf': False, 'download-dbnsfp-no-files-all': False, 'download-dbnsfp-add-info': False, 'download-dbnsfp-only-info': False, 'download-dbnsfp-uniquify': False, 'download-dbnsfp-row-group-size': False}, 'AlphaMissense': {'download-alphamissense': False, 'download-alphamissense-url': False}, 'Exomiser': {'download-exomiser': False, 'download-exomiser-application-properties': False, 'download-exomiser-url': False, 'download-exomiser-release': False, 'download-exomiser-phenotype-release': False, 'download-exomiser-remm-release': False, 'download-exomiser-remm-url': False, 'download-exomiser-cadd-release': False, 'download-exomiser-cadd-url': False, 'download-exomiser-cadd-url-snv-file': False, 'download-exomiser-cadd-url-indel-file': False}, 'dbSNP': {'download-dbsnp': False, 'download-dbsnp-releases': False, 'download-dbsnp-release-default': False, 'download-dbsnp-url': False, 'download-dbsnp-url-files': False, 
'download-dbsnp-url-files-prefix': False, 'download-dbsnp-assemblies-map': False, 'download-dbsnp-vcf': False, 'download-dbsnp-parquet': False}, 'HGMD': {'convert-hgmd': False, 'convert-hgmd-file': False, 'convert-hgmd-basename': False}, 'from_Annovar': {'input_annovar': False, 'output_annovar': False, 'annovar_code': False, 'annovar_to_parquet': False, 'annovar_reduce_memory': False, 'annovar_multi_variant': False}, 'from_extann': {'input_extann': False, 'output_extann': False, 'refgene': False, 'transcripts': False, 'param_extann': False, 'mode_extann': False}, 'Parameters': {'generate-param': False, 'generate-param-description': False, 'generate-param-releases': False, 'generate-param-formats': False, 'generate-param-bcftools': False}}}, 'gui': {'function': 'gui', 'description': 'Graphical User Interface tools', 'help': 'Graphical User Interface tools', 'epilog': 'Usage examples:\n howard gui ', 'groups': {}}, 'help': {'function': 'help', 'description': 'Help tools', 'help': 'Help tools', 'epilog': "Usage examples:\n howard help --help_md=docs/help.md --help_html=docs/html/help.html --help_pdf=docs/pdf/help.pdf\n howard help --help_json_input=docs/json/help.configuration.json --help_json_input_title='HOWARD Configuration' --help_md=docs/help.configuration.md --help_html=docs/html/help.configuration.html --help_pdf=docs/pdf/help.configuration.pdf --code_type='json'\n howard help --help_json_input=docs/json/help.parameteres.json --help_json_input_title='HOWARD Parameters' --help_md=docs/help.parameteres.md --help_html=docs/html/help.parameteres.html --help_pdf=docs/pdf/help.parameteres.pdf --code_type='json' \n howard help --help_json_input=docs/json/help.parameteres.databases.json --help_json_input_title='HOWARD Parameters Databases' --help_md=docs/help.parameteres.databases.md --help_html=docs/html/help.parameteres.databases.html --help_pdf=docs/pdf/help.parameteres.databases.pdf --code_type='json' \n \n", 'groups': {'main': {'help_md': False, 'help_html': 
False, 'help_pdf': False, 'help_md_input': False, 'help_json_input': False, 'help_json_input_title': False, 'code_type': False}}}, 'update_database': {'function': 'update_database', 'description': 'Update HOWARD database\n', 'help': '(plugin) Update HOWARD database', 'epilog': 'Usage examples:\n howard update_database --database clinvar --databases_folder /home1/DB/HOWARD --update_config update_databases.json \n \n', 'groups': {'main': {'param': False}, 'Update_database': {'databases_folder': False, 'database': False, 'update_config': False, 'current_folder': False}, 'Options': {'show': False, 'limit': False}}}, 'to_excel': {'function': 'to_excel', 'description': "Convert VCF file to Excel '.xlsx' format.\n", 'help': "(plugin) Convert VCF file to Excel '.xlsx' format", 'epilog': 'Usage examples:\n howard to_excel --input=tests/data/example.vcf.gz --output=/tmp/example.xlsx --add_variants_view\n \n', 'groups': {'main': {'input': True, 'output': True}, 'Add': {'add_variants_view': False, 'add_header': False}}}, 'transcripts_check': {'function': 'transcripts_check', 'description': 'Check if a transcript list is present in a generated transcript table from a input VCF file.\n', 'help': '(plugin) Check transcript list in transcript table', 'epilog': 'Usage examples:\n howard transcripts_check --input=plugins/transcripts_check/tests/data/example.ann.transcripts.vcf.gz --param=plugins/transcripts_check/tests/data/param.transcripts.json --transcripts_expected=plugins/transcripts_check/tests/data/transcripts.tsv --stats=/tmp/transcripts.stats.json --transcripts_missing=/tmp/transcripts.missing.tsv\n \n', 'groups': {'main': {'input': True, 'param': True, 'transcripts_expected': True, 'transcripts_missing': False, 'stats_json': False}}}, 'genebe': {'function': 'genebe', 'description': 'GeneBe annotation using REST API (see https://genebe.net/).\n', 'help': '(plugin) GeneBe annotation using REST API', 'epilog': 'Usage examples:\n howard genebe --input=tests/data/example.vcf.gz 
--output=/tmp/example.genebe.vcf.gz --genebe_use_refseq\n \n', 'groups': {'main': {'input': True, 'output': True, 'param': False, 'assembly': False}, 'GeneBe': {'genebe_use_refseq': False, 'genebe_use_ensembl': False, 'not_flatten_consequences': False}, 'Explode': {'explode_infos': False, 'explode_infos_prefix': False, 'explode_infos_fields': False}, 'Export': {'include_header': False, 'order_by': False, 'parquet_partitions': False}}}, 'minimalize': {'function': 'minimalize', 'description': "Minimalize a VCF file consists in put missing value ('.') on INFO/Tags, ID, QUAL or FILTER fields. Options can also minimalize samples (keep only GT) or remove all samples. INFO/tags can by exploded before minimalize to keep tags into separated columns (useful for Parquet or TSV format to constitute a database).\n", 'help': '(plugin) Minimalize a VCF file, such as removing INFO/Tags or samples', 'epilog': 'Usage examples:\n howard minimalize --input=tests/data/example.vcf.gz --output=/tmp/example.minimal.vcf.gz --minimalize_info --minimalize_filter --minimalize_qual --minimalize_id --minimalize_samples\n howard minimalize --input=tests/data/example.vcf.gz --output=/tmp/example.minimal.tsv --remove_samples --explode_infos --minimalize_info\n \n', 'groups': {'main': {'input': True, 'output': True, 'param': False}, 'Minimalize': {'minimalize_info': False, 'minimalize_id': False, 'minimalize_qual': False, 'minimalize_filter': False, 'minimalize_samples': False, 'remove_samples': False}, 'Explode': {'explode_infos': False, 'explode_infos_prefix': False, 'explode_infos_fields': False}, 'Export': {'include_header': False, 'order_by': False, 'parquet_partitions': False}}}} + {'query': {'function': 'query', 'description': "Query genetic variations in SQL format. Data can be loaded into 'variants' table from various formats (e.g. VCF, TSV, Parquet...). Using --explode_infos allow query on INFO/tag annotations. 
SQL query can also use external data within the request, such as a Parquet file(s). ", 'help': 'Query genetic variations file in SQL format.', 'epilog': 'Usage examples:\n howard query --input=tests/data/example.vcf.gz --query="SELECT * FROM variants WHERE REF = \'A\' AND POS < 100000" \n howard query --input=tests/data/example.vcf.gz --explode_infos --query=\'SELECT "#CHROM", POS, REF, ALT, DP, CLNSIG, sample2, sample3 FROM variants WHERE DP >= 50 OR CLNSIG NOT NULL ORDER BY DP DESC\' \n howard query --query="SELECT \\"#CHROM\\", POS, REF, ALT, \\"INFO/Interpro_domain\\" FROM \'tests/databases/annotations/current/hg19/dbnsfp42a.parquet\' WHERE \\"INFO/Interpro_domain\\" NOT NULL ORDER BY \\"INFO/SiPhy_29way_logOdds_rankscore\\" DESC LIMIT 10" \n howard query --explode_infos --explode_infos_prefix=\'INFO/\' --query="SELECT \\"#CHROM\\", POS, REF, ALT, STRING_AGG(INFO, \';\') AS INFO FROM \'tests/databases/annotations/current/hg19/*.parquet\' GROUP BY \\"#CHROM\\", POS, REF, ALT" --output=/tmp/full_annotation.tsv && head -n2 /tmp/full_annotation.tsv \n howard query --input=tests/data/example.vcf.gz --param=config/param.json \n \n', 'groups': {'main': {'input': False, 'output': False, 'param': False, 'query': False}, 'Explode': {'explode_infos': False, 'explode_infos_prefix': False, 'explode_infos_fields': False}, 'Query': {'query_limit': False, 'query_print_mode': False}, 'Export': {'include_header': False, 'parquet_partitions': False}}}, 'filter': {'function': 'filter', 'description': "Filter genetic variations in SQL format. Data can be loaded into 'variants' table from various formats (e.g. VCF, TSV, Parquet...). SQL filter can also use external data within the request, such as a Parquet file(s). 
", 'help': 'Filter genetic variations file in SQL format.', 'epilog': 'Usage examples:\n howard filter --input=tests/data/example.vcf.gz --output=/tmp/example.filter.vcf.gz --filter="REF = \'A\' AND POS < 100000" \n howard filter --input=tests/data/example.vcf.gz --output=/tmp/example.filter.vcf.gz --filter="REF = \'A\' AND POS < 100000" --samples="sample1,sample2" \n howard filter --input=tests/data/example.vcf.gz --output=/tmp/example.filter.vcf.gz --filter="INFOS.CLNSIG LIKE \'pathogenic\'" --samples="sample1,sample2" \n howard filter --input=tests/data/example.vcf.gz --output=/tmp/example.filter.vcf.gz --filter="QUAL > 100 AND SAMPLES.sample2.GT != \'./.\'" --samples="sample2" \n \n', 'groups': {'main': {'input': True, 'output': True}, 'Filters': {'filter': False, 'samples': False}, 'Export': {'include_header': False, 'parquet_partitions': False}}}, 'sort': {'function': 'sort', 'description': "Sort genetic variations from contig order. Data can be loaded into 'variants' table from various formats (e.g. VCF, TSV, Parquet...). SQL filter can also use external data within the request, such as a Parquet file(s). 
", 'help': 'Sort genetic variations file from contig order.', 'epilog': 'Usage examples:\n howard sort --input=tests/data/example.vcf.gz --output=/tmp/example.sorted.vcf.gz \n \n', 'groups': {'main': {'input': True, 'output': True}, 'Export': {'include_header': False, 'parquet_partitions': False}}}, 'stats': {'function': 'stats', 'description': 'Statistics on genetic variations, such as: number of variants, number of samples, statistics by chromosome, genotypes by samples...', 'help': 'Statistics on genetic variations file.', 'epilog': 'Usage examples:\n howard stats --input=tests/data/example.vcf.gz \n howard stats --input=tests/data/example.vcf.gz --stats_md=/tmp/stats.md \n howard stats --input=tests/data/example.vcf.gz --param=config/param.json \n \n', 'groups': {'main': {'input': True, 'param': False}, 'Stats': {'stats_md': False, 'stats_json': False}}}, 'convert': {'function': 'convert', 'description': "Convert genetic variations file to another format. Multiple format are available, such as usual and official VCF and BCF format, but also other formats such as TSV, CSV, PSV and Parquet/duckDB. These formats need a header '.hdr' file to take advantage of the power of howard (especially through INFO/tag definition), and using howard convert tool automatically generate header file fo futher use. 
", 'help': 'Convert genetic variations file to another format.', 'epilog': 'Usage examples:\n howard convert --input=tests/data/example.vcf.gz --output=/tmp/example.tsv \n howard convert --input=tests/data/example.vcf.gz --output=/tmp/example.parquet \n howard convert --input=tests/data/example.vcf.gz --output=/tmp/example.tsv --explode_infos --explode_infos_fields=\'CLNSIG,SIFT,DP\' --order_by=\'CLNSIG DESC, DP DESC\' \n howard convert --input=tests/data/example.vcf.gz --output=/tmp/example.tsv --explode_infos --explode_infos_prefix=\'INFO/\' --explode_infos_fields=\'CLNSIG,SIFT,DP,*\' --order_by=\'"INFO/CLNSIG" DESC, "INFO/DP" DESC\' --include_header \n howard convert --input=tests/data/example.vcf.gz --output=/tmp/example.tsv --param=config/param.json \n \n', 'groups': {'main': {'input': True, 'output': True, 'param': False}, 'Explode': {'explode_infos': False, 'explode_infos_prefix': False, 'explode_infos_fields': False}, 'Export': {'include_header': False, 'order_by': False, 'parquet_partitions': False}}}, 'hgvs': {'function': 'hgvs', 'description': "HGVS annotation using HUGO HGVS internation Sequence Variant Nomenclature (http://varnomenhoward.tools.hgvs.org/). Annotation refere to refGene and genome to generate HGVS nomenclature for all available transcripts. 
This annotation add 'hgvs' field into VCF INFO column of a VCF file.", 'help': 'HGVS annotation (HUGO internation nomenclature) using refGene, genome and transcripts list.\n', 'epilog': 'Usage examples:\n howard hgvs --input=tests/data/example.full.vcf --output=/tmp/example.hgvs.vcf \n howard hgvs --input=tests/data/example.full.vcf --output=/tmp/example.hgvs.tsv --param=config/param.json \n howard hgvs --input=tests/data/example.full.vcf --output=/tmp/example.hgvs.vcf --full_format --use_exon \n \n', 'groups': {'main': {'input': True, 'output': True, 'param': False, 'hgvs_options': False, 'assembly': False}, 'HGVS': {'use_gene': False, 'use_exon': False, 'use_protein': False, 'add_protein': False, 'full_format': False, 'codon_type': False, 'refgene': False, 'refseqlink': False}}}, 'annotation': {'function': 'annotation', 'description': 'Annotation is mainly based on a build-in Parquet annotation method, and tools such as BCFTOOLS, Annovar and snpEff. It uses available databases (see Annovar and snpEff) and homemade databases. Format of databases are: parquet, duckdb, vcf, bed, Annovar and snpEff (Annovar and snpEff databases are automatically downloaded, see howard databases tool). 
', 'help': 'Annotation of genetic variations file using databases/files and tools.', 'epilog': "Usage examples:\n howard annotation --input=tests/data/example.vcf.gz --output=/tmp/example.howard.vcf.gz --annotations='tests/databases/annotations/current/hg19/avsnp150.parquet,tests/databases/annotations/current/hg19/dbnsfp42a.parquet,tests/databases/annotations/current/hg19/gnomad211_genome.parquet' \n howard annotation --input=tests/data/example.vcf.gz --output=/tmp/example.howard.tsv --assembly=hg19 --annotations='annovar:refGene,annovar:cosmic70,snpeff,tests/databases/annotations/current/hg19/clinvar_20210123.parquet' \n howard annotation --input=tests/data/example.vcf.gz --output=/tmp/example.howard.tsv --assembly=hg19 --annotation_parquet='tests/databases/annotations/current/hg19/avsnp150.parquet,tests/databases/annotations/current/hg19/dbnsfp42a.parquet' \n howard annotation --input=tests/data/example.vcf.gz --output=/tmp/example.howard.tsv --assembly=hg19 --annotation_bcftools='tests/databases/annotations/current/hg19/nci60.vcf.gz,tests/databases/annotations/current/hg19/dbnsfp42a.vcf.gz' \n howard annotation --input=tests/data/example.vcf.gz --output=/tmp/example.howard.tsv --assembly=hg19 --annotation_snpsift='tests/databases/annotations/current/hg19/nci60.vcf.gz,tests/databases/annotations/current/hg19/dbnsfp42a.vcf.gz' \n howard annotation --input=tests/data/example.vcf.gz --output=/tmp/example.howard.tsv --assembly=hg19 --annotation_annovar='nci60:cosmic70' \n howard annotation --input=tests/data/example.vcf.gz --output=/tmp/example.howard.tsv --assembly=hg19 --annotation_snpeff='-hgvs' \n howard annotation --input=tests/data/example.vcf.gz --output=/tmp/example.howard.tsv --assembly=hg19 --annotation_exomiser='preset=exome:transcript_source=refseq' \n howard annotation --input=tests/data/example.vcf.gz --output=/tmp/example.howard.tsv --assembly=hg19 --annotation_splice='split_mode=one:spliceai_distance=500:spliceai_mask=1' \n howard annotation 
--input=tests/data/example.vcf.gz --output=/tmp/example.howard.tsv --assembly=hg19 --annotations='ALL:parquet' \n howard annotation --input=tests/data/example.vcf.gz --output=/tmp/example.howard.tsv --param=config/param.json \n \n", 'groups': {'main': {'input': True, 'output': True, 'param': False, 'annotations': False, 'annotation_parquet': False, 'annotation_bcftools': False, 'annotation_annovar': False, 'annotation_snpeff': False, 'annotation_snpsift': False, 'annotation_exomiser': False, 'annotation_splice': False, 'assembly': False}, 'Annotation': {'annotations_update': False, 'annotations_append': False}}}, 'calculation': {'function': 'calculation', 'description': 'Calculation processes variants information to generate new information, such as: identify variation type (VarType), harmonizes allele frequency (VAF) and calculate sttistics (VAF_stats), extracts Nomen (transcript, cNomen, pNomen...) from an HGVS field (e.g. snpEff, Annovar) with an optional list of personalized transcripts, generates VaRank format barcode, identify trio inheritance.', 'help': 'Calculation operations on genetic variations file and genotype information.\n', 'epilog': "Usage examples:\n howard calculation --input=tests/data/example.full.vcf --output=/tmp/example.calculation.tsv --calculations='vartype' \n howard calculation --input=tests/data/example.ann.vcf.gz --output=/tmp/example.calculated.tsv --calculations='snpeff_hgvs,NOMEN' --hgvs_field=snpeff_hgvs --transcripts=tests/data/transcripts.tsv \n howard calculation --input=tests/data/example.vcf.gz --output=/tmp/example.calculated.tsv --calculations='TRIO' --trio_pedigree='sample1,sample2,sample4' \n howard calculation --input=tests/data/example.vcf.gz --output=/tmp/example.calculated.tsv --calculations='BARCODEFAMILY' --family_pedigree='sample1,sample2,sample4' \n howard calculation --input=tests/data/example.ann.transcripts.vcf.gz --output=/tmp/example.calculation.transcripts.tsv --param=config/param.transcripts.json 
--calculations='TRANSCRIPTS_ANNOTATIONS,TRANSCRIPTS_PRIORITIZATION,TRANSCRIPTS_EXPORT' \n howard calculation --input=tests/data/example.ann.vcf.gz --output=/tmp/example.ann.tsv --param=config/param.json \n howard calculation --show_calculations \n \n", 'groups': {'main': {'input': False, 'output': False, 'param': False, 'calculations': False}, 'Calculation': {'calculation_config': False, 'show_calculations': False}, 'NOMEN': {'hgvs_field': False, 'transcripts': False}, 'TRIO': {'trio_pedigree': False}, 'BARCODEFAMILY': {'family_pedigree': False}}}, 'prioritization': {'function': 'prioritization', 'description': "Prioritization algorithm uses profiles to flag variants (as passed or filtered), calculate a prioritization score, and automatically generate a comment for each variants (example: 'polymorphism identified in dbSNP. associated to Lung Cancer. Found in ClinVar database'). Prioritization profiles are defined in a configuration file in JSON format. A profile is defined as a list of annotation/value, using wildcards and comparison options (contains, lower than, greater than, equal...). Annotations fields may be quality values (usually from callers, such as 'DP') or other annotations fields provided by annotations tools, such as HOWARD itself (example: COSMIC, Clinvar, 1000genomes, PolyPhen, SIFT). 
Multiple profiles can be used simultaneously, which is useful to define multiple validation/prioritization levels (example: 'standard', 'stringent', 'rare variants', 'low allele frequency').\n", 'help': 'Prioritization of genetic variations based on annotations criteria (profiles).', 'epilog': "Usage examples:\n howard prioritization --input=tests/data/example.vcf.gz --output=/tmp/example.prioritized.vcf.gz --prioritizations='default' \n howard prioritization --input=tests/data/example.vcf.gz --output=/tmp/example.prioritized.vcf.gz --prioritizations='default,GERMLINE' --prioritization_config=config/prioritization_profiles.json \n howard prioritization --input=tests/data/example.vcf.gz --output=/tmp/example.prioritized.tsv --param=config/param.json \n \n", 'groups': {'main': {'input': True, 'output': True, 'param': False, 'prioritizations': False}, 'Prioritization': {'default_profile': False, 'pzfields': False, 'prioritization_score_mode': False, 'prioritization_config': False}}}, 'process': {'function': 'process', 'description': 'howard process tool manage genetic variations to:\n- annotates genetic variants with multiple annotation databases/files and tools\n- calculates and normalizes annotations\n- prioritizes variants with profiles (list of citeria) to calculate scores and flags\n- translates into various formats\n- query genetic variants and annotations\n- generates variants statistics', 'help': 'Full genetic variations process: annotation, calculation, prioritization, format, query, filter...', 'epilog': 'Usage examples:\n howard process --input=tests/data/example.vcf.gz --output=/tmp/example.annotated.vcf.gz --param=config/param.json \n howard process --input=tests/data/example.vcf.gz --annotations=\'snpeff\' --calculations=\'snpeff_hgvs\' --prioritizations=\'default\' --explode_infos --output=/tmp/example.annotated.tsv --query=\'SELECT "#CHROM", POS, ALT, REF, snpeff_hgvs FROM variants\' \n howard process --input=tests/data/example.vcf.gz 
--hgvs_options=\'full_format,use_exon\' --explode_infos --output=/tmp/example.annotated.tsv --query=\'SELECT "#CHROM", POS, ALT, REF, hgvs FROM variants\' \n howard process --input=tests/data/example.vcf.gz --output=/tmp/example.howard.vcf.gz --hgvs=\'full_format,use_exon\' --annotations=\'tests/databases/annotations/current/hg19/avsnp150.parquet,tests/databases/annotations/current/hg19/dbnsfp42a.parquet,tests/databases/annotations/current/hg19/gnomad211_genome.parquet\' --calculations=\'NOMEN\' --explode_infos --query=\'SELECT NOMEN, REVEL_score, SIFT_score, AF AS \'gnomad_AF\', ClinPred_score, ClinPred_pred FROM variants\' \n \n', 'groups': {'main': {'input': True, 'output': True, 'param': False, 'hgvs_options': False, 'annotations': False, 'calculations': False, 'prioritizations': False, 'assembly': False}, 'HGVS': {'use_gene': False, 'use_exon': False, 'use_protein': False, 'add_protein': False, 'full_format': False, 'codon_type': False, 'refgene': False, 'refseqlink': False}, 'Annotation': {'annotations_update': False, 'annotations_append': False}, 'Calculation': {'calculation_config': False}, 'Prioritization': {'default_profile': False, 'pzfields': False, 'prioritization_score_mode': False, 'prioritization_config': False}, 'Query': {'query': False, 'query_limit': False, 'query_print_mode': False}, 'Explode': {'explode_infos': False, 'explode_infos_prefix': False, 'explode_infos_fields': False}, 'Export': {'include_header': False, 'order_by': False, 'parquet_partitions': False}}}, 'databases': {'function': 'databases', 'description': 'Download databases and needed files for howard and associated tools', 'help': 'Download databases and needed files for howard and associated tools', 'epilog': "Usage examples:\n howard databases --assembly=hg19 --download-genomes=~/howard/databases/genomes/current --download-genomes-provider=UCSC --download-genomes-contig-regex='chr[0-9XYM]+$' \n howard databases --assembly=hg19 
--download-annovar=~/howard/databases/annovar/current --download-annovar-files='refGene,cosmic70,nci60' \n howard databases --assembly=hg19 --download-snpeff=~/howard/databases/snpeff/current \n howard databases --assembly=hg19 --download-refseq=~/howard/databases/refseq/current --download-refseq-format-file='ncbiRefSeq.txt' \n howard databases --assembly=hg19 --download-dbnsfp=~/howard/databases/dbnsfp/current --download-dbnsfp-release='4.4a' --download-dbnsfp-subdatabases \n howard databases --assembly=hg19 --download-alphamissense=~/howard/databases/alphamissense/current \n howard databases --assembly=hg19 --download-exomiser=~/howard/databases/exomiser/current \n howard databases --assembly=hg19 --download-dbsnp=~/howard/databases/dbsnp/current --download-dbsnp-vcf \n cd ~/howard/databases && howard databases --assembly=hg19 --download-genomes=genomes/current --download-genomes-provider=UCSC --download-genomes-contig-regex='chr[0-9XYM]+$' --download-annovar=annovar/current --download-annovar-files='refGene,cosmic70,nci60' --download-snpeff=snpeff/current --download-refseq=refseq/current --download-refseq-format-file='ncbiRefSeq.txt' --download-dbnsfp=dbnsfp/current --download-dbnsfp-release='4.4a' --download-dbnsfp-subdatabases --download-alphamissense=alphamissense/current --download-exomiser=exomiser/current --download-dbsnp=dbsnp/current --download-dbsnp-vcf --threads=8 \n howard databases --generate-param=/tmp/param.json --generate-param-description=/tmp/test.description.json --generate-param-formats=parquet \n howard databases --input_annovar=tests/databases/others/hg19_nci60.txt --output_annovar=/tmp/nci60.from_annovar.vcf.gz --annovar_to_parquet=/tmp/nci60.from_annovar.parquet --annovar_code=nci60 --genome=~/howard/databases/genomes/current/hg19.fa \n\nNotes:\n - Downloading databases can take a while, depending on network, threads and memory\n - Proxy: Beware of network and proxy configuration\n - dbNSFP download: More threads, more memory usage (8 
threads ~ 16Gb, 24 threads ~ 32Gb)\n \n", 'groups': {'main': {'assembly': False, 'genomes-folder': False, 'genome': False, 'param': False}, 'Genomes': {'download-genomes': False, 'download-genomes-provider': False, 'download-genomes-contig-regex': False}, 'snpEff': {'download-snpeff': False}, 'Annovar': {'download-annovar': False, 'download-annovar-files': False, 'download-annovar-url': False}, 'refSeq': {'download-refseq': False, 'download-refseq-url': False, 'download-refseq-prefix': False, 'download-refseq-files': False, 'download-refseq-format-file': False, 'download-refseq-include-utr5': False, 'download-refseq-include-utr3': False, 'download-refseq-include-chrM': False, 'download-refseq-include-non-canonical-chr': False, 'download-refseq-include-non-coding-transcripts': False, 'download-refseq-include-transcript-version': False}, 'dbNSFP': {'download-dbnsfp': False, 'download-dbnsfp-url': False, 'download-dbnsfp-release': False, 'download-dbnsfp-parquet-size': False, 'download-dbnsfp-subdatabases': False, 'download-dbnsfp-parquet': False, 'download-dbnsfp-vcf': False, 'download-dbnsfp-no-files-all': False, 'download-dbnsfp-add-info': False, 'download-dbnsfp-only-info': False, 'download-dbnsfp-uniquify': False, 'download-dbnsfp-row-group-size': False}, 'AlphaMissense': {'download-alphamissense': False, 'download-alphamissense-url': False}, 'Exomiser': {'download-exomiser': False, 'download-exomiser-application-properties': False, 'download-exomiser-url': False, 'download-exomiser-release': False, 'download-exomiser-phenotype-release': False, 'download-exomiser-remm-release': False, 'download-exomiser-remm-url': False, 'download-exomiser-cadd-release': False, 'download-exomiser-cadd-url': False, 'download-exomiser-cadd-url-snv-file': False, 'download-exomiser-cadd-url-indel-file': False}, 'dbSNP': {'download-dbsnp': False, 'download-dbsnp-releases': False, 'download-dbsnp-release-default': False, 'download-dbsnp-url': False, 'download-dbsnp-url-files': False, 
'download-dbsnp-url-files-prefix': False, 'download-dbsnp-assemblies-map': False, 'download-dbsnp-vcf': False, 'download-dbsnp-parquet': False}, 'HGMD': {'convert-hgmd': False, 'convert-hgmd-file': False, 'convert-hgmd-basename': False}, 'from_Annovar': {'input_annovar': False, 'output_annovar': False, 'annovar_code': False, 'annovar_to_parquet': False, 'annovar_reduce_memory': False, 'annovar_multi_variant': False}, 'from_extann': {'input_extann': False, 'output_extann': False, 'refgene': False, 'transcripts': False, 'param_extann': False, 'mode_extann': False}, 'Parameters': {'generate-param': False, 'generate-param-description': False, 'generate-param-releases': False, 'generate-param-formats': False, 'generate-param-bcftools': False}}}, 'gui': {'function': 'gui', 'description': 'Graphical User Interface tools', 'help': 'Graphical User Interface tools', 'epilog': 'Usage examples:\n howard gui ', 'groups': {}}, 'help': {'function': 'help', 'description': 'Help tools', 'help': 'Help tools', 'epilog': "Usage examples:\n howard help --help_md=docs/help.md --help_html=docs/html/help.html --help_pdf=docs/pdf/help.pdf\n howard help --help_json_input=docs/json/help.configuration.json --help_json_input_title='HOWARD Configuration' --help_md=docs/help.configuration.md --help_html=docs/html/help.configuration.html --help_pdf=docs/pdf/help.configuration.pdf --code_type='json'\n howard help --help_json_input=docs/json/help.parameteres.json --help_json_input_title='HOWARD Parameters' --help_md=docs/help.parameteres.md --help_html=docs/html/help.parameteres.html --help_pdf=docs/pdf/help.parameteres.pdf --code_type='json' \n howard help --help_json_input=docs/json/help.parameteres.databases.json --help_json_input_title='HOWARD Parameters Databases' --help_md=docs/help.parameteres.databases.md --help_html=docs/html/help.parameteres.databases.html --help_pdf=docs/pdf/help.parameteres.databases.pdf --code_type='json' \n \n", 'groups': {'main': {'help_md': False, 'help_html': 
False, 'help_pdf': False, 'help_md_input': False, 'help_json_input': False, 'help_json_input_title': False, 'code_type': False}}}, 'update_database': {'function': 'update_database', 'description': 'Update HOWARD database\n', 'help': '(plugin) Update HOWARD database', 'epilog': 'Usage examples:\n howard update_database --database clinvar --databases_folder /home1/DB/HOWARD --update_config update_databases.json \n \n', 'groups': {'main': {'param': False}, 'Update_database': {'databases_folder': False, 'database': False, 'update_config': False, 'current_folder': False}, 'Options': {'show': False, 'limit': False}}}, 'to_excel': {'function': 'to_excel', 'description': "Convert VCF file to Excel '.xlsx' format.\n", 'help': "(plugin) Convert VCF file to Excel '.xlsx' format", 'epilog': 'Usage examples:\n howard to_excel --input=tests/data/example.vcf.gz --output=/tmp/example.xlsx --add_variants_view\n \n', 'groups': {'main': {'input': True, 'output': True}, 'Add': {'add_variants_view': False, 'add_header': False}}}, 'transcripts_check': {'function': 'transcripts_check', 'description': 'Check if a transcript list is present in a generated transcript table from a input VCF file.\n', 'help': '(plugin) Check transcript list in transcript table', 'epilog': 'Usage examples:\n howard transcripts_check --input=plugins/transcripts_check/tests/data/example.ann.transcripts.vcf.gz --param=plugins/transcripts_check/tests/data/param.transcripts.json --transcripts_expected=plugins/transcripts_check/tests/data/transcripts.tsv --stats=/tmp/transcripts.stats.json --transcripts_missing=/tmp/transcripts.missing.tsv\n \n', 'groups': {'main': {'input': True, 'param': True, 'transcripts_expected': True, 'transcripts_missing': False, 'stats_json': False}}}, 'genebe': {'function': 'genebe', 'description': 'GeneBe annotation using REST API (see https://genebe.net/).\n', 'help': '(plugin) GeneBe annotation using REST API', 'epilog': 'Usage examples:\n howard genebe --input=tests/data/example.vcf.gz 
--output=/tmp/example.genebe.vcf.gz --genebe_use_refseq\n \n', 'groups': {'main': {'input': True, 'output': True, 'param': False, 'assembly': False}, 'GeneBe': {'genebe_use_refseq': False, 'genebe_use_ensembl': False, 'not_flatten_consequences': False}, 'Explode': {'explode_infos': False, 'explode_infos_prefix': False, 'explode_infos_fields': False}, 'Export': {'include_header': False, 'order_by': False, 'parquet_partitions': False}}}, 'minimalize': {'function': 'minimalize', 'description': "Minimalize a VCF file consists in put missing value ('.') on INFO/Tags, ID, QUAL or FILTER fields. Options can also minimalize samples (keep only GT) or remove all samples. INFO/tags can by exploded before minimalize to keep tags into separated columns (useful for Parquet or TSV format to constitute a database).\n", 'help': '(plugin) Minimalize a VCF file, such as removing INFO/Tags or samples', 'epilog': 'Usage examples:\n howard minimalize --input=tests/data/example.vcf.gz --output=/tmp/example.minimal.vcf.gz --minimalize_info --minimalize_filter --minimalize_qual --minimalize_id --minimalize_samples\n howard minimalize --input=tests/data/example.vcf.gz --output=/tmp/example.minimal.tsv --remove_samples --explode_infos --minimalize_info\n \n', 'groups': {'main': {'input': True, 'output': True, 'param': False}, 'Minimalize': {'minimalize_info': False, 'minimalize_id': False, 'minimalize_qual': False, 'minimalize_filter': False, 'minimalize_samples': False, 'remove_samples': False}, 'Explode': {'explode_infos': False, 'explode_infos_prefix': False, 'explode_infos_fields': False}, 'Export': {'include_header': False, 'order_by': False, 'parquet_partitions': False}}}}
    @@ -2488,7 +2504,7 @@

    arguments_dict = - {'arguments': {'input': {'metavar': 'input', 'help': 'Input file path.\nFormat file must be either VCF, Parquet, TSV, CSV, PSV or duckDB.\nFiles can be compressesd (e.g. vcf.gz, tsv.gz).\n', 'required': False, 'default': None, 'type': <PathType object>, 'gooey': {'widget': 'FileChooser', 'options': {'wildcard': 'Parquet file (*.parquet)|*.parquet|All files (*)|*'}}}, 'output': {'metavar': 'output', 'help': 'Output file path.\nFormat file must be either VCF, Parquet, TSV, CSV, PSV or duckDB.\nFiles can be compressesd (e.g. vcf.gz, tsv.gz).\n', 'required': False, 'default': None, 'type': <PathType object>, 'gooey': {'widget': 'FileSaver'}}, 'param': {'metavar': 'param', 'help': 'Parameters JSON file (or string) defines parameters to process \nannotations, calculations, prioritizations, convertions and queries.\n', 'default': '{}', 'type': <PathType object>, 'gooey': {'widget': 'FileChooser', 'options': {'initial_value': '', 'wildcard': 'JSON file (*.json)|*.json|All files (*)|*'}}}, 'query': {'metavar': 'query', 'help': "Query in SQL format\n(e.g. 'SELECT * FROM variants LIMIT 50').\n", 'default': None, 'type': <class 'str'>, 'gooey': {'widget': 'Textarea', 'options': {'initial_value': 'SELECT * FROM variants'}}, 'extra': {'param_section': 'query'}}, 'filter': {'metavar': 'filter', 'help': "Filter variant using SQL format\n(e.g. 'POS < 100000').\n", 'default': None, 'type': <class 'str'>, 'gooey': {'widget': 'Textarea', 'options': {'initial_value': ''}}}, 'samples': {'metavar': 'samples', 'help': "List of samples\n(e.g. 
'sample1,sample2').\n", 'default': None, 'type': <class 'str'>, 'gooey': {'widget': 'Textarea', 'options': {'initial_value': ''}}}, 'output_query': {'metavar': 'output', 'help': 'Output Query file.\nFormat file must be either VCF, Parquet, TSV, CSV, PSV or duckDB.\n', 'default': None, 'type': <PathType object>, 'gooey': {'widget': 'FileSaver', 'options': {'wildcard': 'All files (*)|*'}}}, 'annotations': {'metavar': 'annotations', 'help': "Annotation with databases files, or with tools,\nas a list of files in Parquet, VCF, BED, or keywords\n (e.g. 'file.parquet,bcftools:file2.vcf.gz,annovar:refGene,snpeff').\n- For a Parquet/VCF/BED, use file paths\n (e.g. 'file1.parquet,file2.vcf.gz').\n- For BCFTools annotation, use keyword 'bcftools' with file paths\n (e.g. 'bcftools:file.vcf.gz:file.bed.gz').\n- For Parquet annotation, use keyword 'parquet' with file paths\n (e.g. 'parquet:file.parquet').\n- For Annovar annotation, use keyword 'annovar' with annovar code\n (e.g. 'annovar:refGene', 'annovar:refGene:cosmic70').\n- For snpeff annotation, use keyword 'snpeff' with options\n (e.g. 'snpeff', 'snpeff:-hgvs -noShiftHgvs -spliceSiteSize 3').\n- For snpSift annotation, use keyword 'snpsift' with file paths\n (e.g. 'snpsift:file.vcf.gz:file.bed.gz').\n- For Exomiser annotation, use keyword 'exomiser' with options as key=value\n (e.g. 'exomiser:preset=exome:transcript_source=refseq').\n- For add all availalbe databases files, use 'ALL' keyword,\n with filters on format (e.g. 'parquet', 'vcf') and release (e.g. 'current', 'devel')\n (e.g. 
'ALL', ALL:format=parquet', 'ALL:format=parquet:release=current', 'ALL:format=parquet+vcf:release=current+devel').\n", 'default': None, 'type': <class 'str'>, 'extra': {'format': 'DB[,DB]*[,bcftools:DB[:DB]*][,annovar:KEY[:KEY]*][,snpeff][,exomiser[:var=val]*]', 'examples': {'Parquet method annotation with 2 Parquet files': '"annotations": "/path/to/database1.parquet,/path/to/database2.parquet"', 'Parquet method annotation with multiple file formats': '"annotations": "/path/to/database1.parquet,/path/to/database2.vcf.gz,/path/to/database2.bed.gz"', 'Parquet method annotation with available Parquet databases in current release (check databases in production)': '"annotations": "ALL:parquet:current"', 'Parquet method annotation with available Parquet databases in latest release (check databases before production)': '"annotations": "ALL:parquet:latest"', 'Annotation with BCFTools': '"annotations": "bcftools:/path/to/database2.vcf.gz:/path/to/database2.bed.gz"', 'Annotation with Annovar (refGene with hgvs and Cosmic)': '"annotations": "annovar:refGene:cosmic70"', 'Annotation with snpEff (default options)': '"annotations": "snpeff"', 'Annotation with snpEff (with options)': '"annotations": "snpeff:-hgvs -noShiftHgvs -spliceSiteSize 3"', 'Annotation with snpSift': '"annotations": "snpsift:/path/to/database2.vcf.gz:/path/to/database2.bed.gz"', 'Annotation with Exomiser with options': '"annotations": "exomiser:preset=exome:hpo=0001156+0001363+0011304+0010055:transcript_source=refseq:release=2109"', 'Multiple tools annotations (Parquet method, BCFTools, Annovar, snpEff and Exomiser)': '"annotations": "/path/to/database1.parquet,bcftools:/path/to/database2.vcf.gz,annovar:refGene:cosmic70,snpeff,exomiser:preset=exome:transcript_source=refseq"'}}}, 'annotation_parquet': {'metavar': 'annotation parquet', 'help': "Annotation with Parquet method, as a list of files in Parquet, VCF or BED\n (e.g. 
'file1.parquet,file2.vcf.gz').\nFor add all availalbe databases files, use 'ALL' keyword,\n with filters on type and release\n (e.g. 'ALL', 'ALL:parquet:current', 'ALL:parquet,vcf:current,devel').\n", 'default': None, 'type': <class 'str'>, 'nargs': '+', 'gooey': {'widget': 'MultiFileChooser', 'options': {'default_dir': '/Users/lebechea/howard/databases/annotations/current', 'message': 'Database files'}}, 'extra': {'format': 'DB[,DB]*', 'examples': {'Parquet method annotation with 2 Parquet files': '"annotation_parquet": "/path/to/database1.parquet,/path/to/database2.parquet"', 'Parquet method annotation with multiple file formats': '"annotation_parquet": "/path/to/database1.parquet,/path/to/database2.vcf.gz,/path/to/database2.bed.gz"', 'Parquet method annotation with available Parquet databases in current release (check databases in production)': '"annotation_parquet": "ALL:parquet:current"', 'Parquet method annotation with available Parquet databases in latest release (check databases before production)': '"annotation_parquet": "ALL:parquet:latest"'}}}, 'annotation_bcftools': {'metavar': 'annotation BCFTools', 'help': "Annotation with BCFTools, as a list of files VCF or BED\n (e.g. 'file.vcf.gz,file.bed.gz').\n", 'default': None, 'type': <class 'str'>, 'nargs': '+', 'gooey': {'widget': 'MultiFileChooser', 'options': {'default_dir': '/Users/lebechea/howard/databases/annotations/current', 'message': 'Database files'}}, 'extra': {'format': 'DB[,DB]*', 'examples': {'Annovation with BCFTools': '"annotation_bcftools": "/path/to/database2.vcf.gz,/path/to/database2.bed.gz"'}}}, 'annotation_snpeff': {'metavar': 'annotation snpEff', 'help': "Annotation with snpEff, with options\n (e.g. 
'', '-hgvs -noShiftHgvs -spliceSiteSize 3').\n", 'default': None, 'type': <class 'str'>, 'extra': {'format': 'options', 'examples': {'Annotation with snpEff (default options)': '"annotation_snpeff": ""', 'Annotation with snpEff (with options)': '"annotation_snpeff": "-hgvs -noShiftHgvs -spliceSiteSize 3"'}}}, 'annotation_snpsift': {'metavar': 'annotation snpSift', 'help': "Annotation with snpSift, as a list of files VCF\n (e.g. 'file.vcf.gz,file.bed.gz').\n", 'default': None, 'type': <class 'str'>, 'nargs': '+', 'gooey': {'widget': 'MultiFileChooser', 'options': {'default_dir': '/Users/lebechea/howard/databases/annotations/current', 'message': 'Database files'}}, 'extra': {'format': 'DB[,DB]*', 'examples': {'Annovation with snpSift': '"annotation_snpsift": "/path/to/database2.vcf.gz,/path/to/database2.bed.gz"'}}}, 'annotation_annovar': {'metavar': 'annotation Annovar', 'help': "Annotation with Annovar, as a list of database keywords\n (e.g. 'refGene', 'refGene:cosmic70').\n", 'default': None, 'type': <class 'str'>, 'extra': {'format': 'keyword[:keyword]*', 'examples': {'Annotation with Annovar (refGene with hgvs and Cosmic)': '"annotation_annovar": "refGene:cosmic70"'}}}, 'annotation_exomiser': {'metavar': 'annotation Exomiser', 'help': "Annotation with Exomiser, as a list of options\n (e.g. 'preset=exome:transcript_source=refseq').\n", 'default': None, 'type': <class 'str'>, 'extra': {'format': 'option=value[:option=value]', 'examples': {'Annotation with Exomiser with options': '"annotation_exomiser": "preset=exome:hpo=0001156+0001363+0011304+0010055:transcript_source=refseq:release=2109"'}}}, 'annotation_splice': {'metavar': 'annotation Splice', 'help': "Annotation with Splice, as a list of options\n (e.g. 
'split_mode=one:spliceai_distance=500:spliceai_mask=1').\n", 'default': None, 'type': <class 'str'>, 'extra': {'format': 'option=value[:option=value]', 'examples': {'Annotation with Splice with options': '"annotation_splice": "split_mode=one:spliceai_distance=500:spliceai_mask=1"'}}}, 'annotations_update': {'help': 'Update option for annotation (Only for Parquet annotation).\nIf True, annotation fields will be removed and re-annotated.\nThese options will be applied to all annotation databases.\n', 'action': 'store_true', 'default': False, 'gooey': {'widget': 'BlockCheckbox', 'options': {'checkbox_label': 'Update annotation method'}}, 'extra': {'param_section': 'annotation:options'}}, 'annotations_append': {'help': 'Append option for annotation (Only for Parquet annotation).\nIf True, annotation fields will be annotated only if not annotation exists for the variant.\nThese options will be applied to all annotation databases.\n', 'action': 'store_true', 'default': False, 'gooey': {'widget': 'BlockCheckbox', 'options': {'checkbox_label': 'Append annotation method'}}, 'extra': {'param_section': 'annotation:options'}}, 'calculations': {'metavar': 'operations', 'help': "Quick calculations on genetic variants information and genotype information,\nas a list of operations (e.g. 
'VARTYPE,variant_id').\nList of available calculations by default\n (unsensitive case, see doc for more information):\n VARTYPE snpeff_hgvs FINDBYPIPELINE GENOTYPECONCORDANCE BARCODE TRIO VAF VAF_STATS DP_STATS \n", 'default': None, 'type': <class 'str'>}, 'prioritizations': {'metavar': 'prioritisations', 'help': "List of prioritization profiles to process (based on Prioritization JSON file),\nsuch as 'default', 'rare variants', 'low allele frequency', 'GERMLINE'.\nBy default, all profiles available will be processed.\n", 'default': None, 'type': <class 'str'>, 'extra': {'examples': {'Prioritization profile by default': '"prioritization": "default" ', 'Prioritization profile by default and GERMLINE from Configuration JSON file': '"prioritization": "default,GERMLINE" '}}}, 'prioritization_config': {'metavar': 'prioritization config', 'help': 'Prioritization configuration JSON file (defines profiles, see doc).\n', 'default': None, 'type': <PathType object>, 'gooey': {'widget': 'FileChooser', 'options': {'wildcard': 'JSON file (*.json)|*.json|All files (*)|*'}}, 'extra': {'param_section': 'prioritization', 'examples': {'Prioritization configuration JSON file as an option': '"prioritization_config": "prioritization_config.json" '}}}, 'profiles': {'metavar': 'profiles', 'help': "List of prioritization profiles to process (based on Prioritization JSON file),\nsuch as 'default', 'rare variants', 'low allele frequency', 'GERMLINE'.\nBy default, all profiles available will be processed.\n", 'default': None, 'type': <class 'str'>}, 'default_profile': {'metavar': 'default profile', 'help': 'Prioritization profile by default (see doc).\nDefault is the first profile in the list of prioritization profiles.\n', 'default': None, 'type': <class 'str'>}, 'pzfields': {'metavar': 'pzfields', 'help': 'Prioritization fields to provide (see doc).\nAvailable: PZScore, PZFlag, PZTags, PZComment, PZInfos\n', 'default': 'PZScore,PZFlag', 'type': <class 'str'>}, 'prioritization_score_mode': 
{'metavar': 'prioritization score mode', 'help': 'Prioritization Score mode (see doc).\nAvailable: HOWARD (increment score), VaRank (max score)\n', 'default': 'HOWARD', 'type': <class 'str'>, 'choices': ['HOWARD', 'VaRank'], 'gooey': {'widget': 'Dropdown', 'options': {}}}, 'query_limit': {'metavar': 'query limit', 'help': 'Limit of number of row for query (only for print result, not output).\n', 'default': 10, 'type': <class 'int'>, 'gooey': {'widget': 'IntegerField', 'options': {'min': 1, 'max': 10000, 'increment': 10}}}, 'query_print_mode': {'metavar': 'print mode', 'help': "Print mode of query result (only for print result, not output).\nEither None (native), 'markdown', 'tabulate' or disabled.\n", 'choices': [None, 'markdown', 'tabulate', 'disabled'], 'default': None, 'type': <class 'str'>, 'gooey': {'widget': 'Dropdown', 'options': {}}}, 'explode_infos': {'help': "Explode VCF INFO/Tag into 'variants' table columns.\n", 'action': 'store_true', 'default': False}, 'explode_infos_prefix': {'metavar': 'explode infos prefix', 'help': 'Explode VCF INFO/Tag with a specific prefix.\n', 'default': '', 'type': <class 'str'>}, 'explode_infos_fields': {'metavar': 'explode infos list', 'help': "Explode VCF INFO/Tag specific fields/tags.\nKeyword `*` specify all available fields, except those already specified.\nPattern (regex) can be used, such as `.*_score` for fields named with '_score' at the end.\nExamples:\n- 'HGVS,SIFT,Clinvar' (list of fields)\n- 'HGVS,*,Clinvar' (list of fields with all other fields at the end)\n- 'HGVS,.*_score,Clinvar' (list of 2 fields with all scores in the middle)\n- 'HGVS,.*_score,*' (1 field, scores, all other fields)\n- 'HGVS,*,.*_score' (1 field, all other fields, all scores)\n", 'default': '*', 'type': <class 'str'>}, 'include_header': {'help': 'Include header (in VCF format) in output file.\nOnly for compatible formats (tab-delimiter format as TSV or BED).\n', 'action': 'store_true', 'default': False}, 'order_by': {'metavar': 'order by', 
'help': "List of columns to sort the result-set in ascending or descending order.\nUse SQL format, and keywords ASC (ascending) and DESC (descending).\nIf a column is not available, order will not be considered.\nOrder is enable only for compatible format (e.g. TSV, CSV, JSON).\nExamples: 'ACMG_score DESC', 'PZFlag DESC, PZScore DESC'.\n", 'default': '', 'type': <class 'str'>, 'extra': {'examples': {'Order by ACMG score in descending order': '"order_by": "ACMG_score DESC" ', 'Order by PZFlag and PZScore in descending order': '"order_by": "PZFlag DESC, PZScore DESC" '}}}, 'parquet_partitions': {'metavar': 'parquet partitions', 'help': "Parquet partitioning using hive (available for any format).\nThis option is faster parallel writing, but memory consuming.\nUse 'None' (string) for NO partition but split parquet files into a folder.\nExamples: '#CHROM', '#CHROM,REF', 'None'.\n", 'default': None, 'type': <class 'str'>}, 'input_annovar': {'metavar': 'input annovar', 'help': "Input Annovar file path.\nFormat file must be a Annovar TXT file, associated with '.idx'.\n", 'required': False, 'default': None, 'type': <PathType object>, 'gooey': {'widget': 'FileChooser', 'options': {'wildcard': 'Parquet file (*.parquet)|*.parquet|All files (*)|*'}}}, 'output_annovar': {'metavar': 'output annovar', 'help': "Output Annovar file path.\nFormat file must be either VCF compressesd file '.vcf.gz'.\n", 'required': False, 'default': None, 'type': <PathType object>, 'gooey': {'widget': 'FileSaver'}}, 'annovar_code': {'metavar': 'Annovar code', 'help': 'Annovar code, or database name.\nUsefull to name databases columns.\n', 'required': False, 'default': None, 'type': <class 'str'>}, 'annovar_to_parquet': {'metavar': 'to parquet', 'help': 'Parquet file conversion.\n', 'required': False, 'default': None, 'type': <PathType object>, 'gooey': {'widget': 'FileSaver', 'options': {'wildcard': 'HTML file (*.parquet)|*.parquet'}}}, 'annovar_multi_variant': {'metavar': 'Annovar multi variant', 
'help': "Variant with multiple annotation lines on Annovar file.\nEither 'auto' (auto-detection), 'enable' or 'disable'.\n", 'default': 'auto', 'type': <class 'str'>, 'choices': ['auto', 'enable', 'disable'], 'gooey': {'widget': 'Dropdown', 'options': {}}}, 'annovar_reduce_memory': {'metavar': 'reduce memory', 'help': "Reduce memory option for Annovar convert,\neither 'auto' (auto-detection), 'enable' or 'disable'.\n", 'default': 'auto', 'type': <class 'str'>, 'choices': ['auto', 'enable', 'disable'], 'gooey': {'widget': 'Dropdown', 'options': {}}}, 'input_extann': {'metavar': 'input extann', 'help': 'Input Extann file path.\nFormat file must be a Extann TXT file or TSV file.\nFile need to have at least the genes column.\n', 'required': False, 'default': None, 'type': <PathType object>, 'gooey': {'widget': 'FileChooser', 'options': {'wildcard': 'VCF, Parquet, TSV, CSV, PSV or duckDB|*.*|All files (*)|*'}}}, 'output_extann': {'metavar': 'output extann', 'help': 'Output Extann file path.\nOutput extann file, should be BED or BED.gz.\n', 'required': False, 'default': None, 'type': <PathType object>, 'gooey': {'widget': 'FileSaver'}}, 'mode_extann': {'metavar': 'mode extann', 'help': 'Mode extann selection.\nHow to pick transcript from ncbi, keep all,\nkeep the longest, or keep the chosen one (transcript_extann).\n', 'required': False, 'default': 'longest', 'choices': ['all', 'longest', 'chosen'], 'type': <class 'str'>}, 'param_extann': {'metavar': 'param extann', 'help': "Param extann file path.\nParam containing configuration, options to replace chars and\nbedlike header description, conf vcf specs.\n(e.g. 
'~/howard/config/param.extann.json')\n", 'required': False, 'default': None, 'type': <PathType object>, 'gooey': {'widget': 'FileChooser', 'options': {'wildcard': 'TSV file format|*.tsv|'}}}, 'calculation_config': {'metavar': 'calculation config', 'help': 'Calculation configuration JSON file.\n', 'default': None, 'type': <PathType object>, 'gooey': {'widget': 'FileChooser', 'options': {'wildcard': 'JSON file (*.json)|*.json|All files (*)|*'}}, 'extra': {'param_section': 'calculation', 'examples': {'Calculation configuration JSON file as an option': '"calculation_config": "calculation_config.json" '}}}, 'show_calculations': {'help': 'Show available calculation operations.\n', 'action': 'store_true', 'default': False}, 'hgvs_field': {'metavar': 'HGVS field', 'help': 'HGVS INFO/tag containing a list o HGVS annotations.\n', 'default': 'hgvs', 'type': <class 'str'>, 'extra': {'param_section': 'calculation:calculations:NOMEN:options'}}, 'transcripts': {'metavar': 'transcripts', 'help': 'Transcripts TSV file,\nwith Transcript in first column, optional Gene in second column.\n', 'default': None, 'type': <PathType object>, 'gooey': {'widget': 'FileChooser', 'options': {'wildcard': 'TSV file (*.tsv)|*.tsv|All files (*)|*'}}, 'extra': {'param_section': 'calculation:calculations:NOMEN:options'}}, 'trio_pedigree': {'metavar': 'trio pedigree', 'help': 'Pedigree Trio for trio inheritance calculation.\nEither a JSON file or JSON string or a list of samples\n(e.g. \'sample1,sample2,sample3\' for father, mother and child,\n \'{"father": "sample1", "mother": "sample2", "child": "sample3"}\').\n', 'default': None, 'gooey': {'widget': 'FileChooser', 'options': {'wildcard': 'JSON file (*.json)|*.json|All files (*)|*'}}, 'extra': {'param_section': 'calculation:calculations:TRIO'}}, 'family_pedigree': {'metavar': 'family pedigree', 'help': 'Pedigree family for barcode calculation on genotype.\nEither a JSON file or JSON string or a list of samples\n(e.g. 
\'sample1,sample2,sample3,sample4\',\n \'{"father": "sample1", "mother": "sample2", "child1": "sample3", "child2": "sample3"}\').\n', 'default': None, 'gooey': {'widget': 'FileChooser', 'options': {'wildcard': 'JSON file (*.json)|*.json|All files (*)|*'}}, 'extra': {'param_section': 'calculation:calculations:BARCODEFAMILY'}}, 'stats_md': {'metavar': 'stats markdown', 'help': 'Stats Output file in MarkDown format.\n', 'required': False, 'default': None, 'type': <PathType object>, 'gooey': {'widget': 'FileSaver', 'options': {'wildcard': 'Markdown file (*.md)|*.md'}}, 'extra': {'examples': {'Export statistics in Markdown format': '"stats_md": "/tmp/stats.md" '}}}, 'stats_json': {'metavar': 'stats json', 'help': 'Stats Output file in JSON format.\n', 'required': False, 'default': None, 'type': <PathType object>, 'gooey': {'widget': 'FileSaver', 'options': {'wildcard': 'JSON file (*.json)|*.json'}}, 'extra': {'examples': {'Export statistics in JSON format': '"stats_json": "/tmp/stats.json" '}}}, 'assembly': {'metavar': 'assembly', 'help': "Genome Assembly (e.g. 'hg19', 'hg38').\n", 'required': False, 'default': 'hg19', 'type': <class 'str'>, 'extra': {'examples': {'Default assembly for all analysis tools': '"assembly": "hg19" ', 'List of assemblies for databases download tool': '"assembly": "hg19,hg38" '}}}, 'genome': {'metavar': 'genome', 'help': "Genome file in fasta format (e.g. 
'hg19.fa', 'hg38.fa').\n", 'required': False, 'default': '/Users/lebechea/howard/databases/genomes/current/hg19/hg19.fa', 'type': <PathType object>, 'gooey': {'widget': 'FileChooser', 'options': {'wildcard': 'All files (*)|*'}}}, 'hgvs_options': {'metavar': 'HGVS options', 'help': "Quick HGVS annotation options.\nThis option will skip all other hgvs options.\nExamples:\n- 'default' (for default options)\n- 'full_format' (for full format HGVS annotation)\n- 'use_gene=True:add_protein=true:codon_type=FULL'\n", 'required': False, 'default': None, 'type': <class 'str'>}, 'use_gene': {'help': "Use Gene information to generate HGVS annotation\n(e.g. 'NM_152232(TAS1R2):c.231T>C')", 'action': 'store_true', 'default': False}, 'use_exon': {'help': "Use Exon information to generate HGVS annotation\n(e.g. 'NM_152232(exon2):c.231T>C').\nOnly if 'use_gene' is not enabled.\n", 'action': 'store_true', 'default': False}, 'use_protein': {'help': "Use Protein level to generate HGVS annotation\n(e.g. 'NP_689418:p.Cys77Arg').\nCan be used with 'use_exon' or 'use_gene'.\n", 'action': 'store_true', 'default': False}, 'add_protein': {'help': "Add Protein level to DNA HGVS annotation (e.g 'NM_152232:c.231T>C,NP_689418:p.Cys77Arg').\n", 'action': 'store_true', 'default': False}, 'full_format': {'help': "Generates HGVS annotation in a full format\nby using all information to generates an exhaustive annotation\n(non-standard, e.g. 'TAS1R2:NM_152232:NP_689418:c.231T>C:p.Cys77Arg').\nUse 'use_exon' to add exon information\n(e.g 'TAS1R2:NM_152232:NP_689418:exon2:c.231T>C:p.Cys77Arg').\n", 'action': 'store_true', 'default': False}, 'use_version': {'help': "Generates HGVS annotation with transcript version\n(e.g. 'NM_152232.1:c.231T>C').\n", 'action': 'store_true', 'default': False}, 'codon_type': {'metavar': 'Codon type', 'help': "Amino Acide Codon format type to use to generate HGVS annotation.\nAvailable:\n- '1': codon in 1 character (e.g. 'C', 'R')\n- '3': codon in 3 character (e.g. 
'Cys', 'Arg')\n-'FULL': codon in full name (e.g. 'Cysteine', 'Arginine')\n", 'required': False, 'default': '3', 'type': <class 'str'>, 'choices': ['1', '3', 'FULL'], 'gooey': {'widget': 'Dropdown', 'options': {}}}, 'refgene': {'metavar': 'refGene', 'help': 'Path to refGene annotation file.\n', 'required': False, 'default': None, 'type': <PathType object>, 'gooey': {'widget': 'FileChooser', 'options': {'wildcard': 'All files (*)|*', 'default_dir': '/Users/lebechea/howard/databases/refseq/current', 'default_file': 'ncbiRefSeq.txt', 'message': 'Path to refGene annotation file'}}}, 'refseqlink': {'metavar': 'refSeqLink', 'help': 'Path to refSeqLink annotation file.\n', 'required': False, 'default': None, 'type': <PathType object>, 'gooey': {'widget': 'FileChooser', 'options': {'wildcard': 'All files (*)|*', 'default_dir': '/Users/lebechea/howard/databases/refseq/current', 'default_file': 'ncbiRefSeq.txt', 'message': 'Path to refGeneLink annotation file'}}}, 'refseq-folder': {'metavar': 'refseq folder', 'help': 'Folder containing refSeq files.\n', 'required': False, 'default': '/Users/lebechea/howard/databases/refseq/current', 'type': <PathType object>, 'gooey': {'widget': 'DirChooser', 'options': {'default_dir': '/Users/lebechea/howard/databases/refseq/current', 'message': 'Path to refGenefolder'}}}, 'download-genomes': {'metavar': 'genomes', 'help': "Path to genomes folder\nwith Fasta files, indexes,\nand all files generated by pygenome module.\n(e.g. 
'/Users/lebechea/howard/databases/genomes/current').\n", 'required': False, 'default': None, 'type': <PathType object>, 'gooey': {'widget': 'DirChooser', 'options': {'default_dir': '/Users/lebechea/howard/databases', 'message': 'Path to genomes folder'}}}, 'download-genomes-provider': {'metavar': 'genomes provider', 'help': 'Download Genome from an external provider.\nAvailable: GENCODE, Ensembl, UCSC, NCBI.\n', 'required': False, 'default': 'UCSC', 'type': <class 'str'>, 'choices': ['GENCODE', 'Ensembl', 'UCSC', 'NCBI'], 'gooey': {'widget': 'Dropdown', 'options': {}}}, 'download-genomes-contig-regex': {'metavar': 'genomes contig regex', 'help': "Regular expression to select specific chromosome\n(e.g 'chr[0-9XYM]+$').\n", 'required': False, 'default': None, 'type': <class 'str'>}, 'download-annovar': {'metavar': 'Annovar', 'help': "Path to Annovar databases\n(e.g. '/Users/lebechea/howard/databases/annovar/current').\n", 'required': False, 'type': <PathType object>, 'default': None, 'gooey': {'widget': 'DirChooser', 'options': {'default_dir': '/Users/lebechea/howard/databases', 'message': 'Path to Annovar databases folder'}}}, 'download-annovar-files': {'metavar': 'Annovar code', 'help': "Download Annovar databases for a list of Annovar file code (see Annovar Doc).\nUse None to donwload all available files,\nor Annovar keyword (e.g. 
'refGene', 'cosmic70', 'clinvar_202*').\nNote that refGene will at least be downloaded,\nand only files that not already exist or changed will be downloaded.\n", 'required': False, 'default': None, 'type': <class 'str'>}, 'download-annovar-url': {'metavar': 'Annovar url', 'help': 'Annovar databases URL (see Annovar Doc).\n', 'required': False, 'default': 'http://www.openbioinformatics.org/annovar/download', 'type': <class 'str'>}, 'download-snpeff': {'metavar': 'snpEff', 'help': 'Download snpEff databases within snpEff folder', 'required': False, 'default': None, 'type': <PathType object>, 'gooey': {'widget': 'DirChooser', 'options': {'default_dir': '/Users/lebechea/howard/databases', 'message': 'Path to snpEff databases folder'}}}, 'download-refseq': {'metavar': 'refSeq', 'help': "Path to refSeq databases\n(e.g. '/Users/lebechea/howard/databases/refseq/current').\n", 'required': False, 'default': None, 'type': <PathType object>, 'gooey': {'widget': 'DirChooser', 'options': {'default_dir': '/Users/lebechea/howard/databases', 'message': 'Path to refGene files folder'}}}, 'download-refseq-url': {'metavar': 'refSeq url', 'help': "refSeq databases URL (see refSeq WebSite)\n(e.g. 'http://hgdownload.soe.ucsc.edu/goldenPath')•/n", 'required': False, 'default': 'http://hgdownload.soe.ucsc.edu/goldenPath', 'type': <class 'str'>}, 'download-refseq-prefix': {'metavar': 'refSeq prefix', 'help': 'Check existing refSeq files in refSeq folder.\n', 'required': False, 'default': 'ncbiRefSeq', 'type': <class 'str'>}, 'download-refseq-files': {'metavar': 'refSeq files', 'help': 'List of refSeq files to download.\n', 'required': False, 'default': 'ncbiRefSeq.txt,ncbiRefSeqLink.txt', 'type': <class 'str'>}, 'download-refseq-format-file': {'metavar': 'refSeq format file', 'help': "Name of refSeq file to convert in BED format\n(e.g. 
'ncbiRefSeq.txt').\nProcess only if not None.\n", 'required': False, 'default': None, 'type': <class 'str'>}, 'download-refseq-include-utr5': {'help': "Formating BED refSeq file including 5'UTR.\n", 'action': 'store_true', 'default': False}, 'download-refseq-include-utr3': {'help': "Formating BED refSeq file including 3'UTR.\n", 'action': 'store_true', 'default': False}, 'download-refseq-include-chrM': {'help': "Formating BED refSeq file including Mitochondiral chromosome 'chrM' or 'chrMT'.\n", 'action': 'store_true', 'default': False}, 'download-refseq-include-non-canonical-chr': {'help': 'Formating BED refSeq file including non canonical chromosomes.\n', 'action': 'store_true', 'default': False}, 'download-refseq-include-non-coding-transcripts': {'help': 'Formating BED refSeq file including non coding transcripts.\n', 'action': 'store_true', 'default': False}, 'download-refseq-include-transcript-version': {'help': 'Formating BED refSeq file including transcript version.\n', 'action': 'store_true', 'default': False}, 'download-dbnsfp': {'metavar': 'dbNSFP', 'help': "Download dbNSFP databases within dbNSFP folder(e.g. '/Users/lebechea/howard/databases').\n", 'required': False, 'default': None, 'type': <PathType object>, 'gooey': {'widget': 'DirChooser', 'options': {'default_dir': '/Users/lebechea/howard/databases', 'message': 'Path to dbNSFP databases folder'}}}, 'download-dbnsfp-url': {'metavar': 'dbNSFP url', 'help': "Download dbNSFP databases URL (see dbNSFP website)\n(e.g. https://dbnsfp.s3.amazonaws.com').\n", 'required': False, 'default': 'https://dbnsfp.s3.amazonaws.com', 'type': <class 'str'>}, 'download-dbnsfp-release': {'metavar': 'dnNSFP release', 'help': "Release of dbNSFP to download (see dbNSFP website)\n(e.g. 
'4.4a').\n", 'required': False, 'default': '4.4a'}, 'download-dbnsfp-parquet-size': {'metavar': 'dbNSFP parquet size', 'help': 'Maximum size (Mb) of data files in Parquet folder.\nParquet folder are partitioned (hive) by chromosome (sub-folder),\nwhich contain N data files.\n', 'required': False, 'default': 100, 'type': <class 'int'>, 'gooey': {'widget': 'IntegerField', 'options': {'min': 1, 'max': 100000, 'increment': 10}}}, 'download-dbnsfp-subdatabases': {'help': 'Generate dbNSFP sub-databases.\ndbNSFP provides multiple databases which are split onto multiple columns.\nThis option create a Parquet folder for each sub-database (based on columns names).\n', 'action': 'store_true', 'default': False}, 'download-dbnsfp-parquet': {'help': 'Generate a Parquet file for each Parquet folder.\n', 'action': 'store_true', 'default': False}, 'download-dbnsfp-vcf': {'help': 'Generate a VCF file for each Parquet folder.\nNeed genome FASTA file (see --download-genome).\n', 'action': 'store_true', 'default': False}, 'download-dbnsfp-no-files-all': {'help': "Not generate database Parquet/VCF file for the entire database ('ALL').\nOnly sub-databases files will be generated.\n(see '--download-dbnsfp-subdatabases').\n", 'action': 'store_true', 'default': False}, 'download-dbnsfp-add-info': {'help': 'Add INFO column (VCF format) in Parquet folder and file.\nUseful for speed up full annotation (all available columns).\nIncrease memory and space during generation of files.\n', 'action': 'store_true', 'default': False}, 'download-dbnsfp-only-info': {'help': 'Add only INFO column (VCF format) in Parquet folder and file.\nUseful for speed up full annotation (all available columns).\nDecrease memory and space during generation of files.\nIncrease time for partial annotation (some available columns).\n', 'action': 'store_true', 'default': False}, 'download-dbnsfp-uniquify': {'help': 'Uniquify values within column\n(e.g. 
"D,D" to "D", "D,.,T" to "D,T").\nRemove transcripts information details.\nUsefull to reduce size of the database.\nIncrease memory and space during generation of files.\n', 'action': 'store_true', 'default': False}, 'download-dbnsfp-row-group-size': {'metavar': 'dnNSFP row grooup size', 'help': 'Minimum number of rows in a parquet row group (see duckDB doc).\nLower can reduce memory usage and slightly increase space during generation,\nspeed up highly selective queries, slow down whole file queries (e.g. aggregations).\n', 'required': False, 'default': 100000, 'type': <class 'int'>, 'gooey': {'widget': 'IntegerField', 'options': {'min': 1, 'max': 100000000000, 'increment': 10000}}}, 'download-alphamissense': {'metavar': 'AlphaMissense', 'help': 'Path to AlphaMissense databases', 'required': False, 'default': None, 'type': <PathType object>, 'gooey': {'widget': 'DirChooser', 'options': {'default_dir': '/Users/lebechea/howard/databases', 'message': 'Path to Alphamissense databases folder'}}}, 'download-alphamissense-url': {'metavar': 'AlphaMissense url', 'help': "Download AlphaMissense databases URL (see AlphaMissense website)\n(e.g. 'https://storage.googleapis.com/dm_alphamissense').\n", 'required': False, 'default': 'https://storage.googleapis.com/dm_alphamissense', 'type': <class 'str'>}, 'download-exomiser': {'metavar': 'Exomiser', 'help': 'Path to Exomiser databases\n(e.g. 
/Users/lebechea/howard/databases/exomiser/current).\n', 'required': False, 'default': None, 'type': <PathType object>, 'gooey': {'widget': 'DirChooser', 'options': {'default_dir': '/Users/lebechea/howard/databases', 'message': 'Path to Exomiser databases folder'}}}, 'download-exomiser-application-properties': {'metavar': 'Exomiser application properties', 'help': "Exomiser Application Properties configuration file (see Exomiser website).\nThis file contains configuration settings for the Exomiser tool.\nIf this parameter is not provided, the function will attempt to locate\nthe application properties file automatically based on the Exomiser.\nConfiguration information will be used to download expected releases (if no other parameters).\nCADD and REMM will be downloaded only if 'path' are provided.\n", 'required': False, 'default': None, 'type': <PathType object>, 'gooey': {'widget': 'FileChooser', 'options': {'wildcard': 'All files (*)|*', 'options': {'default_dir': '/Users/lebechea/howard/databases/exomiser/current', 'message': 'Path to Exomiser application properties file'}}}}, 'download-exomiser-url': {'metavar': 'Exomiser url', 'help': "URL where Exomiser database files can be downloaded from\n(e.g. 
'http://data.monarchinitiative.org/exomiser').\n", 'required': False, 'default': 'http://data.monarchinitiative.org/exomiser', 'type': <class 'str'>}, 'download-exomiser-release': {'metavar': 'Exomiser release', 'help': 'Release of Exomiser data to download.\nIf "default", "auto", or "config", retrieve from Application Properties file.\nIf not provided (None), from Application Properties file (Exomiser data-version) \nor default \'2109\'.\n', 'required': False, 'default': None, 'type': <class 'str'>}, 'download-exomiser-phenotype-release': {'metavar': 'Exomiser phenoptye release', 'help': 'Release of Exomiser phenotype to download.\nIf not provided (None), from Application Properties file (Exomiser Phenotype data-version)\nor Exomiser release.\n', 'required': False, 'default': None, 'type': <class 'str'>}, 'download-exomiser-remm-release': {'metavar': 'Exomiser remm release', 'help': 'Release of ReMM (Regulatory Mendelian Mutation) database to download.\nIf "default", "auto", or "config", retrieve from Application Properties file.\n', 'required': False, 'default': None, 'type': <class 'str'>}, 'download-exomiser-remm-url': {'metavar': 'Exomiser remm url', 'help': "URL where ReMM (Regulatory Mendelian Mutation) database files can be downloaded from\n(e.g. 'https://kircherlab.bihealth.org/download/ReMM').\n", 'required': False, 'default': 'https://kircherlab.bihealth.org/download/ReMM', 'type': <class 'str'>}, 'download-exomiser-cadd-release': {'metavar': 'Exomiser cadd release', 'help': 'Release of CADD (Combined Annotation Dependent Depletion) database to download.\nIf "default", "auto", or "config", retrieve from Application Properties file.\n', 'required': False, 'default': None, 'type': <class 'str'>}, 'download-exomiser-cadd-url': {'metavar': 'Exomiser cadd url', 'help': "URL where CADD (Combined Annotation Dependent Depletion) database files can be downloaded from\n(e.g. 
'https://kircherlab.bihealth.org/download/CADD').\n", 'required': False, 'default': 'https://kircherlab.bihealth.org/download/CADD', 'type': <class 'str'>}, 'download-exomiser-cadd-url-snv-file': {'metavar': 'Exomiser url snv file', 'help': 'Name of the file containing the SNV (Single Nucleotide Variant) data\nfor the CADD (Combined Annotation Dependent Depletion) database.\n', 'required': False, 'default': 'whole_genome_SNVs.tsv.gz', 'type': <class 'str'>}, 'download-exomiser-cadd-url-indel-file': {'metavar': 'Exomiser cadd url indel', 'help': 'Name of the file containing the INDEL (Insertion-Deletion) data\nfor the CADD (Combined Annotation Dependent Depletion) database.\n', 'required': False, 'default': 'InDels.tsv.gz', 'type': <class 'str'>}, 'download-dbsnp': {'metavar': 'dnSNP', 'help': "Path to dbSNP databases\n(e.g. '/Users/lebechea/howard/databases/exomiser/dbsnp').\n", 'required': False, 'default': None, 'type': <PathType object>, 'gooey': {'widget': 'DirChooser', 'options': {'default_dir': '/Users/lebechea/howard/databases', 'message': 'Path to dbSNP databases folder'}}}, 'download-dbsnp-releases': {'metavar': 'dnSNP releases', 'help': "Release of dbSNP to download\n(e.g. 'b152', 'b152,b156').\n", 'required': False, 'default': 'b156', 'type': <class 'str'>}, 'download-dbsnp-release-default': {'metavar': 'dnSNP release default', 'help': "Default Release of dbSNP ('default' symlink)\n(e.g. 'b156').\nIf None, first release to download will be assigned as default\nonly if it does not exists.\n", 'required': False, 'default': None, 'type': <class 'str'>}, 'download-dbsnp-url': {'metavar': 'dbSNP url', 'help': "URL where dbSNP database files can be downloaded from.\n(e.g. 
'https://ftp.ncbi.nih.gov/snp/archive').\n", 'required': False, 'default': 'https://ftp.ncbi.nih.gov/snp/archive', 'type': <class 'str'>}, 'download-dbsnp-url-files': {'metavar': 'dbSNP url files', 'help': 'Dictionary that maps assembly names to specific dbSNP URL files.\nIt allows you to provide custom dbSNP URL files for specific assemblies\ninstead of using the default file naming convention.\n', 'required': False, 'default': None, 'type': <class 'str'>}, 'download-dbsnp-url-files-prefix': {'metavar': 'dbSNP url files prefix', 'help': 'String that represents the prefix of the dbSNP file name for a specific assembly.\nIt is used to construct the full URL of the dbSNP file to be downloaded.\n', 'required': False, 'default': 'GCF_000001405', 'type': <class 'str'>}, 'download-dbsnp-assemblies-map': {'metavar': 'dbSNP assemblies map', 'help': 'dictionary that maps assembly names to their corresponding dbSNP versions.\nIt is used to construct the dbSNP file name based on the assembly name.\n', 'required': False, 'default': {'hg19': '25', 'hg38': '40'}, 'type': <class 'str'>, 'gooey': {'options': {'initial_value': '{"hg19": "25", "hg38": "40"}'}}}, 'download-dbsnp-vcf': {'help': 'Generate well-formatted VCF from downloaded file:\n- Add and filter contigs associated to assembly\n- Normalize by splitting multiallelics\n- Need genome (see --download-genome)\n', 'action': 'store_true', 'default': False}, 'download-dbsnp-parquet': {'help': 'Generate Parquet file from VCF.\n', 'action': 'store_true', 'default': False}, 'convert-hgmd': {'metavar': 'HGMD', 'help': 'Convert HGMD databases.\nFolder where the HGMD databases will be stored.\nFields in VCF, Parquet and TSV will be generated.\nIf the folder does not exist, it will be created.\n', 'required': False, 'default': None, 'type': <PathType object>, 'gooey': {'widget': 'DirChooser'}}, 'convert-hgmd-file': {'metavar': 'HGMD file', 'help': "File from HGMD.\nName format 'HGMD_Pro_<release>_<assembly>.vcf.gz'.\n", 'required': 
False, 'default': None, 'type': <PathType object>, 'gooey': {'widget': 'FileChooser'}}, 'convert-hgmd-basename': {'metavar': 'HGMD basename', 'help': "File output basename.\nGenerated files will be prefixed by basename\n(e.g. 'HGMD_Pro_MY_RELEASE')\nBy default (None), input file name without '.vcf.gz'.\n", 'required': False, 'default': None, 'type': <class 'str'>}, 'generate-param': {'metavar': 'param', 'help': 'Parameter file (JSON) with all databases found.\nDatabases folders scanned are defined in config file.\nStructure of databases follow this structure (see doc):\n.../<database>/<release>/<assembly>/*.[parquet|vcf.gz|...]\n', 'required': False, 'default': None, 'type': <PathType object>, 'gooey': {'widget': 'FileSaver', 'options': {'wildcard': 'JSON file (*.json)|*.json'}}}, 'generate-param-description': {'metavar': 'param description', 'help': 'Description file (JSON) with all databases found.\nContains all databases with description of format, assembly, fields...\n', 'required': False, 'default': None, 'type': <PathType object>, 'gooey': {'widget': 'FileSaver', 'options': {'wildcard': 'JSON file (*.json)|*.json'}}}, 'generate-param-releases': {'metavar': 'param release', 'help': "List of database folder releases to check\n(e.g. 'current', 'latest').\n", 'required': False, 'default': 'current', 'type': <class 'str'>}, 'generate-param-formats': {'metavar': 'param formats', 'help': "List of database formats to check\n(e.g. 'parquet', 'parquet,vcf,bed,tsv').\n", 'required': False, 'default': 'parquet', 'type': <class 'str'>}, 'generate-param-bcftools': {'help': "Generate parameter JSON file with BCFTools annotation for allowed formats\n(i.e. 
'vcf', 'bed').\n", 'action': 'store_true', 'default': False}, 'help_md': {'metavar': 'help markdown', 'help': 'Help Output file in MarkDown format.\n', 'required': False, 'default': None, 'type': <PathType object>, 'gooey': {'widget': 'FileSaver', 'options': {'wildcard': 'HTML file (*.md)|*.md'}}}, 'help_html': {'metavar': 'help html', 'help': 'Help Output file in HTML format.\n', 'required': False, 'default': None, 'type': <PathType object>, 'gooey': {'widget': 'FileSaver', 'options': {'wildcard': 'HTML file (*.html)|*.html'}}}, 'help_pdf': {'metavar': 'help pdf', 'help': 'Help Output file in PDF format.\n', 'required': False, 'default': None, 'type': <PathType object>, 'gooey': {'widget': 'FileSaver', 'options': {'wildcard': 'PDF file (*.pdf)|*.pdf'}}}, 'help_json_input': {'metavar': 'help JSON input', 'help': 'Help input file in JSON format.\n', 'required': False, 'default': None, 'type': <PathType object>, 'gooey': {'widget': 'FileChooser', 'options': {'wildcard': 'JSON file (*.json)|*.json|All files (*)|*'}}}, 'help_md_input': {'metavar': 'help MarkDown input', 'help': 'Help input file in MarkDown format.\n', 'required': False, 'default': None, 'type': <PathType object>, 'gooey': {'widget': 'FileChooser', 'options': {'wildcard': 'MarkDown file (*.md)|*.md|All files (*)|*'}}}, 'code_type': {'metavar': 'example code type', 'help': "Help example code type for input JSON format\n(e.g. 'json', 'bash').\n", 'required': False, 'default': '', 'type': <class 'str'>}, 'help_json_input_title': {'metavar': 'help JSON input title', 'help': 'Help JSON input title.\n', 'required': False, 'default': 'Help', 'type': <class 'str'>}, 'genomes-folder': {'metavar': 'genomes', 'help': "Folder containing genomes.\n(e.g. 
'/Users/lebechea/howard/databases/genomes/current'", 'required': False, 'default': '/Users/lebechea/howard/databases/genomes/current', 'type': <PathType object>, 'gooey': {'widget': 'DirChooser', 'options': {'default_dir': '/Users/lebechea/howard/databases/genomes/current', 'message': 'Path to genomes databases folder'}}}, 'config': {'metavar': 'config', 'help': 'Configuration JSON file defined default configuration regarding \nresources (e.g. threads, memory),\nsettings (e.g. verbosity, temporary files),\ndefault folders (e.g. for databases)\nand paths to external tools.\n', 'required': False, 'default': '{}', 'type': <class 'str'>, 'gooey': {'widget': 'FileChooser', 'options': {'initial_value': '{}'}}}, 'threads': {'metavar': 'threads', 'help': 'Specify the number of threads to use for processing HOWARD.\nIt determines the level of parallelism,\neither on python scripts, duckdb engine and external tools.\nIt and can help speed up the process/tool.\nUse -1 to use all available CPU/cores.\nEither non valid value is 1 CPU/core.\n', 'required': False, 'type': <class 'int'>, 'default': -1, 'gooey': {'widget': 'IntegerField', 'options': {'min': -1, 'max': 1000, 'increment': 1}}, 'extra': {'examples': {'# Automatically detect all available CPU/cores': '"threads": -1', '# Define 8 CPU/cores': '"threads": 8'}}}, 'memory': {'metavar': 'memory', 'help': "Specify the memory to use in format FLOAT[kMG]\n(e.g. 
'8G', '12.42G', '1024M').\nIt determines the amount of memory for duckDB engine and external tools\n(especially for JAR programs).\nIt can help to prevent 'out of memory' failures.\nBy default (None) is 80%% of RAM (for duckDB).\n", 'required': False, 'type': <class 'str'>, 'default': None, 'extra': {'format': 'FLOAT[kMG]', 'examples': {'# Automatically detect all available CPU/cores': '"threads": -1', '# Define 8 CPU/cores': '"threads": 8'}}}, 'chunk_size': {'metavar': 'chunk size', 'help': 'Number of records in batch to export output file.\nThe lower the chunk size, the less memory consumption.\nFor Parquet partitioning, files size will depend on the chunk size.\n', 'required': False, 'default': 1000000, 'type': <class 'int'>, 'gooey': {'widget': 'IntegerField', 'options': {'min': 1, 'max': 100000000000, 'increment': 10000}}, 'extra': {'examples': {'Chunk size of 1.000.000 by default': '"chunk_size": 1000000', 'Smaller chunk size to reduce Parquet file size and memory usage': '"chunk_size": 100000'}}}, 'tmp': {'metavar': 'Temporary folder', 'help': "Temporary folder (e.g. 
'/tmp').\nBy default, '.tmp' for duckDB (see doc),external tools and python scripts.\n", 'required': False, 'default': None, 'type': <PathType object>, 'gooey': {'widget': 'DirChooser'}, 'extra': {'examples': {'# System temporary folder': '"tmp": "/tmp"', '# HOWARD work directory': '"tmp": "~/howard/tmp"', '# Current work directory': '"tmp": ".tmp"'}}}, 'duckdb_settings': {'metavar': 'duckDB settings', 'help': 'DuckDB settings (see duckDB doc) as JSON (string or file).\nThese settings have priority (see options \'threads\', \'tmp\'...).\nExamples: \'{"TimeZone": "GMT", "temp_directory": "/tmp/duckdb", "threads": 8}\'.\n', 'required': False, 'default': None, 'type': <PathType object>, 'gooey': {'widget': 'FileChooser', 'options': {'wildcard': 'JSON file (*.json)|*.json|All files (*)|*'}}, 'extra': {'examples': {'DuckDB settings JSON file': '"duckdb_settings": "/path/to/duckdb_config.json"', 'JSON string for Time zone, temporary directory and threads for duckDB': '"duckdb_settings": {\n "TimeZone": "GMT",\n "temp_directory": "/tmp/duckdb",\n "threads": 8\n}'}}}, 'verbosity': {'metavar': 'verbosity', 'help': 'Verbosity level\nAvailable: CRITICAL, ERROR, WARNING, INFO, DEBUG or NOTSET\n- DEBUG: Detailed information, typically of interest only when diagnosing problems.\n- INFO: Confirmation that things are working as expected.\n- WARNING: An indication that something unexpected happened.\n- ERROR: Due to a more serious problem.\n- CRITICAL: A serious error.\n- FATAL: A fatal error.\n- NOTSET: All messages.\n', 'required': False, 'choices': ['CRITICAL', 'ERROR', 'WARNING', 'INFO', 'DEBUG', 'NOTSET', 'WARN', 'FATAL'], 'default': 'INFO', 'type': <class 'str'>, 'gooey': {'widget': 'Dropdown', 'options': {}}, 'extra': {'examples': {'Default verbosity': '"verbosity": "INFO"', 'ERROR level (quiet mode)': '"verbosity": "ERROR"', 'For debug': '"verbosity": "DEBUG"'}}}, 'access': {'metavar': 'access mode', 'help': "Access mode to variants file or database.\nEither 'RW' for Read 
and Write, or 'RO' for Read Only.\n", 'default': 'RW', 'type': <class 'str'>, 'choices': ['RW', 'RO'], 'gooey': {'widget': 'Dropdown', 'options': {}}, 'extra': {'examples': {'Read and Write mode': '"access": "RW"', 'Read only mode': '"access": "RO"'}}}, 'log': {'metavar': 'log', 'help': "Logs file\n(e.g. 'my.log').\n", 'required': False, 'default': None, 'type': <PathType object>, 'gooey': {'widget': 'FileSaver'}, 'extra': {'examples': {'Relative path to log file': '"log": "my.log"', '# HOWARD work directory': '"log": "~/howard/log"', 'Full path to log file': '"log": "/tmp/my.log"'}}}, 'interactive': {'help': 'Interative mose..\n', 'action': 'store_true', 'default': False}, 'quiet': {'help': '==SUPPRESS==', 'action': 'store_true', 'default': False}, 'verbose': {'help': '==SUPPRESS==', 'action': 'store_true', 'default': False}, 'debug': {'help': '==SUPPRESS==', 'action': 'store_true', 'default': False}, 'databases_folder': {'help': 'Path of HOWARD database folder.\n', 'type': <class 'str'>, 'default': '/Users/lebechea/howard/databases'}, 'database': {'help': 'Which database to update.\n', 'type': <class 'str'>, 'default': 'clinvar', 'choices': ['clinvar']}, 'update_config': {'help': 'Path of json configuration file.\n', 'type': <class 'str'>}, 'current_folder': {'help': 'Path of json configuration file.\n', 'type': <class 'str'>, 'default': 'current'}, 'add_variants_view': {'help': 'Create a sheet with all INFO fields exploded.\n', 'action': 'store_true', 'default': False}, 'add_header': {'help': 'Create a sheet with all INFO fields header descritions.\n', 'action': 'store_true', 'default': False}, 'transcripts_expected': {'metavar': 'List of transcripts (file)', 'help': 'File with a list of transcripts in first column.\n', 'required': False, 'default': None, 'type': <PathType object>, 'gooey': {'widget': 'FileChooser', 'options': {'wildcard': 'TSV file (*.tsv)|*.tsv|All files (*)|*'}}}, 'transcripts_missing': {'metavar': 'List of missing transcripts (file)', 
'help': 'File with a list of missing transcripts in first column.\n', 'required': False, 'default': None, 'type': <PathType object>, 'gooey': {'widget': 'FileSaver', 'options': {'wildcard': 'TSV file (*.tsv)|*.tsv|All files (*)|*'}}}, 'genebe_use_refseq': {'help': 'Use refSeq to annotate (default).\n', 'action': 'store_true', 'default': False}, 'genebe_use_ensembl': {'help': 'Use Ensembl to annotate.\n', 'action': 'store_true', 'default': False}, 'not_flatten_consequences': {'help': 'Use exploded annotation informations.\n', 'action': 'store_true', 'default': False}, 'minimalize_info': {'help': "Minimalize INFO field (e.g. '.' value).\n", 'action': 'store_true', 'default': False}, 'minimalize_id': {'help': "Minimalize ID field (e.g. '.' value).\n", 'action': 'store_true', 'default': False}, 'minimalize_qual': {'help': "Minimalize QUAL field (e.g. '.' value).\n", 'action': 'store_true', 'default': False}, 'minimalize_filter': {'help': "Minimalize FILTER field (e.g. '.' value).\n", 'action': 'store_true', 'default': False}, 'minimalize_samples': {'help': "Minimalize samples to keep only genotypes (i.e. 'GT').\n", 'action': 'store_true', 'default': False}, 'remove_samples': {'help': 'Remove all samples to keep only variants.\n', 'action': 'store_true', 'default': False}}, 'commands_arguments': {'query': {'function': 'query', 'description': "Query genetic variations in SQL format. Data can be loaded into 'variants' table from various formats (e.g. VCF, TSV, Parquet...). Using --explode_infos allow query on INFO/tag annotations. SQL query can also use external data within the request, such as a Parquet file(s). 
", 'help': 'Query genetic variations file in SQL format.', 'epilog': 'Usage examples:\n howard query --input=tests/data/example.vcf.gz --query="SELECT * FROM variants WHERE REF = \'A\' AND POS < 100000" \n howard query --input=tests/data/example.vcf.gz --explode_infos --query=\'SELECT "#CHROM", POS, REF, ALT, DP, CLNSIG, sample2, sample3 FROM variants WHERE DP >= 50 OR CLNSIG NOT NULL ORDER BY DP DESC\' \n howard query --query="SELECT \\"#CHROM\\", POS, REF, ALT, \\"INFO/Interpro_domain\\" FROM \'tests/databases/annotations/current/hg19/dbnsfp42a.parquet\' WHERE \\"INFO/Interpro_domain\\" NOT NULL ORDER BY \\"INFO/SiPhy_29way_logOdds_rankscore\\" DESC LIMIT 10" \n howard query --explode_infos --explode_infos_prefix=\'INFO/\' --query="SELECT \\"#CHROM\\", POS, REF, ALT, STRING_AGG(INFO, \';\') AS INFO FROM \'tests/databases/annotations/current/hg19/*.parquet\' GROUP BY \\"#CHROM\\", POS, REF, ALT" --output=/tmp/full_annotation.tsv && head -n2 /tmp/full_annotation.tsv \n howard query --input=tests/data/example.vcf.gz --param=config/param.json \n \n', 'groups': {'main': {'input': False, 'output': False, 'param': False, 'query': False}, 'Explode': {'explode_infos': False, 'explode_infos_prefix': False, 'explode_infos_fields': False}, 'Query': {'query_limit': False, 'query_print_mode': False}, 'Export': {'include_header': False, 'parquet_partitions': False}}}, 'filter': {'function': 'filter', 'description': "Filter genetic variations in SQL format. Data can be loaded into 'variants' table from various formats (e.g. VCF, TSV, Parquet...). SQL filter can also use external data within the request, such as a Parquet file(s). 
", 'help': 'Filter genetic variations file in SQL format.', 'epilog': 'Usage examples:\n howard filter --input=tests/data/example.vcf.gz --output=/tmp/example.filter.vcf.gz --filter="REF = \'A\' AND POS < 100000" \n howard filter --input=tests/data/example.vcf.gz --output=/tmp/example.filter.vcf.gz --filter="REF = \'A\' AND POS < 100000" --samples="sample1,sample2" \n howard filter --input=tests/data/example.vcf.gz --output=/tmp/example.filter.vcf.gz --filter="INFOS.CLNSIG LIKE \'pathogenic\'" --samples="sample1,sample2" \n howard filter --input=tests/data/example.vcf.gz --output=/tmp/example.filter.vcf.gz --filter="QUAL > 100 AND SAMPLES.sample2.GT != \'./.\'" --samples="sample2" \n \n', 'groups': {'main': {'input': True, 'output': True}, 'Filters': {'filter': False, 'samples': False}, 'Export': {'include_header': False, 'parquet_partitions': False}}}, 'stats': {'function': 'stats', 'description': 'Statistics on genetic variations, such as: number of variants, number of samples, statistics by chromosome, genotypes by samples...', 'help': 'Statistics on genetic variations file.', 'epilog': 'Usage examples:\n howard stats --input=tests/data/example.vcf.gz \n howard stats --input=tests/data/example.vcf.gz --stats_md=/tmp/stats.md \n howard stats --input=tests/data/example.vcf.gz --param=config/param.json \n \n', 'groups': {'main': {'input': True, 'param': False}, 'Stats': {'stats_md': False, 'stats_json': False}}}, 'convert': {'function': 'convert', 'description': "Convert genetic variations file to another format. Multiple format are available, such as usual and official VCF and BCF format, but also other formats such as TSV, CSV, PSV and Parquet/duckDB. These formats need a header '.hdr' file to take advantage of the power of howard (especially through INFO/tag definition), and using howard convert tool automatically generate header file fo futher use. 
", 'help': 'Convert genetic variations file to another format.', 'epilog': 'Usage examples:\n howard convert --input=tests/data/example.vcf.gz --output=/tmp/example.tsv \n howard convert --input=tests/data/example.vcf.gz --output=/tmp/example.parquet \n howard convert --input=tests/data/example.vcf.gz --output=/tmp/example.tsv --explode_infos --explode_infos_fields=\'CLNSIG,SIFT,DP\' --order_by=\'CLNSIG DESC, DP DESC\' \n howard convert --input=tests/data/example.vcf.gz --output=/tmp/example.tsv --explode_infos --explode_infos_prefix=\'INFO/\' --explode_infos_fields=\'CLNSIG,SIFT,DP,*\' --order_by=\'"INFO/CLNSIG" DESC, "INFO/DP" DESC\' --include_header \n howard convert --input=tests/data/example.vcf.gz --output=/tmp/example.tsv --param=config/param.json \n \n', 'groups': {'main': {'input': True, 'output': True, 'param': False}, 'Explode': {'explode_infos': False, 'explode_infos_prefix': False, 'explode_infos_fields': False}, 'Export': {'include_header': False, 'order_by': False, 'parquet_partitions': False}}}, 'hgvs': {'function': 'hgvs', 'description': "HGVS annotation using HUGO HGVS internation Sequence Variant Nomenclature (http://varnomenhoward.tools.hgvs.org/). Annotation refere to refGene and genome to generate HGVS nomenclature for all available transcripts. 
This annotation add 'hgvs' field into VCF INFO column of a VCF file.", 'help': 'HGVS annotation (HUGO internation nomenclature) using refGene, genome and transcripts list.\n', 'epilog': 'Usage examples:\n howard hgvs --input=tests/data/example.full.vcf --output=/tmp/example.hgvs.vcf \n howard hgvs --input=tests/data/example.full.vcf --output=/tmp/example.hgvs.tsv --param=config/param.json \n howard hgvs --input=tests/data/example.full.vcf --output=/tmp/example.hgvs.vcf --full_format --use_exon \n \n', 'groups': {'main': {'input': True, 'output': True, 'param': False, 'hgvs_options': False, 'assembly': False}, 'HGVS': {'use_gene': False, 'use_exon': False, 'use_protein': False, 'add_protein': False, 'full_format': False, 'codon_type': False, 'refgene': False, 'refseqlink': False}}}, 'annotation': {'function': 'annotation', 'description': 'Annotation is mainly based on a build-in Parquet annotation method, and tools such as BCFTOOLS, Annovar and snpEff. It uses available databases (see Annovar and snpEff) and homemade databases. Format of databases are: parquet, duckdb, vcf, bed, Annovar and snpEff (Annovar and snpEff databases are automatically downloaded, see howard databases tool). 
', 'help': 'Annotation of genetic variations file using databases/files and tools.', 'epilog': "Usage examples:\n howard annotation --input=tests/data/example.vcf.gz --output=/tmp/example.howard.vcf.gz --annotations='tests/databases/annotations/current/hg19/avsnp150.parquet,tests/databases/annotations/current/hg19/dbnsfp42a.parquet,tests/databases/annotations/current/hg19/gnomad211_genome.parquet' \n howard annotation --input=tests/data/example.vcf.gz --output=/tmp/example.howard.tsv --assembly=hg19 --annotations='annovar:refGene,annovar:cosmic70,snpeff,tests/databases/annotations/current/hg19/clinvar_20210123.parquet' \n howard annotation --input=tests/data/example.vcf.gz --output=/tmp/example.howard.tsv --assembly=hg19 --annotation_parquet='tests/databases/annotations/current/hg19/avsnp150.parquet,tests/databases/annotations/current/hg19/dbnsfp42a.parquet' \n howard annotation --input=tests/data/example.vcf.gz --output=/tmp/example.howard.tsv --assembly=hg19 --annotation_bcftools='tests/databases/annotations/current/hg19/nci60.vcf.gz,tests/databases/annotations/current/hg19/dbnsfp42a.vcf.gz' \n howard annotation --input=tests/data/example.vcf.gz --output=/tmp/example.howard.tsv --assembly=hg19 --annotation_snpsift='tests/databases/annotations/current/hg19/nci60.vcf.gz,tests/databases/annotations/current/hg19/dbnsfp42a.vcf.gz' \n howard annotation --input=tests/data/example.vcf.gz --output=/tmp/example.howard.tsv --assembly=hg19 --annotation_annovar='nci60:cosmic70' \n howard annotation --input=tests/data/example.vcf.gz --output=/tmp/example.howard.tsv --assembly=hg19 --annotation_snpeff='-hgvs' \n howard annotation --input=tests/data/example.vcf.gz --output=/tmp/example.howard.tsv --assembly=hg19 --annotation_exomiser='preset=exome:transcript_source=refseq' \n howard annotation --input=tests/data/example.vcf.gz --output=/tmp/example.howard.tsv --assembly=hg19 --annotation_splice='split_mode=one:spliceai_distance=500:spliceai_mask=1' \n howard annotation 
--input=tests/data/example.vcf.gz --output=/tmp/example.howard.tsv --assembly=hg19 --annotations='ALL:parquet' \n howard annotation --input=tests/data/example.vcf.gz --output=/tmp/example.howard.tsv --param=config/param.json \n \n", 'groups': {'main': {'input': True, 'output': True, 'param': False, 'annotations': False, 'annotation_parquet': False, 'annotation_bcftools': False, 'annotation_annovar': False, 'annotation_snpeff': False, 'annotation_snpsift': False, 'annotation_exomiser': False, 'annotation_splice': False, 'assembly': False}, 'Annotation': {'annotations_update': False, 'annotations_append': False}}}, 'calculation': {'function': 'calculation', 'description': 'Calculation processes variants information to generate new information, such as: identify variation type (VarType), harmonizes allele frequency (VAF) and calculate sttistics (VAF_stats), extracts Nomen (transcript, cNomen, pNomen...) from an HGVS field (e.g. snpEff, Annovar) with an optional list of personalized transcripts, generates VaRank format barcode, identify trio inheritance.', 'help': 'Calculation operations on genetic variations file and genotype information.\n', 'epilog': "Usage examples:\n howard calculation --input=tests/data/example.full.vcf --output=/tmp/example.calculation.tsv --calculations='vartype' \n howard calculation --input=tests/data/example.ann.vcf.gz --output=/tmp/example.calculated.tsv --calculations='snpeff_hgvs,NOMEN' --hgvs_field=snpeff_hgvs --transcripts=tests/data/transcripts.tsv \n howard calculation --input=tests/data/example.vcf.gz --output=/tmp/example.calculated.tsv --calculations='TRIO' --trio_pedigree='sample1,sample2,sample4' \n howard calculation --input=tests/data/example.vcf.gz --output=/tmp/example.calculated.tsv --calculations='BARCODEFAMILY' --family_pedigree='sample1,sample2,sample4' \n howard calculation --input=tests/data/example.ann.transcripts.vcf.gz --output=/tmp/example.calculation.transcripts.tsv --param=config/param.transcripts.json 
--calculations='TRANSCRIPTS_ANNOTATIONS,TRANSCRIPTS_PRIORITIZATION,TRANSCRIPTS_EXPORT' \n howard calculation --input=tests/data/example.ann.vcf.gz --output=/tmp/example.ann.tsv --param=config/param.json \n howard calculation --show_calculations \n \n", 'groups': {'main': {'input': False, 'output': False, 'param': False, 'calculations': False}, 'Calculation': {'calculation_config': False, 'show_calculations': False}, 'NOMEN': {'hgvs_field': False, 'transcripts': False}, 'TRIO': {'trio_pedigree': False}, 'BARCODEFAMILY': {'family_pedigree': False}}}, 'prioritization': {'function': 'prioritization', 'description': "Prioritization algorithm uses profiles to flag variants (as passed or filtered), calculate a prioritization score, and automatically generate a comment for each variants (example: 'polymorphism identified in dbSNP. associated to Lung Cancer. Found in ClinVar database'). Prioritization profiles are defined in a configuration file in JSON format. A profile is defined as a list of annotation/value, using wildcards and comparison options (contains, lower than, greater than, equal...). Annotations fields may be quality values (usually from callers, such as 'DP') or other annotations fields provided by annotations tools, such as HOWARD itself (example: COSMIC, Clinvar, 1000genomes, PolyPhen, SIFT). 
Multiple profiles can be used simultaneously, which is useful to define multiple validation/prioritization levels (example: 'standard', 'stringent', 'rare variants', 'low allele frequency').\n", 'help': 'Prioritization of genetic variations based on annotations criteria (profiles).', 'epilog': "Usage examples:\n howard prioritization --input=tests/data/example.vcf.gz --output=/tmp/example.prioritized.vcf.gz --prioritizations='default' \n howard prioritization --input=tests/data/example.vcf.gz --output=/tmp/example.prioritized.vcf.gz --prioritizations='default,GERMLINE' --prioritization_config=config/prioritization_profiles.json \n howard prioritization --input=tests/data/example.vcf.gz --output=/tmp/example.prioritized.tsv --param=config/param.json \n \n", 'groups': {'main': {'input': True, 'output': True, 'param': False, 'prioritizations': False}, 'Prioritization': {'default_profile': False, 'pzfields': False, 'prioritization_score_mode': False, 'prioritization_config': False}}}, 'process': {'function': 'process', 'description': 'howard process tool manage genetic variations to:\n- annotates genetic variants with multiple annotation databases/files and tools\n- calculates and normalizes annotations\n- prioritizes variants with profiles (list of citeria) to calculate scores and flags\n- translates into various formats\n- query genetic variants and annotations\n- generates variants statistics', 'help': 'Full genetic variations process: annotation, calculation, prioritization, format, query, filter...', 'epilog': 'Usage examples:\n howard process --input=tests/data/example.vcf.gz --output=/tmp/example.annotated.vcf.gz --param=config/param.json \n howard process --input=tests/data/example.vcf.gz --annotations=\'snpeff\' --calculations=\'snpeff_hgvs\' --prioritizations=\'default\' --explode_infos --output=/tmp/example.annotated.tsv --query=\'SELECT "#CHROM", POS, ALT, REF, snpeff_hgvs FROM variants\' \n howard process --input=tests/data/example.vcf.gz 
--hgvs_options=\'full_format,use_exon\' --explode_infos --output=/tmp/example.annotated.tsv --query=\'SELECT "#CHROM", POS, ALT, REF, hgvs FROM variants\' \n howard process --input=tests/data/example.vcf.gz --output=/tmp/example.howard.vcf.gz --hgvs=\'full_format,use_exon\' --annotations=\'tests/databases/annotations/current/hg19/avsnp150.parquet,tests/databases/annotations/current/hg19/dbnsfp42a.parquet,tests/databases/annotations/current/hg19/gnomad211_genome.parquet\' --calculations=\'NOMEN\' --explode_infos --query=\'SELECT NOMEN, REVEL_score, SIFT_score, AF AS \'gnomad_AF\', ClinPred_score, ClinPred_pred FROM variants\' \n \n', 'groups': {'main': {'input': True, 'output': True, 'param': False, 'hgvs_options': False, 'annotations': False, 'calculations': False, 'prioritizations': False, 'assembly': False}, 'HGVS': {'use_gene': False, 'use_exon': False, 'use_protein': False, 'add_protein': False, 'full_format': False, 'codon_type': False, 'refgene': False, 'refseqlink': False}, 'Annotation': {'annotations_update': False, 'annotations_append': False}, 'Calculation': {'calculation_config': False}, 'Prioritization': {'default_profile': False, 'pzfields': False, 'prioritization_score_mode': False, 'prioritization_config': False}, 'Query': {'query': False, 'query_limit': False, 'query_print_mode': False}, 'Explode': {'explode_infos': False, 'explode_infos_prefix': False, 'explode_infos_fields': False}, 'Export': {'include_header': False, 'order_by': False, 'parquet_partitions': False}}}, 'databases': {'function': 'databases', 'description': 'Download databases and needed files for howard and associated tools', 'help': 'Download databases and needed files for howard and associated tools', 'epilog': "Usage examples:\n howard databases --assembly=hg19 --download-genomes=~/howard/databases/genomes/current --download-genomes-provider=UCSC --download-genomes-contig-regex='chr[0-9XYM]+$' \n howard databases --assembly=hg19 
--download-annovar=~/howard/databases/annovar/current --download-annovar-files='refGene,cosmic70,nci60' \n howard databases --assembly=hg19 --download-snpeff=~/howard/databases/snpeff/current \n howard databases --assembly=hg19 --download-refseq=~/howard/databases/refseq/current --download-refseq-format-file='ncbiRefSeq.txt' \n howard databases --assembly=hg19 --download-dbnsfp=~/howard/databases/dbnsfp/current --download-dbnsfp-release='4.4a' --download-dbnsfp-subdatabases \n howard databases --assembly=hg19 --download-alphamissense=~/howard/databases/alphamissense/current \n howard databases --assembly=hg19 --download-exomiser=~/howard/databases/exomiser/current \n howard databases --assembly=hg19 --download-dbsnp=~/howard/databases/dbsnp/current --download-dbsnp-vcf \n cd ~/howard/databases && howard databases --assembly=hg19 --download-genomes=genomes/current --download-genomes-provider=UCSC --download-genomes-contig-regex='chr[0-9XYM]+$' --download-annovar=annovar/current --download-annovar-files='refGene,cosmic70,nci60' --download-snpeff=snpeff/current --download-refseq=refseq/current --download-refseq-format-file='ncbiRefSeq.txt' --download-dbnsfp=dbnsfp/current --download-dbnsfp-release='4.4a' --download-dbnsfp-subdatabases --download-alphamissense=alphamissense/current --download-exomiser=exomiser/current --download-dbsnp=dbsnp/current --download-dbsnp-vcf --threads=8 \n howard databases --generate-param=/tmp/param.json --generate-param-description=/tmp/test.description.json --generate-param-formats=parquet \n howard databases --input_annovar=tests/databases/others/hg19_nci60.txt --output_annovar=/tmp/nci60.from_annovar.vcf.gz --annovar_to_parquet=/tmp/nci60.from_annovar.parquet --annovar_code=nci60 --genome=~/howard/databases/genomes/current/hg19.fa \n\nNotes:\n - Downloading databases can take a while, depending on network, threads and memory\n - Proxy: Beware of network and proxy configuration\n - dbNSFP download: More threads, more memory usage (8 
threads ~ 16Gb, 24 threads ~ 32Gb)\n \n", 'groups': {'main': {'assembly': False, 'genomes-folder': False, 'genome': False, 'param': False}, 'Genomes': {'download-genomes': False, 'download-genomes-provider': False, 'download-genomes-contig-regex': False}, 'snpEff': {'download-snpeff': False}, 'Annovar': {'download-annovar': False, 'download-annovar-files': False, 'download-annovar-url': False}, 'refSeq': {'download-refseq': False, 'download-refseq-url': False, 'download-refseq-prefix': False, 'download-refseq-files': False, 'download-refseq-format-file': False, 'download-refseq-include-utr5': False, 'download-refseq-include-utr3': False, 'download-refseq-include-chrM': False, 'download-refseq-include-non-canonical-chr': False, 'download-refseq-include-non-coding-transcripts': False, 'download-refseq-include-transcript-version': False}, 'dbNSFP': {'download-dbnsfp': False, 'download-dbnsfp-url': False, 'download-dbnsfp-release': False, 'download-dbnsfp-parquet-size': False, 'download-dbnsfp-subdatabases': False, 'download-dbnsfp-parquet': False, 'download-dbnsfp-vcf': False, 'download-dbnsfp-no-files-all': False, 'download-dbnsfp-add-info': False, 'download-dbnsfp-only-info': False, 'download-dbnsfp-uniquify': False, 'download-dbnsfp-row-group-size': False}, 'AlphaMissense': {'download-alphamissense': False, 'download-alphamissense-url': False}, 'Exomiser': {'download-exomiser': False, 'download-exomiser-application-properties': False, 'download-exomiser-url': False, 'download-exomiser-release': False, 'download-exomiser-phenotype-release': False, 'download-exomiser-remm-release': False, 'download-exomiser-remm-url': False, 'download-exomiser-cadd-release': False, 'download-exomiser-cadd-url': False, 'download-exomiser-cadd-url-snv-file': False, 'download-exomiser-cadd-url-indel-file': False}, 'dbSNP': {'download-dbsnp': False, 'download-dbsnp-releases': False, 'download-dbsnp-release-default': False, 'download-dbsnp-url': False, 'download-dbsnp-url-files': False, 
'download-dbsnp-url-files-prefix': False, 'download-dbsnp-assemblies-map': False, 'download-dbsnp-vcf': False, 'download-dbsnp-parquet': False}, 'HGMD': {'convert-hgmd': False, 'convert-hgmd-file': False, 'convert-hgmd-basename': False}, 'from_Annovar': {'input_annovar': False, 'output_annovar': False, 'annovar_code': False, 'annovar_to_parquet': False, 'annovar_reduce_memory': False, 'annovar_multi_variant': False}, 'from_extann': {'input_extann': False, 'output_extann': False, 'refgene': False, 'transcripts': False, 'param_extann': False, 'mode_extann': False}, 'Parameters': {'generate-param': False, 'generate-param-description': False, 'generate-param-releases': False, 'generate-param-formats': False, 'generate-param-bcftools': False}}}, 'gui': {'function': 'gui', 'description': 'Graphical User Interface tools', 'help': 'Graphical User Interface tools', 'epilog': 'Usage examples:\n howard gui ', 'groups': {}}, 'help': {'function': 'help', 'description': 'Help tools', 'help': 'Help tools', 'epilog': "Usage examples:\n howard help --help_md=docs/help.md --help_html=docs/html/help.html --help_pdf=docs/pdf/help.pdf\n howard help --help_json_input=docs/json/help.configuration.json --help_json_input_title='HOWARD Configuration' --help_md=docs/help.configuration.md --help_html=docs/html/help.configuration.html --help_pdf=docs/pdf/help.configuration.pdf --code_type='json'\n howard help --help_json_input=docs/json/help.parameteres.json --help_json_input_title='HOWARD Parameters' --help_md=docs/help.parameteres.md --help_html=docs/html/help.parameteres.html --help_pdf=docs/pdf/help.parameteres.pdf --code_type='json' \n howard help --help_json_input=docs/json/help.parameteres.databases.json --help_json_input_title='HOWARD Parameters Databases' --help_md=docs/help.parameteres.databases.md --help_html=docs/html/help.parameteres.databases.html --help_pdf=docs/pdf/help.parameteres.databases.pdf --code_type='json' \n \n", 'groups': {'main': {'help_md': False, 'help_html': 
False, 'help_pdf': False, 'help_md_input': False, 'help_json_input': False, 'help_json_input_title': False, 'code_type': False}}}, 'update_database': {'function': 'update_database', 'description': 'Update HOWARD database\n', 'help': '(plugin) Update HOWARD database', 'epilog': 'Usage examples:\n howard update_database --database clinvar --databases_folder /home1/DB/HOWARD --update_config update_databases.json \n \n', 'groups': {'main': {'param': False}, 'Update_database': {'databases_folder': False, 'database': False, 'update_config': False, 'current_folder': False}, 'Options': {'show': False, 'limit': False}}}, 'to_excel': {'function': 'to_excel', 'description': "Convert VCF file to Excel '.xlsx' format.\n", 'help': "(plugin) Convert VCF file to Excel '.xlsx' format", 'epilog': 'Usage examples:\n howard to_excel --input=tests/data/example.vcf.gz --output=/tmp/example.xlsx --add_variants_view\n \n', 'groups': {'main': {'input': True, 'output': True}, 'Add': {'add_variants_view': False, 'add_header': False}}}, 'transcripts_check': {'function': 'transcripts_check', 'description': 'Check if a transcript list is present in a generated transcript table from a input VCF file.\n', 'help': '(plugin) Check transcript list in transcript table', 'epilog': 'Usage examples:\n howard transcripts_check --input=plugins/transcripts_check/tests/data/example.ann.transcripts.vcf.gz --param=plugins/transcripts_check/tests/data/param.transcripts.json --transcripts_expected=plugins/transcripts_check/tests/data/transcripts.tsv --stats=/tmp/transcripts.stats.json --transcripts_missing=/tmp/transcripts.missing.tsv\n \n', 'groups': {'main': {'input': True, 'param': True, 'transcripts_expected': True, 'transcripts_missing': False, 'stats_json': False}}}, 'genebe': {'function': 'genebe', 'description': 'GeneBe annotation using REST API (see https://genebe.net/).\n', 'help': '(plugin) GeneBe annotation using REST API', 'epilog': 'Usage examples:\n howard genebe --input=tests/data/example.vcf.gz 
--output=/tmp/example.genebe.vcf.gz --genebe_use_refseq\n \n', 'groups': {'main': {'input': True, 'output': True, 'param': False, 'assembly': False}, 'GeneBe': {'genebe_use_refseq': False, 'genebe_use_ensembl': False, 'not_flatten_consequences': False}, 'Explode': {'explode_infos': False, 'explode_infos_prefix': False, 'explode_infos_fields': False}, 'Export': {'include_header': False, 'order_by': False, 'parquet_partitions': False}}}, 'minimalize': {'function': 'minimalize', 'description': "Minimalize a VCF file consists in put missing value ('.') on INFO/Tags, ID, QUAL or FILTER fields. Options can also minimalize samples (keep only GT) or remove all samples. INFO/tags can by exploded before minimalize to keep tags into separated columns (useful for Parquet or TSV format to constitute a database).\n", 'help': '(plugin) Minimalize a VCF file, such as removing INFO/Tags or samples', 'epilog': 'Usage examples:\n howard minimalize --input=tests/data/example.vcf.gz --output=/tmp/example.minimal.vcf.gz --minimalize_info --minimalize_filter --minimalize_qual --minimalize_id --minimalize_samples\n howard minimalize --input=tests/data/example.vcf.gz --output=/tmp/example.minimal.tsv --remove_samples --explode_infos --minimalize_info\n \n', 'groups': {'main': {'input': True, 'output': True, 'param': False}, 'Minimalize': {'minimalize_info': False, 'minimalize_id': False, 'minimalize_qual': False, 'minimalize_filter': False, 'minimalize_samples': False, 'remove_samples': False}, 'Explode': {'explode_infos': False, 'explode_infos_prefix': False, 'explode_infos_fields': False}, 'Export': {'include_header': False, 'order_by': False, 'parquet_partitions': False}}}}, 'shared_arguments': ['config', 'threads', 'memory', 'chunk_size', 'tmp', 'duckdb_settings', 'interactive', 'verbosity', 'log', 'quiet', 'verbose', 'debug']} + {'arguments': {'input': {'metavar': 'input', 'help': 'Input file path.\nFormat file must be either VCF, Parquet, TSV, CSV, PSV or duckDB.\nFiles can be 
compressesd (e.g. vcf.gz, tsv.gz).\n', 'required': False, 'default': None, 'type': <PathType object>, 'gooey': {'widget': 'FileChooser', 'options': {'wildcard': 'Parquet file (*.parquet)|*.parquet|All files (*)|*'}}}, 'output': {'metavar': 'output', 'help': 'Output file path.\nFormat file must be either VCF, Parquet, TSV, CSV, PSV or duckDB.\nFiles can be compressesd (e.g. vcf.gz, tsv.gz).\n', 'required': False, 'default': None, 'type': <PathType object>, 'gooey': {'widget': 'FileSaver'}}, 'param': {'metavar': 'param', 'help': 'Parameters JSON file (or string) defines parameters to process \nannotations, calculations, prioritizations, convertions and queries.\n', 'default': '{}', 'type': <PathType object>, 'gooey': {'widget': 'FileChooser', 'options': {'initial_value': '', 'wildcard': 'JSON file (*.json)|*.json|All files (*)|*'}}}, 'query': {'metavar': 'query', 'help': "Query in SQL format\n(e.g. 'SELECT * FROM variants LIMIT 50').\n", 'default': None, 'type': <class 'str'>, 'gooey': {'widget': 'Textarea', 'options': {'initial_value': 'SELECT * FROM variants'}}, 'extra': {'param_section': 'query'}}, 'filter': {'metavar': 'filter', 'help': "Filter variant using SQL format\n(e.g. 'POS < 100000').\n", 'default': None, 'type': <class 'str'>, 'gooey': {'widget': 'Textarea', 'options': {'initial_value': ''}}}, 'samples': {'metavar': 'samples', 'help': "List of samples\n(e.g. 'sample1,sample2').\n", 'default': None, 'type': <class 'str'>, 'gooey': {'widget': 'Textarea', 'options': {'initial_value': ''}}}, 'output_query': {'metavar': 'output', 'help': 'Output Query file.\nFormat file must be either VCF, Parquet, TSV, CSV, PSV or duckDB.\n', 'default': None, 'type': <PathType object>, 'gooey': {'widget': 'FileSaver', 'options': {'wildcard': 'All files (*)|*'}}}, 'annotations': {'metavar': 'annotations', 'help': "Annotation with databases files, or with tools,\nas a list of files in Parquet, VCF, BED, or keywords\n (e.g. 
'file.parquet,bcftools:file2.vcf.gz,annovar:refGene,snpeff').\n- For a Parquet/VCF/BED, use file paths\n (e.g. 'file1.parquet,file2.vcf.gz').\n- For BCFTools annotation, use keyword 'bcftools' with file paths\n (e.g. 'bcftools:file.vcf.gz:file.bed.gz').\n- For Parquet annotation, use keyword 'parquet' with file paths\n (e.g. 'parquet:file.parquet').\n- For Annovar annotation, use keyword 'annovar' with annovar code\n (e.g. 'annovar:refGene', 'annovar:refGene:cosmic70').\n- For snpeff annotation, use keyword 'snpeff' with options\n (e.g. 'snpeff', 'snpeff:-hgvs -noShiftHgvs -spliceSiteSize 3').\n- For snpSift annotation, use keyword 'snpsift' with file paths\n (e.g. 'snpsift:file.vcf.gz:file.bed.gz').\n- For Exomiser annotation, use keyword 'exomiser' with options as key=value\n (e.g. 'exomiser:preset=exome:transcript_source=refseq').\n- For add all availalbe databases files, use 'ALL' keyword,\n with filters on format (e.g. 'parquet', 'vcf') and release (e.g. 'current', 'devel')\n (e.g. 
'ALL', ALL:format=parquet', 'ALL:format=parquet:release=current', 'ALL:format=parquet+vcf:release=current+devel').\n", 'default': None, 'type': <class 'str'>, 'extra': {'format': 'DB[,DB]*[,bcftools:DB[:DB]*][,annovar:KEY[:KEY]*][,snpeff][,exomiser[:var=val]*]', 'examples': {'Parquet method annotation with 2 Parquet files': '"annotations": "/path/to/database1.parquet,/path/to/database2.parquet"', 'Parquet method annotation with multiple file formats': '"annotations": "/path/to/database1.parquet,/path/to/database2.vcf.gz,/path/to/database2.bed.gz"', 'Parquet method annotation with available Parquet databases in current release (check databases in production)': '"annotations": "ALL:parquet:current"', 'Parquet method annotation with available Parquet databases in latest release (check databases before production)': '"annotations": "ALL:parquet:latest"', 'Annotation with BCFTools': '"annotations": "bcftools:/path/to/database2.vcf.gz:/path/to/database2.bed.gz"', 'Annotation with Annovar (refGene with hgvs and Cosmic)': '"annotations": "annovar:refGene:cosmic70"', 'Annotation with snpEff (default options)': '"annotations": "snpeff"', 'Annotation with snpEff (with options)': '"annotations": "snpeff:-hgvs -noShiftHgvs -spliceSiteSize 3"', 'Annotation with snpSift': '"annotations": "snpsift:/path/to/database2.vcf.gz:/path/to/database2.bed.gz"', 'Annotation with Exomiser with options': '"annotations": "exomiser:preset=exome:hpo=0001156+0001363+0011304+0010055:transcript_source=refseq:release=2109"', 'Multiple tools annotations (Parquet method, BCFTools, Annovar, snpEff and Exomiser)': '"annotations": "/path/to/database1.parquet,bcftools:/path/to/database2.vcf.gz,annovar:refGene:cosmic70,snpeff,exomiser:preset=exome:transcript_source=refseq"'}}}, 'annotation_parquet': {'metavar': 'annotation parquet', 'help': "Annotation with Parquet method, as a list of files in Parquet, VCF or BED\n (e.g. 
'file1.parquet,file2.vcf.gz').\nFor add all availalbe databases files, use 'ALL' keyword,\n with filters on type and release\n (e.g. 'ALL', 'ALL:parquet:current', 'ALL:parquet,vcf:current,devel').\n", 'default': None, 'type': <class 'str'>, 'nargs': '+', 'gooey': {'widget': 'MultiFileChooser', 'options': {'default_dir': '/Users/lebechea/howard/databases/annotations/current', 'message': 'Database files'}}, 'extra': {'format': 'DB[,DB]*', 'examples': {'Parquet method annotation with 2 Parquet files': '"annotation_parquet": "/path/to/database1.parquet,/path/to/database2.parquet"', 'Parquet method annotation with multiple file formats': '"annotation_parquet": "/path/to/database1.parquet,/path/to/database2.vcf.gz,/path/to/database2.bed.gz"', 'Parquet method annotation with available Parquet databases in current release (check databases in production)': '"annotation_parquet": "ALL:parquet:current"', 'Parquet method annotation with available Parquet databases in latest release (check databases before production)': '"annotation_parquet": "ALL:parquet:latest"'}}}, 'annotation_bcftools': {'metavar': 'annotation BCFTools', 'help': "Annotation with BCFTools, as a list of files VCF or BED\n (e.g. 'file.vcf.gz,file.bed.gz').\n", 'default': None, 'type': <class 'str'>, 'nargs': '+', 'gooey': {'widget': 'MultiFileChooser', 'options': {'default_dir': '/Users/lebechea/howard/databases/annotations/current', 'message': 'Database files'}}, 'extra': {'format': 'DB[,DB]*', 'examples': {'Annovation with BCFTools': '"annotation_bcftools": "/path/to/database2.vcf.gz,/path/to/database2.bed.gz"'}}}, 'annotation_snpeff': {'metavar': 'annotation snpEff', 'help': "Annotation with snpEff, with options\n (e.g. 
'', '-hgvs -noShiftHgvs -spliceSiteSize 3').\n", 'default': None, 'type': <class 'str'>, 'extra': {'format': 'options', 'examples': {'Annotation with snpEff (default options)': '"annotation_snpeff": ""', 'Annotation with snpEff (with options)': '"annotation_snpeff": "-hgvs -noShiftHgvs -spliceSiteSize 3"'}}}, 'annotation_snpsift': {'metavar': 'annotation snpSift', 'help': "Annotation with snpSift, as a list of files VCF\n (e.g. 'file.vcf.gz,file.bed.gz').\n", 'default': None, 'type': <class 'str'>, 'nargs': '+', 'gooey': {'widget': 'MultiFileChooser', 'options': {'default_dir': '/Users/lebechea/howard/databases/annotations/current', 'message': 'Database files'}}, 'extra': {'format': 'DB[,DB]*', 'examples': {'Annovation with snpSift': '"annotation_snpsift": "/path/to/database2.vcf.gz,/path/to/database2.bed.gz"'}}}, 'annotation_annovar': {'metavar': 'annotation Annovar', 'help': "Annotation with Annovar, as a list of database keywords\n (e.g. 'refGene', 'refGene:cosmic70').\n", 'default': None, 'type': <class 'str'>, 'extra': {'format': 'keyword[:keyword]*', 'examples': {'Annotation with Annovar (refGene with hgvs and Cosmic)': '"annotation_annovar": "refGene:cosmic70"'}}}, 'annotation_exomiser': {'metavar': 'annotation Exomiser', 'help': "Annotation with Exomiser, as a list of options\n (e.g. 'preset=exome:transcript_source=refseq').\n", 'default': None, 'type': <class 'str'>, 'extra': {'format': 'option=value[:option=value]', 'examples': {'Annotation with Exomiser with options': '"annotation_exomiser": "preset=exome:hpo=0001156+0001363+0011304+0010055:transcript_source=refseq:release=2109"'}}}, 'annotation_splice': {'metavar': 'annotation Splice', 'help': "Annotation with Splice, as a list of options\n (e.g. 
'split_mode=one:spliceai_distance=500:spliceai_mask=1').\n", 'default': None, 'type': <class 'str'>, 'extra': {'format': 'option=value[:option=value]', 'examples': {'Annotation with Splice with options': '"annotation_splice": "split_mode=one:spliceai_distance=500:spliceai_mask=1"'}}}, 'annotations_update': {'help': 'Update option for annotation (Only for Parquet annotation).\nIf True, annotation fields will be removed and re-annotated.\nThese options will be applied to all annotation databases.\n', 'action': 'store_true', 'default': False, 'gooey': {'widget': 'BlockCheckbox', 'options': {'checkbox_label': 'Update annotation method'}}, 'extra': {'param_section': 'annotation:options'}}, 'annotations_append': {'help': 'Append option for annotation (Only for Parquet annotation).\nIf True, annotation fields will be annotated only if not annotation exists for the variant.\nThese options will be applied to all annotation databases.\n', 'action': 'store_true', 'default': False, 'gooey': {'widget': 'BlockCheckbox', 'options': {'checkbox_label': 'Append annotation method'}}, 'extra': {'param_section': 'annotation:options'}}, 'calculations': {'metavar': 'operations', 'help': "Quick calculations on genetic variants information and genotype information,\nas a list of operations (e.g. 
'VARTYPE,variant_id').\nList of available calculations by default\n (unsensitive case, see doc for more information):\n VARTYPE snpeff_hgvs FINDBYPIPELINE GENOTYPECONCORDANCE BARCODE TRIO VAF VAF_STATS DP_STATS \n", 'default': None, 'type': <class 'str'>}, 'prioritizations': {'metavar': 'prioritisations', 'help': "List of prioritization profiles to process (based on Prioritization JSON file),\nsuch as 'default', 'rare variants', 'low allele frequency', 'GERMLINE'.\nBy default, all profiles available will be processed.\n", 'default': None, 'type': <class 'str'>, 'extra': {'examples': {'Prioritization profile by default': '"prioritization": "default" ', 'Prioritization profile by default and GERMLINE from Configuration JSON file': '"prioritization": "default,GERMLINE" '}}}, 'prioritization_config': {'metavar': 'prioritization config', 'help': 'Prioritization configuration JSON file (defines profiles, see doc).\n', 'default': None, 'type': <PathType object>, 'gooey': {'widget': 'FileChooser', 'options': {'wildcard': 'JSON file (*.json)|*.json|All files (*)|*'}}, 'extra': {'param_section': 'prioritization', 'examples': {'Prioritization configuration JSON file as an option': '"prioritization_config": "prioritization_config.json" '}}}, 'profiles': {'metavar': 'profiles', 'help': "List of prioritization profiles to process (based on Prioritization JSON file),\nsuch as 'default', 'rare variants', 'low allele frequency', 'GERMLINE'.\nBy default, all profiles available will be processed.\n", 'default': None, 'type': <class 'str'>}, 'default_profile': {'metavar': 'default profile', 'help': 'Prioritization profile by default (see doc).\nDefault is the first profile in the list of prioritization profiles.\n', 'default': None, 'type': <class 'str'>}, 'pzfields': {'metavar': 'pzfields', 'help': 'Prioritization fields to provide (see doc).\nAvailable: PZScore, PZFlag, PZTags, PZComment, PZInfos\n', 'default': 'PZScore,PZFlag', 'type': <class 'str'>}, 'prioritization_score_mode': 
{'metavar': 'prioritization score mode', 'help': 'Prioritization Score mode (see doc).\nAvailable: HOWARD (increment score), VaRank (max score)\n', 'default': 'HOWARD', 'type': <class 'str'>, 'choices': ['HOWARD', 'VaRank'], 'gooey': {'widget': 'Dropdown', 'options': {}}}, 'query_limit': {'metavar': 'query limit', 'help': 'Limit of number of row for query (only for print result, not output).\n', 'default': 10, 'type': <class 'int'>, 'gooey': {'widget': 'IntegerField', 'options': {'min': 1, 'max': 10000, 'increment': 10}}}, 'query_print_mode': {'metavar': 'print mode', 'help': "Print mode of query result (only for print result, not output).\nEither None (native), 'markdown', 'tabulate' or disabled.\n", 'choices': [None, 'markdown', 'tabulate', 'disabled'], 'default': None, 'type': <class 'str'>, 'gooey': {'widget': 'Dropdown', 'options': {}}}, 'explode_infos': {'help': "Explode VCF INFO/Tag into 'variants' table columns.\n", 'action': 'store_true', 'default': False}, 'explode_infos_prefix': {'metavar': 'explode infos prefix', 'help': 'Explode VCF INFO/Tag with a specific prefix.\n', 'default': '', 'type': <class 'str'>}, 'explode_infos_fields': {'metavar': 'explode infos list', 'help': "Explode VCF INFO/Tag specific fields/tags.\nKeyword `*` specify all available fields, except those already specified.\nPattern (regex) can be used, such as `.*_score` for fields named with '_score' at the end.\nExamples:\n- 'HGVS,SIFT,Clinvar' (list of fields)\n- 'HGVS,*,Clinvar' (list of fields with all other fields at the end)\n- 'HGVS,.*_score,Clinvar' (list of 2 fields with all scores in the middle)\n- 'HGVS,.*_score,*' (1 field, scores, all other fields)\n- 'HGVS,*,.*_score' (1 field, all other fields, all scores)\n", 'default': '*', 'type': <class 'str'>}, 'include_header': {'help': 'Include header (in VCF format) in output file.\nOnly for compatible formats (tab-delimiter format as TSV or BED).\n', 'action': 'store_true', 'default': False}, 'order_by': {'metavar': 'order by', 
'help': "List of columns to sort the result-set in ascending or descending order.\nUse SQL format, and keywords ASC (ascending) and DESC (descending).\nIf a column is not available, order will not be considered.\nOrder is enable only for compatible format (e.g. TSV, CSV, JSON).\nExamples: 'ACMG_score DESC', 'PZFlag DESC, PZScore DESC'.\n", 'default': '', 'type': <class 'str'>, 'extra': {'examples': {'Order by ACMG score in descending order': '"order_by": "ACMG_score DESC" ', 'Order by PZFlag and PZScore in descending order': '"order_by": "PZFlag DESC, PZScore DESC" '}}}, 'parquet_partitions': {'metavar': 'parquet partitions', 'help': "Parquet partitioning using hive (available for any format).\nThis option is faster parallel writing, but memory consuming.\nUse 'None' (string) for NO partition but split parquet files into a folder.\nExamples: '#CHROM', '#CHROM,REF', 'None'.\n", 'default': None, 'type': <class 'str'>}, 'input_annovar': {'metavar': 'input annovar', 'help': "Input Annovar file path.\nFormat file must be a Annovar TXT file, associated with '.idx'.\n", 'required': False, 'default': None, 'type': <PathType object>, 'gooey': {'widget': 'FileChooser', 'options': {'wildcard': 'Parquet file (*.parquet)|*.parquet|All files (*)|*'}}}, 'output_annovar': {'metavar': 'output annovar', 'help': "Output Annovar file path.\nFormat file must be either VCF compressesd file '.vcf.gz'.\n", 'required': False, 'default': None, 'type': <PathType object>, 'gooey': {'widget': 'FileSaver'}}, 'annovar_code': {'metavar': 'Annovar code', 'help': 'Annovar code, or database name.\nUsefull to name databases columns.\n', 'required': False, 'default': None, 'type': <class 'str'>}, 'annovar_to_parquet': {'metavar': 'to parquet', 'help': 'Parquet file conversion.\n', 'required': False, 'default': None, 'type': <PathType object>, 'gooey': {'widget': 'FileSaver', 'options': {'wildcard': 'HTML file (*.parquet)|*.parquet'}}}, 'annovar_multi_variant': {'metavar': 'Annovar multi variant', 
'help': "Variant with multiple annotation lines on Annovar file.\nEither 'auto' (auto-detection), 'enable' or 'disable'.\n", 'default': 'auto', 'type': <class 'str'>, 'choices': ['auto', 'enable', 'disable'], 'gooey': {'widget': 'Dropdown', 'options': {}}}, 'annovar_reduce_memory': {'metavar': 'reduce memory', 'help': "Reduce memory option for Annovar convert,\neither 'auto' (auto-detection), 'enable' or 'disable'.\n", 'default': 'auto', 'type': <class 'str'>, 'choices': ['auto', 'enable', 'disable'], 'gooey': {'widget': 'Dropdown', 'options': {}}}, 'input_extann': {'metavar': 'input extann', 'help': 'Input Extann file path.\nFormat file must be a Extann TXT file or TSV file.\nFile need to have at least the genes column.\n', 'required': False, 'default': None, 'type': <PathType object>, 'gooey': {'widget': 'FileChooser', 'options': {'wildcard': 'VCF, Parquet, TSV, CSV, PSV or duckDB|*.*|All files (*)|*'}}}, 'output_extann': {'metavar': 'output extann', 'help': 'Output Extann file path.\nOutput extann file, should be BED or BED.gz.\n', 'required': False, 'default': None, 'type': <PathType object>, 'gooey': {'widget': 'FileSaver'}}, 'mode_extann': {'metavar': 'mode extann', 'help': 'Mode extann selection.\nHow to pick transcript from ncbi, keep all,\nkeep the longest, or keep the chosen one (transcript_extann).\n', 'required': False, 'default': 'longest', 'choices': ['all', 'longest', 'chosen'], 'type': <class 'str'>}, 'param_extann': {'metavar': 'param extann', 'help': "Param extann file path.\nParam containing configuration, options to replace chars and\nbedlike header description, conf vcf specs.\n(e.g. 
'~/howard/config/param.extann.json')\n", 'required': False, 'default': None, 'type': <PathType object>, 'gooey': {'widget': 'FileChooser', 'options': {'wildcard': 'TSV file format|*.tsv|'}}}, 'calculation_config': {'metavar': 'calculation config', 'help': 'Calculation configuration JSON file.\n', 'default': None, 'type': <PathType object>, 'gooey': {'widget': 'FileChooser', 'options': {'wildcard': 'JSON file (*.json)|*.json|All files (*)|*'}}, 'extra': {'param_section': 'calculation', 'examples': {'Calculation configuration JSON file as an option': '"calculation_config": "calculation_config.json" '}}}, 'show_calculations': {'help': 'Show available calculation operations.\n', 'action': 'store_true', 'default': False}, 'hgvs_field': {'metavar': 'HGVS field', 'help': 'HGVS INFO/tag containing a list o HGVS annotations.\n', 'default': 'hgvs', 'type': <class 'str'>, 'extra': {'param_section': 'calculation:calculations:NOMEN:options'}}, 'transcripts': {'metavar': 'transcripts', 'help': 'Transcripts TSV file,\nwith Transcript in first column, optional Gene in second column.\n', 'default': None, 'type': <PathType object>, 'gooey': {'widget': 'FileChooser', 'options': {'wildcard': 'TSV file (*.tsv)|*.tsv|All files (*)|*'}}, 'extra': {'param_section': 'calculation:calculations:NOMEN:options'}}, 'trio_pedigree': {'metavar': 'trio pedigree', 'help': 'Pedigree Trio for trio inheritance calculation.\nEither a JSON file or JSON string or a list of samples\n(e.g. \'sample1,sample2,sample3\' for father, mother and child,\n \'{"father": "sample1", "mother": "sample2", "child": "sample3"}\').\n', 'default': None, 'gooey': {'widget': 'FileChooser', 'options': {'wildcard': 'JSON file (*.json)|*.json|All files (*)|*'}}, 'extra': {'param_section': 'calculation:calculations:TRIO'}}, 'family_pedigree': {'metavar': 'family pedigree', 'help': 'Pedigree family for barcode calculation on genotype.\nEither a JSON file or JSON string or a list of samples\n(e.g. 
\'sample1,sample2,sample3,sample4\',\n \'{"father": "sample1", "mother": "sample2", "child1": "sample3", "child2": "sample3"}\').\n', 'default': None, 'gooey': {'widget': 'FileChooser', 'options': {'wildcard': 'JSON file (*.json)|*.json|All files (*)|*'}}, 'extra': {'param_section': 'calculation:calculations:BARCODEFAMILY'}}, 'stats_md': {'metavar': 'stats markdown', 'help': 'Stats Output file in MarkDown format.\n', 'required': False, 'default': None, 'type': <PathType object>, 'gooey': {'widget': 'FileSaver', 'options': {'wildcard': 'Markdown file (*.md)|*.md'}}, 'extra': {'examples': {'Export statistics in Markdown format': '"stats_md": "/tmp/stats.md" '}}}, 'stats_json': {'metavar': 'stats json', 'help': 'Stats Output file in JSON format.\n', 'required': False, 'default': None, 'type': <PathType object>, 'gooey': {'widget': 'FileSaver', 'options': {'wildcard': 'JSON file (*.json)|*.json'}}, 'extra': {'examples': {'Export statistics in JSON format': '"stats_json": "/tmp/stats.json" '}}}, 'assembly': {'metavar': 'assembly', 'help': "Genome Assembly (e.g. 'hg19', 'hg38').\n", 'required': False, 'default': 'hg19', 'type': <class 'str'>, 'extra': {'examples': {'Default assembly for all analysis tools': '"assembly": "hg19" ', 'List of assemblies for databases download tool': '"assembly": "hg19,hg38" '}}}, 'genome': {'metavar': 'genome', 'help': "Genome file in fasta format (e.g. 
'hg19.fa', 'hg38.fa').\n", 'required': False, 'default': '/Users/lebechea/howard/databases/genomes/current/hg19/hg19.fa', 'type': <PathType object>, 'gooey': {'widget': 'FileChooser', 'options': {'wildcard': 'All files (*)|*'}}}, 'hgvs_options': {'metavar': 'HGVS options', 'help': "Quick HGVS annotation options.\nThis option will skip all other hgvs options.\nExamples:\n- 'default' (for default options)\n- 'full_format' (for full format HGVS annotation)\n- 'use_gene=True:add_protein=true:codon_type=FULL'\n", 'required': False, 'default': None, 'type': <class 'str'>}, 'use_gene': {'help': "Use Gene information to generate HGVS annotation\n(e.g. 'NM_152232(TAS1R2):c.231T>C')", 'action': 'store_true', 'default': False}, 'use_exon': {'help': "Use Exon information to generate HGVS annotation\n(e.g. 'NM_152232(exon2):c.231T>C').\nOnly if 'use_gene' is not enabled.\n", 'action': 'store_true', 'default': False}, 'use_protein': {'help': "Use Protein level to generate HGVS annotation\n(e.g. 'NP_689418:p.Cys77Arg').\nCan be used with 'use_exon' or 'use_gene'.\n", 'action': 'store_true', 'default': False}, 'add_protein': {'help': "Add Protein level to DNA HGVS annotation (e.g 'NM_152232:c.231T>C,NP_689418:p.Cys77Arg').\n", 'action': 'store_true', 'default': False}, 'full_format': {'help': "Generates HGVS annotation in a full format\nby using all information to generates an exhaustive annotation\n(non-standard, e.g. 'TAS1R2:NM_152232:NP_689418:c.231T>C:p.Cys77Arg').\nUse 'use_exon' to add exon information\n(e.g 'TAS1R2:NM_152232:NP_689418:exon2:c.231T>C:p.Cys77Arg').\n", 'action': 'store_true', 'default': False}, 'use_version': {'help': "Generates HGVS annotation with transcript version\n(e.g. 'NM_152232.1:c.231T>C').\n", 'action': 'store_true', 'default': False}, 'codon_type': {'metavar': 'Codon type', 'help': "Amino Acide Codon format type to use to generate HGVS annotation.\nAvailable:\n- '1': codon in 1 character (e.g. 'C', 'R')\n- '3': codon in 3 character (e.g. 
'Cys', 'Arg')\n-'FULL': codon in full name (e.g. 'Cysteine', 'Arginine')\n", 'required': False, 'default': '3', 'type': <class 'str'>, 'choices': ['1', '3', 'FULL'], 'gooey': {'widget': 'Dropdown', 'options': {}}}, 'refgene': {'metavar': 'refGene', 'help': 'Path to refGene annotation file.\n', 'required': False, 'default': None, 'type': <PathType object>, 'gooey': {'widget': 'FileChooser', 'options': {'wildcard': 'All files (*)|*', 'default_dir': '/Users/lebechea/howard/databases/refseq/current', 'default_file': 'ncbiRefSeq.txt', 'message': 'Path to refGene annotation file'}}}, 'refseqlink': {'metavar': 'refSeqLink', 'help': 'Path to refSeqLink annotation file.\n', 'required': False, 'default': None, 'type': <PathType object>, 'gooey': {'widget': 'FileChooser', 'options': {'wildcard': 'All files (*)|*', 'default_dir': '/Users/lebechea/howard/databases/refseq/current', 'default_file': 'ncbiRefSeq.txt', 'message': 'Path to refGeneLink annotation file'}}}, 'refseq-folder': {'metavar': 'refseq folder', 'help': 'Folder containing refSeq files.\n', 'required': False, 'default': '/Users/lebechea/howard/databases/refseq/current', 'type': <PathType object>, 'gooey': {'widget': 'DirChooser', 'options': {'default_dir': '/Users/lebechea/howard/databases/refseq/current', 'message': 'Path to refGenefolder'}}}, 'download-genomes': {'metavar': 'genomes', 'help': "Path to genomes folder\nwith Fasta files, indexes,\nand all files generated by pygenome module.\n(e.g. 
'/Users/lebechea/howard/databases/genomes/current').\n", 'required': False, 'default': None, 'type': <PathType object>, 'gooey': {'widget': 'DirChooser', 'options': {'default_dir': '/Users/lebechea/howard/databases', 'message': 'Path to genomes folder'}}}, 'download-genomes-provider': {'metavar': 'genomes provider', 'help': 'Download Genome from an external provider.\nAvailable: GENCODE, Ensembl, UCSC, NCBI.\n', 'required': False, 'default': 'UCSC', 'type': <class 'str'>, 'choices': ['GENCODE', 'Ensembl', 'UCSC', 'NCBI'], 'gooey': {'widget': 'Dropdown', 'options': {}}}, 'download-genomes-contig-regex': {'metavar': 'genomes contig regex', 'help': "Regular expression to select specific chromosome\n(e.g 'chr[0-9XYM]+$').\n", 'required': False, 'default': None, 'type': <class 'str'>}, 'download-annovar': {'metavar': 'Annovar', 'help': "Path to Annovar databases\n(e.g. '/Users/lebechea/howard/databases/annovar/current').\n", 'required': False, 'type': <PathType object>, 'default': None, 'gooey': {'widget': 'DirChooser', 'options': {'default_dir': '/Users/lebechea/howard/databases', 'message': 'Path to Annovar databases folder'}}}, 'download-annovar-files': {'metavar': 'Annovar code', 'help': "Download Annovar databases for a list of Annovar file code (see Annovar Doc).\nUse None to download all available files,\nor Annovar keyword (e.g. 
'refGene', 'cosmic70', 'clinvar_202*').\nNote that refGene will at least be downloaded,\nand only files that do not already exist or changed will be downloaded.\n", 'required': False, 'default': None, 'type': <class 'str'>}, 'download-annovar-url': {'metavar': 'Annovar url', 'help': 'Annovar databases URL (see Annovar Doc).\n', 'required': False, 'default': 'http://www.openbioinformatics.org/annovar/download', 'type': <class 'str'>}, 'download-snpeff': {'metavar': 'snpEff', 'help': 'Download snpEff databases within snpEff folder', 'required': False, 'default': None, 'type': <PathType object>, 'gooey': {'widget': 'DirChooser', 'options': {'default_dir': '/Users/lebechea/howard/databases', 'message': 'Path to snpEff databases folder'}}}, 'download-refseq': {'metavar': 'refSeq', 'help': "Path to refSeq databases\n(e.g. '/Users/lebechea/howard/databases/refseq/current').\n", 'required': False, 'default': None, 'type': <PathType object>, 'gooey': {'widget': 'DirChooser', 'options': {'default_dir': '/Users/lebechea/howard/databases', 'message': 'Path to refGene files folder'}}}, 'download-refseq-url': {'metavar': 'refSeq url', 'help': "refSeq databases URL (see refSeq WebSite)\n(e.g. 'http://hgdownload.soe.ucsc.edu/goldenPath').\n", 'required': False, 'default': 'http://hgdownload.soe.ucsc.edu/goldenPath', 'type': <class 'str'>}, 'download-refseq-prefix': {'metavar': 'refSeq prefix', 'help': 'Check existing refSeq files in refSeq folder.\n', 'required': False, 'default': 'ncbiRefSeq', 'type': <class 'str'>}, 'download-refseq-files': {'metavar': 'refSeq files', 'help': 'List of refSeq files to download.\n', 'required': False, 'default': 'ncbiRefSeq.txt,ncbiRefSeqLink.txt', 'type': <class 'str'>}, 'download-refseq-format-file': {'metavar': 'refSeq format file', 'help': "Name of refSeq file to convert in BED format\n(e.g. 
'ncbiRefSeq.txt').\nProcess only if not None.\n", 'required': False, 'default': None, 'type': <class 'str'>}, 'download-refseq-include-utr5': {'help': "Formatting BED refSeq file including 5'UTR.\n", 'action': 'store_true', 'default': False}, 'download-refseq-include-utr3': {'help': "Formatting BED refSeq file including 3'UTR.\n", 'action': 'store_true', 'default': False}, 'download-refseq-include-chrM': {'help': "Formatting BED refSeq file including Mitochondrial chromosome 'chrM' or 'chrMT'.\n", 'action': 'store_true', 'default': False}, 'download-refseq-include-non-canonical-chr': {'help': 'Formatting BED refSeq file including non canonical chromosomes.\n', 'action': 'store_true', 'default': False}, 'download-refseq-include-non-coding-transcripts': {'help': 'Formatting BED refSeq file including non coding transcripts.\n', 'action': 'store_true', 'default': False}, 'download-refseq-include-transcript-version': {'help': 'Formatting BED refSeq file including transcript version.\n', 'action': 'store_true', 'default': False}, 'download-dbnsfp': {'metavar': 'dbNSFP', 'help': "Download dbNSFP databases within dbNSFP folder (e.g. '/Users/lebechea/howard/databases').\n", 'required': False, 'default': None, 'type': <PathType object>, 'gooey': {'widget': 'DirChooser', 'options': {'default_dir': '/Users/lebechea/howard/databases', 'message': 'Path to dbNSFP databases folder'}}}, 'download-dbnsfp-url': {'metavar': 'dbNSFP url', 'help': "Download dbNSFP databases URL (see dbNSFP website)\n(e.g. 'https://dbnsfp.s3.amazonaws.com').\n", 'required': False, 'default': 'https://dbnsfp.s3.amazonaws.com', 'type': <class 'str'>}, 'download-dbnsfp-release': {'metavar': 'dbNSFP release', 'help': "Release of dbNSFP to download (see dbNSFP website)\n(e.g. 
'4.4a').\n", 'required': False, 'default': '4.4a'}, 'download-dbnsfp-parquet-size': {'metavar': 'dbNSFP parquet size', 'help': 'Maximum size (Mb) of data files in Parquet folder.\nParquet folder are partitioned (hive) by chromosome (sub-folder),\nwhich contain N data files.\n', 'required': False, 'default': 100, 'type': <class 'int'>, 'gooey': {'widget': 'IntegerField', 'options': {'min': 1, 'max': 100000, 'increment': 10}}}, 'download-dbnsfp-subdatabases': {'help': 'Generate dbNSFP sub-databases.\ndbNSFP provides multiple databases which are split onto multiple columns.\nThis option create a Parquet folder for each sub-database (based on columns names).\n', 'action': 'store_true', 'default': False}, 'download-dbnsfp-parquet': {'help': 'Generate a Parquet file for each Parquet folder.\n', 'action': 'store_true', 'default': False}, 'download-dbnsfp-vcf': {'help': 'Generate a VCF file for each Parquet folder.\nNeed genome FASTA file (see --download-genome).\n', 'action': 'store_true', 'default': False}, 'download-dbnsfp-no-files-all': {'help': "Not generate database Parquet/VCF file for the entire database ('ALL').\nOnly sub-databases files will be generated.\n(see '--download-dbnsfp-subdatabases').\n", 'action': 'store_true', 'default': False}, 'download-dbnsfp-add-info': {'help': 'Add INFO column (VCF format) in Parquet folder and file.\nUseful for speed up full annotation (all available columns).\nIncrease memory and space during generation of files.\n', 'action': 'store_true', 'default': False}, 'download-dbnsfp-only-info': {'help': 'Add only INFO column (VCF format) in Parquet folder and file.\nUseful for speed up full annotation (all available columns).\nDecrease memory and space during generation of files.\nIncrease time for partial annotation (some available columns).\n', 'action': 'store_true', 'default': False}, 'download-dbnsfp-uniquify': {'help': 'Uniquify values within column\n(e.g. 
"D,D" to "D", "D,.,T" to "D,T").\nRemove transcripts information details.\nUsefull to reduce size of the database.\nIncrease memory and space during generation of files.\n', 'action': 'store_true', 'default': False}, 'download-dbnsfp-row-group-size': {'metavar': 'dnNSFP row grooup size', 'help': 'Minimum number of rows in a parquet row group (see duckDB doc).\nLower can reduce memory usage and slightly increase space during generation,\nspeed up highly selective queries, slow down whole file queries (e.g. aggregations).\n', 'required': False, 'default': 100000, 'type': <class 'int'>, 'gooey': {'widget': 'IntegerField', 'options': {'min': 1, 'max': 100000000000, 'increment': 10000}}}, 'download-alphamissense': {'metavar': 'AlphaMissense', 'help': 'Path to AlphaMissense databases', 'required': False, 'default': None, 'type': <PathType object>, 'gooey': {'widget': 'DirChooser', 'options': {'default_dir': '/Users/lebechea/howard/databases', 'message': 'Path to Alphamissense databases folder'}}}, 'download-alphamissense-url': {'metavar': 'AlphaMissense url', 'help': "Download AlphaMissense databases URL (see AlphaMissense website)\n(e.g. 'https://storage.googleapis.com/dm_alphamissense').\n", 'required': False, 'default': 'https://storage.googleapis.com/dm_alphamissense', 'type': <class 'str'>}, 'download-exomiser': {'metavar': 'Exomiser', 'help': 'Path to Exomiser databases\n(e.g. 
/Users/lebechea/howard/databases/exomiser/current).\n', 'required': False, 'default': None, 'type': <PathType object>, 'gooey': {'widget': 'DirChooser', 'options': {'default_dir': '/Users/lebechea/howard/databases', 'message': 'Path to Exomiser databases folder'}}}, 'download-exomiser-application-properties': {'metavar': 'Exomiser application properties', 'help': "Exomiser Application Properties configuration file (see Exomiser website).\nThis file contains configuration settings for the Exomiser tool.\nIf this parameter is not provided, the function will attempt to locate\nthe application properties file automatically based on the Exomiser.\nConfiguration information will be used to download expected releases (if no other parameters).\nCADD and REMM will be downloaded only if 'path' are provided.\n", 'required': False, 'default': None, 'type': <PathType object>, 'gooey': {'widget': 'FileChooser', 'options': {'wildcard': 'All files (*)|*', 'options': {'default_dir': '/Users/lebechea/howard/databases/exomiser/current', 'message': 'Path to Exomiser application properties file'}}}}, 'download-exomiser-url': {'metavar': 'Exomiser url', 'help': "URL where Exomiser database files can be downloaded from\n(e.g. 
'http://data.monarchinitiative.org/exomiser').\n", 'required': False, 'default': 'http://data.monarchinitiative.org/exomiser', 'type': <class 'str'>}, 'download-exomiser-release': {'metavar': 'Exomiser release', 'help': 'Release of Exomiser data to download.\nIf "default", "auto", or "config", retrieve from Application Properties file.\nIf not provided (None), from Application Properties file (Exomiser data-version) \nor default \'2109\'.\n', 'required': False, 'default': None, 'type': <class 'str'>}, 'download-exomiser-phenotype-release': {'metavar': 'Exomiser phenoptye release', 'help': 'Release of Exomiser phenotype to download.\nIf not provided (None), from Application Properties file (Exomiser Phenotype data-version)\nor Exomiser release.\n', 'required': False, 'default': None, 'type': <class 'str'>}, 'download-exomiser-remm-release': {'metavar': 'Exomiser remm release', 'help': 'Release of ReMM (Regulatory Mendelian Mutation) database to download.\nIf "default", "auto", or "config", retrieve from Application Properties file.\n', 'required': False, 'default': None, 'type': <class 'str'>}, 'download-exomiser-remm-url': {'metavar': 'Exomiser remm url', 'help': "URL where ReMM (Regulatory Mendelian Mutation) database files can be downloaded from\n(e.g. 'https://kircherlab.bihealth.org/download/ReMM').\n", 'required': False, 'default': 'https://kircherlab.bihealth.org/download/ReMM', 'type': <class 'str'>}, 'download-exomiser-cadd-release': {'metavar': 'Exomiser cadd release', 'help': 'Release of CADD (Combined Annotation Dependent Depletion) database to download.\nIf "default", "auto", or "config", retrieve from Application Properties file.\n', 'required': False, 'default': None, 'type': <class 'str'>}, 'download-exomiser-cadd-url': {'metavar': 'Exomiser cadd url', 'help': "URL where CADD (Combined Annotation Dependent Depletion) database files can be downloaded from\n(e.g. 
'https://kircherlab.bihealth.org/download/CADD').\n", 'required': False, 'default': 'https://kircherlab.bihealth.org/download/CADD', 'type': <class 'str'>}, 'download-exomiser-cadd-url-snv-file': {'metavar': 'Exomiser url snv file', 'help': 'Name of the file containing the SNV (Single Nucleotide Variant) data\nfor the CADD (Combined Annotation Dependent Depletion) database.\n', 'required': False, 'default': 'whole_genome_SNVs.tsv.gz', 'type': <class 'str'>}, 'download-exomiser-cadd-url-indel-file': {'metavar': 'Exomiser cadd url indel', 'help': 'Name of the file containing the INDEL (Insertion-Deletion) data\nfor the CADD (Combined Annotation Dependent Depletion) database.\n', 'required': False, 'default': 'InDels.tsv.gz', 'type': <class 'str'>}, 'download-dbsnp': {'metavar': 'dnSNP', 'help': "Path to dbSNP databases\n(e.g. '/Users/lebechea/howard/databases/exomiser/dbsnp').\n", 'required': False, 'default': None, 'type': <PathType object>, 'gooey': {'widget': 'DirChooser', 'options': {'default_dir': '/Users/lebechea/howard/databases', 'message': 'Path to dbSNP databases folder'}}}, 'download-dbsnp-releases': {'metavar': 'dnSNP releases', 'help': "Release of dbSNP to download\n(e.g. 'b152', 'b152,b156').\n", 'required': False, 'default': 'b156', 'type': <class 'str'>}, 'download-dbsnp-release-default': {'metavar': 'dnSNP release default', 'help': "Default Release of dbSNP ('default' symlink)\n(e.g. 'b156').\nIf None, first release to download will be assigned as default\nonly if it does not exists.\n", 'required': False, 'default': None, 'type': <class 'str'>}, 'download-dbsnp-url': {'metavar': 'dbSNP url', 'help': "URL where dbSNP database files can be downloaded from.\n(e.g. 
'https://ftp.ncbi.nih.gov/snp/archive').\n", 'required': False, 'default': 'https://ftp.ncbi.nih.gov/snp/archive', 'type': <class 'str'>}, 'download-dbsnp-url-files': {'metavar': 'dbSNP url files', 'help': 'Dictionary that maps assembly names to specific dbSNP URL files.\nIt allows you to provide custom dbSNP URL files for specific assemblies\ninstead of using the default file naming convention.\n', 'required': False, 'default': None, 'type': <class 'str'>}, 'download-dbsnp-url-files-prefix': {'metavar': 'dbSNP url files prefix', 'help': 'String that represents the prefix of the dbSNP file name for a specific assembly.\nIt is used to construct the full URL of the dbSNP file to be downloaded.\n', 'required': False, 'default': 'GCF_000001405', 'type': <class 'str'>}, 'download-dbsnp-assemblies-map': {'metavar': 'dbSNP assemblies map', 'help': 'dictionary that maps assembly names to their corresponding dbSNP versions.\nIt is used to construct the dbSNP file name based on the assembly name.\n', 'required': False, 'default': {'hg19': '25', 'hg38': '40'}, 'type': <class 'str'>, 'gooey': {'options': {'initial_value': '{"hg19": "25", "hg38": "40"}'}}}, 'download-dbsnp-vcf': {'help': 'Generate well-formatted VCF from downloaded file:\n- Add and filter contigs associated to assembly\n- Normalize by splitting multiallelics\n- Need genome (see --download-genome)\n', 'action': 'store_true', 'default': False}, 'download-dbsnp-parquet': {'help': 'Generate Parquet file from VCF.\n', 'action': 'store_true', 'default': False}, 'convert-hgmd': {'metavar': 'HGMD', 'help': 'Convert HGMD databases.\nFolder where the HGMD databases will be stored.\nFields in VCF, Parquet and TSV will be generated.\nIf the folder does not exist, it will be created.\n', 'required': False, 'default': None, 'type': <PathType object>, 'gooey': {'widget': 'DirChooser'}}, 'convert-hgmd-file': {'metavar': 'HGMD file', 'help': "File from HGMD.\nName format 'HGMD_Pro_<release>_<assembly>.vcf.gz'.\n", 'required': 
False, 'default': None, 'type': <PathType object>, 'gooey': {'widget': 'FileChooser'}}, 'convert-hgmd-basename': {'metavar': 'HGMD basename', 'help': "File output basename.\nGenerated files will be prefixed by basename\n(e.g. 'HGMD_Pro_MY_RELEASE')\nBy default (None), input file name without '.vcf.gz'.\n", 'required': False, 'default': None, 'type': <class 'str'>}, 'generate-param': {'metavar': 'param', 'help': 'Parameter file (JSON) with all databases found.\nDatabases folders scanned are defined in config file.\nStructure of databases follow this structure (see doc):\n.../<database>/<release>/<assembly>/*.[parquet|vcf.gz|...]\n', 'required': False, 'default': None, 'type': <PathType object>, 'gooey': {'widget': 'FileSaver', 'options': {'wildcard': 'JSON file (*.json)|*.json'}}}, 'generate-param-description': {'metavar': 'param description', 'help': 'Description file (JSON) with all databases found.\nContains all databases with description of format, assembly, fields...\n', 'required': False, 'default': None, 'type': <PathType object>, 'gooey': {'widget': 'FileSaver', 'options': {'wildcard': 'JSON file (*.json)|*.json'}}}, 'generate-param-releases': {'metavar': 'param release', 'help': "List of database folder releases to check\n(e.g. 'current', 'latest').\n", 'required': False, 'default': 'current', 'type': <class 'str'>}, 'generate-param-formats': {'metavar': 'param formats', 'help': "List of database formats to check\n(e.g. 'parquet', 'parquet,vcf,bed,tsv').\n", 'required': False, 'default': 'parquet', 'type': <class 'str'>}, 'generate-param-bcftools': {'help': "Generate parameter JSON file with BCFTools annotation for allowed formats\n(i.e. 
'vcf', 'bed').\n", 'action': 'store_true', 'default': False}, 'help_md': {'metavar': 'help markdown', 'help': 'Help Output file in MarkDown format.\n', 'required': False, 'default': None, 'type': <PathType object>, 'gooey': {'widget': 'FileSaver', 'options': {'wildcard': 'HTML file (*.md)|*.md'}}}, 'help_html': {'metavar': 'help html', 'help': 'Help Output file in HTML format.\n', 'required': False, 'default': None, 'type': <PathType object>, 'gooey': {'widget': 'FileSaver', 'options': {'wildcard': 'HTML file (*.html)|*.html'}}}, 'help_pdf': {'metavar': 'help pdf', 'help': 'Help Output file in PDF format.\n', 'required': False, 'default': None, 'type': <PathType object>, 'gooey': {'widget': 'FileSaver', 'options': {'wildcard': 'PDF file (*.pdf)|*.pdf'}}}, 'help_json_input': {'metavar': 'help JSON input', 'help': 'Help input file in JSON format.\n', 'required': False, 'default': None, 'type': <PathType object>, 'gooey': {'widget': 'FileChooser', 'options': {'wildcard': 'JSON file (*.json)|*.json|All files (*)|*'}}}, 'help_md_input': {'metavar': 'help MarkDown input', 'help': 'Help input file in MarkDown format.\n', 'required': False, 'default': None, 'type': <PathType object>, 'gooey': {'widget': 'FileChooser', 'options': {'wildcard': 'MarkDown file (*.md)|*.md|All files (*)|*'}}}, 'code_type': {'metavar': 'example code type', 'help': "Help example code type for input JSON format\n(e.g. 'json', 'bash').\n", 'required': False, 'default': '', 'type': <class 'str'>}, 'help_json_input_title': {'metavar': 'help JSON input title', 'help': 'Help JSON input title.\n', 'required': False, 'default': 'Help', 'type': <class 'str'>}, 'genomes-folder': {'metavar': 'genomes', 'help': "Folder containing genomes.\n(e.g. 
'/Users/lebechea/howard/databases/genomes/current'", 'required': False, 'default': '/Users/lebechea/howard/databases/genomes/current', 'type': <PathType object>, 'gooey': {'widget': 'DirChooser', 'options': {'default_dir': '/Users/lebechea/howard/databases/genomes/current', 'message': 'Path to genomes databases folder'}}}, 'config': {'metavar': 'config', 'help': 'Configuration JSON file defined default configuration regarding \nresources (e.g. threads, memory),\nsettings (e.g. verbosity, temporary files),\ndefault folders (e.g. for databases)\nand paths to external tools.\n', 'required': False, 'default': '{}', 'type': <class 'str'>, 'gooey': {'widget': 'FileChooser', 'options': {'initial_value': '{}'}}}, 'threads': {'metavar': 'threads', 'help': 'Specify the number of threads to use for processing HOWARD.\nIt determines the level of parallelism,\neither on python scripts, duckdb engine and external tools.\nIt and can help speed up the process/tool.\nUse -1 to use all available CPU/cores.\nEither non valid value is 1 CPU/core.\n', 'required': False, 'type': <class 'int'>, 'default': -1, 'gooey': {'widget': 'IntegerField', 'options': {'min': -1, 'max': 1000, 'increment': 1}}, 'extra': {'examples': {'# Automatically detect all available CPU/cores': '"threads": -1', '# Define 8 CPU/cores': '"threads": 8'}}}, 'memory': {'metavar': 'memory', 'help': "Specify the memory to use in format FLOAT[kMG]\n(e.g. 
'8G', '12.42G', '1024M').\nIt determines the amount of memory for duckDB engine and external tools\n(especially for JAR programs).\nIt can help to prevent 'out of memory' failures.\nBy default (None) is 80%% of RAM (for duckDB).\n", 'required': False, 'type': <class 'str'>, 'default': None, 'extra': {'format': 'FLOAT[kMG]', 'examples': {'# Automatically detect all available CPU/cores': '"threads": -1', '# Define 8 CPU/cores': '"threads": 8'}}}, 'chunk_size': {'metavar': 'chunk size', 'help': 'Number of records in batch to export output file.\nThe lower the chunk size, the less memory consumption.\nFor Parquet partitioning, files size will depend on the chunk size.\n', 'required': False, 'default': 1000000, 'type': <class 'int'>, 'gooey': {'widget': 'IntegerField', 'options': {'min': 1, 'max': 100000000000, 'increment': 10000}}, 'extra': {'examples': {'Chunk size of 1.000.000 by default': '"chunk_size": 1000000', 'Smaller chunk size to reduce Parquet file size and memory usage': '"chunk_size": 100000'}}}, 'tmp': {'metavar': 'Temporary folder', 'help': "Temporary folder (e.g. 
'/tmp').\nBy default, '.tmp' for duckDB (see doc),external tools and python scripts.\n", 'required': False, 'default': None, 'type': <PathType object>, 'gooey': {'widget': 'DirChooser'}, 'extra': {'examples': {'# System temporary folder': '"tmp": "/tmp"', '# HOWARD work directory': '"tmp": "~/howard/tmp"', '# Current work directory': '"tmp": ".tmp"'}}}, 'duckdb_settings': {'metavar': 'duckDB settings', 'help': 'DuckDB settings (see duckDB doc) as JSON (string or file).\nThese settings have priority (see options \'threads\', \'tmp\'...).\nExamples: \'{"TimeZone": "GMT", "temp_directory": "/tmp/duckdb", "threads": 8}\'.\n', 'required': False, 'default': None, 'type': <PathType object>, 'gooey': {'widget': 'FileChooser', 'options': {'wildcard': 'JSON file (*.json)|*.json|All files (*)|*'}}, 'extra': {'examples': {'DuckDB settings JSON file': '"duckdb_settings": "/path/to/duckdb_config.json"', 'JSON string for Time zone, temporary directory and threads for duckDB': '"duckdb_settings": {\n "TimeZone": "GMT",\n "temp_directory": "/tmp/duckdb",\n "threads": 8\n}'}}}, 'verbosity': {'metavar': 'verbosity', 'help': 'Verbosity level\nAvailable: CRITICAL, ERROR, WARNING, INFO, DEBUG or NOTSET\n- DEBUG: Detailed information, typically of interest only when diagnosing problems.\n- INFO: Confirmation that things are working as expected.\n- WARNING: An indication that something unexpected happened.\n- ERROR: Due to a more serious problem.\n- CRITICAL: A serious error.\n- FATAL: A fatal error.\n- NOTSET: All messages.\n', 'required': False, 'choices': ['CRITICAL', 'ERROR', 'WARNING', 'INFO', 'DEBUG', 'NOTSET', 'WARN', 'FATAL'], 'default': 'INFO', 'type': <class 'str'>, 'gooey': {'widget': 'Dropdown', 'options': {}}, 'extra': {'examples': {'Default verbosity': '"verbosity": "INFO"', 'ERROR level (quiet mode)': '"verbosity": "ERROR"', 'For debug': '"verbosity": "DEBUG"'}}}, 'access': {'metavar': 'access mode', 'help': "Access mode to variants file or database.\nEither 'RW' for Read 
and Write, or 'RO' for Read Only.\n", 'default': 'RW', 'type': <class 'str'>, 'choices': ['RW', 'RO'], 'gooey': {'widget': 'Dropdown', 'options': {}}, 'extra': {'examples': {'Read and Write mode': '"access": "RW"', 'Read only mode': '"access": "RO"'}}}, 'log': {'metavar': 'log', 'help': "Logs file\n(e.g. 'my.log').\n", 'required': False, 'default': None, 'type': <PathType object>, 'gooey': {'widget': 'FileSaver'}, 'extra': {'examples': {'Relative path to log file': '"log": "my.log"', '# HOWARD work directory': '"log": "~/howard/log"', 'Full path to log file': '"log": "/tmp/my.log"'}}}, 'interactive': {'help': 'Interactive mode.\n', 'action': 'store_true', 'default': False}, 'quiet': {'help': '==SUPPRESS==', 'action': 'store_true', 'default': False}, 'verbose': {'help': '==SUPPRESS==', 'action': 'store_true', 'default': False}, 'debug': {'help': '==SUPPRESS==', 'action': 'store_true', 'default': False}, 'databases_folder': {'help': 'Path of HOWARD database folder.\n', 'type': <class 'str'>, 'default': '/Users/lebechea/howard/databases'}, 'database': {'help': 'Which database to update.\n', 'type': <class 'str'>, 'default': 'clinvar', 'choices': ['clinvar']}, 'update_config': {'help': 'Path of json configuration file.\n', 'type': <class 'str'>}, 'current_folder': {'help': 'Path of json configuration file.\n', 'type': <class 'str'>, 'default': 'current'}, 'add_variants_view': {'help': 'Create a sheet with all INFO fields exploded.\n', 'action': 'store_true', 'default': False}, 'add_header': {'help': 'Create a sheet with all INFO fields header descriptions.\n', 'action': 'store_true', 'default': False}, 'transcripts_expected': {'metavar': 'List of transcripts (file)', 'help': 'File with a list of transcripts in first column.\n', 'required': False, 'default': None, 'type': <PathType object>, 'gooey': {'widget': 'FileChooser', 'options': {'wildcard': 'TSV file (*.tsv)|*.tsv|All files (*)|*'}}}, 'transcripts_missing': {'metavar': 'List of missing transcripts (file)', 
'help': 'File with a list of missing transcripts in first column.\n', 'required': False, 'default': None, 'type': <PathType object>, 'gooey': {'widget': 'FileSaver', 'options': {'wildcard': 'TSV file (*.tsv)|*.tsv|All files (*)|*'}}}, 'genebe_use_refseq': {'help': 'Use refSeq to annotate (default).\n', 'action': 'store_true', 'default': False}, 'genebe_use_ensembl': {'help': 'Use Ensembl to annotate.\n', 'action': 'store_true', 'default': False}, 'not_flatten_consequences': {'help': 'Use exploded annotation informations.\n', 'action': 'store_true', 'default': False}, 'minimalize_info': {'help': "Minimalize INFO field (e.g. '.' value).\n", 'action': 'store_true', 'default': False}, 'minimalize_id': {'help': "Minimalize ID field (e.g. '.' value).\n", 'action': 'store_true', 'default': False}, 'minimalize_qual': {'help': "Minimalize QUAL field (e.g. '.' value).\n", 'action': 'store_true', 'default': False}, 'minimalize_filter': {'help': "Minimalize FILTER field (e.g. '.' value).\n", 'action': 'store_true', 'default': False}, 'minimalize_samples': {'help': "Minimalize samples to keep only genotypes (i.e. 'GT').\n", 'action': 'store_true', 'default': False}, 'remove_samples': {'help': 'Remove all samples to keep only variants.\n', 'action': 'store_true', 'default': False}}, 'commands_arguments': {'query': {'function': 'query', 'description': "Query genetic variations in SQL format. Data can be loaded into 'variants' table from various formats (e.g. VCF, TSV, Parquet...). Using --explode_infos allow query on INFO/tag annotations. SQL query can also use external data within the request, such as a Parquet file(s). 
", 'help': 'Query genetic variations file in SQL format.', 'epilog': 'Usage examples:\n howard query --input=tests/data/example.vcf.gz --query="SELECT * FROM variants WHERE REF = \'A\' AND POS < 100000" \n howard query --input=tests/data/example.vcf.gz --explode_infos --query=\'SELECT "#CHROM", POS, REF, ALT, DP, CLNSIG, sample2, sample3 FROM variants WHERE DP >= 50 OR CLNSIG NOT NULL ORDER BY DP DESC\' \n howard query --query="SELECT \\"#CHROM\\", POS, REF, ALT, \\"INFO/Interpro_domain\\" FROM \'tests/databases/annotations/current/hg19/dbnsfp42a.parquet\' WHERE \\"INFO/Interpro_domain\\" NOT NULL ORDER BY \\"INFO/SiPhy_29way_logOdds_rankscore\\" DESC LIMIT 10" \n howard query --explode_infos --explode_infos_prefix=\'INFO/\' --query="SELECT \\"#CHROM\\", POS, REF, ALT, STRING_AGG(INFO, \';\') AS INFO FROM \'tests/databases/annotations/current/hg19/*.parquet\' GROUP BY \\"#CHROM\\", POS, REF, ALT" --output=/tmp/full_annotation.tsv && head -n2 /tmp/full_annotation.tsv \n howard query --input=tests/data/example.vcf.gz --param=config/param.json \n \n', 'groups': {'main': {'input': False, 'output': False, 'param': False, 'query': False}, 'Explode': {'explode_infos': False, 'explode_infos_prefix': False, 'explode_infos_fields': False}, 'Query': {'query_limit': False, 'query_print_mode': False}, 'Export': {'include_header': False, 'parquet_partitions': False}}}, 'filter': {'function': 'filter', 'description': "Filter genetic variations in SQL format. Data can be loaded into 'variants' table from various formats (e.g. VCF, TSV, Parquet...). SQL filter can also use external data within the request, such as a Parquet file(s). 
", 'help': 'Filter genetic variations file in SQL format.', 'epilog': 'Usage examples:\n howard filter --input=tests/data/example.vcf.gz --output=/tmp/example.filter.vcf.gz --filter="REF = \'A\' AND POS < 100000" \n howard filter --input=tests/data/example.vcf.gz --output=/tmp/example.filter.vcf.gz --filter="REF = \'A\' AND POS < 100000" --samples="sample1,sample2" \n howard filter --input=tests/data/example.vcf.gz --output=/tmp/example.filter.vcf.gz --filter="INFOS.CLNSIG LIKE \'pathogenic\'" --samples="sample1,sample2" \n howard filter --input=tests/data/example.vcf.gz --output=/tmp/example.filter.vcf.gz --filter="QUAL > 100 AND SAMPLES.sample2.GT != \'./.\'" --samples="sample2" \n \n', 'groups': {'main': {'input': True, 'output': True}, 'Filters': {'filter': False, 'samples': False}, 'Export': {'include_header': False, 'parquet_partitions': False}}}, 'sort': {'function': 'sort', 'description': "Sort genetic variations from contig order. Data can be loaded into 'variants' table from various formats (e.g. VCF, TSV, Parquet...). SQL filter can also use external data within the request, such as a Parquet file(s). 
", 'help': 'Sort genetic variations file from contig order.', 'epilog': 'Usage examples:\n howard sort --input=tests/data/example.vcf.gz --output=/tmp/example.sorted.vcf.gz \n \n', 'groups': {'main': {'input': True, 'output': True}, 'Export': {'include_header': False, 'parquet_partitions': False}}}, 'stats': {'function': 'stats', 'description': 'Statistics on genetic variations, such as: number of variants, number of samples, statistics by chromosome, genotypes by samples...', 'help': 'Statistics on genetic variations file.', 'epilog': 'Usage examples:\n howard stats --input=tests/data/example.vcf.gz \n howard stats --input=tests/data/example.vcf.gz --stats_md=/tmp/stats.md \n howard stats --input=tests/data/example.vcf.gz --param=config/param.json \n \n', 'groups': {'main': {'input': True, 'param': False}, 'Stats': {'stats_md': False, 'stats_json': False}}}, 'convert': {'function': 'convert', 'description': "Convert genetic variations file to another format. Multiple format are available, such as usual and official VCF and BCF format, but also other formats such as TSV, CSV, PSV and Parquet/duckDB. These formats need a header '.hdr' file to take advantage of the power of howard (especially through INFO/tag definition), and using howard convert tool automatically generate header file fo futher use. 
", 'help': 'Convert genetic variations file to another format.', 'epilog': 'Usage examples:\n howard convert --input=tests/data/example.vcf.gz --output=/tmp/example.tsv \n howard convert --input=tests/data/example.vcf.gz --output=/tmp/example.parquet \n howard convert --input=tests/data/example.vcf.gz --output=/tmp/example.tsv --explode_infos --explode_infos_fields=\'CLNSIG,SIFT,DP\' --order_by=\'CLNSIG DESC, DP DESC\' \n howard convert --input=tests/data/example.vcf.gz --output=/tmp/example.tsv --explode_infos --explode_infos_prefix=\'INFO/\' --explode_infos_fields=\'CLNSIG,SIFT,DP,*\' --order_by=\'"INFO/CLNSIG" DESC, "INFO/DP" DESC\' --include_header \n howard convert --input=tests/data/example.vcf.gz --output=/tmp/example.tsv --param=config/param.json \n \n', 'groups': {'main': {'input': True, 'output': True, 'param': False}, 'Explode': {'explode_infos': False, 'explode_infos_prefix': False, 'explode_infos_fields': False}, 'Export': {'include_header': False, 'order_by': False, 'parquet_partitions': False}}}, 'hgvs': {'function': 'hgvs', 'description': "HGVS annotation using HUGO HGVS internation Sequence Variant Nomenclature (http://varnomenhoward.tools.hgvs.org/). Annotation refere to refGene and genome to generate HGVS nomenclature for all available transcripts. 
This annotation add 'hgvs' field into VCF INFO column of a VCF file.", 'help': 'HGVS annotation (HUGO internation nomenclature) using refGene, genome and transcripts list.\n', 'epilog': 'Usage examples:\n howard hgvs --input=tests/data/example.full.vcf --output=/tmp/example.hgvs.vcf \n howard hgvs --input=tests/data/example.full.vcf --output=/tmp/example.hgvs.tsv --param=config/param.json \n howard hgvs --input=tests/data/example.full.vcf --output=/tmp/example.hgvs.vcf --full_format --use_exon \n \n', 'groups': {'main': {'input': True, 'output': True, 'param': False, 'hgvs_options': False, 'assembly': False}, 'HGVS': {'use_gene': False, 'use_exon': False, 'use_protein': False, 'add_protein': False, 'full_format': False, 'codon_type': False, 'refgene': False, 'refseqlink': False}}}, 'annotation': {'function': 'annotation', 'description': 'Annotation is mainly based on a build-in Parquet annotation method, and tools such as BCFTOOLS, Annovar and snpEff. It uses available databases (see Annovar and snpEff) and homemade databases. Format of databases are: parquet, duckdb, vcf, bed, Annovar and snpEff (Annovar and snpEff databases are automatically downloaded, see howard databases tool). 
', 'help': 'Annotation of genetic variations file using databases/files and tools.', 'epilog': "Usage examples:\n howard annotation --input=tests/data/example.vcf.gz --output=/tmp/example.howard.vcf.gz --annotations='tests/databases/annotations/current/hg19/avsnp150.parquet,tests/databases/annotations/current/hg19/dbnsfp42a.parquet,tests/databases/annotations/current/hg19/gnomad211_genome.parquet' \n howard annotation --input=tests/data/example.vcf.gz --output=/tmp/example.howard.tsv --assembly=hg19 --annotations='annovar:refGene,annovar:cosmic70,snpeff,tests/databases/annotations/current/hg19/clinvar_20210123.parquet' \n howard annotation --input=tests/data/example.vcf.gz --output=/tmp/example.howard.tsv --assembly=hg19 --annotation_parquet='tests/databases/annotations/current/hg19/avsnp150.parquet,tests/databases/annotations/current/hg19/dbnsfp42a.parquet' \n howard annotation --input=tests/data/example.vcf.gz --output=/tmp/example.howard.tsv --assembly=hg19 --annotation_bcftools='tests/databases/annotations/current/hg19/nci60.vcf.gz,tests/databases/annotations/current/hg19/dbnsfp42a.vcf.gz' \n howard annotation --input=tests/data/example.vcf.gz --output=/tmp/example.howard.tsv --assembly=hg19 --annotation_snpsift='tests/databases/annotations/current/hg19/nci60.vcf.gz,tests/databases/annotations/current/hg19/dbnsfp42a.vcf.gz' \n howard annotation --input=tests/data/example.vcf.gz --output=/tmp/example.howard.tsv --assembly=hg19 --annotation_annovar='nci60:cosmic70' \n howard annotation --input=tests/data/example.vcf.gz --output=/tmp/example.howard.tsv --assembly=hg19 --annotation_snpeff='-hgvs' \n howard annotation --input=tests/data/example.vcf.gz --output=/tmp/example.howard.tsv --assembly=hg19 --annotation_exomiser='preset=exome:transcript_source=refseq' \n howard annotation --input=tests/data/example.vcf.gz --output=/tmp/example.howard.tsv --assembly=hg19 --annotation_splice='split_mode=one:spliceai_distance=500:spliceai_mask=1' \n howard annotation 
--input=tests/data/example.vcf.gz --output=/tmp/example.howard.tsv --assembly=hg19 --annotations='ALL:parquet' \n howard annotation --input=tests/data/example.vcf.gz --output=/tmp/example.howard.tsv --param=config/param.json \n \n", 'groups': {'main': {'input': True, 'output': True, 'param': False, 'annotations': False, 'annotation_parquet': False, 'annotation_bcftools': False, 'annotation_annovar': False, 'annotation_snpeff': False, 'annotation_snpsift': False, 'annotation_exomiser': False, 'annotation_splice': False, 'assembly': False}, 'Annotation': {'annotations_update': False, 'annotations_append': False}}}, 'calculation': {'function': 'calculation', 'description': 'Calculation processes variants information to generate new information, such as: identify variation type (VarType), harmonizes allele frequency (VAF) and calculate sttistics (VAF_stats), extracts Nomen (transcript, cNomen, pNomen...) from an HGVS field (e.g. snpEff, Annovar) with an optional list of personalized transcripts, generates VaRank format barcode, identify trio inheritance.', 'help': 'Calculation operations on genetic variations file and genotype information.\n', 'epilog': "Usage examples:\n howard calculation --input=tests/data/example.full.vcf --output=/tmp/example.calculation.tsv --calculations='vartype' \n howard calculation --input=tests/data/example.ann.vcf.gz --output=/tmp/example.calculated.tsv --calculations='snpeff_hgvs,NOMEN' --hgvs_field=snpeff_hgvs --transcripts=tests/data/transcripts.tsv \n howard calculation --input=tests/data/example.vcf.gz --output=/tmp/example.calculated.tsv --calculations='TRIO' --trio_pedigree='sample1,sample2,sample4' \n howard calculation --input=tests/data/example.vcf.gz --output=/tmp/example.calculated.tsv --calculations='BARCODEFAMILY' --family_pedigree='sample1,sample2,sample4' \n howard calculation --input=tests/data/example.ann.transcripts.vcf.gz --output=/tmp/example.calculation.transcripts.tsv --param=config/param.transcripts.json 
--calculations='TRANSCRIPTS_ANNOTATIONS,TRANSCRIPTS_PRIORITIZATION,TRANSCRIPTS_EXPORT' \n howard calculation --input=tests/data/example.ann.vcf.gz --output=/tmp/example.ann.tsv --param=config/param.json \n howard calculation --show_calculations \n \n", 'groups': {'main': {'input': False, 'output': False, 'param': False, 'calculations': False}, 'Calculation': {'calculation_config': False, 'show_calculations': False}, 'NOMEN': {'hgvs_field': False, 'transcripts': False}, 'TRIO': {'trio_pedigree': False}, 'BARCODEFAMILY': {'family_pedigree': False}}}, 'prioritization': {'function': 'prioritization', 'description': "Prioritization algorithm uses profiles to flag variants (as passed or filtered), calculate a prioritization score, and automatically generate a comment for each variants (example: 'polymorphism identified in dbSNP. associated to Lung Cancer. Found in ClinVar database'). Prioritization profiles are defined in a configuration file in JSON format. A profile is defined as a list of annotation/value, using wildcards and comparison options (contains, lower than, greater than, equal...). Annotations fields may be quality values (usually from callers, such as 'DP') or other annotations fields provided by annotations tools, such as HOWARD itself (example: COSMIC, Clinvar, 1000genomes, PolyPhen, SIFT). 
Multiple profiles can be used simultaneously, which is useful to define multiple validation/prioritization levels (example: 'standard', 'stringent', 'rare variants', 'low allele frequency').\n", 'help': 'Prioritization of genetic variations based on annotations criteria (profiles).', 'epilog': "Usage examples:\n howard prioritization --input=tests/data/example.vcf.gz --output=/tmp/example.prioritized.vcf.gz --prioritizations='default' \n howard prioritization --input=tests/data/example.vcf.gz --output=/tmp/example.prioritized.vcf.gz --prioritizations='default,GERMLINE' --prioritization_config=config/prioritization_profiles.json \n howard prioritization --input=tests/data/example.vcf.gz --output=/tmp/example.prioritized.tsv --param=config/param.json \n \n", 'groups': {'main': {'input': True, 'output': True, 'param': False, 'prioritizations': False}, 'Prioritization': {'default_profile': False, 'pzfields': False, 'prioritization_score_mode': False, 'prioritization_config': False}}}, 'process': {'function': 'process', 'description': 'howard process tool manage genetic variations to:\n- annotates genetic variants with multiple annotation databases/files and tools\n- calculates and normalizes annotations\n- prioritizes variants with profiles (list of citeria) to calculate scores and flags\n- translates into various formats\n- query genetic variants and annotations\n- generates variants statistics', 'help': 'Full genetic variations process: annotation, calculation, prioritization, format, query, filter...', 'epilog': 'Usage examples:\n howard process --input=tests/data/example.vcf.gz --output=/tmp/example.annotated.vcf.gz --param=config/param.json \n howard process --input=tests/data/example.vcf.gz --annotations=\'snpeff\' --calculations=\'snpeff_hgvs\' --prioritizations=\'default\' --explode_infos --output=/tmp/example.annotated.tsv --query=\'SELECT "#CHROM", POS, ALT, REF, snpeff_hgvs FROM variants\' \n howard process --input=tests/data/example.vcf.gz 
--hgvs_options=\'full_format,use_exon\' --explode_infos --output=/tmp/example.annotated.tsv --query=\'SELECT "#CHROM", POS, ALT, REF, hgvs FROM variants\' \n howard process --input=tests/data/example.vcf.gz --output=/tmp/example.howard.vcf.gz --hgvs=\'full_format,use_exon\' --annotations=\'tests/databases/annotations/current/hg19/avsnp150.parquet,tests/databases/annotations/current/hg19/dbnsfp42a.parquet,tests/databases/annotations/current/hg19/gnomad211_genome.parquet\' --calculations=\'NOMEN\' --explode_infos --query=\'SELECT NOMEN, REVEL_score, SIFT_score, AF AS \'gnomad_AF\', ClinPred_score, ClinPred_pred FROM variants\' \n \n', 'groups': {'main': {'input': True, 'output': True, 'param': False, 'hgvs_options': False, 'annotations': False, 'calculations': False, 'prioritizations': False, 'assembly': False}, 'HGVS': {'use_gene': False, 'use_exon': False, 'use_protein': False, 'add_protein': False, 'full_format': False, 'codon_type': False, 'refgene': False, 'refseqlink': False}, 'Annotation': {'annotations_update': False, 'annotations_append': False}, 'Calculation': {'calculation_config': False}, 'Prioritization': {'default_profile': False, 'pzfields': False, 'prioritization_score_mode': False, 'prioritization_config': False}, 'Query': {'query': False, 'query_limit': False, 'query_print_mode': False}, 'Explode': {'explode_infos': False, 'explode_infos_prefix': False, 'explode_infos_fields': False}, 'Export': {'include_header': False, 'order_by': False, 'parquet_partitions': False}}}, 'databases': {'function': 'databases', 'description': 'Download databases and needed files for howard and associated tools', 'help': 'Download databases and needed files for howard and associated tools', 'epilog': "Usage examples:\n howard databases --assembly=hg19 --download-genomes=~/howard/databases/genomes/current --download-genomes-provider=UCSC --download-genomes-contig-regex='chr[0-9XYM]+$' \n howard databases --assembly=hg19 
--download-annovar=~/howard/databases/annovar/current --download-annovar-files='refGene,cosmic70,nci60' \n howard databases --assembly=hg19 --download-snpeff=~/howard/databases/snpeff/current \n howard databases --assembly=hg19 --download-refseq=~/howard/databases/refseq/current --download-refseq-format-file='ncbiRefSeq.txt' \n howard databases --assembly=hg19 --download-dbnsfp=~/howard/databases/dbnsfp/current --download-dbnsfp-release='4.4a' --download-dbnsfp-subdatabases \n howard databases --assembly=hg19 --download-alphamissense=~/howard/databases/alphamissense/current \n howard databases --assembly=hg19 --download-exomiser=~/howard/databases/exomiser/current \n howard databases --assembly=hg19 --download-dbsnp=~/howard/databases/dbsnp/current --download-dbsnp-vcf \n cd ~/howard/databases && howard databases --assembly=hg19 --download-genomes=genomes/current --download-genomes-provider=UCSC --download-genomes-contig-regex='chr[0-9XYM]+$' --download-annovar=annovar/current --download-annovar-files='refGene,cosmic70,nci60' --download-snpeff=snpeff/current --download-refseq=refseq/current --download-refseq-format-file='ncbiRefSeq.txt' --download-dbnsfp=dbnsfp/current --download-dbnsfp-release='4.4a' --download-dbnsfp-subdatabases --download-alphamissense=alphamissense/current --download-exomiser=exomiser/current --download-dbsnp=dbsnp/current --download-dbsnp-vcf --threads=8 \n howard databases --generate-param=/tmp/param.json --generate-param-description=/tmp/test.description.json --generate-param-formats=parquet \n howard databases --input_annovar=tests/databases/others/hg19_nci60.txt --output_annovar=/tmp/nci60.from_annovar.vcf.gz --annovar_to_parquet=/tmp/nci60.from_annovar.parquet --annovar_code=nci60 --genome=~/howard/databases/genomes/current/hg19.fa \n\nNotes:\n - Downloading databases can take a while, depending on network, threads and memory\n - Proxy: Beware of network and proxy configuration\n - dbNSFP download: More threads, more memory usage (8 
threads ~ 16Gb, 24 threads ~ 32Gb)\n \n", 'groups': {'main': {'assembly': False, 'genomes-folder': False, 'genome': False, 'param': False}, 'Genomes': {'download-genomes': False, 'download-genomes-provider': False, 'download-genomes-contig-regex': False}, 'snpEff': {'download-snpeff': False}, 'Annovar': {'download-annovar': False, 'download-annovar-files': False, 'download-annovar-url': False}, 'refSeq': {'download-refseq': False, 'download-refseq-url': False, 'download-refseq-prefix': False, 'download-refseq-files': False, 'download-refseq-format-file': False, 'download-refseq-include-utr5': False, 'download-refseq-include-utr3': False, 'download-refseq-include-chrM': False, 'download-refseq-include-non-canonical-chr': False, 'download-refseq-include-non-coding-transcripts': False, 'download-refseq-include-transcript-version': False}, 'dbNSFP': {'download-dbnsfp': False, 'download-dbnsfp-url': False, 'download-dbnsfp-release': False, 'download-dbnsfp-parquet-size': False, 'download-dbnsfp-subdatabases': False, 'download-dbnsfp-parquet': False, 'download-dbnsfp-vcf': False, 'download-dbnsfp-no-files-all': False, 'download-dbnsfp-add-info': False, 'download-dbnsfp-only-info': False, 'download-dbnsfp-uniquify': False, 'download-dbnsfp-row-group-size': False}, 'AlphaMissense': {'download-alphamissense': False, 'download-alphamissense-url': False}, 'Exomiser': {'download-exomiser': False, 'download-exomiser-application-properties': False, 'download-exomiser-url': False, 'download-exomiser-release': False, 'download-exomiser-phenotype-release': False, 'download-exomiser-remm-release': False, 'download-exomiser-remm-url': False, 'download-exomiser-cadd-release': False, 'download-exomiser-cadd-url': False, 'download-exomiser-cadd-url-snv-file': False, 'download-exomiser-cadd-url-indel-file': False}, 'dbSNP': {'download-dbsnp': False, 'download-dbsnp-releases': False, 'download-dbsnp-release-default': False, 'download-dbsnp-url': False, 'download-dbsnp-url-files': False, 
'download-dbsnp-url-files-prefix': False, 'download-dbsnp-assemblies-map': False, 'download-dbsnp-vcf': False, 'download-dbsnp-parquet': False}, 'HGMD': {'convert-hgmd': False, 'convert-hgmd-file': False, 'convert-hgmd-basename': False}, 'from_Annovar': {'input_annovar': False, 'output_annovar': False, 'annovar_code': False, 'annovar_to_parquet': False, 'annovar_reduce_memory': False, 'annovar_multi_variant': False}, 'from_extann': {'input_extann': False, 'output_extann': False, 'refgene': False, 'transcripts': False, 'param_extann': False, 'mode_extann': False}, 'Parameters': {'generate-param': False, 'generate-param-description': False, 'generate-param-releases': False, 'generate-param-formats': False, 'generate-param-bcftools': False}}}, 'gui': {'function': 'gui', 'description': 'Graphical User Interface tools', 'help': 'Graphical User Interface tools', 'epilog': 'Usage examples:\n howard gui ', 'groups': {}}, 'help': {'function': 'help', 'description': 'Help tools', 'help': 'Help tools', 'epilog': "Usage examples:\n howard help --help_md=docs/help.md --help_html=docs/html/help.html --help_pdf=docs/pdf/help.pdf\n howard help --help_json_input=docs/json/help.configuration.json --help_json_input_title='HOWARD Configuration' --help_md=docs/help.configuration.md --help_html=docs/html/help.configuration.html --help_pdf=docs/pdf/help.configuration.pdf --code_type='json'\n howard help --help_json_input=docs/json/help.parameteres.json --help_json_input_title='HOWARD Parameters' --help_md=docs/help.parameteres.md --help_html=docs/html/help.parameteres.html --help_pdf=docs/pdf/help.parameteres.pdf --code_type='json' \n howard help --help_json_input=docs/json/help.parameteres.databases.json --help_json_input_title='HOWARD Parameters Databases' --help_md=docs/help.parameteres.databases.md --help_html=docs/html/help.parameteres.databases.html --help_pdf=docs/pdf/help.parameteres.databases.pdf --code_type='json' \n \n", 'groups': {'main': {'help_md': False, 'help_html': 
False, 'help_pdf': False, 'help_md_input': False, 'help_json_input': False, 'help_json_input_title': False, 'code_type': False}}}, 'update_database': {'function': 'update_database', 'description': 'Update HOWARD database\n', 'help': '(plugin) Update HOWARD database', 'epilog': 'Usage examples:\n howard update_database --database clinvar --databases_folder /home1/DB/HOWARD --update_config update_databases.json \n \n', 'groups': {'main': {'param': False}, 'Update_database': {'databases_folder': False, 'database': False, 'update_config': False, 'current_folder': False}, 'Options': {'show': False, 'limit': False}}}, 'to_excel': {'function': 'to_excel', 'description': "Convert VCF file to Excel '.xlsx' format.\n", 'help': "(plugin) Convert VCF file to Excel '.xlsx' format", 'epilog': 'Usage examples:\n howard to_excel --input=tests/data/example.vcf.gz --output=/tmp/example.xlsx --add_variants_view\n \n', 'groups': {'main': {'input': True, 'output': True}, 'Add': {'add_variants_view': False, 'add_header': False}}}, 'transcripts_check': {'function': 'transcripts_check', 'description': 'Check if a transcript list is present in a generated transcript table from a input VCF file.\n', 'help': '(plugin) Check transcript list in transcript table', 'epilog': 'Usage examples:\n howard transcripts_check --input=plugins/transcripts_check/tests/data/example.ann.transcripts.vcf.gz --param=plugins/transcripts_check/tests/data/param.transcripts.json --transcripts_expected=plugins/transcripts_check/tests/data/transcripts.tsv --stats=/tmp/transcripts.stats.json --transcripts_missing=/tmp/transcripts.missing.tsv\n \n', 'groups': {'main': {'input': True, 'param': True, 'transcripts_expected': True, 'transcripts_missing': False, 'stats_json': False}}}, 'genebe': {'function': 'genebe', 'description': 'GeneBe annotation using REST API (see https://genebe.net/).\n', 'help': '(plugin) GeneBe annotation using REST API', 'epilog': 'Usage examples:\n howard genebe --input=tests/data/example.vcf.gz 
--output=/tmp/example.genebe.vcf.gz --genebe_use_refseq\n \n', 'groups': {'main': {'input': True, 'output': True, 'param': False, 'assembly': False}, 'GeneBe': {'genebe_use_refseq': False, 'genebe_use_ensembl': False, 'not_flatten_consequences': False}, 'Explode': {'explode_infos': False, 'explode_infos_prefix': False, 'explode_infos_fields': False}, 'Export': {'include_header': False, 'order_by': False, 'parquet_partitions': False}}}, 'minimalize': {'function': 'minimalize', 'description': "Minimalize a VCF file consists in put missing value ('.') on INFO/Tags, ID, QUAL or FILTER fields. Options can also minimalize samples (keep only GT) or remove all samples. INFO/tags can by exploded before minimalize to keep tags into separated columns (useful for Parquet or TSV format to constitute a database).\n", 'help': '(plugin) Minimalize a VCF file, such as removing INFO/Tags or samples', 'epilog': 'Usage examples:\n howard minimalize --input=tests/data/example.vcf.gz --output=/tmp/example.minimal.vcf.gz --minimalize_info --minimalize_filter --minimalize_qual --minimalize_id --minimalize_samples\n howard minimalize --input=tests/data/example.vcf.gz --output=/tmp/example.minimal.tsv --remove_samples --explode_infos --minimalize_info\n \n', 'groups': {'main': {'input': True, 'output': True, 'param': False}, 'Minimalize': {'minimalize_info': False, 'minimalize_id': False, 'minimalize_qual': False, 'minimalize_filter': False, 'minimalize_samples': False, 'remove_samples': False}, 'Explode': {'explode_infos': False, 'explode_infos_prefix': False, 'explode_infos_fields': False}, 'Export': {'include_header': False, 'order_by': False, 'parquet_partitions': False}}}}, 'shared_arguments': ['config', 'threads', 'memory', 'chunk_size', 'tmp', 'duckdb_settings', 'interactive', 'verbosity', 'log', 'quiet', 'verbose', 'debug']}
    diff --git a/docs/pdoc/search.js b/docs/pdoc/search.js index 8cda73f..e94c1c7 100644 --- a/docs/pdoc/search.js +++ b/docs/pdoc/search.js @@ -1,6 +1,6 @@ window.pdocSearch = (function(){ /** elasticlunr - http://weixsong.github.io * Copyright (C) 2017 Oliver Nightingale * Copyright (C) 2017 Wei Song * MIT Licensed */!function(){function e(e){if(null===e||"object"!=typeof e)return e;var t=e.constructor();for(var n in e)e.hasOwnProperty(n)&&(t[n]=e[n]);return t}var t=function(e){var n=new t.Index;return n.pipeline.add(t.trimmer,t.stopWordFilter,t.stemmer),e&&e.call(n,n),n};t.version="0.9.5",lunr=t,t.utils={},t.utils.warn=function(e){return function(t){e.console&&console.warn&&console.warn(t)}}(this),t.utils.toString=function(e){return void 0===e||null===e?"":e.toString()},t.EventEmitter=function(){this.events={}},t.EventEmitter.prototype.addListener=function(){var e=Array.prototype.slice.call(arguments),t=e.pop(),n=e;if("function"!=typeof t)throw new TypeError("last argument must be a function");n.forEach(function(e){this.hasHandler(e)||(this.events[e]=[]),this.events[e].push(t)},this)},t.EventEmitter.prototype.removeListener=function(e,t){if(this.hasHandler(e)){var n=this.events[e].indexOf(t);-1!==n&&(this.events[e].splice(n,1),0==this.events[e].length&&delete this.events[e])}},t.EventEmitter.prototype.emit=function(e){if(this.hasHandler(e)){var t=Array.prototype.slice.call(arguments,1);this.events[e].forEach(function(e){e.apply(void 0,t)},this)}},t.EventEmitter.prototype.hasHandler=function(e){return e in this.events},t.tokenizer=function(e){if(!arguments.length||null===e||void 0===e)return[];if(Array.isArray(e)){var n=e.filter(function(e){return null===e||void 0===e?!1:!0});n=n.map(function(e){return t.utils.toString(e).toLowerCase()});var i=[];return n.forEach(function(e){var n=e.split(t.tokenizer.seperator);i=i.concat(n)},this),i}return 
e.toString().trim().toLowerCase().split(t.tokenizer.seperator)},t.tokenizer.defaultSeperator=/[\s\-]+/,t.tokenizer.seperator=t.tokenizer.defaultSeperator,t.tokenizer.setSeperator=function(e){null!==e&&void 0!==e&&"object"==typeof e&&(t.tokenizer.seperator=e)},t.tokenizer.resetSeperator=function(){t.tokenizer.seperator=t.tokenizer.defaultSeperator},t.tokenizer.getSeperator=function(){return t.tokenizer.seperator},t.Pipeline=function(){this._queue=[]},t.Pipeline.registeredFunctions={},t.Pipeline.registerFunction=function(e,n){n in t.Pipeline.registeredFunctions&&t.utils.warn("Overwriting existing registered function: "+n),e.label=n,t.Pipeline.registeredFunctions[n]=e},t.Pipeline.getRegisteredFunction=function(e){return e in t.Pipeline.registeredFunctions!=!0?null:t.Pipeline.registeredFunctions[e]},t.Pipeline.warnIfFunctionNotRegistered=function(e){var n=e.label&&e.label in this.registeredFunctions;n||t.utils.warn("Function is not registered with pipeline. This may cause problems when serialising the index.\n",e)},t.Pipeline.load=function(e){var n=new t.Pipeline;return e.forEach(function(e){var i=t.Pipeline.getRegisteredFunction(e);if(!i)throw new Error("Cannot load un-registered function: "+e);n.add(i)}),n},t.Pipeline.prototype.add=function(){var e=Array.prototype.slice.call(arguments);e.forEach(function(e){t.Pipeline.warnIfFunctionNotRegistered(e),this._queue.push(e)},this)},t.Pipeline.prototype.after=function(e,n){t.Pipeline.warnIfFunctionNotRegistered(n);var i=this._queue.indexOf(e);if(-1===i)throw new Error("Cannot find existingFn");this._queue.splice(i+1,0,n)},t.Pipeline.prototype.before=function(e,n){t.Pipeline.warnIfFunctionNotRegistered(n);var i=this._queue.indexOf(e);if(-1===i)throw new Error("Cannot find existingFn");this._queue.splice(i,0,n)},t.Pipeline.prototype.remove=function(e){var t=this._queue.indexOf(e);-1!==t&&this._queue.splice(t,1)},t.Pipeline.prototype.run=function(e){for(var t=[],n=e.length,i=this._queue.length,o=0;n>o;o++){for(var 
r=e[o],s=0;i>s&&(r=this._queue[s](r,o,e),void 0!==r&&null!==r);s++);void 0!==r&&null!==r&&t.push(r)}return t},t.Pipeline.prototype.reset=function(){this._queue=[]},t.Pipeline.prototype.get=function(){return this._queue},t.Pipeline.prototype.toJSON=function(){return this._queue.map(function(e){return t.Pipeline.warnIfFunctionNotRegistered(e),e.label})},t.Index=function(){this._fields=[],this._ref="id",this.pipeline=new t.Pipeline,this.documentStore=new t.DocumentStore,this.index={},this.eventEmitter=new t.EventEmitter,this._idfCache={},this.on("add","remove","update",function(){this._idfCache={}}.bind(this))},t.Index.prototype.on=function(){var e=Array.prototype.slice.call(arguments);return this.eventEmitter.addListener.apply(this.eventEmitter,e)},t.Index.prototype.off=function(e,t){return this.eventEmitter.removeListener(e,t)},t.Index.load=function(e){e.version!==t.version&&t.utils.warn("version mismatch: current "+t.version+" importing "+e.version);var n=new this;n._fields=e.fields,n._ref=e.ref,n.documentStore=t.DocumentStore.load(e.documentStore),n.pipeline=t.Pipeline.load(e.pipeline),n.index={};for(var i in e.index)n.index[i]=t.InvertedIndex.load(e.index[i]);return n},t.Index.prototype.addField=function(e){return this._fields.push(e),this.index[e]=new t.InvertedIndex,this},t.Index.prototype.setRef=function(e){return this._ref=e,this},t.Index.prototype.saveDocument=function(e){return this.documentStore=new t.DocumentStore(e),this},t.Index.prototype.addDoc=function(e,n){if(e){var n=void 0===n?!0:n,i=e[this._ref];this.documentStore.addDoc(i,e),this._fields.forEach(function(n){var o=this.pipeline.run(t.tokenizer(e[n]));this.documentStore.addFieldLength(i,n,o.length);var r={};o.forEach(function(e){e in r?r[e]+=1:r[e]=1},this);for(var s in r){var 
u=r[s];u=Math.sqrt(u),this.index[n].addToken(s,{ref:i,tf:u})}},this),n&&this.eventEmitter.emit("add",e,this)}},t.Index.prototype.removeDocByRef=function(e){if(e&&this.documentStore.isDocStored()!==!1&&this.documentStore.hasDoc(e)){var t=this.documentStore.getDoc(e);this.removeDoc(t,!1)}},t.Index.prototype.removeDoc=function(e,n){if(e){var n=void 0===n?!0:n,i=e[this._ref];this.documentStore.hasDoc(i)&&(this.documentStore.removeDoc(i),this._fields.forEach(function(n){var o=this.pipeline.run(t.tokenizer(e[n]));o.forEach(function(e){this.index[n].removeToken(e,i)},this)},this),n&&this.eventEmitter.emit("remove",e,this))}},t.Index.prototype.updateDoc=function(e,t){var t=void 0===t?!0:t;this.removeDocByRef(e[this._ref],!1),this.addDoc(e,!1),t&&this.eventEmitter.emit("update",e,this)},t.Index.prototype.idf=function(e,t){var n="@"+t+"/"+e;if(Object.prototype.hasOwnProperty.call(this._idfCache,n))return this._idfCache[n];var i=this.index[t].getDocFreq(e),o=1+Math.log(this.documentStore.length/(i+1));return this._idfCache[n]=o,o},t.Index.prototype.getFields=function(){return this._fields.slice()},t.Index.prototype.search=function(e,n){if(!e)return[];e="string"==typeof e?{any:e}:JSON.parse(JSON.stringify(e));var i=null;null!=n&&(i=JSON.stringify(n));for(var o=new t.Configuration(i,this.getFields()).get(),r={},s=Object.keys(e),u=0;u0&&t.push(e);for(var i in n)"docs"!==i&&"df"!==i&&this.expandToken(e+i,t,n[i]);return t},t.InvertedIndex.prototype.toJSON=function(){return{root:this.root}},t.Configuration=function(e,n){var e=e||"";if(void 0==n||null==n)throw new Error("fields should not be null");this.config={};var i;try{i=JSON.parse(e),this.buildUserConfig(i,n)}catch(o){t.utils.warn("user configuration parse failed, will use default configuration"),this.buildDefaultConfig(n)}},t.Configuration.prototype.buildDefaultConfig=function(e){this.reset(),e.forEach(function(e){this.config[e]={boost:1,bool:"OR",expand:!1}},this)},t.Configuration.prototype.buildUserConfig=function(e,n){var 
i="OR",o=!1;if(this.reset(),"bool"in e&&(i=e.bool||i),"expand"in e&&(o=e.expand||o),"fields"in e)for(var r in e.fields)if(n.indexOf(r)>-1){var s=e.fields[r],u=o;void 0!=s.expand&&(u=s.expand),this.config[r]={boost:s.boost||0===s.boost?s.boost:1,bool:s.bool||i,expand:u}}else t.utils.warn("field name in user configuration not found in index instance fields");else this.addAllFields2UserConfig(i,o,n)},t.Configuration.prototype.addAllFields2UserConfig=function(e,t,n){n.forEach(function(n){this.config[n]={boost:1,bool:e,expand:t}},this)},t.Configuration.prototype.get=function(){return this.config},t.Configuration.prototype.reset=function(){this.config={}},lunr.SortedSet=function(){this.length=0,this.elements=[]},lunr.SortedSet.load=function(e){var t=new this;return t.elements=e,t.length=e.length,t},lunr.SortedSet.prototype.add=function(){var e,t;for(e=0;e1;){if(r===e)return o;e>r&&(t=o),r>e&&(n=o),i=n-t,o=t+Math.floor(i/2),r=this.elements[o]}return r===e?o:-1},lunr.SortedSet.prototype.locationFor=function(e){for(var t=0,n=this.elements.length,i=n-t,o=t+Math.floor(i/2),r=this.elements[o];i>1;)e>r&&(t=o),r>e&&(n=o),i=n-t,o=t+Math.floor(i/2),r=this.elements[o];return r>e?o:e>r?o+1:void 0},lunr.SortedSet.prototype.intersect=function(e){for(var t=new lunr.SortedSet,n=0,i=0,o=this.length,r=e.length,s=this.elements,u=e.elements;;){if(n>o-1||i>r-1)break;s[n]!==u[i]?s[n]u[i]&&i++:(t.add(s[n]),n++,i++)}return t},lunr.SortedSet.prototype.clone=function(){var e=new lunr.SortedSet;return e.elements=this.toArray(),e.length=e.elements.length,e},lunr.SortedSet.prototype.union=function(e){var t,n,i;this.length>=e.length?(t=this,n=e):(t=e,n=this),i=t.clone();for(var o=0,r=n.toArray();o

    \n"}, {"fullname": "howard.functions", "modulename": "howard.functions", "kind": "module", "doc": "

    \n"}, {"fullname": "howard.functions.commons", "modulename": "howard.functions.commons", "kind": "module", "doc": "

    \n"}, {"fullname": "howard.functions.commons.file_folder", "modulename": "howard.functions.commons", "qualname": "file_folder", "kind": "variable", "doc": "

    \n", "default_value": "'/Users/lebechea/BIOINFO/git/HOWARD/howard/functions'"}, {"fullname": "howard.functions.commons.subfolder_plugins", "modulename": "howard.functions.commons", "qualname": "subfolder_plugins", "kind": "variable", "doc": "

    \n", "default_value": "'plugins'"}, {"fullname": "howard.functions.commons.folder_main", "modulename": "howard.functions.commons", "qualname": "folder_main", "kind": "variable", "doc": "

    \n", "default_value": "'/Users/lebechea/BIOINFO/git/HOWARD'"}, {"fullname": "howard.functions.commons.folder_config", "modulename": "howard.functions.commons", "qualname": "folder_config", "kind": "variable", "doc": "

    \n", "default_value": "'/Users/lebechea/BIOINFO/git/HOWARD/config'"}, {"fullname": "howard.functions.commons.folder_user_home", "modulename": "howard.functions.commons", "qualname": "folder_user_home", "kind": "variable", "doc": "

    \n", "default_value": "'/Users/lebechea'"}, {"fullname": "howard.functions.commons.folder_howard_home", "modulename": "howard.functions.commons", "qualname": "folder_howard_home", "kind": "variable", "doc": "

    \n", "default_value": "'/Users/lebechea/howard'"}, {"fullname": "howard.functions.commons.folder_plugins", "modulename": "howard.functions.commons", "qualname": "folder_plugins", "kind": "variable", "doc": "

    \n", "default_value": "'/Users/lebechea/BIOINFO/git/HOWARD/plugins'"}, {"fullname": "howard.functions.commons.comparison_map", "modulename": "howard.functions.commons", "qualname": "comparison_map", "kind": "variable", "doc": "

    \n", "default_value": "{'gt': '>', 'gte': '>=', 'lt': '<', 'lte': '<=', 'equals': '=', 'contains': 'SIMILAR TO'}"}, {"fullname": "howard.functions.commons.code_type_map", "modulename": "howard.functions.commons", "qualname": "code_type_map", "kind": "variable", "doc": "

    \n", "default_value": "{'Integer': 0, 'String': 1, 'Float': 2, 'Flag': 3}"}, {"fullname": "howard.functions.commons.code_type_map_to_sql", "modulename": "howard.functions.commons", "qualname": "code_type_map_to_sql", "kind": "variable", "doc": "

    \n", "default_value": "{'Integer': 'INTEGER', 'String': 'VARCHAR', 'Float': 'FLOAT', 'Flag': 'VARCHAR'}"}, {"fullname": "howard.functions.commons.code_type_map_to_vcf", "modulename": "howard.functions.commons", "qualname": "code_type_map_to_vcf", "kind": "variable", "doc": "

    \n", "default_value": "{'INTEGER': 'Integer', 'VARCHAR': 'String', 'FLOAT': 'Float', 'DOUBLE': 'Integer', 'BOOLEAN': 'String'}"}, {"fullname": "howard.functions.commons.file_format_delimiters", "modulename": "howard.functions.commons", "qualname": "file_format_delimiters", "kind": "variable", "doc": "

    \n", "default_value": "{'vcf': '\\t', 'tsv': '\\t', 'csv': ',', 'psv': '|', 'bed': '\\t'}"}, {"fullname": "howard.functions.commons.file_format_allowed", "modulename": "howard.functions.commons", "qualname": "file_format_allowed", "kind": "variable", "doc": "

    \n", "default_value": "['vcf', 'tsv', 'csv', 'psv', 'bed', 'json', 'parquet', 'duckdb']"}, {"fullname": "howard.functions.commons.file_compressed_format", "modulename": "howard.functions.commons", "qualname": "file_compressed_format", "kind": "variable", "doc": "

    \n", "default_value": "['gz', 'bgz']"}, {"fullname": "howard.functions.commons.vcf_required_release", "modulename": "howard.functions.commons", "qualname": "vcf_required_release", "kind": "variable", "doc": "

    \n", "default_value": "'##fileformat=VCFv4.2'"}, {"fullname": "howard.functions.commons.vcf_required_columns", "modulename": "howard.functions.commons", "qualname": "vcf_required_columns", "kind": "variable", "doc": "

    \n", "default_value": "['#CHROM', 'POS', 'ID', 'REF', 'ALT', 'QUAL', 'FILTER', 'INFO']"}, {"fullname": "howard.functions.commons.vcf_required", "modulename": "howard.functions.commons", "qualname": "vcf_required", "kind": "variable", "doc": "

    \n", "default_value": "['##fileformat=VCFv4.2', '#CHROM\\tPOS\\tID\\tREF\\tALT\\tQUAL\\tFILTER\\tINFO']"}, {"fullname": "howard.functions.commons.DEFAULT_TOOLS_FOLDER", "modulename": "howard.functions.commons", "qualname": "DEFAULT_TOOLS_FOLDER", "kind": "variable", "doc": "

    \n", "default_value": "'/Users/lebechea/howard/tools'"}, {"fullname": "howard.functions.commons.DEFAULT_TOOLS_BIN", "modulename": "howard.functions.commons", "qualname": "DEFAULT_TOOLS_BIN", "kind": "variable", "doc": "

    \n", "default_value": "{'bcftools': {'bin': 'bcftools'}, 'bgzip': {'bin': 'bgzip'}, 'java': {'bin': 'java'}, 'snpeff': {'jar': '~/howard/tools/snpeff/current/bin/snpEff.jar'}, 'annovar': {'perl': '~/howard/tools/annovar/current/bin/table_annovar.pl'}, 'exomiser': {'jar': '~/howard/tools/exomiser/current/bin/exomiser.jar'}, 'docker': {'bin': 'docker'}, 'splice': {'docker': {'image': 'bioinfochrustrasbourg/splice:0.2.1', 'entrypoint': '/bin/bash', 'options': None, 'command': None}}}"}, {"fullname": "howard.functions.commons.DEFAULT_ANNOVAR_URL", "modulename": "howard.functions.commons", "qualname": "DEFAULT_ANNOVAR_URL", "kind": "variable", "doc": "

    \n", "default_value": "'http://www.openbioinformatics.org/annovar/download'"}, {"fullname": "howard.functions.commons.DEFAULT_REFSEQ_URL", "modulename": "howard.functions.commons", "qualname": "DEFAULT_REFSEQ_URL", "kind": "variable", "doc": "

    \n", "default_value": "'http://hgdownload.soe.ucsc.edu/goldenPath'"}, {"fullname": "howard.functions.commons.DEFAULT_DBNSFP_URL", "modulename": "howard.functions.commons", "qualname": "DEFAULT_DBNSFP_URL", "kind": "variable", "doc": "

    \n", "default_value": "'https://dbnsfp.s3.amazonaws.com'"}, {"fullname": "howard.functions.commons.DEFAULT_EXOMISER_URL", "modulename": "howard.functions.commons", "qualname": "DEFAULT_EXOMISER_URL", "kind": "variable", "doc": "

    \n", "default_value": "'http://data.monarchinitiative.org/exomiser'"}, {"fullname": "howard.functions.commons.DEFAULT_EXOMISER_REMM_URL", "modulename": "howard.functions.commons", "qualname": "DEFAULT_EXOMISER_REMM_URL", "kind": "variable", "doc": "

    \n", "default_value": "'https://kircherlab.bihealth.org/download/ReMM'"}, {"fullname": "howard.functions.commons.DEFAULT_EXOMISER_CADD_URL", "modulename": "howard.functions.commons", "qualname": "DEFAULT_EXOMISER_CADD_URL", "kind": "variable", "doc": "

    \n", "default_value": "'https://kircherlab.bihealth.org/download/CADD'"}, {"fullname": "howard.functions.commons.DEFAULT_ALPHAMISSENSE_URL", "modulename": "howard.functions.commons", "qualname": "DEFAULT_ALPHAMISSENSE_URL", "kind": "variable", "doc": "

    \n", "default_value": "'https://storage.googleapis.com/dm_alphamissense'"}, {"fullname": "howard.functions.commons.DEFAULT_DBSNP_URL", "modulename": "howard.functions.commons", "qualname": "DEFAULT_DBSNP_URL", "kind": "variable", "doc": "

    \n", "default_value": "'https://ftp.ncbi.nih.gov/snp/archive'"}, {"fullname": "howard.functions.commons.DEFAULT_DATABASE_FOLDER", "modulename": "howard.functions.commons", "qualname": "DEFAULT_DATABASE_FOLDER", "kind": "variable", "doc": "

    \n", "default_value": "'/Users/lebechea/howard/databases'"}, {"fullname": "howard.functions.commons.DEFAULT_ANNOTATIONS_FOLDER", "modulename": "howard.functions.commons", "qualname": "DEFAULT_ANNOTATIONS_FOLDER", "kind": "variable", "doc": "

    \n", "default_value": "'/Users/lebechea/howard/databases/annotations/current'"}, {"fullname": "howard.functions.commons.DEFAULT_GENOME_FOLDER", "modulename": "howard.functions.commons", "qualname": "DEFAULT_GENOME_FOLDER", "kind": "variable", "doc": "

    \n", "default_value": "'/Users/lebechea/howard/databases/genomes/current'"}, {"fullname": "howard.functions.commons.DEFAULT_SNPEFF_FOLDER", "modulename": "howard.functions.commons", "qualname": "DEFAULT_SNPEFF_FOLDER", "kind": "variable", "doc": "

    \n", "default_value": "'/Users/lebechea/howard/databases/snpeff/current'"}, {"fullname": "howard.functions.commons.DEFAULT_ANNOVAR_FOLDER", "modulename": "howard.functions.commons", "qualname": "DEFAULT_ANNOVAR_FOLDER", "kind": "variable", "doc": "

    \n", "default_value": "'/Users/lebechea/howard/databases/annovar/current'"}, {"fullname": "howard.functions.commons.DEFAULT_REFSEQ_FOLDER", "modulename": "howard.functions.commons", "qualname": "DEFAULT_REFSEQ_FOLDER", "kind": "variable", "doc": "

    \n", "default_value": "'/Users/lebechea/howard/databases/refseq/current'"}, {"fullname": "howard.functions.commons.DEFAULT_DBNSFP_FOLDER", "modulename": "howard.functions.commons", "qualname": "DEFAULT_DBNSFP_FOLDER", "kind": "variable", "doc": "

    \n", "default_value": "'/Users/lebechea/howard/databases/dbnsfp/current'"}, {"fullname": "howard.functions.commons.DEFAULT_EXOMISER_FOLDER", "modulename": "howard.functions.commons", "qualname": "DEFAULT_EXOMISER_FOLDER", "kind": "variable", "doc": "

    \n", "default_value": "'/Users/lebechea/howard/databases/exomiser/current'"}, {"fullname": "howard.functions.commons.DEFAULT_DBSNP_FOLDER", "modulename": "howard.functions.commons", "qualname": "DEFAULT_DBSNP_FOLDER", "kind": "variable", "doc": "

    \n", "default_value": "'/Users/lebechea/howard/databases/exomiser/dbsnp'"}, {"fullname": "howard.functions.commons.DEFAULT_SPLICE_FOLDER", "modulename": "howard.functions.commons", "qualname": "DEFAULT_SPLICE_FOLDER", "kind": "variable", "doc": "

    \n", "default_value": "'/Users/lebechea/howard/databases/splice'"}, {"fullname": "howard.functions.commons.DEFAULT_SPLICEAI_FOLDER", "modulename": "howard.functions.commons", "qualname": "DEFAULT_SPLICEAI_FOLDER", "kind": "variable", "doc": "

    \n", "default_value": "'/Users/lebechea/howard/databases/spliceai'"}, {"fullname": "howard.functions.commons.DEFAULT_SPIP_FOLDER", "modulename": "howard.functions.commons", "qualname": "DEFAULT_SPIP_FOLDER", "kind": "variable", "doc": "

    \n", "default_value": "'/Users/lebechea/howard/databases/spip'"}, {"fullname": "howard.functions.commons.DEFAULT_DATA_FOLDER", "modulename": "howard.functions.commons", "qualname": "DEFAULT_DATA_FOLDER", "kind": "variable", "doc": "

    \n", "default_value": "'/Users/lebechea/howard/data'"}, {"fullname": "howard.functions.commons.DEFAULT_ASSEMBLY", "modulename": "howard.functions.commons", "qualname": "DEFAULT_ASSEMBLY", "kind": "variable", "doc": "

    \n", "default_value": "'hg19'"}, {"fullname": "howard.functions.commons.DUCKDB_EXTENSION", "modulename": "howard.functions.commons", "qualname": "DUCKDB_EXTENSION", "kind": "variable", "doc": "

    \n", "default_value": "'/Users/lebechea/BIOINFO/git/HOWARD/howard/functions/duckdb_extension'"}, {"fullname": "howard.functions.commons.MACHIN_LIST", "modulename": "howard.functions.commons", "qualname": "MACHIN_LIST", "kind": "variable", "doc": "

    \n", "default_value": "{'amd64': 'amd64', 'arm64': 'arm64'}"}, {"fullname": "howard.functions.commons.BCFTOOLS_FORMAT", "modulename": "howard.functions.commons", "qualname": "BCFTOOLS_FORMAT", "kind": "variable", "doc": "

    \n", "default_value": "['vcf', 'bed']"}, {"fullname": "howard.functions.commons.CODE_TYPE_MAP", "modulename": "howard.functions.commons", "qualname": "CODE_TYPE_MAP", "kind": "variable", "doc": "

    \n", "default_value": "{'Integer': 0, 'String': 1, 'Float': 2, 'Flag': 3}"}, {"fullname": "howard.functions.commons.GENOTYPE_MAP", "modulename": "howard.functions.commons", "qualname": "GENOTYPE_MAP", "kind": "variable", "doc": "

    \n", "default_value": "{None: '.', -1: 'A', -2: 'G', -3: 'R'}"}, {"fullname": "howard.functions.commons.DTYPE_LIMIT_AUTO", "modulename": "howard.functions.commons", "qualname": "DTYPE_LIMIT_AUTO", "kind": "variable", "doc": "

    \n", "default_value": "10000"}, {"fullname": "howard.functions.commons.DEFAULT_CHUNK_SIZE", "modulename": "howard.functions.commons", "qualname": "DEFAULT_CHUNK_SIZE", "kind": "variable", "doc": "

    \n", "default_value": "1048576"}, {"fullname": "howard.functions.commons.LOG_FORMAT", "modulename": "howard.functions.commons", "qualname": "LOG_FORMAT", "kind": "variable", "doc": "

    \n", "default_value": "'#[%(asctime)s] %(levelname)7s| %(message)s'"}, {"fullname": "howard.functions.commons.log_color", "modulename": "howard.functions.commons", "qualname": "log_color", "kind": "variable", "doc": "

    \n", "default_value": "None"}, {"fullname": "howard.functions.commons.prompt_mesage", "modulename": "howard.functions.commons", "qualname": "prompt_mesage", "kind": "variable", "doc": "

    \n", "default_value": "'#[{}] |'"}, {"fullname": "howard.functions.commons.prompt_color", "modulename": "howard.functions.commons", "qualname": "prompt_color", "kind": "variable", "doc": "

    \n", "default_value": "None"}, {"fullname": "howard.functions.commons.prompt_line_color", "modulename": "howard.functions.commons", "qualname": "prompt_line_color", "kind": "variable", "doc": "

    \n", "default_value": "'green'"}, {"fullname": "howard.functions.commons.remove_if_exists", "modulename": "howard.functions.commons", "qualname": "remove_if_exists", "kind": "function", "doc": "

    The function removes a file if it exists at the specified filepath(s).

    \n\n
    Parameters
    \n\n
      \n
    • filepaths: A list of file paths that you want to check for existence and remove if they exist
    • \n
    \n", "signature": "(filepaths: list) -> None:", "funcdef": "def"}, {"fullname": "howard.functions.commons.set_log_level", "modulename": "howard.functions.commons", "qualname": "set_log_level", "kind": "function", "doc": "

    It sets the log level of the Python logging module

    \n\n
    Parameters
    \n\n
      \n
    • verbosity: The level of verbosity
    • \n
    \n", "signature": "(verbosity: str, log_file: str = None) -> str:", "funcdef": "def"}, {"fullname": "howard.functions.commons.split_interval", "modulename": "howard.functions.commons", "qualname": "split_interval", "kind": "function", "doc": "

    It takes a start and end value, and either a step size or a number of cuts, and returns a list of\nvalues that split the interval into equal-sized pieces

    \n\n
    Parameters
    \n\n
      \n
    • start: the start of the interval
    • \n
    • end: the end of the interval
    • \n
    • step: the step size between each cut
    • \n
    • ncuts: number of cuts to make
    • \n
    \n\n
    Returns
    \n\n
    \n

    A list of numbers.

    \n
    \n", "signature": "(start: int, end: int, step: int = None, ncuts: int = None):", "funcdef": "def"}, {"fullname": "howard.functions.commons.merge_regions", "modulename": "howard.functions.commons", "qualname": "merge_regions", "kind": "function", "doc": "

    It takes a list of genomic regions and returns a list of genomic regions where overlapping regions\nhave been merged

    \n\n
    Parameters
    \n\n
      \n
    • regions: A list of tuples representing genomic regions with the values of the chrom, start\nand end columns
    • \n
    \n\n
    Returns
    \n\n
    \n

    A list of tuples representing the merged regions with the values of the columns chrom,\n start and end.

    \n
    \n", "signature": "(regions: list) -> list:", "funcdef": "def"}, {"fullname": "howard.functions.commons.create_where_clause", "modulename": "howard.functions.commons", "qualname": "create_where_clause", "kind": "function", "doc": "

    It takes a list of merged regions and returns a SQL WHERE clause that can be used to filter variants\nin a SQL table

    \n\n
    Parameters
    \n\n
      \n
    • merged_regions: a list of tuples representing the merged regions with the values of the\nchrom, start and end columns
    • \n
    • table: The name of the table to query, defaults to variants (optional)
    • \n
    \n\n
    Returns
    \n\n
    \n

    A dictionary with the chromosome as key and the where clause as value.

    \n
    \n", "signature": "(merged_regions: list, table: str = 'variants') -> str:", "funcdef": "def"}, {"fullname": "howard.functions.commons.command", "modulename": "howard.functions.commons", "qualname": "command", "kind": "function", "doc": "

    It runs a command in the shell and waits for it to finish

    \n\n
    Parameters
    \n\n
      \n
    • command: The command to run
    • \n
    \n\n
    Returns
    \n\n
    \n

    The return value is the exit status of the process.

    \n
    \n", "signature": "(command: str) -> str:", "funcdef": "def"}, {"fullname": "howard.functions.commons.run_parallel_commands", "modulename": "howard.functions.commons", "qualname": "run_parallel_commands", "kind": "function", "doc": "

    It takes a list of commands and a number of threads, and runs the commands in parallel

    \n\n
    Parameters
    \n\n
      \n
    • commands: a list of commands to run
    • \n
    • threads: The number of threads to use
    • \n
    \n\n
    Returns
    \n\n
    \n

    A list of results from the commands.

    \n
    \n", "signature": "(commands: list, threads: int = 1) -> list:", "funcdef": "def"}, {"fullname": "howard.functions.commons.run_parallel_functions", "modulename": "howard.functions.commons", "qualname": "run_parallel_functions", "kind": "function", "doc": "

    It takes a list of functions and a number of threads, and runs the functions in parallel using the\nnumber of threads specified

    \n\n
    Parameters
    \n\n
      \n
    • functions: a list of functions to run in parallel
    • \n
    • threads: The number of threads to use
    • \n
    \n\n
    Returns
    \n\n
    \n

    A list of multiprocessing.pool.ApplyResult objects.

    \n
    \n", "signature": "(functions: list, threads: int = 1) -> list:", "funcdef": "def"}, {"fullname": "howard.functions.commons.example_function", "modulename": "howard.functions.commons", "qualname": "example_function", "kind": "function", "doc": "

    example_function takes in a number and a word and returns a list of the number and the word

    \n\n
    Parameters
    \n\n
      \n
    • num: a number
    • \n
    • word: a string
    • \n
    \n\n
    Returns
    \n\n
    \n

    [num, word]

    \n
    \n", "signature": "(num, word):", "funcdef": "def"}, {"fullname": "howard.functions.commons.find", "modulename": "howard.functions.commons", "qualname": "find", "kind": "function", "doc": "

    It recursively walks the directory tree starting at the given path, and returns the first file it\nfinds with the given name

    \n\n
    Parameters
    \n\n
      \n
    • name: The name of the file you're looking for
    • \n
    • path: The path to search for the file
    • \n
    \n\n
    Returns
    \n\n
    \n

    The path to the file.

    \n
    \n", "signature": "(name: str, path: str) -> str:", "funcdef": "def"}, {"fullname": "howard.functions.commons.find_all", "modulename": "howard.functions.commons", "qualname": "find_all", "kind": "function", "doc": "

    \"Walk the directory tree starting at path, and for each regular file with the name name, append its\nfull path to the result list.\"

    \n\n

    The os.walk function is a generator that yields a 3-tuple containing the name of a directory, a list\nof its subdirectories, and a list of the files in that directory. The name of the directory is a\nstring, and the lists of subdirectories and files are lists of strings

    \n\n
    Parameters
    \n\n
      \n
    • name: The name of the file you're looking for
    • \n
    • path: The path to search in
    • \n
    \n\n
    Returns
    \n\n
    \n

    A list of all the files in the directory that have the name \"name\"

    \n
    \n", "signature": "(name: str, path: str) -> list:", "funcdef": "def"}, {"fullname": "howard.functions.commons.find_genome", "modulename": "howard.functions.commons", "qualname": "find_genome", "kind": "function", "doc": "

    The find_genome function checks if a genome file exists at the specified path, and if not, it\ntries to find it using the provided assembly name or file name.

    \n\n
    Parameters
    \n\n
      \n
    • genome_path: The path to the genome file
    • \n
    • assembly: The assembly parameter is a string that represents the name of the genome\nassembly. It is used to search for the genome file with the specified assembly name in the\ngenome_dir directory. If a genome file with the assembly name is found, its path is returned
    • \n
    • file: The file parameter is the name of the genome file that you want to find
    • \n
    \n\n
    Returns
    \n\n
    \n

    the path to the genome file.

    \n
    \n", "signature": "(genome_path: str, assembly: str = None, file: str = None) -> str:", "funcdef": "def"}, {"fullname": "howard.functions.commons.find_file_prefix", "modulename": "howard.functions.commons", "qualname": "find_file_prefix", "kind": "function", "doc": "

    The function find_file_prefix is used to find a specific file based on input parameters such as\ninput file, folder, and assembly.

    \n\n
    Parameters
    \n\n
      \n
    • input_file: The input file is the file that you want to find the prefix for. It can be a file\npath or just the file name if it is in the current directory
    • \n
    • folder: The folder parameter is a string that represents the directory where the file is\nlocated
    • \n
    • assembly: The \"assembly\" parameter is a string that represents the assembly version of the\nfile you are looking for. It is used to search for files with the specific assembly version in their\nfilename
    • \n
    \n\n
    Returns
    \n\n
    \n

    the path of the output file.

    \n
    \n", "signature": "(\tinput_file: str = None,\tprefix: str = None,\tfolder: str = None,\tassembly: str = None) -> str:", "funcdef": "def"}, {"fullname": "howard.functions.commons.find_nomen", "modulename": "howard.functions.commons", "qualname": "find_nomen", "kind": "function", "doc": "

    The function find_nomen takes a HGVS string and a list of transcripts, parses the HGVS string, and\nreturns a dictionary with the best NOMEN based on specified patterns.

    \n\n
    Parameters
    \n\n
      \n
    • hgvs: The hgvs parameter is a DataFrame containing the HGVS strings to parse. It seems like\nthe function is designed to process multiple HGVS strings at once. You can pass this DataFrame to\nthe function for processing. If you have a specific DataFrame that you would like to use, please\nprovide it
    • \n
    • transcript: The transcript parameter in the find_nomen function is used to specify a\nsingle transcript to use for ranking. It is a string that represents the transcript. If provided,\nthis transcript will be used along with the transcripts from the transcripts list to determine the\nbest NOMEN
    • \n
    • transcripts: Transcripts are a list of transcripts to use for ranking in the find_nomen\nfunction. You can provide a list of transcripts that you want to consider when constructing the\nNOMEN for a given HGVS string
    • \n
    • transcripts_source_order: The transcripts_source_order parameter is a list that specifies\nthe order in which different sources of transcripts should be considered. In the provided function,\nthe default order is [\"column\", \"file\"], which means that transcripts from a column in the input\ndata will be considered first, followed by
    • \n
    • pattern: The pattern parameter in the find_nomen function is used to specify the format\nin which the NOMEN should be constructed. By default, the pattern is set to\n\"GNOMEN:TNOMEN:ENOMEN:CNOMEN:RNOMEN:NNOMEN
    • \n
    \n\n
    Returns
    \n\n
    \n

    The function find_nomen returns a dictionary containing the following keys:

    \n \n
      \n
    • NOMEN
    • \n
    • CNOMEN
    • \n
    • RNOMEN
    • \n
    • NNOMEN
    • \n
    • PNOMEN
    • \n
    • TVNOMEN
    • \n
    • TNOMEN
    • \n
    • TPVNOMEN
    • \n
    • TPNOMEN
    • \n
    • VNOMEN
    • \n
    • ENOMEN
    • \n
    • GNOMEN
    • \n
    \n
    \n", "signature": "(\thgvs: pandas.core.frame.DataFrame,\ttranscript: str = None,\ttranscripts: list = [],\ttranscripts_source_order: list = None,\tpattern=None,\ttranscripts_len: int = None) -> dict:", "funcdef": "def"}, {"fullname": "howard.functions.commons.explode_annotation_format", "modulename": "howard.functions.commons", "qualname": "explode_annotation_format", "kind": "function", "doc": "

    The explode_annotation_format function takes an annotation string and formats it into a specified\noutput format with optional customization parameters.

    \n\n
    Parameters
    \n\n
      \n
    • annotation: The annotation parameter is a string containing multiple annotations separated\nby commas and pipe symbols. Each annotation consists of different fields separated by pipe symbols.\nFor example, an annotation string could look like this: \"A|B|C,D|E|F\"
    • \n
    • uniquify: The uniquify parameter in the explode_annotation_format function is a boolean\nflag that determines whether to keep only unique values for each annotation field. If set to True,\nonly unique values will be retained for each field before joining them together. If set to False,\nall values, defaults to False
    • \n
    • output_format: The output_format parameter specifies the format in which you want the\noutput to be generated. The function supports two output formats: \"fields\" and \"JSON\". If you choose\n\"fields\", the output will be a string with annotations separated by semicolons. If you choose\n\"JSON\", the, defaults to fields
    • \n
    • prefix: The prefix parameter in the explode_annotation_format function is used to specify\nthe prefix that will be added to each annotation field when generating the exploded annotation\nstring. In the provided function, the default prefix value is set to \"ANN_\". You can customize this\nprefix value to suit your specific, defaults to ANN_
    • \n
    • header: The header parameter in the explode_annotation_format function is a list of\ncolumn names that will be used to create a DataFrame from the input annotation string. Each element\nin the header list corresponds to a specific field in the annotation data
    • \n
    \n\n
    Returns
    \n\n
    \n

    The function explode_annotation_format returns a string that contains the exploded and\n formatted annotation information based on the input parameters provided. The format of the returned\n string depends on the output_format parameter. If output_format is set to \"JSON\", the function\n returns a JSON-formatted string. Otherwise, it returns a string with annotations formatted based on\n the other parameters such as `uniquify

    \n
    \n", "signature": "(\tannotation: str = '',\tuniquify: bool = False,\toutput_format: str = 'fields',\tprefix: str = 'ANN_',\theader: list = ['Allele', 'Annotation', 'Annotation_Impact', 'Gene_Name', 'Gene_ID', 'Feature_Type', 'Feature_ID', 'Transcript_BioType', 'Rank', 'HGVS.c', 'HGVS.p', 'cDNA.pos / cDNA.length', 'CDS.pos / CDS.length', 'AA.pos / AA.length', 'Distance', 'ERRORS / WARNINGS / INFO']) -> str:", "funcdef": "def"}, {"fullname": "howard.functions.commons.extract_snpeff_hgvs", "modulename": "howard.functions.commons", "qualname": "extract_snpeff_hgvs", "kind": "function", "doc": "

    This function extracts HGVS annotations from a given snpEff annotation string and returns them as a\ncomma-separated string.

    \n\n
    Parameters
    \n\n
      \n
    • snpeff: The snpeff parameter is a string that contains annotations for genetic variants in\na specific format. It is used as input to extract HGVS notation for the variants
    • \n
    • header: The header parameter is a list of column names that will be used to create a pandas\nDataFrame from the snpeff string input. It is used to extract specific information from the snpeff\nannotations
    • \n
    \n\n
    Returns
    \n\n
    \n

    a string that contains the HGVS annotations extracted from the input SNPEff annotation\n string.

    \n
    \n", "signature": "(\tsnpeff: str = '',\theader: list = ['Allele', 'Annotation', 'Annotation_Impact', 'Gene_Name', 'Gene_ID', 'Feature_Type', 'Feature_ID', 'Transcript_BioType', 'Rank', 'HGVS.c', 'HGVS.p', 'cDNA.pos / cDNA.length', 'CDS.pos / CDS.length', 'AA.pos / AA.length', 'Distance', 'ERRORS / WARNINGS / INFO']) -> str:", "funcdef": "def"}, {"fullname": "howard.functions.commons.explode_snpeff_ann", "modulename": "howard.functions.commons", "qualname": "explode_snpeff_ann", "kind": "function", "doc": "

    The explode_snpeff_ann function takes a string of SNPEff annotations, splits and processes them\nbased on specified parameters, and returns the processed annotations in a specified output format.

    \n\n
    Parameters
    \n\n
      \n
    • snpeff: The snpeff parameter is a string containing annotations separated by commas. Each\nannotation is further divided into different fields separated by pipes (|)
    • \n
    • uniquify: The uniquify parameter in the explode_snpeff_ann function is a boolean flag\nthat determines whether to keep only unique values for each annotation field or not. If uniquify\nis set to True, only unique values will be kept for each annotation field. If, defaults to False
    • \n
    • output_format: The output_format parameter in the explode_snpeff_ann function specifies\nthe format in which the output will be generated. The function supports two output formats: \"fields\"\nand \"JSON\", defaults to fields
    • \n
    • prefix: The prefix parameter in the explode_snpeff_ann function is used to specify the\nprefix that will be added to each annotation field in the output. For example, if the prefix is set\nto \"ANN_\", then the output annotations will be formatted as \"ANN_Annotation=example_annotation,\ndefaults to ANN_
    • \n
    • header: The header parameter in the explode_snpeff_ann function is a list of strings that\nrepresent the column names or fields for the output data. These strings include information such as\nallele, annotation, gene name, gene ID, feature type, transcript biotype, and various other details\nrelated
    • \n
    \n\n
    Returns
    \n\n
    \n

    The function explode_snpeff_ann returns a string that contains the exploded and formatted\n SNPEff annotations based on the input parameters provided. The specific format of the returned\n string depends on the output_format, uniquify, and other parameters specified in the function.

    \n
    \n", "signature": "(\tsnpeff: str = '',\tuniquify: bool = False,\toutput_format: str = 'fields',\tprefix: str = 'ANN_',\theader: list = ['Allele', 'Annotation', 'Annotation_Impact', 'Gene_Name', 'Gene_ID', 'Feature_Type', 'Feature_ID', 'Transcript_BioType', 'Rank', 'HGVS.c', 'HGVS.p', 'cDNA.pos / cDNA.length', 'CDS.pos / CDS.length', 'AA.pos / AA.length', 'Distance', 'ERRORS / WARNINGS / INFO']) -> str:", "funcdef": "def"}, {"fullname": "howard.functions.commons.get_index", "modulename": "howard.functions.commons", "qualname": "get_index", "kind": "function", "doc": "

    The function returns the index of a given value in a list, or -1 if the value is not in the list.

    \n\n
    Parameters
    \n\n
      \n
    • value: The value to search for in the list
    • \n
    • values: The parameter \"values\" is a list of values in which we want to find the index of a\nspecific value. It is an optional parameter with a default value of an empty list
    • \n
    \n\n
    Returns
    \n\n
    \n

    The function get_index returns the index of the first occurrence of the value parameter\n in the values list. If the value parameter is not found in the values list, the function\n returns -1.

    \n
    \n", "signature": "(value, values: list = []) -> int:", "funcdef": "def"}, {"fullname": "howard.functions.commons.get_file_format", "modulename": "howard.functions.commons", "qualname": "get_file_format", "kind": "function", "doc": "

    It takes a filename and returns the file format

    \n\n
    Parameters
    \n\n
      \n
    • filename: the name of the file you want to get the format of
    • \n
    \n\n
    Returns
    \n\n
    \n

    The file format of the file.

    \n
    \n", "signature": "(filename: str = None) -> str:", "funcdef": "def"}, {"fullname": "howard.functions.commons.findbypipeline", "modulename": "howard.functions.commons", "qualname": "findbypipeline", "kind": "function", "doc": "

    This function takes a dataframe and a list of samples, and returns the number of pipelines found in\nthe samples that have a non-null GT value.

    \n\n
    Parameters
    \n\n
      \n
    • df: The input dataframe containing genetic variant information
    • \n
    • samples: The samples parameter is a list of strings representing the names of the\nsamples/pipelines to be searched for in the input dataframe df
    • \n
    \n\n
    Returns
    \n\n
    \n

    a string in the format of \"nb_pipeline_find/nb_pipeline\", where nb_pipeline_find is the\n number of pipelines in the input list samples that have a non-null GT value in the input dataframe\n df, and nb_pipeline is the total number of pipelines in the input list samples. If the input list\n samples is empty, the function returns \"0/0\".

    \n
    \n", "signature": "(df, samples: list = []):", "funcdef": "def"}, {"fullname": "howard.functions.commons.genotypeconcordance", "modulename": "howard.functions.commons", "qualname": "genotypeconcordance", "kind": "function", "doc": "

    The function checks the genotype concordance of a given list of samples in a dataframe.

    \n\n
    Parameters
    \n\n
      \n
    • df: The input dataframe containing genetic variant information, including genotype\ninformation for each sample/pipeline
    • \n
    • samples: The parameter \"samples\" is a list of sample/pipeline names that are present in the\ninput dataframe \"df\". These samples/pipelines have genotype information that will be used to\ncalculate genotype concordance
    • \n
    \n\n
    Returns
    \n\n
    \n

    a string that indicates whether the genotypes of the specified samples in the input\n dataframe are concordant or not. The string is either \"TRUE\" or \"FALSE\", depending on whether all\n the specified samples have the same genotype or not.

    \n
    \n", "signature": "(df, samples: list = []):", "funcdef": "def"}, {"fullname": "howard.functions.commons.genotype_compression", "modulename": "howard.functions.commons", "qualname": "genotype_compression", "kind": "function", "doc": "

    The function takes a genotype string, replaces dots with zeros, removes non-digit characters, sorts\nand removes duplicates, and returns the compressed genotype string.

    \n\n
    Parameters
    \n\n
      \n
    • genotype: The input genotype as a string. It is a DNA sequence that contains genetic\ninformation
    • \n
    \n\n
    Returns
    \n\n
    \n

    The function genotype_compression returns a compressed version of the input genotype\n string. The compressed string has all dots replaced with 0s, all non-digit characters removed, and\n duplicates removed and sorted. The compressed string is returned as a string.

    \n
    \n", "signature": "(genotype: str = '') -> str:", "funcdef": "def"}, {"fullname": "howard.functions.commons.genotype_barcode", "modulename": "howard.functions.commons", "qualname": "genotype_barcode", "kind": "function", "doc": "

    This function takes a genotype string and compresses it, then returns a barcode string based on the\nlength and content of the compressed genotype.

    \n\n
    Parameters
    \n\n
      \n
    • genotype: The genotype parameter is a string that represents a genetic sequence or code
    • \n
    \n\n
    Returns
    \n\n
    \n

    The function genotype_barcode returns a string representing the barcode for a given\n genotype. The barcode can be \"0\", \"1\", \"2\", or \"?\" depending on the length and content of the\n compressed genotype string.

    \n
    \n", "signature": "(genotype: str = '') -> str:", "funcdef": "def"}, {"fullname": "howard.functions.commons.barcode", "modulename": "howard.functions.commons", "qualname": "barcode", "kind": "function", "doc": "

    Generates a barcode based on the genotype of the specified samples.

    \n\n
    Parameters
    \n\n
      \n
    • df: A pandas DataFrame containing the genetic data.

    • \n
    • samples: A list of sample names to use for generating the barcode.

    • \n
    \n\n
    Returns
    \n\n
    \n

    A barcode string based on the genotype of the specified samples.

    \n
    \n", "signature": "(df, samples: list = []):", "funcdef": "def"}, {"fullname": "howard.functions.commons.trio", "modulename": "howard.functions.commons", "qualname": "trio", "kind": "function", "doc": "

    The function trio(df, samples:list = []) determines the type of variant (denovo, dominant, or\nrecessive) in a trio based on the barcode generated from the samples.

    \n\n
    Parameters
    \n\n
      \n
    • df: The input dataframe containing genetic variant information
    • \n
    • samples: A list of sample IDs to be used in the analysis
    • \n
    \n\n
    Returns
    \n\n
    \n

    The function trio returns a string that represents the type of variant in a trio\n analysis, which can be \"denovo\", \"dominant\", \"recessive\", or \"unknown\".

    \n
    \n", "signature": "(df, samples: list = []):", "funcdef": "def"}, {"fullname": "howard.functions.commons.vaf_normalization", "modulename": "howard.functions.commons", "qualname": "vaf_normalization", "kind": "function", "doc": "

    This function takes in a row of data and a sample name, extracts the genotype information for that\nsample, calculates the variant allele frequency (VAF) from the genotype information, and adds the\nVAF to the genotype information before returning it.

    \n\n
    Parameters
    \n\n
      \n
    • row: The input row of a pandas DataFrame containing information about a genetic variant
    • \n
    • sample: The parameter \"sample\" is a string representing the name of the sample for which we\nwant to calculate the VAF (Variant Allele Frequency). It is used to extract the genotype information\nfor that particular sample from the input row
    • \n
    \n\n
    Returns
    \n\n
    \n

    a string that represents the genotype information for a given sample with an added \"VAF\"\n field that represents the variant allele frequency.

    \n
    \n", "signature": "(row, sample: str) -> str:", "funcdef": "def"}, {"fullname": "howard.functions.commons.genotype_stats", "modulename": "howard.functions.commons", "qualname": "genotype_stats", "kind": "function", "doc": "

    This function computes statistics on a specified information field (e.g. VAF) for a given set of\nsamples in a pandas dataframe.

    \n\n
    Parameters
    \n\n
      \n
    • df: The input dataframe containing variant information
    • \n
    • samples: The list of sample/pipeline names for which to compute the genotype statistics. If\nempty, the function will return an empty dictionary
    • \n
    • info: The parameter \"info\" is a string that represents the type of information being analyzed\nin the function. In this case, it is used to compute statistics on the Variant Allele Frequency\n(VAF) of genetic variants, defaults to VAF
    • \n
    \n\n
    Returns
    \n\n
    \n

    a dictionary containing statistics related to a specified information field (default is\n \"VAF\") for a given set of samples in a pandas DataFrame. The statistics include the number of\n values, a list of values, minimum value, maximum value, mean, median, and standard deviation. If no\n samples are specified, an empty dictionary is returned.

    \n
    \n", "signature": "(df, samples: list = [], info: str = 'VAF'):", "funcdef": "def"}, {"fullname": "howard.functions.commons.extract_file", "modulename": "howard.functions.commons", "qualname": "extract_file", "kind": "function", "doc": "

    The function extracts a compressed file in .zip or .gz format based on the file path provided.

    \n\n
    Parameters
    \n\n
      \n
    • file_path: The file path parameter is a string that represents the path to a file that needs\nto be extracted. The function checks if the file has a \".zip\" or \".gz\" extension and extracts it\naccordingly
    • \n
    • path: The path parameter is an optional string that represents the directory where the\nextracted files will be saved. If no path is provided, the function will use the directory of the\nfile_path as the extraction destination
    • \n
    • threads: The threads parameter is an optional parameter that specifies the number of\nthreads to use for extraction. By default, it is set to 1, meaning the extraction will be done using\na single thread, defaults to 1
    • \n
    \n", "signature": "(file_path: str, path: str = None, threads: int = 1):", "funcdef": "def"}, {"fullname": "howard.functions.commons.download_file", "modulename": "howard.functions.commons", "qualname": "download_file", "kind": "function", "doc": "

    The download_file function is a Python function that downloads a file from a given URL and saves\nit to a specified destination file path in chunks.

    \n\n
    Parameters
    \n\n
      \n
    • url: The url parameter is the URL of the file you want to download. It should be a string\nthat represents the complete URL, including the protocol (e.g., \"http://example.com/file.txt\")
    • \n
    • dest_file_path: The dest_file_path parameter is the path where the downloaded file will be\nsaved. It should be a string representing the file path, including the file name and extension. For\nexample, to save the file as \"myfile.txt\" in the current directory, set dest_file_path to \"myfile.txt\"
    • \n
    • chunk_size: The chunk_size parameter determines the size of each chunk of data that is\ndownloaded at a time. In this case, the default value is set to 1 MB, which means that the file will\nbe downloaded in chunks of 1 MB at a time. This parameter can be adjusted according to
    • \n
    • try_aria: The try_aria parameter is a boolean value that determines whether to use the\nAria2c command-line tool for downloading the file. If set to True, the function will attempt to\ndownload the file using Aria2c. If set to False, the function will use the standard download method\ninstead. Defaults to True
    • \n
    • aria_async_dns: The aria_async_dns parameter is a boolean value that determines whether to\nuse asynchronous DNS resolution with Aria2c. If set to True, Aria2c will use asynchronous DNS\nresolution, which can improve download performance. If set to False, Aria2c will use synchronous,\ndefaults to False
    • \n
    • threads: The threads parameter specifies the number of threads to be used for downloading\nthe file. It determines the number of simultaneous connections that will be made to download the\nfile. By default, it is set to 1, which means that only one connection will be made at a time.\nIncreasing the value, defaults to 1
    • \n
    • quiet: The quiet parameter is a boolean value that determines whether to suppress the\noutput of the download process. If set to True, the output will be suppressed. If set to False,\nthe output will be displayed. By default, it is set to True, defaults to True
    • \n
    \n\n
    Returns
    \n\n
    \n

    a boolean value indicating whether the file was successfully downloaded and saved to the\n specified destination file path.

    \n
    \n", "signature": "(\turl: str,\tdest_file_path: str,\tchunk_size: int = 1048576,\ttry_aria: bool = True,\taria_async_dns: bool = False,\tthreads: int = 1,\tquiet: bool = True):", "funcdef": "def"}, {"fullname": "howard.functions.commons.whereis_bin", "modulename": "howard.functions.commons", "qualname": "whereis_bin", "kind": "function", "doc": "

    \n", "signature": "(bin_file: str) -> str:", "funcdef": "def"}, {"fullname": "howard.functions.commons.get_bin", "modulename": "howard.functions.commons", "qualname": "get_bin", "kind": "function", "doc": "

    The get_bin function retrieves the path to a specified binary file from a configuration dictionary\nor searches for it in the file system if it is not specified in the configuration.

    \n\n
    Parameters
    \n\n
      \n
    • bin: The bin parameter is a string or a pattern that represents the name of the binary file (e.g.,\nsnpEff.jar, exomiser-cli*.jar) that you want to retrieve the path for
    • \n
    • tool: The tool parameter is a string that represents the name of the tool. It is used to\nretrieve the path to the tool's binary file
    • \n
    • bin_type: The bin_type parameter is a string that specifies the type of binary file to\nsearch for in the config dict (e.g., jar, bin). In this case, the default value is \"bin\". A value \"jar\" indicates that the function is searching\nfor a JAR file. Defaults to bin
    • \n
    • config: A dictionary containing configuration information for the snpEff tool, including the\npath to the snpEff jar file. If no configuration is provided, an empty dictionary is used
    • \n
    • default_folder: The default_folder parameter is a string that represents the default folder\nwhere the tool binaries are located. If the bin_file is not found in the configuration dictionary\nor in the file system, the function will search for it in this default folder
    • \n
    \n\n
    Returns
    \n\n
    \n

    the path to the snpEff.jar file. If the file is not found, it returns None.

    \n
    \n", "signature": "(\tbin: str = None,\ttool: str = None,\tbin_type: str = 'bin',\tconfig: dict = {},\tdefault_folder: str = '/Users/lebechea/howard/tools',\toutput_type: str = 'bin') -> str:", "funcdef": "def"}, {"fullname": "howard.functions.commons.get_bin_command", "modulename": "howard.functions.commons", "qualname": "get_bin_command", "kind": "function", "doc": "

    The function get_bin_command generates a command based on the tool type (jar, java, docker) and\nspecified parameters.

    \n\n
    Parameters
    \n\n
      \n
    • bin: The bin parameter in the get_bin_command function is used to specify the binary\nexecutable file that you want to run. It is a string that represents the path or name of the binary\nfile. If you provide this parameter, the function will attempt to locate the binary file based on\nthe
    • \n
    • tool: The tool parameter in the get_bin_command function represents the name of the tool\nfor which you want to retrieve the command. It is used to identify the specific tool for which the\ncommand is being generated
    • \n
    • bin_type: The bin_type parameter in the get_bin_command function specifies the type of\nbinary executable that the tool uses. It can have values like \"bin\", \"jar\", \"java\", \"docker\", etc.,\ndepending on the type of tool being executed. The function uses this parameter to determine,\ndefaults to bin
    • \n
    • config: The config parameter in the get_bin_command function is a dictionary that holds\nconfiguration settings for the tool being used. It can include various settings such as paths,\nenvironment variables, or any other configuration options needed for the tool to run properly
    • \n
    • param: The param parameter in the get_bin_command function is a dictionary that contains\nadditional parameters or configurations for the tool being executed. These parameters can be used to\ncustomize the behavior or settings of the tool when generating the command for execution. The\nfunction uses the param dictionary along with the
    • \n
    • default_folder: The default_folder parameter in the get_bin_command function is used to\nspecify the default folder where the tools are located. If a specific folder is not provided when\ncalling the function, it will default to the value of DEFAULT_TOOLS_FOLDER
    • \n
    • add_options: The add_options parameter in the get_bin_command function allows you to pass\nadditional options or arguments to the command being constructed based on the tool type. These\nadditional options can be specific configurations, flags, or any other parameters that you want to\ninclude in the final command. When provided,
    • \n
    \n\n
    Returns
    \n\n
    \n

    The get_bin_command function returns a string representing the command to execute a\n specific tool based on the provided parameters. The returned command can be either a Java command\n for running a JAR file or a Docker command for running a Docker image/container. If the tool type is\n not Java or Docker, it returns the default tool bin.

    \n
    \n", "signature": "(\tbin: str = None,\ttool: str = None,\tbin_type: str = 'bin',\tconfig: dict = {},\tparam: dict = {},\tdefault_folder: str = '/Users/lebechea/howard/tools',\tadd_options: str = None) -> str:", "funcdef": "def"}, {"fullname": "howard.functions.commons.get_tmp", "modulename": "howard.functions.commons", "qualname": "get_tmp", "kind": "function", "doc": "

    The get_tmp function returns the value of the \"tmp\" parameter from either the param dictionary,\nconfig dictionary, or a default value \"/tmp\".

    \n\n
    Parameters
    \n\n
      \n
    • config: Config is a dictionary that contains configuration settings for the function. It is\nan optional parameter with a default value of an empty dictionary. It can be used to provide\nadditional configuration settings to the function get_tmp
    • \n
    • param: The param parameter is a dictionary containing parameters that can be passed to the\nfunction get_tmp. It can include various key-value pairs, but in this context, the function\nspecifically looks for the key \"tmp\" within the param dictionary to determine the temporary path\nvalue
    • \n
    • default_tmp: The default_tmp parameter in the get_tmp function is a string that\nrepresents the default path for temporary files. If the \"tmp\" key is not found in the param\ndictionary or the config dictionary, the function will return this default_tmp value. Defaults\nto /tmp
    • \n
    \n\n
    Returns
    \n\n
    \n

    The function get_tmp returns the value of the \"tmp\" key from the param dictionary if it\n exists. If the \"tmp\" key is not found in the param dictionary, it returns the value of the \"tmp\"\n key from the config dictionary. If neither key is found in param or config, it returns the\n default value \"/tmp\".

    \n
    \n", "signature": "(config: dict = {}, param: dict = None, default_tmp: str = '/tmp') -> str:", "funcdef": "def"}, {"fullname": "howard.functions.commons.get_threads", "modulename": "howard.functions.commons", "qualname": "get_threads", "kind": "function", "doc": "

    This Python function retrieves the number of threads to use based on input parameters and system\nconfiguration.

    \n\n
    Parameters
    \n\n
      \n
    • config: The config parameter is a dictionary that contains configuration settings for the\nfunction get_threads. It can be used to provide default values for the number of threads to use in\nthe function
    • \n
    • param: The param parameter is a dictionary that may contain the key \"threads\" which\nspecifies the number of threads to use. If the \"threads\" key is not present in the param\ndictionary, the function will look for the \"threads\" key in the config dictionary. If neither
    • \n
    \n\n
    Returns
    \n\n
    \n

    The function get_threads returns the number of threads to be used based on the input\n parameters.

    \n
    \n", "signature": "(config: dict = {}, param: dict = {}) -> int:", "funcdef": "def"}, {"fullname": "howard.functions.commons.get_memory", "modulename": "howard.functions.commons", "qualname": "get_memory", "kind": "function", "doc": "

    The get_memory function retrieves memory information using psutil and calculates a default memory\nvalue based on total memory, with the option to specify a custom memory value.

    \n\n
    Parameters
    \n\n
      \n
    • config: The config parameter is a dictionary that may contain configuration settings for\nthe function get_memory. It is used to provide default values or settings for the function
    • \n
    • param: The param parameter is a dictionary that may contain a key \"memory\" which represents\nthe amount of memory to be used. If the \"memory\" key is not present in the param dictionary, the\nfunction will try to retrieve the value from the config dictionary using the key \"
    • \n
    \n\n
    Returns
    \n\n
    \n

    The function get_memory returns a string representing the amount of memory to be used.\n This memory value is calculated based on the total memory available on the system, with a default\n value set to 80% of the total memory. The function first checks if a specific memory value is\n provided in the param dictionary, and if not, it looks for a default value in the config

    \n
    \n", "signature": "(config: dict = {}, param: dict = None) -> str:", "funcdef": "def"}, {"fullname": "howard.functions.commons.extract_float_from_str", "modulename": "howard.functions.commons", "qualname": "extract_float_from_str", "kind": "function", "doc": "

    The function extract_float_from_str extracts a float value from a given string input.

    \n\n
    Parameters
    \n\n
      \n
    • text: The extract_float_from_str function is designed to extract a floating-point number\nfrom a given string input. The function uses a regular expression to find the first occurrence of a\nfloating-point number in the input string and returns it as a float
    • \n
    \n\n
    Returns
    \n\n
    \n

    The function extract_float_from_str returns a float value extracted from the input text\n string. If a float value is found in the text, it is returned as a float. If no float value is\n found, it returns None.

    \n
    \n", "signature": "(text: str = '') -> float:", "funcdef": "def"}, {"fullname": "howard.functions.commons.extract_memory_in_go", "modulename": "howard.functions.commons", "qualname": "extract_memory_in_go", "kind": "function", "doc": "

    The extract_memory_in_go function converts a memory size string in the format FLOAT[kMG] to an\ninteger value in Go memory units.

    \n\n
    Parameters
    \n\n
      \n
    • memory_str: The memory_str parameter should be a string representing a memory value with a\nunit suffix in the format FLOAT[kMG]. For example, it could be \"1G\", \"512M\", or \"2k\"
    • \n
    • default: The default parameter in the extract_memory_in_go function is used to specify a\ndefault integer value if the conversion of the memory size string fails or if the value cannot be\nextracted from the input string. If no valid value can be extracted from the input string, the\nfunction will return the, defaults to 1
    • \n
    \n\n
    Returns
    \n\n
    \n

    The extract_memory_in_go function is returning an integer value representing the memory\n size in Go units based on the input memory string provided.

    \n
    \n", "signature": "(memory_str, default_value: int = 1, default_unit: str = 'G') -> int:", "funcdef": "def"}, {"fullname": "howard.functions.commons.concat_file", "modulename": "howard.functions.commons", "qualname": "concat_file", "kind": "function", "doc": "

    This function concatenates multiple input files into a single output file.

    \n\n
    Parameters
    \n\n
      \n
    • input_files: A list of file paths to the input files that need to be concatenated
    • \n
    • output_file: The parameter \"output_file\" is a string that represents the name of the file\nthat will be created by the function and will contain the concatenated content of all the input\nfiles
    • \n
    \n\n
    Returns
    \n\n
    \n

    a boolean value indicating whether the output file was successfully created or not. It\n checks if the output file exists using the os.path.exists() function and returns True if it\n exists and False otherwise.

    \n
    \n", "signature": "(input_files: list, output_file: str) -> bool:", "funcdef": "def"}, {"fullname": "howard.functions.commons.compress_file", "modulename": "howard.functions.commons", "qualname": "compress_file", "kind": "function", "doc": "

    This function compresses a file using the BGZF compression algorithm.

    \n\n
    Parameters
    \n\n
      \n
    • input_file: The path and name of the input file that needs to be compressed
    • \n
    • output_file: The output_file parameter is a string that represents the name and path of\nthe file where the compressed data will be written
    • \n
    \n", "signature": "(input_file: str, output_file: str) -> bool:", "funcdef": "def"}, {"fullname": "howard.functions.commons.get_compression_type", "modulename": "howard.functions.commons", "qualname": "get_compression_type", "kind": "function", "doc": "

    The function get_compression_type determines the compression type of a file based on its first few\nbytes.

    \n\n
    Parameters
    \n\n
      \n
    • filepath: The filepath parameter is a string that represents the path to the file for which\nwe want to determine the compression type
    • \n
    \n\n
    Returns
    \n\n
    \n

    The function get_compression_type returns a string indicating the compression type of the\n file specified by the filepath parameter. The possible return values are \"gzip\" if the file is\n compressed using gzip, \"bgzip\" if the file is compressed using bgzip, \"unknown\" if the compression\n type is unknown, and \"none\" if the file is not compressed.

    \n
    \n", "signature": "(filepath: str) -> str:", "funcdef": "def"}, {"fullname": "howard.functions.commons.get_file_compressed", "modulename": "howard.functions.commons", "qualname": "get_file_compressed", "kind": "function", "doc": "

    This function takes a filename as input and returns True if the file is compressed (in bgzip) and False if it\nis not

    \n\n
    Parameters
    \n\n
      \n
    • filename: the name of the file to be checked
    • \n
    \n\n
    Returns
    \n\n
    \n

    A boolean value.

    \n
    \n", "signature": "(filename: str = None) -> bool:", "funcdef": "def"}, {"fullname": "howard.functions.commons.concat_into_infile", "modulename": "howard.functions.commons", "qualname": "concat_into_infile", "kind": "function", "doc": "

    The function concat_into_infile concatenates multiple input files into a compressed output file,\nwith support for different compression types and multi-threading.

    \n\n
    Parameters
    \n\n
      \n
    • input_files: A list of input file paths that need to be concatenated into the compressed file
    • \n
    • compressed_file: The compressed_file parameter is an object that represents the file where\nthe concatenated contents of the input files will be written. It is expected to be a file object\nthat has write capabilities
    • \n
    • compression_type: The compression_type parameter specifies the type of compression to be\nused for the output file. The default value is \"none\", which means no compression will be applied.\nOther possible values include \"bgzip\" and \"gzip\", which indicate that the output file should be\ncompressed using the bgzip and, defaults to none
    • \n
    • threads: The \"threads\" parameter specifies the number of threads to use for compression or\ndecompression. It determines how many parallel processes can be executed simultaneously, which can\nhelp improve performance when dealing with large files or multiple files, defaults to 1
    • \n
    • block: The block parameter is used to specify the size of the block when reading the input\nfiles. It is set to 10 ** 6, which means 1 million bytes. This parameter determines how much data\nis read from the input files at a time
    • \n
    \n\n
    Returns
    \n\n
    \n

    a boolean value, specifically True.

    \n
    \n", "signature": "(\tinput_files: list,\tcompressed_file: object,\tcompression_type: str = 'none',\tthreads: int = 1,\tblock: int = 1000000) -> bool:", "funcdef": "def"}, {"fullname": "howard.functions.commons.concat_and_compress_files", "modulename": "howard.functions.commons", "qualname": "concat_and_compress_files", "kind": "function", "doc": "

    The function concat_and_compress_files takes a list of input files, an output file name, and\noptional parameters for compression type, number of threads, block size, compression level, sorting,\nand indexing, and concatenates and compresses the input files into the output file.

    \n\n
    Parameters
    \n\n
      \n
    • input_files: A list of input file paths that need to be concatenated and compressed
    • \n
    • output_file: The output_file parameter is a string that specifies the path and name of the\noutput file that will be created after concatenating and compressing the input files
    • \n
    • compression_type: The compression_type parameter specifies the type of compression to be\napplied to the output file. It can take one of three values: \"bgzip\", \"gzip\", or \"none\", defaults to\nbgzip
    • \n
    • threads: The threads parameter specifies the number of threads to use for compression and\ndecompression. It determines the level of parallelism in the compression process, allowing for\nfaster execution when multiple threads are used, defaults to 1
    • \n
    • memory: The memory parameter specifies the amount of max memory (in Gb) to use for sorting.\ndefaults to 1
    • \n
    • block: The block parameter specifies the size of the block used for reading and writing\ndata during compression. It is set to a default value of 10^6 (1 million) bytes
    • \n
    • compression_level: The compression_level parameter determines the level of compression to\nbe used when compressing the output file. It is an integer value ranging from 0 to 9, where 0\nindicates no compression and 9 indicates maximum compression. The higher the compression level, the\nsmaller the resulting compressed file size, defaults to 6
    • \n
    • sort: The sort parameter is a boolean flag that determines whether the output file should\nbe sorted or not. If sort is set to True, the output file will be sorted using\npysam.bcftools.sort before renaming it. If sort is set to False, the output file will not be\nsorted. Defaults to False
    • \n
    • index: The index parameter is a boolean flag that determines whether or not to index the\noutput file after concatenation and compression. If index is set to True, the output file will\nbe indexed using the pysam.tabix_index function with the preset \"vcf\". Make sure VCF is sorted.\nDefaults to False
    • \n
    \n\n
    Returns
    \n\n
    \n

    a boolean value indicating whether the output file exists or not.

    \n
    \n", "signature": "(\tinput_files: list,\toutput_file: str,\tcompression_type: str = 'bgzip',\tthreads: int = 1,\tmemory: int = 1,\tblock: int = 1000000,\tcompression_level: int = 6,\tsort: bool = False,\tindex: bool = False) -> bool:", "funcdef": "def"}, {"fullname": "howard.functions.commons.get_plateform_name_from_duckdb", "modulename": "howard.functions.commons", "qualname": "get_plateform_name_from_duckdb", "kind": "function", "doc": "

    The function get_plateform_name_from_duckdb returns the platform information from a DuckDB connection.

    \n\n
    Parameters
    \n\n
      \n
    • conn: The conn parameter is an instance of the DuckDBPyConnection class from the duckdb\nmodule. It represents a connection to a DuckDB database
    • \n
    \n\n
    Returns
    \n\n
    \n

    the platform information from the DuckDB connection.

    \n
    \n", "signature": "(conn: duckdb.duckdb.DuckDBPyConnection) -> str:", "funcdef": "def"}, {"fullname": "howard.functions.commons.get_duckdb_extension_file", "modulename": "howard.functions.commons", "qualname": "get_duckdb_extension_file", "kind": "function", "doc": "

    This function returns the file path of a DuckDB extension based on the extension name and platform.

    \n\n
    Parameters
    \n\n
      \n
    • extension_name: The name of the DuckDB extension file that is being requested
    • \n
    \n\n
    Returns
    \n\n
    \n

    a string that represents the file path of a DuckDB extension file. The file path is\n constructed using the constant DUCKDB_EXTENSION, the platform name obtained from the\n get_plateform_name_from_duckdb() function, and the extension name passed as an argument to the function.

    \n
    \n", "signature": "(\textension_name: str,\tconn: duckdb.duckdb.DuckDBPyConnection,\tdownload: bool = True) -> str:", "funcdef": "def"}, {"fullname": "howard.functions.commons.load_duckdb_extension", "modulename": "howard.functions.commons", "qualname": "load_duckdb_extension", "kind": "function", "doc": "

    This function loads DuckDB extensions into a connection object and returns a boolean indicating\nwhether all extensions were successfully loaded.

    \n\n
    Parameters
    \n\n
      \n
    • conn: duckdb.DuckDBPyConnection object representing a connection to a DuckDB database
    • \n
    • duckdb_extensions: A list of strings representing the names of the DuckDB extensions to be\nloaded
    • \n
    \n\n
    Returns
    \n\n
    \n

    a boolean value indicating whether all the specified DuckDB extensions were successfully\n loaded or not.

    \n
    \n", "signature": "(conn: duckdb.duckdb.DuckDBPyConnection, duckdb_extensions: list) -> bool:", "funcdef": "def"}, {"fullname": "howard.functions.commons.TimeoutException", "modulename": "howard.functions.commons", "qualname": "TimeoutException", "kind": "class", "doc": "

    Common base class for all non-exit exceptions.

    \n", "bases": "builtins.Exception"}, {"fullname": "howard.functions.commons.time_limit", "modulename": "howard.functions.commons", "qualname": "time_limit", "kind": "function", "doc": "

    \n", "signature": "(seconds):", "funcdef": "def"}, {"fullname": "howard.functions.commons.duckdb_execute", "modulename": "howard.functions.commons", "qualname": "duckdb_execute", "kind": "function", "doc": "

    The duckdb_execute function executes a query using the DuckDB database engine and returns a\nboolean indicating whether the query was successful or not.

    \n\n
    Parameters
    \n\n
      \n
    • query: The query parameter is a string that represents the SQL query you want to execute in\nDuckDB. It can be any valid SQL statement, such as SELECT, INSERT, UPDATE, DELETE, etc
    • \n
    • threads: The \"threads\" parameter specifies the number of threads to use for executing the\nquery. By default, it is set to 1, meaning that the query will be executed using a single thread,\ndefaults to 1
    • \n
    \n\n
    Returns
    \n\n
    \n

    The function duckdb_execute returns a boolean value. It returns True if the query\n execution is successful, and False if it is not successful.

    \n
    \n", "signature": "(query: str, threads: int = 1) -> bool:", "funcdef": "def"}, {"fullname": "howard.functions.commons.genome_build_switch", "modulename": "howard.functions.commons", "qualname": "genome_build_switch", "kind": "function", "doc": "

    The genome_build_switch function takes an assembly name as input and returns a new\nassembly name if a different version of the same genome is available, otherwise it returns\nNone.

    \n\n
    Parameters
    \n\n
      \n
    • assembly: The assembly parameter is a string that represents the name or identifier\nof a genome assembly
    • \n
    \n\n
    Returns
    \n\n
    \n

    The function genome_build_switch returns a string.

    \n
    \n", "signature": "(assembly: str) -> str:", "funcdef": "def"}, {"fullname": "howard.functions.commons.get_argument", "modulename": "howard.functions.commons", "qualname": "get_argument", "kind": "function", "doc": "

    The get_argument function retrieves information about a specific argument from a dictionary, and\ncan also set its \"required\" status.

    \n\n
    Parameters
    \n\n
      \n
    • arguments: A dictionary containing information about the arguments passed to a function or\nmethod
    • \n
    • arg: The arg parameter is a string that represents the name of the argument that you want\nto retrieve information for
    • \n
    • required: The required parameter is a boolean value that determines whether the argument is\nrequired or not. If set to True, the function will return an empty dictionary if the argument is not\nfound in the arguments dictionary. If set to False (default), the function will still return an\nempty dictionary if, defaults to False
    • \n
    • remove_infos: The remove_infos parameter is a list that contains the names of specific\ninformation that you want to remove from the argument dictionary. In the code, it is used to remove\nspecific argument information such as \"gooey\" from the arg_infos dictionary
    • \n
    \n\n
    Returns
    \n\n
    \n

    a dictionary containing information about a specific argument, specified by the arg\n parameter. If the argument is found in the arguments dictionary, the function returns a dictionary\n containing the information about that argument. If the argument is not found, an empty dictionary is\n returned. The required parameter is used to specify whether the argument is required or not, and\n this information is added to

    \n
    \n", "signature": "(\targuments: dict = {},\targ: str = '',\trequired: bool = False,\tremove_infos: list = ['gooey', 'extra'],\tadd_metavar: bool = False) -> dict:", "funcdef": "def"}, {"fullname": "howard.functions.commons.get_argument_gooey", "modulename": "howard.functions.commons", "qualname": "get_argument_gooey", "kind": "function", "doc": "

    The function get_argument_gooey takes an argument and returns the corresponding widget and options\nfor the Gooey library in Python.

    \n\n
    Parameters
    \n\n
      \n
    • arg: The arg parameter is a string that represents the name of the argument you want to\nretrieve information for
    • \n
    \n\n
    Returns
    \n\n
    \n

    The function get_argument_gooey returns two values: widget and options.

    \n
    \n", "signature": "(arguments: dict = {}, arg: str = ''):", "funcdef": "def"}, {"fullname": "howard.functions.commons.get_argument_to_mk", "modulename": "howard.functions.commons", "qualname": "get_argument_to_mk", "kind": "function", "doc": "

    The function get_argument_to_mk generates a formatted string containing information about a\ncommand line argument, which can be output in either Markdown or HTML format.

    \n\n
    Parameters
    \n\n
      \n
    • arg: The arg parameter is a string that represents the name of the argument. It is used to\ngenerate the header and text for the argument
    • \n
    • argument: The argument parameter is a dictionary that contains information about the\nargument. It has the following keys:
    • \n
    • mode: The mode parameter is used to specify the format of the output. It can have two\npossible values: \"mk\" or \"html\". If \"mk\" is specified, the output will be formatted using Markdown\nsyntax. If \"html\" is specified, the output will be formatted using HTML syntax, defaults to mk
    • \n
    \n\n
    Returns
    \n\n
    \n

    a formatted string that provides information about a command line argument. The format of\n the string depends on the value of the mode parameter. If mode is set to \"html\", the string is\n formatted as an HTML <pre> block. Otherwise, the string is formatted as a Markdown code block. The\n string includes the argument name, metavariable, help text, required

    \n
    \n", "signature": "(arg: str, argument: dict = {}, mode: str = 'mk') -> str:", "funcdef": "def"}, {"fullname": "howard.functions.commons.help_generation_from_dict", "modulename": "howard.functions.commons", "qualname": "help_generation_from_dict", "kind": "function", "doc": "

    The help_generation_from_dict function generates help documentation from a dictionary input,\nsupporting markdown and HTML output formats with specific sections like \"__help\", \"__format\",\n\"__default\", \"__examples\", \"__code\", and \"__examples_code\".

    \n\n
    Parameters
    \n\n
      \n
    • element: The element parameter in the help_generation_from_dict function is a string that\nrepresents the current element or key in the dictionary for which help documentation is being\ngenerated. It is the specific key or element within the dictionary that you want to generate help\ndocumentation for
    • \n
    • help_dict: The help_dict parameter in the help_generation_from_dict function is a\ndictionary that contains the help documentation for various elements or keys. This dictionary\nstructure allows for organizing and storing information related to each element, such as help text,\nformatting details, default values, and examples. The function processes
    • \n
    • previous: The previous parameter in the help_generation_from_dict function is used to\nkeep track of the previous elements in the hierarchy. It is a string that represents the path to the\ncurrent element being processed. This parameter helps in maintaining the correct hierarchy level\nwhen generating help documentation for nested elements in a
    • \n
    • output_type: The output_type parameter in the help_generation_from_dict function\nspecifies the type of output format that you want the generated help documentation to be in. It can\ntake two possible values: \"markdown\" or \"html\". By default, the output type is set to markdown,\ndefaults to markdown
    • \n
    • level: The level parameter in the help_generation_from_dict function is used to keep\ntrack of the depth or level of recursion in the generation process. It starts at 1 for the initial\ncall and increments by 1 for each level of recursion into sub-elements. This parameter helps in\nformatting the, defaults to 1
    • \n
    • table: The table parameter in the help_generation_from_dict function is used to store the\ntable of contents for the generated help documentation. It is a string that contains the formatted\ntable of contents with links to different sections or elements within the documentation. This table\nhelps users navigate through the documentation easily
    • \n
    • generate_table: The generate_table parameter in the help_generation_from_dict function is\na boolean flag that determines whether the function should generate a table of contents for the help\ndocumentation. When set to True, the function will include a table of contents in the output based\non the hierarchy of elements in the, defaults to False
    • \n
    • code_type: The code_type parameter in the help_generation_from_dict function specifies\nthe type of code examples that will be included in the generated help documentation. It defaults to\n\"json\", meaning that the code examples provided in the \"__examples_code\" section of the dictionary\nwill be in JSON format
    • \n
    • auto_default: The auto_default parameter in the help_generation_from_dict function is a\nboolean flag that determines whether the function should automatically populate certain sections of\nthe help documentation based on the information available in the dictionary and the element's\narguments. When set to True, the function will automatically fill in sections, defaults to True
    • \n
    • previous_sections: The previous_sections parameter in the help_generation_from_dict\nfunction is a boolean flag that determines whether the function should include previous sections in\nthe hierarchy when generating help documentation for nested elements. When set to True, the\nfunction will maintain the previous sections in the hierarchy path, helping to provide, defaults to\nFalse
    • \n
    \n\n
    Returns
    \n\n
    \n

    The help_generation_from_dict function returns the generated help documentation based on\n the input help_dict dictionary. The output is formatted based on the specified output_type\n (either \"markdown\" or \"html\") and includes sections such as \"__help\", \"__format\", \"__default\", and\n \"__examples\" if they are present in the help_dict.

    \n
    \n", "signature": "(\telement: str,\thelp_dict: dict,\tprevious: str = '',\toutput_type: str = 'markdown',\tlevel: int = 1,\ttable: str = '',\tgenerate_table: bool = False,\tcode_type: str = '',\tauto_default: bool = True,\tprevious_sections: bool = False):", "funcdef": "def"}, {"fullname": "howard.functions.commons.help_generation_from_json", "modulename": "howard.functions.commons", "qualname": "help_generation_from_json", "kind": "function", "doc": "

    The help_generation_from_json function reads a JSON file containing help information, converts it\ninto a specified output format, and returns the generated help content.

    \n\n
    Parameters
    \n\n
      \n
    • help_json_file: The help_json_file parameter is a string that should contain the file path\nto the JSON file from which help information will be extracted. This JSON file likely contains\nstructured data that will be used to generate the help content
    • \n
    • output_type: The output_type parameter in the help_generation_from_json function\nspecifies the format in which the generated help content will be output. By default, it is set to\n\"markdown\", which means the help content will be formatted using Markdown syntax. However, you can\nalso specify other output formats such, defaults to markdown
    • \n
    • title: The title parameter in the help_generation_from_json function is a string that\nrepresents the title of the help documentation that will be generated. It is used to provide a title\nfor the help content to make it more organized and informative. By default, the title is set to\n\"Help\", defaults to Help (optional)
    • \n
    • code_type: The code_type parameter in the help_generation_from_json function is used to\nspecify the type of code examples that will be included in the generated help content. This\nparameter allows you to define the format or language of the code examples to be displayed alongside\nthe help information extracted from the JSON
    • \n
    • include_toc: The include_toc parameter in the help_generation_from_json function is a\nboolean flag that determines whether a table of contents (TOC) should be included in the generated\nhelp content. If include_toc is set to True, a table of contents will be generated based,\ndefaults to False
    • \n
    \n\n
    Returns
    \n\n
    \n

    The function help_generation_from_json returns the generated help content based on the\n information stored in the JSON file provided as input.

    \n
    \n", "signature": "(\thelp_json_file: str,\toutput_type: str = 'markdown',\ttitle='Help',\tcode_type: str = '',\tinclude_toc: bool = False):", "funcdef": "def"}, {"fullname": "howard.functions.commons.RawTextArgumentDefaultsHelpFormatter", "modulename": "howard.functions.commons", "qualname": "RawTextArgumentDefaultsHelpFormatter", "kind": "class", "doc": "

    Help message formatter which adds default values to argument help.

    \n\n

    Only the name of this class is considered a public API. All the methods\nprovided by the class are considered an implementation detail.

    \n", "bases": "argparse.ArgumentDefaultsHelpFormatter, argparse.RawTextHelpFormatter"}, {"fullname": "howard.functions.commons.help_header", "modulename": "howard.functions.commons", "qualname": "help_header", "kind": "function", "doc": "

    The help_header function generates a header for the help documentation based on the metadata\ninformation provided in the setup file.

    \n\n
    Parameters
    \n\n
      \n
    • setup: The setup parameter is a string that represents the path to a configuration file.\nThis file contains metadata about the program, such as its name, version, description, and long\ndescription content type
    • \n
    \n\n
    Returns
    \n\n
    \n

    The function help_header returns a string that represents the header for the help\n documentation. The header includes the program name, version, authors, and description.

    \n
    \n", "signature": "(setup: str = None) -> str:", "funcdef": "def"}, {"fullname": "howard.functions.commons.help_generation", "modulename": "howard.functions.commons", "qualname": "help_generation", "kind": "function", "doc": "

    The help_generation function generates a parser object for command-line arguments, as well as\nmarkdown or HTML help documentation for those arguments.

    \n\n
    Parameters
    \n\n
      \n
    • arguments_dict: A dictionary containing the arguments for the function. It has three keys:
    • \n
    • parser: The parser parameter is an instance of the argparse.ArgumentParser class. It is\nused to define the command-line interface and parse the command-line arguments. If no parser is\nprovided, a new instance of argparse.ArgumentParser will be created
    • \n
    • setup: The setup parameter is a string that represents the path to a configuration file.\nThis file contains metadata about the program, such as its name, version, description, and long\ndescription content type
    • \n
    • output_type: The output_type parameter determines the format of the output. It can be one\nof the following values:, defaults to parser
    • \n
    \n\n
    Returns
    \n\n
    \n

    The function help_generation returns different outputs based on the value of the\n output_type parameter.

    \n
    \n", "signature": "(arguments_dict: dict = {}, parser=None, output_type: str = 'parser'):", "funcdef": "def"}, {"fullname": "howard.functions.commons.format_arg_help", "modulename": "howard.functions.commons", "qualname": "format_arg_help", "kind": "function", "doc": "

    The function format_arg_help formats a help message for a function argument, including a default\nvalue if provided.

    \n\n
    Parameters
    \n\n
      \n
    • help_message: The help_message parameter is a string that contains the description or help\nmessage for a function or method argument. It provides information about the purpose or usage of the\nargument
    • \n
    • default_value: The default_value parameter in the format_arg_help function is an optional\nparameter that specifies a default value for the argument being described in the help message. If a\ndefault value is provided, it will be included in the formatted help message to indicate the default\nvalue for that argument
    • \n
    \n\n
    Returns
    \n\n
    \n

    The function format_arg_help returns a formatted help message with a default value\n appended at the end if provided.

    \n
    \n", "signature": "(help_message: str, default_value: object = None) -> str:", "funcdef": "def"}, {"fullname": "howard.functions.commons.bed_sort", "modulename": "howard.functions.commons", "qualname": "bed_sort", "kind": "function", "doc": "

    The bed_sort function reads a tab-separated input file, sorts the data based on columns 0, 1, and\n2 in ascending order, and writes the sorted data to a tab-separated output file.

    \n\n
    Parameters
    \n\n
      \n
    • input: The input parameter is the path to the input file that contains the data to be\nsorted. This file should be in a tab-separated format
    • \n
    • output: The output parameter is a string that specifies the path and filename of the output\nfile where the sorted data will be saved
    • \n
    \n", "signature": "(input: str, output: str) -> str:", "funcdef": "def"}, {"fullname": "howard.functions.commons.full_path", "modulename": "howard.functions.commons", "qualname": "full_path", "kind": "function", "doc": "

    The function full_path takes a path string as input and returns the full expanded path.

    \n\n
    Parameters
    \n\n
      \n
    • path: The full_path function takes a string path as input and returns the full path by\nexpanding the user's home directory in the path if it is not None
    • \n
    \n\n
    Returns
    \n\n
    \n

    The function full_path is returning the expanded version of the input path using\n os.path.expanduser(path). This function expands the ~ character in the path to the user's home\n directory.

    \n
    \n", "signature": "(path: str) -> str:", "funcdef": "def"}, {"fullname": "howard.functions.commons.get_default_argument", "modulename": "howard.functions.commons", "qualname": "get_default_argument", "kind": "function", "doc": "

    The function get_default_argument retrieves the default value of a specified argument from a\ndictionary of arguments.

    \n\n
    Parameters
    \n\n
      \n
    • arguments_dict: The arguments_dict parameter is a dictionary that contains information\nabout arguments
    • \n
    • argument: The get_default_argument function takes in two parameters:
    • \n
    \n\n
    Returns
    \n\n
    \n

    The function is attempting to return the default value of a specific argument from a\n dictionary of arguments. However, there is a mistake in the code. The correct key to access the\n argument's default value should be \"argument\" instead of \"arguments\". Therefore, the function will\n return the default value of the specified argument if it exists, otherwise it will return None.

    \n
    \n", "signature": "(arguments_dict: dict, argument: str):", "funcdef": "def"}, {"fullname": "howard.functions.commons.set_param", "modulename": "howard.functions.commons", "qualname": "set_param", "kind": "function", "doc": "

    The function set_param takes input arguments and adds them to a dictionary based on certain\nconditions.

    \n\n
    Parameters
    \n\n
      \n
    • param: The param parameter is a dictionary that stores configuration parameters or\nsettings. It is used to collect and store various arguments and their values based on the conditions\nspecified in the set_param function
    • \n
    • args: The args parameter in the set_param function is likely an instance of the\nargparse.Namespace class, which is typically used to store the command-line arguments parsed by\nthe argparse module in Python. It contains the values of the arguments provided by the user when\nthe script
    • \n
    • arguments_dict: The arguments_dict parameter seems to be a dictionary that likely contains\ninformation about arguments and their default values. This dictionary is used in the function\nset_param to determine whether a specific argument should be included in the param dictionary\nbased on certain conditions
    • \n
    • argument: The argument parameter in the set_param function represents the specific\nargument that you want to set in the param dictionary. It is the key that will be used to store\nthe value in the dictionary
    • \n
    • section: The section parameter in the set_param function is used to specify a section\nwithin the param dictionary where the argument value should be stored. If a section is provided,\nthe argument value will be stored under that section in the param dictionary. If no `section
    • \n
    \n\n
    Returns
    \n\n
    \n

    the updated param dictionary after setting the specified argument value based on the\n conditions provided in the function.

    \n
    \n", "signature": "(\tparam: dict,\targs: <module 'argparse' from '/Users/lebechea/miniconda/envs/howard_devel/lib/python3.10/argparse.py'>,\targuments_dict: dict,\targument: str,\tsection: list = None) -> dict:", "funcdef": "def"}, {"fullname": "howard.functions.commons.add_value_into_dict", "modulename": "howard.functions.commons", "qualname": "add_value_into_dict", "kind": "function", "doc": "

    The function add_value_into_dict adds a value into a dictionary tree based on the provided\nsections.

    \n\n
    Parameters
    \n\n
      \n
    • dict_tree: The dict_tree parameter is a dictionary representing a tree structure. It serves\nas the starting point for adding a value based on the provided sections
    • \n
    • sections: The sections parameter in the add_value_into_dict function represents a list of\nsections corresponding to successive keys in the dictionary. These sections are used to traverse the\ndictionary tree and determine the location where the value should be added. Each element in the\nsections list corresponds to a key in
    • \n
    • value: The value parameter in the add_value_into_dict function represents the value that\nyou want to add into the dictionary tree at the specified location determined by the sections\nlist. This value can be of any data type (e.g., int, str, list, dict, etc.)
    • \n
    \n\n
    Returns
    \n\n
    \n

    The function add_value_into_dict returns the updated dictionary tree after adding the\n value based on the given sections.

    \n
    \n", "signature": "(dict_tree: dict, sections: list = [], value=None):", "funcdef": "def"}, {"fullname": "howard.functions.commons.load_param", "modulename": "howard.functions.commons", "qualname": "load_param", "kind": "function", "doc": "

    The function load_param takes command line arguments and returns a dictionary containing\nparameters loaded from a file or as JSON.

    \n\n
    Parameters
    \n\n
      \n
    • args: It seems like the code snippet you provided is a function named load_param that takes\nan argument args of type argparse and returns a dictionary. The function is intended to load\nparameters from a file or a string
    • \n
    \n\n
    Returns
    \n\n
    \n

    A dictionary containing the loaded parameters is being returned.

    \n
    \n", "signature": "(\targs: <module 'argparse' from '/Users/lebechea/miniconda/envs/howard_devel/lib/python3.10/argparse.py'>) -> dict:", "funcdef": "def"}, {"fullname": "howard.functions.commons.load_config_args", "modulename": "howard.functions.commons", "qualname": "load_config_args", "kind": "function", "doc": "

    The function load_config_args takes in arguments, extracts specific keys from them, and loads\nparameters in JSON format.

    \n\n
    Parameters
    \n\n
      \n
    • args: The load_config_args function takes in an args object as input. This args object\nseems to contain various configuration parameters that the function will use to load and return\nspecific values
    • \n
    \n\n
    Returns
    \n\n
    \n

    The function load_config_args returns the variables arguments_dict, setup_cfg,\n config, and param.

    \n
    \n", "signature": "(args):", "funcdef": "def"}, {"fullname": "howard.functions.commons.load_args", "modulename": "howard.functions.commons", "qualname": "load_args", "kind": "function", "doc": "

    The load_args function processes arguments based on specified parameters and conditions, raising\nan error if a specified argument is not found.

    \n\n
    Parameters
    \n\n
      \n
    • param: The param parameter in the load_args function is a dictionary that stores the\narguments and their values. It is used to keep track of the arguments that have been loaded or\nprocessed during the argument parsing process
    • \n
    • args: The args parameter in the load_args function is an instance of the\nargparse.ArgumentParser class from the argparse module in Python. This object is used to parse\ncommand-line arguments and options. It contains information about the arguments passed to the script\nwhen it was executed
    • \n
    • arguments_dict: The arguments_dict parameter in the load_args function is a dictionary\nthat likely contains information about the arguments expected by the script. It may include details\nsuch as the argument names, their corresponding sections, and any additional parameters related to\neach argument. This dictionary is used within the `load_args
    • \n
    • command: The command parameter in the load_args function is a string that represents a\nspecific command or action for which arguments need to be loaded. This parameter is used to identify\nthe command-specific arguments that should be processed during argument parsing
    • \n
    • arguments_list: The arguments_list parameter in the load_args function is a dictionary\nthat contains the names of arguments that are expected to be present in the args object. This list\nis used to specify which arguments should be processed by the function load_args during the\nargument parsing process
    • \n
    • strict: The strict parameter in the load_args function is a boolean flag that determines\nwhether an error should be raised if an argument specified in the arguments_list list is not found\nin the args object. If strict is set to True, an error will be raised, defaults to False
    • \n
    \n\n
    Returns
    \n\n
    \n

    The function load_args is returning a dictionary named param after processing the\n arguments based on the input parameters and conditions specified in the function.

    \n
    \n", "signature": "(\tparam: dict,\targs: <module 'argparse' from '/Users/lebechea/miniconda/envs/howard_devel/lib/python3.10/argparse.py'>,\targuments_dict: dict,\tcommand: str = None,\targuments_list: dict = {},\tstrict: bool = False,\tsection_prefix: list = []) -> dict:", "funcdef": "def"}, {"fullname": "howard.functions.commons.get_random", "modulename": "howard.functions.commons", "qualname": "get_random", "kind": "function", "doc": "

    The function get_random generates a random string of uppercase letters and digits with a default\nlength of 10.

    \n\n
    Parameters
    \n\n
      \n
    • N: The parameter N in the get_random function represents the length of the random string\nthat will be generated. By default, if no value is provided for N, it will generate a random\nstring of length 10 consisting of uppercase letters and digits, defaults to 10
    • \n
    \n\n
    Returns
    \n\n
    \n

    A random string of length N consisting of uppercase letters and digits.

    \n
    \n", "signature": "(N: int = 10) -> str:", "funcdef": "def"}, {"fullname": "howard.functions.commons.transcripts_file_to_df", "modulename": "howard.functions.commons", "qualname": "transcripts_file_to_df", "kind": "function", "doc": "

    This Python function reads a transcripts file into a pandas DataFrame, filtering out comment lines.

    \n\n
    Parameters
    \n\n
      \n
    • transcripts_file: The transcripts_file parameter is a string that represents the file path\nto a file containing transcript information. This function is designed to read the contents of this\nfile and convert it into a pandas DataFrame. The file is expected to be tab-separated with two\ncolumns: \"transcript\" and \"gene
    • \n
    • column_names: The column_names parameter is a list that specifies the column names expected\nin the transcripts file. By default, it is set to [\"transcript\", \"gene\"], indicating that the file\nshould have two columns named \"transcript\" and \"gene\". If the actual column names in the
    • \n
    \n\n
    Returns
    \n\n
    \n

    A pandas DataFrame containing transcript and gene information read from the specified file\n after filtering out comment lines is being returned.

    \n
    \n", "signature": "(\ttranscripts_file: str,\tcolumn_names: list = ['transcript', 'gene']) -> pandas.core.frame.DataFrame:", "funcdef": "def"}, {"fullname": "howard.functions.commons.identical", "modulename": "howard.functions.commons", "qualname": "identical", "kind": "function", "doc": "

    The identical function compares the contents of multiple VCF files to determine if they are\nidentical.

    \n\n
    Parameters
    \n\n
      \n
    • vcf_list: The vcf_list parameter is a list of file paths to VCF (Variant Call Format) files\nthat you want to compare for identity. The function reads the contents of these files and checks if\nthey are identical based on the specified conditions
    • \n
    • begin: The begin parameter in the identical function is used to specify a string that\nindicates the beginning of a line in the input files. If a line in the input file starts with the\nspecified begin string, it will be skipped and not included in the comparison process. By default,\ndefaults to ##
    • \n
    • line_strip: The line_strip parameter in the identical function is a boolean flag that\ndetermines whether each line read from the input files should be stripped of leading and trailing\nwhitespaces before being compared. If line_strip is set to True, each line will be stripped\nusing the `strip, defaults to True
    • \n
    \n\n
    Returns
    \n\n
    \n

    The function identical is returning a boolean value. It returns True if all the lines\n in the VCF files provided in the vcf_list are identical, and False otherwise.

    \n
    \n", "signature": "(vcf_list: List[str], begin: str = '##', line_strip: bool = True) -> bool:", "funcdef": "def"}, {"fullname": "howard.functions.commons.check_docker_image_exists", "modulename": "howard.functions.commons", "qualname": "check_docker_image_exists", "kind": "function", "doc": "

    Checks if a Docker image with a specific tag exists in the local repository.

    \n\n
    Parameters
    \n\n
      \n
    • image_with_tag: Image name with tag (e.g., \"image: version\")
    • \n
    \n\n
    Returns
    \n\n
    \n

    True if the image exists, False otherwise

    \n
    \n", "signature": "(image_with_tag: str) -> bool:", "funcdef": "def"}, {"fullname": "howard.functions.commons.params_string_to_dict", "modulename": "howard.functions.commons", "qualname": "params_string_to_dict", "kind": "function", "doc": "

    The params_string_to_dict function in Python converts a string of parameters into a dictionary\nusing specified separators and clears certain characters from the parameter values.

    \n\n
    Parameters
    \n\n
      \n
    • params: The params parameter in the params_string_to_dict function is a string of\nparameters that you want to convert into a dictionary. It contains the information you want to parse\nand organize into key-value pairs
    • \n
    • param_sep: The param_sep parameter in the params_string_to_dict function is used to\nspecify the separator that separates different parameters in the input string params. By default,\nthe param_sep is set to \":\" in the function definition. This means that the function expects the\nparameters in the input, defaults to :
    • \n
    • var_val_sep: The var_val_sep parameter in the params_string_to_dict function is used to\nspecify the separator between the variable and value in the input string params. By default, it is\nset to \"=\", which means that the function expects the format of each parameter in the params,\ndefaults to =
    • \n
    • val_clear: The val_clear parameter in the params_string_to_dict function is a dictionary\nthat contains key-value pairs used to clear specific characters from the parameter values before\nstoring them in the resulting dictionary
    • \n
    • header: The header parameter in the params_string_to_dict function is a boolean flag that\ndetermines whether the input string params has a header that should be skipped when processing the\nparameters. If header is set to True, the function will start processing parameters from the\nsecond line onwards, defaults to True
    • \n
    \n\n
    Returns
    \n\n
    \n

    The function params_string_to_dict returns a dictionary containing the parameters\n extracted from the input string params.

    \n
    \n", "signature": "(\tparams: str,\tparam_sep: str = ':',\tvar_val_sep: str = '=',\tval_clear: dict = {'+': ',', ' ': ''},\theader: bool = True) -> dict:", "funcdef": "def"}, {"fullname": "howard.functions.commons.determine_value_type", "modulename": "howard.functions.commons", "qualname": "determine_value_type", "kind": "function", "doc": "

    The function determine_value_type determines the type of a given value in a string format,\nhandling lists of values separated by a specified separator and skipping specified null-like\nvalues.

    \n\n
    Parameters
    \n\n
      \n
    • value: The value parameter in the determine_value_type function is the input value\nthat you want to determine the type of. It can be a string containing one or more values\nseparated by a specified separator (default is ';')
    • \n
    • sep: The sep parameter in the determine_value_type function is used to specify the\nseparator character that is used to split the input value string into individual values. By\ndefault, the separator is set to \";\", but you can change it to a different character if needed,\ndefaults to ;
    • \n
    • skip_null: The skip_null parameter in the determine_value_type function is a list\nthat contains values that should be skipped during the type determination process. These values\nare considered as null-like or empty values and are not taken into account when determining the\ntype of the given value
    • \n
    \n\n
    Returns
    \n\n
    \n

    The function determine_value_type returns a string indicating the type of the given\n value. The possible return values are:

    \n \n
      \n
    • \"VARCHAR\" if the value contains at least one non-numeric character
    • \n
    • \"DOUBLE\" if the value contains at least one floating-point number
    • \n
    • \"BIGINT\" if the value contains only integers
    • \n
    • None if the value is empty or does not match any
    • \n
    \n
    \n", "signature": "(value: str, sep: str = ';', skip_null: list = ['', '.']) -> str:", "funcdef": "def"}, {"fullname": "howard.functions.commons.determine_column_types", "modulename": "howard.functions.commons", "qualname": "determine_column_types", "kind": "function", "doc": "

    The function determine_column_types analyzes a list of values to determine the predominant\ndata type among VARCHAR, DOUBLE, and BIGINT.

    \n\n
    Parameters
    \n\n
      \n
    • values_list: It seems like you have provided the code snippet for a function that\ndetermines the type of values in a list, but you have not provided the actual values_list that\nthe function will operate on. If you provide me with the values_list, I can help you test the\nfunction and see how it determines the
    • \n
    \n\n
    Returns
    \n\n
    \n

    the type of the column based on the types of values present in the input list. It will\n return \"VARCHAR\" if the list contains any string values, \"DOUBLE\" if it contains any float\n values, \"BIGINT\" if it contains any integer values, and \"VARCHAR\" if none of the specific types\n are found.

    \n
    \n", "signature": "(values_list: list) -> str:", "funcdef": "def"}, {"fullname": "howard.functions.commons.detect_column_type", "modulename": "howard.functions.commons", "qualname": "detect_column_type", "kind": "function", "doc": "

    The function detect_column_type determines the type of a given column in a DataFrame as either\nDATETIME, BOOLEAN, DOUBLE, or VARCHAR.

    \n\n
    Parameters
    \n\n
      \n
    • column: The function detect_column_type takes a column as input and determines its data\ntype based on certain conditions. The conditions are as follows:
    • \n
    \n\n
    Returns
    \n\n
    \n

    The function detect_column_type returns a string indicating the type of data in the input\n column. The possible return values are \"DATETIME\", \"BOOLEAN\", \"DOUBLE\", or \"VARCHAR\" based on the\n conditions checked in the function.

    \n
    \n", "signature": "(column) -> str:", "funcdef": "def"}, {"fullname": "howard.functions.commons.determine_column_number", "modulename": "howard.functions.commons", "qualname": "determine_column_number", "kind": "function", "doc": "

    \n", "signature": "(values_list: list) -> str:", "funcdef": "def"}, {"fullname": "howard.functions.commons.clean_annotation_field", "modulename": "howard.functions.commons", "qualname": "clean_annotation_field", "kind": "function", "doc": "

    The clean_annotation_field function removes characters from a string that are not alphanumeric or\nin a specified list.

    \n\n
    Parameters
    \n\n
      \n
    • name: The name parameter is a string that represents the input text that you want to clean.\nIt typically contains annotations or other text that you want to process
    • \n
    • char_allowed: The char_allowed parameter is a list that contains characters that are\nallowed to remain in the name string after cleaning. Any character in the name string that is\nnot alphanumeric and not in the char_allowed list will be removed during the cleaning process
    • \n
    \n\n
    Returns
    \n\n
    \n

    The function clean_annotation_field returns a cleaned version of the name string, where\n only alphanumeric characters and characters from the char_allowed list are kept.

    \n
    \n", "signature": "(name: str = '', char_allowed: list = None) -> str:", "funcdef": "def"}, {"fullname": "howard.functions.commons.docker_automount", "modulename": "howard.functions.commons", "qualname": "docker_automount", "kind": "function", "doc": "

    Add needed volume to the tool container, check first if we are already inside one otherwise return empty string

    \n\n
    Parameters
    \n\n
      \n
    • containerid: for other linux distribution catch container mount from container ID
    • \n
    \n\n
    Returns
    \n\n
    \n

    string containing volume to add

    \n
    \n", "signature": "() -> str:", "funcdef": "def"}, {"fullname": "howard.functions.databases", "modulename": "howard.functions.databases", "kind": "module", "doc": "

    \n"}, {"fullname": "howard.functions.databases.generate_databases_param", "modulename": "howard.functions.databases", "qualname": "generate_databases_param", "kind": "function", "doc": "

    This function generates database parameters based on specified arguments and assemblies.

    \n\n
    Parameters
    \n\n
      \n
    • args: The args parameter in the generate_databases_param function is expected to be an\ninstance of the argparse module, which is commonly used for parsing command-line arguments. This\nparameter is used to retrieve various arguments and options provided by the user when running the\nscript or program
    • \n
    • assemblies: The assemblies parameter is a list containing the assemblies for which\ndatabases will be generated. The function generate_databases_param takes various arguments using\nthe argparse module and generates database parameters based on these inputs. If the\ngenerate_param argument is provided and set to True,
    • \n
    \n\n
    Returns
    \n\n
    \n

    None

    \n
    \n", "signature": "(\targs: <module 'argparse' from '/Users/lebechea/miniconda/envs/howard_devel/lib/python3.10/argparse.py'>,\tassemblies: list = []):", "funcdef": "def"}, {"fullname": "howard.functions.databases.query_and_concatenate_columns", "modulename": "howard.functions.databases", "qualname": "query_and_concatenate_columns", "kind": "function", "doc": "

    This function performs an SQL query on a large Parquet file and concatenates multiple columns (if not empty),\nincluding the column name in the concatenation.

    \n\n
    Parameters
    \n\n
      \n
    • parquet_file: The path to the Parquet file
    • \n
    • output_file: The path to the output file where the concatenated data will be written
    • \n
    • columns: The list of columns to concatenate
    • \n
    \n", "signature": "(parquet_file: str, output_file: str, columns: list):", "funcdef": "def"}, {"fullname": "howard.functions.databases.databases_infos", "modulename": "howard.functions.databases", "qualname": "databases_infos", "kind": "function", "doc": "

    The databases_infos function scans database folders and retrieves information about the databases\nfound, including their folder, release, assembly, subdatabase, format, header, and parameters.

    \n\n
    Parameters
    \n\n
      \n
    • database_folders: A list of folders where the databases are located
    • \n
    • database_folder_releases: A list of specific releases of the database folders to include in\nthe search. If None, all releases will be included
    • \n
    • assembly: The assembly parameter is a string that specifies the assembly version of the\ndatabases to be searched. It is used to filter the databases based on their assembly version. The\ndefault value is \"hg19\", defaults to hg19
    • \n
    • database_formats: The database_formats parameter is a list that specifies the formats of\nthe databases to include in the results. If this parameter is not provided or is set to None, all\ndatabase formats will be included
    • \n
    • config: The config parameter is a dictionary that contains configuration settings for the\nfunction. It has the following structure:
    • \n
    \n\n
    Returns
    \n\n
    \n

    The databases_infos function returns a dictionary containing information about the\n databases found in the specified database folders. The keys of the dictionary are the paths to the\n database files, and the values are dictionaries containing the following information: folder,\n release, assembly, subdatabase, format, header, and parameters.

    \n
    \n", "signature": "(\tdatabase_folders: list = [],\tdatabase_folder_releases: list = ['current'],\tassembly: str = 'hg19',\tdatabase_formats: list = None,\tconfig: dict = {}) -> dict:", "funcdef": "def"}, {"fullname": "howard.functions.databases.databases_param", "modulename": "howard.functions.databases", "qualname": "databases_param", "kind": "function", "doc": "

    The databases_param function takes in a dictionary of database information, an optional output\nfile path, and a boolean flag for bcftools preference, and returns a dictionary containing the\nparameters for parquet and bcftools annotations.

    \n\n
    Parameters
    \n\n
      \n
    • databases_infos_dict: A dictionary containing information about databases. Each key in the\ndictionary represents the name of a database, and the corresponding value is another dictionary\ncontaining information about the database, such as its format and parameters
    • \n
    • output: The output parameter is a string that specifies the path and filename of the output\nfile where the generated JSON object will be written. If this parameter is not provided or is set to\nNone, the JSON object will not be written to a file
    • \n
    • output_description: The output_description parameter is a string that specifies the path\nand filename of the output file where the description of the databases will be written. If this\nparameter is not provided or is set to None, the description will not be written to a file
    • \n
    • bcftools_preference: The bcftools_preference parameter is a boolean flag that determines\nwhether to prioritize databases in the BCFTOOLS format. If bcftools_preference is set to True,\ndatabases in the BCFTOOLS format will be given priority over other formats. If `bcftools, defaults\nto False
    • \n
    \n\n
    Returns
    \n\n
    \n

    The function databases_param returns a dictionary object named \"param_stats_show\".

    \n
    \n", "signature": "(\tdatabases_infos_dict: dict,\toutput: str = None,\toutput_description: str = None,\tbcftools_preference: bool = False) -> dict:", "funcdef": "def"}, {"fullname": "howard.functions.databases.databases_download_annovar", "modulename": "howard.functions.databases", "qualname": "databases_download_annovar", "kind": "function", "doc": "

    This function downloads and extracts Annovar databases for specified assemblies and files.

    \n\n
    Parameters
    \n\n
      \n
    • folder: The folder where the Annovar databases will be downloaded to
    • \n
    • files: The files parameter is a list of specific Annovar database files to download. If not\nprovided, only the mandatory files will be downloaded. If set to \"ALL\", all available files will be\ndownloaded
    • \n
    • assemblies: A list of genome assemblies for which Annovar databases will be downloaded.\nDefault is [\"hg19\"]
    • \n
    • annovar_url: The URL where Annovar databases can be downloaded from, defaults to\nhttp://www.openbioinformatics.org/annovar/download
    • \n
    • threads: The \"threads\" parameter specifies the number of threads (parallel processes) to use\nfor download and extract/uncompress files. Default: 1
    • \n
    \n", "signature": "(\tfolder: str = None,\tfiles: list = None,\tassemblies: list = ['hg19'],\tannovar_url: str = 'http://www.openbioinformatics.org/annovar/download',\tthreads: int = 1) -> None:", "funcdef": "def"}, {"fullname": "howard.functions.databases.databases_download_snpeff", "modulename": "howard.functions.databases", "qualname": "databases_download_snpeff", "kind": "function", "doc": "

    The databases_download_snpeff function downloads and extracts snpEff databases for specified\ngenome assemblies.

    \n\n
    Parameters
    \n\n
      \n
    • folder: The folder parameter is a string that specifies the folder where the snpEff\ndatabases will be downloaded and stored. If the folder does not exist, it will be created
    • \n
    • assemblies: The assemblies parameter is a list of genome assemblies for which the snpEff\ndatabases need to be downloaded. It specifies the genome assemblies for which you want to download\nthe snpEff databases. For example, if you want to download the snpEff databases for the human genome\nassembly hg
    • \n
    • config: The config parameter is a dictionary that contains information about the tools and\ntheir configurations. It is used to retrieve the path to the Java binary and the path to the snpEff\nbinary
    • \n
    • threads: The threads parameter specifies the number of threads to be used for downloading\nthe snpEff databases. It determines the parallelism of the download process, allowing multiple files\nto be downloaded simultaneously, defaults to 1
    • \n
    \n", "signature": "(\tfolder: str = None,\tassemblies: list = ['hg19'],\tconfig: dict = {},\tthreads: int = 1) -> None:", "funcdef": "def"}, {"fullname": "howard.functions.databases.databases_download_genomes", "modulename": "howard.functions.databases", "qualname": "databases_download_genomes", "kind": "function", "doc": "

    This function downloads genome assemblies using genomepy package with options to specify genome\nfolder, provider, contig regex, and number of threads.

    \n\n
    Parameters
    \n\n
      \n
    • assemblies: a list of genome assembly names to download
    • \n
    • genomes_folder: The folder where the downloaded genome files will be saved. If no folder is\nspecified, the default folder will be used
    • \n
    • provider: The provider parameter specifies the source of the genome data. In this case, the\ndefault provider is set to \"UCSC\", which refers to the University of California, Santa Cruz Genome\nBrowser. Other possible providers could include NCBI or Ensembl, defaults to UCSC
    • \n
    • contig_regex: The contig_regex parameter is a regular expression used to filter the contigs\n(chromosomes or scaffolds) to be downloaded for a given genome assembly. It allows users to download\nonly a subset of the available contigs, based on their names or other characteristics. If\ncontig_regex is not specified
    • \n
    • threads: The \"threads\" parameter specifies the number of threads (parallel processes) to use\nfor downloading the genomes. This can speed up the process if the computer has multiple cores or\nprocessors. The default value is 1, meaning that the download will be done using a single thread,\ndefaults to 1
    • \n
    \n\n
    Returns
    \n\n
    \n

    None is being returned.

    \n
    \n", "signature": "(\tassemblies: list,\tgenomes_folder: str = '/Users/lebechea/howard/databases/genomes/current',\tprovider: str = 'UCSC',\tcontig_regex: str = None,\tthreads: int = 1) -> None:", "funcdef": "def"}, {"fullname": "howard.functions.databases.databases_download_refseq", "modulename": "howard.functions.databases", "qualname": "databases_download_refseq", "kind": "function", "doc": "

    The databases_download_refseq function downloads RefSeq files for a list of assemblies and returns\na dictionary of installed RefSeq files for each assembly.

    \n\n
    Parameters
    \n\n
      \n
    • assemblies: A list of assemblies for which the RefSeq files need to be downloaded. Each\nassembly is represented as a string
    • \n
    • refseq_folder: The refseq_folder parameter is a string that specifies the folder where the\nRefSeq files will be downloaded and stored. If this parameter is not provided, a default folder will\nbe used
    • \n
    • refseq_url: The refseq_url parameter is a string that represents the URL where the RefSeq\nfiles can be downloaded from
    • \n
    • refseq_prefix: The refseq_prefix parameter is a string that specifies the prefix for the\ndownloaded RefSeq files. By default, it is set to \"ncbiRefSeq\". This prefix is used to identify the\nRefSeq files for each assembly. For example, if the prefix is set to \"ncbi, defaults to ncbiRefSeq
    • \n
    • refseq_files: The refseq_files parameter is a list of filenames that need to be downloaded\nfor each assembly. The default value is [\"ncbiRefSeq.txt\", \"ncbiRefSeqLink.txt\"], but you can\nprovide your own list of filenames if needed
    • \n
    • refseq_format_file: The refseq_format_file parameter is a string that specifies the\nfilename of the RefSeq file that needs to be formatted. This file will be used as input for the\ndatabases_format_refseq function. By default, the value is set to \"ncbiRefSeq.txt\", defaults to\nncbiRefSeq.txt
    • \n
    • refseq_format_file_output: The refseq_format_file_output parameter is a string that\nspecifies the output file path for the formatted RefSeq file. This file will be generated by the\ndatabases_format_refseq function and will contain the formatted RefSeq data. If this parameter is\nnot provided, the formatted RefSeq
    • \n
    • include_utr_5: A boolean parameter that specifies whether to include the 5' untranslated\nregion (UTR) in the downloaded RefSeq files. If set to True, the 5' UTR will be included. If set to\nFalse, the 5' UTR will be excluded, defaults to True
    • \n
    • include_utr_3: The include_utr_3 parameter is a boolean value that specifies whether to\ninclude the 3' untranslated region (UTR) in the downloaded RefSeq files. If set to True, the 3'\nUTR will be included. If set to False, the 3, defaults to True
    • \n
    • include_chrM: The include_chrM parameter is a boolean value that determines whether to\ninclude the mitochondrial chromosome (chrM) in the downloaded RefSeq files. If set to True, the chrM\nwill be included; if set to False, it will be excluded, defaults to True
    • \n
    • include_non_canonical_chr: The include_non_canonical_chr parameter is a boolean value that\ndetermines whether or not to include non-canonical chromosomes in the downloaded RefSeq files. If\nset to True, non-canonical chromosomes will be included. If set to False, non-canonical\nchromosomes will be excluded, defaults to True
    • \n
    • include_non_coding_transcripts: The include_non_coding_transcripts parameter is a boolean\nflag that determines whether non-coding transcripts should be included in the downloaded RefSeq\nfiles. If set to True, non-coding transcripts will be included. If set to False, non-coding\ntranscripts will be excluded, defaults to True
    • \n
    • include_transcript_ver: The include_transcript_ver parameter is a boolean value that\ndetermines whether to include the transcript version in the downloaded RefSeq files. If set to\nTrue, the transcript version will be included. If set to False, the transcript version will be\nexcluded, defaults to True
    • \n
    • threads: The threads parameter specifies the number of threads to use for downloading and\nextracting the RefSeq files. It determines the level of parallelism in the download and extraction\nprocess. By default, it is set to 1, which means that the download and extraction will be performed\nsequentially. If you want, defaults to 1
    • \n
    • memory: The memory parameter specifies the amount of max memory (in Gb) to use for sorting.\ndefaults to 1
    • \n
    \n\n
    Returns
    \n\n
    \n

    The function databases_download_refseq returns a dictionary installed_refseq which\n contains information about the downloaded RefSeq files for each assembly. The keys of the dictionary\n are the assembly names, and the values are lists of the installed RefSeq files for each assembly.

    \n
    \n", "signature": "(\tassemblies: list,\trefseq_folder: str = None,\trefseq_url: str = None,\trefseq_prefix: str = 'ncbiRefSeq',\trefseq_files: List = ['ncbiRefSeq.txt', 'ncbiRefSeqLink.txt'],\trefseq_format_file: str = 'ncbiRefSeq.txt',\trefseq_format_file_output: str = None,\tinclude_utr_5: bool = True,\tinclude_utr_3: bool = True,\tinclude_chrM: bool = True,\tinclude_non_canonical_chr: bool = True,\tinclude_non_coding_transcripts: bool = True,\tinclude_transcript_ver: bool = True,\tthreads: int = 1,\tmemory: int = 1) -> dict:", "funcdef": "def"}, {"fullname": "howard.functions.databases.databases_format_refseq", "modulename": "howard.functions.databases", "qualname": "databases_format_refseq", "kind": "function", "doc": "

    The databases_format_refseq function takes a RefSeq file as input, formats it according to\nspecified criteria, and outputs the formatted file.

    \n\n
    Parameters
    \n\n
      \n
    • refseq_file: The refseq_file parameter is a string that represents the path to the input\nRefSeq file. This file contains information about gene annotations, including chromosome, start and\nend positions, strand, and other details
    • \n
    • output_file: The output_file parameter is a string that represents the name of the file\nwhere the formatted data will be written
    • \n
    • include_utr_5: The include_utr_5 parameter is a boolean that determines whether to include\nthe 5' UTR (untranslated region) in the output file. If set to True, the 5' UTR will be included.\nIf set to False, the 5' U, defaults to True
    • \n
    • include_utr_3: A boolean parameter that determines whether to include the 3' UTR\n(untranslated region) in the output. If set to True, the 3' UTR will be included. If set to False,\nthe 3' UTR will be excluded, defaults to True
    • \n
    • include_chrM: The include_chrM parameter is a boolean that determines whether to include\ntranscripts from the mitochondrial chromosome (chrM or chrMT) in the output file. If set to True,\ntranscripts from the mitochondrial chromosome will be included. If set to False, transcripts from\nthe mitochondrial chromosome will be excluded, defaults to True
    • \n
    • include_non_canonical_chr: The parameter include_non_canonical_chr determines whether or\nnot to include non-canonical chromosomes in the output. If set to True, non-canonical chromosomes\nwill be included. If set to False, non-canonical chromosomes will be excluded, defaults to True
    • \n
    • include_non_coding_transcripts: The parameter include_non_coding_transcripts determines\nwhether non-coding transcripts should be included in the output file. If set to True, non-coding\ntranscripts will be included. If set to False, non-coding transcripts will be excluded, defaults\nto True
    • \n
    • include_transcript_ver: The include_transcript_ver parameter determines whether to include\nthe transcript version in the output file. If set to True, the transcript version will be included\nin the output file. If set to False, the transcript version will be removed from the output file.\nThe default value is `True, defaults to True
    • \n
    • sort: The sort parameter determines whether to sort the output file in ascending order\nbased on the chromosome and start position. If set to True, the file will be sorted. If set to\nFalse, the file will not be sorted. The default value is False, defaults to False
    • \n
    • header: The header parameter is a boolean that determines whether to include a header line\nin the output file. If set to True, a header line will be included. If set to False, no header\nline will be included. The default value is False, defaults to False
    • \n
    • header_first_line: The header_first_line parameter is a boolean that determines whether to\ninclude the header line as the first line in the output file. If set to True, the header line will\nbe included as the first line. If set to False, the header line will not be included as the first,\ndefaults to True
    • \n
    \n\n
    Returns
    \n\n
    \n

    The function databases_format_refseq returns the path of the output file.

    \n
    \n", "signature": "(\trefseq_file: str,\toutput_file: str,\tinclude_utr_5: bool = True,\tinclude_utr_3: bool = True,\tinclude_chrM: bool = True,\tinclude_non_canonical_chr: bool = True,\tinclude_non_coding_transcripts: bool = True,\tinclude_transcript_ver: bool = True,\tsort: bool = False,\theader: bool = False,\theader_first_line: bool = True) -> str:", "funcdef": "def"}, {"fullname": "howard.functions.databases.databases_download_dbnsfp", "modulename": "howard.functions.databases", "qualname": "databases_download_dbnsfp", "kind": "function", "doc": "

    The databases_download_dbnsfp function is used to download and process dbNSFP databases for\nspecified genome assemblies, generating Parquet and VCF files based on the provided configurations.

    \n\n
    Parameters
    \n\n
      \n
    • assemblies: A list of genome assemblies for which to download and process dbNSFP data. Each\nassembly should be specified as a string
    • \n
    • dbnsfp_folder: The dbnsfp_folder parameter is a string that specifies the folder where the\ndbNSFP database files are located. If this parameter is not provided, the function will attempt to\ndownload the dbNSFP database files from the dbnsfp_url parameter
    • \n
    • dbnsfp_url: The dbnsfp_url parameter represents the URL from which the dbNSFP database\nfiles can be downloaded. This URL is used by the function to fetch the necessary database files for\nprocessing
    • \n
    • dbnsfp_release: The dbnsfp_release parameter specifies the version of the dbNSFP database\nto be used. The default value is \"4.4a\", but you can specify a different version if needed, defaults\nto 4.4a
    • \n
    • threads: The threads parameter specifies the number of threads to use for parallel\nprocessing. Increasing the number of threads can potentially speed up the execution time of the\nfunction, especially if there are multiple cores available on the machine. It determines how many\ntasks can be executed simultaneously
    • \n
    • memory: The memory parameter specifies the amount of maximum memory (in gigabytes) to use\nfor sorting. It is used in the context of processing and sorting data efficiently. The default value\nfor this parameter is set to 1, meaning that 1 gigabyte of memory will be allocated for sorting\noperations, defaults to 1
    • \n
    • parquet_size: The parquet_size parameter specifies the maximum size (in megabytes) of data\nfiles in the Parquet folder. It determines the size at which the Parquet files will be split or\ngenerated. The value should be an integer representing the size in megabytes, defaults to 100
    • \n
    • generate_parquet_file: The generate_parquet_file parameter in the\ndatabases_download_dbnsfp function is a boolean flag that indicates whether to generate a Parquet\nfile or not. If set to True, the function will create Parquet files based on the specified\nparameters and data. If set to `, defaults to False
    • \n
    • generate_sub_databases: The generate_sub_databases parameter in the\ndatabases_download_dbnsfp function determines whether to generate sub-databases based on the\nassemblies provided. If set to True, the function will create sub-databases based on the specified\ngenome assemblies. If set to False, the function, defaults to False
    • \n
    • generate_vcf_file: The generate_vcf_file parameter in the databases_download_dbnsfp\nfunction is a boolean flag that indicates whether to generate a VCF file based on the specified\nparameters and data. If set to True, the function will generate a VCF file. If set to `False,\ndefaults to False
    • \n
    • not_generate_files_all: The not_generate_files_all parameter in the\ndatabases_download_dbnsfp function is a boolean flag that indicates whether to skip generating\ndatabase Parquet/VCF files for the entire database. If set to True, the function will not generate\nfiles for the entire database. If set to, defaults to False
    • \n
    • genomes_folder: The genomes_folder parameter specifies the folder where the genome files\nare located. It is a string that represents the path to the folder containing genome assemblies
    • \n
    • add_info: The add_info parameter in the databases_download_dbnsfp function is a boolean\nflag that determines whether to include an \"INFO\" column in the Parquet folder/file. If set to\nTrue, the function will add an INFO column to the generated Parquet files. This INFO, defaults to\nFalse
    • \n
    • only_info: The only_info parameter in the databases_download_dbnsfp function is a boolean\nflag that, when set to True, indicates that only the \"INFO\" column should be included in the\noutput. This parameter is used to control whether to include only the \"INFO\" column in, defaults to\nFalse
    • \n
    • row_group_size: The row_group_size parameter specifies the row group size to generate the\nParquet folder and file. It is used to control the size of row groups in the Parquet file. This\nparameter affects the organization of data within the Parquet file and can impact performance and\nmemory usage during processing. The, defaults to 100000
    • \n
    • uniquify: The uniquify parameter in the databases_download_dbnsfp function is a boolean\nflag that determines whether to generate unique values for each annotation in the Parquet file. When\nset to True, the function will ensure that each annotation column contains only unique values.\nThis can be, defaults to False
    • \n
    \n\n
    Returns
    \n\n
    \n

    The function databases_download_dbnsfp returns a boolean value indicating whether the\n process of downloading and processing dbNSFP databases for specified genome assemblies was\n successful or not.

    \n
    \n", "signature": "(\tassemblies: list,\tdbnsfp_folder: str = None,\tdbnsfp_url: str = None,\tdbnsfp_release: str = '4.4a',\tthreads: int = None,\tmemory: int = 1,\tparquet_size: int = 100,\tgenerate_parquet_file: bool = False,\tgenerate_sub_databases: bool = False,\tgenerate_vcf_file: bool = False,\tnot_generate_files_all: bool = False,\tgenomes_folder: str = None,\tadd_info: bool = False,\tonly_info: bool = False,\trow_group_size: int = 100000,\tuniquify: bool = False) -> bool:", "funcdef": "def"}, {"fullname": "howard.functions.databases.databases_download_alphamissense", "modulename": "howard.functions.databases", "qualname": "databases_download_alphamissense", "kind": "function", "doc": "

    The databases_download_alphamissense function downloads and converts AlphaMissense databases for a\nlist of assemblies.

    \n\n
    Parameters
    \n\n
      \n
    • assemblies: assemblies is a list of assemblies for which the AlphaMissense database needs\nto be downloaded. Each assembly represents a specific genome or genetic sequence
    • \n
    • alphamissense_folder: The alphamissense_folder parameter is a string that specifies the\nfolder where the AlphaMissense files will be downloaded and stored. It is set to\nDEFAULT_ANNOTATIONS_FOLDER by default, which is likely a predefined constant or variable in your\ncode
    • \n
    • alphamissense_url: The alphamissense_url parameter is a string that specifies the URL where\nthe AlphaMissense files are located. It is used to construct the download URL for each assembly's\nAlphaMissense file
    • \n
    • threads: The threads parameter is an optional parameter that specifies the number of\nthreads to use for the conversion process. It determines the level of parallelism when converting\nthe AlphaMissense TSV file to the Parquet format. If not specified, the default value will be used
    • \n
    \n\n
    Returns
    \n\n
    \n

    The function databases_download_alphamissense returns a boolean value True.

    \n
    \n", "signature": "(\tassemblies: list,\talphamissense_folder: str = '/Users/lebechea/howard/databases/annotations/current',\talphamissense_url: str = 'https://storage.googleapis.com/dm_alphamissense',\tthreads: int = None) -> bool:", "funcdef": "def"}, {"fullname": "howard.functions.databases.databases_download_exomiser", "modulename": "howard.functions.databases", "qualname": "databases_download_exomiser", "kind": "function", "doc": "

    The databases_download_exomiser function downloads and sets up the Exomiser database for the\nspecified assemblies.

    \n\n
    Parameters
    \n\n
      \n
    • assemblies: A list of assemblies for which to download Exomiser databases. Each assembly is a\nstring representing a genome build, such as \"GRCh37\" or \"GRCh38\"
    • \n
    • exomiser_folder: The exomiser_folder parameter is a string that specifies the folder where\nthe Exomiser databases will be downloaded and stored. If the folder does not exist, it will be\ncreated
    • \n
    • exomiser_application_properties: The exomiser_application_properties parameter is a string\nrepresenting the path to the Exomiser application properties file. This file contains configuration\nsettings for the Exomiser tool. If this parameter is not provided, the function will attempt to\nlocate the application properties file automatically based on the Exomiser
    • \n
    • exomiser_url: The exomiser_url parameter is the URL where the Exomiser database files can\nbe downloaded from. It is used to construct the download URLs for the phenotype and assembly files
    • \n
    • exomiser_release: The exomiser_release parameter is used to specify the version of the\nExomiser data to download. If it is set to \"default\", \"auto\", or \"config\", the function will attempt\nto retrieve the version from the exomiser.application.properties file. If it is
    • \n
    • exomiser_phenotype_release: The exomiser_phenotype_release parameter is used to specify the\nrelease version of the Exomiser phenotype database. If not provided, it will default to the value\nspecified in the application.properties file or the latest available release
    • \n
    • exomiser_remm_release: The exomiser_remm_release parameter is used to specify the version\nof the ReMM (Regulatory Mendelian Mutation) database to download. If the value is set to \"default\",\n\"auto\", or \"config\", it will try to retrieve the version from the application.properties
    • \n
    • exomiser_remm_url: The exomiser_remm_url parameter is the URL where the ReMM (Regulatory\nMendelian Mutation) database can be downloaded from. It is used in the function to construct the\ndownload URL for the ReMM database files, defaults to https://kircherlab.bihealth.org/download/ReMM
    • \n
    • exomiser_cadd_release: The exomiser_cadd_release parameter is used to specify the version\nof the CADD (Combined Annotation Dependent Depletion) database to download. If the value is set to\n\"default\", \"auto\", or \"config\", it will try to retrieve the version from the `exom
    • \n
    • exomiser_cadd_url: The exomiser_cadd_url parameter is the URL where the CADD (Combined\nAnnotation Dependent Depletion) database files can be downloaded from. It is used to construct the\ndownload URLs for the CADD database files, defaults to https://kircherlab.bihealth.org/download/CADD
    • \n
    • exomiser_cadd_url_snv_file: The parameter exomiser_cadd_url_snv_file is the name of the\nfile containing the SNV (Single Nucleotide Variant) data for the CADD (Combined Annotation Dependent\nDepletion) database, defaults to whole_genome_SNVs.tsv.gz
    • \n
    • exomiser_cadd_url_indel_file: The parameter exomiser_cadd_url_indel_file is the name of the\nINDEL file that will be downloaded from the CADD database, defaults to InDels.tsv.gz
    • \n
    • threads: The threads parameter specifies the number of threads to use for parallel\nprocessing. It determines how many tasks can be executed simultaneously. Increasing the number of\nthreads can potentially speed up the execution time of the function, especially if there are\nmultiple cores available on the machine
    • \n
    \n", "signature": "(\tassemblies: list,\texomiser_folder: str = '/Users/lebechea/howard/databases/exomiser/current',\texomiser_application_properties: str = None,\texomiser_url: str = 'http://data.monarchinitiative.org/exomiser',\texomiser_release: str = None,\texomiser_phenotype_release: str = None,\texomiser_remm_release: str = None,\texomiser_remm_url: str = 'https://kircherlab.bihealth.org/download/ReMM',\texomiser_cadd_release: str = None,\texomiser_cadd_url: str = 'https://kircherlab.bihealth.org/download/CADD',\texomiser_cadd_url_snv_file: str = 'whole_genome_SNVs.tsv.gz',\texomiser_cadd_url_indel_file: str = 'InDels.tsv.gz',\tthreads: int = 1) -> bool:", "funcdef": "def"}, {"fullname": "howard.functions.databases.databases_download_dbsnp", "modulename": "howard.functions.databases", "qualname": "databases_download_dbsnp", "kind": "function", "doc": "

    The function databases_download_dbsnp downloads dbSNP files, generates VCF files, and converts\nthem to Parquet format.

    \n\n
    Parameters
    \n\n
      \n
    • assemblies: A list of genome assemblies for which to download dbSNP data
    • \n
    • dbsnp_folder: The folder where the dbSNP files will be downloaded and stored
    • \n
    • dbsnp_releases: List of releases to download. Default: [\"b156\"]
    • \n
    • dbsnp_release_default: Default release to link in default folder. Default: first release in dbsnp_releases
    • \n
    • dbsnp_url: The dbsnp_url parameter is a string that represents the base URL where the dbSNP\nfiles are located. This URL is used to construct the full URL for downloading the dbSNP files
    • \n
    • dbsnp_url_files: The dbsnp_url_files parameter is a dictionary that maps assembly names to\nspecific dbSNP URL files. It allows you to provide custom dbSNP URL files for specific assemblies\ninstead of using the default file naming convention
    • \n
    • dbsnp_url_files_prefix: The dbsnp_url_files_prefix parameter is a string that represents the\nprefix of the dbSNP file name for a specific assembly. It is used to construct the full URL of the\ndbSNP file to be downloaded. By default, the value is set to \"GCF_000001405\"
    • \n
    • dbsnp_assemblies_map: The dbsnp_assemblies_map parameter is a dictionary that maps assembly\nnames to their corresponding dbSNP versions. It is used to construct the dbSNP file name based on\nthe assembly name. For example, if the assembly is \"hg19\", the corresponding dbSNP version is \"
    • \n
    • genomes_folder: The genomes_folder parameter is a string that specifies the folder where the\ngenome index files are located. These index files are used for generating the VCF file from the\ndownloaded dbSNP file
    • \n
    • threads: The threads parameter specifies the number of threads to use for downloading and\nprocessing the dbSNP files, defaults to 1
    • \n
    • memory: The memory parameter specifies the amount of max memory (in Gb) to use for sorting.\ndefaults to 1
    • \n
    • dbsnp_vcf: A boolean flag indicating whether to generate a VCF file from the downloaded\ndbSNP data. If set to True, the function will generate a VCF file. If set to False, the function\nwill not generate a VCF file, defaults to False
    • \n
    • dbsnp_parquet: A boolean flag indicating whether to generate a Parquet file from the\ndownloaded dbSNP data. If set to True, a Parquet file will be generated; if set to False, no Parquet\nfile will be generated, defaults to False
    • \n
    \n", "signature": "(\tassemblies: list,\tdbsnp_folder: str = '/Users/lebechea/howard/databases/exomiser/dbsnp',\tdbsnp_releases: list = ['b156'],\tdbsnp_release_default: str = None,\tdbsnp_url: str = 'https://ftp.ncbi.nih.gov/snp/archive',\tdbsnp_url_files: dict = None,\tdbsnp_url_files_prefix: str = 'GCF_000001405',\tdbsnp_assemblies_map: dict = {'hg19': '25', 'hg38': '40'},\tgenomes_folder: str = '/Users/lebechea/howard/databases/genomes/current',\tthreads: int = 1,\tmemory: int = 1,\tdbsnp_vcf: bool = False,\tdbsnp_parquet: bool = False,\tdbsnp_parquet_explode_infos: bool = True) -> str:", "funcdef": "def"}, {"fullname": "howard.functions.databases.databases_download_hgmd", "modulename": "howard.functions.databases", "qualname": "databases_download_hgmd", "kind": "function", "doc": "

    The databases_download_hgmd function converts an HGMD database file into VCF, Parquet, and TSV\nformats.

    \n\n
    Parameters
    \n\n
      \n
    • assemblies: A list of assemblies for which the HGMD database should be downloaded and\nconverted. Only one assembly can be specified
    • \n
    • hgmd_file: The hgmd_file parameter is a string that represents the path to the HGMD\ndatabase file in VCF format. This file contains the variants and their associated information
    • \n
    • hgmd_folder: The hgmd_folder parameter is a string that represents the path to the folder\nwhere the HGMD database files will be stored. If no value is provided, it will use the\nDEFAULT_ANNOTATIONS_FOLDER constant as the default value
    • \n
    • output_basename: The output_basename parameter is a string that specifies the base name for\nthe output files. If not provided, it will be set as the base name of the input HGMD file without\nthe assembly information
    • \n
    • threads: The threads parameter specifies the number of threads to use for processing the\nHGMD database. It determines the level of parallelism and can help speed up the conversion process
    • \n
    • memory: The memory parameter specifies the amount of max memory (in Gb) to use for sorting.\ndefaults to 1
    • \n
    • genomes_folder: The genomes_folder parameter is a string that specifies the folder where\nthe genome files are located. If this parameter is not provided, it will default to a constant value\nDEFAULT_GENOME_FOLDER
    • \n
    • to_parquet: The to_parquet parameter is a boolean value that specifies whether the HGMD\ndatabase should be converted to the Parquet format or not. If set to True, the database will be\nconverted to Parquet format. If set to False, the conversion will be skipped, defaults to True
    • \n
    • to_tsv: The to_tsv parameter is a boolean value that specifies whether the HGMD database\nshould be converted to TSV format or not. If set to True, the function will generate a TSV file\nfrom the HGMD database. If set to False, the TSV conversion will be, defaults to True
    • \n
    \n\n
    Returns
    \n\n
    \n

    a boolean value indicating whether the HGMD database conversion was successful or not.

    \n
    \n", "signature": "(\tassemblies: list,\thgmd_file: str,\thgmd_folder: str = '/Users/lebechea/howard/databases/annotations/current',\toutput_basename: str = None,\tthreads: int = None,\tmemory: int = 1,\tgenomes_folder: str = None,\tto_parquet: bool = True,\tto_tsv: bool = True) -> bool:", "funcdef": "def"}, {"fullname": "howard.functions.from_annovar", "modulename": "howard.functions.from_annovar", "kind": "module", "doc": "

    \n"}, {"fullname": "howard.functions.from_annovar.TYPES", "modulename": "howard.functions.from_annovar", "qualname": "TYPES", "kind": "variable", "doc": "

    \n", "default_value": "{'int': 'Integer', 'int64': 'Integer', 'float': 'Float', 'float64': 'Float', 'object': 'String'}"}, {"fullname": "howard.functions.from_annovar.from_annovar", "modulename": "howard.functions.from_annovar", "qualname": "from_annovar", "kind": "function", "doc": "

    This function converts an Annovar database to a VCF and Parquet format.

    \n\n
    Parameters
    \n\n
      \n
    • args: args is an object with several attributes representing the input parameters for the\nfunction. These attributes include:
    • \n
    \n", "signature": "(\targs: <module 'argparse' from '/Users/lebechea/miniconda/envs/howard_devel/lib/python3.10/argparse.py'>) -> None:", "funcdef": "def"}, {"fullname": "howard.functions.from_annovar.annovar_to_vcf", "modulename": "howard.functions.from_annovar", "qualname": "annovar_to_vcf", "kind": "function", "doc": "

    This function converts an ANNOVAR file to a VCF file and optionally to a Parquet file, with various\noptions for annotations, headers, databases, and memory usage.

    \n\n
    Parameters
    \n\n
      \n
    • input_file: The path to the input file in ANNOVAR format that needs to be converted to VCF\nformat
    • \n
    • output_file: The name of the output VCF file that will be generated by the function
    • \n
    • output_file_parquet: output_file_parquet is an optional parameter that specifies the name of\nthe output file in Parquet format. If this parameter is not provided, the output will not be saved\nin Parquet format
    • \n
    • annotations: This parameter is used to specify the location of the ANNOVAR annotation\ndatabase files. If not provided, ANNOVAR will use the default location
    • \n
    • header_file: The path to a file containing the header information for the VCF output. This\ncan be used to customize the output format of the VCF file. If not provided, a default header will\nbe used
    • \n
    • database_name: The name of the ANNOVAR database used for annotation
    • \n
    • bcftools: The path to the bcftools executable, defaults to bcftools
    • \n
    • genome: The genome parameter specifies the reference genome file to be used for the\nconversion from annovar format to VCF format, defaults to hg19.fa
    • \n
    • threads: The number of threads to use for processing. This can speed up the process if your\ncomputer has multiple cores
    • \n
    • maxmem: The maximum amount of memory that can be used by the program. It is usually specified\nin units of bytes, kilobytes, megabytes, or gigabytes. For example, \"2G\" means 2 gigabytes of memory
    • \n
    • remove_annotations: remove_annotations is a list of annotations to be removed from the\noutput VCF file. These annotations will not be included in the final VCF file
    • \n
    • reduce_memory: A boolean parameter that determines whether to reduce memory usage during the\nconversion process. If set to True, the function will attempt to reduce memory usage by using a more\nmemory-efficient algorithm, but this may result in slower performance. If set to False, the function\nwill use a faster algorithm that may consume more, defaults to False
    • \n
    • multi_variant: A boolean parameter that determines if the input file contains multiple annotations\nfor each variant (position ref alt). If set to False, the function will attempt to reduce memory usage\nwith a specific query without 'group by', for a more memory-efficient algorithm. If set to True, the function\nwill use a query using 'group by', which may consume more memory. If set to None, the function will\nauto-determine the parameter value with a sample of variants. Defaults to None (auto)
    • \n
    \n", "signature": "(\tinput_file: str,\toutput_file: str,\toutput_file_parquet: str = None,\tannotations: str = None,\theader_file: str = None,\tdatabase_name: str = None,\tbcftools: str = 'bcftools',\tgenome: str = 'hg19.fa',\tthreads: int = None,\tmaxmem: str = None,\tremove_annotations: list = [],\treduce_memory: bool = None,\tmulti_variant: bool = None) -> None:", "funcdef": "def"}, {"fullname": "howard.functions.from_annovar.parquet_info_explode", "modulename": "howard.functions.from_annovar", "qualname": "parquet_info_explode", "kind": "function", "doc": "

    This function takes a parquet file, splits it by chromosome, explodes the INFO column, and then\nmerges the exploded files back together.

    \n\n
    Parameters
    \n\n
      \n
    • input_file: The path to the input file, which can be either a TSV or VCF file
    • \n
    • output_file: The name of the output file in Parquet format after exploding the input file
    • \n
    • threads: The number of threads to use for processing the parquet file, defaults to None (all)
    • \n
    • memory: The amount of memory to use for processing the parquet file, defaults to None (all)
    • \n
    • reduce_memory: The reduce_memory parameter is a boolean flag that determines whether or not\nto use memory reduction techniques during the execution of the function. If set to True, the\nfunction will attempt to reduce memory usage during the execution, which may result in slower\nperformance but lower memory usage. If set to `, defaults to False
    • \n
    \n", "signature": "(\tinput_file: str,\toutput_file: str,\tthreads: int = None,\tmemory: str = None,\treduce_memory: bool = False) -> None:", "funcdef": "def"}, {"fullname": "howard.functions.from_annovar.tsv_to_parquet", "modulename": "howard.functions.from_annovar", "qualname": "tsv_to_parquet", "kind": "function", "doc": "

    The function converts a TSV file to a Parquet file with customizable options.

    \n\n
    Parameters
    \n\n
      \n
    • tsv: The path to the TSV file that needs to be converted to Parquet format
    • \n
    • parquet: parquet is the file path and name of the output Parquet file that will be created\nby the function
    • \n
    • delim: The delimiter used in the TSV file to separate columns. If not specified, the default\ndelimiter (tab) will be used
    • \n
    • columns: The columns parameter is a dictionary that maps column names to their data types.\nIt is used to specify the schema of the resulting Parquet file. For example, if the input TSV file\nhas columns \"name\", \"age\", and \"salary\", and we want \"name\" to be
    • \n
    • quote: The quote parameter is an optional parameter that specifies the character used to\nquote fields in the TSV file. If not specified, the default quote character is double quotes (\")
    • \n
    • nullstr: The nullstr parameter is used to specify the string that represents null values in\nthe input TSV file. This parameter is used to correctly interpret and convert null values in the TSV\nfile to null values in the resulting Parquet file. For example, if the null value in the TSV
    • \n
    • skip: The skip parameter is an optional integer parameter that specifies the number of rows\nto skip at the beginning of the TSV file. This is useful if the TSV file has a header row that\nshould not be included in the resulting Parquet file. If skip is not specified, no
    • \n
    \n", "signature": "(\ttsv: str,\tparquet: str,\tdelim: str = None,\tcolumns: dict = None,\tquote: str = None,\tnullstr: str = None,\tskip: int = None) -> None:", "funcdef": "def"}, {"fullname": "howard.functions.from_extann", "modulename": "howard.functions.from_extann", "kind": "module", "doc": "

    \n"}, {"fullname": "howard.functions.from_extann.create_metaheader", "modulename": "howard.functions.from_extann", "qualname": "create_metaheader", "kind": "function", "doc": "

    From extann file in dataframe, create metaheader of pseudo bed file\ninput: path of input extann\nconfig: dict\nextra_cols: list of column from refgene to keep

    \n", "signature": "(\tdf_extann: pandas.core.frame.DataFrame,\tinput: str,\tconfig: dict,\textra_cols=None) -> str:", "funcdef": "def"}, {"fullname": "howard.functions.from_extann.add_default_metaheader", "modulename": "howard.functions.from_extann", "qualname": "add_default_metaheader", "kind": "function", "doc": "

    \n", "signature": "(fields, id):", "funcdef": "def"}, {"fullname": "howard.functions.from_extann.read_json", "modulename": "howard.functions.from_extann", "qualname": "read_json", "kind": "function", "doc": "

    From json file to python dict

    \n", "signature": "(file: str) -> dict:", "funcdef": "def"}, {"fullname": "howard.functions.from_extann.read_refgene", "modulename": "howard.functions.from_extann", "qualname": "read_refgene", "kind": "function", "doc": "

    \n", "signature": "(refgene: str) -> pandas.core.frame.DataFrame:", "funcdef": "def"}, {"fullname": "howard.functions.from_extann.metaheader_rows", "modulename": "howard.functions.from_extann", "qualname": "metaheader_rows", "kind": "function", "doc": "

    INFO=

    \n\n

    fields: INFO, FORMAT....\nnumber: 0, 1, ., ...\ntype: String, Float, ....\ndescription: description of the field\nconf https://samtools.github.io/hts-specs/VCFv4.4.pdf

    \n", "signature": "(fields: str, id: str, number: str, type: str, description: str) -> str:", "funcdef": "def"}, {"fullname": "howard.functions.from_extann.replace_values", "modulename": "howard.functions.from_extann", "qualname": "replace_values", "kind": "function", "doc": "

    \n", "signature": "(input_string: str, config: dict) -> str:", "funcdef": "def"}, {"fullname": "howard.functions.from_extann.write_extann", "modulename": "howard.functions.from_extann", "qualname": "write_extann", "kind": "function", "doc": "

    Write ExtAnn into a bed like file and his hdr mate

    \n", "signature": "(\tparam,\theader,\toutput,\tdf_extann,\tdf_refgene,\textra_cols=None,\tmode=None,\tdf_transcript=None):", "funcdef": "def"}, {"fullname": "howard.functions.from_extann.extann_to_info", "modulename": "howard.functions.from_extann", "qualname": "extann_to_info", "kind": "function", "doc": "

    from pandas series (row of dataframe) create the info field of the vcf from extann data per gene

    \n", "signature": "(record: pandas.core.series.Series) -> str:", "funcdef": "def"}, {"fullname": "howard.functions.from_extann.get_longest_transcript", "modulename": "howard.functions.from_extann", "qualname": "get_longest_transcript", "kind": "function", "doc": "

    From pandas dataframe containing one gene and many transcript and coordinate return the longest

    \n\n

    if there are many same size transcript keep the MANE

    \n", "signature": "(\tdf: pandas.core.frame.DataFrame,\textra_col=None) -> <built-in function any>:", "funcdef": "def"}, {"fullname": "howard.functions.from_extann.get_all_transcript", "modulename": "howard.functions.from_extann", "qualname": "get_all_transcript", "kind": "function", "doc": "

    Get all transcripts from refgene matching the gene name

    \n", "signature": "(\tmatch: pandas.core.frame.DataFrame,\textra_col=None) -> <built-in function any>:", "funcdef": "def"}, {"fullname": "howard.functions.from_extann.get_chosen_transcript", "modulename": "howard.functions.from_extann", "qualname": "get_chosen_transcript", "kind": "function", "doc": "

    From a txt / tsv file with gene and transcript, it will keep only provided transcript for this gene, if gene does not match it will take the longest

    \n", "signature": "(\tmatch: pandas.core.frame.DataFrame,\tdf_transcript: pandas.core.frame.DataFrame,\textra_col=None):", "funcdef": "def"}, {"fullname": "howard.functions.from_extann.get_gene_coordinate", "modulename": "howard.functions.from_extann", "qualname": "get_gene_coordinate", "kind": "function", "doc": "

    From pandas dataframe containing refgene file, get chr start stop from each gene present in extann\ndo the same process for each gene/transcript it will lead to duplicate\ndf_refgene: refgene dataframe\ngene_row: pandas series of extann row\nlog

    \n", "signature": "(\tdf_refgene: pandas.core.frame.DataFrame,\tgene_row: pandas.core.series.Series,\textra_col=None,\tmode=None,\tdf_transcript=None) -> <built-in function any>:", "funcdef": "def"}, {"fullname": "howard.functions.from_extann.from_extann", "modulename": "howard.functions.from_extann", "qualname": "from_extann", "kind": "function", "doc": "

    This function converts txt or tsv files containing gene-based information

    \n\n

    From a \"genes\" columns which contains genes symbol it will match gene coordinates in refgene database and create a bed-like output with vcf header

    \n\n
    Parameters
    \n\n
      \n
    • args: args is an object with several attributes representing the input parameters for the\nfunction. These attributes include:
    • \n
    \n", "signature": "(\targs: <module 'argparse' from '/Users/lebechea/miniconda/envs/howard_devel/lib/python3.10/argparse.py'>) -> None:", "funcdef": "def"}, {"fullname": "howard.functions.plugins", "modulename": "howard.functions.plugins", "kind": "module", "doc": "

    \n"}, {"fullname": "howard.functions.plugins.plugins_infos", "modulename": "howard.functions.plugins", "qualname": "plugins_infos", "kind": "function", "doc": "

    The plugins_infos function loads Python plugins from a specified directory and returns a\ndictionary mapping plugin names to their respective modules.

    \n\n
    Parameters
    \n\n
      \n
    • plugins_dir: The plugins_dir parameter in the plugins_infos function is a string that\nrepresents the directory where the plugins are located. This function loads Python plugins from the\nspecified directory and returns a dictionary containing the loaded plugins
    • \n
    • subfolder_plugins: The subfolder_plugins parameter in the plugins_infos function is a\nstring that represents the subfolder within the plugins_dir where the plugins are located. By\ndefault, the value of subfolder_plugins is set to \"plugins\". This parameter is used to specify the\nsubfolder, defaults to plugins
    • \n
    \n\n
    Returns
    \n\n
    \n

    A dictionary containing information about the loaded plugins is being returned. Each key in\n the dictionary represents the name of a plugin, and the corresponding value is a dictionary\n containing the attributes and functions defined in that plugin.

    \n
    \n", "signature": "(plugins_dir: str, subfolder_plugins: str = 'plugins') -> dict:", "funcdef": "def"}, {"fullname": "howard.functions.plugins.plugins_list", "modulename": "howard.functions.plugins", "qualname": "plugins_list", "kind": "function", "doc": "

    The plugins_list function loads plugin information from a specified directory and determines which\nplugins are enabled based on a dictionary of plugin data.

    \n\n
    Parameters
    \n\n
      \n
    • plugins: The plugins parameter is a dictionary containing information about various\nplugins. Each key in the dictionary represents the name of a plugin, and the corresponding value is\na dictionary containing data about that plugin
    • \n
    • plugins_dir: The plugins_dir parameter is a string that represents the directory where the\nplugins are located. This directory is used by the list_plugins function to locate the plugins and\ngather information about them
    • \n
    • subfolder_plugins: The subfolder_plugins parameter in the plugins_list function is a\nstring that represents the subfolder within the plugins_dir where the plugins are located. By\ndefault, the value of subfolder_plugins is set to \"plugins\". This parameter is used to specify the\nsubfolder, defaults to plugins
    • \n
    \n\n
    Returns
    \n\n
    \n

    The function plugins_list returns a dictionary plugin_info containing information about\n each plugin specified in the plugins parameter. The information includes whether the plugin is\n enabled (based on whether it is in the list of enabled plugins obtained from the specified\n directory), as well as any additional data provided for each plugin in the plugins dictionary.

    \n
    \n", "signature": "(\tplugins: dict,\tplugins_dir: str,\tsubfolder_plugins: str = 'plugins') -> dict:", "funcdef": "def"}, {"fullname": "howard.functions.plugins.plugins_to_load", "modulename": "howard.functions.plugins", "qualname": "plugins_to_load", "kind": "function", "doc": "

    The plugins_to_load function filters a dictionary of plugins based on their \"enabled\" and\n\"__enabled__\" keys.

    \n\n
    Parameters
    \n\n
      \n
    • plugins_list_dict: The plugins_list_dict parameter is a dictionary containing information\nabout various plugins. Each key in the dictionary represents the name of a plugin, and the\ncorresponding value is another dictionary with plugin information. The plugin information dictionary\nmay contain keys such as \"enabled\" and \"__enabled__\" to indicate whether the
    • \n
    \n\n
    Returns
    \n\n
    \n

    The function plugins_to_load returns a dictionary containing plugins that are enabled\n based on the input plugins_list_dict. The plugins are selected based on the values of the\n \"enabled\" and \"__enabled__\" keys in the nested dictionaries within the input dictionary.

    \n
    \n", "signature": "(plugins_list_dict: dict) -> dict:", "funcdef": "def"}, {"fullname": "howard.functions.utils", "modulename": "howard.functions.utils", "kind": "module", "doc": "

    Helper functions.

    \n"}, {"fullname": "howard.functions.utils.read_refgene", "modulename": "howard.functions.utils", "qualname": "read_refgene", "kind": "function", "doc": "

    The function \"read_refgene\" reads a genePred file with an extra column at the front and returns the\ngenePred data.

    \n\n

    refGene = genePred with extra column at front (and ignored ones after)

    \n\n
    Parameters
    \n\n
      \n
    • infile: The input file containing the refGene data
    • \n
    \n\n
    Returns
    \n\n
    \n

    the result of calling the function read_genepred with the argument infile and the\n keyword argument skip_first_column set to True.

    \n
    \n", "signature": "(infile):", "funcdef": "def"}, {"fullname": "howard.functions.utils.read_genepred", "modulename": "howard.functions.utils", "qualname": "read_genepred", "kind": "function", "doc": "

    The function read_genepred reads a file in GenePred extension format and yields a dictionary for\neach line, containing information about a gene.

    \n\n
    Parameters
    \n\n
      \n
    • infile: The infile parameter is the input file object that contains the gene annotation\ndata in the GenePred format. It is used to read the lines of the file and extract the necessary\ninformation
    • \n
    • skip_first_column: The skip_first_column parameter is a boolean flag that determines\nwhether to skip the first column of the input file when parsing the genePred format. By default, it\nis set to False, which means the first column (usually the transcript ID) will be included in the\noutput. If you, defaults to False (optional)
    • \n
    \n\n

    GenePred extension format:\nhttp://genome.ucsc.edu/FAQ/FAQformat.html#GenePredExt

    \n\n

    Column definitions:

    \n\n
      \n
    1. string name; \"Name of gene (usually transcript_id from GTF)\"
    2. \n
    3. string chrom; \"Chromosome name\"
    4. \n
    5. char[1] strand; \"+ or - for strand\"
    6. \n
    7. uint txStart; \"Transcription start position\"
    8. \n
    9. uint txEnd; \"Transcription end position\"
    10. \n
    11. uint cdsStart; \"Coding region start\"
    12. \n
    13. uint cdsEnd; \"Coding region end\"
    14. \n
    15. uint exonCount; \"Number of exons\"
    16. \n
    17. uint[exonCount] exonStarts; \"Exon start positions\"
    18. \n
    19. uint[exonCount] exonEnds; \"Exon end positions\"
    20. \n
    21. uint id; \"Unique identifier\"
    22. \n
    23. string name2; \"Alternate name (e.g. gene_id from GTF)\"
    24. \n
    \n", "signature": "(infile, skip_first_column=False):", "funcdef": "def"}, {"fullname": "howard.functions.utils.make_transcript", "modulename": "howard.functions.utils", "qualname": "make_transcript", "kind": "function", "doc": "

    The function make_transcript takes a JSON object representing a transcript and creates a\nTranscript object from it.

    \n\n
    Parameters
    \n\n
      \n
    • transcript_json: The transcript_json parameter is a JSON object that contains information\nabout a transcript. It should have the following keys:
    • \n
    \n\n
    Returns
    \n\n
    \n

    a Transcript object.

    \n
    \n", "signature": "(transcript_json):", "funcdef": "def"}, {"fullname": "howard.functions.utils.json_perfect_exons_to_cdna_match", "modulename": "howard.functions.utils", "qualname": "json_perfect_exons_to_cdna_match", "kind": "function", "doc": "

    The function json_perfect_exons_to_cdna_match converts a list of ordered exons into a list of cDNA\nmatches, where each match consists of the start and end positions of the exon, the start and end\npositions of the corresponding cDNA sequence, and an optional gap list.

    \n\n

    Perfectly matched exons are basically a no-gap case of cDNA match\nsingle - use a single cDNA match (deletions for introns) - this is currently broken do not use

    \n\n
    Parameters
    \n\n
      \n
    • ordered_exons: A list of tuples representing the start and end positions of exons in a gene\nsequence. The exons should be ordered based on their position in the gene
    • \n
    • single: The single parameter is a boolean flag that determines whether to use a single cDNA\nmatch or not. If single is set to True, the function will create a single cDNA match by\nconsidering deletions for introns. If single is set to False (, defaults to False (optional)
    • \n
    \n\n
    Returns
    \n\n
    \n

    a list of lists, where each inner list represents a cDNA match. Each inner list contains\n the start and end positions of the exon, the start and end positions of the corresponding cDNA\n match, and a string representing any gaps (intron lengths) between exons.

    \n
    \n", "signature": "(ordered_exons, single=False):", "funcdef": "def"}, {"fullname": "howard.functions.utils.read_transcripts", "modulename": "howard.functions.utils", "qualname": "read_transcripts", "kind": "function", "doc": "

    The function read_transcripts reads all transcripts in a RefGene file and returns them as a\ndictionary.

    \n\n
    Parameters
    \n\n
      \n
    • refgene_file: The refgene_file parameter is the file path to a RefGene file. This file\ncontains information about gene transcripts, such as their names, full names, and other relevant\ndetails. The read_transcripts function reads this file and returns a dictionary of transcripts,\nwhere the keys are the
    • \n
    \n\n
    Returns
    \n\n
    \n

    a dictionary of transcripts.

    \n
    \n", "signature": "(refgene_file):", "funcdef": "def"}, {"fullname": "howard.functions.utils.get_genomic_sequence", "modulename": "howard.functions.utils", "qualname": "get_genomic_sequence", "kind": "function", "doc": "

    The function get_genomic_sequence returns a sequence for a given genomic region.

    \n\n
    Parameters
    \n\n
      \n
    • genome: A dictionary containing genomic sequences for different chromosomes. The keys of the\ndictionary are chromosome names (e.g., 'chr1', 'chr2', etc.), and the values are the corresponding\ngenomic sequences
    • \n
    • chrom: The chrom parameter represents the chromosome or genomic region from which you want to\nextract the sequence
    • \n
    • start: The start parameter is the 1-based coordinate of the beginning of the genomic region
    • \n
    • end: The end parameter is the end coordinate of the genomic region. It is a 1-based,\nend-inclusive coordinate, meaning that the base at the end position is included in the returned\nsequence
    • \n
    \n\n
    Returns
    \n\n
    \n

    a sequence for the specified genomic region.

    \n
    \n", "signature": "(genome, chrom, start, end):", "funcdef": "def"}, {"fullname": "howard.functions.utils.get_vcf_allele", "modulename": "howard.functions.utils", "qualname": "get_vcf_allele", "kind": "function", "doc": "

    The function get_vcf_allele takes a HGVS name, a genome, and an optional transcript, and returns a\nVCF-style allele.

    \n\n
    Parameters
    \n\n
      \n
    • hgvs: The hgvs parameter is an object of type HGVSName. It likely contains information\nabout a genetic variant, such as the chromosome, start and end positions, and the type of mutation\n(e.g., substitution, deletion, insertion, etc.)
    • \n
    • genome: The genome parameter is the genomic sequence from which the allele will be\nextracted. It is a string representing the entire genome sequence
    • \n
    • transcript: The transcript parameter is an optional argument that represents a transcript.\nIt is used to retrieve the VCF-style allele from the given HGVSName and genome. If a transcript is\nprovided, the function will use it to get the VCF coordinates and the reference and alternate\nalleles. If no
    • \n
    \n\n
    Returns
    \n\n
    \n

    the chromosome, start position, end position, reference allele, and alternate allele.

    \n
    \n", "signature": "(hgvs, genome, transcript=None):", "funcdef": "def"}, {"fullname": "howard.functions.utils.get_alt_from_sequence", "modulename": "howard.functions.utils", "qualname": "get_alt_from_sequence", "kind": "function", "doc": "

    The function \"get_alt_from_sequence\" returns a genomic sequence from a given HGVS notation, genome,\nand transcript.

    \n\n
    Parameters
    \n\n
      \n
    • hgvs: The hgvs parameter is an object that provides methods for working with Human Genome\nVariation Society (HGVS) nomenclature. It likely has a method called get_raw_coords() that takes a\ntranscript as input and returns the chromosome, start position, and end position of the\ncorresponding genomic sequence
    • \n
    • genome: The genome parameter refers to the genomic sequence from which the alternative allele\nwill be extracted
    • \n
    • transcript: The transcript parameter is a string that represents the transcript ID or name
    • \n
    \n\n
    Returns
    \n\n
    \n

    the genomic sequence from the specified region in the genome.

    \n
    \n", "signature": "(hgvs, genome, transcript):", "funcdef": "def"}, {"fullname": "howard.functions.utils.matches_ref_allele", "modulename": "howard.functions.utils", "qualname": "matches_ref_allele", "kind": "function", "doc": "

    The function matches_ref_allele checks if the reference allele in a given HGVS notation matches\nthe corresponding genomic sequence.

    \n\n
    Parameters
    \n\n
      \n
    • hgvs: The hgvs parameter is an object that represents a variant in the Human Genome\nVariation Society (HGVS) format. It contains information about the variant's reference allele,\nalternative allele, and genomic coordinates
    • \n
    • genome: The genome parameter is the genomic sequence from which the reference allele is\nextracted
    • \n
    • transcript: The transcript parameter is an object that represents a transcript. It has a\nproperty called tx_position which provides information about the position of the transcript on the\ngenome, including whether it is on the forward or reverse strand
    • \n
    \n\n
    Returns
    \n\n
    \n

    True if the reference allele matches the genomic sequence, and False otherwise.

    \n
    \n", "signature": "(hgvs, genome, transcript=None):", "funcdef": "def"}, {"fullname": "howard.functions.utils.hgvs_justify_dup", "modulename": "howard.functions.utils", "qualname": "hgvs_justify_dup", "kind": "function", "doc": "

    The function hgvs_justify_dup determines if an allele is a duplication and justifies it by\nreturning the duplicated region if applicable.

    \n\n
    Parameters
    \n\n
      \n
    • chrom: The chromosome name where the allele is located
    • \n
    • offset: The offset parameter is the 1-index genomic coordinate, which represents the position\nof the variant on the chromosome
    • \n
    • ref: The \"ref\" parameter represents the reference allele, which is the allele that is present\nin the reference genome at the given genomic coordinate
    • \n
    • alt: The alt parameter represents the alternate allele, which is the allele that differs\nfrom the reference allele at a specific genomic position
    • \n
    • genome: The genome parameter is a pygr compatible genome object. It is an object that\nrepresents a reference genome and provides methods to access genomic sequences
    • \n
    \n\n
    Returns
    \n\n
    \n

    a tuple containing the chromosome name, offset, reference allele, alternate allele, and\n mutation type.

    \n
    \n", "signature": "(chrom, offset, ref, alt, genome):", "funcdef": "def"}, {"fullname": "howard.functions.utils.hgvs_justify_indel", "modulename": "howard.functions.utils", "qualname": "hgvs_justify_indel", "kind": "function", "doc": "

    The function hgvs_justify_indel justifies an indel (insertion or deletion) according to the HGVS\nstandard by determining the genomic sequence around the lesion, identifying the actual lesion\nsequence, and 3' justifying the offset.

    \n\n
    Parameters
    \n\n
      \n
    • chrom: The chromosome where the indel is located
    • \n
    • offset: The offset parameter represents the position of the indel (insertion or deletion)\nwithin the chromosome or genomic sequence
    • \n
    • ref: The ref parameter represents the reference allele of the variant. It is a string that\ncontains the nucleotide sequence of the reference allele
    • \n
    • alt: The alt parameter in the hgvs_justify_indel function represents the alternate allele\nsequence for an indel variant. It is the sequence that replaces the reference allele sequence\n(ref) at the specified offset position on the chrom chromosome
    • \n
    • strand: The parameter \"strand\" represents the orientation of the DNA strand where the indel\nis located. It can have two possible values: \"+\" or \"-\". The \"+\" strand refers to the forward\nstrand, while the \"-\" strand refers to the reverse complement strand
    • \n
    • genome: The genome parameter is a dictionary that contains the genomic sequence for each\nchromosome. The keys of the dictionary are the chromosome names (e.g., \"chr1\", \"chr2\", etc.), and\nthe values are the corresponding genomic sequences
    • \n
    \n\n
    Returns
    \n\n
    \n

    the variables chrom, offset, ref, and alt.

    \n
    \n", "signature": "(chrom, offset, ref, alt, strand, genome):", "funcdef": "def"}, {"fullname": "howard.functions.utils.hgvs_normalize_variant", "modulename": "howard.functions.utils", "qualname": "hgvs_normalize_variant", "kind": "function", "doc": "

    The function hgvs_normalize_variant converts a variant in VCF-style to HGVS-style by adjusting the\noffset, reference and alternate alleles, and determining the mutation type.

    \n\n
    Parameters
    \n\n
      \n
    • chrom: The chromosome where the variant is located
    • \n
    • offset: The offset parameter represents the position of the variant within the chromosome. It\nis an integer value
    • \n
    • ref: The ref parameter represents the reference allele in a variant
    • \n
    • alt: The alt parameter represents the alternate allele in a variant. It is a string that\nrepresents the alternative nucleotide(s) or sequence(s) at a specific position in the genome
    • \n
    • genome: The genome parameter is the reference genome sequence. It is used to perform\ncertain operations on the variant, such as justifying indels and representing duplications
    • \n
    • transcript: The transcript parameter is an optional argument that represents the transcript\nor gene in which the variant occurs. It is used to determine the strand of the gene and to perform\ncertain operations on the variant. If no transcript is provided, the default value is None
    • \n
    \n\n
    Returns
    \n\n
    \n

    the following values: chrom, offset, ref, alt, and mutation_type.

    \n
    \n", "signature": "(chrom, offset, ref, alt, genome, transcript=None):", "funcdef": "def"}, {"fullname": "howard.functions.utils.parse_hgvs_name", "modulename": "howard.functions.utils", "qualname": "parse_hgvs_name", "kind": "function", "doc": "

    The function parse_hgvs_name takes an HGVS name, a genome object, and optional parameters, and\nreturns the chromosome, start position, reference allele, and alternate allele of the variant\ndescribed by the HGVS name.

    \n\n
    Parameters
    \n\n
      \n
    • hgvs_name: The HGVS name to parse
    • \n
    • genome: A pygr compatible genome object. This object represents the reference genome and\nprovides methods to access genomic sequences and annotations
    • \n
    • transcript: The transcript parameter is an optional argument that represents the transcript\ncorresponding to the HGVS name. It is used to determine the reference sequence for the variant. If\nnot provided, the get_transcript function is used to retrieve the transcript based on the HGVS name.\nIf neither transcript nor get_transcript is
    • \n
    • get_transcript: A function that takes a transcript name as input and returns the\ncorresponding transcript object. If not provided, the default behavior is to return None
    • \n
    • flank_length: The flank_length parameter is an integer that specifies the length of the\nflanking sequence to include when normalizing the variant allele. This is used in the\nnormalize_variant function to determine the reference allele and normalize the variant allele\naccording to the VCF standard, defaults to 30 (optional)
    • \n
    • normalize: A boolean parameter that determines whether the allele should be normalized\naccording to the VCF standard. If set to True, the allele will be normalized; if set to False, the\nallele will not be normalized, defaults to True (optional)
    • \n
    • lazy: The lazy parameter is a boolean flag that determines whether or not to discard\nversion information from the incoming transcript or gene. If lazy is set to True, the version\ninformation will be discarded. If lazy is set to False, the version information will be included\nin the, defaults to False (optional)
    • \n
    • indels_start_with_same_base: The parameter \"indels_start_with_same_base\" is a boolean flag\nthat determines whether or not to strip the common prefix from indels when normalizing alleles. If\nset to True, the common prefix will not be stripped, defaults to True (optional)
    • \n
    \n\n
    Returns
    \n\n
    \n

    The function parse_hgvs_name returns a tuple containing the chromosome, start position,\n reference allele, and alternate allele of the parsed HGVS name.

    \n
    \n", "signature": "(\thgvs_name,\tgenome,\ttranscript=None,\tget_transcript=<function <lambda>>,\tflank_length=30,\tnormalize=True,\tlazy=False,\tindels_start_with_same_base=True):", "funcdef": "def"}, {"fullname": "howard.functions.utils.cdna_to_protein", "modulename": "howard.functions.utils", "qualname": "cdna_to_protein", "kind": "function", "doc": "

    The function cdna_to_protein takes in various parameters related to a genetic mutation and returns\nan updated HGVS object with additional protein information.

    \n\n
    Parameters
    \n\n
      \n
    • hgvs: The parameter hgvs is an object that represents a variant in the Human Genome\nVariation Society (HGVS) format. It contains information about the variant, such as the cDNA start\nand end positions
    • \n
    • offset: The offset is a numerical value that represents the starting position of the genomic\nsequence in the reference genome. It is used to calculate the genomic position of the mutation
    • \n
    • genome: The genome parameter is a dictionary that represents the genomic sequence. It\ncontains the chromosome as the key and the corresponding DNA sequence as the value
    • \n
    • chrom: The chrom parameter represents the chromosome on which the mutation occurs
    • \n
    • transcript: The transcript parameter is a string that represents the transcript ID or name.\nIt is used to identify the specific transcript in the genome
    • \n
    • ref: The parameter \"ref\" is a string that represents the reference nucleotide sequence. It is\nused to determine the codons in the DNA sequence
    • \n
    • alt: The alt parameter in the cdna_to_protein function is a string that represents the\nalternate nucleotide sequence for a mutation
    • \n
    • mutation_type: The mutation_type parameter is a string that represents the type of\nmutation. It can have the following values:
    • \n
    • codon_type: The codon_type parameter is a string that specifies the type of codon\ntranslation to be used. It can have one of the following values:, defaults to 3
    • \n
    \n\n
    Returns
    \n\n
    \n

    the updated hgvs object.

    \n
    \n", "signature": "(\thgvs,\toffset,\tgenome,\tchrom,\ttranscript,\tref,\talt,\tmutation_type,\tcodon_type: str = '3'):", "funcdef": "def"}, {"fullname": "howard.functions.utils.variant_to_hgvs_name", "modulename": "howard.functions.utils", "qualname": "variant_to_hgvs_name", "kind": "function", "doc": "

    The function variant_to_hgvs_name takes in genomic coordinates, alleles, and other parameters, and\nreturns a HGVS-style name for the variant.

    \n\n
    Parameters
    \n\n
      \n
    • chrom: The chromosome name where the variant is located
    • \n
    • offset: The offset parameter represents the genomic offset of the allele. It is the\nposition of the variant on the chromosome
    • \n
    • ref: The reference allele at the given genomic coordinate
    • \n
    • alt: The alt parameter is the alternate allele. In genetics, a variant or mutation can\noccur at a specific position in the genome, and the alt allele represents the alternative\nnucleotide or sequence at that position compared to the reference genome
    • \n
    • genome: A pygr compatible genome object, which represents the reference genome sequence
    • \n
    • transcript: The transcript parameter is the transcript corresponding to the allele. It is\nused to determine the type of coordinates to use in the HGVS name (either genomic coordinates or\ncDNA coordinates). If the transcript is not available, the function will use genomic coordinates
    • \n
    • transcript_protein: The transcript_protein parameter is an optional argument that\nrepresents the protein sequence corresponding to the transcript. It is used to populate the\ntranscript_protein attribute of the HGVSName object
    • \n
    • exon: The exon parameter is an optional argument that represents the exon number or\nidentifier associated with the variant. It is used to populate the exon attribute of the\nHGVSName object. If provided, it will be included in the final HGVS name generated by the function
    • \n
    • max_allele_length: The max_allele_length parameter is used to determine whether to\nrepresent the alleles as their actual sequence or as the length of the sequence. If the length of\nthe reference allele or alternate allele is greater than max_allele_length, then the length of the\nallele is used instead of the actual, defaults to 4 (optional)
    • \n
    • use_counsyl: A boolean flag indicating whether to use Counsyl-specific rules for single-base\nindels, defaults to False (optional)
    • \n
    • codon_type: The parameter codon_type is a string that specifies the type of codon numbering\nto be used in the HGVS name. It is used in the cdna_to_protein function to determine the type of\ncodon numbering to be used in the protein-level HGVS name. The, defaults to 3
    • \n
    \n\n
    Returns
    \n\n
    \n

    an object of type HGVSName.

    \n
    \n", "signature": "(\tchrom,\toffset,\tref,\talt,\tgenome,\ttranscript,\ttranscript_protein=None,\texon=None,\tmax_allele_length=4,\tuse_counsyl=False,\tcodon_type: str = '3'):", "funcdef": "def"}, {"fullname": "howard.functions.utils.format_hgvs_name", "modulename": "howard.functions.utils", "qualname": "format_hgvs_name", "kind": "function", "doc": "

    The format_hgvs_name function generates a HGVS name from a genomic coordinate.

    \n\n
    Parameters
    \n\n
      \n
    • chrom: The chrom parameter represents the chromosome name. It is a string that specifies\nthe chromosome on which the variant occurs
    • \n
    • offset: The offset parameter represents the genomic offset of the allele, which is the\nposition of the variant on the chromosome. It is used to generate the HGVS name based on the genomic\ncoordinate
    • \n
    • ref: The ref parameter represents the reference allele. In genetics, a variant or mutation\ncan occur at a specific position in the genome, resulting in a change from the reference allele to\nan alternate allele. The ref parameter specifies the sequence of the reference allele at that\nposition
    • \n
    • alt: The alt parameter represents the alternate allele. In genetics, a variant or mutation\ncan occur at a specific position in the genome, resulting in a change from the reference allele to\nan alternate allele. The alt parameter specifies the sequence of the alternate allele at that\nposition
    • \n
    • genome: A pygr compatible genome object, which is used to retrieve genomic sequences and\nannotations. It provides methods to access genomic information such as chromosome names, sequences,\nand gene annotations
    • \n
    • transcript: The transcript parameter is the transcript corresponding to the allele. It is\nused to generate the HGVS name based on the genomic coordinate
    • \n
    • transcript_protein: The transcript_protein parameter is an optional argument that\nrepresents the protein transcript corresponding to the cDNA transcript. It is used to generate the\nprotein HGVS name if it exists
    • \n
    • exon: The exon parameter is used to specify the exon number in the HGVS name. It is an\noptional parameter and is used to generate a more specific HGVS name when needed
    • \n
    • use_prefix: A boolean indicating whether to include a transcript/gene/chromosome prefix in\nthe HGVS name. If set to True, the prefix will be included; if set to False, the prefix will be\nexcluded, defaults to True (optional)
    • \n
    • use_gene: A boolean parameter that determines whether to include the gene name in the HGVS\nprefix. If set to True, the gene name will be included; if set to False, the gene name will be\nexcluded, defaults to True (optional)
    • \n
    • use_protein: A boolean parameter that determines whether to include protein HGVS notation in\nthe generated HGVS name. If set to True, the protein HGVS notation will be included if it exists. If\nset to False, only the genomic and transcript HGVS notation will be included, defaults to False\n(optional)
    • \n
    • use_counsyl: The use_counsyl parameter is a boolean parameter that determines whether to\nuse Counsyl-specific formatting for the HGVS name. If set to True, the HGVS name will be formatted\naccording to Counsyl's specific guidelines. If set to False, the HGVS name will be, defaults to\nFalse (optional)
    • \n
    • max_allele_length: The max_allele_length parameter is used to determine the maximum length\nof the allele. If the length of the allele is greater than the specified max_allele_length, then\nthe allele length will be used in the HGVS name instead of the actual allele sequence. By default,\nthe `, defaults to 4 (optional)
    • \n
    • full_format: A boolean parameter that determines whether to use the full HGVS format or not.\nIf set to True, the HGVS name will include the gene name, transcript name, exon number (if\nprovided), and the amino acid change (if protein information is available). If set to False, the\nHGVS, defaults to False (optional)
    • \n
    • use_version: A boolean parameter that determines whether to include the version number of the\ntranscript in the HGVS name. If set to True, the version number will be included; if set to False,\nthe version number will be excluded, defaults to False (optional)
    • \n
    • codon_type: The codon_type parameter is a string that specifies the type of codon numbering\nto be used in the HGVS name. It can have one of the following values:, defaults to 3
    • \n
    \n\n
    Returns
    \n\n
    \n

    a formatted HGVS name generated from a genomic coordinate.

    \n
    \n", "signature": "(\tchrom,\toffset,\tref,\talt,\tgenome,\ttranscript,\ttranscript_protein=None,\texon=None,\tuse_prefix=True,\tuse_gene=True,\tuse_protein=False,\tuse_counsyl=False,\tmax_allele_length=4,\tfull_format=False,\tuse_version=False,\tcodon_type: str = '3'):", "funcdef": "def"}, {"fullname": "howard.functions.utils.create_refseq_table", "modulename": "howard.functions.utils", "qualname": "create_refseq_table", "kind": "function", "doc": "

    The function create_refseq_table creates a table in a database with the specified name and\nstructure, either using a file or without a file.

    \n\n
    Parameters
    \n\n
      \n
    • conn: The conn parameter is a connection object that represents a connection to a database.\nIt is used to execute SQL queries and interact with the database
    • \n
    • refseq_table: The refseq_table parameter is a string that specifies the name of the table\nthat will be created in the database to store the RefGene data, defaults to refseq
    • \n
    • refseq_file: The refseq_file parameter is a string that specifies the path to a file\ncontaining the data for the refGene table. If this parameter is provided, the function will create\nthe refGene table in the database using the data from the file. If this parameter is not provided,\nthe function will
    • \n
    \n\n
    Returns
    \n\n
    \n

    the name of the refseq table that was created or used.

    \n
    \n", "signature": "(conn, refseq_table: str = 'refseq', refseq_file: str = None) -> str:", "funcdef": "def"}, {"fullname": "howard.functions.utils.get_refseq_table", "modulename": "howard.functions.utils", "qualname": "get_refseq_table", "kind": "function", "doc": "

    The function get_refseq_table checks if a table named refseq exists in a database, and if not,\ncreates it using the create_refseq_table function.

    \n\n
    Parameters
    \n\n
      \n
    • conn: The parameter conn is expected to be a connection object that allows you to interact\nwith a database. It could be an instance of a database connector class, such as pymysql.connect()\nfor MySQL or psycopg2.connect() for PostgreSQL
    • \n
    • refseq_table: The parameter \"refseq_table\" is a string that specifies the name of the table\nin the database where the refGene data will be stored. If this table already exists in the database,\nthe function will return the name of the existing table. If the table does not exist, the function\nwill create, defaults to refseq
    • \n
    • refseq_file: The refseq_file parameter is the name or path of the file that contains the\nrefGene data. This file is used to populate the refGene table in the database
    • \n
    \n\n
    Returns
    \n\n
    \n

    the name of the refseq_table.

    \n
    \n", "signature": "(conn, refseq_table: str = 'refseq', refseq_file: str = None) -> str:", "funcdef": "def"}, {"fullname": "howard.functions.utils.get_transcript", "modulename": "howard.functions.utils", "qualname": "get_transcript", "kind": "function", "doc": "

    The function get_transcript takes a dictionary of transcripts and a name as input, and returns the\ntranscript associated with that name.

    \n\n
    Parameters
    \n\n
      \n
    • transcripts: A dictionary containing transcripts as values, with names as keys
    • \n
    • name: The name parameter is a string that represents the name of the transcript that you want\nto retrieve from the transcripts dictionary
    • \n
    \n\n
    Returns
    \n\n
    \n

    the value associated with the given name key in the transcripts dictionary.

    \n
    \n", "signature": "(\ttranscripts: dict,\ttranscript_name: str) -> howard.objects.transcript.Transcript:", "funcdef": "def"}, {"fullname": "howard.main", "modulename": "howard.main", "kind": "module", "doc": "

    \n"}, {"fullname": "howard.main.msg_gui_disable", "modulename": "howard.main", "qualname": "msg_gui_disable", "kind": "variable", "doc": "

    \n", "default_value": "'HOWARD GUI disabled'"}, {"fullname": "howard.main.main_folder", "modulename": "howard.main", "qualname": "main_folder", "kind": "variable", "doc": "

    \n", "default_value": "'/Users/lebechea/BIOINFO/git/HOWARD/howard'"}, {"fullname": "howard.main.main", "modulename": "howard.main", "qualname": "main", "kind": "function", "doc": "

    It loads a VCF file in multiple format (VCF, parquet, DB), and process, query, export data

    \n", "signature": "() -> None:", "funcdef": "def"}, {"fullname": "howard.objects", "modulename": "howard.objects", "kind": "module", "doc": "

    \n"}, {"fullname": "howard.objects.cdna", "modulename": "howard.objects.cdna", "kind": "module", "doc": "

    \n"}, {"fullname": "howard.objects.cdna.CDNA_START_CODON", "modulename": "howard.objects.cdna", "qualname": "CDNA_START_CODON", "kind": "variable", "doc": "

    \n", "default_value": "'cdna_start'"}, {"fullname": "howard.objects.cdna.CDNA_STOP_CODON", "modulename": "howard.objects.cdna", "qualname": "CDNA_STOP_CODON", "kind": "variable", "doc": "

    \n", "default_value": "'cdna_stop'"}, {"fullname": "howard.objects.cdna.CDNACoord", "modulename": "howard.objects.cdna", "qualname": "CDNACoord", "kind": "class", "doc": "

    A HGVS cDNA-based coordinate.

    \n\n

    A cDNA coordinate can take one of these forms:

    \n\n

    N = nucleotide N in protein coding sequence (e.g. 11A>G)

    \n\n

    -N = nucleotide N 5' of the ATG translation initiation codon (e.g. -4A>G)\n NOTE: so located in the 5'UTR or 5' of the transcription initiation\n site (upstream of the gene, incl. promoter)

    \n\n

    *N = nucleotide N 3' of the translation stop codon (e.g. *6A>G)\n NOTE: so located in the 3'UTR or 3' of the polyA-addition site\n (including downstream of the gene)

    \n\n

    N+M = nucleotide M in the intron after (3' of) position N in the coding DNA\n reference sequence (e.g. 30+4A>G)

    \n\n

    N-M = nucleotide M in the intron before (5' of) position N in the coding\n DNA reference sequence (e.g. 301-2A>G)

    \n\n

    -N+M / -N-M = nucleotide in an intron in the 5'UTR (e.g. -45+4A>G)

    \n\n

    *N+M / *N-M = nucleotide in an intron in the 3'UTR (e.g. *212-2A>G)

    \n"}, {"fullname": "howard.objects.cdna.CDNACoord.__init__", "modulename": "howard.objects.cdna", "qualname": "CDNACoord.__init__", "kind": "function", "doc": "

    coord: main coordinate along cDNA on the same strand as the transcript

    \n\n

    offset: an additional genomic offset from the main coordinate. This\n allows referencing non-coding (e.g. intronic) positions.\n Offset is also interpreted on the coding strand.

    \n\n

    landmark: ('cdna_start', 'cdna_stop') indicating that 'coord'\n is relative to one of these landmarks.

    \n\n

    string: a coordinate from an HGVS name. If given coord, offset, and\n landmark should not be specified.

    \n", "signature": "(coord=0, offset=0, landmark='cdna_start', string='')"}, {"fullname": "howard.objects.cdna.CDNACoord.parse", "modulename": "howard.objects.cdna", "qualname": "CDNACoord.parse", "kind": "function", "doc": "

    Parse a HGVS formatted cDNA coordinate.

    \n", "signature": "(self, coord_text):", "funcdef": "def"}, {"fullname": "howard.objects.database", "modulename": "howard.objects.database", "kind": "module", "doc": "

    \n"}, {"fullname": "howard.objects.database.SEP_TYPE", "modulename": "howard.objects.database", "qualname": "SEP_TYPE", "kind": "variable", "doc": "

    \n", "default_value": "{'vcf': '\\t', 'tsv': '\\t', 'csv': ',', 'tbl': '|', 'bed': '\\t'}"}, {"fullname": "howard.objects.database.DATABASE_TYPE_NEEDED_COLUMNS", "modulename": "howard.objects.database", "qualname": "DATABASE_TYPE_NEEDED_COLUMNS", "kind": "variable", "doc": "

    \n", "default_value": "{'variants': {'#CHROM': ['#CHROM', 'CHROM', 'CHR', 'CHROMOSOME'], 'POS': ['POS'], 'REF': ['REF'], 'ALT': ['ALT']}, 'regions': {'#CHROM': ['#CHROM', 'CHROM', 'CHR', 'CHROMOSOME'], 'START': ['START', 'POSITIONSTART', 'POS'], 'END': ['END', 'POSITIONEND', 'POS']}, 'vcf': {'#CHROM': ['#CHROM', 'CHROM', 'CHR', 'CHROMOSOME'], 'POS': ['POS', 'POSITION'], 'ID': ['ID', 'IDENTIFIER'], 'REF': ['REF', 'REFERENCE'], 'ALT': ['ALT', 'ALTERNATIVE'], 'QUAL': ['QUAL', 'QUALITY'], 'FILTER': ['FILTER'], 'INFO': ['INFO']}, 'bed': {'#CHROM': ['#CHROM', 'CHROM', 'CHR', 'CHROMOSOME'], 'START': ['START', 'POSITIONSTART', 'POS'], 'END': ['END', 'POSITIONEND', 'POS']}}"}, {"fullname": "howard.objects.database.DEFAULT_VCF_HEADER", "modulename": "howard.objects.database", "qualname": "DEFAULT_VCF_HEADER", "kind": "variable", "doc": "

    \n", "default_value": "['#CHROM', 'POS', 'ID', 'REF', 'ALT', 'QUAL', 'FILTER', 'INFO']"}, {"fullname": "howard.objects.database.DEFAULT_VCF_HEADER_DUCKDB_TYPES", "modulename": "howard.objects.database", "qualname": "DEFAULT_VCF_HEADER_DUCKDB_TYPES", "kind": "variable", "doc": "

    \n", "default_value": "{'#CHROM': 'STRING', 'POS': 'INT', 'START': 'INT', 'END': 'INT', 'ID': 'VARCHAR', 'REF': 'VARCHAR', 'ALT': 'VARCHAR', 'FILTER': 'VARCHAR', 'INFO': 'VARCHAR'}"}, {"fullname": "howard.objects.database.DEFAULT_HEADER_LIST", "modulename": "howard.objects.database", "qualname": "DEFAULT_HEADER_LIST", "kind": "variable", "doc": "

    \n", "default_value": "['##fileformat=VCFv4.2', '#CHROM\\tPOS\\tID\\tREF\\tALT\\tQUAL\\tFILTER\\tINFO']"}, {"fullname": "howard.objects.database.FILE_FORMAT_DELIMITERS", "modulename": "howard.objects.database", "qualname": "FILE_FORMAT_DELIMITERS", "kind": "variable", "doc": "

    \n", "default_value": "{'vcf': '\\t', 'tsv': '\\t', 'csv': ',', 'tbl': '|', 'bed': '\\t'}"}, {"fullname": "howard.objects.database.DUCKDB_EXTENSION_TO_LOAD", "modulename": "howard.objects.database", "qualname": "DUCKDB_EXTENSION_TO_LOAD", "kind": "variable", "doc": "

    \n", "default_value": "['sqlite_scanner']"}, {"fullname": "howard.objects.database.Database", "modulename": "howard.objects.database", "qualname": "Database", "kind": "class", "doc": "

    \n"}, {"fullname": "howard.objects.database.Database.__init__", "modulename": "howard.objects.database", "qualname": "Database.__init__", "kind": "function", "doc": "

    This is an initialization function for a class that sets up a database and header file for use\nin a DuckDB connection.

    \n\n
    Parameters
    \n\n
      \n
    • database: A string representing the name of the database to be used. If None, the default\ndatabase will be used
    • \n
    • format: The format parameter is not described in the docstring, so it is unclear what\nit represents
    • \n
    • header: The header parameter is a string that represents the name of the header file\nthat contains the column names for the database. It is used in conjunction with the database\nparameter to set the header for the database. If the header parameter is not provided, the\nheader will be set to
    • \n
    • header_file: The header_file parameter is a string that represents the file path to the\nheader file that contains the column names for the database. It is used in the set_header()\nmethod to set the header attribute of the class
    • \n
    • databases_folders: A list of folders where the database files are located. This parameter\nis used in the set_database() method to search for the database file in the specified folders.\nIf the database file is not found in any of the folders, an error is raised
    • \n
    • assembly: A string representing the name of the assembly to be used. It is used in\nconjunction with the set_assembly() method to set the assembly for the DuckDB connection. If\nthe assembly parameter is not provided, the default assembly will be used
    • \n
    • conn: An optional parameter that represents an existing DuckDBPyConnection object. If\nprovided, the class will use this connection instead of creating a new one. If not provided, a\nnew connection will be created
    • \n
    • conn_config: An optional parameter for DuckDBPyConnection object config (see duckdb.connect)
    • \n
    • table: The table parameter is a string representing the name of the table in the\ndatabase that will be used in the DuckDB connection. It is used in the set_table() method to\nset the table attribute of the class. If the table parameter is not provided, the default\ntable will
    • \n
    \n", "signature": "(\tdatabase: str = None,\tformat: str = None,\theader: str = None,\theader_file: str = None,\tdatabases_folders: list = None,\tassembly: str = None,\tconn=None,\tconn_config: dict = {},\ttable: str = None)"}, {"fullname": "howard.objects.database.Database.database", "modulename": "howard.objects.database", "qualname": "Database.database", "kind": "variable", "doc": "

    \n"}, {"fullname": "howard.objects.database.Database.format", "modulename": "howard.objects.database", "qualname": "Database.format", "kind": "variable", "doc": "

    \n"}, {"fullname": "howard.objects.database.Database.header", "modulename": "howard.objects.database", "qualname": "Database.header", "kind": "variable", "doc": "

    \n"}, {"fullname": "howard.objects.database.Database.header_file", "modulename": "howard.objects.database", "qualname": "Database.header_file", "kind": "variable", "doc": "

    \n"}, {"fullname": "howard.objects.database.Database.databases_folders", "modulename": "howard.objects.database", "qualname": "Database.databases_folders", "kind": "variable", "doc": "

    \n"}, {"fullname": "howard.objects.database.Database.assembly", "modulename": "howard.objects.database", "qualname": "Database.assembly", "kind": "variable", "doc": "

    \n"}, {"fullname": "howard.objects.database.Database.table", "modulename": "howard.objects.database", "qualname": "Database.table", "kind": "variable", "doc": "

    \n"}, {"fullname": "howard.objects.database.Database.set_database", "modulename": "howard.objects.database", "qualname": "Database.set_database", "kind": "function", "doc": "

    This function sets the database attribute of an object to a specified database if it exists or\ncan be found in a list of folders.

    \n\n
    Parameters
    \n\n
      \n
    • database: A string representing the name of the database to be set
    • \n
    • databases_folders: The databases_folders parameter is a list of folders/directories\nwhere the find_database method will search for the specified database. If the database is\nfound in any of these folders, it will be set as the current database. If databases_folders is\nnot provided, the
    • \n
    • format: The format parameter is an optional string representing the format of the\ndatabase to be searched for. If provided, the find_database method will search for the\ndatabase only in the specified format. If not provided, the method will search for the database\nin all formats
    • \n
    • assembly: The assembly parameter is an optional string representing the name of the\nassembly to which the database belongs. If provided, the find_database method will search for\nthe database only in the specified assembly. If not provided, the method will search for the\ndatabase in all assemblies
    • \n
    \n", "signature": "(\tself,\tdatabase: str,\tdatabases_folders: list = None,\tformat: str = None,\tassembly: str = None) -> str:", "funcdef": "def"}, {"fullname": "howard.objects.database.Database.set_databases_folders", "modulename": "howard.objects.database", "qualname": "Database.set_databases_folders", "kind": "function", "doc": "

    This function sets the list of folders where databases are located as an attribute of an object.

    \n\n
    Parameters
    \n\n
      \n
    • databases_folders: databases_folders is a list parameter that contains the paths to the\nfolders where the databases are stored. The default value of the parameter is a list with a\nsingle element, which is the current directory (\".\")
    • \n
    \n", "signature": "(self, databases_folders: list = None) -> None:", "funcdef": "def"}, {"fullname": "howard.objects.database.Database.get_database_folders", "modulename": "howard.objects.database", "qualname": "Database.get_database_folders", "kind": "function", "doc": "

    This function returns a list of database folders.

    \n\n
    Returns
    \n\n
    \n

    The method get_database_folders is returning a list of database folders. The specific\n list being returned is stored in the instance variable databases_folders.

    \n
    \n", "signature": "(self) -> list:", "funcdef": "def"}, {"fullname": "howard.objects.database.Database.read_header_file", "modulename": "howard.objects.database", "qualname": "Database.read_header_file", "kind": "function", "doc": "

    This function reads the header of a VCF file and returns a list of the header lines.

    \n\n
    Parameters
    \n\n
      \n
    • header_file: The path to the VCF file header that needs to be read
    • \n
    \n\n
    Returns
    \n\n
    \n

    a list of header lines of a VCF file.

    \n
    \n", "signature": "(self, header_file: str = None) -> list:", "funcdef": "def"}, {"fullname": "howard.objects.database.Database.get_header_length", "modulename": "howard.objects.database", "qualname": "Database.get_header_length", "kind": "function", "doc": "

    The get_header_length function returns the length of a header file, excluding the first line.

    \n\n
    Parameters
    \n\n
      \n
    • header_file: The header_file parameter is a string that represents the file path or\nname of the header file. It is an optional parameter, which means it can be omitted when calling\nthe get_header_length method
    • \n
    \n\n
    Returns
    \n\n
    \n

    an integer, which represents the length of the header file.

    \n
    \n", "signature": "(self, header_file: str = None) -> int:", "funcdef": "def"}, {"fullname": "howard.objects.database.Database.get_header_file_columns", "modulename": "howard.objects.database", "qualname": "Database.get_header_file_columns", "kind": "function", "doc": "

    The function get_header_columns returns the header list of a VCF file.

    \n\n
    Parameters
    \n\n
      \n
    • header_file: The header_file parameter is a string that represents the file path of the\nheader file. It is an optional parameter and its default value is None
    • \n
    \n\n
    Returns
    \n\n
    \n

    a list of header columns.

    \n
    \n", "signature": "(self, header_file: str = None) -> list:", "funcdef": "def"}, {"fullname": "howard.objects.database.Database.get_header_from_list", "modulename": "howard.objects.database", "qualname": "Database.get_header_from_list", "kind": "function", "doc": "

    The function get_header_from_list returns a vcf.Reader object with a header generated from a\ngiven list or a default list.

    \n\n
    Parameters
    \n\n
      \n
    • header_list: The header_list parameter is a list of strings representing the header\nlines of a VCF (Variant Call Format) file. It is an optional parameter, meaning it can be\nprovided as an argument to the function, but if no argument is provided, a default list of\nheader lines will be used
    • \n
    \n\n
    Returns
    \n\n
    \n

    a vcf.Reader object.

    \n
    \n", "signature": "(\tself,\theader_list: list = None) -> <module 'vcf' from '/Users/lebechea/miniconda/envs/howard_devel/lib/python3.10/site-packages/vcf/__init__.py'>:", "funcdef": "def"}, {"fullname": "howard.objects.database.Database.get_header_from_file", "modulename": "howard.objects.database", "qualname": "Database.get_header_from_file", "kind": "function", "doc": "

    This function returns a VCF header either from a default list or from a file.

    \n\n
    Parameters
    \n\n
      \n
    • header_file: A string representing the file path of a VCF header file. If this parameter\nis not provided or is an empty string, the function will use a default header list
    • \n
    \n\n
    Returns
    \n\n
    \n

    a VCF object, which is obtained by calling the get_header_from_list method with the\n header_list as an argument. The header_list is either the default header list or the list\n obtained by reading a header file using the read_header_file method.

    \n
    \n", "signature": "(\tself,\theader_file: str) -> <module 'vcf' from '/Users/lebechea/miniconda/envs/howard_devel/lib/python3.10/site-packages/vcf/__init__.py'>:", "funcdef": "def"}, {"fullname": "howard.objects.database.Database.find_header_file", "modulename": "howard.objects.database", "qualname": "Database.find_header_file", "kind": "function", "doc": "

    This function finds the header file for a given database in various formats.

    \n\n
    Parameters
    \n\n
      \n
    • database: The database parameter is a string that represents the path to a database\nfile. If this parameter is not provided, the get_database() method is called to retrieve the\npath to the database file
    • \n
    \n\n
    Returns
    \n\n
    \n

    the path to the header file for a given database. If the header is in a separate file,\n it returns the path to that file. If the header is within the database file itself, it returns\n the path to the database file. If the database or its format cannot be determined, it returns\n None.

    \n
    \n", "signature": "(self, database: str = None) -> Optional[str]:", "funcdef": "def"}, {"fullname": "howard.objects.database.Database.get_header", "modulename": "howard.objects.database", "qualname": "Database.get_header", "kind": "function", "doc": "

    The get_header function in Python returns the header of a VCF file from a file, a list, or the\nobject itself based on specified conditions.

    \n\n
    Parameters
    \n\n
      \n
    • database: The database parameter in the get_header function represents a string that\nspecifies the database from which the header information should be retrieved or used. It is used\nin various parts of the function to determine how to construct the header of the VCF file
    • \n
    • header_file: The header_file parameter in the get_header function is a string\nrepresenting the path to a file containing the header information for a VCF file. This parameter\nallows you to specify a file from which the function will read the header information
    • \n
    • header_list: The header_list parameter in the get_header function is a list\ncontaining the header lines of a VCF file. If provided, the function will construct the header\nfrom this list using the get_header_from_list method. If header_list is not provided, the\nfunction will
    • \n
    • sql_query: The sql_query parameter in the get_header function is a string\nrepresenting an SQL query that can be used to retrieve data from a database. This parameter is\nused in the function to help construct the header of a VCF file based on the query results or\nother conditions specified in the function
    • \n
    \n\n
    Returns
    \n\n
    \n

    The get_header function returns the header of a VCF file based on different\n conditions:

    \n
    \n", "signature": "(\tself,\tdatabase: str = None,\theader_file: str = None,\theader_list: list = None,\tsql_query: str = None) -> <module 'vcf' from '/Users/lebechea/miniconda/envs/howard_devel/lib/python3.10/site-packages/vcf/__init__.py'>:", "funcdef": "def"}, {"fullname": "howard.objects.database.Database.get_header_from_columns", "modulename": "howard.objects.database", "qualname": "Database.get_header_from_columns", "kind": "function", "doc": "

    The function get_header_from_columns generates a VCF header based on database columns and adds\ncustom annotations to it.

    \n\n
    Parameters
    \n\n
      \n
    • database: The database parameter is a string that represents the name of a database. It\nis an optional parameter, and if not provided, the get_database() method is called to retrieve\nthe default database. This parameter specifies the database from which the columns will be used\nto generate the VCF header
    • \n
    • header_columns: The header_columns parameter is a list of column names that will be\nused to generate header information for a VCF file. If no header_columns are provided, the\nfunction will attempt to automatically detect the columns to use based on the database being\nused
    • \n
    • sql_query: The sql_query parameter in the get_header_from_columns function is used to\nspecify a SQL query that will be executed to retrieve column information from the database. This\nquery can be customized to fetch specific columns or data based on the requirements of the VCF\nheader generation process. If provided,
    • \n
    \n\n
    Returns
    \n\n
    \n

    The function get_header_from_columns returns a VCF header object that includes\n information about the columns in a database and their data types. The header object is created\n based on the input parameters, including the database name and a list of header columns.

    \n
    \n", "signature": "(\tself,\tdatabase: str = None,\theader_columns: list = [],\tsql_query: str = None) -> object:", "funcdef": "def"}, {"fullname": "howard.objects.database.Database.query", "modulename": "howard.objects.database", "qualname": "Database.query", "kind": "function", "doc": "

    This is a Python function that takes in a database and query string as parameters and returns\nthe result of the query on the database.

    \n\n
    Parameters
    \n\n
      \n
    • query: The query parameter is a string that represents the SQL query that needs to be\nexecuted on the database. It can be any valid SQL statement such as SELECT, INSERT, UPDATE,\nDELETE, etc
    • \n
    \n\n
    Returns
    \n\n
    \n

    If a query is provided, the method returns the result of the query executed on the\n database. If no query is provided, the method returns None.

    \n
    \n", "signature": "(self, query: str = None) -> object:", "funcdef": "def"}, {"fullname": "howard.objects.database.Database.set_header", "modulename": "howard.objects.database", "qualname": "Database.set_header", "kind": "function", "doc": "

    This function sets the header of a database based on a provided header file or the database\nformat.

    \n\n
    Parameters
    \n\n
      \n
    • database: A string representing the name or path of a database file. If not provided, the\nmethod will attempt to get the database name from the object's attributes
    • \n
    • header: header is a variable of type vcf (presumably representing a VCF header) that\ncan be provided as an argument to the set_header method to set the header attribute of the\nobject. If header is provided, the header_file parameter is ignored
    • \n
    • header_file: A string representing the file path of a header file. If provided, the\nfunction will use this header file to set the header attribute of the object
    • \n
    \n", "signature": "(\tself,\tdatabase: str = None,\theader: <module 'vcf' from '/Users/lebechea/miniconda/envs/howard_devel/lib/python3.10/site-packages/vcf/__init__.py'> = None,\theader_file: str = None) -> None:", "funcdef": "def"}, {"fullname": "howard.objects.database.Database.set_header_file", "modulename": "howard.objects.database", "qualname": "Database.set_header_file", "kind": "function", "doc": "

    This function sets the header file attribute of an object to the value passed as an argument.

    \n\n
    Parameters
    \n\n
      \n
    • header_file: The parameter header_file is a string that represents the name or path of\na header file. This method sets the header_file attribute of an object to the value passed as\nan argument. If no argument is passed, the header_file attribute remains unchanged
    • \n
    \n", "signature": "(self, header_file: str = None) -> None:", "funcdef": "def"}, {"fullname": "howard.objects.database.Database.get_header_columns_from_database", "modulename": "howard.objects.database", "qualname": "Database.get_header_columns_from_database", "kind": "function", "doc": "

    The get_header_columns_from_database function retrieves column names from a specified database\ntable.

    \n\n
    Parameters
    \n\n
      \n
    • database: The database parameter in the get_header_columns_from_database function is\na string that represents the name of the database from which you want to retrieve the header\ncolumns. If no specific database is provided when calling the function, it will default to using\nthe get_database() method to retrieve the
    • \n
    • query: The query parameter in the get_header_columns_from_database function is a\nstring that represents a SQL query. If provided, this query will be used to retrieve column\nnames from the specified database table instead of using the default database table
    • \n
    \n\n
    Returns
    \n\n
    \n

    The function get_header_columns_from_database returns a list of column names from the\n specified database table. If successful, it will return the list of column names. If there is an\n error or no columns are found, it will return None.

    \n
    \n", "signature": "(self, database: str = None, query: str = None) -> Optional[list]:", "funcdef": "def"}, {"fullname": "howard.objects.database.Database.get_header_file", "modulename": "howard.objects.database", "qualname": "Database.get_header_file", "kind": "function", "doc": "

    The function get_header_file generates a VCF header file based on specified parameters or a\ndefault header if needed.

    \n\n
    Parameters
    \n\n
      \n
    • header_file: The header_file parameter is a string representing the file path and name\nof the header file. If set to None, the default header file path and name will be used
    • \n
    • remove_header_line: The remove_header_line parameter is a boolean parameter that\ndetermines whether to remove the #CHROM line from the header file. If set to True, the line\nwill be removed; otherwise, it will remain in the header file. By default, this parameter is set\nto False, meaning, defaults to False
    • \n
    • replace_header_line: The replace_header_line parameter is a list of columns that can be\nused to replace the header line in the generated header file. For example, if you provide\n['#CHROM', 'POS', 'ID'], these columns will be used as the header line in the generated file\ninstead
    • \n
    • force: The force parameter in the get_header_file function is a boolean parameter\nthat determines whether to force the generation of a header file even if a header file already\nexists. If force is set to True, the function will replace the existing header file with a\nnew one. If, defaults to False
    • \n
    • sql_query: The sql_query parameter in the get_header_file function is used to specify\na SQL query that can be used to retrieve header information from a database. This query can be\npassed to the function to customize the header generation process based on the query results
    • \n
    \n\n
    Returns
    \n\n
    \n

    The function get_header_file returns a string which is the name of the header file\n that was generated or None if no header file was generated.

    \n
    \n", "signature": "(\tself,\theader_file: str = None,\tremove_header_line: bool = False,\treplace_header_line: list = None,\tforce: bool = False,\tsql_query: str = None) -> str:", "funcdef": "def"}, {"fullname": "howard.objects.database.Database.set_assembly", "modulename": "howard.objects.database", "qualname": "Database.set_assembly", "kind": "function", "doc": "

    This is a function that sets the assembly attribute of an object to a given string value.

    \n\n
    Parameters
    \n\n
      \n
    • assembly: The assembly parameter is a string that represents the name or type of assembly\nthat the object belongs to. This method sets the assembly attribute of the object to the value\npassed in as the assembly parameter. If no value is passed in, the assembly attribute remains\nunchanged. The method returns the updated value of the
    • \n
    \n", "signature": "(self, assembly: str = None) -> None:", "funcdef": "def"}, {"fullname": "howard.objects.database.Database.get_assembly", "modulename": "howard.objects.database", "qualname": "Database.get_assembly", "kind": "function", "doc": "

    This function returns the assembly attribute of an object if it exists, otherwise it returns\nNone.

    \n\n
    Returns
    \n\n
    \n

    If self.assembly is not None, then it returns the value of self.assembly.\n Otherwise, it returns None.

    \n
    \n", "signature": "(self) -> str:", "funcdef": "def"}, {"fullname": "howard.objects.database.Database.find_database", "modulename": "howard.objects.database", "qualname": "Database.find_database", "kind": "function", "doc": "

    This function finds a database file in a specified folder or the current directory.

    \n\n
    Parameters
    \n\n
      \n
    • database: The name of the database to be searched for. If not provided, it will call the\nget_database() method to get the name of the database. It is a string type parameter
    • \n
    • databases_folders: A list of folders where the function should look for the database\nfile. If this parameter is not provided, the function will look for the database file in the\ncurrent directory
    • \n
    • format: The file format of the database file. It is an optional parameter and if not\nprovided, the function will call the get_format() method to get the format
    • \n
    • assembly: assembly is an optional parameter that represents the name of a subfolder\nwhere the function should look for the database file. If provided, the function will search for\nthe database file in the specified subfolder within each of the databases_folders. If not\nprovided, the function will only search for
    • \n
    \n\n
    Returns
    \n\n
    \n

    a string that represents the path to the database file. If the database is not found or\n if no database is specified, it returns None.

    \n
    \n", "signature": "(\tself,\tdatabase: str = None,\tdatabases_folders: list = None,\tdatabase_format: str = None,\tassembly: str = None) -> Optional[str]:", "funcdef": "def"}, {"fullname": "howard.objects.database.Database.get_database", "modulename": "howard.objects.database", "qualname": "Database.get_database", "kind": "function", "doc": "

    This function returns the database name as a string.

    \n\n
    Returns
    \n\n
    \n

    The get_database method is returning the database attribute of the object. The\n return type is a string.

    \n
    \n", "signature": "(self) -> str:", "funcdef": "def"}, {"fullname": "howard.objects.database.Database.get_database_basename", "modulename": "howard.objects.database", "qualname": "Database.get_database_basename", "kind": "function", "doc": "

    This function returns the basename of a database file.

    \n\n
    Parameters
    \n\n
      \n
    • database: The parameter database is a string that represents the name of a database. If\nit is not provided, the method will use the get_database() method to retrieve the current\ndatabase
    • \n
    \n\n
    Returns
    \n\n
    \n

    a string which is the basename of the database file. If the database parameter is not\n provided, it gets the current database using the get_database() method. If the database\n exists, it returns the basename of the database file using the os.path.basename() method. If\n the database does not exist, it returns None.

    \n
    \n", "signature": "(self, database: str = None) -> Optional[str]:", "funcdef": "def"}, {"fullname": "howard.objects.database.Database.get_database_dirname", "modulename": "howard.objects.database", "qualname": "Database.get_database_dirname", "kind": "function", "doc": "

    This function returns the directory name of a given database or the current database if none is\nspecified.

    \n\n
    Parameters
    \n\n
      \n
    • database: The parameter database is a string that represents the path to a database\nfile. If it is not provided, the method will call self.get_database() to retrieve the path to\nthe default database
    • \n
    \n\n
    Returns
    \n\n
    \n

    a string that represents the directory name of the specified database file. If no\n database file is specified, it will use the default database file and return its directory name.\n If there is no database file, it will return None.

    \n
    \n", "signature": "(self, database: str = None) -> Optional[str]:", "funcdef": "def"}, {"fullname": "howard.objects.database.Database.exists", "modulename": "howard.objects.database", "qualname": "Database.exists", "kind": "function", "doc": "

    This function checks if a database exists in the specified path or in the default path.

    \n\n
    Parameters
    \n\n
      \n
    • database: The database parameter is a string that represents the name or path of a\ndatabase file. If it is not provided, the method will call the get_database() method to\nretrieve the default database name/path
    • \n
    \n\n
    Returns
    \n\n
    \n

    a boolean value indicating whether the specified database exists or not. If the\n database parameter is not provided, it gets the current database using the get_database()\n method and checks if it exists using the os.path.exists() function.

    \n
    \n", "signature": "(self, database: str = None) -> bool:", "funcdef": "def"}, {"fullname": "howard.objects.database.Database.set_format", "modulename": "howard.objects.database", "qualname": "Database.set_format", "kind": "function", "doc": "

    This is a method in a Python class that sets a format attribute to a specified string.

    \n\n
    Parameters
    \n\n
      \n
    • format: The format parameter is a string that specifies the desired format for the data.\nIt is an optional parameter, meaning that if it is not provided, the format attribute of the\nobject will not be changed. The function returns a string indicating the current format of the\nobject
    • \n
    \n", "signature": "(self, format: str = None) -> str:", "funcdef": "def"}, {"fullname": "howard.objects.database.Database.get_format", "modulename": "howard.objects.database", "qualname": "Database.get_format", "kind": "function", "doc": "

    This Python function returns the file format of a given database or the current database if none\nis provided.\nFormat database:\n - parquet\n - duckdb\n - sqlite\n - vcf\n - csv

    \n\n
    Parameters
    \n\n
      \n
    • database: The database parameter is a string that represents the type of database. It\nis an optional parameter and if not provided, the function will call the get_database() method\nto retrieve the database type
    • \n
    \n\n
    Returns
    \n\n
    \n

    a string that represents the type of database. The type of database can be one of the\n following: \"parquet\", \"duckdb\", \"sqlite\", \"vcf\", or \"csv\". The specific type of database is determined by\n the input parameter database, which is either passed as an argument to the function or\n obtained by calling the get_database() method. The `get_file_format

    \n
    \n", "signature": "(self, database: str = None) -> str:", "funcdef": "def"}, {"fullname": "howard.objects.database.Database.get_type", "modulename": "howard.objects.database", "qualname": "Database.get_type", "kind": "function", "doc": "

    The get_type function determines the type of a database (variants VCF-like or regions\nBED-like) based on its columns and format.

    \n\n
    Parameters
    \n\n
      \n
    • database: The database parameter in the get_type function is a string representing\nthe name of a database. If this parameter is not provided when calling the function, it will\nattempt to retrieve the database name using the get_database() method. This parameter is used\nto specify the database for which you
    • \n
    • sql_query: The sql_query parameter in the get_type function is used to pass an SQL\nquery as a string. This query can be used to filter or manipulate the data before determining\nthe type of the database based on its columns. If provided, the function will use this SQL query\nto fetch the
    • \n
    \n\n
    Returns
    \n\n
    \n

    The get_type function returns a string that represents the type of the database,\n which can be either \"variants\" (VCF-like) or \"regions\" (BED-like). If the database is not found\n or does not exist, the function returns None.

    \n
    \n", "signature": "(self, database: str = None, sql_query: str = None) -> Optional[str]:", "funcdef": "def"}, {"fullname": "howard.objects.database.Database.get_database_tables", "modulename": "howard.objects.database", "qualname": "Database.get_database_tables", "kind": "function", "doc": "

    This function retrieves a list of tables in a specified database using the DuckDB format.

    \n\n
    Parameters
    \n\n
      \n
    • database: The name of the database for which you want to retrieve the list of tables. If\nno database name is provided, it will use the default database
    • \n
    \n\n
    Returns
    \n\n
    \n

    a list of tables in the specified database, or None if the database does not exist or\n the format is not supported.

    \n
    \n", "signature": "(self, database: str = None) -> Union[str, list, NoneType]:", "funcdef": "def"}, {"fullname": "howard.objects.database.Database.get_database_table", "modulename": "howard.objects.database", "qualname": "Database.get_database_table", "kind": "function", "doc": "

    This function returns the name of a table in a specified database if it exists and is in a\nsupported format.

    \n\n
    Parameters
    \n\n
      \n
    • database: The name of the database to retrieve the table from. If None, it will use the\ndefault database
    • \n
    \n\n
    Returns
    \n\n
    \n

    a string that represents the name of a table in a database, or None if no suitable\n table is found.

    \n
    \n", "signature": "(self, database: str = None) -> Optional[str]:", "funcdef": "def"}, {"fullname": "howard.objects.database.Database.get_type_from_columns", "modulename": "howard.objects.database", "qualname": "Database.get_type_from_columns", "kind": "function", "doc": "

    This function returns the type of a database based on the provided list of columns.

    \n\n
    Parameters
    \n\n
      \n
    • database_columns: a list of column names in a database table
    • \n
    • check_database_type: A database type to check for. If not provided, it defaults\nto all database types defined in the constant DATABASE_TYPE_NEEDED_COLUMNS
    • \n
    \n\n
    Returns
    \n\n
    \n

    a string that represents the type of database based on the provided list of columns. If\n the needed columns for a specific database type are not found in the provided list, the function\n returns None.

    \n
    \n", "signature": "(\tself,\tdatabase_columns: list = [],\tcheck_database_type: str = None) -> Optional[str]:", "funcdef": "def"}, {"fullname": "howard.objects.database.Database.get_needed_columns", "modulename": "howard.objects.database", "qualname": "Database.get_needed_columns", "kind": "function", "doc": "

    This function takes a list of database columns and a type, and returns a dictionary of needed\ncolumns and their corresponding values found in the database columns.

    \n\n
    Parameters
    \n\n
      \n
    • database_columns: A list of column names in a database table
    • \n
    • type: The type of database being used. It is used to determine which columns are needed\nfor the specific database type
    • \n
    \n\n
    Returns
    \n\n
    \n

    a dictionary containing the columns that are needed for a specific database type, along\n with their corresponding column names in the actual database. The function takes in a list of\n database columns and a database type as input, and uses the DATABASE_TYPE_NEEDED_COLUMNS\n dictionary to determine which columns are needed for the specified database type. It then\n searches through the list of database columns to find the

    \n
    \n", "signature": "(self, database_columns: list = [], database_type: str = None) -> dict:", "funcdef": "def"}, {"fullname": "howard.objects.database.Database.get_sql_from", "modulename": "howard.objects.database", "qualname": "Database.get_sql_from", "kind": "function", "doc": "

    This function returns a SQL query string based on the input database format.

    \n\n
    Parameters
    \n\n
      \n
    • database: The parameter \"database\" is a string that represents the name or path of the\ndatabase that the function will read from. If no value is provided for this parameter, the\nfunction will call the \"get_database()\" method to retrieve the default database
    • \n
    \n\n
    Returns
    \n\n
    \n

    a string that represents a SQL query to read data from a database file. The specific\n SQL query returned depends on the format of the database file, which is determined by the\n get_format() method. The SQL query returned will be in the form of a function call to one of\n the following functions: read_parquet(), read_csv(), read_json(),

    \n
    \n", "signature": "(\tself,\tdatabase: str = None,\theader_file: str = None,\tsample_size: int = 20480) -> str:", "funcdef": "def"}, {"fullname": "howard.objects.database.Database.get_sql_database_attach", "modulename": "howard.objects.database", "qualname": "Database.get_sql_database_attach", "kind": "function", "doc": "

    This function returns a SQL query to attach or detach a database based on the specified format\nand output.

    \n\n
    Parameters
    \n\n
      \n
    • database: The name of the database to attach. If not provided, it will try to get the\ndefault database from the connection
    • \n
    • output: The \"output\" parameter is a string that specifies the desired output of the\nfunction. It can take on the following values:, defaults to query
    • \n
    \n\n
    Returns
    \n\n
    \n

    a string that represents a SQL query to attach a database to a DuckDB or SQLite\n database engine. The specific output depends on the value of the output parameter, which can\n be \"query\" (default), \"attach\", \"detach\", or \"name\". If output is \"query\" or \"attach\", the\n function returns a SQL query to attach the specified database.

    \n
    \n", "signature": "(self, database: str = None, output: str = 'query') -> Optional[str]:", "funcdef": "def"}, {"fullname": "howard.objects.database.Database.get_sql_database_link", "modulename": "howard.objects.database", "qualname": "Database.get_sql_database_link", "kind": "function", "doc": "

    This function returns a SQL database link based on the provided database name or the default\ndatabase.

    \n\n
    Parameters
    \n\n
      \n
    • database: The database parameter is a string that represents the name of the database.\nIf it is not provided, the method will call the get_database() method to retrieve the default\ndatabase
    • \n
    \n\n
    Returns
    \n\n
    \n

    a SQL database link as a string. If a database name is provided as an argument, it will\n use that database to construct the link. Otherwise, it will use the default database obtained\n from self.get_database(). The link is constructed using the sql_from and sql_table\n obtained from other methods, and the final link is returned as a string. If the

    \n
    \n", "signature": "(self, database: str = None) -> str:", "funcdef": "def"}, {"fullname": "howard.objects.database.Database.create_view", "modulename": "howard.objects.database", "qualname": "Database.create_view", "kind": "function", "doc": "

    The create_view function creates a view in a specified database or the default database, using\na SQL database link.

    \n\n
    Parameters
    \n\n
      \n
    • database: The database parameter is a string that represents the name of the database.\nIf no value is provided, it will use the value returned by the get_database() method
    • \n
    • view_name: The view_name parameter is a string that specifies the name of the view that\nwill be created in the database, defaults to variants
    • \n
    \n\n
    Returns
    \n\n
    \n

    the name of the created view.

    \n
    \n", "signature": "(self, database: str = None, view_name: str = 'variants') -> Optional[str]:", "funcdef": "def"}, {"fullname": "howard.objects.database.Database.get_view", "modulename": "howard.objects.database", "qualname": "Database.get_view", "kind": "function", "doc": "

    The get_view function returns the name of a view in a database, or creates a new view if\nspecified.

    \n\n
    Parameters
    \n\n
      \n
    • database: The database parameter is a string that represents the name of the database.\nIt is an optional parameter and if not provided, the method get_database() is called to\nretrieve the database name
    • \n
    • create_view: The create_view parameter is a string that represents the name of the view\nthat you want to create. If this parameter is provided, the get_view method will call the\ncreate_view method and pass the database and view_name parameters to it
    • \n
    \n\n
    Returns
    \n\n
    \n

    The method get_view returns a string.

    \n
    \n", "signature": "(self, database: str = None, create_view: str = None) -> Optional[str]:", "funcdef": "def"}, {"fullname": "howard.objects.database.Database.is_compressed", "modulename": "howard.objects.database", "qualname": "Database.is_compressed", "kind": "function", "doc": "

    This Python function checks if a given file is compressed and returns the format of the\ncompression.

    \n\n
    Parameters
    \n\n
      \n
    • database: The database parameter is a string that represents the path or name of the\ninput file that needs to be checked for compression. If no value is provided for database, the\nmethod calls get_database() to retrieve the default database file
    • \n
    \n\n
    Returns
    \n\n
    \n

    The function is_compressed returns a boolean value indicating whether the input file\n is compressed or not. The function calls another function get_file_compressed to determine the\n compression format of the file.

    \n
    \n", "signature": "(self, database: str = None) -> bool:", "funcdef": "def"}, {"fullname": "howard.objects.database.Database.get_header_infos_list", "modulename": "howard.objects.database", "qualname": "Database.get_header_infos_list", "kind": "function", "doc": "

    This function returns a list of header information for a given database or the current database\nif none is specified.

    \n\n
    Parameters
    \n\n
      \n
    • database: The database parameter is a string that represents the name of the database\nfrom which the header information is to be retrieved. If no database name is provided, the\nmethod will use the default database name obtained from the get_database() method
    • \n
    \n\n
    Returns
    \n\n
    \n

    A list of header information from a database, or an empty list if the database header\n is not available.

    \n
    \n", "signature": "(self, database: str = None) -> list:", "funcdef": "def"}, {"fullname": "howard.objects.database.Database.find_column", "modulename": "howard.objects.database", "qualname": "Database.find_column", "kind": "function", "doc": "

    The find_column function searches for a specific column in a database table, with the option\nto search for a column with a specific prefix or within the INFO column header.

    \n\n
    Parameters
    \n\n
      \n
    • database: The name of the database to search for the column in. If not provided, it will\nuse the current database that the code is connected to
    • \n
    • table: The \"table\" parameter is the name of the table in the database where the column is\nlocated
    • \n
    • column: The \"column\" parameter is a string that represents the name of the column to\nsearch for in the database table. By default, it is set to \"INFO\", but you can change it to\nsearch for a specific column name, defaults to INFO
    • \n
    • prefixes: The prefixes parameter is a list of strings that are used to search for a\ncolumn with a specific prefix in the database. For example, if the prefixes list contains \"DP/\",\nthe function will search for a column named \"DP/INFO\" in addition to the default \"INFO\" column
    • \n
    • database_columns: The database_columns parameter is a list that contains the names of\nall the columns in a specific database table. It is used to check if a specific column exists in\nthe database. If the database_columns parameter is not provided, the function will call the\nget_columns method to retrieve
    • \n
    \n\n
    Returns
    \n\n
    \n

    a string that represents the name of the column found in the database, based on the\n input parameters. If the column is found, it returns the column name. If the column is not\n found, it returns None.

    \n
    \n", "signature": "(\tself,\tdatabase: str = None,\ttable: str = None,\tcolumn: str = 'INFO',\tprefixes: list = ['INFO/'],\tdatabase_columns: list = None) -> str:", "funcdef": "def"}, {"fullname": "howard.objects.database.Database.map_columns", "modulename": "howard.objects.database", "qualname": "Database.map_columns", "kind": "function", "doc": "

    The map_columns function maps input columns to their corresponding columns in a specified\ndatabase table, using specified prefixes to filter the columns.

    \n\n
    Parameters
    \n\n
      \n
    • database: The name of the database to search for columns in. If no database is specified,\nthe method will use the default database set in the connection
    • \n
    • table: The table parameter is the name of the table in the database that you want to\nmap the columns for
    • \n
    • columns: A list of column names that you want to map to their corresponding column names\nin the database
    • \n
    • prefixes: The prefixes parameter is a list of strings that are used to filter the\ncolumns that are searched for. Only columns that start with one of the prefixes in the list will\nbe considered. In the code above, the default value for prefixes is [\"INFO/\"]
    • \n
    \n\n
    Returns
    \n\n
    \n

    a dictionary that maps the input columns to their corresponding columns found in the\n specified database and table, with the specified prefixes.

    \n
    \n", "signature": "(\tself,\tdatabase: str = None,\ttable: str = None,\tcolumns: list = [],\tprefixes: list = ['INFO/']) -> dict:", "funcdef": "def"}, {"fullname": "howard.objects.database.Database.get_columns", "modulename": "howard.objects.database", "qualname": "Database.get_columns", "kind": "function", "doc": "

    The function get_columns retrieves a list of column names from a specified database and table\nusing SQL queries.

    \n\n
    Parameters
    \n\n
      \n
    • database: The database parameter in the get_columns function is used to specify the\nname of the database from which you want to retrieve the column names. If this parameter is not\nprovided, the function will default to using the current database
    • \n
    • table: The table parameter in the get_columns function represents the name of the\ntable in the database for which you want to retrieve the column names. If this parameter is not\nprovided, the function will attempt to get the table name from the specified database. If the\ntable parameter is not specified and
    • \n
    • header_file: The header_file parameter in the get_columns function is used to specify\nthe file containing the header information for the data source. This information is often used\nin cases where the column names are not explicitly defined in the database schema or where the\ndata is stored in a file format that requires additional
    • \n
    • sql_query: The sql_query parameter in the get_columns function is used to specify a\ncustom SQL query to retrieve column names from the database table. If a sql_query is provided,\nthe function will execute that query to get the column names and return them as a list
    • \n
    \n\n
    Returns
    \n\n
    \n

    The function get_columns returns a list of column names for a given database and\n table. If a SQL query is provided, it executes the query and returns the column names from the\n result. If no database is specified, it uses the current database. It then checks the database\n format and connects to the database accordingly to retrieve the column names using a SQL query.\n If the table parameter is not provided

    \n
    \n", "signature": "(\tself,\tdatabase: str = None,\ttable: str = None,\theader_file: str = None,\tsql_query: str = None) -> list:", "funcdef": "def"}, {"fullname": "howard.objects.database.Database.get_table_columns_from_format", "modulename": "howard.objects.database", "qualname": "Database.get_table_columns_from_format", "kind": "function", "doc": "

    The function get_table_columns_from_format returns a list of table columns based on the\nspecified database format.

    \n\n
    Parameters
    \n\n
      \n
    • database: The database parameter is a string that represents the name of the database.\nIt is an optional parameter, which means it has a default value of None. If no value is\nprovided for the database parameter, the get_database() method is called to retrieve the\ncurrent database name
    • \n
    \n\n
    Returns
    \n\n
    \n

    a list of table columns.

    \n
    \n", "signature": "(self, database: str = None) -> list:", "funcdef": "def"}, {"fullname": "howard.objects.database.Database.get_table_columns_from_file", "modulename": "howard.objects.database", "qualname": "Database.get_table_columns_from_file", "kind": "function", "doc": "

    The function get_table_columns_from_file retrieves the column names from a database or header\nfile.

    \n\n
    Parameters
    \n\n
      \n
    • database: The database parameter is a string that represents the name or path of the\ndatabase file. If this parameter is not provided, the get_database() method is called to\nretrieve the database name or path
    • \n
    • header_file: The header_file parameter is a string that represents the file path or\nname of the header file. This file contains the header information for a table, which typically\nincludes the names of the columns in the table
    • \n
    • header_file_find: Allow header file find if not provided
    • \n
    \n\n
    Returns
    \n\n
    \n

    a list of table columns.

    \n
    \n", "signature": "(\tself,\tdatabase: str = None,\theader_file: str = None,\theader_file_find: bool = True) -> list:", "funcdef": "def"}, {"fullname": "howard.objects.database.Database.get_annotations", "modulename": "howard.objects.database", "qualname": "Database.get_annotations", "kind": "function", "doc": "

    This function returns the annotations of a database or the default database if none is\nspecified.

    \n\n
    Parameters
    \n\n
      \n
    • database: The parameter database is a string that represents the name of the database\nto retrieve annotations from. If no database name is provided, the method will use the default\ndatabase name obtained from the get_database() method
    • \n
    \n\n
    Returns
    \n\n
    \n

    The function get_annotations returns the infos attribute of the header of a\n database. If the database parameter is not provided, it gets the current database using the\n get_database method. If there is no header, it returns None.

    \n
    \n", "signature": "(self, database: str = None) -> object:", "funcdef": "def"}, {"fullname": "howard.objects.database.Database.get_extra_columns", "modulename": "howard.objects.database", "qualname": "Database.get_extra_columns", "kind": "function", "doc": "

    This Python function returns a list of extra columns in a database table that are not needed\nbased on the database type and existing columns.

    \n\n
    Parameters
    \n\n
      \n
    • database: A string representing the name of the database to retrieve columns from. If\nNone is provided, the default database will be used
    • \n
    • database_type: The database_type parameter in the get_extra_columns function\nrepresents the type of the database for which you want to retrieve the list of extra columns. It\nis used to determine which columns are needed based on the database type and the existing\ncolumns in the specified database table
    • \n
    • sql_query: The sql_query parameter in the get_extra_columns function is used to pass\nan SQL query that can be used to retrieve specific columns from the database. This query can be\ncustomized to filter columns based on certain conditions or criteria before analyzing them to\ndetermine the extra columns that are not needed
    • \n
    \n\n
    Returns
    \n\n
    \n

    A list of extra columns in a database table that are not needed based on the database\n type and existing columns.

    \n
    \n", "signature": "(\tself,\tdatabase: str = None,\tdatabase_type: str = None,\tsql_query: str = None) -> list:", "funcdef": "def"}, {"fullname": "howard.objects.database.Database.is_vcf", "modulename": "howard.objects.database", "qualname": "Database.is_vcf", "kind": "function", "doc": "

    The is_vcf function checks if a given database is of type \"vcf\" by examining its columns and\ntheir types.

    \n\n
    Parameters
    \n\n
      \n
    • database: The database parameter in the is_vcf function is a string that represents\nthe name of the database that the function will use to check if the file is a VCF (Variant Call\nFormat) file. If the database parameter is not provided when calling the function, it will
    • \n
    • sql_query: The sql_query parameter in the is_vcf function is used to pass an SQL\nquery string that can be used to filter the columns retrieved from the database. This query can\nbe used to narrow down the columns that are considered when checking if the database is of type\n\"vcf\"
    • \n
    \n\n
    Returns
    \n\n
    \n

    The function is_vcf returns a boolean value indicating whether the database type is\n \"vcf\" or not.

    \n
    \n", "signature": "(self, database: str = None, sql_query: str = None) -> bool:", "funcdef": "def"}, {"fullname": "howard.objects.database.Database.get_conn", "modulename": "howard.objects.database", "qualname": "Database.get_conn", "kind": "function", "doc": "

    The function returns the connection object.

    \n\n
    Returns
    \n\n
    \n

    The method is returning the value of the instance variable self.conn.

    \n
    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "howard.objects.database.Database.is_genotype_column", "modulename": "howard.objects.database", "qualname": "Database.is_genotype_column", "kind": "function", "doc": "

    The is_genotype_column function in Python checks if a specified column in a database contains\ngenotype data based on a regular expression pattern.

    \n\n
    Parameters
    \n\n
      \n
    • column: The column parameter is a string that represents the name of a column in a\ndatabase table. It is used to specify the column for which you want to check if it contains\ngenotype information based on a regular expression pattern
    • \n
    • database: The database parameter in the is_genotype_column method is used to specify\nthe name of the database from which the data will be queried. If a database is provided, the\nmethod will query the specified database to check if the given column contains genotype\ninformation. If no database is provided,
    • \n
    • downsampling: The downsampling parameter in the is_genotype_column method is an\ninteger value that determines the number of rows to be sampled from the database table when\nchecking for genotype information in the specified column. This parameter is used to limit the\nnumber of rows to be processed in order to improve performance, defaults to 1000
    • \n
    • check_format: The check_format parameter in the is_genotype_column method is a\nboolean flag that determines whether the function should check the format of the data before\nproceeding with the genotype column analysis. If check_format is set to True, the function\nwill verify if the specified column exists in, defaults to True
    • \n
    \n\n
    Returns
    \n\n
    \n

    The is_genotype_column method returns a boolean value. If the specified column in a\n database table contains genotype information, it returns True; otherwise, it returns False.

    \n
    \n", "signature": "(\tself,\tcolumn: str,\tdatabase: str = None,\tdownsampling: int = 1000,\tcheck_format: bool = True) -> bool:", "funcdef": "def"}, {"fullname": "howard.objects.database.Database.export", "modulename": "howard.objects.database", "qualname": "Database.export", "kind": "function", "doc": "

    The export function exports data from a database to a specified output format, compresses it\nif necessary, and returns a boolean value indicating whether the export was successful or not.

    \n\n
    Parameters
    \n\n
      \n
    • output_database: The output_database parameter is a string that represents the path and\nfilename of the output file to be exported. It specifies where the exported data will be saved
    • \n
    • output_header: The output_header parameter is an optional string that represents the\nheader of the output file. If provided, it specifies the header that will be included in the\noutput file. If not provided, the header will be automatically detected based on the output file\nformat
    • \n
    • header_in_output: The header_in_output parameter is a boolean value that determines\nwhether the header should be included in the output file. If set to True, the header will be\nincluded in the output file. If set to False, the header will not be included in the output\nfile. By default,, defaults to True
    • \n
    • database: The database parameter is the name of the database from which you want to\nexport data. If this parameter is not provided, the function will use the get_database()\nmethod to retrieve the current database
    • \n
    • table: The table parameter specifies the name of the table in the database from which\nthe data will be exported. By default, if not specified, it is set to \"variants\", defaults to\nvariants
    • \n
    • parquet_partitions: The parquet_partitions parameter is a list that specifies the\npartition columns for the Parquet output format. Each element in the list represents a partition\ncolumn. The partitions are used to organize the data in the Parquet file based on the values of\nthe specified columns
    • \n
    • threads: The threads parameter in the export function is an optional integer that\nspecifies the number of threads to use for exporting the data. It determines the level of\nparallelism during the export process. By default, it is set to 1, defaults to 1
    • \n
    • sort: The sort parameter in the export function is a boolean value that specifies\nwhether the output file should be sorted based on the genomic coordinates of the variants. If\nsort is set to True, the output file will be sorted. If sort is set to False,, defaults\nto False
    • \n
    • index: The index parameter is a boolean value that specifies whether to index the\noutput file. If index is set to True, the output file will be indexed. If index is set to\nFalse or not provided, the output file will not be indexed. By default,, defaults to False
    • \n
    • existing_columns_header: The existing_columns_header parameter is a list that\nrepresents the existing columns in the header of the output file. It is used to determine the\ncolumns that should be included in the output file. If this parameter is not provided, the\nfunction will automatically detect the header columns based on the output file format
    • \n
    • order_by: The order_by parameter in the export function is a string that specifies\nthe columns by which the output file should be ordered. You can specify multiple columns\nseparated by commas. Each column can be followed by the keyword \"ASC\" (ascending) or \"DESC\"\n(descending) to specify
    • \n
    • query: The query parameter in the export function represents a SQL query that\nspecifies the data to be exported from the database. If provided, the function will export the\nresult of this query. If the query parameter is not provided, the function will generate a\nquery to export the data from
    • \n
    • compression_type: The compression_type parameter in the export function specifies the\ntype of compression to be applied to the output file. By default, the compression type is set to\n\"bgzip\". This parameter allows you to choose the compression algorithm for the output file, such\nas \"gzip\", \"bgzip
    • \n
    • chunk_size: The chunk_size parameter in the export function specifies the size of\neach chunk or batch of data that will be processed during the export operation. It determines\nhow many records or lines of data will be included in each chunk that is processed at a time,\ndefaults to 1000000
    • \n
    • export_mode: The export_mode parameter in the export function specifies the mode of\nexport, which can be either \"pyarrow\" or \"duckdb\", defaults to pyarrow
    • \n
    • compresslevel: The compresslevel parameter in the export function represents the\nlevel of compression for gzip. By default, it is set to 6. This parameter allows you to specify\nthe compression level when using gzip compression for the output file. The compression level can\nrange from 0 (no compression), defaults to 6
    • \n
    • export_header: The export_header parameter is a boolean flag that determines whether\nthe header of a VCF file should be exported to a separate file or not. If export_header is\nTrue, the header will be exported to a file. If export_header is False, the header will not\nbe, defaults to True
    • \n
    • sample_list: The sample_list parameter in the export function is a list that\nspecifies the samples to be included in the exported data. If provided, the samples listed in\nthis parameter will be included in the output file. If not provided, the function will determine\nthe samples to include based on the data
    • \n
    \n\n
    Returns
    \n\n
    \n

    The export function returns a boolean value indicating whether the export was\n successful or not.

    \n
    \n", "signature": "(\tself,\toutput_database: str,\toutput_header: str = None,\theader_in_output: bool = True,\tdatabase: str = None,\ttable: str = 'variants',\tparquet_partitions: list = None,\tthreads: int = 1,\tsort: bool = False,\tindex: bool = False,\texisting_columns_header: list = [],\torder_by: str = '',\tquery: str = None,\tcompression_type: str = None,\tchunk_size: int = 1000000,\texport_mode: str = 'pyarrow',\tcompresslevel: int = 6,\texport_header: bool = True,\tsample_list: list = None) -> bool:", "funcdef": "def"}, {"fullname": "howard.objects.genome", "modulename": "howard.objects.genome", "kind": "module", "doc": "

    \n"}, {"fullname": "howard.objects.genome.ChromosomeSubset", "modulename": "howard.objects.genome", "qualname": "ChromosomeSubset", "kind": "class", "doc": "

    Allow direct access to a subset of the chromosome.

    \n"}, {"fullname": "howard.objects.genome.ChromosomeSubset.__init__", "modulename": "howard.objects.genome", "qualname": "ChromosomeSubset.__init__", "kind": "function", "doc": "

    \n", "signature": "(name, genome=None)"}, {"fullname": "howard.objects.genome.ChromosomeSubset.name", "modulename": "howard.objects.genome", "qualname": "ChromosomeSubset.name", "kind": "variable", "doc": "

    \n"}, {"fullname": "howard.objects.genome.ChromosomeSubset.genome", "modulename": "howard.objects.genome", "qualname": "ChromosomeSubset.genome", "kind": "variable", "doc": "

    \n"}, {"fullname": "howard.objects.genome.GenomeSubset", "modulename": "howard.objects.genome", "qualname": "GenomeSubset", "kind": "class", "doc": "

    Allow the direct access of a subset of the genome.

    \n"}, {"fullname": "howard.objects.genome.GenomeSubset.__init__", "modulename": "howard.objects.genome", "qualname": "GenomeSubset.__init__", "kind": "function", "doc": "

    \n", "signature": "(genome, chrom, start, end, seqid)"}, {"fullname": "howard.objects.genome.GenomeSubset.genome", "modulename": "howard.objects.genome", "qualname": "GenomeSubset.genome", "kind": "variable", "doc": "

    \n"}, {"fullname": "howard.objects.genome.GenomeSubset.chrom", "modulename": "howard.objects.genome", "qualname": "GenomeSubset.chrom", "kind": "variable", "doc": "

    \n"}, {"fullname": "howard.objects.genome.GenomeSubset.start", "modulename": "howard.objects.genome", "qualname": "GenomeSubset.start", "kind": "variable", "doc": "

    \n"}, {"fullname": "howard.objects.genome.GenomeSubset.end", "modulename": "howard.objects.genome", "qualname": "GenomeSubset.end", "kind": "variable", "doc": "

    \n"}, {"fullname": "howard.objects.genome.GenomeSubset.seqid", "modulename": "howard.objects.genome", "qualname": "GenomeSubset.seqid", "kind": "variable", "doc": "

    \n"}, {"fullname": "howard.objects.genome.MockGenomeError", "modulename": "howard.objects.genome", "qualname": "MockGenomeError", "kind": "class", "doc": "

    Common base class for all non-exit exceptions.

    \n", "bases": "builtins.Exception"}, {"fullname": "howard.objects.genome.MockSequence", "modulename": "howard.objects.genome", "qualname": "MockSequence", "kind": "class", "doc": "

    \n"}, {"fullname": "howard.objects.genome.MockSequence.__init__", "modulename": "howard.objects.genome", "qualname": "MockSequence.__init__", "kind": "function", "doc": "

    \n", "signature": "(sequence)"}, {"fullname": "howard.objects.genome.MockSequence.sequence", "modulename": "howard.objects.genome", "qualname": "MockSequence.sequence", "kind": "variable", "doc": "

    \n"}, {"fullname": "howard.objects.genome.MockChromosome", "modulename": "howard.objects.genome", "qualname": "MockChromosome", "kind": "class", "doc": "

    \n"}, {"fullname": "howard.objects.genome.MockChromosome.__init__", "modulename": "howard.objects.genome", "qualname": "MockChromosome.__init__", "kind": "function", "doc": "

    \n", "signature": "(name, genome=None)"}, {"fullname": "howard.objects.genome.MockChromosome.name", "modulename": "howard.objects.genome", "qualname": "MockChromosome.name", "kind": "variable", "doc": "

    \n"}, {"fullname": "howard.objects.genome.MockChromosome.genome", "modulename": "howard.objects.genome", "qualname": "MockChromosome.genome", "kind": "variable", "doc": "

    \n"}, {"fullname": "howard.objects.genome.MockGenome", "modulename": "howard.objects.genome", "qualname": "MockGenome", "kind": "class", "doc": "

    \n"}, {"fullname": "howard.objects.genome.MockGenome.__init__", "modulename": "howard.objects.genome", "qualname": "MockGenome.__init__", "kind": "function", "doc": "

    A mock genome object that provides a pygr compatible interface.

    \n\n

    lookup: a list of ((chrom, start, end), seq) values that define\n a lookup table for genome sequence requests.\nfilename: a stream or filename containing a lookup table.\ndb_filename: a fasta file to use for genome sequence requests. All\n requests are recorded and can be written to a lookup table file\n using the write method.\ndefault_seq: if given, this base will always be returned if\n region is unavailable.

    \n", "signature": "(lookup=None, filename=None, db_filename=None, default_seq=None)"}, {"fullname": "howard.objects.genome.MockGenome.get_seq", "modulename": "howard.objects.genome", "qualname": "MockGenome.get_seq", "kind": "function", "doc": "

    Return a sequence by chromosome name and region [start, end).

    \n\n

    Coordinates are 0-based, end-exclusive.

    \n", "signature": "(self, chrom, start, end):", "funcdef": "def"}, {"fullname": "howard.objects.genome.MockGenome.read", "modulename": "howard.objects.genome", "qualname": "MockGenome.read", "kind": "function", "doc": "

    Read a sequence lookup table from a file.

    \n\n

    filename: a filename string or file stream.

    \n", "signature": "(self, filename):", "funcdef": "def"}, {"fullname": "howard.objects.genome.MockGenome.write", "modulename": "howard.objects.genome", "qualname": "MockGenome.write", "kind": "function", "doc": "

    Write a sequence lookup table to file.

    \n", "signature": "(self, filename):", "funcdef": "def"}, {"fullname": "howard.objects.genome.MockGenomeTestFile", "modulename": "howard.objects.genome", "qualname": "MockGenomeTestFile", "kind": "class", "doc": "

    \n", "bases": "MockGenome"}, {"fullname": "howard.objects.genome.MockGenomeTestFile.__init__", "modulename": "howard.objects.genome", "qualname": "MockGenomeTestFile.__init__", "kind": "function", "doc": "

    A mock genome object that provides a pygr compatible interface.

    \n\n

    lookup: a list of ((chrom, start, end), seq) values that define\n a lookup table for genome sequence requests.\nfilename: a stream or filename containing a lookup table.\ndb_filename: a fasta file to use for genome sequence requests. All\n requests are recorded and can be written to a lookup table file\n using the write method.\ndefault_seq: if given, this base will always be returned if\n region is unavailable.

    \n", "signature": "(\tlookup=None,\tfilename=None,\tdb_filename=None,\tdefault_seq=None,\tcreate_data=False)"}, {"fullname": "howard.objects.genome.MockGenomeTestFile.get_seq", "modulename": "howard.objects.genome", "qualname": "MockGenomeTestFile.get_seq", "kind": "function", "doc": "

    Return a sequence by chromosome name and region [start, end).

    \n\n

    Coordinates are 0-based, end-exclusive.

    \n", "signature": "(self, chrom, start, end):", "funcdef": "def"}, {"fullname": "howard.objects.hgvs", "modulename": "howard.objects.hgvs", "kind": "module", "doc": "

    HGVS language currently implemented.

    \n\n

    HGVS = ALLELE\n | PREFIX_NAME : ALLELE

    \n\n

    PREFIX_NAME = TRANSCRIPT\n | TRANSCRIPT '(' GENE ')'

    \n\n

    TRANSCRIPT = TRANSCRIPT_NAME\n | TRANSCRIPT_NAME '.' TRANSCRIPT_VERSION

    \n\n

    TRANSCRIPT_VERSION = NUMBER

    \n\n

    ALLELE = 'c.' CDNA_ALLELE # cDNA\n | 'g.' GENOMIC_ALLELE # genomic\n | 'm.' MIT_ALLELE # mitochondrial sequence\n | 'n.' NC_ALLELE # non-coding RNA reference sequence\n | 'r.' RNA_ALLELE # RNA sequence (like r.76a>u)\n | 'p.' PROTEIN_ALLELE # protein sequence (like p.Lys76Asn)

    \n\n

    NC_ALLELE =\nRNA_ALLELE =\nCDNA_ALLELE = CDNA_COORD SINGLE_BASE_CHANGE\n | CDNA_COORD_RANGE MULTI_BASE_CHANGE

    \n\n

    GENOMIC_ALLELE =\nMIT_ALLELE = COORD SINGLE_BASE_CHANGE\n | COORD_RANGE MULTI_BASE_CHANGE

    \n\n

    SINGLE_BASE_CHANGE = CDNA_ALLELE = CDNA_COORD BASE '=' # no change\n | CDNA_COORD BASE '>' BASE # substitution\n | CDNA_COORD 'ins' BASE # 1bp insertion\n | CDNA_COORD 'del' BASE # 1bp deletion\n | CDNA_COORD 'dup' BASE # 1bp duplication\n | CDNA_COORD 'ins' # 1bp insertion\n | CDNA_COORD 'del' # 1bp deletion\n | CDNA_COORD 'dup' # 1bp duplication\n | CDNA_COORD 'del' BASE 'ins' BASE # 1bp indel\n | CDNA_COORD 'delins' BASE # 1bp indel

    \n\n

    MULTI_BASE_CHANGE = COORD_RANGE 'del' BASES # deletion\n | COORD_RANGE 'ins' BASES # insertion\n | COORD_RANGE 'dup' BASES # duplication\n | COORD_RANGE 'del' # deletion\n | COORD_RANGE 'dup' # duplication\n | COORD_RANGE 'del' BASES 'ins' BASES # indel\n | COORD_RANGE 'delins' BASES # indel

    \n\n

    AMINO1 = [GAVLIMFWPSTCYNQDEKRH]

    \n\n

    AMINO3 = 'Gly' | 'Ala' | 'Val' | 'Leu' | 'Ile' | 'Met' | 'Phe' | 'Trp' | 'Pro'\n | 'Ser' | 'Thr' | 'Cys' | 'Tyr' | 'Asn' | 'Gln' | 'Asp' | 'Glu' | 'Lys'\n | 'Arg' | 'His'

    \n\n

    PROTEIN_ALLELE = AMINO3 COORD '=' # no peptide change\n | AMINO1 COORD '=' # no peptide change\n | AMINO3 COORD AMINO3 PEP_EXTRA # peptide change\n | AMINO1 COORD AMINO1 PEP_EXTRA # peptide change\n | AMINO3 COORD '_' AMINO3 COORD PEP_EXTRA # indel\n | AMINO1 COORD '_' AMINO1 COORD PEP_EXTRA # indel\n | AMINO3 COORD '_' AMINO3 COORD PEP_EXTRA AMINO3 # indel\n | AMINO1 COORD '_' AMINO1 COORD PEP_EXTRA AMINO1 # indel

    \n\n

    A genomic range:

    \n\n

    COORD_RANGE = COORD '_' COORD

    \n\n

    A cDNA range:

    \n\n

    CDNA_COORD_RANGE = CDNA_COORD '_' CDNA_COORD

    \n\n

    A cDNA coordinate:

    \n\n

    CDNA_COORD = COORD_PREFIX COORD\n | COORD_PREFIX COORD OFFSET_PREFIX OFFSET\nCOORD_PREFIX = '' | '-' | '*'\nCOORD = NUMBER\nOFFSET_PREFIX = '-' | '+'\nOFFSET = NUMBER

    \n\n

    Primitives:

    \n\n

    NUMBER = \"\\d+\"\nBASE = [ACGT]\nBASES = BASE+

    \n"}, {"fullname": "howard.objects.hgvs.CHROM_PREFIX", "modulename": "howard.objects.hgvs", "qualname": "CHROM_PREFIX", "kind": "variable", "doc": "

    \n", "default_value": "'chr'"}, {"fullname": "howard.objects.hgvs.CODON_1", "modulename": "howard.objects.hgvs", "qualname": "CODON_1", "kind": "variable", "doc": "

    \n", "default_value": "{'TTT': 'F', 'TTC': 'F', 'TCT': 'S', 'TCC': 'S', 'TAT': 'Y', 'TAC': 'Y', 'TGT': 'C', 'TGC': 'C', 'TTA': 'L', 'TCA': 'S', 'TAA': '*', 'TGA': '*', 'TTG': 'L', 'TCG': 'S', 'TAG': '*', 'TGG': 'W', 'CTT': 'L', 'CTC': 'L', 'CCT': 'P', 'CCC': 'P', 'CAT': 'H', 'CAC': 'H', 'CGT': 'R', 'CGC': 'R', 'CTA': 'L', 'CTG': 'L', 'CCA': 'P', 'CCG': 'P', 'CAA': 'Q', 'CAG': 'Q', 'CGA': 'R', 'CGG': 'R', 'ATT': 'I', 'ATC': 'I', 'ACT': 'T', 'ACC': 'T', 'AAT': 'N', 'AAC': 'N', 'AGT': 'S', 'AGC': 'S', 'ATA': 'I', 'ACA': 'T', 'AAA': 'K', 'AGA': 'R', 'ATG': 'M', 'ACG': 'T', 'AAG': 'K', 'AGG': 'R', 'GTT': 'V', 'GTC': 'V', 'GCT': 'A', 'GCC': 'A', 'GAT': 'D', 'GAC': 'D', 'GGT': 'G', 'GGC': 'G', 'GTA': 'V', 'GTG': 'V', 'GCA': 'A', 'GCG': 'A', 'GAA': 'E', 'GAG': 'E', 'GGA': 'G', 'GGG': 'G'}"}, {"fullname": "howard.objects.hgvs.CODON_3", "modulename": "howard.objects.hgvs", "qualname": "CODON_3", "kind": "variable", "doc": "

    \n", "default_value": "{'TTT': 'Phe', 'TTC': 'Phe', 'TCT': 'Ser', 'TCC': 'Ser', 'TAT': 'Tyr', 'TAC': 'Tyr', 'TGT': 'Cys', 'TGC': 'Cys', 'TTA': 'Leu', 'TCA': 'Ser', 'TAA': '*', 'TGA': '*', 'TTG': 'Leu', 'TCG': 'Ser', 'TAG': '*', 'TGG': 'Trp', 'CTT': 'Leu', 'CTC': 'Leu', 'CCT': 'Pro', 'CCC': 'Pro', 'CAT': 'His', 'CAC': 'His', 'CGT': 'Arg', 'CGC': 'Arg', 'CTA': 'Leu', 'CTG': 'Leu', 'CCA': 'Pro', 'CCG': 'Pro', 'CAA': 'Gln', 'CAG': 'Gln', 'CGA': 'Arg', 'CGG': 'Arg', 'ATT': 'Ile', 'ATC': 'Ile', 'ACT': 'Thr', 'ACC': 'Thr', 'AAT': 'Asn', 'AAC': 'Asn', 'AGT': 'Ser', 'AGC': 'Ser', 'ATA': 'Ile', 'ACA': 'Thr', 'AAA': 'Lys', 'AGA': 'Arg', 'ATG': 'Met', 'ACG': 'Thr', 'AAG': 'Lys', 'AGG': 'Arg', 'GTT': 'Val', 'GTC': 'Val', 'GCT': 'Ala', 'GCC': 'Ala', 'GAT': 'Asp', 'GAC': 'Asp', 'GGT': 'Gly', 'GGC': 'Gly', 'GTA': 'Val', 'GTG': 'Val', 'GCA': 'Ala', 'GCG': 'Ala', 'GAA': 'Glu', 'GAG': 'Glu', 'GGA': 'Gly', 'GGG': 'Gly'}"}, {"fullname": "howard.objects.hgvs.CODON_FULL", "modulename": "howard.objects.hgvs", "qualname": "CODON_FULL", "kind": "variable", "doc": "

    \n", "default_value": "{'TTT': 'Phenylalanine', 'TTC': 'Phenylalanine', 'TCT': 'Serine', 'TCC': 'Serine', 'TAT': 'Tyrosine', 'TAC': 'Tyrosine', 'TGT': 'Cysteine', 'TGC': 'Cysteine', 'TTA': 'Leucine', 'TCA': 'Serine', 'TAA': 'Stop', 'TGA': 'Stop', 'TTG': 'Leucine', 'TCG': 'Serine', 'TAG': 'Stop', 'TGG': 'Tryptophan', 'CTT': 'Leucine', 'CTC': 'Leucine', 'CCT': 'Proline', 'CCC': 'Proline', 'CAT': 'Histidine', 'CAC': 'Histidine', 'CGT': 'Arginine', 'CGC': 'Arginine', 'CTA': 'Leucine', 'CTG': 'Leucine', 'CCA': 'Proline', 'CCG': 'Proline', 'CAA': 'Glutamine', 'CAG': 'Glutamine', 'CGA': 'Arginine', 'CGG': 'Arginine', 'ATT': 'Isoleucine', 'ATC': 'Isoleucine', 'ACT': 'Threonine', 'ACC': 'Threonine', 'AAT': 'Asparagine', 'AAC': 'Asparagine', 'AGT': 'Serine', 'AGC': 'Serine', 'ATA': 'Isoleucine', 'ACA': 'Threonine', 'AAA': 'Lysine', 'AGA': 'Arginine', 'ATG': 'Methionine', 'ACG': 'Threonine', 'AAG': 'Lysine', 'AGG': 'Arginine', 'GTT': 'Valine', 'GTC': 'Valine', 'GCT': 'Alanine', 'GCC': 'Alanine', 'GAT': 'Aspartic acid', 'GAC': 'Aspartic acid', 'GGT': 'Glycine', 'GGC': 'Glycine', 'GTA': 'Valine', 'GTG': 'Valine', 'GCA': 'Alanine', 'GCG': 'Alanine', 'GAA': 'Glutamic acid', 'GAG': 'Glutamic acid', 'GGA': 'Glycine', 'GGG': 'Glycine'}"}, {"fullname": "howard.objects.hgvs.NUCLEOTIDE_TRANSLATE", "modulename": "howard.objects.hgvs", "qualname": "NUCLEOTIDE_TRANSLATE", "kind": "variable", "doc": "

    \n", "default_value": "{'T': 'A', 'A': 'T', 'G': 'C', 'C': 'G'}"}, {"fullname": "howard.objects.hgvs.HGVSRegex", "modulename": "howard.objects.hgvs", "qualname": "HGVSRegex", "kind": "class", "doc": "

    All regular expression for HGVS names.

    \n"}, {"fullname": "howard.objects.hgvs.HGVSRegex.BASE", "modulename": "howard.objects.hgvs", "qualname": "HGVSRegex.BASE", "kind": "variable", "doc": "

    \n", "default_value": "'[acgtbdhkmnrsvwyACGTBDHKMNRSVWY]|\\\\d+'"}, {"fullname": "howard.objects.hgvs.HGVSRegex.BASES", "modulename": "howard.objects.hgvs", "qualname": "HGVSRegex.BASES", "kind": "variable", "doc": "

    \n", "default_value": "'[acgtbdhkmnrsvwyACGTBDHKMNRSVWY]+|\\\\d+'"}, {"fullname": "howard.objects.hgvs.HGVSRegex.DNA_REF", "modulename": "howard.objects.hgvs", "qualname": "HGVSRegex.DNA_REF", "kind": "variable", "doc": "

    \n", "default_value": "'(?P<ref>[acgtbdhkmnrsvwyACGTBDHKMNRSVWY]+|\\\\d+)'"}, {"fullname": "howard.objects.hgvs.HGVSRegex.DNA_ALT", "modulename": "howard.objects.hgvs", "qualname": "HGVSRegex.DNA_ALT", "kind": "variable", "doc": "

    \n", "default_value": "'(?P<alt>[acgtbdhkmnrsvwyACGTBDHKMNRSVWY]+|\\\\d+)'"}, {"fullname": "howard.objects.hgvs.HGVSRegex.EQUAL", "modulename": "howard.objects.hgvs", "qualname": "HGVSRegex.EQUAL", "kind": "variable", "doc": "

    \n", "default_value": "'(?P<mutation_type>=)'"}, {"fullname": "howard.objects.hgvs.HGVSRegex.SUB", "modulename": "howard.objects.hgvs", "qualname": "HGVSRegex.SUB", "kind": "variable", "doc": "

    \n", "default_value": "'(?P<mutation_type>>)'"}, {"fullname": "howard.objects.hgvs.HGVSRegex.INS", "modulename": "howard.objects.hgvs", "qualname": "HGVSRegex.INS", "kind": "variable", "doc": "

    \n", "default_value": "'(?P<mutation_type>ins)'"}, {"fullname": "howard.objects.hgvs.HGVSRegex.DEL", "modulename": "howard.objects.hgvs", "qualname": "HGVSRegex.DEL", "kind": "variable", "doc": "

    \n", "default_value": "'(?P<mutation_type>del)'"}, {"fullname": "howard.objects.hgvs.HGVSRegex.DUP", "modulename": "howard.objects.hgvs", "qualname": "HGVSRegex.DUP", "kind": "variable", "doc": "

    \n", "default_value": "'(?P<mutation_type>dup)'"}, {"fullname": "howard.objects.hgvs.HGVSRegex.COORD_START", "modulename": "howard.objects.hgvs", "qualname": "HGVSRegex.COORD_START", "kind": "variable", "doc": "

    \n", "default_value": "'(?P<start>\\\\d+)'"}, {"fullname": "howard.objects.hgvs.HGVSRegex.COORD_END", "modulename": "howard.objects.hgvs", "qualname": "HGVSRegex.COORD_END", "kind": "variable", "doc": "

    \n", "default_value": "'(?P<end>\\\\d+)'"}, {"fullname": "howard.objects.hgvs.HGVSRegex.COORD_RANGE", "modulename": "howard.objects.hgvs", "qualname": "HGVSRegex.COORD_RANGE", "kind": "variable", "doc": "

    \n", "default_value": "'(?P<start>\\\\d+)_(?P<end>\\\\d+)'"}, {"fullname": "howard.objects.hgvs.HGVSRegex.CDNA_COORD", "modulename": "howard.objects.hgvs", "qualname": "HGVSRegex.CDNA_COORD", "kind": "variable", "doc": "

    \n", "default_value": "'(?P<coord_prefix>|-|\\\\*)(?P<coord>\\\\d+)((?P<offset_prefix>-|\\\\+)(?P<offset>\\\\d+))?'"}, {"fullname": "howard.objects.hgvs.HGVSRegex.CDNA_START", "modulename": "howard.objects.hgvs", "qualname": "HGVSRegex.CDNA_START", "kind": "variable", "doc": "

    \n", "default_value": "'(?P<start>(?P<start_coord_prefix>|-|\\\\*)(?P<start_coord>\\\\d+)((?P<start_offset_prefix>-|\\\\+)(?P<start_offset>\\\\d+))?)'"}, {"fullname": "howard.objects.hgvs.HGVSRegex.CDNA_END", "modulename": "howard.objects.hgvs", "qualname": "HGVSRegex.CDNA_END", "kind": "variable", "doc": "

    \n", "default_value": "'(?P<end>(?P<end_coord_prefix>|-|\\\\*)(?P<end_coord>\\\\d+)((?P<end_offset_prefix>-|\\\\+)(?P<end_offset>\\\\d+))?)'"}, {"fullname": "howard.objects.hgvs.HGVSRegex.CDNA_RANGE", "modulename": "howard.objects.hgvs", "qualname": "HGVSRegex.CDNA_RANGE", "kind": "variable", "doc": "

    \n", "default_value": "'(?P<start>(?P<start_coord_prefix>|-|\\\\*)(?P<start_coord>\\\\d+)((?P<start_offset_prefix>-|\\\\+)(?P<start_offset>\\\\d+))?)_(?P<end>(?P<end_coord_prefix>|-|\\\\*)(?P<end_coord>\\\\d+)((?P<end_offset_prefix>-|\\\\+)(?P<end_offset>\\\\d+))?)'"}, {"fullname": "howard.objects.hgvs.HGVSRegex.CDNA_ALLELE", "modulename": "howard.objects.hgvs", "qualname": "HGVSRegex.CDNA_ALLELE", "kind": "variable", "doc": "

    \n", "default_value": "['(?P<start>(?P<start_coord_prefix>|-|\\\\*)(?P<start_coord>\\\\d+)((?P<start_offset_prefix>-|\\\\+)(?P<start_offset>\\\\d+))?)(?P<mutation_type>=)', '(?P<start>(?P<start_coord_prefix>|-|\\\\*)(?P<start_coord>\\\\d+)((?P<start_offset_prefix>-|\\\\+)(?P<start_offset>\\\\d+))?)(?P<ref>[acgtbdhkmnrsvwyACGTBDHKMNRSVWY]+|\\\\d+)(?P<mutation_type>=)', '(?P<start>(?P<start_coord_prefix>|-|\\\\*)(?P<start_coord>\\\\d+)((?P<start_offset_prefix>-|\\\\+)(?P<start_offset>\\\\d+))?)(?P<ref>[acgtbdhkmnrsvwyACGTBDHKMNRSVWY]+|\\\\d+)(?P<mutation_type>>)(?P<alt>[acgtbdhkmnrsvwyACGTBDHKMNRSVWY]+|\\\\d+)', '(?P<start>(?P<start_coord_prefix>|-|\\\\*)(?P<start_coord>\\\\d+)((?P<start_offset_prefix>-|\\\\+)(?P<start_offset>\\\\d+))?)(?P<mutation_type>ins)(?P<alt>[acgtbdhkmnrsvwyACGTBDHKMNRSVWY]+|\\\\d+)', '(?P<start>(?P<start_coord_prefix>|-|\\\\*)(?P<start_coord>\\\\d+)((?P<start_offset_prefix>-|\\\\+)(?P<start_offset>\\\\d+))?)(?P<mutation_type>del)(?P<ref>[acgtbdhkmnrsvwyACGTBDHKMNRSVWY]+|\\\\d+)', '(?P<start>(?P<start_coord_prefix>|-|\\\\*)(?P<start_coord>\\\\d+)((?P<start_offset_prefix>-|\\\\+)(?P<start_offset>\\\\d+))?)(?P<mutation_type>dup)(?P<ref>[acgtbdhkmnrsvwyACGTBDHKMNRSVWY]+|\\\\d+)', '(?P<start>(?P<start_coord_prefix>|-|\\\\*)(?P<start_coord>\\\\d+)((?P<start_offset_prefix>-|\\\\+)(?P<start_offset>\\\\d+))?)(?P<mutation_type>del)', '(?P<start>(?P<start_coord_prefix>|-|\\\\*)(?P<start_coord>\\\\d+)((?P<start_offset_prefix>-|\\\\+)(?P<start_offset>\\\\d+))?)(?P<mutation_type>dup)', '(?P<start>\\\\d+)_(?P<end>\\\\d+)(?P<mutation_type>=)', '(?P<start>(?P<start_coord_prefix>|-|\\\\*)(?P<start_coord>\\\\d+)((?P<start_offset_prefix>-|\\\\+)(?P<start_offset>\\\\d+))?)_(?P<end>(?P<end_coord_prefix>|-|\\\\*)(?P<end_coord>\\\\d+)((?P<end_offset_prefix>-|\\\\+)(?P<end_offset>\\\\d+))?)(?P<mutation_type>ins)(?P<alt>[acgtbdhkmnrsvwyACGTBDHKMNRSVWY]+|\\\\d+)', 
'(?P<start>(?P<start_coord_prefix>|-|\\\\*)(?P<start_coord>\\\\d+)((?P<start_offset_prefix>-|\\\\+)(?P<start_offset>\\\\d+))?)_(?P<end>(?P<end_coord_prefix>|-|\\\\*)(?P<end_coord>\\\\d+)((?P<end_offset_prefix>-|\\\\+)(?P<end_offset>\\\\d+))?)(?P<mutation_type>del)(?P<ref>[acgtbdhkmnrsvwyACGTBDHKMNRSVWY]+|\\\\d+)', '(?P<start>(?P<start_coord_prefix>|-|\\\\*)(?P<start_coord>\\\\d+)((?P<start_offset_prefix>-|\\\\+)(?P<start_offset>\\\\d+))?)_(?P<end>(?P<end_coord_prefix>|-|\\\\*)(?P<end_coord>\\\\d+)((?P<end_offset_prefix>-|\\\\+)(?P<end_offset>\\\\d+))?)(?P<mutation_type>dup)(?P<ref>[acgtbdhkmnrsvwyACGTBDHKMNRSVWY]+|\\\\d+)', '(?P<start>(?P<start_coord_prefix>|-|\\\\*)(?P<start_coord>\\\\d+)((?P<start_offset_prefix>-|\\\\+)(?P<start_offset>\\\\d+))?)_(?P<end>(?P<end_coord_prefix>|-|\\\\*)(?P<end_coord>\\\\d+)((?P<end_offset_prefix>-|\\\\+)(?P<end_offset>\\\\d+))?)(?P<mutation_type>del)', '(?P<start>(?P<start_coord_prefix>|-|\\\\*)(?P<start_coord>\\\\d+)((?P<start_offset_prefix>-|\\\\+)(?P<start_offset>\\\\d+))?)_(?P<end>(?P<end_coord_prefix>|-|\\\\*)(?P<end_coord>\\\\d+)((?P<end_offset_prefix>-|\\\\+)(?P<end_offset>\\\\d+))?)(?P<mutation_type>dup)', '(?P<delins>(?P<start>(?P<start_coord_prefix>|-|\\\\*)(?P<start_coord>\\\\d+)((?P<start_offset_prefix>-|\\\\+)(?P<start_offset>\\\\d+))?)del(?P<ref>[acgtbdhkmnrsvwyACGTBDHKMNRSVWY]+|\\\\d+)ins(?P<alt>[acgtbdhkmnrsvwyACGTBDHKMNRSVWY]+|\\\\d+))', '(?P<delins>(?P<start>(?P<start_coord_prefix>|-|\\\\*)(?P<start_coord>\\\\d+)((?P<start_offset_prefix>-|\\\\+)(?P<start_offset>\\\\d+))?)_(?P<end>(?P<end_coord_prefix>|-|\\\\*)(?P<end_coord>\\\\d+)((?P<end_offset_prefix>-|\\\\+)(?P<end_offset>\\\\d+))?)del(?P<ref>[acgtbdhkmnrsvwyACGTBDHKMNRSVWY]+|\\\\d+)ins(?P<alt>[acgtbdhkmnrsvwyACGTBDHKMNRSVWY]+|\\\\d+))', '(?P<delins>(?P<start>(?P<start_coord_prefix>|-|\\\\*)(?P<start_coord>\\\\d+)((?P<start_offset_prefix>-|\\\\+)(?P<start_offset>\\\\d+))?)delins(?P<alt>[acgtbdhkmnrsvwyACGTBDHKMNRSVWY]+|\\\\d+))', 
'(?P<delins>(?P<start>(?P<start_coord_prefix>|-|\\\\*)(?P<start_coord>\\\\d+)((?P<start_offset_prefix>-|\\\\+)(?P<start_offset>\\\\d+))?)_(?P<end>(?P<end_coord_prefix>|-|\\\\*)(?P<end_coord>\\\\d+)((?P<end_offset_prefix>-|\\\\+)(?P<end_offset>\\\\d+))?)delins(?P<alt>[acgtbdhkmnrsvwyACGTBDHKMNRSVWY]+|\\\\d+))']"}, {"fullname": "howard.objects.hgvs.HGVSRegex.CDNA_ALLELE_REGEXES", "modulename": "howard.objects.hgvs", "qualname": "HGVSRegex.CDNA_ALLELE_REGEXES", "kind": "variable", "doc": "

    \n", "default_value": "[re.compile('^(?P<start>(?P<start_coord_prefix>|-|\\\\*)(?P<start_coord>\\\\d+)((?P<start_offset_prefix>-|\\\\+)(?P<start_offset>\\\\d+))?)(?P<mutation_type>=)$'), re.compile('^(?P<start>(?P<start_coord_prefix>|-|\\\\*)(?P<start_coord>\\\\d+)((?P<start_offset_prefix>-|\\\\+)(?P<start_offset>\\\\d+))?)(?P<ref>[acgtbdhkmnrsvwyACGTBDHKMNRSVWY]+|\\\\d+)(?P<mutation_type>=)$'), re.compile('^(?P<start>(?P<start_coord_prefix>|-|\\\\*)(?P<start_coord>\\\\d+)((?P<start_offset_prefix>-|\\\\+)(?P<start_offset>\\\\d+))?)(?P<ref>[acgtbdhkmnrsvwyACGTBDHKMNRSVWY]+|\\\\d+)(?P<mutation_type>>)(?P<alt>[acgtb), re.compile('^(?P<start>(?P<start_coord_prefix>|-|\\\\*)(?P<start_coord>\\\\d+)((?P<start_offset_prefix>-|\\\\+)(?P<start_offset>\\\\d+))?)(?P<mutation_type>ins)(?P<alt>[acgtbdhkmnrsvwyACGTBDHKMNRSVWY]+|\\\\d+)$'), re.compile('^(?P<start>(?P<start_coord_prefix>|-|\\\\*)(?P<start_coord>\\\\d+)((?P<start_offset_prefix>-|\\\\+)(?P<start_offset>\\\\d+))?)(?P<mutation_type>del)(?P<ref>[acgtbdhkmnrsvwyACGTBDHKMNRSVWY]+|\\\\d+)$'), re.compile('^(?P<start>(?P<start_coord_prefix>|-|\\\\*)(?P<start_coord>\\\\d+)((?P<start_offset_prefix>-|\\\\+)(?P<start_offset>\\\\d+))?)(?P<mutation_type>dup)(?P<ref>[acgtbdhkmnrsvwyACGTBDHKMNRSVWY]+|\\\\d+)$'), re.compile('^(?P<start>(?P<start_coord_prefix>|-|\\\\*)(?P<start_coord>\\\\d+)((?P<start_offset_prefix>-|\\\\+)(?P<start_offset>\\\\d+))?)(?P<mutation_type>del)$'), re.compile('^(?P<start>(?P<start_coord_prefix>|-|\\\\*)(?P<start_coord>\\\\d+)((?P<start_offset_prefix>-|\\\\+)(?P<start_offset>\\\\d+))?)(?P<mutation_type>dup)$'), re.compile('^(?P<start>\\\\d+)_(?P<end>\\\\d+)(?P<mutation_type>=)$'), re.compile('^(?P<start>(?P<start_coord_prefix>|-|\\\\*)(?P<start_coord>\\\\d+)((?P<start_offset_prefix>-|\\\\+)(?P<start_offset>\\\\d+))?)_(?P<end>(?P<end_coord_prefix>|-|\\\\*)(?P<end_coord>\\\\d+)((?P<end_offset_prefix>-|), 
re.compile('^(?P<start>(?P<start_coord_prefix>|-|\\\\*)(?P<start_coord>\\\\d+)((?P<start_offset_prefix>-|\\\\+)(?P<start_offset>\\\\d+))?)_(?P<end>(?P<end_coord_prefix>|-|\\\\*)(?P<end_coord>\\\\d+)((?P<end_offset_prefix>-|), re.compile('^(?P<start>(?P<start_coord_prefix>|-|\\\\*)(?P<start_coord>\\\\d+)((?P<start_offset_prefix>-|\\\\+)(?P<start_offset>\\\\d+))?)_(?P<end>(?P<end_coord_prefix>|-|\\\\*)(?P<end_coord>\\\\d+)((?P<end_offset_prefix>-|), re.compile('^(?P<start>(?P<start_coord_prefix>|-|\\\\*)(?P<start_coord>\\\\d+)((?P<start_offset_prefix>-|\\\\+)(?P<start_offset>\\\\d+))?)_(?P<end>(?P<end_coord_prefix>|-|\\\\*)(?P<end_coord>\\\\d+)((?P<end_offset_prefix>-|), re.compile('^(?P<start>(?P<start_coord_prefix>|-|\\\\*)(?P<start_coord>\\\\d+)((?P<start_offset_prefix>-|\\\\+)(?P<start_offset>\\\\d+))?)_(?P<end>(?P<end_coord_prefix>|-|\\\\*)(?P<end_coord>\\\\d+)((?P<end_offset_prefix>-|), re.compile('^(?P<delins>(?P<start>(?P<start_coord_prefix>|-|\\\\*)(?P<start_coord>\\\\d+)((?P<start_offset_prefix>-|\\\\+)(?P<start_offset>\\\\d+))?)del(?P<ref>[acgtbdhkmnrsvwyACGTBDHKMNRSVWY]+|\\\\d+)ins(?P<alt>[acgtbdhk), re.compile('^(?P<delins>(?P<start>(?P<start_coord_prefix>|-|\\\\*)(?P<start_coord>\\\\d+)((?P<start_offset_prefix>-|\\\\+)(?P<start_offset>\\\\d+))?)_(?P<end>(?P<end_coord_prefix>|-|\\\\*)(?P<end_coord>\\\\d+)((?P<end_offse), re.compile('^(?P<delins>(?P<start>(?P<start_coord_prefix>|-|\\\\*)(?P<start_coord>\\\\d+)((?P<start_offset_prefix>-|\\\\+)(?P<start_offset>\\\\d+))?)delins(?P<alt>[acgtbdhkmnrsvwyACGTBDHKMNRSVWY]+|\\\\d+))$'), re.compile('^(?P<delins>(?P<start>(?P<start_coord_prefix>|-|\\\\*)(?P<start_coord>\\\\d+)((?P<start_offset_prefix>-|\\\\+)(?P<start_offset>\\\\d+))?)_(?P<end>(?P<end_coord_prefix>|-|\\\\*)(?P<end_coord>\\\\d+)((?P<end_offse)]"}, {"fullname": "howard.objects.hgvs.HGVSRegex.PEP", "modulename": "howard.objects.hgvs", "qualname": "HGVSRegex.PEP", "kind": "variable", "doc": "

    \n", "default_value": "'([A-Z]([a-z]{2}))+'"}, {"fullname": "howard.objects.hgvs.HGVSRegex.PEP_REF", "modulename": "howard.objects.hgvs", "qualname": "HGVSRegex.PEP_REF", "kind": "variable", "doc": "

    \n", "default_value": "'(?P<ref>([A-Z]([a-z]{2}))+)'"}, {"fullname": "howard.objects.hgvs.HGVSRegex.PEP_REF2", "modulename": "howard.objects.hgvs", "qualname": "HGVSRegex.PEP_REF2", "kind": "variable", "doc": "

    \n", "default_value": "'(?P<ref2>([A-Z]([a-z]{2}))+)'"}, {"fullname": "howard.objects.hgvs.HGVSRegex.PEP_ALT", "modulename": "howard.objects.hgvs", "qualname": "HGVSRegex.PEP_ALT", "kind": "variable", "doc": "

    \n", "default_value": "'(?P<alt>([A-Z]([a-z]{2}))+)'"}, {"fullname": "howard.objects.hgvs.HGVSRegex.PEP_EXTRA", "modulename": "howard.objects.hgvs", "qualname": "HGVSRegex.PEP_EXTRA", "kind": "variable", "doc": "

    \n", "default_value": "'(?P<extra>(|=|\\\\?)(|fs))'"}, {"fullname": "howard.objects.hgvs.HGVSRegex.PEP_ALLELE", "modulename": "howard.objects.hgvs", "qualname": "HGVSRegex.PEP_ALLELE", "kind": "variable", "doc": "

    \n", "default_value": "['(?P<ref>([A-Z]([a-z]{2}))+)(?P<start>\\\\d+)(?P<extra>(|=|\\\\?)(|fs))', '(?P<ref>([A-Z]([a-z]{2}))+)(?P<start>\\\\d+)(?P<alt>([A-Z]([a-z]{2}))+)(?P<extra>(|=|\\\\?)(|fs))', '(?P<delins>(?P<ref>([A-Z]([a-z]{2}))+)(?P<start>\\\\d+)_(?P<ref2>([A-Z]([a-z]{2}))+)(?P<end>\\\\d+)(?P<extra>(|=|\\\\?)(|fs)))', '(?P<delins>(?P<ref>([A-Z]([a-z]{2}))+)(?P<start>\\\\d+)_(?P<ref2>([A-Z]([a-z]{2}))+)(?P<end>\\\\d+)(?P<alt>([A-Z]([a-z]{2}))+)(?P<extra>(|=|\\\\?)(|fs)))']"}, {"fullname": "howard.objects.hgvs.HGVSRegex.PEP_ALLELE_REGEXES", "modulename": "howard.objects.hgvs", "qualname": "HGVSRegex.PEP_ALLELE_REGEXES", "kind": "variable", "doc": "

    \n", "default_value": "[re.compile('^(?P<ref>([A-Z]([a-z]{2}))+)(?P<start>\\\\d+)(?P<extra>(|=|\\\\?)(|fs))$'), re.compile('^(?P<ref>([A-Z]([a-z]{2}))+)(?P<start>\\\\d+)(?P<alt>([A-Z]([a-z]{2}))+)(?P<extra>(|=|\\\\?)(|fs))$'), re.compile('^(?P<delins>(?P<ref>([A-Z]([a-z]{2}))+)(?P<start>\\\\d+)_(?P<ref2>([A-Z]([a-z]{2}))+)(?P<end>\\\\d+)(?P<extra>(|=|\\\\?)(|fs)))$'), re.compile('^(?P<delins>(?P<ref>([A-Z]([a-z]{2}))+)(?P<start>\\\\d+)_(?P<ref2>([A-Z]([a-z]{2}))+)(?P<end>\\\\d+)(?P<alt>([A-Z]([a-z]{2}))+)(?P<extra>(|=|\\\\?)(|fs)))$')]"}, {"fullname": "howard.objects.hgvs.HGVSRegex.GENOMIC_ALLELE", "modulename": "howard.objects.hgvs", "qualname": "HGVSRegex.GENOMIC_ALLELE", "kind": "variable", "doc": "

    \n", "default_value": "['(?P<start>\\\\d+)(?P<mutation_type>=)', '(?P<start>\\\\d+)(?P<ref>[acgtbdhkmnrsvwyACGTBDHKMNRSVWY]+|\\\\d+)(?P<mutation_type>=)', '(?P<start>\\\\d+)(?P<ref>[acgtbdhkmnrsvwyACGTBDHKMNRSVWY]+|\\\\d+)(?P<mutation_type>>)(?P<alt>[acgtbdhkmnrsvwyACGTBDHKMNRSVWY]+|\\\\d+)', '(?P<start>\\\\d+)(?P<mutation_type>ins)(?P<alt>[acgtbdhkmnrsvwyACGTBDHKMNRSVWY]+|\\\\d+)', '(?P<start>\\\\d+)(?P<mutation_type>del)(?P<ref>[acgtbdhkmnrsvwyACGTBDHKMNRSVWY]+|\\\\d+)', '(?P<start>\\\\d+)(?P<mutation_type>dup)(?P<ref>[acgtbdhkmnrsvwyACGTBDHKMNRSVWY]+|\\\\d+)', '(?P<start>\\\\d+)(?P<mutation_type>del)', '(?P<start>\\\\d+)(?P<mutation_type>dup)', '(?P<start>\\\\d+)_(?P<end>\\\\d+)(?P<mutation_type>=)', '(?P<start>\\\\d+)_(?P<end>\\\\d+)(?P<mutation_type>ins)(?P<alt>[acgtbdhkmnrsvwyACGTBDHKMNRSVWY]+|\\\\d+)', '(?P<start>\\\\d+)_(?P<end>\\\\d+)(?P<mutation_type>del)(?P<ref>[acgtbdhkmnrsvwyACGTBDHKMNRSVWY]+|\\\\d+)', '(?P<start>\\\\d+)_(?P<end>\\\\d+)(?P<mutation_type>dup)(?P<ref>[acgtbdhkmnrsvwyACGTBDHKMNRSVWY]+|\\\\d+)', '(?P<start>\\\\d+)_(?P<end>\\\\d+)(?P<mutation_type>del)', '(?P<start>\\\\d+)_(?P<end>\\\\d+)(?P<mutation_type>dup)', '(?P<delins>(?P<start>\\\\d+)del(?P<ref>[acgtbdhkmnrsvwyACGTBDHKMNRSVWY]+|\\\\d+)ins(?P<alt>[acgtbdhkmnrsvwyACGTBDHKMNRSVWY]+|\\\\d+))', '(?P<delins>(?P<start>\\\\d+)_(?P<end>\\\\d+)del(?P<ref>[acgtbdhkmnrsvwyACGTBDHKMNRSVWY]+|\\\\d+)ins(?P<alt>[acgtbdhkmnrsvwyACGTBDHKMNRSVWY]+|\\\\d+))', '(?P<delins>(?P<start>\\\\d+)delins(?P<alt>[acgtbdhkmnrsvwyACGTBDHKMNRSVWY]+|\\\\d+))', '(?P<delins>(?P<start>\\\\d+)_(?P<end>\\\\d+)delins(?P<alt>[acgtbdhkmnrsvwyACGTBDHKMNRSVWY]+|\\\\d+))']"}, {"fullname": "howard.objects.hgvs.HGVSRegex.GENOMIC_ALLELE_REGEXES", "modulename": "howard.objects.hgvs", "qualname": "HGVSRegex.GENOMIC_ALLELE_REGEXES", "kind": "variable", "doc": "

    \n", "default_value": "[re.compile('^(?P<start>\\\\d+)(?P<mutation_type>=)$'), re.compile('^(?P<start>\\\\d+)(?P<ref>[acgtbdhkmnrsvwyACGTBDHKMNRSVWY]+|\\\\d+)(?P<mutation_type>=)$'), re.compile('^(?P<start>\\\\d+)(?P<ref>[acgtbdhkmnrsvwyACGTBDHKMNRSVWY]+|\\\\d+)(?P<mutation_type>>)(?P<alt>[acgtbdhkmnrsvwyACGTBDHKMNRSVWY]+|\\\\d+)$'), re.compile('^(?P<start>\\\\d+)(?P<mutation_type>ins)(?P<alt>[acgtbdhkmnrsvwyACGTBDHKMNRSVWY]+|\\\\d+)$'), re.compile('^(?P<start>\\\\d+)(?P<mutation_type>del)(?P<ref>[acgtbdhkmnrsvwyACGTBDHKMNRSVWY]+|\\\\d+)$'), re.compile('^(?P<start>\\\\d+)(?P<mutation_type>dup)(?P<ref>[acgtbdhkmnrsvwyACGTBDHKMNRSVWY]+|\\\\d+)$'), re.compile('^(?P<start>\\\\d+)(?P<mutation_type>del)$'), re.compile('^(?P<start>\\\\d+)(?P<mutation_type>dup)$'), re.compile('^(?P<start>\\\\d+)_(?P<end>\\\\d+)(?P<mutation_type>=)$'), re.compile('^(?P<start>\\\\d+)_(?P<end>\\\\d+)(?P<mutation_type>ins)(?P<alt>[acgtbdhkmnrsvwyACGTBDHKMNRSVWY]+|\\\\d+)$'), re.compile('^(?P<start>\\\\d+)_(?P<end>\\\\d+)(?P<mutation_type>del)(?P<ref>[acgtbdhkmnrsvwyACGTBDHKMNRSVWY]+|\\\\d+)$'), re.compile('^(?P<start>\\\\d+)_(?P<end>\\\\d+)(?P<mutation_type>dup)(?P<ref>[acgtbdhkmnrsvwyACGTBDHKMNRSVWY]+|\\\\d+)$'), re.compile('^(?P<start>\\\\d+)_(?P<end>\\\\d+)(?P<mutation_type>del)$'), re.compile('^(?P<start>\\\\d+)_(?P<end>\\\\d+)(?P<mutation_type>dup)$'), re.compile('^(?P<delins>(?P<start>\\\\d+)del(?P<ref>[acgtbdhkmnrsvwyACGTBDHKMNRSVWY]+|\\\\d+)ins(?P<alt>[acgtbdhkmnrsvwyACGTBDHKMNRSVWY]+|\\\\d+))$'), re.compile('^(?P<delins>(?P<start>\\\\d+)_(?P<end>\\\\d+)del(?P<ref>[acgtbdhkmnrsvwyACGTBDHKMNRSVWY]+|\\\\d+)ins(?P<alt>[acgtbdhkmnrsvwyACGTBDHKMNRSVWY]+|\\\\d+))$'), re.compile('^(?P<delins>(?P<start>\\\\d+)delins(?P<alt>[acgtbdhkmnrsvwyACGTBDHKMNRSVWY]+|\\\\d+))$'), re.compile('^(?P<delins>(?P<start>\\\\d+)_(?P<end>\\\\d+)delins(?P<alt>[acgtbdhkmnrsvwyACGTBDHKMNRSVWY]+|\\\\d+))$')]"}, {"fullname": "howard.objects.hgvs.REFSEQ_PREFIXES", "modulename": "howard.objects.hgvs", "qualname": 
"REFSEQ_PREFIXES", "kind": "variable", "doc": "

    \n", "default_value": "[('AC_', 'genomic', 'Complete genomic molecule, usually alternate assembly'), ('NC_', 'genomic', 'Complete genomic molecule, usually reference assembly'), ('NG_', 'genomic', 'Incomplete genomic region'), ('NT_', 'genomic', 'Contig or scaffold, clone-based or WGS'), ('NW_', 'genomic', 'Contig or scaffold, primarily WGS'), ('NS_', 'genomic', 'Environmental sequence'), ('NZ_', 'genomic', 'Unfinished WGS'), ('NM_', 'mRNA', ''), ('NR_', 'RNA', ''), ('XM_', 'mRNA', 'Predicted model'), ('XR_', 'RNA', 'Predicted model'), ('AP_', 'Protein', 'Annotated on AC_ alternate assembly'), ('NP_', 'Protein', 'Associated with an NM_ or NC_ accession'), ('YP_', 'Protein', ''), ('XP_', 'Protein', 'Predicted model, associated with an XM_ accession'), ('ZP_', 'Protein', 'Predicted model, annotated on NZ_ genomic records')]"}, {"fullname": "howard.objects.hgvs.REFSEQ_PREFIX_LOOKUP", "modulename": "howard.objects.hgvs", "qualname": "REFSEQ_PREFIX_LOOKUP", "kind": "variable", "doc": "

    \n", "default_value": "{'AC_': ('genomic', 'Complete genomic molecule, usually alternate assembly'), 'NC_': ('genomic', 'Complete genomic molecule, usually reference assembly'), 'NG_': ('genomic', 'Incomplete genomic region'), 'NT_': ('genomic', 'Contig or scaffold, clone-based or WGS'), 'NW_': ('genomic', 'Contig or scaffold, primarily WGS'), 'NS_': ('genomic', 'Environmental sequence'), 'NZ_': ('genomic', 'Unfinished WGS'), 'NM_': ('mRNA', ''), 'NR_': ('RNA', ''), 'XM_': ('mRNA', 'Predicted model'), 'XR_': ('RNA', 'Predicted model'), 'AP_': ('Protein', 'Annotated on AC_ alternate assembly'), 'NP_': ('Protein', 'Associated with an NM_ or NC_ accession'), 'YP_': ('Protein', ''), 'XP_': ('Protein', 'Predicted model, associated with an XM_ accession'), 'ZP_': ('Protein', 'Predicted model, annotated on NZ_ genomic records')}"}, {"fullname": "howard.objects.hgvs.get_refseq_type", "modulename": "howard.objects.hgvs", "qualname": "get_refseq_type", "kind": "function", "doc": "

    The get_refseq_type function returns the RefSeq type for a given RefSeq name.

    \n\n
    Parameters
    \n\n
      \n
    • name: The name parameter is a string representing a RefSeq name
    • \n
    \n\n
    Returns
    \n\n
    \n

    The function get_refseq_type returns the RefSeq type for a given RefSeq name.

    \n
    \n", "signature": "(name: str) -> str:", "funcdef": "def"}, {"fullname": "howard.objects.hgvs.InvalidHGVSName", "modulename": "howard.objects.hgvs", "qualname": "InvalidHGVSName", "kind": "class", "doc": "

    Inappropriate argument value (of correct type).

    \n", "bases": "builtins.ValueError"}, {"fullname": "howard.objects.hgvs.InvalidHGVSName.__init__", "modulename": "howard.objects.hgvs", "qualname": "InvalidHGVSName.__init__", "kind": "function", "doc": "

    The function initializes an InvalidHGVSName object with a message, name, part, and reason.

    \n\n
    Parameters
    \n\n
      \n
    • name: The name parameter is a string that represents the invalid HGVS name. It is the\nname that is considered invalid and does not meet the required criteria
    • \n
    • part: The \"part\" parameter represents the part of the HGVS (Human Genome Variation\nSociety) name that is invalid. It is used to provide more specific information about the error\nthat occurred, defaults to name
    • \n
    • reason: The \"reason\" parameter is an optional argument that provides additional\ninformation or context for why the HGVS name is considered invalid. It can be used to provide\nspecific details about the error or to explain why the name does not meet the required criteria
    • \n
    \n", "signature": "(name: str = '', part: str = 'name', reason: str = '')"}, {"fullname": "howard.objects.hgvs.InvalidHGVSName.name", "modulename": "howard.objects.hgvs", "qualname": "InvalidHGVSName.name", "kind": "variable", "doc": "

    \n"}, {"fullname": "howard.objects.hgvs.InvalidHGVSName.part", "modulename": "howard.objects.hgvs", "qualname": "InvalidHGVSName.part", "kind": "variable", "doc": "

    \n"}, {"fullname": "howard.objects.hgvs.InvalidHGVSName.reason", "modulename": "howard.objects.hgvs", "qualname": "InvalidHGVSName.reason", "kind": "variable", "doc": "

    \n"}, {"fullname": "howard.objects.hgvs.HGVSName", "modulename": "howard.objects.hgvs", "qualname": "HGVSName", "kind": "class", "doc": "

    Represents a HGVS variant name.

    \n"}, {"fullname": "howard.objects.hgvs.HGVSName.__init__", "modulename": "howard.objects.hgvs", "qualname": "HGVSName.__init__", "kind": "function", "doc": "

    The function is a constructor that initializes various attributes of an object and parses a\ngiven name to populate those attributes.

    \n\n
    Parameters
    \n\n
      \n
    • name: The full HGVS name of the variant
    • \n
    • prefix: The prefix parameter is a string that is used as a prefix for the HGVS name. It\ncan be used to indicate additional information or context about the variant
    • \n
    • chrom: The chrom parameter represents the chromosome where the mutation occurs. It is a\nstring that specifies the chromosome number or identifier
    • \n
    • transcript: The transcript parameter represents the transcript ID or name associated\nwith the mutation. It is used to specify the specific transcript in which the mutation occurs
    • \n
    • transcript_protein: The transcript_protein parameter is used to store information about\nthe protein associated with the transcript. It can be used to specify the protein variant or\nisoform that is affected by the mutation
    • \n
    • gene: The \"gene\" parameter represents the gene associated with the variant. It is a\nstring that specifies the gene name or identifier
    • \n
    • exon: The exon parameter represents the exon number or range in which the mutation\noccurs. It is used to specify the location of the mutation within the transcript
    • \n
    • kind: The \"kind\" parameter is used to specify the type of variant or mutation. It can be\na string that represents the kind of mutation, such as \"substitution\", \"deletion\", \"insertion\",\netc. This parameter helps to categorize and describe the type of mutation being represented by\nthe
    • \n
    • mutation_type: The mutation_type parameter is used to specify the type of mutation. It\ncan be a string that represents the type of mutation, such as \"SNP\" (single nucleotide\npolymorphism), \"DEL\" (deletion), \"INS\" (insertion), etc
    • \n
    • start: The start parameter represents the starting position of the mutation or variant\nin the genomic sequence. It is an integer value that indicates the position of the mutation or\nvariant on the genomic sequence. If not provided, it defaults to 0, defaults to 0
    • \n
    • end: The \"end\" parameter represents the end position of the mutation or variant. It is an\ninteger value that indicates the position of the mutation or variant on the genomic sequence,\ndefaults to 0
    • \n
    • ref_allele: The ref_allele parameter represents the reference allele in a genetic\nmutation. It is the allele that is present in the reference genome at a specific position
    • \n
    • ref2_allele: The ref2_allele parameter represents the reference allele at the end of a\npeptide indel. In the context of genetic mutations, an indel refers to the insertion or deletion\nof nucleotides in a DNA sequence. The ref2_allele specifically represents the reference allele\nthat is
    • \n
    • alt_allele: The alt_allele parameter represents the alternate allele in a genetic\nmutation. In genetics, an allele is one of the possible forms of a gene. In the context of this\ncode, alt_allele is used to store the alternate allele that is present in a mutation
    • \n
    • cdna_start: The cdna_start parameter is used to specify the start position of the\nmutation in the cDNA sequence. It is an optional parameter and if not provided, it will be set\nto a default value of CDNACoord()
    • \n
    • cdna_end: The cdna_end parameter is used to store the end coordinate of the cDNA\n(complementary DNA) sequence. It is an optional parameter and if not provided, it will be\ninitialized as a CDNACoord object. The CDNACoord object is likely a
    • \n
    • pep_extra: The pep_extra parameter is a string that represents any additional\ninformation related to the protein. It is used in the context of protein-specific fields
    • \n
    \n", "signature": "(\tname: str = '',\tprefix: str = '',\tchrom: str = '',\ttranscript: str = '',\ttranscript_protein: str = None,\tgene: str = '',\texon: str = None,\tkind: str = '',\tmutation_type: str = None,\tstart: int = 0,\tend: int = 0,\tref_allele: str = '',\tref2_allele: str = '',\talt_allele: str = '',\tcdna_start: int = None,\tcdna_end: int = None,\tpep_extra: str = '')"}, {"fullname": "howard.objects.hgvs.HGVSName.name", "modulename": "howard.objects.hgvs", "qualname": "HGVSName.name", "kind": "variable", "doc": "

    \n"}, {"fullname": "howard.objects.hgvs.HGVSName.prefix", "modulename": "howard.objects.hgvs", "qualname": "HGVSName.prefix", "kind": "variable", "doc": "

    \n"}, {"fullname": "howard.objects.hgvs.HGVSName.chrom", "modulename": "howard.objects.hgvs", "qualname": "HGVSName.chrom", "kind": "variable", "doc": "

    \n"}, {"fullname": "howard.objects.hgvs.HGVSName.transcript", "modulename": "howard.objects.hgvs", "qualname": "HGVSName.transcript", "kind": "variable", "doc": "

    \n"}, {"fullname": "howard.objects.hgvs.HGVSName.transcript_protein", "modulename": "howard.objects.hgvs", "qualname": "HGVSName.transcript_protein", "kind": "variable", "doc": "

    \n"}, {"fullname": "howard.objects.hgvs.HGVSName.gene", "modulename": "howard.objects.hgvs", "qualname": "HGVSName.gene", "kind": "variable", "doc": "

    \n"}, {"fullname": "howard.objects.hgvs.HGVSName.exon", "modulename": "howard.objects.hgvs", "qualname": "HGVSName.exon", "kind": "variable", "doc": "

    \n"}, {"fullname": "howard.objects.hgvs.HGVSName.kind", "modulename": "howard.objects.hgvs", "qualname": "HGVSName.kind", "kind": "variable", "doc": "

    \n"}, {"fullname": "howard.objects.hgvs.HGVSName.mutation_type", "modulename": "howard.objects.hgvs", "qualname": "HGVSName.mutation_type", "kind": "variable", "doc": "

    \n"}, {"fullname": "howard.objects.hgvs.HGVSName.start", "modulename": "howard.objects.hgvs", "qualname": "HGVSName.start", "kind": "variable", "doc": "

    \n"}, {"fullname": "howard.objects.hgvs.HGVSName.end", "modulename": "howard.objects.hgvs", "qualname": "HGVSName.end", "kind": "variable", "doc": "

    \n"}, {"fullname": "howard.objects.hgvs.HGVSName.ref_allele", "modulename": "howard.objects.hgvs", "qualname": "HGVSName.ref_allele", "kind": "variable", "doc": "

    \n"}, {"fullname": "howard.objects.hgvs.HGVSName.ref2_allele", "modulename": "howard.objects.hgvs", "qualname": "HGVSName.ref2_allele", "kind": "variable", "doc": "

    \n"}, {"fullname": "howard.objects.hgvs.HGVSName.alt_allele", "modulename": "howard.objects.hgvs", "qualname": "HGVSName.alt_allele", "kind": "variable", "doc": "

    \n"}, {"fullname": "howard.objects.hgvs.HGVSName.cdna_start", "modulename": "howard.objects.hgvs", "qualname": "HGVSName.cdna_start", "kind": "variable", "doc": "

    \n"}, {"fullname": "howard.objects.hgvs.HGVSName.cdna_end", "modulename": "howard.objects.hgvs", "qualname": "HGVSName.cdna_end", "kind": "variable", "doc": "

    \n"}, {"fullname": "howard.objects.hgvs.HGVSName.pep_extra", "modulename": "howard.objects.hgvs", "qualname": "HGVSName.pep_extra", "kind": "variable", "doc": "

    \n"}, {"fullname": "howard.objects.hgvs.HGVSName.parse", "modulename": "howard.objects.hgvs", "qualname": "HGVSName.parse", "kind": "function", "doc": "

    The parse function is used to split an HGVS name into a prefix and allele, and then validate\nthe parsed components.

    \n\n
    Parameters
    \n\n
      \n
    • name: The name parameter is a string that represents an HGVS name. It is the input to\nthe parse function and is used to parse the HGVS name by splitting it into a prefix and allele
    • \n
    \n", "signature": "(self, name: str) -> None:", "funcdef": "def"}, {"fullname": "howard.objects.hgvs.HGVSName.parse_prefix", "modulename": "howard.objects.hgvs", "qualname": "HGVSName.parse_prefix", "kind": "function", "doc": "

    The parse_prefix function is used to parse a HGVS prefix (gene/transcript/chromosome) and\nassign the parsed values to the corresponding attributes of the object.

    \n\n
    Parameters
    \n\n
      \n
    • prefix: The prefix parameter is a string that represents a HGVS prefix, which can be a\ngene, transcript, or chromosome identifier. It is used to determine the type of prefix and\nassign the parsed values to the corresponding attributes of the object
    • \n
    \n\n
    Returns
    \n\n
    \n

    The function parse_prefix returns the parsed values for the transcript and gene\n attributes, or sets the chrom or gene attributes based on the given prefix.

    \n
    \n", "signature": "(self, prefix: str):", "funcdef": "def"}, {"fullname": "howard.objects.hgvs.HGVSName.parse_allele", "modulename": "howard.objects.hgvs", "qualname": "HGVSName.parse_allele", "kind": "function", "doc": "

    The function parse_allele parses a HGVS allele description and determines the kind of HGVS\nname (c., p., g., etc.) and the mutation type.

    \n\n

    Some examples include:\n cDNA substitution: c.101A>C,\n cDNA indel: c.3428delCinsTA, c.1000_1003delATG, c.1000_1001insATG\n No protein change: p.Glu1161=\n Protein change: p.Glu1161Ser\n Protein frameshift: p.Glu1161_Ser1164?fs\n Genomic substitution: g.1000100A>T\n Genomic indel: g.1000100_1000102delATG

    \n\n
    Parameters
    \n\n
      \n
    • allele: The allele parameter is a string that represents a HGVS allele description. It\ncan contain various types of mutations, such as cDNA substitutions, cDNA indels, protein\nchanges, protein frameshifts, genomic substitutions, and genomic indels. The purpose of the\nparse_allele
    • \n
    \n", "signature": "(self, allele: str) -> None:", "funcdef": "def"}, {"fullname": "howard.objects.hgvs.HGVSName.parse_cdna", "modulename": "howard.objects.hgvs", "qualname": "HGVSName.parse_cdna", "kind": "function", "doc": "

    The function parse_cdna is used to parse a HGVS cDNA name and extract information such as\nmutation type, coordinates, and alleles.

    \n\n

    Some examples include:\n Substitution: 101A>C,\n Indel: 3428delCinsTA, 1000_1003delATG, 1000_1001insATG

    \n\n
    Parameters
    \n\n
      \n
    • details: The details parameter is a string that represents a HGVS cDNA name. It\ncontains information about a genetic mutation, such as a substitution or an indel, along with\nthe specific coordinates and alleles involved in the mutation
    • \n
    \n\n
    Returns
    \n\n
    \n

    None.

    \n
    \n", "signature": "(self, details: str) -> None:", "funcdef": "def"}, {"fullname": "howard.objects.hgvs.HGVSName.parse_protein", "modulename": "howard.objects.hgvs", "qualname": "HGVSName.parse_protein", "kind": "function", "doc": "

    The function parse_protein is used to parse a HGVS protein name and extract information such\nas mutation type, coordinates, alleles, and additional details.

    \n\n

    Some examples include:\n No change: Glu1161=\n Change: Glu1161Ser\n Frameshift: Glu1161_Ser1164?fs

    \n\n
    Parameters
    \n\n
      \n
    • details: The details parameter is a string that represents a HGVS protein name. It\ncontains information about a protein mutation, such as the amino acid change and the position of\nthe mutation
    • \n
    \n\n
    Returns
    \n\n
    \n

    The method parse_protein does not return anything. It updates the instance variables\n of the object it is called on.

    \n
    \n", "signature": "(self, details: str) -> None:", "funcdef": "def"}, {"fullname": "howard.objects.hgvs.HGVSName.parse_genome", "modulename": "howard.objects.hgvs", "qualname": "HGVSName.parse_genome", "kind": "function", "doc": "

    The function parse_genome is used to parse a HGVS genomic name and extract information such as\nmutation type, coordinates, and alleles.

    \n\n

    Some examples include:\n Substitution: 1000100A>T\n Indel: 1000100_1000102delATG

    \n\n
    Parameters
    \n\n
      \n
    • details: The details parameter is a string that represents a HGVS genomic name. It\ncontains information about a genomic mutation, such as a substitution or an indel
    • \n
    \n\n
    Returns
    \n\n
    \n

    None.

    \n
    \n", "signature": "(self, details: str) -> None:", "funcdef": "def"}, {"fullname": "howard.objects.hgvs.HGVSName.format", "modulename": "howard.objects.hgvs", "qualname": "HGVSName.format", "kind": "function", "doc": "

    The format function generates a HGVS name as a string based on various formatting options.

    \n\n
    Parameters
    \n\n
      \n
    • use_prefix: A boolean indicating whether to include the prefix in the HGVS name. If set\nto True, the prefix will be included in the HGVS name. If set to False, the prefix will be\nexcluded. The default value is True, defaults to True
    • \n
    • use_gene: A boolean indicating whether to include the gene name in the HGVS name. If set\nto True, the gene name will be included in the HGVS name. If set to False, the gene name will\nnot be included. The default value is True, defaults to True
    • \n
    • use_exon: A boolean indicating whether to include exon information in the HGVS name. If\nset to True, exon information will be included in the HGVS name. If set to False, exon\ninformation will not be included, defaults to False
    • \n
    • use_protein: A boolean indicating whether to include the protein change in the HGVS name.\nIf set to True, the protein change will be included in the HGVS name. If set to False, the\nprotein change will not be included, defaults to False
    • \n
    • full_format: A boolean parameter that determines whether the full format of the allele\nshould be included in the output. If set to True, and if the allele is not a protein variant,\nthe allele will be appended with ':p.' followed by the formatted protein variant, defaults to\nFalse (optional)
    • \n
    • use_version: A boolean parameter that determines whether to include the version number in\nthe formatted HGVS name. If set to True, the version number will be included in the output. If\nset to False, the version number will not be included, defaults to False
    • \n
    \n\n
    Returns
    \n\n
    \n

    a HGVS name as a string.

    \n
    \n", "signature": "(\tself,\tuse_prefix: bool = True,\tuse_gene: bool = True,\tuse_exon: bool = False,\tuse_protein: bool = False,\tfull_format=False,\tuse_version: bool = False) -> str:", "funcdef": "def"}, {"fullname": "howard.objects.hgvs.HGVSName.format_prefix", "modulename": "howard.objects.hgvs", "qualname": "HGVSName.format_prefix", "kind": "function", "doc": "

    The format_prefix function generates an HGVS transcript/gene prefix based on various\nparameters.

    \n\n
    Parameters
    \n\n
      \n
    • use_gene: A boolean parameter that determines whether to include the gene name in the\nprefix. If set to True, the gene name will be included in the prefix. If set to False, the gene\nname will not be included in the prefix. The default value is True, defaults to True
    • \n
    • use_exon: A boolean parameter that determines whether to include the exon information in\nthe prefix. If set to True, the exon information will be included in the prefix. If set to\nFalse, the exon information will not be included, defaults to False
    • \n
    • use_protein: A boolean indicating whether to use the protein transcript instead of the\nnucleotide transcript if available. If set to True, the protein transcript will be used. If set\nto False, the nucleotide transcript will be used. The default value is False, defaults to False
    • \n
    • full_format: A boolean parameter that determines whether to generate the full HGVS name\nwith transcript/gene prefix or not. If set to True, the full format will be generated. If set to\nFalse, only the transcript/gene prefix will be generated, defaults to False
    • \n
    • use_version: A boolean parameter that determines whether to include the version number in\nthe transcript prefix. If set to True, the version number will be included in the prefix (e.g.,\nNM_007294.3). If set to False, only the transcript ID without the version number will be\nincluded in the prefix, defaults to False
    • \n
    \n\n
    Returns
    \n\n
    \n

    The function format_prefix returns a formatted HGVS transcript/gene prefix as a\n string.

    \n
    \n", "signature": "(\tself,\tuse_gene: bool = True,\tuse_exon: bool = False,\tuse_protein: bool = False,\tfull_format: bool = False,\tuse_version: bool = False) -> str:", "funcdef": "def"}, {"fullname": "howard.objects.hgvs.HGVSName.format_cdna_coords", "modulename": "howard.objects.hgvs", "qualname": "HGVSName.format_cdna_coords", "kind": "function", "doc": "

    The function format_cdna_coords generates a string representing HGVS cDNA coordinates,\nreturning either the start coordinate or a string in the format \"start_end\" depending on whether\nthe start and end coordinates are the same or not.

    \n\n
    Returns
    \n\n
    \n

    a string representing the cDNA coordinates. If the start and end coordinates are the\n same, it returns just the start coordinate. Otherwise, it returns a string in the format\n \"start_end\".

    \n
    \n", "signature": "(self) -> str:", "funcdef": "def"}, {"fullname": "howard.objects.hgvs.HGVSName.format_dna_allele", "modulename": "howard.objects.hgvs", "qualname": "HGVSName.format_dna_allele", "kind": "function", "doc": "

    The function format_dna_allele generates an HGVS DNA allele based on the mutation type and\nalleles provided.

    \n\n
    Returns
    \n\n
    \n

    The function format_dna_allele returns a string representing the HGVS DNA allele. The\n specific format of the returned string depends on the value of the mutation_type attribute of\n the object. The possible return values are:

    \n
    \n", "signature": "(self) -> str:", "funcdef": "def"}, {"fullname": "howard.objects.hgvs.HGVSName.format_cdna", "modulename": "howard.objects.hgvs", "qualname": "HGVSName.format_cdna", "kind": "function", "doc": "

    The function \"format_cdna\" generates an HGVS cDNA allele by combining the cDNA coordinates and\nthe DNA allele.

    \n\n

    Some examples include:\n Substitution: 101A>C,\n Indel: 3428delCinsTA, 1000_1003delATG, 1000_1001insATG

    \n\n
    Returns
    \n\n
    \n

    a string that represents the HGVS cDNA allele.

    \n
    \n", "signature": "(self) -> str:", "funcdef": "def"}, {"fullname": "howard.objects.hgvs.HGVSName.format_protein", "modulename": "howard.objects.hgvs", "qualname": "HGVSName.format_protein", "kind": "function", "doc": "

    The format_protein function generates an HGVS protein name based on different scenarios such\nas no change, change, frameshift, and range change.

    \n\n

    Some examples include:\n No change: Glu1161=\n Change: Glu1161Ser\n Frameshift: Glu1161_Ser1164?fs

    \n\n
    Returns
    \n\n
    \n

    The method format_protein returns a string representing the HGVS protein name.

    \n
    \n", "signature": "(self) -> str:", "funcdef": "def"}, {"fullname": "howard.objects.hgvs.HGVSName.format_coords", "modulename": "howard.objects.hgvs", "qualname": "HGVSName.format_coords", "kind": "function", "doc": "

    The function format_coords generates a string representation of HGVS cDNA coordinates.

    \n\n
    Returns
    \n\n
    \n

    a string that represents the HGVS cDNA coordinates. If the start and end coordinates\n are the same, it returns just the start coordinate. Otherwise, it returns a string in the format\n \"start_end\".

    \n
    \n", "signature": "(self) -> str:", "funcdef": "def"}, {"fullname": "howard.objects.hgvs.HGVSName.format_genome", "modulename": "howard.objects.hgvs", "qualname": "HGVSName.format_genome", "kind": "function", "doc": "

    The function \"format_genome\" generates an HGVS genomic allele by combining the formatted\ncoordinates and DNA allele.

    \n\n

    Some examples include:\n Substitution: 1000100A>T\n Indel: 1000100_1000102delATG

    \n\n
    Returns
    \n\n
    \n

    a string that represents the HGVS genomic allele.

    \n
    \n", "signature": "(self) -> str:", "funcdef": "def"}, {"fullname": "howard.objects.hgvs.HGVSName.get_raw_coords", "modulename": "howard.objects.hgvs", "qualname": "HGVSName.get_raw_coords", "kind": "function", "doc": "

    The function get_raw_coords returns the genomic coordinates based on the given transcript or\nthe provided chromosomal coordinates.

    \n\n
    Parameters
    \n\n
      \n
    • transcript: The transcript parameter is an object that represents a transcript. It is\nused to retrieve genomic coordinates based on the type of HGVS name (self.kind). The\ntranscript object should have the following attributes and methods:
    • \n
    \n\n
    Returns
    \n\n
    \n

    a tuple containing the genomic coordinates. The tuple consists of three elements: the\n chromosome, the start position, and the end position.

    \n
    \n", "signature": "(self, transcript: object = None) -> tuple:", "funcdef": "def"}, {"fullname": "howard.objects.hgvs.HGVSName.get_ref_coords", "modulename": "howard.objects.hgvs", "qualname": "HGVSName.get_ref_coords", "kind": "function", "doc": "

    The function \"get_ref_coords\" returns the genomic coordinates of the reference allele, taking\ninto account different mutation types.

    \n\n
    Parameters
    \n\n
      \n
    • transcript: The transcript parameter is an optional object that represents a transcript\nor gene. It is used to retrieve the genomic coordinates of the reference allele
    • \n
    \n\n
    Returns
    \n\n
    \n

    a tuple containing the genomic coordinates of the reference allele. The tuple consists\n of three elements: the chromosome, the start position, and the end position.

    \n
    \n", "signature": "(self, transcript: object = None) -> tuple:", "funcdef": "def"}, {"fullname": "howard.objects.hgvs.HGVSName.get_vcf_coords", "modulename": "howard.objects.hgvs", "qualname": "HGVSName.get_vcf_coords", "kind": "function", "doc": "

    The function \"get_vcf_coords\" returns the genomic coordinates of the reference allele in\nVCF-style, with left-padding for indels.

    \n\n
    Parameters
    \n\n
      \n
    • transcript: The transcript parameter is an object that represents a transcript or gene.\nIt is used to retrieve the genomic coordinates of the reference allele
    • \n
    \n\n
    Returns
    \n\n
    \n

    a tuple containing the genomic coordinates of the reference allele in VCF-style. The\n tuple consists of three elements: the chromosome, the start position, and the end position.

    \n
    \n", "signature": "(self, transcript: object = None) -> tuple:", "funcdef": "def"}, {"fullname": "howard.objects.hgvs.HGVSName.get_ref_alt", "modulename": "howard.objects.hgvs", "qualname": "HGVSName.get_ref_alt", "kind": "function", "doc": "

    The function get_ref_alt returns the reference and alternate alleles, with an option to modify\nduplications to look like inserts.

    \n\n
    Parameters
    \n\n
      \n
    • is_forward_strand: The parameter is_forward_strand is a boolean flag that indicates\nwhether the alleles should be returned for the forward strand or the reverse complement strand.\nIf is_forward_strand is True, the alleles will be returned as is. If is_forward_strand is\nFalse,, defaults to True
    • \n
    • raw_dup_alleles: The raw_dup_alleles parameter is a boolean flag that determines\nwhether the raw values of duplicated alleles should be returned. By default, it is set to\nFalse, which means that if the mutation type is a duplication (dup), the reference allele\nwill be represented as an empty string, defaults to False
    • \n
    \n\n
    Returns
    \n\n
    \n

    The function get_ref_alt returns a tuple containing the reference and alternate\n alleles.

    \n
    \n", "signature": "(\tself,\tis_forward_strand: bool = True,\traw_dup_alleles: bool = False) -> tuple:", "funcdef": "def"}, {"fullname": "howard.objects.transcript", "modulename": "howard.objects.transcript", "kind": "module", "doc": "

    Models for representing genomic elements.

    \n"}, {"fullname": "howard.objects.transcript.Gene", "modulename": "howard.objects.transcript", "qualname": "Gene", "kind": "class", "doc": "

    \n"}, {"fullname": "howard.objects.transcript.Gene.__init__", "modulename": "howard.objects.transcript", "qualname": "Gene.__init__", "kind": "function", "doc": "

    \n", "signature": "(name)"}, {"fullname": "howard.objects.transcript.Gene.name", "modulename": "howard.objects.transcript", "qualname": "Gene.name", "kind": "variable", "doc": "

    \n"}, {"fullname": "howard.objects.transcript.Transcript", "modulename": "howard.objects.transcript", "qualname": "Transcript", "kind": "class", "doc": "

    \n"}, {"fullname": "howard.objects.transcript.Transcript.__init__", "modulename": "howard.objects.transcript", "qualname": "Transcript.__init__", "kind": "function", "doc": "

    The function initializes an object with various attributes related to a gene and its transcript.

    \n\n
    Parameters
    \n\n
      \n
    • name: A string representing the name of the coding sequence
    • \n
    • version: The version parameter is a string that represents the version of the object.\nIt is used to track changes or updates to the object over time
    • \n
    • gene: The gene parameter is a string that represents the gene associated with the\ncoding sequence
    • \n
    • tx_position: The tx_position parameter represents the position of the transcript. It is\nan integer value that indicates the position of the transcript in the genome
    • \n
    • cds_position: The cds_position parameter represents the position of the coding sequence\n(CDS) within the transcript. It is an integer value that indicates the starting position of the\nCDS within the transcript sequence
    • \n
    • is_default: The is_default parameter is a boolean flag that indicates whether the\ninstance of the class is the default version of the gene. It is set to False by default, but\ncan be set to True if the instance is the default version, defaults to False
    • \n
    • cdna_match: The cdna_match parameter is a list that contains the positions of the\nmatching cDNA sequences. It is an optional parameter and if not provided, it defaults to an\nempty list
    • \n
    • start_codon_transcript_pos: The parameter \"start_codon_transcript_pos\" is an optional\nparameter that represents the transcript position of the start codon. It is used to store the\npre-calculated transcript position of the start codon for a specific gene
    • \n
    • stop_codon_transcript_pos: The parameter stop_codon_transcript_pos is an optional\ninteger that represents the transcript position of the stop codon. It is used to store the\npre-calculated transcript coordinate of the stop codon. If not provided, it will be set to\nNone
    • \n
    \n", "signature": "(\tname: str,\tversion: str,\tgene: str,\ttx_position: int,\tcds_position: int,\tis_default: bool = False,\tcdna_match: list = None,\tstart_codon_transcript_pos: int = None,\tstop_codon_transcript_pos: int = None)"}, {"fullname": "howard.objects.transcript.Transcript.name", "modulename": "howard.objects.transcript", "qualname": "Transcript.name", "kind": "variable", "doc": "

    \n"}, {"fullname": "howard.objects.transcript.Transcript.version", "modulename": "howard.objects.transcript", "qualname": "Transcript.version", "kind": "variable", "doc": "

    \n"}, {"fullname": "howard.objects.transcript.Transcript.gene", "modulename": "howard.objects.transcript", "qualname": "Transcript.gene", "kind": "variable", "doc": "

    \n"}, {"fullname": "howard.objects.transcript.Transcript.tx_position", "modulename": "howard.objects.transcript", "qualname": "Transcript.tx_position", "kind": "variable", "doc": "

    \n"}, {"fullname": "howard.objects.transcript.Transcript.cds_position", "modulename": "howard.objects.transcript", "qualname": "Transcript.cds_position", "kind": "variable", "doc": "

    \n"}, {"fullname": "howard.objects.transcript.Transcript.is_default", "modulename": "howard.objects.transcript", "qualname": "Transcript.is_default", "kind": "variable", "doc": "

    \n"}, {"fullname": "howard.objects.transcript.Transcript.cdna_match", "modulename": "howard.objects.transcript", "qualname": "Transcript.cdna_match", "kind": "variable", "doc": "

    \n"}, {"fullname": "howard.objects.transcript.Transcript.full_name", "modulename": "howard.objects.transcript", "qualname": "Transcript.full_name", "kind": "variable", "doc": "

    The function full_name returns the full name of an object, including its version if it exists.

    \n\n
    Returns
    \n\n
    \n

    a string. If the version attribute of the object is not None, it returns a string\n in the format name.version. Otherwise, it returns just the name attribute.

    \n
    \n", "annotation": ": str"}, {"fullname": "howard.objects.transcript.Transcript.is_coding", "modulename": "howard.objects.transcript", "qualname": "Transcript.is_coding", "kind": "variable", "doc": "

    The function checks if a coding transcript has a non-zero length coding sequence.

    \n\n
    Returns
    \n\n
    \n

    a boolean value indicating whether the coding transcript has a coding sequence (CDS)\n with a non-zero length.

    \n
    \n", "annotation": ": bool"}, {"fullname": "howard.objects.transcript.Transcript.strand", "modulename": "howard.objects.transcript", "qualname": "Transcript.strand", "kind": "variable", "doc": "

    The function returns a string '+' if the tx_position is on the forward strand, and '-' if it is\non the reverse strand.

    \n\n
    Returns
    \n\n
    \n

    a string that represents the strand of the given self.tx_position. If\n self.tx_position.is_forward_strand is True, then the string returned is '+'. Otherwise, the\n string returned is '-'.

    \n
    \n", "annotation": ": str"}, {"fullname": "howard.objects.transcript.Transcript.ordered_cdna_match", "modulename": "howard.objects.transcript", "qualname": "Transcript.ordered_cdna_match", "kind": "function", "doc": "

    The function \"ordered_cdna_match\" sorts a list of cdna_match objects based on their\ntx_position.chrom_start attribute and returns the sorted list.

    \n\n
    Returns
    \n\n
    \n

    a sorted list of cdna_match objects.

    \n
    \n", "signature": "(self) -> str:", "funcdef": "def"}, {"fullname": "howard.objects.transcript.Transcript.get_cds_start_stop", "modulename": "howard.objects.transcript", "qualname": "Transcript.get_cds_start_stop", "kind": "function", "doc": "

    The function \"get_cds_start_stop\" returns the start and stop positions of a coding sequence,\ntaking into account the direction of the strand.

    \n\n
    Returns
    \n\n
    \n

    a tuple containing the start and stop positions of the coding sequence (CDS).

    \n
    \n", "signature": "(self) -> tuple:", "funcdef": "def"}, {"fullname": "howard.objects.transcript.Transcript.start_codon", "modulename": "howard.objects.transcript", "qualname": "Transcript.start_codon", "kind": "function", "doc": "

    The function returns the transcript position of the start codon.

    \n\n
    Returns
    \n\n
    \n

    the transcript position of the start codon.

    \n
    \n", "signature": "(self) -> int:", "funcdef": "def"}, {"fullname": "howard.objects.transcript.Transcript.stop_codon", "modulename": "howard.objects.transcript", "qualname": "Transcript.stop_codon", "kind": "function", "doc": "

    The function returns the transcript position of the stop codon.

    \n\n
    Returns
    \n\n
    \n

    The method stop_codon returns an integer, which represents the transcript position of\n the stop codon.

    \n
    \n", "signature": "(self) -> int:", "funcdef": "def"}, {"fullname": "howard.objects.transcript.Transcript.cdna_to_genomic_coord", "modulename": "howard.objects.transcript", "qualname": "Transcript.cdna_to_genomic_coord", "kind": "function", "doc": "

    The function cdna_to_genomic_coord converts a HGVS cDNA coordinate to a genomic coordinate.

    \n\n
    Parameters
    \n\n
      \n
    • coord: The parameter coord is an object that represents a cDNA coordinate. It is used\nto specify a position along a cDNA sequence
    • \n
    \n\n
    Returns
    \n\n
    \n

    an integer value, which represents the genomic coordinate corresponding to the given\n cDNA coordinate.

    \n
    \n", "signature": "(self, coord: object) -> int:", "funcdef": "def"}, {"fullname": "howard.objects.transcript.Transcript.genomic_to_cdna_coord", "modulename": "howard.objects.transcript", "qualname": "Transcript.genomic_to_cdna_coord", "kind": "function", "doc": "

    The function genomic_to_cdna_coord converts a genomic coordinate to a cDNA coordinate and\noffset, taking into account exons, strand, and coding transcript information.

    \n\n
    Parameters
    \n\n
      \n
    • genomic_coord: The genomic_coord parameter is an integer representing a genomic\ncoordinate
    • \n
    \n\n
    Returns
    \n\n
    \n

    an object of type CDNACoord.

    \n
    \n", "signature": "(self, genomic_coord: int) -> object:", "funcdef": "def"}, {"fullname": "howard.objects.transcript.Transcript.find_exon_number", "modulename": "howard.objects.transcript", "qualname": "Transcript.find_exon_number", "kind": "function", "doc": "

    The function find_exon_number returns the exon number for a given position.

    \n\n
    Parameters
    \n\n
      \n
    • offset: The offset parameter represents a position in the genome. It is an integer value\nthat indicates the position of interest within the genome
    • \n
    \n\n
    Returns
    \n\n
    \n

    an integer value, which represents the exon number for a given position.

    \n
    \n", "signature": "(self, offset: int) -> int:", "funcdef": "def"}, {"fullname": "howard.objects.transcript.BED6Interval_base", "modulename": "howard.objects.transcript", "qualname": "BED6Interval_base", "kind": "class", "doc": "

    BED6Interval_base(chrom, chrom_start, chrom_end, name, score, strand)

    \n", "bases": "builtins.tuple"}, {"fullname": "howard.objects.transcript.BED6Interval_base.__init__", "modulename": "howard.objects.transcript", "qualname": "BED6Interval_base.__init__", "kind": "function", "doc": "

    Create new instance of BED6Interval_base(chrom, chrom_start, chrom_end, name, score, strand)

    \n", "signature": "(chrom, chrom_start, chrom_end, name, score, strand)"}, {"fullname": "howard.objects.transcript.BED6Interval_base.chrom", "modulename": "howard.objects.transcript", "qualname": "BED6Interval_base.chrom", "kind": "variable", "doc": "

    Alias for field number 0

    \n"}, {"fullname": "howard.objects.transcript.BED6Interval_base.chrom_start", "modulename": "howard.objects.transcript", "qualname": "BED6Interval_base.chrom_start", "kind": "variable", "doc": "

    Alias for field number 1

    \n"}, {"fullname": "howard.objects.transcript.BED6Interval_base.chrom_end", "modulename": "howard.objects.transcript", "qualname": "BED6Interval_base.chrom_end", "kind": "variable", "doc": "

    Alias for field number 2

    \n"}, {"fullname": "howard.objects.transcript.BED6Interval_base.name", "modulename": "howard.objects.transcript", "qualname": "BED6Interval_base.name", "kind": "variable", "doc": "

    Alias for field number 3

    \n"}, {"fullname": "howard.objects.transcript.BED6Interval_base.score", "modulename": "howard.objects.transcript", "qualname": "BED6Interval_base.score", "kind": "variable", "doc": "

    Alias for field number 4

    \n"}, {"fullname": "howard.objects.transcript.BED6Interval_base.strand", "modulename": "howard.objects.transcript", "qualname": "BED6Interval_base.strand", "kind": "variable", "doc": "

    Alias for field number 5

    \n"}, {"fullname": "howard.objects.transcript.BED6Interval", "modulename": "howard.objects.transcript", "qualname": "BED6Interval", "kind": "class", "doc": "

    BED6Interval_base(chrom, chrom_start, chrom_end, name, score, strand)

    \n", "bases": "BED6Interval_base"}, {"fullname": "howard.objects.transcript.BED6Interval.__init__", "modulename": "howard.objects.transcript", "qualname": "BED6Interval.__init__", "kind": "function", "doc": "

    Create new instance of BED6Interval_base(chrom, chrom_start, chrom_end, name, score, strand)

    \n", "signature": "(chrom, chrom_start, chrom_end, name, score, strand)"}, {"fullname": "howard.objects.transcript.BED6Interval.distance", "modulename": "howard.objects.transcript", "qualname": "BED6Interval.distance", "kind": "function", "doc": "

    The distance function calculates the distance between an offset and an interval, returning\nzero if the offset is inside the interval, a positive value if the interval comes after the\noffset, and a negative value if the interval comes before the offset.\nif offset is inside the exon, distance is zero.\notherwise, distance is the distance to the nearest edge.\ndistance is positive if the exon comes after the offset.\ndistance is negative if the exon comes before the offset.

    \n\n
    Parameters
    \n\n
      \n
    • offset: The offset parameter represents a position or point in the genome. It is an\ninteger value that indicates the position within the genome sequence
    • \n
    \n\n
    Returns
    \n\n
    \n

    an integer value, which represents the distance to the interval.

    \n
    \n", "signature": "(self, offset: int) -> int:", "funcdef": "def"}, {"fullname": "howard.objects.transcript.Exon", "modulename": "howard.objects.transcript", "qualname": "Exon", "kind": "class", "doc": "

    \n"}, {"fullname": "howard.objects.transcript.Exon.__init__", "modulename": "howard.objects.transcript", "qualname": "Exon.__init__", "kind": "function", "doc": "

    The function initializes an object with a transcript, a position in the transcript, and a\nnumber.

    \n\n
    Parameters
    \n\n
      \n
    • transcript: The transcript parameter is of type Transcript. It represents a\ntranscript object that contains information about a conversation or dialogue
    • \n
    • tx_position: The tx_position parameter represents the position of the transcript in a\nlist or array. It is an integer value that indicates the index of the transcript in the list or\narray
    • \n
    • number: The \"number\" parameter is an integer that represents a specific number. It is\nused as a parameter in the constructor of a class
    • \n
    \n", "signature": "(\ttranscript: howard.objects.transcript.Transcript,\ttx_position: int,\tnumber: int)"}, {"fullname": "howard.objects.transcript.Exon.transcript", "modulename": "howard.objects.transcript", "qualname": "Exon.transcript", "kind": "variable", "doc": "

    \n"}, {"fullname": "howard.objects.transcript.Exon.tx_position", "modulename": "howard.objects.transcript", "qualname": "Exon.tx_position", "kind": "variable", "doc": "

    \n"}, {"fullname": "howard.objects.transcript.Exon.number", "modulename": "howard.objects.transcript", "qualname": "Exon.number", "kind": "variable", "doc": "

    \n"}, {"fullname": "howard.objects.transcript.Exon.name", "modulename": "howard.objects.transcript", "qualname": "Exon.name", "kind": "variable", "doc": "

    The function returns a string that combines the name of the transcript and a number.

    \n\n
    Returns
    \n\n
    \n

    a string that combines the name of the transcript with the number. The format of the\n string is \"{transcript name}.{number}\".

    \n
    \n", "annotation": ": str"}, {"fullname": "howard.objects.transcript.Exon.get_as_interval", "modulename": "howard.objects.transcript", "qualname": "Exon.get_as_interval", "kind": "function", "doc": "

    The function get_as_interval returns the coding region for an exon as a BED6Interval object.\nThis function returns a BED6Interval objects containing position\ninformation for this exon. This may be used as input for\npybedtools.create_interval_from_list() after casting chrom_start\nand chrom_end as strings.

    \n\n
    Parameters
    \n\n
      \n
    • coding_only: The coding_only parameter is a boolean flag that determines whether to\ninclude only exons in the coding region. If coding_only is set to True, the function will\ncheck if the exon is completely outside the coding region defined by the transcript's CDS\n(coding sequence) position, defaults to False
    • \n
    \n\n
    Returns
    \n\n
    \n

    a BED6Interval object.

    \n
    \n", "signature": "(self, coding_only: bool = False) -> object:", "funcdef": "def"}, {"fullname": "howard.objects.transcript.Exon.strand", "modulename": "howard.objects.transcript", "qualname": "Exon.strand", "kind": "variable", "doc": "

    \n"}, {"fullname": "howard.objects.transcript.CDNA_Match", "modulename": "howard.objects.transcript", "qualname": "CDNA_Match", "kind": "class", "doc": "

    \n", "bases": "Exon"}, {"fullname": "howard.objects.transcript.CDNA_Match.__init__", "modulename": "howard.objects.transcript", "qualname": "CDNA_Match.__init__", "kind": "function", "doc": "

    The function initializes a CDNA_Match object with specified attributes.

    \n\n
    Parameters
    \n\n
      \n
    • transcript: The transcript parameter is an instance of the Transcript class. It\nrepresents the transcript that the CDNA match belongs to
    • \n
    • tx_position: The tx_position parameter represents the position of the transcript in the\ngenome. It is an integer value
    • \n
    • cdna_start: The cdna_start parameter represents the starting position of the cDNA\nmatch. It is an integer value
    • \n
    • cdna_end: The cdna_end parameter represents the end position of the cDNA match. It is\nan integer value that indicates the position of the last nucleotide in the cDNA sequence that\nmatches the transcript
    • \n
    • gap: The \"gap\" parameter represents the number of nucleotides that are missing or\ninserted in the cDNA sequence compared to the reference transcript sequence. It indicates the\npresence of gaps or insertions in the alignment between the cDNA and the reference transcript
    • \n
    • number: The number parameter represents the number of the CDNA match. It is used to\nuniquely identify each CDNA match object
    • \n
    \n", "signature": "(\ttranscript: howard.objects.transcript.Transcript,\ttx_position: int,\tcdna_start: int,\tcdna_end: int,\tgap: int,\tnumber: int)"}, {"fullname": "howard.objects.transcript.CDNA_Match.cdna_start", "modulename": "howard.objects.transcript", "qualname": "CDNA_Match.cdna_start", "kind": "variable", "doc": "

    \n"}, {"fullname": "howard.objects.transcript.CDNA_Match.cdna_end", "modulename": "howard.objects.transcript", "qualname": "CDNA_Match.cdna_end", "kind": "variable", "doc": "

    \n"}, {"fullname": "howard.objects.transcript.CDNA_Match.gap", "modulename": "howard.objects.transcript", "qualname": "CDNA_Match.gap", "kind": "variable", "doc": "

    \n"}, {"fullname": "howard.objects.transcript.CDNA_Match.length", "modulename": "howard.objects.transcript", "qualname": "CDNA_Match.length", "kind": "variable", "doc": "

    The function calculates the length of a sequence by subtracting the start position from the end\nposition and adding 1.

    \n\n
    Returns
    \n\n
    \n

    The length of the sequence, calculated by subtracting the cdna_start from the cdna_end\n and adding 1.

    \n
    \n", "annotation": ": int"}, {"fullname": "howard.objects.transcript.CDNA_Match.get_offset", "modulename": "howard.objects.transcript", "qualname": "CDNA_Match.get_offset", "kind": "function", "doc": "

    The get_offset function calculates the offset for a given position in a cDNA sequence based on\nthe GAP attribute.\ncdna_match GAP attribute looks like: 'M185 I3 M250' which is code/length\n@see https://github.com/The-Sequence-Ontology/Specifications/blob/master/gff3.md#the-gap-attribute\ncodes operation\nM match\nI insert a gap into the reference sequence\nD insert a gap into the target (delete from reference)\nIf you want the whole exon, then pass the end

    \n\n
    Parameters
    \n\n
      \n
    • position: The position parameter is an integer that represents the position in the\nsequence. It is used to calculate the offset based on the GAP attribute of the cDNA match
    • \n
    • validate: The validate parameter is a boolean flag that determines whether to perform\nvalidation checks during the calculation of the offset. If validate is set to True, the\nfunction will raise a ValueError if the given position falls within an insertion or deletion\ngap. If validate is set, defaults to True
    • \n
    \n\n
    Returns
    \n\n
    \n

    an integer value representing the offset for a given position in the cDNA sequence.

    \n
    \n", "signature": "(self, position: int, validate: bool = True) -> int:", "funcdef": "def"}, {"fullname": "howard.objects.variant", "modulename": "howard.objects.variant", "kind": "module", "doc": "

    Methods for manipulating genetic variants.

    \n"}, {"fullname": "howard.objects.variant.Position", "modulename": "howard.objects.variant", "qualname": "Position", "kind": "class", "doc": "

    A position in the genome.

    \n"}, {"fullname": "howard.objects.variant.Position.__init__", "modulename": "howard.objects.variant", "qualname": "Position.__init__", "kind": "function", "doc": "

    \n", "signature": "(chrom, chrom_start, chrom_stop, is_forward_strand)"}, {"fullname": "howard.objects.variant.Position.chrom", "modulename": "howard.objects.variant", "qualname": "Position.chrom", "kind": "variable", "doc": "

    \n"}, {"fullname": "howard.objects.variant.Position.chrom_start", "modulename": "howard.objects.variant", "qualname": "Position.chrom_start", "kind": "variable", "doc": "

    \n"}, {"fullname": "howard.objects.variant.Position.chrom_stop", "modulename": "howard.objects.variant", "qualname": "Position.chrom_stop", "kind": "variable", "doc": "

    \n"}, {"fullname": "howard.objects.variant.Position.is_forward_strand", "modulename": "howard.objects.variant", "qualname": "Position.is_forward_strand", "kind": "variable", "doc": "

    \n"}, {"fullname": "howard.objects.variant.revcomp", "modulename": "howard.objects.variant", "qualname": "revcomp", "kind": "function", "doc": "

    Reverse complement.

    \n", "signature": "(seq):", "funcdef": "def"}, {"fullname": "howard.objects.variant.get_sequence", "modulename": "howard.objects.variant", "qualname": "get_sequence", "kind": "function", "doc": "

    Return a sequence for the genomic region.

    \n\n

    Coordinates are 0-based, end-exclusive.

    \n", "signature": "(genome, chrom, start, end, is_forward_strand=True):", "funcdef": "def"}, {"fullname": "howard.objects.variant.get_sequence_from_position", "modulename": "howard.objects.variant", "qualname": "get_sequence_from_position", "kind": "function", "doc": "

    Return a sequence for the genomic region

    \n\n

    Position is 0-based, end-exclusive.

    \n", "signature": "(genome, position):", "funcdef": "def"}, {"fullname": "howard.objects.variant.justify_indel", "modulename": "howard.objects.variant", "qualname": "justify_indel", "kind": "function", "doc": "

    Justify an indel to the left or right along a sequence 'seq'.

    \n\n

    start, end: 0-based, end-exclusive coordinates of 'indel' within the\n sequence 'seq'. Inserts denote the insertion point using start=end\n and deletions indicate the deleted region with (start,end).\nindel: indel sequence, can be insertion or deletion.\nseq: a larger sequence containing the indel. Can be a fragment from the\n genome.\njustify: Which direction to justify the indel ('left', 'right').

    \n", "signature": "(start, end, indel, seq, justify):", "funcdef": "def"}, {"fullname": "howard.objects.variant.justify_genomic_indel", "modulename": "howard.objects.variant", "qualname": "justify_genomic_indel", "kind": "function", "doc": "

    start, end: 0-based, end-exclusive coordinates of 'indel'.

    \n", "signature": "(genome, chrom, start, end, indel, justify, flank_length=20):", "funcdef": "def"}, {"fullname": "howard.objects.variant.normalize_variant", "modulename": "howard.objects.variant", "qualname": "normalize_variant", "kind": "function", "doc": "

    Normalize variant according to the GATK/VCF standard.

    \n\n

    chrom: chromosome containing variant.\noffset: 1-based coordinate of reference allele in the genome.\nref_sequence: reference allele.\nalt_sequences: list of all alternate sequences.\ngenome: pygr-compatible genome object.

    \n", "signature": "(\tchrom,\toffset,\tref_sequence,\talt_sequences,\tgenome,\tjustify='left',\tflank_length=30,\tindels_start_with_same_base=True):", "funcdef": "def"}, {"fullname": "howard.objects.variant.NormalizedVariant", "modulename": "howard.objects.variant", "qualname": "NormalizedVariant", "kind": "class", "doc": "

    Normalizes variant representation to match GATK/VCF.

    \n"}, {"fullname": "howard.objects.variant.NormalizedVariant.__init__", "modulename": "howard.objects.variant", "qualname": "NormalizedVariant.__init__", "kind": "function", "doc": "

    position: a 0-index genomic Position.\nref_allele: the reference allele sequence.\nalt_alleles: a list of alternate allele sequences.\nseq_5p: 5 prime flanking sequence of variant.\nseq_3p: 3 prime flanking sequence of variant.\ngenome: a pygr compatible genome object (optional).

    \n\n

    indels_start_with_same_base: DML - I have no idea why this is required\n but am keeping for backwards compat

    \n", "signature": "(\tposition,\tref_allele,\talt_alleles,\tseq_5p='',\tseq_3p='',\tgenome=None,\tjustify='left',\tindels_start_with_same_base=True)"}, {"fullname": "howard.objects.variant.NormalizedVariant.position", "modulename": "howard.objects.variant", "qualname": "NormalizedVariant.position", "kind": "variable", "doc": "

    \n"}, {"fullname": "howard.objects.variant.NormalizedVariant.alleles", "modulename": "howard.objects.variant", "qualname": "NormalizedVariant.alleles", "kind": "variable", "doc": "

    \n"}, {"fullname": "howard.objects.variant.NormalizedVariant.seq_5p", "modulename": "howard.objects.variant", "qualname": "NormalizedVariant.seq_5p", "kind": "variable", "doc": "

    \n"}, {"fullname": "howard.objects.variant.NormalizedVariant.seq_3p", "modulename": "howard.objects.variant", "qualname": "NormalizedVariant.seq_3p", "kind": "variable", "doc": "

    \n"}, {"fullname": "howard.objects.variant.NormalizedVariant.genome", "modulename": "howard.objects.variant", "qualname": "NormalizedVariant.genome", "kind": "variable", "doc": "

    \n"}, {"fullname": "howard.objects.variant.NormalizedVariant.log", "modulename": "howard.objects.variant", "qualname": "NormalizedVariant.log", "kind": "variable", "doc": "

    \n"}, {"fullname": "howard.objects.variant.NormalizedVariant.indels_start_with_same_base", "modulename": "howard.objects.variant", "qualname": "NormalizedVariant.indels_start_with_same_base", "kind": "variable", "doc": "

    \n"}, {"fullname": "howard.objects.variant.NormalizedVariant.molecular_class", "modulename": "howard.objects.variant", "qualname": "NormalizedVariant.molecular_class", "kind": "variable", "doc": "

    \n"}, {"fullname": "howard.objects.variant.NormalizedVariant.ref_allele", "modulename": "howard.objects.variant", "qualname": "NormalizedVariant.ref_allele", "kind": "variable", "doc": "

    \n"}, {"fullname": "howard.objects.variant.NormalizedVariant.alt_alleles", "modulename": "howard.objects.variant", "qualname": "NormalizedVariant.alt_alleles", "kind": "variable", "doc": "

    \n"}, {"fullname": "howard.objects.variant.NormalizedVariant.variant", "modulename": "howard.objects.variant", "qualname": "NormalizedVariant.variant", "kind": "variable", "doc": "

    \n"}, {"fullname": "howard.objects.variants", "modulename": "howard.objects.variants", "kind": "module", "doc": "

    \n"}, {"fullname": "howard.objects.variants.Variants", "modulename": "howard.objects.variants", "qualname": "Variants", "kind": "class", "doc": "

    \n"}, {"fullname": "howard.objects.variants.Variants.__init__", "modulename": "howard.objects.variants", "qualname": "Variants.__init__", "kind": "function", "doc": "

    The function __init__ initializes the variables, sets the input, output, config, param, connexion and\nheader

    \n\n
    Parameters
    \n\n
      \n
    • conn: the connection to the database
    • \n
    • input: the input file
    • \n
    • output: the output file
    • \n
    • config: a dictionary containing the configuration of the model
    • \n
    • param: a dictionary containing the parameters of the model
    • \n
    \n", "signature": "(\tconn=None,\tinput: str = None,\toutput: str = None,\tconfig: dict = {},\tparam: dict = {},\tload: bool = False)"}, {"fullname": "howard.objects.variants.Variants.load_header", "modulename": "howard.objects.variants", "qualname": "Variants.load_header", "kind": "function", "doc": "

    Load header in a table, with INFO, FORMAT, FILTERS, SAMPLES and METADATA

    \n\n

    Args:\n header (vcfobject, optional): VCF object from pyVCF. Defaults to None (header of the Variants object).\n table (str, optional): Table name of the header table. Defaults to None (defined as 'header' later).\n drop (bool, optional): Drop table if exists. Defaults to False.\n view_name (str, optional): Name of the table. Defaults to 'header'.

    \n\n

    Returns:\n str: Name of the table, None otherwise

    \n", "signature": "(\tself,\theader=None,\ttable: str = None,\tdrop: bool = False,\tview_name: str = 'header') -> str:", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.set_samples", "modulename": "howard.objects.variants", "qualname": "Variants.set_samples", "kind": "function", "doc": "

    The function set_samples sets the samples attribute of an object to a provided list or\nretrieves it from a parameter dictionary.

    \n\n
    Parameters
    \n\n
      \n
    • samples: The set_samples method is a method of a class that takes a list of samples as\ninput and sets the samples attribute of the class to the provided list. If no samples are\nprovided, it tries to get the samples from the class's parameters using the get_param method
    • \n
    \n\n
    Returns
    \n\n
    \n

    The samples list is being returned.

    \n
    \n", "signature": "(self, samples: list = None) -> list:", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.get_samples", "modulename": "howard.objects.variants", "qualname": "Variants.get_samples", "kind": "function", "doc": "

    This function returns a list of samples.

    \n\n
    Returns
    \n\n
    \n

    The get_samples method is returning the samples attribute of the object.

    \n
    \n", "signature": "(self) -> list:", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.get_samples_check", "modulename": "howard.objects.variants", "qualname": "Variants.get_samples_check", "kind": "function", "doc": "

    This function returns the value of the \"check\" key within the \"samples\" dictionary retrieved\nfrom the parameters.

    \n\n
    Returns
    \n\n
    \n

    The method get_samples_check is returning the value of the key \"check\" inside the\n \"samples\" dictionary, which is nested inside the dictionary returned by the get_param()\n method. If the key \"check\" is not found, it will return False.

    \n
    \n", "signature": "(self) -> bool:", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.set_input", "modulename": "howard.objects.variants", "qualname": "Variants.set_input", "kind": "function", "doc": "

    The function set_input takes a file name as input, extracts the name and extension, and sets\nattributes in the class accordingly.

    \n\n
    Parameters
    \n\n
      \n
    • input: The set_input method in the provided code snippet is used to set attributes\nrelated to the input file. Here's a breakdown of the parameters and their usage in the method:
    • \n
    \n", "signature": "(self, input: str = None) -> None:", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.set_config", "modulename": "howard.objects.variants", "qualname": "Variants.set_config", "kind": "function", "doc": "

    The set_config function takes a config object and assigns it as the configuration object for the\nclass.

    \n\n
    Parameters
    \n\n
      \n
    • config: The config parameter in the set_config function is a dictionary object that\ncontains configuration settings for the class. When you call the set_config function with a\ndictionary object as the argument, it will set that dictionary as the configuration object for\nthe class
    • \n
    \n", "signature": "(self, config: dict) -> None:", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.set_param", "modulename": "howard.objects.variants", "qualname": "Variants.set_param", "kind": "function", "doc": "

    This function sets a parameter object for the class based on the input dictionary.

    \n\n
    Parameters
    \n\n
      \n
    • param: The set_param method you provided takes a dictionary object as input and sets it\nas the param attribute of the class instance
    • \n
    \n", "signature": "(self, param: dict) -> None:", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.init_variables", "modulename": "howard.objects.variants", "qualname": "Variants.init_variables", "kind": "function", "doc": "

    This function initializes the variables that will be used in the rest of the class

    \n", "signature": "(self) -> None:", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.get_indexing", "modulename": "howard.objects.variants", "qualname": "Variants.get_indexing", "kind": "function", "doc": "

    It returns the value of the key \"indexing\" in the dictionary. If the key is not present, it\nreturns False.

    \n\n
    Returns
    \n\n
    \n

    The value of the indexing parameter.

    \n
    \n", "signature": "(self) -> bool:", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.get_connexion_config", "modulename": "howard.objects.variants", "qualname": "Variants.get_connexion_config", "kind": "function", "doc": "

    The function get_connexion_config returns a dictionary containing the configuration for a\nconnection, including the number of threads and memory limit.

    \n\n
    Returns
    \n\n
    \n

    a dictionary containing the configuration for the Connexion library.

    \n
    \n", "signature": "(self) -> dict:", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.get_duckdb_settings", "modulename": "howard.objects.variants", "qualname": "Variants.get_duckdb_settings", "kind": "function", "doc": "

    The function get_duckdb_settings retrieves DuckDB settings from a configuration file or a\nstring.

    \n\n
    Returns
    \n\n
    \n

    The function get_duckdb_settings returns a dictionary object duckdb_settings_dict.

    \n
    \n", "signature": "(self) -> dict:", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.set_connexion_db", "modulename": "howard.objects.variants", "qualname": "Variants.set_connexion_db", "kind": "function", "doc": "

    The function set_connexion_db returns the appropriate database connection string based on the\ninput format and connection type.

    \n\n
    Returns
    \n\n
    \n

    the value of the variable connexion_db.

    \n
    \n", "signature": "(self) -> str:", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.set_connexion", "modulename": "howard.objects.variants", "qualname": "Variants.set_connexion", "kind": "function", "doc": "

    The function set_connexion creates a connection to a database, with options for different\ndatabase formats and settings.

    \n\n
    Parameters
    \n\n
      \n
    • conn: The conn parameter in the set_connexion method is the connection to the\ndatabase. If a connection is not provided, a new connection to an in-memory database is created.\nThe method then proceeds to set up the connection based on the specified format (e.g., duckdb or\nsqlite
    • \n
    \n", "signature": "(self, conn) -> None:", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.set_output", "modulename": "howard.objects.variants", "qualname": "Variants.set_output", "kind": "function", "doc": "

    The set_output function in Python sets the output file based on the input or a specified key\nin the config file, extracting the output name, extension, and format.

    \n\n
    Parameters
    \n\n
      \n
    • output: The output parameter in the set_output method is used to specify the name of\nthe output file. If the config file has an 'output' key, the method sets the output to the value\nof that key. If no output is provided, it sets the output to None
    • \n
    \n", "signature": "(self, output: str = None) -> None:", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.set_header", "modulename": "howard.objects.variants", "qualname": "Variants.set_header", "kind": "function", "doc": "

    It reads the header of a VCF file and stores it as a list of strings and as a VCF object

    \n", "signature": "(self) -> None:", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.get_query_to_df", "modulename": "howard.objects.variants", "qualname": "Variants.get_query_to_df", "kind": "function", "doc": "

    The get_query_to_df function takes a query as a string and returns the result as a pandas\nDataFrame based on the connection format.

    \n\n
    Parameters
    \n\n
      \n
    • query: The query parameter in the get_query_to_df function is a string that\nrepresents the SQL query you want to execute. This query will be used to fetch data from a\ndatabase and convert it into a pandas DataFrame
    • \n
    • limit: The limit parameter in the get_query_to_df function is used to specify the\nmaximum number of rows to be returned in the resulting dataframe. If a limit is provided, the\nfunction will only fetch up to that number of rows from the database query result. If no limit\nis specified,
    • \n
    \n\n
    Returns
    \n\n
    \n

    A pandas DataFrame is being returned by the get_query_to_df function.

    \n
    \n", "signature": "(self, query: str = '', limit: int = None) -> pandas.core.frame.DataFrame:", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.get_overview", "modulename": "howard.objects.variants", "qualname": "Variants.get_overview", "kind": "function", "doc": "

    The function prints the input, output, config, and dataframe of the current object

    \n", "signature": "(self) -> None:", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.get_stats", "modulename": "howard.objects.variants", "qualname": "Variants.get_stats", "kind": "function", "doc": "

    The get_stats function calculates and returns various statistics of the current object,\nincluding information about the input file, variants, samples, header fields, quality, and\nSNVs/InDels.

    \n\n
    Returns
    \n\n
    \n

    a dictionary containing various statistics of the current object. The dictionary has\n the following structure:

    \n
    \n", "signature": "(self) -> dict:", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.stats_to_file", "modulename": "howard.objects.variants", "qualname": "Variants.stats_to_file", "kind": "function", "doc": "

    The function stats_to_file takes a file name as input, retrieves statistics, serializes them\ninto a JSON object, and writes the JSON object to the specified file.

    \n\n
    Parameters
    \n\n
      \n
    • file: The file parameter is a string that represents the file path where the JSON data\nwill be written
    • \n
    \n\n
    Returns
    \n\n
    \n

    the name of the file that was written to.

    \n
    \n", "signature": "(self, file: str = None) -> str:", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.print_stats", "modulename": "howard.objects.variants", "qualname": "Variants.print_stats", "kind": "function", "doc": "

    The print_stats function generates a markdown file and prints the statistics contained in a\nJSON file in a formatted manner.

    \n\n
    Parameters
    \n\n
      \n
    • output_file: The output_file parameter is a string that specifies the path and filename\nof the output file where the stats will be printed in Markdown format. If no output_file is\nprovided, a temporary directory will be created and the stats will be saved in a file named\n\"stats.md\" within that
    • \n
    • json_file: The json_file parameter is a string that represents the path to the JSON\nfile where the statistics will be saved. If no value is provided, a temporary directory will be\ncreated and a default file name \"stats.json\" will be used
    • \n
    \n\n
    Returns
    \n\n
    \n

    The function print_stats does not return any value. It has a return type annotation\n of None.

    \n
    \n", "signature": "(self, output_file: str = None, json_file: str = None) -> None:", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.get_input", "modulename": "howard.objects.variants", "qualname": "Variants.get_input", "kind": "function", "doc": "

    It returns the value of the input variable.

    \n\n
    Returns
    \n\n
    \n

    The input is being returned.

    \n
    \n", "signature": "(self) -> str:", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.get_input_format", "modulename": "howard.objects.variants", "qualname": "Variants.get_input_format", "kind": "function", "doc": "

    This function returns the format of the input variable, either from the provided input file or\nby prompting for input.

    \n\n
    Parameters
    \n\n
      \n
    • input_file: The input_file parameter in the get_input_format method is a string that\nrepresents the file path of the input file. If no input_file is provided when calling the\nmethod, it will default to None
    • \n
    \n\n
    Returns
    \n\n
    \n

    The format of the input variable is being returned.

    \n
    \n", "signature": "(self, input_file: str = None) -> str:", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.get_input_compressed", "modulename": "howard.objects.variants", "qualname": "Variants.get_input_compressed", "kind": "function", "doc": "

    The function get_input_compressed returns the format of the input variable after compressing\nit.

    \n\n
    Parameters
    \n\n
      \n
    • input_file: The input_file parameter in the get_input_compressed method is a string\nthat represents the file path of the input file. If no input_file is provided when calling the\nmethod, it will default to None and the method will then call self.get_input() to
    • \n
    \n\n
    Returns
    \n\n
    \n

    The function get_input_compressed returns the compressed format of the input\n variable.

    \n
    \n", "signature": "(self, input_file: str = None) -> str:", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.get_output", "modulename": "howard.objects.variants", "qualname": "Variants.get_output", "kind": "function", "doc": "

    It returns the output of the neuron.

    \n\n
    Returns
    \n\n
    \n

    The output of the neural network.

    \n
    \n", "signature": "(self) -> str:", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.get_output_format", "modulename": "howard.objects.variants", "qualname": "Variants.get_output_format", "kind": "function", "doc": "

    The function get_output_format returns the format of the input variable or the output file if\nprovided.

    \n\n
    Parameters
    \n\n
      \n
    • output_file: The output_file parameter in the get_output_format method is a string\nthat represents the file path of the output file. If no output_file is provided when calling\nthe method, it will default to the output obtained from the get_output method of the class\ninstance. The
    • \n
    \n\n
    Returns
    \n\n
    \n

    The format of the input variable is being returned.

    \n
    \n", "signature": "(self, output_file: str = None) -> str:", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.get_config", "modulename": "howard.objects.variants", "qualname": "Variants.get_config", "kind": "function", "doc": "

    It returns the config

    \n\n
    Returns
    \n\n
    \n

    The config variable is being returned.

    \n
    \n", "signature": "(self) -> dict:", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.get_param", "modulename": "howard.objects.variants", "qualname": "Variants.get_param", "kind": "function", "doc": "

    It returns the param

    \n\n
    Returns
    \n\n
    \n

    The param variable is being returned.

    \n
    \n", "signature": "(self) -> dict:", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.get_connexion_db", "modulename": "howard.objects.variants", "qualname": "Variants.get_connexion_db", "kind": "function", "doc": "

    It returns the connexion_db attribute of the object

    \n\n
    Returns
    \n\n
    \n

    The connexion_db is being returned.

    \n
    \n", "signature": "(self) -> str:", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.get_prefix", "modulename": "howard.objects.variants", "qualname": "Variants.get_prefix", "kind": "function", "doc": "

    It returns the prefix of the object.

    \n\n
    Returns
    \n\n
    \n

    The prefix is being returned.

    \n
    \n", "signature": "(self) -> str:", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.get_table_variants", "modulename": "howard.objects.variants", "qualname": "Variants.get_table_variants", "kind": "function", "doc": "

    This function returns the table_variants attribute of the object

    \n\n
    Parameters
    \n\n
      \n
    • clause: the type of clause the table will be used. Either \"select\" or \"from\" (optional),\ndefaults to select (optional)
    • \n
    \n\n
    Returns
    \n\n
    \n

    The table_variants attribute of the object.

    \n
    \n", "signature": "(self, clause: str = 'select') -> str:", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.get_tmp_dir", "modulename": "howard.objects.variants", "qualname": "Variants.get_tmp_dir", "kind": "function", "doc": "

    The function get_tmp_dir returns the temporary directory path based on configuration\nparameters or a default path.

    \n\n
    Returns
    \n\n
    \n

    The get_tmp_dir method is returning the temporary directory path based on the\n configuration, parameters, and a default value of \"/tmp\".

    \n
    \n", "signature": "(self) -> str:", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.get_connexion_type", "modulename": "howard.objects.variants", "qualname": "Variants.get_connexion_type", "kind": "function", "doc": "

    If the connexion type is not in the list of allowed connexion types, raise a ValueError

    \n\n
    Returns
    \n\n
    \n

    The connexion type is being returned.

    \n
    \n", "signature": "(self) -> str:", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.get_connexion", "modulename": "howard.objects.variants", "qualname": "Variants.get_connexion", "kind": "function", "doc": "

    It returns the connection object

    \n\n
    Returns
    \n\n
    \n

    The connection object.

    \n
    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.close_connexion", "modulename": "howard.objects.variants", "qualname": "Variants.close_connexion", "kind": "function", "doc": "

    This function closes the connection to the database.

    \n\n
    Returns
    \n\n
    \n

    The connection is being closed.

    \n
    \n", "signature": "(self) -> None:", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.get_header", "modulename": "howard.objects.variants", "qualname": "Variants.get_header", "kind": "function", "doc": "

    This function returns the header of the VCF file as a list of strings

    \n\n
    Parameters
    \n\n
      \n
    • type: the type of header you want to get, defaults to vcf (optional)
    • \n
    \n\n
    Returns
    \n\n
    \n

    The header of the vcf file.

    \n
    \n", "signature": "(self, type: str = 'vcf'):", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.get_header_infos_list", "modulename": "howard.objects.variants", "qualname": "Variants.get_header_infos_list", "kind": "function", "doc": "

    This function retrieves a list of information fields from the header.

    \n\n
    Returns
    \n\n
    \n

    A list of information fields from the header.

    \n
    \n", "signature": "(self) -> list:", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.get_header_length", "modulename": "howard.objects.variants", "qualname": "Variants.get_header_length", "kind": "function", "doc": "

    The function get_header_length returns the length of the header list, excluding the #CHROM\nline.

    \n\n
    Parameters
    \n\n
      \n
    • file: The file parameter is an optional argument that specifies the path to a VCF\nheader file. If this argument is provided, the function will read the header from the specified\nfile and return the length of the header list minus 1 (to exclude the #CHROM line)
    • \n
    \n\n
    Returns
    \n\n
    \n

    the length of the header list, excluding the #CHROM line.

    \n
    \n", "signature": "(self, file: str = None) -> int:", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.get_header_columns", "modulename": "howard.objects.variants", "qualname": "Variants.get_header_columns", "kind": "function", "doc": "

    This function returns the header list of a VCF

    \n\n
    Returns
    \n\n
    \n

    The length of the header list.

    \n
    \n", "signature": "(self) -> str:", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.get_header_columns_as_list", "modulename": "howard.objects.variants", "qualname": "Variants.get_header_columns_as_list", "kind": "function", "doc": "

    This function returns the header list of a VCF

    \n\n
    Returns
    \n\n
    \n

    The length of the header list.

    \n
    \n", "signature": "(self) -> list:", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.get_header_columns_as_sql", "modulename": "howard.objects.variants", "qualname": "Variants.get_header_columns_as_sql", "kind": "function", "doc": "

    This function retruns header length (without #CHROM line)

    \n\n
    Returns
    \n\n
    \n

    The length of the header list.

    \n
    \n", "signature": "(self) -> str:", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.get_header_sample_list", "modulename": "howard.objects.variants", "qualname": "Variants.get_header_sample_list", "kind": "function", "doc": "

    The function get_header_sample_list returns a list of samples from a VCF header, with optional\nchecking and filtering based on input parameters.

    \n\n
    Parameters
    \n\n
      \n
    • check: The check parameter in the get_header_sample_list function is a boolean\nparameter that determines whether to check if the samples in the list are properly defined as\ngenotype columns. If check is set to True, the function will verify if each sample in the\nlist is defined as a, defaults to False
    • \n
    • samples: The samples parameter in the get_header_sample_list function is a list that\nallows you to specify a subset of samples from the header. If you provide a list of sample\nnames, the function will check if each sample is defined in the header. If a sample is not found\nin the
    • \n
    • samples_force: The samples_force parameter in the get_header_sample_list function is\na boolean parameter that determines whether to force the function to return the sample list\nwithout checking if the samples are genotype columns. If samples_force is set to True, the\nfunction will return the sample list without performing, defaults to False
    • \n
    \n\n
    Returns
    \n\n
    \n

    The function get_header_sample_list returns a list of samples based on the input\n parameters and conditions specified in the function.

    \n
    \n", "signature": "(\tself,\tcheck: bool = False,\tsamples: list = None,\tsamples_force: bool = False) -> list:", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.is_genotype_column", "modulename": "howard.objects.variants", "qualname": "Variants.is_genotype_column", "kind": "function", "doc": "

    This function checks if a given column is a genotype column in a database.

    \n\n
    Parameters
    \n\n
      \n
    • column: The column parameter in the is_genotype_column method is a string that\nrepresents the column name in a database table. This method checks if the specified column is a\ngenotype column in the database. If a column name is provided, it calls the is_genotype_column\nmethod of
    • \n
    \n\n
    Returns
    \n\n
    \n

    The is_genotype_column method is returning a boolean value. If the column parameter\n is not None, it calls the is_genotype_column method of the Database class with the specified\n column name and returns the result. If the column parameter is None, it returns False.

    \n
    \n", "signature": "(self, column: str = None) -> bool:", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.get_verbose", "modulename": "howard.objects.variants", "qualname": "Variants.get_verbose", "kind": "function", "doc": "

    It returns the value of the \"verbose\" key in the config dictionary, or False if the key doesn't\nexist

    \n\n
    Returns
    \n\n
    \n

    The value of the key \"verbose\" in the config dictionary.

    \n
    \n", "signature": "(self) -> bool:", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.get_connexion_format", "modulename": "howard.objects.variants", "qualname": "Variants.get_connexion_format", "kind": "function", "doc": "

    It returns the connexion format of the object.

    \n\n
    Returns
    \n\n
    \n

    The connexion_format is being returned.

    \n
    \n", "signature": "(self) -> str:", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.insert_file_to_table", "modulename": "howard.objects.variants", "qualname": "Variants.insert_file_to_table", "kind": "function", "doc": "

    The function reads a file in chunks and inserts each chunk into a table based on the specified\ndatabase format.

    \n\n
    Parameters
    \n\n
      \n
    • file: The file parameter is the file that you want to load into a table. It should be\nthe path to the file on your system
    • \n
    • columns: The columns parameter in the insert_file_to_table function is a string that\nshould contain the names of the columns in the table where the data will be inserted. The column\nnames should be separated by commas within the string. For example, if you have columns named\n\"id\", \"name
    • \n
    • header_len: The header_len parameter in the insert_file_to_table function specifies\nthe number of lines to skip at the beginning of the file before reading the actual data. This\nparameter allows you to skip any header information present in the file before processing the\ndata, defaults to 0
    • \n
    • sep: The sep parameter in the insert_file_to_table function is used to specify the\nseparator character that is used in the file being read. In this case, the default separator is\nset to , which represents a tab character. You can change this parameter to a different\nseparator character if, defaults to
    • \n
    • chunksize: The chunksize parameter specifies the number of rows to read in at a time\nwhen processing the file in chunks. In the provided code snippet, the default value for\nchunksize is set to 1000000. This means that the file will be read in chunks of 1,, defaults\nto 1000000
    • \n
    \n", "signature": "(\tself,\tfile,\tcolumns: str,\theader_len: int = 0,\tsep: str = '\\t',\tchunksize: int = 1000000) -> None:", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.load_data", "modulename": "howard.objects.variants", "qualname": "Variants.load_data", "kind": "function", "doc": "

    The load_data function reads a VCF file and inserts it into a table, with options to drop the\ntable before loading the data and specify a sample size.

    \n\n
    Parameters
    \n\n
      \n
    • input_file: The path to the input file. This is the VCF file that will be loaded into the\ntable
    • \n
    • drop_variants_table: The drop_variants_table parameter is a boolean flag that\ndetermines whether the variants table should be dropped before loading the data. If set to\nTrue, the variants table will be dropped. If set to False (default), the variants table will\nnot be dropped, defaults to False
    • \n
    • sample_size: The sample_size parameter determines the number of rows to be sampled from\nthe input file. If it is set to None, the default value of 20480 will be used, defaults to\n20480
    • \n
    \n", "signature": "(\tself,\tinput_file: str = None,\tdrop_variants_table: bool = False,\tsample_size: int = 20480) -> None:", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.get_explode_infos", "modulename": "howard.objects.variants", "qualname": "Variants.get_explode_infos", "kind": "function", "doc": "

    The function get_explode_infos returns the value of the \"explode_infos\" parameter, defaulting\nto False if it is not set.

    \n\n
    Returns
    \n\n
    \n

    The method is returning the value of the \"explode_infos\" parameter, which is a boolean\n value. If the parameter is not present, it will return False.

    \n
    \n", "signature": "(self) -> bool:", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.get_explode_infos_fields", "modulename": "howard.objects.variants", "qualname": "Variants.get_explode_infos_fields", "kind": "function", "doc": "

    The get_explode_infos_fields function returns a list of exploded information fields based on\nthe input parameter explode_infos_fields.

    \n\n
    Parameters
    \n\n
      \n
    • explode_infos_fields: The explode_infos_fields parameter is a string that specifies the\nfields to be exploded. It can be set to \"ALL\" to explode all fields, or it can be a\ncomma-separated list of field names to explode
    • \n
    • remove_fields_not_in_header: The parameter remove_fields_not_in_header is a boolean\nflag that determines whether to remove fields that are not present in the header. If it is set\nto True, any field that is not in the header will be excluded from the list of exploded\ninformation fields. If it is set to `, defaults to False
    • \n
    \n\n
    Returns
    \n\n
    \n

    The function get_explode_infos_fields returns a list of exploded information fields.\n If the explode_infos_fields parameter is not provided or is set to None, it returns an empty\n list. If the parameter is provided and its value is \"ALL\", it also returns an empty list.\n Otherwise, it returns a list of exploded information fields after removing any spaces and\n splitting the string by commas.

    \n
    \n", "signature": "(\tself,\texplode_infos_fields: str = None,\tremove_fields_not_in_header: bool = False) -> list:", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.get_explode_infos_prefix", "modulename": "howard.objects.variants", "qualname": "Variants.get_explode_infos_prefix", "kind": "function", "doc": "

    The function get_explode_infos_prefix returns the value of the explode_infos_prefix parameter, or\nthe value of self.get_param().get(\"explode_infos_prefix\", None) if explode_infos_prefix is\nnot provided.

    \n\n
    Parameters
    \n\n
      \n
    • explode_infos_prefix: The parameter explode_infos_prefix is a string that specifies a\nprefix to be used for exploding or expanding information
    • \n
    \n\n
    Returns
    \n\n
    \n

    the value of the variable explode_infos_prefix.

    \n
    \n", "signature": "(self, explode_infos_prefix: str = None) -> str:", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.add_column", "modulename": "howard.objects.variants", "qualname": "Variants.add_column", "kind": "function", "doc": "

    The add_column function adds a column to a SQLite or DuckDB table with a default value if it\ndoesn't already exist.

    \n\n
    Parameters
    \n\n
      \n
    • table_name: The name of the table to which you want to add a column
    • \n
    • column_name: The parameter \"column_name\" is the name of the column that you want to add\nto the table
    • \n
    • column_type: The column_type parameter specifies the data type of the column that you\nwant to add to the table. It should be a string that represents the desired data type, such as\n\"INTEGER\", \"TEXT\", \"REAL\", etc
    • \n
    • default_value: The default_value parameter is an optional parameter that specifies the\ndefault value for the newly added column. If a default value is provided, it will be assigned to\nthe column for any existing rows that do not have a value for that column
    • \n
    • drop: The drop parameter is a boolean flag that determines whether to drop the column\nif it already exists in the table. If drop is set to True, the function will drop the\nexisting column before adding the new column. If drop is set to False (default),, defaults\nto False
    • \n
    \n\n
    Returns
    \n\n
    \n

    a boolean value indicating whether the column was successfully added to the table.

    \n
    \n", "signature": "(\tself,\ttable_name,\tcolumn_name,\tcolumn_type,\tdefault_value=None,\tdrop: bool = False) -> dict:", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.drop_column", "modulename": "howard.objects.variants", "qualname": "Variants.drop_column", "kind": "function", "doc": "

    The drop_column function drops a specified column from a given table in a database and returns\nTrue if the column was successfully dropped, and False if the column does not exist in the\ntable.

    \n\n
    Parameters
    \n\n
      \n
    • column: The column parameter is a dictionary that contains information about the column\nyou want to drop. It has two keys:
    • \n
    • table_name: The table_name parameter is the name of the table from which you want to\ndrop a column
    • \n
    • column_name: The column_name parameter is the name of the column that you want to drop\nfrom the table
    • \n
    \n\n
    Returns
    \n\n
    \n

    a boolean value. It returns True if the column was successfully dropped from the table,\n and False if the column does not exist in the table.

    \n
    \n", "signature": "(\tself,\tcolumn: dict = None,\ttable_name: str = None,\tcolumn_name: str = None) -> bool:", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.explode_infos", "modulename": "howard.objects.variants", "qualname": "Variants.explode_infos", "kind": "function", "doc": "

    The explode_infos function in Python takes a VCF file and explodes the INFO fields into\nindividual columns, returning a list of added columns.

    \n\n
    Parameters
    \n\n
      \n
    • prefix: The prefix parameter is a string that is used as a prefix for the exploded INFO\nfields. If the prefix is not provided or is set to None, the function will use the value of\nself.get_explode_infos_prefix() as the prefix
    • \n
    • create_index: The create_index parameter is a boolean flag that specifies whether to\ncreate indexes on the exploded INFO fields. If set to True, indexes will be created; if set to\nFalse, indexes will not be created. The default value is False, defaults to False
    • \n
    • fields: The fields parameter in the explode_infos function is a list of INFO fields\nthat you want to explode into individual columns. If this parameter is not provided, all INFO\nfields will be exploded. You can specify the INFO fields you want to explode by passing them as\na list to the `
    • \n
    • force: The force parameter in the explode_infos function is a boolean flag that\ndetermines whether to drop and recreate a column if it already exists in the table. If force\nis set to True, the column will be dropped and recreated. If force is set to `False,\ndefaults to False
    • \n
    • proccess_all_fields_together: The proccess_all_fields_together parameter is a boolean\nflag that determines whether to process all the INFO fields together or individually. If set to\nTrue, all the INFO fields will be processed together. If set to False, each INFO field will\nbe processed individually. The default value is, defaults to False
    • \n
    • table: The table parameter in the explode_infos function is used to specify the name\nof the table where the exploded INFO fields will be added as individual columns. If you provide\na value for the table parameter, the function will use that table name. If the table\nparameter is
    • \n
    \n\n
    Returns
    \n\n
    \n

    The explode_infos function returns a list of added columns.

    \n
    \n", "signature": "(\tself,\tprefix: str = None,\tcreate_index: bool = False,\tfields: list = None,\tforce: bool = False,\tproccess_all_fields_together: bool = False,\ttable: str = None) -> list:", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.create_indexes", "modulename": "howard.objects.variants", "qualname": "Variants.create_indexes", "kind": "function", "doc": "

    Create indexes on the table after insertion

    \n", "signature": "(self) -> None:", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.drop_indexes", "modulename": "howard.objects.variants", "qualname": "Variants.drop_indexes", "kind": "function", "doc": "

    Create indexes on the table after insertion

    \n", "signature": "(self) -> None:", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.read_vcf_header", "modulename": "howard.objects.variants", "qualname": "Variants.read_vcf_header", "kind": "function", "doc": "

    It reads the header of a VCF file and returns a list of the header lines

    \n\n
    Parameters
    \n\n
      \n
    • f: the file object
    • \n
    \n\n
    Returns
    \n\n
    \n

    The header lines of the VCF file.

    \n
    \n", "signature": "(self, f) -> list:", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.read_vcf_header_file", "modulename": "howard.objects.variants", "qualname": "Variants.read_vcf_header_file", "kind": "function", "doc": "

    The read_vcf_header_file function reads the header of a VCF file, handling both compressed and\nuncompressed files.

    \n\n
    Parameters
    \n\n
      \n
    • file: The file parameter is a string that represents the path to the VCF header file\nthat you want to read. It is an optional parameter, so if you don't provide a value, it will\ndefault to None
    • \n
    \n\n
    Returns
    \n\n
    \n

    The function read_vcf_header_file returns a list.

    \n
    \n", "signature": "(self, file: str = None) -> list:", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.execute_query", "modulename": "howard.objects.variants", "qualname": "Variants.execute_query", "kind": "function", "doc": "

    It takes a query as an argument, executes it, and returns the results

    \n\n
    Parameters
    \n\n
      \n
    • query: The query to be executed
    • \n
    \n\n
    Returns
    \n\n
    \n

    The result of the query is being returned.

    \n
    \n", "signature": "(self, query: str):", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.export_output", "modulename": "howard.objects.variants", "qualname": "Variants.export_output", "kind": "function", "doc": "

    The export_output function exports data from a VCF file to various formats, including VCF,\nCSV, TSV, PSV, and Parquet, with options for customization such as filtering, sorting, and\npartitioning.

    \n\n
    Parameters
    \n\n
      \n
    • output_file: The output_file parameter is a string that specifies the name of the\noutput file where the exported data will be saved
    • \n
    • output_header: The output_header parameter is a string that specifies the name of the\nfile where the header of the VCF file will be exported. If this parameter is not provided, the\nheader will be exported to a file with the same name as the output_file parameter, but with\nthe extension \"
    • \n
    • export_header: The export_header parameter is a boolean flag that determines whether\nthe header of a VCF file should be exported to a separate file or not. If export_header is\nTrue, the header will be exported to a file. If export_header is False, the header will not\nbe, defaults to True
    • \n
    • query: The query parameter in the export_output function is an optional SQL query\nthat can be used to filter and select specific data from the VCF file before exporting it. If\nprovided, only the data that matches the query will be exported. This allows you to customize\nthe exported data based on
    • \n
    • parquet_partitions: The parquet_partitions parameter is a list that specifies the\ncolumns to be used for partitioning the Parquet file during export. Partitioning is a way to\norganize data in a hierarchical directory structure based on the values of one or more columns.\nThis can improve query performance when working with large datasets
    • \n
    • chunk_size: The chunk_size parameter specifies the number of records in a batch when\nexporting data in Parquet format. This parameter is used for partitioning the Parquet file into\nmultiple files. It helps in optimizing the export process by breaking down the data into\nmanageable chunks for processing and storage
    • \n
    • threads: The threads parameter in the export_output function specifies the number of\nthreads to be used during the export process. It determines the level of parallelism and can\nimprove the performance of the export operation. If this parameter is not provided, the function\nwill use the default number of threads
    • \n
    • sort: The sort parameter in the export_output function is a boolean flag that\ndetermines whether the output file should be sorted based on genomic coordinates of the\nvariants. If sort is set to True, the output file will be sorted. If sort is set to\nFalse,, defaults to False
    • \n
    • index: The index parameter in the export_output function is a boolean flag that\ndetermines whether an index should be created on the output file. If index is set to True,\nan index will be created on the output file. If index is set to False, no, defaults to False
    • \n
    • order_by: The order_by parameter in the export_output function is a string that\nspecifies the column(s) to use for sorting the output file. This parameter is only applicable\nwhen exporting data in VCF format. It allows you to specify the column(s) based on which the\noutput file should be
    • \n
    • fields_to_rename: The fields_to_rename parameter is a dictionary that specifies the\nmapping of field names to be renamed during the export process. This parameter allows you to\ncustomize the output field names before exporting the data. Each key-value pair in the\ndictionary represents the original field name as the key and the new field name
    • \n
    \n\n
    Returns
    \n\n
    \n

    The export_output function returns a boolean value. It checks if the output file\n exists and returns True if it does, or None if it doesn't.

    \n
    \n", "signature": "(\tself,\toutput_file: str | None = None,\toutput_header: str | None = None,\texport_header: bool = True,\tquery: str | None = None,\tparquet_partitions: list | None = None,\tchunk_size: int | None = None,\tthreads: int | None = None,\tsort: bool = False,\tindex: bool = False,\torder_by: str | None = None,\tfields_to_rename: dict | None = None) -> bool:", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.get_extra_infos", "modulename": "howard.objects.variants", "qualname": "Variants.get_extra_infos", "kind": "function", "doc": "

    The get_extra_infos function returns a list of columns that are in a specified table but not\nin the header.

    \n\n
    Parameters
    \n\n
      \n
    • table: The table parameter in the get_extra_infos function is used to specify the\nname of the table from which you want to retrieve the extra columns that are not present in the\nheader. If the table parameter is not provided when calling the function, it will default to\nusing the variants
    • \n
    \n\n
    Returns
    \n\n
    \n

    A list of columns that are in the specified table but not in the header of the table.

    \n
    \n", "signature": "(self, table: str = None) -> list:", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.get_extra_infos_sql", "modulename": "howard.objects.variants", "qualname": "Variants.get_extra_infos_sql", "kind": "function", "doc": "

    It returns a string of the extra infos, separated by commas, and each extra info is surrounded\nby double quotes

    \n\n
    Parameters
    \n\n
      \n
    • table: The name of the table to get the extra infos from. If None, the default table is\nused
    • \n
    \n\n
    Returns
    \n\n
    \n

    A string of the extra infos

    \n
    \n", "signature": "(self, table: str = None) -> str:", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.export_header", "modulename": "howard.objects.variants", "qualname": "Variants.export_header", "kind": "function", "doc": "

    The export_header function takes a VCF file, extracts the header, modifies it according to\nspecified options, and writes it to a new file.

    \n\n
    Parameters
    \n\n
      \n
    • header_name: The header_name parameter is the name of the header file to be created. If\nthis parameter is not specified, the header will be written to the output file
    • \n
    • output_file: The output_file parameter in the export_header function is used to\nspecify the name of the output file where the header will be written. If this parameter is not\nprovided, the header will be written to a temporary file
    • \n
    • output_file_ext: The output_file_ext parameter in the export_header function is a\nstring that represents the extension of the output header file. By default, it is set to \".hdr\"\nif not specified by the user. This extension will be appended to the output_file name to\ncreate the final, defaults to .hdr
    • \n
    • clean_header: The clean_header parameter in the export_header function is a boolean\nflag that determines whether the header should be cleaned or not. When clean_header is set to\nTrue, the function will clean the header by modifying certain lines based on a specific\npattern. If clean_header, defaults to True
    • \n
    • clean_info_flag: The clean_info_flag parameter in the export_header function is a boolean\nflag that determines whether the header should be cleaned for INFO/tags that are 'Flag' type.\nWhen clean_info_flag is set to True, the function will replace INFO/tags 'Type' as 'String'.\nDefault to False
    • \n
    • remove_chrom_line: The remove_chrom_line parameter in the export_header function is a\nboolean flag that determines whether the #CHROM line should be removed from the header before\nwriting it to the output file. If set to True, the #CHROM line will be removed; if set to `,\ndefaults to False
    • \n
    \n\n
    Returns
    \n\n
    \n

    The function export_header returns the name of the temporary header file that is\n created.

    \n
    \n", "signature": "(\tself,\theader_name: str = None,\toutput_file: str = None,\toutput_file_ext: str = '.hdr',\tclean_header: bool = True,\tclean_info_flag: bool = False,\tremove_chrom_line: bool = False) -> str:", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.export_variant_vcf", "modulename": "howard.objects.variants", "qualname": "Variants.export_variant_vcf", "kind": "function", "doc": "

    The export_variant_vcf function exports a VCF file with specified samples, allowing options to\nremove INFO field, add samples, and control compression and indexing.

    \n\n
    Parameters
    \n\n
      \n
    • vcf_file: The vcf_file parameter is the name of the file where the VCF data will be\nwritten to. It is the output file that will contain the filtered VCF data based on the specified\nparameters
    • \n
    • remove_info: The remove_info parameter in the export_variant_vcf function is a\nboolean flag that determines whether to remove the INFO field from the output VCF file. If set\nto True, the INFO field will be removed. If set to False, the INFO field will be included\nin, defaults to False
    • \n
    • add_samples: The add_samples parameter is a boolean parameter that determines whether\nthe samples should be added to the VCF file or not. If set to True, the samples will be added.\nIf set to False, the samples will be removed. The default value is True, defaults to True
    • \n
    • list_samples: The list_samples parameter is a list of samples that you want to include\nin the output VCF file. By default, all samples will be included. If you provide a list of\nsamples, only those samples will be included in the output file
    • \n
    • index: The index parameter in the export_variant_vcf function is a boolean flag that\ndetermines whether or not to create an index for the output VCF file. If index is set to\nTrue, the output VCF file will be indexed using tabix. If index, defaults to False
    • \n
    • threads: The threads parameter in the export_variant_vcf function specifies the\nnumber of threads to use for exporting the VCF file. It determines how many parallel threads\nwill be used during the export process. More threads can potentially speed up the export process\nby utilizing multiple cores of the processor. If
    • \n
    \n\n
    Returns
    \n\n
    \n

    The export_variant_vcf function returns the result of calling the export_output\n method with various parameters including the output file, query, threads, sort flag, and index\n flag. The export_output method is responsible for exporting the VCF data based on the\n specified parameters and configurations provided in the export_variant_vcf function.

    \n
    \n", "signature": "(\tself,\tvcf_file,\tremove_info: bool = False,\tadd_samples: bool = True,\tlist_samples: list = [],\twhere_clause: str = '',\tindex: bool = False,\tthreads: int | None = None) -> bool | None:", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.run_commands", "modulename": "howard.objects.variants", "qualname": "Variants.run_commands", "kind": "function", "doc": "

    It takes a list of commands and runs them in parallel using the number of threads specified

    \n\n
    Parameters
    \n\n
      \n
    • commands: A list of commands to run
    • \n
    • threads: The number of threads to use, defaults to 1 (optional)
    • \n
    \n", "signature": "(self, commands: list = [], threads: int = 1) -> None:", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.get_threads", "modulename": "howard.objects.variants", "qualname": "Variants.get_threads", "kind": "function", "doc": "

    This function returns the number of threads to use for a job, with a default value of 1 if not\nspecified.

    \n\n
    Parameters
    \n\n
      \n
    • default: The default parameter in the get_threads method is used to specify the\ndefault number of threads to use if no specific value is provided. If no value is provided for\nthe threads parameter in the configuration or input parameters, the default value will be\nused, defaults to 1
    • \n
    \n\n
    Returns
    \n\n
    \n

    the number of threads to use for the current job.

    \n
    \n", "signature": "(self, default: int = 1) -> int:", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.get_memory", "modulename": "howard.objects.variants", "qualname": "Variants.get_memory", "kind": "function", "doc": "

    This function retrieves the memory value from parameters or configuration with a default value\nif not found.

    \n\n
    Parameters
    \n\n
      \n
    • default: The get_memory function takes in a default value as a string parameter. This\ndefault value is used as a fallback in case the memory parameter is not provided in the\nparam dictionary or the config dictionary. If memory is not found in either dictionary,\nthe function
    • \n
    \n\n
    Returns
    \n\n
    \n

    The get_memory function returns a string value representing the memory parameter. If\n the input_memory is provided in the parameters, it will return that value. Otherwise, it will\n return the default value provided as an argument to the function.

    \n
    \n", "signature": "(self, default: str = None) -> str:", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.update_from_vcf", "modulename": "howard.objects.variants", "qualname": "Variants.update_from_vcf", "kind": "function", "doc": "
    \n

    If the database is duckdb, then use the parquet method, otherwise use the sqlite method

    \n
    \n\n
    Parameters
    \n\n
      \n
    • vcf_file: the path to the VCF file
    • \n
    \n", "signature": "(self, vcf_file: str) -> None:", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.update_from_vcf_duckdb", "modulename": "howard.objects.variants", "qualname": "Variants.update_from_vcf_duckdb", "kind": "function", "doc": "

    It takes a VCF file and updates the INFO column of the variants table in the database with the\nINFO column of the VCF file

    \n\n
    Parameters
    \n\n
      \n
    • vcf_file: the path to the VCF file
    • \n
    \n", "signature": "(self, vcf_file: str) -> None:", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.update_from_vcf_sqlite", "modulename": "howard.objects.variants", "qualname": "Variants.update_from_vcf_sqlite", "kind": "function", "doc": "

    It creates a temporary table in the SQLite database, loads the VCF file into the temporary\ntable, then updates the INFO column of the variants table with the INFO column of the temporary\ntable

    \n\n
    Parameters
    \n\n
      \n
    • vcf_file: The path to the VCF file you want to update the database with
    • \n
    \n", "signature": "(self, vcf_file: str) -> None:", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.drop_variants_table", "modulename": "howard.objects.variants", "qualname": "Variants.drop_variants_table", "kind": "function", "doc": "
    \n

    This function drops the variants table

    \n
    \n", "signature": "(self) -> None:", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.set_variant_id", "modulename": "howard.objects.variants", "qualname": "Variants.set_variant_id", "kind": "function", "doc": "

    It adds a column to the variants table called variant_id and populates it with a hash of the\n#CHROM, POS, REF, and ALT columns

    \n\n
    Parameters
    \n\n
      \n
    • variant_id_column: The name of the column to be created in the variants table, defaults\nto variant_id
    • \n
    • force: If True, the variant_id column will be created even if it already exists
    • \n
    \n\n
    Returns
    \n\n
    \n

    The name of the column that contains the variant_id

    \n
    \n", "signature": "(self, variant_id_column: str = 'variant_id', force: bool = None) -> str:", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.get_variant_id_column", "modulename": "howard.objects.variants", "qualname": "Variants.get_variant_id_column", "kind": "function", "doc": "

    This function returns the variant_id column name

    \n\n
    Parameters
    \n\n
      \n
    • variant_id_column: The name of the column in the dataframe that contains the variant IDs,\ndefaults to variant_id
    • \n
    • force: If True, will force the variant_id to be set to the value of variant_id_column. If\nFalse, will only set the variant_id if it is not already set. If None, will set the variant_id\nif it is not already set, or if it is set
    • \n
    \n\n
    Returns
    \n\n
    \n

    The variant_id column name.

    \n
    \n", "signature": "(self, variant_id_column: str = 'variant_id', force: bool = None) -> str:", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.scan_databases", "modulename": "howard.objects.variants", "qualname": "Variants.scan_databases", "kind": "function", "doc": "

    The function scan_databases scans for available databases based on specified formats and\nreleases.

    \n\n
    Parameters
    \n\n
      \n
    • database_formats: The database_formats parameter is a list that specifies the formats\nof the databases to be scanned. In this case, the accepted format is \"parquet\"
    • \n
    • database_releases: The database_releases parameter is a list that specifies the\nreleases of the databases to be scanned. In the provided function, the default value for\ndatabase_releases is set to [\"current\"], meaning that by default, the function will scan\ndatabases that are in the \"current\"
    • \n
    \n\n
    Returns
    \n\n
    \n

    The function scan_databases returns a dictionary containing information about\n databases that match the specified formats and releases.

    \n
    \n", "signature": "(\tself,\tdatabase_formats: list = ['parquet'],\tdatabase_releases: list = ['current']) -> dict:", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.annotation", "modulename": "howard.objects.variants", "qualname": "Variants.annotation", "kind": "function", "doc": "

    It annotates the VCF file with the annotations specified in the config file.

    \n", "signature": "(self) -> None:", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.annotation_bigwig", "modulename": "howard.objects.variants", "qualname": "Variants.annotation_bigwig", "kind": "function", "doc": "

    The function annotation_bigwig annotates variants in a VCF file using bigwig databases.

    \n\n
    Parameters
    \n\n
      \n
    • threads: The threads parameter in the annotation_bigwig method is used to specify the\nnumber of threads to be used for parallel processing during the annotation process. If the\nthreads parameter is not provided, the method will attempt to determine the optimal number of\nthreads to use based on the system configuration
    • \n
    \n\n
    Returns
    \n\n
    \n

    True

    \n
    \n", "signature": "(self, threads: int = None) -> None:", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.annotation_snpsift", "modulename": "howard.objects.variants", "qualname": "Variants.annotation_snpsift", "kind": "function", "doc": "

    This function annotate with bcftools

    \n\n
    Parameters
    \n\n
      \n
    • threads: Number of threads to use
    • \n
    \n\n
    Returns
    \n\n
    \n

    the value of the variable \"return_value\".

    \n
    \n", "signature": "(self, threads: int = None) -> None:", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.annotation_bcftools", "modulename": "howard.objects.variants", "qualname": "Variants.annotation_bcftools", "kind": "function", "doc": "

    This function annotate with bcftools

    \n\n
    Parameters
    \n\n
      \n
    • threads: Number of threads to use
    • \n
    \n\n
    Returns
    \n\n
    \n

    the value of the variable \"return_value\".

    \n
    \n", "signature": "(self, threads: int = None) -> None:", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.annotation_exomiser", "modulename": "howard.objects.variants", "qualname": "Variants.annotation_exomiser", "kind": "function", "doc": "

    This function annotate with Exomiser

    \n\n

    This function uses args as parameters, in section \"annotation\" -> \"exomiser\", with sections:

    \n\n
      \n
    • \"analysis\" (dict/file):\nFull analysis dictionnary parameters (see Exomiser docs).\nEither a dict, or a file in JSON or YAML format.\nThese parameters may change depending on other parameters (e.g. phenotipicFeatures/HPO)\nDefault : None
    • \n
    • \"preset\" (string):\nAnalysis preset (available in config folder).\nUsed if no full \"analysis\" is provided.\nDefault: \"exome\"
    • \n
    • \"phenopacket\" (dict/file):\nSamples and phenotipic features parameters (see Exomiser docs).\nEither a dict, or a file in JSON or YAML format.\nDefault: None
    • \n
    • \"subject\" (dict):\nSample parameters (see Exomiser docs).\nExample:\n \"subject\":\n {\n \"id\": \"ISDBM322017\",\n \"sex\": \"FEMALE\"\n }\nDefault: None
    • \n
    • \"sample\" (string):\nSample name to construct \"subject\" section:\n \"subject\":\n {\n \"id\": \"\",\n \"sex\": \"UNKNOWN_SEX\"\n }\nDefault: None
    • \n
    • \"phenotypicFeatures\" (dict)\nPhenotypic features to construct \"subject\" section.\nExample:\n \"phenotypicFeatures\":\n [\n { \"type\": { \"id\": \"HP:0001159\", \"label\": \"Syndactyly\" } },\n { \"type\": { \"id\": \"HP:0000486\", \"label\": \"Strabismus\" } }\n ]
    • \n
    • \"hpo\" (list)\nList of HPO ids as phenotypic features.\nExample:\n \"hpo\": ['0001156', '0001363', '0011304', '0010055']\nDefault: []
    • \n
    • \"outputOptions\" (dict):\nOutput options (see Exomiser docs).\nDefault:\n \"output_options\" =\n {\n \"outputContributingVariantsOnly\": False,\n \"numGenes\": 0,\n \"outputFormats\": [\"TSV_VARIANT\", \"VCF\"]\n }
    • \n
    • \"transcript_source\" (string):\nTranscript source (either \"refseq\", \"ucsc\", \"ensembl\")\nDefault: \"refseq\"
    • \n
    • \"exomiser_to_info\" (boolean):\nAdd exomiser TSV file columns as INFO fields in VCF.\nDefault: False
    • \n
    • \"release\" (string):\nExomise database release.\nIf not exists, database release will be downloaded (take a while).\nDefault: None (provided by application.properties configuration file)
    • \n
    • \"exomiser_application_properties\" (file):\nExomiser configuration file (see Exomiser docs).\nUseful to automatically download databases (especially for specific genome databases).
    • \n
    \n\n

    Notes:

    \n\n
      \n
    • If no sample in parameters, first sample in VCF will be chosen
    • \n
    • If no HPO found, \"hiPhivePrioritiser\" analysis step will be switch off
    • \n
    \n\n
    Parameters
    \n\n
      \n
    • threads: The number of threads to use
    • \n
    \n\n
    Returns
    \n\n
    \n

    None.

    \n
    \n", "signature": "(self, threads: int = None) -> None:", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.annotation_snpeff", "modulename": "howard.objects.variants", "qualname": "Variants.annotation_snpeff", "kind": "function", "doc": "

    This function annotate with snpEff

    \n\n
    Parameters
    \n\n
      \n
    • threads: The number of threads to use
    • \n
    \n\n
    Returns
    \n\n
    \n

    the value of the variable \"return_value\".

    \n
    \n", "signature": "(self, threads: int = None) -> None:", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.annotation_annovar", "modulename": "howard.objects.variants", "qualname": "Variants.annotation_annovar", "kind": "function", "doc": "

    It takes a VCF file, annotates it with Annovar, and then updates the database with the new\nannotations

    \n\n
    Parameters
    \n\n
      \n
    • threads: number of threads to use
    • \n
    \n\n
    Returns
    \n\n
    \n

    the value of the variable \"return_value\".

    \n
    \n", "signature": "(self, threads: int = None) -> None:", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.annotation_parquet", "modulename": "howard.objects.variants", "qualname": "Variants.annotation_parquet", "kind": "function", "doc": "

    It takes a VCF file, and annotates it with a parquet file

    \n\n
    Parameters
    \n\n
      \n
    • threads: number of threads to use for the annotation
    • \n
    \n\n
    Returns
    \n\n
    \n

    the value of the variable \"result\".

    \n
    \n", "signature": "(self, threads: int = None) -> None:", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.annotation_splice", "modulename": "howard.objects.variants", "qualname": "Variants.annotation_splice", "kind": "function", "doc": "

    This function annotate with snpEff

    \n\n
    Parameters
    \n\n
      \n
    • threads: The number of threads to use
    • \n
    \n\n
    Returns
    \n\n
    \n

    the value of the variable \"return_value\".

    \n
    \n", "signature": "(self, threads: int = None) -> None:", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.get_config_default", "modulename": "howard.objects.variants", "qualname": "Variants.get_config_default", "kind": "function", "doc": "

    The function get_config_default returns a dictionary containing default configurations for\nvarious calculations and prioritizations.

    \n\n
    Parameters
    \n\n
      \n
    • name: The get_config_default function returns a dictionary containing default\nconfigurations for different calculations and prioritizations. The name parameter is used to\nspecify which specific configuration to retrieve from the dictionary
    • \n
    \n\n
    Returns
    \n\n
    \n

    The function get_config_default returns a dictionary containing default configuration\n settings for different calculations and prioritizations. The specific configuration settings are\n retrieved based on the input name parameter provided to the function. If the name parameter\n matches a key in the config_default dictionary, the corresponding configuration settings are\n returned. If there is no match, an empty dictionary is returned.

    \n
    \n", "signature": "(self, name: str) -> dict:", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.get_config_json", "modulename": "howard.objects.variants", "qualname": "Variants.get_config_json", "kind": "function", "doc": "

    The function get_config_json retrieves a configuration JSON object with prioritizations from\ndefault values, a dictionary, and a file.

    \n\n
    Parameters
    \n\n
      \n
    • name: The name parameter in the get_config_json function is a string that represents\nthe name of the configuration. It is used to identify and retrieve the configuration settings\nfor a specific component or module
    • \n
    • config_dict: The config_dict parameter in the get_config_json function is a\ndictionary that allows you to provide additional configuration settings or overrides. When you\ncall the get_config_json function, you can pass a dictionary containing key-value pairs where\nthe key is the configuration setting you want to override or
    • \n
    • config_file: The config_file parameter in the get_config_json function is used to\nspecify the path to a configuration file that contains additional settings. If provided, the\nfunction will read the contents of this file and update the configuration dictionary with the\nvalues found in the file, overriding any existing values with the
    • \n
    \n\n
    Returns
    \n\n
    \n

    The function get_config_json returns a dictionary containing the configuration\n settings.

    \n
    \n", "signature": "(self, name: str, config_dict: dict = {}, config_file: str = None) -> dict:", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.prioritization", "modulename": "howard.objects.variants", "qualname": "Variants.prioritization", "kind": "function", "doc": "

    The prioritization function in Python processes VCF files, adds new INFO fields, and\nprioritizes variants based on configured profiles and criteria.

    \n\n
    Parameters
    \n\n
      \n
    • table: The table parameter in the prioritization function is used to specify the name\nof the table (presumably a VCF file) on which the prioritization operation will be performed. If\na table name is provided, the method will prioritize the variants in that specific table
    • \n
    • pz_prefix: The pz_prefix parameter is used to specify a prefix that will be added to\ncertain INFO fields in a VCF file during the prioritization process. If this parameter is not\nprovided, the code will use a default prefix value of \"PZ\"
    • \n
    • pz_param: The pz_param parameter in the prioritization method is used to pass\nadditional parameters specific to the prioritization process. These parameters can include\nsettings related to prioritization profiles, fields, scoring modes, flags, comments, and other\nconfigurations needed for the prioritization of variants in a V
    • \n
    • pz_keys: The pz_keys parameter in the prioritization function is used to specify the\nkeys that will be used to join the prioritization table with the variant table. If no keys are\nprovided, the function will use the default keys of [\"#CHROM\", \"POS\", \"REF\", \"ALT\"]
    • \n
    \n\n
    Returns
    \n\n
    \n

    The prioritization function returns a boolean value (True) if the prioritization\n operation is successful. If the operation fails, the function will return a boolean value of\n False

    \n
    \n", "signature": "(\tself,\ttable: str = None,\tpz_prefix: str = None,\tpz_param: dict = None,\tpz_keys: list = None) -> bool:", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.annotation_hgvs", "modulename": "howard.objects.variants", "qualname": "Variants.annotation_hgvs", "kind": "function", "doc": "

    The annotation_hgvs function performs HGVS annotation on a set of variants using genomic\ncoordinates and alleles.

    \n\n
    Parameters
    \n\n
      \n
    • threads: The threads parameter is an optional integer that specifies the number of\nthreads to use for parallel processing. If no value is provided, it will default to the number\nof threads obtained from the get_threads() method
    • \n
    \n", "signature": "(self, threads: int = None) -> None:", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.get_operations_help", "modulename": "howard.objects.variants", "qualname": "Variants.get_operations_help", "kind": "function", "doc": "

    \n", "signature": "(\tself,\toperations_config_dict: dict = {},\toperations_config_file: str = None) -> list:", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.calculation", "modulename": "howard.objects.variants", "qualname": "Variants.calculation", "kind": "function", "doc": "

    It takes a list of operations, and for each operation, it checks if it's a python or sql\noperation, and then calls the appropriate function

    \n\n

    param json example:\n \"calculation\": {\n \"NOMEN\": {\n \"options\": {\n \"hgvs_field\": \"hgvs\"\n },\n \"middle\" : null\n }

    \n", "signature": "(\tself,\toperations: dict = {},\toperations_config_dict: dict = {},\toperations_config_file: str = None) -> None:", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.calculation_process_sql", "modulename": "howard.objects.variants", "qualname": "Variants.calculation_process_sql", "kind": "function", "doc": "

    The calculation_process_sql function takes in a mathematical operation as a string and\nperforms the operation, updating the specified table with the result.

    \n\n
    Parameters
    \n\n
      \n
    • operation: The operation parameter is a dictionary that contains information about the\nmathematical operation to be performed. It includes the following keys:
    • \n
    • operation_name: The operation_name parameter is a string that represents the name of\nthe mathematical operation being performed. It is used for logging and error handling purposes,\ndefaults to unknown
    • \n
    \n", "signature": "(self, operation: dict, operation_name: str = 'unknown') -> None:", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.calculation_process_function", "modulename": "howard.objects.variants", "qualname": "Variants.calculation_process_function", "kind": "function", "doc": "

    The calculation_process_function takes in an operation dictionary and performs the specified\nfunction with the given parameters.

    \n\n
    Parameters
    \n\n
      \n
    • operation: The operation parameter is a dictionary that contains information about the\noperation to be performed. It has the following keys:
    • \n
    • operation_name: The operation_name parameter is a string that represents the name of\nthe operation being performed. It is used for logging purposes, defaults to unknown
    • \n
    \n", "signature": "(self, operation: dict, operation_name: str = 'unknown') -> None:", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.calculation_variant_id", "modulename": "howard.objects.variants", "qualname": "Variants.calculation_variant_id", "kind": "function", "doc": "

    The function calculation_variant_id adds a variant ID annotation to a VCF file header and\nupdates the INFO field of a variants table with the variant ID.

    \n", "signature": "(self) -> None:", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.calculation_extract_snpeff", "modulename": "howard.objects.variants", "qualname": "Variants.calculation_extract_snpeff", "kind": "function", "doc": "

    This function extracts SnpEff annotations from the specified field in the VCF file and processes them according to the provided parameters. The annotations can be exploded into separate rows, converted into JSON format, and/or ensured to be unique. The processed annotations are then added to the VCF file with the specified prefixes.

    \n\n

    Args:\n snpeff_field (str): The annotation field in the VCF file to extract SnpEff annotations from. Default is \"ANN\".\n snpeff_hgvs (str): The prefix for the HGVS annotations extracted from SnpEff. Default is \"snpeff_hgvs\".\n snpeff_explode (bool): Whether to explode the annotations into separate rows. Default is \"snpeff_\".\n snpeff_json (bool): Whether to convert the annotations into JSON format. Default is \"snpeff_json\".\n uniquify (bool): Whether to ensure unique annotations. Default is True.

    \n\n

    Returns:\n None

    \n", "signature": "(\tself,\tsnpeff_field: str = 'ANN',\tsnpeff_hgvs: str = 'snpeff_hgvs',\tsnpeff_explode: bool = 'snpeff_',\tsnpeff_json: bool = 'snpeff_json',\tuniquify: bool = True) -> None:", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.calculation_extract_nomen", "modulename": "howard.objects.variants", "qualname": "Variants.calculation_extract_nomen", "kind": "function", "doc": "

    This function extracts the HGVS nomenclature from the calculation/identification of NOMEN.

    \n", "signature": "(self) -> None:", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.calculation_find_by_pipeline", "modulename": "howard.objects.variants", "qualname": "Variants.calculation_find_by_pipeline", "kind": "function", "doc": "

    The function calculation_find_by_pipeline performs a calculation to find the number of\npipeline/sample for a variant and updates the variant information in a VCF file.

    \n\n
    Parameters
    \n\n
      \n
    • tag: The tag parameter is a string that represents the annotation field for the\n\"findbypipeline\" information in the VCF file. It is used to create the annotation field in the\nVCF header and to update the corresponding field in the variants table, defaults to\nfindbypipeline
    • \n
    \n", "signature": "(self, tag: str = 'findbypipeline') -> None:", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.calculation_genotype_concordance", "modulename": "howard.objects.variants", "qualname": "Variants.calculation_genotype_concordance", "kind": "function", "doc": "

    The function calculation_genotype_concordance calculates the genotype concordance for\nmulti-caller VCF files and updates the variant information in the database.

    \n", "signature": "(self) -> None:", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.calculation_barcode", "modulename": "howard.objects.variants", "qualname": "Variants.calculation_barcode", "kind": "function", "doc": "

    The calculation_barcode function calculates barcode values for variants in a VCF file and\nupdates the INFO field in the file with the calculated barcode values.

    \n\n
    Parameters
    \n\n
      \n
    • tag: The tag parameter in the calculation_barcode function is used to specify the tag\nname that will be used for the barcode calculation in the VCF file. If no tag name is provided,\nthe default tag name is set to \"barcode\", defaults to barcode
    • \n
    \n", "signature": "(self, tag: str = 'barcode') -> None:", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.calculation_barcode_family", "modulename": "howard.objects.variants", "qualname": "Variants.calculation_barcode_family", "kind": "function", "doc": "

    The calculation_barcode_family function calculates barcode values for variants in a VCF file\nand updates the INFO field in the file with the calculated barcode values.

    \n\n
    Parameters
    \n\n
      \n
    • tag: The tag parameter in the calculation_barcode_family function is used to specify\nthe barcode tag that will be added to the VCF file during the calculation process. If no value\nis provided for the tag parameter, the default value used is \"BCF\", defaults to BCF
    • \n
    • tag_samples: The tag_samples parameter in the calculation_barcode_family function is\nused to specify the barcode tag that will be added to the VCF file for samples during the\ncalculation process. If no value is provided for the tag_samples parameter, the default value\nused is \"BCFS\", defaults to BCFS
    • \n
    \n", "signature": "(self, tag: str = None, tag_samples: str = None) -> None:", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.calculation_trio", "modulename": "howard.objects.variants", "qualname": "Variants.calculation_trio", "kind": "function", "doc": "

    The calculation_trio function performs trio calculations on a VCF file by adding trio\ninformation to the INFO field of each variant.

    \n", "signature": "(self) -> None:", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.calculation_vaf_normalization", "modulename": "howard.objects.variants", "qualname": "Variants.calculation_vaf_normalization", "kind": "function", "doc": "

    The calculation_vaf_normalization function calculates the VAF (Variant Allele Frequency)\nnormalization for each sample in a VCF file and updates the FORMAT and INFO fields accordingly.

    \n\n
    Returns
    \n\n
    \n

    The function does not return anything.

    \n
    \n", "signature": "(self) -> None:", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.calculation_genotype_stats", "modulename": "howard.objects.variants", "qualname": "Variants.calculation_genotype_stats", "kind": "function", "doc": "

    The calculation_genotype_stats function calculates genotype statistics for a given information\nfield in a VCF file and updates the INFO column of the variants table with the calculated\nstatistics.

    \n\n
    Parameters
    \n\n
      \n
    • info: The info parameter is a string that represents the type of information for which\ngenotype statistics are calculated. It is used to generate various VCF info tags for the\nstatistics, such as the number of occurrences, the list of values, the minimum value, the\nmaximum value, the mean, the median, defaults to VAF
    • \n
    \n", "signature": "(self, info: str = 'VAF') -> None:", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.calculation_transcripts_annotation", "modulename": "howard.objects.variants", "qualname": "Variants.calculation_transcripts_annotation", "kind": "function", "doc": "

    The calculation_transcripts_annotation function creates a transcripts table and adds an info\nfield to it if transcripts are available.

    \n\n
    Parameters
    \n\n
      \n
    • info_json: The info_json parameter in the calculation_transcripts_annotation method\nis a string parameter that represents the information field to be used in the transcripts JSON.\nIt is used to specify the JSON format for the transcripts information. If no value is provided\nwhen calling the method, it defaults to None
    • \n
    • info_format: The info_format parameter in the calculation_transcripts_annotation\nmethod is a string parameter that specifies the format of the information field to be used in\nthe transcripts JSON. It is used to define the format of the information field
    • \n
    \n", "signature": "(self, info_json: str = None, info_format: str = None) -> None:", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.calculation_transcripts_prioritization", "modulename": "howard.objects.variants", "qualname": "Variants.calculation_transcripts_prioritization", "kind": "function", "doc": "

    The function calculation_transcripts_prioritization creates a transcripts table and\nprioritizes transcripts based on certain criteria.

    \n", "signature": "(self) -> None:", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.calculation_transcripts_export", "modulename": "howard.objects.variants", "qualname": "Variants.calculation_transcripts_export", "kind": "function", "doc": "

    \n", "signature": "(self) -> None:", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.create_transcript_view", "modulename": "howard.objects.variants", "qualname": "Variants.create_transcript_view", "kind": "function", "doc": "

    The create_transcript_view function generates a transcript view by processing data from a\nspecified table based on provided parameters and structural information.

    \n\n
    Parameters
    \n\n
      \n
    • transcripts_table: The transcripts_table parameter in the create_transcript_view function\nis used to specify the name of the table that will store the final transcript view data. If a table\nname is not provided, the function will create a new table to store the transcript view data, and by\ndefault, defaults to transcripts
    • \n
    • transcripts_table_drop: The transcripts_table_drop parameter in the\ncreate_transcript_view function is a boolean parameter that determines whether to drop the\nexisting transcripts table before creating a new one. If transcripts_table_drop is set to True,\nthe function will drop the existing transcripts table if it exists, defaults to False
    • \n
    • param: The param parameter in the create_transcript_view function is a dictionary that\ncontains information needed to create a transcript view. It includes details such as the structure\nof the transcripts, columns mapping, column formats, and other necessary information for generating\nthe view. This parameter allows for flexibility and customization
    • \n
    \n\n
    Returns
    \n\n
    \n

    The create_transcript_view function returns the name of the transcripts table that was\n created or modified during the execution of the function.

    \n
    \n", "signature": "(\tself,\ttranscripts_table: str = None,\ttranscripts_table_drop: bool = False,\tparam: dict = {}) -> str:", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.create_transcript_view_from_columns_map", "modulename": "howard.objects.variants", "qualname": "Variants.create_transcript_view_from_columns_map", "kind": "function", "doc": "

    The create_transcript_view_from_columns_map function generates a temporary table view based on\nspecified columns mapping for transcripts data.

    \n\n
    Parameters
    \n\n
      \n
    • transcripts_table: The transcripts_table parameter is a string that specifies the name\nof the table where the transcripts data is stored or will be stored in the database. This table\ntypically contains information about transcripts such as Ensembl transcript IDs, gene names,\nscores, predictions, etc. It defaults to \"transcripts\", defaults to transcripts
    • \n
    • columns_maps: The columns_maps parameter is a dictionary that contains information\nabout how to map columns from a transcripts table to create a view. Each entry in the\ncolumns_maps list represents a mapping configuration for a specific set of columns. It\ntypically includes details such as the main transcript column and additional information columns
    • \n
    • added_columns: The added_columns parameter in the\ncreate_transcript_view_from_columns_map function is a list that stores the additional columns\nthat will be added to the view being created based on the columns map provided. These columns\nare generated by exploding the transcript information columns along with the main transcript\ncolumn
    • \n
    • temporary_tables: The temporary_tables parameter in the\ncreate_transcript_view_from_columns_map function is a list that stores the names of temporary\ntables created during the process of creating a transcript view from a columns map. These\ntemporary tables are used to store intermediate results or transformations before the final view\nis generated
    • \n
    • annotation_fields: The annotation_fields parameter in the\ncreate_transcript_view_from_columns_map function is a list that stores the fields that are\nused for annotation in the query view creation process. These fields are extracted from the\ntranscripts_column and transcripts_infos_columns specified in the `columns
    • \n
    • column_rename: The column_rename parameter in the\ncreate_transcript_view_from_columns_map function is a dictionary that allows you to specify\ncustom renaming for columns during the creation of the temporary table view. This parameter\nprovides a mapping of original column names to the desired renamed column names. By using this\nparameter,
    • \n
    • column_clean: The column_clean parameter in the\ncreate_transcript_view_from_columns_map function is a boolean flag that determines whether the\ncolumn values should be cleaned or not. If set to True, the column values will be cleaned by\nremoving any non-alphanumeric characters from them. This cleaning process ensures, defaults to\nFalse
    • \n
    • column_case: The column_case parameter in the create_transcript_view_from_columns_map\nfunction is used to specify the case transformation to be applied to the columns during the view\ncreation process. It allows you to control whether the column values should be converted to\nlowercase, uppercase, or remain unchanged
    • \n
    \n\n
    Returns
    \n\n
    \n

    The create_transcript_view_from_columns_map function returns a tuple containing three\n lists: added_columns, temporary_tables, and annotation_fields.

    \n
    \n", "signature": "(\tself,\ttranscripts_table: str = 'transcripts',\tcolumns_maps: dict = {},\tadded_columns: list = [],\ttemporary_tables: list = None,\tannotation_fields: list = None,\tcolumn_rename: dict = {},\tcolumn_clean: bool = False,\tcolumn_case: str = None) -> tuple[list, list, list]:", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.create_transcript_view_from_column_format", "modulename": "howard.objects.variants", "qualname": "Variants.create_transcript_view_from_column_format", "kind": "function", "doc": "

    The create_transcript_view_from_column_format function generates a transcript view based on\nspecified column formats, adds additional columns and annotation fields, and returns the list of\ntemporary tables and annotation fields.

    \n\n
    Parameters
    \n\n
      \n
    • transcripts_table: The transcripts_table parameter is a string that specifies the name\nof the table containing the transcripts data. This table will be used as the base table for\ncreating the transcript view. The default value for this parameter is \"transcripts\", but you can\nprovide a different table name if needed, defaults to transcripts
    • \n
    • column_formats: The column_formats parameter is a dictionary that contains information\nabout the columns to be used for creating the transcript view. Each entry in the dictionary\nspecifies the mapping between a transcripts column and a transcripts infos column. This\nparameter allows you to define how the columns from the transcripts table should be transformed\nor mapped
    • \n
    • temporary_tables: The temporary_tables parameter in the\ncreate_transcript_view_from_column_format function is a list that stores the names of\ntemporary views created during the process of creating a transcript view from a column format.\nThese temporary views are used to manipulate and extract data before generating the final\ntranscript view
    • \n
    • annotation_fields: The annotation_fields parameter in the\ncreate_transcript_view_from_column_format function is a list that stores the annotation fields\nthat are extracted from the temporary views created during the process. These annotation fields\nare obtained by querying the temporary views and extracting the column names excluding specific\ncolumns like `#CH
    • \n
    • column_rename: The column_rename parameter in the\ncreate_transcript_view_from_column_format function is a dictionary that allows you to specify\ncustom renaming of columns in the transcripts infos table. By providing a mapping of original\ncolumn names to new column names in this dictionary, you can rename specific columns during the\nprocess
    • \n
    • column_clean: The column_clean parameter in the\ncreate_transcript_view_from_column_format function is a boolean flag that determines whether\nthe transcripts infos columns should undergo a cleaning process. If set to True, the columns\nwill be cleaned during the creation of the transcript view based on the specified column format,\ndefaults to False
    • \n
    • column_case: The column_case parameter in the\ncreate_transcript_view_from_column_format function is used to specify the case transformation\nto be applied to the columns in the transcript view. It can be set to either \"upper\" or \"lower\"\nto convert the column names to uppercase or lowercase, respectively
    • \n
    \n\n
    Returns
    \n\n
    \n

    The create_transcript_view_from_column_format function returns two lists:\n temporary_tables and annotation_fields.

    \n
    \n", "signature": "(\tself,\ttranscripts_table: str = 'transcripts',\tcolumn_formats: dict = {},\ttemporary_tables: list = None,\tannotation_fields: list = None,\tcolumn_rename: dict = {},\tcolumn_clean: bool = False,\tcolumn_case: str = None) -> tuple[list, list, list]:", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.annotation_format_to_table", "modulename": "howard.objects.variants", "qualname": "Variants.annotation_format_to_table", "kind": "function", "doc": "

    The annotation_format_to_table function converts annotation data from a VCF file into a\nstructured table format, ensuring unique values and creating a temporary table for further\nprocessing or analysis.

    \n\n
    Parameters
    \n\n
      \n
    • uniquify: The uniquify parameter is a boolean flag that determines whether to ensure\nunique values in the output or not. If set to True, the function will make sure that the\noutput values are unique, defaults to True
    • \n
    • annotation_field: The annotation_field parameter refers to the field in the VCF file\nthat contains the annotation information for each variant. This field is used to extract the\nannotation details for further processing in the function. By default, it is set to \"ANN\",\ndefaults to ANN
    • \n
    • annotation_id: The annotation_id parameter in the annotation_format_to_table method\nis used to specify the identifier for the annotation feature. This identifier will be used as a\ncolumn name in the resulting table or view that is created based on the annotation data. It\nhelps in uniquely identifying each annotation entry in the, defaults to Feature_ID
    • \n
    • view_name: The view_name parameter in the annotation_format_to_table method is used\nto specify the name of the temporary table that will be created to store the transformed\nannotation data. This table will hold the extracted information from the annotation field in a\nstructured format for further processing or analysis. By default,, defaults to transcripts
    • \n
    • column_rename: The column_rename parameter in the annotation_format_to_table method\nis a dictionary that allows you to specify custom renaming for columns. By providing key-value\npairs in this dictionary, you can rename specific columns in the resulting table or view that is\ncreated based on the annotation data. This feature enables
    • \n
    • column_clean: The column_clean parameter in the annotation_format_to_table method is\na boolean flag that determines whether the annotation field should undergo a cleaning process.\nIf set to True, the function will clean the annotation field before further processing. This\ncleaning step may involve removing any unwanted characters, formatting inconsistencies, defaults\nto False
    • \n
    • column_case: The column_case parameter in the annotation_format_to_table method is\nused to specify the case transformation to be applied to the column names extracted from the\nannotation data. It allows you to set the case of the column names to either lowercase or\nuppercase for consistency or other specific requirements during the conversion
    • \n
    • column_split: The column_split parameter in the annotation_format_to_table method is\nused to specify the separator to split fields values. Default as '&'. None to disable.
    • \n
    \n\n
    Returns
    \n\n
    \n

    The function annotation_format_to_table is returning the name of the view created,\n which is stored in the variable view_name.

    \n
    \n", "signature": "(\tself,\tannotation_field: str = 'ANN',\tannotation_id: str = 'Feature_ID',\tview_name: str = 'transcripts',\tcolumn_rename: dict = {},\tcolumn_clean: bool = False,\tcolumn_case: str = None,\tcolumn_split: str = '&') -> str:", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.transcripts_export", "modulename": "howard.objects.variants", "qualname": "Variants.transcripts_export", "kind": "function", "doc": "

    Exports transcript data from a table to a specified file.

    \n\n

    Args:\n transcripts_table (str): The name of the transcripts table.\n param (dict): A dictionary of parameters to customize the export process. This can include various options such as filtering criteria, formatting options, etc.

    \n\n

    Returns:\n bool: Returns True if the export is successful, False otherwise.

    \n\n

    This function exports transcript data to a specified file, using the provided parameters to customize the export process. The function returns True if the export is successful, and False otherwise.

    \n", "signature": "(self, transcripts_table: str = None, param: dict = {}) -> bool:", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.transcripts_prioritization", "modulename": "howard.objects.variants", "qualname": "Variants.transcripts_prioritization", "kind": "function", "doc": "

    The transcripts_prioritization function prioritizes transcripts based on certain parameters\nand updates the variants table with the prioritized information.

    \n\n
    Parameters
    \n\n
      \n
    • transcripts_table: The transcripts_table parameter is a string that specifies the name\nof the table containing transcripts data. If no value is provided, it defaults to \"transcripts\".\nThis parameter is used to identify the table where the transcripts data is stored for the\nprioritization process
    • \n
    • param: The param parameter in the transcripts_prioritization method is a dictionary\nthat contains various configuration settings for the prioritization process of transcripts. It\nis used to customize the behavior of the prioritization algorithm and includes settings such as\nthe prefix for prioritization fields, default profiles, and other
    • \n
    \n\n
    Returns
    \n\n
    \n

    The function transcripts_prioritization returns a boolean value True if the\n transcripts prioritization process is successfully completed, and False if there are any\n issues or if no profile is defined for transcripts prioritization.

    \n
    \n", "signature": "(self, transcripts_table: str = None, param: dict = {}) -> bool:", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.transcript_view_to_variants", "modulename": "howard.objects.variants", "qualname": "Variants.transcript_view_to_variants", "kind": "function", "doc": "

    The transcript_view_to_variants function updates a variants table with information from\ntranscripts in JSON format.

    \n\n
    Parameters
    \n\n
      \n
    • transcripts_table: The transcripts_table parameter is used to specify the name of the\ntable containing the transcripts data. If this parameter is not provided, the function will\nattempt to retrieve it from the param dictionary or use a default value of \"transcripts\"
    • \n
    • transcripts_column_id: The transcripts_column_id parameter is used to specify the\ncolumn in the transcripts_table that contains the unique identifier for each transcript. This\nidentifier is used to match transcripts with variants in the database
    • \n
    • transcripts_info_json: The transcripts_info_json parameter is used to specify the name\nof the column in the variants table where the transcripts information will be stored in JSON\nformat. This parameter allows you to define the column in the variants table that will hold the\nJSON-formatted information about transcripts
    • \n
    • transcripts_info_field_json: The transcripts_info_field_json parameter is used to\nspecify the field in the VCF header that will contain information about transcripts in JSON\nformat. This field will be added to the VCF header as an INFO field with the specified name
    • \n
    • transcripts_info_format: The transcripts_info_format parameter is used to specify the\nformat of the information about transcripts that will be stored in the variants table. This\nformat can be used to define how the transcript information will be structured or displayed\nwithin the variants table
    • \n
    • transcripts_info_field_format: The transcripts_info_field_format parameter is used to\nspecify the field in the VCF header that will contain information about transcripts in a\nspecific format. This field will be added to the VCF header as an INFO field with the specified\nname
    • \n
    • param: The param parameter in the transcript_view_to_variants method is a dictionary\nthat contains various configuration settings related to transcripts. It is used to provide\ndefault values for certain parameters if they are not explicitly provided when calling the\nmethod. The param dictionary can be passed as an argument
    • \n
    \n\n
    Returns
    \n\n
    \n

    The function transcript_view_to_variants returns a boolean value. It returns True\n if the operation is successful and False if certain conditions are not met.

    \n
    \n", "signature": "(\tself,\ttranscripts_table: str = None,\ttranscripts_column_id: str = None,\ttranscripts_info_json: str = None,\ttranscripts_info_field_json: str = None,\ttranscripts_info_format: str = None,\ttranscripts_info_field_format: str = None,\tparam: dict = {}) -> bool:", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.rename_info_fields", "modulename": "howard.objects.variants", "qualname": "Variants.rename_info_fields", "kind": "function", "doc": "

    The rename_info_fields function renames specified fields in a VCF file header and updates\ncorresponding INFO fields in the variants table.

    \n\n
    Parameters
    \n\n
      \n
    • fields_to_rename: The fields_to_rename parameter is a dictionary that contains the\nmapping of fields to be renamed in a VCF (Variant Call Format) file. The keys in the dictionary\nrepresent the original field names that need to be renamed, and the corresponding values\nrepresent the new names to which the fields should be
    • \n
    • table: The table parameter in the rename_info_fields function represents the name of\nthe table in which the variants data is stored. This table contains information about genetic\nvariants, and the function updates the corresponding INFO fields in this table when renaming\nspecified fields in the VCF file header
    • \n
    \n\n
    Returns
    \n\n
    \n

    The rename_info_fields function returns a dictionary fields_processed that contains\n the original field names as keys and their corresponding new names (or None if the field was\n removed) as values after renaming or removing specified fields in a VCF file header and updating\n corresponding INFO fields in the variants table.

    \n
    \n", "signature": "(self, fields_to_rename: dict = None, table: str = None) -> dict:", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.recreate_info_fields", "modulename": "howard.objects.variants", "qualname": "Variants.recreate_info_fields", "kind": "function", "doc": "

    The recreate_info_fields function renames specified fields in a VCF file header and updates\ncorresponding INFO fields in the variants table.

    \n\n
    Parameters
    \n\n
      \n
    • fields_to_rename: The fields_to_rename parameter is a dictionary that contains the\nmapping of fields to be renamed in a VCF (Variant Call Format) file. The keys in the dictionary\nrepresent the original field names that need to be renamed, and the corresponding values\nrepresent the new names to which the fields should be renamed. Default {}
    • \n
    • table: The table parameter in the recreate_info_fields function represents the name of\nthe table in which the variants data is stored. This table contains information about genetic\nvariants, and the function updates the corresponding INFO fields in this table when renaming\nspecified fields in the VCF file header. Default Variants table 'variants'.
    • \n
    \n\n
    Returns
    \n\n
    \n

    The recreate_info_fields function returns a dictionary fields_renamed that contains\n the original field names as keys and their corresponding new names (or None if the field was\n removed) as values after renaming or removing specified fields in a VCF file header and updating\n corresponding INFO fields in the variants table.

    \n
    \n", "signature": "(self, fields_to_rename: dict = None, table: str = None) -> dict:", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.calculation_rename_info_fields", "modulename": "howard.objects.variants", "qualname": "Variants.calculation_rename_info_fields", "kind": "function", "doc": "

    The calculation_rename_info_fields function retrieves parameters from a dictionary, updates\nfields to rename and table if provided, and then calls another function to rename the fields.

    \n\n
    Parameters
    \n\n
      \n
    • fields_to_rename: fields_to_rename is a dictionary that contains the fields to be\nrenamed in a table. Each key-value pair in the dictionary represents the original field name as\nthe key and the new field name as the value
    • \n
    • table: The table parameter in the calculation_rename_info_fields method is used to\nspecify the name of the table for which the fields are to be renamed. It is a string type\nparameter
    • \n
    • operation_name: The operation_name parameter in the calculation_rename_info_fields\nmethod is a string that specifies the name of the operation being performed. In this context, it\nis used as a default value for the operation name if not explicitly provided when calling the\nfunction, defaults to RENAME_INFO_FIELDS
    • \n
    \n", "signature": "(\tself,\tfields_to_rename: dict = None,\ttable: str = None,\toperation_name: str = 'RENAME_INFO_FIELDS') -> None:", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.calculation_recreate_info_fields", "modulename": "howard.objects.variants", "qualname": "Variants.calculation_recreate_info_fields", "kind": "function", "doc": "

    The calculation_recreate_info_fields function retrieves parameters from a dictionary, recreate\nINFO fields with rename and table if provided, and then calls another function to rename the fields.

    \n\n
    Parameters
    \n\n
      \n
    • fields_to_rename: fields_to_rename is a dictionary that contains the fields to be\nrenamed in a table. Each key-value pair in the dictionary represents the original field name as\nthe key and the new field name as the value
    • \n
    • table: The table parameter in the calculation_recreate_info_fields method is used to\nspecify the name of the table for which the fields are to be renamed. It is a string type\nparameter
    • \n
    • operation_name: The operation_name parameter in the calculation_recreate_info_fields\nmethod is a string that specifies the name of the operation being performed. In this context, it\nis used as a default value for the operation name if not explicitly provided when calling the\nfunction, defaults to RENAME_INFO_FIELDS
    • \n
    \n", "signature": "(\tself,\tfields_to_rename: dict = None,\ttable: str = None,\toperation_name: str = 'RENAME_INFO_FIELDS') -> None:", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.create_annotations_view", "modulename": "howard.objects.variants", "qualname": "Variants.create_annotations_view", "kind": "function", "doc": "

    The create_annotations_view function creates a SQL view from fields in a VCF INFO column.

    \n\n
    Parameters
    \n\n
      \n
    • table: The table parameter in the create_annotations_view function is used to specify\nthe name of the table from which the fields are to be extracted. This table contains the\nvariants data, and the function creates a view based on the fields in the INFO column of this\ntable. Defaults to None
    • \n
    • view: The view parameter in the create_annotations_view function is used to specify\nthe name of the view that will be created based on the fields in the VCF INFO column. This view\nwill contain the extracted fields from the INFO column in a structured format for further\nprocessing or analysis. Defaults to None
    • \n
    • view_type: The view_type parameter in the create_annotations_view function is used to\nspecify the type of view that will be created. It can be either a VIEW or a TABLE, and the\nfunction will create the view based on the specified type. Defaults to VIEW
    • \n
    • view_mode: The view_mode parameter in the create_annotations_view function is used to\nspecify the mode of view that will be created. It can be either a full or an explore, and the\nfunction will create the view based on the specified algorithm/SQL query. Defaults to full
    • \n
    • fields: The fields parameter in the create_annotations_view function is a list that\ncontains the names of the fields to be extracted from the INFO column in the VCF file. These\nfields will be used to create the view with the specified columns and data extracted from the\nINFO column. Defaults to None
    • \n
    • fields_needed: The fields_needed parameter in the create_annotations_view function is\na list of fields that are required for the view. These fields are essential for the view and\nmust be included in the view to ensure that the data is complete and accurate. By default, the\nfunction will include all columns' table in the view, but you can specify the\nrequired fields using this parameter. Defaults to None, which means key columns corresponding\nof a variant [\"#CHROM\", \"POS\", \"REF\", \"ALT\"]
    • \n
    • fields_needed_all: The fields_needed_all parameter in the create_annotations_view\nfunction is a boolean flag that determines whether to include all fields in the table in the\nview. If set to True, the function will include all fields in the table in the view (only\nif fields_needed is False). If set to False, the function will only include the\nneeded fields specified in the fields_needed parameter in the view. Defaults to False
    • \n
    • detect_type_list: The detect_type_list parameter in the create_annotations_view\nfunction is a boolean flag that determines whether to detect the type of the fields extracted\nfrom the INFO column. If set to True, the function will detect the type of the fields and\nhandle them accordingly in the view. Defaults to False
    • \n
    • fields_not_exists: The fields_not_exists parameter in the create_annotations_view\nfunction is a boolean flag that determines whether to include fields that do not exist in the\ntable in the view. If set to True, the function will include fields that do not exist in the\ntable as NULL values in the view. Defaults to True
    • \n
    • info_prefix_column: The info_prefix_column parameter in the create_annotations_view\nfunction is used to specify a prefix that will be added to the field names in the view.\nIf provided, the function will generate fields with the prefix (e.g. \"\", \"INFOS_\", \"annotations_\").\nIf not provided (None), the function will not generate columns. This prefix helps in\ndistinguishing the fields extracted from the INFO column in the view. Defaults to None.
    • \n
    • info_struct_column: The info_struct_column parameter in the create_annotations_view\nfunction is used to specify the name of the column that will contain the extracted fields from\nthe INFO column in the view. This column will hold the structured data extracted from the INFO\ncolumn for further processing or analysis (e.g. \"INFOS\" or \"annotations\"). If not provided (None),\nthe function will not generate the column. Defaults to None
    • \n
    • sample_struct_column: The sample_struct_column parameter in the create_annotations_view\nfunction is used to specify the name of the column that will contain the extracted formats from\nthe samples columns in the view. This column will hold the structured data extracted from all\nsamples column for further processing or analysis (e.g. \"SAMPLES\" or \"genotypes\"). If not provided (None),\nthe function will not generate the column. Defaults to None
    • \n
    • drop_view: The drop_view parameter in the create_annotations_view function is a boolean\nflag that determines whether to drop the existing view with the same name before creating a new\nview. If set to True, the function will drop the existing view before creating a new view with\nthe specified name. Defaults to False
    • \n
    • fields_to_rename: The fields_to_rename parameter in the create_annotations_view\nfunction is a dictionary that contains the mapping of fields to be renamed in the VCF file. The\nkeys in the dictionary represent the original field names that need to be renamed, and the\ncorresponding values represent the new names to which the fields should be. Defaults to None
    • \n
    • fields_forced_as_varchar: Force fields as type VARCHAR
    • \n
    • limit: The limit parameter in the create_annotations_view function is an integer that\nspecifies the maximum number of rows to be included in the view. If provided, the function will\nlimit the number of rows in the view to the specified value. Defaults to None
    • \n
    \n\n
    Returns
    \n\n
    \n

    The create_annotations_view function returns the name of the view that is created\n based on the fields extracted from the INFO column in the VCF file. This view contains the\n extracted fields in a structured format for further processing or analysis. Defaults to None

    \n
    \n", "signature": "(\tself,\ttable: str = None,\tview: str = None,\tview_type: str = None,\tview_mode: str = None,\tfields: list = None,\tfields_needed: list = None,\tfields_needed_all: bool = False,\tdetect_type_list: bool = True,\tfields_not_exists: bool = True,\tinfo_prefix_column: str = None,\tinfo_struct_column: str = None,\tsample_struct_column: str = None,\tdrop_view: bool = False,\tfields_to_rename: dict = None,\tfields_forced_as_varchar: bool = False,\tlimit: int = None) -> str:", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.remove_tables_or_views", "modulename": "howard.objects.variants", "qualname": "Variants.remove_tables_or_views", "kind": "function", "doc": "

    Remove specified tables and views from the database.

    \n\n

    Args:\n tables (list): A list of table names to be removed. Default is None.\n views (list): A list of view names to be removed. Default is None.

    \n\n

    Returns:\n list: A list of tables and views that were successfully removed.

    \n\n

    This function attempts to remove the specified tables and views from the database.\nIt first tries to drop each item as a table, and if that fails, it tries to drop it as a view.\nIf an item is neither a table nor a view, an error is logged.

    \n", "signature": "(self, tables: list = None, views: list = None) -> list:", "funcdef": "def"}, {"fullname": "howard.tools", "modulename": "howard.tools", "kind": "module", "doc": "

    \n"}, {"fullname": "howard.tools.annotation", "modulename": "howard.tools.annotation", "kind": "module", "doc": "

    \n"}, {"fullname": "howard.tools.annotation.annotation", "modulename": "howard.tools.annotation", "qualname": "annotation", "kind": "function", "doc": "

    The annotation function performs annotation on a VCF file based on specified parameters and\nexports the annotated data.

    \n\n
    Parameters
    \n\n
      \n
    • args: The args parameter is likely an object or dictionary containing various arguments\npassed to the annotation function. It is not clear from the code snippet what specific arguments\nare expected or required
    • \n
    \n", "signature": "(\targs: <module 'argparse' from '/Users/lebechea/miniconda/envs/howard_devel/lib/python3.10/argparse.py'>) -> None:", "funcdef": "def"}, {"fullname": "howard.tools.calculation", "modulename": "howard.tools.calculation", "kind": "module", "doc": "

    \n"}, {"fullname": "howard.tools.calculation.calculation", "modulename": "howard.tools.calculation", "qualname": "calculation", "kind": "function", "doc": "

    This function performs calculations on VCF data based on user input and exports the results.

    \n\n
    Parameters
    \n\n
      \n
    • args: The args parameter is a command line argument parser object that contains the\narguments passed to the script when it was executed
    • \n
    \n", "signature": "(\targs: <module 'argparse' from '/Users/lebechea/miniconda/envs/howard_devel/lib/python3.10/argparse.py'>) -> None:", "funcdef": "def"}, {"fullname": "howard.tools.databases", "modulename": "howard.tools.databases", "kind": "module", "doc": "

    \n"}, {"fullname": "howard.tools.databases.databases", "modulename": "howard.tools.databases", "qualname": "databases", "kind": "function", "doc": "

    The function downloads databases and logs the start and end of the process.

    \n\n
    Parameters
    \n\n
      \n
    • args: The \"args\" parameter is likely an object or dictionary containing various arguments or\noptions related to the \"databases\" function. Without more context, it's difficult to say exactly\nwhat these arguments might be, but they could include things like the names or locations of\ndatabases to download, authentication credentials, or
    • \n
    \n", "signature": "(\targs: <module 'argparse' from '/Users/lebechea/miniconda/envs/howard_devel/lib/python3.10/argparse.py'>) -> None:", "funcdef": "def"}, {"fullname": "howard.tools.filter", "modulename": "howard.tools.filter", "kind": "module", "doc": "

    \n"}, {"fullname": "howard.tools.filter.filter", "modulename": "howard.tools.filter", "qualname": "filter", "kind": "function", "doc": "

    This Python function loads and queries data from a VCF file based on user input and exports the\nresults.

    \n\n
    Parameters
    \n\n
      \n
    • args: args is an object that contains the arguments passed to the function. It is likely a\nNamespace object created by parsing command line arguments using argparse
    • \n
    \n", "signature": "(\targs: <module 'argparse' from '/Users/lebechea/miniconda/envs/howard_devel/lib/python3.10/argparse.py'>) -> None:", "funcdef": "def"}, {"fullname": "howard.tools.gui", "modulename": "howard.tools.gui", "kind": "module", "doc": "

    \n"}, {"fullname": "howard.tools.gui.main_folder", "modulename": "howard.tools.gui", "qualname": "main_folder", "kind": "variable", "doc": "

    \n", "default_value": "'/Users/lebechea/BIOINFO/git/HOWARD/howard/tools'"}, {"fullname": "howard.tools.gui.image_dir", "modulename": "howard.tools.gui", "qualname": "image_dir", "kind": "variable", "doc": "

    \n", "default_value": "'/Users/lebechea/BIOINFO/git/HOWARD/howard/tools/../../images/'"}, {"fullname": "howard.tools.gui.gui", "modulename": "howard.tools.gui", "qualname": "gui", "kind": "function", "doc": "

    The gui function generates a graphical user interface (GUI) for a Python script using the\nargparse module and the Gooey library.

    \n\n
    Parameters
    \n\n
      \n
    • args: The args parameter is of type argparse, which is a module in Python used for\nparsing command-line arguments. It is used to define the arguments that the program accepts and to\ngenerate help messages. In this code, it seems that args is an object that contains information\nabout the
    • \n
    \n", "signature": "(*args, **kwargs):", "funcdef": "def"}, {"fullname": "howard.tools.help", "modulename": "howard.tools.help", "kind": "module", "doc": "

    \n"}, {"fullname": "howard.tools.help.main_folder", "modulename": "howard.tools.help", "qualname": "main_folder", "kind": "variable", "doc": "

    \n", "default_value": "'/Users/lebechea/BIOINFO/git/HOWARD/howard/tools'"}, {"fullname": "howard.tools.help.help", "modulename": "howard.tools.help", "qualname": "help", "kind": "function", "doc": "

    The help function generates help documentation in various formats (parser, Markdown, HTML) based\non the provided arguments and setup configuration.

    \n\n
    Parameters
    \n\n
      \n
    • args: The args parameter is of type argparse.Namespace. It is used to pass command-line\narguments to the help function. The argparse module provides a way to parse command-line\narguments and generate help messages. The Namespace object holds the values of the command-line\narguments
    • \n
    \n", "signature": "(\targs: <module 'argparse' from '/Users/lebechea/miniconda/envs/howard_devel/lib/python3.10/argparse.py'>) -> None:", "funcdef": "def"}, {"fullname": "howard.tools.hgvs", "modulename": "howard.tools.hgvs", "kind": "module", "doc": "

    \n"}, {"fullname": "howard.tools.hgvs.hgvs", "modulename": "howard.tools.hgvs", "qualname": "hgvs", "kind": "function", "doc": "

    The hgvs function takes command line arguments, creates a VCF object, sets parameters and\nconfigurations, loads data from an input file, performs annotation using HGVS notation, exports the\noutput, and closes the connection.

    \n\n
    Parameters
    \n\n
      \n
    • args: The args parameter is of type argparse.Namespace and is used to parse command line\narguments. It contains the following attributes:
    • \n
    \n", "signature": "(\targs: <module 'argparse' from '/Users/lebechea/miniconda/envs/howard_devel/lib/python3.10/argparse.py'>) -> None:", "funcdef": "def"}, {"fullname": "howard.tools.interactive", "modulename": "howard.tools.interactive", "kind": "module", "doc": "

    \n"}, {"fullname": "howard.tools.interactive.launch_interactive_terminal", "modulename": "howard.tools.interactive", "qualname": "launch_interactive_terminal", "kind": "function", "doc": "

    Launch an interactive SQL terminal with DuckDB

    \n", "signature": "(args=None, variants=None, tmp=None, display_format='dataframe'):", "funcdef": "def"}, {"fullname": "howard.tools.prioritization", "modulename": "howard.tools.prioritization", "kind": "module", "doc": "

    \n"}, {"fullname": "howard.tools.prioritization.prioritization", "modulename": "howard.tools.prioritization", "qualname": "prioritization", "kind": "function", "doc": "

    The function performs prioritization on a VCF file based on user-specified configurations and\nexports the results.

    \n\n
    Parameters
    \n\n
      \n
    • args: args is an object that contains the command line arguments passed to the script. It is\nused to configure the behavior of the script and to provide input and output file paths, as well as\nother parameters needed for the execution of the script
    • \n
    \n", "signature": "(\targs: <module 'argparse' from '/Users/lebechea/miniconda/envs/howard_devel/lib/python3.10/argparse.py'>) -> None:", "funcdef": "def"}, {"fullname": "howard.tools.process", "modulename": "howard.tools.process", "kind": "module", "doc": "

    \n"}, {"fullname": "howard.tools.process.process", "modulename": "howard.tools.process", "qualname": "process", "kind": "function", "doc": "

    The \"process\" function processes input arguments, loads parameters in JSON format, creates a VCF\nobject, performs quick annotations, calculations, prioritizations, and queries, exports output, and\ncloses the connection.

    \n\n
    Parameters
    \n\n
      \n
    • args: args is a variable that contains the arguments passed to the function \"process\". It is\nassumed to be an object with several attributes, including \"config\", \"param\", \"input\", \"output\",\n\"annotations\", \"calculations\", \"prioritizations\", and \"query\". These attributes are used to
    • \n
    \n", "signature": "(\targs: <module 'argparse' from '/Users/lebechea/miniconda/envs/howard_devel/lib/python3.10/argparse.py'>) -> None:", "funcdef": "def"}, {"fullname": "howard.tools.query", "modulename": "howard.tools.query", "kind": "module", "doc": "

    \n"}, {"fullname": "howard.tools.query.query", "modulename": "howard.tools.query", "qualname": "query", "kind": "function", "doc": "

    This Python function loads and queries data from a VCF file based on user input and exports the\nresults.

    \n\n
    Parameters
    \n\n
      \n
    • args: args is an object that contains the arguments passed to the function. It is likely a\nNamespace object created by parsing command line arguments using argparse
    • \n
    \n", "signature": "(\targs: <module 'argparse' from '/Users/lebechea/miniconda/envs/howard_devel/lib/python3.10/argparse.py'>) -> None:", "funcdef": "def"}, {"fullname": "howard.tools.stats", "modulename": "howard.tools.stats", "kind": "module", "doc": "

    \n"}, {"fullname": "howard.tools.stats.stats", "modulename": "howard.tools.stats", "qualname": "stats", "kind": "function", "doc": "

    The stats() function takes in arguments, loads data from an input file, gets statistics on the data,\nand closes the connection.

    \n\n
    Parameters
    \n\n
      \n
    • args: args is a parameter that is passed to the function stats(). It is likely an object or a\ndictionary that contains various arguments or parameters that are needed by the function to perform\nits tasks. Some of the arguments that may be included in args are input file path, configuration\nsettings, and other parameters that are
    • \n
    \n", "signature": "(\targs: <module 'argparse' from '/Users/lebechea/miniconda/envs/howard_devel/lib/python3.10/argparse.py'>) -> None:", "funcdef": "def"}, {"fullname": "howard.tools.tools", "modulename": "howard.tools.tools", "kind": "module", "doc": "

    \n"}, {"fullname": "howard.tools.tools.PathType", "modulename": "howard.tools.tools", "qualname": "PathType", "kind": "class", "doc": "

    \n"}, {"fullname": "howard.tools.tools.PathType.__init__", "modulename": "howard.tools.tools", "qualname": "PathType.__init__", "kind": "function", "doc": "

    exists:\n True: a path that does exist\n False: a path that does not exist, in a valid parent directory\n None: don't care\ntype: file, dir, symlink, None, or a function returning True for valid paths\n None: don't care\ndash_ok: whether to allow \"-\" as stdin/stdout

    \n", "signature": "(exists=True, type='file', dash_ok=True)"}, {"fullname": "howard.tools.tools.arguments", "modulename": "howard.tools.tools", "qualname": "arguments", "kind": "variable", "doc": "

    \n", "default_value": "{'input': {'metavar': 'input', 'help': 'Input file path.\\nFormat file must be either VCF, Parquet, TSV, CSV, PSV or duckDB.\\nFiles can be compressesd (e.g. vcf.gz, tsv.gz).\\n', 'required': False, 'default': None, 'type': <howard.tools.tools.PathType object>, 'gooey': {'widget': 'FileChooser', 'options': {'wildcard': 'Parquet file (*.parquet)|*.parquet|All files (*)|*'}}}, 'output': {'metavar': 'output', 'help': 'Output file path.\\nFormat file must be either VCF, Parquet, TSV, CSV, PSV or duckDB.\\nFiles can be compressesd (e.g. vcf.gz, tsv.gz).\\n', 'required': False, 'default': None, 'type': <howard.tools.tools.PathType object>, 'gooey': {'widget': 'FileSaver'}}, 'param': {'metavar': 'param', 'help': 'Parameters JSON file (or string) defines parameters to process \\nannotations, calculations, prioritizations, convertions and queries.\\n', 'default': '{}', 'type': <howard.tools.tools.PathType object>, 'gooey': {'widget': 'FileChooser', 'options': {'initial_value': '', 'wildcard': 'JSON file (*.json)|*.json|All files (*)|*'}}}, 'query': {'metavar': 'query', 'help': "Query in SQL format\\n(e.g. 'SELECT * FROM variants LIMIT 50').\\n", 'default': None, 'type': <class 'str'>, 'gooey': {'widget': 'Textarea', 'options': {'initial_value': 'SELECT * FROM variants'}}, 'extra': {'param_section': 'query'}}, 'filter': {'metavar': 'filter', 'help': "Filter variant using SQL format\\n(e.g. 'POS < 100000').\\n", 'default': None, 'type': <class 'str'>, 'gooey': {'widget': 'Textarea', 'options': {'initial_value': ''}}}, 'samples': {'metavar': 'samples', 'help': "List of samples\\n(e.g. 
'sample1,sample2').\\n", 'default': None, 'type': <class 'str'>, 'gooey': {'widget': 'Textarea', 'options': {'initial_value': ''}}}, 'output_query': {'metavar': 'output', 'help': 'Output Query file.\\nFormat file must be either VCF, Parquet, TSV, CSV, PSV or duckDB.\\n', 'default': None, 'type': <howard.tools.tools.PathType object>, 'gooey': {'widget': 'FileSaver', 'options': {'wildcard': 'All files (*)|*'}}}, 'annotations': {'metavar': 'annotations', 'help': "Annotation with databases files, or with tools,\\nas a list of files in Parquet, VCF, BED, or keywords\\n (e.g. 'file.parquet,bcftools:file2.vcf.gz,annovar:refGene,snpeff').\\n- For a Parquet/VCF/BED, use file paths\\n (e.g. 'file1.parquet,file2.vcf.gz').\\n- For BCFTools annotation, use keyword 'bcftools' with file paths\\n (e.g. 'bcftools:file.vcf.gz:file.bed.gz').\\n- For Parquet annotation, use keyword 'parquet' with file paths\\n (e.g. 'parquet:file.parquet').\\n- For Annovar annotation, use keyword 'annovar' with annovar code\\n (e.g. 'annovar:refGene', 'annovar:refGene:cosmic70').\\n- For snpeff annotation, use keyword 'snpeff' with options\\n (e.g. 'snpeff', 'snpeff:-hgvs -noShiftHgvs -spliceSiteSize 3').\\n- For snpSift annotation, use keyword 'snpsift' with file paths\\n (e.g. 'snpsift:file.vcf.gz:file.bed.gz').\\n- For Exomiser annotation, use keyword 'exomiser' with options as key=value\\n (e.g. 'exomiser:preset=exome:transcript_source=refseq').\\n- For add all availalbe databases files, use 'ALL' keyword,\\n with filters on format (e.g. 'parquet', 'vcf') and release (e.g. 'current', 'devel')\\n (e.g. 
'ALL', ALL:format=parquet', 'ALL:format=parquet:release=current', 'ALL:format=parquet+vcf:release=current+devel').\\n", 'default': None, 'type': <class 'str'>, 'extra': {'format': 'DB[,DB]*[,bcftools:DB[:DB]*][,annovar:KEY[:KEY]*][,snpeff][,exomiser[:var=val]*]', 'examples': {'Parquet method annotation with 2 Parquet files': '"annotations": "/path/to/database1.parquet,/path/to/database2.parquet"', 'Parquet method annotation with multiple file formats': '"annotations": "/path/to/database1.parquet,/path/to/database2.vcf.gz,/path/to/database2.bed.gz"', 'Parquet method annotation with available Parquet databases in current release (check databases in production)': '"annotations": "ALL:parquet:current"', 'Parquet method annotation with available Parquet databases in latest release (check databases before production)': '"annotations": "ALL:parquet:latest"', 'Annotation with BCFTools': '"annotations": "bcftools:/path/to/database2.vcf.gz:/path/to/database2.bed.gz"', 'Annotation with Annovar (refGene with hgvs and Cosmic)': '"annotations": "annovar:refGene:cosmic70"', 'Annotation with snpEff (default options)': '"annotations": "snpeff"', 'Annotation with snpEff (with options)': '"annotations": "snpeff:-hgvs -noShiftHgvs -spliceSiteSize 3"', 'Annotation with snpSift': '"annotations": "snpsift:/path/to/database2.vcf.gz:/path/to/database2.bed.gz"', 'Annotation with Exomiser with options': '"annotations": "exomiser:preset=exome:hpo=0001156+0001363+0011304+0010055:transcript_source=refseq:release=2109"', 'Multiple tools annotations (Parquet method, BCFTools, Annovar, snpEff and Exomiser)': '"annotations": "/path/to/database1.parquet,bcftools:/path/to/database2.vcf.gz,annovar:refGene:cosmic70,snpeff,exomiser:preset=exome:transcript_source=refseq"'}}}, 'annotation_parquet': {'metavar': 'annotation parquet', 'help': "Annotation with Parquet method, as a list of files in Parquet, VCF or BED\\n (e.g. 
'file1.parquet,file2.vcf.gz').\\nFor add all availalbe databases files, use 'ALL' keyword,\\n with filters on type and release\\n (e.g. 'ALL', 'ALL:parquet:current', 'ALL:parquet,vcf:current,devel').\\n", 'default': None, 'type': <class 'str'>, 'nargs': '+', 'gooey': {'widget': 'MultiFileChooser', 'options': {'default_dir': '/Users/lebechea/howard/databases/annotations/current', 'message': 'Database files'}}, 'extra': {'format': 'DB[,DB]*', 'examples': {'Parquet method annotation with 2 Parquet files': '"annotation_parquet": "/path/to/database1.parquet,/path/to/database2.parquet"', 'Parquet method annotation with multiple file formats': '"annotation_parquet": "/path/to/database1.parquet,/path/to/database2.vcf.gz,/path/to/database2.bed.gz"', 'Parquet method annotation with available Parquet databases in current release (check databases in production)': '"annotation_parquet": "ALL:parquet:current"', 'Parquet method annotation with available Parquet databases in latest release (check databases before production)': '"annotation_parquet": "ALL:parquet:latest"'}}}, 'annotation_bcftools': {'metavar': 'annotation BCFTools', 'help': "Annotation with BCFTools, as a list of files VCF or BED\\n (e.g. 'file.vcf.gz,file.bed.gz').\\n", 'default': None, 'type': <class 'str'>, 'nargs': '+', 'gooey': {'widget': 'MultiFileChooser', 'options': {'default_dir': '/Users/lebechea/howard/databases/annotations/current', 'message': 'Database files'}}, 'extra': {'format': 'DB[,DB]*', 'examples': {'Annovation with BCFTools': '"annotation_bcftools": "/path/to/database2.vcf.gz,/path/to/database2.bed.gz"'}}}, 'annotation_snpeff': {'metavar': 'annotation snpEff', 'help': "Annotation with snpEff, with options\\n (e.g. 
'', '-hgvs -noShiftHgvs -spliceSiteSize 3').\\n", 'default': None, 'type': <class 'str'>, 'extra': {'format': 'options', 'examples': {'Annotation with snpEff (default options)': '"annotation_snpeff": ""', 'Annotation with snpEff (with options)': '"annotation_snpeff": "-hgvs -noShiftHgvs -spliceSiteSize 3"'}}}, 'annotation_snpsift': {'metavar': 'annotation snpSift', 'help': "Annotation with snpSift, as a list of files VCF\\n (e.g. 'file.vcf.gz,file.bed.gz').\\n", 'default': None, 'type': <class 'str'>, 'nargs': '+', 'gooey': {'widget': 'MultiFileChooser', 'options': {'default_dir': '/Users/lebechea/howard/databases/annotations/current', 'message': 'Database files'}}, 'extra': {'format': 'DB[,DB]*', 'examples': {'Annovation with snpSift': '"annotation_snpsift": "/path/to/database2.vcf.gz,/path/to/database2.bed.gz"'}}}, 'annotation_annovar': {'metavar': 'annotation Annovar', 'help': "Annotation with Annovar, as a list of database keywords\\n (e.g. 'refGene', 'refGene:cosmic70').\\n", 'default': None, 'type': <class 'str'>, 'extra': {'format': 'keyword[:keyword]*', 'examples': {'Annotation with Annovar (refGene with hgvs and Cosmic)': '"annotation_annovar": "refGene:cosmic70"'}}}, 'annotation_exomiser': {'metavar': 'annotation Exomiser', 'help': "Annotation with Exomiser, as a list of options\\n (e.g. 'preset=exome:transcript_source=refseq').\\n", 'default': None, 'type': <class 'str'>, 'extra': {'format': 'option=value[:option=value]', 'examples': {'Annotation with Exomiser with options': '"annotation_exomiser": "preset=exome:hpo=0001156+0001363+0011304+0010055:transcript_source=refseq:release=2109"'}}}, 'annotation_splice': {'metavar': 'annotation Splice', 'help': "Annotation with Splice, as a list of options\\n (e.g. 
'split_mode=one:spliceai_distance=500:spliceai_mask=1').\\n", 'default': None, 'type': <class 'str'>, 'extra': {'format': 'option=value[:option=value]', 'examples': {'Annotation with Splice with options': '"annotation_splice": "split_mode=one:spliceai_distance=500:spliceai_mask=1"'}}}, 'annotations_update': {'help': 'Update option for annotation (Only for Parquet annotation).\\nIf True, annotation fields will be removed and re-annotated.\\nThese options will be applied to all annotation databases.\\n', 'action': 'store_true', 'default': False, 'gooey': {'widget': 'BlockCheckbox', 'options': {'checkbox_label': 'Update annotation method'}}, 'extra': {'param_section': 'annotation:options'}}, 'annotations_append': {'help': 'Append option for annotation (Only for Parquet annotation).\\nIf True, annotation fields will be annotated only if not annotation exists for the variant.\\nThese options will be applied to all annotation databases.\\n', 'action': 'store_true', 'default': False, 'gooey': {'widget': 'BlockCheckbox', 'options': {'checkbox_label': 'Append annotation method'}}, 'extra': {'param_section': 'annotation:options'}}, 'calculations': {'metavar': 'operations', 'help': "Quick calculations on genetic variants information and genotype information,\\nas a list of operations (e.g. 
'VARTYPE,variant_id').\\nList of available calculations by default\\n (unsensitive case, see doc for more information):\\n VARTYPE snpeff_hgvs FINDBYPIPELINE GENOTYPECONCORDANCE BARCODE TRIO VAF VAF_STATS DP_STATS \\n", 'default': None, 'type': <class 'str'>}, 'prioritizations': {'metavar': 'prioritisations', 'help': "List of prioritization profiles to process (based on Prioritization JSON file),\\nsuch as 'default', 'rare variants', 'low allele frequency', 'GERMLINE'.\\nBy default, all profiles available will be processed.\\n", 'default': None, 'type': <class 'str'>, 'extra': {'examples': {'Prioritization profile by default': '"prioritization": "default" ', 'Prioritization profile by default and GERMLINE from Configuration JSON file': '"prioritization": "default,GERMLINE" '}}}, 'prioritization_config': {'metavar': 'prioritization config', 'help': 'Prioritization configuration JSON file (defines profiles, see doc).\\n', 'default': None, 'type': <howard.tools.tools.PathType object>, 'gooey': {'widget': 'FileChooser', 'options': {'wildcard': 'JSON file (*.json)|*.json|All files (*)|*'}}, 'extra': {'param_section': 'prioritization', 'examples': {'Prioritization configuration JSON file as an option': '"prioritization_config": "prioritization_config.json" '}}}, 'profiles': {'metavar': 'profiles', 'help': "List of prioritization profiles to process (based on Prioritization JSON file),\\nsuch as 'default', 'rare variants', 'low allele frequency', 'GERMLINE'.\\nBy default, all profiles available will be processed.\\n", 'default': None, 'type': <class 'str'>}, 'default_profile': {'metavar': 'default profile', 'help': 'Prioritization profile by default (see doc).\\nDefault is the first profile in the list of prioritization profiles.\\n', 'default': None, 'type': <class 'str'>}, 'pzfields': {'metavar': 'pzfields', 'help': 'Prioritization fields to provide (see doc).\\nAvailable: PZScore, PZFlag, PZTags, PZComment, PZInfos\\n', 'default': 'PZScore,PZFlag', 'type': <class 
'str'>}, 'prioritization_score_mode': {'metavar': 'prioritization score mode', 'help': 'Prioritization Score mode (see doc).\\nAvailable: HOWARD (increment score), VaRank (max score)\\n', 'default': 'HOWARD', 'type': <class 'str'>, 'choices': ['HOWARD', 'VaRank'], 'gooey': {'widget': 'Dropdown', 'options': {}}}, 'query_limit': {'metavar': 'query limit', 'help': 'Limit of number of row for query (only for print result, not output).\\n', 'default': 10, 'type': <class 'int'>, 'gooey': {'widget': 'IntegerField', 'options': {'min': 1, 'max': 10000, 'increment': 10}}}, 'query_print_mode': {'metavar': 'print mode', 'help': "Print mode of query result (only for print result, not output).\\nEither None (native), 'markdown', 'tabulate' or disabled.\\n", 'choices': [None, 'markdown', 'tabulate', 'disabled'], 'default': None, 'type': <class 'str'>, 'gooey': {'widget': 'Dropdown', 'options': {}}}, 'explode_infos': {'help': "Explode VCF INFO/Tag into 'variants' table columns.\\n", 'action': 'store_true', 'default': False}, 'explode_infos_prefix': {'metavar': 'explode infos prefix', 'help': 'Explode VCF INFO/Tag with a specific prefix.\\n', 'default': '', 'type': <class 'str'>}, 'explode_infos_fields': {'metavar': 'explode infos list', 'help': "Explode VCF INFO/Tag specific fields/tags.\\nKeyword `*` specify all available fields, except those already specified.\\nPattern (regex) can be used, such as `.*_score` for fields named with '_score' at the end.\\nExamples:\\n- 'HGVS,SIFT,Clinvar' (list of fields)\\n- 'HGVS,*,Clinvar' (list of fields with all other fields at the end)\\n- 'HGVS,.*_score,Clinvar' (list of 2 fields with all scores in the middle)\\n- 'HGVS,.*_score,*' (1 field, scores, all other fields)\\n- 'HGVS,*,.*_score' (1 field, all other fields, all scores)\\n", 'default': '*', 'type': <class 'str'>}, 'include_header': {'help': 'Include header (in VCF format) in output file.\\nOnly for compatible formats (tab-delimiter format as TSV or BED).\\n', 'action': 'store_true', 
'default': False}, 'order_by': {'metavar': 'order by', 'help': "List of columns to sort the result-set in ascending or descending order.\\nUse SQL format, and keywords ASC (ascending) and DESC (descending).\\nIf a column is not available, order will not be considered.\\nOrder is enable only for compatible format (e.g. TSV, CSV, JSON).\\nExamples: 'ACMG_score DESC', 'PZFlag DESC, PZScore DESC'.\\n", 'default': '', 'type': <class 'str'>, 'extra': {'examples': {'Order by ACMG score in descending order': '"order_by": "ACMG_score DESC" ', 'Order by PZFlag and PZScore in descending order': '"order_by": "PZFlag DESC, PZScore DESC" '}}}, 'parquet_partitions': {'metavar': 'parquet partitions', 'help': "Parquet partitioning using hive (available for any format).\\nThis option is faster parallel writing, but memory consuming.\\nUse 'None' (string) for NO partition but split parquet files into a folder.\\nExamples: '#CHROM', '#CHROM,REF', 'None'.\\n", 'default': None, 'type': <class 'str'>}, 'input_annovar': {'metavar': 'input annovar', 'help': "Input Annovar file path.\\nFormat file must be a Annovar TXT file, associated with '.idx'.\\n", 'required': False, 'default': None, 'type': <howard.tools.tools.PathType object>, 'gooey': {'widget': 'FileChooser', 'options': {'wildcard': 'Parquet file (*.parquet)|*.parquet|All files (*)|*'}}}, 'output_annovar': {'metavar': 'output annovar', 'help': "Output Annovar file path.\\nFormat file must be either VCF compressesd file '.vcf.gz'.\\n", 'required': False, 'default': None, 'type': <howard.tools.tools.PathType object>, 'gooey': {'widget': 'FileSaver'}}, 'annovar_code': {'metavar': 'Annovar code', 'help': 'Annovar code, or database name.\\nUsefull to name databases columns.\\n', 'required': False, 'default': None, 'type': <class 'str'>}, 'annovar_to_parquet': {'metavar': 'to parquet', 'help': 'Parquet file conversion.\\n', 'required': False, 'default': None, 'type': <howard.tools.tools.PathType object>, 'gooey': {'widget': 'FileSaver', 
'options': {'wildcard': 'HTML file (*.parquet)|*.parquet'}}}, 'annovar_multi_variant': {'metavar': 'Annovar multi variant', 'help': "Variant with multiple annotation lines on Annovar file.\\nEither 'auto' (auto-detection), 'enable' or 'disable'.\\n", 'default': 'auto', 'type': <class 'str'>, 'choices': ['auto', 'enable', 'disable'], 'gooey': {'widget': 'Dropdown', 'options': {}}}, 'annovar_reduce_memory': {'metavar': 'reduce memory', 'help': "Reduce memory option for Annovar convert,\\neither 'auto' (auto-detection), 'enable' or 'disable'.\\n", 'default': 'auto', 'type': <class 'str'>, 'choices': ['auto', 'enable', 'disable'], 'gooey': {'widget': 'Dropdown', 'options': {}}}, 'input_extann': {'metavar': 'input extann', 'help': 'Input Extann file path.\\nFormat file must be a Extann TXT file or TSV file.\\nFile need to have at least the genes column.\\n', 'required': False, 'default': None, 'type': <howard.tools.tools.PathType object>, 'gooey': {'widget': 'FileChooser', 'options': {'wildcard': 'VCF, Parquet, TSV, CSV, PSV or duckDB|*.*|All files (*)|*'}}}, 'output_extann': {'metavar': 'output extann', 'help': 'Output Extann file path.\\nOutput extann file, should be BED or BED.gz.\\n', 'required': False, 'default': None, 'type': <howard.tools.tools.PathType object>, 'gooey': {'widget': 'FileSaver'}}, 'mode_extann': {'metavar': 'mode extann', 'help': 'Mode extann selection.\\nHow to pick transcript from ncbi, keep all,\\nkeep the longest, or keep the chosen one (transcript_extann).\\n', 'required': False, 'default': 'longest', 'choices': ['all', 'longest', 'chosen'], 'type': <class 'str'>}, 'param_extann': {'metavar': 'param extann', 'help': "Param extann file path.\\nParam containing configuration, options to replace chars and\\nbedlike header description, conf vcf specs.\\n(e.g. 
'~/howard/config/param.extann.json')\\n", 'required': False, 'default': None, 'type': <howard.tools.tools.PathType object>, 'gooey': {'widget': 'FileChooser', 'options': {'wildcard': 'TSV file format|*.tsv|'}}}, 'calculation_config': {'metavar': 'calculation config', 'help': 'Calculation configuration JSON file.\\n', 'default': None, 'type': <howard.tools.tools.PathType object>, 'gooey': {'widget': 'FileChooser', 'options': {'wildcard': 'JSON file (*.json)|*.json|All files (*)|*'}}, 'extra': {'param_section': 'calculation', 'examples': {'Calculation configuration JSON file as an option': '"calculation_config": "calculation_config.json" '}}}, 'show_calculations': {'help': 'Show available calculation operations.\\n', 'action': 'store_true', 'default': False}, 'hgvs_field': {'metavar': 'HGVS field', 'help': 'HGVS INFO/tag containing a list o HGVS annotations.\\n', 'default': 'hgvs', 'type': <class 'str'>, 'extra': {'param_section': 'calculation:calculations:NOMEN:options'}}, 'transcripts': {'metavar': 'transcripts', 'help': 'Transcripts TSV file,\\nwith Transcript in first column, optional Gene in second column.\\n', 'default': None, 'type': <howard.tools.tools.PathType object>, 'gooey': {'widget': 'FileChooser', 'options': {'wildcard': 'TSV file (*.tsv)|*.tsv|All files (*)|*'}}, 'extra': {'param_section': 'calculation:calculations:NOMEN:options'}}, 'trio_pedigree': {'metavar': 'trio pedigree', 'help': 'Pedigree Trio for trio inheritance calculation.\\nEither a JSON file or JSON string or a list of samples\\n(e.g. 
\\'sample1,sample2,sample3\\' for father, mother and child,\\n \\'{"father": "sample1", "mother": "sample2", "child": "sample3"}\\').\\n', 'default': None, 'gooey': {'widget': 'FileChooser', 'options': {'wildcard': 'JSON file (*.json)|*.json|All files (*)|*'}}, 'extra': {'param_section': 'calculation:calculations:TRIO'}}, 'family_pedigree': {'metavar': 'family pedigree', 'help': 'Pedigree family for barcode calculation on genotype.\\nEither a JSON file or JSON string or a list of samples\\n(e.g. \\'sample1,sample2,sample3,sample4\\',\\n \\'{"father": "sample1", "mother": "sample2", "child1": "sample3", "child2": "sample3"}\\').\\n', 'default': None, 'gooey': {'widget': 'FileChooser', 'options': {'wildcard': 'JSON file (*.json)|*.json|All files (*)|*'}}, 'extra': {'param_section': 'calculation:calculations:BARCODEFAMILY'}}, 'stats_md': {'metavar': 'stats markdown', 'help': 'Stats Output file in MarkDown format.\\n', 'required': False, 'default': None, 'type': <howard.tools.tools.PathType object>, 'gooey': {'widget': 'FileSaver', 'options': {'wildcard': 'Markdown file (*.md)|*.md'}}, 'extra': {'examples': {'Export statistics in Markdown format': '"stats_md": "/tmp/stats.md" '}}}, 'stats_json': {'metavar': 'stats json', 'help': 'Stats Output file in JSON format.\\n', 'required': False, 'default': None, 'type': <howard.tools.tools.PathType object>, 'gooey': {'widget': 'FileSaver', 'options': {'wildcard': 'JSON file (*.json)|*.json'}}, 'extra': {'examples': {'Export statistics in JSON format': '"stats_json": "/tmp/stats.json" '}}}, 'assembly': {'metavar': 'assembly', 'help': "Genome Assembly (e.g. 'hg19', 'hg38').\\n", 'required': False, 'default': 'hg19', 'type': <class 'str'>, 'extra': {'examples': {'Default assembly for all analysis tools': '"assembly": "hg19" ', 'List of assemblies for databases download tool': '"assembly": "hg19,hg38" '}}}, 'genome': {'metavar': 'genome', 'help': "Genome file in fasta format (e.g. 
'hg19.fa', 'hg38.fa').\\n", 'required': False, 'default': '/Users/lebechea/howard/databases/genomes/current/hg19/hg19.fa', 'type': <howard.tools.tools.PathType object>, 'gooey': {'widget': 'FileChooser', 'options': {'wildcard': 'All files (*)|*'}}}, 'hgvs_options': {'metavar': 'HGVS options', 'help': "Quick HGVS annotation options.\\nThis option will skip all other hgvs options.\\nExamples:\\n- 'default' (for default options)\\n- 'full_format' (for full format HGVS annotation)\\n- 'use_gene=True:add_protein=true:codon_type=FULL'\\n", 'required': False, 'default': None, 'type': <class 'str'>}, 'use_gene': {'help': "Use Gene information to generate HGVS annotation\\n(e.g. 'NM_152232(TAS1R2):c.231T>C')", 'action': 'store_true', 'default': False}, 'use_exon': {'help': "Use Exon information to generate HGVS annotation\\n(e.g. 'NM_152232(exon2):c.231T>C').\\nOnly if 'use_gene' is not enabled.\\n", 'action': 'store_true', 'default': False}, 'use_protein': {'help': "Use Protein level to generate HGVS annotation\\n(e.g. 'NP_689418:p.Cys77Arg').\\nCan be used with 'use_exon' or 'use_gene'.\\n", 'action': 'store_true', 'default': False}, 'add_protein': {'help': "Add Protein level to DNA HGVS annotation (e.g 'NM_152232:c.231T>C,NP_689418:p.Cys77Arg').\\n", 'action': 'store_true', 'default': False}, 'full_format': {'help': "Generates HGVS annotation in a full format\\nby using all information to generates an exhaustive annotation\\n(non-standard, e.g. 'TAS1R2:NM_152232:NP_689418:c.231T>C:p.Cys77Arg').\\nUse 'use_exon' to add exon information\\n(e.g 'TAS1R2:NM_152232:NP_689418:exon2:c.231T>C:p.Cys77Arg').\\n", 'action': 'store_true', 'default': False}, 'use_version': {'help': "Generates HGVS annotation with transcript version\\n(e.g. 'NM_152232.1:c.231T>C').\\n", 'action': 'store_true', 'default': False}, 'codon_type': {'metavar': 'Codon type', 'help': "Amino Acide Codon format type to use to generate HGVS annotation.\\nAvailable:\\n- '1': codon in 1 character (e.g. 
'C', 'R')\\n- '3': codon in 3 character (e.g. 'Cys', 'Arg')\\n-'FULL': codon in full name (e.g. 'Cysteine', 'Arginine')\\n", 'required': False, 'default': '3', 'type': <class 'str'>, 'choices': ['1', '3', 'FULL'], 'gooey': {'widget': 'Dropdown', 'options': {}}}, 'refgene': {'metavar': 'refGene', 'help': 'Path to refGene annotation file.\\n', 'required': False, 'default': None, 'type': <howard.tools.tools.PathType object>, 'gooey': {'widget': 'FileChooser', 'options': {'wildcard': 'All files (*)|*', 'default_dir': '/Users/lebechea/howard/databases/refseq/current', 'default_file': 'ncbiRefSeq.txt', 'message': 'Path to refGene annotation file'}}}, 'refseqlink': {'metavar': 'refSeqLink', 'help': 'Path to refSeqLink annotation file.\\n', 'required': False, 'default': None, 'type': <howard.tools.tools.PathType object>, 'gooey': {'widget': 'FileChooser', 'options': {'wildcard': 'All files (*)|*', 'default_dir': '/Users/lebechea/howard/databases/refseq/current', 'default_file': 'ncbiRefSeq.txt', 'message': 'Path to refGeneLink annotation file'}}}, 'refseq-folder': {'metavar': 'refseq folder', 'help': 'Folder containing refSeq files.\\n', 'required': False, 'default': '/Users/lebechea/howard/databases/refseq/current', 'type': <howard.tools.tools.PathType object>, 'gooey': {'widget': 'DirChooser', 'options': {'default_dir': '/Users/lebechea/howard/databases/refseq/current', 'message': 'Path to refGenefolder'}}}, 'download-genomes': {'metavar': 'genomes', 'help': "Path to genomes folder\\nwith Fasta files, indexes,\\nand all files generated by pygenome module.\\n(e.g. 
'/Users/lebechea/howard/databases/genomes/current').\\n", 'required': False, 'default': None, 'type': <howard.tools.tools.PathType object>, 'gooey': {'widget': 'DirChooser', 'options': {'default_dir': '/Users/lebechea/howard/databases', 'message': 'Path to genomes folder'}}}, 'download-genomes-provider': {'metavar': 'genomes provider', 'help': 'Download Genome from an external provider.\\nAvailable: GENCODE, Ensembl, UCSC, NCBI.\\n', 'required': False, 'default': 'UCSC', 'type': <class 'str'>, 'choices': ['GENCODE', 'Ensembl', 'UCSC', 'NCBI'], 'gooey': {'widget': 'Dropdown', 'options': {}}}, 'download-genomes-contig-regex': {'metavar': 'genomes contig regex', 'help': "Regular expression to select specific chromosome\\n(e.g 'chr[0-9XYM]+$').\\n", 'required': False, 'default': None, 'type': <class 'str'>}, 'download-annovar': {'metavar': 'Annovar', 'help': "Path to Annovar databases\\n(e.g. '/Users/lebechea/howard/databases/annovar/current').\\n", 'required': False, 'type': <howard.tools.tools.PathType object>, 'default': None, 'gooey': {'widget': 'DirChooser', 'options': {'default_dir': '/Users/lebechea/howard/databases', 'message': 'Path to Annovar databases folder'}}}, 'download-annovar-files': {'metavar': 'Annovar code', 'help': "Download Annovar databases for a list of Annovar file code (see Annovar Doc).\\nUse None to donwload all available files,\\nor Annovar keyword (e.g. 
'refGene', 'cosmic70', 'clinvar_202*').\\nNote that refGene will at least be downloaded,\\nand only files that not already exist or changed will be downloaded.\\n", 'required': False, 'default': None, 'type': <class 'str'>}, 'download-annovar-url': {'metavar': 'Annovar url', 'help': 'Annovar databases URL (see Annovar Doc).\\n', 'required': False, 'default': 'http://www.openbioinformatics.org/annovar/download', 'type': <class 'str'>}, 'download-snpeff': {'metavar': 'snpEff', 'help': 'Download snpEff databases within snpEff folder', 'required': False, 'default': None, 'type': <howard.tools.tools.PathType object>, 'gooey': {'widget': 'DirChooser', 'options': {'default_dir': '/Users/lebechea/howard/databases', 'message': 'Path to snpEff databases folder'}}}, 'download-refseq': {'metavar': 'refSeq', 'help': "Path to refSeq databases\\n(e.g. '/Users/lebechea/howard/databases/refseq/current').\\n", 'required': False, 'default': None, 'type': <howard.tools.tools.PathType object>, 'gooey': {'widget': 'DirChooser', 'options': {'default_dir': '/Users/lebechea/howard/databases', 'message': 'Path to refGene files folder'}}}, 'download-refseq-url': {'metavar': 'refSeq url', 'help': "refSeq databases URL (see refSeq WebSite)\\n(e.g. 'http://hgdownload.soe.ucsc.edu/goldenPath')\u2022/n", 'required': False, 'default': 'http://hgdownload.soe.ucsc.edu/goldenPath', 'type': <class 'str'>}, 'download-refseq-prefix': {'metavar': 'refSeq prefix', 'help': 'Check existing refSeq files in refSeq folder.\\n', 'required': False, 'default': 'ncbiRefSeq', 'type': <class 'str'>}, 'download-refseq-files': {'metavar': 'refSeq files', 'help': 'List of refSeq files to download.\\n', 'required': False, 'default': 'ncbiRefSeq.txt,ncbiRefSeqLink.txt', 'type': <class 'str'>}, 'download-refseq-format-file': {'metavar': 'refSeq format file', 'help': "Name of refSeq file to convert in BED format\\n(e.g. 
'ncbiRefSeq.txt').\\nProcess only if not None.\\n", 'required': False, 'default': None, 'type': <class 'str'>}, 'download-refseq-include-utr5': {'help': "Formating BED refSeq file including 5'UTR.\\n", 'action': 'store_true', 'default': False}, 'download-refseq-include-utr3': {'help': "Formating BED refSeq file including 3'UTR.\\n", 'action': 'store_true', 'default': False}, 'download-refseq-include-chrM': {'help': "Formating BED refSeq file including Mitochondiral chromosome 'chrM' or 'chrMT'.\\n", 'action': 'store_true', 'default': False}, 'download-refseq-include-non-canonical-chr': {'help': 'Formating BED refSeq file including non canonical chromosomes.\\n', 'action': 'store_true', 'default': False}, 'download-refseq-include-non-coding-transcripts': {'help': 'Formating BED refSeq file including non coding transcripts.\\n', 'action': 'store_true', 'default': False}, 'download-refseq-include-transcript-version': {'help': 'Formating BED refSeq file including transcript version.\\n', 'action': 'store_true', 'default': False}, 'download-dbnsfp': {'metavar': 'dbNSFP', 'help': "Download dbNSFP databases within dbNSFP folder(e.g. '/Users/lebechea/howard/databases').\\n", 'required': False, 'default': None, 'type': <howard.tools.tools.PathType object>, 'gooey': {'widget': 'DirChooser', 'options': {'default_dir': '/Users/lebechea/howard/databases', 'message': 'Path to dbNSFP databases folder'}}}, 'download-dbnsfp-url': {'metavar': 'dbNSFP url', 'help': "Download dbNSFP databases URL (see dbNSFP website)\\n(e.g. https://dbnsfp.s3.amazonaws.com').\\n", 'required': False, 'default': 'https://dbnsfp.s3.amazonaws.com', 'type': <class 'str'>}, 'download-dbnsfp-release': {'metavar': 'dnNSFP release', 'help': "Release of dbNSFP to download (see dbNSFP website)\\n(e.g. 
'4.4a').\\n", 'required': False, 'default': '4.4a'}, 'download-dbnsfp-parquet-size': {'metavar': 'dbNSFP parquet size', 'help': 'Maximum size (Mb) of data files in Parquet folder.\\nParquet folder are partitioned (hive) by chromosome (sub-folder),\\nwhich contain N data files.\\n', 'required': False, 'default': 100, 'type': <class 'int'>, 'gooey': {'widget': 'IntegerField', 'options': {'min': 1, 'max': 100000, 'increment': 10}}}, 'download-dbnsfp-subdatabases': {'help': 'Generate dbNSFP sub-databases.\\ndbNSFP provides multiple databases which are split onto multiple columns.\\nThis option create a Parquet folder for each sub-database (based on columns names).\\n', 'action': 'store_true', 'default': False}, 'download-dbnsfp-parquet': {'help': 'Generate a Parquet file for each Parquet folder.\\n', 'action': 'store_true', 'default': False}, 'download-dbnsfp-vcf': {'help': 'Generate a VCF file for each Parquet folder.\\nNeed genome FASTA file (see --download-genome).\\n', 'action': 'store_true', 'default': False}, 'download-dbnsfp-no-files-all': {'help': "Not generate database Parquet/VCF file for the entire database ('ALL').\\nOnly sub-databases files will be generated.\\n(see '--download-dbnsfp-subdatabases').\\n", 'action': 'store_true', 'default': False}, 'download-dbnsfp-add-info': {'help': 'Add INFO column (VCF format) in Parquet folder and file.\\nUseful for speed up full annotation (all available columns).\\nIncrease memory and space during generation of files.\\n', 'action': 'store_true', 'default': False}, 'download-dbnsfp-only-info': {'help': 'Add only INFO column (VCF format) in Parquet folder and file.\\nUseful for speed up full annotation (all available columns).\\nDecrease memory and space during generation of files.\\nIncrease time for partial annotation (some available columns).\\n', 'action': 'store_true', 'default': False}, 'download-dbnsfp-uniquify': {'help': 'Uniquify values within column\\n(e.g. 
"D,D" to "D", "D,.,T" to "D,T").\\nRemove transcripts information details.\\nUsefull to reduce size of the database.\\nIncrease memory and space during generation of files.\\n', 'action': 'store_true', 'default': False}, 'download-dbnsfp-row-group-size': {'metavar': 'dnNSFP row grooup size', 'help': 'Minimum number of rows in a parquet row group (see duckDB doc).\\nLower can reduce memory usage and slightly increase space during generation,\\nspeed up highly selective queries, slow down whole file queries (e.g. aggregations).\\n', 'required': False, 'default': 100000, 'type': <class 'int'>, 'gooey': {'widget': 'IntegerField', 'options': {'min': 1, 'max': 100000000000, 'increment': 10000}}}, 'download-alphamissense': {'metavar': 'AlphaMissense', 'help': 'Path to AlphaMissense databases', 'required': False, 'default': None, 'type': <howard.tools.tools.PathType object>, 'gooey': {'widget': 'DirChooser', 'options': {'default_dir': '/Users/lebechea/howard/databases', 'message': 'Path to Alphamissense databases folder'}}}, 'download-alphamissense-url': {'metavar': 'AlphaMissense url', 'help': "Download AlphaMissense databases URL (see AlphaMissense website)\\n(e.g. 'https://storage.googleapis.com/dm_alphamissense').\\n", 'required': False, 'default': 'https://storage.googleapis.com/dm_alphamissense', 'type': <class 'str'>}, 'download-exomiser': {'metavar': 'Exomiser', 'help': 'Path to Exomiser databases\\n(e.g. 
/Users/lebechea/howard/databases/exomiser/current).\\n', 'required': False, 'default': None, 'type': <howard.tools.tools.PathType object>, 'gooey': {'widget': 'DirChooser', 'options': {'default_dir': '/Users/lebechea/howard/databases', 'message': 'Path to Exomiser databases folder'}}}, 'download-exomiser-application-properties': {'metavar': 'Exomiser application properties', 'help': "Exomiser Application Properties configuration file (see Exomiser website).\\nThis file contains configuration settings for the Exomiser tool.\\nIf this parameter is not provided, the function will attempt to locate\\nthe application properties file automatically based on the Exomiser.\\nConfiguration information will be used to download expected releases (if no other parameters).\\nCADD and REMM will be downloaded only if 'path' are provided.\\n", 'required': False, 'default': None, 'type': <howard.tools.tools.PathType object>, 'gooey': {'widget': 'FileChooser', 'options': {'wildcard': 'All files (*)|*', 'options': {'default_dir': '/Users/lebechea/howard/databases/exomiser/current', 'message': 'Path to Exomiser application properties file'}}}}, 'download-exomiser-url': {'metavar': 'Exomiser url', 'help': "URL where Exomiser database files can be downloaded from\\n(e.g. 
'http://data.monarchinitiative.org/exomiser').\\n", 'required': False, 'default': 'http://data.monarchinitiative.org/exomiser', 'type': <class 'str'>}, 'download-exomiser-release': {'metavar': 'Exomiser release', 'help': 'Release of Exomiser data to download.\\nIf "default", "auto", or "config", retrieve from Application Properties file.\\nIf not provided (None), from Application Properties file (Exomiser data-version) \\nor default \\'2109\\'.\\n', 'required': False, 'default': None, 'type': <class 'str'>}, 'download-exomiser-phenotype-release': {'metavar': 'Exomiser phenoptye release', 'help': 'Release of Exomiser phenotype to download.\\nIf not provided (None), from Application Properties file (Exomiser Phenotype data-version)\\nor Exomiser release.\\n', 'required': False, 'default': None, 'type': <class 'str'>}, 'download-exomiser-remm-release': {'metavar': 'Exomiser remm release', 'help': 'Release of ReMM (Regulatory Mendelian Mutation) database to download.\\nIf "default", "auto", or "config", retrieve from Application Properties file.\\n', 'required': False, 'default': None, 'type': <class 'str'>}, 'download-exomiser-remm-url': {'metavar': 'Exomiser remm url', 'help': "URL where ReMM (Regulatory Mendelian Mutation) database files can be downloaded from\\n(e.g. 'https://kircherlab.bihealth.org/download/ReMM').\\n", 'required': False, 'default': 'https://kircherlab.bihealth.org/download/ReMM', 'type': <class 'str'>}, 'download-exomiser-cadd-release': {'metavar': 'Exomiser cadd release', 'help': 'Release of CADD (Combined Annotation Dependent Depletion) database to download.\\nIf "default", "auto", or "config", retrieve from Application Properties file.\\n', 'required': False, 'default': None, 'type': <class 'str'>}, 'download-exomiser-cadd-url': {'metavar': 'Exomiser cadd url', 'help': "URL where CADD (Combined Annotation Dependent Depletion) database files can be downloaded from\\n(e.g. 
'https://kircherlab.bihealth.org/download/CADD').\\n", 'required': False, 'default': 'https://kircherlab.bihealth.org/download/CADD', 'type': <class 'str'>}, 'download-exomiser-cadd-url-snv-file': {'metavar': 'Exomiser url snv file', 'help': 'Name of the file containing the SNV (Single Nucleotide Variant) data\\nfor the CADD (Combined Annotation Dependent Depletion) database.\\n', 'required': False, 'default': 'whole_genome_SNVs.tsv.gz', 'type': <class 'str'>}, 'download-exomiser-cadd-url-indel-file': {'metavar': 'Exomiser cadd url indel', 'help': 'Name of the file containing the INDEL (Insertion-Deletion) data\\nfor the CADD (Combined Annotation Dependent Depletion) database.\\n', 'required': False, 'default': 'InDels.tsv.gz', 'type': <class 'str'>}, 'download-dbsnp': {'metavar': 'dnSNP', 'help': "Path to dbSNP databases\\n(e.g. '/Users/lebechea/howard/databases/exomiser/dbsnp').\\n", 'required': False, 'default': None, 'type': <howard.tools.tools.PathType object>, 'gooey': {'widget': 'DirChooser', 'options': {'default_dir': '/Users/lebechea/howard/databases', 'message': 'Path to dbSNP databases folder'}}}, 'download-dbsnp-releases': {'metavar': 'dnSNP releases', 'help': "Release of dbSNP to download\\n(e.g. 'b152', 'b152,b156').\\n", 'required': False, 'default': 'b156', 'type': <class 'str'>}, 'download-dbsnp-release-default': {'metavar': 'dnSNP release default', 'help': "Default Release of dbSNP ('default' symlink)\\n(e.g. 'b156').\\nIf None, first release to download will be assigned as default\\nonly if it does not exists.\\n", 'required': False, 'default': None, 'type': <class 'str'>}, 'download-dbsnp-url': {'metavar': 'dbSNP url', 'help': "URL where dbSNP database files can be downloaded from.\\n(e.g. 
'https://ftp.ncbi.nih.gov/snp/archive').\\n", 'required': False, 'default': 'https://ftp.ncbi.nih.gov/snp/archive', 'type': <class 'str'>}, 'download-dbsnp-url-files': {'metavar': 'dbSNP url files', 'help': 'Dictionary that maps assembly names to specific dbSNP URL files.\\nIt allows you to provide custom dbSNP URL files for specific assemblies\\ninstead of using the default file naming convention.\\n', 'required': False, 'default': None, 'type': <class 'str'>}, 'download-dbsnp-url-files-prefix': {'metavar': 'dbSNP url files prefix', 'help': 'String that represents the prefix of the dbSNP file name for a specific assembly.\\nIt is used to construct the full URL of the dbSNP file to be downloaded.\\n', 'required': False, 'default': 'GCF_000001405', 'type': <class 'str'>}, 'download-dbsnp-assemblies-map': {'metavar': 'dbSNP assemblies map', 'help': 'dictionary that maps assembly names to their corresponding dbSNP versions.\\nIt is used to construct the dbSNP file name based on the assembly name.\\n', 'required': False, 'default': {'hg19': '25', 'hg38': '40'}, 'type': <class 'str'>, 'gooey': {'options': {'initial_value': '{"hg19": "25", "hg38": "40"}'}}}, 'download-dbsnp-vcf': {'help': 'Generate well-formatted VCF from downloaded file:\\n- Add and filter contigs associated to assembly\\n- Normalize by splitting multiallelics\\n- Need genome (see --download-genome)\\n', 'action': 'store_true', 'default': False}, 'download-dbsnp-parquet': {'help': 'Generate Parquet file from VCF.\\n', 'action': 'store_true', 'default': False}, 'convert-hgmd': {'metavar': 'HGMD', 'help': 'Convert HGMD databases.\\nFolder where the HGMD databases will be stored.\\nFields in VCF, Parquet and TSV will be generated.\\nIf the folder does not exist, it will be created.\\n', 'required': False, 'default': None, 'type': <howard.tools.tools.PathType object>, 'gooey': {'widget': 'DirChooser'}}, 'convert-hgmd-file': {'metavar': 'HGMD file', 'help': "File from HGMD.\\nName format 
'HGMD_Pro_<release>_<assembly>.vcf.gz'.\\n", 'required': False, 'default': None, 'type': <howard.tools.tools.PathType object>, 'gooey': {'widget': 'FileChooser'}}, 'convert-hgmd-basename': {'metavar': 'HGMD basename', 'help': "File output basename.\\nGenerated files will be prefixed by basename\\n(e.g. 'HGMD_Pro_MY_RELEASE')\\nBy default (None), input file name without '.vcf.gz'.\\n", 'required': False, 'default': None, 'type': <class 'str'>}, 'generate-param': {'metavar': 'param', 'help': 'Parameter file (JSON) with all databases found.\\nDatabases folders scanned are defined in config file.\\nStructure of databases follow this structure (see doc):\\n.../<database>/<release>/<assembly>/*.[parquet|vcf.gz|...]\\n', 'required': False, 'default': None, 'type': <howard.tools.tools.PathType object>, 'gooey': {'widget': 'FileSaver', 'options': {'wildcard': 'JSON file (*.json)|*.json'}}}, 'generate-param-description': {'metavar': 'param description', 'help': 'Description file (JSON) with all databases found.\\nContains all databases with description of format, assembly, fields...\\n', 'required': False, 'default': None, 'type': <howard.tools.tools.PathType object>, 'gooey': {'widget': 'FileSaver', 'options': {'wildcard': 'JSON file (*.json)|*.json'}}}, 'generate-param-releases': {'metavar': 'param release', 'help': "List of database folder releases to check\\n(e.g. 'current', 'latest').\\n", 'required': False, 'default': 'current', 'type': <class 'str'>}, 'generate-param-formats': {'metavar': 'param formats', 'help': "List of database formats to check\\n(e.g. 'parquet', 'parquet,vcf,bed,tsv').\\n", 'required': False, 'default': 'parquet', 'type': <class 'str'>}, 'generate-param-bcftools': {'help': "Generate parameter JSON file with BCFTools annotation for allowed formats\\n(i.e. 
'vcf', 'bed').\\n", 'action': 'store_true', 'default': False}, 'help_md': {'metavar': 'help markdown', 'help': 'Help Output file in MarkDown format.\\n', 'required': False, 'default': None, 'type': <howard.tools.tools.PathType object>, 'gooey': {'widget': 'FileSaver', 'options': {'wildcard': 'HTML file (*.md)|*.md'}}}, 'help_html': {'metavar': 'help html', 'help': 'Help Output file in HTML format.\\n', 'required': False, 'default': None, 'type': <howard.tools.tools.PathType object>, 'gooey': {'widget': 'FileSaver', 'options': {'wildcard': 'HTML file (*.html)|*.html'}}}, 'help_pdf': {'metavar': 'help pdf', 'help': 'Help Output file in PDF format.\\n', 'required': False, 'default': None, 'type': <howard.tools.tools.PathType object>, 'gooey': {'widget': 'FileSaver', 'options': {'wildcard': 'PDF file (*.pdf)|*.pdf'}}}, 'help_json_input': {'metavar': 'help JSON input', 'help': 'Help input file in JSON format.\\n', 'required': False, 'default': None, 'type': <howard.tools.tools.PathType object>, 'gooey': {'widget': 'FileChooser', 'options': {'wildcard': 'JSON file (*.json)|*.json|All files (*)|*'}}}, 'help_md_input': {'metavar': 'help MarkDown input', 'help': 'Help input file in MarkDown format.\\n', 'required': False, 'default': None, 'type': <howard.tools.tools.PathType object>, 'gooey': {'widget': 'FileChooser', 'options': {'wildcard': 'MarkDown file (*.md)|*.md|All files (*)|*'}}}, 'code_type': {'metavar': 'example code type', 'help': "Help example code type for input JSON format\\n(e.g. 'json', 'bash').\\n", 'required': False, 'default': '', 'type': <class 'str'>}, 'help_json_input_title': {'metavar': 'help JSON input title', 'help': 'Help JSON input title.\\n', 'required': False, 'default': 'Help', 'type': <class 'str'>}, 'genomes-folder': {'metavar': 'genomes', 'help': "Folder containing genomes.\\n(e.g. 
'/Users/lebechea/howard/databases/genomes/current'", 'required': False, 'default': '/Users/lebechea/howard/databases/genomes/current', 'type': <howard.tools.tools.PathType object>, 'gooey': {'widget': 'DirChooser', 'options': {'default_dir': '/Users/lebechea/howard/databases/genomes/current', 'message': 'Path to genomes databases folder'}}}, 'config': {'metavar': 'config', 'help': 'Configuration JSON file defined default configuration regarding \\nresources (e.g. threads, memory),\\nsettings (e.g. verbosity, temporary files),\\ndefault folders (e.g. for databases)\\nand paths to external tools.\\n', 'required': False, 'default': '{}', 'type': <class 'str'>, 'gooey': {'widget': 'FileChooser', 'options': {'initial_value': '{}'}}}, 'threads': {'metavar': 'threads', 'help': 'Specify the number of threads to use for processing HOWARD.\\nIt determines the level of parallelism,\\neither on python scripts, duckdb engine and external tools.\\nIt and can help speed up the process/tool.\\nUse -1 to use all available CPU/cores.\\nEither non valid value is 1 CPU/core.\\n', 'required': False, 'type': <class 'int'>, 'default': -1, 'gooey': {'widget': 'IntegerField', 'options': {'min': -1, 'max': 1000, 'increment': 1}}, 'extra': {'examples': {'# Automatically detect all available CPU/cores': '"threads": -1', '# Define 8 CPU/cores': '"threads": 8'}}}, 'memory': {'metavar': 'memory', 'help': "Specify the memory to use in format FLOAT[kMG]\\n(e.g. 
'8G', '12.42G', '1024M').\\nIt determines the amount of memory for duckDB engine and external tools\\n(especially for JAR programs).\\nIt can help to prevent 'out of memory' failures.\\nBy default (None) is 80%% of RAM (for duckDB).\\n", 'required': False, 'type': <class 'str'>, 'default': None, 'extra': {'format': 'FLOAT[kMG]', 'examples': {'# Automatically detect all available CPU/cores': '"threads": -1', '# Define 8 CPU/cores': '"threads": 8'}}}, 'chunk_size': {'metavar': 'chunk size', 'help': 'Number of records in batch to export output file.\\nThe lower the chunk size, the less memory consumption.\\nFor Parquet partitioning, files size will depend on the chunk size.\\n', 'required': False, 'default': 1000000, 'type': <class 'int'>, 'gooey': {'widget': 'IntegerField', 'options': {'min': 1, 'max': 100000000000, 'increment': 10000}}, 'extra': {'examples': {'Chunk size of 1.000.000 by default': '"chunk_size": 1000000', 'Smaller chunk size to reduce Parquet file size and memory usage': '"chunk_size": 100000'}}}, 'tmp': {'metavar': 'Temporary folder', 'help': "Temporary folder (e.g. 
'/tmp').\\nBy default, '.tmp' for duckDB (see doc),external tools and python scripts.\\n", 'required': False, 'default': None, 'type': <howard.tools.tools.PathType object>, 'gooey': {'widget': 'DirChooser'}, 'extra': {'examples': {'# System temporary folder': '"tmp": "/tmp"', '# HOWARD work directory': '"tmp": "~/howard/tmp"', '# Current work directory': '"tmp": ".tmp"'}}}, 'duckdb_settings': {'metavar': 'duckDB settings', 'help': 'DuckDB settings (see duckDB doc) as JSON (string or file).\\nThese settings have priority (see options \\'threads\\', \\'tmp\\'...).\\nExamples: \\'{"TimeZone": "GMT", "temp_directory": "/tmp/duckdb", "threads": 8}\\'.\\n', 'required': False, 'default': None, 'type': <howard.tools.tools.PathType object>, 'gooey': {'widget': 'FileChooser', 'options': {'wildcard': 'JSON file (*.json)|*.json|All files (*)|*'}}, 'extra': {'examples': {'DuckDB settings JSON file': '"duckdb_settings": "/path/to/duckdb_config.json"', 'JSON string for Time zone, temporary directory and threads for duckDB': '"duckdb_settings": {\\n "TimeZone": "GMT",\\n "temp_directory": "/tmp/duckdb",\\n "threads": 8\\n}'}}}, 'verbosity': {'metavar': 'verbosity', 'help': 'Verbosity level\\nAvailable: CRITICAL, ERROR, WARNING, INFO, DEBUG or NOTSET\\n- DEBUG: Detailed information, typically of interest only when diagnosing problems.\\n- INFO: Confirmation that things are working as expected.\\n- WARNING: An indication that something unexpected happened.\\n- ERROR: Due to a more serious problem.\\n- CRITICAL: A serious error.\\n- FATAL: A fatal error.\\n- NOTSET: All messages.\\n', 'required': False, 'choices': ['CRITICAL', 'ERROR', 'WARNING', 'INFO', 'DEBUG', 'NOTSET', 'WARN', 'FATAL'], 'default': 'INFO', 'type': <class 'str'>, 'gooey': {'widget': 'Dropdown', 'options': {}}, 'extra': {'examples': {'Default verbosity': '"verbosity": "INFO"', 'ERROR level (quiet mode)': '"verbosity": "ERROR"', 'For debug': '"verbosity": "DEBUG"'}}}, 'access': {'metavar': 'access mode', 'help': 
"Access mode to variants file or database.\\nEither 'RW' for Read and Write, or 'RO' for Read Only.\\n", 'default': 'RW', 'type': <class 'str'>, 'choices': ['RW', 'RO'], 'gooey': {'widget': 'Dropdown', 'options': {}}, 'extra': {'examples': {'Read and Write mode': '"access": "RW"', 'Read only mode': '"access": "RO"'}}}, 'log': {'metavar': 'log', 'help': "Logs file\\n(e.g. 'my.log').\\n", 'required': False, 'default': None, 'type': <howard.tools.tools.PathType object>, 'gooey': {'widget': 'FileSaver'}, 'extra': {'examples': {'Relative path to log file': '"log": "my.log"', '# HOWARD work directory': '"log": "~/howard/log"', 'Full path to log file': '"log": "/tmp/my.log"'}}}, 'interactive': {'help': 'Interative mose..\\n', 'action': 'store_true', 'default': False}, 'quiet': {'help': '==SUPPRESS==', 'action': 'store_true', 'default': False}, 'verbose': {'help': '==SUPPRESS==', 'action': 'store_true', 'default': False}, 'debug': {'help': '==SUPPRESS==', 'action': 'store_true', 'default': False}, 'databases_folder': {'help': 'Path of HOWARD database folder.\\n', 'type': <class 'str'>, 'default': '/Users/lebechea/howard/databases'}, 'database': {'help': 'Which database to update.\\n', 'type': <class 'str'>, 'default': 'clinvar', 'choices': ['clinvar']}, 'update_config': {'help': 'Path of json configuration file.\\n', 'type': <class 'str'>}, 'current_folder': {'help': 'Path of json configuration file.\\n', 'type': <class 'str'>, 'default': 'current'}, 'add_variants_view': {'help': 'Create a sheet with all INFO fields exploded.\\n', 'action': 'store_true', 'default': False}, 'add_header': {'help': 'Create a sheet with all INFO fields header descritions.\\n', 'action': 'store_true', 'default': False}, 'transcripts_expected': {'metavar': 'List of transcripts (file)', 'help': 'File with a list of transcripts in first column.\\n', 'required': False, 'default': None, 'type': <howard.tools.tools.PathType object>, 'gooey': {'widget': 'FileChooser', 'options': {'wildcard': 'TSV file 
(*.tsv)|*.tsv|All files (*)|*'}}}, 'transcripts_missing': {'metavar': 'List of missing transcripts (file)', 'help': 'File with a list of missing transcripts in first column.\\n', 'required': False, 'default': None, 'type': <howard.tools.tools.PathType object>, 'gooey': {'widget': 'FileSaver', 'options': {'wildcard': 'TSV file (*.tsv)|*.tsv|All files (*)|*'}}}, 'genebe_use_refseq': {'help': 'Use refSeq to annotate (default).\\n', 'action': 'store_true', 'default': False}, 'genebe_use_ensembl': {'help': 'Use Ensembl to annotate.\\n', 'action': 'store_true', 'default': False}, 'not_flatten_consequences': {'help': 'Use exploded annotation informations.\\n', 'action': 'store_true', 'default': False}, 'minimalize_info': {'help': "Minimalize INFO field (e.g. '.' value).\\n", 'action': 'store_true', 'default': False}, 'minimalize_id': {'help': "Minimalize ID field (e.g. '.' value).\\n", 'action': 'store_true', 'default': False}, 'minimalize_qual': {'help': "Minimalize QUAL field (e.g. '.' value).\\n", 'action': 'store_true', 'default': False}, 'minimalize_filter': {'help': "Minimalize FILTER field (e.g. '.' value).\\n", 'action': 'store_true', 'default': False}, 'minimalize_samples': {'help': "Minimalize samples to keep only genotypes (i.e. 'GT').\\n", 'action': 'store_true', 'default': False}, 'remove_samples': {'help': 'Remove all samples to keep only variants.\\n', 'action': 'store_true', 'default': False}}"}, {"fullname": "howard.tools.tools.shared_arguments", "modulename": "howard.tools.tools", "qualname": "shared_arguments", "kind": "variable", "doc": "

    \n", "default_value": "['config', 'threads', 'memory', 'chunk_size', 'tmp', 'duckdb_settings', 'interactive', 'verbosity', 'log', 'quiet', 'verbose', 'debug']"}, {"fullname": "howard.tools.tools.commands_arguments", "modulename": "howard.tools.tools", "qualname": "commands_arguments", "kind": "variable", "doc": "

    \n", "default_value": "{'query': {'function': 'query', 'description': "Query genetic variations in SQL format. Data can be loaded into 'variants' table from various formats (e.g. VCF, TSV, Parquet...). Using --explode_infos allow query on INFO/tag annotations. SQL query can also use external data within the request, such as a Parquet file(s). ", 'help': 'Query genetic variations file in SQL format.', 'epilog': 'Usage examples:\\n howard query --input=tests/data/example.vcf.gz --query="SELECT * FROM variants WHERE REF = \\'A\\' AND POS < 100000" \\n howard query --input=tests/data/example.vcf.gz --explode_infos --query=\\'SELECT "#CHROM", POS, REF, ALT, DP, CLNSIG, sample2, sample3 FROM variants WHERE DP >= 50 OR CLNSIG NOT NULL ORDER BY DP DESC\\' \\n howard query --query="SELECT \\\\"#CHROM\\\\", POS, REF, ALT, \\\\"INFO/Interpro_domain\\\\" FROM \\'tests/databases/annotations/current/hg19/dbnsfp42a.parquet\\' WHERE \\\\"INFO/Interpro_domain\\\\" NOT NULL ORDER BY \\\\"INFO/SiPhy_29way_logOdds_rankscore\\\\" DESC LIMIT 10" \\n howard query --explode_infos --explode_infos_prefix=\\'INFO/\\' --query="SELECT \\\\"#CHROM\\\\", POS, REF, ALT, STRING_AGG(INFO, \\';\\') AS INFO FROM \\'tests/databases/annotations/current/hg19/*.parquet\\' GROUP BY \\\\"#CHROM\\\\", POS, REF, ALT" --output=/tmp/full_annotation.tsv && head -n2 /tmp/full_annotation.tsv \\n howard query --input=tests/data/example.vcf.gz --param=config/param.json \\n \\n', 'groups': {'main': {'input': False, 'output': False, 'param': False, 'query': False}, 'Explode': {'explode_infos': False, 'explode_infos_prefix': False, 'explode_infos_fields': False}, 'Query': {'query_limit': False, 'query_print_mode': False}, 'Export': {'include_header': False, 'parquet_partitions': False}}}, 'filter': {'function': 'filter', 'description': "Filter genetic variations in SQL format. Data can be loaded into 'variants' table from various formats (e.g. VCF, TSV, Parquet...). 
SQL filter can also use external data within the request, such as a Parquet file(s). ", 'help': 'Filter genetic variations file in SQL format.', 'epilog': 'Usage examples:\\n howard filter --input=tests/data/example.vcf.gz --output=/tmp/example.filter.vcf.gz --filter="REF = \\'A\\' AND POS < 100000" \\n howard filter --input=tests/data/example.vcf.gz --output=/tmp/example.filter.vcf.gz --filter="REF = \\'A\\' AND POS < 100000" --samples="sample1,sample2" \\n howard filter --input=tests/data/example.vcf.gz --output=/tmp/example.filter.vcf.gz --filter="INFOS.CLNSIG LIKE \\'pathogenic\\'" --samples="sample1,sample2" \\n howard filter --input=tests/data/example.vcf.gz --output=/tmp/example.filter.vcf.gz --filter="QUAL > 100 AND SAMPLES.sample2.GT != \\'./.\\'" --samples="sample2" \\n \\n', 'groups': {'main': {'input': True, 'output': True}, 'Filters': {'filter': False, 'samples': False}, 'Export': {'include_header': False, 'parquet_partitions': False}}}, 'stats': {'function': 'stats', 'description': 'Statistics on genetic variations, such as: number of variants, number of samples, statistics by chromosome, genotypes by samples...', 'help': 'Statistics on genetic variations file.', 'epilog': 'Usage examples:\\n howard stats --input=tests/data/example.vcf.gz \\n howard stats --input=tests/data/example.vcf.gz --stats_md=/tmp/stats.md \\n howard stats --input=tests/data/example.vcf.gz --param=config/param.json \\n \\n', 'groups': {'main': {'input': True, 'param': False}, 'Stats': {'stats_md': False, 'stats_json': False}}}, 'convert': {'function': 'convert', 'description': "Convert genetic variations file to another format. Multiple format are available, such as usual and official VCF and BCF format, but also other formats such as TSV, CSV, PSV and Parquet/duckDB. These formats need a header '.hdr' file to take advantage of the power of howard (especially through INFO/tag definition), and using howard convert tool automatically generate header file fo futher use. 
", 'help': 'Convert genetic variations file to another format.', 'epilog': 'Usage examples:\\n howard convert --input=tests/data/example.vcf.gz --output=/tmp/example.tsv \\n howard convert --input=tests/data/example.vcf.gz --output=/tmp/example.parquet \\n howard convert --input=tests/data/example.vcf.gz --output=/tmp/example.tsv --explode_infos --explode_infos_fields=\\'CLNSIG,SIFT,DP\\' --order_by=\\'CLNSIG DESC, DP DESC\\' \\n howard convert --input=tests/data/example.vcf.gz --output=/tmp/example.tsv --explode_infos --explode_infos_prefix=\\'INFO/\\' --explode_infos_fields=\\'CLNSIG,SIFT,DP,*\\' --order_by=\\'"INFO/CLNSIG" DESC, "INFO/DP" DESC\\' --include_header \\n howard convert --input=tests/data/example.vcf.gz --output=/tmp/example.tsv --param=config/param.json \\n \\n', 'groups': {'main': {'input': True, 'output': True, 'param': False}, 'Explode': {'explode_infos': False, 'explode_infos_prefix': False, 'explode_infos_fields': False}, 'Export': {'include_header': False, 'order_by': False, 'parquet_partitions': False}}}, 'hgvs': {'function': 'hgvs', 'description': "HGVS annotation using HUGO HGVS internation Sequence Variant Nomenclature (http://varnomen.hgvs.org/). Annotation refere to refGene and genome to generate HGVS nomenclature for all available transcripts. 
This annotation add 'hgvs' field into VCF INFO column of a VCF file.", 'help': 'HGVS annotation (HUGO internation nomenclature) using refGene, genome and transcripts list.\\n', 'epilog': 'Usage examples:\\n howard hgvs --input=tests/data/example.full.vcf --output=/tmp/example.hgvs.vcf \\n howard hgvs --input=tests/data/example.full.vcf --output=/tmp/example.hgvs.tsv --param=config/param.json \\n howard hgvs --input=tests/data/example.full.vcf --output=/tmp/example.hgvs.vcf --full_format --use_exon \\n \\n', 'groups': {'main': {'input': True, 'output': True, 'param': False, 'hgvs_options': False, 'assembly': False}, 'HGVS': {'use_gene': False, 'use_exon': False, 'use_protein': False, 'add_protein': False, 'full_format': False, 'codon_type': False, 'refgene': False, 'refseqlink': False}}}, 'annotation': {'function': 'annotation', 'description': 'Annotation is mainly based on a build-in Parquet annotation method, and tools such as BCFTOOLS, Annovar and snpEff. It uses available databases (see Annovar and snpEff) and homemade databases. Format of databases are: parquet, duckdb, vcf, bed, Annovar and snpEff (Annovar and snpEff databases are automatically downloaded, see howard databases tool). 
', 'help': 'Annotation of genetic variations file using databases/files and tools.', 'epilog': "Usage examples:\\n howard annotation --input=tests/data/example.vcf.gz --output=/tmp/example.howard.vcf.gz --annotations='tests/databases/annotations/current/hg19/avsnp150.parquet,tests/databases/annotations/current/hg19/dbnsfp42a.parquet,tests/databases/annotations/current/hg19/gnomad211_genome.parquet' \\n howard annotation --input=tests/data/example.vcf.gz --output=/tmp/example.howard.tsv --assembly=hg19 --annotations='annovar:refGene,annovar:cosmic70,snpeff,tests/databases/annotations/current/hg19/clinvar_20210123.parquet' \\n howard annotation --input=tests/data/example.vcf.gz --output=/tmp/example.howard.tsv --assembly=hg19 --annotation_parquet='tests/databases/annotations/current/hg19/avsnp150.parquet,tests/databases/annotations/current/hg19/dbnsfp42a.parquet' \\n howard annotation --input=tests/data/example.vcf.gz --output=/tmp/example.howard.tsv --assembly=hg19 --annotation_bcftools='tests/databases/annotations/current/hg19/nci60.vcf.gz,tests/databases/annotations/current/hg19/dbnsfp42a.vcf.gz' \\n howard annotation --input=tests/data/example.vcf.gz --output=/tmp/example.howard.tsv --assembly=hg19 --annotation_snpsift='tests/databases/annotations/current/hg19/nci60.vcf.gz,tests/databases/annotations/current/hg19/dbnsfp42a.vcf.gz' \\n howard annotation --input=tests/data/example.vcf.gz --output=/tmp/example.howard.tsv --assembly=hg19 --annotation_annovar='nci60:cosmic70' \\n howard annotation --input=tests/data/example.vcf.gz --output=/tmp/example.howard.tsv --assembly=hg19 --annotation_snpeff='-hgvs' \\n howard annotation --input=tests/data/example.vcf.gz --output=/tmp/example.howard.tsv --assembly=hg19 --annotation_exomiser='preset=exome:transcript_source=refseq' \\n howard annotation --input=tests/data/example.vcf.gz --output=/tmp/example.howard.tsv --assembly=hg19 --annotation_splice='split_mode=one:spliceai_distance=500:spliceai_mask=1' \\n howard annotation 
--input=tests/data/example.vcf.gz --output=/tmp/example.howard.tsv --assembly=hg19 --annotations='ALL:parquet' \\n howard annotation --input=tests/data/example.vcf.gz --output=/tmp/example.howard.tsv --param=config/param.json \\n \\n", 'groups': {'main': {'input': True, 'output': True, 'param': False, 'annotations': False, 'annotation_parquet': False, 'annotation_bcftools': False, 'annotation_annovar': False, 'annotation_snpeff': False, 'annotation_snpsift': False, 'annotation_exomiser': False, 'annotation_splice': False, 'assembly': False}, 'Annotation': {'annotations_update': False, 'annotations_append': False}}}, 'calculation': {'function': 'calculation', 'description': 'Calculation processes variants information to generate new information, such as: identify variation type (VarType), harmonizes allele frequency (VAF) and calculate sttistics (VAF_stats), extracts Nomen (transcript, cNomen, pNomen...) from an HGVS field (e.g. snpEff, Annovar) with an optional list of personalized transcripts, generates VaRank format barcode, identify trio inheritance.', 'help': 'Calculation operations on genetic variations file and genotype information.\\n', 'epilog': "Usage examples:\\n howard calculation --input=tests/data/example.full.vcf --output=/tmp/example.calculation.tsv --calculations='vartype' \\n howard calculation --input=tests/data/example.ann.vcf.gz --output=/tmp/example.calculated.tsv --calculations='snpeff_hgvs,NOMEN' --hgvs_field=snpeff_hgvs --transcripts=tests/data/transcripts.tsv \\n howard calculation --input=tests/data/example.vcf.gz --output=/tmp/example.calculated.tsv --calculations='TRIO' --trio_pedigree='sample1,sample2,sample4' \\n howard calculation --input=tests/data/example.vcf.gz --output=/tmp/example.calculated.tsv --calculations='BARCODEFAMILY' --family_pedigree='sample1,sample2,sample4' \\n howard calculation --input=tests/data/example.ann.transcripts.vcf.gz --output=/tmp/example.calculation.transcripts.tsv --param=config/param.transcripts.json 
--calculations='TRANSCRIPTS_ANNOTATIONS,TRANSCRIPTS_PRIORITIZATION,TRANSCRIPTS_EXPORT' \\n howard calculation --input=tests/data/example.ann.vcf.gz --output=/tmp/example.ann.tsv --param=config/param.json \\n howard calculation --show_calculations \\n \\n", 'groups': {'main': {'input': False, 'output': False, 'param': False, 'calculations': False}, 'Calculation': {'calculation_config': False, 'show_calculations': False}, 'NOMEN': {'hgvs_field': False, 'transcripts': False}, 'TRIO': {'trio_pedigree': False}, 'BARCODEFAMILY': {'family_pedigree': False}}}, 'prioritization': {'function': 'prioritization', 'description': "Prioritization algorithm uses profiles to flag variants (as passed or filtered), calculate a prioritization score, and automatically generate a comment for each variants (example: 'polymorphism identified in dbSNP. associated to Lung Cancer. Found in ClinVar database'). Prioritization profiles are defined in a configuration file in JSON format. A profile is defined as a list of annotation/value, using wildcards and comparison options (contains, lower than, greater than, equal...). Annotations fields may be quality values (usually from callers, such as 'DP') or other annotations fields provided by annotations tools, such as HOWARD itself (example: COSMIC, Clinvar, 1000genomes, PolyPhen, SIFT). 
Multiple profiles can be used simultaneously, which is useful to define multiple validation/prioritization levels (example: 'standard', 'stringent', 'rare variants', 'low allele frequency').\\n", 'help': 'Prioritization of genetic variations based on annotations criteria (profiles).', 'epilog': "Usage examples:\\n howard prioritization --input=tests/data/example.vcf.gz --output=/tmp/example.prioritized.vcf.gz --prioritizations='default' \\n howard prioritization --input=tests/data/example.vcf.gz --output=/tmp/example.prioritized.vcf.gz --prioritizations='default,GERMLINE' --prioritization_config=config/prioritization_profiles.json \\n howard prioritization --input=tests/data/example.vcf.gz --output=/tmp/example.prioritized.tsv --param=config/param.json \\n \\n", 'groups': {'main': {'input': True, 'output': True, 'param': False, 'prioritizations': False}, 'Prioritization': {'default_profile': False, 'pzfields': False, 'prioritization_score_mode': False, 'prioritization_config': False}}}, 'process': {'function': 'process', 'description': 'howard process tool manage genetic variations to:\\n- annotates genetic variants with multiple annotation databases/files and tools\\n- calculates and normalizes annotations\\n- prioritizes variants with profiles (list of citeria) to calculate scores and flags\\n- translates into various formats\\n- query genetic variants and annotations\\n- generates variants statistics', 'help': 'Full genetic variations process: annotation, calculation, prioritization, format, query, filter...', 'epilog': 'Usage examples:\\n howard process --input=tests/data/example.vcf.gz --output=/tmp/example.annotated.vcf.gz --param=config/param.json \\n howard process --input=tests/data/example.vcf.gz --annotations=\\'snpeff\\' --calculations=\\'snpeff_hgvs\\' --prioritizations=\\'default\\' --explode_infos --output=/tmp/example.annotated.tsv --query=\\'SELECT "#CHROM", POS, ALT, REF, snpeff_hgvs FROM variants\\' \\n howard process 
--input=tests/data/example.vcf.gz --hgvs_options=\\'full_format,use_exon\\' --explode_infos --output=/tmp/example.annotated.tsv --query=\\'SELECT "#CHROM", POS, ALT, REF, hgvs FROM variants\\' \\n howard process --input=tests/data/example.vcf.gz --output=/tmp/example.howard.vcf.gz --hgvs=\\'full_format,use_exon\\' --annotations=\\'tests/databases/annotations/current/hg19/avsnp150.parquet,tests/databases/annotations/current/hg19/dbnsfp42a.parquet,tests/databases/annotations/current/hg19/gnomad211_genome.parquet\\' --calculations=\\'NOMEN\\' --explode_infos --query=\\'SELECT NOMEN, REVEL_score, SIFT_score, AF AS \\'gnomad_AF\\', ClinPred_score, ClinPred_pred FROM variants\\' \\n \\n', 'groups': {'main': {'input': True, 'output': True, 'param': False, 'hgvs_options': False, 'annotations': False, 'calculations': False, 'prioritizations': False, 'assembly': False}, 'HGVS': {'use_gene': False, 'use_exon': False, 'use_protein': False, 'add_protein': False, 'full_format': False, 'codon_type': False, 'refgene': False, 'refseqlink': False}, 'Annotation': {'annotations_update': False, 'annotations_append': False}, 'Calculation': {'calculation_config': False}, 'Prioritization': {'default_profile': False, 'pzfields': False, 'prioritization_score_mode': False, 'prioritization_config': False}, 'Query': {'query': False, 'query_limit': False, 'query_print_mode': False}, 'Explode': {'explode_infos': False, 'explode_infos_prefix': False, 'explode_infos_fields': False}, 'Export': {'include_header': False, 'order_by': False, 'parquet_partitions': False}}}, 'databases': {'function': 'databases', 'description': 'Download databases and needed files for howard and associated tools', 'help': 'Download databases and needed files for howard and associated tools', 'epilog': "Usage examples:\\n howard databases --assembly=hg19 --download-genomes=~/howard/databases/genomes/current --download-genomes-provider=UCSC --download-genomes-contig-regex='chr[0-9XYM]+$' \\n howard databases 
--assembly=hg19 --download-annovar=~/howard/databases/annovar/current --download-annovar-files='refGene,cosmic70,nci60' \\n howard databases --assembly=hg19 --download-snpeff=~/howard/databases/snpeff/current \\n howard databases --assembly=hg19 --download-refseq=~/howard/databases/refseq/current --download-refseq-format-file='ncbiRefSeq.txt' \\n howard databases --assembly=hg19 --download-dbnsfp=~/howard/databases/dbnsfp/current --download-dbnsfp-release='4.4a' --download-dbnsfp-subdatabases \\n howard databases --assembly=hg19 --download-alphamissense=~/howard/databases/alphamissense/current \\n howard databases --assembly=hg19 --download-exomiser=~/howard/databases/exomiser/current \\n howard databases --assembly=hg19 --download-dbsnp=~/howard/databases/dbsnp/current --download-dbsnp-vcf \\n cd ~/howard/databases && howard databases --assembly=hg19 --download-genomes=genomes/current --download-genomes-provider=UCSC --download-genomes-contig-regex='chr[0-9XYM]+$' --download-annovar=annovar/current --download-annovar-files='refGene,cosmic70,nci60' --download-snpeff=snpeff/current --download-refseq=refseq/current --download-refseq-format-file='ncbiRefSeq.txt' --download-dbnsfp=dbnsfp/current --download-dbnsfp-release='4.4a' --download-dbnsfp-subdatabases --download-alphamissense=alphamissense/current --download-exomiser=exomiser/current --download-dbsnp=dbsnp/current --download-dbsnp-vcf --threads=8 \\n howard databases --generate-param=/tmp/param.json --generate-param-description=/tmp/test.description.json --generate-param-formats=parquet \\n howard databases --input_annovar=tests/databases/others/hg19_nci60.txt --output_annovar=/tmp/nci60.from_annovar.vcf.gz --annovar_to_parquet=/tmp/nci60.from_annovar.parquet --annovar_code=nci60 --genome=~/howard/databases/genomes/current/hg19.fa \\n\\nNotes:\\n - Downloading databases can take a while, depending on network, threads and memory\\n - Proxy: Beware of network and proxy configuration\\n - dbNSFP download: More 
threads, more memory usage (8 threads ~ 16Gb, 24 threads ~ 32Gb)\\n \\n", 'groups': {'main': {'assembly': False, 'genomes-folder': False, 'genome': False, 'param': False}, 'Genomes': {'download-genomes': False, 'download-genomes-provider': False, 'download-genomes-contig-regex': False}, 'snpEff': {'download-snpeff': False}, 'Annovar': {'download-annovar': False, 'download-annovar-files': False, 'download-annovar-url': False}, 'refSeq': {'download-refseq': False, 'download-refseq-url': False, 'download-refseq-prefix': False, 'download-refseq-files': False, 'download-refseq-format-file': False, 'download-refseq-include-utr5': False, 'download-refseq-include-utr3': False, 'download-refseq-include-chrM': False, 'download-refseq-include-non-canonical-chr': False, 'download-refseq-include-non-coding-transcripts': False, 'download-refseq-include-transcript-version': False}, 'dbNSFP': {'download-dbnsfp': False, 'download-dbnsfp-url': False, 'download-dbnsfp-release': False, 'download-dbnsfp-parquet-size': False, 'download-dbnsfp-subdatabases': False, 'download-dbnsfp-parquet': False, 'download-dbnsfp-vcf': False, 'download-dbnsfp-no-files-all': False, 'download-dbnsfp-add-info': False, 'download-dbnsfp-only-info': False, 'download-dbnsfp-uniquify': False, 'download-dbnsfp-row-group-size': False}, 'AlphaMissense': {'download-alphamissense': False, 'download-alphamissense-url': False}, 'Exomiser': {'download-exomiser': False, 'download-exomiser-application-properties': False, 'download-exomiser-url': False, 'download-exomiser-release': False, 'download-exomiser-phenotype-release': False, 'download-exomiser-remm-release': False, 'download-exomiser-remm-url': False, 'download-exomiser-cadd-release': False, 'download-exomiser-cadd-url': False, 'download-exomiser-cadd-url-snv-file': False, 'download-exomiser-cadd-url-indel-file': False}, 'dbSNP': {'download-dbsnp': False, 'download-dbsnp-releases': False, 'download-dbsnp-release-default': False, 'download-dbsnp-url': False, 
'download-dbsnp-url-files': False, 'download-dbsnp-url-files-prefix': False, 'download-dbsnp-assemblies-map': False, 'download-dbsnp-vcf': False, 'download-dbsnp-parquet': False}, 'HGMD': {'convert-hgmd': False, 'convert-hgmd-file': False, 'convert-hgmd-basename': False}, 'from_Annovar': {'input_annovar': False, 'output_annovar': False, 'annovar_code': False, 'annovar_to_parquet': False, 'annovar_reduce_memory': False, 'annovar_multi_variant': False}, 'from_extann': {'input_extann': False, 'output_extann': False, 'refgene': False, 'transcripts': False, 'param_extann': False, 'mode_extann': False}, 'Parameters': {'generate-param': False, 'generate-param-description': False, 'generate-param-releases': False, 'generate-param-formats': False, 'generate-param-bcftools': False}}}, 'gui': {'function': 'gui', 'description': 'Graphical User Interface tools', 'help': 'Graphical User Interface tools', 'epilog': 'Usage examples:\\n howard gui ', 'groups': {}}, 'help': {'function': 'help', 'description': 'Help tools', 'help': 'Help tools', 'epilog': "Usage examples:\\n howard help --help_md=docs/help.md --help_html=docs/html/help.html --help_pdf=docs/pdf/help.pdf\\n howard help --help_json_input=docs/json/help.configuration.json --help_json_input_title='HOWARD Configuration' --help_md=docs/help.configuration.md --help_html=docs/html/help.configuration.html --help_pdf=docs/pdf/help.configuration.pdf --code_type='json'\\n howard help --help_json_input=docs/json/help.parameteres.json --help_json_input_title='HOWARD Parameters' --help_md=docs/help.parameteres.md --help_html=docs/html/help.parameteres.html --help_pdf=docs/pdf/help.parameteres.pdf --code_type='json' \\n howard help --help_json_input=docs/json/help.parameteres.databases.json --help_json_input_title='HOWARD Parameters Databases' --help_md=docs/help.parameteres.databases.md --help_html=docs/html/help.parameteres.databases.html --help_pdf=docs/pdf/help.parameteres.databases.pdf --code_type='json' \\n \\n", 'groups': 
{'main': {'help_md': False, 'help_html': False, 'help_pdf': False, 'help_md_input': False, 'help_json_input': False, 'help_json_input_title': False, 'code_type': False}}}, 'update_database': {'function': 'update_database', 'description': 'Update HOWARD database\\n', 'help': '(plugin) Update HOWARD database', 'epilog': 'Usage examples:\\n howard update_database --database clinvar --databases_folder /home1/DB/HOWARD --update_config update_databases.json \\n \\n', 'groups': {'main': {'param': False}, 'Update_database': {'databases_folder': False, 'database': False, 'update_config': False, 'current_folder': False}, 'Options': {'show': False, 'limit': False}}}, 'to_excel': {'function': 'to_excel', 'description': "Convert VCF file to Excel '.xlsx' format.\\n", 'help': "(plugin) Convert VCF file to Excel '.xlsx' format", 'epilog': 'Usage examples:\\n howard to_excel --input=tests/data/example.vcf.gz --output=/tmp/example.xlsx --add_variants_view\\n \\n', 'groups': {'main': {'input': True, 'output': True}, 'Add': {'add_variants_view': False, 'add_header': False}}}, 'transcripts_check': {'function': 'transcripts_check', 'description': 'Check if a transcript list is present in a generated transcript table from a input VCF file.\\n', 'help': '(plugin) Check transcript list in transcript table', 'epilog': 'Usage examples:\\n howard transcripts_check --input=plugins/transcripts_check/tests/data/example.ann.transcripts.vcf.gz --param=plugins/transcripts_check/tests/data/param.transcripts.json --transcripts_expected=plugins/transcripts_check/tests/data/transcripts.tsv --stats=/tmp/transcripts.stats.json --transcripts_missing=/tmp/transcripts.missing.tsv\\n \\n', 'groups': {'main': {'input': True, 'param': True, 'transcripts_expected': True, 'transcripts_missing': False, 'stats_json': False}}}, 'genebe': {'function': 'genebe', 'description': 'GeneBe annotation using REST API (see https://genebe.net/).\\n', 'help': '(plugin) GeneBe annotation using REST API', 'epilog': 'Usage 
examples:\\n howard genebe --input=tests/data/example.vcf.gz --output=/tmp/example.genebe.vcf.gz --genebe_use_refseq\\n \\n', 'groups': {'main': {'input': True, 'output': True, 'param': False, 'assembly': False}, 'GeneBe': {'genebe_use_refseq': False, 'genebe_use_ensembl': False, 'not_flatten_consequences': False}, 'Explode': {'explode_infos': False, 'explode_infos_prefix': False, 'explode_infos_fields': False}, 'Export': {'include_header': False, 'order_by': False, 'parquet_partitions': False}}}, 'minimalize': {'function': 'minimalize', 'description': "Minimalize a VCF file consists in put missing value ('.') on INFO/Tags, ID, QUAL or FILTER fields. Options can also minimalize samples (keep only GT) or remove all samples. INFO/tags can by exploded before minimalize to keep tags into separated columns (useful for Parquet or TSV format to constitute a database).\\n", 'help': '(plugin) Minimalize a VCF file, such as removing INFO/Tags or samples', 'epilog': 'Usage examples:\\n howard minimalize --input=tests/data/example.vcf.gz --output=/tmp/example.minimal.vcf.gz --minimalize_info --minimalize_filter --minimalize_qual --minimalize_id --minimalize_samples\\n howard minimalize --input=tests/data/example.vcf.gz --output=/tmp/example.minimal.tsv --remove_samples --explode_infos --minimalize_info\\n \\n', 'groups': {'main': {'input': True, 'output': True, 'param': False}, 'Minimalize': {'minimalize_info': False, 'minimalize_id': False, 'minimalize_qual': False, 'minimalize_filter': False, 'minimalize_samples': False, 'remove_samples': False}, 'Explode': {'explode_infos': False, 'explode_infos_prefix': False, 'explode_infos_fields': False}, 'Export': {'include_header': False, 'order_by': False, 'parquet_partitions': False}}}}"}, {"fullname": "howard.tools.tools.arguments_dict", "modulename": "howard.tools.tools", "qualname": "arguments_dict", "kind": "variable", "doc": "

    \n", "default_value": "{'arguments': {'input': {'metavar': 'input', 'help': 'Input file path.\\nFormat file must be either VCF, Parquet, TSV, CSV, PSV or duckDB.\\nFiles can be compressesd (e.g. vcf.gz, tsv.gz).\\n', 'required': False, 'default': None, 'type': <howard.tools.tools.PathType object>, 'gooey': {'widget': 'FileChooser', 'options': {'wildcard': 'Parquet file (*.parquet)|*.parquet|All files (*)|*'}}}, 'output': {'metavar': 'output', 'help': 'Output file path.\\nFormat file must be either VCF, Parquet, TSV, CSV, PSV or duckDB.\\nFiles can be compressesd (e.g. vcf.gz, tsv.gz).\\n', 'required': False, 'default': None, 'type': <howard.tools.tools.PathType object>, 'gooey': {'widget': 'FileSaver'}}, 'param': {'metavar': 'param', 'help': 'Parameters JSON file (or string) defines parameters to process \\nannotations, calculations, prioritizations, convertions and queries.\\n', 'default': '{}', 'type': <howard.tools.tools.PathType object>, 'gooey': {'widget': 'FileChooser', 'options': {'initial_value': '', 'wildcard': 'JSON file (*.json)|*.json|All files (*)|*'}}}, 'query': {'metavar': 'query', 'help': "Query in SQL format\\n(e.g. 'SELECT * FROM variants LIMIT 50').\\n", 'default': None, 'type': <class 'str'>, 'gooey': {'widget': 'Textarea', 'options': {'initial_value': 'SELECT * FROM variants'}}, 'extra': {'param_section': 'query'}}, 'filter': {'metavar': 'filter', 'help': "Filter variant using SQL format\\n(e.g. 'POS < 100000').\\n", 'default': None, 'type': <class 'str'>, 'gooey': {'widget': 'Textarea', 'options': {'initial_value': ''}}}, 'samples': {'metavar': 'samples', 'help': "List of samples\\n(e.g. 
'sample1,sample2').\\n", 'default': None, 'type': <class 'str'>, 'gooey': {'widget': 'Textarea', 'options': {'initial_value': ''}}}, 'output_query': {'metavar': 'output', 'help': 'Output Query file.\\nFormat file must be either VCF, Parquet, TSV, CSV, PSV or duckDB.\\n', 'default': None, 'type': <howard.tools.tools.PathType object>, 'gooey': {'widget': 'FileSaver', 'options': {'wildcard': 'All files (*)|*'}}}, 'annotations': {'metavar': 'annotations', 'help': "Annotation with databases files, or with tools,\\nas a list of files in Parquet, VCF, BED, or keywords\\n (e.g. 'file.parquet,bcftools:file2.vcf.gz,annovar:refGene,snpeff').\\n- For a Parquet/VCF/BED, use file paths\\n (e.g. 'file1.parquet,file2.vcf.gz').\\n- For BCFTools annotation, use keyword 'bcftools' with file paths\\n (e.g. 'bcftools:file.vcf.gz:file.bed.gz').\\n- For Parquet annotation, use keyword 'parquet' with file paths\\n (e.g. 'parquet:file.parquet').\\n- For Annovar annotation, use keyword 'annovar' with annovar code\\n (e.g. 'annovar:refGene', 'annovar:refGene:cosmic70').\\n- For snpeff annotation, use keyword 'snpeff' with options\\n (e.g. 'snpeff', 'snpeff:-hgvs -noShiftHgvs -spliceSiteSize 3').\\n- For snpSift annotation, use keyword 'snpsift' with file paths\\n (e.g. 'snpsift:file.vcf.gz:file.bed.gz').\\n- For Exomiser annotation, use keyword 'exomiser' with options as key=value\\n (e.g. 'exomiser:preset=exome:transcript_source=refseq').\\n- For add all availalbe databases files, use 'ALL' keyword,\\n with filters on format (e.g. 'parquet', 'vcf') and release (e.g. 'current', 'devel')\\n (e.g. 
'ALL', ALL:format=parquet', 'ALL:format=parquet:release=current', 'ALL:format=parquet+vcf:release=current+devel').\\n", 'default': None, 'type': <class 'str'>, 'extra': {'format': 'DB[,DB]*[,bcftools:DB[:DB]*][,annovar:KEY[:KEY]*][,snpeff][,exomiser[:var=val]*]', 'examples': {'Parquet method annotation with 2 Parquet files': '"annotations": "/path/to/database1.parquet,/path/to/database2.parquet"', 'Parquet method annotation with multiple file formats': '"annotations": "/path/to/database1.parquet,/path/to/database2.vcf.gz,/path/to/database2.bed.gz"', 'Parquet method annotation with available Parquet databases in current release (check databases in production)': '"annotations": "ALL:parquet:current"', 'Parquet method annotation with available Parquet databases in latest release (check databases before production)': '"annotations": "ALL:parquet:latest"', 'Annotation with BCFTools': '"annotations": "bcftools:/path/to/database2.vcf.gz:/path/to/database2.bed.gz"', 'Annotation with Annovar (refGene with hgvs and Cosmic)': '"annotations": "annovar:refGene:cosmic70"', 'Annotation with snpEff (default options)': '"annotations": "snpeff"', 'Annotation with snpEff (with options)': '"annotations": "snpeff:-hgvs -noShiftHgvs -spliceSiteSize 3"', 'Annotation with snpSift': '"annotations": "snpsift:/path/to/database2.vcf.gz:/path/to/database2.bed.gz"', 'Annotation with Exomiser with options': '"annotations": "exomiser:preset=exome:hpo=0001156+0001363+0011304+0010055:transcript_source=refseq:release=2109"', 'Multiple tools annotations (Parquet method, BCFTools, Annovar, snpEff and Exomiser)': '"annotations": "/path/to/database1.parquet,bcftools:/path/to/database2.vcf.gz,annovar:refGene:cosmic70,snpeff,exomiser:preset=exome:transcript_source=refseq"'}}}, 'annotation_parquet': {'metavar': 'annotation parquet', 'help': "Annotation with Parquet method, as a list of files in Parquet, VCF or BED\\n (e.g. 
'file1.parquet,file2.vcf.gz').\\nFor add all availalbe databases files, use 'ALL' keyword,\\n with filters on type and release\\n (e.g. 'ALL', 'ALL:parquet:current', 'ALL:parquet,vcf:current,devel').\\n", 'default': None, 'type': <class 'str'>, 'nargs': '+', 'gooey': {'widget': 'MultiFileChooser', 'options': {'default_dir': '/Users/lebechea/howard/databases/annotations/current', 'message': 'Database files'}}, 'extra': {'format': 'DB[,DB]*', 'examples': {'Parquet method annotation with 2 Parquet files': '"annotation_parquet": "/path/to/database1.parquet,/path/to/database2.parquet"', 'Parquet method annotation with multiple file formats': '"annotation_parquet": "/path/to/database1.parquet,/path/to/database2.vcf.gz,/path/to/database2.bed.gz"', 'Parquet method annotation with available Parquet databases in current release (check databases in production)': '"annotation_parquet": "ALL:parquet:current"', 'Parquet method annotation with available Parquet databases in latest release (check databases before production)': '"annotation_parquet": "ALL:parquet:latest"'}}}, 'annotation_bcftools': {'metavar': 'annotation BCFTools', 'help': "Annotation with BCFTools, as a list of files VCF or BED\\n (e.g. 'file.vcf.gz,file.bed.gz').\\n", 'default': None, 'type': <class 'str'>, 'nargs': '+', 'gooey': {'widget': 'MultiFileChooser', 'options': {'default_dir': '/Users/lebechea/howard/databases/annotations/current', 'message': 'Database files'}}, 'extra': {'format': 'DB[,DB]*', 'examples': {'Annovation with BCFTools': '"annotation_bcftools": "/path/to/database2.vcf.gz,/path/to/database2.bed.gz"'}}}, 'annotation_snpeff': {'metavar': 'annotation snpEff', 'help': "Annotation with snpEff, with options\\n (e.g. 
'', '-hgvs -noShiftHgvs -spliceSiteSize 3').\\n", 'default': None, 'type': <class 'str'>, 'extra': {'format': 'options', 'examples': {'Annotation with snpEff (default options)': '"annotation_snpeff": ""', 'Annotation with snpEff (with options)': '"annotation_snpeff": "-hgvs -noShiftHgvs -spliceSiteSize 3"'}}}, 'annotation_snpsift': {'metavar': 'annotation snpSift', 'help': "Annotation with snpSift, as a list of files VCF\\n (e.g. 'file.vcf.gz,file.bed.gz').\\n", 'default': None, 'type': <class 'str'>, 'nargs': '+', 'gooey': {'widget': 'MultiFileChooser', 'options': {'default_dir': '/Users/lebechea/howard/databases/annotations/current', 'message': 'Database files'}}, 'extra': {'format': 'DB[,DB]*', 'examples': {'Annovation with snpSift': '"annotation_snpsift": "/path/to/database2.vcf.gz,/path/to/database2.bed.gz"'}}}, 'annotation_annovar': {'metavar': 'annotation Annovar', 'help': "Annotation with Annovar, as a list of database keywords\\n (e.g. 'refGene', 'refGene:cosmic70').\\n", 'default': None, 'type': <class 'str'>, 'extra': {'format': 'keyword[:keyword]*', 'examples': {'Annotation with Annovar (refGene with hgvs and Cosmic)': '"annotation_annovar": "refGene:cosmic70"'}}}, 'annotation_exomiser': {'metavar': 'annotation Exomiser', 'help': "Annotation with Exomiser, as a list of options\\n (e.g. 'preset=exome:transcript_source=refseq').\\n", 'default': None, 'type': <class 'str'>, 'extra': {'format': 'option=value[:option=value]', 'examples': {'Annotation with Exomiser with options': '"annotation_exomiser": "preset=exome:hpo=0001156+0001363+0011304+0010055:transcript_source=refseq:release=2109"'}}}, 'annotation_splice': {'metavar': 'annotation Splice', 'help': "Annotation with Splice, as a list of options\\n (e.g. 
'split_mode=one:spliceai_distance=500:spliceai_mask=1').\\n", 'default': None, 'type': <class 'str'>, 'extra': {'format': 'option=value[:option=value]', 'examples': {'Annotation with Splice with options': '"annotation_splice": "split_mode=one:spliceai_distance=500:spliceai_mask=1"'}}}, 'annotations_update': {'help': 'Update option for annotation (Only for Parquet annotation).\\nIf True, annotation fields will be removed and re-annotated.\\nThese options will be applied to all annotation databases.\\n', 'action': 'store_true', 'default': False, 'gooey': {'widget': 'BlockCheckbox', 'options': {'checkbox_label': 'Update annotation method'}}, 'extra': {'param_section': 'annotation:options'}}, 'annotations_append': {'help': 'Append option for annotation (Only for Parquet annotation).\\nIf True, annotation fields will be annotated only if not annotation exists for the variant.\\nThese options will be applied to all annotation databases.\\n', 'action': 'store_true', 'default': False, 'gooey': {'widget': 'BlockCheckbox', 'options': {'checkbox_label': 'Append annotation method'}}, 'extra': {'param_section': 'annotation:options'}}, 'calculations': {'metavar': 'operations', 'help': "Quick calculations on genetic variants information and genotype information,\\nas a list of operations (e.g. 
'VARTYPE,variant_id').\\nList of available calculations by default\\n (unsensitive case, see doc for more information):\\n VARTYPE snpeff_hgvs FINDBYPIPELINE GENOTYPECONCORDANCE BARCODE TRIO VAF VAF_STATS DP_STATS \\n", 'default': None, 'type': <class 'str'>}, 'prioritizations': {'metavar': 'prioritisations', 'help': "List of prioritization profiles to process (based on Prioritization JSON file),\\nsuch as 'default', 'rare variants', 'low allele frequency', 'GERMLINE'.\\nBy default, all profiles available will be processed.\\n", 'default': None, 'type': <class 'str'>, 'extra': {'examples': {'Prioritization profile by default': '"prioritization": "default" ', 'Prioritization profile by default and GERMLINE from Configuration JSON file': '"prioritization": "default,GERMLINE" '}}}, 'prioritization_config': {'metavar': 'prioritization config', 'help': 'Prioritization configuration JSON file (defines profiles, see doc).\\n', 'default': None, 'type': <howard.tools.tools.PathType object>, 'gooey': {'widget': 'FileChooser', 'options': {'wildcard': 'JSON file (*.json)|*.json|All files (*)|*'}}, 'extra': {'param_section': 'prioritization', 'examples': {'Prioritization configuration JSON file as an option': '"prioritization_config": "prioritization_config.json" '}}}, 'profiles': {'metavar': 'profiles', 'help': "List of prioritization profiles to process (based on Prioritization JSON file),\\nsuch as 'default', 'rare variants', 'low allele frequency', 'GERMLINE'.\\nBy default, all profiles available will be processed.\\n", 'default': None, 'type': <class 'str'>}, 'default_profile': {'metavar': 'default profile', 'help': 'Prioritization profile by default (see doc).\\nDefault is the first profile in the list of prioritization profiles.\\n', 'default': None, 'type': <class 'str'>}, 'pzfields': {'metavar': 'pzfields', 'help': 'Prioritization fields to provide (see doc).\\nAvailable: PZScore, PZFlag, PZTags, PZComment, PZInfos\\n', 'default': 'PZScore,PZFlag', 'type': <class 
'str'>}, 'prioritization_score_mode': {'metavar': 'prioritization score mode', 'help': 'Prioritization Score mode (see doc).\\nAvailable: HOWARD (increment score), VaRank (max score)\\n', 'default': 'HOWARD', 'type': <class 'str'>, 'choices': ['HOWARD', 'VaRank'], 'gooey': {'widget': 'Dropdown', 'options': {}}}, 'query_limit': {'metavar': 'query limit', 'help': 'Limit of number of row for query (only for print result, not output).\\n', 'default': 10, 'type': <class 'int'>, 'gooey': {'widget': 'IntegerField', 'options': {'min': 1, 'max': 10000, 'increment': 10}}}, 'query_print_mode': {'metavar': 'print mode', 'help': "Print mode of query result (only for print result, not output).\\nEither None (native), 'markdown', 'tabulate' or disabled.\\n", 'choices': [None, 'markdown', 'tabulate', 'disabled'], 'default': None, 'type': <class 'str'>, 'gooey': {'widget': 'Dropdown', 'options': {}}}, 'explode_infos': {'help': "Explode VCF INFO/Tag into 'variants' table columns.\\n", 'action': 'store_true', 'default': False}, 'explode_infos_prefix': {'metavar': 'explode infos prefix', 'help': 'Explode VCF INFO/Tag with a specific prefix.\\n', 'default': '', 'type': <class 'str'>}, 'explode_infos_fields': {'metavar': 'explode infos list', 'help': "Explode VCF INFO/Tag specific fields/tags.\\nKeyword `*` specify all available fields, except those already specified.\\nPattern (regex) can be used, such as `.*_score` for fields named with '_score' at the end.\\nExamples:\\n- 'HGVS,SIFT,Clinvar' (list of fields)\\n- 'HGVS,*,Clinvar' (list of fields with all other fields at the end)\\n- 'HGVS,.*_score,Clinvar' (list of 2 fields with all scores in the middle)\\n- 'HGVS,.*_score,*' (1 field, scores, all other fields)\\n- 'HGVS,*,.*_score' (1 field, all other fields, all scores)\\n", 'default': '*', 'type': <class 'str'>}, 'include_header': {'help': 'Include header (in VCF format) in output file.\\nOnly for compatible formats (tab-delimiter format as TSV or BED).\\n', 'action': 'store_true', 
'default': False}, 'order_by': {'metavar': 'order by', 'help': "List of columns to sort the result-set in ascending or descending order.\\nUse SQL format, and keywords ASC (ascending) and DESC (descending).\\nIf a column is not available, order will not be considered.\\nOrder is enable only for compatible format (e.g. TSV, CSV, JSON).\\nExamples: 'ACMG_score DESC', 'PZFlag DESC, PZScore DESC'.\\n", 'default': '', 'type': <class 'str'>, 'extra': {'examples': {'Order by ACMG score in descending order': '"order_by": "ACMG_score DESC" ', 'Order by PZFlag and PZScore in descending order': '"order_by": "PZFlag DESC, PZScore DESC" '}}}, 'parquet_partitions': {'metavar': 'parquet partitions', 'help': "Parquet partitioning using hive (available for any format).\\nThis option is faster parallel writing, but memory consuming.\\nUse 'None' (string) for NO partition but split parquet files into a folder.\\nExamples: '#CHROM', '#CHROM,REF', 'None'.\\n", 'default': None, 'type': <class 'str'>}, 'input_annovar': {'metavar': 'input annovar', 'help': "Input Annovar file path.\\nFormat file must be a Annovar TXT file, associated with '.idx'.\\n", 'required': False, 'default': None, 'type': <howard.tools.tools.PathType object>, 'gooey': {'widget': 'FileChooser', 'options': {'wildcard': 'Parquet file (*.parquet)|*.parquet|All files (*)|*'}}}, 'output_annovar': {'metavar': 'output annovar', 'help': "Output Annovar file path.\\nFormat file must be either VCF compressesd file '.vcf.gz'.\\n", 'required': False, 'default': None, 'type': <howard.tools.tools.PathType object>, 'gooey': {'widget': 'FileSaver'}}, 'annovar_code': {'metavar': 'Annovar code', 'help': 'Annovar code, or database name.\\nUsefull to name databases columns.\\n', 'required': False, 'default': None, 'type': <class 'str'>}, 'annovar_to_parquet': {'metavar': 'to parquet', 'help': 'Parquet file conversion.\\n', 'required': False, 'default': None, 'type': <howard.tools.tools.PathType object>, 'gooey': {'widget': 'FileSaver', 
'options': {'wildcard': 'HTML file (*.parquet)|*.parquet'}}}, 'annovar_multi_variant': {'metavar': 'Annovar multi variant', 'help': "Variant with multiple annotation lines on Annovar file.\\nEither 'auto' (auto-detection), 'enable' or 'disable'.\\n", 'default': 'auto', 'type': <class 'str'>, 'choices': ['auto', 'enable', 'disable'], 'gooey': {'widget': 'Dropdown', 'options': {}}}, 'annovar_reduce_memory': {'metavar': 'reduce memory', 'help': "Reduce memory option for Annovar convert,\\neither 'auto' (auto-detection), 'enable' or 'disable'.\\n", 'default': 'auto', 'type': <class 'str'>, 'choices': ['auto', 'enable', 'disable'], 'gooey': {'widget': 'Dropdown', 'options': {}}}, 'input_extann': {'metavar': 'input extann', 'help': 'Input Extann file path.\\nFormat file must be a Extann TXT file or TSV file.\\nFile need to have at least the genes column.\\n', 'required': False, 'default': None, 'type': <howard.tools.tools.PathType object>, 'gooey': {'widget': 'FileChooser', 'options': {'wildcard': 'VCF, Parquet, TSV, CSV, PSV or duckDB|*.*|All files (*)|*'}}}, 'output_extann': {'metavar': 'output extann', 'help': 'Output Extann file path.\\nOutput extann file, should be BED or BED.gz.\\n', 'required': False, 'default': None, 'type': <howard.tools.tools.PathType object>, 'gooey': {'widget': 'FileSaver'}}, 'mode_extann': {'metavar': 'mode extann', 'help': 'Mode extann selection.\\nHow to pick transcript from ncbi, keep all,\\nkeep the longest, or keep the chosen one (transcript_extann).\\n', 'required': False, 'default': 'longest', 'choices': ['all', 'longest', 'chosen'], 'type': <class 'str'>}, 'param_extann': {'metavar': 'param extann', 'help': "Param extann file path.\\nParam containing configuration, options to replace chars and\\nbedlike header description, conf vcf specs.\\n(e.g. 
'~/howard/config/param.extann.json')\\n", 'required': False, 'default': None, 'type': <howard.tools.tools.PathType object>, 'gooey': {'widget': 'FileChooser', 'options': {'wildcard': 'TSV file format|*.tsv|'}}}, 'calculation_config': {'metavar': 'calculation config', 'help': 'Calculation configuration JSON file.\\n', 'default': None, 'type': <howard.tools.tools.PathType object>, 'gooey': {'widget': 'FileChooser', 'options': {'wildcard': 'JSON file (*.json)|*.json|All files (*)|*'}}, 'extra': {'param_section': 'calculation', 'examples': {'Calculation configuration JSON file as an option': '"calculation_config": "calculation_config.json" '}}}, 'show_calculations': {'help': 'Show available calculation operations.\\n', 'action': 'store_true', 'default': False}, 'hgvs_field': {'metavar': 'HGVS field', 'help': 'HGVS INFO/tag containing a list o HGVS annotations.\\n', 'default': 'hgvs', 'type': <class 'str'>, 'extra': {'param_section': 'calculation:calculations:NOMEN:options'}}, 'transcripts': {'metavar': 'transcripts', 'help': 'Transcripts TSV file,\\nwith Transcript in first column, optional Gene in second column.\\n', 'default': None, 'type': <howard.tools.tools.PathType object>, 'gooey': {'widget': 'FileChooser', 'options': {'wildcard': 'TSV file (*.tsv)|*.tsv|All files (*)|*'}}, 'extra': {'param_section': 'calculation:calculations:NOMEN:options'}}, 'trio_pedigree': {'metavar': 'trio pedigree', 'help': 'Pedigree Trio for trio inheritance calculation.\\nEither a JSON file or JSON string or a list of samples\\n(e.g. 
\\'sample1,sample2,sample3\\' for father, mother and child,\\n \\'{"father": "sample1", "mother": "sample2", "child": "sample3"}\\').\\n', 'default': None, 'gooey': {'widget': 'FileChooser', 'options': {'wildcard': 'JSON file (*.json)|*.json|All files (*)|*'}}, 'extra': {'param_section': 'calculation:calculations:TRIO'}}, 'family_pedigree': {'metavar': 'family pedigree', 'help': 'Pedigree family for barcode calculation on genotype.\\nEither a JSON file or JSON string or a list of samples\\n(e.g. \\'sample1,sample2,sample3,sample4\\',\\n \\'{"father": "sample1", "mother": "sample2", "child1": "sample3", "child2": "sample3"}\\').\\n', 'default': None, 'gooey': {'widget': 'FileChooser', 'options': {'wildcard': 'JSON file (*.json)|*.json|All files (*)|*'}}, 'extra': {'param_section': 'calculation:calculations:BARCODEFAMILY'}}, 'stats_md': {'metavar': 'stats markdown', 'help': 'Stats Output file in MarkDown format.\\n', 'required': False, 'default': None, 'type': <howard.tools.tools.PathType object>, 'gooey': {'widget': 'FileSaver', 'options': {'wildcard': 'Markdown file (*.md)|*.md'}}, 'extra': {'examples': {'Export statistics in Markdown format': '"stats_md": "/tmp/stats.md" '}}}, 'stats_json': {'metavar': 'stats json', 'help': 'Stats Output file in JSON format.\\n', 'required': False, 'default': None, 'type': <howard.tools.tools.PathType object>, 'gooey': {'widget': 'FileSaver', 'options': {'wildcard': 'JSON file (*.json)|*.json'}}, 'extra': {'examples': {'Export statistics in JSON format': '"stats_json": "/tmp/stats.json" '}}}, 'assembly': {'metavar': 'assembly', 'help': "Genome Assembly (e.g. 'hg19', 'hg38').\\n", 'required': False, 'default': 'hg19', 'type': <class 'str'>, 'extra': {'examples': {'Default assembly for all analysis tools': '"assembly": "hg19" ', 'List of assemblies for databases download tool': '"assembly": "hg19,hg38" '}}}, 'genome': {'metavar': 'genome', 'help': "Genome file in fasta format (e.g. 
'hg19.fa', 'hg38.fa').\\n", 'required': False, 'default': '/Users/lebechea/howard/databases/genomes/current/hg19/hg19.fa', 'type': <howard.tools.tools.PathType object>, 'gooey': {'widget': 'FileChooser', 'options': {'wildcard': 'All files (*)|*'}}}, 'hgvs_options': {'metavar': 'HGVS options', 'help': "Quick HGVS annotation options.\\nThis option will skip all other hgvs options.\\nExamples:\\n- 'default' (for default options)\\n- 'full_format' (for full format HGVS annotation)\\n- 'use_gene=True:add_protein=true:codon_type=FULL'\\n", 'required': False, 'default': None, 'type': <class 'str'>}, 'use_gene': {'help': "Use Gene information to generate HGVS annotation\\n(e.g. 'NM_152232(TAS1R2):c.231T>C')", 'action': 'store_true', 'default': False}, 'use_exon': {'help': "Use Exon information to generate HGVS annotation\\n(e.g. 'NM_152232(exon2):c.231T>C').\\nOnly if 'use_gene' is not enabled.\\n", 'action': 'store_true', 'default': False}, 'use_protein': {'help': "Use Protein level to generate HGVS annotation\\n(e.g. 'NP_689418:p.Cys77Arg').\\nCan be used with 'use_exon' or 'use_gene'.\\n", 'action': 'store_true', 'default': False}, 'add_protein': {'help': "Add Protein level to DNA HGVS annotation (e.g 'NM_152232:c.231T>C,NP_689418:p.Cys77Arg').\\n", 'action': 'store_true', 'default': False}, 'full_format': {'help': "Generates HGVS annotation in a full format\\nby using all information to generates an exhaustive annotation\\n(non-standard, e.g. 'TAS1R2:NM_152232:NP_689418:c.231T>C:p.Cys77Arg').\\nUse 'use_exon' to add exon information\\n(e.g 'TAS1R2:NM_152232:NP_689418:exon2:c.231T>C:p.Cys77Arg').\\n", 'action': 'store_true', 'default': False}, 'use_version': {'help': "Generates HGVS annotation with transcript version\\n(e.g. 'NM_152232.1:c.231T>C').\\n", 'action': 'store_true', 'default': False}, 'codon_type': {'metavar': 'Codon type', 'help': "Amino Acide Codon format type to use to generate HGVS annotation.\\nAvailable:\\n- '1': codon in 1 character (e.g. 
'C', 'R')\\n- '3': codon in 3 character (e.g. 'Cys', 'Arg')\\n-'FULL': codon in full name (e.g. 'Cysteine', 'Arginine')\\n", 'required': False, 'default': '3', 'type': <class 'str'>, 'choices': ['1', '3', 'FULL'], 'gooey': {'widget': 'Dropdown', 'options': {}}}, 'refgene': {'metavar': 'refGene', 'help': 'Path to refGene annotation file.\\n', 'required': False, 'default': None, 'type': <howard.tools.tools.PathType object>, 'gooey': {'widget': 'FileChooser', 'options': {'wildcard': 'All files (*)|*', 'default_dir': '/Users/lebechea/howard/databases/refseq/current', 'default_file': 'ncbiRefSeq.txt', 'message': 'Path to refGene annotation file'}}}, 'refseqlink': {'metavar': 'refSeqLink', 'help': 'Path to refSeqLink annotation file.\\n', 'required': False, 'default': None, 'type': <howard.tools.tools.PathType object>, 'gooey': {'widget': 'FileChooser', 'options': {'wildcard': 'All files (*)|*', 'default_dir': '/Users/lebechea/howard/databases/refseq/current', 'default_file': 'ncbiRefSeq.txt', 'message': 'Path to refGeneLink annotation file'}}}, 'refseq-folder': {'metavar': 'refseq folder', 'help': 'Folder containing refSeq files.\\n', 'required': False, 'default': '/Users/lebechea/howard/databases/refseq/current', 'type': <howard.tools.tools.PathType object>, 'gooey': {'widget': 'DirChooser', 'options': {'default_dir': '/Users/lebechea/howard/databases/refseq/current', 'message': 'Path to refGenefolder'}}}, 'download-genomes': {'metavar': 'genomes', 'help': "Path to genomes folder\\nwith Fasta files, indexes,\\nand all files generated by pygenome module.\\n(e.g. 
'/Users/lebechea/howard/databases/genomes/current').\\n", 'required': False, 'default': None, 'type': <howard.tools.tools.PathType object>, 'gooey': {'widget': 'DirChooser', 'options': {'default_dir': '/Users/lebechea/howard/databases', 'message': 'Path to genomes folder'}}}, 'download-genomes-provider': {'metavar': 'genomes provider', 'help': 'Download Genome from an external provider.\\nAvailable: GENCODE, Ensembl, UCSC, NCBI.\\n', 'required': False, 'default': 'UCSC', 'type': <class 'str'>, 'choices': ['GENCODE', 'Ensembl', 'UCSC', 'NCBI'], 'gooey': {'widget': 'Dropdown', 'options': {}}}, 'download-genomes-contig-regex': {'metavar': 'genomes contig regex', 'help': "Regular expression to select specific chromosome\\n(e.g 'chr[0-9XYM]+$').\\n", 'required': False, 'default': None, 'type': <class 'str'>}, 'download-annovar': {'metavar': 'Annovar', 'help': "Path to Annovar databases\\n(e.g. '/Users/lebechea/howard/databases/annovar/current').\\n", 'required': False, 'type': <howard.tools.tools.PathType object>, 'default': None, 'gooey': {'widget': 'DirChooser', 'options': {'default_dir': '/Users/lebechea/howard/databases', 'message': 'Path to Annovar databases folder'}}}, 'download-annovar-files': {'metavar': 'Annovar code', 'help': "Download Annovar databases for a list of Annovar file code (see Annovar Doc).\\nUse None to donwload all available files,\\nor Annovar keyword (e.g. 
'refGene', 'cosmic70', 'clinvar_202*').\\nNote that refGene will at least be downloaded,\\nand only files that not already exist or changed will be downloaded.\\n", 'required': False, 'default': None, 'type': <class 'str'>}, 'download-annovar-url': {'metavar': 'Annovar url', 'help': 'Annovar databases URL (see Annovar Doc).\\n', 'required': False, 'default': 'http://www.openbioinformatics.org/annovar/download', 'type': <class 'str'>}, 'download-snpeff': {'metavar': 'snpEff', 'help': 'Download snpEff databases within snpEff folder', 'required': False, 'default': None, 'type': <howard.tools.tools.PathType object>, 'gooey': {'widget': 'DirChooser', 'options': {'default_dir': '/Users/lebechea/howard/databases', 'message': 'Path to snpEff databases folder'}}}, 'download-refseq': {'metavar': 'refSeq', 'help': "Path to refSeq databases\\n(e.g. '/Users/lebechea/howard/databases/refseq/current').\\n", 'required': False, 'default': None, 'type': <howard.tools.tools.PathType object>, 'gooey': {'widget': 'DirChooser', 'options': {'default_dir': '/Users/lebechea/howard/databases', 'message': 'Path to refGene files folder'}}}, 'download-refseq-url': {'metavar': 'refSeq url', 'help': "refSeq databases URL (see refSeq WebSite)\\n(e.g. 'http://hgdownload.soe.ucsc.edu/goldenPath')\u2022/n", 'required': False, 'default': 'http://hgdownload.soe.ucsc.edu/goldenPath', 'type': <class 'str'>}, 'download-refseq-prefix': {'metavar': 'refSeq prefix', 'help': 'Check existing refSeq files in refSeq folder.\\n', 'required': False, 'default': 'ncbiRefSeq', 'type': <class 'str'>}, 'download-refseq-files': {'metavar': 'refSeq files', 'help': 'List of refSeq files to download.\\n', 'required': False, 'default': 'ncbiRefSeq.txt,ncbiRefSeqLink.txt', 'type': <class 'str'>}, 'download-refseq-format-file': {'metavar': 'refSeq format file', 'help': "Name of refSeq file to convert in BED format\\n(e.g. 
'ncbiRefSeq.txt').\\nProcess only if not None.\\n", 'required': False, 'default': None, 'type': <class 'str'>}, 'download-refseq-include-utr5': {'help': "Formating BED refSeq file including 5'UTR.\\n", 'action': 'store_true', 'default': False}, 'download-refseq-include-utr3': {'help': "Formating BED refSeq file including 3'UTR.\\n", 'action': 'store_true', 'default': False}, 'download-refseq-include-chrM': {'help': "Formating BED refSeq file including Mitochondiral chromosome 'chrM' or 'chrMT'.\\n", 'action': 'store_true', 'default': False}, 'download-refseq-include-non-canonical-chr': {'help': 'Formating BED refSeq file including non canonical chromosomes.\\n', 'action': 'store_true', 'default': False}, 'download-refseq-include-non-coding-transcripts': {'help': 'Formating BED refSeq file including non coding transcripts.\\n', 'action': 'store_true', 'default': False}, 'download-refseq-include-transcript-version': {'help': 'Formating BED refSeq file including transcript version.\\n', 'action': 'store_true', 'default': False}, 'download-dbnsfp': {'metavar': 'dbNSFP', 'help': "Download dbNSFP databases within dbNSFP folder(e.g. '/Users/lebechea/howard/databases').\\n", 'required': False, 'default': None, 'type': <howard.tools.tools.PathType object>, 'gooey': {'widget': 'DirChooser', 'options': {'default_dir': '/Users/lebechea/howard/databases', 'message': 'Path to dbNSFP databases folder'}}}, 'download-dbnsfp-url': {'metavar': 'dbNSFP url', 'help': "Download dbNSFP databases URL (see dbNSFP website)\\n(e.g. https://dbnsfp.s3.amazonaws.com').\\n", 'required': False, 'default': 'https://dbnsfp.s3.amazonaws.com', 'type': <class 'str'>}, 'download-dbnsfp-release': {'metavar': 'dnNSFP release', 'help': "Release of dbNSFP to download (see dbNSFP website)\\n(e.g. 
'4.4a').\\n", 'required': False, 'default': '4.4a'}, 'download-dbnsfp-parquet-size': {'metavar': 'dbNSFP parquet size', 'help': 'Maximum size (Mb) of data files in Parquet folder.\\nParquet folder are partitioned (hive) by chromosome (sub-folder),\\nwhich contain N data files.\\n', 'required': False, 'default': 100, 'type': <class 'int'>, 'gooey': {'widget': 'IntegerField', 'options': {'min': 1, 'max': 100000, 'increment': 10}}}, 'download-dbnsfp-subdatabases': {'help': 'Generate dbNSFP sub-databases.\\ndbNSFP provides multiple databases which are split onto multiple columns.\\nThis option create a Parquet folder for each sub-database (based on columns names).\\n', 'action': 'store_true', 'default': False}, 'download-dbnsfp-parquet': {'help': 'Generate a Parquet file for each Parquet folder.\\n', 'action': 'store_true', 'default': False}, 'download-dbnsfp-vcf': {'help': 'Generate a VCF file for each Parquet folder.\\nNeed genome FASTA file (see --download-genome).\\n', 'action': 'store_true', 'default': False}, 'download-dbnsfp-no-files-all': {'help': "Not generate database Parquet/VCF file for the entire database ('ALL').\\nOnly sub-databases files will be generated.\\n(see '--download-dbnsfp-subdatabases').\\n", 'action': 'store_true', 'default': False}, 'download-dbnsfp-add-info': {'help': 'Add INFO column (VCF format) in Parquet folder and file.\\nUseful for speed up full annotation (all available columns).\\nIncrease memory and space during generation of files.\\n', 'action': 'store_true', 'default': False}, 'download-dbnsfp-only-info': {'help': 'Add only INFO column (VCF format) in Parquet folder and file.\\nUseful for speed up full annotation (all available columns).\\nDecrease memory and space during generation of files.\\nIncrease time for partial annotation (some available columns).\\n', 'action': 'store_true', 'default': False}, 'download-dbnsfp-uniquify': {'help': 'Uniquify values within column\\n(e.g. 
"D,D" to "D", "D,.,T" to "D,T").\\nRemove transcripts information details.\\nUsefull to reduce size of the database.\\nIncrease memory and space during generation of files.\\n', 'action': 'store_true', 'default': False}, 'download-dbnsfp-row-group-size': {'metavar': 'dnNSFP row grooup size', 'help': 'Minimum number of rows in a parquet row group (see duckDB doc).\\nLower can reduce memory usage and slightly increase space during generation,\\nspeed up highly selective queries, slow down whole file queries (e.g. aggregations).\\n', 'required': False, 'default': 100000, 'type': <class 'int'>, 'gooey': {'widget': 'IntegerField', 'options': {'min': 1, 'max': 100000000000, 'increment': 10000}}}, 'download-alphamissense': {'metavar': 'AlphaMissense', 'help': 'Path to AlphaMissense databases', 'required': False, 'default': None, 'type': <howard.tools.tools.PathType object>, 'gooey': {'widget': 'DirChooser', 'options': {'default_dir': '/Users/lebechea/howard/databases', 'message': 'Path to Alphamissense databases folder'}}}, 'download-alphamissense-url': {'metavar': 'AlphaMissense url', 'help': "Download AlphaMissense databases URL (see AlphaMissense website)\\n(e.g. 'https://storage.googleapis.com/dm_alphamissense').\\n", 'required': False, 'default': 'https://storage.googleapis.com/dm_alphamissense', 'type': <class 'str'>}, 'download-exomiser': {'metavar': 'Exomiser', 'help': 'Path to Exomiser databases\\n(e.g. 
/Users/lebechea/howard/databases/exomiser/current).\\n', 'required': False, 'default': None, 'type': <howard.tools.tools.PathType object>, 'gooey': {'widget': 'DirChooser', 'options': {'default_dir': '/Users/lebechea/howard/databases', 'message': 'Path to Exomiser databases folder'}}}, 'download-exomiser-application-properties': {'metavar': 'Exomiser application properties', 'help': "Exomiser Application Properties configuration file (see Exomiser website).\\nThis file contains configuration settings for the Exomiser tool.\\nIf this parameter is not provided, the function will attempt to locate\\nthe application properties file automatically based on the Exomiser.\\nConfiguration information will be used to download expected releases (if no other parameters).\\nCADD and REMM will be downloaded only if 'path' are provided.\\n", 'required': False, 'default': None, 'type': <howard.tools.tools.PathType object>, 'gooey': {'widget': 'FileChooser', 'options': {'wildcard': 'All files (*)|*', 'options': {'default_dir': '/Users/lebechea/howard/databases/exomiser/current', 'message': 'Path to Exomiser application properties file'}}}}, 'download-exomiser-url': {'metavar': 'Exomiser url', 'help': "URL where Exomiser database files can be downloaded from\\n(e.g. 
'http://data.monarchinitiative.org/exomiser').\\n", 'required': False, 'default': 'http://data.monarchinitiative.org/exomiser', 'type': <class 'str'>}, 'download-exomiser-release': {'metavar': 'Exomiser release', 'help': 'Release of Exomiser data to download.\\nIf "default", "auto", or "config", retrieve from Application Properties file.\\nIf not provided (None), from Application Properties file (Exomiser data-version) \\nor default \\'2109\\'.\\n', 'required': False, 'default': None, 'type': <class 'str'>}, 'download-exomiser-phenotype-release': {'metavar': 'Exomiser phenoptye release', 'help': 'Release of Exomiser phenotype to download.\\nIf not provided (None), from Application Properties file (Exomiser Phenotype data-version)\\nor Exomiser release.\\n', 'required': False, 'default': None, 'type': <class 'str'>}, 'download-exomiser-remm-release': {'metavar': 'Exomiser remm release', 'help': 'Release of ReMM (Regulatory Mendelian Mutation) database to download.\\nIf "default", "auto", or "config", retrieve from Application Properties file.\\n', 'required': False, 'default': None, 'type': <class 'str'>}, 'download-exomiser-remm-url': {'metavar': 'Exomiser remm url', 'help': "URL where ReMM (Regulatory Mendelian Mutation) database files can be downloaded from\\n(e.g. 'https://kircherlab.bihealth.org/download/ReMM').\\n", 'required': False, 'default': 'https://kircherlab.bihealth.org/download/ReMM', 'type': <class 'str'>}, 'download-exomiser-cadd-release': {'metavar': 'Exomiser cadd release', 'help': 'Release of CADD (Combined Annotation Dependent Depletion) database to download.\\nIf "default", "auto", or "config", retrieve from Application Properties file.\\n', 'required': False, 'default': None, 'type': <class 'str'>}, 'download-exomiser-cadd-url': {'metavar': 'Exomiser cadd url', 'help': "URL where CADD (Combined Annotation Dependent Depletion) database files can be downloaded from\\n(e.g. 
'https://kircherlab.bihealth.org/download/CADD').\\n", 'required': False, 'default': 'https://kircherlab.bihealth.org/download/CADD', 'type': <class 'str'>}, 'download-exomiser-cadd-url-snv-file': {'metavar': 'Exomiser url snv file', 'help': 'Name of the file containing the SNV (Single Nucleotide Variant) data\\nfor the CADD (Combined Annotation Dependent Depletion) database.\\n', 'required': False, 'default': 'whole_genome_SNVs.tsv.gz', 'type': <class 'str'>}, 'download-exomiser-cadd-url-indel-file': {'metavar': 'Exomiser cadd url indel', 'help': 'Name of the file containing the INDEL (Insertion-Deletion) data\\nfor the CADD (Combined Annotation Dependent Depletion) database.\\n', 'required': False, 'default': 'InDels.tsv.gz', 'type': <class 'str'>}, 'download-dbsnp': {'metavar': 'dnSNP', 'help': "Path to dbSNP databases\\n(e.g. '/Users/lebechea/howard/databases/exomiser/dbsnp').\\n", 'required': False, 'default': None, 'type': <howard.tools.tools.PathType object>, 'gooey': {'widget': 'DirChooser', 'options': {'default_dir': '/Users/lebechea/howard/databases', 'message': 'Path to dbSNP databases folder'}}}, 'download-dbsnp-releases': {'metavar': 'dnSNP releases', 'help': "Release of dbSNP to download\\n(e.g. 'b152', 'b152,b156').\\n", 'required': False, 'default': 'b156', 'type': <class 'str'>}, 'download-dbsnp-release-default': {'metavar': 'dnSNP release default', 'help': "Default Release of dbSNP ('default' symlink)\\n(e.g. 'b156').\\nIf None, first release to download will be assigned as default\\nonly if it does not exists.\\n", 'required': False, 'default': None, 'type': <class 'str'>}, 'download-dbsnp-url': {'metavar': 'dbSNP url', 'help': "URL where dbSNP database files can be downloaded from.\\n(e.g. 
'https://ftp.ncbi.nih.gov/snp/archive').\\n", 'required': False, 'default': 'https://ftp.ncbi.nih.gov/snp/archive', 'type': <class 'str'>}, 'download-dbsnp-url-files': {'metavar': 'dbSNP url files', 'help': 'Dictionary that maps assembly names to specific dbSNP URL files.\\nIt allows you to provide custom dbSNP URL files for specific assemblies\\ninstead of using the default file naming convention.\\n', 'required': False, 'default': None, 'type': <class 'str'>}, 'download-dbsnp-url-files-prefix': {'metavar': 'dbSNP url files prefix', 'help': 'String that represents the prefix of the dbSNP file name for a specific assembly.\\nIt is used to construct the full URL of the dbSNP file to be downloaded.\\n', 'required': False, 'default': 'GCF_000001405', 'type': <class 'str'>}, 'download-dbsnp-assemblies-map': {'metavar': 'dbSNP assemblies map', 'help': 'dictionary that maps assembly names to their corresponding dbSNP versions.\\nIt is used to construct the dbSNP file name based on the assembly name.\\n', 'required': False, 'default': {'hg19': '25', 'hg38': '40'}, 'type': <class 'str'>, 'gooey': {'options': {'initial_value': '{"hg19": "25", "hg38": "40"}'}}}, 'download-dbsnp-vcf': {'help': 'Generate well-formatted VCF from downloaded file:\\n- Add and filter contigs associated to assembly\\n- Normalize by splitting multiallelics\\n- Need genome (see --download-genome)\\n', 'action': 'store_true', 'default': False}, 'download-dbsnp-parquet': {'help': 'Generate Parquet file from VCF.\\n', 'action': 'store_true', 'default': False}, 'convert-hgmd': {'metavar': 'HGMD', 'help': 'Convert HGMD databases.\\nFolder where the HGMD databases will be stored.\\nFields in VCF, Parquet and TSV will be generated.\\nIf the folder does not exist, it will be created.\\n', 'required': False, 'default': None, 'type': <howard.tools.tools.PathType object>, 'gooey': {'widget': 'DirChooser'}}, 'convert-hgmd-file': {'metavar': 'HGMD file', 'help': "File from HGMD.\\nName format 
'HGMD_Pro_<release>_<assembly>.vcf.gz'.\\n", 'required': False, 'default': None, 'type': <howard.tools.tools.PathType object>, 'gooey': {'widget': 'FileChooser'}}, 'convert-hgmd-basename': {'metavar': 'HGMD basename', 'help': "File output basename.\\nGenerated files will be prefixed by basename\\n(e.g. 'HGMD_Pro_MY_RELEASE')\\nBy default (None), input file name without '.vcf.gz'.\\n", 'required': False, 'default': None, 'type': <class 'str'>}, 'generate-param': {'metavar': 'param', 'help': 'Parameter file (JSON) with all databases found.\\nDatabases folders scanned are defined in config file.\\nStructure of databases follow this structure (see doc):\\n.../<database>/<release>/<assembly>/*.[parquet|vcf.gz|...]\\n', 'required': False, 'default': None, 'type': <howard.tools.tools.PathType object>, 'gooey': {'widget': 'FileSaver', 'options': {'wildcard': 'JSON file (*.json)|*.json'}}}, 'generate-param-description': {'metavar': 'param description', 'help': 'Description file (JSON) with all databases found.\\nContains all databases with description of format, assembly, fields...\\n', 'required': False, 'default': None, 'type': <howard.tools.tools.PathType object>, 'gooey': {'widget': 'FileSaver', 'options': {'wildcard': 'JSON file (*.json)|*.json'}}}, 'generate-param-releases': {'metavar': 'param release', 'help': "List of database folder releases to check\\n(e.g. 'current', 'latest').\\n", 'required': False, 'default': 'current', 'type': <class 'str'>}, 'generate-param-formats': {'metavar': 'param formats', 'help': "List of database formats to check\\n(e.g. 'parquet', 'parquet,vcf,bed,tsv').\\n", 'required': False, 'default': 'parquet', 'type': <class 'str'>}, 'generate-param-bcftools': {'help': "Generate parameter JSON file with BCFTools annotation for allowed formats\\n(i.e. 
'vcf', 'bed').\\n", 'action': 'store_true', 'default': False}, 'help_md': {'metavar': 'help markdown', 'help': 'Help Output file in MarkDown format.\\n', 'required': False, 'default': None, 'type': <howard.tools.tools.PathType object>, 'gooey': {'widget': 'FileSaver', 'options': {'wildcard': 'HTML file (*.md)|*.md'}}}, 'help_html': {'metavar': 'help html', 'help': 'Help Output file in HTML format.\\n', 'required': False, 'default': None, 'type': <howard.tools.tools.PathType object>, 'gooey': {'widget': 'FileSaver', 'options': {'wildcard': 'HTML file (*.html)|*.html'}}}, 'help_pdf': {'metavar': 'help pdf', 'help': 'Help Output file in PDF format.\\n', 'required': False, 'default': None, 'type': <howard.tools.tools.PathType object>, 'gooey': {'widget': 'FileSaver', 'options': {'wildcard': 'PDF file (*.pdf)|*.pdf'}}}, 'help_json_input': {'metavar': 'help JSON input', 'help': 'Help input file in JSON format.\\n', 'required': False, 'default': None, 'type': <howard.tools.tools.PathType object>, 'gooey': {'widget': 'FileChooser', 'options': {'wildcard': 'JSON file (*.json)|*.json|All files (*)|*'}}}, 'help_md_input': {'metavar': 'help MarkDown input', 'help': 'Help input file in MarkDown format.\\n', 'required': False, 'default': None, 'type': <howard.tools.tools.PathType object>, 'gooey': {'widget': 'FileChooser', 'options': {'wildcard': 'MarkDown file (*.md)|*.md|All files (*)|*'}}}, 'code_type': {'metavar': 'example code type', 'help': "Help example code type for input JSON format\\n(e.g. 'json', 'bash').\\n", 'required': False, 'default': '', 'type': <class 'str'>}, 'help_json_input_title': {'metavar': 'help JSON input title', 'help': 'Help JSON input title.\\n', 'required': False, 'default': 'Help', 'type': <class 'str'>}, 'genomes-folder': {'metavar': 'genomes', 'help': "Folder containing genomes.\\n(e.g. 
'/Users/lebechea/howard/databases/genomes/current'", 'required': False, 'default': '/Users/lebechea/howard/databases/genomes/current', 'type': <howard.tools.tools.PathType object>, 'gooey': {'widget': 'DirChooser', 'options': {'default_dir': '/Users/lebechea/howard/databases/genomes/current', 'message': 'Path to genomes databases folder'}}}, 'config': {'metavar': 'config', 'help': 'Configuration JSON file defined default configuration regarding \\nresources (e.g. threads, memory),\\nsettings (e.g. verbosity, temporary files),\\ndefault folders (e.g. for databases)\\nand paths to external tools.\\n', 'required': False, 'default': '{}', 'type': <class 'str'>, 'gooey': {'widget': 'FileChooser', 'options': {'initial_value': '{}'}}}, 'threads': {'metavar': 'threads', 'help': 'Specify the number of threads to use for processing HOWARD.\\nIt determines the level of parallelism,\\neither on python scripts, duckdb engine and external tools.\\nIt and can help speed up the process/tool.\\nUse -1 to use all available CPU/cores.\\nEither non valid value is 1 CPU/core.\\n', 'required': False, 'type': <class 'int'>, 'default': -1, 'gooey': {'widget': 'IntegerField', 'options': {'min': -1, 'max': 1000, 'increment': 1}}, 'extra': {'examples': {'# Automatically detect all available CPU/cores': '"threads": -1', '# Define 8 CPU/cores': '"threads": 8'}}}, 'memory': {'metavar': 'memory', 'help': "Specify the memory to use in format FLOAT[kMG]\\n(e.g. 
'8G', '12.42G', '1024M').\\nIt determines the amount of memory for duckDB engine and external tools\\n(especially for JAR programs).\\nIt can help to prevent 'out of memory' failures.\\nBy default (None) is 80%% of RAM (for duckDB).\\n", 'required': False, 'type': <class 'str'>, 'default': None, 'extra': {'format': 'FLOAT[kMG]', 'examples': {'# Automatically detect all available CPU/cores': '"threads": -1', '# Define 8 CPU/cores': '"threads": 8'}}}, 'chunk_size': {'metavar': 'chunk size', 'help': 'Number of records in batch to export output file.\\nThe lower the chunk size, the less memory consumption.\\nFor Parquet partitioning, files size will depend on the chunk size.\\n', 'required': False, 'default': 1000000, 'type': <class 'int'>, 'gooey': {'widget': 'IntegerField', 'options': {'min': 1, 'max': 100000000000, 'increment': 10000}}, 'extra': {'examples': {'Chunk size of 1.000.000 by default': '"chunk_size": 1000000', 'Smaller chunk size to reduce Parquet file size and memory usage': '"chunk_size": 100000'}}}, 'tmp': {'metavar': 'Temporary folder', 'help': "Temporary folder (e.g. 
'/tmp').\\nBy default, '.tmp' for duckDB (see doc),external tools and python scripts.\\n", 'required': False, 'default': None, 'type': <howard.tools.tools.PathType object>, 'gooey': {'widget': 'DirChooser'}, 'extra': {'examples': {'# System temporary folder': '"tmp": "/tmp"', '# HOWARD work directory': '"tmp": "~/howard/tmp"', '# Current work directory': '"tmp": ".tmp"'}}}, 'duckdb_settings': {'metavar': 'duckDB settings', 'help': 'DuckDB settings (see duckDB doc) as JSON (string or file).\\nThese settings have priority (see options \\'threads\\', \\'tmp\\'...).\\nExamples: \\'{"TimeZone": "GMT", "temp_directory": "/tmp/duckdb", "threads": 8}\\'.\\n', 'required': False, 'default': None, 'type': <howard.tools.tools.PathType object>, 'gooey': {'widget': 'FileChooser', 'options': {'wildcard': 'JSON file (*.json)|*.json|All files (*)|*'}}, 'extra': {'examples': {'DuckDB settings JSON file': '"duckdb_settings": "/path/to/duckdb_config.json"', 'JSON string for Time zone, temporary directory and threads for duckDB': '"duckdb_settings": {\\n "TimeZone": "GMT",\\n "temp_directory": "/tmp/duckdb",\\n "threads": 8\\n}'}}}, 'verbosity': {'metavar': 'verbosity', 'help': 'Verbosity level\\nAvailable: CRITICAL, ERROR, WARNING, INFO, DEBUG or NOTSET\\n- DEBUG: Detailed information, typically of interest only when diagnosing problems.\\n- INFO: Confirmation that things are working as expected.\\n- WARNING: An indication that something unexpected happened.\\n- ERROR: Due to a more serious problem.\\n- CRITICAL: A serious error.\\n- FATAL: A fatal error.\\n- NOTSET: All messages.\\n', 'required': False, 'choices': ['CRITICAL', 'ERROR', 'WARNING', 'INFO', 'DEBUG', 'NOTSET', 'WARN', 'FATAL'], 'default': 'INFO', 'type': <class 'str'>, 'gooey': {'widget': 'Dropdown', 'options': {}}, 'extra': {'examples': {'Default verbosity': '"verbosity": "INFO"', 'ERROR level (quiet mode)': '"verbosity": "ERROR"', 'For debug': '"verbosity": "DEBUG"'}}}, 'access': {'metavar': 'access mode', 'help': 
"Access mode to variants file or database.\\nEither 'RW' for Read and Write, or 'RO' for Read Only.\\n", 'default': 'RW', 'type': <class 'str'>, 'choices': ['RW', 'RO'], 'gooey': {'widget': 'Dropdown', 'options': {}}, 'extra': {'examples': {'Read and Write mode': '"access": "RW"', 'Read only mode': '"access": "RO"'}}}, 'log': {'metavar': 'log', 'help': "Logs file\\n(e.g. 'my.log').\\n", 'required': False, 'default': None, 'type': <howard.tools.tools.PathType object>, 'gooey': {'widget': 'FileSaver'}, 'extra': {'examples': {'Relative path to log file': '"log": "my.log"', '# HOWARD work directory': '"log": "~/howard/log"', 'Full path to log file': '"log": "/tmp/my.log"'}}}, 'interactive': {'help': 'Interative mose..\\n', 'action': 'store_true', 'default': False}, 'quiet': {'help': '==SUPPRESS==', 'action': 'store_true', 'default': False}, 'verbose': {'help': '==SUPPRESS==', 'action': 'store_true', 'default': False}, 'debug': {'help': '==SUPPRESS==', 'action': 'store_true', 'default': False}, 'databases_folder': {'help': 'Path of HOWARD database folder.\\n', 'type': <class 'str'>, 'default': '/Users/lebechea/howard/databases'}, 'database': {'help': 'Which database to update.\\n', 'type': <class 'str'>, 'default': 'clinvar', 'choices': ['clinvar']}, 'update_config': {'help': 'Path of json configuration file.\\n', 'type': <class 'str'>}, 'current_folder': {'help': 'Path of json configuration file.\\n', 'type': <class 'str'>, 'default': 'current'}, 'add_variants_view': {'help': 'Create a sheet with all INFO fields exploded.\\n', 'action': 'store_true', 'default': False}, 'add_header': {'help': 'Create a sheet with all INFO fields header descritions.\\n', 'action': 'store_true', 'default': False}, 'transcripts_expected': {'metavar': 'List of transcripts (file)', 'help': 'File with a list of transcripts in first column.\\n', 'required': False, 'default': None, 'type': <howard.tools.tools.PathType object>, 'gooey': {'widget': 'FileChooser', 'options': {'wildcard': 'TSV file 
(*.tsv)|*.tsv|All files (*)|*'}}}, 'transcripts_missing': {'metavar': 'List of missing transcripts (file)', 'help': 'File with a list of missing transcripts in first column.\\n', 'required': False, 'default': None, 'type': <howard.tools.tools.PathType object>, 'gooey': {'widget': 'FileSaver', 'options': {'wildcard': 'TSV file (*.tsv)|*.tsv|All files (*)|*'}}}, 'genebe_use_refseq': {'help': 'Use refSeq to annotate (default).\\n', 'action': 'store_true', 'default': False}, 'genebe_use_ensembl': {'help': 'Use Ensembl to annotate.\\n', 'action': 'store_true', 'default': False}, 'not_flatten_consequences': {'help': 'Use exploded annotation informations.\\n', 'action': 'store_true', 'default': False}, 'minimalize_info': {'help': "Minimalize INFO field (e.g. '.' value).\\n", 'action': 'store_true', 'default': False}, 'minimalize_id': {'help': "Minimalize ID field (e.g. '.' value).\\n", 'action': 'store_true', 'default': False}, 'minimalize_qual': {'help': "Minimalize QUAL field (e.g. '.' value).\\n", 'action': 'store_true', 'default': False}, 'minimalize_filter': {'help': "Minimalize FILTER field (e.g. '.' value).\\n", 'action': 'store_true', 'default': False}, 'minimalize_samples': {'help': "Minimalize samples to keep only genotypes (i.e. 'GT').\\n", 'action': 'store_true', 'default': False}, 'remove_samples': {'help': 'Remove all samples to keep only variants.\\n', 'action': 'store_true', 'default': False}}, 'commands_arguments': {'query': {'function': 'query', 'description': "Query genetic variations in SQL format. Data can be loaded into 'variants' table from various formats (e.g. VCF, TSV, Parquet...). Using --explode_infos allow query on INFO/tag annotations. SQL query can also use external data within the request, such as a Parquet file(s). 
", 'help': 'Query genetic variations file in SQL format.', 'epilog': 'Usage examples:\\n howard query --input=tests/data/example.vcf.gz --query="SELECT * FROM variants WHERE REF = \\'A\\' AND POS < 100000" \\n howard query --input=tests/data/example.vcf.gz --explode_infos --query=\\'SELECT "#CHROM", POS, REF, ALT, DP, CLNSIG, sample2, sample3 FROM variants WHERE DP >= 50 OR CLNSIG NOT NULL ORDER BY DP DESC\\' \\n howard query --query="SELECT \\\\"#CHROM\\\\", POS, REF, ALT, \\\\"INFO/Interpro_domain\\\\" FROM \\'tests/databases/annotations/current/hg19/dbnsfp42a.parquet\\' WHERE \\\\"INFO/Interpro_domain\\\\" NOT NULL ORDER BY \\\\"INFO/SiPhy_29way_logOdds_rankscore\\\\" DESC LIMIT 10" \\n howard query --explode_infos --explode_infos_prefix=\\'INFO/\\' --query="SELECT \\\\"#CHROM\\\\", POS, REF, ALT, STRING_AGG(INFO, \\';\\') AS INFO FROM \\'tests/databases/annotations/current/hg19/*.parquet\\' GROUP BY \\\\"#CHROM\\\\", POS, REF, ALT" --output=/tmp/full_annotation.tsv && head -n2 /tmp/full_annotation.tsv \\n howard query --input=tests/data/example.vcf.gz --param=config/param.json \\n \\n', 'groups': {'main': {'input': False, 'output': False, 'param': False, 'query': False}, 'Explode': {'explode_infos': False, 'explode_infos_prefix': False, 'explode_infos_fields': False}, 'Query': {'query_limit': False, 'query_print_mode': False}, 'Export': {'include_header': False, 'parquet_partitions': False}}}, 'filter': {'function': 'filter', 'description': "Filter genetic variations in SQL format. Data can be loaded into 'variants' table from various formats (e.g. VCF, TSV, Parquet...). SQL filter can also use external data within the request, such as a Parquet file(s). 
", 'help': 'Filter genetic variations file in SQL format.', 'epilog': 'Usage examples:\\n howard filter --input=tests/data/example.vcf.gz --output=/tmp/example.filter.vcf.gz --filter="REF = \\'A\\' AND POS < 100000" \\n howard filter --input=tests/data/example.vcf.gz --output=/tmp/example.filter.vcf.gz --filter="REF = \\'A\\' AND POS < 100000" --samples="sample1,sample2" \\n howard filter --input=tests/data/example.vcf.gz --output=/tmp/example.filter.vcf.gz --filter="INFOS.CLNSIG LIKE \\'pathogenic\\'" --samples="sample1,sample2" \\n howard filter --input=tests/data/example.vcf.gz --output=/tmp/example.filter.vcf.gz --filter="QUAL > 100 AND SAMPLES.sample2.GT != \\'./.\\'" --samples="sample2" \\n \\n', 'groups': {'main': {'input': True, 'output': True}, 'Filters': {'filter': False, 'samples': False}, 'Export': {'include_header': False, 'parquet_partitions': False}}}, 'stats': {'function': 'stats', 'description': 'Statistics on genetic variations, such as: number of variants, number of samples, statistics by chromosome, genotypes by samples...', 'help': 'Statistics on genetic variations file.', 'epilog': 'Usage examples:\\n howard stats --input=tests/data/example.vcf.gz \\n howard stats --input=tests/data/example.vcf.gz --stats_md=/tmp/stats.md \\n howard stats --input=tests/data/example.vcf.gz --param=config/param.json \\n \\n', 'groups': {'main': {'input': True, 'param': False}, 'Stats': {'stats_md': False, 'stats_json': False}}}, 'convert': {'function': 'convert', 'description': "Convert genetic variations file to another format. Multiple format are available, such as usual and official VCF and BCF format, but also other formats such as TSV, CSV, PSV and Parquet/duckDB. These formats need a header '.hdr' file to take advantage of the power of howard (especially through INFO/tag definition), and using howard convert tool automatically generate header file fo futher use. 
", 'help': 'Convert genetic variations file to another format.', 'epilog': 'Usage examples:\\n howard convert --input=tests/data/example.vcf.gz --output=/tmp/example.tsv \\n howard convert --input=tests/data/example.vcf.gz --output=/tmp/example.parquet \\n howard convert --input=tests/data/example.vcf.gz --output=/tmp/example.tsv --explode_infos --explode_infos_fields=\\'CLNSIG,SIFT,DP\\' --order_by=\\'CLNSIG DESC, DP DESC\\' \\n howard convert --input=tests/data/example.vcf.gz --output=/tmp/example.tsv --explode_infos --explode_infos_prefix=\\'INFO/\\' --explode_infos_fields=\\'CLNSIG,SIFT,DP,*\\' --order_by=\\'"INFO/CLNSIG" DESC, "INFO/DP" DESC\\' --include_header \\n howard convert --input=tests/data/example.vcf.gz --output=/tmp/example.tsv --param=config/param.json \\n \\n', 'groups': {'main': {'input': True, 'output': True, 'param': False}, 'Explode': {'explode_infos': False, 'explode_infos_prefix': False, 'explode_infos_fields': False}, 'Export': {'include_header': False, 'order_by': False, 'parquet_partitions': False}}}, 'hgvs': {'function': 'hgvs', 'description': "HGVS annotation using HUGO HGVS internation Sequence Variant Nomenclature (http://varnomen.hgvs.org/). Annotation refere to refGene and genome to generate HGVS nomenclature for all available transcripts. 
This annotation add 'hgvs' field into VCF INFO column of a VCF file.", 'help': 'HGVS annotation (HUGO internation nomenclature) using refGene, genome and transcripts list.\\n', 'epilog': 'Usage examples:\\n howard hgvs --input=tests/data/example.full.vcf --output=/tmp/example.hgvs.vcf \\n howard hgvs --input=tests/data/example.full.vcf --output=/tmp/example.hgvs.tsv --param=config/param.json \\n howard hgvs --input=tests/data/example.full.vcf --output=/tmp/example.hgvs.vcf --full_format --use_exon \\n \\n', 'groups': {'main': {'input': True, 'output': True, 'param': False, 'hgvs_options': False, 'assembly': False}, 'HGVS': {'use_gene': False, 'use_exon': False, 'use_protein': False, 'add_protein': False, 'full_format': False, 'codon_type': False, 'refgene': False, 'refseqlink': False}}}, 'annotation': {'function': 'annotation', 'description': 'Annotation is mainly based on a build-in Parquet annotation method, and tools such as BCFTOOLS, Annovar and snpEff. It uses available databases (see Annovar and snpEff) and homemade databases. Format of databases are: parquet, duckdb, vcf, bed, Annovar and snpEff (Annovar and snpEff databases are automatically downloaded, see howard databases tool). 
', 'help': 'Annotation of genetic variations file using databases/files and tools.', 'epilog': "Usage examples:\\n howard annotation --input=tests/data/example.vcf.gz --output=/tmp/example.howard.vcf.gz --annotations='tests/databases/annotations/current/hg19/avsnp150.parquet,tests/databases/annotations/current/hg19/dbnsfp42a.parquet,tests/databases/annotations/current/hg19/gnomad211_genome.parquet' \\n howard annotation --input=tests/data/example.vcf.gz --output=/tmp/example.howard.tsv --assembly=hg19 --annotations='annovar:refGene,annovar:cosmic70,snpeff,tests/databases/annotations/current/hg19/clinvar_20210123.parquet' \\n howard annotation --input=tests/data/example.vcf.gz --output=/tmp/example.howard.tsv --assembly=hg19 --annotation_parquet='tests/databases/annotations/current/hg19/avsnp150.parquet,tests/databases/annotations/current/hg19/dbnsfp42a.parquet' \\n howard annotation --input=tests/data/example.vcf.gz --output=/tmp/example.howard.tsv --assembly=hg19 --annotation_bcftools='tests/databases/annotations/current/hg19/nci60.vcf.gz,tests/databases/annotations/current/hg19/dbnsfp42a.vcf.gz' \\n howard annotation --input=tests/data/example.vcf.gz --output=/tmp/example.howard.tsv --assembly=hg19 --annotation_snpsift='tests/databases/annotations/current/hg19/nci60.vcf.gz,tests/databases/annotations/current/hg19/dbnsfp42a.vcf.gz' \\n howard annotation --input=tests/data/example.vcf.gz --output=/tmp/example.howard.tsv --assembly=hg19 --annotation_annovar='nci60:cosmic70' \\n howard annotation --input=tests/data/example.vcf.gz --output=/tmp/example.howard.tsv --assembly=hg19 --annotation_snpeff='-hgvs' \\n howard annotation --input=tests/data/example.vcf.gz --output=/tmp/example.howard.tsv --assembly=hg19 --annotation_exomiser='preset=exome:transcript_source=refseq' \\n howard annotation --input=tests/data/example.vcf.gz --output=/tmp/example.howard.tsv --assembly=hg19 --annotation_splice='split_mode=one:spliceai_distance=500:spliceai_mask=1' \\n howard annotation 
--input=tests/data/example.vcf.gz --output=/tmp/example.howard.tsv --assembly=hg19 --annotations='ALL:parquet' \\n howard annotation --input=tests/data/example.vcf.gz --output=/tmp/example.howard.tsv --param=config/param.json \\n \\n", 'groups': {'main': {'input': True, 'output': True, 'param': False, 'annotations': False, 'annotation_parquet': False, 'annotation_bcftools': False, 'annotation_annovar': False, 'annotation_snpeff': False, 'annotation_snpsift': False, 'annotation_exomiser': False, 'annotation_splice': False, 'assembly': False}, 'Annotation': {'annotations_update': False, 'annotations_append': False}}}, 'calculation': {'function': 'calculation', 'description': 'Calculation processes variants information to generate new information, such as: identify variation type (VarType), harmonizes allele frequency (VAF) and calculate sttistics (VAF_stats), extracts Nomen (transcript, cNomen, pNomen...) from an HGVS field (e.g. snpEff, Annovar) with an optional list of personalized transcripts, generates VaRank format barcode, identify trio inheritance.', 'help': 'Calculation operations on genetic variations file and genotype information.\\n', 'epilog': "Usage examples:\\n howard calculation --input=tests/data/example.full.vcf --output=/tmp/example.calculation.tsv --calculations='vartype' \\n howard calculation --input=tests/data/example.ann.vcf.gz --output=/tmp/example.calculated.tsv --calculations='snpeff_hgvs,NOMEN' --hgvs_field=snpeff_hgvs --transcripts=tests/data/transcripts.tsv \\n howard calculation --input=tests/data/example.vcf.gz --output=/tmp/example.calculated.tsv --calculations='TRIO' --trio_pedigree='sample1,sample2,sample4' \\n howard calculation --input=tests/data/example.vcf.gz --output=/tmp/example.calculated.tsv --calculations='BARCODEFAMILY' --family_pedigree='sample1,sample2,sample4' \\n howard calculation --input=tests/data/example.ann.transcripts.vcf.gz --output=/tmp/example.calculation.transcripts.tsv --param=config/param.transcripts.json 
--calculations='TRANSCRIPTS_ANNOTATIONS,TRANSCRIPTS_PRIORITIZATION,TRANSCRIPTS_EXPORT' \\n howard calculation --input=tests/data/example.ann.vcf.gz --output=/tmp/example.ann.tsv --param=config/param.json \\n howard calculation --show_calculations \\n \\n", 'groups': {'main': {'input': False, 'output': False, 'param': False, 'calculations': False}, 'Calculation': {'calculation_config': False, 'show_calculations': False}, 'NOMEN': {'hgvs_field': False, 'transcripts': False}, 'TRIO': {'trio_pedigree': False}, 'BARCODEFAMILY': {'family_pedigree': False}}}, 'prioritization': {'function': 'prioritization', 'description': "Prioritization algorithm uses profiles to flag variants (as passed or filtered), calculate a prioritization score, and automatically generate a comment for each variants (example: 'polymorphism identified in dbSNP. associated to Lung Cancer. Found in ClinVar database'). Prioritization profiles are defined in a configuration file in JSON format. A profile is defined as a list of annotation/value, using wildcards and comparison options (contains, lower than, greater than, equal...). Annotations fields may be quality values (usually from callers, such as 'DP') or other annotations fields provided by annotations tools, such as HOWARD itself (example: COSMIC, Clinvar, 1000genomes, PolyPhen, SIFT). 
Multiple profiles can be used simultaneously, which is useful to define multiple validation/prioritization levels (example: 'standard', 'stringent', 'rare variants', 'low allele frequency').\\n", 'help': 'Prioritization of genetic variations based on annotations criteria (profiles).', 'epilog': "Usage examples:\\n howard prioritization --input=tests/data/example.vcf.gz --output=/tmp/example.prioritized.vcf.gz --prioritizations='default' \\n howard prioritization --input=tests/data/example.vcf.gz --output=/tmp/example.prioritized.vcf.gz --prioritizations='default,GERMLINE' --prioritization_config=config/prioritization_profiles.json \\n howard prioritization --input=tests/data/example.vcf.gz --output=/tmp/example.prioritized.tsv --param=config/param.json \\n \\n", 'groups': {'main': {'input': True, 'output': True, 'param': False, 'prioritizations': False}, 'Prioritization': {'default_profile': False, 'pzfields': False, 'prioritization_score_mode': False, 'prioritization_config': False}}}, 'process': {'function': 'process', 'description': 'howard process tool manage genetic variations to:\\n- annotates genetic variants with multiple annotation databases/files and tools\\n- calculates and normalizes annotations\\n- prioritizes variants with profiles (list of citeria) to calculate scores and flags\\n- translates into various formats\\n- query genetic variants and annotations\\n- generates variants statistics', 'help': 'Full genetic variations process: annotation, calculation, prioritization, format, query, filter...', 'epilog': 'Usage examples:\\n howard process --input=tests/data/example.vcf.gz --output=/tmp/example.annotated.vcf.gz --param=config/param.json \\n howard process --input=tests/data/example.vcf.gz --annotations=\\'snpeff\\' --calculations=\\'snpeff_hgvs\\' --prioritizations=\\'default\\' --explode_infos --output=/tmp/example.annotated.tsv --query=\\'SELECT "#CHROM", POS, ALT, REF, snpeff_hgvs FROM variants\\' \\n howard process 
--input=tests/data/example.vcf.gz --hgvs_options=\\'full_format,use_exon\\' --explode_infos --output=/tmp/example.annotated.tsv --query=\\'SELECT "#CHROM", POS, ALT, REF, hgvs FROM variants\\' \\n howard process --input=tests/data/example.vcf.gz --output=/tmp/example.howard.vcf.gz --hgvs=\\'full_format,use_exon\\' --annotations=\\'tests/databases/annotations/current/hg19/avsnp150.parquet,tests/databases/annotations/current/hg19/dbnsfp42a.parquet,tests/databases/annotations/current/hg19/gnomad211_genome.parquet\\' --calculations=\\'NOMEN\\' --explode_infos --query=\\'SELECT NOMEN, REVEL_score, SIFT_score, AF AS \\'gnomad_AF\\', ClinPred_score, ClinPred_pred FROM variants\\' \\n \\n', 'groups': {'main': {'input': True, 'output': True, 'param': False, 'hgvs_options': False, 'annotations': False, 'calculations': False, 'prioritizations': False, 'assembly': False}, 'HGVS': {'use_gene': False, 'use_exon': False, 'use_protein': False, 'add_protein': False, 'full_format': False, 'codon_type': False, 'refgene': False, 'refseqlink': False}, 'Annotation': {'annotations_update': False, 'annotations_append': False}, 'Calculation': {'calculation_config': False}, 'Prioritization': {'default_profile': False, 'pzfields': False, 'prioritization_score_mode': False, 'prioritization_config': False}, 'Query': {'query': False, 'query_limit': False, 'query_print_mode': False}, 'Explode': {'explode_infos': False, 'explode_infos_prefix': False, 'explode_infos_fields': False}, 'Export': {'include_header': False, 'order_by': False, 'parquet_partitions': False}}}, 'databases': {'function': 'databases', 'description': 'Download databases and needed files for howard and associated tools', 'help': 'Download databases and needed files for howard and associated tools', 'epilog': "Usage examples:\\n howard databases --assembly=hg19 --download-genomes=~/howard/databases/genomes/current --download-genomes-provider=UCSC --download-genomes-contig-regex='chr[0-9XYM]+$' \\n howard databases 
--assembly=hg19 --download-annovar=~/howard/databases/annovar/current --download-annovar-files='refGene,cosmic70,nci60' \\n howard databases --assembly=hg19 --download-snpeff=~/howard/databases/snpeff/current \\n howard databases --assembly=hg19 --download-refseq=~/howard/databases/refseq/current --download-refseq-format-file='ncbiRefSeq.txt' \\n howard databases --assembly=hg19 --download-dbnsfp=~/howard/databases/dbnsfp/current --download-dbnsfp-release='4.4a' --download-dbnsfp-subdatabases \\n howard databases --assembly=hg19 --download-alphamissense=~/howard/databases/alphamissense/current \\n howard databases --assembly=hg19 --download-exomiser=~/howard/databases/exomiser/current \\n howard databases --assembly=hg19 --download-dbsnp=~/howard/databases/dbsnp/current --download-dbsnp-vcf \\n cd ~/howard/databases && howard databases --assembly=hg19 --download-genomes=genomes/current --download-genomes-provider=UCSC --download-genomes-contig-regex='chr[0-9XYM]+$' --download-annovar=annovar/current --download-annovar-files='refGene,cosmic70,nci60' --download-snpeff=snpeff/current --download-refseq=refseq/current --download-refseq-format-file='ncbiRefSeq.txt' --download-dbnsfp=dbnsfp/current --download-dbnsfp-release='4.4a' --download-dbnsfp-subdatabases --download-alphamissense=alphamissense/current --download-exomiser=exomiser/current --download-dbsnp=dbsnp/current --download-dbsnp-vcf --threads=8 \\n howard databases --generate-param=/tmp/param.json --generate-param-description=/tmp/test.description.json --generate-param-formats=parquet \\n howard databases --input_annovar=tests/databases/others/hg19_nci60.txt --output_annovar=/tmp/nci60.from_annovar.vcf.gz --annovar_to_parquet=/tmp/nci60.from_annovar.parquet --annovar_code=nci60 --genome=~/howard/databases/genomes/current/hg19.fa \\n\\nNotes:\\n - Downloading databases can take a while, depending on network, threads and memory\\n - Proxy: Beware of network and proxy configuration\\n - dbNSFP download: More 
threads, more memory usage (8 threads ~ 16Gb, 24 threads ~ 32Gb)\\n \\n", 'groups': {'main': {'assembly': False, 'genomes-folder': False, 'genome': False, 'param': False}, 'Genomes': {'download-genomes': False, 'download-genomes-provider': False, 'download-genomes-contig-regex': False}, 'snpEff': {'download-snpeff': False}, 'Annovar': {'download-annovar': False, 'download-annovar-files': False, 'download-annovar-url': False}, 'refSeq': {'download-refseq': False, 'download-refseq-url': False, 'download-refseq-prefix': False, 'download-refseq-files': False, 'download-refseq-format-file': False, 'download-refseq-include-utr5': False, 'download-refseq-include-utr3': False, 'download-refseq-include-chrM': False, 'download-refseq-include-non-canonical-chr': False, 'download-refseq-include-non-coding-transcripts': False, 'download-refseq-include-transcript-version': False}, 'dbNSFP': {'download-dbnsfp': False, 'download-dbnsfp-url': False, 'download-dbnsfp-release': False, 'download-dbnsfp-parquet-size': False, 'download-dbnsfp-subdatabases': False, 'download-dbnsfp-parquet': False, 'download-dbnsfp-vcf': False, 'download-dbnsfp-no-files-all': False, 'download-dbnsfp-add-info': False, 'download-dbnsfp-only-info': False, 'download-dbnsfp-uniquify': False, 'download-dbnsfp-row-group-size': False}, 'AlphaMissense': {'download-alphamissense': False, 'download-alphamissense-url': False}, 'Exomiser': {'download-exomiser': False, 'download-exomiser-application-properties': False, 'download-exomiser-url': False, 'download-exomiser-release': False, 'download-exomiser-phenotype-release': False, 'download-exomiser-remm-release': False, 'download-exomiser-remm-url': False, 'download-exomiser-cadd-release': False, 'download-exomiser-cadd-url': False, 'download-exomiser-cadd-url-snv-file': False, 'download-exomiser-cadd-url-indel-file': False}, 'dbSNP': {'download-dbsnp': False, 'download-dbsnp-releases': False, 'download-dbsnp-release-default': False, 'download-dbsnp-url': False, 
'download-dbsnp-url-files': False, 'download-dbsnp-url-files-prefix': False, 'download-dbsnp-assemblies-map': False, 'download-dbsnp-vcf': False, 'download-dbsnp-parquet': False}, 'HGMD': {'convert-hgmd': False, 'convert-hgmd-file': False, 'convert-hgmd-basename': False}, 'from_Annovar': {'input_annovar': False, 'output_annovar': False, 'annovar_code': False, 'annovar_to_parquet': False, 'annovar_reduce_memory': False, 'annovar_multi_variant': False}, 'from_extann': {'input_extann': False, 'output_extann': False, 'refgene': False, 'transcripts': False, 'param_extann': False, 'mode_extann': False}, 'Parameters': {'generate-param': False, 'generate-param-description': False, 'generate-param-releases': False, 'generate-param-formats': False, 'generate-param-bcftools': False}}}, 'gui': {'function': 'gui', 'description': 'Graphical User Interface tools', 'help': 'Graphical User Interface tools', 'epilog': 'Usage examples:\\n howard gui ', 'groups': {}}, 'help': {'function': 'help', 'description': 'Help tools', 'help': 'Help tools', 'epilog': "Usage examples:\\n howard help --help_md=docs/help.md --help_html=docs/html/help.html --help_pdf=docs/pdf/help.pdf\\n howard help --help_json_input=docs/json/help.configuration.json --help_json_input_title='HOWARD Configuration' --help_md=docs/help.configuration.md --help_html=docs/html/help.configuration.html --help_pdf=docs/pdf/help.configuration.pdf --code_type='json'\\n howard help --help_json_input=docs/json/help.parameteres.json --help_json_input_title='HOWARD Parameters' --help_md=docs/help.parameteres.md --help_html=docs/html/help.parameteres.html --help_pdf=docs/pdf/help.parameteres.pdf --code_type='json' \\n howard help --help_json_input=docs/json/help.parameteres.databases.json --help_json_input_title='HOWARD Parameters Databases' --help_md=docs/help.parameteres.databases.md --help_html=docs/html/help.parameteres.databases.html --help_pdf=docs/pdf/help.parameteres.databases.pdf --code_type='json' \\n \\n", 'groups': 
{'main': {'help_md': False, 'help_html': False, 'help_pdf': False, 'help_md_input': False, 'help_json_input': False, 'help_json_input_title': False, 'code_type': False}}}, 'update_database': {'function': 'update_database', 'description': 'Update HOWARD database\\n', 'help': '(plugin) Update HOWARD database', 'epilog': 'Usage examples:\\n howard update_database --database clinvar --databases_folder /home1/DB/HOWARD --update_config update_databases.json \\n \\n', 'groups': {'main': {'param': False}, 'Update_database': {'databases_folder': False, 'database': False, 'update_config': False, 'current_folder': False}, 'Options': {'show': False, 'limit': False}}}, 'to_excel': {'function': 'to_excel', 'description': "Convert VCF file to Excel '.xlsx' format.\\n", 'help': "(plugin) Convert VCF file to Excel '.xlsx' format", 'epilog': 'Usage examples:\\n howard to_excel --input=tests/data/example.vcf.gz --output=/tmp/example.xlsx --add_variants_view\\n \\n', 'groups': {'main': {'input': True, 'output': True}, 'Add': {'add_variants_view': False, 'add_header': False}}}, 'transcripts_check': {'function': 'transcripts_check', 'description': 'Check if a transcript list is present in a generated transcript table from a input VCF file.\\n', 'help': '(plugin) Check transcript list in transcript table', 'epilog': 'Usage examples:\\n howard transcripts_check --input=plugins/transcripts_check/tests/data/example.ann.transcripts.vcf.gz --param=plugins/transcripts_check/tests/data/param.transcripts.json --transcripts_expected=plugins/transcripts_check/tests/data/transcripts.tsv --stats=/tmp/transcripts.stats.json --transcripts_missing=/tmp/transcripts.missing.tsv\\n \\n', 'groups': {'main': {'input': True, 'param': True, 'transcripts_expected': True, 'transcripts_missing': False, 'stats_json': False}}}, 'genebe': {'function': 'genebe', 'description': 'GeneBe annotation using REST API (see https://genebe.net/).\\n', 'help': '(plugin) GeneBe annotation using REST API', 'epilog': 'Usage 
examples:\\n howard genebe --input=tests/data/example.vcf.gz --output=/tmp/example.genebe.vcf.gz --genebe_use_refseq\\n \\n', 'groups': {'main': {'input': True, 'output': True, 'param': False, 'assembly': False}, 'GeneBe': {'genebe_use_refseq': False, 'genebe_use_ensembl': False, 'not_flatten_consequences': False}, 'Explode': {'explode_infos': False, 'explode_infos_prefix': False, 'explode_infos_fields': False}, 'Export': {'include_header': False, 'order_by': False, 'parquet_partitions': False}}}, 'minimalize': {'function': 'minimalize', 'description': "Minimalize a VCF file consists in put missing value ('.') on INFO/Tags, ID, QUAL or FILTER fields. Options can also minimalize samples (keep only GT) or remove all samples. INFO/tags can by exploded before minimalize to keep tags into separated columns (useful for Parquet or TSV format to constitute a database).\\n", 'help': '(plugin) Minimalize a VCF file, such as removing INFO/Tags or samples', 'epilog': 'Usage examples:\\n howard minimalize --input=tests/data/example.vcf.gz --output=/tmp/example.minimal.vcf.gz --minimalize_info --minimalize_filter --minimalize_qual --minimalize_id --minimalize_samples\\n howard minimalize --input=tests/data/example.vcf.gz --output=/tmp/example.minimal.tsv --remove_samples --explode_infos --minimalize_info\\n \\n', 'groups': {'main': {'input': True, 'output': True, 'param': False}, 'Minimalize': {'minimalize_info': False, 'minimalize_id': False, 'minimalize_qual': False, 'minimalize_filter': False, 'minimalize_samples': False, 'remove_samples': False}, 'Explode': {'explode_infos': False, 'explode_infos_prefix': False, 'explode_infos_fields': False}, 'Export': {'include_header': False, 'order_by': False, 'parquet_partitions': False}}}}, 'shared_arguments': ['config', 'threads', 'memory', 'chunk_size', 'tmp', 'duckdb_settings', 'interactive', 'verbosity', 'log', 'quiet', 'verbose', 'debug']}"}]; + /** pdoc search index */const docs = [{"fullname": "howard", "modulename": "howard", 
"kind": "module", "doc": "

    \n"}, {"fullname": "howard.functions", "modulename": "howard.functions", "kind": "module", "doc": "

    \n"}, {"fullname": "howard.functions.commons", "modulename": "howard.functions.commons", "kind": "module", "doc": "

    \n"}, {"fullname": "howard.functions.commons.file_folder", "modulename": "howard.functions.commons", "qualname": "file_folder", "kind": "variable", "doc": "

    \n", "default_value": "'/Users/lebechea/BIOINFO/git/HOWARD/howard/functions'"}, {"fullname": "howard.functions.commons.subfolder_plugins", "modulename": "howard.functions.commons", "qualname": "subfolder_plugins", "kind": "variable", "doc": "

    \n", "default_value": "'plugins'"}, {"fullname": "howard.functions.commons.folder_main", "modulename": "howard.functions.commons", "qualname": "folder_main", "kind": "variable", "doc": "

    \n", "default_value": "'/Users/lebechea/BIOINFO/git/HOWARD'"}, {"fullname": "howard.functions.commons.folder_config", "modulename": "howard.functions.commons", "qualname": "folder_config", "kind": "variable", "doc": "

    \n", "default_value": "'/Users/lebechea/BIOINFO/git/HOWARD/config'"}, {"fullname": "howard.functions.commons.folder_user_home", "modulename": "howard.functions.commons", "qualname": "folder_user_home", "kind": "variable", "doc": "

    \n", "default_value": "'/Users/lebechea'"}, {"fullname": "howard.functions.commons.folder_howard_home", "modulename": "howard.functions.commons", "qualname": "folder_howard_home", "kind": "variable", "doc": "

    \n", "default_value": "'/Users/lebechea/howard'"}, {"fullname": "howard.functions.commons.folder_plugins", "modulename": "howard.functions.commons", "qualname": "folder_plugins", "kind": "variable", "doc": "

    \n", "default_value": "'/Users/lebechea/BIOINFO/git/HOWARD/plugins'"}, {"fullname": "howard.functions.commons.comparison_map", "modulename": "howard.functions.commons", "qualname": "comparison_map", "kind": "variable", "doc": "

    \n", "default_value": "{'gt': '>', 'gte': '>=', 'lt': '<', 'lte': '<=', 'equals': '=', 'contains': 'SIMILAR TO'}"}, {"fullname": "howard.functions.commons.code_type_map", "modulename": "howard.functions.commons", "qualname": "code_type_map", "kind": "variable", "doc": "

    \n", "default_value": "{'Integer': 0, 'String': 1, 'Float': 2, 'Flag': 3}"}, {"fullname": "howard.functions.commons.code_type_map_to_sql", "modulename": "howard.functions.commons", "qualname": "code_type_map_to_sql", "kind": "variable", "doc": "

    \n", "default_value": "{'Integer': 'INTEGER', 'String': 'VARCHAR', 'Float': 'FLOAT', 'Flag': 'VARCHAR'}"}, {"fullname": "howard.functions.commons.code_type_map_to_vcf", "modulename": "howard.functions.commons", "qualname": "code_type_map_to_vcf", "kind": "variable", "doc": "

    \n", "default_value": "{'INTEGER': 'Integer', 'VARCHAR': 'String', 'FLOAT': 'Float', 'DOUBLE': 'Integer', 'BOOLEAN': 'String'}"}, {"fullname": "howard.functions.commons.file_format_delimiters", "modulename": "howard.functions.commons", "qualname": "file_format_delimiters", "kind": "variable", "doc": "

    \n", "default_value": "{'vcf': '\\t', 'tsv': '\\t', 'csv': ',', 'psv': '|', 'bed': '\\t'}"}, {"fullname": "howard.functions.commons.file_format_allowed", "modulename": "howard.functions.commons", "qualname": "file_format_allowed", "kind": "variable", "doc": "

    \n", "default_value": "['vcf', 'tsv', 'csv', 'psv', 'bed', 'json', 'parquet', 'duckdb']"}, {"fullname": "howard.functions.commons.file_compressed_format", "modulename": "howard.functions.commons", "qualname": "file_compressed_format", "kind": "variable", "doc": "

    \n", "default_value": "['gz', 'bgz']"}, {"fullname": "howard.functions.commons.vcf_required_release", "modulename": "howard.functions.commons", "qualname": "vcf_required_release", "kind": "variable", "doc": "

    \n", "default_value": "'##fileformat=VCFv4.2'"}, {"fullname": "howard.functions.commons.vcf_required_columns", "modulename": "howard.functions.commons", "qualname": "vcf_required_columns", "kind": "variable", "doc": "

    \n", "default_value": "['#CHROM', 'POS', 'ID', 'REF', 'ALT', 'QUAL', 'FILTER', 'INFO']"}, {"fullname": "howard.functions.commons.vcf_required", "modulename": "howard.functions.commons", "qualname": "vcf_required", "kind": "variable", "doc": "

    \n", "default_value": "['##fileformat=VCFv4.2', '#CHROM\\tPOS\\tID\\tREF\\tALT\\tQUAL\\tFILTER\\tINFO']"}, {"fullname": "howard.functions.commons.DEFAULT_TOOLS_FOLDER", "modulename": "howard.functions.commons", "qualname": "DEFAULT_TOOLS_FOLDER", "kind": "variable", "doc": "

    \n", "default_value": "'/Users/lebechea/howard/tools'"}, {"fullname": "howard.functions.commons.DEFAULT_TOOLS_BIN", "modulename": "howard.functions.commons", "qualname": "DEFAULT_TOOLS_BIN", "kind": "variable", "doc": "

    \n", "default_value": "{'bcftools': {'bin': 'bcftools'}, 'bgzip': {'bin': 'bgzip'}, 'java': {'bin': 'java'}, 'snpeff': {'jar': '~/howard/tools/snpeff/current/bin/snpEff.jar'}, 'annovar': {'perl': '~/howard/tools/annovar/current/bin/table_annovar.pl'}, 'exomiser': {'jar': '~/howard/tools/exomiser/current/bin/exomiser.jar'}, 'docker': {'bin': 'docker'}, 'splice': {'docker': {'image': 'bioinfochrustrasbourg/splice:0.2.1', 'entrypoint': '/bin/bash', 'options': None, 'command': None}}}"}, {"fullname": "howard.functions.commons.DEFAULT_ANNOVAR_URL", "modulename": "howard.functions.commons", "qualname": "DEFAULT_ANNOVAR_URL", "kind": "variable", "doc": "

    \n", "default_value": "'http://www.openbioinformatics.org/annovar/download'"}, {"fullname": "howard.functions.commons.DEFAULT_REFSEQ_URL", "modulename": "howard.functions.commons", "qualname": "DEFAULT_REFSEQ_URL", "kind": "variable", "doc": "

    \n", "default_value": "'http://hgdownload.soe.ucsc.edu/goldenPath'"}, {"fullname": "howard.functions.commons.DEFAULT_DBNSFP_URL", "modulename": "howard.functions.commons", "qualname": "DEFAULT_DBNSFP_URL", "kind": "variable", "doc": "

    \n", "default_value": "'https://dbnsfp.s3.amazonaws.com'"}, {"fullname": "howard.functions.commons.DEFAULT_EXOMISER_URL", "modulename": "howard.functions.commons", "qualname": "DEFAULT_EXOMISER_URL", "kind": "variable", "doc": "

    \n", "default_value": "'http://data.monarchinitiative.org/exomiser'"}, {"fullname": "howard.functions.commons.DEFAULT_EXOMISER_REMM_URL", "modulename": "howard.functions.commons", "qualname": "DEFAULT_EXOMISER_REMM_URL", "kind": "variable", "doc": "

    \n", "default_value": "'https://kircherlab.bihealth.org/download/ReMM'"}, {"fullname": "howard.functions.commons.DEFAULT_EXOMISER_CADD_URL", "modulename": "howard.functions.commons", "qualname": "DEFAULT_EXOMISER_CADD_URL", "kind": "variable", "doc": "

    \n", "default_value": "'https://kircherlab.bihealth.org/download/CADD'"}, {"fullname": "howard.functions.commons.DEFAULT_ALPHAMISSENSE_URL", "modulename": "howard.functions.commons", "qualname": "DEFAULT_ALPHAMISSENSE_URL", "kind": "variable", "doc": "

    \n", "default_value": "'https://storage.googleapis.com/dm_alphamissense'"}, {"fullname": "howard.functions.commons.DEFAULT_DBSNP_URL", "modulename": "howard.functions.commons", "qualname": "DEFAULT_DBSNP_URL", "kind": "variable", "doc": "

    \n", "default_value": "'https://ftp.ncbi.nih.gov/snp/archive'"}, {"fullname": "howard.functions.commons.DEFAULT_DATABASE_FOLDER", "modulename": "howard.functions.commons", "qualname": "DEFAULT_DATABASE_FOLDER", "kind": "variable", "doc": "

    \n", "default_value": "'/Users/lebechea/howard/databases'"}, {"fullname": "howard.functions.commons.DEFAULT_ANNOTATIONS_FOLDER", "modulename": "howard.functions.commons", "qualname": "DEFAULT_ANNOTATIONS_FOLDER", "kind": "variable", "doc": "

    \n", "default_value": "'/Users/lebechea/howard/databases/annotations/current'"}, {"fullname": "howard.functions.commons.DEFAULT_GENOME_FOLDER", "modulename": "howard.functions.commons", "qualname": "DEFAULT_GENOME_FOLDER", "kind": "variable", "doc": "

    \n", "default_value": "'/Users/lebechea/howard/databases/genomes/current'"}, {"fullname": "howard.functions.commons.DEFAULT_SNPEFF_FOLDER", "modulename": "howard.functions.commons", "qualname": "DEFAULT_SNPEFF_FOLDER", "kind": "variable", "doc": "

    \n", "default_value": "'/Users/lebechea/howard/databases/snpeff/current'"}, {"fullname": "howard.functions.commons.DEFAULT_ANNOVAR_FOLDER", "modulename": "howard.functions.commons", "qualname": "DEFAULT_ANNOVAR_FOLDER", "kind": "variable", "doc": "

    \n", "default_value": "'/Users/lebechea/howard/databases/annovar/current'"}, {"fullname": "howard.functions.commons.DEFAULT_REFSEQ_FOLDER", "modulename": "howard.functions.commons", "qualname": "DEFAULT_REFSEQ_FOLDER", "kind": "variable", "doc": "

    \n", "default_value": "'/Users/lebechea/howard/databases/refseq/current'"}, {"fullname": "howard.functions.commons.DEFAULT_DBNSFP_FOLDER", "modulename": "howard.functions.commons", "qualname": "DEFAULT_DBNSFP_FOLDER", "kind": "variable", "doc": "

    \n", "default_value": "'/Users/lebechea/howard/databases/dbnsfp/current'"}, {"fullname": "howard.functions.commons.DEFAULT_EXOMISER_FOLDER", "modulename": "howard.functions.commons", "qualname": "DEFAULT_EXOMISER_FOLDER", "kind": "variable", "doc": "

    \n", "default_value": "'/Users/lebechea/howard/databases/exomiser/current'"}, {"fullname": "howard.functions.commons.DEFAULT_DBSNP_FOLDER", "modulename": "howard.functions.commons", "qualname": "DEFAULT_DBSNP_FOLDER", "kind": "variable", "doc": "

    \n", "default_value": "'/Users/lebechea/howard/databases/exomiser/dbsnp'"}, {"fullname": "howard.functions.commons.DEFAULT_SPLICE_FOLDER", "modulename": "howard.functions.commons", "qualname": "DEFAULT_SPLICE_FOLDER", "kind": "variable", "doc": "

    \n", "default_value": "'/Users/lebechea/howard/databases/splice'"}, {"fullname": "howard.functions.commons.DEFAULT_SPLICEAI_FOLDER", "modulename": "howard.functions.commons", "qualname": "DEFAULT_SPLICEAI_FOLDER", "kind": "variable", "doc": "

    \n", "default_value": "'/Users/lebechea/howard/databases/spliceai'"}, {"fullname": "howard.functions.commons.DEFAULT_SPIP_FOLDER", "modulename": "howard.functions.commons", "qualname": "DEFAULT_SPIP_FOLDER", "kind": "variable", "doc": "

    \n", "default_value": "'/Users/lebechea/howard/databases/spip'"}, {"fullname": "howard.functions.commons.DEFAULT_DATA_FOLDER", "modulename": "howard.functions.commons", "qualname": "DEFAULT_DATA_FOLDER", "kind": "variable", "doc": "

    \n", "default_value": "'/Users/lebechea/howard/data'"}, {"fullname": "howard.functions.commons.DEFAULT_ASSEMBLY", "modulename": "howard.functions.commons", "qualname": "DEFAULT_ASSEMBLY", "kind": "variable", "doc": "

    \n", "default_value": "'hg19'"}, {"fullname": "howard.functions.commons.DUCKDB_EXTENSION", "modulename": "howard.functions.commons", "qualname": "DUCKDB_EXTENSION", "kind": "variable", "doc": "

    \n", "default_value": "'/Users/lebechea/BIOINFO/git/HOWARD/howard/functions/duckdb_extension'"}, {"fullname": "howard.functions.commons.MACHIN_LIST", "modulename": "howard.functions.commons", "qualname": "MACHIN_LIST", "kind": "variable", "doc": "

    \n", "default_value": "{'amd64': 'amd64', 'arm64': 'arm64'}"}, {"fullname": "howard.functions.commons.BCFTOOLS_FORMAT", "modulename": "howard.functions.commons", "qualname": "BCFTOOLS_FORMAT", "kind": "variable", "doc": "

    \n", "default_value": "['vcf', 'bed']"}, {"fullname": "howard.functions.commons.CODE_TYPE_MAP", "modulename": "howard.functions.commons", "qualname": "CODE_TYPE_MAP", "kind": "variable", "doc": "

    \n", "default_value": "{'Integer': 0, 'String': 1, 'Float': 2, 'Flag': 3}"}, {"fullname": "howard.functions.commons.GENOTYPE_MAP", "modulename": "howard.functions.commons", "qualname": "GENOTYPE_MAP", "kind": "variable", "doc": "

    \n", "default_value": "{None: '.', -1: 'A', -2: 'G', -3: 'R'}"}, {"fullname": "howard.functions.commons.DTYPE_LIMIT_AUTO", "modulename": "howard.functions.commons", "qualname": "DTYPE_LIMIT_AUTO", "kind": "variable", "doc": "

    \n", "default_value": "10000"}, {"fullname": "howard.functions.commons.DEFAULT_CHUNK_SIZE", "modulename": "howard.functions.commons", "qualname": "DEFAULT_CHUNK_SIZE", "kind": "variable", "doc": "

    \n", "default_value": "1048576"}, {"fullname": "howard.functions.commons.LOG_FORMAT", "modulename": "howard.functions.commons", "qualname": "LOG_FORMAT", "kind": "variable", "doc": "

    \n", "default_value": "'#[%(asctime)s] %(levelname)7s| %(message)s'"}, {"fullname": "howard.functions.commons.log_color", "modulename": "howard.functions.commons", "qualname": "log_color", "kind": "variable", "doc": "

    \n", "default_value": "None"}, {"fullname": "howard.functions.commons.prompt_mesage", "modulename": "howard.functions.commons", "qualname": "prompt_mesage", "kind": "variable", "doc": "

    \n", "default_value": "'#[{}] |'"}, {"fullname": "howard.functions.commons.prompt_color", "modulename": "howard.functions.commons", "qualname": "prompt_color", "kind": "variable", "doc": "

    \n", "default_value": "None"}, {"fullname": "howard.functions.commons.prompt_line_color", "modulename": "howard.functions.commons", "qualname": "prompt_line_color", "kind": "variable", "doc": "

    \n", "default_value": "'green'"}, {"fullname": "howard.functions.commons.remove_if_exists", "modulename": "howard.functions.commons", "qualname": "remove_if_exists", "kind": "function", "doc": "

    The function removes a file if it exists at the specified filepath(s).

    \n\n
    Parameters
    \n\n
      \n
    • filepaths: A list of file paths that you want to check for existence and remove if they exist
    • \n
    \n", "signature": "(filepaths: list) -> None:", "funcdef": "def"}, {"fullname": "howard.functions.commons.set_log_level", "modulename": "howard.functions.commons", "qualname": "set_log_level", "kind": "function", "doc": "

    It sets the log level of the Python logging module

    \n\n
    Parameters
    \n\n
      \n
    • verbosity: The level of verbosity
    • \n
    \n", "signature": "(verbosity: str, log_file: str = None) -> str:", "funcdef": "def"}, {"fullname": "howard.functions.commons.split_interval", "modulename": "howard.functions.commons", "qualname": "split_interval", "kind": "function", "doc": "

    It takes a start and end value, and either a step size or a number of cuts, and returns a list of\nvalues that split the interval into equal-sized pieces

    \n\n
    Parameters
    \n\n
      \n
    • start: the start of the interval
    • \n
    • end: the end of the interval
    • \n
    • step: the step size between each cut
    • \n
    • ncuts: number of cuts to make
    • \n
    \n\n
    Returns
    \n\n
    \n

    A list of numbers.

    \n
    \n", "signature": "(start: int, end: int, step: int = None, ncuts: int = None):", "funcdef": "def"}, {"fullname": "howard.functions.commons.merge_regions", "modulename": "howard.functions.commons", "qualname": "merge_regions", "kind": "function", "doc": "

    It takes a list of genomic regions and returns a list of genomic regions where overlapping regions\nhave been merged

    \n\n
    Parameters
    \n\n
      \n
    • regions: A list of tuples representing genomic regions with the values of the chrom, start\nand end columns
    • \n
    \n\n
    Returns
    \n\n
    \n

    A list of tuples representing the merged regions with the values of the columns chrom,\n start and end.

    \n
    \n", "signature": "(regions: list) -> list:", "funcdef": "def"}, {"fullname": "howard.functions.commons.create_where_clause", "modulename": "howard.functions.commons", "qualname": "create_where_clause", "kind": "function", "doc": "

    It takes a list of merged regions and returns a SQL WHERE clause that can be used to filter variants\nin a SQL table

    \n\n
    Parameters
    \n\n
      \n
    • merged_regions: a list of tuples representing the merged regions with the values of the\nchrom, start and end columns
    • \n
    • table: The name of the table to query, defaults to variants (optional)
    • \n
    \n\n
    Returns
    \n\n
    \n

    A dictionary with the chromosome as key and the where clause as value.

    \n
    \n", "signature": "(merged_regions: list, table: str = 'variants') -> str:", "funcdef": "def"}, {"fullname": "howard.functions.commons.command", "modulename": "howard.functions.commons", "qualname": "command", "kind": "function", "doc": "

    It runs a command in the shell and waits for it to finish

    \n\n
    Parameters
    \n\n
      \n
    • command: The command to run
    • \n
    \n\n
    Returns
    \n\n
    \n

    The return value is the exit status of the process.

    \n
    \n", "signature": "(command: str) -> str:", "funcdef": "def"}, {"fullname": "howard.functions.commons.run_parallel_commands", "modulename": "howard.functions.commons", "qualname": "run_parallel_commands", "kind": "function", "doc": "

    It takes a list of commands and a number of threads, and runs the commands in parallel

    \n\n
    Parameters
    \n\n
      \n
    • commands: a list of commands to run
    • \n
    • threads: The number of threads to use
    • \n
    \n\n
    Returns
    \n\n
    \n

    A list of results from the commands.

    \n
    \n", "signature": "(commands: list, threads: int = 1) -> list:", "funcdef": "def"}, {"fullname": "howard.functions.commons.run_parallel_functions", "modulename": "howard.functions.commons", "qualname": "run_parallel_functions", "kind": "function", "doc": "

    It takes a list of functions and a number of threads, and runs the functions in parallel using the\nnumber of threads specified

    \n\n
    Parameters
    \n\n
      \n
    • functions: a list of functions to run in parallel
    • \n
    • threads: The number of threads to use
    • \n
    \n\n
    Returns
    \n\n
    \n

    A list of multiprocessing.pool.ApplyResult objects.

    \n
    \n", "signature": "(functions: list, threads: int = 1) -> list:", "funcdef": "def"}, {"fullname": "howard.functions.commons.example_function", "modulename": "howard.functions.commons", "qualname": "example_function", "kind": "function", "doc": "

    example_function takes in a number and a word and returns a list of the number and the word

    \n\n
    Parameters
    \n\n
      \n
    • num: a number
    • \n
    • word: a string
    • \n
    \n\n
    Returns
    \n\n
    \n

    [num, word]

    \n
    \n", "signature": "(num, word):", "funcdef": "def"}, {"fullname": "howard.functions.commons.find", "modulename": "howard.functions.commons", "qualname": "find", "kind": "function", "doc": "

    It recursively walks the directory tree starting at the given path, and returns the first file it\nfinds with the given name

    \n\n
    Parameters
    \n\n
      \n
    • name: The name of the file you're looking for
    • \n
    • path: The path to search for the file
    • \n
    \n\n
    Returns
    \n\n
    \n

    The path to the file.

    \n
    \n", "signature": "(name: str, path: str) -> str:", "funcdef": "def"}, {"fullname": "howard.functions.commons.find_all", "modulename": "howard.functions.commons", "qualname": "find_all", "kind": "function", "doc": "

    \"Walk the directory tree starting at path, and for each regular file with the name name, append its\nfull path to the result list.\"

    \n\n

    The os.walk function is a generator that yields a 3-tuple containing the name of a directory, a list\nof its subdirectories, and a list of the files in that directory. The name of the directory is a\nstring, and the lists of subdirectories and files are lists of strings

    \n\n
    Parameters
    \n\n
      \n
    • name: The name of the file you're looking for
    • \n
    • path: The path to search in
    • \n
    \n\n
    Returns
    \n\n
    \n

    A list of all the files in the directory that have the name \"name\"

    \n
    \n", "signature": "(name: str, path: str) -> list:", "funcdef": "def"}, {"fullname": "howard.functions.commons.find_genome", "modulename": "howard.functions.commons", "qualname": "find_genome", "kind": "function", "doc": "

    The find_genome function checks if a genome file exists at the specified path, and if not, it\ntries to find it using the provided assembly name or file name.

    \n\n
    Parameters
    \n\n
      \n
    • genome_path: The path to the genome file
    • \n
    • assembly: The assembly parameter is a string that represents the name of the genome\nassembly. It is used to search for the genome file with the specified assembly name in the\ngenome_dir directory. If a genome file with the assembly name is found, its path is returned
    • \n
    • file: The file parameter is the name of the genome file that you want to find
    • \n
    \n\n
    Returns
    \n\n
    \n

    the path to the genome file.

    \n
    \n", "signature": "(genome_path: str, assembly: str = None, file: str = None) -> str:", "funcdef": "def"}, {"fullname": "howard.functions.commons.find_file_prefix", "modulename": "howard.functions.commons", "qualname": "find_file_prefix", "kind": "function", "doc": "

    The function find_file_prefix is used to find a specific file based on input parameters such as\ninput file, folder, and assembly.

    \n\n
    Parameters
    \n\n
      \n
    • input_file: The input file is the file that you want to find the prefix for. It can be a file\npath or just the file name if it is in the current directory
    • \n
    • folder: The folder parameter is a string that represents the directory where the file is\nlocated
    • \n
    • assembly: The \"assembly\" parameter is a string that represents the assembly version of the\nfile you are looking for. It is used to search for files with the specific assembly version in their\nfilename
    • \n
    \n\n
    Returns
    \n\n
    \n

    the path of the output file.

    \n
    \n", "signature": "(\tinput_file: str = None,\tprefix: str = None,\tfolder: str = None,\tassembly: str = None) -> str:", "funcdef": "def"}, {"fullname": "howard.functions.commons.find_nomen", "modulename": "howard.functions.commons", "qualname": "find_nomen", "kind": "function", "doc": "

    The function find_nomen takes a HGVS string and a list of transcripts, parses the HGVS string, and\nreturns a dictionary with the best NOMEN based on specified patterns.

    \n\n
    Parameters
    \n\n
      \n
    • hgvs: The hgvs parameter is a DataFrame containing the HGVS strings to parse. It seems like\nthe function is designed to process multiple HGVS strings at once. You can pass this DataFrame to\nthe function for processing. If you have a specific DataFrame that you would like to use, please\nprovide it
    • \n
    • transcript: The transcript parameter in the find_nomen function is used to specify a\nsingle transcript to use for ranking. It is a string that represents the transcript. If provided,\nthis transcript will be used along with the transcripts from the transcripts list to determine the\nbest NOMEN
    • \n
    • transcripts: Transcripts are a list of transcripts to use for ranking in the find_nomen\nfunction. You can provide a list of transcripts that you want to consider when constructing the\nNOMEN for a given HGVS string
    • \n
    • transcripts_source_order: The transcripts_source_order parameter is a list that specifies\nthe order in which different sources of transcripts should be considered. In the provided function,\nthe default order is [\"column\", \"file\"], which means that transcripts from a column in the input\ndata will be considered first, followed by
    • \n
    • pattern: The pattern parameter in the find_nomen function is used to specify the format\nin which the NOMEN should be constructed. By default, the pattern is set to\n\"GNOMEN:TNOMEN:ENOMEN:CNOMEN:RNOMEN:NNOMEN
    • \n
    \n\n
    Returns
    \n\n
    \n

    The function find_nomen returns a dictionary containing the following keys:

    \n \n
      \n
    • NOMEN
    • \n
    • CNOMEN
    • \n
    • RNOMEN
    • \n
    • NNOMEN
    • \n
    • PNOMEN
    • \n
    • TVNOMEN
    • \n
    • TNOMEN
    • \n
    • TPVNOMEN
    • \n
    • TPNOMEN
    • \n
    • VNOMEN
    • \n
    • ENOMEN
    • \n
    • GNOMEN
    • \n
    \n
    \n", "signature": "(\thgvs: pandas.core.frame.DataFrame,\ttranscript: str = None,\ttranscripts: list = [],\ttranscripts_source_order: list = None,\tpattern=None,\ttranscripts_len: int = None) -> dict:", "funcdef": "def"}, {"fullname": "howard.functions.commons.explode_annotation_format", "modulename": "howard.functions.commons", "qualname": "explode_annotation_format", "kind": "function", "doc": "

    The explode_annotation_format function takes an annotation string and formats it into a specified\noutput format with optional customization parameters.

    \n\n
    Parameters
    \n\n
      \n
    • annotation: The annotation parameter is a string containing multiple annotations separated\nby commas and pipe symbols. Each annotation consists of different fields separated by pipe symbols.\nFor example, an annotation string could look like this: \"A|B|C,D|E|F\"
    • \n
    • uniquify: The uniquify parameter in the explode_annotation_format function is a boolean\nflag that determines whether to keep only unique values for each annotation field. If set to True,\nonly unique values will be retained for each field before joining them together. If set to False,\nall values, defaults to False
    • \n
    • output_format: The output_format parameter specifies the format in which you want the\noutput to be generated. The function supports two output formats: \"fields\" and \"JSON\". If you choose\n\"fields\", the output will be a string with annotations separated by semicolons. If you choose\n\"JSON\", the, defaults to fields
    • \n
    • prefix: The prefix parameter in the explode_annotation_format function is used to specify\nthe prefix that will be added to each annotation field when generating the exploded annotation\nstring. In the provided function, the default prefix value is set to \"ANN_\". You can customize this\nprefix value to suit your specific, defaults to ANN_
    • \n
    • header: The header parameter in the explode_annotation_format function is a list of\ncolumn names that will be used to create a DataFrame from the input annotation string. Each element\nin the header list corresponds to a specific field in the annotation data
    • \n
    \n\n
    Returns
    \n\n
    \n

    The function explode_annotation_format returns a string that contains the exploded and\n formatted annotation information based on the input parameters provided. The format of the returned\n string depends on the output_format parameter. If output_format is set to \"JSON\", the function\n returns a JSON-formatted string. Otherwise, it returns a string with annotations formatted based on\n the other parameters such as `uniquify

    \n
    \n", "signature": "(\tannotation: str = '',\tuniquify: bool = False,\toutput_format: str = 'fields',\tprefix: str = 'ANN_',\theader: list = ['Allele', 'Annotation', 'Annotation_Impact', 'Gene_Name', 'Gene_ID', 'Feature_Type', 'Feature_ID', 'Transcript_BioType', 'Rank', 'HGVS.c', 'HGVS.p', 'cDNA.pos / cDNA.length', 'CDS.pos / CDS.length', 'AA.pos / AA.length', 'Distance', 'ERRORS / WARNINGS / INFO']) -> str:", "funcdef": "def"}, {"fullname": "howard.functions.commons.extract_snpeff_hgvs", "modulename": "howard.functions.commons", "qualname": "extract_snpeff_hgvs", "kind": "function", "doc": "

    This function extracts HGVS annotations from a given snpEff annotation string and returns them as a\ncomma-separated string.

    \n\n
    Parameters
    \n\n
      \n
    • snpeff: The snpeff parameter is a string that contains annotations for genetic variants in\na specific format. It is used as input to extract HGVS notation for the variants
    • \n
    • header: The header parameter is a list of column names that will be used to create a pandas\nDataFrame from the snpeff string input. It is used to extract specific information from the snpeff\nannotations
    • \n
    \n\n
    Returns
    \n\n
    \n

    a string that contains the HGVS annotations extracted from the input SNPEff annotation\n string.

    \n
    \n", "signature": "(\tsnpeff: str = '',\theader: list = ['Allele', 'Annotation', 'Annotation_Impact', 'Gene_Name', 'Gene_ID', 'Feature_Type', 'Feature_ID', 'Transcript_BioType', 'Rank', 'HGVS.c', 'HGVS.p', 'cDNA.pos / cDNA.length', 'CDS.pos / CDS.length', 'AA.pos / AA.length', 'Distance', 'ERRORS / WARNINGS / INFO']) -> str:", "funcdef": "def"}, {"fullname": "howard.functions.commons.explode_snpeff_ann", "modulename": "howard.functions.commons", "qualname": "explode_snpeff_ann", "kind": "function", "doc": "

    The explode_snpeff_ann function takes a string of SNPEff annotations, splits and processes them\nbased on specified parameters, and returns the processed annotations in a specified output format.

    \n\n
    Parameters
    \n\n
      \n
    • snpeff: The snpeff parameter is a string containing annotations separated by commas. Each\nannotation is further divided into different fields separated by pipes (|)
    • \n
    • uniquify: The uniquify parameter in the explode_snpeff_ann function is a boolean flag\nthat determines whether to keep only unique values for each annotation field or not. If uniquify\nis set to True, only unique values will be kept for each annotation field. If, defaults to False
    • \n
    • output_format: The output_format parameter in the explode_snpeff_ann function specifies\nthe format in which the output will be generated. The function supports two output formats: \"fields\"\nand \"JSON\", defaults to fields
    • \n
    • prefix: The prefix parameter in the explode_snpeff_ann function is used to specify the\nprefix that will be added to each annotation field in the output. For example, if the prefix is set\nto \"ANN_\", then the output annotations will be formatted as \"ANN_Annotation=example_annotation,\ndefaults to ANN_
    • \n
    • header: The header parameter in the explode_snpeff_ann function is a list of strings that\nrepresent the column names or fields for the output data. These strings include information such as\nallele, annotation, gene name, gene ID, feature type, transcript biotype, and various other details\nrelated
    • \n
    \n\n
    Returns
    \n\n
    \n

    The function explode_snpeff_ann returns a string that contains the exploded and formatted\n SNPEff annotations based on the input parameters provided. The specific format of the returned\n string depends on the output_format, uniquify, and other parameters specified in the function.

    \n
    \n", "signature": "(\tsnpeff: str = '',\tuniquify: bool = False,\toutput_format: str = 'fields',\tprefix: str = 'ANN_',\theader: list = ['Allele', 'Annotation', 'Annotation_Impact', 'Gene_Name', 'Gene_ID', 'Feature_Type', 'Feature_ID', 'Transcript_BioType', 'Rank', 'HGVS.c', 'HGVS.p', 'cDNA.pos / cDNA.length', 'CDS.pos / CDS.length', 'AA.pos / AA.length', 'Distance', 'ERRORS / WARNINGS / INFO']) -> str:", "funcdef": "def"}, {"fullname": "howard.functions.commons.get_index", "modulename": "howard.functions.commons", "qualname": "get_index", "kind": "function", "doc": "

    The function returns the index of a given value in a list, or -1 if the value is not in the list.

    \n\n
    Parameters
    \n\n
      \n
    • value: The value to search for in the list
    • \n
    • values: The parameter \"values\" is a list of values in which we want to find the index of a\nspecific value. It is an optional parameter with a default value of an empty list
    • \n
    \n\n
    Returns
    \n\n
    \n

    The function get_index returns the index of the first occurrence of the value parameter\n in the values list. If the value parameter is not found in the values list, the function\n returns -1.

    \n
    \n", "signature": "(value, values: list = []) -> int:", "funcdef": "def"}, {"fullname": "howard.functions.commons.get_file_format", "modulename": "howard.functions.commons", "qualname": "get_file_format", "kind": "function", "doc": "

    It takes a filename and returns the file format

    \n\n
    Parameters
    \n\n
      \n
    • filename: the name of the file you want to get the format of
    • \n
    \n\n
    Returns
    \n\n
    \n

    The file format of the file.

    \n
    \n", "signature": "(filename: str = None) -> str:", "funcdef": "def"}, {"fullname": "howard.functions.commons.findbypipeline", "modulename": "howard.functions.commons", "qualname": "findbypipeline", "kind": "function", "doc": "

    This function takes a dataframe and a list of samples, and returns the number of pipelines found in\nthe samples that have a non-null GT value.

    \n\n
    Parameters
    \n\n
      \n
    • df: The input dataframe containing genetic variant information
    • \n
    • samples: The samples parameter is a list of strings representing the names of the\nsamples/pipelines to be searched for in the input dataframe df
    • \n
    \n\n
    Returns
    \n\n
    \n

    a string in the format of \"nb_pipeline_find/nb_pipeline\", where nb_pipeline_find is the\n number of pipelines in the input list samples that have a non-null GT value in the input dataframe\n df, and nb_pipeline is the total number of pipelines in the input list samples. If the input list\n samples is empty, the function returns \"0/0\".

    \n
    \n", "signature": "(df, samples: list = []):", "funcdef": "def"}, {"fullname": "howard.functions.commons.genotypeconcordance", "modulename": "howard.functions.commons", "qualname": "genotypeconcordance", "kind": "function", "doc": "

    The function checks the genotype concordance of a given list of samples in a dataframe.

    \n\n
    Parameters
    \n\n
      \n
    • df: The input dataframe containing genetic variant information, including genotype\ninformation for each sample/pipeline
    • \n
    • samples: The parameter \"samples\" is a list of sample/pipeline names that are present in the\ninput dataframe \"df\". These samples/pipelines have genotype information that will be used to\ncalculate genotype concordance
    • \n
    \n\n
    Returns
    \n\n
    \n

    a string that indicates whether the genotypes of the specified samples in the input\n dataframe are concordant or not. The string is either \"TRUE\" or \"FALSE\", depending on whether all\n the specified samples have the same genotype or not.

    \n
    \n", "signature": "(df, samples: list = []):", "funcdef": "def"}, {"fullname": "howard.functions.commons.genotype_compression", "modulename": "howard.functions.commons", "qualname": "genotype_compression", "kind": "function", "doc": "

    The function takes a genotype string, replaces dots with zeros, removes non-digit characters, sorts\nand removes duplicates, and returns the compressed genotype string.

    \n\n
    Parameters
    \n\n
      \n
    • genotype: The input genotype as a string. It is a DNA sequence that contains genetic\ninformation
    • \n
    \n\n
    Returns
    \n\n
    \n

    The function genotype_compression returns a compressed version of the input genotype\n string. The compressed string has all dots replaced with 0s, all non-digit characters removed, and\n duplicates removed and sorted. The compressed string is returned as a string.

    \n
    \n", "signature": "(genotype: str = '') -> str:", "funcdef": "def"}, {"fullname": "howard.functions.commons.genotype_barcode", "modulename": "howard.functions.commons", "qualname": "genotype_barcode", "kind": "function", "doc": "

    This function takes a genotype string and compresses it, then returns a barcode string based on the\nlength and content of the compressed genotype.

    \n\n
    Parameters
    \n\n
      \n
    • genotype: The genotype parameter is a string that represents a genetic sequence or code
    • \n
    \n\n
    Returns
    \n\n
    \n

    The function genotype_barcode returns a string representing the barcode for a given\n genotype. The barcode can be \"0\", \"1\", \"2\", or \"?\" depending on the length and content of the\n compressed genotype string.

    \n
    \n", "signature": "(genotype: str = '') -> str:", "funcdef": "def"}, {"fullname": "howard.functions.commons.barcode", "modulename": "howard.functions.commons", "qualname": "barcode", "kind": "function", "doc": "

    Generates a barcode based on the genotype of the specified samples.

    \n\n
    Parameters
    \n\n
      \n
    • df: A pandas DataFrame containing the genetic data.

    • \n
    • samples: A list of sample names to use for generating the barcode.

    • \n
    \n\n
    Returns
    \n\n
    \n

    A barcode string based on the genotype of the specified samples.

    \n
    \n", "signature": "(df, samples: list = []):", "funcdef": "def"}, {"fullname": "howard.functions.commons.trio", "modulename": "howard.functions.commons", "qualname": "trio", "kind": "function", "doc": "

    The function trio(df, samples:list = []) determines the type of variant (denovo, dominant, or\nrecessive) in a trio based on the barcode generated from the samples.

    \n\n
    Parameters
    \n\n
      \n
    • df: The input dataframe containing genetic variant information
    • \n
    • samples: A list of sample IDs to be used in the analysis
    • \n
    \n\n
    Returns
    \n\n
    \n

    The function trio returns a string that represents the type of variant in a trio\n analysis, which can be \"denovo\", \"dominant\", \"recessive\", or \"unknown\".

    \n
    \n", "signature": "(df, samples: list = []):", "funcdef": "def"}, {"fullname": "howard.functions.commons.vaf_normalization", "modulename": "howard.functions.commons", "qualname": "vaf_normalization", "kind": "function", "doc": "

    This function takes in a row of data and a sample name, extracts the genotype information for that\nsample, calculates the variant allele frequency (VAF) from the genotype information, and adds the\nVAF to the genotype information before returning it.

    \n\n
    Parameters
    \n\n
      \n
    • row: The input row of a pandas DataFrame containing information about a genetic variant
    • \n
    • sample: The parameter \"sample\" is a string representing the name of the sample for which we\nwant to calculate the VAF (Variant Allele Frequency). It is used to extract the genotype information\nfor that particular sample from the input row
    • \n
    \n\n
    Returns
    \n\n
    \n

    a string that represents the genotype information for a given sample with an added \"VAF\"\n field that represents the variant allele frequency.

    \n
    \n", "signature": "(row, sample: str) -> str:", "funcdef": "def"}, {"fullname": "howard.functions.commons.genotype_stats", "modulename": "howard.functions.commons", "qualname": "genotype_stats", "kind": "function", "doc": "

    This function computes statistics on a specified information field (e.g. VAF) for a given set of\nsamples in a pandas dataframe.

    \n\n
    Parameters
    \n\n
      \n
    • df: The input dataframe containing variant information
    • \n
    • samples: The list of sample/pipeline names for which to compute the genotype statistics. If\nempty, the function will return an empty dictionary
    • \n
    • info: The parameter \"info\" is a string that represents the type of information being analyzed\nin the function. In this case, it is used to compute statistics on the Variant Allele Frequency\n(VAF) of genetic variants, defaults to VAF
    • \n
    \n\n
    Returns
    \n\n
    \n

    a dictionary containing statistics related to a specified information field (default is\n \"VAF\") for a given set of samples in a pandas DataFrame. The statistics include the number of\n values, a list of values, minimum value, maximum value, mean, median, and standard deviation. If no\n samples are specified, an empty dictionary is returned.

    \n
    \n", "signature": "(df, samples: list = [], info: str = 'VAF'):", "funcdef": "def"}, {"fullname": "howard.functions.commons.extract_file", "modulename": "howard.functions.commons", "qualname": "extract_file", "kind": "function", "doc": "

    The function extracts a compressed file in .zip or .gz format based on the file path provided.

    \n\n
    Parameters
    \n\n
      \n
    • file_path: The file path parameter is a string that represents the path to a file that needs\nto be extracted. The function checks if the file has a \".zip\" or \".gz\" extension and extracts it\naccordingly
    • \n
    • path: The path parameter is an optional string that represents the directory where the\nextracted files will be saved. If no path is provided, the function will use the directory of the\nfile_path as the extraction destination
    • \n
    • threads: The threads parameter is an optional parameter that specifies the number of\nthreads to use for extraction. By default, it is set to 1, meaning the extraction will be done using\na single thread, defaults to 1
    • \n
    \n", "signature": "(file_path: str, path: str = None, threads: int = 1):", "funcdef": "def"}, {"fullname": "howard.functions.commons.download_file", "modulename": "howard.functions.commons", "qualname": "download_file", "kind": "function", "doc": "

    The download_file function is a Python function that downloads a file from a given URL and saves\nit to a specified destination file path in chunks.

    \n\n
    Parameters
    \n\n
      \n
    • url: The url parameter is the URL of the file you want to download. It should be a string\nthat represents the complete URL, including the protocol (e.g., \"http://example.com/file.txt\")
    • \n
    • dest_file_path: The dest_file_path parameter is the path where the downloaded file will be\nsaved. It should be a string representing the file path, including the file name and extension. For\nexample, if you want to save the file as \"myfile.txt\" in the current directory, you can set `dest
    • \n
    • chunk_size: The chunk_size parameter determines the size of each chunk of data that is\ndownloaded at a time. In this case, the default value is set to 1 MB, which means that the file will\nbe downloaded in chunks of 1 MB at a time. This parameter can be adjusted according to
    • \n
    • try_aria: The try_aria parameter is a boolean value that determines whether to use the\nAria2c command-line tool for downloading the file. If set to True, the function will attempt to\ndownload the file using Aria2c. If set to False, the function will use the, defaults to True
    • \n
    • aria_async_dns: The aria_async_dns parameter is a boolean value that determines whether to\nuse asynchronous DNS resolution with Aria2c. If set to True, Aria2c will use asynchronous DNS\nresolution, which can improve download performance. If set to False, Aria2c will use synchronous,\ndefaults to False
    • \n
    • threads: The threads parameter specifies the number of threads to be used for downloading\nthe file. It determines the number of simultaneous connections that will be made to download the\nfile. By default, it is set to 1, which means that only one connection will be made at a time.\nIncreasing the value, defaults to 1
    • \n
    • quiet: The quiet parameter is a boolean value that determines whether to suppress the\noutput of the download process. If set to True, the output will be suppressed. If set to False,\nthe output will be displayed. By default, it is set to True, defaults to True
    • \n
    \n\n
    Returns
    \n\n
    \n

    a boolean value indicating whether the file was successfully downloaded and saved to the\n specified destination file path.

    \n
    \n", "signature": "(\turl: str,\tdest_file_path: str,\tchunk_size: int = 1048576,\ttry_aria: bool = True,\taria_async_dns: bool = False,\tthreads: int = 1,\tquiet: bool = True):", "funcdef": "def"}, {"fullname": "howard.functions.commons.whereis_bin", "modulename": "howard.functions.commons", "qualname": "whereis_bin", "kind": "function", "doc": "

    \n", "signature": "(bin_file: str) -> str:", "funcdef": "def"}, {"fullname": "howard.functions.commons.get_bin", "modulename": "howard.functions.commons", "qualname": "get_bin", "kind": "function", "doc": "

    The get_bin function retrieves the path to a specified binary file from a configuration dictionary\nor searches for it in the file system if it is not specified in the configuration.

    \n\n
    Parameters
    \n\n
      \n
    • bin: The bin parameter is a string or a pattern that represents the name of the binary file (e.g.,\nsnpEff.jar, exomiser-cli*.jar) that you want to retrieve the path for
    • \n
    • tool: The tool parameter is a string that represents the name of the tool. It is used to\nretrieve the path to the tool's binary file
    • \n
    • bin_type: The bin_type parameter is a string that specifies the type of binary file to\nsearch for in the config dict (e.g., jar, bin). In this case, the default value is \"bin\". A value \"jar\" indicates that the function is searching\nfor a JAR file. Defaults to bin
    • \n
    • config: A dictionary containing configuration information for the snpEff tool, including the\npath to the snpEff jar file. If no configuration is provided, an empty dictionary is used
    • \n
    • default_folder: The default_folder parameter is a string that represents the default folder\nwhere the tool binaries are located. If the bin_file is not found in the configuration dictionary\nor in the file system, the function will search for it in this default folder
    • \n
    \n\n
    Returns
    \n\n
    \n

    the path to the snpEff.jar file. If the file is not found, it returns None.

    \n
    \n", "signature": "(\tbin: str = None,\ttool: str = None,\tbin_type: str = 'bin',\tconfig: dict = {},\tdefault_folder: str = '/Users/lebechea/howard/tools',\toutput_type: str = 'bin') -> str:", "funcdef": "def"}, {"fullname": "howard.functions.commons.get_bin_command", "modulename": "howard.functions.commons", "qualname": "get_bin_command", "kind": "function", "doc": "

    The function get_bin_command generates a command based on the tool type (jar, java, docker) and\nspecified parameters.

    \n\n
    Parameters
    \n\n
      \n
    • bin: The bin parameter in the get_bin_command function is used to specify the binary\nexecutable file that you want to run. It is a string that represents the path or name of the binary\nfile. If you provide this parameter, the function will attempt to locate the binary file based on\nthe
    • \n
    • tool: The tool parameter in the get_bin_command function represents the name of the tool\nfor which you want to retrieve the command. It is used to identify the specific tool for which the\ncommand is being generated
    • \n
    • bin_type: The bin_type parameter in the get_bin_command function specifies the type of\nbinary executable that the tool uses. It can have values like \"bin\", \"jar\", \"java\", \"docker\", etc.,\ndepending on the type of tool being executed. The function uses this parameter to determine,\ndefaults to bin
    • \n
    • config: The config parameter in the get_bin_command function is a dictionary that holds\nconfiguration settings for the tool being used. It can include various settings such as paths,\nenvironment variables, or any other configuration options needed for the tool to run properly
    • \n
    • param: The param parameter in the get_bin_command function is a dictionary that contains\nadditional parameters or configurations for the tool being executed. These parameters can be used to\ncustomize the behavior or settings of the tool when generating the command for execution. The\nfunction uses the param dictionary along with the
    • \n
    • default_folder: The default_folder parameter in the get_bin_command function is used to\nspecify the default folder where the tools are located. If a specific folder is not provided when\ncalling the function, it will default to the value of DEFAULT_TOOLS_FOLDER
    • \n
    • add_options: The add_options parameter in the get_bin_command function allows you to pass\nadditional options or arguments to the command being constructed based on the tool type. These\nadditional options can be specific configurations, flags, or any other parameters that you want to\ninclude in the final command. When provided,
    • \n
    \n\n
    Returns
    \n\n
    \n

    The get_bin_command function returns a string representing the command to execute a\n specific tool based on the provided parameters. The returned command can be either a Java command\n for running a JAR file or a Docker command for running a Docker image/container. If the tool type is\n not Java or Docker, it returns the default tool bin.

    \n
    \n", "signature": "(\tbin: str = None,\ttool: str = None,\tbin_type: str = 'bin',\tconfig: dict = {},\tparam: dict = {},\tdefault_folder: str = '/Users/lebechea/howard/tools',\tadd_options: str = None) -> str:", "funcdef": "def"}, {"fullname": "howard.functions.commons.get_tmp", "modulename": "howard.functions.commons", "qualname": "get_tmp", "kind": "function", "doc": "

    The get_tmp function returns the value of the \"tmp\" parameter from either the param dictionary,\nconfig dictionary, or a default value \"/tmp\".

    \n\n
    Parameters
    \n\n
      \n
    • config: Config is a dictionary that contains configuration settings for the function. It is\nan optional parameter with a default value of an empty dictionary. It can be used to provide\nadditional configuration settings to the function get_tmp
    • \n
    • param: The param parameter is a dictionary containing parameters that can be passed to the\nfunction get_tmp. It can include various key-value pairs, but in this context, the function\nspecifically looks for the key \"tmp\" within the param dictionary to determine the temporary path\nvalue. If the \"
    • \n
    • default_tmp: The default_tmp parameter in the get_tmp function is a string that\nrepresents the default path for temporary files. If the \"tmp\" key is not found in the param\ndictionary or the config dictionary, the function will return this default_tmp value, which is,\ndefaults to /tmp
    • \n
    \n\n
    Returns
    \n\n
    \n

    The function get_tmp returns the value of the \"tmp\" key from the param dictionary if it\n exists. If the \"tmp\" key is not found in the param dictionary, it returns the value of the \"tmp\"\n key from the config dictionary. If neither key is found in param or config, it returns the\n default value \"/tmp\".

    \n
    \n", "signature": "(config: dict = {}, param: dict = None, default_tmp: str = '/tmp') -> str:", "funcdef": "def"}, {"fullname": "howard.functions.commons.get_threads", "modulename": "howard.functions.commons", "qualname": "get_threads", "kind": "function", "doc": "

    This Python function retrieves the number of threads to use based on input parameters and system\nconfiguration.

    \n\n
    Parameters
    \n\n
      \n
    • config: The config parameter is a dictionary that contains configuration settings for the\nfunction get_threads. It can be used to provide default values for the number of threads to use in\nthe function
    • \n
    • param: The param parameter is a dictionary that may contain the key \"threads\" which\nspecifies the number of threads to use. If the \"threads\" key is not present in the param\ndictionary, the function will look for the \"threads\" key in the config dictionary. If neither
    • \n
    \n\n
    Returns
    \n\n
    \n

    The function get_threads returns the number of threads to be used based on the input\n parameters.

    \n
    \n", "signature": "(config: dict = {}, param: dict = {}) -> int:", "funcdef": "def"}, {"fullname": "howard.functions.commons.get_memory", "modulename": "howard.functions.commons", "qualname": "get_memory", "kind": "function", "doc": "

    The get_memory function retrieves memory information using psutil and calculates a default memory\nvalue based on total memory, with the option to specify a custom memory value.

    \n\n
    Parameters
    \n\n
      \n
    • config: The config parameter is a dictionary that may contain configuration settings for\nthe function get_memory. It is used to provide default values or settings for the function
    • \n
    • param: The param parameter is a dictionary that may contain a key \"memory\" which represents\nthe amount of memory to be used. If the \"memory\" key is not present in the param dictionary, the\nfunction will try to retrieve the value from the config dictionary using the key \"
    • \n
    \n\n
    Returns
    \n\n
    \n

    The function get_memory returns a string representing the amount of memory to be used.\n This memory value is calculated based on the total memory available on the system, with a default\n value set to 80% of the total memory. The function first checks if a specific memory value is\n provided in the param dictionary, and if not, it looks for a default value in the config

    \n
    \n", "signature": "(config: dict = {}, param: dict = None) -> str:", "funcdef": "def"}, {"fullname": "howard.functions.commons.extract_float_from_str", "modulename": "howard.functions.commons", "qualname": "extract_float_from_str", "kind": "function", "doc": "

    The function extract_float_from_str extracts a float value from a given string input.

    \n\n
    Parameters
    \n\n
      \n
    • text: The extract_float_from_str function is designed to extract a floating-point number\nfrom a given string input. The function uses a regular expression to find the first occurrence of a\nfloating-point number in the input string and returns it as a float
    • \n
    \n\n
    Returns
    \n\n
    \n

    The function extract_float_from_str returns a float value extracted from the input text\n string. If a float value is found in the text, it is returned as a float. If no float value is\n found, it returns None.

    \n
    \n", "signature": "(text: str = '') -> float:", "funcdef": "def"}, {"fullname": "howard.functions.commons.extract_memory_in_go", "modulename": "howard.functions.commons", "qualname": "extract_memory_in_go", "kind": "function", "doc": "

    The extract_memory_in_go function converts a memory size string in the format FLOAT[kMG] to an\ninteger value in Go memory units.

    \n\n
    Parameters
    \n\n
      \n
    • memory_str: The memory_str parameter should be a string representing a memory value with a\nunit suffix in the format FLOAT[kMG]. For example, it could be \"1G\", \"512M\", or \"2k\"
    • \n
    • default: The default parameter in the extract_memory_in_go function is used to specify a\ndefault integer value if the conversion of the memory size string fails or if the value cannot be\nextracted from the input string. If no valid value can be extracted from the input string, the\nfunction will return the default value, defaults to 1
    • \n
    \n\n
    Returns
    \n\n
    \n

    The extract_memory_in_go function is returning an integer value representing the memory\n size in Go units based on the input memory string provided.

    \n
    \n", "signature": "(memory_str, default_value: int = 1, default_unit: str = 'G') -> int:", "funcdef": "def"}, {"fullname": "howard.functions.commons.concat_file", "modulename": "howard.functions.commons", "qualname": "concat_file", "kind": "function", "doc": "

    This function concatenates multiple input files into a single output file.

    \n\n
    Parameters
    \n\n
      \n
    • input_files: A list of file paths to the input files that need to be concatenated
    • \n
    • output_file: The parameter \"output_file\" is a string that represents the name of the file\nthat will be created by the function and will contain the concatenated content of all the input\nfiles
    • \n
    \n\n
    Returns
    \n\n
    \n

    a boolean value indicating whether the output file was successfully created or not. It\n checks if the output file exists using the os.path.exists() function and returns True if it\n exists and False otherwise.

    \n
    \n", "signature": "(input_files: list, output_file: str) -> bool:", "funcdef": "def"}, {"fullname": "howard.functions.commons.compress_file", "modulename": "howard.functions.commons", "qualname": "compress_file", "kind": "function", "doc": "

    This function compresses a file using the BGZF compression algorithm.

    \n\n
    Parameters
    \n\n
      \n
    • input_file: The path and name of the input file that needs to be compressed
    • \n
    • output_file: The output_file parameter is a string that represents the name and path of\nthe file where the compressed data will be written
    • \n
    \n", "signature": "(input_file: str, output_file: str) -> bool:", "funcdef": "def"}, {"fullname": "howard.functions.commons.get_compression_type", "modulename": "howard.functions.commons", "qualname": "get_compression_type", "kind": "function", "doc": "

    The function get_compression_type determines the compression type of a file based on its first few\nbytes.

    \n\n
    Parameters
    \n\n
      \n
    • filepath: The filepath parameter is a string that represents the path to the file for which\nwe want to determine the compression type
    • \n
    \n\n
    Returns
    \n\n
    \n

    The function get_compression_type returns a string indicating the compression type of the\n file specified by the filepath parameter. The possible return values are \"gzip\" if the file is\n compressed using gzip, \"bgzip\" if the file is compressed using bgzip, \"unknown\" if the compression\n type is unknown, and \"none\" if the file is not compressed.

    \n
    \n", "signature": "(filepath: str) -> str:", "funcdef": "def"}, {"fullname": "howard.functions.commons.get_file_compressed", "modulename": "howard.functions.commons", "qualname": "get_file_compressed", "kind": "function", "doc": "

    This function takes a filename as input and returns True if the file is compressed (in bgzip) and False if it\nis not.

    \n\n
    Parameters
    \n\n
      \n
    • filename: the name of the file to be checked
    • \n
    \n\n
    Returns
    \n\n
    \n

    A boolean value.

    \n
    \n", "signature": "(filename: str = None) -> bool:", "funcdef": "def"}, {"fullname": "howard.functions.commons.concat_into_infile", "modulename": "howard.functions.commons", "qualname": "concat_into_infile", "kind": "function", "doc": "

    The function concat_into_infile concatenates multiple input files into a compressed output file,\nwith support for different compression types and multi-threading.

    \n\n
    Parameters
    \n\n
      \n
    • input_files: A list of input file paths that need to be concatenated into the compressed file
    • \n
    • compressed_file: The compressed_file parameter is an object that represents the file where\nthe concatenated contents of the input files will be written. It is expected to be a file object\nthat has write capabilities
    • \n
    • compression_type: The compression_type parameter specifies the type of compression to be\nused for the output file. The default value is \"none\", which means no compression will be applied.\nOther possible values include \"bgzip\" and \"gzip\", which indicate that the output file should be\ncompressed using the bgzip and, defaults to none
    • \n
    • threads: The \"threads\" parameter specifies the number of threads to use for compression or\ndecompression. It determines how many parallel processes can be executed simultaneously, which can\nhelp improve performance when dealing with large files or multiple files, defaults to 1
    • \n
    • block: The block parameter is used to specify the size of the block when reading the input\nfiles. It is set to 10 ** 6, which means 1 million bytes. This parameter determines how much data\nis read from the input files at a time
    • \n
    \n\n
    Returns
    \n\n
    \n

    a boolean value, specifically True.

    \n
    \n", "signature": "(\tinput_files: list,\tcompressed_file: object,\tcompression_type: str = 'none',\tthreads: int = 1,\tblock: int = 1000000) -> bool:", "funcdef": "def"}, {"fullname": "howard.functions.commons.concat_and_compress_files", "modulename": "howard.functions.commons", "qualname": "concat_and_compress_files", "kind": "function", "doc": "

    The function concat_and_compress_files takes a list of input files, an output file name, and\noptional parameters for compression type, number of threads, block size, compression level, sorting,\nand indexing, and concatenates and compresses the input files into the output file.

    \n\n
    Parameters
    \n\n
      \n
    • input_files: A list of input file paths that need to be concatenated and compressed
    • \n
    • output_file: The output_file parameter is a string that specifies the path and name of the\noutput file that will be created after concatenating and compressing the input files
    • \n
    • compression_type: The compression_type parameter specifies the type of compression to be\napplied to the output file. It can take one of three values: \"bgzip\", \"gzip\", or \"none\", defaults to\nbgzip
    • \n
    • threads: The threads parameter specifies the number of threads to use for compression and\ndecompression. It determines the level of parallelism in the compression process, allowing for\nfaster execution when multiple threads are used, defaults to 1
    • \n
    • memory: The memory parameter specifies the maximum amount of memory (in GB) to use for\nsorting, defaults to 1
    • \n
    • block: The block parameter specifies the size of the block used for reading and writing\ndata during compression. It is set to a default value of 10^6 (1 million) bytes
    • \n
    • compression_level: The compression_level parameter determines the level of compression to\nbe used when compressing the output file. It is an integer value ranging from 0 to 9, where 0\nindicates no compression and 9 indicates maximum compression. The higher the compression level, the\nsmaller the resulting compressed file size, defaults to 6
    • \n
    • sort: The sort parameter is a boolean flag that determines whether the output file should\nbe sorted or not. If sort is set to True, the output file will be sorted using\npysam.bcftools.sort before renaming it. If sort is set to False, defaults to False
    • \n
    • index: The index parameter is a boolean flag that determines whether or not to index the\noutput file after concatenation and compression. If index is set to True, the output file will\nbe indexed using the pysam.tabix_index function with the preset \"vcf\". Make sure VCF is sorted.\nDefaults to False
    • \n
    \n\n
    Returns
    \n\n
    \n

    a boolean value indicating whether the output file exists or not.

    \n
    \n", "signature": "(\tinput_files: list,\toutput_file: str,\tcompression_type: str = 'bgzip',\tthreads: int = 1,\tmemory: int = 1,\tblock: int = 1000000,\tcompression_level: int = 6,\tsort: bool = False,\tindex: bool = False) -> bool:", "funcdef": "def"}, {"fullname": "howard.functions.commons.get_plateform_name_from_duckdb", "modulename": "howard.functions.commons", "qualname": "get_plateform_name_from_duckdb", "kind": "function", "doc": "

    The function get_plateform_name_from_duckdb returns the platform information from a DuckDB connection.

    \n\n
    Parameters
    \n\n
      \n
    • conn: The conn parameter is an instance of the DuckDBPyConnection class from the duckdb\nmodule. It represents a connection to a DuckDB database
    • \n
    \n\n
    Returns
    \n\n
    \n

    the platform information from the DuckDB connection.

    \n
    \n", "signature": "(conn: duckdb.duckdb.DuckDBPyConnection) -> str:", "funcdef": "def"}, {"fullname": "howard.functions.commons.get_duckdb_extension_file", "modulename": "howard.functions.commons", "qualname": "get_duckdb_extension_file", "kind": "function", "doc": "

    This function returns the file path of a DuckDB extension based on the extension name and platform.

    \n\n
    Parameters
    \n\n
      \n
    • extension_name: The name of the DuckDB extension file that is being requested
    • \n
    \n\n
    Returns
    \n\n
    \n

    a string that represents the file path of a DuckDB extension file. The file path is\n constructed using the constant DUCKDB_EXTENSION, the platform name obtained from the\n get_plateform_name_from_duckdb() function, and the extension name passed as an argument to the function.

    \n
    \n", "signature": "(\textension_name: str,\tconn: duckdb.duckdb.DuckDBPyConnection,\tdownload: bool = True) -> str:", "funcdef": "def"}, {"fullname": "howard.functions.commons.load_duckdb_extension", "modulename": "howard.functions.commons", "qualname": "load_duckdb_extension", "kind": "function", "doc": "

    This function loads DuckDB extensions into a connection object and returns a boolean indicating\nwhether all extensions were successfully loaded.

    \n\n
    Parameters
    \n\n
      \n
    • conn: duckdb.DuckDBPyConnection object representing a connection to a DuckDB database
    • \n
    • duckdb_extensions: A list of strings representing the names of the DuckDB extensions to be\nloaded
    • \n
    \n\n
    Returns
    \n\n
    \n

    a boolean value indicating whether all the specified DuckDB extensions were successfully\n loaded or not.

    \n
    \n", "signature": "(conn: duckdb.duckdb.DuckDBPyConnection, duckdb_extensions: list) -> bool:", "funcdef": "def"}, {"fullname": "howard.functions.commons.TimeoutException", "modulename": "howard.functions.commons", "qualname": "TimeoutException", "kind": "class", "doc": "

    Common base class for all non-exit exceptions.

    \n", "bases": "builtins.Exception"}, {"fullname": "howard.functions.commons.time_limit", "modulename": "howard.functions.commons", "qualname": "time_limit", "kind": "function", "doc": "

    \n", "signature": "(seconds):", "funcdef": "def"}, {"fullname": "howard.functions.commons.duckdb_execute", "modulename": "howard.functions.commons", "qualname": "duckdb_execute", "kind": "function", "doc": "

    The duckdb_execute function executes a query using the DuckDB database engine and returns a\nboolean indicating whether the query was successful or not.

    \n\n
    Parameters
    \n\n
      \n
    • query: The query parameter is a string that represents the SQL query you want to execute in\nDuckDB. It can be any valid SQL statement, such as SELECT, INSERT, UPDATE, DELETE, etc
    • \n
    • threads: The \"threads\" parameter specifies the number of threads to use for executing the\nquery. By default, it is set to 1, meaning that the query will be executed using a single thread,\ndefaults to 1
    • \n
    \n\n
    Returns
    \n\n
    \n

    The function duckdb_execute returns a boolean value. It returns True if the query\n execution is successful, and False if it is not successful.

    \n
    \n", "signature": "(query: str, threads: int = 1) -> bool:", "funcdef": "def"}, {"fullname": "howard.functions.commons.genome_build_switch", "modulename": "howard.functions.commons", "qualname": "genome_build_switch", "kind": "function", "doc": "

    The genome_build_switch function takes an assembly name as input and returns a new\nassembly name if a different version of the same genome is available, otherwise it returns\nNone.

    \n\n
    Parameters
    \n\n
      \n
    • assembly: The assembly parameter is a string that represents the name or identifier\nof a genome assembly
    • \n
    \n\n
    Returns
    \n\n
    \n

    The function genome_build_switch returns a string.

    \n
    \n", "signature": "(assembly: str) -> str:", "funcdef": "def"}, {"fullname": "howard.functions.commons.get_argument", "modulename": "howard.functions.commons", "qualname": "get_argument", "kind": "function", "doc": "

    The get_argument function retrieves information about a specific argument from a dictionary, and\ncan also set its \"required\" status.

    \n\n
    Parameters
    \n\n
      \n
    • arguments: A dictionary containing information about the arguments passed to a function or\nmethod
    • \n
    • arg: The arg parameter is a string that represents the name of the argument that you want\nto retrieve information for
    • \n
    • required: The required parameter is a boolean value that determines whether the argument is\nrequired or not. If set to True, the function will return an empty dictionary if the argument is not\nfound in the arguments dictionary. If set to False (default), the function will still return an\nempty dictionary if the argument is not found, defaults to False
    • \n
    • remove_infos: The remove_infos parameter is a list that contains the names of specific\ninformation that you want to remove from the argument dictionary. In the code, it is used to remove\nspecific argument information such as \"gooey\" from the arg_infos dictionary
    • \n
    \n\n
    Returns
    \n\n
    \n

    a dictionary containing information about a specific argument, specified by the arg\n parameter. If the argument is found in the arguments dictionary, the function returns a dictionary\n containing the information about that argument. If the argument is not found, an empty dictionary is\n returned. The required parameter is used to specify whether the argument is required or not, and\n this information is added to

    \n
    \n", "signature": "(\targuments: dict = {},\targ: str = '',\trequired: bool = False,\tremove_infos: list = ['gooey', 'extra'],\tadd_metavar: bool = False) -> dict:", "funcdef": "def"}, {"fullname": "howard.functions.commons.get_argument_gooey", "modulename": "howard.functions.commons", "qualname": "get_argument_gooey", "kind": "function", "doc": "

    The function get_argument_gooey takes an argument and returns the corresponding widget and options\nfor the Gooey library in Python.

    \n\n
    Parameters
    \n\n
      \n
    • arg: The arg parameter is a string that represents the name of the argument you want to\nretrieve information for
    • \n
    \n\n
    Returns
    \n\n
    \n

    The function get_argument_gooey returns two values: widget and options.

    \n
    \n", "signature": "(arguments: dict = {}, arg: str = ''):", "funcdef": "def"}, {"fullname": "howard.functions.commons.get_argument_to_mk", "modulename": "howard.functions.commons", "qualname": "get_argument_to_mk", "kind": "function", "doc": "

    The function get_argument_to_mk generates a formatted string containing information about a\ncommand line argument, which can be output in either Markdown or HTML format.

    \n\n
    Parameters
    \n\n
      \n
    • arg: The arg parameter is a string that represents the name of the argument. It is used to\ngenerate the header and text for the argument
    • \n
    • argument: The argument parameter is a dictionary that contains information about the\nargument. It has the following keys:
    • \n
    • mode: The mode parameter is used to specify the format of the output. It can have two\npossible values: \"mk\" or \"html\". If \"mk\" is specified, the output will be formatted using Markdown\nsyntax. If \"html\" is specified, the output will be formatted using HTML syntax, defaults to mk
    • \n
    \n\n
    Returns
    \n\n
    \n

    a formatted string that provides information about a command line argument. The format of\n the string depends on the value of the mode parameter. If mode is set to \"html\", the string is\n formatted as an HTML <pre> block. Otherwise, the string is formatted as a Markdown code block. The\n string includes the argument name, metavariable, help text, required

    \n
    \n", "signature": "(arg: str, argument: dict = {}, mode: str = 'mk') -> str:", "funcdef": "def"}, {"fullname": "howard.functions.commons.help_generation_from_dict", "modulename": "howard.functions.commons", "qualname": "help_generation_from_dict", "kind": "function", "doc": "

    The help_generation_from_dict function generates help documentation from a dictionary input,\nsupporting markdown and HTML output formats with specific sections like \"__help\", \"__format\",\n\"__default\", \"__examples\", \"__code\", and \"__examples_code\".

    \n\n
    Parameters
    \n\n
      \n
    • element: The element parameter in the help_generation_from_dict function is a string that\nrepresents the current element or key in the dictionary for which help documentation is being\ngenerated. It is the specific key or element within the dictionary that you want to generate help\ndocumentation for
    • \n
    • help_dict: The help_dict parameter in the help_generation_from_dict function is a\ndictionary that contains the help documentation for various elements or keys. This dictionary\nstructure allows for organizing and storing information related to each element, such as help text,\nformatting details, default values, and examples. The function processes
    • \n
    • previous: The previous parameter in the help_generation_from_dict function is used to\nkeep track of the previous elements in the hierarchy. It is a string that represents the path to the\ncurrent element being processed. This parameter helps in maintaining the correct hierarchy level\nwhen generating help documentation for nested elements in a
    • \n
    • output_type: The output_type parameter in the help_generation_from_dict function\nspecifies the type of output format that you want the generated help documentation to be in. It can\ntake two possible values: \"markdown\" or \"html\". By default, the output type is set to markdown,\ndefaults to markdown
    • \n
    • level: The level parameter in the help_generation_from_dict function is used to keep\ntrack of the depth or level of recursion in the generation process. It starts at 1 for the initial\ncall and increments by 1 for each level of recursion into sub-elements. This parameter helps in\nformatting the, defaults to 1
    • \n
    • table: The table parameter in the help_generation_from_dict function is used to store the\ntable of contents for the generated help documentation. It is a string that contains the formatted\ntable of contents with links to different sections or elements within the documentation. This table\nhelps users navigate through the documentation easily
    • \n
    • generate_table: The generate_table parameter in the help_generation_from_dict function is\na boolean flag that determines whether the function should generate a table of contents for the help\ndocumentation. When set to True, the function will include a table of contents in the output based\non the hierarchy of elements in the, defaults to False
    • \n
    • code_type: The code_type parameter in the help_generation_from_dict function specifies\nthe type of code examples that will be included in the generated help documentation. It defaults to\n\"json\", meaning that the code examples provided in the \"__examples_code\" section of the dictionary\nwill be in JSON format
    • \n
    • auto_default: The auto_default parameter in the help_generation_from_dict function is a\nboolean flag that determines whether the function should automatically populate certain sections of\nthe help documentation based on the information available in the dictionary and the element's\narguments. When set to True, the function will automatically fill in sections, defaults to True
    • \n
    • previous_sections: The previous_sections parameter in the help_generation_from_dict\nfunction is a boolean flag that determines whether the function should include previous sections in\nthe hierarchy when generating help documentation for nested elements. When set to True, the\nfunction will maintain the previous sections in the hierarchy path, helping to provide, defaults to\nFalse
    • \n
    \n\n
    Returns
    \n\n
    \n

    The help_generation_from_dict function returns the generated help documentation based on\n the input help_dict dictionary. The output is formatted based on the specified output_type\n (either \"markdown\" or \"html\") and includes sections such as \"__help\", \"__format\", \"__default\", and\n \"__examples\" if they are present in the help_dict.

    \n
    \n", "signature": "(\telement: str,\thelp_dict: dict,\tprevious: str = '',\toutput_type: str = 'markdown',\tlevel: int = 1,\ttable: str = '',\tgenerate_table: bool = False,\tcode_type: str = '',\tauto_default: bool = True,\tprevious_sections: bool = False):", "funcdef": "def"}, {"fullname": "howard.functions.commons.help_generation_from_json", "modulename": "howard.functions.commons", "qualname": "help_generation_from_json", "kind": "function", "doc": "

    The help_generation_from_json function reads a JSON file containing help information, converts it\ninto a specified output format, and returns the generated help content.

    \n\n
    Parameters
    \n\n
      \n
    • help_json_file: The help_json_file parameter is a string that should contain the file path\nto the JSON file from which help information will be extracted. This JSON file likely contains\nstructured data that will be used to generate the help content
    • \n
    • output_type: The output_type parameter in the help_generation_from_json function\nspecifies the format in which the generated help content will be output. By default, it is set to\n\"markdown\", which means the help content will be formatted using Markdown syntax. However, you can\nalso specify other output formats such, defaults to markdown
    • \n
    • title: The title parameter in the help_generation_from_json function is a string that\nrepresents the title of the help documentation that will be generated. It is used to provide a title\nfor the help content to make it more organized and informative. By default, the title is set to\n\"Help\", defaults to Help (optional)
    • \n
    • code_type: The code_type parameter in the help_generation_from_json function is used to\nspecify the type of code examples that will be included in the generated help content. This\nparameter allows you to define the format or language of the code examples to be displayed alongside\nthe help information extracted from the JSON
    • \n
    • include_toc: The include_toc parameter in the help_generation_from_json function is a\nboolean flag that determines whether a table of contents (TOC) should be included in the generated\nhelp content. If include_toc is set to True, a table of contents will be generated based,\ndefaults to False
    • \n
    \n\n
    Returns
    \n\n
    \n

    The function help_generation_from_json returns the generated help content based on the\n information stored in the JSON file provided as input.

    \n
    \n", "signature": "(\thelp_json_file: str,\toutput_type: str = 'markdown',\ttitle='Help',\tcode_type: str = '',\tinclude_toc: bool = False):", "funcdef": "def"}, {"fullname": "howard.functions.commons.RawTextArgumentDefaultsHelpFormatter", "modulename": "howard.functions.commons", "qualname": "RawTextArgumentDefaultsHelpFormatter", "kind": "class", "doc": "

    Help message formatter which adds default values to argument help.

    \n\n

    Only the name of this class is considered a public API. All the methods\nprovided by the class are considered an implementation detail.

    \n", "bases": "argparse.ArgumentDefaultsHelpFormatter, argparse.RawTextHelpFormatter"}, {"fullname": "howard.functions.commons.help_header", "modulename": "howard.functions.commons", "qualname": "help_header", "kind": "function", "doc": "

    The help_header function generates a header for the help documentation based on the metadata\ninformation provided in the setup file.

    \n\n
    Parameters
    \n\n
      \n
    • setup: The setup parameter is a string that represents the path to a configuration file.\nThis file contains metadata about the program, such as its name, version, description, and long\ndescription content type
    • \n
    \n\n
    Returns
    \n\n
    \n

    The function help_header returns a string that represents the header for the help\n documentation. The header includes the program name, version, authors, and description.

    \n
    \n", "signature": "(setup: str = None) -> str:", "funcdef": "def"}, {"fullname": "howard.functions.commons.help_generation", "modulename": "howard.functions.commons", "qualname": "help_generation", "kind": "function", "doc": "

    The help_generation function generates a parser object for command-line arguments, as well as\nmarkdown or HTML help documentation for those arguments.

    \n\n
    Parameters
    \n\n
      \n
    • arguments_dict: A dictionary containing the arguments for the function. It has three keys:
    • \n
    • parser: The parser parameter is an instance of the argparse.ArgumentParser class. It is\nused to define the command-line interface and parse the command-line arguments. If no parser is\nprovided, a new instance of argparse.ArgumentParser will be created
    • \n
    • setup: The setup parameter is a string that represents the path to a configuration file.\nThis file contains metadata about the program, such as its name, version, description, and long\ndescription content type
    • \n
    • output_type: The output_type parameter determines the format of the output. It can be one\nof the following values:, defaults to parser
    • \n
    \n\n
    Returns
    \n\n
    \n

    The function help_generation returns different outputs based on the value of the\n output_type parameter.

    \n
    \n", "signature": "(arguments_dict: dict = {}, parser=None, output_type: str = 'parser'):", "funcdef": "def"}, {"fullname": "howard.functions.commons.format_arg_help", "modulename": "howard.functions.commons", "qualname": "format_arg_help", "kind": "function", "doc": "

    The function format_arg_help formats a help message for a function argument, including a default\nvalue if provided.

    \n\n
    Parameters
    \n\n
      \n
    • help_message: The help_message parameter is a string that contains the description or help\nmessage for a function or method argument. It provides information about the purpose or usage of the\nargument
    • \n
    • default_value: The default_value parameter in the format_arg_help function is an optional\nparameter that specifies a default value for the argument being described in the help message. If a\ndefault value is provided, it will be included in the formatted help message to indicate the default\nvalue for that argument
    • \n
    \n\n
    Returns
    \n\n
    \n

    The function format_arg_help returns a formatted help message with a default value\n appended at the end if provided.

    \n
    \n", "signature": "(help_message: str, default_value: object = None) -> str:", "funcdef": "def"}, {"fullname": "howard.functions.commons.bed_sort", "modulename": "howard.functions.commons", "qualname": "bed_sort", "kind": "function", "doc": "

    The bed_sort function reads a tab-separated input file, sorts the data based on columns 0, 1, and\n2 in ascending order, and writes the sorted data to a tab-separated output file.

    \n\n
    Parameters
    \n\n
      \n
    • input: The input parameter is the path to the input file that contains the data to be\nsorted. This file should be in a tab-separated format
    • \n
    • output: The output parameter is a string that specifies the path and filename of the output\nfile where the sorted data will be saved
    • \n
    \n", "signature": "(input: str, output: str) -> str:", "funcdef": "def"}, {"fullname": "howard.functions.commons.full_path", "modulename": "howard.functions.commons", "qualname": "full_path", "kind": "function", "doc": "

    The function full_path takes a path string as input and returns the full expanded path.

    \n\n
    Parameters
    \n\n
      \n
    • path: The full_path function takes a string path as input and returns the full path by\nexpanding the user's home directory in the path if it is not None
    • \n
    \n\n
    Returns
    \n\n
    \n

    The function full_path is returning the expanded version of the input path using\n os.path.expanduser(path). This function expands the ~ character in the path to the user's home\n directory.

    \n
    \n", "signature": "(path: str) -> str:", "funcdef": "def"}, {"fullname": "howard.functions.commons.get_default_argument", "modulename": "howard.functions.commons", "qualname": "get_default_argument", "kind": "function", "doc": "

    The function get_default_argument retrieves the default value of a specified argument from a\ndictionary of arguments.

    \n\n
    Parameters
    \n\n
      \n
    • arguments_dict: The arguments_dict parameter is a dictionary that contains information\nabout arguments
    • \n
    • argument: The get_default_argument function takes in two parameters:
    • \n
    \n\n
    Returns
    \n\n
    \n

    The function is attempting to return the default value of a specific argument from a\n dictionary of arguments. However, there is a mistake in the code. The correct key to access the\n argument's default value should be \"argument\" instead of \"arguments\". Therefore, the function will\n return the default value of the specified argument if it exists, otherwise it will return None.

    \n
    \n", "signature": "(arguments_dict: dict, argument: str):", "funcdef": "def"}, {"fullname": "howard.functions.commons.set_param", "modulename": "howard.functions.commons", "qualname": "set_param", "kind": "function", "doc": "

    The function set_param takes input arguments and adds them to a dictionary based on certain\nconditions.

    \n\n
    Parameters
    \n\n
      \n
    • param: The param parameter is a dictionary that stores configuration parameters or\nsettings. It is used to collect and store various arguments and their values based on the conditions\nspecified in the set_param function
    • \n
    • args: The args parameter in the set_param function is likely an instance of the\nargparse.Namespace class, which is typically used to store the command-line arguments parsed by\nthe argparse module in Python. It contains the values of the arguments provided by the user when\nthe script
    • \n
    • arguments_dict: The arguments_dict parameter seems to be a dictionary that likely contains\ninformation about arguments and their default values. This dictionary is used in the function\nset_param to determine whether a specific argument should be included in the param dictionary\nbased on certain conditions
    • \n
    • argument: The argument parameter in the set_param function represents the specific\nargument that you want to set in the param dictionary. It is the key that will be used to store\nthe value in the dictionary
    • \n
    • section: The section parameter in the set_param function is used to specify a section\nwithin the param dictionary where the argument value should be stored. If a section is provided,\nthe argument value will be stored under that section in the param dictionary. If no `section
    • \n
    \n\n
    Returns
    \n\n
    \n

    the updated param dictionary after setting the specified argument value based on the\n conditions provided in the function.

    \n
    \n", "signature": "(\tparam: dict,\targs: <module 'argparse' from '/Users/lebechea/miniconda/envs/howard_devel/lib/python3.10/argparse.py'>,\targuments_dict: dict,\targument: str,\tsection: list = None) -> dict:", "funcdef": "def"}, {"fullname": "howard.functions.commons.add_value_into_dict", "modulename": "howard.functions.commons", "qualname": "add_value_into_dict", "kind": "function", "doc": "

    The function add_value_into_dict adds a value into a dictionary tree based on the provided\nsections.

    \n\n
    Parameters
    \n\n
      \n
    • dict_tree: The dict_tree parameter is a dictionary representing a tree structure. It serves\nas the starting point for adding a value based on the provided sections
    • \n
    • sections: The sections parameter in the add_value_into_dict function represents a list of\nsections corresponding to successive keys in the dictionary. These sections are used to traverse the\ndictionary tree and determine the location where the value should be added. Each element in the\nsections list corresponds to a key in
    • \n
    • value: The value parameter in the add_value_into_dict function represents the value that\nyou want to add into the dictionary tree at the specified location determined by the sections\nlist. This value can be of any data type (e.g., int, str, list, dict, etc.)
    • \n
    \n\n
    Returns
    \n\n
    \n

    The function add_value_into_dict returns the updated dictionary tree after adding the\n value based on the given sections.

    \n
    \n", "signature": "(dict_tree: dict, sections: list = [], value=None):", "funcdef": "def"}, {"fullname": "howard.functions.commons.load_param", "modulename": "howard.functions.commons", "qualname": "load_param", "kind": "function", "doc": "

    The function load_param takes command line arguments and returns a dictionary containing\nparameters loaded from a file or as JSON.

    \n\n
    Parameters
    \n\n
      \n
    • args: It seems like the code snippet you provided is a function named load_param that takes\nan argument args of type argparse and returns a dictionary. The function is intended to load\nparameters from a file or a string
    • \n
    \n\n
    Returns
    \n\n
    \n

    A dictionary containing the loaded parameters is being returned.

    \n
    \n", "signature": "(\targs: <module 'argparse' from '/Users/lebechea/miniconda/envs/howard_devel/lib/python3.10/argparse.py'>) -> dict:", "funcdef": "def"}, {"fullname": "howard.functions.commons.load_config_args", "modulename": "howard.functions.commons", "qualname": "load_config_args", "kind": "function", "doc": "

    The function load_config_args takes in arguments, extracts specific keys from them, and loads\nparameters in JSON format.

    \n\n
    Parameters
    \n\n
      \n
    • args: The load_config_args function takes in an args object as input. This args object\nseems to contain various configuration parameters that the function will use to load and return\nspecific values
    • \n
    \n\n
    Returns
    \n\n
    \n

    The function load_config_args returns the variables arguments_dict, setup_cfg,\n config, and param.

    \n
    \n", "signature": "(args):", "funcdef": "def"}, {"fullname": "howard.functions.commons.load_args", "modulename": "howard.functions.commons", "qualname": "load_args", "kind": "function", "doc": "

    The load_args function processes arguments based on specified parameters and conditions, raising\nan error if a specified argument is not found.

    \n\n
    Parameters
    \n\n
      \n
    • param: The param parameter in the load_args function is a dictionary that stores the\narguments and their values. It is used to keep track of the arguments that have been loaded or\nprocessed during the argument parsing process
    • \n
    • args: The args parameter in the load_args function is an instance of the\nargparse.ArgumentParser class from the argparse module in Python. This object is used to parse\ncommand-line arguments and options. It contains information about the arguments passed to the script\nwhen it was executed
    • \n
    • arguments_dict: The arguments_dict parameter in the load_args function is a dictionary\nthat likely contains information about the arguments expected by the script. It may include details\nsuch as the argument names, their corresponding sections, and any additional parameters related to\neach argument. This dictionary is used within the `load_args
    • \n
    • command: The command parameter in the load_args function is a string that represents a\nspecific command or action for which arguments need to be loaded. This parameter is used to identify\nthe command-specific arguments that should be processed during argument parsing
    • \n
    • arguments_list: The arguments_list parameter in the load_args function is a dictionary\nthat contains the names of arguments that are expected to be present in the args object. This list\nis used to specify which arguments should be processed by the function load_args during the\nargument parsing process
    • \n
    • strict: The strict parameter in the load_args function is a boolean flag that determines\nwhether an error should be raised if an argument specified in the arguments_list list is not found\nin the args object. If strict is set to True, an error will be raised, defaults to False
    • \n
    \n\n
    Returns
    \n\n
    \n

    The function load_args is returning a dictionary named param after processing the\n arguments based on the input parameters and conditions specified in the function.

    \n
    \n", "signature": "(\tparam: dict,\targs: <module 'argparse' from '/Users/lebechea/miniconda/envs/howard_devel/lib/python3.10/argparse.py'>,\targuments_dict: dict,\tcommand: str = None,\targuments_list: dict = {},\tstrict: bool = False,\tsection_prefix: list = []) -> dict:", "funcdef": "def"}, {"fullname": "howard.functions.commons.get_random", "modulename": "howard.functions.commons", "qualname": "get_random", "kind": "function", "doc": "

    The function get_random generates a random string of uppercase letters and digits with a default\nlength of 10.

    \n\n
    Parameters
    \n\n
      \n
    • N: The parameter N in the get_random function represents the length of the random string\nthat will be generated. By default, if no value is provided for N, it will generate a random\nstring of length 10 consisting of uppercase letters and digits, defaults to 10
    • \n
    \n\n
    Returns
    \n\n
    \n

    A random string of length N consisting of uppercase letters and digits.

    \n
    \n", "signature": "(N: int = 10) -> str:", "funcdef": "def"}, {"fullname": "howard.functions.commons.transcripts_file_to_df", "modulename": "howard.functions.commons", "qualname": "transcripts_file_to_df", "kind": "function", "doc": "

    This Python function reads a transcripts file into a pandas DataFrame, filtering out comment lines.

    \n\n
    Parameters
    \n\n
      \n
    • transcripts_file: The transcripts_file parameter is a string that represents the file path\nto a file containing transcript information. This function is designed to read the contents of this\nfile and convert it into a pandas DataFrame. The file is expected to be tab-separated with two\ncolumns: \"transcript\" and \"gene
    • \n
    • column_names: The column_names parameter is a list that specifies the column names expected\nin the transcripts file. By default, it is set to [\"transcript\", \"gene\"], indicating that the file\nshould have two columns named \"transcript\" and \"gene\". If the actual column names in the
    • \n
    \n\n
    Returns
    \n\n
    \n

    A pandas DataFrame containing transcript and gene information read from the specified file\n after filtering out comment lines is being returned.

    \n
    \n", "signature": "(\ttranscripts_file: str,\tcolumn_names: list = ['transcript', 'gene']) -> pandas.core.frame.DataFrame:", "funcdef": "def"}, {"fullname": "howard.functions.commons.identical", "modulename": "howard.functions.commons", "qualname": "identical", "kind": "function", "doc": "

    The identical function compares the contents of multiple VCF files to determine if they are\nidentical.

    \n\n
    Parameters
    \n\n
      \n
    • vcf_list: The vcf_list parameter is a list of file paths to VCF (Variant Call Format) files\nthat you want to compare for identity. The function reads the contents of these files and checks if\nthey are identical based on the specified conditions
    • \n
    • begin: The begin parameter in the identical function is used to specify a string that\nindicates the beginning of a line in the input files. If a line in the input file starts with the\nspecified begin string, it will be skipped and not included in the comparison process. By default,\ndefaults to ##
    • \n
    • line_strip: The line_strip parameter in the identical function is a boolean flag that\ndetermines whether each line read from the input files should be stripped of leading and trailing\nwhitespaces before being compared. If line_strip is set to True, each line will be stripped\nusing the `strip, defaults to True
    • \n
    \n\n
    Returns
    \n\n
    \n

    The function identical is returning a boolean value. It returns True if all the lines\n in the VCF files provided in the vcf_list are identical, and False otherwise.

    \n
    \n", "signature": "(vcf_list: List[str], begin: str = '##', line_strip: bool = True) -> bool:", "funcdef": "def"}, {"fullname": "howard.functions.commons.check_docker_image_exists", "modulename": "howard.functions.commons", "qualname": "check_docker_image_exists", "kind": "function", "doc": "

    Checks if a Docker image with a specific tag exists in the local repository.

    \n\n
    Parameters
    \n\n
      \n
    • image_with_tag: Image name with tag (e.g., \"image: version\")
    • \n
    \n\n
    Returns
    \n\n
    \n

    True if the image exists, False otherwise

    \n
    \n", "signature": "(image_with_tag: str) -> bool:", "funcdef": "def"}, {"fullname": "howard.functions.commons.params_string_to_dict", "modulename": "howard.functions.commons", "qualname": "params_string_to_dict", "kind": "function", "doc": "

    The params_string_to_dict function in Python converts a string of parameters into a dictionary\nusing specified separators and clears certain characters from the parameter values.

    \n\n
    Parameters
    \n\n
      \n
    • params: The params parameter in the params_string_to_dict function is a string of\nparameters that you want to convert into a dictionary. It contains the information you want to parse\nand organize into key-value pairs
    • \n
    • param_sep: The param_sep parameter in the params_string_to_dict function is used to\nspecify the separator that separates different parameters in the input string params. By default,\nthe param_sep is set to \":\" in the function definition. This means that the function expects the\nparameters in the input, defaults to :
    • \n
    • var_val_sep: The var_val_sep parameter in the params_string_to_dict function is used to\nspecify the separator between the variable and value in the input string params. By default, it is\nset to \"=\", which means that the function expects the format of each parameter in the params,\ndefaults to =
    • \n
    • val_clear: The val_clear parameter in the params_string_to_dict function is a dictionary\nthat contains key-value pairs used to clear specific characters from the parameter values before\nstoring them in the resulting dictionary
    • \n
    • header: The header parameter in the params_string_to_dict function is a boolean flag that\ndetermines whether the input string params has a header that should be skipped when processing the\nparameters. If header is set to True, the function will start processing parameters from the\nsecond line onwards, defaults to True
    • \n
    \n\n
    Returns
    \n\n
    \n

    The function params_string_to_dict returns a dictionary containing the parameters\n extracted from the input string params.

    \n
    \n", "signature": "(\tparams: str,\tparam_sep: str = ':',\tvar_val_sep: str = '=',\tval_clear: dict = {'+': ',', ' ': ''},\theader: bool = True) -> dict:", "funcdef": "def"}, {"fullname": "howard.functions.commons.determine_value_type", "modulename": "howard.functions.commons", "qualname": "determine_value_type", "kind": "function", "doc": "

    The function determine_value_type determines the type of a given value in a string format,\nhandling lists of values separated by a specified separator and skipping specified null-like\nvalues.

    \n\n
    Parameters
    \n\n
      \n
    • value: The value parameter in the determine_value_type function is the input value\nthat you want to determine the type of. It can be a string containing one or more values\nseparated by a specified separator (default is ';')
    • \n
    • sep: The sep parameter in the determine_value_type function is used to specify the\nseparator character that is used to split the input value string into individual values. By\ndefault, the separator is set to \";\", but you can change it to a different character if needed,\ndefaults to ;
    • \n
    • skip_null: The skip_null parameter in the determine_value_type function is a list\nthat contains values that should be skipped during the type determination process. These values\nare considered as null-like or empty values and are not taken into account when determining the\ntype of the given value
    • \n
    \n\n
    Returns
    \n\n
    \n

    The function determine_value_type returns a string indicating the type of the given\n value. The possible return values are:

    \n \n
      \n
    • \"VARCHAR\" if the value contains at least one non-numeric character
    • \n
    • \"DOUBLE\" if the value contains at least one floating-point number
    • \n
    • \"BIGINT\" if the value contains only integers
    • \n
    • None if the value is empty or does not match any
    • \n
    \n
    \n", "signature": "(value: str, sep: str = ';', skip_null: list = ['', '.']) -> str:", "funcdef": "def"}, {"fullname": "howard.functions.commons.determine_column_types", "modulename": "howard.functions.commons", "qualname": "determine_column_types", "kind": "function", "doc": "

    The function determine_column_types analyzes a list of values to determine the predominant\ndata type among VARCHAR, DOUBLE, and BIGINT.

    \n\n
    Parameters
    \n\n
      \n
    • values_list: It seems like you have provided the code snippet for a function that\ndetermines the type of values in a list, but you have not provided the actual values_list that\nthe function will operate on. If you provide me with the values_list, I can help you test the\nfunction and see how it determines the
    • \n
    \n\n
    Returns
    \n\n
    \n

    the type of the column based on the types of values present in the input list. It will\n return \"VARCHAR\" if the list contains any string values, \"DOUBLE\" if it contains any float\n values, \"BIGINT\" if it contains any integer values, and \"VARCHAR\" if none of the specific types\n are found.

    \n
    \n", "signature": "(values_list: list) -> str:", "funcdef": "def"}, {"fullname": "howard.functions.commons.detect_column_type", "modulename": "howard.functions.commons", "qualname": "detect_column_type", "kind": "function", "doc": "

    The function detect_column_type determines the type of a given column in a DataFrame as either\nDATETIME, BOOLEAN, DOUBLE, or VARCHAR.

    \n\n
    Parameters
    \n\n
      \n
    • column: The function detect_column_type takes a column as input and determines its data\ntype based on certain conditions. The conditions are as follows:
    • \n
    \n\n
    Returns
    \n\n
    \n

    The function detect_column_type returns a string indicating the type of data in the input\n column. The possible return values are \"DATETIME\", \"BOOLEAN\", \"DOUBLE\", or \"VARCHAR\" based on the\n conditions checked in the function.

    \n
    \n", "signature": "(column) -> str:", "funcdef": "def"}, {"fullname": "howard.functions.commons.determine_column_number", "modulename": "howard.functions.commons", "qualname": "determine_column_number", "kind": "function", "doc": "

    \n", "signature": "(values_list: list) -> str:", "funcdef": "def"}, {"fullname": "howard.functions.commons.clean_annotation_field", "modulename": "howard.functions.commons", "qualname": "clean_annotation_field", "kind": "function", "doc": "

    The clean_annotation_field function removes characters from a string that are not alphanumeric or\nin a specified list.

    \n\n
    Parameters
    \n\n
      \n
    • name: The name parameter is a string that represents the input text that you want to clean.\nIt typically contains annotations or other text that you want to process
    • \n
    • char_allowed: The char_allowed parameter is a list that contains characters that are\nallowed to remain in the name string after cleaning. Any character in the name string that is\nnot alphanumeric and not in the char_allowed list will be removed during the cleaning process
    • \n
    \n\n
    Returns
    \n\n
    \n

    The function clean_annotation_field returns a cleaned version of the name string, where\n only alphanumeric characters and characters from the char_allowed list are kept.

    \n
    \n", "signature": "(name: str = '', char_allowed: list = None) -> str:", "funcdef": "def"}, {"fullname": "howard.functions.commons.docker_automount", "modulename": "howard.functions.commons", "qualname": "docker_automount", "kind": "function", "doc": "

    Add needed volume to the tool container, check first if we are already inside one otherwise return empty string

    \n\n
    Parameters
    \n\n
      \n
    • containerid: for other linux distribution catch container mount from container ID
    • \n
    \n\n
    Returns
    \n\n
    \n

    string containing volume to add

    \n
    \n", "signature": "() -> str:", "funcdef": "def"}, {"fullname": "howard.functions.commons.sort_contigs", "modulename": "howard.functions.commons", "qualname": "sort_contigs", "kind": "function", "doc": "

    Function that sort contigs in VCF header

    \n\n

    Args:\n vcf_reader (vcf): VCF object from VCF package

    \n\n

    Returns:\n vcf:VCF object from VCF package

    \n", "signature": "(vcf_reader):", "funcdef": "def"}, {"fullname": "howard.functions.databases", "modulename": "howard.functions.databases", "kind": "module", "doc": "

    \n"}, {"fullname": "howard.functions.databases.generate_databases_param", "modulename": "howard.functions.databases", "qualname": "generate_databases_param", "kind": "function", "doc": "

    This function generates database parameters based on specified arguments and assemblies.

    \n\n
    Parameters
    \n\n
      \n
    • args: The args parameter in the generate_databases_param function is expected to be an\ninstance of the argparse module, which is commonly used for parsing command-line arguments. This\nparameter is used to retrieve various arguments and options provided by the user when running the\nscript or program
    • \n
    • assemblies: The assemblies parameter is a list containing the assemblies for which\ndatabases will be generated. The function generate_databases_param takes various arguments using\nthe argparse module and generates database parameters based on these inputs. If the\ngenerate_param argument is provided and set to True,
    • \n
    \n\n
    Returns
    \n\n
    \n

    None

    \n
    \n", "signature": "(\targs: <module 'argparse' from '/Users/lebechea/miniconda/envs/howard_devel/lib/python3.10/argparse.py'>,\tassemblies: list = []):", "funcdef": "def"}, {"fullname": "howard.functions.databases.query_and_concatenate_columns", "modulename": "howard.functions.databases", "qualname": "query_and_concatenate_columns", "kind": "function", "doc": "

    This function performs an SQL query on a large Parquet file and concatenates multiple columns (if not empty),\nincluding the column name in the concatenation.

    \n\n
    Parameters
    \n\n
      \n
    • parquet_file: The path to the Parquet file
    • \n
    • output_file: The path to the output file where the concatenated data will be written
    • \n
    • columns: The list of columns to concatenate
    • \n
    \n", "signature": "(parquet_file: str, output_file: str, columns: list):", "funcdef": "def"}, {"fullname": "howard.functions.databases.databases_infos", "modulename": "howard.functions.databases", "qualname": "databases_infos", "kind": "function", "doc": "

    The databases_infos function scans database folders and retrieves information about the databases\nfound, including their folder, release, assembly, subdatabase, format, header, and parameters.

    \n\n
    Parameters
    \n\n
      \n
    • database_folders: A list of folders where the databases are located
    • \n
    • database_folder_releases: A list of specific releases of the database folders to include in\nthe search. If None, all releases will be included
    • \n
    • assembly: The assembly parameter is a string that specifies the assembly version of the\ndatabases to be searched. It is used to filter the databases based on their assembly version. The\ndefault value is \"hg19\", defaults to hg19
    • \n
    • database_formats: The database_formats parameter is a list that specifies the formats of\nthe databases to include in the results. If this parameter is not provided or is set to None, all\ndatabase formats will be included
    • \n
    • config: The config parameter is a dictionary that contains configuration settings for the\nfunction. It has the following structure:
    • \n
    \n\n
    Returns
    \n\n
    \n

    The databases_infos function returns a dictionary containing information about the\n databases found in the specified database folders. The keys of the dictionary are the paths to the\n database files, and the values are dictionaries containing the following information: folder,\n release, assembly, subdatabase, format, header, and parameters.

    \n
    \n", "signature": "(\tdatabase_folders: list = [],\tdatabase_folder_releases: list = ['current'],\tassembly: str = 'hg19',\tdatabase_formats: list = None,\tconfig: dict = {}) -> dict:", "funcdef": "def"}, {"fullname": "howard.functions.databases.databases_param", "modulename": "howard.functions.databases", "qualname": "databases_param", "kind": "function", "doc": "

    The databases_param function takes in a dictionary of database information, an optional output\nfile path, and a boolean flag for bcftools preference, and returns a dictionary containing the\nparameters for parquet and bcftools annotations.

    \n\n
    Parameters
    \n\n
      \n
    • databases_infos_dict: A dictionary containing information about databases. Each key in the\ndictionary represents the name of a database, and the corresponding value is another dictionary\ncontaining information about the database, such as its format and parameters
    • \n
    • output: The output parameter is a string that specifies the path and filename of the output\nfile where the generated JSON object will be written. If this parameter is not provided or is set to\nNone, the JSON object will not be written to a file
    • \n
    • output_description: The output_description parameter is a string that specifies the path\nand filename of the output file where the description of the databases will be written. If this\nparameter is not provided or is set to None, the description will not be written to a file
    • \n
    • bcftools_preference: The bcftools_preference parameter is a boolean flag that determines\nwhether to prioritize databases in the BCFTOOLS format. If bcftools_preference is set to True,\ndatabases in the BCFTOOLS format will be given priority over other formats. If `bcftools, defaults\nto False
    • \n
    \n\n
    Returns
    \n\n
    \n

    The function databases_param returns a dictionary object named \"param_stats_show\".

    \n
    \n", "signature": "(\tdatabases_infos_dict: dict,\toutput: str = None,\toutput_description: str = None,\tbcftools_preference: bool = False) -> dict:", "funcdef": "def"}, {"fullname": "howard.functions.databases.databases_download_annovar", "modulename": "howard.functions.databases", "qualname": "databases_download_annovar", "kind": "function", "doc": "

    This function downloads and extracts Annovar databases for specified assemblies and files.

    \n\n
    Parameters
    \n\n
      \n
    • folder: The folder where the Annovar databases will be downloaded to
    • \n
    • files: The files parameter is a list of specific Annovar database files to download. If not\nprovided, only the mandatory files will be downloaded. If set to \"ALL\", all available files will be\ndownloaded
    • \n
    • assemblies: A list of genome assemblies for which Annovar databases will be downloaded.\nDefault is [\"hg19\"]
    • \n
    • annovar_url: The URL where Annovar databases can be downloaded from, defaults to\nhttp://www.openbioinformatics.org/annovar/download
    • \n
    • threads: The \"threads\" parameter specifies the number of threads (parallel processes) to use\nfor download and extract/uncompress files. Default: 1
    • \n
    \n", "signature": "(\tfolder: str = None,\tfiles: list = None,\tassemblies: list = ['hg19'],\tannovar_url: str = 'http://www.openbioinformatics.org/annovar/download',\tthreads: int = 1) -> None:", "funcdef": "def"}, {"fullname": "howard.functions.databases.databases_download_snpeff", "modulename": "howard.functions.databases", "qualname": "databases_download_snpeff", "kind": "function", "doc": "

    The databases_download_snpeff function downloads and extracts snpEff databases for specified\ngenome assemblies.

    \n\n
    Parameters
    \n\n
      \n
    • folder: The folder parameter is a string that specifies the folder where the snpEff\ndatabases will be downloaded and stored. If the folder does not exist, it will be created
    • \n
    • assemblies: The assemblies parameter is a list of genome assemblies for which the snpEff\ndatabases need to be downloaded. It specifies the genome assemblies for which you want to download\nthe snpEff databases. For example, if you want to download the snpEff databases for the human genome\nassembly hg
    • \n
    • config: The config parameter is a dictionary that contains information about the tools and\ntheir configurations. It is used to retrieve the path to the Java binary and the path to the snpEff\nbinary
    • \n
    • threads: The threads parameter specifies the number of threads to be used for downloading\nthe snpEff databases. It determines the parallelism of the download process, allowing multiple files\nto be downloaded simultaneously, defaults to 1
    • \n
    \n", "signature": "(\tfolder: str = None,\tassemblies: list = ['hg19'],\tconfig: dict = {},\tthreads: int = 1) -> None:", "funcdef": "def"}, {"fullname": "howard.functions.databases.databases_download_genomes", "modulename": "howard.functions.databases", "qualname": "databases_download_genomes", "kind": "function", "doc": "

    This function downloads genome assemblies using genomepy package with options to specify genome\nfolder, provider, contig regex, and number of threads.

    \n\n
    Parameters
    \n\n
      \n
    • assemblies: a list of genome assembly names to download
    • \n
    • genomes_folder: The folder where the downloaded genome files will be saved. If no folder is\nspecified, the default folder will be used
    • \n
    • provider: The provider parameter specifies the source of the genome data. In this case, the\ndefault provider is set to \"UCSC\", which refers to the University of California, Santa Cruz Genome\nBrowser. Other possible providers could include NCBI or Ensembl, defaults to UCSC
    • \n
    • contig_regex: The contig_regex parameter is a regular expression used to filter the contigs\n(chromosomes or scaffolds) to be downloaded for a given genome assembly. It allows users to download\nonly a subset of the available contigs, based on their names or other characteristics. If\ncontig_regex is not specified
    • \n
    • threads: The \"threads\" parameter specifies the number of threads (parallel processes) to use\nfor downloading the genomes. This can speed up the process if the computer has multiple cores or\nprocessors. The default value is 1, meaning that the download will be done using a single thread,\ndefaults to 1
    • \n
    \n\n
    Returns
    \n\n
    \n

    None is being returned.

    \n
    \n", "signature": "(\tassemblies: list,\tgenomes_folder: str = '/Users/lebechea/howard/databases/genomes/current',\tprovider: str = 'UCSC',\tcontig_regex: str = None,\tthreads: int = 1) -> None:", "funcdef": "def"}, {"fullname": "howard.functions.databases.databases_download_refseq", "modulename": "howard.functions.databases", "qualname": "databases_download_refseq", "kind": "function", "doc": "

    The databases_download_refseq function downloads RefSeq files for a list of assemblies and returns\na dictionary of installed RefSeq files for each assembly.

    \n\n
    Parameters
    \n\n
      \n
    • assemblies: A list of assemblies for which the RefSeq files need to be downloaded. Each\nassembly is represented as a string
    • \n
    • refseq_folder: The refseq_folder parameter is a string that specifies the folder where the\nRefSeq files will be downloaded and stored. If this parameter is not provided, a default folder will\nbe used
    • \n
    • refseq_url: The refseq_url parameter is a string that represents the URL where the RefSeq\nfiles can be downloaded from
    • \n
    • refseq_prefix: The refseq_prefix parameter is a string that specifies the prefix for the\ndownloaded RefSeq files. By default, it is set to \"ncbiRefSeq\". This prefix is used to identify the\nRefSeq files for each assembly. For example, if the prefix is set to \"ncbi, defaults to ncbiRefSeq
    • \n
    • refseq_files: The refseq_files parameter is a list of filenames that need to be downloaded\nfor each assembly. The default value is [\"ncbiRefSeq.txt\", \"ncbiRefSeqLink.txt\"], but you can\nprovide your own list of filenames if needed
    • \n
    • refseq_format_file: The refseq_format_file parameter is a string that specifies the\nfilename of the RefSeq file that needs to be formatted. This file will be used as input for the\ndatabases_format_refseq function. By default, the value is set to \"ncbiRefSeq.txt\", defaults to\nncbiRefSeq.txt
    • \n
    • refseq_format_file_output: The refseq_format_file_output parameter is a string that\nspecifies the output file path for the formatted RefSeq file. This file will be generated by the\ndatabases_format_refseq function and will contain the formatted RefSeq data. If this parameter is\nnot provided, the formatted RefSeq
    • \n
    • include_utr_5: A boolean parameter that specifies whether to include the 5' untranslated\nregion (UTR) in the downloaded RefSeq files. If set to True, the 5' UTR will be included. If set to\nFalse, the 5' UTR will be excluded, defaults to True
    • \n
    • include_utr_3: The include_utr_3 parameter is a boolean value that specifies whether to\ninclude the 3' untranslated region (UTR) in the downloaded RefSeq files. If set to True, the 3'\nUTR will be included. If set to False, the 3, defaults to True
    • \n
    • include_chrM: The include_chrM parameter is a boolean value that determines whether to\ninclude the mitochondrial chromosome (chrM) in the downloaded RefSeq files. If set to True, the chrM\nwill be included; if set to False, it will be excluded, defaults to True
    • \n
    • include_non_canonical_chr: The include_non_canonical_chr parameter is a boolean value that\ndetermines whether or not to include non-canonical chromosomes in the downloaded RefSeq files. If\nset to True, non-canonical chromosomes will be included. If set to False, non-canonical\nchromosomes will be excluded, defaults to True
    • \n
    • include_non_coding_transcripts: The include_non_coding_transcripts parameter is a boolean\nflag that determines whether non-coding transcripts should be included in the downloaded RefSeq\nfiles. If set to True, non-coding transcripts will be included. If set to False, non-coding\ntranscripts will be excluded, defaults to True
    • \n
    • include_transcript_ver: The include_transcript_ver parameter is a boolean value that\ndetermines whether to include the transcript version in the downloaded RefSeq files. If set to\nTrue, the transcript version will be included. If set to False, the transcript version will be\nexcluded, defaults to True
    • \n
    • threads: The threads parameter specifies the number of threads to use for downloading and\nextracting the RefSeq files. It determines the level of parallelism in the download and extraction\nprocess. By default, it is set to 1, which means that the download and extraction will be performed\nsequentially. If you want, defaults to 1
    • \n
    • memory: The memory parameter specifies the amount of max memory (in Gb) to use for sorting.\ndefaults to 1
    • \n
    \n\n
    Returns
    \n\n
    \n

    The function databases_download_refseq returns a dictionary installed_refseq which\n contains information about the downloaded RefSeq files for each assembly. The keys of the dictionary\n are the assembly names, and the values are lists of the installed RefSeq files for each assembly.

    \n
    \n", "signature": "(\tassemblies: list,\trefseq_folder: str = None,\trefseq_url: str = None,\trefseq_prefix: str = 'ncbiRefSeq',\trefseq_files: List = ['ncbiRefSeq.txt', 'ncbiRefSeqLink.txt'],\trefseq_format_file: str = 'ncbiRefSeq.txt',\trefseq_format_file_output: str = None,\tinclude_utr_5: bool = True,\tinclude_utr_3: bool = True,\tinclude_chrM: bool = True,\tinclude_non_canonical_chr: bool = True,\tinclude_non_coding_transcripts: bool = True,\tinclude_transcript_ver: bool = True,\tthreads: int = 1,\tmemory: int = 1) -> dict:", "funcdef": "def"}, {"fullname": "howard.functions.databases.databases_format_refseq", "modulename": "howard.functions.databases", "qualname": "databases_format_refseq", "kind": "function", "doc": "

    The databases_format_refseq function takes a RefSeq file as input, formats it according to\nspecified criteria, and outputs the formatted file.

    \n\n
    Parameters
    \n\n
      \n
    • refseq_file: The refseq_file parameter is a string that represents the path to the input\nRefSeq file. This file contains information about gene annotations, including chromosome, start and\nend positions, strand, and other details
    • \n
    • output_file: The output_file parameter is a string that represents the name of the file\nwhere the formatted data will be written
    • \n
    • include_utr_5: The include_utr_5 parameter is a boolean that determines whether to include\nthe 5' UTR (untranslated region) in the output file. If set to True, the 5' UTR will be included.\nIf set to False, the 5' U, defaults to True
    • \n
    • include_utr_3: A boolean parameter that determines whether to include the 3' UTR\n(untranslated region) in the output. If set to True, the 3' UTR will be included. If set to False,\nthe 3' UTR will be excluded, defaults to True
    • \n
    • include_chrM: The include_chrM parameter is a boolean that determines whether to include\ntranscripts from the mitochondrial chromosome (chrM or chrMT) in the output file. If set to True,\ntranscripts from the mitochondrial chromosome will be included. If set to False, transcripts from\nthe mitochondrial chromosome will be excluded, defaults to True
    • \n
    • include_non_canonical_chr: The parameter include_non_canonical_chr determines whether or\nnot to include non-canonical chromosomes in the output. If set to True, non-canonical chromosomes\nwill be included. If set to False, non-canonical chromosomes will be excluded, defaults to True
    • \n
    • include_non_coding_transcripts: The parameter include_non_coding_transcripts determines\nwhether non-coding transcripts should be included in the output file. If set to True, non-coding\ntranscripts will be included. If set to False, non-coding transcripts will be excluded, defaults\nto True
    • \n
    • include_transcript_ver: The include_transcript_ver parameter determines whether to include\nthe transcript version in the output file. If set to True, the transcript version will be included\nin the output file. If set to False, the transcript version will be removed from the output file.\nThe default value is `True, defaults to True
    • \n
    • sort: The sort parameter determines whether to sort the output file in ascending order\nbased on the chromosome and start position. If set to True, the file will be sorted. If set to\nFalse, the file will not be sorted. The default value is False, defaults to False
    • \n
    • header: The header parameter is a boolean that determines whether to include a header line\nin the output file. If set to True, a header line will be included. If set to False, no header\nline will be included. The default value is False, defaults to False
    • \n
    • header_first_line: The header_first_line parameter is a boolean that determines whether to\ninclude the header line as the first line in the output file. If set to True, the header line will\nbe included as the first line. If set to False, the header line will not be included as the first,\ndefaults to True
    • \n
    \n\n
    Returns
    \n\n
    \n

    The function databases_format_refseq returns the path of the output file.

    \n
    \n", "signature": "(\trefseq_file: str,\toutput_file: str,\tinclude_utr_5: bool = True,\tinclude_utr_3: bool = True,\tinclude_chrM: bool = True,\tinclude_non_canonical_chr: bool = True,\tinclude_non_coding_transcripts: bool = True,\tinclude_transcript_ver: bool = True,\tsort: bool = False,\theader: bool = False,\theader_first_line: bool = True) -> str:", "funcdef": "def"}, {"fullname": "howard.functions.databases.databases_download_dbnsfp", "modulename": "howard.functions.databases", "qualname": "databases_download_dbnsfp", "kind": "function", "doc": "

    The databases_download_dbnsfp function is used to download and process dbNSFP databases for\nspecified genome assemblies, generating Parquet and VCF files based on the provided configurations.

    \n\n
    Parameters
    \n\n
      \n
    • assemblies: A list of genome assemblies for which to download and process dbNSFP data. Each\nassembly should be specified as a string
    • \n
    • dbnsfp_folder: The dbnsfp_folder parameter is a string that specifies the folder where the\ndbNSFP database files are located. If this parameter is not provided, the function will attempt to\ndownload the dbNSFP database files from the dbnsfp_url parameter
    • \n
    • dbnsfp_url: The dbnsfp_url parameter represents the URL from which the dbNSFP database\nfiles can be downloaded. This URL is used by the function to fetch the necessary database files for\nprocessing
    • \n
    • dbnsfp_release: The dbnsfp_release parameter specifies the version of the dbNSFP database\nto be used. The default value is \"4.4a\", but you can specify a different version if needed, defaults\nto 4.4a
    • \n
    • threads: The threads parameter specifies the number of threads to use for parallel\nprocessing. Increasing the number of threads can potentially speed up the execution time of the\nfunction, especially if there are multiple cores available on the machine. It determines how many\ntasks can be executed simultaneously
    • \n
    • memory: The memory parameter specifies the amount of maximum memory (in gigabytes) to use\nfor sorting. It is used in the context of processing and sorting data efficiently. The default value\nfor this parameter is set to 1, meaning that 1 gigabyte of memory will be allocated for sorting\noperations, defaults to 1
    • \n
    • parquet_size: The parquet_size parameter specifies the maximum size (in megabytes) of data\nfiles in the Parquet folder. It determines the size at which the Parquet files will be split or\ngenerated. The value should be an integer representing the size in megabytes, defaults to 100
    • \n
    • generate_parquet_file: The generate_parquet_file parameter in the\ndatabases_download_dbnsfp function is a boolean flag that indicates whether to generate a Parquet\nfile or not. If set to True, the function will create Parquet files based on the specified\nparameters and data. If set to `, defaults to False
    • \n
    • generate_sub_databases: The generate_sub_databases parameter in the\ndatabases_download_dbnsfp function determines whether to generate sub-databases based on the\nassemblies provided. If set to True, the function will create sub-databases based on the specified\ngenome assemblies. If set to False, the function, defaults to False
    • \n
    • generate_vcf_file: The generate_vcf_file parameter in the databases_download_dbnsfp\nfunction is a boolean flag that indicates whether to generate a VCF file based on the specified\nparameters and data. If set to True, the function will generate a VCF file. If set to `False,\ndefaults to False
    • \n
    • not_generate_files_all: The not_generate_files_all parameter in the\ndatabases_download_dbnsfp function is a boolean flag that indicates whether to skip generating\ndatabase Parquet/VCF files for the entire database. If set to True, the function will not generate\nfiles for the entire database. If set to, defaults to False
    • \n
    • genomes_folder: The genomes_folder parameter specifies the folder where the genome files\nare located. It is a string that represents the path to the folder containing genome assemblies
    • \n
    • add_info: The add_info parameter in the databases_download_dbnsfp function is a boolean\nflag that determines whether to include an \"INFO\" column in the Parquet folder/file. If set to\nTrue, the function will add an INFO column to the generated Parquet files. This INFO, defaults to\nFalse
    • \n
    • only_info: The only_info parameter in the databases_download_dbnsfp function is a boolean\nflag that, when set to True, indicates that only the \"INFO\" column should be included in the\noutput. This parameter is used to control whether to include only the \"INFO\" column in, defaults to\nFalse
    • \n
    • row_group_size: The row_group_size parameter specifies the row group size to generate the\nParquet folder and file. It is used to control the size of row groups in the Parquet file. This\nparameter affects the organization of data within the Parquet file and can impact performance and\nmemory usage during processing. The, defaults to 100000
    • \n
    • uniquify: The uniquify parameter in the databases_download_dbnsfp function is a boolean\nflag that determines whether to generate unique values for each annotation in the Parquet file. When\nset to True, the function will ensure that each annotation column contains only unique values.\nThis can be, defaults to False
    • \n
    \n\n
    Returns
    \n\n
    \n

    The function databases_download_dbnsfp returns a boolean value indicating whether the\n process of downloading and processing dbNSFP databases for specified genome assemblies was\n successful or not.

    \n
    \n", "signature": "(\tassemblies: list,\tdbnsfp_folder: str = None,\tdbnsfp_url: str = None,\tdbnsfp_release: str = '4.4a',\tthreads: int = None,\tmemory: int = 1,\tparquet_size: int = 100,\tgenerate_parquet_file: bool = False,\tgenerate_sub_databases: bool = False,\tgenerate_vcf_file: bool = False,\tnot_generate_files_all: bool = False,\tgenomes_folder: str = None,\tadd_info: bool = False,\tonly_info: bool = False,\trow_group_size: int = 100000,\tuniquify: bool = False) -> bool:", "funcdef": "def"}, {"fullname": "howard.functions.databases.databases_download_alphamissense", "modulename": "howard.functions.databases", "qualname": "databases_download_alphamissense", "kind": "function", "doc": "

    The databases_download_alphamissense function downloads and converts AlphaMissense databases for a\nlist of assemblies.

    \n\n
    Parameters
    \n\n
      \n
    • assemblies: assemblies is a list of assemblies for which the AlphaMissense database needs\nto be downloaded. Each assembly represents a specific genome or genetic sequence
    • \n
    • alphamissense_folder: The alphamissense_folder parameter is a string that specifies the\nfolder where the AlphaMissense files will be downloaded and stored. It is set to\nDEFAULT_ANNOTATIONS_FOLDER by default, which is likely a predefined constant or variable in your\ncode
    • \n
    • alphamissense_url: The alphamissense_url parameter is a string that specifies the URL where\nthe AlphaMissense files are located. It is used to construct the download URL for each assembly's\nAlphaMissense file
    • \n
    • threads: The threads parameter is an optional parameter that specifies the number of\nthreads to use for the conversion process. It determines the level of parallelism when converting\nthe AlphaMissense TSV file to the Parquet format. If not specified, the default value will be used
    • \n
    \n\n
    Returns
    \n\n
    \n

    The function databases_download_alphamissense returns a boolean value True.

    \n
    \n", "signature": "(\tassemblies: list,\talphamissense_folder: str = '/Users/lebechea/howard/databases/annotations/current',\talphamissense_url: str = 'https://storage.googleapis.com/dm_alphamissense',\tthreads: int = None) -> bool:", "funcdef": "def"}, {"fullname": "howard.functions.databases.databases_download_exomiser", "modulename": "howard.functions.databases", "qualname": "databases_download_exomiser", "kind": "function", "doc": "

    The databases_download_exomiser function downloads and sets up the Exomiser database for the\nspecified assemblies.

    \n\n
    Parameters
    \n\n
      \n
    • assemblies: A list of assemblies for which to download Exomiser databases. Each assembly is a\nstring representing a genome build, such as \"GRCh37\" or \"GRCh38\"
    • \n
    • exomiser_folder: The exomiser_folder parameter is a string that specifies the folder where\nthe Exomiser databases will be downloaded and stored. If the folder does not exist, it will be\ncreated
    • \n
    • exomiser_application_properties: The exomiser_application_properties parameter is a string\nrepresenting the path to the Exomiser application properties file. This file contains configuration\nsettings for the Exomiser tool. If this parameter is not provided, the function will attempt to\nlocate the application properties file automatically based on the Exomiser
    • \n
    • exomiser_url: The exomiser_url parameter is the URL where the Exomiser database files can\nbe downloaded from. It is used to construct the download URLs for the phenotype and assembly files
    • \n
    • exomiser_release: The exomiser_release parameter is used to specify the version of the\nExomiser data to download. If it is set to \"default\", \"auto\", or \"config\", the function will attempt\nto retrieve the version from the exomiser.application.properties file. If it is
    • \n
    • exomiser_phenotype_release: The exomiser_phenotype_release parameter is used to specify the\nrelease version of the Exomiser phenotype database. If not provided, it will default to the value\nspecified in the application.properties file or the latest available release
    • \n
    • exomiser_remm_release: The exomiser_remm_release parameter is used to specify the version\nof the ReMM (Regulatory Mendelian Mutation) database to download. If the value is set to \"default\",\n\"auto\", or \"config\", it will try to retrieve the version from the application.properties
    • \n
    • exomiser_remm_url: The exomiser_remm_url parameter is the URL where the ReMM (Regulatory\nMendelian Mutation) database can be downloaded from. It is used in the function to construct the\ndownload URL for the ReMM database files, defaults to https://kircherlab.bihealth.org/download/ReMM
    • \n
    • exomiser_cadd_release: The exomiser_cadd_release parameter is used to specify the version\nof the CADD (Combined Annotation Dependent Depletion) database to download. If the value is set to\n\"default\", \"auto\", or \"config\", it will try to retrieve the version from the `exom
    • \n
    • exomiser_cadd_url: The exomiser_cadd_url parameter is the URL where the CADD (Combined\nAnnotation Dependent Depletion) database files can be downloaded from. It is used to construct the\ndownload URLs for the CADD database files, defaults to https://kircherlab.bihealth.org/download/CADD
    • \n
    • exomiser_cadd_url_snv_file: The parameter exomiser_cadd_url_snv_file is the name of the\nfile containing the SNV (Single Nucleotide Variant) data for the CADD (Combined Annotation Dependent\nDepletion) database, defaults to whole_genome_SNVs.tsv.gz
    • \n
    • exomiser_cadd_url_indel_file: The parameter exomiser_cadd_url_indel_file is the name of the\nINDEL file that will be downloaded from the CADD database, defaults to InDels.tsv.gz
    • \n
    • threads: The threads parameter specifies the number of threads to use for parallel\nprocessing. It determines how many tasks can be executed simultaneously. Increasing the number of\nthreads can potentially speed up the execution time of the function, especially if there are\nmultiple cores available on the machine
    • \n
    \n", "signature": "(\tassemblies: list,\texomiser_folder: str = '/Users/lebechea/howard/databases/exomiser/current',\texomiser_application_properties: str = None,\texomiser_url: str = 'http://data.monarchinitiative.org/exomiser',\texomiser_release: str = None,\texomiser_phenotype_release: str = None,\texomiser_remm_release: str = None,\texomiser_remm_url: str = 'https://kircherlab.bihealth.org/download/ReMM',\texomiser_cadd_release: str = None,\texomiser_cadd_url: str = 'https://kircherlab.bihealth.org/download/CADD',\texomiser_cadd_url_snv_file: str = 'whole_genome_SNVs.tsv.gz',\texomiser_cadd_url_indel_file: str = 'InDels.tsv.gz',\tthreads: int = 1) -> bool:", "funcdef": "def"}, {"fullname": "howard.functions.databases.databases_download_dbsnp", "modulename": "howard.functions.databases", "qualname": "databases_download_dbsnp", "kind": "function", "doc": "

    The function databases_download_dbsnp downloads dbSNP files, generates VCF files, and converts\nthem to Parquet format.

    \n\n
    Parameters
    \n\n
      \n
    • assemblies: A list of genome assemblies for which to download dbSNP data
    • \n
    • dbsnp_folder: The folder where the dbSNP files will be downloaded and stored
    • \n
    • dbsnp_releases: List of releases to download. Default: [\"b156\"]
    • \n
    • dbsnp_release_default: Default release to link in default folder. Default: first release in dbsnp_releases
    • \n
    • dbsnp_url: The dbsnp_url parameter is a string that represents the base URL where the dbSNP\nfiles are located. This URL is used to construct the full URL for downloading the dbSNP files
    • \n
    • dbsnp_url_files: The dbsnp_url_files parameter is a dictionary that maps assembly names to\nspecific dbSNP URL files. It allows you to provide custom dbSNP URL files for specific assemblies\ninstead of using the default file naming convention
    • \n
    • dbsnp_url_files_prefix: The dbsnp_url_files_prefix parameter is a string that represents the\nprefix of the dbSNP file name for a specific assembly. It is used to construct the full URL of the\ndbSNP file to be downloaded. By default, the value is set to \"GCF_000001405\"
    • \n
    • dbsnp_assemblies_map: The dbsnp_assemblies_map parameter is a dictionary that maps assembly\nnames to their corresponding dbSNP versions. It is used to construct the dbSNP file name based on\nthe assembly name. For example, if the assembly is \"hg19\", the corresponding dbSNP version is \"
    • \n
    • genomes_folder: The genomes_folder parameter is a string that specifies the folder where the\ngenome index files are located. These index files are used for generating the VCF file from the\ndownloaded dbSNP file
    • \n
    • threads: The threads parameter specifies the number of threads to use for downloading and\nprocessing the dbSNP files, defaults to 1
    • \n
    • memory: The memory parameter specifies the amount of max memory (in Gb) to use for sorting.\ndefaults to 1
    • \n
    • dbsnp_vcf: A boolean flag indicating whether to generate a VCF file from the downloaded\ndbSNP data. If set to True, the function will generate a VCF file. If set to False, the function\nwill not generate a VCF file, defaults to False
    • \n
    • dbsnp_parquet: A boolean flag indicating whether to generate a Parquet file from the\ndownloaded dbSNP data. If set to True, a Parquet file will be generated; if set to False, no Parquet\nfile will be generated, defaults to False
    • \n
    \n", "signature": "(\tassemblies: list,\tdbsnp_folder: str = '/Users/lebechea/howard/databases/exomiser/dbsnp',\tdbsnp_releases: list = ['b156'],\tdbsnp_release_default: str = None,\tdbsnp_url: str = 'https://ftp.ncbi.nih.gov/snp/archive',\tdbsnp_url_files: dict = None,\tdbsnp_url_files_prefix: str = 'GCF_000001405',\tdbsnp_assemblies_map: dict = {'hg19': '25', 'hg38': '40'},\tgenomes_folder: str = '/Users/lebechea/howard/databases/genomes/current',\tthreads: int = 1,\tmemory: int = 1,\tdbsnp_vcf: bool = False,\tdbsnp_parquet: bool = False,\tdbsnp_parquet_explode_infos: bool = True) -> str:", "funcdef": "def"}, {"fullname": "howard.functions.databases.databases_download_hgmd", "modulename": "howard.functions.databases", "qualname": "databases_download_hgmd", "kind": "function", "doc": "

    The databases_download_hgmd function converts an HGMD database file into VCF, Parquet, and TSV\nformats.

    \n\n
    Parameters
    \n\n
      \n
    • assemblies: A list of assemblies for which the HGMD database should be downloaded and\nconverted. Only one assembly can be specified
    • \n
    • hgmd_file: The hgmd_file parameter is a string that represents the path to the HGMD\ndatabase file in VCF format. This file contains the variants and their associated information
    • \n
    • hgmd_folder: The hgmd_folder parameter is a string that represents the path to the folder\nwhere the HGMD database files will be stored. If no value is provided, it will use the\nDEFAULT_ANNOTATIONS_FOLDER constant as the default value
    • \n
    • output_basename: The output_basename parameter is a string that specifies the base name for\nthe output files. If not provided, it will be set as the base name of the input HGMD file without\nthe assembly information
    • \n
    • threads: The threads parameter specifies the number of threads to use for processing the\nHGMD database. It determines the level of parallelism and can help speed up the conversion process
    • \n
    • memory: The memory parameter specifies the amount of max memory (in Gb) to use for sorting.\ndefaults to 1
    • \n
    • genomes_folder: The genomes_folder parameter is a string that specifies the folder where\nthe genome files are located. If this parameter is not provided, it will default to a constant value\nDEFAULT_GENOME_FOLDER
    • \n
    • to_parquet: The to_parquet parameter is a boolean value that specifies whether the HGMD\ndatabase should be converted to the Parquet format or not. If set to True, the database will be\nconverted to Parquet format. If set to False, the conversion will be skipped, defaults to True
    • \n
    • to_tsv: The to_tsv parameter is a boolean value that specifies whether the HGMD database\nshould be converted to TSV format or not. If set to True, the function will generate a TSV file\nfrom the HGMD database. If set to False, the TSV conversion will be, defaults to True
    • \n
    \n\n
    Returns
    \n\n
    \n

    a boolean value indicating whether the HGMD database conversion was successful or not.

    \n
    \n", "signature": "(\tassemblies: list,\thgmd_file: str,\thgmd_folder: str = '/Users/lebechea/howard/databases/annotations/current',\toutput_basename: str = None,\tthreads: int = None,\tmemory: int = 1,\tgenomes_folder: str = None,\tto_parquet: bool = True,\tto_tsv: bool = True) -> bool:", "funcdef": "def"}, {"fullname": "howard.functions.from_annovar", "modulename": "howard.functions.from_annovar", "kind": "module", "doc": "

    \n"}, {"fullname": "howard.functions.from_annovar.TYPES", "modulename": "howard.functions.from_annovar", "qualname": "TYPES", "kind": "variable", "doc": "

    \n", "default_value": "{'int': 'Integer', 'int64': 'Integer', 'float': 'Float', 'float64': 'Float', 'object': 'String'}"}, {"fullname": "howard.functions.from_annovar.from_annovar", "modulename": "howard.functions.from_annovar", "qualname": "from_annovar", "kind": "function", "doc": "

    This function converts an Annovar database to a VCF and Parquet format.

    \n\n
    Parameters
    \n\n
      \n
    • args: args is an object with several attributes representing the input parameters for the\nfunction. These attributes include:
    • \n
    \n", "signature": "(\targs: <module 'argparse' from '/Users/lebechea/miniconda/envs/howard_devel/lib/python3.10/argparse.py'>) -> None:", "funcdef": "def"}, {"fullname": "howard.functions.from_annovar.annovar_to_vcf", "modulename": "howard.functions.from_annovar", "qualname": "annovar_to_vcf", "kind": "function", "doc": "

    This function converts an ANNOVAR file to a VCF file and optionally to a Parquet file, with various\noptions for annotations, headers, databases, and memory usage.

    \n\n
    Parameters
    \n\n
      \n
    • input_file: The path to the input file in ANNOVAR format that needs to be converted to VCF\nformat
    • \n
    • output_file: The name of the output VCF file that will be generated by the function
    • \n
    • output_file_parquet: output_file_parquet is an optional parameter that specifies the name of\nthe output file in Parquet format. If this parameter is not provided, the output will not be saved\nin Parquet format
    • \n
    • annotations: This parameter is used to specify the location of the ANNOVAR annotation\ndatabase files. If not provided, ANNOVAR will use the default location
    • \n
    • header_file: The path to a file containing the header information for the VCF output. This\ncan be used to customize the output format of the VCF file. If not provided, a default header will\nbe used
    • \n
    • database_name: The name of the ANNOVAR database used for annotation
    • \n
    • bcftools: The path to the bcftools executable, defaults to bcftools
    • \n
    • genome: The genome parameter specifies the reference genome file to be used for the\nconversion from annovar format to VCF format, defaults to hg19.fa
    • \n
    • threads: The number of threads to use for processing. This can speed up the process if your\ncomputer has multiple cores
    • \n
    • maxmem: The maximum amount of memory that can be used by the program. It is usually specified\nin units of bytes, kilobytes, megabytes, or gigabytes. For example, \"2G\" means 2 gigabytes of memory
    • \n
    • remove_annotations: remove_annotations is a list of annotations to be removed from the\noutput VCF file. These annotations will not be included in the final VCF file
    • \n
    • reduce_memory: A boolean parameter that determines whether to reduce memory usage during the\nconversion process. If set to True, the function will attempt to reduce memory usage by using a more\nmemory-efficient algorithm, but this may result in slower performance. If set to False, the function\nwill use a faster algorithm that may consume more, defaults to False
    • \n
    • multi_variant: A boolean parameter that determines if input file contains multiple annotations\nfor each variant (position ref alt). If set to False, the function will attempt to reduce memory usage\nusing a specific query without 'group by', for a more memory-efficient algorithm. If set to True, the function\nwill use a query using 'group by', which may consume more memory. If set to None, the function will\nauto-determine the parameter value with a sample of variants. Defaults to None (auto)
    • \n
    \n", "signature": "(\tinput_file: str,\toutput_file: str,\toutput_file_parquet: str = None,\tannotations: str = None,\theader_file: str = None,\tdatabase_name: str = None,\tbcftools: str = 'bcftools',\tgenome: str = 'hg19.fa',\tthreads: int = None,\tmaxmem: str = None,\tremove_annotations: list = [],\treduce_memory: bool = None,\tmulti_variant: bool = None) -> None:", "funcdef": "def"}, {"fullname": "howard.functions.from_annovar.parquet_info_explode", "modulename": "howard.functions.from_annovar", "qualname": "parquet_info_explode", "kind": "function", "doc": "

    This function takes a parquet file, splits it by chromosome, explodes the INFO column, and then\nmerges the exploded files back together.

    \n\n
    Parameters
    \n\n
      \n
    • input_file: The path to the input file, which can be either a TSV or VCF file
    • \n
    • output_file: The name of the output file in Parquet format after exploding the input file
    • \n
    • threads: The number of threads to use for processing the parquet file, defaults to None (all)
    • \n
    • memory: The amount of memory to use for processing the parquet file, defaults to None (all)
    • \n
    • reduce_memory: The reduce_memory parameter is a boolean flag that determines whether or not\nto use memory reduction techniques during the execution of the function. If set to True, the\nfunction will attempt to reduce memory usage during the execution, which may result in slower\nperformance but lower memory usage. If set to `, defaults to False
    • \n
    \n", "signature": "(\tinput_file: str,\toutput_file: str,\tthreads: int = None,\tmemory: str = None,\treduce_memory: bool = False) -> None:", "funcdef": "def"}, {"fullname": "howard.functions.from_annovar.tsv_to_parquet", "modulename": "howard.functions.from_annovar", "qualname": "tsv_to_parquet", "kind": "function", "doc": "

    The function converts a TSV file to a Parquet file with customizable options.

    \n\n
    Parameters
    \n\n
      \n
    • tsv: The path to the TSV file that needs to be converted to Parquet format
    • \n
    • parquet: parquet is the file path and name of the output Parquet file that will be created\nby the function
    • \n
    • delim: The delimiter used in the TSV file to separate columns. If not specified, the default\ndelimiter (tab) will be used
    • \n
    • columns: The columns parameter is a dictionary that maps column names to their data types.\nIt is used to specify the schema of the resulting Parquet file. For example, if the input TSV file\nhas columns \"name\", \"age\", and \"salary\", and we want \"name\" to be
    • \n
    • quote: The quote parameter is an optional parameter that specifies the character used to\nquote fields in the TSV file. If not specified, the default quote character is double quotes (\")
    • \n
    • nullstr: The nullstr parameter is used to specify the string that represents null values in\nthe input TSV file. This parameter is used to correctly interpret and convert null values in the TSV\nfile to null values in the resulting Parquet file. For example, if the null value in the TSV
    • \n
    • skip: The skip parameter is an optional integer parameter that specifies the number of rows\nto skip at the beginning of the TSV file. This is useful if the TSV file has a header row that\nshould not be included in the resulting Parquet file. If skip is not specified, no
    • \n
    \n", "signature": "(\ttsv: str,\tparquet: str,\tdelim: str = None,\tcolumns: dict = None,\tquote: str = None,\tnullstr: str = None,\tskip: int = None) -> None:", "funcdef": "def"}, {"fullname": "howard.functions.from_extann", "modulename": "howard.functions.from_extann", "kind": "module", "doc": "

    \n"}, {"fullname": "howard.functions.from_extann.create_metaheader", "modulename": "howard.functions.from_extann", "qualname": "create_metaheader", "kind": "function", "doc": "

    From extann file in dataframe, create metaheader of pseudo bed file\ninput: path of input extann\nconfig: dict\nextra_cols: list of column from refgene to keep

    \n", "signature": "(\tdf_extann: pandas.core.frame.DataFrame,\tinput: str,\tconfig: dict,\textra_cols=None) -> str:", "funcdef": "def"}, {"fullname": "howard.functions.from_extann.add_default_metaheader", "modulename": "howard.functions.from_extann", "qualname": "add_default_metaheader", "kind": "function", "doc": "

    \n", "signature": "(fields, id):", "funcdef": "def"}, {"fullname": "howard.functions.from_extann.read_json", "modulename": "howard.functions.from_extann", "qualname": "read_json", "kind": "function", "doc": "

    From json file to python dict

    \n", "signature": "(file: str) -> dict:", "funcdef": "def"}, {"fullname": "howard.functions.from_extann.read_refgene", "modulename": "howard.functions.from_extann", "qualname": "read_refgene", "kind": "function", "doc": "

    \n", "signature": "(refgene: str) -> pandas.core.frame.DataFrame:", "funcdef": "def"}, {"fullname": "howard.functions.from_extann.metaheader_rows", "modulename": "howard.functions.from_extann", "qualname": "metaheader_rows", "kind": "function", "doc": "

    INFO=

    \n\n

    fields: INFO, FORMAT....\nnumber: 0, 1, ., ...\ntype: String, Float, ....\ndescription: description of the field\nconf https://samtools.github.io/hts-specs/VCFv4.4.pdf

    \n", "signature": "(fields: str, id: str, number: str, type: str, description: str) -> str:", "funcdef": "def"}, {"fullname": "howard.functions.from_extann.replace_values", "modulename": "howard.functions.from_extann", "qualname": "replace_values", "kind": "function", "doc": "

    \n", "signature": "(input_string: str, config: dict) -> str:", "funcdef": "def"}, {"fullname": "howard.functions.from_extann.write_extann", "modulename": "howard.functions.from_extann", "qualname": "write_extann", "kind": "function", "doc": "

    Write ExtAnn into a bed like file and his hdr mate

    \n", "signature": "(\tparam,\theader,\toutput,\tdf_extann,\tdf_refgene,\textra_cols=None,\tmode=None,\tdf_transcript=None):", "funcdef": "def"}, {"fullname": "howard.functions.from_extann.extann_to_info", "modulename": "howard.functions.from_extann", "qualname": "extann_to_info", "kind": "function", "doc": "

    from pandas series (row of dataframe) create the info field of the vcf from extann data per gene

    \n", "signature": "(record: pandas.core.series.Series) -> str:", "funcdef": "def"}, {"fullname": "howard.functions.from_extann.get_longest_transcript", "modulename": "howard.functions.from_extann", "qualname": "get_longest_transcript", "kind": "function", "doc": "

    From pandas dataframe containing one gene and many transcript and coordinate return the longest

    \n\n

    if there are many same size transcript keep the MANE

    \n", "signature": "(\tdf: pandas.core.frame.DataFrame,\textra_col=None) -> <built-in function any>:", "funcdef": "def"}, {"fullname": "howard.functions.from_extann.get_all_transcript", "modulename": "howard.functions.from_extann", "qualname": "get_all_transcript", "kind": "function", "doc": "

    Get all transcripts from refgene matching gene name

    \n", "signature": "(\tmatch: pandas.core.frame.DataFrame,\textra_col=None) -> <built-in function any>:", "funcdef": "def"}, {"fullname": "howard.functions.from_extann.get_chosen_transcript", "modulename": "howard.functions.from_extann", "qualname": "get_chosen_transcript", "kind": "function", "doc": "

    From a txt / tsv file with gene and transcript, it will keep only provided transcript for this gene, if gene does not match it will take the longest

    \n", "signature": "(\tmatch: pandas.core.frame.DataFrame,\tdf_transcript: pandas.core.frame.DataFrame,\textra_col=None):", "funcdef": "def"}, {"fullname": "howard.functions.from_extann.get_gene_coordinate", "modulename": "howard.functions.from_extann", "qualname": "get_gene_coordinate", "kind": "function", "doc": "

    From pandas dataframe containing refgene file, get chr start stop from each gene present in extann\ndo the same process for each gene/transcript it will lead to duplicate\ndf_refgene: refgene dataframe\ngene_row: pandas series of extann row\nlog

    \n", "signature": "(\tdf_refgene: pandas.core.frame.DataFrame,\tgene_row: pandas.core.series.Series,\textra_col=None,\tmode=None,\tdf_transcript=None) -> <built-in function any>:", "funcdef": "def"}, {"fullname": "howard.functions.from_extann.from_extann", "modulename": "howard.functions.from_extann", "qualname": "from_extann", "kind": "function", "doc": "

    This function converts txt or tsv files containing gene-based information

    \n\n

    From a \"genes\" columns which contains genes symbol it will match gene coordinates in refgene database and create a bed-like output with vcf header

    \n\n
    Parameters
    \n\n
      \n
    • args: args is an object with several attributes representing the input parameters for the\nfunction. These attributes include:
    • \n
    \n", "signature": "(\targs: <module 'argparse' from '/Users/lebechea/miniconda/envs/howard_devel/lib/python3.10/argparse.py'>) -> None:", "funcdef": "def"}, {"fullname": "howard.functions.plugins", "modulename": "howard.functions.plugins", "kind": "module", "doc": "

    \n"}, {"fullname": "howard.functions.plugins.plugins_infos", "modulename": "howard.functions.plugins", "qualname": "plugins_infos", "kind": "function", "doc": "

    The plugins_infos function loads Python plugins from a specified directory and returns a\ndictionary mapping plugin names to their respective modules.

    \n\n
    Parameters
    \n\n
      \n
    • plugins_dir: The plugins_dir parameter in the plugins_infos function is a string that\nrepresents the directory where the plugins are located. This function loads Python plugins from the\nspecified directory and returns a dictionary containing the loaded plugins
    • \n
    • subfolder_plugins: The subfolder_plugins parameter in the plugins_infos function is a\nstring that represents the subfolder within the plugins_dir where the plugins are located. By\ndefault, the value of subfolder_plugins is set to \"plugins\". This parameter is used to specify the\nsubfolder, defaults to plugins
    • \n
    \n\n
    Returns
    \n\n
    \n

    A dictionary containing information about the loaded plugins is being returned. Each key in\n the dictionary represents the name of a plugin, and the corresponding value is a dictionary\n containing the attributes and functions defined in that plugin.

    \n
    \n", "signature": "(plugins_dir: str, subfolder_plugins: str = 'plugins') -> dict:", "funcdef": "def"}, {"fullname": "howard.functions.plugins.plugins_list", "modulename": "howard.functions.plugins", "qualname": "plugins_list", "kind": "function", "doc": "

    The plugins_list function loads plugin information from a specified directory and determines which\nplugins are enabled based on a dictionary of plugin data.

    \n\n
    Parameters
    \n\n
      \n
    • plugins: The plugins parameter is a dictionary containing information about various\nplugins. Each key in the dictionary represents the name of a plugin, and the corresponding value is\na dictionary containing data about that plugin
    • \n
    • plugins_dir: The plugins_dir parameter is a string that represents the directory where the\nplugins are located. This directory is used by the list_plugins function to locate the plugins and\ngather information about them
    • \n
    • subfolder_plugins: The subfolder_plugins parameter in the plugins_list function is a\nstring that represents the subfolder within the plugins_dir where the plugins are located. By\ndefault, the value of subfolder_plugins is set to \"plugins\". This parameter is used to specify the\nsubfolder, defaults to plugins
    • \n
    \n\n
    Returns
    \n\n
    \n

    The function plugins_list returns a dictionary plugin_info containing information about\n each plugin specified in the plugins parameter. The information includes whether the plugin is\n enabled (based on whether it is in the list of enabled plugins obtained from the specified\n directory), as well as any additional data provided for each plugin in the plugins dictionary.

    \n
    \n", "signature": "(\tplugins: dict,\tplugins_dir: str,\tsubfolder_plugins: str = 'plugins') -> dict:", "funcdef": "def"}, {"fullname": "howard.functions.plugins.plugins_to_load", "modulename": "howard.functions.plugins", "qualname": "plugins_to_load", "kind": "function", "doc": "

    The plugins_to_load function filters a dictionary of plugins based on their \"enabled\" and\n\"__enabled__\" keys.

    \n\n
    Parameters
    \n\n
      \n
    • plugins_list_dict: The plugins_list_dict parameter is a dictionary containing information\nabout various plugins. Each key in the dictionary represents the name of a plugin, and the\ncorresponding value is another dictionary with plugin information. The plugin information dictionary\nmay contain keys such as \"enabled\" and \"__enabled__\" to indicate whether the
    • \n
    \n\n
    Returns
    \n\n
    \n

    The function plugins_to_load returns a dictionary containing plugins that are enabled\n based on the input plugins_list_dict. The plugins are selected based on the values of the\n \"enabled\" and \"__enabled__\" keys in the nested dictionaries within the input dictionary.

    \n
    \n", "signature": "(plugins_list_dict: dict) -> dict:", "funcdef": "def"}, {"fullname": "howard.functions.utils", "modulename": "howard.functions.utils", "kind": "module", "doc": "

    Helper functions.

    \n"}, {"fullname": "howard.functions.utils.read_refgene", "modulename": "howard.functions.utils", "qualname": "read_refgene", "kind": "function", "doc": "

    The function \"read_refgene\" reads a genePred file with an extra column at the front and returns the\ngenePred data.

    \n\n

    refGene = genePred with extra column at front (and ignored ones after)

    \n\n
    Parameters
    \n\n
      \n
    • infile: The input file containing the refGene data
    • \n
    \n\n
    Returns
    \n\n
    \n

    the result of calling the function read_genepred with the argument infile and the\n keyword argument skip_first_column set to True.

    \n
    \n", "signature": "(infile):", "funcdef": "def"}, {"fullname": "howard.functions.utils.read_genepred", "modulename": "howard.functions.utils", "qualname": "read_genepred", "kind": "function", "doc": "

    The function read_genepred reads a file in GenePred extension format and yields a dictionary for\neach line, containing information about a gene.

    \n\n
    Parameters
    \n\n
      \n
    • infile: The infile parameter is the input file object that contains the gene annotation\ndata in the GenePred format. It is used to read the lines of the file and extract the necessary\ninformation
    • \n
    • skip_first_column: The skip_first_column parameter is a boolean flag that determines\nwhether to skip the first column of the input file when parsing the genePred format. By default, it\nis set to False, which means the first column (usually the transcript ID) will be included in the\noutput. If you, defaults to False (optional)
    • \n
    \n\n

    GenePred extension format:\nhttp://genome.ucsc.edu/FAQ/FAQformat.html#GenePredExt

    \n\n

    Column definitions:

    \n\n
      \n
    1. string name; \"Name of gene (usually transcript_id from GTF)\"
    2. \n
    3. string chrom; \"Chromosome name\"
    4. \n
    5. char[1] strand; \"+ or - for strand\"
    6. \n
    7. uint txStart; \"Transcription start position\"
    8. \n
    9. uint txEnd; \"Transcription end position\"
    10. \n
    11. uint cdsStart; \"Coding region start\"
    12. \n
    13. uint cdsEnd; \"Coding region end\"
    14. \n
    15. uint exonCount; \"Number of exons\"
    16. \n
    17. uint[exonCount] exonStarts; \"Exon start positions\"
    18. \n
    19. uint[exonCount] exonEnds; \"Exon end positions\"
    20. \n
    21. uint id; \"Unique identifier\"
    22. \n
    23. string name2; \"Alternate name (e.g. gene_id from GTF)\"
    24. \n
    \n", "signature": "(infile, skip_first_column=False):", "funcdef": "def"}, {"fullname": "howard.functions.utils.make_transcript", "modulename": "howard.functions.utils", "qualname": "make_transcript", "kind": "function", "doc": "

    The function make_transcript takes a JSON object representing a transcript and creates a\nTranscript object from it.

    \n\n
    Parameters
    \n\n
      \n
    • transcript_json: The transcript_json parameter is a JSON object that contains information\nabout a transcript. It should have the following keys:
    • \n
    \n\n
    Returns
    \n\n
    \n

    a Transcript object.

    \n
    \n", "signature": "(transcript_json):", "funcdef": "def"}, {"fullname": "howard.functions.utils.json_perfect_exons_to_cdna_match", "modulename": "howard.functions.utils", "qualname": "json_perfect_exons_to_cdna_match", "kind": "function", "doc": "

    The function json_perfect_exons_to_cdna_match converts a list of ordered exons into a list of cDNA\nmatches, where each match consists of the start and end positions of the exon, the start and end\npositions of the corresponding cDNA sequence, and an optional gap list.

    \n\n

    Perfectly matched exons are basically a no-gap case of cDNA match\nsingle - use a single cDNA match (deletions for introns) - this is currently broken do not use

    \n\n
    Parameters
    \n\n
      \n
    • ordered_exons: A list of tuples representing the start and end positions of exons in a gene\nsequence. The exons should be ordered based on their position in the gene
    • \n
    • single: The single parameter is a boolean flag that determines whether to use a single cDNA\nmatch or not. If single is set to True, the function will create a single cDNA match by\nconsidering deletions for introns. If single is set to False (, defaults to False (optional)
    • \n
    \n\n
    Returns
    \n\n
    \n

    a list of lists, where each inner list represents a cDNA match. Each inner list contains\n the start and end positions of the exon, the start and end positions of the corresponding cDNA\n match, and a string representing any gaps (intron lengths) between exons.

    \n
    \n", "signature": "(ordered_exons, single=False):", "funcdef": "def"}, {"fullname": "howard.functions.utils.read_transcripts", "modulename": "howard.functions.utils", "qualname": "read_transcripts", "kind": "function", "doc": "

    The function read_transcripts reads all transcripts in a RefGene file and returns them as a\ndictionary.

    \n\n
    Parameters
    \n\n
      \n
    • refgene_file: The refgene_file parameter is the file path to a RefGene file. This file\ncontains information about gene transcripts, such as their names, full names, and other relevant\ndetails. The read_transcripts function reads this file and returns a dictionary of transcripts,\nwhere the keys are the
    • \n
    \n\n
    Returns
    \n\n
    \n

    a dictionary of transcripts.

    \n
    \n", "signature": "(refgene_file):", "funcdef": "def"}, {"fullname": "howard.functions.utils.get_genomic_sequence", "modulename": "howard.functions.utils", "qualname": "get_genomic_sequence", "kind": "function", "doc": "

    The function get_genomic_sequence returns a sequence for a given genomic region.

    \n\n
    Parameters
    \n\n
      \n
    • genome: A dictionary containing genomic sequences for different chromosomes. The keys of the\ndictionary are chromosome names (e.g., 'chr1', 'chr2', etc.), and the values are the corresponding\ngenomic sequences
    • \n
    • chrom: The chrom parameter represents the chromosome or genomic region from which you want to\nextract the sequence
    • \n
    • start: The start parameter is the 1-based coordinate of the beginning of the genomic region
    • \n
    • end: The end parameter is the end coordinate of the genomic region. It is a 1-based,\nend-inclusive coordinate, meaning that the base at the end position is included in the returned\nsequence
    • \n
    \n\n
    Returns
    \n\n
    \n

    a sequence for the specified genomic region.

    \n
    \n", "signature": "(genome, chrom, start, end):", "funcdef": "def"}, {"fullname": "howard.functions.utils.get_vcf_allele", "modulename": "howard.functions.utils", "qualname": "get_vcf_allele", "kind": "function", "doc": "

    The function get_vcf_allele takes a HGVS name, a genome, and an optional transcript, and returns a\nVCF-style allele.

    \n\n
    Parameters
    \n\n
      \n
    • hgvs: The hgvs parameter is an object of type HGVSName. It likely contains information\nabout a genetic variant, such as the chromosome, start and end positions, and the type of mutation\n(e.g., substitution, deletion, insertion, etc.)
    • \n
    • genome: The genome parameter is the genomic sequence from which the allele will be\nextracted. It is a string representing the entire genome sequence
    • \n
    • transcript: The transcript parameter is an optional argument that represents a transcript.\nIt is used to retrieve the VCF-style allele from the given HGVSName and genome. If a transcript is\nprovided, the function will use it to get the VCF coordinates and the reference and alternate\nalleles. If no
    • \n
    \n\n
    Returns
    \n\n
    \n

    the chromosome, start position, end position, reference allele, and alternate allele.

    \n
    \n", "signature": "(hgvs, genome, transcript=None):", "funcdef": "def"}, {"fullname": "howard.functions.utils.get_alt_from_sequence", "modulename": "howard.functions.utils", "qualname": "get_alt_from_sequence", "kind": "function", "doc": "

    The function \"get_alt_from_sequence\" returns a genomic sequence from a given HGVS notation, genome,\nand transcript.

    \n\n
    Parameters
    \n\n
      \n
    • hgvs: The hgvs parameter is an object that provides methods for working with Human Genome\nVariation Society (HGVS) nomenclature. It likely has a method called get_raw_coords() that takes a\ntranscript as input and returns the chromosome, start position, and end position of the\ncorresponding genomic sequence
    • \n
    • genome: The genome parameter refers to the genomic sequence from which the alternative allele\nwill be extracted
    • \n
    • transcript: The transcript parameter is a string that represents the transcript ID or name
    • \n
    \n\n
    Returns
    \n\n
    \n

    the genomic sequence from the specified region in the genome.

    \n
    \n", "signature": "(hgvs, genome, transcript):", "funcdef": "def"}, {"fullname": "howard.functions.utils.matches_ref_allele", "modulename": "howard.functions.utils", "qualname": "matches_ref_allele", "kind": "function", "doc": "

    The function matches_ref_allele checks if the reference allele in a given HGVS notation matches\nthe corresponding genomic sequence.

    \n\n
    Parameters
    \n\n
      \n
    • hgvs: The hgvs parameter is an object that represents a variant in the Human Genome\nVariation Society (HGVS) format. It contains information about the variant's reference allele,\nalternative allele, and genomic coordinates
    • \n
    • genome: The genome parameter is the genomic sequence from which the reference allele is\nextracted
    • \n
    • transcript: The transcript parameter is an object that represents a transcript. It has a\nproperty called tx_position which provides information about the position of the transcript on the\ngenome, including whether it is on the forward or reverse strand
    • \n
    \n\n
    Returns
    \n\n
    \n

    True if the reference allele matches the genomic sequence, and False otherwise.

    \n
    \n", "signature": "(hgvs, genome, transcript=None):", "funcdef": "def"}, {"fullname": "howard.functions.utils.hgvs_justify_dup", "modulename": "howard.functions.utils", "qualname": "hgvs_justify_dup", "kind": "function", "doc": "

    The function hgvs_justify_dup determines if an allele is a duplication and justifies it by\nreturning the duplicated region if applicable.

    \n\n
    Parameters
    \n\n
      \n
    • chrom: The chromosome name where the allele is located
    • \n
    • offset: The offset parameter is the 1-index genomic coordinate, which represents the position\nof the variant on the chromosome
    • \n
    • ref: The \"ref\" parameter represents the reference allele, which is the allele that is present\nin the reference genome at the given genomic coordinate
    • \n
    • alt: The alt parameter represents the alternate allele, which is the allele that differs\nfrom the reference allele at a specific genomic position
    • \n
    • genome: The genome parameter is a pygr compatible genome object. It is an object that\nrepresents a reference genome and provides methods to access genomic sequences
    • \n
    \n\n
    Returns
    \n\n
    \n

    a tuple containing the chromosome name, offset, reference allele, alternate allele, and\n mutation type.

    \n
    \n", "signature": "(chrom, offset, ref, alt, genome):", "funcdef": "def"}, {"fullname": "howard.functions.utils.hgvs_justify_indel", "modulename": "howard.functions.utils", "qualname": "hgvs_justify_indel", "kind": "function", "doc": "

    The function hgvs_justify_indel justifies an indel (insertion or deletion) according to the HGVS\nstandard by determining the genomic sequence around the lesion, identifying the actual lesion\nsequence, and 3' justifying the offset.

    \n\n
    Parameters
    \n\n
      \n
    • chrom: The chromosome where the indel is located
    • \n
    • offset: The offset parameter represents the position of the indel (insertion or deletion)\nwithin the chromosome or genomic sequence
    • \n
    • ref: The ref parameter represents the reference allele of the variant. It is a string that\ncontains the nucleotide sequence of the reference allele
    • \n
    • alt: The alt parameter in the hgvs_justify_indel function represents the alternate allele\nsequence for an indel variant. It is the sequence that replaces the reference allele sequence\n(ref) at the specified offset position on the chrom chromosome
    • \n
    • strand: The parameter \"strand\" represents the orientation of the DNA strand where the indel\nis located. It can have two possible values: \"+\" or \"-\". The \"+\" strand refers to the forward\nstrand, while the \"-\" strand refers to the reverse complement strand
    • \n
    • genome: The genome parameter is a dictionary that contains the genomic sequence for each\nchromosome. The keys of the dictionary are the chromosome names (e.g., \"chr1\", \"chr2\", etc.), and\nthe values are the corresponding genomic sequences
    • \n
    \n\n
    Returns
    \n\n
    \n

    the variables chrom, offset, ref, and alt.

    \n
    \n", "signature": "(chrom, offset, ref, alt, strand, genome):", "funcdef": "def"}, {"fullname": "howard.functions.utils.hgvs_normalize_variant", "modulename": "howard.functions.utils", "qualname": "hgvs_normalize_variant", "kind": "function", "doc": "

    The function hgvs_normalize_variant converts a variant in VCF-style to HGVS-style by adjusting the\noffset, reference and alternate alleles, and determining the mutation type.

    \n\n
    Parameters
    \n\n
      \n
    • chrom: The chromosome where the variant is located
    • \n
    • offset: The offset parameter represents the position of the variant within the chromosome. It\nis an integer value
    • \n
    • ref: The ref parameter represents the reference allele in a variant
    • \n
    • alt: The alt parameter represents the alternate allele in a variant. It is a string that\nrepresents the alternative nucleotide(s) or sequence(s) at a specific position in the genome
    • \n
    • genome: The genome parameter is the reference genome sequence. It is used to perform\ncertain operations on the variant, such as justifying indels and representing duplications
    • \n
    • transcript: The transcript parameter is an optional argument that represents the transcript\nor gene in which the variant occurs. It is used to determine the strand of the gene and to perform\ncertain operations on the variant. If no transcript is provided, the default value is None
    • \n
    \n\n
    Returns
    \n\n
    \n

    the following values: chrom, offset, ref, alt, and mutation_type.

    \n
    \n", "signature": "(chrom, offset, ref, alt, genome, transcript=None):", "funcdef": "def"}, {"fullname": "howard.functions.utils.parse_hgvs_name", "modulename": "howard.functions.utils", "qualname": "parse_hgvs_name", "kind": "function", "doc": "

    The function parse_hgvs_name takes an HGVS name, a genome object, and optional parameters, and\nreturns the chromosome, start position, reference allele, and alternate allele of the variant\ndescribed by the HGVS name.

    \n\n
    Parameters
    \n\n
      \n
    • hgvs_name: The HGVS name to parse
    • \n
    • genome: A pygr compatible genome object. This object represents the reference genome and\nprovides methods to access genomic sequences and annotations
    • \n
    • transcript: The transcript parameter is an optional argument that represents the transcript\ncorresponding to the HGVS name. It is used to determine the reference sequence for the variant. If\nnot provided, the get_transcript function is used to retrieve the transcript based on the HGVS name.\nIf neither transcript nor get_transcript is
    • \n
    • get_transcript: A function that takes a transcript name as input and returns the\ncorresponding transcript object. If not provided, the default behavior is to return None
    • \n
    • flank_length: The flank_length parameter is an integer that specifies the length of the\nflanking sequence to include when normalizing the variant allele. This is used in the\nnormalize_variant function to determine the reference allele and normalize the variant allele\naccording to the VCF standard, defaults to 30 (optional)
    • \n
    • normalize: A boolean parameter that determines whether the allele should be normalized\naccording to the VCF standard. If set to True, the allele will be normalized; if set to False, the\nallele will not be normalized, defaults to True (optional)
    • \n
    • lazy: The lazy parameter is a boolean flag that determines whether or not to discard\nversion information from the incoming transcript or gene. If lazy is set to True, the version\ninformation will be discarded. If lazy is set to False, the version information will be included\nin the, defaults to False (optional)
    • \n
    • indels_start_with_same_base: The parameter \"indels_start_with_same_base\" is a boolean flag\nthat determines whether or not to strip the common prefix from indels when normalizing alleles. If\nset to True, the common prefix will not be stripped, defaults to True (optional)
    • \n
    \n\n
    Returns
    \n\n
    \n

    The function parse_hgvs_name returns a tuple containing the chromosome, start position,\n reference allele, and alternate allele of the parsed HGVS name.

    \n
    \n", "signature": "(\thgvs_name,\tgenome,\ttranscript=None,\tget_transcript=<function <lambda>>,\tflank_length=30,\tnormalize=True,\tlazy=False,\tindels_start_with_same_base=True):", "funcdef": "def"}, {"fullname": "howard.functions.utils.cdna_to_protein", "modulename": "howard.functions.utils", "qualname": "cdna_to_protein", "kind": "function", "doc": "

    The function cdna_to_protein takes in various parameters related to a genetic mutation and returns\nan updated HGVS object with additional protein information.

    \n\n
    Parameters
    \n\n
      \n
    • hgvs: The parameter hgvs is an object that represents a variant in the Human Genome\nVariation Society (HGVS) format. It contains information about the variant, such as the cDNA start\nand end positions
    • \n
    • offset: The offset is a numerical value that represents the starting position of the genomic\nsequence in the reference genome. It is used to calculate the genomic position of the mutation
    • \n
    • genome: The genome parameter is a dictionary that represents the genomic sequence. It\ncontains the chromosome as the key and the corresponding DNA sequence as the value
    • \n
    • chrom: The chrom parameter represents the chromosome on which the mutation occurs
    • \n
    • transcript: The transcript parameter is a string that represents the transcript ID or name.\nIt is used to identify the specific transcript in the genome
    • \n
    • ref: The parameter \"ref\" is a string that represents the reference nucleotide sequence. It is\nused to determine the codons in the DNA sequence
    • \n
    • alt: The alt parameter in the cdna_to_protein function is a string that represents the\nalternate nucleotide sequence for a mutation
    • \n
    • mutation_type: The mutation_type parameter is a string that represents the type of\nmutation. It can have the following values:
    • \n
    • codon_type: The codon_type parameter is a string that specifies the type of codon\ntranslation to be used. It can have one of the following values:, defaults to 3
    • \n
    \n\n
    Returns
    \n\n
    \n

    the updated hgvs object.

    \n
    \n", "signature": "(\thgvs,\toffset,\tgenome,\tchrom,\ttranscript,\tref,\talt,\tmutation_type,\tcodon_type: str = '3'):", "funcdef": "def"}, {"fullname": "howard.functions.utils.variant_to_hgvs_name", "modulename": "howard.functions.utils", "qualname": "variant_to_hgvs_name", "kind": "function", "doc": "

    The function variant_to_hgvs_name takes in genomic coordinates, alleles, and other parameters, and\nreturns a HGVS-style name for the variant.

    \n\n
    Parameters
    \n\n
      \n
    • chrom: The chromosome name where the variant is located
    • \n
    • offset: The offset parameter represents the genomic offset of the allele. It is the\nposition of the variant on the chromosome
    • \n
    • ref: The reference allele at the given genomic coordinate
    • \n
    • alt: The alt parameter is the alternate allele. In genetics, a variant or mutation can\noccur at a specific position in the genome, and the alt allele represents the alternative\nnucleotide or sequence at that position compared to the reference genome
    • \n
    • genome: A pygr compatible genome object, which represents the reference genome sequence
    • \n
    • transcript: The transcript parameter is the transcript corresponding to the allele. It is\nused to determine the type of coordinates to use in the HGVS name (either genomic coordinates or\ncDNA coordinates). If the transcript is not available, the function will use genomic coordinates
    • \n
    • transcript_protein: The transcript_protein parameter is an optional argument that\nrepresents the protein sequence corresponding to the transcript. It is used to populate the\ntranscript_protein attribute of the HGVSName object
    • \n
    • exon: The exon parameter is an optional argument that represents the exon number or\nidentifier associated with the variant. It is used to populate the exon attribute of the\nHGVSName object. If provided, it will be included in the final HGVS name generated by the function
    • \n
    • max_allele_length: The max_allele_length parameter is used to determine whether to\nrepresent the alleles as their actual sequence or as the length of the sequence. If the length of\nthe reference allele or alternate allele is greater than max_allele_length, then the length of the\nallele is used instead of the actual, defaults to 4 (optional)
    • \n
    • use_counsyl: A boolean flag indicating whether to use Counsyl-specific rules for single-base\nindels, defaults to False (optional)
    • \n
    • codon_type: The parameter codon_type is a string that specifies the type of codon numbering\nto be used in the HGVS name. It is used in the cdna_to_protein function to determine the type of\ncodon numbering to be used in the protein-level HGVS name. The, defaults to 3
    • \n
    \n\n
    Returns
    \n\n
    \n

    an object of type HGVSName.

    \n
    \n", "signature": "(\tchrom,\toffset,\tref,\talt,\tgenome,\ttranscript,\ttranscript_protein=None,\texon=None,\tmax_allele_length=4,\tuse_counsyl=False,\tcodon_type: str = '3'):", "funcdef": "def"}, {"fullname": "howard.functions.utils.format_hgvs_name", "modulename": "howard.functions.utils", "qualname": "format_hgvs_name", "kind": "function", "doc": "

    The format_hgvs_name function generates a HGVS name from a genomic coordinate.

    \n\n
    Parameters
    \n\n
      \n
    • chrom: The chrom parameter represents the chromosome name. It is a string that specifies\nthe chromosome on which the variant occurs
    • \n
    • offset: The offset parameter represents the genomic offset of the allele, which is the\nposition of the variant on the chromosome. It is used to generate the HGVS name based on the genomic\ncoordinate
    • \n
    • ref: The ref parameter represents the reference allele. In genetics, a variant or mutation\ncan occur at a specific position in the genome, resulting in a change from the reference allele to\nan alternate allele. The ref parameter specifies the sequence of the reference allele at that\nposition
    • \n
    • alt: The alt parameter represents the alternate allele. In genetics, a variant or mutation\ncan occur at a specific position in the genome, resulting in a change from the reference allele to\nan alternate allele. The alt parameter specifies the sequence of the alternate allele at that\nposition
    • \n
    • genome: A pygr compatible genome object, which is used to retrieve genomic sequences and\nannotations. It provides methods to access genomic information such as chromosome names, sequences,\nand gene annotations
    • \n
    • transcript: The transcript parameter is the transcript corresponding to the allele. It is\nused to generate the HGVS name based on the genomic coordinate
    • \n
    • transcript_protein: The transcript_protein parameter is an optional argument that\nrepresents the protein transcript corresponding to the cDNA transcript. It is used to generate the\nprotein HGVS name if it exists
    • \n
    • exon: The exon parameter is used to specify the exon number in the HGVS name. It is an\noptional parameter and is used to generate a more specific HGVS name when needed
    • \n
    • use_prefix: A boolean indicating whether to include a transcript/gene/chromosome prefix in\nthe HGVS name. If set to True, the prefix will be included; if set to False, the prefix will be\nexcluded, defaults to True (optional)
    • \n
    • use_gene: A boolean parameter that determines whether to include the gene name in the HGVS\nprefix. If set to True, the gene name will be included; if set to False, the gene name will be\nexcluded, defaults to True (optional)
    • \n
    • use_protein: A boolean parameter that determines whether to include protein HGVS notation in\nthe generated HGVS name. If set to True, the protein HGVS notation will be included if it exists. If\nset to False, only the genomic and transcript HGVS notation will be included, defaults to False\n(optional)
    • \n
    • use_counsyl: The use_counsyl parameter is a boolean parameter that determines whether to\nuse Counsyl-specific formatting for the HGVS name. If set to True, the HGVS name will be formatted\naccording to Counsyl's specific guidelines. If set to False, the HGVS name will be, defaults to\nFalse (optional)
    • \n
    • max_allele_length: The max_allele_length parameter is used to determine the maximum length\nof the allele. If the length of the allele is greater than the specified max_allele_length, then\nthe allele length will be used in the HGVS name instead of the actual allele sequence. By default,\nthe `, defaults to 4 (optional)
    • \n
    • full_format: A boolean parameter that determines whether to use the full HGVS format or not.\nIf set to True, the HGVS name will include the gene name, transcript name, exon number (if\nprovided), and the amino acid change (if protein information is available). If set to False, the\nHGVS, defaults to False (optional)
    • \n
    • use_version: A boolean parameter that determines whether to include the version number of the\ntranscript in the HGVS name. If set to True, the version number will be included; if set to False,\nthe version number will be excluded, defaults to False (optional)
    • \n
    • codon_type: The codon_type parameter is a string that specifies the type of codon numbering\nto be used in the HGVS name. It can have one of the following values:, defaults to 3
    • \n
    \n\n
    Returns
    \n\n
    \n

    a formatted HGVS name generated from a genomic coordinate.

    \n
    \n", "signature": "(\tchrom,\toffset,\tref,\talt,\tgenome,\ttranscript,\ttranscript_protein=None,\texon=None,\tuse_prefix=True,\tuse_gene=True,\tuse_protein=False,\tuse_counsyl=False,\tmax_allele_length=4,\tfull_format=False,\tuse_version=False,\tcodon_type: str = '3'):", "funcdef": "def"}, {"fullname": "howard.functions.utils.create_refseq_table", "modulename": "howard.functions.utils", "qualname": "create_refseq_table", "kind": "function", "doc": "

    The function create_refseq_table creates a table in a database with the specified name and\nstructure, either using a file or without a file.

    \n\n
    Parameters
    \n\n
      \n
    • conn: The conn parameter is a connection object that represents a connection to a database.\nIt is used to execute SQL queries and interact with the database
    • \n
    • refseq_table: The refseq_table parameter is a string that specifies the name of the table\nthat will be created in the database to store the RefGene data, defaults to refseq
    • \n
    • refseq_file: The refseq_file parameter is a string that specifies the path to a file\ncontaining the data for the refGene table. If this parameter is provided, the function will create\nthe refGene table in the database using the data from the file. If this parameter is not provided,\nthe function will
    • \n
    \n\n
    Returns
    \n\n
    \n

    the name of the refseq table that was created or used.

    \n
    \n", "signature": "(conn, refseq_table: str = 'refseq', refseq_file: str = None) -> str:", "funcdef": "def"}, {"fullname": "howard.functions.utils.get_refseq_table", "modulename": "howard.functions.utils", "qualname": "get_refseq_table", "kind": "function", "doc": "

    The function get_refseq_table checks if a table named refseq exists in a database, and if not,\ncreates it using the create_refseq_table function.

    \n\n
    Parameters
    \n\n
      \n
    • conn: The parameter conn is expected to be a connection object that allows you to interact\nwith a database. It could be an instance of a database connector class, such as pymysql.connect()\nfor MySQL or psycopg2.connect() for PostgreSQL
    • \n
    • refseq_table: The parameter \"refseq_table\" is a string that specifies the name of the table\nin the database where the refGene data will be stored. If this table already exists in the database,\nthe function will return the name of the existing table. If the table does not exist, the function\nwill create, defaults to refseq
    • \n
    • refseq_file: The refseq_file parameter is the name or path of the file that contains the\nrefGene data. This file is used to populate the refGene table in the database
    • \n
    \n\n
    Returns
    \n\n
    \n

    the name of the refseq_table.

    \n
    \n", "signature": "(conn, refseq_table: str = 'refseq', refseq_file: str = None) -> str:", "funcdef": "def"}, {"fullname": "howard.functions.utils.get_transcript", "modulename": "howard.functions.utils", "qualname": "get_transcript", "kind": "function", "doc": "

    The function get_transcript takes a dictionary of transcripts and a name as input, and returns the\ntranscript associated with that name.

    \n\n
    Parameters
    \n\n
      \n
    • transcripts: A dictionary containing transcripts as values, with names as keys
    • \n
    • name: The name parameter is a string that represents the name of the transcript that you want\nto retrieve from the transcripts dictionary
    • \n
    \n\n
    Returns
    \n\n
    \n

    the value associated with the given name key in the transcripts dictionary.

    \n
    \n", "signature": "(\ttranscripts: dict,\ttranscript_name: str) -> howard.objects.transcript.Transcript:", "funcdef": "def"}, {"fullname": "howard.main", "modulename": "howard.main", "kind": "module", "doc": "

    \n"}, {"fullname": "howard.main.msg_gui_disable", "modulename": "howard.main", "qualname": "msg_gui_disable", "kind": "variable", "doc": "

    \n", "default_value": "'HOWARD GUI disabled'"}, {"fullname": "howard.main.main_folder", "modulename": "howard.main", "qualname": "main_folder", "kind": "variable", "doc": "

    \n", "default_value": "'/Users/lebechea/BIOINFO/git/HOWARD/howard'"}, {"fullname": "howard.main.main", "modulename": "howard.main", "qualname": "main", "kind": "function", "doc": "

    It loads a VCF file in multiple format (VCF, parquet, DB), and process, query, export data

    \n", "signature": "() -> None:", "funcdef": "def"}, {"fullname": "howard.objects", "modulename": "howard.objects", "kind": "module", "doc": "

    \n"}, {"fullname": "howard.objects.cdna", "modulename": "howard.objects.cdna", "kind": "module", "doc": "

    \n"}, {"fullname": "howard.objects.cdna.CDNA_START_CODON", "modulename": "howard.objects.cdna", "qualname": "CDNA_START_CODON", "kind": "variable", "doc": "

    \n", "default_value": "'cdna_start'"}, {"fullname": "howard.objects.cdna.CDNA_STOP_CODON", "modulename": "howard.objects.cdna", "qualname": "CDNA_STOP_CODON", "kind": "variable", "doc": "

    \n", "default_value": "'cdna_stop'"}, {"fullname": "howard.objects.cdna.CDNACoord", "modulename": "howard.objects.cdna", "qualname": "CDNACoord", "kind": "class", "doc": "

    A HGVS cDNA-based coordinate.

    \n\n

    A cDNA coordinate can take one of these forms:

    \n\n

    N = nucleotide N in protein coding sequence (e.g. 11A>G)

    \n\n

    -N = nucleotide N 5' of the ATG translation initiation codon (e.g. -4A>G)\n NOTE: so located in the 5'UTR or 5' of the transcription initiation\n site (upstream of the gene, incl. promoter)

    \n\n

    *N = nucleotide N 3' of the translation stop codon (e.g. *6A>G)\n NOTE: so located in the 3'UTR or 3' of the polyA-addition site\n (including downstream of the gene)

    \n\n

    N+M = nucleotide M in the intron after (3' of) position N in the coding DNA\n reference sequence (e.g. 30+4A>G)

    \n\n

    N-M = nucleotide M in the intron before (5' of) position N in the coding\n DNA reference sequence (e.g. 301-2A>G)

    \n\n

    -N+M / -N-M = nucleotide in an intron in the 5'UTR (e.g. -45+4A>G)

    \n\n

    *N+M / *N-M = nucleotide in an intron in the 3'UTR (e.g. *212-2A>G)

    \n"}, {"fullname": "howard.objects.cdna.CDNACoord.__init__", "modulename": "howard.objects.cdna", "qualname": "CDNACoord.__init__", "kind": "function", "doc": "

    coord: main coordinate along cDNA on the same strand as the transcript

    \n\n

    offset: an additional genomic offset from the main coordinate. This\n allows referencing non-coding (e.g. intronic) positions.\n Offset is also interpreted on the coding strand.

    \n\n

    landmark: ('cdna_start', 'cdna_stop') indicating that 'coord'\n is relative to one of these landmarks.

    \n\n

    string: a coordinate from an HGVS name. If given coord, offset, and\n landmark should not be specified.

    \n", "signature": "(coord=0, offset=0, landmark='cdna_start', string='')"}, {"fullname": "howard.objects.cdna.CDNACoord.parse", "modulename": "howard.objects.cdna", "qualname": "CDNACoord.parse", "kind": "function", "doc": "

    Parse a HGVS formatted cDNA coordinate.

    \n", "signature": "(self, coord_text):", "funcdef": "def"}, {"fullname": "howard.objects.database", "modulename": "howard.objects.database", "kind": "module", "doc": "

    \n"}, {"fullname": "howard.objects.database.SEP_TYPE", "modulename": "howard.objects.database", "qualname": "SEP_TYPE", "kind": "variable", "doc": "

    \n", "default_value": "{'vcf': '\\t', 'tsv': '\\t', 'csv': ',', 'tbl': '|', 'bed': '\\t'}"}, {"fullname": "howard.objects.database.DATABASE_TYPE_NEEDED_COLUMNS", "modulename": "howard.objects.database", "qualname": "DATABASE_TYPE_NEEDED_COLUMNS", "kind": "variable", "doc": "

    \n", "default_value": "{'variants': {'#CHROM': ['#CHROM', 'CHROM', 'CHR', 'CHROMOSOME'], 'POS': ['POS'], 'REF': ['REF'], 'ALT': ['ALT']}, 'regions': {'#CHROM': ['#CHROM', 'CHROM', 'CHR', 'CHROMOSOME'], 'START': ['START', 'POSITIONSTART', 'POS'], 'END': ['END', 'POSITIONEND', 'POS']}, 'vcf': {'#CHROM': ['#CHROM', 'CHROM', 'CHR', 'CHROMOSOME'], 'POS': ['POS', 'POSITION'], 'ID': ['ID', 'IDENTIFIER'], 'REF': ['REF', 'REFERENCE'], 'ALT': ['ALT', 'ALTERNATIVE'], 'QUAL': ['QUAL', 'QUALITY'], 'FILTER': ['FILTER'], 'INFO': ['INFO']}, 'bed': {'#CHROM': ['#CHROM', 'CHROM', 'CHR', 'CHROMOSOME'], 'START': ['START', 'POSITIONSTART', 'POS'], 'END': ['END', 'POSITIONEND', 'POS']}}"}, {"fullname": "howard.objects.database.DEFAULT_VCF_HEADER", "modulename": "howard.objects.database", "qualname": "DEFAULT_VCF_HEADER", "kind": "variable", "doc": "

    \n", "default_value": "['#CHROM', 'POS', 'ID', 'REF', 'ALT', 'QUAL', 'FILTER', 'INFO']"}, {"fullname": "howard.objects.database.DEFAULT_VCF_HEADER_DUCKDB_TYPES", "modulename": "howard.objects.database", "qualname": "DEFAULT_VCF_HEADER_DUCKDB_TYPES", "kind": "variable", "doc": "

    \n", "default_value": "{'#CHROM': 'STRING', 'POS': 'INT', 'START': 'INT', 'END': 'INT', 'ID': 'VARCHAR', 'REF': 'VARCHAR', 'ALT': 'VARCHAR', 'FILTER': 'VARCHAR', 'INFO': 'VARCHAR'}"}, {"fullname": "howard.objects.database.DEFAULT_HEADER_LIST", "modulename": "howard.objects.database", "qualname": "DEFAULT_HEADER_LIST", "kind": "variable", "doc": "

    \n", "default_value": "['##fileformat=VCFv4.2', '#CHROM\\tPOS\\tID\\tREF\\tALT\\tQUAL\\tFILTER\\tINFO']"}, {"fullname": "howard.objects.database.FILE_FORMAT_DELIMITERS", "modulename": "howard.objects.database", "qualname": "FILE_FORMAT_DELIMITERS", "kind": "variable", "doc": "

    \n", "default_value": "{'vcf': '\\t', 'tsv': '\\t', 'csv': ',', 'tbl': '|', 'bed': '\\t'}"}, {"fullname": "howard.objects.database.DUCKDB_EXTENSION_TO_LOAD", "modulename": "howard.objects.database", "qualname": "DUCKDB_EXTENSION_TO_LOAD", "kind": "variable", "doc": "

    \n", "default_value": "['sqlite_scanner']"}, {"fullname": "howard.objects.database.Database", "modulename": "howard.objects.database", "qualname": "Database", "kind": "class", "doc": "

    \n"}, {"fullname": "howard.objects.database.Database.__init__", "modulename": "howard.objects.database", "qualname": "Database.__init__", "kind": "function", "doc": "

    This is an initialization function for a class that sets up a database and header file for use\nin a DuckDB connection.

    \n\n
    Parameters
    \n\n
      \n
    • database: A string representing the name of the database to be used. If None, the default\ndatabase will be used
    • \n
    • format: The format parameter is not described in the docstring, so it is unclear what\nit represents
    • \n
    • header: The header parameter is a string that represents the name of the header file\nthat contains the column names for the database. It is used in conjunction with the database\nparameter to set the header for the database. If the header parameter is not provided, the\nheader will be set to
    • \n
    • header_file: The header_file parameter is a string that represents the file path to the\nheader file that contains the column names for the database. It is used in the set_header()\nmethod to set the header attribute of the class
    • \n
    • databases_folders: A list of folders where the database files are located. This parameter\nis used in the set_database() method to search for the database file in the specified folders.\nIf the database file is not found in any of the folders, an error is raised
    • \n
    • assembly: A string representing the name of the assembly to be used. It is used in\nconjunction with the set_assembly() method to set the assembly for the DuckDB connection. If\nthe assembly parameter is not provided, the default assembly will be used
    • \n
    • conn: An optional parameter that represents an existing DuckDBPyConnection object. If\nprovided, the class will use this connection instead of creating a new one. If not provided, a\nnew connection will be created
    • \n
    • conn_config: An optional parameter for DuckDBPyConnection object config (see duckdb.connect)
    • \n
    • table: The table parameter is a string representing the name of the table in the\ndatabase that will be used in the DuckDB connection. It is used in the set_table() method to\nset the table attribute of the class. If the table parameter is not provided, the default\ntable will
    • \n
    \n", "signature": "(\tdatabase: str = None,\tformat: str = None,\theader: str = None,\theader_file: str = None,\tdatabases_folders: list = None,\tassembly: str = None,\tconn=None,\tconn_config: dict = {},\ttable: str = None)"}, {"fullname": "howard.objects.database.Database.database", "modulename": "howard.objects.database", "qualname": "Database.database", "kind": "variable", "doc": "

    \n"}, {"fullname": "howard.objects.database.Database.format", "modulename": "howard.objects.database", "qualname": "Database.format", "kind": "variable", "doc": "

    \n"}, {"fullname": "howard.objects.database.Database.header", "modulename": "howard.objects.database", "qualname": "Database.header", "kind": "variable", "doc": "

    \n"}, {"fullname": "howard.objects.database.Database.header_file", "modulename": "howard.objects.database", "qualname": "Database.header_file", "kind": "variable", "doc": "

    \n"}, {"fullname": "howard.objects.database.Database.databases_folders", "modulename": "howard.objects.database", "qualname": "Database.databases_folders", "kind": "variable", "doc": "

    \n"}, {"fullname": "howard.objects.database.Database.assembly", "modulename": "howard.objects.database", "qualname": "Database.assembly", "kind": "variable", "doc": "

    \n"}, {"fullname": "howard.objects.database.Database.table", "modulename": "howard.objects.database", "qualname": "Database.table", "kind": "variable", "doc": "

    \n"}, {"fullname": "howard.objects.database.Database.set_database", "modulename": "howard.objects.database", "qualname": "Database.set_database", "kind": "function", "doc": "

    This function sets the database attribute of an object to a specified database if it exists or\ncan be found in a list of folders.

    \n\n
    Parameters
    \n\n
      \n
    • database: A string representing the name of the database to be set
    • \n
    • databases_folders: The databases_folders parameter is a list of folders/directories\nwhere the find_database method will search for the specified database. If the database is\nfound in any of these folders, it will be set as the current database. If databases_folders is\nnot provided, the
    • \n
    • format: The format parameter is an optional string representing the format of the\ndatabase to be searched for. If provided, the find_database method will search for the\ndatabase only in the specified format. If not provided, the method will search for the database\nin all formats
    • \n
    • assembly: The assembly parameter is an optional string representing the name of the\nassembly to which the database belongs. If provided, the find_database method will search for\nthe database only in the specified assembly. If not provided, the method will search for the\ndatabase in all assemblies
    • \n
    \n", "signature": "(\tself,\tdatabase: str,\tdatabases_folders: list = None,\tformat: str = None,\tassembly: str = None) -> str:", "funcdef": "def"}, {"fullname": "howard.objects.database.Database.set_databases_folders", "modulename": "howard.objects.database", "qualname": "Database.set_databases_folders", "kind": "function", "doc": "

    This function sets the list of folders where databases are located as an attribute of an object.

    \n\n
    Parameters
    \n\n
      \n
    • databases_folders: databases_folders is a list parameter that contains the paths to the\nfolders where the databases are stored. The default value of the parameter is a list with a\nsingle element, which is the current directory (\".\")
    • \n
    \n", "signature": "(self, databases_folders: list = None) -> None:", "funcdef": "def"}, {"fullname": "howard.objects.database.Database.get_database_folders", "modulename": "howard.objects.database", "qualname": "Database.get_database_folders", "kind": "function", "doc": "

    This function returns a list of database folders.

    \n\n
    Returns
    \n\n
    \n

    The method get_database_folders is returning a list of database folders. The specific\n list being returned is stored in the instance variable databases_folders.

    \n
    \n", "signature": "(self) -> list:", "funcdef": "def"}, {"fullname": "howard.objects.database.Database.read_header_file", "modulename": "howard.objects.database", "qualname": "Database.read_header_file", "kind": "function", "doc": "

    This function reads the header of a VCF file and returns a list of the header lines.

    \n\n
    Parameters
    \n\n
      \n
    • header_file: The path to the VCF file header that needs to be read
    • \n
    \n\n
    Returns
    \n\n
    \n

    a list of header lines of a VCF file.

    \n
    \n", "signature": "(self, header_file: str = None) -> list:", "funcdef": "def"}, {"fullname": "howard.objects.database.Database.get_header_length", "modulename": "howard.objects.database", "qualname": "Database.get_header_length", "kind": "function", "doc": "

    The get_header_length function returns the length of a header file, excluding the first line.

    \n\n
    Parameters
    \n\n
      \n
    • header_file: The header_file parameter is a string that represents the file path or\nname of the header file. It is an optional parameter, which means it can be omitted when calling\nthe get_header_length method
    • \n
    \n\n
    Returns
    \n\n
    \n

    an integer, which represents the length of the header file.

    \n
    \n", "signature": "(self, header_file: str = None) -> int:", "funcdef": "def"}, {"fullname": "howard.objects.database.Database.get_header_file_columns", "modulename": "howard.objects.database", "qualname": "Database.get_header_file_columns", "kind": "function", "doc": "

    The function get_header_columns returns the header list of a VCF file.

    \n\n
    Parameters
    \n\n
      \n
    • header_file: The header_file parameter is a string that represents the file path of the\nheader file. It is an optional parameter and its default value is None
    • \n
    \n\n
    Returns
    \n\n
    \n

    a list of header columns.

    \n
    \n", "signature": "(self, header_file: str = None) -> list:", "funcdef": "def"}, {"fullname": "howard.objects.database.Database.get_header_from_list", "modulename": "howard.objects.database", "qualname": "Database.get_header_from_list", "kind": "function", "doc": "

    The function get_header_from_list returns a vcf.Reader object with a header generated from a\ngiven list or a default list.

    \n\n
    Parameters
    \n\n
      \n
    • header_list: The header_list parameter is a list of strings representing the header\nlines of a VCF (Variant Call Format) file. It is an optional parameter, meaning it can be\nprovided as an argument to the function, but if no argument is provided, a default list of\nheader lines will be used
    • \n
    \n\n
    Returns
    \n\n
    \n

    a vcf.Reader object.

    \n
    \n", "signature": "(\tself,\theader_list: list = None) -> <module 'vcf' from '/Users/lebechea/miniconda/envs/howard_devel/lib/python3.10/site-packages/vcf/__init__.py'>:", "funcdef": "def"}, {"fullname": "howard.objects.database.Database.get_header_from_file", "modulename": "howard.objects.database", "qualname": "Database.get_header_from_file", "kind": "function", "doc": "

    This function returns a VCF header either from a default list or from a file.

    \n\n
    Parameters
    \n\n
      \n
    • header_file: A string representing the file path of a VCF header file. If this parameter\nis not provided or is an empty string, the function will use a default header list
    • \n
    \n\n
    Returns
    \n\n
    \n

    a VCF object, which is obtained by calling the get_header_from_list method with the\n header_list as an argument. The header_list is either the default header list or the list\n obtained by reading a header file using the read_header_file method.

    \n
    \n", "signature": "(\tself,\theader_file: str) -> <module 'vcf' from '/Users/lebechea/miniconda/envs/howard_devel/lib/python3.10/site-packages/vcf/__init__.py'>:", "funcdef": "def"}, {"fullname": "howard.objects.database.Database.find_header_file", "modulename": "howard.objects.database", "qualname": "Database.find_header_file", "kind": "function", "doc": "

    This function finds the header file for a given database in various formats.

    \n\n
    Parameters
    \n\n
      \n
    • database: The database parameter is a string that represents the path to a database\nfile. If this parameter is not provided, the get_database() method is called to retrieve the\npath to the database file
    • \n
    \n\n
    Returns
    \n\n
    \n

    the path to the header file for a given database. If the header is in a separate file,\n it returns the path to that file. If the header is within the database file itself, it returns\n the path to the database file. If the database or its format cannot be determined, it returns\n None.

    \n
    \n", "signature": "(self, database: str = None) -> Optional[str]:", "funcdef": "def"}, {"fullname": "howard.objects.database.Database.get_header", "modulename": "howard.objects.database", "qualname": "Database.get_header", "kind": "function", "doc": "

    The get_header function in Python returns the header of a VCF file from a file, a list, or the\nobject itself based on specified conditions.

    \n\n
    Parameters
    \n\n
      \n
    • database: The database parameter in the get_header function represents a string that\nspecifies the database from which the header information should be retrieved or used. It is used\nin various parts of the function to determine how to construct the header of the VCF file
    • \n
    • header_file: The header_file parameter in the get_header function is a string\nrepresenting the path to a file containing the header information for a VCF file. This parameter\nallows you to specify a file from which the function will read the header information
    • \n
    • header_list: The header_list parameter in the get_header function is a list\ncontaining the header lines of a VCF file. If provided, the function will construct the header\nfrom this list using the get_header_from_list method. If header_list is not provided, the\nfunction will
    • \n
    • sql_query: The sql_query parameter in the get_header function is a string\nrepresenting an SQL query that can be used to retrieve data from a database. This parameter is\nused in the function to help construct the header of a VCF file based on the query results or\nother conditions specified in the function
    • \n
    \n\n
    Returns
    \n\n
    \n

    The get_header function returns the header of a VCF file based on different\n conditions:

    \n
    \n", "signature": "(\tself,\tdatabase: str = None,\theader_file: str = None,\theader_list: list = None,\tsql_query: str = None) -> <module 'vcf' from '/Users/lebechea/miniconda/envs/howard_devel/lib/python3.10/site-packages/vcf/__init__.py'>:", "funcdef": "def"}, {"fullname": "howard.objects.database.Database.get_header_from_columns", "modulename": "howard.objects.database", "qualname": "Database.get_header_from_columns", "kind": "function", "doc": "

    The function get_header_from_columns generates a VCF header based on database columns and adds\ncustom annotations to it.

    \n\n
    Parameters
    \n\n
      \n
    • database: The database parameter is a string that represents the name of a database. It\nis an optional parameter, and if not provided, the get_database() method is called to retrieve\nthe default database. This parameter specifies the database from which the columns will be used\nto generate the VCF header
    • \n
    • header_columns: The header_columns parameter is a list of column names that will be\nused to generate header information for a VCF file. If no header_columns are provided, the\nfunction will attempt to automatically detect the columns to use based on the database being\nused
    • \n
    • sql_query: The sql_query parameter in the get_header_from_columns function is used to\nspecify a SQL query that will be executed to retrieve column information from the database. This\nquery can be customized to fetch specific columns or data based on the requirements of the VCF\nheader generation process. If provided,
    • \n
    \n\n
    Returns
    \n\n
    \n

    The function get_header_from_columns returns a VCF header object that includes\n information about the columns in a database and their data types. The header object is created\n based on the input parameters, including the database name and a list of header columns.

    \n
    \n", "signature": "(\tself,\tdatabase: str = None,\theader_columns: list = [],\tsql_query: str = None) -> object:", "funcdef": "def"}, {"fullname": "howard.objects.database.Database.query", "modulename": "howard.objects.database", "qualname": "Database.query", "kind": "function", "doc": "

    This is a Python function that takes in a database and query string as parameters and returns\nthe result of the query on the database.

    \n\n
    Parameters
    \n\n
      \n
    • query: The query parameter is a string that represents the SQL query that needs to be\nexecuted on the database. It can be any valid SQL statement such as SELECT, INSERT, UPDATE,\nDELETE, etc
    • \n
    \n\n
    Returns
    \n\n
    \n

    If a query is provided, the method returns the result of the query executed on the\n database. If no query is provided, the method returns None.

    \n
    \n", "signature": "(self, query: str = None) -> object:", "funcdef": "def"}, {"fullname": "howard.objects.database.Database.set_header", "modulename": "howard.objects.database", "qualname": "Database.set_header", "kind": "function", "doc": "

    This function sets the header of a database based on a provided header file or the database\nformat.

    \n\n
    Parameters
    \n\n
      \n
    • database: A string representing the name or path of a database file. If not provided, the\nmethod will attempt to get the database name from the object's attributes
    • \n
    • header: header is a variable of type vcf (presumably representing a VCF header) that\ncan be provided as an argument to the set_header method to set the header attribute of the\nobject. If header is provided, the header_file parameter is ignored
    • \n
    • header_file: A string representing the file path of a header file. If provided, the\nfunction will use this header file to set the header attribute of the object
    • \n
    \n", "signature": "(\tself,\tdatabase: str = None,\theader: <module 'vcf' from '/Users/lebechea/miniconda/envs/howard_devel/lib/python3.10/site-packages/vcf/__init__.py'> = None,\theader_file: str = None) -> None:", "funcdef": "def"}, {"fullname": "howard.objects.database.Database.set_header_file", "modulename": "howard.objects.database", "qualname": "Database.set_header_file", "kind": "function", "doc": "

    This function sets the header file attribute of an object to the value passed as an argument.

    \n\n
    Parameters
    \n\n
      \n
    • header_file: The parameter header_file is a string that represents the name or path of\na header file. This method sets the header_file attribute of an object to the value passed as\nan argument. If no argument is passed, the header_file attribute remains unchanged
    • \n
    \n", "signature": "(self, header_file: str = None) -> None:", "funcdef": "def"}, {"fullname": "howard.objects.database.Database.get_header_columns_from_database", "modulename": "howard.objects.database", "qualname": "Database.get_header_columns_from_database", "kind": "function", "doc": "

    The get_header_columns_from_database function retrieves column names from a specified database\ntable.

    \n\n
    Parameters
    \n\n
      \n
    • database: The database parameter in the get_header_columns_from_database function is\na string that represents the name of the database from which you want to retrieve the header\ncolumns. If no specific database is provided when calling the function, it will default to using\nthe get_database() method to retrieve the
    • \n
    • query: The query parameter in the get_header_columns_from_database function is a\nstring that represents a SQL query. If provided, this query will be used to retrieve column\nnames from the specified database table instead of using the default database table
    • \n
    \n\n
    Returns
    \n\n
    \n

    The function get_header_columns_from_database returns a list of column names from the\n specified database table. If successful, it will return the list of column names. If there is an\n error or no columns are found, it will return None.

    \n
    \n", "signature": "(self, database: str = None, query: str = None) -> Optional[list]:", "funcdef": "def"}, {"fullname": "howard.objects.database.Database.get_header_file", "modulename": "howard.objects.database", "qualname": "Database.get_header_file", "kind": "function", "doc": "

    The function get_header_file generates a VCF header file based on specified parameters or a\ndefault header if needed.

    \n\n
    Parameters
    \n\n
      \n
    • header_file: The header_file parameter is a string representing the file path and name\nof the header file. If set to None, the default header file path and name will be used
    • \n
    • remove_header_line: The remove_header_line parameter is a boolean parameter that\ndetermines whether to remove the #CHROM line from the header file. If set to True, the line\nwill be removed; otherwise, it will remain in the header file. By default, this parameter is set\nto False, meaning, defaults to False
    • \n
    • replace_header_line: The replace_header_line parameter is a list of columns that can be\nused to replace the header line in the generated header file. For example, if you provide\n['#CHROM', 'POS', 'ID'], these columns will be used as the header line in the generated file\ninstead
    • \n
    • force: The force parameter in the get_header_file function is a boolean parameter\nthat determines whether to force the generation of a header file even if a header file already\nexists. If force is set to True, the function will replace the existing header file with a\nnew one. If, defaults to False
    • \n
    • sql_query: The sql_query parameter in the get_header_file function is used to specify\na SQL query that can be used to retrieve header information from a database. This query can be\npassed to the function to customize the header generation process based on the query results
    • \n
    \n\n
    Returns
    \n\n
    \n

    The function get_header_file returns a string which is the name of the header file\n that was generated or None if no header file was generated.

    \n
    \n", "signature": "(\tself,\theader_file: str = None,\tremove_header_line: bool = False,\treplace_header_line: list = None,\tforce: bool = False,\tsql_query: str = None) -> str:", "funcdef": "def"}, {"fullname": "howard.objects.database.Database.set_assembly", "modulename": "howard.objects.database", "qualname": "Database.set_assembly", "kind": "function", "doc": "

    This is a function that sets the assembly attribute of an object to a given string value.

    \n\n
    Parameters
    \n\n
      \n
    • assembly: The assembly parameter is a string that represents the name or type of assembly\nthat the object belongs to. This method sets the assembly attribute of the object to the value\npassed in as the assembly parameter. If no value is passed in, the assembly attribute remains\nunchanged. The method returns the updated value of the
    • \n
    \n", "signature": "(self, assembly: str = None) -> None:", "funcdef": "def"}, {"fullname": "howard.objects.database.Database.get_assembly", "modulename": "howard.objects.database", "qualname": "Database.get_assembly", "kind": "function", "doc": "

    This function returns the assembly attribute of an object if it exists, otherwise it returns\nNone.

    \n\n
    Returns
    \n\n
    \n

    If self.assembly is not None, then it returns the value of self.assembly.\n Otherwise, it returns None.

    \n
    \n", "signature": "(self) -> str:", "funcdef": "def"}, {"fullname": "howard.objects.database.Database.find_database", "modulename": "howard.objects.database", "qualname": "Database.find_database", "kind": "function", "doc": "

    This function finds a database file in a specified folder or the current directory.

    \n\n
    Parameters
    \n\n
      \n
    • database: The name of the database to be searched for. If not provided, it will call the\nget_database() method to get the name of the database. It is a string type parameter
    • \n
    • databases_folders: A list of folders where the function should look for the database\nfile. If this parameter is not provided, the function will look for the database file in the\ncurrent directory
    • \n
    • format: The file format of the database file. It is an optional parameter and if not\nprovided, the function will call the get_format() method to get the format
    • \n
    • assembly: assembly is an optional parameter that represents the name of a subfolder\nwhere the function should look for the database file. If provided, the function will search for\nthe database file in the specified subfolder within each of the databases_folders. If not\nprovided, the function will only search for
    • \n
    \n\n
    Returns
    \n\n
    \n

    a string that represents the path to the database file. If the database is not found or\n if no database is specified, it returns None.

    \n
    \n", "signature": "(\tself,\tdatabase: str = None,\tdatabases_folders: list = None,\tdatabase_format: str = None,\tassembly: str = None) -> Optional[str]:", "funcdef": "def"}, {"fullname": "howard.objects.database.Database.get_database", "modulename": "howard.objects.database", "qualname": "Database.get_database", "kind": "function", "doc": "

    This function returns the database name as a string.

    \n\n
    Returns
    \n\n
    \n

    The get_database method is returning the database attribute of the object. The\n return type is a string.

    \n
    \n", "signature": "(self) -> str:", "funcdef": "def"}, {"fullname": "howard.objects.database.Database.get_database_basename", "modulename": "howard.objects.database", "qualname": "Database.get_database_basename", "kind": "function", "doc": "

    This function returns the basename of a database file.

    \n\n
    Parameters
    \n\n
      \n
    • database: The parameter database is a string that represents the name of a database. If\nit is not provided, the method will use the get_database() method to retrieve the current\ndatabase
    • \n
    \n\n
    Returns
    \n\n
    \n

    a string which is the basename of the database file. If the database parameter is not\n provided, it gets the current database using the get_database() method. If the database\n exists, it returns the basename of the database file using the os.path.basename() method. If\n the database does not exist, it returns None.

    \n
    \n", "signature": "(self, database: str = None) -> Optional[str]:", "funcdef": "def"}, {"fullname": "howard.objects.database.Database.get_database_dirname", "modulename": "howard.objects.database", "qualname": "Database.get_database_dirname", "kind": "function", "doc": "

    This function returns the directory name of a given database or the current database if none is\nspecified.

    \n\n
    Parameters
    \n\n
      \n
    • database: The parameter database is a string that represents the path to a database\nfile. If it is not provided, the method will call self.get_database() to retrieve the path to\nthe default database
    • \n
    \n\n
    Returns
    \n\n
    \n

    a string that represents the directory name of the specified database file. If no\n database file is specified, it will use the default database file and return its directory name.\n If there is no database file, it will return None.

    \n
    \n", "signature": "(self, database: str = None) -> Optional[str]:", "funcdef": "def"}, {"fullname": "howard.objects.database.Database.exists", "modulename": "howard.objects.database", "qualname": "Database.exists", "kind": "function", "doc": "

    This function checks if a database exists in the specified path or in the default path.

    \n\n
    Parameters
    \n\n
      \n
    • database: The database parameter is a string that represents the name or path of a\ndatabase file. If it is not provided, the method will call the get_database() method to\nretrieve the default database name/path
    • \n
    \n\n
    Returns
    \n\n
    \n

    a boolean value indicating whether the specified database exists or not. If the\n database parameter is not provided, it gets the current database using the get_database()\n method and checks if it exists using the os.path.exists() function.

    \n
    \n", "signature": "(self, database: str = None) -> bool:", "funcdef": "def"}, {"fullname": "howard.objects.database.Database.set_format", "modulename": "howard.objects.database", "qualname": "Database.set_format", "kind": "function", "doc": "

    This is a method in a Python class that sets a format attribute to a specified string.

    \n\n
    Parameters
    \n\n
      \n
    • format: The format parameter is a string that specifies the desired format for the data.\nIt is an optional parameter, meaning that if it is not provided, the format attribute of the\nobject will not be changed. The function returns a string indicating the current format of the\nobject
    • \n
    \n", "signature": "(self, format: str = None) -> str:", "funcdef": "def"}, {"fullname": "howard.objects.database.Database.get_format", "modulename": "howard.objects.database", "qualname": "Database.get_format", "kind": "function", "doc": "

    This Python function returns the file format of a given database or the current database if none\nis provided.\nFormat database:\n - parquet\n - duckdb\n - sqlite\n - vcf\n - csv

    \n\n
    Parameters
    \n\n
      \n
    • database: The database parameter is a string that represents the type of database. It\nis an optional parameter and if not provided, the function will call the get_database() method\nto retrieve the database type
    • \n
    \n\n
    Returns
    \n\n
    \n

    a string that represents the type of database. The type of database can be one of the\n following: \"parquet\", \"duckdb\", \"sqlite\", \"vcf\", or \"csv\". The specific type of database is determined by\n the input parameter database, which is either passed as an argument to the function or\n obtained by calling the get_database() method. The `get_file_format

    \n
    \n", "signature": "(self, database: str = None) -> str:", "funcdef": "def"}, {"fullname": "howard.objects.database.Database.get_type", "modulename": "howard.objects.database", "qualname": "Database.get_type", "kind": "function", "doc": "

    The get_type function determines the type of a database (variants VCF-like or regions\nBED-like) based on its columns and format.

    \n\n
    Parameters
    \n\n
      \n
    • database: The database parameter in the get_type function is a string representing\nthe name of a database. If this parameter is not provided when calling the function, it will\nattempt to retrieve the database name using the get_database() method. This parameter is used\nto specify the database for which you
    • \n
    • sql_query: The sql_query parameter in the get_type function is used to pass an SQL\nquery as a string. This query can be used to filter or manipulate the data before determining\nthe type of the database based on its columns. If provided, the function will use this SQL query\nto fetch the
    • \n
    \n\n
    Returns
    \n\n
    \n

    The get_type function returns a string that represents the type of the database,\n which can be either \"variants\" (VCF-like) or \"regions\" (BED-like). If the database is not found\n or does not exist, the function returns None.

    \n
    \n", "signature": "(self, database: str = None, sql_query: str = None) -> Optional[str]:", "funcdef": "def"}, {"fullname": "howard.objects.database.Database.get_database_tables", "modulename": "howard.objects.database", "qualname": "Database.get_database_tables", "kind": "function", "doc": "

    This function retrieves a list of tables in a specified database using the DuckDB format.

    \n\n
    Parameters
    \n\n
      \n
    • database: The name of the database for which you want to retrieve the list of tables. If\nno database name is provided, it will use the default database
    • \n
    \n\n
    Returns
    \n\n
    \n

    a list of tables in the specified database, or None if the database does not exist or\n the format is not supported.

    \n
    \n", "signature": "(self, database: str = None) -> Union[str, list, NoneType]:", "funcdef": "def"}, {"fullname": "howard.objects.database.Database.get_database_table", "modulename": "howard.objects.database", "qualname": "Database.get_database_table", "kind": "function", "doc": "

    This function returns the name of a table in a specified database if it exists and is in a\nsupported format.

    \n\n
    Parameters
    \n\n
      \n
    • database: The name of the database to retrieve the table from. If None, it will use the\ndefault database
    • \n
    \n\n
    Returns
    \n\n
    \n

    a string that represents the name of a table in a database, or None if no suitable\n table is found.

    \n
    \n", "signature": "(self, database: str = None) -> Optional[str]:", "funcdef": "def"}, {"fullname": "howard.objects.database.Database.get_type_from_columns", "modulename": "howard.objects.database", "qualname": "Database.get_type_from_columns", "kind": "function", "doc": "

    This function returns the type of a database based on the provided list of columns.

    \n\n
    Parameters
    \n\n
      \n
    • database_columns: a list of column names in a database table
    • \n
    • check_database_type: A database type to check for. If not provided, it defaults\nto all database types defined in the constant DATABASE_TYPE_NEEDED_COLUMNS
    • \n
    \n\n
    Returns
    \n\n
    \n

    a string that represents the type of database based on the provided list of columns. If\n the needed columns for a specific database type are not found in the provided list, the function\n returns None.

    \n
    \n", "signature": "(\tself,\tdatabase_columns: list = [],\tcheck_database_type: str = None) -> Optional[str]:", "funcdef": "def"}, {"fullname": "howard.objects.database.Database.get_needed_columns", "modulename": "howard.objects.database", "qualname": "Database.get_needed_columns", "kind": "function", "doc": "

    This function takes a list of database columns and a type, and returns a dictionary of needed\ncolumns and their corresponding values found in the database columns.

    \n\n
    Parameters
    \n\n
      \n
    • database_columns: A list of column names in a database table
    • \n
    • type: The type of database being used. It is used to determine which columns are needed\nfor the specific database type
    • \n
    \n\n
    Returns
    \n\n
    \n

    a dictionary containing the columns that are needed for a specific database type, along\n with their corresponding column names in the actual database. The function takes in a list of\n database columns and a database type as input, and uses the DATABASE_TYPE_NEEDED_COLUMNS\n dictionary to determine which columns are needed for the specified database type. It then\n searches through the list of database columns to find the

    \n
    \n", "signature": "(self, database_columns: list = [], database_type: str = None) -> dict:", "funcdef": "def"}, {"fullname": "howard.objects.database.Database.get_sql_from", "modulename": "howard.objects.database", "qualname": "Database.get_sql_from", "kind": "function", "doc": "

    This function returns a SQL query string based on the input database format.

    \n\n
    Parameters
    \n\n
      \n
    • database: The parameter \"database\" is a string that represents the name or path of the\ndatabase that the function will read from. If no value is provided for this parameter, the\nfunction will call the \"get_database()\" method to retrieve the default database
    • \n
    \n\n
    Returns
    \n\n
    \n

    a string that represents a SQL query to read data from a database file. The specific\n SQL query returned depends on the format of the database file, which is determined by the\n get_format() method. The SQL query returned will be in the form of a function call to one of\n the following functions: read_parquet(), read_csv(), read_json(),

    \n
    \n", "signature": "(\tself,\tdatabase: str = None,\theader_file: str = None,\tsample_size: int = 20480) -> str:", "funcdef": "def"}, {"fullname": "howard.objects.database.Database.get_sql_database_attach", "modulename": "howard.objects.database", "qualname": "Database.get_sql_database_attach", "kind": "function", "doc": "

    This function returns a SQL query to attach or detach a database based on the specified format\nand output.

    \n\n
    Parameters
    \n\n
      \n
    • database: The name of the database to attach. If not provided, it will try to get the\ndefault database from the connection
    • \n
    • output: The \"output\" parameter is a string that specifies the desired output of the\nfunction. It can take on the following values:, defaults to query
    • \n
    \n\n
    Returns
    \n\n
    \n

    a string that represents a SQL query to attach a database to a DuckDB or SQLite\n database engine. The specific output depends on the value of the output parameter, which can\n be \"query\" (default), \"attach\", \"detach\", or \"name\". If output is \"query\" or \"attach\", the\n function returns a SQL query to attach the specified database.

    \n
    \n", "signature": "(self, database: str = None, output: str = 'query') -> Optional[str]:", "funcdef": "def"}, {"fullname": "howard.objects.database.Database.get_sql_database_link", "modulename": "howard.objects.database", "qualname": "Database.get_sql_database_link", "kind": "function", "doc": "

    This function returns a SQL database link based on the provided database name or the default\ndatabase.

    \n\n
    Parameters
    \n\n
      \n
    • database: The database parameter is a string that represents the name of the database.\nIf it is not provided, the method will call the get_database() method to retrieve the default\ndatabase
    • \n
    \n\n
    Returns
    \n\n
    \n

    a SQL database link as a string. If a database name is provided as an argument, it will\n use that database to construct the link. Otherwise, it will use the default database obtained\n from self.get_database(). The link is constructed using the sql_from and sql_table\n obtained from other methods, and the final link is returned as a string. If the

    \n
    \n", "signature": "(self, database: str = None) -> str:", "funcdef": "def"}, {"fullname": "howard.objects.database.Database.create_view", "modulename": "howard.objects.database", "qualname": "Database.create_view", "kind": "function", "doc": "

    The create_view function creates a view in a specified database or the default database, using\na SQL database link.

    \n\n
    Parameters
    \n\n
      \n
    • database: The database parameter is a string that represents the name of the database.\nIf no value is provided, it will use the value returned by the get_database() method
    • \n
    • view_name: The view_name parameter is a string that specifies the name of the view that\nwill be created in the database, defaults to variants
    • \n
    \n\n
    Returns
    \n\n
    \n

    the name of the created view.

    \n
    \n", "signature": "(self, database: str = None, view_name: str = 'variants') -> Optional[str]:", "funcdef": "def"}, {"fullname": "howard.objects.database.Database.get_view", "modulename": "howard.objects.database", "qualname": "Database.get_view", "kind": "function", "doc": "

    The get_view function returns the name of a view in a database, or creates a new view if\nspecified.

    \n\n
    Parameters
    \n\n
      \n
    • database: The database parameter is a string that represents the name of the database.\nIt is an optional parameter and if not provided, the method get_database() is called to\nretrieve the database name
    • \n
    • create_view: The create_view parameter is a string that represents the name of the view\nthat you want to create. If this parameter is provided, the get_view method will call the\ncreate_view method and pass the database and view_name parameters to it
    • \n
    \n\n
    Returns
    \n\n
    \n

    The method get_view returns a string.

    \n
    \n", "signature": "(self, database: str = None, create_view: str = None) -> Optional[str]:", "funcdef": "def"}, {"fullname": "howard.objects.database.Database.is_compressed", "modulename": "howard.objects.database", "qualname": "Database.is_compressed", "kind": "function", "doc": "

    This Python function checks if a given file is compressed and returns the format of the\ncompression.

    \n\n
    Parameters
    \n\n
      \n
    • database: The database parameter is a string that represents the path or name of the\ninput file that needs to be checked for compression. If no value is provided for database, the\nmethod calls get_database() to retrieve the default database file
    • \n
    \n\n
    Returns
    \n\n
    \n

    The function is_compressed returns a boolean value indicating whether the input file\n is compressed or not. The function calls another function get_file_compressed to determine the\n compression format of the file.

    \n
    \n", "signature": "(self, database: str = None) -> bool:", "funcdef": "def"}, {"fullname": "howard.objects.database.Database.get_header_infos_list", "modulename": "howard.objects.database", "qualname": "Database.get_header_infos_list", "kind": "function", "doc": "

    This function returns a list of header information for a given database or the current database\nif none is specified.

    \n\n
    Parameters
    \n\n
      \n
    • database: The database parameter is a string that represents the name of the database\nfrom which the header information is to be retrieved. If no database name is provided, the\nmethod will use the default database name obtained from the get_database() method
    • \n
    \n\n
    Returns
    \n\n
    \n

    A list of header information from a database, or an empty list if the database header\n is not available.

    \n
    \n", "signature": "(self, database: str = None) -> list:", "funcdef": "def"}, {"fullname": "howard.objects.database.Database.find_column", "modulename": "howard.objects.database", "qualname": "Database.find_column", "kind": "function", "doc": "

    The find_column function searches for a specific column in a database table, with the option\nto search for a column with a specific prefix or within the INFO column header.

    \n\n
    Parameters
    \n\n
      \n
    • database: The name of the database to search for the column in. If not provided, it will\nuse the current database that the code is connected to
    • \n
    • table: The \"table\" parameter is the name of the table in the database where the column is\nlocated
    • \n
    • column: The \"column\" parameter is a string that represents the name of the column to\nsearch for in the database table. By default, it is set to \"INFO\", but you can change it to\nsearch for a specific column name, defaults to INFO
    • \n
    • prefixes: The prefixes parameter is a list of strings that are used to search for a\ncolumn with a specific prefix in the database. For example, if the prefixes list contains \"DP/\",\nthe function will search for a column named \"DP/INFO\" in addition to the default \"INFO\" column
    • \n
    • database_columns: The database_columns parameter is a list that contains the names of\nall the columns in a specific database table. It is used to check if a specific column exists in\nthe database. If the database_columns parameter is not provided, the function will call the\nget_columns method to retrieve
    • \n
    \n\n
    Returns
    \n\n
    \n

    a string that represents the name of the column found in the database, based on the\n input parameters. If the column is found, it returns the column name. If the column is not\n found, it returns None.

    \n
    \n", "signature": "(\tself,\tdatabase: str = None,\ttable: str = None,\tcolumn: str = 'INFO',\tprefixes: list = ['INFO/'],\tdatabase_columns: list = None) -> str:", "funcdef": "def"}, {"fullname": "howard.objects.database.Database.map_columns", "modulename": "howard.objects.database", "qualname": "Database.map_columns", "kind": "function", "doc": "

    The map_columns function maps input columns to their corresponding columns in a specified\ndatabase table, using specified prefixes to filter the columns.

    \n\n
    Parameters
    \n\n
      \n
    • database: The name of the database to search for columns in. If no database is specified,\nthe method will use the default database set in the connection
    • \n
    • table: The table parameter is the name of the table in the database that you want to\nmap the columns for
    • \n
    • columns: A list of column names that you want to map to their corresponding column names\nin the database
    • \n
    • prefixes: The prefixes parameter is a list of strings that are used to filter the\ncolumns that are searched for. Only columns that start with one of the prefixes in the list will\nbe considered. In the code above, the default value for prefixes is [\"INFO/\"]
    • \n
    \n\n
    Returns
    \n\n
    \n

    a dictionary that maps the input columns to their corresponding columns found in the\n specified database and table, with the specified prefixes.

    \n
    \n", "signature": "(\tself,\tdatabase: str = None,\ttable: str = None,\tcolumns: list = [],\tprefixes: list = ['INFO/']) -> dict:", "funcdef": "def"}, {"fullname": "howard.objects.database.Database.get_columns", "modulename": "howard.objects.database", "qualname": "Database.get_columns", "kind": "function", "doc": "

    The function get_columns retrieves a list of column names from a specified database and table\nusing SQL queries.

    \n\n
    Parameters
    \n\n
      \n
    • database: The database parameter in the get_columns function is used to specify the\nname of the database from which you want to retrieve the column names. If this parameter is not\nprovided, the function will default to using the current database
    • \n
    • table: The table parameter in the get_columns function represents the name of the\ntable in the database for which you want to retrieve the column names. If this parameter is not\nprovided, the function will attempt to get the table name from the specified database. If the\ntable parameter is not specified and
    • \n
    • header_file: The header_file parameter in the get_columns function is used to specify\nthe file containing the header information for the data source. This information is often used\nin cases where the column names are not explicitly defined in the database schema or where the\ndata is stored in a file format that requires additional
    • \n
    • sql_query: The sql_query parameter in the get_columns function is used to specify a\ncustom SQL query to retrieve column names from the database table. If a sql_query is provided,\nthe function will execute that query to get the column names and return them as a list
    • \n
    \n\n
    Returns
    \n\n
    \n

    The function get_columns returns a list of column names for a given database and\n table. If a SQL query is provided, it executes the query and returns the column names from the\n result. If no database is specified, it uses the current database. It then checks the database\n format and connects to the database accordingly to retrieve the column names using a SQL query.\n If the table parameter is not provided

    \n
    \n", "signature": "(\tself,\tdatabase: str = None,\ttable: str = None,\theader_file: str = None,\tsql_query: str = None) -> list:", "funcdef": "def"}, {"fullname": "howard.objects.database.Database.get_table_columns_from_format", "modulename": "howard.objects.database", "qualname": "Database.get_table_columns_from_format", "kind": "function", "doc": "

    The function get_table_columns_from_format returns a list of table columns based on the\nspecified database format.

    \n\n
    Parameters
    \n\n
      \n
    • database: The database parameter is a string that represents the name of the database.\nIt is an optional parameter, which means it has a default value of None. If no value is\nprovided for the database parameter, the get_database() method is called to retrieve the\ncurrent database name
    • \n
    \n\n
    Returns
    \n\n
    \n

    a list of table columns.

    \n
    \n", "signature": "(self, database: str = None) -> list:", "funcdef": "def"}, {"fullname": "howard.objects.database.Database.get_table_columns_from_file", "modulename": "howard.objects.database", "qualname": "Database.get_table_columns_from_file", "kind": "function", "doc": "

    The function get_table_columns_from_file retrieves the column names from a database or header\nfile.

    \n\n
    Parameters
    \n\n
      \n
    • database: The database parameter is a string that represents the name or path of the\ndatabase file. If this parameter is not provided, the get_database() method is called to\nretrieve the database name or path
    • \n
    • header_file: The header_file parameter is a string that represents the file path or\nname of the header file. This file contains the header information for a table, which typically\nincludes the names of the columns in the table
    • \n
    • header_file_find: Allow header file find if not provided
    • \n
    \n\n
    Returns
    \n\n
    \n

    a list of table columns.

    \n
    \n", "signature": "(\tself,\tdatabase: str = None,\theader_file: str = None,\theader_file_find: bool = True) -> list:", "funcdef": "def"}, {"fullname": "howard.objects.database.Database.get_annotations", "modulename": "howard.objects.database", "qualname": "Database.get_annotations", "kind": "function", "doc": "

    This function returns the annotations of a database or the default database if none is\nspecified.

    \n\n
    Parameters
    \n\n
      \n
    • database: The parameter database is a string that represents the name of the database\nto retrieve annotations from. If no database name is provided, the method will use the default\ndatabase name obtained from the get_database() method
    • \n
    \n\n
    Returns
    \n\n
    \n

    The function get_annotations returns the infos attribute of the header of a\n database. If the database parameter is not provided, it gets the current database using the\n get_database method. If there is no header, it returns None.

    \n
    \n", "signature": "(self, database: str = None) -> object:", "funcdef": "def"}, {"fullname": "howard.objects.database.Database.get_extra_columns", "modulename": "howard.objects.database", "qualname": "Database.get_extra_columns", "kind": "function", "doc": "

    This Python function returns a list of extra columns in a database table that are not needed\nbased on the database type and existing columns.

    \n\n
    Parameters
    \n\n
      \n
    • database: A string representing the name of the database to retrieve columns from. If\nNone is provided, the default database will be used
    • \n
    • database_type: The database_type parameter in the get_extra_columns function\nrepresents the type of the database for which you want to retrieve the list of extra columns. It\nis used to determine which columns are needed based on the database type and the existing\ncolumns in the specified database table
    • \n
    • sql_query: The sql_query parameter in the get_extra_columns function is used to pass\nan SQL query that can be used to retrieve specific columns from the database. This query can be\ncustomized to filter columns based on certain conditions or criteria before analyzing them to\ndetermine the extra columns that are not needed
    • \n
    \n\n
    Returns
    \n\n
    \n

    A list of extra columns in a database table that are not needed based on the database\n type and existing columns.

    \n
    \n", "signature": "(\tself,\tdatabase: str = None,\tdatabase_type: str = None,\tsql_query: str = None) -> list:", "funcdef": "def"}, {"fullname": "howard.objects.database.Database.is_vcf", "modulename": "howard.objects.database", "qualname": "Database.is_vcf", "kind": "function", "doc": "

    The is_vcf function checks if a given database is of type \"vcf\" by examining its columns and\ntheir types.

    \n\n
    Parameters
    \n\n
      \n
    • database: The database parameter in the is_vcf function is a string that represents\nthe name of the database that the function will use to check if the file is a VCF (Variant Call\nFormat) file. If the database parameter is not provided when calling the function, it will
    • \n
    • sql_query: The sql_query parameter in the is_vcf function is used to pass an SQL\nquery string that can be used to filter the columns retrieved from the database. This query can\nbe used to narrow down the columns that are considered when checking if the database is of type\n\"vcf\"
    • \n
    \n\n
    Returns
    \n\n
    \n

    The function is_vcf returns a boolean value indicating whether the database type is\n \"vcf\" or not.

    \n
    \n", "signature": "(self, database: str = None, sql_query: str = None) -> bool:", "funcdef": "def"}, {"fullname": "howard.objects.database.Database.get_conn", "modulename": "howard.objects.database", "qualname": "Database.get_conn", "kind": "function", "doc": "

    The function returns the connection object.

    \n\n
    Returns
    \n\n
    \n

    The method is returning the value of the instance variable self.conn.

    \n
    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "howard.objects.database.Database.is_genotype_column", "modulename": "howard.objects.database", "qualname": "Database.is_genotype_column", "kind": "function", "doc": "

    The is_genotype_column function in Python checks if a specified column in a database contains\ngenotype data based on a regular expression pattern.

    \n\n
    Parameters
    \n\n
      \n
    • column: The column parameter is a string that represents the name of a column in a\ndatabase table. It is used to specify the column for which you want to check if it contains\ngenotype information based on a regular expression pattern
    • \n
    • database: The database parameter in the is_genotype_column method is used to specify\nthe name of the database from which the data will be queried. If a database is provided, the\nmethod will query the specified database to check if the given column contains genotype\ninformation. If no database is provided,
    • \n
    • downsampling: The downsampling parameter in the is_genotype_column method is an\ninteger value that determines the number of rows to be sampled from the database table when\nchecking for genotype information in the specified column. This parameter is used to limit the\nnumber of rows to be processed in order to improve performance, defaults to 1000
    • \n
    • check_format: The check_format parameter in the is_genotype_column method is a\nboolean flag that determines whether the function should check the format of the data before\nproceeding with the genotype column analysis. If check_format is set to True, the function\nwill verify if the specified column exists in, defaults to True
    • \n
    \n\n
    Returns
    \n\n
    \n

    The is_genotype_column method returns a boolean value. If the specified column in a\n database table contains genotype information, it returns True; otherwise, it returns False.

    \n
    \n", "signature": "(\tself,\tcolumn: str,\tdatabase: str = None,\tdownsampling: int = 1000,\tcheck_format: bool = True) -> bool:", "funcdef": "def"}, {"fullname": "howard.objects.database.Database.export", "modulename": "howard.objects.database", "qualname": "Database.export", "kind": "function", "doc": "

    The export function exports data from a database to a specified output format, compresses it\nif necessary, and returns a boolean value indicating whether the export was successful or not.

    \n\n
    Parameters
    \n\n
      \n
    • output_database: The output_database parameter is a string that represents the path and\nfilename of the output file to be exported. It specifies where the exported data will be saved
    • \n
    • output_header: The output_header parameter is an optional string that represents the\nheader of the output file. If provided, it specifies the header that will be included in the\noutput file. If not provided, the header will be automatically detected based on the output file\nformat
    • \n
    • header_in_output: The header_in_output parameter is a boolean value that determines\nwhether the header should be included in the output file. If set to True, the header will be\nincluded in the output file. If set to False, the header will not be included in the output\nfile. By default,, defaults to True
    • \n
    • database: The database parameter is the name of the database from which you want to\nexport data. If this parameter is not provided, the function will use the get_database()\nmethod to retrieve the current database
    • \n
    • table: The table parameter specifies the name of the table in the database from which\nthe data will be exported. By default, if not specified, it is set to \"variants\", defaults to\nvariants
    • \n
    • parquet_partitions: The parquet_partitions parameter is a list that specifies the\npartition columns for the Parquet output format. Each element in the list represents a partition\ncolumn. The partitions are used to organize the data in the Parquet file based on the values of\nthe specified columns
    • \n
    • threads: The threads parameter in the export function is an optional integer that\nspecifies the number of threads to use for exporting the data. It determines the level of\nparallelism during the export process. By default, it is set to 1, defaults to 1
    • \n
    • sort: The sort parameter in the export function is a boolean value that specifies\nwhether the output file should be sorted based on the genomic coordinates of the variants. If\nsort is set to True, the output file will be sorted. If sort is set to False,, defaults\nto False
    • \n
    • index: The index parameter is a boolean value that specifies whether to index the\noutput file. If index is set to True, the output file will be indexed. If index is set to\nFalse or not provided, the output file will not be indexed. By default,, defaults to False
    • \n
    • existing_columns_header: The existing_columns_header parameter is a list that\nrepresents the existing columns in the header of the output file. It is used to determine the\ncolumns that should be included in the output file. If this parameter is not provided, the\nfunction will automatically detect the header columns based on the output file format
    • \n
    • order_by: The order_by parameter in the export function is a string that specifies\nthe columns by which the output file should be ordered. You can specify multiple columns\nseparated by commas. Each column can be followed by the keyword \"ASC\" (ascending) or \"DESC\"\n(descending) to specify
    • \n
    • query: The query parameter in the export function represents a SQL query that\nspecifies the data to be exported from the database. If provided, the function will export the\nresult of this query. If the query parameter is not provided, the function will generate a\nquery to export the data from
    • \n
    • compression_type: The compression_type parameter in the export function specifies the\ntype of compression to be applied to the output file. By default, the compression type is set to\n\"bgzip\". This parameter allows you to choose the compression algorithm for the output file, such\nas \"gzip\", \"bgzip
    • \n
    • chunk_size: The chunk_size parameter in the export function specifies the size of\neach chunk or batch of data that will be processed during the export operation. It determines\nhow many records or lines of data will be included in each chunk that is processed at a time,\ndefaults to 1000000
    • \n
    • export_mode: The export_mode parameter in the export function specifies the mode of\nexport, which can be either \"pyarrow\" or \"duckdb\", defaults to pyarrow
    • \n
    • compresslevel: The compresslevel parameter in the export function represents the\nlevel of compression for gzip. By default, it is set to 6. This parameter allows you to specify\nthe compression level when using gzip compression for the output file. The compression level can\nrange from 0 (no compression), defaults to 6
    • \n
    • export_header: The export_header parameter is a boolean flag that determines whether\nthe header of a VCF file should be exported to a separate file or not. If export_header is\nTrue, the header will be exported to a file. If export_header is False, the header will not\nbe, defaults to True
    • \n
    • sample_list: The sample_list parameter in the export function is a list that\nspecifies the samples to be included in the exported data. If provided, the samples listed in\nthis parameter will be included in the output file. If not provided, the function will determine\nthe samples to include based on the data
    • \n
    \n\n
    Returns
    \n\n
    \n

    The export function returns a boolean value indicating whether the export was\n successful or not.

    \n
    \n", "signature": "(\tself,\toutput_database: str,\toutput_header: str = None,\theader_in_output: bool = True,\tdatabase: str = None,\ttable: str = 'variants',\tparquet_partitions: list = None,\tthreads: int = 1,\tsort: bool = False,\tindex: bool = False,\texisting_columns_header: list = [],\torder_by: str = '',\tquery: str = None,\tcompression_type: str = None,\tchunk_size: int = 1000000,\texport_mode: str = 'pyarrow',\tcompresslevel: int = 6,\texport_header: bool = True,\tsample_list: list = None) -> bool:", "funcdef": "def"}, {"fullname": "howard.objects.genome", "modulename": "howard.objects.genome", "kind": "module", "doc": "

    \n"}, {"fullname": "howard.objects.genome.ChromosomeSubset", "modulename": "howard.objects.genome", "qualname": "ChromosomeSubset", "kind": "class", "doc": "

    Allow direct access to a subset of the chromosome.

    \n"}, {"fullname": "howard.objects.genome.ChromosomeSubset.__init__", "modulename": "howard.objects.genome", "qualname": "ChromosomeSubset.__init__", "kind": "function", "doc": "

    \n", "signature": "(name, genome=None)"}, {"fullname": "howard.objects.genome.ChromosomeSubset.name", "modulename": "howard.objects.genome", "qualname": "ChromosomeSubset.name", "kind": "variable", "doc": "

    \n"}, {"fullname": "howard.objects.genome.ChromosomeSubset.genome", "modulename": "howard.objects.genome", "qualname": "ChromosomeSubset.genome", "kind": "variable", "doc": "

    \n"}, {"fullname": "howard.objects.genome.GenomeSubset", "modulename": "howard.objects.genome", "qualname": "GenomeSubset", "kind": "class", "doc": "

    Allow the direct access of a subset of the genome.

    \n"}, {"fullname": "howard.objects.genome.GenomeSubset.__init__", "modulename": "howard.objects.genome", "qualname": "GenomeSubset.__init__", "kind": "function", "doc": "

    \n", "signature": "(genome, chrom, start, end, seqid)"}, {"fullname": "howard.objects.genome.GenomeSubset.genome", "modulename": "howard.objects.genome", "qualname": "GenomeSubset.genome", "kind": "variable", "doc": "

    \n"}, {"fullname": "howard.objects.genome.GenomeSubset.chrom", "modulename": "howard.objects.genome", "qualname": "GenomeSubset.chrom", "kind": "variable", "doc": "

    \n"}, {"fullname": "howard.objects.genome.GenomeSubset.start", "modulename": "howard.objects.genome", "qualname": "GenomeSubset.start", "kind": "variable", "doc": "

    \n"}, {"fullname": "howard.objects.genome.GenomeSubset.end", "modulename": "howard.objects.genome", "qualname": "GenomeSubset.end", "kind": "variable", "doc": "

    \n"}, {"fullname": "howard.objects.genome.GenomeSubset.seqid", "modulename": "howard.objects.genome", "qualname": "GenomeSubset.seqid", "kind": "variable", "doc": "

    \n"}, {"fullname": "howard.objects.genome.MockGenomeError", "modulename": "howard.objects.genome", "qualname": "MockGenomeError", "kind": "class", "doc": "

    Common base class for all non-exit exceptions.

    \n", "bases": "builtins.Exception"}, {"fullname": "howard.objects.genome.MockSequence", "modulename": "howard.objects.genome", "qualname": "MockSequence", "kind": "class", "doc": "

    \n"}, {"fullname": "howard.objects.genome.MockSequence.__init__", "modulename": "howard.objects.genome", "qualname": "MockSequence.__init__", "kind": "function", "doc": "

    \n", "signature": "(sequence)"}, {"fullname": "howard.objects.genome.MockSequence.sequence", "modulename": "howard.objects.genome", "qualname": "MockSequence.sequence", "kind": "variable", "doc": "

    \n"}, {"fullname": "howard.objects.genome.MockChromosome", "modulename": "howard.objects.genome", "qualname": "MockChromosome", "kind": "class", "doc": "

    \n"}, {"fullname": "howard.objects.genome.MockChromosome.__init__", "modulename": "howard.objects.genome", "qualname": "MockChromosome.__init__", "kind": "function", "doc": "

    \n", "signature": "(name, genome=None)"}, {"fullname": "howard.objects.genome.MockChromosome.name", "modulename": "howard.objects.genome", "qualname": "MockChromosome.name", "kind": "variable", "doc": "

    \n"}, {"fullname": "howard.objects.genome.MockChromosome.genome", "modulename": "howard.objects.genome", "qualname": "MockChromosome.genome", "kind": "variable", "doc": "

    \n"}, {"fullname": "howard.objects.genome.MockGenome", "modulename": "howard.objects.genome", "qualname": "MockGenome", "kind": "class", "doc": "

    \n"}, {"fullname": "howard.objects.genome.MockGenome.__init__", "modulename": "howard.objects.genome", "qualname": "MockGenome.__init__", "kind": "function", "doc": "

    A mock genome object that provides a pygr compatible interface.

    \n\n

    lookup: a list of ((chrom, start, end), seq) values that define\n a lookup table for genome sequence requests.\nfilename: a stream or filename containing a lookup table.\ndb_filename: a fasta file to use for genome sequence requests. All\n requests are recorded and can be writen to a lookup table file\n using the write method.\ndefault_seq: if given, this base will always be returned if\n region is unavailable.

    \n", "signature": "(lookup=None, filename=None, db_filename=None, default_seq=None)"}, {"fullname": "howard.objects.genome.MockGenome.get_seq", "modulename": "howard.objects.genome", "qualname": "MockGenome.get_seq", "kind": "function", "doc": "

    Return a sequence by chromosome name and region [start, end).

    \n\n

    Coordinates are 0-based, end-exclusive.

    \n", "signature": "(self, chrom, start, end):", "funcdef": "def"}, {"fullname": "howard.objects.genome.MockGenome.read", "modulename": "howard.objects.genome", "qualname": "MockGenome.read", "kind": "function", "doc": "

    Read a sequence lookup table from a file.

    \n\n

    filename: a filename string or file stream.

    \n", "signature": "(self, filename):", "funcdef": "def"}, {"fullname": "howard.objects.genome.MockGenome.write", "modulename": "howard.objects.genome", "qualname": "MockGenome.write", "kind": "function", "doc": "

    Write a sequence lookup table to file.

    \n", "signature": "(self, filename):", "funcdef": "def"}, {"fullname": "howard.objects.genome.MockGenomeTestFile", "modulename": "howard.objects.genome", "qualname": "MockGenomeTestFile", "kind": "class", "doc": "

    \n", "bases": "MockGenome"}, {"fullname": "howard.objects.genome.MockGenomeTestFile.__init__", "modulename": "howard.objects.genome", "qualname": "MockGenomeTestFile.__init__", "kind": "function", "doc": "

    A mock genome object that provides a pygr compatible interface.

    \n\n

    lookup: a list of ((chrom, start, end), seq) values that define\n a lookup table for genome sequence requests.\nfilename: a stream or filename containing a lookup table.\ndb_filename: a fasta file to use for genome sequence requests. All\n requests are recorded and can be writen to a lookup table file\n using the write method.\ndefault_seq: if given, this base will always be returned if\n region is unavailable.

    \n", "signature": "(\tlookup=None,\tfilename=None,\tdb_filename=None,\tdefault_seq=None,\tcreate_data=False)"}, {"fullname": "howard.objects.genome.MockGenomeTestFile.get_seq", "modulename": "howard.objects.genome", "qualname": "MockGenomeTestFile.get_seq", "kind": "function", "doc": "

    Return a sequence by chromosome name and region [start, end).

    \n\n

    Coordinates are 0-based, end-exclusive.

    \n", "signature": "(self, chrom, start, end):", "funcdef": "def"}, {"fullname": "howard.objects.hgvs", "modulename": "howard.objects.hgvs", "kind": "module", "doc": "

    HGVS language currently implemented.

    \n\n

    HGVS = ALLELE\n | PREFIX_NAME : ALLELE

    \n\n

    PREFIX_NAME = TRANSCRIPT\n | TRANSCRIPT '(' GENE ')'

    \n\n

    TRANSCRIPT = TRANSCRIPT_NAME\n | TRANSCRIPT_NAME '.' TRANSCRIPT_VERSION

    \n\n

    TRANSCRIPT_VERSION = NUMBER

    \n\n

    ALLELE = 'c.' CDNA_ALLELE # cDNA\n | 'g.' GENOMIC_ALLELE # genomic\n | 'm.' MIT_ALLELE # mitochondrial sequence\n | 'n.' NC_ALLELE # non-coding RNA reference sequence\n | 'r.' RNA_ALLELE # RNA sequence (like r.76a>u)\n | 'p.' PROTEIN_ALLELE # protein sequence (like p.Lys76Asn)

    \n\n

    NC_ALLELE =\nRNA_ALLELE =\nCDNA_ALLELE = CDNA_COORD SINGLE_BASE_CHANGE\n | CDNA_COORD_RANGE MULTI_BASE_CHANGE

    \n\n

    GENOMIC_ALLELE =\nMIT_ALLELE = COORD SINGLE_BASE_CHANGE\n | COORD_RANGE MULTI_BASE_CHANGE

    \n\n

    SINGLE_BASE_CHANGE = CDNA_ALLELE = CDNA_COORD BASE '=' # no change\n | CDNA_COORD BASE '>' BASE # substitution\n | CDNA_COORD 'ins' BASE # 1bp insertion\n | CDNA_COORD 'del' BASE # 1bp deletion\n | CDNA_COORD 'dup' BASE # 1bp duplication\n | CDNA_COORD 'ins' # 1bp insertion\n | CDNA_COORD 'del' # 1bp deletion\n | CDNA_COORD 'dup' # 1bp duplication\n | CDNA_COORD 'del' BASE 'ins' BASE # 1bp indel\n | CDNA_COORD 'delins' BASE # 1bp indel

    \n\n

    MULTI_BASE_CHANGE = COORD_RANGE 'del' BASES # deletion\n | COORD_RANGE 'ins' BASES # insertion\n | COORD_RANGE 'dup' BASES # duplication\n | COORD_RANGE 'del' # deletion\n | COORD_RANGE 'dup' # duplication\n | COORD_RANGE 'del' BASES 'ins' BASES # indel\n | COORD_RANGE 'delins' BASES # indel

    \n\n

    AMINO1 = [GAVLIMFWPSTCYNQDEKRH]

    \n\n

    AMINO3 = 'Gly' | 'Ala' | 'Val' | 'Leu' | 'Ile' | 'Met' | 'Phe' | 'Trp' | 'Pro'\n | 'Ser' | 'Thr' | 'Cys' | 'Tyr' | 'Asn' | 'Gln' | 'Asp' | 'Glu' | 'Lys'\n | 'Arg' | 'His'

    \n\n

    PROTEIN_ALLELE = AMINO3 COORD '=' # no peptide change\n | AMINO1 COORD '=' # no peptide change\n | AMINO3 COORD AMINO3 PEP_EXTRA # peptide change\n | AMINO1 COORD AMINO1 PEP_EXTRA # peptide change\n | AMINO3 COORD '_' AMINO3 COORD PEP_EXTRA # indel\n | AMINO1 COORD '_' AMINO1 COORD PEP_EXTRA # indel\n | AMINO3 COORD '_' AMINO3 COORD PEP_EXTRA AMINO3 # indel\n | AMINO1 COORD '_' AMINO1 COORD PEP_EXTRA AMINO1 # indel

    \n\n

    A genomic range:

    \n\n

    COORD_RANGE = COORD '_' COORD

    \n\n

    A cDNA range:

    \n\n

    CDNA_COORD_RANGE = CDNA_COORD '_' CDNA_COORD

    \n\n

    A cDNA coordinate:

    \n\n

    CDNA_COORD = COORD_PREFIX COORD\n | COORD_PREFIX COORD OFFSET_PREFIX OFFSET\nCOORD_PREFIX = '' | '-' | '*'\nCOORD = NUMBER\nOFFSET_PREFIX = '-' | '+'\nOFFSET = NUMBER

    \n\n

    Primatives:

    \n\n

    NUMBER = \"\\d+\"\nBASE = [ACGT]\nBASES = BASE+

    \n"}, {"fullname": "howard.objects.hgvs.CHROM_PREFIX", "modulename": "howard.objects.hgvs", "qualname": "CHROM_PREFIX", "kind": "variable", "doc": "

    \n", "default_value": "'chr'"}, {"fullname": "howard.objects.hgvs.CODON_1", "modulename": "howard.objects.hgvs", "qualname": "CODON_1", "kind": "variable", "doc": "

    \n", "default_value": "{'TTT': 'F', 'TTC': 'F', 'TCT': 'S', 'TCC': 'S', 'TAT': 'Y', 'TAC': 'Y', 'TGT': 'C', 'TGC': 'C', 'TTA': 'L', 'TCA': 'S', 'TAA': '*', 'TGA': '*', 'TTG': 'L', 'TCG': 'S', 'TAG': '*', 'TGG': 'W', 'CTT': 'L', 'CTC': 'L', 'CCT': 'P', 'CCC': 'P', 'CAT': 'H', 'CAC': 'H', 'CGT': 'R', 'CGC': 'R', 'CTA': 'L', 'CTG': 'L', 'CCA': 'P', 'CCG': 'P', 'CAA': 'Q', 'CAG': 'Q', 'CGA': 'R', 'CGG': 'R', 'ATT': 'I', 'ATC': 'I', 'ACT': 'T', 'ACC': 'T', 'AAT': 'N', 'AAC': 'N', 'AGT': 'S', 'AGC': 'S', 'ATA': 'I', 'ACA': 'T', 'AAA': 'K', 'AGA': 'R', 'ATG': 'M', 'ACG': 'T', 'AAG': 'K', 'AGG': 'R', 'GTT': 'V', 'GTC': 'V', 'GCT': 'A', 'GCC': 'A', 'GAT': 'D', 'GAC': 'D', 'GGT': 'G', 'GGC': 'G', 'GTA': 'V', 'GTG': 'V', 'GCA': 'A', 'GCG': 'A', 'GAA': 'E', 'GAG': 'E', 'GGA': 'G', 'GGG': 'G'}"}, {"fullname": "howard.objects.hgvs.CODON_3", "modulename": "howard.objects.hgvs", "qualname": "CODON_3", "kind": "variable", "doc": "

    \n", "default_value": "{'TTT': 'Phe', 'TTC': 'Phe', 'TCT': 'Ser', 'TCC': 'Ser', 'TAT': 'Tyr', 'TAC': 'Tyr', 'TGT': 'Cys', 'TGC': 'Cys', 'TTA': 'Leu', 'TCA': 'Ser', 'TAA': '*', 'TGA': '*', 'TTG': 'Leu', 'TCG': 'Ser', 'TAG': '*', 'TGG': 'Trp', 'CTT': 'Leu', 'CTC': 'Leu', 'CCT': 'Pro', 'CCC': 'Pro', 'CAT': 'His', 'CAC': 'His', 'CGT': 'Arg', 'CGC': 'Arg', 'CTA': 'Leu', 'CTG': 'Leu', 'CCA': 'Pro', 'CCG': 'Pro', 'CAA': 'Gln', 'CAG': 'Gln', 'CGA': 'Arg', 'CGG': 'Arg', 'ATT': 'Ile', 'ATC': 'Ile', 'ACT': 'Thr', 'ACC': 'Thr', 'AAT': 'Asn', 'AAC': 'Asn', 'AGT': 'Ser', 'AGC': 'Ser', 'ATA': 'Ile', 'ACA': 'Thr', 'AAA': 'Lys', 'AGA': 'Arg', 'ATG': 'Met', 'ACG': 'Thr', 'AAG': 'Lys', 'AGG': 'Arg', 'GTT': 'Val', 'GTC': 'Val', 'GCT': 'Ala', 'GCC': 'Ala', 'GAT': 'Asp', 'GAC': 'Asp', 'GGT': 'Gly', 'GGC': 'Gly', 'GTA': 'Val', 'GTG': 'Val', 'GCA': 'Ala', 'GCG': 'Ala', 'GAA': 'Glu', 'GAG': 'Glu', 'GGA': 'Gly', 'GGG': 'Gly'}"}, {"fullname": "howard.objects.hgvs.CODON_FULL", "modulename": "howard.objects.hgvs", "qualname": "CODON_FULL", "kind": "variable", "doc": "

    \n", "default_value": "{'TTT': 'Phenylalanine', 'TTC': 'Phenylalanine', 'TCT': 'Serine', 'TCC': 'Serine', 'TAT': 'Tyrosine', 'TAC': 'Tyrosine', 'TGT': 'Cysteine', 'TGC': 'Cysteine', 'TTA': 'Leucine', 'TCA': 'Serine', 'TAA': 'Stop', 'TGA': 'Stop', 'TTG': 'Leucine', 'TCG': 'Serine', 'TAG': 'Stop', 'TGG': 'Tryptophan', 'CTT': 'Leucine', 'CTC': 'Leucine', 'CCT': 'Proline', 'CCC': 'Proline', 'CAT': 'Histidine', 'CAC': 'Histidine', 'CGT': 'Arginine', 'CGC': 'Arginine', 'CTA': 'Leucine', 'CTG': 'Leucine', 'CCA': 'Proline', 'CCG': 'Proline', 'CAA': 'Glutamine', 'CAG': 'Glutamine', 'CGA': 'Arginine', 'CGG': 'Arginine', 'ATT': 'Isoleucine', 'ATC': 'Isoleucine', 'ACT': 'Threonine', 'ACC': 'Threonine', 'AAT': 'Asparagine', 'AAC': 'Asparagine', 'AGT': 'Serine', 'AGC': 'Serine', 'ATA': 'Isoleucine', 'ACA': 'Threonine', 'AAA': 'Lysine', 'AGA': 'Arginine', 'ATG': 'Methionine', 'ACG': 'Threonine', 'AAG': 'Lysine', 'AGG': 'Arginine', 'GTT': 'Valine', 'GTC': 'Valine', 'GCT': 'Alanine', 'GCC': 'Alanine', 'GAT': 'Aspartic acid', 'GAC': 'Aspartic acid', 'GGT': 'Glycine', 'GGC': 'Glycine', 'GTA': 'Valine', 'GTG': 'Valine', 'GCA': 'Alanine', 'GCG': 'Alanine', 'GAA': 'Glutamic acid', 'GAG': 'Glutamic acid', 'GGA': 'Glycine', 'GGG': 'Glycine'}"}, {"fullname": "howard.objects.hgvs.NUCLEOTIDE_TRANSLATE", "modulename": "howard.objects.hgvs", "qualname": "NUCLEOTIDE_TRANSLATE", "kind": "variable", "doc": "

    \n", "default_value": "{'T': 'A', 'A': 'T', 'G': 'C', 'C': 'G'}"}, {"fullname": "howard.objects.hgvs.HGVSRegex", "modulename": "howard.objects.hgvs", "qualname": "HGVSRegex", "kind": "class", "doc": "

    All regular expression for HGVS names.

    \n"}, {"fullname": "howard.objects.hgvs.HGVSRegex.BASE", "modulename": "howard.objects.hgvs", "qualname": "HGVSRegex.BASE", "kind": "variable", "doc": "

    \n", "default_value": "'[acgtbdhkmnrsvwyACGTBDHKMNRSVWY]|\\\\d+'"}, {"fullname": "howard.objects.hgvs.HGVSRegex.BASES", "modulename": "howard.objects.hgvs", "qualname": "HGVSRegex.BASES", "kind": "variable", "doc": "

    \n", "default_value": "'[acgtbdhkmnrsvwyACGTBDHKMNRSVWY]+|\\\\d+'"}, {"fullname": "howard.objects.hgvs.HGVSRegex.DNA_REF", "modulename": "howard.objects.hgvs", "qualname": "HGVSRegex.DNA_REF", "kind": "variable", "doc": "

    \n", "default_value": "'(?P<ref>[acgtbdhkmnrsvwyACGTBDHKMNRSVWY]+|\\\\d+)'"}, {"fullname": "howard.objects.hgvs.HGVSRegex.DNA_ALT", "modulename": "howard.objects.hgvs", "qualname": "HGVSRegex.DNA_ALT", "kind": "variable", "doc": "

    \n", "default_value": "'(?P<alt>[acgtbdhkmnrsvwyACGTBDHKMNRSVWY]+|\\\\d+)'"}, {"fullname": "howard.objects.hgvs.HGVSRegex.EQUAL", "modulename": "howard.objects.hgvs", "qualname": "HGVSRegex.EQUAL", "kind": "variable", "doc": "

    \n", "default_value": "'(?P<mutation_type>=)'"}, {"fullname": "howard.objects.hgvs.HGVSRegex.SUB", "modulename": "howard.objects.hgvs", "qualname": "HGVSRegex.SUB", "kind": "variable", "doc": "

    \n", "default_value": "'(?P<mutation_type>>)'"}, {"fullname": "howard.objects.hgvs.HGVSRegex.INS", "modulename": "howard.objects.hgvs", "qualname": "HGVSRegex.INS", "kind": "variable", "doc": "

    \n", "default_value": "'(?P<mutation_type>ins)'"}, {"fullname": "howard.objects.hgvs.HGVSRegex.DEL", "modulename": "howard.objects.hgvs", "qualname": "HGVSRegex.DEL", "kind": "variable", "doc": "

    \n", "default_value": "'(?P<mutation_type>del)'"}, {"fullname": "howard.objects.hgvs.HGVSRegex.DUP", "modulename": "howard.objects.hgvs", "qualname": "HGVSRegex.DUP", "kind": "variable", "doc": "

    \n", "default_value": "'(?P<mutation_type>dup)'"}, {"fullname": "howard.objects.hgvs.HGVSRegex.COORD_START", "modulename": "howard.objects.hgvs", "qualname": "HGVSRegex.COORD_START", "kind": "variable", "doc": "

    \n", "default_value": "'(?P<start>\\\\d+)'"}, {"fullname": "howard.objects.hgvs.HGVSRegex.COORD_END", "modulename": "howard.objects.hgvs", "qualname": "HGVSRegex.COORD_END", "kind": "variable", "doc": "

    \n", "default_value": "'(?P<end>\\\\d+)'"}, {"fullname": "howard.objects.hgvs.HGVSRegex.COORD_RANGE", "modulename": "howard.objects.hgvs", "qualname": "HGVSRegex.COORD_RANGE", "kind": "variable", "doc": "

    \n", "default_value": "'(?P<start>\\\\d+)_(?P<end>\\\\d+)'"}, {"fullname": "howard.objects.hgvs.HGVSRegex.CDNA_COORD", "modulename": "howard.objects.hgvs", "qualname": "HGVSRegex.CDNA_COORD", "kind": "variable", "doc": "

    \n", "default_value": "'(?P<coord_prefix>|-|\\\\*)(?P<coord>\\\\d+)((?P<offset_prefix>-|\\\\+)(?P<offset>\\\\d+))?'"}, {"fullname": "howard.objects.hgvs.HGVSRegex.CDNA_START", "modulename": "howard.objects.hgvs", "qualname": "HGVSRegex.CDNA_START", "kind": "variable", "doc": "

    \n", "default_value": "'(?P<start>(?P<start_coord_prefix>|-|\\\\*)(?P<start_coord>\\\\d+)((?P<start_offset_prefix>-|\\\\+)(?P<start_offset>\\\\d+))?)'"}, {"fullname": "howard.objects.hgvs.HGVSRegex.CDNA_END", "modulename": "howard.objects.hgvs", "qualname": "HGVSRegex.CDNA_END", "kind": "variable", "doc": "

    \n", "default_value": "'(?P<end>(?P<end_coord_prefix>|-|\\\\*)(?P<end_coord>\\\\d+)((?P<end_offset_prefix>-|\\\\+)(?P<end_offset>\\\\d+))?)'"}, {"fullname": "howard.objects.hgvs.HGVSRegex.CDNA_RANGE", "modulename": "howard.objects.hgvs", "qualname": "HGVSRegex.CDNA_RANGE", "kind": "variable", "doc": "

    \n", "default_value": "'(?P<start>(?P<start_coord_prefix>|-|\\\\*)(?P<start_coord>\\\\d+)((?P<start_offset_prefix>-|\\\\+)(?P<start_offset>\\\\d+))?)_(?P<end>(?P<end_coord_prefix>|-|\\\\*)(?P<end_coord>\\\\d+)((?P<end_offset_prefix>-|\\\\+)(?P<end_offset>\\\\d+))?)'"}, {"fullname": "howard.objects.hgvs.HGVSRegex.CDNA_ALLELE", "modulename": "howard.objects.hgvs", "qualname": "HGVSRegex.CDNA_ALLELE", "kind": "variable", "doc": "

    \n", "default_value": "['(?P<start>(?P<start_coord_prefix>|-|\\\\*)(?P<start_coord>\\\\d+)((?P<start_offset_prefix>-|\\\\+)(?P<start_offset>\\\\d+))?)(?P<mutation_type>=)', '(?P<start>(?P<start_coord_prefix>|-|\\\\*)(?P<start_coord>\\\\d+)((?P<start_offset_prefix>-|\\\\+)(?P<start_offset>\\\\d+))?)(?P<ref>[acgtbdhkmnrsvwyACGTBDHKMNRSVWY]+|\\\\d+)(?P<mutation_type>=)', '(?P<start>(?P<start_coord_prefix>|-|\\\\*)(?P<start_coord>\\\\d+)((?P<start_offset_prefix>-|\\\\+)(?P<start_offset>\\\\d+))?)(?P<ref>[acgtbdhkmnrsvwyACGTBDHKMNRSVWY]+|\\\\d+)(?P<mutation_type>>)(?P<alt>[acgtbdhkmnrsvwyACGTBDHKMNRSVWY]+|\\\\d+)', '(?P<start>(?P<start_coord_prefix>|-|\\\\*)(?P<start_coord>\\\\d+)((?P<start_offset_prefix>-|\\\\+)(?P<start_offset>\\\\d+))?)(?P<mutation_type>ins)(?P<alt>[acgtbdhkmnrsvwyACGTBDHKMNRSVWY]+|\\\\d+)', '(?P<start>(?P<start_coord_prefix>|-|\\\\*)(?P<start_coord>\\\\d+)((?P<start_offset_prefix>-|\\\\+)(?P<start_offset>\\\\d+))?)(?P<mutation_type>del)(?P<ref>[acgtbdhkmnrsvwyACGTBDHKMNRSVWY]+|\\\\d+)', '(?P<start>(?P<start_coord_prefix>|-|\\\\*)(?P<start_coord>\\\\d+)((?P<start_offset_prefix>-|\\\\+)(?P<start_offset>\\\\d+))?)(?P<mutation_type>dup)(?P<ref>[acgtbdhkmnrsvwyACGTBDHKMNRSVWY]+|\\\\d+)', '(?P<start>(?P<start_coord_prefix>|-|\\\\*)(?P<start_coord>\\\\d+)((?P<start_offset_prefix>-|\\\\+)(?P<start_offset>\\\\d+))?)(?P<mutation_type>del)', '(?P<start>(?P<start_coord_prefix>|-|\\\\*)(?P<start_coord>\\\\d+)((?P<start_offset_prefix>-|\\\\+)(?P<start_offset>\\\\d+))?)(?P<mutation_type>dup)', '(?P<start>\\\\d+)_(?P<end>\\\\d+)(?P<mutation_type>=)', '(?P<start>(?P<start_coord_prefix>|-|\\\\*)(?P<start_coord>\\\\d+)((?P<start_offset_prefix>-|\\\\+)(?P<start_offset>\\\\d+))?)_(?P<end>(?P<end_coord_prefix>|-|\\\\*)(?P<end_coord>\\\\d+)((?P<end_offset_prefix>-|\\\\+)(?P<end_offset>\\\\d+))?)(?P<mutation_type>ins)(?P<alt>[acgtbdhkmnrsvwyACGTBDHKMNRSVWY]+|\\\\d+)', 
'(?P<start>(?P<start_coord_prefix>|-|\\\\*)(?P<start_coord>\\\\d+)((?P<start_offset_prefix>-|\\\\+)(?P<start_offset>\\\\d+))?)_(?P<end>(?P<end_coord_prefix>|-|\\\\*)(?P<end_coord>\\\\d+)((?P<end_offset_prefix>-|\\\\+)(?P<end_offset>\\\\d+))?)(?P<mutation_type>del)(?P<ref>[acgtbdhkmnrsvwyACGTBDHKMNRSVWY]+|\\\\d+)', '(?P<start>(?P<start_coord_prefix>|-|\\\\*)(?P<start_coord>\\\\d+)((?P<start_offset_prefix>-|\\\\+)(?P<start_offset>\\\\d+))?)_(?P<end>(?P<end_coord_prefix>|-|\\\\*)(?P<end_coord>\\\\d+)((?P<end_offset_prefix>-|\\\\+)(?P<end_offset>\\\\d+))?)(?P<mutation_type>dup)(?P<ref>[acgtbdhkmnrsvwyACGTBDHKMNRSVWY]+|\\\\d+)', '(?P<start>(?P<start_coord_prefix>|-|\\\\*)(?P<start_coord>\\\\d+)((?P<start_offset_prefix>-|\\\\+)(?P<start_offset>\\\\d+))?)_(?P<end>(?P<end_coord_prefix>|-|\\\\*)(?P<end_coord>\\\\d+)((?P<end_offset_prefix>-|\\\\+)(?P<end_offset>\\\\d+))?)(?P<mutation_type>del)', '(?P<start>(?P<start_coord_prefix>|-|\\\\*)(?P<start_coord>\\\\d+)((?P<start_offset_prefix>-|\\\\+)(?P<start_offset>\\\\d+))?)_(?P<end>(?P<end_coord_prefix>|-|\\\\*)(?P<end_coord>\\\\d+)((?P<end_offset_prefix>-|\\\\+)(?P<end_offset>\\\\d+))?)(?P<mutation_type>dup)', '(?P<delins>(?P<start>(?P<start_coord_prefix>|-|\\\\*)(?P<start_coord>\\\\d+)((?P<start_offset_prefix>-|\\\\+)(?P<start_offset>\\\\d+))?)del(?P<ref>[acgtbdhkmnrsvwyACGTBDHKMNRSVWY]+|\\\\d+)ins(?P<alt>[acgtbdhkmnrsvwyACGTBDHKMNRSVWY]+|\\\\d+))', '(?P<delins>(?P<start>(?P<start_coord_prefix>|-|\\\\*)(?P<start_coord>\\\\d+)((?P<start_offset_prefix>-|\\\\+)(?P<start_offset>\\\\d+))?)_(?P<end>(?P<end_coord_prefix>|-|\\\\*)(?P<end_coord>\\\\d+)((?P<end_offset_prefix>-|\\\\+)(?P<end_offset>\\\\d+))?)del(?P<ref>[acgtbdhkmnrsvwyACGTBDHKMNRSVWY]+|\\\\d+)ins(?P<alt>[acgtbdhkmnrsvwyACGTBDHKMNRSVWY]+|\\\\d+))', '(?P<delins>(?P<start>(?P<start_coord_prefix>|-|\\\\*)(?P<start_coord>\\\\d+)((?P<start_offset_prefix>-|\\\\+)(?P<start_offset>\\\\d+))?)delins(?P<alt>[acgtbdhkmnrsvwyACGTBDHKMNRSVWY]+|\\\\d+))', 
'(?P<delins>(?P<start>(?P<start_coord_prefix>|-|\\\\*)(?P<start_coord>\\\\d+)((?P<start_offset_prefix>-|\\\\+)(?P<start_offset>\\\\d+))?)_(?P<end>(?P<end_coord_prefix>|-|\\\\*)(?P<end_coord>\\\\d+)((?P<end_offset_prefix>-|\\\\+)(?P<end_offset>\\\\d+))?)delins(?P<alt>[acgtbdhkmnrsvwyACGTBDHKMNRSVWY]+|\\\\d+))']"}, {"fullname": "howard.objects.hgvs.HGVSRegex.CDNA_ALLELE_REGEXES", "modulename": "howard.objects.hgvs", "qualname": "HGVSRegex.CDNA_ALLELE_REGEXES", "kind": "variable", "doc": "

    \n", "default_value": "[re.compile('^(?P<start>(?P<start_coord_prefix>|-|\\\\*)(?P<start_coord>\\\\d+)((?P<start_offset_prefix>-|\\\\+)(?P<start_offset>\\\\d+))?)(?P<mutation_type>=)$'), re.compile('^(?P<start>(?P<start_coord_prefix>|-|\\\\*)(?P<start_coord>\\\\d+)((?P<start_offset_prefix>-|\\\\+)(?P<start_offset>\\\\d+))?)(?P<ref>[acgtbdhkmnrsvwyACGTBDHKMNRSVWY]+|\\\\d+)(?P<mutation_type>=)$'), re.compile('^(?P<start>(?P<start_coord_prefix>|-|\\\\*)(?P<start_coord>\\\\d+)((?P<start_offset_prefix>-|\\\\+)(?P<start_offset>\\\\d+))?)(?P<ref>[acgtbdhkmnrsvwyACGTBDHKMNRSVWY]+|\\\\d+)(?P<mutation_type>>)(?P<alt>[acgtb), re.compile('^(?P<start>(?P<start_coord_prefix>|-|\\\\*)(?P<start_coord>\\\\d+)((?P<start_offset_prefix>-|\\\\+)(?P<start_offset>\\\\d+))?)(?P<mutation_type>ins)(?P<alt>[acgtbdhkmnrsvwyACGTBDHKMNRSVWY]+|\\\\d+)$'), re.compile('^(?P<start>(?P<start_coord_prefix>|-|\\\\*)(?P<start_coord>\\\\d+)((?P<start_offset_prefix>-|\\\\+)(?P<start_offset>\\\\d+))?)(?P<mutation_type>del)(?P<ref>[acgtbdhkmnrsvwyACGTBDHKMNRSVWY]+|\\\\d+)$'), re.compile('^(?P<start>(?P<start_coord_prefix>|-|\\\\*)(?P<start_coord>\\\\d+)((?P<start_offset_prefix>-|\\\\+)(?P<start_offset>\\\\d+))?)(?P<mutation_type>dup)(?P<ref>[acgtbdhkmnrsvwyACGTBDHKMNRSVWY]+|\\\\d+)$'), re.compile('^(?P<start>(?P<start_coord_prefix>|-|\\\\*)(?P<start_coord>\\\\d+)((?P<start_offset_prefix>-|\\\\+)(?P<start_offset>\\\\d+))?)(?P<mutation_type>del)$'), re.compile('^(?P<start>(?P<start_coord_prefix>|-|\\\\*)(?P<start_coord>\\\\d+)((?P<start_offset_prefix>-|\\\\+)(?P<start_offset>\\\\d+))?)(?P<mutation_type>dup)$'), re.compile('^(?P<start>\\\\d+)_(?P<end>\\\\d+)(?P<mutation_type>=)$'), re.compile('^(?P<start>(?P<start_coord_prefix>|-|\\\\*)(?P<start_coord>\\\\d+)((?P<start_offset_prefix>-|\\\\+)(?P<start_offset>\\\\d+))?)_(?P<end>(?P<end_coord_prefix>|-|\\\\*)(?P<end_coord>\\\\d+)((?P<end_offset_prefix>-|), 
re.compile('^(?P<start>(?P<start_coord_prefix>|-|\\\\*)(?P<start_coord>\\\\d+)((?P<start_offset_prefix>-|\\\\+)(?P<start_offset>\\\\d+))?)_(?P<end>(?P<end_coord_prefix>|-|\\\\*)(?P<end_coord>\\\\d+)((?P<end_offset_prefix>-|), re.compile('^(?P<start>(?P<start_coord_prefix>|-|\\\\*)(?P<start_coord>\\\\d+)((?P<start_offset_prefix>-|\\\\+)(?P<start_offset>\\\\d+))?)_(?P<end>(?P<end_coord_prefix>|-|\\\\*)(?P<end_coord>\\\\d+)((?P<end_offset_prefix>-|), re.compile('^(?P<start>(?P<start_coord_prefix>|-|\\\\*)(?P<start_coord>\\\\d+)((?P<start_offset_prefix>-|\\\\+)(?P<start_offset>\\\\d+))?)_(?P<end>(?P<end_coord_prefix>|-|\\\\*)(?P<end_coord>\\\\d+)((?P<end_offset_prefix>-|), re.compile('^(?P<start>(?P<start_coord_prefix>|-|\\\\*)(?P<start_coord>\\\\d+)((?P<start_offset_prefix>-|\\\\+)(?P<start_offset>\\\\d+))?)_(?P<end>(?P<end_coord_prefix>|-|\\\\*)(?P<end_coord>\\\\d+)((?P<end_offset_prefix>-|), re.compile('^(?P<delins>(?P<start>(?P<start_coord_prefix>|-|\\\\*)(?P<start_coord>\\\\d+)((?P<start_offset_prefix>-|\\\\+)(?P<start_offset>\\\\d+))?)del(?P<ref>[acgtbdhkmnrsvwyACGTBDHKMNRSVWY]+|\\\\d+)ins(?P<alt>[acgtbdhk), re.compile('^(?P<delins>(?P<start>(?P<start_coord_prefix>|-|\\\\*)(?P<start_coord>\\\\d+)((?P<start_offset_prefix>-|\\\\+)(?P<start_offset>\\\\d+))?)_(?P<end>(?P<end_coord_prefix>|-|\\\\*)(?P<end_coord>\\\\d+)((?P<end_offse), re.compile('^(?P<delins>(?P<start>(?P<start_coord_prefix>|-|\\\\*)(?P<start_coord>\\\\d+)((?P<start_offset_prefix>-|\\\\+)(?P<start_offset>\\\\d+))?)delins(?P<alt>[acgtbdhkmnrsvwyACGTBDHKMNRSVWY]+|\\\\d+))$'), re.compile('^(?P<delins>(?P<start>(?P<start_coord_prefix>|-|\\\\*)(?P<start_coord>\\\\d+)((?P<start_offset_prefix>-|\\\\+)(?P<start_offset>\\\\d+))?)_(?P<end>(?P<end_coord_prefix>|-|\\\\*)(?P<end_coord>\\\\d+)((?P<end_offse)]"}, {"fullname": "howard.objects.hgvs.HGVSRegex.PEP", "modulename": "howard.objects.hgvs", "qualname": "HGVSRegex.PEP", "kind": "variable", "doc": "

    \n", "default_value": "'([A-Z]([a-z]{2}))+'"}, {"fullname": "howard.objects.hgvs.HGVSRegex.PEP_REF", "modulename": "howard.objects.hgvs", "qualname": "HGVSRegex.PEP_REF", "kind": "variable", "doc": "

    \n", "default_value": "'(?P<ref>([A-Z]([a-z]{2}))+)'"}, {"fullname": "howard.objects.hgvs.HGVSRegex.PEP_REF2", "modulename": "howard.objects.hgvs", "qualname": "HGVSRegex.PEP_REF2", "kind": "variable", "doc": "

    \n", "default_value": "'(?P<ref2>([A-Z]([a-z]{2}))+)'"}, {"fullname": "howard.objects.hgvs.HGVSRegex.PEP_ALT", "modulename": "howard.objects.hgvs", "qualname": "HGVSRegex.PEP_ALT", "kind": "variable", "doc": "

    \n", "default_value": "'(?P<alt>([A-Z]([a-z]{2}))+)'"}, {"fullname": "howard.objects.hgvs.HGVSRegex.PEP_EXTRA", "modulename": "howard.objects.hgvs", "qualname": "HGVSRegex.PEP_EXTRA", "kind": "variable", "doc": "

    \n", "default_value": "'(?P<extra>(|=|\\\\?)(|fs))'"}, {"fullname": "howard.objects.hgvs.HGVSRegex.PEP_ALLELE", "modulename": "howard.objects.hgvs", "qualname": "HGVSRegex.PEP_ALLELE", "kind": "variable", "doc": "

    \n", "default_value": "['(?P<ref>([A-Z]([a-z]{2}))+)(?P<start>\\\\d+)(?P<extra>(|=|\\\\?)(|fs))', '(?P<ref>([A-Z]([a-z]{2}))+)(?P<start>\\\\d+)(?P<alt>([A-Z]([a-z]{2}))+)(?P<extra>(|=|\\\\?)(|fs))', '(?P<delins>(?P<ref>([A-Z]([a-z]{2}))+)(?P<start>\\\\d+)_(?P<ref2>([A-Z]([a-z]{2}))+)(?P<end>\\\\d+)(?P<extra>(|=|\\\\?)(|fs)))', '(?P<delins>(?P<ref>([A-Z]([a-z]{2}))+)(?P<start>\\\\d+)_(?P<ref2>([A-Z]([a-z]{2}))+)(?P<end>\\\\d+)(?P<alt>([A-Z]([a-z]{2}))+)(?P<extra>(|=|\\\\?)(|fs)))']"}, {"fullname": "howard.objects.hgvs.HGVSRegex.PEP_ALLELE_REGEXES", "modulename": "howard.objects.hgvs", "qualname": "HGVSRegex.PEP_ALLELE_REGEXES", "kind": "variable", "doc": "

    \n", "default_value": "[re.compile('^(?P<ref>([A-Z]([a-z]{2}))+)(?P<start>\\\\d+)(?P<extra>(|=|\\\\?)(|fs))$'), re.compile('^(?P<ref>([A-Z]([a-z]{2}))+)(?P<start>\\\\d+)(?P<alt>([A-Z]([a-z]{2}))+)(?P<extra>(|=|\\\\?)(|fs))$'), re.compile('^(?P<delins>(?P<ref>([A-Z]([a-z]{2}))+)(?P<start>\\\\d+)_(?P<ref2>([A-Z]([a-z]{2}))+)(?P<end>\\\\d+)(?P<extra>(|=|\\\\?)(|fs)))$'), re.compile('^(?P<delins>(?P<ref>([A-Z]([a-z]{2}))+)(?P<start>\\\\d+)_(?P<ref2>([A-Z]([a-z]{2}))+)(?P<end>\\\\d+)(?P<alt>([A-Z]([a-z]{2}))+)(?P<extra>(|=|\\\\?)(|fs)))$')]"}, {"fullname": "howard.objects.hgvs.HGVSRegex.GENOMIC_ALLELE", "modulename": "howard.objects.hgvs", "qualname": "HGVSRegex.GENOMIC_ALLELE", "kind": "variable", "doc": "

    \n", "default_value": "['(?P<start>\\\\d+)(?P<mutation_type>=)', '(?P<start>\\\\d+)(?P<ref>[acgtbdhkmnrsvwyACGTBDHKMNRSVWY]+|\\\\d+)(?P<mutation_type>=)', '(?P<start>\\\\d+)(?P<ref>[acgtbdhkmnrsvwyACGTBDHKMNRSVWY]+|\\\\d+)(?P<mutation_type>>)(?P<alt>[acgtbdhkmnrsvwyACGTBDHKMNRSVWY]+|\\\\d+)', '(?P<start>\\\\d+)(?P<mutation_type>ins)(?P<alt>[acgtbdhkmnrsvwyACGTBDHKMNRSVWY]+|\\\\d+)', '(?P<start>\\\\d+)(?P<mutation_type>del)(?P<ref>[acgtbdhkmnrsvwyACGTBDHKMNRSVWY]+|\\\\d+)', '(?P<start>\\\\d+)(?P<mutation_type>dup)(?P<ref>[acgtbdhkmnrsvwyACGTBDHKMNRSVWY]+|\\\\d+)', '(?P<start>\\\\d+)(?P<mutation_type>del)', '(?P<start>\\\\d+)(?P<mutation_type>dup)', '(?P<start>\\\\d+)_(?P<end>\\\\d+)(?P<mutation_type>=)', '(?P<start>\\\\d+)_(?P<end>\\\\d+)(?P<mutation_type>ins)(?P<alt>[acgtbdhkmnrsvwyACGTBDHKMNRSVWY]+|\\\\d+)', '(?P<start>\\\\d+)_(?P<end>\\\\d+)(?P<mutation_type>del)(?P<ref>[acgtbdhkmnrsvwyACGTBDHKMNRSVWY]+|\\\\d+)', '(?P<start>\\\\d+)_(?P<end>\\\\d+)(?P<mutation_type>dup)(?P<ref>[acgtbdhkmnrsvwyACGTBDHKMNRSVWY]+|\\\\d+)', '(?P<start>\\\\d+)_(?P<end>\\\\d+)(?P<mutation_type>del)', '(?P<start>\\\\d+)_(?P<end>\\\\d+)(?P<mutation_type>dup)', '(?P<delins>(?P<start>\\\\d+)del(?P<ref>[acgtbdhkmnrsvwyACGTBDHKMNRSVWY]+|\\\\d+)ins(?P<alt>[acgtbdhkmnrsvwyACGTBDHKMNRSVWY]+|\\\\d+))', '(?P<delins>(?P<start>\\\\d+)_(?P<end>\\\\d+)del(?P<ref>[acgtbdhkmnrsvwyACGTBDHKMNRSVWY]+|\\\\d+)ins(?P<alt>[acgtbdhkmnrsvwyACGTBDHKMNRSVWY]+|\\\\d+))', '(?P<delins>(?P<start>\\\\d+)delins(?P<alt>[acgtbdhkmnrsvwyACGTBDHKMNRSVWY]+|\\\\d+))', '(?P<delins>(?P<start>\\\\d+)_(?P<end>\\\\d+)delins(?P<alt>[acgtbdhkmnrsvwyACGTBDHKMNRSVWY]+|\\\\d+))']"}, {"fullname": "howard.objects.hgvs.HGVSRegex.GENOMIC_ALLELE_REGEXES", "modulename": "howard.objects.hgvs", "qualname": "HGVSRegex.GENOMIC_ALLELE_REGEXES", "kind": "variable", "doc": "

    \n", "default_value": "[re.compile('^(?P<start>\\\\d+)(?P<mutation_type>=)$'), re.compile('^(?P<start>\\\\d+)(?P<ref>[acgtbdhkmnrsvwyACGTBDHKMNRSVWY]+|\\\\d+)(?P<mutation_type>=)$'), re.compile('^(?P<start>\\\\d+)(?P<ref>[acgtbdhkmnrsvwyACGTBDHKMNRSVWY]+|\\\\d+)(?P<mutation_type>>)(?P<alt>[acgtbdhkmnrsvwyACGTBDHKMNRSVWY]+|\\\\d+)$'), re.compile('^(?P<start>\\\\d+)(?P<mutation_type>ins)(?P<alt>[acgtbdhkmnrsvwyACGTBDHKMNRSVWY]+|\\\\d+)$'), re.compile('^(?P<start>\\\\d+)(?P<mutation_type>del)(?P<ref>[acgtbdhkmnrsvwyACGTBDHKMNRSVWY]+|\\\\d+)$'), re.compile('^(?P<start>\\\\d+)(?P<mutation_type>dup)(?P<ref>[acgtbdhkmnrsvwyACGTBDHKMNRSVWY]+|\\\\d+)$'), re.compile('^(?P<start>\\\\d+)(?P<mutation_type>del)$'), re.compile('^(?P<start>\\\\d+)(?P<mutation_type>dup)$'), re.compile('^(?P<start>\\\\d+)_(?P<end>\\\\d+)(?P<mutation_type>=)$'), re.compile('^(?P<start>\\\\d+)_(?P<end>\\\\d+)(?P<mutation_type>ins)(?P<alt>[acgtbdhkmnrsvwyACGTBDHKMNRSVWY]+|\\\\d+)$'), re.compile('^(?P<start>\\\\d+)_(?P<end>\\\\d+)(?P<mutation_type>del)(?P<ref>[acgtbdhkmnrsvwyACGTBDHKMNRSVWY]+|\\\\d+)$'), re.compile('^(?P<start>\\\\d+)_(?P<end>\\\\d+)(?P<mutation_type>dup)(?P<ref>[acgtbdhkmnrsvwyACGTBDHKMNRSVWY]+|\\\\d+)$'), re.compile('^(?P<start>\\\\d+)_(?P<end>\\\\d+)(?P<mutation_type>del)$'), re.compile('^(?P<start>\\\\d+)_(?P<end>\\\\d+)(?P<mutation_type>dup)$'), re.compile('^(?P<delins>(?P<start>\\\\d+)del(?P<ref>[acgtbdhkmnrsvwyACGTBDHKMNRSVWY]+|\\\\d+)ins(?P<alt>[acgtbdhkmnrsvwyACGTBDHKMNRSVWY]+|\\\\d+))$'), re.compile('^(?P<delins>(?P<start>\\\\d+)_(?P<end>\\\\d+)del(?P<ref>[acgtbdhkmnrsvwyACGTBDHKMNRSVWY]+|\\\\d+)ins(?P<alt>[acgtbdhkmnrsvwyACGTBDHKMNRSVWY]+|\\\\d+))$'), re.compile('^(?P<delins>(?P<start>\\\\d+)delins(?P<alt>[acgtbdhkmnrsvwyACGTBDHKMNRSVWY]+|\\\\d+))$'), re.compile('^(?P<delins>(?P<start>\\\\d+)_(?P<end>\\\\d+)delins(?P<alt>[acgtbdhkmnrsvwyACGTBDHKMNRSVWY]+|\\\\d+))$')]"}, {"fullname": "howard.objects.hgvs.REFSEQ_PREFIXES", "modulename": "howard.objects.hgvs", "qualname": 
"REFSEQ_PREFIXES", "kind": "variable", "doc": "

    \n", "default_value": "[('AC_', 'genomic', 'Complete genomic molecule, usually alternate assembly'), ('NC_', 'genomic', 'Complete genomic molecule, usually reference assembly'), ('NG_', 'genomic', 'Incomplete genomic region'), ('NT_', 'genomic', 'Contig or scaffold, clone-based or WGS'), ('NW_', 'genomic', 'Contig or scaffold, primarily WGS'), ('NS_', 'genomic', 'Environmental sequence'), ('NZ_', 'genomic', 'Unfinished WGS'), ('NM_', 'mRNA', ''), ('NR_', 'RNA', ''), ('XM_', 'mRNA', 'Predicted model'), ('XR_', 'RNA', 'Predicted model'), ('AP_', 'Protein', 'Annotated on AC_ alternate assembly'), ('NP_', 'Protein', 'Associated with an NM_ or NC_ accession'), ('YP_', 'Protein', ''), ('XP_', 'Protein', 'Predicted model, associated with an XM_ accession'), ('ZP_', 'Protein', 'Predicted model, annotated on NZ_ genomic records')]"}, {"fullname": "howard.objects.hgvs.REFSEQ_PREFIX_LOOKUP", "modulename": "howard.objects.hgvs", "qualname": "REFSEQ_PREFIX_LOOKUP", "kind": "variable", "doc": "

    \n", "default_value": "{'AC_': ('genomic', 'Complete genomic molecule, usually alternate assembly'), 'NC_': ('genomic', 'Complete genomic molecule, usually reference assembly'), 'NG_': ('genomic', 'Incomplete genomic region'), 'NT_': ('genomic', 'Contig or scaffold, clone-based or WGS'), 'NW_': ('genomic', 'Contig or scaffold, primarily WGS'), 'NS_': ('genomic', 'Environmental sequence'), 'NZ_': ('genomic', 'Unfinished WGS'), 'NM_': ('mRNA', ''), 'NR_': ('RNA', ''), 'XM_': ('mRNA', 'Predicted model'), 'XR_': ('RNA', 'Predicted model'), 'AP_': ('Protein', 'Annotated on AC_ alternate assembly'), 'NP_': ('Protein', 'Associated with an NM_ or NC_ accession'), 'YP_': ('Protein', ''), 'XP_': ('Protein', 'Predicted model, associated with an XM_ accession'), 'ZP_': ('Protein', 'Predicted model, annotated on NZ_ genomic records')}"}, {"fullname": "howard.objects.hgvs.get_refseq_type", "modulename": "howard.objects.hgvs", "qualname": "get_refseq_type", "kind": "function", "doc": "

    The get_refseq_type function returns the RefSeq type for a given RefSeq name.

    \n\n
    Parameters
    \n\n
      \n
    • name: The name parameter is a string representing a RefSeq name
    • \n
    \n\n
    Returns
    \n\n
    \n

    The function get_refseq_type returns the RefSeq type for a given RefSeq name.

    \n
    \n", "signature": "(name: str) -> str:", "funcdef": "def"}, {"fullname": "howard.objects.hgvs.InvalidHGVSName", "modulename": "howard.objects.hgvs", "qualname": "InvalidHGVSName", "kind": "class", "doc": "

    Inappropriate argument value (of correct type).

    \n", "bases": "builtins.ValueError"}, {"fullname": "howard.objects.hgvs.InvalidHGVSName.__init__", "modulename": "howard.objects.hgvs", "qualname": "InvalidHGVSName.__init__", "kind": "function", "doc": "

    The function initializes an InvalidHGVSName object with a message, name, part, and reason.

    \n\n
    Parameters
    \n\n
      \n
    • name: The name parameter is a string that represents the invalid HGVS name. It is the\nname that is considered invalid and does not meet the required criteria
    • \n
    • part: The \"part\" parameter represents the part of the HGVS (Human Genome Variation\nSociety) name that is invalid. It is used to provide more specific information about the error\nthat occurred, defaults to name
    • \n
    • reason: The \"reason\" parameter is an optional argument that provides additional\ninformation or context for why the HGVS name is considered invalid. It can be used to provide\nspecific details about the error or to explain why the name does not meet the required criteria
    • \n
    \n", "signature": "(name: str = '', part: str = 'name', reason: str = '')"}, {"fullname": "howard.objects.hgvs.InvalidHGVSName.name", "modulename": "howard.objects.hgvs", "qualname": "InvalidHGVSName.name", "kind": "variable", "doc": "

    \n"}, {"fullname": "howard.objects.hgvs.InvalidHGVSName.part", "modulename": "howard.objects.hgvs", "qualname": "InvalidHGVSName.part", "kind": "variable", "doc": "

    \n"}, {"fullname": "howard.objects.hgvs.InvalidHGVSName.reason", "modulename": "howard.objects.hgvs", "qualname": "InvalidHGVSName.reason", "kind": "variable", "doc": "

    \n"}, {"fullname": "howard.objects.hgvs.HGVSName", "modulename": "howard.objects.hgvs", "qualname": "HGVSName", "kind": "class", "doc": "

    Represents a HGVS variant name.

    \n"}, {"fullname": "howard.objects.hgvs.HGVSName.__init__", "modulename": "howard.objects.hgvs", "qualname": "HGVSName.__init__", "kind": "function", "doc": "

    The function is a constructor that initializes various attributes of an object and parses a\ngiven name to populate those attributes.

    \n\n
    Parameters
    \n\n
      \n
    • name: The full HGVS name of the variant
    • \n
    • prefix: The prefix parameter is a string that is used as a prefix for the HGVS name. It\ncan be used to indicate additional information or context about the variant
    • \n
    • chrom: The chrom parameter represents the chromosome where the mutation occurs. It is a\nstring that specifies the chromosome number or identifier
    • \n
    • transcript: The transcript parameter represents the transcript ID or name associated\nwith the mutation. It is used to specify the specific transcript in which the mutation occurs
    • \n
    • transcript_protein: The transcript_protein parameter is used to store information about\nthe protein associated with the transcript. It can be used to specify the protein variant or\nisoform that is affected by the mutation
    • \n
    • gene: The \"gene\" parameter represents the gene associated with the variant. It is a\nstring that specifies the gene name or identifier
    • \n
    • exon: The exon parameter represents the exon number or range in which the mutation\noccurs. It is used to specify the location of the mutation within the transcript
    • \n
    • kind: The \"kind\" parameter is used to specify the type of variant or mutation. It can be\na string that represents the kind of mutation, such as \"substitution\", \"deletion\", \"insertion\",\netc. This parameter helps to categorize and describe the type of mutation being represented by\nthe
    • \n
    • mutation_type: The mutation_type parameter is used to specify the type of mutation. It\ncan be a string that represents the type of mutation, such as \"SNP\" (single nucleotide\npolymorphism), \"DEL\" (deletion), \"INS\" (insertion), etc
    • \n
    • start: The start parameter represents the starting position of the mutation or variant\nin the genomic sequence. It is an integer value that indicates the position of the mutation or\nvariant on the genomic sequence. If not provided, it defaults to 0, defaults to 0
    • \n
    • end: The \"end\" parameter represents the end position of the mutation or variant. It is an\ninteger value that indicates the position of the mutation or variant on the genomic sequence,\ndefaults to 0
    • \n
    • ref_allele: The ref_allele parameter represents the reference allele in a genetic\nmutation. It is the allele that is present in the reference genome at a specific position
    • \n
    • ref2_allele: The ref2_allele parameter represents the reference allele at the end of a\npeptide indel. In the context of genetic mutations, an indel refers to the insertion or deletion\nof nucleotides in a DNA sequence. The ref2_allele specifically represents the reference allele\nthat is
    • \n
    • alt_allele: The alt_allele parameter represents the alternate allele in a genetic\nmutation. In genetics, an allele is one of the possible forms of a gene. In the context of this\ncode, alt_allele is used to store the alternate allele that is present in a mutation
    • \n
    • cdna_start: The cdna_start parameter is used to specify the start position of the\nmutation in the cDNA sequence. It is an optional parameter and if not provided, it will be set\nto a default value of CDNACoord()
    • \n
    • cdna_end: The cdna_end parameter is used to store the end coordinate of the cDNA\n(complementary DNA) sequence. It is an optional parameter and if not provided, it will be\ninitialized as a CDNACoord object. The CDNACoord object is likely a
    • \n
    • pep_extra: The pep_extra parameter is a string that represents any additional\ninformation related to the protein. It is used in the context of protein-specific fields
    • \n
    \n", "signature": "(\tname: str = '',\tprefix: str = '',\tchrom: str = '',\ttranscript: str = '',\ttranscript_protein: str = None,\tgene: str = '',\texon: str = None,\tkind: str = '',\tmutation_type: str = None,\tstart: int = 0,\tend: int = 0,\tref_allele: str = '',\tref2_allele: str = '',\talt_allele: str = '',\tcdna_start: int = None,\tcdna_end: int = None,\tpep_extra: str = '')"}, {"fullname": "howard.objects.hgvs.HGVSName.name", "modulename": "howard.objects.hgvs", "qualname": "HGVSName.name", "kind": "variable", "doc": "

    \n"}, {"fullname": "howard.objects.hgvs.HGVSName.prefix", "modulename": "howard.objects.hgvs", "qualname": "HGVSName.prefix", "kind": "variable", "doc": "

    \n"}, {"fullname": "howard.objects.hgvs.HGVSName.chrom", "modulename": "howard.objects.hgvs", "qualname": "HGVSName.chrom", "kind": "variable", "doc": "

    \n"}, {"fullname": "howard.objects.hgvs.HGVSName.transcript", "modulename": "howard.objects.hgvs", "qualname": "HGVSName.transcript", "kind": "variable", "doc": "

    \n"}, {"fullname": "howard.objects.hgvs.HGVSName.transcript_protein", "modulename": "howard.objects.hgvs", "qualname": "HGVSName.transcript_protein", "kind": "variable", "doc": "

    \n"}, {"fullname": "howard.objects.hgvs.HGVSName.gene", "modulename": "howard.objects.hgvs", "qualname": "HGVSName.gene", "kind": "variable", "doc": "

    \n"}, {"fullname": "howard.objects.hgvs.HGVSName.exon", "modulename": "howard.objects.hgvs", "qualname": "HGVSName.exon", "kind": "variable", "doc": "

    \n"}, {"fullname": "howard.objects.hgvs.HGVSName.kind", "modulename": "howard.objects.hgvs", "qualname": "HGVSName.kind", "kind": "variable", "doc": "

    \n"}, {"fullname": "howard.objects.hgvs.HGVSName.mutation_type", "modulename": "howard.objects.hgvs", "qualname": "HGVSName.mutation_type", "kind": "variable", "doc": "

    \n"}, {"fullname": "howard.objects.hgvs.HGVSName.start", "modulename": "howard.objects.hgvs", "qualname": "HGVSName.start", "kind": "variable", "doc": "

    \n"}, {"fullname": "howard.objects.hgvs.HGVSName.end", "modulename": "howard.objects.hgvs", "qualname": "HGVSName.end", "kind": "variable", "doc": "

    \n"}, {"fullname": "howard.objects.hgvs.HGVSName.ref_allele", "modulename": "howard.objects.hgvs", "qualname": "HGVSName.ref_allele", "kind": "variable", "doc": "

    \n"}, {"fullname": "howard.objects.hgvs.HGVSName.ref2_allele", "modulename": "howard.objects.hgvs", "qualname": "HGVSName.ref2_allele", "kind": "variable", "doc": "

    \n"}, {"fullname": "howard.objects.hgvs.HGVSName.alt_allele", "modulename": "howard.objects.hgvs", "qualname": "HGVSName.alt_allele", "kind": "variable", "doc": "

    \n"}, {"fullname": "howard.objects.hgvs.HGVSName.cdna_start", "modulename": "howard.objects.hgvs", "qualname": "HGVSName.cdna_start", "kind": "variable", "doc": "

    \n"}, {"fullname": "howard.objects.hgvs.HGVSName.cdna_end", "modulename": "howard.objects.hgvs", "qualname": "HGVSName.cdna_end", "kind": "variable", "doc": "

    \n"}, {"fullname": "howard.objects.hgvs.HGVSName.pep_extra", "modulename": "howard.objects.hgvs", "qualname": "HGVSName.pep_extra", "kind": "variable", "doc": "

    \n"}, {"fullname": "howard.objects.hgvs.HGVSName.parse", "modulename": "howard.objects.hgvs", "qualname": "HGVSName.parse", "kind": "function", "doc": "

    The parse function is used to split an HGVS name into a prefix and allele, and then validate\nthe parsed components.

    \n\n
    Parameters
    \n\n
      \n
    • name: The name parameter is a string that represents an HGVS name. It is the input to\nthe parse function and is used to parse the HGVS name by splitting it into a prefix and allele
    • \n
    \n", "signature": "(self, name: str) -> None:", "funcdef": "def"}, {"fullname": "howard.objects.hgvs.HGVSName.parse_prefix", "modulename": "howard.objects.hgvs", "qualname": "HGVSName.parse_prefix", "kind": "function", "doc": "

    The parse_prefix function is used to parse a HGVS prefix (gene/transcript/chromosome) and\nassign the parsed values to the corresponding attributes of the object.

    \n\n
    Parameters
    \n\n
      \n
    • prefix: The prefix parameter is a string that represents a HGVS prefix, which can be a\ngene, transcript, or chromosome identifier. It is used to determine the type of prefix and\nassign the parsed values to the corresponding attributes of the object
    • \n
    \n\n
    Returns
    \n\n
    \n

    The function parse_prefix returns the parsed values for the transcript and gene\n attributes, or sets the chrom or gene attributes based on the given prefix.

    \n
    \n", "signature": "(self, prefix: str):", "funcdef": "def"}, {"fullname": "howard.objects.hgvs.HGVSName.parse_allele", "modulename": "howard.objects.hgvs", "qualname": "HGVSName.parse_allele", "kind": "function", "doc": "

    The function parse_allele parses a HGVS allele description and determines the kind of HGVS\nname (c., p., g., etc.) and the mutation type.

    \n\n

    Some examples include:\n cDNA substitution: c.101A>C,\n cDNA indel: c.3428delCinsTA, c.1000_1003delATG, c.1000_1001insATG\n No protein change: p.Glu1161=\n Protein change: p.Glu1161Ser\n Protein frameshift: p.Glu1161_Ser1164?fs\n Genomic substitution: g.1000100A>T\n Genomic indel: g.1000100_1000102delATG

    \n\n
    Parameters
    \n\n
      \n
    • allele: The allele parameter is a string that represents a HGVS allele description. It\ncan contain various types of mutations, such as cDNA substitutions, cDNA indels, protein\nchanges, protein frameshifts, genomic substitutions, and genomic indels. The purpose of the\nparse_allele
    • \n
    \n", "signature": "(self, allele: str) -> None:", "funcdef": "def"}, {"fullname": "howard.objects.hgvs.HGVSName.parse_cdna", "modulename": "howard.objects.hgvs", "qualname": "HGVSName.parse_cdna", "kind": "function", "doc": "

    The function parse_cdna is used to parse a HGVS cDNA name and extract information such as\nmutation type, coordinates, and alleles.

    \n\n

    Some examples include:\n Substitution: 101A>C,\n Indel: 3428delCinsTA, 1000_1003delATG, 1000_1001insATG

    \n\n
    Parameters
    \n\n
      \n
    • details: The details parameter is a string that represents a HGVS cDNA name. It\ncontains information about a genetic mutation, such as a substitution or an indel, along with\nthe specific coordinates and alleles involved in the mutation
    • \n
    \n\n
    Returns
    \n\n
    \n

    None.

    \n
    \n", "signature": "(self, details: str) -> None:", "funcdef": "def"}, {"fullname": "howard.objects.hgvs.HGVSName.parse_protein", "modulename": "howard.objects.hgvs", "qualname": "HGVSName.parse_protein", "kind": "function", "doc": "

    The function parse_protein is used to parse a HGVS protein name and extract information such\nas mutation type, coordinates, alleles, and additional details.

    \n\n

    Some examples include:\n No change: Glu1161=\n Change: Glu1161Ser\n Frameshift: Glu1161_Ser1164?fs

    \n\n
    Parameters
    \n\n
      \n
    • details: The details parameter is a string that represents a HGVS protein name. It\ncontains information about a protein mutation, such as the amino acid change and the position of\nthe mutation
    • \n
    \n\n
    Returns
    \n\n
    \n

    The method parse_protein does not return anything. It updates the instance variables\n of the object it is called on.

    \n
    \n", "signature": "(self, details: str) -> None:", "funcdef": "def"}, {"fullname": "howard.objects.hgvs.HGVSName.parse_genome", "modulename": "howard.objects.hgvs", "qualname": "HGVSName.parse_genome", "kind": "function", "doc": "

    The function parse_genome is used to parse a HGVS genomic name and extract information such as\nmutation type, coordinates, and alleles.

    \n\n

    Some examples include:\n Substitution: 1000100A>T\n Indel: 1000100_1000102delATG

    \n\n
    Parameters
    \n\n
      \n
    • details: The details parameter is a string that represents a HGVS genomic name. It\ncontains information about a genomic mutation, such as a substitution or an indel
    • \n
    \n\n
    Returns
    \n\n
    \n

    None.

    \n
    \n", "signature": "(self, details: str) -> None:", "funcdef": "def"}, {"fullname": "howard.objects.hgvs.HGVSName.format", "modulename": "howard.objects.hgvs", "qualname": "HGVSName.format", "kind": "function", "doc": "

    The format function generates a HGVS name as a string based on various formatting options.

    \n\n
    Parameters
    \n\n
      \n
    • use_prefix: A boolean indicating whether to include the prefix in the HGVS name. If set\nto True, the prefix will be included in the HGVS name. If set to False, the prefix will be\nexcluded. The default value is True, defaults to True
    • \n
    • use_gene: A boolean indicating whether to include the gene name in the HGVS name. If set\nto True, the gene name will be included in the HGVS name. If set to False, the gene name will\nnot be included. The default value is True, defaults to True
    • \n
    • use_exon: A boolean indicating whether to include exon information in the HGVS name. If\nset to True, exon information will be included in the HGVS name. If set to False, exon\ninformation will not be included, defaults to False
    • \n
    • use_protein: A boolean indicating whether to include the protein change in the HGVS name.\nIf set to True, the protein change will be included in the HGVS name. If set to False, the\nprotein change will not be included, defaults to False
    • \n
    • full_format: A boolean parameter that determines whether the full format of the allele\nshould be included in the output. If set to True, and if the allele is not a protein variant,\nthe allele will be appended with ':p.' followed by the formatted protein variant, defaults to\nFalse (optional)
    • \n
    • use_version: A boolean parameter that determines whether to include the version number in\nthe formatted HGVS name. If set to True, the version number will be included in the output. If\nset to False, the version number will not be included, defaults to False
    • \n
    \n\n
    Returns
    \n\n
    \n

    a HGVS name as a string.

    \n
    \n", "signature": "(\tself,\tuse_prefix: bool = True,\tuse_gene: bool = True,\tuse_exon: bool = False,\tuse_protein: bool = False,\tfull_format=False,\tuse_version: bool = False) -> str:", "funcdef": "def"}, {"fullname": "howard.objects.hgvs.HGVSName.format_prefix", "modulename": "howard.objects.hgvs", "qualname": "HGVSName.format_prefix", "kind": "function", "doc": "

    The format_prefix function generates an HGVS transcript/gene prefix based on various\nparameters.

    \n\n
    Parameters
    \n\n
      \n
    • use_gene: A boolean parameter that determines whether to include the gene name in the\nprefix. If set to True, the gene name will be included in the prefix. If set to False, the gene\nname will not be included in the prefix. The default value is True, defaults to True
    • \n
    • use_exon: A boolean parameter that determines whether to include the exon information in\nthe prefix. If set to True, the exon information will be included in the prefix. If set to\nFalse, the exon information will not be included, defaults to False
    • \n
    • use_protein: A boolean indicating whether to use the protein transcript instead of the\nnucleotide transcript if available. If set to True, the protein transcript will be used. If set\nto False, the nucleotide transcript will be used. The default value is False, defaults to False
    • \n
    • full_format: A boolean parameter that determines whether to generate the full HGVS name\nwith transcript/gene prefix or not. If set to True, the full format will be generated. If set to\nFalse, only the transcript/gene prefix will be generated, defaults to False
    • \n
    • use_version: A boolean parameter that determines whether to include the version number in\nthe transcript prefix. If set to True, the version number will be included in the prefix (e.g.,\nNM_007294.3). If set to False, only the transcript ID without the version number will be\nincluded in the prefix, defaults to False
    • \n
    \n\n
    Returns
    \n\n
    \n

    The function format_prefix returns a formatted HGVS transcript/gene prefix as a\n string.

    \n
    \n", "signature": "(\tself,\tuse_gene: bool = True,\tuse_exon: bool = False,\tuse_protein: bool = False,\tfull_format: bool = False,\tuse_version: bool = False) -> str:", "funcdef": "def"}, {"fullname": "howard.objects.hgvs.HGVSName.format_cdna_coords", "modulename": "howard.objects.hgvs", "qualname": "HGVSName.format_cdna_coords", "kind": "function", "doc": "

    The function format_cdna_coords generates a string representing HGVS cDNA coordinates,\nreturning either the start coordinate or a string in the format \"start_end\" depending on whether\nthe start and end coordinates are the same or not.

    \n\n
    Returns
    \n\n
    \n

    a string representing the cDNA coordinates. If the start and end coordinates are the\n same, it returns just the start coordinate. Otherwise, it returns a string in the format\n \"start_end\".

    \n
    \n", "signature": "(self) -> str:", "funcdef": "def"}, {"fullname": "howard.objects.hgvs.HGVSName.format_dna_allele", "modulename": "howard.objects.hgvs", "qualname": "HGVSName.format_dna_allele", "kind": "function", "doc": "

    The function format_dna_allele generates an HGVS DNA allele based on the mutation type and\nalleles provided.

    \n\n
    Returns
    \n\n
    \n

    The function format_dna_allele returns a string representing the HGVS DNA allele. The\n specific format of the returned string depends on the value of the mutation_type attribute of\n the object. The possible return values are:

    \n
    \n", "signature": "(self) -> str:", "funcdef": "def"}, {"fullname": "howard.objects.hgvs.HGVSName.format_cdna", "modulename": "howard.objects.hgvs", "qualname": "HGVSName.format_cdna", "kind": "function", "doc": "

    The function \"format_cdna\" generates an HGVS cDNA allele by combining the cDNA coordinates and\nthe DNA allele.

    \n\n

    Some examples include:\n Substitution: 101A>C,\n Indel: 3428delCinsTA, 1000_1003delATG, 1000_1001insATG

    \n\n
    Returns
    \n\n
    \n

    a string that represents the HGVS cDNA allele.

    \n
    \n", "signature": "(self) -> str:", "funcdef": "def"}, {"fullname": "howard.objects.hgvs.HGVSName.format_protein", "modulename": "howard.objects.hgvs", "qualname": "HGVSName.format_protein", "kind": "function", "doc": "

    The format_protein function generates an HGVS protein name based on different scenarios such\nas no change, change, frameshift, and range change.

    \n\n

    Some examples include:\n No change: Glu1161=\n Change: Glu1161Ser\n Frameshift: Glu1161_Ser1164?fs

    \n\n
    Returns
    \n\n
    \n

    The method format_protein returns a string representing the HGVS protein name.

    \n
    \n", "signature": "(self) -> str:", "funcdef": "def"}, {"fullname": "howard.objects.hgvs.HGVSName.format_coords", "modulename": "howard.objects.hgvs", "qualname": "HGVSName.format_coords", "kind": "function", "doc": "

    The function format_coords generates a string representation of HGVS cDNA coordinates.

    \n\n
    Returns
    \n\n
    \n

    a string that represents the HGVS cDNA coordinates. If the start and end coordinates\n are the same, it returns just the start coordinate. Otherwise, it returns a string in the format\n \"start_end\".

    \n
    \n", "signature": "(self) -> str:", "funcdef": "def"}, {"fullname": "howard.objects.hgvs.HGVSName.format_genome", "modulename": "howard.objects.hgvs", "qualname": "HGVSName.format_genome", "kind": "function", "doc": "

    The function \"format_genome\" generates an HGVS genomic allele by combining the formatted\ncoordinates and DNA allele.

    \n\n

    Some examples include:\n Substitution: 1000100A>T\n Indel: 1000100_1000102delATG

    \n\n
    Returns
    \n\n
    \n

    a string that represents the HGVS genomic allele.

    \n
    \n", "signature": "(self) -> str:", "funcdef": "def"}, {"fullname": "howard.objects.hgvs.HGVSName.get_raw_coords", "modulename": "howard.objects.hgvs", "qualname": "HGVSName.get_raw_coords", "kind": "function", "doc": "

    The function get_raw_coords returns the genomic coordinates based on the given transcript or\nthe provided chromosomal coordinates.

    \n\n
    Parameters
    \n\n
      \n
    • transcript: The transcript parameter is an object that represents a transcript. It is\nused to retrieve genomic coordinates based on the type of HGVS name (self.kind). The\ntranscript object should have the following attributes and methods:
    • \n
    \n\n
    Returns
    \n\n
    \n

    a tuple containing the genomic coordinates. The tuple consists of three elements: the\n chromosome, the start position, and the end position.

    \n
    \n", "signature": "(self, transcript: object = None) -> tuple:", "funcdef": "def"}, {"fullname": "howard.objects.hgvs.HGVSName.get_ref_coords", "modulename": "howard.objects.hgvs", "qualname": "HGVSName.get_ref_coords", "kind": "function", "doc": "

    The function \"get_ref_coords\" returns the genomic coordinates of the reference allele, taking\ninto account different mutation types.

    \n\n
    Parameters
    \n\n
      \n
    • transcript: The transcript parameter is an optional object that represents a transcript\nor gene. It is used to retrieve the genomic coordinates of the reference allele
    • \n
    \n\n
    Returns
    \n\n
    \n

    a tuple containing the genomic coordinates of the reference allele. The tuple consists\n of three elements: the chromosome, the start position, and the end position.

    \n
    \n", "signature": "(self, transcript: object = None) -> tuple:", "funcdef": "def"}, {"fullname": "howard.objects.hgvs.HGVSName.get_vcf_coords", "modulename": "howard.objects.hgvs", "qualname": "HGVSName.get_vcf_coords", "kind": "function", "doc": "

    The function \"get_vcf_coords\" returns the genomic coordinates of the reference allele in\nVCF-style, with left-padding for indels.

    \n\n
    Parameters
    \n\n
      \n
    • transcript: The transcript parameter is an object that represents a transcript or gene.\nIt is used to retrieve the genomic coordinates of the reference allele
    • \n
    \n\n
    Returns
    \n\n
    \n

    a tuple containing the genomic coordinates of the reference allele in VCF-style. The\n tuple consists of three elements: the chromosome, the start position, and the end position.

    \n
    \n", "signature": "(self, transcript: object = None) -> tuple:", "funcdef": "def"}, {"fullname": "howard.objects.hgvs.HGVSName.get_ref_alt", "modulename": "howard.objects.hgvs", "qualname": "HGVSName.get_ref_alt", "kind": "function", "doc": "

    The function get_ref_alt returns the reference and alternate alleles, with an option to modify\nduplications to look like inserts.

    \n\n
    Parameters
    \n\n
      \n
    • is_forward_strand: The parameter is_forward_strand is a boolean flag that indicates\nwhether the alleles should be returned for the forward strand or the reverse complement strand.\nIf is_forward_strand is True, the alleles will be returned as is. If is_forward_strand is\nFalse, the alleles will be reverse complemented, defaults to True
    • \n
    • raw_dup_alleles: The raw_dup_alleles parameter is a boolean flag that determines\nwhether the raw values of duplicated alleles should be returned. By default, it is set to\nFalse, which means that if the mutation type is a duplication (dup), the reference allele\nwill be represented as an empty string, defaults to False
    • \n
    \n\n
    Returns
    \n\n
    \n

    The function get_ref_alt returns a tuple containing the reference and alternate\n alleles.

    \n
    \n", "signature": "(\tself,\tis_forward_strand: bool = True,\traw_dup_alleles: bool = False) -> tuple:", "funcdef": "def"}, {"fullname": "howard.objects.transcript", "modulename": "howard.objects.transcript", "kind": "module", "doc": "

    Models for representing genomic elements.

    \n"}, {"fullname": "howard.objects.transcript.Gene", "modulename": "howard.objects.transcript", "qualname": "Gene", "kind": "class", "doc": "

    \n"}, {"fullname": "howard.objects.transcript.Gene.__init__", "modulename": "howard.objects.transcript", "qualname": "Gene.__init__", "kind": "function", "doc": "

    \n", "signature": "(name)"}, {"fullname": "howard.objects.transcript.Gene.name", "modulename": "howard.objects.transcript", "qualname": "Gene.name", "kind": "variable", "doc": "

    \n"}, {"fullname": "howard.objects.transcript.Transcript", "modulename": "howard.objects.transcript", "qualname": "Transcript", "kind": "class", "doc": "

    \n"}, {"fullname": "howard.objects.transcript.Transcript.__init__", "modulename": "howard.objects.transcript", "qualname": "Transcript.__init__", "kind": "function", "doc": "

    The function initializes an object with various attributes related to a gene and its transcript.

    \n\n
    Parameters
    \n\n
      \n
    • name: A string representing the name of the coding sequence
    • \n
    • version: The version parameter is a string that represents the version of the object.\nIt is used to track changes or updates to the object over time
    • \n
    • gene: The gene parameter is a string that represents the gene associated with the\ncoding sequence
    • \n
    • tx_position: The tx_position parameter represents the position of the transcript. It is\nan integer value that indicates the position of the transcript in the genome
    • \n
    • cds_position: The cds_position parameter represents the position of the coding sequence\n(CDS) within the transcript. It is an integer value that indicates the starting position of the\nCDS within the transcript sequence
    • \n
    • is_default: The is_default parameter is a boolean flag that indicates whether the\ninstance of the class is the default version of the gene. It is set to False by default, but\ncan be set to True if the instance is the default version, defaults to False
    • \n
    • cdna_match: The cdna_match parameter is a list that contains the positions of the\nmatching cDNA sequences. It is an optional parameter and if not provided, it defaults to an\nempty list
    • \n
    • start_codon_transcript_pos: The parameter \"start_codon_transcript_pos\" is an optional\nparameter that represents the transcript position of the start codon. It is used to store the\npre-calculated transcript position of the start codon for a specific gene
    • \n
    • stop_codon_transcript_pos: The parameter stop_codon_transcript_pos is an optional\ninteger that represents the transcript position of the stop codon. It is used to store the\npre-calculated transcript coordinate of the stop codon. If not provided, it will be set to\nNone
    • \n
    \n", "signature": "(\tname: str,\tversion: str,\tgene: str,\ttx_position: int,\tcds_position: int,\tis_default: bool = False,\tcdna_match: list = None,\tstart_codon_transcript_pos: int = None,\tstop_codon_transcript_pos: int = None)"}, {"fullname": "howard.objects.transcript.Transcript.name", "modulename": "howard.objects.transcript", "qualname": "Transcript.name", "kind": "variable", "doc": "

    \n"}, {"fullname": "howard.objects.transcript.Transcript.version", "modulename": "howard.objects.transcript", "qualname": "Transcript.version", "kind": "variable", "doc": "

    \n"}, {"fullname": "howard.objects.transcript.Transcript.gene", "modulename": "howard.objects.transcript", "qualname": "Transcript.gene", "kind": "variable", "doc": "

    \n"}, {"fullname": "howard.objects.transcript.Transcript.tx_position", "modulename": "howard.objects.transcript", "qualname": "Transcript.tx_position", "kind": "variable", "doc": "

    \n"}, {"fullname": "howard.objects.transcript.Transcript.cds_position", "modulename": "howard.objects.transcript", "qualname": "Transcript.cds_position", "kind": "variable", "doc": "

    \n"}, {"fullname": "howard.objects.transcript.Transcript.is_default", "modulename": "howard.objects.transcript", "qualname": "Transcript.is_default", "kind": "variable", "doc": "

    \n"}, {"fullname": "howard.objects.transcript.Transcript.cdna_match", "modulename": "howard.objects.transcript", "qualname": "Transcript.cdna_match", "kind": "variable", "doc": "

    \n"}, {"fullname": "howard.objects.transcript.Transcript.full_name", "modulename": "howard.objects.transcript", "qualname": "Transcript.full_name", "kind": "variable", "doc": "

    The function full_name returns the full name of an object, including its version if it exists.

    \n\n
    Returns
    \n\n
    \n

    a string. If the version attribute of the object is not None, it returns a string\n in the format name.version. Otherwise, it returns just the name attribute.

    \n
    \n", "annotation": ": str"}, {"fullname": "howard.objects.transcript.Transcript.is_coding", "modulename": "howard.objects.transcript", "qualname": "Transcript.is_coding", "kind": "variable", "doc": "

    The function checks if a coding transcript has a non-zero length coding sequence.

    \n\n
    Returns
    \n\n
    \n

    a boolean value indicating whether the coding transcript has a coding sequence (CDS)\n with a non-zero length.

    \n
    \n", "annotation": ": bool"}, {"fullname": "howard.objects.transcript.Transcript.strand", "modulename": "howard.objects.transcript", "qualname": "Transcript.strand", "kind": "variable", "doc": "

    The function returns a string '+' if the tx_position is on the forward strand, and '-' if it is\non the reverse strand.

    \n\n
    Returns
    \n\n
    \n

    a string that represents the strand of the given self.tx_position. If\n self.tx_position.is_forward_strand is True, then the string returned is '+'. Otherwise, the\n string returned is '-'.

    \n
    \n", "annotation": ": str"}, {"fullname": "howard.objects.transcript.Transcript.ordered_cdna_match", "modulename": "howard.objects.transcript", "qualname": "Transcript.ordered_cdna_match", "kind": "function", "doc": "

    The function \"ordered_cdna_match\" sorts a list of cdna_match objects based on their\ntx_position.chrom_start attribute and returns the sorted list.

    \n\n
    Returns
    \n\n
    \n

    a sorted list of cdna_match objects.

    \n
    \n", "signature": "(self) -> str:", "funcdef": "def"}, {"fullname": "howard.objects.transcript.Transcript.get_cds_start_stop", "modulename": "howard.objects.transcript", "qualname": "Transcript.get_cds_start_stop", "kind": "function", "doc": "

    The function \"get_cds_start_stop\" returns the start and stop positions of a coding sequence,\ntaking into account the direction of the strand.

    \n\n
    Returns
    \n\n
    \n

    a tuple containing the start and stop positions of the coding sequence (CDS).

    \n
    \n", "signature": "(self) -> tuple:", "funcdef": "def"}, {"fullname": "howard.objects.transcript.Transcript.start_codon", "modulename": "howard.objects.transcript", "qualname": "Transcript.start_codon", "kind": "function", "doc": "

    The function returns the transcript position of the start codon.

    \n\n
    Returns
    \n\n
    \n

    the transcript position of the start codon.

    \n
    \n", "signature": "(self) -> int:", "funcdef": "def"}, {"fullname": "howard.objects.transcript.Transcript.stop_codon", "modulename": "howard.objects.transcript", "qualname": "Transcript.stop_codon", "kind": "function", "doc": "

    The function returns the transcript position of the stop codon.

    \n\n
    Returns
    \n\n
    \n

    The method stop_codon returns an integer, which represents the transcript position of\n the stop codon.

    \n
    \n", "signature": "(self) -> int:", "funcdef": "def"}, {"fullname": "howard.objects.transcript.Transcript.cdna_to_genomic_coord", "modulename": "howard.objects.transcript", "qualname": "Transcript.cdna_to_genomic_coord", "kind": "function", "doc": "

    The function cdna_to_genomic_coord converts a HGVS cDNA coordinate to a genomic coordinate.

    \n\n
    Parameters
    \n\n
      \n
    • coord: The parameter coord is an object that represents a cDNA coordinate. It is used\nto specify a position along a cDNA sequence
    • \n
    \n\n
    Returns
    \n\n
    \n

    an integer value, which represents the genomic coordinate corresponding to the given\n cDNA coordinate.

    \n
    \n", "signature": "(self, coord: object) -> int:", "funcdef": "def"}, {"fullname": "howard.objects.transcript.Transcript.genomic_to_cdna_coord", "modulename": "howard.objects.transcript", "qualname": "Transcript.genomic_to_cdna_coord", "kind": "function", "doc": "

    The function genomic_to_cdna_coord converts a genomic coordinate to a cDNA coordinate and\noffset, taking into account exons, strand, and coding transcript information.

    \n\n
    Parameters
    \n\n
      \n
    • genomic_coord: The genomic_coord parameter is an integer representing a genomic\ncoordinate
    • \n
    \n\n
    Returns
    \n\n
    \n

    an object of type CDNACoord.

    \n
    \n", "signature": "(self, genomic_coord: int) -> object:", "funcdef": "def"}, {"fullname": "howard.objects.transcript.Transcript.find_exon_number", "modulename": "howard.objects.transcript", "qualname": "Transcript.find_exon_number", "kind": "function", "doc": "

    The function find_exon_number returns the exon number for a given position.

    \n\n
    Parameters
    \n\n
      \n
    • offset: The offset parameter represents a position in the genome. It is an integer value\nthat indicates the position of interest within the genome
    • \n
    \n\n
    Returns
    \n\n
    \n

    an integer value, which represents the exon number for a given position.

    \n
    \n", "signature": "(self, offset: int) -> int:", "funcdef": "def"}, {"fullname": "howard.objects.transcript.BED6Interval_base", "modulename": "howard.objects.transcript", "qualname": "BED6Interval_base", "kind": "class", "doc": "

    BED6Interval_base(chrom, chrom_start, chrom_end, name, score, strand)

    \n", "bases": "builtins.tuple"}, {"fullname": "howard.objects.transcript.BED6Interval_base.__init__", "modulename": "howard.objects.transcript", "qualname": "BED6Interval_base.__init__", "kind": "function", "doc": "

    Create new instance of BED6Interval_base(chrom, chrom_start, chrom_end, name, score, strand)

    \n", "signature": "(chrom, chrom_start, chrom_end, name, score, strand)"}, {"fullname": "howard.objects.transcript.BED6Interval_base.chrom", "modulename": "howard.objects.transcript", "qualname": "BED6Interval_base.chrom", "kind": "variable", "doc": "

    Alias for field number 0

    \n"}, {"fullname": "howard.objects.transcript.BED6Interval_base.chrom_start", "modulename": "howard.objects.transcript", "qualname": "BED6Interval_base.chrom_start", "kind": "variable", "doc": "

    Alias for field number 1

    \n"}, {"fullname": "howard.objects.transcript.BED6Interval_base.chrom_end", "modulename": "howard.objects.transcript", "qualname": "BED6Interval_base.chrom_end", "kind": "variable", "doc": "

    Alias for field number 2

    \n"}, {"fullname": "howard.objects.transcript.BED6Interval_base.name", "modulename": "howard.objects.transcript", "qualname": "BED6Interval_base.name", "kind": "variable", "doc": "

    Alias for field number 3

    \n"}, {"fullname": "howard.objects.transcript.BED6Interval_base.score", "modulename": "howard.objects.transcript", "qualname": "BED6Interval_base.score", "kind": "variable", "doc": "

    Alias for field number 4

    \n"}, {"fullname": "howard.objects.transcript.BED6Interval_base.strand", "modulename": "howard.objects.transcript", "qualname": "BED6Interval_base.strand", "kind": "variable", "doc": "

    Alias for field number 5

    \n"}, {"fullname": "howard.objects.transcript.BED6Interval", "modulename": "howard.objects.transcript", "qualname": "BED6Interval", "kind": "class", "doc": "

    BED6Interval_base(chrom, chrom_start, chrom_end, name, score, strand)

    \n", "bases": "BED6Interval_base"}, {"fullname": "howard.objects.transcript.BED6Interval.__init__", "modulename": "howard.objects.transcript", "qualname": "BED6Interval.__init__", "kind": "function", "doc": "

    Create new instance of BED6Interval_base(chrom, chrom_start, chrom_end, name, score, strand)

    \n", "signature": "(chrom, chrom_start, chrom_end, name, score, strand)"}, {"fullname": "howard.objects.transcript.BED6Interval.distance", "modulename": "howard.objects.transcript", "qualname": "BED6Interval.distance", "kind": "function", "doc": "

    The distance function calculates the distance between an offset and an interval, returning\nzero if the offset is inside the interval, a positive value if the interval comes after the\noffset, and a negative value if the interval comes before the offset.\nif offset is inside the exon, distance is zero.\notherwise, distance is the distance to the nearest edge.\ndistance is positive if the exon comes after the offset.\ndistance is negative if the exon comes before the offset.

    \n\n
    Parameters
    \n\n
      \n
    • offset: The offset parameter represents a position or point in the genome. It is an\ninteger value that indicates the position within the genome sequence
    • \n
    \n\n
    Returns
    \n\n
    \n

    an integer value, which represents the distance to the interval.

    \n
    \n", "signature": "(self, offset: int) -> int:", "funcdef": "def"}, {"fullname": "howard.objects.transcript.Exon", "modulename": "howard.objects.transcript", "qualname": "Exon", "kind": "class", "doc": "

    \n"}, {"fullname": "howard.objects.transcript.Exon.__init__", "modulename": "howard.objects.transcript", "qualname": "Exon.__init__", "kind": "function", "doc": "

    The function initializes an object with a transcript, a position in the transcript, and a\nnumber.

    \n\n
    Parameters
    \n\n
      \n
    • transcript: The transcript parameter is of type Transcript. It represents a\ntranscript object that contains information about a genomic transcript
    • \n
    • tx_position: The tx_position parameter represents the genomic position of the\ntranscript. It is an integer value that indicates the position of the transcript in the\ngenome
    • \n
    • number: The \"number\" parameter is an integer that represents a specific number. It is\nused as a parameter in the constructor of a class
    • \n
    \n", "signature": "(\ttranscript: howard.objects.transcript.Transcript,\ttx_position: int,\tnumber: int)"}, {"fullname": "howard.objects.transcript.Exon.transcript", "modulename": "howard.objects.transcript", "qualname": "Exon.transcript", "kind": "variable", "doc": "

    \n"}, {"fullname": "howard.objects.transcript.Exon.tx_position", "modulename": "howard.objects.transcript", "qualname": "Exon.tx_position", "kind": "variable", "doc": "

    \n"}, {"fullname": "howard.objects.transcript.Exon.number", "modulename": "howard.objects.transcript", "qualname": "Exon.number", "kind": "variable", "doc": "

    \n"}, {"fullname": "howard.objects.transcript.Exon.name", "modulename": "howard.objects.transcript", "qualname": "Exon.name", "kind": "variable", "doc": "

    The function returns a string that combines the name of the transcript and a number.

    \n\n
    Returns
    \n\n
    \n

    a string that combines the name of the transcript with the number. The format of the\n string is \"{transcript name}.{number}\".

    \n
    \n", "annotation": ": str"}, {"fullname": "howard.objects.transcript.Exon.get_as_interval", "modulename": "howard.objects.transcript", "qualname": "Exon.get_as_interval", "kind": "function", "doc": "

    The function get_as_interval returns the coding region for an exon as a BED6Interval object.\nThis function returns a BED6Interval objects containing position\ninformation for this exon. This may be used as input for\npybedtools.create_interval_from_list() after casting chrom_start\nand chrom_end as strings.

    \n\n
    Parameters
    \n\n
      \n
    • coding_only: The coding_only parameter is a boolean flag that determines whether to\ninclude only exons in the coding region. If coding_only is set to True, the function will\ncheck if the exon is completely outside the coding region defined by the transcript's CDS\n(coding sequence) position, defaults to False
    • \n
    \n\n
    Returns
    \n\n
    \n

    a BED6Interval object.

    \n
    \n", "signature": "(self, coding_only: bool = False) -> object:", "funcdef": "def"}, {"fullname": "howard.objects.transcript.Exon.strand", "modulename": "howard.objects.transcript", "qualname": "Exon.strand", "kind": "variable", "doc": "

    \n"}, {"fullname": "howard.objects.transcript.CDNA_Match", "modulename": "howard.objects.transcript", "qualname": "CDNA_Match", "kind": "class", "doc": "

    \n", "bases": "Exon"}, {"fullname": "howard.objects.transcript.CDNA_Match.__init__", "modulename": "howard.objects.transcript", "qualname": "CDNA_Match.__init__", "kind": "function", "doc": "

    The function initializes a CDNA_Match object with specified attributes.

    \n\n
    Parameters
    \n\n
      \n
    • transcript: The transcript parameter is an instance of the Transcript class. It\nrepresents the transcript that the CDNA match belongs to
    • \n
    • tx_position: The tx_position parameter represents the position of the transcript in the\ngenome. It is an integer value
    • \n
    • cdna_start: The cdna_start parameter represents the starting position of the cDNA\nmatch. It is an integer value
    • \n
    • cdna_end: The cdna_end parameter represents the end position of the cDNA match. It is\nan integer value that indicates the position of the last nucleotide in the cDNA sequence that\nmatches the transcript
    • \n
    • gap: The \"gap\" parameter represents the number of nucleotides that are missing or\ninserted in the cDNA sequence compared to the reference transcript sequence. It indicates the\npresence of gaps or insertions in the alignment between the cDNA and the reference transcript
    • \n
    • number: The number parameter represents the number of the CDNA match. It is used to\nuniquely identify each CDNA match object
    • \n
    \n", "signature": "(\ttranscript: howard.objects.transcript.Transcript,\ttx_position: int,\tcdna_start: int,\tcdna_end: int,\tgap: int,\tnumber: int)"}, {"fullname": "howard.objects.transcript.CDNA_Match.cdna_start", "modulename": "howard.objects.transcript", "qualname": "CDNA_Match.cdna_start", "kind": "variable", "doc": "

    \n"}, {"fullname": "howard.objects.transcript.CDNA_Match.cdna_end", "modulename": "howard.objects.transcript", "qualname": "CDNA_Match.cdna_end", "kind": "variable", "doc": "

    \n"}, {"fullname": "howard.objects.transcript.CDNA_Match.gap", "modulename": "howard.objects.transcript", "qualname": "CDNA_Match.gap", "kind": "variable", "doc": "

    \n"}, {"fullname": "howard.objects.transcript.CDNA_Match.length", "modulename": "howard.objects.transcript", "qualname": "CDNA_Match.length", "kind": "variable", "doc": "

    The function calculates the length of a sequence by subtracting the start position from the end\nposition and adding 1.

    \n\n
    Returns
    \n\n
    \n

    The length of the sequence, calculated by subtracting the cdna_start from the cdna_end\n and adding 1.

    \n
    \n", "annotation": ": int"}, {"fullname": "howard.objects.transcript.CDNA_Match.get_offset", "modulename": "howard.objects.transcript", "qualname": "CDNA_Match.get_offset", "kind": "function", "doc": "

    The get_offset function calculates the offset for a given position in a cDNA sequence based on\nthe GAP attribute.\ncdna_match GAP attribute looks like: 'M185 I3 M250' which is code/length\n@see https://github.com/The-Sequence-Ontology/Specifications/blob/master/gff3.md#the-gap-attribute\ncodes operation\nM match\nI insert a gap into the reference sequence\nD insert a gap into the target (delete from reference)\nIf you want the whole exon, then pass the end

    \n\n
    Parameters
    \n\n
      \n
    • position: The position parameter is an integer that represents the position in the\nsequence. It is used to calculate the offset based on the GAP attribute of the cDNA match
    • \n
    • validate: The validate parameter is a boolean flag that determines whether to perform\nvalidation checks during the calculation of the offset. If validate is set to True, the\nfunction will raise a ValueError if the given position falls within an insertion or deletion\ngap. If validate is set, defaults to True
    • \n
    \n\n
    Returns
    \n\n
    \n

    an integer value representing the offset for a given position in the cDNA sequence.

    \n
    \n", "signature": "(self, position: int, validate: bool = True) -> int:", "funcdef": "def"}, {"fullname": "howard.objects.variant", "modulename": "howard.objects.variant", "kind": "module", "doc": "

    Methods for manipulating genetic variants.

    \n"}, {"fullname": "howard.objects.variant.Position", "modulename": "howard.objects.variant", "qualname": "Position", "kind": "class", "doc": "

    A position in the genome.

    \n"}, {"fullname": "howard.objects.variant.Position.__init__", "modulename": "howard.objects.variant", "qualname": "Position.__init__", "kind": "function", "doc": "

    \n", "signature": "(chrom, chrom_start, chrom_stop, is_forward_strand)"}, {"fullname": "howard.objects.variant.Position.chrom", "modulename": "howard.objects.variant", "qualname": "Position.chrom", "kind": "variable", "doc": "

    \n"}, {"fullname": "howard.objects.variant.Position.chrom_start", "modulename": "howard.objects.variant", "qualname": "Position.chrom_start", "kind": "variable", "doc": "

    \n"}, {"fullname": "howard.objects.variant.Position.chrom_stop", "modulename": "howard.objects.variant", "qualname": "Position.chrom_stop", "kind": "variable", "doc": "

    \n"}, {"fullname": "howard.objects.variant.Position.is_forward_strand", "modulename": "howard.objects.variant", "qualname": "Position.is_forward_strand", "kind": "variable", "doc": "

    \n"}, {"fullname": "howard.objects.variant.revcomp", "modulename": "howard.objects.variant", "qualname": "revcomp", "kind": "function", "doc": "

    Reverse complement.

    \n", "signature": "(seq):", "funcdef": "def"}, {"fullname": "howard.objects.variant.get_sequence", "modulename": "howard.objects.variant", "qualname": "get_sequence", "kind": "function", "doc": "

    Return a sequence for the genomic region.

    \n\n

    Coordinates are 0-based, end-exclusive.

    \n", "signature": "(genome, chrom, start, end, is_forward_strand=True):", "funcdef": "def"}, {"fullname": "howard.objects.variant.get_sequence_from_position", "modulename": "howard.objects.variant", "qualname": "get_sequence_from_position", "kind": "function", "doc": "

    Return a sequence for the genomic region

    \n\n

    Position is 0-based, end-exclusive.

    \n", "signature": "(genome, position):", "funcdef": "def"}, {"fullname": "howard.objects.variant.justify_indel", "modulename": "howard.objects.variant", "qualname": "justify_indel", "kind": "function", "doc": "

    Justify an indel to the left or right along a sequence 'seq'.

    \n\n

    start, end: 0-based, end-exclusive coordinates of 'indel' within the\n sequence 'seq'. Inserts denote the insertion point using start=end\n and deletions indicate the deleted region with (start,end).\nindel: indel sequence, can be insertion or deletion.\nseq: a larger sequence containing the indel. Can be a fragment from the\n genome.\njustify: Which direction to justify the indel ('left', 'right').

    \n", "signature": "(start, end, indel, seq, justify):", "funcdef": "def"}, {"fullname": "howard.objects.variant.justify_genomic_indel", "modulename": "howard.objects.variant", "qualname": "justify_genomic_indel", "kind": "function", "doc": "

    start, end: 0-based, end-exclusive coordinates of 'indel'.

    \n", "signature": "(genome, chrom, start, end, indel, justify, flank_length=20):", "funcdef": "def"}, {"fullname": "howard.objects.variant.normalize_variant", "modulename": "howard.objects.variant", "qualname": "normalize_variant", "kind": "function", "doc": "

    Normalize variant according to the GATK/VCF standard.

    \n\n

    chrom: chromosome containing variant.\noffset: 1-based coordinate of reference allele in the genome.\nref_sequence: reference allele.\nalt_sequences: list of all alternate sequences.\ngenome: pygr-compatible genome object.

    \n", "signature": "(\tchrom,\toffset,\tref_sequence,\talt_sequences,\tgenome,\tjustify='left',\tflank_length=30,\tindels_start_with_same_base=True):", "funcdef": "def"}, {"fullname": "howard.objects.variant.NormalizedVariant", "modulename": "howard.objects.variant", "qualname": "NormalizedVariant", "kind": "class", "doc": "

    Normalizes variant representation to match GATK/VCF.

    \n"}, {"fullname": "howard.objects.variant.NormalizedVariant.__init__", "modulename": "howard.objects.variant", "qualname": "NormalizedVariant.__init__", "kind": "function", "doc": "

    position: a 0-index genomic Position.\nref_allele: the reference allele sequence.\nalt_alleles: a list of alternate allele sequences.\nseq_5p: 5 prime flanking sequence of variant.\nseq_3p: 3 prime flanking sequence of variant.\ngenome: a pygr compatible genome object (optional).

    \n\n

    indels_start_with_same_base: DML - I have no idea why this is required\n but am keeping for backwards compat

    \n", "signature": "(\tposition,\tref_allele,\talt_alleles,\tseq_5p='',\tseq_3p='',\tgenome=None,\tjustify='left',\tindels_start_with_same_base=True)"}, {"fullname": "howard.objects.variant.NormalizedVariant.position", "modulename": "howard.objects.variant", "qualname": "NormalizedVariant.position", "kind": "variable", "doc": "

    \n"}, {"fullname": "howard.objects.variant.NormalizedVariant.alleles", "modulename": "howard.objects.variant", "qualname": "NormalizedVariant.alleles", "kind": "variable", "doc": "

    \n"}, {"fullname": "howard.objects.variant.NormalizedVariant.seq_5p", "modulename": "howard.objects.variant", "qualname": "NormalizedVariant.seq_5p", "kind": "variable", "doc": "

    \n"}, {"fullname": "howard.objects.variant.NormalizedVariant.seq_3p", "modulename": "howard.objects.variant", "qualname": "NormalizedVariant.seq_3p", "kind": "variable", "doc": "

    \n"}, {"fullname": "howard.objects.variant.NormalizedVariant.genome", "modulename": "howard.objects.variant", "qualname": "NormalizedVariant.genome", "kind": "variable", "doc": "

    \n"}, {"fullname": "howard.objects.variant.NormalizedVariant.log", "modulename": "howard.objects.variant", "qualname": "NormalizedVariant.log", "kind": "variable", "doc": "

    \n"}, {"fullname": "howard.objects.variant.NormalizedVariant.indels_start_with_same_base", "modulename": "howard.objects.variant", "qualname": "NormalizedVariant.indels_start_with_same_base", "kind": "variable", "doc": "

    \n"}, {"fullname": "howard.objects.variant.NormalizedVariant.molecular_class", "modulename": "howard.objects.variant", "qualname": "NormalizedVariant.molecular_class", "kind": "variable", "doc": "

    \n"}, {"fullname": "howard.objects.variant.NormalizedVariant.ref_allele", "modulename": "howard.objects.variant", "qualname": "NormalizedVariant.ref_allele", "kind": "variable", "doc": "

    \n"}, {"fullname": "howard.objects.variant.NormalizedVariant.alt_alleles", "modulename": "howard.objects.variant", "qualname": "NormalizedVariant.alt_alleles", "kind": "variable", "doc": "

    \n"}, {"fullname": "howard.objects.variant.NormalizedVariant.variant", "modulename": "howard.objects.variant", "qualname": "NormalizedVariant.variant", "kind": "variable", "doc": "

    \n"}, {"fullname": "howard.objects.variants", "modulename": "howard.objects.variants", "kind": "module", "doc": "

    \n"}, {"fullname": "howard.objects.variants.Variants", "modulename": "howard.objects.variants", "qualname": "Variants", "kind": "class", "doc": "

    \n"}, {"fullname": "howard.objects.variants.Variants.__init__", "modulename": "howard.objects.variants", "qualname": "Variants.__init__", "kind": "function", "doc": "

    The function __init__ initializes the variables, sets the input, output, config, param, connexion and\nheader

    \n\n
    Parameters
    \n\n
      \n
    • conn: the connection to the database
    • \n
    • input: the input file
    • \n
    • output: the output file
    • \n
    • config: a dictionary containing the configuration of the model
    • \n
    • param: a dictionary containing the parameters of the model
    • \n
    \n", "signature": "(\tconn=None,\tinput: str = None,\toutput: str = None,\tconfig: dict = {},\tparam: dict = {},\tload: bool = False)"}, {"fullname": "howard.objects.variants.Variants.load_header", "modulename": "howard.objects.variants", "qualname": "Variants.load_header", "kind": "function", "doc": "

    Load header in a table, with INFO, FORMAT, FILTERS, SAMPLES and METADATA

    \n\n

    Args:\n header (vcfobject, optional): VCF object from pyVCF. Defaults to None (header of the Variants object).\n table (str, optional): Table name of the header table. Defaults to None (defined as 'header' later).\n drop (bool, optional): Drop table if exists. Defaults to False.\n view_name (str, optional): Name of the table. Defaults to 'header'.

    \n\n

    Returns:\n str: Name of the table, None otherwise

    \n", "signature": "(\tself,\theader=None,\ttable: str = None,\tdrop: bool = False,\tview_name: str = 'header') -> str:", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.set_samples", "modulename": "howard.objects.variants", "qualname": "Variants.set_samples", "kind": "function", "doc": "

    The function set_samples sets the samples attribute of an object to a provided list or\nretrieves it from a parameter dictionary.

    \n\n
    Parameters
    \n\n
      \n
    • samples: The set_samples method is a method of a class that takes a list of samples as\ninput and sets the samples attribute of the class to the provided list. If no samples are\nprovided, it tries to get the samples from the class's parameters using the get_param method
    • \n
    \n\n
    Returns
    \n\n
    \n

    The samples list is being returned.

    \n
    \n", "signature": "(self, samples: list = None) -> list:", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.get_samples", "modulename": "howard.objects.variants", "qualname": "Variants.get_samples", "kind": "function", "doc": "

    This function returns a list of samples.

    \n\n
    Returns
    \n\n
    \n

    The get_samples method is returning the samples attribute of the object.

    \n
    \n", "signature": "(self) -> list:", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.get_samples_check", "modulename": "howard.objects.variants", "qualname": "Variants.get_samples_check", "kind": "function", "doc": "

    This function returns the value of the \"check\" key within the \"samples\" dictionary retrieved\nfrom the parameters.

    \n\n
    Returns
    \n\n
    \n

    The method get_samples_check is returning the value of the key \"check\" inside the\n \"samples\" dictionary, which is nested inside the dictionary returned by the get_param()\n method. If the key \"check\" is not found, it will return False.

    \n
    \n", "signature": "(self) -> bool:", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.set_input", "modulename": "howard.objects.variants", "qualname": "Variants.set_input", "kind": "function", "doc": "

    The function set_input takes a file name as input, extracts the name and extension, and sets\nattributes in the class accordingly.

    \n\n
    Parameters
    \n\n
      \n
    • input: The set_input method in the provided code snippet is used to set attributes\nrelated to the input file. Here's a breakdown of the parameters and their usage in the method:
    • \n
    \n", "signature": "(self, input: str = None) -> None:", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.set_config", "modulename": "howard.objects.variants", "qualname": "Variants.set_config", "kind": "function", "doc": "

    The set_config function takes a config object and assigns it as the configuration object for the\nclass.

    \n\n
    Parameters
    \n\n
      \n
    • config: The config parameter in the set_config function is a dictionary object that\ncontains configuration settings for the class. When you call the set_config function with a\ndictionary object as the argument, it will set that dictionary as the configuration object for\nthe class
    • \n
    \n", "signature": "(self, config: dict) -> None:", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.set_param", "modulename": "howard.objects.variants", "qualname": "Variants.set_param", "kind": "function", "doc": "

    This function sets a parameter object for the class based on the input dictionary.

    \n\n
    Parameters
    \n\n
      \n
    • param: The set_param method you provided takes a dictionary object as input and sets it\nas the param attribute of the class instance
    • \n
    \n", "signature": "(self, param: dict) -> None:", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.init_variables", "modulename": "howard.objects.variants", "qualname": "Variants.init_variables", "kind": "function", "doc": "

    This function initializes the variables that will be used in the rest of the class

    \n", "signature": "(self) -> None:", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.get_indexing", "modulename": "howard.objects.variants", "qualname": "Variants.get_indexing", "kind": "function", "doc": "

    It returns the value of the key \"indexing\" in the dictionary. If the key is not present, it\nreturns False.

    \n\n
    Returns
    \n\n
    \n

    The value of the indexing parameter.

    \n
    \n", "signature": "(self) -> bool:", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.get_connexion_config", "modulename": "howard.objects.variants", "qualname": "Variants.get_connexion_config", "kind": "function", "doc": "

    The function get_connexion_config returns a dictionary containing the configuration for a\nconnection, including the number of threads and memory limit.

    \n\n
    Returns
    \n\n
    \n

    a dictionary containing the configuration for the connection.

    \n
    \n", "signature": "(self) -> dict:", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.get_duckdb_settings", "modulename": "howard.objects.variants", "qualname": "Variants.get_duckdb_settings", "kind": "function", "doc": "

    The function get_duckdb_settings retrieves DuckDB settings from a configuration file or a\nstring.

    \n\n
    Returns
    \n\n
    \n

    The function get_duckdb_settings returns a dictionary object duckdb_settings_dict.

    \n
    \n", "signature": "(self) -> dict:", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.set_connexion_db", "modulename": "howard.objects.variants", "qualname": "Variants.set_connexion_db", "kind": "function", "doc": "

    The function set_connexion_db returns the appropriate database connection string based on the\ninput format and connection type.

    \n\n
    Returns
    \n\n
    \n

    the value of the variable connexion_db.

    \n
    \n", "signature": "(self) -> str:", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.set_connexion", "modulename": "howard.objects.variants", "qualname": "Variants.set_connexion", "kind": "function", "doc": "

    The function set_connexion creates a connection to a database, with options for different\ndatabase formats and settings.

    \n\n
    Parameters
    \n\n
      \n
    • conn: The conn parameter in the set_connexion method is the connection to the\ndatabase. If a connection is not provided, a new connection to an in-memory database is created.\nThe method then proceeds to set up the connection based on the specified format (e.g., duckdb or\nsqlite
    • \n
    \n", "signature": "(self, conn) -> None:", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.set_output", "modulename": "howard.objects.variants", "qualname": "Variants.set_output", "kind": "function", "doc": "

    The set_output function in Python sets the output file based on the input or a specified key\nin the config file, extracting the output name, extension, and format.

    \n\n
    Parameters
    \n\n
      \n
    • output: The output parameter in the set_output method is used to specify the name of\nthe output file. If the config file has an 'output' key, the method sets the output to the value\nof that key. If no output is provided, it sets the output to None
    • \n
    \n", "signature": "(self, output: str = None) -> None:", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.set_header", "modulename": "howard.objects.variants", "qualname": "Variants.set_header", "kind": "function", "doc": "

    It reads the header of a VCF file and stores it as a list of strings and as a VCF object

    \n", "signature": "(self) -> None:", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.get_query_to_df", "modulename": "howard.objects.variants", "qualname": "Variants.get_query_to_df", "kind": "function", "doc": "

    The get_query_to_df function takes a query as a string and returns the result as a pandas\nDataFrame based on the connection format.

    \n\n
    Parameters
    \n\n
      \n
    • query: The query parameter in the get_query_to_df function is a string that\nrepresents the SQL query you want to execute. This query will be used to fetch data from a\ndatabase and convert it into a pandas DataFrame
    • \n
    • limit: The limit parameter in the get_query_to_df function is used to specify the\nmaximum number of rows to be returned in the resulting dataframe. If a limit is provided, the\nfunction will only fetch up to that number of rows from the database query result. If no limit\nis specified,
    • \n
    \n\n
    Returns
    \n\n
    \n

    A pandas DataFrame is being returned by the get_query_to_df function.

    \n
    \n", "signature": "(self, query: str = '', limit: int = None) -> pandas.core.frame.DataFrame:", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.get_overview", "modulename": "howard.objects.variants", "qualname": "Variants.get_overview", "kind": "function", "doc": "

    The function prints the input, output, config, and dataframe of the current object

    \n", "signature": "(self) -> None:", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.get_stats", "modulename": "howard.objects.variants", "qualname": "Variants.get_stats", "kind": "function", "doc": "

    The get_stats function calculates and returns various statistics of the current object,\nincluding information about the input file, variants, samples, header fields, quality, and\nSNVs/InDels.

    \n\n
    Returns
    \n\n
    \n

    a dictionary containing various statistics of the current object. The dictionary has\n the following structure:

    \n
    \n", "signature": "(self) -> dict:", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.stats_to_file", "modulename": "howard.objects.variants", "qualname": "Variants.stats_to_file", "kind": "function", "doc": "

    The function stats_to_file takes a file name as input, retrieves statistics, serializes them\ninto a JSON object, and writes the JSON object to the specified file.

    \n\n
    Parameters
    \n\n
      \n
    • file: The file parameter is a string that represents the file path where the JSON data\nwill be written
    • \n
    \n\n
    Returns
    \n\n
    \n

    the name of the file that was written to.

    \n
    \n", "signature": "(self, file: str = None) -> str:", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.print_stats", "modulename": "howard.objects.variants", "qualname": "Variants.print_stats", "kind": "function", "doc": "

    The print_stats function generates a markdown file and prints the statistics contained in a\nJSON file in a formatted manner.

    \n\n
    Parameters
    \n\n
      \n
    • output_file: The output_file parameter is a string that specifies the path and filename\nof the output file where the stats will be printed in Markdown format. If no output_file is\nprovided, a temporary directory will be created and the stats will be saved in a file named\n\"stats.md\" within that
    • \n
    • json_file: The json_file parameter is a string that represents the path to the JSON\nfile where the statistics will be saved. If no value is provided, a temporary directory will be\ncreated and a default file name \"stats.json\" will be used
    • \n
    \n\n
    Returns
    \n\n
    \n

    The function print_stats does not return any value. It has a return type annotation\n of None.

    \n
    \n", "signature": "(self, output_file: str = None, json_file: str = None) -> None:", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.get_input", "modulename": "howard.objects.variants", "qualname": "Variants.get_input", "kind": "function", "doc": "

    It returns the value of the input variable.

    \n\n
    Returns
    \n\n
    \n

    The input is being returned.

    \n
    \n", "signature": "(self) -> str:", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.get_input_format", "modulename": "howard.objects.variants", "qualname": "Variants.get_input_format", "kind": "function", "doc": "

    This function returns the format of the input variable, either from the provided input file or\nby prompting for input.

    \n\n
    Parameters
    \n\n
      \n
    • input_file: The input_file parameter in the get_input_format method is a string that\nrepresents the file path of the input file. If no input_file is provided when calling the\nmethod, it will default to None
    • \n
    \n\n
    Returns
    \n\n
    \n

    The format of the input variable is being returned.

    \n
    \n", "signature": "(self, input_file: str = None) -> str:", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.get_input_compressed", "modulename": "howard.objects.variants", "qualname": "Variants.get_input_compressed", "kind": "function", "doc": "

    The function get_input_compressed returns the format of the input variable after compressing\nit.

    \n\n
    Parameters
    \n\n
      \n
    • input_file: The input_file parameter in the get_input_compressed method is a string\nthat represents the file path of the input file. If no input_file is provided when calling the\nmethod, it will default to None and the method will then call self.get_input() to
    • \n
    \n\n
    Returns
    \n\n
    \n

    The function get_input_compressed returns the compressed format of the input\n variable.

    \n
    \n", "signature": "(self, input_file: str = None) -> str:", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.get_output", "modulename": "howard.objects.variants", "qualname": "Variants.get_output", "kind": "function", "doc": "

    It returns the output.

    \n\n
    Returns
    \n\n
    \n

    The output is being returned.

    \n
    \n", "signature": "(self) -> str:", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.get_output_format", "modulename": "howard.objects.variants", "qualname": "Variants.get_output_format", "kind": "function", "doc": "

    The function get_output_format returns the format of the input variable or the output file if\nprovided.

    \n\n
    Parameters
    \n\n
      \n
    • output_file: The output_file parameter in the get_output_format method is a string\nthat represents the file path of the output file. If no output_file is provided when calling\nthe method, it will default to the output obtained from the get_output method of the class\ninstance. The
    • \n
    \n\n
    Returns
    \n\n
    \n

    The format of the input variable is being returned.

    \n
    \n", "signature": "(self, output_file: str = None) -> str:", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.get_config", "modulename": "howard.objects.variants", "qualname": "Variants.get_config", "kind": "function", "doc": "

    It returns the config

    \n\n
    Returns
    \n\n
    \n

    The config variable is being returned.

    \n
    \n", "signature": "(self) -> dict:", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.get_param", "modulename": "howard.objects.variants", "qualname": "Variants.get_param", "kind": "function", "doc": "

    It returns the param

    \n\n
    Returns
    \n\n
    \n

    The param variable is being returned.

    \n
    \n", "signature": "(self) -> dict:", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.get_connexion_db", "modulename": "howard.objects.variants", "qualname": "Variants.get_connexion_db", "kind": "function", "doc": "

    It returns the connexion_db attribute of the object

    \n\n
    Returns
    \n\n
    \n

    The connexion_db is being returned.

    \n
    \n", "signature": "(self) -> str:", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.get_prefix", "modulename": "howard.objects.variants", "qualname": "Variants.get_prefix", "kind": "function", "doc": "

    It returns the prefix of the object.

    \n\n
    Returns
    \n\n
    \n

    The prefix is being returned.

    \n
    \n", "signature": "(self) -> str:", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.get_table_variants", "modulename": "howard.objects.variants", "qualname": "Variants.get_table_variants", "kind": "function", "doc": "

    This function returns the table_variants attribute of the object

    \n\n
    Parameters
    \n\n
      \n
    • clause: the type of clause the table will be used. Either \"select\" or \"from\" (optional),\ndefaults to select (optional)
    • \n
    \n\n
    Returns
    \n\n
    \n

    The table_variants attribute of the object.

    \n
    \n", "signature": "(self, clause: str = 'select') -> str:", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.get_tmp_dir", "modulename": "howard.objects.variants", "qualname": "Variants.get_tmp_dir", "kind": "function", "doc": "

    The function get_tmp_dir returns the temporary directory path based on configuration\nparameters or a default path.

    \n\n
    Returns
    \n\n
    \n

    The get_tmp_dir method is returning the temporary directory path based on the\n configuration, parameters, and a default value of \"/tmp\".

    \n
    \n", "signature": "(self) -> str:", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.get_connexion_type", "modulename": "howard.objects.variants", "qualname": "Variants.get_connexion_type", "kind": "function", "doc": "

    If the connexion type is not in the list of allowed connexion types, raise a ValueError

    \n\n
    Returns
    \n\n
    \n

    The connexion type is being returned.

    \n
    \n", "signature": "(self) -> str:", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.get_connexion", "modulename": "howard.objects.variants", "qualname": "Variants.get_connexion", "kind": "function", "doc": "

    It returns the connection object

    \n\n
    Returns
    \n\n
    \n

    The connection object.

    \n
    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.close_connexion", "modulename": "howard.objects.variants", "qualname": "Variants.close_connexion", "kind": "function", "doc": "

    This function closes the connection to the database.

    \n\n
    Returns
    \n\n
    \n

    The connection is being closed.

    \n
    \n", "signature": "(self) -> None:", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.get_header", "modulename": "howard.objects.variants", "qualname": "Variants.get_header", "kind": "function", "doc": "

    This function returns the header of the VCF file as a list of strings

    \n\n
    Parameters
    \n\n
      \n
    • type: the type of header you want to get, defaults to vcf (optional)
    • \n
    \n\n
    Returns
    \n\n
    \n

    The header of the vcf file.

    \n
    \n", "signature": "(self, type: str = 'vcf'):", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.get_header_infos_list", "modulename": "howard.objects.variants", "qualname": "Variants.get_header_infos_list", "kind": "function", "doc": "

    This function retrieves a list of information fields from the header.

    \n\n
    Returns
    \n\n
    \n

    A list of information fields from the header.

    \n
    \n", "signature": "(self) -> list:", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.get_header_length", "modulename": "howard.objects.variants", "qualname": "Variants.get_header_length", "kind": "function", "doc": "

    The function get_header_length returns the length of the header list, excluding the #CHROM\nline.

    \n\n
    Parameters
    \n\n
      \n
    • file: The file parameter is an optional argument that specifies the path to a VCF\nheader file. If this argument is provided, the function will read the header from the specified\nfile and return the length of the header list minus 1 (to exclude the #CHROM line)
    • \n
    \n\n
    Returns
    \n\n
    \n

    the length of the header list, excluding the #CHROM line.

    \n
    \n", "signature": "(self, file: str = None) -> int:", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.get_header_columns", "modulename": "howard.objects.variants", "qualname": "Variants.get_header_columns", "kind": "function", "doc": "

    This function returns the header list of a VCF

    \n\n
    Returns
    \n\n
    \n

    The length of the header list.

    \n
    \n", "signature": "(self) -> str:", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.get_header_columns_as_list", "modulename": "howard.objects.variants", "qualname": "Variants.get_header_columns_as_list", "kind": "function", "doc": "

    This function returns the header list of a VCF

    \n\n
    Returns
    \n\n
    \n

    A list of the header columns of the VCF.

    \n
    \n", "signature": "(self) -> list:", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.get_header_columns_as_sql", "modulename": "howard.objects.variants", "qualname": "Variants.get_header_columns_as_sql", "kind": "function", "doc": "

    This function returns the header columns of a VCF in SQL format

    \n\n
    Returns
    \n\n
    \n

    The header columns as a SQL-formatted string.

    \n
    \n", "signature": "(self) -> str:", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.get_header_sample_list", "modulename": "howard.objects.variants", "qualname": "Variants.get_header_sample_list", "kind": "function", "doc": "

    The function get_header_sample_list returns a list of samples from a VCF header, with optional\nchecking and filtering based on input parameters.

    \n\n
    Parameters
    \n\n
      \n
    • check: The check parameter in the get_header_sample_list function is a boolean\nparameter that determines whether to check if the samples in the list are properly defined as\ngenotype columns. If check is set to True, the function will verify if each sample in the\nlist is defined as a, defaults to False
    • \n
    • samples: The samples parameter in the get_header_sample_list function is a list that\nallows you to specify a subset of samples from the header. If you provide a list of sample\nnames, the function will check if each sample is defined in the header. If a sample is not found\nin the
    • \n
    • samples_force: The samples_force parameter in the get_header_sample_list function is\na boolean parameter that determines whether to force the function to return the sample list\nwithout checking if the samples are genotype columns. If samples_force is set to True, the\nfunction will return the sample list without performing, defaults to False
    • \n
    \n\n
    Returns
    \n\n
    \n

    The function get_header_sample_list returns a list of samples based on the input\n parameters and conditions specified in the function.

    \n
    \n", "signature": "(\tself,\tcheck: bool = False,\tsamples: list = None,\tsamples_force: bool = False) -> list:", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.sort_contigs", "modulename": "howard.objects.variants", "qualname": "Variants.sort_contigs", "kind": "function", "doc": "

    This function sort contigs

    \n\n
    Returns
    \n\n
    \n

    None

    \n
    \n", "signature": "(self) -> None:", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.is_genotype_column", "modulename": "howard.objects.variants", "qualname": "Variants.is_genotype_column", "kind": "function", "doc": "

    This function checks if a given column is a genotype column in a database.

    \n\n
    Parameters
    \n\n
      \n
    • column: The column parameter in the is_genotype_column method is a string that\nrepresents the column name in a database table. This method checks if the specified column is a\ngenotype column in the database. If a column name is provided, it calls the is_genotype_column\nmethod of
    • \n
    \n\n
    Returns
    \n\n
    \n

    The is_genotype_column method is returning a boolean value. If the column parameter\n is not None, it calls the is_genotype_column method of the Database class with the specified\n column name and returns the result. If the column parameter is None, it returns False.

    \n
    \n", "signature": "(self, column: str = None) -> bool:", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.get_verbose", "modulename": "howard.objects.variants", "qualname": "Variants.get_verbose", "kind": "function", "doc": "

    It returns the value of the \"verbose\" key in the config dictionary, or False if the key doesn't\nexist

    \n\n
    Returns
    \n\n
    \n

    The value of the key \"verbose\" in the config dictionary.

    \n
    \n", "signature": "(self) -> bool:", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.get_connexion_format", "modulename": "howard.objects.variants", "qualname": "Variants.get_connexion_format", "kind": "function", "doc": "

    It returns the connexion format of the object.

    \n\n
    Returns
    \n\n
    \n

    The connexion_format is being returned.

    \n
    \n", "signature": "(self) -> str:", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.insert_file_to_table", "modulename": "howard.objects.variants", "qualname": "Variants.insert_file_to_table", "kind": "function", "doc": "

    The function reads a file in chunks and inserts each chunk into a table based on the specified\ndatabase format.

    \n\n
    Parameters
    \n\n
      \n
    • file: The file parameter is the file that you want to load into a table. It should be\nthe path to the file on your system
    • \n
    • columns: The columns parameter in the insert_file_to_table function is a string that\nshould contain the names of the columns in the table where the data will be inserted. The column\nnames should be separated by commas within the string. For example, if you have columns named\n\"id\", \"name
    • \n
    • header_len: The header_len parameter in the insert_file_to_table function specifies\nthe number of lines to skip at the beginning of the file before reading the actual data. This\nparameter allows you to skip any header information present in the file before processing the\ndata, defaults to 0
    • \n
    • sep: The sep parameter in the insert_file_to_table function is used to specify the\nseparator character that is used in the file being read. In this case, the default separator is\nset to \t, which represents a tab character. You can change this parameter to a different\nseparator character if needed, defaults to \t
    • \n
    • chunksize: The chunksize parameter specifies the number of rows to read in at a time\nwhen processing the file in chunks. In the provided code snippet, the default value for\nchunksize is set to 1000000. This means that the file will be read in chunks of 1,, defaults\nto 1000000
    • \n
    \n", "signature": "(\tself,\tfile,\tcolumns: str,\theader_len: int = 0,\tsep: str = '\\t',\tchunksize: int = 1000000) -> None:", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.load_data", "modulename": "howard.objects.variants", "qualname": "Variants.load_data", "kind": "function", "doc": "

    The load_data function reads a VCF file and inserts it into a table, with options to drop the\ntable before loading the data and specify a sample size.

    \n\n
    Parameters
    \n\n
      \n
    • input_file: The path to the input file. This is the VCF file that will be loaded into the\ntable
    • \n
    • drop_variants_table: The drop_variants_table parameter is a boolean flag that\ndetermines whether the variants table should be dropped before loading the data. If set to\nTrue, the variants table will be dropped. If set to False (default), the variants table will\nnot be dropped, defaults to False
    • \n
    • sample_size: The sample_size parameter determines the number of rows to be sampled from\nthe input file. If it is set to None, the default value of 20480 will be used, defaults to\n20480
    • \n
    \n", "signature": "(\tself,\tinput_file: str = None,\tdrop_variants_table: bool = False,\tsample_size: int = 20480) -> None:", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.get_explode_infos", "modulename": "howard.objects.variants", "qualname": "Variants.get_explode_infos", "kind": "function", "doc": "

    The function get_explode_infos returns the value of the \"explode_infos\" parameter, defaulting\nto False if it is not set.

    \n\n
    Returns
    \n\n
    \n

    The method is returning the value of the \"explode_infos\" parameter, which is a boolean\n value. If the parameter is not present, it will return False.

    \n
    \n", "signature": "(self) -> bool:", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.get_explode_infos_fields", "modulename": "howard.objects.variants", "qualname": "Variants.get_explode_infos_fields", "kind": "function", "doc": "

    The get_explode_infos_fields function returns a list of exploded information fields based on\nthe input parameter explode_infos_fields.

    \n\n
    Parameters
    \n\n
      \n
    • explode_infos_fields: The explode_infos_fields parameter is a string that specifies the\nfields to be exploded. It can be set to \"ALL\" to explode all fields, or it can be a\ncomma-separated list of field names to explode
    • \n
    • remove_fields_not_in_header: The parameter remove_fields_not_in_header is a boolean\nflag that determines whether to remove fields that are not present in the header. If it is set\nto True, any field that is not in the header will be excluded from the list of exploded\ninformation fields. If it is set to `, defaults to False
    • \n
    \n\n
    Returns
    \n\n
    \n

    The function get_explode_infos_fields returns a list of exploded information fields.\n If the explode_infos_fields parameter is not provided or is set to None, it returns an empty\n list. If the parameter is provided and its value is \"ALL\", it also returns an empty list.\n Otherwise, it returns a list of exploded information fields after removing any spaces and\n splitting the string by commas.

    \n
    \n", "signature": "(\tself,\texplode_infos_fields: str = None,\tremove_fields_not_in_header: bool = False) -> list:", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.get_explode_infos_prefix", "modulename": "howard.objects.variants", "qualname": "Variants.get_explode_infos_prefix", "kind": "function", "doc": "

    The function get_explode_infos_prefix returns the value of the explode_infos_prefix parameter, or\nthe value of self.get_param().get(\"explode_infos_prefix\", None) if explode_infos_prefix is\nnot provided.

    \n\n
    Parameters
    \n\n
      \n
    • explode_infos_prefix: The parameter explode_infos_prefix is a string that specifies a\nprefix to be used for exploding or expanding information
    • \n
    \n\n
    Returns
    \n\n
    \n

    the value of the variable explode_infos_prefix.

    \n
    \n", "signature": "(self, explode_infos_prefix: str = None) -> str:", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.add_column", "modulename": "howard.objects.variants", "qualname": "Variants.add_column", "kind": "function", "doc": "

    The add_column function adds a column to a SQLite or DuckDB table with a default value if it\ndoesn't already exist.

    \n\n
    Parameters
    \n\n
      \n
    • table_name: The name of the table to which you want to add a column
    • \n
    • column_name: The parameter \"column_name\" is the name of the column that you want to add\nto the table
    • \n
    • column_type: The column_type parameter specifies the data type of the column that you\nwant to add to the table. It should be a string that represents the desired data type, such as\n\"INTEGER\", \"TEXT\", \"REAL\", etc
    • \n
    • default_value: The default_value parameter is an optional parameter that specifies the\ndefault value for the newly added column. If a default value is provided, it will be assigned to\nthe column for any existing rows that do not have a value for that column
    • \n
    • drop: The drop parameter is a boolean flag that determines whether to drop the column\nif it already exists in the table. If drop is set to True, the function will drop the\nexisting column before adding the new column. If drop is set to False (default),, defaults\nto False
    • \n
    \n\n
    Returns
    \n\n
    \n

    a boolean value indicating whether the column was successfully added to the table.

    \n
    \n", "signature": "(\tself,\ttable_name,\tcolumn_name,\tcolumn_type,\tdefault_value=None,\tdrop: bool = False) -> dict:", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.drop_column", "modulename": "howard.objects.variants", "qualname": "Variants.drop_column", "kind": "function", "doc": "

    The drop_column function drops a specified column from a given table in a database and returns\nTrue if the column was successfully dropped, and False if the column does not exist in the\ntable.

    \n\n
    Parameters
    \n\n
      \n
    • column: The column parameter is a dictionary that contains information about the column\nyou want to drop. It has two keys:
    • \n
    • table_name: The table_name parameter is the name of the table from which you want to\ndrop a column
    • \n
    • column_name: The column_name parameter is the name of the column that you want to drop\nfrom the table
    • \n
    \n\n
    Returns
    \n\n
    \n

    a boolean value. It returns True if the column was successfully dropped from the table,\n and False if the column does not exist in the table.

    \n
    \n", "signature": "(\tself,\tcolumn: dict = None,\ttable_name: str = None,\tcolumn_name: str = None) -> bool:", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.explode_infos", "modulename": "howard.objects.variants", "qualname": "Variants.explode_infos", "kind": "function", "doc": "

    The explode_infos function in Python takes a VCF file and explodes the INFO fields into\nindividual columns, returning a list of added columns.

    \n\n
    Parameters
    \n\n
      \n
    • prefix: The prefix parameter is a string that is used as a prefix for the exploded INFO\nfields. If the prefix is not provided or is set to None, the function will use the value of\nself.get_explode_infos_prefix() as the prefix
    • \n
    • create_index: The create_index parameter is a boolean flag that specifies whether to\ncreate indexes on the exploded INFO fields. If set to True, indexes will be created; if set to\nFalse, indexes will not be created. The default value is False, defaults to False
    • \n
    • fields: The fields parameter in the explode_infos function is a list of INFO fields\nthat you want to explode into individual columns. If this parameter is not provided, all INFO\nfields will be exploded. You can specify the INFO fields you want to explode by passing them as\na list to the `
    • \n
    • force: The force parameter in the explode_infos function is a boolean flag that\ndetermines whether to drop and recreate a column if it already exists in the table. If force\nis set to True, the column will be dropped and recreated. If force is set to `False,\ndefaults to False
    • \n
    • proccess_all_fields_together: The proccess_all_fields_together parameter is a boolean\nflag that determines whether to process all the INFO fields together or individually. If set to\nTrue, all the INFO fields will be processed together. If set to False, each INFO field will\nbe processed individually. The default value is, defaults to False
    • \n
    • table: The table parameter in the explode_infos function is used to specify the name\nof the table where the exploded INFO fields will be added as individual columns. If you provide\na value for the table parameter, the function will use that table name. If the table\nparameter is
    • \n
    \n\n
    Returns
    \n\n
    \n

    The explode_infos function returns a list of added columns.

    \n
    \n", "signature": "(\tself,\tprefix: str = None,\tcreate_index: bool = False,\tfields: list = None,\tforce: bool = False,\tproccess_all_fields_together: bool = False,\ttable: str = None) -> list:", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.create_indexes", "modulename": "howard.objects.variants", "qualname": "Variants.create_indexes", "kind": "function", "doc": "

    Create indexes on the table after insertion

    \n", "signature": "(self) -> None:", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.drop_indexes", "modulename": "howard.objects.variants", "qualname": "Variants.drop_indexes", "kind": "function", "doc": "

    Drop indexes on the table

    \n", "signature": "(self) -> None:", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.read_vcf_header", "modulename": "howard.objects.variants", "qualname": "Variants.read_vcf_header", "kind": "function", "doc": "

    It reads the header of a VCF file and returns a list of the header lines

    \n\n
    Parameters
    \n\n
      \n
    • f: the file object
    • \n
    \n\n
    Returns
    \n\n
    \n

    The header lines of the VCF file.

    \n
    \n", "signature": "(self, f) -> list:", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.read_vcf_header_file", "modulename": "howard.objects.variants", "qualname": "Variants.read_vcf_header_file", "kind": "function", "doc": "

    The read_vcf_header_file function reads the header of a VCF file, handling both compressed and\nuncompressed files.

    \n\n
    Parameters
    \n\n
      \n
    • file: The file parameter is a string that represents the path to the VCF header file\nthat you want to read. It is an optional parameter, so if you don't provide a value, it will\ndefault to None
    • \n
    \n\n
    Returns
    \n\n
    \n

    The function read_vcf_header_file returns a list.

    \n
    \n", "signature": "(self, file: str = None) -> list:", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.execute_query", "modulename": "howard.objects.variants", "qualname": "Variants.execute_query", "kind": "function", "doc": "

    It takes a query as an argument, executes it, and returns the results

    \n\n
    Parameters
    \n\n
      \n
    • query: The query to be executed
    • \n
    \n\n
    Returns
    \n\n
    \n

    The result of the query is being returned.

    \n
    \n", "signature": "(self, query: str):", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.export_output", "modulename": "howard.objects.variants", "qualname": "Variants.export_output", "kind": "function", "doc": "

    The export_output function exports data from a VCF file to various formats, including VCF,\nCSV, TSV, PSV, and Parquet, with options for customization such as filtering, sorting, and\npartitioning.

    \n\n
    Parameters
    \n\n
      \n
    • output_file: The output_file parameter is a string that specifies the name of the\noutput file where the exported data will be saved
    • \n
    • output_header: The output_header parameter is a string that specifies the name of the\nfile where the header of the VCF file will be exported. If this parameter is not provided, the\nheader will be exported to a file with the same name as the output_file parameter, but with\nthe extension \"
    • \n
    • export_header: The export_header parameter is a boolean flag that determines whether\nthe header of a VCF file should be exported to a separate file or not. If export_header is\nTrue, the header will be exported to a file. If export_header is False, the header will not\nbe, defaults to True
    • \n
    • query: The query parameter in the export_output function is an optional SQL query\nthat can be used to filter and select specific data from the VCF file before exporting it. If\nprovided, only the data that matches the query will be exported. This allows you to customize\nthe exported data based on
    • \n
    • parquet_partitions: The parquet_partitions parameter is a list that specifies the\ncolumns to be used for partitioning the Parquet file during export. Partitioning is a way to\norganize data in a hierarchical directory structure based on the values of one or more columns.\nThis can improve query performance when working with large datasets
    • \n
    • chunk_size: The chunk_size parameter specifies the number of records in a batch when\nexporting data in Parquet format. This parameter is used for partitioning the Parquet file into\nmultiple files. It helps in optimizing the export process by breaking down the data into\nmanageable chunks for processing and storage
    • \n
    • threads: The threads parameter in the export_output function specifies the number of\nthreads to be used during the export process. It determines the level of parallelism and can\nimprove the performance of the export operation. If this parameter is not provided, the function\nwill use the default number of threads
    • \n
    • sort: The sort parameter in the export_output function is a boolean flag that\ndetermines whether the output file should be sorted based on genomic coordinates of the\nvariants. If sort is set to True, the output file will be sorted. If sort is set to\nFalse,, defaults to False
    • \n
    • index: The index parameter in the export_output function is a boolean flag that\ndetermines whether an index should be created on the output file. If index is set to True,\nan index will be created on the output file. If index is set to False, no, defaults to False
    • \n
    • order_by: The order_by parameter in the export_output function is a string that\nspecifies the column(s) to use for sorting the output file. This parameter is only applicable\nwhen exporting data in VCF format. It allows you to specify the column(s) based on which the\noutput file should be
    • \n
    • fields_to_rename: The fields_to_rename parameter is a dictionary that specifies the\nmapping of field names to be renamed during the export process. This parameter allows you to\ncustomize the output field names before exporting the data. Each key-value pair in the\ndictionary represents the original field name as the key and the new field name
    • \n
    \n\n
    Returns
    \n\n
    \n

    The export_output function returns a boolean value. It checks if the output file\n exists and returns True if it does, or None if it doesn't.

    \n
    \n", "signature": "(\tself,\toutput_file: str | None = None,\toutput_header: str | None = None,\texport_header: bool = True,\tquery: str | None = None,\tparquet_partitions: list | None = None,\tchunk_size: int | None = None,\tthreads: int | None = None,\tsort: bool = False,\tindex: bool = False,\torder_by: str | None = None,\tfields_to_rename: dict | None = None) -> bool:", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.get_extra_infos", "modulename": "howard.objects.variants", "qualname": "Variants.get_extra_infos", "kind": "function", "doc": "

    The get_extra_infos function returns a list of columns that are in a specified table but not\nin the header.

    \n\n
    Parameters
    \n\n
      \n
    • table: The table parameter in the get_extra_infos function is used to specify the\nname of the table from which you want to retrieve the extra columns that are not present in the\nheader. If the table parameter is not provided when calling the function, it will default to\nusing the variants
    • \n
    \n\n
    Returns
    \n\n
    \n

    A list of columns that are in the specified table but not in the header of the table.

    \n
    \n", "signature": "(self, table: str = None) -> list:", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.get_extra_infos_sql", "modulename": "howard.objects.variants", "qualname": "Variants.get_extra_infos_sql", "kind": "function", "doc": "

    It returns a string of the extra infos, separated by commas, and each extra info is surrounded\nby double quotes

    \n\n
    Parameters
    \n\n
      \n
    • table: The name of the table to get the extra infos from. If None, the default table is\nused
    • \n
    \n\n
    Returns
    \n\n
    \n

    A string of the extra infos

    \n
    \n", "signature": "(self, table: str = None) -> str:", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.export_header", "modulename": "howard.objects.variants", "qualname": "Variants.export_header", "kind": "function", "doc": "

    The export_header function takes a VCF file, extracts the header, modifies it according to\nspecified options, and writes it to a new file.

    \n\n
    Parameters
    \n\n
      \n
    • header_name: The header_name parameter is the name of the header file to be created. If\nthis parameter is not specified, the header will be written to the output file
    • \n
    • output_file: The output_file parameter in the export_header function is used to\nspecify the name of the output file where the header will be written. If this parameter is not\nprovided, the header will be written to a temporary file
    • \n
    • output_file_ext: The output_file_ext parameter in the export_header function is a\nstring that represents the extension of the output header file. By default, it is set to \".hdr\"\nif not specified by the user. This extension will be appended to the output_file name to\ncreate the final, defaults to .hdr
    • \n
    • clean_header: The clean_header parameter in the export_header function is a boolean\nflag that determines whether the header should be cleaned or not. When clean_header is set to\nTrue, the function will clean the header by modifying certain lines based on a specific\npattern. If clean_header, defaults to True
    • \n
    • clean_info_flag: The clean_info_flag parameter in the export_header function is a boolean\nflag that determines whether the header should be cleaned for INFO/tags that are 'Flag' type.\nWhen clean_info_flag is set to True, the function will replace INFO/tags 'Type' as 'String'.\nDefault to False
    • \n
    • remove_chrom_line: The remove_chrom_line parameter in the export_header function is a\nboolean flag that determines whether the #CHROM line should be removed from the header before\nwriting it to the output file. If set to True, the #CHROM line will be removed; if set to `,\ndefaults to False
    • \n
    \n\n
    Returns
    \n\n
    \n

    The function export_header returns the name of the temporary header file that is\n created.

    \n
    \n", "signature": "(\tself,\theader_name: str = None,\toutput_file: str = None,\toutput_file_ext: str = '.hdr',\tclean_header: bool = True,\tclean_info_flag: bool = False,\tremove_chrom_line: bool = False) -> str:", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.export_variant_vcf", "modulename": "howard.objects.variants", "qualname": "Variants.export_variant_vcf", "kind": "function", "doc": "

    The export_variant_vcf function exports a VCF file with specified samples, allowing options to\nremove INFO field, add samples, and control compression and indexing.

    \n\n
    Parameters
    \n\n
      \n
    • vcf_file: The vcf_file parameter is the name of the file where the VCF data will be\nwritten to. It is the output file that will contain the filtered VCF data based on the specified\nparameters
    • \n
    • remove_info: The remove_info parameter in the export_variant_vcf function is a\nboolean flag that determines whether to remove the INFO field from the output VCF file. If set\nto True, the INFO field will be removed. If set to False, the INFO field will be included\nin, defaults to False
    • \n
    • add_samples: The add_samples parameter is a boolean parameter that determines whether\nthe samples should be added to the VCF file or not. If set to True, the samples will be added.\nIf set to False, the samples will be removed. The default value is True, defaults to True
    • \n
    • list_samples: The list_samples parameter is a list of samples that you want to include\nin the output VCF file. By default, all samples will be included. If you provide a list of\nsamples, only those samples will be included in the output file
    • \n
    • index: The index parameter in the export_variant_vcf function is a boolean flag that\ndetermines whether or not to create an index for the output VCF file. If index is set to\nTrue, the output VCF file will be indexed using tabix. If index, defaults to False
    • \n
    • threads: The threads parameter in the export_variant_vcf function specifies the\nnumber of threads to use for exporting the VCF file. It determines how many parallel threads\nwill be used during the export process. More threads can potentially speed up the export process\nby utilizing multiple cores of the processor. If
    • \n
    \n\n
    Returns
    \n\n
    \n

    The export_variant_vcf function returns the result of calling the export_output\n method with various parameters including the output file, query, threads, sort flag, and index\n flag. The export_output method is responsible for exporting the VCF data based on the\n specified parameters and configurations provided in the export_variant_vcf function.

    \n
    \n", "signature": "(\tself,\tvcf_file,\tremove_info: bool = False,\tadd_samples: bool = True,\tlist_samples: list = [],\twhere_clause: str = '',\tindex: bool = False,\tthreads: int | None = None) -> bool | None:", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.run_commands", "modulename": "howard.objects.variants", "qualname": "Variants.run_commands", "kind": "function", "doc": "

    It takes a list of commands and runs them in parallel using the number of threads specified

    \n\n
    Parameters
    \n\n
      \n
    • commands: A list of commands to run
    • \n
    • threads: The number of threads to use, defaults to 1 (optional)
    • \n
    \n", "signature": "(self, commands: list = [], threads: int = 1) -> None:", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.get_threads", "modulename": "howard.objects.variants", "qualname": "Variants.get_threads", "kind": "function", "doc": "

    This function returns the number of threads to use for a job, with a default value of 1 if not\nspecified.

    \n\n
    Parameters
    \n\n
      \n
    • default: The default parameter in the get_threads method is used to specify the\ndefault number of threads to use if no specific value is provided. If no value is provided for\nthe threads parameter in the configuration or input parameters, the default value will be\nused, defaults to 1
    • \n
    \n\n
    Returns
    \n\n
    \n

    the number of threads to use for the current job.

    \n
    \n", "signature": "(self, default: int = 1) -> int:", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.get_memory", "modulename": "howard.objects.variants", "qualname": "Variants.get_memory", "kind": "function", "doc": "

    This function retrieves the memory value from parameters or configuration with a default value\nif not found.

    \n\n
    Parameters
    \n\n
      \n
    • default: The get_memory function takes in a default value as a string parameter. This\ndefault value is used as a fallback in case the memory parameter is not provided in the\nparam dictionary or the config dictionary. If memory is not found in either dictionary,\nthe function
    • \n
    \n\n
    Returns
    \n\n
    \n

    The get_memory function returns a string value representing the memory parameter. If\n the input_memory is provided in the parameters, it will return that value. Otherwise, it will\n return the default value provided as an argument to the function.

    \n
    \n", "signature": "(self, default: str = None) -> str:", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.update_from_vcf", "modulename": "howard.objects.variants", "qualname": "Variants.update_from_vcf", "kind": "function", "doc": "
    \n

    If the database is duckdb, then use the parquet method, otherwise use the sqlite method

    \n
    \n\n
    Parameters
    \n\n
      \n
    • vcf_file: the path to the VCF file
    • \n
    \n", "signature": "(self, vcf_file: str) -> None:", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.update_from_vcf_duckdb", "modulename": "howard.objects.variants", "qualname": "Variants.update_from_vcf_duckdb", "kind": "function", "doc": "

    It takes a VCF file and updates the INFO column of the variants table in the database with the\nINFO column of the VCF file

    \n\n
    Parameters
    \n\n
      \n
    • vcf_file: the path to the VCF file
    • \n
    \n", "signature": "(self, vcf_file: str) -> None:", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.update_from_vcf_sqlite", "modulename": "howard.objects.variants", "qualname": "Variants.update_from_vcf_sqlite", "kind": "function", "doc": "

    It creates a temporary table in the SQLite database, loads the VCF file into the temporary\ntable, then updates the INFO column of the variants table with the INFO column of the temporary\ntable

    \n\n
    Parameters
    \n\n
      \n
    • vcf_file: The path to the VCF file you want to update the database with
    • \n
    \n", "signature": "(self, vcf_file: str) -> None:", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.drop_variants_table", "modulename": "howard.objects.variants", "qualname": "Variants.drop_variants_table", "kind": "function", "doc": "
    \n

    This function drops the variants table

    \n
    \n", "signature": "(self) -> None:", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.set_variant_id", "modulename": "howard.objects.variants", "qualname": "Variants.set_variant_id", "kind": "function", "doc": "

    It adds a column to the variants table called variant_id and populates it with a hash of the\n#CHROM, POS, REF, and ALT columns

    \n\n
    Parameters
    \n\n
      \n
    • variant_id_column: The name of the column to be created in the variants table, defaults\nto variant_id
    • \n
    • force: If True, the variant_id column will be created even if it already exists
    • \n
    \n\n
    Returns
    \n\n
    \n

    The name of the column that contains the variant_id

    \n
    \n", "signature": "(self, variant_id_column: str = 'variant_id', force: bool = None) -> str:", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.get_variant_id_column", "modulename": "howard.objects.variants", "qualname": "Variants.get_variant_id_column", "kind": "function", "doc": "

    This function returns the variant_id column name

    \n\n
    Parameters
    \n\n
      \n
    • variant_id_column: The name of the column in the dataframe that contains the variant IDs,\ndefaults to variant_id
    • \n
    • force: If True, will force the variant_id to be set to the value of variant_id_column. If\nFalse, will only set the variant_id if it is not already set. If None, will set the variant_id\nif it is not already set, or if it is set
    • \n
    \n\n
    Returns
    \n\n
    \n

    The variant_id column name.

    \n
    \n", "signature": "(self, variant_id_column: str = 'variant_id', force: bool = None) -> str:", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.scan_databases", "modulename": "howard.objects.variants", "qualname": "Variants.scan_databases", "kind": "function", "doc": "

    The function scan_databases scans for available databases based on specified formats and\nreleases.

    \n\n
    Parameters
    \n\n
      \n
    • database_formats: The database_formats parameter is a list that specifies the formats\nof the databases to be scanned. In this case, the accepted format is \"parquet\"
    • \n
    • database_releases: The database_releases parameter is a list that specifies the\nreleases of the databases to be scanned. In the provided function, the default value for\ndatabase_releases is set to [\"current\"], meaning that by default, the function will scan\ndatabases that are in the \"current\"
    • \n
    \n\n
    Returns
    \n\n
    \n

    The function scan_databases returns a dictionary containing information about\n databases that match the specified formats and releases.

    \n
    \n", "signature": "(\tself,\tdatabase_formats: list = ['parquet'],\tdatabase_releases: list = ['current']) -> dict:", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.annotation", "modulename": "howard.objects.variants", "qualname": "Variants.annotation", "kind": "function", "doc": "

    It annotates the VCF file with the annotations specified in the config file.

    \n", "signature": "(self) -> None:", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.annotation_bigwig", "modulename": "howard.objects.variants", "qualname": "Variants.annotation_bigwig", "kind": "function", "doc": "

    The function annotation_bigwig annotates variants in a VCF file using bigwig databases.

    \n\n
    Parameters
    \n\n
      \n
    • threads: The threads parameter in the annotation_bigwig method is used to specify the\nnumber of threads to be used for parallel processing during the annotation process. If the\nthreads parameter is not provided, the method will attempt to determine the optimal number of\nthreads to use based on the system configuration
    • \n
    \n\n
    Returns
    \n\n
    \n

    True

    \n
    \n", "signature": "(self, threads: int = None) -> None:", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.annotation_snpsift", "modulename": "howard.objects.variants", "qualname": "Variants.annotation_snpsift", "kind": "function", "doc": "

    This function annotate with bcftools

    \n\n
    Parameters
    \n\n
      \n
    • threads: Number of threads to use
    • \n
    \n\n
    Returns
    \n\n
    \n

    the value of the variable \"return_value\".

    \n
    \n", "signature": "(self, threads: int = None) -> None:", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.annotation_bcftools", "modulename": "howard.objects.variants", "qualname": "Variants.annotation_bcftools", "kind": "function", "doc": "

    This function annotate with bcftools

    \n\n
    Parameters
    \n\n
      \n
    • threads: Number of threads to use
    • \n
    \n\n
    Returns
    \n\n
    \n

    the value of the variable \"return_value\".

    \n
    \n", "signature": "(self, threads: int = None) -> None:", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.annotation_exomiser", "modulename": "howard.objects.variants", "qualname": "Variants.annotation_exomiser", "kind": "function", "doc": "

    This function annotate with Exomiser

    \n\n

    This function uses args as parameters, in section \"annotation\" -> \"exomiser\", with sections:

    \n\n
      \n
    • \"analysis\" (dict/file):\nFull analysis dictionnary parameters (see Exomiser docs).\nEither a dict, or a file in JSON or YAML format.\nThese parameters may change depending on other parameters (e.g. phenotipicFeatures/HPO)\nDefault : None
    • \n
    • \"preset\" (string):\nAnalysis preset (available in config folder).\nUsed if no full \"analysis\" is provided.\nDefault: \"exome\"
    • \n
    • \"phenopacket\" (dict/file):\nSamples and phenotipic features parameters (see Exomiser docs).\nEither a dict, or a file in JSON or YAML format.\nDefault: None
    • \n
    • \"subject\" (dict):\nSample parameters (see Exomiser docs).\nExample:\n \"subject\":\n {\n \"id\": \"ISDBM322017\",\n \"sex\": \"FEMALE\"\n }\nDefault: None
    • \n
    • \"sample\" (string):\nSample name to construct \"subject\" section:\n \"subject\":\n {\n \"id\": \"\",\n \"sex\": \"UNKNOWN_SEX\"\n }\nDefault: None
    • \n
    • \"phenotypicFeatures\" (dict)\nPhenotypic features to construct \"subject\" section.\nExample:\n \"phenotypicFeatures\":\n [\n { \"type\": { \"id\": \"HP:0001159\", \"label\": \"Syndactyly\" } },\n { \"type\": { \"id\": \"HP:0000486\", \"label\": \"Strabismus\" } }\n ]
    • \n
    • \"hpo\" (list)\nList of HPO ids as phenotypic features.\nExample:\n \"hpo\": ['0001156', '0001363', '0011304', '0010055']\nDefault: []
    • \n
    • \"outputOptions\" (dict):\nOutput options (see Exomiser docs).\nDefault:\n \"output_options\" =\n {\n \"outputContributingVariantsOnly\": False,\n \"numGenes\": 0,\n \"outputFormats\": [\"TSV_VARIANT\", \"VCF\"]\n }
    • \n
    • \"transcript_source\" (string):\nTranscript source (either \"refseq\", \"ucsc\", \"ensembl\")\nDefault: \"refseq\"
    • \n
    • \"exomiser_to_info\" (boolean):\nAdd exomiser TSV file columns as INFO fields in VCF.\nDefault: False
    • \n
    • \"release\" (string):\nExomise database release.\nIf not exists, database release will be downloaded (take a while).\nDefault: None (provided by application.properties configuration file)
    • \n
    • \"exomiser_application_properties\" (file):\nExomiser configuration file (see Exomiser docs).\nUseful to automatically download databases (especially for specific genome databases).
    • \n
    \n\n

    Notes:

    \n\n
      \n
    • If no sample in parameters, first sample in VCF will be chosen
    • \n
    • If no HPO found, \"hiPhivePrioritiser\" analysis step will be switch off
    • \n
    \n\n
    Parameters
    \n\n
      \n
    • threads: The number of threads to use
    • \n
    \n\n
    Returns
    \n\n
    \n

    None.

    \n
    \n", "signature": "(self, threads: int = None) -> None:", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.annotation_snpeff", "modulename": "howard.objects.variants", "qualname": "Variants.annotation_snpeff", "kind": "function", "doc": "

    This function annotate with snpEff

    \n\n
    Parameters
    \n\n
      \n
    • threads: The number of threads to use
    • \n
    \n\n
    Returns
    \n\n
    \n

    the value of the variable \"return_value\".

    \n
    \n", "signature": "(self, threads: int = None) -> None:", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.annotation_annovar", "modulename": "howard.objects.variants", "qualname": "Variants.annotation_annovar", "kind": "function", "doc": "

    It takes a VCF file, annotates it with Annovar, and then updates the database with the new\nannotations

    \n\n
    Parameters
    \n\n
      \n
    • threads: number of threads to use
    • \n
    \n\n
    Returns
    \n\n
    \n

    the value of the variable \"return_value\".

    \n
    \n", "signature": "(self, threads: int = None) -> None:", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.annotation_parquet", "modulename": "howard.objects.variants", "qualname": "Variants.annotation_parquet", "kind": "function", "doc": "

    It takes a VCF file, and annotates it with a parquet file

    \n\n
    Parameters
    \n\n
      \n
    • threads: number of threads to use for the annotation
    • \n
    \n\n
    Returns
    \n\n
    \n

    the value of the variable \"result\".

    \n
    \n", "signature": "(self, threads: int = None) -> None:", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.annotation_splice", "modulename": "howard.objects.variants", "qualname": "Variants.annotation_splice", "kind": "function", "doc": "

    This function annotate with snpEff

    \n\n
    Parameters
    \n\n
      \n
    • threads: The number of threads to use
    • \n
    \n\n
    Returns
    \n\n
    \n

    the value of the variable \"return_value\".

    \n
    \n", "signature": "(self, threads: int = None) -> None:", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.get_config_default", "modulename": "howard.objects.variants", "qualname": "Variants.get_config_default", "kind": "function", "doc": "

    The function get_config_default returns a dictionary containing default configurations for\nvarious calculations and prioritizations.

    \n\n
    Parameters
    \n\n
      \n
    • name: The get_config_default function returns a dictionary containing default\nconfigurations for different calculations and prioritizations. The name parameter is used to\nspecify which specific configuration to retrieve from the dictionary
    • \n
    \n\n
    Returns
    \n\n
    \n

    The function get_config_default returns a dictionary containing default configuration\n settings for different calculations and prioritizations. The specific configuration settings are\n retrieved based on the input name parameter provided to the function. If the name parameter\n matches a key in the config_default dictionary, the corresponding configuration settings are\n returned. If there is no match, an empty dictionary is returned.

    \n
    \n", "signature": "(self, name: str) -> dict:", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.get_config_json", "modulename": "howard.objects.variants", "qualname": "Variants.get_config_json", "kind": "function", "doc": "

    The function get_config_json retrieves a configuration JSON object with prioritizations from\ndefault values, a dictionary, and a file.

    \n\n
    Parameters
    \n\n
      \n
    • name: The name parameter in the get_config_json function is a string that represents\nthe name of the configuration. It is used to identify and retrieve the configuration settings\nfor a specific component or module
    • \n
    • config_dict: The config_dict parameter in the get_config_json function is a\ndictionary that allows you to provide additional configuration settings or overrides. When you\ncall the get_config_json function, you can pass a dictionary containing key-value pairs where\nthe key is the configuration setting you want to override or
    • \n
    • config_file: The config_file parameter in the get_config_json function is used to\nspecify the path to a configuration file that contains additional settings. If provided, the\nfunction will read the contents of this file and update the configuration dictionary with the\nvalues found in the file, overriding any existing values with the
    • \n
    \n\n
    Returns
    \n\n
    \n

    The function get_config_json returns a dictionary containing the configuration\n settings.

    \n
    \n", "signature": "(self, name: str, config_dict: dict = {}, config_file: str = None) -> dict:", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.prioritization", "modulename": "howard.objects.variants", "qualname": "Variants.prioritization", "kind": "function", "doc": "

    The prioritization function in Python processes VCF files, adds new INFO fields, and\nprioritizes variants based on configured profiles and criteria.

    \n\n
    Parameters
    \n\n
      \n
    • table: The table parameter in the prioritization function is used to specify the name\nof the table (presumably a VCF file) on which the prioritization operation will be performed. If\na table name is provided, the method will prioritize the variants in that specific table
    • \n
    • pz_prefix: The pz_prefix parameter is used to specify a prefix that will be added to\ncertain INFO fields in a VCF file during the prioritization process. If this parameter is not\nprovided, the code will use a default prefix value of \"PZ\"
    • \n
    • pz_param: The pz_param parameter in the prioritization method is used to pass\nadditional parameters specific to the prioritization process. These parameters can include\nsettings related to prioritization profiles, fields, scoring modes, flags, comments, and other\nconfigurations needed for the prioritization of variants in a V
    • \n
    • pz_keys: The pz_keys parameter in the prioritization function is used to specify the\nkeys that will be used to join the prioritization table with the variant table. If no keys are\nprovided, the function will use the default keys of [\"#CHROM\", \"POS\", \"REF\", \"ALT\"]
    • \n
    \n\n
    Returns
    \n\n
    \n

    The prioritization function returns a boolean value (True) if the prioritization\n operation is successful. If the operation fails, the function will return a boolean value of\n False

    \n
    \n", "signature": "(\tself,\ttable: str = None,\tpz_prefix: str = None,\tpz_param: dict = None,\tpz_keys: list = None) -> bool:", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.annotation_hgvs", "modulename": "howard.objects.variants", "qualname": "Variants.annotation_hgvs", "kind": "function", "doc": "

    The annotation_hgvs function performs HGVS annotation on a set of variants using genomic\ncoordinates and alleles.

    \n\n
    Parameters
    \n\n
      \n
    • threads: The threads parameter is an optional integer that specifies the number of\nthreads to use for parallel processing. If no value is provided, it will default to the number\nof threads obtained from the get_threads() method
    • \n
    \n", "signature": "(self, threads: int = None) -> None:", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.get_operations_help", "modulename": "howard.objects.variants", "qualname": "Variants.get_operations_help", "kind": "function", "doc": "

    \n", "signature": "(\tself,\toperations_config_dict: dict = {},\toperations_config_file: str = None) -> list:", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.calculation", "modulename": "howard.objects.variants", "qualname": "Variants.calculation", "kind": "function", "doc": "

    It takes a list of operations, and for each operation, it checks if it's a python or sql\noperation, and then calls the appropriate function

    \n\n

    param json example:\n \"calculation\": {\n \"NOMEN\": {\n \"options\": {\n \"hgvs_field\": \"hgvs\"\n },\n \"middle\" : null\n }

    \n", "signature": "(\tself,\toperations: dict = {},\toperations_config_dict: dict = {},\toperations_config_file: str = None) -> None:", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.calculation_process_sql", "modulename": "howard.objects.variants", "qualname": "Variants.calculation_process_sql", "kind": "function", "doc": "

    The calculation_process_sql function takes in a mathematical operation as a string and\nperforms the operation, updating the specified table with the result.

    \n\n
    Parameters
    \n\n
      \n
    • operation: The operation parameter is a dictionary that contains information about the\nmathematical operation to be performed. It includes the following keys:
    • \n
    • operation_name: The operation_name parameter is a string that represents the name of\nthe mathematical operation being performed. It is used for logging and error handling purposes,\ndefaults to unknown
    • \n
    \n", "signature": "(self, operation: dict, operation_name: str = 'unknown') -> None:", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.calculation_process_function", "modulename": "howard.objects.variants", "qualname": "Variants.calculation_process_function", "kind": "function", "doc": "

    The calculation_process_function takes in an operation dictionary and performs the specified\nfunction with the given parameters.

    \n\n
    Parameters
    \n\n
      \n
    • operation: The operation parameter is a dictionary that contains information about the\noperation to be performed. It has the following keys:
    • \n
    • operation_name: The operation_name parameter is a string that represents the name of\nthe operation being performed. It is used for logging purposes, defaults to unknown
    • \n
    \n", "signature": "(self, operation: dict, operation_name: str = 'unknown') -> None:", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.calculation_variant_id", "modulename": "howard.objects.variants", "qualname": "Variants.calculation_variant_id", "kind": "function", "doc": "

    The function calculation_variant_id adds a variant ID annotation to a VCF file header and\nupdates the INFO field of a variants table with the variant ID.

    \n", "signature": "(self) -> None:", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.calculation_extract_snpeff", "modulename": "howard.objects.variants", "qualname": "Variants.calculation_extract_snpeff", "kind": "function", "doc": "

    This function extracts SnpEff annotations from the specified field in the VCF file and processes them according to the provided parameters. The annotations can be exploded into separate rows, converted into JSON format, and/or ensured to be unique. The processed annotations are then added to the VCF file with the specified prefixes.

    \n\n

    Args:\n snpeff_field (str): The annotation field in the VCF file to extract SnpEff annotations from. Default is \"ANN\".\n snpeff_hgvs (str): The prefix for the HGVS annotations extracted from SnpEff. Default is \"snpeff_hgvs\".\n snpeff_explode (bool): Whether to explode the annotations into separate rows. Default is \"snpeff_\".\n snpeff_json (bool): Whether to convert the annotations into JSON format. Default is \"snpeff_json\".\n uniquify (bool): Whether to ensure unique annotations. Default is True.

    \n\n

    Returns:\n None

    \n", "signature": "(\tself,\tsnpeff_field: str = 'ANN',\tsnpeff_hgvs: str = 'snpeff_hgvs',\tsnpeff_explode: bool = 'snpeff_',\tsnpeff_json: bool = 'snpeff_json',\tuniquify: bool = True) -> None:", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.calculation_extract_nomen", "modulename": "howard.objects.variants", "qualname": "Variants.calculation_extract_nomen", "kind": "function", "doc": "

    This function extracts the HGVS nomenclature from the calculation/identification of NOMEN.

    \n", "signature": "(self) -> None:", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.calculation_find_by_pipeline", "modulename": "howard.objects.variants", "qualname": "Variants.calculation_find_by_pipeline", "kind": "function", "doc": "

    The function calculation_find_by_pipeline performs a calculation to find the number of\npipeline/sample for a variant and updates the variant information in a VCF file.

    \n\n
    Parameters
    \n\n
      \n
    • tag: The tag parameter is a string that represents the annotation field for the\n\"findbypipeline\" information in the VCF file. It is used to create the annotation field in the\nVCF header and to update the corresponding field in the variants table, defaults to\nfindbypipeline
    • \n
    \n", "signature": "(self, tag: str = 'findbypipeline') -> None:", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.calculation_genotype_concordance", "modulename": "howard.objects.variants", "qualname": "Variants.calculation_genotype_concordance", "kind": "function", "doc": "

    The function calculation_genotype_concordance calculates the genotype concordance for\nmulti-caller VCF files and updates the variant information in the database.

    \n", "signature": "(self) -> None:", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.calculation_barcode", "modulename": "howard.objects.variants", "qualname": "Variants.calculation_barcode", "kind": "function", "doc": "

    The calculation_barcode function calculates barcode values for variants in a VCF file and\nupdates the INFO field in the file with the calculated barcode values.

    \n\n
    Parameters
    \n\n
      \n
    • tag: The tag parameter in the calculation_barcode function is used to specify the tag\nname that will be used for the barcode calculation in the VCF file. If no tag name is provided,\nthe default tag name is set to \"barcode\", defaults to barcode
    • \n
    \n", "signature": "(self, tag: str = 'barcode') -> None:", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.calculation_barcode_family", "modulename": "howard.objects.variants", "qualname": "Variants.calculation_barcode_family", "kind": "function", "doc": "

    The calculation_barcode_family function calculates barcode values for variants in a VCF file\nand updates the INFO field in the file with the calculated barcode values.

    \n\n
    Parameters
    \n\n
      \n
    • tag: The tag parameter in the calculation_barcode_family function is used to specify\nthe barcode tag that will be added to the VCF file during the calculation process. If no value\nis provided for the tag parameter, the default value used is \"BCF\", defaults to BCF
    • \n
    • tag_samples: The tag_samples parameter in the calculation_barcode_family function is\nused to specify the barcode tag that will be added to the VCF file for samples during the\ncalculation process. If no value is provided for the tag_samples parameter, the default value\nused is \"BCFS\", defaults to BCFS
    • \n
    \n", "signature": "(self, tag: str = None, tag_samples: str = None) -> None:", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.calculation_trio", "modulename": "howard.objects.variants", "qualname": "Variants.calculation_trio", "kind": "function", "doc": "

    The calculation_trio function performs trio calculations on a VCF file by adding trio\ninformation to the INFO field of each variant.

    \n", "signature": "(self) -> None:", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.calculation_vaf_normalization", "modulename": "howard.objects.variants", "qualname": "Variants.calculation_vaf_normalization", "kind": "function", "doc": "

    The calculation_vaf_normalization function calculates the VAF (Variant Allele Frequency)\nnormalization for each sample in a VCF file and updates the FORMAT and INFO fields accordingly.

    \n\n
    Returns
    \n\n
    \n

    The function does not return anything.

    \n
    \n", "signature": "(self) -> None:", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.calculation_genotype_stats", "modulename": "howard.objects.variants", "qualname": "Variants.calculation_genotype_stats", "kind": "function", "doc": "

    The calculation_genotype_stats function calculates genotype statistics for a given information\nfield in a VCF file and updates the INFO column of the variants table with the calculated\nstatistics.

    \n\n
    Parameters
    \n\n
      \n
    • info: The info parameter is a string that represents the type of information for which\ngenotype statistics are calculated. It is used to generate various VCF info tags for the\nstatistics, such as the number of occurrences, the list of values, the minimum value, the\nmaximum value, the mean, the median, defaults to VAF
    • \n
    \n", "signature": "(self, info: str = 'VAF') -> None:", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.calculation_transcripts_annotation", "modulename": "howard.objects.variants", "qualname": "Variants.calculation_transcripts_annotation", "kind": "function", "doc": "

    The calculation_transcripts_annotation function creates a transcripts table and adds an info\nfield to it if transcripts are available.

    \n\n
    Parameters
    \n\n
      \n
    • info_json: The info_json parameter in the calculation_transcripts_annotation method\nis a string parameter that represents the information field to be used in the transcripts JSON.\nIt is used to specify the JSON format for the transcripts information. If no value is provided\nwhen calling the method, it defaults to \"
    • \n
    • info_format: The info_format parameter in the calculation_transcripts_annotation\nmethod is a string parameter that specifies the format of the information field to be used in\nthe transcripts JSON. It is used to define the format of the information field
    • \n
    \n", "signature": "(self, info_json: str = None, info_format: str = None) -> None:", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.calculation_transcripts_prioritization", "modulename": "howard.objects.variants", "qualname": "Variants.calculation_transcripts_prioritization", "kind": "function", "doc": "

    The function calculation_transcripts_prioritization creates a transcripts table and\nprioritizes transcripts based on certain criteria.

    \n", "signature": "(self) -> None:", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.calculation_transcripts_export", "modulename": "howard.objects.variants", "qualname": "Variants.calculation_transcripts_export", "kind": "function", "doc": "

    \n", "signature": "(self) -> None:", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.create_transcript_view", "modulename": "howard.objects.variants", "qualname": "Variants.create_transcript_view", "kind": "function", "doc": "

    The create_transcript_view function generates a transcript view by processing data from a\nspecified table based on provided parameters and structural information.

    \n\n
    Parameters
    \n\n
      \n
    • transcripts_table: The transcripts_table parameter in the create_transcript_view function\nis used to specify the name of the table that will store the final transcript view data. If a table\nname is not provided, the function will create a new table to store the transcript view data, and by\ndefault,, defaults to transcripts
    • \n
    • transcripts_table_drop: The transcripts_table_drop parameter in the\ncreate_transcript_view function is a boolean parameter that determines whether to drop the\nexisting transcripts table before creating a new one. If transcripts_table_drop is set to True,\nthe function will drop the existing transcripts table if it exists, defaults to False
    • \n
    • param: The param parameter in the create_transcript_view function is a dictionary that\ncontains information needed to create a transcript view. It includes details such as the structure\nof the transcripts, columns mapping, column formats, and other necessary information for generating\nthe view. This parameter allows for flexibility and customization
    • \n
    \n\n
    Returns
    \n\n
    \n

    The create_transcript_view function returns the name of the transcripts table that was\n created or modified during the execution of the function.

    \n
    \n", "signature": "(\tself,\ttranscripts_table: str = None,\ttranscripts_table_drop: bool = False,\tparam: dict = {}) -> str:", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.create_transcript_view_from_columns_map", "modulename": "howard.objects.variants", "qualname": "Variants.create_transcript_view_from_columns_map", "kind": "function", "doc": "

    The create_transcript_view_from_columns_map function generates a temporary table view based on\nspecified columns mapping for transcripts data.

    \n\n
    Parameters
    \n\n
      \n
    • transcripts_table: The transcripts_table parameter is a string that specifies the name\nof the table where the transcripts data is stored or will be stored in the database. This table\ntypically contains information about transcripts such as Ensembl transcript IDs, gene names,\nscores, predictions, etc. It defaults to \"transcripts, defaults to transcripts
    • \n
    • columns_maps: The columns_maps parameter is a dictionary that contains information\nabout how to map columns from a transcripts table to create a view. Each entry in the\ncolumns_maps list represents a mapping configuration for a specific set of columns. It\ntypically includes details such as the main transcript column and additional information columns
    • \n
    • added_columns: The added_columns parameter in the\ncreate_transcript_view_from_columns_map function is a list that stores the additional columns\nthat will be added to the view being created based on the columns map provided. These columns\nare generated by exploding the transcript information columns along with the main transcript\ncolumn
    • \n
    • temporary_tables: The temporary_tables parameter in the\ncreate_transcript_view_from_columns_map function is a list that stores the names of temporary\ntables created during the process of creating a transcript view from a columns map. These\ntemporary tables are used to store intermediate results or transformations before the final view\nis generated
    • \n
    • annotation_fields: The annotation_fields parameter in the\ncreate_transcript_view_from_columns_map function is a list that stores the fields that are\nused for annotation in the query view creation process. These fields are extracted from the\ntranscripts_column and transcripts_infos_columns specified in the `columns
    • \n
    • column_rename: The column_rename parameter in the\ncreate_transcript_view_from_columns_map function is a dictionary that allows you to specify\ncustom renaming for columns during the creation of the temporary table view. This parameter\nprovides a mapping of original column names to the desired renamed column names. By using this\nparameter,
    • \n
    • column_clean: The column_clean parameter in the\ncreate_transcript_view_from_columns_map function is a boolean flag that determines whether the\ncolumn values should be cleaned or not. If set to True, the column values will be cleaned by\nremoving any non-alphanumeric characters from them. This cleaning process ensures, defaults to\nFalse
    • \n
    • column_case: The column_case parameter in the create_transcript_view_from_columns_map\nfunction is used to specify the case transformation to be applied to the columns during the view\ncreation process. It allows you to control whether the column values should be converted to\nlowercase, uppercase, or remain unchanged
    • \n
    \n\n
    Returns
    \n\n
    \n

    The create_transcript_view_from_columns_map function returns a tuple containing three\n lists: added_columns, temporary_tables, and annotation_fields.

    \n
    \n", "signature": "(\tself,\ttranscripts_table: str = 'transcripts',\tcolumns_maps: dict = {},\tadded_columns: list = [],\ttemporary_tables: list = None,\tannotation_fields: list = None,\tcolumn_rename: dict = {},\tcolumn_clean: bool = False,\tcolumn_case: str = None) -> tuple[list, list, list]:", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.create_transcript_view_from_column_format", "modulename": "howard.objects.variants", "qualname": "Variants.create_transcript_view_from_column_format", "kind": "function", "doc": "

    The create_transcript_view_from_column_format function generates a transcript view based on\nspecified column formats, adds additional columns and annotation fields, and returns the list of\ntemporary tables and annotation fields.

    \n\n
    Parameters
    \n\n
      \n
    • transcripts_table: The transcripts_table parameter is a string that specifies the name\nof the table containing the transcripts data. This table will be used as the base table for\ncreating the transcript view. The default value for this parameter is \"transcripts\", but you can\nprovide a different table name if needed, defaults to transcripts
    • \n
    • column_formats: The column_formats parameter is a dictionary that contains information\nabout the columns to be used for creating the transcript view. Each entry in the dictionary\nspecifies the mapping between a transcripts column and a transcripts infos column. This\nparameter allows you to define how the columns from the transcripts table should be transformed\nor mapped
    • \n
    • temporary_tables: The temporary_tables parameter in the\ncreate_transcript_view_from_column_format function is a list that stores the names of\ntemporary views created during the process of creating a transcript view from a column format.\nThese temporary views are used to manipulate and extract data before generating the final\ntranscript view
    • \n
    • annotation_fields: The annotation_fields parameter in the\ncreate_transcript_view_from_column_format function is a list that stores the annotation fields\nthat are extracted from the temporary views created during the process. These annotation fields\nare obtained by querying the temporary views and extracting the column names excluding specific\ncolumns like `#CH
    • \n
    • column_rename: The column_rename parameter in the\ncreate_transcript_view_from_column_format function is a dictionary that allows you to specify\ncustom renaming of columns in the transcripts infos table. By providing a mapping of original\ncolumn names to new column names in this dictionary, you can rename specific columns during the\nprocess
    • \n
    • column_clean: The column_clean parameter in the\ncreate_transcript_view_from_column_format function is a boolean flag that determines whether\nthe transcripts infos columns should undergo a cleaning process. If set to True, the columns\nwill be cleaned during the creation of the transcript view based on the specified column format,\ndefaults to False
    • \n
    • column_case: The column_case parameter in the\ncreate_transcript_view_from_column_format function is used to specify the case transformation\nto be applied to the columns in the transcript view. It can be set to either \"upper\" or \"lower\"\nto convert the column names to uppercase or lowercase, respectively
    • \n
    \n\n
    Returns
    \n\n
    \n

    The create_transcript_view_from_column_format function returns two lists:\n temporary_tables and annotation_fields.

    \n
    \n", "signature": "(\tself,\ttranscripts_table: str = 'transcripts',\tcolumn_formats: dict = {},\ttemporary_tables: list = None,\tannotation_fields: list = None,\tcolumn_rename: dict = {},\tcolumn_clean: bool = False,\tcolumn_case: str = None) -> tuple[list, list, list]:", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.annotation_format_to_table", "modulename": "howard.objects.variants", "qualname": "Variants.annotation_format_to_table", "kind": "function", "doc": "

    The annotation_format_to_table function converts annotation data from a VCF file into a\nstructured table format, ensuring unique values and creating a temporary table for further\nprocessing or analysis.

    \n\n
    Parameters
    \n\n
      \n
    • uniquify: The uniquify parameter is a boolean flag that determines whether to ensure\nunique values in the output or not. If set to True, the function will make sure that the\noutput values are unique, defaults to True
    • \n
    • annotation_field: The annotation_field parameter refers to the field in the VCF file\nthat contains the annotation information for each variant. This field is used to extract the\nannotation details for further processing in the function. By default, it is set to \"ANN\",\ndefaults to ANN
    • \n
    • annotation_id: The annotation_id parameter in the annotation_format_to_table method\nis used to specify the identifier for the annotation feature. This identifier will be used as a\ncolumn name in the resulting table or view that is created based on the annotation data. It\nhelps in uniquely identifying each annotation entry in the resulting table, defaults to Feature_ID
    • \n
    • view_name: The view_name parameter in the annotation_format_to_table method is used\nto specify the name of the temporary table that will be created to store the transformed\nannotation data. This table will hold the extracted information from the annotation field in a\nstructured format for further processing or analysis. By default, it is set to transcripts
    • \n
    • column_rename: The column_rename parameter in the annotation_format_to_table method\nis a dictionary that allows you to specify custom renaming for columns. By providing key-value\npairs in this dictionary, you can rename specific columns in the resulting table or view that is\ncreated based on the annotation data. This feature enables
    • \n
    • column_clean: The column_clean parameter in the annotation_format_to_table method is\na boolean flag that determines whether the annotation field should undergo a cleaning process.\nIf set to True, the function will clean the annotation field before further processing. This\ncleaning step may involve removing any unwanted characters, formatting inconsistencies, defaults\nto False
    • \n
    • column_case: The column_case parameter in the annotation_format_to_table method is\nused to specify the case transformation to be applied to the column names extracted from the\nannotation data. It allows you to set the case of the column names to either lowercase or\nuppercase for consistency or other specific requirements during the conversion
    • \n
    • column_split: The column_split parameter in the annotation_format_to_table method is\nused to specify the separator for splitting field values. Defaults to '&'. Set to None to disable.
    • \n
    \n\n
    Returns
    \n\n
    \n

    The function annotation_format_to_table is returning the name of the view created,\n which is stored in the variable view_name.

    \n
    \n", "signature": "(\tself,\tannotation_field: str = 'ANN',\tannotation_id: str = 'Feature_ID',\tview_name: str = 'transcripts',\tcolumn_rename: dict = {},\tcolumn_clean: bool = False,\tcolumn_case: str = None,\tcolumn_split: str = '&') -> str:", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.transcripts_export", "modulename": "howard.objects.variants", "qualname": "Variants.transcripts_export", "kind": "function", "doc": "

    Exports transcript data from a table to a specified file.

    \n\n

    Args:\n transcripts_table (str): The name of the transcripts table.\n param (dict): A dictionary of parameters to customize the export process. This can include various options such as filtering criteria, formatting options, etc.

    \n\n

    Returns:\n bool: Returns True if the export is successful, False otherwise.

    \n\n

    This function exports transcript data to a specified file, using the provided parameters to customize the export process. The function returns True if the export is successful, and False otherwise.

    \n", "signature": "(self, transcripts_table: str = None, param: dict = {}) -> bool:", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.transcripts_prioritization", "modulename": "howard.objects.variants", "qualname": "Variants.transcripts_prioritization", "kind": "function", "doc": "

    The transcripts_prioritization function prioritizes transcripts based on certain parameters\nand updates the variants table with the prioritized information.

    \n\n
    Parameters
    \n\n
      \n
    • transcripts_table: The transcripts_table parameter is a string that specifies the name\nof the table containing transcripts data. If no value is provided, it defaults to \"transcripts\".\nThis parameter is used to identify the table where the transcripts data is stored for the\nprioritization process
    • \n
    • param: The param parameter in the transcripts_prioritization method is a dictionary\nthat contains various configuration settings for the prioritization process of transcripts. It\nis used to customize the behavior of the prioritization algorithm and includes settings such as\nthe prefix for prioritization fields, default profiles, and other
    • \n
    \n\n
    Returns
    \n\n
    \n

    The function transcripts_prioritization returns a boolean value True if the\n transcripts prioritization process is successfully completed, and False if there are any\n issues or if no profile is defined for transcripts prioritization.

    \n
    \n", "signature": "(self, transcripts_table: str = None, param: dict = {}) -> bool:", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.transcript_view_to_variants", "modulename": "howard.objects.variants", "qualname": "Variants.transcript_view_to_variants", "kind": "function", "doc": "

    The transcript_view_to_variants function updates a variants table with information from\ntranscripts in JSON format.

    \n\n
    Parameters
    \n\n
      \n
    • transcripts_table: The transcripts_table parameter is used to specify the name of the\ntable containing the transcripts data. If this parameter is not provided, the function will\nattempt to retrieve it from the param dictionary or use a default value of \"transcripts\"
    • \n
    • transcripts_column_id: The transcripts_column_id parameter is used to specify the\ncolumn in the transcripts_table that contains the unique identifier for each transcript. This\nidentifier is used to match transcripts with variants in the database
    • \n
    • transcripts_info_json: The transcripts_info_json parameter is used to specify the name\nof the column in the variants table where the transcripts information will be stored in JSON\nformat. This parameter allows you to define the column in the variants table that will hold the\nJSON-formatted information about transcripts
    • \n
    • transcripts_info_field_json: The transcripts_info_field_json parameter is used to\nspecify the field in the VCF header that will contain information about transcripts in JSON\nformat. This field will be added to the VCF header as an INFO field with the specified name
    • \n
    • transcripts_info_format: The transcripts_info_format parameter is used to specify the\nformat of the information about transcripts that will be stored in the variants table. This\nformat can be used to define how the transcript information will be structured or displayed\nwithin the variants table
    • \n
    • transcripts_info_field_format: The transcripts_info_field_format parameter is used to\nspecify the field in the VCF header that will contain information about transcripts in a\nspecific format. This field will be added to the VCF header as an INFO field with the specified\nname
    • \n
    • param: The param parameter in the transcript_view_to_variants method is a dictionary\nthat contains various configuration settings related to transcripts. It is used to provide\ndefault values for certain parameters if they are not explicitly provided when calling the\nmethod. The param dictionary can be passed as an argument
    • \n
    \n\n
    Returns
    \n\n
    \n

    The function transcript_view_to_variants returns a boolean value. It returns True\n if the operation is successful and False if certain conditions are not met.

    \n
    \n", "signature": "(\tself,\ttranscripts_table: str = None,\ttranscripts_column_id: str = None,\ttranscripts_info_json: str = None,\ttranscripts_info_field_json: str = None,\ttranscripts_info_format: str = None,\ttranscripts_info_field_format: str = None,\tparam: dict = {}) -> bool:", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.rename_info_fields", "modulename": "howard.objects.variants", "qualname": "Variants.rename_info_fields", "kind": "function", "doc": "

    The rename_info_fields function renames specified fields in a VCF file header and updates\ncorresponding INFO fields in the variants table.

    \n\n
    Parameters
    \n\n
      \n
    • fields_to_rename: The fields_to_rename parameter is a dictionary that contains the\nmapping of fields to be renamed in a VCF (Variant Call Format) file. The keys in the dictionary\nrepresent the original field names that need to be renamed, and the corresponding values\nrepresent the new names to which the fields should be
    • \n
    • table: The table parameter in the rename_info_fields function represents the name of\nthe table in which the variants data is stored. This table contains information about genetic\nvariants, and the function updates the corresponding INFO fields in this table when renaming\nspecified fields in the VCF file header
    • \n
    \n\n
    Returns
    \n\n
    \n

    The rename_info_fields function returns a dictionary fields_processed that contains\n the original field names as keys and their corresponding new names (or None if the field was\n removed) as values after renaming or removing specified fields in a VCF file header and updating\n corresponding INFO fields in the variants table.

    \n
    \n", "signature": "(self, fields_to_rename: dict = None, table: str = None) -> dict:", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.recreate_info_fields", "modulename": "howard.objects.variants", "qualname": "Variants.recreate_info_fields", "kind": "function", "doc": "

    The recreate_info_fields function renames specified fields in a VCF file header and updates\ncorresponding INFO fields in the variants table.

    \n\n
    Parameters
    \n\n
      \n
    • fields_to_rename: The fields_to_rename parameter is a dictionary that contains the\nmapping of fields to be renamed in a VCF (Variant Call Format) file. The keys in the dictionary\nrepresent the original field names that need to be renamed, and the corresponding values\nrepresent the new names to which the fields should be renamed. Default {}
    • \n
    • table: The table parameter in the recreate_info_fields function represents the name of\nthe table in which the variants data is stored. This table contains information about genetic\nvariants, and the function updates the corresponding INFO fields in this table when renaming\nspecified fields in the VCF file header. Default Variants table 'variants'.
    • \n
    \n\n
    Returns
    \n\n
    \n

    The recreate_info_fields function returns a dictionary fields_renamed that contains\n the original field names as keys and their corresponding new names (or None if the field was\n removed) as values after renaming or removing specified fields in a VCF file header and updating\n corresponding INFO fields in the variants table.

    \n
    \n", "signature": "(self, fields_to_rename: dict = None, table: str = None) -> dict:", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.calculation_rename_info_fields", "modulename": "howard.objects.variants", "qualname": "Variants.calculation_rename_info_fields", "kind": "function", "doc": "

    The calculation_rename_info_fields function retrieves parameters from a dictionary, updates\nfields to rename and table if provided, and then calls another function to rename the fields.

    \n\n
    Parameters
    \n\n
      \n
    • fields_to_rename: fields_to_rename is a dictionary that contains the fields to be\nrenamed in a table. Each key-value pair in the dictionary represents the original field name as\nthe key and the new field name as the value
    • \n
    • table: The table parameter in the calculation_rename_info_fields method is used to\nspecify the name of the table for which the fields are to be renamed. It is a string type\nparameter
    • \n
    • operation_name: The operation_name parameter in the calculation_rename_info_fields\nmethod is a string that specifies the name of the operation being performed. In this context, it\nis used as a default value for the operation name if not explicitly provided when calling the\nfunction, defaults to RENAME_INFO_FIELDS
    • \n
    \n", "signature": "(\tself,\tfields_to_rename: dict = None,\ttable: str = None,\toperation_name: str = 'RENAME_INFO_FIELDS') -> None:", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.calculation_recreate_info_fields", "modulename": "howard.objects.variants", "qualname": "Variants.calculation_recreate_info_fields", "kind": "function", "doc": "

    The calculation_recreate_info_fields function retrieves parameters from a dictionary, recreate\nINFO fields with rename and table if provided, and then calls another function to rename the fields.

    \n\n
    Parameters
    \n\n
      \n
    • fields_to_rename: fields_to_rename is a dictionary that contains the fields to be\nrenamed in a table. Each key-value pair in the dictionary represents the original field name as\nthe key and the new field name as the value
    • \n
    • table: The table parameter in the calculation_recreate_info_fields method is used to\nspecify the name of the table for which the fields are to be renamed. It is a string type\nparameter
    • \n
    • operation_name: The operation_name parameter in the calculation_recreate_info_fields\nmethod is a string that specifies the name of the operation being performed. In this context, it\nis used as a default value for the operation name if not explicitly provided when calling the\nfunction, defaults to RENAME_INFO_FIELDS
    • \n
    \n", "signature": "(\tself,\tfields_to_rename: dict = None,\ttable: str = None,\toperation_name: str = 'RENAME_INFO_FIELDS') -> None:", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.create_annotations_view", "modulename": "howard.objects.variants", "qualname": "Variants.create_annotations_view", "kind": "function", "doc": "

    The create_annotations_view function creates a SQL view from fields in a VCF INFO column.

    \n\n
    Parameters
    \n\n
      \n
    • table: The table parameter in the create_annotations_view function is used to specify\nthe name of the table from which the fields are to be extracted. This table contains the\nvariants data, and the function creates a view based on the fields in the INFO column of this\ntable. Defaults to None
    • \n
    • view: The view parameter in the create_annotations_view function is used to specify\nthe name of the view that will be created based on the fields in the VCF INFO column. This view\nwill contain the extracted fields from the INFO column in a structured format for further\nprocessing or analysis. Defaults to None
    • \n
    • view_type: The view_type parameter in the create_annotations_view function is used to\nspecify the type of view that will be created. It can be either a VIEW or a TABLE, and the\nfunction will create the view based on the specified type. Defaults to VIEW
    • \n
    • view_mode: The view_mode parameter in the create_annotations_view function is used to\nspecify the mode of view that will be created. It can be either a full or an explore, and the\nfunction will create the view based on the specified algorithm/SQL query. Defaults to full
    • \n
    • fields: The fields parameter in the create_annotations_view function is a list that\ncontains the names of the fields to be extracted from the INFO column in the VCF file. These\nfields will be used to create the view with the specified columns and data extracted from the\nINFO column. Defaults to None
    • \n
    • fields_needed: The fields_needed parameter in the create_annotations_view function is\na list of fields that are required for the view. These fields are essential for the view and\nmust be included in the view to ensure that the data is complete and accurate. By default, the\nfunction will include all the table's columns in the view, but you can specify the\nrequired fields using this parameter. Defaults to None, which means the key columns corresponding\nto a variant [\"#CHROM\", \"POS\", \"REF\", \"ALT\"]
    • \n
    • fields_needed_all: The fields_needed_all parameter in the create_annotations_view\nfunction is a boolean flag that determines whether to include all fields in the table in the\nview. If set to True, the function will include all fields in the table in the view (only\nif fields_needed is False). If set to False, the function will only include the\nneeded fields specified in the fields_needed parameter in the view. Defaults to False
    • \n
    • detect_type_list: The detect_type_list parameter in the create_annotations_view\nfunction is a boolean flag that determines whether to detect the type of the fields extracted\nfrom the INFO column. If set to True, the function will detect the type of the fields and\nhandle them accordingly in the view. Defaults to False
    • \n
    • fields_not_exists: The fields_not_exists parameter in the create_annotations_view\nfunction is a boolean flag that determines whether to include fields that do not exist in the\ntable in the view. If set to True, the function will include fields that do not exist in the\ntable as NULL values in the view. Defaults to True
    • \n
    • info_prefix_column: The info_prefix_column parameter in the create_annotations_view\nfunction is used to specify a prefix that will be added to the field names in the view.\nIf provided, the function will generate fields with the prefix (e.g. \"\", \"INFOS_\", \"annotations_\").\nIf not provided (None), the function will not generate columns. This prefix helps in\ndistinguishing the fields extracted from the INFO column in the view. Defaults to None.
    • \n
    • info_struct_column: The info_struct_column parameter in the create_annotations_view\nfunction is used to specify the name of the column that will contain the extracted fields from\nthe INFO column in the view. This column will hold the structured data extracted from the INFO\ncolumn for further processing or analysis (e.g. \"INFOS\" or \"annotations\"). If not provided (None),\nthe function will not generate the column. Defaults to None
    • \n
    • sample_struct_column: The sample_struct_column parameter in the create_annotations_view\nfunction is used to specify the name of the column that will contain the extracted formats from\nthe samples columns in the view. This column will hold the structured data extracted from all\nsamples column for further processing or analysis (e.g. \"SAMPLES\" or \"genotypes\"). If not provided (None),\nthe function will not generate the column. Defaults to None
    • \n
    • drop_view: The drop_view parameter in the create_annotations_view function is a boolean\nflag that determines whether to drop the existing view with the same name before creating a new\nview. If set to True, the function will drop the existing view before creating a new view with\nthe specified name. Defaults to False
    • \n
    • fields_to_rename: The fields_to_rename parameter in the create_annotations_view\nfunction is a dictionary that contains the mapping of fields to be renamed in the VCF file. The\nkeys in the dictionary represent the original field names that need to be renamed, and the\ncorresponding values represent the new names to which the fields should be. Defaults to None
    • \n
    • fields_forced_as_varchar: Force fields as type VARCHAR
    • \n
    • limit: The limit parameter in the create_annotations_view function is an integer that\nspecifies the maximum number of rows to be included in the view. If provided, the function will\nlimit the number of rows in the view to the specified value. Defaults to None
    • \n
    \n\n
    Returns
    \n\n
    \n

    The create_annotations_view function returns the name of the view that is created\n based on the fields extracted from the INFO column in the VCF file. This view contains the\n extracted fields in a structured format for further processing or analysis. Defaults to None

    \n
    \n", "signature": "(\tself,\ttable: str = None,\tview: str = None,\tview_type: str = None,\tview_mode: str = None,\tfields: list = None,\tfields_needed: list = None,\tfields_needed_all: bool = False,\tdetect_type_list: bool = True,\tfields_not_exists: bool = True,\tinfo_prefix_column: str = None,\tinfo_struct_column: str = None,\tsample_struct_column: str = None,\tdrop_view: bool = False,\tfields_to_rename: dict = None,\tfields_forced_as_varchar: bool = False,\tlimit: int = None) -> str:", "funcdef": "def"}, {"fullname": "howard.objects.variants.Variants.remove_tables_or_views", "modulename": "howard.objects.variants", "qualname": "Variants.remove_tables_or_views", "kind": "function", "doc": "

    Remove specified tables and views from the database.

    \n\n

    Args:\n tables (list): A list of table names to be removed. Default is None.\n views (list): A list of view names to be removed. Default is None.

    \n\n

    Returns:\n list: A list of tables and views that were successfully removed.

    \n\n

    This function attempts to remove the specified tables and views from the database.\nIt first tries to drop each item as a table, and if that fails, it tries to drop it as a view.\nIf an item is neither a table nor a view, an error is logged.

    \n", "signature": "(self, tables: list = None, views: list = None) -> list:", "funcdef": "def"}, {"fullname": "howard.tools", "modulename": "howard.tools", "kind": "module", "doc": "

    \n"}, {"fullname": "howard.tools.annotation", "modulename": "howard.tools.annotation", "kind": "module", "doc": "

    \n"}, {"fullname": "howard.tools.annotation.annotation", "modulename": "howard.tools.annotation", "qualname": "annotation", "kind": "function", "doc": "

    The annotation function performs annotation on a VCF file based on specified parameters and\nexports the annotated data.

    \n\n
    Parameters
    \n\n
      \n
    • args: The args parameter is likely an object or dictionary containing various arguments\npassed to the annotation function. It is not clear from the code snippet what specific arguments\nare expected or required
    • \n
    \n", "signature": "(\targs: <module 'argparse' from '/Users/lebechea/miniconda/envs/howard_devel/lib/python3.10/argparse.py'>) -> None:", "funcdef": "def"}, {"fullname": "howard.tools.calculation", "modulename": "howard.tools.calculation", "kind": "module", "doc": "

    \n"}, {"fullname": "howard.tools.calculation.calculation", "modulename": "howard.tools.calculation", "qualname": "calculation", "kind": "function", "doc": "

    This function performs calculations on VCF data based on user input and exports the results.

    \n\n
    Parameters
    \n\n
      \n
    • args: The args parameter is a command line argument parser object that contains the\narguments passed to the script when it was executed
    • \n
    \n", "signature": "(\targs: <module 'argparse' from '/Users/lebechea/miniconda/envs/howard_devel/lib/python3.10/argparse.py'>) -> None:", "funcdef": "def"}, {"fullname": "howard.tools.databases", "modulename": "howard.tools.databases", "kind": "module", "doc": "

    \n"}, {"fullname": "howard.tools.databases.databases", "modulename": "howard.tools.databases", "qualname": "databases", "kind": "function", "doc": "

    The function downloads databases and logs the start and end of the process.

    \n\n
    Parameters
    \n\n
      \n
    • args: The \"args\" parameter is likely an object or dictionary containing various arguments or\noptions related to the \"databases\" function. Without more context, it's difficult to say exactly\nwhat these arguments might be, but they could include things like the names or locations of\ndatabases to download, authentication credentials, or
    • \n
    \n", "signature": "(\targs: <module 'argparse' from '/Users/lebechea/miniconda/envs/howard_devel/lib/python3.10/argparse.py'>) -> None:", "funcdef": "def"}, {"fullname": "howard.tools.filter", "modulename": "howard.tools.filter", "kind": "module", "doc": "

    \n"}, {"fullname": "howard.tools.filter.filter", "modulename": "howard.tools.filter", "qualname": "filter", "kind": "function", "doc": "

    This Python function loads and queries data from a VCF file based on user input and exports the\nresults.

    \n\n
    Parameters
    \n\n
      \n
    • args: args is an object that contains the arguments passed to the function. It is likely a\nNamespace object created by parsing command line arguments using argparse
    • \n
    \n", "signature": "(\targs: <module 'argparse' from '/Users/lebechea/miniconda/envs/howard_devel/lib/python3.10/argparse.py'>) -> None:", "funcdef": "def"}, {"fullname": "howard.tools.gui", "modulename": "howard.tools.gui", "kind": "module", "doc": "

    \n"}, {"fullname": "howard.tools.gui.main_folder", "modulename": "howard.tools.gui", "qualname": "main_folder", "kind": "variable", "doc": "

    \n", "default_value": "'/Users/lebechea/BIOINFO/git/HOWARD/howard/tools'"}, {"fullname": "howard.tools.gui.image_dir", "modulename": "howard.tools.gui", "qualname": "image_dir", "kind": "variable", "doc": "

    \n", "default_value": "'/Users/lebechea/BIOINFO/git/HOWARD/howard/tools/../../images/'"}, {"fullname": "howard.tools.gui.gui", "modulename": "howard.tools.gui", "qualname": "gui", "kind": "function", "doc": "

    The gui function generates a graphical user interface (GUI) for a Python script using the\nargparse module and the Gooey library.

    \n\n
    Parameters
    \n\n
      \n
    • args: The args parameter is of type argparse, which is a module in Python used for\nparsing command-line arguments. It is used to define the arguments that the program accepts and to\ngenerate help messages. In this code, it seems that args is an object that contains information\nabout the
    • \n
    \n", "signature": "(*args, **kwargs):", "funcdef": "def"}, {"fullname": "howard.tools.help", "modulename": "howard.tools.help", "kind": "module", "doc": "

    \n"}, {"fullname": "howard.tools.help.main_folder", "modulename": "howard.tools.help", "qualname": "main_folder", "kind": "variable", "doc": "

    \n", "default_value": "'/Users/lebechea/BIOINFO/git/HOWARD/howard/tools'"}, {"fullname": "howard.tools.help.help", "modulename": "howard.tools.help", "qualname": "help", "kind": "function", "doc": "

    The help function generates help documentation in various formats (parser, Markdown, HTML) based\non the provided arguments and setup configuration.

    \n\n
    Parameters
    \n\n
      \n
    • args: The args parameter is of type argparse.Namespace. It is used to pass command-line\narguments to the help function. The argparse module provides a way to parse command-line\narguments and generate help messages. The Namespace object holds the values of the command-line\narguments
    • \n
    \n", "signature": "(\targs: <module 'argparse' from '/Users/lebechea/miniconda/envs/howard_devel/lib/python3.10/argparse.py'>) -> None:", "funcdef": "def"}, {"fullname": "howard.tools.hgvs", "modulename": "howard.tools.hgvs", "kind": "module", "doc": "

    \n"}, {"fullname": "howard.tools.hgvs.hgvs", "modulename": "howard.tools.hgvs", "qualname": "hgvs", "kind": "function", "doc": "

    The hgvs function takes command line arguments, creates a VCF object, sets parameters and\nconfigurations, loads data from an input file, performs annotation using HGVS notation, exports the\noutput, and closes the connection.

    \n\n
    Parameters
    \n\n
      \n
    • args: The args parameter is of type argparse.Namespace and is used to parse command line\narguments. It contains the following attributes:
    • \n
    \n", "signature": "(\targs: <module 'argparse' from '/Users/lebechea/miniconda/envs/howard_devel/lib/python3.10/argparse.py'>) -> None:", "funcdef": "def"}, {"fullname": "howard.tools.interactive", "modulename": "howard.tools.interactive", "kind": "module", "doc": "

    \n"}, {"fullname": "howard.tools.interactive.launch_interactive_terminal", "modulename": "howard.tools.interactive", "qualname": "launch_interactive_terminal", "kind": "function", "doc": "

    Launch an interactive SQL terminal with DuckDB

    \n", "signature": "(args=None, variants=None, tmp=None, display_format='dataframe'):", "funcdef": "def"}, {"fullname": "howard.tools.prioritization", "modulename": "howard.tools.prioritization", "kind": "module", "doc": "

    \n"}, {"fullname": "howard.tools.prioritization.prioritization", "modulename": "howard.tools.prioritization", "qualname": "prioritization", "kind": "function", "doc": "

    The function performs prioritization on a VCF file based on user-specified configurations and\nexports the results.

    \n\n
    Parameters
    \n\n
      \n
    • args: args is an object that contains the command line arguments passed to the script. It is\nused to configure the behavior of the script and to provide input and output file paths, as well as\nother parameters needed for the execution of the script
    • \n
    \n", "signature": "(\targs: <module 'argparse' from '/Users/lebechea/miniconda/envs/howard_devel/lib/python3.10/argparse.py'>) -> None:", "funcdef": "def"}, {"fullname": "howard.tools.process", "modulename": "howard.tools.process", "kind": "module", "doc": "

    \n"}, {"fullname": "howard.tools.process.process", "modulename": "howard.tools.process", "qualname": "process", "kind": "function", "doc": "

    The \"process\" function processes input arguments, loads parameters in JSON format, creates a VCF\nobject, performs quick annotations, calculations, prioritizations, and queries, exports output, and\ncloses the connection.

    \n\n
    Parameters
    \n\n
      \n
    • args: args is a variable that contains the arguments passed to the function \"process\". It is\nassumed to be an object with several attributes, including \"config\", \"param\", \"input\", \"output\",\n\"annotations\", \"calculations\", \"prioritizations\", and \"query\". These attributes are used to
    • \n
    \n", "signature": "(\targs: <module 'argparse' from '/Users/lebechea/miniconda/envs/howard_devel/lib/python3.10/argparse.py'>) -> None:", "funcdef": "def"}, {"fullname": "howard.tools.query", "modulename": "howard.tools.query", "kind": "module", "doc": "

    \n"}, {"fullname": "howard.tools.query.query", "modulename": "howard.tools.query", "qualname": "query", "kind": "function", "doc": "

    This Python function loads and queries data from a VCF file based on user input and exports the\nresults.

    \n\n
    Parameters
    \n\n
      \n
    • args: args is an object that contains the arguments passed to the function. It is likely a\nNamespace object created by parsing command line arguments using argparse
    • \n
    \n", "signature": "(\targs: <module 'argparse' from '/Users/lebechea/miniconda/envs/howard_devel/lib/python3.10/argparse.py'>) -> None:", "funcdef": "def"}, {"fullname": "howard.tools.sort", "modulename": "howard.tools.sort", "kind": "module", "doc": "

    \n"}, {"fullname": "howard.tools.sort.sort", "modulename": "howard.tools.sort", "qualname": "sort", "kind": "function", "doc": "

    This Python function loads and sort variants from a VCF file based on user input and exports the\nresults.

    \n\n
    Parameters
    \n\n
      \n
    • args: args is an object that contains the arguments passed to the function. It is likely a\nNamespace object created by parsing command line arguments using argparse
    • \n
    \n", "signature": "(\targs: <module 'argparse' from '/Users/lebechea/miniconda/envs/howard_devel/lib/python3.10/argparse.py'>) -> None:", "funcdef": "def"}, {"fullname": "howard.tools.stats", "modulename": "howard.tools.stats", "kind": "module", "doc": "

    \n"}, {"fullname": "howard.tools.stats.stats", "modulename": "howard.tools.stats", "qualname": "stats", "kind": "function", "doc": "

    The stats() function takes in arguments, loads data from an input file, gets statistics on the data,\nand closes the connection.

    \n\n
    Parameters
    \n\n
      \n
    • args: args is a parameter that is passed to the function stats(). It is likely an object or a\ndictionary that contains various arguments or parameters that are needed by the function to perform\nits tasks. Some of the arguments that may be included in args are input file path, configuration\nsettings, and other parameters that are
    • \n
    \n", "signature": "(\targs: <module 'argparse' from '/Users/lebechea/miniconda/envs/howard_devel/lib/python3.10/argparse.py'>) -> None:", "funcdef": "def"}, {"fullname": "howard.tools.tools", "modulename": "howard.tools.tools", "kind": "module", "doc": "

    \n"}, {"fullname": "howard.tools.tools.PathType", "modulename": "howard.tools.tools", "qualname": "PathType", "kind": "class", "doc": "

    \n"}, {"fullname": "howard.tools.tools.PathType.__init__", "modulename": "howard.tools.tools", "qualname": "PathType.__init__", "kind": "function", "doc": "

    exists:\n True: a path that does exist\n False: a path that does not exist, in a valid parent directory\n None: don't care\ntype: file, dir, symlink, None, or a function returning True for valid paths\n None: don't care\ndash_ok: whether to allow \"-\" as stdin/stdout

    \n", "signature": "(exists=True, type='file', dash_ok=True)"}, {"fullname": "howard.tools.tools.arguments", "modulename": "howard.tools.tools", "qualname": "arguments", "kind": "variable", "doc": "

    \n", "default_value": "{'input': {'metavar': 'input', 'help': 'Input file path.\\nFormat file must be either VCF, Parquet, TSV, CSV, PSV or duckDB.\\nFiles can be compressesd (e.g. vcf.gz, tsv.gz).\\n', 'required': False, 'default': None, 'type': <howard.tools.tools.PathType object>, 'gooey': {'widget': 'FileChooser', 'options': {'wildcard': 'Parquet file (*.parquet)|*.parquet|All files (*)|*'}}}, 'output': {'metavar': 'output', 'help': 'Output file path.\\nFormat file must be either VCF, Parquet, TSV, CSV, PSV or duckDB.\\nFiles can be compressesd (e.g. vcf.gz, tsv.gz).\\n', 'required': False, 'default': None, 'type': <howard.tools.tools.PathType object>, 'gooey': {'widget': 'FileSaver'}}, 'param': {'metavar': 'param', 'help': 'Parameters JSON file (or string) defines parameters to process \\nannotations, calculations, prioritizations, convertions and queries.\\n', 'default': '{}', 'type': <howard.tools.tools.PathType object>, 'gooey': {'widget': 'FileChooser', 'options': {'initial_value': '', 'wildcard': 'JSON file (*.json)|*.json|All files (*)|*'}}}, 'query': {'metavar': 'query', 'help': "Query in SQL format\\n(e.g. 'SELECT * FROM variants LIMIT 50').\\n", 'default': None, 'type': <class 'str'>, 'gooey': {'widget': 'Textarea', 'options': {'initial_value': 'SELECT * FROM variants'}}, 'extra': {'param_section': 'query'}}, 'filter': {'metavar': 'filter', 'help': "Filter variant using SQL format\\n(e.g. 'POS < 100000').\\n", 'default': None, 'type': <class 'str'>, 'gooey': {'widget': 'Textarea', 'options': {'initial_value': ''}}}, 'samples': {'metavar': 'samples', 'help': "List of samples\\n(e.g. 
'sample1,sample2').\\n", 'default': None, 'type': <class 'str'>, 'gooey': {'widget': 'Textarea', 'options': {'initial_value': ''}}}, 'output_query': {'metavar': 'output', 'help': 'Output Query file.\\nFormat file must be either VCF, Parquet, TSV, CSV, PSV or duckDB.\\n', 'default': None, 'type': <howard.tools.tools.PathType object>, 'gooey': {'widget': 'FileSaver', 'options': {'wildcard': 'All files (*)|*'}}}, 'annotations': {'metavar': 'annotations', 'help': "Annotation with databases files, or with tools,\\nas a list of files in Parquet, VCF, BED, or keywords\\n (e.g. 'file.parquet,bcftools:file2.vcf.gz,annovar:refGene,snpeff').\\n- For a Parquet/VCF/BED, use file paths\\n (e.g. 'file1.parquet,file2.vcf.gz').\\n- For BCFTools annotation, use keyword 'bcftools' with file paths\\n (e.g. 'bcftools:file.vcf.gz:file.bed.gz').\\n- For Parquet annotation, use keyword 'parquet' with file paths\\n (e.g. 'parquet:file.parquet').\\n- For Annovar annotation, use keyword 'annovar' with annovar code\\n (e.g. 'annovar:refGene', 'annovar:refGene:cosmic70').\\n- For snpeff annotation, use keyword 'snpeff' with options\\n (e.g. 'snpeff', 'snpeff:-hgvs -noShiftHgvs -spliceSiteSize 3').\\n- For snpSift annotation, use keyword 'snpsift' with file paths\\n (e.g. 'snpsift:file.vcf.gz:file.bed.gz').\\n- For Exomiser annotation, use keyword 'exomiser' with options as key=value\\n (e.g. 'exomiser:preset=exome:transcript_source=refseq').\\n- For add all availalbe databases files, use 'ALL' keyword,\\n with filters on format (e.g. 'parquet', 'vcf') and release (e.g. 'current', 'devel')\\n (e.g. 
'ALL', ALL:format=parquet', 'ALL:format=parquet:release=current', 'ALL:format=parquet+vcf:release=current+devel').\\n", 'default': None, 'type': <class 'str'>, 'extra': {'format': 'DB[,DB]*[,bcftools:DB[:DB]*][,annovar:KEY[:KEY]*][,snpeff][,exomiser[:var=val]*]', 'examples': {'Parquet method annotation with 2 Parquet files': '"annotations": "/path/to/database1.parquet,/path/to/database2.parquet"', 'Parquet method annotation with multiple file formats': '"annotations": "/path/to/database1.parquet,/path/to/database2.vcf.gz,/path/to/database2.bed.gz"', 'Parquet method annotation with available Parquet databases in current release (check databases in production)': '"annotations": "ALL:parquet:current"', 'Parquet method annotation with available Parquet databases in latest release (check databases before production)': '"annotations": "ALL:parquet:latest"', 'Annotation with BCFTools': '"annotations": "bcftools:/path/to/database2.vcf.gz:/path/to/database2.bed.gz"', 'Annotation with Annovar (refGene with hgvs and Cosmic)': '"annotations": "annovar:refGene:cosmic70"', 'Annotation with snpEff (default options)': '"annotations": "snpeff"', 'Annotation with snpEff (with options)': '"annotations": "snpeff:-hgvs -noShiftHgvs -spliceSiteSize 3"', 'Annotation with snpSift': '"annotations": "snpsift:/path/to/database2.vcf.gz:/path/to/database2.bed.gz"', 'Annotation with Exomiser with options': '"annotations": "exomiser:preset=exome:hpo=0001156+0001363+0011304+0010055:transcript_source=refseq:release=2109"', 'Multiple tools annotations (Parquet method, BCFTools, Annovar, snpEff and Exomiser)': '"annotations": "/path/to/database1.parquet,bcftools:/path/to/database2.vcf.gz,annovar:refGene:cosmic70,snpeff,exomiser:preset=exome:transcript_source=refseq"'}}}, 'annotation_parquet': {'metavar': 'annotation parquet', 'help': "Annotation with Parquet method, as a list of files in Parquet, VCF or BED\\n (e.g. 
'file1.parquet,file2.vcf.gz').\\nFor add all availalbe databases files, use 'ALL' keyword,\\n with filters on type and release\\n (e.g. 'ALL', 'ALL:parquet:current', 'ALL:parquet,vcf:current,devel').\\n", 'default': None, 'type': <class 'str'>, 'nargs': '+', 'gooey': {'widget': 'MultiFileChooser', 'options': {'default_dir': '/Users/lebechea/howard/databases/annotations/current', 'message': 'Database files'}}, 'extra': {'format': 'DB[,DB]*', 'examples': {'Parquet method annotation with 2 Parquet files': '"annotation_parquet": "/path/to/database1.parquet,/path/to/database2.parquet"', 'Parquet method annotation with multiple file formats': '"annotation_parquet": "/path/to/database1.parquet,/path/to/database2.vcf.gz,/path/to/database2.bed.gz"', 'Parquet method annotation with available Parquet databases in current release (check databases in production)': '"annotation_parquet": "ALL:parquet:current"', 'Parquet method annotation with available Parquet databases in latest release (check databases before production)': '"annotation_parquet": "ALL:parquet:latest"'}}}, 'annotation_bcftools': {'metavar': 'annotation BCFTools', 'help': "Annotation with BCFTools, as a list of files VCF or BED\\n (e.g. 'file.vcf.gz,file.bed.gz').\\n", 'default': None, 'type': <class 'str'>, 'nargs': '+', 'gooey': {'widget': 'MultiFileChooser', 'options': {'default_dir': '/Users/lebechea/howard/databases/annotations/current', 'message': 'Database files'}}, 'extra': {'format': 'DB[,DB]*', 'examples': {'Annovation with BCFTools': '"annotation_bcftools": "/path/to/database2.vcf.gz,/path/to/database2.bed.gz"'}}}, 'annotation_snpeff': {'metavar': 'annotation snpEff', 'help': "Annotation with snpEff, with options\\n (e.g. 
'', '-hgvs -noShiftHgvs -spliceSiteSize 3').\\n", 'default': None, 'type': <class 'str'>, 'extra': {'format': 'options', 'examples': {'Annotation with snpEff (default options)': '"annotation_snpeff": ""', 'Annotation with snpEff (with options)': '"annotation_snpeff": "-hgvs -noShiftHgvs -spliceSiteSize 3"'}}}, 'annotation_snpsift': {'metavar': 'annotation snpSift', 'help': "Annotation with snpSift, as a list of files VCF\\n (e.g. 'file.vcf.gz,file.bed.gz').\\n", 'default': None, 'type': <class 'str'>, 'nargs': '+', 'gooey': {'widget': 'MultiFileChooser', 'options': {'default_dir': '/Users/lebechea/howard/databases/annotations/current', 'message': 'Database files'}}, 'extra': {'format': 'DB[,DB]*', 'examples': {'Annovation with snpSift': '"annotation_snpsift": "/path/to/database2.vcf.gz,/path/to/database2.bed.gz"'}}}, 'annotation_annovar': {'metavar': 'annotation Annovar', 'help': "Annotation with Annovar, as a list of database keywords\\n (e.g. 'refGene', 'refGene:cosmic70').\\n", 'default': None, 'type': <class 'str'>, 'extra': {'format': 'keyword[:keyword]*', 'examples': {'Annotation with Annovar (refGene with hgvs and Cosmic)': '"annotation_annovar": "refGene:cosmic70"'}}}, 'annotation_exomiser': {'metavar': 'annotation Exomiser', 'help': "Annotation with Exomiser, as a list of options\\n (e.g. 'preset=exome:transcript_source=refseq').\\n", 'default': None, 'type': <class 'str'>, 'extra': {'format': 'option=value[:option=value]', 'examples': {'Annotation with Exomiser with options': '"annotation_exomiser": "preset=exome:hpo=0001156+0001363+0011304+0010055:transcript_source=refseq:release=2109"'}}}, 'annotation_splice': {'metavar': 'annotation Splice', 'help': "Annotation with Splice, as a list of options\\n (e.g. 
'split_mode=one:spliceai_distance=500:spliceai_mask=1').\\n", 'default': None, 'type': <class 'str'>, 'extra': {'format': 'option=value[:option=value]', 'examples': {'Annotation with Splice with options': '"annotation_splice": "split_mode=one:spliceai_distance=500:spliceai_mask=1"'}}}, 'annotations_update': {'help': 'Update option for annotation (Only for Parquet annotation).\\nIf True, annotation fields will be removed and re-annotated.\\nThese options will be applied to all annotation databases.\\n', 'action': 'store_true', 'default': False, 'gooey': {'widget': 'BlockCheckbox', 'options': {'checkbox_label': 'Update annotation method'}}, 'extra': {'param_section': 'annotation:options'}}, 'annotations_append': {'help': 'Append option for annotation (Only for Parquet annotation).\\nIf True, annotation fields will be annotated only if not annotation exists for the variant.\\nThese options will be applied to all annotation databases.\\n', 'action': 'store_true', 'default': False, 'gooey': {'widget': 'BlockCheckbox', 'options': {'checkbox_label': 'Append annotation method'}}, 'extra': {'param_section': 'annotation:options'}}, 'calculations': {'metavar': 'operations', 'help': "Quick calculations on genetic variants information and genotype information,\\nas a list of operations (e.g. 
'VARTYPE,variant_id').\\nList of available calculations by default\\n (unsensitive case, see doc for more information):\\n VARTYPE snpeff_hgvs FINDBYPIPELINE GENOTYPECONCORDANCE BARCODE TRIO VAF VAF_STATS DP_STATS \\n", 'default': None, 'type': <class 'str'>}, 'prioritizations': {'metavar': 'prioritisations', 'help': "List of prioritization profiles to process (based on Prioritization JSON file),\\nsuch as 'default', 'rare variants', 'low allele frequency', 'GERMLINE'.\\nBy default, all profiles available will be processed.\\n", 'default': None, 'type': <class 'str'>, 'extra': {'examples': {'Prioritization profile by default': '"prioritization": "default" ', 'Prioritization profile by default and GERMLINE from Configuration JSON file': '"prioritization": "default,GERMLINE" '}}}, 'prioritization_config': {'metavar': 'prioritization config', 'help': 'Prioritization configuration JSON file (defines profiles, see doc).\\n', 'default': None, 'type': <howard.tools.tools.PathType object>, 'gooey': {'widget': 'FileChooser', 'options': {'wildcard': 'JSON file (*.json)|*.json|All files (*)|*'}}, 'extra': {'param_section': 'prioritization', 'examples': {'Prioritization configuration JSON file as an option': '"prioritization_config": "prioritization_config.json" '}}}, 'profiles': {'metavar': 'profiles', 'help': "List of prioritization profiles to process (based on Prioritization JSON file),\\nsuch as 'default', 'rare variants', 'low allele frequency', 'GERMLINE'.\\nBy default, all profiles available will be processed.\\n", 'default': None, 'type': <class 'str'>}, 'default_profile': {'metavar': 'default profile', 'help': 'Prioritization profile by default (see doc).\\nDefault is the first profile in the list of prioritization profiles.\\n', 'default': None, 'type': <class 'str'>}, 'pzfields': {'metavar': 'pzfields', 'help': 'Prioritization fields to provide (see doc).\\nAvailable: PZScore, PZFlag, PZTags, PZComment, PZInfos\\n', 'default': 'PZScore,PZFlag', 'type': <class 
'str'>}, 'prioritization_score_mode': {'metavar': 'prioritization score mode', 'help': 'Prioritization Score mode (see doc).\\nAvailable: HOWARD (increment score), VaRank (max score)\\n', 'default': 'HOWARD', 'type': <class 'str'>, 'choices': ['HOWARD', 'VaRank'], 'gooey': {'widget': 'Dropdown', 'options': {}}}, 'query_limit': {'metavar': 'query limit', 'help': 'Limit of number of row for query (only for print result, not output).\\n', 'default': 10, 'type': <class 'int'>, 'gooey': {'widget': 'IntegerField', 'options': {'min': 1, 'max': 10000, 'increment': 10}}}, 'query_print_mode': {'metavar': 'print mode', 'help': "Print mode of query result (only for print result, not output).\\nEither None (native), 'markdown', 'tabulate' or disabled.\\n", 'choices': [None, 'markdown', 'tabulate', 'disabled'], 'default': None, 'type': <class 'str'>, 'gooey': {'widget': 'Dropdown', 'options': {}}}, 'explode_infos': {'help': "Explode VCF INFO/Tag into 'variants' table columns.\\n", 'action': 'store_true', 'default': False}, 'explode_infos_prefix': {'metavar': 'explode infos prefix', 'help': 'Explode VCF INFO/Tag with a specific prefix.\\n', 'default': '', 'type': <class 'str'>}, 'explode_infos_fields': {'metavar': 'explode infos list', 'help': "Explode VCF INFO/Tag specific fields/tags.\\nKeyword `*` specify all available fields, except those already specified.\\nPattern (regex) can be used, such as `.*_score` for fields named with '_score' at the end.\\nExamples:\\n- 'HGVS,SIFT,Clinvar' (list of fields)\\n- 'HGVS,*,Clinvar' (list of fields with all other fields at the end)\\n- 'HGVS,.*_score,Clinvar' (list of 2 fields with all scores in the middle)\\n- 'HGVS,.*_score,*' (1 field, scores, all other fields)\\n- 'HGVS,*,.*_score' (1 field, all other fields, all scores)\\n", 'default': '*', 'type': <class 'str'>}, 'include_header': {'help': 'Include header (in VCF format) in output file.\\nOnly for compatible formats (tab-delimiter format as TSV or BED).\\n', 'action': 'store_true', 
'default': False}, 'order_by': {'metavar': 'order by', 'help': "List of columns to sort the result-set in ascending or descending order.\\nUse SQL format, and keywords ASC (ascending) and DESC (descending).\\nIf a column is not available, order will not be considered.\\nOrder is enable only for compatible format (e.g. TSV, CSV, JSON).\\nExamples: 'ACMG_score DESC', 'PZFlag DESC, PZScore DESC'.\\n", 'default': '', 'type': <class 'str'>, 'extra': {'examples': {'Order by ACMG score in descending order': '"order_by": "ACMG_score DESC" ', 'Order by PZFlag and PZScore in descending order': '"order_by": "PZFlag DESC, PZScore DESC" '}}}, 'parquet_partitions': {'metavar': 'parquet partitions', 'help': "Parquet partitioning using hive (available for any format).\\nThis option is faster parallel writing, but memory consuming.\\nUse 'None' (string) for NO partition but split parquet files into a folder.\\nExamples: '#CHROM', '#CHROM,REF', 'None'.\\n", 'default': None, 'type': <class 'str'>}, 'input_annovar': {'metavar': 'input annovar', 'help': "Input Annovar file path.\\nFormat file must be a Annovar TXT file, associated with '.idx'.\\n", 'required': False, 'default': None, 'type': <howard.tools.tools.PathType object>, 'gooey': {'widget': 'FileChooser', 'options': {'wildcard': 'Parquet file (*.parquet)|*.parquet|All files (*)|*'}}}, 'output_annovar': {'metavar': 'output annovar', 'help': "Output Annovar file path.\\nFormat file must be either VCF compressesd file '.vcf.gz'.\\n", 'required': False, 'default': None, 'type': <howard.tools.tools.PathType object>, 'gooey': {'widget': 'FileSaver'}}, 'annovar_code': {'metavar': 'Annovar code', 'help': 'Annovar code, or database name.\\nUsefull to name databases columns.\\n', 'required': False, 'default': None, 'type': <class 'str'>}, 'annovar_to_parquet': {'metavar': 'to parquet', 'help': 'Parquet file conversion.\\n', 'required': False, 'default': None, 'type': <howard.tools.tools.PathType object>, 'gooey': {'widget': 'FileSaver', 
'options': {'wildcard': 'HTML file (*.parquet)|*.parquet'}}}, 'annovar_multi_variant': {'metavar': 'Annovar multi variant', 'help': "Variant with multiple annotation lines on Annovar file.\\nEither 'auto' (auto-detection), 'enable' or 'disable'.\\n", 'default': 'auto', 'type': <class 'str'>, 'choices': ['auto', 'enable', 'disable'], 'gooey': {'widget': 'Dropdown', 'options': {}}}, 'annovar_reduce_memory': {'metavar': 'reduce memory', 'help': "Reduce memory option for Annovar convert,\\neither 'auto' (auto-detection), 'enable' or 'disable'.\\n", 'default': 'auto', 'type': <class 'str'>, 'choices': ['auto', 'enable', 'disable'], 'gooey': {'widget': 'Dropdown', 'options': {}}}, 'input_extann': {'metavar': 'input extann', 'help': 'Input Extann file path.\\nFormat file must be a Extann TXT file or TSV file.\\nFile need to have at least the genes column.\\n', 'required': False, 'default': None, 'type': <howard.tools.tools.PathType object>, 'gooey': {'widget': 'FileChooser', 'options': {'wildcard': 'VCF, Parquet, TSV, CSV, PSV or duckDB|*.*|All files (*)|*'}}}, 'output_extann': {'metavar': 'output extann', 'help': 'Output Extann file path.\\nOutput extann file, should be BED or BED.gz.\\n', 'required': False, 'default': None, 'type': <howard.tools.tools.PathType object>, 'gooey': {'widget': 'FileSaver'}}, 'mode_extann': {'metavar': 'mode extann', 'help': 'Mode extann selection.\\nHow to pick transcript from ncbi, keep all,\\nkeep the longest, or keep the chosen one (transcript_extann).\\n', 'required': False, 'default': 'longest', 'choices': ['all', 'longest', 'chosen'], 'type': <class 'str'>}, 'param_extann': {'metavar': 'param extann', 'help': "Param extann file path.\\nParam containing configuration, options to replace chars and\\nbedlike header description, conf vcf specs.\\n(e.g. 
'~/howard/config/param.extann.json')\\n", 'required': False, 'default': None, 'type': <howard.tools.tools.PathType object>, 'gooey': {'widget': 'FileChooser', 'options': {'wildcard': 'TSV file format|*.tsv|'}}}, 'calculation_config': {'metavar': 'calculation config', 'help': 'Calculation configuration JSON file.\\n', 'default': None, 'type': <howard.tools.tools.PathType object>, 'gooey': {'widget': 'FileChooser', 'options': {'wildcard': 'JSON file (*.json)|*.json|All files (*)|*'}}, 'extra': {'param_section': 'calculation', 'examples': {'Calculation configuration JSON file as an option': '"calculation_config": "calculation_config.json" '}}}, 'show_calculations': {'help': 'Show available calculation operations.\\n', 'action': 'store_true', 'default': False}, 'hgvs_field': {'metavar': 'HGVS field', 'help': 'HGVS INFO/tag containing a list o HGVS annotations.\\n', 'default': 'hgvs', 'type': <class 'str'>, 'extra': {'param_section': 'calculation:calculations:NOMEN:options'}}, 'transcripts': {'metavar': 'transcripts', 'help': 'Transcripts TSV file,\\nwith Transcript in first column, optional Gene in second column.\\n', 'default': None, 'type': <howard.tools.tools.PathType object>, 'gooey': {'widget': 'FileChooser', 'options': {'wildcard': 'TSV file (*.tsv)|*.tsv|All files (*)|*'}}, 'extra': {'param_section': 'calculation:calculations:NOMEN:options'}}, 'trio_pedigree': {'metavar': 'trio pedigree', 'help': 'Pedigree Trio for trio inheritance calculation.\\nEither a JSON file or JSON string or a list of samples\\n(e.g. 
\\'sample1,sample2,sample3\\' for father, mother and child,\\n \\'{"father": "sample1", "mother": "sample2", "child": "sample3"}\\').\\n', 'default': None, 'gooey': {'widget': 'FileChooser', 'options': {'wildcard': 'JSON file (*.json)|*.json|All files (*)|*'}}, 'extra': {'param_section': 'calculation:calculations:TRIO'}}, 'family_pedigree': {'metavar': 'family pedigree', 'help': 'Pedigree family for barcode calculation on genotype.\\nEither a JSON file or JSON string or a list of samples\\n(e.g. \\'sample1,sample2,sample3,sample4\\',\\n \\'{"father": "sample1", "mother": "sample2", "child1": "sample3", "child2": "sample3"}\\').\\n', 'default': None, 'gooey': {'widget': 'FileChooser', 'options': {'wildcard': 'JSON file (*.json)|*.json|All files (*)|*'}}, 'extra': {'param_section': 'calculation:calculations:BARCODEFAMILY'}}, 'stats_md': {'metavar': 'stats markdown', 'help': 'Stats Output file in MarkDown format.\\n', 'required': False, 'default': None, 'type': <howard.tools.tools.PathType object>, 'gooey': {'widget': 'FileSaver', 'options': {'wildcard': 'Markdown file (*.md)|*.md'}}, 'extra': {'examples': {'Export statistics in Markdown format': '"stats_md": "/tmp/stats.md" '}}}, 'stats_json': {'metavar': 'stats json', 'help': 'Stats Output file in JSON format.\\n', 'required': False, 'default': None, 'type': <howard.tools.tools.PathType object>, 'gooey': {'widget': 'FileSaver', 'options': {'wildcard': 'JSON file (*.json)|*.json'}}, 'extra': {'examples': {'Export statistics in JSON format': '"stats_json": "/tmp/stats.json" '}}}, 'assembly': {'metavar': 'assembly', 'help': "Genome Assembly (e.g. 'hg19', 'hg38').\\n", 'required': False, 'default': 'hg19', 'type': <class 'str'>, 'extra': {'examples': {'Default assembly for all analysis tools': '"assembly": "hg19" ', 'List of assemblies for databases download tool': '"assembly": "hg19,hg38" '}}}, 'genome': {'metavar': 'genome', 'help': "Genome file in fasta format (e.g. 
'hg19.fa', 'hg38.fa').\\n", 'required': False, 'default': '/Users/lebechea/howard/databases/genomes/current/hg19/hg19.fa', 'type': <howard.tools.tools.PathType object>, 'gooey': {'widget': 'FileChooser', 'options': {'wildcard': 'All files (*)|*'}}}, 'hgvs_options': {'metavar': 'HGVS options', 'help': "Quick HGVS annotation options.\\nThis option will skip all other hgvs options.\\nExamples:\\n- 'default' (for default options)\\n- 'full_format' (for full format HGVS annotation)\\n- 'use_gene=True:add_protein=true:codon_type=FULL'\\n", 'required': False, 'default': None, 'type': <class 'str'>}, 'use_gene': {'help': "Use Gene information to generate HGVS annotation\\n(e.g. 'NM_152232(TAS1R2):c.231T>C')", 'action': 'store_true', 'default': False}, 'use_exon': {'help': "Use Exon information to generate HGVS annotation\\n(e.g. 'NM_152232(exon2):c.231T>C').\\nOnly if 'use_gene' is not enabled.\\n", 'action': 'store_true', 'default': False}, 'use_protein': {'help': "Use Protein level to generate HGVS annotation\\n(e.g. 'NP_689418:p.Cys77Arg').\\nCan be used with 'use_exon' or 'use_gene'.\\n", 'action': 'store_true', 'default': False}, 'add_protein': {'help': "Add Protein level to DNA HGVS annotation (e.g 'NM_152232:c.231T>C,NP_689418:p.Cys77Arg').\\n", 'action': 'store_true', 'default': False}, 'full_format': {'help': "Generates HGVS annotation in a full format\\nby using all information to generates an exhaustive annotation\\n(non-standard, e.g. 'TAS1R2:NM_152232:NP_689418:c.231T>C:p.Cys77Arg').\\nUse 'use_exon' to add exon information\\n(e.g 'TAS1R2:NM_152232:NP_689418:exon2:c.231T>C:p.Cys77Arg').\\n", 'action': 'store_true', 'default': False}, 'use_version': {'help': "Generates HGVS annotation with transcript version\\n(e.g. 'NM_152232.1:c.231T>C').\\n", 'action': 'store_true', 'default': False}, 'codon_type': {'metavar': 'Codon type', 'help': "Amino Acide Codon format type to use to generate HGVS annotation.\\nAvailable:\\n- '1': codon in 1 character (e.g. 
'C', 'R')\\n- '3': codon in 3 character (e.g. 'Cys', 'Arg')\\n-'FULL': codon in full name (e.g. 'Cysteine', 'Arginine')\\n", 'required': False, 'default': '3', 'type': <class 'str'>, 'choices': ['1', '3', 'FULL'], 'gooey': {'widget': 'Dropdown', 'options': {}}}, 'refgene': {'metavar': 'refGene', 'help': 'Path to refGene annotation file.\\n', 'required': False, 'default': None, 'type': <howard.tools.tools.PathType object>, 'gooey': {'widget': 'FileChooser', 'options': {'wildcard': 'All files (*)|*', 'default_dir': '/Users/lebechea/howard/databases/refseq/current', 'default_file': 'ncbiRefSeq.txt', 'message': 'Path to refGene annotation file'}}}, 'refseqlink': {'metavar': 'refSeqLink', 'help': 'Path to refSeqLink annotation file.\\n', 'required': False, 'default': None, 'type': <howard.tools.tools.PathType object>, 'gooey': {'widget': 'FileChooser', 'options': {'wildcard': 'All files (*)|*', 'default_dir': '/Users/lebechea/howard/databases/refseq/current', 'default_file': 'ncbiRefSeq.txt', 'message': 'Path to refGeneLink annotation file'}}}, 'refseq-folder': {'metavar': 'refseq folder', 'help': 'Folder containing refSeq files.\\n', 'required': False, 'default': '/Users/lebechea/howard/databases/refseq/current', 'type': <howard.tools.tools.PathType object>, 'gooey': {'widget': 'DirChooser', 'options': {'default_dir': '/Users/lebechea/howard/databases/refseq/current', 'message': 'Path to refGenefolder'}}}, 'download-genomes': {'metavar': 'genomes', 'help': "Path to genomes folder\\nwith Fasta files, indexes,\\nand all files generated by pygenome module.\\n(e.g. 
'/Users/lebechea/howard/databases/genomes/current').\\n", 'required': False, 'default': None, 'type': <howard.tools.tools.PathType object>, 'gooey': {'widget': 'DirChooser', 'options': {'default_dir': '/Users/lebechea/howard/databases', 'message': 'Path to genomes folder'}}}, 'download-genomes-provider': {'metavar': 'genomes provider', 'help': 'Download Genome from an external provider.\\nAvailable: GENCODE, Ensembl, UCSC, NCBI.\\n', 'required': False, 'default': 'UCSC', 'type': <class 'str'>, 'choices': ['GENCODE', 'Ensembl', 'UCSC', 'NCBI'], 'gooey': {'widget': 'Dropdown', 'options': {}}}, 'download-genomes-contig-regex': {'metavar': 'genomes contig regex', 'help': "Regular expression to select specific chromosome\\n(e.g 'chr[0-9XYM]+$').\\n", 'required': False, 'default': None, 'type': <class 'str'>}, 'download-annovar': {'metavar': 'Annovar', 'help': "Path to Annovar databases\\n(e.g. '/Users/lebechea/howard/databases/annovar/current').\\n", 'required': False, 'type': <howard.tools.tools.PathType object>, 'default': None, 'gooey': {'widget': 'DirChooser', 'options': {'default_dir': '/Users/lebechea/howard/databases', 'message': 'Path to Annovar databases folder'}}}, 'download-annovar-files': {'metavar': 'Annovar code', 'help': "Download Annovar databases for a list of Annovar file code (see Annovar Doc).\\nUse None to donwload all available files,\\nor Annovar keyword (e.g. 
'refGene', 'cosmic70', 'clinvar_202*').\\nNote that refGene will at least be downloaded,\\nand only files that not already exist or changed will be downloaded.\\n", 'required': False, 'default': None, 'type': <class 'str'>}, 'download-annovar-url': {'metavar': 'Annovar url', 'help': 'Annovar databases URL (see Annovar Doc).\\n', 'required': False, 'default': 'http://www.openbioinformatics.org/annovar/download', 'type': <class 'str'>}, 'download-snpeff': {'metavar': 'snpEff', 'help': 'Download snpEff databases within snpEff folder', 'required': False, 'default': None, 'type': <howard.tools.tools.PathType object>, 'gooey': {'widget': 'DirChooser', 'options': {'default_dir': '/Users/lebechea/howard/databases', 'message': 'Path to snpEff databases folder'}}}, 'download-refseq': {'metavar': 'refSeq', 'help': "Path to refSeq databases\\n(e.g. '/Users/lebechea/howard/databases/refseq/current').\\n", 'required': False, 'default': None, 'type': <howard.tools.tools.PathType object>, 'gooey': {'widget': 'DirChooser', 'options': {'default_dir': '/Users/lebechea/howard/databases', 'message': 'Path to refGene files folder'}}}, 'download-refseq-url': {'metavar': 'refSeq url', 'help': "refSeq databases URL (see refSeq WebSite)\\n(e.g. 'http://hgdownload.soe.ucsc.edu/goldenPath')\u2022/n", 'required': False, 'default': 'http://hgdownload.soe.ucsc.edu/goldenPath', 'type': <class 'str'>}, 'download-refseq-prefix': {'metavar': 'refSeq prefix', 'help': 'Check existing refSeq files in refSeq folder.\\n', 'required': False, 'default': 'ncbiRefSeq', 'type': <class 'str'>}, 'download-refseq-files': {'metavar': 'refSeq files', 'help': 'List of refSeq files to download.\\n', 'required': False, 'default': 'ncbiRefSeq.txt,ncbiRefSeqLink.txt', 'type': <class 'str'>}, 'download-refseq-format-file': {'metavar': 'refSeq format file', 'help': "Name of refSeq file to convert in BED format\\n(e.g. 
'ncbiRefSeq.txt').\\nProcess only if not None.\\n", 'required': False, 'default': None, 'type': <class 'str'>}, 'download-refseq-include-utr5': {'help': "Formating BED refSeq file including 5'UTR.\\n", 'action': 'store_true', 'default': False}, 'download-refseq-include-utr3': {'help': "Formating BED refSeq file including 3'UTR.\\n", 'action': 'store_true', 'default': False}, 'download-refseq-include-chrM': {'help': "Formating BED refSeq file including Mitochondiral chromosome 'chrM' or 'chrMT'.\\n", 'action': 'store_true', 'default': False}, 'download-refseq-include-non-canonical-chr': {'help': 'Formating BED refSeq file including non canonical chromosomes.\\n', 'action': 'store_true', 'default': False}, 'download-refseq-include-non-coding-transcripts': {'help': 'Formating BED refSeq file including non coding transcripts.\\n', 'action': 'store_true', 'default': False}, 'download-refseq-include-transcript-version': {'help': 'Formating BED refSeq file including transcript version.\\n', 'action': 'store_true', 'default': False}, 'download-dbnsfp': {'metavar': 'dbNSFP', 'help': "Download dbNSFP databases within dbNSFP folder(e.g. '/Users/lebechea/howard/databases').\\n", 'required': False, 'default': None, 'type': <howard.tools.tools.PathType object>, 'gooey': {'widget': 'DirChooser', 'options': {'default_dir': '/Users/lebechea/howard/databases', 'message': 'Path to dbNSFP databases folder'}}}, 'download-dbnsfp-url': {'metavar': 'dbNSFP url', 'help': "Download dbNSFP databases URL (see dbNSFP website)\\n(e.g. https://dbnsfp.s3.amazonaws.com').\\n", 'required': False, 'default': 'https://dbnsfp.s3.amazonaws.com', 'type': <class 'str'>}, 'download-dbnsfp-release': {'metavar': 'dnNSFP release', 'help': "Release of dbNSFP to download (see dbNSFP website)\\n(e.g. 
'4.4a').\\n", 'required': False, 'default': '4.4a'}, 'download-dbnsfp-parquet-size': {'metavar': 'dbNSFP parquet size', 'help': 'Maximum size (Mb) of data files in Parquet folder.\\nParquet folder are partitioned (hive) by chromosome (sub-folder),\\nwhich contain N data files.\\n', 'required': False, 'default': 100, 'type': <class 'int'>, 'gooey': {'widget': 'IntegerField', 'options': {'min': 1, 'max': 100000, 'increment': 10}}}, 'download-dbnsfp-subdatabases': {'help': 'Generate dbNSFP sub-databases.\\ndbNSFP provides multiple databases which are split onto multiple columns.\\nThis option create a Parquet folder for each sub-database (based on columns names).\\n', 'action': 'store_true', 'default': False}, 'download-dbnsfp-parquet': {'help': 'Generate a Parquet file for each Parquet folder.\\n', 'action': 'store_true', 'default': False}, 'download-dbnsfp-vcf': {'help': 'Generate a VCF file for each Parquet folder.\\nNeed genome FASTA file (see --download-genome).\\n', 'action': 'store_true', 'default': False}, 'download-dbnsfp-no-files-all': {'help': "Not generate database Parquet/VCF file for the entire database ('ALL').\\nOnly sub-databases files will be generated.\\n(see '--download-dbnsfp-subdatabases').\\n", 'action': 'store_true', 'default': False}, 'download-dbnsfp-add-info': {'help': 'Add INFO column (VCF format) in Parquet folder and file.\\nUseful for speed up full annotation (all available columns).\\nIncrease memory and space during generation of files.\\n', 'action': 'store_true', 'default': False}, 'download-dbnsfp-only-info': {'help': 'Add only INFO column (VCF format) in Parquet folder and file.\\nUseful for speed up full annotation (all available columns).\\nDecrease memory and space during generation of files.\\nIncrease time for partial annotation (some available columns).\\n', 'action': 'store_true', 'default': False}, 'download-dbnsfp-uniquify': {'help': 'Uniquify values within column\\n(e.g. 
"D,D" to "D", "D,.,T" to "D,T").\\nRemove transcripts information details.\\nUsefull to reduce size of the database.\\nIncrease memory and space during generation of files.\\n', 'action': 'store_true', 'default': False}, 'download-dbnsfp-row-group-size': {'metavar': 'dnNSFP row grooup size', 'help': 'Minimum number of rows in a parquet row group (see duckDB doc).\\nLower can reduce memory usage and slightly increase space during generation,\\nspeed up highly selective queries, slow down whole file queries (e.g. aggregations).\\n', 'required': False, 'default': 100000, 'type': <class 'int'>, 'gooey': {'widget': 'IntegerField', 'options': {'min': 1, 'max': 100000000000, 'increment': 10000}}}, 'download-alphamissense': {'metavar': 'AlphaMissense', 'help': 'Path to AlphaMissense databases', 'required': False, 'default': None, 'type': <howard.tools.tools.PathType object>, 'gooey': {'widget': 'DirChooser', 'options': {'default_dir': '/Users/lebechea/howard/databases', 'message': 'Path to Alphamissense databases folder'}}}, 'download-alphamissense-url': {'metavar': 'AlphaMissense url', 'help': "Download AlphaMissense databases URL (see AlphaMissense website)\\n(e.g. 'https://storage.googleapis.com/dm_alphamissense').\\n", 'required': False, 'default': 'https://storage.googleapis.com/dm_alphamissense', 'type': <class 'str'>}, 'download-exomiser': {'metavar': 'Exomiser', 'help': 'Path to Exomiser databases\\n(e.g. 
/Users/lebechea/howard/databases/exomiser/current).\\n', 'required': False, 'default': None, 'type': <howard.tools.tools.PathType object>, 'gooey': {'widget': 'DirChooser', 'options': {'default_dir': '/Users/lebechea/howard/databases', 'message': 'Path to Exomiser databases folder'}}}, 'download-exomiser-application-properties': {'metavar': 'Exomiser application properties', 'help': "Exomiser Application Properties configuration file (see Exomiser website).\\nThis file contains configuration settings for the Exomiser tool.\\nIf this parameter is not provided, the function will attempt to locate\\nthe application properties file automatically based on the Exomiser.\\nConfiguration information will be used to download expected releases (if no other parameters).\\nCADD and REMM will be downloaded only if 'path' are provided.\\n", 'required': False, 'default': None, 'type': <howard.tools.tools.PathType object>, 'gooey': {'widget': 'FileChooser', 'options': {'wildcard': 'All files (*)|*', 'options': {'default_dir': '/Users/lebechea/howard/databases/exomiser/current', 'message': 'Path to Exomiser application properties file'}}}}, 'download-exomiser-url': {'metavar': 'Exomiser url', 'help': "URL where Exomiser database files can be downloaded from\\n(e.g. 
'http://data.monarchinitiative.org/exomiser').\\n", 'required': False, 'default': 'http://data.monarchinitiative.org/exomiser', 'type': <class 'str'>}, 'download-exomiser-release': {'metavar': 'Exomiser release', 'help': 'Release of Exomiser data to download.\\nIf "default", "auto", or "config", retrieve from Application Properties file.\\nIf not provided (None), from Application Properties file (Exomiser data-version) \\nor default \\'2109\\'.\\n', 'required': False, 'default': None, 'type': <class 'str'>}, 'download-exomiser-phenotype-release': {'metavar': 'Exomiser phenoptye release', 'help': 'Release of Exomiser phenotype to download.\\nIf not provided (None), from Application Properties file (Exomiser Phenotype data-version)\\nor Exomiser release.\\n', 'required': False, 'default': None, 'type': <class 'str'>}, 'download-exomiser-remm-release': {'metavar': 'Exomiser remm release', 'help': 'Release of ReMM (Regulatory Mendelian Mutation) database to download.\\nIf "default", "auto", or "config", retrieve from Application Properties file.\\n', 'required': False, 'default': None, 'type': <class 'str'>}, 'download-exomiser-remm-url': {'metavar': 'Exomiser remm url', 'help': "URL where ReMM (Regulatory Mendelian Mutation) database files can be downloaded from\\n(e.g. 'https://kircherlab.bihealth.org/download/ReMM').\\n", 'required': False, 'default': 'https://kircherlab.bihealth.org/download/ReMM', 'type': <class 'str'>}, 'download-exomiser-cadd-release': {'metavar': 'Exomiser cadd release', 'help': 'Release of CADD (Combined Annotation Dependent Depletion) database to download.\\nIf "default", "auto", or "config", retrieve from Application Properties file.\\n', 'required': False, 'default': None, 'type': <class 'str'>}, 'download-exomiser-cadd-url': {'metavar': 'Exomiser cadd url', 'help': "URL where CADD (Combined Annotation Dependent Depletion) database files can be downloaded from\\n(e.g. 
'https://kircherlab.bihealth.org/download/CADD').\\n", 'required': False, 'default': 'https://kircherlab.bihealth.org/download/CADD', 'type': <class 'str'>}, 'download-exomiser-cadd-url-snv-file': {'metavar': 'Exomiser url snv file', 'help': 'Name of the file containing the SNV (Single Nucleotide Variant) data\\nfor the CADD (Combined Annotation Dependent Depletion) database.\\n', 'required': False, 'default': 'whole_genome_SNVs.tsv.gz', 'type': <class 'str'>}, 'download-exomiser-cadd-url-indel-file': {'metavar': 'Exomiser cadd url indel', 'help': 'Name of the file containing the INDEL (Insertion-Deletion) data\\nfor the CADD (Combined Annotation Dependent Depletion) database.\\n', 'required': False, 'default': 'InDels.tsv.gz', 'type': <class 'str'>}, 'download-dbsnp': {'metavar': 'dnSNP', 'help': "Path to dbSNP databases\\n(e.g. '/Users/lebechea/howard/databases/exomiser/dbsnp').\\n", 'required': False, 'default': None, 'type': <howard.tools.tools.PathType object>, 'gooey': {'widget': 'DirChooser', 'options': {'default_dir': '/Users/lebechea/howard/databases', 'message': 'Path to dbSNP databases folder'}}}, 'download-dbsnp-releases': {'metavar': 'dnSNP releases', 'help': "Release of dbSNP to download\\n(e.g. 'b152', 'b152,b156').\\n", 'required': False, 'default': 'b156', 'type': <class 'str'>}, 'download-dbsnp-release-default': {'metavar': 'dnSNP release default', 'help': "Default Release of dbSNP ('default' symlink)\\n(e.g. 'b156').\\nIf None, first release to download will be assigned as default\\nonly if it does not exists.\\n", 'required': False, 'default': None, 'type': <class 'str'>}, 'download-dbsnp-url': {'metavar': 'dbSNP url', 'help': "URL where dbSNP database files can be downloaded from.\\n(e.g. 
'https://ftp.ncbi.nih.gov/snp/archive').\\n", 'required': False, 'default': 'https://ftp.ncbi.nih.gov/snp/archive', 'type': <class 'str'>}, 'download-dbsnp-url-files': {'metavar': 'dbSNP url files', 'help': 'Dictionary that maps assembly names to specific dbSNP URL files.\\nIt allows you to provide custom dbSNP URL files for specific assemblies\\ninstead of using the default file naming convention.\\n', 'required': False, 'default': None, 'type': <class 'str'>}, 'download-dbsnp-url-files-prefix': {'metavar': 'dbSNP url files prefix', 'help': 'String that represents the prefix of the dbSNP file name for a specific assembly.\\nIt is used to construct the full URL of the dbSNP file to be downloaded.\\n', 'required': False, 'default': 'GCF_000001405', 'type': <class 'str'>}, 'download-dbsnp-assemblies-map': {'metavar': 'dbSNP assemblies map', 'help': 'dictionary that maps assembly names to their corresponding dbSNP versions.\\nIt is used to construct the dbSNP file name based on the assembly name.\\n', 'required': False, 'default': {'hg19': '25', 'hg38': '40'}, 'type': <class 'str'>, 'gooey': {'options': {'initial_value': '{"hg19": "25", "hg38": "40"}'}}}, 'download-dbsnp-vcf': {'help': 'Generate well-formatted VCF from downloaded file:\\n- Add and filter contigs associated to assembly\\n- Normalize by splitting multiallelics\\n- Need genome (see --download-genome)\\n', 'action': 'store_true', 'default': False}, 'download-dbsnp-parquet': {'help': 'Generate Parquet file from VCF.\\n', 'action': 'store_true', 'default': False}, 'convert-hgmd': {'metavar': 'HGMD', 'help': 'Convert HGMD databases.\\nFolder where the HGMD databases will be stored.\\nFields in VCF, Parquet and TSV will be generated.\\nIf the folder does not exist, it will be created.\\n', 'required': False, 'default': None, 'type': <howard.tools.tools.PathType object>, 'gooey': {'widget': 'DirChooser'}}, 'convert-hgmd-file': {'metavar': 'HGMD file', 'help': "File from HGMD.\\nName format 
'HGMD_Pro_<release>_<assembly>.vcf.gz'.\\n", 'required': False, 'default': None, 'type': <howard.tools.tools.PathType object>, 'gooey': {'widget': 'FileChooser'}}, 'convert-hgmd-basename': {'metavar': 'HGMD basename', 'help': "File output basename.\\nGenerated files will be prefixed by basename\\n(e.g. 'HGMD_Pro_MY_RELEASE')\\nBy default (None), input file name without '.vcf.gz'.\\n", 'required': False, 'default': None, 'type': <class 'str'>}, 'generate-param': {'metavar': 'param', 'help': 'Parameter file (JSON) with all databases found.\\nDatabases folders scanned are defined in config file.\\nStructure of databases follow this structure (see doc):\\n.../<database>/<release>/<assembly>/*.[parquet|vcf.gz|...]\\n', 'required': False, 'default': None, 'type': <howard.tools.tools.PathType object>, 'gooey': {'widget': 'FileSaver', 'options': {'wildcard': 'JSON file (*.json)|*.json'}}}, 'generate-param-description': {'metavar': 'param description', 'help': 'Description file (JSON) with all databases found.\\nContains all databases with description of format, assembly, fields...\\n', 'required': False, 'default': None, 'type': <howard.tools.tools.PathType object>, 'gooey': {'widget': 'FileSaver', 'options': {'wildcard': 'JSON file (*.json)|*.json'}}}, 'generate-param-releases': {'metavar': 'param release', 'help': "List of database folder releases to check\\n(e.g. 'current', 'latest').\\n", 'required': False, 'default': 'current', 'type': <class 'str'>}, 'generate-param-formats': {'metavar': 'param formats', 'help': "List of database formats to check\\n(e.g. 'parquet', 'parquet,vcf,bed,tsv').\\n", 'required': False, 'default': 'parquet', 'type': <class 'str'>}, 'generate-param-bcftools': {'help': "Generate parameter JSON file with BCFTools annotation for allowed formats\\n(i.e. 
'vcf', 'bed').\\n", 'action': 'store_true', 'default': False}, 'help_md': {'metavar': 'help markdown', 'help': 'Help Output file in MarkDown format.\\n', 'required': False, 'default': None, 'type': <howard.tools.tools.PathType object>, 'gooey': {'widget': 'FileSaver', 'options': {'wildcard': 'HTML file (*.md)|*.md'}}}, 'help_html': {'metavar': 'help html', 'help': 'Help Output file in HTML format.\\n', 'required': False, 'default': None, 'type': <howard.tools.tools.PathType object>, 'gooey': {'widget': 'FileSaver', 'options': {'wildcard': 'HTML file (*.html)|*.html'}}}, 'help_pdf': {'metavar': 'help pdf', 'help': 'Help Output file in PDF format.\\n', 'required': False, 'default': None, 'type': <howard.tools.tools.PathType object>, 'gooey': {'widget': 'FileSaver', 'options': {'wildcard': 'PDF file (*.pdf)|*.pdf'}}}, 'help_json_input': {'metavar': 'help JSON input', 'help': 'Help input file in JSON format.\\n', 'required': False, 'default': None, 'type': <howard.tools.tools.PathType object>, 'gooey': {'widget': 'FileChooser', 'options': {'wildcard': 'JSON file (*.json)|*.json|All files (*)|*'}}}, 'help_md_input': {'metavar': 'help MarkDown input', 'help': 'Help input file in MarkDown format.\\n', 'required': False, 'default': None, 'type': <howard.tools.tools.PathType object>, 'gooey': {'widget': 'FileChooser', 'options': {'wildcard': 'MarkDown file (*.md)|*.md|All files (*)|*'}}}, 'code_type': {'metavar': 'example code type', 'help': "Help example code type for input JSON format\\n(e.g. 'json', 'bash').\\n", 'required': False, 'default': '', 'type': <class 'str'>}, 'help_json_input_title': {'metavar': 'help JSON input title', 'help': 'Help JSON input title.\\n', 'required': False, 'default': 'Help', 'type': <class 'str'>}, 'genomes-folder': {'metavar': 'genomes', 'help': "Folder containing genomes.\\n(e.g. 
'/Users/lebechea/howard/databases/genomes/current'", 'required': False, 'default': '/Users/lebechea/howard/databases/genomes/current', 'type': <howard.tools.tools.PathType object>, 'gooey': {'widget': 'DirChooser', 'options': {'default_dir': '/Users/lebechea/howard/databases/genomes/current', 'message': 'Path to genomes databases folder'}}}, 'config': {'metavar': 'config', 'help': 'Configuration JSON file defined default configuration regarding \\nresources (e.g. threads, memory),\\nsettings (e.g. verbosity, temporary files),\\ndefault folders (e.g. for databases)\\nand paths to external tools.\\n', 'required': False, 'default': '{}', 'type': <class 'str'>, 'gooey': {'widget': 'FileChooser', 'options': {'initial_value': '{}'}}}, 'threads': {'metavar': 'threads', 'help': 'Specify the number of threads to use for processing HOWARD.\\nIt determines the level of parallelism,\\neither on python scripts, duckdb engine and external tools.\\nIt and can help speed up the process/tool.\\nUse -1 to use all available CPU/cores.\\nEither non valid value is 1 CPU/core.\\n', 'required': False, 'type': <class 'int'>, 'default': -1, 'gooey': {'widget': 'IntegerField', 'options': {'min': -1, 'max': 1000, 'increment': 1}}, 'extra': {'examples': {'# Automatically detect all available CPU/cores': '"threads": -1', '# Define 8 CPU/cores': '"threads": 8'}}}, 'memory': {'metavar': 'memory', 'help': "Specify the memory to use in format FLOAT[kMG]\\n(e.g. 
'8G', '12.42G', '1024M').\\nIt determines the amount of memory for duckDB engine and external tools\\n(especially for JAR programs).\\nIt can help to prevent 'out of memory' failures.\\nBy default (None) is 80%% of RAM (for duckDB).\\n", 'required': False, 'type': <class 'str'>, 'default': None, 'extra': {'format': 'FLOAT[kMG]', 'examples': {'# Automatically detect all available CPU/cores': '"threads": -1', '# Define 8 CPU/cores': '"threads": 8'}}}, 'chunk_size': {'metavar': 'chunk size', 'help': 'Number of records in batch to export output file.\\nThe lower the chunk size, the less memory consumption.\\nFor Parquet partitioning, files size will depend on the chunk size.\\n', 'required': False, 'default': 1000000, 'type': <class 'int'>, 'gooey': {'widget': 'IntegerField', 'options': {'min': 1, 'max': 100000000000, 'increment': 10000}}, 'extra': {'examples': {'Chunk size of 1.000.000 by default': '"chunk_size": 1000000', 'Smaller chunk size to reduce Parquet file size and memory usage': '"chunk_size": 100000'}}}, 'tmp': {'metavar': 'Temporary folder', 'help': "Temporary folder (e.g. 
'/tmp').\\nBy default, '.tmp' for duckDB (see doc),external tools and python scripts.\\n", 'required': False, 'default': None, 'type': <howard.tools.tools.PathType object>, 'gooey': {'widget': 'DirChooser'}, 'extra': {'examples': {'# System temporary folder': '"tmp": "/tmp"', '# HOWARD work directory': '"tmp": "~/howard/tmp"', '# Current work directory': '"tmp": ".tmp"'}}}, 'duckdb_settings': {'metavar': 'duckDB settings', 'help': 'DuckDB settings (see duckDB doc) as JSON (string or file).\\nThese settings have priority (see options \\'threads\\', \\'tmp\\'...).\\nExamples: \\'{"TimeZone": "GMT", "temp_directory": "/tmp/duckdb", "threads": 8}\\'.\\n', 'required': False, 'default': None, 'type': <howard.tools.tools.PathType object>, 'gooey': {'widget': 'FileChooser', 'options': {'wildcard': 'JSON file (*.json)|*.json|All files (*)|*'}}, 'extra': {'examples': {'DuckDB settings JSON file': '"duckdb_settings": "/path/to/duckdb_config.json"', 'JSON string for Time zone, temporary directory and threads for duckDB': '"duckdb_settings": {\\n "TimeZone": "GMT",\\n "temp_directory": "/tmp/duckdb",\\n "threads": 8\\n}'}}}, 'verbosity': {'metavar': 'verbosity', 'help': 'Verbosity level\\nAvailable: CRITICAL, ERROR, WARNING, INFO, DEBUG or NOTSET\\n- DEBUG: Detailed information, typically of interest only when diagnosing problems.\\n- INFO: Confirmation that things are working as expected.\\n- WARNING: An indication that something unexpected happened.\\n- ERROR: Due to a more serious problem.\\n- CRITICAL: A serious error.\\n- FATAL: A fatal error.\\n- NOTSET: All messages.\\n', 'required': False, 'choices': ['CRITICAL', 'ERROR', 'WARNING', 'INFO', 'DEBUG', 'NOTSET', 'WARN', 'FATAL'], 'default': 'INFO', 'type': <class 'str'>, 'gooey': {'widget': 'Dropdown', 'options': {}}, 'extra': {'examples': {'Default verbosity': '"verbosity": "INFO"', 'ERROR level (quiet mode)': '"verbosity": "ERROR"', 'For debug': '"verbosity": "DEBUG"'}}}, 'access': {'metavar': 'access mode', 'help': 
"Access mode to variants file or database.\\nEither 'RW' for Read and Write, or 'RO' for Read Only.\\n", 'default': 'RW', 'type': <class 'str'>, 'choices': ['RW', 'RO'], 'gooey': {'widget': 'Dropdown', 'options': {}}, 'extra': {'examples': {'Read and Write mode': '"access": "RW"', 'Read only mode': '"access": "RO"'}}}, 'log': {'metavar': 'log', 'help': "Logs file\\n(e.g. 'my.log').\\n", 'required': False, 'default': None, 'type': <howard.tools.tools.PathType object>, 'gooey': {'widget': 'FileSaver'}, 'extra': {'examples': {'Relative path to log file': '"log": "my.log"', '# HOWARD work directory': '"log": "~/howard/log"', 'Full path to log file': '"log": "/tmp/my.log"'}}}, 'interactive': {'help': 'Interative mose..\\n', 'action': 'store_true', 'default': False}, 'quiet': {'help': '==SUPPRESS==', 'action': 'store_true', 'default': False}, 'verbose': {'help': '==SUPPRESS==', 'action': 'store_true', 'default': False}, 'debug': {'help': '==SUPPRESS==', 'action': 'store_true', 'default': False}, 'databases_folder': {'help': 'Path of HOWARD database folder.\\n', 'type': <class 'str'>, 'default': '/Users/lebechea/howard/databases'}, 'database': {'help': 'Which database to update.\\n', 'type': <class 'str'>, 'default': 'clinvar', 'choices': ['clinvar']}, 'update_config': {'help': 'Path of json configuration file.\\n', 'type': <class 'str'>}, 'current_folder': {'help': 'Path of json configuration file.\\n', 'type': <class 'str'>, 'default': 'current'}, 'add_variants_view': {'help': 'Create a sheet with all INFO fields exploded.\\n', 'action': 'store_true', 'default': False}, 'add_header': {'help': 'Create a sheet with all INFO fields header descritions.\\n', 'action': 'store_true', 'default': False}, 'transcripts_expected': {'metavar': 'List of transcripts (file)', 'help': 'File with a list of transcripts in first column.\\n', 'required': False, 'default': None, 'type': <howard.tools.tools.PathType object>, 'gooey': {'widget': 'FileChooser', 'options': {'wildcard': 'TSV file 
(*.tsv)|*.tsv|All files (*)|*'}}}, 'transcripts_missing': {'metavar': 'List of missing transcripts (file)', 'help': 'File with a list of missing transcripts in first column.\\n', 'required': False, 'default': None, 'type': <howard.tools.tools.PathType object>, 'gooey': {'widget': 'FileSaver', 'options': {'wildcard': 'TSV file (*.tsv)|*.tsv|All files (*)|*'}}}, 'genebe_use_refseq': {'help': 'Use refSeq to annotate (default).\\n', 'action': 'store_true', 'default': False}, 'genebe_use_ensembl': {'help': 'Use Ensembl to annotate.\\n', 'action': 'store_true', 'default': False}, 'not_flatten_consequences': {'help': 'Use exploded annotation informations.\\n', 'action': 'store_true', 'default': False}, 'minimalize_info': {'help': "Minimalize INFO field (e.g. '.' value).\\n", 'action': 'store_true', 'default': False}, 'minimalize_id': {'help': "Minimalize ID field (e.g. '.' value).\\n", 'action': 'store_true', 'default': False}, 'minimalize_qual': {'help': "Minimalize QUAL field (e.g. '.' value).\\n", 'action': 'store_true', 'default': False}, 'minimalize_filter': {'help': "Minimalize FILTER field (e.g. '.' value).\\n", 'action': 'store_true', 'default': False}, 'minimalize_samples': {'help': "Minimalize samples to keep only genotypes (i.e. 'GT').\\n", 'action': 'store_true', 'default': False}, 'remove_samples': {'help': 'Remove all samples to keep only variants.\\n', 'action': 'store_true', 'default': False}}"}, {"fullname": "howard.tools.tools.shared_arguments", "modulename": "howard.tools.tools", "qualname": "shared_arguments", "kind": "variable", "doc": "

    \n", "default_value": "['config', 'threads', 'memory', 'chunk_size', 'tmp', 'duckdb_settings', 'interactive', 'verbosity', 'log', 'quiet', 'verbose', 'debug']"}, {"fullname": "howard.tools.tools.commands_arguments", "modulename": "howard.tools.tools", "qualname": "commands_arguments", "kind": "variable", "doc": "

    \n", "default_value": "{'query': {'function': 'query', 'description': "Query genetic variations in SQL format. Data can be loaded into 'variants' table from various formats (e.g. VCF, TSV, Parquet...). Using --explode_infos allow query on INFO/tag annotations. SQL query can also use external data within the request, such as a Parquet file(s). ", 'help': 'Query genetic variations file in SQL format.', 'epilog': 'Usage examples:\\n howard query --input=tests/data/example.vcf.gz --query="SELECT * FROM variants WHERE REF = \\'A\\' AND POS < 100000" \\n howard query --input=tests/data/example.vcf.gz --explode_infos --query=\\'SELECT "#CHROM", POS, REF, ALT, DP, CLNSIG, sample2, sample3 FROM variants WHERE DP >= 50 OR CLNSIG NOT NULL ORDER BY DP DESC\\' \\n howard query --query="SELECT \\\\"#CHROM\\\\", POS, REF, ALT, \\\\"INFO/Interpro_domain\\\\" FROM \\'tests/databases/annotations/current/hg19/dbnsfp42a.parquet\\' WHERE \\\\"INFO/Interpro_domain\\\\" NOT NULL ORDER BY \\\\"INFO/SiPhy_29way_logOdds_rankscore\\\\" DESC LIMIT 10" \\n howard query --explode_infos --explode_infos_prefix=\\'INFO/\\' --query="SELECT \\\\"#CHROM\\\\", POS, REF, ALT, STRING_AGG(INFO, \\';\\') AS INFO FROM \\'tests/databases/annotations/current/hg19/*.parquet\\' GROUP BY \\\\"#CHROM\\\\", POS, REF, ALT" --output=/tmp/full_annotation.tsv && head -n2 /tmp/full_annotation.tsv \\n howard query --input=tests/data/example.vcf.gz --param=config/param.json \\n \\n', 'groups': {'main': {'input': False, 'output': False, 'param': False, 'query': False}, 'Explode': {'explode_infos': False, 'explode_infos_prefix': False, 'explode_infos_fields': False}, 'Query': {'query_limit': False, 'query_print_mode': False}, 'Export': {'include_header': False, 'parquet_partitions': False}}}, 'filter': {'function': 'filter', 'description': "Filter genetic variations in SQL format. Data can be loaded into 'variants' table from various formats (e.g. VCF, TSV, Parquet...). 
SQL filter can also use external data within the request, such as a Parquet file(s). ", 'help': 'Filter genetic variations file in SQL format.', 'epilog': 'Usage examples:\\n howard filter --input=tests/data/example.vcf.gz --output=/tmp/example.filter.vcf.gz --filter="REF = \\'A\\' AND POS < 100000" \\n howard filter --input=tests/data/example.vcf.gz --output=/tmp/example.filter.vcf.gz --filter="REF = \\'A\\' AND POS < 100000" --samples="sample1,sample2" \\n howard filter --input=tests/data/example.vcf.gz --output=/tmp/example.filter.vcf.gz --filter="INFOS.CLNSIG LIKE \\'pathogenic\\'" --samples="sample1,sample2" \\n howard filter --input=tests/data/example.vcf.gz --output=/tmp/example.filter.vcf.gz --filter="QUAL > 100 AND SAMPLES.sample2.GT != \\'./.\\'" --samples="sample2" \\n \\n', 'groups': {'main': {'input': True, 'output': True}, 'Filters': {'filter': False, 'samples': False}, 'Export': {'include_header': False, 'parquet_partitions': False}}}, 'sort': {'function': 'sort', 'description': "Sort genetic variations from contig order. Data can be loaded into 'variants' table from various formats (e.g. VCF, TSV, Parquet...). SQL filter can also use external data within the request, such as a Parquet file(s). 
", 'help': 'Sort genetic variations file from contig order.', 'epilog': 'Usage examples:\\n howard sort --input=tests/data/example.vcf.gz --output=/tmp/example.sorted.vcf.gz \\n \\n', 'groups': {'main': {'input': True, 'output': True}, 'Export': {'include_header': False, 'parquet_partitions': False}}}, 'stats': {'function': 'stats', 'description': 'Statistics on genetic variations, such as: number of variants, number of samples, statistics by chromosome, genotypes by samples...', 'help': 'Statistics on genetic variations file.', 'epilog': 'Usage examples:\\n howard stats --input=tests/data/example.vcf.gz \\n howard stats --input=tests/data/example.vcf.gz --stats_md=/tmp/stats.md \\n howard stats --input=tests/data/example.vcf.gz --param=config/param.json \\n \\n', 'groups': {'main': {'input': True, 'param': False}, 'Stats': {'stats_md': False, 'stats_json': False}}}, 'convert': {'function': 'convert', 'description': "Convert genetic variations file to another format. Multiple format are available, such as usual and official VCF and BCF format, but also other formats such as TSV, CSV, PSV and Parquet/duckDB. These formats need a header '.hdr' file to take advantage of the power of howard (especially through INFO/tag definition), and using howard convert tool automatically generate header file fo futher use. 
", 'help': 'Convert genetic variations file to another format.', 'epilog': 'Usage examples:\\n howard convert --input=tests/data/example.vcf.gz --output=/tmp/example.tsv \\n howard convert --input=tests/data/example.vcf.gz --output=/tmp/example.parquet \\n howard convert --input=tests/data/example.vcf.gz --output=/tmp/example.tsv --explode_infos --explode_infos_fields=\\'CLNSIG,SIFT,DP\\' --order_by=\\'CLNSIG DESC, DP DESC\\' \\n howard convert --input=tests/data/example.vcf.gz --output=/tmp/example.tsv --explode_infos --explode_infos_prefix=\\'INFO/\\' --explode_infos_fields=\\'CLNSIG,SIFT,DP,*\\' --order_by=\\'"INFO/CLNSIG" DESC, "INFO/DP" DESC\\' --include_header \\n howard convert --input=tests/data/example.vcf.gz --output=/tmp/example.tsv --param=config/param.json \\n \\n', 'groups': {'main': {'input': True, 'output': True, 'param': False}, 'Explode': {'explode_infos': False, 'explode_infos_prefix': False, 'explode_infos_fields': False}, 'Export': {'include_header': False, 'order_by': False, 'parquet_partitions': False}}}, 'hgvs': {'function': 'hgvs', 'description': "HGVS annotation using HUGO HGVS internation Sequence Variant Nomenclature (http://varnomen.hgvs.org/). Annotation refere to refGene and genome to generate HGVS nomenclature for all available transcripts. 
This annotation add 'hgvs' field into VCF INFO column of a VCF file.", 'help': 'HGVS annotation (HUGO internation nomenclature) using refGene, genome and transcripts list.\\n', 'epilog': 'Usage examples:\\n howard hgvs --input=tests/data/example.full.vcf --output=/tmp/example.hgvs.vcf \\n howard hgvs --input=tests/data/example.full.vcf --output=/tmp/example.hgvs.tsv --param=config/param.json \\n howard hgvs --input=tests/data/example.full.vcf --output=/tmp/example.hgvs.vcf --full_format --use_exon \\n \\n', 'groups': {'main': {'input': True, 'output': True, 'param': False, 'hgvs_options': False, 'assembly': False}, 'HGVS': {'use_gene': False, 'use_exon': False, 'use_protein': False, 'add_protein': False, 'full_format': False, 'codon_type': False, 'refgene': False, 'refseqlink': False}}}, 'annotation': {'function': 'annotation', 'description': 'Annotation is mainly based on a build-in Parquet annotation method, and tools such as BCFTOOLS, Annovar and snpEff. It uses available databases (see Annovar and snpEff) and homemade databases. Format of databases are: parquet, duckdb, vcf, bed, Annovar and snpEff (Annovar and snpEff databases are automatically downloaded, see howard databases tool). 
', 'help': 'Annotation of genetic variations file using databases/files and tools.', 'epilog': "Usage examples:\\n howard annotation --input=tests/data/example.vcf.gz --output=/tmp/example.howard.vcf.gz --annotations='tests/databases/annotations/current/hg19/avsnp150.parquet,tests/databases/annotations/current/hg19/dbnsfp42a.parquet,tests/databases/annotations/current/hg19/gnomad211_genome.parquet' \\n howard annotation --input=tests/data/example.vcf.gz --output=/tmp/example.howard.tsv --assembly=hg19 --annotations='annovar:refGene,annovar:cosmic70,snpeff,tests/databases/annotations/current/hg19/clinvar_20210123.parquet' \\n howard annotation --input=tests/data/example.vcf.gz --output=/tmp/example.howard.tsv --assembly=hg19 --annotation_parquet='tests/databases/annotations/current/hg19/avsnp150.parquet,tests/databases/annotations/current/hg19/dbnsfp42a.parquet' \\n howard annotation --input=tests/data/example.vcf.gz --output=/tmp/example.howard.tsv --assembly=hg19 --annotation_bcftools='tests/databases/annotations/current/hg19/nci60.vcf.gz,tests/databases/annotations/current/hg19/dbnsfp42a.vcf.gz' \\n howard annotation --input=tests/data/example.vcf.gz --output=/tmp/example.howard.tsv --assembly=hg19 --annotation_snpsift='tests/databases/annotations/current/hg19/nci60.vcf.gz,tests/databases/annotations/current/hg19/dbnsfp42a.vcf.gz' \\n howard annotation --input=tests/data/example.vcf.gz --output=/tmp/example.howard.tsv --assembly=hg19 --annotation_annovar='nci60:cosmic70' \\n howard annotation --input=tests/data/example.vcf.gz --output=/tmp/example.howard.tsv --assembly=hg19 --annotation_snpeff='-hgvs' \\n howard annotation --input=tests/data/example.vcf.gz --output=/tmp/example.howard.tsv --assembly=hg19 --annotation_exomiser='preset=exome:transcript_source=refseq' \\n howard annotation --input=tests/data/example.vcf.gz --output=/tmp/example.howard.tsv --assembly=hg19 --annotation_splice='split_mode=one:spliceai_distance=500:spliceai_mask=1' \\n howard annotation 
--input=tests/data/example.vcf.gz --output=/tmp/example.howard.tsv --assembly=hg19 --annotations='ALL:parquet' \\n howard annotation --input=tests/data/example.vcf.gz --output=/tmp/example.howard.tsv --param=config/param.json \\n \\n", 'groups': {'main': {'input': True, 'output': True, 'param': False, 'annotations': False, 'annotation_parquet': False, 'annotation_bcftools': False, 'annotation_annovar': False, 'annotation_snpeff': False, 'annotation_snpsift': False, 'annotation_exomiser': False, 'annotation_splice': False, 'assembly': False}, 'Annotation': {'annotations_update': False, 'annotations_append': False}}}, 'calculation': {'function': 'calculation', 'description': 'Calculation processes variants information to generate new information, such as: identify variation type (VarType), harmonizes allele frequency (VAF) and calculate sttistics (VAF_stats), extracts Nomen (transcript, cNomen, pNomen...) from an HGVS field (e.g. snpEff, Annovar) with an optional list of personalized transcripts, generates VaRank format barcode, identify trio inheritance.', 'help': 'Calculation operations on genetic variations file and genotype information.\\n', 'epilog': "Usage examples:\\n howard calculation --input=tests/data/example.full.vcf --output=/tmp/example.calculation.tsv --calculations='vartype' \\n howard calculation --input=tests/data/example.ann.vcf.gz --output=/tmp/example.calculated.tsv --calculations='snpeff_hgvs,NOMEN' --hgvs_field=snpeff_hgvs --transcripts=tests/data/transcripts.tsv \\n howard calculation --input=tests/data/example.vcf.gz --output=/tmp/example.calculated.tsv --calculations='TRIO' --trio_pedigree='sample1,sample2,sample4' \\n howard calculation --input=tests/data/example.vcf.gz --output=/tmp/example.calculated.tsv --calculations='BARCODEFAMILY' --family_pedigree='sample1,sample2,sample4' \\n howard calculation --input=tests/data/example.ann.transcripts.vcf.gz --output=/tmp/example.calculation.transcripts.tsv --param=config/param.transcripts.json 
--calculations='TRANSCRIPTS_ANNOTATIONS,TRANSCRIPTS_PRIORITIZATION,TRANSCRIPTS_EXPORT' \\n howard calculation --input=tests/data/example.ann.vcf.gz --output=/tmp/example.ann.tsv --param=config/param.json \\n howard calculation --show_calculations \\n \\n", 'groups': {'main': {'input': False, 'output': False, 'param': False, 'calculations': False}, 'Calculation': {'calculation_config': False, 'show_calculations': False}, 'NOMEN': {'hgvs_field': False, 'transcripts': False}, 'TRIO': {'trio_pedigree': False}, 'BARCODEFAMILY': {'family_pedigree': False}}}, 'prioritization': {'function': 'prioritization', 'description': "Prioritization algorithm uses profiles to flag variants (as passed or filtered), calculate a prioritization score, and automatically generate a comment for each variants (example: 'polymorphism identified in dbSNP. associated to Lung Cancer. Found in ClinVar database'). Prioritization profiles are defined in a configuration file in JSON format. A profile is defined as a list of annotation/value, using wildcards and comparison options (contains, lower than, greater than, equal...). Annotations fields may be quality values (usually from callers, such as 'DP') or other annotations fields provided by annotations tools, such as HOWARD itself (example: COSMIC, Clinvar, 1000genomes, PolyPhen, SIFT). 
Multiple profiles can be used simultaneously, which is useful to define multiple validation/prioritization levels (example: 'standard', 'stringent', 'rare variants', 'low allele frequency').\\n", 'help': 'Prioritization of genetic variations based on annotations criteria (profiles).', 'epilog': "Usage examples:\\n howard prioritization --input=tests/data/example.vcf.gz --output=/tmp/example.prioritized.vcf.gz --prioritizations='default' \\n howard prioritization --input=tests/data/example.vcf.gz --output=/tmp/example.prioritized.vcf.gz --prioritizations='default,GERMLINE' --prioritization_config=config/prioritization_profiles.json \\n howard prioritization --input=tests/data/example.vcf.gz --output=/tmp/example.prioritized.tsv --param=config/param.json \\n \\n", 'groups': {'main': {'input': True, 'output': True, 'param': False, 'prioritizations': False}, 'Prioritization': {'default_profile': False, 'pzfields': False, 'prioritization_score_mode': False, 'prioritization_config': False}}}, 'process': {'function': 'process', 'description': 'howard process tool manage genetic variations to:\\n- annotates genetic variants with multiple annotation databases/files and tools\\n- calculates and normalizes annotations\\n- prioritizes variants with profiles (list of citeria) to calculate scores and flags\\n- translates into various formats\\n- query genetic variants and annotations\\n- generates variants statistics', 'help': 'Full genetic variations process: annotation, calculation, prioritization, format, query, filter...', 'epilog': 'Usage examples:\\n howard process --input=tests/data/example.vcf.gz --output=/tmp/example.annotated.vcf.gz --param=config/param.json \\n howard process --input=tests/data/example.vcf.gz --annotations=\\'snpeff\\' --calculations=\\'snpeff_hgvs\\' --prioritizations=\\'default\\' --explode_infos --output=/tmp/example.annotated.tsv --query=\\'SELECT "#CHROM", POS, ALT, REF, snpeff_hgvs FROM variants\\' \\n howard process 
--input=tests/data/example.vcf.gz --hgvs_options=\\'full_format,use_exon\\' --explode_infos --output=/tmp/example.annotated.tsv --query=\\'SELECT "#CHROM", POS, ALT, REF, hgvs FROM variants\\' \\n howard process --input=tests/data/example.vcf.gz --output=/tmp/example.howard.vcf.gz --hgvs=\\'full_format,use_exon\\' --annotations=\\'tests/databases/annotations/current/hg19/avsnp150.parquet,tests/databases/annotations/current/hg19/dbnsfp42a.parquet,tests/databases/annotations/current/hg19/gnomad211_genome.parquet\\' --calculations=\\'NOMEN\\' --explode_infos --query=\\'SELECT NOMEN, REVEL_score, SIFT_score, AF AS \\'gnomad_AF\\', ClinPred_score, ClinPred_pred FROM variants\\' \\n \\n', 'groups': {'main': {'input': True, 'output': True, 'param': False, 'hgvs_options': False, 'annotations': False, 'calculations': False, 'prioritizations': False, 'assembly': False}, 'HGVS': {'use_gene': False, 'use_exon': False, 'use_protein': False, 'add_protein': False, 'full_format': False, 'codon_type': False, 'refgene': False, 'refseqlink': False}, 'Annotation': {'annotations_update': False, 'annotations_append': False}, 'Calculation': {'calculation_config': False}, 'Prioritization': {'default_profile': False, 'pzfields': False, 'prioritization_score_mode': False, 'prioritization_config': False}, 'Query': {'query': False, 'query_limit': False, 'query_print_mode': False}, 'Explode': {'explode_infos': False, 'explode_infos_prefix': False, 'explode_infos_fields': False}, 'Export': {'include_header': False, 'order_by': False, 'parquet_partitions': False}}}, 'databases': {'function': 'databases', 'description': 'Download databases and needed files for howard and associated tools', 'help': 'Download databases and needed files for howard and associated tools', 'epilog': "Usage examples:\\n howard databases --assembly=hg19 --download-genomes=~/howard/databases/genomes/current --download-genomes-provider=UCSC --download-genomes-contig-regex='chr[0-9XYM]+$' \\n howard databases 
--assembly=hg19 --download-annovar=~/howard/databases/annovar/current --download-annovar-files='refGene,cosmic70,nci60' \\n howard databases --assembly=hg19 --download-snpeff=~/howard/databases/snpeff/current \\n howard databases --assembly=hg19 --download-refseq=~/howard/databases/refseq/current --download-refseq-format-file='ncbiRefSeq.txt' \\n howard databases --assembly=hg19 --download-dbnsfp=~/howard/databases/dbnsfp/current --download-dbnsfp-release='4.4a' --download-dbnsfp-subdatabases \\n howard databases --assembly=hg19 --download-alphamissense=~/howard/databases/alphamissense/current \\n howard databases --assembly=hg19 --download-exomiser=~/howard/databases/exomiser/current \\n howard databases --assembly=hg19 --download-dbsnp=~/howard/databases/dbsnp/current --download-dbsnp-vcf \\n cd ~/howard/databases && howard databases --assembly=hg19 --download-genomes=genomes/current --download-genomes-provider=UCSC --download-genomes-contig-regex='chr[0-9XYM]+$' --download-annovar=annovar/current --download-annovar-files='refGene,cosmic70,nci60' --download-snpeff=snpeff/current --download-refseq=refseq/current --download-refseq-format-file='ncbiRefSeq.txt' --download-dbnsfp=dbnsfp/current --download-dbnsfp-release='4.4a' --download-dbnsfp-subdatabases --download-alphamissense=alphamissense/current --download-exomiser=exomiser/current --download-dbsnp=dbsnp/current --download-dbsnp-vcf --threads=8 \\n howard databases --generate-param=/tmp/param.json --generate-param-description=/tmp/test.description.json --generate-param-formats=parquet \\n howard databases --input_annovar=tests/databases/others/hg19_nci60.txt --output_annovar=/tmp/nci60.from_annovar.vcf.gz --annovar_to_parquet=/tmp/nci60.from_annovar.parquet --annovar_code=nci60 --genome=~/howard/databases/genomes/current/hg19.fa \\n\\nNotes:\\n - Downloading databases can take a while, depending on network, threads and memory\\n - Proxy: Beware of network and proxy configuration\\n - dbNSFP download: More 
threads, more memory usage (8 threads ~ 16Gb, 24 threads ~ 32Gb)\\n \\n", 'groups': {'main': {'assembly': False, 'genomes-folder': False, 'genome': False, 'param': False}, 'Genomes': {'download-genomes': False, 'download-genomes-provider': False, 'download-genomes-contig-regex': False}, 'snpEff': {'download-snpeff': False}, 'Annovar': {'download-annovar': False, 'download-annovar-files': False, 'download-annovar-url': False}, 'refSeq': {'download-refseq': False, 'download-refseq-url': False, 'download-refseq-prefix': False, 'download-refseq-files': False, 'download-refseq-format-file': False, 'download-refseq-include-utr5': False, 'download-refseq-include-utr3': False, 'download-refseq-include-chrM': False, 'download-refseq-include-non-canonical-chr': False, 'download-refseq-include-non-coding-transcripts': False, 'download-refseq-include-transcript-version': False}, 'dbNSFP': {'download-dbnsfp': False, 'download-dbnsfp-url': False, 'download-dbnsfp-release': False, 'download-dbnsfp-parquet-size': False, 'download-dbnsfp-subdatabases': False, 'download-dbnsfp-parquet': False, 'download-dbnsfp-vcf': False, 'download-dbnsfp-no-files-all': False, 'download-dbnsfp-add-info': False, 'download-dbnsfp-only-info': False, 'download-dbnsfp-uniquify': False, 'download-dbnsfp-row-group-size': False}, 'AlphaMissense': {'download-alphamissense': False, 'download-alphamissense-url': False}, 'Exomiser': {'download-exomiser': False, 'download-exomiser-application-properties': False, 'download-exomiser-url': False, 'download-exomiser-release': False, 'download-exomiser-phenotype-release': False, 'download-exomiser-remm-release': False, 'download-exomiser-remm-url': False, 'download-exomiser-cadd-release': False, 'download-exomiser-cadd-url': False, 'download-exomiser-cadd-url-snv-file': False, 'download-exomiser-cadd-url-indel-file': False}, 'dbSNP': {'download-dbsnp': False, 'download-dbsnp-releases': False, 'download-dbsnp-release-default': False, 'download-dbsnp-url': False, 
'download-dbsnp-url-files': False, 'download-dbsnp-url-files-prefix': False, 'download-dbsnp-assemblies-map': False, 'download-dbsnp-vcf': False, 'download-dbsnp-parquet': False}, 'HGMD': {'convert-hgmd': False, 'convert-hgmd-file': False, 'convert-hgmd-basename': False}, 'from_Annovar': {'input_annovar': False, 'output_annovar': False, 'annovar_code': False, 'annovar_to_parquet': False, 'annovar_reduce_memory': False, 'annovar_multi_variant': False}, 'from_extann': {'input_extann': False, 'output_extann': False, 'refgene': False, 'transcripts': False, 'param_extann': False, 'mode_extann': False}, 'Parameters': {'generate-param': False, 'generate-param-description': False, 'generate-param-releases': False, 'generate-param-formats': False, 'generate-param-bcftools': False}}}, 'gui': {'function': 'gui', 'description': 'Graphical User Interface tools', 'help': 'Graphical User Interface tools', 'epilog': 'Usage examples:\\n howard gui ', 'groups': {}}, 'help': {'function': 'help', 'description': 'Help tools', 'help': 'Help tools', 'epilog': "Usage examples:\\n howard help --help_md=docs/help.md --help_html=docs/html/help.html --help_pdf=docs/pdf/help.pdf\\n howard help --help_json_input=docs/json/help.configuration.json --help_json_input_title='HOWARD Configuration' --help_md=docs/help.configuration.md --help_html=docs/html/help.configuration.html --help_pdf=docs/pdf/help.configuration.pdf --code_type='json'\\n howard help --help_json_input=docs/json/help.parameteres.json --help_json_input_title='HOWARD Parameters' --help_md=docs/help.parameteres.md --help_html=docs/html/help.parameteres.html --help_pdf=docs/pdf/help.parameteres.pdf --code_type='json' \\n howard help --help_json_input=docs/json/help.parameteres.databases.json --help_json_input_title='HOWARD Parameters Databases' --help_md=docs/help.parameteres.databases.md --help_html=docs/html/help.parameteres.databases.html --help_pdf=docs/pdf/help.parameteres.databases.pdf --code_type='json' \\n \\n", 'groups': 
{'main': {'help_md': False, 'help_html': False, 'help_pdf': False, 'help_md_input': False, 'help_json_input': False, 'help_json_input_title': False, 'code_type': False}}}, 'update_database': {'function': 'update_database', 'description': 'Update HOWARD database\\n', 'help': '(plugin) Update HOWARD database', 'epilog': 'Usage examples:\\n howard update_database --database clinvar --databases_folder /home1/DB/HOWARD --update_config update_databases.json \\n \\n', 'groups': {'main': {'param': False}, 'Update_database': {'databases_folder': False, 'database': False, 'update_config': False, 'current_folder': False}, 'Options': {'show': False, 'limit': False}}}, 'to_excel': {'function': 'to_excel', 'description': "Convert VCF file to Excel '.xlsx' format.\\n", 'help': "(plugin) Convert VCF file to Excel '.xlsx' format", 'epilog': 'Usage examples:\\n howard to_excel --input=tests/data/example.vcf.gz --output=/tmp/example.xlsx --add_variants_view\\n \\n', 'groups': {'main': {'input': True, 'output': True}, 'Add': {'add_variants_view': False, 'add_header': False}}}, 'transcripts_check': {'function': 'transcripts_check', 'description': 'Check if a transcript list is present in a generated transcript table from a input VCF file.\\n', 'help': '(plugin) Check transcript list in transcript table', 'epilog': 'Usage examples:\\n howard transcripts_check --input=plugins/transcripts_check/tests/data/example.ann.transcripts.vcf.gz --param=plugins/transcripts_check/tests/data/param.transcripts.json --transcripts_expected=plugins/transcripts_check/tests/data/transcripts.tsv --stats=/tmp/transcripts.stats.json --transcripts_missing=/tmp/transcripts.missing.tsv\\n \\n', 'groups': {'main': {'input': True, 'param': True, 'transcripts_expected': True, 'transcripts_missing': False, 'stats_json': False}}}, 'genebe': {'function': 'genebe', 'description': 'GeneBe annotation using REST API (see https://genebe.net/).\\n', 'help': '(plugin) GeneBe annotation using REST API', 'epilog': 'Usage 
examples:\\n howard genebe --input=tests/data/example.vcf.gz --output=/tmp/example.genebe.vcf.gz --genebe_use_refseq\\n \\n', 'groups': {'main': {'input': True, 'output': True, 'param': False, 'assembly': False}, 'GeneBe': {'genebe_use_refseq': False, 'genebe_use_ensembl': False, 'not_flatten_consequences': False}, 'Explode': {'explode_infos': False, 'explode_infos_prefix': False, 'explode_infos_fields': False}, 'Export': {'include_header': False, 'order_by': False, 'parquet_partitions': False}}}, 'minimalize': {'function': 'minimalize', 'description': "Minimalize a VCF file consists in put missing value ('.') on INFO/Tags, ID, QUAL or FILTER fields. Options can also minimalize samples (keep only GT) or remove all samples. INFO/tags can by exploded before minimalize to keep tags into separated columns (useful for Parquet or TSV format to constitute a database).\\n", 'help': '(plugin) Minimalize a VCF file, such as removing INFO/Tags or samples', 'epilog': 'Usage examples:\\n howard minimalize --input=tests/data/example.vcf.gz --output=/tmp/example.minimal.vcf.gz --minimalize_info --minimalize_filter --minimalize_qual --minimalize_id --minimalize_samples\\n howard minimalize --input=tests/data/example.vcf.gz --output=/tmp/example.minimal.tsv --remove_samples --explode_infos --minimalize_info\\n \\n', 'groups': {'main': {'input': True, 'output': True, 'param': False}, 'Minimalize': {'minimalize_info': False, 'minimalize_id': False, 'minimalize_qual': False, 'minimalize_filter': False, 'minimalize_samples': False, 'remove_samples': False}, 'Explode': {'explode_infos': False, 'explode_infos_prefix': False, 'explode_infos_fields': False}, 'Export': {'include_header': False, 'order_by': False, 'parquet_partitions': False}}}}"}, {"fullname": "howard.tools.tools.arguments_dict", "modulename": "howard.tools.tools", "qualname": "arguments_dict", "kind": "variable", "doc": "

    \n", "default_value": "{'arguments': {'input': {'metavar': 'input', 'help': 'Input file path.\\nFormat file must be either VCF, Parquet, TSV, CSV, PSV or duckDB.\\nFiles can be compressesd (e.g. vcf.gz, tsv.gz).\\n', 'required': False, 'default': None, 'type': <howard.tools.tools.PathType object>, 'gooey': {'widget': 'FileChooser', 'options': {'wildcard': 'Parquet file (*.parquet)|*.parquet|All files (*)|*'}}}, 'output': {'metavar': 'output', 'help': 'Output file path.\\nFormat file must be either VCF, Parquet, TSV, CSV, PSV or duckDB.\\nFiles can be compressesd (e.g. vcf.gz, tsv.gz).\\n', 'required': False, 'default': None, 'type': <howard.tools.tools.PathType object>, 'gooey': {'widget': 'FileSaver'}}, 'param': {'metavar': 'param', 'help': 'Parameters JSON file (or string) defines parameters to process \\nannotations, calculations, prioritizations, convertions and queries.\\n', 'default': '{}', 'type': <howard.tools.tools.PathType object>, 'gooey': {'widget': 'FileChooser', 'options': {'initial_value': '', 'wildcard': 'JSON file (*.json)|*.json|All files (*)|*'}}}, 'query': {'metavar': 'query', 'help': "Query in SQL format\\n(e.g. 'SELECT * FROM variants LIMIT 50').\\n", 'default': None, 'type': <class 'str'>, 'gooey': {'widget': 'Textarea', 'options': {'initial_value': 'SELECT * FROM variants'}}, 'extra': {'param_section': 'query'}}, 'filter': {'metavar': 'filter', 'help': "Filter variant using SQL format\\n(e.g. 'POS < 100000').\\n", 'default': None, 'type': <class 'str'>, 'gooey': {'widget': 'Textarea', 'options': {'initial_value': ''}}}, 'samples': {'metavar': 'samples', 'help': "List of samples\\n(e.g. 
'sample1,sample2').\\n", 'default': None, 'type': <class 'str'>, 'gooey': {'widget': 'Textarea', 'options': {'initial_value': ''}}}, 'output_query': {'metavar': 'output', 'help': 'Output Query file.\\nFormat file must be either VCF, Parquet, TSV, CSV, PSV or duckDB.\\n', 'default': None, 'type': <howard.tools.tools.PathType object>, 'gooey': {'widget': 'FileSaver', 'options': {'wildcard': 'All files (*)|*'}}}, 'annotations': {'metavar': 'annotations', 'help': "Annotation with databases files, or with tools,\\nas a list of files in Parquet, VCF, BED, or keywords\\n (e.g. 'file.parquet,bcftools:file2.vcf.gz,annovar:refGene,snpeff').\\n- For a Parquet/VCF/BED, use file paths\\n (e.g. 'file1.parquet,file2.vcf.gz').\\n- For BCFTools annotation, use keyword 'bcftools' with file paths\\n (e.g. 'bcftools:file.vcf.gz:file.bed.gz').\\n- For Parquet annotation, use keyword 'parquet' with file paths\\n (e.g. 'parquet:file.parquet').\\n- For Annovar annotation, use keyword 'annovar' with annovar code\\n (e.g. 'annovar:refGene', 'annovar:refGene:cosmic70').\\n- For snpeff annotation, use keyword 'snpeff' with options\\n (e.g. 'snpeff', 'snpeff:-hgvs -noShiftHgvs -spliceSiteSize 3').\\n- For snpSift annotation, use keyword 'snpsift' with file paths\\n (e.g. 'snpsift:file.vcf.gz:file.bed.gz').\\n- For Exomiser annotation, use keyword 'exomiser' with options as key=value\\n (e.g. 'exomiser:preset=exome:transcript_source=refseq').\\n- For add all availalbe databases files, use 'ALL' keyword,\\n with filters on format (e.g. 'parquet', 'vcf') and release (e.g. 'current', 'devel')\\n (e.g. 
'ALL', ALL:format=parquet', 'ALL:format=parquet:release=current', 'ALL:format=parquet+vcf:release=current+devel').\\n", 'default': None, 'type': <class 'str'>, 'extra': {'format': 'DB[,DB]*[,bcftools:DB[:DB]*][,annovar:KEY[:KEY]*][,snpeff][,exomiser[:var=val]*]', 'examples': {'Parquet method annotation with 2 Parquet files': '"annotations": "/path/to/database1.parquet,/path/to/database2.parquet"', 'Parquet method annotation with multiple file formats': '"annotations": "/path/to/database1.parquet,/path/to/database2.vcf.gz,/path/to/database2.bed.gz"', 'Parquet method annotation with available Parquet databases in current release (check databases in production)': '"annotations": "ALL:parquet:current"', 'Parquet method annotation with available Parquet databases in latest release (check databases before production)': '"annotations": "ALL:parquet:latest"', 'Annotation with BCFTools': '"annotations": "bcftools:/path/to/database2.vcf.gz:/path/to/database2.bed.gz"', 'Annotation with Annovar (refGene with hgvs and Cosmic)': '"annotations": "annovar:refGene:cosmic70"', 'Annotation with snpEff (default options)': '"annotations": "snpeff"', 'Annotation with snpEff (with options)': '"annotations": "snpeff:-hgvs -noShiftHgvs -spliceSiteSize 3"', 'Annotation with snpSift': '"annotations": "snpsift:/path/to/database2.vcf.gz:/path/to/database2.bed.gz"', 'Annotation with Exomiser with options': '"annotations": "exomiser:preset=exome:hpo=0001156+0001363+0011304+0010055:transcript_source=refseq:release=2109"', 'Multiple tools annotations (Parquet method, BCFTools, Annovar, snpEff and Exomiser)': '"annotations": "/path/to/database1.parquet,bcftools:/path/to/database2.vcf.gz,annovar:refGene:cosmic70,snpeff,exomiser:preset=exome:transcript_source=refseq"'}}}, 'annotation_parquet': {'metavar': 'annotation parquet', 'help': "Annotation with Parquet method, as a list of files in Parquet, VCF or BED\\n (e.g. 
'file1.parquet,file2.vcf.gz').\\nFor add all availalbe databases files, use 'ALL' keyword,\\n with filters on type and release\\n (e.g. 'ALL', 'ALL:parquet:current', 'ALL:parquet,vcf:current,devel').\\n", 'default': None, 'type': <class 'str'>, 'nargs': '+', 'gooey': {'widget': 'MultiFileChooser', 'options': {'default_dir': '/Users/lebechea/howard/databases/annotations/current', 'message': 'Database files'}}, 'extra': {'format': 'DB[,DB]*', 'examples': {'Parquet method annotation with 2 Parquet files': '"annotation_parquet": "/path/to/database1.parquet,/path/to/database2.parquet"', 'Parquet method annotation with multiple file formats': '"annotation_parquet": "/path/to/database1.parquet,/path/to/database2.vcf.gz,/path/to/database2.bed.gz"', 'Parquet method annotation with available Parquet databases in current release (check databases in production)': '"annotation_parquet": "ALL:parquet:current"', 'Parquet method annotation with available Parquet databases in latest release (check databases before production)': '"annotation_parquet": "ALL:parquet:latest"'}}}, 'annotation_bcftools': {'metavar': 'annotation BCFTools', 'help': "Annotation with BCFTools, as a list of files VCF or BED\\n (e.g. 'file.vcf.gz,file.bed.gz').\\n", 'default': None, 'type': <class 'str'>, 'nargs': '+', 'gooey': {'widget': 'MultiFileChooser', 'options': {'default_dir': '/Users/lebechea/howard/databases/annotations/current', 'message': 'Database files'}}, 'extra': {'format': 'DB[,DB]*', 'examples': {'Annovation with BCFTools': '"annotation_bcftools": "/path/to/database2.vcf.gz,/path/to/database2.bed.gz"'}}}, 'annotation_snpeff': {'metavar': 'annotation snpEff', 'help': "Annotation with snpEff, with options\\n (e.g. 
'', '-hgvs -noShiftHgvs -spliceSiteSize 3').\\n", 'default': None, 'type': <class 'str'>, 'extra': {'format': 'options', 'examples': {'Annotation with snpEff (default options)': '"annotation_snpeff": ""', 'Annotation with snpEff (with options)': '"annotation_snpeff": "-hgvs -noShiftHgvs -spliceSiteSize 3"'}}}, 'annotation_snpsift': {'metavar': 'annotation snpSift', 'help': "Annotation with snpSift, as a list of files VCF\\n (e.g. 'file.vcf.gz,file.bed.gz').\\n", 'default': None, 'type': <class 'str'>, 'nargs': '+', 'gooey': {'widget': 'MultiFileChooser', 'options': {'default_dir': '/Users/lebechea/howard/databases/annotations/current', 'message': 'Database files'}}, 'extra': {'format': 'DB[,DB]*', 'examples': {'Annovation with snpSift': '"annotation_snpsift": "/path/to/database2.vcf.gz,/path/to/database2.bed.gz"'}}}, 'annotation_annovar': {'metavar': 'annotation Annovar', 'help': "Annotation with Annovar, as a list of database keywords\\n (e.g. 'refGene', 'refGene:cosmic70').\\n", 'default': None, 'type': <class 'str'>, 'extra': {'format': 'keyword[:keyword]*', 'examples': {'Annotation with Annovar (refGene with hgvs and Cosmic)': '"annotation_annovar": "refGene:cosmic70"'}}}, 'annotation_exomiser': {'metavar': 'annotation Exomiser', 'help': "Annotation with Exomiser, as a list of options\\n (e.g. 'preset=exome:transcript_source=refseq').\\n", 'default': None, 'type': <class 'str'>, 'extra': {'format': 'option=value[:option=value]', 'examples': {'Annotation with Exomiser with options': '"annotation_exomiser": "preset=exome:hpo=0001156+0001363+0011304+0010055:transcript_source=refseq:release=2109"'}}}, 'annotation_splice': {'metavar': 'annotation Splice', 'help': "Annotation with Splice, as a list of options\\n (e.g. 
'split_mode=one:spliceai_distance=500:spliceai_mask=1').\\n", 'default': None, 'type': <class 'str'>, 'extra': {'format': 'option=value[:option=value]', 'examples': {'Annotation with Splice with options': '"annotation_splice": "split_mode=one:spliceai_distance=500:spliceai_mask=1"'}}}, 'annotations_update': {'help': 'Update option for annotation (Only for Parquet annotation).\\nIf True, annotation fields will be removed and re-annotated.\\nThese options will be applied to all annotation databases.\\n', 'action': 'store_true', 'default': False, 'gooey': {'widget': 'BlockCheckbox', 'options': {'checkbox_label': 'Update annotation method'}}, 'extra': {'param_section': 'annotation:options'}}, 'annotations_append': {'help': 'Append option for annotation (Only for Parquet annotation).\\nIf True, annotation fields will be annotated only if not annotation exists for the variant.\\nThese options will be applied to all annotation databases.\\n', 'action': 'store_true', 'default': False, 'gooey': {'widget': 'BlockCheckbox', 'options': {'checkbox_label': 'Append annotation method'}}, 'extra': {'param_section': 'annotation:options'}}, 'calculations': {'metavar': 'operations', 'help': "Quick calculations on genetic variants information and genotype information,\\nas a list of operations (e.g. 
'VARTYPE,variant_id').\\nList of available calculations by default\\n (unsensitive case, see doc for more information):\\n VARTYPE snpeff_hgvs FINDBYPIPELINE GENOTYPECONCORDANCE BARCODE TRIO VAF VAF_STATS DP_STATS \\n", 'default': None, 'type': <class 'str'>}, 'prioritizations': {'metavar': 'prioritisations', 'help': "List of prioritization profiles to process (based on Prioritization JSON file),\\nsuch as 'default', 'rare variants', 'low allele frequency', 'GERMLINE'.\\nBy default, all profiles available will be processed.\\n", 'default': None, 'type': <class 'str'>, 'extra': {'examples': {'Prioritization profile by default': '"prioritization": "default" ', 'Prioritization profile by default and GERMLINE from Configuration JSON file': '"prioritization": "default,GERMLINE" '}}}, 'prioritization_config': {'metavar': 'prioritization config', 'help': 'Prioritization configuration JSON file (defines profiles, see doc).\\n', 'default': None, 'type': <howard.tools.tools.PathType object>, 'gooey': {'widget': 'FileChooser', 'options': {'wildcard': 'JSON file (*.json)|*.json|All files (*)|*'}}, 'extra': {'param_section': 'prioritization', 'examples': {'Prioritization configuration JSON file as an option': '"prioritization_config": "prioritization_config.json" '}}}, 'profiles': {'metavar': 'profiles', 'help': "List of prioritization profiles to process (based on Prioritization JSON file),\\nsuch as 'default', 'rare variants', 'low allele frequency', 'GERMLINE'.\\nBy default, all profiles available will be processed.\\n", 'default': None, 'type': <class 'str'>}, 'default_profile': {'metavar': 'default profile', 'help': 'Prioritization profile by default (see doc).\\nDefault is the first profile in the list of prioritization profiles.\\n', 'default': None, 'type': <class 'str'>}, 'pzfields': {'metavar': 'pzfields', 'help': 'Prioritization fields to provide (see doc).\\nAvailable: PZScore, PZFlag, PZTags, PZComment, PZInfos\\n', 'default': 'PZScore,PZFlag', 'type': <class 
'str'>}, 'prioritization_score_mode': {'metavar': 'prioritization score mode', 'help': 'Prioritization Score mode (see doc).\\nAvailable: HOWARD (increment score), VaRank (max score)\\n', 'default': 'HOWARD', 'type': <class 'str'>, 'choices': ['HOWARD', 'VaRank'], 'gooey': {'widget': 'Dropdown', 'options': {}}}, 'query_limit': {'metavar': 'query limit', 'help': 'Limit of number of row for query (only for print result, not output).\\n', 'default': 10, 'type': <class 'int'>, 'gooey': {'widget': 'IntegerField', 'options': {'min': 1, 'max': 10000, 'increment': 10}}}, 'query_print_mode': {'metavar': 'print mode', 'help': "Print mode of query result (only for print result, not output).\\nEither None (native), 'markdown', 'tabulate' or disabled.\\n", 'choices': [None, 'markdown', 'tabulate', 'disabled'], 'default': None, 'type': <class 'str'>, 'gooey': {'widget': 'Dropdown', 'options': {}}}, 'explode_infos': {'help': "Explode VCF INFO/Tag into 'variants' table columns.\\n", 'action': 'store_true', 'default': False}, 'explode_infos_prefix': {'metavar': 'explode infos prefix', 'help': 'Explode VCF INFO/Tag with a specific prefix.\\n', 'default': '', 'type': <class 'str'>}, 'explode_infos_fields': {'metavar': 'explode infos list', 'help': "Explode VCF INFO/Tag specific fields/tags.\\nKeyword `*` specify all available fields, except those already specified.\\nPattern (regex) can be used, such as `.*_score` for fields named with '_score' at the end.\\nExamples:\\n- 'HGVS,SIFT,Clinvar' (list of fields)\\n- 'HGVS,*,Clinvar' (list of fields with all other fields at the end)\\n- 'HGVS,.*_score,Clinvar' (list of 2 fields with all scores in the middle)\\n- 'HGVS,.*_score,*' (1 field, scores, all other fields)\\n- 'HGVS,*,.*_score' (1 field, all other fields, all scores)\\n", 'default': '*', 'type': <class 'str'>}, 'include_header': {'help': 'Include header (in VCF format) in output file.\\nOnly for compatible formats (tab-delimiter format as TSV or BED).\\n', 'action': 'store_true', 
'default': False}, 'order_by': {'metavar': 'order by', 'help': "List of columns to sort the result-set in ascending or descending order.\\nUse SQL format, and keywords ASC (ascending) and DESC (descending).\\nIf a column is not available, order will not be considered.\\nOrder is enable only for compatible format (e.g. TSV, CSV, JSON).\\nExamples: 'ACMG_score DESC', 'PZFlag DESC, PZScore DESC'.\\n", 'default': '', 'type': <class 'str'>, 'extra': {'examples': {'Order by ACMG score in descending order': '"order_by": "ACMG_score DESC" ', 'Order by PZFlag and PZScore in descending order': '"order_by": "PZFlag DESC, PZScore DESC" '}}}, 'parquet_partitions': {'metavar': 'parquet partitions', 'help': "Parquet partitioning using hive (available for any format).\\nThis option is faster parallel writing, but memory consuming.\\nUse 'None' (string) for NO partition but split parquet files into a folder.\\nExamples: '#CHROM', '#CHROM,REF', 'None'.\\n", 'default': None, 'type': <class 'str'>}, 'input_annovar': {'metavar': 'input annovar', 'help': "Input Annovar file path.\\nFormat file must be a Annovar TXT file, associated with '.idx'.\\n", 'required': False, 'default': None, 'type': <howard.tools.tools.PathType object>, 'gooey': {'widget': 'FileChooser', 'options': {'wildcard': 'Parquet file (*.parquet)|*.parquet|All files (*)|*'}}}, 'output_annovar': {'metavar': 'output annovar', 'help': "Output Annovar file path.\\nFormat file must be either VCF compressesd file '.vcf.gz'.\\n", 'required': False, 'default': None, 'type': <howard.tools.tools.PathType object>, 'gooey': {'widget': 'FileSaver'}}, 'annovar_code': {'metavar': 'Annovar code', 'help': 'Annovar code, or database name.\\nUsefull to name databases columns.\\n', 'required': False, 'default': None, 'type': <class 'str'>}, 'annovar_to_parquet': {'metavar': 'to parquet', 'help': 'Parquet file conversion.\\n', 'required': False, 'default': None, 'type': <howard.tools.tools.PathType object>, 'gooey': {'widget': 'FileSaver', 
'options': {'wildcard': 'HTML file (*.parquet)|*.parquet'}}}, 'annovar_multi_variant': {'metavar': 'Annovar multi variant', 'help': "Variant with multiple annotation lines on Annovar file.\\nEither 'auto' (auto-detection), 'enable' or 'disable'.\\n", 'default': 'auto', 'type': <class 'str'>, 'choices': ['auto', 'enable', 'disable'], 'gooey': {'widget': 'Dropdown', 'options': {}}}, 'annovar_reduce_memory': {'metavar': 'reduce memory', 'help': "Reduce memory option for Annovar convert,\\neither 'auto' (auto-detection), 'enable' or 'disable'.\\n", 'default': 'auto', 'type': <class 'str'>, 'choices': ['auto', 'enable', 'disable'], 'gooey': {'widget': 'Dropdown', 'options': {}}}, 'input_extann': {'metavar': 'input extann', 'help': 'Input Extann file path.\\nFormat file must be a Extann TXT file or TSV file.\\nFile need to have at least the genes column.\\n', 'required': False, 'default': None, 'type': <howard.tools.tools.PathType object>, 'gooey': {'widget': 'FileChooser', 'options': {'wildcard': 'VCF, Parquet, TSV, CSV, PSV or duckDB|*.*|All files (*)|*'}}}, 'output_extann': {'metavar': 'output extann', 'help': 'Output Extann file path.\\nOutput extann file, should be BED or BED.gz.\\n', 'required': False, 'default': None, 'type': <howard.tools.tools.PathType object>, 'gooey': {'widget': 'FileSaver'}}, 'mode_extann': {'metavar': 'mode extann', 'help': 'Mode extann selection.\\nHow to pick transcript from ncbi, keep all,\\nkeep the longest, or keep the chosen one (transcript_extann).\\n', 'required': False, 'default': 'longest', 'choices': ['all', 'longest', 'chosen'], 'type': <class 'str'>}, 'param_extann': {'metavar': 'param extann', 'help': "Param extann file path.\\nParam containing configuration, options to replace chars and\\nbedlike header description, conf vcf specs.\\n(e.g. 
'~/howard/config/param.extann.json')\\n", 'required': False, 'default': None, 'type': <howard.tools.tools.PathType object>, 'gooey': {'widget': 'FileChooser', 'options': {'wildcard': 'TSV file format|*.tsv|'}}}, 'calculation_config': {'metavar': 'calculation config', 'help': 'Calculation configuration JSON file.\\n', 'default': None, 'type': <howard.tools.tools.PathType object>, 'gooey': {'widget': 'FileChooser', 'options': {'wildcard': 'JSON file (*.json)|*.json|All files (*)|*'}}, 'extra': {'param_section': 'calculation', 'examples': {'Calculation configuration JSON file as an option': '"calculation_config": "calculation_config.json" '}}}, 'show_calculations': {'help': 'Show available calculation operations.\\n', 'action': 'store_true', 'default': False}, 'hgvs_field': {'metavar': 'HGVS field', 'help': 'HGVS INFO/tag containing a list o HGVS annotations.\\n', 'default': 'hgvs', 'type': <class 'str'>, 'extra': {'param_section': 'calculation:calculations:NOMEN:options'}}, 'transcripts': {'metavar': 'transcripts', 'help': 'Transcripts TSV file,\\nwith Transcript in first column, optional Gene in second column.\\n', 'default': None, 'type': <howard.tools.tools.PathType object>, 'gooey': {'widget': 'FileChooser', 'options': {'wildcard': 'TSV file (*.tsv)|*.tsv|All files (*)|*'}}, 'extra': {'param_section': 'calculation:calculations:NOMEN:options'}}, 'trio_pedigree': {'metavar': 'trio pedigree', 'help': 'Pedigree Trio for trio inheritance calculation.\\nEither a JSON file or JSON string or a list of samples\\n(e.g. 
\\'sample1,sample2,sample3\\' for father, mother and child,\\n \\'{"father": "sample1", "mother": "sample2", "child": "sample3"}\\').\\n', 'default': None, 'gooey': {'widget': 'FileChooser', 'options': {'wildcard': 'JSON file (*.json)|*.json|All files (*)|*'}}, 'extra': {'param_section': 'calculation:calculations:TRIO'}}, 'family_pedigree': {'metavar': 'family pedigree', 'help': 'Pedigree family for barcode calculation on genotype.\\nEither a JSON file or JSON string or a list of samples\\n(e.g. \\'sample1,sample2,sample3,sample4\\',\\n \\'{"father": "sample1", "mother": "sample2", "child1": "sample3", "child2": "sample3"}\\').\\n', 'default': None, 'gooey': {'widget': 'FileChooser', 'options': {'wildcard': 'JSON file (*.json)|*.json|All files (*)|*'}}, 'extra': {'param_section': 'calculation:calculations:BARCODEFAMILY'}}, 'stats_md': {'metavar': 'stats markdown', 'help': 'Stats Output file in MarkDown format.\\n', 'required': False, 'default': None, 'type': <howard.tools.tools.PathType object>, 'gooey': {'widget': 'FileSaver', 'options': {'wildcard': 'Markdown file (*.md)|*.md'}}, 'extra': {'examples': {'Export statistics in Markdown format': '"stats_md": "/tmp/stats.md" '}}}, 'stats_json': {'metavar': 'stats json', 'help': 'Stats Output file in JSON format.\\n', 'required': False, 'default': None, 'type': <howard.tools.tools.PathType object>, 'gooey': {'widget': 'FileSaver', 'options': {'wildcard': 'JSON file (*.json)|*.json'}}, 'extra': {'examples': {'Export statistics in JSON format': '"stats_json": "/tmp/stats.json" '}}}, 'assembly': {'metavar': 'assembly', 'help': "Genome Assembly (e.g. 'hg19', 'hg38').\\n", 'required': False, 'default': 'hg19', 'type': <class 'str'>, 'extra': {'examples': {'Default assembly for all analysis tools': '"assembly": "hg19" ', 'List of assemblies for databases download tool': '"assembly": "hg19,hg38" '}}}, 'genome': {'metavar': 'genome', 'help': "Genome file in fasta format (e.g. 
'hg19.fa', 'hg38.fa').\\n", 'required': False, 'default': '/Users/lebechea/howard/databases/genomes/current/hg19/hg19.fa', 'type': <howard.tools.tools.PathType object>, 'gooey': {'widget': 'FileChooser', 'options': {'wildcard': 'All files (*)|*'}}}, 'hgvs_options': {'metavar': 'HGVS options', 'help': "Quick HGVS annotation options.\\nThis option will skip all other hgvs options.\\nExamples:\\n- 'default' (for default options)\\n- 'full_format' (for full format HGVS annotation)\\n- 'use_gene=True:add_protein=true:codon_type=FULL'\\n", 'required': False, 'default': None, 'type': <class 'str'>}, 'use_gene': {'help': "Use Gene information to generate HGVS annotation\\n(e.g. 'NM_152232(TAS1R2):c.231T>C')", 'action': 'store_true', 'default': False}, 'use_exon': {'help': "Use Exon information to generate HGVS annotation\\n(e.g. 'NM_152232(exon2):c.231T>C').\\nOnly if 'use_gene' is not enabled.\\n", 'action': 'store_true', 'default': False}, 'use_protein': {'help': "Use Protein level to generate HGVS annotation\\n(e.g. 'NP_689418:p.Cys77Arg').\\nCan be used with 'use_exon' or 'use_gene'.\\n", 'action': 'store_true', 'default': False}, 'add_protein': {'help': "Add Protein level to DNA HGVS annotation (e.g 'NM_152232:c.231T>C,NP_689418:p.Cys77Arg').\\n", 'action': 'store_true', 'default': False}, 'full_format': {'help': "Generates HGVS annotation in a full format\\nby using all information to generates an exhaustive annotation\\n(non-standard, e.g. 'TAS1R2:NM_152232:NP_689418:c.231T>C:p.Cys77Arg').\\nUse 'use_exon' to add exon information\\n(e.g 'TAS1R2:NM_152232:NP_689418:exon2:c.231T>C:p.Cys77Arg').\\n", 'action': 'store_true', 'default': False}, 'use_version': {'help': "Generates HGVS annotation with transcript version\\n(e.g. 'NM_152232.1:c.231T>C').\\n", 'action': 'store_true', 'default': False}, 'codon_type': {'metavar': 'Codon type', 'help': "Amino Acide Codon format type to use to generate HGVS annotation.\\nAvailable:\\n- '1': codon in 1 character (e.g. 
'C', 'R')\\n- '3': codon in 3 character (e.g. 'Cys', 'Arg')\\n-'FULL': codon in full name (e.g. 'Cysteine', 'Arginine')\\n", 'required': False, 'default': '3', 'type': <class 'str'>, 'choices': ['1', '3', 'FULL'], 'gooey': {'widget': 'Dropdown', 'options': {}}}, 'refgene': {'metavar': 'refGene', 'help': 'Path to refGene annotation file.\\n', 'required': False, 'default': None, 'type': <howard.tools.tools.PathType object>, 'gooey': {'widget': 'FileChooser', 'options': {'wildcard': 'All files (*)|*', 'default_dir': '/Users/lebechea/howard/databases/refseq/current', 'default_file': 'ncbiRefSeq.txt', 'message': 'Path to refGene annotation file'}}}, 'refseqlink': {'metavar': 'refSeqLink', 'help': 'Path to refSeqLink annotation file.\\n', 'required': False, 'default': None, 'type': <howard.tools.tools.PathType object>, 'gooey': {'widget': 'FileChooser', 'options': {'wildcard': 'All files (*)|*', 'default_dir': '/Users/lebechea/howard/databases/refseq/current', 'default_file': 'ncbiRefSeq.txt', 'message': 'Path to refGeneLink annotation file'}}}, 'refseq-folder': {'metavar': 'refseq folder', 'help': 'Folder containing refSeq files.\\n', 'required': False, 'default': '/Users/lebechea/howard/databases/refseq/current', 'type': <howard.tools.tools.PathType object>, 'gooey': {'widget': 'DirChooser', 'options': {'default_dir': '/Users/lebechea/howard/databases/refseq/current', 'message': 'Path to refGenefolder'}}}, 'download-genomes': {'metavar': 'genomes', 'help': "Path to genomes folder\\nwith Fasta files, indexes,\\nand all files generated by pygenome module.\\n(e.g. 
'/Users/lebechea/howard/databases/genomes/current').\\n", 'required': False, 'default': None, 'type': <howard.tools.tools.PathType object>, 'gooey': {'widget': 'DirChooser', 'options': {'default_dir': '/Users/lebechea/howard/databases', 'message': 'Path to genomes folder'}}}, 'download-genomes-provider': {'metavar': 'genomes provider', 'help': 'Download Genome from an external provider.\\nAvailable: GENCODE, Ensembl, UCSC, NCBI.\\n', 'required': False, 'default': 'UCSC', 'type': <class 'str'>, 'choices': ['GENCODE', 'Ensembl', 'UCSC', 'NCBI'], 'gooey': {'widget': 'Dropdown', 'options': {}}}, 'download-genomes-contig-regex': {'metavar': 'genomes contig regex', 'help': "Regular expression to select specific chromosome\\n(e.g 'chr[0-9XYM]+$').\\n", 'required': False, 'default': None, 'type': <class 'str'>}, 'download-annovar': {'metavar': 'Annovar', 'help': "Path to Annovar databases\\n(e.g. '/Users/lebechea/howard/databases/annovar/current').\\n", 'required': False, 'type': <howard.tools.tools.PathType object>, 'default': None, 'gooey': {'widget': 'DirChooser', 'options': {'default_dir': '/Users/lebechea/howard/databases', 'message': 'Path to Annovar databases folder'}}}, 'download-annovar-files': {'metavar': 'Annovar code', 'help': "Download Annovar databases for a list of Annovar file code (see Annovar Doc).\\nUse None to donwload all available files,\\nor Annovar keyword (e.g. 
'refGene', 'cosmic70', 'clinvar_202*').\\nNote that refGene will at least be downloaded,\\nand only files that not already exist or changed will be downloaded.\\n", 'required': False, 'default': None, 'type': <class 'str'>}, 'download-annovar-url': {'metavar': 'Annovar url', 'help': 'Annovar databases URL (see Annovar Doc).\\n', 'required': False, 'default': 'http://www.openbioinformatics.org/annovar/download', 'type': <class 'str'>}, 'download-snpeff': {'metavar': 'snpEff', 'help': 'Download snpEff databases within snpEff folder', 'required': False, 'default': None, 'type': <howard.tools.tools.PathType object>, 'gooey': {'widget': 'DirChooser', 'options': {'default_dir': '/Users/lebechea/howard/databases', 'message': 'Path to snpEff databases folder'}}}, 'download-refseq': {'metavar': 'refSeq', 'help': "Path to refSeq databases\\n(e.g. '/Users/lebechea/howard/databases/refseq/current').\\n", 'required': False, 'default': None, 'type': <howard.tools.tools.PathType object>, 'gooey': {'widget': 'DirChooser', 'options': {'default_dir': '/Users/lebechea/howard/databases', 'message': 'Path to refGene files folder'}}}, 'download-refseq-url': {'metavar': 'refSeq url', 'help': "refSeq databases URL (see refSeq WebSite)\\n(e.g. 'http://hgdownload.soe.ucsc.edu/goldenPath')\u2022/n", 'required': False, 'default': 'http://hgdownload.soe.ucsc.edu/goldenPath', 'type': <class 'str'>}, 'download-refseq-prefix': {'metavar': 'refSeq prefix', 'help': 'Check existing refSeq files in refSeq folder.\\n', 'required': False, 'default': 'ncbiRefSeq', 'type': <class 'str'>}, 'download-refseq-files': {'metavar': 'refSeq files', 'help': 'List of refSeq files to download.\\n', 'required': False, 'default': 'ncbiRefSeq.txt,ncbiRefSeqLink.txt', 'type': <class 'str'>}, 'download-refseq-format-file': {'metavar': 'refSeq format file', 'help': "Name of refSeq file to convert in BED format\\n(e.g. 
'ncbiRefSeq.txt').\\nProcess only if not None.\\n", 'required': False, 'default': None, 'type': <class 'str'>}, 'download-refseq-include-utr5': {'help': "Formating BED refSeq file including 5'UTR.\\n", 'action': 'store_true', 'default': False}, 'download-refseq-include-utr3': {'help': "Formating BED refSeq file including 3'UTR.\\n", 'action': 'store_true', 'default': False}, 'download-refseq-include-chrM': {'help': "Formating BED refSeq file including Mitochondiral chromosome 'chrM' or 'chrMT'.\\n", 'action': 'store_true', 'default': False}, 'download-refseq-include-non-canonical-chr': {'help': 'Formating BED refSeq file including non canonical chromosomes.\\n', 'action': 'store_true', 'default': False}, 'download-refseq-include-non-coding-transcripts': {'help': 'Formating BED refSeq file including non coding transcripts.\\n', 'action': 'store_true', 'default': False}, 'download-refseq-include-transcript-version': {'help': 'Formating BED refSeq file including transcript version.\\n', 'action': 'store_true', 'default': False}, 'download-dbnsfp': {'metavar': 'dbNSFP', 'help': "Download dbNSFP databases within dbNSFP folder(e.g. '/Users/lebechea/howard/databases').\\n", 'required': False, 'default': None, 'type': <howard.tools.tools.PathType object>, 'gooey': {'widget': 'DirChooser', 'options': {'default_dir': '/Users/lebechea/howard/databases', 'message': 'Path to dbNSFP databases folder'}}}, 'download-dbnsfp-url': {'metavar': 'dbNSFP url', 'help': "Download dbNSFP databases URL (see dbNSFP website)\\n(e.g. https://dbnsfp.s3.amazonaws.com').\\n", 'required': False, 'default': 'https://dbnsfp.s3.amazonaws.com', 'type': <class 'str'>}, 'download-dbnsfp-release': {'metavar': 'dnNSFP release', 'help': "Release of dbNSFP to download (see dbNSFP website)\\n(e.g. 
'4.4a').\\n", 'required': False, 'default': '4.4a'}, 'download-dbnsfp-parquet-size': {'metavar': 'dbNSFP parquet size', 'help': 'Maximum size (Mb) of data files in Parquet folder.\\nParquet folder are partitioned (hive) by chromosome (sub-folder),\\nwhich contain N data files.\\n', 'required': False, 'default': 100, 'type': <class 'int'>, 'gooey': {'widget': 'IntegerField', 'options': {'min': 1, 'max': 100000, 'increment': 10}}}, 'download-dbnsfp-subdatabases': {'help': 'Generate dbNSFP sub-databases.\\ndbNSFP provides multiple databases which are split onto multiple columns.\\nThis option create a Parquet folder for each sub-database (based on columns names).\\n', 'action': 'store_true', 'default': False}, 'download-dbnsfp-parquet': {'help': 'Generate a Parquet file for each Parquet folder.\\n', 'action': 'store_true', 'default': False}, 'download-dbnsfp-vcf': {'help': 'Generate a VCF file for each Parquet folder.\\nNeed genome FASTA file (see --download-genome).\\n', 'action': 'store_true', 'default': False}, 'download-dbnsfp-no-files-all': {'help': "Not generate database Parquet/VCF file for the entire database ('ALL').\\nOnly sub-databases files will be generated.\\n(see '--download-dbnsfp-subdatabases').\\n", 'action': 'store_true', 'default': False}, 'download-dbnsfp-add-info': {'help': 'Add INFO column (VCF format) in Parquet folder and file.\\nUseful for speed up full annotation (all available columns).\\nIncrease memory and space during generation of files.\\n', 'action': 'store_true', 'default': False}, 'download-dbnsfp-only-info': {'help': 'Add only INFO column (VCF format) in Parquet folder and file.\\nUseful for speed up full annotation (all available columns).\\nDecrease memory and space during generation of files.\\nIncrease time for partial annotation (some available columns).\\n', 'action': 'store_true', 'default': False}, 'download-dbnsfp-uniquify': {'help': 'Uniquify values within column\\n(e.g. 
"D,D" to "D", "D,.,T" to "D,T").\\nRemove transcripts information details.\\nUsefull to reduce size of the database.\\nIncrease memory and space during generation of files.\\n', 'action': 'store_true', 'default': False}, 'download-dbnsfp-row-group-size': {'metavar': 'dnNSFP row grooup size', 'help': 'Minimum number of rows in a parquet row group (see duckDB doc).\\nLower can reduce memory usage and slightly increase space during generation,\\nspeed up highly selective queries, slow down whole file queries (e.g. aggregations).\\n', 'required': False, 'default': 100000, 'type': <class 'int'>, 'gooey': {'widget': 'IntegerField', 'options': {'min': 1, 'max': 100000000000, 'increment': 10000}}}, 'download-alphamissense': {'metavar': 'AlphaMissense', 'help': 'Path to AlphaMissense databases', 'required': False, 'default': None, 'type': <howard.tools.tools.PathType object>, 'gooey': {'widget': 'DirChooser', 'options': {'default_dir': '/Users/lebechea/howard/databases', 'message': 'Path to Alphamissense databases folder'}}}, 'download-alphamissense-url': {'metavar': 'AlphaMissense url', 'help': "Download AlphaMissense databases URL (see AlphaMissense website)\\n(e.g. 'https://storage.googleapis.com/dm_alphamissense').\\n", 'required': False, 'default': 'https://storage.googleapis.com/dm_alphamissense', 'type': <class 'str'>}, 'download-exomiser': {'metavar': 'Exomiser', 'help': 'Path to Exomiser databases\\n(e.g. 
/Users/lebechea/howard/databases/exomiser/current).\\n', 'required': False, 'default': None, 'type': <howard.tools.tools.PathType object>, 'gooey': {'widget': 'DirChooser', 'options': {'default_dir': '/Users/lebechea/howard/databases', 'message': 'Path to Exomiser databases folder'}}}, 'download-exomiser-application-properties': {'metavar': 'Exomiser application properties', 'help': "Exomiser Application Properties configuration file (see Exomiser website).\\nThis file contains configuration settings for the Exomiser tool.\\nIf this parameter is not provided, the function will attempt to locate\\nthe application properties file automatically based on the Exomiser.\\nConfiguration information will be used to download expected releases (if no other parameters).\\nCADD and REMM will be downloaded only if 'path' are provided.\\n", 'required': False, 'default': None, 'type': <howard.tools.tools.PathType object>, 'gooey': {'widget': 'FileChooser', 'options': {'wildcard': 'All files (*)|*', 'options': {'default_dir': '/Users/lebechea/howard/databases/exomiser/current', 'message': 'Path to Exomiser application properties file'}}}}, 'download-exomiser-url': {'metavar': 'Exomiser url', 'help': "URL where Exomiser database files can be downloaded from\\n(e.g. 
'http://data.monarchinitiative.org/exomiser').\\n", 'required': False, 'default': 'http://data.monarchinitiative.org/exomiser', 'type': <class 'str'>}, 'download-exomiser-release': {'metavar': 'Exomiser release', 'help': 'Release of Exomiser data to download.\\nIf "default", "auto", or "config", retrieve from Application Properties file.\\nIf not provided (None), from Application Properties file (Exomiser data-version) \\nor default \\'2109\\'.\\n', 'required': False, 'default': None, 'type': <class 'str'>}, 'download-exomiser-phenotype-release': {'metavar': 'Exomiser phenoptye release', 'help': 'Release of Exomiser phenotype to download.\\nIf not provided (None), from Application Properties file (Exomiser Phenotype data-version)\\nor Exomiser release.\\n', 'required': False, 'default': None, 'type': <class 'str'>}, 'download-exomiser-remm-release': {'metavar': 'Exomiser remm release', 'help': 'Release of ReMM (Regulatory Mendelian Mutation) database to download.\\nIf "default", "auto", or "config", retrieve from Application Properties file.\\n', 'required': False, 'default': None, 'type': <class 'str'>}, 'download-exomiser-remm-url': {'metavar': 'Exomiser remm url', 'help': "URL where ReMM (Regulatory Mendelian Mutation) database files can be downloaded from\\n(e.g. 'https://kircherlab.bihealth.org/download/ReMM').\\n", 'required': False, 'default': 'https://kircherlab.bihealth.org/download/ReMM', 'type': <class 'str'>}, 'download-exomiser-cadd-release': {'metavar': 'Exomiser cadd release', 'help': 'Release of CADD (Combined Annotation Dependent Depletion) database to download.\\nIf "default", "auto", or "config", retrieve from Application Properties file.\\n', 'required': False, 'default': None, 'type': <class 'str'>}, 'download-exomiser-cadd-url': {'metavar': 'Exomiser cadd url', 'help': "URL where CADD (Combined Annotation Dependent Depletion) database files can be downloaded from\\n(e.g. 
'https://kircherlab.bihealth.org/download/CADD').\\n", 'required': False, 'default': 'https://kircherlab.bihealth.org/download/CADD', 'type': <class 'str'>}, 'download-exomiser-cadd-url-snv-file': {'metavar': 'Exomiser url snv file', 'help': 'Name of the file containing the SNV (Single Nucleotide Variant) data\\nfor the CADD (Combined Annotation Dependent Depletion) database.\\n', 'required': False, 'default': 'whole_genome_SNVs.tsv.gz', 'type': <class 'str'>}, 'download-exomiser-cadd-url-indel-file': {'metavar': 'Exomiser cadd url indel', 'help': 'Name of the file containing the INDEL (Insertion-Deletion) data\\nfor the CADD (Combined Annotation Dependent Depletion) database.\\n', 'required': False, 'default': 'InDels.tsv.gz', 'type': <class 'str'>}, 'download-dbsnp': {'metavar': 'dnSNP', 'help': "Path to dbSNP databases\\n(e.g. '/Users/lebechea/howard/databases/exomiser/dbsnp').\\n", 'required': False, 'default': None, 'type': <howard.tools.tools.PathType object>, 'gooey': {'widget': 'DirChooser', 'options': {'default_dir': '/Users/lebechea/howard/databases', 'message': 'Path to dbSNP databases folder'}}}, 'download-dbsnp-releases': {'metavar': 'dnSNP releases', 'help': "Release of dbSNP to download\\n(e.g. 'b152', 'b152,b156').\\n", 'required': False, 'default': 'b156', 'type': <class 'str'>}, 'download-dbsnp-release-default': {'metavar': 'dnSNP release default', 'help': "Default Release of dbSNP ('default' symlink)\\n(e.g. 'b156').\\nIf None, first release to download will be assigned as default\\nonly if it does not exists.\\n", 'required': False, 'default': None, 'type': <class 'str'>}, 'download-dbsnp-url': {'metavar': 'dbSNP url', 'help': "URL where dbSNP database files can be downloaded from.\\n(e.g. 
'https://ftp.ncbi.nih.gov/snp/archive').\\n", 'required': False, 'default': 'https://ftp.ncbi.nih.gov/snp/archive', 'type': <class 'str'>}, 'download-dbsnp-url-files': {'metavar': 'dbSNP url files', 'help': 'Dictionary that maps assembly names to specific dbSNP URL files.\\nIt allows you to provide custom dbSNP URL files for specific assemblies\\ninstead of using the default file naming convention.\\n', 'required': False, 'default': None, 'type': <class 'str'>}, 'download-dbsnp-url-files-prefix': {'metavar': 'dbSNP url files prefix', 'help': 'String that represents the prefix of the dbSNP file name for a specific assembly.\\nIt is used to construct the full URL of the dbSNP file to be downloaded.\\n', 'required': False, 'default': 'GCF_000001405', 'type': <class 'str'>}, 'download-dbsnp-assemblies-map': {'metavar': 'dbSNP assemblies map', 'help': 'dictionary that maps assembly names to their corresponding dbSNP versions.\\nIt is used to construct the dbSNP file name based on the assembly name.\\n', 'required': False, 'default': {'hg19': '25', 'hg38': '40'}, 'type': <class 'str'>, 'gooey': {'options': {'initial_value': '{"hg19": "25", "hg38": "40"}'}}}, 'download-dbsnp-vcf': {'help': 'Generate well-formatted VCF from downloaded file:\\n- Add and filter contigs associated to assembly\\n- Normalize by splitting multiallelics\\n- Need genome (see --download-genome)\\n', 'action': 'store_true', 'default': False}, 'download-dbsnp-parquet': {'help': 'Generate Parquet file from VCF.\\n', 'action': 'store_true', 'default': False}, 'convert-hgmd': {'metavar': 'HGMD', 'help': 'Convert HGMD databases.\\nFolder where the HGMD databases will be stored.\\nFields in VCF, Parquet and TSV will be generated.\\nIf the folder does not exist, it will be created.\\n', 'required': False, 'default': None, 'type': <howard.tools.tools.PathType object>, 'gooey': {'widget': 'DirChooser'}}, 'convert-hgmd-file': {'metavar': 'HGMD file', 'help': "File from HGMD.\\nName format 
'HGMD_Pro_<release>_<assembly>.vcf.gz'.\\n", 'required': False, 'default': None, 'type': <howard.tools.tools.PathType object>, 'gooey': {'widget': 'FileChooser'}}, 'convert-hgmd-basename': {'metavar': 'HGMD basename', 'help': "File output basename.\\nGenerated files will be prefixed by basename\\n(e.g. 'HGMD_Pro_MY_RELEASE')\\nBy default (None), input file name without '.vcf.gz'.\\n", 'required': False, 'default': None, 'type': <class 'str'>}, 'generate-param': {'metavar': 'param', 'help': 'Parameter file (JSON) with all databases found.\\nDatabases folders scanned are defined in config file.\\nStructure of databases follow this structure (see doc):\\n.../<database>/<release>/<assembly>/*.[parquet|vcf.gz|...]\\n', 'required': False, 'default': None, 'type': <howard.tools.tools.PathType object>, 'gooey': {'widget': 'FileSaver', 'options': {'wildcard': 'JSON file (*.json)|*.json'}}}, 'generate-param-description': {'metavar': 'param description', 'help': 'Description file (JSON) with all databases found.\\nContains all databases with description of format, assembly, fields...\\n', 'required': False, 'default': None, 'type': <howard.tools.tools.PathType object>, 'gooey': {'widget': 'FileSaver', 'options': {'wildcard': 'JSON file (*.json)|*.json'}}}, 'generate-param-releases': {'metavar': 'param release', 'help': "List of database folder releases to check\\n(e.g. 'current', 'latest').\\n", 'required': False, 'default': 'current', 'type': <class 'str'>}, 'generate-param-formats': {'metavar': 'param formats', 'help': "List of database formats to check\\n(e.g. 'parquet', 'parquet,vcf,bed,tsv').\\n", 'required': False, 'default': 'parquet', 'type': <class 'str'>}, 'generate-param-bcftools': {'help': "Generate parameter JSON file with BCFTools annotation for allowed formats\\n(i.e. 
'vcf', 'bed').\\n", 'action': 'store_true', 'default': False}, 'help_md': {'metavar': 'help markdown', 'help': 'Help Output file in MarkDown format.\\n', 'required': False, 'default': None, 'type': <howard.tools.tools.PathType object>, 'gooey': {'widget': 'FileSaver', 'options': {'wildcard': 'HTML file (*.md)|*.md'}}}, 'help_html': {'metavar': 'help html', 'help': 'Help Output file in HTML format.\\n', 'required': False, 'default': None, 'type': <howard.tools.tools.PathType object>, 'gooey': {'widget': 'FileSaver', 'options': {'wildcard': 'HTML file (*.html)|*.html'}}}, 'help_pdf': {'metavar': 'help pdf', 'help': 'Help Output file in PDF format.\\n', 'required': False, 'default': None, 'type': <howard.tools.tools.PathType object>, 'gooey': {'widget': 'FileSaver', 'options': {'wildcard': 'PDF file (*.pdf)|*.pdf'}}}, 'help_json_input': {'metavar': 'help JSON input', 'help': 'Help input file in JSON format.\\n', 'required': False, 'default': None, 'type': <howard.tools.tools.PathType object>, 'gooey': {'widget': 'FileChooser', 'options': {'wildcard': 'JSON file (*.json)|*.json|All files (*)|*'}}}, 'help_md_input': {'metavar': 'help MarkDown input', 'help': 'Help input file in MarkDown format.\\n', 'required': False, 'default': None, 'type': <howard.tools.tools.PathType object>, 'gooey': {'widget': 'FileChooser', 'options': {'wildcard': 'MarkDown file (*.md)|*.md|All files (*)|*'}}}, 'code_type': {'metavar': 'example code type', 'help': "Help example code type for input JSON format\\n(e.g. 'json', 'bash').\\n", 'required': False, 'default': '', 'type': <class 'str'>}, 'help_json_input_title': {'metavar': 'help JSON input title', 'help': 'Help JSON input title.\\n', 'required': False, 'default': 'Help', 'type': <class 'str'>}, 'genomes-folder': {'metavar': 'genomes', 'help': "Folder containing genomes.\\n(e.g. 
'/Users/lebechea/howard/databases/genomes/current'", 'required': False, 'default': '/Users/lebechea/howard/databases/genomes/current', 'type': <howard.tools.tools.PathType object>, 'gooey': {'widget': 'DirChooser', 'options': {'default_dir': '/Users/lebechea/howard/databases/genomes/current', 'message': 'Path to genomes databases folder'}}}, 'config': {'metavar': 'config', 'help': 'Configuration JSON file defined default configuration regarding \\nresources (e.g. threads, memory),\\nsettings (e.g. verbosity, temporary files),\\ndefault folders (e.g. for databases)\\nand paths to external tools.\\n', 'required': False, 'default': '{}', 'type': <class 'str'>, 'gooey': {'widget': 'FileChooser', 'options': {'initial_value': '{}'}}}, 'threads': {'metavar': 'threads', 'help': 'Specify the number of threads to use for processing HOWARD.\\nIt determines the level of parallelism,\\neither on python scripts, duckdb engine and external tools.\\nIt and can help speed up the process/tool.\\nUse -1 to use all available CPU/cores.\\nEither non valid value is 1 CPU/core.\\n', 'required': False, 'type': <class 'int'>, 'default': -1, 'gooey': {'widget': 'IntegerField', 'options': {'min': -1, 'max': 1000, 'increment': 1}}, 'extra': {'examples': {'# Automatically detect all available CPU/cores': '"threads": -1', '# Define 8 CPU/cores': '"threads": 8'}}}, 'memory': {'metavar': 'memory', 'help': "Specify the memory to use in format FLOAT[kMG]\\n(e.g. 
'8G', '12.42G', '1024M').\\nIt determines the amount of memory for duckDB engine and external tools\\n(especially for JAR programs).\\nIt can help to prevent 'out of memory' failures.\\nBy default (None) is 80%% of RAM (for duckDB).\\n", 'required': False, 'type': <class 'str'>, 'default': None, 'extra': {'format': 'FLOAT[kMG]', 'examples': {'# Automatically detect all available CPU/cores': '"threads": -1', '# Define 8 CPU/cores': '"threads": 8'}}}, 'chunk_size': {'metavar': 'chunk size', 'help': 'Number of records in batch to export output file.\\nThe lower the chunk size, the less memory consumption.\\nFor Parquet partitioning, files size will depend on the chunk size.\\n', 'required': False, 'default': 1000000, 'type': <class 'int'>, 'gooey': {'widget': 'IntegerField', 'options': {'min': 1, 'max': 100000000000, 'increment': 10000}}, 'extra': {'examples': {'Chunk size of 1.000.000 by default': '"chunk_size": 1000000', 'Smaller chunk size to reduce Parquet file size and memory usage': '"chunk_size": 100000'}}}, 'tmp': {'metavar': 'Temporary folder', 'help': "Temporary folder (e.g. 
'/tmp').\\nBy default, '.tmp' for duckDB (see doc),external tools and python scripts.\\n", 'required': False, 'default': None, 'type': <howard.tools.tools.PathType object>, 'gooey': {'widget': 'DirChooser'}, 'extra': {'examples': {'# System temporary folder': '"tmp": "/tmp"', '# HOWARD work directory': '"tmp": "~/howard/tmp"', '# Current work directory': '"tmp": ".tmp"'}}}, 'duckdb_settings': {'metavar': 'duckDB settings', 'help': 'DuckDB settings (see duckDB doc) as JSON (string or file).\\nThese settings have priority (see options \\'threads\\', \\'tmp\\'...).\\nExamples: \\'{"TimeZone": "GMT", "temp_directory": "/tmp/duckdb", "threads": 8}\\'.\\n', 'required': False, 'default': None, 'type': <howard.tools.tools.PathType object>, 'gooey': {'widget': 'FileChooser', 'options': {'wildcard': 'JSON file (*.json)|*.json|All files (*)|*'}}, 'extra': {'examples': {'DuckDB settings JSON file': '"duckdb_settings": "/path/to/duckdb_config.json"', 'JSON string for Time zone, temporary directory and threads for duckDB': '"duckdb_settings": {\\n "TimeZone": "GMT",\\n "temp_directory": "/tmp/duckdb",\\n "threads": 8\\n}'}}}, 'verbosity': {'metavar': 'verbosity', 'help': 'Verbosity level\\nAvailable: CRITICAL, ERROR, WARNING, INFO, DEBUG or NOTSET\\n- DEBUG: Detailed information, typically of interest only when diagnosing problems.\\n- INFO: Confirmation that things are working as expected.\\n- WARNING: An indication that something unexpected happened.\\n- ERROR: Due to a more serious problem.\\n- CRITICAL: A serious error.\\n- FATAL: A fatal error.\\n- NOTSET: All messages.\\n', 'required': False, 'choices': ['CRITICAL', 'ERROR', 'WARNING', 'INFO', 'DEBUG', 'NOTSET', 'WARN', 'FATAL'], 'default': 'INFO', 'type': <class 'str'>, 'gooey': {'widget': 'Dropdown', 'options': {}}, 'extra': {'examples': {'Default verbosity': '"verbosity": "INFO"', 'ERROR level (quiet mode)': '"verbosity": "ERROR"', 'For debug': '"verbosity": "DEBUG"'}}}, 'access': {'metavar': 'access mode', 'help': 
"Access mode to variants file or database.\\nEither 'RW' for Read and Write, or 'RO' for Read Only.\\n", 'default': 'RW', 'type': <class 'str'>, 'choices': ['RW', 'RO'], 'gooey': {'widget': 'Dropdown', 'options': {}}, 'extra': {'examples': {'Read and Write mode': '"access": "RW"', 'Read only mode': '"access": "RO"'}}}, 'log': {'metavar': 'log', 'help': "Logs file\\n(e.g. 'my.log').\\n", 'required': False, 'default': None, 'type': <howard.tools.tools.PathType object>, 'gooey': {'widget': 'FileSaver'}, 'extra': {'examples': {'Relative path to log file': '"log": "my.log"', '# HOWARD work directory': '"log": "~/howard/log"', 'Full path to log file': '"log": "/tmp/my.log"'}}}, 'interactive': {'help': 'Interative mose..\\n', 'action': 'store_true', 'default': False}, 'quiet': {'help': '==SUPPRESS==', 'action': 'store_true', 'default': False}, 'verbose': {'help': '==SUPPRESS==', 'action': 'store_true', 'default': False}, 'debug': {'help': '==SUPPRESS==', 'action': 'store_true', 'default': False}, 'databases_folder': {'help': 'Path of HOWARD database folder.\\n', 'type': <class 'str'>, 'default': '/Users/lebechea/howard/databases'}, 'database': {'help': 'Which database to update.\\n', 'type': <class 'str'>, 'default': 'clinvar', 'choices': ['clinvar']}, 'update_config': {'help': 'Path of json configuration file.\\n', 'type': <class 'str'>}, 'current_folder': {'help': 'Path of json configuration file.\\n', 'type': <class 'str'>, 'default': 'current'}, 'add_variants_view': {'help': 'Create a sheet with all INFO fields exploded.\\n', 'action': 'store_true', 'default': False}, 'add_header': {'help': 'Create a sheet with all INFO fields header descritions.\\n', 'action': 'store_true', 'default': False}, 'transcripts_expected': {'metavar': 'List of transcripts (file)', 'help': 'File with a list of transcripts in first column.\\n', 'required': False, 'default': None, 'type': <howard.tools.tools.PathType object>, 'gooey': {'widget': 'FileChooser', 'options': {'wildcard': 'TSV file 
(*.tsv)|*.tsv|All files (*)|*'}}}, 'transcripts_missing': {'metavar': 'List of missing transcripts (file)', 'help': 'File with a list of missing transcripts in first column.\\n', 'required': False, 'default': None, 'type': <howard.tools.tools.PathType object>, 'gooey': {'widget': 'FileSaver', 'options': {'wildcard': 'TSV file (*.tsv)|*.tsv|All files (*)|*'}}}, 'genebe_use_refseq': {'help': 'Use refSeq to annotate (default).\\n', 'action': 'store_true', 'default': False}, 'genebe_use_ensembl': {'help': 'Use Ensembl to annotate.\\n', 'action': 'store_true', 'default': False}, 'not_flatten_consequences': {'help': 'Use exploded annotation informations.\\n', 'action': 'store_true', 'default': False}, 'minimalize_info': {'help': "Minimalize INFO field (e.g. '.' value).\\n", 'action': 'store_true', 'default': False}, 'minimalize_id': {'help': "Minimalize ID field (e.g. '.' value).\\n", 'action': 'store_true', 'default': False}, 'minimalize_qual': {'help': "Minimalize QUAL field (e.g. '.' value).\\n", 'action': 'store_true', 'default': False}, 'minimalize_filter': {'help': "Minimalize FILTER field (e.g. '.' value).\\n", 'action': 'store_true', 'default': False}, 'minimalize_samples': {'help': "Minimalize samples to keep only genotypes (i.e. 'GT').\\n", 'action': 'store_true', 'default': False}, 'remove_samples': {'help': 'Remove all samples to keep only variants.\\n', 'action': 'store_true', 'default': False}}, 'commands_arguments': {'query': {'function': 'query', 'description': "Query genetic variations in SQL format. Data can be loaded into 'variants' table from various formats (e.g. VCF, TSV, Parquet...). Using --explode_infos allow query on INFO/tag annotations. SQL query can also use external data within the request, such as a Parquet file(s). 
", 'help': 'Query genetic variations file in SQL format.', 'epilog': 'Usage examples:\\n howard query --input=tests/data/example.vcf.gz --query="SELECT * FROM variants WHERE REF = \\'A\\' AND POS < 100000" \\n howard query --input=tests/data/example.vcf.gz --explode_infos --query=\\'SELECT "#CHROM", POS, REF, ALT, DP, CLNSIG, sample2, sample3 FROM variants WHERE DP >= 50 OR CLNSIG NOT NULL ORDER BY DP DESC\\' \\n howard query --query="SELECT \\\\"#CHROM\\\\", POS, REF, ALT, \\\\"INFO/Interpro_domain\\\\" FROM \\'tests/databases/annotations/current/hg19/dbnsfp42a.parquet\\' WHERE \\\\"INFO/Interpro_domain\\\\" NOT NULL ORDER BY \\\\"INFO/SiPhy_29way_logOdds_rankscore\\\\" DESC LIMIT 10" \\n howard query --explode_infos --explode_infos_prefix=\\'INFO/\\' --query="SELECT \\\\"#CHROM\\\\", POS, REF, ALT, STRING_AGG(INFO, \\';\\') AS INFO FROM \\'tests/databases/annotations/current/hg19/*.parquet\\' GROUP BY \\\\"#CHROM\\\\", POS, REF, ALT" --output=/tmp/full_annotation.tsv && head -n2 /tmp/full_annotation.tsv \\n howard query --input=tests/data/example.vcf.gz --param=config/param.json \\n \\n', 'groups': {'main': {'input': False, 'output': False, 'param': False, 'query': False}, 'Explode': {'explode_infos': False, 'explode_infos_prefix': False, 'explode_infos_fields': False}, 'Query': {'query_limit': False, 'query_print_mode': False}, 'Export': {'include_header': False, 'parquet_partitions': False}}}, 'filter': {'function': 'filter', 'description': "Filter genetic variations in SQL format. Data can be loaded into 'variants' table from various formats (e.g. VCF, TSV, Parquet...). SQL filter can also use external data within the request, such as a Parquet file(s). 
", 'help': 'Filter genetic variations file in SQL format.', 'epilog': 'Usage examples:\\n howard filter --input=tests/data/example.vcf.gz --output=/tmp/example.filter.vcf.gz --filter="REF = \\'A\\' AND POS < 100000" \\n howard filter --input=tests/data/example.vcf.gz --output=/tmp/example.filter.vcf.gz --filter="REF = \\'A\\' AND POS < 100000" --samples="sample1,sample2" \\n howard filter --input=tests/data/example.vcf.gz --output=/tmp/example.filter.vcf.gz --filter="INFOS.CLNSIG LIKE \\'pathogenic\\'" --samples="sample1,sample2" \\n howard filter --input=tests/data/example.vcf.gz --output=/tmp/example.filter.vcf.gz --filter="QUAL > 100 AND SAMPLES.sample2.GT != \\'./.\\'" --samples="sample2" \\n \\n', 'groups': {'main': {'input': True, 'output': True}, 'Filters': {'filter': False, 'samples': False}, 'Export': {'include_header': False, 'parquet_partitions': False}}}, 'sort': {'function': 'sort', 'description': "Sort genetic variations from contig order. Data can be loaded into 'variants' table from various formats (e.g. VCF, TSV, Parquet...). SQL filter can also use external data within the request, such as a Parquet file(s). 
", 'help': 'Sort genetic variations file from contig order.', 'epilog': 'Usage examples:\\n howard sort --input=tests/data/example.vcf.gz --output=/tmp/example.sorted.vcf.gz \\n \\n', 'groups': {'main': {'input': True, 'output': True}, 'Export': {'include_header': False, 'parquet_partitions': False}}}, 'stats': {'function': 'stats', 'description': 'Statistics on genetic variations, such as: number of variants, number of samples, statistics by chromosome, genotypes by samples...', 'help': 'Statistics on genetic variations file.', 'epilog': 'Usage examples:\\n howard stats --input=tests/data/example.vcf.gz \\n howard stats --input=tests/data/example.vcf.gz --stats_md=/tmp/stats.md \\n howard stats --input=tests/data/example.vcf.gz --param=config/param.json \\n \\n', 'groups': {'main': {'input': True, 'param': False}, 'Stats': {'stats_md': False, 'stats_json': False}}}, 'convert': {'function': 'convert', 'description': "Convert genetic variations file to another format. Multiple format are available, such as usual and official VCF and BCF format, but also other formats such as TSV, CSV, PSV and Parquet/duckDB. These formats need a header '.hdr' file to take advantage of the power of howard (especially through INFO/tag definition), and using howard convert tool automatically generate header file fo futher use. 
", 'help': 'Convert genetic variations file to another format.', 'epilog': 'Usage examples:\\n howard convert --input=tests/data/example.vcf.gz --output=/tmp/example.tsv \\n howard convert --input=tests/data/example.vcf.gz --output=/tmp/example.parquet \\n howard convert --input=tests/data/example.vcf.gz --output=/tmp/example.tsv --explode_infos --explode_infos_fields=\\'CLNSIG,SIFT,DP\\' --order_by=\\'CLNSIG DESC, DP DESC\\' \\n howard convert --input=tests/data/example.vcf.gz --output=/tmp/example.tsv --explode_infos --explode_infos_prefix=\\'INFO/\\' --explode_infos_fields=\\'CLNSIG,SIFT,DP,*\\' --order_by=\\'"INFO/CLNSIG" DESC, "INFO/DP" DESC\\' --include_header \\n howard convert --input=tests/data/example.vcf.gz --output=/tmp/example.tsv --param=config/param.json \\n \\n', 'groups': {'main': {'input': True, 'output': True, 'param': False}, 'Explode': {'explode_infos': False, 'explode_infos_prefix': False, 'explode_infos_fields': False}, 'Export': {'include_header': False, 'order_by': False, 'parquet_partitions': False}}}, 'hgvs': {'function': 'hgvs', 'description': "HGVS annotation using HUGO HGVS internation Sequence Variant Nomenclature (http://varnomen.hgvs.org/). Annotation refere to refGene and genome to generate HGVS nomenclature for all available transcripts. 
This annotation add 'hgvs' field into VCF INFO column of a VCF file.", 'help': 'HGVS annotation (HUGO internation nomenclature) using refGene, genome and transcripts list.\\n', 'epilog': 'Usage examples:\\n howard hgvs --input=tests/data/example.full.vcf --output=/tmp/example.hgvs.vcf \\n howard hgvs --input=tests/data/example.full.vcf --output=/tmp/example.hgvs.tsv --param=config/param.json \\n howard hgvs --input=tests/data/example.full.vcf --output=/tmp/example.hgvs.vcf --full_format --use_exon \\n \\n', 'groups': {'main': {'input': True, 'output': True, 'param': False, 'hgvs_options': False, 'assembly': False}, 'HGVS': {'use_gene': False, 'use_exon': False, 'use_protein': False, 'add_protein': False, 'full_format': False, 'codon_type': False, 'refgene': False, 'refseqlink': False}}}, 'annotation': {'function': 'annotation', 'description': 'Annotation is mainly based on a build-in Parquet annotation method, and tools such as BCFTOOLS, Annovar and snpEff. It uses available databases (see Annovar and snpEff) and homemade databases. Format of databases are: parquet, duckdb, vcf, bed, Annovar and snpEff (Annovar and snpEff databases are automatically downloaded, see howard databases tool). 
', 'help': 'Annotation of genetic variations file using databases/files and tools.', 'epilog': "Usage examples:\\n howard annotation --input=tests/data/example.vcf.gz --output=/tmp/example.howard.vcf.gz --annotations='tests/databases/annotations/current/hg19/avsnp150.parquet,tests/databases/annotations/current/hg19/dbnsfp42a.parquet,tests/databases/annotations/current/hg19/gnomad211_genome.parquet' \\n howard annotation --input=tests/data/example.vcf.gz --output=/tmp/example.howard.tsv --assembly=hg19 --annotations='annovar:refGene,annovar:cosmic70,snpeff,tests/databases/annotations/current/hg19/clinvar_20210123.parquet' \\n howard annotation --input=tests/data/example.vcf.gz --output=/tmp/example.howard.tsv --assembly=hg19 --annotation_parquet='tests/databases/annotations/current/hg19/avsnp150.parquet,tests/databases/annotations/current/hg19/dbnsfp42a.parquet' \\n howard annotation --input=tests/data/example.vcf.gz --output=/tmp/example.howard.tsv --assembly=hg19 --annotation_bcftools='tests/databases/annotations/current/hg19/nci60.vcf.gz,tests/databases/annotations/current/hg19/dbnsfp42a.vcf.gz' \\n howard annotation --input=tests/data/example.vcf.gz --output=/tmp/example.howard.tsv --assembly=hg19 --annotation_snpsift='tests/databases/annotations/current/hg19/nci60.vcf.gz,tests/databases/annotations/current/hg19/dbnsfp42a.vcf.gz' \\n howard annotation --input=tests/data/example.vcf.gz --output=/tmp/example.howard.tsv --assembly=hg19 --annotation_annovar='nci60:cosmic70' \\n howard annotation --input=tests/data/example.vcf.gz --output=/tmp/example.howard.tsv --assembly=hg19 --annotation_snpeff='-hgvs' \\n howard annotation --input=tests/data/example.vcf.gz --output=/tmp/example.howard.tsv --assembly=hg19 --annotation_exomiser='preset=exome:transcript_source=refseq' \\n howard annotation --input=tests/data/example.vcf.gz --output=/tmp/example.howard.tsv --assembly=hg19 --annotation_splice='split_mode=one:spliceai_distance=500:spliceai_mask=1' \\n howard annotation 
--input=tests/data/example.vcf.gz --output=/tmp/example.howard.tsv --assembly=hg19 --annotations='ALL:parquet' \\n howard annotation --input=tests/data/example.vcf.gz --output=/tmp/example.howard.tsv --param=config/param.json \\n \\n", 'groups': {'main': {'input': True, 'output': True, 'param': False, 'annotations': False, 'annotation_parquet': False, 'annotation_bcftools': False, 'annotation_annovar': False, 'annotation_snpeff': False, 'annotation_snpsift': False, 'annotation_exomiser': False, 'annotation_splice': False, 'assembly': False}, 'Annotation': {'annotations_update': False, 'annotations_append': False}}}, 'calculation': {'function': 'calculation', 'description': 'Calculation processes variants information to generate new information, such as: identify variation type (VarType), harmonizes allele frequency (VAF) and calculate sttistics (VAF_stats), extracts Nomen (transcript, cNomen, pNomen...) from an HGVS field (e.g. snpEff, Annovar) with an optional list of personalized transcripts, generates VaRank format barcode, identify trio inheritance.', 'help': 'Calculation operations on genetic variations file and genotype information.\\n', 'epilog': "Usage examples:\\n howard calculation --input=tests/data/example.full.vcf --output=/tmp/example.calculation.tsv --calculations='vartype' \\n howard calculation --input=tests/data/example.ann.vcf.gz --output=/tmp/example.calculated.tsv --calculations='snpeff_hgvs,NOMEN' --hgvs_field=snpeff_hgvs --transcripts=tests/data/transcripts.tsv \\n howard calculation --input=tests/data/example.vcf.gz --output=/tmp/example.calculated.tsv --calculations='TRIO' --trio_pedigree='sample1,sample2,sample4' \\n howard calculation --input=tests/data/example.vcf.gz --output=/tmp/example.calculated.tsv --calculations='BARCODEFAMILY' --family_pedigree='sample1,sample2,sample4' \\n howard calculation --input=tests/data/example.ann.transcripts.vcf.gz --output=/tmp/example.calculation.transcripts.tsv --param=config/param.transcripts.json 
--calculations='TRANSCRIPTS_ANNOTATIONS,TRANSCRIPTS_PRIORITIZATION,TRANSCRIPTS_EXPORT' \\n howard calculation --input=tests/data/example.ann.vcf.gz --output=/tmp/example.ann.tsv --param=config/param.json \\n howard calculation --show_calculations \\n \\n", 'groups': {'main': {'input': False, 'output': False, 'param': False, 'calculations': False}, 'Calculation': {'calculation_config': False, 'show_calculations': False}, 'NOMEN': {'hgvs_field': False, 'transcripts': False}, 'TRIO': {'trio_pedigree': False}, 'BARCODEFAMILY': {'family_pedigree': False}}}, 'prioritization': {'function': 'prioritization', 'description': "Prioritization algorithm uses profiles to flag variants (as passed or filtered), calculate a prioritization score, and automatically generate a comment for each variants (example: 'polymorphism identified in dbSNP. associated to Lung Cancer. Found in ClinVar database'). Prioritization profiles are defined in a configuration file in JSON format. A profile is defined as a list of annotation/value, using wildcards and comparison options (contains, lower than, greater than, equal...). Annotations fields may be quality values (usually from callers, such as 'DP') or other annotations fields provided by annotations tools, such as HOWARD itself (example: COSMIC, Clinvar, 1000genomes, PolyPhen, SIFT). 
Multiple profiles can be used simultaneously, which is useful to define multiple validation/prioritization levels (example: 'standard', 'stringent', 'rare variants', 'low allele frequency').\\n", 'help': 'Prioritization of genetic variations based on annotations criteria (profiles).', 'epilog': "Usage examples:\\n howard prioritization --input=tests/data/example.vcf.gz --output=/tmp/example.prioritized.vcf.gz --prioritizations='default' \\n howard prioritization --input=tests/data/example.vcf.gz --output=/tmp/example.prioritized.vcf.gz --prioritizations='default,GERMLINE' --prioritization_config=config/prioritization_profiles.json \\n howard prioritization --input=tests/data/example.vcf.gz --output=/tmp/example.prioritized.tsv --param=config/param.json \\n \\n", 'groups': {'main': {'input': True, 'output': True, 'param': False, 'prioritizations': False}, 'Prioritization': {'default_profile': False, 'pzfields': False, 'prioritization_score_mode': False, 'prioritization_config': False}}}, 'process': {'function': 'process', 'description': 'howard process tool manage genetic variations to:\\n- annotates genetic variants with multiple annotation databases/files and tools\\n- calculates and normalizes annotations\\n- prioritizes variants with profiles (list of citeria) to calculate scores and flags\\n- translates into various formats\\n- query genetic variants and annotations\\n- generates variants statistics', 'help': 'Full genetic variations process: annotation, calculation, prioritization, format, query, filter...', 'epilog': 'Usage examples:\\n howard process --input=tests/data/example.vcf.gz --output=/tmp/example.annotated.vcf.gz --param=config/param.json \\n howard process --input=tests/data/example.vcf.gz --annotations=\\'snpeff\\' --calculations=\\'snpeff_hgvs\\' --prioritizations=\\'default\\' --explode_infos --output=/tmp/example.annotated.tsv --query=\\'SELECT "#CHROM", POS, ALT, REF, snpeff_hgvs FROM variants\\' \\n howard process 
--input=tests/data/example.vcf.gz --hgvs_options=\\'full_format,use_exon\\' --explode_infos --output=/tmp/example.annotated.tsv --query=\\'SELECT "#CHROM", POS, ALT, REF, hgvs FROM variants\\' \\n howard process --input=tests/data/example.vcf.gz --output=/tmp/example.howard.vcf.gz --hgvs=\\'full_format,use_exon\\' --annotations=\\'tests/databases/annotations/current/hg19/avsnp150.parquet,tests/databases/annotations/current/hg19/dbnsfp42a.parquet,tests/databases/annotations/current/hg19/gnomad211_genome.parquet\\' --calculations=\\'NOMEN\\' --explode_infos --query=\\'SELECT NOMEN, REVEL_score, SIFT_score, AF AS \\'gnomad_AF\\', ClinPred_score, ClinPred_pred FROM variants\\' \\n \\n', 'groups': {'main': {'input': True, 'output': True, 'param': False, 'hgvs_options': False, 'annotations': False, 'calculations': False, 'prioritizations': False, 'assembly': False}, 'HGVS': {'use_gene': False, 'use_exon': False, 'use_protein': False, 'add_protein': False, 'full_format': False, 'codon_type': False, 'refgene': False, 'refseqlink': False}, 'Annotation': {'annotations_update': False, 'annotations_append': False}, 'Calculation': {'calculation_config': False}, 'Prioritization': {'default_profile': False, 'pzfields': False, 'prioritization_score_mode': False, 'prioritization_config': False}, 'Query': {'query': False, 'query_limit': False, 'query_print_mode': False}, 'Explode': {'explode_infos': False, 'explode_infos_prefix': False, 'explode_infos_fields': False}, 'Export': {'include_header': False, 'order_by': False, 'parquet_partitions': False}}}, 'databases': {'function': 'databases', 'description': 'Download databases and needed files for howard and associated tools', 'help': 'Download databases and needed files for howard and associated tools', 'epilog': "Usage examples:\\n howard databases --assembly=hg19 --download-genomes=~/howard/databases/genomes/current --download-genomes-provider=UCSC --download-genomes-contig-regex='chr[0-9XYM]+$' \\n howard databases 
--assembly=hg19 --download-annovar=~/howard/databases/annovar/current --download-annovar-files='refGene,cosmic70,nci60' \\n howard databases --assembly=hg19 --download-snpeff=~/howard/databases/snpeff/current \\n howard databases --assembly=hg19 --download-refseq=~/howard/databases/refseq/current --download-refseq-format-file='ncbiRefSeq.txt' \\n howard databases --assembly=hg19 --download-dbnsfp=~/howard/databases/dbnsfp/current --download-dbnsfp-release='4.4a' --download-dbnsfp-subdatabases \\n howard databases --assembly=hg19 --download-alphamissense=~/howard/databases/alphamissense/current \\n howard databases --assembly=hg19 --download-exomiser=~/howard/databases/exomiser/current \\n howard databases --assembly=hg19 --download-dbsnp=~/howard/databases/dbsnp/current --download-dbsnp-vcf \\n cd ~/howard/databases && howard databases --assembly=hg19 --download-genomes=genomes/current --download-genomes-provider=UCSC --download-genomes-contig-regex='chr[0-9XYM]+$' --download-annovar=annovar/current --download-annovar-files='refGene,cosmic70,nci60' --download-snpeff=snpeff/current --download-refseq=refseq/current --download-refseq-format-file='ncbiRefSeq.txt' --download-dbnsfp=dbnsfp/current --download-dbnsfp-release='4.4a' --download-dbnsfp-subdatabases --download-alphamissense=alphamissense/current --download-exomiser=exomiser/current --download-dbsnp=dbsnp/current --download-dbsnp-vcf --threads=8 \\n howard databases --generate-param=/tmp/param.json --generate-param-description=/tmp/test.description.json --generate-param-formats=parquet \\n howard databases --input_annovar=tests/databases/others/hg19_nci60.txt --output_annovar=/tmp/nci60.from_annovar.vcf.gz --annovar_to_parquet=/tmp/nci60.from_annovar.parquet --annovar_code=nci60 --genome=~/howard/databases/genomes/current/hg19.fa \\n\\nNotes:\\n - Downloading databases can take a while, depending on network, threads and memory\\n - Proxy: Beware of network and proxy configuration\\n - dbNSFP download: More 
threads, more memory usage (8 threads ~ 16Gb, 24 threads ~ 32Gb)\\n \\n", 'groups': {'main': {'assembly': False, 'genomes-folder': False, 'genome': False, 'param': False}, 'Genomes': {'download-genomes': False, 'download-genomes-provider': False, 'download-genomes-contig-regex': False}, 'snpEff': {'download-snpeff': False}, 'Annovar': {'download-annovar': False, 'download-annovar-files': False, 'download-annovar-url': False}, 'refSeq': {'download-refseq': False, 'download-refseq-url': False, 'download-refseq-prefix': False, 'download-refseq-files': False, 'download-refseq-format-file': False, 'download-refseq-include-utr5': False, 'download-refseq-include-utr3': False, 'download-refseq-include-chrM': False, 'download-refseq-include-non-canonical-chr': False, 'download-refseq-include-non-coding-transcripts': False, 'download-refseq-include-transcript-version': False}, 'dbNSFP': {'download-dbnsfp': False, 'download-dbnsfp-url': False, 'download-dbnsfp-release': False, 'download-dbnsfp-parquet-size': False, 'download-dbnsfp-subdatabases': False, 'download-dbnsfp-parquet': False, 'download-dbnsfp-vcf': False, 'download-dbnsfp-no-files-all': False, 'download-dbnsfp-add-info': False, 'download-dbnsfp-only-info': False, 'download-dbnsfp-uniquify': False, 'download-dbnsfp-row-group-size': False}, 'AlphaMissense': {'download-alphamissense': False, 'download-alphamissense-url': False}, 'Exomiser': {'download-exomiser': False, 'download-exomiser-application-properties': False, 'download-exomiser-url': False, 'download-exomiser-release': False, 'download-exomiser-phenotype-release': False, 'download-exomiser-remm-release': False, 'download-exomiser-remm-url': False, 'download-exomiser-cadd-release': False, 'download-exomiser-cadd-url': False, 'download-exomiser-cadd-url-snv-file': False, 'download-exomiser-cadd-url-indel-file': False}, 'dbSNP': {'download-dbsnp': False, 'download-dbsnp-releases': False, 'download-dbsnp-release-default': False, 'download-dbsnp-url': False, 
'download-dbsnp-url-files': False, 'download-dbsnp-url-files-prefix': False, 'download-dbsnp-assemblies-map': False, 'download-dbsnp-vcf': False, 'download-dbsnp-parquet': False}, 'HGMD': {'convert-hgmd': False, 'convert-hgmd-file': False, 'convert-hgmd-basename': False}, 'from_Annovar': {'input_annovar': False, 'output_annovar': False, 'annovar_code': False, 'annovar_to_parquet': False, 'annovar_reduce_memory': False, 'annovar_multi_variant': False}, 'from_extann': {'input_extann': False, 'output_extann': False, 'refgene': False, 'transcripts': False, 'param_extann': False, 'mode_extann': False}, 'Parameters': {'generate-param': False, 'generate-param-description': False, 'generate-param-releases': False, 'generate-param-formats': False, 'generate-param-bcftools': False}}}, 'gui': {'function': 'gui', 'description': 'Graphical User Interface tools', 'help': 'Graphical User Interface tools', 'epilog': 'Usage examples:\\n howard gui ', 'groups': {}}, 'help': {'function': 'help', 'description': 'Help tools', 'help': 'Help tools', 'epilog': "Usage examples:\\n howard help --help_md=docs/help.md --help_html=docs/html/help.html --help_pdf=docs/pdf/help.pdf\\n howard help --help_json_input=docs/json/help.configuration.json --help_json_input_title='HOWARD Configuration' --help_md=docs/help.configuration.md --help_html=docs/html/help.configuration.html --help_pdf=docs/pdf/help.configuration.pdf --code_type='json'\\n howard help --help_json_input=docs/json/help.parameteres.json --help_json_input_title='HOWARD Parameters' --help_md=docs/help.parameteres.md --help_html=docs/html/help.parameteres.html --help_pdf=docs/pdf/help.parameteres.pdf --code_type='json' \\n howard help --help_json_input=docs/json/help.parameteres.databases.json --help_json_input_title='HOWARD Parameters Databases' --help_md=docs/help.parameteres.databases.md --help_html=docs/html/help.parameteres.databases.html --help_pdf=docs/pdf/help.parameteres.databases.pdf --code_type='json' \\n \\n", 'groups': 
{'main': {'help_md': False, 'help_html': False, 'help_pdf': False, 'help_md_input': False, 'help_json_input': False, 'help_json_input_title': False, 'code_type': False}}}, 'update_database': {'function': 'update_database', 'description': 'Update HOWARD database\\n', 'help': '(plugin) Update HOWARD database', 'epilog': 'Usage examples:\\n howard update_database --database clinvar --databases_folder /home1/DB/HOWARD --update_config update_databases.json \\n \\n', 'groups': {'main': {'param': False}, 'Update_database': {'databases_folder': False, 'database': False, 'update_config': False, 'current_folder': False}, 'Options': {'show': False, 'limit': False}}}, 'to_excel': {'function': 'to_excel', 'description': "Convert VCF file to Excel '.xlsx' format.\\n", 'help': "(plugin) Convert VCF file to Excel '.xlsx' format", 'epilog': 'Usage examples:\\n howard to_excel --input=tests/data/example.vcf.gz --output=/tmp/example.xlsx --add_variants_view\\n \\n', 'groups': {'main': {'input': True, 'output': True}, 'Add': {'add_variants_view': False, 'add_header': False}}}, 'transcripts_check': {'function': 'transcripts_check', 'description': 'Check if a transcript list is present in a generated transcript table from a input VCF file.\\n', 'help': '(plugin) Check transcript list in transcript table', 'epilog': 'Usage examples:\\n howard transcripts_check --input=plugins/transcripts_check/tests/data/example.ann.transcripts.vcf.gz --param=plugins/transcripts_check/tests/data/param.transcripts.json --transcripts_expected=plugins/transcripts_check/tests/data/transcripts.tsv --stats=/tmp/transcripts.stats.json --transcripts_missing=/tmp/transcripts.missing.tsv\\n \\n', 'groups': {'main': {'input': True, 'param': True, 'transcripts_expected': True, 'transcripts_missing': False, 'stats_json': False}}}, 'genebe': {'function': 'genebe', 'description': 'GeneBe annotation using REST API (see https://genebe.net/).\\n', 'help': '(plugin) GeneBe annotation using REST API', 'epilog': 'Usage 
examples:\\n howard genebe --input=tests/data/example.vcf.gz --output=/tmp/example.genebe.vcf.gz --genebe_use_refseq\\n \\n', 'groups': {'main': {'input': True, 'output': True, 'param': False, 'assembly': False}, 'GeneBe': {'genebe_use_refseq': False, 'genebe_use_ensembl': False, 'not_flatten_consequences': False}, 'Explode': {'explode_infos': False, 'explode_infos_prefix': False, 'explode_infos_fields': False}, 'Export': {'include_header': False, 'order_by': False, 'parquet_partitions': False}}}, 'minimalize': {'function': 'minimalize', 'description': "Minimalize a VCF file consists in put missing value ('.') on INFO/Tags, ID, QUAL or FILTER fields. Options can also minimalize samples (keep only GT) or remove all samples. INFO/tags can by exploded before minimalize to keep tags into separated columns (useful for Parquet or TSV format to constitute a database).\\n", 'help': '(plugin) Minimalize a VCF file, such as removing INFO/Tags or samples', 'epilog': 'Usage examples:\\n howard minimalize --input=tests/data/example.vcf.gz --output=/tmp/example.minimal.vcf.gz --minimalize_info --minimalize_filter --minimalize_qual --minimalize_id --minimalize_samples\\n howard minimalize --input=tests/data/example.vcf.gz --output=/tmp/example.minimal.tsv --remove_samples --explode_infos --minimalize_info\\n \\n', 'groups': {'main': {'input': True, 'output': True, 'param': False}, 'Minimalize': {'minimalize_info': False, 'minimalize_id': False, 'minimalize_qual': False, 'minimalize_filter': False, 'minimalize_samples': False, 'remove_samples': False}, 'Explode': {'explode_infos': False, 'explode_infos_prefix': False, 'explode_infos_fields': False}, 'Export': {'include_header': False, 'order_by': False, 'parquet_partitions': False}}}}, 'shared_arguments': ['config', 'threads', 'memory', 'chunk_size', 'tmp', 'duckdb_settings', 'interactive', 'verbosity', 'log', 'quiet', 'verbose', 'debug']}"}]; // mirrored in build-search-index.js (part 1) // Also split on html tags. 
def sort_contigs(vcf_reader):
    """
    Sort contigs in a VCF header into natural genomic order.

    Contigs are ordered numerically (1, 2, ..., 10, ...), with or without a
    leading 'chr' prefix, followed by X, Y and M/MT, and finally any
    remaining non-numeric contigs (e.g. unplaced scaffolds) sorted by their
    name with the 'chr' prefix removed.

    Args:
        vcf_reader (vcf): VCF object from VCF package; its 'contigs'
            mapping is replaced in place with a sorted OrderedDict

    Returns:
        vcf: VCF object from VCF package (same object, contigs sorted)
    """

    from collections import OrderedDict

    # Extract contigs from header
    contigs = list(vcf_reader.contigs.keys())

    # Rank of the special chromosomes within their tier
    special_rank = {"X": 0, "Y": 1, "M": 2, "MT": 2}

    # Sort function
    def contig_sort_key(contig):
        """Return a (tier, rank, tiebreak) tuple; tiers never compare across."""

        # Remove 'chr' from contig
        contig_clean = re.sub(r"^chr", "", contig)

        # Tier 1: sex and mitochondrial chromosomes (X < Y < M/MT),
        # always after every numeric contig
        if contig_clean in special_rank:
            return (1, special_rank[contig_clean], contig)

        # Tier 0: contig as integer (tiered keys avoid any collision between
        # large numeric contigs and the special/non-numeric tiers)
        try:
            return (0, int(contig_clean), contig)
        except ValueError:
            # Tier 2: contig as non-numeric, tie-broken on cleaned name
            return (2, 0, contig_clean)

    # Sort contigs
    sorted_contigs = sorted(contigs, key=contig_sort_key)

    # Create new contigs OrderedDict, preserving each contig's header entry
    ordered_contigs = OrderedDict(
        (contig, vcf_reader.contigs[contig]) for contig in sorted_contigs
    )

    # Replace contigs
    vcf_reader.contigs = ordered_contigs

    # Return
    return vcf_reader
37c68b0..682d4fb 100644 --- a/howard/objects/variants.py +++ b/howard/objects/variants.py @@ -73,6 +73,7 @@ comparison_map, code_type_map_to_sql, code_type_map_to_vcf, + sort_contigs, ) from howard.objects.database import Database @@ -1437,6 +1438,20 @@ def get_header_sample_list( # Return samples list return samples_list + def sort_contigs(self) -> None: + """ + This function sort contigs + + :return: None + """ + + # Sort contigs + header = self.get_header() + header = sort_contigs(header) + + # Return + return None + def is_genotype_column(self, column: str = None) -> bool: """ This function checks if a given column is a genotype column in a database. diff --git a/howard/tools/__init__.py b/howard/tools/__init__.py index 9f11576..d4501ed 100644 --- a/howard/tools/__init__.py +++ b/howard/tools/__init__.py @@ -7,6 +7,7 @@ "prioritization", "query", "filter", + "sort", "stats", "databases", "gui", diff --git a/howard/tools/filter.py b/howard/tools/filter.py new file mode 100644 index 0000000..d07d18d --- /dev/null +++ b/howard/tools/filter.py @@ -0,0 +1,101 @@ +import argparse +import logging as log +from tabulate import tabulate # type: ignore + +from howard.functions.commons import load_args, load_config_args +from howard.objects.variants import Variants + + +def filter(args: argparse) -> None: + """ + This Python function loads and queries data from a VCF file based on user input and exports the + results. + + :param args: args is an object that contains the arguments passed to the function. 
def filter(args: argparse) -> Variants:
    """
    Load variants from an input file, filter them with an SQL clause
    (and an optional sample subset), and export the result.

    :param args: Namespace of parsed command-line arguments (input,
        output, filter, samples, ...), typically produced by argparse
    :type args: argparse
    :return: the Variants object used to perform the filtering
    """

    log.info("Start")

    # Load config args
    arguments_dict, _, config, param = load_config_args(args)

    # Create variants object
    vcfdata_obj = Variants(
        input=args.input, output=args.output, config=config, param=param
    )

    # Get Config and Params
    config = vcfdata_obj.get_config()
    param = vcfdata_obj.get_param()

    # Load args into param
    param = load_args(
        param=param,
        args=args,
        arguments_dict=arguments_dict,
        command="filter",
        strict=False,
    )

    # Load data and expose it through an exploration view
    # NOTE(review): view_name is only bound inside this branch; if no
    # input is given, the query below would raise NameError — confirm
    # callers always provide an input.
    if vcfdata_obj.get_input():
        vcfdata_obj.load_data()
        vcfdata_obj.load_header()
        view_name = "variants_view"
        vcfdata_obj.create_annotations_view(
            view=view_name,
            view_type="view",
            view_mode="explore",
            info_prefix_column="",
            fields_needed_all=True,
            info_struct_column="INFOS",
            sample_struct_column="SAMPLES",
            detect_type_list=True,
        )

    # Filtering
    log.info("Filtering...")

    # SQL filter clause ('filter_clause' avoids shadowing builtin 'filter')
    filter_clause = param.get("filters", {}).get("filter", None)

    # Columns
    columns = vcfdata_obj.get_header_columns_as_list()

    # Samples
    samples_param = param.get("filters", {}).get("samples", None)
    samples = []
    if not (samples_param is None or samples_param.strip() == ""):

        # Check samples in file
        samples_in_file = vcfdata_obj.get_header_sample_list(check=True)

        for s in samples_param.split(","):
            sample = s.strip()
            # Keep only samples actually present in the file
            if sample in samples_in_file:
                samples.append(sample)
            else:
                log.warning(f"Sample '{sample}' not in file")

        if samples:
            # Remove unselected samples from the exported columns
            for s in samples_in_file:
                if s not in samples:
                    columns.remove(s)

    # Query
    query = f"""SELECT {", ".join([f'"{c}"' for c in columns])} FROM {view_name}"""
    if filter_clause:
        query += f""" WHERE {filter_clause}"""
    log.debug(f"query={query}")

    # Export
    vcfdata_obj.export_output(query=query, export_header=True)

    # Log
    log.info("End")

    # Return variants object
    return vcfdata_obj
def sort(args: argparse) -> Variants:
    """
    Load variants from an input file, sort them by contig order
    (natural genomic order of the header's contigs), and export the
    result.

    :param args: Namespace of parsed command-line arguments (input,
        output, ...), typically produced by argparse
    :type args: argparse
    :return: the Variants object used to perform the sort
    """

    log.info("Start")

    # Load config args
    arguments_dict, _, config, param = load_config_args(args)

    # Create variants object
    vcfdata_obj = Variants(
        input=args.input, output=args.output, config=config, param=param
    )

    # Get Config and Params
    config = vcfdata_obj.get_config()
    param = vcfdata_obj.get_param()

    # Access mode: formats that are not natively queryable (or runs that
    # need INFO explosion) must be loaded read-write
    input_format = vcfdata_obj.get_input_format()
    if param.get("explode", {}).get("explode_infos", False) or input_format not in [
        "duckdb",
        "parquet",
    ]:
        access = "RW"
    else:
        access = "RO"
    config["access"] = access

    # Load args into param
    # (was command="filter" — copy-paste from the filter tool; this is
    # the sort tool, so load args for the "sort" command)
    param = load_args(
        param=param,
        args=args,
        arguments_dict=arguments_dict,
        command="sort",
        strict=False,
    )

    # Load data
    if vcfdata_obj.get_input():
        vcfdata_obj.load_data()

    # Sorting
    log.info("Sorting...")

    # Sort contigs in the header
    vcfdata_obj.sort_contigs()

    # variants table
    table_variants = vcfdata_obj.get_table_variants()

    # Rank each chromosome by its position in the (now sorted) header
    # contigs; rows whose chromosome is not in the header get a NULL
    # rank from the CASE and are left to the engine's NULL ordering.
    # NOTE(review): only "#CHROM" is ordered, not POS within a contig —
    # confirm whether intra-contig position order is expected.
    case_clause = ""
    for i, chrom in enumerate(vcfdata_obj.get_header().contigs):
        case_clause += f""" WHEN "#CHROM" = '{chrom}' THEN {i + 1}\n"""

    # Create case clause order by
    if case_clause != "":
        case_clause_order_by = f"""
        ORDER BY
        CASE
        {case_clause}
        END
        """
    else:
        case_clause_order_by = ""

    # Create sort query
    query_sort = f"""
    SELECT *
    FROM {table_variants}
    {case_clause_order_by}
    """

    # Export
    vcfdata_obj.export_output(query=query_sort, export_header=True)

    # Log
    log.info("End")

    # Return variants object
    return vcfdata_obj
"parquet_partitions": False}, }, }, + "sort": { + "function": "sort", + "description": """Sort genetic variations from contig order. Data can be loaded into 'variants' table from various formats (e.g. VCF, TSV, Parquet...). SQL filter can also use external data within the request, such as a Parquet file(s). """, + "help": "Sort genetic variations file from contig order.", + "epilog": """Usage examples:\n""" + """ howard sort --input=tests/data/example.vcf.gz --output=/tmp/example.sorted.vcf.gz \n""" + """ \n""", + "groups": { + "main": { + "input": True, + "output": True, + }, + "Export": {"include_header": False, "parquet_partitions": False}, + }, + }, "stats": { "function": "stats", "description": """Statistics on genetic variations, such as: number of variants, number of samples, statistics by chromosome, genotypes by samples...""", diff --git a/plugins/README.pdf b/plugins/README.pdf index 56490ec..abbd099 100644 Binary files a/plugins/README.pdf and b/plugins/README.pdf differ diff --git a/tests/test_tools_filter.py b/tests/test_tools_filter.py new file mode 100644 index 0000000..7acec58 --- /dev/null +++ b/tests/test_tools_filter.py @@ -0,0 +1,127 @@ +# -*- coding: utf-8 -*- +""" +Tests + +Usage: +pytest tests/ + +Coverage: +coverage run -m pytest tests/test_tools_query.py -x -v --log-cli-level=INFO --capture=tee-sys +coverage report --include=howard/* -m +""" + +import logging as log +import os +from tempfile import TemporaryDirectory +import argparse +import pytest # type: ignore + +from howard.objects.variants import Variants +from howard.functions.commons import remove_if_exists +from howard.tools.tools import arguments_dict +from howard.tools.filter import filter as vcf_filter + +from test_needed import tests_folder, tests_data_folder + + +@pytest.mark.parametrize( + "filter, samples, expected_results", + [ + # Filter 1 + ( + "POS < 100000", + None, + { + "nb_lines": 57, + "nb_variants": 3, + "samples": [ + "sample1", + "sample2", + "sample3", + 
@pytest.mark.parametrize(
    "filter_clause, samples, expected_results",
    [
        # Simple positional filter
        (
            "POS < 100000",
            None,
            {
                "nb_lines": 57,
                "nb_variants": 3,
                "samples": ["sample1", "sample2", "sample3", "sample4"],
            },
        ),
        # Filter on the INFOS struct column
        (
            "INFOS.CLNSIG LIKE 'pathogenic'",
            None,
            {
                "nb_lines": 55,
                "nb_variants": 1,
                "samples": ["sample1", "sample2", "sample3", "sample4"],
            },
        ),
        # Filter on the SAMPLES struct column with a sample subset
        (
            "SAMPLES.sample2.GT != './.'",
            "sample1,sample2",
            {
                "nb_lines": 57,
                "nb_variants": 3,
                "samples": ["sample1", "sample2"],
            },
        ),
    ],
)
def test_filter(filter_clause, samples, expected_results):
    """
    Run the 'filter' tool end-to-end on the example VCF and check the
    number of output lines/variants and the exported sample columns.

    ('filter_clause' avoids shadowing builtin 'filter'.)
    """

    with TemporaryDirectory(dir=tests_folder) as tmp_dir:

        # Init files
        input_vcf = tests_data_folder + "/example.vcf.gz"
        output_vcf = os.path.join(tmp_dir, "output_file.tsv")

        # Prepare arguments for the filter tool
        args = argparse.Namespace(
            input=input_vcf,
            output=output_vcf,
            filter=filter_clause,
            samples=samples,
            include_header=True,
            arguments_dict=arguments_dict,
        )

        # Remove if output file exists
        remove_if_exists([output_vcf])

        # Filter
        vcf_filter(args)

        # Read the contents of the actual output file
        with open(output_vcf, "r") as f:
            result_output_nb_lines = 0
            result_output_nb_variants = 0
            result_lines = []
            for line in f:
                if not result_output_nb_lines:
                    log.debug(line)
                result_output_nb_lines += 1
                if not line.startswith("#"):
                    result_output_nb_variants += 1
                result_lines.append(line.strip())

        # Expected result
        expected_result_nb_lines = expected_results.get("nb_lines", None)
        expected_result_nb_variants = expected_results.get("nb_variants", None)
        expected_result_samples = expected_results.get("samples", None)

        # Compare counts
        assert result_output_nb_lines == expected_result_nb_lines
        assert result_output_nb_variants == expected_result_nb_variants

        # Compare exported samples
        variants = Variants(input=output_vcf, load=True)
        assert variants.get_header_sample_list() == expected_result_samples
def test_sort():
    """
    Run the 'sort' tool end-to-end on the example VCF and check that
    the output header's contigs are in natural genomic order.

    (Renamed from 'test_filter': this file tests the sort tool; the
    previous name and docstring were copy-pasted from the filter test.)
    """

    with TemporaryDirectory(dir=tests_folder) as tmp_dir:

        # Init files
        input_vcf = tests_data_folder + "/example.vcf.gz"
        output_vcf = os.path.join(tmp_dir, "output_file.tsv")

        # Prepare arguments for the sort tool
        args = argparse.Namespace(
            input=input_vcf,
            output=output_vcf,
            include_header=True,
            arguments_dict=arguments_dict,
        )

        # Remove if output file exists
        remove_if_exists([output_vcf])

        # Sort
        vcf_sort(args)

        # Check contig order in the output header: numeric contigs
        # (with 'chr'-prefixed duplicates ranked together), then X, Y, M
        variants = Variants(input=output_vcf, load=True)
        assert list(variants.get_header().contigs.keys()) == [
            "1",
            "chr1",
            "2",
            "3",
            "4",
            "5",
            "6",
            "7",
            "chr7",
            "8",
            "9",
            "10",
            "11",
            "12",
            "13",
            "14",
            "15",
            "16",
            "17",
            "18",
            "19",
            "20",
            "21",
            "22",
            "X",
            "Y",
            "M",
        ]