Skip to content

Commit

Permalink
Merge pull request #79 from bioinfo-chru-strasbourg/fix_logger
Browse files Browse the repository at this point in the history
fix #78 logger #4 docs
  • Loading branch information
antonylebechec authored Jul 24, 2023
2 parents ebe265d + ffbe5f5 commit a2b0b77
Show file tree
Hide file tree
Showing 9 changed files with 49 additions and 32 deletions.
12 changes: 7 additions & 5 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,10 @@ howard.egg-info/
.cache
.vscode
.coverage
tests/data/annotations/annovar/
tests/data/annotations/snpeff/
tests/data/annotations/hg19.fa
tests/data/annotations/hg19.fa.fai
tests/data/annotations/.DS_Store
tests/databases/annovar/
tests/databases/snpeff/
tests/databases/genomes/
tests/databases/refseq/
tests/databases/hg19.fa
tests/databases/hg19.fa.fai
tests/databases/.DS_Store
16 changes: 8 additions & 8 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -113,12 +113,12 @@ howard query --input=tests/data/example.vcf.gz --explode_infos --query='SELECT "

- Query a Parquet file with specific columns (e.g. from VCF conversion to Parquet)
```
howard query --query="SELECT * FROM 'tests/data/annotations/dbnsfp42a.parquet' WHERE \"INFO/Interpro_domain\" NOT NULL ORDER BY \"INFO/SiPhy_29way_logOdds_rankscore\" DESC"
howard query --query="SELECT * FROM 'tests/databases/annotations/hg19/dbnsfp42a.parquet' WHERE \"INFO/Interpro_domain\" NOT NULL ORDER BY \"INFO/SiPhy_29way_logOdds_rankscore\" DESC"
```

- Query multiple Parquet files, merge INFO columns, and extract as TSV (in VCF format)
```
howard query --query="SELECT \"#CHROM\" AS \"#CHROM\", POS AS POS, '' AS ID, REF AS REF, ALT AS ALT, '' AS QUAL, '' AS FILTER, STRING_AGG(INFO, ';') AS INFO FROM 'tests/data/annotations/*.parquet' GROUP BY \"#CHROM\", POS, REF, ALT" --output=/tmp/full_annotation.tsv
howard query --query="SELECT \"#CHROM\" AS \"#CHROM\", POS AS POS, '' AS ID, REF AS REF, ALT AS ALT, '' AS QUAL, '' AS FILTER, STRING_AGG(INFO, ';') AS INFO FROM 'tests/databases/annotations/hg19/*.parquet' GROUP BY \"#CHROM\", POS, REF, ALT" --output=/tmp/full_annotation.tsv
```


Expand All @@ -128,12 +128,12 @@ Annotation is mainly based on a built-in Parquet annotation method, and tools su

- VCF annotation with Parquet and VCF databases, output as VCF format
```
howard annotation --input=tests/data/example.vcf.gz --output=/tmp/example.howard.vcf.gz --annotations='tests/data/annotations/dbnsfp42a.parquet,tests/data/annotations/gnomad211_genome.parquet,tests/data/annotations/cosmic70.vcf.gz'
howard annotation --input=tests/data/example.vcf.gz --output=/tmp/example.howard.vcf.gz --annotations='tests/databases/annotations/hg19/dbnsfp42a.parquet,tests/databases/annotations/hg19/gnomad211_genome.parquet,tests/databases/annotations/hg19/cosmic70.vcf.gz'
```

- VCF annotation with Clinvar Parquet, Annovar refGene and snpEff databases, output as TSV format
```
howard annotation --input=tests/data/example.vcf.gz --output=/tmp/example.howard.tsv --annotations='annovar:refGene,snpeff,tests/data/annotations/clinvar_20210123.parquet'
howard annotation --input=tests/data/example.vcf.gz --output=/tmp/example.howard.tsv --annotations='annovar:refGene,snpeff,tests/databases/annotations/hg19/clinvar_20210123.parquet'
```

## Calculation
Expand Down Expand Up @@ -226,20 +226,20 @@ howard process --config=config/config.json --param=config/param.json --input=tes
},
"parquet": {
"annotations": {
"tests/data/annotations/avsnp150.parquet": {
"tests/databases/annotations/hg19/avsnp150.parquet": {
"INFO": null
},
"tests/data/annotations/dbnsfp42a.parquet": {
"tests/databases/annotations/hg19/dbnsfp42a.parquet": {
"INFO": null
},
"tests/data/annotations/gnomad211_genome.parquet": {
"tests/databases/annotations/hg19/gnomad211_genome.parquet": {
"INFO": null
}
}
},
"bcftools": {
"annotations": {
"tests/data/annotations/cosmic70.vcf.gz": {
"tests/databases/annotations/hg19/cosmic70.vcf.gz": {
"INFO": null
}
}
Expand Down
8 changes: 4 additions & 4 deletions config/param.json
Original file line number Diff line number Diff line change
Expand Up @@ -16,20 +16,20 @@
},
"parquet": {
"annotations": {
"tests/data/annotations/avsnp150.parquet": {
"tests/databases/annotations/hg19/avsnp150.parquet": {
"INFO": null
},
"tests/data/annotations/dbnsfp42a.parquet": {
"tests/databases/annotations/hg19/dbnsfp42a.parquet": {
"INFO": null
},
"tests/data/annotations/gnomad211_genome.parquet": {
"tests/databases/annotations/hg19/gnomad211_genome.parquet": {
"INFO": null
}
}
},
"bcftools": {
"annotations": {
"tests/data/annotations/cosmic70.vcf.gz": {
"tests/databases/annotations/hg19/cosmic70.vcf.gz": {
"INFO": null
}
}
Expand Down
8 changes: 4 additions & 4 deletions docs/howard.md
Original file line number Diff line number Diff line change
Expand Up @@ -252,8 +252,8 @@ Shared options:
Default: INFO
Usage examples:
howard annotation --input=tests/data/example.vcf.gz --output=/tmp/example.howard.vcf.gz --annotations='tests/data/annotations/avsnp150.parquet,tests/data/annotations/dbnsfp42a.parquet,tests/data/annotations/gnomad211_genome.parquet'
howard annotation --input=tests/data/example.vcf.gz --output=/tmp/example.howard.tsv --annotations='annovar:refGene,snpeff,tests/data/annotations/clinvar_20210123.parquet'
howard annotation --input=tests/data/example.vcf.gz --output=/tmp/example.howard.vcf.gz --annotations='tests/databases/annotations/hg19/avsnp150.parquet,tests/databases/annotations/hg19/dbnsfp42a.parquet,tests/databases/annotations/hg19/gnomad211_genome.parquet'
howard annotation --input=tests/data/example.vcf.gz --output=/tmp/example.howard.tsv --annotations='annovar:refGene,snpeff,tests/databases/annotations/hg19/clinvar_20210123.parquet'
```

# Calculation
Expand Down Expand Up @@ -397,8 +397,8 @@ Shared options:
Usage examples:
howard query --input=tests/data/example.vcf.gz --query="SELECT * FROM variants WHERE REF = 'A' AND POS < 100000"
howard query --input=tests/data/example.vcf.gz --explode_infos --query='SELECT "#CHROM", POS, REF, ALT, "INFO/DP", "INFO/CLNSIG", sample2, sample3 FROM variants WHERE "INFO/DP" >= 50 OR "INFO/CLNSIG" NOT NULL ORDER BY "INFO/DP" DESC'
howard query --query="SELECT * FROM 'tests/data/annotations/dbnsfp42a.parquet' WHERE \"INFO/Interpro_domain\" NOT NULL ORDER BY \"INFO/SiPhy_29way_logOdds_rankscore\" DESC"
howard query --query="SELECT \"#CHROM\" AS \"#CHROM\", POS AS POS, '' AS ID, REF AS REF, ALT AS ALT, '' AS QUAL, '' AS FILTER, STRING_AGG(INFO, ';') AS INFO FROM 'tests/data/annotations/*.parquet' GROUP BY \"#CHROM\", POS, REF, ALT" --output=/tmp/full_annotation.tsv
howard query --query="SELECT * FROM 'tests/databases/annotations/hg19/dbnsfp42a.parquet' WHERE \"INFO/Interpro_domain\" NOT NULL ORDER BY \"INFO/SiPhy_29way_logOdds_rankscore\" DESC"
howard query --query="SELECT \"#CHROM\" AS \"#CHROM\", POS AS POS, '' AS ID, REF AS REF, ALT AS ALT, '' AS QUAL, '' AS FILTER, STRING_AGG(INFO, ';') AS INFO FROM 'tests/databases/annotations/hg19/*.parquet' GROUP BY \"#CHROM\", POS, REF, ALT" --output=/tmp/full_annotation.tsv
```

# Stats
Expand Down
10 changes: 7 additions & 3 deletions howard/commons.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@
import zipfile
import gzip
import requests
import genomepy


file_folder = os.path.dirname(__file__)
Expand Down Expand Up @@ -90,6 +89,8 @@
"arm64": "arm64"
}

LOG_FORMAT = "#[%(asctime)s] [%(levelname)s] %(message)s"

def remove_if_exists(filepaths: list) -> None:
"""
The function removes a file if it exists at the specified filepath(s).
Expand All @@ -104,7 +105,7 @@ def remove_if_exists(filepaths: list) -> None:
os.remove(filepath)


def set_log_level(verbosity: str) -> str:
def set_log_level(verbosity: str, log_file:str = None) -> str:
"""
It sets the log level of the Python logging module
Expand All @@ -120,8 +121,11 @@ def set_log_level(verbosity: str) -> str:
}
if verbosity not in configs.keys():
raise ValueError("Unknown verbosity level:" + verbosity)

log.basicConfig(
format="#[%(asctime)s] [%(levelname)s] %(message)s",
filename=log_file,
encoding='utf-8',
format=LOG_FORMAT,
datefmt="%Y-%m-%d %H:%M:%S",
level=configs[verbosity],
)
Expand Down
4 changes: 2 additions & 2 deletions howard/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ def main() -> None:
#usage="howard [<shared-args>]",
epilog="Usage examples:\n"
""" howard process --input=tests/data/example.vcf.gz --output=/tmp/example.annotated.vcf.gz --param=config/param.json \n"""
""" howard annotation --input=tests/data/example.vcf.gz --output=/tmp/example.howard.vcf.gz --annotations='tests/data/annotations/dbnsfp42a.parquet,tests/data/annotations/gnomad211_genome.parquet' \n"""
""" howard annotation --input=tests/data/example.vcf.gz --output=/tmp/example.howard.vcf.gz --annotations='tests/databases/annotations/hg19/dbnsfp42a.parquet,tests/databases/annotations/hg19/gnomad211_genome.parquet' \n"""
""" howard calculation --input=tests/data/example.full.vcf --output=/tmp/example.calculation.tsv --calculations='vartype' \n"""
""" howard prioritization --input=tests/data/example.vcf.gz --output=/tmp/example.prioritized.vcf.gz --prioritizations=config/prioritization_profiles.json --profiles='default,GERMLINE' \n"""
""" howard query --input=tests/data/example.vcf.gz --explode_infos --query='SELECT "#CHROM", POS, REF, ALT, "INFO/DP", "INFO/CLNSIG", sample2, sample3 FROM variants WHERE "INFO/DP" >= 50 OR "INFO/CLNSIG" NOT NULL ORDER BY "INFO/DP" DESC' \n"""
Expand Down Expand Up @@ -105,7 +105,7 @@ def main() -> None:
args.verbosity = "info"

# Logging
set_log_level(args.verbosity)
set_log_level(args.verbosity, args.log)

# Threads
if "threads" in args and args.threads:
Expand Down
2 changes: 2 additions & 0 deletions howard/tools/databases.py
Original file line number Diff line number Diff line change
Expand Up @@ -378,6 +378,8 @@ def databases_download_genomes(assemblies: list, genome_folder: str = None, prov

log.info(f"Download Genomes {assemblies}")

import genomepy

if not genome_folder:
genome_folder = DEFAULT_GENOME_FOLDER

Expand Down
19 changes: 13 additions & 6 deletions howard/tools/tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,6 @@
from howard.tools.from_annovar import *



# Arguments dict
arguments = {

Expand Down Expand Up @@ -428,6 +427,14 @@
"required": False,
"default": "info"
},
"log": {
"metavar": "FILE",
"help": """Logs file\n"""
"""Format: LOG\n"""
"""Example: 'my.log'\n"""
"""Default: None""",
"default": None
},
"quiet": {
"help": argparse.SUPPRESS,
"action": "store_true"
Expand All @@ -445,7 +452,7 @@


# Shared arguments
shared_arguments = ["config", "threads", "memory", "verbosity", "quiet", "verbose", "debug"]
shared_arguments = ["config", "threads", "memory", "verbosity", "log", "quiet", "verbose", "debug"]

# Command dict
commands_arguments = {
Expand Down Expand Up @@ -482,8 +489,8 @@
"description": """Annotation is mainly based on a build-in Parquet annotation method, and tools such as BCFTOOLS, Annovar and snpEff. It uses available databases (see Annovar and snpEff) and homemade databases. Format of databases are: parquet, duckdb, vcf, bed, Annovar and snpEff (Annovar and snpEff databases are automatically downloaded, see howard databases tool). """,
"help": """Annotation of genetic variations using databases/files and tools.""",
"epilog": """Usage examples:\n"""
""" howard annotation --input=tests/data/example.vcf.gz --output=/tmp/example.howard.vcf.gz --annotations='tests/data/annotations/avsnp150.parquet,tests/data/annotations/dbnsfp42a.parquet,tests/data/annotations/gnomad211_genome.parquet' \n"""
""" howard annotation --input=tests/data/example.vcf.gz --output=/tmp/example.howard.tsv --annotations='annovar:refGene,snpeff,tests/data/annotations/clinvar_20210123.parquet' \n""",
""" howard annotation --input=tests/data/example.vcf.gz --output=/tmp/example.howard.vcf.gz --annotations='tests/databases/annotations/hg19/avsnp150.parquet,tests/databases/annotations/hg19/dbnsfp42a.parquet,tests/databases/annotations/hg19/gnomad211_genome.parquet' \n"""
""" howard annotation --input=tests/data/example.vcf.gz --output=/tmp/example.howard.tsv --annotations='annovar:refGene,snpeff,tests/databases/annotations/hg19/clinvar_20210123.parquet' \n""",
"groups": {
"main": {
"input": True,
Expand Down Expand Up @@ -571,8 +578,8 @@
"epilog": """Usage examples:\n"""
""" howard query --input=tests/data/example.vcf.gz --query="SELECT * FROM variants WHERE REF = 'A' AND POS < 100000" \n"""
""" howard query --input=tests/data/example.vcf.gz --explode_infos --query='SELECT "#CHROM", POS, REF, ALT, "INFO/DP", "INFO/CLNSIG", sample2, sample3 FROM variants WHERE "INFO/DP" >= 50 OR "INFO/CLNSIG" NOT NULL ORDER BY "INFO/DP" DESC' \n"""
""" howard query --query="SELECT * FROM 'tests/data/annotations/dbnsfp42a.parquet' WHERE \\"INFO/Interpro_domain\\" NOT NULL ORDER BY \\"INFO/SiPhy_29way_logOdds_rankscore\\" DESC" \n"""
""" howard query --query="SELECT \\"#CHROM\\" AS \\"#CHROM\\", POS AS POS, '' AS ID, REF AS REF, ALT AS ALT, '' AS QUAL, '' AS FILTER, STRING_AGG(INFO, ';') AS INFO FROM 'tests/data/annotations/*.parquet' GROUP BY \\"#CHROM\\", POS, REF, ALT" --output=/tmp/full_annotation.tsv \n"""
""" howard query --query="SELECT * FROM 'tests/databases/annotations/hg19/dbnsfp42a.parquet' WHERE \\"INFO/Interpro_domain\\" NOT NULL ORDER BY \\"INFO/SiPhy_29way_logOdds_rankscore\\" DESC" \n"""
""" howard query --query="SELECT \\"#CHROM\\" AS \\"#CHROM\\", POS AS POS, '' AS ID, REF AS REF, ALT AS ALT, '' AS QUAL, '' AS FILTER, STRING_AGG(INFO, ';') AS INFO FROM 'tests/databases/annotations/hg19/*.parquet' GROUP BY \\"#CHROM\\", POS, REF, ALT" --output=/tmp/full_annotation.tsv \n"""
,
"groups": {
"main": {
Expand Down
2 changes: 2 additions & 0 deletions tests/test_tools_databases.py
Original file line number Diff line number Diff line change
Expand Up @@ -437,6 +437,8 @@ def test_databases_download_genomes():
different assemblies and contig filters.
"""

import genomepy

# Init
assemblies_config = {
"sacCer3": {
Expand Down

0 comments on commit a2b0b77

Please sign in to comment.