Skip to content

Commit

Permalink
Merge pull request #10 from moka-guys/panelapp_gene_query
Browse files Browse the repository at this point in the history
Panelapp gene query script added (#10)
  • Loading branch information
RachelDuffin authored Jun 24, 2024
2 parents 3139013 + 31f8b84 commit c8aecaa
Show file tree
Hide file tree
Showing 4 changed files with 159 additions and 0 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
okd_qc_commands.py
24 changes: 24 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -62,3 +62,27 @@ If run with the name of a TSO related project in DNA nexus as an argument, this
```bash tso_upload.sh 002_240216_A01229_0290_AHNL5GDMXY_TSO24006```

The resulting command will be sent to stdout with just the APP_ID and MOKAGUYS_AUTH_TOKEN needing to be added to each line; this can be done using find & replace. IMPORTANT: HD200 and NTC samples (HD200 or 00000_00000 in sample name) should have their lines removed manually as these should not be uploaded.

## panelapp_gene_query.py

Generates a list of all signed-off gene/panel relations using the PanelApp API.

Confidence scores:

- Score 3 (lime green) - High level of evidence for this gene-disease association. Demonstrates confidence that this gene should be used for genome interpretation.
- Score 2 (amber) - Moderate evidence for this gene-disease association. This gene should not be used for genomic interpretation.
- Score 0 or 1 (red) - Not enough evidence for this gene-disease association. This gene should not be used for genomic interpretation.

### Usage:
```
python3 panelapp_gene_query.py
```

### Output:
A CSV file (```panelapp_gene_data.csv```) is written to the current working directory. It contains the following columns: Gene Symbol, HGNC ID, Panel Name, Confidence Level and Panel ID.

### Testing:
Spot-check the output CSV to ensure both:

- Results and confidence scores are reflected on the live PanelApp website
- All results pertain to signed-off panels
97 changes: 97 additions & 0 deletions panelapp_gene_query.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
'''
Created by the bioinformatics team @ Synnovis
Guy's & St. Thomas' NHS Trust
Simple script to generate CSV containing all current gene/panel relations using PanelApp API
Includes data only from signed-off panels
Stores gene_symbol, hgnc_id, panel[name], confidence_level and panel_id
Usage: python3 panelapp_gene_query.py
panelapp_gene_data.csv will be saved in the current working directory
'''

import csv
import requests

def fetch_data(url, timeout=30):
    """Request a URL and return its decoded JSON body.

    Args:
        url: Address to request with HTTP GET.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a stalled connection would block the script forever.

    Returns:
        The parsed JSON payload when the server answers with HTTP 200,
        otherwise None. The reason for a failure is printed.
    """
    try:
        response = requests.get(url, timeout=timeout)
    except requests.RequestException as error:
        # Connection errors and timeouts follow the same "print and return
        # None" contract as a bad status code, so callers need one check only.
        print(f"Failed to retrieve data from the API. Error: {error}")
        return None

    if response.status_code != 200:
        print(f"Failed to retrieve data from the API. Status code: {response.status_code}")
        return None

    try:
        return response.json()
    except ValueError as error:
        # A 200 response whose body is not valid JSON is unusable as well.
        print(f"Failed to retrieve data from the API. Invalid JSON: {error}")
        return None

def check_panel_signed(panel_id, signed_panels_cache, invalid_panels_cache):
    """Report whether a panel is signed off, using the signed-off panels endpoint.

    Args:
        panel_id: Identifier of the panel to look up.
        signed_panels_cache: Set of panel IDs already confirmed as signed off.
            Updated in place.
        invalid_panels_cache: Set of panel IDs already confirmed as not signed
            off (or not existing). Updated in place.

    Returns:
        True when the panel is signed off, False otherwise. False is also
        returned when the status could not be determined, but in that case
        nothing is cached so the next occurrence of the panel is re-checked.
    """
    # Previously checked panels are answered from the caches to avoid extra API calls
    if panel_id in signed_panels_cache:
        return True
    if panel_id in invalid_panels_cache:
        return False

    # New panel_id found, use API to check status
    url = f"https://panelapp.genomicsengland.co.uk/api/v1/panels/signedoff/{panel_id}"
    try:
        response = requests.get(url, timeout=30)
    except requests.RequestException as error:
        print(f"Could not check panel {panel_id} ({error}). Result not cached.")
        return False

    if response.status_code == 200:
        try:
            payload = response.json()
        except ValueError:
            print(f"Could not check panel {panel_id} (response was not valid JSON). Result not cached.")
            return False
        if "detail" not in payload:
            print(f"Panel {panel_id} is signed off. Caching as signed-off.")
            signed_panels_cache.add(panel_id)
            return True
    elif response.status_code != 404:
        # Server errors, rate limiting etc. say nothing about the panel itself.
        # Caching them as invalid would drop every later row of this panel.
        print(f"Could not check panel {panel_id} (status code {response.status_code}). Result not cached.")
        return False

    # Definitive negative answer: a 404, or a 200 body carrying a "detail" message
    print(f"Panel {panel_id} is not a signed-off panel or does not exist. Caching as invalid.")
    invalid_panels_cache.add(panel_id)
    return False

def write_data_to_csv(data, file_path, signed_panels_cache, invalid_panels_cache):
    """Create the CSV at file_path: a header row followed by the rows taken from data.

    An existing file at file_path is overwritten. The two caches are passed
    straight through to write_rows.
    """
    header = ["Gene Symbol", "HGNC ID", "Panel Name", "Confidence Level", "Panel ID"]
    # newline="" leaves line-ending handling to the csv module
    with open(file_path, "w", newline="") as csv_file:
        csv_writer = csv.writer(csv_file)
        csv_writer.writerow(header)
        write_rows(csv_writer, data, signed_panels_cache, invalid_panels_cache)

def write_rows(writer, data, signed_panels_cache, invalid_panels_cache):
    """Write one CSV row per entry of data["results"] whose panel is signed off.

    Each row holds gene symbol, HGNC ID, panel name, confidence level and
    panel ID, in that order. Entries from panels that are not signed off are
    skipped.
    """
    for entry in data["results"]:
        gene = entry["gene_data"]
        symbol, hgnc = gene["gene_symbol"], gene["hgnc_id"]
        panel = entry["panel"]
        name, identifier = panel["name"], panel["id"]
        confidence = entry["confidence_level"]

        # Only genes from signed-off panels belong in the output
        if not check_panel_signed(identifier, signed_panels_cache, invalid_panels_cache):
            continue
        writer.writerow([symbol, hgnc, name, confidence, identifier])

def handle_pagination(data, writer, signed_panels_cache, invalid_panels_cache):
    """Follow the "next" links of a paginated response and write every further page.

    Args:
        data: An already-processed page. Only its "next" link is read here.
        writer: CSV writer that receives the rows of the following pages.
        signed_panels_cache: Set of signed-off panel IDs, passed to write_rows.
        invalid_panels_cache: Set of non-signed-off panel IDs, passed to write_rows.

    Pagination ends when a page has no "next" link. If a page cannot be
    fetched, a warning naming that page is printed and pagination stops,
    leaving the output incomplete.
    """
    next_url = data.get("next")
    while next_url:
        data = fetch_data(next_url)
        if not data:
            # Make the truncation visible instead of stopping silently
            print(f"Warning: could not fetch {next_url}. Pagination stopped early, output is incomplete.")
            break
        write_rows(writer, data, signed_panels_cache, invalid_panels_cache)
        next_url = data.get("next")

def main():
    """Fetch all gene/panel relations and save those from signed-off panels to CSV."""
    signed_panels_cache, invalid_panels_cache = set(), set()
    output_file = "panelapp_gene_data.csv"

    # First page: creates the file and writes the header plus the first rows
    first_page = fetch_data("https://panelapp.genomicsengland.co.uk/api/v1/genes/")
    if not first_page:
        return
    write_data_to_csv(first_page, output_file, signed_panels_cache, invalid_panels_cache)

    # Remaining pages are appended to the file created above
    with open(output_file, "a", newline="") as csv_file:
        handle_pagination(first_page, csv.writer(csv_file), signed_panels_cache, invalid_panels_cache)

    print(f"Gene data saved to {output_file}")


if __name__ == "__main__":
    main()
37 changes: 37 additions & 0 deletions settings.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
{
"python.testing.pytestArgs": [
"."
],
"python.testing.unittestEnabled": false,
"python.testing.pytestEnabled": true,
"python.envFile": "${workspaceFolder}/.venv",
"python.analysis.extraPaths": [
],
"editor.formatOnSaveMode": "file",
"editor.formatOnSave": true,
"editor.codeActionsOnSave": {
"source.organizeImports": "explicit"
},
"[python]": {
"editor.defaultFormatter": "ms-python.black-formatter",
"editor.formatOnSave": true,
"editor.codeActionsOnSave": {
"source.organizeImports": "explicit"
}
},
"isort.args": [
"--profile",
"black"
],
"flake8.args": [
"--max-line-length=120"
],
"pylint.args": [
"--max-line-length=120"
],
"black-formatter.args": [
"--line-length",
"120"
],
"python.analysis.typeCheckingMode": "basic"
}

0 comments on commit c8aecaa

Please sign in to comment.