Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Panelapp gene query script added #10

Merged
merged 5 commits into from
Jun 24, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
okd_qc_commands.py
24 changes: 24 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -62,3 +62,27 @@ If run with the name of a TSO related project in DNA nexus as an argument, this
```bash tso_upload.sh 002_240216_A01229_0290_AHNL5GDMXY_TSO24006```

The resulting command will be written to stdout, with just the APP_ID and MOKAGUYS_AUTH_TOKEN needing to be added to each line; this can be done using find & replace. IMPORTANT: lines for HD200 and NTC samples (HD200 or 00000_00000 in the sample name) should be removed manually, as these samples should not be uploaded.

## panelapp_gene_query.py

Generates a list of all signed-off gene/panel relations using the PanelApp API.

Confidence scores:

- Score 3 (lime green) - High level of evidence for this gene-disease association. Demonstrates confidence that this gene should be used for genome interpretation.
- Score 2 (amber) - Moderate evidence for this gene-disease association. This gene should not be used for genomic interpretation.
- Score 0 or 1 (red) - Not enough evidence for this gene-disease association. This gene should not be used for genomic interpretation.

### Usage:
```
python3 panelapp_gene_query.py
```

### Output:
A CSV file (```panelapp_gene_data.csv```) written to the current working directory. It contains the following columns: Gene Symbol, HGNC ID, Panel Name, Confidence Level and Panel ID

### Testing:
Spot-check the output CSV to ensure both:

- Results and confidence scores are reflected on the live PanelApp website
- All results pertain to signed-off panels
97 changes: 97 additions & 0 deletions panelapp_gene_query.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
'''
Created by the bioinformatics team @ Synnovis
Guy's & St. Thomas' NHS Trust

Simple script to generate CSV containing all current gene/panel relations using PanelApp API
Includes data only from signed-off panels
Stores gene_symbol, hgnc_id, panel[name], confidence_level and panel_id

Usage: python3 panelapp_gene_query.py
panelapp_gene_data.csv will be saved in the current working directory
'''

import csv
import requests

def fetch_data(url, timeout=30):
    """Fetch a URL with an HTTP GET and return its decoded JSON body.

    Args:
        url: Address to request.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout, requests waits indefinitely, so a stalled connection
            would hang the whole script.

    Returns:
        The parsed JSON payload when the server answers with status 200,
        otherwise None (after printing the status code).

    Raises:
        requests.exceptions.Timeout: If the server does not respond in time.
    """
    response = requests.get(url, timeout=timeout)
    if response.status_code == 200:
        return response.json()

    print(f"Failed to retrieve data from the API. Status code: {response.status_code}")
    return None

def check_panel_signed(panel_id, signed_panels_cache, invalid_panels_cache, timeout=30):
    """Report whether a panel is signed off, consulting the caches first.

    Args:
        panel_id: Identifier of the panel to look up.
        signed_panels_cache: Set of panel ids already confirmed as signed off.
            Updated in place when a new signed-off panel is found.
        invalid_panels_cache: Set of panel ids already found not to be signed
            off. Updated in place when a lookup does not confirm sign-off.
        timeout: Seconds to wait for the API before giving up. Without a
            timeout a stalled connection would hang the script indefinitely.

    Returns:
        True if the panel is signed off, False otherwise.

    Raises:
        requests.exceptions.Timeout: If the API does not respond in time.
    """
    # Panels seen before are answered from the caches to avoid extra API calls.
    if panel_id in signed_panels_cache:
        return True
    if panel_id in invalid_panels_cache:
        return False

    # First time this panel_id is seen: ask the signed-off endpoint and cache the answer.
    url = f"https://panelapp.genomicsengland.co.uk/api/v1/panels/signedoff/{panel_id}"
    response = requests.get(url, timeout=timeout)
    if response.status_code == 200 and "detail" not in response.json():
        print(f"Panel {panel_id} is signed off. Caching as signed-off.")
        signed_panels_cache.add(panel_id)
        return True

    # NOTE: every non-200 status ends up here, so the panel is cached as
    # invalid for the rest of the run whatever the reason for the failure.
    print(f"Panel {panel_id} is not a signed-off panel or does not exist. Caching as invalid.")
    invalid_panels_cache.add(panel_id)
    return False

def write_data_to_csv(data, file_path, signed_panels_cache, invalid_panels_cache):
    """Create the CSV at file_path with a header row followed by the rows from data.

    Any existing file at file_path is overwritten. The two cache sets are
    passed straight through to write_rows.
    """
    column_titles = ["Gene Symbol", "HGNC ID", "Panel Name", "Confidence Level", "Panel ID"]
    with open(file_path, "w", newline="") as csv_handle:
        csv_writer = csv.writer(csv_handle)
        csv_writer.writerow(column_titles)
        write_rows(csv_writer, data, signed_panels_cache, invalid_panels_cache)

def write_rows(writer, data, signed_panels_cache, invalid_panels_cache):
    """Write one CSV row per entry in data["results"] whose panel is signed off.

    Each row holds gene symbol, HGNC id, panel name, confidence level and
    panel id, in that order. Entries whose panel is not signed off are skipped.
    """
    for entry in data["results"]:
        gene = entry["gene_data"]
        symbol = gene["gene_symbol"]
        hgnc = gene["hgnc_id"]
        panel = entry["panel"]
        name = panel["name"]
        pid = panel["id"]
        confidence = entry["confidence_level"]

        # Only rows belonging to signed-off panels make it into the output.
        if not check_panel_signed(pid, signed_panels_cache, invalid_panels_cache):
            continue
        writer.writerow([symbol, hgnc, name, confidence, pid])

def handle_pagination(data, writer, signed_panels_cache, invalid_panels_cache):
    """Follow the "next" links starting from data and write every further page.

    Stops when a page has no "next" link, or as soon as fetching a page
    yields nothing usable.
    """
    next_url = data["next"]
    while next_url:
        page = fetch_data(next_url)
        if not page:
            break
        write_rows(writer, page, signed_panels_cache, invalid_panels_cache)
        next_url = page["next"]

def main():
    """Download every gene/panel relation and save the signed-off ones to CSV.

    The first page is written together with the header row. The remaining
    pages are then appended to the same file. If the first request yields no
    data, nothing is written and no success message is printed.
    """
    url = "https://panelapp.genomicsengland.co.uk/api/v1/genes/"
    output_file = "panelapp_gene_data.csv"
    signed_panels_cache = set()
    invalid_panels_cache = set()

    # Initial data fetch
    initial_data = fetch_data(url)
    if not initial_data:
        # Nothing usable came back from the first request, so there is no
        # "next" link to follow and no output to report as saved.
        return

    write_data_to_csv(initial_data, output_file, signed_panels_cache, invalid_panels_cache)

    # Handle rest of pages
    with open(output_file, mode="a", newline='') as file:
        writer = csv.writer(file)
        handle_pagination(initial_data, writer, signed_panels_cache, invalid_panels_cache)

    print(f"Gene data saved to {output_file}")


if __name__ == "__main__":
    main()
37 changes: 37 additions & 0 deletions settings.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
{
"python.testing.pytestArgs": [
"."
],
"python.testing.unittestEnabled": false,
"python.testing.pytestEnabled": true,
"python.envFile": "${workspaceFolder}/.venv",
"python.analysis.extraPaths": [
],
"editor.formatOnSaveMode": "file",
"editor.formatOnSave": true,
"editor.codeActionsOnSave": {
"source.organizeImports": "explicit"
},
"[python]": {
"editor.defaultFormatter": "ms-python.black-formatter",
"editor.formatOnSave": true,
"editor.codeActionsOnSave": {
"source.organizeImports": "explicit"
}
},
"isort.args": [
"--profile",
"black"
],
"flake8.args": [
"--max-line-length=120"
],
"pylint.args": [
"--max-line-length=120"
],
"black-formatter.args": [
"--line-length",
"120"
],
"python.analysis.typeCheckingMode": "basic"
}