Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

[wip] filter genes by list of genes in the portal #77

Open
wants to merge 3 commits into
base: retrieve-ids
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions Snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -176,6 +176,7 @@ rule gene_json_appyter_link:
input:
script = "scripts/build-appyter-gene-links.py",
id_list = "data/inputs/gene_IDs_for_alias_tables.txt",
validate_csv = expand("data/validate/{term}.csv", term=TERM_TYPES),
output:
directory("output_pieces_gene/01-appyter")
params:
Expand All @@ -191,6 +192,7 @@ rule gene_json_appyter_lincs_geo_reverse_link:
input:
script = "scripts/build-appyter-gene-links-lincs-geo-reverse.py",
id_list = "data/inputs/gene_IDs_for_lincs_reverse_search.txt",
validate_csv = expand("data/validate/{term}.csv", term=TERM_TYPES),
output:
directory("output_pieces_gene/02-appyter-lincs-geo-reverse")
params:
Expand All @@ -207,6 +209,7 @@ rule gene_json_ucsc_genome_browser_widget:
script = "scripts/build-markdown-pieces-ucsc-genome-browser-widget.pl",
id_list = "data/inputs/gene_IDs_for_UCSC_genome_browser_widget.txt",
coord_info = "data/inputs/homo_sapiens.coords.tsv",
validate_csv = expand("data/validate/{term}.csv", term=TERM_TYPES),
output:
directory("output_pieces_gene/70-ucsc")
params:
Expand All @@ -224,6 +227,7 @@ rule gene_json_expression_widget:
input:
script = "scripts/build-markdown-pieces.py",
id_list = "data/inputs/gene_IDS_for_gtex.txt",
validate_csv = expand("data/validate/{term}.csv", term=TERM_TYPES),
output:
directory("output_pieces_gene/10-expression")
params:
Expand All @@ -240,6 +244,7 @@ rule gene_json_transcript_widget:
input:
script = "scripts/build-markdown-pieces.py",
id_list = "data/inputs/gene_IDS_for_gtex.txt",
validate_csv = expand("data/validate/{term}.csv", term=TERM_TYPES),
output:
directory("output_pieces_gene/20-transcripts")
params:
Expand All @@ -255,6 +260,7 @@ rule gene_json_lincs_widget:
input:
script = "scripts/build-markdown-pieces-MetGene.py",
id_list = "data/inputs/gene_IDs_for_MetGene.txt",
validate_csv = expand("data/validate/{term}.csv", term=TERM_TYPES),
output:
directory("output_pieces_gene/05-MetGene")
params:
Expand Down
20 changes: 7 additions & 13 deletions scripts/build-appyter-gene-links.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,8 @@ def main():

# validate term
term = args.termtype
if term not in cfde_common.REF_FILES:
print(f"ERROR: unknown term type '{term}'", file=sys.stderr)
sys.exit(-1)
if term not in cfde_common.ID_FILES:
print(f"WARNING: unknown term type '{term}'", file=sys.stderr)

print(f"Running with term: {term}", file=sys.stderr)

Expand All @@ -41,10 +40,9 @@ def main():
os.mkdir(output_dir)

# validate that ID list is contained within actual IDs in database
ref_file = cfde_common.REF_FILES.get(term)
ref_file = cfde_common.ID_FILES.get(term)
if ref_file is None:
print(f"ERROR: no ref file for term. Dying terribly.", file=sys.stderr)
sys.exit(-1)
print(f"WARNING: no ref file for term.", file=sys.stderr)

# load in ref file; ID is first column
ref_id_list = set()
Expand All @@ -66,14 +64,10 @@ def main():
for line in fp:
line = line.strip()
if line:
if line in ref_id_list:
id_list.add(line)
if line not in ref_id_list:
print(f"ERROR: requested input id {line} not found in ref_id_list", file=sys.stderr)
print(f"skipping!", file=sys.stderr)
continue
#sys.exit(-1)

id_list.add(line)

print(f"WARNING: requested input id {line} not found in ref_id_list", file=sys.stderr)
print(f"Loaded {len(id_list)} IDs from {args.id_list}",
file=sys.stderr)

Expand Down
18 changes: 7 additions & 11 deletions scripts/build-markdown-pieces-MetGene.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,8 @@ def main():

# validate term
term = args.termtype
if term not in cfde_common.REF_FILES:
print(f"ERROR: unknown term type '{term}'", file=sys.stderr)
sys.exit(-1)
if term not in cfde_common.ID_FILES:
print(f"WARNING: unknown term type '{term}'", file=sys.stderr)

print(f"Running with term: {term}", file=sys.stderr)

Expand All @@ -40,10 +39,9 @@ def main():
os.mkdir(output_dir)

# validate that ID list is contained within actual IDs in database
ref_file = cfde_common.REF_FILES.get(term)
ref_file = cfde_common.ID_FILES.get(term)
if ref_file is None:
print(f"ERROR: no ref file for term. Dying terribly.", file=sys.stderr)
sys.exit(-1)
print(f"WARNING: no ref file for term.", file=sys.stderr)

# load in ref file; ID is first column
ref_id_list = set()
Expand All @@ -65,13 +63,11 @@ def main():
for line in fp:
line = line.strip()
if line:
if line in ref_id_list:
id_list.add(line)
if line not in ref_id_list:
print(f"ERROR: requested input id {line} not found in ref_id_list", file=sys.stderr)
print(f"skipping!", file=sys.stderr)
continue
#sys.exit(-1)
print(f"WARNING: requested input id {line} not found in ref_id_list", file=sys.stderr)

id_list.add(line)

print(f"Loaded {len(id_list)} IDs from {args.id_list}",
file=sys.stderr)
Expand Down
19 changes: 8 additions & 11 deletions scripts/build-markdown-pieces-gene-translate.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,8 @@ def main():

# validate term
term = args.term
if term not in cfde_common.REF_FILES:
print(f"ERROR: unknown term type '{term}'", file=sys.stderr)
sys.exit(-1)
if term not in cfde_common.ID_FILES:
print(f"WARNING: unknown term type '{term}'", file=sys.stderr)

print(f"Running with term: {term}", file=sys.stderr)

Expand All @@ -38,10 +37,9 @@ def main():
if not os.path.exists(output_dir):
os.mkdir(output_dir)

ref_file = cfde_common.REF_FILES.get(term)
ref_file = cfde_common.ID_FILES.get(term)
if ref_file is None:
print(f"ERROR: no ref file for term. Dying terribly.", file=sys.stderr)
sys.exit(-1)
print(f"WARNING: no ref file for term.", file=sys.stderr)

# load in ref file; ID is first column
ref_id_list = set()
Expand Down Expand Up @@ -139,12 +137,11 @@ def isnull(value):
for line in fp:
line = line.strip()
if line:
if line in ref_id_list:
id_list.add(line)
if line not in ref_id_list:
print(f"ERROR: requested input id {line} not found in ref_id_list", file=sys.stderr)
sys.exit(-1)

id_list.add(line)

print(f"WARNING: requested input id {line} not found in ref_id_list", file=sys.stderr)

print(f"Loaded {len(id_list)} IDs from {args.id_list}",
file=sys.stderr)

Expand Down