Skip to content

Commit

Permalink
Make feature branch up to date
Browse files Browse the repository at this point in the history
  • Loading branch information
jimmymathews committed Mar 25, 2024
2 parents 9d7e218 + 3c58b19 commit 15855bb
Show file tree
Hide file tree
Showing 31 changed files with 1,293 additions and 71 deletions.
11 changes: 10 additions & 1 deletion analysis_replication/README.md
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@

# Reproducible analyses

The scripts in this directory reproduce the analyses of the curated datasets, in the order they are mentioned in the article.
The scripts in this directory reproduce the analyses of the curated datasets, in almost exactly the same order that they are mentioned in the article.

These scripts were written as a record of usage of the dashboard web application, which provides the same results.

Expand All @@ -22,3 +22,12 @@ substituting the argument with the address of your local API server. (See *Setti
- These scripts just call the web API, and so they do not require Python package `spatialprofilingtoolbox`.
- You can alternatively store the API host in `api_host.txt` and omit the command-line argument above.
- The run result is here in [results.txt](results.txt).

# Figure generation

One figure is generated programmatically from published source TIFF files.
To run the figure generation script, alter the command below to reference your own database configuration file and path to unzipped Moldoveanu et al dataset.

```bash
python retrieve_example_plot.py dataset_directory/ ~/.spt_db.config
```
71 changes: 53 additions & 18 deletions analysis_replication/accessors.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,8 @@
from typing import cast
import re
from itertools import chain
from urllib.request import urlopen
from urllib.parse import urlencode
from urllib.error import HTTPError
import json
from requests import get as get_request # type: ignore
from os.path import exists
from time import sleep

Expand All @@ -17,6 +15,7 @@
from numpy import mean
from numpy import log
from scipy.stats import ttest_ind # type: ignore
from sklearn.metrics import auc # type:ignore


def get_default_host(given: str | None) -> str | None:
Expand Down Expand Up @@ -219,12 +218,11 @@ def _retrieve(self, endpoint, query):
base = f'{self._get_base()}'
url = '/'.join([base, endpoint, '?' + query])
try:
with urlopen(url) as response:
content = response.read()
except HTTPError as exception:
content = get_request(url)
except Exception as exception:
print(url)
raise exception
return json.loads(content), url
return content.json(), url

def _phenotype_criteria(self, name):
if isinstance(name, dict):
Expand Down Expand Up @@ -341,15 +339,35 @@ def handle_expected_actual(expected: float, actual: float | None):
print(Colors.bold_green + padded + Colors.reset, end='')


def compute_auc(list1: list[float], list2: list[float]) -> float:
pairs = [(value, 0) for value in list1] + [(value, 1) for value in list2]
pairs.sort(key=lambda pair: pair[0])
total_labelled = sum([pair[1] for pair in pairs])
total_unlabelled = len(pairs) - total_labelled
graph_points = [(0.0, 1.0)]
true_positives = 0
true_negatives = total_unlabelled
for _, label in pairs:
if label == 1:
true_positives = true_positives + 1
else:
true_negatives = true_negatives - 1
graph_points.append((true_positives / total_labelled, true_negatives / total_unlabelled))
_auc = auc([p[0] for p in graph_points], [p[1] for p in graph_points])
_auc = max(_auc, 1 - _auc)
return _auc


def univariate_pair_compare(
list1,
list2,
expected_fold=None,
do_log_fold: bool = False,
show_pvalue=False,
list1,
list2,
expected_fold=None,
do_log_fold: bool = False,
show_pvalue=False,
show_auc=False,
):
list1 = list(filter(lambda element: not isnan(element), list1.values))
list2 = list(filter(lambda element: not isnan(element), list2.values))
list1 = list(filter(lambda element: not isnan(element) and not element==inf, list1.values))
list2 = list(filter(lambda element: not isnan(element) and not element==inf, list2.values))

mean1 = float(mean(list1))
mean2 = float(mean(list2))
Expand All @@ -368,16 +386,33 @@ def univariate_pair_compare(

if show_pvalue:
if do_log_fold:
result = ttest_ind(_list1, _list2)
result = ttest_ind(_list1, _list2, equal_var=False)
print(
' p-value (after log): ' + Colors.blue + str(result.pvalue) + Colors.reset, end=''
)
else:
result = ttest_ind(list1, list2)
result = ttest_ind(list1, list2, equal_var=False)
print(' p-value: ' + Colors.blue + str(result.pvalue) + Colors.reset, end='')

if show_auc:
_auc = compute_auc(list1, list2)
print(' AUC: ' + Colors.blue + str(_auc) + Colors.reset, end='')

print('')


def print_comparison() -> None:
pass
def get_fractions(df, column_numerator, column_denominator, cohort1, cohort2, omit_zeros=True):
fractions = df[column_numerator] / df[column_denominator]
if omit_zeros:
mask = ~ ( (df[column_numerator] == 0) | (df[column_denominator] == 0) )
total1 = sum((df['cohort'] == cohort1))
omit1 = total1 - sum((df['cohort'] == cohort1) & mask)
total2 = sum((df['cohort'] == cohort2))
omit2 = total2 - sum((df['cohort'] == cohort2) & mask)
if omit1 !=0 or omit2 !=0:
print(f'(Omitting {omit1}/{total1} from {cohort1} and {omit2}/{total2} from {cohort2}.)')
else:
mask = True
fractions1 = fractions[(df['cohort'] == cohort1) & mask]
fractions2 = fractions[(df['cohort'] == cohort2) & mask]
return fractions1, fractions2
1 change: 1 addition & 0 deletions analysis_replication/api_host.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
http://oncopathtk.org/api
2 changes: 1 addition & 1 deletion analysis_replication/breast_imc.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ def test(host):
values2 = df[df['cohort'] == '2']['proximity, KRT14+ CK+ and KRT7+ CK+']
# handle_expected_actual(1.6216, mean2 / mean1)
# # handle_expected_actual(1.69, mean2 / mean1)
compare(values1, values2, expected_fold=1.6216, show_pvalue=True)
compare(values1, values2, expected_fold=1.6216, show_pvalue=True, show_auc=True)

df = access.proximity([KRT['14'], KRT['5']])
values2 = df[df['cohort'] == '2']['proximity, KRT14+ CK+ and KRT5+ CK+']
Expand Down
6 changes: 3 additions & 3 deletions analysis_replication/luad_imc.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,17 +14,17 @@ def test(host):
df = access.spatial_autocorrelation('B cell')
values1 = df[df['cohort'] == '1']['spatial autocorrelation, B cell']
values2 = df[df['cohort'] == '2']['spatial autocorrelation, B cell']
compare(values1, values2, expected_fold=1.571, show_pvalue=True)
compare(values1, values2, expected_fold=1.513, show_pvalue=True)

df = access.neighborhood_enrichment(['CD163+ macrophage', 'Regulatory T cell'])
values1 = df[df['cohort'] == '1']['neighborhood enrichment, CD163+ macrophage and Regulatory T cell']
values2 = df[df['cohort'] == '2']['neighborhood enrichment, CD163+ macrophage and Regulatory T cell']
compare(values1, values2, expected_fold=797.46, do_log_fold=True, show_pvalue=True)
compare(values1, values2, expected_fold=467.1, do_log_fold=True, show_pvalue=True)

df = access.neighborhood_enrichment(['CD163+ macrophage', 'Endothelial cell'])
values1 = df[df['cohort'] == '1']['neighborhood enrichment, CD163+ macrophage and Endothelial cell']
values2 = df[df['cohort'] == '2']['neighborhood enrichment, CD163+ macrophage and Endothelial cell']
compare(values1, values2, expected_fold=9.858, do_log_fold=True, show_pvalue=True)
compare(values1, values2, expected_fold=5.336, do_log_fold=True, show_pvalue=True)

klrd1 = {'positive_markers': ['KLRD1'], 'negative_markers': []}
cd14 = {'positive_markers': ['CD14'], 'negative_markers': []}
Expand Down
116 changes: 97 additions & 19 deletions analysis_replication/melanoma_ici.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
"""Data analysis script for one dataset."""
import sys

from numpy import mean

from accessors import DataAccessor
from accessors import get_default_host
from accessors import univariate_pair_compare as compare
Expand All @@ -11,25 +9,104 @@ def test(host):
study = 'Melanoma CyTOF ICI'
access = DataAccessor(study, host=host)

antigen_experienced_cytotoxic = {'positive_markers': ['CD8A', 'CD3', 'CD45RO'], 'negative_markers': []}
name1 = 'Cytotoxic T cell antigen-experienced'
name2 = 'Melanoma'

print('Not especially statistically significant, but matches prior finding:')
df = access.proximity([name1, name2])
prox = f'proximity, {name1} and {name2}'
print(f'Proximity of {name1} to {name2}')
values1 = df[df['cohort'] == '1'][prox]
values2 = df[df['cohort'] == '2'][prox]
compare(values1, values2, expected_fold=1.66, show_pvalue=True, show_auc=True)

df = access.proximity([name2, name1])
prox = f'proximity, {name2} and {name1}'
print(f'Proximity of {name2} to {name1}')
values1 = df[df['cohort'] == '1'][prox]
values2 = df[df['cohort'] == '2'][prox]
compare(values1, values2, expected_fold=1.34, show_pvalue=True, show_auc=True)

df = access.neighborhood_enrichment([name1, name2])
ne = f'neighborhood enrichment, {name1} and {name2}'
print(f'Neighborhood enrichment for {name1} w.r.t. {name2}.')
values1 = df[df['cohort'] == '1'][ne]
values2 = df[df['cohort'] == '2'][ne]
compare(values1, values2, expected_fold=0.75, show_pvalue=True)

df = access.neighborhood_enrichment([name2, name1])
ne = f'neighborhood enrichment, {name2} and {name1}'
print(f'Neighborhood enrichment for {name2} w.r.t. {name1}.')
values1 = df[df['cohort'] == '1'][ne]
values2 = df[df['cohort'] == '2'][ne]
compare(values1, values2, expected_fold=0.405, show_pvalue=True)

# Two phenotypes spatial
print('\nResults involving spatial metrics for 2 phenotypes (shown in both orders for reference)')
for p1, p2, fold in zip(
['Cytotoxic T cell antigen-experienced', 'T regulatory cells', 'Cytotoxic T cell antigen-experienced', 'Naive cytotoxic T cell'],
['T regulatory cells', 'Cytotoxic T cell antigen-experienced', 'Naive cytotoxic T cell', 'Cytotoxic T cell antigen-experienced'],
[0.285, 0.369, 0.299, 0.5],
):
df = access.neighborhood_enrichment([p1, p2])
ne = f'neighborhood enrichment, {p1} and {p2}'
values1 = df[df['cohort'] == '1'][ne].dropna()
values2 = df[df['cohort'] == '2'][ne].dropna()
print(f'Neighborhood enrichment for {p1} and {p2}')
compare(values1, values2, expected_fold=fold, show_pvalue=True, do_log_fold=True, show_auc=True)

for p1, p2, fold in zip(
['Naive cytotoxic T cell', 'T regulatory cells', 'Naive cytotoxic T cell', 'T helper cell antigen-experienced'],
['T regulatory cells', 'Naive cytotoxic T cell', 'T helper cell antigen-experienced', 'Naive cytotoxic T cell'],
[0.683, 0.417, 0.899, 0.400],
):
df = access.proximity([p1, p2])
prox = f'proximity, {p1} and {p2}'
print(f'Proximity of {p1} to {p2}')
values1 = df[df['cohort'] == '1'][prox]
values2 = df[df['cohort'] == '2'][prox]
compare(values1, values2, expected_fold=fold, show_pvalue=True, show_auc=True)

print('\nSingle channel or phenotype results')

def p(channel):
return {'positive_markers':[channel], 'negative_markers':[]}

CD3CD45RO = {'positive_markers': ['CD3', 'CD45RO'], 'negative_markers': []}
name = 'CD3+ CD45RO+'
fold = 0.000878
df = access.neighborhood_enrichment([CD3CD45RO, CD3CD45RO])
ne = f'neighborhood enrichment, {name} and {name}'
values1 = df[df['cohort'] == '1'][ne].dropna()
values2 = df[df['cohort'] == '2'][ne].dropna()
print(f'Neighborhood enrichment for {name} and itself')
compare(values1, values2, expected_fold=fold, show_pvalue=True, do_log_fold=True, show_auc=True)

### Channel
for channel, fold in zip(['ICOS'], [0.791]):
df = access.proximity([p(channel), p(channel)])
prox = f'proximity, {channel}+ and {channel}+'
print(f'Proximity of {channel} to {channel}')
values1 = df[df['cohort'] == '1'][prox]
values2 = df[df['cohort'] == '2'][prox]
compare(values1, values2, expected_fold=fold, show_pvalue=True, show_auc=True)

# The average value of the neighborhood enrichment score for phenotype(s) CD3+ CD45RO+ CD8A+ and
# Melanoma is 1.39 times higher in cohort 1 than in cohort 2.
df = access.neighborhood_enrichment([antigen_experienced_cytotoxic, 'Melanoma'])
values1 = df[df['cohort'] == '1']['neighborhood enrichment, CD8A+ CD3+ CD45RO+ and Melanoma']
values2 = df[df['cohort'] == '2']['neighborhood enrichment, CD8A+ CD3+ CD45RO+ and Melanoma']
# handle_expected_actual(1.234, mean1 / mean2)
# # handle_expected_actual(1.39, mean1 / mean2)
compare(values2, values1, expected_fold=1.234, do_log_fold=True)
### Phenotype
for phenotype, fold in zip(['B cells'], [0.0826]):
df = access.spatial_autocorrelation(phenotype)
autocorr = f'spatial autocorrelation, {phenotype}'
values1 = df[df['cohort'] == '1'][autocorr]
values2 = df[df['cohort'] == '2'][autocorr]
print(f'Spatial autocorrelation for {phenotype}')
compare(values1, values2, expected_fold=fold, show_pvalue=True, show_auc=True)

# On average, the fraction of cells that are CD8A+ CD3+ CD45RO+ and MKI67+ is 1.41 times higher
# in cohort 1 than in cohort 2.
proliferative = {'positive_markers': ['MKI67'], 'negative_markers': []}
df = access.counts([antigen_experienced_cytotoxic, proliferative])
fractions = df['CD8A+ CD3+ CD45RO+ and MKI67+'] / df['all cells']
fractions1 = fractions[df['cohort'] == '1']
fractions2 = fractions[df['cohort'] == '2']
compare(fractions2, fractions1, expected_fold=1.41)
for phenotype, fold in zip(['T helper cell antigen-experienced', 'Cytotoxic T cell antigen-experienced'], [12.42, 0.278]):
df = access.neighborhood_enrichment([phenotype, phenotype])
ne = f'neighborhood enrichment, {phenotype} and {phenotype}'
values1 = df[df['cohort'] == '1'][ne].dropna()
values2 = df[df['cohort'] == '2'][ne].dropna()
print(f'Neighborhood enrichment for {phenotype} and itself')
compare(values1, values2, expected_fold=fold, show_pvalue=True, show_auc=True)


if __name__=='__main__':
Expand All @@ -40,4 +117,5 @@ def test(host):
host = get_default_host(None)
if host is None:
raise RuntimeError('Could not determine API server.')
print(f'Using API server: {host}')
test(host)
Loading

0 comments on commit 15855bb

Please sign in to comment.