Skip to content

Commit

Permalink
full intersect of markers vs rank_genes_groups
Browse files Browse the repository at this point in the history
  • Loading branch information
ktpolanski committed Feb 29, 2024
1 parent 20d782f commit c34cae9
Show file tree
Hide file tree
Showing 2 changed files with 23 additions and 2 deletions.
1 change: 1 addition & 0 deletions sctk/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
volcano_plot,
calc_marker_stats,
filter_marker_stats,
intersect_with_rank_genes_groups,
top_markers,
test_markers,
)
Expand Down
24 changes: 22 additions & 2 deletions sctk/_markers.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,7 @@ def draw(
plt.show()


#this is meant to be run on log1p normalised data
def calc_marker_stats(ad, groupby, genes=None, use_rep="raw", inplace=False, partial=False):
if ad.obs[groupby].dtype.name != "category":
raise ValueError('"%s" is not categorical' % groupby)
Expand Down Expand Up @@ -233,12 +234,31 @@ def filter_marker_stats(
["top_frac_group", "mean_diff", "frac_diff"], ascending=[True, False, False]
)
filtered["top_frac_group"] = filtered["top_frac_group"].astype("category")
filtered["top_frac_group"].cat.reorder_categories(
list(stats_df["top_frac_group"].cat.categories), inplace=True
filtered["top_frac_group"] = filtered["top_frac_group"].cat.reorder_categories(
list(stats_df["top_frac_group"].cat.categories)
)
return filtered


def intersect_with_rank_genes_groups(mks, adata, pvals_adj_thresh=0.05):
    """Keep only the markers that ``rank_genes_groups`` also calls for the same cluster.

    Parameters
    ----------
    mks
        Marker table whose index holds the gene names and whose
        ``"top_frac_group"`` column holds the cluster each gene marks.
    adata
        Object passed to ``sc.get.rank_genes_groups_df``; it needs
        ``sc.tl.rank_genes_groups()`` to have been run on it beforehand.
    pvals_adj_thresh
        A ``rank_genes_groups`` hit is kept only if its ``pvals_adj`` is
        strictly below this value.

    Returns
    -------
    The rows of ``mks`` whose (gene, cluster) pair is also an overexpressed
    ``rank_genes_groups`` hit, in their original order.
    """
    # retrieve full rank_genes_groups results space
    rgg = sc.get.rank_genes_groups_df(adata, group=None)
    # subset to overexpressed markers for cluster:
    # pvals_adj below threshold, and positive logfoldchanges
    overexpressed = (rgg["pvals_adj"] < pvals_adj_thresh) & (rgg["logfoldchanges"] > 0)
    rgg = rgg.loc[overexpressed, :]
    # Compare (gene, cluster) tuples rather than "GENE_CLUSTER" strings: a
    # joined string is ambiguous when either name contains an underscore, and
    # string concatenation fails outright for non-string cluster labels.
    # Both sides are coerced to str so that e.g. integer and string cluster
    # labels still line up.
    rgg_pairs = {
        (str(gene), str(group)) for gene, group in zip(rgg["names"], rgg["group"])
    }
    # which of the sctk markers are in the rank_genes_groups markers?
    mask = np.array(
        [
            (str(gene), str(group)) in rgg_pairs
            for gene, group in zip(mks.index, mks["top_frac_group"])
        ],
        dtype=bool,
    )
    # subset and return sctk marker list
    return mks.loc[mask, :]


def top_markers(df, top_n=5, groupby="top_frac_group"):
    """Return the index labels of the first ``top_n`` rows of every group.

    Parameters
    ----------
    df
        Table to pick rows from; it is grouped on ``groupby``.
    top_n
        Number of leading rows to take from each group.
    groupby
        Column (or other ``DataFrame.groupby`` key) that defines the groups.

    Returns
    -------
    A plain list with the index labels of the selected rows.
    """
    per_group = df.groupby(groupby)
    # head() keeps the first top_n rows of each group as they appear in df
    leading_rows = per_group.head(top_n)
    return leading_rows.index.to_list()

Expand Down

0 comments on commit c34cae9

Please sign in to comment.