Skip to content

Commit

Permalink
Merge branch 'master' of github.com:saezlab/pypath
Browse files Browse the repository at this point in the history
  • Loading branch information
npalacioescat committed Feb 27, 2020
2 parents 791e820 + 52e1e50 commit 8053a6b
Show file tree
Hide file tree
Showing 3 changed files with 133 additions and 30 deletions.
152 changes: 123 additions & 29 deletions src/pypath/core/network.py
Original file line number Diff line number Diff line change
Expand Up @@ -431,7 +431,7 @@ def __init__(
df_dtype = None,
pickle_file = None,
ncbi_tax_id = 9606,
allow_loops = True,
allow_loops = None,
**kwargs
):

Expand Down Expand Up @@ -521,6 +521,7 @@ def load(
cache_files = None,
only_directions = False,
pickle_file = None,
allow_loops = None,
):
"""
Loads data from a network resource or a collection of resources.
Expand Down Expand Up @@ -553,6 +554,7 @@ def load(
'keep_raw': keep_raw,
'top_call': False,
'only_directions': only_directions,
'allow_loops': allow_loops,
}

exclude = common.to_set(exclude)
Expand Down Expand Up @@ -622,6 +624,7 @@ def load_resource(
redownload = None,
keep_raw = False,
only_directions = False,
allow_loops = None,
**kwargs
):
"""
Expand Down Expand Up @@ -664,15 +667,24 @@ def load_resource(
redownload = redownload,
keep_raw = keep_raw,
)
self._add_edge_list(only_directions = only_directions)

self.organisms_check()
self.remove_zero_degree()
allow_loops = self._allow_loops(
allow_loops = allow_loops,
resource = resource,
)

if not self.allow_loops:
self._log('Loops allowed for resource `%s`: %s' % (
resource.name,
allow_loops,
))

self.remove_loops()
self._add_edge_list(
only_directions = only_directions,
allow_loops = allow_loops,
)

self.organisms_check()
self.remove_zero_degree()

self._log(
'Completed: loading network data from '
Expand Down Expand Up @@ -1664,6 +1676,7 @@ def _add_edge_list(
edge_list = False,
regulator = False,
only_directions = False,
allow_loops = None,
):
"""
Adds edges to the network from *edge_list* obtained from file or
Expand All @@ -1684,6 +1697,8 @@ def _add_edge_list(

self._log('Adding preprocessed edge list to existing network.')

allow_loops = self._allow_loops(allow_loops = allow_loops)

if not edge_list:

if (
Expand Down Expand Up @@ -1713,11 +1728,15 @@ def _add_edge_list(

return False

edges = []
self._filtered_loops = 0

for e in edge_list:

self._add_update_edge(e, only_directions = only_directions)
self._add_update_edge(
e,
allow_loops = allow_loops,
only_directions = only_directions,
)

self._log(
'New network resource added, current number '
Expand All @@ -1727,12 +1746,19 @@ def _add_edge_list(
)
)

if not allow_loops:

self._log('Loop edges discarded: %u' % self._filtered_loops)

delattr(self, '_filtered_loops')

self.raw_data = None


def _add_update_edge(
self,
edge,
allow_loops = None,
only_directions = False,
):
"""
Expand Down Expand Up @@ -1817,6 +1843,8 @@ def _add_update_edge(
edge['attrs_node_b'],
)

allow_loops = allow_loops or self.allow_loops

refs = {refs_mod.Reference(pmid) for pmid in refs}

entity_a = entity_mod.Entity(
Expand All @@ -1839,6 +1867,11 @@ def _add_update_edge(
b = entity_b,
)

if not allow_loops and interaction.is_loop():

self._filtered_loops += 1
return

if is_directed:

interaction.add_evidence(
Expand Down Expand Up @@ -2703,31 +2736,14 @@ def remove_htp(self, threshold = 50, keep_directed = False):
)
)

interactions_per_reference = self.numof_interactions_per_reference()
interactions_by_reference = self.interactions_by_reference()

htp_refs = {
ref
for ref, cnt in iteritems(interactions_per_reference)
if cnt > threshold
}

to_remove = set()
to_remove = self.htp_interactions(
threshold = threshold,
ignore_directed = keep_directed,
)

ecount_before = self.ecount
vcount_before = self.vcount

for key, ia in iteritems(self.interactions):

if (
not ia.get_references() - htp_refs and (
not keep_directed or
not ia.is_directed()
)
):

to_remove.add(key)

for key in to_remove:

self.remove_interaction(*key)
Expand All @@ -2746,6 +2762,54 @@ def remove_htp(self, threshold = 50, keep_directed = False):
)


def htp_references(self, threshold = 50):
    """
    Collects the high-throughput references.

    A reference is selected when the count reported for it by
    ``numof_interactions_per_reference`` is strictly greater than
    ``threshold``.

    :param threshold:
        References with a count above this value are considered
        high-throughput.

    :returns:
        Set of the selected references.
    """

    counts_by_ref = self.numof_interactions_per_reference()

    selected_refs = set()

    for reference, n_interactions in iteritems(counts_by_ref):

        # strict comparison: a count equal to the threshold is kept out
        if n_interactions > threshold:

            selected_refs.add(reference)

    self._log(
        'High-throughput references collected: %u' % len(selected_refs)
    )

    return selected_refs


def htp_interactions(self, threshold = 50, ignore_directed = False):
    """
    Collects the interactions supported only by high-throughput
    references, as selected by ``htp_references``.

    :param threshold:
        Passed to ``htp_references``.
    :param ignore_directed:
        If true, interactions for which ``is_directed()`` is true are
        never selected.

    :returns:
        Set of interaction keys (tuples of entities).
    """

    htp_refs = self.htp_references(threshold = threshold)

    selected_keys = set()

    for key, interaction in iteritems(self.interactions):

        # directed interactions are left out on request
        if ignore_directed and interaction.is_directed():

            continue

        # any reference outside of the high-throughput set disqualifies
        # the interaction
        # NOTE(review): an interaction with no references at all also
        # passes this check (assuming `get_references` returns a set)
        # -- confirm this is intended
        if interaction.get_references() - htp_refs:

            continue

        selected_keys.add(key)

    self._log(
        'High-throughput interactions collected: %u' % len(selected_keys)
    )

    return selected_keys


def remove_undirected(self, min_refs = None):

self._log(
Expand Down Expand Up @@ -2918,6 +2982,7 @@ def load_transcription(
exclude = None,
reread = False,
redownload = False,
allow_loops = None,
**kwargs
):

Expand All @@ -2929,6 +2994,7 @@ def load_transcription(
levels = dorothea_levels,
reread = reread,
redownload = redownload,
allow_loops = allow_loops,
)

if original_resources:
Expand All @@ -2944,6 +3010,7 @@ def load_transcription(
reread = reread,
redownload = redownload,
exclude = exclude,
allow_loops = allow_loops,
)

if make_df:
Expand All @@ -2962,6 +3029,7 @@ def transcription(
redownload = False,
make_df = False,
ncbi_tax_id = 9606,
allow_loops = None,
**kwargs
):
"""
Expand Down Expand Up @@ -3824,6 +3892,32 @@ def _add_method(cls, method_name, method, signature = None, doc = None):
)


def _allow_loops(self, allow_loops = None, resource = None):
    """
    Resolves the effective `allow_loops` value from the method argument,
    the instance attribute and the module level settings, in this order
    of precedence.

    :param allow_loops:
        Value from the caller; used only if it is a `bool`.
    :param resource:
        Optional resource definition; consulted only if the module level
        setting is list-like and the resource has a `networkinput`
        attribute.

    :returns:
        `bool`: the resolved setting.
    """

    module_default = settings.get('network_allow_loops')

    # from the arguments of the actual call
    if isinstance(allow_loops, bool):

        return allow_loops

    # from the current instance
    if isinstance(self.allow_loops, bool):

        return self.allow_loops

    # interaction type specific settings from the module level:
    # the setting is a collection of the interaction types to test
    # the resource against
    if (
        isinstance(module_default, common.list_like) and
        hasattr(resource, 'networkinput')
    ):

        return resource.networkinput.interaction_type in module_default

    # general settings from the module level; a list-like setting
    # without a usable resource also ends up here, evaluated by its
    # truth value
    return bool(module_default)


Network._generate_get_methods()
Network._generate_partners_methods()
Network._generate_count_methods()
Expand Down
10 changes: 9 additions & 1 deletion src/pypath/omnipath/export.py
Original file line number Diff line number Diff line change
Expand Up @@ -258,7 +258,15 @@ def process_interaction(self, ia):
directed = bool(ia.direction[nodes])
directed_rev = bool(ia.direction[tuple(reversed(nodes))])

if not directed and (_dir == 'b_a' or directed_rev):
if (
(
not directed and
(_dir == 'b_a' or directed_rev)
) or (
ia.is_loop() and
_dir == 'b_a'
)
):

continue

Expand Down
1 change: 1 addition & 0 deletions src/pypath/share/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,7 @@
'deathdomain': 'deathdomain.tsv',
'hpmr_preprocessed': 'hpmr_preprocessed.pickle',
'network_expand_complexes': False,
'network_allow_loops': True,
'network_keep_original_names': True,
'network_pickle_cache': True,
'go_pickle_cache': True,
Expand Down

0 comments on commit 8053a6b

Please sign in to comment.