Skip to content

Commit

Permalink
stabilize the placement field order to avoid incompatible placements …
Browse files Browse the repository at this point in the history
…in qiita db due to newer pplacer version
  • Loading branch information
sjanssen2 committed Nov 25, 2024
1 parent e02c9cb commit 4a2eaad
Show file tree
Hide file tree
Showing 2 changed files with 92 additions and 2 deletions.
54 changes: 53 additions & 1 deletion qp_deblur/deblur.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,57 @@ def generate_deblur_workflow_commands(preprocessed_fp, out_dir, parameters):
return cmd


def _reorder_fields(plcmnt, obs_order_fields, EXP_ORDER_FIELDS=[
'edge_num', 'likelihood', 'like_weight_ratio', 'distal_length',
'pendant_length']):
"""Re-orders field information in all lines for a list of placements.
Parameters
----------
plcmnt : [[float]]
The original placement as a list of lists (=lines) with multiple fields.
obs_order_fields : [str]
The current order of placement field information.
EXP_ORDER_FIELDS : [str]
Desired order placement field information.
Returns
-------
Reordered placement: [[float]]
Notes
-----
We started to store placements in qiita produced by SEPP, which
internally used pplacer.
The SEPP-bundled binary of pplacer is version v1.1.alpha13-0-g1ec7786
and returns placements in the EXP_ORDER_FIELDS order. Later versions,
specifically v1.1.alpha17 produces a different field order for
placements! We therefore cannot combine raw placements of these
different versions. Therefore, we here ensure that field order of
placements strictly adhers to what we expect.
A placement (plcmnt) is composed of a set of
potential placement positions (line) and every line
is composed of multiple fields. For example
[[-24653.717, 351337, 0.14285715, 5.000002e-07, 6.113515e-06],
[-24653.717, 351341, 0.14285715, 5.000002e-07, 6.113515e-06],
[-24653.717, 348440, 0.14285715, 5.000002e-07, 6.113515e-06],
[-24653.717, 351336, 0.14285715, 5.000002e-07, 6.113515e-06],
[-24653.717, 351353, 0.14285715, 5.000002e-07, 6.113515e-06],
[-24653.717, 351354, 0.14285715, 5.000002e-07, 6.113515e-06],
[-24653.717, 351302, 0.14285715, 5.000002e-07, 6.113515e-06]]
We iterate through all lines and re-order the fields by
a) iterating over the current index of the field: i
b) asking which position the current index i shall have in the
desired field order: EXP_ORDER_FIELDS
c) obtaining the index of the actual index
d) grep the field at this latter position
"""
return [[line[obs_order_fields.index(EXP_ORDER_FIELDS[i])]
for i in range(len(line))]
for line in plcmnt]


def generate_sepp_placements(seqs, out_dir, threads, reference_phylogeny=None,
reference_alignment=None):
"""Generates the SEPP commands
Expand Down Expand Up @@ -143,7 +194,8 @@ def generate_sepp_placements(seqs, out_dir, threads, reference_phylogeny=None,
if exists(file_placements):
with open(file_placements, 'r') as fh_placements:
plcmnts = json.loads(fh_placements.read())
return {seqlbl[0]: p['p']
obs_order_fields = plcmnts['fields']
result = {seqlbl[0]: _reorder_fields(p['p'], obs_order_fields)
for p in plcmnts['placements']
for seqlbl in p['nm']}
else:
Expand Down
40 changes: 39 additions & 1 deletion qp_deblur/tests/test_sepp.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,8 @@

from qp_deblur.deblur import (generate_sepp_placements,
generate_insertion_trees,
_generate_template_rename)
_generate_template_rename,
_reorder_fields)


TESTPREFIX = 'foo'
Expand Down Expand Up @@ -297,5 +298,42 @@ def test__generate_template_rename_errors(self):
rmtree(out_dir)


class pplacerReorderTests(TestCase):
    """Tests for _reorder_fields, using a minimal jplace-like dictionary."""

    # One placement ("p") with seven candidate lines; the columns of every
    # line are named by the "fields" entry at the bottom.
    jplace = {
        "tree": "",
        "placements": [{
            "p": [
                [351337, -24653.717, 0.14285715, 5.000002E-7, 0.000006113515],
                [351341, -24653.717, 0.14285715, 5.000002E-7, 0.000006113515],
                [348440, -24653.717, 0.14285715, 5.000002E-7, 0.000006113515],
                [351336, -24653.717, 0.14285715, 5.000002E-7, 0.000006113515],
                [351353, -24653.717, 0.14285715, 5.000002E-7, 0.000006113515],
                [351354, -24653.717, 0.14285715, 5.000002E-7, 0.000006113515],
                [351302, -24653.717, 0.14285715, 5.000002E-7, 0.000006113515]
            ],
            "nm": [["TGG...", 1]]}],
        "metadata": {
            "invocation": "SEPP-generated json file (sepp 2)."
        },
        "version": 1,
        "fields": ["edge_num", "likelihood", "like_weight_ratio",
                   "distal_length", "pendant_length"]}

    def test__reorder_fields(self):
        # _reorder_fields operates on the list of lines of ONE placement,
        # i.e. the value stored under 'p', not on the list of placements.
        lines = self.jplace['placements'][0]['p']
        fields = self.jplace['fields']

        # keep the very same order
        obs = _reorder_fields(
            lines, fields,
            EXP_ORDER_FIELDS=[
                'edge_num', 'likelihood', 'like_weight_ratio',
                'distal_length', 'pendant_length'])
        self.assertEqual(
            obs[0],
            [351337, -24653.717, 0.14285715, 5.000002E-7, 0.000006113515])

        # flip edge_num with pendant_length
        obs = _reorder_fields(
            lines, fields,
            EXP_ORDER_FIELDS=[
                'pendant_length', 'likelihood', 'like_weight_ratio',
                'distal_length', 'edge_num'])
        self.assertEqual(
            obs[0],
            [0.000006113515, -24653.717, 0.14285715, 5.000002E-7, 351337])


if __name__ == '__main__':
main()

0 comments on commit 4a2eaad

Please sign in to comment.