Skip to content

Commit

Permalink
Update base python version to 3.11 (#17)
Browse files Browse the repository at this point in the history
* update base python version to 3.11

* remove the deprecated universal-newlines mode ("rU") from open() calls
  • Loading branch information
rvhonorato authored Apr 18, 2024
1 parent eaedbae commit 1df641c
Show file tree
Hide file tree
Showing 5 changed files with 108 additions and 61 deletions.
2 changes: 1 addition & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#==============================================================================================
FROM python:3.8 as base
FROM python:3.11 as base

LABEL author="Rodrigo V. Honorato <[email protected]>"

Expand Down
50 changes: 32 additions & 18 deletions libwhiscy/access.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,17 +5,19 @@
def calculate_accessibility(pdb_file_name, output_file_name):
"""Calculates the SASA using freesasa.
Uses the command line interface and not the Python bindings to be able to get
Uses the command line interface and not the Python bindings to be able to get
a RSA NACCESS-format like file.
"""
cmd = "freesasa {} -n 20 --format=rsa --radii=naccess -o {}".format(pdb_file_name, output_file_name)
cmd = "freesasa {} -n 20 --format=rsa --radii=naccess -o {}".format(
pdb_file_name, output_file_name
)
try:
subprocess.run(cmd, shell=True)
except:
subprocess.check_call(cmd, shell=True)


class ResidueSASA():
class ResidueSASA:
def __init__(self, chain_id, name, number, tot_rel, sd_rel, bk_rel):
self.chain_id = chain_id
self.name = name
Expand All @@ -28,10 +30,10 @@ def __init__(self, chain_id, name, number, tot_rel, sd_rel, bk_rel):
def parse_rsa_file(rsa_file_name):
"""Parses a .rsa NACCESS (or freesasa) file and gets the relative SASAs"""
residue_sasas = []
with open(rsa_file_name, "rU") as input_handle:
with open(rsa_file_name, "r") as input_handle:
for line in input_handle:
if line.startswith("RES"):
if line[13] == ' ':
if line[13] == " ":
# Avoid alternative positions
name = line[4:7]
chain_id = line[8]
Expand All @@ -50,13 +52,14 @@ def parse_rsa_file(rsa_file_name):
except ValueError:
bk_rel = -99.9

residue_sasas.append(ResidueSASA(chain_id, name, number,
tot_rel, sd_rel, bk_rel))
residue_sasas.append(
ResidueSASA(chain_id, name, number, tot_rel, sd_rel, bk_rel)
)

return residue_sasas


def create_cutoff_files(rsa_file_name, pdb_code, chain_id, cutoffs, path='.'):
def create_cutoff_files(rsa_file_name, pdb_code, chain_id, cutoffs, path="."):
"""Creates three output files depending on cutoffs of accessibility.
- pdb_code.sur: rsa_file_name filtered by surface sa_pred_cutoff
Expand All @@ -67,28 +70,39 @@ def create_cutoff_files(rsa_file_name, pdb_code, chain_id, cutoffs, path='.'):

# Create .sur file
output_file_name = os.path.join(path, "{0}_{1}.sur".format(pdb_code, chain_id))
cutoff = cutoffs['sa_pred_cutoff']
with open(output_file_name, 'w') as output_handle:
cutoff = cutoffs["sa_pred_cutoff"]
with open(output_file_name, "w") as output_handle:
for res_sasa in residue_sasas:
if res_sasa.chain_id == chain_id:
if res_sasa.tot_rel >= cutoff or res_sasa.sd_rel >= cutoff or res_sasa.bk_rel >= cutoff:
if (
res_sasa.tot_rel >= cutoff
or res_sasa.sd_rel >= cutoff
or res_sasa.bk_rel >= cutoff
):
output_handle.write("{}{}".format(res_sasa.number, os.linesep))

# Create .suract file
output_file_name = os.path.join(path, "{0}_{1}.suract".format(pdb_code, chain_id))
cutoff = cutoffs['sa_act_cutoff']
with open(output_file_name, 'w') as output_handle:
cutoff = cutoffs["sa_act_cutoff"]
with open(output_file_name, "w") as output_handle:
for res_sasa in residue_sasas:
if res_sasa.chain_id == chain_id:
if res_sasa.tot_rel >= cutoff or res_sasa.sd_rel >= cutoff or res_sasa.bk_rel >= cutoff:
if (
res_sasa.tot_rel >= cutoff
or res_sasa.sd_rel >= cutoff
or res_sasa.bk_rel >= cutoff
):
output_handle.write("{}{}".format(res_sasa.number, os.linesep))

# Create .lac file
output_file_name = os.path.join(path, "{0}_{1}.lac".format(pdb_code, chain_id))
cutoff = cutoffs['sa_pred_cutoff']
with open(output_file_name, 'w') as output_handle:
cutoff = cutoffs["sa_pred_cutoff"]
with open(output_file_name, "w") as output_handle:
for res_sasa in residue_sasas:
if res_sasa.chain_id == chain_id:
if (res_sasa.tot_rel < cutoff and res_sasa.tot_rel > 0) and \
res_sasa.sd_rel < cutoff and res_sasa.bk_rel < cutoff:
if (
(res_sasa.tot_rel < cutoff and res_sasa.tot_rel > 0)
and res_sasa.sd_rel < cutoff
and res_sasa.bk_rel < cutoff
):
output_handle.write("{}{}".format(res_sasa.number, os.linesep))
2 changes: 1 addition & 1 deletion libwhiscy/hssp.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,7 @@ def hssp_file_to_phylip(hssp_file_name, phylip_file_name, chain_id, master_seque
line_buffer = []
parsing_proteins = False
prot_line_buffer = []
with open(hssp_file_name, "rU") as handle:
with open(hssp_file_name, "r") as handle:
for line in handle:
line = line.rstrip(os.linesep)
if line.startswith("NCHAIN"):
Expand Down
51 changes: 27 additions & 24 deletions libwhiscy/pam_calc.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,22 +9,22 @@
from libwhiscy.pam_data import code, logpameigval, pameigvec, pameigvecinv


class Distance():
def __init__(self, seq=0, dist=0., mat=None, expect=None):
class Distance:
def __init__(self, seq=0, dist=0.0, mat=None, expect=None):
self.seq = seq
self.dist = dist
if mat is None:
self.mat = [[0 for x in range(20)] for y in range(20)]
else:
self.mat = mat
if expect is None:
self.expect = [0. for x in range(20)]
self.expect = [0.0 for x in range(20)]
else:
self.expect = expect


def get_pam_assemble(distance):
m = [[0. for x in range(20)] for y in range(20)]
m = [[0.0 for x in range(20)] for y in range(20)]
disteigval = [0.0 for x in range(20)]

for n in range(20):
Expand All @@ -47,12 +47,12 @@ def pam_load_sequences(alignment_file, distance_file):
if not os.path.exists(alignment_file):
raise Exception("Sequence file {0} does not exist".format(alignment_file))

refseq = ''
refseq = ""
seqtodis = []

seqnr = 0
distances = []
with open(distance_file, "rU") as input_distances:
with open(distance_file, "r") as input_distances:
first_line = input_distances.readline().rstrip(os.linesep)
fields = first_line.split()
try:
Expand All @@ -76,7 +76,9 @@ def pam_load_sequences(alignment_file, distance_file):
raise ValueError()
dist = val
except ValueError:
raise Exception("Reading error in distance file {0}".format(distance_file))
raise Exception(
"Reading error in distance file {0}".format(distance_file)
)

m = get_pam_assemble(100 * dist)

Expand All @@ -87,10 +89,10 @@ def pam_load_sequences(alignment_file, distance_file):

d = Distance(seq, dist, m, expect)
distances.append(d)

seqlen = 0
sequences = [[] for _ in range(seqnr)]
with open(alignment_file, "rU") as input_alignment:
with open(alignment_file, "r") as input_alignment:
first_line = input_alignment.readline().rstrip(os.linesep)
fields = first_line.split()
seqlen = int(fields[1])
Expand All @@ -108,7 +110,9 @@ def pam_load_sequences(alignment_file, distance_file):
sequences[n].append(code[ord(c)])

# Sorted in ascending order as the C++ qsort
sorted_distances = sorted(distances, key=lambda distance: distance.dist, reverse=False)
sorted_distances = sorted(
distances, key=lambda distance: distance.dist, reverse=False
)
seqtodis = [0 for _ in range(seqnr)]
for n in range(seqnr):
seqtodis[sorted_distances[n].seq] = n
Expand All @@ -135,21 +139,21 @@ def pam_load_sequences(alignment_file, distance_file):
def pam_calc_similarity(pos, seqnr, seq, dis):
nextnr = 0
currnr = 0
nextdist = 0.
currdist = 0.
lastdist = 0.
scores = [0. for _ in range(seqnr)]
distances = [0. for _ in range(seqnr)]
nextdist = 0.0
currdist = 0.0
lastdist = 0.0
scores = [0.0 for _ in range(seqnr)]
distances = [0.0 for _ in range(seqnr)]
for n in range(1, seqnr):
if seq[dis[n].seq][pos] >= 0:
nextnr = n
nextdist = dis[n].dist
break
if n == seqnr:
if n == seqnr:
return 0, distances, scores

sim = 0.
totsim = 0.
sim = 0.0
totsim = 0.0
weight = 0.5 * nextdist
totweight = weight

Expand All @@ -167,20 +171,19 @@ def pam_calc_similarity(pos, seqnr, seq, dis):
break
if n == (seqnr - 1):
break
if isclose(currdist, lastdist):
if isclose(currdist, lastdist):
continue

m = dis[currnr].mat
vcomp = seq[dis[currnr].seq][pos]
weight = .5 * (nextdist - lastdist)
# This scaling factor of 2.4 is totally arbitrary, but gives a nice range of scores.
weight = 0.5 * (nextdist - lastdist)
# This scaling factor of 2.4 is totally arbitrary, but gives a nice range of scores.
# Scaling does not affect the final ranking of scores whatsoever
sim = 2.4 * (m[vref][vcomp] - dis[currnr].expect[vref])

totsim += weight * sim
distances[counter] = currdist
scores[counter] = totsim
counter += 1

return counter, distances, scores

Loading

0 comments on commit 1df641c

Please sign in to comment.