Update base python version to 3.11 (#17)

* Update the base Python version to 3.11
* Remove the deprecated "U" (universal newlines) mode flag from open() calls, which Python 3.11 no longer accepts
haddocking · Apr 18, 2024 · 1df641c · 1df641c
1 parent eaedbae
commit 1df641c
Show file tree

Hide file tree

Showing 5 changed files with 108 additions and 61 deletions.
diff --git a/Dockerfile b/Dockerfile
@@ -1,5 +1,5 @@
 #==============================================================================================
-FROM python:3.8 as base
+FROM python:3.11 as base
 
 LABEL author="Rodrigo V. Honorato <[email protected]>"
 

diff --git a/libwhiscy/access.py b/libwhiscy/access.py
@@ -5,17 +5,19 @@
 def calculate_accessibility(pdb_file_name, output_file_name):
     """Calculates the SASA using freesasa.
 
-    Uses the command line interface and not the Python bindings to be able to get 
+    Uses the command line interface and not the Python bindings to be able to get
     a RSA NACCESS-format like file.
     """
-    cmd = "freesasa {} -n 20 --format=rsa --radii=naccess -o {}".format(pdb_file_name, output_file_name)
+    cmd = "freesasa {} -n 20 --format=rsa --radii=naccess -o {}".format(
+        pdb_file_name, output_file_name
+    )
     try:
         subprocess.run(cmd, shell=True)
     except:
         subprocess.check_call(cmd, shell=True)
 
 
-class ResidueSASA():
+class ResidueSASA:
     def __init__(self, chain_id, name, number, tot_rel, sd_rel, bk_rel):
         self.chain_id = chain_id
         self.name = name
@@ -28,10 +30,10 @@ def __init__(self, chain_id, name, number, tot_rel, sd_rel, bk_rel):
 def parse_rsa_file(rsa_file_name):
     """Parses a .rsa NACCESS (or freesasa) file and gets the relative SASAs"""
     residue_sasas = []
-    with open(rsa_file_name, "rU") as input_handle:
+    with open(rsa_file_name, "r") as input_handle:
         for line in input_handle:
             if line.startswith("RES"):
-                if line[13] == ' ':
+                if line[13] == " ":
                     # Avoid alternative positions
                     name = line[4:7]
                     chain_id = line[8]
@@ -50,13 +52,14 @@ def parse_rsa_file(rsa_file_name):
                     except ValueError:
                         bk_rel = -99.9
 
-                    residue_sasas.append(ResidueSASA(chain_id, name, number,
-                                                     tot_rel, sd_rel, bk_rel))
+                    residue_sasas.append(
+                        ResidueSASA(chain_id, name, number, tot_rel, sd_rel, bk_rel)
+                    )
 
     return residue_sasas
 
 
-def create_cutoff_files(rsa_file_name, pdb_code, chain_id, cutoffs, path='.'):
+def create_cutoff_files(rsa_file_name, pdb_code, chain_id, cutoffs, path="."):
     """Creates three output files depending on cutoffs of accessibility.
 
     - pdb_code.sur: rsa_file_name filtered by surface sa_pred_cutoff
@@ -67,28 +70,39 @@ def create_cutoff_files(rsa_file_name, pdb_code, chain_id, cutoffs, path='.'):
 
     # Create .sur file
     output_file_name = os.path.join(path, "{0}_{1}.sur".format(pdb_code, chain_id))
-    cutoff = cutoffs['sa_pred_cutoff']
-    with open(output_file_name, 'w') as output_handle:
+    cutoff = cutoffs["sa_pred_cutoff"]
+    with open(output_file_name, "w") as output_handle:
         for res_sasa in residue_sasas:
             if res_sasa.chain_id == chain_id:
-                if res_sasa.tot_rel >= cutoff or res_sasa.sd_rel >= cutoff or res_sasa.bk_rel >= cutoff:
+                if (
+                    res_sasa.tot_rel >= cutoff
+                    or res_sasa.sd_rel >= cutoff
+                    or res_sasa.bk_rel >= cutoff
+                ):
                     output_handle.write("{}{}".format(res_sasa.number, os.linesep))
 
     # Create .suract file
     output_file_name = os.path.join(path, "{0}_{1}.suract".format(pdb_code, chain_id))
-    cutoff = cutoffs['sa_act_cutoff']
-    with open(output_file_name, 'w') as output_handle:
+    cutoff = cutoffs["sa_act_cutoff"]
+    with open(output_file_name, "w") as output_handle:
         for res_sasa in residue_sasas:
             if res_sasa.chain_id == chain_id:
-                if res_sasa.tot_rel >= cutoff or res_sasa.sd_rel >= cutoff or res_sasa.bk_rel >= cutoff:
+                if (
+                    res_sasa.tot_rel >= cutoff
+                    or res_sasa.sd_rel >= cutoff
+                    or res_sasa.bk_rel >= cutoff
+                ):
                     output_handle.write("{}{}".format(res_sasa.number, os.linesep))
 
     # Create .lac file
     output_file_name = os.path.join(path, "{0}_{1}.lac".format(pdb_code, chain_id))
-    cutoff = cutoffs['sa_pred_cutoff']
-    with open(output_file_name, 'w') as output_handle:
+    cutoff = cutoffs["sa_pred_cutoff"]
+    with open(output_file_name, "w") as output_handle:
         for res_sasa in residue_sasas:
             if res_sasa.chain_id == chain_id:
-                if (res_sasa.tot_rel < cutoff and res_sasa.tot_rel > 0) and \
-                    res_sasa.sd_rel < cutoff and res_sasa.bk_rel < cutoff:
+                if (
+                    (res_sasa.tot_rel < cutoff and res_sasa.tot_rel > 0)
+                    and res_sasa.sd_rel < cutoff
+                    and res_sasa.bk_rel < cutoff
+                ):
                     output_handle.write("{}{}".format(res_sasa.number, os.linesep))
diff --git a/libwhiscy/hssp.py b/libwhiscy/hssp.py
@@ -126,7 +126,7 @@ def hssp_file_to_phylip(hssp_file_name, phylip_file_name, chain_id, master_seque
     line_buffer = []
     parsing_proteins = False
     prot_line_buffer = []
-    with open(hssp_file_name, "rU") as handle:
+    with open(hssp_file_name, "r") as handle:
         for line in handle:
             line = line.rstrip(os.linesep)
             if line.startswith("NCHAIN"):

diff --git a/libwhiscy/pam_calc.py b/libwhiscy/pam_calc.py
@@ -9,22 +9,22 @@
 from libwhiscy.pam_data import code, logpameigval, pameigvec, pameigvecinv
 
 
-class Distance():
-    def __init__(self, seq=0, dist=0., mat=None, expect=None):
+class Distance:
+    def __init__(self, seq=0, dist=0.0, mat=None, expect=None):
         self.seq = seq
         self.dist = dist
         if mat is None:
             self.mat = [[0 for x in range(20)] for y in range(20)]
         else:
             self.mat = mat
         if expect is None:
-            self.expect = [0. for x in range(20)]
+            self.expect = [0.0 for x in range(20)]
         else:
             self.expect = expect
 
 
 def get_pam_assemble(distance):
-    m = [[0. for x in range(20)] for y in range(20)]
+    m = [[0.0 for x in range(20)] for y in range(20)]
     disteigval = [0.0 for x in range(20)]
 
     for n in range(20):
@@ -47,12 +47,12 @@ def pam_load_sequences(alignment_file, distance_file):
     if not os.path.exists(alignment_file):
         raise Exception("Sequence file {0} does not exist".format(alignment_file))
 
-    refseq = ''
+    refseq = ""
     seqtodis = []
 
     seqnr = 0
     distances = []
-    with open(distance_file, "rU") as input_distances:
+    with open(distance_file, "r") as input_distances:
         first_line = input_distances.readline().rstrip(os.linesep)
         fields = first_line.split()
         try:
@@ -76,7 +76,9 @@ def pam_load_sequences(alignment_file, distance_file):
                     raise ValueError()
                 dist = val
             except ValueError:
-                raise Exception("Reading error in distance file {0}".format(distance_file))
+                raise Exception(
+                    "Reading error in distance file {0}".format(distance_file)
+                )
 
             m = get_pam_assemble(100 * dist)
 
@@ -87,10 +89,10 @@ def pam_load_sequences(alignment_file, distance_file):
 
             d = Distance(seq, dist, m, expect)
             distances.append(d)
-    
+
     seqlen = 0
     sequences = [[] for _ in range(seqnr)]
-    with open(alignment_file, "rU") as input_alignment:
+    with open(alignment_file, "r") as input_alignment:
         first_line = input_alignment.readline().rstrip(os.linesep)
         fields = first_line.split()
         seqlen = int(fields[1])
@@ -108,7 +110,9 @@ def pam_load_sequences(alignment_file, distance_file):
                     sequences[n].append(code[ord(c)])
 
     # Sorted in ascending order as the C++ qsort
-    sorted_distances = sorted(distances, key=lambda distance: distance.dist, reverse=False)
+    sorted_distances = sorted(
+        distances, key=lambda distance: distance.dist, reverse=False
+    )
     seqtodis = [0 for _ in range(seqnr)]
     for n in range(seqnr):
         seqtodis[sorted_distances[n].seq] = n
@@ -135,21 +139,21 @@ def pam_load_sequences(alignment_file, distance_file):
 def pam_calc_similarity(pos, seqnr, seq, dis):
     nextnr = 0
     currnr = 0
-    nextdist = 0.
-    currdist = 0.
-    lastdist = 0.
-    scores = [0. for _ in range(seqnr)]
-    distances = [0. for _ in range(seqnr)]
+    nextdist = 0.0
+    currdist = 0.0
+    lastdist = 0.0
+    scores = [0.0 for _ in range(seqnr)]
+    distances = [0.0 for _ in range(seqnr)]
     for n in range(1, seqnr):
         if seq[dis[n].seq][pos] >= 0:
             nextnr = n
             nextdist = dis[n].dist
             break
-    if n == seqnr: 
+    if n == seqnr:
         return 0, distances, scores
 
-    sim = 0.
-    totsim = 0.
+    sim = 0.0
+    totsim = 0.0
     weight = 0.5 * nextdist
     totweight = weight
 
@@ -167,20 +171,19 @@ def pam_calc_similarity(pos, seqnr, seq, dis):
                 break
         if n == (seqnr - 1):
             break
-        if isclose(currdist, lastdist): 
+        if isclose(currdist, lastdist):
             continue
-        
+
         m = dis[currnr].mat
         vcomp = seq[dis[currnr].seq][pos]
-        weight = .5 * (nextdist - lastdist)
-        # This scaling factor of 2.4 is totally arbitrary, but gives a nice range of scores. 
+        weight = 0.5 * (nextdist - lastdist)
+        # This scaling factor of 2.4 is totally arbitrary, but gives a nice range of scores.
         # Scaling does not affect the final ranking of scores whatsoever
         sim = 2.4 * (m[vref][vcomp] - dis[currnr].expect[vref])
-        
+
         totsim += weight * sim
         distances[counter] = currdist
         scores[counter] = totsim
         counter += 1
 
     return counter, distances, scores
-