openalea · thomasarsouze · Jan 22, 2024 · Jan 9, 2024
diff --git a/sequence_analysis/tutorials/Code/__init__.py b/sequence_analysis/tutorials/Code/__init__.py
diff --git a/sequence_analysis/tutorials/Code/amlseq2R.py b/sequence_analysis/tutorials/Code/amlseq2R.py
@@ -0,0 +1,126 @@
+# Read .seq file and transform to an R file.
+import os
+
+def Setup(chdir=False, virtual=True):
+    """
+    Default setup and paths to data
+    """
+
+    print(os.getcwd())
+
+    if not(virtual):
+        home_dir = "D:\\"
+    else:
+        home_dir = "/media/sf_transfert"
+    base_path = os.path.join(home_dir, "devlp_shared", "AppleCultivarsTreatments")
+    data_path = base_path + os.sep + "Data"
+
+    if chdir:
+        os.chdir(base_path)
+
+    output_file = base_path + os.sep + "Data" + os.sep + "multiseq_N_APPLE_R.csv"
+
+    return base_path, data_path, output_file
+
+def ReadSequence(data_path="", data_file="multiseq_N_APPLE_unix.seq"):
+    """
+    Read sequence from file and return Sequences object + python list
+    """
+    if data_path == "":
+        base_path, data_path, output_file = Setup(chdir=False)
+
+    # Python sequence 
+    from openalea.sequence_analysis import sequences
+    pyseq1 = sequences.Sequences(data_path + os.sep + data_file)
+    pylist = []
+    dim_seq = len(pyseq1[0][0])
+
+    for i in range(len(pyseq1)):
+        seq_i = []
+        for l in range(len(pyseq1[i])):
+            seq_i += [[pyseq1[i][l][d] for d in range(dim_seq)]]
+        pylist += [seq_i]
+
+    return pyseq1, pylist
+
+def DefaultHeader():
+    """
+    Return default header for .csv
+    """
+    h = "######################################################################### \n"
+    h += "# \n"
+    h += "#  N-APPLE shoot \n"
+    h += "# \n"
+    h += "#  VARIABLE 1: treatment code (1: control, 2: treatment with 20 g N/tree dose, 3: treatment with 30 g N/h tree dose), \n"
+    h += "#  VARIABLE 2: cultivar (1: Rubinola, 2: Topaz, 3: Golden Delicious), \n"
+    h += "#  VARIABLE 3: axillary shoot type (0: no shoot, 1: short shoot, 2: medium shoot, 3: long shoot, 4: sylleptic shoot), \n"
+    h += "#  VARIABLE 4: lateral flowering (0: no lateral flowering, 1: LF in median zone, 2: LF in distal zone, 3: LF in median and distal zone = Long LF zone), \n"
+    h += "#  VARIABLE 5: terminal flowering (0: no flower cluster, 1: presence of a flower cluster). \n"
+    h += "# \n"
+    h += "#  Data : Martin Meszaros, Evelyne Costes, Jean-Baptiste Durand \n"
+    h += "# \n"
+    h += '#  read with read.csv(multiseq_N_APPLE_R.csv, header=TRUE, row.names=1, comment.char="#") \n'
+    h += "# \n"
+    h += "######################################################################### \n"
+    h += '"", treatment code, '
+    h += "cultivar, "
+    h += "axillary shoot type, " 
+    h += "lateral flowering, "
+    h += "terminal flowering, "
+    h += "sequence N. \n"    
+    return h
+
+def RestoredStatesHeader():
+    """
+    Return header for restored states
+    """
+    h = "######################################################################### \n"
+    h += "# \n"
+    h += "#  N-APPLE shoot \n"
+    h += "# \n"
+    h += "#  VARIABLE 1: restored state. \n"
+    h += "#  VARIABLE 2: axillary shoot type (0: no shoot, 1: short shoot, 2: medium shoot, 3: long shoot, 4: sylleptic shoot), \n"
+    h += "#  VARIABLE 3: lateral flowering (0: no lateral flowering, 1: LF in median zone, 2: LF in distal zone, 3: LF in median and distal zone = Long LF zone), \n"
+    h += "#  VARIABLE 4: terminal flowering (0: no flower cluster, 1: presence of a flower cluster). \n"
+    h += "# \n"
+    h += "#  Data : Martin Meszaros, Evelyne Costes, Jean-Baptiste Durand \n"
+    h += "# \n"
+    h += '#  read with read.csv(multiseq_N_APPLE_R.csv, header=TRUE, row.names=1, comment.char="#") \n'
+    h += "# \n"
+    h += "######################################################################### \n"
+    h += '"", restored state, '
+    h += "axillary shoot type, " 
+    h += "lateral flowering, "
+    h += "terminal flowering, "
+    h += "sequence N. \n"    
+    return h
+
+def WriteRSequence(pyseq1, output_file="", header=None):
+    """
+    Write sequence in a .txt file, one line per sequence index
+    """
+    if output_file == "":
+        base_path, data_path, output_file = Setup(chdir=False)
+
+    f = open(output_file, "w")
+
+    if header is None:
+        h = DefaultHeader()
+    else:
+        h = str(header)
+    f.write(h)
+    dim_seq = len(pyseq1[0][0])
+    l = 1 # line
+    for i in range(len(pyseq1)):
+        # i: sequence
+        for p in range(len(pyseq1[i])):
+            # p: position
+            f.write('"' + str(l) + '", ')
+            for v in range(dim_seq):
+                # write variables
+                f.write(str(pyseq1[i][p][v]) + ", ")
+            # write sequence id
+            f.write(str(i) + "\n")
+            l += 1
+
+    f.close()
diff --git a/sequence_analysis/tutorials/Code/matrix_plot.py b/sequence_analysis/tutorials/Code/matrix_plot.py
@@ -0,0 +1,65 @@
+# Plot transition matrices
+# Use with conda environment bnp-mrf
+
+import matplotlib.pyplot as plt
+import networkx, os
+import numpy as np
+
+import os
+import numpy as np
+
+print(os.getcwd())
+
+virtual = True
+nb_states = 5
+
+if not(virtual):
+    home_dir = "D:\\"
+else:
+    home_dir = "/mnt/transfert"
+base_path = os.path.join(home_dir, "devlp_shared", "AppleCultivarsTreatments")
+data_path = base_path + os.sep + "Data"
+os.chdir(base_path)
+
+
+matrix_file = base_path + os.sep + "Models" + os.sep + "seq1v_" + \
+    str(nb_states) + "s_L.mat"
+
+TM = np.fromfile(matrix_file)
+TM = TM.reshape(nb_states, nb_states)
+
+G = networkx.from_numpy_matrix(TM, create_using=networkx.DiGraph)
+blues = cm = plt.get_cmap('Blues')
+
+f = plt.figure(1, figsize=(9, 9))
+ax = f.add_subplot(1,1,1)
+
+#networkx.draw_circular(G, node_size=150, node_color='#1f78b4',
+                       #with_labels=True, arrows=True, 
+                       #arrow_size=140,
+                       #width=4,
+                       #edge_cmap = plt.cm.Blues)
+
+# G = networkx.star_graph(81)
+initial_nodes = np.array(list(G.nodes()))
+final_nodes = initial_nodes + 1
+
+networkx.relabel_nodes(G, dict(zip(initial_nodes, final_nodes)), copy=False)
+
+colors = [G.get_edge_data(a, b)['weight'] for (a,b) in G.edges()]    
+
+# networkx.draw_circular(G, edge_color = range(81), edge_cmap = plt.cm.Blues)
+
+options = {
+    "node_color": "#A0CBE2",
+    "edge_color": colors,
+    "width": 3,
+    "edge_cmap": plt.cm.Blues,
+    "with_labels": True,
+    "node_size": 150, 
+    "with_labels": True,
+}
+
+
+networkx.draw_circular(G, **options) 
+plt.show() 
diff --git a/sequence_analysis/tutorials/Code/python_dics2R.py b/sequence_analysis/tutorials/Code/python_dics2R.py
@@ -0,0 +1,123 @@
+# Export python dictionaries of sufficients HMSC statistics for 
+# GLMMs to an R file.
+
+import os
+
+
+def ComputeGLMStatistics(pyseq1, seg_pyseq):
+    """
+    Compute statistics for GLM(M) estimation
+    """
+    dwell = {} # indexed by state, cultivar, tree
+    transition = {} # indexed by past_state, treatment, cultivar, tree
+    observation = {} # indexed by state, treatment, cultivar, tree
+    for i in range(len(pyseq1)):
+        d = 0
+        s_past = -1 # past state
+
+        for l in range(len(pyseq1[i])):
+            va = pyseq1[i][l]
+            t = va[0] # treatment
+            c = va[1] # cultivar
+            v = seg_pyseq[i][l]
+            s = v[0]
+            if s != s_past:
+                if s_past != -1:
+                    if dwell.has_key((s_past, t, c, i+1)):
+                        dwell[s_past, t, c, i+1] += [[d, 1]]
+                        transition[s_past, t, c, i+1] += [s]
+                    else:
+                        dwell[s_past, t, c, i+1] = [[d, 1]]
+                        transition[s_past, t, c, i+1] = [s]
+                d = 1 # dwell time 
+            else:
+                d += 1
+            s_past = s
+            if observation.has_key((s, t, c, i+1)):
+                observation[s, t, c, i+1] += [v[1:]]
+            else:
+                observation[s, t, c, i+1] = [v[1:]]
+        if dwell.has_key((s_past, t, c, i+1)):
+            dwell[s_past, t, c, i+1] += [[d, 0]]
+        else:
+            dwell[s_past, t, c, i+1] = [[d, 0]]
+
+    return dwell, transition, observation
+
+def GLMStatisticsHeader():
+
+    """
+    Return header for GLM statistics
+    """
+    h = "######################################################################### \n"
+    h += "# \n"
+    h += "#  Statistics for GLM(M) estimation issued from HSMC restoration\n"
+    h += "# \n"
+    h += '#  VARIABLE 1: type of statistics ("d"well time, "t"ransition, "o"bservation). \n'
+    h += '#  VARIABLE 2: restored state (current state for "d" and "o", previous state for "t"). \n'
+    h += "#  VARIABLE 3: treatment. \n"
+    h += "#  VARIABLE 4: cultivar. \n"
+    h += "#  VARIABLE 5: value of statistics. \n"
+    h += '#  VARIABLE 6: for "d", 0 if last dwell time (censored), 1 otherwise; for "t", NA \n'
+    h += '#              for "o", id of variable. \n'
+    h += "# \n"
+    h += '#  read with read.csv(file_name.csv, header=TRUE, row.names=1, comment.char="#") \n'
+    h += "# \n"
+    h += "######################################################################### \n"
+    h += '"", type, '
+    h += "restored state, " 
+    h += "treatment, "
+    h += "cultivar, "
+    h += "sequence N., "
+    h += "value, "
+    h += "info \n"
+    return h
+
+
+def WriteGLMStatistics(dwell, transition, observation, output_file):
+    """
+    Write statistics for GLM(M) estimation
+    """
+    f = open(output_file, "w")
+    f.write(GLMStatisticsHeader())
+    l = 1 # line 
+    for (k, v) in dwell.items():
+        f.write(str(l) + ", ")
+        f.write("d, ")
+        for o in k:
+            f.write(str(o) + ", ")
+        s = ""
+        for o in v:
+            for i in o:
+                s += str(i) + ", "
+        s = s[0:-2] + "\n"
+        l += 1
+        f.write(s)
+
+    for (k, v) in transition.items():
+        f.write(str(l) + ", ")
+        f.write("t, ")
+        for o in k:
+            f.write(str(o) + ", ")
+        s = ""
+        for o in v:
+            s += str(o) + ", "
+        s = s[0:-2] + ", NA \n"
+        l += 1
+        f.write(s)
+
+    for (k, v) in observation.items():
+        # common part of all elements in current 
+        # list of objects and associated string
+        s_base = "o, " 
+        for o in k:
+            s_base += str(o) + ", "
+        for o in v:
+            for i in range(len(o)):
+                # Variables
+                s = s_base + str(o[i]) + ", " + str(i+1) + "\n"
+                f.write(str(l) + ", " + s)
+                l += 1
+
+    f.close()
+
diff --git a/sequence_analysis/tutorials/Utils/__init__.py b/sequence_analysis/tutorials/Utils/__init__.py
@@ -0,0 +1,2 @@
+from unix2dos import unix2dos
+from dos2unix import dos2unix
diff --git a/sequence_analysis/tutorials/Utils/dos2unix.py b/sequence_analysis/tutorials/Utils/dos2unix.py
@@ -0,0 +1,39 @@
+#!/usr/bin/env python
+"""\
+convert dos linefeeds (crlf) to unix (lf)
+usage: dos2unix.py <input> <output>
+"""
+
+"""
+__version__ = "1"  # version is needed for packaging
+
+import sys
+
+if len(sys.argv[1:]) != 2:
+  sys.exit(__doc__)
+
+content = ''
+outsize = 0
+with open(sys.argv[1], 'rb') as infile:
+  content = infile.read()
+with open(sys.argv[2], 'wb') as output:
+  for line in content.splitlines():
+    outsize += len(line) + 1
+    output.write(line + b'\n')
+
+print("Done. Stripped %s bytes." % (len(content)-outsize))
+"""
+
+def dos2unix(infile, output):
+    import sys
+
+    content = ''
+    outsize = 0
+    ifile = open(infile)
+    content = ifile.read()
+    ofile = open(output, 'wb')
+    for line in content.splitlines():
+        outsize += len(line) + 1
+        ofile.write(line + b'\n')
+
+    print("Done. Stripped %s bytes." % (len(content)-outsize))
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,2 @@
		from unix2dos import unix2dos
		from dos2unix import dos2unix