function added for label file loading and notebook tidied up

emrecdem · Nov 7, 2017 · 39cb1e5 · 39cb1e5
1 parent 849c3c6
commit 39cb1e5
Show file tree

Hide file tree

Showing 2 changed files with 122 additions and 160 deletions.
diff --git a/explosmile/load_iemocap.py b/explosmile/load_iemocap.py
@@ -3,6 +3,7 @@
 This model provides functionality to pre-process the iemocap dataset
 """
 import os
+import pandas as pd
 
 def returnrealfiles(filenameslist):
     """
@@ -43,6 +44,8 @@ def find_matching_label_file(wav_filename, labfiles, label_files_path):
     lab_fullpath : string
         The name of the label txt file with full path
     """
+
+    # the wav files have longer names than the label files, so first remove the end:
     wav_namepieces = wav_filename.split("/")[-1].split("_")
     matchinglabfile = wav_namepieces[0] + '_' + wav_namepieces[1] + '.txt'
     # check whether the matching label file truly exists:
@@ -55,3 +58,35 @@ def find_matching_label_file(wav_filename, labfiles, label_files_path):
     # create full path for labelfile:
     lab_fullpath =  os.path.join(label_files_path, matchinglabfile3)
     return lab_fullpath;
+
+def readlabtxt(lab_fullpath):
+    """
+    Loads the txt file with the emotion labels, takes the summary labels per time frame,
+    and stores them in a pandas DataFrame
+
+    Parameters
+    ------------
+    lab_fullpath : string
+        The name of the label txt file with path
+
+    Returns
+    ------------
+    labels : pandas DataFrame
+        pandas DataFrame with all summary labels per time frame
+    """    
+    # get all labels for all utterances in this improvisation
+    labels = pd.read_table(lab_fullpath,header=0).iloc[0::8,:] # the summary is stored in every 8th row
+    # the format of the data needs to be tidied up a bit
+    # turn the row index into regular columns
+    labels.index.name = 'newhead'
+    labels.reset_index(inplace=True)
+    # rename the columns that are now created
+    labels = labels.rename(columns={'level_0': '[START_TIME - END_TIME]', 'level_1': 'TURN_NAME', 'level_2': 'EMOTION'})
+    # split valence, activation, and dominance:
+    labels[['V','A','D']] = labels['% [START_TIME - END_TIME] TURN_NAME EMOTION [V, A, D]'].str.split('\s',expand=True)
+    labels[['START_TIME','END_TIME']] = labels['[START_TIME - END_TIME]'].str.split(' - ',expand=True)
+    labels = labels.drop(['% [START_TIME - END_TIME] TURN_NAME EMOTION [V, A, D]','[START_TIME - END_TIME]'], axis=1)
+    # remove unwanted characters
+    for colnam in ['V','A','D', 'START_TIME','END_TIME']:
+        labels[colnam] = labels[colnam].map(lambda x: x.lstrip('[,').rstrip('],'))
+    return labels;