Skip to content

Commit

Permalink
function added for label file loading and notebook tidied up
Browse files Browse the repository at this point in the history
  • Loading branch information
vincentvanhees committed Nov 7, 2017
1 parent 849c3c6 commit 39cb1e5
Show file tree
Hide file tree
Showing 2 changed files with 122 additions and 160 deletions.
35 changes: 35 additions & 0 deletions explosmile/load_iemocap.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
This module provides functionality to pre-process the IEMOCAP dataset
"""
import os
import pandas as pd

def returnrealfiles(filenameslist):
"""
Expand Down Expand Up @@ -43,6 +44,8 @@ def find_matching_label_file(wav_filename, labfiles, label_files_path):
lab_fullpath : string
The name of the label txt file with full path
"""

# the wav files have longer names than the label files, so first remove the end:
wav_namepieces = wav_filename.split("/")[-1].split("_")
matchinglabfile = wav_namepieces[0] + '_' + wav_namepieces[1] + '.txt'
# check whether the matching label file truly exists:
Expand All @@ -55,3 +58,35 @@ def find_matching_label_file(wav_filename, labfiles, label_files_path):
# create full path for labelfile:
lab_fullpath = os.path.join(label_files_path, matchinglabfile3)
return lab_fullpath;

def readlabtxt(lab_fullpath):
    """
    Loads txt with the Emotion labels, takes the summary labels per time frame,
    and stores it in a pandas data.frame

    Parameters
    ------------
    lab_fullpath : string
        The name of the label txt file with path

    Returns
    ------------
    labels : pandas data.frame
        pandas data.frame with all summary labels per time frame. The columns
        are TURN_NAME, EMOTION, V, A, D, START_TIME and END_TIME; the last
        five are produced by string splitting and are therefore strings
        (brackets and commas stripped), not numbers.
    """
    # Name of the single column found in the header line of the label file,
    # and the name given to the column that holds the combined time range.
    header_col = '% [START_TIME - END_TIME] TURN_NAME EMOTION [V, A, D]'
    time_col = '[START_TIME - END_TIME]'

    # get all labels for all utterances in this improvisation
    all_rows = pd.read_table(lab_fullpath, header=0)
    # The summary is stored in every 8th row. reset_index() returns a new
    # frame (no in-place change of the slice) and turns the unnamed index
    # levels into the columns 'level_0', 'level_1', 'level_2'.
    # NOTE(review): this relies on every utterance spanning exactly 8 rows
    # and on the data rows having three more fields than the header -- confirm
    # against the label files in use.
    labels = all_rows.iloc[0::8, :].reset_index()
    # rename the columns that are now created
    labels = labels.rename(columns={'level_0': time_col,
                                    'level_1': 'TURN_NAME',
                                    'level_2': 'EMOTION'})
    # split valence, activation, and dominance (raw string: '\s' is a regex)
    labels[['V', 'A', 'D']] = labels[header_col].str.split(r'\s', expand=True)
    # split the time range into its start and end
    labels[['START_TIME', 'END_TIME']] = labels[time_col].str.split(' - ', expand=True)
    labels = labels.drop([header_col, time_col], axis=1)
    # remove unwanted characters; the vectorised .str methods leave missing
    # values untouched instead of failing on them
    for colnam in ['V', 'A', 'D', 'START_TIME', 'END_TIME']:
        labels[colnam] = labels[colnam].str.lstrip('[,').str.rstrip('],')
    return labels
Loading

0 comments on commit 39cb1e5

Please sign in to comment.