Skip to content
This repository has been archived by the owner on Sep 8, 2024. It is now read-only.

Commit

Permalink
Added viseme support for TTS, allowing enclosure to display visemes (#…
Browse files Browse the repository at this point in the history
…357)

* Added viseme support for TTS, allowing enclosure to display visemes as appropriate

* Enclosure version bump
  • Loading branch information
Steve authored and aatchison committed Sep 5, 2016
1 parent 340542d commit c653c43
Show file tree
Hide file tree
Showing 10 changed files with 140 additions and 11 deletions.
8 changes: 7 additions & 1 deletion mycroft/client/enclosure/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,9 +96,15 @@ def mouth_listen(self):
def mouth_smile(self):
    """Emit the "enclosure.mouth.smile" message through the client."""
    smile_message = Message("enclosure.mouth.smile")
    self.client.emit(smile_message)

def mouth_viseme(self, visCode):
    """Emit an "enclosure.mouth.viseme" message carrying *visCode*.

    The value is stored under the 'code' key of the message metadata.
    """
    payload = {'code': visCode}
    viseme_message = Message("enclosure.mouth.viseme", metadata=payload)
    self.client.emit(viseme_message)

def mouth_text(self, text=""):
self.client.emit(
Message("enclosure.mouth.text", metadata={'text': text}))
Message("enclosure.mouth.text", metadata={
'text': text}))

def weather_display(self, img_code, temp):
self.client.emit(
Expand Down
18 changes: 18 additions & 0 deletions mycroft/client/enclosure/mouth.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@


from mycroft.util.log import getLogger
import time

__author__ = 'jdorleans'

Expand All @@ -41,6 +42,7 @@ def __init_events(self):
self.client.on('enclosure.mouth.think', self.think)
self.client.on('enclosure.mouth.listen', self.listen)
self.client.on('enclosure.mouth.smile', self.smile)
self.client.on('enclosure.mouth.viseme', self.viseme)
self.client.on('enclosure.mouth.text', self.text)

def reset(self, event=None):
Expand All @@ -58,6 +60,22 @@ def listen(self, event=None):
def smile(self, event=None):
    """Write the "mouth.smile" command to the writer.

    *event* is accepted so the method can be used as an event handler,
    but it is not read.
    """
    command = "mouth.smile"
    self.writer.write(command)

def viseme(self, event=None):
    """Walk the mouth through a timed sequence of visemes.

    The "code" entry of ``event.metadata`` is read as a string of
    comma-separated ``viseme:time`` pairs, where ``time`` is the
    cumulative number of seconds since the start of this call,
    e.g. ``'0:0.34,1:1.23,0:1.32,'``.

    For every valid pair, ``mouth.viseme=<viseme>`` is written to the
    writer and the call then sleeps until that pair's cumulative time
    has elapsed.  Pairs that are empty (such as the one produced by the
    trailing comma), lack a ``:``, carry a code that is not a single
    digit from 0 to 6, or carry a non-numeric time are skipped instead
    of aborting the remaining pairs.

    :param event: message whose ``metadata`` dict may hold "code";
        when it is missing or empty nothing is written.
    """
    visCmds = ''
    if event and event.metadata:
        # ``or ''`` guards against an explicit None stored under "code"
        visCmds = event.metadata.get("code", visCmds) or ''

    valid_codes = ("0", "1", "2", "3", "4", "5", "6")
    timeStart = time.time()
    for pair in visCmds.split(","):
        code, sep, until = pair.partition(":")
        code = code.strip()
        # Exact membership test: a lexicographic range check would also
        # let through multi-character strings such as "10" or "5x".
        if not sep or code not in valid_codes:
            continue
        try:
            until = float(until)
        except ValueError:
            continue
        elap = time.time() - timeStart
        self.writer.write("mouth.viseme=" + code)
        if elap < until:
            # times are cumulative, so only wait for the remainder
            time.sleep(until - elap)

def text(self, event=None):
text = ""
if event and event.metadata:
Expand Down
2 changes: 1 addition & 1 deletion mycroft/client/enclosure/version.txt
Original file line number Diff line number Diff line change
@@ -1 +1 @@
0.1.12
0.1.13
2 changes: 1 addition & 1 deletion mycroft/client/speech/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ def mute_and_speak(utterance):
try:
logger.info("Speak: " + utterance)
loop.mute()
tts.execute(utterance)
tts.execute(utterance, client)
finally:
loop.unmute()
mutex.release()
Expand Down
2 changes: 1 addition & 1 deletion mycroft/tts/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ def __init__(self, lang, voice, filename='/tmp/tts.wav'):
self.filename = filename

@abc.abstractmethod
def execute(self, sentence):
def execute(self, sentence, client):
pass


Expand Down
2 changes: 1 addition & 1 deletion mycroft/tts/espeak_tts.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ class ESpeak(TTS):
def __init__(self, lang, voice):
    """Pass *lang* and *voice* on to the parent-class initializer."""
    super(ESpeak, self).__init__(lang, voice)

def execute(self, sentence):
def execute(self, sentence, client):
subprocess.call(
['espeak', '-v', self.lang + '+' + self.voice, sentence])

Expand Down
2 changes: 1 addition & 1 deletion mycroft/tts/google_tts.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ class GoogleTTS(TTS):
def __init__(self, lang, voice):
    """Pass *lang* and *voice* on to the parent-class initializer."""
    super(GoogleTTS, self).__init__(lang, voice)

def execute(self, sentence):
def execute(self, sentence, client):
tts = gTTS(text=sentence, lang=self.lang)
tts.save(self.filename)
play_wav(self.filename)
Expand Down
111 changes: 108 additions & 3 deletions mycroft/tts/mimic_tts.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,14 +15,17 @@
# You should have received a copy of the GNU General Public License
# along with Mycroft Core. If not, see <http://www.gnu.org/licenses/>.


import subprocess
from os.path import join
import re
import random
import os
import time

from mycroft import MYCROFT_ROOT_PATH
from mycroft.tts import TTS, TTSValidator
from mycroft.configuration import ConfigurationManager
from mycroft.client.enclosure.api import EnclosureAPI

__author__ = 'jdorleans'

Expand All @@ -32,13 +35,115 @@
BIN = config.get(
"mimic.path", join(MYCROFT_ROOT_PATH, 'mimic', 'bin', 'mimic'))

# Mapping based on Jeffers phoneme to viseme map, seen in table 1 from:
# http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.221.6377&rep=rep1&type=pdf
#
# Mycroft unit visemes based on images found at:
# http://www.web3.lu/wp-content/uploads/2014/09/visemes.jpg
# and mapping was created partially based on the "12 mouth shapes"
# visuals seen at:
# https://wolfpaulus.com/journal/software/lipsynchronization/
# with final viseme group to image mapping by Steve Penrod


class Mimic(TTS):

def __init__(self, lang, voice):
    """Pass *lang* and *voice* on to the parent-class initializer."""
    super(Mimic, self).__init__(lang, voice)

def execute(self, sentence):
subprocess.call([BIN, '-voice', self.voice, '-t', sentence])
def PhonemeToViseme(self, pho):
    """Translate a phoneme name into a viseme code string.

    The phonemes are organised by the group labels (/A to /K) used in
    the mapping this table was derived from; every phoneme of a group
    shares one viseme code.  Any phoneme that is not listed yields '4',
    the same code used for 'pau' (blank mouth).

    :param pho: phoneme name, e.g. 'aw' or 'pau'
    :return: viseme code as a one-character string, '0' to '6'
    """
    groups = (
        ('5', ('v', 'f')),                              # /A group
        ('2', ('uh', 'w', 'uw', 'er', 'r', 'ow')),      # /B group
        ('4', ('b', 'p', 'm')),                         # /C group
        ('1', ('aw',)),                                 # /D group
        ('3', ('th', 'dh')),                            # /E group
        ('3', ('zh', 'ch', 'sh', 'jh')),                # /F group
        ('6', ('oy', 'ao')),                            # /G group
        ('3', ('z', 's')),                              # /H group
        ('0', ('ae', 'eh', 'ey', 'ah', 'ih', 'y',       # /I group
               'iy', 'aa', 'ay', 'ax', 'hh')),
        ('3', ('n', 't', 'd', 'l')),                    # /J group
        ('3', ('g', 'ng', 'k')),                        # /K group
        ('4', ('pau',)),                                # blank mouth
    )
    lookup = dict(
        (phoneme, viseme)
        for viseme, phonemes in groups
        for phoneme in phonemes)
    # 4 is the default when pho is not found
    return lookup.get(pho, '4')

def execute(self, sentence, client):
    """Speak *sentence* with mimic while driving the enclosure's mouth.

    mimic is run with ``-psdur`` so that it writes the audio to a WAV
    file and prints ``phoneme:duration`` pairs.  Each phoneme is mapped
    to a viseme code, the resulting ``viseme:duration,`` string is sent
    through ``EnclosureAPI.mouth_viseme`` and the WAV is played with
    ``aplay``.  The enclosure is asked to blink at random before and
    after speaking.

    The WAV file is removed even when mimic or playback raises.

    :param sentence: text to speak
    :param client: client handed to ``EnclosureAPI``
    :raises subprocess.CalledProcessError: if mimic exits non-zero
    """
    wav_path = "/tmp/mimic.wav"
    enclosure = EnclosureAPI(client)

    random.seed()
    # blink 50% of the time before speaking (only shows up if the
    # mimic TTS generation takes fairly long)
    if random.random() < 0.5:
        enclosure.eyes_blink("b")

    try:
        # invoke mimic, creating WAV and outputting phoneme:duration pairs
        outMimic = subprocess.check_output(
            [BIN, '-voice', self.voice, '-t', sentence, '-psdur',
             '-o', wav_path])
        # check_output may hand back bytes; the parsing below needs text
        if not isinstance(outMimic, str):
            outMimic = outMimic.decode('utf-8')

        # convert phonemes to visemes; split() on any whitespace so a
        # trailing newline does not end up glued to the last duration
        parts = []
        for pair in outMimic.split():
            pho_dur = pair.split(":")
            if len(pho_dur) != 2:
                continue
            parts.append(
                self.PhonemeToViseme(pho_dur[0]) + ":" + pho_dur[1] + ",")
        visCodes = "".join(parts)

        # play WAV and walk thru visemes while it plays
        enclosure.mouth_viseme(visCodes)
        subprocess.call(['aplay', wav_path])

        # after speaking, blink 20% of the time
        if random.random() < 0.2:
            enclosure.eyes_blink("b")
    finally:
        # delete WAV; it may not exist if mimic failed before writing it
        if os.path.exists(wav_path):
            os.remove(wav_path)


class MimicValidator(TTSValidator):
Expand Down
2 changes: 1 addition & 1 deletion mycroft/tts/remote_tts.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ def __init__(self, lang, voice, url, api_path):
self.url = remove_last_slash(url)
self.session = FuturesSession()

def execute(self, sentence):
def execute(self, sentence, client):
phrases = self.__get_phrases(sentence)

if len(phrases) > 0:
Expand Down
2 changes: 1 addition & 1 deletion mycroft/tts/spdsay_tts.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ class SpdSay(TTS):
def __init__(self, lang, voice):
    """Pass *lang* and *voice* on to the parent-class initializer."""
    super(SpdSay, self).__init__(lang, voice)

def execute(self, sentence):
def execute(self, sentence, client):
subprocess.call(
['spd-say', '-l', self.lang, '-t', self.voice, sentence])

Expand Down

0 comments on commit c653c43

Please sign in to comment.