From 9bcfd3782edb2f9c740e90c1342a134e038020c7 Mon Sep 17 00:00:00 2001 From: Fan Deng Date: Mon, 11 Sep 2017 12:51:51 -0700 Subject: [PATCH] Upload code for Voice API V2. --- HACKING.md | 83 ---- Makefile | 2 +- README.md | 15 +- checkpoints/check_audio.py | 50 --- checkpoints/check_cloud.py | 52 +-- checkpoints/check_wifi.py | 3 +- checkpoints/load_test.py | 185 --------- config/status-led.ini.default | 5 - config/voice-recognizer.ini.default | 30 -- requirements.txt | 4 - scripts/install-deps.sh | 7 - scripts/install-services.sh | 4 - shortcuts/check_audio.desktop | 2 +- shortcuts/check_cloud.desktop | 2 +- shortcuts/check_wifi.desktop | 2 +- src/action.py | 318 ---------------- src/actionbase.py | 75 ---- src/aiy/_apis/_speech.py | 1 + src/aiy/_drivers/_button.py | 11 +- src/aiy/_drivers/_led.py | 79 ++-- src/aiy/_drivers/_player.py | 4 +- src/aiy/_drivers/_recorder.py | 20 +- src/aiy/_drivers/_status_ui.py | 66 ++-- src/aiy/_drivers/_tts.py | 17 +- src/aiy/assistant/grpc.py | 2 - src/aiy/i18n.py | 1 - src/aiy/voicehat.py | 33 +- src/assistant_grpc_demo.py | 34 +- src/auth_helpers.py | 71 ---- src/cloudspeech_demo.py | 4 +- src/led.py | 82 ---- src/main.py | 407 -------------------- src/speech.py | 461 ----------------------- src/status-monitor.py | 77 ---- src/triggers/__init__.py | 0 src/triggers/clap.py | 52 --- src/triggers/gpio.py | 61 --- src/triggers/trigger.py | 29 -- systemd/status-led-off.service | 12 - systemd/status-led-on.service | 12 - systemd/status-led.service | 16 - systemd/status-monitor.service | 16 - systemd/voice-recognizer.service | 16 - tests/test_actor_base.py | 96 ----- tests/test_change_light_color.py | 63 ---- tests/test_speak_shell_command_output.py | 41 -- tests/test_speak_time.py | 59 --- 47 files changed, 180 insertions(+), 2502 deletions(-) delete mode 100644 HACKING.md delete mode 100755 checkpoints/load_test.py delete mode 100644 config/status-led.ini.default delete mode 100644 config/voice-recognizer.ini.default delete mode 100644 src/action.py delete mode 100644 src/actionbase.py delete mode 100644 src/auth_helpers.py delete mode 100644 src/led.py delete mode 100755 src/main.py delete mode 100644 src/speech.py delete mode 100755 src/status-monitor.py delete mode 100644 src/triggers/__init__.py delete mode 100644 src/triggers/clap.py delete mode 100644 src/triggers/gpio.py delete mode 100644 src/triggers/trigger.py delete mode 100644 systemd/status-led-off.service delete mode 100644 systemd/status-led-on.service delete mode 100644 systemd/status-led.service delete mode 100644 systemd/status-monitor.service delete mode 100644 systemd/voice-recognizer.service delete mode 100644 tests/test_actor_base.py delete mode 100644 tests/test_change_light_color.py delete mode 100644 tests/test_speak_shell_command_output.py delete mode 100644 tests/test_speak_time.py diff --git a/HACKING.md b/HACKING.md deleted file mode 100644 index 80a6e817..00000000 --- a/HACKING.md +++ /dev/null @@ -1,83 +0,0 @@ -# Setting up the image - -We recommend using [the images](https://aiyprojects.withgoogle.com/voice) we -provide. Those images are based on [Raspbian](https://www.raspberrypi.org/downloads/raspbian/), -with a few customizations and are tested on the Raspberry Pi 3. If you prefer -to setup Raspbian yourself, there are some manual steps you need to take. 
- -## Installing the dependencies - -First, make sure you have `git` installed and clone this repository in -`~/voice-recognizer-raspi`: - -```shell -sudo apt-get install git -cd -git clone https://github.com/google/aiyprojects-raspbian.git voice-recognizer-raspi -``` - -Then, install the project dependencies and setup the services: - -``` shell -cd ~/voice-recognizer-raspi -scripts/install-deps.sh -sudo scripts/install-services.sh -``` - -## Installing the Voice HAT driver and config - -To use the Voice HAT, you'll need to upgrade your kernel to 4.9, then adjust the -kernel and ALSA configuration: - -``` shell -sudo apt-get update -sudo apt-get install raspberrypi-kernel -sudo scripts/configure-driver.sh -sudo scripts/install-alsa-config.sh -sudo reboot -``` - -## Get cloud credentials - -To access the cloud services you need to register a project and generate -credentials for cloud APIs. This is documented in the -[setup instructions](https://aiyprojects.withgoogle.com/voice#users-guide-1-1--connect-to-google-cloud-platform) on the -webpage. - -# Making code changes - -If you edit the code on a different computer, you can deploy it to your -Raspberry Pi by running: - -``` shell -make deploy -``` - -To execute the script on the Raspberry Pi run, login to it and run: - -``` shell -cd ~/voice-recognizer-raspi -source env/bin/activate -python3 src/main.py -``` - -# I18N - -Strings wrapped with `_()` are marked for translation: - -``` shell -# update catalog after string changed -pygettext3 -d voice-recognizer -p po src/main.py src/action.py - -# add new language -msgmerge po/de.po po/voice-recognizer.pot -# now edit po/de.po - -# update language -msgmerge -U po/de.po po/voice-recognizer.pot -# now edit po/de.po - -# create language bundle -mkdir po/de/LC_MESSAGES/ -msgfmt po/de.po -o po/de/LC_MESSAGES/voice-recognizer.mo -``` diff --git a/Makefile b/Makefile index 8bbe7184..1171c39d 100644 --- a/Makefile +++ b/Makefile @@ -6,7 +6,7 @@ check: PYTHONPATH=$$PWD/src python3 -m unittest discover tests deploy_scripts: - git ls-files | rsync -avz --exclude=".*" --exclude="*.desktop" --files-from - . pi@$(PI):~/voice-recognizer-raspi + git ls-files | rsync -avz --exclude=".*" --exclude="*.desktop" --files-from - . pi@$(PI):~/AIY-voice-kit-python deploy_shortcuts: scp $(SHORTCUTS) pi@$(PI):~/Desktop diff --git a/README.md b/README.md index 3f565d09..57a68d34 100644 --- a/README.md +++ b/README.md @@ -1,13 +1,2 @@ -This repository contains the source code for the AIYProjects "Voice Kit". See -https://aiyprojects.withgoogle.com/voice/. - -If you're using Raspbian instead of Google's provided image, read -[HACKING.md](HACKING.md) for information on getting started. - -[![Build Status](https://travis-ci.org/google/aiyprojects-raspbian.svg?branch=master)](https://travis-ci.org/google/aiyprojects-raspbian/builds) -[![Test Coverage](https://codecov.io/gh/google/aiyprojects-raspbian/branch/master/graph/badge.svg)](https://codecov.io/gh/google/aiyprojects-raspbian) - -## Troubleshooting - -The scripts in the `checkpoints` directory verify the Raspberry Pi's setup. -They can be run from the desktop shortcuts or from the terminal. +This repository contains the APIs and demo apps for the AIYProjects. See +https://aiyprojects.withgoogle.com. 
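As a quick orientation to the API surface this change leaves behind, here is a minimal sketch of a button/LED loop. It is an illustration only, assuming the `aiy` package from `src/` is importable and a Voice HAT is attached; the demos under `src/` show the full flow:

```python
import aiy.voicehat


def main():
    button = aiy.voicehat.get_button()
    led = aiy.voicehat.get_led()
    print('Press the button to toggle the LED (Ctrl+C to quit).')
    lit = False
    while True:
        button.wait_for_press()  # blocks until the next button press
        lit = not lit
        led.set_state(aiy.voicehat.LED.ON if lit else aiy.voicehat.LED.OFF)


if __name__ == '__main__':
    main()
```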
diff --git a/checkpoints/check_audio.py b/checkpoints/check_audio.py index 80a7710e..8d64c2b7 100755 --- a/checkpoints/check_audio.py +++ b/checkpoints/check_audio.py @@ -16,11 +16,9 @@ """Check that the voiceHAT audio input and output are both working.""" import os -import subprocess import sys import tempfile import textwrap -import time import traceback sys.path.append(os.path.realpath(os.path.join(__file__, '..', '..')) + '/src/') @@ -30,10 +28,6 @@ CARDS_PATH = '/proc/asound/cards' VOICEHAT_ID = 'googlevoicehat' -SERVICE_NAME = 'voice-recognizer' -ACTIVE_STR = 'ActiveState=active' -INACTIVE_STR = 'ActiveState=inactive' - STOP_DELAY = 1.0 TEST_SOUND_PATH = '/usr/share/sounds/alsa/Front_Center.wav' @@ -57,20 +51,6 @@ def get_sound_cards(): return cards -def is_service_active(): - """Return True if the voice-recognizer service is active.""" - output = subprocess.check_output(['systemctl', 'show', SERVICE_NAME]).decode('utf-8') - - if ACTIVE_STR in output: - return True - elif INACTIVE_STR in output: - return False - - print('WARNING: failed to parse output:') - print(output) - return False - - def ask(prompt): """Get a yes or no answer from the user.""" ans = input(prompt + ' (y/n) ') @@ -81,29 +61,6 @@ def ask(prompt): return ans[0].lower() == 'y' -def stop_service(): - """Stop the voice-recognizer so we can use the mic. - - Returns: - True if the service has been stopped. - """ - if not is_service_active(): - return False - - subprocess.check_call(['sudo', 'systemctl', 'stop', SERVICE_NAME], stdout=subprocess.PIPE) - time.sleep(STOP_DELAY) - if is_service_active(): - print('WARNING: failed to stop service, mic may not work.') - return False - - return True - - -def start_service(): - """Start the voice-recognizer again.""" - subprocess.check_call(['sudo', 'systemctl', 'start', SERVICE_NAME], stdout=subprocess.PIPE) - - def check_voicehat_present(): """Check that the voiceHAT is present.""" return any(VOICEHAT_ID in card for card in get_sound_cards().values()) @@ -112,7 +69,6 @@ def check_voicehat_present(): def check_voicehat_is_first_card(): """Check that the voiceHAT is the first card on the system.""" cards = get_sound_cards() - return 0 in cards and VOICEHAT_ID in cards[0] @@ -174,14 +130,8 @@ def do_checks(): def main(): - """Run all checks, stopping the voice-recognizer if necessary.""" - should_restart = stop_service() - do_checks() - if should_restart: - start_service() - if __name__ == '__main__': try: diff --git a/checkpoints/check_cloud.py b/checkpoints/check_cloud.py index 75927be4..906f1a5c 100755 --- a/checkpoints/check_cloud.py +++ b/checkpoints/check_cloud.py @@ -17,23 +17,30 @@ import json import os -import subprocess +import os.path +import sys import traceback -if os.path.exists('/home/pi/credentials.json'): +sys.path.append(os.path.realpath(os.path.join(__file__, '..', '..')) + '/src/') + +import aiy._apis._speech # noqa + +OLD_CREDENTIALS_FILE = os.path.expanduser('~/credentials.json') +NEW_CREDENTIALS_FILE = os.path.expanduser('~/cloud_speech.json') +if os.path.exists(OLD_CREDENTIALS_FILE): # Legacy fallback: old location of credentials. 
- CREDENTIALS_PATH = '/home/pi/credentials.json' + CREDENTIALS_PATH = OLD_CREDENTIALS_FILE else: - CREDENTIALS_PATH = '/home/pi/cloud_speech.json' + CREDENTIALS_PATH = NEW_CREDENTIALS_FILE -VOICE_RECOGNIZER_PATH = os.path.realpath(os.path.join(__file__, '..', '..')) -PYTHON3 = VOICE_RECOGNIZER_PATH + '/env/bin/python3' -SPEECH_PY = VOICE_RECOGNIZER_PATH + '/src/speech.py' +ROOT_PATH = os.path.realpath(os.path.join(__file__, '..', '..')) +PYTHON3 = ROOT_PATH + '/env/bin/python3' +SPEECH_PY = ROOT_PATH + '/src/aiy/_apis/_speech.py' SPEECH_PY_ENV = { - 'VIRTUAL_ENV': VOICE_RECOGNIZER_PATH + '/env', - 'PATH': VOICE_RECOGNIZER_PATH + '/env/bin:' + os.getenv('PATH'), + 'VIRTUAL_ENV': ROOT_PATH + '/env', + 'PATH': ROOT_PATH + '/env/bin:' + os.getenv('PATH'), } -TEST_AUDIO = VOICE_RECOGNIZER_PATH + '/checkpoints/test_hello.raw' +TEST_AUDIO = ROOT_PATH + '/checkpoints/test_hello.raw' RECOGNIZED_TEXT = 'hello' @@ -48,23 +55,22 @@ def check_credentials_valid(): def check_speech_reco(): - """Try to test the speech reco code from voice-recognizer-raspi.""" + """Try to test the speech recognition code from AIY APIs.""" print('Testing the Google Cloud Speech API...') - p = subprocess.Popen( # pylint: disable=invalid-name - [PYTHON3, SPEECH_PY, TEST_AUDIO], env=SPEECH_PY_ENV, - stdout=subprocess.PIPE, stderr=subprocess.STDOUT) - output = p.communicate()[0].decode('utf-8') - - if p.returncode: - print('Speech recognition failed with', p.returncode) - print(output) - return False + req = aiy._apis._speech.CloudSpeechRequest(CREDENTIALS_PATH) + with open(TEST_AUDIO, 'rb') as f: + while True: + chunk = f.read(64000) + if not chunk: + break + req.add_data(chunk) + req.end_audio() + output = req.do_request() - # speech.py succeeded, check the text was recognized if RECOGNIZED_TEXT in output: return True - print('Speech recognition output not as expected:') + print('Speech recognition failed or output not as expected:') print(output) print('Expected:', RECOGNIZED_TEXT) return False @@ -96,6 +102,6 @@ def main(): try: main() input('Press Enter to close...') - except Exception: # pylint: disable=W0703 + except: # pylint: disable=bare-except traceback.print_exc() input('Press Enter to close...') diff --git a/checkpoints/check_wifi.py b/checkpoints/check_wifi.py index cb52f326..6b3f9d16 100755 --- a/checkpoints/check_wifi.py +++ b/checkpoints/check_wifi.py @@ -26,6 +26,7 @@ def check_wifi_is_configured(): """Check wpa_supplicant.conf has at least one network configured.""" output = subprocess.check_output(['sudo', 'cat', WPA_CONF_PATH]).decode('utf-8') + return 'network=' in output @@ -74,6 +75,6 @@ def main(): try: main() input('Press Enter to close...') - except Exception: # pylint: disable=W0703 + except: # pylint: disable=bare-except traceback.print_exc() input('Press Enter to close...') diff --git a/checkpoints/load_test.py b/checkpoints/load_test.py deleted file mode 100755 index 776d804f..00000000 --- a/checkpoints/load_test.py +++ /dev/null @@ -1,185 +0,0 @@ -#!/usr/bin/env python3 -# Copyright 2017 Google Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. - -"""Synthetic load test simillar to running the actual app.""" - -import json -import os -import subprocess -import tempfile -import time -import traceback - -if os.path.exists('/home/pi/credentials.json'): - # Legacy fallback: old location of credentials. - CREDENTIALS_PATH = '/home/pi/credentials.json' -else: - CREDENTIALS_PATH = '/home/pi/cloud_speech.json' - -SERVICE_NAME = 'voice-recognizer' -ACTIVE_STR = 'ActiveState=active' -INACTIVE_STR = 'ActiveState=inactive' - -STOP_DELAY = 1.0 - -VOICE_RECOGNIZER_PATH = os.path.realpath(os.path.join(__file__, '..', '..')) -PYTHON3 = VOICE_RECOGNIZER_PATH + '/env/bin/python3' -AUDIO_PY = VOICE_RECOGNIZER_PATH + '/src/aiy/audio.py' -SPEECH_PY = VOICE_RECOGNIZER_PATH + '/src/speech.py' -SPEECH_PY_ENV = { - 'VIRTUAL_ENV': VOICE_RECOGNIZER_PATH + '/env', - 'PATH': VOICE_RECOGNIZER_PATH + '/env/bin:' + os.getenv('PATH'), -} -TEST_AUDIO = '/usr/share/sounds/alsa/Front_Center.wav' -LED_FIFO = '/tmp/status-led' - -RECORD_DURATION_SECONDS = '3' - - -def check_credentials_valid(): - """Check the credentials are JSON service credentials.""" - try: - obj = json.load(open(CREDENTIALS_PATH)) - except ValueError: - return False - - return 'type' in obj and obj['type'] == 'service_account' - - -def is_service_active(): - """Return True if the voice-recognizer service is active.""" - output = subprocess.check_output(['systemctl', 'show', SERVICE_NAME]).decode('utf-8') - - if ACTIVE_STR in output: - return True - elif INACTIVE_STR in output: - return False - - print('WARNING: failed to parse output:') - print(output) - return False - - -def stop_service(): - """Stop the voice-recognizer so we can use the mic. - - Returns: - True if the service has been stopped. 
- """ - if not is_service_active(): - return False - - subprocess.check_call(['sudo', 'systemctl', 'stop', SERVICE_NAME], stdout=subprocess.PIPE) - time.sleep(STOP_DELAY) - if is_service_active(): - print('WARNING: failed to stop service, mic may not work.') - return False - - return True - - -def start_service(): - """Start the voice-recognizer again.""" - subprocess.check_call(['sudo', 'systemctl', 'start', SERVICE_NAME], stdout=subprocess.PIPE) - - -def check_speech_reco(): - """Try to test the speech reco code from voice-recognizer-raspi.""" - p = subprocess.Popen( # pylint: disable=invalid-name - [PYTHON3, SPEECH_PY, TEST_AUDIO], env=SPEECH_PY_ENV, - stdout=subprocess.PIPE, stderr=subprocess.STDOUT) - p.communicate()[0].decode('utf-8') - - return not p.returncode - - -def play_wav(): - """Play a WAV file.""" - subprocess.check_call([PYTHON3, AUDIO_PY, 'play', TEST_AUDIO], - stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL) - - -def record_wav(): - """Record a wav file.""" - temp_file, temp_path = tempfile.mkstemp(suffix='.wav') - os.close(temp_file) - subprocess.check_call( - [PYTHON3, AUDIO_PY, 'dump', temp_path, - '-d', RECORD_DURATION_SECONDS], - stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL) - try: - os.unlink(temp_path) - except FileNotFoundError: - pass - - -def led_status(status): - """Get the led status.""" - with open(LED_FIFO, 'w') as led: - led.write(status + '\n') - - -def run_test(): - """Start the test.""" - print('Running test forever - press Ctrl+C to stop...') - try: - while True: - print('\rrecognizing', end='') - led_status('listening') - check_speech_reco() - time.sleep(0.5) - print('\rrecording ', end='') - led_status('thinking') - record_wav() - time.sleep(0.5) - print('\rplaying ', end='') - led_status('ready') - play_wav() - time.sleep(0.5) - except KeyboardInterrupt: - led_status('power-off') - print('\nTest finished') - - -def main(): - """Run all checks and print status.""" - if not os.path.exists(CREDENTIALS_PATH): - print( - """Please follow these instructions to get Google Cloud credentials: -https://cloud.google.com/speech/docs/getting-started#set_up_your_project -and save them to""", CREDENTIALS_PATH) - return - - if not check_credentials_valid(): - print( - CREDENTIALS_PATH, """is not valid, please check that you have downloaded JSON -service credentials.""") - return - - should_restart = stop_service() - - run_test() - - if should_restart: - start_service() - - -if __name__ == '__main__': - try: - main() - input('Press Enter to close...') - except Exception: # pylint: disable=W0703 - traceback.print_exc() - input('Press Enter to close...') diff --git a/config/status-led.ini.default b/config/status-led.ini.default deleted file mode 100644 index e963a18d..00000000 --- a/config/status-led.ini.default +++ /dev/null @@ -1,5 +0,0 @@ -# Default config file for the status-led service. -# Should be installed to ~/.config/status-led.ini - -# GPIO pin (in BCM numbering) to use for the status LED (default 25) -# gpio-pin = 25 diff --git a/config/voice-recognizer.ini.default b/config/voice-recognizer.ini.default deleted file mode 100644 index 861c9691..00000000 --- a/config/voice-recognizer.ini.default +++ /dev/null @@ -1,30 +0,0 @@ -# Default config file for the voice-recognizer service. -# Should be installed to ~/.config/voice-recognizer.ini - -# Select the trigger: gpio (default), clap, ok-google. 
-# trigger = clap - -# Select the trigger sound: -# trigger-sound = path_to_your_sound.wav - -# Uncomment to enable the Cloud Speech API for local commands. -# cloud-speech = true - -# Uncomment to change the language. The following are supported: -# Embedded Assistant API [cloud-speech = false] (at launch) -# en-US -# Cloud Speech API with local TTS [cloud-speech = true] -# de-DE en-GB en-US es-ES fr-FR it-IT -# (This is limited by the local TTS. Cloud Speech API supports many more.) -# language = en-US - -# Path to client secrets for the Assistant API. -assistant-secrets = ~/assistant.json - -# Path to service account credentials for the Cloud Speech API. -cloud-speech-secrets = ~/cloud_speech.json - -# Uncomment to play Assistant responses for local actions. You should make -# sure that you have IFTTT applets for your actions to get the correct -# response, and also that your actions do not call say(). -# assistant-always-responds = true diff --git a/requirements.txt b/requirements.txt index 36d7918d..3f74d5a4 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,7 +1,3 @@ google-assistant-grpc==0.0.2 grpc-google-cloud-speech-v1beta1==0.14.0 -protobuf==3.1.0 -configargparse==0.11.0 -phue==0.9 -rgbxy==0.5 google-auth-oauthlib==0.1.0 diff --git a/scripts/install-deps.sh b/scripts/install-deps.sh index 1709556d..0a7e6a47 100755 --- a/scripts/install-deps.sh +++ b/scripts/install-deps.sh @@ -37,10 +37,3 @@ env/bin/pip install -r requirements.txt if [[ "$(uname -m)" == "armv7l" ]] ; then env/bin/pip install google-assistant-library==0.0.3 fi - -for config in status-led.ini voice-recognizer.ini; do - if [[ ! -f "${HOME}/.config/${config}" ]] ; then - echo "Installing ${config}" - cp "config/${config}.default" "${HOME}/.config/${config}" - fi -done diff --git a/scripts/install-services.sh b/scripts/install-services.sh index b75c2711..d03303a8 100755 --- a/scripts/install-services.sh +++ b/scripts/install-services.sh @@ -35,7 +35,3 @@ done # credentials are set up, so we explicitly enable the other services. systemctl enable alsa-init.service systemctl enable ntpdate.service -systemctl enable status-led.service -systemctl enable status-led-on.service -systemctl enable status-led-off.service -systemctl enable status-monitor.service diff --git a/shortcuts/check_audio.desktop b/shortcuts/check_audio.desktop index 4f54c918..55835330 100644 --- a/shortcuts/check_audio.desktop +++ b/shortcuts/check_audio.desktop @@ -3,5 +3,5 @@ Encoding=UTF-8 Type=Application Name=Check audio Comment=Check that the voiceHAT audio input and output are both working. -Exec=/home/pi/voice-recognizer-raspi/checkpoints/check_audio.py +Exec=$HOME/AIY-voice-kit-python/checkpoints/check_audio.py Terminal=true diff --git a/shortcuts/check_cloud.desktop b/shortcuts/check_cloud.desktop index 0d8566e6..551e854c 100644 --- a/shortcuts/check_cloud.desktop +++ b/shortcuts/check_cloud.desktop @@ -3,5 +3,5 @@ Encoding=UTF-8 Type=Application Name=Check Cloud Comment=Check that the Cloud Speech API can be used. -Exec=/home/pi/voice-recognizer-raspi/checkpoints/check_cloud.py +Exec=$HOME/AIY-voice-kit-python/checkpoints/check_cloud.py Terminal=true diff --git a/shortcuts/check_wifi.desktop b/shortcuts/check_wifi.desktop index 1c62c99c..f7e8d04b 100644 --- a/shortcuts/check_wifi.desktop +++ b/shortcuts/check_wifi.desktop @@ -3,5 +3,5 @@ Encoding=UTF-8 Type=Application Name=Check WiFi Comment=Check that the WiFi is working. 
-Exec=/home/pi/voice-recognizer-raspi/checkpoints/check_wifi.py +Exec=$HOME/AIY-voice-kit-python/checkpoints/check_wifi.py Terminal=true diff --git a/src/action.py b/src/action.py deleted file mode 100644 index 3886aff2..00000000 --- a/src/action.py +++ /dev/null @@ -1,318 +0,0 @@ -# Copyright 2017 Google Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Carry out voice commands by recognising keywords.""" - -import datetime -import logging -import subprocess - -import phue -from rgbxy import Converter - -import actionbase - -# ============================================================================= -# -# Hey, Makers! -# -# This file contains some examples of voice commands that are handled locally, -# right on your Raspberry Pi. -# -# Do you want to add a new voice command? Check out the instructions at: -# https://aiyprojects.withgoogle.com/voice/#makers-guide-3-3--create-a-new-voice-command-or-action -# (MagPi readers - watch out! You should switch to the instructions in the link -# above, since there's a mistake in the MagPi instructions.) -# -# In order to make a new voice command, you need to do two things. First, make a -# new action where it says: -# "Implement your own actions here" -# Secondly, add your new voice command to the actor near the bottom of the file, -# where it says: -# "Add your own voice commands here" -# -# ============================================================================= - -# Actions might not use the user's command. pylint: disable=unused-argument - - -# Example: Say a simple response -# ================================ -# -# This example will respond to the user by saying something. You choose what it -# says when you add the command below - look for SpeakAction at the bottom of -# the file. -# -# There are two functions: -# __init__ is called when the voice commands are configured, and stores -# information about how the action should work: -# - self.say is a function that says some text aloud. -# - self.words are the words to use as the response. -# run is called when the voice command is used. It gets the user's exact voice -# command as a parameter. - -class SpeakAction(object): - - """Says the given text via TTS.""" - - def __init__(self, say, words): - self.say = say - self.words = words - - def run(self, voice_command): - self.say(self.words) - - -# Example: Tell the current time -# ============================== -# -# This example will tell the time aloud. The to_str function will turn the time -# into helpful text (for example, "It is twenty past four."). The run function -# uses to_str say it aloud. 
- -class SpeakTime(object): - - """Says the current local time with TTS.""" - - def __init__(self, say): - self.say = say - - def run(self, voice_command): - time_str = self.to_str(datetime.datetime.now()) - self.say(time_str) - - def to_str(self, dt): - """Convert a datetime to a human-readable string.""" - HRS_TEXT = ['midnight', 'one', 'two', 'three', 'four', 'five', 'six', - 'seven', 'eight', 'nine', 'ten', 'eleven', 'twelve'] - MINS_TEXT = ["five", "ten", "quarter", "twenty", "twenty-five", "half"] - hour = dt.hour - minute = dt.minute - - # convert to units of five minutes to the nearest hour - minute_rounded = (minute + 2) // 5 - minute_is_inverted = minute_rounded > 6 - if minute_is_inverted: - minute_rounded = 12 - minute_rounded - hour = (hour + 1) % 24 - - # convert time from 24-hour to 12-hour - if hour > 12: - hour -= 12 - - if minute_rounded == 0: - if hour == 0: - return 'It is midnight.' - return "It is %s o'clock." % HRS_TEXT[hour] - - if minute_is_inverted: - return 'It is %s to %s.' % (MINS_TEXT[minute_rounded - 1], HRS_TEXT[hour]) - return 'It is %s past %s.' % (MINS_TEXT[minute_rounded - 1], HRS_TEXT[hour]) - - -# Example: Run a shell command and say its output -# =============================================== -# -# This example will use a shell command to work out what to say. You choose the -# shell command when you add the voice command below - look for the example -# below where it says the IP address of the Raspberry Pi. - -class SpeakShellCommandOutput(object): - - """Speaks out the output of a shell command.""" - - def __init__(self, say, shell_command, failure_text): - self.say = say - self.shell_command = shell_command - self.failure_text = failure_text - - def run(self, voice_command): - output = subprocess.check_output(self.shell_command, shell=True).strip() - if output: - self.say(output.decode('utf-8')) - elif self.failure_text: - self.say(self.failure_text) - - -# Example: Change the volume -# ========================== -# -# This example will can change the speaker volume of the Raspberry Pi. It uses -# the shell command SET_VOLUME to change the volume, and then GET_VOLUME gets -# the new volume. The example says the new volume aloud after changing the -# volume. - -class VolumeControl(object): - - """Changes the volume and says the new level.""" - - GET_VOLUME = r'amixer get Master | grep "Front Left:" | sed "s/.*\[\([0-9]\+\)%\].*/\1/"' - SET_VOLUME = 'amixer -q set Master %d%%' - - def __init__(self, say, change): - self.say = say - self.change = change - - def run(self, voice_command): - res = subprocess.check_output(VolumeControl.GET_VOLUME, shell=True).strip() - try: - logging.info("volume: %s", res) - vol = int(res) + self.change - vol = max(0, min(100, vol)) - subprocess.call(VolumeControl.SET_VOLUME % vol, shell=True) - self.say(_('Volume at %d %%.') % vol) - except (ValueError, subprocess.CalledProcessError): - logging.exception("Error using amixer to adjust volume.") - - -# Example: Repeat after me -# ======================== -# -# This example will repeat what the user said. It shows how you can access what -# the user said, and change what you do or how you respond. - -class RepeatAfterMe(object): - - """Repeats the user's command.""" - - def __init__(self, say, keyword): - self.say = say - self.keyword = keyword - - def run(self, voice_command): - # The command still has the 'repeat after me' keyword, so we need to - # remove it before saying whatever is left. 
- to_repeat = voice_command.replace(self.keyword, '', 1) - self.say(to_repeat) - - -# Example: Change Philips Light Color -# ==================================== -# -# This example will change the color of the named bulb to that of the -# HEX RGB color and respond with 'ok' -# -# actor.add_keyword(_('change to ocean blue'), \ -# ChangeLightColor(say, "philips-hue", "Lounge Lamp", "0077be")) - -class ChangeLightColor(object): - - """Change a Philips Hue bulb color.""" - - def __init__(self, say, bridge_address, bulb_name, hex_color): - self.converter = Converter() - self.say = say - self.hex_color = hex_color - self.bulb_name = bulb_name - self.bridge_address = bridge_address - - def run(self): - bridge = self.find_bridge() - if bridge: - light = bridge.get_light_objects("name")[self.bulb_name] - light.on = True - light.xy = self.converter.hex_to_xy(self.hex_color) - self.say(_("Ok")) - - def find_bridge(self): - try: - bridge = phue.Bridge(self.bridge_address) - bridge.connect() - return bridge - except phue.PhueRegistrationException: - logging.info("hue: No bridge registered, press button on bridge and try again") - self.say(_("No bridge registered, press button on bridge and try again")) - - -# Power: Shutdown or reboot the pi -# ================================ -# Shuts down the pi or reboots with a response -# - -class PowerCommand(object): - """Shutdown or reboot the pi""" - - def __init__(self, say, command): - self.say = say - self.command = command - - def run(self, voice_command): - if self.command == "shutdown": - self.say("Shutting down, goodbye") - subprocess.call("sudo shutdown now", shell=True) - elif self.command == "reboot": - self.say("Rebooting") - subprocess.call("sudo shutdown -r now", shell=True) - else: - logging.error("Error identifying power command.") - self.say("Sorry I didn't identify that command") - -# ========================================= -# Makers! Implement your own actions here. -# ========================================= - - -def make_actor(say): - """Create an actor to carry out the user's commands.""" - - actor = actionbase.Actor() - - actor.add_keyword( - _('ip address'), SpeakShellCommandOutput( - say, "ip -4 route get 1 | head -1 | cut -d' ' -f8", - _('I do not have an ip address assigned to me.'))) - - actor.add_keyword(_('volume up'), VolumeControl(say, 10)) - actor.add_keyword(_('volume down'), VolumeControl(say, -10)) - actor.add_keyword(_('max volume'), VolumeControl(say, 100)) - - actor.add_keyword(_('repeat after me'), - RepeatAfterMe(say, _('repeat after me'))) - - # ========================================= - # Makers! Add your own voice commands here. 
- # ========================================= - - actor.add_keyword(_('raspberry power off'), PowerCommand(say, 'shutdown')) - actor.add_keyword(_('raspberry reboot'), PowerCommand(say, 'reboot')) - - return actor - - -def add_commands_just_for_cloud_speech_api(actor, say): - """Add simple commands that are only used with the Cloud Speech API.""" - def simple_command(keyword, response): - actor.add_keyword(keyword, SpeakAction(say, response)) - - simple_command('alexa', _("We've been friends since we were both starter projects")) - simple_command( - 'beatbox', - 'pv zk pv pv zk pv zk kz zk pv pv pv zk pv zk zk pzk pzk pvzkpkzvpvzk kkkkkk bsch') - simple_command(_('clap'), _('clap clap')) - simple_command('google home', _('She taught me everything I know.')) - simple_command(_('hello'), _('hello to you too')) - simple_command(_('tell me a joke'), - _('What do you call an alligator in a vest? An investigator.')) - simple_command(_('three laws of robotics'), - _("""The laws of robotics are -0: A robot may not injure a human being or, through inaction, allow a human -being to come to harm. -1: A robot must obey orders given it by human beings except where such orders -would conflict with the First Law. -2: A robot must protect its own existence as long as such protection does not -conflict with the First or Second Law.""")) - simple_command(_('where are you from'), _("A galaxy far, far, just kidding. I'm from Seattle.")) - simple_command(_('your name'), _('A machine has no name')) - - actor.add_keyword(_('time'), SpeakTime(say)) diff --git a/src/actionbase.py b/src/actionbase.py deleted file mode 100644 index 26ef665e..00000000 --- a/src/actionbase.py +++ /dev/null @@ -1,75 +0,0 @@ -# Copyright 2017 Google Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Handle voice commands locally. - -This code lets you link keywords to actions. The actions are declared in -action.py. -""" - - -class Actor(object): - - """Passes commands on to a list of action handlers.""" - - def __init__(self): - self.handlers = [] - - def add_keyword(self, keyword, action): - self.handlers.append(KeywordHandler(keyword, action)) - - def get_phrases(self): - """Get a list of all phrases that are expected by the handlers.""" - return [phrase for h in self.handlers for phrase in h.get_phrases()] - - def can_handle(self, command): - """Check if command is handled without running the handlers. - - Returns True if the command would be handled.""" - - for handler in self.handlers: - if handler.can_handle(command): - return True - return False - - def handle(self, command): - """Pass command to handlers, stopping after one has handled the command. 
- - Returns True if the command was handled.""" - - for handler in self.handlers: - if handler.handle(command): - return True - return False - - -class KeywordHandler(object): - - """Perform the action when the given keyword is in the command.""" - - def __init__(self, keyword, action): - self.keyword = keyword.lower() - self.action = action - - def get_phrases(self): - return [self.keyword] - - def can_handle(self, command): - return self.keyword in command.lower() - - def handle(self, command): - if self.can_handle(command): - self.action.run(command) - return True - return False diff --git a/src/aiy/_apis/_speech.py b/src/aiy/_apis/_speech.py index 329cba69..375af163 100644 --- a/src/aiy/_apis/_speech.py +++ b/src/aiy/_apis/_speech.py @@ -439,6 +439,7 @@ def _log_audio_out(self, frames): response_wav.writeframes(frames) response_wav.close() + if __name__ == '__main__': logging.basicConfig(level=logging.INFO) diff --git a/src/aiy/_drivers/_button.py b/src/aiy/_drivers/_button.py index 68469e9f..1d556992 100644 --- a/src/aiy/_drivers/_button.py +++ b/src/aiy/_drivers/_button.py @@ -15,7 +15,6 @@ """Button driver for the VoiceHat.""" import time - import RPi.GPIO as GPIO @@ -40,7 +39,6 @@ def __init__(self, GPIO.PUD_UP. debounce_time: the time used in debouncing the button in seconds. """ - if polarity not in [GPIO.FALLING, GPIO.RISING]: raise ValueError( 'polarity must be one of: GPIO.FALLING or GPIO.RISING') @@ -55,8 +53,11 @@ def __init__(self, self.callback = None + def __del__(self): + GPIO.cleanup(self.channel) + def wait_for_press(self): - """Waits for the button to be pressed. + """Wait for the button to be pressed. This method blocks until the button is pressed. """ @@ -68,7 +69,7 @@ def wait_for_press(self): time.sleep(0.02) def on_press(self, callback): - """Calls the callback whenever the button is pressed. + """Call the callback whenever the button is pressed. Args: callback: a function to call whenever the button is pressed. It should @@ -91,7 +92,7 @@ def _debounce_and_callback(self, _): self.callback() def _debounce(self): - """Debounces the GPIO signal. + """Debounce the GPIO signal. Check that the input holds the expected value for the debounce period, to avoid false trigger on short pulses. diff --git a/src/aiy/_drivers/_led.py b/src/aiy/_drivers/_led.py index c7bc9bae..44951723 100644 --- a/src/aiy/_drivers/_led.py +++ b/src/aiy/_drivers/_led.py @@ -15,22 +15,20 @@ """LED driver for the VoiceHat.""" import itertools -import os import threading import time - import RPi.GPIO as GPIO class LED: """Starts a background thread to show patterns with the LED. 
- Simple usage: - my_led = LED(channel = 25) - my_led.start() - my_led.set_state(LED.BEACON) - my_led.stop() - """ + Simple usage: + my_led = LED(channel = 25) + my_led.start() + my_led.set_state(LED.BEACON) + my_led.stop() + """ OFF = 0 ON = 1 @@ -43,48 +41,50 @@ class LED: PULSE_QUICK = 8 def __init__(self, channel): - self.animator = threading.Thread(target=self._animate) + self.animator = threading.Thread(target=self._animate, daemon=True) self.channel = channel self.iterator = None self.running = False self.state = None self.sleep = 0 - GPIO.setmode(GPIO.BCM) GPIO.setup(channel, GPIO.OUT) self.pwm = GPIO.PWM(channel, 100) - self.lock = threading.Lock() + def __del__(self): + self.stop() + GPIO.cleanup(self.channel) + def start(self): - """Starts the LED driver.""" - with self.lock: + """Start the LED driver.""" + with self.lock: # pylint: disable=E1129 if not self.running: self.running = True self.pwm.start(0) # off by default self.animator.start() def stop(self): - """Stops the LED driver and sets the LED to off.""" - with self.lock: + """Stop the LED driver and sets the LED to off.""" + with self.lock: # pylint: disable=E1129 if self.running: self.running = False self.animator.join() self.pwm.stop() def set_state(self, state): - """Sets the LED driver's new state. + """Set the LED driver's new state. - Note the LED driver must be started for this to have any effect. - """ - with self.lock: + Note the LED driver must be started for this to have any effect. + """ + with self.lock: # pylint: disable=E1129 self.state = state def _animate(self): while True: state = None running = False - with self.lock: + with self.lock: # pylint: disable=E1129 state = self.state self.state = None running = self.running @@ -103,42 +103,45 @@ def _animate(self): def _parse_state(self, state): self.iterator = None self.sleep = 0.0 + handled = False + if state == self.OFF: self.pwm.ChangeDutyCycle(0) - return True - if state == self.ON: + handled = True + elif state == self.ON: self.pwm.ChangeDutyCycle(100) - return True - if state == self.BLINK: + handled = True + elif state == self.BLINK: self.iterator = itertools.cycle([0, 100]) self.sleep = 0.5 - return True - if state == self.BLINK_3: + handled = True + elif state == self.BLINK_3: self.iterator = itertools.cycle([0, 100] * 3 + [0, 0]) self.sleep = 0.25 - return True - if state == self.BEACON: + handled = True + elif state == self.BEACON: self.iterator = itertools.cycle( itertools.chain([30] * 100, [100] * 8, range(100, 30, -5))) self.sleep = 0.05 - return True - if state == self.BEACON_DARK: + handled = True + elif state == self.BEACON_DARK: self.iterator = itertools.cycle( itertools.chain([0] * 100, range(0, 30, 3), range(30, 0, -3))) self.sleep = 0.05 - return True - if state == self.DECAY: + handled = True + elif state == self.DECAY: self.iterator = itertools.cycle(range(100, 0, -2)) self.sleep = 0.05 - return True - if state == self.PULSE_SLOW: + handled = True + elif state == self.PULSE_SLOW: self.iterator = itertools.cycle( itertools.chain(range(0, 100, 2), range(100, 0, -2))) self.sleep = 0.1 - return True - if state == self.PULSE_QUICK: + handled = True + elif state == self.PULSE_QUICK: self.iterator = itertools.cycle( itertools.chain(range(0, 100, 5), range(100, 0, -5))) self.sleep = 0.05 - return True - return False + handled = True + + return handled diff --git a/src/aiy/_drivers/_player.py b/src/aiy/_drivers/_player.py index d0a83b82..cc1dc623 100644 --- a/src/aiy/_drivers/_player.py +++ b/src/aiy/_drivers/_player.py @@ -24,7 +24,6 @@ class 
Player(object): - """Plays short audio clips from a buffer or file.""" def __init__(self, output_device='default'): @@ -38,13 +37,13 @@ def play_bytes(self, audio_bytes, sample_rate, sample_width=2): sample_rate: sample rate in Hertz (24 kHz by default) sample_width: sample width in bytes (eg 2 for 16-bit audio) """ - cmd = [ 'aplay', '-q', '-t', 'raw', '-D', self._output_device, '-c', '1', + # pylint: disable=W0212 '-f', aiy._drivers._alsa.sample_width_to_string(sample_width), '-r', str(sample_rate), ] @@ -64,7 +63,6 @@ def play_wav(self, wav_path): Args: wav_path: path to the wav file """ - with wave.open(wav_path, 'r') as wav: if wav.getnchannels() != 1: raise ValueError(wav_path + ' is not a mono file') diff --git a/src/aiy/_drivers/_recorder.py b/src/aiy/_drivers/_recorder.py index d5d3e3b3..0ae59b71 100644 --- a/src/aiy/_drivers/_recorder.py +++ b/src/aiy/_drivers/_recorder.py @@ -18,7 +18,6 @@ import os import subprocess import threading -import wave import aiy._drivers._alsa @@ -26,7 +25,6 @@ class Recorder(threading.Thread): - """A driver to record audio from the VoiceHat microphones. Stream audio from microphone in a background thread and run processing @@ -54,7 +52,7 @@ def __init__(self, input_device='default', - sample_rate_hz: sample rate in hertz """ - super().__init__() + super().__init__(daemon=True) self._processors = [] @@ -66,6 +64,7 @@ def __init__(self, input_device='default', '-t', 'raw', '-D', input_device, '-c', str(channels), + # pylint: disable=W0212 '-f', aiy._drivers._alsa.sample_width_to_string(bytes_per_sample), '-r', str(sample_rate_hz), ] @@ -73,7 +72,7 @@ def __init__(self, input_device='default', self._closed = False def add_processor(self, processor): - """Adds an audio processor. + """Add an audio processor. An audio processor is an object that has an 'add_data' method with the following signature: @@ -89,8 +88,7 @@ def add_data(self, data): self._processors.append(processor) def remove_processor(self, processor): - """Removes an added audio processor.""" - + """Remove an added audio processor.""" try: self._processors.remove(processor) except ValueError: @@ -127,6 +125,12 @@ def run(self): logging.shutdown() os._exit(1) # pylint: disable=protected-access + def stop(self): + """Stops the recorder and cleans up all resources.""" + self._closed = True + if self._arecord: + self._arecord.kill() + def _handle_chunk(self, chunk): """Send audio chunk to all processors.""" for p in self._processors: @@ -137,6 +141,4 @@ def __enter__(self): return self def __exit__(self, *args): - self._closed = True - if self._arecord: - self._arecord.kill() + self.stop() diff --git a/src/aiy/_drivers/_status_ui.py b/src/aiy/_drivers/_status_ui.py index 39f97a73..c6be6256 100644 --- a/src/aiy/_drivers/_status_ui.py +++ b/src/aiy/_drivers/_status_ui.py @@ -18,55 +18,61 @@ import os.path import aiy.audio - -# Location of the LED status-ui service's FIFO file. -_LED_FIFO = "/tmp/status-led" +import aiy.voicehat logger = logging.getLogger('status_ui') class _StatusUi(object): - """Gives the user status feedback. + The LED and optionally a trigger sound tell the user when the box is ready, listening or thinking. 
""" - def __init__(self, led_fifo=_LED_FIFO): - self.trigger_sound_wave = None - if led_fifo and os.path.exists(led_fifo): - self.led_fifo = led_fifo - else: - if led_fifo: - logger.warning( - 'File %s specified for --led-fifo does not exist.', - led_fifo) - self.led_fifo = None + def __init__(self): + self._trigger_sound_wave = None + self._state_map = { + "starting": aiy.voicehat.LED.PULSE_QUICK, + "ready": aiy.voicehat.LED.BEACON_DARK, + "listening": aiy.voicehat.LED.ON, + "thinking": aiy.voicehat.LED.PULSE_QUICK, + "stopping": aiy.voicehat.LED.PULSE_QUICK, + "power-off": aiy.voicehat.LED.OFF, + "error": aiy.voicehat.LED.BLINK_3, + } + aiy.voicehat.get_led().set_state(aiy.voicehat.LED.OFF) def set_trigger_sound_wave(self, trigger_sound_wave): - """Sets the trigger sound. + """Set the trigger sound. + A trigger sound is played when the status is 'listening' to indicate that the assistant is actively listening to the user. The trigger_sound_wave argument should be the path to a valid wave file. If it is None, the trigger sound is disabled. """ - if trigger_sound_wave and os.path.exists(os.path.expanduser(trigger_sound_wave)): - self.trigger_sound_wave = os.path.expanduser(trigger_sound_wave) + if not trigger_sound_wave: + self._trigger_sound_wave = None + expanded_path = os.path.expanduser(trigger_sound_wave) + if os.path.exists(expanded_path): + self._trigger_sound_wave = expanded_path else: - if trigger_sound_wave: - logger.warning( - 'File %s specified for --trigger-sound does not exist.', - trigger_sound_wave) - self.trigger_sound_wave = None + logger.warning( + 'File %s specified as trigger sound does not exist.', + trigger_sound_wave) + self._trigger_sound_wave = None def status(self, status): """Activate the status. - For a list of supported statuses, view src/led.py. - """ - if self.led_fifo: - with open(self.led_fifo, 'w') as led: - led.write(status + '\n') - logger.info('%s...', status) - if status == 'listening' and self.trigger_sound_wave: - aiy.audio.play_wave(self.trigger_sound_wave) + This method updates the LED animation. Returns True if the status is + valid and has been updated. + """ + if status not in self._state_map: + logger.warning("unsupported state: %s, must be one of %s", + status, ",".join(self._state_map.keys())) + return False + aiy.voicehat.get_led().set_state(self._state_map[status]) + if status == 'listening' and self._trigger_sound_wave: + aiy.audio.play_wave(self._trigger_sound_wave) + return True diff --git a/src/aiy/_drivers/_tts.py b/src/aiy/_drivers/_tts.py index 7422fe3f..a941b479 100644 --- a/src/aiy/_drivers/_tts.py +++ b/src/aiy/_drivers/_tts.py @@ -19,8 +19,7 @@ import os import subprocess import tempfile - -import aiy.i18n +from aiy import i18n # Path to a tmpfs directory to avoid SD card wear TMP_DIR = '/run/user/%d' % os.getuid() @@ -29,9 +28,8 @@ def create_say(player): - """Return a function say(words) for the given player. - """ - lang = aiy.i18n.get_language_code() + """Return a function say(words) for the given player.""" + lang = i18n.get_language_code() return functools.partial(say, player, lang=lang) @@ -43,17 +41,13 @@ def say(player, words, lang='en-US'): words: string to say aloud. lang: language for the text-to-speech engine. 
""" - try: (fd, tts_wav) = tempfile.mkstemp(suffix='.wav', dir=TMP_DIR) except IOError: logger.exception('Using fallback directory for TTS output') (fd, tts_wav) = tempfile.mkstemp(suffix='.wav') - os.close(fd) - words = '%s' % words - try: subprocess.call(['pico2wave', '--lang', lang, '-w', tts_wav, words]) player.play_wav(tts_wav) @@ -63,8 +57,7 @@ def say(player, words, lang='en-US'): def _main(): import argparse - - import aiy.audio + from aiy import audio logging.basicConfig(level=logging.INFO) @@ -74,7 +67,7 @@ def _main(): if args.words: words = ' '.join(args.words) - player = aiy.audio.get_player() + player = audio.get_player() create_say(player)(words) diff --git a/src/aiy/assistant/grpc.py b/src/aiy/assistant/grpc.py index 8d86e4db..6ac11f70 100644 --- a/src/aiy/assistant/grpc.py +++ b/src/aiy/assistant/grpc.py @@ -14,8 +14,6 @@ """An API to access the Google Assistant.""" -import os.path - import aiy._apis._speech import aiy.assistant.auth_helpers import aiy.audio diff --git a/src/aiy/i18n.py b/src/aiy/i18n.py index 79ca9939..4afae431 100644 --- a/src/aiy/i18n.py +++ b/src/aiy/i18n.py @@ -15,7 +15,6 @@ """Internationalization helpers.""" import gettext -import os _DEFAULT_LANGUAGE_CODE = 'en-US' _LOCALE_DOMAIN = 'voice-recognizer' diff --git a/src/aiy/voicehat.py b/src/aiy/voicehat.py index db1fd08a..3ee8d4d0 100644 --- a/src/aiy/voicehat.py +++ b/src/aiy/voicehat.py @@ -19,8 +19,8 @@ import aiy._drivers._status_ui # GPIO definitions (BCM) -GPIO_BUTTON = 23 -GPIO_LED = 25 +_GPIO_BUTTON = 23 +_GPIO_LED = 25 # Import LED class to expose the LED constants. LED = aiy._drivers._led.LED @@ -34,7 +34,7 @@ def get_button(): """Returns a driver to the VoiceHat button. - The button driver detects edges on GPIO_BUTTON. It can be used both + The button driver detects edges on _GPIO_BUTTON. It can be used both synchronously and asynchrously. Synchronous usage: @@ -59,7 +59,7 @@ def on_button_press(_): """ global _voicehat_button if _voicehat_button is None: - _voicehat_button = aiy._drivers._button.Button(channel=GPIO_BUTTON) + _voicehat_button = aiy._drivers._button.Button(channel=_GPIO_BUTTON) return _voicehat_button @@ -77,16 +77,31 @@ def get_led(): """ global _voicehat_led if _voicehat_led is None: - _voicehat_led = aiy._drivers._led.LED(channel=GPIO_LED) + _voicehat_led = aiy._drivers._led.LED(channel=_GPIO_LED) _voicehat_led.start() return _voicehat_led def get_status_ui(): - """Returns a driver to access the StatusUI daemon. - - The StatusUI daemon controls the LEDs in the background. It supports a list - of statuses it is able to communicate with the LED on the Voicehat. + """Returns a driver to control the LED via statuses. + + The supported statuses are: + - "starting" + - "ready" + - "listening" + - "thinking" + - "stopping" + - "power-off" + - "error" + + Optionally, a sound may be played once when the status changes to + "listening". 
For example, if you have a wave file at ~/ding.wav, you may set
+    the trigger sound by:
+        aiy.voicehat.get_status_ui().set_trigger_sound_wave('~/ding.wav')
+
+    To set the status, use:
+        aiy.voicehat.get_status_ui().status('starting')
+        aiy.voicehat.get_status_ui().status('thinking')
     """
     global _status_ui
    if _status_ui is None:
diff --git a/src/assistant_grpc_demo.py b/src/assistant_grpc_demo.py
index fcfa6776..3add021a 100755
--- a/src/assistant_grpc_demo.py
+++ b/src/assistant_grpc_demo.py
@@ -16,7 +16,6 @@
 """A demo of the Google Assistant GRPC recognizer."""
 
 import logging
-import os
 
 import aiy.assistant.grpc
 import aiy.audio
@@ -33,22 +32,22 @@ def main():
     status_ui.status('starting')
     assistant = aiy.assistant.grpc.get_assistant()
     button = aiy.voicehat.get_button()
-    aiy.audio.get_recorder().start()
-    while True:
-        status_ui.status('ready')
-        print('Press the button and speak')
-        button.wait_for_press()
-        status_ui.status('listening')
-        print('Listening...')
-        text, audio = assistant.recognize()
-        if text is not None:
-            if text == 'goodbye':
-                status_ui.status('stopping')
-                print('Bye!')
-                os._exit(0)
-            print('You said "', text, '"')
-        if audio is not None:
-            aiy.audio.play_audio(audio)
+    with aiy.audio.get_recorder():
+        while True:
+            status_ui.status('ready')
+            print('Press the button and speak')
+            button.wait_for_press()
+            status_ui.status('listening')
+            print('Listening...')
+            text, audio = assistant.recognize()
+            if text is not None:
+                if text == 'goodbye':
+                    status_ui.status('stopping')
+                    print('Bye!')
+                    break
+                print('You said "', text, '"')
+            if audio is not None:
+                aiy.audio.play_audio(audio)
 
 
 if __name__ == '__main__':
diff --git a/src/auth_helpers.py b/src/auth_helpers.py
deleted file mode 100644
index 091ed0fd..00000000
--- a/src/auth_helpers.py
+++ /dev/null
@@ -1,71 +0,0 @@
-#!/usr/bin/env python3
-# Copyright 2017 Google Inc.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
- -"""Auth helpers for Google Assistant API.""" - -import json -import os.path - -import google_auth_oauthlib.flow -import google.auth.transport -import google.oauth2.credentials - - -ASSISTANT_OAUTH_SCOPE = ( - 'https://www.googleapis.com/auth/assistant-sdk-prototype' -) - - -def load_credentials(credentials_path): - migrate = False - with open(credentials_path, 'r') as f: - credentials_data = json.load(f) - if 'access_token' in credentials_data: - migrate = True - del credentials_data['access_token'] - credentials_data['scopes'] = [ASSISTANT_OAUTH_SCOPE] - if migrate: - with open(credentials_path, 'w') as f: - json.dump(credentials_data, f) - credentials = google.oauth2.credentials.Credentials(token=None, - **credentials_data) - http_request = google.auth.transport.requests.Request() - credentials.refresh(http_request) - return credentials - - -def credentials_flow_interactive(client_secrets_path): - flow = google_auth_oauthlib.flow.InstalledAppFlow.from_client_secrets_file( - client_secrets_path, - scopes=[ASSISTANT_OAUTH_SCOPE]) - if 'DISPLAY' in os.environ: - credentials = flow.run_local_server() - else: - credentials = flow.run_console() - return credentials - - -def save_credentials(credentials_path, credentials): - config_path = os.path.dirname(credentials_path) - if not os.path.isdir(config_path): - os.makedirs(config_path) - with open(credentials_path, 'w') as f: - json.dump({ - 'refresh_token': credentials.refresh_token, - 'token_uri': credentials.token_uri, - 'client_id': credentials.client_id, - 'client_secret': credentials.client_secret, - 'scopes': credentials.scopes - }, f) diff --git a/src/cloudspeech_demo.py b/src/cloudspeech_demo.py index 81f372eb..e3de3c10 100755 --- a/src/cloudspeech_demo.py +++ b/src/cloudspeech_demo.py @@ -15,8 +15,6 @@ """A demo of the Google CloudSpeech recognizer.""" -import os - import aiy.audio import aiy.cloudspeech import aiy.voicehat @@ -48,7 +46,7 @@ def main(): elif 'blink' in text: led.set_state(aiy.voicehat.LED.BLINK) elif 'goodbye' in text: - os._exit(0) + break if __name__ == '__main__': diff --git a/src/led.py b/src/led.py deleted file mode 100644 index 26ebe78b..00000000 --- a/src/led.py +++ /dev/null @@ -1,82 +0,0 @@ -# Copyright 2017 Google Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-"""Signal states on a LED""" - -import logging -import os -import time - -import aiy.voicehat -import RPi.GPIO as GPIO - -logger = logging.getLogger('led') - -CONFIG_DIR = os.getenv('XDG_CONFIG_HOME') or os.path.expanduser('~/.config') -CONFIG_FILES = [ - '/etc/status-led.ini', - os.path.join(CONFIG_DIR, 'status-led.ini') -] - - -def main(): - logging.basicConfig( - level=logging.INFO, - format="[%(asctime)s] %(levelname)s:%(name)s:%(message)s" - ) - - import configargparse - parser = configargparse.ArgParser( - default_config_files=CONFIG_FILES, - description="Status LED daemon" - ) - parser.add_argument('-G', '--gpio-pin', default=25, type=int, - help='GPIO pin for the LED (default: 25)') - args = parser.parse_args() - - led = None - state_map = { - "starting": aiy.voicehat.LED.PULSE_QUICK, - "ready": aiy.voicehat.LED.BEACON_DARK, - "listening": aiy.voicehat.LED.ON, - "thinking": aiy.voicehat.LED.PULSE_QUICK, - "stopping": aiy.voicehat.LED.PULSE_QUICK, - "power-off": aiy.voicehat.LED.OFF, - "error": aiy.voicehat.LED.BLINK_3, - } - try: - GPIO.setmode(GPIO.BCM) - - led = aiy.voicehat.get_led() - while True: - try: - state = input() - if not state: - continue - if state not in state_map: - logger.warning("unsupported state: %s, must be one of: %s", - state, ",".join(state_map.keys())) - continue - - led.set_state(state_map[state]) - except EOFError: - time.sleep(1) - except KeyboardInterrupt: - pass - finally: - led.stop() - GPIO.cleanup() - - -if __name__ == '__main__': - main() diff --git a/src/main.py b/src/main.py deleted file mode 100755 index 8565cb7f..00000000 --- a/src/main.py +++ /dev/null @@ -1,407 +0,0 @@ -#!/usr/bin/env python3 -# Copyright 2017 Google Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Main recognizer loop: wait for a trigger then perform and handle -recognition.""" - -import logging -import os -import os.path -import sys -import threading -import time - -import configargparse - -import aiy.audio -import aiy.i18n -import auth_helpers -import action -import speech - -# ============================================================================= -# -# Hey, Makers! -# -# Are you looking for actor.add_keyword? Do you want to add a new command? -# You need to edit src/action.py. Check out the instructions at: -# https://aiyprojects.withgoogle.com/voice/#makers-guide-3-3--create-a-new-voice-command-or-action -# -# ============================================================================= - -logging.basicConfig( - level=logging.INFO, - format="[%(asctime)s] %(levelname)s:%(name)s:%(message)s" -) -logger = logging.getLogger('main') - -CACHE_DIR = os.getenv('XDG_CACHE_HOME') or os.path.expanduser('~/.cache') -VR_CACHE_DIR = os.path.join(CACHE_DIR, 'voice-recognizer') - -CONFIG_DIR = os.getenv('XDG_CONFIG_HOME') or os.path.expanduser('~/.config') -CONFIG_FILES = [ - '/etc/voice-recognizer.ini', - os.path.join(CONFIG_DIR, 'voice-recognizer.ini') -] - -# Legacy fallback: old locations of secrets/credentials. 
-OLD_CLIENT_SECRETS = os.path.expanduser('~/client_secrets.json') -OLD_SERVICE_CREDENTIALS = os.path.expanduser('~/credentials.json') - -ASSISTANT_CREDENTIALS = ( - os.path.join(VR_CACHE_DIR, 'assistant_credentials.json') -) - -# Where the locale/language bundles are stored -LOCALE_DIR = os.path.realpath( - os.path.join(os.path.abspath(os.path.dirname(__file__)), '../po')) - - -def try_to_get_credentials(client_secrets): - """Try to get credentials, or print an error and quit on failure.""" - - if os.path.exists(ASSISTANT_CREDENTIALS): - return auth_helpers.load_credentials(ASSISTANT_CREDENTIALS) - - if not os.path.exists(VR_CACHE_DIR): - os.mkdir(VR_CACHE_DIR) - - if not os.path.exists(client_secrets) and os.path.exists(OLD_CLIENT_SECRETS): - client_secrets = OLD_CLIENT_SECRETS - - if not os.path.exists(client_secrets): - print('You need client secrets to use the Assistant API.') - print('Follow these instructions:') - print(' https://developers.google.com/api-client-library/python/auth/installed-app' - '#creatingcred') - print('and put the file at', client_secrets) - sys.exit(1) - - if not os.getenv('DISPLAY') and not sys.stdout.isatty(): - print(""" -To use the Assistant API, manually start the application from the dev terminal. -See the "Turn on the Assistant API" section of the Voice Recognizer -User's Guide for more info.""") - sys.exit(1) - - credentials = auth_helpers.credentials_flow_interactive(client_secrets) - auth_helpers.save_credentials(ASSISTANT_CREDENTIALS, credentials) - logging.info('OAuth credentials initialized: %s', ASSISTANT_CREDENTIALS) - return credentials - - -def create_pid_file(file_name): - if not file_name: - # Try the default locations of the pid file, preferring /run/user as - # it uses tmpfs. - pid_dir = '/run/user/%d' % os.getuid() - if not os.path.isdir(pid_dir): - pid_dir = '/tmp' - file_name = os.path.join(pid_dir, 'voice-recognizer.pid') - - with open(file_name, 'w') as pid_file: - pid_file.write("%d" % os.getpid()) - - -def main(): - parser = configargparse.ArgParser( - default_config_files=CONFIG_FILES, - description="Act on voice commands using Google's speech recognition") - parser.add_argument('-T', '--trigger', default='gpio', - choices=['clap', 'gpio', 'ok-google'], help='Trigger to use') - parser.add_argument('--cloud-speech', action='store_true', - help='Use the Cloud Speech API instead of the Assistant API') - parser.add_argument('-L', '--language', default='en-US', - help='Language code to use for speech (default: en-US)') - parser.add_argument('-l', '--led-fifo', default='/tmp/status-led', - help='Status led control fifo') - parser.add_argument('-p', '--pid-file', - help='File containing our process id for monitoring') - parser.add_argument('--audio-logging', action='store_true', - help='Log all requests and responses to WAV files in /tmp') - parser.add_argument('--assistant-always-responds', action='store_true', - help='Play Assistant responses for local actions.' 
- ' You should make sure that you have IFTTT applets for' - ' your actions to get the correct response, and also' - ' that your actions do not call say().') - parser.add_argument('--assistant-secrets', - default=os.path.expanduser('~/assistant.json'), - help='Path to client secrets for the Assistant API') - parser.add_argument('--cloud-speech-secrets', - default=os.path.expanduser('~/cloud_speech.json'), - help='Path to service account credentials for the ' - 'Cloud Speech API') - parser.add_argument('--trigger-sound', default=None, - help='Sound when trigger is activated (WAV format)') - - args = parser.parse_args() - - create_pid_file(args.pid_file) - aiy.i18n.set_locale_dir(LOCALE_DIR) - aiy.i18n.set_language_code(args.language, gettext_install=True) - - player = aiy.audio.get_player() - - if args.cloud_speech: - credentials_file = os.path.expanduser(args.cloud_speech_secrets) - if not os.path.exists(credentials_file) and os.path.exists(OLD_SERVICE_CREDENTIALS): - credentials_file = OLD_SERVICE_CREDENTIALS - recognizer = speech.CloudSpeechRequest(credentials_file) - else: - credentials = try_to_get_credentials( - os.path.expanduser(args.assistant_secrets)) - recognizer = speech.AssistantSpeechRequest(credentials) - - status_ui = StatusUi(player, args.led_fifo, args.trigger_sound) - - # The ok-google trigger is handled with the Assistant Library, so we need - # to catch this case early. - if args.trigger == 'ok-google': - if args.cloud_speech: - print('trigger=ok-google only works with the Assistant, not with ' - 'the Cloud Speech API.') - sys.exit(1) - do_assistant_library(args, credentials, player, status_ui) - else: - recorder = aiy.audio.get_recorder() - with recorder: - do_recognition(args, recorder, recognizer, player, status_ui) - - -def do_assistant_library(args, credentials, player, status_ui): - """Run a recognizer using the Google Assistant Library. - - The Google Assistant Library has direct access to the audio API, so this - Python code doesn't need to record audio. - """ - - try: - from google.assistant.library import Assistant - from google.assistant.library.event import EventType - except ImportError: - print(''' -ERROR: failed to import the Google Assistant Library. This is required for -"OK Google" hotwording, but is only available for Raspberry Pi 2/3. 
It can be -installed with: - env/bin/pip install google-assistant-library==0.0.2''') - sys.exit(1) - - say = aiy.audio.say - actor = action.make_actor(say) - - def process_event(event): - logging.info(event) - - if event.type == EventType.ON_START_FINISHED: - status_ui.status('ready') - if sys.stdout.isatty(): - print('Say "OK, Google" then speak, or press Ctrl+C to quit...') - - elif event.type == EventType.ON_CONVERSATION_TURN_STARTED: - status_ui.status('listening') - - elif event.type == EventType.ON_END_OF_UTTERANCE: - status_ui.status('thinking') - - elif event.type == EventType.ON_RECOGNIZING_SPEECH_FINISHED and \ - event.args and actor.can_handle(event.args['text']): - if not args.assistant_always_responds: - assistant.stop_conversation() - actor.handle(event.args['text']) - - elif event.type == EventType.ON_CONVERSATION_TURN_FINISHED: - status_ui.status('ready') - - elif event.type == EventType.ON_ASSISTANT_ERROR and \ - event.args and event.args['is_fatal']: - sys.exit(1) - - with Assistant(credentials) as assistant: - for event in assistant.start(): - process_event(event) - - -def do_recognition(args, recorder, recognizer, player, status_ui): - """Configure and run the recognizer.""" - say = aiy.audio.say - actor = action.make_actor(say) - - if args.cloud_speech: - action.add_commands_just_for_cloud_speech_api(actor, say) - - recognizer.add_phrases(actor) - recognizer.set_audio_logging_enabled(args.audio_logging) - - if args.trigger == 'gpio': - import triggers.gpio - triggerer = triggers.gpio.GpioTrigger(channel=23) - msg = 'Press the button on GPIO 23' - elif args.trigger == 'clap': - import triggers.clap - triggerer = triggers.clap.ClapTrigger(recorder) - msg = 'Clap your hands' - else: - logger.error("Unknown trigger '%s'", args.trigger) - return - - mic_recognizer = SyncMicRecognizer( - actor, recognizer, recorder, player, say, triggerer, status_ui, - args.assistant_always_responds) - - with mic_recognizer: - if sys.stdout.isatty(): - print(msg + ' then speak, or press Ctrl+C to quit...') - - # wait for KeyboardInterrupt - while True: - time.sleep(1) - - -class StatusUi(object): - - """Gives the user status feedback. - - The LED and optionally a trigger sound tell the user when the box is - ready, listening or thinking. - """ - - def __init__(self, player, led_fifo, trigger_sound): - self.player = player - - if led_fifo and os.path.exists(led_fifo): - self.led_fifo = led_fifo - else: - if led_fifo: - logger.warning( - 'File %s specified for --led-fifo does not exist.', - led_fifo) - self.led_fifo = None - - if trigger_sound and os.path.exists(os.path.expanduser(trigger_sound)): - self.trigger_sound = os.path.expanduser(trigger_sound) - else: - if trigger_sound: - logger.warning( - 'File %s specified for --trigger-sound does not exist.', - trigger_sound) - self.trigger_sound = None - - def status(self, status): - if self.led_fifo: - with open(self.led_fifo, 'w') as led: - led.write(status + '\n') - logger.info('%s...', status) - - if status == 'listening' and self.trigger_sound: - self.player.play_wav(self.trigger_sound) - - -class SyncMicRecognizer(object): - - """Detects triggers and runs recognition in a background thread. - - This is a context manager, so it will clean up the background thread if the - main program is interrupted. 
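Both trigger flavors above reduce to the same small contract: the main loop hands the trigger a callback with set_callback() and re-arms it with start() after every turn. A sketch of that wiring, runnable only on a Raspberry Pi with the deleted triggers package on the path:

``` python
import time

import triggers.gpio


def on_trigger():
    print('button pressed, recognition would start here')


trigger = triggers.gpio.GpioTrigger(channel=23)
trigger.set_callback(on_trigger)
trigger.start()

while True:  # GPIO edge detection fires on a background thread
    time.sleep(1)
```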
- """ - - # pylint: disable=too-many-instance-attributes - - def __init__(self, actor, recognizer, recorder, player, say, triggerer, - status_ui, assistant_always_responds): - self.actor = actor - self.player = player - self.recognizer = recognizer - self.recognizer.set_endpointer_cb(self.endpointer_cb) - self.recorder = recorder - self.say = say - self.triggerer = triggerer - self.triggerer.set_callback(self.recognize) - self.status_ui = status_ui - self.assistant_always_responds = assistant_always_responds - - self.running = False - - self.recognizer_event = threading.Event() - - def __enter__(self): - self.running = True - threading.Thread(target=self._recognize).start() - self.triggerer.start() - self.status_ui.status('ready') - - def __exit__(self, *args): - self.running = False - self.recognizer_event.set() - - self.recognizer.end_audio() - - def recognize(self): - if self.recognizer_event.is_set(): - # Duplicate trigger (eg multiple button presses) - return - - self.status_ui.status('listening') - self.recognizer.reset() - self.recorder.add_processor(self.recognizer) - # Tell recognizer to run - self.recognizer_event.set() - - def endpointer_cb(self): - self.recorder.remove_processor(self.recognizer) - self.status_ui.status('thinking') - - def _recognize(self): - while self.running: - self.recognizer_event.wait() - if not self.running: - break - - logger.info('recognizing...') - try: - self._handle_result(self.recognizer.do_request()) - except speech.Error: - logger.exception('Unexpected error') - self.say(_('Unexpected error. Try again or check the logs.')) - - self.recognizer_event.clear() - if self.recognizer.dialog_follow_on: - self.recognize() - else: - self.triggerer.start() - self.status_ui.status('ready') - - def _handle_result(self, result): - if result.transcript and self.actor.handle(result.transcript): - logger.info('handled local command: %s', result.transcript) - if result.response_audio and self.assistant_always_responds: - self._play_assistant_response(result.response_audio) - elif result.response_audio: - self._play_assistant_response(result.response_audio) - elif result.transcript: - logger.warning('%r was not handled', result.transcript) - else: - logger.warning('no command recognized') - - def _play_assistant_response(self, audio_bytes): - bytes_per_sample = speech.AUDIO_SAMPLE_SIZE - sample_rate_hz = speech.AUDIO_SAMPLE_RATE_HZ - logger.info('Playing %.4f seconds of audio...', - len(audio_bytes) / (bytes_per_sample * sample_rate_hz)) - self.player.play_bytes(audio_bytes, sample_width=bytes_per_sample, - sample_rate=sample_rate_hz) - - -if __name__ == '__main__': - main() diff --git a/src/speech.py b/src/speech.py deleted file mode 100644 index feca3a71..00000000 --- a/src/speech.py +++ /dev/null @@ -1,461 +0,0 @@ -# Copyright 2017 Google Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
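SyncMicRecognizer's thread handoff above is a standard threading.Event pattern: the trigger callback sets the event, the worker thread waits on it, and clearing it re-arms the loop. The skeleton in isolation:

``` python
import threading
import time

event = threading.Event()
running = True


def worker():
    while running:
        event.wait()             # block until a trigger fires (or shutdown)
        if not running:
            break
        print('recognizing...')  # stand-in for recognizer.do_request()
        event.clear()            # re-arm for the next trigger


thread = threading.Thread(target=worker)
thread.start()

event.set()      # what the trigger callback does
time.sleep(0.1)
running = False  # what __exit__ does: stop, then wake the worker
event.set()
thread.join()
```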
- -"""Classes for speech interaction.""" - -from abc import abstractmethod -import collections -import logging -import os -import tempfile -import wave - -import google.auth -import google.auth.exceptions -import google.auth.transport.grpc -import google.auth.transport.requests -from google.cloud.grpc.speech.v1beta1 import cloud_speech_pb2 as cloud_speech -from google.rpc import code_pb2 as error_code -from google.assistant.embedded.v1alpha1 import embedded_assistant_pb2 -import grpc -from six.moves import queue - -import aiy.i18n - -logger = logging.getLogger('speech') - -AUDIO_SAMPLE_SIZE = 2 # bytes per sample -AUDIO_SAMPLE_RATE_HZ = 16000 - - -_Result = collections.namedtuple('_Result', ['transcript', 'response_audio']) - - -class Error(Exception): - pass - - -class _ChannelFactory(object): - - """Creates gRPC channels with a given configuration.""" - - def __init__(self, api_host, credentials): - self._api_host = api_host - self._credentials = credentials - - self._checked = False - - def make_channel(self): - """Creates a secure channel.""" - - request = google.auth.transport.requests.Request() - target = self._api_host + ':443' - - if not self._checked: - # Refresh now, to catch any errors early. Otherwise, they'll be - # raised and swallowed somewhere inside gRPC. - self._credentials.refresh(request) - self._checked = True - - return google.auth.transport.grpc.secure_authorized_channel( - self._credentials, request, target) - - -class GenericSpeechRequest(object): - - """Common base class for Cloud Speech and Assistant APIs.""" - - # TODO(rodrigoq): Refactor audio logging. - # pylint: disable=attribute-defined-outside-init,too-many-instance-attributes - - DEADLINE_SECS = 185 - - def __init__(self, api_host, credentials): - self.dialog_follow_on = False - self._audio_queue = queue.Queue() - self._phrases = [] - self._channel_factory = _ChannelFactory(api_host, credentials) - self._endpointer_cb = None - self._audio_logging_enabled = False - self._request_log_wav = None - - def add_phrases(self, phrases): - """Makes the recognition more likely to recognize the given phrase(s). - phrases: an object with a method get_phrases() that returns a list of - phrases. - """ - - self._phrases.extend(phrases.get_phrases()) - - def set_endpointer_cb(self, cb): - """Callback to invoke on end of speech.""" - self._endpointer_cb = cb - - def set_audio_logging_enabled(self, audio_logging_enabled=True): - self._audio_logging_enabled = audio_logging_enabled - - if audio_logging_enabled: - self._audio_log_dir = tempfile.mkdtemp() - self._audio_log_ix = 0 - - def reset(self): - while True: - try: - self._audio_queue.get(False) - except queue.Empty: - return - - self.dialog_follow_on = False - - def add_data(self, data): - self._audio_queue.put(data) - - def end_audio(self): - self.add_data(None) - - def _get_speech_context(self): - """Return a SpeechContext instance to bias recognition towards certain - phrases. - """ - return cloud_speech.SpeechContext( - phrases=self._phrases, - ) - - @abstractmethod - def _make_service(self, channel): - """Create a service stub. - """ - return - - @abstractmethod - def _create_config_request(self): - """Create a config request for the given endpoint. - - This is sent first to the server to configure the speech recognition. - """ - return - - @abstractmethod - def _create_audio_request(self, data): - """Create an audio request for the given endpoint. - - This is sent to the server with audio to be recognized. 
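The base class's streaming model shows up just below in _request_stream(): these gRPC endpoints consume an iterator of requests, one configuration message first and audio afterwards, with None as the end-of-audio sentinel. The generator pattern in isolation, with tuples standing in for the protobuf messages:

``` python
import queue

audio_queue = queue.Queue()


def request_stream(make_config, make_audio):
    """Yield one config request, then one audio request per queued chunk."""
    yield make_config()
    while True:
        data = audio_queue.get()  # None is the end-of-audio sentinel
        if data is None:
            return
        yield make_audio(data)


audio_queue.put(b'\x00\x01')
audio_queue.put(None)
for request in request_stream(lambda: ('config',), lambda d: ('audio', d)):
    print(request)
```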
- """ - return - - def _request_stream(self): - """Yields a config request followed by requests constructed from the - audio queue. - """ - yield self._create_config_request() - - while True: - data = self._audio_queue.get() - - if not data: - return - - if self._request_log_wav: - self._request_log_wav.writeframes(data) - - yield self._create_audio_request(data) - - @abstractmethod - def _create_response_stream(self, service, request_stream, deadline): - """Given a request stream, start the gRPC call to get the response - stream. - """ - return - - @abstractmethod - def _stop_sending_audio(self, resp): - """Return true if this response says user has stopped speaking. - - This stops the request from sending further audio. - """ - return - - @abstractmethod - def _handle_response(self, resp): - """Handle a response from the remote API. - - Args: - resp: StreamingRecognizeResponse instance - """ - return - - def _end_audio_request(self): - self.end_audio() - if self._endpointer_cb: - self._endpointer_cb() - - def _handle_response_stream(self, response_stream): - for resp in response_stream: - if resp.error.code != error_code.OK: - self._end_audio_request() - raise Error('Server error: ' + resp.error.message) - - if self._stop_sending_audio(resp): - self._end_audio_request() - - self._handle_response(resp) - - # Server has closed the connection - return self._finish_request() or '' - - def _start_logging_request(self): - """Open a WAV file to log the request audio.""" - self._audio_log_ix += 1 - request_filename = '%s/request.%03d.wav' % ( - self._audio_log_dir, self._audio_log_ix) - logger.info('Writing request to %s', request_filename) - - self._request_log_wav = wave.open(request_filename, 'w') - - self._request_log_wav.setnchannels(1) - self._request_log_wav.setsampwidth(AUDIO_SAMPLE_SIZE) - self._request_log_wav.setframerate(AUDIO_SAMPLE_RATE_HZ) - - def _finish_request(self): - """Called after the final response is received.""" - - if self._request_log_wav: - self._request_log_wav.close() - - return _Result(None, None) - - def do_request(self): - """Establishes a connection and starts sending audio to the cloud - endpoint. Responses are handled by the subclass until one returns a - result. - - Returns: - namedtuple with the following fields: - transcript: string with transcript of user query - response_audio: optionally, an audio response from the server - - Raises speech.Error on error. - """ - try: - service = self._make_service(self._channel_factory.make_channel()) - - response_stream = self._create_response_stream( - service, self._request_stream(), self.DEADLINE_SECS) - - if self._audio_logging_enabled: - self._start_logging_request() - - return self._handle_response_stream(response_stream) - except ( - google.auth.exceptions.GoogleAuthError, - grpc.RpcError, - ) as exc: - raise Error('Exception in speech request') from exc - - -class CloudSpeechRequest(GenericSpeechRequest): - - """A transcription request to the Cloud Speech API. 
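The request-side audio logging above is plain stdlib wave usage: one numbered 16-bit mono 16 kHz file per utterance. The same recipe on its own, with an illustrative output path:

``` python
import wave


def write_wav(path, frames, sample_rate_hz=16000, sample_width_bytes=2):
    """Write raw little-endian PCM frames as a mono WAV file."""
    w = wave.open(path, 'w')
    w.setnchannels(1)
    w.setsampwidth(sample_width_bytes)
    w.setframerate(sample_rate_hz)
    w.writeframes(frames)
    w.close()


write_wav('/tmp/request.001.wav', b'\x00\x00' * 16000)  # one second of silence
```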
- - Args: - credentials_file: path to service account credentials JSON file - """ - - SCOPE = 'https://www.googleapis.com/auth/cloud-platform' - - def __init__(self, credentials_file): - os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = credentials_file - credentials, _ = google.auth.default(scopes=[self.SCOPE]) - - super().__init__('speech.googleapis.com', credentials) - - self.language_code = aiy.i18n.get_language_code() - - if not hasattr(cloud_speech, 'StreamingRecognizeRequest'): - raise ValueError("cloud_speech_pb2.py doesn't have StreamingRecognizeRequest.") - - self._transcript = None - - def reset(self): - super().reset() - self._transcript = None - - def _make_service(self, channel): - return cloud_speech.SpeechStub(channel) - - def _create_config_request(self): - recognition_config = cloud_speech.RecognitionConfig( - # There are a bunch of config options you can specify. See - # https://goo.gl/KPZn97 for the full list. - encoding='LINEAR16', # raw 16-bit signed LE samples - sample_rate=AUDIO_SAMPLE_RATE_HZ, - # For a list of supported languages see: - # https://cloud.google.com/speech/docs/languages. - language_code=self.language_code, # a BCP-47 language tag - speech_context=self._get_speech_context(), - ) - streaming_config = cloud_speech.StreamingRecognitionConfig( - config=recognition_config, - single_utterance=True, # TODO(rodrigoq): find a way to handle pauses - ) - - return cloud_speech.StreamingRecognizeRequest( - streaming_config=streaming_config) - - def _create_audio_request(self, data): - return cloud_speech.StreamingRecognizeRequest(audio_content=data) - - def _create_response_stream(self, service, request_stream, deadline): - return service.StreamingRecognize(request_stream, deadline) - - def _stop_sending_audio(self, resp): - """Check the endpointer type to see if an utterance has ended.""" - - if resp.endpointer_type: - endpointer_type = cloud_speech.StreamingRecognizeResponse.EndpointerType.Name( - resp.endpointer_type) - logger.info('endpointer_type: %s', endpointer_type) - - END_OF_AUDIO = cloud_speech.StreamingRecognizeResponse.EndpointerType.Value('END_OF_AUDIO') - return resp.endpointer_type == END_OF_AUDIO - - def _handle_response(self, resp): - """Store the last transcript we received.""" - if resp.results: - self._transcript = ' '.join( - result.alternatives[0].transcript for result in resp.results) - logger.info('transcript: %s', self._transcript) - - def _finish_request(self): - super()._finish_request() - return _Result(self._transcript, None) - - -class AssistantSpeechRequest(GenericSpeechRequest): - - """A request to the Assistant API, which returns audio and text.""" - - def __init__(self, credentials): - - super().__init__('embeddedassistant.googleapis.com', credentials) - - self._conversation_state = None - self._response_audio = b'' - self._transcript = None - - def reset(self): - super().reset() - self._response_audio = b'' - self._transcript = None - - def _make_service(self, channel): - return embedded_assistant_pb2.EmbeddedAssistantStub(channel) - - def _create_config_request(self): - audio_in_config = embedded_assistant_pb2.AudioInConfig( - encoding='LINEAR16', - sample_rate_hertz=AUDIO_SAMPLE_RATE_HZ, - ) - audio_out_config = embedded_assistant_pb2.AudioOutConfig( - encoding='LINEAR16', - sample_rate_hertz=AUDIO_SAMPLE_RATE_HZ, - volume_percentage=50, - ) - converse_state = embedded_assistant_pb2.ConverseState( - conversation_state=self._conversation_state, - ) - converse_config = embedded_assistant_pb2.ConverseConfig( - 
audio_in_config=audio_in_config, - audio_out_config=audio_out_config, - converse_state=converse_state, - ) - - return embedded_assistant_pb2.ConverseRequest(config=converse_config) - - def _create_audio_request(self, data): - return embedded_assistant_pb2.ConverseRequest(audio_in=data) - - def _create_response_stream(self, service, request_stream, deadline): - return service.Converse(request_stream, deadline) - - def _stop_sending_audio(self, resp): - if resp.event_type: - logger.info('event_type: %s', resp.event_type) - - return (resp.event_type == - embedded_assistant_pb2.ConverseResponse.END_OF_UTTERANCE) - - def _handle_response(self, resp): - """Accumulate audio and text from the remote end. It will be handled - in _finish_request(). - """ - - if resp.result.spoken_request_text: - logger.info('transcript: %s', resp.result.spoken_request_text) - self._transcript = resp.result.spoken_request_text - - self._response_audio += resp.audio_out.audio_data - - if resp.result.conversation_state: - self._conversation_state = resp.result.conversation_state - - if resp.result.microphone_mode: - self.dialog_follow_on = ( - resp.result.microphone_mode == - embedded_assistant_pb2.ConverseResult.DIALOG_FOLLOW_ON) - - def _finish_request(self): - super()._finish_request() - - if self._response_audio and self._audio_logging_enabled: - self._log_audio_out(self._response_audio) - - return _Result(self._transcript, self._response_audio) - - def _log_audio_out(self, frames): - response_filename = '%s/response.%03d.wav' % ( - self._audio_log_dir, self._audio_log_ix) - logger.info('Writing response to %s', response_filename) - - response_wav = wave.open(response_filename, 'w') - response_wav.setnchannels(1) - response_wav.setsampwidth(AUDIO_SAMPLE_SIZE) - response_wav.setframerate(AUDIO_SAMPLE_RATE_HZ) - response_wav.writeframes(frames) - response_wav.close() - -if __name__ == '__main__': - logging.basicConfig(level=logging.INFO) - - # for testing: use audio from a file - import argparse - parser = argparse.ArgumentParser() - parser.add_argument('file', nargs='?', default='test_speech.raw') - args = parser.parse_args() - - if os.path.exists('/home/pi/credentials.json'): - # Legacy fallback: old location of credentials. - req = CloudSpeechRequest('/home/pi/credentials.json') - else: - req = CloudSpeechRequest('/home/pi/cloud_speech.json') - - with open(args.file, 'rb') as f: - while True: - chunk = f.read(64000) - if not chunk: - break - req.add_data(chunk) - req.end_audio() - - print('down response:', req.do_request()) diff --git a/src/status-monitor.py b/src/status-monitor.py deleted file mode 100755 index 9c851d99..00000000 --- a/src/status-monitor.py +++ /dev/null @@ -1,77 +0,0 @@ -#!/usr/bin/env python3 -# Copyright 2017 Google Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
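The speech.py test harness above replays a raw capture in fixed-size chunks rather than loading the whole file at once. The reading loop generalizes to any byte sink:

``` python
def feed_file(path, sink, chunk_size=64000):
    """Stream a file into sink() chunk by chunk, as the test harness did."""
    with open(path, 'rb') as f:
        while True:
            chunk = f.read(chunk_size)
            if not chunk:
                break
            sink(chunk)


feed_file(__file__, lambda chunk: print(len(chunk), 'bytes'))
```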
- -"""Script to monitor liveness of processes and update led status.""" - -import argparse -import logging -import os -import time - -logging.basicConfig( - level=logging.INFO, - format="[%(asctime)s] %(levelname)s:%(name)s:%(message)s" -) -logger = logging.getLogger('status-monitor') - - -def get_pid(pid_file): - if not pid_file: - # Try the default locations of the pid file, as we don't know where - # the voice-recognizer created it. - pid_file = '/run/user/%d/voice-recognizer.pid' % os.getuid() - if not os.path.isfile(pid_file): - pid_file = '/tmp/voice-recognizer.pid' - - try: - with open(pid_file, 'r') as pid: - return int(pid.read()), pid_file - except IOError: - return None, pid_file - - -def set_led_status(led_fifo): - with open(led_fifo, 'w') as led: - led.write('power-off\n') - - -def check_liveness(pid_file, led_fifo): - pid, found_pid_file = get_pid(pid_file) - if pid: - if not os.path.exists("/proc/%d" % pid): - logger.info("monitored process not running") - set_led_status(led_fifo) - try: - os.unlink(found_pid_file) - except IOError: - pass - - -def main(): - parser = argparse.ArgumentParser( - description="Monitor liveness of processes and update led status.") - parser.add_argument('-l', '--led-fifo', default='/tmp/status-led', - help='Status led control fifo') - parser.add_argument('-p', '--pid-file', - help='File containing our process id for monitoring') - args = parser.parse_args() - - while True: - check_liveness(args.pid_file, args.led_fifo) - time.sleep(1) - - -if __name__ == '__main__': - main() diff --git a/src/triggers/__init__.py b/src/triggers/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/src/triggers/clap.py b/src/triggers/clap.py deleted file mode 100644 index ec731eaf..00000000 --- a/src/triggers/clap.py +++ /dev/null @@ -1,52 +0,0 @@ -# Copyright 2017 Google Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Detect claps in the audio stream.""" - -import logging -import numpy as np - -from triggers.trigger import Trigger - -logger = logging.getLogger('trigger') - - -class ClapTrigger(Trigger): - - """Detect claps in the audio stream.""" - - def __init__(self, recorder): - super().__init__() - - self.have_clap = True # don't start yet - self.prev_sample = 0 - recorder.add_processor(self) - - def start(self): - self.prev_sample = 0 - self.have_clap = False - - def add_data(self, data): - """ audio is mono 16bit signed at 16kHz """ - audio = np.fromstring(data, 'int16') - if not self.have_clap: - # alternative: np.abs(audio).sum() > thresh - shifted = np.roll(audio, 1) - shifted[0] = self.prev_sample - val = np.max(np.abs(shifted - audio)) - if val > (65536 // 4): # quarter max delta - logger.info("clap detected") - self.have_clap = True - self.callback() - self.prev_sample = audio[-1] diff --git a/src/triggers/gpio.py b/src/triggers/gpio.py deleted file mode 100644 index 67b928da..00000000 --- a/src/triggers/gpio.py +++ /dev/null @@ -1,61 +0,0 @@ -# Copyright 2017 Google Inc. 
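ClapTrigger's detector above is effectively one numpy expression: the peak absolute difference between adjacent samples, compared against a quarter of the 16-bit range. The same check in isolation (np.fromstring is deprecated, so this sketch uses np.frombuffer and widens to int32 to avoid wraparound in the subtraction):

``` python
import numpy as np


def has_clap(data, prev_sample=0, threshold=65536 // 4):
    """True if adjacent 16-bit samples ever differ by more than threshold."""
    audio = np.frombuffer(data, dtype=np.int16).astype(np.int32)
    shifted = np.roll(audio, 1)
    shifted[0] = prev_sample
    return bool(np.max(np.abs(shifted - audio)) > threshold)


# A synthetic clap: silence with one full-scale spike.
samples = np.zeros(1600, dtype=np.int16)
samples[800] = 32767
print(has_clap(samples.tobytes()))  # True
```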
-# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Detect edges on the given GPIO channel.""" - -import time - -import RPi.GPIO as GPIO - -from triggers.trigger import Trigger - - -class GpioTrigger(Trigger): - - """Detect edges on the given GPIO channel.""" - - DEBOUNCE_TIME = 0.05 - - def __init__(self, channel, polarity=GPIO.FALLING, - pull_up_down=GPIO.PUD_UP): - super().__init__() - - self.channel = channel - self.polarity = polarity - - if polarity not in [GPIO.FALLING, GPIO.RISING]: - raise ValueError('polarity must be GPIO.FALLING or GPIO.RISING') - - self.expected_value = polarity == GPIO.RISING - self.event_detect_added = False - - GPIO.setmode(GPIO.BCM) - GPIO.setup(channel, GPIO.IN, pull_up_down=pull_up_down) - - def start(self): - if not self.event_detect_added: - GPIO.add_event_detect(self.channel, self.polarity, callback=self.debounce) - self.event_detect_added = True - - def debounce(self, _): - """Check that the input holds the expected value for the debounce period, - to avoid false trigger on short pulses.""" - - start = time.time() - while time.time() < start + self.DEBOUNCE_TIME: - if GPIO.input(self.channel) != self.expected_value: - return - time.sleep(0.01) - - self.callback() diff --git a/src/triggers/trigger.py b/src/triggers/trigger.py deleted file mode 100644 index 4cc363e1..00000000 --- a/src/triggers/trigger.py +++ /dev/null @@ -1,29 +0,0 @@ -# Copyright 2017 Google Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
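GpioTrigger's debounce above is worth seeing outside the GPIO context: instead of firing on the raw edge, it polls the line for a hold period and bails on the first bounce. The same logic with a hypothetical read_input() callable standing in for GPIO.input():

``` python
import time

DEBOUNCE_TIME = 0.05  # seconds the input must hold its level


def debounced(read_input, expected_value=False, poll_interval=0.01):
    """Return True only if read_input() keeps expected_value for the window."""
    deadline = time.time() + DEBOUNCE_TIME
    while time.time() < deadline:
        if read_input() != expected_value:
            return False  # the line bounced back: ignore this edge
        time.sleep(poll_interval)
    return True


print(debounced(lambda: False))  # a steady low line counts as a press
```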
- -"""Detect trigger events that start voice recognition requests.""" - - -class Trigger(object): - - """Base class for a Trigger.""" - - def __init__(self): - self.callback = None - - def set_callback(self, callback): - self.callback = callback - - def start(self): - pass diff --git a/systemd/status-led-off.service b/systemd/status-led-off.service deleted file mode 100644 index bf44d748..00000000 --- a/systemd/status-led-off.service +++ /dev/null @@ -1,12 +0,0 @@ -[Unit] -Description=status led startup update -DefaultDependencies=no -Before=shutdown.target -Requires=status-led.service - -[Service] -Type=oneshot -ExecStart=/bin/bash -c '/bin/echo "stopping" >/tmp/status-led' - -[Install] -WantedBy=reboot.target halt.target poweroff.target diff --git a/systemd/status-led-on.service b/systemd/status-led-on.service deleted file mode 100644 index 78795854..00000000 --- a/systemd/status-led-on.service +++ /dev/null @@ -1,12 +0,0 @@ -[Unit] -Description=status led startup update -DefaultDependencies=no -After=status-led.service -Requires=status-led.service - -[Service] -Type=oneshot -ExecStart=/bin/bash -c '/bin/echo "starting" >/tmp/status-led' - -[Install] -WantedBy=basic.target diff --git a/systemd/status-led.service b/systemd/status-led.service deleted file mode 100644 index be0f4d2a..00000000 --- a/systemd/status-led.service +++ /dev/null @@ -1,16 +0,0 @@ -[Unit] -Description=status led service -DefaultDependencies=no -After=local-fs.target sysinit.target - -[Service] -ExecStartPre=/bin/bash -c 'test -p /tmp/status-led || /bin/mknod /tmp/status-led p' -ExecStart=/bin/bash -c '/home/pi/voice-recognizer-raspi/env/bin/python3 -u src/led.py