diff --git a/.gitignore b/.gitignore new file mode 100644 index 00000000..1b726da6 --- /dev/null +++ b/.gitignore @@ -0,0 +1,3 @@ +env +__pycache__ +*.pyc diff --git a/.pylintrc b/.pylintrc new file mode 100644 index 00000000..9eb3a1b5 --- /dev/null +++ b/.pylintrc @@ -0,0 +1,424 @@ +# This file was mostly generated with pylint --generate-rcfile. To see changes +# from the default values, search for "DEFAULT" in this file. + +[MASTER] + +# Specify a configuration file. +#rcfile= + +# Python code to execute, usually for sys.path manipulation such as +# pygtk.require(). +#init-hook= + +# Add files or directories to the blacklist. They should be base names, not +# paths. +ignore=CVS + +# Add files or directories matching the regex patterns to the blacklist. The +# regex matches against base names, not paths. +ignore-patterns= + +# Pickle collected data for later comparisons. +persistent=yes + +# List of plugins (as comma separated values of python modules names) to load, +# usually to register additional checkers. +load-plugins= + +# Use multiple processes to speed up Pylint. +jobs=1 + +# Allow loading of arbitrary C extensions. Extensions are imported into the +# active Python interpreter and may run arbitrary code. +unsafe-load-any-extension=no + +# A comma-separated list of package or module names from where C extensions may +# be loaded. Extensions are loading into the active Python interpreter and may +# run arbitrary code +extension-pkg-whitelist= + +# Allow optimization of some AST trees. This will activate a peephole AST +# optimizer, which will apply various small optimizations. For instance, it can +# be used to obtain the result of joining multiple strings with the addition +# operator. Joining a lot of strings can lead to a maximum recursion error in +# Pylint and this flag can prevent that. It has one side effect, the resulting +# AST will be different than the one from reality. This option is deprecated +# and it will be removed in Pylint 2.0. 
+optimize-ast=no + + +[MESSAGES CONTROL] + +# Only show warnings with the listed confidence levels. Leave empty to show +# all. Valid levels: HIGH, INFERENCE, INFERENCE_FAILURE, UNDEFINED +confidence= + +# Enable the message, report, category or checker with the given id(s). You can +# either give multiple identifier separated by comma (,) or put this option +# multiple time (only on the command line, not in the configuration file where +# it should appear only once). See also the "--disable" option for examples. +#enable= + +# Disable the message, report, category or checker with the given id(s). You +# can either give multiple identifiers separated by comma (,) or put this +# option multiple times (only on the command line, not in the configuration +# file where it should appear only once).You can also use "--disable=all" to +# disable everything first and then reenable specific checks. For example, if +# you want to run only the similarities checker, you can use "--disable=all +# --enable=similarities". If you want to run only the classes checker, but have +# no Warning level messages displayed, use"--disable=all --enable=classes +# --disable=W" +# DEFAULT: without fixme,invalid-name,no-self-use +# RATIONALE: (fixme) It generates warnings for TODOs. +# RATIONALE: (invalid-name) It rejects class-level constants and short names. +# RATIONALE: (no-self-use) @staticmethod is discouraged by Google style. 
+disable=backtick,reduce-builtin,nonzero-method,long-suffix,file-builtin,indexing-exception,buffer-builtin,standarderror-builtin,apply-builtin,delslice-method,unicode-builtin,suppressed-message,zip-builtin-not-iterating,intern-builtin,old-octal-literal,old-division,range-builtin-not-iterating,useless-suppression,print-statement,filter-builtin-not-iterating,cmp-builtin,coerce-builtin,input-builtin,setslice-method,execfile-builtin,long-builtin,raising-string,getslice-method,cmp-method,coerce-method,next-method-called,raw_input-builtin,oct-method,import-star-module-level,unichr-builtin,round-builtin,parameter-unpacking,map-builtin-not-iterating,unpacking-in-except,dict-view-method,dict-iter-method,hex-method,old-raise-syntax,basestring-builtin,metaclass-assignment,using-cmp-argument,no-absolute-import,xrange-builtin,old-ne-operator,reload-builtin,fixme,invalid-name,no-self-use + + +[REPORTS] + +# Set the output format. Available formats are text, parseable, colorized, msvs +# (visual studio) and html. You can also give a reporter class, eg +# mypackage.mymodule.MyReporterClass. +output-format=text + +# Put messages in a separate file for each module / package specified on the +# command line instead of printing them on stdout. Reports (if any) will be +# written in a file name "pylint_global.[txt|html]". This option is deprecated +# and it will be removed in Pylint 2.0. +files-output=no + +# Tells whether to display a full report or only the messages +reports=yes + +# Python expression which should return a note less than 10 (10 is the highest +# note). You have access to the variables errors warning, statement which +# respectively contain the number of errors / warnings messages and the total +# number of statements analyzed. This is used by the global evaluation report +# (RP0004). +evaluation=10.0 - ((float(5 * error + warning + refactor + convention) / statement) * 10) + +# Template used to display messages. 
This is a python new-style format string +# used to format the message information. See doc for all details +#msg-template= + + +[SPELLING] + +# Spelling dictionary name. Available dictionaries: none. To make it working +# install python-enchant package. +spelling-dict= + +# List of comma separated words that should not be checked. +spelling-ignore-words= + +# A path to a file that contains private dictionary; one word per line. +spelling-private-dict-file= + +# Tells whether to store unknown words to indicated private dictionary in +# --spelling-private-dict-file option instead of raising a message. +spelling-store-unknown-words=no + + +[FORMAT] + +# Maximum number of characters on a single line. +max-line-length=100 + +# Regexp for a line that is allowed to be longer than the limit. +ignore-long-lines=^\s*(# )?<?https?://\S+>?$ + +# Allow the body of an if to be on the same line as the test if there is no +# else. +single-line-if-stmt=no + +# List of optional constructs for which whitespace checking is disabled. `dict- +# separator` is used to allow tabulation in dicts, etc.: {1 : 1,\n222: 2}. +# `trailing-comma` allows a space between comma and closing bracket: (a, ). +# `empty-line` allows space-only lines. +no-space-check=trailing-comma,dict-separator + +# Maximum number of lines in a module +max-module-lines=1000 + +# String used as indentation unit. This is usually " " (4 spaces) or "\t" (1 +# tab). +indent-string=' ' + +# Number of spaces of indent required inside a hanging or continued line. +indent-after-paren=4 + +# Expected format of line ending, e.g. empty (any line ending), LF or CRLF. +expected-line-ending-format= + + +[MISCELLANEOUS] + +# List of note tags to take in consideration, separated by a comma. +notes=FIXME,XXX,TODO + + +[LOGGING] + +# Logging modules to check that the string format arguments are in logging +# function parameter format +logging-modules=logging + + +[TYPECHECK] + +# Tells whether missing members accessed in mixin class should be ignored. 
A +# mixin class is detected if its name ends with "mixin" (case insensitive). +ignore-mixin-members=yes + +# List of module names for which member attributes should not be checked +# (useful for modules/projects where namespaces are manipulated during runtime +# and thus existing member attributes cannot be deduced by static analysis. It +# supports qualified module names, as well as Unix pattern matching. +ignored-modules=configargparse,google,grpc,numpy,oauth2client,RPi.GPIO,scipy,googlesamples + +# List of class names for which member attributes should not be checked (useful +# for classes with dynamically set attributes). This supports the use of +# qualified names. +ignored-classes=optparse.Values,thread._local,_thread._local + +# List of members which are set dynamically and missed by pylint inference +# system, and so shouldn't trigger E1101 when accessed. Python regular +# expressions are accepted. +generated-members=.*Response.* + +# List of decorators that produce context managers, such as +# contextlib.contextmanager. Add to this list to register other decorators that +# produce valid context managers. +contextmanager-decorators=contextlib.contextmanager + + +[SIMILARITIES] + +# Minimum lines number of a similarity. +min-similarity-lines=4 + +# Ignore comments when computing similarities. +ignore-comments=yes + +# Ignore docstrings when computing similarities. +ignore-docstrings=yes + +# Ignore imports when computing similarities. +ignore-imports=no + + +[BASIC] + +# Good variable names which should always be accepted, separated by a comma +good-names=i,j,k,ex,Run,_ + +# Bad variable names which should always be refused, separated by a comma +bad-names=foo,bar,baz,toto,tutu,tata + +# Colon-delimited sets of names that determine each other's naming style when +# the name regexes allow several styles. 
+name-group= + +# Include a hint for the correct naming format with invalid-name +include-naming-hint=no + +# List of decorators that produce properties, such as abc.abstractproperty. Add +# to this list to register other decorators that produce valid properties. +property-classes=abc.abstractproperty + +# Regular expression matching correct variable names +variable-rgx=[a-z_][a-z0-9_]{2,30}$ + +# Naming hint for variable names +variable-name-hint=[a-z_][a-z0-9_]{2,30}$ + +# Regular expression matching correct constant names +const-rgx=(([A-Z_][A-Z0-9_]*)|(__.*__))$ + +# Naming hint for constant names +const-name-hint=(([A-Z_][A-Z0-9_]*)|(__.*__))$ + +# Regular expression matching correct class attribute names +class-attribute-rgx=([A-Za-z_][A-Za-z0-9_]{2,30}|(__.*__))$ + +# Naming hint for class attribute names +class-attribute-name-hint=([A-Za-z_][A-Za-z0-9_]{2,30}|(__.*__))$ + +# Regular expression matching correct inline iteration names +inlinevar-rgx=[A-Za-z_][A-Za-z0-9_]*$ + +# Naming hint for inline iteration names +inlinevar-name-hint=[A-Za-z_][A-Za-z0-9_]*$ + +# Regular expression matching correct function names +function-rgx=[a-z_][a-z0-9_]{2,30}$ + +# Naming hint for function names +function-name-hint=[a-z_][a-z0-9_]{2,30}$ + +# Regular expression matching correct class names +class-rgx=[A-Z_][a-zA-Z0-9]+$ + +# Naming hint for class names +class-name-hint=[A-Z_][a-zA-Z0-9]+$ + +# Regular expression matching correct attribute names +attr-rgx=[a-z_][a-z0-9_]{2,30}$ + +# Naming hint for attribute names +attr-name-hint=[a-z_][a-z0-9_]{2,30}$ + +# Regular expression matching correct argument names +argument-rgx=[a-z_][a-z0-9_]{2,30}$ + +# Naming hint for argument names +argument-name-hint=[a-z_][a-z0-9_]{2,30}$ + +# Regular expression matching correct module names +module-rgx=(([a-z_][a-z0-9_]*)|([A-Z][a-zA-Z0-9]+))$ + +# Naming hint for module names +module-name-hint=(([a-z_][a-z0-9_]*)|([A-Z][a-zA-Z0-9]+))$ + +# Regular expression matching correct method 
names +method-rgx=[a-z_][a-z0-9_]{2,30}$ + +# Naming hint for method names +method-name-hint=[a-z_][a-z0-9_]{2,30}$ + +# Regular expression which should only match function or class names that do +# not require a docstring. +# DEFAULT: ^_ +# RATIONALE: Docstring for main would duplicate file docstring. +no-docstring-rgx=^_|^main$ + +# Minimum line length for functions/classes that require docstrings, shorter +# ones are exempt. +# DEFAULT: -1 +# RATIONALE: Lots of short methods, where docstrings would be repetitive. +docstring-min-length=10 + + +[ELIF] + +# Maximum number of nested blocks for function / method body +max-nested-blocks=5 + + +[VARIABLES] + +# Tells whether we should check for unused import in __init__ files. +init-import=no + +# A regular expression matching the name of dummy variables (i.e. expectedly +# not used). +dummy-variables-rgx=(_+[a-zA-Z0-9]*?$)|dummy + +# List of additional names supposed to be defined in builtins. Remember that +# you should avoid to define new builtins when possible. +# DEFAULT: none +# RATIONALE: Provided by gettext. +additional-builtins=_ + +# List of strings which can identify a callback function by name. A callback +# name must start or end with one of those strings. +callbacks=cb_,_cb + +# List of qualified module names which can have objects that can redefine +# builtins. +redefining-builtins-modules=six.moves,future.builtins + + +[CLASSES] + +# List of method names used to declare (i.e. assign) instance attributes. +defining-attr-methods=__init__,__new__,setUp + +# List of valid names for the first argument in a class method. +valid-classmethod-first-arg=cls + +# List of valid names for the first argument in a metaclass class method. +valid-metaclass-classmethod-first-arg=mcs + +# List of member names, which should be excluded from the protected access +# warning. 
+exclude-protected=_asdict,_fields,_replace,_source,_make + + +[DESIGN] + +# Maximum number of arguments for function / method +# DEFAULT: 5 +# RATIONALE: Keyword arguments make this manageable. +max-args=10 + +# Argument names that match this expression will be ignored. Default to name +# with leading underscore +ignored-argument-names=_.* + +# Maximum number of locals for function / method body +max-locals=15 + +# Maximum number of return / yield for function / method body +max-returns=6 + +# Maximum number of branch for function / method body +max-branches=12 + +# Maximum number of statements in function / method body +max-statements=50 + +# Maximum number of parents for a class (see R0901). +max-parents=7 + +# Maximum number of attributes for a class (see R0902). +max-attributes=7 + +# Minimum number of public methods for a class (see R0903). +# DEFAULT: 2 +# RATIONALE: Classes can have docstrings, namedtuples can't. +min-public-methods=0 + +# Maximum number of public methods for a class (see R0904). +max-public-methods=20 + +# Maximum number of boolean expressions in a if statement +max-bool-expr=5 + + +[IMPORTS] + +# Deprecated modules which should not be used, separated by a comma +deprecated-modules=optparse + +# Create a graph of every (i.e. internal and external) dependencies in the +# given file (report RP0402 must not be disabled) +import-graph= + +# Create a graph of external dependencies in the given file (report RP0402 must +# not be disabled) +ext-import-graph= + +# Create a graph of internal dependencies in the given file (report RP0402 must +# not be disabled) +int-import-graph= + +# Force import order to recognize a module as part of the standard +# compatibility libraries. +known-standard-library=audioop + +# Force import order to recognize a module as part of a third party library. +known-third-party=enchant + +# Analyse import fallback blocks. 
This can be used to support both Python 2 and +# 3 compatible code, which means that the block might have code that exists +# only in one or another interpreter, leading to false positives when analysed. +analyse-fallback-blocks=no + + +[EXCEPTIONS] + +# Exceptions that will emit a warning when being caught. Defaults to +# "Exception" +overgeneral-exceptions=Exception diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 00000000..5e286bd7 --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,25 @@ +# How to contribute + +We'd love to accept your patches and contributions to this project. There are +just a few small guidelines you need to follow. + +## Contributor License Agreement + +Contributions to this project must be accompanied by a Contributor License +Agreement. You (or your employer) retain the copyright to your contribution, +this simply gives us permission to use and redistribute your contributions as +part of the project. Head over to <https://cla.developers.google.com/> to see +your current agreements on file or to sign a new one. + +You generally only need to submit a CLA once, so if you've already submitted one +(even if it was for a different project), you probably don't need to do it +again. + +## Code reviews + +All submissions, including submissions by project members, require review. We +use GitHub pull requests for this purpose. Consult [GitHub Help] for more +information on using pull requests. + +[GitHub Help]: https://help.github.com/articles/about-pull-requests/ + diff --git a/HACKING.md b/HACKING.md new file mode 100644 index 00000000..71527ce0 --- /dev/null +++ b/HACKING.md @@ -0,0 +1,68 @@ +# Setting up the image + +We recommend using [the images](https://aiyprojects.withgoogle.com/voice) we +provide. Those images are based on [Raspbian](https://www.raspberrypi.org/downloads/raspbian/), +with a few customizations and are tested on the Raspberry Pi 3. 
+ +If you prefer to set up Raspbian yourself, add a source for `stretch`, the +testing version of Raspbian: +``` shell +echo "deb http://archive.raspbian.org/raspbian/ stretch main" | sudo tee /etc/apt/sources.list.d/stretch.list >/dev/null +echo 'APT::Default-Release "jessie";' | sudo tee /etc/apt/apt.conf.d/default-release >/dev/null +sudo apt-get update +sudo apt-get upgrade +sudo rpi-update +sudo reboot +``` + +Next install the project dependencies and setup services and the ALSA +configuration for the VoiceHAT hardware: +``` shell +cd ~/voice-recognizer-raspi +scripts/install-deps.sh +scripts/install-services.sh +scripts/install-alsa-config.sh +``` + +## Get service credentials + +To access the cloud services you need to register a project and generate +credentials for cloud APIs. This is documented in the +[setup instructions](https://aiyprojects.withgoogle.com/voice) on the +webpage. + +# Making code changes + +If you edit the code on a different computer, you can deploy it to your +Raspberry Pi by running: + +``` shell +make deploy +``` +To execute the script on the Raspberry Pi run, login to it and run: +``` shell +cd ~/voice-recognizer-raspi +source env/bin/activate +python3 src/main.py +``` + +# I18N + +Strings wrapped with `_()` are marked for translation. + +``` shell +# update catalog after string changed +pygettext3 -d voice-recognizer -p po src/main.py src/action.py + +# add new language +msgmerge po/de.po po/voice-recognizer.pot +# now edit po/de.po + +# update language +msgmerge -U po/de.po po/voice-recognizer.pot +# now edit po/de.po + +# create language bundle +mkdir po/de/LC_MESSAGES/ +msgfmt po/de.po -o po/de/LC_MESSAGES/voice-recognizer.mo +``` diff --git a/LICENSE b/LICENSE new file mode 100644 index 00000000..d6456956 --- /dev/null +++ b/LICENSE @@ -0,0 +1,202 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. 
+ + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. 
+ + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of 
the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/Makefile b/Makefile new file mode 100644 index 00000000..8bbe7184 --- /dev/null +++ b/Makefile @@ -0,0 +1,14 @@ +PI ?= raspberrypi.local + +SHORTCUTS = $(wildcard shortcuts/*.desktop) + +check: + PYTHONPATH=$$PWD/src python3 -m unittest discover tests + +deploy_scripts: + git ls-files | rsync -avz --exclude=".*" --exclude="*.desktop" --files-from - . pi@$(PI):~/voice-recognizer-raspi + +deploy_shortcuts: + scp $(SHORTCUTS) pi@$(PI):~/Desktop + +deploy: deploy_scripts deploy_shortcuts diff --git a/README.md b/README.md new file mode 100644 index 00000000..0fc7a5e9 --- /dev/null +++ b/README.md @@ -0,0 +1,7 @@ +This repository contains the source code for the AIYProjects "Voice Kit". 
See +https://aiyprojects.withgoogle.com/voice/ + +## Troubleshooting + +The scripts in the `checkpoints` directory verify the Raspberry Pi's setup. +They can be run from the desktop shortcuts or from the terminal. diff --git a/checkpoints/check_audio.py b/checkpoints/check_audio.py new file mode 100755 index 00000000..596aa4c4 --- /dev/null +++ b/checkpoints/check_audio.py @@ -0,0 +1,203 @@ +#!/usr/bin/env python3 +# Copyright 2017 Google Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Check that the voiceHAT audio input and output are both working. 
+""" + +import os +import subprocess +import tempfile +import textwrap +import time +import traceback + +CARDS_PATH = '/proc/asound/cards' +VOICEHAT_ID = 'googlevoicehat' + +SERVICE_NAME = 'voice-recognizer' +ACTIVE_STR = 'ActiveState=active' +INACTIVE_STR = 'ActiveState=inactive' + +STOP_DELAY = 1.0 + +VOICE_RECOGNIZER_PATH = '/home/pi/voice-recognizer-raspi' +PYTHON3 = 'python3' +AUDIO_PY = VOICE_RECOGNIZER_PATH + '/src/audio.py' + +TEST_SOUND_PATH = '/usr/share/sounds/alsa/Front_Center.wav' + +RECORD_DURATION_SECONDS = '3' + + +def get_sound_cards(): + """Read a dictionary of ALSA cards from /proc, indexed by number.""" + cards = {} + + with open(CARDS_PATH) as f: # pylint: disable=invalid-name + for line in f.read().splitlines(): + try: + index = int(line.strip().split()[0]) + except (IndexError, ValueError): + continue + + cards[index] = line + + return cards + + +def is_service_active(): + """Returns True if the voice-recognizer service is active.""" + output = subprocess.check_output(['systemctl', 'show', SERVICE_NAME]).decode('utf-8') + + if ACTIVE_STR in output: + return True + elif INACTIVE_STR in output: + return False + else: + print('WARNING: failed to parse output:') + print(output) + return False + + +def play_wav(wav_path): + """Play a WAV file.""" + subprocess.check_call([PYTHON3, AUDIO_PY, 'play', wav_path], + stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL) + + +def ask(prompt): + """Get a yes or no answer from the user.""" + ans = input(prompt + ' (y/n) ') + + while not ans or ans[0].lower() not in 'yn': + ans = input('Please enter y or n: ') + + return ans[0].lower() == 'y' + + +def stop_service(): + """Stop the voice-recognizer so we can use the mic. + + Returns: + True if the service has been stopped. 
+ """ + if not is_service_active(): + return False + + subprocess.check_call(['sudo', 'systemctl', 'stop', SERVICE_NAME], stdout=subprocess.PIPE) + time.sleep(STOP_DELAY) + if is_service_active(): + print('WARNING: failed to stop service, mic may not work.') + return False + + return True + + +def start_service(): + """Start the voice-recognizer again.""" + subprocess.check_call(['sudo', 'systemctl', 'start', SERVICE_NAME], stdout=subprocess.PIPE) + + +def check_voicehat_present(): + """Check that the voiceHAT is present.""" + + return any(VOICEHAT_ID in card for card in get_sound_cards().values()) + + +def check_voicehat_is_first_card(): + """Check that the voiceHAT is the first card on the system.""" + + cards = get_sound_cards() + + return 0 in cards and VOICEHAT_ID in cards[0] + + +def check_speaker_works(): + """Check the speaker makes a sound.""" + print('Playing a test sound...') + play_wav(TEST_SOUND_PATH) + + return ask('Did you hear the test sound?') + + +def check_mic_works(): + """Check the microphone records correctly.""" + temp_file, temp_path = tempfile.mkstemp(suffix='.wav') + os.close(temp_file) + + try: + input("When you're ready, press enter and say 'Testing, 1 2 3'...") + print('Recording...') + subprocess.check_call( + [PYTHON3, AUDIO_PY, 'dump', temp_path, + '-d', RECORD_DURATION_SECONDS], + stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL) + print('Playing back recorded audio...') + play_wav(temp_path) + finally: + try: + os.unlink(temp_path) + except FileNotFoundError: + pass + + return ask('Did you hear your own voice?') + + +def do_checks(): + """Run all audio checks and print status.""" + if not check_voicehat_present(): + print(textwrap.fill( + """Failed to find the voiceHAT soundcard. 
Please try reinstalling the +voiceHAT driver:""")) + print(' cd ~/drivers-raspi && sudo ./install.sh && sudo reboot') + return + + if not check_voicehat_is_first_card(): + print(textwrap.fill( + """The voiceHAT is not the first sound device, so the voice recognizer +may be unable to find it. Please try removing other sound drivers.""")) + return + + if not check_speaker_works(): + print(textwrap.fill( + """There may be a problem with your speaker. Check that it's +connected properly.""")) + return + + if not check_mic_works(): + print(textwrap.fill( + """There may be a problem with your microphone. Check that it's +connected properly.""")) + return + + print('The audio seems to be working.') + + +def main(): + """Run all checks, stopping the voice-recognizer if necessary.""" + should_restart = stop_service() + + do_checks() + + if should_restart: + start_service() + +if __name__ == '__main__': + try: + main() + input('Press Enter to close...') + except: # pylint: disable=bare-except + traceback.print_exc() + input('Press Enter to close...') diff --git a/checkpoints/check_cloud.py b/checkpoints/check_cloud.py new file mode 100755 index 00000000..2b55beb2 --- /dev/null +++ b/checkpoints/check_cloud.py @@ -0,0 +1,101 @@ +#!/usr/bin/env python3 +# Copyright 2017 Google Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Check that the Cloud Speech API can be used. 
+""" + +import json +import os +import subprocess +import traceback + +if os.path.exists('/home/pi/credentials.json'): + # Legacy fallback: old location of credentials. + CREDENTIALS_PATH = '/home/pi/credentials.json' +else: + CREDENTIALS_PATH = '/home/pi/cloud_speech.json' + +VOICE_RECOGNIZER_PATH = '/home/pi/voice-recognizer-raspi' +PYTHON3 = VOICE_RECOGNIZER_PATH + '/env/bin/python3' +SPEECH_PY = VOICE_RECOGNIZER_PATH + '/src/speech.py' +SPEECH_PY_ENV = { + 'VIRTUAL_ENV': VOICE_RECOGNIZER_PATH + '/env', + 'PATH': VOICE_RECOGNIZER_PATH + '/env/bin:' + os.getenv('PATH'), +} +TEST_AUDIO = VOICE_RECOGNIZER_PATH + '/checkpoints/test_hello.raw' +RECOGNIZED_TEXT = 'hello' + + +def check_credentials_valid(): + """Check the credentials are JSON service credentials.""" + try: + obj = json.load(open(CREDENTIALS_PATH)) + except ValueError: + return False + + return 'type' in obj and obj['type'] == 'service_account' + + +def check_speech_reco(): + """Try to test the speech reco code from voice-recognizer-raspi.""" + print('Testing the Google Cloud Speech API...') + p = subprocess.Popen( # pylint: disable=invalid-name + [PYTHON3, SPEECH_PY, TEST_AUDIO], env=SPEECH_PY_ENV, + stdout=subprocess.PIPE, stderr=subprocess.STDOUT) + output = p.communicate()[0].decode('utf-8') + + if p.returncode: + print('Speech recognition failed with', p.returncode) + print(output) + return False + else: + # speech.py succeeded, check the text was recognized + if RECOGNIZED_TEXT in output: + return True + else: + print('Speech recognition output not as expected:') + print(output) + print('Expected:', RECOGNIZED_TEXT) + return False + + +def main(): + """Run all checks and print status.""" + if not os.path.exists(CREDENTIALS_PATH): + print( + """Please follow these instructions to get Google Cloud credentials: +https://cloud.google.com/speech/docs/getting-started#set_up_your_project +and save them to""", CREDENTIALS_PATH) + return + + if not check_credentials_valid(): + print( + CREDENTIALS_PATH, 
"""is not valid, please check that you have downloaded JSON +service credentials.""") + return + + if not check_speech_reco(): + print('Failed to test the Cloud Speech API. Please see error above.') + return + + print("Everything's set up to use the Google Cloud.") + +if __name__ == '__main__': + try: + main() + input('Press Enter to close...') + except: # pylint: disable=bare-except + traceback.print_exc() + input('Press Enter to close...') diff --git a/checkpoints/check_wifi.py b/checkpoints/check_wifi.py new file mode 100755 index 00000000..a70096b1 --- /dev/null +++ b/checkpoints/check_wifi.py @@ -0,0 +1,80 @@ +#!/usr/bin/env python3 +# Copyright 2017 Google Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Check that the WiFi is working. 
+""" + +import socket +import subprocess +import traceback + +WPA_CONF_PATH = '/etc/wpa_supplicant/wpa_supplicant.conf' +GOOGLE_SERVER_ADDRESS = ('speech.googleapis.com', 443) + + +def check_wifi_is_configured(): + """Check wpa_supplicant.conf has at least one network configured.""" + output = subprocess.check_output(['sudo', 'cat', WPA_CONF_PATH]).decode('utf-8') + + return 'network=' in output + + +def check_wifi_is_connected(): + """Check wlan0 has an IP address.""" + output = subprocess.check_output(['ifconfig', 'wlan0']).decode('utf-8') + + return 'inet addr' in output + + +def check_can_reach_google_server(): + """Check the API server is reachable on port 443.""" + print("Trying to contact Google's servers...") + try: + sock = socket.create_connection(GOOGLE_SERVER_ADDRESS, timeout=10) + sock.close() + return True + except: # Many exceptions can come from sockets. pylint: disable=bare-except + return False + + +def main(): + """Run all checks and print status.""" + print('Checking the WiFi connection...') + + if not check_wifi_is_configured(): + print('Please click the WiFi icon at the top right to set up a WiFi network.') + return + + if not check_wifi_is_connected(): + print( + """You are not connected to WiFi. Please click the WiFi icon at the top right +to check your settings.""") + return + + if not check_can_reach_google_server(): + print( + """Failed to reach Google servers. Please check that your WiFi network is +connected to the internet.""") + return + + print('The WiFi connection seems to be working.') + +if __name__ == '__main__': + try: + main() + input('Press Enter to close...') + except: # pylint: disable=bare-except + traceback.print_exc() + input('Press Enter to close...') diff --git a/checkpoints/load_test.py b/checkpoints/load_test.py new file mode 100755 index 00000000..5cbeccd5 --- /dev/null +++ b/checkpoints/load_test.py @@ -0,0 +1,187 @@ +#!/usr/bin/env python3 +# Copyright 2017 Google Inc. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Synthetic load test similar to running the actual app. +""" + +import json +import os +import subprocess +import tempfile +import time +import traceback + +if os.path.exists('/home/pi/credentials.json'): + # Legacy fallback: old location of credentials. + CREDENTIALS_PATH = '/home/pi/credentials.json' +else: + CREDENTIALS_PATH = '/home/pi/cloud_speech.json' + +SERVICE_NAME = 'voice-recognizer' +ACTIVE_STR = 'ActiveState=active' +INACTIVE_STR = 'ActiveState=inactive' + +STOP_DELAY = 1.0 + +VOICE_RECOGNIZER_PATH = '/home/pi/voice-recognizer-raspi' +PYTHON3 = VOICE_RECOGNIZER_PATH + '/env/bin/python3' +AUDIO_PY = VOICE_RECOGNIZER_PATH + '/src/audio.py' +SPEECH_PY = VOICE_RECOGNIZER_PATH + '/src/speech.py' +SPEECH_PY_ENV = { + 'VIRTUAL_ENV': VOICE_RECOGNIZER_PATH + '/env', + 'PATH': VOICE_RECOGNIZER_PATH + '/env/bin:' + os.getenv('PATH'), +} +TEST_AUDIO = '/usr/share/sounds/alsa/Front_Center.wav' +LED_FIFO = '/tmp/status-led' + +RECORD_DURATION_SECONDS = '3' + + +def check_credentials_valid(): + """Check the credentials are JSON service credentials.""" + try: + obj = json.load(open(CREDENTIALS_PATH)) + except ValueError: + return False + + return 'type' in obj and obj['type'] == 'service_account' + + +def is_service_active(): + """Returns True if the voice-recognizer service is active.""" + output = subprocess.check_output(['systemctl', 'show', SERVICE_NAME]).decode('utf-8') + + if ACTIVE_STR in output: + return True + 
elif INACTIVE_STR in output: + return False + else: + print('WARNING: failed to parse output:') + print(output) + return False + + +def stop_service(): + """Stop the voice-recognizer so we can use the mic. + + Returns: + True if the service has been stopped. + """ + if not is_service_active(): + return False + + subprocess.check_call(['sudo', 'systemctl', 'stop', SERVICE_NAME], stdout=subprocess.PIPE) + time.sleep(STOP_DELAY) + if is_service_active(): + print('WARNING: failed to stop service, mic may not work.') + return False + + return True + + +def start_service(): + """Start the voice-recognizer again.""" + subprocess.check_call(['sudo', 'systemctl', 'start', SERVICE_NAME], stdout=subprocess.PIPE) + + +def check_speech_reco(): + """Try to test the speech reco code from voice-recognizer-raspi.""" + p = subprocess.Popen( # pylint: disable=invalid-name + [PYTHON3, SPEECH_PY, TEST_AUDIO], env=SPEECH_PY_ENV, + stdout=subprocess.PIPE, stderr=subprocess.STDOUT) + p.communicate()[0].decode('utf-8') + + if p.returncode: + return False + else: + return True + + +def play_wav(): + """Play a WAV file.""" + subprocess.check_call([PYTHON3, AUDIO_PY, 'play', TEST_AUDIO], + stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL) + + +def record_wav(): + """Record a wav file.""" + temp_file, temp_path = tempfile.mkstemp(suffix='.wav') + os.close(temp_file) + subprocess.check_call( + [PYTHON3, AUDIO_PY, 'dump', temp_path, + '-d', RECORD_DURATION_SECONDS], + stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL) + try: + os.unlink(temp_path) + except FileNotFoundError: + pass + + +def led_status(status): + with open(LED_FIFO, 'w') as led: + led.write(status + '\n') + + +def run_test(): + print('Running test forever - press Ctrl+C to stop...') + try: + while True: + print('\rrecognizing', end='') + led_status('listening') + check_speech_reco() + time.sleep(0.5) + print('\rrecording ', end='') + led_status('thinking') + record_wav() + time.sleep(0.5) + print('\rplaying ', end='') + 
led_status('ready') + play_wav() + time.sleep(0.5) + except KeyboardInterrupt: + led_status('power-off') + print('\nTest finished') + + +def main(): + """Run all checks and print status.""" + if not os.path.exists(CREDENTIALS_PATH): + print( + """Please follow these instructions to get Google Cloud credentials: +https://cloud.google.com/speech/docs/getting-started#set_up_your_project +and save them to""", CREDENTIALS_PATH) + return + + if not check_credentials_valid(): + print( + CREDENTIALS_PATH, """is not valid, please check that you have downloaded JSON +service credentials.""") + return + + should_restart = stop_service() + + run_test() + + if should_restart: + start_service() + + +if __name__ == '__main__': + try: + main() + input('Press Enter to close...') + except: # pylint: disable=bare-except + traceback.print_exc() + input('Press Enter to close...') diff --git a/checkpoints/test_hello.raw b/checkpoints/test_hello.raw new file mode 100644 index 00000000..42538f81 Binary files /dev/null and b/checkpoints/test_hello.raw differ diff --git a/config/status-led.ini.default b/config/status-led.ini.default new file mode 100644 index 00000000..e963a18d --- /dev/null +++ b/config/status-led.ini.default @@ -0,0 +1,5 @@ +# Default config file for the status-led service. +# Should be installed to ~/.config/status-led.ini + +# GPIO pin (in BCM numbering) to use for the status LED (default 25) +# gpio-pin = 25 diff --git a/config/voice-recognizer.ini.default b/config/voice-recognizer.ini.default new file mode 100644 index 00000000..0e4ed939 --- /dev/null +++ b/config/voice-recognizer.ini.default @@ -0,0 +1,22 @@ +# Default config file for the voice-recognizer service. +# Should be installed to ~/.config/voice-recognizer.ini + +# Select the trigger: gpio (default), clap. +# trigger = clap + +# Uncomment to enable the Cloud Speech API for local commands. +# cloud-speech = true + +# Uncomment to change the language. 
The following are supported: +# Embedded Assistant API [cloud-speech = false] (at launch) +# en-US +# Cloud Speech API with local TTS [cloud-speech = true] +# de-DE en-GB en-US es-ES fr-FR it-IT +# (This is limited by the local TTS. Cloud Speech API supports many more.) +# language = en-US + +# Path to client secrets for the Assistant API. +assistant-secrets = ~/assistant.json + +# Path to service account credentials for the Cloud Speech API. +cloud-speech-secrets = ~/cloud_speech.json diff --git a/po/de.po b/po/de.po new file mode 100644 index 00000000..db130ddf --- /dev/null +++ b/po/de.po @@ -0,0 +1,127 @@ +# SOME DESCRIPTIVE TITLE. +# Copyright (C) YEAR ORGANIZATION +# FIRST AUTHOR , YEAR. +# +msgid "" +msgstr "" +"Project-Id-Version: voice-recognizer-1.0\n" +"POT-Creation-Date: 2017-04-21 14:33+CEST\n" +"PO-Revision-Date: 2017-02-16 09:26+0100\n" +"Last-Translator: Stefan Sauer \n" +"Language-Team: \n" +"Language: de\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=UTF-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: pygettext.py 1.5\n" +"X-Generator: Poedit 1.5.4\n" + +#: src/action.py:172 +msgid "Volume at %d %%." +msgstr "Lautstärke auf %d %%." + +#: src/action.py:202 +msgid "ip address" +msgstr "IP Adresse" + +#: src/action.py:204 +msgid "I do not have an ip address assigned to me." +msgstr "Ich habe keine IP Adresse zugewiesen bekommen." + +#: src/action.py:206 +msgid "volume up" +msgstr "Lautstärke hoch" + +#: src/action.py:207 +msgid "volume down" +msgstr "Lautstärke runter" + +#: src/action.py:208 +msgid "max volume" +msgstr "volle Lautstärke" + +#: src/action.py:210 src/action.py:211 +msgid "repeat after me" +msgstr "sprich mir nach" + +#: src/action.py:225 +msgid "We've been friends since we were both starter projects" +msgstr "Da wir beide ganz neu sind, verstehen wir uns prima." 
+ +#: src/action.py:229 +msgid "clap" +msgstr "klatsche" + +#: src/action.py:229 +msgid "clap clap" +msgstr "klatsch klatsch" + +#: src/action.py:230 +msgid "She taught me everything I know." +msgstr "Sie hat mir alles im Leben beigebracht." + +#: src/action.py:231 +msgid "hello" +msgstr "Hallo" + +#: src/action.py:231 +msgid "hello to you too" +msgstr "Hallo auch zu dir" + +#: src/action.py:232 +msgid "tell me a joke" +msgstr "Erzähl mir einen Witz" + +#: src/action.py:233 +msgid "What do you call an alligator in a vest? An investigator." +msgstr "Was hat vier Beine und kann fliegen? Zwei Vögel." + +#: src/action.py:234 +msgid "three laws of robotics" +msgstr "" + +#: src/action.py:235 +msgid "" +"The laws of robotics are\n" +"0: A robot may not injure a human being or, through inaction, allow a human\n" +"being to come to harm.\n" +"1: A robot must obey orders given it by human beings except where such " +"orders\n" +"would conflict with the First Law.\n" +"2: A robot must protect its own existence as long as such protection does " +"not\n" +"conflict with the First or Second Law." +msgstr "" + +#: src/action.py:242 +msgid "A galaxy far, far, just kidding. I'm from Seattle." +msgstr "" +"Aus einer weit, weit entfernten Galaxie, Ach Quatsch ich komm aus Seattle." + +#: src/action.py:242 +msgid "where are you from" +msgstr "Woher kommst du" + +#: src/action.py:243 +msgid "A machine has no name" +msgstr "Eine Maschine hat keinen Namen." + +#: src/action.py:243 +msgid "your name" +msgstr "dein Name" + +#: src/action.py:245 +msgid "time" +msgstr "Zeit" + +#: src/main.py:279 +msgid "Unexpected error. Try again or check the logs." +msgstr "Unerwarteter Fehler. Probiere noch einmal oder kontrolliere die Logs." + +#: src/main.py:292 +msgid "I don’t know how to answer that." +msgstr "Das weiß ich nicht." + +#: src/main.py:295 +msgid "Could you try that again?" +msgstr "Bitte probiere es noch einmal." 
diff --git a/po/de/LC_MESSAGES/voice-recognizer.mo b/po/de/LC_MESSAGES/voice-recognizer.mo new file mode 100644 index 00000000..0226877a Binary files /dev/null and b/po/de/LC_MESSAGES/voice-recognizer.mo differ diff --git a/po/voice-recognizer.pot b/po/voice-recognizer.pot new file mode 100644 index 00000000..2bce8ab8 --- /dev/null +++ b/po/voice-recognizer.pot @@ -0,0 +1,124 @@ +# SOME DESCRIPTIVE TITLE. +# Copyright (C) YEAR ORGANIZATION +# FIRST AUTHOR , YEAR. +# +msgid "" +msgstr "" +"Project-Id-Version: PACKAGE VERSION\n" +"POT-Creation-Date: 2017-04-21 14:33+CEST\n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: FULL NAME \n" +"Language-Team: LANGUAGE \n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=UTF-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: pygettext.py 1.5\n" + + +#: src/action.py:172 +msgid "Volume at %d %%." +msgstr "" + +#: src/action.py:202 +msgid "ip address" +msgstr "" + +#: src/action.py:204 +msgid "I do not have an ip address assigned to me." +msgstr "" + +#: src/action.py:206 +msgid "volume up" +msgstr "" + +#: src/action.py:207 +msgid "volume down" +msgstr "" + +#: src/action.py:208 +msgid "max volume" +msgstr "" + +#: src/action.py:210 src/action.py:211 +msgid "repeat after me" +msgstr "" + +#: src/action.py:225 +msgid "We've been friends since we were both starter projects" +msgstr "" + +#: src/action.py:229 +msgid "clap" +msgstr "" + +#: src/action.py:229 +msgid "clap clap" +msgstr "" + +#: src/action.py:230 +msgid "She taught me everything I know." +msgstr "" + +#: src/action.py:231 +msgid "hello" +msgstr "" + +#: src/action.py:231 +msgid "hello to you too" +msgstr "" + +#: src/action.py:232 +msgid "tell me a joke" +msgstr "" + +#: src/action.py:233 +msgid "What do you call an alligator in a vest? An investigator." 
+msgstr "" + +#: src/action.py:234 +msgid "three laws of robotics" +msgstr "" + +#: src/action.py:235 +msgid "" +"The laws of robotics are\n" +"0: A robot may not injure a human being or, through inaction, allow a human\n" +"being to come to harm.\n" +"1: A robot must obey orders given it by human beings except where such orders\n" +"would conflict with the First Law.\n" +"2: A robot must protect its own existence as long as such protection does not\n" +"conflict with the First or Second Law." +msgstr "" + +#: src/action.py:242 +msgid "A galaxy far, far, just kidding. I'm from Seattle." +msgstr "" + +#: src/action.py:242 +msgid "where are you from" +msgstr "" + +#: src/action.py:243 +msgid "A machine has no name" +msgstr "" + +#: src/action.py:243 +msgid "your name" +msgstr "" + +#: src/action.py:245 +msgid "time" +msgstr "" + +#: src/main.py:279 +msgid "Unexpected error. Try again or check the logs." +msgstr "" + +#: src/main.py:292 +msgid "I don’t know how to answer that." +msgstr "" + +#: src/main.py:295 +msgid "Could you try that again?" +msgstr "" + diff --git a/scripts/asound.conf b/scripts/asound.conf new file mode 100755 index 00000000..e5b102f8 --- /dev/null +++ b/scripts/asound.conf @@ -0,0 +1,30 @@ +options snd_rpi_googlemihat_soundcard index=0 + +pcm.softvol { + type softvol + slave.pcm dmix + control { + name Master + card 0 + } +} + +pcm.micboost { + type route + slave.pcm dsnoop + ttable { + 0.0 30.0 + 1.1 30.0 + } +} + +pcm.!default { + type asym + playback.pcm "plug:softvol" + capture.pcm "plug:micboost" +} + +ctl.!default { + type hw + card 0 +} diff --git a/scripts/install-alsa-config.sh b/scripts/install-alsa-config.sh new file mode 100755 index 00000000..2ba86816 --- /dev/null +++ b/scripts/install-alsa-config.sh @@ -0,0 +1,34 @@ +#!/bin/bash +# +# Replace the Raspberry Pi's default ALSA config with one for the voiceHAT. +# +# Copyright 2017 Google Inc. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +set -o errexit + +cd "$(dirname "${BASH_SOURCE[0]}")/.." + +asoundrc=/home/pi/.asoundrc +global_asoundrc=/etc/asound.conf + +for rcfile in "$asoundrc" "$global_asoundrc"; do + if [[ -f "$rcfile" ]] ; then + echo "Renaming $rcfile to $rcfile.bak..." + mv "$rcfile" "$rcfile.bak" + fi +done + +cp scripts/asound.conf "$global_asoundrc" +echo "Installed voiceHAT ALSA config at $global_asoundrc" diff --git a/scripts/install-deps.sh b/scripts/install-deps.sh new file mode 100755 index 00000000..9d65b94a --- /dev/null +++ b/scripts/install-deps.sh @@ -0,0 +1,34 @@ +#!/bin/bash +# Copyright 2017 Google Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +RUN_AS="pi" + +set -o errexit + +if [ "$USER" != "$RUN_AS" ] +then + echo "This script must run as $RUN_AS, trying to change user..." 
+ exec sudo -u $RUN_AS $0 +fi + +sudo apt-get -y install alsa-utils python3-all-dev python3-pip python3-numpy \ + python3-scipy python3-virtualenv rsync sox libttspico-utils ntpdate +sudo apt-get -y install -t stretch python3-httplib2 python3-configargparse +sudo pip3 install --upgrade pip virtualenv + +cd ~/voice-recognizer-raspi +virtualenv --system-site-packages -p python3 env +env/bin/pip install google-assistant-sdk[auth_helpers]==0.1.0 \ + grpc-google-cloud-speech-v1beta1==0.14.0 protobuf==3.1.0 diff --git a/scripts/install-services.sh b/scripts/install-services.sh new file mode 100755 index 00000000..40d54882 --- /dev/null +++ b/scripts/install-services.sh @@ -0,0 +1,34 @@ +#!/bin/bash +# +# Install systemd service files for running on startup. +# +# Copyright 2017 Google Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +set -o errexit + +cd "$(dirname "${BASH_SOURCE[0]}")/.." + +for service in systemd/*.service; do + cp $service /lib/systemd/system/ +done + +# voice-recognizer is not enabled by default, as it doesn't work until +# credentials are set up, so we explicitly enable the other services. 
+sudo systemctl enable alsa-init.service +sudo systemctl enable ntpdate.service +sudo systemctl enable status-led.service +sudo systemctl enable status-led-on.service +sudo systemctl enable status-led-off.service +sudo systemctl enable status-monitor.service diff --git a/scripts/pre-commit b/scripts/pre-commit new file mode 100755 index 00000000..8e0fe2e7 --- /dev/null +++ b/scripts/pre-commit @@ -0,0 +1,18 @@ +#!/bin/sh +# git hook to ensure code style +# cp scripts/pre-commit .git/hooks/ + +which >/dev/null pep8 || (echo "please install pep8"; exit 1) +files=$(git diff --name-only --staged --diff-filter=ACMRTUXB | egrep "*.py$") + +if test -n "$files"; then + pep8 --max-line-length=120 $files + res=$? + if [ $res -ne 0 ]; then + echo + autopep8 --max-line-length=120 --diff $files + echo + echo "To fix run: autopep8 --max-line-length=120 -i $files" + fi + exit $res +fi diff --git a/shortcuts/check_audio.desktop b/shortcuts/check_audio.desktop new file mode 100644 index 00000000..4f54c918 --- /dev/null +++ b/shortcuts/check_audio.desktop @@ -0,0 +1,7 @@ +[Desktop Entry] +Encoding=UTF-8 +Type=Application +Name=Check audio +Comment=Check that the voiceHAT audio input and output are both working. +Exec=/home/pi/voice-recognizer-raspi/checkpoints/check_audio.py +Terminal=true diff --git a/shortcuts/check_cloud.desktop b/shortcuts/check_cloud.desktop new file mode 100644 index 00000000..0d8566e6 --- /dev/null +++ b/shortcuts/check_cloud.desktop @@ -0,0 +1,7 @@ +[Desktop Entry] +Encoding=UTF-8 +Type=Application +Name=Check Cloud +Comment=Check that the Cloud Speech API can be used. +Exec=/home/pi/voice-recognizer-raspi/checkpoints/check_cloud.py +Terminal=true diff --git a/shortcuts/check_wifi.desktop b/shortcuts/check_wifi.desktop new file mode 100644 index 00000000..1c62c99c --- /dev/null +++ b/shortcuts/check_wifi.desktop @@ -0,0 +1,7 @@ +[Desktop Entry] +Encoding=UTF-8 +Type=Application +Name=Check WiFi +Comment=Check that the WiFi is working. 
+Exec=/home/pi/voice-recognizer-raspi/checkpoints/check_wifi.py +Terminal=true diff --git a/src/action.py b/src/action.py new file mode 100644 index 00000000..b149d221 --- /dev/null +++ b/src/action.py @@ -0,0 +1,251 @@ +# Copyright 2017 Google Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Carry out voice commands by recognising keywords.""" + +import datetime +import logging +import subprocess + +import actionbase + +# ============================================================================= +# +# Hey, Makers! +# +# This file contains some examples of voice commands that are handled locally, +# right on your Raspberry Pi. +# +# Do you want to add a new voice command? Check out the instructions at: +# https://aiyprojects.withgoogle.com/voice/#makers-guide-3-3--create-a-new-voice-command-or-action +# (MagPi readers - watch out! You should switch to the instructions in the link +# above, since there's a mistake in the MagPi instructions.) +# +# In order to make a new voice command, you need to do two things. First, make a +# new action where it says: +# "Implement your own actions here" +# Secondly, add your new voice command to the actor near the bottom of the file, +# where it says: +# "Add your own voice commands here" +# +# ============================================================================= + +# Actions might not use the user's command. 
pylint: disable=unused-argument + + +# Example: Say a simple response +# ================================ +# +# This example will respond to the user by saying something. You choose what it +# says when you add the command below - look for SpeakAction at the bottom of +# the file. +# +# There are two functions: +# __init__ is called when the voice commands are configured, and stores +# information about how the action should work: +# - self.say is a function that says some text aloud. +# - self.words are the words to use as the response. +# run is called when the voice command is used. It gets the user's exact voice +# command as a parameter. + +class SpeakAction(object): + + """Says the given text via TTS.""" + + def __init__(self, say, words): + self.say = say + self.words = words + + def run(self, voice_command): + self.say(self.words) + + +# Example: Tell the current time +# ============================== +# +# This example will tell the time aloud. The to_str function will turn the time +# into helpful text (for example, "It is twenty past four."). The run function +# uses to_str say it aloud. 
+ +class SpeakTime(object): + + """Says the current local time with TTS.""" + + def __init__(self, say): + self.say = say + + def run(self, voice_command): + time_str = self.to_str(datetime.datetime.now()) + self.say(time_str) + + def to_str(self, dt): + """Convert a datetime to a human-readable string.""" + HRS_TEXT = ['midnight', 'one', 'two', 'three', 'four', 'five', 'six', + 'seven', 'eight', 'nine', 'ten', 'eleven', 'twelve'] + MINS_TEXT = ["five", "ten", "quarter", "twenty", "twenty-five", "half"] + hour = dt.hour + minute = dt.minute + + # convert to units of five minutes to the nearest hour + minute_rounded = (minute + 2) // 5 + minute_is_inverted = minute_rounded > 6 + if minute_is_inverted: + minute_rounded = 12 - minute_rounded + hour = (hour + 1) % 24 + + # convert time from 24-hour to 12-hour + if hour > 12: + hour -= 12 + + if minute_rounded == 0: + if hour == 0: + return 'It is midnight.' + return "It is %s o'clock." % HRS_TEXT[hour] + + if minute_is_inverted: + return 'It is %s to %s.' % (MINS_TEXT[minute_rounded - 1], HRS_TEXT[hour]) + return 'It is %s past %s.' % (MINS_TEXT[minute_rounded - 1], HRS_TEXT[hour]) + + +# Example: Run a shell command and say its output +# =============================================== +# +# This example will use a shell command to work out what to say. You choose the +# shell command when you add the voice command below - look for the example +# below where it says the IP address of the Raspberry Pi. 
+ +class SpeakShellCommandOutput(object): + + """Speaks out the output of a shell command.""" + + def __init__(self, say, shell_command, failure_text): + self.say = say + self.shell_command = shell_command + self.failure_text = failure_text + + def run(self, voice_command): + output = subprocess.check_output(self.shell_command, shell=True).strip() + if output: + self.say(output) + elif self.failure_text: + self.say(self.failure_text) + + +# Example: Change the volume +# ========================== +# +# This example will can change the speaker volume of the Raspberry Pi. It uses +# the shell command SET_VOLUME to change the volume, and then GET_VOLUME gets +# the new volume. The example says the new volume aloud after changing the +# volume. + +class VolumeControl(object): + + """Changes the volume and says the new level.""" + + GET_VOLUME = r'amixer get Master | grep "Front Left:" | sed "s/.*\[\([0-9]\+\)%\].*/\1/"' + SET_VOLUME = 'amixer -q set Master %d%%' + + def __init__(self, say, change): + self.say = say + self.change = change + + def run(self, voice_command): + res = subprocess.check_output(VolumeControl.GET_VOLUME, shell=True).strip() + try: + logging.info("volume: %s", res) + vol = int(res) + self.change + vol = max(0, min(100, vol)) + subprocess.call(VolumeControl.SET_VOLUME % vol, shell=True) + self.say(_('Volume at %d %%.') % vol) + except (ValueError, subprocess.CalledProcessError): + logging.exception("Error using amixer to adjust volume.") + + +# Example: Repeat after me +# ======================== +# +# This example will repeat what the user said. It shows how you can access what +# the user said, and change what you do or how you respond. + +class RepeatAfterMe(object): + + """Repeats the user's command.""" + + def __init__(self, say, keyword): + self.say = say + self.keyword = keyword + + def run(self, voice_command): + # The command still has the 'repeat after me' keyword, so we need to + # remove it before saying whatever is left. 
+ to_repeat = voice_command.replace(self.keyword, '', 1) + self.say(to_repeat) + + +# ========================================= +# Makers! Implement your own actions here. +# ========================================= + + +def make_actor(say): + """Create an actor to carry out the user's commands.""" + + actor = actionbase.Actor() + + actor.add_keyword( + _('ip address'), SpeakShellCommandOutput( + say, "ip -4 route get 1 | head -1 | cut -d' ' -f8", + _('I do not have an ip address assigned to me.'))) + + actor.add_keyword(_('volume up'), VolumeControl(say, 10)) + actor.add_keyword(_('volume down'), VolumeControl(say, -10)) + actor.add_keyword(_('max volume'), VolumeControl(say, 100)) + + actor.add_keyword(_('repeat after me'), + RepeatAfterMe(say, _('repeat after me'))) + + # ========================================= + # Makers! Add your own voice commands here. + # ========================================= + + return actor + + +def add_commands_just_for_cloud_speech_api(actor, say): + """Add simple commands that are only used with the Cloud Speech API.""" + def simple_command(keyword, response): + actor.add_keyword(keyword, SpeakAction(say, response)) + + simple_command('alexa', _("We've been friends since we were both starter projects")) + simple_command( + 'beatbox', + 'pv zk pv pv zk pv zk kz zk pv pv pv zk pv zk zk pzk pzk pvzkpkzvpvzk kkkkkk bsch') + simple_command(_('clap'), _('clap clap')) + simple_command('google home', _('She taught me everything I know.')) + simple_command(_('hello'), _('hello to you too')) + simple_command(_('tell me a joke'), + _('What do you call an alligator in a vest? An investigator.')) + simple_command(_('three laws of robotics'), + _("""The laws of robotics are +0: A robot may not injure a human being or, through inaction, allow a human +being to come to harm. +1: A robot must obey orders given it by human beings except where such orders +would conflict with the First Law. 
+2: A robot must protect its own existence as long as such protection does not +conflict with the First or Second Law.""")) + simple_command(_('where are you from'), _("A galaxy far, far, just kidding. I'm from Seattle.")) + simple_command(_('your name'), _('A machine has no name')) + + actor.add_keyword(_('time'), SpeakTime(say)) diff --git a/src/actionbase.py b/src/actionbase.py new file mode 100644 index 00000000..7e802a4c --- /dev/null +++ b/src/actionbase.py @@ -0,0 +1,63 @@ +# Copyright 2017 Google Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Handle voice commands locally. + +This code lets you link keywords to actions. The actions are declared in +action.py. +""" + + +class Actor(object): + + """Passes commands on to a list of action handlers.""" + + def __init__(self): + self.handlers = [] + + def add_keyword(self, keyword, action): + self.handlers.append(KeywordHandler(keyword, action)) + + def get_phrases(self): + """Get a list of all phrases that are expected by the handlers.""" + return [phrase for h in self.handlers for phrase in h.get_phrases()] + + def handle(self, command): + """Pass command to handlers, stopping after one has handled the command. 
+ + Returns True if the command was handled.""" + + for handler in self.handlers: + if handler.handle(command): + return True + return False + + +class KeywordHandler(object): + + """Perform the action when the given keyword is in the command.""" + + def __init__(self, keyword, action): + self.keyword = keyword.lower() + self.action = action + + def get_phrases(self): + return [self.keyword] + + def handle(self, command): + if self.keyword in command.lower(): + self.action.run(command) + return True + else: + return False diff --git a/src/audio.py b/src/audio.py new file mode 100644 index 00000000..c3e3fa34 --- /dev/null +++ b/src/audio.py @@ -0,0 +1,251 @@ +# Copyright 2017 Google Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Wraps the audio backend with a simple Python interface for recording and +playback. +""" + +import logging +import os +import subprocess +import threading +import wave + +logger = logging.getLogger('audio') + + +def sample_width_to_string(sample_width): + """Convert sample width (bytes) to ALSA format string.""" + return {1: 's8', 2: 's16', 4: 's32'}[sample_width] + + +class Recorder(threading.Thread): + + """Stream audio from microphone in a background thread and run processing + callbacks. It reads audio in a configurable format from the microphone, + then converts it to a known format before passing it to the processors. 
+ """ + + CHUNK_S = 0.1 + + def __init__(self, input_device='default', + channels=1, bytes_per_sample=2, sample_rate_hz=16000): + """Create a Recorder with the given audio format. + + The Recorder will not start until start() is called. start() is called + automatically if the Recorder is used in a `with`-statement. + + - input_device: name of ALSA device (for a list, run `arecord -L`) + - channels: number of channels in audio read from the mic + - bytes_per_sample: sample width in bytes (eg 2 for 16-bit audio) + - sample_rate_hz: sample rate in hertz + """ + + super().__init__() + + self._processors = [] + + self._chunk_bytes = int(self.CHUNK_S * sample_rate_hz) * channels * bytes_per_sample + + self._cmd = [ + 'arecord', + '-q', + '-t', 'raw', + '-D', input_device, + '-c', str(channels), + '-f', sample_width_to_string(bytes_per_sample), + '-r', str(sample_rate_hz), + ] + self._arecord = None + self._closed = False + + def add_processor(self, processor): + self._processors.append(processor) + + def del_processor(self, processor): + self._processors.remove(processor) + + def run(self): + """Reads data from arecord and passes to processors.""" + + self._arecord = subprocess.Popen(self._cmd, stdout=subprocess.PIPE) + logger.info("started recording") + + # check for race-condition when __exit__ is called at the same time as + # the process is started by the background thread + if self._closed: + self._arecord.kill() + return + + this_chunk = b'' + + while True: + input_data = self._arecord.stdout.read(self._chunk_bytes) + if not input_data: + break + + this_chunk += input_data + if len(this_chunk) >= self._chunk_bytes: + self._handle_chunk(this_chunk[:self._chunk_bytes]) + this_chunk = this_chunk[self._chunk_bytes:] + + if not self._closed: + logger.error('Microphone recorder died unexpectedly, aborting...') + # sys.exit doesn't work from background threads, so use os._exit as + # an emergency measure. 
+ logging.shutdown() + os._exit(1) # pylint: disable=protected-access + + def _handle_chunk(self, chunk): + """Send audio chunk to all processors. + """ + for p in self._processors: + p.add_data(chunk) + + def __enter__(self): + self.start() + return self + + def __exit__(self, *args): + self._closed = True + if self._arecord: + self._arecord.kill() + + +class Player(object): + + """Plays short audio clips from a buffer or file.""" + + def __init__(self, output_device='default'): + self._output_device = output_device + + def play_bytes(self, audio_bytes, sample_rate, sample_width=2): + """Play audio from the given bytes-like object. + + audio_bytes: audio data (mono) + sample_rate: sample rate in Hertz (24 kHz by default) + sample_width: sample width in bytes (eg 2 for 16-bit audio) + """ + + cmd = [ + 'aplay', + '-q', + '-t', 'raw', + '-D', self._output_device, + '-c', '1', + '-f', sample_width_to_string(sample_width), + '-r', str(sample_rate), + ] + + aplay = subprocess.Popen(cmd, stdin=subprocess.PIPE) + aplay.stdin.write(audio_bytes) + aplay.stdin.close() + retcode = aplay.wait() + + if retcode: + logger.error('aplay failed with %d', retcode) + + def play_wav(self, wav_path): + """Play audio from the given WAV file. The file should be mono and + small enough to load into memory. 
+ + wav_path: path to wav file + """ + + with wave.open(wav_path, 'r') as wav: + if wav.getnchannels() != 1: + raise ValueError(wav_path + 'is not a mono file') + + frames = wav.readframes(wav.getnframes()) + self.play_bytes(frames, wav.getframerate(), wav.getsampwidth()) + + +class WavDump(object): + + """A processor that logs to a WAV file, for testing audio recording.""" + + def __init__(self, path, duration, + channels, bytes_per_sample, sample_rate_hz): + self._wav = wave.open(path, 'wb') + self._wav.setnchannels(channels) + self._wav.setsampwidth(bytes_per_sample) + self._wav.setframerate(sample_rate_hz) + + self._n_bytes = 0 + self._total_bytes = int(duration * sample_rate_hz) * channels * bytes_per_sample + + def add_data(self, data): + """Write frames to the file if they fit within the total size.""" + max_bytes = self._total_bytes - self._n_bytes + data = data[:max_bytes] + self._n_bytes += len(data) + + if data: + self._wav.writeframes(data) + + def is_done(self): + return self._n_bytes >= self._total_bytes + + def __enter__(self): + return self + + def __exit__(self, *args): + self._wav.close() + + +def main(): + logging.basicConfig(level=logging.INFO) + + import argparse + import time + + parser = argparse.ArgumentParser(description="Test audio wrapper") + parser.add_argument('action', choices=['dump', 'play'], + help='What to do with the audio') + parser.add_argument('-I', '--input-device', default='default', + help='Name of the audio input device') + parser.add_argument('-c', '--channels', type=int, default=1, + help='Number of channels') + parser.add_argument('-f', '--bytes-per-sample', type=int, default=2, + help='Sample width in bytes') + parser.add_argument('-r', '--rate', type=int, default=16000, + help='Sample rate in Hertz') + parser.add_argument('-O', '--output-device', default='default', + help='Name of the audio output device') + parser.add_argument('-d', '--duration', default=2, type=float, + help='Dump duration in seconds (default: 2)') + 
parser.add_argument('filename', help='Path to WAV file') + args = parser.parse_args() + + if args.action == 'dump': + recorder = Recorder( + input_device=args.input_device, + channels=args.channels, + bytes_per_sample=args.bytes_per_sample, + sample_rate_hz=args.rate) + + dumper = WavDump(args.filename, args.duration, args.channels, + args.bytes_per_sample, args.rate) + + with recorder, dumper: + recorder.add_processor(dumper) + + while not dumper.is_done(): + time.sleep(0.1) + + elif args.action == 'play': + Player(args.output_device).play_wav(args.filename) + +if __name__ == '__main__': + main() diff --git a/src/i18n.py b/src/i18n.py new file mode 100644 index 00000000..9b321a16 --- /dev/null +++ b/src/i18n.py @@ -0,0 +1,51 @@ +# Copyright 2017 Google Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Internationalization helpers.""" + +import gettext +import os + +DEFAULT_LANGUAGE_CODE = 'en-US' + +LOCALE_DIR = os.path.realpath( + os.path.join(os.path.abspath(os.path.dirname(__file__)), '../po')) +LOCALE_DOMAIN = 'voice-recognizer' + +_language_code = DEFAULT_LANGUAGE_CODE + + +def set_language_code(code, gettext_install=False): + """Set the BCP-47 language code that the speech systems should use. + + Args: + gettext_install: if True, gettext's _() will be installed in as a builtin. + As this has global effect, it should only be done by applications. 
+ """ + global _language_code # pylint: disable=global-statement + _language_code = code.replace('_', '-') + + if gettext_install: + language_id = code.replace('-', '_') + t = gettext.translation(LOCALE_DOMAIN, LOCALE_DIR, [language_id], fallback=True) + t.install() + + +def get_language_code(): + """Returns the BCP-47 language code that the speech systems should use. + + We don't use the system locale because the Assistant API only supports + en-US at launch, so that should be used by default in all environments. + """ + return _language_code diff --git a/src/led.py b/src/led.py new file mode 100644 index 00000000..dca70ab5 --- /dev/null +++ b/src/led.py @@ -0,0 +1,160 @@ +# Copyright 2017 Google Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +'''Signal states on a LED''' + +import itertools +import logging +import os +import threading +import time + +import RPi.GPIO as GPIO + +logger = logging.getLogger('led') + +CONFIG_DIR = os.getenv('XDG_CONFIG_HOME') or os.path.expanduser('~/.config') +CONFIG_FILES = [ + '/etc/status-led.ini', + os.path.join(CONFIG_DIR, 'status-led.ini') +] + + +class LED: + + """Starts a background thread to show patterns with the LED.""" + + def __init__(self, channel): + self.animator = threading.Thread(target=self._animate) + self.channel = channel + self.iterator = None + self.running = False + self.state = None + self.sleep = 0 + + GPIO.setup(channel, GPIO.OUT) + self.pwm = GPIO.PWM(channel, 100) + + def start(self): + self.pwm.start(0) # off by default + self.running = True + self.animator.start() + + def stop(self): + self.running = False + self.animator.join() + self.pwm.stop() + GPIO.output(self.channel, GPIO.LOW) + + def set_state(self, state): + self.state = state + + def _animate(self): + # TODO(ensonic): refactor or add justification + # pylint: disable=too-many-branches + while self.running: + if self.state: + if self.state == 'on': + self.iterator = None + self.sleep = 0.0 + self.pwm.ChangeDutyCycle(100) + elif self.state == 'off': + self.iterator = None + self.sleep = 0.0 + self.pwm.ChangeDutyCycle(0) + elif self.state == 'blink': + self.iterator = itertools.cycle([0, 100]) + self.sleep = 0.5 + elif self.state == 'blink-3': + self.iterator = itertools.cycle([0, 100] * 3 + [0, 0]) + self.sleep = 0.25 + elif self.state == 'beacon': + self.iterator = itertools.cycle( + itertools.chain([30] * 100, [100] * 8, range(100, 30, -5))) + self.sleep = 0.05 + elif self.state == 'beacon-dark': + self.iterator = itertools.cycle( + itertools.chain([0] * 100, range(0, 30, 3), range(30, 0, -3))) + self.sleep = 0.05 + elif self.state == 'decay': + self.iterator = itertools.cycle(range(100, 0, -2)) + self.sleep = 0.05 + elif self.state == 'pulse-slow': + self.iterator = 
itertools.cycle( + itertools.chain(range(0, 100, 2), range(100, 0, -2))) + self.sleep = 0.1 + elif self.state == 'pulse-quick': + self.iterator = itertools.cycle( + itertools.chain(range(0, 100, 5), range(100, 0, -5))) + self.sleep = 0.05 + else: + logger.warning("unsupported state: %s", self.state) + self.state = None + if self.iterator: + self.pwm.ChangeDutyCycle(next(self.iterator)) + time.sleep(self.sleep) + else: + time.sleep(1) + + +def main(): + logging.basicConfig( + level=logging.INFO, + format="[%(asctime)s] %(levelname)s:%(name)s:%(message)s" + ) + + import configargparse + parser = configargparse.ArgParser( + default_config_files=CONFIG_FILES, + description="Status LED daemon") + parser.add_argument('-G', '--gpio-pin', default=25, type=int, + help='GPIO pin for the LED (default: 25)') + args = parser.parse_args() + + led = None + state_map = { + "starting": "pulse-quick", + "ready": "beacon-dark", + "listening": "on", + "thinking": "pulse-quick", + "stopping": "pulse-quick", + "power-off": "off", + "error": "blink-3", + } + try: + GPIO.setmode(GPIO.BCM) + + led = LED(args.gpio_pin) + led.start() + while True: + try: + state = input() + if not state: + continue + if state not in state_map: + logger.warning("unsupported state: %s, must be one of: %s", + state, ",".join(state_map.keys())) + continue + + led.set_state(state_map[state]) + except EOFError: + time.sleep(1) + except KeyboardInterrupt: + pass + finally: + led.stop() + GPIO.cleanup() + +if __name__ == '__main__': + main() diff --git a/src/main.py b/src/main.py new file mode 100755 index 00000000..88e1712d --- /dev/null +++ b/src/main.py @@ -0,0 +1,300 @@ +#!/usr/bin/env python3 +# Copyright 2017 Google Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Main recognizer loop: wait for a trigger then perform and handle +recognition.""" + +import logging +import os +import sys +import threading +import time + +import configargparse +from googlesamples.assistant import auth_helpers + +import audio +import action +import i18n +import speech +import tts + +# ============================================================================= +# +# Hey, Makers! +# +# Are you looking for actor.add_keyword? Do you want to add a new command? +# You need to edit src/action.py. Check out the instructions at: +# https://aiyprojects.withgoogle.com/voice/#makers-guide-3-3--create-a-new-voice-command-or-action +# +# ============================================================================= + +logging.basicConfig( + level=logging.INFO, + format="[%(asctime)s] %(levelname)s:%(name)s:%(message)s" +) +logger = logging.getLogger('main') + +CACHE_DIR = os.getenv('XDG_CACHE_HOME') or os.path.expanduser('~/.cache') +VR_CACHE_DIR = os.path.join(CACHE_DIR, 'voice-recognizer') + +CONFIG_DIR = os.getenv('XDG_CONFIG_HOME') or os.path.expanduser('~/.config') +CONFIG_FILES = [ + '/etc/voice-recognizer.ini', + os.path.join(CONFIG_DIR, 'voice-recognizer.ini') +] + +# Legacy fallback: old locations of secrets/credentials. 
+OLD_CLIENT_SECRETS = os.path.expanduser('~/client_secrets.json') +OLD_SERVICE_CREDENTIALS = os.path.expanduser('~/credentials.json') + +ASSISTANT_CREDENTIALS = os.path.join(VR_CACHE_DIR, 'assistant_credentials.json') +ASSISTANT_OAUTH_SCOPE = 'https://www.googleapis.com/auth/assistant-sdk-prototype' + +PID_FILE = '/run/user/%d/voice-recognizer.pid' % os.getuid() + + +def try_to_get_credentials(client_secrets): + """Try to get credentials, or print an error and quit on failure.""" + + if os.path.exists(ASSISTANT_CREDENTIALS): + return auth_helpers.load_credentials( + ASSISTANT_CREDENTIALS, scopes=[ASSISTANT_OAUTH_SCOPE]) + + if not os.path.exists(VR_CACHE_DIR): + os.mkdir(VR_CACHE_DIR) + + if not os.path.exists(client_secrets) and os.path.exists(OLD_CLIENT_SECRETS): + client_secrets = OLD_CLIENT_SECRETS + + if not os.path.exists(client_secrets): + print('You need client secrets to use the Assistant API.') + print('Follow these instructions:') + print(' https://developers.google.com/api-client-library/python/auth/installed-app' + '#creatingcred') + print('and put the file at', client_secrets) + sys.exit(1) + + if not os.getenv('DISPLAY') and not sys.stdout.isatty(): + print(""" +To use the Assistant API, manually start the application from the dev terminal. 
+See the "Turn on the Assistant API" section of the Voice Recognizer +User's Guide for more info.""") + sys.exit(1) + + credentials = auth_helpers.credentials_flow_interactive( + client_secrets, scopes=[ASSISTANT_OAUTH_SCOPE]) + auth_helpers.save_credentials(ASSISTANT_CREDENTIALS, credentials) + logging.info('OAuth credentials initialized: %s', ASSISTANT_CREDENTIALS) + return credentials + + +def create_pid_file(file_name): + with open(file_name, 'w') as pid_file: + pid_file.write("%d" % os.getpid()) + + +def main(): + parser = configargparse.ArgParser( + default_config_files=CONFIG_FILES, + description="Act on voice commands using Google's speech recognition") + parser.add_argument('-I', '--input-device', default='default', + help='Name of the audio input device') + parser.add_argument('-O', '--output-device', default='default', + help='Name of the audio output device') + parser.add_argument('-T', '--trigger', default='gpio', + help='Trigger to use {\'clap\', \'gpio\'}') + parser.add_argument('--cloud-speech', action='store_true', + help='Use the Cloud Speech API instead of the Assistant API') + parser.add_argument('-L', '--language', default='en-US', + help='Language code to use for speech (default: en-US)') + parser.add_argument('-l', '--led-fifo', default='/tmp/status-led', + help='Status led control fifo') + parser.add_argument('-p', '--pid-file', default=PID_FILE, + help='File containing our process id for monitoring') + parser.add_argument('--audio-logging', action='store_true', + help='Log all requests and responses to WAV files in /tmp') + parser.add_argument('--assistant-secrets', + help='Path to client secrets for the Assistant API') + parser.add_argument('--cloud-speech-secrets', + help='Path to service account credentials for the ' + 'Cloud Speech API') + + args = parser.parse_args() + + create_pid_file(args.pid_file) + i18n.set_language_code(args.language, gettext_install=True) + + player = audio.Player(args.output_device) + + if args.cloud_speech: + 
credentials_file = os.path.expanduser(args.cloud_speech_secrets) + if not os.path.exists(credentials_file) and os.path.exists(OLD_SERVICE_CREDENTIALS): + credentials_file = OLD_SERVICE_CREDENTIALS + recognizer = speech.CloudSpeechRequest(credentials_file) + else: + credentials = try_to_get_credentials( + os.path.expanduser(args.assistant_secrets)) + recognizer = speech.AssistantSpeechRequest(credentials) + + recorder = audio.Recorder( + input_device=args.input_device, channels=1, + bytes_per_sample=speech.AUDIO_SAMPLE_SIZE, + sample_rate_hz=speech.AUDIO_SAMPLE_RATE_HZ) + with recorder: + do_recognition(args, recorder, recognizer, player) + + +def do_recognition(args, recorder, recognizer, player): + """Configure and run the recognizer.""" + say = tts.create_say(player) + + actor = action.make_actor(say) + + if args.cloud_speech: + action.add_commands_just_for_cloud_speech_api(actor, say) + + recognizer.add_phrases(actor) + recognizer.set_audio_logging_enabled(args.audio_logging) + + if args.trigger == 'gpio': + import triggers.gpio + triggerer = triggers.gpio.GpioTrigger(channel=23) + msg = 'Press the button on GPIO 23' + elif args.trigger == 'clap': + import triggers.clap + triggerer = triggers.clap.ClapTrigger(recorder) + msg = 'Clap your hands' + else: + logger.error("Unknown trigger '%s'", args.trigger) + return + + mic_recognizer = SyncMicRecognizer( + actor, recognizer, recorder, player, say, triggerer, led_fifo=args.led_fifo) + + with mic_recognizer: + if sys.stdout.isatty(): + print(msg + ' then speak, or press Ctrl+C to quit...') + + # wait for KeyboardInterrupt + while True: + time.sleep(1) + + +class SyncMicRecognizer(object): + + """Detects triggers and runs recognition in a background thread. + + This is a context manager, so it will clean up the background thread if the + main program is interrupted. 
+ """ + + # pylint: disable=too-many-instance-attributes + + def __init__(self, actor, recognizer, recorder, player, say, triggerer, led_fifo): + self.actor = actor + self.player = player + self.recognizer = recognizer + self.recognizer.set_endpointer_cb(self.endpointer_cb) + self.recorder = recorder + self.say = say + self.triggerer = triggerer + self.triggerer.set_callback(self.recognize) + + self.running = False + + if led_fifo and os.path.exists(led_fifo): + self.led_fifo = led_fifo + else: + if led_fifo: + logger.warning( + 'File %s specified for --led-fifo does not exist.', + led_fifo) + self.led_fifo = None + self.recognizer_event = threading.Event() + + def __enter__(self): + self.running = True + threading.Thread(target=self._recognize).start() + self.triggerer.start() + self._status('ready') + + def __exit__(self, *args): + self.running = False + self.recognizer_event.set() + + self.recognizer.end_audio() + + def _status(self, status): + if self.led_fifo: + with open(self.led_fifo, 'w') as led: + led.write(status + '\n') + logger.info('%s...', status) + + def recognize(self): + if self.recognizer_event.is_set(): + # Duplicate trigger (eg multiple button presses) + return + + self.recognizer.reset() + self.recorder.add_processor(self.recognizer) + self._status('listening') + # Tell recognizer to run + self.recognizer_event.set() + + def endpointer_cb(self): + self.recorder.del_processor(self.recognizer) + self._status('thinking') + + def _recognize(self): + while self.running: + self.recognizer_event.wait() + if not self.running: + break + + logger.info('recognizing...') + try: + self._handle_result(self.recognizer.do_request()) + except speech.Error: + logger.exception('Unexpected error') + self.say(_('Unexpected error. 
Try again or check the logs.')) + + self.recognizer_event.clear() + self.triggerer.start() + self._status('ready') + + def _handle_result(self, result): + if result.transcript and self.actor.handle(result.transcript): + logger.info('handled local command: %s', result.transcript) + elif result.response_audio: + self._play_assistant_response(result.response_audio) + elif result.transcript: + logger.warning('%r was not handled', result.transcript) + self.say(_("I don’t know how to answer that.")) + else: + logger.warning('no command recognized') + self.say(_("Could you try that again?")) + + def _play_assistant_response(self, audio_bytes): + bytes_per_sample = speech.AUDIO_SAMPLE_SIZE + sample_rate_hz = speech.AUDIO_SAMPLE_RATE_HZ + logger.info('Playing %.4f seconds of audio...', + len(audio_bytes) / (bytes_per_sample * sample_rate_hz)) + self.player.play_bytes(audio_bytes, sample_width=bytes_per_sample, + sample_rate=sample_rate_hz) + + +if __name__ == '__main__': + main() diff --git a/src/speech.py b/src/speech.py new file mode 100644 index 00000000..6a8894d5 --- /dev/null +++ b/src/speech.py @@ -0,0 +1,445 @@ +# Copyright 2017 Google Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+
+"""Classes for speech interaction."""
+
+from abc import abstractmethod
+import collections
+import logging
+import os
+import tempfile
+import wave
+
+import google.auth
+import google.auth.exceptions
+import google.auth.transport.grpc
+import google.auth.transport.requests
+from google.cloud.grpc.speech.v1beta1 import cloud_speech_pb2 as cloud_speech
+from google.rpc import code_pb2 as error_code
+from google.assistant.embedded.v1alpha1 import embedded_assistant_pb2
+import grpc
+from six.moves import queue
+
+import i18n
+
+logger = logging.getLogger('speech')
+
+AUDIO_SAMPLE_SIZE = 2  # bytes per sample
+AUDIO_SAMPLE_RATE_HZ = 16000
+
+
+_Result = collections.namedtuple('_Result', ['transcript', 'response_audio'])
+
+
+class Error(Exception):
+    pass
+
+
+class _ChannelFactory(object):
+
+    """Creates gRPC channels with a given configuration."""
+
+    def __init__(self, api_host, credentials):
+        self._api_host = api_host
+        self._credentials = credentials
+
+        self._checked = False  # True once the credentials have been refresh-checked
+
+    def make_channel(self):
+        """Creates a secure channel."""
+
+        request = google.auth.transport.requests.Request()
+        target = self._api_host + ':443'
+
+        if not self._checked:
+            # Refresh now, to catch any errors early. Otherwise, they'll be
+            # raised and swallowed somewhere inside gRPC.
+            self._credentials.refresh(request)
+            self._checked = True
+
+        return google.auth.transport.grpc.secure_authorized_channel(
+            self._credentials, request, target)
+
+
+class GenericSpeechRequest(object):
+
+    """Common base class for Cloud Speech and Assistant APIs."""
+
+    # TODO(rodrigoq): Refactor audio logging.
+    # pylint: disable=attribute-defined-outside-init,too-many-instance-attributes
+
+    DEADLINE_SECS = 185  # overall gRPC deadline for one streaming request, in seconds
+
+    def __init__(self, api_host, credentials):
+        self._audio_queue = queue.Queue()
+        self._phrases = []
+        self._channel_factory = _ChannelFactory(api_host, credentials)
+        self._endpointer_cb = None
+        self._audio_logging_enabled = False
+        self._request_log_wav = None
+
+    def add_phrases(self, phrases):
+        """Makes the recognition more likely to recognize the given phrase(s).
+        phrases: an object with a method get_phrases() that returns a list of
+        phrases.
+        """
+
+        self._phrases.extend(phrases.get_phrases())
+
+    def set_endpointer_cb(self, cb):
+        """Callback to invoke on end of speech."""
+        self._endpointer_cb = cb
+
+    def set_audio_logging_enabled(self, audio_logging_enabled=True):
+        self._audio_logging_enabled = audio_logging_enabled
+
+        if audio_logging_enabled:
+            self._audio_log_dir = tempfile.mkdtemp()
+            self._audio_log_ix = 0
+
+    def reset(self):
+        while True:
+            try:
+                self._audio_queue.get(False)
+            except queue.Empty:
+                return
+
+    def add_data(self, data):
+        self._audio_queue.put(data)
+
+    def end_audio(self):
+        self.add_data(None)
+
+    def _get_speech_context(self):
+        """Return a SpeechContext instance to bias recognition towards certain
+        phrases.
+        """
+        return cloud_speech.SpeechContext(
+            phrases=self._phrases,
+        )
+
+    @abstractmethod
+    def _make_service(self, channel):
+        """Create a service stub.
+        """
+        return
+
+    @abstractmethod
+    def _create_config_request(self):
+        """Create a config request for the given endpoint.
+
+        This is sent first to the server to configure the speech recognition.
+        """
+        return
+
+    @abstractmethod
+    def _create_audio_request(self, data):
+        """Create an audio request for the given endpoint.
+
+        This is sent to the server with audio to be recognized.
+        """
+        return
+
+    def _request_stream(self):
+        """Yields a config request followed by requests constructed from the
+        audio queue.
+        """
+        yield self._create_config_request()
+
+        while True:
+            data = self._audio_queue.get()
+
+            if not data:
+                return
+
+            if self._request_log_wav:
+                self._request_log_wav.writeframes(data)
+
+            yield self._create_audio_request(data)
+
+    @abstractmethod
+    def _create_response_stream(self, service, request_stream, deadline):
+        """Given a request stream, start the gRPC call to get the response
+        stream.
+        """
+        return
+
+    @abstractmethod
+    def _stop_sending_audio(self, resp):
+        """Return true if this response says user has stopped speaking.
+
+        This stops the request from sending further audio.
+        """
+        return
+
+    @abstractmethod
+    def _handle_response(self, resp):
+        """Handle a response from the remote API.
+
+        Args:
+            resp: StreamingRecognizeResponse instance
+        """
+        return
+
+    def _end_audio_request(self):
+        self.end_audio()
+        if self._endpointer_cb:
+            self._endpointer_cb()
+
+    def _handle_response_stream(self, response_stream):
+        for resp in response_stream:
+            if resp.error.code != error_code.OK:
+                self._end_audio_request()
+                raise Error('Server error: ' + resp.error.message)
+
+            if self._stop_sending_audio(resp):
+                self._end_audio_request()
+
+            self._handle_response(resp)
+
+        # Server has closed the connection: return '' if the subclass produced no result
+        return self._finish_request() or ''
+
+    def _start_logging_request(self):
+        """Open a WAV file to log the request audio."""
+        self._audio_log_ix += 1
+        request_filename = '%s/request.%03d.wav' % (
+            self._audio_log_dir, self._audio_log_ix)
+        logger.info('Writing request to %s', request_filename)
+
+        self._request_log_wav = wave.open(request_filename, 'w')
+
+        self._request_log_wav.setnchannels(1)
+        self._request_log_wav.setsampwidth(AUDIO_SAMPLE_SIZE)
+        self._request_log_wav.setframerate(AUDIO_SAMPLE_RATE_HZ)
+
+    def _finish_request(self):
+        """Called after the final response is received."""
+
+        if self._request_log_wav:
+            self._request_log_wav.close()
+
+        return _Result(None, None)
+
+    def do_request(self):
+        """Establishes a connection and starts sending audio to the cloud
+        endpoint. Responses are handled by the subclass until one returns a
+        result.
+
+        Returns:
+            namedtuple with the following fields:
+                transcript: string with transcript of user query
+                response_audio: optionally, an audio response from the server
+
+        Raises speech.Error on error.
+        """
+        try:
+            service = self._make_service(self._channel_factory.make_channel())
+
+            response_stream = self._create_response_stream(
+                service, self._request_stream(), self.DEADLINE_SECS)
+
+            if self._audio_logging_enabled:
+                self._start_logging_request()
+
+            return self._handle_response_stream(response_stream)
+        except (
+                google.auth.exceptions.GoogleAuthError,
+                grpc.RpcError,
+        ) as exc:
+            raise Error('Exception in speech request') from exc
+
+
+class CloudSpeechRequest(GenericSpeechRequest):
+
+    """A transcription request to the Cloud Speech API.
+
+    Args:
+        credentials_file: path to service account credentials JSON file
+    """
+
+    SCOPE = 'https://www.googleapis.com/auth/cloud-platform'
+
+    def __init__(self, credentials_file):
+        os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = credentials_file
+        credentials, _ = google.auth.default(scopes=[self.SCOPE])
+
+        super().__init__('speech.googleapis.com', credentials)
+
+        self.language_code = i18n.get_language_code()
+
+        if not hasattr(cloud_speech, 'StreamingRecognizeRequest'):
+            raise ValueError("cloud_speech_pb2.py doesn't have StreamingRecognizeRequest.")
+
+        self._transcript = None
+
+    def reset(self):
+        super().reset()
+        self._transcript = None
+
+    def _make_service(self, channel):
+        return cloud_speech.SpeechStub(channel)
+
+    def _create_config_request(self):
+        recognition_config = cloud_speech.RecognitionConfig(
+            # There are a bunch of config options you can specify. See
+            # https://goo.gl/KPZn97 for the full list.
+            encoding='LINEAR16',  # raw 16-bit signed LE samples
+            sample_rate=AUDIO_SAMPLE_RATE_HZ,
+            # For a list of supported languages see:
+            # https://cloud.google.com/speech/docs/languages.
+            language_code=self.language_code,  # a BCP-47 language tag
+            speech_context=self._get_speech_context(),
+        )
+        streaming_config = cloud_speech.StreamingRecognitionConfig(
+            config=recognition_config,
+            single_utterance=True,  # TODO(rodrigoq): find a way to handle pauses
+        )
+
+        return cloud_speech.StreamingRecognizeRequest(
+            streaming_config=streaming_config)
+
+    def _create_audio_request(self, data):
+        return cloud_speech.StreamingRecognizeRequest(audio_content=data)
+
+    def _create_response_stream(self, service, request_stream, deadline):
+        return service.StreamingRecognize(request_stream, deadline)
+
+    def _stop_sending_audio(self, resp):
+        """Check the endpointer type to see if an utterance has ended."""
+
+        if resp.endpointer_type:
+            endpointer_type = cloud_speech.StreamingRecognizeResponse.EndpointerType.Name(
+                resp.endpointer_type)
+            logger.info('endpointer_type: %s', endpointer_type)
+
+        END_OF_AUDIO = cloud_speech.StreamingRecognizeResponse.EndpointerType.Value('END_OF_AUDIO')
+        return resp.endpointer_type == END_OF_AUDIO
+
+    def _handle_response(self, resp):
+        """Store the last transcript we received."""
+        if resp.results:
+            self._transcript = ' '.join(
+                result.alternatives[0].transcript for result in resp.results)
+            logger.info('transcript: %s', self._transcript)
+
+    def _finish_request(self):
+        super()._finish_request()
+        return _Result(self._transcript, None)
+
+
+class AssistantSpeechRequest(GenericSpeechRequest):
+
+    """A request to the Assistant API, which returns audio and text."""
+
+    def __init__(self, credentials):
+
+        super().__init__('embeddedassistant.googleapis.com', credentials)
+
+        self._response_audio = b''
+        self._transcript = None
+
+    def reset(self):
+        super().reset()
+        self._response_audio = b''
+        self._transcript = None
+
+    def _make_service(self, channel):
+        return embedded_assistant_pb2.EmbeddedAssistantStub(channel)
+
+    def _create_config_request(self):
+        audio_in_config = embedded_assistant_pb2.AudioInConfig(
+            encoding='LINEAR16',
+            sample_rate_hertz=AUDIO_SAMPLE_RATE_HZ,
+        )
+        audio_out_config = embedded_assistant_pb2.AudioOutConfig(
+            encoding='LINEAR16',
+            sample_rate_hertz=AUDIO_SAMPLE_RATE_HZ,
+            volume_percentage=50,
+        )
+        converse_config = embedded_assistant_pb2.ConverseConfig(
+            audio_in_config=audio_in_config,
+            audio_out_config=audio_out_config,
+        )
+
+        return embedded_assistant_pb2.ConverseRequest(config=converse_config)
+
+    def _create_audio_request(self, data):
+        return embedded_assistant_pb2.ConverseRequest(audio_in=data)
+
+    def _create_response_stream(self, service, request_stream, deadline):
+        return service.Converse(request_stream, deadline)
+
+    def _stop_sending_audio(self, resp):
+        if resp.event_type:
+            logger.info('event_type: %s', resp.event_type)
+
+        return (resp.event_type ==
+                embedded_assistant_pb2.ConverseResponse.END_OF_UTTERANCE)
+
+    def _handle_response(self, resp):
+        """Accumulate audio and text from the remote end. It will be handled
+        in _finish_request().
+        """
+
+        if resp.result.spoken_request_text:
+            logger.info('transcript: %s', resp.result.spoken_request_text)
+            self._transcript = resp.result.spoken_request_text
+
+        self._response_audio += resp.audio_out.audio_data
+
+    def _finish_request(self):
+        super()._finish_request()
+
+        if self._response_audio and self._audio_logging_enabled:
+            self._log_audio_out(self._response_audio)
+
+        return _Result(self._transcript, self._response_audio)
+
+    def _log_audio_out(self, frames):
+        response_filename = '%s/response.%03d.wav' % (
+            self._audio_log_dir, self._audio_log_ix)
+        logger.info('Writing response to %s', response_filename)
+
+        response_wav = wave.open(response_filename, 'w')
+        response_wav.setnchannels(1)
+        response_wav.setsampwidth(AUDIO_SAMPLE_SIZE)
+        response_wav.setframerate(AUDIO_SAMPLE_RATE_HZ)
+        response_wav.writeframes(frames)
+        response_wav.close()
+
+if __name__ == '__main__':
+    logging.basicConfig(level=logging.INFO)
+
+    # for testing: use audio from a file
+    import argparse
+
+    parser = argparse.ArgumentParser()
+    parser.add_argument('file', nargs='?', default='test_speech.raw')
+    args = parser.parse_args()
+
+    if os.path.exists('/home/pi/credentials.json'):
+        # Legacy fallback: old location of credentials.
+        req = CloudSpeechRequest('/home/pi/credentials.json')
+    else:
+        req = CloudSpeechRequest('/home/pi/cloud_speech.json')
+
+    with open(args.file, 'rb') as f:
+        while True:
+            chunk = f.read(64000)
+            if not chunk:
+                break
+            req.add_data(chunk)
+    req.end_audio()
+
+    print('down response:', req.do_request())
diff --git a/src/status-monitor.py b/src/status-monitor.py
new file mode 100755
index 00000000..0d5c9ecf
--- /dev/null
+++ b/src/status-monitor.py
@@ -0,0 +1,72 @@
+#!/usr/bin/env python3
+# Copyright 2017 Google Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Script to monitor liveness of processes and update led status."""
+
+import argparse
+import logging
+import os
+import time
+
+logging.basicConfig(
+    level=logging.INFO,
+    format="[%(asctime)s] %(levelname)s:%(name)s:%(message)s"
+)
+logger = logging.getLogger('status-monitor')
+
+PID_FILE = '/run/user/%d/voice-recognizer.pid' % os.getuid()  # default; see --pid-file
+
+
+def get_pid(pid_file):
+    try:
+        with open(pid_file, 'r') as pid:
+            return int(pid.read())
+    except (IOError, ValueError):  # missing/unreadable file, or not yet fully written
+        return None
+
+
+def set_led_status(led_fifo):
+    with open(led_fifo, 'w') as led:
+        led.write('power-off\n')
+
+
+def check_liveness(pid_file, led_fifo):
+    pid = get_pid(pid_file)
+    if pid:
+        if not os.path.exists("/proc/%d" % pid):
+            logger.info("monitored process not running")
+            set_led_status(led_fifo)
+            try:
+                os.unlink(pid_file)
+            except IOError:
+                pass
+
+
+def main():
+    parser = argparse.ArgumentParser(
+        description="Monitor liveness of processes and update led status.")
+    parser.add_argument('-l', '--led-fifo', default='/tmp/status-led',
+                        help='Status led control fifo')
+    parser.add_argument('-p', '--pid-file', default=PID_FILE,
+                        help='File containing our process id for monitoring')
+    args = parser.parse_args()
+
+    while True:
+        check_liveness(args.pid_file, args.led_fifo)
+        time.sleep(1)
+
+
+if __name__ == '__main__':
+    main()
diff --git a/src/triggers/__init__.py b/src/triggers/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/src/triggers/clap.py b/src/triggers/clap.py
new file mode 100644
index 00000000..ec731eaf
--- /dev/null
+++ b/src/triggers/clap.py
@@ -0,0 +1,52 @@
+# Copyright 2017 Google Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Detect claps in the audio stream."""
+
+import logging
+import numpy as np
+
+from triggers.trigger import Trigger
+
+logger = logging.getLogger('trigger')
+
+
+class ClapTrigger(Trigger):
+
+    """Detect claps in the audio stream."""
+
+    def __init__(self, recorder):
+        super().__init__()
+
+        self.have_clap = True  # don't start yet
+        self.prev_sample = 0
+        recorder.add_processor(self)
+
+    def start(self):
+        self.prev_sample = 0
+        self.have_clap = False
+
+    def add_data(self, data):
+        """ audio is mono 16bit signed at 16kHz """
+        audio = np.frombuffer(data, 'int16')  # fromstring is deprecated (removed in NumPy 2.0)
+        if not self.have_clap:
+            # alternative: np.abs(audio).sum() > thresh
+            shifted = np.roll(audio, 1)
+            shifted[0] = self.prev_sample
+            val = np.max(np.abs(shifted - audio))
+            if val > (65536 // 4):  # quarter max delta
+                logger.info("clap detected")
+                self.have_clap = True
+                self.callback()
+            self.prev_sample = audio[-1]
diff --git a/src/triggers/gpio.py b/src/triggers/gpio.py
new file mode 100644
index 00000000..67b928da
--- /dev/null
+++ b/src/triggers/gpio.py
@@ -0,0 +1,61 @@
+# Copyright 2017 Google Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Detect edges on the given GPIO channel."""
+
+import time
+
+import RPi.GPIO as GPIO
+
+from triggers.trigger import Trigger
+
+
+class GpioTrigger(Trigger):
+
+    """Detect edges on the given GPIO channel."""
+
+    DEBOUNCE_TIME = 0.05  # seconds the level must stay stable before firing
+
+    def __init__(self, channel, polarity=GPIO.FALLING,
+                 pull_up_down=GPIO.PUD_UP):
+        super().__init__()
+
+        self.channel = channel
+        self.polarity = polarity
+
+        if polarity not in [GPIO.FALLING, GPIO.RISING]:
+            raise ValueError('polarity must be GPIO.FALLING or GPIO.RISING')
+
+        self.expected_value = polarity == GPIO.RISING  # input level that confirms the edge
+        self.event_detect_added = False  # guard so start() registers the callback only once
+
+        GPIO.setmode(GPIO.BCM)
+        GPIO.setup(channel, GPIO.IN, pull_up_down=pull_up_down)
+
+    def start(self):
+        if not self.event_detect_added:
+            GPIO.add_event_detect(self.channel, self.polarity, callback=self.debounce)
+            self.event_detect_added = True
+
+    def debounce(self, _):
+        """Check that the input holds the expected value for the debounce period,
+        to avoid false trigger on short pulses."""
+
+        start = time.time()
+        while time.time() < start + self.DEBOUNCE_TIME:
+            if GPIO.input(self.channel) != self.expected_value:
+                return
+            time.sleep(0.01)  # poll every 10 ms
+
+        self.callback()
diff --git a/src/triggers/trigger.py b/src/triggers/trigger.py
new file mode 100644
index 00000000..4cc363e1
--- /dev/null
+++ b/src/triggers/trigger.py
@@ -0,0 +1,29 @@
+# Copyright 2017 Google Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Detect trigger events that start voice recognition requests."""
+
+
+class Trigger(object):
+
+    """Base class for a Trigger."""
+
+    def __init__(self):
+        self.callback = None  # set via set_callback(); invoked by subclasses when the trigger fires
+
+    def set_callback(self, callback):
+        self.callback = callback
+
+    def start(self):
+        pass
diff --git a/src/tts.py b/src/tts.py
new file mode 100644
index 00000000..317aca24
--- /dev/null
+++ b/src/tts.py
@@ -0,0 +1,126 @@
+# Copyright 2017 Google Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Wrapper around a TTS system."""
+
+import functools
+import logging
+import os
+import subprocess
+import tempfile
+import wave
+
+import numpy as np
+from scipy import signal
+
+import i18n
+
+# Path to a tmpfs directory to avoid SD card wear
+TMP_DIR = '/run/user/%d' % os.getuid()
+
+# Expected sample rate from the TTS tool
+SAMPLE_RATE = 16000
+
+# Parameters for the equalization filter. These remove low-frequency sound
+# from the result, avoiding resonance on the speaker and making the TTS easier
+# to understand.
Calculated with: +# python3 src/tts.py --hpf-order 4 --hpf-freq-hz 1400 --hpf-gain-db 8 +FILTER_A = np.array([1., -3.28274474, 4.09441957, -2.29386174, 0.48627065]) +FILTER_B = np.array([1.75161639, -7.00646555, 10.50969833, -7.00646555, 1.75161639]) + +logger = logging.getLogger('tts') + + +def print_eq_coefficients(hpf_order, hpf_freq_hz, hpf_gain_db): + """Calculate and print the coefficients of the equalization filter.""" + b, a = signal.butter(hpf_order, hpf_freq_hz / SAMPLE_RATE, 'highpass') + gain_factor = pow(10, hpf_gain_db / 20) + + print('FILTER_A = np.%r' % a) + print('FILTER_B = np.%r' % (b * gain_factor)) + + +def create_eq_filter(): + """Return a function that applies equalization to a numpy array.""" + + def eq_filter(raw_audio): + return signal.lfilter(FILTER_B, FILTER_A, raw_audio) + + return eq_filter + + +def create_say(player): + """Return a function say(words) for the given player, using the default EQ + filter. + """ + lang = i18n.get_language_code() + return functools.partial(say, player, eq_filter=create_eq_filter(), lang=lang) + + +def say(player, words, eq_filter=None, lang='en-US'): + """Say the given words with TTS.""" + + try: + (fd, raw_wav) = tempfile.mkstemp(suffix='.wav', dir=TMP_DIR) + except IOError: + logger.exception('Using fallback directory for TTS output') + (fd, raw_wav) = tempfile.mkstemp(suffix='.wav') + + os.close(fd) + + try: + subprocess.call(['pico2wave', '-l', lang, '-w', raw_wav, words]) + with wave.open(raw_wav, 'rb') as f: + raw_bytes = f.readframes(f.getnframes()) + finally: + os.unlink(raw_wav) + + # Deserialize and apply equalization filter + eq_audio = np.frombuffer(raw_bytes, dtype=np.int16) + if eq_filter: + eq_audio = eq_filter(eq_audio) + + # Clip and serialize + int16_info = np.iinfo(np.int16) + eq_audio = np.clip(eq_audio, int16_info.min, int16_info.max) + eq_bytes = eq_audio.astype(np.int16).tostring() + + player.play_bytes(eq_bytes, sample_rate=SAMPLE_RATE) + + +def main(): + import argparse + + import 
 audio
+
+    logging.basicConfig(level=logging.INFO)
+
+    parser = argparse.ArgumentParser(description='Test TTS wrapper')
+    parser.add_argument('words', nargs='*', help='Words to say')
+    parser.add_argument('--hpf-order', type=int, help='Order of high-pass filter')
+    parser.add_argument('--hpf-freq-hz', type=int, help='Corner frequency of high-pass filter')
+    parser.add_argument('--hpf-gain-db', type=int, help='High-frequency gain of filter')
+    args = parser.parse_args()
+
+    if args.words:
+        words = ' '.join(args.words)
+        player = audio.Player()
+        create_say(player)(words)
+
+    if args.hpf_order:
+        print_eq_coefficients(args.hpf_order, args.hpf_freq_hz, args.hpf_gain_db)
+
+
+if __name__ == '__main__':
+    main()
diff --git a/systemd/alsa-init.service b/systemd/alsa-init.service
new file mode 100644
index 00000000..5d87326d
--- /dev/null
+++ b/systemd/alsa-init.service
@@ -0,0 +1,16 @@
+# Play 1 s of silence if asound.state does not exist so lxpanel's volumealsa
+# can initialize properly.
+
+[Unit]
+Description=alsa init service
+ConditionPathExists=!/etc/alsa/state-daemon.conf
+ConditionPathExists=!/var/lib/alsa/asound.state
+DefaultDependencies=no
+After=local-fs.target sysinit.target
+
+[Service]
+Type=oneshot
+ExecStart=/usr/bin/aplay -q -d 1 -c 1 -t raw -f S32_LE /dev/zero
+
+[Install]
+WantedBy=basic.target
diff --git a/systemd/ntpdate.service b/systemd/ntpdate.service
new file mode 100644
index 00000000..2720f174
--- /dev/null
+++ b/systemd/ntpdate.service
@@ -0,0 +1,14 @@
+[Unit]
+Description=Set time with ntpdate
+After=network.target
+
+[Service]
+# use -u to avoid conflict with ntpd
+ExecStart=/usr/sbin/ntpdate -u pool.ntp.org
+
+# we may not have network yet, so retry until success
+Restart=on-failure
+RestartSec=3s
+
+[Install]
+WantedBy=multi-user.target
diff --git a/systemd/status-led-off.service b/systemd/status-led-off.service
new file mode 100644
index 00000000..bf44d748
--- /dev/null
+++ b/systemd/status-led-off.service
@@ -0,0 +1,12 @@
+[Unit]
+Description=status led shutdown update
+DefaultDependencies=no
+Before=shutdown.target
+Requires=status-led.service
+
+[Service]
+Type=oneshot
+ExecStart=/bin/bash -c '/bin/echo "stopping" >/tmp/status-led'
+
+[Install]
+WantedBy=reboot.target halt.target poweroff.target
diff --git a/systemd/status-led-on.service b/systemd/status-led-on.service
new file mode 100644
index 00000000..78795854
--- /dev/null
+++ b/systemd/status-led-on.service
@@ -0,0 +1,12 @@
+[Unit]
+Description=status led startup update
+DefaultDependencies=no
+After=status-led.service
+Requires=status-led.service
+
+[Service]
+Type=oneshot
+ExecStart=/bin/bash -c '/bin/echo "starting" >/tmp/status-led'
+
+[Install]
+WantedBy=basic.target
diff --git a/systemd/status-led.service b/systemd/status-led.service
new file mode 100644
index 00000000..93cf8af9
--- /dev/null
+++ b/systemd/status-led.service
@@ -0,0 +1,16 @@
+[Unit]
+Description=status led service
+DefaultDependencies=no
+After=local-fs.target sysinit.target
+
+[Service]
+ExecStartPre=/bin/bash -c 'test -p /tmp/status-led || /bin/mknod /tmp/status-led p'
+ExecStart=/bin/bash -c '/usr/bin/python3 -u src/led.py