diff --git a/lucida/botframework-interface/interface.js b/lucida/botframework-interface/interface.js index f93ed2835..992f22852 100644 --- a/lucida/botframework-interface/interface.js +++ b/lucida/botframework-interface/interface.js @@ -8,11 +8,16 @@ Description : This file handles dialogs from bot framework, forwards messages var restify = require('restify') var server = restify.createServer() var builder = require('botbuilder') +var calling = require('botbuilder-calling') var request = require('request') var credentials = require('./credentials') -var url = require("url") +var url = require('url') var bfw_port -var cc_host +var cc_api_host +var cc_ws_host +var WebSocket = require('ws') + +var util = require('util') //========================================================= // Bot Setup @@ -37,12 +42,16 @@ function check_bfw_port(str_port) { function check_cc_host(host) { match = host.match(/^((\d+\.\d+\.\d+\.\d+)|localhost)(:\d+)?(.*)?$/) if ( match != null ) { - cc_host = "http://" + host.replace(/\/$/, "") - console.log("[INFO] Remote command center host is set to " + cc_host) + cc_api_host = "http://" + host.replace(/\/$/, "") + '/api' + cc_ws_host = "ws://" + host.replace(/\/$/, "") + '/ws' + console.log("[INFO] Remote command center API host is set to " + cc_api_host) + console.log("[INFO] Remote command center WS host is set to " + cc_ws_host) return true } else if ( url.parse(host)['host'] != null ) { - cc_host = host.replace(/\/$/, "") - console.log("[INFO] Remote command center host is set to " + cc_host) + cc_api_host = host.replace(/\/$/, "") + '/api' + cc_ws_host = cc_api_host.replace(/^http/, "ws") + '/ws' + console.log("[INFO] Remote command center API host is set to " + cc_api_host) + console.log("[INFO] Remote command center WS host is set to " + cc_ws_host) return true } else { return false @@ -78,28 +87,39 @@ function check_args() { } check_args() +// Create web socket +var ws = new WebSocket(cc_ws_host + '/status') + +ws.on('message', function incoming(data) { + console.log("WS RECVD: " + data); +}); + // Create chat bot -var connector = new builder.ChatConnector(credentials.credentials) -var bot = new builder.UniversalBot(connector) -server.post('/api/messages', connector.listen()) +var chat_connector = new builder.ChatConnector(credentials.chat_credentials) +var chat_bot = new builder.UniversalBot(chat_connector) +server.post('/api/messages', chat_connector.listen()) +var call_connector = new calling.CallConnector(credentials.call_credentials) +var call_bot = new calling.UniversalCallBot(call_connector); +server.post('/api/calls', call_connector.listen()); //========================================================= // Bots Dialogs //========================================================= -var addresses = {} +var chat_addresses = {} -bot.dialog('/', [ +chat_bot.dialog('/', [ function (session) { + console.log(util.inspect(session.message.address)) var address = session.message.address - addresses[address.channelId] = {channelId: address.channelId, bot: {id: address.bot.id, name: address.bot.name}, serviceUrl: address.serviceUrl, useAuth: address.useAuth} + chat_addresses[address.channelId] = {channelId: address.channelId, bot: {id: address.bot.id, name: address.bot.name}, serviceUrl: address.serviceUrl, useAuth: address.useAuth} request.post({ headers: {'content-type' : 'application/x-www-form-urlencoded'}, form: { interface: session.message.address.channelId, username: session.message.address.user.id, text_input: session.message.text }, - url: cc_host + 
'/api/infer', + url: cc_api_host + '/infer', form: { interface: session.message.address.channelId, username: session.message.address.user.id, speech_input: session.message.text } }, function(error, response, body){ - address = addresses[session.message.address.channelId] + address = chat_addresses[session.message.address.channelId] address['user'] = { id: session.message.address.user.id } if (error) { text = "Error occured '" + error.code + "'!!! Is command center running?" @@ -112,7 +132,7 @@ bot.dialog('/', [ if ( result ) { request.post({ headers: {'content-type' : 'application/x-www-form-urlencoded'}, - url: cc_host + '/api/add_interface', + url: cc_api_host + '/add_interface', form: { interface: session.message.address.channelId, token: result[1], username: session.message.address.user.id } }, function(error, response, body){ if (error) { @@ -127,7 +147,7 @@ bot.dialog('/', [ text = response.statusCode + " " + response.statusMessage + " received. Go through the logs and figure. Otherwise create an issue on github with logs attached." } var reply = new builder.Message().address(address).text(text) - bot.send(reply) + chat_bot.send(reply) }) return } else { @@ -139,7 +159,15 @@ bot.dialog('/', [ text = response.statusCode + " " + response.statusMessage + " received. Go through the logs and figure. Otherwise create an issue on github with logs attached." } var reply = new builder.Message().address(address).text(text) - bot.send(reply) + chat_bot.send(reply) }) } ]) + + +call_bot.dialog('/', [ + function (session) { + session.send('Hey there! How can I help you?'); + console.log(util.inspect(session.message)) + } +]) diff --git a/lucida/botframework-interface/package.json b/lucida/botframework-interface/package.json index a0c433d9c..fec9cdbe3 100644 --- a/lucida/botframework-interface/package.json +++ b/lucida/botframework-interface/package.json @@ -11,6 +11,7 @@ "botbuilder-calling": "^3.0.1", "request": "^2.81.0", "restify": "^4.3.0", - "url": "^0.11.0" + "url": "^0.11.0", + "ws": "^3.0.0" } } diff --git a/lucida/botframework-interface/start_interface.sh b/lucida/botframework-interface/start_interface.sh index 026863752..851c418ce 100644 --- a/lucida/botframework-interface/start_interface.sh +++ b/lucida/botframework-interface/start_interface.sh @@ -358,7 +358,12 @@ rm -f phantom.out cat << EOF > credentials.js // DO NOT EDIT THIS FILE BY HAND -- YOUR CHANGES WILL BE OVERWRITTEN -exports.credentials = { +exports.chat_credentials = { + appId: '$BFW_APPID', + appPassword: '$BFW_APPPWD' +} +exports.call_credentials = { + callbackUrl: '$BFW_HOST/api/calls', appId: '$BFW_APPID', appPassword: '$BFW_APPPWD' } diff --git a/lucida/commandcenter/app.py b/lucida/commandcenter/app.py index 92d8f91df..d3111bbb7 100644 --- a/lucida/commandcenter/app.py +++ b/lucida/commandcenter/app.py @@ -2,66 +2,27 @@ from __future__ import division from __future__ import unicode_literals -import sys, glob, os -sys.path.insert(0, glob.glob(os.path.abspath(os.path.dirname(__file__)) + - '/../../tools/thrift-0.9.3/lib/py/build/lib*')[0]) - -from controllers import * +from controllers.FlaskApp import app +from controllers import WebServer from controllers.Parser import cmd_port -from flask import * -from threading import Thread import logging +import os +def main(): + cmd_host = '0.0.0.0' + logging.basicConfig(level=logging.DEBUG, format="%(levelname)8s %(asctime)s %(message)s ") + WebServer.tornado.options.parse_command_line() -# Initialize the Flask app with the template folder address. 
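# [editor's note] A minimal sketch of the pattern this commit adopts: one
# Tornado process owns the port, serving its own WebSocket/API routes first
# and falling back to the Flask app (wrapped in a WSGIContainer) for
# everything else. Names mirror the diff; the route list is illustrative,
# not the commit's actual handler table.
#
#   import tornado.ioloop, tornado.web, tornado.wsgi
#   from flask import Flask
#
#   flask_app = Flask(__name__)
#
#   @flask_app.route('/')
#   def index():
#       return 'served by Flask through Tornado'
#
#   application = tornado.web.Application([
#       # Tornado-native handlers (e.g. /ws/speech) are matched first.
#       (r'.*', tornado.web.FallbackHandler,
#        dict(fallback=tornado.wsgi.WSGIContainer(flask_app))),
#   ])
#   application.listen(3000)
#   tornado.ioloop.IOLoop.instance().start()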
-app = Flask(__name__, template_folder='templates') - -# app.config.from_object('config') -app.config['MAX_CONTENT_LENGTH'] = 16 * 1024 * 1024 # 16 MB due to MongoDB - -# Register the controllers. -app.register_blueprint(Main.main) -app.register_blueprint(User.user) -app.register_blueprint(Create.create) -app.register_blueprint(Learn.learn) -app.register_blueprint(Infer.infer) - -# Session. -app.secret_key = 'A0Zr98j/3yX R~XHH!jmN]LWX/,?RT' - -def flask_listener(): - - # For https (with ASR capability) - if os.environ.get('SECURE_HOST'): - print 'Starting secure flask' - app.run(host='0.0.0.0', port=3000, debug=True, use_reloader=False, - threaded=True, ssl_context=('certs/server.crt', 'certs/server.key')) - # For http (without ASR capability) - else: - print 'Starting non-secure flask' - app.run(host='0.0.0.0', port=3000, debug=True, use_reloader=False, - threaded=True) - -def web_socket_listener(): - print 'Start web socket at ' + str(cmd_port) - logging.basicConfig(level=logging.DEBUG, - format="%(levelname)8s %(asctime)s %(message)s ") - logging.debug('Starting up server') - WebSocket.tornado.options.parse_command_line() - - # For wss (with ASR capability) if os.environ.get('SECURE_HOST'): - print 'Starting secure web socket' - WebSocket.Application().listen(cmd_port, ssl_options={ + logging.info('Spinning up web server at https://' + str(cmd_host) + ':' + str(cmd_port)) + WebServer.Application(app).listen(cmd_port, address=str(cmd_host), ssl_options={ "certfile":"certs/server.crt", "keyfile":"certs/server.key"}) - # For ws (without ASR capability) else: - print 'Starting non-secure web socket' - WebSocket.Application().listen(cmd_port) + logging.info('Spinning up web server at http://' + str(cmd_host) + ':' + str(cmd_port)) + WebServer.Application(app).listen(cmd_port, address=str(cmd_host)) - WebSocket.tornado.ioloop.IOLoop.instance().start() + WebServer.tornado.ioloop.IOLoop.instance().start() if __name__ == '__main__': - Thread(target = flask_listener).start() - web_socket_listener() + main() diff --git a/lucida/commandcenter/controllers/Database.py b/lucida/commandcenter/controllers/Database.py index 8c81af8fd..993982561 100644 --- a/lucida/commandcenter/controllers/Database.py +++ b/lucida/commandcenter/controllers/Database.py @@ -85,8 +85,11 @@ def username_exists(self, username): #Returns true if the username already exists. def get_username(self, interface, interface_uid): - interface += "_interface" - row = self.users.find_one({interface: interface_uid}); + if interface and interface != "web": + interface += "_interface" + row = self.users.find_one({interface: interface_uid}) + else: + row = self.users.find_one({'username': interface_uid}) if not row is None: return row['username'] return None diff --git a/lucida/commandcenter/controllers/FlaskApp.py b/lucida/commandcenter/controllers/FlaskApp.py new file mode 100644 index 000000000..05554ae85 --- /dev/null +++ b/lucida/commandcenter/controllers/FlaskApp.py @@ -0,0 +1,33 @@ +from __future__ import absolute_import +from __future__ import division +from __future__ import unicode_literals + +import sys, glob, os +sys.path.insert(0, glob.glob(os.path.abspath(os.path.dirname(__file__)) + + '/../../../tools/thrift-0.9.3/lib/py/build/lib*')[0]) + +from . import Main +from . import User +from . import Create +from . import Learn +from . import Infer +from .Parser import cmd_port +from flask import * +import logging + + +# Initialize the Flask app with the template folder address. 
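# [editor's note] Because the Flask app object now lives at module level, the
# Tornado entry point can import it directly. A hypothetical minimal consumer,
# mirroring the rewritten lucida/commandcenter/app.py above:
#
#   from controllers.FlaskApp import app
#   from controllers import WebServer
#   WebServer.Application(app).listen(3000)
#   WebServer.tornado.ioloop.IOLoop.instance().start()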
+app = Flask(__name__, template_folder='../templates') + +# app.config.from_object('config') +app.config['MAX_CONTENT_LENGTH'] = 16 * 1024 * 1024 # 16 MB due to MongoDB + +# Register the controllers. +app.register_blueprint(Main.main) +app.register_blueprint(User.user) +app.register_blueprint(Create.create) +app.register_blueprint(Learn.learn) +app.register_blueprint(Infer.infer) + +# Session. +app.secret_key = 'A0Zr98j/3yX R~XHH!jmN]LWX/,?RT' diff --git a/lucida/commandcenter/controllers/ThriftClient.py b/lucida/commandcenter/controllers/ThriftClient.py index 85010eacc..e151ad3e0 100644 --- a/lucida/commandcenter/controllers/ThriftClient.py +++ b/lucida/commandcenter/controllers/ThriftClient.py @@ -125,7 +125,7 @@ def infer(self, LUCID, workflow_name, text_data, image_data): i = 0 for x in resultText: - resultText[i] = [unicode(resultText)] # Text information must be unicode'd and array'd to be properly passed. IMAGE DATA DOES NOT HAVE THIS DONE TO IT. + resultText[i] = [unicode(x)] # Text information must be unicode'd and array'd to be properly passed. IMAGE DATA DOES NOT HAVE THIS DONE TO IT. i+= 1 # Processes the current workflow state, and in the process finds if this is the final stage or if next stage exists. diff --git a/lucida/commandcenter/controllers/WebServer.py b/lucida/commandcenter/controllers/WebServer.py new file mode 100644 index 000000000..519949ff1 --- /dev/null +++ b/lucida/commandcenter/controllers/WebServer.py @@ -0,0 +1,319 @@ +#!/usr/bin/env python +# +# Copyright 2013 Tanel Alumae + +""" +Reads speech data via websocket requests, sends it to Redis, waits for results from Redis and +forwards to client via websocket +""" +import sys +import logging +import json +import codecs +import os.path +import uuid +import time +import threading +import functools +from Queue import Queue + +import tornado.ioloop +import tornado.options +import tornado.web +import tornado.websocket +import tornado.gen +import tornado.concurrent +import tornado.wsgi + +from Database import database + +from Parser import cmd_port + +from tornado.options import define +define("port", default=cmd_port, help="run on the given port", type=int) + +STATUS_EOS = -1 +STATUS_SUCCESS = 0 +STATUS_NO_SPEECH = 1 +STATUS_ABORTED = 2 +STATUS_AUDIO_CAPTURE = 3 +STATUS_NETWORK = 4 +STATUS_NOT_ALLOWED = 5 +STATUS_SERVICE_NOT_ALLOWED = 6 +STATUS_BAD_GRAMMAR = 7 +STATUS_LANGUAGE_NOT_SUPPORTED = 8 +STATUS_NOT_AVAILABLE = 9 +STATUS_NOT_AUTHENTICATED = 401 +STATUS_CONNECTED = 100 + +class Application(tornado.web.Application): + def __init__(self, flaskApp): + settings = dict( + cookie_secret="43oETzKXQAGaYdkL5gEmGeJJFuYh7EQnp2XdTP1o/Vo=", + static_path=os.path.join(os.path.dirname(os.path.dirname(__file__)), "static"), + xsrf_cookies=False, + autoescape=None + ) + + flaskWebHandler = tornado.wsgi.WSGIContainer(flaskApp) + handlers = [ + (r"/ws/speech", DecoderSocketHandler), + (r"/ws/status", StatusSocketHandler), + (r"/api/speech", DecoderAPIHandler), + (r"/worker/ws/speech", WorkerSocketHandler), + (r"/client/static/(.*)", tornado.web.StaticFileHandler, {'path': settings["static_path"]}), + (r".*", tornado.web.FallbackHandler, dict(fallback=flaskWebHandler)) + ] + tornado.web.Application.__init__(self, handlers, **settings) + self.available_workers = set() + self.status_listeners = set() + self.num_requests_processed = 0 + + def send_status_update_single(self, ws): + status = dict(num_workers_available=len(self.available_workers), num_requests_processed=self.num_requests_processed) + 
ws.write_message(json.dumps(status)) + + def send_status_update(self): + for ws in self.status_listeners: + self.send_status_update_single(ws) + +def run_async(func): + @functools.wraps(func) + def async_func(*args, **kwargs): + func_hl = threading.Thread(target=func, args=args, kwargs=kwargs) + func_hl.start() + return func_hl + + return async_func + + +def content_type_to_caps(content_type): + """ + Converts MIME-style raw audio content type specifier to GStreamer CAPS string + """ + default_attributes= {"rate": 16000, "format" : "S16LE", "channels" : 1, "layout" : "interleaved"} + media_type, _, attr_string = content_type.replace(";", ",").partition(",") + if media_type in ["audio/x-raw", "audio/x-raw-int"]: + media_type = "audio/x-raw" + attributes = default_attributes + for (key,_,value) in [p.partition("=") for p in attr_string.split(",")]: + attributes[key.strip()] = value.strip() + return "%s, %s" % (media_type, ", ".join(["%s=%s" % (key, value) for (key,value) in attributes.iteritems()])) + else: + return content_type + + +@tornado.web.stream_request_body +class DecoderAPIHandler(tornado.web.RequestHandler): + """ + Provides a HTTP POST/PUT interface supporting chunked transfer requests + """ + + def prepare(self): + self.id = str(uuid.uuid4()) + self.final_result = "" + self.final_result_queue = Queue() + user = self.request.headers.get("user", None) + interface = self.request.headers.get("interface", None) + token = self.request.headers.get("token", None) + user = database.get_username(interface, user) + if user == None: + self.set_status(401) + self.finish("Not Authorized") + return + logging.info("API::SPEECH %s: Received request from user '%s' over interface '%s'" % (self.id, user, interface)) + self.worker = None + self.error_status = 0 + self.error_message = None + try: + self.worker = self.application.available_workers.pop() + self.application.send_status_update() + logging.info("API::SPEECH %s: Using worker %s" % (self.id, self.__str__())) + self.worker.set_client_socket(self) + context = self.request.headers.get("context", None) + content_type = self.request.headers.get("Content-Type", None) + if content_type: + content_type = content_type_to_caps(content_type) + logging.info("API::SPEECH %s: Using content type: %s" % (self.id, content_type)) + self.worker.write_message(json.dumps(dict(id=self.id, caps=content_type, context=context, user=user, isCall=False))) + except: + logging.warn("API::SPEECH %s: No worker available for client request" % self.id) + self.set_status(503) + self.finish("No workers available") + + def data_received(self, chunk): + assert self.worker is not None + logging.info("API::SPEECH %s: Forwarding client message of length %d to worker" % (self.id, len(chunk))) + self.worker.write_message(chunk, binary=True) + + def post(self, *args, **kwargs): + self.end_request(args, kwargs) + + def put(self, *args, **kwargs): + self.end_request(args, kwargs) + + @run_async + def get_final_result(self, callback=None): + logging.info("API::SPEECH %s: Waiting for final result..." 
% self.id) + callback(self.final_result_queue.get(block=True)) + + @tornado.web.asynchronous + @tornado.gen.coroutine + def end_request(self, *args, **kwargs): + logging.info("API::SPEECH %s: Handling the end of request" % self.id) + assert self.worker is not None + self.worker.write_message("EOS", binary=True) + result = yield tornado.gen.Task(self.get_final_result) + if self.error_status == 0: + logging.info("%s: Final result: %s" % (self.id, result)) + self.write(result) + else: + logging.info("%s: Error (status=%d) processing HTTP request: %s" % (self.id, self.error_status, self.error_message)) + response = {"status" : self.error_status, "id": self.id, "message": self.error_message} + self.write(response) + self.worker.set_client_socket(None) + self.worker.close() + self.finish() + logging.info("API::SPEECH Everything done") + + def send_event(self, event): + event_str = str(event) + if len(event_str) > 100: + event_str = event_str[:97] + "..." + logging.info("%s: Receiving event %s from worker" % (self.id, event_str)) + if event["status"] == 0 and ("result" in event): + try: + if len(event["result"]["hypotheses"]) > 0 and event["result"]["final"]: + self.final_result = str(event) + except: + e = sys.exc_info()[0] + logging.warn("Failed to extract hypothesis from recognition result:" + e) + elif event["status"] != 0: + self.error_status = event["status"] + self.error_message = event.get("message", "") + + def close(self): + logging.info("API::SPEECH %s: Receiving close from worker" % (self.id)) + self.final_result_queue.put(self.final_result) + self.application.send_status_update() + +class StatusSocketHandler(tornado.websocket.WebSocketHandler): + # needed for Tornado 4.0 + def check_origin(self, origin): + return True + + def open(self): + logging.debug("Opened a new control channel") + self.application.status_listeners.add(self) + self.application.send_status_update_single(self) + + def on_message(self, message): + assert self.client_socket is not None + event = json.loads(message) + self.client_socket.send_event(event) + + def on_close(self): + logging.info("Status listener left") + self.application.status_listeners.remove(self) + +class WorkerSocketHandler(tornado.websocket.WebSocketHandler): + def __init__(self, application, request, **kwargs): + tornado.websocket.WebSocketHandler.__init__(self, application, request, **kwargs) + self.client_socket = None + + # needed for Tornado 4.0 + def check_origin(self, origin): + return True + + def open(self): + self.client_socket = None + self.application.available_workers.add(self) + logging.info("New worker available " + self.__str__()) + self.application.send_status_update() + + def on_close(self): + logging.info("Worker " + self.__str__() + " leaving") + self.application.available_workers.discard(self) + if self.client_socket: + self.client_socket.close() + self.application.send_status_update() + + def on_message(self, message): + assert self.client_socket is not None + event = json.loads(message) + self.client_socket.send_event(event) + if 'next_id' in event: + self.client_socket.set_id(event['id']) + + def set_client_socket(self, client_socket): + self.client_socket = client_socket + + +class DecoderSocketHandler(tornado.websocket.WebSocketHandler): + # needed for Tornado 4.0 + def check_origin(self, origin): + return True + + def set_id(self, id): + self.id = id + + def send_event(self, event): + event["id"] = self.id + event_str = str(event) + if len(event_str) > 100: + event_str = event_str[:97] + "..." 
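# [editor's note] For readers tracing the protocol: judging from the key
# accesses in send_event() here and in DecoderAPIHandler above, a worker
# event looks roughly like the following (field names taken from this file,
# example values invented):
#
#   {"status": 0,
#    "result": {"final": True,
#               "hypotheses": [{"transcript": "turn on the lights"}]},
#    "message": "optional, used when status != 0"}
#
# send_event() stamps the session id onto the event before relaying it.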
+ logging.info("%s: Sending event %s to client" % (self.id, event_str)) + self.write_message(json.dumps(event)) + + def open(self): + self.id = str(uuid.uuid4()) + logging.info("%s: OPEN" % (self.id)) + logging.info("%s: Request arguments: %s" % (self.id, " ".join(["%s=\"%s\"" % (a, self.get_argument(a)) for a in self.request.arguments]))) + user = self.get_argument("user", None, True) + interface = self.get_argument("interface", None, True) + user = database.get_username(interface, user) + if user == None: + logging.warn("Not authorised") + event = dict(status=STATUS_NOT_AVAILABLE, message="User not authorised!!!") + self.send_event(event) + self.close() + return + self.worker = None + try: + self.worker = self.application.available_workers.pop() + self.application.send_status_update() + logging.info("%s: Using worker %s" % (self.id, self.__str__())) + self.worker.set_client_socket(self) + + context = self.get_argument("context", None, True) + self.isCall = self.get_argument("isCall", False, True) + content_type = self.get_argument("content-type", None, True) + if content_type: + logging.info("%s: Using content type: %s" % (self.id, content_type)) + + self.worker.write_message(json.dumps(dict(id=self.id, caps=content_type, context=context, user=user, isCall=self.isCall))) + except KeyError: + logging.warn("%s: No worker available for client request" % self.id) + event = dict(status=STATUS_NOT_AVAILABLE, message="No decoder available, try again later") + self.send_event(event) + self.close() + + def on_connection_close(self): + logging.info("%s: Handling on_connection_close()" % self.id) + self.application.num_requests_processed += 1 + self.application.send_status_update() + if self.worker: + try: + self.worker.set_client_socket(None) + logging.info("%s: Closing worker connection" % self.id) + self.worker.close() + except: + pass + + def on_message(self, message): + assert self.worker is not None + logging.info("%s: Forwarding client message (%s) of length %d to worker" % (self.id, type(message), len(message))) + if isinstance(message, unicode): + self.worker.write_message(message, binary=False) + else: + self.worker.write_message(message, binary=True) diff --git a/lucida/commandcenter/controllers/WebServer.py.bak b/lucida/commandcenter/controllers/WebServer.py.bak new file mode 100644 index 000000000..c41ff52d9 --- /dev/null +++ b/lucida/commandcenter/controllers/WebServer.py.bak @@ -0,0 +1,305 @@ +#!/usr/bin/env python +# +# Copyright 2013 Tanel Alumae +# Copyright 2017 Kamal Galrani + +""" +Reads speech data via websocket requests, sends it to Redis, waits for results from Redis and +forwards to client via websocket +""" +import sys +import logging +import json +import codecs +import os.path +import uuid +import time +import threading +import functools +from Queue import Queue + +import tornado.ioloop +import tornado.options +import tornado.web +import tornado.websocket +import tornado.gen +import tornado.concurrent +import tornado.wsgi + +from Parser import cmd_port + +from tornado.options import define +define("port", default=cmd_port, help="run on the given port", type=int) + +STATUS_EOS = -1 +STATUS_SUCCESS = 0 +STATUS_NO_SPEECH = 1 +STATUS_ABORTED = 2 +STATUS_AUDIO_CAPTURE = 3 +STATUS_NETWORK = 4 +STATUS_NOT_ALLOWED = 5 +STATUS_SERVICE_NOT_ALLOWED = 6 +STATUS_BAD_GRAMMAR = 7 +STATUS_LANGUAGE_NOT_SUPPORTED = 8 +STATUS_NOT_AVAILABLE = 9 + + +class Application(tornado.web.Application): + def __init__(self, flaskApp): + settings = dict( + 
cookie_secret="43oETzKXQAGaYdkL5gEmGeJJFuYh7EQnp2XdTP1o/Vo=", + static_path=os.path.join(os.path.dirname(os.path.dirname(__file__)), "static"), + xsrf_cookies=False, + autoescape=None + ) + + flaskWebHandler = tornado.wsgi.WSGIContainer(flaskApp) + handlers = [ + (r"/ws/speech", DecoderSocketHandler), + (r"/ws/control", ControlSocketHandler), + (r"/api/speech", DecoderAPIHandler), + (r"/worker/ws/speech", WorkerSocketHandler), + (r"/static/(.*)", tornado.web.StaticFileHandler, {'path': settings["static_path"]}), + (r".*", tornado.web.FallbackHandler, dict(fallback=flaskWebHandler)) + ] + tornado.web.Application.__init__(self, handlers, **settings) + self.available_decoders = set() + self.connected_clients = set() + + def send_status_update_single(self, ws): + status = dict(speech_decoder_available=len(self.available_workers) > 0) # TODO: Add stuff + ws.write_message(json.dumps(status)) + + def send_status_update(self): + for ws in self.status_listeners: + self.send_status_update_single(ws) + +def run_async(func): + @functools.wraps(func) + def async_func(*args, **kwargs): + func_hl = threading.Thread(target=func, args=args, kwargs=kwargs) + func_hl.daemon = True + func_hl.start() + return func_hl + return async_func + + +def content_type_to_caps(content_type): + """ + Converts MIME-style raw audio content type specifier to GStreamer CAPS string + """ + default_attributes= {"rate": 16000, "format" : "S16LE", "channels" : 1, "layout" : "interleaved"} + media_type, _, attr_string = content_type.replace(";", ",").partition(",") + if media_type in ["audio/x-raw", "audio/x-raw-int"]: + media_type = "audio/x-raw" + attributes = default_attributes + for (key,_,value) in [p.partition("=") for p in attr_string.split(",")]: + attributes[key.strip()] = value.strip() + return "%s, %s" % (media_type, ", ".join(["%s=%s" % (key, value) for (key,value) in attributes.iteritems()])) + else: + return content_type + + +@tornado.web.stream_request_body +class DecoderAPIHandler(tornado.web.RequestHandler): + """ + Provides a HTTP POST/PUT interface supporting chunked transfer requests, similar to that provided by + http://github.com/alumae/ruby-pocketsphinx-server. 
+ """ + + def prepare(self): + self.id = str(uuid.uuid4()) + self.final_result = "" + self.final_result_queue = Queue() + user = self.request.headers.get("user", None) + interface = self.request.headers.get("interface", "web") + user = database.get_username(interface, user) + if user == None: + self.set_status(403) + self.finish("Not Authorized") + logging.debug("%s: /api/speech: Received request from user '%s' over interface '%s'" % (self.id, user, interface)) + self.worker = None + self.error_status = 0 + self.error_message = None + try: + self.worker = self.application.available_workers.pop() + self.application.send_status_update() + logging.debug("%s: /api/speech: Using worker %s" % (self.id, self.__str__())) + self.worker.set_client_socket(self) + context = self.request.headers.get("context", None) + content_type = self.request.headers.get("Content-Type", None) + if content_type: + content_type = content_type_to_caps(content_type) + logging.debug("%s: /api/speech: Using content type: %s" % (self.id, content_type)) + self.worker.write_message(json.dumps(dict(id=self.id, caps=content_type, context=context, user=user))) + except KeyError: + logging.warn("%s: /api/speech: No worker available for client request" % self.id) + self.set_status(503) + self.finish("No workers available") + + def data_received(self, chunk): + assert self.worker is not None + logging.debug("%s: /api/speech: Forwarding client message of length %d to worker" % (self.id, len(chunk))) + self.worker.write_message(chunk, binary=True) + + def post(self, *args, **kwargs): + self.end_request(args, kwargs) + + def put(self, *args, **kwargs): + self.end_request(args, kwargs) + + @run_async + def get_final_result(self, callback=None): + logging.debug("%s: /api/speech: Waiting for final result..." % self.id) + callback(self.final_result_queue.get(block=True)) + + @tornado.web.asynchronous + @tornado.gen.coroutine + def end_request(self, *args, **kwargs): + logging.debug("%s: /api/speech: Handling the end of request" % self.id) + assert self.worker is not None + self.worker.write_message("EOS", binary=True) + result = yield tornado.gen.Task(self.get_final_result) + result = json.loads(result) + if self.error_status == 0: + logging.debug("%s: /api/speech: Received final result") + response = {"status" : 0, "id": self.id, "hypotheses": [{"utterance" : hyp}]} ###### + self.write(response) + else: + logging.error("%s: /api/speech: ERROR: (status=%d) processing HTTP request: %s" % (self.id, self.error_status, self.error_message)) + response = {"status" : self.error_status, "id": self.id, "message": self.error_message} + self.write(response) + self.application.send_status_update() + self.worker.set_client_socket(None) + self.worker.close() + self.finish() + + def send_event(self, event): #################3 + if event["status"] == 0 and ("result" in event): + try: + if len(event["result"]["hypotheses"]) > 0 and event["result"]["final"]: + self.final_result = event["result"]["hypotheses"][0]["transcript"] + self.context = event["result"]["hypotheses"][0]["transcript"] + except: + e = sys.exc_info()[0] + logging.warn("Failed to extract hypothesis from recognition result:" + e) + elif event["status"] != 0: + event_str = str(event) + if len(event_str) > 87: + event_str = event_str[:84] + "..." 
+ logging.debug("%s: /api/speech: Receiving event %s from worker" % (self.id, event_str)) + self.error_status = event["status"] + self.error_message = event.get("message", "") + + def close(self): + logging.info("%s: Receiving 'close' from worker" % (self.id)) + self.final_result_queue.put(self.final_hyp) + + +class StatusSocketHandler(tornado.websocket.WebSocketHandler): + # needed for Tornado 4.0 + def check_origin(self, origin): + return True + + def open(self): + logging.debug("Opened a new control channel") + self.application.status_listeners.add(self) + self.application.send_status_update_single(self) + + def on_message(self, message): + assert self.client_socket is not None + event = json.loads(message) + self.client_socket.send_event(event) + + def on_close(self): + logging.info("Status listener left") + self.application.status_listeners.remove(self) + + +class WorkerSocketHandler(tornado.websocket.WebSocketHandler): + def __init__(self, application, request, **kwargs): + tornado.websocket.WebSocketHandler.__init__(self, application, request, **kwargs) + self.client_socket = None + + # needed for Tornado 4.0 + def check_origin(self, origin): + return True + + def open(self): + self.client_socket = None + self.application.available_workers.add(self) + logging.info("New worker available " + self.__str__()) + self.application.send_status_update() + + def on_close(self): + logging.info("Worker " + self.__str__() + " leaving") + self.application.available_workers.discard(self) + if self.client_socket: + self.client_socket.close() + self.application.send_status_update() + + def on_message(self, message): + assert self.client_socket is not None + event = json.loads(message) + self.client_socket.send_event(event) + + def set_client_socket(self, client_socket): + self.client_socket = client_socket + + +class DecoderSocketHandler(tornado.websocket.WebSocketHandler): + # needed for Tornado 4.0 + def check_origin(self, origin): + return True + + def send_event(self, event): + event["id"] = self.id + event_str = str(event) + if len(event_str) > 100: + event_str = event_str[:97] + "..." 
+ logging.info("%s: Sending event %s to client" % (self.id, event_str)) + self.write_message(json.dumps(event)) + + def open(self): + self.id = str(uuid.uuid4()) + logging.info("%s: OPEN" % (self.id)) + logging.info("%s: Request arguments: %s" % (self.id, " ".join(["%s=\"%s\"" % (a, self.get_argument(a)) for a in self.request.arguments]))) + self.user_id = self.get_argument("user-id", "none", True) + self.content_id = self.get_argument("content-id", "none", True) + self.worker = None + try: + self.worker = self.application.available_workers.pop() + self.application.send_status_update() + logging.info("%s: Using worker %s" % (self.id, self.__str__())) + self.worker.set_client_socket(self) + + content_type = self.get_argument("content-type", None, True) + if content_type: + logging.info("%s: Using content type: %s" % (self.id, content_type)) + + self.worker.write_message(json.dumps(dict(id=self.id, caps=content_type, context="{}"))) + except KeyError: + logging.warn("%s: No worker available for client request" % self.id) + event = dict(status=STATUS_NOT_AVAILABLE, message="No decoder available, try again later") + self.send_event(event) + self.close() + + def on_connection_close(self): + logging.info("%s: Handling on_connection_close()" % self.id) + self.application.num_requests_processed += 1 + self.application.send_status_update() + if self.worker: + try: + self.worker.set_client_socket(None) + logging.info("%s: Closing worker connection" % self.id) + self.worker.close() + except: + pass + + def on_message(self, message): + assert self.worker is not None + logging.info("%s: Forwarding client message (%s) of length %d to worker" % (self.id, type(message), len(message))) + if isinstance(message, unicode): + self.worker.write_message(message, binary=False) + else: + self.worker.write_message(message, binary=True) diff --git a/lucida/commandcenter/controllers/WebServer.py.forward b/lucida/commandcenter/controllers/WebServer.py.forward new file mode 100644 index 000000000..2de833c7f --- /dev/null +++ b/lucida/commandcenter/controllers/WebServer.py.forward @@ -0,0 +1,399 @@ +#!/usr/bin/env python +# +# Copyright 2013 Tanel Alumae + +""" +Reads speech data via websocket requests, sends it to Redis, waits for results from Redis and +forwards to client via websocket +""" +import sys +import logging +import json +import codecs +import os.path +import uuid +import time +import threading +import functools +from Queue import Queue + +import tornado.ioloop +import tornado.options +import tornado.web +import tornado.websocket +import tornado.gen +import tornado.concurrent +import tornado.wsgi + +from Parser import cmd_port + +from tornado.options import define +define("port", default=cmd_port, help="run on the given port", type=int) + +STATUS_EOS = -1 +STATUS_SUCCESS = 0 +STATUS_NO_SPEECH = 1 +STATUS_ABORTED = 2 +STATUS_AUDIO_CAPTURE = 3 +STATUS_NETWORK = 4 +STATUS_NOT_ALLOWED = 5 +STATUS_SERVICE_NOT_ALLOWED = 6 +STATUS_BAD_GRAMMAR = 7 +STATUS_LANGUAGE_NOT_SUPPORTED = 8 +STATUS_NOT_AVAILABLE = 9 +STATUS_NOT_AUTHENTICATED = 401 +STATUS_CONNECTED = 100 + +class Application(tornado.web.Application): + def __init__(self, flaskApp): + settings = dict( + cookie_secret="43oETzKXQAGaYdkL5gEmGeJJFuYh7EQnp2XdTP1o/Vo=", + static_path=os.path.join(os.path.dirname(os.path.dirname(__file__)), "static"), + xsrf_cookies=False, + autoescape=None + ) + + flaskWebHandler = tornado.wsgi.WSGIContainer(flaskApp) + handlers = [ + (r"/ws/speech", DecoderSocketHandler), + (r"/ws/status", StatusSocketHandler), + (r"/api/speech", 
DecoderAPIHandler), + (r"/worker/ws/speech", WorkerSocketHandler), + (r"/client/static/(.*)", tornado.web.StaticFileHandler, {'path': settings["static_path"]}), + (r".*", tornado.web.FallbackHandler, dict(fallback=flaskWebHandler)) + ] + tornado.web.Application.__init__(self, handlers, **settings) + self.available_workers = set() + self.status_listeners = set() + self.num_requests_processed = 0 + + def send_status_update_single(self, ws): + status = dict(num_workers_available=len(self.available_workers), num_requests_processed=self.num_requests_processed) + ws.write_message(json.dumps(status)) + + def send_status_update(self): + for ws in self.status_listeners: + self.send_status_update_single(ws) + +def run_async(func): + @functools.wraps(func) + def async_func(*args, **kwargs): + func_hl = threading.Thread(target=func, args=args, kwargs=kwargs) + func_hl.start() + return func_hl + + return async_func + + +def content_type_to_caps(content_type): + """ + Converts MIME-style raw audio content type specifier to GStreamer CAPS string + """ + default_attributes= {"rate": 16000, "format" : "S16LE", "channels" : 1, "layout" : "interleaved"} + media_type, _, attr_string = content_type.replace(";", ",").partition(",") + if media_type in ["audio/x-raw", "audio/x-raw-int"]: + media_type = "audio/x-raw" + attributes = default_attributes + for (key,_,value) in [p.partition("=") for p in attr_string.split(",")]: + attributes[key.strip()] = value.strip() + return "%s, %s" % (media_type, ", ".join(["%s=%s" % (key, value) for (key,value) in attributes.iteritems()])) + else: + return content_type + + +@tornado.web.stream_request_body +class DecoderAPIHandler(tornado.web.RequestHandler): + """ + Provides a HTTP POST/PUT interface supporting chunked transfer requests + """ + + def prepare(self): + self.id = str(uuid.uuid4()) + self.final_result = "" + self.final_result_queue = Queue() + user = self.request.headers.get("user", None) + interface = self.request.headers.get("interface", None) + token = self.request.headers.get("token", None) + user = database.get_username(interface, user) + if user == None: + self.set_status(401) + self.finish("Not Authorized") + return + logging.info("API::SPEECH %s: Received request from user '%s' over interface '%s'" % (self.id, user, interface)) + self.worker = None + self.error_status = 0 + self.error_message = None + try: + self.worker = self.application.available_workers.pop() + self.application.send_status_update() + logging.info("API::SPEECH %s: Using worker %s" % (self.id, self.__str__())) + self.worker.set_client_socket(self) + context = self.request.headers.get("context", None) + content_type = self.request.headers.get("Content-Type", None) + if content_type: + content_type = content_type_to_caps(content_type) + logging.info("API::SPEECH %s: Using content type: %s" % (self.id, content_type)) + self.worker.write_message(json.dumps(dict(id=self.id, caps=content_type, context=context, user=user, isCall=False))) + except: + logging.warn("API::SPEECH %s: No worker available for client request" % self.id) + self.set_status(503) + self.finish("No workers available") + + def data_received(self, chunk): + assert self.worker is not None + logging.info("API::SPEECH %s: Forwarding client message of length %d to worker" % (self.id, len(chunk))) + self.worker.write_message(chunk, binary=True) + + def post(self, *args, **kwargs): + self.end_request(args, kwargs) + + def put(self, *args, **kwargs): + self.end_request(args, kwargs) + + @run_async + def get_final_result(self, 
callback=None): + logging.info("API::SPEECH %s: Waiting for final result..." % self.id) + callback(self.final_result_queue.get(block=True)) + + @tornado.web.asynchronous + @tornado.gen.coroutine + def end_request(self, *args, **kwargs): + logging.info("API::SPEECH %s: Handling the end of request" % self.id) + assert self.worker is not None + self.worker.write_message("EOS", binary=True) + result = yield tornado.gen.Task(self.get_final_result) + if self.error_status == 0: + logging.info("%s: Final result: %s" % (self.id, result)) + response = {"status" : 0, "id": self.id, "hypotheses": [{"utterance" : result}]} + self.write(response) + else: + logging.info("%s: Error (status=%d) processing HTTP request: %s" % (self.id, self.error_status, self.error_message)) + response = {"status" : self.error_status, "id": self.id, "message": self.error_message} + self.write(response) + self.worker.set_client_socket(None) + self.worker.close() + self.finish() + logging.info("API::SPEECH Everything done") + + def send_event(self, event): ########################### + event_str = str(event) + if len(event_str) > 100: + event_str = event_str[:97] + "..." + logging.info("%s: Receiving event %s from worker" % (self.id, event_str)) + if event["status"] == 0 and ("result" in event): + try: + if len(event["result"]["hypotheses"]) > 0 and event["result"]["final"]: + self.final_result = event["result"]["hypotheses"][0]["transcript"] + except: + e = sys.exc_info()[0] + logging.warn("Failed to extract hypothesis from recognition result:" + e) + elif event["status"] != 0: + self.error_status = event["status"] + self.error_message = event.get("message", "") + + def close(self): + logging.info("API::SPEECH %s: Receiving close from worker" % (self.id)) + self.final_result_queue.put(self.final_result) + self.application.send_status_update() + +class StatusSocketHandler(tornado.websocket.WebSocketHandler): + # needed for Tornado 4.0 + def check_origin(self, origin): + return True + + def open(self): + logging.debug("Opened a new control channel") + self.application.status_listeners.add(self) + self.application.send_status_update_single(self) + + def on_message(self, message): + assert self.client_socket is not None + event = json.loads(message) + self.client_socket.send_event(event) + + def on_close(self): + logging.info("Status listener left") + self.application.status_listeners.remove(self) + +class ControlSocketHandler(tornado.websocket.WebSocketHandler): + # needed for Tornado 4.0 + def check_origin(self, origin): + return True + + def open(self): + logging.info("CTRL: Opened new control channel to client. 
Waiting for authentication...") + self.set_nodelay(True) + self.status = STATUS_NOT_AUTHENTICATED + + def authenticate(self, message): + self.status = STATUS_CONNECTED + self.application.status_listeners.add(self) + message = dict(status= 200, message= "Client successfully authenticated...") + self.write_message(json.dumps(message)) + logging.info("CTRL: Client successfully authenticated...") + + def online(self, message): + message = dict(status= 501, message= "Method not implemented!!!") + self.write_message(json.dumps(message)) + logging.info("CTRL: A user has come online...") + + def offline(self, message): + message = dict(status= 501, message= "Method not implemented!!!") + self.write_message(json.dumps(message)) + logging.info("CTRL: A user has gone offline...") + + def query(self, message): + message = dict(status= 501, message= "Method not implemented!!!") + self.write_message(json.dumps(message)) + logging.info("CTRL: Received query command...") + + def add_image(self, message): + message = dict(status= 501, message= "Method not implemented!!!") + self.write_message(json.dumps(message)) + logging.info("CTRL: Received add_image command...") + + def delete_image(self, message): + message = dict(status= 501, message= "Method not implemented!!!") + self.write_message(json.dumps(message)) + logging.info("CTRL: Received delete_image command...") + + def add_text(self, message): + message = dict(status= 501, message= "Method not implemented!!!") + self.write_message(json.dumps(message)) + logging.info("CTRL: Received add_text command...") + + def add_url(self, message): + message = dict(status= 501, message= "Method not implemented!!!") + self.write_message(json.dumps(message)) + logging.info("CTRL: Received add_url command...") + + def delete_text(self, message): + message = dict(status= 501, message= "Method not implemented!!!") + self.write_message(json.dumps(message)) + logging.info("CTRL: Received delete_text command...") + + def infer(self, message): + message = dict(status= 501, message= "Method not implemented!!!") + self.write_message(json.dumps(message)) + logging.info("CTRL: Received infer command...") + + + # map commands to functions + execute = { + 'authenticate' : authenticate, + 'online' : online, + 'offline' : offline, + 'query' : query, + 'add_image' : add_image, + 'delete_image' : delete_image, + 'add_text' : add_text, + 'add_url' : add_url, + 'delete_text' : delete_text, + 'infer' : infer + } + + def on_message(self, message): + try: + logging.info("CTRL: Received %s" % message) + message = json.loads(message) + if self.status == STATUS_NOT_AUTHENTICATED and message['command'] != "authenticate": + message = dict(status= 400, message= "Data received before authentication!!1 Please raise an issue with the interface maintainer...") + self.write_message(json.dumps(message)) + return + self.execute[message['command']](self, message) + except: + message = dict(status= 400, message= "Invalid message received!!1 Please raise an issue with the interface maintainer...") + self.write_message(json.dumps(message)) + + def on_close(self): + logging.info("Status listener left") + self.application.status_listeners.remove(self) + + +class WorkerSocketHandler(tornado.websocket.WebSocketHandler): + def __init__(self, application, request, **kwargs): + tornado.websocket.WebSocketHandler.__init__(self, application, request, **kwargs) + self.client_socket = None + + # needed for Tornado 4.0 + def check_origin(self, origin): + return True + + def open(self): + self.client_socket = None + 
self.application.available_workers.add(self) + logging.info("New worker available " + self.__str__()) + self.application.send_status_update() + + def on_close(self): + logging.info("Worker " + self.__str__() + " leaving") + self.application.available_workers.discard(self) + if self.client_socket: + self.client_socket.close() + self.application.send_status_update() + + def on_message(self, message): + assert self.client_socket is not None + event = json.loads(message) + self.client_socket.send_event(event) + + def set_client_socket(self, client_socket): + self.client_socket = client_socket + + +class DecoderSocketHandler(tornado.websocket.WebSocketHandler): + # needed for Tornado 4.0 + def check_origin(self, origin): + return True + + def send_event(self, event): + event["id"] = self.id + event_str = str(event) + if len(event_str) > 100: + event_str = event_str[:97] + "..." + logging.info("%s: Sending event %s to client" % (self.id, event_str)) + self.write_message(json.dumps(event)) + + def open(self): + self.id = str(uuid.uuid4()) + logging.info("%s: OPEN" % (self.id)) + logging.info("%s: Request arguments: %s" % (self.id, " ".join(["%s=\"%s\"" % (a, self.get_argument(a)) for a in self.request.arguments]))) + self.user_id = self.get_argument("user-id", "none", True) + self.content_id = self.get_argument("content-id", "none", True) + self.worker = None + try: + self.worker = self.application.available_workers.pop() + self.application.send_status_update() + logging.info("%s: Using worker %s" % (self.id, self.__str__())) + self.worker.set_client_socket(self) + + content_type = self.get_argument("content-type", None, True) + if content_type: + logging.info("%s: Using content type: %s" % (self.id, content_type)) + + self.worker.write_message(json.dumps(dict(id=self.id, caps=content_type, context="{}", user=self.user_id))) + except KeyError: + logging.warn("%s: No worker available for client request" % self.id) + event = dict(status=STATUS_NOT_AVAILABLE, message="No decoder available, try again later") + self.send_event(event) + self.close() + + def on_connection_close(self): + logging.info("%s: Handling on_connection_close()" % self.id) + self.application.num_requests_processed += 1 + self.application.send_status_update() + if self.worker: + try: + self.worker.set_client_socket(None) + logging.info("%s: Closing worker connection" % self.id) + self.worker.close() + except: + pass + + def on_message(self, message): + assert self.worker is not None + logging.info("%s: Forwarding client message (%s) of length %d to worker" % (self.id, type(message), len(message))) + if isinstance(message, unicode): + self.worker.write_message(message, binary=False) + else: + self.worker.write_message(message, binary=True) diff --git a/lucida/commandcenter/controllers/__init__.py b/lucida/commandcenter/controllers/__init__.py index fd97c3acd..42411012e 100644 --- a/lucida/commandcenter/controllers/__init__.py +++ b/lucida/commandcenter/controllers/__init__.py @@ -1,3 +1,3 @@ -__all__ = ['Main', 'AccessManagement', 'WebSocket', 'Service', 'Graph', +__all__ = ['Main', 'AccessManagement', 'WebServer', 'Service', 'Graph', 'ThriftClient', 'Create', 'Learn', 'Infer', 'Parser', - 'QueryClassifier', 'Config', 'User', 'Utilities', 'Database', 'Memcached', 'Decision'] + 'QueryClassifier', 'Config', 'User', 'Utilities', 'Database', 'Memcached', 'Decision', 'FlaskApp'] diff --git a/lucida/commandcenter/dump/Speech.py b/lucida/commandcenter/dump/Speech.py new file mode 100644 index 000000000..caccf630f --- /dev/null +++ 
b/lucida/commandcenter/dump/Speech.py @@ -0,0 +1,35 @@ +from flask import * +from Database import database +from AccessManagement import login_required +from ThriftClient import thrift_client +from QueryClassifier import query_classifier +from Utilities import log, check_image_extension +from Parser import port_dic +import Config +import os +import json +from flask_socketio import emit +from . import socketio + +speech = Blueprint('speech', __name__, template_folder='templates') + +@speech.route('/socketio') +def socketio_demo(): + return render_template('socketio.html') + +@socketio.on('connect') +def socket_connect(): + emit('stt_status', { 'status': 200, 'engines': ['kaldi'] }) + +@socketio.on('stt_control') +def stt_control(message): + print('received control message: ', str(message)) + emit('stt_status', { 'status': 200, 'message': 'Accepted' }) + +@socketio.on('stt_audio') +def receive_audio(message): + print('received audio chunk: ', str(message)) + +@socketio.on('disconnect') +def socket_disconnect(): + print('Client disconnected') diff --git a/lucida/commandcenter/controllers/WebSocket.py b/lucida/commandcenter/dump/WebSocket.bak.py similarity index 99% rename from lucida/commandcenter/controllers/WebSocket.py rename to lucida/commandcenter/dump/WebSocket.bak.py index 5c6c77398..d76612383 100644 --- a/lucida/commandcenter/controllers/WebSocket.py +++ b/lucida/commandcenter/dump/WebSocket.bak.py @@ -53,7 +53,6 @@ def __init__(self): ) handlers = [ - (r"/", MainHandler), (r"/client/ws/speech", DecoderSocketHandler), (r"/client/ws/status", StatusSocketHandler), (r"/client/dynamic/reference", ReferenceHandler), diff --git a/lucida/commandcenter/dump/__init__.py b/lucida/commandcenter/dump/__init__.py new file mode 100644 index 000000000..dcfada909 --- /dev/null +++ b/lucida/commandcenter/dump/__init__.py @@ -0,0 +1,7 @@ +from flask_socketio import SocketIO + +socketio = SocketIO() + +__all__ = ['Main', 'AccessManagement', 'WebSocket', 'Service', 'Graph', + 'ThriftClient', 'Create', 'Learn', 'Infer', 'Parser', + 'QueryClassifier', 'Config', 'User', 'Utilities', 'Database', 'Memcached', 'Decision', 'Speech', 'socketio'] diff --git a/lucida/commandcenter/dump/app.py b/lucida/commandcenter/dump/app.py new file mode 100644 index 000000000..43f0b4923 --- /dev/null +++ b/lucida/commandcenter/dump/app.py @@ -0,0 +1,64 @@ +from __future__ import absolute_import +from __future__ import division +from __future__ import unicode_literals + +import sys, glob, os +sys.path.insert(0, glob.glob(os.path.abspath(os.path.dirname(__file__)) + + '/../../tools/thrift-0.9.3/lib/py/build/lib*')[0]) + +from controllers import * +from controllers.Parser import cmd_port +from flask import * +from flask_socketio import SocketIO, emit +from threading import Thread +import logging + + +# Initialize the Flask app with the template folder address. +app = Flask(__name__, template_folder='templates') + +# app.config.from_object('config') +app.config['MAX_CONTENT_LENGTH'] = 16 * 1024 * 1024 # 16 MB due to MongoDB + +# Register the controllers. +app.register_blueprint(Main.main) +app.register_blueprint(User.user) +app.register_blueprint(Create.create) +app.register_blueprint(Learn.learn) +app.register_blueprint(Infer.infer) +app.register_blueprint(Speech.speech) + +# Session. 
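# [editor's note] This secret key is hard-coded and committed, as in the other
# app.py variants in this tree; if any of these dump/ experiments graduates,
# it should come from the environment instead, e.g.:
#
#   app.secret_key = os.environ.get('SECRET_KEY', 'dev-only-fallback')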
+app.secret_key = 'A0Zr98j/3yX R~XHH!jmN]LWX/,?RT' + +socketio.init_app(app, async_mode='eventlet', allow_upgrades='true') + +def flask_listener(): + print 'Starting non-secure flask' +# socketio.run(app, host='0.0.0.0', port=3000, debug=True, use_reloader=False, certfile='certs/server.crt', keyfile='certs/server.key') + socketio.run(app, host='0.0.0.0', port=3000, debug=True, use_reloader=False) + +def web_socket_listener(): + print 'Start web socket at ' + str(cmd_port) + logging.basicConfig(level=logging.DEBUG, + format="%(levelname)8s %(asctime)s %(message)s ") + logging.debug('Starting up server') + WebSocket.tornado.options.parse_command_line() + + # For wss (with ASR capability) + if os.environ.get('SECURE_HOST'): + print 'Starting secure web socket' + WebSocket.Application().listen(cmd_port, ssl_options={ + "certfile":"certs/server.crt", + "keyfile":"certs/server.key"}) + # For ws (without ASR capability) + else: + print 'Starting non-secure web socket' + WebSocket.Application().listen(cmd_port) + + WebSocket.tornado.ioloop.IOLoop.instance().start() + +if __name__ == '__main__': +# Thread(target = flask_listener).start() +# web_socket_listener() + flask_listener() diff --git a/lucida/commandcenter/dump/dictate.js b/lucida/commandcenter/dump/dictate.js new file mode 100644 index 000000000..9cf6c3646 --- /dev/null +++ b/lucida/commandcenter/dump/dictate.js @@ -0,0 +1,416 @@ +(function(window){ + + // Defaults + var SERVER = "ws://bark.phon.ioc.ee:82/dev/duplex-speech-api/ws/speech"; + var SERVER_STATUS = "ws://bark.phon.ioc.ee:82/dev/duplex-speech-api/ws/status"; + var REFERENCE_HANDLER = "http://bark.phon.ioc.ee:82/dev/duplex-speech-api/dynamic/reference"; + var CONTENT_TYPE = "content-type=audio/x-raw,+layout=(string)interleaved,+rate=(int)16000,+format=(string)S16LE,+channels=(int)1"; + // Send blocks 4 x per second as recommended in the server doc. 
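+	// [editor's note] i.e. one chunk every 250 ms; at the CONTENT_TYPE above
+	// (16 kHz, S16LE mono, 2 bytes/sample) that is roughly
+	// 16000 * 2 * 0.25 = 8000 bytes of raw audio per send.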
+ var INTERVAL = 250; + var TAG_END_OF_SENTENCE = "EOS"; + var RECORDER_WORKER_PATH = 'recorderWorker.js'; + + // Error codes (mostly following Android error names and codes) + var ERR_NETWORK = 2; + var ERR_AUDIO = 3; + var ERR_SERVER = 4; + var ERR_CLIENT = 5; + + // Event codes + var MSG_WAITING_MICROPHONE = 1; + var MSG_MEDIA_STREAM_CREATED = 2; + var MSG_INIT_RECORDER = 3; + var MSG_RECORDING = 4; + var MSG_SEND = 5; + var MSG_SEND_EMPTY = 6; + var MSG_SEND_EOS = 7; + var MSG_WEB_SOCKET = 8; + var MSG_WEB_SOCKET_OPEN = 9; + var MSG_WEB_SOCKET_CLOSE = 10; + var MSG_STOP = 11; + var MSG_SERVER_CHANGED = 12; + + // Server status codes + // from https://github.com/alumae/kaldi-gstreamer-server + var SERVER_STATUS_CODE = { + 0: 'Success', // Usually used when recognition results are sent + 1: 'No speech', // Incoming audio contained a large portion of silence or non-speech + 2: 'Aborted', // Recognition was aborted for some reason + 9: 'No available', // recognizer processes are currently in use and recognition cannot be performed + }; + + var Dictate = function(cfg) { + var config = cfg || {}; + config.server = config.server || SERVER; + config.audioSourceId = config.audioSourceId; + config.serverStatus = config.serverStatus || SERVER_STATUS; + config.referenceHandler = config.referenceHandler || REFERENCE_HANDLER; + config.contentType = config.contentType || CONTENT_TYPE; + config.interval = config.interval || INTERVAL; + config.recorderWorkerPath = config.recorderWorkerPath || RECORDER_WORKER_PATH; + config.onReadyForSpeech = config.onReadyForSpeech || function() {}; + config.onEndOfSpeech = config.onEndOfSpeech || function() {}; + config.onPartialResults = config.onPartialResults || function(data) {}; + config.onResults = config.onResults || function(data) {}; + config.onEndOfSession = config.onEndOfSession || function() {}; + config.onEvent = config.onEvent || function(e, data) {}; + config.onError = config.onError || function(e, data) {}; + config.rafCallback = config.rafCallback || function(time) {}; + if (config.onServerStatus) { + monitorServerStatus(); + } + + // Initialized by init() + var audioContext; + var recorder; + // Initialized by startListening() + var ws; + var intervalKey; + // Initialized during construction + var wsServerStatus; + + // Returns the configuration + this.getConfig = function() { + return config; + } + + // Set up the recorder (incl. asking permission) + // Initializes audioContext + // Can be called multiple times. + // TODO: call something on success (MSG_INIT_RECORDER is currently called) + this.init = function() { + var audioSourceConstraints = {}; + config.onEvent(MSG_WAITING_MICROPHONE, "Waiting for approval to access your microphone ..."); + try { + window.AudioContext = window.AudioContext || window.webkitAudioContext; + navigator.getUserMedia = navigator.getUserMedia || navigator.webkitGetUserMedia || navigator.mozGetUserMedia; + window.URL = window.URL || window.webkitURL; + audioContext = new AudioContext(); + } catch (e) { + // Firefox 24: TypeError: AudioContext is not a constructor + // Set media.webaudio.enabled = true (in about:config) to fix this. 
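+				// [editor's note] navigator.getUserMedia (used below) was already being
+				// deprecated when this was written; current browsers expose the
+				// promise-based navigator.mediaDevices.getUserMedia(constraints), e.g.:
+				//
+				//   navigator.mediaDevices.getUserMedia({ audio: true })
+				//     .then(startUserMedia)
+				//     .catch(function(e) { config.onError(ERR_CLIENT, 'No audio input: ' + e); });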
+ config.onError(ERR_CLIENT, "Error initializing Web Audio browser: " + e); + } + + if (navigator.getUserMedia) { + if(config.audioSourceId) { + audioSourceConstraints.audio = { + optional: [{ sourceId: config.audioSourceId }] + }; + } else { + audioSourceConstraints.audio = true; + } + navigator.getUserMedia(audioSourceConstraints, startUserMedia, function(e) { + config.onError(ERR_CLIENT, "No live audio input in this browser: " + e); + }); + } else { + config.onError(ERR_CLIENT, "No user media support"); + } + } + + // Start recording and transcribing + this.startListening = function() { + if (!recorder) { + config.onError(ERR_AUDIO, "Recorder undefined"); + return; + } + + if (ws) { + cancel(); + } + + try { + ws = createWebSocket(); + } catch (e) { + config.onError(ERR_CLIENT, "No web socket support in this browser!"); + } + } + + // Cancel everything without waiting on the server + this.cancel = function() { + // Stop the regular sending of audio (if present) + clearInterval(intervalKey); + if (recorder) { + recorder.stop(); + recorder.clear(); + config.onEvent(MSG_STOP, 'Stopped recording'); + } + if (ws) { + ws.close(); + ws = null; + } + } + + // Sets the URL of the speech server + this.setServer = function(server) { + config.server = server; + config.onEvent(MSG_SERVER_CHANGED, 'Server changed: ' + server); + } + + // Sets the URL of the speech server status server + this.setServerStatus = function(serverStatus) { + config.serverStatus = serverStatus; + + if (config.onServerStatus) { + monitorServerStatus(); + } + + config.onEvent(MSG_SERVER_CHANGED, 'Server status server changed: ' + serverStatus); + } + + // Sends reference text to speech server + this.submitReference = function submitReference(text, successCallback, errorCallback) { + var headers = {} + if (config["user_id"]) { + headers["User-Id"] = config["user_id"] + } + if (config["content_id"]) { + headers["Content-Id"] = config["content_id"] + } + $.ajax({ + url: config.referenceHandler, + type: "POST", + headers: headers, + data: text, + dataType: "text", + success: successCallback, + error: errorCallback, + }); + } + + // Private methods + function startUserMedia(stream) { + var input = audioContext.createMediaStreamSource(stream); + config.onEvent(MSG_MEDIA_STREAM_CREATED, 'Media stream created'); + + // make the analyser available in window context + window.userSpeechAnalyser = audioContext.createAnalyser(); + input.connect(window.userSpeechAnalyser); + + config.rafCallback(); + + recorder = new Recorder(input, { workerPath : config.recorderWorkerPath }); + this.recorder = recorder; + config.onEvent(MSG_INIT_RECORDER, 'Recorder initialized'); + } + + function socketSend(item) { + if (ws) { + var state = ws.readyState; + if (state == 1) { + // If item is an audio blob + if (item instanceof Blob) { + if (item.size > 0) { + ws.send(item); + config.onEvent(MSG_SEND, 'Send: blob: ' + item.type + ', ' + item.size); + } else { + config.onEvent(MSG_SEND_EMPTY, 'Send: blob: ' + item.type + ', EMPTY'); + } + // Otherwise it's the EOS tag (string) + } else { + ws.send(item); + config.onEvent(MSG_SEND_EOS, 'Send tag: ' + item); + } + } else { + config.onError(ERR_NETWORK, 'WebSocket: readyState!=1: ' + state + ": failed to send: " + item); + } + } else { + config.onError(ERR_CLIENT, 'No web socket connection: failed to send: ' + item); + } + } + + + + var stopListeningFunc = function() { + // Stop the regular sending of audio + clearInterval(intervalKey); + // Stop recording + if (recorder) { + recorder.stop(); + 
config.onEvent(MSG_STOP, 'Stopped recording'); + // Push the remaining audio to the server + recorder.export16kMono(function(blob) { + console.log(blob) + socketSend(blob); + socketSend(TAG_END_OF_SENTENCE); + recorder.clear(); + }, 'audio/x-raw'); + config.onEndOfSpeech(); + } else { + config.onError(ERR_AUDIO, "Recorder undefined"); + } + + console.log($("#clinc").val()) + + + + + + // $.ajax({ + // type: "POST", + // url: "/api/login", + // data: JSON.stringify(user_login_info), + // contentType: "application/json", + // success: function(success_data) { + // if ($.url().param("url") !== undefined) { + + // console.log($.url().param("url")); + // window.location.href = secret_url_str + $.url().param("url"); + // } else { + // window.location.href = secret_url_str + "/"; //redirect to the main page + // } + // }, + // error: function(error_data) { + // // {readyState: 4, responseText: "{"errors": [{ + // // "message": "Username does not exist"}]}", + // // responseJSON: Object, status: 404, statusText: "NOT FOUND"} + // var response = error_data.responseJSON; + // if (error_data.status == 404) { + // X_username.append( + // error_html_start + response.errors[0].message + error_html_end); + + // } else { + // X_password.append( + // error_html_start + response.errors[0].message + error_html_end); + // } + // } + // }); + + + + + + } + + // Stop listening, i.e. recording and sending of new input. + this.stopListening = stopListeningFunc + + function createWebSocket() { + // TODO: do we need to use a protocol? + //var ws = new WebSocket("ws://127.0.0.1:8081", "echo-protocol"); + var url = config.server + '?' + config.contentType; + if (config["user_id"]) { + url += '&user-id=' + config["user_id"] + } + if (config["content_id"]) { + url += '&content-id=' + config["content_id"] + } + var ws = new WebSocket(url); + + ws.onmessage = function(e) { + var data = e.data; + config.onEvent(MSG_WEB_SOCKET, data); + var r = JSON.parse(data); + // if (r.hascoachresponse) { + // config.onClinc(r.coachresponse); + // } + if (data instanceof Object && ! (data instanceof Blob)) { + config.onError(ERR_SERVER, 'WebSocket: onEvent: got Object that is not a Blob'); + } else if (data instanceof Blob) { + config.onError(ERR_SERVER, 'WebSocket: got Blob'); + } else { + var res = JSON.parse(data); + if (res.status == 0) { + // TODO: final is undefined sometimes + if (res.result === undefined || res.result.final) { + if (res.result !== undefined) { + config.onResults(res.result.hypotheses); + } + stopListeningFunc(); + } else { + config.onPartialResults(res.result.hypotheses); + } + // if (res.hascoachresponse) { + // config.onClinc(res.coachresponse); + // } + } else { + config.onError(ERR_SERVER, 'Server error: ' + res.status + ': ' + getDescription(res.status)); + } + } + } + + // Start recording only if the socket becomes open + ws.onopen = function(e) { + intervalKey = setInterval(function() { + recorder.export16kMono(function(blob) { + socketSend(blob); + console.log(blob) + recorder.clear(); + }, 'audio/x-raw'); + }, config.interval); + // Start recording + recorder.record(); + config.onReadyForSpeech(); + config.onEvent(MSG_WEB_SOCKET_OPEN, e); + }; + + // This can happen if the blob was too big + // E.g. "Frame size of 65580 bytes exceeds maximum accepted frame size" + // Status codes + // http://tools.ietf.org/html/rfc6455#section-7.4.1 + // 1005: + // 1006: + ws.onclose = function(e) { + var code = e.code; + var reason = e.reason; + var wasClean = e.wasClean; + // The server closes the connection (only?) 
+ // when its endpointer triggers. + config.onEndOfSession(); + config.onEvent(MSG_WEB_SOCKET_CLOSE, e.code + "/" + e.reason + "/" + e.wasClean); + }; + + ws.onerror = function(e) { + var data = e.data; + config.onError(ERR_NETWORK, data); + } + + return ws; + } + + + function monitorServerStatus() { + if (wsServerStatus) { + wsServerStatus.close(); + } + wsServerStatus = new WebSocket(config.serverStatus); + wsServerStatus.onmessage = function(evt) { + config.onServerStatus(JSON.parse(evt.data)); + }; + } + + + function getDescription(code) { + if (code in SERVER_STATUS_CODE) { + return SERVER_STATUS_CODE[code]; + } + return "Unknown error"; + } + + + + }; // Dictate + + // Simple class for persisting the transcription. + // If isFinal==true then a new line is started in the transcription list + // (which only keeps the final transcriptions). + var Transcription = function(cfg) { + var index = 0; + var list = []; + + this.add = function(text, isFinal) { + list[index] = text; + if (isFinal) { + index++; + } + } + + this.toString = function() { + return list.join('. '); + } + } + + window.Dictate = Dictate; + window.Transcription = Transcription; + +})(window); diff --git a/lucida/commandcenter/dump/infer.html b/lucida/commandcenter/dump/infer.html new file mode 100644 index 000000000..8623f5894 --- /dev/null +++ b/lucida/commandcenter/dump/infer.html @@ -0,0 +1,144 @@ +{% extends "base.html" %} + +{% block content %} + +{% if dates %} + + + +{% endif %} + + + +
+{# The original markup of this template was garbled in extraction; the recoverable template text is: #}
+ Main » Infer
+ Ask a question:
+ Upload your picture!
+ {% if error %}
+ {{ error }}
+ {% endif %}
+ Powered by Lucida
+ + + +{% endblock %} diff --git a/lucida/commandcenter/dump/mob.js b/lucida/commandcenter/dump/mob.js new file mode 100644 index 000000000..e98a42551 --- /dev/null +++ b/lucida/commandcenter/dump/mob.js @@ -0,0 +1,256 @@ +// Global UI elements: +// - log: event log +// - trans: transcription window + +// Global objects: +// - isConnected: true iff we are connected to a worker +// - tt: simple structure for managing the list of hypotheses +// - dictate: dictate object with control methods 'init', 'startListening', ... +// and event callbacks onResults, onError, ... +var isConnected = false; +var isMicrophoneInitialized = false; + +var tt = new Transcription(); + +var startPosition = 0; +var endPosition = 0; +var doUpper = false; +var doPrependSpace = true; +var textToVoice = true; + +function capitaliseFirstLetter(string) { + return string.charAt(0).toUpperCase() + string.slice(1); +} + +function updateDisabledState() { + var disabled = true; + // var text = "{{ _('Dikteerimiseks vajuta nuppu') }}"; + // if (!isMicrophoneInitialized) { + // disabled = true; + // text = "{{ _('Mikrofon initsialiseerimata') }}"; + // } else if (isConnected) { + // disabled = false; + // text = "{{ _('Räägi...') }}"; + // } else if (numWorkersAvailable == 0) { + // disabled = true; + // text = "{{ _('Server ülekoormatud või rivist väljas') }}"; + // } + if (disabled) { + $("#recbutton").addClass("disabled"); + //$("#helptext").html(text); + } else { + $("#recbutton").removeClass("disabled"); + //$("#helptext").html(text); + } +} + +function prettyfyHyp(text, doCapFirst, doPrependSpace) { + if (doCapFirst) { + text = capitaliseFirstLetter(text); + } + tokens = text.split(" "); + text = ""; + if (doPrependSpace) { + text = " "; + } + doCapitalizeNext = false; + tokens.map(function(token) { + if (text.trim().length > 0) { + text = text + " "; + } + if (doCapitalizeNext) { + text = text + capitaliseFirstLetter(token); + } else { + text = text + token; + } + if (token == "." || /\n$/.test(token)) { + doCapitalizeNext = true; + } else { + doCapitalizeNext = false; + } + }); + + text = text.replace(/ ([,.!?:;])/g, "\$1"); + text = text.replace(/ ?\n ?/g, "\n"); + return text; +} + +var dictate = new Dictate({ + server : $("#servers").val().split('|')[0], + serverStatus : $("#servers").val().split('|')[1], + recorderWorkerPath : 'static/js/recorderWorker.js', + onReadyForSpeech : function() { + isConnected = true; + __message("READY FOR SPEECH"); + document.getElementById('startImg').src = 'static/image/microphone_off.png'; + $("#buttonToggleListening").html('Stop'); + $("#buttonToggleListening").addClass('highlight'); + $("#buttonToggleListening").prop("disabled", false); + $("#buttonCancel").prop("disabled", false); + $("#recbutton").addClass("playing"); + startPosition = $("#trans").prop("selectionStart"); + endPosition = startPosition; + var textBeforeCaret = $("#trans").val().slice(0, startPosition); + if ((textBeforeCaret.length == 0) || /\. 
*$/.test(textBeforeCaret) || /\n *$/.test(textBeforeCaret)) { + doUpper = true; + } else { + doUpper = false; + } + doPrependSpace = (textBeforeCaret.length > 0) && !(/\n *$/.test(textBeforeCaret)); + }, + onEndOfSpeech : function() { + __message("END OF SPEECH"); + document.getElementById('startImg').src = 'static/image/microphone.png'; + $("#buttonToggleListening").html('Stopping...'); + $("#buttonToggleListening").prop("disabled", true); + }, + onEndOfSession : function() { + isConnected = false; + __message("END OF SESSION"); + $("#buttonToggleListening").html('Start'); + $("#buttonToggleListening").removeClass('highlight'); + $("#buttonToggleListening").prop("disabled", false); + $("#buttonCancel").prop("disabled", true); + $("#recbutton").removeClass("playing"); + }, + onServerStatus : function(json) { + __serverStatus(json.num_workers_available); + $("#serverStatusBar").toggleClass("highlight", json.num_workers_available == 0); + // If there are no workers and we are currently not connected + // then disable the Start/Stop button. + if (json.num_workers_available == 0 && ! isConnected) { + $("#buttonToggleListening").prop("disabled", true); + } else { + $("#buttonToggleListening").prop("disabled", false); + } + }, + onPartialResults : function(hypos) { + hypText = prettyfyHyp(hypos[0].transcript, doUpper, doPrependSpace); + val = $("#trans").val(); + $("#trans").val(val.slice(0, startPosition) + hypText + val.slice(endPosition)); + endPosition = startPosition + hypText.length; + $("#trans").prop("selectionStart", endPosition); + }, + onResults : function(hypos) { + hypText = prettyfyHyp(hypos[0].transcript, doUpper, doPrependSpace); + val = $("#trans").val(); + $("#trans").val(val.slice(0, startPosition) + hypText + val.slice(endPosition)); + startPosition = startPosition + hypText.length; + endPosition = startPosition; + $("#trans").prop("selectionStart", endPosition); + if (/\. *$/.test(hypText) || /\n *$/.test(hypText)) { + doUpper = true; + } else { + doUpper = false; + } + doPrependSpace = (hypText.length > 0) && !(/\n *$/.test(hypText)); + }, + onClinc : function(hypos) { + hypText = prettyfyHyp(hypos, doUpper, doPrependSpace); + val = $("#clinc").val(); + $("#clinc").val(val.slice(0, startPosition) + hypText + val.slice(endPosition)); + if (textToVoice){ + speakable = hypText.split('\n')[0]; + var u = new SpeechSynthesisUtterance(); + u.text = speakable; + u.lang = 'en-US'; + u.rate = 1.0; + speechSynthesis.speak(u); + } + startPosition = startPosition + hypText.length; + endPosition = startPosition; + $("#clinc").prop("selectionStart", endPosition); + if (/\. 
*$/.test(hypText) || /\n *$/.test(hypText)) { + doUpper = true; + } else { + doUpper = false; + } + doPrependSpace = (hypText.length > 0) && !(/\n *$/.test(hypText)); + }, + onError : function(code, data) { + dictate.cancel(); + __error(code, data); + // TODO: show error in the GUI + }, + onEvent : function(code, data) { + __message(code, data); + if (code == 3 /* MSG_INIT_RECORDER */) { + isMicrophoneInitialized = true; + updateDisabledState(); + } + } +}); + +// Private methods (called from the callbacks) +function __message(code, data) { + log.innerHTML = "msg: " + code + ": " + (data || '') + "\n" + log.innerHTML; +} + +function __error(code, data) { + log.innerHTML = "ERR: " + code + ": " + (data || '') + "\n" + log.innerHTML; +} + +function __serverStatus(msg) { + serverStatusBar.innerHTML = msg; +} + +function __updateTranscript(text) { + $("#trans").val(text); +} + +// Public methods (called from the GUI) +function startButtonFunc() { + if (isConnected) { + dictate.stopListening(); + $("#recbutton").addClass("disabled"); + } else { + clearTranscription(); + document.getElementById('startImg').src = 'static/image/microphone.png'; + dictate.startListening(); + } +} + +function cancel() { + dictate.cancel(); +} + +function clearTranscription() { + $("#trans").val(""); + // needed, otherwise selectionStart will retain its old value + $("#trans").prop("selectionStart", 0); + $("#trans").prop("selectionEnd", 0); + + $("#clinc").val(""); + // needed, otherwise selectionStart will retain its old value + $("#clinc").prop("selectionStart", 0); + $("#clinc").prop("selectionEnd", 0); +} + +$(document).ready(function() { + dictate.init(); + + $("#servers").change(function() { + dictate.cancel(); + var servers = $("#servers").val().split('|'); + dictate.setServer(servers[0]); +// dictate.setServerStatus(servers[1]); + }); + +}); + +function readURL(input) { + if (input.files && input.files[0]) { + var reader = new FileReader(); + + reader.onload = function (e) { + $('#image_upload_preview').attr('src', e.target.result); + } + + reader.readAsDataURL(input.files[0]); + } +} + +$("#file_input").change(function () { + readURL(this); +}); + diff --git a/lucida/commandcenter/dump/socketio.html b/lucida/commandcenter/dump/socketio.html new file mode 100644 index 000000000..a8ef8f9d8 --- /dev/null +++ b/lucida/commandcenter/dump/socketio.html @@ -0,0 +1,37 @@ + + + + Socket.IO chat + + + + + + + + + + + + diff --git a/lucida/commandcenter/dump/stream_audio.js b/lucida/commandcenter/dump/stream_audio.js new file mode 100644 index 000000000..55949654e --- /dev/null +++ b/lucida/commandcenter/dump/stream_audio.js @@ -0,0 +1,96 @@ +var sendInterval; +var socket; +var input; + +// Private methods +function startUserMedia(stream) { + input = audioContext.createMediaStreamSource(stream); + + // make the analyser available in window context + window.userSpeechAnalyser = audioContext.createAnalyser(); + input.connect(window.userSpeechAnalyser); + + recorder = new Recorder(input, { workerPath : 'static/js/recorderWorker.js' }); +} + +startListening = function() { + if (!recorder) { + alert("Recorder undefined"); + return; + } + socket.emit('stt_control', {command: 'I start recording...'}); + sendInterval = setInterval(function() { + recorder.export16kMono(function(blob) { + console.log(blob); + socket.emit('stt_audio', blob); + recorder.clear(); + }, 'audio/x-raw'); + }, 250); + recorder.record(); + clearTranscription(); + document.getElementById('startImg').src = 'static/image/microphone_off.png'; +} + 
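+// Flush any buffered audio to the server, signal the end of recording, and restore the mic icon.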
+stopListening = function() { + clearInterval(sendInterval); + sendInterval = undefined; + if (recorder) { + recorder.stop(); + recorder.export16kMono(function(blob) { + socket.emit('stt_audio',blob); + socket.emit('stt_control', {command: 'I stop recording!!!'}); + recorder.clear(); + }, 'audio/x-raw'); + } else { + alert("Recorder undefined"); + } + document.getElementById('startImg').src = 'static/image/microphone.png'; +} + +function clearTranscription() { + $("#trans").val(""); + $("#trans").prop("selectionStart", 0); + $("#trans").prop("selectionEnd", 0); + + $("#clinc").val(""); + $("#clinc").prop("selectionStart", 0); + $("#clinc").prop("selectionEnd", 0); +} + +// Public methods (called from the GUI) +function startButtonFunc() { + if (sendInterval) { + stopListening(); + } else { + startListening(); + } +} + +// Document ready function +$(document).ready(function() { + var audioSourceConstraints = {}; + try { + window.AudioContext = window.AudioContext || window.webkitAudioContext; + navigator.getUserMedia = navigator.getUserMedia || navigator.webkitGetUserMedia || navigator.mozGetUserMedia; + window.URL = window.URL || window.webkitURL; + audioContext = new AudioContext(); + } catch (e) { + alert("Error initializing Web Audio browser: " + e); + } + + if (navigator.getUserMedia) { +// if(config.audioSourceId) { +// audioSourceConstraints.audio = { optional: [{ sourceId: config.audioSourceId }] }; +// } else { + audioSourceConstraints.audio = true; +// } + navigator.getUserMedia(audioSourceConstraints, startUserMedia, function(e) { alert("No live audio input in this browser: " + e); }); + } else { + alert("No user media support"); + } + + socket = io(); + socket.on('stt_status', function(message) { + console.log(JSON.stringify(message)); + }); +}); diff --git a/lucida/commandcenter/static/js/mob.js b/lucida/commandcenter/static/js/mob.js index 005e02c0f..c5c34dc84 100644 --- a/lucida/commandcenter/static/js/mob.js +++ b/lucida/commandcenter/static/js/mob.js @@ -82,6 +82,7 @@ var dictate = new Dictate({ onReadyForSpeech : function() { isConnected = true; __message("READY FOR SPEECH"); + document.getElementById('startImg').src = 'static/image/microphone_off.png'; $("#buttonToggleListening").html('Stop'); $("#buttonToggleListening").addClass('highlight'); $("#buttonToggleListening").prop("disabled", false); @@ -99,12 +100,14 @@ var dictate = new Dictate({ }, onEndOfSpeech : function() { __message("END OF SPEECH"); + document.getElementById('startImg').src = 'static/image/microphone.png'; $("#buttonToggleListening").html('Stopping...'); $("#buttonToggleListening").prop("disabled", true); }, onEndOfSession : function() { isConnected = false; __message("END OF SESSION"); + document.getElementById('startImg').src = 'static/image/microphone.png'; $("#buttonToggleListening").html('Start'); $("#buttonToggleListening").removeClass('highlight'); $("#buttonToggleListening").prop("disabled", false); @@ -166,6 +169,7 @@ var dictate = new Dictate({ doPrependSpace = (hypText.length > 0) && !(/\n *$/.test(hypText)); }, onError : function(code, data) { + document.getElementById('startImg').src = 'static/image/microphone.png'; dictate.cancel(); __error(code, data); // TODO: show error in the GUI @@ -197,11 +201,13 @@ function __updateTranscript(text) { } // Public methods (called from the GUI) -function toggleListening() { +function startButtonFunc() { if (isConnected) { dictate.stopListening(); $("#recbutton").addClass("disabled"); } else { + clearTranscription(); + 
document.getElementById('startImg').src = 'static/image/microphone.png'; dictate.startListening(); } } @@ -249,3 +255,4 @@ function readURL(input) { $("#file_input").change(function () { readURL(this); }); + diff --git a/lucida/commandcenter/templates/infer.html b/lucida/commandcenter/templates/infer.html index 65f5cdf80..436b46e3c 100644 --- a/lucida/commandcenter/templates/infer.html +++ b/lucida/commandcenter/templates/infer.html @@ -46,13 +46,18 @@ - + - @@ -69,24 +74,14 @@ @@ -111,7 +106,7 @@ - - - - - - {% endblock %} diff --git a/lucida/speechrecognition/.gitignore b/lucida/speechrecognition/.gitignore new file mode 100644 index 000000000..433d65f44 --- /dev/null +++ b/lucida/speechrecognition/.gitignore @@ -0,0 +1,4 @@ +*.pyc +*.pyo +worker_config.yaml +include/ diff --git a/lucida/speechrecognition/Makefile b/lucida/speechrecognition/Makefile index b8653cff5..758a75f37 100644 --- a/lucida/speechrecognition/Makefile +++ b/lucida/speechrecognition/Makefile @@ -1,2 +1,25 @@ -SUBDIRS=kaldi_gstreamer_asr -include ../../Makefile.common \ No newline at end of file +.PHONY: include plugin setup decoders all start_server + +include: + ./gen_include.sh + +plugin: include + cd src/gstplugin && [ -f configure ] || ./autogen.sh && [ -f Makefile ] || ./configure && make + +setup: + ./setup.sh + +decoders: + @for decoder in $$(find decoders -maxdepth 1 -mindepth 1 -type d -not -path decoders/templates); do \ + if [ -d $$decoder ]; then \ + cd $$decoder; \ + make all; \ + make setup; \ + cd ../..; \ + fi; \ + done; + +all: plugin decoders #setup + +start_server: + GST_PLUGIN_PATH=`pwd`"/src/gstplugin/src" python src/server.py diff --git a/lucida/speechrecognition/README.md b/lucida/speechrecognition/README.md deleted file mode 100644 index 99848824a..000000000 --- a/lucida/speechrecognition/README.md +++ /dev/null @@ -1,16 +0,0 @@ -# Automatic Speech Recognition (ASR) - -The current implementation of ASR uses [Kaldi](http://kaldi.sourceforge.net/), -a speech recognition toolkit written in C++ that is freely available under the Apache license. - -## Notes: - -1. `kaldi_gstreamer_asr` contains the implementation of the Kaldi ASR service. - -2. If you want to create and use another ASR implementation, -you can start by making a directory parallel to `kaldi_gstreamer_asr` and modify `Makefile`. -Make sure to reference `../lucidaservice.thrift` and `../lucidatypes.thrift`. - -3. Type `make` to build all ASR implementations, -or type `cd kaldi_gstreamer_asr` and `make` to only build the Kaldi ASR service. 
- diff --git a/lucida/speechrecognition/asrthriftservice.thrift b/lucida/speechrecognition/asrthriftservice.thrift new file mode 100644 index 000000000..7f53fe1bc --- /dev/null +++ b/lucida/speechrecognition/asrthriftservice.thrift @@ -0,0 +1,9 @@ +service ASRThriftService { + void request_id(1:string id) + void user(1:string user) + void context(1:string cntxt) + void start() + void push(1:binary data) + void stop() + void abort() +} diff --git a/lucida/speechrecognition/decoders/README.md b/lucida/speechrecognition/decoders/README.md new file mode 100644 index 000000000..43e8f94bb --- /dev/null +++ b/lucida/speechrecognition/decoders/README.md @@ -0,0 +1,3 @@ +Speech Decoder +============= + diff --git a/lucida/speechrecognition/decoders/googleassist/.gitignore b/lucida/speechrecognition/decoders/googleassist/.gitignore new file mode 100644 index 000000000..a9514fb40 --- /dev/null +++ b/lucida/speechrecognition/decoders/googleassist/.gitignore @@ -0,0 +1,4 @@ +auth/ +*.pyc +configuration.py +.config diff --git a/lucida/speechrecognition/decoders/googleassist/Makefile b/lucida/speechrecognition/decoders/googleassist/Makefile new file mode 100644 index 000000000..52fe9d772 --- /dev/null +++ b/lucida/speechrecognition/decoders/googleassist/Makefile @@ -0,0 +1,10 @@ +.PHONY: all setup clean + +all: setup + +setup: + ./setup.sh + +clean: + rm -rf configuration.py + rm -rf configuration.pyc diff --git a/lucida/speechrecognition/decoders/googleassist/decoder b/lucida/speechrecognition/decoders/googleassist/decoder new file mode 100755 index 000000000..51c49e00b --- /dev/null +++ b/lucida/speechrecognition/decoders/googleassist/decoder @@ -0,0 +1,387 @@ +#!/usr/bin/env python + +""" +Created on Jun 14 2017 + +@author: kamal1210 +""" +from __future__ import print_function + +from configuration import * + +import os, sys, time, base64 +import threading, json +import requests, click + +from thrift.transport import TSocket +from thrift.transport import TTransport +from thrift.protocol import TBinaryProtocol +from thrift.server import TServer + +import grpc +import google.auth.transport.grpc +import google.auth.transport.requests +import google.oauth2.credentials + +from google.assistant.embedded.v1alpha1 import embedded_assistant_pb2 +from google.rpc import code_pb2 +from tenacity import retry, stop_after_attempt, retry_if_exception, wait_random + +sys.path.insert(0, "include") +sys.path.insert(0, "include/gen-py") + +from asrthriftservice import ASRThriftService +import defs + +STATE_CREATED = 0 +STATE_READY = 1 +STATE_LISTENING = 2 +STATE_EOS_RECEIVED = 3 +STATE_WAITING = 4 + +DEFAULT_AUDIO_SAMPLE_RATE = 16000 +DEFAULT_AUDIO_SAMPLE_WIDTH = 2 +DEFAULT_AUDIO_ITER_SIZE = 3200 + +ASSISTANT_API_ENDPOINT = "embeddedassistant.googleapis.com" +END_OF_UTTERANCE = embedded_assistant_pb2.ConverseResponse.END_OF_UTTERANCE +DIALOG_FOLLOW_ON = embedded_assistant_pb2.ConverseResult.DIALOG_FOLLOW_ON +CLOSE_MICROPHONE = embedded_assistant_pb2.ConverseResult.CLOSE_MICROPHONE +DEFAULT_GRPC_DEADLINE = 60 * 3 + 5 + +SPEECH_TMPDIR = "/tmp/lucida/speech" +DEFAULT_VOLUME_PERCENTAGE = 50 + +class SpeechDecoder(object): + + def _send_event(self, type, status, data, id=None): + if id: + if not id == self.id: + return False + if not (type == "final_result" or type == "eos"): + data = "[GA] " + data + message = json.dumps({'event': type, 'status': status, 'data': data}) + try: + print(message) + sys.stdout.flush() + return True + except IOError: + pass + return False + + def _log_converse_request(self, req, id): + req_copy = 
embedded_assistant_pb2.ConverseRequest() + req_copy.CopyFrom(req) + if len(req_copy.audio_in) > 0: + size = len(req_copy.audio_in) + self._send_event('debug', defs.SUCCESS_OK, "Sending conversation request with %d bytes of audio" % (size), id) + return + self._send_event('debug', defs.SUCCESS_OK, "Sending configuration request : %s" % (str(req_copy)), id) + + def _log_converse_response(self, resp, id): + resp_copy = embedded_assistant_pb2.ConverseResponse() + resp_copy.CopyFrom(resp) + size = 0 + if resp_copy.HasField('audio_out') and len(resp_copy.audio_out.audio_data) > 0: + size = len(resp_copy.audio_out.audio_data) + resp_copy.audio_out.ClearField('audio_data') + if resp_copy.audio_out.ListFields(): + self._send_event('debug', defs.SUCCESS_OK, "resp_copy.audio_out.ListFields: True", id) + if resp_copy.ListFields(): + self._send_event('debug', defs.SUCCESS_OK, "resp_copy.ListFields: True", id) + self._send_event('debug', defs.SUCCESS_OK, "Received conversation response with %d bytes of audio: %s" % (size, str(resp_copy)), id) + + def _converse_data_gen(self, id): + converse_state = None + if self.cntxt: + self._send_event('debug', defs.SUCCESS_OK, "Sending conversation state: %s" % self.cntxt, id) + converse_state = embedded_assistant_pb2.ConverseState(conversation_state=self.cntxt) + + config = embedded_assistant_pb2.ConverseConfig( + audio_in_config = embedded_assistant_pb2.AudioInConfig( + encoding = 'LINEAR16', + sample_rate_hertz = DEFAULT_AUDIO_SAMPLE_RATE + ), + audio_out_config = embedded_assistant_pb2.AudioOutConfig( + encoding = 'LINEAR16', + sample_rate_hertz = DEFAULT_AUDIO_SAMPLE_RATE, + volume_percentage = DEFAULT_VOLUME_PERCENTAGE + ), + converse_state = converse_state + ) + + # The first ConverseRequest must contain the ConverseConfig + # and no audio data. + req = embedded_assistant_pb2.ConverseRequest(config=config) + self._log_converse_request(req, id) + yield req + + self._send_event('debug', defs.SUCCESS_OK, "Opened data stream for request audio.", id) + + # Subsequent requests need audio data, but not config. + while self.state == STATE_LISTENING: + if len(self.input_buffer) > 0: + req = embedded_assistant_pb2.ConverseRequest(audio_in=self.input_buffer.pop(0)) + self._log_converse_request(req, id) + yield req + while len(self.input_buffer) > 0 and self.state == STATE_EOS_RECEIVED: + req = embedded_assistant_pb2.ConverseRequest(audio_in=self.input_buffer.pop(0)) + self._log_converse_request(req, id) + yield req + self.state = STATE_WAITING + self._send_event('debug', defs.SUCCESS_OK, "Closed data stream for request audio. Waiting for transcript...", id) + + def _response_data_gen(self, id): + config = embedded_assistant_pb2.ConverseConfig( + audio_in_config = embedded_assistant_pb2.AudioInConfig( + encoding = 'LINEAR16', + sample_rate_hertz = DEFAULT_AUDIO_SAMPLE_RATE + ), + audio_out_config = embedded_assistant_pb2.AudioOutConfig( + encoding = 'LINEAR16', + sample_rate_hertz = DEFAULT_AUDIO_SAMPLE_RATE, + volume_percentage = DEFAULT_VOLUME_PERCENTAGE + ), + converse_state = None + ) + + # The first ConverseRequest must contain the ConverseConfig + # and no audio data. + req = embedded_assistant_pb2.ConverseRequest(config=config) + self._log_converse_request(req, id) + yield req + + self._send_event('debug', defs.SUCCESS_OK, "Opened data stream for response audio", id) + + # Subsequent requests need audio data, but not config. 
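+ # Replay the saved response audio in DEFAULT_AUDIO_ITER_SIZE chunks, one ConverseRequest per chunk.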
+ with open(SPEECH_TMPDIR + "/" + str(id) + "_out.raw", 'rb') as fp: + while True: + data = fp.read(DEFAULT_AUDIO_ITER_SIZE) + if not data: + break + req = embedded_assistant_pb2.ConverseRequest(audio_in=data) + self._log_converse_request(req, id) + yield req + + self._send_event('debug', defs.SUCCESS_OK, "Closed data stream for response audio. Waiting for transcript...", id) + + def _converse_req_gen(self): + self.state = STATE_LISTENING + data = {'id': self.id, 'result': {'hypotheses': [], 'final': True}, 'context_in': self.cntxt, 'response': "", 'service': "GA", 'service_type': "QA", 'dialog_follow_on': False} + + response_stt = False + if not os.path.isfile(SPEECH_TMPDIR + "/" + str(data['id']) + "_out.raw"): + self._send_event('debug', defs.SUCCESS_OK, "Initiating dialogue with Google Assitant (STT) for decoding request audio", data['id']) + fp = open(SPEECH_TMPDIR + "/" + str(data['id']) + "_out.raw", 'wb') + try: + # This generator yields ConverseResponse proto messages + # received from the gRPC Google Assistant API. + for resp in self.assistant.Converse(self._converse_data_gen(data['id']), DEFAULT_GRPC_DEADLINE): + if not data['id'] == self.id: + self._send_event('debug', defs.SUCCESS_OK, "[%s] Converse request for aborted request successfully terminated" % data['id']) + try: + fp.close() + os.remove(SPEECH_TMPDIR + "/" + str(id) + "_out.raw") + except: + pass + return + self._log_converse_response(resp, id) + if resp.error.code != code_pb2.OK: + try: + fp.close() + os.remove(SPEECH_TMPDIR + "/" + str(id) + "_out.raw") + except: + pass + self._send_event('error', defs.TRY_AGAIN, "Error occured while decoding request audio: %s" % resp.error.message, data['id']) + return + if resp.event_type == END_OF_UTTERANCE: + self._send_event('debug', defs.OTHER, 'End of audio request detected', data['id']) + if self.state == STATE_LISTENING: + self.stop() + if self.state == STATE_EOS_RECEIVED: + self.state = STATE_WAITING + if resp.result.spoken_request_text: + self._send_event('debug', defs.SUCCESS_OK, "Received final transcript from Google Assistant", data['id']) + self.result = {'hypotheses': [{'transcript': resp.result.spoken_request_text, 'confidence': 1}], 'final': True} + if len(resp.audio_out.audio_data) > 0: + self._send_event('debug', defs.SUCCESS_OK, "Received response audio of length %u" % len(resp.audio_out.audio_data), data['id']) + fp.write(resp.audio_out.audio_data) + if resp.result.spoken_response_text: + self._send_event('debug', defs.SUCCESS_OK, "Received response text", data['id']) + data['response'] = resp.result.spoken_response_text + if resp.result.conversation_state: + self._send_event('debug', defs.SUCCESS_OK, "Received response context", data['id']) + self.cntxt = resp.result.conversation_state + if resp.result.microphone_mode == DIALOG_FOLLOW_ON: + data['dialog_follow_on'] = True + self._send_event('info', defs.SUCCESS_OK, "Expecting follow-on query from user", data['id']) + elif resp.result.microphone_mode == CLOSE_MICROPHONE: + data['dialog_follow_on'] = False + self.result['hypotheses'][0]['context'] = base64.b64encode(self.cntxt) + data['result'] = self.result + self._send_event('info', defs.SUCCESS_OK, "Dialogue with Google Assistant (STT) terminated (request audio)", data['id']) + try: + fp.close() + except: + pass + except Exception as e: + try: + fp.close() + os.remove(SPEECH_TMPDIR + "/" + str(id) + "_out.raw") + except: + pass + self._send_event('error', defs.TRY_AGAIN, "Something went terribly wrong while decoding speech!!! 
Exception: %s" % e.message, data['id']) + return + else: + response_stt = True + + # Convert Google Assistant response to text if asked for + if FORCE_RESPONSE_STT or response_stt: + self._send_event('debug', defs.SUCCESS_OK, "Initiating dialogue with Google Assitant (STT) for decoding response audio", data['id']) + try: + for resp in self.assistant.Converse(self._response_data_gen(data['id']), DEFAULT_GRPC_DEADLINE): + if not data['id'] == self.id: + self._send_event('debug', defs.SUCCESS_OK, "[%s] Converse request for aborted request successfully terminated" % data['id']) + return + self._log_converse_response(resp, id) + if resp.error.code != code_pb2.OK: + self._send_event('warn', defs.TRY_AGAIN, "Error occurred while decoding response audio: %s" % resp.error.message, data['id']) + break + if resp.result.spoken_request_text: + self._send_event('debug', defs.SUCCESS_OK, "Received response transcript from Google Assistant", data['id']) + data['response'] = resp.result.spoken_request_text + break + self._send_event('info', defs.SUCCESS_OK, "Dialogue with Google Assistant (STT) terminated (response audio)", data['id']) + except Exception as e: + if response_stt: + self._send_event('error', defs.TRY_AGAIN, "Something went terribly wrong while decoding assistant response!!! Exception: %s" % e.message, data['id']) + else: + self._send_event('warn', defs.TRY_AGAIN, "Something went terribly wrong while decoding assistant response!!! Exception: %s" % e.message, data['id']) + + if response_stt: + data['result']['hypotheses'] = [{'transcript': data['response'], 'confidence': 1}] + if self._send_event('final_result', defs.SUCCESS_OK, json.dumps(data), data['id']): + self.abort(verbose=False, eos=True) + + def abort(self, verbose=True, eos=True): + self.state = STATE_CREATED + self.input_buffer = [] + self.cntxt = None + if eos: + self._send_event('eos', defs.SUCCESS_OK, str(self.id)) + if verbose: + self._send_event('warn', defs.DECODER_ABORTED, "Decoder was reset and all data was cleared!!!") + self.id = None + + def __init__(self): + self.send_event_lock = threading.Lock() + self.id = None + self.abort(verbose=False, eos=False) + + def stop(self): + if self.state == STATE_LISTENING : + self._send_event('debug', defs.SUCCESS_OK, "Received end of stream. Finishing up...") + self.state = STATE_EOS_RECEIVED + return + if self.state == STATE_CREATED or self.state == STATE_READY: + self._send_event('warn', defs.NOT_IN_ORDER, "End of stream received before decoding was started!!! Ignoring message...") + return + self._send_event('warn', defs.DATA_AFTER_EOS, "Duplicate end of stream received!!! Ignoring message...") + + def push(self, data): + if self.state == STATE_LISTENING : + self._send_event('debug', defs.SUCCESS_OK, "Pushing audio chunk of size %d to buffer..." % len(data)) + self.input_buffer.append(data) + return + if self.state == STATE_CREATED or self.state == STATE_READY: + self._send_event('warn', defs.NOT_IN_ORDER, "Audio data received before decoding was started!!! Ignoring data...") + return + self._send_event('warn', defs.DATA_AFTER_EOS, "Audio data received after end of stream!!! Ignoring data...") + return + + def start(self): + if self.state == STATE_READY : + self.request_generator = threading.Thread(target=self._converse_req_gen) + self.request_generator.daemon = True + self.request_generator.start() + return + if self.state == STATE_CREATED : + self._send_event('warn', defs.NOT_IN_ORDER, "Recieved start decoding command before user details!!! 
Ignoring command...") + return + self._send_event('warn', defs.NOT_IN_ORDER, "Received start decoding command while decoding!!! Ignoring command...", self.id) + + @retry(reraise=True, stop=stop_after_attempt(3), wait=wait_random(min=1, max=3)) + def _authorise(self, credentials): + # Refresh OAuth 2.0 access token. + self._send_event('debug', defs.SUCCESS_OK, "Refreshing user access token...") + http_request = google.auth.transport.requests.Request() + credentials.refresh(http_request) + + # Create an authorized gRPC channel. + self._send_event('debug', defs.SUCCESS_OK, "Connecting to %s..." % ASSISTANT_API_ENDPOINT) + grpc_channel = google.auth.transport.grpc.secure_authorized_channel(credentials, http_request, ASSISTANT_API_ENDPOINT) + + # Create Google Assistant API gRPC client. + self.assistant = embedded_assistant_pb2.EmbeddedAssistantStub(grpc_channel) + + def request_id(self, id): + self.id = id + + def user(self, user): + if not self.id: + self._send_event('warn', defs.NOT_IN_ORDER, "Received user details before request identifier!!! Ignoring user details...") + return + if self.state == STATE_READY or self.state == STATE_CREATED : + self.assistant = None + # Load OAuth 2.0 credentials. + self._send_event('debug', defs.SUCCESS_OK, "Loading user credentials...") + try: + with open(CREDENTIALS_DIR + "/" + user + ".json", 'r') as f: + credentials = google.oauth2.credentials.Credentials(token=None, **json.load(f)) + except Exception as e: + try: + os.remove(CREDENTIALS_DIR + "/" + user + ".json") + except: + pass + self._send_event('error', defs.NOT_AUTHORISED, "User not connected with Google Assistant!!! Exception: %s" % str(e)) + return + + self._send_event('debug', defs.SUCCESS_OK, "Authenticating user...") + try: + self._authorise(credentials) + self.state = STATE_READY + except Exception as e: + self._send_event('error', defs.AUTH_FAILED, "Error occurred while authenticating user!!! Exception: %s" % str(e)) + return + self._send_event('warn', defs.NOT_IN_ORDER, "Received user details while decoding audio!!! Ignoring user details...") + + def context(self, cntxt): + if self.state == STATE_READY or self.state == STATE_CREATED : + if not cntxt: + self.cntxt = None + self._send_event('debug', defs.SUCCESS_OK, "Succesfully cleared context...", self.id) + return + if cntxt.endswith("="): + self.cntxt = base64.b64decode(cntxt) + self._send_event('debug', defs.SUCCESS_OK, "Succesfully set context...", self.id) + return + self._send_event('warn', defs.NOT_IN_ORDER, "Invalid context!!! Ignoring message context", self.id) + self._send_event('warn', defs.NOT_IN_ORDER, "Received message context while decoding audio!!! Ignoring message context...", self.id) + return + +@click.command() +@click.option('--port', '-p', required=True, type=click.INT, metavar='', help='Port on which we should run thrift server') +def main(port): + handler = SpeechDecoder() + processor = ASRThriftService.Processor(handler) + transport = TSocket.TServerSocket(port=port) + tfactory = TTransport.TBufferedTransportFactory() + pfactory = TBinaryProtocol.TBinaryProtocolFactory() + + server = TServer.TSimpleServer(processor, transport, tfactory, pfactory) + + handler._send_event('debug', defs.SUCCESS_OK, "Starting Google Assistant speech decoder on port %u..." 
% port) + server.serve() + +if __name__ == "__main__": + main() diff --git a/lucida/speechrecognition/decoders/googleassist/setup.sh b/lucida/speechrecognition/decoders/googleassist/setup.sh new file mode 100755 index 000000000..d65b90f3f --- /dev/null +++ b/lucida/speechrecognition/decoders/googleassist/setup.sh @@ -0,0 +1,147 @@ +#!/bin/bash + +LUCIDA_ROOT="`pwd`" +LUCIDA_ROOT="${LUCIDA_ROOT%/speechrecognition/decoders/googleassist}" + +if [ -f 'configuration.py' ]; then + if [ -f '.config' ]; then + source ./.config + fi + set -e + source ./configuration.py + set +e + echo "# ----- LAST STABLE CONFIGURATION ----- #" > .config + echo "# DO NOT EDIT THIS FILE BY HAND -- YOUR CHANGES WILL BE OVERWRITTEN" >> .config + echo "LUCIDA_ROOT=\"$LUCIDA_ROOT\"" >> .config + echo "CREDENTIALS_DIR=\"$CREDENTIALS_DIR\"" >> .config + echo "GA_CLIENT_SECRET=\"$GA_CLIENT_SECRET\"" >> .config + echo "FORCE_RESPONSE_STT=$FORCE_RESPONSE_STT" >> .config +else + echo "You'll need to answer a few questions for first time setup. To change any of these options see configuration.py. To reset delete configuration.py" + rm -f .config +fi + + +echo "#" > configuration.py +echo "# Configuration file for Google Assistant Speech to Text microservice" >> configuration.py +echo "#" >> configuration.py +echo "" >> configuration.py +echo "# NOTE: If you manually update the file keep in mind that this is both a python and bash file. Take care of the syntax accordingly" >> configuration.py + +while [ -z "$CREDENTIALS_DIR" ] || [ -z "$GA_CLIENT_SECRET" ]; do + echo "" + secrets=($(find "`pwd`" -type f -name "*.json" | while read file; do grep "$file" -e "project_id" 2>&1 1>/dev/null; if [ $? -eq 0 ]; then echo "$file"; fi; done)) + if [ -d "$LUCIDA_ROOT/questionanswering/googleassist" ]; then + secrets=(${secrets[@]} $(find "$LUCIDA_ROOT/questionanswering/googleassist" -type f -name "*.json" | while read file; do grep "$file" -e "project_id" 2>&1 1>/dev/null; if [ $? -eq 0 ]; then echo "$file"; fi; done)) + if [ -f "$LUCIDA_ROOT/questionanswering/googleassist/configuration.py" ]; then + set -e + source "$LUCIDA_ROOT/questionanswering/googleassist/configuration.py" + set +e + if [ ! -z "$CREDENTIALS_DIR" ] && [ -d "$CREDENTIALS_DIR" ] && [ ! -z "$GA_CLIENT_SECRET" ] && [ -f "$GA_CLIENT_SECRET"]; then + echo "Successfully loaded credentials directory and Google client secret from Google Asssitant question answering microservice" + break + fi + fi + fi + GA_CLIENT_SECRET= + CREDENTIALS_DIR= + mkdir -p auth + CREDENTIALS_DIR=`pwd`"/auth" + if [ ! -z "$secrets" ]; then + echo "The following possible credentials were found in the Google Assitant directory. Select which one you want to use. You can also hit 0 to type in a custom path. (0-"${#secrets[*]}")" + echo "" + count=1 + for secret in ${secrets[@]}; do echo "[$count] $secret"; count=$(($count+1)); done + echo "[0] Enter a custom path" + while [ -z "$GA_CLIENT_SECRET" ]; do + echo "" + printf "Enter your choice [1]: " + read response + if [ -z "$response" ]; then response=1; fi + if ! [[ "$response" =~ ^[0-9]+$ ]] ; then + echo "Choice should be a number!! Try again..." + continue + fi + if [ "$response" -eq 0 ]; then + printf "Enter path to JSON file containing credentials: " + read response + response=`echo "$response" | sed s#^~/#$HOME/#` + if [ -f "$response" ]; then + grep "$response" -e "project_id" 2>&1 1>/dev/null + if [ $? 
-eq 0 ]; then + GA_CLIENT_SECRET=`pwd`"/auth/"$(basename "$response") + if [[ $response != $GA_CLIENT_SECRET ]]; then + cp -f "$response" auth 2>&1 1>/dev/null + fi + else + echo "Not a valid client secret: $response" + fi + else + echo "No such file: $response" + fi + elif [ $(($response-1)) -lt ${#secrets[*]} ]; then + response=${secrets[$(($response-1))]} + if [ -f "$response" ]; then + GA_CLIENT_SECRET=`pwd`"/auth/"$(basename "$response") + if [[ $response != $GA_CLIENT_SECRET ]]; then + cp -f "$response" auth 2>&1 1>/dev/null + fi + else + echo "File $response no longer exists!!! Please try a different choice.." + fi + else + echo "Invalid choice!!! Please try again.." + fi + done + else + echo "No possible credential was found in the Google Assitant speech decoder directory. If you have one provide its path otherwise create one as explained in README" + while [ -z "$GA_CLIENT_SECRET" ]; do + printf "Enter path to JSON file containing credentials: " + read response + response=`echo "$response" | sed s#^~/#$HOME/#` + if [ -f "$response" ]; then + grep "$response" -e "project_id" 2>&1 1>/dev/null + if [ $? -eq 0 ]; then + GA_CLIENT_SECRET=`pwd`"/auth/"$(basename "$response") + if [[ $response != $GA_CLIENT_SECRET ]]; then + cp -f "$response" auth 2>&1 1>/dev/null + fi + else + echo "Not a valid client secret: $response" + fi + else + echo "No such file: $response" + fi + done + fi +done + +echo "" >> configuration.py +echo "# Lucida root directory. This is the directory which contains commandcenter. This will be shared between QA and STT microservices" >> configuration.py +echo "LUCIDA_ROOT=\"$LUCIDA_ROOT\"" >> configuration.py +echo "" >> configuration.py +echo "# Directory used to store Google Assistant credentials for the client and users. This will be shared between QA and STT microservices" >> configuration.py +echo "CREDENTIALS_DIR=\"$CREDENTIALS_DIR\"" >> configuration.py +echo "" >> configuration.py +echo "# Path to JSON file containing Google client credentials. This will be shared between QA and STT microservices" >> configuration.py +echo "GA_CLIENT_SECRET=\"$GA_CLIENT_SECRET\"" >> configuration.py + +if [ -z "$FORCE_RESPONSE_STT" ]; then + echo "" + echo "Do you want to force a text transcript for Google Assistant response? This may be useful later when features are added to Lucida (y/n)" + printf "FORCE_RESPONSE_STT [n]: " + read response + if [ -z "$respone" ]; then response="n"; fi + if [[ "$response" == "n" ]] || [[ "$response" == "N" ]]; then + FORCE_RESPONSE_STT=False + else + FORCE_RESPONSE_STT=True + fi +fi + +echo "" >> configuration.py +echo "# Do you want to force a text transcript for Google Assistant response? This may be useful later when features are added to Lucida (y/n)" >> configuration.py +echo "FORCE_RESPONSE_STT=$FORCE_RESPONSE_STT" >> configuration.py +echo "" + +rm -f .config diff --git a/lucida/speechrecognition/decoders/templates/python/conf b/lucida/speechrecognition/decoders/templates/python/conf new file mode 100755 index 000000000..1cb4919f8 --- /dev/null +++ b/lucida/speechrecognition/decoders/templates/python/conf @@ -0,0 +1,5 @@ +#!/bin/bash + +# This should print configuration JSON and nothing else. 
This JSON is passed as it is to conf function of decoder + +echo "{}" diff --git a/lucida/speechrecognition/decoders/templates/python/decoder b/lucida/speechrecognition/decoders/templates/python/decoder new file mode 100755 index 000000000..f50bef0bf --- /dev/null +++ b/lucida/speechrecognition/decoders/templates/python/decoder @@ -0,0 +1,162 @@ +#!/usr/bin/env python + +""" +Created on Jul 13 2017 + +@author: kamal1210 +""" +from __future__ import print_function + +import os, sys, time +import threading, json +import click + +from thrift.transport import TSocket +from thrift.transport import TTransport +from thrift.protocol import TBinaryProtocol +from thrift.server import TServer + +sys.path.insert(0, "../../include") +sys.path.insert(0, "../../include/gen-py") + +from asrthriftservice import ASRThriftService +import common + +STATE_CREATED = 0 +STATE_READY = 1 +STATE_LISTENING = 2 +STATE_EOS_RECEIVED = 3 +STATE_WAITING = 4 + +class SpeechDecoder(object): + + # Writes event messages to output. See README in speechrecognition/decoders for list of all available events. + def _send_event(self, type, status, data): + message = json.dumps({'event': type, 'status': status, 'data': data}) + try: + print(message) + sys.stdout.flush() + except IOError: + pass + + # Processes audio data. This function reads audio data from input_buffer and processes it + def processor(self): + self.state = STATE_LISTENING + id = self.id + self._send_event("debug", common.SUCCESS_OK, "Starting to process audio data") + + # Read continously while listeneing + while self.state == STATE_LISTENING: + if len(self.input_buffer) > 0: + # TODO: process self.input_buffer.pop(0) and send interim results if the thread is not aborted + if id == self.id: + self._send_event("interim_result", common.SUCCESS_OK, "INTERIM_TRANSCRIPT_GOES_HERE") + + # Read till end once EOS received + while len(self.input_buffer) > 0: + # TODO: process self.input_buffer.pop(0) and send interim results if the thread is not aborted + if id == self.id: + self._send_event("interim_result", common.SUCCESS_OK, "INTERIM_TRANSCRIPT_GOES_HERE") + break + + # Check if this thread has been aborted + if not id == self.id: + self._send_event("debug", common.SUCCESS_OK, "An old decoder thread was aborted succesfully") + return + + self.state = STATE_WAITING + self._send_event("final_result", common.SUCCESS_OK, json.dumps(dict(transcript="FINAL_TRANSCRIPT_GOES_HERE",entities="OPTIONAL. SEE README",response="OPTIONAL. SEE README"))) + self._send_event("debug", common.SUCCESS_OK, "Closed post data stream. Waiting for response...") + self.state = STATE_READY + + # This function aborts processing discarding all pending output + def abort(self): + if self.state == STATE_READY : + self._send_event("warn", common.WARN_GENERIC, "Abort message received when decoder is idle!!! Ignoring message...") + return + if self.state == STATE_CREATED : + self._send_event("warn", common.WARN_GENERIC, "Abort message received when decoder is not configured!!! Ignoring message...") + return + self.id = "" + self.state = STATE_READY + self.input_buffer = [] + self.context = "" + self._send_event("warn", common.WARN_ABORTED, "Decoder was reset!!! All data cleared") + + # Class constructor + def __init__(self): + self.send_event_lock = threading.Lock() + self.input_buffer = [] + self.state = STATE_CREATED + self.context = "" + self.id = "" + + # EOS handler + def stop(self): + if self.state == STATE_LISTENING : + self._send_event("debug", common.SUCCESS_OK, "Received End-Of-Stream. 
Finishing up...") + self.state = STATE_EOS_RECEIVED + return + self._send_event("warn", common.WARN_GENERIC, "End-Of-Stream message received while not listening!!! Ignoring message...") + + # Receives audio data. This should just store data in input_buffer and return + def push(self, data): + if self.state == STATE_LISTENING : + self._send_event("debug", common.SUCCESS_OK, "Pushing audio chunk of size %d to post buffer..." % len(data)) + self.input_buffer.append(data) + return + self._send_event("warn", common.WARN_GENERIC, "Push message received while not listening!!! Ignoring message...") + + # This should start decoder thread and update current content id + def start(self): + if self.state == STATE_READY : + self.id = uuid.uuid4() + self.decoder_thread = threading.Thread(target=self.processor) + self.decoder_thread.daemon = True + self.decoder_thread.start() + return + if self.state == STATE_CREATED : + self._send_event("warn", common.WARN_GENERIC, "Start requested before configuration!!! Ignoring message...") + return + self._send_event("warn", common.WARN_GENERIC, "Start message received while decoding!!! Ignoring message...") + + # This should handle update context. For those who need it. + def context(self, cntxt): + if self.state == STATE_READY : + self.context = cntxt + if self.state == STATE_CREATED : + self._send_event("warn", common.WARN_GENERIC, "Context received before configuration!!! Ignoring message...") + return + self._send_event("warn", common.WARN_GENERIC, "Context received while decoding!!! Ignoring message...") + + # This handles decoder configuration. Whatever conf executable prints is passed onto this function as message. Return True or False based on whether configuration was successful + def conf(self, message): + if self.state == STATE_CREATED or self.state == STATE_READY : + try: + msg = json.loads(message) + # TODO: Configure Decoder + except Exception as e: + self._send_event("error", common.ERROR_CRITICAL, "Something went terribly wrong while configuring decoder!!! Please raise an issue with the maintainer... Exception: %s" % e.message) + return False + self.state = STATE_READY + self._send_event("debug", common.SUCCESS_OK, "Decoder successfully configured") + return True + self._send_event("warn", common.WARN_GENERIC, "Configuration message received while decoding!!! 
Ignoring message...") + return False + +# Main function starts thrift server on specified port +@click.command() +@click.option('--port', '-p', required=True, type=click.INT, metavar='', help='Port on which we should run thrift server') +def main(port): + handler = SpeechDecoder() + processor = ASRThriftService.Processor(handler) + transport = TSocket.TServerSocket(port=port) + tfactory = TTransport.TBufferedTransportFactory() + pfactory = TBinaryProtocol.TBinaryProtocolFactory() + + server = TServer.TSimpleServer(processor, transport, tfactory, pfactory) + server.serve() + +if __name__ == "__main__": + main() + diff --git a/lucida/speechrecognition/decoders/wit/.gitignore b/lucida/speechrecognition/decoders/wit/.gitignore new file mode 100644 index 000000000..ff3b2c603 --- /dev/null +++ b/lucida/speechrecognition/decoders/wit/.gitignore @@ -0,0 +1,2 @@ +configuration.py +.config diff --git a/lucida/speechrecognition/decoders/wit/Makefile b/lucida/speechrecognition/decoders/wit/Makefile new file mode 100644 index 000000000..52fe9d772 --- /dev/null +++ b/lucida/speechrecognition/decoders/wit/Makefile @@ -0,0 +1,10 @@ +.PHONY: all setup clean + +all: setup + +setup: + ./setup.sh + +clean: + rm -rf configuration.py + rm -rf configuration.pyc diff --git a/lucida/speechrecognition/decoders/wit/decoder b/lucida/speechrecognition/decoders/wit/decoder new file mode 100755 index 000000000..942078623 --- /dev/null +++ b/lucida/speechrecognition/decoders/wit/decoder @@ -0,0 +1,178 @@ +#!/usr/bin/env python + +""" +Created on Jun 14 2017 + +@author: kamal1210 +""" +from __future__ import print_function + +from configuration import * + +import os, sys, time +import threading, json, uuid +import requests, click + +from thrift.transport import TSocket +from thrift.transport import TTransport +from thrift.protocol import TBinaryProtocol +from thrift.server import TServer + +sys.path.insert(0, "include") +sys.path.insert(0, "include/gen-py") + +from asrthriftservice import ASRThriftService +import defs + +STATE_CREATED = 0 +STATE_READY = 1 +STATE_LISTENING = 2 +STATE_EOS_RECEIVED = 3 +STATE_WAITING = 4 + +class SpeechDecoder(object): + + def _send_event(self, type, status, data, id=None): + if id: + if not id == self.id: + return False + if not (type == "final_result" or type == "eos"): + data = "[WIT] " + data + message = json.dumps({'event': type, 'status': status, 'data': data}) + try: + print(message) + sys.stdout.flush() + return True + except IOError: + pass + return False + + def _post_data_gen(self, id): + self._send_event("debug", defs.SUCCESS_OK, "Opened post data stream for request audio", id) + while self.state == STATE_LISTENING: + if len(self.input_buffer) > 0: + yield self.input_buffer.pop(0) + while len(self.input_buffer) > 0 and self.state == STATE_EOS_RECEIVED: + yield self.input_buffer.pop(0) + self.state = STATE_WAITING + self._send_event("debug", defs.SUCCESS_OK, "Closed post data stream for request audio. 
Waiting for transcript...") + + def _post_req_gen(self): + self.state = STATE_LISTENING + data = {'id': self.id, 'result': {'hypotheses': [], 'final': True}, 'context_in': self.cntxt} + + self._send_event("debug", defs.SUCCESS_OK, "Initiating post request to WIT.AI", data['id']) + try: + response = requests.post(self.api_host, headers=self.headers, data=self._post_data_gen(data['id']), timeout=(5, self.request_timeout)) + message = json.loads(response.text) + response.raise_for_status() + data['result']['hypotheses'] = [{'transcript': message['_text'], 'confidence': 1}] + except requests.exceptions.ConnectionError: + self._send_event("error", defs.TRY_AGAIN, "Could not connect to WIT.AI!!! Are you connected to the internet?", data['id']) + except requests.exceptions.Timeout: + self._send_event("error", defs.TRY_AGAIN, "Connection to WIT.AI timed out!!!", data['id']) + except requests.exceptions.HTTPError: + self._send_event("error", defs.OTHER, message['error'], data['id']) + except Exception as e: + self._send_event("error", defs.FATAL, "Something went terribly wrong while configuring decoder!!! Exception: %s" % e.message) + + if not data['id'] == self.id: + self._send_event("debug", defs.SUCCESS_OK, "[%s] Converse request for aborted request successfully terminated" % data['id']) + return + + if self._send_event("final_result", defs.SUCCESS_OK, json.dumps(data), data['id']): + self.abort(verbose=False, eos=True) + + def abort(self, verbose=True, eos=True): + self.state = STATE_CREATED + self.input_buffer = [] + self.cntxt = None + if eos: + self._send_event('eos', defs.SUCCESS_OK, str(self.id)) + if verbose: + self._send_event('warn', defs.DECODER_ABORTED, "Decoder was reset and all data was cleared!!!") + self.id = None + + def __init__(self): + self.send_event_lock = threading.Lock() + self.headers = {'Authorization': 'Bearer ' + WIT_API_TOKEN, 'Content-Type': 'audio/raw;encoding=signed-integer;bits=16;rate=16000;endian=little', 'Accept': 'application/json', 'Transfer-Encoding': 'chunked'} + self.api_host = "https://api.wit.ai/speech?v=" + WIT_API_VERSION + self.request_timeout = WIT_REQUEST_TIMEOUT + self.id = None + self.abort(verbose=False, eos=False) + + def stop(self): + if self.state == STATE_LISTENING : + self._send_event('debug', defs.SUCCESS_OK, "Received end of stream. Finishing up...") + self.state = STATE_EOS_RECEIVED + return + if self.state == STATE_CREATED or self.state == STATE_READY: + self._send_event('warn', defs.NOT_IN_ORDER, "End of stream received before decoding was started!!! Ignoring message...") + return + self._send_event('warn', defs.DATA_AFTER_EOS, "Duplicate end of stream received!!! Ignoring message...") + return False + + def push(self, data): + if self.state == STATE_LISTENING : + self._send_event('debug', defs.SUCCESS_OK, "Pushing audio chunk of size %d to buffer..." % len(data)) + self.input_buffer.append(data) + return + if self.state == STATE_CREATED or self.state == STATE_READY: + self._send_event('warn', defs.NOT_IN_ORDER, "Audio data received before decoding was started!!! Ignoring data...") + return + self._send_event('warn', defs.DATA_AFTER_EOS, "Audio data received after end of stream!!! Ignoring data...") + + def start(self): + if self.state == STATE_READY : + self.request_generator = threading.Thread(target=self._post_req_gen) + self.request_generator.daemon = True + self.request_generator.start() + return + if self.state == STATE_CREATED : + self._send_event("warn", common.WARN_GENERIC, "Start requested before configuration!!! 
+    def request_id(self, id):
+        self.id = id
+
+    def user(self, user):
+        if not self.id:
+            self._send_event('warn', defs.NOT_IN_ORDER, "Received user details before request identifier!!! Ignoring user details...")
+            return
+        if self.state == STATE_READY or self.state == STATE_CREATED:
+            # store under a different name so we do not shadow this handler method
+            self.username = user
+            self.state = STATE_READY
+            return
+        self._send_event('warn', defs.NOT_IN_ORDER, "Received user details while decoding audio!!! Ignoring user details...")
+
+    def context(self, cntxt):
+        if self.state == STATE_READY or self.state == STATE_CREATED:
+            if not cntxt:
+                self.cntxt = None
+                self._send_event('debug', defs.SUCCESS_OK, "Successfully cleared context...")
+                return
+            if cntxt.endswith("="):
+                self.cntxt = base64.b64decode(cntxt)
+                self._send_event('debug', defs.SUCCESS_OK, "Successfully set context...")
+                return
+            self._send_event('warn', defs.NOT_IN_ORDER, "Invalid context!!! Ignoring message context...")
+            return
+        self._send_event('warn', defs.NOT_IN_ORDER, "Received message context while decoding audio!!! Ignoring message context...")
+
+@click.command()
+@click.option('--port', '-p', required=True, type=click.INT, metavar='<port>', help='Port on which we should run thrift server')
+def main(port):
+    handler = SpeechDecoder()
+    processor = ASRThriftService.Processor(handler)
+    transport = TSocket.TServerSocket(port=port)
+    tfactory = TTransport.TBufferedTransportFactory()
+    pfactory = TBinaryProtocol.TBinaryProtocolFactory()
+
+    handler._send_event('debug', defs.SUCCESS_OK, "Starting WIT speech decoder on port %u..." % port)
+    server = TServer.TSimpleServer(processor, transport, tfactory, pfactory)
+    server.serve()
+
+if __name__ == "__main__":
+    main()
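Review note (not part of the patch): the decoder reports everything on stdout, one JSON object per line, and `final_result` wraps its payload as a nested JSON string. Below is a minimal sketch of a parent process draining that stream; the `./decoder` path, the port, and the handling logic are illustrative only, since the component that actually consumes these events in Lucida is outside this diff.

```python
import json
import subprocess

# Hypothetical consumer of the decoder's stdout event protocol.
proc = subprocess.Popen(["./decoder", "--port", "8081"],
                        stdout=subprocess.PIPE)
for line in iter(proc.stdout.readline, b""):
    event = json.loads(line)  # {'event': ..., 'status': ..., 'data': ...}
    if event["event"] == "final_result":
        payload = json.loads(event["data"])  # payload is a nested JSON string
        for hyp in payload["result"]["hypotheses"]:
            print(hyp["transcript"])
    elif event["event"] in ("error", "warn"):
        print("%s (status %d): %s" % (event["event"], event["status"], event["data"]))
```

Audio itself would still have to be fed in through the ASRThriftService methods (`request_id`, `user`, `start`, `push`, `stop`); the sketch only reads the resulting event stream.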
diff --git a/lucida/speechrecognition/decoders/wit/setup.sh b/lucida/speechrecognition/decoders/wit/setup.sh
new file mode 100755
index 000000000..292ad895f
--- /dev/null
+++ b/lucida/speechrecognition/decoders/wit/setup.sh
@@ -0,0 +1,109 @@
+#!/bin/bash
+
+if [ -f 'configuration.py' ]; then
+    if [ -f '.config' ]; then
+        source ./.config
+    fi
+    set -e
+    source ./configuration.py
+    set +e
+    echo "# ----- LAST STABLE CONFIGURATION ----- #" > .config
+    echo "# DO NOT EDIT THIS FILE BY HAND -- YOUR CHANGES WILL BE OVERWRITTEN" >> .config
+    echo "WIT_API_VERSION=\"$WIT_API_VERSION\"" >> .config
+    echo "WIT_API_TOKEN=\"$WIT_API_TOKEN\"" >> .config
+    echo "WIT_REQUEST_TIMEOUT=$WIT_REQUEST_TIMEOUT" >> .config
+else
+    echo "You'll need to answer a few questions for first-time setup. To change any of these options, edit configuration.py. To reset, delete configuration.py"
+    rm -f .config
+fi
+
+while [ 1 ]; do
+    echo "#" > configuration.py
+    echo "# Configuration file for Lucida's WIT.AI Speech Decoder" >> configuration.py
+    echo "#" >> configuration.py
+    echo "" >> configuration.py
+    echo "# NOTE: If you update this file by hand, keep in mind that it is both a python and a bash file. Take care of the syntax accordingly" >> configuration.py
+
+    if [ -z "$WIT_API_VERSION" ]; then
+        while [ 1 ]; do
+            echo ""
+            echo "Version of WIT.AI API to use. Latest version against which this script is tested is 27/06/2017."
+            printf "WIT_API_VERSION [27/06/2017]: "
+            read WIT_API_VERSION
+            if [ -z "$WIT_API_VERSION" ]; then WIT_API_VERSION="27/06/2017"; fi
+            echo "$WIT_API_VERSION" | grep -Poe "^[\d/]+$" > /dev/null
+            if [ "$?" -ne 0 ]; then
+                echo "[ERROR] Version string should be all numbers and '/'!!!"
+                continue
+            fi
+            break
+        done
+    fi
+
+    if [ -z "$WIT_API_TOKEN" ]; then
+        while [ 1 ]; do
+            echo ""
+            echo "This may be the server access or client access token found on the settings page for your WIT.AI application. See README"
+            printf "WIT_API_TOKEN: "
+            read WIT_API_TOKEN
+            echo "$WIT_API_TOKEN" | grep -Poe "^[A-Z0-9]+$" > /dev/null
+            if [ "$?" -ne 0 ]; then
+                echo "[ERROR] Token string should be all capital letters and numbers!!!"
+                continue
+            fi
+            break
+        done
+    fi
+
+    while [ 1 ]; do
+        echo ""
+        echo "Verifying version and token strings..."
+        echo ""
+        RESPONSE=`curl -s -w "%{http_code}" -H "Authorization: Bearer $WIT_API_TOKEN" -H "Accept: application/json" "https://api.wit.ai/message?v=$WIT_API_VERSION&q=hello"`
+        echo "$RESPONSE" | grep -Poe "200$" > /dev/null
+        if [ "$?" -ne 0 ]; then
+            RESPONSE=`echo "$RESPONSE" | grep -Poe "(?<=\"error\":\")[^\"]+"`
+            if [ -z "$RESPONSE" ]; then
+                echo "Could not connect to WIT.AI!!! Are you connected to the internet???"
+                echo "Will try again in five seconds..."
+                sleep 5
+                continue
+            else
+                echo "[ERROR] $RESPONSE"
+                WIT_API_VERSION=""
+                WIT_API_TOKEN=""
+                continue 2
+            fi
+        fi
+        echo "[INFO] Verified version and token strings."
+        break
+    done
+
+    echo "" >> configuration.py
+    echo "# This may be the server access or client access token found on the settings page for your WIT.AI application. See README" >> configuration.py
+    echo "WIT_API_TOKEN=\"$WIT_API_TOKEN\"" >> configuration.py
+    echo "" >> configuration.py
+    echo "# Version of WIT.AI API to use. Latest version against which this script is tested is 27/06/2017." >> configuration.py
+    echo "WIT_API_VERSION=\"$WIT_API_VERSION\"" >> configuration.py
+
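+    # Note: WIT_REQUEST_TIMEOUT becomes the read timeout of the decoder's
+    # requests.post() call (see decoders/wit/decoder), i.e. how long we wait
+    # for a transcript after the last chunk of audio has been sent.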
+    if [ -z "$WIT_REQUEST_TIMEOUT" ]; then
+        while [ 1 ]; do
+            echo ""
+            echo "Maximum tolerable time in seconds between sending the last chunk of audio data and receiving results"
+            printf "WIT_REQUEST_TIMEOUT [30]: "
+            read WIT_REQUEST_TIMEOUT
+            if [ -z "$WIT_REQUEST_TIMEOUT" ]; then WIT_REQUEST_TIMEOUT="30"; fi
+            echo "$WIT_REQUEST_TIMEOUT" | grep -Poe "^\d+$" > /dev/null
+            if [ "$?" -ne 0 ]; then
+                echo "[ERROR] Request timeout should be a number!!!"
+                continue
+            fi
+            break
+        done
+    fi
+    echo "" >> configuration.py
+    echo "# Maximum tolerable time in seconds between sending the last chunk of audio data and receiving results" >> configuration.py
+    echo "WIT_REQUEST_TIMEOUT=$WIT_REQUEST_TIMEOUT" >> configuration.py
+    break
+done
+rm -f .config
diff --git a/lucida/speechrecognition/defs.yaml b/lucida/speechrecognition/defs.yaml
new file mode 100644
index 000000000..b45d5b7cb
--- /dev/null
+++ b/lucida/speechrecognition/defs.yaml
@@ -0,0 +1,15 @@
+# Common include variables for decoders
+# NOTE: Currently, after modifying this file, you need to rebuild speech recognition and all its modules by running `make all` followed by `make decoders`
+
+# Success status
+SUCCESS_OK : 0
+
+# Types of errors/warnings
+OTHER : 1
+TRY_AGAIN : 2
+NOT_AUTHORISED : 3
+AUTH_FAILED : 4
+NOT_IN_ORDER : 5
+DATA_AFTER_EOS : 6
+FATAL : 7
+DECODER_ABORTED: 8
diff --git a/lucida/speechrecognition/gen_include.sh b/lucida/speechrecognition/gen_include.sh
new file mode 100755
index 000000000..35ab8d7cc
--- /dev/null
+++ b/lucida/speechrecognition/gen_include.sh
@@ -0,0 +1,16 @@
+#!/bin/bash
+
+cd "`dirname $(readlink -f $0)`"
+rm -rf include
+mkdir -p include
+cat defs.yaml | grep -Poe "^[A-Z_]+\s*:\s*\d+$" | while read line; do name=`echo $line | grep -Poe "^[A-Z_]+"`; value=`echo $line | grep -Poe "\d+$"`;
+    # Create Python header
+    echo "$name = $value" >> include/defs.py
+    # Create C header
+    echo "#define $name $value" >> include/defs.h
+done
+
+thrift -o include --gen c_glib asrthriftservice.thrift
+thrift -o include --gen py asrthriftservice.thrift
+
+rm -f include/__init__.py
diff --git a/lucida/speechrecognition/kaldi_gstreamer_asr/.gitignore b/lucida/speechrecognition/kaldi_gstreamer_asr/.gitignore
deleted file mode 100755
index 858a701bf..000000000
--- a/lucida/speechrecognition/kaldi_gstreamer_asr/.gitignore
+++ /dev/null
@@ -1,5 +0,0 @@
-kaldi
-tmp
-test/models/english/fisher_nnet_a_gpu_online
-test/models/english/tedlium_nnet_ms_sp_online
-lucida-demos
diff --git a/lucida/speechrecognition/kaldi_gstreamer_asr/Dockerfile b/lucida/speechrecognition/kaldi_gstreamer_asr/Dockerfile
deleted file mode 100644
index b9a44456d..000000000
--- a/lucida/speechrecognition/kaldi_gstreamer_asr/Dockerfile
+++ /dev/null
@@ -1,13 +0,0 @@
-####
-# based on the lucida base image
-FROM lucida_base
-
-#### environment variables
-ENV LUCIDAROOT /usr/local/lucida/lucida
-ENV LD_LIBRARY_PATH /usr/local/lib
-
-## install ASR
-RUN mkdir -p /usr/local/lucida/lucida/speechrecognition/kaldi_gstreamer_asr
-ADD . /usr/local/lucida/lucida/speechrecognition/kaldi_gstreamer_asr
-WORKDIR "/usr/local/lucida/lucida/speechrecognition/kaldi_gstreamer_asr"
-RUN /bin/bash install_kaldi.sh
diff --git a/lucida/speechrecognition/kaldi_gstreamer_asr/LICENSE b/lucida/speechrecognition/kaldi_gstreamer_asr/LICENSE
deleted file mode 100644
index f2637bc5c..000000000
--- a/lucida/speechrecognition/kaldi_gstreamer_asr/LICENSE
+++ /dev/null
@@ -1,23 +0,0 @@
-Copyright (c) 2014, alumae
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without modification,
-are permitted provided that the following conditions are met:
-
-* Redistributions of source code must retain the above copyright notice, this
-  list of conditions and the following disclaimer.
-
-* Redistributions in binary form must reproduce the above copyright notice, this
-  list of conditions and the following disclaimer in the documentation and/or
-  other materials provided with the distribution.
- -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR -ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES -(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; -LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON -ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. \ No newline at end of file diff --git a/lucida/speechrecognition/kaldi_gstreamer_asr/Makefile b/lucida/speechrecognition/kaldi_gstreamer_asr/Makefile deleted file mode 100644 index 77bb38c1a..000000000 --- a/lucida/speechrecognition/kaldi_gstreamer_asr/Makefile +++ /dev/null @@ -1,13 +0,0 @@ -all: - ./install_kaldi.sh - -start_master_server: - gnome-terminal -x bash -c "source ${PWD}/../../../tools/python_2_7_12/bin/activate && python kaldigstserver/master_server.py --port=8081; read -n1" - -start_server: - ./simple_start.sh - -start_test: - gnome-terminal -x bash -c "source ${PWD}/../../../tools/python_2_7_12/bin/activate && python kaldigstserver/client.py -r 8192 test/data/bill_gates-TED.mp3; read -n1" - -.PHONY: all start_server start_test diff --git a/lucida/speechrecognition/kaldi_gstreamer_asr/README.md b/lucida/speechrecognition/kaldi_gstreamer_asr/README.md deleted file mode 100644 index 5eb9b8bca..000000000 --- a/lucida/speechrecognition/kaldi_gstreamer_asr/README.md +++ /dev/null @@ -1,394 +0,0 @@ -Kaldi GStreamer server -====================== -[![GitHub license](https://img.shields.io/github/license/alumae/kaldi-gstreamer-server.svg)](https://github.com/alumae/kaldi-gstreamer-server/blob/master/LICENSE) -[![Code Climate](https://img.shields.io/codeclimate/github/alumae/kaldi-gstreamer-server.svg)](https://codeclimate.com/github/alumae/kaldi-gstreamer-server) - -This is a real-time full-duplex speech recognition server, based on -the Kaldi toolkit and the GStreamer framework and implemented in Python. - -Notes for Lucida Users -====================== - -Build --------- - -``` -make -``` - -It runs `install_kaldi.sh` only if the directory `kaldi` does not exist. -Therefore, if previous intallation fails, please remove this directory and compile again. -It may take up to 4 hours to complete. - -Run --------- - -Start the master server followed by the worker: - -``` -make start_master_server -make start_server -``` - -Wait until you see `Opened websocket connection to server` from the worker.` - -Test --------- - -Test the installtion process: - -``` -make start_test -``` - -If you see results popping up, it should be ready to go. - -Text to Speech --------- - -In addition to the speech recognition service, we provide a text-to-speech (TTS) service based on [kaldi idlak](https://github.com/bpotard/idlak). -Currently, it is experimental because our web front end uses the [web APIs](https://developer.mozilla.org/en-US/docs/Web/API/SpeechSynthesis) to perform voice synthesis. -However, you are welcome to use this service by running the script `install_kaldi_tts.sh`. 
-It installs both the kaldi toolkit (~4 hours for C++ compilation) and the idlak component (~4 hours for DNN training), and at the end of this process, we provide one way to use it -- -convert text input from command line to audio file saved on disk. - -(End of Notes for Lucida Users) - -Features --------- - - * Full duplex communication based on websockets: speech goes in, partial hypotheses come out (think of Android's voice typing) - * Very scalable: the server consists of a master component and workers; one worker is needed per concurrent recognition session; workers can be - started and stopped independently of the master on remote machines - * Can do speech segmentation, i.e., a long speech signal is broken into shorter segments based on silences - * Supports arbitrarily long speech input (e.g., you can stream live speech into it) - * Supports Kaldi's GMM and "online DNN" models - * Supports rescoring of the recognition lattice with a large language model - * Supports persisting the acoustic model adaptation state between requests - * Supports unlimited set of audio codecs (actually only those supported by GStreamer) - * Supports rewriting raw recognition results using external programs (can be used for converting words to numbers, etc) - * Python, Java, Javascript clients are available - -English demo that uses the server: http://bark.phon.ioc.ee/dictate/ - -Estonian demo: http://bark.phon.ioc.ee/dikteeri/ - -Changelog ---------- - * 2015-12-04: added a link to the Dockerfile. - - * 2015-06-30: server now uses the recently added "full final results" functionality of gst-kaldi-nnet2-online. - Full results can include things like n-best hypotheses, word and phone alignment information, - and possibly other things in the future. You have to upgrade gst-kaldi-nnet2-online (when using this plugin instead of - the GMM-based Kaldi GStreamer plugin) prior to using this. Also added a sample full results post-processing - script `sample_full_post_processor.py` (see `sample_english_nnet2.yaml` on how to use it). - - - -Installation ------------- - -### Docker - -Building Kaldi and all the other packages required by this software can be quite complicated. Instead of building -all the prerequisites manually, one could use the Dockerfile created by José Eduardo Silva: https://github.com/jcsilva/docker-kaldi-gstreamer-server. - -### Requirements - -#### Python 2.7 with the following packages: - - * Tornado 4, see http://www.tornadoweb.org/en/stable/ - * ws4py (0.3.0 .. 0.3.2) - * YAML - * JSON - -*NB!*: The server doesn't work quite correctly with ws4py 0.3.5 because of a bug I reported here: https://github.com/Lawouach/WebSocket-for-Python/issues/152. -Use ws4py 0.3.2 instead. To install ws4py 0.3.2 using `pip`, run: - - pip install ws4py==0.3.2 - -In addition, you need Python 2.x bindings for gobject-introspection libraries, provided by the `python-gi` -package on Debian and Ubuntu. - -#### Kaldi - -Download and compile Kaldi (http://kaldi.sourceforge.net). Also compile the online extensions (`make ext`) -and the Kaldi GStreamer plugin (see `README` in Kaldi's `src/gst-plugin` directory). - -#### Acoustic and language models for Kaldi - -You need GMM-HMM-based acoustic and n-gram language models (actually their FST cascade) for your language. - -Working (but not very accurate) recognition models are available for English and Estonian in the `test/models/` directory. 
-English models are based on Voxforge acoustic models and the CMU Sphinx 2013 general English trigram language model (http://cmusphinx.sourceforge.net/2013/01/a-new-english-language-model-release/). -The language models were heavily pruned so that the resulting FST cascade would be less than the -100 MB GitHub file size limit. - -*Update:* the server also supports Kaldi's new "online2" online decoder that uses DNN-based acoustic models with i-vector input. See below on -how to use it. According to experiments on two Estonian online decoding setups, the DNN-based models result in about 20% (or more) relatively less -errors than GMM-based models (e.g., WER dropped from 13% to 9%). - - -Running the server ------------------- - -### Running the master server - -The following starts the main server on localhost:8888 - - python kaldigstserver/master_server.py --port=8888 - -### Running workers - - -The master server doesn't perform speech recognition itself, it simply delegates client recognition -requests to workers. You need one worker per recognition session. So, the number of running workers -should be at least the number of potential concurrent recognition sessions. Good thing is that -workers are fully independent and do not even have to be running on the same machine, thus -offering practically unlimited parallelness. - -There are two decoders that a worker can use: based on the Kaldi `onlinegmmdecodefaster` GStreamer plugin -or based on the newer `kaldinnet2onlinedecoder` plugin. The first one supports GMM models, the latter one needs -"online2" DNN-based models with i-vector input. - -To run a worker, first write a configuration file. A sample configuration that uses the English GMM-HMM -models that come with this project is available in `sample_worker.yaml`. A sample worker that uses -"online2" DNN-based models is in `sample_english_nnet2.yaml`. - -#### Using the 'onlinegmmdecodefaster' based worker - -Before starting a worker, make sure that the GST plugin path includes Kaldi's `src/gst-plugin` directory -(which should contain the file `libgstkaldi.so`), something like: - - export GST_PLUGIN_PATH=~/tools/kaldi-trunk/src/gst-plugin - -Test if it worked: - - gst-inspect-1.0 onlinegmmdecodefaster - -The latter should print out information about the Kaldi's GStreamer plugin. - -Now, you can start a worker: - - python kaldigstserver/worker.py -u ws://localhost:8888/worker/ws/speech -c sample_worker.yaml - -The `-u ws://localhost:8890/worker/ws/speech` argument specifies the address of the main server -that the worker should connect to. Make sure you are using the same port as in the server invocation. - -You can start any number of worker processes, just use the same command to start the next workers. - -It might be a good idea to use [supervisord](http://supervisord.org) to start and stop the main server and -several workers. A sample supervisord configuration file is in `etc/english-supervisord.conf`. - -Server usage ------------- - -A sample implementation of the client is in `kaldigstserver/client.py`. - -If you started the server/worker as described above, you should be able to test the installation by invoking: - - python kaldigstserver/client.py -r 32000 test/data/english_test.raw - -Expected output: - - THE. ONE TWO THREE FOUR FIVE SIX SEVEN EIGHT. - -Expected output when using using the DNN-based online models based on Fisher: - - one two or three you fall five six seven eight. yeah. 
- -The `-r 32000` in the last command tells the client to send audio to the server at 32000 bytes per second. The raw -sample audio file uses a sample rate of 16k with a 16-bit encoding which results in a byterate of 32000. - -You can also send ogg audio: - - python kaldigstserver/client.py -r 4800 test/data/english_test.ogg - -The rate in the last command is 4800. The bit rate of the ogg file is 37.5k, which results in a byte rate of 4800. - - -Using the 'kaldinnet2onlinedecoder' based worker ------------------------------------------------- - -The DNN-based online decoder requires a newer GStreamer plugin that is not in the Kaldi codebase and has to be compiled -seperately. It's available at https://github.com/alumae/gst-kaldi-nnet2-online. Clone it, e.g., under `~/tools/gst-kaldi-nnet2-online`. -Follow the instuctions and compile it. This should result in a file `~/tools/gst-kaldi-nnet2-online/src/libgstkaldionline2.so`. - -Also, download the DNN-based models for English, trained on the TEDLIUM speech corpus and combined with a generic English language model -provided by Cantab Research. Run the `download-tedlium-nnet2.sh` under `test/models` to download the models (attention, 1.5 GB): - - ./test/models/download-tedlium-nnet2.sh - -Before starting a worker, make sure that the GST plugin path includes the path where the `libgstkaldionline2.so` library you compiled earlier -resides, something like: - - export GST_PLUGIN_PATH=~/tools/gst-kaldi-nnet2-online/src - -Test if it worked: - - gst-inspect-1.0 kaldinnet2onlinedecoder - -The latter should print out information about the new Kaldi's GStreamer plugin. - -Now, you can start a worker: - - python kaldigstserver/worker.py -u ws://localhost:8888/worker/ws/speech -c sample_english_nnet2.yaml - -As the acoustic models are trained on TED data, we also test on TED data. The file `test/data/bill_gates-TED.mp3` contains about one -minute of a TED talk by Bill Gates. It's encoded as 64 kb MP3, so let's send it to the server at 64*1024/8=8192 bytes per second: - - python kaldigstserver/client.py -r 8192 test/data/bill_gates-TED.mp3 - -Recognized words should start appearing at the terminal. The final result should be something like: - -> when i was a kid the disaster we worry about most was a nuclear war. that's why we had a bear like this down our basement filled with cans of food and water. nuclear attack came we were supposed to go downstairs hunker down and eat out of that barrel. today the greatest risk of global catastrophe. don't look like this instead it looks like this. if anything kills over ten million people in the next few decades it's most likely to be a highly infectious virus rather than a war. not missiles that microbes now part of the reason for this is that we have invested a huge amount in nuclear deterrence we've actually invested very little in a system to stop an epidemic. we're not ready for the next epidemic. - -Compare that to the original transcript in `test/data/bill_gates-TED.txt`: - -> When I was a kid, the disaster we worried about most was a nuclear war. That's why we had a barrel like this down in our basement, filled with cans of food and water. When the nuclear attack came, we were supposed to go downstairs, hunker down, and eat out of that barrel. -> Today the greatest risk of global catastrophe doesn't look like this. Instead, it looks like this. If anything kills over 10 million people in the next few decades, it's most likely to be a highly infectious virus rather than a war. Not missiles, but microbes. 
Now, part of the reason for this is that we've invested a huge amount in nuclear deterrents. But we've actually invested very little in a system to stop an epidemic. We're not ready for the next epidemic. - - -#### Retrieving and sending adaptation state #### - -If you use the 'kaldinnet2onlinedecoder' based worker, you can retrieve the adaptation state after the decoding session -finishes, and send the previously retrieved adaptation state when starting a new session. - -The 'kaldinnet2onlinedecoder' worker always sends the adaptation state encoded in a JSON container once the session ends. Client -can store it in a file. This is functionality is implemented by the `client.py`. Assuming that you started the server and a worker as in the last -example, you can do: - - python kaldigstserver/client.py -r 32000 --save-adaptation-state adaptation-state.json test/data/english_test.wav - -The `adaptation-state.json` file will contain something like this: - - {"type": "string+gzip+base64", "value": "eJxlvUuPdEmSHbavXx...", "time": "2014-11-14T11:08:49"} - -As you can see, the adaptation state is not human-readable, it's actually gzipped and base64-encoded text data. - -To start another decoding session using the saved adaptation state, you can do something like this: - - python kaldigstserver/client.py -r 32000 --send-adaptation-state adaptation-state.json test/data/english_test.wav - - - -Alternative usage through a HTTP API ---------------------------------------- - -One can also use the server through a very simple HTTP-based API. This allows to simply send audio via a PUT or POST request -to http://server:port/client/dynamic/recognize and read the JSON ouput. Note that the JSON output is differently structured -than the output of the websocket-based API. This interface is compatible to the one implemented by http://github.com/alumae/ruby-pocketsphinx-server. - -The HTTP API supports chunked transfer encoding which means that server can read and decode an audio stream before it is complete. - -Example: - -Send audio to server: - - curl -T test/data/english_test.wav "http://localhost:8888/client/dynamic/recognize" - -Output: - - {"status": 0, "hypotheses": [{"utterance": "one two or three you fall five six seven eight. [noise]."}], "id": "7851281f-e187-4c24-9b58-4f3a5cba3dce"} - -Send audio using chunked transfer encoding at an audio byte rate; you can see from the worker logs that decoding starts already when the first chunks -have been received: - - curl -v -T test/data/english_test.raw -H "Content-Type: audio/x-raw-int; rate=16000" --header "Transfer-Encoding: chunked" --limit-rate 32000 "http://localhost:8888/client/dynamic/recognize" - -Output (like before): - - {"status": 0, "hypotheses": [{"utterance": "one two or three you fall five six seven eight. yeah."}], "id": "4e4594ee-bdb2-401f-8114-41a541d89eb8"} - - -Websocket-based client-server protocol ----------------------- - -### Opening a session - -To open a session, connect to the specified server websocket address (e.g. ws://localhost:8888/client/ws/speech). -The server assumes by default that incoming audio is sent using 16 kHz, mono, 16bit little-endian format. This can be overriden -using the 'content-type' request parameter. The content type has to be specified using GStreamer 1.0 caps format, -e.g. to send 44100 Hz mono 16-bit data, use: "audio/x-raw, layout=(string)interleaved, rate=(int)44100, format=(string)S16LE, channels=(int)1". 
-This needs to be url-encoded of course, so the actual request is something like: - - ws://localhost:8888/client/ws/speech?content-type=audio/x-raw,+layout=(string)interleaved,+rate=(int)44100,+format=(string)S16LE,+channels=(int)1 - -Audio can also be encoded using any codec recognized by GStreamer (assuming the needed packages are installed on the server). -GStreamer should recognize the container and codec automatically from the stream, you don't have to specify the content type. -E.g., to send audio encoded using the Speex codec in an Ogg container, use the following URL to open the session (server should -automatically recognize the codec): - - ws://localhost:8888/client/ws/speech - -### Sending audio - -Speech should be sent to the server in raw blocks of data, using the encoding specified when session was opened. -It is recommended that a new block is sent at least 4 times per second (less frequent blocks would increase the recognition lag). -Blocks do not have to be of equal size. - -After the last block of speech data, a special 3-byte ANSI-encoded string "EOS" ("end-of-stream") needs to be sent to the server. This tells the -server that no more speech is coming and the recognition can be finalized. - -After sending "EOS", client has to keep the websocket open to receive recognition results from the server. Server -closes the connection itself when all recognition results have been sent to the client. -No more audio can be sent via the same websocket after an "EOS" has been sent. In order to process a new -audio stream, a new websocket connection has to be created by the client. - -### Reading results - -Server sends recognition results and other information to the client using the JSON format. -The response can contain the following fields: - - * status -- response status (integer), see codes below - * message -- (optional) status message - * result -- (optional) recognition result, containing the following fields: - - hypotheses - recognized words, a list with each item containing the following: - + transcript -- recognized words - + confidence -- (optional) confidence of the hypothesis (float, 0..1) - - final -- true when the hypothesis is final, i.e., doesn't change any more - -The following status codes are currently in use: - - * 0 -- Success. Usually used when recognition results are sent - * 2 -- Aborted. Recognition was aborted for some reason. - * 1 -- No speech. Sent when the incoming audio contains a large portion of silence or non-speech. - * 9 -- Not available. Used when all recognizer processes are currently in use and recognition cannot be performed. - -Websocket is always closed by the server after sending a non-zero status update. - -Examples of server responses: - - {"status": 9} - {"status": 0, "result": {"hypotheses": [{"transcript": "see on"}], "final": false}} - {"status": 0, "result": {"hypotheses": [{"transcript": "see on teine lause."}], "final": true}} - -Server segments incoming audio on the fly. For each segment, many non-final hypotheses, followed by one final -hypothesis are sent. Non-final hypotheses are used to present partial recognition hypotheses -to the client. A sequence of non-final hypotheses is always followed by a final hypothesis for that segment. -After sending a final hypothesis for a segment, -server starts decoding the next segment, or closes the connection, if all audio sent by the client has been processed. - -Client is reponsible for presenting the results to the user in a way -suitable for the application. 
- -Client software ---------------- - -Javascript client is available here: http://kaljurand.github.io/dictate.js - -Citing ------- - -If you use this software for research, you can cite the paper where this software is -described (available here: http://ebooks.iospress.nl/volumearticle/37996): - - @inproceedigs{alumae2014, - author={Tanel Alum\"{a}e}, - title="Full-duplex Speech-to-text System for {Estonian}", - booktitle="Baltic HLT 2014", - year=2014, - address="Kaunas, Lihtuania" - } - -Of course, you should also acknowledge Kaldi, which does all the hard work. diff --git a/lucida/speechrecognition/kaldi_gstreamer_asr/estonian_worker.yaml b/lucida/speechrecognition/kaldi_gstreamer_asr/estonian_worker.yaml deleted file mode 100644 index 4b5fab95c..000000000 --- a/lucida/speechrecognition/kaldi_gstreamer_asr/estonian_worker.yaml +++ /dev/null @@ -1,29 +0,0 @@ -timeout-decoder : 10 -decoder: - model: test/models/estonian/tri2b_mmi_pruned/final.mdl - lda-mat: test/models/estonian/tri2b_mmi_pruned/final.mat - word-syms: test/models/estonian/tri2b_mmi_pruned/words.txt - fst: test/models/estonian/tri2b_mmi_pruned/HCLG.fst - silence-phones: 6 -out-dir: tmp - -use-vad: True -silence-timeout: 60 - -# Reconstructs compound words. Requires SRILM. Just comment out when no SRILM available. -post-processor: hidden-ngram -hidden-vocab test/models/estonian/compounder/hidden.vocab -lm test/models/estonian/compounder/compounder-pruned.vestlused-dev.splitw.arpa.gz -text - -keep-unk | perl -npe 'BEGIN {use IO::Handle; STDOUT->autoflush(1);} s/ ?\+C\+ ?//g; s/ ?\+D\+ ?/-/g;' -logging: - version : 1 - disable_existing_loggers: False - formatters: - simpleFormater: - format: '%(asctime)s - %(levelname)7s: %(name)10s: %(message)s' - datefmt: '%Y-%m-%d %H:%M:%S' - handlers: - console: - class: logging.StreamHandler - formatter: simpleFormater - level: DEBUG - root: - level: DEBUG - handlers: [console] diff --git a/lucida/speechrecognition/kaldi_gstreamer_asr/etc/english-supervisord.conf b/lucida/speechrecognition/kaldi_gstreamer_asr/etc/english-supervisord.conf deleted file mode 100644 index 24fe6b9e7..000000000 --- a/lucida/speechrecognition/kaldi_gstreamer_asr/etc/english-supervisord.conf +++ /dev/null @@ -1,21 +0,0 @@ -; Sample supervisord configuration for kaldi-gstreamer-server. 
-; Modify according to you needs and put under /etc/supervisor/conf.d/ - -[program:full-duplex-english-server] -command = /usr/bin/python2.7 /home/tanel/devel/kaldi-gstreamer-server/kaldigstserver/master_server.py --port=8890 -user = tanel -redirect_stderr = true -stdout_logfile = /home/tanel/service/duplex-speech/english/server.log -environment= GST_PLUGIN_PATH="/home/tanel/tools/kaldi-trunk/src/gst-plugin" -directory = /home/tanel/service/duplex-speech/english - -[program:full-duplex-english-worker] -command = /usr/bin/python2.7 /home/tanel/devel/kaldi-gstreamer-server/kaldigstserver/worker.py -u ws://localhost:8890/worker/ws/speech -c worker.yaml -numprocs = 5 -process_name=%(program_name)s-%(process_num)s -user = tanel -redirect_stderr = true -stdout_logfile = /home/tanel/service/duplex-speech/english/worker-%(process_num)s.log -environment= GST_PLUGIN_PATH="/home/tanel/tools/kaldi-trunk/src/gst-plugin",LC_ALL="en_US.UTF8" -directory = /home/tanel/service/duplex-speech/english - diff --git a/lucida/speechrecognition/kaldi_gstreamer_asr/install_idlak.sh b/lucida/speechrecognition/kaldi_gstreamer_asr/install_idlak.sh deleted file mode 100755 index ca8e3cee9..000000000 --- a/lucida/speechrecognition/kaldi_gstreamer_asr/install_idlak.sh +++ /dev/null @@ -1,218 +0,0 @@ -#!/bin/bash -# Only run this script inside `kaldi_tts`. -# This script attempts to automatically execute the instructions in -# INSTALL_IDLAK. - -# (1) Install instructions for expat - -if ! which wget >&/dev/null; then - echo "This script requires you to first install wget"; - exit 1; -fi - -if ! which cmake >&/dev/null; then - echo "This script requires cmake: install it first."; - exit 1; -fi - -echo "****(1) Installing expat" - -( - rm -f expat-2.1.0.tar.gz 2>/dev/null - wget https://sourceforge.net/projects/expat/files/expat/2.1.0/expat-2.1.0.tar.gz - if [ ! -e expat-2.1.0.tar.gz ]; then - echo "****download of expat-2.1.0.tar.gz failed." - exit 1 - else - tar -xovzf expat-2.1.0.tar.gz || exit 1 - cd expat-2.1.0 - ./configure --prefix=`pwd` || exit 1 - sed -i "s/CPPFLAGS = -DHAVE_EXPAT_CONFIG_H/CPPFLAGS = -fPIC -DHAVE_EXPAT_CONFIG_H/g" Makefile - make || exit 1 - make install || exit 1 - cd .. - fi -) -ok_expat=$? -if [ $ok_expat -ne 0 ]; then - echo "****expat install failed." - exit 1 -fi - -echo "****(2) Installing pugixml" - -( - rm -rf pugixml-1.2 pugixml-1.2.tar.gz 2>/dev/null - #wget -T 10 -t 3 http://pugixml.googlecode.com/files/pugixml-1.2.tar.gz - wget https://github.com/zeux/pugixml/releases/download/v1.2/pugixml-1.2.tar.gz - if [ ! -e pugixml-1.2.tar.gz ]; then - echo "****download of pugixml-1.2.tar.gz failed." - exit 1 - else - mkdir pugixml-1.2 - cd pugixml-1.2 - tar -xovzf ../pugixml-1.2.tar.gz || exit 1 - cd scripts - if [ "$(uname)" == "Darwin" ]; then - # OS X 10.9, 10.10 require CXXFLAGS += -stdlib=libstdc++ to compile pugixml - osx_ver=$(sw_vers | grep ProductVersion | awk '{print $2}' | awk '{split($0,a,"\."); print a[1] "." a[2]; }') - echo "Configuring for OS X version $osx_ver ..." - if [ "$osx_ver" == "10.9" ]; then - cmake -DCMAKE_CXX_FLAGS=-stdlib=libstdc++ - elif [ "$osx_ver" == "10.10" ]; then - cmake -DCMAKE_CXX_FLAGS=-stdlib=libstdc++ - else - cmake . - fi - else - cmake -DCMAKE_CXX_FLAGS=-fPIC || - cmake . || exit 1 - fi - make || exit 1 - cd ../.. - fi -) -ok_pugixml=$? -if [ $ok_pugixml -ne 0 ]; then - echo "****pugixml install failed." 
- exit 1 -fi - -echo "****(3) Installing pcre with utf8 support" - -( - rm -f pcre-8.20.tar.bz2 2>/dev/null - wget https://sourceforge.net/projects/pcre/files/pcre/8.20/pcre-8.20.tar.bz2 - if [ ! -e pcre-8.20.tar.bz2 ]; then - echo "****download of pcre-8.20.tar.bz2 failed." - exit 1 - else - tar -xovjf pcre-8.20.tar.bz2 || exit 1 - cd pcre-8.20 - ./configure --enable-utf8 --enable-unicode-properties --enable-newline-is-anycrlf --prefix="$(pwd)" || exit 1 - sed -i "s/CPPFLAGS =/CPPFLAGS = -fPIC/g" Makefile - make || exit 1 - make install || exit 1 - cd .. - fi -) -ok_pcre=$? -if [ $ok_pcre -ne 0 ]; then - echo "****pcre install failed." - exit 1 -fi - -echo "****(4) Installing SPTK" -( - rm -f SPTK-3.9.tar.gz 2>/dev/null - wget https://sourceforge.net/projects/sp-tk/files/SPTK/SPTK-3.9/SPTK-3.9.tar.gz - if [ ! -e SPTK-3.9.tar.gz ]; then - echo "****download of SPTK-3.9.tar.gz failed." - exit 1 - else - mkdir -p SPTK - tar -xovzf SPTK-3.9.tar.gz || exit 1 - cd SPTK-3.9 - ./configure --prefix="$(pwd)"/../SPTK || exit 1 - make || exit 1 - make install || exit 1 - cd .. - fi -) -ok_sptk=$? -if [ $ok_sptk -ne 0 ]; then - echo "****sptk install failed." - exit 1 -fi - -echo "****(5) Installing phonetisaurus" -#( -# rm -f Phonetisaurus -# git clone https://github.com/AdolfVonKleist/Phonetisaurus.git -# if [ ! -e Phonetisaurus ]; then -# echo "****cloning of Phonetisaurus failed." -# exit 1 -# else -# cd Phonetisaurus -# git checkout 09651ed5f6e9040d6dd30070601ecccfad254df4 . || exit 1 -# patch -p1 -N < ../extras/phonetisaurus.patch -# cd src/.autoconf -# autoconf -o ../configure || exit 1 -# cd .. -# LDFLAGS="-Wl,-rpath=`pwd`/../../openfst/lib/" ./configure --with-openfst-libs=`pwd`/../../openfst/lib --with-openfst-includes=`pwd`/../../openfst/include --with-install-bin=`pwd`/.. || exit 1 -# make -j4 || exit 1 -# make install || exit 1 -# cd .. -# fi -#) -#ok_phonetisaurus=$? -#if [ $ok_phonetisaurus -ne 0 ]; then -# echo "****phonetisaurus install failed." -# exit 1 -#fi - -# echo "****(1) Installing Apache Xerces C++ XML Parser" - -# ( -# rm xerces-c-3.1.1.tar.gz 2>/dev/null -# wget -T 10 -t 3 http://mirror.rmg.io/apache//xerces/c/3/sources/xerces-c-3.1.1.tar.gz -# if [ ! -e xerces-c-3.1.1.tar.gz]; then -# echo "****download of xerces-c-3.1.1.tar.gz failed." -# exit 1 -# else -# tar -xovzf xerces-c-3.1.1.tar.gz || exit 1 -# cd xerces-c-3.1.1 -# ./configure --prefix=`pwd` || exit 1 -# make || exit 1 -# make install || exit 1 -# cd .. -# fi -# ) -# ok_xerces=$? -# if [ $ok_xerces -ne 0 ]; then -# echo "**** Apache Xerces C++ XML Parser install failed." -# fi - -# echo "****(1) Installing libXML" - -# ( -# rm libxml2-2.8.0.tar.gz 2>/dev/null -# wget -T 10 -t 3 ftp://xmlsoft.org/libxml2/libxml2-2.8.0.tar.gz -# if [ ! -e libxml2-2.8.0.tar.gz]; then -# echo "****download of libxml2-2.8.0.tar.gz failed." -# exit 1 -# else -# tar -xovzf libxml2-2.8.0.tar.gz || exit 1 -# cd libxml2-2.8.0 -# ./configure --prefix=`pwd` || exit 1 -# make || exit 1 -# make install || exit 1 -# cd .. -# fi -# ) -# ok_libxml=$? -# if [ $ok_libxml -ne 0 ]; then -# echo "**** libXML install failed." -# fi - -# echo "****(2) Installing Arabica" - -# ( -# rm arabica-2010-November.tar.bz2 2>/dev/null -# wget -T 10 -t 3 http://sourceforge.net/projects/arabica/files/latest/download?source=files -# if [ ! -e arabica-2010-November.tar.bz2]; then -# echo "****download of arabica-2010-November.tar.bz2 failed." 
-# exit 1 -# else -# tar -xovjf arabica-2010-November.tar.bz2 || exit 1 -# cd libxml2-2.8.0 -# ./configure --prefix=`pwd` --with-libxml2=`pwd`/../libxml2-2.8.0/lib || exit 1 -# make || exit 1 -# make install || exit 1 -# cd .. -# fi -# ) -# ok_xerces=$? -# if [ $ok_xerces -ne 0 ]; then -# echo "**** Apache Xerces C++ XML Parser install failed." -# fi diff --git a/lucida/speechrecognition/kaldi_gstreamer_asr/install_kaldi.sh b/lucida/speechrecognition/kaldi_gstreamer_asr/install_kaldi.sh deleted file mode 100755 index ae195add3..000000000 --- a/lucida/speechrecognition/kaldi_gstreamer_asr/install_kaldi.sh +++ /dev/null @@ -1,57 +0,0 @@ -#!/bin/bash -LUCIDAROOT=$(pwd)/../../ -if [ ! -d kaldi ]; then - git clone https://github.com/kaldi-asr/kaldi.git - if [ $? -ne 0 ]; then - echo "Could not clone kaldi!!! Please try again later..." - exit 1 - fi -fi -cd kaldi \ - && git checkout 01576867802ae5c499f9a4b66591ce35499e28f5 \ - && cd tools \ - && ( sudo ln -s -f bash /bin/sh || : ) \ - && sudo apt-get install -y zlib1g-dev automake autoconf libtool subversion \ - && sudo apt-get install -y libatlas3-base \ - && extras/check_dependencies.sh \ - && make \ - && cd .. \ - && cd src \ - && ./configure --shared \ - && sed -i '7s/^/COMPILE_FLAGS += -fPIC\n/' Makefile \ - && make depend \ - && make \ - && make ext \ - && cd gst-plugin \ - && sudo apt-get install -y libgstreamer1.0-dev \ - && sudo apt-get install -y gstreamer1.0-plugins-good \ - && sudo apt-get install -y gstreamer1.0-plugins-bad \ - && sudo apt-get install -y gstreamer1.0-plugins-ugly \ - && sudo apt-get install -y gstreamer1.0-tools \ - && make depend \ - && make \ - && cd ../../ \ - && cd tools \ - && if [ ! -d gst-kaldi-nnet2-online ]; then git clone https://github.com/alumae/gst-kaldi-nnet2-online.git; if [ $? -ne 0 ]; then echo "Could not download gst-kaldi-nnet2-online!!! Please try again later..."; exit 1; fi; fi \ - && cd gst-kaldi-nnet2-online \ - && git checkout 2d395396c5bf88628a1af0127eebe0a84bd02923 \ - && ( sudo add-apt-repository -y ppa:gstreamer-developers/ppa || : ) \ - && ( sudo apt-get -y update || : ) \ - && sudo apt-get install -y libjansson-dev \ - && cd src \ - && export KALDI_ROOT=$LUCIDAROOT/speechrecognition/kaldi_gstreamer_asr/kaldi \ - && make depend \ - && make \ - && cd ../../../../ \ - && ./test/models/download-fisher-nnet2.sh \ - && export GST_PLUGIN_PATH=$LUCIDAROOT/speechrecognition/kaldi_gstreamer_asr/kaldi/tools/gst-kaldi-nnet2-online/src \ - && sudo pip install tornado \ - && sudo apt-get install -y python3-dev \ - && sudo apt-get install -y python2.7-dev \ - && sudo apt-get install -y libblas3 \ - && sudo apt-get install -y libblas-dev \ - && sudo apt-get install -y liblapack3 \ - && sudo apt-get install -y liblapack-dev \ - && sudo apt-get install -y gfortran \ - && sudo apt-get install -y libc6 \ - && sudo ldconfig diff --git a/lucida/speechrecognition/kaldi_gstreamer_asr/install_kaldi_tts.sh b/lucida/speechrecognition/kaldi_gstreamer_asr/install_kaldi_tts.sh deleted file mode 100755 index 60ed33146..000000000 --- a/lucida/speechrecognition/kaldi_gstreamer_asr/install_kaldi_tts.sh +++ /dev/null @@ -1,23 +0,0 @@ -#!/bin/bash -LUCIDAROOT=$(pwd)/../../ -git clone https://github.com/bpotard/idlak.git -mv idlak kaldi_tts -cd kaldi_tts -git checkout 02b24dc6f79b84779e423bfbb17bdf8e70c95aec -cd tools -extras/check_dependencies.sh -cp ../../install_idlak.sh . # contains modifications -make -cd .. -cd src -sed -i '7s/^/COMPILE_FLAGS += -fPIC\n/' Makefile -./configure --shared -make depend -make -cd .. 
-cd egs/tts_dnn_arctic/s1 -./run.sh -# Replace the following text with your own. -echo '########## Test: This is a test from Lucida.' -echo 'This is a test from Lucida.' | utils/synthesis_test.sh -echo 'Please "cd exp_dnn/tts_dnn_train_3_deltasc2_quin5/tst_forward_tmp/wav_mlpg" to retrieve the audio!' diff --git a/lucida/speechrecognition/kaldi_gstreamer_asr/kaldigstserver/client.py b/lucida/speechrecognition/kaldi_gstreamer_asr/kaldigstserver/client.py deleted file mode 100644 index 3d8be6be8..000000000 --- a/lucida/speechrecognition/kaldi_gstreamer_asr/kaldigstserver/client.py +++ /dev/null @@ -1,128 +0,0 @@ -__author__ = 'tanel' - -import argparse -from ws4py.client.threadedclient import WebSocketClient -import time -import threading -import sys -import urllib -import Queue -import json -import time -import os - -def rate_limited(maxPerSecond): - minInterval = 1.0 / float(maxPerSecond) - def decorate(func): - lastTimeCalled = [0.0] - def rate_limited_function(*args,**kargs): - elapsed = time.clock() - lastTimeCalled[0] - leftToWait = minInterval - elapsed - if leftToWait>0: - time.sleep(leftToWait) - ret = func(*args,**kargs) - lastTimeCalled[0] = time.clock() - return ret - return rate_limited_function - return decorate - - -class MyClient(WebSocketClient): - - def __init__(self, filename, url, protocols=None, extensions=None, heartbeat_freq=None, byterate=32000, - save_adaptation_state_filename=None, send_adaptation_state_filename=None): - super(MyClient, self).__init__(url, protocols, extensions, heartbeat_freq) - self.final_hyps = [] - self.fn = filename - self.byterate = byterate - self.final_hyp_queue = Queue.Queue() - self.save_adaptation_state_filename = save_adaptation_state_filename - self.send_adaptation_state_filename = send_adaptation_state_filename - - @rate_limited(4) - def send_data(self, data): - self.send(data, binary=True) - - def opened(self): - #print "Socket opened!" - def send_data_to_ws(): - f = open(self.fn, "rb") - if self.send_adaptation_state_filename is not None: - print >> sys.stderr, "Sending adaptation state from %s" % self.send_adaptation_state_filename - try: - adaptation_state_props = json.load(open(self.send_adaptation_state_filename, "r")) - self.send(json.dumps(dict(adaptation_state=adaptation_state_props))) - except: - e = sys.exc_info()[0] - print >> sys.stderr, "Failed to send adaptation state: ", e - for block in iter(lambda: f.read(self.byterate/4), ""): - self.send_data(block) - print >> sys.stderr, "Audio sent, now sending EOS" - self.send("EOS") - - t = threading.Thread(target=send_data_to_ws) - t.start() - - - def received_message(self, m): - response = json.loads(str(m)) - #print >> sys.stderr, "RESPONSE:", response - #print >> sys.stderr, "JSON was:", m - if response['status'] == 0: - if 'result' in response: - trans = response['result']['hypotheses'][0]['transcript'] - if response['result']['final']: - #print >> sys.stderr, trans, - self.final_hyps.append(trans) - print >> sys.stderr, '\r%s' % trans.replace("\n", "\\n") - else: - print_trans = trans.replace("\n", "\\n") - if len(print_trans) > 80: - print_trans = "... 
%s" % print_trans[-76:] - print >> sys.stderr, '\r%s' % print_trans, - if 'adaptation_state' in response: - if self.save_adaptation_state_filename: - print >> sys.stderr, "Saving adaptation state to %s" % self.save_adaptation_state_filename - with open(self.save_adaptation_state_filename, "w") as f: - f.write(json.dumps(response['adaptation_state'])) - else: - print >> sys.stderr, "Received error from server (status %d)" % response['status'] - if 'message' in response: - print >> sys.stderr, "Error message:", response['message'] - - - def get_full_hyp(self, timeout=60): - return self.final_hyp_queue.get(timeout) - - def closed(self, code, reason=None): - #print "Websocket closed() called" - #print >> sys.stderr - self.final_hyp_queue.put(" ".join(self.final_hyps)) - - -def main(): - - parser = argparse.ArgumentParser(description='Command line client for kaldigstserver') - parser.add_argument('-u', '--uri', default="ws://localhost:8081/client/ws/speech", dest="uri", help="Server websocket URI") - parser.add_argument('-r', '--rate', default=32000, dest="rate", type=int, help="Rate in bytes/sec at which audio should be sent to the server. NB! For raw 16-bit audio it must be 2*samplerate!") - parser.add_argument('--save-adaptation-state', help="Save adaptation state to file") - parser.add_argument('--send-adaptation-state', help="Send adaptation state from file") - parser.add_argument('--content-type', default='', help="Use the specified content type (empty by default, for raw files the default is audio/x-raw, layout=(string)interleaved, rate=(int), format=(string)S16LE, channels=(int)1") - parser.add_argument('audiofile', help="Audio file to be sent to the server") - args = parser.parse_args() - - content_type = args.content_type - if content_type == '' and args.audiofile.endswith(".raw"): - content_type = "audio/x-raw, layout=(string)interleaved, rate=(int)%d, format=(string)S16LE, channels=(int)1" %(args.rate/2) - - - - ws = MyClient(args.audiofile, args.uri + '?%s' % (urllib.urlencode([("content-type", content_type)])), byterate=args.rate, - save_adaptation_state_filename=args.save_adaptation_state, send_adaptation_state_filename=args.send_adaptation_state) - ws.connect() - result = ws.get_full_hyp() - print result.encode('utf-8') - -if __name__ == "__main__": - main() - diff --git a/lucida/speechrecognition/kaldi_gstreamer_asr/kaldigstserver/commandcenter.thrift b/lucida/speechrecognition/kaldi_gstreamer_asr/kaldigstserver/commandcenter.thrift deleted file mode 100644 index 91bee229c..000000000 --- a/lucida/speechrecognition/kaldi_gstreamer_asr/kaldigstserver/commandcenter.thrift +++ /dev/null @@ -1,38 +0,0 @@ -# This file outlines the command center's API. -# Thrift will generate stubs that handle serialization/deserialization -# during remote procedure calls. -# Services register with the command center using the registerService() function. - -namespace cpp cmdcenterstubs -namespace java cmdcenterstubs - -# MachineData is the information that services send -# when they wish to register with the command center. 
-struct MachineData -{ - 1:string name, - 2:i32 port -} - -struct QueryData -{ - 1:string audioData = "", - 2:string audioFormat = "", - 3:bool audioB64Encoding = false, - 4:string imgData = "", - 5:string imgFormat = "", - 6:bool imgB64Encoding = false, - 7:string textData = "" -} - -service CommandCenter -{ - # service <--> command center API - void registerService(1:string serviceType, 2:MachineData mDataObj) - - # command center <--> client API - string handleRequest(1:QueryData data) - - # simple function to test connections - void ping() -} diff --git a/lucida/speechrecognition/kaldi_gstreamer_asr/kaldigstserver/common.py b/lucida/speechrecognition/kaldi_gstreamer_asr/kaldigstserver/common.py deleted file mode 100644 index cefd67743..000000000 --- a/lucida/speechrecognition/kaldi_gstreamer_asr/kaldigstserver/common.py +++ /dev/null @@ -1,13 +0,0 @@ -__author__ = 'tanel' - -STATUS_EOS = -1 -STATUS_SUCCESS = 0 -STATUS_NO_SPEECH = 1 -STATUS_ABORTED = 2 -STATUS_AUDIO_CAPTURE = 3 -STATUS_NETWORK = 4 -STATUS_NOT_ALLOWED = 5 -STATUS_SERVICE_NOT_ALLOWED = 6 -STATUS_BAD_GRAMMAR = 7 -STATUS_LANGUAGE_NOT_SUPPORTED = 8 -STATUS_NOT_AVAILABLE = 9 \ No newline at end of file diff --git a/lucida/speechrecognition/kaldi_gstreamer_asr/kaldigstserver/decoder.py b/lucida/speechrecognition/kaldi_gstreamer_asr/kaldigstserver/decoder.py deleted file mode 100644 index 93fd5b097..000000000 --- a/lucida/speechrecognition/kaldi_gstreamer_asr/kaldigstserver/decoder.py +++ /dev/null @@ -1,221 +0,0 @@ -""" -Created on May 17, 2013 - -@author: tanel -""" -import gi - -gi.require_version('Gst', '1.0') -from gi.repository import GObject, Gst - -GObject.threads_init() -Gst.init(None) -import logging -import thread -import os - -logger = logging.getLogger(__name__) - -import pdb - -class DecoderPipeline(object): - def __init__(self, conf={}): - logger.info("Creating decoder using conf: %s" % conf) - self.use_cutter = conf.get("use-vad", False) - self.create_pipeline(conf) - self.outdir = conf.get("out-dir", None) - if not os.path.exists(self.outdir): - os.mkdir(self.outdir) - elif not os.path.isdir(self.outdir): - raise Exception("Output directory %s already exists as a file" % self.outdir) - - self.word_handler = None - self.eos_handler = None - self.request_id = "" - - - def create_pipeline(self, conf): - - self.appsrc = Gst.ElementFactory.make("appsrc", "appsrc") - - self.decodebin = Gst.ElementFactory.make("decodebin", "decodebin") - self.audioconvert = Gst.ElementFactory.make("audioconvert", "audioconvert") - self.audioresample = Gst.ElementFactory.make("audioresample", "audioresample") - self.tee = Gst.ElementFactory.make("tee", "tee") - self.queue1 = Gst.ElementFactory.make("queue", "queue1") - self.filesink = Gst.ElementFactory.make("filesink", "filesink") - self.queue2 = Gst.ElementFactory.make("queue", "queue2") - self.cutter = Gst.ElementFactory.make("cutter", "cutter") - self.asr = Gst.ElementFactory.make("onlinegmmdecodefaster", "asr") - self.fakesink = Gst.ElementFactory.make("fakesink", "fakesink") - - for (key, val) in conf.get("decoder", {}).iteritems(): - logger.info("Setting decoder property: %s = %s" % (key, val)) - self.asr.set_property(key, val) - - self.appsrc.set_property("is-live", True) - self.filesink.set_property("location", "/dev/null") - self.cutter.set_property("leaky", False) - self.cutter.set_property("pre-length", 1000 * 1000000) - self.cutter.set_property("run-length", 1000 * 1000000) - self.cutter.set_property("threshold", 0.01) - if self.use_cutter: - 
self.asr.set_property("silent", True) - logger.info('Created GStreamer elements') - - self.pipeline = Gst.Pipeline() - for element in [self.appsrc, self.decodebin, self.audioconvert, self.audioresample, self.tee, - self.queue1, self.filesink, - self.queue2, self.cutter, self.asr, self.fakesink]: - logger.debug("Adding %s to the pipeline" % element) - self.pipeline.add(element) - - logger.info('Linking GStreamer elements') - - self.appsrc.link(self.decodebin) - #self.appsrc.link(self.audioconvert) - self.decodebin.connect('pad-added', self._connect_decoder) - if self.use_cutter: - self.cutter.link(self.audioconvert) - - self.audioconvert.link(self.audioresample) - - self.audioresample.link(self.tee) - #self.audioresample.link(self.cutter) - #self.cutter.link(self.tee) - - self.tee.link(self.queue1) - self.queue1.link(self.filesink) - - self.tee.link(self.queue2) - self.queue2.link(self.asr) - - - self.asr.link(self.fakesink) - - # Create bus and connect several handlers - self.bus = self.pipeline.get_bus() - self.bus.add_signal_watch() - self.bus.enable_sync_message_emission() - self.bus.connect('message::eos', self._on_eos) - self.bus.connect('message::error', self._on_error) - #self.bus.connect('message::cutter', self._on_cutter) - - cutter_type = 'sync' - if cutter_type == 'async': - self.bus.connect('message::element', self._on_element_message) - else: - #self.bus.set_sync_handler(self.bus.sync_signal_handler) - self.bus.connect('sync-message::element', self._on_element_message) - self.asr.connect('hyp-word', self._on_word) - logger.info("Setting pipeline to READY") - self.pipeline.set_state(Gst.State.READY) - logger.info("Set pipeline to READY") - - def _connect_decoder(self, element, pad): - logger.info("%s: Connecting audio decoder" % self.request_id) - if self.use_cutter: - pad.link(self.cutter.get_static_pad("sink")) - else: - pad.link(self.audioconvert.get_static_pad("sink")) - - logger.info("%s: Connected audio decoder" % self.request_id) - - def _on_element_message(self, bus, message): - if message.has_name("cutter"): - if message.get_structure().get_value('above'): - logger.info("LEVEL ABOVE") - self.asr.set_property("silent", False) - else: - logger.info("LEVEL BELOW") - self.asr.set_property("silent", True) - - def _on_word(self, asr, word): - logger.info("%s: Got word: %s" % (self.request_id, word.decode('utf8'))) - if self.word_handler: - self.word_handler(word) - - - def _on_error(self, bus, msg): - self.error = msg.parse_error() - logger.error(self.error) - self.finish_request() - if self.error_handler: - self.error_handler(self.error[0].message) - - def _on_eos(self, bus, msg): - logger.info('%s: Pipeline received eos signal' % self.request_id) - self.finish_request() - if self.eos_handler: - self.eos_handler[0](self.eos_handler[1]) - - def finish_request(self): - logger.info('%s: Finishing request' % self.request_id) - if self.outdir: - self.filesink.set_state(Gst.State.NULL) - self.filesink.set_property('location', "/dev/null") - self.filesink.set_state(Gst.State.PLAYING) - self.pipeline.set_state(Gst.State.NULL) - self.request_id = "" - - def init_request(self, id, caps_str): - self.request_id = id - if caps_str and len(caps_str) > 0: - logger.info("%s: Setting caps to %s" % (self.request_id, caps_str)) - caps = Gst.caps_from_string(caps_str) - self.appsrc.set_property("caps", caps) - else: - #caps = Gst.caps_from_string(None) - self.appsrc.set_property("caps", None) - #self.pipeline.set_state(Gst.State.READY) - pass - #self.appsrc.set_state(Gst.State.PAUSED) - - if 
self.outdir: - self.pipeline.set_state(Gst.State.PAUSED) - self.filesink.set_state(Gst.State.NULL) - self.filesink.set_property('location', "%s/%s.raw" % (self.outdir, id)) - self.filesink.set_state(Gst.State.PLAYING) - - #self.filesink.set_state(Gst.State.PLAYING) - #self.decodebin.set_state(Gst.State.PLAYING) - self.pipeline.set_state(Gst.State.PLAYING) - self.filesink.set_state(Gst.State.PLAYING) - # push empty buffer (to avoid hang on client diconnect) - buf = Gst.Buffer.new_allocate(None, 0, None) - self.appsrc.emit("push-buffer", buf) - logger.info('%s: Pipeline initialized' % (self.request_id)) - - - def process_data(self, data): - logger.debug('%s: Pushing buffer of size %d to pipeline' % (self.request_id, len(data))) - buf = Gst.Buffer.new_allocate(None, len(data), None) - buf.fill(0, data) - self.appsrc.emit("push-buffer", buf) - - - def end_request(self): - logger.info("%s: Pushing EOS to pipeline" % self.request_id) - self.appsrc.emit("end-of-stream") - - def set_word_handler(self, handler): - self.word_handler = handler - - def set_eos_handler(self, handler, user_data=None): - self.eos_handler = (handler, user_data) - - def set_error_handler(self, handler): - self.error_handler = handler - - - def cancel(self): - logger.info("%s: Cancelling pipeline" % self.request_id) - self.pipeline.send_event(Gst.Event.new_eos()) - #self.asr.set_property("silent", True) - #self.pipeline.set_state(Gst.State.NULL) - - #if (self.pipeline.get_state() == Gst.State.PLAYING): - #logger.debug("Sending EOS to pipeline") - #self.pipeline.send_event(Gst.Event.new_eos()) - #self.pipeline.set_state(Gst.State.READY) - logger.info("%s: Cancelled pipeline" % self.request_id) \ No newline at end of file diff --git a/lucida/speechrecognition/kaldi_gstreamer_asr/kaldigstserver/decoder2.py b/lucida/speechrecognition/kaldi_gstreamer_asr/kaldigstserver/decoder2.py deleted file mode 100644 index f70fa4716..000000000 --- a/lucida/speechrecognition/kaldi_gstreamer_asr/kaldigstserver/decoder2.py +++ /dev/null @@ -1,226 +0,0 @@ -""" -Created on May 17, 2013 - -@author: tanel -""" -import gi - -gi.require_version('Gst', '1.0') -from gi.repository import GObject, Gst - -GObject.threads_init() -Gst.init(None) -import logging -import thread -import os - -logger = logging.getLogger(__name__) - -import pdb - -class DecoderPipeline2(object): - def __init__(self, conf={}): - logger.info("Creating decoder using conf: %s" % conf) - self.create_pipeline(conf) - self.outdir = conf.get("out-dir", None) - if not os.path.exists(self.outdir): - os.makedirs(self.outdir) - elif not os.path.isdir(self.outdir): - raise Exception("Output directory %s already exists as a file" % self.outdir) - - self.result_handler = None - self.full_result_handler = None - self.eos_handler = None - self.error_handler = None - self.request_id = "" - - - def create_pipeline(self, conf): - - self.appsrc = Gst.ElementFactory.make("appsrc", "appsrc") - self.decodebin = Gst.ElementFactory.make("decodebin", "decodebin") - self.audioconvert = Gst.ElementFactory.make("audioconvert", "audioconvert") - self.audioresample = Gst.ElementFactory.make("audioresample", "audioresample") - self.tee = Gst.ElementFactory.make("tee", "tee") - self.queue1 = Gst.ElementFactory.make("queue", "queue1") - self.filesink = Gst.ElementFactory.make("filesink", "filesink") - self.queue2 = Gst.ElementFactory.make("queue", "queue2") - self.asr = Gst.ElementFactory.make("kaldinnet2onlinedecoder", "asr") - self.fakesink = Gst.ElementFactory.make("fakesink", "fakesink") - - # This needs to 
be set first - if "use-threaded-decoder" in conf["decoder"]: - self.asr.set_property("use-threaded-decoder", conf["decoder"]["use-threaded-decoder"]) - - for (key, val) in conf.get("decoder", {}).iteritems(): - if key != "use-threaded-decoder": - logger.info("Setting decoder property: %s = %s" % (key, val)) - self.asr.set_property(key, val) - - self.appsrc.set_property("is-live", True) - self.filesink.set_property("location", "/dev/null") - logger.info('Created GStreamer elements') - - self.pipeline = Gst.Pipeline() - for element in [self.appsrc, self.decodebin, self.audioconvert, self.audioresample, self.tee, - self.queue1, self.filesink, - self.queue2, self.asr, self.fakesink]: - logger.debug("Adding %s to the pipeline" % element) - self.pipeline.add(element) - - logger.info('Linking GStreamer elements') - - self.appsrc.link(self.decodebin) - #self.appsrc.link(self.audioconvert) - self.decodebin.connect('pad-added', self._connect_decoder) - self.audioconvert.link(self.audioresample) - - self.audioresample.link(self.tee) - - self.tee.link(self.queue1) - self.queue1.link(self.filesink) - - self.tee.link(self.queue2) - self.queue2.link(self.asr) - - self.asr.link(self.fakesink) - - # Create bus and connect several handlers - self.bus = self.pipeline.get_bus() - self.bus.add_signal_watch() - self.bus.enable_sync_message_emission() - self.bus.connect('message::eos', self._on_eos) - self.bus.connect('message::error', self._on_error) - #self.bus.connect('message::cutter', self._on_cutter) - - self.asr.connect('partial-result', self._on_partial_result) - self.asr.connect('final-result', self._on_final_result) - self.asr.connect('full-final-result', self._on_full_final_result) - - logger.info("Setting pipeline to READY") - self.pipeline.set_state(Gst.State.READY) - logger.info("Set pipeline to READY") - - def _connect_decoder(self, element, pad): - logger.info("%s: Connecting audio decoder" % self.request_id) - pad.link(self.audioconvert.get_static_pad("sink")) - logger.info("%s: Connected audio decoder" % self.request_id) - - - def _on_partial_result(self, asr, hyp): - logger.info("%s: Got partial result: %s" % (self.request_id, hyp.decode('utf8'))) - if self.result_handler: - self.result_handler(hyp, False) - - def _on_final_result(self, asr, hyp): - logger.info("%s: Got final result: %s" % (self.request_id, hyp.decode('utf8'))) - if self.result_handler: - self.result_handler(hyp, True) - - def _on_full_final_result(self, asr, result_json): - logger.info("%s: Got full final result: %s" % (self.request_id, result_json.decode('utf8'))) - if self.full_result_handler: - self.full_result_handler(result_json) - - def _on_error(self, bus, msg): - self.error = msg.parse_error() - logger.error(self.error) - self.finish_request() - if self.error_handler: - self.error_handler(self.error[0].message) - - def _on_eos(self, bus, msg): - logger.info('%s: Pipeline received eos signal' % self.request_id) - #self.decodebin.unlink(self.audioconvert) - self.finish_request() - if self.eos_handler: - self.eos_handler[0](self.eos_handler[1]) - - def get_adaptation_state(self): - return self.asr.get_property("adaptation-state") - - def set_adaptation_state(self, adaptation_state): - """Sets the adaptation state to a certian value, previously retrieved using get_adaptation_state() - - Should be called after init_request(..) 
- """ - - return self.asr.set_property("adaptation-state", adaptation_state) - - def finish_request(self): - logger.info("%s: Resetting decoder state" % self.request_id) - if self.outdir: - self.filesink.set_state(Gst.State.NULL) - self.filesink.set_property('location', "/dev/null") - self.filesink.set_state(Gst.State.PLAYING) - self.pipeline.set_state(Gst.State.NULL) - self.request_id = "" - - - def init_request(self, id, caps_str): - self.request_id = id - logger.info("%s: Initializing request" % (self.request_id)) - if caps_str and len(caps_str) > 0: - logger.info("%s: Setting caps to %s" % (self.request_id, caps_str)) - caps = Gst.caps_from_string(caps_str) - self.appsrc.set_property("caps", caps) - else: - #caps = Gst.caps_from_string("") - self.appsrc.set_property("caps", None) - #self.pipeline.set_state(Gst.State.READY) - pass - #self.appsrc.set_state(Gst.State.PAUSED) - - if self.outdir: - self.pipeline.set_state(Gst.State.PAUSED) - self.filesink.set_state(Gst.State.NULL) - self.filesink.set_property('location', "%s/%s.raw" % (self.outdir, id)) - self.filesink.set_state(Gst.State.PLAYING) - - #self.filesink.set_state(Gst.State.PLAYING) - #self.decodebin.set_state(Gst.State.PLAYING) - self.pipeline.set_state(Gst.State.PLAYING) - self.filesink.set_state(Gst.State.PLAYING) - # push empty buffer (to avoid hang on client diconnect) - #buf = Gst.Buffer.new_allocate(None, 0, None) - #self.appsrc.emit("push-buffer", buf) - - # reset adaptation state - self.set_adaptation_state("") - - def process_data(self, data): - logger.debug('%s: Pushing buffer of size %d to pipeline' % (self.request_id, len(data))) - buf = Gst.Buffer.new_allocate(None, len(data), None) - buf.fill(0, data) - self.appsrc.emit("push-buffer", buf) - logger.debug('%s: Pushing buffer done' % self.request_id) - - - def end_request(self): - logger.info("%s: Pushing EOS to pipeline" % self.request_id) - self.appsrc.emit("end-of-stream") - - def set_result_handler(self, handler): - self.result_handler = handler - - def set_full_result_handler(self, handler): - self.full_result_handler = handler - - def set_eos_handler(self, handler, user_data=None): - self.eos_handler = (handler, user_data) - - def set_error_handler(self, handler): - self.error_handler = handler - - - def cancel(self): - logger.info("%s: Sending EOS to pipeline in order to cancel processing" % self.request_id) - self.appsrc.emit("end-of-stream") - #self.asr.set_property("silent", True) - #self.pipeline.set_state(Gst.State.NULL) - - #if (self.pipeline.get_state() == Gst.State.PLAYING): - #logger.debug("Sending EOS to pipeline") - #self.pipeline.send_event(Gst.Event.new_eos()) - #self.pipeline.set_state(Gst.State.READY) - logger.info("%s: Cancelled pipeline" % self.request_id) diff --git a/lucida/speechrecognition/kaldi_gstreamer_asr/kaldigstserver/decoder2_test.py b/lucida/speechrecognition/kaldi_gstreamer_asr/kaldigstserver/decoder2_test.py deleted file mode 100644 index cdb723c95..000000000 --- a/lucida/speechrecognition/kaldi_gstreamer_asr/kaldigstserver/decoder2_test.py +++ /dev/null @@ -1,145 +0,0 @@ -# -*- coding: UTF-8 -*- - -''' -Created on Jun 27, 2013 - -@author: tanel -''' -import unittest -from gi.repository import GObject, Gst -import thread -import logging -from decoder2 import DecoderPipeline2 -import time - -class DecoderPipeline2Tests(unittest.TestCase): - - def __init__(self, *args, **kwargs): - super(DecoderPipeline2Tests, self).__init__(*args, **kwargs) - logging.basicConfig(level=logging.INFO) - - @classmethod - def setUpClass(cls): - 
decoder_conf = {"model" : "test/models/estonian/nnet2_online_ivector/final.mdl", - "word-syms" : "test/models/estonian/nnet2_online_ivector/words.txt", - "fst" : "test/models/estonian/nnet2_online_ivector/HCLG.fst", - "mfcc-config" : "test/models/estonian/nnet2_online_ivector/conf/mfcc.conf", - "ivector-extraction-config": "test/models/estonian/nnet2_online_ivector/conf/ivector_extractor.conf", - "max-active": 7000, - "beam": 11.0, - "lattice-beam": 6.0, - "do-endpointing" : True, - "endpoint-silence-phones":"1:2:3:4:5:6:7:8:9:10"} - cls.decoder_pipeline = DecoderPipeline2({"decoder" : decoder_conf}) - cls.final_hyps = [] - cls.finished = False - - cls.decoder_pipeline.set_result_handler(cls.result_getter) - cls.decoder_pipeline.set_eos_handler(cls.set_finished, cls.finished) - - loop = GObject.MainLoop() - thread.start_new_thread(loop.run, ()) - - @classmethod - def result_getter(cls, hyp, final): - if final: - cls.final_hyps.append(hyp) - - @classmethod - def set_finished(cls, finished): - cls.finished = True - - def setUp(self): - self.__class__.final_hyps = [] - self.__class__.finished = False - - - - def testCancelAfterEOS(self): - self.decoder_pipeline.init_request("testCancelAfterEOS", "audio/x-raw, layout=(string)interleaved, rate=(int)16000, format=(string)S16LE, channels=(int)1") - f = open("test/data/1234-5678.raw", "rb") - for block in iter(lambda: f.read(8000), ""): - time.sleep(0.25) - self.decoder_pipeline.process_data(block) - - self.decoder_pipeline.end_request() - self.decoder_pipeline.cancel() - while not self.finished: - time.sleep(1) - - #self.assertEqual(["üks", "kaks", "kolm", "neli", "<#s>", "viis", "kuus", "seitse", "kaheksa", "<#s>"], self.words) - - - def test12345678(self): - self.decoder_pipeline.init_request("test12345678", "audio/x-raw, layout=(string)interleaved, rate=(int)16000, format=(string)S16LE, channels=(int)1") - adaptation_state = open("test/data/adaptation_state.txt").read() - self.decoder_pipeline.set_adaptation_state(adaptation_state) - f = open("test/data/1234-5678.raw", "rb") - for block in iter(lambda: f.read(8000), ""): - time.sleep(0.25) - self.decoder_pipeline.process_data(block) - - self.decoder_pipeline.end_request() - - - while not self.finished: - time.sleep(1) - self.assertEqual(["üks kaks kolm neli", "viis kuus seitse kaheksa"], self.final_hyps) - - def test8k(self): - self.decoder_pipeline.init_request("test8k", "audio/x-raw, layout=(string)interleaved, rate=(int)8000, format=(string)S16LE, channels=(int)1") - f = open("test/data/1234-5678.8k.raw", "rb") - for block in iter(lambda: f.read(4000), ""): - time.sleep(0.25) - self.decoder_pipeline.process_data(block) - - self.decoder_pipeline.end_request() - - - while not self.finished: - time.sleep(1) - self.assertEqual(["üks kaks kolm neli", "viis kuus seitse kaheksa"], self.final_hyps) - - def testDisconnect(self): - self.decoder_pipeline.init_request("testDisconnect", "audio/x-raw, layout=(string)interleaved, rate=(int)8000, format=(string)S16LE, channels=(int)1") - - self.decoder_pipeline.end_request() - - - while not self.finished: - time.sleep(1) - self.assertEqual([], self.final_hyps) - - - def testWav(self): - self.decoder_pipeline.init_request("testWav", "") - f = open("test/data/test_with_silence.wav", "rb") - for block in iter(lambda: f.read(48000*2*2/4), ""): - time.sleep(0.25) - self.decoder_pipeline.process_data(block) - - self.decoder_pipeline.end_request() - - while not self.finished: - time.sleep(1) - self.assertEqual(["see on esimene lause pärast mida tuleb vaikus", 
"nüüd tuleb teine lause"], self.final_hyps) - - def testOgg(self): - self.decoder_pipeline.init_request("testOgg", "") - f = open("test/data/test_2lauset.ogg", "rb") - for block in iter(lambda: f.read(86*1024/8/4), ""): - time.sleep(0.25) - self.decoder_pipeline.process_data(block) - - self.decoder_pipeline.end_request() - - - while not self.finished: - time.sleep(1) - self.assertEqual("see on esimene lause see on teine lause", " ".join(self.final_hyps)) - -def main(): - unittest.main() - -if __name__ == '__main__': - main() \ No newline at end of file diff --git a/lucida/speechrecognition/kaldi_gstreamer_asr/kaldigstserver/decoder_test.py b/lucida/speechrecognition/kaldi_gstreamer_asr/kaldigstserver/decoder_test.py deleted file mode 100644 index f2c118405..000000000 --- a/lucida/speechrecognition/kaldi_gstreamer_asr/kaldigstserver/decoder_test.py +++ /dev/null @@ -1,194 +0,0 @@ -# -*- coding: UTF-8 -*- - -''' -Created on Jun 27, 2013 - -@author: tanel -''' -import unittest -from gi.repository import GObject, Gst -import thread -import logging -from decoder import DecoderPipeline -import time - -class DecoderPipelineTests(unittest.TestCase): - - def __init__(self, *args, **kwargs): - super(DecoderPipelineTests, self).__init__(*args, **kwargs) - logging.basicConfig(level=logging.INFO) - - @classmethod - def setUpClass(cls): - decoder_conf = {"model" : "test/models/estonian/tri2b_mmi_pruned/final.mdl", - "lda-mat" : "test/models/estonian/tri2b_mmi_pruned/final.mat", - "word-syms" : "test/models/estonian/tri2b_mmi_pruned/words.txt", - "fst" : "test/models/estonian/tri2b_mmi_pruned/HCLG.fst", - "silence-phones" : "6"} - cls.decoder_pipeline = DecoderPipeline({"decoder" : decoder_conf}) - cls.words = [] - cls.finished = False - - cls.decoder_pipeline.set_word_handler(cls.word_getter) - cls.decoder_pipeline.set_eos_handler(cls.set_finished, cls.finished) - - loop = GObject.MainLoop() - thread.start_new_thread(loop.run, ()) - - @classmethod - def word_getter(cls, word): - cls.words.append(word) - - @classmethod - def set_finished(cls, finished): - cls.finished = True - - def setUp(self): - self.__class__.words = [] - self.__class__.finished = False - - - - def testCancelAfterEOS(self): - self.decoder_pipeline.init_request("testCancelAfterEOS", "audio/x-raw, layout=(string)interleaved, rate=(int)16000, format=(string)S16LE, channels=(int)1") - f = open("test/data/1234-5678.raw", "rb") - for block in iter(lambda: f.read(8000), ""): - time.sleep(0.25) - self.decoder_pipeline.process_data(block) - - self.decoder_pipeline.end_request() - self.decoder_pipeline.cancel() - while not self.finished: - time.sleep(1) - - #self.assertEqual(["üks", "kaks", "kolm", "neli", "<#s>", "viis", "kuus", "seitse", "kaheksa", "<#s>"], self.words) - - - def test12345678(self): - self.decoder_pipeline.init_request("test12345678", "audio/x-raw, layout=(string)interleaved, rate=(int)16000, format=(string)S16LE, channels=(int)1") - f = open("test/data/1234-5678.raw", "rb") - for block in iter(lambda: f.read(8000), ""): - time.sleep(0.25) - self.decoder_pipeline.process_data(block) - - self.decoder_pipeline.end_request() - - - while not self.finished: - time.sleep(1) - self.assertEqual(["üks", "kaks", "kolm", "neli", "<#s>", "viis", "kuus", "seitse", "kaheksa", "<#s>"], self.words) - - def testWav(self): - self.decoder_pipeline.init_request("testWav", "") - f = open("test/data/lause2.wav", "rb") - for block in iter(lambda: f.read(16000*2*2/4), ""): - time.sleep(0.25) - self.decoder_pipeline.process_data(block) - - 
self.decoder_pipeline.end_request() - - - while not self.finished: - time.sleep(1) - self.assertEqual("see on teine lause <#s>".split(), self.words) - - def testOgg(self): - self.decoder_pipeline.init_request("testOgg", "") - f = open("test/data/test_2lauset.ogg", "rb") - for block in iter(lambda: f.read(86*1024/8/4), ""): - time.sleep(0.25) - self.decoder_pipeline.process_data(block) - - self.decoder_pipeline.end_request() - - - while not self.finished: - time.sleep(1) - self.assertEqual("see on esimene lause <#s> see on teine lause <#s>".split(), self.words) - - - - def __testDecoder(self): - finished = [False] - - - - - def do_shit(): - decoder_pipeline.init_request("test0", "audio/x-raw, layout=(string)interleaved, rate=(int)16000, format=(string)S16LE, channels=(int)1") - f = open("test/data/1234-5678.raw", "rb") - for block in iter(lambda: f.read(8000), ""): - time.sleep(0.25) - decoder_pipeline.process_data(block) - - decoder_pipeline.end_request() - - do_shit() - - while not finished[0]: - time.sleep(1) - self.assertEqual(["üks", "kaks", "kolm", "neli", "<#s>", "viis", "kuus", "seitse", "kaheksa", "<#s>"], words) - - words = [] - - finished[0] = False - do_shit() - while not finished[0]: - time.sleep(1) - - self.assertItemsEqual(["see", "on", "teine", "lause", "<#s>"], words, "Recognition result") - - # Now test cancelation of a long submitted file - words = [] - decoder_pipeline.init_request("test0", "audio/x-raw, layout=(string)interleaved, rate=(int)16000, format=(string)S16LE, channels=(int)1") - f = open("test/data/etteytlus.raw", "rb") - decoder_pipeline.process_data(f.read()) - time.sleep(3) - decoder_pipeline.cancel() - print "Pipeline cancelled" - - words = [] - finished[0] = False - decoder_pipeline.init_request("test0", "audio/x-raw, layout=(string)interleaved, rate=(int)16000, format=(string)S16LE, channels=(int)1") - # read and send everything - f = open("test/data/lause2.raw", "rb") - decoder_pipeline.process_data(f.read(10*16000)) - decoder_pipeline.end_request() - while not finished[0]: - time.sleep(1) - self.assertItemsEqual(["see", "on", "teine", "lause", "<#s>"], words, "Recognition result") - - #test cancelling without anything sent - decoder_pipeline.init_request("test0", "audio/x-raw, layout=(string)interleaved, rate=(int)16000, format=(string)S16LE, channels=(int)1") - decoder_pipeline.cancel() - print "Pipeline cancelled" - - words = [] - finished[0] = False - decoder_pipeline.init_request("test0", "audio/x-wav") - # read and send everything - f = open("test/data/lause2.wav", "rb") - decoder_pipeline.process_data(f.read()) - decoder_pipeline.end_request() - while not finished[0]: - time.sleep(1) - self.assertItemsEqual(["see", "on", "teine", "lause", "<#s>"], words, "Recognition result") - - words = [] - finished[0] = False - decoder_pipeline.init_request("test0", "audio/ogg") - # read and send everything - f = open("test/data/test_2lauset.ogg", "rb") - decoder_pipeline.process_data(f.read(10*16000)) - - decoder_pipeline.end_request() - while not finished[0]: - time.sleep(1) - self.assertItemsEqual("see on esimene lause <#s> see on teine lause <#s>".split(), words, "Recognition result") - - -def main(): - unittest.main() - -if __name__ == '__main__': - main() \ No newline at end of file diff --git a/lucida/speechrecognition/kaldi_gstreamer_asr/kaldigstserver/master_server.py b/lucida/speechrecognition/kaldi_gstreamer_asr/kaldigstserver/master_server.py deleted file mode 100644 index dd7b51506..000000000 --- 
a/lucida/speechrecognition/kaldi_gstreamer_asr/kaldigstserver/master_server.py +++ /dev/null @@ -1,351 +0,0 @@ -#!/usr/bin/env python -# -# Copyright 2013 Tanel Alumae - -""" -Reads speech data via websocket requests, sends it to Redis, waits for results from Redis and -forwards to client via websocket -""" -import sys -import logging -import json -import codecs -import os.path -import uuid -import time -import threading -import functools -from Queue import Queue - -import tornado.ioloop -import tornado.options -import tornado.web -import tornado.websocket -import tornado.gen -import tornado.concurrent -import settings -import common -import os -if os.environ.get('WSS'): - import ssl - - -class Application(tornado.web.Application): - def __init__(self): - settings = dict( - cookie_secret="43oETzKXQAGaYdkL5gEmGeJJFuYh7EQnp2XdTP1o/Vo=", - template_path=os.path.join(os.path.dirname(os.path.dirname(__file__)), "templates"), - static_path=os.path.join(os.path.dirname(os.path.dirname(__file__)), "static"), - xsrf_cookies=False, - autoescape=None, - ) - - handlers = [ - (r"/", MainHandler), - (r"/client/ws/speech", DecoderSocketHandler), - (r"/client/ws/status", StatusSocketHandler), - (r"/client/dynamic/reference", ReferenceHandler), - (r"/client/dynamic/recognize", HttpChunkedRecognizeHandler), - (r"/worker/ws/speech", WorkerSocketHandler), - (r"/client/static/(.*)", tornado.web.StaticFileHandler, {'path': settings["static_path"]}), - ] - tornado.web.Application.__init__(self, handlers, **settings) - self.available_workers = set() - self.status_listeners = set() - self.num_requests_processed = 0 - - def send_status_update_single(self, ws): - status = dict(num_workers_available=len(self.available_workers), num_requests_processed=self.num_requests_processed) - ws.write_message(json.dumps(status)) - - def send_status_update(self): - for ws in self.status_listeners: - self.send_status_update_single(ws) - - def save_reference(self, content_id, content): - refs = {} - try: - with open("reference-content.json") as f: - refs = json.load(f) - except: - pass - refs[content_id] = content - with open("reference-content.json", "w") as f: - json.dump(refs, f, indent=2) - - -class MainHandler(tornado.web.RequestHandler): - def get(self): - current_directory = os.path.dirname(os.path.abspath(__file__)) - parent_directory = os.path.join(current_directory, os.pardir) - readme = os.path.join(parent_directory, "README.md") - self.render(readme) - - -def run_async(func): - @functools.wraps(func) - def async_func(*args, **kwargs): - func_hl = threading.Thread(target=func, args=args, kwargs=kwargs) - func_hl.start() - return func_hl - - return async_func - - -def content_type_to_caps(content_type): - """ - Converts MIME-style raw audio content type specifier to GStreamer CAPS string - """ - default_attributes= {"rate": 16000, "format" : "S16LE", "channels" : 1, "layout" : "interleaved"} - media_type, _, attr_string = content_type.replace(";", ",").partition(",") - if media_type in ["audio/x-raw", "audio/x-raw-int"]: - media_type = "audio/x-raw" - attributes = default_attributes - for (key,_,value) in [p.partition("=") for p in attr_string.split(",")]: - attributes[key.strip()] = value.strip() - return "%s, %s" % (media_type, ", ".join(["%s=%s" % (key, value) for (key,value) in attributes.iteritems()])) - else: - return content_type - - -@tornado.web.stream_request_body -class HttpChunkedRecognizeHandler(tornado.web.RequestHandler): - """ - Provides a HTTP POST/PUT interface supporting chunked transfer requests, 
similar to that provided by - http://github.com/alumae/ruby-pocketsphinx-server. - """ - - def prepare(self): - self.id = str(uuid.uuid4()) - self.final_hyp = "" - self.final_result_queue = Queue() - self.user_id = self.request.headers.get("device-id", "none") - self.content_id = self.request.headers.get("content-id", "none") - logging.info("%s: OPEN: user='%s', content='%s'" % (self.id, self.user_id, self.content_id)) - self.worker = None - self.error_status = 0 - self.error_message = None - try: - self.worker = self.application.available_workers.pop() - self.application.send_status_update() - logging.info("%s: Using worker %s" % (self.id, self.__str__())) - self.worker.set_client_socket(self) - - content_type = self.request.headers.get("Content-Type", None) - if content_type: - content_type = content_type_to_caps(content_type) - logging.info("%s: Using content type: %s" % (self.id, content_type)) - - self.worker.write_message(json.dumps(dict(id=self.id, content_type=content_type, user_id=self.user_id, content_id=self.content_id))) - except KeyError: - logging.warn("%s: No worker available for client request" % self.id) - self.set_status(503) - self.finish("No workers available") - - def data_received(self, chunk): - assert self.worker is not None - logging.debug("%s: Forwarding client message of length %d to worker" % (self.id, len(chunk))) - self.worker.write_message(chunk, binary=True) - - def post(self, *args, **kwargs): - self.end_request(args, kwargs) - - def put(self, *args, **kwargs): - self.end_request(args, kwargs) - - @run_async - def get_final_hyp(self, callback=None): - logging.info("%s: Waiting for final result..." % self.id) - callback(self.final_result_queue.get(block=True)) - - @tornado.web.asynchronous - @tornado.gen.coroutine - def end_request(self, *args, **kwargs): - logging.info("%s: Handling the end of chunked recognize request" % self.id) - assert self.worker is not None - self.worker.write_message("EOS", binary=True) - logging.info("%s: yielding..." % self.id) - hyp = yield tornado.gen.Task(self.get_final_hyp) - if self.error_status == 0: - logging.info("%s: Final hyp: %s" % (self.id, hyp)) - response = {"status" : 0, "id": self.id, "hypotheses": [{"utterance" : hyp}]} - self.write(response) - else: - logging.info("%s: Error (status=%d) processing HTTP request: %s" % (self.id, self.error_status, self.error_message)) - response = {"status" : self.error_status, "id": self.id, "message": self.error_message} - self.write(response) - self.application.num_requests_processed += 1 - self.application.send_status_update() - self.worker.set_client_socket(None) - self.worker.close() - self.finish() - logging.info("Everything done") - - def send_event(self, event): - event_str = str(event) - if len(event_str) > 100: - event_str = event_str[:97] + "..." 
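# --- annotation (not part of the original file): a minimal client-side sketch
# for the chunked POST/PUT handler above. Server address, file path and header
# values are assumptions for illustration; the header names mirror prepare()
# ("device-id"/"content-id") and the caps string matches the test data used
# elsewhere in this diff. Passing an iterator makes `requests` send the body
# with chunked transfer encoding, which is what this handler expects.
import requests

def recognize_chunked(path, url="http://localhost:8888/client/dynamic/recognize"):
    caps = ("audio/x-raw, layout=(string)interleaved, rate=(int)16000, "
            "format=(string)S16LE, channels=(int)1")
    with open(path, "rb") as f:
        chunks = iter(lambda: f.read(8000), b"")  # stream the audio in 8 KB chunks
        r = requests.post(url, data=chunks,
                          headers={"Content-Type": caps,
                                   "device-id": "demo", "content-id": "demo"})
    # on success end_request() replies with
    # {"status": 0, "id": ..., "hypotheses": [{"utterance": ...}]}
    return r.json()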
- logging.info("%s: Receiving event %s from worker" % (self.id, event_str)) - if event["status"] == 0 and ("result" in event): - try: - if len(event["result"]["hypotheses"]) > 0 and event["result"]["final"]: - if len(self.final_hyp) > 0: - self.final_hyp += " " - self.final_hyp += event["result"]["hypotheses"][0]["transcript"] - except: - e = sys.exc_info()[0] - logging.warn("Failed to extract hypothesis from recognition result:" + e) - elif event["status"] != 0: - self.error_status = event["status"] - self.error_message = event.get("message", "") - - def close(self): - logging.info("%s: Receiving 'close' from worker" % (self.id)) - self.final_result_queue.put(self.final_hyp) - - -class ReferenceHandler(tornado.web.RequestHandler): - def post(self, *args, **kwargs): - content_id = self.request.headers.get("Content-Id") - if content_id: - content = codecs.decode(self.request.body, "utf-8") - user_id = self.request.headers.get("User-Id", "") - self.application.save_reference(content_id, dict(content=content, user_id=user_id, time=time.strftime("%Y-%m-%dT%H:%M:%S"))) - logging.info("Received reference text for content %s and user %s" % (content_id, user_id)) - self.set_header('Access-Control-Allow-Origin', '*') - else: - self.set_status(400) - self.finish("No Content-Id specified") - - def options(self, *args, **kwargs): - self.set_header('Access-Control-Allow-Origin', '*') - self.set_header('Access-Control-Allow-Methods', 'POST, OPTIONS') - self.set_header('Access-Control-Max-Age', 1000) - # note that '*' is not valid for Access-Control-Allow-Headers - self.set_header('Access-Control-Allow-Headers', 'origin, x-csrftoken, content-type, accept, User-Id, Content-Id') - - -class StatusSocketHandler(tornado.websocket.WebSocketHandler): - # needed for Tornado 4.0 - def check_origin(self, origin): - return True - - def open(self): - logging.info("New status listener") - self.application.status_listeners.add(self) - self.application.send_status_update_single(self) - - def on_close(self): - logging.info("Status listener left") - self.application.status_listeners.remove(self) - - -class WorkerSocketHandler(tornado.websocket.WebSocketHandler): - def __init__(self, application, request, **kwargs): - tornado.websocket.WebSocketHandler.__init__(self, application, request, **kwargs) - self.client_socket = None - - # needed for Tornado 4.0 - def check_origin(self, origin): - return True - - def open(self): - self.client_socket = None - self.application.available_workers.add(self) - logging.info("New worker available " + self.__str__()) - self.application.send_status_update() - - def on_close(self): - logging.info("Worker " + self.__str__() + " leaving") - self.application.available_workers.discard(self) - if self.client_socket: - self.client_socket.close() - self.application.send_status_update() - - def on_message(self, message): - assert self.client_socket is not None - event = json.loads(message) - self.client_socket.send_event(event) - - def set_client_socket(self, client_socket): - self.client_socket = client_socket - - -class DecoderSocketHandler(tornado.websocket.WebSocketHandler): - # needed for Tornado 4.0 - def check_origin(self, origin): - return True - - def send_event(self, event): - event["id"] = self.id - event_str = str(event) - if len(event_str) > 100: - event_str = event_str[:97] + "..." 
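# --- annotation (not part of the original file): sketch of a client for the
# /client/ws/speech endpoint handled below, using ws4py just as worker.py does.
# URL, query arguments and file path are illustrative assumptions. Binary
# frames carry audio, a final text frame "EOS" ends the utterance, and every
# received message is a JSON event of the shape built by send_event().
import time
from ws4py.client.threadedclient import WebSocketClient

class SpeechClient(WebSocketClient):
    def received_message(self, m):
        # e.g. {"status": 0, "result": {"hypotheses": [...], "final": true}}
        print(m)

client = SpeechClient("ws://localhost:8888/client/ws/speech?content-type=")
client.connect()
with open("test/data/1234-5678.raw", "rb") as f:
    for block in iter(lambda: f.read(8000), b""):
        client.send(block, binary=True)  # forwarded verbatim to the worker
        time.sleep(0.25)                 # pace the stream like the tests above
client.send("EOS", binary=False)         # tells the worker to finalize
client.run_forever()                     # block until the server closes the socket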
- logging.info("%s: Sending event %s to client" % (self.id, event_str)) - self.write_message(json.dumps(event)) - - def open(self): - self.id = str(uuid.uuid4()) - logging.info("%s: OPEN" % (self.id)) - logging.info("%s: Request arguments: %s" % (self.id, " ".join(["%s=\"%s\"" % (a, self.get_argument(a)) for a in self.request.arguments]))) - self.user_id = self.get_argument("user-id", "none", True) - self.content_id = self.get_argument("content-id", "none", True) - self.worker = None - try: - self.worker = self.application.available_workers.pop() - self.application.send_status_update() - logging.info("%s: Using worker %s" % (self.id, self.__str__())) - self.worker.set_client_socket(self) - - content_type = self.get_argument("content-type", None, True) - if content_type: - logging.info("%s: Using content type: %s" % (self.id, content_type)) - - self.worker.write_message(json.dumps(dict(id=self.id, content_type=content_type, user_id=self.user_id, content_id=self.content_id))) - except KeyError: - logging.warn("%s: No worker available for client request" % self.id) - event = dict(status=common.STATUS_NOT_AVAILABLE, message="No decoder available, try again later") - self.send_event(event) - self.close() - - def on_connection_close(self): - logging.info("%s: Handling on_connection_close()" % self.id) - self.application.num_requests_processed += 1 - self.application.send_status_update() - if self.worker: - try: - self.worker.set_client_socket(None) - logging.info("%s: Closing worker connection" % self.id) - self.worker.close() - except: - pass - - def on_message(self, message): - assert self.worker is not None - logging.info("%s: Forwarding client message (%s) of length %d to worker" % (self.id, type(message), len(message))) - if isinstance(message, unicode): - self.worker.write_message(message, binary=False) - else: - self.worker.write_message(message, binary=True) - - -def main(): - logging.basicConfig(level=logging.DEBUG, format="%(levelname)8s %(asctime)s %(message)s ") - logging.debug('Starting up server') - from tornado.options import options - - tornado.options.parse_command_line() - app = Application() - if os.environ.get('WSS'): - ssl_ctx = ssl.create_default_context(ssl.Purpose.CLIENT_AUTH) - ssl_ctx.load_cert_chain('/etc/letsencrypt/live/host/cert1.pem', '/etc/letsencrypt/live/host/privkey1.pem') - logging.info('wss') - app.listen(options.port, ssl_options={"certfile": '/etc/letsencrypt/live/host/cert1.pem', "keyfile": '/etc/letsencrypt/live/host/privkey1.pem'}) - else: - logging.info('ws') - app.listen(options.port) - tornado.ioloop.IOLoop.instance().start() - - -if __name__ == "__main__": - main() diff --git a/lucida/speechrecognition/kaldi_gstreamer_asr/kaldigstserver/settings.py b/lucida/speechrecognition/kaldi_gstreamer_asr/kaldigstserver/settings.py deleted file mode 100644 index c3471b83b..000000000 --- a/lucida/speechrecognition/kaldi_gstreamer_asr/kaldigstserver/settings.py +++ /dev/null @@ -1,9 +0,0 @@ -''' -Created on Jun 7, 2013 - -@author: tanel -''' - -from tornado.options import define - -define("port", default=8888, help="run on the given port", type=int) diff --git a/lucida/speechrecognition/kaldi_gstreamer_asr/kaldigstserver/test-buffer.py b/lucida/speechrecognition/kaldi_gstreamer_asr/kaldigstserver/test-buffer.py deleted file mode 100644 index 262d74f45..000000000 --- a/lucida/speechrecognition/kaldi_gstreamer_asr/kaldigstserver/test-buffer.py +++ /dev/null @@ -1,31 +0,0 @@ -import gi -gi.require_version('Gst', '1.0') -from gi.repository import GObject, Gst - 
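# --- annotation (not part of the original diff): this scratch script exercises
# the same appsrc mechanics the decoder pipelines above rely on -- allocate a
# Gst.Buffer, fill() it with raw bytes, emit "push-buffer", then send EOS --
# but wires appsrc straight into a filesink so the bytes can be read back from
# test.dat and compared by eye.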
-GObject.threads_init() -Gst.init(None) - -appsrc = Gst.ElementFactory.make("appsrc", "appsrc") -filesink = Gst.ElementFactory.make("filesink", "filesink") -filesink.set_property("location", "test.dat") - -pipeline = Gst.Pipeline() -pipeline.add(appsrc) -pipeline.add(filesink) -appsrc.link(filesink) -pipeline.set_state(Gst.State.PLAYING) - -data = "1234" * 12 -print "Using data: %s" % data - -buf = Gst.Buffer.new_allocate(None, len(data), None) -buf.fill(0, data) -#for (i, c) in enumerate(data): -# buf.memset(i, c, 1) -appsrc.emit("push-buffer", buf) - -pipeline.send_event(Gst.Event.new_eos()) - -result = open("test.dat").read() - -print "Result : %s" % result \ No newline at end of file diff --git a/lucida/speechrecognition/kaldi_gstreamer_asr/kaldigstserver/worker.py b/lucida/speechrecognition/kaldi_gstreamer_asr/kaldigstserver/worker.py deleted file mode 100644 index 76437ba1e..000000000 --- a/lucida/speechrecognition/kaldi_gstreamer_asr/kaldigstserver/worker.py +++ /dev/null @@ -1,417 +0,0 @@ -__author__ = 'tanel' - -import logging -import logging.config -import time -import thread -import argparse -from subprocess import Popen, PIPE -from gi.repository import GObject -import yaml -import json -import sys -import locale -import codecs -import zlib -import base64 -import time - - -from ws4py.client.threadedclient import WebSocketClient -import ws4py.messaging - -from decoder import DecoderPipeline -from decoder2 import DecoderPipeline2 -import common - -import os - -empty_response = '' -# unknown_response = 'UNKNOWN' -# lucida_service = os.environ.get("LUCIDA_SERVICE") - -# if not lucida_service == None: -# sys.path.append( -# os.path.abspath(os.path.join(os.path.dirname(__file__), os.path.pardir)) + "/kaldigstserver/gen-py") - -# from thrift.transport import TTransport -# from thrift.transport import TSocket -# from thrift.transport import TSSLSocket -# from thrift.transport import THttpClient -# from thrift.protocol import TBinaryProtocol - -# from commandcenter import CommandCenter -# from commandcenter.ttypes import * - -# lucida_toks = lucida_service.split(':') -# assert len(lucida_toks) == 2 -# socket = TSocket.TSocket(lucida_toks[0], int(lucida_toks[1])) -# transport = TTransport.TBufferedTransport(socket) -# protocol = TBinaryProtocol.TBinaryProtocol(transport) -# client = CommandCenter.Client(protocol) -# transport.open() -# def call_commandcenter(transcript): -# qd = QueryData(textData=transcript) -# return client.handleRequest(qd) - -# else: -# def call_commandcenter(transcript): -# print '@@@@@@@@@@@@@@@', transcript -# return unknown_response - -logger = logging.getLogger(__name__) - -CONNECT_TIMEOUT = 5 -SILENCE_TIMEOUT = 5 -USE_NNET2 = False - -class ServerWebsocket(WebSocketClient): - STATE_CREATED = 0 - STATE_CONNECTED = 1 - STATE_INITIALIZED = 2 - STATE_PROCESSING = 3 - STATE_EOS_RECEIVED = 7 - STATE_CANCELLING = 8 - STATE_FINISHED = 100 - - def __init__(self, uri, decoder_pipeline, post_processor, full_post_processor=None): - self.coachtranscript = empty_response - self.uri = uri - self.decoder_pipeline = decoder_pipeline - self.post_processor = post_processor - self.full_post_processor = full_post_processor - WebSocketClient.__init__(self, url=uri, heartbeat_freq=10) - self.pipeline_initialized = False - self.partial_transcript = "" - if USE_NNET2: - self.decoder_pipeline.set_result_handler(self._on_result) - self.decoder_pipeline.set_full_result_handler(self._on_full_result) - self.decoder_pipeline.set_error_handler(self._on_error) - else: - 
self.decoder_pipeline.set_word_handler(self._on_word) - self.decoder_pipeline.set_error_handler(self._on_error) - self.decoder_pipeline.set_eos_handler(self._on_eos) - self.state = self.STATE_CREATED - self.last_decoder_message = time.time() - self.request_id = "" - self.timeout_decoder = 5 - self.num_segments = 0 - self.last_partial_result = "" - - def opened(self): - logger.info("Opened websocket connection to server") - self.state = self.STATE_CONNECTED - self.last_partial_result = "" - - def guard_timeout(self): - global SILENCE_TIMEOUT - while self.state in [self.STATE_CONNECTED, self.STATE_INITIALIZED, self.STATE_PROCESSING]: - if time.time() - self.last_decoder_message > SILENCE_TIMEOUT: - logger.warning("%s: More than %d seconds from last decoder hypothesis update, cancelling" % (self.request_id, SILENCE_TIMEOUT)) - self.finish_request() - event = dict(status=common.STATUS_NO_SPEECH) - try: - self.send(json.dumps(event)) - except: - logger.warning("%s: Failed to send error event to master" % (self.request_id)) - self.close() - return - logger.debug("%s: Checking that decoder hasn't been silent for more than %d seconds" % (self.request_id, SILENCE_TIMEOUT)) - time.sleep(1) - - - def received_message(self, m): - logger.debug("%s: Got message from server of type %s" % (self.request_id, str(type(m)))) - if self.state == self.__class__.STATE_CONNECTED: - props = json.loads(str(m)) - content_type = props['content_type'] - self.request_id = props['id'] - self.num_segments = 0 - self.decoder_pipeline.init_request(self.request_id, content_type) - self.last_decoder_message = time.time() - thread.start_new_thread(self.guard_timeout, ()) - logger.info("%s: Started timeout guard" % self.request_id) - logger.info("%s: Initialized request" % self.request_id) - self.state = self.STATE_INITIALIZED - elif m.data == "EOS": - if self.state != self.STATE_CANCELLING and self.state != self.STATE_EOS_RECEIVED and self.state != self.STATE_FINISHED: - self.decoder_pipeline.end_request() - self.state = self.STATE_EOS_RECEIVED - else: - logger.info("%s: Ignoring EOS, worker already in state %d" % (self.request_id, self.state)) - else: - if self.state != self.STATE_CANCELLING and self.state != self.STATE_EOS_RECEIVED and self.state != self.STATE_FINISHED: - if isinstance(m, ws4py.messaging.BinaryMessage): - self.decoder_pipeline.process_data(m.data) - self.state = self.STATE_PROCESSING - elif isinstance(m, ws4py.messaging.TextMessage): - props = json.loads(str(m)) - if 'adaptation_state' in props: - as_props = props['adaptation_state'] - if as_props.get('type', "") == "string+gzip+base64": - adaptation_state = zlib.decompress(base64.b64decode(as_props.get('value', ''))) - logger.info("%s: Setting adaptation state to user-provided value" % (self.request_id)) - self.decoder_pipeline.set_adaptation_state(adaptation_state) - else: - logger.warning("%s: Cannot handle adaptation state type " % (self.request_id, as_props.get('type', ""))) - else: - logger.warning("%s: Got JSON message but don't know what to do with it" % (self.request_id)) - else: - logger.info("%s: Ignoring data, worker already in state %d" % (self.request_id, self.state)) - - - def finish_request(self): - if self.state == self.STATE_CONNECTED: - # connection closed when we are not doing anything - self.decoder_pipeline.finish_request() - self.state = self.STATE_FINISHED - return - if self.state == self.STATE_INITIALIZED: - # connection closed when request initialized but with no data sent - self.decoder_pipeline.finish_request() - self.state = 
self.STATE_FINISHED - return - if self.state != self.STATE_FINISHED: - logger.info("%s: Master disconnected before decoder reached EOS?" % self.request_id) - self.state = self.STATE_CANCELLING - self.decoder_pipeline.cancel() - counter = 0 - while self.state == self.STATE_CANCELLING: - counter += 1 - if counter > 30: - # lost hope that the decoder will ever finish, likely it has hung - # FIXME: this might introduce new bugs - logger.info("%s: Giving up waiting after %d tries" % (self.request_id, counter)) - self.state = self.STATE_FINISHED - else: - logger.info("%s: Waiting for EOS from decoder" % self.request_id) - time.sleep(1) - self.decoder_pipeline.finish_request() - logger.info("%s: Finished waiting for EOS" % self.request_id) - - - def closed(self, code, reason=None): - logger.debug("%s: Websocket closed() called" % self.request_id) - self.finish_request() - logger.debug("%s: Websocket closed() finished" % self.request_id) - - def _on_result(self, result, final): - if final: - # final results are handled by _on_full_result() - transcript = result.decode('utf8') - self.coachtranscript += ' ' + transcript - return - self.last_decoder_message = time.time() - if self.last_partial_result == result: - return - self.last_partial_result = result - logger.info("%s: Postprocessing (final=%s) result.." % (self.request_id, final)) - if final: - logger.info("%s: Before postprocessing: %s" % (self.request_id, result)) - processed_transcript = self.post_process(result) - logger.info("%s: Postprocessing done." % self.request_id) - if final: - logger.info("%s: After postprocessing: %s" % (self.request_id, processed_transcript)) - - event = dict(status=common.STATUS_SUCCESS, - segment=self.num_segments, - result=dict(hypotheses=[dict(transcript=processed_transcript)], final=final)) - try: - self.send(json.dumps(event)) - except: - e = sys.exc_info()[1] - logger.warning("Failed to send event to master: %s" % e) - - def _on_full_result(self, full_result_json): - self.last_decoder_message = time.time() - full_result = json.loads(full_result_json) - if full_result.get("status", -1) == common.STATUS_SUCCESS: - #logger.info("%s: Postprocessing (final=%s) result.." % (self.request_id, final)) - logger.debug("%s: Before postprocessing: %s" % (self.request_id, full_result)) - full_result = self.post_process_full(full_result) - logger.info("%s: Postprocessing done." % self.request_id) - logger.debug("%s: After postprocessing: %s" % (self.request_id, full_result)) - - try: - self.send(json.dumps(full_result)) - except: - e = sys.exc_info()[1] - logger.warning("Failed to send event to master: %s" % e) - if full_result.get("result", {}).get("final", True): - self.num_segments += 1 - self.last_partial_result = "" - else: - logger.info("%s: Result status is %d, forwarding the result to the server anyway" % (self.request_id, full_result.get("status", -1))) - try: - self.send(json.dumps(full_result)) - except: - e = sys.exc_info()[1] - logger.warning("Failed to send event to master: %s" % e) - - - def _on_word(self, word): - self.last_decoder_message = time.time() - if word != "<#s>": - if len(self.partial_transcript) > 0: - self.partial_transcript += " " - self.partial_transcript += word - logger.debug("%s: Postprocessing partial result.." % self.request_id) - processed_transcript = self.post_process(self.partial_transcript) - logger.debug("%s: Postprocessing done." 
% self.request_id) - - event = dict(status=common.STATUS_SUCCESS, - segment=self.num_segments, - result=dict(hypotheses=[dict(transcript=processed_transcript)], final=False)) - self.send(json.dumps(event)) - else: - logger.info("%s: Postprocessing final result.." % self.request_id) - processed_transcript = self.post_process(self.partial_transcript) - logger.info("%s: Postprocessing done." % self.request_id) - event = dict(status=common.STATUS_SUCCESS, - segment=self.num_segments, - result=dict(hypotheses=[dict(transcript=processed_transcript)], final=True)) - self.send(json.dumps(event)) - self.partial_transcript = "" - self.num_segments += 1 - - - def _on_eos(self, data=None): - self.last_decoder_message = time.time() - self.state = self.STATE_FINISHED - #self.send_adaptation_state() - self.close() - - def _on_error(self, error): - self.state = self.STATE_FINISHED - event = dict(status=common.STATUS_NOT_ALLOWED, message=error) - try: - self.send(json.dumps(event)) - except: - e = sys.exc_info()[1] - logger.warning("Failed to send event to master: %s" % e) - self.close() - - # def send_adaptation_state(self): - # if hasattr(self.decoder_pipeline, 'get_adaptation_state'): - # logger.info("%s: Sending adaptation state to client..." % (self.request_id)) - # adaptation_state = self.decoder_pipeline.get_adaptation_state() - - - # print '########################' - # logger.info('$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$') - # print self.coachtranscript - # logger.info(self.coachtranscript) - - - - - # event = dict(status=common.STATUS_SUCCESS, - # hascoachresponse=True, - # coachresponse=call_commandcenter(self.coachtranscript), - # adaptation_state=dict(id=self.request_id, - # value=base64.b64encode(zlib.compress(adaptation_state)), - # type="string+gzip+base64", - # time=time.strftime("%Y-%m-%dT%H:%M:%S"))) - # self.coachtranscript = empty_response - # try: - # self.send(json.dumps(event)) - # except: - # e = sys.exc_info()[1] - # logger.warning("Failed to send event to master: " + str(e)) - # else: - # logger.info("%s: Adaptation state not supported by the decoder, not sending it." 
% (self.request_id)) - - - def post_process(self, text): - if self.post_processor: - self.post_processor.stdin.write("%s\n" % text) - self.post_processor.stdin.flush() - text = self.post_processor.stdout.readline() - text = text.strip() - text = text.replace("\\n", "\n") - return text - else: - return text - - def post_process_full(self, full_result): - if self.full_post_processor: - self.full_post_processor.stdin.write("%s\n\n" % json.dumps(full_result)) - self.post_processor.stdin.flush() - lines = [] - while True: - l = self.full_post_processor.stdout.readline() - if not l: break # EOF - if l.strip() == "": - break - lines.append(l) - full_result = json.loads("".join(lines)) - - elif self.post_processor: - for hyp in full_result.get("result", {}).get("hypotheses", []): - hyp["original-transcript"] = hyp["transcript"] - hyp["transcript"] = self.post_process(hyp["transcript"]) - return full_result - - - - -def main(): - logging.basicConfig(level=logging.DEBUG, format="%(levelname)8s %(asctime)s %(message)s ") - logging.debug('Starting up worker') - parser = argparse.ArgumentParser(description='Worker for kaldigstserver') - parser.add_argument('-u', '--uri', default="ws://localhost:8888/worker/ws/speech", dest="uri", help="Server<-->worker websocket URI") - parser.add_argument('-f', '--fork', default=1, dest="fork", type=int) - parser.add_argument('-c', '--conf', dest="conf", help="YAML file with decoder configuration") - - args = parser.parse_args() - - if args.fork > 1: - import tornado.process - - logging.info("Forking into %d processes" % args.fork) - tornado.process.fork_processes(args.fork) - - conf = {} - if args.conf: - with open(args.conf) as f: - conf = yaml.safe_load(f) - - if "logging" in conf: - logging.config.dictConfig(conf["logging"]) - - global USE_NNET2 - USE_NNET2 = conf.get("use-nnet2", False) - - global SILENCE_TIMEOUT - SILENCE_TIMEOUT = conf.get("silence-timeout", 5) - if USE_NNET2: - decoder_pipeline = DecoderPipeline2(conf) - else: - decoder_pipeline = DecoderPipeline(conf) - - post_processor = None - if "post-processor" in conf: - post_processor = Popen(conf["post-processor"], shell=True, stdin=PIPE, stdout=PIPE) - - full_post_processor = None - if "full-post-processor" in conf: - full_post_processor = Popen(conf["full-post-processor"], shell=True, stdin=PIPE, stdout=PIPE) - - - loop = GObject.MainLoop() - thread.start_new_thread(loop.run, ()) - while True: - ws = ServerWebsocket(args.uri, decoder_pipeline, post_processor, full_post_processor=full_post_processor) - try: - logger.info("Opening websocket connection to master server") - ws.connect() - ws.run_forever() - except Exception: - logger.error("Couldn't connect to server, waiting for %d seconds", CONNECT_TIMEOUT) - time.sleep(CONNECT_TIMEOUT) - # fixes a race condition - time.sleep(1) - -if __name__ == "__main__": - main() - diff --git a/lucida/speechrecognition/kaldi_gstreamer_asr/sample_english_nnet2.yaml b/lucida/speechrecognition/kaldi_gstreamer_asr/sample_english_nnet2.yaml deleted file mode 100644 index 35945ed82..000000000 --- a/lucida/speechrecognition/kaldi_gstreamer_asr/sample_english_nnet2.yaml +++ /dev/null @@ -1,54 +0,0 @@ -# You have to download TEDLIUM "online nnet2" models in order to use this sample -# Run download-tedlium-nnet2.sh in 'test/models' to download them. -use-nnet2: True -decoder: - # All the properties nested here correspond to the kaldinnet2onlinedecoder GStreamer plugin properties. 
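# (annotation, not part of the original file) decoder2.py forwards every key in
# this section verbatim via asr.set_property(key, value) -- e.g.
#     beam: 10.0   ->   asr.set_property("beam", 10.0)
# the only special case is use-threaded-decoder, which create_pipeline() must
# set before any other property.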
- # Use gst-inspect-1.0 ./libgstkaldionline2.so kaldinnet2onlinedecoder to discover the available properties - use-threaded-decoder: true - model : test/models/english/fisher_nnet_a_gpu_online/final.mdl - word-syms : test/models/english/fisher_nnet_a_gpu_online/words.txt - fst : test/models/english/fisher_nnet_a_gpu_online/HCLG.fst - mfcc-config : test/models/english/fisher_nnet_a_gpu_online/conf/mfcc.conf - ivector-extraction-config : test/models/english/fisher_nnet_a_gpu_online/conf/ivector_extractor.fixed.conf - max-active: 10000 - beam: 10.0 - lattice-beam: 6.0 - acoustic-scale: 0.083 - do-endpointing : true - endpoint-silence-phones : "1:2:3:4:5:6:7:8:9:10" - traceback-period-in-secs: 0.25 - chunk-length-in-secs: 0.25 - num-nbest: 10 - #Additional functionality that you can play with: - #lm-fst: test/models/english/tedlium_nnet_ms_sp_online/G.fst - #big-lm-const-arpa: test/models/english/tedlium_nnet_ms_sp_online/G.carpa - #phone-syms: test/models/english/tedlium_nnet_ms_sp_online/phones.txt - #word-boundary-file: test/models/english/tedlium_nnet_ms_sp_online/word_boundary.int - #do-phone-alignment: true -out-dir: tmp - -use-vad: False -silence-timeout: 10 - -# Just a sample post-processor that appends "." to the hypothesis -post-processor: perl -npe 'BEGIN {use IO::Handle; STDOUT->autoflush(1);} s/(.*)/\1./;' - -# A sample full post processor that add a confidence score to 1-best hyp and deletes other n-best hyps -full-post-processor: ./sample_full_post_processor.py - -logging: - version : 1 - disable_existing_loggers: False - formatters: - simpleFormater: - format: '%(asctime)s - %(levelname)7s: %(name)10s: %(message)s' - datefmt: '%Y-%m-%d %H:%M:%S' - handlers: - console: - class: logging.StreamHandler - formatter: simpleFormater - level: DEBUG - root: - level: DEBUG - handlers: [console] - diff --git a/lucida/speechrecognition/kaldi_gstreamer_asr/sample_full_post_processor.py b/lucida/speechrecognition/kaldi_gstreamer_asr/sample_full_post_processor.py deleted file mode 100755 index b6fe2a76b..000000000 --- a/lucida/speechrecognition/kaldi_gstreamer_asr/sample_full_post_processor.py +++ /dev/null @@ -1,57 +0,0 @@ -#!/usr/bin/env python -''' -Sample script that shows how to postprocess full results from the kaldi-gstreamer-worker, encoded as JSON. - -It adds a sentence confidence score to the 1-best hypothesis, deletes all other hypotheses and -adds a dot (.) to the end of the 1-best hypothesis. It assumes that the results contain at least two hypotheses, -The confidence scores are now normalized -''' - -import sys -import json -import logging -from math import exp - -def post_process_json(str): - try: - event = json.loads(str) - if "result" in event: - if len(event["result"]["hypotheses"]) > 1: - likelihood1 = event["result"]["hypotheses"][0]["likelihood"] - likelihood2 = event["result"]["hypotheses"][1]["likelihood"] - confidence = likelihood1 - likelihood2 - confidence = 1 - exp(-confidence) - else: - confidence = 1.0e+10; - event["result"]["hypotheses"][0]["confidence"] = confidence - - event["result"]["hypotheses"][0]["transcript"] += "." 
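# --- annotation (not part of the original file): worked example of the
# confidence estimate above, with hypothetical likelihoods.
#     likelihood1 = 150.3, likelihood2 = 148.1
#     confidence  = 1 - exp(-(150.3 - 148.1)) = 1 - exp(-2.2) ~= 0.889
# equal likelihoods give 0.0 and a large margin approaches 1.0, so the score
# behaves like a normalized measure of how far the 1-best hypothesis is ahead
# of the runner-up.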
- del event["result"]["hypotheses"][1:] - return json.dumps(event) - except: - exc_type, exc_value, exc_traceback = sys.exc_info() - logging.error("Failed to process JSON result: %s : %s " % (exc_type, exc_value)) - return str - - -if __name__ == "__main__": - logging.basicConfig(level=logging.DEBUG, format="%(levelname)8s %(asctime)s %(message)s ") - - lines = [] - while True: - l = sys.stdin.readline() - if not l: break # EOF - if l.strip() == "": - if len(lines) > 0: - result_json = post_process_json("".join(lines)) - print result_json - print - sys.stdout.flush() - lines = [] - else: - lines.append(l) - - if len(lines) > 0: - result_json = post_process_json("".join(lines)) - print result_json - lines = [] diff --git a/lucida/speechrecognition/kaldi_gstreamer_asr/sample_worker.yaml b/lucida/speechrecognition/kaldi_gstreamer_asr/sample_worker.yaml deleted file mode 100644 index ef3fa13ae..000000000 --- a/lucida/speechrecognition/kaldi_gstreamer_asr/sample_worker.yaml +++ /dev/null @@ -1,29 +0,0 @@ -timeout-decoder : 10 -decoder: - model: test/models/english/voxforge/tri2b_mmi_b0.05/final.mdl - lda-mat: test/models/english/voxforge/tri2b_mmi_b0.05/final.mat - word-syms: test/models/english/voxforge/tri2b_mmi_b0.05/words.txt - fst: test/models/english/voxforge/tri2b_mmi_b0.05/HCLG.fst - silence-phones: "1:2:3:4:5" -out-dir: tmp - -use-vad: False -silence-timeout: 60 - -# Just a sample post-processor that appends "." to the hypothesis -post-processor: perl -npe 'BEGIN {use IO::Handle; STDOUT->autoflush(1);} s/(.*)/\1./;' -logging: - version : 1 - disable_existing_loggers: False - formatters: - simpleFormater: - format: '%(asctime)s - %(levelname)7s: %(name)10s: %(message)s' - datefmt: '%Y-%m-%d %H:%M:%S' - handlers: - console: - class: logging.StreamHandler - formatter: simpleFormater - level: DEBUG - root: - level: DEBUG - handlers: [console] diff --git a/lucida/speechrecognition/kaldi_gstreamer_asr/simple_start.sh b/lucida/speechrecognition/kaldi_gstreamer_asr/simple_start.sh deleted file mode 100755 index 678b34468..000000000 --- a/lucida/speechrecognition/kaldi_gstreamer_asr/simple_start.sh +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/bash -export GST_PLUGIN_PATH=$(pwd)/kaldi/tools/gst-kaldi-nnet2-online/src - -python kaldigstserver/worker.py -u ${ASR_ADDR_PORT}/worker/ws/speech -c sample_english_nnet2.yaml diff --git a/lucida/speechrecognition/kaldi_gstreamer_asr/start.sh b/lucida/speechrecognition/kaldi_gstreamer_asr/start.sh deleted file mode 100755 index 12402d2c7..000000000 --- a/lucida/speechrecognition/kaldi_gstreamer_asr/start.sh +++ /dev/null @@ -1,42 +0,0 @@ -#!/bin/bash - -MASTER="localhost" -PORT=8888 - -usage(){ - echo "Creates a worker and connects it to a master."; - echo "If the master address is not given, a master will be created at localhost:80"; - echo "Usage: $0 -y yaml_file [-m master address] [-p port number]"; -} - -while getopts "h?m:p:y:" opt; do - case "$opt" in - h|\?) 
- usage - exit 0 - ;; - m) MASTER=$OPTARG - ;; - p) PORT=$OPTARG - ;; - y) YAML=$OPTARG - ;; - esac -done - -#yaml file must be specified -if [ "$YAML" == "" ] ; then - usage; - exit 1; -fi; - - -if [ "$MASTER" == "localhost" ] ; then - # start a local master - python /kaldigstserver/master_server.py --port="$PORT" 2>> master.log & -fi - -#start worker and connect it to the master -export GST_PLUGIN_PATH=kaldi/tools/gst-kaldi-nnet2-online/src/:kaldi/src/gst-plugin/ - -python kaldigstserver/worker.py -c "$YAML" -u ws://"$MASTER:$PORT"/worker/ws/speech 2>> worker.log & diff --git a/lucida/speechrecognition/kaldi_gstreamer_asr/static/status.html b/lucida/speechrecognition/kaldi_gstreamer_asr/static/status.html deleted file mode 100644 index 26e4619c8..000000000 --- a/lucida/speechrecognition/kaldi_gstreamer_asr/static/status.html +++ /dev/null @@ -1,23 +0,0 @@ -[23 lines of HTML stripped in this extract: a static page titled "Speech server status:" that rendered the live worker/request counters pushed over the status websocket] \ No newline at end of file
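# --- annotation (not part of the original diff): minimal sketch of a listener
# for the status endpoint that the deleted status.html page rendered, using
# ws4py (already a dependency of worker.py) and an assumed master address.
# StatusSocketHandler pushes JSON such as
# {"num_workers_available": 1, "num_requests_processed": 42}.
from ws4py.client.threadedclient import WebSocketClient

class StatusClient(WebSocketClient):
    def received_message(self, m):
        print(m)  # one JSON status object per update

client = StatusClient("ws://localhost:8888/client/ws/status")
client.connect()
client.run_forever()  # prints a line whenever the master's state changes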
diff --git a/lucida/speechrecognition/kaldi_gstreamer_asr/stop.sh b/lucida/speechrecognition/kaldi_gstreamer_asr/stop.sh deleted file mode 100755 index 575bbd3c8..000000000 --- a/lucida/speechrecognition/kaldi_gstreamer_asr/stop.sh +++ /dev/null @@ -1,8 +0,0 @@ -#!/bin/bash - -#kill worker -ps axf | grep worker.py | grep -v grep | awk '{print "kill -15 " $1}' | sh - -#kill master -ps axf | grep master_server.py | grep -v grep | awk '{print "kill -15 " $1}' | sh - diff --git a/lucida/speechrecognition/kaldi_gstreamer_asr/test/data/bill_gates-TED.mp3 b/lucida/speechrecognition/kaldi_gstreamer_asr/test/data/bill_gates-TED.mp3 deleted file mode 100644 index 06cbb26ac..000000000 Binary files a/lucida/speechrecognition/kaldi_gstreamer_asr/test/data/bill_gates-TED.mp3 and /dev/null differ diff --git a/lucida/speechrecognition/kaldi_gstreamer_asr/test/models/download-fisher-nnet2.sh b/lucida/speechrecognition/kaldi_gstreamer_asr/test/models/download-fisher-nnet2.sh deleted file mode 100755 index 2125c7173..000000000 --- a/lucida/speechrecognition/kaldi_gstreamer_asr/test/models/download-fisher-nnet2.sh +++ /dev/null @@ -1,30 +0,0 @@ -#!/bin/bash - -BASE_URL=http://kaldi-asr.org/downloads/build/2/sandbox/online/egs/fisher_english/s5 - -MODEL=exp/nnet2_online/nnet_a_gpu_online -GRAPH=exp/tri5a - -modeldir=$(dirname "$0")/english/fisher_nnet_a_gpu_online - -mkdir -p "$modeldir" - -cd "$modeldir" - -wget -N $BASE_URL/$MODEL/final.mdl -if [ $? -ne 0 ]; then rm -rf final.mdl; exit 1; fi - -(mkdir -p ivector_extractor; cd ivector_extractor; wget -N $BASE_URL/$MODEL/ivector_extractor/{final.ie,final.dubm,final.mat,global_cmvn.stats}) -if [ $? -ne 0 ]; then rm -rf ivector_extractor; exit 1; fi -(mkdir -p conf; cd conf; wget -N $BASE_URL/$MODEL/conf/{ivector_extractor.conf,online_nnet2_decoding.conf,mfcc.conf,online_cmvn.conf,splice.conf}) -if [ $? -ne 0 ]; then rm -rf conf; exit 1; fi - -wget -N $BASE_URL/$GRAPH/graph/HCLG.fst -if [ $? -ne 0 ]; then rm -rf HCLG.fst; exit 1; fi -wget -N $BASE_URL/$GRAPH/graph/words.txt -if [ $?
-ne 0 ]; then rm -rf words.txt; exit 1; fi - - -cat conf/ivector_extractor.conf | perl -npe "s/=.*nnet_a_gpu_online\//=test\/models\/english\/fisher_nnet_a_gpu_online\//" > conf/ivector_extractor.fixed.conf - -cd - diff --git a/lucida/speechrecognition/kaldi_gstreamer_asr/test/models/download-tedlium-nnet2.sh b/lucida/speechrecognition/kaldi_gstreamer_asr/test/models/download-tedlium-nnet2.sh deleted file mode 100755 index ca2a54061..000000000 --- a/lucida/speechrecognition/kaldi_gstreamer_asr/test/models/download-tedlium-nnet2.sh +++ /dev/null @@ -1,3 +0,0 @@ -#!/bin/bash - -curl https://phon.ioc.ee/~tanela/tedlium_nnet_ms_sp_online.tgz | tar zxv diff --git a/lucida/speechrecognition/src/common.py b/lucida/speechrecognition/src/common.py new file mode 100644 index 000000000..840f48c75 --- /dev/null +++ b/lucida/speechrecognition/src/common.py @@ -0,0 +1,13 @@ + +SUCCESS_OK = 0 + +ERROR_GENERIC = 1 +ERROR_INTERNET = 2 +ERROR_TIMEOUT = 3 +ERROR_CRITICAL = 4 +ERROR_MAX_RETRIES = 5 + +WARN_GENERIC = 1 +WARN_ABORTED = 2 +WARN_INTERNET = 3 +WARN_TIMEOUT = 4 diff --git a/lucida/speechrecognition/src/configuration.py b/lucida/speechrecognition/src/configuration.py new file mode 100644 index 000000000..4c473b39b --- /dev/null +++ b/lucida/speechrecognition/src/configuration.py @@ -0,0 +1,180 @@ +# configuration.py + +import logging, logging.config, yaml +import os, sys, errno +import click +import re +from urlparse import urlparse +import json + +from logger import * + +logger = logging.getLogger("CONFIG") + +GST_DEBUG = os.environ.get('GST_DEBUG') +CONFIG = dict( + master = "http://localhost:8081", + retry_after = 5, + max_segment_duration = 180, + max_call_duration = 3600, + silence_timeout = 2, + initial_silence_timeout = 5, + silence_threshold = -20, + response_timeout = 30, + worker_verbosity = "info", + gstreamer_verbosity = "none", + + config_options = dict( + master_prompt = "URL for Lucida commandcenter", + retry_after_prompt = "Number of seconds to wait between subsequent reconnects to master", retry_after_min = 1, retry_after_max = 30, + max_segment_duration_prompt = "Maximum length of audio segment in seconds", max_segment_duration_min = 10, max_segment_duration_max = 300, + max_call_duration_prompt = "Maximum length of call in seconds", max_call_duration_min = 60, max_call_duration_max = 18000, + silence_timeout_prompt = "Maximum silence (in seconds) that should be tolerated", silence_timeout_min = 1, silence_timeout_max = 10, + initial_silence_timeout_prompt = "Maximum initial silence (in seconds) that should be tolerated", initial_silence_timeout_min = 1, initial_silence_timeout_max = 30, + silence_threshold_prompt = "Audio segments with RMS below this value will be considered silent", silence_threshold_min = -100, silence_threshold_max = 100, + response_timeout_prompt = "Maximum time (in seconds) between end of speech and receiving of transcription that should be tolerated", response_timeout_min = 5, response_timeout_max = 60, + worker_verbosity_prompt = "Verbosity level for worker", worker_verbosity_choices = ["critical", "error", "warning", "info", "debug"], + gstreamer_verbosity_prompt = "Verbosity level for GStreamer", gstreamer_verbosity_choices = ["none", "error", "warning", "fixme", "info", "debug", "log", "trace", "memdump"] + ) +) + +def validate_master(ctx, param, value): + if value.startswith("localhost"): + value = "http://" + value + if value.startswith("//"): + value = "http:" + value + scheme = re.compile('[A-Za-z0-9]{2,5}://') + if scheme.match(value) ==
None: + value = "http://" + value + value = urlparse(value) + if value.hostname == None: + if ctx == None: + logger.error("Invalid URL provided for Lucida commandcenter!!!") + return False + raise click.BadParameter("Invalid URL provided for Lucida commandcenter!!!") + if value.port != None: + url = value.hostname + ":" + str(value.port) + "/worker/ws/speech" + else: + url = value.hostname + "/worker/ws/speech" + if value.scheme == "http" or value.scheme == "ws": + url = "ws://" + url + elif value.scheme == "https" or value.scheme == "wss": + url = "wss://" + url + else: + if ctx == None: + logger.error("Unrecognized scheme '%s' while parsing URL for Lucida commandcenter!!!" % (value.scheme)) + return False + raise click.BadParameter("Unrecognized scheme '%s' while parsing URL for Lucida commandcenter!!!" % (value.scheme)) + return url + +@click.command() +@click.option("--master", prompt=CONFIG['config_options']['master_prompt'], + default=CONFIG['master'], type=click.STRING, callback=validate_master, required=True, show_default=True, help=CONFIG['config_options']['master_prompt']) +@click.option("--retry-after", prompt=CONFIG['config_options']['retry_after_prompt'], + default=CONFIG['retry_after'], type=click.IntRange(min=CONFIG['config_options']['retry_after_min'], max=CONFIG['config_options']['retry_after_max']), + required=True, show_default=True, help=CONFIG['config_options']['retry_after_prompt']) +@click.option("--max-segment-duration", prompt=CONFIG['config_options']['max_segment_duration_prompt'], + default=CONFIG['max_segment_duration'], type=click.IntRange(min=CONFIG['config_options']['max_segment_duration_min'], max=CONFIG['config_options']['max_segment_duration_max']), + required=True, show_default=True, help=CONFIG['config_options']['max_segment_duration_prompt']) +@click.option("--max-call-duration", prompt=CONFIG['config_options']['max_call_duration_prompt'], + default=CONFIG['max_call_duration'], type=click.IntRange(min=CONFIG['config_options']['max_call_duration_min'], max=CONFIG['config_options']['max_call_duration_max']), + required=True, show_default=True, help=CONFIG['config_options']['max_call_duration_prompt']) +@click.option("--silence-timeout", prompt=CONFIG['config_options']['silence_timeout_prompt'], + default=CONFIG['silence_timeout'], type=click.IntRange(min=CONFIG['config_options']['silence_timeout_min'], max=CONFIG['config_options']['silence_timeout_max']), + required=True, show_default=True, help=CONFIG['config_options']['silence_timeout_prompt']) +@click.option("--initial-silence-timeout", prompt=CONFIG['config_options']['initial_silence_timeout_prompt'], + default=CONFIG['initial_silence_timeout'], type=click.IntRange(min=CONFIG['config_options']['initial_silence_timeout_min'], + max=CONFIG['config_options']['initial_silence_timeout_max']), required=True, show_default=True, help=CONFIG['config_options']['initial_silence_timeout_prompt']) +@click.option("--silence-threshold", prompt=CONFIG['config_options']['silence_threshold_prompt'], + default=CONFIG['silence_threshold'], type=click.IntRange(min=CONFIG['config_options']['silence_threshold_min'], max=CONFIG['config_options']['silence_threshold_max']), + required=True, show_default=True, help=CONFIG['config_options']['silence_threshold_prompt']) +@click.option("--response-timeout", prompt=CONFIG['config_options']['response_timeout_prompt'], + default=CONFIG['response_timeout'], type=click.IntRange(min=CONFIG['config_options']['response_timeout_min'], max=CONFIG['config_options']['response_timeout_max']), 
+ required=True, show_default=True, help=CONFIG['config_options']['response_timeout_prompt']) +@click.option("--worker-verbosity", prompt=(CONFIG['config_options']['worker_verbosity_prompt'] + " " + str(CONFIG['config_options']['worker_verbosity_choices'])), + default=CONFIG['worker_verbosity'], type=click.Choice(CONFIG['config_options']['worker_verbosity_choices']), required=True, + show_default=True, help=CONFIG['config_options']['worker_verbosity_prompt']) +@click.option("--gstreamer-verbosity", prompt=(CONFIG['config_options']['gstreamer_verbosity_prompt'] + " " + str(CONFIG['config_options']['gstreamer_verbosity_choices'])), + default=CONFIG['gstreamer_verbosity'], type=click.Choice(CONFIG['config_options']['gstreamer_verbosity_choices']), required=True, + show_default=True, help=CONFIG['config_options']['gstreamer_verbosity_prompt']) +def first_run(master, retry_after, max_segment_duration, max_call_duration, silence_timeout, initial_silence_timeout, silence_threshold, response_timeout, worker_verbosity, gstreamer_verbosity): + conf = dict( + master = master, + retry_after = retry_after, + max_segment_duration = max_segment_duration, + max_call_duration = max_call_duration, + silence_timeout = silence_timeout, + initial_silence_timeout = initial_silence_timeout, + silence_threshold = silence_threshold, + response_timeout = response_timeout, + worker_verbosity = worker_verbosity, + gstreamer_verbosity = gstreamer_verbosity + ) + conf_str = ( + "# Configuration file for speech recognition worker\n\n" + "# " + CONFIG['config_options']['master_prompt'] + "\n" + "master: '" + conf['master'] + "'\n\n" + "# " + CONFIG['config_options']['retry_after_prompt'] + "\n" + "retry_after: " + str(conf['retry_after']) + "\n\n" + "# " + CONFIG['config_options']['max_segment_duration_prompt'] + "\n" + "max_segment_duration: " + str(conf['max_segment_duration']) + "\n\n" + "# " + CONFIG['config_options']['max_call_duration_prompt'] + "\n" + "max_call_duration: " + str(conf['max_call_duration']) + "\n\n" + "# " + CONFIG['config_options']['silence_timeout_prompt'] + "\n" + "silence_timeout: " + str(conf['silence_timeout']) + "\n\n" + "# " + CONFIG['config_options']['initial_silence_timeout_prompt'] + "\n" + "initial_silence_timeout: " + str(conf['initial_silence_timeout']) + "\n\n" + "# " + CONFIG['config_options']['response_timeout_prompt'] + "\n" + "response_timeout: " + str(conf['response_timeout']) + "\n\n" + "# " + CONFIG['config_options']['silence_threshold_prompt'] + "\n" + "silence_threshold: " + str(conf['silence_threshold']) + "\n\n" + "# " + CONFIG['config_options']['worker_verbosity_prompt'] + " " + str(CONFIG['config_options']['worker_verbosity_choices']) + "\n" + "worker_verbosity: '" + conf['worker_verbosity'] + "'\n\n" + "# " + CONFIG['config_options']['gstreamer_verbosity_prompt'] + " " + str(CONFIG['config_options']['gstreamer_verbosity_choices']) + "\n" + "gstreamer_verbosity: '" + conf['gstreamer_verbosity'] + "'" + ) + with open("worker_config.yaml", "w") as worker_config: + worker_config.write(conf_str) + logger.warn("Saving configuration to file and quitting...") + sys.exit(0) + +def process(conf): + conf['worker_verbosity'] = logging.getLevelName(conf['worker_verbosity'].upper()) + if GST_DEBUG == None: + conf['gstreamer_verbosity'] = "asrplugin:" + str(CONFIG['config_options']['gstreamer_verbosity_choices'].index(conf['gstreamer_verbosity'])) + else: + logger.warning("Using GST_DEBUG='%s' value from environment. 
If you don't want this, unset GST_DEBUG before running this script" % GST_DEBUG)
+        conf['gstreamer_verbosity'] = GST_DEBUG
+    if ( not validate_master(None, None, conf['master']) or
+         not isinstance( conf['silence_timeout'], int ) or not isinstance( conf['initial_silence_timeout'], int ) or
+         not isinstance( conf['response_timeout'], int ) or not isinstance( conf['worker_verbosity'], int ) or
+         not isinstance( conf['silence_threshold'], int ) or not isinstance( conf['retry_after'], int ) or
+         not isinstance( conf['max_segment_duration'], int ) or not isinstance( conf['max_call_duration'], int ) or
+         conf['silence_timeout'] > CONFIG['config_options']['silence_timeout_max'] or conf['silence_timeout'] < CONFIG['config_options']['silence_timeout_min'] or
+         conf['initial_silence_timeout'] > CONFIG['config_options']['initial_silence_timeout_max'] or conf['initial_silence_timeout'] < CONFIG['config_options']['initial_silence_timeout_min'] or
+         conf['response_timeout'] > CONFIG['config_options']['response_timeout_max'] or conf['response_timeout'] < CONFIG['config_options']['response_timeout_min'] or
+         conf['retry_after'] > CONFIG['config_options']['retry_after_max'] or conf['retry_after'] < CONFIG['config_options']['retry_after_min'] or
+         conf['silence_threshold'] > CONFIG['config_options']['silence_threshold_max'] or conf['silence_threshold'] < CONFIG['config_options']['silence_threshold_min'] or
+         conf['max_segment_duration'] > CONFIG['config_options']['max_segment_duration_max'] or conf['max_segment_duration'] < CONFIG['config_options']['max_segment_duration_min'] or
+         conf['max_call_duration'] > CONFIG['config_options']['max_call_duration_max'] or conf['max_call_duration'] < CONFIG['config_options']['max_call_duration_min'] ):
+        raise ValueError
+    return conf
+
+def load():
+    try:
+        with open("logger_config.yaml") as f:
+            logging.config.dictConfig(yaml.safe_load(f))
+        logger.info("Loaded logger configuration from logger_config.yaml")
+    except Exception as e:
+        logger.error("Error while loading configuration from logger_config.yaml: %s" % (e))
+
+    global CONFIG
+
+    conf = dict()
+    try:
+        with open("worker_config.yaml") as f:
+            conf = process(yaml.safe_load(f))
+        logger.info("Loaded worker configuration from worker_config.yaml: %s" % str(conf))
+    except:
+        first_run()
+    return conf
diff --git a/lucida/speechrecognition/src/get_free_port b/lucida/speechrecognition/src/get_free_port
new file mode 100755
index 000000000..a26fb5f94
--- /dev/null
+++ b/lucida/speechrecognition/src/get_free_port
@@ -0,0 +1,7 @@
+#!/bin/bash
+read LOWERPORT UPPERPORT < /proc/sys/net/ipv4/ip_local_port_range
+while :; do
+    PORT="`shuf -i $LOWERPORT-$UPPERPORT -n 1`"
+    ss -lpn | grep -q ":$PORT " || break
+done
+echo $PORT
diff --git a/lucida/speechrecognition/src/gstplugin/.gitignore b/lucida/speechrecognition/src/gstplugin/.gitignore
new file mode 100644
index 000000000..82d454711
--- /dev/null
+++ b/lucida/speechrecognition/src/gstplugin/.gitignore
@@ -0,0 +1,16 @@
+# Ignore everything
+*
+
+# But not these files...
+!.gitignore
+!AUTHORS
+!autogen.sh
+!ChangeLog
+!configure.ac
+!COPYING
+!Makefile.am
+!NEWS
+!README
+!src/Makefile.am
+!src/gstasrplugin.c
+!src/gstasrplugin.h
diff --git a/lucida/speechrecognition/src/gstplugin/AUTHORS b/lucida/speechrecognition/src/gstplugin/AUTHORS
new file mode 100644
index 000000000..1bb7449a0
--- /dev/null
+++ b/lucida/speechrecognition/src/gstplugin/AUTHORS
@@ -0,0 +1 @@
+Thomas Vander Stichele
diff --git a/lucida/speechrecognition/src/gstplugin/COPYING b/lucida/speechrecognition/src/gstplugin/COPYING
new file mode 100644
index 000000000..09ec995da
--- /dev/null
+++ b/lucida/speechrecognition/src/gstplugin/COPYING
@@ -0,0 +1,2 @@
+Put your license in here!
+
diff --git a/lucida/speechrecognition/kaldi_gstreamer_asr/kaldigstserver/__init__.py b/lucida/speechrecognition/src/gstplugin/ChangeLog
similarity index 100%
rename from lucida/speechrecognition/kaldi_gstreamer_asr/kaldigstserver/__init__.py
rename to lucida/speechrecognition/src/gstplugin/ChangeLog
diff --git a/lucida/speechrecognition/src/gstplugin/Makefile.am b/lucida/speechrecognition/src/gstplugin/Makefile.am
new file mode 100644
index 000000000..5ff2f5860
--- /dev/null
+++ b/lucida/speechrecognition/src/gstplugin/Makefile.am
@@ -0,0 +1,3 @@
+SUBDIRS = src
+
+EXTRA_DIST = autogen.sh
diff --git a/lucida/speechrecognition/src/gstplugin/NEWS b/lucida/speechrecognition/src/gstplugin/NEWS
new file mode 100644
index 000000000..3474a99e2
--- /dev/null
+++ b/lucida/speechrecognition/src/gstplugin/NEWS
@@ -0,0 +1 @@
+Nothing much yet.
diff --git a/lucida/speechrecognition/src/gstplugin/README b/lucida/speechrecognition/src/gstplugin/README
new file mode 100644
index 000000000..190568447
--- /dev/null
+++ b/lucida/speechrecognition/src/gstplugin/README
@@ -0,0 +1,34 @@
+WHAT IT IS
+----------
+
+gst-plugin is a template for writing your own GStreamer plug-in.
+
+The code is deliberately kept simple so that you quickly understand the basics
+of how to set up autotools and your source tree.
+
+This template demonstrates:
+- what to do in autogen.sh
+- how to setup configure.ac (your package name and version, GStreamer flags)
+- how to setup your source dir
+- what to put in Makefile.am
+
+More features and templates might get added later on.
+
+HOW TO USE IT
+-------------
+
+To use it, either make a copy for yourself and rename the parts or use the
+make_element script in tools. To create sources for "myfilter" based on the
+"gsttransform" template run:
+
+cd src;
+../tools/make_element myfilter gsttransform
+
+This will create gstmyfilter.c and gstmyfilter.h. Open them in an editor and
+start editing. There are several occurrences of the string "template"; update
+those with real values. The plugin will be called 'myfilter' and it will have
+one element called 'myfilter' too. Also look for "FIXME:" markers that point you
+to places where you need to edit the code.
+
+You still need to adjust the Makefile.am.
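+
+A quick smoke test after generating sources and adjusting Makefile.am might
+look like this (a sketch; the element name "myfilter" is illustrative, and
+GST_PLUGIN_PATH is assumed to point at the freshly built plugin directory):
+
+./autogen.sh
+make
+GST_PLUGIN_PATH=src gst-inspect-1.0 myfilter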
+
diff --git a/lucida/speechrecognition/src/gstplugin/autogen.sh b/lucida/speechrecognition/src/gstplugin/autogen.sh
new file mode 100755
index 000000000..9df38d3fa
--- /dev/null
+++ b/lucida/speechrecognition/src/gstplugin/autogen.sh
@@ -0,0 +1,18 @@
+#!/bin/sh
+# you can either set the environment variables AUTOCONF, AUTOHEADER, AUTOMAKE,
+# ACLOCAL, AUTOPOINT and/or LIBTOOLIZE to the right versions, or leave them
+# unset and get the defaults
+
+autoreconf --verbose --force --install --make || {
+  echo 'autogen.sh failed';
+  exit 1;
+}
+
+./configure || {
+  echo 'configure failed';
+  exit 1;
+}
+
+echo
+echo "Now type 'make' to compile this module."
+echo
diff --git a/lucida/speechrecognition/src/gstplugin/configure.ac b/lucida/speechrecognition/src/gstplugin/configure.ac
new file mode 100644
index 000000000..626376cb8
--- /dev/null
+++ b/lucida/speechrecognition/src/gstplugin/configure.ac
@@ -0,0 +1,90 @@
+dnl required version of autoconf
+AC_PREREQ([2.53])
+
+dnl fill in your package name and package version here
+AC_INIT([asrplugin],[1.0.0],[https://github.com/claritylab/lucida/issues],[asrplugin],[https://github.com/claritylab/lucida/tree/master/lucida/speechrecognition])
+AC_DEFINE([DESCRIPTION],["Generic speech to text converter for Lucida AI"],[Description of package])
+
+dnl required versions of gstreamer and plugins-base
+GST_REQUIRED=1.0.0
+GSTPB_REQUIRED=1.0.0
+
+AC_CONFIG_SRCDIR([src/gstasrplugin.c])
+AC_CONFIG_HEADERS([config.h])
+
+dnl required version of automake
+AM_INIT_AUTOMAKE([1.10])
+
+dnl enable maintainer mode by default
+AM_MAINTAINER_MODE([enable])
+
+dnl check for tools (compiler etc.)
+AC_PROG_CC
+
+dnl required version of libtool
+LT_PREREQ([2.2.6])
+LT_INIT
+
+dnl give error and exit if we don't have pkgconfig
+AC_CHECK_PROG(HAVE_PKGCONFIG, pkg-config, [ ], [
+  AC_MSG_ERROR([You need to have pkg-config installed!])
+])
+
+dnl Check for the required version of GStreamer core (and gst-plugins-base)
+dnl This will export GST_CFLAGS and GST_LIBS variables for use in Makefile.am
+dnl
+dnl If you need libraries from gst-plugins-base here, also add:
+dnl for libgstaudio-1.0: gstreamer-audio-1.0 >= $GST_REQUIRED
+dnl for libgstvideo-1.0: gstreamer-video-1.0 >= $GST_REQUIRED
+dnl for libgsttag-1.0: gstreamer-tag-1.0 >= $GST_REQUIRED
+dnl for libgstpbutils-1.0: gstreamer-pbutils-1.0 >= $GST_REQUIRED
+dnl for libgstfft-1.0: gstreamer-fft-1.0 >= $GST_REQUIRED
+dnl for libgstinterfaces-1.0: gstreamer-interfaces-1.0 >= $GST_REQUIRED
+dnl for libgstrtp-1.0: gstreamer-rtp-1.0 >= $GST_REQUIRED
+dnl for libgstrtsp-1.0: gstreamer-rtsp-1.0 >= $GST_REQUIRED
+dnl etc.
+PKG_CHECK_MODULES(GST, [
+  gstreamer-1.0 >= $GST_REQUIRED
+  gstreamer-base-1.0 >= $GST_REQUIRED
+  gstreamer-controller-1.0 >= $GST_REQUIRED
+  gstreamer-audio-1.0 >= $GST_REQUIRED
+  jansson
+], [
+  AC_SUBST(GST_CFLAGS)
+  AC_SUBST(GST_LIBS)
+], [
+  AC_MSG_ERROR([
+      You need to install or upgrade the GStreamer development
+      packages on your system. On Debian-based systems these are
+      libgstreamer1.0-dev and libgstreamer-plugins-base1.0-dev.
+      On RPM-based systems gstreamer1.0-devel, libgstreamer1.0-devel
+      or similar. The minimum version required is $GST_REQUIRED.
+ ]) +]) + +dnl check if compiler understands -Wall (if yes, add -Wall to GST_CFLAGS) +AC_MSG_CHECKING([to see if compiler understands -Wall]) +save_CFLAGS="$CFLAGS" +CFLAGS="$CFLAGS -Wall" +AC_COMPILE_IFELSE([AC_LANG_PROGRAM([ ], [ ])], [ + GST_CFLAGS="$GST_CFLAGS -Wall" + AC_MSG_RESULT([yes]) +], [ + AC_MSG_RESULT([no]) +]) + +dnl set the plugindir where plugins should be installed (for src/Makefile.am) +if test "x${prefix}" = "x$HOME"; then + plugindir="$HOME/.gstreamer-1.0/plugins" +else + plugindir="\$(libdir)/gstreamer-1.0" +fi +AC_SUBST(plugindir) + +dnl set proper LDFLAGS for plugins +GST_PLUGIN_LDFLAGS='-module -avoid-version -export-symbols-regex [_]*\(gst_\|Gst\|GST_\).*' +AC_SUBST(GST_PLUGIN_LDFLAGS) + +AC_CONFIG_FILES([Makefile src/Makefile]) +AC_OUTPUT + diff --git a/lucida/speechrecognition/src/gstplugin/src/Makefile.am b/lucida/speechrecognition/src/gstplugin/src/Makefile.am new file mode 100644 index 000000000..59858a975 --- /dev/null +++ b/lucida/speechrecognition/src/gstplugin/src/Makefile.am @@ -0,0 +1,14 @@ +# Note: plugindir is set in configure +plugin_LTLIBRARIES = libgstasrplugin.la + +# sources used to compile this plug-in +libgstasrplugin_la_SOURCES = gstasrplugin.c gstasrplugin.h ../../../include/gen-c_glib/a_s_r_thrift_service.h ../../../include/gen-c_glib/asrthriftservice_types.h ../../../include/gen-c_glib/a_s_r_thrift_service.c ../../../include/gen-c_glib/asrthriftservice_types.c + +# compiler and linker flags used to compile this plugin, set in configure.ac +libgstasrplugin_la_CFLAGS = $(GST_CFLAGS) -I ../../../include -I ../../../include/gen-c_glib -I/usr/local/include/thrift +libgstasrplugin_la_LIBADD = $(GST_LIBS) +libgstasrplugin_la_LDFLAGS = $(GST_PLUGIN_LDFLAGS) -L/usr/local/lib -lthrift -lthrift_c_glib -lthriftprotocol +libgstasrplugin_la_LIBTOOLFLAGS = --tag=disable-static + +# headers we need but don't want installed +noinst_HEADERS = gstasrplugin.h diff --git a/lucida/speechrecognition/src/gstplugin/src/gstasrplugin.c b/lucida/speechrecognition/src/gstplugin/src/gstasrplugin.c new file mode 100644 index 000000000..94e8a209b --- /dev/null +++ b/lucida/speechrecognition/src/gstplugin/src/gstasrplugin.c @@ -0,0 +1,658 @@ +/* + * GStreamer + * Copyright (C) 2005 Thomas Vander Stichele + * Copyright (C) 2005 Ronald S. Bultje + * Copyright (C) 2017 Kamal Galrani <> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ *
+ * Alternatively, the contents of this file may be used under the
+ * GNU Lesser General Public License Version 2.1 (the "LGPL"), in
+ * which case the following provisions apply instead of the ones
+ * mentioned above:
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+/**
+ * SECTION:element-asrplugin
+ *
+ * Generic speech to text converter for Lucida AI
+ *
+ *
+ * Example launch line
+ * |[
+ * GST_PLUGIN_PATH=. gst-launch-1.0 --gst-debug="asrplugin:5" \
+ * -q filesrc location=sample.wav ! decodebin ! audioconvert ! \
+ * audioresample ! asrplugin ! filesink location=output.txt
+ * ]|
+ *
+ */
+
+#include <config.h>
+#include <gst/gst.h>
+#include "gstasrplugin.h"
+
+#include <glib.h>
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdarg.h>
+#include <stdbool.h>
+#include <unistd.h>
+#include <errno.h>
+#include <signal.h>
+#include <time.h>
+#include <ctype.h>
+#include <sys/types.h>
+
+#include <pthread.h>
+
+#include <thrift/c_glib/protocol/thrift_binary_protocol.h>
+#include <thrift/c_glib/transport/thrift_buffered_transport.h>
+#include <thrift/c_glib/transport/thrift_socket.h>
+#include "a_s_r_thrift_service.h"
+
+#include <jansson.h>
+
+/* JSON_REAL_PRECISION is a macro from libjansson 2.7. Ubuntu 12.04 only has 2.2.1-1 */
+#ifndef JSON_REAL_PRECISION
+#define JSON_REAL_PRECISION(n) (((n) & 0x1F) << 11)
+#endif // JSON_REAL_PRECISION
+
+GST_DEBUG_CATEGORY_STATIC (gst_asrplugin_debug);
+#define GST_CAT_DEFAULT gst_asrplugin_debug
+
+/* Filter signals and args */
+enum
+{
+  INTERIM_RESULT_SIGNAL,
+  FINAL_RESULT_SIGNAL,
+  LAST_SIGNAL
+};
+
+enum
+{
+  PROP_0,
+  PROP_DECODER_EXECUTABLE,
+  PROP_REQUEST_ID,
+  PROP_LUCIDA_USER,
+  PROP_MESSAGE_CONTEXT
+};
+
+/*
+ * the capabilities of the inputs and outputs.
+ *
+ * describe the real formats here.
+ */
+static GstStaticPadTemplate sink_factory = GST_STATIC_PAD_TEMPLATE ("sink",
+    GST_PAD_SINK,
+    GST_PAD_ALWAYS,
+    GST_STATIC_CAPS ("audio/x-raw, format = (string) S16LE, channels = (int) 1, rate = (int) 16000")
+);
+
+static GstStaticPadTemplate src_factory = GST_STATIC_PAD_TEMPLATE ("src",
+    GST_PAD_SRC,
+    GST_PAD_ALWAYS,
+    GST_STATIC_CAPS ("text/x-raw, format= { utf8 }")
+);
+
+static guint gst_asrplugin_signals[LAST_SIGNAL];
+
+#define gst_asrplugin_parent_class parent_class
+G_DEFINE_TYPE (Gstasrplugin, gst_asrplugin, GST_TYPE_ELEMENT);
+
+static void gst_asrplugin_set_property (GObject * object, guint prop_id, const GValue * value, GParamSpec * pspec);
+
+//static void gst_asrplugin_get_property (GObject * object, guint prop_id, GValue * value, GParamSpec * pspec);
+
+static gboolean gst_asrplugin_sink_event (GstPad * pad, GstObject * parent, GstEvent * event);
+
+static GstFlowReturn gst_asrplugin_sink_chain (GstPad * pad, GstObject * parent, GstBuffer * buf);
+
+static gboolean gst_asrplugin_sink_query(GstPad *pad, GstObject * parent, GstQuery * query);
+
+static void gst_asrplugin_finalize (GObject * object);
+
+static void *gst_asrplugin_read_decoder( void *ptr );
+
+/* TODO: Declare other functions too */
+
+/* GObject vmethod implementations */
+
+/* initialize the asrplugin's class */
+static void gst_asrplugin_class_init (GstasrpluginClass * klass)
+{
+  GObjectClass *gobject_class;
+  GstElementClass *gstelement_class;
+
+  gobject_class = (GObjectClass *) klass;
+  gstelement_class = (GstElementClass *) klass;
+
+  gobject_class->set_property = gst_asrplugin_set_property;
+
+  gobject_class->finalize = gst_asrplugin_finalize;
+
+  g_object_class_install_property (gobject_class, PROP_DECODER_EXECUTABLE,
+      g_param_spec_string ("decoder_executable", "Decoder Executable", "Path to the decoder executable, either absolute or relative to the speechrecognition/decoders directory.",
+          "", G_PARAM_WRITABLE));
+
+  g_object_class_install_property (gobject_class, PROP_REQUEST_ID,
+      g_param_spec_string ("request_id", "Request ID", "Identifier of the request. Should be unique at least for the clear interval set in speech recognition settings.",
+          "", G_PARAM_WRITABLE));
+
+  g_object_class_install_property (gobject_class, PROP_LUCIDA_USER,
+      g_param_spec_string ("lucida_user", "Lucida user", "User name of the user stored in the Lucida database. This is required for some decoders, optional for others.",
+          "", G_PARAM_WRITABLE));
+
+  g_object_class_install_property (gobject_class, PROP_MESSAGE_CONTEXT,
+      g_param_spec_string ("message_context", "Message Context", "Context of the message.
This typically contains the user name and decoder-specific context received during the last request.",
+          "", G_PARAM_WRITABLE));
+
+  gst_asrplugin_signals[INTERIM_RESULT_SIGNAL] = g_signal_new(
+      "interim-result", G_TYPE_FROM_CLASS(klass), G_SIGNAL_RUN_LAST,
+      G_STRUCT_OFFSET(GstasrpluginClass, interim_result),
+      NULL, NULL, NULL, G_TYPE_NONE, 1, G_TYPE_STRING);
+
+  gst_asrplugin_signals[FINAL_RESULT_SIGNAL] = g_signal_new(
+      "final-result", G_TYPE_FROM_CLASS(klass), G_SIGNAL_RUN_LAST,
+      G_STRUCT_OFFSET(GstasrpluginClass, final_result),
+      NULL, NULL, NULL, G_TYPE_NONE, 1, G_TYPE_STRING);
+
+  gst_element_class_set_details_simple (gstelement_class,
+      "asrplugin",
+      "Speech/Audio",
+      "Generic speech to text converter for Lucida AI",
+      "Kamal Galrani <>");
+
+  gst_element_class_add_pad_template (gstelement_class, gst_static_pad_template_get (&src_factory));
+  gst_element_class_add_pad_template (gstelement_class, gst_static_pad_template_get (&sink_factory));
+}
+
+/* initialize the new element
+ * instantiate pads and add them to element
+ * set pad callback functions
+ * initialize instance structure
+ */
+static void gst_asrplugin_init (Gstasrplugin * filter)
+{
+  filter->sinkpad = gst_pad_new_from_static_template (&sink_factory, "sink");
+  gst_pad_set_event_function (filter->sinkpad, gst_asrplugin_sink_event);
+  gst_pad_set_chain_function (filter->sinkpad, gst_asrplugin_sink_chain);
+  gst_pad_set_query_function (filter->sinkpad, gst_asrplugin_sink_query);
+  gst_pad_use_fixed_caps (filter->sinkpad);
+  gst_element_add_pad (GST_ELEMENT (filter), filter->sinkpad);
+
+  filter->srcpad = gst_pad_new_from_static_template (&src_factory, "src");
+  gst_pad_use_fixed_caps (filter->srcpad);
+  gst_element_add_pad (GST_ELEMENT (filter), filter->srcpad);
+
+  filter->error = NULL;
+  filter->decoder_out = NULL;
+  filter->segment_length = 0.0;
+  filter->shutting_down = false;
+  filter->count = 0;
+  g_strlcpy(filter->request_id, "00000000-0000-0000-0000-000000000000", 64);
+  g_strlcpy(filter->decoder_executable, "", 512);
+}
+
+/* handle_error function
+ * Formats the error, reports it via the final-result signal and pushes EOS downstream
+ */
+static void GST_ASRPLUGIN_HANDLE_ERROR(Gstasrplugin * filter, int type, const gchar* message, ...)
+{
+  gchar buffer[1024];
+  va_list args;
+  va_start(args, message);
+  vsnprintf(buffer, sizeof(buffer), message, args);
+  va_end(args);
+  GST_ERROR_OBJECT (filter, "%s", buffer);
+  GST_ERROR_OBJECT (filter, "%d", type);
+
+  json_t *root = json_object();
+
+  json_object_set_new( root, "error", json_string (buffer) );
+  json_object_set_new( root, "type", json_integer (type));
+
+  g_signal_emit (filter, gst_asrplugin_signals[FINAL_RESULT_SIGNAL], 0, json_dumps(root, 0));
+  json_decref(root);
+
+  gst_pad_push_event(filter->srcpad, gst_event_new_eos());
+}
+
+/* load_decoder function
+ * Loads the decoder
+ */
+static void gst_asrplugin_load_decoder(Gstasrplugin * filter, const gchar* executable)
+{
+  gchar decoder_command[512];
+  FILE *fp;
+  guint port;
+  fp = popen("./src/get_free_port 2>/dev/null", "r");
+  if (fp == NULL || fscanf(fp, "%u", &port) != 1 ) {
+    if (fp != NULL) pclose(fp);
+    GST_ASRPLUGIN_HANDLE_ERROR(filter, TRY_AGAIN, "I couldn't find a free port to run decoder on!!! Please try again later...");
+    return;
+  }
+  pclose(fp);
+
+  snprintf (decoder_command, sizeof(decoder_command), "%s --port %u", executable, port);
+
+  GST_DEBUG_OBJECT (filter, "Loading decoder with command: %s", decoder_command);
+  filter->decoder_out = popen (decoder_command, "r");
+
+  if (G_UNLIKELY (filter->decoder_out == NULL)) {
+    GST_ASRPLUGIN_HANDLE_ERROR(filter, FATAL, "Something went wrong while loading decoder!!!
Please check the log..."); + return; + } + + sleep(1); + + #if (!GLIB_CHECK_VERSION (2, 36, 0)) + g_type_init (); + #endif + + filter->socket = g_object_new (THRIFT_TYPE_SOCKET, "hostname", "localhost", "port", port, NULL); + filter->transport = g_object_new (THRIFT_TYPE_BUFFERED_TRANSPORT, "transport", filter->socket, NULL); + filter->protocol = g_object_new (THRIFT_TYPE_BINARY_PROTOCOL, "transport", filter->transport, NULL); + thrift_transport_open (filter->transport, &(filter->error)); + + if (filter->error) { + GST_ASRPLUGIN_HANDLE_ERROR(filter, FATAL, "Could not initiate thrift transport to decoder!!! Please check the log..."); + g_clear_error (&(filter->error)); + pclose (filter->decoder_out); + filter->decoder_out = NULL; + return; + } + + filter->client = g_object_new (TYPE_A_S_R_THRIFT_SERVICE_CLIENT, "input_protocol", filter->protocol, "output_protocol", filter->protocol, NULL); + + filter->shutting_down = false; + if (G_UNLIKELY (pthread_create(&filter->tid, NULL, gst_asrplugin_read_decoder, (void *) filter) != 0)) { + GST_ASRPLUGIN_HANDLE_ERROR(filter, FATAL, "Something went wrong while trying to start decoder read thread!!! Please check the log..."); + filter->shutting_down = true; + pclose(filter->decoder_out); + filter->decoder_out = NULL; + return; + } + + GST_DEBUG_OBJECT (filter, "Successfully loaded decoder: %s", executable); + g_strlcpy(filter->decoder_executable, executable, 512); +} + +/* unload_decoder function + * Unloads decoder + */ +static void gst_asrplugin_unload_decoder(Gstasrplugin * filter) +{ + filter->shutting_down = true; + + GST_DEBUG_OBJECT (filter, "Stopping decoder if it is running..."); + a_s_r_thrift_service_if_abort(filter->client, &(filter->error)); + + GST_DEBUG_OBJECT (filter, "Closing thrift transports..."); + thrift_transport_close (filter->transport, NULL); + + g_object_unref (filter->client); + g_object_unref (filter->protocol); + g_object_unref (filter->transport); + g_object_unref (filter->socket); + + GST_DEBUG_OBJECT (filter, "Waiting for decoder read thread to join..."); + pthread_join (filter->tid, NULL); + + GST_DEBUG_OBJECT (filter, "Shutting down decoder subprocess..."); + pclose(filter->decoder_out); + filter->decoder_out = NULL; +} + +/* set_property function + * Set properties of plugin. + */ +static void gst_asrplugin_set_property (GObject * object, guint prop_id, const GValue * value, GParamSpec * pspec) +{ + Gstasrplugin *filter = GST_ASRPLUGIN (object); + + switch (prop_id) { + case PROP_DECODER_EXECUTABLE: + if (G_UNLIKELY (filter->decoder_out != NULL)) { + GST_WARNING_OBJECT (filter, "A decoder is already loaded while trying to load decoder!!! I'll try to unload existing decoder..."); + gst_asrplugin_unload_decoder(filter); + } + gst_asrplugin_load_decoder(filter, g_value_get_string (value)); + break; + case PROP_REQUEST_ID: + if (G_UNLIKELY (filter->decoder_out == NULL)) { + GST_ASRPLUGIN_HANDLE_ERROR(filter, NOT_IN_ORDER, "Setting request identifier before loading decoder makes no sense!!! Trying to restart worker..."); + break; + } + a_s_r_thrift_service_if_request_id(filter->client, g_value_get_string (value), &(filter->error)); + g_strlcpy(filter->request_id, g_value_get_string (value), 64); + break; + case PROP_LUCIDA_USER: + if (G_UNLIKELY (filter->decoder_out == NULL)) { + GST_ASRPLUGIN_HANDLE_ERROR(filter, NOT_IN_ORDER, "Setting user details before loading decoder makes no sense!!! 
Trying to restart worker..."); + break; + } + a_s_r_thrift_service_if_user(filter->client, g_value_get_string (value), &(filter->error)); + break; + case PROP_MESSAGE_CONTEXT: + if (G_UNLIKELY (filter->decoder_out == NULL)) { + GST_ASRPLUGIN_HANDLE_ERROR(filter, NOT_IN_ORDER, "Setting message context before loading decoder makes no sense!!! Trying to restart worker..."); + break; + } + a_s_r_thrift_service_if_context(filter->client, g_value_get_string (value), &(filter->error)); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec); + break; + } +} + +/* stop_decoder function + * Stop decoder by closing data FIFO + */ +static gboolean gst_asrplugin_stop_decoder (Gstasrplugin * filter) +{ + if (G_UNLIKELY (filter->decoder_out == NULL)) { + GST_ASRPLUGIN_HANDLE_ERROR(filter, NOT_IN_ORDER, "Stopping decoder before loading it makes no sense!!! Trying to restart worker..."); + return false; + } + GST_DEBUG_OBJECT (filter, "%s: Sending stop command to decoder...", filter->request_id); + a_s_r_thrift_service_if_stop(filter->client, &(filter->error)); + filter->segment_length = 0; + return true; +} + +/* start_decoder function + * Send start command to decoder control channel + */ +static gboolean gst_asrplugin_start_decoder (Gstasrplugin * filter) +{ + if (G_UNLIKELY (filter->decoder_out == NULL)) { + GST_ASRPLUGIN_HANDLE_ERROR(filter, NOT_IN_ORDER, "Starting decoder before loading it makes no sense!!! Trying to restart worker..."); + return false; + } + GST_DEBUG_OBJECT (filter, "%s: Sending start command to decoder...", filter->request_id); + a_s_r_thrift_service_if_start(filter->client, &(filter->error)); + filter->segment_length = 0; + return true; +} + +static void *gst_asrplugin_read_decoder( void *ptr ) +{ + Gstasrplugin *filter = (Gstasrplugin *) ptr; + GST_DEBUG_OBJECT (filter, "%s: Starting decoder listener thread...", filter->request_id); + gchar * line = NULL; + size_t dummy = 0; + ssize_t bytes; + + while ( ! filter->shutting_down ) { + bytes = getline(&line, &dummy, filter->decoder_out); + if (G_UNLIKELY (bytes == -1)) { + GST_ASRPLUGIN_HANDLE_ERROR(filter, FATAL, "Decoder crashed while processing data!!! Trying to restart worker..."); + break; + } + + json_t *root; + json_t *event; + json_t *status; + json_t *data; + json_error_t error; + root = json_loads (line, 0, &error); + if ( !root ) { + GST_ELEMENT_WARNING (filter, RESOURCE, READ, (NULL), ("%s: Received invalid JSON message from decoder!!! Ignoring message...", filter->request_id)); + continue; + } + event = json_object_get (root, "event"); + status = json_object_get (root, "status"); + data = json_object_get (root, "data"); + if ( !json_is_string (event) ) { + GST_ELEMENT_WARNING (filter, RESOURCE, READ, (NULL), ("%s: Received invalid JSON message from decoder: No field 'event' found. Ignoring message...", filter->request_id)); + json_decref(root); + continue; + } + if ( !json_is_integer (status) ) { + GST_ELEMENT_WARNING (filter, RESOURCE, READ, (NULL), ("%s: Received invalid JSON message from decoder: No field 'status' found. Ignoring message...", filter->request_id)); + json_decref(root); + continue; + } + if ( !json_is_string (data) ) { + GST_ELEMENT_WARNING (filter, RESOURCE, READ, (NULL), ("%s: Received invalid JSON message from decoder: No field 'data' found. 
Ignoring message...", filter->request_id)); + json_decref(root); + continue; + } + + if (strcmp(json_string_value (event), "interim_result") == 0) { + GST_DEBUG_OBJECT (filter, "%s: Interim results received from decoder", filter->request_id); + g_signal_emit (filter, gst_asrplugin_signals[INTERIM_RESULT_SIGNAL], 0, json_string_value (data)); + } else if (strcmp(json_string_value (event), "final_result") == 0) { + GST_DEBUG_OBJECT (filter, "%s: Final results received from decoder", filter->request_id); + + GstBuffer *buffer = gst_buffer_new_and_alloc (strlen (json_string_value (data)) + 1); + gst_buffer_fill (buffer, 0, json_string_value (data), strlen (json_string_value (data))); + gst_buffer_memset (buffer, strlen (json_string_value (data)) , '\n', 1); + gst_pad_push(filter->srcpad, buffer); + + g_signal_emit (filter, gst_asrplugin_signals[FINAL_RESULT_SIGNAL], 0, json_string_value (data)); + gst_pad_push_event(filter->srcpad, gst_event_new_eos()); + } else if (strcmp(json_string_value (event), "eos") == 0) { + GST_DEBUG_OBJECT (filter, "%s: End of stream received from decoder", filter->request_id); + g_strlcpy(filter->request_id, "00000000-0000-0000-0000-000000000000", 64); + gst_pad_push_event(filter->srcpad, gst_event_new_eos()); + } else if (strcmp(json_string_value (event), "error") == 0) { + GST_ASRPLUGIN_HANDLE_ERROR(filter, json_integer_value (status), "%s: %s", filter->request_id, json_string_value (data)); + } else if (strcmp(json_string_value (event), "warn") == 0) { + GST_WARNING_OBJECT (filter, "%s: %s", filter->request_id, json_string_value (data)); + } else if (strcmp(json_string_value (event), "info") == 0) { + GST_INFO_OBJECT (filter, "%s: %s", filter->request_id, json_string_value (data)); + } else if (strcmp(json_string_value (event), "debug") == 0) { + GST_DEBUG_OBJECT (filter, "%s: %s", filter->request_id, json_string_value (data)); + } + json_decref(root); + } + + GST_DEBUG_OBJECT (filter, "Terminating decoder listener thread..."); + return NULL; +} + + +/* GstElement vmethod implementations */ + +/* sink_query function + * this function handles sink queries + */ +static gboolean gst_asrplugin_sink_query (GstPad *pad, GstObject * parent, GstQuery * query) +{ + gboolean ret; + + switch (GST_QUERY_TYPE (query)) { + case GST_QUERY_CAPS: + ret = TRUE; + GstCaps *new_caps = gst_caps_new_simple ("audio/x-raw", + "format", G_TYPE_STRING, "S16LE", + "rate", G_TYPE_INT, 16000, + "channels", G_TYPE_INT, 1, NULL); + + gst_query_set_caps_result (query, new_caps); + gst_caps_unref (new_caps); + break; + default: + ret = gst_pad_query_default (pad, parent, query); + break; + } + return ret; +} + +/* sink_event function + * this function handles sink events + */ +static gboolean gst_asrplugin_sink_event (GstPad * pad, GstObject * parent, GstEvent * event) +{ + Gstasrplugin *filter; + filter = GST_ASRPLUGIN (parent); + + GST_DEBUG_OBJECT(filter, "%s: Handling %s event", filter->request_id, GST_EVENT_TYPE_NAME(event)); + + switch (GST_EVENT_TYPE (event)) { + case GST_EVENT_SEGMENT: + return gst_asrplugin_start_decoder (filter); + case GST_EVENT_EOS: + return gst_asrplugin_stop_decoder (filter); + default: + return gst_pad_event_default (pad, parent, event); + } +} + +/* sink_chain function + * this function does the actual processing + */ +static GstFlowReturn gst_asrplugin_sink_chain (GstPad * pad, GstObject * parent, GstBuffer * buf) +{ + Gstasrplugin *filter; + filter = GST_ASRPLUGIN (parent); + + GstMapInfo map; + if (G_UNLIKELY (gst_buffer_map (buf, &map, GST_MAP_READ) == 
false)) {
+    goto memory_map_issue;
+  }
+  if (G_UNLIKELY (filter->decoder_out == NULL)) {
+    goto decoder_not_loaded;
+  }
+  GST_DEBUG_OBJECT (filter, "%s: Pushing chunk to decoder... %ld", filter->request_id, map.size);
+
+  /* copy the mapped data: the GstBuffer owns map.data, so the byte array must not take ownership of it */
+  GByteArray* chunk = g_byte_array_new ();
+  g_byte_array_append (chunk, map.data, map.size);
+
+  a_s_r_thrift_service_if_push(filter->client, chunk, &(filter->error));
+  filter->segment_length = filter->segment_length + (float) map.size / 32000.0;
+  g_byte_array_free (chunk, TRUE);
+
+  GST_DEBUG_OBJECT (filter, "%s: Pushed chunk to decoder... %ld", filter->request_id, map.size);
+
+  gst_buffer_unmap (buf, &map);
+  gst_buffer_unref (buf);
+  return GST_FLOW_OK;
+
+  /* special cases */
+  memory_map_issue: {
+    GST_ELEMENT_WARNING (filter, RESOURCE, READ, (NULL), ("%s: Error while reading data from buffer!!! Ignoring chunk...", filter->request_id));
+
+    /* the map failed, so there is nothing to unmap here */
+    gst_buffer_unref (buf);
+    return GST_FLOW_OK;
+  }
+  decoder_not_loaded: {
+    GST_ASRPLUGIN_HANDLE_ERROR(filter, FATAL, "Data received before decoder was loaded!!! Trying to restart worker...");
+
+    gst_buffer_unmap (buf, &map);
+    gst_buffer_unref (buf);
+    return GST_FLOW_ERROR;
+  }
+}
+
+static void gst_asrplugin_finalize (GObject * object)
+{
+  Gstasrplugin *filter;
+  filter = GST_ASRPLUGIN (object);
+
+  GST_WARNING_OBJECT (filter, "Shutting down ASR Plugin...");
+  gst_asrplugin_unload_decoder(filter);
+
+  G_OBJECT_CLASS(parent_class)->finalize(object);
+}
+
+void G_GNUC_NO_INSTRUMENT gst_lucida_logger (GstDebugCategory *category, GstDebugLevel level, const gchar *file, const gchar *function, gint line, GObject *object, GstDebugMessage *message, gpointer user_data) {
+  time_t t = time(NULL);
+  struct tm tm = * localtime (&t);
+  if (level <= gst_debug_category_get_threshold (category)) {
+    gchar* level_str;
+    if (level == GST_LEVEL_ERROR) {
+      level_str = "\033[31m ERROR";
+    } else if (level == GST_LEVEL_WARNING) {
+      level_str = "\033[33m WARN";
+    } else if (level == GST_LEVEL_FIXME) {
+      level_str = "\033[43m FIXME";
+    } else if (level == GST_LEVEL_INFO) {
+      level_str = "\033[36m INFO";
+    } else if (level == GST_LEVEL_DEBUG) {
+      level_str = "\033[37m DEBUG";
+    } else if (level == GST_LEVEL_LOG) {
+      level_str = " LOG";
+    } else if (level == GST_LEVEL_TRACE) {
+      level_str = " TRACE";
+    } else if (level == GST_LEVEL_MEMDUMP) {
+      level_str = "MEMDUMP";
+    } else {
+      level_str = "UNKNOWN";  /* fallback so level_str is never used uninitialised */
+    }
+    gchar category_str[32];
+    gchar* category_ptr;
+    g_strlcpy(category_str, category->name, 32);
+    category_ptr = &category_str[0];
+    while (*category_ptr) {
+      *category_ptr = toupper((unsigned char) *category_ptr);
+      category_ptr++;
+    }
+    printf ("%04d-%02d-%02d %02d:%02d:%02d - \033[1m%7s\033[0m: \033[1m%10s\033[0m: %s\n", tm.tm_year + 1900, tm.tm_mon + 1, tm.tm_mday, tm.tm_hour, tm.tm_min, tm.tm_sec, level_str, category_str, gst_debug_message_get(message));
+  }
+}
+
+/* entry point to initialize the plug-in
+ * initialize the plug-in itself
+ * register the element factories and other features
+ */
+static gboolean asrplugin_init (GstPlugin * asrplugin)
+{
+  GST_DEBUG_CATEGORY_INIT (gst_asrplugin_debug, "asrplugin", 0, DESCRIPTION);
+  gst_debug_remove_log_function (gst_debug_log_default);
+  gst_debug_add_log_function(&gst_lucida_logger, NULL, NULL);
+  return gst_element_register (asrplugin, "asrplugin", GST_RANK_NONE, GST_TYPE_ASRPLUGIN);
+}
+
+/* PACKAGE: this is usually set by autotools depending on some _INIT macro
+ * in configure.ac and then written into and defined in config.h, but we can
+ * just set it ourselves here in case someone doesn't use autotools to
+ * compile this code.
GST_PLUGIN_DEFINE needs PACKAGE to be defined. + */ +#ifndef PACKAGE +#define PACKAGE "asrplugin" +#endif + +/* gstreamer looks for this structure to register asrplugins + */ +GST_PLUGIN_DEFINE ( + GST_VERSION_MAJOR, + GST_VERSION_MINOR, + asrplugin, + "Generic GST plugin for speech recognition in Lucida AI", + asrplugin_init, + VERSION, + "LGPL", + "GStreamer", + "http://gstreamer.net/" +) diff --git a/lucida/speechrecognition/src/gstplugin/src/gstasrplugin.h b/lucida/speechrecognition/src/gstplugin/src/gstasrplugin.h new file mode 100644 index 000000000..387125374 --- /dev/null +++ b/lucida/speechrecognition/src/gstplugin/src/gstasrplugin.h @@ -0,0 +1,107 @@ +/* + * GStreamer + * Copyright (C) 2005 Thomas Vander Stichele + * Copyright (C) 2005 Ronald S. Bultje + * Copyright (C) 2017 Kamal Galrani <> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Alternatively, the contents of this file may be used under the + * GNU Lesser General Public License Version 2.1 (the "LGPL"), in + * which case the following provisions apply instead of the ones + * mentioned above: + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Library General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Library General Public License for more details. + * + * You should have received a copy of the GNU Library General Public + * License along with this library; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 02111-1307, USA. 
+ */
+
+#ifndef __GST_ASRPLUGIN_H__
+#define __GST_ASRPLUGIN_H__
+
+#include <gst/gst.h>
+#include <stdio.h>
+#include <pthread.h>
+
+#include <thrift/c_glib/protocol/thrift_binary_protocol.h>
+#include <thrift/c_glib/transport/thrift_buffered_transport.h>
+#include <thrift/c_glib/transport/thrift_socket.h>
+#include "a_s_r_thrift_service.h"
+
+G_BEGIN_DECLS
+
+/* #defines don't like whitespacey bits */
+
+#define GST_TYPE_ASRPLUGIN \
+  (gst_asrplugin_get_type())
+#define GST_ASRPLUGIN(obj) \
+  (G_TYPE_CHECK_INSTANCE_CAST((obj),GST_TYPE_ASRPLUGIN,Gstasrplugin))
+#define GST_ASRPLUGIN_CLASS(klass) \
+  (G_TYPE_CHECK_CLASS_CAST((klass),GST_TYPE_ASRPLUGIN,GstasrpluginClass))
+#define GST_IS_ASRPLUGIN(obj) \
+  (G_TYPE_CHECK_INSTANCE_TYPE((obj),GST_TYPE_ASRPLUGIN))
+#define GST_IS_ASRPLUGIN_CLASS(klass) \
+  (G_TYPE_CHECK_CLASS_TYPE((klass),GST_TYPE_ASRPLUGIN))
+
+typedef struct _Gstasrplugin Gstasrplugin;
+typedef struct _GstasrpluginClass GstasrpluginClass;
+
+struct _Gstasrplugin
+{
+  GstElement element;
+
+  GstPad *sinkpad, *srcpad;
+
+  gint count;
+  gboolean shutting_down;
+  ThriftSocket *socket;
+  ThriftTransport *transport;
+  ThriftProtocol *protocol;
+  ASRThriftServiceIf *client;
+  GError *error;
+  FILE* decoder_out;
+  gfloat segment_length;
+  pthread_t tid;
+  gchar request_id[64];
+  gchar decoder_executable[512];
+};
+
+struct _GstasrpluginClass
+{
+  GstElementClass parent_class;
+  void (*interim_result)(GstElement *element, const gchar *result_str);
+  void (*final_result)(GstElement *element, const gchar *result_str);
+};
+
+GType gst_asrplugin_get_type (void);
+
+G_END_DECLS
+
+#endif /* __GST_ASRPLUGIN_H__ */
diff --git a/lucida/speechrecognition/src/logger.py b/lucida/speechrecognition/src/logger.py
new file mode 100644
index 000000000..c7b0cda12
--- /dev/null
+++ b/lucida/speechrecognition/src/logger.py
@@ -0,0 +1,47 @@
+import logging
+from copy import copy
+
+# These are the escape sequences needed to get colored output
+RESET = "\033[0m"
+BOLD = "\033[1m"
+
+COLORS = {
+    'CRITICAL': '\033[41m',
+    'ERROR': '\033[31m',
+    'WARNING': '\033[33m',
+    'INFO': '\033[36m',
+    'DEBUG': '\033[37m'
+}
+
+class ColoredFormatter(logging.Formatter):
+    def __init__(self, fmt, datefmt):
+        logging.Formatter.__init__(self, fmt=fmt, datefmt=datefmt)
+
+    def format(self, record):
+        record_cpy = copy(record)
+
+        if record_cpy.levelname in COLORS:
+            record_cpy.levelname = COLORS[record_cpy.levelname] + record_cpy.levelname.rjust(7)
+            record_cpy.levelname = BOLD + record_cpy.levelname + RESET
+
+        record_cpy.name = BOLD + record_cpy.name.rjust(10) + RESET
+
+        return logging.Formatter.format(self, record_cpy)
+
+class ColoredLogger(logging.Logger):
+    fmt = '%(asctime)s - %(levelname)7s: %(name)10s: %(message)s'
+    datefmt = '%Y-%m-%d %H:%M:%S'
+
+    def __init__(self, name):
+        logging.Logger.__init__(self, name, logging.DEBUG)
+
+        color_formatter = ColoredFormatter(fmt=self.fmt, datefmt=self.datefmt)
+
+        console = logging.StreamHandler()
+        console.setFormatter(color_formatter)
+
+        self.addHandler(console)
+        return
+
+
+logging.setLoggerClass(ColoredLogger)
diff --git a/lucida/speechrecognition/src/pipeline.py b/lucida/speechrecognition/src/pipeline.py
new file mode 100644
index 000000000..14ad12ffd
--- /dev/null
+++ b/lucida/speechrecognition/src/pipeline.py
@@ -0,0 +1,196 @@
+import os, sys
+import gi
+gi.require_version('Gst', '1.0')
+from gi.repository import GObject, Gst
+GObject.threads_init()
+Gst.init(None)
+
+import logging
+import thread
+import subprocess
+
+logger = logging.getLogger("PIPELINE")
+
+class DecoderPipeline(object):
+    def __init__(self, conf):
+        logger.info("Initialising %s speech to text decoder" % (conf['decoder']))
+
+        self.interim_result_handler = None
self.final_result_handler = None + self.eos_handler = None + self.error_handler = None + + self.silence_threshold = conf['silence_threshold'] + self.silence_timeout = conf['silence_timeout'] + self.silence_timeout_diff = conf['silence_timeout'] - conf['initial_silence_timeout'] + self.silent_for = self.silence_timeout_diff + self.data_directory = "/tmp/lucida/speech" + + self.request_id = "00000000-0000-0000-0000-000000000000" + + self._create_pipeline(conf) + + def _create_pipeline(self, conf): + Gst.Registry.get().scan_path(os.getcwd() + "/src/gstplugin/src") + Gst.debug_set_threshold_from_string(conf['gstreamer_verbosity'], True) + + self.asr = Gst.ElementFactory.make("asrplugin", "asr") + self.appsrc = Gst.ElementFactory.make("appsrc", "appsrc") + self.decodebin = Gst.ElementFactory.make("decodebin", "decodebin") + self.audioconvert = Gst.ElementFactory.make("audioconvert", "audioconvert") + self.audioresample = Gst.ElementFactory.make("audioresample", "audioresample") + self.tee = Gst.ElementFactory.make("tee", "tee") + self.queue1 = Gst.ElementFactory.make("queue", "queue1") + self.level = Gst.ElementFactory.make("level", "level") + self.audiosink = Gst.ElementFactory.make("filesink", "audiosink") + self.queue2 = Gst.ElementFactory.make("queue", "queue2") + self.datasink = Gst.ElementFactory.make("fakesink", "datasink") + + self.asr.set_property("decoder_executable", os.getcwd() + "/decoders/" + conf['decoder'] + "/decoder") + + self.level.set_property("post-messages", True) + self.appsrc.set_property("is-live", True) + self.audiosink.set_property("location", "/dev/null") + + logger.info("Created GStreamer elements") + + self.pipeline = Gst.Pipeline() + for element in [self.appsrc, self.decodebin, self.audioconvert, self.audioresample, self.tee, self.queue1, self.level, self.audiosink, self.queue2, self.asr, self.datasink]: + logger.debug("Adding element %s to the pipeline" % element.__class__.__name__) + self.pipeline.add(element) + + logger.info("Linking GStreamer elements") + + self.appsrc.link(self.decodebin) + self.decodebin.connect('pad-added', self._connect_audio_converter) + self.audioconvert.link(self.audioresample) + self.audioresample.link(self.tee) + + self.tee.link(self.queue1) + self.queue1.link(self.level) + self.level.link(self.audiosink) + + self.tee.link(self.queue2) + self.queue2.link(self.asr) + self.asr.link(self.datasink) + + # Create bus and connect several handlers + self.bus = self.pipeline.get_bus() + self.bus.add_signal_watch() + self.bus.enable_sync_message_emission() + self.bus.connect('message::eos', self._on_eos) + self.bus.connect('message::error', self._on_error) + self.bus.connect('sync-message::element', self._on_sync_message) + + self.asr.connect('interim-result', self._on_interim_result) + self.asr.connect('final-result', self._on_final_result) + + logger.info("Setting pipeline to READY") + self.pipeline.set_state(Gst.State.READY) + logger.info("Set pipeline to READY") + + def _connect_audio_converter(self, element, pad): + logger.info("%s: Connecting audio converter" % self.request_id) + pad.link(self.audioconvert.get_static_pad("sink")) + logger.info("%s: Connected audio converter" % self.request_id) + + def _on_eos(self, bus, msg): + self.silent_for = 0.0 + logger.info("%s: Received EOS signal from decoder" % self.request_id) + self.finish_request() + if self.eos_handler: + self.eos_handler() + + def _on_error(self, bus, msg): + error = msg.parse_error() + logger.error(error) + self.cancel() + if self.error_handler: + 
self.error_handler(error[0].message)
+
+    def _on_sync_message(self, bus, msg):
+        if msg.get_structure() is None:
+            return
+        if msg.get_structure().get_name() == 'level':
+            if msg.get_structure().get_value("rms")[0] < self.silence_threshold:
+                self.silent_for = self.silent_for + 0.1
+            elif self.silent_for != self.silence_timeout_diff:
+                self.silent_for = 0.0
+            if self.silent_for > self.silence_timeout:
+                logger.info("%s: Silent for more than %s seconds!!! Pushing EOS to pipeline..." % (self.request_id, str(self.silence_timeout)))
+                self.appsrc.emit("end-of-stream")
+            # level messages arrive every 100ms, so keep this at debug to avoid log spam
+            logger.debug("%s: Silent for %s seconds...." % (self.request_id, str(self.silent_for)))
+
+    def _on_interim_result(self, asr, hyp):
+        logger.debug("%s: Received interim result from decoder" % (self.request_id))
+        if self.interim_result_handler:
+            self.interim_result_handler(hyp)
+
+    def _on_final_result(self, asr, hyp):
+        logger.debug("%s: Received final result from decoder %s" % (self.request_id, hyp))
+        if self.final_result_handler:
+            self.final_result_handler(hyp)
+
+    def finish_request(self):
+        logger.info("%s: Resetting decoder state" % self.request_id)
+        self.audiosink.set_state(Gst.State.NULL)
+        self.audiosink.set_property("location", "/dev/null")
+        self.audiosink.set_state(Gst.State.PLAYING)
+        self.pipeline.set_state(Gst.State.NULL)
+        self.request_id = "00000000-0000-0000-0000-000000000000"
+        self.silent_for = 0.0
+
+    def init_request(self, id, caps_str, user, context=""):
+        self.pipeline.set_state(Gst.State.PAUSED)
+        self.request_id = id
+        self.silent_for = self.silence_timeout_diff
+        logger.info("%s: Initializing request" % (self.request_id))
+        if caps_str and len(caps_str) > 0:
+            logger.info("%s: Setting caps to %s" % (self.request_id, caps_str))
+            caps = Gst.caps_from_string(caps_str)
+            self.appsrc.set_property("caps", caps)
+        else:
+            self.appsrc.set_property("caps", None)
+
+        self.asr.set_property("request_id", self.request_id)
+        self.asr.set_property("lucida_user", user)
+        self.asr.set_property("message_context", context)
+
+        self.audiosink.set_state(Gst.State.NULL)
+        self.audiosink.set_property('location', "%s/%s.raw" % (self.data_directory, id))
+        self.audiosink.set_state(Gst.State.PLAYING)
+
+        self.pipeline.set_state(Gst.State.PLAYING)
+        self.audiosink.set_state(Gst.State.PLAYING)
+
+    def process_data(self, data):
+        logger.debug('%s: Pushing buffer of size %d to decoder' % (self.request_id, len(data)))
+        buf = Gst.Buffer.new_allocate(None, len(data), None)
+        buf.fill(0, data)
+        self.appsrc.emit("push-buffer", buf)
+
+    def end_request(self):
+        logger.info("%s: Pushing EOS to pipeline" % self.request_id)
+        self.appsrc.emit("end-of-stream")
+        self.silent_for = 0.0
+
+    def set_interim_result_handler(self, handler):
+        self.interim_result_handler = handler
+
+    def set_final_result_handler(self, handler):
+        self.final_result_handler = handler
+
+    def set_eos_handler(self, handler):
+        self.eos_handler = handler
+
+    def set_error_handler(self, handler):
+        self.error_handler = handler
+
+    def cancel(self):
+        self.end_request()
+        id = self.request_id
+        self.finish_request()
+        try:
+            os.remove("%s/%s.raw" % (self.data_directory, id))
+        except:
+            pass
diff --git a/lucida/speechrecognition/src/server.py b/lucida/speechrecognition/src/server.py
new file mode 100644
index 000000000..7599d7482
--- /dev/null
+++ b/lucida/speechrecognition/src/server.py
@@ -0,0 +1,83 @@
+import os, sys
+if os.path.basename(os.getcwd()) != "speechrecognition":
+    print "This script should be run with `make start_server` from the speechrecognition
directory" + sys.exit(1) + +import logging +import logging.handlers +import os, sys +import click +from gi.repository import GObject +import threading +import time + +import configuration +from pipeline import DecoderPipeline +from worker import SocketHandler +logger = logging.getLogger("SERVER") + +SILENCE_TIMEOUT = 2 +INITIAL_SILENCE_TIMEOUT = 5 +RESPONSE_TIMEOUT = 30 +CONNECT_TIMEOUT = 5 +MAX_CALL_DURATION = 3600 +MAX_SEGMENT_DURATION = 600 + +try: + os.makedirs("/tmp/lucida/speech") +except: + pass + +DECODERS = [] +for decoder in next(os.walk("decoders/"))[1]: + if os.path.isfile("decoders/" + decoder + "/decoder") and os.access("decoders/" + decoder + "/decoder", os.X_OK): + DECODERS.append(decoder) + +@click.command() +@click.option("--decoder", prompt="Select speech to text decoder " + str(DECODERS), type=click.Choice(DECODERS), required=True, help="Speech to text decoder") +@click.option("--threads", prompt="Enter number of decoders to run in parallel [1-500]", default=1, type=click.IntRange(min=1, max=500), required=True, show_default=True, help="Number of decoders to run in parallel") +def main(decoder, threads): + conf = configuration.load() + + global SILENCE_TIMEOUT + SILENCE_TIMEOUT = conf['silence_timeout'] + global INITIAL_SILENCE_TIMEOUT + INITIAL_SILENCE_TIMEOUT = conf['initial_silence_timeout'] + global RESPONSE_TIMEOUT + RESPONSE_TIMEOUT = conf['response_timeout'] + global CONNECT_TIMEOUT + CONNECT_TIMEOUT = conf['retry_after'] + global MAX_CALL_DURATION + MAX_CALL_DURATION = conf['max_call_duration'] + global MAX_SEGMENT_DURATION + MAX_SEGMENT_DURATION = conf['max_segment_duration'] + + logger.setLevel(conf['worker_verbosity']) + + conf['decoder'] = decoder + + if threads > 1: + import tornado.process + logging.info("Forking into %d processes" % threads) + tornado.process.fork_processes(threads) + + pipeline = DecoderPipeline(conf) + + GObjectLoop = GObject.MainLoop() + GObjectThread = threading.Thread(target=GObjectLoop.run) + GObjectThread.daemon = True + GObjectThread.start() + + while True: + ws = SocketHandler(conf['master'], pipeline, conf) + try: + logger.info("Opening websocket connection to master server") + ws.connect() + ws.run_forever() + except Exception: + logger.error("Couldn't connect to server, waiting for %d seconds", CONNECT_TIMEOUT) + time.sleep(CONNECT_TIMEOUT - 1) + time.sleep(1) + +if __name__ == "__main__": + main() diff --git a/lucida/speechrecognition/src/worker.py b/lucida/speechrecognition/src/worker.py new file mode 100644 index 000000000..b3b29ccfc --- /dev/null +++ b/lucida/speechrecognition/src/worker.py @@ -0,0 +1,197 @@ +import os, sys, time + +import logging +import logging.config +import logging.handlers +import thread +import json + +from ws4py.client.threadedclient import WebSocketClient +import ws4py.messaging + +# TODO: Replace this with defs +import common + +sys.path.insert(0, os.getcwd() + "/include") +import defs + +logger = logging.getLogger("WORKER") + +class SocketHandler(WebSocketClient): + STATE_CREATED = 0 + STATE_CONNECTED = 1 + STATE_INITIALIZED = 2 + STATE_PROCESSING = 3 + STATE_EOP_RECEIVED = 6 + STATE_EOS_RECEIVED = 7 + STATE_CANCELLING = 8 + STATE_FINISHED = 100 + + def __init__(self, master, pipeline, conf): + WebSocketClient.__init__(self, url=master, heartbeat_freq=10) + + self.pipeline = pipeline + + self.pipeline.set_interim_result_handler(self._on_interim_result) + self.pipeline.set_final_result_handler(self._on_final_result) + self.pipeline.set_error_handler(self._on_error) + 
diff --git a/lucida/speechrecognition/src/worker.py b/lucida/speechrecognition/src/worker.py
new file mode 100644
index 000000000..b3b29ccfc
--- /dev/null
+++ b/lucida/speechrecognition/src/worker.py
@@ -0,0 +1,197 @@
+import os, sys, time
+
+import logging
+import logging.config
+import logging.handlers
+import thread
+import json
+
+from ws4py.client.threadedclient import WebSocketClient
+import ws4py.messaging
+
+# TODO: Replace this with defs
+import common
+
+sys.path.insert(0, os.getcwd() + "/include")
+import defs
+
+logger = logging.getLogger("WORKER")
+
+class SocketHandler(WebSocketClient):
+    STATE_CREATED = 0
+    STATE_CONNECTED = 1
+    STATE_INITIALIZED = 2
+    STATE_PROCESSING = 3
+    STATE_EOP_RECEIVED = 6
+    STATE_EOS_RECEIVED = 7
+    STATE_CANCELLING = 8
+    STATE_FINISHED = 100
+
+    def __init__(self, master, pipeline, conf):
+        WebSocketClient.__init__(self, url=master, heartbeat_freq=10)
+
+        self.pipeline = pipeline
+
+        self.pipeline.set_interim_result_handler(self._on_interim_result)
+        self.pipeline.set_final_result_handler(self._on_final_result)
+        self.pipeline.set_error_handler(self._on_error)
+        self.pipeline.set_eos_handler(self._on_eos)
+
+        self.request_id = "00000000-0000-0000-0000-000000000000"
+        self.silence_timeout = conf['silence_timeout']
+        self.initial_silence_timeout = conf['initial_silence_timeout']
+
+        # Named so the attribute does not shadow the call_timeout() method below
+        self.max_segment_duration = conf['max_segment_duration']
+
+        self.state = self.STATE_CREATED
+
+    def opened(self):
+        logger.info("Opened websocket connection to server")
+        self.state = self.STATE_CONNECTED
+
+    def master_timeout(self):
+        while self.state in [self.STATE_INITIALIZED, self.STATE_PROCESSING]:
+            if time.time() - self.last_master_message > self.silence_timeout:
+                logger.warning("%s: More than %d seconds since last message from master, pushing EOS to pipeline" % (self.request_id, self.silence_timeout))
+                self.pipeline.end_request()
+                self.state = self.STATE_EOS_RECEIVED
+                event = dict(status=common.WARN_TIMEOUT)
+                try:
+                    self.send(json.dumps(event))
+                except Exception:
+                    logger.warning("%s: Failed to send error event to master" % (self.request_id))
+                return
+            logger.debug("%s: Checking that master hasn't been silent for more than %d seconds" % (self.request_id, self.silence_timeout))
+            time.sleep(1)
+
+    def call_timeout(self):
+        id = self.request_id
+        time.sleep(self.max_segment_duration)
+        if id == self.request_id:
+            logger.warning("%s: More than %d seconds since call on hold, closing connection" % (self.request_id, self.max_segment_duration))
+            self.in_call = False
+            self._on_eos()
+
+    def received_message(self, message):
+        logger.debug("%s: Got message from server of type %s" % (self.request_id, str(type(message))))
+        self.last_master_message = time.time()
+
+        if self.state == self.STATE_CONNECTED:
+            if isinstance(message, ws4py.messaging.TextMessage):
+                props = json.loads(str(message))
+                self.context = props['context']
+                self.caps_str = props['caps']
+                self.request_id = props['id']
+                self.user = props['user']
+                self.in_call = props['isCall']
+                self.pipeline.init_request(self.request_id, self.caps_str, self.user, self.context)
+                self.state = self.STATE_INITIALIZED
+                # Backdate the timer so the first segment gets the longer initial timeout
+                self.last_master_message = time.time() + self.initial_silence_timeout - self.silence_timeout
+                thread.start_new_thread(self.master_timeout, ())
+                logger.info("%s: Started master timeout thread" % self.request_id)
+            else:
+                logger.info("%s: Non-text message received while waiting for initialisation!!! Resetting..." % self.request_id)
+                self.finish_request()
+        elif message.data == "EOS":
+            if self.state not in [self.STATE_CANCELLING, self.STATE_EOS_RECEIVED, self.STATE_FINISHED]:
+                self.pipeline.end_request()
+                self.state = self.STATE_EOS_RECEIVED
+            else:
+                logger.info("%s: Ignoring EOS, worker already in state %d" % (self.request_id, self.state))
+        else:
+            if self.state not in [self.STATE_CANCELLING, self.STATE_EOS_RECEIVED, self.STATE_FINISHED]:
+                if isinstance(message, ws4py.messaging.BinaryMessage):
+                    self.pipeline.process_data(message.data)
+                    self.state = self.STATE_PROCESSING
+                else:
+                    logger.info("%s: Non-binary message received while waiting for audio!!! Ignoring message..." % (self.request_id))
+            else:
+                logger.info("%s: Ignoring data, worker already in state %d" % (self.request_id, self.state))
+
+    def finish_request(self):
+        if self.state == self.STATE_CONNECTED:
+            self.pipeline.finish_request()
+            self.state = self.STATE_FINISHED
+            return
+        if self.state == self.STATE_INITIALIZED:
+            self.pipeline.finish_request()
+            self.state = self.STATE_FINISHED
+            return
+        if self.state != self.STATE_FINISHED:
+            logger.info("%s: Master disconnected before decoder reached EOS?" % self.request_id)
+            self.state = self.STATE_CANCELLING
+            self.pipeline.cancel()
+            counter = 0
+            while self.state == self.STATE_CANCELLING:
+                counter += 1
+                if counter > 30:
+                    logger.info("%s: Giving up waiting after %d tries" % (self.request_id, counter))
+                    self.state = self.STATE_FINISHED
+                else:
+                    logger.info("%s: Waiting for EOS from decoder" % self.request_id)
+                    time.sleep(1)
+            logger.info("%s: Finished waiting for EOS" % self.request_id)
+
+    def closed(self, code, reason=None):
+        logger.debug("%s: Closed websocket connection to server. Cleaning up..." % self.request_id)
+        self.finish_request()
+        logger.debug("%s: Done cleaning up after websocket connection closed." % self.request_id)
+
+    def _on_interim_result(self, result):
+        logger.debug("%s: Received interim result." % (self.request_id))
+        data = json.loads(result)
+        event = dict(id=self.request_id, status=common.SUCCESS_OK, result=data['result'])
+        try:
+            self.send(json.dumps(event))
+        except Exception:
+            e = sys.exc_info()[1]
+            logger.warning("%s: Failed to send event to master: %s" % (self.request_id, e))
+
+    def _on_final_result(self, result):
+        restart = False
+        logger.debug("%s: Received final result" % (self.request_id))
+        data = json.loads(result)
+        if 'result' in data:
+            data['id'] = self.request_id
+            data['status'] = common.SUCCESS_OK
+            event = data
+        elif 'error' in data:
+            logger.error(data['error'])
+            data['error'] = "I could not understand you! Please try again..."
+            if data['type'] == defs.NOT_AUTHORISED:
+                data['error'] = "You are not authorised to use this service!!! Please visit the web interface to authorise speech recognition"
+            event = dict(status=common.ERROR_GENERIC, message=data['error'])
+            if data['type'] == defs.FATAL:
+                restart = True
+        else:
+            logger.warning("%s: Final result carries neither 'result' nor 'error'" % (self.request_id))
+            return
+        try:
+            self.send(json.dumps(event))
+        except Exception:
+            e = sys.exc_info()[1]
+            logger.warning("%s: Failed to send event to master: %s" % (self.request_id, e))
+
+        if restart:
+            # TODO: Restart worker
+            pass
+
+    def _on_eos(self, data=None):
+        if self.in_call:
+            # Keep the connection open between segments of an ongoing call
+            self.state = self.STATE_CONNECTED
+            thread.start_new_thread(self.call_timeout, ())
+            logger.info("%s: Started call timeout thread" % self.request_id)
+        else:
+            self.state = self.STATE_FINISHED
+            self.close()
+
+    def _on_error(self, error):
+        self.state = self.STATE_FINISHED
+        event = dict(status=common.ERROR_GENERIC, message=error)
+        logger.warning("ERROR: %s" % (error))
+        try:
+            self.send(json.dumps(event))
+        except Exception:
+            e = sys.exc_info()[1]
+            logger.warning("%s: Failed to send event to master: %s" % (self.request_id, e))
+        self.close()
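For reviewers, the wire protocol received_message() implements is: the master opens with one JSON text frame, streams raw audio as binary frames, and ends the segment with the literal text frame "EOS"; the worker answers with JSON events built from the common.* status codes. A sketch of the opening frame, with all values made up for illustration:

    # Sketch only -- the field names are exactly those read in received_message().
    import json
    init_frame = json.dumps({
        "id": "00000000-0000-0000-0000-000000000001",  # request UUID
        "user": "alice",                               # forwarded to the ASR element
        "caps": "audio/x-raw,format=S16LE,rate=16000,channels=1",
        "context": "",                                 # optional decoding context
        "isCall": False                                # True keeps the worker in-call after EOS
    })
    # 1. send init_frame as a text frame  -> CONNECTED -> INITIALIZED
    # 2. send raw audio as binary frames  -> PROCESSING
    # 3. send "EOS" as a text frame       -> EOS_RECEIVED; final result event follows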
[Trailing hunk unrecoverable: the remaining diff content was lost in extraction; only a change to a template's "Powered by Lucida" footer line survives.]
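Taken together, SocketHandler's STATE_* constants define a small lifecycle. Summarised here as a review aid; the transitions paraphrase opened(), received_message(), _on_eos() and finish_request() above, and are not a new API:

    # Sketch only -- condensed from the worker.py code in this diff.
    TRANSITIONS = {
        "CREATED":      [("opened()",                  "CONNECTED")],
        "CONNECTED":    [("JSON init text frame",      "INITIALIZED")],
        "INITIALIZED":  [("binary audio frame",        "PROCESSING"),
                         ("master silent too long",    "EOS_RECEIVED")],
        "PROCESSING":   [("'EOS' frame or silence",    "EOS_RECEIVED")],
        "EOS_RECEIVED": [("_on_eos() with in_call",    "CONNECTED"),
                         ("_on_eos() without in_call", "FINISHED")],
        "CANCELLING":   [("decoder EOS or 30 retries", "FINISHED")],
    }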