zaicli

#!/usr/bin/env python
# -*- coding: utf-8 -*- 

#
# Copyright 2016, 2017, 2018 Guenter Bartsch
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# 
#     http://www.apache.org/licenses/LICENSE-2.0
# 
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

#
# consolidated shell for Zamia AI
#

import atexit
import codecs
import contextlib
import logging
import os
import readline
import sys
import time
import traceback

import cmdln

from six.moves            import input

from zamiaai.ai_kernal    import AIKernal, AIContext, USER_PREFIX, LANGUAGES, DEFAULT_DB_URL, DEFAULT_XSB_ARCH_DIR, \
                                 DEFAULT_TOPLEVEL, DEFAULT_SKILL_PATHS, DEFAULT_NUM_EPOCHS, DEFAULT_LANG, \
                                 DEFAULT_NUM_EPOCHS_UTTCLASS

from zamiaai.ai_dbg       import AIDbg

from nltools              import misc
from pyxsb                import pyxsb_query

DEFAULT_LOGLEVEL   = logging.INFO
CLI_REALM          = '__cli__'

#
# AICli: cmdln based shell exposing the AIKernal operations as sub-commands.
#
# NOTE: this class deliberately has no class docstring and the do_* docstrings
#       are kept verbatim: cmdln renders them (including the ${...} template
#       variables) as the user visible help text, so they are runtime strings,
#       not documentation. Explanations therefore live in '#' comments.
#
class AICli(cmdln.Cmdln):

    name = "zaicli"

    # readline history of the prolog shell is loaded / registered only once
    # per process, see _init_prolog_history()
    _prolog_history_ready = False

    def __init__(self):

        cmdln.Cmdln.__init__(self)

        self.kernal = AIKernal.from_ini_file()

    #
    # private helpers
    #

    @contextlib.contextmanager
    def _log_level(self, verbose, sql=False):
        """Temporarily select the log level for one command.

        The root logger is switched to DEBUG when `verbose` is set, INFO
        otherwise. When both `verbose` and `sql` are set, the
        'sqlalchemy.engine' logger is raised to INFO as well.

        On exit -- also when the command raises -- the root logger goes back
        to DEFAULT_LOGLEVEL and the sqlalchemy logger to the level it had
        before, so one verbose command does not affect later commands of an
        interactive session.
        """

        root_logger = logging.getLogger()
        sql_logger  = logging.getLogger('sqlalchemy.engine')
        sql_level   = sql_logger.level
        touch_sql   = bool(verbose and sql)

        root_logger.setLevel(logging.DEBUG if verbose else logging.INFO)
        if touch_sql:
            sql_logger.setLevel(logging.INFO)

        try:
            yield
        finally:
            if touch_sql:
                sql_logger.setLevel(sql_level)
            root_logger.setLevel(DEFAULT_LOGLEVEL)

    @staticmethod
    def _read_command(prompt):
        """Prompt for one line of input.

        Returns the line as entered, or None when the user asked to leave the
        sub-shell by typing 'quit' / 'exit' or by sending EOF (Ctrl-D).
        """

        try:
            line = input(prompt)
        except EOFError:
            return None

        if line in ('quit', 'exit'):
            return None

        return line

    def _consult_all_skills(self):
        """Consult every skill the kernal knows about."""

        for skill_name in self.kernal.all_skills:
            self.kernal.consult_skill(skill_name)

    def _run_tests(self, skills, opts):
        """Run the tests of `skills` and log a one line summary.

        `opts` has to provide the `run_trace` and `test_name` options.
        """

        num_tests, num_fails = self.kernal.run_tests_multi(skills,
                                                           run_trace = opts.run_trace,
                                                           test_name = opts.test_name)

        if num_fails:
            logging.error('%d test(s) failed out of %d test(s) run.' % (num_fails, num_tests))
        else:
            logging.info('all %d test(s) worked!' % num_tests)

    def _init_prolog_history(self):
        """Load ~/.xsb_hist into readline and arrange for it to be saved at exit.

        Runs at most once per instance: re-reading the file on every
        'prolog' command would append its entries to the in-memory history
        again and stack up one atexit hook per invocation.
        """

        if self._prolog_history_ready:
            return
        self._prolog_history_ready = True

        histfile = os.path.join(os.path.expanduser("~"), ".xsb_hist")
        try:
            readline.read_history_file(histfile)
        except IOError:
            # no history yet (first run) or file not readable -- start empty
            pass

        # default history len is -1 (infinite), which may grow unruly
        readline.set_history_length(1000)

        atexit.register(readline.write_history_file, histfile)

    #
    # commands
    #

    # clean: remove data of the given skills ('all' expands to all skills)
    @cmdln.option("-v", "--verbose", dest="verbose", action="store_true",
           help="verbose logging")
    def do_clean(self, subcmd, opts, *skills):
        """${cmd_name}: clean skill related data

        ${cmd_usage}
        ${cmd_option_list}
        """

        if not skills:
            logging.error ('specify at least one skill or "all" to clean all skills')
            return

        if len(skills) == 1 and skills[0] == 'all':
            skills = self.kernal.all_skills

        with self._log_level(opts.verbose, sql=True):
            self.kernal.clean(skills)

    # compile: compile the given skills, optionally run their tests afterwards.
    # Failures are logged (with traceback) instead of terminating the shell.
    @cmdln.option("-g", "--trace", dest="run_trace", action="store_true",
           help="enable tracing when running tests")
    @cmdln.option("-t", "--test", dest="run_tests", action="store_true",
           help="run tests")
    @cmdln.option("-N", "--test-name", dest="test_name", type="str",
           help="run specific test only, default: all tests are run")
    @cmdln.option("-v", "--verbose", dest="verbose", action="store_true",
           help="enable verbose logging")
    def do_compile(self, subcmd, opts, *skills):
        """${cmd_name}: compile skill(s)

        ${cmd_usage}
        ${cmd_option_list}
        """

        if not skills:
            logging.error ('specify at least one skill')
            return

        with self._log_level(opts.verbose):
            try:
                self.kernal.compile_skill_multi(skills)

                if opts.run_tests:
                    self._run_tests(skills, opts)

            except Exception:
                # Exception, not a bare except: Ctrl-C and sys.exit() must
                # still get through
                logging.error(traceback.format_exc())

    # test: run the tests of the given skills.
    # Failures are logged (with traceback) instead of terminating the shell.
    @cmdln.option("-g", "--trace", dest="run_trace", action="store_true",
           help="enable tracing")
    @cmdln.option("-v", "--verbose", dest="verbose", action="store_true",
           help="verbose logging")
    @cmdln.option("-N", "--test-name", dest="test_name", type="str",
           help="run specific test only, default: all tests are run")
    def do_test(self, subcmd, opts, *skills):
        """${cmd_name}: run tests from skill(s)

        ${cmd_usage}
        ${cmd_option_list}
        """

        if not skills:
            logging.error ('specify at least one skill (or all to run tests from all skills)')
            return

        with self._log_level(opts.verbose):
            if opts.verbose:
                logging.debug('verbose logging enabled.')

            try:
                self._run_tests(skills, opts)

            except Exception:
                # same error reporting as do_compile; the PrologError that
                # used to be caught here is not importable in this file
                logging.error(traceback.format_exc())

    # train: train the tensorflow model for opts.num_epochs epochs
    @cmdln.option("-i", "--incremental", dest="incremental", action="store_true",
           help="incremental training (load previously saved variables)")
    @cmdln.option("-n", "--num-epochs", dest="num_epochs", type = "int", default=DEFAULT_NUM_EPOCHS,
           help="number of epochs to train for, default: %d" % DEFAULT_NUM_EPOCHS)
    @cmdln.option("-v", "--verbose", dest="verbose", action="store_true",
           help="verbose logging")
    def do_train(self, subcmd, opts):
        """${cmd_name}: train tensorflow model

        ${cmd_usage}
        ${cmd_option_list}
        """

        with self._log_level(opts.verbose):
            self.kernal.train(num_epochs  = opts.num_epochs,
                              incremental = opts.incremental)

    # gpt2: export gpt-2 training data starting at opts.offset
    @cmdln.option("-O", "--offset", dest="offset", type = "int", default=0,
           help="offset for incremental dump, default: 0")
    @cmdln.option("-v", "--verbose", dest="verbose", action="store_true",
           help="verbose logging")
    def do_gpt2(self, subcmd, opts):
        """${cmd_name}: export gpt-2 training data

        ${cmd_usage}
        ${cmd_option_list}
        """

        with self._log_level(opts.verbose):
            self.kernal.export_gpt2(offset=opts.offset)

    # chat: interactive loop feeding user input to the kernal and logging the
    # response; left via 'quit', 'exit' or EOF. `models` is accepted but unused.
    @cmdln.option("-g", "--trace", dest="run_trace", action="store_true",
           help="enable prolog tracing")
    @cmdln.option("-u", "--user", dest="username", type = "str", default="chat",
           help="username, default: chat")
    @cmdln.option("-v", "--verbose", dest="verbose", action="store_true",
           help="verbose logging")
    def do_chat(self, subcmd, opts, *models):
        """${cmd_name}: chat with model in natural language

        ${cmd_usage}
        ${cmd_option_list}
        """

        with self._log_level(opts.verbose):

            self._consult_all_skills()
            self.kernal.setup_nlp_model()

            user_uri = USER_PREFIX + opts.username
            ctx      = self.kernal.create_context(user=user_uri, realm=CLI_REALM)

            while True:

                line = self._read_command('ai> ')
                if line is None:
                    break

                out, score, action = self.kernal.process_input(ctx, line, run_trace=opts.run_trace)

                if action:
                    # %s in a unicode format string converts `action` to text
                    # on python 2 and 3 alike, no py2-only unicode() needed
                    logging.info(u'RESP: [%6.1f] %s | action: %s' % (score, out, action))
                else:
                    logging.info(u'RESP: [%6.1f] %s ' % (score, out))

    # dbg: hand over to the interactive AIDbg debugger.
    # `models` is accepted but unused.
    @cmdln.option("-u", "--user", dest="username", type = "str", default="chat",
           help="username, default: chat")
    @cmdln.option("-v", "--verbose", dest="verbose", action="store_true",
           help="verbose logging")
    def do_dbg(self, subcmd, opts, *models):
        """${cmd_name}: enter interactive debugging session

        ${cmd_usage}
        ${cmd_option_list}
        """

        self._consult_all_skills()
        self.kernal.setup_nlp_model()

        dbg = AIDbg (self.kernal, USER_PREFIX + opts.username, CLI_REALM, opts.verbose)

        dbg.run()

    # utterances: dump (a sample of) the utterances stored in the DB
    @cmdln.option ("-d", "--dict", dest="dictfn", type = "str", default=None,
           help="dictionary to use to detect unknown words, default: none")
    @cmdln.option ("-s", "--skill", dest="skill", type = "str", default='all',
           help="extract utterances from specific skill only, default: all skills")
    @cmdln.option ("-n", "--num-utterances", dest="num_utterances", type = "int", default=0,
           help="number of utterances to extract, default: 0 (all)")
    def do_utterances(self, subcmd, opts):
        """${cmd_name}: get sample or all utterances from DB

        ${cmd_usage}
        ${cmd_option_list}
        """

        self.kernal.dump_utterances(opts.num_utterances, opts.dictfn, opts.skill)

    # utt_class_train: train the utterance classifier. `paths` is unused.
    @cmdln.option("-i", "--incremental", dest="incremental", action="store_true",
           help="incremental training (load previously saved variables)")
    @cmdln.option("-n", "--num-epochs", dest="num_epochs", type = "int", default=DEFAULT_NUM_EPOCHS_UTTCLASS,
           help="number of epochs to train for, default: %d" % DEFAULT_NUM_EPOCHS_UTTCLASS)
    @cmdln.option("-v", "--verbose", dest="verbose", action="store_true",
           help="verbose logging")
    def do_utt_class_train(self, subcmd, opts, *paths):
        """${cmd_name}: train utt-to-skill classification model

        ${cmd_usage}
        ${cmd_option_list}
        """

        with self._log_level(opts.verbose):
            self.kernal.uttclass_train(num_epochs  = opts.num_epochs,
                                       incremental = opts.incremental)

    # utt_class: classify utterances. Each argument that names an existing
    # file is read as utf8, one utterance per line; any other argument is
    # taken as an utterance itself. Raises Exception when no argument is given.
    @cmdln.option("-v", "--verbose", dest="verbose", action="store_true",
           help="verbose logging")
    def do_utt_class(self, subcmd, opts, *paths):
        """${cmd_name}: utt-to-skill classify utterance(s)

        ${cmd_usage}
        ${cmd_option_list}
        """

        if len(paths) < 1:
            raise Exception ("at least one argument (utterance or file name) expected")

        with self._log_level(opts.verbose):

            # collect utterances from paths

            utterances = []
            for uttfn in paths:
                if os.path.exists(uttfn):
                    with codecs.open(uttfn, 'r', 'utf8') as uttf:
                        utterances.extend(line.strip() for line in uttf)
                else:
                    utterances.append(uttfn)

            self.kernal.uttclass_predict(utterances)

    # prolog: interactive XSB query shell. Consults the first given skill, or
    # all skills when none is given; left via 'quit', 'exit' or EOF.
    def do_prolog(self, subcmd, opts, *skills):
        """${cmd_name}: open prolog shell for debugging

        ${cmd_usage}
        ${cmd_option_list}
        """

        if skills:
            self.kernal.consult_skill (skills[0])
        else:
            self._consult_all_skills()

        self._init_prolog_history()

        while True:

            line = self._read_command('prolog> ')
            if line is None:
                break

            try:
                for res in pyxsb_query(line):
                    logging.info('  %s' % repr(res))

            except Exception:
                # a broken query must not end the prolog shell
                logging.error(traceback.format_exc())

    # stats: log one line per skill with its count and share per language,
    # sorted ascending by the skill's 'en' count.
    @cmdln.option("-v", "--verbose", dest="verbose", action="store_true",
           help="verbose logging")
    def do_stats(self, subcmd, opts):
        """${cmd_name}: print DB statistics

        ${cmd_usage}
        ${cmd_option_list}
        """

        with self._log_level(opts.verbose, sql=True):

            # presumably a dict: skill name -> dict: language -> count
            stats = self.kernal.stats()

            totals = {}
            for counts in stats.values():
                for lang, cnt in counts.items():
                    totals[lang] = totals.get(lang, 0) + cnt

            rows = []

            for skill, counts in stats.items():

                cols = ['%-20s' % skill]

                for lang in LANGUAGES:
                    # a skill may have no entries for a language, and a
                    # language may have none at all: report 0 / 0.0%
                    cnt   = counts.get(lang, 0)
                    total = totals.get(lang, 0)
                    pct   = cnt * 100.0 / total if total else 0.0

                    cols.append('%3s:%9d (%5.1f%%)' % (lang, cnt, pct))

                rows.append((''.join(cols), counts.get('en', 0)))

            for text, _ in sorted(rows, key=lambda row: row[1]):
                logging.info(text)

#
# init terminal
#

# Everything below runs only when this file is executed as a script, so that
# importing it (e.g. for inspection or testing) neither sets up the terminal
# and logging nor builds a kernal and exits the interpreter.

if __name__ == '__main__':

    misc.init_app('ai_cli')

    #
    # logging
    #

    logging.basicConfig(level=DEFAULT_LOGLEVEL)
    logging.getLogger("requests").setLevel(logging.WARNING)

    #
    # run cli
    #

    # LOOP_IF_EMPTY: enter the interactive command loop when no sub-command
    # was given on the command line
    aicli = AICli()
    sys.exit(aicli.main(loop=cmdln.LOOP_IF_EMPTY))