From 2d0bf9f6ab57c0b06e972d5e1d7042948680853d Mon Sep 17 00:00:00 2001 From: Adrien Ball Date: Tue, 9 Apr 2019 16:45:35 +0200 Subject: [PATCH 01/12] Set post-release version to 0.65.0-SNAPSHOT --- Cargo.toml | 2 +- ffi/Cargo.toml | 2 +- ffi/cbindgen.toml | 2 +- platforms/c/libsnips_nlu.h | 2 +- platforms/kotlin/build.gradle | 2 +- platforms/python/ffi/Cargo.toml | 4 ++-- platforms/python/snips_nlu_rust/__version__ | 2 +- platforms/swift/SnipsNlu/Dependencies/build.sh | 2 +- 8 files changed, 9 insertions(+), 9 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index bb24f3cd..9e2ac35f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "snips-nlu-lib" -version = "0.64.2" +version = "0.65.0-SNAPSHOT" authors = [ "Adrien Ball ", "Clement Doumouro ", diff --git a/ffi/Cargo.toml b/ffi/Cargo.toml index 18889bc5..36403c0a 100644 --- a/ffi/Cargo.toml +++ b/ffi/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "snips-nlu-ffi" -version = "0.64.2" +version = "0.65.0-SNAPSHOT" edition = "2018" authors = [ "Adrien Ball ", diff --git a/ffi/cbindgen.toml b/ffi/cbindgen.toml index 19cecd3b..07c8e95e 100644 --- a/ffi/cbindgen.toml +++ b/ffi/cbindgen.toml @@ -2,7 +2,7 @@ language = "c" include_guard = "LIBSNIPS_NLU_H_" -header = "#define SNIPS_NLU_VERSION \"0.64.2\"" +header = "#define SNIPS_NLU_VERSION \"0.65.0-SNAPSHOT\"" [parse] parse_deps = true diff --git a/platforms/c/libsnips_nlu.h b/platforms/c/libsnips_nlu.h index a316c420..11e9e6ec 100644 --- a/platforms/c/libsnips_nlu.h +++ b/platforms/c/libsnips_nlu.h @@ -1,4 +1,4 @@ -#define SNIPS_NLU_VERSION "0.64.2" +#define SNIPS_NLU_VERSION "0.65.0-SNAPSHOT" #ifndef LIBSNIPS_NLU_H_ #define LIBSNIPS_NLU_H_ diff --git a/platforms/kotlin/build.gradle b/platforms/kotlin/build.gradle index 85e381fa..389073fc 100644 --- a/platforms/kotlin/build.gradle +++ b/platforms/kotlin/build.gradle @@ -11,7 +11,7 @@ buildscript { apply plugin: 'kotlin' -version = "0.64.2" +version = "0.65.0-SNAPSHOT" group = "ai.snips" repositories { 
diff --git a/platforms/python/ffi/Cargo.toml b/platforms/python/ffi/Cargo.toml index 8a29ec0f..885183ed 100644 --- a/platforms/python/ffi/Cargo.toml +++ b/platforms/python/ffi/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "snips-nlu-python-ffi" -version = "0.64.2" +version = "0.65.0-SNAPSHOT" authors = ["Adrien Ball "] edition = "2018" @@ -11,4 +11,4 @@ crate-type = ["cdylib"] [dependencies] libc = "0.2" ffi-utils = { git = "https://github.com/snipsco/snips-utils-rs", rev = "4292ad9" } -snips-nlu-ffi = { git = "https://github.com/snipsco/snips-nlu-rs", tag = "0.64.2" } +snips-nlu-ffi = { path = "../../../ffi" } diff --git a/platforms/python/snips_nlu_rust/__version__ b/platforms/python/snips_nlu_rust/__version__ index cc2f8ef5..6c913f83 100644 --- a/platforms/python/snips_nlu_rust/__version__ +++ b/platforms/python/snips_nlu_rust/__version__ @@ -1 +1 @@ -0.64.2 +0.65.0-SNAPSHOT diff --git a/platforms/swift/SnipsNlu/Dependencies/build.sh b/platforms/swift/SnipsNlu/Dependencies/build.sh index 9288094f..9ac04b5a 100755 --- a/platforms/swift/SnipsNlu/Dependencies/build.sh +++ b/platforms/swift/SnipsNlu/Dependencies/build.sh @@ -4,7 +4,7 @@ set -e -VERSION="0.64.2" +VERSION="0.65.0-SNAPSHOT" SYSTEM=$(echo $1 | tr '[:upper:]' '[:lower:]') LIBRARY_NAME=libsnips_nlu_ffi LIBRARY_NAME_A=${LIBRARY_NAME}.a From 14aae2222e555afd551570872c5d146b11bea48e Mon Sep 17 00:00:00 2001 From: Thibaut Lorrain Date: Wed, 10 Apr 2019 11:24:37 +0200 Subject: [PATCH 02/12] make the WrongModelVersion error message intelligible --- src/errors.rs | 4 ++-- src/nlu_engine.rs | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/errors.rs b/src/errors.rs index 56d14b6e..592bf479 100644 --- a/src/errors.rs +++ b/src/errors.rs @@ -4,8 +4,8 @@ use failure::Fail; pub enum SnipsNluError { #[fail(display = "Unable to read file '{}'", _0)] ModelLoad(String), - #[fail(display = "Expected model version {} but found {}", _1, _0)] - WrongModelVersion(String, &'static str), + 
#[fail(display = "Mismatched model version: model is {} but runner is {}", model, runner)] + WrongModelVersion{ model: String, runner: &'static str}, #[fail(display = "Unknown intent: '{}'", _0)] UnknownIntent(String), #[fail(display = "Internal error: {}", _0)] diff --git a/src/nlu_engine.rs b/src/nlu_engine.rs index cc4d0102..22334f80 100644 --- a/src/nlu_engine.rs +++ b/src/nlu_engine.rs @@ -56,10 +56,10 @@ impl SnipsNluEngine { let model_version: ModelVersion = serde_json::from_reader(model_file)?; if model_version.model_version != crate::MODEL_VERSION { - bail!(SnipsNluError::WrongModelVersion( - model_version.model_version, - crate::MODEL_VERSION - )); + bail!(SnipsNluError::WrongModelVersion { + model: model_version.model_version, + runner: crate::MODEL_VERSION + }); } Ok(()) } From 95e70b224789d33706f825edfde322bf9e06a181 Mon Sep 17 00:00:00 2001 From: Adrien Ball Date: Wed, 10 Apr 2019 18:36:18 +0200 Subject: [PATCH 03/12] Fix error handling in Python wrapper --- CHANGELOG.md | 6 ++ ffi/src/lib.rs | 17 +++- platforms/python/snips_nlu_rust/nlu_engine.py | 81 +++++++++++-------- .../tests/test_nlu_engine_wrapper.py | 34 +++++++- platforms/python/snips_nlu_rust/utils.py | 14 +++- .../log_reg_intent_classifier.rs | 5 +- src/resources/loading.rs | 10 ++- 7 files changed, 122 insertions(+), 45 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a15f3751..314c10ed 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,11 @@ # Changelog All notable changes to this project will be documented in this file. 
+## [Unreleased] +### Fixed +- Make the `WrongModelVersion` error message intelligible [#133](https://github.com/snipsco/snips-nlu-rs/pull/133) +- Fix error handling in Python wrapper [#134](https://github.com/snipsco/snips-nlu-rs/pull/134) + ## [0.64.2] - 2019-04-09 ### Fixed - Fix handling of ambiguous utterances in `DeterministicIntentParser` [#129](https://github.com/snipsco/snips-nlu-rs/pull/129) @@ -189,6 +194,7 @@ being statically hardcoded, reducing the binary size by 31Mb. - Improve support for japanese - Rename python package to `snips_nlu_rust` +[Unreleased]: https://github.com/snipsco/snips-nlu-rs/compare/0.64.2...HEAD [0.64.2]: https://github.com/snipsco/snips-nlu-rs/compare/0.64.1...0.64.2 [0.64.1]: https://github.com/snipsco/snips-nlu-rs/compare/0.64.0...0.64.1 [0.64.0]: https://github.com/snipsco/snips-nlu-rs/compare/0.63.1...0.64.0 diff --git a/ffi/src/lib.rs b/ffi/src/lib.rs index 8c4c281c..25d5cad9 100644 --- a/ffi/src/lib.rs +++ b/ffi/src/lib.rs @@ -3,7 +3,7 @@ extern crate ffi_utils; extern crate snips_nlu_ontology_ffi_macros; -use std::ffi::{CStr, CString}; +use std::ffi::CStr; use std::io::Cursor; use std::slice; use std::sync::Mutex; @@ -119,11 +119,15 @@ pub extern "C" fn snips_nlu_engine_run_get_intents_into_json( #[no_mangle] pub extern "C" fn snips_nlu_engine_destroy_string(string: *mut libc::c_char) -> SNIPS_RESULT { - wrap!(unsafe { CString::from_raw_pointer(string) }) + take_back_nullable_c_string!(string); + SNIPS_RESULT::SNIPS_RESULT_OK } #[no_mangle] pub extern "C" fn snips_nlu_engine_destroy_client(client: *mut CSnipsNluEngine) -> SNIPS_RESULT { + if client.is_null() { + return SNIPS_RESULT::SNIPS_RESULT_OK + } wrap!(unsafe { CSnipsNluEngine::from_raw_pointer(client) }) } @@ -131,11 +135,17 @@ pub extern "C" fn snips_nlu_engine_destroy_client(client: *mut CSnipsNluEngine) pub extern "C" fn snips_nlu_engine_destroy_result( result: *mut CIntentParserResult, ) -> SNIPS_RESULT { + if result.is_null() { + return 
SNIPS_RESULT::SNIPS_RESULT_OK + } wrap!(unsafe { CIntentParserResult::from_raw_pointer(result) }) } #[no_mangle] pub extern "C" fn snips_nlu_engine_destroy_slots(result: *mut CSlotList) -> SNIPS_RESULT { + if result.is_null() { + return SNIPS_RESULT::SNIPS_RESULT_OK + } wrap!(unsafe { CSlotList::from_raw_pointer(result) }) } @@ -143,6 +153,9 @@ pub extern "C" fn snips_nlu_engine_destroy_slots(result: *mut CSlotList) -> SNIP pub extern "C" fn snips_nlu_engine_destroy_intent_classifier_results( result: *mut CIntentClassifierResultArray, ) -> SNIPS_RESULT { + if result.is_null() { + return SNIPS_RESULT::SNIPS_RESULT_OK + } wrap!(unsafe { CIntentClassifierResultArray::from_raw_pointer(result) }) } diff --git a/platforms/python/snips_nlu_rust/nlu_engine.py b/platforms/python/snips_nlu_rust/nlu_engine.py index b6662dea..7d3cb4a0 100644 --- a/platforms/python/snips_nlu_rust/nlu_engine.py +++ b/platforms/python/snips_nlu_rust/nlu_engine.py @@ -4,10 +4,11 @@ import json from builtins import object, str -from ctypes import byref, c_char_p, c_void_p, pointer, string_at, c_char, c_int +from ctypes import byref, c_char_p, c_void_p, string_at, c_char, c_int from pathlib import Path -from snips_nlu_rust.utils import lib, string_pointer, CStringArray +from snips_nlu_rust.utils import ( + lib, string_pointer, CStringArray, check_ffi_error) class NLUEngine(object): @@ -32,8 +33,8 @@ class NLUEngine(object): ... 
engine_dir="/path/to/nlu_engine") >>> inference_engine.parse("Turn on the lights in the kitchen") """ + def __init__(self, engine_dir=None, engine_bytes=None): - exit_code = 1 self._engine = None if engine_dir is None and engine_bytes is None: @@ -41,61 +42,65 @@ def __init__(self, engine_dir=None, engine_bytes=None): if engine_dir is not None: engine_dir = Path(engine_dir) - if not engine_dir.is_dir(): - raise OSError("NLU engine directory not found: %s" - % str(engine_dir)) - self._engine = pointer(c_void_p()) + self._engine = c_void_p() exit_code = lib.ffi_snips_nlu_engine_create_from_dir( str(engine_dir).encode("utf8"), byref(self._engine)) - elif engine_bytes is not None: - self._engine = pointer(c_void_p()) + err_msg = "Something went wrong when creating the engine from " \ + "directory" + + else: + self._engine = c_void_p() bytearray_type = c_char * len(engine_bytes) exit_code = lib.ffi_snips_nlu_engine_create_from_zip( bytearray_type.from_buffer(engine_bytes), len(engine_bytes), byref(self._engine)) + err_msg = "Something went wrong when creating the engine from " \ + "bytes" - if exit_code: - raise ImportError('Something wrong happened while creating the ' - 'intent parser. See stderr.') + check_ffi_error(exit_code, err_msg) def parse(self, query, intents_whitelist=None, intents_blacklist=None): """Extracts intent and slots from an input query Args: query (str): input to process - intents_whitelist (list of str, optional): if defined, this will restrict the scope of - intent parsing to the provided intents - intents_blacklist (list of str, optional): if defined, these intents will be excluded - from the scope of intent parsing + intents_whitelist (list of str, optional): if defined, this will + restrict the scope of intent parsing to the provided intents + intents_blacklist (list of str, optional): if defined, these + intents will be excluded from the scope of intent parsing Returns: A python dict containing data about intent and slots. 
See - https://snips-nlu.readthedocs.io/en/latest/tutorial.html#parsing for details about the - format. + https://snips-nlu.readthedocs.io/en/latest/tutorial.html#parsing + for details about the format. """ if intents_whitelist is not None: - if not all(isinstance(intent, str) for intent in intents_whitelist): - raise TypeError( - "Expected 'intents_whitelist' to contain objects of type 'str'") + if not all( + isinstance(intent, str) for intent in intents_whitelist): + raise TypeError("Expected 'intents_whitelist' to contain " + "objects of type 'str'") intents = [intent.encode("utf8") for intent in intents_whitelist] arr = CStringArray() arr.size = c_int(len(intents)) arr.data = (c_char_p * len(intents))(*intents) intents_whitelist = byref(arr) if intents_blacklist is not None: - if not all(isinstance(intent, str) for intent in intents_blacklist): - raise TypeError( - "Expected 'intents_blacklist' to contain objects of type 'str'") + if not all( + isinstance(intent, str) for intent in intents_blacklist): + raise TypeError("Expected 'intents_blacklist' to contain " + "objects of type 'str'") intents = [intent.encode("utf8") for intent in intents_blacklist] arr = CStringArray() arr.size = c_int(len(intents)) arr.data = (c_char_p * len(intents))(*intents) intents_blacklist = byref(arr) with string_pointer(c_char_p()) as ptr: - lib.ffi_snips_nlu_engine_run_parse_into_json( - self._engine, query.encode("utf8"), intents_whitelist, intents_blacklist, - byref(ptr)) + exit_code = lib.ffi_snips_nlu_engine_run_parse_into_json( + self._engine, query.encode("utf8"), intents_whitelist, + intents_blacklist, byref(ptr)) result = string_at(ptr) + msg = "Something went wrong when parsing query '%s'" % query + check_ffi_error(exit_code, msg) return json.loads(result.decode("utf8")) @@ -108,13 +113,18 @@ def get_slots(self, query, intent): Returns: A list of slots. See - https://snips-nlu.readthedocs.io/en/latest/tutorial.html#parsing for details about the - format. 
+ https://snips-nlu.readthedocs.io/en/latest/tutorial.html#parsing + for details about the format. """ with string_pointer(c_char_p()) as ptr: - lib.ffi_snips_nlu_engine_run_get_slots_into_json( - self._engine, query.encode("utf8"), intent.encode("utf8"), byref(ptr)) + exit_code = lib.ffi_snips_nlu_engine_run_get_slots_into_json( + self._engine, query.encode("utf8"), intent.encode("utf8"), + byref(ptr)) + msg = "Something went wrong when extracting slots from query " \ + "'%s' with intent '%s'" % (query, intent) + check_ffi_error(exit_code, msg) result = string_at(ptr) + return json.loads(result.decode("utf8")) def get_intents(self, query): @@ -125,12 +135,15 @@ def get_intents(self, query): Returns: A list of intents along with their probability. See - https://snips-nlu.readthedocs.io/en/latest/tutorial.html#parsing for details about the - format. + https://snips-nlu.readthedocs.io/en/latest/tutorial.html#parsing + for details about the format. """ with string_pointer(c_char_p()) as ptr: - lib.ffi_snips_nlu_engine_run_get_intents_into_json( + exit_code = lib.ffi_snips_nlu_engine_run_get_intents_into_json( self._engine, query.encode("utf8"), byref(ptr)) + msg = "Something went wrong when extracting intents from query " \ + "'%s'" % query + check_ffi_error(exit_code, msg) result = string_at(ptr) return json.loads(result.decode("utf8")) diff --git a/platforms/python/snips_nlu_rust/tests/test_nlu_engine_wrapper.py b/platforms/python/snips_nlu_rust/tests/test_nlu_engine_wrapper.py index 09b30244..1fc5ad87 100644 --- a/platforms/python/snips_nlu_rust/tests/test_nlu_engine_wrapper.py +++ b/platforms/python/snips_nlu_rust/tests/test_nlu_engine_wrapper.py @@ -19,6 +19,12 @@ def test_should_load_from_dir_and_parse(self): # Then self.assertEqual("MakeCoffee", res["intent"]["intentName"]) + def test_load_from_dir_should_fail_with_invalid_path(self): + with self.assertRaises(ValueError) as cm: + NLUEngine(engine_dir="/tmp/invalid/path/to/engine") + + self.assertTrue("No such 
file or directory" in str(cm.exception)) + def test_should_load_from_zip_and_parse(self): # Given engine = NLUEngine(engine_bytes=SAMPLE_ENGINE_ZIP_BYTES) @@ -29,12 +35,19 @@ def test_should_load_from_zip_and_parse(self): # Then self.assertEqual("MakeCoffee", res["intent"]["intentName"]) + def test_load_from_zip_should_fail_with_invalid_data(self): + with self.assertRaises(ValueError) as cm: + NLUEngine(engine_bytes=bytearray()) + + self.assertTrue("Invalid Zip archive" in str(cm.exception)) + def test_should_parse_with_whitelist(self): # Given engine = NLUEngine(engine_bytes=SAMPLE_ENGINE_ZIP_BYTES) # Then - res = engine.parse("Make me two cups of coffee please", intents_whitelist=["MakeTea"]) + res = engine.parse("Make me two cups of coffee please", + intents_whitelist=["MakeTea"]) # Then self.assertEqual("MakeTea", res["intent"]["intentName"]) @@ -44,7 +57,8 @@ def test_should_parse_with_blacklist(self): engine = NLUEngine(engine_bytes=SAMPLE_ENGINE_ZIP_BYTES) # Then - res = engine.parse("Make me two cups of coffee please", intents_blacklist=["MakeCoffee"]) + res = engine.parse("Make me two cups of coffee please", + intents_blacklist=["MakeCoffee"]) # Then self.assertEqual("MakeTea", res["intent"]["intentName"]) @@ -54,7 +68,8 @@ def test_should_get_slots(self): engine = NLUEngine(engine_bytes=SAMPLE_ENGINE_ZIP_BYTES) # Then - slots = engine.get_slots("Make me two cups of coffee please", intent="MakeCoffee") + slots = engine.get_slots("Make me two cups of coffee please", + intent="MakeCoffee") # Then expected_slots = [ @@ -68,12 +83,23 @@ def test_should_get_slots(self): ] self.assertEqual(expected_slots, slots) + def test_get_slots_should_fail_with_unknown_intent(self): + # Given + engine = NLUEngine(engine_bytes=SAMPLE_ENGINE_ZIP_BYTES) + + # Then + with self.assertRaises(ValueError) as cm: + engine.get_slots( + "Make me two cups of coffee please", intent="my_intent") + self.assertTrue("Unknown intent" in str(cm.exception)) + def test_should_get_intents(self): 
# Given engine = NLUEngine(engine_bytes=SAMPLE_ENGINE_ZIP_BYTES) # Then - intents_results = engine.get_intents("Make me two cups of coffee please") + intents_results = engine.get_intents( + "Make me two cups of coffee please") intents = [res["intentName"] for res in intents_results] # Then diff --git a/platforms/python/snips_nlu_rust/utils.py b/platforms/python/snips_nlu_rust/utils.py index aac25d4b..2e2f7a49 100644 --- a/platforms/python/snips_nlu_rust/utils.py +++ b/platforms/python/snips_nlu_rust/utils.py @@ -1,6 +1,6 @@ -from _ctypes import Structure, POINTER +from _ctypes import Structure, POINTER, byref from contextlib import contextmanager -from ctypes import cdll, c_char_p, c_int32 +from ctypes import cdll, c_char_p, c_int32, string_at from pathlib import Path dylib_dir = Path(__file__).parent / "dylib" @@ -21,3 +21,13 @@ class CStringArray(Structure): ("data", POINTER(c_char_p)), ("size", c_int32) ] + + +def check_ffi_error(exit_code, error_context_msg): + if exit_code != 0: + with string_pointer(c_char_p()) as ptr: + if lib.snips_nlu_engine_get_last_error(byref(ptr)) == 0: + ffi_error_message = string_at(ptr).decode("utf8") + else: + ffi_error_message = "see stderr" + raise ValueError("%s: %s" % (error_context_msg, ffi_error_message)) diff --git a/src/intent_classifier/log_reg_intent_classifier.rs b/src/intent_classifier/log_reg_intent_classifier.rs index 07fc9f75..21471a25 100644 --- a/src/intent_classifier/log_reg_intent_classifier.rs +++ b/src/intent_classifier/log_reg_intent_classifier.rs @@ -29,7 +29,10 @@ impl LogRegIntentClassifier { path: P, shared_resources: Arc, ) -> Result { - info!("Loading log reg intent classifier ({:?}) ...", path.as_ref()); + info!( + "Loading log reg intent classifier ({:?}) ...", + path.as_ref() + ); let classifier_model_path = path.as_ref().join("intent_classifier.json"); let model_file = File::open(&classifier_model_path).with_context(|_| { format!( diff --git a/src/resources/loading.rs b/src/resources/loading.rs index 
623fc10a..f51e2da1 100644 --- a/src/resources/loading.rs +++ b/src/resources/loading.rs @@ -106,7 +106,10 @@ fn load_gazetteers>( let gazetteer_path = gazetteers_directory .join(gazetteer_name.clone()) .with_extension("txt"); - info!("Loading gazetteer '{}' ({:?}) ...", gazetteer_name, gazetteer_path); + info!( + "Loading gazetteer '{}' ({:?}) ...", + gazetteer_name, gazetteer_path + ); let file = File::open(&gazetteer_path) .with_context(|_| format!("Cannot open gazetteer file {:?}", gazetteer_path))?; let gazetteer = HashSetGazetteer::from_reader(file) @@ -130,7 +133,10 @@ fn load_word_clusterers>( .join(clusters_name.clone()) .with_extension("txt"); ; - info!("Loading word clusters '{}' ({:?}) ...", clusters_name, clusters_path); + info!( + "Loading word clusters '{}' ({:?}) ...", + clusters_name, clusters_path + ); let word_clusters_reader = File::open(&clusters_path) .with_context(|_| format!("Cannot open word clusters file {:?}", clusters_path))?; let word_clusterer = HashMapWordClusterer::from_reader(word_clusters_reader) From 9542e887b13fdb3ccdfc3bcb22df4c2d5ad1d493 Mon Sep 17 00:00:00 2001 From: Adrien Ball Date: Wed, 10 Apr 2019 19:18:04 +0200 Subject: [PATCH 04/12] Fix ffi issues --- ffi/src/lib.rs | 17 ++--------------- platforms/python/snips_nlu_rust/nlu_engine.py | 2 +- .../tests/test_nlu_engine_wrapper.py | 7 +++++++ 3 files changed, 10 insertions(+), 16 deletions(-) diff --git a/ffi/src/lib.rs b/ffi/src/lib.rs index 25d5cad9..8c4c281c 100644 --- a/ffi/src/lib.rs +++ b/ffi/src/lib.rs @@ -3,7 +3,7 @@ extern crate ffi_utils; extern crate snips_nlu_ontology_ffi_macros; -use std::ffi::CStr; +use std::ffi::{CStr, CString}; use std::io::Cursor; use std::slice; use std::sync::Mutex; @@ -119,15 +119,11 @@ pub extern "C" fn snips_nlu_engine_run_get_intents_into_json( #[no_mangle] pub extern "C" fn snips_nlu_engine_destroy_string(string: *mut libc::c_char) -> SNIPS_RESULT { - take_back_nullable_c_string!(string); - SNIPS_RESULT::SNIPS_RESULT_OK + wrap!(unsafe 
{ CString::from_raw_pointer(string) }) } #[no_mangle] pub extern "C" fn snips_nlu_engine_destroy_client(client: *mut CSnipsNluEngine) -> SNIPS_RESULT { - if client.is_null() { - return SNIPS_RESULT::SNIPS_RESULT_OK - } wrap!(unsafe { CSnipsNluEngine::from_raw_pointer(client) }) } @@ -135,17 +131,11 @@ pub extern "C" fn snips_nlu_engine_destroy_client(client: *mut CSnipsNluEngine) pub extern "C" fn snips_nlu_engine_destroy_result( result: *mut CIntentParserResult, ) -> SNIPS_RESULT { - if result.is_null() { - return SNIPS_RESULT::SNIPS_RESULT_OK - } wrap!(unsafe { CIntentParserResult::from_raw_pointer(result) }) } #[no_mangle] pub extern "C" fn snips_nlu_engine_destroy_slots(result: *mut CSlotList) -> SNIPS_RESULT { - if result.is_null() { - return SNIPS_RESULT::SNIPS_RESULT_OK - } wrap!(unsafe { CSlotList::from_raw_pointer(result) }) } @@ -153,9 +143,6 @@ pub extern "C" fn snips_nlu_engine_destroy_slots(result: *mut CSlotList) -> SNIP pub extern "C" fn snips_nlu_engine_destroy_intent_classifier_results( result: *mut CIntentClassifierResultArray, ) -> SNIPS_RESULT { - if result.is_null() { - return SNIPS_RESULT::SNIPS_RESULT_OK - } wrap!(unsafe { CIntentClassifierResultArray::from_raw_pointer(result) }) } diff --git a/platforms/python/snips_nlu_rust/nlu_engine.py b/platforms/python/snips_nlu_rust/nlu_engine.py index 7d3cb4a0..0e9d5e72 100644 --- a/platforms/python/snips_nlu_rust/nlu_engine.py +++ b/platforms/python/snips_nlu_rust/nlu_engine.py @@ -98,9 +98,9 @@ def parse(self, query, intents_whitelist=None, intents_blacklist=None): exit_code = lib.ffi_snips_nlu_engine_run_parse_into_json( self._engine, query.encode("utf8"), intents_whitelist, intents_blacklist, byref(ptr)) - result = string_at(ptr) msg = "Something went wrong when parsing query '%s'" % query check_ffi_error(exit_code, msg) + result = string_at(ptr) return json.loads(result.decode("utf8")) diff --git a/platforms/python/snips_nlu_rust/tests/test_nlu_engine_wrapper.py 
b/platforms/python/snips_nlu_rust/tests/test_nlu_engine_wrapper.py index 1fc5ad87..b55d898d 100644 --- a/platforms/python/snips_nlu_rust/tests/test_nlu_engine_wrapper.py +++ b/platforms/python/snips_nlu_rust/tests/test_nlu_engine_wrapper.py @@ -105,3 +105,10 @@ def test_should_get_intents(self): # Then expected_intents = ["MakeCoffee", "MakeTea", None] self.assertEqual(expected_intents, intents) + + def test_engine_should_destroy_itself(self): + # Given + engine = NLUEngine(engine_bytes=SAMPLE_ENGINE_ZIP_BYTES) + + # When / Then + del engine From 285de08cee985efb037e5d16cbc4be81093a554d Mon Sep 17 00:00:00 2001 From: cpoisson Date: Fri, 12 Apr 2019 11:04:12 +0200 Subject: [PATCH 05/12] Fix broken link to example in README (#135) --- README.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.rst b/README.rst index 6a5b9c5e..4f70aeb2 100644 --- a/README.rst +++ b/README.rst @@ -67,7 +67,7 @@ the `Snips NLU python library `_. Example and API Usage --------------------- -The `interactive parsing CLI `_ is a good example +The `interactive parsing CLI `_ is a good example of to how to use ``snips-nlu-rs``. Here is how you can run the CLI example: From d687298f3d6b47bd4f766e7c77005e42a7f43865 Mon Sep 17 00:00:00 2001 From: Adrien Ball Date: Wed, 24 Apr 2019 17:07:06 +0200 Subject: [PATCH 06/12] Return an error when using unknown intents in whitelist or blacklist (#136) --- CHANGELOG.md | 1 + src/nlu_engine.rs | 94 ++++++++++++++++++++++++++++++++++++++++------- src/utils.rs | 18 +++++++++ 3 files changed, 99 insertions(+), 14 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 314c10ed..01c577b8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,7 @@ All notable changes to this project will be documented in this file. 
### Fixed - Make the `WrongModelVersion` error message intelligible [#133](https://github.com/snipsco/snips-nlu-rs/pull/133) - Fix error handling in Python wrapper [#134](https://github.com/snipsco/snips-nlu-rs/pull/134) +- Return an error when using unknown intents in whitelist or blacklist [#136](https://github.com/snipsco/snips-nlu-rs/pull/136) ## [0.64.2] - 2019-04-09 ### Fixed diff --git a/src/nlu_engine.rs b/src/nlu_engine.rs index 22334f80..74988fec 100644 --- a/src/nlu_engine.rs +++ b/src/nlu_engine.rs @@ -1,4 +1,4 @@ -use std::collections::HashMap; +use std::collections::{HashMap, HashSet}; use std::fs; use std::io; use std::path::Path; @@ -21,7 +21,7 @@ use crate::models::{ use crate::resources::loading::load_shared_resources; use crate::resources::SharedResources; use crate::slot_utils::*; -use crate::utils::{extract_nlu_engine_zip_archive, EntityName, SlotName}; +use crate::utils::{EntityName, extract_nlu_engine_zip_archive, IterOps, SlotName}; pub struct SnipsNluEngine { dataset_metadata: DatasetMetadata, @@ -140,15 +140,8 @@ impl SnipsNluEngine { W: Into>>, B: Into>>, { - let reverted_whitelist: Option> = intents_blacklist.into().map(|blacklist| { - self.dataset_metadata - .slot_name_mappings - .keys() - .map(|intent| &**intent) - .filter(|intent_name| !blacklist.contains(intent_name)) - .collect() - }); - let intents_whitelist_owned = intents_whitelist.into().or_else(|| reverted_whitelist); + let intents_whitelist_owned = + self.get_intents_whitelist(intents_whitelist, intents_blacklist)?; let intents_whitelist = intents_whitelist_owned .as_ref() .map(|whitelist| whitelist.as_ref()); @@ -182,6 +175,51 @@ impl SnipsNluEngine { }) } + fn get_intents_whitelist<'a: 'c, 'b: 'c, 'c, W, B>( + &'c self, + intents_whitelist: W, + intents_blacklist: B, + ) -> Result>> + where + W: Into>>, + B: Into>>, + { + let all_intents: HashSet<&str> = self + .dataset_metadata + .slot_name_mappings + .keys() + .map(|intent| &**intent) + .collect(); + let intents_whitelist 
= intents_whitelist.into(); + let intents_blacklist = intents_blacklist.into(); + if let Some(unknown_intent) = vec![intents_whitelist.as_ref(), intents_blacklist.as_ref()] + .into_iter() + .flatten() + .flatten() + .find(|intent| !all_intents.contains(*intent)) + { + return Err(format_err!( + "Cannot use unknown intent '{}' in intents filter", + unknown_intent + )); + }; + let reverted_whitelist: Option> = intents_blacklist.map(|blacklist| { + all_intents + .iter() + .filter(|intent_name| !blacklist.contains(intent_name)) + .cloned() + .collect() + }); + Ok(intents_whitelist + .map(|whitelist| { + reverted_whitelist + .clone() + .map(|rev_whitelist| rev_whitelist.intersect(whitelist.clone())) + .unwrap_or_else(|| whitelist) + }) + .or_else(|| reverted_whitelist)) + } + pub fn get_intents(&self, input: &str) -> Result> { let nb_intents = self.dataset_metadata.slot_name_mappings.len(); let mut results = HashMap::with_capacity(nb_intents + 1); @@ -363,11 +401,14 @@ fn extract_builtin_slot( #[cfg(test)] mod tests { - use super::*; + use std::iter::FromIterator; + + use snips_nlu_ontology::{NumberValue, StringValue}; + use crate::entity_parser::custom_entity_parser::CustomEntity; use crate::testutils::*; - use snips_nlu_ontology::{NumberValue, StringValue}; - use std::iter::FromIterator; + + use super::*; #[test] fn test_load_from_zip() { @@ -457,6 +498,25 @@ mod tests { ) .unwrap(); + let combined_result = nlu_engine + .parse( + "Make me two cups of coffee please", + vec!["MakeCoffee", "MakeTea"], + vec!["MakeCoffee"], + ) + .unwrap(); + + let failed_result = nlu_engine.parse( + "Make me two cups of coffee please", + vec!["MakeCoffee"], + vec!["MakeChocolate"], + ); + let failed_result_2 = nlu_engine.parse( + "Make me two cups of coffee please", + vec!["MakeChocolate"], + vec!["MakeCoffee"], + ); + // Then assert_eq!( Some("MakeTea".to_string()), @@ -466,6 +526,12 @@ mod tests { Some("MakeTea".to_string()), blacklist_result.intent.intent_name ); + assert_eq!( + 
Some("MakeTea".to_string()), + combined_result.intent.intent_name + ); + assert!(failed_result.is_err()); + assert!(failed_result_2.is_err()); } #[test] diff --git a/src/utils.rs b/src/utils.rs index c4b15d06..5d513258 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -18,6 +18,24 @@ pub type IntentName = String; pub type SlotName = String; pub type EntityName = String; +pub trait IterOps: IntoIterator + where I: IntoIterator, + T: PartialEq { + fn intersect(self, other: I) -> Vec; +} + +impl IterOps for I + where I: IntoIterator, + T: PartialEq +{ + fn intersect(self, other: I) -> Vec { + let v_other: Vec<_> = other.into_iter().collect(); + self.into_iter() + .filter(|e1| v_other.iter().any(|e2| e1 == e2)) + .collect() + } +} + pub fn deduplicate_overlapping_items( items: Vec, overlap: O, From 3eaad7ec2a719e290323a5fd036b9b0f167f1e08 Mon Sep 17 00:00:00 2001 From: Adrien Ball Date: Fri, 26 Apr 2019 11:40:12 +0200 Subject: [PATCH 07/12] Fix issue with stop words in DeterministicIntentParser (#137) --- CHANGELOG.md | 1 + .../deterministic_intent_parser.rs | 144 +++++++++++++----- src/models/intent_parser.rs | 2 + 3 files changed, 106 insertions(+), 41 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 01c577b8..f223b521 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,7 @@ All notable changes to this project will be documented in this file. 
- Make the `WrongModelVersion` error message intelligible [#133](https://github.com/snipsco/snips-nlu-rs/pull/133) - Fix error handling in Python wrapper [#134](https://github.com/snipsco/snips-nlu-rs/pull/134) - Return an error when using unknown intents in whitelist or blacklist [#136](https://github.com/snipsco/snips-nlu-rs/pull/136) +- Fix issue with stop words in `DeterministicIntentParser` [#137](https://github.com/snipsco/snips-nlu-rs/pull/137) ## [0.64.2] - 2019-04-09 ### Fixed diff --git a/src/intent_parser/deterministic_intent_parser.rs b/src/intent_parser/deterministic_intent_parser.rs index 8d4cea11..4830c3a5 100644 --- a/src/intent_parser/deterministic_intent_parser.rs +++ b/src/intent_parser/deterministic_intent_parser.rs @@ -35,6 +35,7 @@ pub struct DeterministicIntentParser { entity_scopes: HashMap, Vec)>, ignore_stop_words: bool, shared_resources: Arc, + stop_words_whitelist: HashMap>, } impl DeterministicIntentParser { @@ -97,6 +98,11 @@ impl DeterministicIntentParser { slot_names_to_entities: model.slot_names_to_entities, entity_scopes, ignore_stop_words: model.config.ignore_stop_words, + stop_words_whitelist: model + .stop_words_whitelist + .into_iter() + .map(|(intent, whitelist)| (intent, whitelist.into_iter().collect())) + .collect(), shared_resources, }) } @@ -171,13 +177,13 @@ impl IntentParser for DeterministicIntentParser { } impl DeterministicIntentParser { + #[allow(clippy::map_clone)] fn parse_top_intents( &self, input: &str, top_n: usize, intents: Option<&[&str]>, ) -> Result> { - let cleaned_input = self.preprocess_text(input); let mut results = vec![]; let intents_set: HashSet<&str> = intents @@ -214,8 +220,10 @@ impl DeterministicIntentParser { let (ranges_mapping, formatted_input) = replace_entities(input, matched_entities, get_entity_placeholder); - let cleaned_formatted_input = self.preprocess_text(&*formatted_input); - self.regexes_per_intent + let cleaned_input = self.preprocess_text(input, &**intent); + let 
cleaned_formatted_input = self.preprocess_text(&*formatted_input, &**intent); + if let Some(matching_result_formatted) = self + .regexes_per_intent .get(intent) .ok_or_else(|| format_err!("No associated regexes for intent '{}'", intent))? .iter() @@ -231,7 +239,9 @@ impl DeterministicIntentParser { self.get_matching_result(input, &*cleaned_input, regex, intent, None) }) }) - .map(|matching_result_formatted| results.push(matching_result_formatted)); + { + results.push(matching_result_formatted); + } } let confidence_score = if results.is_empty() { @@ -250,17 +260,13 @@ impl DeterministicIntentParser { .collect()) } - fn preprocess_text(&self, string: &str) -> String { + fn preprocess_text(&self, string: &str, intent: &str) -> String { + let stop_words = self.get_intent_stop_words(intent); let tokens = tokenize(string, NluUtilsLanguage::from_language(self.language)); let mut current_idx = 0; let mut cleaned_string = "".to_string(); for mut token in tokens { - if self.ignore_stop_words - && self - .shared_resources - .stop_words - .contains(&token.normalized_value()) - { + if self.ignore_stop_words && stop_words.contains(&token.normalized_value()) { token.value = (0..token.value.chars().count()).map(|_| " ").collect(); } let prefix_length = token.char_range.start - current_idx; @@ -274,6 +280,18 @@ impl DeterministicIntentParser { cleaned_string } + fn get_intent_stop_words(&self, intent: &str) -> HashSet<&String> { + self.stop_words_whitelist + .get(intent) + .map(|whitelist| { + self.shared_resources + .stop_words + .difference(whitelist) + .collect() + }) + .unwrap_or_else(|| self.shared_resources.stop_words.iter().collect()) + } + fn get_matching_result( &self, input: &str, @@ -414,7 +432,7 @@ mod tests { use crate::entity_parser::builtin_entity_parser::BuiltinEntityParser; use crate::entity_parser::custom_entity_parser::CustomEntityParser; - fn test_configuration() -> DeterministicParserModel { + fn sample_model() -> DeterministicParserModel { 
DeterministicParserModel { language_code: "en".to_string(), patterns: hashmap![ @@ -438,6 +456,10 @@ mod tests { "dummy_intent_5".to_string() => vec![ r"^\s*Send\s*5\s*dollars\s*to\s*john\s*$".to_string(), ], + "dummy_intent_6".to_string() => vec![ + r"^\s*search\s*$".to_string(), + r"^\s*search\s*(?P%SEARCH_OBJECT%)\s*$".to_string(), + ], ], group_names_to_slot_names: hashmap![ "group0".to_string() => "dummy_slot_name".to_string(), @@ -449,6 +471,7 @@ mod tests { "group6".to_string() => "dummy_slot_name4".to_string(), "group7".to_string() => "dummy_slot_name2".to_string(), "group8".to_string() => "dummy_slot_name5".to_string(), + "group9".to_string() => "dummy_slot_name6".to_string(), ], slot_names_to_entities: hashmap![ "dummy_intent_1".to_string() => hashmap![ @@ -467,10 +490,14 @@ mod tests { "dummy_slot_name5".to_string() => "snips/number".to_string(), ], "dummy_intent_5".to_string() => hashmap![], + "dummy_intent_6".to_string() => hashmap![ + "dummy_slot_name6".to_string() => "search_object".to_string(), + ], ], config: DeterministicParserConfig { ignore_stop_words: true, }, + stop_words_whitelist: HashMap::new(), } } @@ -528,8 +555,7 @@ mod tests { .custom_entity_parser(mocked_custom_entity_parser) .build(); let parser = - DeterministicIntentParser::new(test_configuration(), Arc::new(shared_resources)) - .unwrap(); + DeterministicIntentParser::new(sample_model(), Arc::new(shared_resources)).unwrap(); // When let intent = parser.parse(text, None).unwrap().intent; @@ -564,8 +590,7 @@ mod tests { .builtin_entity_parser(mocked_builtin_entity_parser) .build(); let parser = - DeterministicIntentParser::new(test_configuration(), Arc::new(shared_resources)) - .unwrap(); + DeterministicIntentParser::new(sample_model(), Arc::new(shared_resources)).unwrap(); // When let intent = parser.parse(text, None).unwrap().intent; @@ -580,7 +605,7 @@ mod tests { } #[test] - fn test_parse_intent_with_whitelist() { + fn test_parse_intent_with_intents_whitelist() { // Given let text 
= "this is a dummy_a query with another dummy_c"; let mocked_custom_entity_parser = MockedCustomEntityParser::from_iter(vec![( @@ -604,8 +629,7 @@ mod tests { .custom_entity_parser(mocked_custom_entity_parser) .build(); let parser = - DeterministicIntentParser::new(test_configuration(), Arc::new(shared_resources)) - .unwrap(); + DeterministicIntentParser::new(sample_model(), Arc::new(shared_resources)).unwrap(); // When let intent = parser @@ -643,8 +667,7 @@ mod tests { .builtin_entity_parser(mocked_builtin_entity_parser) .build(); let parser = - DeterministicIntentParser::new(test_configuration(), Arc::new(shared_resources)) - .unwrap(); + DeterministicIntentParser::new(sample_model(), Arc::new(shared_resources)).unwrap(); // When let intents = parser.get_intents(text).unwrap(); @@ -654,17 +677,22 @@ mod tests { .iter() .map(|res| res.confidence_score) .collect::>(); - let expected_scores = vec![0.5, 0.5, 0.0, 0.0, 0.0, 0.0]; + let expected_scores = vec![0.5, 0.5, 0.0, 0.0, 0.0, 0.0, 0.0]; let intent_names = intents .into_iter() .skip(2) - .map(|res| res.intent_name.unwrap_or("null".to_string()).to_string()) + .map(|res| { + res.intent_name + .unwrap_or_else(|| "null".to_string()) + .to_string() + }) .sorted() .collect::>(); let expected_intent_names = vec![ "dummy_intent_1".to_string(), "dummy_intent_2".to_string(), "dummy_intent_4".to_string(), + "dummy_intent_6".to_string(), "null".to_string(), ]; assert_eq!(expected_scores, scores); @@ -692,8 +720,7 @@ mod tests { .builtin_entity_parser(mocked_builtin_entity_parser) .build(); let parser = - DeterministicIntentParser::new(test_configuration(), Arc::new(shared_resources)) - .unwrap(); + DeterministicIntentParser::new(sample_model(), Arc::new(shared_resources)).unwrap(); // When let intent = parser.parse(text, None).unwrap().intent; @@ -805,8 +832,7 @@ mod tests { .custom_entity_parser(my_mocked_custom_entity_parser) .build(); let parser = - DeterministicIntentParser::new(test_configuration(), 
Arc::new(shared_resources)) - .unwrap(); + DeterministicIntentParser::new(sample_model(), Arc::new(shared_resources)).unwrap(); // When let intent = parser.parse(text, None).unwrap().intent; @@ -845,8 +871,7 @@ mod tests { .builtin_entity_parser(mocked_builtin_entity_parser) .build(); let parser = - DeterministicIntentParser::new(test_configuration(), Arc::new(shared_resources)) - .unwrap(); + DeterministicIntentParser::new(sample_model(), Arc::new(shared_resources)).unwrap(); // When let parsing_result = parser.parse(text, None).unwrap(); @@ -906,8 +931,7 @@ mod tests { .stop_words(stop_words) .build(); let parser = - DeterministicIntentParser::new(test_configuration(), Arc::new(shared_resources)) - .unwrap(); + DeterministicIntentParser::new(sample_model(), Arc::new(shared_resources)).unwrap(); // When let intent = parser.parse(text, None).unwrap().intent; @@ -947,8 +971,7 @@ mod tests { .custom_entity_parser(mocked_custom_entity_parser) .build(), ); - let parser = - DeterministicIntentParser::new(test_configuration(), shared_resources).unwrap(); + let parser = DeterministicIntentParser::new(sample_model(), shared_resources).unwrap(); // When let slots = parser.parse(text, None).unwrap().slots; @@ -989,8 +1012,7 @@ mod tests { .custom_entity_parser(mocked_custom_entity_parser) .build(), ); - let parser = - DeterministicIntentParser::new(test_configuration(), shared_resources).unwrap(); + let parser = DeterministicIntentParser::new(sample_model(), shared_resources).unwrap(); // When let slots = parser.parse(text, None).unwrap().slots; @@ -1036,8 +1058,7 @@ mod tests { .custom_entity_parser(mocked_custom_entity_parser) .build(); let parser = - DeterministicIntentParser::new(test_configuration(), Arc::new(shared_resources)) - .unwrap(); + DeterministicIntentParser::new(sample_model(), Arc::new(shared_resources)).unwrap(); // When let slots = parser.parse(text, None).unwrap().slots; @@ -1078,8 +1099,7 @@ mod tests { .custom_entity_parser(mocked_custom_entity_parser) 
.build(), ); - let parser = - DeterministicIntentParser::new(test_configuration(), shared_resources).unwrap(); + let parser = DeterministicIntentParser::new(sample_model(), shared_resources).unwrap(); // When let slots = parser.parse(text, None).unwrap().slots; @@ -1094,6 +1114,49 @@ mod tests { assert_eq!(slots, expected_slots); } + #[test] + fn test_parse_slots_with_stop_word_entity_value() { + // Given + let text = "search this please"; + let mocked_custom_entity_parser = MockedCustomEntityParser::from_iter(vec![( + text.to_string(), + vec![CustomEntity { + value: "this".to_string(), + resolved_value: "this".to_string(), + range: 7..11, + entity_identifier: "search_object".to_string(), + }], + )]); + let stop_words = vec!["this".to_string(), "that".to_string(), "please".to_string()] + .into_iter() + .collect(); + let shared_resources = Arc::new( + SharedResourcesBuilder::default() + .custom_entity_parser(mocked_custom_entity_parser) + .stop_words(stop_words) + .build(), + ); + let mut parser_model = sample_model(); + parser_model.stop_words_whitelist = hashmap! 
{ + "dummy_intent_6".to_string() => vec!["this".to_string()].into_iter().collect() + }; + let parser = DeterministicIntentParser::new(parser_model, shared_resources).unwrap(); + + // When + let result = parser.parse(text, None).unwrap(); + + // Then + let expected_slots = vec![InternalSlot { + value: "this".to_string(), + char_range: 7..11, + entity: "search_object".to_string(), + slot_name: "dummy_slot_name6".to_string(), + }]; + let expected_intent = Some("dummy_intent_6".to_string()); + assert_eq!(expected_intent, result.intent.intent_name); + assert_eq!(expected_slots, result.slots); + } + #[test] fn test_get_slots() { // Given @@ -1125,8 +1188,7 @@ mod tests { .custom_entity_parser(mocked_custom_entity_parser) .build(); let parser = - DeterministicIntentParser::new(test_configuration(), Arc::new(shared_resources)) - .unwrap(); + DeterministicIntentParser::new(sample_model(), Arc::new(shared_resources)).unwrap(); // When let slots = parser.get_slots(text, "dummy_intent_3").unwrap(); diff --git a/src/models/intent_parser.rs b/src/models/intent_parser.rs index c8f35321..26175301 100644 --- a/src/models/intent_parser.rs +++ b/src/models/intent_parser.rs @@ -10,6 +10,8 @@ pub struct DeterministicParserModel { pub patterns: HashMap>, pub group_names_to_slot_names: HashMap, pub slot_names_to_entities: HashMap>, + #[serde(default)] + pub stop_words_whitelist: HashMap>, pub config: DeterministicParserConfig, } From e02656a039e8c2bdfa8850da742e5af8f7b9f62f Mon Sep 17 00:00:00 2001 From: Adrien Ball Date: Fri, 26 Apr 2019 16:57:10 +0200 Subject: [PATCH 08/12] Fix caching issue in CustomEntityParser (#138) --- CHANGELOG.md | 1 + src/entity_parser/custom_entity_parser.rs | 34 ++++++++++++++++++++--- 2 files changed, 31 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f223b521..f7c55da4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,7 @@ All notable changes to this project will be documented in this file. 
- Fix error handling in Python wrapper [#134](https://github.com/snipsco/snips-nlu-rs/pull/134) - Return an error when using unknown intents in whitelist or blacklist [#136](https://github.com/snipsco/snips-nlu-rs/pull/136) - Fix issue with stop words in `DeterministicIntentParser` [#137](https://github.com/snipsco/snips-nlu-rs/pull/137) +- Fix caching issue in `CustomEntityParser` [#138](https://github.com/snipsco/snips-nlu-rs/pull/138) ## [0.64.2] - 2019-04-09 ### Fixed diff --git a/src/entity_parser/custom_entity_parser.rs b/src/entity_parser/custom_entity_parser.rs index 7234b35d..310d187f 100644 --- a/src/entity_parser/custom_entity_parser.rs +++ b/src/entity_parser/custom_entity_parser.rs @@ -61,7 +61,7 @@ pub struct CachingCustomEntityParser { #[derive(Debug, Clone, PartialEq, Eq, Hash)] struct CacheKey { input: String, - kinds: Vec, + kinds: Option>, } impl CustomEntityParser for CachingCustomEntityParser { @@ -73,9 +73,7 @@ impl CustomEntityParser for CachingCustomEntityParser { let lowercased_sentence = sentence.to_lowercase(); let cache_key = CacheKey { input: lowercased_sentence, - kinds: filter_entity_kinds - .map(|entity_kinds| entity_kinds.to_vec()) - .unwrap_or_else(|| vec![]), + kinds: filter_entity_kinds.map(|entity_kinds| entity_kinds.to_vec()), }; self.cache @@ -219,4 +217,32 @@ mod tests { assert_eq!(expected_entities, entities); } + + #[test] + fn test_custom_entity_parser_caches_properly() { + // Given + let parser_path = Path::new("data") + .join("tests") + .join("models") + .join("nlu_engine") + .join("custom_entity_parser"); + + let custom_entity_parser = CachingCustomEntityParser::from_path(parser_path, 1000).unwrap(); + let input = "Make me a hot tea"; + + // When + let entities_empty_scope = custom_entity_parser.extract_entities(input, Some(&[])).unwrap(); + let entities_no_scope = custom_entity_parser.extract_entities(input, None).unwrap(); + + // Then + let expected_entities_no_scope = vec![CustomEntity { + value: "hot".to_string(), + 
resolved_value: "hot".to_string(), + range: 10..13, + entity_identifier: "Temperature".to_string(), + }]; + let expected_entities_empty_scope: Vec = vec![]; + assert_eq!(expected_entities_no_scope, entities_no_scope); + assert_eq!(expected_entities_empty_scope, entities_empty_scope); + } } From 005734dff4cf1d04a84b17a7382f84a4c767a198 Mon Sep 17 00:00:00 2001 From: Adrien Ball Date: Mon, 29 Apr 2019 15:46:17 +0200 Subject: [PATCH 09/12] Re-score ambiguous DeterministicIntentParser results based on slots (#139) --- CHANGELOG.md | 3 + .../deterministic_intent_parser.rs | 148 ++++++++++++++---- 2 files changed, 121 insertions(+), 30 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f7c55da4..4f882e2a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,9 @@ All notable changes to this project will be documented in this file. - Fix issue with stop words in `DeterministicIntentParser` [#137](https://github.com/snipsco/snips-nlu-rs/pull/137) - Fix caching issue in `CustomEntityParser` [#138](https://github.com/snipsco/snips-nlu-rs/pull/138) +### Changed +- Re-score ambiguous `DeterministicIntentParser` results based on slots [#139](https://github.com/snipsco/snips-nlu-rs/pull/139) + ## [0.64.2] - 2019-04-09 ### Fixed - Fix handling of ambiguous utterances in `DeterministicIntentParser` [#129](https://github.com/snipsco/snips-nlu-rs/pull/129) diff --git a/src/intent_parser/deterministic_intent_parser.rs b/src/intent_parser/deterministic_intent_parser.rs index 4830c3a5..b7baf492 100644 --- a/src/intent_parser/deterministic_intent_parser.rs +++ b/src/intent_parser/deterministic_intent_parser.rs @@ -244,19 +244,28 @@ impl DeterministicIntentParser { } } - let confidence_score = if results.is_empty() { - 1.0 - } else { - 1.0 / (results.len() as f32) - }; + // In some rare cases there can be multiple ambiguous intents + // In such cases, priority is given to results containing fewer slots + let weights = results + .iter() + .map(|res| 1. / (1. 
+ res.slots.len() as f32)) + .collect::>(); + let total_weight: f32 = weights.iter().sum(); Ok(results .into_iter() - .take(top_n) - .map(|mut res| { - res.intent.confidence_score = confidence_score; + .enumerate() + .map(|(idx, mut res)| { + res.intent.confidence_score = weights[idx] / total_weight; res }) + .sorted_by(|res1, res2| { + res2.intent + .confidence_score + .partial_cmp(&res1.intent.confidence_score) + .unwrap() + }) + .take(top_n) .collect()) } @@ -501,6 +510,35 @@ mod tests { } } + fn ambiguous_model() -> DeterministicParserModel { + DeterministicParserModel { + language_code: "en".to_string(), + patterns: hashmap![ + "intent_1".to_string() => vec![ + r"^\s*(?P%EVENT_TYPE%)\s*(?P%CLIENT_NAME%)\s*$".to_string(), + ], + "intent_2".to_string() => vec![ + r"^\s*meeting\s*snips\s*$".to_string(), + ], + ], + group_names_to_slot_names: hashmap![ + "group0".to_string() => "event_type".to_string(), + "group1".to_string() => "client_name".to_string(), + ], + slot_names_to_entities: hashmap![ + "intent_1".to_string() => hashmap![ + "event_type".to_string() => "event_type".to_string(), + "client_name".to_string() => "client_name".to_string(), + ], + "intent_2".to_string() => hashmap![], + ], + config: DeterministicParserConfig { + ignore_stop_words: true, + }, + stop_words_whitelist: HashMap::new(), + } + } + #[test] fn test_load_from_path() { // Given @@ -604,6 +642,58 @@ mod tests { assert_eq!(intent, expected_intent); } + #[test] + fn test_should_disambiguate_intents() { + // Given + let text = "meeting snips"; + let mocked_custom_entity_parser = MockedCustomEntityParser::from_iter(vec![( + text.to_string(), + vec![ + CustomEntity { + value: "meeting".to_string(), + resolved_value: "meeting".to_string(), + range: 0..7, + entity_identifier: "event_type".to_string(), + }, + CustomEntity { + value: "snips".to_string(), + resolved_value: "snips".to_string(), + range: 8..13, + entity_identifier: "client_name".to_string(), + }, + ], + )]); + let shared_resources = 
SharedResourcesBuilder::default() + .custom_entity_parser(mocked_custom_entity_parser) + .build(); + let parser = + DeterministicIntentParser::new(ambiguous_model(), Arc::new(shared_resources)).unwrap(); + + // When + let intents = parser.get_intents(text).unwrap(); + + // Then + let weight_intent_1 = 1. / 3.; + let weight_intent_2 = 1.; + let total_weight = weight_intent_1 + weight_intent_2; + let expected_intents = vec![ + IntentClassifierResult { + intent_name: Some("intent_2".to_string()), + confidence_score: weight_intent_2 / total_weight, + }, + IntentClassifierResult { + intent_name: Some("intent_1".to_string()), + confidence_score: weight_intent_1 / total_weight, + }, + IntentClassifierResult { + intent_name: None, + confidence_score: 0.0, + }, + ]; + + assert_eq!(expected_intents, intents); + } + #[test] fn test_parse_intent_with_intents_whitelist() { // Given @@ -670,33 +760,31 @@ mod tests { DeterministicIntentParser::new(sample_model(), Arc::new(shared_resources)).unwrap(); // When - let intents = parser.get_intents(text).unwrap(); - - // Then - let scores = intents - .iter() - .map(|res| res.confidence_score) - .collect::>(); - let expected_scores = vec![0.5, 0.5, 0.0, 0.0, 0.0, 0.0, 0.0]; - let intent_names = intents + let results = parser + .get_intents(text) + .unwrap() .into_iter() - .skip(2) .map(|res| { - res.intent_name - .unwrap_or_else(|| "null".to_string()) - .to_string() + let intent_alias = if res.confidence_score > 0. { + res.intent_name.unwrap_or_else(|| "null".to_string()) + } else { + "unmatched_intent".to_string() + }; + (intent_alias, res.confidence_score) }) - .sorted() .collect::>(); - let expected_intent_names = vec![ - "dummy_intent_1".to_string(), - "dummy_intent_2".to_string(), - "dummy_intent_4".to_string(), - "dummy_intent_6".to_string(), - "null".to_string(), + + // Then + let expected_results = vec![ + ("dummy_intent_5".to_string(), 2. / 3.), + ("dummy_intent_3".to_string(), 1. 
/ 3.), + ("unmatched_intent".to_string(), 0.0), + ("unmatched_intent".to_string(), 0.0), + ("unmatched_intent".to_string(), 0.0), + ("unmatched_intent".to_string(), 0.0), + ("unmatched_intent".to_string(), 0.0), ]; - assert_eq!(expected_scores, scores); - assert_eq!(expected_intent_names, intent_names); + assert_eq!(expected_results, results); } #[test] From bc4c091ba51f0beebbe12a53a82a1a46457946eb Mon Sep 17 00:00:00 2001 From: Adrien Ball Date: Mon, 29 Apr 2019 15:47:28 +0200 Subject: [PATCH 10/12] Update Changelog --- CHANGELOG.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4f882e2a..acd02439 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,7 +1,7 @@ # Changelog All notable changes to this project will be documented in this file. -## [Unreleased] +## [0.64.3] - 2019-04-29 ### Fixed - Make the `WrongModelVersion` error message intelligible [#133](https://github.com/snipsco/snips-nlu-rs/pull/133) - Fix error handling in Python wrapper [#134](https://github.com/snipsco/snips-nlu-rs/pull/134) @@ -200,7 +200,7 @@ being statically hardcoded, reducing the binary size by 31Mb. 
- Improve support for japanese - Rename python package to `snips_nlu_rust` -[Unreleased]: https://github.com/snipsco/snips-nlu-rs/compare/0.64.2...HEAD +[0.64.3]: https://github.com/snipsco/snips-nlu-rs/compare/0.64.2...0.64.3 [0.64.2]: https://github.com/snipsco/snips-nlu-rs/compare/0.64.1...0.64.2 [0.64.1]: https://github.com/snipsco/snips-nlu-rs/compare/0.64.0...0.64.1 [0.64.0]: https://github.com/snipsco/snips-nlu-rs/compare/0.63.1...0.64.0 From 8fb9ce1abb47fde499c1d83792f904da79ba46fa Mon Sep 17 00:00:00 2001 From: Adrien Ball Date: Mon, 29 Apr 2019 15:47:47 +0200 Subject: [PATCH 11/12] Bump version to 0.64.3 --- Cargo.toml | 2 +- ffi/Cargo.toml | 2 +- ffi/cbindgen.toml | 2 +- platforms/c/libsnips_nlu.h | 2 +- platforms/kotlin/build.gradle | 2 +- platforms/python/ffi/Cargo.toml | 4 ++-- platforms/python/snips_nlu_rust/__version__ | 2 +- platforms/swift/SnipsNlu/Dependencies/build.sh | 2 +- 8 files changed, 9 insertions(+), 9 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 9e2ac35f..37130695 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "snips-nlu-lib" -version = "0.65.0-SNAPSHOT" +version = "0.64.3" authors = [ "Adrien Ball ", "Clement Doumouro ", diff --git a/ffi/Cargo.toml b/ffi/Cargo.toml index 36403c0a..4f0a43b4 100644 --- a/ffi/Cargo.toml +++ b/ffi/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "snips-nlu-ffi" -version = "0.65.0-SNAPSHOT" +version = "0.64.3" edition = "2018" authors = [ "Adrien Ball ", diff --git a/ffi/cbindgen.toml b/ffi/cbindgen.toml index 07c8e95e..5fc699cd 100644 --- a/ffi/cbindgen.toml +++ b/ffi/cbindgen.toml @@ -2,7 +2,7 @@ language = "c" include_guard = "LIBSNIPS_NLU_H_" -header = "#define SNIPS_NLU_VERSION \"0.65.0-SNAPSHOT\"" +header = "#define SNIPS_NLU_VERSION \"0.64.3\"" [parse] parse_deps = true diff --git a/platforms/c/libsnips_nlu.h b/platforms/c/libsnips_nlu.h index 11e9e6ec..e93d8deb 100644 --- a/platforms/c/libsnips_nlu.h +++ b/platforms/c/libsnips_nlu.h @@ -1,4 +1,4 @@ -#define 
SNIPS_NLU_VERSION "0.65.0-SNAPSHOT" +#define SNIPS_NLU_VERSION "0.64.3" #ifndef LIBSNIPS_NLU_H_ #define LIBSNIPS_NLU_H_ diff --git a/platforms/kotlin/build.gradle b/platforms/kotlin/build.gradle index 389073fc..4c17bccf 100644 --- a/platforms/kotlin/build.gradle +++ b/platforms/kotlin/build.gradle @@ -11,7 +11,7 @@ buildscript { apply plugin: 'kotlin' -version = "0.65.0-SNAPSHOT" +version = "0.64.3" group = "ai.snips" repositories { diff --git a/platforms/python/ffi/Cargo.toml b/platforms/python/ffi/Cargo.toml index 885183ed..eb589ccb 100644 --- a/platforms/python/ffi/Cargo.toml +++ b/platforms/python/ffi/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "snips-nlu-python-ffi" -version = "0.65.0-SNAPSHOT" +version = "0.64.3" authors = ["Adrien Ball "] edition = "2018" @@ -11,4 +11,4 @@ crate-type = ["cdylib"] [dependencies] libc = "0.2" ffi-utils = { git = "https://github.com/snipsco/snips-utils-rs", rev = "4292ad9" } -snips-nlu-ffi = { path = "../../../ffi" } +snips-nlu-ffi = { git = "https://github.com/snipsco/snips-nlu-rs", tag = "0.64.3" } diff --git a/platforms/python/snips_nlu_rust/__version__ b/platforms/python/snips_nlu_rust/__version__ index 6c913f83..f6057729 100644 --- a/platforms/python/snips_nlu_rust/__version__ +++ b/platforms/python/snips_nlu_rust/__version__ @@ -1 +1 @@ -0.65.0-SNAPSHOT +0.64.3 diff --git a/platforms/swift/SnipsNlu/Dependencies/build.sh b/platforms/swift/SnipsNlu/Dependencies/build.sh index 9ac04b5a..a1cdc68e 100755 --- a/platforms/swift/SnipsNlu/Dependencies/build.sh +++ b/platforms/swift/SnipsNlu/Dependencies/build.sh @@ -4,7 +4,7 @@ set -e -VERSION="0.65.0-SNAPSHOT" +VERSION="0.64.3" SYSTEM=$(echo $1 | tr '[:upper:]' '[:lower:]') LIBRARY_NAME=libsnips_nlu_ffi LIBRARY_NAME_A=${LIBRARY_NAME}.a From 0a9b0b8bd3c1e54d08cd00c1006ad8b2935752ae Mon Sep 17 00:00:00 2001 From: Adrien Ball Date: Mon, 29 Apr 2019 15:49:31 +0200 Subject: [PATCH 12/12] Fix README --- README.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/README.rst b/README.rst index 4f70aeb2..0a857255 100644 --- a/README.rst +++ b/README.rst @@ -39,7 +39,7 @@ Properly trained, the Snips NLU engine will be able to extract structured data s { "intent": { "intentName": "searchWeatherForecast", - "probability": 0.95 + "confidenceScore": 0.95 }, "slots": [ {