diff --git a/annotators/IntentCatcherTransformers/intent_phrases_commands.json b/annotators/IntentCatcherTransformers/intent_phrases_commands.json
index 2f05b25892..f4cbc5cbcc 100644
--- a/annotators/IntentCatcherTransformers/intent_phrases_commands.json
+++ b/annotators/IntentCatcherTransformers/intent_phrases_commands.json
@@ -1,5 +1,19 @@
 {
     "intent_phrases": {
+        "test_command": {
+            "phrases": [
+                "test_command",
+                "test command"
+            ],
+            "reg_phrases": [
+                "test_command",
+                "test command"
+            ],
+            "min_precision": 0.94,
+            "punctuation": [
+                "."
+            ]
+        },
         "track_object": {
             "phrases": [
                 "((track)|(follow)|(trail)|(trace)|(find)|(rail)|(groove)|(monitor)) a ((human)|(man)|(car)|(bicycle)|(girl)|(dude)|(bag)|(chair)|(black dog)|(white cat))",
diff --git a/annotators/IntentCatcherTransformers/utils.py b/annotators/IntentCatcherTransformers/utils.py
index 826135e0b8..e3fe4b6225 100644
--- a/annotators/IntentCatcherTransformers/utils.py
+++ b/annotators/IntentCatcherTransformers/utils.py
@@ -3,7 +3,7 @@
 from itertools import chain
 from typing import List
 
-from common.universal_templates import join_sentences_in_or_pattern
+from common.join_pattern import join_sentences_in_or_pattern
 
 
 def get_regexp(intent_phrases_path):
diff --git a/annotators/asr/requirements.txt b/annotators/asr/requirements.txt
index 0546360cb2..ec8d81a3fe 100644
--- a/annotators/asr/requirements.txt
+++ b/annotators/asr/requirements.txt
@@ -4,4 +4,4 @@ gunicorn==19.9.0
 requests==2.28.2
 sentry-sdk==1.19.1
 jinja2<=3.1.2
-Werkzeug>=2.2.2
\ No newline at end of file
+Werkzeug>=2.2.2,<3.0
\ No newline at end of file
diff --git a/annotators/combined_classification/server.py b/annotators/combined_classification/server.py
index f3e570f381..f3a1a8b03d 100644
--- a/annotators/combined_classification/server.py
+++ b/annotators/combined_classification/server.py
@@ -7,7 +7,7 @@
 from sentry_sdk.integrations.flask import FlaskIntegration
 from deeppavlov import build_model
 
-from common.utils import combined_classes
+from common.combined_classes import combined_classes
 
 logging.basicConfig(format="%(asctime)s - %(name)s - %(levelname)s - %(message)s", level=logging.INFO)
diff --git a/annotators/combined_classification_ru/Dockerfile b/annotators/combined_classification_ru/Dockerfile
new file mode 100644
index 0000000000..d25e9a4e45
--- /dev/null
+++ b/annotators/combined_classification_ru/Dockerfile
@@ -0,0 +1,20 @@
+FROM deeppavlov/deeppavlov:1.2.0-gpu
+
+WORKDIR /base/DeepPavlov
+
+
+WORKDIR /src
+RUN mkdir common
+
+COPY annotators/combined_classification_ru/requirements.txt ./requirements.txt
+RUN pip install -r requirements.txt
+
+ARG SERVICE_PORT
+ENV SERVICE_PORT=$SERVICE_PORT
+ARG CONFIG
+ENV CONFIG=$CONFIG
+
+COPY annotators/combined_classification_ru/ ./
+COPY common/ common/
+
+CMD gunicorn --workers=1 server:app -b 0.0.0.0:${SERVICE_PORT} --timeout=1200 --preload
diff --git a/annotators/combined_classification_ru/README.md b/annotators/combined_classification_ru/README.md
new file mode 100644
index 0000000000..2b76ce1368
--- /dev/null
+++ b/annotators/combined_classification_ru/README.md
@@ -0,0 +1,32 @@
+
+# Combined_classification
+
+## Description
+
+This model is based on a transformer-agnostic multitask neural architecture. It can solve several tasks simultaneously, almost as well as single-task models.
+
+The models were trained on the following datasets:
+
+**Factoid classification**: For the Factoid task, we used the same Yahoo ConversVsInfo dataset that was used to train the Dream socialbot in the Alexa Prize.
+Note that the valid set in this task was equal to the test set.
+
+**Midas classification**: For the Midas task, we used the same Midas classification dataset that was used to train the Dream socialbot in the Alexa Prize. Note that the valid set in this task was equal to the test set.
+
+**Emotion classification**: For the Emotion classification task, we used the emo\_go\_emotions dataset, with all 28 classes compressed into the seven basic emotions, as in the original paper. Note that these 7 emotions are not exactly the same as the 7 emotions in the original Dream socialbot in the Alexa Prize: one emotion differs (love vs disgust), so the scores are not comparable with those of the original model. Note that this task is multiclass.
+
+**Topic classification**: For the Topic classification task, we used the dataset made by Dilyara Zharikova. The dataset was further filtered and improved for the final model version, to make the model suitable for DREAM. Note that the original topics model does not account for these dataset changes (which also affected the number of classes), so its scores are not comparable with ours.
+
+**Sentiment classification**: For the Sentiment classification task, we used the Dynabench dataset (r1 + r2).
+
+**Toxic classification**: For the Toxic classification task, we used the Kaggle dataset with the 7 toxic classes that are of interest to us. Note that this task is multilabel.
+
+The model also contains 3 replacement models for Amazon services.
+
+The models (multitask and comparative single-task) were trained with an initial learning rate of 2e-5 (with validation patience 2, the rate could be dropped up to 2 times), batch size 32, the AdamW optimizer (betas (0.9, 0.99)), and early stopping after 3 epochs. The early-stopping criterion was the average accuracy over all tasks for multitask models, or the single-task accuracy for single-task models.
+
+This model (with a distilbert-base-uncased backbone) takes only 2439 MB for 9 tasks, whereas a single-task model with the same backbone takes up almost the same memory (~2437 MB) for each of these 9 tasks, so the multitask setup yields roughly a 9-fold memory saving.
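+
+## I/O
+
+A minimal request sketch for the `/model` endpoint (assuming the service is running locally on port 8198, as in the service config in this PR; the class probabilities shown are illustrative):
+
+```python
+import requests
+
+sentences = ["поговорим о книгах", "я тебя люблю"]
+# the endpoint accepts a JSON dict with a "sentences" list
+result = requests.post("http://0.0.0.0:8198/model", json={"sentences": sentences}).json()
+# result holds one dict per sentence, mapping each task name to {class: probability}:
+# [{"topics_ru": {"литература": 0.93, ...},
+#   "sentiment_classification": {"positive": 0.85, "negative": 0.03, "neutral": 0.12},
+#   "toxic_classification": {"not_toxic": 0.99, "toxic": 0.01},
+#   ...},
+#  ...]
+```
+
+The `/batch_model` endpoint accepts `{"utterances_with_histories": [[...]]}` instead and returns the same scores wrapped as `[{"batch": [...]}]`.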
+
+## Dependencies
+
diff --git a/annotators/combined_classification_ru/combined_classifier_ru.json b/annotators/combined_classification_ru/combined_classifier_ru.json
new file mode 100644
index 0000000000..128f330ca3
--- /dev/null
+++ b/annotators/combined_classification_ru/combined_classifier_ru.json
@@ -0,0 +1,66 @@
+{
+    "metadata":{
+        "variables":{
+            "MODELS_PATH": "~/.deeppavlov/models",
+            "DP_NAME":"distilrubert-base-cased-conversational",
+            "BACKBONE":"DeepPavlov/{DP_NAME}",
+            "NAME":"rumtl",
+            "SAVE_LOAD_PATH":"{MODELS_PATH}/{NAME}",
+            "BATCH_SIZE":160,
+            "NUM_TRAIN_EPOCHS":30,
+            "GRADIENT_ACC_STEPS":1
+        },
+        "download":[{
+            "url": "http://files.deeppavlov.ai/dream_data/russian_mtl/rumtl.pth.tar.gz",
+            "subdir": "{MODELS_PATH}"
+        }]
+    },
+    "chainer":{
+        "in":[
+            "x_emo","x_sentiment","x_toxic","x_factoid","x_midas","x_topics"
+        ],
+        "in_y":[
+            "y_emo","y_sentiment","y_toxic","y_factoid","y_midas","y_topics"
+        ],
+        "pipe":[
+            {
+                "class_name":"multitask_pipeline_preprocessor",
+                "possible_keys_to_extract":[0],
+                "preprocessor":"TorchTransformersPreprocessor",
+                "do_lower_case":true,
+                "n_task":6,
+                "vocab_file":"{BACKBONE}",
+                "max_seq_length":128,
+                "in":["x_emo","x_sentiment","x_toxic","x_factoid","x_midas","x_topics"],
+                "out":["bert_features_emo","bert_features_sentiment","bert_features_toxic","bert_features_factoid","bert_features_midas","bert_features_topics"]
+            },
+            {
+                "id":"multitask_transformer",
+                "class_name":"multitask_transformer",
+                "optimizer_parameters":{
+                    "lr":2e-5
+                },
+                "gradient_accumulation_steps":"{GRADIENT_ACC_STEPS}",
+                "learning_rate_drop_patience":2,
+                "learning_rate_drop_div":2.0,
+                "return_probas":true,
+                "new_model":false,
+                "backbone_model":"{BACKBONE}",
+                "save_path":"{MODELS_PATH}/{NAME}",
+                "load_path":"{MODELS_PATH}/{NAME}",
+                "tasks":{
+                    "emo":{"type":"classification","options":7},
+                    "sentiment":{"type":"classification","options":3},
+                    "toxic":{"type":"classification","options":2},
+                    "factoid":{"type":"classification","options":2},
+                    "midas":{"type":"classification","options":15},
+                    "topics":{"type":"classification", "options":76}
+                },
+                "in":["bert_features_emo","bert_features_sentiment","bert_features_toxic","bert_features_factoid","bert_features_midas","bert_features_topics"],
+                "in_y":["y_emo","y_sentiment","y_toxic","y_factoid","y_midas","y_topics"],
+                "out":["y_emo_pred","y_sentiment_pred","y_toxic_pred","y_factoid_pred","y_midas_pred","y_topics_pred"]
+            }
+        ],
+        "out":["y_emo_pred","y_sentiment_pred","y_toxic_pred","y_factoid_pred","y_midas_pred","y_topics_pred"]
+    }
+}
diff --git a/annotators/combined_classification_ru/load_test.py b/annotators/combined_classification_ru/load_test.py
new file mode 100644
index 0000000000..1456187d03
--- /dev/null
+++ b/annotators/combined_classification_ru/load_test.py
@@ -0,0 +1,21 @@
+from locust import HttpUser, task
+
+batch = [
+    {"sentences": ["i love you", "i hate you", "i dont care"]},
+    {"sentences": ["почему ты так глуп"]},
+    {"sentences": ["поговорим о играх"]},
+    {"sentences": ["поговорим о фильмах"]},
+    {"sentences": ["поменяем тему"]},
+]
+
+
+class QuickstartUser(HttpUser):
+    @task
+    def hello_world(self):
+        ans = self.client.post("", json=batch[self.batch_index % len(batch)])
+        self.batch_index += 1
+        if ans.status_code != 200:
+            print(ans.status_code, ans.text)
+
+    def on_start(self):
+        self.batch_index = 0
diff --git a/annotators/combined_classification_ru/load_test.sh b/annotators/combined_classification_ru/load_test.sh
new file mode 100755
index
0000000000..5d2f79acdc --- /dev/null +++ b/annotators/combined_classification_ru/load_test.sh @@ -0,0 +1,2 @@ +pip install -r requirements_load_test.txt +locust -f load_test.py --headless -u 10 -r 2 --host http://0.0.0.0:$SERVICE_PORT/model \ No newline at end of file diff --git a/annotators/combined_classification_ru/requirements.txt b/annotators/combined_classification_ru/requirements.txt new file mode 100644 index 0000000000..b0e08d8b1e --- /dev/null +++ b/annotators/combined_classification_ru/requirements.txt @@ -0,0 +1,10 @@ +gunicorn==19.9.0 +sentry-sdk[flask]==0.14.1 +itsdangerous==2.0.1 +uvicorn==0.13.0 +prometheus-client==0.13.0 +filelock==3.4.2 +transformers==4.15.0 +jinja2<=3.0.3 +Werkzeug<=2.0.3 +pytorch-crf==0.7.2 diff --git a/annotators/combined_classification_ru/requirements_load_test.txt b/annotators/combined_classification_ru/requirements_load_test.txt new file mode 100644 index 0000000000..90fc137cb4 --- /dev/null +++ b/annotators/combined_classification_ru/requirements_load_test.txt @@ -0,0 +1 @@ +locust==1.4.3 \ No newline at end of file diff --git a/annotators/combined_classification_ru/server.py b/annotators/combined_classification_ru/server.py new file mode 100644 index 0000000000..61f0ab1edd --- /dev/null +++ b/annotators/combined_classification_ru/server.py @@ -0,0 +1,90 @@ +import logging +import os +import time + +from flask import Flask, request, jsonify +import sentry_sdk + +from sentry_sdk.integrations.flask import FlaskIntegration +from deeppavlov import build_model +from common.combined_classes import combined_classes + + +supported_tasks = [ + "emotion_classification", + "sentiment_classification", + "toxic_classification", + "factoid_classification", + "midas_classification", + "topics_ru", +] + +combined_classes = {task: combined_classes[task] for task in combined_classes if task in supported_tasks} +combined_classes["toxic_classification"] = ["not_toxic", "toxic"] # As Russian toxic supports only TWO classes + +logging.basicConfig(format="%(asctime)s - %(name)s - %(levelname)s - %(message)s", level=logging.INFO) + +sentry_sdk.init(dsn=os.getenv("SENTRY_DSN"), integrations=[FlaskIntegration()]) + +logger = logging.getLogger(__name__) + + +def get_result(sentences, sentences_with_history, postannotations=False): + logger.debug((sentences, sentences_with_history, postannotations)) + ans = [{} for _ in sentences] + if not sentences: + logger.exception("Input sentences not received") + sentences = [" "] + # if not sentences_with_history: + # logger.exception("Input sentences with history not received") + # sentences_with_history = sentences + data = [sentences for _ in range(len(combined_classes))] + try: + prob_lists = model(*data) + for task_name, prob_list in zip(combined_classes, prob_lists): + for i in range(len(prob_list)): + ans[i][task_name] = { + class_: round(float(prob), 2) for class_, prob in zip(combined_classes[task_name], prob_list[i]) + } + except Exception as e: + sentry_sdk.capture_exception(e) + logger.exception(e) + + return ans + + +try: + model = build_model("combined_classifier_ru.json", download=True) + logger.info("Making test res") + test_res = get_result(["a"], ["a"]) + logger.info("model loaded, test query processed") +except Exception as e: + sentry_sdk.capture_exception(e) + logger.exception(e) + raise e + +app = Flask(__name__) + + +@app.route("/model", methods=["POST"]) +def respond(): + t = time.time() + sentences = request.json.get("sentences", [" "]) + sentences_with_hist = request.json.get("sentences_with_history", 
sentences) + answer = get_result(sentences, sentences_with_hist) + logger.debug(f"combined_classification result: {answer}") + logger.info(f"combined_classification exec time: {time.time() - t}") + return jsonify(answer) + + +@app.route("/batch_model", methods=["POST"]) +def batch_respond(): + t = time.time() + sep = " [SEP] " + utterances_with_histories = request.json.get("utterances_with_histories", [[" "]]) + sentences_with_hist = [sep.join(s) for s in utterances_with_histories] + sentences = [s[-1].split(sep)[-1] for s in utterances_with_histories] + answer = get_result(sentences, sentences_with_hist) + logger.debug(f"combined_classification batch result: {answer}") + logger.info(f"combined_classification exec time: {time.time() - t}") + return jsonify([{"batch": answer}]) diff --git a/annotators/combined_classification_ru/service_configs/combined-classification-ru/environment.yml b/annotators/combined_classification_ru/service_configs/combined-classification-ru/environment.yml new file mode 100644 index 0000000000..ea2c9d9ce8 --- /dev/null +++ b/annotators/combined_classification_ru/service_configs/combined-classification-ru/environment.yml @@ -0,0 +1,4 @@ +SERVICE_PORT: 8198 +SERVICE_NAME: combined_classification_ru +CONFIG: combined_classifier_ru.json +CUDA_VISIBLE_DEVICES: '0' diff --git a/annotators/combined_classification_ru/service_configs/combined-classification-ru/service.yml b/annotators/combined_classification_ru/service_configs/combined-classification-ru/service.yml new file mode 100644 index 0000000000..abf37588c0 --- /dev/null +++ b/annotators/combined_classification_ru/service_configs/combined-classification-ru/service.yml @@ -0,0 +1,41 @@ +name: combined-classification-ru +endpoints: +- model +- batch_model +compose: + env_file: + - .env + build: + args: + SERVICE_PORT: 8198 + SERVICE_NAME: combined_classification_ru + CONFIG: combined_classifier_ru.json + context: . 
+ dockerfile: ./annotators/combined_classification_ru/Dockerfile + command: gunicorn --workers=1 server:app -b 0.0.0.0:8198 --timeout 600 + environment: + - CUDA_VISIBLE_DEVICES=0 + deploy: + resources: + limits: + memory: 2G + reservations: + memory: 2G + volumes: + - ./common:/src/common + - ./annotators/combined_classification_ru:/src + - ~/.deeppavlov:/root/.deeppavlov + - ~/.deeppavlov/cache:/root/.cache + ports: + - 8198:8198 +proxy: + command: + - nginx + - -g + - daemon off; + build: + context: dp/proxy/ + dockerfile: Dockerfile + environment: + - PROXY_PASS=dream.deeppavlov.ai:8198 + - PORT=8198 diff --git a/annotators/combined_classification_ru/test.py b/annotators/combined_classification_ru/test.py new file mode 100644 index 0000000000..790f375e6f --- /dev/null +++ b/annotators/combined_classification_ru/test.py @@ -0,0 +1,66 @@ +import requests +from time import time + + +def main_test(): + url = "http://0.0.0.0:8198/model" + batch_url = "http://0.0.0.0:8198/batch_model" + configs = [ + { + "sentences": ["поговорим о книгах", "ты любишь порно"], + "task": "topics_ru", + "answers_bert": [["литература"], ["секс"]], + }, + { + "sentences": ["что ты любишь", "где монреаль"], + "task": "factoid_classification", + "answers_bert": [["is_conversational"], ["is_factoid"]], + }, + { + "sentences": ["я тебя люблю", "я тебя ненавижу", "сейчас"], + "task": "sentiment_classification", + "answers_bert": [["positive"], ["negative"], ["neutral"]], + }, + { + "sentences": ["почему ты такой дурак"], + "task": "emotion_classification", + "answers_bert": [["anger"]], + }, + { + "sentences_with_history": ["это лучшая собака [SEP] да, много"], + "sentences": ["да, много"], + "task": "midas_classification", + "answers_bert": [["pos_answer"]], + }, + { + "sentences": ["привет", "и вот таких уродов дахуя"], + "task": "toxic_classification", + "answers_bert": [["not_toxic"], ["toxic"]], + }, + ] + t = time() + for config in configs: + print(config) + if "sentences_with_history" in config: + config["utterances_with_histories"] = [[k] for k in config["sentences_with_history"]] + else: + config["utterances_with_histories"] = [[k] for k in config["sentences"]] + responses = requests.post(url, json=config).json() + batch_responses = requests.post(batch_url, json=config).json() + batch_error_msg = f"Batch responses {batch_responses} not match to responses {responses}" + assert ( + batch_responses[0]["batch"][0]["toxic_classification"] == responses[0]["toxic_classification"] + ), batch_error_msg + responses = [j[config["task"]] for j in responses] + for response, answer, sentence in zip(responses, config["answers_bert"], config["sentences"]): + print((response, answer, sentence)) + predicted_classes = [class_ for class_ in response if response[class_] == max(response.values())] + assert sorted(answer) == sorted(predicted_classes), " * ".join( + [str(j) for j in [sentence, config["task"], answer, predicted_classes, response]] + ) + print("SUCCESS!") + print(time() - t) + return 0 + + +main_test() diff --git a/annotators/combined_classification_ru/test.sh b/annotators/combined_classification_ru/test.sh new file mode 100755 index 0000000000..eddad32aba --- /dev/null +++ b/annotators/combined_classification_ru/test.sh @@ -0,0 +1,4 @@ +#!/bin/bash + +python test.py + diff --git a/annotators/custom_entity_linking/server.py b/annotators/custom_entity_linking/server.py index d6b6091cca..09bb3272dd 100644 --- a/annotators/custom_entity_linking/server.py +++ b/annotators/custom_entity_linking/server.py @@ -25,18 +25,6 @@ 
raise e -@app.route("/add_entities", methods=["POST"]) -def add_entities(): - user_id = request.json.get("user_id", "") - entity_info = request.json.get("entity_info", {}) - entity_substr_list = entity_info.get("entity_substr", []) - entity_ids_list = entity_info.get("entity_ids", []) - tags_list = entity_info.get("tags", []) - el[0].add_custom_entities(user_id, entity_substr_list, entity_ids_list, tags_list) - logger.info(f"added entities {entity_info}") - return {} - - def preprocess_context(context_batch): """Preprocesses the context batch by combining previous and current utterances. @@ -140,8 +128,8 @@ def process_entity_info( # - Exclude entities marked as "Abstract" in db if they are not considered # abstract according to is_abstract. for entity_id, conf, entity_id_tag in zip(entity_ids, confs, entity_id_tags): - if entity_id_tag.startswith("Abstract") and not is_abstract: - pass + if entity_id_tag == "Abstract" and not is_abstract: + logger.info(f"Contradiction between the entity_kind 'Abstract' and relationship '{curr_rel}'") else: filtered_entity_ids.append(entity_id) filtered_confs.append(conf) diff --git a/annotators/custom_entity_linking/service_configs/custom-entity-linking/service.yml b/annotators/custom_entity_linking/service_configs/custom-entity-linking/service.yml index a0f1fe51dd..379d1a85b3 100644 --- a/annotators/custom_entity_linking/service_configs/custom-entity-linking/service.yml +++ b/annotators/custom_entity_linking/service_configs/custom-entity-linking/service.yml @@ -1,7 +1,6 @@ name: custom-entity-linking endpoints: - model -- add_entities compose: env_file: - .env diff --git a/annotators/custom_entity_linking/src/entity_linking.py b/annotators/custom_entity_linking/src/entity_linking.py index 44187b2210..eac9476548 100644 --- a/annotators/custom_entity_linking/src/entity_linking.py +++ b/annotators/custom_entity_linking/src/entity_linking.py @@ -79,44 +79,13 @@ def load(self) -> None: def save(self) -> None: pass - def add_custom_entities(self, user_id, entity_substr_list, entity_ids_list, tags_list): - if self.conn is None: - if not os.path.exists(self.load_path): - os.makedirs(self.load_path) - self.conn = sqlite3.connect(str(self.load_path / "custom_database.db"), check_same_thread=False) - self.cur = self.conn.cursor() - self.cur.execute( - "CREATE VIRTUAL TABLE IF NOT EXISTS inverted_index USING fts5(title, entity_id, num_rels " - "UNINDEXED, tag, user_id, tokenize = 'porter ascii');" - ) - - for entity_substr, entity_id, tag in zip(entity_substr_list, entity_ids_list, tags_list): - entity_id = entity_id.replace("/", "slash").replace("-", "hyphen") - query_str = f"title:{entity_substr} AND tag:{tag} AND user_id:{user_id}" - - query = "SELECT * FROM inverted_index WHERE inverted_index MATCH ?;" - res = self.cur.execute(query, (query_str,)).fetchall() - if res and res[0][3] == "name" and res[0][1] == entity_id and tag == "name": - query = "DELETE FROM inverted_index WHERE entity_id=? AND tag=? 
AND user_id=?;" - self.cur.execute(query, (entity_id, tag, user_id)) - self.cur.execute( - "INSERT INTO inverted_index " "VALUES (?, ?, ?, ?, ?);", - (entity_substr.lower(), entity_id, 1, tag, user_id), - ) - self.conn.commit() - elif not res: - self.cur.execute( - "INSERT INTO inverted_index " "VALUES (?, ?, ?, ?, ?);", - (entity_substr.lower(), entity_id, 1, tag, user_id), - ) - self.conn.commit() - def __call__( self, user_ids: List[str], entity_substr_batch: List[List[str]], entity_tags_batch: List[List[str]] = None, ): + user_ids = [user_id.replace("/", "slash").replace("-", "hyphen") for user_id in user_ids] entity_ids_batch, entity_conf_batch, entity_id_tags_batch = [], [], [] for user_id, entity_substr_list, entity_tags_list in zip(user_ids, entity_substr_batch, entity_tags_batch): entity_ids_list, entity_conf_list, entity_id_tags_list = self.link_entities( diff --git a/annotators/custom_entity_linking/test_el.py b/annotators/custom_entity_linking/test_el.py index 8976a4433e..32c9e3f736 100644 --- a/annotators/custom_entity_linking/test_el.py +++ b/annotators/custom_entity_linking/test_el.py @@ -1,23 +1,61 @@ +import os +from pathlib import Path import requests +from dotenv import load_dotenv +from deeppavlov_kg import TerminusdbKnowledgeGraph +import sentry_sdk +from deeppavlov import build_model +import nltk -use_context = True +load_dotenv("./.env") + +config_name = "annotators/custom_entity_linking/custom_entity_linking.json" +nltk.download("stopwords") + +try: + el = build_model(config_name, download=True) + print("model loaded") +except Exception as e: + sentry_sdk.capture_exception(e) + print(e) + raise e + +INDEX_LOAD_PATH = Path(os.path.expanduser(el.pipe[-1][-1].load_path)) +TERMINUSDB_SERVER_URL = "http://0.0.0.0:6363" +TERMINUSDB_SERVER_TEAM = "admin" +TERMINUSDB_SERVER_DB = "user_knowledge_db" +TERMINUSDB_SERVER_PASSWORD = "root" + + +graph = TerminusdbKnowledgeGraph( + db_name=TERMINUSDB_SERVER_DB, + team=TERMINUSDB_SERVER_TEAM, + server=TERMINUSDB_SERVER_URL, + password=TERMINUSDB_SERVER_PASSWORD, + index_load_path=INDEX_LOAD_PATH, +) def main(): url = "http://0.0.0.0:8153" inserted_data = { - "user_id": "1234", + "user_id": "User/Jack", "entity_info": { "entity_substr": ["forrest gump"], "entity_ids": ["film/123"], "tags": ["film"], }, } - requests.post(f"{url}/add_entities", json=inserted_data) + graph.index.set_active_user_id(inserted_data["user_id"]) + graph.index.add_entities( + inserted_data["entity_info"]["entity_substr"], + inserted_data["entity_info"]["entity_ids"], + inserted_data["entity_info"]["tags"], + ) request_data = [ { - "user_id": ["1234"], + "user_id": ["User/Jack"], "entity_substr": [["forrest gump"]], "entity_tags": [[[("film", 1.0)]]], "context": [["who directed forrest gump?"]], @@ -29,12 +67,10 @@ def main(): for data, gold_result in zip(request_data, gold_results): result = requests.post(f"{url}/model", json=data).json() print(result) - entity_ids = [] for entity_info_list in result: for entity_info in entity_info_list: entity_ids = entity_info.get("entity_ids") - if entity_ids == gold_result: count += 1 else: diff --git a/annotators/doc_retriever/server.py b/annotators/doc_retriever/server.py index 55752e3f42..45f0e4bb73 100644 --- a/annotators/doc_retriever/server.py +++ b/annotators/doc_retriever/server.py @@ -23,9 +23,9 @@ app = Flask(__name__) PARAGRAPHS_NUM = int(os.environ.get("PARAGRAPHS_NUM", 5)) -FILE_SERVER_TIMEOUT = int(os.environ.get("FILE_SERVER_TIMEOUT", 30)) +FILE_SERVER_TIMEOUT = float(os.environ.get("FILE_SERVER_TIMEOUT", 
30)) DOC_PATH_OR_LINK = os.environ.get("DOC_PATH_OR_LINK", "") -if DOC_PATH_OR_LINK and type(DOC_PATH_OR_LINK) != list: +if DOC_PATH_OR_LINK and not isinstance(DOC_PATH_OR_LINK, list): DOC_PATH_OR_LINK = DOC_PATH_OR_LINK.split(",") # we may have multiple files CONFIG_PATH = os.environ.get("CONFIG_PATH", None) SERVICE_PORT = os.environ.get("SERVICE_PORT", None) diff --git a/annotators/doc_retriever/utils.py b/annotators/doc_retriever/utils.py index 180510d475..4ecd4db41f 100644 --- a/annotators/doc_retriever/utils.py +++ b/annotators/doc_retriever/utils.py @@ -18,7 +18,7 @@ logger = logging.getLogger(__name__) FILE_SERVER_URL = os.environ.get("FILE_SERVER_URL", None) -FILE_SERVER_TIMEOUT = int(os.environ.get("FILE_SERVER_TIMEOUT", 30)) +FILE_SERVER_TIMEOUT = float(os.environ.get("FILE_SERVER_TIMEOUT", 30)) def find_and_download_docs_if_needed(dialog, model_needs_train, filepaths_in_container, docs_and_links): diff --git a/annotators/entity_storer/requirements.txt b/annotators/entity_storer/requirements.txt index 14a6d18c3d..0a6a1dcb60 100644 --- a/annotators/entity_storer/requirements.txt +++ b/annotators/entity_storer/requirements.txt @@ -9,4 +9,4 @@ nltk==3.5 click>=8.0 requests==2.28.2 jinja2<=3.0.3 -Werkzeug>=2.2.2 +Werkzeug>=2.2.2,<3.0 diff --git a/annotators/fact_random/requirements.txt b/annotators/fact_random/requirements.txt index 211429ccc9..3216ddfa7a 100644 --- a/annotators/fact_random/requirements.txt +++ b/annotators/fact_random/requirements.txt @@ -4,4 +4,4 @@ itsdangerous==2.0.1 gunicorn==20.1.0 requests==2.28.2 jinja2<=3.0.3 -Werkzeug>=2.2.2 +Werkzeug>=2.2.2,<3.0 diff --git a/annotators/midas_classification/Dockerfile b/annotators/midas_classification/Dockerfile index b21c4e5476..345b08f255 100644 --- a/annotators/midas_classification/Dockerfile +++ b/annotators/midas_classification/Dockerfile @@ -8,15 +8,16 @@ ENV CONFIG=$CONFIG RUN mkdir /src /midas COPY ./requirements.txt /src/requirements.txt + RUN pip install --upgrade pip && \ pip install -r /src/requirements.txt && \ python -c "import nltk; nltk.download('punkt'); nltk.download('wordnet')" && \ - python -m spacy download en_core_web_sm \ - + python -m spacy download en_core_web_sm COPY . 
/src/ WORKDIR /src + RUN sed -i "s|$SED_ARG|g" "$CONFIG" -CMD gunicorn --workers=1 server:app -b 0.0.0.0:8090 +CMD gunicorn --workers=1 server:app -b 0.0.0.0:8090 \ No newline at end of file diff --git a/annotators/midas_classification/requirements.txt b/annotators/midas_classification/requirements.txt index 946faa4e24..073a485eaf 100644 --- a/annotators/midas_classification/requirements.txt +++ b/annotators/midas_classification/requirements.txt @@ -1,12 +1,10 @@ -git+https://github.com/deeppavlov/DeepPavlov.git@0.14.1 Flask==1.1.1 itsdangerous==2.0.1 sentry-sdk==0.14.2 -requests==2.23.0 +requests==2.22.0 gunicorn==19.9.0 -numpy==1.17.2 -spacy==3.0.6 +numpy==1.18.0 +spacy==2.3.9 jinja2<=3.0.3 Werkzeug<=2.0.3 -git+https://github.com/deeppavlov/bert.git@feat/multi_gpu -tensorflow==1.15.5 +git+https://github.com/deeppavlov/bert.git@feat/multi_gpu \ No newline at end of file diff --git a/annotators/midas_predictor/requirements.txt b/annotators/midas_predictor/requirements.txt index 83a179b246..03392476c7 100644 --- a/annotators/midas_predictor/requirements.txt +++ b/annotators/midas_predictor/requirements.txt @@ -7,4 +7,4 @@ sentry-sdk[asgi]==1.19.1 itsdangerous==2.0.1 numpy==1.24.2 jinja2<=3.0.3 -Werkzeug>=2.2.2 +Werkzeug>=2.2.2,<3.0 diff --git a/annotators/news_api/requirements.txt b/annotators/news_api/requirements.txt index 04d77db4d6..249645c472 100644 --- a/annotators/news_api/requirements.txt +++ b/annotators/news_api/requirements.txt @@ -7,4 +7,4 @@ numpy==1.24.2 nltk==3.2.5 prometheus_client jinja2<=3.0.3 -Werkzeug>=2.2.2 +Werkzeug>=2.2.2,<3.0 diff --git a/annotators/personality_detection/Dockerfile b/annotators/personality_detection/Dockerfile new file mode 100644 index 0000000000..91a64350c8 --- /dev/null +++ b/annotators/personality_detection/Dockerfile @@ -0,0 +1,11 @@ +FROM python:3.7 + +WORKDIR /src + +RUN git clone https://github.com/jkwieser/personality-detection-text.git /personality-detection-text + +COPY requirements.txt . + +RUN pip install -r requirements.txt + +COPY . . 
diff --git a/annotators/personality_detection/requirements.txt b/annotators/personality_detection/requirements.txt
new file mode 100644
index 0000000000..c27af54ab0
--- /dev/null
+++ b/annotators/personality_detection/requirements.txt
@@ -0,0 +1,8 @@
+scikit-learn==0.22.1
+plotly==4.14.3
+pandas==1.2.4
+uvicorn==0.13.4
+fastapi==0.65.1
+sentry-sdk==0.13.0
+requests==2.25.1
+pydantic==1.8.2
\ No newline at end of file
diff --git a/annotators/personality_detection/server.py b/annotators/personality_detection/server.py
new file mode 100644
index 0000000000..d77a27ace1
--- /dev/null
+++ b/annotators/personality_detection/server.py
@@ -0,0 +1,89 @@
+import logging
+import os
+import pickle
+import re
+from typing import Any, List
+
+import numpy as np
+import sentry_sdk
+from fastapi import FastAPI, Body
+from pydantic import BaseModel
+from starlette.middleware.cors import CORSMiddleware
+
+sentry_sdk.init(os.getenv("SENTRY_DSN"))
+
+cEXT = pickle.load(open("/personality-detection-text/data/models/cEXT.p", "rb"))
+cNEU = pickle.load(open("/personality-detection-text/data/models/cNEU.p", "rb"))
+cAGR = pickle.load(open("/personality-detection-text/data/models/cAGR.p", "rb"))
+cCON = pickle.load(open("/personality-detection-text/data/models/cCON.p", "rb"))
+cOPN = pickle.load(open("/personality-detection-text/data/models/cOPN.p", "rb"))
+vectorizer_31 = pickle.load(open("/personality-detection-text/data/models/vectorizer_31.p", "rb"))
+vectorizer_30 = pickle.load(open("/personality-detection-text/data/models/vectorizer_30.p", "rb"))
+
+
+logger = logging.getLogger(__name__)
+
+
+def jsonify_data(data: Any) -> Any:
+    """Replaces JSON-non-serializable objects with JSON-serializable ones.
+
+    The function replaces numpy arrays and numbers with python lists and numbers; tuples are replaced with lists. All other
+    object types remain the same.
+
+    Args:
+        data: Object to make JSON-serializable.
+
+    Returns:
+        Modified input data.
+
+    """
+    if isinstance(data, (list, tuple)):
+        result = [jsonify_data(item) for item in data]
+    elif isinstance(data, dict):
+        result = {}
+        for key in data.keys():
+            result[key] = jsonify_data(data[key])
+    elif isinstance(data, np.ndarray):
+        result = data.tolist()
+    elif isinstance(data, np.integer):
+        result = int(data)
+    elif isinstance(data, np.floating):
+        result = float(data)
+    elif callable(getattr(data, "to_serializable_dict", None)):
+        result = data.to_serializable_dict()
+    else:
+        result = data
+    return result
+
+
+def predict_personality(text):
+    try:
+        sentences = re.split("(?<=[.!?]) +", text)
+        text_vector_31 = vectorizer_31.transform(sentences)
+        text_vector_30 = vectorizer_30.transform(sentences)
+        EXT = cEXT.predict(text_vector_31)
+        NEU = cNEU.predict(text_vector_30)
+        AGR = cAGR.predict(text_vector_31)
+        CON = cCON.predict(text_vector_31)
+        OPN = cOPN.predict(text_vector_31)
+        return {"EXT": EXT[0], "NEU": NEU[0], "AGR": AGR[0], "CON": CON[0], "OPN": OPN[0]}
+    except Exception as e:
+        sentry_sdk.capture_exception(e)
+        raise e
+
+
+app = FastAPI()
+app.add_middleware(
+    CORSMiddleware, allow_origins=["*"], allow_credentials=True, allow_methods=["*"], allow_headers=["*"]
+)
+
+
+class PersonalityPayload(BaseModel):
+    personality: List[str] = Body(...)
+ + +@app.post("/model") +def infer(payload: PersonalityPayload): + logger.info(f"Personality Detection: {payload}") + personality = [predict_personality(p) for p in payload.personality] + return jsonify_data(personality) diff --git a/annotators/personality_detection/service_configs/personality_detection/environment.yml b/annotators/personality_detection/service_configs/personality_detection/environment.yml new file mode 100644 index 0000000000..d504beeae5 --- /dev/null +++ b/annotators/personality_detection/service_configs/personality_detection/environment.yml @@ -0,0 +1,2 @@ +SERVICE_PORT: 8026 +SERVICE_NAME: personality_detection diff --git a/annotators/personality_detection/service_configs/personality_detection/service.yml b/annotators/personality_detection/service_configs/personality_detection/service.yml new file mode 100644 index 0000000000..eef7963a33 --- /dev/null +++ b/annotators/personality_detection/service_configs/personality_detection/service.yml @@ -0,0 +1,23 @@ +name: personality-detection +endpoints: +- model +compose: + env_file: + - .env + build: + args: + SERVICE_PORT: 8026 + SERVICE_NAME: personality_detection + context: . + dockerfile: ./annotators/personality_detection/Dockerfile + command: uvicorn server:app --host 0.0.0.0 --port 8026 + deploy: + resources: + limits: + memory: 312M + reservations: + memory: 312M + volumes: + - ./annotators/personality_detection:/src + ports: + - 8026:8026 \ No newline at end of file diff --git a/annotators/personality_detection/test.py b/annotators/personality_detection/test.py new file mode 100644 index 0000000000..ecc26c7fd0 --- /dev/null +++ b/annotators/personality_detection/test.py @@ -0,0 +1,12 @@ +import requests + + +def test(): + response = requests.post("http://0.0.0.0:8026/model", json={"personality": ["Hello world"]}) + assert response.status_code == 200 + assert response.json() == [{"EXT": 0, "NEU": 1, "AGR": 1, "CON": 0, "OPN": 1}] + print("SUCCESS") + + +if __name__ == "__main__": + test() diff --git a/annotators/personality_detection/test.sh b/annotators/personality_detection/test.sh new file mode 100644 index 0000000000..82f1710595 --- /dev/null +++ b/annotators/personality_detection/test.sh @@ -0,0 +1,2 @@ +#!/bin/bash +python test.py \ No newline at end of file diff --git a/annotators/prompt_selector/requirements.txt b/annotators/prompt_selector/requirements.txt index 5e74301907..94d51c2055 100644 --- a/annotators/prompt_selector/requirements.txt +++ b/annotators/prompt_selector/requirements.txt @@ -5,5 +5,5 @@ sentry-sdk==1.19.1 requests==2.28.2 click<=8.0.4 jinja2<=3.1.2 -Werkzeug>=2.2.2 +Werkzeug>=2.2.2,<3.0 numpy>=1.17.2 diff --git a/annotators/property_extraction/Dockerfile b/annotators/property_extraction/Dockerfile index f5a0dd0045..f997a15c60 100644 --- a/annotators/property_extraction/Dockerfile +++ b/annotators/property_extraction/Dockerfile @@ -2,10 +2,14 @@ FROM deeppavlov/deeppavlov:1.2.0-gpu RUN apt-get update && apt-get install git -y -ARG CONFIG +ARG CONFIG_T5 +ARG CONFIG_REL_RANKER ARG SRC_DIR +ARG SERVICE_PORT -ENV CONFIG=$CONFIG +ENV CONFIG_T5=$CONFIG_T5 +ENV CONFIG_REL_RANKER=$CONFIG_REL_RANKER +ENV SERVICE_PORT=$SERVICE_PORT COPY ./annotators/property_extraction/requirements.txt /src/requirements.txt RUN pip install -r /src/requirements.txt @@ -14,4 +18,4 @@ COPY $SRC_DIR /src WORKDIR /src -CMD gunicorn --workers=1 --timeout 500 server:app -b 0.0.0.0:8136 +CMD gunicorn --workers=1 --timeout 500 server:app -b 0.0.0.0:$SERVICE_PORT \ No newline at end of file diff --git 
a/annotators/property_extraction/README.md b/annotators/property_extraction/README.md
new file mode 100644
index 0000000000..483914948e
--- /dev/null
+++ b/annotators/property_extraction/README.md
@@ -0,0 +1,29 @@
+# Property Extraction
+
+## Description
+
+The Property Extraction annotator extracts user attributes in RDF-triplet format for a specific individual. This enables a dialog assistant to acquire information about the user’s preferred film, dish, location, etc., and utilize this knowledge to generate personalized responses.
+
+The annotator is capable of extracting multiple user attributes from utterances in the form of (subject, predicate, object) triplets. The subject is designated as “user,” the relation represents the attribute name, and the object denotes the attribute value. There are 61 distinct relation types that the annotator currently supports, as listed in the rel_list.txt file.
+
+The Property Extraction annotator consists of the following components:
+
+- Relation classifier: a BERT-based model that finds all the user attributes in the current utterance, if there are any.
+- Entity generator: a seq2seq model that generates the subject and object for each attribute found in the previous step.
+
+
+## I/O
+
+**Input example**
+
+```python
+import requests
+
+utterances = [["I love going for a walk with my two dogs every day."], ["I like travelling in Italy with my husband. And you?"]]
+requests.post("http://0.0.0.0:8136/respond", json={"utterances": utterances}).json()
+
+>>> [
+    {"triplets": [{"subject": "user", "relation": "like activity", "object": "walking"}, {"subject": "user", "relation": "have pet", "object": "two dogs"}]},
+    {"triplets": [{"subject": "user", "property": "marital status", "object": "husband"}, {"subject": "user", "relation": "like activity", "object": "travel"}]}
+  ]
+```
diff --git a/annotators/property_extraction/property_classification_distilbert.json b/annotators/property_extraction/property_classification_distilbert.json
deleted file mode 100644
index a9db83a238..0000000000
--- a/annotators/property_extraction/property_classification_distilbert.json
+++ /dev/null
@@ -1,100 +0,0 @@
-{
-    "dataset_reader": {
-        "class_name": "sq_reader",
-        "data_path": "{DOWNLOADS_PATH}/dialogue_nli/dialogue_nli_cls.json"
-    },
-    "dataset_iterator": {
-        "class_name": "basic_classification_iterator",
-        "seed": 42
-    },
-    "chainer": {
-        "in": ["x"],
-        "in_y": ["y"],
-        "pipe": [
-            {
-                "class_name": "torch_transformers_preprocessor",
-                "vocab_file": "{TRANSFORMER}",
-                "do_lower_case": false,
-                "max_seq_length": 64,
-                "in": ["x"],
-                "out": ["bert_features"]
-            },
-            {
-                "id": "classes_vocab",
-                "class_name": "simple_vocab",
-                "fit_on": ["y"],
-                "save_path": "{MODEL_PATH}/classes.dict",
-                "load_path": "{MODEL_PATH}/classes.dict",
-                "in": ["y"],
-                "out": ["y_ids"]
-            },
-            {
-                "in": ["y_ids"],
-                "out": ["y_onehot"],
-                "class_name": "one_hotter",
-                "depth": "#classes_vocab.len",
-                "single_vector": true
-            },
-            {
-                "class_name": "torch_transformers_classifier",
-                "n_classes": "#classes_vocab.len",
-                "return_probas": true,
-                "pretrained_bert": "{TRANSFORMER}",
-                "save_path": "{MODEL_PATH}/model",
-                "load_path": "{MODEL_PATH}/model",
-                "optimizer": "AdamW",
-                "optimizer_parameters": {"lr": 1e-05},
-                "learning_rate_drop_patience": 5,
-                "learning_rate_drop_div": 2.0,
-                "in": ["bert_features"],
-                "in_y": ["y_ids"],
-                "out": ["y_pred_probas"]
-            },
-            {
-                "in": ["y_pred_probas"],
-                "out": ["y_pred_ids"],
-                "class_name": "proba2labels",
-                "max_proba": true
-            },
-            {
-                "in": ["y_pred_ids"],
- "out": ["y_pred_labels"], - "ref": "classes_vocab" - } - ], - "out": ["y_pred_labels"] - }, - "train": { - "epochs": 100, - "batch_size": 64, - "metrics": [ - "f1_macro", - "accuracy" - ], - "validation_patience": 10, - "val_every_n_batches": 100, - "log_every_n_batches": 100, - "show_examples": false, - "evaluation_targets": ["valid", "test"], - "class_name": "torch_trainer" - }, - "metadata": { - "variables": { - "TRANSFORMER": "distilbert-base-uncased", - "ROOT_PATH": "~/.deeppavlov", - "DOWNLOADS_PATH": "{ROOT_PATH}/downloads", - "MODELS_PATH": "{ROOT_PATH}/models", - "MODEL_PATH": "{MODELS_PATH}/classifiers/property_classification" - }, - "download": [ - { - "url": "http://files.deeppavlov.ai/deeppavlov_data/generative_ie/property_classification.tar.gz", - "subdir": "{MODEL_PATH}" - }, - { - "url": "http://files.deeppavlov.ai/deeppavlov_data/generative_ie/dialogue_nli_cls.tar.gz", - "subdir": "{DOWNLOADS_PATH}/dialogue_nli" - } - ] - } -} diff --git a/annotators/property_extraction/rel_ranking_roberta.json b/annotators/property_extraction/rel_ranking_roberta.json new file mode 100644 index 0000000000..f988e415dd --- /dev/null +++ b/annotators/property_extraction/rel_ranking_roberta.json @@ -0,0 +1,44 @@ +{ + "chainer": { + "in": ["uttr", "rel_list"], + "pipe": [ + { + "class_name": "torch_transformers_preprocessor", + "vocab_file": "{TRANSFORMER}", + "do_lower_case": true, + "max_seq_length": 128, + "in": ["uttr", "rel_list"], + "out": ["bert_features"] + }, + { + "class_name": "torch_transformers_classifier", + "n_classes": 2, + "return_probas": true, + "pretrained_bert": "{TRANSFORMER}", + "save_path": "{MODEL_PATH}/model", + "load_path": "{MODEL_PATH}/model", + "optimizer": "AdamW", + "optimizer_parameters": {"lr": 1e-05}, + "learning_rate_drop_patience": 5, + "learning_rate_drop_div": 2.0, + "in": ["bert_features"], + "out": ["y_pred_probas"] + } + ], + "out": ["y_pred_probas"] + }, + "metadata": { + "variables": { + "ROOT_PATH": "~/.deeppavlov", + "DOWNLOADS_PATH": "{ROOT_PATH}/downloads", + "TRANSFORMER": "haisongzhang/roberta-tiny-cased", + "MODEL_PATH": "{ROOT_PATH}/models/classifiers/rel_ranking_prex" + }, + "download": [ + { + "url": "http://files.deeppavlov.ai/deeppavlov_data/generative_ie/rel_ranking_files.tar.gz", + "subdir": "{MODEL_PATH}" + } + ] + } +} \ No newline at end of file diff --git a/annotators/property_extraction/server.py b/annotators/property_extraction/server.py index 274e019fbc..ab7ab8445f 100644 --- a/annotators/property_extraction/server.py +++ b/annotators/property_extraction/server.py @@ -1,12 +1,15 @@ -import copy import logging import os import re import time +import pickle +import itertools +import json import nltk import sentry_sdk import spacy +import numpy as np from flask import Flask, jsonify, request from deeppavlov import build_model @@ -14,44 +17,45 @@ sentry_sdk.init(os.getenv("SENTRY_DSN")) -logging.basicConfig(format="%(asctime)s - %(name)s - %(levelname)s - %(message)s", level=logging.INFO) +logging.basicConfig(format="%(asctime)s - %(name)s - %(levelname)s - %(message)s", level=logging.DEBUG) logger = logging.getLogger(__name__) app = Flask(__name__) stemmer = nltk.PorterStemmer() nlp = spacy.load("en_core_web_sm") -config_name = os.getenv("CONFIG") -rel_cls_flag = int(os.getenv("REL_CLS_FLAG", "0")) +t5_config = os.getenv("CONFIG_T5") +rel_ranker_config = os.getenv("CONFIG_REL_RANKER") add_entity_info = int(os.getenv("ADD_ENTITY_INFO", "0")) +try: + generative_ie = build_model(t5_config, download=True) + rel_ranker = 
build_model(rel_ranker_config, download=True)
+    logger.info("property extraction model is loaded.")
+except Exception as e:
+    sentry_sdk.capture_exception(e)
+    logger.exception(e)
+    raise e
+
 rel_type_dict = {}
+relations_all = []
 with open("rel_list.txt", "r") as fl:
     lines = fl.readlines()
     for line in lines:
         rel, rel_type = line.strip().split()
+        relations_all.append(rel.replace("_", " "))
         if rel_type == "r":
             rel_type = "relation"
         else:
             rel_type = "property"
         rel_type_dict[rel.replace("_", " ")] = rel_type
-
-def check_triplet(triplet):
-    if triplet[0] in {"hi", "hello"} or any([word in triplet[0] for word in {" hi ", " hello "}]):
-        return False
-    return True
-
-
-try:
-    generative_ie = build_model(config_name, download=True)
-    logger.info("property extraction model is loaded.")
-    if rel_cls_flag:
-        rel_cls = build_model("property_classification_distilbert.json")
-except Exception as e:
-    sentry_sdk.capture_exception(e)
-    logger.exception(e)
-    raise e
+config_metadata = json.load(open(rel_ranker_config))["metadata"]["variables"]
+root_path = config_metadata["ROOT_PATH"]
+model_path = config_metadata["MODEL_PATH"].replace("{ROOT_PATH}", root_path)
+rels_path = os.path.expanduser(f"{model_path}/rel_groups.pickle")
+with open(rels_path, "rb") as fl:
+    rel_groups_list = pickle.load(fl)
 
 
 def sentrewrite(sentence, init_answer):
@@ -60,8 +64,8 @@
         for old_tok, new_tok in [
             ("what's your", f"{answer} is my"),
             ("what is your", f"{answer} is my"),
-            ("what is", "{answer} is"),
-            ("what's", "{answer} is"),
+            ("what is", f"{answer} is"),
+            ("what's", f"{answer} is"),
         ]:
             sentence = sentence.replace(old_tok, new_tok)
     elif any([sentence.startswith(elem) for elem in ["where", "when"]]):
@@ -74,26 +78,97 @@
     return sentence
 
 
+def get_relations(uttr_batch, thres=0.5):
+    relations_pred_batch = []
+    input_batch = list(zip(*itertools.product(uttr_batch, relations_all)))
+    rels_scores = rel_ranker(*input_batch)
+    rels_scores = np.array(rels_scores).reshape((len(uttr_batch), len(relations_all), 2))
+    for curr_scores in rels_scores:
+        pred_rels = []
+        rels_with_scores = [
+            (curr_score[1], curr_rel)
+            for curr_score, curr_rel in zip(curr_scores, relations_all)
+            if curr_score[1] > thres
+        ]
+        for rel_group in rel_groups_list:
+            pred_rel_group = [
+                (curr_score, curr_rel) for curr_score, curr_rel in rels_with_scores if curr_rel in rel_group
+            ]
+            if len(pred_rel_group) == 1:
+                pred_rel = pred_rel_group[0][1]
+                pred_rels.append(pred_rel)
+            elif len(pred_rel_group) >= 2:
+                pred_rel = max(pred_rel_group)[1]
+                pred_rels.append(pred_rel)
+        relations_pred_batch.append(pred_rels or [""])
+    logger.debug(f"rel clf raw output: {relations_pred_batch}")
+    return relations_pred_batch
+
+
+def postprocess_triplets(triplets_init, scores_init, uttr):
+    triplets, existing_obj = [], []
+    scores_dict = {}
+    for triplet_init, score in zip(triplets_init, scores_init):
+        triplet = ""
+        fnd = re.findall(r"<subj> (.*?) <rel> (.*?) <obj> (.*)", triplet_init)
+        if fnd and fnd[0][1] in rel_type_dict:
+            triplet = list(fnd[0])
+            if triplet[0] in ["i", "my"]:
+                triplet[0] = "user"
+            obj = triplet[2]
+            if obj in existing_obj:
+                prev_triplet, prev_score = scores_dict[obj]
+                if score > prev_score:
+                    triplets.remove(prev_triplet)
+                else:
+                    continue
+            scores_dict[obj] = (triplet, score)
+            existing_obj.append(obj)
+            if obj.islower() and obj.capitalize() in uttr:
+                triplet[2] = obj.capitalize()
+            triplets.append(triplet)
+    return triplets
+
+
+def generate_triplets(uttr_batch, relations_pred_batch):
+    triplets_corr_batch = []
+    t5_input_uttrs = []
+    for uttr, preds in zip(uttr_batch, relations_pred_batch):
+        uttrs_mult = [uttr for _ in preds]
+        t5_input_uttrs.extend(uttrs_mult)
+    relations_pred_flat = list(itertools.chain(*relations_pred_batch))
+    t5_pred_triplets, t5_pred_scores = generative_ie(t5_input_uttrs, relations_pred_flat)
+    logger.debug(f"t5 raw output: {t5_pred_triplets} scores: {t5_pred_scores}")
+
+    offset_start = 0
+    for uttr, pred_rels in zip(uttr_batch, relations_pred_batch):
+        rels_len = len(pred_rels)
+        triplets_init = t5_pred_triplets[offset_start : (offset_start + rels_len)]
+        scores_init = t5_pred_scores[offset_start : (offset_start + rels_len)]
+        offset_start += rels_len
+        triplets = postprocess_triplets(triplets_init, scores_init, uttr)
+        triplets_corr_batch.append(triplets)
+    return triplets_corr_batch
+
+
 def get_result(request):
     st_time = time.time()
     init_uttrs = request.json.get("utterances", [])
-    init_uttrs_cased = request.json.get("utterances_init", [])
-    if not init_uttrs_cased:
-        init_uttrs_cased = copy.deepcopy(init_uttrs)
     named_entities_batch = request.json.get("named_entities", [[] for _ in init_uttrs])
     entities_with_labels_batch = request.json.get("entities_with_labels", [[] for _ in init_uttrs])
     entity_info_batch = request.json.get("entity_info", [[] for _ in init_uttrs])
-    logger.info(f"init_uttrs {init_uttrs}")
-    uttrs, uttrs_cased = [], []
-    for uttr_list, uttr_list_cased in zip(init_uttrs, init_uttrs_cased):
+    logger.info(
+        f"init_uttrs {init_uttrs} entities_with_labels: {entities_with_labels_batch} entity_info: {entity_info_batch}"
+    )
+    uttrs = []
+    for uttr_list in init_uttrs:
         if len(uttr_list) == 1:
-            uttrs.append(uttr_list[0])
-            uttrs_cased.append(uttr_list[0])
+            uttrs.append(uttr_list[0].lower())
         else:
-            utt_prev = uttr_list_cased[-2]
+            utt_prev = uttr_list[-2]
             utt_prev_sentences = nltk.sent_tokenize(utt_prev)
             utt_prev = utt_prev_sentences[-1]
-            utt_cur = uttr_list_cased[-1]
+            utt_cur = uttr_list[-1]
             utt_prev_l = utt_prev.lower()
             utt_cur_l = utt_cur.lower()
             is_q = (
@@ -102,7 +177,7 @@
             )
 
             is_sentence = False
-            parsed_sentence = nlp(utt_cur)
+            parsed_sentence = nlp(utt_cur_l)
             if parsed_sentence:
                 tokens = [elem.text for elem in parsed_sentence]
                 tags = [elem.tag_ for elem in parsed_sentence]
@@ -112,69 +187,41 @@
             logger.info(f"is_q: {is_q} --- is_s: {is_sentence} --- utt_prev: {utt_prev_l} --- utt_cur: {utt_cur_l}")
 
             if is_q and not is_sentence:
-                if len(utt_cur_l.split()) <= 2:
-                    uttrs.append(sentrewrite(utt_prev_l, utt_cur_l))
-                    uttrs_cased.append(sentrewrite(utt_prev, utt_cur))
-                else:
-                    uttrs.append(f"{utt_prev_l} {utt_cur_l}")
-                    uttrs_cased.append(f"{utt_prev} {utt_cur}")
+                uttrs.append(sentrewrite(utt_prev_l, utt_cur_l))
             else:
                 uttrs.append(utt_cur_l)
-                uttrs_cased.append(utt_cur)
 
     logger.info(f"input utterances: {uttrs}")
-    triplets_batch = []
-    outputs, scores = generative_ie(uttrs)
-    for output, uttr in zip(outputs, uttrs_cased):
-
triplet = "" - fnd = re.findall(r" (.*?) (.*?) (.*)", output) - if fnd: - triplet = list(fnd[0]) - if triplet[0] == "i": - triplet[0] = "user" - obj = triplet[2] - if obj.islower() and obj.capitalize() in uttr: - triplet[2] = obj.capitalize() - triplets_batch.append(triplet) - logger.info(f"outputs {outputs} scores {scores} triplets_batch {triplets_batch}") - if rel_cls_flag: - rels = rel_cls(uttrs) - logger.info(f"classified relations: {rels}") - filtered_triplets_batch = [] - for triplet, rel in zip(triplets_batch, rels): - rel = rel.replace("_", " ") - if len(triplet) == 3 and triplet[1] == rel and check_triplet(triplet): - filtered_triplets_batch.append(triplet) - else: - filtered_triplets_batch.append([]) - triplets_batch = filtered_triplets_batch + relations_pred = get_relations(uttrs) + triplets_batch = generate_triplets(uttrs, relations_pred) + logger.info(f"triplets_batch {triplets_batch}") triplets_info_batch = [] - for triplet, uttr, named_entities, entities_with_labels, entity_info_list in zip( + for triplets, uttr, named_entities, entities_with_labels, entity_info_list in zip( triplets_batch, uttrs, named_entities_batch, entities_with_labels_batch, entity_info_batch ): uttr = uttr.lower() entity_substr_dict = {} - formatted_triplet, per_triplet = {}, {} + formatted_triplets, per_triplets = [], [] if len(uttr.split()) > 2: - for entity in entities_with_labels: - if "text" in entity: - entity_substr = entity["text"] - if "offsets" in entity: - start_offset, end_offset = entity["offsets"] - else: - start_offset = uttr.find(entity_substr.lower()) - end_offset = start_offset + len(entity_substr) - offsets = [start_offset, end_offset] - if triplet and entity_substr in [triplet[0], triplet[2]]: - entity_substr_dict[entity_substr] = {"offsets": offsets} - if entity_info_list: - for entity_info in entity_info_list: - if entity_info and "entity_substr" in entity_info and "entity_ids" in entity_info: - entity_substr = entity_info["entity_substr"] - if triplet and ( + for triplet in triplets: + if triplet: + for entity in entities_with_labels: + entity_substr = entity.get("text", "") + offsets = entity.get("offsets", []) + if not offsets: + start_offset = uttr.find(entity_substr.lower()) + end_offset = start_offset + len(entity_substr) + offsets = [start_offset, end_offset] + if entity_substr in [triplet[0], triplet[2]]: + entity_substr_dict[entity_substr] = {"offsets": offsets} + + for entity_info in entity_info_list: + entity_substr = entity_info.get("entity_substr", "") + if ( entity_substr in [triplet[0], triplet[2]] or stemmer.stem(entity_substr) in [triplet[0], triplet[2]] + and "entity_ids" in entity_info ): if entity_substr not in entity_substr_dict: entity_substr_dict[entity_substr] = {} @@ -183,32 +230,36 @@ def get_result(request): entity_substr_dict[entity_substr]["finegrained_types"] = entity_info.get( "entity_id_tags", [] ) - if triplet: - formatted_triplet = {"subject": triplet[0], rel_type_dict[triplet[1]]: triplet[1], "object": triplet[2]} - named_entities_list = [] - for elem in named_entities: - for entity in elem: - named_entities_list.append(entity) - per_entities = [entity for entity in named_entities_list if entity.get("type", "") == "PER"] - if triplet[1] in {"have pet", "have family", "have sibling", "have chidren"} and per_entities: - per_triplet = {"subject": triplet[2], "property": "name", "object": per_entities[0].get("text", "")} + named_entities_list = [entity for elem in named_entities for entity in elem] + per_entities = [entity for entity in 
named_entities_list if entity.get("type", "") == "PER"] + if triplet[1] in {"have pet", "have family", "have sibling", "have chidren"} and per_entities: + per_triplet = { + "subject": triplet[2], + "property": "name", + "object": per_entities[0].get("text", ""), + } + per_triplets.append(per_triplet) + formatted_triplet = { + "subject": triplet[0], + rel_type_dict[triplet[1]]: triplet[1], + "object": triplet[2], + } + formatted_triplets.append(formatted_triplet) triplets_info_list = [] if add_entity_info: - triplets_info_list.append({"triplet": formatted_triplet, "entity_info": entity_substr_dict}) + triplets_info_list.append({"triplets": formatted_triplets, "entity_info": entity_substr_dict}) else: - triplets_info_list.append({"triplet": formatted_triplet}) - if per_triplet: + triplets_info_list.append({"triplets": formatted_triplets}) + if per_triplets: + per_entity_info = [{per_triplet["object"]: {"entity_id_tags": ["PER"]}} for per_triplet in per_triplets] if add_entity_info: - triplets_info_list.append( - {"triplet": per_triplet, "entity_info": {per_triplet["object"]: {"entity_id_tags": ["PER"]}}} - ) + triplets_info_list.append({"per_triplets": per_triplets, "entity_info": per_entity_info}) else: - triplets_info_list.append({"triplet": per_triplet}) + triplets_info_list.append({"per_triplet": per_triplets}) triplets_info_batch.append(triplets_info_list) total_time = time.time() - st_time logger.info(f"property extraction exec time: {total_time: .3f}s") - logger.info(f"property extraction, input {uttrs}, output {triplets_info_batch} scores {scores}") return triplets_info_batch @@ -219,4 +270,4 @@ def respond(): if __name__ == "__main__": - app.run(debug=False, host="0.0.0.0", port=8103) + app.run(debug=False, host="0.0.0.0", port=3000) diff --git a/annotators/property_extraction/service_configs/property-extraction/environment.yml b/annotators/property_extraction/service_configs/property-extraction/environment.yml index 1e71bb7ee0..ea27ad79a0 100644 --- a/annotators/property_extraction/service_configs/property-extraction/environment.yml +++ b/annotators/property_extraction/service_configs/property-extraction/environment.yml @@ -1,4 +1,5 @@ -CONFIG: t5_generative_ie_lite_infer.json +CONFIG_T5: t5_generative_ie_lite_infer.json +CONFIG_REL_RANKER: rel_ranking_roberta.json SERVICE_PORT: 8136 SRC_DIR: annotators/property_extraction/ SERVICE_NAME: property_extraction diff --git a/annotators/property_extraction/service_configs/property-extraction/service.yml b/annotators/property_extraction/service_configs/property-extraction/service.yml index 72ad58fdb0..a0888455e6 100644 --- a/annotators/property_extraction/service_configs/property-extraction/service.yml +++ b/annotators/property_extraction/service_configs/property-extraction/service.yml @@ -6,7 +6,8 @@ compose: - .env build: args: - CONFIG: t5_generative_ie_lite_infer.json + CONFIG_T5: t5_generative_ie_lite_infer.json + CONFIG_REL_RANKER: rel_ranking_roberta.json SERVICE_PORT: 8136 SRC_DIR: annotators/property_extraction/ SERVICE_NAME: property_extraction diff --git a/annotators/property_extraction/src/t5_generative_ie.py b/annotators/property_extraction/src/t5_generative_ie.py index c5b05cf674..f4d3a1ecbc 100644 --- a/annotators/property_extraction/src/t5_generative_ie.py +++ b/annotators/property_extraction/src/t5_generative_ie.py @@ -1,17 +1,3 @@ -# Copyright 2017 Neural Networks and Deep Learning lab, MIPT -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the 
License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - import re from logging import getLogger from pathlib import Path @@ -51,16 +37,14 @@ def __init__( load_before_drop: bool = True, clip_norm: Optional[float] = None, min_learning_rate: float = 1e-06, - generate_max_length: int = 50, top_n: int = 1, - batch_decode: bool = False, - scores_thres: float = -0.17, + batch_size: int = 50, + scores_thres: float = -0.57, device: str = "cpu", **kwargs, ) -> None: if not optimizer_parameters: optimizer_parameters = {"lr": 0.01, "weight_decay": 0.01, "betas": (0.9, 0.999), "eps": 1e-6} - self.generate_max_length = generate_max_length self.attention_probs_keep_prob = attention_probs_keep_prob self.hidden_keep_prob = hidden_keep_prob @@ -71,9 +55,15 @@ def __init__( self.tokenizer = AutoTokenizer.from_pretrained(pretrained_transformer, do_lower_case=False) special_tokens_dict = {"additional_special_tokens": add_special_tokens} self.tokenizer.add_special_tokens(special_tokens_dict) - self.replace_tokens = [("", ""), ("", ""), ("", "")] + self.replace_tokens = [ + ("", ""), + ("", ""), + ("", ""), + ("blank>", ""), + ("", ""), + ] self.top_n = top_n - self.batch_decode = batch_decode + self.batch_size = batch_size self.scores_thres = scores_thres super().__init__( @@ -116,57 +106,45 @@ def is_data_parallel(self) -> bool: def __call__(self, input_ids_batch, attention_mask_batch): model = self.model.module if hasattr(self.model, "module") else self.model - if self.batch_decode: - input_ids_batch = torch.LongTensor(input_ids_batch).to(self.device) - attention_mask_batch = torch.LongTensor(attention_mask_batch).to(self.device) + answers_batch, scores_batch = [], [] + num_batches = len(input_ids_batch) // self.batch_size + int(len(input_ids_batch) % self.batch_size > 0) + for i in range(num_batches): + input_ids = torch.LongTensor(input_ids_batch[i * self.batch_size : (i + 1) * self.batch_size]).to( + self.device + ) + attention_mask = torch.LongTensor(attention_mask_batch[i * self.batch_size : (i + 1) * self.batch_size]).to( + self.device + ) input_ = { - "input_ids": input_ids_batch, - "attention_mask": attention_mask_batch, + "input_ids": input_ids, + "attention_mask": attention_mask, } with torch.no_grad(): - answer_ids_batch = model.generate(**input_) - init_answers_batch = self.tokenizer.batch_decode(answer_ids_batch, skip_special_tokens=False) - answers_batch = [] - for answer in init_answers_batch: - for old_tok, new_tok in self.replace_tokens: - answer = answer.replace(old_tok, new_tok) - answers_batch.append(answer) - return answers_batch - else: - answers_batch, scores_batch = [], [] - for input_ids in input_ids_batch: - input_ids = torch.LongTensor([input_ids]).to(self.device) - with torch.no_grad(): - outputs = model.generate( - input_ids, - num_beams=5, - num_return_sequences=self.top_n, - return_dict_in_generate=True, - output_scores=True, - ) - sequences = outputs.sequences - scores = outputs.sequences_scores - scores = scores.cpu().numpy().tolist() - answers = [self.tokenizer.decode(output, skip_special_tokens=False) for output in sequences] - logger.info(f"triplets {answers} scores {scores}") - processed_answers, 
processed_scores = [], [] - for answer, score in zip(answers, scores): - if score > self.scores_thres: - for old_tok, new_tok in self.replace_tokens: - answer = answer.replace(old_tok, new_tok) - processed_answers.append(answer) - processed_scores.append(score) - if self.top_n == 1: - if processed_answers: - answers_batch.append(processed_answers[0]) - scores_batch.append(processed_scores[0]) + outputs = model.generate( + **input_, + num_beams=5, + num_return_sequences=self.top_n, + return_dict_in_generate=True, + output_scores=True, + ) + sequences = outputs.sequences + scores = outputs.sequences_scores + scores = scores.cpu().numpy().tolist() + answers = [self.tokenizer.decode(output, skip_special_tokens=False) for output in sequences] + logger.debug(f"triplets {answers} scores {scores}") + processed_answers, processed_scores = [], [] + for answer, score in zip(answers, scores): + if score > self.scores_thres: + for old_tok, new_tok in self.replace_tokens: + answer = answer.replace(old_tok, new_tok) + processed_answers.append(answer) + processed_scores.append(score) else: - answers_batch.append("") - scores_batch.append(0.0) - else: - answers_batch.append(processed_answers) - scores_batch.append(processed_scores) - return answers_batch, scores_batch + processed_answers.append("") + processed_scores.append(0.0) + answers_batch.extend(processed_answers) + scores_batch.extend(processed_scores) + return answers_batch, scores_batch @overrides def load(self, fname=None): diff --git a/annotators/property_extraction/src/torch_transformers_preprocessor.py b/annotators/property_extraction/src/torch_transformers_preprocessor.py index f440e6195d..6737b6cc17 100644 --- a/annotators/property_extraction/src/torch_transformers_preprocessor.py +++ b/annotators/property_extraction/src/torch_transformers_preprocessor.py @@ -46,10 +46,10 @@ def __init__( special_tokens_dict = {"additional_special_tokens": add_special_tokens} self.tokenizer.add_special_tokens(special_tokens_dict) - def __call__(self, uttr_batch: List[str], targets_batch: List[str] = None): + def __call__(self, uttr_batch: List[str], relations_batch: List[str], targets_batch: List[str] = None): input_ids_batch, attention_mask_batch, lengths = [], [], [] - for uttr in uttr_batch: - encoding = self.tokenizer.encode_plus(text=uttr, return_attention_mask=True, truncation=True) + for uttr, rel in zip(uttr_batch, relations_batch): + encoding = self.tokenizer.encode_plus(text=uttr, text_pair=rel, return_attention_mask=True, truncation=True) input_ids = encoding["input_ids"] attention_mask = encoding["attention_mask"] input_ids_batch.append(input_ids) diff --git a/annotators/property_extraction/t5_generative_ie_infer.json b/annotators/property_extraction/t5_generative_ie_infer.json index 9db32603a3..a71e0bcd1b 100644 --- a/annotators/property_extraction/t5_generative_ie_infer.json +++ b/annotators/property_extraction/t5_generative_ie_infer.json @@ -1,13 +1,13 @@ { "chainer": { - "in": ["question"], + "in": ["question", "relation"], "pipe": [ { "class_name": "src.torch_transformers_preprocessor:T5GenerativeIEPreprocessor", "vocab_file": "{TRANSFORMER}", "add_special_tokens": ["", "", ""], "max_seq_length": 512, - "in": ["question"], + "in": ["question", "relation"], "out": ["input_ids", "attention_mask"] }, { @@ -16,6 +16,7 @@ "add_special_tokens": ["", "", ""], "save_path": "{MODEL_PATH}/model", "load_path": "{MODEL_PATH}/model", + "batch_size": 100, "optimizer": "AdamW", "optimizer_parameters": { "lr": 3e-05, @@ -36,14 +37,13 @@ "TRANSFORMER": 
"t5-base", "ROOT_PATH": "~/.deeppavlov", "DOWNLOADS_PATH": "{ROOT_PATH}/downloads", - "MODELS_PATH": "{ROOT_PATH}/models", - "MODEL_PATH": "{MODELS_PATH}/t5_base_generative_ie" + "MODEL_PATH": "{ROOT_PATH}/models/t5_base_generative_ie" }, "download": [ { - "url": "http://files.deeppavlov.ai/deeppavlov_data/generative_ie/t5_base_generative_ie.tar.gz", + "url": "http://files.deeppavlov.ai/deeppavlov_data/generative_ie/t5_base_generative_ie_v2.tar.gz", "subdir": "{MODEL_PATH}" } ] } -} +} \ No newline at end of file diff --git a/annotators/property_extraction/t5_generative_ie_lite_infer.json b/annotators/property_extraction/t5_generative_ie_lite_infer.json index 43540361b3..e009ec4723 100644 --- a/annotators/property_extraction/t5_generative_ie_lite_infer.json +++ b/annotators/property_extraction/t5_generative_ie_lite_infer.json @@ -1,21 +1,21 @@ { "chainer": { - "in": ["question"], + "in": ["question", "relation"], "pipe": [ { "class_name": "src.torch_transformers_preprocessor:T5GenerativeIEPreprocessor", "vocab_file": "{TRANSFORMER}", "add_special_tokens": ["", "", ""], "max_seq_length": 512, - "in": ["question"], + "in": ["question", "relation"], "out": ["input_ids", "attention_mask"] }, { "class_name": "src.t5_generative_ie:T5GenerativeIE", "pretrained_transformer": "{TRANSFORMER}", "add_special_tokens": ["", "", ""], - "save_path": "{MODEL_PATH}/model", - "load_path": "{MODEL_PATH}/model", + "save_path": "{MODEL_PATH}/model_lite", + "load_path": "{MODEL_PATH}/model_lite", "optimizer": "AdamW", "optimizer_parameters": { "lr": 3e-05, @@ -41,7 +41,7 @@ }, "download": [ { - "url": "http://files.deeppavlov.ai/tmp/t5_small_generative_ie.tar.gz", + "url": "http://files.deeppavlov.ai/deeppavlov_data/generative_ie/t5_generative_lite.tar.gz", "subdir": "{MODEL_PATH}" } ] diff --git a/annotators/property_extraction/test_property_extraction.py b/annotators/property_extraction/test_property_extraction.py index 806ee6c9f7..822537dead 100644 --- a/annotators/property_extraction/test_property_extraction.py +++ b/annotators/property_extraction/test_property_extraction.py @@ -5,7 +5,7 @@ def main(): url = "http://0.0.0.0:8136/respond" request_data = [{"utterances": [["i live in moscow"]]}] - gold_results = [[{"triplet": {"object": "moscow", "relation": "live in citystatecountry", "subject": "user"}}]] + gold_results = [[{"triplets": [{"object": "moscow", "relation": "live in citystatecountry", "subject": "user"}]}]] count = 0 for data, gold_result in zip(request_data, gold_results): diff --git a/annotators/relative_persona_extractor/requirements.txt b/annotators/relative_persona_extractor/requirements.txt index 494f7a3843..6822c0df9f 100644 --- a/annotators/relative_persona_extractor/requirements.txt +++ b/annotators/relative_persona_extractor/requirements.txt @@ -5,5 +5,5 @@ sentry-sdk==1.19.1 requests==2.28.2 click<=8.0.4 jinja2<=3.0.3 -Werkzeug>=2.2.2 +Werkzeug>=2.2.2,<3.0 numpy==1.24.2 \ No newline at end of file diff --git a/annotators/summarization_annotator/server.py b/annotators/summarization_annotator/server.py index 2f880b14f2..7133772ebc 100644 --- a/annotators/summarization_annotator/server.py +++ b/annotators/summarization_annotator/server.py @@ -12,7 +12,7 @@ logger = logging.getLogger(__name__) app = Flask(__name__) -SUMMARIZATION_REQUEST_TIMEOUT = int(getenv("SUMMARIZATION_REQUEST_TIMEOUT")) +SUMMARIZATION_REQUEST_TIMEOUT = float(getenv("SUMMARIZATION_REQUEST_TIMEOUT")) SUMMARIZATION_SERVICE_URL = getenv("SUMMARIZATION_SERVICE_URL") logger.info(f"summarization-annotator considered 
summarizer: {SUMMARIZATION_SERVICE_URL}") diff --git a/annotators/topic_recommendation/requirements.txt b/annotators/topic_recommendation/requirements.txt index b6a09de768..0df0e527ec 100644 --- a/annotators/topic_recommendation/requirements.txt +++ b/annotators/topic_recommendation/requirements.txt @@ -5,4 +5,4 @@ requests==2.28.2 sentry-sdk==1.19.1 numpy==1.24.2 jinja2<=3.0.3 -Werkzeug>=2.2.2 \ No newline at end of file +Werkzeug>=2.2.2,<3.0 \ No newline at end of file diff --git a/annotators/user_knowledge_memorizer/Dockerfile b/annotators/user_knowledge_memorizer/Dockerfile new file mode 100644 index 0000000000..7e7ea72ebc --- /dev/null +++ b/annotators/user_knowledge_memorizer/Dockerfile @@ -0,0 +1,31 @@ +FROM python:3.9.1 + +WORKDIR /src + +ARG SERVICE_PORT +ARG SRC_DIR +ARG TERMINUSDB_SERVER_PASSWORD +ARG TERMINUSDB_SERVER_URL +ARG TERMINUSDB_SERVER_TEAM +ARG TERMINUSDB_SERVER_DB +ARG CONFIG + +ENV SERVICE_PORT=$SERVICE_PORT +ENV TERMINUSDB_SERVER_PASSWORD=$TERMINUSDB_SERVER_PASSWORD +ENV TERMINUSDB_SERVER_URL=$TERMINUSDB_SERVER_URL +ENV TERMINUSDB_SERVER_TEAM=$TERMINUSDB_SERVER_TEAM +ENV TERMINUSDB_SERVER_DB=$TERMINUSDB_SERVER_DB +ENV CONFIG=$CONFIG + +RUN pip install -U pip wheel setuptools + +COPY ./annotators/user_knowledge_memorizer/requirements.txt . +RUN pip install --upgrade pip && \ + pip install --no-cache -r /src/requirements.txt && \ + python -m nltk.downloader wordnet && \ + pip install git+https://github.com/deeppavlov/custom_kg_svc.git@724513b4 + +COPY $SRC_DIR . + + +CMD gunicorn --workers=1 server:app -b 0.0.0.0:$SERVICE_PORT diff --git a/annotators/user_knowledge_memorizer/abstract_rels.txt b/annotators/user_knowledge_memorizer/abstract_rels.txt new file mode 100644 index 0000000000..318592a414 --- /dev/null +++ b/annotators/user_knowledge_memorizer/abstract_rels.txt @@ -0,0 +1,11 @@ +favorite animal +like animal +favorite book +like read +favorite movie +favorite food +like food +favorite drink +like drink +favorite sport +like sports diff --git a/annotators/user_knowledge_memorizer/config.json b/annotators/user_knowledge_memorizer/config.json new file mode 100644 index 0000000000..c809dd0333 --- /dev/null +++ b/annotators/user_knowledge_memorizer/config.json @@ -0,0 +1,9 @@ +{ + "chainer": { + }, + "metadata": { + "variables": { + "CUSTOM_EL": "/root/.deeppavlov/downloads/entity_linking_eng/custom_el_eng_dream" + } + } +} diff --git a/annotators/user_knowledge_memorizer/rel_list.json b/annotators/user_knowledge_memorizer/rel_list.json new file mode 100644 index 0000000000..b3cf88fe5e --- /dev/null +++ b/annotators/user_knowledge_memorizer/rel_list.json @@ -0,0 +1,15 @@ +{ + "FAVORITE ANIMAL": "Animal", + "HAVE PET": "Animal", + "LIKE ANIMAL": "Animal", + "FAVORITE BOOK": "Book", + "LIKE READ": "Book", + "FAVORITE MOVIE": "Film", + "FAVORITE FOOD": "Food", + "LIKE FOOD": "Food", + "FAVORITE DRINK": "Food", + "LIKE DRINK": "Food", + "FAVORITE SPORT": "Type_of_sport", + "LIKE SPORTS": "Type_of_sport", + "LIKE GOTO": "Place" +} diff --git a/annotators/user_knowledge_memorizer/requirements.txt b/annotators/user_knowledge_memorizer/requirements.txt new file mode 100644 index 0000000000..21bf209f1f --- /dev/null +++ b/annotators/user_knowledge_memorizer/requirements.txt @@ -0,0 +1,10 @@ +Flask==1.1.1 +gunicorn==19.9.0 +requests==2.27.1 +jinja2<=3.0.3 +Werkzeug<=2.0.3 +sentry-sdk==0.12.3 +pyopenssl==22.0.0 +itsdangerous==2.0.1 +nltk==3.5 +uuid==1.30 diff --git a/annotators/user_knowledge_memorizer/server.py b/annotators/user_knowledge_memorizer/server.py new file mode 
100644 index 0000000000..b3b757f0a3 --- /dev/null +++ b/annotators/user_knowledge_memorizer/server.py @@ -0,0 +1,570 @@ +# pylint: disable=W1203 + +import os +import json +import logging +from typing import List, Dict, Optional, Tuple +from pathlib import Path +from uuid import uuid4 +import time +import sentry_sdk +from flask import Flask, jsonify, request +from deeppavlov_kg import TerminusdbKnowledgeGraph + + +logging.basicConfig(format="%(asctime)s - %(name)s - %(levelname)s - %(message)s", level=logging.INFO) +logger = logging.getLogger(__name__) +logger.setLevel(logging.DEBUG) +sentry_sdk.init(os.getenv("SENTRY_DSN")) +app = Flask(__name__) + +with open("rel_list.json") as file: + rel_kinds_dict = json.load(file) + +with open("abstract_rels.txt", "r") as file: + abstract_rels = [line.strip() for line in file.readlines()] + +TERMINUSDB_SERVER_URL = os.getenv("TERMINUSDB_SERVER_URL") +TERMINUSDB_SERVER_PASSWORD = os.getenv("TERMINUSDB_SERVER_PASSWORD") +assert TERMINUSDB_SERVER_PASSWORD, logger.error("TerminusDB server password is not specified") +TERMINUSDB_SERVER_DB = os.getenv("TERMINUSDB_SERVER_DB") +TERMINUSDB_SERVER_TEAM = os.getenv("TERMINUSDB_SERVER_TEAM") +config_path = os.getenv("CONFIG") +with open(config_path, "r") as config_file: + config = json.load(config_file) +index_load_path = Path(os.path.expanduser(config["metadata"]["variables"]["CUSTOM_EL"])) + +while True: + try: + kg_graph = TerminusdbKnowledgeGraph( + db_name=TERMINUSDB_SERVER_DB, + team=TERMINUSDB_SERVER_TEAM, + server=TERMINUSDB_SERVER_URL, + password=TERMINUSDB_SERVER_PASSWORD, + index_load_path=index_load_path, + ) + logger.info(f"TERMINUSDB_SERVER_URL: {TERMINUSDB_SERVER_URL} is ready") + break + except Exception as exc: + logger.error(exc) + time.sleep(5) + continue + +logger.info("Graph Loaded!") + + +def check_property_vs_relationship(utterances_info: List[dict]) -> Tuple[list, list]: + """Checks if the prop_ex triplets are relationship or property triplets. + + Args: + utterances_info: List of dictionaries containing the utterance information. + + Returns: + A tuple containing two lists: relationships and properties. + """ + if isinstance(utterances_info, dict): + utterances_info = [utterances_info] + + relationships, properties = [], [] + for utterance_info in utterances_info: + for _, value in utterance_info.items(): + for triplet in value: + if "relation" in triplet: + relationships.append(triplet) + elif "property" in triplet: + properties.append(triplet) + return relationships, properties + + +def get_entity_type(attributes): + # TODO: this doesn't work. Most likely it should get output of entity-detection not prop-ex + """Extracts DBPedia type from property extraction annotator.""" + if not isinstance(attributes, dict): + return "Misc" + entity_info = attributes.get("entity_info", []) + if not entity_info: + return "Misc" + exact_entity_info = entity_info[list(entity_info.keys())[0]] + finegrained = exact_entity_info.get("finegrained_types", []) + if finegrained: + entity_type = finegrained[0].capitalize() + logger.info(f"Fine-grained type: {entity_type}") + return entity_type + return "Misc" + + +def is_abstract_relationship(relationship_kind, entity_substr, text): + """Returns true if the relationship kind is abstract according to 'abstract_rels.txt' file and if there's no + article before the entity substring in the text, that would indicate non-abstraction. Otherwise, returns false. 
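+
+    For example, "i like dogs" with the relationship kind "like animal" (listed in 'abstract_rels.txt') is
+    treated as abstract, since no article precedes "dogs"; "i like my dog" is not, because of "my".
+    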
+ """ + if relationship_kind.lower() in abstract_rels and not any( + [f" {word} {entity_substr}" in text for word in ["the", "my", "his", "her"]] + ): + return True + else: + return False + + +def check_entities_in_index(custom_el_annotations: list, prop_ex_triplets: list, text: str) -> Tuple[dict, list]: + """Checks if the entities returned by property extraction are present in the index. + + Returns: + A tuple containing a dictionary and a list: entities_in_index and entities_not_in_index. + Output example: + entities_in_index, entities_not_in_index -- {('dog', 'Animal'): + 'Animal/ed8f16ae-56fb-46dc-b542-20987056fd00'}, [('dog', 'Animal'))] + """ + + def check_abstraction_in_index(relationship, entity_info, text): + """Returns true if kind in index is 'Abstract' and the relationship in prop_ex is abstract, or if both aren't + abstract. Otherwise, returns false.""" + return is_abstract_relationship(relationship, entity_info["entity_substr"], text) == ( + "Abstract" in entity_info["entity_id_tags"] + ) + + entities_in_index, entities_not_in_index = {}, [] + for triplet in prop_ex_triplets: + in_index = False + for entity_info in custom_el_annotations: + if triplet["object"] == entity_info["entity_substr"] and check_abstraction_in_index( + triplet["relation"], entity_info, text + ): + in_index = True + entities_in_index[(entity_info["entity_substr"], entity_info["entity_id_tags"][0])] = entity_info[ + "entity_ids" + ][0] + break + if not in_index: + if triplet["relation"] in rel_kinds_dict: + entity_kind = rel_kinds_dict[triplet["relation"]] + else: + entity_kind = get_entity_type(triplet["object"]) + entities_not_in_index.append((triplet["object"], entity_kind)) + return entities_in_index, entities_not_in_index + + +def check_entities_in_kg(graph, entities: list) -> Tuple[list, list]: + """Checks if the entities, that aren't in index, are present in kg. + + As index stores and retrieves entities only related to each user (it stores them as triplets), there are + situations where the entity exists in kg but not connected to this current user, so not found in index. + + Returns: + A tuple containing two lists: entities_in_kg and entities_not_in_kg. + Output example: + entities_in_kg -- [{'@id': 'Animal/L83', '@type': 'Animal', 'substr': 'dog'}] + entities_not_in_kg -- [('park', 'Place')] + """ + entities_in_kg, entities_not_in_kg = [], [] + + all_entities_in_kg = graph.get_all_entities() + for entity_substr, entity_kind in entities: + in_kg = False + for entity_props in all_entities_in_kg: + if entity_substr == entity_props.get("substr") and entity_kind == entity_props["@type"]: + entities_in_kg.append(entity_props) + in_kg = True + if not in_kg: + entities_not_in_kg.append((entity_substr, entity_kind)) + return entities_in_kg, entities_not_in_kg + + +def create_entities( + graph, entities_info: List[Tuple[str, str]], has_name_property=False, entity_ids: Optional[List[str]] = None +) -> Dict[str, list]: + """Adds entities and entity kinds into kg. 
Returns:
+        entities_info_lists: the newly created entities
+        Output example:
+        {
+            'substr_list': ['dog', 'park'],
+            'tags_list': ['Animal', 'Place'],
+            'entity_ids': ['Animal/6e224463-e9a9-4e43-b548-a3c52f30de66', 'Place/aa3f15fb-b00d-4f92-be95-75b3748d6f5f']
+        }
+    """
+    if entity_ids is None:
+        entity_ids = [""] * len(entities_info)
+
+    entities_info_lists = {"substr_list": [], "tags_list": [], "entity_ids": []}
+    for entity_info, entity_id in zip(entities_info, entity_ids):
+        entities_info_lists["substr_list"].append(entity_info[0])
+        entities_info_lists["tags_list"].append(entity_info[1])
+        if not entity_id:
+            entity_id = "/".join([entity_info[1], str(uuid4())])
+        entities_info_lists["entity_ids"].append(entity_id)
+    logger.debug(f"entities_info_lists -- {entities_info_lists}")
+
+    try:
+        graph.ontology.create_entity_kinds(entities_info_lists["tags_list"])
+    except ValueError:
+        logger.info(f"All entity kinds '{entities_info_lists['tags_list']}' are already in KG")
+
+    substr = "name" if has_name_property else "substr"
+    property_kinds = [[substr]] * len(entities_info_lists["substr_list"])
+    property_values = [[substr] for substr in entities_info_lists["substr_list"]]
+    graph.ontology.create_property_kinds_of_entity_kinds(entities_info_lists["tags_list"], property_kinds)
+
+    try:
+        graph.create_entities(
+            entities_info_lists["tags_list"],
+            entities_info_lists["entity_ids"],
+            property_kinds=property_kinds,
+            property_values=property_values,
+        )
+    except Exception:
+        logger.info(f"Entities {entities_info_lists['entity_ids']} already exist in kg.")
+    return entities_info_lists
+
+
+def prepare_triplets(entities_in_index: dict, triplets: list, user_id: str) -> List[dict]:
+    """Prepares the property extraction triplets to be in the format
+    '[{"subject": user_id, "relationship": value, "object": entity_id}]' to be used in check_triplets_in_kg.
+    The relationship value is taken from triplets, and entity_id is taken from entities_in_index.
+    """
+    prepared_triplets = []
+    new_entities_in_index = {}
+    for (entity_substr, _), entity_id in entities_in_index.items():
+        new_entities_in_index[entity_substr] = entity_id
+    for triplet in triplets:
+        prepared_triplets.append(
+            {
+                "subject": user_id,
+                "relationship": triplet["relation"],
+                "object": new_entities_in_index.get(triplet["object"]),
+            }
+        )
+    return prepared_triplets
+
+
+def check_triplets_in_kg(graph, triplets: List[dict]) -> Tuple[list, dict]:
+    """Checks whether the subject and object that were extracted by property extraction and are present in the
+    index are connected in the kg by the same relationship.
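+
+    Returns:
+        A tuple containing a list and a dict: triplets_in_kg and triplets_not_in_kg.
+        Output example:
+        triplets_in_kg -- [['User/b75d2700259bdc44sdsdf85e7f530ed', 'HAVE PET', 'Animal/6e224463-e9a9-4e43-b548-a3c52f30de66']]
+        triplets_not_in_kg -- {'ids_a': [], 'relationship_kinds': [], 'ids_b': []}
+    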
+ """ + triplets_in_kg, triplets_not_in_kg = [], {"ids_a": [], "relationship_kinds": [], "ids_b": []} + for triplet in triplets: + entity_id = triplet["object"] + relationship_kinds = graph.search_for_relationships(id_a=triplet["subject"], id_b=entity_id) + if triplet["relationship"] in [rel["rel"] for rel in relationship_kinds]: + triplets_in_kg.append([triplet["subject"], triplet["relationship"], triplet["object"]]) + add_to_kg = False + else: + add_to_kg = True + + if add_to_kg: + triplets_not_in_kg["ids_a"].append(triplet["subject"]) + triplets_not_in_kg["relationship_kinds"].append(triplet["relationship"]) + triplets_not_in_kg["ids_b"].append(triplet["object"]) + return triplets_in_kg, triplets_not_in_kg + + +def prepare_triplets_to_add_to_dbs( + triplets_not_in_kg: Dict[str, list], + prop_ex_rel_triplets: list, + entities_in_kg_not_in_index: list, + new_entities: dict, + abstract_triplets: List[tuple], + user_id: str, +): + """Prepares each of these triplets to be added to dbs: + [triplets not in kg but in index, + triplets between user and entities, that're in kg but not in index, + triplets between user and new created entities, + new triplets, that have abstract relationships] + + Output example: + triplets_to_kg -- { + 'ids_a': ['User/b75d2700259bdc44sdsdf85e7f530ed'], + 'relationship_kinds': ['HAVE_PET'], + 'ids_b': ['Animal/6e224463-e9a9-4e43-b548-a3c52f30de66'] + } + triplets_to_index -- { + 'substr_list': ['dog'], 'tags_list': ['Animal'], 'entity_ids': ['Animal/6e224463-e9a9-4e43-b548-a3c52f30de66'] + } + """ + triplets_to_kg, triplets_to_index = triplets_not_in_kg, {"substr_list": [], "tags_list": [], "entity_ids": []} + + for entity in entities_in_kg_not_in_index: + relationship_kind = [ + triplet["relation"] for triplet in prop_ex_rel_triplets if triplet["object"] == entity["substr"] + ][ + 0 + ] # TODO this 0 index could reduce solutions, fix that + triplets_to_kg["ids_a"].append(user_id) + triplets_to_kg["relationship_kinds"].append(relationship_kind) + triplets_to_kg["ids_b"].append(entity["@id"]) + + triplets_to_index["substr_list"].append(entity["substr"]) + triplets_to_index["tags_list"].append(entity["@type"]) + triplets_to_index["entity_ids"].append(entity["@id"]) + if new_entities: + for idx, entity_substr in enumerate(new_entities["substr_list"]): + relationship_kind = [ + triplet["relation"] for triplet in prop_ex_rel_triplets if triplet["object"] == entity_substr + ][ + 0 + ] # TODO this 0 index could reduce solutions, fix that + triplets_to_kg["ids_a"].append(user_id) + triplets_to_kg["relationship_kinds"].append(relationship_kind) + triplets_to_kg["ids_b"].append(new_entities["entity_ids"][idx]) + + triplets_to_index["substr_list"] += new_entities["substr_list"] + triplets_to_index["tags_list"] += new_entities["tags_list"] + triplets_to_index["entity_ids"] += new_entities["entity_ids"] + + for id_a, rel, id_b in abstract_triplets: + triplets_to_kg["ids_a"].append(id_a) + triplets_to_kg["relationship_kinds"].append(rel) + triplets_to_kg["ids_b"].append(id_b) + + triplets_to_index["substr_list"].append(id_b.split("/")[-1]) + triplets_to_index["tags_list"].append("Abstract") + triplets_to_index["entity_ids"].append(id_b) + + return triplets_to_kg, triplets_to_index + + +def add_entities_to_index(graph, user_id: str, entities_info_lists: dict): + user_id = user_id.split("/")[-1] + substr_list = entities_info_lists["substr_list"] + entity_ids = entities_info_lists["entity_ids"] + tags_list = entities_info_lists["tags_list"] + logger.debug( + f"Adding to index 
user_id '{user_id}' - entity_info: " + f"'entity_substr': {substr_list}, 'entity_ids': {entity_ids}," + f" 'tags': {tags_list}" + ) + graph.index.set_active_user_id(user_id) + graph.index.add_entities(substr_list, entity_ids, tags_list) + + +def add_triplets_to_dbs(graph, user_id: str, triplets_to_kg: dict, triplets_to_index: dict) -> List[tuple]: + """Adds triplets to each of kg and index.""" + kinds_b = [id_b.split("/")[0] for id_b in triplets_to_kg["ids_b"]] + + if len(triplets_to_kg["ids_a"]): + graph.ontology.create_relationship_kinds( + ["User"] * len(triplets_to_kg["ids_a"]), triplets_to_kg["relationship_kinds"], kinds_b + ) + logger.debug( + f"""to be added to kg\n + ids_a -- {triplets_to_kg["ids_a"]}\n + relationship_kinds -- {triplets_to_kg["relationship_kinds"]}\n + ids_b -- {triplets_to_kg["ids_b"]}\n + """ + ) + + graph.create_relationships( + triplets_to_kg["ids_a"], + triplets_to_kg["relationship_kinds"], + triplets_to_kg["ids_b"], + ) + if triplets_to_index["substr_list"]: + add_entities_to_index(graph, user_id, entities_info_lists=triplets_to_index) + + output = zip( + [user_id] * len(triplets_to_index["entity_ids"]), + triplets_to_kg["relationship_kinds"], + triplets_to_index["entity_ids"], + ) + return list(output) + + +def upper_case_input(triplets: List[dict]) -> List[dict]: + """Upper-cases the relationship kind in each triplet in the prop_ex annotations""" + return [ + {"subject": triplet["subject"], "relation": triplet["relation"].upper(), "object": triplet["object"]} + for triplet in triplets + ] + + +def check_abstract_triplets( + graph, entities: List[tuple], prop_ex_rel_triplets: List[dict], text: str, user_id: str +) -> Tuple[list, list]: + abstract_triplets = [] + non_abstract_triplets = [] + kinds_to_add = [] + parents = [] + for entity in entities: + entity_substr = entity[0] + entity_kind = entity[1] + relationship_kind = [ + triplet["relation"] for triplet in prop_ex_rel_triplets if triplet["object"] == entity_substr + ][0] + if is_abstract_relationship(relationship_kind, entity_substr, text): + substr2kind = entity_substr.capitalize() + kinds_to_add.append(substr2kind) + parents.append(entity_kind) + abstract_entity_id = "/".join(["Abstract", substr2kind]) + abstract_triplets.append((user_id, relationship_kind, abstract_entity_id)) + else: + non_abstract_triplets.append(entity) + + logger.debug(f"abstract_kinds_to_add -- {kinds_to_add}") + + if kinds_to_add: + try: + graph.ontology.create_entity_kinds(kinds_to_add, parents) + except ValueError: + logger.info(f"All entity kinds '{kinds_to_add}' are already in KG") + except Exception: # TODO: replace with Terminusdb DatabaseError + graph.ontology.create_entity_kinds(parents) + try: + graph.ontology.create_entity_kinds(kinds_to_add, parents) + except ValueError: + logger.info(f"All entity kinds '{kinds_to_add}' are already in KG") + return abstract_triplets, non_abstract_triplets + + +def check_and_add_properties(graph, prop_triplets: List[dict], user_id: str) -> Tuple[list, list]: + """Checks if the property triplets exist in kg and adds them if not.""" + properties_to_add_to_kg, properties_already_in_kg = [], [] + try: + user_properties = graph.get_properties_of_entity(user_id) + except Exception: + user_properties = [] # new user + for triplet in prop_triplets: + if triplet["property"] in user_properties and triplet["object"] == user_properties[triplet["property"]]: + properties_already_in_kg.append(triplet) + else: + if triplet["property"] == "misc attribute": + triplet.update({"property_family": set}) + 
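# NOTE: "set" and "Optional" are the type families passed to create_property_kinds_of_entity_kind below;
+                # "set" is expected to allow multiple values for the property kind, while "Optional" keeps at most one.
+        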
else: + triplet.update({"property_family": Optional}) + properties_to_add_to_kg.append(triplet) + + if properties_to_add_to_kg: + property_kinds = [triplet["property"] for triplet in properties_to_add_to_kg] + properties_families = [triplet["property_family"] for triplet in properties_to_add_to_kg] + objects = [triplet["object"] for triplet in properties_to_add_to_kg] + logger.info( + f"property_kinds -- {property_kinds}\nproperties_families -- {properties_families}\n" + f"properties_to_add_to_kg -- {properties_to_add_to_kg}" + ) + graph.ontology.create_property_kinds_of_entity_kind( + "User", + property_kinds, + properties_type_families=properties_families, + ) + graph.create_or_update_properties_of_entity(user_id, property_kinds, objects) + for prop in properties_to_add_to_kg: + del prop["property_family"] + return properties_to_add_to_kg, properties_already_in_kg + + +def memorize(graph, uttrs): + user_id = "/".join(["User", str(uttrs[0].get("user", {}).get("id", ""))]) + user_external_id = str(uttrs[0].get("user", {}).get("user_external_id", "")) + + triplets_added_to_kg_batch = [] + triplets_already_in_kg_batch = [] + for utt in uttrs: + last_utt = utt["text"] + logger.info(f"last_utt -- {last_utt}") + annotations = utt.get("annotations", {}) + custom_el_annotations = annotations.get("custom_entity_linking", []) + logger.info(f"custom_el_annotations -- {custom_el_annotations}") + prop_ex_annotations = annotations.get("property_extraction", []) + logger.debug(f"prop_ex_annotations before upper-casing -- {prop_ex_annotations}") + for annotation in prop_ex_annotations: + if "triplets" in annotation: + triplets = annotation["triplets"] + for idx in reversed(range(len(triplets))): + triplet = triplets[idx] + if triplet["object"] == "": + del triplets[idx] + logging.error( + f"ValueError: the triplet '{triplet}' in property extraction output has '' object" + ) + + create_entities(graph, [(user_external_id, "User")], has_name_property=True, entity_ids=[user_id]) + + prop_ex_rel_triplets, prop_triplets = check_property_vs_relationship(prop_ex_annotations) + prop_ex_rel_triplets = upper_case_input(prop_ex_rel_triplets) + logger.info(f"rel_triplets, prop_triplets -- {prop_ex_rel_triplets, prop_triplets}") + + if prop_triplets: + properties_added_to_kg, properties_already_in_kg = check_and_add_properties(graph, prop_triplets, user_id) + else: + properties_added_to_kg, properties_already_in_kg = [], [] + + entities_in_index, entities_not_in_index = check_entities_in_index( + custom_el_annotations, prop_ex_rel_triplets, last_utt + ) + logger.info(f"entities_in_index, entities_not_in_index -- {entities_in_index, entities_not_in_index}") + + if entities_not_in_index: + abstract_triplets, non_abstract_triplets = check_abstract_triplets( + graph, entities_not_in_index, prop_ex_rel_triplets, last_utt, user_id + ) + logger.info(f"abstract_triplets -- {abstract_triplets}") + logger.info(f"non_abstract_triplets -- {non_abstract_triplets}") + + entities_in_kg_not_in_index, entities_not_in_kg = check_entities_in_kg(graph, non_abstract_triplets) + logger.debug(f"entities_not_in_kg -- {entities_not_in_kg}") + + if entities_not_in_kg: + new_entities = create_entities(graph, entities_not_in_kg) + else: + new_entities = {} + else: + abstract_triplets = [] + entities_in_kg_not_in_index = [] + new_entities = {} + logger.info(f"new_entities -- {new_entities}") + logger.info(f"entities_in_kg_not_in_index -- {entities_in_kg_not_in_index}") + + if entities_in_index: + triplets_of_entities_in_index = 
prepare_triplets(entities_in_index, prop_ex_rel_triplets, user_id) + logger.info(f"triplets_of_entities_in_index -- {triplets_of_entities_in_index}") + triplets_already_in_kg, triplets_not_in_kg = check_triplets_in_kg(graph, triplets_of_entities_in_index) + else: + triplets_already_in_kg = [] + triplets_not_in_kg = { + "ids_a": [], + "relationship_kinds": [], + "ids_b": [], + } + logger.info(f"triplets_already_in_kg -- {triplets_already_in_kg}\ntriplets_not_in_kg -- {triplets_not_in_kg}") + + if triplets_not_in_kg["ids_b"] or new_entities or entities_in_kg_not_in_index or abstract_triplets: + triplets_to_kg, triplets_to_index = prepare_triplets_to_add_to_dbs( + triplets_not_in_kg, + prop_ex_rel_triplets, + entities_in_kg_not_in_index, + new_entities, + abstract_triplets, + user_id, + ) + logger.debug(f"triplets_to_kg -- {triplets_to_kg}\n triplets_to_index -- {triplets_to_index}") + triplets_added_to_kg = add_triplets_to_dbs(graph, user_id, triplets_to_kg, triplets_to_index) + else: + triplets_added_to_kg = [] + + triplets_added_to_kg_batch.append(triplets_added_to_kg + properties_added_to_kg) + triplets_already_in_kg_batch.append(triplets_already_in_kg + properties_already_in_kg) + + logger.info( + f"added_to_graph -- {triplets_added_to_kg_batch}, triplets_already_in_graph -- {triplets_already_in_kg_batch}" + ) + return [{"added_to_graph": triplets_added_to_kg_batch, "triplets_already_in_graph": triplets_already_in_kg_batch}] + + +def get_result(request, graph): + uttrs = request.json.get("last_human_annotated_utterance", []) + try: + result = memorize(graph, uttrs) + except Exception as e: + sentry_sdk.capture_exception(e) + logger.exception(e) + result = [{"added_to_graph": [[]] * len(uttrs), "triplets_already_in_graph": [[]] * len(uttrs)}] + return result + + +@app.route("/respond", methods=["POST"]) +def respond(): + result = get_result(request, kg_graph) + return jsonify(result) + + +if __name__ == "__main__": + app.run(debug=False, host="0.0.0.0", port=3000) diff --git a/annotators/user_knowledge_memorizer/service_configs/user-knowledge-memorizer/environment.yml b/annotators/user_knowledge_memorizer/service_configs/user-knowledge-memorizer/environment.yml new file mode 100644 index 0000000000..b05f7a192b --- /dev/null +++ b/annotators/user_knowledge_memorizer/service_configs/user-knowledge-memorizer/environment.yml @@ -0,0 +1,9 @@ +SERVICE_PORT: 8027 +SRC_DIR: annotators/user_knowledge_memorizer/ +SERVICE_NAME: user_knowledge_memorizer +FLASK_APP: server +TERMINUSDB_SERVER_URL: http://terminusdb-server:6363 +TERMINUSDB_SERVER_PASSWORD: root +TERMINUSDB_SERVER_TEAM: admin +TERMINUSDB_SERVER_DB: user_knowledge_db +CONFIG: config.json diff --git a/annotators/user_knowledge_memorizer/service_configs/user-knowledge-memorizer/service.yml b/annotators/user_knowledge_memorizer/service_configs/user-knowledge-memorizer/service.yml new file mode 100644 index 0000000000..ae0e52716d --- /dev/null +++ b/annotators/user_knowledge_memorizer/service_configs/user-knowledge-memorizer/service.yml @@ -0,0 +1,33 @@ +name: user-knowledge-memorizer +endpoints: +- respond +compose: + env_file: + - .env + build: + args: + SERVICE_PORT: 8027 + SRC_DIR: annotators/user_knowledge_memorizer/ + SERVICE_NAME: user_knowledge_memorizer + TERMINUSDB_SERVER_URL: http://terminusdb-server:6363 + TERMINUSDB_SERVER_PASSWORD: root + TERMINUSDB_SERVER_TEAM: admin + TERMINUSDB_SERVER_DB: user_knowledge_db + CONFIG: config.json + context: ./ + dockerfile: annotators/user_knowledge_memorizer/Dockerfile + command: flask run -h 
0.0.0.0 -p 8027
+    environment:
+      - FLASK_APP=server
+  deploy:
+    resources:
+      limits:
+        memory: 256M
+      reservations:
+        memory: 256M
+  volumes:
+    - ./annotators/user_knowledge_memorizer:/src
+    - ~/.deeppavlov:/root/.deeppavlov
+  ports:
+    - 8027:8027
+proxy: null
diff --git a/annotators/user_knowledge_memorizer/test.py b/annotators/user_knowledge_memorizer/test.py
new file mode 100644
index 0000000000..217fc1368d
--- /dev/null
+++ b/annotators/user_knowledge_memorizer/test.py
@@ -0,0 +1,147 @@
+import requests
+from deeppavlov_kg import TerminusdbKnowledgeGraph
+
+
+def formulate_utt_annotations(dog_id=None, park_id=None):
+    utt_annotations = {
+        "property_extraction": [
+            {
+                "triplets": [
+                    {"subject": "user", "relation": "HAVE PET", "object": "dog"},
+                    {"subject": "user", "relation": "LIKE GOTO", "object": "park"},
+                ]
+            }
+        ],
+        "custom_entity_linking": [],
+    }
+
+    # if the dog is already in the kg, add it to the custom_el annotations
+    if dog_id is not None:
+        utt_annotations["custom_entity_linking"].append(
+            {
+                "entity_substr": "dog",
+                "entity_ids": [dog_id],
+                "confidences": [1.0],
+                "tokens_match_conf": [1.0],
+                "entity_id_tags": ["Animal"],
+            },
+        )
+    if park_id is not None:
+        utt_annotations["custom_entity_linking"].append(
+            {
+                "entity_substr": "park",
+                "entity_ids": [park_id],
+                "confidences": [1.0],
+                "tokens_match_conf": [1.0],
+                "entity_id_tags": ["Place"],
+            },
+        )
+
+    return utt_annotations
+
+
+def prepare_for_comparison(results):
+    for result in results:
+        if uttrs := result["added_to_graph"]:
+            for utt in uttrs:
+                for triplet in utt:
+                    triplet[2] = triplet[2].split("/")[0]
+        if uttrs := result["triplets_already_in_graph"]:
+            for utt in uttrs:
+                for triplet in utt:
+                    triplet[2] = triplet[2].split("/")[0]
+
+    return results
+
+
+def compare_results(results, golden_results) -> bool:
+    def compare(uttrs, golden_result):
+        for idx, utt in enumerate(uttrs):
+            for triplet in utt:
+                if triplet not in golden_result[idx]:
+                    return False
+        return True
+
+    is_successful = []
+    for result, golden_result in zip(results, golden_results):
+        is_added = compare(result["added_to_graph"], golden_result["added_to_graph"])
+        is_in_graph = compare(result["triplets_already_in_graph"], golden_result["triplets_already_in_graph"])
+        is_successful.append(is_added)
+        is_successful.append(is_in_graph)
+    return all(is_successful)
+
+
+def main():
+    TERMINUSDB_SERVER_URL = "http://0.0.0.0:6363"
+    TERMINUSDB_SERVER_TEAM = "admin"
+    TERMINUSDB_SERVER_DB = "user_knowledge_db"
+    TERMINUSDB_SERVER_PASSWORD = "root"
+    USER_KNOWLEDGE_MEMORIZER_PORT = 8027
+
+    USER_KNOWLEDGE_MEMORIZER_URL = f"http://0.0.0.0:{USER_KNOWLEDGE_MEMORIZER_PORT}/respond"
+
+    graph = TerminusdbKnowledgeGraph(
+        db_name=TERMINUSDB_SERVER_DB,
+        team=TERMINUSDB_SERVER_TEAM,
+        server=TERMINUSDB_SERVER_URL,
+        password=TERMINUSDB_SERVER_PASSWORD,
+    )
+
+    USER_ID = "User/b75d2700259bdc44sdsdf85e7f530ed"
+    # get dog_id and park_id from KG
+    dog_id, park_id = None, None
+    try:
+        user_props = graph.get_properties_of_entity(USER_ID)
+        entities_info = graph.get_properties_of_entities(
+            [*user_props["HAVE PET/Animal"], *user_props["LIKE GOTO/Place"]]
+        )
+        for entity_info in entities_info:
+            if entity_info.get("substr") == "dog":
+                dog_id = entity_info["@id"]
+            elif entity_info.get("substr") == "park":
+                park_id = entity_info["@id"]
+        print(f"Found park_id: '{park_id}' and dog_id: '{dog_id}'")
+        added_new_entities = False
+    except Exception:
+        print("Adding new entities and rels")
+        added_new_entities = True
+
+    request_data = [
+        {
+            
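# The /respond handler reads the "last_human_annotated_utterance" field of the POST body
+            # (see get_result in server.py); each item carries "text", "user" and "annotations" keys.
+            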
"last_human_annotated_utterance": [ + { + "text": "i have a dog and a cat", + "user": {"id": USER_ID.split("/")[1]}, + "annotations": formulate_utt_annotations(dog_id, park_id), + }, + { + "text": "", + "user": {"id": ""}, + "annotations": { + "property_extraction": [{}], + "custom_entity_linking": [], + }, + }, + ] + } + ] + + golden_triplets = [[[USER_ID, "LIKE GOTO", "Place"], [USER_ID, "HAVE PET", "Animal"]], []] + if added_new_entities: + golden_results = [[{"added_to_graph": golden_triplets, "triplets_already_in_graph": [[], []]}]] + else: + golden_results = [[{"added_to_graph": [[], []], "triplets_already_in_graph": golden_triplets}]] + + count = 0 + for data, golden_result in zip(request_data, golden_results): + result = requests.post(USER_KNOWLEDGE_MEMORIZER_URL, json=data).json() + print(result) + result = prepare_for_comparison(result) + if compare_results(result, golden_result): + count += 1 + assert count == len(request_data) + print("Success") + + +if __name__ == "__main__": + main() diff --git a/annotators/user_persona_extractor/requirements.txt b/annotators/user_persona_extractor/requirements.txt index ad82eaabf6..8d962dd7c1 100644 --- a/annotators/user_persona_extractor/requirements.txt +++ b/annotators/user_persona_extractor/requirements.txt @@ -4,4 +4,4 @@ gunicorn==20.1.0 requests==2.28.2 sentry-sdk==1.19.1 jinja2<=3.0.3 -Werkzeug>=2.2.2 \ No newline at end of file +Werkzeug>=2.2.2,<3.0 \ No newline at end of file diff --git a/annotators/wiki_parser/requirements.txt b/annotators/wiki_parser/requirements.txt index 9ead6f4644..5fda573da4 100644 --- a/annotators/wiki_parser/requirements.txt +++ b/annotators/wiki_parser/requirements.txt @@ -4,5 +4,5 @@ itsdangerous==2.0.1 gunicorn==20.1.0 requests==2.28.2 jinja2<=3.0.3 -Werkzeug>=2.2.2 +Werkzeug>=2.2.2,<3.0 pybind11==2.10.4 \ No newline at end of file diff --git a/assistant_dists/dream/docker-compose.override.yml b/assistant_dists/dream/docker-compose.override.yml index c203a6b206..2bd665f2aa 100644 --- a/assistant_dists/dream/docker-compose.override.yml +++ b/assistant_dists/dream/docker-compose.override.yml @@ -389,7 +389,8 @@ services: env_file: [.env] build: args: - CONFIG: t5_generative_ie_lite_infer.json + CONFIG_T5: t5_generative_ie_lite_infer.json + CONFIG_REL_RANKER: rel_ranking_roberta.json SERVICE_PORT: 8136 SRC_DIR: annotators/property_extraction/ SERVICE_NAME: property_extraction @@ -398,6 +399,7 @@ services: command: flask run -h 0.0.0.0 -p 8136 environment: - FLASK_APP=server + - CUDA_VISIBLE_DEVICES=0 deploy: resources: limits: diff --git a/assistant_dists/dream_embodied/README.md b/assistant_dists/dream_embodied/README.md new file mode 100644 index 0000000000..3358e068be --- /dev/null +++ b/assistant_dists/dream_embodied/README.md @@ -0,0 +1,27 @@ +# dream_embodied distribution + +![Architecture](architecture.png) + +This distribution functions the following way: +1. Receives user input from the dp-agent of choice (e.g. Telegram); +2. Processes the received input in a way that results in a command (e.g. move forward 4 meters -> `move_forward_4`); +3. Sends the command to the ROS server (in order to make interacting with real robots possible); +4. Processes the message from inside ROS server and accesses a local connector (e.g. real robot ROS-API, minecraft-interface); +5. (While not done in dream-side, it is useful to know that) the local connector then executes a command that usually corresponds with the message we received in step 2. + +# Prerequisites: + +1. 
Make sure that all services are correctly defined in dev.yml, proxy.yml, pipeline_conf.json;
+2. Ensure that the ports for services and skills are unique and that the ports referenced by services and skills are correct;
+3. Verify that in docker-compose.override.yml, the following is set: agent.channel=telegram agent.telegram_token=$TG_TOKEN;
+4. Use the @botfather bot in Telegram to create your own chatbot and to receive your personal telegram bot token (TG_TOKEN);
+5. Ensure that the Telegram bot token is set in the environment variables as TG_TOKEN by executing the following command on the Dream host machine: `export TG_TOKEN=...`.
+
+# Launch command:
+
+```
+docker-compose -f docker-compose.yml -f assistant_dists/dream_embodied/docker-compose.override.yml -f \
+assistant_dists/dream_embodied/dev.yml -f assistant_dists/dream_embodied/proxy.yml up --build
+```
+
+Attention! Some launch commands chain an extra part (after a semicolon) that stops all running containers on the machine. The command above does not include such a part; if yours does and this behavior is not required, remove the part after the semicolon or edit it to stop only specific containers whose names are known in advance.
\ No newline at end of file
diff --git a/assistant_dists/dream_embodied/architecture.png b/assistant_dists/dream_embodied/architecture.png
new file mode 100644
index 0000000000..2aca3a7968
Binary files /dev/null and b/assistant_dists/dream_embodied/architecture.png differ
diff --git a/assistant_dists/dream_embodied/cpu.yml b/assistant_dists/dream_embodied/cpu.yml
new file mode 100644
index 0000000000..acb83f0d32
--- /dev/null
+++ b/assistant_dists/dream_embodied/cpu.yml
@@ -0,0 +1,10 @@
+version: '3.7'
+services:
+  dialogpt:
+    environment:
+      DEVICE: cpu
+      CUDA_VISIBLE_DEVICES: ""
+  intent-catcher-commands:
+    environment:
+      DEVICE: cpu
+      CUDA_VISIBLE_DEVICES: ""
diff --git a/assistant_dists/dream_embodied/db_conf.json b/assistant_dists/dream_embodied/db_conf.json
new file mode 100644
index 0000000000..a9ba6813f5
--- /dev/null
+++ b/assistant_dists/dream_embodied/db_conf.json
@@ -0,0 +1,6 @@
+{
+    "host": "DB_HOST",
+    "port": "DB_PORT",
+    "name": "DB_NAME",
+    "env": true
+}
\ No newline at end of file
diff --git a/assistant_dists/dream_embodied/dev.yml b/assistant_dists/dream_embodied/dev.yml
new file mode 100644
index 0000000000..210cf31e55
--- /dev/null
+++ b/assistant_dists/dream_embodied/dev.yml
@@ -0,0 +1,107 @@
+# With volumes like these it is convenient to debug: no need to rebuild the container every time the code changes
+services:
+  agent:
+    volumes:
+      - ".:/dp-agent"
+    ports:
+      - 4242:4242
+  ranking-based-response-selector:
+    volumes:
+      - "./response_selectors/ranking_based_response_selector:/src"
+      - "./common:/src/common"
+    ports:
+      - 8002:8002
+  sentence-ranker:
+    volumes:
+      - "./services/sentence_ranker:/src"
+      - "~/.deeppavlov/cache:/root/.cache"
+    ports:
+      - 8128:8128
+  spacy-nounphrases:
+    volumes:
+      - "./annotators/spacy_nounphrases:/src"
+      - "./common:/src/common"
+    ports:
+      - 8006:8006
+  dff-program-y-skill:
+    volumes:
+      - "./skills/dff_program_y_skill:/src"
+      - "./common:/src/common"
+    ports:
+      - 8008:8008
+  sentseg:
+    volumes:
+      - "./annotators/SentSeg:/src"
+    ports:
+      - 8011:8011
+  intent-catcher-commands:
+    volumes:
+      - "./annotators/IntentCatcherTransformers:/src"
+      - "./common:/src/common"
+      - "~/.deeppavlov:/root/.deeppavlov"
+      - "~/.deeppavlov/cache:/root/.cache"
+    ports:
+      - 8014:8014
+  dff-intent-responder-skill:
+    volumes:
+      - "./skills/dff_intent_responder_skill:/src"
+      - "./common:/src/common"
+    ports:
+      - 8012:8012
+  badlisted-words:
+    volumes:
+      - 
"./annotators/BadlistedWordsDetector:/src" + - "./common:/src/common" + ports: + - 8018:8018 + dialogpt: + volumes: + - "./common:/src/common" + - "./services/dialogpt:/src" + - "~/.deeppavlov/cache:/root/.cache" + ports: + - 8125:8125 + entity-detection: + volumes: + - "./annotators/entity_detection:/src" + - "~/.deeppavlov:/root/.deeppavlov" + ports: + - 8103:8103 + ner: + volumes: + - './annotators/NER_deeppavlov:/src' + - "~/.deeppavlov:/root/.deeppavlov" + ports: + - 8021:8021 + ros-flask-server: + volumes: + - "./services/ros_flask_server:/home/dkr/" + - "./common:/home/dkr/common" + ports: + - 6000:6000 + dff-command-selector-skill: + volumes: + - "./skills/dff_command_selector_skill:/src" + - "./common:/src/common" + ports: + - 8148:8148 + robot-command-sender: + volumes: + - "./services/robot_command_sender:/src" + - "./common:/src/common" + ports: + - 8035:8035 + robot-fake-server: + volumes: + - "./services/robot_fake_server:/src" + - "./common:/src/common" + ports: + - 8038:8038 + robot-notifications: + volumes: + - "./services/robot_notifications:/src" + - "./common:/src/common" + ports: + - 8036:8036 + +version: "3.7" diff --git a/assistant_dists/dream_embodied/docker-compose.override.yml b/assistant_dists/dream_embodied/docker-compose.override.yml new file mode 100644 index 0000000000..8de355ed3f --- /dev/null +++ b/assistant_dists/dream_embodied/docker-compose.override.yml @@ -0,0 +1,327 @@ +services: + agent: + command: sh -c 'bin/wait && python -m deeppavlov_agent.run agent.channel=telegram agent.telegram_token=$TG_TOKEN agent.pipeline_config=assistant_dists/dream_embodied/pipeline_conf.json' + environment: + WAIT_HOSTS: "ranking-based-response-selector:8002, sentence-ranker:8128, dff-program-y-skill:8008, sentseg:8011, + intent-catcher-commands:8014, badlisted-words:8018, dff-command-selector-skill:8148, dff-intent-responder-skill:8012, dialogpt:8125, entity-detection:8103, ner:8021, spacy-nounphrases:8006, ros-flask-server:6000, robot-notifications:8036, robot-fake-server:8038" + WAIT_HOSTS_TIMEOUT: ${WAIT_TIMEOUT:-480} + HIGH_PRIORITY_INTENTS: 1 + RESTRICTION_FOR_SENSITIVE_CASE: 1 + ALWAYS_TURN_ON_ALL_SKILLS: 0 + LANGUAGE: EN + + ranking-based-response-selector: + env_file: [ .env ] + build: + args: + SERVICE_PORT: 8002 + SERVICE_NAME: response_selector + LANGUAGE: EN + SENTENCE_RANKER_ANNOTATION_NAME: sentence_ranker + SENTENCE_RANKER_SERVICE_URL: http://sentence-ranker:8128/respond + SENTENCE_RANKER_TIMEOUT: 3 + N_UTTERANCES_CONTEXT: 5 + FILTER_TOXIC_OR_BADLISTED: 1 + context: . + dockerfile: ./response_selectors/ranking_based_response_selector/Dockerfile + command: flask run -h 0.0.0.0 -p 8002 + environment: + - FLASK_APP=server + deploy: + resources: + limits: + memory: 100M + reservations: + memory: 100M + + sentence-ranker: + env_file: [ .env ] + build: + args: + SERVICE_PORT: 8128 + SERVICE_NAME: sentence_ranker + PRETRAINED_MODEL_NAME_OR_PATH: sentence-transformers/all-MiniLM-L6-v2 + context: ./services/sentence_ranker/ + command: flask run -h 0.0.0.0 -p 8128 + environment: + - CUDA_VISIBLE_DEVICES=0 + - FLASK_APP=server + deploy: + resources: + limits: + memory: 3G + reservations: + memory: 3G + + spacy-nounphrases: + env_file: [ .env ] + build: + context: . 
+ dockerfile: ./annotators/spacy_nounphrases/Dockerfile + command: flask run -h 0.0.0.0 -p 8006 + environment: + - FLASK_APP=server + deploy: + resources: + limits: + memory: 256M + reservations: + memory: 256M + + dff-program-y-skill: + env_file: [ .env ] + build: + args: + SERVICE_PORT: 8008 + SERVICE_NAME: dff_program_y_skill + LANGUAGE: EN + context: . + dockerfile: ./skills/dff_program_y_skill/Dockerfile + command: gunicorn --workers=1 server:app -b 0.0.0.0:8008 --reload + deploy: + resources: + limits: + memory: 1024M + reservations: + memory: 1024M + + sentseg: + env_file: [ .env ] + build: + context: ./annotators/SentSeg/ + command: flask run -h 0.0.0.0 -p 8011 + environment: + - FLASK_APP=server + deploy: + resources: + limits: + memory: 1.5G + reservations: + memory: 1.5G + + dff-command-selector-skill: + env_file: [ .env ] + build: + args: + SERVICE_PORT: 8148 + SERVICE_NAME: dff_command_selector_skill + LANGUAGE: EN + ROS_FLASK_SERVER: http://ros-flask-server:6000 + FAKE: "False" + context: . + dockerfile: ./skills/dff_command_selector_skill/Dockerfile + command: gunicorn --workers=1 server:app -b 0.0.0.0:8148 --reload + deploy: + resources: + limits: + memory: 128M + reservations: + memory: 128M + + dff-intent-responder-skill: + env_file: [ .env ] + build: + args: + SERVICE_PORT: 8012 + SERVICE_NAME: dff_intent_responder_skill + INTENT_RESPONSE_PHRASES_FNAME: intent_response_phrases.json + context: . + dockerfile: ./skills/dff_intent_responder_skill/Dockerfile + command: gunicorn --workers=1 server:app -b 0.0.0.0:8012 --reload + deploy: + resources: + limits: + memory: 128M + reservations: + memory: 128M + + robot-command-sender: + env_file: [ .env ] + build: + args: + SERVICE_PORT: 8035 + ROS_FLASK_SERVER: http://ros-flask-server:6000 + context: . + dockerfile: ./services/robot_command_sender/Dockerfile + command: flask run -h 0.0.0.0 -p 8035 + environment: + - FLASK_APP=server + deploy: + resources: + limits: + memory: 100M + reservations: + memory: 100M + + robot-notifications: + env_file: [ .env ] + build: + args: + SERVICE_PORT: 8036 + ROS_FLASK_SERVER: http://ros-flask-server:6000 + context: . + dockerfile: ./services/robot_notifications/Dockerfile + command: flask run -h 0.0.0.0 -p 8036 + environment: + - FLASK_APP=server + deploy: + resources: + limits: + memory: 100M + reservations: + memory: 100M + + intent-catcher-commands: + env_file: [.env] + build: + context: . + dockerfile: ./annotators/IntentCatcherTransformers/Dockerfile + args: + SERVICE_PORT: 8014 + CONFIG_NAME: intents_model_dp_config_commands.json + INTENT_PHRASES_PATH: intent_phrases_commands.json + command: python -m flask run -h 0.0.0.0 -p 8014 + environment: + - FLASK_APP=server + - CUDA_VISIBLE_DEVICES=0 + deploy: + resources: + limits: + memory: 3.5G + reservations: + memory: 3.5G + + badlisted-words: + env_file: [ .env ] + build: + context: annotators/BadlistedWordsDetector/ + command: flask run -h 0.0.0.0 -p 8018 + environment: + - FLASK_APP=server + deploy: + resources: + limits: + memory: 256M + reservations: + memory: 256M + + dialogpt: + env_file: [ .env ] + build: + args: + SERVICE_PORT: 8125 + SERVICE_NAME: dialogpt + PRETRAINED_MODEL_NAME_OR_PATH: microsoft/DialoGPT-medium + N_HYPOTHESES_TO_GENERATE: 5 + CONFIG_NAME: dialogpt_en.json + MAX_HISTORY_DEPTH: 2 + context: . 
+      dockerfile: ./services/dialogpt/Dockerfile
+    command: flask run -h 0.0.0.0 -p 8125
+    environment:
+      - CUDA_VISIBLE_DEVICES=0
+      - FLASK_APP=server
+    deploy:
+      resources:
+        limits:
+          memory: 2G
+        reservations:
+          memory: 2G
+
+  entity-detection:
+    env_file: [ .env ]
+    build:
+      args:
+        SEQ_TAG_CONFIG: wikipedia_entity_detection_distilbert.json
+        CONFIG: entity_detection_eng.json
+        LOWERCASE: 1
+        PORT: 8103
+        SRC_DIR: annotators/entity_detection/
+        FINEGRAINED: 0
+      context: ./
+      dockerfile: annotators/entity_detection/Dockerfile
+    command: flask run -h 0.0.0.0 -p 8103
+    environment:
+      - FLASK_APP=server
+      - CUDA_VISIBLE_DEVICES=0
+    deploy:
+      resources:
+        limits:
+          memory: 2.5G
+        reservations:
+          memory: 2.5G
+
+  ner:
+    env_file: [ .env ]
+    build:
+      args:
+        CONFIG: ner_case_agnostic_multilingual_bert_base_extended.json
+        PORT: 8021
+        SRC_DIR: annotators/NER_deeppavlov
+        COMMIT: f5117cd9ad1e64f6c2d970ecaa42fc09ccb23144
+      context: ./
+      dockerfile: annotators/NER_deeppavlov/Dockerfile
+    command: flask run -h 0.0.0.0 -p 8021
+    environment:
+      - FLASK_APP=server
+      - CUDA_VISIBLE_DEVICES=0
+    tty: true
+    deploy:
+      resources:
+        limits:
+          memory: 2G
+        reservations:
+          memory: 2G
+
+  ros-flask-server:
+    env_file: [ .env ]
+    build:
+      args:
+        SERVICE_PORT: 6000
+      context: .
+      dockerfile: ./services/ros_flask_server/Dockerfile
+    environment:
+      - FLASK_APP=server
+    deploy:
+      resources:
+        limits:
+          memory: 100M
+        reservations:
+          memory: 100M
+
+  robot-fake-server:
+    env_file: [ .env ]
+    build:
+      args:
+        SERVICE_PORT: 8038
+      context: .
+      dockerfile: ./services/robot_fake_server/Dockerfile
+    command: flask run -h 0.0.0.0 -p 8038
+    environment:
+      - FLASK_APP=server
+    deploy:
+      resources:
+        limits:
+          memory: 100M
+        reservations:
+          memory: 100M
+
+version: '3.7'
diff --git a/assistant_dists/dream_embodied/pipeline_conf.json b/assistant_dists/dream_embodied/pipeline_conf.json
new file mode 100644
index 0000000000..1f16ffda78
--- /dev/null
+++ b/assistant_dists/dream_embodied/pipeline_conf.json
@@ -0,0 +1,409 @@
+{
+    "connectors": {
+        "sentseg": {
+            "protocol": "http",
+            "timeout": 1.5,
+            "url": "http://sentseg:8011/sentseg"
+        },
+        "ner": {
+            "protocol": "http",
+            "timeout": 1.5,
+            "url": "http://ner:8021/ner"
+        }
+    },
+    "services": {
+        "last_chance_service": {
+            "connector": {
+                "protocol": "python",
+                "class_name": "PredefinedTextConnector",
+                "response_text": "Sorry, something went wrong inside. Please tell me, what did you say.",
+                "annotations": {
+                    "sentseg": {
+                        "punct_sent": "Sorry, something went wrong inside. Please tell me, what did you say.",
+                        "segments": [
+                            "Sorry, something went wrong inside.",
+                            "Please tell me, what did you say."
+                        ]
+                    }
+                }
+            },
+            "state_manager_method": "add_bot_utterance_last_chance",
+            "tags": [
+                "last_chance"
+            ],
+            "is_enabled": true,
+            "source": {
+                "component": "components/sbDcAqiNqxFz.yml",
+                "service": "services/agent_services/service_configs/dream"
+            }
+        },
+        "timeout_service": {
+            "connector": {
+                "protocol": "python",
+                "class_name": "PredefinedTextConnector",
+                "response_text": "Sorry, I need to think more on that. 
Let's talk about something else.", + "annotations": { + "sentseg": { + "punct_sent": "Sorry, I need to think more on that. Let's talk about something else.", + "segments": [ + "Sorry, I need to think more on that.", + "Let's talk about something else." + ] + } + } + }, + "state_manager_method": "add_bot_utterance_last_chance", + "tags": [ + "timeout" + ], + "is_enabled": true, + "source": { + "component": "components/rFC0YJOoDFvS.yml", + "service": "services/agent_services/service_configs/dream" + } + }, + "annotators": { + "robot_notifications": { + "connector": { + "protocol": "http", + "timeout": 1.0, + "url": "http://robot-notifications:8036/check" + }, + "dialog_formatter": "state_formatters.dp_formatters:utt_non_punct_dialog", + "response_formatter": "state_formatters.dp_formatters:simple_formatter_service", + "previous_services": [], + "state_manager_method": "update_attributes", + "is_enabled": true, + "source": { + "component": "components/dfmgnoie4wrn34r98.yml", + "service": "services/robot_notifications/service_configs/robot-notifications" + } + }, + "sentseg": { + "connector": { + "protocol": "http", + "timeout": 1.5, + "url": "http://sentseg:8011/sentseg" + }, + "dialog_formatter": "state_formatters.dp_formatters:preproc_last_human_utt_dialog", + "response_formatter": "state_formatters.dp_formatters:simple_formatter_service", + "previous_services": [], + "state_manager_method": "add_annotation", + "is_enabled": true, + "source": { + "component": "components/gM4fEjvVqLlSRRRkQfds2g.yml", + "service": "annotators/SentSeg/service_configs/sentseg" + } + }, + "badlisted_words": { + "connector": { + "protocol": "http", + "timeout": 1.0, + "url": "http://badlisted-words:8018/badlisted_words" + }, + "dialog_formatter": "state_formatters.dp_formatters:preproc_last_human_utt_dialog", + "response_formatter": "state_formatters.dp_formatters:simple_formatter_service", + "previous_services": [], + "state_manager_method": "add_annotation", + "is_enabled": true, + "source": { + "component": "components/gySZ51dyqYi9TOFr6zY5kA.yml", + "service": "annotators/BadlistedWordsDetector/service_configs/badlisted-words" + } + }, + "intent_catcher": { + "connector": { + "protocol": "http", + "timeout": 1.0, + "url": "http://intent-catcher-commands:8014/detect" + }, + "dialog_formatter": "state_formatters.dp_formatters:last_utt_sentseg_segments_dialog", + "response_formatter": "state_formatters.dp_formatters:simple_formatter_service", + "previous_services": [ + "annotators.sentseg" + ], + "state_manager_method": "add_annotation", + "is_enabled": true, + "source": { + "component": "components/1IjC3r9b1VJ082ceINXzHQ.yml", + "service": "annotators/IntentCatcherTransformers/service_configs/intent-catcher" + } + }, + "ner": { + "connector": { + "protocol": "http", + "timeout": 1.5, + "url": "http://ner:8021/ner" + }, + "dialog_formatter": "state_formatters.dp_formatters:ner_formatter_dialog", + "response_formatter": "state_formatters.dp_formatters:simple_formatter_service", + "previous_services": [ + "annotators.sentseg" + ], + "state_manager_method": "add_annotation", + "is_enabled": true, + "source": { + "component": "components/3RDNPBdybjBlSQZqcc7nGQ.yml", + "service": "annotators/NER_deeppavlov/service_configs/ner" + } + }, + "entity_detection": { + "connector": { + "protocol": "http", + "timeout": 1.0, + "url": "http://entity-detection:8103/respond" + }, + "dialog_formatter": "state_formatters.dp_formatters:entity_detection_formatter_dialog", + "response_formatter": 
"state_formatters.dp_formatters:simple_formatter_service", + "previous_services": [ + "annotators.sentseg" + ], + "state_manager_method": "add_annotation", + "is_enabled": true, + "source": { + "component": "components/05PqJXVd7gV7DqslN5z3A.yml", + "service": "annotators/entity_detection/service_configs/entity-detection" + } + } + }, + "response_annotators": { + "ner": { + "connector": { + "protocol": "http", + "timeout": 1.5, + "url": "http://ner:8021/ner" + }, + "dialog_formatter": "state_formatters.dp_formatters:ner_formatter_last_bot_dialog", + "response_formatter": "state_formatters.dp_formatters:simple_formatter_service", + "previous_services": [ + "response_annotator_selectors", + "response_annotators.sentseg" + ], + "state_manager_method": "add_annotation_prev_bot_utt", + "is_enabled": true, + "source": { + "component": "components/3RDNPBdybjBlSQZqcc7nGQ.yml", + "service": "annotators/NER_deeppavlov/service_configs/ner" + } + } + }, + "response_annotator_selectors": { + "connector": { + "protocol": "python", + "class_name": "skill_selectors.post_annotator_selector.connector:PostAnnotatorSelectorConnector", + "annotator_names": [ + "sentseg", + "ner" + ] + }, + "response_formatter": "state_formatters.dp_formatters:simple_formatter_service", + "tags": [ + "selector" + ], + "is_enabled": true, + "source": { + "component": "components/LXrJDIf43gwNmPMNXG5Eg.yml", + "service": "services/response_annotator_selectors/service_configs/agent" + } + }, + "candidate_annotators": { + "badlisted_words": { + "connector": { + "protocol": "http", + "timeout": 1.0, + "url": "http://badlisted-words:8018/badlisted_words_batch" + }, + "dialog_formatter": "state_formatters.dp_formatters:hypotheses_list", + "response_formatter": "state_formatters.dp_formatters:simple_formatter_service", + "previous_services": [ + "skills" + ], + "state_manager_method": "add_hypothesis_annotation_batch", + "is_enabled": true, + "source": { + "component": "components/gySZ51dyqYi9TOFr6zY5kA.yml", + "service": "annotators/BadlistedWordsDetector/service_configs/badlisted-words" + } + }, + "sentence_ranker": { + "connector": { + "protocol": "http", + "timeout": 1.0, + "url": "http://sentence-ranker:8128/respond" + }, + "dialog_formatter": "state_formatters.dp_formatters:sentence_ranker_formatter", + "response_formatter": "state_formatters.dp_formatters:simple_formatter_service", + "previous_services": [ + "skills" + ], + "state_manager_method": "add_hypothesis_annotation_batch", + "is_enabled": true, + "source": { + "component": "components/XGwmAHtAOu0NDqqG3QCJw.yml", + "service": "services/sentence_ranker/service_configs/sentence-ranker" + } + } + }, + "skill_selectors": { + "rule_based_selector": { + "connector": { + "protocol": "python", + "class_name": "skill_selectors.rule_based_selector.connector:RuleBasedSkillSelectorConnector" + }, + "dialog_formatter": "state_formatters.dp_formatters:base_skill_selector_formatter_dialog", + "response_formatter": "state_formatters.dp_formatters:simple_formatter_service", + "previous_services": [ + "annotators" + ], + "tags": [ + "selector" + ], + "is_enabled": true, + "source": { + "component": "components/xSwFvtAUdvtQosvzpb7oMg.yml", + "service": "skill_selectors/rule_based_selector/service_configs/agent" + } + } + }, + "skills": { + "dff_program_y_skill": { + "connector": { + "protocol": "http", + "timeout": 2.0, + "url": "http://dff-program-y-skill:8008/respond" + }, + "dialog_formatter": "state_formatters.dp_formatters:dff_program_y_skill_formatter", + "response_formatter": 
"state_formatters.dp_formatters:skill_with_attributes_formatter_service", + "previous_services": [ + "skill_selectors" + ], + "state_manager_method": "add_hypothesis", + "is_enabled": true, + "source": { + "component": "components/ckUclxqUplyzwmnYyixEw.yml", + "service": "skills/dff_program_y_skill/service_configs/dff-program-y-skill" + } + }, + "dff_intent_responder_skill": { + "connector": { + "protocol": "http", + "timeout": 2.0, + "url": "http://dff-intent-responder-skill:8012/respond" + }, + "dialog_formatter": "state_formatters.dp_formatters:dff_intent_responder_skill_formatter", + "response_formatter": "state_formatters.dp_formatters:skill_with_attributes_formatter_service", + "previous_services": [ + "skill_selectors" + ], + "state_manager_method": "add_hypothesis", + "is_enabled": true, + "source": { + "component": "components/CmQGq1Xn5YOaMwNIb4bEpA.yml", + "service": "skills/dff_intent_responder_skill/service_configs/dff-intent-responder-skill" + } + }, + "dff_command_selector_skill": { + "connector": { + "protocol": "http", + "timeout": 2.0, + "url": "http://dff-command-selector-skill:8148/respond" + }, + "dialog_formatter": "state_formatters.dp_formatters:dff_command_selector_skill_formatter", + "response_formatter": "state_formatters.dp_formatters:skill_with_attributes_formatter_service", + "previous_services": [ + "skill_selectors" + ], + "state_manager_method": "add_hypothesis", + "is_enabled": true, + "source": { + "component": "components/jskfdgh384gtnfdsrkfg78.yml", + "service": "skills/dff_command_selector_skill/service_configs/dff-command-selector-skill" + } + }, + "dummy_skill": { + "connector": { + "protocol": "python", + "class_name": "skills.dummy_skill.connector:DummySkillConnector" + }, + "dialog_formatter": "state_formatters.dp_formatters:utt_sentrewrite_modified_last_dialog", + "response_formatter": "state_formatters.dp_formatters:skill_with_attributes_formatter_service", + "previous_services": [ + "skill_selectors" + ], + "state_manager_method": "add_hypothesis", + "is_enabled": true, + "source": { + "component": "components/uYkoK0vRp4bbIg9akI1yw.yml", + "service": "skills/dummy_skill/service_configs/agent" + } + }, + "dialogpt": { + "connector": { + "protocol": "http", + "timeout": 3.0, + "url": "http://dialogpt:8125/respond" + }, + "dialog_formatter": "state_formatters.dp_formatters:last_utt_and_history_dialog", + "response_formatter": "state_formatters.dp_formatters:skill_with_attributes_formatter_service", + "previous_services": [ + "skill_selectors" + ], + "state_manager_method": "add_hypothesis", + "is_enabled": true, + "source": { + "component": "components/7Dxjy1ZpQEAoPcEeABGBLg.yml", + "service": "services/dialogpt/service_configs/dialogpt" + } + } + }, + "response_selectors": { + "response_selector": { + "connector": { + "protocol": "http", + "timeout": 1.0, + "url": "http://ranking-based-response-selector:8002/respond" + }, + "dialog_formatter": "state_formatters.dp_formatters:cropped_dialog", + "response_formatter": "state_formatters.dp_formatters:base_response_selector_formatter_service", + "previous_services": [ + "candidate_annotators" + ], + "state_manager_method": "add_bot_utterance", + "is_enabled": true, + "source": { + "component": "components/YJzc7NwGrLmKp6gfZJh7X1.yml", + "service": "response_selectors/ranking_based_response_selector/service_configs/ranking-based-response-selector" + } + } + }, + "actors": { + "robot_command_sender": { + "connector": { + "protocol": "http", + "timeout": 1.0, + "url": "http://robot-command-sender:8035/send" 
+ }, + "dialog_formatter": "state_formatters.dp_formatters:last_human_bot_annotated_utterance", + "response_formatter": "state_formatters.dp_formatters:simple_formatter_service", + "previous_services": [ + "response_selectors" + ], + "state_manager_method": "update_attributes", + "source": { + "component": "components/iigrietu2934i2nh34e0.yml", + "service": "services/robot_command_sender/service_configs/robot-command-sender" + } + } + } + }, + "metadata": { + "display_name": "Dream Embodied", + "author": "DeepPavlov", + "description": "Embodied AI version of DeepPavlov Dream Socialbot that allows users to control remote clients using natural language through dp-agents.", + "version": "0.1.0", + "date_created": "2022-12-12T12:12:00", + "ram_usage": "50 GB", + "gpu_usage": "50 GB", + "disk_usage": "50 GB" + } +} \ No newline at end of file diff --git a/assistant_dists/dream_embodied/proxy.yml b/assistant_dists/dream_embodied/proxy.yml new file mode 100644 index 0000000000..69e2f4e4d2 --- /dev/null +++ b/assistant_dists/dream_embodied/proxy.yml @@ -0,0 +1,48 @@ +services: + + sentseg: + command: ["nginx", "-g", "daemon off;"] + build: + context: dp/proxy/ + dockerfile: Dockerfile + environment: + - PROXY_PASS=proxy.deeppavlov.ai:8011 + - PORT=8011 + + dff-intent-responder-skill: + command: ["nginx", "-g", "daemon off;"] + build: + context: dp/proxy/ + dockerfile: Dockerfile + environment: + - PROXY_PASS=proxy.deeppavlov.ai:8012 + - PORT=8012 + + ner: + command: ["nginx", "-g", "daemon off;"] + build: + context: dp/proxy/ + dockerfile: Dockerfile + environment: + - PROXY_PASS=proxy.deeppavlov.ai:8021 + - PORT=8021 + + entity-detection: + command: [ "nginx", "-g", "daemon off;" ] + build: + context: dp/proxy/ + dockerfile: Dockerfile + environment: + - PROXY_PASS=proxy.deeppavlov.ai:8103 + - PORT=8103 + + sentence-ranker: + command: [ "nginx", "-g", "daemon off;" ] + build: + context: dp/proxy/ + dockerfile: Dockerfile + environment: + - PROXY_PASS=proxy.deeppavlov.ai:8128 + - PORT=8128 + +version: '3.7' diff --git a/assistant_dists/dream_embodied/telegram.yml b/assistant_dists/dream_embodied/telegram.yml new file mode 100644 index 0000000000..97fb6c69f9 --- /dev/null +++ b/assistant_dists/dream_embodied/telegram.yml @@ -0,0 +1,17 @@ +services: + agent-tg: + command: sh -c 'bin/wait && python -m deeppavlov_agent.run agent.channel=telegram agent.telegram_token=$TG_TOKEN agent.pipeline_config=assistant_dists/dream_embodied/pipeline_conf.json agent.db_config=assistant_dists/dream_embodied/db_conf.json' + env_file: [.env] + build: + context: ./ + dockerfile: dockerfile_agent + deploy: + resources: + limits: + memory: 4G + reservations: + memory: 2G + volumes: + - ".:/dp-agent" + +version: '3.7' diff --git a/assistant_dists/dream_kg/dev.yml b/assistant_dists/dream_kg/dev.yml index 90127066e5..336c89599d 100644 --- a/assistant_dists/dream_kg/dev.yml +++ b/assistant_dists/dream_kg/dev.yml @@ -5,12 +5,12 @@ services: - ".:/dp-agent" ports: - 4242:4242 - convers-evaluator-annotator: + sentence-ranker: volumes: - - "./annotators/ConversationEvaluator:/src" - - "~/.deeppavlov:/root/.deeppavlov" + - "./services/sentence_ranker:/src" + - "~/.deeppavlov/cache:/root/.cache" ports: - - 8004:8004 + - 8128:8128 spacy-nounphrases: volumes: - "./annotators/spacy_nounphrases:/src" @@ -22,18 +22,12 @@ services: - "./annotators/SentSeg:/src" ports: - 8011:8011 - convers-evaluation-selector: - volumes: - - "./response_selectors/convers_evaluation_based_selector:/src" - - "./common:/src/common" - ports: - - 8009:8009 - 
badlisted-words: + ranking-based-response-selector: volumes: - - "./annotators/BadlistedWordsDetector:/src" + - "./response_selectors/ranking_based_response_selector:/src" - "./common:/src/common" ports: - - 8018:8018 + - 8002:8002 ner: volumes: - './annotators/NER_deeppavlov:/src' @@ -85,28 +79,28 @@ services: - "~/.deeppavlov:/root/.deeppavlov" ports: - 8153:8153 - transformers-lm-gptjt: + terminusdb-server: volumes: - - "./services/transformers_lm:/src" - - "./common:/src/common" - - "~/.deeppavlov/cache:/root/.cache" + - "~/.deeppavlov:/root/.deeppavlov" ports: - - 8161:8161 - dff-dream-persona-gpt-jt-prompted-skill: + - 6363:6363 + user-knowledge-memorizer: volumes: - - "./skills/dff_template_prompted_skill:/src" + - "./annotators/user_knowledge_memorizer:/src" - "./common:/src/common" + - "~/.deeppavlov:/root/.deeppavlov" ports: - - 8134:8134 - dff-template-skill: + - 8027:8027 + dff-user-kg-skill: volumes: - - "./skills/dff_template_skill:/src" + - "./skills/dff_user_kg_skill:/src" - "./common:/src/common" ports: - - 8120:8120 - terminusdb-server: + - 8028:8028 + dff-travel-italy-skill: volumes: - - "~/.deeppavlov:/root/.deeppavlov" + - "./skills/dff_travel_italy_skill:/src" + - "./common:/src/common" ports: - - 6363:6363 + - 8025:8025 version: "3.7" diff --git a/assistant_dists/dream_kg/docker-compose.override.yml b/assistant_dists/dream_kg/docker-compose.override.yml index 48a4c26dcd..ae6a94eafd 100644 --- a/assistant_dists/dream_kg/docker-compose.override.yml +++ b/assistant_dists/dream_kg/docker-compose.override.yml @@ -2,13 +2,13 @@ services: agent: command: sh -c 'bin/wait && python -m deeppavlov_agent.run agent.pipeline_config=assistant_dists/dream_kg/pipeline_conf.json' environment: - WAIT_HOSTS: "convers-evaluator-annotator:8004, - spacy-nounphrases:8006, sentseg:8011, convers-evaluation-selector:8009, - badlisted-words:8018, ner:8021, spelling-preprocessing:8074, entity-linking:8075, + WAIT_HOSTS: "sentence-ranker:8128, + spacy-nounphrases:8006, sentseg:8011, ranking-based-response-selector:8002, + ner:8021, spelling-preprocessing:8074, entity-linking:8075, combined-classification:8087, entity-detection:8103, property-extraction:8136, custom-entity-linking:8153, - transformers-lm-gptjt:8161, dff-dream-persona-gpt-jt-prompted-skill:8134, - dff-template-skill:8120, terminusdb-server:6363" + terminusdb-server:6363, user-knowledge-memorizer:8027, + dff-user-kg-skill:8028, dff-travel-italy-skill:8025" WAIT_HOSTS_TIMEOUT: ${WAIT_TIMEOUT:-480} HIGH_PRIORITY_INTENTS: 1 RESTRICTION_FOR_SENSITIVE_CASE: 1 @@ -16,25 +16,24 @@ services: LANGUAGE: EN FALLBACK_FILE: fallbacks_dream_en.json - convers-evaluator-annotator: + sentence-ranker: env_file: [ .env ] build: args: - CONFIG: conveval.json - SERVICE_PORT: 8004 - DATA_URL: https://files.deeppavlov.ai/alexaprize_data/cobot_conveval2.tar.gz - context: . 
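
This hunk swaps the heavyweight ConversationEvaluator annotator for a sentence-ranker built on the sentence-transformers/all-MiniLM-L6-v2 checkpoint named just below. A minimal sketch of the bi-encoder scoring such a ranker provides, for illustration only rather than the service's actual server code:

from sentence_transformers import SentenceTransformer, util

model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")

def rank_hypotheses(context: str, hypotheses: list[str]) -> list[float]:
    # Embed the context and the candidates, then score each candidate
    # by cosine similarity to the context.
    embeddings = model.encode([context] + hypotheses, convert_to_tensor=True)
    return util.cos_sim(embeddings[:1], embeddings[1:])[0].tolist()

The ranking-based response selector then simply picks the hypothesis with the highest score, which is why the 2G ConversationEvaluator container and its downloaded model can be dropped.
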
- dockerfile: ./annotators/ConversationEvaluator/Dockerfile + SERVICE_PORT: 8128 + SERVICE_NAME: sentence_ranker + PRETRAINED_MODEL_NAME_OR_PATH: sentence-transformers/all-MiniLM-L6-v2 + context: ./services/sentence_ranker/ + command: flask run -h 0.0.0.0 -p 8128 environment: - CUDA_VISIBLE_DEVICES=0 + - FLASK_APP=server deploy: - mode: replicated - replicas: 1 resources: limits: - memory: 2G + memory: 3G reservations: - memory: 2G + memory: 3G spacy-nounphrases: env_file: [ .env ] @@ -68,33 +67,21 @@ services: reservations: memory: 1.5G - convers-evaluation-selector: + ranking-based-response-selector: env_file: [ .env ] build: args: - TAG_BASED_SELECTION: 1 - CALL_BY_NAME_PROBABILITY: 0.5 - PROMPT_PROBA: 0.1 - ACKNOWLEDGEMENT_PROBA: 0.3 - PRIORITIZE_WITH_REQUIRED_ACT: 0 - PRIORITIZE_NO_DIALOG_BREAKDOWN: 0 - PRIORITIZE_WITH_SAME_TOPIC_ENTITY: 0 - IGNORE_DISLIKED_SKILLS: 0 - GREETING_FIRST: 1 - RESTRICTION_FOR_SENSITIVE_CASE: 1 - PRIORITIZE_PROMTS_WHEN_NO_SCRIPTS: 1 - MAX_TURNS_WITHOUT_SCRIPTS: 7 - ADD_ACKNOWLEDGMENTS_IF_POSSIBLE: 1 - PRIORITIZE_SCRIPTED_SKILLS: 0 - CONFIDENCE_STRENGTH: 0.8 - CONV_EVAL_STRENGTH: 0.4 - PRIORITIZE_HUMAN_INITIATIVE: 1 - QUESTION_TO_QUESTION_DOWNSCORE_COEF: 0.8 + SERVICE_PORT: 8002 + SERVICE_NAME: response_selector LANGUAGE: EN - FALLBACK_FILE: fallbacks_dream_en.json + SENTENCE_RANKER_ANNOTATION_NAME: sentence_ranker + SENTENCE_RANKER_SERVICE_URL: http://sentence-ranker:8128/respond + SENTENCE_RANKER_TIMEOUT: 3 + N_UTTERANCES_CONTEXT: 5 + FILTER_TOXIC_OR_BADLISTED: 1 context: . - dockerfile: ./response_selectors/convers_evaluation_based_selector/Dockerfile - command: flask run -h 0.0.0.0 -p 8009 + dockerfile: ./response_selectors/ranking_based_response_selector/Dockerfile + command: flask run -h 0.0.0.0 -p 8002 environment: - FLASK_APP=server deploy: @@ -104,23 +91,6 @@ services: reservations: memory: 100M - badlisted-words: - env_file: [ .env ] - build: - args: - SERVICE_PORT: 8018 - SERVICE_NAME: badlisted_words - context: annotators/BadlistedWordsDetector/ - command: flask run -h 0.0.0.0 -p 8018 - environment: - - FLASK_APP=server - deploy: - resources: - limits: - memory: 256M - reservations: - memory: 256M - ner: env_file: [ .env ] build: @@ -225,7 +195,8 @@ services: env_file: [.env] build: args: - CONFIG: t5_generative_ie_lite_infer.json + CONFIG_T5: t5_generative_ie_lite_infer.json + CONFIG_REL_RANKER: rel_ranking_roberta.json SERVICE_PORT: 8136 SRC_DIR: annotators/property_extraction/ SERVICE_NAME: property_extraction @@ -234,6 +205,7 @@ services: command: flask run -h 0.0.0.0 -p 8136 environment: - FLASK_APP=server + - CUDA_VISIBLE_DEVICES=0 deploy: resources: limits: @@ -256,41 +228,43 @@ services: memory: 128M reservations: memory: 128M - - transformers-lm-gptjt: + + terminusdb-server: + image: terminusdb/terminusdb-server:v10.1.10 + + user-knowledge-memorizer: env_file: [ .env ] build: args: - SERVICE_PORT: 8161 - SERVICE_NAME: transformers_lm_gptjt - PRETRAINED_MODEL_NAME_OR_PATH: togethercomputer/GPT-JT-6B-v1 - HALF_PRECISION: 1 - context: . 
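
The user-knowledge-memorizer added here talks to the terminusdb-server service declared above; its connection settings are the TERMINUSDB_* build args listed just below. A sketch of how the annotator can consume them at runtime, assuming the args are also exported as environment variables inside the container:

import os

# Defaults mirror the values wired up in this compose file.
TERMINUSDB = {
    "server_url": os.getenv("TERMINUSDB_SERVER_URL", "http://terminusdb-server:6363"),
    "password": os.getenv("TERMINUSDB_SERVER_PASSWORD", "root"),
    "team": os.getenv("TERMINUSDB_SERVER_TEAM", "admin"),
    "db": os.getenv("TERMINUSDB_SERVER_DB", "user_knowledge_db"),
}
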
- dockerfile: ./services/transformers_lm/Dockerfile - command: flask run -h 0.0.0.0 -p 8161 + SERVICE_PORT: 8027 + SRC_DIR: annotators/user_knowledge_memorizer/ + SERVICE_NAME: user_knowledge_memorizer + TERMINUSDB_SERVER_URL: http://terminusdb-server:6363 + TERMINUSDB_SERVER_PASSWORD: root + TERMINUSDB_SERVER_TEAM: admin + TERMINUSDB_SERVER_DB: user_knowledge_db + CONFIG: config.json + context: ./ + dockerfile: annotators/user_knowledge_memorizer/Dockerfile + command: flask run -h 0.0.0.0 -p 8027 environment: - - CUDA_VISIBLE_DEVICES=0 - FLASK_APP=server deploy: resources: limits: - memory: 50G + memory: 256M reservations: - memory: 50G + memory: 256M - dff-dream-persona-gpt-jt-prompted-skill: + dff-user-kg-skill: env_file: [ .env ] build: args: - SERVICE_PORT: 8134 - SERVICE_NAME: dff_dream_persona_prompted_skill - PROMPT_FILE: common/prompts/dream_persona.json - GENERATIVE_SERVICE_URL: http://transformers-lm-gptjt:8161/respond - GENERATIVE_SERVICE_CONFIG: default_generative_config.json - GENERATIVE_TIMEOUT: 120 - N_UTTERANCES_CONTEXT: 7 + SERVICE_PORT: 8028 + SERVICE_NAME: dff_user_kg_skill context: . - dockerfile: ./skills/dff_template_prompted_skill/Dockerfile + dockerfile: ./skills/dff_user_kg_skill/Dockerfile + command: gunicorn --workers=1 server:app -b 0.0.0.0:8028 deploy: resources: limits: @@ -298,22 +272,17 @@ services: reservations: memory: 128M - dff-template-skill: + dff-travel-italy-skill: env_file: [ .env ] build: args: - SERVICE_PORT: 8120 - SERVICE_NAME: dff_template_skill + SERVICE_PORT: 8025 + SERVICE_NAME: dff_travel_italy_skill + TERMINUSDB_SERVER_URL: http://terminusdb-server:6363 + TERMINUSDB_SERVER_PASSWORD: root + TERMINUSDB_SERVER_TEAM: admin + TERMINUSDB_SERVER_DB: user_knowledge_db context: . - dockerfile: ./skills/dff_template_skill/Dockerfile - command: gunicorn --workers=1 server:app -b 0.0.0.0:8120 --reload - deploy: - resources: - limits: - memory: 128M - reservations: - memory: 128M - - terminusdb-server: - image: terminusdb/terminusdb-server:v10.1.10 + dockerfile: ./skills/dff_travel_italy_skill/Dockerfile + command: gunicorn --workers=1 server:app -b 0.0.0.0:8025 --reload version: '3.7' diff --git a/assistant_dists/dream_kg/pipeline_conf.json b/assistant_dists/dream_kg/pipeline_conf.json index fba14f656a..bf5feea13f 100644 --- a/assistant_dists/dream_kg/pipeline_conf.json +++ b/assistant_dists/dream_kg/pipeline_conf.json @@ -121,24 +121,6 @@ "service": "annotators/spacy_nounphrases/service_configs/spacy-nounphrases" } }, - "badlisted_words": { - "connector": { - "protocol": "http", - "timeout": 1.0, - "url": "http://badlisted-words:8018/badlisted_words" - }, - "dialog_formatter": "state_formatters.dp_formatters:preproc_last_human_utt_dialog", - "response_formatter": "state_formatters.dp_formatters:simple_formatter_service", - "previous_services": [ - "annotators.spelling_preprocessing" - ], - "state_manager_method": "add_annotation", - "is_enabled": true, - "source": { - "component": "components/gySZ51dyqYi9TOFr6zY5kA.yml", - "service": "annotators/BadlistedWordsDetector/service_configs/badlisted-words" - } - }, "ner": { "connector": { "protocol": "http", @@ -218,6 +200,7 @@ "custom_entity_linking": { "connector": { "protocol": "http", + "timeout": 2.0, "url": "http://custom-entity-linking:8153/model" }, "dialog_formatter": "state_formatters.dp_formatters:custom_el_formatter_dialog", @@ -332,31 +315,13 @@ } }, "candidate_annotators": { - "badlisted_words": { + "sentence_ranker": { "connector": { "protocol": "http", "timeout": 1.0, - "url": 
"http://badlisted-words:8018/badlisted_words_batch" - }, - "dialog_formatter": "state_formatters.dp_formatters:hypotheses_list", - "response_formatter": "state_formatters.dp_formatters:simple_formatter_service", - "previous_services": [ - "skills" - ], - "state_manager_method": "add_hypothesis_annotation_batch", - "is_enabled": true, - "source": { - "component": "components/gySZ51dyqYi9TOFr6zY5kA.yml", - "service": "annotators/BadlistedWordsDetector/service_configs/badlisted-words" - } - }, - "convers_evaluator_annotator": { - "connector": { - "protocol": "http", - "timeout": 2.0, - "url": "http://convers-evaluator-annotator:8004/batch_model" + "url": "http://sentence-ranker:8128/respond" }, - "dialog_formatter": "state_formatters.dp_formatters:convers_evaluator_annotator_formatter", + "dialog_formatter": "state_formatters.dp_formatters:sentence_ranker_formatter", "response_formatter": "state_formatters.dp_formatters:simple_formatter_service", "previous_services": [ "skills" @@ -364,8 +329,8 @@ "state_manager_method": "add_hypothesis_annotation_batch", "is_enabled": true, "source": { - "component": "components/n1HuqlV7EoNrWXcv8WaIQ.yml", - "service": "annotators/ConversationEvaluator/service_configs/convers-evaluator-annotator" + "component": "components/XGwmAHtAOu0NDqqG3QCJw.yml", + "service": "services/sentence_ranker/service_configs/sentence-ranker" } }, "spacy_nounphrases": { @@ -421,6 +386,25 @@ "component": "components/PbLNvh4hrvs47rPaf2bfYQ.yml", "service": "annotators/combined_classification/service_configs/combined-classification" } + }, + "user_knowledge_memorizer": { + "connector": { + "protocol": "http", + "timeout": 2.0, + "url": "http://user-knowledge-memorizer:8027/respond" + }, + "dialog_formatter": "state_formatters.dp_formatters:last_human_annotated_utterance", + "response_formatter": "state_formatters.dp_formatters:simple_formatter_service", + "previous_services": [ + "annotators.property_extraction", + "annotators.custom_entity_linking" + ], + "state_manager_method": "add_annotation", + "is_enabled": true, + "source": { + "component": "components/74p4006d17yappr7ji7dtf.yml", + "service": "annotators/user_knowledge_memorizer/service_configs/user-knowledge-memorizer" + } } }, "skill_selectors": { @@ -445,16 +429,12 @@ } }, "skills": { - "dff_dream_persona_prompted_skill": { + "dummy_skill": { "connector": { - "protocol": "http", - "timeout": 120.0, - "url": "http://dff-dream-persona-gpt-j-prompted-skill:8134/respond" - }, - "dialog_formatter": { - "name": "state_formatters.dp_formatters:dff_prompted_skill_formatter", - "skill_name": "dff_dream_persona_prompted_skill" + "protocol": "python", + "class_name": "skills.dummy_skill.connector:DummySkillConnector" }, + "dialog_formatter": "state_formatters.dp_formatters:utt_sentrewrite_modified_last_dialog", "response_formatter": "state_formatters.dp_formatters:skill_with_attributes_formatter_service", "previous_services": [ "skill_selectors" @@ -462,16 +442,17 @@ "state_manager_method": "add_hypothesis", "is_enabled": true, "source": { - "component": "components/6lLhP7zuoiI0bxJEDLWUg.yml", - "service": "skills/dff_template_prompted_skill/service_configs/dff-dream-persona-gpt-jt-prompted-skill" + "component": "components/uYkoK0vRp4bbIg9akI1yw.yml", + "service": "skills/dummy_skill/service_configs/agent" } }, - "dummy_skill": { + "dff_user_kg_skill": { "connector": { - "protocol": "python", - "class_name": "skills.dummy_skill.connector:DummySkillConnector" + "protocol": "http", + "timeout": 2.0, + "url": 
"http://dff-user-kg-skill:8028/respond" }, - "dialog_formatter": "state_formatters.dp_formatters:utt_sentrewrite_modified_last_dialog", + "dialog_formatter": "state_formatters.dp_formatters:dff_user_kg_skill_formatter", "response_formatter": "state_formatters.dp_formatters:skill_with_attributes_formatter_service", "previous_services": [ "skill_selectors" @@ -479,17 +460,17 @@ "state_manager_method": "add_hypothesis", "is_enabled": true, "source": { - "component": "components/uYkoK0vRp4bbIg9akI1yw.yml", - "service": "skills/dummy_skill/service_configs/agent" + "component": "components/pmXHLkbPWpFqq37vtWCuS.yml", + "service": "skills/dff_user_kg_skill/service_configs/dff-user-kg-skill" } }, - "dff_template_skill": { + "dff_travel_italy_skill": { "connector": { "protocol": "http", "timeout": 2.0, - "url": "http://dff-template-skill:8120/respond" + "url": "http://dff-travel-italy-skill:8025/respond" }, - "dialog_formatter": "state_formatters.dp_formatters:dff_template_skill_formatter", + "dialog_formatter": "state_formatters.dp_formatters:dff_travel_italy_skill_formatter", "response_formatter": "state_formatters.dp_formatters:skill_with_attributes_formatter_service", "previous_services": [ "skill_selectors" @@ -497,8 +478,8 @@ "state_manager_method": "add_hypothesis", "is_enabled": true, "source": { - "component": "components/pC342KBWtheQNnXjIni6A.yml", - "service": "skills/dff_template_skill/service_configs/dff-template-skill" + "component": "components/FahT3rhiav1Bg2Q17wvE.yml", + "service": "skills/dff_travel_italy_skill/service_configs/dff-travel-italy-skill" } } }, @@ -507,9 +488,9 @@ "connector": { "protocol": "http", "timeout": 1.0, - "url": "http://convers-evaluation-selector:8009/respond" + "url": "http://ranking-based-response-selector:8002/respond" }, - "dialog_formatter": "state_formatters.dp_formatters:full_history_dialog", + "dialog_formatter": "state_formatters.dp_formatters:cropped_dialog", "response_formatter": "state_formatters.dp_formatters:base_response_selector_formatter_service", "previous_services": [ "candidate_annotators" @@ -517,8 +498,8 @@ "state_manager_method": "add_bot_utterance", "is_enabled": true, "source": { - "component": "components/ly2AVNtIcJpTWz1qJ1mvKQ.yml", - "service": "response_selectors/convers_evaluation_based_selector/service_configs/convers-evaluation-selector" + "component": "components/YJzc7NwGrLmKp6gfZJh7X1.yml", + "service": "response_selectors/ranking_based_response_selector/service_configs/ranking-based-response-selector" } } } diff --git a/assistant_dists/dream_kg/proxy.yml b/assistant_dists/dream_kg/proxy.yml index 225f046fdc..df7eaa5c49 100644 --- a/assistant_dists/dream_kg/proxy.yml +++ b/assistant_dists/dream_kg/proxy.yml @@ -45,24 +45,6 @@ services: - PROXY_PASS=proxy.deeppavlov.ai:8103 - PORT=8103 - transformers-lm-gptjt: - command: ["nginx", "-g", "daemon off;"] - build: - context: dp/proxy/ - dockerfile: Dockerfile - environment: - - PROXY_PASS=proxy.deeppavlov.ai:8161 - - PORT=8161 - - dff-template-skill: - command: [ "nginx", "-g", "daemon off;" ] - build: - context: dp/proxy/ - dockerfile: Dockerfile - environment: - - PROXY_PASS=proxy.deeppavlov.ai:8120 - - PORT=8120 - property-extraction: command: [ "nginx", "-g", "daemon off;" ] build: diff --git a/assistant_dists/dream_multimodal/cpu.yml b/assistant_dists/dream_multimodal/cpu.yml index d073539763..4347af0c4d 100644 --- a/assistant_dists/dream_multimodal/cpu.yml +++ b/assistant_dists/dream_multimodal/cpu.yml @@ -12,3 +12,7 @@ services: environment: DEVICE: cpu 
CUDA_VISIBLE_DEVICES: "" + fromage: + environment: + DEVICE: cpu + CUDA_VISIBLE_DEVICES: "" diff --git a/assistant_dists/dream_multimodal/dev.yml b/assistant_dists/dream_multimodal/dev.yml index 9697392b85..0f22f38f62 100644 --- a/assistant_dists/dream_multimodal/dev.yml +++ b/assistant_dists/dream_multimodal/dev.yml @@ -10,23 +10,23 @@ services: - 3000:3000 volumes: - "~/.deeppavlov/file_server:/tmp" - dff-program-y-skill: - volumes: - - "./skills/dff_program_y_skill:/src" - - "./common:/src/common" - ports: - - 8008:8008 sentseg: volumes: - "./annotators/SentSeg:/src" ports: - 8011:8011 - convers-evaluation-selector: + ranking-based-response-selector: volumes: - - "./response_selectors/convers_evaluation_based_selector:/src" + - "./response_selectors/ranking_based_response_selector:/src" - "./common:/src/common" ports: - - 8009:8009 + - 8002:8002 + dff-program-y-skill: + volumes: + - "./skills/dff_program_y_skill:/src" + - "./common:/src/common" + ports: + - 8008:8008 dff-intent-responder-skill: volumes: - "./skills/dff_intent_responder_skill:/src" @@ -46,11 +46,6 @@ services: - "./common:/src/common" ports: - 8018:8018 - spelling-preprocessing: - volumes: - - "./annotators/spelling_preprocessing:/src" - ports: - - 8074:8074 dialogpt: volumes: - "./services/dialogpt:/src" @@ -64,16 +59,17 @@ services: - "~/.deeppavlov/cache:/root/.cache" ports: - 8128:8128 - image-captioning: + fromage: volumes: - - "./services/image_captioning:/src" - - "~/.deeppavlov/file_server:/tmp" + - "./services/fromage:/src" + - "./common:/src/common" + - "~/.deeppavlov/cache:/root/.cache" ports: - - 8123:8123 - dff-image-skill: + - 8069:8069 + dff-fromage-image-skill: volumes: - - "./skills/dff_image_skill:/src" + - "./skills/dff_fromage_image_skill:/src" - "./common:/src/common" ports: - - 8124:8124 + - 8070:8070 version: "3.7" diff --git a/assistant_dists/dream_multimodal/docker-compose.override.yml b/assistant_dists/dream_multimodal/docker-compose.override.yml index 737527c8b8..22ca32ac3d 100644 --- a/assistant_dists/dream_multimodal/docker-compose.override.yml +++ b/assistant_dists/dream_multimodal/docker-compose.override.yml @@ -2,9 +2,10 @@ services: agent: command: sh -c 'bin/wait && python -m deeppavlov_agent.run agent.pipeline_config=assistant_dists/dream_multimodal/pipeline_conf.json' environment: - WAIT_HOSTS: "dff-program-y-skill:8008, sentseg:8011, convers-evaluation-selector:8009, + WAIT_HOSTS: "dff-program-y-skill:8008, sentseg:8011, ranking-based-response-selector:8002, dff-intent-responder-skill:8012, intent-catcher:8014, badlisted-words:8018, - spelling-preprocessing:8074, dialogpt:8125, sentence-ranker:8128, image-captioning:8123, dff-image-skill:8124" + dialogpt:8125, sentence-ranker:8128, + fromage:8069, dff-fromage-image-skill:8070" WAIT_HOSTS_TIMEOUT: ${WAIT_TIMEOUT:-1200} HIGH_PRIORITY_INTENTS: 1 RESTRICTION_FOR_SENSITIVE_CASE: 1 @@ -15,26 +16,33 @@ services: files: image: julienmeerschart/simple-file-upload-download-server - dff-program-y-skill: - env_file: [.env] + ranking-based-response-selector: + env_file: [ .env ] build: args: - SERVICE_PORT: 8008 - SERVICE_NAME: dff_program_y_skill + SERVICE_PORT: 8002 + SERVICE_NAME: response_selector LANGUAGE: EN + SENTENCE_RANKER_ANNOTATION_NAME: sentence_ranker + SENTENCE_RANKER_SERVICE_URL: http://sentence-ranker:8128/respond + SENTENCE_RANKER_TIMEOUT: 3 + N_UTTERANCES_CONTEXT: 5 + FILTER_TOXIC_OR_BADLISTED: 1 + FALLBACK_FILE: fallbacks_dream_en.json context: . 
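
The agent's startup command gates on bin/wait with the WAIT_HOSTS list above: conceptually it polls each host:port until a TCP connection succeeds or WAIT_HOSTS_TIMEOUT expires. A simplified re-implementation of that gate; the real helper is the precompiled docker-compose-wait binary, not this code:

import os
import socket
import time

def wait_for_hosts() -> None:
    hosts = [h.strip() for h in os.getenv("WAIT_HOSTS", "").split(",") if h.strip()]
    deadline = time.monotonic() + float(os.getenv("WAIT_HOSTS_TIMEOUT", "30"))
    pending = {tuple(h.rsplit(":", 1)) for h in hosts}
    while pending:
        if time.monotonic() > deadline:
            raise TimeoutError(f"still waiting for: {sorted(pending)}")
        for host, port in list(pending):
            try:
                # A successful connect means the service is accepting traffic.
                socket.create_connection((host, int(port)), timeout=2).close()
                pending.discard((host, port))
            except OSError:
                pass
        time.sleep(1)
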
- dockerfile: ./skills/dff_program_y_skill/Dockerfile - command: gunicorn --workers=1 server:app -b 0.0.0.0:8008 --reload + dockerfile: ./response_selectors/ranking_based_response_selector/Dockerfile + command: flask run -h 0.0.0.0 -p 8002 + environment: + - FLASK_APP=server deploy: resources: limits: - memory: 1024M + memory: 100M reservations: - memory: 1024M - + memory: 100M sentseg: - env_file: [.env] + env_file: [ .env ] build: context: ./annotators/SentSeg/ command: flask run -h 0.0.0.0 -p 8011 @@ -47,41 +55,22 @@ services: reservations: memory: 1.5G - convers-evaluation-selector: + dff-program-y-skill: env_file: [.env] build: args: - TAG_BASED_SELECTION: 1 - CALL_BY_NAME_PROBABILITY: 0.5 - PROMPT_PROBA: 0.1 - ACKNOWLEDGEMENT_PROBA: 0.3 - PRIORITIZE_WITH_REQUIRED_ACT: 0 - PRIORITIZE_NO_DIALOG_BREAKDOWN: 0 - PRIORITIZE_WITH_SAME_TOPIC_ENTITY: 0 - IGNORE_DISLIKED_SKILLS: 0 - GREETING_FIRST: 1 - RESTRICTION_FOR_SENSITIVE_CASE: 1 - PRIORITIZE_PROMTS_WHEN_NO_SCRIPTS: 1 - MAX_TURNS_WITHOUT_SCRIPTS: 7 - ADD_ACKNOWLEDGMENTS_IF_POSSIBLE: 1 - PRIORITIZE_SCRIPTED_SKILLS: 0 - CONFIDENCE_STRENGTH: 0.8 - CONV_EVAL_STRENGTH: 0.4 - PRIORITIZE_HUMAN_INITIATIVE: 1 - QUESTION_TO_QUESTION_DOWNSCORE_COEF: 0.8 + SERVICE_PORT: 8008 + SERVICE_NAME: dff_program_y_skill LANGUAGE: EN - FALLBACK_FILE: fallbacks_dream_en.json context: . - dockerfile: ./response_selectors/convers_evaluation_based_selector/Dockerfile - command: flask run -h 0.0.0.0 -p 8009 - environment: - - FLASK_APP=server + dockerfile: ./skills/dff_program_y_skill/Dockerfile + command: gunicorn --workers=1 server:app -b 0.0.0.0:8008 --reload deploy: resources: limits: - memory: 256M + memory: 1024M reservations: - memory: 256M + memory: 1024M dff-intent-responder-skill: env_file: [ .env ] @@ -137,23 +126,6 @@ services: reservations: memory: 256M - spelling-preprocessing: - env_file: [ .env ] - build: - args: - SERVICE_PORT: 8074 - SERVICE_NAME: spelling_preprocessing - context: ./annotators/spelling_preprocessing/ - command: flask run -h 0.0.0.0 -p 8074 - environment: - - FLASK_APP=server - deploy: - resources: - limits: - memory: 100M - reservations: - memory: 100M - dialogpt: env_file: [ .env ] build: @@ -196,39 +168,39 @@ services: reservations: memory: 3G - image-captioning: + fromage: env_file: [ .env ] build: args: - SERVICE_PORT: 8123 - SERVICE_NAME: image_captioning - context: ./services/image_captioning/ - command: flask run -h 0.0.0.0 -p 8123 + SERVICE_PORT: 8069 + SERVICE_NAME: fromage + RET_SCALE_FACTOR: 0 + context: . + dockerfile: ./services/fromage/Dockerfile + command: flask run -h 0.0.0.0 -p 8069 environment: - CUDA_VISIBLE_DEVICES=0 - FLASK_APP=server deploy: resources: limits: - memory: 5G + memory: 45G reservations: - memory: 5G + memory: 45G - dff-image-skill: + dff-fromage-image-skill: env_file: [.env] build: args: - SERVICE_PORT: 8124 - SERVICE_NAME: dff_image_skill - LANGUAGE: EN + SERVICE_PORT: 8070 + SERVICE_NAME: dff_fromage_image_skill context: . 
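
fromage is the new multimodal (image plus text) generation service that replaces the image captioner; note the 45G memory budget above against the captioner's former 5G. A hypothetical client for it is sketched below. The endpoint and the 90 s budget match this distribution's pipeline config, but the payload field names are assumptions, since services/fromage/server.py is not part of this diff:

import requests

def fromage_respond(image_url: str, text: str) -> list:
    payload = {"image_paths": [image_url], "sentences": [text]}  # assumed schema
    resp = requests.post("http://fromage:8069/respond", json=payload, timeout=90)
    resp.raise_for_status()
    return resp.json()
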
- dockerfile: ./skills/dff_image_skill/Dockerfile - command: gunicorn --workers=1 server:app -b 0.0.0.0:8124 --reload + dockerfile: ./skills/dff_fromage_image_skill/Dockerfile + command: gunicorn --workers=1 server:app -b 0.0.0.0:8070 --reload deploy: resources: limits: - memory: 1024M + memory: 100M reservations: - memory: 1024M - -version: '3.7' + memory: 100M +version: '3.7' \ No newline at end of file diff --git a/assistant_dists/dream_multimodal/pipeline_conf.json b/assistant_dists/dream_multimodal/pipeline_conf.json index dd121ae995..4dfd6ce2c7 100644 --- a/assistant_dists/dream_multimodal/pipeline_conf.json +++ b/assistant_dists/dream_multimodal/pipeline_conf.json @@ -58,22 +58,6 @@ } }, "annotators": { - "spelling_preprocessing": { - "connector": { - "protocol": "http", - "timeout": 1.0, - "url": "http://spelling-preprocessing:8074/respond" - }, - "dialog_formatter": "state_formatters.dp_formatters:last_utt_dialog", - "response_formatter": "state_formatters.dp_formatters:simple_formatter_service", - "previous_services": [], - "state_manager_method": "add_annotation_and_reset_human_attributes_for_first_turn", - "is_enabled": true, - "source": { - "component": "components/pGxj32ic41pvquRXUdqc7A.yml", - "service": "annotators/spelling_preprocessing/service_configs/spelling-preprocessing" - } - }, "sentseg": { "connector": { "protocol": "http", @@ -82,9 +66,7 @@ }, "dialog_formatter": "state_formatters.dp_formatters:preproc_last_human_utt_dialog", "response_formatter": "state_formatters.dp_formatters:simple_formatter_service", - "previous_services": [ - "annotators.spelling_preprocessing" - ], + "previous_services": [], "state_manager_method": "add_annotation", "is_enabled": true, "source": { @@ -100,9 +82,7 @@ }, "dialog_formatter": "state_formatters.dp_formatters:preproc_last_human_utt_dialog", "response_formatter": "state_formatters.dp_formatters:simple_formatter_service", - "previous_services": [ - "annotators.spelling_preprocessing" - ], + "previous_services": [], "state_manager_method": "add_annotation", "is_enabled": true, "source": { @@ -119,7 +99,6 @@ "dialog_formatter": "state_formatters.dp_formatters:last_utt_sentseg_segments_dialog", "response_formatter": "state_formatters.dp_formatters:simple_formatter_service", "previous_services": [ - "annotators.spelling_preprocessing", "annotators.sentseg" ], "state_manager_method": "add_annotation", @@ -129,19 +108,19 @@ "service": "annotators/IntentCatcherTransformers/service_configs/intent-catcher" } }, - "image_captioning": { + "fromage": { "connector": { "protocol": "http", - "timeout": 3.0, - "url": "http://image-captioning:8123/respond" + "timeout": 90.0, + "url": "http://fromage:8069/respond" }, - "dialog_formatter": "state_formatters.dp_formatters:image_captioning_formatter", + "dialog_formatter": "state_formatters.dp_formatters:fromage_formatter", "response_formatter": "state_formatters.dp_formatters:simple_formatter_service", "state_manager_method": "add_annotation", "is_enabled": true, "source": { - "component": "components/f2hECzXDTTfljm5mweoMFA.yml", - "service": "services/image_captioning/service_configs/image-captioning" + "component": "components/8iHHdjsnfhewkl.yml", + "service": "services/fromage/service_configs/fromage" } } }, @@ -314,13 +293,13 @@ "service": "services/dialogpt/service_configs/dialogpt" } }, - "dff_image_skill": { + "dff_fromage_image_skill": { "connector": { "protocol": "http", "timeout": 2.0, - "url": "http://dff-image-skill:8124/respond" + "url": "http://dff-fromage-image-skill:8070/respond" }, - 
"dialog_formatter": "state_formatters.dp_formatters:dff_image_skill_formatter", + "dialog_formatter": "state_formatters.dp_formatters:dff_fromage_image_skill_formatter", "response_formatter": "state_formatters.dp_formatters:skill_with_attributes_formatter_service", "previous_services": [ "skill_selectors" @@ -328,8 +307,8 @@ "state_manager_method": "add_hypothesis", "is_enabled": true, "source": { - "component": "components/unB9cRBV3MRGqKtrbQDPA.yml", - "service": "skills/dff_image_skill/service_configs/dff-image-skill" + "component": "components/8jfFjmYnbdeH.yml", + "service": "services/dff_fromage_image_skill/service_configs/dff-fromage-image-skill" } } }, @@ -338,9 +317,9 @@ "connector": { "protocol": "http", "timeout": 1.0, - "url": "http://convers-evaluation-selector:8009/respond" + "url": "http://ranking-based-response-selector:8002/respond" }, - "dialog_formatter": "state_formatters.dp_formatters:full_history_dialog", + "dialog_formatter": "state_formatters.dp_formatters:cropped_dialog", "response_formatter": "state_formatters.dp_formatters:base_response_selector_formatter_service", "previous_services": [ "candidate_annotators" @@ -348,8 +327,8 @@ "state_manager_method": "add_bot_utterance", "is_enabled": true, "source": { - "component": "components/ly2AVNtIcJpTWz1qJ1mvKQ.yml", - "service": "response_selectors/convers_evaluation_based_selector/service_configs/convers-evaluation-selector" + "component": "components/YJzc7NwGrLmKp6gfZJh7X1.yml", + "service": "response_selectors/ranking_based_response_selector/service_configs/ranking-based-response-selector" } } } diff --git a/assistant_dists/dream_multimodal/proxy.yml b/assistant_dists/dream_multimodal/proxy.yml index e494238b2d..4e99cfed89 100644 --- a/assistant_dists/dream_multimodal/proxy.yml +++ b/assistant_dists/dream_multimodal/proxy.yml @@ -35,5 +35,4 @@ services: environment: - PROXY_PASS=proxy.deeppavlov.ai:8128 - PORT=8128 - version: '3.7' diff --git a/assistant_dists/dream_multimodal/test.yml b/assistant_dists/dream_multimodal/test.yml new file mode 100644 index 0000000000..4d7f75eed4 --- /dev/null +++ b/assistant_dists/dream_multimodal/test.yml @@ -0,0 +1,44 @@ +services: + agent: + volumes: + - "/cephfs/home/ignatov/artifacts:/output" + ports: + - ${AGENT_PORT}:4242 + mongo: + command: mongod + image: mongo:4.0.0 + files: + volumes: + - "~/.deeppavlov/file_server:/tmp" + ranking-based-response-selector: + volumes: + - "./response_selectors/ranking_based_response_selector:/src" + - "./common:/src/common" + sentseg: + dff-intent-responder-skill: + volumes: + - "~/.deeppavlov:/root/.deeppavlov" + - "~/.deeppavlov/cache:/root/.cache" + dff-fromage-image-skill: + volumes: + - "~/.deeppavlov:/root/.deeppavlov" + - "~/.deeppavlov/cache:/root/.cache" + intent-catcher: + volumes: + - "~/.deeppavlov:/root/.deeppavlov" + - "~/.deeppavlov/cache:/root/.cache" + badlisted-words: + volumes: + - "~/.deeppavlov:/root/.deeppavlov" + - "~/.deeppavlov/cache:/root/.cache" + fromage: + volumes: + - "~/.deeppavlov:/root/.deeppavlov" + - "~/.deeppavlov/cache:/root/.cache" + environment: + - CUDA_VISIBLE_DEVICES=0 + sentence-ranker: + volumes: + - "~/.deeppavlov:/root/.deeppavlov" + - "~/.deeppavlov/cache:/root/.cache" +version: "3.7" \ No newline at end of file diff --git a/assistant_dists/dream_multimodal/test_4.yml b/assistant_dists/dream_multimodal/test_4.yml new file mode 100644 index 0000000000..166c854119 --- /dev/null +++ b/assistant_dists/dream_multimodal/test_4.yml @@ -0,0 +1,44 @@ +services: + agent: + volumes: + - 
"/cephfs/home/ignatov/artifacts:/output" + ports: + - ${AGENT_PORT}:4242 + mongo: + command: mongod + image: mongo:4.0.0 + files: + volumes: + - "~/.deeppavlov/file_server:/tmp" + ranking-based-response-selector: + volumes: + - "./response_selectors/ranking_based_response_selector:/src" + - "./common:/src/common" + sentseg: + dff-intent-responder-skill: + volumes: + - "~/.deeppavlov:/root/.deeppavlov" + - "~/.deeppavlov/cache:/root/.cache" + dff-fromage-image-skill: + volumes: + - "~/.deeppavlov:/root/.deeppavlov" + - "~/.deeppavlov/cache:/root/.cache" + intent-catcher: + volumes: + - "~/.deeppavlov:/root/.deeppavlov" + - "~/.deeppavlov/cache:/root/.cache" + badlisted-words: + volumes: + - "~/.deeppavlov:/root/.deeppavlov" + - "~/.deeppavlov/cache:/root/.cache" + fromage: + volumes: + - "~/.deeppavlov:/root/.deeppavlov" + - "~/.deeppavlov/cache:/root/.cache" + environment: + - CUDA_VISIBLE_DEVICES=0,1,2,3 + sentence-ranker: + volumes: + - "~/.deeppavlov:/root/.deeppavlov" + - "~/.deeppavlov/cache:/root/.cache" +version: "3.7" \ No newline at end of file diff --git a/assistant_dists/dream_ocean/cpu.yml b/assistant_dists/dream_ocean/cpu.yml new file mode 100644 index 0000000000..d90f6d159e --- /dev/null +++ b/assistant_dists/dream_ocean/cpu.yml @@ -0,0 +1,27 @@ +version: '3.7' +services: + ner: + environment: + DEVICE: cpu + CUDA_VISIBLE_DEVICES: "" + kbqa: + environment: + CUDA_VISIBLE_DEVICES: "" + combined-classification: + environment: + CUDA_VISIBLE_DEVICES: "" + text-qa: + environment: + CUDA_VISIBLE_DEVICES: "" + fact-retrieval: + environment: + CUDA_VISIBLE_DEVICES: "" + entity-detection: + environment: + CUDA_VISIBLE_DEVICES: "" + sentence-ranker: + environment: + CUDA_VISIBLE_DEVICES: "" + intent-catcher: + environment: + CUDA_VISIBLE_DEVICES: "" diff --git a/assistant_dists/dream_ocean/db_conf.json b/assistant_dists/dream_ocean/db_conf.json new file mode 100644 index 0000000000..a9ba6813f5 --- /dev/null +++ b/assistant_dists/dream_ocean/db_conf.json @@ -0,0 +1,6 @@ +{ + "host": "DB_HOST", + "port": "DB_PORT", + "name": "DB_NAME", + "env": true +} \ No newline at end of file diff --git a/assistant_dists/dream_ocean/dev.yml b/assistant_dists/dream_ocean/dev.yml new file mode 100644 index 0000000000..212a907368 --- /dev/null +++ b/assistant_dists/dream_ocean/dev.yml @@ -0,0 +1,147 @@ +# С такими volumes удобно дебажить, не нужно пересобирать контейнер каждый раз при изменении кода +services: + agent: + volumes: + - ".:/dp-agent" + ports: + - 4242:4242 + + sentseg: + volumes: + - "./annotators/SentSeg:/src" + ports: + - 8011:8011 + ranking-based-response-selector: + volumes: + - "./response_selectors/ranking_based_response_selector:/src" + - "./common:/src/common" + ports: + - 8002:8002 + dff-intent-responder-skill: + volumes: + - "./skills/dff_intent_responder_skill:/src" + - "./common:/src/common" + ports: + - 8012:8012 + intent-catcher: + volumes: + - "./annotators/IntentCatcherTransformers:/src" + - "./common:/src/common" + - "~/.deeppavlov:/root/.deeppavlov" + - "~/.deeppavlov/cache:/root/.cache" + ports: + - 8014:8014 + ner: + volumes: + - './annotators/NER_deeppavlov:/src' + - "~/.deeppavlov:/root/.deeppavlov" + ports: + - 8021:8021 + factoid-qa: + volumes: + - "./skills/factoid_qa:/src" + - "./common:/src/common" + ports: + - 8071:8071 + kbqa: + volumes: + - "./annotators/kbqa:/src" + - "~/.deeppavlov:/root/.deeppavlov" + - "~/.deeppavlov/cache:/root/.cache" + ports: + - 8072:8072 + entity-linking: + volumes: + - "./annotators/entity_linking:/src" + - 
"~/.deeppavlov:/root/.deeppavlov" + - "~/.deeppavlov/cache:/root/.cache" + ports: + - 8075:8075 + wiki-parser: + volumes: + - "./annotators/wiki_parser:/src" + - "./common:/src/common" + ports: + - 8077:8077 + mongo: + ports: + - 27017:27017 + # # you can use persistent local volume if you need + # volumes: + # - ./venv/data/db_data:/root/data/db + text-qa: + volumes: + - "./services/text_qa:/src" + - "~/.deeppavlov:/root/.deeppavlov" + - "~/.deeppavlov/cache:/root/.cache" + ports: + - 8078:8078 + combined-classification: + volumes: + - "./common:/src/common" + - "./annotators/combined_classification:/src" + - "~/.deeppavlov:/root/.deeppavlov" + - "~/.deeppavlov/cache:/root/.cache" + ports: + - 8087:8087 + fact-retrieval: + volumes: + - "./annotators/fact_retrieval:/src" + - "~/.deeppavlov:/root/.deeppavlov" + - "./common:/src/common" + - "~/.deeppavlov/cache:/root/.cache" + ports: + - 8100:8100 + entity-detection: + volumes: + - "./annotators/entity_detection:/src" + - "~/.deeppavlov:/root/.deeppavlov" + - "~/.deeppavlov/cache:/root/.cache" + ports: + - 8103:8103 + sentence-ranker: + volumes: + - "./services/sentence_ranker:/src" + - "~/.deeppavlov/cache:/root/.cache" + ports: + - 8128:8128 + prompt-selector: + volumes: + - "./annotators/prompt_selector:/src" + - "./common:/src/common" + ports: + - 8135:8135 + openai-api-chatgpt: + volumes: + - "./services/openai_api_lm:/src" + - "./common:/src/common" + ports: + - 8145:8145 + property-extraction: + volumes: + - "./annotators/property_extraction:/src" + - "~/.deeppavlov:/root/.deeppavlov" + ports: + - 8136:8136 + summarization-annotator: + volumes: + - "./annotators/summarization_annotator:/src" + ports: + - 8058:8058 + dialog-summarizer: + volumes: + - "./services/dialog_summarizer:/src" + ports: + - 8059:8059 + openai-api-chatgpt-16k: + volumes: + - "./services/openai_api_lm:/src" + - "./common:/src/common" + ports: + - 8167:8167 + personality-detection: + volumes: + - "./annotators/personality_detection:/src" + ports: + - 8026:8026 +version: "3.7" diff --git a/assistant_dists/dream_ocean/docker-compose.override.yml b/assistant_dists/dream_ocean/docker-compose.override.yml new file mode 100644 index 0000000000..60f24f08b7 --- /dev/null +++ b/assistant_dists/dream_ocean/docker-compose.override.yml @@ -0,0 +1,444 @@ +services: + agent: + command: sh -c 'bin/wait && python -m deeppavlov_agent.run agent.channel=telegram agent.telegram_token=$TG_TOKEN agent.pipeline_config=assistant_dists/dream_ocean/pipeline_conf.json agent.db_config=assistant_dists/dream_ocean/db_conf.json' + environment: + WAIT_HOSTS: "sentseg:8011, ranking-based-response-selector:8002, + dff-intent-responder-skill:8012, intent-catcher:8014, ner:8021, + factoid-qa:8071, kbqa:8072, entity-linking:8075, wiki-parser:8077, text-qa:8078, + combined-classification:8087, fact-retrieval:8100, entity-detection:8103, + sentence-ranker:8128, property-extraction:8136, prompt-selector:8135, openai-api-chatgpt:8145, + openai-api-chatgpt-16k:8167, summarization-annotator:8058, dialog-summarizer:8059, personality-detection:8026" + WAIT_HOSTS_TIMEOUT: ${WAIT_TIMEOUT:-1000} + HIGH_PRIORITY_INTENTS: 1 + RESTRICTION_FOR_SENSITIVE_CASE: 1 + ALWAYS_TURN_ON_ALL_SKILLS: 0 + LANGUAGE: EN + FALLBACK_FILE: fallbacks_dream_en.json + + ranking-based-response-selector: + env_file: [ .env ] + build: + args: + SERVICE_PORT: 8002 + SERVICE_NAME: response_selector + LANGUAGE: EN + SENTENCE_RANKER_ANNOTATION_NAME: sentence_ranker + SENTENCE_RANKER_SERVICE_URL: http://sentence-ranker:8128/respond + 
SENTENCE_RANKER_TIMEOUT: 3 + N_UTTERANCES_CONTEXT: 5 + FILTER_TOXIC_OR_BADLISTED: 1 + FALLBACK_FILE: fallbacks_dream_en.json + context: . + dockerfile: ./response_selectors/ranking_based_response_selector/Dockerfile + command: flask run -h 0.0.0.0 -p 8002 + environment: + - FLASK_APP=server + deploy: + resources: + limits: + memory: 100M + reservations: + memory: 100M + + sentseg: + env_file: [ .env ] + build: + context: ./annotators/SentSeg/ + command: flask run -h 0.0.0.0 -p 8011 + environment: + - FLASK_APP=server + deploy: + resources: + limits: + memory: 1.5G + reservations: + memory: 1.5G + + dff-intent-responder-skill: + env_file: [ .env ] + build: + args: + SERVICE_PORT: 8012 + SERVICE_NAME: dff_intent_responder_skill + INTENT_RESPONSE_PHRASES_FNAME: intent_response_phrases.json + context: . + dockerfile: ./skills/dff_intent_responder_skill/Dockerfile + command: gunicorn --workers=1 server:app -b 0.0.0.0:8012 --reload + deploy: + resources: + limits: + memory: 128M + reservations: + memory: 128M + + intent-catcher: + env_file: [ .env ] + build: + context: . + dockerfile: ./annotators/IntentCatcherTransformers/Dockerfile + args: + SERVICE_PORT: 8014 + CONFIG_NAME: intents_model_dp_config.json + INTENT_PHRASES_PATH: intent_phrases.json + command: python -m flask run -h 0.0.0.0 -p 8014 + environment: + - FLASK_APP=server + - CUDA_VISIBLE_DEVICES=0 + deploy: + resources: + limits: + memory: 3.5G + reservations: + memory: 3.5G + + ner: + env_file: [ .env ] + build: + args: + CONFIG: ner_case_agnostic_multilingual_bert_base_extended.json + SERVICE_PORT: 8021 + SRC_DIR: annotators/NER_deeppavlov + COMMIT: f5117cd9ad1e64f6c2d970ecaa42fc09ccb23144 + context: ./ + dockerfile: annotators/NER_deeppavlov/Dockerfile + command: flask run -h 0.0.0.0 -p 8021 + environment: + - FLASK_APP=server + - CUDA_VISIBLE_DEVICES=0 + tty: true + deploy: + resources: + limits: + memory: 2G + reservations: + memory: 2G + + factoid-qa: + env_file: [ .env ] + build: + args: + SERVICE_PORT: 8071 + SERVICE_NAME: factoid_qa + context: . 
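
The dff-intent-responder-skill configured above answers the intents caught by intent-catcher, using the canned responses shipped as INTENT_RESPONSE_PHRASES_FNAME. A sketch of that lookup, assuming intent_response_phrases.json maps an intent name to a list of response strings (the real file layout may differ):

import json
import random

with open("intent_response_phrases.json") as f:
    RESPONSES = json.load(f)  # assumed: {"intent_name": ["response", ...], ...}

def respond_to_intents(detected: list[str]) -> str | None:
    # Return a canned response for the first detected intent we know about.
    for intent in detected:
        if intent in RESPONSES:
            return random.choice(RESPONSES[intent])
    return None
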
+ dockerfile: ./skills/factoid_qa/Dockerfile + command: flask run -h 0.0.0.0 -p 8071 + environment: + - FLASK_APP=server + deploy: + resources: + limits: + memory: 256M + reservations: + memory: 256M + + entity-linking: + env_file: [ .env ] + build: + args: + SERVICE_PORT: 8075 + SERVICE_NAME: entity_linking + CONFIG: entity_linking_eng.json + SRC_DIR: annotators/entity_linking + context: ./ + dockerfile: annotators/entity_linking/Dockerfile + environment: + - CUDA_VISIBLE_DEVICES=0 + deploy: + resources: + limits: + memory: 2.5G + reservations: + memory: 2.5G + + wiki-parser: + env_file: [ .env ] + build: + args: + SERVICE_PORT: 8077 + SERVICE_NAME: wiki_parser + WIKI_LITE_DB: http://files.deeppavlov.ai/kbqa/wikidata/wikidata2022.hdt + WIKI_LITE_INDEX_DB: http://files.deeppavlov.ai/kbqa/wikidata/wikidata2022.hdt.index.v1-1 + WIKI_CACHE_DB: http://files.deeppavlov.ai/kbqa/wikidata/wikidata_cache.json + CONFIG: wiki_parser.json + SRC_DIR: annotators/wiki_parser + COMMIT: ff5b156d16a949c3ec99da7fb60ae907dec37a41 + FAST: 1 + context: ./ + dockerfile: annotators/wiki_parser/Dockerfile + command: flask run -h 0.0.0.0 -p 8077 + environment: + - CUDA_VISIBLE_DEVICES='' + - FLASK_APP=server + deploy: + resources: + limits: + memory: 256M + reservations: + memory: 256M + + text-qa: + env_file: [ .env ] + build: + args: + SERVICE_PORT: 8078 + SERVICE_NAME: text_qa + CONFIG: qa_eng.json + context: services/text_qa + dockerfile: Dockerfile + command: flask run -h 0.0.0.0 -p 8078 + environment: + - CUDA_VISIBLE_DEVICES=0 + - FLASK_APP=server + - LANGUAGE=EN + deploy: + resources: + limits: + memory: 3G + reservations: + memory: 3G + + kbqa: + env_file: [ .env ] + build: + args: + SERVICE_PORT: 8072 + SERVICE_NAME: kbqa + CONFIG: kbqa_cq_mt_bert_lite.json + SRC_DIR: annotators/kbqa/ + COMMIT: 283a25e322e8fedc6ff0c159e4ec76bb165ae405 + context: ./ + dockerfile: annotators/kbqa/Dockerfile + command: flask run -h 0.0.0.0 -p 8072 + environment: + - CUDA_VISIBLE_DEVICES=0 + - FLASK_APP=server + deploy: + resources: + limits: + memory: 5G + reservations: + memory: 5G + + combined-classification: + env_file: [ .env ] + build: + args: + SERVICE_PORT: 8087 + SERVICE_NAME: combined_classification + CONFIG: combined_classifier.json + context: . 
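
The wiki-parser above serves Wikidata lookups from the compressed HDT dump it downloads (WIKI_LITE_DB plus its index), which is what lets it run in 256M of RAM with no GPU. For illustration, direct HDT access can look roughly like the following with the pyHDT package; this is a standalone example under that assumption, not the annotator's code:

from hdt import HDTDocument

doc = HDTDocument("wikidata2022.hdt")  # the dump referenced by WIKI_LITE_DB
# Iterate triples with Paris (Q90) as subject; empty strings act as wildcards.
triples, count = doc.search_triples("http://www.wikidata.org/entity/Q90", "", "")
for subj, pred, obj in triples:
    print(pred, obj)
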
+ dockerfile: ./annotators/combined_classification/Dockerfile + command: gunicorn --workers=1 server:app -b 0.0.0.0:8087 --timeout 600 + environment: + - CUDA_VISIBLE_DEVICES=0 + deploy: + resources: + limits: + memory: 2G + reservations: + memory: 2G + + fact-retrieval: + env_file: [ .env ] + build: + args: + SERVICE_PORT: 8100 + SERVICE_NAME: fact_retrieval + CONFIG: configs/fact_retrieval_page.json + CONFIG_WIKI: configs/page_extractor.json + CONFIG_WHOW: configs/whow_page_extractor.json + SRC_DIR: annotators/fact_retrieval/ + COMMIT: 4b3e60c407644b750c9dc292ac6bf206081fb9d0 + N_FACTS: 3 + context: ./ + dockerfile: annotators/fact_retrieval/Dockerfile + command: flask run -h 0.0.0.0 -p 8100 + environment: + - CUDA_VISIBLE_DEVICES=0 + - FLASK_APP=server + deploy: + resources: + limits: + memory: 4G + reservations: + memory: 4G + + entity-detection: + env_file: [ .env ] + build: + args: + SERVICE_NAME: entity_detection + SEQ_TAG_CONFIG: wikipedia_entity_detection_distilbert.json + CONFIG: entity_detection_eng.json + LOWERCASE: 1 + SERVICE_PORT: 8103 + SRC_DIR: annotators/entity_detection/ + FINEGRAINED: 0 + context: ./ + dockerfile: annotators/entity_detection/Dockerfile + command: flask run -h 0.0.0.0 -p 8103 + environment: + - FLASK_APP=server + - CUDA_VISIBLE_DEVICES=0 + deploy: + resources: + limits: + memory: 2.5G + reservations: + memory: 2.5G + + prompt-selector: + env_file: [ .env ] + build: + args: + SERVICE_PORT: 8135 + SERVICE_NAME: prompt_selector + SENTENCE_RANKER_SERVICE_URL: http://sentence-ranker:8128/respond + N_SENTENCES_TO_RETURN: 3 + PROMPTS_TO_CONSIDER: dream_persona,dream_faq + context: . + dockerfile: ./annotators/prompt_selector/Dockerfile + command: flask run -h 0.0.0.0 -p 8135 + environment: + - FLASK_APP=server + deploy: + resources: + limits: + memory: 100M + reservations: + memory: 100M + + sentence-ranker: + env_file: [ .env ] + build: + args: + SERVICE_PORT: 8128 + SERVICE_NAME: sentence_ranker + PRETRAINED_MODEL_NAME_OR_PATH: sentence-transformers/all-MiniLM-L6-v2 + context: ./services/sentence_ranker/ + command: flask run -h 0.0.0.0 -p 8128 + environment: + - CUDA_VISIBLE_DEVICES=0 + - FLASK_APP=server + deploy: + resources: + limits: + memory: 3G + reservations: + memory: 3G + + openai-api-chatgpt: + env_file: [ .env ] + build: + args: + SERVICE_PORT: 8145 + SERVICE_NAME: openai_api_chatgpt + PRETRAINED_MODEL_NAME_OR_PATH: gpt-3.5-turbo + context: . + dockerfile: ./services/openai_api_lm/Dockerfile + command: flask run -h 0.0.0.0 -p 8145 + environment: + - CUDA_VISIBLE_DEVICES=0 + - FLASK_APP=server + deploy: + resources: + limits: + memory: 500M + reservations: + memory: 100M + + property-extraction: + env_file: [.env] + build: + args: + CONFIG_T5: t5_generative_ie_lite_infer.json + CONFIG_REL_RANKER: rel_ranking_roberta.json + SERVICE_PORT: 8136 + SRC_DIR: annotators/property_extraction/ + SERVICE_NAME: property_extraction + context: ./ + dockerfile: annotators/property_extraction/Dockerfile + command: flask run -h 0.0.0.0 -p 8136 + environment: + - FLASK_APP=server + - CUDA_VISIBLE_DEVICES=0 + deploy: + resources: + limits: + memory: 7G + reservations: + memory: 7G + + openai-api-chatgpt-16k: + env_file: [ .env ] + build: + args: + SERVICE_PORT: 8167 + SERVICE_NAME: openai_api_chatgpt_16k + PRETRAINED_MODEL_NAME_OR_PATH: gpt-3.5-turbo-16k + context: . 
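
The prompt-selector above scores the prompts named in PROMPTS_TO_CONSIDER against the recent dialog context via the sentence ranker and keeps the best N_SENTENCES_TO_RETURN of them for the downstream prompted skills. The gist of that selection, with rank() standing in for the HTTP call to http://sentence-ranker:8128/respond (an assumed helper, not this repo's code):

from typing import Callable

def select_prompts(context: str, prompts: dict[str, str],
                   rank: Callable[[str, str], float], n: int = 3) -> list[str]:
    # Keep the n prompt names whose text scores highest against the context.
    ranked = sorted(prompts, key=lambda name: rank(context, prompts[name]), reverse=True)
    return ranked[:n]

# e.g. select_prompts(ctx, {"dream_persona": persona_text, "dream_faq": faq_text}, rank)
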
+ dockerfile: ./services/openai_api_lm/Dockerfile + command: flask run -h 0.0.0.0 -p 8167 + environment: + - FLASK_APP=server + deploy: + resources: + limits: + memory: 500M + reservations: + memory: 100M + + summarization-annotator: + env_file: [ .env ] + build: + args: + SERVICE_PORT: 8058 + SERVICE_NAME: summarization_annotator + SUMMARIZATION_REQUEST_TIMEOUT: 10 + context: ./annotators/summarization_annotator/ + command: flask run -h 0.0.0.0 -p 8058 + environment: + - FLASK_APP=server + deploy: + resources: + limits: + memory: 256M + reservations: + memory: 256M + + dialog-summarizer: + env_file: [ .env ] + build: + args: + SERVICE_PORT: 8059 + SERVICE_NAME: dialog_summarizer + PRETRAINED_MODEL_NAME: "knkarthick/MEETING_SUMMARY" + context: ./services/dialog_summarizer/ + command: flask run -h 0.0.0.0 -p 8059 + environment: + - CUDA_VISIBLE_DEVICES=0 + - FLASK_APP=server + deploy: + resources: + limits: + memory: 4G + reservations: + memory: 4G + + personality-detection: + build: + args: + SERVICE_PORT: 8026 + SERVICE_NAME: personality_detection + context: annotators/personality_detection + command: uvicorn server:app --host 0.0.0.0 --port 8026 + deploy: + mode: replicated + replicas: 1 + resources: + limits: + memory: 312M + reservations: + memory: 312M + +version: '3.7' diff --git a/assistant_dists/dream_ocean/gpu1.yml b/assistant_dists/dream_ocean/gpu1.yml new file mode 100644 index 0000000000..acef2c59f5 --- /dev/null +++ b/assistant_dists/dream_ocean/gpu1.yml @@ -0,0 +1,78 @@ +services: + agent: + restart: unless-stopped + volumes: + - "/cephfs/home/ignatov/artifacts:/output" + - ".:/dp-agent" + ports: + - ${AGENT_PORT}:4242 + kbqa: + restart: unless-stopped + volumes: + - "~/.deeppavlov:/root/.deeppavlov" + environment: + - CUDA_VISIBLE_DEVICES=5 + text-qa: + restart: unless-stopped + volumes: + - "~/.deeppavlov:/root/.deeppavlov" + environment: + - CUDA_VISIBLE_DEVICES=5 + combined-classification: + restart: unless-stopped + environment: + - CUDA_VISIBLE_DEVICES=7 + mongo: + restart: unless-stopped + command: mongod + image: mongo:4.0.0 + # # you can use persistent local volume if you need + # volumes: + # - ./venv/data/db_data:/root/data/db + sentseg: + restart: unless-stopped + ranking-based-response-selector: + restart: unless-stopped + dff-intent-responder-skill: + restart: unless-stopped + intent-catcher: + restart: unless-stopped + environment: + - CUDA_VISIBLE_DEVICES=9 + ner: + restart: unless-stopped + volumes: + - "~/.deeppavlov:/root/.deeppavlov" + environment: + - CUDA_VISIBLE_DEVICES=7 + factoid-qa: + restart: unless-stopped + entity-linking: + restart: unless-stopped + volumes: + - "~/.deeppavlov:/root/.deeppavlov" + wiki-parser: + restart: unless-stopped + volumes: + - "~/.deeppavlov:/root/.deeppavlov" + fact-retrieval: + restart: unless-stopped + volumes: + - "~/.deeppavlov:/root/.deeppavlov" + environment: + - CUDA_VISIBLE_DEVICES=8 + entity-detection: + restart: unless-stopped + volumes: + - "~/.deeppavlov:/root/.deeppavlov" + environment: + - CUDA_VISIBLE_DEVICES=9 + sentence-ranker: + restart: unless-stopped + environment: + - CUDA_VISIBLE_DEVICES=9 + property-extraction: + restart: unless-stopped + volumes: + - "~/.deeppavlov:/root/.deeppavlov" +version: '3.7' diff --git a/assistant_dists/dream_ocean/pipeline_conf.json b/assistant_dists/dream_ocean/pipeline_conf.json new file mode 100644 index 0000000000..6e77e5faec --- /dev/null +++ b/assistant_dists/dream_ocean/pipeline_conf.json @@ -0,0 +1,523 @@ +{ + "connectors": { + "sentseg": { + "protocol": "http", + 
"timeout": 1.5, + "url": "http://sentseg:8011/sentseg" + }, + "ner": { + "protocol": "http", + "timeout": 1.5, + "url": "http://ner:8021/ner" + } + }, + "services": { + "last_chance_service": { + "connector": { + "protocol": "python", + "class_name": "PredefinedTextConnector", + "response_text": "Sorry, something went wrong inside. Please tell me, what did you say.", + "annotations": { + "sentseg": { + "punct_sent": "Sorry, something went wrong inside. Please tell me, what did you say.", + "segments": [ + "Sorry, something went wrong inside.", + "Please tell me, what did you say." + ] + }, + "ner": [ + [] + ] + } + }, + "state_manager_method": "add_bot_utterance_last_chance", + "tags": [ + "last_chance" + ], + "is_enabled": true, + "source": { + "component": "components/sbDcAqiNqxFz.yml", + "service": "services/agent_services/service_configs/dream" + } + }, + "timeout_service": { + "connector": { + "protocol": "python", + "class_name": "PredefinedTextConnector", + "response_text": "Sorry, I need to think more on that. Let's talk about something else.", + "annotations": { + "sentseg": { + "punct_sent": "Sorry, I need to think more on that. Let's talk about something else.", + "segments": [ + "Sorry, I need to think more on that.", + "Let's talk about something else." + ] + }, + "ner": [ + [] + ] + } + }, + "state_manager_method": "add_bot_utterance_last_chance", + "tags": [ + "timeout" + ], + "is_enabled": true, + "source": { + "component": "components/rFC0YJOoDFvS.yml", + "service": "services/agent_services/service_configs/dream" + } + }, + "annotators": { + "sentseg": { + "connector": { + "protocol": "http", + "timeout": 1.5, + "url": "http://sentseg:8011/sentseg" + }, + "dialog_formatter": "state_formatters.dp_formatters:preproc_last_human_utt_dialog", + "response_formatter": "state_formatters.dp_formatters:simple_formatter_service", + "previous_services": [], + "state_manager_method": "add_annotation", + "is_enabled": true, + "source": { + "component": "components/gM4fEjvVqLlSRRRkQfds2g.yml", + "service": "annotators/SentSeg/service_configs/sentseg" + } + }, + "personality_detection": { + "connector": { + "protocol": "http", + "timeout": 3.0, + "url": "http://personality-detection:8026/model" + }, + "dialog_formatter": "state_formatters.dp_formatters:personality_catcher_formatter_dialog", + "response_formatter": "state_formatters.dp_formatters:simple_formatter_service", + "state_manager_method": "add_annotation", + "previous_services": [], + "is_enabled": true, + "source": { + "component": "components/OowqncqowNAbj.yml", + "service": "annotators/personality_detection/service_configs/personality_detection" + } + }, + "prompt_goals_collector": { + "connector": { + "protocol": "http", + "timeout": 2.0, + "url": "http://prompt-selector:8135/collect_goals" + }, + "dialog_formatter": "state_formatters.dp_formatters:prompts_goals_collector_formatter", + "response_formatter": "state_formatters.dp_formatters:simple_formatter_service", + "previous_services": [], + "state_manager_method": "update_attributes", + "is_enabled": true, + "source": { + "component": "components/tK0hTk4TyMj7.yml", + "service": "annotators/prompt_selector/service_configs/dream_persona_openai_prompted" + } + }, + "prompt_selector": { + "connector": { + "protocol": "http", + "timeout": 2.0, + "url": "http://prompt-selector:8135/respond" + }, + "dialog_formatter": "state_formatters.dp_formatters:context_formatter_dialog", + "response_formatter": "state_formatters.dp_formatters:simple_formatter_service", + "previous_services": 
[ + "annotators.prompt_goals_collector" + ], + "state_manager_method": "add_annotation", + "is_enabled": true, + "source": { + "component": "components/tK0hTk4TyMj7.yml", + "service": "annotators/prompt_selector/service_configs/dream_persona_openai_prompted" + } + }, + "intent_catcher": { + "connector": { + "protocol": "http", + "timeout": 1.0, + "url": "http://intent-catcher:8014/detect" + }, + "dialog_formatter": "state_formatters.dp_formatters:last_utt_sentseg_segments_dialog", + "response_formatter": "state_formatters.dp_formatters:simple_formatter_service", + "previous_services": [ + "annotators.sentseg" + ], + "state_manager_method": "add_annotation", + "is_enabled": true, + "source": { + "component": "components/1IjC3r9b1VJ082ceINXzHQ.yml", + "service": "annotators/IntentCatcherTransformers/service_configs/intent-catcher" + } + }, + "fact_retrieval": { + "connector": { + "protocol": "http", + "timeout": 1.0, + "url": "http://fact-retrieval:8100/model" + }, + "dialog_formatter": "state_formatters.dp_formatters:fact_retrieval_formatter_dialog", + "response_formatter": "state_formatters.dp_formatters:simple_formatter_service", + "previous_services": [ + "annotators.sentseg", + "annotators.entity_linking" + ], + "state_manager_method": "add_annotation", + "is_enabled": true, + "source": { + "component": "components/sVjXygxsPhjLEWd2acwcEA.yml", + "service": "annotators/fact_retrieval/service_configs/fact-retrieval" + } + }, + "ner": { + "connector": { + "protocol": "http", + "timeout": 1.5, + "url": "http://ner:8021/ner" + }, + "dialog_formatter": "state_formatters.dp_formatters:ner_formatter_dialog", + "response_formatter": "state_formatters.dp_formatters:simple_formatter_service", + "previous_services": [ + "annotators.sentseg" + ], + "state_manager_method": "add_annotation", + "is_enabled": true, + "source": { + "component": "components/3RDNPBdybjBlSQZqcc7nGQ.yml", + "service": "annotators/NER_deeppavlov/service_configs/ner" + } + }, + "entity_detection": { + "connector": { + "protocol": "http", + "timeout": 1.0, + "url": "http://entity-detection:8103/respond" + }, + "dialog_formatter": "state_formatters.dp_formatters:entity_detection_formatter_dialog", + "response_formatter": "state_formatters.dp_formatters:simple_formatter_service", + "previous_services": [ + "annotators.sentseg" + ], + "state_manager_method": "add_annotation", + "is_enabled": true, + "source": { + "component": "components/05PqJXVd7gV7DqslN5z3A.yml", + "service": "annotators/entity_detection/service_configs/entity-detection" + } + }, + "kbqa": { + "connector": { + "protocol": "http", + "timeout": 1.0, + "url": "http://kbqa:8072/model" + }, + "dialog_formatter": "state_formatters.dp_formatters:kbqa_formatter_dialog", + "response_formatter": "state_formatters.dp_formatters:simple_formatter_service", + "required_previous_services": [ + "annotators.entity_linking" + ], + "state_manager_method": "add_annotation", + "is_enabled": true, + "source": { + "component": "components/3clxaNOTpI3oHR0fHRaCnQ.yml", + "service": "annotators/kbqa/service_configs/kbqa" + } + }, + "entity_linking": { + "connector": { + "protocol": "http", + "timeout": 1.0, + "url": "http://entity-linking:8075/model" + }, + "dialog_formatter": "state_formatters.dp_formatters:el_formatter_dialog", + "response_formatter": "state_formatters.dp_formatters:simple_formatter_service", + "previous_services": [ + "annotators.ner", + "annotators.entity_detection", + "annotators.spacy_nounphrases" + ], + "state_manager_method": "add_annotation", + "is_enabled": 
true, + "source": { + "component": "components/M1sE6hOm20EGBWBdr0vIOw.yml", + "service": "annotators/entity_linking/service_configs/entity-linking" + } + }, + "wiki_parser": { + "connector": { + "protocol": "http", + "timeout": 1.0, + "url": "http://wiki-parser:8077/model" + }, + "dialog_formatter": "state_formatters.dp_formatters:wp_formatter_dialog", + "response_formatter": "state_formatters.dp_formatters:simple_formatter_service", + "required_previous_services": [ + "annotators.entity_linking" + ], + "state_manager_method": "add_annotation", + "is_enabled": true, + "source": { + "component": "components/O4FVnkAwjay1mL1FbuRGWw.yml", + "service": "annotators/wiki_parser/service_configs/wiki-parser" + } + }, + "combined_classification": { + "connector": { + "protocol": "http", + "timeout": 3.0, + "url": "http://combined-classification:8087/model" + }, + "dialog_formatter": "state_formatters.dp_formatters:preproc_last_human_utt_dialog_w_hist", + "response_formatter": "state_formatters.dp_formatters:simple_formatter_service", + "previous_services": [], + "state_manager_method": "add_annotation", + "is_enabled": true, + "source": { + "component": "components/PbLNvh4hrvs47rPaf2bfYQ.yml", + "service": "annotators/combined_classification/service_configs/combined-classification" + } + }, + "summarization_annotator": { + "connector": { + "protocol": "http", + "timeout": 10.0, + "url": "http://summarization-annotator:8058/respond" + }, + "dialog_formatter": "state_formatters.dp_formatters:summarization_annotator_formatter", + "response_formatter": "state_formatters.dp_formatters:simple_formatter_service", + "previous_services": [ + "annotators.spelling_preprocessing" + ], + "state_manager_method": "update_attributes", + "is_enabled": true, + "source": { + "component": "components/riRfdGz86P51B9bL7fO6JR.yml", + "service": "annotators/summarization_annotator/service_configs/summarization-annotator" + } + } + }, + "response_annotators": { + "sentseg": { + "connector": { + "protocol": "http", + "timeout": 1.5, + "url": "http://sentseg:8011/sentseg" + }, + "dialog_formatter": "state_formatters.dp_formatters:last_bot_utt_dialog", + "response_formatter": "state_formatters.dp_formatters:simple_formatter_service", + "previous_services": [ + "response_annotator_selectors" + ], + "state_manager_method": "add_annotation_prev_bot_utt", + "is_enabled": true, + "source": { + "component": "components/1Q9QXih1U2zhCpVm9zxdsA.yml", + "service": "annotators/SentSeg/service_configs/sentseg" + } + }, + "ner": { + "connector": { + "protocol": "http", + "timeout": 1.5, + "url": "http://ner:8021/ner" + }, + "dialog_formatter": "state_formatters.dp_formatters:ner_formatter_last_bot_dialog", + "response_formatter": "state_formatters.dp_formatters:simple_formatter_service", + "previous_services": [ + "response_annotator_selectors", + "response_annotators.sentseg" + ], + "state_manager_method": "add_annotation_prev_bot_utt", + "is_enabled": true, + "source": { + "component": "components/3RDNPBdybjBlSQZqcc7nGQ.yml", + "service": "annotators/NER_deeppavlov/service_configs/ner" + } + } + }, + "response_annotator_selectors": { + "connector": { + "protocol": "python", + "class_name": "skill_selectors.post_annotator_selector.connector:PostAnnotatorSelectorConnector", + "annotator_names": [ + "sentseg", + "ner" + ] + }, + "response_formatter": "state_formatters.dp_formatters:simple_formatter_service", + "tags": [ + "selector" + ], + "is_enabled": true, + "source": { + "component": "components/LXrJDIf43gwNmPMNXG5Eg.yml", + "service": 
"services/response_annotator_selectors/service_configs/agent" + } + }, + "candidate_annotators": { + "entity_detection": { + "connector": { + "protocol": "http", + "timeout": 2.0, + "url": "http://entity-detection:8103/respond_batch" + }, + "dialog_formatter": "state_formatters.dp_formatters:hypotheses_list", + "response_formatter": "state_formatters.dp_formatters:simple_formatter_service", + "previous_services": [ + "skills" + ], + "state_manager_method": "add_hypothesis_annotation_batch", + "is_enabled": true, + "source": { + "component": "components/05PqJXVd7gV7DqslN5z3A.yml", + "service": "annotators/entity_detection/service_configs/entity-detection" + } + }, + "combined_classification": { + "connector": { + "protocol": "http", + "timeout": 2.0, + "url": "http://combined-classification:8087/batch_model" + }, + "dialog_formatter": "state_formatters.dp_formatters:hypothesis_histories_list", + "response_formatter": "state_formatters.dp_formatters:simple_formatter_service", + "previous_services": [ + "skills" + ], + "state_manager_method": "add_hypothesis_annotation_batch", + "is_enabled": true, + "source": { + "component": "components/PbLNvh4hrvs47rPaf2bfYQ.yml", + "service": "annotators/combined_classification/service_configs/combined-classification" + } + }, + "sentence_ranker": { + "connector": { + "protocol": "http", + "timeout": 1.0, + "url": "http://sentence-ranker:8128/respond" + }, + "dialog_formatter": "state_formatters.dp_formatters:sentence_ranker_formatter", + "response_formatter": "state_formatters.dp_formatters:simple_formatter_service", + "previous_services": [ + "skills" + ], + "state_manager_method": "add_hypothesis_annotation_batch", + "is_enabled": true, + "source": { + "component": "components/XGwmAHtAOu0NDqqG3QCJw.yml", + "service": "services/sentence_ranker/service_configs/sentence-ranker" + } + } + }, + "skill_selectors": { + "description_based_skill_selector": { + "connector": { + "protocol": "python", + "class_name": "skill_selectors.description_based_skill_selector.connector:DescriptionBasedSkillSelectorConnector" + }, + "dialog_formatter": "state_formatters.dp_formatters:base_skill_selector_formatter_dialog", + "response_formatter": "state_formatters.dp_formatters:simple_formatter_service", + "previous_services": [ + "annotators" + ], + "tags": [ + "selector" + ], + "is_enabled": true, + "source": { + "component": "components/dfsw4bji8bgjq2.yml", + "service": "skill_selectors/description_based_skill_selector/service_configs/agent" + } + } + }, + "skills": { + "dff_intent_responder_skill": { + "connector": { + "protocol": "http", + "timeout": 2.0, + "url": "http://dff-intent-responder-skill:8012/respond" + }, + "dialog_formatter": "state_formatters.dp_formatters:dff_intent_responder_skill_formatter", + "response_formatter": "state_formatters.dp_formatters:skill_with_attributes_formatter_service", + "previous_services": [ + "skill_selectors" + ], + "state_manager_method": "add_hypothesis", + "is_enabled": true, + "source": { + "component": "components/CmQGq1Xn5YOaMwNIb4bEpA.yml", + "service": "skills/dff_intent_responder_skill/service_configs/dff-intent-responder-skill" + } + }, + "dummy_skill": { + "connector": { + "protocol": "python", + "class_name": "skills.dummy_skill.connector:DummySkillConnector" + }, + "dialog_formatter": "state_formatters.dp_formatters:utt_sentrewrite_modified_last_dialog", + "response_formatter": "state_formatters.dp_formatters:skill_with_attributes_formatter_service", + "previous_services": [ + "skill_selectors" + ], + 
"state_manager_method": "add_hypothesis", + "is_enabled": true, + "source": { + "component": "components/uYkoK0vRp4bbIg9akI1yw.yml", + "service": "skills/dummy_skill/service_configs/agent" + } + }, + "factoid_qa": { + "connector": { + "protocol": "http", + "timeout": 2.0, + "url": "http://factoid-qa:8071/respond" + }, + "dialog_formatter": "state_formatters.dp_formatters:utt_sentseg_punct_dialog", + "response_formatter": "state_formatters.dp_formatters:skill_with_attributes_formatter_service", + "previous_services": [ + "skill_selectors" + ], + "state_manager_method": "add_hypothesis", + "is_enabled": true, + "source": { + "component": "components/qx0j5QHAzog0b39nRnuA.yml", + "service": "skills/factoid_qa/service_configs/factoid-qa" + } + } + }, + "response_selectors": { + "response_selector": { + "connector": { + "protocol": "http", + "timeout": 1.0, + "url": "http://ranking-based-response-selector:8002/respond" + }, + "dialog_formatter": "state_formatters.dp_formatters:cropped_dialog", + "response_formatter": "state_formatters.dp_formatters:base_response_selector_formatter_service", + "previous_services": [ + "candidate_annotators" + ], + "state_manager_method": "add_bot_utterance", + "is_enabled": true, + "source": { + "component": "components/YJzc7NwGrLmKp6gfZJh7X1.yml", + "service": "response_selectors/ranking_based_response_selector/service_configs/ranking-based-response-selector" + } + } + } + }, + "metadata": { + "display_name": "Dream", + "author": "DeepPavlov", + "description": "Main version of DeepPavlov Dream Socialbot", + "version": "0.1.0", + "date_created": "2022-12-12T12:12:00", + "ram_usage": "20 GB", + "gpu_usage": "20 GB", + "disk_usage": "20 GB" + } +} \ No newline at end of file diff --git a/assistant_dists/dream_ocean/proxy.yml b/assistant_dists/dream_ocean/proxy.yml new file mode 100644 index 0000000000..754609de10 --- /dev/null +++ b/assistant_dists/dream_ocean/proxy.yml @@ -0,0 +1,128 @@ +services: + + sentseg: + command: ["nginx", "-g", "daemon off;"] + build: + context: dp/proxy/ + dockerfile: Dockerfile + environment: + - PROXY_PASS=proxy.deeppavlov.ai:8011 + - PORT=8011 + + dff-intent-responder-skill: + command: ["nginx", "-g", "daemon off;"] + build: + context: dp/proxy/ + dockerfile: Dockerfile + environment: + - PROXY_PASS=proxy.deeppavlov.ai:8012 + - PORT=8012 + + intent-catcher: + command: ["nginx", "-g", "daemon off;"] + build: + context: dp/proxy/ + dockerfile: Dockerfile + environment: + - PROXY_PASS=proxy.deeppavlov.ai:8014 + - PORT=8014 + + ner: + command: ["nginx", "-g", "daemon off;"] + build: + context: dp/proxy/ + dockerfile: Dockerfile + environment: + - PROXY_PASS=proxy.deeppavlov.ai:8021 + - PORT=8021 + + factoid-qa: + command: ["nginx", "-g", "daemon off;"] + build: + context: dp/proxy/ + dockerfile: Dockerfile + environment: + - PROXY_PASS=proxy.deeppavlov.ai:8071 + - PORT=8071 + + text-qa: + command: [ "nginx", "-g", "daemon off;" ] + build: + context: dp/proxy/ + dockerfile: Dockerfile + environment: + - PROXY_PASS=proxy.deeppavlov.ai:8078 + - PORT=8078 + + kbqa: + command: ["nginx", "-g", "daemon off;"] + build: + context: dp/proxy/ + dockerfile: Dockerfile + environment: + - PROXY_PASS=proxy.deeppavlov.ai:8072 + - PORT=8072 + + entity-linking: + command: [ "nginx", "-g", "daemon off;" ] + build: + context: dp/proxy/ + dockerfile: Dockerfile + environment: + - PROXY_PASS=proxy.deeppavlov.ai:8075 + - PORT=8075 + + wiki-parser: + command: [ "nginx", "-g", "daemon off;" ] + build: + context: dp/proxy/ + dockerfile: Dockerfile + environment: + 
- PROXY_PASS=proxy.deeppavlov.ai:8077 + - PORT=8077 + + combined-classification: + command: ["nginx", "-g", "daemon off;"] + build: + context: dp/proxy/ + dockerfile: Dockerfile + environment: + - PROXY_PASS=proxy.deeppavlov.ai:8087 + - PORT=8087 + + fact-retrieval: + command: [ "nginx", "-g", "daemon off;" ] + build: + context: dp/proxy/ + dockerfile: Dockerfile + environment: + - PROXY_PASS=proxy.deeppavlov.ai:8100 + - PORT=8100 + + entity-detection: + command: [ "nginx", "-g", "daemon off;" ] + build: + context: dp/proxy/ + dockerfile: Dockerfile + environment: + - PROXY_PASS=proxy.deeppavlov.ai:8103 + - PORT=8103 + + sentence-ranker: + command: [ "nginx", "-g", "daemon off;" ] + build: + context: dp/proxy/ + dockerfile: Dockerfile + environment: + - PROXY_PASS=proxy.deeppavlov.ai:8128 + - PORT=8128 + + property-extraction: + command: [ "nginx", "-g", "daemon off;" ] + build: + context: dp/proxy/ + dockerfile: Dockerfile + environment: + - PROXY_PASS=proxy.deeppavlov.ai:8136 + - PORT=8136 +version: '3.7' diff --git a/assistant_dists/dream_ocean/telegram.yml b/assistant_dists/dream_ocean/telegram.yml new file mode 100644 index 0000000000..f332be24a6 --- /dev/null +++ b/assistant_dists/dream_ocean/telegram.yml @@ -0,0 +1,17 @@ +services: + agent-tg: + command: sh -c 'bin/wait && python -m deeppavlov_agent.run agent.channel=telegram agent.telegram_token=$TG_TOKEN agent.pipeline_config=assistant_dists/dream_ocean/pipeline_conf.json agent.db_config=assistant_dists/dream_ocean/db_conf.json' + env_file: [.env] + build: + context: ./ + dockerfile: dockerfile_agent + deploy: + resources: + limits: + memory: 4G + reservations: + memory: 2G + volumes: + - ".:/dp-agent" + +version: '3.7' diff --git a/assistant_dists/dream_ocean/test.yml b/assistant_dists/dream_ocean/test.yml new file mode 100644 index 0000000000..5035cb6254 --- /dev/null +++ b/assistant_dists/dream_ocean/test.yml @@ -0,0 +1,74 @@ +services: + agent: + volumes: + - "/cephfs/home/ignatov/artifacts:/output" + ports: + - ${AGENT_PORT}:4242 + kbqa: + volumes: + - "~/.deeppavlov:/root/.deeppavlov" + - "~/.deeppavlov/cache:/root/.cache" + environment: + - CUDA_VISIBLE_DEVICES=5 + text-qa: + volumes: + - "~/.deeppavlov:/root/.deeppavlov" + - "~/.deeppavlov/cache:/root/.cache" + environment: + - CUDA_VISIBLE_DEVICES=5 + combined-classification: + volumes: + - "~/.deeppavlov:/root/.deeppavlov" + - "~/.deeppavlov/cache:/root/.cache" + environment: + - CUDA_VISIBLE_DEVICES=8 + mongo: + command: mongod + image: mongo:4.0.0 + # # you can use persistent local volume if you need + # volumes: + # - ./venv/data/db_data:/root/data/db + sentseg: + dff-intent-responder-skill: + intent-catcher: + volumes: + - "~/.deeppavlov:/root/.deeppavlov" + - "~/.deeppavlov/cache:/root/.cache" + environment: + - CUDA_VISIBLE_DEVICES=6 + ner: + volumes: + - "~/.deeppavlov:/root/.deeppavlov" + environment: + - CUDA_VISIBLE_DEVICES=7 + factoid-qa: + entity-linking: + volumes: + - "~/.deeppavlov:/root/.deeppavlov" + - "~/.deeppavlov/cache:/root/.cache" + environment: + - CUDA_VISIBLE_DEVICES=9 + wiki-parser: + volumes: + - "~/.deeppavlov:/root/.deeppavlov" + fact-retrieval: + volumes: + - "~/.deeppavlov:/root/.deeppavlov" + - "~/.deeppavlov/cache:/root/.cache" + environment: + - CUDA_VISIBLE_DEVICES=7 + entity-detection: + volumes: + - "~/.deeppavlov:/root/.deeppavlov" + - "~/.deeppavlov/cache:/root/.cache" + environment: + - CUDA_VISIBLE_DEVICES=8 + sentence-ranker: + volumes: + - "~/.deeppavlov/cache:/root/.cache" + environment: + - CUDA_VISIBLE_DEVICES=9 + 
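+  # The test override pins each GPU-hungry service to an explicit device via
+  # CUDA_VISIBLE_DEVICES and mounts ~/.deeppavlov (and its cache) so model
+  # weights are downloaded once and reused across container rebuilds.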
property-extraction: + volumes: + - "~/.deeppavlov:/root/.deeppavlov" +version: '3.7' diff --git a/assistant_dists/dream_ranking_and_midas_based_dm/cpu.yml b/assistant_dists/dream_ranking_and_midas_based_dm/cpu.yml new file mode 100644 index 0000000000..d90f6d159e --- /dev/null +++ b/assistant_dists/dream_ranking_and_midas_based_dm/cpu.yml @@ -0,0 +1,27 @@ +version: '3.7' +services: + ner: + environment: + DEVICE: cpu + CUDA_VISIBLE_DEVICES: "" + kbqa: + environment: + CUDA_VISIBLE_DEVICES: "" + combined-classification: + environment: + CUDA_VISIBLE_DEVICES: "" + text-qa: + environment: + CUDA_VISIBLE_DEVICES: "" + fact-retrieval: + environment: + CUDA_VISIBLE_DEVICES: "" + entity-detection: + environment: + CUDA_VISIBLE_DEVICES: "" + sentence-ranker: + environment: + CUDA_VISIBLE_DEVICES: "" + intent-catcher: + environment: + CUDA_VISIBLE_DEVICES: "" diff --git a/assistant_dists/dream_ranking_and_midas_based_dm/db_conf.json b/assistant_dists/dream_ranking_and_midas_based_dm/db_conf.json new file mode 100644 index 0000000000..a9ba6813f5 --- /dev/null +++ b/assistant_dists/dream_ranking_and_midas_based_dm/db_conf.json @@ -0,0 +1,6 @@ +{ + "host": "DB_HOST", + "port": "DB_PORT", + "name": "DB_NAME", + "env": true +} \ No newline at end of file diff --git a/assistant_dists/dream_ranking_and_midas_based_dm/dev.yml b/assistant_dists/dream_ranking_and_midas_based_dm/dev.yml new file mode 100644 index 0000000000..37a460fd0d --- /dev/null +++ b/assistant_dists/dream_ranking_and_midas_based_dm/dev.yml @@ -0,0 +1,160 @@ +# С такими volumes удобно дебажить, не нужно пересобирать контейнер каждый раз при изменении кода +services: + agent: + volumes: + - ".:/dp-agent" + ports: + - 4242:4242 + + sentseg: + volumes: + - "./annotators/SentSeg:/src" + ports: + - 8011:8011 + ranking-based-response-selector: + volumes: + - "./response_selectors/ranking_based_response_selector:/src" + - "./common:/src/common" + ports: + - 8002:8002 + dff-intent-responder-skill: + volumes: + - "./skills/dff_intent_responder_skill:/src" + - "./common:/src/common" + ports: + - 8012:8012 + intent-catcher: + volumes: + - "./annotators/IntentCatcherTransformers:/src" + - "./common:/src/common" + - "~/.deeppavlov:/root/.deeppavlov" + - "~/.deeppavlov/cache:/root/.cache" + ports: + - 8014:8014 + ner: + volumes: + - './annotators/NER_deeppavlov:/src' + - "~/.deeppavlov:/root/.deeppavlov" + ports: + - 8021:8021 + factoid-qa: + volumes: + - "./skills/factoid_qa:/src" + - "./common:/src/common" + ports: + - 8071:8071 + kbqa: + volumes: + - "./annotators/kbqa:/src" + - "~/.deeppavlov:/root/.deeppavlov" + - "~/.deeppavlov/cache:/root/.cache" + ports: + - 8072:8072 + entity-linking: + volumes: + - "./annotators/entity_linking:/src" + - "~/.deeppavlov:/root/.deeppavlov" + - "~/.deeppavlov/cache:/root/.cache" + ports: + - 8075:8075 + wiki-parser: + volumes: + - "./annotators/wiki_parser:/src" + - "./common:/src/common" + ports: + - 8077:8077 + mongo: + ports: + - 27017:27017 + # # you can use persistent local volume if you need + # volumes: + # - ./venv/data/db_data:/root/data/db + text-qa: + volumes: + - "./services/text_qa:/src" + - "~/.deeppavlov:/root/.deeppavlov" + - "~/.deeppavlov/cache:/root/.cache" + ports: + - 8078:8078 + combined-classification: + volumes: + - "./common:/src/common" + - "./annotators/combined_classification:/src" + - "~/.deeppavlov:/root/.deeppavlov" + - "~/.deeppavlov/cache:/root/.cache" + ports: + - 8087:8087 + fact-retrieval: + volumes: + - "./annotators/fact_retrieval:/src" + - "~/.deeppavlov:/root/.deeppavlov" 
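+      # The shared common/ package is bind-mounted alongside the service source,
+      # so code edits take effect without rebuilding the image.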
+ - "./common:/src/common" + - "~/.deeppavlov/cache:/root/.cache" + ports: + - 8100:8100 + entity-detection: + volumes: + - "./annotators/entity_detection:/src" + - "~/.deeppavlov:/root/.deeppavlov" + - "~/.deeppavlov/cache:/root/.cache" + ports: + - 8103:8103 + sentence-ranker: + volumes: + - "./services/sentence_ranker:/src" + - "~/.deeppavlov/cache:/root/.cache" + ports: + - 8128:8128 + prompt-selector: + volumes: + - "./annotators/prompt_selector:/src" + - "./common:/src/common" + ports: + - 8135:8135 + openai-api-chatgpt: + volumes: + - "./services/openai_api_lm:/src" + - "./common:/src/common" + ports: + - 8145:8145 + dff-dream-persona-chatgpt-prompted-skill: + volumes: + - "./skills/dff_template_prompted_skill:/src" + - "./common:/src/common" + ports: + - 8137:8137 + dff-google-api-skill: + volumes: + - "./skills/dff_google_api_skill:/src" + - "./common:/src/common" + ports: + - 8162:8162 + property-extraction: + volumes: + - "./annotators/property_extraction:/src" + - "~/.deeppavlov:/root/.deeppavlov" + ports: + - 8136:8136 + dff-dream-faq-prompted-skill: + volumes: + - "./skills/dff_template_prompted_skill:/src" + - "./common:/src/common" + ports: + - 8170:8170 + summarization-annotator: + volumes: + - "./annotators/summarization_annotator:/src" + ports: + - 8058:8058 + dialog-summarizer: + volumes: + - "./services/dialog_summarizer:/src" + ports: + - 8059:8059 + openai-api-chatgpt-16k: + volumes: + - "./services/openai_api_lm:/src" + - "./common:/src/common" + ports: + - 8167:8167 +version: "3.7" diff --git a/assistant_dists/dream_ranking_and_midas_based_dm/docker-compose.override.yml b/assistant_dists/dream_ranking_and_midas_based_dm/docker-compose.override.yml new file mode 100644 index 0000000000..86c76cd2d3 --- /dev/null +++ b/assistant_dists/dream_ranking_and_midas_based_dm/docker-compose.override.yml @@ -0,0 +1,485 @@ +services: + agent: + command: sh -c 'bin/wait && python -m deeppavlov_agent.run agent.pipeline_config=assistant_dists/dream_ranking_and_midas_based_dm/pipeline_conf.json' + environment: + WAIT_HOSTS: "sentseg:8011, ranking-and-intent-based-response-selector:8081, + dff-intent-responder-skill:8012, intent-catcher:8014, ner:8021, + factoid-qa:8071, kbqa:8072, entity-linking:8075, wiki-parser:8077, text-qa:8078, + combined-classification:8087, fact-retrieval:8100, entity-detection:8103, + sentence-ranker:8128, property-extraction:8136, prompt-selector:8135, openai-api-chatgpt:8145, + dff-dream-persona-chatgpt-prompted-skill:8137, dff-dream-faq-prompted-skill:8170, + openai-api-chatgpt-16k:8167, summarization-annotator:8058, dialog-summarizer:8059" + WAIT_HOSTS_TIMEOUT: ${WAIT_TIMEOUT:-1000} + HIGH_PRIORITY_INTENTS: 1 + RESTRICTION_FOR_SENSITIVE_CASE: 1 + ALWAYS_TURN_ON_ALL_SKILLS: 0 + LANGUAGE: EN + FALLBACK_FILE: fallbacks_dream_en.json + + ranking-based-response-selector: + env_file: [ .env ] + build: + args: + SERVICE_PORT: 8081 + SERVICE_NAME: response_selector + LANGUAGE: EN + SENTENCE_RANKER_ANNOTATION_NAME: sentence_ranker + SENTENCE_RANKER_SERVICE_URL: http://sentence-ranker:8128/respond + SENTENCE_RANKER_TIMEOUT: 3 + N_UTTERANCES_CONTEXT: 5 + FILTER_TOXIC_OR_BADLISTED: 1 + FALLBACK_FILE: fallbacks_dream_en.json + context: . 
+ dockerfile: ./response_selectors/ranking_and_intent_based_response_selector/Dockerfile + command: flask run -h 0.0.0.0 -p 8081 + environment: + - FLASK_APP=server + deploy: + resources: + limits: + memory: 100M + reservations: + memory: 100M + + sentseg: + env_file: [ .env ] + build: + context: ./annotators/SentSeg/ + command: flask run -h 0.0.0.0 -p 8011 + environment: + - FLASK_APP=server + deploy: + resources: + limits: + memory: 1.5G + reservations: + memory: 1.5G + + dff-intent-responder-skill: + env_file: [ .env ] + build: + args: + SERVICE_PORT: 8012 + SERVICE_NAME: dff_intent_responder_skill + INTENT_RESPONSE_PHRASES_FNAME: intent_response_phrases.json + context: . + dockerfile: ./skills/dff_intent_responder_skill/Dockerfile + command: gunicorn --workers=1 server:app -b 0.0.0.0:8012 --reload + deploy: + resources: + limits: + memory: 128M + reservations: + memory: 128M + + intent-catcher: + env_file: [ .env ] + build: + context: . + dockerfile: ./annotators/IntentCatcherTransformers/Dockerfile + args: + SERVICE_PORT: 8014 + CONFIG_NAME: intents_model_dp_config.json + INTENT_PHRASES_PATH: intent_phrases.json + command: python -m flask run -h 0.0.0.0 -p 8014 + environment: + - FLASK_APP=server + - CUDA_VISIBLE_DEVICES=0 + deploy: + resources: + limits: + memory: 3.5G + reservations: + memory: 3.5G + + ner: + env_file: [ .env ] + build: + args: + CONFIG: ner_case_agnostic_multilingual_bert_base_extended.json + SERVICE_PORT: 8021 + SRC_DIR: annotators/NER_deeppavlov + COMMIT: f5117cd9ad1e64f6c2d970ecaa42fc09ccb23144 + context: ./ + dockerfile: annotators/NER_deeppavlov/Dockerfile + command: flask run -h 0.0.0.0 -p 8021 + environment: + - FLASK_APP=server + - CUDA_VISIBLE_DEVICES=0 + tty: true + deploy: + resources: + limits: + memory: 2G + reservations: + memory: 2G + + factoid-qa: + env_file: [ .env ] + build: + args: + SERVICE_PORT: 8071 + SERVICE_NAME: factoid_qa + context: . 
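+      # SERVICE_PORT and SERVICE_NAME are build args that the service Dockerfiles
+      # typically re-export as ENV variables; the port chosen here must agree with
+      # the command below, WAIT_HOSTS, and the service URL in pipeline_conf.json.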
+ dockerfile: ./skills/factoid_qa/Dockerfile + command: flask run -h 0.0.0.0 -p 8071 + environment: + - FLASK_APP=server + deploy: + resources: + limits: + memory: 256M + reservations: + memory: 256M + + entity-linking: + env_file: [ .env ] + build: + args: + SERVICE_PORT: 8075 + SERVICE_NAME: entity_linking + CONFIG: entity_linking_eng.json + SRC_DIR: annotators/entity_linking + context: ./ + dockerfile: annotators/entity_linking/Dockerfile + environment: + - CUDA_VISIBLE_DEVICES=0 + deploy: + resources: + limits: + memory: 2.5G + reservations: + memory: 2.5G + + wiki-parser: + env_file: [ .env ] + build: + args: + SERVICE_PORT: 8077 + SERVICE_NAME: wiki_parser + WIKI_LITE_DB: http://files.deeppavlov.ai/kbqa/wikidata/wikidata2022.hdt + WIKI_LITE_INDEX_DB: http://files.deeppavlov.ai/kbqa/wikidata/wikidata2022.hdt.index.v1-1 + WIKI_CACHE_DB: http://files.deeppavlov.ai/kbqa/wikidata/wikidata_cache.json + CONFIG: wiki_parser.json + SRC_DIR: annotators/wiki_parser + COMMIT: ff5b156d16a949c3ec99da7fb60ae907dec37a41 + FAST: 1 + context: ./ + dockerfile: annotators/wiki_parser/Dockerfile + command: flask run -h 0.0.0.0 -p 8077 + environment: + - CUDA_VISIBLE_DEVICES='' + - FLASK_APP=server + deploy: + resources: + limits: + memory: 256M + reservations: + memory: 256M + + text-qa: + env_file: [ .env ] + build: + args: + SERVICE_PORT: 8078 + SERVICE_NAME: text_qa + CONFIG: qa_eng.json + context: services/text_qa + dockerfile: Dockerfile + command: flask run -h 0.0.0.0 -p 8078 + environment: + - CUDA_VISIBLE_DEVICES=0 + - FLASK_APP=server + - LANGUAGE=EN + deploy: + resources: + limits: + memory: 3G + reservations: + memory: 3G + + kbqa: + env_file: [ .env ] + build: + args: + SERVICE_PORT: 8072 + SERVICE_NAME: kbqa + CONFIG: kbqa_cq_mt_bert_lite.json + SRC_DIR: annotators/kbqa/ + COMMIT: 283a25e322e8fedc6ff0c159e4ec76bb165ae405 + context: ./ + dockerfile: annotators/kbqa/Dockerfile + command: flask run -h 0.0.0.0 -p 8072 + environment: + - CUDA_VISIBLE_DEVICES=0 + - FLASK_APP=server + deploy: + resources: + limits: + memory: 5G + reservations: + memory: 5G + + combined-classification: + env_file: [ .env ] + build: + args: + SERVICE_PORT: 8087 + SERVICE_NAME: combined_classification + CONFIG: combined_classifier.json + context: . 
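+      # A single gunicorn worker with a long --timeout is used below: the multitask
+      # model can take minutes to load, and extra workers would each hold their own
+      # copy of it in GPU memory.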
+ dockerfile: ./annotators/combined_classification/Dockerfile + command: gunicorn --workers=1 server:app -b 0.0.0.0:8087 --timeout 600 + environment: + - CUDA_VISIBLE_DEVICES=0 + deploy: + resources: + limits: + memory: 2G + reservations: + memory: 2G + + fact-retrieval: + env_file: [ .env ] + build: + args: + SERVICE_PORT: 8100 + SERVICE_NAME: fact_retrieval + CONFIG: configs/fact_retrieval_page.json + CONFIG_WIKI: configs/page_extractor.json + CONFIG_WHOW: configs/whow_page_extractor.json + SRC_DIR: annotators/fact_retrieval/ + COMMIT: 4b3e60c407644b750c9dc292ac6bf206081fb9d0 + N_FACTS: 3 + context: ./ + dockerfile: annotators/fact_retrieval/Dockerfile + command: flask run -h 0.0.0.0 -p 8100 + environment: + - CUDA_VISIBLE_DEVICES=0 + - FLASK_APP=server + deploy: + resources: + limits: + memory: 4G + reservations: + memory: 4G + + entity-detection: + env_file: [ .env ] + build: + args: + SERVICE_NAME: entity_detection + SEQ_TAG_CONFIG: wikipedia_entity_detection_distilbert.json + CONFIG: entity_detection_eng.json + LOWERCASE: 1 + SERVICE_PORT: 8103 + SRC_DIR: annotators/entity_detection/ + FINEGRAINED: 0 + context: ./ + dockerfile: annotators/entity_detection/Dockerfile + command: flask run -h 0.0.0.0 -p 8103 + environment: + - FLASK_APP=server + - CUDA_VISIBLE_DEVICES=0 + deploy: + resources: + limits: + memory: 2.5G + reservations: + memory: 2.5G + + prompt-selector: + env_file: [ .env ] + build: + args: + SERVICE_PORT: 8135 + SERVICE_NAME: prompt_selector + SENTENCE_RANKER_SERVICE_URL: http://sentence-ranker:8128/respond + N_SENTENCES_TO_RETURN: 3 + PROMPTS_TO_CONSIDER: dream_persona,dream_faq + context: . + dockerfile: ./annotators/prompt_selector/Dockerfile + command: flask run -h 0.0.0.0 -p 8135 + environment: + - FLASK_APP=server + deploy: + resources: + limits: + memory: 100M + reservations: + memory: 100M + + sentence-ranker: + env_file: [ .env ] + build: + args: + SERVICE_PORT: 8128 + SERVICE_NAME: sentence_ranker + PRETRAINED_MODEL_NAME_OR_PATH: sentence-transformers/all-MiniLM-L6-v2 + context: ./services/sentence_ranker/ + command: flask run -h 0.0.0.0 -p 8128 + environment: + - CUDA_VISIBLE_DEVICES=0 + - FLASK_APP=server + deploy: + resources: + limits: + memory: 3G + reservations: + memory: 3G + + openai-api-chatgpt: + env_file: [ .env ] + build: + args: + SERVICE_PORT: 8145 + SERVICE_NAME: openai_api_chatgpt + PRETRAINED_MODEL_NAME_OR_PATH: gpt-3.5-turbo + context: . + dockerfile: ./services/openai_api_lm/Dockerfile + command: flask run -h 0.0.0.0 -p 8145 + environment: + - CUDA_VISIBLE_DEVICES=0 + - FLASK_APP=server + deploy: + resources: + limits: + memory: 500M + reservations: + memory: 100M + + dff-dream-persona-chatgpt-prompted-skill: + env_file: [ .env,.env_secret ] + build: + args: + SERVICE_PORT: 8137 + SERVICE_NAME: dff_dream_persona_prompted_skill + PROMPT_FILE: common/prompts/dream_persona.json + GENERATIVE_SERVICE_URL: http://openai-api-chatgpt:8145/respond + GENERATIVE_SERVICE_CONFIG: openai-chatgpt.json + GENERATIVE_TIMEOUT: 120 + N_UTTERANCES_CONTEXT: 7 + ENVVARS_TO_SEND: OPENAI_API_KEY,OPENAI_ORGANIZATION + context: . + dockerfile: ./skills/dff_template_prompted_skill/Dockerfile + deploy: + resources: + limits: + memory: 128M + reservations: + memory: 128M + + dff-google-api-skill: + env_file: [ .env,.env_secret ] + build: + args: + SERVICE_PORT: 8162 + SERVICE_NAME: dff_google_api_skill + ENVVARS_TO_SEND: OPENAI_API_KEY,GOOGLE_CSE_ID,GOOGLE_API_KEY + context: . 
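+      # ENVVARS_TO_SEND lists variable names (not values) whose runtime values the
+      # skill forwards to the external APIs; the keys themselves are supplied via
+      # .env_secret rather than being baked into the image.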
+ dockerfile: ./skills/dff_google_api_skill/Dockerfile + deploy: + resources: + limits: + memory: 128M + reservations: + memory: 128M + + property-extraction: + env_file: [.env] + build: + args: + CONFIG: t5_generative_ie_lite_infer.json + SERVICE_PORT: 8136 + SRC_DIR: annotators/property_extraction/ + SERVICE_NAME: property_extraction + context: ./ + dockerfile: annotators/property_extraction/Dockerfile + command: flask run -h 0.0.0.0 -p 8136 + environment: + - FLASK_APP=server + deploy: + resources: + limits: + memory: 7G + reservations: + memory: 7G + + dff-dream-faq-prompted-skill: + env_file: [ .env,.env_secret ] + build: + args: + SERVICE_PORT: 8170 + SERVICE_NAME: dff_dream_faq_prompted_skill + PROMPT_FILE: common/prompts/dream_faq.json + GENERATIVE_SERVICE_URL: http://openai-api-chatgpt-16k:8167/respond + GENERATIVE_SERVICE_CONFIG: openai-chatgpt.json + GENERATIVE_TIMEOUT: 120 + N_UTTERANCES_CONTEXT: 7 + ENVVARS_TO_SEND: OPENAI_API_KEY,OPENAI_ORGANIZATION + context: . + dockerfile: ./skills/dff_template_prompted_skill/Dockerfile + deploy: + resources: + limits: + memory: 128M + reservations: + memory: 128M + + openai-api-chatgpt-16k: + env_file: [ .env ] + build: + args: + SERVICE_PORT: 8167 + SERVICE_NAME: openai_api_chatgpt_16k + PRETRAINED_MODEL_NAME_OR_PATH: gpt-3.5-turbo-16k + context: . + dockerfile: ./services/openai_api_lm/Dockerfile + command: flask run -h 0.0.0.0 -p 8167 + environment: + - FLASK_APP=server + deploy: + resources: + limits: + memory: 500M + reservations: + memory: 100M + + summarization-annotator: + env_file: [ .env ] + build: + args: + SERVICE_PORT: 8058 + SERVICE_NAME: summarization_annotator + SUMMARIZATION_REQUEST_TIMEOUT: 10 + context: ./annotators/summarization_annotator/ + command: flask run -h 0.0.0.0 -p 8058 + environment: + - FLASK_APP=server + deploy: + resources: + limits: + memory: 256M + reservations: + memory: 256M + + dialog-summarizer: + env_file: [ .env ] + build: + args: + SERVICE_PORT: 8059 + SERVICE_NAME: dialog_summarizer + PRETRAINED_MODEL_NAME: "knkarthick/MEETING_SUMMARY" + context: ./services/dialog_summarizer/ + command: flask run -h 0.0.0.0 -p 8059 + environment: + - CUDA_VISIBLE_DEVICES=0 + - FLASK_APP=server + deploy: + resources: + limits: + memory: 4G + reservations: + memory: 4G + +version: '3.7' diff --git a/assistant_dists/dream_ranking_and_midas_based_dm/gpu1.yml b/assistant_dists/dream_ranking_and_midas_based_dm/gpu1.yml new file mode 100644 index 0000000000..acef2c59f5 --- /dev/null +++ b/assistant_dists/dream_ranking_and_midas_based_dm/gpu1.yml @@ -0,0 +1,78 @@ +services: + agent: + restart: unless-stopped + volumes: + - "/cephfs/home/ignatov/artifacts:/output" + - ".:/dp-agent" + ports: + - ${AGENT_PORT}:4242 + kbqa: + restart: unless-stopped + volumes: + - "~/.deeppavlov:/root/.deeppavlov" + environment: + - CUDA_VISIBLE_DEVICES=5 + text-qa: + restart: unless-stopped + volumes: + - "~/.deeppavlov:/root/.deeppavlov" + environment: + - CUDA_VISIBLE_DEVICES=5 + combined-classification: + restart: unless-stopped + environment: + - CUDA_VISIBLE_DEVICES=7 + mongo: + restart: unless-stopped + command: mongod + image: mongo:4.0.0 + # # you can use persistent local volume if you need + # volumes: + # - ./venv/data/db_data:/root/data/db + sentseg: + restart: unless-stopped + ranking-based-response-selector: + restart: unless-stopped + dff-intent-responder-skill: + restart: unless-stopped + intent-catcher: + restart: unless-stopped + environment: + - CUDA_VISIBLE_DEVICES=9 + ner: + restart: unless-stopped + volumes: + - 
"~/.deeppavlov:/root/.deeppavlov" + environment: + - CUDA_VISIBLE_DEVICES=7 + factoid-qa: + restart: unless-stopped + entity-linking: + restart: unless-stopped + volumes: + - "~/.deeppavlov:/root/.deeppavlov" + wiki-parser: + restart: unless-stopped + volumes: + - "~/.deeppavlov:/root/.deeppavlov" + fact-retrieval: + restart: unless-stopped + volumes: + - "~/.deeppavlov:/root/.deeppavlov" + environment: + - CUDA_VISIBLE_DEVICES=8 + entity-detection: + restart: unless-stopped + volumes: + - "~/.deeppavlov:/root/.deeppavlov" + environment: + - CUDA_VISIBLE_DEVICES=9 + sentence-ranker: + restart: unless-stopped + environment: + - CUDA_VISIBLE_DEVICES=9 + property-extraction: + restart: unless-stopped + volumes: + - "~/.deeppavlov:/root/.deeppavlov" +version: '3.7' diff --git a/assistant_dists/dream_ranking_and_midas_based_dm/pipeline_conf.json b/assistant_dists/dream_ranking_and_midas_based_dm/pipeline_conf.json new file mode 100644 index 0000000000..649e44d14a --- /dev/null +++ b/assistant_dists/dream_ranking_and_midas_based_dm/pipeline_conf.json @@ -0,0 +1,570 @@ +{ + "connectors": { + "sentseg": { + "protocol": "http", + "timeout": 1.5, + "url": "http://sentseg:8011/sentseg" + }, + "ner": { + "protocol": "http", + "timeout": 1.5, + "url": "http://ner:8021/ner" + } + }, + "services": { + "last_chance_service": { + "connector": { + "protocol": "python", + "class_name": "PredefinedTextConnector", + "response_text": "Sorry, something went wrong inside. Please tell me, what did you say.", + "annotations": { + "sentseg": { + "punct_sent": "Sorry, something went wrong inside. Please tell me, what did you say.", + "segments": [ + "Sorry, something went wrong inside.", + "Please tell me, what did you say." + ] + }, + "ner": [ + [] + ] + } + }, + "state_manager_method": "add_bot_utterance_last_chance", + "tags": [ + "last_chance" + ], + "is_enabled": true, + "source": { + "component": "components/skjdfhow389rhuweih1982ehbjdfh.yml", + "service": "services/agent_services/service_configs/dream_ranking_and_midas_based_dm" + } + }, + "timeout_service": { + "connector": { + "protocol": "python", + "class_name": "PredefinedTextConnector", + "response_text": "Sorry, I need to think more on that. Let's talk about something else.", + "annotations": { + "sentseg": { + "punct_sent": "Sorry, I need to think more on that. Let's talk about something else.", + "segments": [ + "Sorry, I need to think more on that.", + "Let's talk about something else." 
+ ] + }, + "ner": [ + [] + ] + } + }, + "state_manager_method": "add_bot_utterance_last_chance", + "tags": [ + "timeout" + ], + "is_enabled": true, + "source": { + "component": "components/dciuh4ikfjg43jhwefoi.yml", + "service": "services/agent_services/service_configs/dream_ranking_and_midas_based_dm" + } + }, + "annotators": { + "sentseg": { + "connector": { + "protocol": "http", + "timeout": 1.5, + "url": "http://sentseg:8011/sentseg" + }, + "dialog_formatter": "state_formatters.dp_formatters:preproc_last_human_utt_dialog", + "response_formatter": "state_formatters.dp_formatters:simple_formatter_service", + "previous_services": [], + "state_manager_method": "add_annotation", + "is_enabled": true, + "source": { + "component": "components/gM4fEjvVqLlSRRRkQfds2g.yml", + "service": "annotators/SentSeg/service_configs/sentseg" + } + }, + "prompt_goals_collector": { + "connector": { + "protocol": "http", + "timeout": 2.0, + "url": "http://prompt-selector:8135/collect_goals" + }, + "dialog_formatter": "state_formatters.dp_formatters:prompts_goals_collector_formatter", + "response_formatter": "state_formatters.dp_formatters:simple_formatter_service", + "previous_services": [], + "state_manager_method": "update_attributes", + "is_enabled": true, + "source": { + "component": "components/tK0hTk4TyMj7.yml", + "service": "annotators/prompt_selector/service_configs/dream_persona_openai_prompted" + } + }, + "prompt_selector": { + "connector": { + "protocol": "http", + "timeout": 2.0, + "url": "http://prompt-selector:8135/respond" + }, + "dialog_formatter": "state_formatters.dp_formatters:context_formatter_dialog", + "response_formatter": "state_formatters.dp_formatters:simple_formatter_service", + "previous_services": [ + "annotators.prompt_goals_collector" + ], + "state_manager_method": "add_annotation", + "is_enabled": true, + "source": { + "component": "components/tK0hTk4TyMj7.yml", + "service": "annotators/prompt_selector/service_configs/dream_persona_openai_prompted" + } + }, + "intent_catcher": { + "connector": { + "protocol": "http", + "timeout": 1.0, + "url": "http://intent-catcher:8014/detect" + }, + "dialog_formatter": "state_formatters.dp_formatters:last_utt_sentseg_segments_dialog", + "response_formatter": "state_formatters.dp_formatters:simple_formatter_service", + "previous_services": [ + "annotators.sentseg" + ], + "state_manager_method": "add_annotation", + "is_enabled": true, + "source": { + "component": "components/1IjC3r9b1VJ082ceINXzHQ.yml", + "service": "annotators/IntentCatcherTransformers/service_configs/intent-catcher" + } + }, + "fact_retrieval": { + "connector": { + "protocol": "http", + "timeout": 1.0, + "url": "http://fact-retrieval:8100/model" + }, + "dialog_formatter": "state_formatters.dp_formatters:fact_retrieval_formatter_dialog", + "response_formatter": "state_formatters.dp_formatters:simple_formatter_service", + "previous_services": [ + "annotators.sentseg", + "annotators.entity_linking" + ], + "state_manager_method": "add_annotation", + "is_enabled": true, + "source": { + "component": "components/sVjXygxsPhjLEWd2acwcEA.yml", + "service": "annotators/fact_retrieval/service_configs/fact-retrieval" + } + }, + "ner": { + "connector": { + "protocol": "http", + "timeout": 1.5, + "url": "http://ner:8021/ner" + }, + "dialog_formatter": "state_formatters.dp_formatters:ner_formatter_dialog", + "response_formatter": "state_formatters.dp_formatters:simple_formatter_service", + "previous_services": [ + "annotators.sentseg" + ], + "state_manager_method": "add_annotation", + 
"is_enabled": true, + "source": { + "component": "components/3RDNPBdybjBlSQZqcc7nGQ.yml", + "service": "annotators/NER_deeppavlov/service_configs/ner" + } + }, + "entity_detection": { + "connector": { + "protocol": "http", + "timeout": 1.0, + "url": "http://entity-detection:8103/respond" + }, + "dialog_formatter": "state_formatters.dp_formatters:entity_detection_formatter_dialog", + "response_formatter": "state_formatters.dp_formatters:simple_formatter_service", + "previous_services": [ + "annotators.sentseg" + ], + "state_manager_method": "add_annotation", + "is_enabled": true, + "source": { + "component": "components/05PqJXVd7gV7DqslN5z3A.yml", + "service": "annotators/entity_detection/service_configs/entity-detection" + } + }, + "kbqa": { + "connector": { + "protocol": "http", + "timeout": 1.0, + "url": "http://kbqa:8072/model" + }, + "dialog_formatter": "state_formatters.dp_formatters:kbqa_formatter_dialog", + "response_formatter": "state_formatters.dp_formatters:simple_formatter_service", + "required_previous_services": [ + "annotators.entity_linking" + ], + "state_manager_method": "add_annotation", + "is_enabled": true, + "source": { + "component": "components/3clxaNOTpI3oHR0fHRaCnQ.yml", + "service": "annotators/kbqa/service_configs/kbqa" + } + }, + "entity_linking": { + "connector": { + "protocol": "http", + "timeout": 1.0, + "url": "http://entity-linking:8075/model" + }, + "dialog_formatter": "state_formatters.dp_formatters:el_formatter_dialog", + "response_formatter": "state_formatters.dp_formatters:simple_formatter_service", + "previous_services": [ + "annotators.ner", + "annotators.entity_detection", + "annotators.spacy_nounphrases" + ], + "state_manager_method": "add_annotation", + "is_enabled": true, + "source": { + "component": "components/M1sE6hOm20EGBWBdr0vIOw.yml", + "service": "annotators/entity_linking/service_configs/entity-linking" + } + }, + "wiki_parser": { + "connector": { + "protocol": "http", + "timeout": 1.0, + "url": "http://wiki-parser:8077/model" + }, + "dialog_formatter": "state_formatters.dp_formatters:wp_formatter_dialog", + "response_formatter": "state_formatters.dp_formatters:simple_formatter_service", + "required_previous_services": [ + "annotators.entity_linking" + ], + "state_manager_method": "add_annotation", + "is_enabled": true, + "source": { + "component": "components/O4FVnkAwjay1mL1FbuRGWw.yml", + "service": "annotators/wiki_parser/service_configs/wiki-parser" + } + }, + "combined_classification": { + "connector": { + "protocol": "http", + "timeout": 3.0, + "url": "http://combined-classification:8087/model" + }, + "dialog_formatter": "state_formatters.dp_formatters:preproc_last_human_utt_dialog_w_hist", + "response_formatter": "state_formatters.dp_formatters:simple_formatter_service", + "previous_services": [], + "state_manager_method": "add_annotation", + "is_enabled": true, + "source": { + "component": "components/PbLNvh4hrvs47rPaf2bfYQ.yml", + "service": "annotators/combined_classification/service_configs/combined-classification" + } + }, + "summarization_annotator": { + "connector": { + "protocol": "http", + "timeout": 10.0, + "url": "http://summarization-annotator:8058/respond" + }, + "dialog_formatter": "state_formatters.dp_formatters:summarization_annotator_formatter", + "response_formatter": "state_formatters.dp_formatters:simple_formatter_service", + "previous_services": [ + "annotators.spelling_preprocessing" + ], + "state_manager_method": "update_attributes", + "is_enabled": true, + "source": { + "component": 
"components/riRfdGz86P51B9bL7fO6JR.yml", + "service": "annotators/summarization_annotator/service_configs/summarization-annotator" + } + } + }, + "response_annotators": { + "sentseg": { + "connector": { + "protocol": "http", + "timeout": 1.5, + "url": "http://sentseg:8011/sentseg" + }, + "dialog_formatter": "state_formatters.dp_formatters:last_bot_utt_dialog", + "response_formatter": "state_formatters.dp_formatters:simple_formatter_service", + "previous_services": [ + "response_annotator_selectors" + ], + "state_manager_method": "add_annotation_prev_bot_utt", + "is_enabled": true, + "source": { + "component": "components/1Q9QXih1U2zhCpVm9zxdsA.yml", + "service": "annotators/SentSeg/service_configs/sentseg" + } + }, + "ner": { + "connector": { + "protocol": "http", + "timeout": 1.5, + "url": "http://ner:8021/ner" + }, + "dialog_formatter": "state_formatters.dp_formatters:ner_formatter_last_bot_dialog", + "response_formatter": "state_formatters.dp_formatters:simple_formatter_service", + "previous_services": [ + "response_annotator_selectors", + "response_annotators.sentseg" + ], + "state_manager_method": "add_annotation_prev_bot_utt", + "is_enabled": true, + "source": { + "component": "components/3RDNPBdybjBlSQZqcc7nGQ.yml", + "service": "annotators/NER_deeppavlov/service_configs/ner" + } + } + }, + "response_annotator_selectors": { + "connector": { + "protocol": "python", + "class_name": "skill_selectors.post_annotator_selector.connector:PostAnnotatorSelectorConnector", + "annotator_names": [ + "sentseg", + "ner" + ] + }, + "response_formatter": "state_formatters.dp_formatters:simple_formatter_service", + "tags": [ + "selector" + ], + "is_enabled": true, + "source": { + "component": "components/LXrJDIf43gwNmPMNXG5Eg.yml", + "service": "services/response_annotator_selectors/service_configs/agent" + } + }, + "candidate_annotators": { + "entity_detection": { + "connector": { + "protocol": "http", + "timeout": 2.0, + "url": "http://entity-detection:8103/respond_batch" + }, + "dialog_formatter": "state_formatters.dp_formatters:hypotheses_list", + "response_formatter": "state_formatters.dp_formatters:simple_formatter_service", + "previous_services": [ + "skills" + ], + "state_manager_method": "add_hypothesis_annotation_batch", + "is_enabled": true, + "source": { + "component": "components/05PqJXVd7gV7DqslN5z3A.yml", + "service": "annotators/entity_detection/service_configs/entity-detection" + } + }, + "combined_classification": { + "connector": { + "protocol": "http", + "timeout": 2.0, + "url": "http://combined-classification:8087/batch_model" + }, + "dialog_formatter": "state_formatters.dp_formatters:hypothesis_histories_list", + "response_formatter": "state_formatters.dp_formatters:simple_formatter_service", + "previous_services": [ + "skills" + ], + "state_manager_method": "add_hypothesis_annotation_batch", + "is_enabled": true, + "source": { + "component": "components/PbLNvh4hrvs47rPaf2bfYQ.yml", + "service": "annotators/combined_classification/service_configs/combined-classification" + } + }, + "sentence_ranker": { + "connector": { + "protocol": "http", + "timeout": 1.0, + "url": "http://sentence-ranker:8128/respond" + }, + "dialog_formatter": "state_formatters.dp_formatters:sentence_ranker_formatter", + "response_formatter": "state_formatters.dp_formatters:simple_formatter_service", + "previous_services": [ + "skills" + ], + "state_manager_method": "add_hypothesis_annotation_batch", + "is_enabled": true, + "source": { + "component": "components/XGwmAHtAOu0NDqqG3QCJw.yml", + "service": 
"services/sentence_ranker/service_configs/sentence-ranker" + } + } + }, + "skill_selectors": { + "description_based_skill_selector": { + "connector": { + "protocol": "python", + "class_name": "skill_selectors.description_based_skill_selector.connector:DescriptionBasedSkillSelectorConnector" + }, + "dialog_formatter": "state_formatters.dp_formatters:base_skill_selector_formatter_dialog", + "response_formatter": "state_formatters.dp_formatters:simple_formatter_service", + "previous_services": [ + "annotators" + ], + "tags": [ + "selector" + ], + "is_enabled": true, + "source": { + "component": "components/dfsw4bji8bgjq2.yml", + "service": "skill_selectors/description_based_skill_selector/service_configs/agent" + } + } + }, + "skills": { + "dff_dream_persona_prompted_skill": { + "connector": { + "protocol": "http", + "timeout": 120.0, + "url": "http://dff-dream-persona-chatgpt-prompted-skill:8137/respond" + }, + "dialog_formatter": { + "name": "state_formatters.dp_formatters:dff_prompted_skill_formatter", + "skill_name": "dff_dream_persona_prompted_skill" + }, + "response_formatter": "state_formatters.dp_formatters:skill_with_attributes_formatter_service", + "previous_services": [ + "skill_selectors" + ], + "state_manager_method": "add_hypothesis", + "is_enabled": true, + "source": { + "component": "components/W6hdAGshQyMwdQukRXXuKA.yml", + "service": "skills/dff_template_prompted_skill/service_configs/dff-dream-persona-chatgpt-prompted-skill" + } + }, + "dff_google_api_skill": { + "connector": { + "protocol": "http", + "timeout": 120.0, + "url": "http://dff-google-api-skill:8162/respond" + }, + "dialog_formatter": { + "name": "state_formatters.dp_formatters:dff_prompted_skill_formatter", + "skill_name": "dff_google_api_skill" + }, + "response_formatter": "state_formatters.dp_formatters:skill_with_attributes_formatter_service", + "previous_services": [ + "skill_selectors" + ], + "state_manager_method": "add_hypothesis", + "is_enabled": true, + "source": { + "component": "components/VJ7c3sLqEi.yml", + "service": "skills/dff_google_api_skill/service_configs/dff-google-api-skill" + } + }, + "dff_intent_responder_skill": { + "connector": { + "protocol": "http", + "timeout": 2.0, + "url": "http://dff-intent-responder-skill:8012/respond" + }, + "dialog_formatter": "state_formatters.dp_formatters:dff_intent_responder_skill_formatter", + "response_formatter": "state_formatters.dp_formatters:skill_with_attributes_formatter_service", + "previous_services": [ + "skill_selectors" + ], + "state_manager_method": "add_hypothesis", + "is_enabled": true, + "source": { + "component": "components/CmQGq1Xn5YOaMwNIb4bEpA.yml", + "service": "skills/dff_intent_responder_skill/service_configs/dff-intent-responder-skill" + } + }, + "dummy_skill": { + "connector": { + "protocol": "python", + "class_name": "skills.dummy_skill.connector:DummySkillConnector" + }, + "dialog_formatter": "state_formatters.dp_formatters:utt_sentrewrite_modified_last_dialog", + "response_formatter": "state_formatters.dp_formatters:skill_with_attributes_formatter_service", + "previous_services": [ + "skill_selectors" + ], + "state_manager_method": "add_hypothesis", + "is_enabled": true, + "source": { + "component": "components/uYkoK0vRp4bbIg9akI1yw.yml", + "service": "skills/dummy_skill/service_configs/agent" + } + }, + "factoid_qa": { + "connector": { + "protocol": "http", + "timeout": 2.0, + "url": "http://factoid-qa:8071/respond" + }, + "dialog_formatter": "state_formatters.dp_formatters:utt_sentseg_punct_dialog", + "response_formatter": 
"state_formatters.dp_formatters:skill_with_attributes_formatter_service", + "previous_services": [ + "skill_selectors" + ], + "state_manager_method": "add_hypothesis", + "is_enabled": true, + "source": { + "component": "components/qx0j5QHAzog0b39nRnuA.yml", + "service": "skills/factoid_qa/service_configs/factoid-qa" + } + }, + "dff_dream_faq_prompted_skill": { + "connector": { + "protocol": "http", + "timeout": 120.0, + "url": "http://dff-dream-faq-prompted-skill:8170/respond" + }, + "dialog_formatter": { + "name": "state_formatters.dp_formatters:dff_prompted_skill_formatter", + "skill_name": "dff_dream_faq_prompted_skill" + }, + "response_formatter": "state_formatters.dp_formatters:skill_with_attributes_formatter_service", + "previous_services": [ + "skill_selectors" + ], + "state_manager_method": "add_hypothesis", + "is_enabled": true, + "source": { + "component": "components/jFmKPqMJh0.yml", + "service": "skills/dff_template_prompted_skill/service_configs/dff-dream-faq-prompted-skill" + } + } + }, + "response_selectors": { + "response_selector": { + "connector": { + "protocol": "http", + "timeout": 1.0, + "url": "http://ranking-and-intent-based-response-selector:8081/respond" + }, + "dialog_formatter": "state_formatters.dp_formatters:cropped_dialog", + "response_formatter": "state_formatters.dp_formatters:base_response_selector_formatter_service", + "previous_services": [ + "candidate_annotators" + ], + "state_manager_method": "add_bot_utterance", + "is_enabled": true, + "source": { + "component": "components/ksDjnfoiwur902hriwnefkwfi2.yml", + "service": "response_selectors/ranking_and_intent_based_response_selector/service_configs/ranking-and-intent-based-response-selector" + } + } + } + }, + "metadata": { + "display_name": "Dream with Ranking- and MIDAS-based DM", + "author": "DeepPavlov", + "description": "DeepPavlov Dream Distribution with Ranking- and MIDAS-based Dialog Management", + "version": "0.1.0", + "date_created": "2022-12-12T12:12:00", + "ram_usage": "20 GB", + "gpu_usage": "20 GB", + "disk_usage": "20 GB" + } +} \ No newline at end of file diff --git a/assistant_dists/dream_ranking_and_midas_based_dm/proxy.yml b/assistant_dists/dream_ranking_and_midas_based_dm/proxy.yml new file mode 100644 index 0000000000..754609de10 --- /dev/null +++ b/assistant_dists/dream_ranking_and_midas_based_dm/proxy.yml @@ -0,0 +1,128 @@ +services: + + sentseg: + command: ["nginx", "-g", "daemon off;"] + build: + context: dp/proxy/ + dockerfile: Dockerfile + environment: + - PROXY_PASS=proxy.deeppavlov.ai:8011 + - PORT=8011 + + dff-intent-responder-skill: + command: ["nginx", "-g", "daemon off;"] + build: + context: dp/proxy/ + dockerfile: Dockerfile + environment: + - PROXY_PASS=proxy.deeppavlov.ai:8012 + - PORT=8012 + + intent-catcher: + command: ["nginx", "-g", "daemon off;"] + build: + context: dp/proxy/ + dockerfile: Dockerfile + environment: + - PROXY_PASS=proxy.deeppavlov.ai:8014 + - PORT=8014 + + ner: + command: ["nginx", "-g", "daemon off;"] + build: + context: dp/proxy/ + dockerfile: Dockerfile + environment: + - PROXY_PASS=proxy.deeppavlov.ai:8021 + - PORT=8021 + + factoid-qa: + command: ["nginx", "-g", "daemon off;"] + build: + context: dp/proxy/ + dockerfile: Dockerfile + environment: + - PROXY_PASS=proxy.deeppavlov.ai:8071 + - PORT=8071 + + text-qa: + command: [ "nginx", "-g", "daemon off;" ] + build: + context: dp/proxy/ + dockerfile: Dockerfile + environment: + - PROXY_PASS=proxy.deeppavlov.ai:8078 + - PORT=8078 + + kbqa: + command: ["nginx", "-g", "daemon off;"] + build: + 
context: dp/proxy/ + dockerfile: Dockerfile + environment: + - PROXY_PASS=proxy.deeppavlov.ai:8072 + - PORT=8072 + + entity-linking: + command: [ "nginx", "-g", "daemon off;" ] + build: + context: dp/proxy/ + dockerfile: Dockerfile + environment: + - PROXY_PASS=proxy.deeppavlov.ai:8075 + - PORT=8075 + + wiki-parser: + command: [ "nginx", "-g", "daemon off;" ] + build: + context: dp/proxy/ + dockerfile: Dockerfile + environment: + - PROXY_PASS=proxy.deeppavlov.ai:8077 + - PORT=8077 + + combined-classification: + command: ["nginx", "-g", "daemon off;"] + build: + context: dp/proxy/ + dockerfile: Dockerfile + environment: + - PROXY_PASS=proxy.deeppavlov.ai:8087 + - PORT=8087 + + fact-retrieval: + command: [ "nginx", "-g", "daemon off;" ] + build: + context: dp/proxy/ + dockerfile: Dockerfile + environment: + - PROXY_PASS=proxy.deeppavlov.ai:8100 + - PORT=8100 + + entity-detection: + command: [ "nginx", "-g", "daemon off;" ] + build: + context: dp/proxy/ + dockerfile: Dockerfile + environment: + - PROXY_PASS=proxy.deeppavlov.ai:8103 + - PORT=8103 + + sentence-ranker: + command: [ "nginx", "-g", "daemon off;" ] + build: + context: dp/proxy/ + dockerfile: Dockerfile + environment: + - PROXY_PASS=proxy.deeppavlov.ai:8128 + - PORT=8128 + + property-extraction: + command: [ "nginx", "-g", "daemon off;" ] + build: + context: dp/proxy/ + dockerfile: Dockerfile + environment: + - PROXY_PASS=proxy.deeppavlov.ai:8136 + - PORT=8136 +version: '3.7' diff --git a/assistant_dists/dream_ranking_and_midas_based_dm/telegram.yml b/assistant_dists/dream_ranking_and_midas_based_dm/telegram.yml new file mode 100644 index 0000000000..7166693ea7 --- /dev/null +++ b/assistant_dists/dream_ranking_and_midas_based_dm/telegram.yml @@ -0,0 +1,17 @@ +services: + agent-tg: + command: sh -c 'bin/wait && python -m deeppavlov_agent.run agent.channel=telegram agent.telegram_token=$TG_TOKEN agent.pipeline_config=assistant_dists/dream_ranking_and_midas_based_dm/pipeline_conf.json agent.db_config=assistant_dists/dream_ranking_and_midas_based_dm/db_conf.json' + env_file: [.env] + build: + context: ./ + dockerfile: dockerfile_agent + deploy: + resources: + limits: + memory: 4G + reservations: + memory: 2G + volumes: + - ".:/dp-agent" + +version: '3.7' diff --git a/assistant_dists/dream_ranking_and_midas_based_dm/test.yml b/assistant_dists/dream_ranking_and_midas_based_dm/test.yml new file mode 100644 index 0000000000..5035cb6254 --- /dev/null +++ b/assistant_dists/dream_ranking_and_midas_based_dm/test.yml @@ -0,0 +1,74 @@ +services: + agent: + volumes: + - "/cephfs/home/ignatov/artifacts:/output" + ports: + - ${AGENT_PORT}:4242 + kbqa: + volumes: + - "~/.deeppavlov:/root/.deeppavlov" + - "~/.deeppavlov/cache:/root/.cache" + environment: + - CUDA_VISIBLE_DEVICES=5 + text-qa: + volumes: + - "~/.deeppavlov:/root/.deeppavlov" + - "~/.deeppavlov/cache:/root/.cache" + environment: + - CUDA_VISIBLE_DEVICES=5 + combined-classification: + volumes: + - "~/.deeppavlov:/root/.deeppavlov" + - "~/.deeppavlov/cache:/root/.cache" + environment: + - CUDA_VISIBLE_DEVICES=8 + mongo: + command: mongod + image: mongo:4.0.0 + # # you can use persistent local volume if you need + # volumes: + # - ./venv/data/db_data:/root/data/db + sentseg: + dff-intent-responder-skill: + intent-catcher: + volumes: + - "~/.deeppavlov:/root/.deeppavlov" + - "~/.deeppavlov/cache:/root/.cache" + environment: + - CUDA_VISIBLE_DEVICES=6 + ner: + volumes: + - "~/.deeppavlov:/root/.deeppavlov" + environment: + - CUDA_VISIBLE_DEVICES=7 + factoid-qa: + entity-linking: + volumes: + 
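# All proxy.yml entries above share one shape: an nginx container forwarding a local
# port to proxy.deeppavlov.ai. A hypothetical helper that makes the pattern explicit
# (illustrative only, not part of this patch's tooling; ports mirror the entries above):
def proxy_service(port: int) -> dict:
    return {
        "command": ["nginx", "-g", "daemon off;"],
        "build": {"context": "dp/proxy/", "dockerfile": "Dockerfile"},
        "environment": [f"PROXY_PASS=proxy.deeppavlov.ai:{port}", f"PORT={port}"],
    }

# e.g. proxy_service(8011) reproduces the sentseg entry above.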
- "~/.deeppavlov:/root/.deeppavlov" + - "~/.deeppavlov/cache:/root/.cache" + environment: + - CUDA_VISIBLE_DEVICES=9 + wiki-parser: + volumes: + - "~/.deeppavlov:/root/.deeppavlov" + fact-retrieval: + volumes: + - "~/.deeppavlov:/root/.deeppavlov" + - "~/.deeppavlov/cache:/root/.cache" + environment: + - CUDA_VISIBLE_DEVICES=7 + entity-detection: + volumes: + - "~/.deeppavlov:/root/.deeppavlov" + - "~/.deeppavlov/cache:/root/.cache" + environment: + - CUDA_VISIBLE_DEVICES=8 + sentence-ranker: + volumes: + - "~/.deeppavlov/cache:/root/.cache" + environment: + - CUDA_VISIBLE_DEVICES=9 + property-extraction: + volumes: + - "~/.deeppavlov:/root/.deeppavlov" +version: '3.7' diff --git a/assistant_dists/dream_russian/cpu.yml b/assistant_dists/dream_russian/cpu.yml index 90d736db93..e0bfb705d5 100644 --- a/assistant_dists/dream_russian/cpu.yml +++ b/assistant_dists/dream_russian/cpu.yml @@ -13,7 +13,7 @@ services: sentseg-ru: environment: CUDA_VISIBLE_DEVICES: "" - toxic-classification-ru: + combined-classification-ru: environment: CUDA_VISIBLE_DEVICES: "" intent-catcher-ru: diff --git a/assistant_dists/dream_russian/dev.yml b/assistant_dists/dream_russian/dev.yml index da74533765..001d395a18 100644 --- a/assistant_dists/dream_russian/dev.yml +++ b/assistant_dists/dream_russian/dev.yml @@ -42,12 +42,13 @@ services: - "./common:/src/common" ports: - 8018:8018 - toxic-classification-ru: + combined-classification-ru: volumes: - - "./annotators/toxic_classification_ru:/src" + - "./annotators/combined_classification_ru:/src" - "~/.deeppavlov/cache:/root/.cache" + - "./common:/src/common" ports: - - 8118:8118 + - 8198:8198 ner-ru: volumes: - './annotators/NER_deeppavlov:/src' diff --git a/assistant_dists/dream_russian/docker-compose.override.yml b/assistant_dists/dream_russian/docker-compose.override.yml index eda745fd07..5ec5a43ae2 100644 --- a/assistant_dists/dream_russian/docker-compose.override.yml +++ b/assistant_dists/dream_russian/docker-compose.override.yml @@ -7,8 +7,8 @@ services: ner-ru:8021, personal-info-ru-skill:8030, sentseg-ru:8011, spelling-preprocessing-ru:8074, entity-linking-ru:8075, wiki-parser-ru:8077, dff-generative-ru-skill:8092, dff-friendship-ru-skill:8086, entity-detection-ru:8103, dialogpt-ru:8125, - dff-template-skill:8120, spacy-annotator-ru:8129, dialogrpt-ru:8122, toxic-classification-ru:8118, - fact-retrieval-ru:8110, text-qa-ru:8078, summarization-annotator:8058, rut5-summarizer:8060" + dff-template-skill:8120, spacy-annotator-ru:8129, dialogrpt-ru:8122, + fact-retrieval-ru:8110, text-qa-ru:8078, summarization-annotator:8058, rut5-summarizer:8060, combined-classification-ru:8198" WAIT_HOSTS_TIMEOUT: ${WAIT_TIMEOUT:-1200} HIGH_PRIORITY_INTENTS: 1 RESTRICTION_FOR_SENSITIVE_CASE: 1 @@ -141,25 +141,24 @@ services: reservations: memory: 128M - toxic-classification-ru: + combined-classification-ru: env_file: [ .env_ru ] build: - context: ./annotators/toxic_classification_ru/ args: - SERVICE_PORT: 8118 - PRETRAINED_MODEL_NAME_OR_PATH: s-nlp/russian_toxicity_classifier - LANGUAGE: RU - command: flask run -h 0.0.0.0 -p 8118 + SERVICE_PORT: 8198 + SERVICE_NAME: combined_classification_ru + CONFIG: combined_classifier_ru.json + context: . 
+ dockerfile: ./annotators/combined_classification_ru/Dockerfile + command: gunicorn --workers=1 server:app -b 0.0.0.0:8198 --timeout 600 environment: - CUDA_VISIBLE_DEVICES=0 - - FLASK_APP=server deploy: resources: limits: - memory: 3G + memory: 2G reservations: - memory: 3G - + memory: 2G ner-ru: env_file: [ .env_ru ] build: diff --git a/assistant_dists/dream_russian/pipeline_conf.json b/assistant_dists/dream_russian/pipeline_conf.json index b9e3dc3bc0..e5f8679cae 100644 --- a/assistant_dists/dream_russian/pipeline_conf.json +++ b/assistant_dists/dream_russian/pipeline_conf.json @@ -137,11 +137,11 @@ "service": "annotators/BadlistedWordsDetector_ru/service_configs/badlisted-words-ru" } }, - "toxic_classification": { + "combined_classification_ru": { "connector": { "protocol": "http", "timeout": 1.0, - "url": "http://toxic-classification-ru:8118/respond" + "url": "http://combined-classification-ru:8198/model" }, "dialog_formatter": "state_formatters.dp_formatters:preproc_last_human_utt_dialog", "response_formatter": "state_formatters.dp_formatters:simple_formatter_service", @@ -151,8 +151,8 @@ "state_manager_method": "add_annotation", "is_enabled": true, "source": { - "component": "components/3DTP1sVdmFIBYrC8OjRYA.yml", - "service": "annotators/toxic_classification_ru/service_configs/toxic-classification-ru" + "component": "components/3dcc0944c3e0.yml", + "service": "annotators/combined_classification_ru/service_configs/combined-classification-ru" } }, "intent_catcher": { @@ -381,13 +381,13 @@ "service": "annotators/BadlistedWordsDetector_ru/service_configs/badlisted-words-ru" } }, - "toxic_classification": { + "combined_classification_ru": { "connector": { "protocol": "http", "timeout": 1.0, - "url": "http://toxic-classification-ru:8118/respond_batch" + "url": "http://combined-classification-ru:8198/batch_model" }, - "dialog_formatter": "state_formatters.dp_formatters:hypotheses_list", + "dialog_formatter": "state_formatters.dp_formatters:hypothesis_histories_list", "response_formatter": "state_formatters.dp_formatters:simple_formatter_service", "previous_services": [ "skills" @@ -395,8 +395,8 @@ "state_manager_method": "add_hypothesis_annotation_batch", "is_enabled": true, "source": { - "component": "components/3DTP1sVdmFIBYrC8OjRYA.yml", - "service": "annotators/toxic_classification_ru/service_configs/toxic-classification-ru" + "component": "components/5eb485871be3.yml", + "service": "annotators/combined_classification_ru/service_configs/combined-classification-ru" } }, "entity_detection": { @@ -633,4 +633,4 @@ "gpu_usage": "50 GB", "disk_usage": "50 GB" } -} \ No newline at end of file +} diff --git a/assistant_dists/dream_russian/test.yml b/assistant_dists/dream_russian/test.yml index 20e2cda9d8..5ce984fadb 100644 --- a/assistant_dists/dream_russian/test.yml +++ b/assistant_dists/dream_russian/test.yml @@ -17,7 +17,7 @@ services: environment: - CUDA_VISIBLE_DEVICES=8 badlisted-words-ru: - toxic-classification-ru: + combined-classification-ru: volumes: - "~/.deeppavlov/cache:/root/.cache" environment: diff --git a/common/animals.py b/common/animals.py index 4277642529..389e267905 100644 --- a/common/animals.py +++ b/common/animals.py @@ -1,6 +1,6 @@ import re -from common.universal_templates import is_any_question_sentence_in_utterance, NOT_LIKE_PATTERN -from common.utils import get_topics, TOPIC_GROUPS, get_comet_conceptnet_annotations +from common import utils, universal_templates +from common.combined_classes import TOPIC_GROUPS LIKE_ANIMALS_REQUESTS = ["Do you like animals?"] 
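# The rewiring above points the Russian pipeline at the new combined classifier
# (port 8198, endpoints /model and /batch_model). A quick smoke test; the
# {"sentences": [...]} payload shape is an assumption based on the
# preproc_last_human_utt_dialog formatter configured for this annotator:
import requests

result = requests.post(
    "http://combined-classification-ru:8198/model",
    json={"sentences": ["привет! расскажи мне про космос"]},
    timeout=1.0,
).json()
print(result)  # expected: per-task scores (topics_ru, MIDAS, sentiment, toxicity, ...)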
HAVE_PETS_REQUESTS = ["Do you have pets?"] @@ -89,7 +89,7 @@ def animals_skill_was_proposed(prev_bot_utt): def check_about_animals(user_uttr): - found_topics = get_topics(user_uttr, probs=False, which="all") + found_topics = utils.get_topics(user_uttr, probs=False, which="all") if any([animal_topic in found_topics for animal_topic in TOPIC_GROUPS["animals"]]): return True elif re.findall(ANIMALS_FIND_TEMPLATE, user_uttr["text"]): @@ -100,7 +100,7 @@ def check_about_animals(user_uttr): def mentioned_animal(annotations): flag = False - conceptnet = get_comet_conceptnet_annotations({"annotations": annotations}) + conceptnet = utils.get_comet_conceptnet_annotations({"annotations": annotations}) for elem, triplets in conceptnet.items(): if "SymbolOf" in triplets: objects = triplets["SymbolOf"] @@ -131,7 +131,7 @@ def find_entity_by_types(annotations, types_to_find): def find_entity_conceptnet(annotations, types_to_find): - conceptnet = get_comet_conceptnet_annotations({"annotations": annotations}) + conceptnet = utils.get_comet_conceptnet_annotations({"annotations": annotations}) for elem, triplets in conceptnet.items(): if "SymbolOf" in triplets: objects = triplets["SymbolOf"] @@ -161,9 +161,9 @@ def stop_about_animals(user_uttr, shared_memory): found_animal_substr = re.findall(ANIMALS_FIND_TEMPLATE, user_uttr["text"]) is_stop = re.findall(r"(stop|shut|something else|change|don't want)", user_uttr["text"]) found_animal_wp = find_entity_by_types(annotations, {"Q55983715", "Q16521", "Q43577", "Q39367", "Q38547"}) - isq = is_any_question_sentence_in_utterance(user_uttr) + isq = universal_templates.is_any_question_sentence_in_utterance(user_uttr) user_ask = re.findall(r"ask (you )?(a )?question", user_uttr["text"], re.IGNORECASE) - dont_like = re.findall(NOT_LIKE_PATTERN, user_uttr["text"]) + dont_like = re.findall(universal_templates.NOT_LIKE_PATTERN, user_uttr["text"]) if ( ( isq diff --git a/common/art.py b/common/art.py index 89dd3b656f..9e4f80e9bc 100644 --- a/common/art.py +++ b/common/art.py @@ -1,5 +1,6 @@ import re -from common.utils import get_topics, TOPIC_GROUPS +from common.combined_classes import TOPIC_GROUPS +from common import utils SUPER_CONFIDENCE = 1.0 @@ -12,7 +13,7 @@ def check_about_art(user_uttr): - found_topics = get_topics(user_uttr, probs=False, which="all") + found_topics = utils.get_topics(user_uttr, probs=False, which="all") if any([art_topic in found_topics for art_topic in TOPIC_GROUPS["art"]]): return True elif re.findall(ART_PATTERN, user_uttr["text"]): diff --git a/common/books.py b/common/books.py index 2e837d1f0d..98b9e6bd1e 100644 --- a/common/books.py +++ b/common/books.py @@ -1,5 +1,6 @@ import re -from common.utils import get_topics, TOPIC_GROUPS +from common.combined_classes import TOPIC_GROUPS +from common import utils BOOK_SKILL_CHECK_PHRASE = "the last book" BOOK_SKILL_CHECK_PHRASE2 = "your favourite book" @@ -40,7 +41,7 @@ def book_skill_was_proposed(prev_bot_utt): def about_book(annotated_utterance): - found_topics = get_topics(annotated_utterance, probs=False, which="all") + found_topics = utils.get_topics(annotated_utterance, probs=False, which="all") if any([book_topic in found_topics for book_topic in TOPIC_GROUPS["books"]]): return True elif re.findall(BOOK_PATTERN, annotated_utterance["text"]): diff --git a/common/bot_persona.py b/common/bot_persona.py index 0ec4f99580..90b9a10a19 100644 --- a/common/bot_persona.py +++ b/common/bot_persona.py @@ -1,5 +1,5 @@ import re -from common.utils import join_sentences_in_or_pattern +from 
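# check_about_animals above repeats a pattern used throughout common/*.py (about_art,
# about_book, about_food, ...): ask the topic classifiers first, fall back to a regex.
# This patch later generalizes the pattern as Topic/is_about in common/utils.py; a
# hypothetical equivalent check (assumes the utterance dict carries classifier annotations):
from common import utils

def wants_animal_talk(annotated_utterance: dict) -> bool:
    return utils.is_about("animals", annotated_utterance)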
common.join_pattern import join_sentences_in_or_pattern YOUR_FAVORITE_COMPILED_PATTERN = re.compile( diff --git a/common/combined_classes.py b/common/combined_classes.py new file mode 100644 index 0000000000..bff4a1f242 --- /dev/null +++ b/common/combined_classes.py @@ -0,0 +1,216 @@ +combined_classes = { # ORDER MATTERS!!!! DO NOT CHANGE IT!!!! + "emotion_classification": ["anger", "fear", "joy", "disgust", "sadness", "surprise", "neutral"], + "sentiment_classification": ["positive", "neutral", "negative"], + "toxic_classification": [ + "identity_hate", + "insult", + "obscene", + "severe_toxic", + "sexual_explicit", + "threat", + "toxic", + "not_toxic", + ], + "factoid_classification": ["is_factoid", "is_conversational"], + "midas_classification": [ + "open_question_factual", + "open_question_opinion", + "open_question_personal", + "yes_no_question", + "clarifying_question", + "command", + "dev_command", + "appreciation", + "opinion", + "complaint", + "comment", + "statement", + "other_answers", + "pos_answer", + "neg_answer", + ], + "deeppavlov_topics": [ + "Food", + "Books&Literature", + "Music", + "Gadgets", + "Movies&Tv", + "Leisure", + "Beauty", + "Clothes", + "Travel", + "News", + "Art&Hobbies", + "Videogames", + "Job", + "Home&Design", + "Depression", + "Celebrities&Events", + "Politics", + "Toys&Games", + "Animals&Pets", + "PersonalTransport", + "Garden", + "Family&Relationships", + "Health&Medicine", + "Religion", + "ArtificialIntelligence", + "Finance", + "Space", + "Disasters", + "Science&Technology", + "Psychology", + "MassTransit", + "Education", + "Sports", + ], + "cobot_topics": [ + "Phatic", + "Other", + "Movies_TV", + "Music", + "SciTech", + "Literature", + "Travel_Geo", + "Celebrities", + "Games", + "Pets_Animals", + "Sports", + "Psychology", + "Religion", + "Weather_Time", + "Food_Drink", + "Politics", + "Sex_Profanity", + "Art_Event", + "Math", + "News", + "Entertainment", + "Fashion", + ], + "cobot_dialogact_topics": [ + "Other", + "Phatic", + "Entertainment_Movies", + "Entertainment_Books", + "Entertainment_General", + "Interactive", + "Entertainment_Music", + "Science_and_Technology", + "Sports", + "Politics", + "Inappropriate_Content", + ], + "cobot_dialogact_intents": [ + "Information_DeliveryIntent", + "General_ChatIntent", + "Information_RequestIntent", + "User_InstructionIntent", + "InteractiveIntent", + "Opinion_ExpressionIntent", + "OtherIntent", + "ClarificationIntent", + "Topic_SwitchIntent", + "Opinion_RequestIntent", + "Multiple_GoalsIntent", + ], + "topics_ru": [ + "музыка", + "еда, напитки и кулинария", + "новости", + "транспорт", + "погода", + "медиа и коммуникации", + "автомобили", + "общественный транспорт", + "литература", + "чтение", + "гаджеты", + "смартфоны", + "планшеты", + "электроника", + "кино", + "сериалы", + "тв", + "телевидение", + "красота и уход", + "косметология", + "одежда", + "путешествия", + "туризм", + "искусство", + "искусство и культура", + "видеоигры", + "работа", + "карьера", + "дом", + "дизайн", + "депрессия", + "знаменитости", + "политика", + "игрушки", + "настольные игры", + "животные", + "домашние животные", + "садоводство", + "растениеводство", + "семья", + "отношения", + "медицина", + "религия", + "искусственный интеллект", + "машинное обучение", + "финансы", + "космос", + "стихийные бедствия", + "наука", + "технологии", + "психология", + "образование", + "мода и стиль", + "история", + "налоги", + "любовь", + "война", + "деньги", + "физика", + "иностранные языки", + "юриспруденция", + "самолёты", + "покупки", + "криминал", 
+        "кошки",
+        "собаки",
+        "философия",
+        "бизнес и менеджмент",
+        "математика",
+        "предпринимательство",
+        "спорт",
+        "фитнес",
+        "секспросвет",
+        "феминизм",
+        "секс",
+        "еда",
+    ],
+}
+
+TOPIC_GROUPS = {
+    "food": ["Food", "Food_Drink", "еда, напитки и кулинария", "еда"],
+    "books": ["Entertainment_Books", "Literature", "Books&Literature", "литература", "чтение"],
+    "music": ["Music", "Entertainment_Music", "музыка"],
+    "news": ["News", "новости"],
+    "politics": ["Politics", "политика"],
+    "sports": ["Sports", "спорт", "фитнес"],
+    "religion": ["Religion"],
+    "movies": ["Entertainment_Movies", "Movies_TV", "Movies&Tv", "сериалы", "тв", "телевидение"],
+    "fashion": ["Clothes", "Fashion", "одежда", "мода и стиль"],
+    "travel": ["Travel", "Travel_Geo", "путешествия", "туризм"],
+    "celebrities": ["Celebrities", "Celebrities&Events", "знаменитости"],
+    "art": ["Art_Event", "Art&Hobbies", "искусство"],
+    "science": ["Science_and_Technology", "SciTech", "наука", "технологии"],
+    "entertainment": ["Entertainment", "Entertainment_General"],
+    "games": ["Games", "Toys&Games", "Videogames", "видеоигры", "игрушки", "настольные игры"],
+    "animals": ["Pets_Animals", "Animals&Pets", "кошки", "собаки"],
+    "sex": ["Sex_Profanity", "секс", "секспросвет"],
+    "weather": ["Weather_Time", "погода"],
+}  # The list can be expanded according to the topic list supported
diff --git a/common/dff/integration/condition.py b/common/dff/integration/condition.py
index 8aa0871a02..419599dfb8 100644
--- a/common/dff/integration/condition.py
+++ b/common/dff/integration/condition.py
@@ -11,6 +11,7 @@
 import common.dff.integration.context as int_ctx
 from common.acknowledgements import GENERAL_ACKNOWLEDGEMENTS
 from common.constants import CAN_CONTINUE_SCENARIO, CAN_NOT_CONTINUE
+from common.wiki_skill import extract_entity
 from .facts_utils import provide_facts_request
 
 logger = logging.getLogger(__name__)
@@ -285,3 +286,14 @@ def set_conf_and_can_cont_by_universal_policy(ctx: Context, actor: Actor):
 
 def facts(ctx, actor):
     return provide_facts_request(ctx, actor)
+
+
+def has_entities(*args):
+    def has_entities_func(ctx: Context, actor: Actor) -> bool:
+        for f_type in args:
+            extracted_entity = extract_entity(ctx, f_type)
+            if extracted_entity:
+                return True
+        return False
+
+    return has_entities_func
diff --git a/common/food.py b/common/food.py
index d22fb95b83..9052ae632f 100644
--- a/common/food.py
+++ b/common/food.py
@@ -1,6 +1,7 @@
 import re
 from common.utils import join_sentences_in_or_pattern
-from common.utils import get_topics, TOPIC_GROUPS
+from common.combined_classes import TOPIC_GROUPS
+from common import utils
 
 OPINION_REQUESTS_ABOUT_FOOD = [
     "Do you like cooking?",
@@ -99,6 +100,11 @@
     "ENTITY. Yummy! I love it too. ",
 ]
 
+FAVORITE_FOOD_WORDS = (
+    r"((favorite|favourite|delicious|appetizing|exquisite|savory|tasty|yummy|mouthwatering)"
+    r" (food|meal|breakfast|lunch|dinner|brunch|snack))"
+)
+
 
 def skill_trigger_phrases():
     return TRIGGER_PHRASES
@@ -109,7 +115,7 @@ def food_skill_was_proposed(prev_bot_utt):
 
 
 def about_food(annotated_utterance):
-    found_topics = get_topics(annotated_utterance, probs=False, which="all")
+    found_topics = utils.get_topics(annotated_utterance, probs=False, which="all")
     if any([food_topic in found_topics for food_topic in TOPIC_GROUPS["food"]]):
         return True
     elif re.findall(FOOD_COMPILED_PATTERN, annotated_utterance["text"]):
diff --git a/common/gaming.py b/common/gaming.py
index e5a75c2f4e..6daae21bb9 100644
--- a/common/gaming.py
+++ b/common/gaming.py
@@ -9,7 +9,8 @@
 import sentry_sdk
 from common.inflect import engine
 from requests import RequestException
-from common.utils import get_topics, TOPIC_GROUPS
+from common import utils
+from common.combined_classes import TOPIC_GROUPS
 
 
 VIDEO_GAME_WORDS_COMPILED_PATTERN = re.compile(
@@ -32,7 +33,7 @@
 
 
 def about_games(annotated_utterance):
-    found_topics = get_topics(annotated_utterance, probs=False, which="all")
+    found_topics = utils.get_topics(annotated_utterance, probs=False, which="all")
     if any([game_topic in found_topics for game_topic in TOPIC_GROUPS["games"]]):
         return True
     elif re.findall(VIDEO_GAME_WORDS_COMPILED_PATTERN, annotated_utterance["text"]):
diff --git a/common/gossip.py b/common/gossip.py
index c3091b8fde..71ff0cd580 100644
--- a/common/gossip.py
+++ b/common/gossip.py
@@ -1,7 +1,7 @@
 import re
 import logging
-from common.universal_templates import if_chat_about_particular_topic
-from common.utils import get_topics, TOPIC_GROUPS
+from common.combined_classes import TOPIC_GROUPS
+from common import utils, universal_templates
 
 logger = logging.getLogger(__name__)
@@ -274,7 +274,7 @@ def skill_trigger_phrases():
 
 def talk_about_gossip(human_utterance, bot_utterance):
-    user_lets_chat_about = if_chat_about_particular_topic(
+    user_lets_chat_about = universal_templates.if_chat_about_particular_topic(
         human_utterance, bot_utterance, compiled_pattern=GOSSIP_COMPILED_PATTERN
     )
     flag = bool(user_lets_chat_about)
@@ -334,7 +334,7 @@ def check_is_celebrity_mentioned(human_utterance):
 
 def about_celebrities(annotated_utterance):
-    found_topics = get_topics(annotated_utterance, probs=False, which="all")
+    found_topics = utils.get_topics(annotated_utterance, probs=False, which="all")
     if any([topic in found_topics for topic in TOPIC_GROUPS["celebrities"]]):
         return True
     elif re.findall(GOSSIP_COMPILED_PATTERN, annotated_utterance["text"]):
diff --git a/common/join_pattern.py b/common/join_pattern.py
new file mode 100644
index 0000000000..38116345e9
--- /dev/null
+++ b/common/join_pattern.py
@@ -0,0 +1,10 @@
+def join_words_in_or_pattern(words):
+    return r"(" + r"|".join([r"\b%s\b" % word for word in words]) + r")"
+
+
+def join_word_beginnings_in_or_pattern(words):
+    return r"(" + r"|".join([r"\b%s" % word for word in words]) + r")"
+
+
+def join_sentences_in_or_pattern(sents):
+    return r"(" + r"|".join(sents) + r")"
diff --git a/common/link.py b/common/link.py
index 15fceef044..ad5b74449b 100644
--- a/common/link.py
+++ b/common/link.py
@@ -7,6 +7,7 @@
 from copy import deepcopy
 from random import choice, choices
 
+from common import utils  # Importing before skills to avoid circular import
 import common.animals as dff_animals_skill
 import common.books as books
 import common.emotion as emotion
@@ -21,7 +22,6 @@
 import common.sport as
dff_sport_skill import common.travel as dff_travel_skill from common.constants import CAN_CONTINUE_SCENARIO, CAN_NOT_CONTINUE, CAN_CONTINUE_PROMPT, MUST_CONTINUE -from common.utils import get_not_used_template from common.response_selection import COMPLETELY_CHANGING_THE_SUBJECT_PHRASES, CHANGE_TOPIC_SUBJECT, BY_THE_WAY # Each common skill module should define +skill_trigger_phrases()+ function @@ -286,13 +286,13 @@ def get_prelinkto_connection(from_skill, to_skill, used_templates): skill_pair = sorted([from_skill, to_skill]) for el in PRELINKTO_CONNECTION_PHRASES: if el.get("skill_pair") == skill_pair and el.get("phrases"): - return get_not_used_template(used_templates, el["phrases"]) + return utils.get_not_used_template(used_templates, el["phrases"]) return "" def get_prelinkto_topic_connection(to_skill, used_templates): if to_skill in PRELINKTO_TOPIC_PHRASES: - return get_not_used_template(used_templates, PRELINKTO_TOPIC_PHRASES[to_skill]) + return utils.get_not_used_template(used_templates, PRELINKTO_TOPIC_PHRASES[to_skill]) return "" @@ -309,7 +309,7 @@ def compose_linkto_with_connection_phrase(skills, human_attributes, recent_activ if not connection: # not found prelinkto connection phrase AND not found prelinkto topic phrase - connection = get_not_used_template( + connection = utils.get_not_used_template( human_attributes.get("prelinkto_connections", []), COMPLETELY_CHANGING_THE_SUBJECT_PHRASES ) diff --git a/common/movies.py b/common/movies.py index c46f69cd59..65a83c1285 100644 --- a/common/movies.py +++ b/common/movies.py @@ -2,7 +2,8 @@ from random import choice from common.fact_retrieval import topic_types -from common.utils import get_entities, get_topics, TOPIC_GROUPS +from common.combined_classes import TOPIC_GROUPS +from common import utils MOVIE_SKILL_CHECK_PHRASE = "the recent movie" @@ -222,7 +223,7 @@ def extract_movies_names_from_annotations(annotated_uttr, check_full_utterance=F movies_titles = None if "entity_detection" in annotated_uttr["annotations"]: movies_titles = [] - entities = get_entities(annotated_uttr, only_named=False, with_labels=True) + entities = utils.get_entities(annotated_uttr, only_named=False, with_labels=True) for ent in entities: if ent.get("label", "") == "videoname": movies_titles += [ent["text"]] @@ -248,7 +249,7 @@ def extract_movies_names_from_annotations(annotated_uttr, check_full_utterance=F def about_movies(annotated_utterance): - found_topics = get_topics(annotated_utterance, probs=False, which="all") + found_topics = utils.get_topics(annotated_utterance, probs=False, which="all") if any([topic in found_topics for topic in TOPIC_GROUPS["movies"]]): return True elif re.findall(MOVIE_COMPILED_PATTERN, annotated_utterance["text"]): diff --git a/common/music.py b/common/music.py index bb86839a39..20f9b17fc0 100644 --- a/common/music.py +++ b/common/music.py @@ -1,5 +1,6 @@ import re -from common.utils import get_topics, TOPIC_GROUPS +from common.combined_classes import TOPIC_GROUPS +from common import utils OPINION_REQUESTS_ABOUT_MUSIC = [ "What kind of music do you like?", @@ -31,7 +32,7 @@ def skill_trigger_phrases(): def about_music(annotated_utterance): - found_topics = get_topics(annotated_utterance, probs=False, which="all") + found_topics = utils.get_topics(annotated_utterance, probs=False, which="all") if any([music_topic in found_topics for music_topic in TOPIC_GROUPS["music"]]): return True elif re.findall(MUSIC_COMPILED_PATTERN, annotated_utterance["text"]): diff --git a/common/news.py b/common/news.py index 
368466dff1..5c2dc3be8e 100644 --- a/common/news.py +++ b/common/news.py @@ -5,7 +5,7 @@ from os import getenv import sentry_sdk -from common.utils import is_yes, get_entities +from common import utils sentry_sdk.init(getenv("SENTRY_DSN")) @@ -41,7 +41,7 @@ def skill_trigger_phrases(): def is_breaking_news_requested(prev_bot_utt, user_utt): if OFFER_BREAKING_NEWS.lower() in prev_bot_utt.get("text", "").lower(): - if is_yes(user_utt): + if utils.is_yes(user_utt): return True return False @@ -144,7 +144,7 @@ def extract_topics(curr_uttr): Returns: list of mentioned entities/nounphrases """ - entities = get_entities(curr_uttr, only_named=True, with_labels=False) + entities = utils.get_entities(curr_uttr, only_named=True, with_labels=False) entities = [ent.lower() for ent in entities] entities = [ ent @@ -152,7 +152,7 @@ def extract_topics(curr_uttr): if not (ent == "alexa" and curr_uttr["text"].lower()[:5] == "alexa") and "news" not in ent ] if len(entities) == 0: - for ent in get_entities(curr_uttr, only_named=False, with_labels=False): + for ent in utils.get_entities(curr_uttr, only_named=False, with_labels=False): if ent.lower() not in BANNED_UNIGRAMS and "news" not in ent.lower(): if ent in entities: pass diff --git a/common/response_selection.py b/common/response_selection.py index 3a61debebe..3486f8be5a 100644 --- a/common/response_selection.py +++ b/common/response_selection.py @@ -27,6 +27,8 @@ "dff_gaming_skill", "dff_science_skill", "dff_gossip_skill", + "dff_user_kg_skill", + "dff_travel_italy_skill", "small_talk_skill", "dff_wiki_skill", "dff_art_skill", @@ -42,7 +44,12 @@ "seq2seq_persona_based", ] CAN_NOT_BE_DISLIKED_SKILLS = ["meta_script_skill", "personal_info_skill"] -NOT_ADD_PROMPT_SKILLS = ["alexa_handler", "dff_intent_responder_skill", "misheard_asr", "dff_program_y_dangerous_skill"] +NOT_ADD_PROMPT_SKILLS = [ + "alexa_handler", + "dff_intent_responder_skill", + "misheard_asr", + "dff_program_y_dangerous_skill", +] COMPLETELY_CHANGING_THE_SUBJECT_PHRASES = [ "Completely changing the subject,", diff --git a/common/robot.py b/common/robot.py index 65fd1d9b06..e7d7b17049 100644 --- a/common/robot.py +++ b/common/robot.py @@ -2,6 +2,7 @@ command_intents = {"track_object", "turn_around", "move_forward", "move_backward", "open_door", "move_to_point"} +embodied_intents = {"test_command", "move_forward", "move_backward"} def check_if_valid_robot_command(command, service_url, dialog_id, timeout=1.0): diff --git a/common/science.py b/common/science.py index bdebc4b200..7af4989712 100644 --- a/common/science.py +++ b/common/science.py @@ -1,8 +1,9 @@ import re import json import pathlib -from common.utils import join_sentences_in_or_pattern, join_words_in_or_pattern -from common.utils import get_topics, TOPIC_GROUPS +from common.join_pattern import join_sentences_in_or_pattern, join_words_in_or_pattern +from common.combined_classes import TOPIC_GROUPS +from common import utils NICE_CHAT_ACKS = [ @@ -75,7 +76,7 @@ def science_skill_was_proposed(prev_bot_utt): def about_science(annotated_utterance): - found_topics = get_topics(annotated_utterance, probs=False, which="all") + found_topics = utils.get_topics(annotated_utterance, probs=False, which="all") if any([topic in found_topics for topic in TOPIC_GROUPS["science"]]): return True elif re.findall(SCIENCE_COMPILED_PATTERN, annotated_utterance["text"]): diff --git a/common/skills_turn_on_topics_and_patterns.py b/common/skills_turn_on_topics_and_patterns.py index 7a0d27d316..7a7193224c 100644 --- 
a/common/skills_turn_on_topics_and_patterns.py
+++ b/common/skills_turn_on_topics_and_patterns.py
@@ -23,8 +23,21 @@
 from common.travel import TRAVELLING_TEMPLATE, HAVE_YOU_BEEN_TEMPLATE, I_HAVE_BEEN_TEMPLATE
 from common.weather import WEATHER_COMPILED_PATTERN
 from common.bot_persona import YOUR_FAVORITE_COMPILED_PATTERN
+from common.travel_italy import ITALY_PATTERN
 
 SKILL_TRIGGERS = {
+    "dff_travel_italy_skill": {
+        "compiled_patterns": [ITALY_PATTERN],
+        "previous_bot_patterns": [],
+        "detected_topics": [],
+        "intents": [],
+    },
+    "dff_user_kg_skill": {
+        "compiled_patterns": [re.compile(r"(\bpet\b|\bpets\b|hobby|hobbies)")],
+        "previous_bot_patterns": [],
+        "detected_topics": [],
+        "intents": [],
+    },
     "dff_art_skill": {
         "compiled_patterns": [ART_PATTERN],
         "previous_bot_patterns": [],
diff --git a/common/sport.py b/common/sport.py
index d1dfb81782..88b73d72bd 100644
--- a/common/sport.py
+++ b/common/sport.py
@@ -1,5 +1,6 @@
 import re
-from common.utils import get_topics, TOPIC_GROUPS
+from common.combined_classes import TOPIC_GROUPS
+from common import utils
 
 
 ##################################################################################################################
@@ -145,7 +146,7 @@ def skill_trigger_phrases():
 
 
 def about_sport(annotated_utterance):
-    found_topics = get_topics(annotated_utterance, probs=False, which="all")
+    found_topics = utils.get_topics(annotated_utterance, probs=False, which="all")
     if any([topic in found_topics for topic in TOPIC_GROUPS["sport"]]):
         return True
     elif re.findall(KIND_OF_SPORTS_TEMPLATE, annotated_utterance["text"]):
diff --git a/common/travel.py b/common/travel.py
index aa2776a36f..ba71e0aab5 100644
--- a/common/travel.py
+++ b/common/travel.py
@@ -1,5 +1,6 @@
 import re
-from common.utils import get_topics, TOPIC_GROUPS
+from common.combined_classes import TOPIC_GROUPS
+from common import utils
 
 
 OPINION_REQUESTS_ABOUT_TRAVELLING = [
@@ -160,7 +161,7 @@ def skill_trigger_phrases():
 
 
 def about_travel(annotated_utterance):
-    found_topics = get_topics(annotated_utterance, probs=False, which="all")
+    found_topics = utils.get_topics(annotated_utterance, probs=False, which="all")
     if any([topic in found_topics for topic in TOPIC_GROUPS["travel"]]):
         return True
     elif re.findall(TRAVELLING_TEMPLATE, annotated_utterance["text"]):
diff --git a/common/travel_italy.py b/common/travel_italy.py
new file mode 100644
index 0000000000..05f6896a49
--- /dev/null
+++ b/common/travel_italy.py
@@ -0,0 +1,29 @@
+import re
+
+ITALY_PATTERN = re.compile(
+    r"(italy|italian(s)?|(italian)? city|people|language|rome|venice)(\.|\?)",
+    re.IGNORECASE,
+)
+
+ITALY_TRAVEL_SKILL_CHECK_PHRASE = "italy"
+ITALY_TRAVEL_SKILL_CHECK_PHRASE2 = f"Have you ever been to {ITALY_TRAVEL_SKILL_CHECK_PHRASE}"
+ITALY_TRAVEL_SKILL_CHECK_PHRASE3 = f"Do you like {ITALY_TRAVEL_SKILL_CHECK_PHRASE}?"
+SWITCH_ITALY_TRAVEL_SKILL_PHRASE = f"Let's talk about {ITALY_TRAVEL_SKILL_CHECK_PHRASE}"
+ASK_TO_REPEAT_ITALY = "Could you repeat, please, what place we are discussing?"
+WHAT_RECOMMEND_IN_ITALY = f"What do you recommend to see in {ITALY_TRAVEL_SKILL_CHECK_PHRASE}?"
+QUESTIONS_ABOUT_ITALY = [
+    f"What is your favorite place in {ITALY_TRAVEL_SKILL_CHECK_PHRASE}?",
+    f"What place do you like to visit in {ITALY_TRAVEL_SKILL_CHECK_PHRASE}?",
+    WHAT_RECOMMEND_IN_ITALY,
+]
+
+ITALY_TRAVEL_SKILL_CHECK_PHRASES = [
+    ITALY_TRAVEL_SKILL_CHECK_PHRASE,
+    ITALY_TRAVEL_SKILL_CHECK_PHRASE2,
+    ITALY_TRAVEL_SKILL_CHECK_PHRASE3,
+    ASK_TO_REPEAT_ITALY,
+] + QUESTIONS_ABOUT_ITALY
+
+
+def italy_travel_skill_was_proposed(prev_bot_utt):
+    return any([j.lower() in prev_bot_utt.get("text", "").lower() for j in ITALY_TRAVEL_SKILL_CHECK_PHRASES])
diff --git a/common/universal_templates.py b/common/universal_templates.py
index 5206cab34d..e95aed318e 100644
--- a/common/universal_templates.py
+++ b/common/universal_templates.py
@@ -3,17 +3,8 @@
 from os import getenv
 from random import choice
 
-from common.utils import (
-    join_words_in_or_pattern,
-    join_sentences_in_or_pattern,
-    get_topics,
-    get_intents,
-    get_sentiment,
-    is_yes,
-    is_no,
-    get_entities,
-    join_word_beginnings_in_or_pattern,
-)
+from common import utils
+from common.join_pattern import *
 from common.greeting import GREETING_QUESTIONS_TEXTS
 
 import sentry_sdk
@@ -354,7 +345,7 @@ def if_switch_topic(uttr):
 
 
 def book_movie_music_found(annotated_uttr):
-    topics = set(get_topics(annotated_uttr, which="all"))
+    topics = set(utils.get_topics(annotated_uttr, which="all"))
     target_topics = {
         "Entertainment_Books",
         "Books&Literature",
@@ -368,7 +359,7 @@ def book_movie_music_found(annotated_uttr):
 
 
 def is_switch_topic(annotated_uttr):
-    topic_switch_detected = False  # "Topic_SwitchIntent" in get_intents(annotated_uttr, which="all")
+    topic_switch_detected = False  # "Topic_SwitchIntent" in utils.get_intents(annotated_uttr, which="all")
     if topic_switch_detected or if_switch_topic(annotated_uttr["text"].lower()):
         return True
 
@@ -386,14 +377,16 @@
 def if_choose_topic(annotated_uttr, prev_annotated_uttr=None):
     prev_annotated_uttr = {} if prev_annotated_uttr is None else prev_annotated_uttr
     uttr_ = annotated_uttr.get("text", "").lower()
     prev_uttr_ = prev_annotated_uttr.get("text", "--").lower()
-    chat_about_intent = "lets_chat_about" in get_intents(annotated_uttr, probs=False, which="intent_catcher")
+    chat_about_intent = "lets_chat_about" in utils.get_intents(annotated_uttr, probs=False, which="intent_catcher")
     user_asks_what_to_talk_about = re.search(COMPILE_WHAT_TO_TALK_ABOUT, uttr_)
     # user ask to "talk about something"
     smth1 = re.search(COMPILE_LETS_TALK_ABOUT_SOMETHING, uttr_) or (
         chat_about_intent and re.search(COMPILE_SOMETHING, uttr_)
     )
     # bot asks "what user wants to talk about", and user answers "something"
-    prev_chat_about_intent = "lets_chat_about" in get_intents(prev_annotated_uttr, probs=False, which="intent_catcher")
+    prev_chat_about_intent = "lets_chat_about" in utils.get_intents(
+        prev_annotated_uttr, probs=False, which="intent_catcher"
+    )
     prev_uttr_asks_what_topic = prev_chat_about_intent or re.search(COMPILE_WHAT_TO_TALK_ABOUT, prev_uttr_)
     smth2 = prev_uttr_asks_what_topic and re.search(COMPILE_SOMETHING, uttr_)
 
@@ -409,9 +402,11 @@ def if_not_want_to_chat_about_particular_topic(annotated_uttr, prev_annotated_ut
         return True
 
     # prev uttr is what do you want to talk about?
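# How the new travel-italy plumbing fits together: SKILL_TRIGGERS matches ITALY_PATTERN
# against the current user text, while italy_travel_skill_was_proposed() checks the
# previous bot turn. A hypothetical condition combining both; utterance dicts follow the
# usual {"text": ...} shape assumed elsewhere in common/:
import re
from common.travel_italy import ITALY_PATTERN, italy_travel_skill_was_proposed

def italy_context(user_uttr: dict, prev_bot_uttr: dict) -> bool:
    user_mentions_italy = bool(re.search(ITALY_PATTERN, user_uttr.get("text", "")))
    return user_mentions_italy or italy_travel_skill_was_proposed(prev_bot_uttr)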
- prev_chat_about_intent = "lets_chat_about" in get_intents(prev_annotated_uttr, probs=False, which="intent_catcher") + prev_chat_about_intent = "lets_chat_about" in utils.get_intents( + prev_annotated_uttr, probs=False, which="intent_catcher" + ) prev_what_to_chat_about = prev_chat_about_intent or if_utterance_requests_topic(prev_annotated_uttr) - if prev_what_to_chat_about and is_no(annotated_uttr): + if prev_what_to_chat_about and utils.is_no(annotated_uttr): # previously offered to chat about topic, user declines return True elif prev_what_to_chat_about and is_switch_topic(annotated_uttr): @@ -422,7 +417,7 @@ def if_not_want_to_chat_about_particular_topic(annotated_uttr, prev_annotated_ut return True # current uttr is lets talk about something else / other than - chat_about_intent = "lets_chat_about" in get_intents(annotated_uttr, probs=False, which="intent_catcher") + chat_about_intent = "lets_chat_about" in utils.get_intents(annotated_uttr, probs=False, which="intent_catcher") chat_about = chat_about_intent or if_lets_chat_about_topic(uttr_) if chat_about and SOMETHING_ELSE.search(uttr_): return True @@ -456,11 +451,13 @@ def if_chat_about_particular_topic(annotated_uttr, prev_annotated_uttr=None, key prev_uttr_ = prev_annotated_uttr.get("text", "").lower() # current uttr is lets talk about blabla - chat_about_intent = "lets_chat_about" in get_intents(annotated_uttr, probs=False, which="intent_catcher") + chat_about_intent = "lets_chat_about" in utils.get_intents(annotated_uttr, probs=False, which="intent_catcher") chat_about = chat_about_intent or if_lets_chat_about_topic(uttr_) # prev uttr is what do you want to talk about? - prev_chat_about_intent = "lets_chat_about" in get_intents(prev_annotated_uttr, probs=False, which="intent_catcher") + prev_chat_about_intent = "lets_chat_about" in utils.get_intents( + prev_annotated_uttr, probs=False, which="intent_catcher" + ) prev_what_to_chat_about = prev_chat_about_intent or if_utterance_requests_topic(prev_annotated_uttr) not_want = if_not_want_to_chat_about_particular_topic(annotated_uttr, prev_annotated_uttr) @@ -473,7 +470,7 @@ def if_chat_about_particular_topic(annotated_uttr, prev_annotated_uttr=None, key re.IGNORECASE, ) offered_this_topic = trigger_pattern.search(prev_uttr_) - user_agrees_or_any = ANY_TOPIC_AMONG_OFFERED.search(uttr_) or is_yes(annotated_uttr) + user_agrees_or_any = ANY_TOPIC_AMONG_OFFERED.search(uttr_) or utils.is_yes(annotated_uttr) if any([word in uttr_ for word in key_words]) or (offered_this_topic and user_agrees_or_any): return True else: @@ -491,7 +488,7 @@ def if_chat_about_particular_topic(annotated_uttr, prev_annotated_uttr=None, key prev_uttr_, re.IGNORECASE, ) - user_agrees_or_any = ANY_TOPIC_AMONG_OFFERED.search(uttr_) or is_yes(annotated_uttr) + user_agrees_or_any = ANY_TOPIC_AMONG_OFFERED.search(uttr_) or utils.is_yes(annotated_uttr) if re.search(compiled_pattern, uttr_) or (offered_this_topic and user_agrees_or_any): return True else: @@ -502,17 +499,17 @@ def if_chat_about_particular_topic(annotated_uttr, prev_annotated_uttr=None, key def is_negative(annotated_uttr): - sentiment = get_sentiment(annotated_uttr, probs=False)[0] + sentiment = utils.get_sentiment(annotated_uttr, probs=False)[0] return sentiment in ["negative", "very_negative"] def is_positive(annotated_uttr): - sentiment = get_sentiment(annotated_uttr, probs=False)[0] + sentiment = utils.get_sentiment(annotated_uttr, probs=False)[0] return sentiment in ["positive", "very_positive"] def is_neutral(annotated_uttr): - sentiment = 
get_sentiment(annotated_uttr, probs=False)[0] + sentiment = utils.get_sentiment(annotated_uttr, probs=False)[0] return sentiment in ["neutral"] @@ -520,7 +517,7 @@ def is_neutral(annotated_uttr): def tell_me_more(annotated_uttr): - intents = get_intents(annotated_uttr, which="intent_catcher", probs=False) + intents = utils.get_intents(annotated_uttr, which="intent_catcher", probs=False) cond1 = "tell_me_more" in intents cond2 = re.search(more_details_pattern, annotated_uttr["text"]) return cond1 or cond2 @@ -594,14 +591,14 @@ def is_any_question_sentence_in_utterance(annotated_uttr): def get_entities_with_attitudes(annotated_uttr: dict, prev_annotated_uttr: dict): entities_with_attitudes = {"like": [], "dislike": []} - all_entities = get_entities(annotated_uttr, only_named=False, with_labels=False) - all_prev_entities = get_entities(prev_annotated_uttr, only_named=False, with_labels=False) + all_entities = utils.get_entities(annotated_uttr, only_named=False, with_labels=False) + all_prev_entities = utils.get_entities(prev_annotated_uttr, only_named=False, with_labels=False) logger.info(f"Consider all curr entities: {all_entities}, and all previous entities: {all_prev_entities}") curr_entity = all_entities[0] if all_entities else "" prev_entity = all_prev_entities[-1] if all_prev_entities else "" curr_uttr_text = annotated_uttr.get("text", "") prev_uttr_text = prev_annotated_uttr.get("text", "") - curr_sentiment = get_sentiment(annotated_uttr, probs=False, default_labels=["neutral"])[0] + curr_sentiment = utils.get_sentiment(annotated_uttr, probs=False, default_labels=["neutral"])[0] current_first_sentence = ( annotated_uttr.get("annotations", {}).get("sentseg", {}).get("segments", [curr_uttr_text])[0] ) @@ -615,17 +612,17 @@ def get_entities_with_attitudes(annotated_uttr: dict, prev_annotated_uttr: dict) # what is your less favorite ..? - animals -> `dislike animals` entities_with_attitudes["dislike"] += [curr_entity] elif DO_YOU_LOVE_PATTERN.search(prev_uttr_text): - if is_no(annotated_uttr): + if utils.is_no(annotated_uttr): # do you love .. animals? - no -> `dislike animals` entities_with_attitudes["dislike"] += [prev_entity] - elif is_yes(annotated_uttr): + elif utils.is_yes(annotated_uttr): # do you love .. animals? - yes -> `like animals` entities_with_attitudes["like"] += [prev_entity] elif DO_YOU_HATE_PATTERN.search(prev_uttr_text): - if is_no(annotated_uttr): + if utils.is_no(annotated_uttr): # do you hate .. animals? - no -> `like animals` entities_with_attitudes["like"] += [prev_entity] - elif is_yes(annotated_uttr): + elif utils.is_yes(annotated_uttr): # do you hate .. animals? - yes -> `dislike animals` entities_with_attitudes["dislike"] += [prev_entity] elif I_HATE_PATTERN.search(curr_uttr_text): diff --git a/common/utils.py b/common/utils.py index f1e1f62a19..d6dbfbbde9 100644 --- a/common/utils.py +++ b/common/utils.py @@ -1,3 +1,5 @@ +from __future__ import annotations + import re import logging from os import getenv @@ -6,6 +8,12 @@ from common.custom_requests import request_triples_wikidata from common.factoid import FACTOID_THRESHOLD +from common.combined_classes import combined_classes, TOPIC_GROUPS +from common.join_pattern import * + +from common import food, books, music, news, travel +from common import art, science, movies, animals, gaming, sport, gossip + import sentry_sdk logger = logging.getLogger(__name__) @@ -109,144 +117,6 @@ low_priority_intents = {"dont_understand", "what_time", "choose_topic"} -combined_classes = { # ORDER MATTERS!!!! DO NOT CHANGE IT!!!! 
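# Why the dict being removed from utils.py here (and recreated verbatim in
# common/combined_classes.py) shouts "ORDER MATTERS": consumers typically recover each
# task's scores by slicing one concatenated probability vector task by task, in
# insertion order. A minimal sketch of that assumption:
from itertools import accumulate
from common.combined_classes import combined_classes

_sizes = [len(labels) for labels in combined_classes.values()]
_offsets = [0, *accumulate(_sizes)]

def split_probs(flat_probs):
    """Map one flat score vector back onto {task: {label: prob}}."""
    return {
        task: dict(zip(labels, flat_probs[_offsets[i]:_offsets[i + 1]]))
        for i, (task, labels) in enumerate(combined_classes.items())
    }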
- "emotion_classification": ["anger", "fear", "joy", "disgust", "sadness", "surprise", "neutral"], - "sentiment_classification": ["positive", "neutral", "negative"], - "toxic_classification": [ - "identity_hate", - "insult", - "obscene", - "severe_toxic", - "sexual_explicit", - "threat", - "toxic", - "not_toxic", - ], - "factoid_classification": ["is_factoid", "is_conversational"], - "midas_classification": [ - "open_question_factual", - "open_question_opinion", - "open_question_personal", - "yes_no_question", - "clarifying_question", - "command", - "dev_command", - "appreciation", - "opinion", - "complaint", - "comment", - "statement", - "other_answers", - "pos_answer", - "neg_answer", - ], - "deeppavlov_topics": [ - "Food", - "Books&Literature", - "Music", - "Gadgets", - "Movies&Tv", - "Leisure", - "Beauty", - "Clothes", - "Travel", - "News", - "Art&Hobbies", - "Videogames", - "Job", - "Home&Design", - "Depression", - "Celebrities&Events", - "Politics", - "Toys&Games", - "Animals&Pets", - "PersonalTransport", - "Garden", - "Family&Relationships", - "Health&Medicine", - "Religion", - "ArtificialIntelligence", - "Finance", - "Space", - "Disasters", - "Science&Technology", - "Psychology", - "MassTransit", - "Education", - "Sports", - ], - "cobot_topics": [ - "Phatic", - "Other", - "Movies_TV", - "Music", - "SciTech", - "Literature", - "Travel_Geo", - "Celebrities", - "Games", - "Pets_Animals", - "Sports", - "Psychology", - "Religion", - "Weather_Time", - "Food_Drink", - "Politics", - "Sex_Profanity", - "Art_Event", - "Math", - "News", - "Entertainment", - "Fashion", - ], - "cobot_dialogact_topics": [ - "Other", - "Phatic", - "Entertainment_Movies", - "Entertainment_Books", - "Entertainment_General", - "Interactive", - "Entertainment_Music", - "Science_and_Technology", - "Sports", - "Politics", - "Inappropriate_Content", - ], - "cobot_dialogact_intents": [ - "Information_DeliveryIntent", - "General_ChatIntent", - "Information_RequestIntent", - "User_InstructionIntent", - "InteractiveIntent", - "Opinion_ExpressionIntent", - "OtherIntent", - "ClarificationIntent", - "Topic_SwitchIntent", - "Opinion_RequestIntent", - "Multiple_GoalsIntent", - ], -} - -TOPIC_GROUPS = { - "food": ["Food", "Food_Drink"], - "books": ["Entertainment_Books", "Literature", "Books&Literature"], - "music": ["Music", "Entertainment_Music"], - "news": ["News"], - "politics": ["Politics"], - "sports": ["Sports"], - "religion": ["Religion"], - "movies": ["Entertainment_Movies", "Movies_TV", "Movies&Tv"], - "fashion": ["Clothes", "Fashion"], - "travel": ["Travel", "Travel_Geo"], - "celebrities": ["Celebrities", "Celebrities&Events"], - "art": ["Art_Event", "Art&Hobbies"], - "science": ["Science_and_Technology", "SciTech"], - "entertainment": ["Entertainment", "Entertainment_General"], - "games": ["Games", "Toys&Games", "Videogames"], - "animals": ["Pets_Animals", "Animals&Pets"], -} - - MULTILABEL_TASKS = [ "emotion_classification", "toxic_classification", @@ -310,18 +180,6 @@ MIDAS_FUNCTIONAL_LABELS = sum([intent_list for intent_list in midas_classes["functional_request"].values()], []) -def join_words_in_or_pattern(words): - return r"(" + r"|".join([r"\b%s\b" % word for word in words]) + r")" - - -def join_word_beginnings_in_or_pattern(words): - return r"(" + r"|".join([r"\b%s" % word for word in words]) + r")" - - -def join_sentences_in_or_pattern(sents): - return r"(" + r"|".join(sents) + r")" - - def get_skill_outputs_from_dialog(utterances, skill_name, activated=False): """ Extract list of dictionaries with already 
formatted outputs of `skill_name` from full dialog. @@ -892,15 +750,20 @@ def get_topics(annotated_utterance, probs=False, default_probs=None, default_lab dp_topics_probs, dp_topics_labels = _get_combined_annotations( annotated_utterance, model_name="deeppavlov_topics" ) + topics_ru_probs, topics_ru_labels = {}, [] + if "topics_ru" in annotations: + topics_ru_probs, topics_ru_labels = _get_combined_annotations(annotated_utterance, model_name="topics_ru") if which == "all": - answer_labels = cobot_topics_labels + cobot_da_topics_labels + dp_topics_labels - answer_probs = {**cobot_topics_probs, **cobot_da_topics_probs, **dp_topics_probs} + answer_labels = cobot_topics_labels + cobot_da_topics_labels + dp_topics_labels + topics_ru_labels + answer_probs = {**cobot_topics_probs, **cobot_da_topics_probs, **dp_topics_probs, **topics_ru_probs} elif which == "cobot_topics": answer_probs, answer_labels = cobot_topics_probs, cobot_topics_labels elif which == "cobot_dialogact_topics": answer_probs, answer_labels = cobot_da_topics_probs, cobot_da_topics_labels elif which == "deeppavlov_topics": answer_probs, answer_labels = dp_topics_probs, dp_topics_labels + elif which == "topics_ru": + answer_probs, answer_labels = topics_ru_probs, topics_ru_labels else: logger.exception(f"Unknown input type in get_topics: {which}") answer_probs, answer_labels = default_probs, default_labels @@ -1372,3 +1235,52 @@ def get_comet_conceptnet_annotations(annotated_utterance): return annotated_utterance.get("annotations", {}).get("conceptnet", {}) else: return {} + + +class Topic: + def __init__(self, topic_group=None, detecting_regexp=None, detecting_function=None): + self.topic_group = topic_group + self.detecting_regexp = detecting_regexp + self.detecting_function = detecting_function + + def detect(self, annotated_utterance, only_one_topic=True, threshold=0.1, which="all"): + if only_one_topic: + found_topics = get_topics(annotated_utterance, probs=False, which=which) + else: + found_probs = get_topics(annotated_utterance, probs=True, which=which) + found_topics = [key for key in found_probs if found_probs[key] > threshold] + if any([target_topic in found_topics for target_topic in self.topic_group]): + return True + elif self.detecting_regexp is not None: + if re.findall(self.detecting_regexp, annotated_utterance["text"]): + return True + elif self.detecting_function is not None: # Support for non-regexp methods + if self.detecting_function(annotated_utterance): + return True + return False + + +TOPICS = { + "food": Topic(TOPIC_GROUPS["food"], food.FOOD_COMPILED_PATTERN), + "books": Topic(TOPIC_GROUPS["books"], books.BOOK_PATTERN), + "music": Topic(TOPIC_GROUPS["music"], music.MUSIC_COMPILED_PATTERN), + "news": Topic(TOPIC_GROUPS["news"], news.NEWS_COMPILED_PATTERN), + "politics": Topic(TOPIC_GROUPS["politics"]), + "sports": Topic(TOPIC_GROUPS["sports"], detecting_function=sport.about_sport), + "religion": Topic(TOPIC_GROUPS["religion"]), + "movies": Topic(TOPIC_GROUPS["movies"], movies.MOVIE_COMPILED_PATTERN), + "fashion": Topic(TOPIC_GROUPS["fashion"]), + "travel": Topic(TOPIC_GROUPS["travel"], travel.TRAVELLING_TEMPLATE), + "celebrities": Topic(TOPIC_GROUPS["celebrities"], gossip.GOSSIP_COMPILED_PATTERN), + "art": Topic(TOPIC_GROUPS["art"], art.ART_PATTERN), + "science": Topic(TOPIC_GROUPS["science"], science.SCIENCE_COMPILED_PATTERN), + "entertainment": Topic(TOPIC_GROUPS["entertainment"]), + "games": Topic(TOPIC_GROUPS["games"], gaming.VIDEO_GAME_WORDS_COMPILED_PATTERN), + "animals": 
Topic(TOPIC_GROUPS["animals"], animals.ANIMALS_FIND_TEMPLATE),
+    "sex": Topic(TOPIC_GROUPS["sex"]),
+    "weather": Topic(TOPIC_GROUPS["weather"]),
+}  # The list can be expanded according to the topic list supported
+
+
+def is_about(topic_name, annotated_utterance, **kwargs):
+    return TOPICS[topic_name].detect(annotated_utterance, **kwargs)
diff --git a/common/wiki_skill.py b/common/wiki_skill.py
index 36cff36b27..a39aaf8201 100644
--- a/common/wiki_skill.py
+++ b/common/wiki_skill.py
@@ -2,10 +2,8 @@
 import logging
 import random
 import re
-from common.universal_templates import COMPILE_WHAT_TO_TALK_ABOUT
+from common import utils, universal_templates
 from common.animals import ANIMALS_FIND_TEMPLATE
-from common.universal_templates import if_chat_about_particular_topic
-from common.utils import is_no, is_yes
 from common.wiki_skill_scenarios import topic_config
 
 logger = logging.getLogger(__name__)
@@ -618,6 +616,35 @@ def check_nounphr(annotations, nounphr_to_find):
     return ""
 
 
+def find_entity_custom_kg(annotations, kg_type):
+    custom_el_info = annotations.get("custom_entity_linking", [])
+    for entity_info in custom_el_info:
+        substr = entity_info.get("entity_substr", "")
+        e_types = entity_info.get("entity_id_tags", [])
+        if any([e_type.lower() == kg_type.lower() for e_type in e_types]):
+            return substr
+    return ""
+
+
+def find_entity_prex(annotations, prop):
+    prop = prop.replace("_", " ")
+    prex_info_batch = annotations.get("property_extraction", [])
+    for prex_info in prex_info_batch:
+        if isinstance(prex_info, list) and prex_info:
+            prex_info = prex_info[0]
+        if prex_info:
+            triplets = prex_info.get("triplets", [])
+            for triplet in triplets:
+                if "relation" in triplet:
+                    rel = triplet["relation"]
+                elif "property" in triplet:
+                    rel = triplet["property"]
+                else:
+                    continue
+                obj = triplet["object"]
+                if rel.replace("_", " ").lower() == prop.replace("_", " ").lower():
+                    return obj
+    return ""
+
+
 def extract_entity(ctx, entity_type):
     user_uttr: dict = ctx.misc.get("agent", {}).get("dialog", {}).get("human_utterances", [{}])[-1]
     annotations = user_uttr.get("annotations", {})
@@ -636,12 +663,21 @@
         found_entity, *_ = find_entity_by_types(annotations, [wp_type])
         if found_entity:
             return found_entity
+    elif entity_type.startswith("prop:"):
+        user_property = entity_type.split("prop:")[1]
+        obj = find_entity_prex(annotations, user_property)
+        return obj
+    elif entity_type.startswith("kg:"):
+        kg_type = entity_type.split("kg:")[1]
+        found_entity = find_entity_custom_kg(annotations, kg_type)
+        if found_entity:
+            return found_entity
     elif entity_type == "any_entity":
         entities = annotations.get("entity_detection", {}).get("entities", [])
         if entities:
             return entities[0]
     else:
-        res = re.findall(entity_type, user_uttr["text"])
+        res = re.findall(entity_type, user_uttr.get("text", ""))
         if res:
             return res[0]
     return ""
@@ -649,7 +685,7 @@
 
 def if_user_dont_know_topic(user_uttr, bot_uttr):
     flag = False
-    what_to_talk_about = re.findall(COMPILE_WHAT_TO_TALK_ABOUT, bot_uttr.get("text", ""))
+    what_to_talk_about = re.findall(universal_templates.COMPILE_WHAT_TO_TALK_ABOUT, bot_uttr.get("text", ""))
     user_dont_know = re.findall("(do not|dont|don't) know", user_uttr["text"]) or re.findall(
         "(anything|everything)", user_uttr["text"]
     )
@@ -661,8 +697,8 @@
 def check_condition_element(elem, user_uttr, bot_uttr, shared_memory={}):
     flag = False
     annotations = user_uttr["annotations"]
-    isyes = is_yes(user_uttr)
-    isno = is_no(user_uttr)
+    isyes = utils.is_yes(user_uttr)
+    isno = utils.is_no(user_uttr)
     user_info = shared_memory.get("user_info", {})
     entity_triplets = shared_memory.get("entity_triplets", {})
     if elem[0] == "is_yes" and isyes:
@@ -752,7 +788,7 @@ def if_switch_wiki_skill(user_uttr, bot_uttr):
         if (
             (isinstance(pattern, str) and re.findall(pattern, user_uttr["text"], re.IGNORECASE))
             or (isinstance(pattern, re.Pattern) and re.findall(pattern, user_uttr["text"]))
-            or if_chat_about_particular_topic(user_uttr, bot_uttr, compiled_pattern=pattern)
+            or universal_templates.if_chat_about_particular_topic(user_uttr, bot_uttr, compiled_pattern=pattern)
         ):
             flag = True
             switch_on = topic_info.get("switch_on", [])
@@ -774,7 +810,7 @@
 def if_must_switch(user_uttr, bot_uttr):
     flag = False
     user_uttr_annotations = user_uttr["annotations"]
-    lets_chat = if_chat_about_particular_topic(user_uttr, bot_uttr)
+    lets_chat = universal_templates.if_chat_about_particular_topic(user_uttr, bot_uttr)
     found_entity_substr_wp, *_, conf_type_wp = find_entity_wp(user_uttr_annotations, bot_uttr)
     found_entity_substr_nphr, conf_type_nphr = find_entity_nounphr(user_uttr_annotations)
     if (
@@ -789,7 +825,7 @@ def if_must_switch(user_uttr, bot_uttr):
 def switch_wiki_skill_on_news(user_uttr, bot_uttr):
     user_uttr_annotations = user_uttr["annotations"]
     news = user_uttr_annotations.get("news_api_annotator", [])
-    if if_chat_about_particular_topic(user_uttr, bot_uttr) and news:
+    if universal_templates.if_chat_about_particular_topic(user_uttr, bot_uttr) and news:
         nounphrases = user_uttr_annotations.get("cobot_entities", {}).get("labelled_entities", [])
         if nounphrases and news:
             for nounphr in nounphrases:
diff --git a/components.tsv b/components.tsv
index a6511e6182..30e5027317 100644
--- a/components.tsv
+++ b/components.tsv
@@ -3,6 +3,7 @@
 3772	faq-skill
 3882	harvesters-maintenance-gobot-skill
 4242	agent
+6000	ros-flask-server
 8002	ranking-based-response-selector,ranking-based-response-selector-ru
 8003	llm-based-response-selector
 8004	convers-evaluator-annotator
@@ -26,10 +27,10 @@
 8022	dff-program-y-dangerous-skill
 8023	dff-movie-skill
 8024	sentiment-classification-multilingual
-8025
-8026
-8027
-8028
+8025	dff-travel-italy-skill
+8026	personality-detection
+8027	user-knowledge-memorizer
+8028	dff-user-kg-skill
 8029	convert-reddit
 8030	personal-info-ru-skill,personal-info-skill
 8031	asr
@@ -42,8 +43,8 @@
 8038	robot-fake-server
 8039
 8040
-8041
-8042
+8041	dff-embodied-skill
+8042	embodied-sender
 8043
 8044
 8045
@@ -70,8 +71,8 @@
 8066	news-api-skill
 8067
 8068	game-cooperative-skill
-8069
-8070
+8069	fromage
+8070	dff-fromage-image-skill
 8071	factoid-qa
 8072	kbqa
 8073
diff --git a/components/3dcc0944c3e0.yml b/components/3dcc0944c3e0.yml
new file mode 100644
index 0000000000..07b284553e
--- /dev/null
+++ b/components/3dcc0944c3e0.yml
@@ -0,0 +1,25 @@
+name: combined_classification_ru
+display_name: Combined Classification Ru
+component_type: null
+model_type: NN-based
+is_customizable: false
+author: publisher@deeppavlov.ai
+description: Russian BERT-based model including topic classification, MIDAS intent classification,
+  sentiment, toxicity, emotion, and factoid classification
+ram_usage: 1.5G
+gpu_usage: 3.5G
+group: annotators
+connector:
+  protocol: http
+  timeout: 1.0
+  url: http://combined-classification-ru:8198/model
+dialog_formatter: state_formatters.dp_formatters:preproc_last_human_utt_dialog
+response_formatter: state_formatters.dp_formatters:simple_formatter_service
+previous_services:
+- annotators.spelling_preprocessing
+required_previous_services: null
+state_manager_method: add_annotation
+tags: null
+endpoint: model
+service: annotators/combined_classification_ru/service_configs/combined-classification-ru
+date_created: '2023-03-16T09:45:32'
diff --git a/components/5eb485871be3.yml b/components/5eb485871be3.yml
new file mode 100644
index 0000000000..57a436b36c
--- /dev/null
+++ b/components/5eb485871be3.yml
@@ -0,0 +1,25 @@
+name: combined_classification_ru
+display_name: Combined Classification Ru
+component_type: null
+model_type: NN-based
+is_customizable: false
+author: publisher@deeppavlov.ai
+description: Russian BERT-based model including topic classification, MIDAS intent classification,
+  sentiment, toxicity, emotion, and factoid classification
+ram_usage: 1.5G
+gpu_usage: 3.5G
+group: annotators
+connector:
+  protocol: http
+  timeout: 1.0
+  url: http://combined-classification-ru:8198/batch_model
+dialog_formatter: state_formatters.dp_formatters:hypothesis_histories_list
+response_formatter: state_formatters.dp_formatters:simple_formatter_service
+previous_services:
+- skills
+required_previous_services: null
+state_manager_method: add_hypothesis_annotation_batch
+tags: null
+endpoint: batch_model
+service: annotators/combined_classification_ru/service_configs/combined-classification-ru
+date_created: '2023-03-16T09:45:32'
diff --git a/components/74p4006d17yappr7ji7dtf.yml b/components/74p4006d17yappr7ji7dtf.yml
new file mode 100644
index 0000000000..6974f9f0f5
--- /dev/null
+++ b/components/74p4006d17yappr7ji7dtf.yml
@@ -0,0 +1,24 @@
+name: user_knowledge_memorizer
+display_name: User Knowledge Graph
+component_type: null
+model_type: null
+is_customizable: false
+author: publisher@deeppavlov.ai
+description: Stores new triplets detected by Property Extraction, preserving unique data within the KG.
+ram_usage: 256M
+gpu_usage: null
+group: annotators
+connector:
+  protocol: http
+  url: http://user-knowledge-memorizer:8027/respond
+dialog_formatter: state_formatters.dp_formatters:last_human_annotated_utterance
+response_formatter: state_formatters.dp_formatters:simple_formatter_service
+previous_services:
+- annotators.property_extraction
+- annotators.custom_entity_linking
+required_previous_services: null
+state_manager_method: add_annotation
+tags: null
+endpoint: respond
+service: annotators/user_knowledge_memorizer/service_configs/user-knowledge-memorizer
+date_created: '2023-06-07T12:42:33'
diff --git a/components/8iHHdjsnfhewkl.yml b/components/8iHHdjsnfhewkl.yml
new file mode 100644
index 0000000000..b6d5a71310
--- /dev/null
+++ b/components/8iHHdjsnfhewkl.yml
@@ -0,0 +1,23 @@
+name: fromage
+display_name: FROMAGe Service
+component_type: null
+model_type: NN-based
+is_customizable: false
+author: publisher@deeppavlov.ai
+description: The service is built using the FROMAGe model, which is able to produce meaningful conversations with users about different images.
+ram_usage: 45G +gpu_usage: 20G +group: skills +connector: + protocol: http + timeout: 90.0 + url: http://fromage:8069/respond +dialog_formatter: state_formatters.dp_formatters:fromage_formatter +response_formatter: state_formatters.dp_formatters:simple_formatter_service +previous_services: null +required_previous_services: null +state_manager_method: add_annotation +tags: null +endpoint: respond +service: services/fromage/service_configs/fromage +date_created: '2023-03-16T09:45:32' \ No newline at end of file diff --git a/components/8jfFjmYnbdeH.yml b/components/8jfFjmYnbdeH.yml new file mode 100644 index 0000000000..b5d572ed08 --- /dev/null +++ b/components/8jfFjmYnbdeH.yml @@ -0,0 +1,24 @@ +name: dff_fromage_image_skill +display_name: Fromage Image Skill +component_type: Script-based w/o NNs +model_type: Dictionary/Pattern-based +is_customizable: false +author: publisher@deeppavlov.ai +description: A skill that utilizes information from the FROMAGe service and engages in a dialogue with the user about an image. +ram_usage: 100M +gpu_usage: null +group: skills +connector: + protocol: http + timeout: 2.0 + url: http://dff-fromage-image-skill:8070/respond +dialog_formatter: state_formatters.dp_formatters:dff_fromage_image_skill_formatter +response_formatter: state_formatters.dp_formatters:skill_with_attributes_formatter_service +previous_services: +- skill_selectors +required_previous_services: null +state_manager_method: add_hypothesis +tags: null +endpoint: respond +service: skills/dff_fromage_image_skill/service_configs/dff-fromage-image-skill +date_created: '2023-03-16T09:45:32' \ No newline at end of file diff --git a/components/FahT3rhiav1Bg2Q17wvE.yml b/components/FahT3rhiav1Bg2Q17wvE.yml new file mode 100644 index 0000000000..1b0757afdf --- /dev/null +++ b/components/FahT3rhiav1Bg2Q17wvE.yml @@ -0,0 +1,24 @@ +name: dff_travel_italy_skill +display_name: Travel Italy Skill +component_type: Script-based w/o NNs +model_type: Dictionary/Pattern-based +is_customizable: false +author: publisher@deeppavlov.ai +description: Looks up entities detected by Entity Detection in Knowledge Graph and uses these entities in bot's responses +ram_usage: 128M +gpu_usage: null +group: skills +connector: + protocol: http + timeout: 2.0 + url: http://dff-travel-italy-skill:8025/respond +dialog_formatter: state_formatters.dp_formatters:dff_travel_italy_skill_formatter +response_formatter: state_formatters.dp_formatters:skill_with_attributes_formatter_service +previous_services: +- skill_selectors +required_previous_services: null +state_manager_method: add_hypothesis +tags: null +endpoint: respond +service: skills/dff_travel_italy_skill/service_configs/dff-travel-italy-skill +date_created: '2023-06-22T09:45:32' diff --git a/components/OINEWgiowe923n2g2no.yml b/components/OINEWgiowe923n2g2no.yml new file mode 100644 index 0000000000..5aebc25a13 --- /dev/null +++ b/components/OINEWgiowe923n2g2no.yml @@ -0,0 +1,21 @@ +name: ros_flask_server_is_command_valid +display_name: ROS-Flask server +component_type: Script-based w/o NNs +model_type: null +is_customizable: false +author: publisher@deeppavlov.ai +description: This service allows Dream to interact with special connectors using ROS. It is an intermediate node between Dream and Client. This exact endpoint may be used to check whether a command is valid. 
+ram_usage: 250M
+gpu_usage: null
+connector:
+  protocol: http
+  timeout: 1.0
+  url: http://ros-flask-server:6000/is_command_valid
+previous_services:
+- skills
+required_previous_services:
+- robot_notifications
+tags: null
+endpoint: is_command_valid
+service: services/ros_flask_server/service_configs/ros_flask_server
+date_created: '2023-06-01T12:12:12'
\ No newline at end of file
diff --git a/components/OInfoqfwbqou2no.yml b/components/OInfoqfwbqou2no.yml
new file mode 100644
index 0000000000..dd9469ab49
--- /dev/null
+++ b/components/OInfoqfwbqou2no.yml
@@ -0,0 +1,21 @@
+name: ros_flask_server_perform_command
+display_name: ROS-Flask server
+component_type: Script-based w/o NNs
+model_type: null
+is_customizable: false
+author: publisher@deeppavlov.ai
+description: This service allows Dream to interact with special connectors using ROS. It is an intermediate node between Dream and Client. This exact endpoint may be used to append a command to the queue of commands to be performed on the Client side.
+ram_usage: 250M
+gpu_usage: null
+connector:
+  protocol: http
+  timeout: 1.0
+  url: http://ros-flask-server:6000/perform_command
+previous_services:
+- skills
+required_previous_services:
+- robot_notifications
+tags: null
+endpoint: perform_command
+service: services/ros_flask_server/service_configs/ros_flask_server
+date_created: '2023-06-01T12:12:12'
\ No newline at end of file
diff --git a/components/OowqncqowNAbj.yml b/components/OowqncqowNAbj.yml
new file mode 100644
index 0000000000..60d640c74f
--- /dev/null
+++ b/components/OowqncqowNAbj.yml
@@ -0,0 +1,23 @@
+name: personality_detection
+display_name: Personality Detection
+component_type: null
+model_type: ML-based
+is_customizable: false
+author: publisher@deeppavlov.ai
+description: Personality detection based on the OCEAN (Big Five) traits.
+ram_usage: 312M
+gpu_usage: null
+group: annotators
+connector:
+  protocol: http
+  timeout: 3.0
+  url: http://personality-detection:8026/model
+dialog_formatter: state_formatters.dp_formatters:personality_catcher_formatter_dialog
+response_formatter: state_formatters.dp_formatters:simple_formatter_service
+previous_services: null
+required_previous_services: null
+state_manager_method: add_annotation
+tags: null
+endpoint: model
+service: annotators/personality_detection/service_configs/personality_detection
+date_created: '2023-08-24T12:12:12'
diff --git a/components/Oqwkogbqo2no.yml b/components/Oqwkogbqo2no.yml
new file mode 100644
index 0000000000..8e390ed3f9
--- /dev/null
+++ b/components/Oqwkogbqo2no.yml
@@ -0,0 +1,21 @@
+name: ros_flask_server_is_command_performed
+display_name: ROS-Flask server
+component_type: Script-based w/o NNs
+model_type: null
+is_customizable: false
+author: publisher@deeppavlov.ai
+description: This service allows Dream to interact with special connectors using ROS. It is an intermediate node between Dream and Client. This exact endpoint is used to check whether the last command received by the Client has finished executing.
+ram_usage: 250M +gpu_usage: null +connector: + protocol: http + timeout: 1.0 + url: http://ros-flask-server:6000/is_command_performed +previous_services: +- skills +required_previous_services: +- robot_notifications +tags: null +endpoint: is_command_performed +service: services/ros_flask_server/service_configs/ros_flask_server +date_created: '2023-06-01T12:12:12' \ No newline at end of file diff --git a/components/bfb7643iIUUIUGdbivms.yml b/components/bfb7643iIUUIUGdbivms.yml index 8020952616..d76f6cf6de 100644 --- a/components/bfb7643iIUUIUGdbivms.yml +++ b/components/bfb7643iIUUIUGdbivms.yml @@ -1,5 +1,5 @@ name: dff_fairytale_ru_prompted_skill -display_name: Рассказчик Сказок (Навык) +display_name: Рассказчик Сказок component_type: Generative model_type: NN-based is_customizable: true diff --git a/components/biod7834IUGiwogk8.yml b/components/biod7834IUGiwogk8.yml index 2d588f0d76..e787f6ab40 100644 --- a/components/biod7834IUGiwogk8.yml +++ b/components/biod7834IUGiwogk8.yml @@ -1,5 +1,5 @@ name: dff_storyteller_ru_prompted_skill -display_name: Рассказчик Остросюжетных Историй (Навык) +display_name: Рассказчик Остросюжетных Историй component_type: Generative model_type: NN-based is_customizable: true diff --git a/components/dciuh4ikfjg43jhwefoi.yml b/components/dciuh4ikfjg43jhwefoi.yml new file mode 100644 index 0000000000..883fb8408f --- /dev/null +++ b/components/dciuh4ikfjg43jhwefoi.yml @@ -0,0 +1,32 @@ +name: timeout_service +display_name: Timeout Service +component_type: null +model_type: null +is_customizable: false +author: publisher@deeppavlov.ai +description: Timeout Service +ram_usage: 100M +gpu_usage: null +group: timeout_service +connector: + protocol: python + class_name: PredefinedTextConnector + response_text: Sorry, something went wrong inside. Please tell me, what did you + say. + annotations: + sentseg: + punct_sent: Sorry, something went wrong inside. Please tell me, what did you + say. + segments: + - Sorry, something went wrong inside. + - Please tell me, what did you say. 
+dialog_formatter: null +response_formatter: null +previous_services: null +required_previous_services: null +state_manager_method: add_bot_utterance_last_chance +tags: +- timeout +endpoint: respond +service: services/agent_services/service_configs/dream_ranking_and_midas_based_dm +date_created: '2023-03-04T19:27:44' diff --git a/components/fnp74878Buiwbkjz8vbw.yml b/components/fnp74878Buiwbkjz8vbw.yml index 046efb4479..6de044eb34 100644 --- a/components/fnp74878Buiwbkjz8vbw.yml +++ b/components/fnp74878Buiwbkjz8vbw.yml @@ -1,5 +1,5 @@ name: dff_informal_letter_ru_prompted_skill -display_name: Помощник Журналиста (Навык) +display_name: Помощник Журналиста component_type: Generative model_type: NN-based is_customizable: true diff --git a/components/mJFsrLmq1C2Q7bw7W6QnbT.yml b/components/mJFsrLmq1C2Q7bw7W6QnbT.yml index 93f9f8b81d..0d9da690a5 100644 --- a/components/mJFsrLmq1C2Q7bw7W6QnbT.yml +++ b/components/mJFsrLmq1C2Q7bw7W6QnbT.yml @@ -1,5 +1,5 @@ name: dff_casual_email_prompted_skill -display_name: Causal Emails Skill +display_name: Casual Emails Skill component_type: Generative model_type: NN-based is_customizable: true diff --git a/components/mXHLkbPWpFqq37vtWCuS.yml b/components/mXHLkbPWpFqq37vtWCuS.yml new file mode 100644 index 0000000000..e8f6f68760 --- /dev/null +++ b/components/mXHLkbPWpFqq37vtWCuS.yml @@ -0,0 +1,24 @@ +name: dff_user_kg_skill +display_name: User KG Skill +component_type: Script-based w/o NNs +model_type: Dictionary/Pattern-based +is_customizable: false +author: publisher@deeppavlov.ai +description: An example skill for looking up entities detected by Entity Detection and using these entities in bot's responses +ram_usage: 512M +gpu_usage: null +group: skills +connector: + protocol: http + timeout: 2.0 + url: http://dff-user-kg-skill:8028/respond +dialog_formatter: state_formatters.dp_formatters:dff_user_kg_skill_formatter +response_formatter: state_formatters.dp_formatters:skill_with_attributes_formatter_service +previous_services: +- skill_selectors +required_previous_services: null +state_manager_method: add_hypothesis +tags: null +endpoint: respond +service: skills/dff_user_kg_skill/service_configs/dff-user-kg-skill +date_created: '2023-06-22T10:45:32' diff --git a/components/npoidf8549ibuVIUfver89.yml b/components/npoidf8549ibuVIUfver89.yml index 3f4527d9dc..40123e34f3 100644 --- a/components/npoidf8549ibuVIUfver89.yml +++ b/components/npoidf8549ibuVIUfver89.yml @@ -1,5 +1,5 @@ name: dff_official_letter_ru_prompted_skill -display_name: Официальные Письма (Навык) +display_name: Официальные Письма component_type: Generative model_type: NN-based is_customizable: true diff --git a/components/onisrIUvuife37Hbf4h.yml b/components/onisrIUvuife37Hbf4h.yml index bc85f592b2..ea66b1526a 100644 --- a/components/onisrIUvuife37Hbf4h.yml +++ b/components/onisrIUvuife37Hbf4h.yml @@ -1,5 +1,5 @@ name: dff_journalist_helper_ru_prompted_skill -display_name: Помощник Журналиста (Навык) +display_name: Помощник Журналиста component_type: Generative model_type: NN-based is_customizable: true diff --git a/components/skjdfhow389rhuweih1982ehbjdfh.yml b/components/skjdfhow389rhuweih1982ehbjdfh.yml new file mode 100644 index 0000000000..7553fa984a --- /dev/null +++ b/components/skjdfhow389rhuweih1982ehbjdfh.yml @@ -0,0 +1,32 @@ +name: last_chance_service +display_name: Last Chance Service +component_type: null +model_type: null +is_customizable: false +author: publisher@deeppavlov.ai +description: Last Chance Service +ram_usage: 100M +gpu_usage: null +group: last_chance_service 
+connector: + protocol: python + class_name: PredefinedTextConnector + response_text: Sorry, something went wrong inside. Please tell me, what did you + say. + annotations: + sentseg: + punct_sent: Sorry, something went wrong inside. Please tell me, what did you + say. + segments: + - Sorry, something went wrong inside. + - Please tell me, what did you say. +dialog_formatter: null +response_formatter: null +previous_services: null +required_previous_services: null +state_manager_method: add_bot_utterance_last_chance +tags: +- last_chance +endpoint: respond +service: services/agent_services/service_configs/dream_ranking_and_midas_based_dm +date_created: '2023-03-04T19:27:44' diff --git a/response_selectors/llm_based_response_selector/server.py b/response_selectors/llm_based_response_selector/server.py index 0c88502972..ef8ebcff80 100644 --- a/response_selectors/llm_based_response_selector/server.py +++ b/response_selectors/llm_based_response_selector/server.py @@ -20,7 +20,7 @@ app = Flask(__name__) -GENERATIVE_TIMEOUT = int(getenv("GENERATIVE_TIMEOUT")) +GENERATIVE_TIMEOUT = float(getenv("GENERATIVE_TIMEOUT")) GENERATIVE_SERVICE_URL = getenv("GENERATIVE_SERVICE_URL") GENERATIVE_SERVICE_CONFIG = getenv("GENERATIVE_SERVICE_CONFIG") if GENERATIVE_SERVICE_CONFIG: diff --git a/response_selectors/ranking_and_intent_based_response_selector/server.py b/response_selectors/ranking_and_intent_based_response_selector/server.py index 503bd30671..3cdbf98fb0 100644 --- a/response_selectors/ranking_and_intent_based_response_selector/server.py +++ b/response_selectors/ranking_and_intent_based_response_selector/server.py @@ -34,7 +34,7 @@ SENTENCE_RANKER_ANNOTATION_NAME = getenv("SENTENCE_RANKER_ANNOTATION_NAME") SENTENCE_RANKER_SERVICE_URL = getenv("SENTENCE_RANKER_SERVICE_URL") -SENTENCE_RANKER_TIMEOUT = int(getenv("SENTENCE_RANKER_TIMEOUT")) +SENTENCE_RANKER_TIMEOUT = float(getenv("SENTENCE_RANKER_TIMEOUT")) FILTER_TOXIC_OR_BADLISTED = int(getenv("FILTER_TOXIC_OR_BADLISTED")) N_UTTERANCES_CONTEXT = int(getenv("N_UTTERANCES_CONTEXT")) assert SENTENCE_RANKER_ANNOTATION_NAME or SENTENCE_RANKER_SERVICE_URL, logger.error( diff --git a/response_selectors/ranking_based_response_selector/server.py b/response_selectors/ranking_based_response_selector/server.py index 4cd9adb175..cc1bc02a88 100644 --- a/response_selectors/ranking_based_response_selector/server.py +++ b/response_selectors/ranking_based_response_selector/server.py @@ -21,7 +21,7 @@ SENTENCE_RANKER_ANNOTATION_NAME = getenv("SENTENCE_RANKER_ANNOTATION_NAME") SENTENCE_RANKER_SERVICE_URL = getenv("SENTENCE_RANKER_SERVICE_URL") -SENTENCE_RANKER_TIMEOUT = int(getenv("SENTENCE_RANKER_TIMEOUT")) +SENTENCE_RANKER_TIMEOUT = float(getenv("SENTENCE_RANKER_TIMEOUT")) FILTER_TOXIC_OR_BADLISTED = int(getenv("FILTER_TOXIC_OR_BADLISTED")) N_UTTERANCES_CONTEXT = int(getenv("N_UTTERANCES_CONTEXT")) assert SENTENCE_RANKER_ANNOTATION_NAME or SENTENCE_RANKER_SERVICE_URL, logger.error( diff --git a/services/agent_services/service_configs/ai_faq_assistant/environment.yml b/services/agent_services/service_configs/ai_faq_assistant/environment.yml index cf3ef6ea90..e0e4417fd9 100644 --- a/services/agent_services/service_configs/ai_faq_assistant/environment.yml +++ b/services/agent_services/service_configs/ai_faq_assistant/environment.yml @@ -1,4 +1,6 @@ -WAIT_HOSTS: '' +WAIT_HOSTS: sentseg:8011, entity-storer:8089, entity-detection:8103, entity-linking:8075, + wiki-parser:8077, midas-classification:8090, midas-predictor:8121, factoid-qa:8071, + dff-program-y-skill:8008, 
ranking-and-intent-based-response-selector-ru:8082 WAIT_HOSTS_TIMEOUT: ${WAIT_TIMEOUT:-480} HIGH_PRIORITY_INTENTS: 1 RESTRICTION_FOR_SENSITIVE_CASE: 1 diff --git a/services/agent_services/service_configs/dream/service.yml b/services/agent_services/service_configs/dream/service.yml index 957a4117d2..134f58351f 100644 --- a/services/agent_services/service_configs/dream/service.yml +++ b/services/agent_services/service_configs/dream/service.yml @@ -2,7 +2,7 @@ name: agent endpoints: - respond compose: - command: sh -c 'bin/wait && python -m deeppavlov_agent.run agent.pipeline_config=assistant_dists/dream/pipeline_conf.json' + command: sh -c 'bin/wait && python -m deeppavlov_agent.run agent.pipeline_config=assistant_dists/dream-MIDAS-for-AC/pipeline_conf.json' environment: WAIT_HOSTS: '' WAIT_HOSTS_TIMEOUT: ${WAIT_TIMEOUT:-480} diff --git a/services/agent_services/service_configs/dream_multimodal/environment.yml b/services/agent_services/service_configs/dream_multimodal/environment.yml index cf3ef6ea90..a4edacb058 100644 --- a/services/agent_services/service_configs/dream_multimodal/environment.yml +++ b/services/agent_services/service_configs/dream_multimodal/environment.yml @@ -1,5 +1,8 @@ -WAIT_HOSTS: '' -WAIT_HOSTS_TIMEOUT: ${WAIT_TIMEOUT:-480} +WAIT_HOSTS: sentseg:8011, badlisted-words:8018, intent-catcher:8014, + fromage:8069, dff-program-y-skill:8008, dff-intent-responder-skill:8012, + dff-fromage-image-skill:8070, ranking-based-response-selector:8002, dialogpt:8125, + sentence-ranker:8128 +WAIT_HOSTS_TIMEOUT: ${WAIT_TIMEOUT:-1200} HIGH_PRIORITY_INTENTS: 1 RESTRICTION_FOR_SENSITIVE_CASE: 1 ALWAYS_TURN_ON_ALL_SKILLS: 0 diff --git a/services/agent_services/service_configs/dream_ranking_and_midas_based_dm/environment.yml b/services/agent_services/service_configs/dream_ranking_and_midas_based_dm/environment.yml new file mode 100644 index 0000000000..ec5b8be2ba --- /dev/null +++ b/services/agent_services/service_configs/dream_ranking_and_midas_based_dm/environment.yml @@ -0,0 +1,7 @@ +WAIT_HOSTS: '' +WAIT_HOSTS_TIMEOUT: ${WAIT_TIMEOUT:-1000} +HIGH_PRIORITY_INTENTS: 1 +RESTRICTION_FOR_SENSITIVE_CASE: 1 +ALWAYS_TURN_ON_ALL_SKILLS: 0 +LANGUAGE: EN +FALLBACK_FILE: fallbacks_dream_en.json diff --git a/services/agent_services/service_configs/dream_ranking_and_midas_based_dm/service.yml b/services/agent_services/service_configs/dream_ranking_and_midas_based_dm/service.yml new file mode 100644 index 0000000000..66c5b2e7ee --- /dev/null +++ b/services/agent_services/service_configs/dream_ranking_and_midas_based_dm/service.yml @@ -0,0 +1,18 @@ +name: agent +endpoints: +- respond +compose: + command: sh -c 'bin/wait && python -m deeppavlov_agent.run agent.pipeline_config=assistant_dists/dream_ranking_and_midas_based_dm/pipeline_conf.json' + environment: + WAIT_HOSTS: '' + WAIT_HOSTS_TIMEOUT: ${WAIT_TIMEOUT:-1000} + HIGH_PRIORITY_INTENTS: 1 + RESTRICTION_FOR_SENSITIVE_CASE: 1 + ALWAYS_TURN_ON_ALL_SKILLS: 0 + LANGUAGE: EN + FALLBACK_FILE: fallbacks_dream_en.json + volumes: + - .:/dp-agent + ports: + - 4242:4242 +proxy: null diff --git a/services/fromage/Dockerfile b/services/fromage/Dockerfile new file mode 100644 index 0000000000..d2ba082f31 --- /dev/null +++ b/services/fromage/Dockerfile @@ -0,0 +1,36 @@ +# syntax=docker/dockerfile:experimental + +FROM pytorch/pytorch:1.5-cuda10.1-cudnn7-runtime + +WORKDIR /src + +ARG PRETRAINED_MODEL_FNAME +ENV PRETRAINED_MODEL_FNAME ${PRETRAINED_MODEL_FNAME} +ARG CONFIG_NAME +ENV CONFIG_NAME ${CONFIG_NAME} +ARG SERVICE_PORT +ENV SERVICE_PORT ${SERVICE_PORT} +ARG 
RET_SCALE_FACTOR +ENV RET_SCALE_FACTOR ${RET_SCALE_FACTOR} + + +ENV PYTHONPATH "/src/fromage:/fromage:$PYTHONPATH" + +COPY ./services/fromage/requirements.txt /src/requirements.txt +RUN pip install -r /src/requirements.txt + +RUN apt-get update && apt-get install git -y +RUN pip install gdown==4.7.1 + +RUN mkdir /fromage && \ + git clone https://github.com/ciwwwnd/fromage.git /fromage + +RUN mkdir -p /services/fromage/fromage_model +RUN gdown 1wMojZNqEwApNlsCZVvSgQVtZLgbeLoKi -O /services/fromage/fromage_model/cc3m_embeddings.pkl +RUN gdown 1qyDiUw6uMA4nijLaNpr3J-2pigdIANYE -O /services/fromage/fromage_model/model_args.json +RUN gdown 1oG_fWDje3M6XBoU2GtrOlrqaffJEhxyN -O /services/fromage/fromage_model/pretrained_ckpt.pth.tar + +COPY ./services/fromage/ /src/ +COPY ./common/ ./common/ + +CMD gunicorn --workers=1 server:app -b 0.0.0.0:${SERVICE_PORT} --timeout=1200 diff --git a/services/fromage/README.md b/services/fromage/README.md new file mode 100644 index 0000000000..b0007ab923 --- /dev/null +++ b/services/fromage/README.md @@ -0,0 +1,16 @@ +# FROMAGe Service +**FROMAGe** is a service that is used to process an input image and respond to the user's questions accordingly. It is based on the [FROMAGe](https://github.com/kohjingyu/fromage/tree/main) model from [Grounding Language Models to Images for Multimodal Inputs and Outputs](https://arxiv.org/abs/2301.13823). + +GPU RAM 20 GB, RAM 45 GB. + +## Running server + +```sh +sudo AGENT_PORT=4242 docker-compose -f docker-compose.yml -f assistant_dists/dream_multimodal/docker-compose.override.yml -f assistant_dists/dream_multimodal/dev.yml -f assistant_dists/dream_multimodal/test.yml up --build fromage +``` + +## Testing + +```sh +./test.sh +``` diff --git a/services/fromage/requirements.txt b/services/fromage/requirements.txt new file mode 100644 index 0000000000..ffee96d138 --- /dev/null +++ b/services/fromage/requirements.txt @@ -0,0 +1,47 @@ +flask==1.1.1 +itsdangerous==2.0.1 +fromage==1.1.0 +gunicorn==19.9.0 +requests==2.22.0 +sentry-sdk[flask]==0.14.1 +healthcheck==1.3.3 +jinja2<=3.0.3 +Werkzeug<=2.0.3 +attrs==22.2.0 +certifi==2022.12.7 +charset-normalizer==3.0.1 +contourpy==1.0.6 +cycler==0.11.0 +einops==0.4.1 +exceptiongroup==1.1.0 +filelock==3.9.0 +fonttools==4.38.0 +huggingface-hub==0.12.0 +idna==2.8 +iniconfig==2.0.0 +kiwisolver==1.4.4 +matplotlib==3.5.3 +numpy==1.21.6 +packaging==23.0 +pandas==1.3.5 +Pillow==9.4.0 +pluggy==1.0.0 +pyparsing==3.0.9 +pytest==7.2.1 +python-dateutil==2.8.2 +regex==2022.10.31 +six==1.16.0 +tensorboard==2.11.2 +tensorboard-data-server==0.6.0 +tensorboard-plugin-wit==1.8.1 +tokenizers==0.12.1 +tomli==2.0.1 +torch==1.13.1 +torchaudio==0.11.0 +torchmetrics==0.9.3 +torchvision==0.12.0 +tqdm==4.64.1 +transformers==4.21.3 +typing_extensions==4.4.0 +urllib3==1.21.1 +gitpython==3.1.31 diff --git a/services/fromage/server.py b/services/fromage/server.py new file mode 100644 index 0000000000..c06fd2f529 --- /dev/null +++ b/services/fromage/server.py @@ -0,0 +1,77 @@ +import logging +import os +import time +from fromage import models +from fromage import utils +import torch +import sentry_sdk +from flask import Flask, request, jsonify +from sentry_sdk.integrations.flask import FlaskIntegration + +sentry_sdk.init(dsn=os.getenv("SENTRY_DSN"), integrations=[FlaskIntegration()]) + +logging.basicConfig(format="%(asctime)s - %(name)s - %(levelname)s - %(message)s", level=logging.INFO) +logger = logging.getLogger(__name__) + +FILE_SERVER_URL = os.getenv("FILE_SERVER_URL") +RET_SCALE_FACTOR = 
int(os.environ.get("RET_SCALE_FACTOR")) + + +try: + model_dir = "/services/fromage/fromage_model" + model = models.load_fromage(model_dir) + + if torch.cuda.is_available(): + logger.info("fromage is set to run on cuda") + + logger.info("fromage is ready") +except Exception as e: + sentry_sdk.capture_exception(e) + logger.exception(e) + raise e + +app = Flask(__name__) +logging.getLogger("werkzeug").setLevel("WARNING") + + +def generate_responses(image_path, prompt): + inp_image = [utils.get_image_from_url(image_path)] + if prompt == "": + prompt = ["What is the image?"] + elif isinstance(prompt, str): + prompt = [prompt] + + text = "" + for p in prompt: + text += f"Q: {p}\nA:" + model_prompt = inp_image + [text] + model_outputs = model.generate_for_images_and_texts( + model_prompt, num_words=32, ret_scale_factor=RET_SCALE_FACTOR, max_num_rets=0 + ) + text += " ".join([s for s in model_outputs if isinstance(s, str)]) + "\n" + return model_outputs + + +@app.route("/respond", methods=["POST"]) +def respond(): + st_time = time.time() + image_paths = request.json.get("image_paths") + sentences = request.json.get("sentences") + + frmg_answers = [] + for image_path, sentence in zip(image_paths, sentences): + if image_path: + try: + outputs = generate_responses(image_path, sentence) + frmg_answers += outputs + except Exception as exc: + logger.exception(exc) + sentry_sdk.capture_exception(exc) + frmg_answers += [[""]] + else: + frmg_answers += [[""]] + + total_time = time.time() - st_time + logger.info(f"fromage results: {frmg_answers}") + logger.info(f"fromage exec time: {total_time:.3f}s") + return jsonify(frmg_answers) diff --git a/services/fromage/service_configs/fromage/environment.yml b/services/fromage/service_configs/fromage/environment.yml new file mode 100644 index 0000000000..f715424aaa --- /dev/null +++ b/services/fromage/service_configs/fromage/environment.yml @@ -0,0 +1,4 @@ +SERVICE_PORT: 8069 +SERVICE_NAME: fromage +FLASK_APP: server +RET_SCALE_FACTOR: 0 diff --git a/services/fromage/service_configs/fromage/service.yml b/services/fromage/service_configs/fromage/service.yml new file mode 100644 index 0000000000..ff1dab6794 --- /dev/null +++ b/services/fromage/service_configs/fromage/service.yml @@ -0,0 +1,28 @@ +name: fromage +endpoints: +- respond +compose: + env_file: + - .env + build: + args: + SERVICE_PORT: 8069 + SERVICE_NAME: fromage + FLASK_APP: server + RET_SCALE_FACTOR: 0 + context: . 
+ dockerfile: ./services/fromage/Dockerfile + command: flask run -h 0.0.0.0 -p 8069 + environment: + - FLASK_APP=server + deploy: + resources: + limits: + memory: 45G + reservations: + memory: 45G + volumes: + - ./services/fromage:/src + - ./common:/src/common + ports: + - 8069:8069 diff --git a/services/fromage/test.py b/services/fromage/test.py new file mode 100644 index 0000000000..1812e80adb --- /dev/null +++ b/services/fromage/test.py @@ -0,0 +1,19 @@ +import requests + + +def test_respond(): + url = "http://0.0.0.0:8069/respond" + + image_paths = ["https://s0.rbk.ru/v6_top_pics/media/img/7/26/346832135841267.jpg"] + sentences = ["What is the make of the car?"] + request_data = {"image_paths": image_paths, "sentences": sentences} + result = requests.post(url, json=request_data).json() + print(result) + + obligatory_word = "SUV" + assert obligatory_word in result[0], f"Expected the word '{obligatory_word}' to present in caption" + print("\n", "Success!!!") + + +if __name__ == "__main__": + test_respond() diff --git a/services/fromage/test.sh b/services/fromage/test.sh new file mode 100755 index 0000000000..468a5a38fc --- /dev/null +++ b/services/fromage/test.sh @@ -0,0 +1,3 @@ +#!/bin/bash + +python test.py \ No newline at end of file diff --git a/services/image_captioning/Dockerfile b/services/image_captioning/Dockerfile index 51ee32cc45..c23b7af539 100644 --- a/services/image_captioning/Dockerfile +++ b/services/image_captioning/Dockerfile @@ -18,7 +18,8 @@ RUN conda remove PyYAML WORKDIR /ofa -RUN git clone https://github.com/dariamitciuk/OFA.git && \ +RUN git clone https://github.com/dariamitciuk/OFA.git /ofa && \ + cd /ofa && \ pip install -r requirements.txt && \ git checkout 153048138044edcbe0b099463810a971a7bf0057 diff --git a/services/openai_api_lm/server.py b/services/openai_api_lm/server.py index 1a207f6527..949d060712 100644 --- a/services/openai_api_lm/server.py +++ b/services/openai_api_lm/server.py @@ -55,7 +55,9 @@ def generate_responses(context, openai_api_key, openai_org, prompt, generation_p for uttr_id, uttr in enumerate(context) ] logger.info(f"context inside generate_responses seen as: {messages}") - response = openai.ChatCompletion.create(model=PRETRAINED_MODEL_NAME_OR_PATH, messages=messages) + response = openai.ChatCompletion.create( + model=PRETRAINED_MODEL_NAME_OR_PATH, messages=messages, **generation_params + ) else: dialog_context = "" if prompt: diff --git a/services/robot_command_sender/Dockerfile b/services/robot_command_sender/Dockerfile index 5cc92da089..1749d0c8d6 100644 --- a/services/robot_command_sender/Dockerfile +++ b/services/robot_command_sender/Dockerfile @@ -1,7 +1,7 @@ FROM python:3.9 -ARG ROS_FSM_SERVER -ENV ROS_FSM_SERVER ${ROS_FSM_SERVER} +ARG ROS_FLASK_SERVER +ENV ROS_FLASK_SERVER ${ROS_FLASK_SERVER} ARG SERVICE_PORT ENV SERVICE_PORT ${SERVICE_PORT} diff --git a/services/robot_command_sender/server.py b/services/robot_command_sender/server.py index ff377528a2..1706a5e8de 100644 --- a/services/robot_command_sender/server.py +++ b/services/robot_command_sender/server.py @@ -14,8 +14,8 @@ app = Flask(__name__) -ROS_FSM_SERVER = getenv("ROS_FSM_SERVER") -SKILL_NAMES_SENDING_COMMANDS = ["intent_responder", "dff_intent_responder_skill"] +ROS_FLASK_SERVER = getenv("ROS_FLASK_SERVER") +SKILL_NAMES_SENDING_COMMANDS = ["intent_responder", "dff_intent_responder_skill", "dff_command_selector_skill"] @app.route("/send", methods=["POST"]) @@ -35,7 +35,7 @@ def respond(): logger.info(f"robot_command_sender: command `{command}` is being sent to robot") 
result = False try: - result = send_robot_command_to_perform(command, ROS_FSM_SERVER, dialog_id) + result = send_robot_command_to_perform(command, ROS_FLASK_SERVER, dialog_id) except Exception as e: sentry_sdk.capture_exception(e) logger.exception(e) diff --git a/services/robot_command_sender/service_configs/robot-command-sender/environment.yml b/services/robot_command_sender/service_configs/robot-command-sender/environment.yml index 10686656e4..40ea332a72 100644 --- a/services/robot_command_sender/service_configs/robot-command-sender/environment.yml +++ b/services/robot_command_sender/service_configs/robot-command-sender/environment.yml @@ -1,4 +1,4 @@ SERVICE_PORT: 8035 SERVICE_NAME: robot_command_sender -ROS_FSM_SERVER: http://robot-fake-server:8038 +ROS_FLASK_SERVER: http://ros-flask-server:6000 FLASK_APP: server diff --git a/services/robot_command_sender/service_configs/robot-command-sender/service.yml b/services/robot_command_sender/service_configs/robot-command-sender/service.yml index 33c6163003..2e3c86325e 100644 --- a/services/robot_command_sender/service_configs/robot-command-sender/service.yml +++ b/services/robot_command_sender/service_configs/robot-command-sender/service.yml @@ -8,7 +8,7 @@ compose: args: SERVICE_PORT: 8035 SERVICE_NAME: robot_command_sender - ROS_FSM_SERVER: http://robot-fake-server:8038 + ROS_FLASK_SERVER: http://ros-flask-server:6000 FLASK_APP: server context: . dockerfile: ./services/robot_command_sender/Dockerfile diff --git a/services/robot_notifications/Dockerfile b/services/robot_notifications/Dockerfile index 46129ccb02..2d1299749c 100644 --- a/services/robot_notifications/Dockerfile +++ b/services/robot_notifications/Dockerfile @@ -1,7 +1,7 @@ FROM python:3.9 -ARG ROS_FSM_SERVER -ENV ROS_FSM_SERVER ${ROS_FSM_SERVER} +ARG ROS_FLASK_SERVER +ENV ROS_FLASK_SERVER ${ROS_FLASK_SERVER} ARG SERVICE_PORT ENV SERVICE_PORT ${SERVICE_PORT} diff --git a/services/robot_notifications/server.py b/services/robot_notifications/server.py index 6b65c86325..3be5dfc65d 100644 --- a/services/robot_notifications/server.py +++ b/services/robot_notifications/server.py @@ -15,7 +15,7 @@ app = Flask(__name__) -ROS_FSM_SERVER = getenv("ROS_FSM_SERVER") +ROS_FLASK_SERVER = getenv("ROS_FLASK_SERVER") SERVICE_PORT = int(getenv("SERVICE_PORT")) @@ -32,7 +32,7 @@ def respond(): logger.info(f"robot_notifications: found command `{command}` sent to robot") result = False try: - result = check_if_command_performed(command, ROS_FSM_SERVER, dialog.get("dialog_id", "unknown")) + result = check_if_command_performed(command, ROS_FLASK_SERVER, dialog.get("dialog_id", "unknown")) except Exception as e: sentry_sdk.capture_exception(e) logger.exception(e) diff --git a/services/ros_flask_server/Dockerfile b/services/ros_flask_server/Dockerfile new file mode 100644 index 0000000000..fbfb77ef56 --- /dev/null +++ b/services/ros_flask_server/Dockerfile @@ -0,0 +1,40 @@ +FROM ubuntu:20.04 + +ARG SERVICE_PORT +ARG USERNAME=dkr +ARG USER_UID=1000 +ARG USER_GID=$USER_UID + +ENV SERVICE_PORT ${SERVICE_PORT} + +RUN apt update + +RUN DEBIAN_FRONTEND=noninteractive TZ=Etc/UTC apt-get -y install tzdata keyboard-configuration + +RUN apt install -y gnupg curl ca-certificates + +COPY ./services/ros_flask_server/* /src/ +COPY ./common/ /src/common/ + +SHELL ["/bin/bash", "-c"] + +RUN apt update +RUN apt install -y lsb-release build-essential python3 gcc g++ make cmake git python-is-python3 apt-utils nginx + +RUN apt install -y python3-pip +RUN pip install -r /src/requirements.txt + +RUN apt install -y ufw + +RUN 
sh -c 'echo "deb http://packages.ros.org/ros/ubuntu $(lsb_release -sc) main" > /etc/apt/sources.list.d/ros-latest.list'
+RUN curl -s https://raw.githubusercontent.com/ros/rosdistro/master/ros.asc | apt-key add -
+RUN apt update
+RUN apt install -y ros-noetic-desktop
+
+RUN apt install -y python3-rosdep python3-rosinstall python3-rosinstall-generator python3-wstool
+RUN rosdep init
+RUN rosdep update
+
+WORKDIR /src/
+
+CMD rm -rf /src/catkin_ws; mkdir -p /src/catkin_ws/src && cd catkin_ws && export PATH=/src/.local/bin:$PATH && source /opt/ros/noetic/setup.bash && catkin_make && source devel/setup.bash && cd src && catkin_create_pkg ros_dream std_msgs rospy roscpp && cd /src/catkin_ws && mkdir /src/catkin_ws/src/ros_dream/scripts && mv /src/listener.py /src/catkin_ws/src/ros_dream/scripts/listener.py && catkin_make && cd /src && source /src/catkin_ws/devel/setup.bash && (trap 'kill 0' SIGINT; roscore & gunicorn -b 0.0.0.0:$SERVICE_PORT --workers=1 server:app)
\ No newline at end of file
diff --git a/services/ros_flask_server/README.md b/services/ros_flask_server/README.md
new file mode 100644
index 0000000000..826d66d5a2
--- /dev/null
+++ b/services/ros_flask_server/README.md
@@ -0,0 +1,43 @@
+# ros_flask_server service
+
+This service is an intermediate node in the dream_embodied pipeline:
+
+### Architecture
+
+[Architecture](../../assistant_dists/dream_embodied/architecture.png)
+
+### Description
+
+This service is a combined ROS-Flask server.
+
+The Flask part of the service exposes endpoints such as `/set_commands`, `/is_command_valid`, and `/perform_command`. Some of these endpoints are meant to be accessed from the client side (e.g. `/set_commands`), and some only from the Dream side (e.g. `/perform_command`).
+Together, these endpoints form a two-way API between Dream and the connector, so the two can interact in a standardized, controlled, and easily modifiable manner.
+
+The ROS part of the server works as follows: when Dream asks the connector to perform a command via the `/perform_command` endpoint, the command is first published to a ROS node, where it can be processed in a ROS-compliant way. This makes it possible to interact with real ROS-controlled robots and applications.
+
+### Endpoints
+
+##### `/set_commands`
+
+This endpoint should only be accessed from the client side. It is used to set the list of valid commands for the server. Only the commands set via this endpoint may be executed.
+
+##### `/is_command_valid`
+
+This endpoint is used to check whether a command is valid, i.e. whether the supplied string is in the list of valid commands.
+
+##### `/perform_command`
+
+This endpoint should only be accessed from the Dream side. When a request is sent to this endpoint, the supplied command name is appended to the command queue if it is valid.
+
+##### `/receive_command`
+
+This endpoint should only be accessed from the client side. Every few seconds the client sends a request to this endpoint to receive the next queued command, if there is one to be executed.
+
+##### `/is_command_performed`
+
+This endpoint should only be accessed from the Dream side. When a request is sent to this endpoint, it simply checks whether a command is currently being executed. If there is none, it is inferred that the last command has already been performed.
+
+##### `/command_is_performed`
+
+This endpoint should only be accessed from the client side. When the client finishes performing a command, it sends a request to this endpoint to let Dream know that it can now receive new commands.
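For illustration, a minimal client-side loop over this API could look like the sketch below. The host, port, endpoint names, and JSON keys follow the service configs and `server.py` in this PR; the concrete command names are placeholders:

```python
import time

import requests

ROS_FLASK_SERVER = "http://ros-flask-server:6000"  # address used in the service configs of this PR

# 1. Register the commands this client can actually execute.
requests.post(f"{ROS_FLASK_SERVER}/set_commands", json={"commands": ["test_command", "move_forward"]})

# 2. Poll for queued commands and report completion after executing each one.
while True:
    command = requests.post(f"{ROS_FLASK_SERVER}/receive_command", json={}).json()["command"]
    if command:
        ...  # perform the command on the robot/app side
        requests.post(f"{ROS_FLASK_SERVER}/command_is_performed", json={})
    time.sleep(2)
```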
diff --git a/services/ros_flask_server/launch.sh b/services/ros_flask_server/launch.sh
new file mode 100644
index 0000000000..114530fbea
--- /dev/null
+++ b/services/ros_flask_server/launch.sh
@@ -0,0 +1,9 @@
+#!/bin/bash
+
+roscore &
+
+gunicorn --workers=1 server:app &
+
+wait -n
+
+exit $?
\ No newline at end of file
diff --git a/services/ros_flask_server/listener.py b/services/ros_flask_server/listener.py
new file mode 100644
index 0000000000..9dc684df1e
--- /dev/null
+++ b/services/ros_flask_server/listener.py
@@ -0,0 +1,20 @@
+#!/usr/bin/env python
+import rospy
+from std_msgs.msg import String
+
+
+def callback(data):
+    rospy.loginfo(rospy.get_caller_id() + "Received: %s", data.data)
+    # TODO: implement logic
+
+
+def listener():
+    rospy.init_node("listener", anonymous=True)
+
+    rospy.Subscriber("talker", String, callback)
+
+    rospy.spin()
+
+
+if __name__ == "__main__":
+    listener()
diff --git a/services/ros_flask_server/requirements.txt b/services/ros_flask_server/requirements.txt
new file mode 100644
index 0000000000..9106b1faa3
--- /dev/null
+++ b/services/ros_flask_server/requirements.txt
@@ -0,0 +1,8 @@
+flask==1.1.1
+itsdangerous==2.0.1
+gunicorn==19.9.0
+requests==2.22.0
+sentry-sdk==0.12.3
+click==7.1.2
+jinja2<=3.0.3
+Werkzeug<=2.0.3
\ No newline at end of file
diff --git a/services/ros_flask_server/server.py b/services/ros_flask_server/server.py
new file mode 100644
index 0000000000..4d0bb5eb0a
--- /dev/null
+++ b/services/ros_flask_server/server.py
@@ -0,0 +1,142 @@
+import threading
+import rospy
+
+from std_msgs.msg import String
+from flask import Flask, request
+from flask import jsonify
+
+import logging
+import time
+from os import getenv
+
+import sentry_sdk
+
+
+sentry_sdk.init(getenv("SENTRY_DSN"))
+
+logging.basicConfig(format="%(asctime)s - %(name)s - %(levelname)s - %(message)s", level=logging.INFO)
+logger = logging.getLogger(__name__)
+
+app = Flask(__name__)
+
+talker = rospy.Publisher("talker", String, queue_size=1)
+
+threading.Thread(target=lambda: rospy.init_node("listener", disable_signals=True)).start()
+
+VALID_COMMANDS = ["test_command"]
+COMMAND_QUEUE = []
+EXECUTING_COMMAND = None
+
+
+@app.route("/ping", methods=["POST"])
+def ping():
+    return "pong"
+
+
+@app.route("/set_commands", methods=["POST"])  # this endpoint should not be accessed from within dream
+def respond_set_commands():
+    global VALID_COMMANDS
+
+    st_time = time.perf_counter()
+    VALID_COMMANDS = list(map(lambda i: i.lower(), request.json.get("commands", [])))
+    if not VALID_COMMANDS:
+        logger.info("embodied-server user did not send valid commands list")
+    logger.info(f"embodied-server `VALID_COMMANDS` set: {VALID_COMMANDS}")
+
+    total_time = time.perf_counter() - st_time
+
+    logger.info(f"embodied-server `set_commands` exec time: {total_time:.3f}s")
+
+    return {"result": bool(VALID_COMMANDS)}
+
+
+@app.route("/is_command_valid", methods=["POST"])
+def respond_is_command_valid():
+    st_time = time.perf_counter()
+
+    command = request.json.get("command", None)
+    # guard against a missing command so that `item in command` cannot raise TypeError
+    results = {"result": bool(command) and any(item in command for item in VALID_COMMANDS)}
+    logger.info(f"embodied-server `is_command_valid` results: {results}")
+
+    total_time = time.perf_counter() - st_time
+
+    logger.info(f"embodied-server `is_command_valid` exec time: {total_time:.3f}s")
+
+    return jsonify(results)
+
+
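+# /perform_command and /receive_command implement a simple producer-consumer queue:
+# Dream validates a command, publishes it to the `talker` ROS topic, and appends it
+# to COMMAND_QUEUE; the client later pops it via /receive_command and reports back
+# through /command_is_performed.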
+@app.route("/perform_command", methods=["POST"])
+def respond_perform_command():
+    st_time = time.perf_counter()
+
+    command = request.json.get("command", None)
+    cmd_valid = command in VALID_COMMANDS
+    logger.info(f"ros-flask-server received command: {command}, valid: {cmd_valid}")
+    if cmd_valid:
+        logger.info("Sending command to ROS...")
+        try:
+            talker.publish(command)
+            logger.info("Successfully returned from ROS!")
+            COMMAND_QUEUE.append(command)
+        except Exception as e:
+            logger.info(f"Error inside ROS: {e}")
+    results = {"result": cmd_valid}
+    logger.info(f"embodied-server `perform_command` {command} appended to queue?: {results}")
+
+    total_time = time.perf_counter() - st_time
+
+    logger.info(f"embodied-server `perform_command` exec time: {total_time:.3f}s")
+
+    return jsonify(results)
+
+
+@app.route("/receive_command", methods=["POST"])  # this endpoint should not be accessed from within dream
+def respond_receive_command():
+    global EXECUTING_COMMAND
+
+    st_time = time.perf_counter()
+
+    command = COMMAND_QUEUE.pop(0) if COMMAND_QUEUE else None
+    # remember the command the client is now executing; /command_is_performed resets it
+    EXECUTING_COMMAND = command
+    results = {"command": command}
+    logger.info(f"embodied-server `receive_command` results: {results}")
+
+    total_time = time.perf_counter() - st_time
+
+    logger.info(f"embodied-server `receive_command` exec time: {total_time:.3f}s")
+
+    return jsonify(results)
+
+
+@app.route("/is_command_performed", methods=["POST"])
+def respond_is_command_performed():
+    st_time = time.perf_counter()
+
+    # null means no command is currently being executed, i.e. the last one has finished
+    results = {"result": EXECUTING_COMMAND}
+    logger.info(f"embodied-server `is_command_performed` results: {results}")
+
+    total_time = time.perf_counter() - st_time
+
+    logger.info(f"embodied-server `is_command_performed` exec time: {total_time:.3f}s")
+
+    return jsonify(results)
+
+
+@app.route("/command_is_performed", methods=["POST"])  # this endpoint should not be accessed from within dream
+def respond_command_is_performed():
+    global EXECUTING_COMMAND
+
+    st_time = time.perf_counter()
+
+    results = {"result": True}
+    logger.info(f"embodied-server `command_is_performed` results: {results}")
+    EXECUTING_COMMAND = None
+
+    total_time = time.perf_counter() - st_time
+
+    logger.info(f"embodied-server `command_is_performed` exec time: {total_time:.3f}s")
+
+    return jsonify(results)
+
+
+if __name__ == "__main__":
+    app.run(host="0.0.0.0", port=6000, debug=True)
diff --git a/services/ros_flask_server/service_configs/ros_flask_server/environment.yml b/services/ros_flask_server/service_configs/ros_flask_server/environment.yml
new file mode 100644
index 0000000000..65ddcd9396
--- /dev/null
+++ b/services/ros_flask_server/service_configs/ros_flask_server/environment.yml
@@ -0,0 +1,2 @@
+SERVICE_PORT: 6000
+SERVICE_NAME: ros_flask_server
\ No newline at end of file
diff --git a/services/ros_flask_server/service_configs/ros_flask_server/service.yml b/services/ros_flask_server/service_configs/ros_flask_server/service.yml
new file mode 100644
index 0000000000..698a298ca6
--- /dev/null
+++ b/services/ros_flask_server/service_configs/ros_flask_server/service.yml
@@ -0,0 +1,27 @@
+name: ros-flask-server
+endpoints:
+- is_command_valid
+- perform_command
+- is_command_performed
+compose:
+  env_file:
+  - .env
+  build:
+    args:
+      SERVICE_PORT: 6000
+      SERVICE_NAME: ros_flask_server
+    context: .
+ dockerfile: ./services/ros_flask_server/Dockerfile + command: gunicorn --workers=1 server:app -b 0.0.0.0:6000 --timeout 600 + environment: null + deploy: + resources: + limits: + memory: 250M + reservations: + memory: 250M + volumes: + - ./common:/src/common + - ./services/ros_flask_server:/src + ports: + - 6000:6000 \ No newline at end of file diff --git a/services/terminusdb/test.py b/services/terminusdb/test.py index 1caa071545..66c2b48d87 100644 --- a/services/terminusdb/test.py +++ b/services/terminusdb/test.py @@ -1,9 +1,5 @@ -from dotenv import load_dotenv from deeppavlov_kg import TerminusdbKnowledgeGraph -load_dotenv("./.env") -load_dotenv("./.env_secret") - def main(): TERMINUSDB_SERVER_URL = "http://0.0.0.0:6363" diff --git a/services/terminusdb/test.sh b/services/terminusdb/test.sh new file mode 100644 index 0000000000..cf55721bd3 --- /dev/null +++ b/services/terminusdb/test.sh @@ -0,0 +1,4 @@ +#!/bin/bash + + +python test.py diff --git a/skill_selectors/rule_based_selector/connector.py b/skill_selectors/rule_based_selector/connector.py index 5d7e166297..1b1308e49d 100644 --- a/skill_selectors/rule_based_selector/connector.py +++ b/skill_selectors/rule_based_selector/connector.py @@ -12,7 +12,7 @@ from common.link import get_linked_to_skills, get_previously_active_skill from common.movies import extract_movies_names_from_annotations from common.response_selection import UNPREDICTABLE_SKILLS -from common.robot import command_intents +from common.robot import command_intents, embodied_intents from common.sensitive import is_sensitive_topic_and_request from common.skills_turn_on_topics_and_patterns import turn_on_skills from common.universal_templates import ( @@ -84,6 +84,7 @@ async def send(self, payload: Dict, callback: Callable): [k for k in intent_catcher_intents if k in high_priority_intents["dff_intent_responder_skill"]] ) low_priority_intent_detected = any([k for k in intent_catcher_intents if k in low_priority_intents]) + embodied_cmd_detected = any([k for k in intent_catcher_intents if k in embodied_intents]) command_detected = any([k for k in intent_catcher_intents if k in command_intents]) detected_topics = set(get_topics(user_uttr, which="all")) @@ -97,6 +98,9 @@ async def send(self, payload: Dict, callback: Callable): dialog_len = len(dialog["human_utterances"]) if user_uttr.get("attributes", {}).get("image") is not None: skills_for_uttr.append("dff_image_skill") + if any(["image" in user_uttr.get("attributes", {}) for user_uttr in dialog["human_utterances"][-5:]]): + skills_for_uttr.append("dff_fromage_image_skill") + exit_cond = "exit" in intent_catcher_intents and ( dialog_len == 1 or (dialog_len == 2 and len(user_uttr_text.split()) > 3) ) @@ -126,9 +130,8 @@ async def send(self, payload: Dict, callback: Callable): skills_for_uttr.append("dummy_skill") # process intent with corresponding IntentResponder skills_for_uttr.append("dff_intent_responder_skill") - elif command_detected: + elif embodied_cmd_detected or command_detected: skills_for_uttr.append("dummy_skill") - # process intents with Command Selector skills_for_uttr.append("dff_command_selector_skill") elif is_sensitive_topic_and_request(user_uttr) and RESTRICTION_FOR_SENSITIVE_CASE: # process user utterance with sensitive content, "safe mode" @@ -234,6 +237,8 @@ async def send(self, payload: Dict, callback: Callable): bot_uttr.get("text", ""), available_skills=[ "dff_art_skill", + "dff_user_kg_skill", + "dff_travel_italy_skill", "dff_movie_skill", "dff_book_skill", "news_api_skill", diff --git 
a/skills/dff_command_selector_skill/Dockerfile b/skills/dff_command_selector_skill/Dockerfile index bd8e0bb535..a0083fee0e 100644 --- a/skills/dff_command_selector_skill/Dockerfile +++ b/skills/dff_command_selector_skill/Dockerfile @@ -15,6 +15,9 @@ ENV LANGUAGE ${LANGUAGE} ARG SERVICE_NAME ENV SERVICE_NAME ${SERVICE_NAME} +RUN apt update +RUN apt install -y curl + COPY skills/${SERVICE_NAME}/requirements.txt . RUN pip install -r requirements.txt && \ python -m nltk.downloader wordnet @@ -25,10 +28,13 @@ COPY ./common/ ./common/ ARG SERVICE_PORT ENV SERVICE_PORT ${SERVICE_PORT} -ARG ROS_FSM_SERVER -ENV ROS_FSM_SERVER ${ROS_FSM_SERVER} +ARG FAKE +ENV FAKE ${FAKE} + +ARG ROS_FLASK_SERVER +ENV ROS_FLASK_SERVER ${ROS_FLASK_SERVER} -# wait for a server answer ( INTERVAL + TIMEOUT ) * RETRIES seconds after that change stutus to unhealthy +# wait for a server answer ( INTERVAL + TIMEOUT ) * RETRIES seconds after that change status to unhealthy HEALTHCHECK --interval=5s --timeout=5s --retries=3 CMD curl --fail 127.0.0.1:${SERVICE_PORT}/healthcheck || exit 1 diff --git a/skills/dff_command_selector_skill/scenario/response.py b/skills/dff_command_selector_skill/scenario/response.py index afd2abdae5..6048ccd553 100644 --- a/skills/dff_command_selector_skill/scenario/response.py +++ b/skills/dff_command_selector_skill/scenario/response.py @@ -22,7 +22,7 @@ def command_selector_response(ctx: Context, actor: Actor, *args, **kwargs) -> st dialog = int_ctx.get_dialog(ctx, actor) dialog["seen"] = dialog["called_intents"][intention] funcs = response_funcs.get_respond_funcs()[intention] - response = funcs(ctx, actor, intention) + response = funcs(ctx, actor) if not isinstance(response, str): conf = deepcopy(response[1]) human_attr = deepcopy(response[2]) diff --git a/skills/dff_command_selector_skill/scenario/response_funcs.py b/skills/dff_command_selector_skill/scenario/response_funcs.py index 4c0a8155f4..01bf8f9382 100644 --- a/skills/dff_command_selector_skill/scenario/response_funcs.py +++ b/skills/dff_command_selector_skill/scenario/response_funcs.py @@ -10,7 +10,7 @@ LANGUAGE = getenv("LANGUAGE", "EN") -ROS_FSM_SERVER = getenv("ROS_FSM_SERVER") +ROS_FLASK_SERVER = getenv("ROS_FLASK_SERVER") logging.basicConfig(format="%(asctime)s - %(pathname)s - %(lineno)d - %(levelname)s - %(message)s", level=logging.DEBUG) logger = logging.getLogger(__name__) @@ -18,6 +18,7 @@ def get_respond_funcs(): return { + "test_command": test_command_respond, "track_object": track_object_respond, "turn_around": turn_around_respond, "move_forward": move_forward_respond, @@ -31,7 +32,17 @@ def get_human_utterances(ctx: Context, actor: Actor) -> list: return {} if ctx.validation else ctx.misc["agent"]["dialog"]["human_utterances"] -def track_object_respond(ctx: Context, actor: Actor, intention: str): +def test_command_respond(ctx: Context, actor: Actor): + command = "test_command" + response = "Success" + + if check_if_valid_robot_command(command, ROS_FLASK_SERVER, dialog_id=int_ctx.get_dialog_id(ctx, actor)): + return response, 1.0, {}, {}, {"command_to_perform": command} + else: + return "" + + +def track_object_respond(ctx: Context, actor: Actor): utt = int_ctx.get_last_human_utterance(ctx, actor) entities = get_entities(utt, only_named=False, with_labels=False, return_lemmas=True) if len(entities) == 1: @@ -44,13 +55,13 @@ def track_object_respond(ctx: Context, actor: Actor, intention: str): else: response = "I did not get tracked object. Please repeat the command." 
- if check_if_valid_robot_command(command, ROS_FSM_SERVER, dialog_id=int_ctx.get_dialog_id(ctx, actor)): + if check_if_valid_robot_command(command, ROS_FLASK_SERVER, dialog_id=int_ctx.get_dialog_id(ctx, actor)): return response, 1.0, {}, {}, {"command_to_perform": command} else: return "" -def turn_around_respond(ctx: Context, actor: Actor, intention: str): +def turn_around_respond(ctx: Context, actor: Actor): utt = int_ctx.get_last_human_utterance(ctx, actor) degree = re.findall(r"[0-9]+", utt["text"]) if "против" in utt["text"] or re.search(r"counter[- ]?clock-?wise", utt["text"]): @@ -80,13 +91,13 @@ def turn_around_respond(ctx: Context, actor: Actor, intention: str): else: response = "Turning around clockwise." - if check_if_valid_robot_command(command, ROS_FSM_SERVER, dialog_id=int_ctx.get_dialog_id(ctx, actor)): + if check_if_valid_robot_command(command, ROS_FLASK_SERVER, dialog_id=int_ctx.get_dialog_id(ctx, actor)): return response, 1.0, {}, {}, {"command_to_perform": command} else: return "" -def move_forward_respond(ctx: Context, actor: Actor, intention: str): +def move_forward_respond(ctx: Context, actor: Actor): utt = int_ctx.get_last_human_utterance(ctx, actor) dist = re.findall(r"[0-9]+", utt["text"]) if len(dist) == 1: @@ -102,13 +113,13 @@ def move_forward_respond(ctx: Context, actor: Actor, intention: str): else: response = "Moving forward." - if check_if_valid_robot_command(command, ROS_FSM_SERVER, dialog_id=int_ctx.get_dialog_id(ctx, actor)): + if check_if_valid_robot_command(command, ROS_FLASK_SERVER, dialog_id=int_ctx.get_dialog_id(ctx, actor)): return response, 1.0, {}, {}, {"command_to_perform": command} else: return "" -def move_backward_respond(ctx: Context, actor: Actor, intention: str): +def move_backward_respond(ctx: Context, actor: Actor): utt = int_ctx.get_last_human_utterance(ctx, actor) dist = re.findall(r"[0-9]+", utt["text"]) if len(dist) == 1: @@ -124,20 +135,20 @@ def move_backward_respond(ctx: Context, actor: Actor, intention: str): else: response = "Moving backward." - if check_if_valid_robot_command(command, ROS_FSM_SERVER, dialog_id=int_ctx.get_dialog_id(ctx, actor)): + if check_if_valid_robot_command(command, ROS_FLASK_SERVER, dialog_id=int_ctx.get_dialog_id(ctx, actor)): return response, 1.0, {}, {}, {"command_to_perform": command} else: return "" -def open_door_respond(ctx: Context, actor: Actor, intention: str): +def open_door_respond(ctx: Context, actor: Actor): command = "open_door" if LANGUAGE == "RU": response = "Открываю дверь" else: response = "Opening the door." - if check_if_valid_robot_command(command, ROS_FSM_SERVER, dialog_id=int_ctx.get_dialog_id(ctx, actor)): + if check_if_valid_robot_command(command, ROS_FLASK_SERVER, dialog_id=int_ctx.get_dialog_id(ctx, actor)): return response, 1.0, {}, {}, {"command_to_perform": command} else: return "" @@ -147,7 +158,7 @@ def open_door_respond(ctx: Context, actor: Actor, intention: str): COMPILED_COORDS_PATTERN = re.compile(r"[-][0-9]+[ ,]+[-][0-9]+", re.IGNORECASE) -def move_to_point_respond(ctx: Context, actor: Actor, intention: str): +def move_to_point_respond(ctx: Context, actor: Actor): utt = int_ctx.get_last_human_utterance(ctx, actor) entities = get_entities(utt, only_named=False, with_labels=False, return_lemmas=True) coords = COMPILED_COORDS_PATTERN.search(utt["text"]) @@ -164,7 +175,7 @@ def move_to_point_respond(ctx: Context, actor: Actor, intention: str): else: response = "I did not get a target point. Please repeat the command." 
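+    # as in every respond function in this module: hand the command over only if the
+    # ROS-Flask server recognizes it as valid, otherwise return "" so the skill stays silent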
-    if check_if_valid_robot_command(command, ROS_FSM_SERVER, dialog_id=int_ctx.get_dialog_id(ctx, actor)):
+    if check_if_valid_robot_command(command, ROS_FLASK_SERVER, dialog_id=int_ctx.get_dialog_id(ctx, actor)):
         return response, 1.0, {}, {}, {"command_to_perform": command}
     else:
         return ""
diff --git a/skills/dff_command_selector_skill/server.py b/skills/dff_command_selector_skill/server.py
index e5337ee129..5947d98ff8 100644
--- a/skills/dff_command_selector_skill/server.py
+++ b/skills/dff_command_selector_skill/server.py
@@ -24,7 +24,7 @@
 SERVICE_NAME = os.getenv("SERVICE_NAME")
 SERVICE_PORT = int(os.getenv("SERVICE_PORT"))
 RANDOM_SEED = int(os.getenv("RANDOM_SEED", 2718))
-ROS_FSM_SERVER = os.getenv("ROS_FSM_SERVER")
+ROS_FLASK_SERVER = os.getenv("ROS_FLASK_SERVER")

 logging.basicConfig(format="%(asctime)s - %(pathname)s - %(lineno)d - %(levelname)s - %(message)s", level=logging.DEBUG)
 logger = logging.getLogger(__name__)
@@ -59,9 +59,9 @@

 while True:
-    result = containers.is_container_running(ROS_FSM_SERVER)
+    result = containers.is_container_running(ROS_FLASK_SERVER)
     if result:
-        logger.info(f"GENERATIVE_SERVICE_URL: {ROS_FSM_SERVER} is ready")
+        logger.info(f"ROS_FLASK_SERVER: {ROS_FLASK_SERVER} is ready")
         break
     else:
         time.sleep(5)
diff --git a/skills/dff_command_selector_skill/service_configs/dff-command-selector-skill/environment.yml b/skills/dff_command_selector_skill/service_configs/dff-command-selector-skill/environment.yml
index 355cc442fc..0048eb0625 100644
--- a/skills/dff_command_selector_skill/service_configs/dff-command-selector-skill/environment.yml
+++ b/skills/dff_command_selector_skill/service_configs/dff-command-selector-skill/environment.yml
@@ -1,4 +1,5 @@
 SERVICE_PORT: 8148
 SERVICE_NAME: dff_command_selector_skill
 LANGUAGE: EN
-ROS_FSM_SERVER: http://robot-fake-server:8038
\ No newline at end of file
+ROS_FLASK_SERVER: http://ros-flask-server:6000
+FAKE: false
\ No newline at end of file
diff --git a/skills/dff_command_selector_skill/service_configs/dff-command-selector-skill/service.yml b/skills/dff_command_selector_skill/service_configs/dff-command-selector-skill/service.yml
index acc3526945..474e0956ec 100644
--- a/skills/dff_command_selector_skill/service_configs/dff-command-selector-skill/service.yml
+++ b/skills/dff_command_selector_skill/service_configs/dff-command-selector-skill/service.yml
@@ -9,7 +9,8 @@ compose:
       SERVICE_PORT: 8148
       SERVICE_NAME: dff_command_selector_skill
       LANGUAGE: EN
-      ROS_FSM_SERVER: http://robot-fake-server:8038
+      ROS_FLASK_SERVER: http://ros-flask-server:6000
+      FAKE: false
     context: .
dockerfile: ./skills/dff_command_selector_skill/Dockerfile
command: gunicorn --workers=1 server:app -b 0.0.0.0:8148 --reload
diff --git a/skills/dff_command_selector_skill/test_server.py b/skills/dff_command_selector_skill/test_server.py
index b3aec3f667..0a40fb856e 100644
--- a/skills/dff_command_selector_skill/test_server.py
+++ b/skills/dff_command_selector_skill/test_server.py
@@ -9,6 +9,8 @@
 URL = f"http://0.0.0.0:{SERVICE_PORT}/respond"
 LANGUAGE = os.getenv("LANGUAGE", "EN")

+FAKE_SERVER = os.getenv("FAKE", "true").lower() in ("true", "1")
+

 def handler(requested_data, random_seed):
     hypothesis = requests.post(URL, json={**requested_data, "random_seed": random_seed}).json()
@@ -24,6 +26,9 @@ def run_test(handler):
         elif LANGUAGE == "EN" and "EN" not in test_name:
             # if the language is English, skip non-English tests
             continue
+        if not FAKE_SERVER and "FAKE" in test_name:
+            # skip fake-server tests if the server is real
+            continue

         hypothesis = handler(in_data[test_name], RANDOM_SEED)
         print(f"test name: {test_name}")
diff --git a/skills/dff_command_selector_skill/tests/intent_move_backward_robot_EN_in.json b/skills/dff_command_selector_skill/tests/intent_move_backward_robot_FAKE_EN_in.json
similarity index 99%
rename from skills/dff_command_selector_skill/tests/intent_move_backward_robot_EN_in.json
rename to skills/dff_command_selector_skill/tests/intent_move_backward_robot_FAKE_EN_in.json
index 1fdef812ab..61dc8760c7 100644
--- a/skills/dff_command_selector_skill/tests/intent_move_backward_robot_EN_in.json
+++ b/skills/dff_command_selector_skill/tests/intent_move_backward_robot_FAKE_EN_in.json
@@ -8,7 +8,6 @@
         {
             "text": "turn clockwise for 180 degrees.",
             "annotations": {
-                "spelling_preprocessing": "turn clockwise for 180 degrees",
                 "badlisted_words": {
                     "bad_words": false
                 },
@@ -262,7 +261,6 @@
         {
             "text": "move backward.",
             "annotations": {
-                "spelling_preprocessing": "move backward",
                 "badlisted_words": {
                     "bad_words": false
                 },
diff --git a/skills/dff_command_selector_skill/tests/intent_move_backward_robot_EN_out.json b/skills/dff_command_selector_skill/tests/intent_move_backward_robot_FAKE_EN_out.json
similarity index 100%
rename from skills/dff_command_selector_skill/tests/intent_move_backward_robot_EN_out.json
rename to skills/dff_command_selector_skill/tests/intent_move_backward_robot_FAKE_EN_out.json
diff --git a/skills/dff_command_selector_skill/tests/intent_move_forward_robot_EN_in.json b/skills/dff_command_selector_skill/tests/intent_move_forward_robot_FAKE_EN_in.json
similarity index 99%
rename from skills/dff_command_selector_skill/tests/intent_move_forward_robot_EN_in.json
rename to skills/dff_command_selector_skill/tests/intent_move_forward_robot_FAKE_EN_in.json
index 69450d277d..df54915105 100644
--- a/skills/dff_command_selector_skill/tests/intent_move_forward_robot_EN_in.json
+++ b/skills/dff_command_selector_skill/tests/intent_move_forward_robot_FAKE_EN_in.json
@@ -8,7 +8,6 @@
         {
             "text": "hi.",
             "annotations": {
-                "spelling_preprocessing": "hi",
                 "badlisted_words": {
                     "bad_words": false
                 },
@@ -158,7 +157,6 @@
         {
             "text": "move forward for 10 meters.",
             "annotations": {
-                "spelling_preprocessing": "move forward for 10 meters",
                 "badlisted_words": {
                     "bad_words": false
                 },
diff --git a/skills/dff_command_selector_skill/tests/intent_move_forward_robot_EN_out.json b/skills/dff_command_selector_skill/tests/intent_move_forward_robot_FAKE_EN_out.json
similarity index 100%
rename from skills/dff_command_selector_skill/tests/intent_move_forward_robot_EN_out.json
rename to
skills/dff_command_selector_skill/tests/intent_move_forward_robot_FAKE_EN_out.json diff --git a/skills/dff_command_selector_skill/tests/intent_track_object_robot_EN_in.json b/skills/dff_command_selector_skill/tests/intent_track_object_robot_FAKE_EN_in.json similarity index 99% rename from skills/dff_command_selector_skill/tests/intent_track_object_robot_EN_in.json rename to skills/dff_command_selector_skill/tests/intent_track_object_robot_FAKE_EN_in.json index a0a011d0e8..a63ed9412d 100644 --- a/skills/dff_command_selector_skill/tests/intent_track_object_robot_EN_in.json +++ b/skills/dff_command_selector_skill/tests/intent_track_object_robot_FAKE_EN_in.json @@ -8,7 +8,6 @@ { "text": "move backward.", "annotations": { - "spelling_preprocessing": "move backward", "badlisted_words": { "bad_words": false }, @@ -258,7 +257,6 @@ { "text": "please track a car.", "annotations": { - "spelling_preprocessing": "please track a car", "badlisted_words": { "bad_words": false }, diff --git a/skills/dff_command_selector_skill/tests/intent_track_object_robot_EN_out.json b/skills/dff_command_selector_skill/tests/intent_track_object_robot_FAKE_EN_out.json similarity index 100% rename from skills/dff_command_selector_skill/tests/intent_track_object_robot_EN_out.json rename to skills/dff_command_selector_skill/tests/intent_track_object_robot_FAKE_EN_out.json diff --git a/skills/dff_command_selector_skill/tests/intent_turn_around_robot_EN_in.json b/skills/dff_command_selector_skill/tests/intent_turn_around_robot_FAKE_EN_in.json similarity index 99% rename from skills/dff_command_selector_skill/tests/intent_turn_around_robot_EN_in.json rename to skills/dff_command_selector_skill/tests/intent_turn_around_robot_FAKE_EN_in.json index 8f06a06ce0..bdc8104ac1 100644 --- a/skills/dff_command_selector_skill/tests/intent_turn_around_robot_EN_in.json +++ b/skills/dff_command_selector_skill/tests/intent_turn_around_robot_FAKE_EN_in.json @@ -8,7 +8,6 @@ { "text": "move forward for 10 meters.", "annotations": { - "spelling_preprocessing": "move forward for 10 meters", "badlisted_words": { "bad_words": false }, @@ -242,7 +241,6 @@ { "text": "turn clockwise for 180 degrees.", "annotations": { - "spelling_preprocessing": "turn clockwise for 180 degrees", "badlisted_words": { "bad_words": false }, diff --git a/skills/dff_command_selector_skill/tests/intent_turn_around_robot_EN_out.json b/skills/dff_command_selector_skill/tests/intent_turn_around_robot_FAKE_EN_out.json similarity index 100% rename from skills/dff_command_selector_skill/tests/intent_turn_around_robot_EN_out.json rename to skills/dff_command_selector_skill/tests/intent_turn_around_robot_FAKE_EN_out.json diff --git a/skills/dff_document_qa_llm_skill/scenario/response.py b/skills/dff_document_qa_llm_skill/scenario/response.py index b190eb2a02..58cf3033dd 100644 --- a/skills/dff_document_qa_llm_skill/scenario/response.py +++ b/skills/dff_document_qa_llm_skill/scenario/response.py @@ -26,10 +26,10 @@ assert GENERATIVE_SERVICE_URL, logger.error("Error: GENERATIVE_SERVICE_URL is not specified in env") assert DOCUMENT_PROMPT_FILE, logger.error("Error: DOCUMENT_PROMPT_FILE is not specified in env") -GENERATIVE_TIMEOUT = int(getenv("GENERATIVE_TIMEOUT", 5)) +GENERATIVE_TIMEOUT = float(getenv("GENERATIVE_TIMEOUT", 5)) GENERATIVE_SERVICE_CONFIG = getenv("GENERATIVE_SERVICE_CONFIG") # add env!!! 
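# The int() -> float() switches in these hunks matter because env values are
# strings: float(getenv(...)) accepts both "120" and "0.5", while int("0.5")
# raises ValueError. A small sketch (the variable name below is illustrative
# only, not part of the patch):
from os import getenv

GENERATIVE_TIMEOUT_EXAMPLE = float(getenv("GENERATIVE_TIMEOUT", 5))  # "0.5" -> 0.5, "120" -> 120.0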
N_UTTERANCES_CONTEXT = int(getenv("N_UTTERANCES_CONTEXT", 3))
-FILE_SERVER_TIMEOUT = int(getenv("FILE_SERVER_TIMEOUT", 30))
+FILE_SERVER_TIMEOUT = float(getenv("FILE_SERVER_TIMEOUT", 30))
 ENVVARS_TO_SEND = getenv("ENVVARS_TO_SEND", None)
 DEFAULT_SYSTEM_PROMPT = "Answer questions based on part of a text."
 ENVVARS_TO_SEND = [] if ENVVARS_TO_SEND is None else ENVVARS_TO_SEND.split(",")
diff --git a/skills/dff_fromage_image_skill/Dockerfile b/skills/dff_fromage_image_skill/Dockerfile
new file mode 100644
index 0000000000..998fc6c9f8
--- /dev/null
+++ b/skills/dff_fromage_image_skill/Dockerfile
@@ -0,0 +1,29 @@
+FROM python:3.9.1
+# ###################### IMMUTABLE SECTION ######################################
+# Do not change anything in this section
+WORKDIR /src
+
+COPY common/dff/requirements.txt .
+RUN pip install -r requirements.txt
+
+# ###################### CUSTOM SECTION ######################################
+# Here you can make changes
+
+ARG SERVICE_NAME
+ENV SERVICE_NAME ${SERVICE_NAME}
+
+COPY skills/${SERVICE_NAME}/requirements.txt .
+RUN pip install -r requirements.txt
+RUN python -m nltk.downloader wordnet
+
+COPY skills/${SERVICE_NAME}/ ./
+COPY ./common/ ./common/
+
+ARG SERVICE_PORT
+ENV SERVICE_PORT ${SERVICE_PORT}
+
+# wait ( INTERVAL + TIMEOUT ) * RETRIES seconds for a server answer; after that, change status to unhealthy
+HEALTHCHECK --interval=5s --timeout=5s --retries=3 CMD curl --fail 127.0.0.1:${SERVICE_PORT}/healthcheck || exit 1
+
+
+CMD gunicorn --workers=1 server:app -b 0.0.0.0:${SERVICE_PORT}
diff --git a/skills/dff_fromage_image_skill/README.md b/skills/dff_fromage_image_skill/README.md
new file mode 100644
index 0000000000..5982184cbe
--- /dev/null
+++ b/skills/dff_fromage_image_skill/README.md
@@ -0,0 +1,31 @@
+# dff-fromage-image-skill
+
+## Description
+
+**dff-fromage-image-skill** is a simple service that can discuss images.
+
+## Quickstart from docker
+
+```bash
+# create local.yml
+python utils/create_local_yml.py -s dff-fromage-image-skill
+# build service
+docker-compose -f docker-compose.yml -f local.yml up -d --build dff-fromage-image-skill
+# run tests
+docker-compose -f docker-compose.yml -f local.yml exec dff-fromage-image-skill bash test.sh
+# run a dialog with the agent
+docker-compose -f docker-compose.yml -f local.yml exec agent python -m deeppavlov_agent.run
+```
+
+## Quickstart without docker
+
+```bash
+pip install -r requirements.txt
+gunicorn --workers=1 server:app -b 0.0.0.0:${SERVICE_PORT}
+```
+
+## Resources
+
+* Execution time: 46 ms
+* Starting time: 1.5 sec
+* RAM: 1024 MB
diff --git a/skills/dff_fromage_image_skill/common/.gitkeep b/skills/dff_fromage_image_skill/common/.gitkeep
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/skills/dff_fromage_image_skill/requirements.txt b/skills/dff_fromage_image_skill/requirements.txt
new file mode 100644
index 0000000000..500562e428
--- /dev/null
+++ b/skills/dff_fromage_image_skill/requirements.txt
@@ -0,0 +1,6 @@
+click==7.1.2
+nltk==3.5
+requests==2.27.1
+Pillow==9.1.0
+fastapi==0.73.0
+uvicorn==0.17.4
\ No newline at end of file
diff --git a/skills/dff_fromage_image_skill/scenario/condition.py b/skills/dff_fromage_image_skill/scenario/condition.py
new file mode 100644
index 0000000000..edf81086f9
--- /dev/null
+++ b/skills/dff_fromage_image_skill/scenario/condition.py
@@ -0,0 +1,14 @@
+import logging
+
+from df_engine.core import Context, Actor
+import common.dff.integration.context as int_ctx
+
+logger = logging.getLogger(__name__)
+logger.setLevel(logging.NOTSET)
+
+
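+# A hedged sketch of the human-utterance shape that caption_condition below
+# expects; the annotation key "fromage" comes from this file, while the
+# example caption text is illustrative only:
+#
+# {
+#     "text": "what is in this picture?",
+#     "annotations": {"fromage": "a plate of pasta on a wooden table"},
+# }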
+def caption_condition(ctx: Context, actor: Actor, *args, **kwargs) -> bool: + caption = int_ctx.get_last_human_utterance(ctx, actor).get("annotations", {}).get("fromage", None) + if caption: + return True + return False diff --git a/skills/dff_fromage_image_skill/scenario/main.py b/skills/dff_fromage_image_skill/scenario/main.py new file mode 100644 index 0000000000..371b758243 --- /dev/null +++ b/skills/dff_fromage_image_skill/scenario/main.py @@ -0,0 +1,40 @@ +import logging + +from df_engine.core.keywords import ( + TRANSITIONS, + RESPONSE, +) +from df_engine.core import Actor +from . import condition as loc_cnd +from . import response as loc_rsp + +logger = logging.getLogger(__name__) + +flows = { + "global_flow": { + "start": { + RESPONSE: "", + TRANSITIONS: { + ("fromage_caption_response", "general_node"): loc_cnd.caption_condition, + }, + }, + "fallback": { + RESPONSE: "", + TRANSITIONS: {}, + }, + }, + "fromage_caption_response": { + "general_node": { + RESPONSE: loc_rsp.generic_response, + TRANSITIONS: {}, + }, + }, +} + +actor = Actor( + flows, + start_label=("global_flow", "start"), + fallback_label=("global_flow", "fallback"), +) + +logger.info("Actor created successfully") diff --git a/skills/dff_fromage_image_skill/scenario/response.py b/skills/dff_fromage_image_skill/scenario/response.py new file mode 100644 index 0000000000..14bb7b16f7 --- /dev/null +++ b/skills/dff_fromage_image_skill/scenario/response.py @@ -0,0 +1,14 @@ +import logging +from df_engine.core import Context, Actor +import common.dff.integration.context as int_ctx + +logger = logging.getLogger(__name__) + +SUPER_CONFIDENCE = 1.0 + + +def generic_response(ctx: Context, actor: Actor, excluded_skills=None, *args, **kwargs) -> str: + caption = int_ctx.get_last_human_utterance(ctx, actor).get("annotations", {}).get("fromage", None) + if caption: + int_ctx.set_confidence(ctx, actor, SUPER_CONFIDENCE) + return caption diff --git a/skills/dff_fromage_image_skill/server.py b/skills/dff_fromage_image_skill/server.py new file mode 100644 index 0000000000..a8eeed22d7 --- /dev/null +++ b/skills/dff_fromage_image_skill/server.py @@ -0,0 +1,93 @@ +#!/usr/bin/env python + +import logging +import time +import os +import random + +from flask import Flask, request, jsonify +from healthcheck import HealthCheck +import sentry_sdk +from sentry_sdk.integrations.logging import ignore_logger + + +from common.dff.integration.actor import load_ctxs, get_response + +from scenario.main import actor + +import test_server + + +ignore_logger("root") + +sentry_sdk.init(os.getenv("SENTRY_DSN")) +SERVICE_NAME = os.getenv("SERVICE_NAME") +SERVICE_PORT = int(os.getenv("SERVICE_PORT")) +RANDOM_SEED = int(os.getenv("RANDOM_SEED", 2718)) + +logging.basicConfig(format="%(asctime)s - %(pathname)s - %(lineno)d - %(levelname)s - %(message)s", level=logging.DEBUG) +logger = logging.getLogger(__name__) + + +app = Flask(__name__) +health = HealthCheck(app, "/healthcheck") +logging.getLogger("werkzeug").setLevel("WARNING") + + +def handler(requested_data, random_seed=None): + st_time = time.time() + ctxs = load_ctxs(requested_data) + random_seed = requested_data.get("random_seed", random_seed) # for tests + + responses = [] + for ctx in ctxs: + try: + # for tests + if random_seed: + random.seed(int(random_seed)) + ctx = actor(ctx) + responses.append(get_response(ctx, actor)) + except Exception as exc: + sentry_sdk.capture_exception(exc) + logger.exception(exc) + responses.append(("", 1.0, {}, {}, {})) + + total_time = time.time() - st_time + 
logger.info(f"{SERVICE_NAME} exec time = {total_time:.3f}s") + return responses + + +try: + test_server.run_test(handler) + logger.info("test query processed") +except Exception as exc: + sentry_sdk.capture_exception(exc) + logger.exception(exc) + raise exc + +logger.info(f"{SERVICE_NAME} is loaded and ready") + +# import pathlib +# import json + +# for in_file in pathlib.Path("tests").glob("./*_in.json"): +# logger.error(in_file) +# test_in = json.load(in_file.open()) +# responses = handler(test_in, RANDOM_SEED) +# out_file = str(in_file).replace("in.json", "out.json") +# import common.test_utils as t_utils + +# t_utils.save_to_test(responses, out_file, indent=4) # TEST + + +@app.route("/respond", methods=["POST"]) +def respond(): + # import common.test_utils as t_utils; t_utils.save_to_test(request.json,"tests/lets_talk_in.json",indent=4) # TEST + # responses = handler(request.json, RANDOM_SEED) # TEST + # import common.test_utils as t_utils; t_utils.save_to_test(responses,"tests/lets_talk_out.json",indent=4) # TEST + responses = handler(request.json) + return jsonify(responses) + + +if __name__ == "__main__": + app.run(debug=False, host="0.0.0.0", port=SERVICE_PORT) diff --git a/skills/dff_fromage_image_skill/service_configs/dff-fromage-image-skill/environment.yml b/skills/dff_fromage_image_skill/service_configs/dff-fromage-image-skill/environment.yml new file mode 100644 index 0000000000..9c14320667 --- /dev/null +++ b/skills/dff_fromage_image_skill/service_configs/dff-fromage-image-skill/environment.yml @@ -0,0 +1,2 @@ +SERVICE_PORT: 8070 +SERVICE_NAME: dff_fromage_image_skill \ No newline at end of file diff --git a/skills/dff_fromage_image_skill/service_configs/dff-fromage-image-skill/service.yml b/skills/dff_fromage_image_skill/service_configs/dff-fromage-image-skill/service.yml new file mode 100644 index 0000000000..4060e3d001 --- /dev/null +++ b/skills/dff_fromage_image_skill/service_configs/dff-fromage-image-skill/service.yml @@ -0,0 +1,24 @@ +name: dff-fromage-image-skill +endpoints: +- respond +compose: + env_file: + - .env + build: + args: + SERVICE_PORT: 8070 + SERVICE_NAME: dff_fromage_image_skill + context: . 
+ dockerfile: ./skills/dff_fromage_image_skill/Dockerfile + command: gunicorn --workers=1 server:app -b 0.0.0.0:8070 + deploy: + resources: + limits: + memory: 100M + reservations: + memory: 100M + volumes: + - ./skills/dff_fromage_image_skill:/src + - ./common:/src/common + ports: + - 8070:8070 diff --git a/skills/dff_fromage_image_skill/test.sh b/skills/dff_fromage_image_skill/test.sh new file mode 100644 index 0000000000..f85ff6a382 --- /dev/null +++ b/skills/dff_fromage_image_skill/test.sh @@ -0,0 +1,3 @@ +#!/bin/bash + +python test_server.py diff --git a/skills/dff_fromage_image_skill/test_server.py b/skills/dff_fromage_image_skill/test_server.py new file mode 100644 index 0000000000..5ceb78f9ef --- /dev/null +++ b/skills/dff_fromage_image_skill/test_server.py @@ -0,0 +1,33 @@ +import requests +import os + +import common.test_utils as test_utils + + +SERVICE_PORT = int(os.getenv("SERVICE_PORT")) +RANDOM_SEED = int(os.getenv("RANDOM_SEED", 2718)) +URL = f"http://0.0.0.0:{SERVICE_PORT}/respond" + + +def handler(requested_data, random_seed): + hypothesis = requests.post(URL, json={**requested_data, "random_seed": random_seed}).json() + return hypothesis + + +def run_test(handler): + in_data, out_data = test_utils.get_dataset() + for test_name in in_data: + hypothesis = handler(in_data[test_name], RANDOM_SEED) + print(f"test name: {test_name}") + is_equal_flag, msg = test_utils.compare_structs(out_data[test_name], hypothesis, ignored_keys=["id"]) + if msg and len(msg.split("`")) == 5: + _, ground_truth_text, _, hypothesis_text, _ = msg.split("`") + is_equal_flag, ratio = test_utils.compare_text(ground_truth_text, hypothesis_text, 0.0) + if not is_equal_flag: + msg = f"{msg} ratio = {ratio}" + assert is_equal_flag, msg + print("Success") + + +if __name__ == "__main__": + run_test(handler) diff --git a/skills/dff_fromage_image_skill/tests/.gitkeep b/skills/dff_fromage_image_skill/tests/.gitkeep new file mode 100644 index 0000000000..e69de29bb2 diff --git a/skills/dff_intent_responder_skill/scenario/condition.py b/skills/dff_intent_responder_skill/scenario/condition.py index 35f2145863..bae9a0b78a 100644 --- a/skills/dff_intent_responder_skill/scenario/condition.py +++ b/skills/dff_intent_responder_skill/scenario/condition.py @@ -18,5 +18,7 @@ def intent_catcher_exists_condition(ctx: Context, actor: Actor, *args, **kwargs) which="intent_catcher", ) + logger.info(f"INTENTS LIST: {intents_by_catcher}") + response_funcs = get_respond_funcs() return bool(any([intent in response_funcs for intent in intents_by_catcher])) diff --git a/skills/dff_reasoning_skill/scenario/api_responses/generative_lm.py b/skills/dff_reasoning_skill/scenario/api_responses/generative_lm.py index c96f067eff..0786e8ffd3 100644 --- a/skills/dff_reasoning_skill/scenario/api_responses/generative_lm.py +++ b/skills/dff_reasoning_skill/scenario/api_responses/generative_lm.py @@ -17,7 +17,7 @@ if GENERATIVE_SERVICE_CONFIG: with open(f"common/generative_configs/{GENERATIVE_SERVICE_CONFIG}", "r") as f: GENERATIVE_SERVICE_CONFIG = json.load(f) -GENERATIVE_TIMEOUT = int(getenv("GENERATIVE_TIMEOUT", 30)) +GENERATIVE_TIMEOUT = float(getenv("GENERATIVE_TIMEOUT", 30)) N_UTTERANCES_CONTEXT = int(getenv("N_UTTERANCES_CONTEXT", 1)) ENVVARS_TO_SEND = getenv("ENVVARS_TO_SEND", None) ENVVARS_TO_SEND = [] if ENVVARS_TO_SEND is None else ENVVARS_TO_SEND.split(",") diff --git a/skills/dff_reasoning_skill/scenario/response.py b/skills/dff_reasoning_skill/scenario/response.py index c387f1482d..14cb198272 100644 --- 
a/skills/dff_reasoning_skill/scenario/response.py +++ b/skills/dff_reasoning_skill/scenario/response.py @@ -35,7 +35,7 @@ if GENERATIVE_SERVICE_CONFIG: with open(f"common/generative_configs/{GENERATIVE_SERVICE_CONFIG}", "r") as f: GENERATIVE_SERVICE_CONFIG = json.load(f) -GENERATIVE_TIMEOUT = int(getenv("GENERATIVE_TIMEOUT", 30)) +GENERATIVE_TIMEOUT = float(getenv("GENERATIVE_TIMEOUT", 30)) N_UTTERANCES_CONTEXT = int(getenv("N_UTTERANCES_CONTEXT", 1)) TIME_SLEEP = float(getenv("TIME_SLEEP", 0)) diff --git a/skills/dff_reasoning_skill/scenario/utils.py b/skills/dff_reasoning_skill/scenario/utils.py index 820d160cbc..313857bf4b 100644 --- a/skills/dff_reasoning_skill/scenario/utils.py +++ b/skills/dff_reasoning_skill/scenario/utils.py @@ -26,7 +26,7 @@ if GENERATIVE_SERVICE_CONFIG: with open(f"common/generative_configs/{GENERATIVE_SERVICE_CONFIG}", "r") as f: GENERATIVE_SERVICE_CONFIG = json.load(f) -GENERATIVE_TIMEOUT = int(getenv("GENERATIVE_TIMEOUT", 30)) +GENERATIVE_TIMEOUT = float(getenv("GENERATIVE_TIMEOUT", 30)) sentry_sdk.init(getenv("SENTRY_DSN")) diff --git a/skills/dff_template_prompted_skill/scenario/response.py b/skills/dff_template_prompted_skill/scenario/response.py index 257377b1da..32b8b73a2b 100644 --- a/skills/dff_template_prompted_skill/scenario/response.py +++ b/skills/dff_template_prompted_skill/scenario/response.py @@ -16,7 +16,7 @@ sentry_sdk.init(getenv("SENTRY_DSN")) logging.basicConfig(format="%(asctime)s - %(name)s - %(levelname)s - %(message)s", level=logging.INFO) logger = logging.getLogger(__name__) -GENERATIVE_TIMEOUT = int(getenv("GENERATIVE_TIMEOUT", 5)) +GENERATIVE_TIMEOUT = float(getenv("GENERATIVE_TIMEOUT", 5)) GENERATIVE_SERVICE_URL = getenv("GENERATIVE_SERVICE_URL") GENERATIVE_SERVICE_CONFIG = getenv("GENERATIVE_SERVICE_CONFIG") if GENERATIVE_SERVICE_CONFIG: diff --git a/skills/dff_template_prompted_skill/service_configs/dff-dream-faq-prompted-skill/environment.yml b/skills/dff_template_prompted_skill/service_configs/dff-dream-faq-prompted-skill/environment.yml index 6484ff1f8e..7d2c09aba4 100644 --- a/skills/dff_template_prompted_skill/service_configs/dff-dream-faq-prompted-skill/environment.yml +++ b/skills/dff_template_prompted_skill/service_configs/dff-dream-faq-prompted-skill/environment.yml @@ -2,6 +2,6 @@ SERVICE_PORT: 8170 SERVICE_NAME: dff_dream_faq_prompted_skill PROMPT_FILE: common/prompts/dream_faq.json GENERATIVE_SERVICE_URL: http://openai-api-chatgpt-16k:8167/respond -GENERATIVE_SERVICE_CONFIG: openai-chatgpt.json.json +GENERATIVE_SERVICE_CONFIG: openai-chatgpt.json GENERATIVE_TIMEOUT: 120 N_UTTERANCES_CONTEXT: 7 diff --git a/skills/dff_travel_italy_skill/Dockerfile b/skills/dff_travel_italy_skill/Dockerfile new file mode 100644 index 0000000000..8936770949 --- /dev/null +++ b/skills/dff_travel_italy_skill/Dockerfile @@ -0,0 +1,38 @@ +FROM python:3.9.1 +# ###################### IMMUTABLE SECTION ###################################### +# Do not change anything in this section +WORKDIR /src + +COPY common/dff/requirements.txt . 
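+# Copying requirements.txt on its own first lets Docker cache the pip install
+# layer: later edits to the skill sources will not re-run the step below.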
+RUN pip install -r requirements.txt
+
+# ###################### CUSTOM SECTION ######################################
+# Here you can make changes
+
+ARG SERVICE_NAME
+ARG TERMINUSDB_SERVER_PASSWORD
+ARG TERMINUSDB_SERVER_URL
+ARG TERMINUSDB_SERVER_TEAM
+ARG TERMINUSDB_SERVER_DB
+
+ENV SERVICE_NAME ${SERVICE_NAME}
+ENV TERMINUSDB_SERVER_PASSWORD=$TERMINUSDB_SERVER_PASSWORD
+ENV TERMINUSDB_SERVER_URL=$TERMINUSDB_SERVER_URL
+ENV TERMINUSDB_SERVER_TEAM=$TERMINUSDB_SERVER_TEAM
+ENV TERMINUSDB_SERVER_DB=$TERMINUSDB_SERVER_DB
+
+COPY skills/${SERVICE_NAME}/requirements.txt .
+RUN pip install -r requirements.txt && \
+    python -m nltk.downloader wordnet
+
+COPY skills/${SERVICE_NAME}/ ./
+COPY ./common/ ./common/
+
+ARG SERVICE_PORT
+ENV SERVICE_PORT ${SERVICE_PORT}
+
+# wait ( INTERVAL + TIMEOUT ) * RETRIES seconds for a server answer; after that, change status to unhealthy
+HEALTHCHECK --interval=5s --timeout=5s --retries=3 CMD curl --fail 127.0.0.1:${SERVICE_PORT}/healthcheck || exit 1
+
+
+CMD gunicorn --workers=1 server:app -b 0.0.0.0:${SERVICE_PORT}
diff --git a/skills/dff_travel_italy_skill/README.md b/skills/dff_travel_italy_skill/README.md
new file mode 100644
index 0000000000..6543ed4ca5
--- /dev/null
+++ b/skills/dff_travel_italy_skill/README.md
@@ -0,0 +1,302 @@
+# DialogFlow Framework Template
+Changes can only be made in the `dialogflows` directory.
+
+The template has dialog flows based on programy (`repeating`) and on vanilla Python (`greeting`).
+
+```bash
+python utils/create_local_yml.py -s dff-template-skill -s convers-evaluation-selector
+
+docker-compose -f docker-compose.yml -f local.yml up -d --build
+
+docker-compose -f docker-compose.yml -f local.yml exec agent python -m deeppavlov_agent.run
+docker-compose -f docker-compose.yml -f local.yml logs -f dff-template-skill
+docker-compose -f docker-compose.yml -f local.yml exec dff-template-skill bash test.sh
+```
+
+
+# Important changes in files of the agent
+docker-compose.yml
+```yml
+  dff-template-skill:
+    build:
+      args:
+        SERVICE_PORT: 8095
+        SERVICE_NAME: dff_template_skill # must match the skill directory name
+      context: .
+      dockerfile: ./skills/dff_template_skill/Dockerfile
+    command: gunicorn --workers=1 server:app -b 0.0.0.0:8095 --reload
+    deploy:
+      mode: replicated
+      replicas: 4
+      resources:
+        limits:
+          memory: 768M
+        reservations:
+          memory: 768M
+```
+
+
+dev.yml
+```yml
+  dff-template-skill:
+    env_file: [.env.dev]
+    volumes:
+      - "./skills/dff_template:/src"
+      - "./common:/src/common"
+    ports:
+      - 8095:8095
+```
+
+pipeline.json
+```json
+    "dff_template": {
+        "connector": {
+            "protocol": "http",
+            "url": "http://dff-template:8095/respond"
+        },
+        "dialog_formatter": "state_formatters.dp_formatters:dff_template_formatter",
+        "response_formatter": "state_formatters.dp_formatters:skill_with_attributes_formatter_service",
+        "previous_services": ["skill_selectors"],
+        "state_manager_method": "add_hypothesis"
+    },
+```
+
+state_formatters/formatter.py
+```python
+def DFF_TEMPLATE_formatter(dialog: Dict) -> List[Dict]:
+    service_name = f"DFF_TEMPLATE"
+    return utils.dff_formatter(dialog, service_name)
+```
+[skill_selectors/rule_based_selector/connector.py](https://github.com/dilyararimovna/dp-dream-alexa/blob/a4fdea01a1f16c2a877f9d9447350463adc96a2f/skill_selectors/rule_based_selector/connector.py#L381)
+
+```python
+    response=["dff_template"],
+```
+
+
+# Tests
+## Test creating
+
+The file `server.py` contains this code:
+
+```python
+@app.route("/respond", methods=["POST"])
+def respond():
+    # next commented line is for test creation
+    # import common.test_utils as t_utils; t_utils.save_to_test(request.json,"tests/TEST_NAME_in.json",indent=4)
+    responses = handler(request.json)
+    # next commented line is for test creation
+    # import common.test_utils as t_utils; t_utils.save_to_test(responses,"tests/TEST_NAME_out.json",indent=4)
+    return jsonify(responses)
+
+```
+Steps:
+1. Uncomment the lines with the json dumps.
+1. Name your test by replacing `TEST_NAME` in both lines. The two names have to be the same.
+1. Start a test dialog with the agent. Every turn will be written to `tests/TEST_NAME*`: `*_in.json` for input data, `*_out.json` for response data.
+
+If you want to write down all turns of a test dialog, you can use this code:
+
+```python
+index = 0
+@app.route("/respond", methods=["POST"])
+def respond():
+    global index
+    # line for test creation
+    import common.test_utils as t_utils; t_utils.save_to_test(request.json, f"tests/TEST_NAME_{index}_in.json", indent=4)
+    responses = handler(request.json)
+    # line for test creation
+    import common.test_utils as t_utils; t_utils.save_to_test(responses, f"tests/TEST_NAME_{index}_out.json", indent=4)
+    index += 1
+    return jsonify(responses)
+
+```
+## Test usage
+Tests are used in two ways:
+
+- service initialization in `server.py`
+
+```python
+try:
+    test_server.run_test(handler)
+    logger.info("test query processed")
+except Exception as exc:
+    sentry_sdk.capture_exception(exc)
+    logger.exception(exc)
+    raise exc
+```
+
+- service testing by `test.sh` execution
+
+
+## Test extending
+If your service is based on random behavior, you can send a `random_seed` to it. You can find the corresponding lines in `server.py`:
+```python
+    ...  # some code
+    random_seed = requested_data.get("random_seed")  # for tests
+    ...  # some code
+    if random_seed:
+        random.seed(int(random_seed))
+    ...  # some code
+```
+
+For answer comparison we use `common.test_utils`:
+- `compare_structs` - for json structure comparison
+- `compare_text` - for text comparison
+
+You can use them for your custom comparisons.
+
+
+## Links between dff skills
+1.
Making a link (example of a link from dff\_animals\_skill to dff\_wiki_skill)
+```python
+    import common.dialogflow_framework.utils.state as state_utils
+    ...  # some code
+    def why_do_you_like_response(vars):
+        ...  # some code
+        if found_animal:
+            response = f"Cool! Why do you like {found_animal}?"
+        else:
+            response = f"Cool! Why do you like them?"
+
+        if found_entity_id:
+            # making cross link
+            state_utils.set_cross_link(vars, to_service_name="dff_wiki_skill", from_service_name="dff_animals_skill")
+            add_info = {"entity_id": found_entity_id, "entity_substr": found_animal, "entity_types": found_types,
+                        "entity_page": found_entity_page}  # if we want to pass some info between skills
+            # save info in cross state
+            state_utils.save_cross_state(vars, service_name="dff_wiki_skill", new_state=add_info)
+            state_utils.set_dff_suspension(vars)  # stop the current dff skill so that after the next dff skill
+            # finishes its scenario, the current scenario is resumed from this state
+
+        return response
+```
+
+2. Using the link in the destination skill (dff\_wiki_skill in our example)
+```python
+    import common.dialogflow_framework.utils.state as state_utils
+    ...  # some code
+    def tell_fact_request(ngrams, vars):
+        cross_link = state_utils.get_cross_link(vars, service_name="dff_wiki_skill")
+        # cross_link is a dict {"from_service": "dff_animals_skill"}
+        cross_state = state_utils.get_cross_state(vars, service_name="dff_wiki_skill")
+        # cross_state is the add_info dict that was saved in why_do_you_like_response via save_cross_state
+        from_skill = cross_link.get("from_service", "")
+        if from_skill == "dff_animals_skill":
+            flag = True
+
+```
+
+3. To switch the destination skill if the link was made, we can add a function to the common folder
+   (in our example, in common/wiki_skill.py)
+```python
+    def find_wiki_cross_links(dialog):
+        flag = False
+        human_attributes = dialog.get("human", {}).get("attributes", {})
+        dff_shared_state = human_attributes.get("dff_shared_state", {"cross_states": {}, "cross_links": {}})
+        cross_links = dff_shared_state["cross_links"].get("dff_wiki_skill", {})
+        if cross_links:
+            flag = True
+        return flag
+```
+Then in skill\_selectors/rule\_based_selector/connector.py:
+```python
+    from common.wiki_skill import find_wiki_cross_links
+    ...  # some code
+    if find_wiki_cross_links(dialog):
+        skills_for_uttr.append("dff_wiki_skill")
+```
+
+4. The reverse transition (from dff\_wiki\_skill to dff\_animals_skill in our example) is made in the same way.
+
+## Inserting a scenario parser into a dff skill
+
+```python
+    ...  # some imports
+    import json
+    from common.insert_scenario import start_or_continue_scenario, smalltalk_response, start_or_continue_facts, \
+        facts_response  # imports for scenario insertion
+
+    # place your config in the directory skills/your_dff_skill_name/{inserted_scenario_config_name}.json
+    # and load config
+    with open(inserted_scenario_config_name, 'r') as fl:
+        topic_config = json.load(fl)
+
+    class State(Enum):
+        USR_START = auto()
+        #
+        ...  # States of your skill
+
+        # States for scenario insertion
+        SYS_INSERT_SMALLTALK = auto()
+        USR_INSERT_SMALLTALK = auto()
+        #
+        SYS_INSERT_FACT = auto()
+        USR_INSERT_FACT = auto()
+
+    ...
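+    # Each inserted stage is a SYS_*/USR_* pair: a SYS_* state is entered when
+    # its *_request function returns True, and the paired USR_* state renders
+    # the matching *_response function (wired up via add_system_transition below).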
# Some other states of your skill
+
+    # Two request and two response functions for scenario insertion
+
+    def insert_scenario_smalltalk_request(ngrams, vars):
+        flag = start_or_continue_scenario(vars, topic_config)
+        logger.info(f"special_topic_request={flag}")
+        return flag
+
+
+    def insert_scenario_smalltalk_response(vars):
+        response = smalltalk_response(vars, topic_config)
+        return response
+
+
+    def insert_scenario_facts_request(ngrams, vars):
+        flag = start_or_continue_facts(vars, topic_config)
+        logger.info(f"special_topic_facts_request={flag}")
+        return flag
+
+
+    def insert_scenario_facts_response(vars):
+        response = facts_response(vars, topic_config)
+        return response
+
+    simplified_dialog_flow = dialogflow_extension.DFEasyFilling(State.USR_START)
+
+    ...  # Your state transitions
+
+    # State transitions for scenario insertion
+
+    simplified_dialog_flow.add_user_serial_transitions(
+        State.SOME_STATE,
+        {
+            ...  # transitions to other states
+            State.SYS_INSERT_SMALLTALK: insert_scenario_smalltalk_request,
+        },
+    )
+
+    simplified_dialog_flow.add_user_serial_transitions(
+        State.USR_INSERT_SMALLTALK,
+        {
+            State.SYS_INSERT_FACT: insert_scenario_facts_request,
+            State.SYS_INSERT_SMALLTALK: insert_scenario_smalltalk_request,
+            State.SOME_OTHER_YOUR_STATE: some_other_state_request,
+        },
+    )
+
+    simplified_dialog_flow.add_user_serial_transitions(
+        State.USR_INSERT_FACT,
+        {
+            State.SYS_INSERT_SMALLTALK: insert_scenario_smalltalk_request,
+            State.SYS_INSERT_FACT: insert_scenario_facts_request,
+            State.SOME_OTHER_YOUR_STATE: some_other_state_request,
+        },
+    )
+
+    simplified_dialog_flow.add_system_transition(State.SYS_INSERT_SMALLTALK, State.USR_INSERT_SMALLTALK,
+                                                 insert_scenario_smalltalk_response, )
+    simplified_dialog_flow.add_system_transition(State.SYS_INSERT_FACT, State.USR_INSERT_FACT,
+                                                 insert_scenario_facts_response, )
+
+    simplified_dialog_flow.set_error_successor(State.SYS_INSERT_SMALLTALK, State.SYS_ERR)
+    simplified_dialog_flow.set_error_successor(State.USR_INSERT_SMALLTALK, State.SYS_ERR)
+    simplified_dialog_flow.set_error_successor(State.SYS_INSERT_FACT, State.SYS_ERR)
+    simplified_dialog_flow.set_error_successor(State.USR_INSERT_FACT, State.SYS_ERR)
+```
\ No newline at end of file
diff --git a/skills/dff_travel_italy_skill/kg_test.py b/skills/dff_travel_italy_skill/kg_test.py
new file mode 100644
index 0000000000..0334402d80
--- /dev/null
+++ b/skills/dff_travel_italy_skill/kg_test.py
@@ -0,0 +1,9 @@
+from deeppavlov_kg import TerminusdbKnowledgeGraph
+from scenario.config import TERMINUSDB_SERVER_DB, TERMINUSDB_SERVER_TEAM, TERMINUSDB_SERVER_PASSWORD, TERMINUSDB_SERVER_URL
+
+
+terminus_kg = TerminusdbKnowledgeGraph(team=TERMINUSDB_SERVER_TEAM, db_name=TERMINUSDB_SERVER_DB, server=TERMINUSDB_SERVER_URL, password=TERMINUSDB_SERVER_PASSWORD)
+
+# terminus_kg.drop_database()
+
+print(terminus_kg.get_all_entities())
diff --git a/skills/dff_travel_italy_skill/requirements.txt b/skills/dff_travel_italy_skill/requirements.txt
new file mode 100644
index 0000000000..3174fbdcc6
--- /dev/null
+++ b/skills/dff_travel_italy_skill/requirements.txt
@@ -0,0 +1,3 @@
+git+https://github.com/deeppavlov/custom_kg_svc.git@724513b4
+click==8.1.6
+nltk==3.5
diff --git a/skills/dff_travel_italy_skill/scenario/condition.py b/skills/dff_travel_italy_skill/scenario/condition.py
new file mode 100644
index 0000000000..82a4a1c98b
--- /dev/null
+++ b/skills/dff_travel_italy_skill/scenario/condition.py
@@ -0,0 +1,162 @@
+import logging
+import re
+from typing import Callable
+from os import getenv
+import time
+import sentry_sdk
+
+
+from deeppavlov_kg import TerminusdbKnowledgeGraph
+from
scenario.config import (
+    TERMINUSDB_SERVER_URL,
+    TERMINUSDB_SERVER_PASSWORD,
+    TERMINUSDB_SERVER_DB,
+    TERMINUSDB_SERVER_TEAM,
+)
+
+
+from df_engine.core import Context, Actor
+import df_engine.conditions as cnd
+
+import common.dff.integration.condition as int_cnd
+import common.dff.integration.context as int_ctx
+
+from common.travel_italy import ITALY_PATTERN, italy_travel_skill_was_proposed
+from common.food import FOOD_WORDS, FAVORITE_FOOD_WORDS
+
+from common.universal_templates import if_chat_about_particular_topic
+from common.utils import (
+    get_intents,
+    get_sentiment,
+)  # present in integration
+
+
+sentry_sdk.init(getenv("SENTRY_DSN"))
+logging.basicConfig(format="%(asctime)s - %(name)s - %(levelname)s - %(message)s", level=logging.INFO)
+logger = logging.getLogger(__name__)
+
+
+USE_CACHE = True
+
+# ....
+
+SIDE_INTENTS = {
+    "exit",
+    "don't understand",
+    "what_can_you_do",
+    "what_is_your_job",
+    "what_is_your_name",
+    "what_time",
+    "where_are_you_from",
+    "who_made_you",
+}
+
+while True:
+    try:
+        graph = TerminusdbKnowledgeGraph(
+            db_name=TERMINUSDB_SERVER_DB,
+            team=TERMINUSDB_SERVER_TEAM,
+            server=TERMINUSDB_SERVER_URL,
+            password=TERMINUSDB_SERVER_PASSWORD,
+        )
+        logger.info(f"TERMINUSDB_SERVER_URL: {TERMINUSDB_SERVER_URL} is ready")
+        break
+    except Exception as exc:
+        logger.error(exc)
+        time.sleep(5)
+        continue
+
+
+def check_flag(prop: str) -> Callable:
+    def check_flag_handler(ctx: Context, actor: Actor) -> bool:
+        return ctx.misc.get("flags", {}).get(prop, False)
+
+    return check_flag_handler
+
+
+def start_condition(ctx: Context, actor: Actor) -> bool:
+    # with open("new.json", "w") as ctx_file:  # to get contents of ctx.misc["agent"]
+    #     json.dump(ctx.misc["agent"], ctx_file, indent=2)
+
+    return if_chat_about_particular_topic(
+        int_ctx.get_last_human_utterance(ctx, actor),
+        int_ctx.get_last_bot_utterance(ctx, actor),
+        compiled_pattern=ITALY_PATTERN,
+    )
+
+
+def is_side_or_stop(ctx: Context, actor: Actor) -> bool:
+    """
+    Check for side intents (including exit)
+    """
+    intents = set(get_intents(int_ctx.get_last_human_utterance(ctx, actor), which="intent_catcher", probs=False))
+    side_intent_present = len(intents.intersection(SIDE_INTENTS)) > 0
+    if side_intent_present:
+        logger.debug("Side intent detected, exiting")
+    return side_intent_present
+
+
+def is_proposed_skill(ctx: Context, actor: Actor) -> bool:
+    return italy_travel_skill_was_proposed(int_ctx.get_last_bot_utterance(ctx, actor))
+
+
+def travel_italy_skill_switch(ctx: Context, actor: Actor) -> bool:
+    user_uttr = int_ctx.get_last_human_utterance(ctx, actor)
+
+    return bool(re.findall(ITALY_PATTERN, user_uttr["text"]))
+
+
+def sentiment_detected(name: str = "positive", threshold: float = 0.6) -> Callable:
+    def sentiment_detected_handler(ctx: Context, actor: Actor) -> bool:
+        if ctx.validation:
+            return False
+        sentiment_probs = get_sentiment(int_ctx.get_last_human_utterance(ctx, actor), probs=True)
+        return sentiment_probs.get(name, 0) >= threshold
+
+    return sentiment_detected_handler
+
+
+exit_skill = cnd.any(
+    [
+        is_side_or_stop,
+        # int_cnd.is_switch_topic,
+        # is_switch_topic,
+        cnd.all([is_proposed_skill, int_cnd.is_no_vars]),
+    ]
+)
+
+asked_about_italian_cuisine = cnd.regexp(re.compile(FOOD_WORDS, re.IGNORECASE))
+
+uttr_about_favorite_food = cnd.regexp(re.compile(FAVORITE_FOOD_WORDS, re.IGNORECASE))
+
+
+def example_lets_talk_about():
+    def example_lets_talk_about_handler(ctx: Context, actor: Actor, *args, **kwargs) -> str:
+        return int_cnd.is_lets_chat_about_topic_human_initiative(ctx, actor)
+
+    return
example_lets_talk_about_handler
+
+
+def get_current_user_id(ctx: Context, actor: Actor):
+    if "agent" in ctx.misc:
+        user_id = ctx.misc["agent"]["dialog"]["human_utterances"][-1]["user"]["id"]
+
+        return user_id
+
+    return None
+
+
+def has_entity_in_graph(prop):
+    def has_entity_in_graph_handler(ctx: Context, actor: Actor) -> bool:
+        user_id = get_current_user_id(ctx, actor)
+        if user_id:
+            current_user_id = "User/" + user_id
+            logger.info(f"current user id -- {current_user_id}")
+            user_existing_properties = graph.get_properties_of_entity(entity_id=current_user_id)
+            logger.info(f"user_existing_properties -- {user_existing_properties}")
+            logger.info(f"property to search for -- {prop}")
+            if prop in user_existing_properties:
+                return True
+
+        return False
+
+    return has_entity_in_graph_handler
diff --git a/skills/dff_travel_italy_skill/scenario/config.py b/skills/dff_travel_italy_skill/scenario/config.py
new file mode 100644
index 0000000000..8c29866270
--- /dev/null
+++ b/skills/dff_travel_italy_skill/scenario/config.py
@@ -0,0 +1,9 @@
+import os
+from dotenv import load_dotenv
+
+load_dotenv()
+
+TERMINUSDB_SERVER_DB = os.getenv("TERMINUSDB_SERVER_DB")
+TERMINUSDB_SERVER_TEAM = os.getenv("TERMINUSDB_SERVER_TEAM")
+TERMINUSDB_SERVER_PASSWORD = os.getenv("TERMINUSDB_SERVER_PASSWORD")
+TERMINUSDB_SERVER_URL = os.getenv("TERMINUSDB_SERVER_URL")
diff --git a/skills/dff_travel_italy_skill/scenario/main.py b/skills/dff_travel_italy_skill/scenario/main.py
new file mode 100644
index 0000000000..dd43941220
--- /dev/null
+++ b/skills/dff_travel_italy_skill/scenario/main.py
@@ -0,0 +1,312 @@
+import logging
+import sentry_sdk
+from os import getenv
+
+import df_engine.conditions as cnd
+from df_engine.core.keywords import PROCESSING, TRANSITIONS, GLOBAL, RESPONSE, LOCAL, MISC
+
+import scenario.sf_conditions as dm_cnd
+
+from df_engine.core import Actor
+
+import common.dff.integration.condition as int_cnd
+import common.dff.integration.processing as int_prs
+import scenario.condition as loc_cnd
+import scenario.processing as loc_prs
+from .
import response as loc_rsp
+from common.constants import CAN_CONTINUE_SCENARIO, MUST_CONTINUE
+
+import common.dff.integration.response as int_rsp
+
+
+sentry_sdk.init(getenv("SENTRY_DSN"))
+
+logger = logging.getLogger(__name__)
+
+SUPER_CONFIDENCE = 1.0
+HIGH_CONFIDENCE = 0.98
+DEFAULT_CONFIDENCE = 0.95
+BIT_LOWER_CONFIDENCE = 0.90
+ZERO_CONFIDENCE = 0.0
+
+flows = {
+    GLOBAL: {
+        TRANSITIONS: {
+            ("travel_italy_general", "italy_start"): loc_cnd.start_condition,
+            ("travel_italy_general", "like_italy"): loc_cnd.is_proposed_skill,
+            ("italian_food_flow_restart", "tell_more"): cnd.all(
+                [
+                    loc_cnd.has_entity_in_graph("LIKE FOOD/Food"),
+                    loc_cnd.uttr_about_favorite_food,
+                ]
+            ),
+            ("italian_food_flow", "food_start"): cnd.all(
+                [
+                    loc_cnd.asked_about_italian_cuisine,
+                    cnd.neg(loc_cnd.check_flag("food_start_visited")),
+                ]
+            ),
+        },
+    },
+    "travel_italy_general": {
+        LOCAL: {
+            PROCESSING: {
+                "set_confidence": int_prs.set_confidence(SUPER_CONFIDENCE),
+                "set_can_continue": int_prs.set_can_continue(MUST_CONTINUE),
+            },
+        },
+        "italy_start": {
+            RESPONSE: "What's your favourite place in Italy?",
+            PROCESSING: {
+                "set_confidence": int_prs.set_confidence(SUPER_CONFIDENCE),
+                "set_can_continue": int_prs.set_can_continue(MUST_CONTINUE),
+                "set_flag": loc_prs.set_flag("italy_travel_skill_active", True),
+            },
+            TRANSITIONS: {
+                ("concrete_place_flow", "fav_place", 2): cnd.any(
+                    [
+                        int_cnd.has_entities("wiki:Q747074"),  # Q38 - Italy, Q747074 - commune of Italy
+                        int_cnd.has_entities("wiki:Q515"),  # Q515 - city
+                        int_cnd.has_entities("wiki:Q1549591"),  # Q1549591 - big city
+                    ]
+                ),
+                ("travel_italy_general", "like_italy", 1): cnd.true(),
+            },
+        },
+        "like_italy": {
+            RESPONSE: "I like Italy for its nature. What do you like it for?",
+            PROCESSING: {
+                "set_confidence": int_prs.set_confidence(BIT_LOWER_CONFIDENCE),
+                "set_can_continue": int_prs.set_can_continue(CAN_CONTINUE_SCENARIO),
+                "set_flag": loc_prs.set_flag("italy_travel_skill_active", True),
+            },
+            TRANSITIONS: {
+                ("travel_italy_general", "told_why", 2): cnd.any(
+                    [dm_cnd.is_midas("open_question_opinion"), dm_cnd.is_midas("opinion")]
+                ),
+                ("travel_italy_general", "neg_to_italy"): int_cnd.is_no_vars,
+                ("global_flow", "fallback"): cnd.true(),
+            },
+        },
+        "told_why": {
+            RESPONSE: int_rsp.multi_response(
+                replies=[
+                    "I think in Italy one can truly relax and taste the life.",
+                    "Italy is the place where I want to go back again and again.",
+                ],
+                confidences=[1.0, 1.0],
+                hype_attr=[
+                    {"can_continue": MUST_CONTINUE},
+                    {"can_continue": CAN_CONTINUE_SCENARIO},
+                ],
+            ),
+            PROCESSING: {
+                "set_confidence": int_prs.set_confidence(BIT_LOWER_CONFIDENCE),
+                "set_can_continue": int_prs.set_can_continue(CAN_CONTINUE_SCENARIO),
+            },
+            TRANSITIONS: {
+                ("concrete_place_flow", "when_visited"): cnd.true(),
+            },
+        },
+        "neg_to_italy": {
+            RESPONSE: "What a pity! This country and its culture are truly inspiring. What about Italian cuisine? "
+            "Do you like it?",
+            PROCESSING: {
+                "set_confidence": int_prs.set_confidence(DEFAULT_CONFIDENCE),
+                "set_can_continue": int_prs.set_can_continue(MUST_CONTINUE),
+            },
+            TRANSITIONS: {
+                ("italian_food_flow", "food_start"): int_cnd.is_yes_vars,
+                ("global_flow", "fallback"): cnd.true(),
+            },
+        },
+    },
+    "concrete_place_flow": {
+        "fav_place": {
+            RESPONSE: "What are the odds? I also love {users_fav_place}.
What impressed you the most there?",
+            PROCESSING: {
+                "entity_extraction": int_prs.entities(users_fav_place=["prop:favorite_place", "default:this place"]),
+                "slot_filling": int_prs.fill_responses_by_slots(),
+                "set_confidence": int_prs.set_confidence(SUPER_CONFIDENCE),
+                "set_can_continue": int_prs.set_can_continue(MUST_CONTINUE),
+            },
+            TRANSITIONS: {
+                ("concrete_place_flow", "when_visited"): int_cnd.is_no_vars,
+                ("concrete_place_flow", "day_activities"): cnd.true(),
+            },
+        },
+        "day_activities": {
+            RESPONSE: loc_rsp.append_unused(
+                initial="Oh, I loved that, too! ",
+                phrases=[loc_rsp.WHAT_DID_DAY],
+            ),
+            PROCESSING: {
+                "set_confidence": int_prs.set_confidence(HIGH_CONFIDENCE),
+                "set_can_continue": int_prs.set_can_continue(MUST_CONTINUE),
+            },
+            TRANSITIONS: {
+                ("concrete_place_flow", "like_activity", 2): cnd.any(
+                    [int_cnd.has_entities("prop:like_activity"), int_cnd.has_entities("prop:favorite_activity")]
+                ),
+                ("concrete_place_flow", "bot_activ_opinion"): int_cnd.is_no_vars,
+                ("concrete_place_flow", "when_visited"): cnd.true(),
+            },
+        },
+        "like_activity": {
+            RESPONSE: "{user_liked_activity} is one of the things I like to do as well. What about your nights?",
+            PROCESSING: {
+                "entity_extraction": int_prs.entities(user_liked_activity=["prop:like_activity", "default:This"]),
+                "fill_responses_by_slots": int_prs.fill_responses_by_slots(),
+                "set_confidence": int_prs.set_confidence(DEFAULT_CONFIDENCE),
+                "set_can_continue": int_prs.set_can_continue(MUST_CONTINUE),
+            },
+            TRANSITIONS: {("concrete_place_flow", "bot_activ_opinion"): cnd.true()},
+        },
+        "bot_activ_opinion": {
+            RESPONSE: int_rsp.multi_response(
+                replies=[
+                    "I prefer daytime activities: walking around the city, enjoying the sun on a bench in some "
+                    "picturesque place... and sampling hundreds of tastes of Italian gelato.",
+                    "I find it difficult to enjoy wandering about the city when the weather is bad. "
+                    "If this is the case, I use this time to savour Italian specialties in cozy trattorias.",
+                ],
+                confidences=[1.0, 1.0],
+                hype_attr=[
+                    {"can_continue": MUST_CONTINUE},
+                    {"can_continue": CAN_CONTINUE_SCENARIO},
+                ],
+            ),
+            PROCESSING: {
+                "set_confidence": int_prs.set_confidence(DEFAULT_CONFIDENCE),
+                "set_can_continue": int_prs.set_can_continue(CAN_CONTINUE_SCENARIO),
+            },
+            TRANSITIONS: {("concrete_place_flow", "when_visited"): cnd.true()},
+            MISC: {"dialog_act": ["opinion"]},
+        },
+        "when_visited": {
+            RESPONSE: loc_rsp.append_unused(
+                initial="My favourite time to visit Italy is summer. ",
+                phrases=[loc_rsp.WHEN_TRAVEL],
+            ),
+            PROCESSING: {
+                "set_confidence": int_prs.set_confidence(BIT_LOWER_CONFIDENCE),
+                "set_can_continue": int_prs.set_can_continue(CAN_CONTINUE_SCENARIO),
+            },
+            TRANSITIONS: {
+                ("concrete_place_flow", "told_when", 2): cnd.any(
+                    [int_cnd.has_entities("prop:favorite_season"), int_cnd.has_entities("prop:like_season")]
+                ),
+                ("italian_food_flow", "food_start"): cnd.true(),
+            },
+        },
+        "told_when": {
+            RESPONSE: "It's fun at all seasons but especially in {user_fav_season}.",
+            PROCESSING: {
+                "entity_extraction": int_prs.entities(user_fav_season=["prop:favorite_season", "default:this time"]),
+                "fill_responses_by_slots": int_prs.fill_responses_by_slots(),
+                "set_confidence": int_prs.set_confidence(BIT_LOWER_CONFIDENCE),
+                "set_can_continue": int_prs.set_can_continue(CAN_CONTINUE_SCENARIO),
+            },
+            TRANSITIONS: {("italian_food_flow", "food_start"): cnd.true()},
+        },
+    },
+    "italian_food_flow": {
+        "food_start": {
+            RESPONSE: "In Italy it's always a nosh-up.
Is there any Italian dish that you never get tired of eating?",
+            PROCESSING: {
+                "set_confidence": int_prs.set_confidence(SUPER_CONFIDENCE),
+                "set_can_continue": int_prs.set_can_continue(MUST_CONTINUE),
+                "set_flag": loc_prs.set_flag("food_start_visited", True),
+            },
+            TRANSITIONS: {
+                ("italian_food_flow", "fav_food"): cnd.any(
+                    [int_cnd.has_entities("prop:like_food"), int_cnd.has_entities("prop:favorite_food")]
+                ),
+                ("italy_disappointments", "neg_experience"): cnd.true(),
+            },
+        },
+        "fav_food": {
+            RESPONSE: "Oh, {user_like_food} is to-die-for. What drink does it go best with?",
+            PROCESSING: {
+                "entity_extraction": int_prs.entities(user_like_food=["prop:like_food", "default:this dish"]),
+                "fill_responses_by_slots": int_prs.fill_responses_by_slots(),
+                "set_can_continue": int_prs.set_can_continue(MUST_CONTINUE),
+                "set_confidence": int_prs.set_confidence(DEFAULT_CONFIDENCE),
+            },
+            TRANSITIONS: {
+                ("italian_food_flow", "fav_drink"): cnd.any(
+                    [int_cnd.has_entities("prop:favorite_drink"), int_cnd.has_entities("prop:like_drink")]
+                ),
+                ("italy_disappointments", "neg_experience"): cnd.true(),
+            },
+        },
+        "fav_drink": {
+            RESPONSE: "It is a useful recommendation. I'll try {user_like_drink} next time. Thank you!",
+            PROCESSING: {
+                "entity_extraction": int_prs.entities(user_like_drink=["prop:like_drink", "default:this pairing"]),
+                "fill_responses_by_slots": int_prs.fill_responses_by_slots(),
+                "set_can_continue": int_prs.set_can_continue(MUST_CONTINUE),
+                "set_confidence": int_prs.set_confidence(BIT_LOWER_CONFIDENCE),
+            },
+            TRANSITIONS: {
+                ("italy_disappointments", "neg_experience"): cnd.true(),
+            },
+        },
+    },
+    "italian_food_flow_restart": {
+        "tell_more": {
+            RESPONSE: "Aha, so was it {LIKE FOOD}? If so, where and how did you first try it?",
+            PROCESSING: {
+                "fill_responses_by_slots": loc_prs.fill_responses_by_slots_from_graph(),
+                "set_confidence": int_prs.set_confidence(HIGH_CONFIDENCE),
+            },
+            TRANSITIONS: {
+                ("italy_disappointments", "neg_experience"): cnd.true(),
+            },
+        },
+    },
+    "italy_disappointments": {
+        "neg_experience": {
+            RESPONSE: "You know what disappointed me the most in Florence? The parking! "
+            "I had to leave the car on the outskirts of the city. Was there anything you disliked in Italy?",
+            PROCESSING: {
+                "set_confidence": int_prs.set_confidence(BIT_LOWER_CONFIDENCE),
+                "set_can_continue": int_prs.set_can_continue(CAN_CONTINUE_SCENARIO),
+            },
+            TRANSITIONS: {
+                ("italy_disappointments", "sympathy"): int_cnd.has_entities("prop:dislike"),
+                ("global_flow", "fallback"): cnd.true(),
+            },
+        },
+        "sympathy": {
+            RESPONSE: "I had no idea.
I would feel the same way about {user_dislike}.", + PROCESSING: { + "entity_extraction": int_prs.entities(user_dislike=["prop:dislike", "default:such situation"]), + "fill_responses_by_slots": int_prs.fill_responses_by_slots(), + "set_confidence": int_prs.set_confidence(DEFAULT_CONFIDENCE), + }, + TRANSITIONS: { + ("global_flow", "fallback"): cnd.true(), + }, + }, + }, + "global_flow": { + "start": { + RESPONSE: "", + TRANSITIONS: {}, + }, + "fallback": { + RESPONSE: "Anyway, let's talk about something else!", + TRANSITIONS: {}, + }, + }, +} + + +actor = Actor( + flows, + start_label=("global_flow", "start"), + fallback_label=("global_flow", "fallback"), +) + +logger.info("Actor created successfully") diff --git a/skills/dff_travel_italy_skill/scenario/processing.py b/skills/dff_travel_italy_skill/scenario/processing.py new file mode 100644 index 0000000000..2de8548fa6 --- /dev/null +++ b/skills/dff_travel_italy_skill/scenario/processing.py @@ -0,0 +1,57 @@ +import logging +from typing import Callable + +from scenario.condition import get_current_user_id, graph +from df_engine.core import Context, Actor + +logger = logging.getLogger(__name__) +# .... + + +def execute_response( + ctx: Context, + actor: Actor, +) -> Context: + """Execute the callable response preemptively, + so that slots can be filled""" + processed_node = ctx.a_s.get("processed_node", ctx.a_s["next_node"]) + if callable(processed_node.response): + processed_node.response = processed_node.response(ctx, actor) + ctx.a_s["processed_node"] = processed_node + + return ctx + + +def set_flag(label: str, value: bool = True) -> Callable: + """Sets a flag, modified coronavirus skill""" + + def set_flag_handler(ctx: Context, actor: Actor) -> Context: + ctx.misc["flags"] = ctx.misc.get("flags", {}) + ctx.misc["flags"].update({label: value}) + return ctx + + return set_flag_handler + + +def fill_responses_by_slots_from_graph(): + def fill_responses_by_slots_processing( + ctx: Context, + actor: Actor, + *args, + **kwargs, + ) -> Context: + processed_node = ctx.a_s.get("processed_node", ctx.a_s["next_node"]) + user_id = get_current_user_id(ctx, actor) + current_user_id = "User/" + user_id + user_existing_entities = graph.get_properties_of_entity(entity_id=current_user_id) + entity = "LIKE FOOD" + entity_type = entity + "/Food" + entity_with_id = user_existing_entities[entity_type][-1] + logger.info(f"entity_with_id -- {entity_with_id}") + slot_value = graph.get_properties_of_entity(entity_with_id)["substr"] + logger.info(f"slot_value -- {slot_value}") + processed_node.response = processed_node.response.replace("{" f"{entity}" "}", slot_value) + ctx.a_s["processed_node"] = processed_node + return ctx + + return fill_responses_by_slots_processing diff --git a/skills/dff_travel_italy_skill/scenario/response.py b/skills/dff_travel_italy_skill/scenario/response.py new file mode 100644 index 0000000000..56d1f00a0c --- /dev/null +++ b/skills/dff_travel_italy_skill/scenario/response.py @@ -0,0 +1,87 @@ +import logging +from typing import Callable, List +import datetime + +from df_engine.core import Context, Actor +import df_engine.conditions as cnd + + +from common.travel_italy import QUESTIONS_ABOUT_ITALY + + +logger = logging.getLogger(__name__) +# .... + +START_PHRASE = "Italy is my paradise. Do you love this country?" 
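+# The phrase lists below are meant to be served through append_unused() at the
+# end of this module: it returns the first phrase not yet used in the dialog
+# and, once all have been used, falls back to the least-used one, decaying a
+# phrase's score by a factor of 0.4 per prior use (tracked in
+# ctx.misc["used_phrases"]).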
+UNCERTAINTY = [ + " It's not always easy to tell, of course.", + " It's only my opinion, though.", +] +FAVOURITE_PLACE_PHRASES = [ + "Do you want to know what my favourite place in Italy is?", + "Do you want to know what my other favourite place in Italy is?", + "Do you want to hear about one more place in Italy that impressed me?", +] +OPINION_REQUEST_ON_ITALY_PHRASES = [ + "Did you enjoy going to Italy?", + "Did you find this place interesting?", + "Was the trip to Italy exciting for you?", +] + +VISIT_PLACE_ADVICES = [ + "You should go there one day. You won't regret it!", + "You should put it on your bucket list!", + "I think you would love this place!", +] + +DID_NOT_EXIST = [ + "I didn't exist at that time.", + "I'm a bit too young to remember those times though.", +] + +WHAT_DID_DAY = "What did you do during the day?" + +HAVE_YOU_BEEN_PLACE = "Have you been there?" +ASK_ABOUT_OFFERED_LOC = "It's a real wonder. Have you been there?" +TELL_REQUEST = "May I tell you something about this place?" +TELL_REQUEST2 = "Would you like to hear something else about this place?" +WHAT_PLACE_IMPRESSED_MOST = "What place impressed you the most?" +WHAT_PLACE_LAST_VISITED = "What place in Italy did you last visit?" +OFFER_FACT_ABOUT_PLACE = "Would you like to hear a fact about it?" +ITALY_TRAVEL_SKILL_QUESTIONS = [WHAT_PLACE_LAST_VISITED, WHAT_PLACE_IMPRESSED_MOST] +ALL_QUESTIONS_ABOUT_ITALY = QUESTIONS_ABOUT_ITALY + ITALY_TRAVEL_SKILL_QUESTIONS +WHO_TRAVEL_WITH = "Who did you go there with?" +WHEN_TRAVEL = "When did you go there?" + +CURRENT_YEAR = datetime.datetime.today().year + + +def append_unused(initial: str, phrases: List[str], exit_on_exhaust: bool = False) -> Callable: + """ + Return an unused or a least used response from a list of options + """ + + def unused_handler(ctx: Context, actor: Actor) -> str: + used = ctx.misc.get("used_phrases", []) + confidences = [1] * len(phrases) + + for idx, phrase in enumerate(phrases): + times: int = used.count(id(phrase)) + if times == 0: + used.append(id(phrase)) + ctx.misc["used_phrases"] = used + return initial + phrase + confidences[idx] *= 0.4**times + + if exit_on_exhaust: + label = ctx.last_label + actor.plot[label[0]][label[1]].transitions = {("global_flow", "fallback", 2): cnd.true()} + return initial + + target_idx = confidences.index(max(confidences)) + target_phrase = phrases[target_idx] + used.append(id(target_phrase)) + ctx.misc["used_phrases"] = used + return initial + target_phrase + + return unused_handler diff --git a/skills/dff_travel_italy_skill/scenario/sf_conditions.py b/skills/dff_travel_italy_skill/scenario/sf_conditions.py new file mode 100644 index 0000000000..0f5d8dbe9f --- /dev/null +++ b/skills/dff_travel_italy_skill/scenario/sf_conditions.py @@ -0,0 +1,45 @@ +import logging +import sentry_sdk +from os import getenv + +from df_engine.core import Context, Actor + +sentry_sdk.init(getenv("SENTRY_DSN")) +logging.basicConfig(format="%(asctime)s - %(name)s - %(levelname)s - %(message)s", level=logging.INFO) +logger = logging.getLogger(__name__) + + +def is_sf(sf_name="Open.Give.Opinion"): + def is_sf_handler(ctx: Context, actor: Actor, *args, **kwargs): + try: + last_utterance = ctx.misc.get("agent", {}).get("dialog", {}).get("human_utterances", {})[-1] + utterance_sfcs = last_utterance.get("annotations", {}).get("speech_function_classifier", []) + except KeyError: + utterance_sfcs = [] + + return sf_name in utterance_sfcs + + return is_sf_handler + + +def is_ext_sf(ext_sf_name="React.Respond.Support.Reply.Agree"): + def 
is_ext_sf_handler(ctx: Context, actor: Actor, *args, **kwargs): + return ext_sf_name in ctx.misc.get("ext_sf", [[]])[-1] + + return is_ext_sf_handler + + +def is_midas(midas_name="pos_answer", threshold=0.5): + def is_midas_handler(ctx: Context, actor: Actor, *args, **kwargs): + try: + last_utterance = ctx.misc.get("agent", {}).get("dialog", {}).get("human_utterances", {})[-1] + midas = last_utterance.get("annotations", {}).get("midas_classification", [{}])[-1] + midas_keys = [key for key, val in midas.items() if val > threshold] + except KeyError: + midas_keys = [] + return midas_name in midas_keys + + return is_midas_handler + + +speech_functions = is_sf diff --git a/skills/dff_travel_italy_skill/server.py b/skills/dff_travel_italy_skill/server.py new file mode 100644 index 0000000000..a8eeed22d7 --- /dev/null +++ b/skills/dff_travel_italy_skill/server.py @@ -0,0 +1,93 @@ +#!/usr/bin/env python + +import logging +import time +import os +import random + +from flask import Flask, request, jsonify +from healthcheck import HealthCheck +import sentry_sdk +from sentry_sdk.integrations.logging import ignore_logger + + +from common.dff.integration.actor import load_ctxs, get_response + +from scenario.main import actor + +import test_server + + +ignore_logger("root") + +sentry_sdk.init(os.getenv("SENTRY_DSN")) +SERVICE_NAME = os.getenv("SERVICE_NAME") +SERVICE_PORT = int(os.getenv("SERVICE_PORT")) +RANDOM_SEED = int(os.getenv("RANDOM_SEED", 2718)) + +logging.basicConfig(format="%(asctime)s - %(pathname)s - %(lineno)d - %(levelname)s - %(message)s", level=logging.DEBUG) +logger = logging.getLogger(__name__) + + +app = Flask(__name__) +health = HealthCheck(app, "/healthcheck") +logging.getLogger("werkzeug").setLevel("WARNING") + + +def handler(requested_data, random_seed=None): + st_time = time.time() + ctxs = load_ctxs(requested_data) + random_seed = requested_data.get("random_seed", random_seed) # for tests + + responses = [] + for ctx in ctxs: + try: + # for tests + if random_seed: + random.seed(int(random_seed)) + ctx = actor(ctx) + responses.append(get_response(ctx, actor)) + except Exception as exc: + sentry_sdk.capture_exception(exc) + logger.exception(exc) + responses.append(("", 1.0, {}, {}, {})) + + total_time = time.time() - st_time + logger.info(f"{SERVICE_NAME} exec time = {total_time:.3f}s") + return responses + + +try: + test_server.run_test(handler) + logger.info("test query processed") +except Exception as exc: + sentry_sdk.capture_exception(exc) + logger.exception(exc) + raise exc + +logger.info(f"{SERVICE_NAME} is loaded and ready") + +# import pathlib +# import json + +# for in_file in pathlib.Path("tests").glob("./*_in.json"): +# logger.error(in_file) +# test_in = json.load(in_file.open()) +# responses = handler(test_in, RANDOM_SEED) +# out_file = str(in_file).replace("in.json", "out.json") +# import common.test_utils as t_utils + +# t_utils.save_to_test(responses, out_file, indent=4) # TEST + + +@app.route("/respond", methods=["POST"]) +def respond(): + # import common.test_utils as t_utils; t_utils.save_to_test(request.json,"tests/lets_talk_in.json",indent=4) # TEST + # responses = handler(request.json, RANDOM_SEED) # TEST + # import common.test_utils as t_utils; t_utils.save_to_test(responses,"tests/lets_talk_out.json",indent=4) # TEST + responses = handler(request.json) + return jsonify(responses) + + +if __name__ == "__main__": + app.run(debug=False, host="0.0.0.0", port=SERVICE_PORT) diff --git 
a/skills/dff_travel_italy_skill/service_configs/dff-travel-italy-skill/environment.yml b/skills/dff_travel_italy_skill/service_configs/dff-travel-italy-skill/environment.yml new file mode 100644 index 0000000000..54833ca33d --- /dev/null +++ b/skills/dff_travel_italy_skill/service_configs/dff-travel-italy-skill/environment.yml @@ -0,0 +1,6 @@ +SERVICE_PORT: 8025 +SERVICE_NAME: dff_travel_italy_skill +TERMINUSDB_SERVER_URL: http://terminusdb-server:6363 +TERMINUSDB_SERVER_PASSWORD: root +TERMINUSDB_SERVER_TEAM: admin +TERMINUSDB_SERVER_DB: user_knowledge_db diff --git a/skills/dff_travel_italy_skill/service_configs/dff-travel-italy-skill/service.yml b/skills/dff_travel_italy_skill/service_configs/dff-travel-italy-skill/service.yml new file mode 100644 index 0000000000..1c5b9a4299 --- /dev/null +++ b/skills/dff_travel_italy_skill/service_configs/dff-travel-italy-skill/service.yml @@ -0,0 +1,30 @@ +name: dff-travel-italy-skill +endpoints: +- respond +compose: + env_file: + - .env + - .env_secret + build: + args: + SERVICE_PORT: 8025 + SERVICE_NAME: dff_travel_italy_skill + TERMINUSDB_SERVER_URL: http://terminusdb-server:6363 + TERMINUSDB_SERVER_PASSWORD: root + TERMINUSDB_SERVER_TEAM: admin + TERMINUSDB_SERVER_DB: user_knowledge_db + context: . + dockerfile: ./skills/dff_travel_italy_skill/Dockerfile + command: gunicorn --workers=1 server:app -b 0.0.0.0:8025 --reload + deploy: + resources: + limits: + memory: 128M + reservations: + memory: 128M + volumes: + - ./skills/dff_travel_italy_skill:/src + - ./common:/src/common + ports: + - 8025:8025 +proxy: null diff --git a/skills/dff_travel_italy_skill/test.sh b/skills/dff_travel_italy_skill/test.sh new file mode 100755 index 0000000000..f85ff6a382 --- /dev/null +++ b/skills/dff_travel_italy_skill/test.sh @@ -0,0 +1,3 @@ +#!/bin/bash + +python test_server.py diff --git a/skills/dff_travel_italy_skill/test_server.py b/skills/dff_travel_italy_skill/test_server.py new file mode 100644 index 0000000000..a894b21b1e --- /dev/null +++ b/skills/dff_travel_italy_skill/test_server.py @@ -0,0 +1,33 @@ +import requests +import os + +import common.test_utils as test_utils + + +SERVICE_PORT = int(os.getenv("SERVICE_PORT")) +RANDOM_SEED = int(os.getenv("RANDOM_SEED", 2718)) +URL = f"http://0.0.0.0:{SERVICE_PORT}/respond" + + +def handler(requested_data, random_seed): + hypothesis = requests.post(URL, json={**requested_data, "random_seed": random_seed}).json() + return hypothesis + + +def run_test(handler): + in_data, out_data = test_utils.get_dataset() + for test_name in in_data: + hypothesis = handler(in_data[test_name], RANDOM_SEED) + print(f"test name: {test_name}") + is_equal_flag, msg = test_utils.compare_structs(out_data[test_name], hypothesis, ignored_keys=["id"]) + if msg and len(msg.split("`")) == 5: + _, ground_truth_text, _, hypothesis_text, _ = msg.split("`") + is_equal_flag, ratio = test_utils.compare_text(ground_truth_text, hypothesis_text, 0.80) + if not is_equal_flag: + msg = f"{msg} ratio = {ratio}" + assert is_equal_flag, msg + print("Success") + + +if __name__ == "__main__": + run_test(handler) diff --git a/skills/dff_universal_prompted_skill/scenario/response.py b/skills/dff_universal_prompted_skill/scenario/response.py index 4767badadd..0804332012 100644 --- a/skills/dff_universal_prompted_skill/scenario/response.py +++ b/skills/dff_universal_prompted_skill/scenario/response.py @@ -15,7 +15,7 @@ sentry_sdk.init(getenv("SENTRY_DSN")) logging.basicConfig(format="%(asctime)s - %(name)s - %(levelname)s - %(message)s", level=logging.INFO) logger 
= logging.getLogger(__name__) -GENERATIVE_TIMEOUT = int(getenv("GENERATIVE_TIMEOUT", 5)) +GENERATIVE_TIMEOUT = float(getenv("GENERATIVE_TIMEOUT", 5)) N_UTTERANCES_CONTEXT = int(getenv("N_UTTERANCES_CONTEXT", 3)) FIX_PUNCTUATION = re.compile(r"\s(?=[\.,:;])") @@ -103,6 +103,9 @@ def gathering_responses(reply, confidence, human_attr, bot_attr, attr): envvars_to_send, **human_uttr_attributes, ) + lm_service_timeout = ( + lm_service_config.pop("timeout", GENERATIVE_TIMEOUT) if lm_service_config else GENERATIVE_TIMEOUT + ) if len(dialog_context) > 0: try: @@ -111,7 +114,7 @@ def gathering_responses(reply, confidence, human_attr, bot_attr, attr): prompt, lm_service_url, lm_service_config, - GENERATIVE_TIMEOUT, + lm_service_timeout, sending_variables, ) except Exception as e: diff --git a/skills/dff_user_kg_skill/Dockerfile b/skills/dff_user_kg_skill/Dockerfile new file mode 100644 index 0000000000..21996bb2d0 --- /dev/null +++ b/skills/dff_user_kg_skill/Dockerfile @@ -0,0 +1,29 @@ +FROM python:3.9.1 +# ###################### IMMUTABLE SECTION ###################################### +# Do not change anything in this section +WORKDIR /src + +COPY common/dff/requirements.txt . +RUN pip install -r requirements.txt + +# ###################### CUSTOM SECTION ###################################### +# Here you can make changes + +ARG SERVICE_NAME +ENV SERVICE_NAME ${SERVICE_NAME} + +COPY skills/${SERVICE_NAME}/requirements.txt . +RUN pip install -r requirements.txt && \ + python -m nltk.downloader wordnet + +COPY skills/${SERVICE_NAME}/ ./ +COPY ./common/ ./common/ + +ARG SERVICE_PORT +ENV SERVICE_PORT ${SERVICE_PORT} + +# wait for a server answer ( INTERVAL + TIMEOUT ) * RETRIES seconds after that change stutus to unhealthy +HEALTHCHECK --interval=5s --timeout=5s --retries=3 CMD curl --fail 127.0.0.1:${SERVICE_PORT}/healthcheck || exit 1 + + +CMD gunicorn --workers=1 server:app -b 0.0.0.0:${SERVICE_PORT} diff --git a/skills/dff_user_kg_skill/README.md b/skills/dff_user_kg_skill/README.md new file mode 100644 index 0000000000..1e458129f6 --- /dev/null +++ b/skills/dff_user_kg_skill/README.md @@ -0,0 +1,15 @@ +# Book skill (DFF) +This service handles typical book questions. +It can recommend books according to user's preferences. 
+ + +# Metrics + +OS: Windows 10 +CPU: AMD Ryzen 5 3500U @ 2.10GHz + +| Metric | Average value | +| ------------ | ------------- | +| RAM | ~ 385 MB | +| Startup time | ~ 3.985s | +| Execute time | ~ 2.687s | diff --git a/skills/dff_user_kg_skill/common/.gitkeep b/skills/dff_user_kg_skill/common/.gitkeep new file mode 100644 index 0000000000..e69de29bb2 diff --git a/skills/dff_user_kg_skill/requirements.txt b/skills/dff_user_kg_skill/requirements.txt new file mode 100644 index 0000000000..3c9b9661f6 --- /dev/null +++ b/skills/dff_user_kg_skill/requirements.txt @@ -0,0 +1,2 @@ +click==7.1.2 +nltk==3.5 diff --git a/skills/dff_user_kg_skill/scenario/condition.py b/skills/dff_user_kg_skill/scenario/condition.py new file mode 100644 index 0000000000..bed9e4b4b9 --- /dev/null +++ b/skills/dff_user_kg_skill/scenario/condition.py @@ -0,0 +1,28 @@ +import logging +import re +from typing import Callable +import sentry_sdk +from os import getenv +from df_engine.core import Context, Actor + +import common.dff.integration.context as int_ctx +from common.universal_templates import if_lets_chat + +sentry_sdk.init(getenv("SENTRY_DSN")) +logging.basicConfig(format="%(asctime)s - %(name)s - %(levelname)s - %(message)s", level=logging.INFO) +logger = logging.getLogger(__name__) + + +def check_flag(prop: str) -> Callable: + def check_flag_handler(ctx: Context, actor: Actor) -> bool: + return ctx.misc.get("flags", {}).get(prop, False) + + return check_flag_handler + + +def skill_switch(ctx: Context, actor: Actor) -> bool: + flag = False + user_uttr = int_ctx.get_last_human_utterance(ctx, actor) + if if_lets_chat(user_uttr["text"]) and re.findall(r"(\bpet|pets|hobby|hobbies)", user_uttr["text"]): + flag = True + return flag diff --git a/skills/dff_user_kg_skill/scenario/main.py b/skills/dff_user_kg_skill/scenario/main.py new file mode 100644 index 0000000000..091d4482c3 --- /dev/null +++ b/skills/dff_user_kg_skill/scenario/main.py @@ -0,0 +1,104 @@ +import logging +import re +import sentry_sdk +from os import getenv + +import df_engine.conditions as cnd +import df_engine.labels as lbl +from df_engine.core.keywords import ( + PROCESSING, + TRANSITIONS, + GLOBAL, + RESPONSE, +) +from df_engine.core import Actor + +import common.constants as common_constants +import common.dff.integration.condition as int_cnd +import common.dff.integration.processing as int_prs + +sentry_sdk.init(getenv("SENTRY_DSN")) + +logger = logging.getLogger(__name__) + +flows = { + GLOBAL: { + TRANSITIONS: { + ("personal_info_flow", "pet_tell_more", 2): int_cnd.has_entities("kg:animal"), + ("personal_info_flow", "pet_q", 1): cnd.regexp(re.compile(r"(pet|pets)")), + ("personal_info_flow", "hobby_q", 1): cnd.regexp(re.compile(r"(hobby|hobbies)")), + } + }, + "personal_info_flow": { + "pet_q": { + RESPONSE: "Do you have a pet?", + TRANSITIONS: { + ("personal_info_flow", "pet_r", 2): int_cnd.has_entities("prop:have_pet"), + ("personal_info_flow", "hobby_q", 1): cnd.true(), + }, + PROCESSING: { + "set_confidence": int_prs.set_confidence(1.0), + "set_can_continue": int_prs.set_can_continue(common_constants.MUST_CONTINUE), + }, + }, + "pet_r": { + RESPONSE: "Cool! 
I also have a {users_pet}.", + TRANSITIONS: {lbl.forward(): cnd.true()}, + PROCESSING: { + "entity_extraction": int_prs.entities(users_pet=["prop:have_pet", "default:pet"]), + "slot_filling": int_prs.fill_responses_by_slots(), + "set_confidence": int_prs.set_confidence(1.0), + "set_can_continue": int_prs.set_can_continue(common_constants.MUST_CONTINUE), + }, + }, + "hobby_q": { + RESPONSE: "Do you have a hobby?", + TRANSITIONS: { + ("personal_info_flow", "hobby_r", 2): int_cnd.has_entities("prop:like_activity"), + }, + PROCESSING: { + "set_confidence": int_prs.set_confidence(1.0), + "set_can_continue": int_prs.set_can_continue(common_constants.MUST_CONTINUE), + }, + }, + "hobby_r": { + RESPONSE: "Cool! I also like {users_hobby}.", + TRANSITIONS: { + ("personal_info_flow", "pet_q", 1): cnd.regexp(re.compile(r"(pet|pets)")), + lbl.forward(): cnd.true(), + }, + PROCESSING: { + "entity_extraction": int_prs.entities(users_hobby=["prop:like_activity", "default:this activity"]), + "slot_filling": int_prs.fill_responses_by_slots(), + "set_confidence": int_prs.set_confidence(1.0), + "set_can_continue": int_prs.set_can_continue(common_constants.MUST_CONTINUE), + }, + }, + "pet_tell_more": { + RESPONSE: "Tell me more about your {users_pet}.", + PROCESSING: { + "slot_filling": int_prs.fill_responses_by_slots(), + "set_confidence": int_prs.set_confidence(1.0), + "set_can_continue": int_prs.set_can_continue(common_constants.MUST_CONTINUE), + }, + TRANSITIONS: {}, + }, + }, + "global_flow": { + "start": { + RESPONSE: "", + TRANSITIONS: {}, + }, + "fallback": { + RESPONSE: "Anyway, let's talk about something else!", + TRANSITIONS: {}, + }, + }, +} + +actor = Actor( + flows, + start_label=("global_flow", "start"), + fallback_label=("global_flow", "fallback"), +) +logger.info("Actor created successfully") diff --git a/skills/dff_user_kg_skill/scenario/processing.py b/skills/dff_user_kg_skill/scenario/processing.py new file mode 100644 index 0000000000..90abcadb59 --- /dev/null +++ b/skills/dff_user_kg_skill/scenario/processing.py @@ -0,0 +1,93 @@ +import functools +import logging +import random +import re +from typing import Any, Callable, Optional, Iterator + +import common.dff.integration.context as int_ctx +from common.art import ART_PATTERN, SUPER_CONFIDENCE, HIGH_CONFIDENCE +from common.dff.integration.processing import save_slots_to_ctx +from common.universal_templates import if_chat_about_particular_topic +from df_engine.core import Context, Actor + +logger = logging.getLogger(__name__) + + +def set_start_confidence(ctx: Context, actor: Actor) -> Context: + user_uttr = int_ctx.get_last_human_utterance(ctx, actor) + bot_uttr = int_ctx.get_last_bot_utterance(ctx, actor) + if if_chat_about_particular_topic(user_uttr, bot_uttr, compiled_pattern=ART_PATTERN): + int_ctx.set_confidence(ctx, actor, SUPER_CONFIDENCE) + elif re.findall(ART_PATTERN, user_uttr["text"]): + int_ctx.set_confidence(ctx, actor, HIGH_CONFIDENCE) + return ctx + + +@functools.singledispatch +def save_to_slots(slots: Any) -> None: + """A decorator for saving to slots. 
Ignores `NoneType`.""" + raise NotImplementedError + + +@save_to_slots.register +def _(slots: str) -> Callable: + def slot_decorator(func: Callable) -> Callable: + @functools.wraps(func) + def slot_wrapper(ctx: Context, actor: Actor) -> Optional[str]: + result = func(ctx, actor) + if result is None: + return ctx + return save_slots_to_ctx({slots: result})(ctx, actor) + + return slot_wrapper + + return slot_decorator + + +@save_to_slots.register +def _(slots: tuple) -> Callable: + def slot_decorator(func: Callable) -> Callable: + @functools.wraps(func) + def slot_wrapper(ctx: Context, actor: Actor) -> Context: + results = func(ctx, actor) + if results is None: + return ctx + return save_slots_to_ctx({slot: result for slot, result in zip(slots, results) if result is not None})( + ctx, actor + ) + + return slot_wrapper + + return slot_decorator + + +def save_next_key(keys: Iterator, maindict: dict) -> Callable: + try: + return save_slots_to_ctx(maindict[next(keys)]) + except StopIteration: + return save_slots_to_ctx(maindict[random.choice(list(maindict.keys()))]) + + +def execute_response( + ctx: Context, + actor: Actor, +) -> Context: + """Execute the callable response preemptively, + so that slots can be filled""" + processed_node = ctx.a_s.get("processed_node", ctx.a_s["next_node"]) + if callable(processed_node.response): + processed_node.response = processed_node.response(ctx, actor) + ctx.a_s["processed_node"] = processed_node + + return ctx + + +def set_flag(label: str, value: bool = True) -> Callable: + """Sets a flag, modified coronavirus skill""" + + def set_flag_handler(ctx: Context, actor: Actor) -> Context: + ctx.misc["flags"] = ctx.misc.get("flags", {}) + ctx.misc["flags"].update({label: value}) + return ctx + + return set_flag_handler diff --git a/skills/dff_user_kg_skill/server.py b/skills/dff_user_kg_skill/server.py new file mode 100644 index 0000000000..f6351ead90 --- /dev/null +++ b/skills/dff_user_kg_skill/server.py @@ -0,0 +1,96 @@ +#!/usr/bin/env python + +import logging +import time +import os +import random + +from flask import Flask, request, jsonify +from healthcheck import HealthCheck +import sentry_sdk +from sentry_sdk.integrations.logging import ignore_logger + + +from common.dff.integration.actor import load_ctxs, get_response + +from scenario.main import actor + +import test_server + + +ignore_logger("root") + +sentry_sdk.init(os.getenv("SENTRY_DSN")) +SERVICE_NAME = os.getenv("SERVICE_NAME") +SERVICE_PORT = int(os.getenv("SERVICE_PORT")) +RANDOM_SEED = int(os.getenv("RANDOM_SEED", 2718)) + +logging.basicConfig( + format="%(asctime)s - %(pathname)s - %(lineno)d - %(levelname)s - %(message)s", + level=logging.DEBUG, +) +logger = logging.getLogger(__name__) + + +app = Flask(__name__) +health = HealthCheck(app, "/healthcheck") +logging.getLogger("werkzeug").setLevel("WARNING") + + +def handler(requested_data, random_seed=None): + st_time = time.time() + ctxs = load_ctxs(requested_data) + random_seed = requested_data.get("random_seed", random_seed) # for tests + + responses = [] + for ctx in ctxs: + try: + # for tests + if random_seed: + random.seed(int(random_seed)) + ctx = actor(ctx) + responses.append(get_response(ctx, actor)) + except Exception as exc: + sentry_sdk.capture_exception(exc) + logger.exception(exc) + responses.append(("", 1.0, {}, {}, {})) + + total_time = time.time() - st_time + logger.info(f"{SERVICE_NAME} exec time = {total_time:.3f}s") + return responses + + +try: + test_server.run_test(handler) + logger.info("test query processed") +except 
Exception as exc: + sentry_sdk.capture_exception(exc) + logger.exception(exc) + raise exc + +logger.info(f"{SERVICE_NAME} is loaded and ready") + +# import pathlib +# import json + +# for in_file in pathlib.Path("tests").glob("./*_in.json"): +# logger.error(in_file) +# test_in = json.load(in_file.open()) +# responses = handler(test_in, RANDOM_SEED) +# out_file = str(in_file).replace("in.json", "out.json") +# import common.test_utils as t_utils + +# t_utils.save_to_test(responses, out_file, indent=4) # TEST + + +@app.route("/respond", methods=["POST"]) +def respond(): + # import common.test_utils as t_utils; t_utils.save_to_test(request.json,"tests/favs_in.json",indent=4) # TEST + # responses = handler(request.json, RANDOM_SEED) # TEST + # import common.test_utils as t_utils; t_utils.save_to_test(responses,"tests/favs_out.json",indent=4) # TEST + responses = handler(request.json) + return jsonify(responses) + + +if __name__ == "__main__": + app.run(debug=False, host="0.0.0.0", port=SERVICE_PORT) diff --git a/skills/dff_user_kg_skill/service_configs/dff-user-kg-skill/environment.yml b/skills/dff_user_kg_skill/service_configs/dff-user-kg-skill/environment.yml new file mode 100644 index 0000000000..b5700e6aaf --- /dev/null +++ b/skills/dff_user_kg_skill/service_configs/dff-user-kg-skill/environment.yml @@ -0,0 +1,2 @@ +SERVICE_PORT: 8028 +SERVICE_NAME: dff_user_kg_skill diff --git a/skills/dff_user_kg_skill/service_configs/dff-user-kg-skill/service.yml b/skills/dff_user_kg_skill/service_configs/dff-user-kg-skill/service.yml new file mode 100644 index 0000000000..14102acf1a --- /dev/null +++ b/skills/dff_user_kg_skill/service_configs/dff-user-kg-skill/service.yml @@ -0,0 +1,25 @@ +name: dff-user-kg-skill +endpoints: +- respond +compose: + env_file: + - .env + build: + args: + SERVICE_PORT: 8028 + SERVICE_NAME: dff_user_kg_skill + context: . 
+ dockerfile: ./skills/dff_user_kg_skill/Dockerfile + command: gunicorn --workers=1 server:app -b 0.0.0.0:8028 + deploy: + resources: + limits: + memory: 512M + reservations: + memory: 512M + volumes: + - ./skills/dff_user_kg_skill:/src + - ./common:/src/common + ports: + - 8028:8028 +proxy: null diff --git a/skills/dff_user_kg_skill/test.sh b/skills/dff_user_kg_skill/test.sh new file mode 100755 index 0000000000..f85ff6a382 --- /dev/null +++ b/skills/dff_user_kg_skill/test.sh @@ -0,0 +1,3 @@ +#!/bin/bash + +python test_server.py diff --git a/skills/dff_user_kg_skill/test_server.py b/skills/dff_user_kg_skill/test_server.py new file mode 100644 index 0000000000..78763dc6cc --- /dev/null +++ b/skills/dff_user_kg_skill/test_server.py @@ -0,0 +1,43 @@ +import requests +import os + +import common.test_utils as test_utils + + +SERVICE_PORT = int(os.getenv("SERVICE_PORT")) +RANDOM_SEED = int(os.getenv("RANDOM_SEED", 2718)) +URL = f"http://0.0.0.0:{SERVICE_PORT}/respond" + + +def handler(requested_data, random_seed): + hypothesis = requests.post(URL, json={**requested_data, "random_seed": random_seed}).json() + return hypothesis + + +def run_test(handler): + in_data, out_data = test_utils.get_dataset() + global_is_equal_flag = True + global_msg = "" + for test_name in in_data: + hypothesis = handler(in_data[test_name], RANDOM_SEED) + print(f"test name: {test_name}") + is_equal_flag, msg = test_utils.compare_structs( + out_data[test_name], hypothesis, ignored_keys=["id", "used_phrases"] + ) + if msg and len(msg.split("`")) == 5: + _, ground_truth_text, _, hypothesis_text, _ = msg.split("`") + is_equal_flag, ratio = test_utils.compare_text(ground_truth_text, hypothesis_text, 0.80) + if not is_equal_flag: + msg = f"{msg} ratio = {ratio}" + # assert is_equal_flag, msg + if is_equal_flag: + print("Success") + else: + print(is_equal_flag, msg) + global_msg += f"\nFailed test_name: {test_name} <-> msg: {msg}" + global_is_equal_flag = False + assert global_is_equal_flag, global_msg + + +if __name__ == "__main__": + run_test(handler) diff --git a/skills/dff_user_kg_skill/tests/.gitkeep b/skills/dff_user_kg_skill/tests/.gitkeep new file mode 100644 index 0000000000..e69de29bb2 diff --git a/skills/dff_user_kg_skill/tests/lets_talk_in.json b/skills/dff_user_kg_skill/tests/lets_talk_in.json new file mode 100644 index 0000000000..dd81e82c54 --- /dev/null +++ b/skills/dff_user_kg_skill/tests/lets_talk_in.json @@ -0,0 +1,319 @@ +{ + "human_utter_index_batch": [ + 0 + ], + "dialog_batch": [ + { + "human_utterances": [ + { + "text": "let's talk about pets.", + "annotations": { + "asr": { + "asr_confidence": "undefined" + }, + "spelling_preprocessing": "let's talk about pets", + "badlisted_words": { + "bad_words": false + }, + "spacy_nounphrases": [ + "books" + ], + "sentseg": { + "punct_sent": "let's talk about pets.", + "segments": [ + "let's talk about pets." 
+ ] + }, + "intent_catcher": { + "cant_do": { + "confidence": 0.0, + "detected": 0 + }, + "choose_topic": { + "confidence": 0.0, + "detected": 0 + }, + "doing_well": { + "confidence": 0.0, + "detected": 0 + }, + "dont_understand": { + "confidence": 0.0, + "detected": 0 + }, + "exit": { + "confidence": 0.0, + "detected": 0 + }, + "lets_chat_about": { + "confidence": 1.0, + "detected": 1 + }, + "no": { + "confidence": 0.0, + "detected": 0 + }, + "opinion_request": { + "confidence": 0.0, + "detected": 0 + }, + "repeat": { + "confidence": 0.0, + "detected": 0 + }, + "stupid": { + "confidence": 0.0, + "detected": 0 + }, + "tell_me_a_story": { + "confidence": 0.0, + "detected": 0 + }, + "tell_me_more": { + "confidence": 0.0, + "detected": 0 + }, + "topic_switching": { + "confidence": 0.0, + "detected": 0 + }, + "weather_forecast_intent": { + "confidence": 0.0, + "detected": 0 + }, + "what_are_you_talking_about": { + "confidence": 0.0, + "detected": 0 + }, + "what_can_you_do": { + "confidence": 0.0, + "detected": 0 + }, + "what_is_your_job": { + "confidence": 0.0, + "detected": 0 + }, + "what_is_your_name": { + "confidence": 0.0, + "detected": 0 + }, + "what_time": { + "confidence": 0.0, + "detected": 0 + }, + "where_are_you_from": { + "confidence": 0.0, + "detected": 0 + }, + "who_made_you": { + "confidence": 0.0, + "detected": 0 + }, + "yes": { + "confidence": 0.0, + "detected": 0 + } + }, + "midas_classification": [ + { + "appreciation": 0.0017878437647596002, + "command": 0.9731636047363281, + "comment": 0.0012271953746676445, + "complaint": 0.0019538358319550753, + "dev_command": 0.007522969506680965, + "neg_answer": 0.0012253294698894024, + "open_question_factual": 0.0013211232144385576, + "open_question_opinion": 0.0017928575398400426, + "opinion": 0.001923633273690939, + "other_answers": 0.0007157829240895808, + "pos_answer": 0.003168173599988222, + "statement": 0.0020507643930613995, + "yes_no_question": 0.00214685732498765 + } + ], + "combined_classification": { + "cobot_dialogact_intents": { + "Information_RequestIntent": 0.9999673366546631 + }, + "cobot_dialogact_topics": { + "Entertainment_Books": 0.9999250173568726 + }, + "cobot_topics": { + "Music": 0.9999924898147583 + }, + "emotion_classification": { + "neutral": 0.9999861717224121 + }, + "factoid_classification": { + "is_conversational": 0.9999961853027344 + }, + "sentiment_classification": { + "neutral": 0.9999853372573853 + }, + "toxic_classification": { + "not_toxic": 0.9999828338623047 + } + }, + "midas_predictor": { + "command": 0.06747467438494935, + "opinion": 0.22919681620839363, + "pos_answer": 0.1676917510853835, + "statement": 0.47503617945007237, + "yes_no_question": 0.060600578871201155 + }, + "topic_recommendation": [ + "dff_movie_skill", + "dff_travel_skill", + "dff_music_skill" + ], + "entity_detection": { + "entities": [ + "books" + ], + "labelled_entities": [ + { + "label": "misc", + "offsets": [ + 17, + 22 + ], + "text": "books" + } + ] + }, + "ner": [ + [] + ], + "entity_linking": [], + "fact_retrieval": { + "facts": [], + "topic_facts": [] + }, + "fact_random": { + "facts": [], + "response": "let's talk about pets." + }, + "kbqa": { + "answer": "", + "confidence": 0.0, + "qa_system": "kbqa" + }, + "sentrewrite": { + "clusters": [], + "modified_sents": [ + "let's talk about pets." 
+ ] + }, + "wiki_parser": { + "animals_skill_entities_info": {}, + "entities_info": {}, + "topic_skill_entities_info": {}, + "utt_num": 1, + "wiki_skill_entities_info": {} + }, + "news_api_annotator": [ + { + "entity": "all", + "news": { + "content": "Major Serhiy Volyna, commander of Ukraine's 36th Separate Marine Brigade, issued a desperate plea to world leaders in a Facebook video posted from the besieged city of Mariupol.\n\"This is our appeal to the world. This could be the last appeal of our l... [959 chars]", + "description": "Major Serhiy Volyna, commander of Ukraine's 36th Separate Marine Brigade, issued a desperate plea to world leaders in a Facebook video posted from the besieged city of Mariupol. \"This is our appeal to the world.", + "image": "https://image.cnbcfm.com/api/v1/image/107047718-16502885112022-04-18t112625z_827114355_rc2npt9buj3j_rtrmadp_0_ukraine-crisis-mariupol.jpeg?v=1650288631&w=1920&h=1080", + "publishedAt": "2022-04-20T08:45:00Z", + "source": { + "name": "CNBC", + "url": "https://www.cnbc.com" + }, + "title": "Latest news on Russia and the war in Ukraine", + "url": "https://www.cnbc.com/2022/04/20/russia-ukraine-live-updates.html" + }, + "which": "all" + }, + { + "entity": "books", + "news": { + "content": "Punjab Kings (PBKS) opener Shikhar Dhawan is likely to etch his name in the record books of IPL on Wednesday when his side faces his former team Delhi Capitals in Mumbai. Dhawan is the most prolific batter in the Punjab team who can provide the perfe... [2085 chars]", + "description": "Punjab Kings (PBKS) opener Shikhar Dhawan is likely to etch his name in the record books of IPL on Wednesday when his side faces his former team Delhi Capitals in Mumbai.", + "image": "https://cricketaddictor.com/wp-content/uploads/2018/03/maxresdefault.jpg", + "publishedAt": "2022-04-20T05:53:37Z", + "source": { + "name": "Cricket Addictor", + "url": "https://cricketaddictor.com" + }, + "title": "DC vs PBKS: Shikhar Dhawan 11 Runs Away To Join Virat Kohli In The Elite List", + "url": "https://cricketaddictor.com/indian-premier-league-ipl-2022/dc-vs-pbks-shikhar-dhawan-11-runs-away-to-join-virat-kohli-in-the-elite-list/" + }, + "which": "human" + } + ], + "conceptnet": { + "books": { + "SymbolOf": [ + "knowledge", + "life", + "education" + ], + "HasProperty": [ + "heavy", + "expensive", + "good than book" + ], + "Causes": [ + "knowledge", + "learn", + "you learn" + ], + "CausesDesire": [ + "read", + "read book", + "learn" + ] + } + } + } + } + ], + "bot_utterances": [] + } + ], + "dff_user_kg_skill_state_batch": [ + {} + ], + "dff_shared_state_batch": [ + { + "cross_states": {}, + "cross_links": {} + } + ], + "entities_batch": [ + { + "book": { + "bot_attitude": null, + "bot_encounters": [], + "human_attitude": "like", + "human_encounters": [ + { + "full_name": "books", + "human_utterance_index": 0, + "previous_skill_name": "pre_start" + } + ], + "name": "book" + } + } + ], + "used_links_batch": [ + {} + ], + "age_group_batch": [ + "unknown" + ], + "disliked_skills_batch": [ + [] + ], + "prompts_goals_batch": [ + {} + ], + "clarification_request_flag_batch": [ + false + ] +} diff --git a/skills/dff_user_kg_skill/tests/lets_talk_out.json b/skills/dff_user_kg_skill/tests/lets_talk_out.json new file mode 100644 index 0000000000..05dbef4936 --- /dev/null +++ b/skills/dff_user_kg_skill/tests/lets_talk_out.json @@ -0,0 +1,51 @@ +[ + [ + "Do you have a pet?", + 1.0, + { + "dff_user_kg_skill_state": { + "shared_memory": {}, + "previous_human_utter_index": 0, + "history": { + "0": [ 
+ "personal_info_flow", + "pet_q" + ] + }, + "current_turn_dff_suspended": false, + "context": { + "id": "fc152029-8b47-4fe4-9a60-348939d4a2ef", + "labels": { + "0": [ + "personal_info_flow", + "pet_q" + ] + }, + "requests": { + "0": "let's talk about pets." + }, + "responses": { + "0": "Do you have a pet?" + }, + "misc": { + }, + "validation": false, + "actor_state": {} + } + }, + "dff_shared_state": { + "cross_states": {}, + "cross_links": {} + }, + "used_links": {}, + "age_group": "unknown", + "disliked_skills": [], + "prompts_goals": {} + }, + {}, + { + "can_continue": "must", + "is_final_answer": "true" + } + ] +] \ No newline at end of file diff --git a/skills/eliza/requirements.txt b/skills/eliza/requirements.txt index 6b59105241..fba1bbb121 100644 --- a/skills/eliza/requirements.txt +++ b/skills/eliza/requirements.txt @@ -4,4 +4,4 @@ gunicorn==19.9.0 requests==2.22.0 sentry-sdk<=1.19.1 jinja2<=3.0.3 -Werkzeug>=2.2.2 +Werkzeug>=2.2.2,<3.0 diff --git a/skills/emotion_skill/requirements.txt b/skills/emotion_skill/requirements.txt index bd242de1a0..0efadfea9f 100644 --- a/skills/emotion_skill/requirements.txt +++ b/skills/emotion_skill/requirements.txt @@ -7,4 +7,4 @@ gunicorn==20.1.0 ahocorapy==1.6.2 nltk==3.2.5 jinja2<=3.0.3 -Werkzeug>=2.2.2 +Werkzeug>=2.2.2,<3.0 diff --git a/skills/external_integration_skill/server.py b/skills/external_integration_skill/server.py index baec427f6f..748a38a506 100644 --- a/skills/external_integration_skill/server.py +++ b/skills/external_integration_skill/server.py @@ -18,7 +18,7 @@ EXTERNAL_SKILL_URL = getenv("EXTERNAL_SKILL_URL", None) PAYLOAD_ARGUMENT_NAME = getenv("PAYLOAD_ARGUMENT_NAME", "payload") -EXTERNAL_TIMEOUT = int(getenv("EXTERNAL_TIMEOUT", 2)) +EXTERNAL_TIMEOUT = float(getenv("EXTERNAL_TIMEOUT", 2)) ARGUMENTS_TO_SEND = getenv("ARGUMENTS_TO_SEND", ["user_id"]) if isinstance(ARGUMENTS_TO_SEND, str): ARGUMENTS_TO_SEND = ARGUMENTS_TO_SEND.split(",") diff --git a/skills/factoid_qa/requirements.txt b/skills/factoid_qa/requirements.txt index bd3ded585c..b9300309b8 100644 --- a/skills/factoid_qa/requirements.txt +++ b/skills/factoid_qa/requirements.txt @@ -8,5 +8,5 @@ sentry-sdk==1.19.1 spacy>=3.5.2 https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.5.0/en_core_web_sm-3.5.0.tar.gz#egg=en_core_web_sm==3.5.0 jinja2<=3.0.3 -Werkzeug>=2.2.2 +Werkzeug>=2.2.2,<3.0 importlib-metadata<5.0 \ No newline at end of file diff --git a/skills/knowledge_grounding_skill/requirements.txt b/skills/knowledge_grounding_skill/requirements.txt index 706f6fca36..03a2866fdf 100644 --- a/skills/knowledge_grounding_skill/requirements.txt +++ b/skills/knowledge_grounding_skill/requirements.txt @@ -6,4 +6,4 @@ numpy==1.24.2 requests==2.28.2 sentry-sdk==1.19.1 jinja2<=3.0.3 -Werkzeug>=2.2.2 +Werkzeug>=2.2.2,<3.0 diff --git a/skills/knowledge_grounding_skill/server.py b/skills/knowledge_grounding_skill/server.py index f97e808e6b..d40535ae1a 100644 --- a/skills/knowledge_grounding_skill/server.py +++ b/skills/knowledge_grounding_skill/server.py @@ -14,7 +14,8 @@ from common.constants import CAN_NOT_CONTINUE from common.universal_templates import if_chat_about_particular_topic, if_choose_topic -from common.utils import get_intents, join_sentences_in_or_pattern, join_words_in_or_pattern, get_topics, get_entities +from common.utils import get_intents, get_topics, get_entities +from common.join_pattern import * from common.response_selection import ACTIVE_SKILLS sentry_sdk.init(getenv("SENTRY_DSN")) diff --git a/skills/meta_script_skill/requirements.txt 
b/skills/meta_script_skill/requirements.txt index 9a72aa6b9f..e8ef77ab47 100644 --- a/skills/meta_script_skill/requirements.txt +++ b/skills/meta_script_skill/requirements.txt @@ -9,6 +9,6 @@ spacy==3.5.1 importlib_metadata<5 nltk[twitter]==3.2.5 jinja2<=3.0.3 -Werkzeug>=2.2.2 +Werkzeug>=2.2.2,<3.0 typing-inspect==0.8.0 typing_extensions==4.5.0 diff --git a/skills/misheard_asr/requirements.txt b/skills/misheard_asr/requirements.txt index b6ebb15033..8fede01f86 100644 --- a/skills/misheard_asr/requirements.txt +++ b/skills/misheard_asr/requirements.txt @@ -4,5 +4,5 @@ gunicorn==19.9.0 requests==2.28.2 sentry-sdk==1.19.1 jinja2<=3.1.2 -Werkzeug>=2.2.2 +Werkzeug>=2.2.2,<3.0 numpy==1.24.2 diff --git a/skills/news_api_skill/requirements.txt b/skills/news_api_skill/requirements.txt index c9c8a3b50d..eefd4afa41 100644 --- a/skills/news_api_skill/requirements.txt +++ b/skills/news_api_skill/requirements.txt @@ -7,6 +7,6 @@ gunicorn==20.1.0 numpy==1.24.2 spacy==3.5.1 jinja2<=3.0.3 -Werkzeug>=2.2.2 +Werkzeug>=2.2.2,<3.0 nltk==3.2.5 prometheus_client==0.16.0 diff --git a/skills/personal_info_skill/requirements.txt b/skills/personal_info_skill/requirements.txt index 4d9117ce1d..4bf70af4da 100644 --- a/skills/personal_info_skill/requirements.txt +++ b/skills/personal_info_skill/requirements.txt @@ -5,4 +5,4 @@ requests==2.28.2 numpy==1.24.2 sentry-sdk==1.19.1 jinja2<=3.1.2 -Werkzeug>=2.2.2 \ No newline at end of file +Werkzeug>=2.2.2,<3.0 \ No newline at end of file diff --git a/skills/small_talk_skill/requirements.txt b/skills/small_talk_skill/requirements.txt index ce8f1af1ec..f36ced939c 100644 --- a/skills/small_talk_skill/requirements.txt +++ b/skills/small_talk_skill/requirements.txt @@ -7,4 +7,4 @@ gunicorn==20.1.0 numpy==1.24.2 spacy==3.5.1 jinja2<=3.0.3 -Werkzeug>=2.2.2 \ No newline at end of file +Werkzeug>=2.2.2,<3.0 \ No newline at end of file diff --git a/state_formatters/dp_formatters.py b/state_formatters/dp_formatters.py index 57afff7859..c045b5fdb0 100755 --- a/state_formatters/dp_formatters.py +++ b/state_formatters/dp_formatters.py @@ -990,6 +990,14 @@ def dff_template_skill_formatter(dialog: Dict) -> List[Dict]: return utils.dff_formatter(dialog, "dff_template_skill") +def dff_user_kg_skill_formatter(dialog: Dict) -> List[Dict]: + return utils.dff_formatter(dialog, "dff_user_kg_skill") + + +def dff_travel_italy_skill_formatter(dialog: Dict) -> List[Dict]: + return utils.dff_formatter(dialog, "dff_travel_italy_skill") + + def dff_intent_responder_skill_formatter(dialog: Dict) -> List[Dict]: intents = list(dialog["human_utterances"][-1]["annotations"].get("intent_catcher", {}).keys()) called_intents = {intent: False for intent in intents} @@ -1056,6 +1064,29 @@ def dff_image_skill_formatter(dialog: Dict) -> List[Dict]: return utils.dff_formatter(dialog, "dff_image_skill") +def dff_fromage_image_skill_formatter(dialog: Dict) -> List[Dict]: + return utils.dff_formatter(dialog, "dff_fromage_image_skill") + + +def fromage_formatter(dialog: Dict) -> List: + # Used by: fromage + dialog = utils.get_last_n_turns(dialog) + dialog = utils.remove_clarification_turns_from_dialog(dialog) + + image_paths = [utt["attributes"].get("image") for utt in dialog["human_utterances"]] + utterances_history = 5 + image_paths = image_paths[-utterances_history:] + human_text_uttr = dialog["human_utterances"][-1]["text"] + input_dict = {"sentences": [human_text_uttr if human_text_uttr else ""]} + for url in reversed(image_paths): + if url is not None and url.startswith("http"): + input_dict.update({"image_paths": 
[url]}) + break + else: + input_dict.update({"image_paths": [None]}) + return [input_dict] + + def dff_prompted_skill_formatter(dialog, skill_name=None): return utils.dff_formatter( dialog, @@ -1226,6 +1257,14 @@ def image_captioning_formatter(dialog: Dict) -> List[Dict]: return [{"image_paths": [dialog["human_utterances"][-1].get("attributes", {}).get("image")]}] +def last_human_annotated_utterance(dialog: Dict) -> List[Dict]: + return [ + { + "last_human_annotated_utterance": [dialog["human_utterances"][-1]], + } + ] + + def external_integration_skill_formatter(dialog: Dict) -> List[Dict]: last_sentences = [dialog["human_utterances"][-1]["text"]] dialog_ids = [dialog.get("dialog_id", "unknown")] diff --git a/tests/runtests_russian.sh b/tests/runtests_russian.sh index 00c3c190db..f7acb2e013 100755 --- a/tests/runtests_russian.sh +++ b/tests/runtests_russian.sh @@ -140,7 +140,7 @@ if [[ "$MODE" == "test_skills" || "$MODE" == "all" ]]; then for container in dff-program-y-ru-skill intent-catcher-ru convers-evaluation-selector-ru personal-info-ru-skill \ entity-linking-ru wiki-parser-ru badlisted-words-ru spelling-preprocessing-ru sentseg-ru \ dff-friendship-ru-skill dff-intent-responder-ru-skill entity-detection-ru dialogpt-ru \ - dff-generative-ru-skill dialogrpt-ru spacy-annotator-ru toxic-classification-ru \ + dff-generative-ru-skill dialogrpt-ru spacy-annotator-ru combined-classification-ru \ text-qa-ru fact-retrieval-ru; do echo "Run tests for $container"