diff --git a/annotators/IntentCatcherTransformers/test.sh b/annotators/IntentCatcherTransformers/test.sh index 61672db785..039765945d 100755 --- a/annotators/IntentCatcherTransformers/test.sh +++ b/annotators/IntentCatcherTransformers/test.sh @@ -1,3 +1,3 @@ #!/bin/bash -python test.py +python -m pytest tests/test.py diff --git a/annotators/IntentCatcherTransformers/tests/conftest.py b/annotators/IntentCatcherTransformers/tests/conftest.py new file mode 100644 index 0000000000..e21585db97 --- /dev/null +++ b/annotators/IntentCatcherTransformers/tests/conftest.py @@ -0,0 +1,46 @@ +import json + +import pytest + +from os import getenv + +INTENT_PHRASES_PATH = getenv("INTENT_PHRASES_PATH") + + +def pytest_addoption(parser): + parser.addoption("--uri", action="store", default="http://0.0.0.0") + parser.addoption("--port", action="store", default="8014") + parser.addoption("--handle", action="store", default="detect") + + +@pytest.fixture +def uri(request) -> str: + return request.config.getoption("--uri") + + +@pytest.fixture +def port(request) -> str: + return request.config.getoption("--port") + + +@pytest.fixture +def handle(request) -> str: + return request.config.getoption("--handle") + + +@pytest.fixture +def url(uri, port, handle) -> str: + return f"{uri}:{port}/{handle}" + + +@pytest.fixture +def tests(): + if "RU" in INTENT_PHRASES_PATH and "commands" in INTENT_PHRASES_PATH: + tests = json.load(open("tests/tests_commands_RU.json")) + elif "RU" in INTENT_PHRASES_PATH: + tests = json.load(open("tests/tests_RU.json")) + elif "commands" in INTENT_PHRASES_PATH: + tests = json.load(open("tests/tests_commands.json")) + else: + tests = json.load(open("tests/tests.json")) + return tests diff --git a/annotators/IntentCatcherTransformers/test.py b/annotators/IntentCatcherTransformers/tests/test.py similarity index 50% rename from annotators/IntentCatcherTransformers/test.py rename to annotators/IntentCatcherTransformers/tests/test.py index b2cecdf557..45221d44b6 100644 --- a/annotators/IntentCatcherTransformers/test.py +++ b/annotators/IntentCatcherTransformers/tests/test.py @@ -2,23 +2,9 @@ import requests import json -from os import getenv -INTENT_PHRASES_PATH = getenv("INTENT_PHRASES_PATH") -SERVICE_PORT = getenv("SERVICE_PORT") - - -def main_test(): - url = f"http://0.0.0.0:{SERVICE_PORT}/detect" - if "RU" in INTENT_PHRASES_PATH and "commands" in INTENT_PHRASES_PATH: - tests = json.load(open("tests_commands_RU.json")) - elif "RU" in INTENT_PHRASES_PATH: - tests = json.load(open("tests_RU.json")) - elif "commands" in INTENT_PHRASES_PATH: - tests = json.load(open("tests_commands.json")) - else: - tests = json.load(open("tests.json")) +def test_intent_catcher(url: str, tests: list): for test in tests: r = requests.post(url=url, json={"sentences": [[test["sentence"]]]}) assert r.ok @@ -30,8 +16,3 @@ def main_test(): ), print(f"TEST FAILED!\nTest: {test}\nResult:{json.dumps(data, indent=2)}") else: assert all([intent["detected"] == 0 for intent in data.values()]), f"test: {test}\nprediction: {data}" - print("Success") - - -if __name__ == "__main__": - main_test() diff --git a/annotators/IntentCatcherTransformers/tests.json b/annotators/IntentCatcherTransformers/tests/tests.json similarity index 100% rename from annotators/IntentCatcherTransformers/tests.json rename to annotators/IntentCatcherTransformers/tests/tests.json diff --git a/annotators/IntentCatcherTransformers/tests_RU.json b/annotators/IntentCatcherTransformers/tests/tests_RU.json similarity index 100% rename from annotators/IntentCatcherTransformers/tests_RU.json
rename to annotators/IntentCatcherTransformers/tests/tests_RU.json diff --git a/annotators/IntentCatcherTransformers/tests_commands.json b/annotators/IntentCatcherTransformers/tests/tests_commands.json similarity index 100% rename from annotators/IntentCatcherTransformers/tests_commands.json rename to annotators/IntentCatcherTransformers/tests/tests_commands.json diff --git a/annotators/IntentCatcherTransformers/tests_commands_RU.json b/annotators/IntentCatcherTransformers/tests/tests_commands_RU.json similarity index 100% rename from annotators/IntentCatcherTransformers/tests_commands_RU.json rename to annotators/IntentCatcherTransformers/tests/tests_commands_RU.json diff --git a/annotators/NER/tests/conftest.py b/annotators/NER/tests/conftest.py new file mode 100644 index 0000000000..e721e441c0 --- /dev/null +++ b/annotators/NER/tests/conftest.py @@ -0,0 +1,27 @@ +import pytest + + +def pytest_addoption(parser): + parser.addoption("--uri", action="store", default="http://proxy.deeppavlov.ai") + parser.addoption("--port", action="store", default=8021) + parser.addoption("--handle", action="store", default="/ner") + + +@pytest.fixture +def uri(request) -> str: + return request.config.getoption("--uri") + + +@pytest.fixture +def port(request) -> int: + return request.config.getoption("--port") + + +@pytest.fixture +def handle(request) -> str: + return request.config.getoption("--handle") + + +@pytest.fixture +def url(uri, port, handle) -> str: + return f"{uri}:{port}/{handle}" diff --git a/annotators/NER/tests/test_ner.py b/annotators/NER/tests/test_ner.py new file mode 100644 index 0000000000..83a62cc292 --- /dev/null +++ b/annotators/NER/tests/test_ner.py @@ -0,0 +1,41 @@ +import json + +import pytest + +import sys +from os import path + +import requests + +PARENT_DIR = path.dirname(path.dirname(path.abspath(__file__))) +sys.path.append(PARENT_DIR) + + +@pytest.mark.parametrize( + "sentences, gold_result", + [ + ( + { + "last_utterances": [ + ["john peterson is my brother.", "he lives in New York."], + ["my laptop was broken.", "could you show me the nearest store in Moscow where i can fix it."], + ] + }, + [ + [ + [{"confidence": 1, "end_pos": 2, "start_pos": 0, "text": "john peterson", "type": "PER"}], + [{"confidence": 1, "end_pos": 5, "start_pos": 3, "text": "New York", "type": "LOC"}], + ], + [ + [], + [{"confidence": 1, "end_pos": 9, "start_pos": 8, "text": "Moscow", "type": "LOC"}], + ], + ], + ) + ], +) +def test_ner(url: str, sentences: dict, gold_result: list): + response = requests.post(url, json=sentences, headers={"Content-Type": "application/json"}) + result = json.loads(response.text) + assert response.status_code == 200 + assert result == gold_result diff --git a/annotators/NER_deeppavlov/Dockerfile-test b/annotators/NER_deeppavlov/Dockerfile-test new file mode 100644 index 0000000000..0e23c4e5c0 --- /dev/null +++ b/annotators/NER_deeppavlov/Dockerfile-test @@ -0,0 +1,20 @@ +FROM deeppavlov/deeppavlov:1.2.0-gpu + +ARG CONFIG +ARG SERVICE_PORT +ARG SRC_DIR +ARG SED_ARG=" | " + +ENV CONFIG=$CONFIG +ENV SERVICE_PORT=$SERVICE_PORT + +COPY ./annotators/NER_deeppavlov/tests/requirements.txt /src/requirements.txt + +RUN pip install --upgrade pip && \ + pip install -r /src/requirements.txt + +COPY $SRC_DIR /src + +WORKDIR /src + +CMD gunicorn --workers=1 --timeout 800 server:app -b 0.0.0.0:8021 diff --git a/annotators/NER_deeppavlov/test.sh b/annotators/NER_deeppavlov/test.sh index b37c67d44c..0736333c7f 100755 --- a/annotators/NER_deeppavlov/test.sh +++ 
b/annotators/NER_deeppavlov/test.sh @@ -1,4 +1,4 @@ #!/bin/bash -python test_server.py +python -m pytest tests/test_server.py diff --git a/annotators/NER_deeppavlov/test_server.py b/annotators/NER_deeppavlov/test_server.py deleted file mode 100644 index 3ab9d15e61..0000000000 --- a/annotators/NER_deeppavlov/test_server.py +++ /dev/null @@ -1,52 +0,0 @@ -import requests - - -def main(): - url = "http://0.0.0.0:8021/ner" - - request_data = { - "last_utterances": [ - ["я видела ивана в москве"], - ["Я видела Ивана в Москве"], - ["i have heard about justin. he is in sahara desert"], - ["I have heard about Justin. He is in Sahara Desert"], - ["can john smith move forward for 15 meters, then for fifteen meters, and get back to las vegas then"], - ["я бы проехала на 30 метров вперед, а потом повернула на сорок пять градусов по часовой стрелке"], - [""], - ] - } - - gold_results = [ - [[]], - [[]], - [ - [ - {"start_pos": 4, "end_pos": 5, "type": "PER", "text": "justin", "confidence": 1}, - {"start_pos": 9, "end_pos": 11, "type": "LOC", "text": "sahara desert", "confidence": 1}, - ] - ], - [ - [ - {"start_pos": 4, "end_pos": 5, "type": "PER", "text": "Justin", "confidence": 1}, - {"start_pos": 9, "end_pos": 11, "type": "LOC", "text": "Sahara Desert", "confidence": 1}, - ] - ], - [ - [ - {"start_pos": 1, "end_pos": 3, "type": "PER", "text": "john smith", "confidence": 1}, - {"start_pos": 6, "end_pos": 8, "type": "QUANTITY", "text": "15 meters", "confidence": 1}, - {"start_pos": 11, "end_pos": 13, "type": "QUANTITY", "text": "fifteen meters", "confidence": 1}, - {"start_pos": 18, "end_pos": 20, "type": "LOC", "text": "las vegas", "confidence": 1}, - ] - ], - [[]], - [[]], - ] - - result = requests.post(url, json=request_data).json() - assert result == gold_results, print(result) - print("Success") - - -if __name__ == "__main__": - main() diff --git a/annotators/NER_deeppavlov/tests/conftest.py b/annotators/NER_deeppavlov/tests/conftest.py new file mode 100644 index 0000000000..50189a3c3a --- /dev/null +++ b/annotators/NER_deeppavlov/tests/conftest.py @@ -0,0 +1,27 @@ +import pytest + + +def pytest_addoption(parser): + parser.addoption("--uri", action="store", default="http://0.0.0.0") + parser.addoption("--port", action="store", default=8021) + parser.addoption("--handle", action="store", default="/ner") + + +@pytest.fixture +def uri(request) -> str: + return request.config.getoption("--uri") + + +@pytest.fixture +def port(request) -> int: + return request.config.getoption("--port") + + +@pytest.fixture +def handle(request) -> str: + return request.config.getoption("--handle") + + +@pytest.fixture +def url(uri, port, handle) -> str: + return f"{uri}:{port}/{handle}" diff --git a/annotators/NER_deeppavlov/tests/requirements.txt b/annotators/NER_deeppavlov/tests/requirements.txt new file mode 100644 index 0000000000..0811f230d3 --- /dev/null +++ b/annotators/NER_deeppavlov/tests/requirements.txt @@ -0,0 +1,11 @@ +sentry-sdk[flask]==0.14.1 +flask==1.1.1 +gunicorn==19.9.0 +itsdangerous==2.0.1 +jinja2<=3.0.3 +Werkzeug<=2.0.3 +transformers==4.6.0 +datasets==1.11.0 +huggingface-hub==0.0.8 +pytest==7.4.2 +allure-pytest==2.13.2 \ No newline at end of file diff --git a/annotators/NER_deeppavlov/tests/test_server.py b/annotators/NER_deeppavlov/tests/test_server.py new file mode 100644 index 0000000000..b6659520d1 --- /dev/null +++ b/annotators/NER_deeppavlov/tests/test_server.py @@ -0,0 +1,56 @@ +import allure +import pytest +import requests + + +@allure.description("""Test NER""") +@pytest.mark.parametrize( + 
"request_data, gold_results", + [ + ( + { + "last_utterances": [ + ["я видела ивана в москве"], + ["Я видела Ивана в Москве"], + ["i have heard about justin. he is in sahara desert"], + ["I have heard about Justin. He is in Sahara Desert"], + [ + "can john smith move forward for 15 meters, then for \ + fifteen meters, and get back to las vegas then" + ], + ["я бы проехала на 30 метров вперед, а потом повернула на сорок пять градусов по часовой стрелке"], + [""], + ] + }, + [ + [[]], + [[]], + [ + [ + {"start_pos": 4, "end_pos": 5, "type": "PER", "text": "justin", "confidence": 1}, + {"start_pos": 9, "end_pos": 11, "type": "LOC", "text": "sahara desert", "confidence": 1}, + ] + ], + [ + [ + {"start_pos": 4, "end_pos": 5, "type": "PER", "text": "Justin", "confidence": 1}, + {"start_pos": 9, "end_pos": 11, "type": "LOC", "text": "Sahara Desert", "confidence": 1}, + ] + ], + [ + [ + {"start_pos": 1, "end_pos": 3, "type": "PER", "text": "john smith", "confidence": 1}, + {"start_pos": 6, "end_pos": 8, "type": "QUANTITY", "text": "15 meters", "confidence": 1}, + {"start_pos": 11, "end_pos": 13, "type": "QUANTITY", "text": "fifteen meters", "confidence": 1}, + {"start_pos": 18, "end_pos": 20, "type": "LOC", "text": "las vegas", "confidence": 1}, + ] + ], + [[]], + [[]], + ], + ) + ], +) +def test_ner(url: str, request_data: dict, gold_results: list): + result = requests.post(url, json=request_data).json() + assert result == gold_results diff --git a/annotators/SentSeg/Dockerfile-test b/annotators/SentSeg/Dockerfile-test new file mode 100644 index 0000000000..4229eb488a --- /dev/null +++ b/annotators/SentSeg/Dockerfile-test @@ -0,0 +1,28 @@ +FROM python:3.7-slim + +ARG DATA_URL=files.deeppavlov.ai/alexaprize_data/sentseg/elmo2.tar.gz +ARG MODEL_META_URL=files.deeppavlov.ai/alexaprize_data/sentseg/model.meta +ARG MODEL_DATA_URL=files.deeppavlov.ai/alexaprize_data/sentseg/model.data-00000-of-00001 + +WORKDIR /src +RUN mkdir /data /elmo2 tfhub_cache_dir + +RUN apt-get update && \ + apt-get install -y curl && \ + curl -L $DATA_URL --output /tmp/elmo2.tar.gz && \ + tar -xf /tmp/elmo2.tar.gz -C /elmo2 && \ + rm /tmp/elmo2.tar.gz && \ + curl -L $MODEL_META_URL --output /data/model.meta && \ + curl -L $MODEL_DATA_URL --output /data/model.data-00000-of-00001 + +ENV TFHUB_CACHE_DIR tfhub_cache_dir + +COPY tests/requirements.txt . +RUN pip install --upgrade pip && \ + pip install -r requirements.txt && \ + python -c "import nltk; nltk.download('punkt')" + +COPY . . +COPY model.index /data/ + +CMD gunicorn --workers=1 server:app diff --git a/annotators/SentSeg/test.py b/annotators/SentSeg/test.py deleted file mode 100644 index 4d23cdfb88..0000000000 --- a/annotators/SentSeg/test.py +++ /dev/null @@ -1,11 +0,0 @@ -import requests - - -url = "http://0.0.0.0:8011/sentseg" -sentences = {"sentences": ["hey alexa how are you"]} - -gold = "hey alexa. how are you?" 
-response = requests.post(url, json=sentences).json() -assert response[0]["punct_sent"] == gold, print(response) - -print("SUCCESS!") diff --git a/annotators/SentSeg/test.sh b/annotators/SentSeg/test.sh index 468a5a38fc..986af2c29d 100755 --- a/annotators/SentSeg/test.sh +++ b/annotators/SentSeg/test.sh @@ -1,3 +1,3 @@ #!/bin/bash -python test.py \ No newline at end of file +python -m pytest tests/test.py \ No newline at end of file diff --git a/annotators/SentSeg/tests/conftest.py b/annotators/SentSeg/tests/conftest.py new file mode 100644 index 0000000000..ccaf03b23b --- /dev/null +++ b/annotators/SentSeg/tests/conftest.py @@ -0,0 +1,27 @@ +import pytest + + +def pytest_addoption(parser): + parser.addoption("--uri", action="store", default="http://0.0.0.0") + parser.addoption("--port", action="store", default=8011) + parser.addoption("--handle", action="store", default="/sentseg") + + +@pytest.fixture +def uri(request) -> str: + return request.config.getoption("--uri") + + +@pytest.fixture +def port(request) -> int: + return request.config.getoption("--port") + + +@pytest.fixture +def handle(request) -> str: + return request.config.getoption("--handle") + + +@pytest.fixture +def url(uri, port, handle) -> str: + return f"{uri}:{port}/{handle}" diff --git a/annotators/SentSeg/tests/requirements.txt b/annotators/SentSeg/tests/requirements.txt new file mode 100644 index 0000000000..4dc9f6cf99 --- /dev/null +++ b/annotators/SentSeg/tests/requirements.txt @@ -0,0 +1,14 @@ +tensorflow==1.14.0 +tensorflow_hub==0.4.0 +Flask==1.1.1 +itsdangerous==2.0.1 +nltk==3.2.5 +numpy==1.15.4 +gunicorn==19.9.0 +requests==2.22.0 +sentry-sdk==0.12.3 +jinja2<=3.0.3 +Werkzeug<=2.0.3 +protobuf<4 +pytest==7.4.2 +allure-pytest==2.13.2 \ No newline at end of file diff --git a/annotators/SentSeg/tests/test.py b/annotators/SentSeg/tests/test.py new file mode 100644 index 0000000000..1976131711 --- /dev/null +++ b/annotators/SentSeg/tests/test.py @@ -0,0 +1,81 @@ +import allure +import pytest +import requests + +from typing import Dict, List + + +@allure.description("""Base response test""") +def test_response(url): + data = {"sentences": ["Hello how are you", "I am fine", "Alexa what is the weather today"]} + response = requests.post(url, json=data) + response_data = response.json() + assert response.status_code == 200 + assert len(response_data) == 3 + assert "punct_sent" in response_data[0] + assert "segments" in response_data[0] + + +@allure.description("""Base response test: pass wrong json""") +def test_response_wrong_structure(url: str): + data = {"wrong": ["Hello how are you", "I am fine", "Alexa what is the weather today"]} + response = requests.post(url, json=data) + assert response.status_code == 500 + + +@allure.description("""Test punctuation""") +@pytest.mark.parametrize( + "sentences, gold", + [ + ({"sentences": ["hey alexa how are you"]}, "hey alexa. how are you?"), + ({"sentences": [""]}, ""), + ], +) +def test_sentseg_punctuation(url: str, sentences: Dict, gold: str): + response = requests.post(url, json=sentences) + data = response.json() + assert response.status_code == 200 + assert data[0]["punct_sent"] == gold + + +@allure.description("""Test sentence split""") +@pytest.mark.parametrize( + "sentences, gold", + [ + ({"sentences": ["hey alexa how are you"]}, ["hey alexa.", "how are you?"]), + ({"sentences": [""]}, [""]), + ({"sentences": ["Hello. How are you? 
I am fine!"]}, ["Hello.", "How are you?", "I am fine!"]), + ], +) +def test_sentseg_split(url: str, sentences: Dict, gold: List[str]): + response = requests.post(url, json=sentences) + data = response.json() + assert response.status_code == 200 + assert data[0]["segments"] == gold + + +@allure.description("""Test preprocessing""") +@pytest.mark.parametrize( + "sentences, gold", + [ + ({"sentences": ["Fred ai n't going."]}, "Fred is not going."), + ({"sentences": ["I'm hungry."]}, "I am hungry."), + ({"sentences": ["You're funny."]}, "You are funny."), + ({"sentences": ["I've done it."]}, "I have done it."), + ({"sentences": ["I'll be there."]}, "I will be there."), + ({"sentences": ["She's reading."]}, "She is reading."), + ({"sentences": ["he's running."]}, "he is running."), + ({"sentences": ["it's raining."]}, "it is raining."), + ({"sentences": ["that's interesting."]}, "that is interesting."), + ({"sentences": ["y'all come back now."]}, "you all come back now."), + ({"sentences": ["yall come back now."]}, "you all come back now."), + ({"sentences": ["I'd like a coffee."]}, "I would like a coffee."), + ({"sentences": ["I'm gon na study."]}, "I am going to study."), + ({"sentences": ["I wan na play."]}, "I want to play."), + ], +) +def test_sentseg_preprocessing(url: str, sentences: Dict, gold: str): + response = requests.post(url, json=sentences) + data = response.json() + assert response.status_code == 200 + assert data[0]["punct_sent"] == gold diff --git a/annotators/custom_entity_linking/Dockerfile b/annotators/custom_entity_linking/Dockerfile index 755f0e473c..71de75934d 100644 --- a/annotators/custom_entity_linking/Dockerfile +++ b/annotators/custom_entity_linking/Dockerfile @@ -21,12 +21,12 @@ ARG LANGUAGE=EN ENV LANGUAGE ${LANGUAGE} ARG CONFIG -ARG PORT +ARG SERVICE_PORT ARG SRC_DIR ARG SED_ARG=" | " ENV CONFIG=$CONFIG -ENV PORT=$PORT +ENV SERVICE_PORT=$SERVICE_PORT COPY ./annotators/custom_entity_linking/requirements.txt ./requirements.txt RUN pip install -r ./requirements.txt diff --git a/annotators/custom_entity_linking/tests/conftest.py b/annotators/custom_entity_linking/tests/conftest.py new file mode 100644 index 0000000000..f17fa03374 --- /dev/null +++ b/annotators/custom_entity_linking/tests/conftest.py @@ -0,0 +1,21 @@ +import pytest + + +def pytest_addoption(parser): + parser.addoption("--uri", action="store", default="http://0.0.0.0") + parser.addoption("--port", action="store", default="8153") + + +@pytest.fixture +def uri(request) -> str: + return request.config.getoption("--uri") + + +@pytest.fixture +def port(request) -> str: + return request.config.getoption("--port") + + +@pytest.fixture +def url(uri, port) -> str: + return f"{uri}:{port}" diff --git a/annotators/custom_entity_linking/test_el.py b/annotators/custom_entity_linking/tests/test_el.py similarity index 100% rename from annotators/custom_entity_linking/test_el.py rename to annotators/custom_entity_linking/tests/test_el.py diff --git a/annotators/entity_detection/Dockerfile-test b/annotators/entity_detection/Dockerfile-test new file mode 100644 index 0000000000..c55b70a1ac --- /dev/null +++ b/annotators/entity_detection/Dockerfile-test @@ -0,0 +1,24 @@ +FROM deeppavlov/deeppavlov:1.2.0-gpu + +RUN apt-get update && apt-get install git -y + +ARG SEQ_TAG_CONFIG +ARG CONFIG +ARG FINEGRAINED +ARG SERVICE_PORT +ARG SRC_DIR + +ENV SEQ_TAG_CONFIG=$SEQ_TAG_CONFIG +ENV CONFIG=$CONFIG +ENV FINEGRAINED=$FINEGRAINED +ENV SERVICE_PORT=$SERVICE_PORT + +COPY ./annotators/entity_detection/tests/requirements.txt 
/src/requirements.txt +RUN pip install -r /src/requirements.txt && python -m spacy download en_core_web_sm + +COPY $SRC_DIR /src + +WORKDIR /src +RUN python -m deeppavlov install $SEQ_TAG_CONFIG + +CMD gunicorn --workers=1 --timeout 500 server:app -b 0.0.0.0:$SERVICE_PORT diff --git a/annotators/entity_detection/__init__.py b/annotators/entity_detection/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/annotators/entity_detection/test.sh b/annotators/entity_detection/test.sh index 762d75b975..ba51800dc2 100755 --- a/annotators/entity_detection/test.sh +++ b/annotators/entity_detection/test.sh @@ -1,4 +1,4 @@ #!/bin/bash -python test_entity_detection.py +python -m pytest tests/test_entity_detection.py diff --git a/annotators/entity_detection/test_entity_detection.py b/annotators/entity_detection/test_entity_detection.py deleted file mode 100644 index c8af970931..0000000000 --- a/annotators/entity_detection/test_entity_detection.py +++ /dev/null @@ -1,47 +0,0 @@ -import requests - - -def main(): - url = "http://0.0.0.0:8103/respond" - - request_data = [ - {"sentences": [["what is the capital of russia?"]]}, - {"sentences": [["let's talk about politics."]]}, - ] - - gold_results = [ - [ - { - "entities": ["capital", "russia"], - "labelled_entities": [ - {"finegrained_label": [["misc", 0.871]], "label": "misc", "offsets": [12, 19], "text": "capital"}, - { - "finegrained_label": [["loc", 0.9927]], - "label": "location", - "offsets": [23, 29], - "text": "russia", - }, - ], - } - ], - [ - { - "entities": ["politics"], - "labelled_entities": [ - {"finegrained_label": [["misc", 0.9984]], "label": "misc", "offsets": [17, 25], "text": "politics"} - ], - } - ], - ] - - count = 0 - for data, gold_result in zip(request_data, gold_results): - result = requests.post(url, json=data).json() - if result == gold_result: - count += 1 - assert count == len(request_data) - print("Success") - - -if __name__ == "__main__": - main() diff --git a/annotators/entity_detection/tests/__init__.py b/annotators/entity_detection/tests/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/annotators/entity_detection/tests/conftest.py b/annotators/entity_detection/tests/conftest.py new file mode 100644 index 0000000000..1d55898e9a --- /dev/null +++ b/annotators/entity_detection/tests/conftest.py @@ -0,0 +1,27 @@ +import pytest + + +def pytest_addoption(parser): + parser.addoption("--uri", action="store", default="http://0.0.0.0") + parser.addoption("--port", action="store", default="8103") + parser.addoption("--handle", action="store", default="respond") + + +@pytest.fixture +def uri(request) -> str: + return request.config.getoption("--uri") + + +@pytest.fixture +def port(request) -> str: + return request.config.getoption("--port") + + +@pytest.fixture +def handle(request) -> str: + return request.config.getoption("--handle") + + +@pytest.fixture +def url(uri, port, handle) -> str: + return f"{uri}:{port}/{handle}" diff --git a/annotators/entity_detection/tests/requirements.txt b/annotators/entity_detection/tests/requirements.txt new file mode 100644 index 0000000000..d5b292c6f5 --- /dev/null +++ b/annotators/entity_detection/tests/requirements.txt @@ -0,0 +1,7 @@ +Flask==2.1.3 +setuptools<=65.5.1 +gunicorn==19.9.0 +sentry-sdk==0.12.3 +pyopenssl==23.0.0 +pytest==7.4.2 +allure-pytest==2.13.2 \ No newline at end of file diff --git a/annotators/entity_detection/tests/test_entity_detection.py b/annotators/entity_detection/tests/test_entity_detection.py new file mode 100644 index
0000000000..48db34a7c3 --- /dev/null +++ b/annotators/entity_detection/tests/test_entity_detection.py @@ -0,0 +1,58 @@ +import allure +import pytest +import requests + +from typing import Dict, List + + +@allure.description("""Test entities detection and labeling""") +@pytest.mark.parametrize( + "request_data, gold_results", + [ + ( + {"sentences": [["what is the capital of russia?"]]}, + [ + { + "entities": ["capital", "russia"], + "labelled_entities": [ + { + "finegrained_label": [["misc", 0.871]], + "label": "misc", + "offsets": [12, 19], + "text": "capital", + }, + { + "finegrained_label": [["loc", 0.9927]], + "label": "location", + "offsets": [23, 29], + "text": "russia", + }, + ], + } + ], + ), + ( + {"sentences": [["let's talk about politics."]]}, + [ + { + "entities": ["politics"], + "labelled_entities": [ + { + "finegrained_label": [["misc", 0.9984]], + "label": "misc", + "offsets": [17, 25], + "text": "politics", + } + ], + } + ], + ), + ], +) +def test_entity_detection(url: str, request_data: Dict[str, list], gold_results: List[Dict]): + response = requests.post(url, json=request_data) + result = response.json() + assert response.status_code == 200 + assert "entities" in result[0] + assert "labelled_entities" in result[0] + assert result == gold_results diff --git a/annotators/entity_detection_rus/Dockerfile-test b/annotators/entity_detection_rus/Dockerfile-test new file mode 100644 index 0000000000..ba920c6d83 --- /dev/null +++ b/annotators/entity_detection_rus/Dockerfile-test @@ -0,0 +1,25 @@ +FROM deeppavlov/base-gpu:0.12.1 +RUN pip install --upgrade pip && pip install git+https://github.com/deeppavlov/DeepPavlov.git@0.12.1 + +ARG CONFIG +ARG SERVICE_PORT +ARG SRC_DIR +ARG SED_ARG=" | " + +ARG LANGUAGE=EN +ENV LANGUAGE ${LANGUAGE} + +ENV CONFIG=$CONFIG +ENV SERVICE_PORT=$SERVICE_PORT + +COPY ./annotators/entity_detection_rus/tests/requirements.txt /src/requirements.txt +RUN pip install -r /src/requirements.txt + +COPY $SRC_DIR /src + +WORKDIR /src +RUN python -m deeppavlov install $CONFIG + +RUN sed -i "s|$SED_ARG|g" "$CONFIG" + +CMD gunicorn --workers=1 --timeout 500 server:app -b 0.0.0.0:8103 diff --git a/annotators/entity_detection_rus/test.sh b/annotators/entity_detection_rus/test.sh index 2a5dc46295..54cb59b6fc 100755 --- a/annotators/entity_detection_rus/test.sh +++ b/annotators/entity_detection_rus/test.sh @@ -1,5 +1,5 @@ #!/bin/bash -python test_entity_detection.py +python -m pytest tests/test_entity_detection.py diff --git a/annotators/entity_detection_rus/test_entity_detection.py b/annotators/entity_detection_rus/test_entity_detection.py deleted file mode 100644 index 273d295c7f..0000000000 --- a/annotators/entity_detection_rus/test_entity_detection.py +++ /dev/null @@ -1,29 +0,0 @@ -import requests - - -def main(): - url = "http://0.0.0.0:8103/respond" - - request_data = [{"last_utterances": [["кто написал войну и мир?"]]}] - - gold_results = [ - [ - { - "entities": ["войну и мир"], - "labelled_entities": [{"label": "literary_work", "offsets": [12, 23], "text": "войну и мир"}], - } - ] - ] - - count = 0 - for data, gold_result in zip(request_data, gold_results): - result = requests.post(url, json=data).json() - if result == gold_result: - count += 1 - - assert count == len(request_data) - print("Success") - - -if __name__ == "__main__": - main() diff --git a/annotators/entity_detection_rus/tests/conftest.py b/annotators/entity_detection_rus/tests/conftest.py new file mode 100644 index 0000000000..4a99a17859 --- /dev/null +++ 
b/annotators/entity_detection_rus/tests/conftest.py @@ -0,0 +1,27 @@ +import pytest + + +def pytest_addoption(parser): + parser.addoption("--uri", action="store", default="http://0.0.0.0") + parser.addoption("--port", action="store", default=8103) + parser.addoption("--handle", action="store", default="respond") + + +@pytest.fixture +def uri(request) -> str: + return request.config.getoption("--uri") + + +@pytest.fixture +def port(request) -> int: + return request.config.getoption("--port") + + +@pytest.fixture +def handle(request) -> str: + return request.config.getoption("--handle") + + +@pytest.fixture +def url(uri, port, handle): + return f"{uri}:{port}/{handle}" diff --git a/annotators/entity_detection_rus/tests/requirements.txt b/annotators/entity_detection_rus/tests/requirements.txt new file mode 100644 index 0000000000..7bc15cf0bf --- /dev/null +++ b/annotators/entity_detection_rus/tests/requirements.txt @@ -0,0 +1,16 @@ +Flask==1.1.1 +nltk==3.4.5 +gunicorn==19.9.0 +requests==2.22.0 +sentry-sdk==0.12.3 +torch==1.6.0 +transformers==4.6.0 +deeppavlov==0.17.2 +pymorphy2==0.8 +pymorphy2-dicts==2.4.393442.3710985 +pymorphy2-dicts-ru==2.4.417127.4579844 +itsdangerous==2.0.1 +jinja2<=3.0.3 +Werkzeug<=2.0.3 +pytest==7.4.2 +allure-pytest==2.13.2 \ No newline at end of file diff --git a/annotators/entity_detection_rus/tests/test_entity_detection.py b/annotators/entity_detection_rus/tests/test_entity_detection.py new file mode 100644 index 0000000000..226f2309c6 --- /dev/null +++ b/annotators/entity_detection_rus/tests/test_entity_detection.py @@ -0,0 +1,25 @@ +import pytest +import requests + +from typing import Dict, List + + +@pytest.mark.parametrize( + "request_data, gold_results", + [ + ( + {"last_utterances": [["кто написал войну и мир?"]]}, + [ + { + "entities": ["войну и мир"], + "labelled_entities": [{"label": "literary_work", "offsets": [12, 23], "text": "войну и мир"}], + } + ], + ) + ], +) +def test_entity_detection_rus(url: str, request_data: Dict, gold_results: List[Dict]): + response = requests.post(url, json=request_data) + result = response.json() + assert response.status_code == 200 + assert result == gold_results diff --git a/annotators/entity_linking/Dockerfile-test b/annotators/entity_linking/Dockerfile-test new file mode 100644 index 0000000000..bf77635c55 --- /dev/null +++ b/annotators/entity_linking/Dockerfile-test @@ -0,0 +1,45 @@ +FROM tensorflow/tensorflow:1.15.2-gpu + +RUN apt-key del 7fa2af80 && \ + rm -f /etc/apt/sources.list.d/cuda*.list && \ + curl https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/cuda-keyring_1.0-1_all.deb \ + -o cuda-keyring_1.0-1_all.deb && \ + dpkg -i cuda-keyring_1.0-1_all.deb && \ + apt-get -y update && \ + apt-get install -y \ + build-essential \ + zlib1g-dev \ + libbz2-dev \ + libreadline-dev \ + libsqlite3-dev \ + wget \ + llvm \ + libncurses5-dev \ + libncursesw5-dev \ + xz-utils \ + libffi-dev \ + liblzma-dev \ + software-properties-common \ + git \ + sqlite3 + +ARG LANGUAGE=EN +ENV LANGUAGE ${LANGUAGE} + +ARG CONFIG +ARG SERVICE_PORT +ARG SRC_DIR +ARG SED_ARG=" | " + +ENV CONFIG=$CONFIG +ENV SERVICE_PORT=$SERVICE_PORT + +COPY $SRC_DIR /src +WORKDIR /src + +RUN pip install --upgrade pip && pip install -r /src/tests/requirements.txt && \ + python -m deeppavlov install $CONFIG + +RUN sed -i "s|$SED_ARG|g" "$CONFIG" + +CMD gunicorn --workers=1 --timeout 500 server:app -b 0.0.0.0:8075 diff --git a/annotators/entity_linking/test.sh b/annotators/entity_linking/test.sh index ff0c94675f..eb5e6ed1cd 100755 --- 
a/annotators/entity_linking/test.sh +++ b/annotators/entity_linking/test.sh @@ -1,3 +1,3 @@ #!/bin/bash -python test_el.py +python -m pytest tests/test_el.py diff --git a/annotators/entity_linking/test_el.py b/annotators/entity_linking/test_el.py deleted file mode 100644 index 596a4c976e..0000000000 --- a/annotators/entity_linking/test_el.py +++ /dev/null @@ -1,38 +0,0 @@ -import requests - -use_context = True - - -def main(): - url = "http://0.0.0.0:8075/model" - - request_data = [ - { - "entity_substr": [["forrest gump"]], - "entity_tags": [[[("film", 0.9)]]], - "context": [["who directed forrest gump?"]], - }, - { - "entity_substr": [["robert lewandowski"]], - "entity_tags": [[[("per", 0.9)]]], - "context": [["what team does robert lewandowski play for?"]], - }, - ] - - gold_results = [["Q134773", "Q552213"], ["Q151269", "Q215925"]] - - count = 0 - for data, gold_result in zip(request_data, gold_results): - result = requests.post(url, json=data).json() - entity_ids = result[0][0]["entity_ids"] - if entity_ids == gold_result: - count += 1 - else: - print(f"Got {result}, but expected: {gold_result}") - - assert count == len(request_data) - print("Success") - - -if __name__ == "__main__": - main() diff --git a/annotators/entity_linking/tests/conftest.py b/annotators/entity_linking/tests/conftest.py new file mode 100644 index 0000000000..b79575eba7 --- /dev/null +++ b/annotators/entity_linking/tests/conftest.py @@ -0,0 +1,27 @@ +import pytest + + +def pytest_addoption(parser): + parser.addoption("--uri", action="store", default="http://0.0.0.0") + parser.addoption("--port", action="store", default="8075") + parser.addoption("--handle", action="store", default="model") + + +@pytest.fixture +def uri(request) -> str: + return request.config.getoption("--uri") + + +@pytest.fixture +def port(request) -> str: + return request.config.getoption("--port") + + +@pytest.fixture +def handle(request) -> str: + return request.config.getoption("--handle") + + +@pytest.fixture +def url(uri, port, handle) -> str: + return f"{uri}:{port}/{handle}" diff --git a/annotators/entity_linking/tests/requirements.txt b/annotators/entity_linking/tests/requirements.txt new file mode 100644 index 0000000000..bccc5ef604 --- /dev/null +++ b/annotators/entity_linking/tests/requirements.txt @@ -0,0 +1,15 @@ +Flask==2.0.3 +nltk==3.4.5 +gunicorn==20.1.0 +requests==2.22.0 +sentry-sdk<1.21.0 +rapidfuzz==2.11.1 +torch==1.6.0 +transformers==4.6.0 +deeppavlov==1.1.1 +itsdangerous==2.0.1 +jinja2<=3.0.3 +Werkzeug<=2.0.3 +cryptography==2.8 +pytest==7.0.1 +allure-pytest==2.13.2 \ No newline at end of file diff --git a/annotators/entity_linking/tests/test_el.py b/annotators/entity_linking/tests/test_el.py new file mode 100644 index 0000000000..75d1628cfa --- /dev/null +++ b/annotators/entity_linking/tests/test_el.py @@ -0,0 +1,33 @@ +import allure +import pytest +import requests + +use_context = True + + +@allure.description("""Test linking entities to tags, context""") +@pytest.mark.parametrize( + "request_data, gold_results", + [ + ( + { + "entity_substr": [["forrest gump"]], + "entity_tags": [[[("film", 0.9)]]], + "context": [["who directed forrest gump?"]], + }, + ["Q134773", "Q552213"], + ), + ( + { + "entity_substr": [["robert lewandowski"]], + "entity_tags": [[[("per", 0.9)]]], + "context": [["what team does robert lewandowski play for?"]], + }, + ["Q151269", "Q215925"], + ), + ], +) +def test_entity_linking(url: str, request_data, gold_results): + result = requests.post(url, json=request_data).json() + entity_ids = 
result[0][0]["entity_ids"] + assert entity_ids == gold_results diff --git a/annotators/fact_retrieval_rus/Dockerfile-test b/annotators/fact_retrieval_rus/Dockerfile-test new file mode 100644 index 0000000000..44ae140d75 --- /dev/null +++ b/annotators/fact_retrieval_rus/Dockerfile-test @@ -0,0 +1,26 @@ +FROM deeppavlov/deeppavlov:1.2.0 + +RUN apt-get update && apt-get install git -y + +ARG COMMIT=0.13.0 +ARG CONFIG +ARG SERVICE_PORT +ARG SRC_DIR +ARG TOP_N + +ENV COMMIT=$COMMIT +ENV CONFIG=$CONFIG +ENV SERVICE_PORT=$SERVICE_PORT +ENV TOP_N=$TOP_N + +COPY ./annotators/fact_retrieval_rus/tests/requirements.txt /src/requirements.txt +RUN pip install -r /src/requirements.txt + +RUN pip install git+https://github.com/deeppavlov/DeepPavlov.git@${COMMIT} + +COPY $SRC_DIR /src + +WORKDIR /src + +CMD gunicorn --workers=1 --timeout 500 server:app -b 0.0.0.0:8130 + diff --git a/annotators/fact_retrieval_rus/test.sh b/annotators/fact_retrieval_rus/test.sh index 9b89a64cd7..a45f82f8dd 100755 --- a/annotators/fact_retrieval_rus/test.sh +++ b/annotators/fact_retrieval_rus/test.sh @@ -1,3 +1,3 @@ #!/bin/bash -python test_fact_retrieval.py +python -m pytest tests/test_fact_retrieval.py diff --git a/annotators/fact_retrieval_rus/test_fact_retrieval.py b/annotators/fact_retrieval_rus/test_fact_retrieval.py deleted file mode 100644 index 92d3fefd2d..0000000000 --- a/annotators/fact_retrieval_rus/test_fact_retrieval.py +++ /dev/null @@ -1,37 +0,0 @@ -import requests - - -def main(): - url = "http://0.0.0.0:8110/model" - - request_data = [ - { - "dialog_history": [["Какая столица России?"]], - "entity_substr": [["россии"]], - "entity_tags": [["loc"]], - "entity_pages": [[["Россия"]]], - } - ] - - gold_results = [ - "Росси́я или Росси́йская Федера́ция (РФ), — государство в Восточной Европе и Северной Азии. Территория России" - " в её конституционных границах составляет км²; население страны (в пределах её заявленной территории) " - "составляет чел. (). Занимает первое место в мире по территории, шестое — по объёму ВВП по ППС, и девятое " - "— по численности населения. Столица — Москва. Государственный язык — русский. Денежная единица — " - "российский рубль." 
- ] - - count = 0 - for data, gold_result in zip(request_data, gold_results): - result = requests.post(url, json=data).json() - if result[0] and result[0][0] and result[0][0][0] == gold_result: - count += 1 - else: - print(f"Got {result}, but expected: {gold_result}") - - assert count == len(request_data) - print("Success") - - -if __name__ == "__main__": - main() diff --git a/annotators/fact_retrieval_rus/tests/conftest.py b/annotators/fact_retrieval_rus/tests/conftest.py new file mode 100644 index 0000000000..94aa60c666 --- /dev/null +++ b/annotators/fact_retrieval_rus/tests/conftest.py @@ -0,0 +1,27 @@ +import pytest + + +def pytest_addoption(parser): + parser.addoption("--uri", action="store", default="0.0.0.0") + parser.addoption("--port", action="store", default=8110) + parser.addoption("--handle", action="store", default="model") + + +@pytest.fixture +def uri(request) -> str: + return request.config.getoption("--uri") + + +@pytest.fixture +def port(request) -> int: + return request.config.getoption("--port") + + +@pytest.fixture +def handle(request) -> str: + return request.config.getoption("--handle") + + +@pytest.fixture +def url(uri, port, handle) -> str: + return f"http://{uri}:{port}/{handle}" diff --git a/annotators/fact_retrieval_rus/tests/requirements.txt b/annotators/fact_retrieval_rus/tests/requirements.txt new file mode 100644 index 0000000000..8d4d869a60 --- /dev/null +++ b/annotators/fact_retrieval_rus/tests/requirements.txt @@ -0,0 +1,8 @@ +gunicorn==19.9.0 +sentry-sdk[flask]==0.14.1 +pytorch-lightning==1.2.2 +pyOpenSSL==22.0.0 +faiss-cpu==1.7.0 +rusenttokenize==0.0.5 +pytest==7.4.2 +allure-pytest==2.13.2 \ No newline at end of file diff --git a/annotators/fact_retrieval_rus/tests/test_fact_retrieval.py b/annotators/fact_retrieval_rus/tests/test_fact_retrieval.py new file mode 100644 index 0000000000..8062e80f77 --- /dev/null +++ b/annotators/fact_retrieval_rus/tests/test_fact_retrieval.py @@ -0,0 +1,30 @@ +import allure +import pytest +import requests + + +@allure.description("""Test fact retrieval""") +@pytest.mark.parametrize( + "request_data, gold_results", + [ + ( + { + "dialog_history": [["Какая столица России?"]], + "entity_substr": [["россии"]], + "entity_tags": [["loc"]], + "entity_pages": [[["Россия"]]], + }, + "Росси́я или Росси́йская Федера́ция (РФ), — государство в Восточной Европе" + " и Северной Азии. Территория России" + " в её конституционных границах составляет км²; население страны (в пределах её заявленной территории) " + "составляет чел. (). Занимает первое место в мире по территории, шестое — по объёму ВВП по ППС, и девятое " + "— по численности населения. Столица — Москва. Государственный язык — русский. 
Денежная единица — " + "российский рубль.", + ) + ], +) +def test_fact_retrieval_rus(url: str, request_data: dict, gold_results: str): + response = requests.post(url, json=request_data) + result = response.json() + assert response.status_code == 200 + assert result[0] and result[0][0] and result[0][0][0] == gold_results diff --git a/annotators/kbqa/Dockerfile-test b/annotators/kbqa/Dockerfile-test new file mode 100644 index 0000000000..3e8b9f45d1 --- /dev/null +++ b/annotators/kbqa/Dockerfile-test @@ -0,0 +1,32 @@ +FROM deeppavlov/deeppavlov:1.2.0-gpu + +ARG CONFIG +ARG COMMIT +ARG SERVICE_PORT +ARG SRC_DIR + +ARG SED_ARG=" | " + +ENV CONFIG=$CONFIG +ENV SERVICE_PORT=$SERVICE_PORT +ENV COMMIT=$COMMIT + +COPY ./annotators/kbqa/tests/requirements.txt /src/requirements.txt + +RUN apt update && \ + apt install -y git + +RUN pip install --upgrade pip && \ + pip install -r /src/requirements.txt && \ + pip install https://codeload.github.com/deeppavlov/DeepPavlov/tar.gz/${COMMIT} + +COPY $SRC_DIR /src + +WORKDIR /src + +RUN sed -i "s|$SED_ARG|g" "$CONFIG" + +RUN python -m deeppavlov install $CONFIG && \ + python -m spacy download en_core_web_sm + +CMD gunicorn --workers=1 --timeout 500 server:app -b 0.0.0.0:8072 diff --git a/annotators/kbqa/test.sh b/annotators/kbqa/test.sh index ff0e901eb2..1fb3a56856 100755 --- a/annotators/kbqa/test.sh +++ b/annotators/kbqa/test.sh @@ -1,3 +1,3 @@ #!/bin/bash -python test_kbqa.py +python -m pytest tests/test_kbqa.py diff --git a/annotators/kbqa/test_kbqa.py b/annotators/kbqa/test_kbqa.py deleted file mode 100644 index a2b6d33921..0000000000 --- a/annotators/kbqa/test_kbqa.py +++ /dev/null @@ -1,27 +0,0 @@ -import requests - - -def main(): - url = "http://0.0.0.0:8072/model" - - request_data = [ - {"x_init": ["Who is Donald Trump?"], "entities": [["Donald Trump"]], "entity_tags": [[["per", 1.0]]]}, - {"x_init": ["How old is Donald Trump?"], "entities": [["Donald Trump"]], "entity_tags": [[["per", 1.0]]]}, - ] - - gold_answers = ["Donald Trump is 45th president of the United States (2017–2021).", "Donald Trump is 77 years old."] - count = 0 - for data, gold_ans in zip(request_data, gold_answers): - result = requests.post(url, json=data).json() - res_ans = result[0]["answer"] - if res_ans == gold_ans: - count += 1 - else: - print(f"Got {res_ans}, but expected: {gold_ans}") - - if count == len(request_data): - print("Success") - - -if __name__ == "__main__": - main() diff --git a/annotators/kbqa/tests/conftest.py b/annotators/kbqa/tests/conftest.py new file mode 100644 index 0000000000..6e37d9b590 --- /dev/null +++ b/annotators/kbqa/tests/conftest.py @@ -0,0 +1,10 @@ +import pytest + + +def pytest_addoption(parser): + parser.addoption("--url", action="store", default="http://0.0.0.0:8072/model") + + +@pytest.fixture +def url(request) -> str: + return request.config.getoption("--url") diff --git a/annotators/kbqa/tests/requirements.txt b/annotators/kbqa/tests/requirements.txt new file mode 100644 index 0000000000..2480fd492c --- /dev/null +++ b/annotators/kbqa/tests/requirements.txt @@ -0,0 +1,13 @@ +cryptography==38.0.4 +pyopenssl==23.0.0 +sentry-sdk[flask]==0.14.1 +gunicorn==19.9.0 +transformers==4.6.0 +datasets==1.2.0 +git+https://github.com/deeppavlov/bert.git@feat/multi_gpu +pybind11==2.2.4 +tensorflow==1.13.1 +protobuf==3.20.1 +click==7.1.2 +pytest==7.4.2 +allure-pytest==2.13.2 \ No newline at end of file diff --git a/annotators/kbqa/tests/test_kbqa.py b/annotators/kbqa/tests/test_kbqa.py new file mode 100644 index 0000000000..09048d8e71 --- /dev/null +++ 
b/annotators/kbqa/tests/test_kbqa.py @@ -0,0 +1,23 @@ +import allure +import pytest +import requests + + +@allure.description("""Test kbqa""") +@pytest.mark.parametrize( + "request_data, gold_answer", + [ + ( + {"x_init": ["Who is Donald Trump?"], "entities": [["Donald Trump"]], "entity_tags": [[["per", 1.0]]]}, + "Donald Trump is 45th president of the United States (2017–2021).", + ), + ( + {"x_init": ["How old is Donald Trump?"], "entities": [["Donald Trump"]], "entity_tags": [[["per", 1.0]]]}, + "Donald Trump is 77 years old.", + ), + ], +) +def test_kbqa(url: str, request_data: dict, gold_answer: str): + result = requests.post(url, json=request_data).json() + res_ans = result[0]["answer"] + assert res_ans == gold_answer diff --git a/annotators/relative_persona_extractor/service_configs/relative-persona-extractor-ru/environment.yml b/annotators/relative_persona_extractor/service_configs/relative-persona-extractor-ru/environment.yml new file mode 100644 index 0000000000..6f437fa390 --- /dev/null +++ b/annotators/relative_persona_extractor/service_configs/relative-persona-extractor-ru/environment.yml @@ -0,0 +1,5 @@ +SERVICE_PORT: 8133 +SERVICE_NAME: relative_persona_extractor +SENTENCE_RANKER_SERVICE_URL: http://dialogrpt-ru:8122/respond +N_SENTENCES_TO_RETURN: 3 +FLASK_APP: server diff --git a/annotators/relative_persona_extractor/service_configs/relative-persona-extractor-ru/service.yml b/annotators/relative_persona_extractor/service_configs/relative-persona-extractor-ru/service.yml new file mode 100644 index 0000000000..e8def171d8 --- /dev/null +++ b/annotators/relative_persona_extractor/service_configs/relative-persona-extractor-ru/service.yml @@ -0,0 +1,29 @@ +name: relative-persona-extractor-ru +endpoints: +- respond +compose: + env_file: + - .env_ru + build: + args: + SERVICE_PORT: 8133 + SERVICE_NAME: relative_persona_extractor + SENTENCE_RANKER_SERVICE_URL: http://dialogrpt-ru:8122/respond + N_SENTENCES_TO_RETURN: 3 + FLASK_APP: server + context: . + dockerfile: ./annotators/relative_persona_extractor/Dockerfile + command: flask run -h 0.0.0.0 -p 8133 + environment: + - FLASK_APP=server + deploy: + resources: + limits: + memory: 100M + reservations: + memory: 100M + volumes: + - ./annotators/relative_persona_extractor:/src + - ./common:/src/common + ports: + - 8133:8133 diff --git a/annotators/sentseg_ru/Dockerfile-test b/annotators/sentseg_ru/Dockerfile-test new file mode 100644 index 0000000000..714ce9ce39 --- /dev/null +++ b/annotators/sentseg_ru/Dockerfile-test @@ -0,0 +1,24 @@ +FROM deeppavlov/base-gpu:0.17.2 +RUN pip install --upgrade pip && pip install git+https://github.com/deeppavlov/DeepPavlov.git@0.17.2 + +ARG CONFIG +ARG SED_ARG=" | " + +ENV CONFIG=$CONFIG + +RUN mkdir /src /midas + +COPY ./tests/requirements.txt /src/requirements.txt + +RUN pip install pip==21.3.1 && \ + pip install -r /src/requirements.txt + +COPY . /src/ + +WORKDIR /src + +RUN python -m spacy download ru_core_news_sm + +RUN sed -i "s|$SED_ARG|g" "$CONFIG" + +CMD gunicorn --workers=1 server:app -b 0.0.0.0:8011 \ No newline at end of file diff --git a/annotators/sentseg_ru/test.py b/annotators/sentseg_ru/test.py deleted file mode 100644 index 82a1ab2167..0000000000 --- a/annotators/sentseg_ru/test.py +++ /dev/null @@ -1,15 +0,0 @@ -import requests - - -url = "http://0.0.0.0:8011/sentseg" -sentences = {"sentences": ["привет как дела"]} - -gold = "привет. как дела?" 
-segments_gold = ["привет.", "как дела?"] - -response = requests.post(url, json=sentences).json() - -assert response[0]["punct_sent"] == gold, print(response) -assert response[0]["segments"] == segments_gold, print(response) - -print("SUCCESS!") diff --git a/annotators/sentseg_ru/test.sh b/annotators/sentseg_ru/test.sh index 61672db785..22171cf570 100755 --- a/annotators/sentseg_ru/test.sh +++ b/annotators/sentseg_ru/test.sh @@ -1,3 +1,3 @@ #!/bin/bash -python test.py +python -m pytest tests/test.py diff --git a/annotators/sentseg_ru/tests/conftest.py b/annotators/sentseg_ru/tests/conftest.py new file mode 100644 index 0000000000..31dc3c4297 --- /dev/null +++ b/annotators/sentseg_ru/tests/conftest.py @@ -0,0 +1,27 @@ +import pytest + + +def pytest_addoption(parser): + parser.addoption("--uri", action="store", default="http://0.0.0.0") + parser.addoption("--port", action="store", default=8011) + parser.addoption("--handle", action="store", default="/sentseg") + + +@pytest.fixture +def uri(request) -> str: + return request.config.getoption("--uri") + + +@pytest.fixture +def port(request) -> str: + return request.config.getoption("--port") + + +@pytest.fixture +def handle(request) -> str: + return request.config.getoption("--handle") + + +@pytest.fixture +def url(uri, port, handle) -> str: + return f"{uri}:{port}/{handle}" diff --git a/annotators/sentseg_ru/tests/requirements.txt b/annotators/sentseg_ru/tests/requirements.txt new file mode 100644 index 0000000000..02efbb131f --- /dev/null +++ b/annotators/sentseg_ru/tests/requirements.txt @@ -0,0 +1,16 @@ +flask==1.1.1 +itsdangerous==2.0.1 +gunicorn==20.0.4 +sentry-sdk==0.13.4 +requests==2.22.0 +spacy==3.2.0 +jinja2<=3.0.3 +Werkzeug<=2.0.3 +transformers==4.6.0 +torch==1.13.1 +torchvision==0.14.1 +pytorch-crf==0.7.* +cryptography==2.8 +pymorphy2==0.9.1 +pytest==7.4.2 +allure-pytest==2.13.2 \ No newline at end of file diff --git a/annotators/sentseg_ru/tests/test.py b/annotators/sentseg_ru/tests/test.py new file mode 100644 index 0000000000..aa940c9247 --- /dev/null +++ b/annotators/sentseg_ru/tests/test.py @@ -0,0 +1,14 @@ +import pytest +import requests + + +@pytest.mark.parametrize( + "sentences, gold, segments_gold", + [({"sentences": ["привет как дела"]}, "привет. 
как дела?", ["привет.", "как дела?"])], +) +def test_sentseg(url: str, sentences: dict, gold: str, segments_gold: list): + response = requests.post(url, json=sentences) + result = response.json() + assert response.status_code == 200 + assert result[0]["punct_sent"] == gold + assert result[0]["segments"] == segments_gold diff --git a/annotators/user_knowledge_memorizer/Dockerfile b/annotators/user_knowledge_memorizer/Dockerfile index 7e7ea72ebc..67fa0cf38d 100644 --- a/annotators/user_knowledge_memorizer/Dockerfile +++ b/annotators/user_knowledge_memorizer/Dockerfile @@ -8,14 +8,14 @@ ARG TERMINUSDB_SERVER_PASSWORD ARG TERMINUSDB_SERVER_URL ARG TERMINUSDB_SERVER_TEAM ARG TERMINUSDB_SERVER_DB -ARG CONFIG +ARG USER_KM_SERVICE_CONFIG ENV SERVICE_PORT=$SERVICE_PORT ENV TERMINUSDB_SERVER_PASSWORD=$TERMINUSDB_SERVER_PASSWORD ENV TERMINUSDB_SERVER_URL=$TERMINUSDB_SERVER_URL ENV TERMINUSDB_SERVER_TEAM=$TERMINUSDB_SERVER_TEAM ENV TERMINUSDB_SERVER_DB=$TERMINUSDB_SERVER_DB -ENV CONFIG=$CONFIG +ENV USER_KM_SERVICE_CONFIG=$USER_KM_SERVICE_CONFIG RUN pip install -U pip wheel setuptools diff --git a/annotators/user_knowledge_memorizer/config.json b/annotators/user_knowledge_memorizer/index_load_path.json similarity index 100% rename from annotators/user_knowledge_memorizer/config.json rename to annotators/user_knowledge_memorizer/index_load_path.json diff --git a/annotators/user_knowledge_memorizer/server.py b/annotators/user_knowledge_memorizer/server.py index b3b757f0a3..e027c66dce 100644 --- a/annotators/user_knowledge_memorizer/server.py +++ b/annotators/user_knowledge_memorizer/server.py @@ -29,7 +29,7 @@ assert TERMINUSDB_SERVER_PASSWORD, logger.error("TerminusDB server password is not specified") TERMINUSDB_SERVER_DB = os.getenv("TERMINUSDB_SERVER_DB") TERMINUSDB_SERVER_TEAM = os.getenv("TERMINUSDB_SERVER_TEAM") -config_path = os.getenv("CONFIG") +config_path = os.getenv("USER_KM_SERVICE_CONFIG") with open(config_path, "r") as config_file: config = json.load(config_file) index_load_path = Path(os.path.expanduser(config["metadata"]["variables"]["CUSTOM_EL"])) diff --git a/annotators/user_knowledge_memorizer/service_configs/user-knowledge-memorizer/environment.yml b/annotators/user_knowledge_memorizer/service_configs/user-knowledge-memorizer/environment.yml index b05f7a192b..747075ec09 100644 --- a/annotators/user_knowledge_memorizer/service_configs/user-knowledge-memorizer/environment.yml +++ b/annotators/user_knowledge_memorizer/service_configs/user-knowledge-memorizer/environment.yml @@ -6,4 +6,4 @@ TERMINUSDB_SERVER_URL: http://terminusdb-server:6363 TERMINUSDB_SERVER_PASSWORD: root TERMINUSDB_SERVER_TEAM: admin TERMINUSDB_SERVER_DB: user_knowledge_db -CONFIG: config.json +USER_KM_SERVICE_CONFIG: index_load_path.json diff --git a/annotators/user_knowledge_memorizer/service_configs/user-knowledge-memorizer/service.yml b/annotators/user_knowledge_memorizer/service_configs/user-knowledge-memorizer/service.yml index ae0e52716d..2a87d22e0d 100644 --- a/annotators/user_knowledge_memorizer/service_configs/user-knowledge-memorizer/service.yml +++ b/annotators/user_knowledge_memorizer/service_configs/user-knowledge-memorizer/service.yml @@ -13,7 +13,7 @@ compose: TERMINUSDB_SERVER_PASSWORD: root TERMINUSDB_SERVER_TEAM: admin TERMINUSDB_SERVER_DB: user_knowledge_db - CONFIG: config.json + USER_KM_SERVICE_CONFIG: index_load_path.json context: ./ dockerfile: annotators/user_knowledge_memorizer/Dockerfile command: flask run -h 0.0.0.0 -p 8027 diff --git a/annotators/wiki_parser/Dockerfile-test 
b/annotators/wiki_parser/Dockerfile-test new file mode 100644 index 0000000000..a068a9c565 --- /dev/null +++ b/annotators/wiki_parser/Dockerfile-test @@ -0,0 +1,39 @@ +FROM python:3.9.16 + +ARG CONFIG +ARG COMMIT +ARG FAST=1 +ARG SERVICE_PORT +ARG SRC_DIR +ARG LANGUAGE=EN +ENV LANGUAGE ${LANGUAGE} + +ENV CONFIG=$CONFIG +ENV FAST=$FAST +ENV SERVICE_PORT=$SERVICE_PORT +ENV COMMIT=$COMMIT +ENV LANGUAGE=$LANGUAGE + +COPY ./annotators/wiki_parser/tests/requirements.txt /src/requirements.txt +RUN pip install --upgrade pip && \ + pip install -r /src/requirements.txt && \ + echo export PYTHONPATH=$(PYTHONPATH):/home/3.9/pybind11 && \ + pip install hdt==2.3 + +ARG WIKI_LITE_DB +ENV WIKI_LITE_DB ${WIKI_LITE_DB} +ARG WIKI_LITE_INDEX_DB +ENV WIKI_LITE_INDEX_DB ${WIKI_LITE_INDEX_DB} +ARG WIKI_CACHE_DB +ENV WIKI_CACHE_DB ${WIKI_CACHE_DB} + +RUN mkdir -p /root/.deeppavlov/downloads/wikidata +RUN wget -q -P /root/.deeppavlov/downloads/wikidata ${WIKI_LITE_DB} && \ + wget -q -P /root/.deeppavlov/downloads/wikidata ${WIKI_LITE_INDEX_DB} && \ + wget -q -P /root/.deeppavlov/downloads/wikidata ${WIKI_CACHE_DB} + +COPY $SRC_DIR /src +WORKDIR /src +COPY ./common/ ./common/ + +CMD gunicorn --workers=1 --timeout 500 --graceful-timeout 500 server:app -b 0.0.0.0:8077 diff --git a/annotators/wiki_parser/test.sh b/annotators/wiki_parser/test.sh index d1bcb3a9be..942e35ddc6 100755 --- a/annotators/wiki_parser/test.sh +++ b/annotators/wiki_parser/test.sh @@ -1,3 +1,3 @@ #!/bin/bash -python test_wiki_parser.py +python -m pytest tests/test_wiki_parser.py diff --git a/annotators/wiki_parser/test_wiki_parser.py b/annotators/wiki_parser/test_wiki_parser.py deleted file mode 100644 index 56b404efc7..0000000000 --- a/annotators/wiki_parser/test_wiki_parser.py +++ /dev/null @@ -1,139 +0,0 @@ -import os -import requests - -if os.getenv("LANGUAGE", "EN") == "RU": - lang = "@ru" -else: - lang = "@en" - - -def main(): - url = "http://0.0.0.0:8077/model" - - request_data_en = [ - { - "parser_info": ["find_top_triplets"], - "query": [[{"entity_substr": "Jurgen Schmidhuber", "entity_ids": ["Q92735"]}]], - } - ] - request_data_ru = [ - { - "parser_info": ["find_top_triplets"], - "query": [[{"entity_substr": "Юрген Шмидхубер", "entity_ids": ["Q92735"]}]], - } - ] - gold_results_en = [ - [ - { - "animals_skill_entities_info": {}, - "entities_info": { - "Jurgen Schmidhuber": { - "age": 60, - "conf": 1.0, - "country of sitizenship": [["Q183", "Germany"]], - "date of birth": [['"+1963-01-17^^T"', "17 January 1963"]], - "entity_label": "Jürgen Schmidhuber", - "instance of": [["Q5", "human"]], - "occupation": [ - ["Q15976092", "artificial intelligence researcher"], - ["Q1622272", "university teacher"], - ["Q1650915", "researcher"], - ["Q82594", "computer scientist"], - ], - "plain_entity": "Q92735", - "pos": 0, - "token_conf": 1.0, - "types_2hop": [ - ["Q12737077", "occupation"], - ["Q14565186", "cognitive scientist"], - ["Q15976092", "artificial intelligence researcher"], - ["Q15980158", "non-fiction writer"], - ["Q1622272", "university teacher"], - ["Q1650915", "researcher"], - ["Q28640", "profession"], - ["Q3400985", "academic"], - ["Q37226", "teacher"], - ["Q4164871", "position"], - ["Q5", "human"], - ["Q5157565", "computer professional"], - ["Q5428874", "faculty member"], - ["Q66666607", "academic profession"], - ["Q66666685", "academic professional"], - ["Q82594", "computer scientist"], - ], - } - }, - "topic_skill_entities_info": {}, - "utt_num": 0, - "wiki_skill_entities_info": {}, - } - ] - ] - gold_results_ru = [ - [ - { - 
"animals_skill_entities_info": {}, - "entities_info": { - "Юрген Шмидхубер": { - "age": 60, - "conf": 1.0, - "country of sitizenship": [["Q183", "Германия"]], - "date of birth": [['"+1963-01-17^^T"', "17 January 1963"]], - "entity_label": "Шмидхубер, Юрген", - "instance of": [["Q5", "человек"]], - "occupation": [ - ["Q15976092", "исследователь искусственного интеллекта"], - ["Q1622272", "преподаватель университета"], - ["Q1650915", "исследователь"], - ["Q82594", "специалист в области информатики"], - ], - "plain_entity": "Q92735", - "pos": 0, - "token_conf": 1.0, - "types_2hop": [ - ["Q12737077", "род занятий"], - ["Q15976092", "исследователь искусственного интеллекта"], - ["Q15980158", "писатель-документалист"], - ["Q1622272", "преподаватель университета"], - ["Q1650915", "исследователь"], - ["Q28640", "профессия"], - ["Q3400985", "научно-педагогический работник"], - ["Q37226", "учитель"], - ["Q4164871", "должность"], - ["Q5", "человек"], - ["Q5157565", "профессия в ИТ"], - ["Q5428874", "преподаватель"], - ["Q66666607", "академическая профессия"], - ["Q66666685", "академический профессионал"], - ["Q82594", "специалист в области информатики"], - ], - } - }, - "topic_skill_entities_info": {}, - "utt_num": 0, - "wiki_skill_entities_info": {}, - } - ] - ] - - count = 0 - if lang == "@ru": - for data, gold_result in zip(request_data_ru, gold_results_ru): - result = requests.post(url, json=data).json() - if result == gold_result: - count += 1 - assert count == len(request_data_ru), print(f"Got {result}, but expected: {gold_result}") - - print("Success") - elif lang == "@en": - for data, gold_result in zip(request_data_en, gold_results_en): - result = requests.post(url, json=data).json() - if result == gold_result: - count += 1 - assert count == len(request_data_en), print(f"Got {result}, but expected: {gold_result}") - - print("Success") - - -if __name__ == "__main__": - main() diff --git a/annotators/wiki_parser/tests/conftest.py b/annotators/wiki_parser/tests/conftest.py new file mode 100644 index 0000000000..4348746579 --- /dev/null +++ b/annotators/wiki_parser/tests/conftest.py @@ -0,0 +1,27 @@ +import pytest + + +def pytest_addoption(parser): + parser.addoption("--uri", action="store", default="http://0.0.0.0") + parser.addoption("--port", action="store", default=8077) + parser.addoption("--handle", action="store", default="model") + + +@pytest.fixture +def uri(request) -> str: + return request.config.getoption("--uri") + + +@pytest.fixture +def port(request) -> str: + return request.config.getoption("--port") + + +@pytest.fixture +def handle(request) -> str: + return request.config.getoption("--handle") + + +@pytest.fixture +def url(uri, port, handle) -> str: + return f"{uri}:{port}/{handle}" diff --git a/annotators/wiki_parser/tests/requirements.txt b/annotators/wiki_parser/tests/requirements.txt new file mode 100644 index 0000000000..0a9453314e --- /dev/null +++ b/annotators/wiki_parser/tests/requirements.txt @@ -0,0 +1,10 @@ +sentry-sdk[flask]==1.19.1 +flask==2.1.3 +itsdangerous==2.0.1 +gunicorn==20.1.0 +requests==2.28.2 +jinja2<=3.0.3 +Werkzeug==2.3.7 +pybind11==2.10.4 +pytest==7.4.2 +allure-pytest==2.13.2 \ No newline at end of file diff --git a/annotators/wiki_parser/tests/test_wiki_parser.py b/annotators/wiki_parser/tests/test_wiki_parser.py new file mode 100644 index 0000000000..06b15f2142 --- /dev/null +++ b/annotators/wiki_parser/tests/test_wiki_parser.py @@ -0,0 +1,140 @@ +import os + +import allure +import pytest +import requests + + +if os.getenv("LANGUAGE", "EN") == "RU": + 
lang = "@ru" +else: + lang = "@en" + + +@allure.description("""Test parsing en wiki""") +@pytest.mark.parametrize( + "request_data, gold_results", + [ + ( + { + "parser_info": ["find_top_triplets"], + "query": [[{"entity_substr": "Jurgen Schmidhuber", "entity_ids": ["Q92735"]}]], + }, + [ + { + "animals_skill_entities_info": {}, + "entities_info": { + "Jurgen Schmidhuber": { + "age": 60, + "conf": 1.0, + "country of sitizenship": [["Q183", "Germany"]], + "date of birth": [['"+1963-01-17^^T"', "17 January 1963"]], + "entity_label": "Jürgen Schmidhuber", + "instance of": [["Q5", "human"]], + "occupation": [ + ["Q15976092", "artificial intelligence researcher"], + ["Q1622272", "university teacher"], + ["Q1650915", "researcher"], + ["Q82594", "computer scientist"], + ], + "plain_entity": "Q92735", + "pos": 0, + "token_conf": 1.0, + "types_2hop": [ + ["Q12737077", "occupation"], + ["Q14565186", "cognitive scientist"], + ["Q15976092", "artificial intelligence researcher"], + ["Q15980158", "non-fiction writer"], + ["Q1622272", "university teacher"], + ["Q1650915", "researcher"], + ["Q28640", "profession"], + ["Q3400985", "academic"], + ["Q37226", "teacher"], + ["Q4164871", "position"], + ["Q5", "human"], + ["Q5157565", "computer professional"], + ["Q5428874", "faculty member"], + ["Q66666607", "academic profession"], + ["Q66666685", "academic professional"], + ["Q82594", "computer scientist"], + ], + } + }, + "topic_skill_entities_info": {}, + "utt_num": 0, + "wiki_skill_entities_info": {}, + } + ], + ) + ], +) +@pytest.mark.skipif(lang="@ru") +def test_wiki_parser_en(url: str, request_data: dict, gold_results: list[dict]): + response = requests.post(url, json=request_data) + result = response.json() + assert response.status_code == 200 + assert result == gold_results + + +@allure.description("""Test parsing ru wiki""") +@pytest.mark.parametrize( + "request_data, gold_results", + [ + ( + { + "parser_info": ["find_top_triplets"], + "query": [[{"entity_substr": "Юрген Шмидхубер", "entity_ids": ["Q92735"]}]], + }, + [ + { + "animals_skill_entities_info": {}, + "entities_info": { + "Юрген Шмидхубер": { + "age": 60, + "conf": 1.0, + "country of sitizenship": [["Q183", "Германия"]], + "date of birth": [['"+1963-01-17^^T"', "17 January 1963"]], + "entity_label": "Шмидхубер, Юрген", + "instance of": [["Q5", "человек"]], + "occupation": [ + ["Q15976092", "исследователь искусственного интеллекта"], + ["Q1622272", "преподаватель университета"], + ["Q1650915", "исследователь"], + ["Q82594", "специалист в области информатики"], + ], + "plain_entity": "Q92735", + "pos": 0, + "token_conf": 1.0, + "types_2hop": [ + ["Q12737077", "род занятий"], + ["Q15976092", "исследователь искусственного интеллекта"], + ["Q15980158", "писатель-документалист"], + ["Q1622272", "преподаватель университета"], + ["Q1650915", "исследователь"], + ["Q28640", "профессия"], + ["Q3400985", "научно-педагогический работник"], + ["Q37226", "учитель"], + ["Q4164871", "должность"], + ["Q5", "человек"], + ["Q5157565", "профессия в ИТ"], + ["Q5428874", "преподаватель"], + ["Q66666607", "академическая профессия"], + ["Q66666685", "академический профессионал"], + ["Q82594", "специалист в области информатики"], + ], + } + }, + "topic_skill_entities_info": {}, + "utt_num": 0, + "wiki_skill_entities_info": {}, + } + ], + ) + ], +) +@pytest.mark.skipif(lang="@en") +def test_wiki_parser_ru(url: str, request_data: dict, gold_results: list[dict]): + response = requests.post(url, json=request_data) + result = response.json() + assert response.status_code 
== 200 + assert result == gold_results diff --git a/assistant_dists/action_stories_ru_assistant/pipeline_conf.json b/assistant_dists/action_stories_ru_assistant/pipeline_conf.json index 6354bc9f68..83fa4b12e8 100644 --- a/assistant_dists/action_stories_ru_assistant/pipeline_conf.json +++ b/assistant_dists/action_stories_ru_assistant/pipeline_conf.json @@ -198,6 +198,9 @@ "previous_services": [ "candidate_annotators" ], + "tags": [ + "selector" + ], "state_manager_method": "add_bot_utterance", "is_enabled": true, "source": { diff --git a/assistant_dists/ai_faq_assistant/pipeline_conf.json b/assistant_dists/ai_faq_assistant/pipeline_conf.json index c2c037b98f..a21dcc585d 100644 --- a/assistant_dists/ai_faq_assistant/pipeline_conf.json +++ b/assistant_dists/ai_faq_assistant/pipeline_conf.json @@ -256,6 +256,9 @@ "previous_services": [ "candidate_annotators" ], + "tags": [ + "selector" + ], "state_manager_method": "add_bot_utterance", "is_enabled": true, "source": { diff --git a/assistant_dists/deeppavlov_assistant/pipeline_conf.json b/assistant_dists/deeppavlov_assistant/pipeline_conf.json index 883a13ac68..a95c4c9293 100644 --- a/assistant_dists/deeppavlov_assistant/pipeline_conf.json +++ b/assistant_dists/deeppavlov_assistant/pipeline_conf.json @@ -180,6 +180,9 @@ "previous_services": [ "candidate_annotators" ], + "tags": [ + "selector" + ], "state_manager_method": "add_bot_utterance", "is_enabled": true, "source": { diff --git a/assistant_dists/deepy_assistant/pipeline_conf.json b/assistant_dists/deepy_assistant/pipeline_conf.json index 872f48c31a..3521ead83c 100644 --- a/assistant_dists/deepy_assistant/pipeline_conf.json +++ b/assistant_dists/deepy_assistant/pipeline_conf.json @@ -180,6 +180,9 @@ "previous_services": [ "candidate_annotators" ], + "tags": [ + "selector" + ], "state_manager_method": "add_bot_utterance", "is_enabled": true, "source": { diff --git a/assistant_dists/document_based_qa/docker-compose.override.yml b/assistant_dists/document_based_qa/docker-compose.override.yml index 6301101208..eb792e3bab 100644 --- a/assistant_dists/document_based_qa/docker-compose.override.yml +++ b/assistant_dists/document_based_qa/docker-compose.override.yml @@ -17,6 +17,7 @@ services: env_file: [ .env ] build: context: ./annotators/SentSeg/ + dockerfile: Dockerfile-test command: flask run -h 0.0.0.0 -p 8011 environment: - FLASK_APP=server diff --git a/assistant_dists/document_based_qa/pipeline_conf.json b/assistant_dists/document_based_qa/pipeline_conf.json index aa1b881c18..6c36d8fc66 100644 --- a/assistant_dists/document_based_qa/pipeline_conf.json +++ b/assistant_dists/document_based_qa/pipeline_conf.json @@ -253,6 +253,9 @@ "previous_services": [ "candidate_annotators" ], + "tags": [ + "selector" + ], "state_manager_method": "add_bot_utterance", "is_enabled": true, "source": { diff --git a/assistant_dists/dream/docker-compose-test.override.yml b/assistant_dists/dream/docker-compose-test.override.yml new file mode 100644 index 0000000000..17afe7521f --- /dev/null +++ b/assistant_dists/dream/docker-compose-test.override.yml @@ -0,0 +1,488 @@ +services: + agent: + command: sh -c 'bin/wait && python -m deeppavlov_agent.run agent.pipeline_config=assistant_dists/dream/pipeline_conf.json' + environment: + WAIT_HOSTS: "sentseg:8011, ranking-based-response-selector:8002, + dff-intent-responder-skill:8012, intent-catcher:8014, ner:8021, + factoid-qa:8071, kbqa:8072, entity-linking:8075, wiki-parser:8077, text-qa:8078, + combined-classification:8087, fact-retrieval:8100, entity-detection:8103, + 
sentence-ranker:8128, property-extraction:8136, prompt-selector:8135, openai-api-chatgpt:8145, + dff-dream-persona-chatgpt-prompted-skill:8137, dff-dream-faq-prompted-skill:8170, + openai-api-chatgpt-16k:8167, summarization-annotator:8058, dialog-summarizer:8059" + WAIT_HOSTS_TIMEOUT: ${WAIT_TIMEOUT:-1000} + HIGH_PRIORITY_INTENTS: 1 + RESTRICTION_FOR_SENSITIVE_CASE: 1 + ALWAYS_TURN_ON_ALL_SKILLS: 0 + LANGUAGE: EN + FALLBACK_FILE: fallbacks_dream_en.json + + ranking-based-response-selector: + env_file: [ .env ] + build: + args: + SERVICE_PORT: 8002 + SERVICE_NAME: response_selector + LANGUAGE: EN + SENTENCE_RANKER_ANNOTATION_NAME: sentence_ranker + SENTENCE_RANKER_SERVICE_URL: http://sentence-ranker:8128/respond + SENTENCE_RANKER_TIMEOUT: 3 + N_UTTERANCES_CONTEXT: 5 + FILTER_TOXIC_OR_BADLISTED: 1 + FALLBACK_FILE: fallbacks_dream_en.json + context: . + dockerfile: ./response_selectors/ranking_based_response_selector/Dockerfile + command: flask run -h 0.0.0.0 -p 8002 + environment: + - FLASK_APP=server + deploy: + resources: + limits: + memory: 100M + reservations: + memory: 100M + + sentseg: + env_file: [ .env ] + build: + context: ./annotators/SentSeg/ + dockerfile: Dockerfile-test + command: flask run -h 0.0.0.0 -p 8011 + environment: + - FLASK_APP=server + deploy: + resources: + limits: + memory: 1.5G + reservations: + memory: 1.5G + + dff-intent-responder-skill: + env_file: [ .env ] + build: + args: + SERVICE_PORT: 8012 + SERVICE_NAME: dff_intent_responder_skill + INTENT_RESPONSE_PHRASES_FNAME: intent_response_phrases.json + context: . + dockerfile: ./skills/dff_intent_responder_skill/Dockerfile + command: gunicorn --workers=1 server:app -b 0.0.0.0:8012 --reload + deploy: + resources: + limits: + memory: 128M + reservations: + memory: 128M + + intent-catcher: + env_file: [ .env ] + build: + context: . + dockerfile: ./annotators/IntentCatcherTransformers/Dockerfile + args: + SERVICE_PORT: 8014 + CONFIG_NAME: intents_model_dp_config.json + INTENT_PHRASES_PATH: intent_phrases.json + command: python -m flask run -h 0.0.0.0 -p 8014 + environment: + - FLASK_APP=server + - CUDA_VISIBLE_DEVICES=0 + deploy: + resources: + limits: + memory: 3.5G + reservations: + memory: 3.5G + + ner: + env_file: [ .env ] + build: + args: + CONFIG: ner_case_agnostic_multilingual_bert_base_extended.json + SERVICE_PORT: 8021 + SRC_DIR: annotators/NER_deeppavlov + COMMIT: f5117cd9ad1e64f6c2d970ecaa42fc09ccb23144 + context: ./ + dockerfile: annotators/NER_deeppavlov/Dockerfile-test + command: flask run -h 0.0.0.0 -p 8021 + environment: + - FLASK_APP=server + - CUDA_VISIBLE_DEVICES=0 + tty: true + deploy: + resources: + limits: + memory: 2G + reservations: + memory: 2G + + factoid-qa: + env_file: [ .env ] + build: + args: + SERVICE_PORT: 8071 + SERVICE_NAME: factoid_qa + context: . 
+ dockerfile: ./skills/factoid_qa/Dockerfile + command: flask run -h 0.0.0.0 -p 8071 + environment: + - FLASK_APP=server + deploy: + resources: + limits: + memory: 256M + reservations: + memory: 256M + + entity-linking: + env_file: [ .env ] + build: + args: + SERVICE_PORT: 8075 + SERVICE_NAME: entity_linking + CONFIG: entity_linking_eng.json + SRC_DIR: annotators/entity_linking + context: ./ + dockerfile: annotators/entity_linking/Dockerfile-test + environment: + - CUDA_VISIBLE_DEVICES=0 + deploy: + resources: + limits: + memory: 2.5G + reservations: + memory: 2.5G + + wiki-parser: + env_file: [ .env ] + build: + args: + SERVICE_PORT: 8077 + SERVICE_NAME: wiki_parser + WIKI_LITE_DB: http://files.deeppavlov.ai/kbqa/wikidata/wikidata2022.hdt + WIKI_LITE_INDEX_DB: http://files.deeppavlov.ai/kbqa/wikidata/wikidata2022.hdt.index.v1-1 + WIKI_CACHE_DB: http://files.deeppavlov.ai/kbqa/wikidata/wikidata_cache.json + CONFIG: wiki_parser.json + SRC_DIR: annotators/wiki_parser + COMMIT: ff5b156d16a949c3ec99da7fb60ae907dec37a41 + FAST: 1 + context: ./ + dockerfile: annotators/wiki_parser/Dockerfile-test + command: flask run -h 0.0.0.0 -p 8077 + environment: + - CUDA_VISIBLE_DEVICES='' + - FLASK_APP=server + deploy: + resources: + limits: + memory: 256M + reservations: + memory: 256M + + text-qa: + env_file: [ .env ] + build: + args: + SERVICE_PORT: 8078 + SERVICE_NAME: text_qa + CONFIG: qa_eng.json + context: services/text_qa + dockerfile: Dockerfile-test + command: flask run -h 0.0.0.0 -p 8078 + environment: + - CUDA_VISIBLE_DEVICES=0 + - FLASK_APP=server + - LANGUAGE=EN + deploy: + resources: + limits: + memory: 3G + reservations: + memory: 3G + + kbqa: + env_file: [ .env ] + build: + args: + SERVICE_PORT: 8072 + SERVICE_NAME: kbqa + CONFIG: kbqa_cq_mt_bert_lite.json + SRC_DIR: annotators/kbqa/ + COMMIT: 283a25e322e8fedc6ff0c159e4ec76bb165ae405 + context: ./ + dockerfile: annotators/kbqa/Dockerfile-test + command: flask run -h 0.0.0.0 -p 8072 + environment: + - CUDA_VISIBLE_DEVICES=0 + - FLASK_APP=server + deploy: + resources: + limits: + memory: 5G + reservations: + memory: 5G + + combined-classification: + env_file: [ .env ] + build: + args: + SERVICE_PORT: 8087 + SERVICE_NAME: combined_classification + CONFIG: combined_classifier.json + context: . 
+ dockerfile: ./annotators/combined_classification/Dockerfile + command: gunicorn --workers=1 server:app -b 0.0.0.0:8087 --timeout 600 + environment: + - CUDA_VISIBLE_DEVICES=0 + deploy: + resources: + limits: + memory: 2G + reservations: + memory: 2G + + fact-retrieval: + env_file: [ .env ] + build: + args: + SERVICE_PORT: 8100 + SERVICE_NAME: fact_retrieval + CONFIG: configs/fact_retrieval_page.json + CONFIG_WIKI: configs/page_extractor.json + CONFIG_WHOW: configs/whow_page_extractor.json + SRC_DIR: annotators/fact_retrieval/ + COMMIT: 4b3e60c407644b750c9dc292ac6bf206081fb9d0 + N_FACTS: 3 + context: ./ + dockerfile: annotators/fact_retrieval/Dockerfile + command: flask run -h 0.0.0.0 -p 8100 + environment: + - CUDA_VISIBLE_DEVICES=0 + - FLASK_APP=server + deploy: + resources: + limits: + memory: 4G + reservations: + memory: 4G + + entity-detection: + env_file: [ .env ] + build: + args: + SERVICE_NAME: entity_detection + SEQ_TAG_CONFIG: wikipedia_entity_detection_distilbert.json + CONFIG: entity_detection_eng.json + LOWERCASE: 1 + SERVICE_PORT: 8103 + SRC_DIR: annotators/entity_detection/ + FINEGRAINED: 0 + context: ./ + dockerfile: annotators/entity_detection/Dockerfile-test + command: flask run -h 0.0.0.0 -p 8103 + environment: + - FLASK_APP=server + - CUDA_VISIBLE_DEVICES=0 + deploy: + resources: + limits: + memory: 2.5G + reservations: + memory: 2.5G + + prompt-selector: + env_file: [ .env ] + build: + args: + SERVICE_PORT: 8135 + SERVICE_NAME: prompt_selector + SENTENCE_RANKER_SERVICE_URL: http://sentence-ranker:8128/respond + N_SENTENCES_TO_RETURN: 3 + PROMPTS_TO_CONSIDER: dream_persona,dream_faq + context: . + dockerfile: ./annotators/prompt_selector/Dockerfile + command: flask run -h 0.0.0.0 -p 8135 + environment: + - FLASK_APP=server + deploy: + resources: + limits: + memory: 100M + reservations: + memory: 100M + + sentence-ranker: + env_file: [ .env ] + build: + args: + SERVICE_PORT: 8128 + SERVICE_NAME: sentence_ranker + PRETRAINED_MODEL_NAME_OR_PATH: sentence-transformers/all-MiniLM-L6-v2 + context: ./services/sentence_ranker/ + command: flask run -h 0.0.0.0 -p 8128 + environment: + - CUDA_VISIBLE_DEVICES=0 + - FLASK_APP=server + deploy: + resources: + limits: + memory: 3G + reservations: + memory: 3G + + openai-api-chatgpt: + env_file: [ .env ] + build: + args: + SERVICE_PORT: 8145 + SERVICE_NAME: openai_api_chatgpt + PRETRAINED_MODEL_NAME_OR_PATH: gpt-3.5-turbo + context: . + dockerfile: ./services/openai_api_lm/Dockerfile + command: flask run -h 0.0.0.0 -p 8145 + environment: + - CUDA_VISIBLE_DEVICES=0 + - FLASK_APP=server + deploy: + resources: + limits: + memory: 500M + reservations: + memory: 100M + + dff-dream-persona-chatgpt-prompted-skill: + env_file: [ .env,.env_secret ] + build: + args: + SERVICE_PORT: 8137 + SERVICE_NAME: dff_dream_persona_prompted_skill + PROMPT_FILE: common/prompts/dream_persona.json + GENERATIVE_SERVICE_URL: http://openai-api-chatgpt:8145/respond + GENERATIVE_SERVICE_CONFIG: openai-chatgpt.json + GENERATIVE_TIMEOUT: 120 + N_UTTERANCES_CONTEXT: 7 + ENVVARS_TO_SEND: OPENAI_API_KEY,OPENAI_ORGANIZATION + context: . + dockerfile: ./skills/dff_template_prompted_skill/Dockerfile + deploy: + resources: + limits: + memory: 128M + reservations: + memory: 128M + + dff-google-api-skill: + env_file: [ .env,.env_secret ] + build: + args: + SERVICE_PORT: 8162 + SERVICE_NAME: dff_google_api_skill + ENVVARS_TO_SEND: OPENAI_API_KEY,GOOGLE_CSE_ID,GOOGLE_API_KEY + context: . 
+ dockerfile: ./skills/dff_google_api_skill/Dockerfile + deploy: + resources: + limits: + memory: 128M + reservations: + memory: 128M + + property-extraction: + env_file: [.env] + build: + args: + CONFIG_T5: t5_generative_ie_lite_infer.json + CONFIG_REL_RANKER: rel_ranking_roberta.json + SERVICE_PORT: 8136 + SRC_DIR: annotators/property_extraction/ + SERVICE_NAME: property_extraction + context: ./ + dockerfile: annotators/property_extraction/Dockerfile + command: flask run -h 0.0.0.0 -p 8136 + environment: + - FLASK_APP=server + - CUDA_VISIBLE_DEVICES=0 + deploy: + resources: + limits: + memory: 7G + reservations: + memory: 7G + + dff-dream-faq-prompted-skill: + env_file: [ .env,.env_secret ] + build: + args: + SERVICE_PORT: 8170 + SERVICE_NAME: dff_dream_faq_prompted_skill + PROMPT_FILE: common/prompts/dream_faq.json + GENERATIVE_SERVICE_URL: http://openai-api-chatgpt-16k:8167/respond + GENERATIVE_SERVICE_CONFIG: openai-chatgpt.json + GENERATIVE_TIMEOUT: 120 + N_UTTERANCES_CONTEXT: 7 + ENVVARS_TO_SEND: OPENAI_API_KEY,OPENAI_ORGANIZATION + context: . + dockerfile: ./skills/dff_template_prompted_skill/Dockerfile + deploy: + resources: + limits: + memory: 128M + reservations: + memory: 128M + + openai-api-chatgpt-16k: + env_file: [ .env ] + build: + args: + SERVICE_PORT: 8167 + SERVICE_NAME: openai_api_chatgpt_16k + PRETRAINED_MODEL_NAME_OR_PATH: gpt-3.5-turbo-16k + context: . + dockerfile: ./services/openai_api_lm/Dockerfile + command: flask run -h 0.0.0.0 -p 8167 + environment: + - FLASK_APP=server + deploy: + resources: + limits: + memory: 500M + reservations: + memory: 100M + + summarization-annotator: + env_file: [ .env ] + build: + args: + SERVICE_PORT: 8058 + SERVICE_NAME: summarization_annotator + SUMMARIZATION_REQUEST_TIMEOUT: 10 + context: ./annotators/summarization_annotator/ + command: flask run -h 0.0.0.0 -p 8058 + environment: + - FLASK_APP=server + deploy: + resources: + limits: + memory: 256M + reservations: + memory: 256M + + dialog-summarizer: + env_file: [ .env ] + build: + args: + SERVICE_PORT: 8059 + SERVICE_NAME: dialog_summarizer + PRETRAINED_MODEL_NAME: "knkarthick/MEETING_SUMMARY" + context: ./services/dialog_summarizer/ + command: flask run -h 0.0.0.0 -p 8059 + environment: + - CUDA_VISIBLE_DEVICES=0 + - FLASK_APP=server + deploy: + resources: + limits: + memory: 4G + reservations: + memory: 4G + +version: '3.7' diff --git a/assistant_dists/dream/pipeline_conf.json b/assistant_dists/dream/pipeline_conf.json index 35825f1f75..93216a4643 100644 --- a/assistant_dists/dream/pipeline_conf.json +++ b/assistant_dists/dream/pipeline_conf.json @@ -548,6 +548,9 @@ "previous_services": [ "candidate_annotators" ], + "tags": [ + "selector" + ], "state_manager_method": "add_bot_utterance", "is_enabled": true, "source": { diff --git a/assistant_dists/dream_alexa/pipeline_conf.json b/assistant_dists/dream_alexa/pipeline_conf.json index f393ea1258..af0ee6684d 100644 --- a/assistant_dists/dream_alexa/pipeline_conf.json +++ b/assistant_dists/dream_alexa/pipeline_conf.json @@ -1446,6 +1446,9 @@ "previous_services": [ "candidate_annotators" ], + "tags": [ + "selector" + ], "state_manager_method": "add_bot_utterance", "is_enabled": true, "source": { diff --git a/assistant_dists/dream_embodied/pipeline_conf.json b/assistant_dists/dream_embodied/pipeline_conf.json index 1f16ffda78..165980bfb5 100644 --- a/assistant_dists/dream_embodied/pipeline_conf.json +++ b/assistant_dists/dream_embodied/pipeline_conf.json @@ -368,6 +368,9 @@ "previous_services": [ "candidate_annotators" ], + "tags": [ 
+ "selector" + ], "state_manager_method": "add_bot_utterance", "is_enabled": true, "source": { diff --git a/assistant_dists/dream_kg/dev.yml b/assistant_dists/dream_kg/dev.yml index 336c89599d..e14c5b1956 100644 --- a/assistant_dists/dream_kg/dev.yml +++ b/assistant_dists/dream_kg/dev.yml @@ -11,12 +11,6 @@ services: - "~/.deeppavlov/cache:/root/.cache" ports: - 8128:8128 - spacy-nounphrases: - volumes: - - "./annotators/spacy_nounphrases:/src" - - "./common:/src/common" - ports: - - 8006:8006 sentseg: volumes: - "./annotators/SentSeg:/src" @@ -47,11 +41,6 @@ services: # # you can use persistent local volume if you need # volumes: # - ./venv/data/db_data:/root/data/db - spelling-preprocessing: - volumes: - - "./annotators/spelling_preprocessing:/src" - ports: - - 8074:8074 combined-classification: volumes: - "./common:/src/common" diff --git a/assistant_dists/dream_kg/docker-compose.override.yml b/assistant_dists/dream_kg/docker-compose.override.yml index ae6a94eafd..2ea81c240a 100644 --- a/assistant_dists/dream_kg/docker-compose.override.yml +++ b/assistant_dists/dream_kg/docker-compose.override.yml @@ -2,12 +2,9 @@ services: agent: command: sh -c 'bin/wait && python -m deeppavlov_agent.run agent.pipeline_config=assistant_dists/dream_kg/pipeline_conf.json' environment: - WAIT_HOSTS: "sentence-ranker:8128, - spacy-nounphrases:8006, sentseg:8011, ranking-based-response-selector:8002, - ner:8021, spelling-preprocessing:8074, entity-linking:8075, - combined-classification:8087, entity-detection:8103, - property-extraction:8136, custom-entity-linking:8153, - terminusdb-server:6363, user-knowledge-memorizer:8027, + WAIT_HOSTS: "sentence-ranker:8128, sentseg:8011, ranking-based-response-selector:8002, + ner:8021, entity-linking:8075, combined-classification:8087, entity-detection:8103, + property-extraction:8136, custom-entity-linking:8153, terminusdb-server:6363, user-knowledge-memorizer:8027, dff-user-kg-skill:8028, dff-travel-italy-skill:8025" WAIT_HOSTS_TIMEOUT: ${WAIT_TIMEOUT:-480} HIGH_PRIORITY_INTENTS: 1 @@ -35,24 +32,6 @@ services: reservations: memory: 3G - spacy-nounphrases: - env_file: [ .env ] - build: - args: - SERVICE_PORT: 8006 - SERVICE_NAME: spacy_nounphrases - context: . 
- dockerfile: ./annotators/spacy_nounphrases/Dockerfile - command: flask run -h 0.0.0.0 -p 8006 - environment: - - FLASK_APP=server - deploy: - resources: - limits: - memory: 256M - reservations: - memory: 256M - sentseg: env_file: [ .env ] build: @@ -132,23 +111,6 @@ services: reservations: memory: 2.5G - spelling-preprocessing: - env_file: [ .env ] - build: - args: - SERVICE_PORT: 8074 - SERVICE_NAME: spelling_preprocessing - context: ./annotators/spelling_preprocessing/ - command: flask run -h 0.0.0.0 -p 8074 - environment: - - FLASK_APP=server - deploy: - resources: - limits: - memory: 100M - reservations: - memory: 100M - combined-classification: env_file: [ .env ] build: @@ -218,7 +180,7 @@ services: build: args: CONFIG: custom_entity_linking.json - PORT: 8153 + SERVICE_PORT: 8153 SRC_DIR: annotators/custom_entity_linking context: ./ dockerfile: annotators/custom_entity_linking/Dockerfile @@ -243,7 +205,7 @@ services: TERMINUSDB_SERVER_PASSWORD: root TERMINUSDB_SERVER_TEAM: admin TERMINUSDB_SERVER_DB: user_knowledge_db - CONFIG: config.json + USER_KM_SERVICE_CONFIG: index_load_path.json context: ./ dockerfile: annotators/user_knowledge_memorizer/Dockerfile command: flask run -h 0.0.0.0 -p 8027 diff --git a/assistant_dists/dream_kg/pipeline_conf.json b/assistant_dists/dream_kg/pipeline_conf.json index bf5feea13f..04e9c677ed 100644 --- a/assistant_dists/dream_kg/pipeline_conf.json +++ b/assistant_dists/dream_kg/pipeline_conf.json @@ -36,7 +36,7 @@ ], "is_enabled": true, "source": { - "component": "components/sbDcAqiNqxFz.yml", + "component": "components/vCZorqdcCVBI.yml", "service": "services/agent_services/service_configs/dream_kg" } }, @@ -64,27 +64,11 @@ ], "is_enabled": true, "source": { - "component": "components/rFC0YJOoDFvS.yml", + "component": "components/vCZorqdcCVBI.yml", "service": "services/agent_services/service_configs/dream_kg" } }, "annotators": { - "spelling_preprocessing": { - "connector": { - "protocol": "http", - "timeout": 1.0, - "url": "http://spelling-preprocessing:8074/respond" - }, - "dialog_formatter": "state_formatters.dp_formatters:last_utt_dialog", - "response_formatter": "state_formatters.dp_formatters:simple_formatter_service", - "previous_services": [], - "state_manager_method": "add_annotation_and_reset_human_attributes_for_first_turn", - "is_enabled": true, - "source": { - "component": "components/pGxj32ic41pvquRXUdqc7A.yml", - "service": "annotators/spelling_preprocessing/service_configs/spelling-preprocessing" - } - }, "sentseg": { "connector": { "protocol": "http", @@ -93,9 +77,7 @@ }, "dialog_formatter": "state_formatters.dp_formatters:preproc_last_human_utt_dialog", "response_formatter": "state_formatters.dp_formatters:simple_formatter_service", - "previous_services": [ - "annotators.spelling_preprocessing" - ], + "previous_services": [], "state_manager_method": "add_annotation", "is_enabled": true, "source": { @@ -103,24 +85,6 @@ "service": "annotators/SentSeg/service_configs/sentseg" } }, - "spacy_nounphrases": { - "connector": { - "protocol": "http", - "timeout": 1.0, - "url": "http://spacy-nounphrases:8006/respond" - }, - "dialog_formatter": "state_formatters.dp_formatters:preproc_last_human_utt_dialog", - "response_formatter": "state_formatters.dp_formatters:simple_formatter_service", - "previous_services": [ - "annotators.spelling_preprocessing" - ], - "state_manager_method": "add_annotation", - "is_enabled": true, - "source": { - "component": "components/dswI5sRZbFPmgcNQKH5iPg.yml", - "service": 
"annotators/spacy_nounphrases/service_configs/spacy-nounphrases" - } - }, "ner": { "connector": { "protocol": "http", @@ -130,7 +94,6 @@ "dialog_formatter": "state_formatters.dp_formatters:ner_formatter_dialog", "response_formatter": "state_formatters.dp_formatters:simple_formatter_service", "previous_services": [ - "annotators.spelling_preprocessing", "annotators.sentseg" ], "state_manager_method": "add_annotation", @@ -149,7 +112,6 @@ "dialog_formatter": "state_formatters.dp_formatters:entity_detection_formatter_dialog", "response_formatter": "state_formatters.dp_formatters:simple_formatter_service", "previous_services": [ - "annotators.spelling_preprocessing", "annotators.sentseg" ], "state_manager_method": "add_annotation", @@ -169,8 +131,7 @@ "response_formatter": "state_formatters.dp_formatters:simple_formatter_service", "previous_services": [ "annotators.ner", - "annotators.entity_detection", - "annotators.spacy_nounphrases" + "annotators.entity_detection" ], "state_manager_method": "add_annotation", "is_enabled": true, @@ -208,7 +169,6 @@ "previous_services": [ "annotators.ner", "annotators.entity_detection", - "annotators.spacy_nounphrases", "annotators.property_extraction" ], "state_manager_method": "add_annotation", @@ -226,9 +186,7 @@ }, "dialog_formatter": "state_formatters.dp_formatters:preproc_last_human_utt_dialog_w_hist", "response_formatter": "state_formatters.dp_formatters:simple_formatter_service", - "previous_services": [ - "annotators.spelling_preprocessing" - ], + "previous_services": [], "state_manager_method": "add_annotation", "is_enabled": true, "source": { @@ -274,24 +232,6 @@ "component": "components/3RDNPBdybjBlSQZqcc7nGQ.yml", "service": "annotators/NER_deeppavlov/service_configs/ner" } - }, - "spacy_nounphrases": { - "connector": { - "protocol": "http", - "timeout": 1.0, - "url": "http://spacy-nounphrases:8006/respond" - }, - "dialog_formatter": "state_formatters.dp_formatters:last_bot_utt_dialog", - "response_formatter": "state_formatters.dp_formatters:simple_formatter_service", - "previous_services": [ - "skills" - ], - "state_manager_method": "add_annotation_prev_bot_utt", - "is_enabled": true, - "source": { - "component": "components/dswI5sRZbFPmgcNQKH5iPg.yml", - "service": "annotators/spacy_nounphrases/service_configs/spacy-nounphrases" - } } }, "response_annotator_selectors": { @@ -300,8 +240,7 @@ "class_name": "skill_selectors.post_annotator_selector.connector:PostAnnotatorSelectorConnector", "annotator_names": [ "sentseg", - "ner", - "spacy_nounphrases" + "ner" ] }, "response_formatter": "state_formatters.dp_formatters:simple_formatter_service", @@ -333,24 +272,6 @@ "service": "services/sentence_ranker/service_configs/sentence-ranker" } }, - "spacy_nounphrases": { - "connector": { - "protocol": "http", - "timeout": 1.0, - "url": "http://spacy-nounphrases:8006/respond_batch" - }, - "dialog_formatter": "state_formatters.dp_formatters:hypotheses_list", - "response_formatter": "state_formatters.dp_formatters:simple_formatter_service", - "previous_services": [ - "skills" - ], - "state_manager_method": "add_hypothesis_annotation_batch", - "is_enabled": true, - "source": { - "component": "components/dswI5sRZbFPmgcNQKH5iPg.yml", - "service": "annotators/spacy_nounphrases/service_configs/spacy-nounphrases" - } - }, "entity_detection": { "connector": { "protocol": "http", @@ -495,6 +416,9 @@ "previous_services": [ "candidate_annotators" ], + "tags": [ + "selector" + ], "state_manager_method": "add_bot_utterance", "is_enabled": true, "source": { diff --git 
a/assistant_dists/dream_mini/pipeline_conf.json b/assistant_dists/dream_mini/pipeline_conf.json index 93f865ec7c..b3a3269fda 100644 --- a/assistant_dists/dream_mini/pipeline_conf.json +++ b/assistant_dists/dream_mini/pipeline_conf.json @@ -312,6 +312,9 @@ "previous_services": [ "candidate_annotators" ], + "tags": [ + "selector" + ], "state_manager_method": "add_bot_utterance", "is_enabled": true, "source": { diff --git a/assistant_dists/dream_mini_persona_based/pipeline_conf.json b/assistant_dists/dream_mini_persona_based/pipeline_conf.json index bb7ee42bf0..2e8c276aa7 100644 --- a/assistant_dists/dream_mini_persona_based/pipeline_conf.json +++ b/assistant_dists/dream_mini_persona_based/pipeline_conf.json @@ -349,6 +349,9 @@ "previous_services": [ "candidate_annotators" ], + "tags": [ + "selector" + ], "state_manager_method": "add_bot_utterance", "is_enabled": true, "source": { diff --git a/assistant_dists/dream_multilingual/pipeline_conf.json b/assistant_dists/dream_multilingual/pipeline_conf.json index 8d18414941..2f5a6c6304 100755 --- a/assistant_dists/dream_multilingual/pipeline_conf.json +++ b/assistant_dists/dream_multilingual/pipeline_conf.json @@ -190,6 +190,9 @@ "previous_services": [ "candidate_annotators" ], + "tags": [ + "selector" + ], "state_manager_method": "add_bot_utterance", "is_enabled": true, "source": { diff --git a/assistant_dists/dream_multimodal/pipeline_conf.json b/assistant_dists/dream_multimodal/pipeline_conf.json index 4dfd6ce2c7..e1b79ac4ba 100644 --- a/assistant_dists/dream_multimodal/pipeline_conf.json +++ b/assistant_dists/dream_multimodal/pipeline_conf.json @@ -324,6 +324,9 @@ "previous_services": [ "candidate_annotators" ], + "tags": [ + "selector" + ], "state_manager_method": "add_bot_utterance", "is_enabled": true, "source": { diff --git a/assistant_dists/dream_ocean/pipeline_conf.json b/assistant_dists/dream_ocean/pipeline_conf.json index 6e77e5faec..314048aad2 100644 --- a/assistant_dists/dream_ocean/pipeline_conf.json +++ b/assistant_dists/dream_ocean/pipeline_conf.json @@ -501,6 +501,9 @@ "previous_services": [ "candidate_annotators" ], + "tags": [ + "selector" + ], "state_manager_method": "add_bot_utterance", "is_enabled": true, "source": { diff --git a/assistant_dists/dream_persona_openai_prompted/pipeline_conf.json b/assistant_dists/dream_persona_openai_prompted/pipeline_conf.json index 3be93b5dda..cdff3ffa2d 100644 --- a/assistant_dists/dream_persona_openai_prompted/pipeline_conf.json +++ b/assistant_dists/dream_persona_openai_prompted/pipeline_conf.json @@ -298,6 +298,9 @@ "previous_services": [ "candidate_annotators" ], + "tags": [ + "selector" + ], "state_manager_method": "add_bot_utterance", "is_enabled": true, "source": { diff --git a/assistant_dists/dream_persona_prompted/pipeline_conf.json b/assistant_dists/dream_persona_prompted/pipeline_conf.json index 4b72000cdb..e7fb5fccb7 100644 --- a/assistant_dists/dream_persona_prompted/pipeline_conf.json +++ b/assistant_dists/dream_persona_prompted/pipeline_conf.json @@ -258,6 +258,9 @@ "previous_services": [ "candidate_annotators" ], + "tags": [ + "selector" + ], "state_manager_method": "add_bot_utterance", "is_enabled": true, "source": { diff --git a/assistant_dists/dream_persona_rullama_prompted/pipeline_conf.json b/assistant_dists/dream_persona_rullama_prompted/pipeline_conf.json index b6420ca56d..f83294ab6f 100644 --- a/assistant_dists/dream_persona_rullama_prompted/pipeline_conf.json +++ b/assistant_dists/dream_persona_rullama_prompted/pipeline_conf.json @@ -198,6 +198,9 @@ 
"previous_services": [ "candidate_annotators" ], + "tags": [ + "selector" + ], "state_manager_method": "add_bot_utterance", "is_enabled": true, "source": { diff --git a/assistant_dists/dream_persona_ruxglm_prompted/pipeline_conf.json b/assistant_dists/dream_persona_ruxglm_prompted/pipeline_conf.json index 215b7f4c87..c339917e21 100644 --- a/assistant_dists/dream_persona_ruxglm_prompted/pipeline_conf.json +++ b/assistant_dists/dream_persona_ruxglm_prompted/pipeline_conf.json @@ -198,6 +198,9 @@ "previous_services": [ "candidate_annotators" ], + "tags": [ + "selector" + ], "state_manager_method": "add_bot_utterance", "is_enabled": true, "source": { diff --git a/assistant_dists/dream_ranking_and_midas_based_dm/pipeline_conf.json b/assistant_dists/dream_ranking_and_midas_based_dm/pipeline_conf.json index 649e44d14a..6c3c596459 100644 --- a/assistant_dists/dream_ranking_and_midas_based_dm/pipeline_conf.json +++ b/assistant_dists/dream_ranking_and_midas_based_dm/pipeline_conf.json @@ -548,6 +548,9 @@ "previous_services": [ "candidate_annotators" ], + "tags": [ + "selector" + ], "state_manager_method": "add_bot_utterance", "is_enabled": true, "source": { diff --git a/assistant_dists/dream_reasoning/pipeline_conf.json b/assistant_dists/dream_reasoning/pipeline_conf.json index 19b83dcad5..702cb6383f 100644 --- a/assistant_dists/dream_reasoning/pipeline_conf.json +++ b/assistant_dists/dream_reasoning/pipeline_conf.json @@ -240,6 +240,9 @@ "previous_services": [ "candidate_annotators" ], + "tags": [ + "selector" + ], "state_manager_method": "add_bot_utterance", "is_enabled": true, "source": { diff --git a/assistant_dists/dream_robot/pipeline_conf.json b/assistant_dists/dream_robot/pipeline_conf.json index fc29dbe121..70195a9bb0 100644 --- a/assistant_dists/dream_robot/pipeline_conf.json +++ b/assistant_dists/dream_robot/pipeline_conf.json @@ -445,6 +445,9 @@ "previous_services": [ "candidate_annotators" ], + "tags": [ + "selector" + ], "state_manager_method": "add_bot_utterance", "is_enabled": true, "source": { diff --git a/assistant_dists/dream_robot_prompted/pipeline_conf.json b/assistant_dists/dream_robot_prompted/pipeline_conf.json index 3d79def77a..17479797a9 100644 --- a/assistant_dists/dream_robot_prompted/pipeline_conf.json +++ b/assistant_dists/dream_robot_prompted/pipeline_conf.json @@ -256,6 +256,9 @@ "previous_services": [ "candidate_annotators" ], + "tags": [ + "selector" + ], "state_manager_method": "add_bot_utterance", "is_enabled": true, "source": { diff --git a/assistant_dists/dream_russian/docker-compose.override.yml b/assistant_dists/dream_russian/docker-compose.override.yml index 5ec5a43ae2..3b8769969e 100644 --- a/assistant_dists/dream_russian/docker-compose.override.yml +++ b/assistant_dists/dream_russian/docker-compose.override.yml @@ -93,6 +93,7 @@ services: args: CONFIG: sentseg_ru_bert_torch.json context: ./annotators/sentseg_ru + dockerfile: Dockerfile-test command: flask run -h 0.0.0.0 -p 8011 environment: - CUDA_VISIBLE_DEVICES=0 @@ -190,7 +191,7 @@ services: SRC_DIR: annotators/entity_detection_rus LANGUAGE: RU context: ./ - dockerfile: annotators/entity_detection_rus/Dockerfile + dockerfile: annotators/entity_detection_rus/Dockerfile-test command: flask run -h 0.0.0.0 -p 8103 environment: - FLASK_APP=server @@ -254,7 +255,7 @@ services: LANGUAGE: RU FAST: 1 context: ./ - dockerfile: annotators/wiki_parser/Dockerfile + dockerfile: annotators/wiki_parser/Dockerfile-test command: flask run -h 0.0.0.0 -p 8077 environment: - CUDA_VISIBLE_DEVICES='' @@ -411,7 +412,7 @@ 
services: SERVICE_PORT: 8110 SRC_DIR: annotators/fact_retrieval_rus context: ./ - dockerfile: annotators/fact_retrieval_rus/Dockerfile + dockerfile: annotators/fact_retrieval_rus/Dockerfile-test command: flask run -h 0.0.0.0 -p 8110 environment: - CUDA_VISIBLE_DEVICES=0 @@ -431,7 +432,7 @@ services: SERVICE_NAME: text_qa CONFIG: qa_rus.json context: services/text_qa - dockerfile: Dockerfile + dockerfile: Dockerfile-test command: flask run -h 0.0.0.0 -p 8078 environment: - CUDA_VISIBLE_DEVICES=0 diff --git a/assistant_dists/dream_russian/pipeline_conf.json b/assistant_dists/dream_russian/pipeline_conf.json index e5f8679cae..703c658e00 100644 --- a/assistant_dists/dream_russian/pipeline_conf.json +++ b/assistant_dists/dream_russian/pipeline_conf.json @@ -614,6 +614,9 @@ "previous_services": [ "candidate_annotators" ], + "tags": [ + "selector" + ], "state_manager_method": "add_bot_utterance", "is_enabled": true, "source": { diff --git a/assistant_dists/dream_russian_persona_based/cpu.yml b/assistant_dists/dream_russian_persona_based/cpu.yml new file mode 100644 index 0000000000..5caad32775 --- /dev/null +++ b/assistant_dists/dream_russian_persona_based/cpu.yml @@ -0,0 +1,21 @@ +version: '3.7' +services: + dialogpt-ru: + environment: + CUDA_VISIBLE_DEVICES: "" + dialogrpt-ru: + environment: + CUDA_VISIBLE_DEVICES: "" + sentseg-ru: + environment: + CUDA_VISIBLE_DEVICES: "" + combined-classification-ru: + environment: + CUDA_VISIBLE_DEVICES: "" + intent-catcher-ru: + environment: + CUDA_VISIBLE_DEVICES: "" + seq2seq-persona-based-ru: + environment: + DEVICE: cpu + CUDA_VISIBLE_DEVICES: "" diff --git a/assistant_dists/dream_russian_persona_based/db_conf.json b/assistant_dists/dream_russian_persona_based/db_conf.json new file mode 100644 index 0000000000..380184822b --- /dev/null +++ b/assistant_dists/dream_russian_persona_based/db_conf.json @@ -0,0 +1,6 @@ +{ + "host": "DB_HOST", + "port": "DB_PORT", + "name": "DB_NAME", + "env": true +} diff --git a/assistant_dists/dream_russian_persona_based/dev.yml b/assistant_dists/dream_russian_persona_based/dev.yml new file mode 100644 index 0000000000..5511572ff3 --- /dev/null +++ b/assistant_dists/dream_russian_persona_based/dev.yml @@ -0,0 +1,89 @@ +# С такими volumes удобно дебажить, не нужно пересобирать контейнер каждый раз при изменении кода +services: + agent: + volumes: + - ".:/dp-agent" + ports: + - 4242:4242 + dff-program-y-ru-skill: + volumes: + - "./skills/dff_program_y_skill:/src" + - "./common:/src/common" + ports: + - 8008:8008 + convers-evaluation-selector-ru: + volumes: + - "./response_selectors/convers_evaluation_based_selector:/src" + - "./common:/src/common" + ports: + - 8009:8009 + dff-intent-responder-ru-skill: + volumes: + - "./skills/dff_intent_responder_skill:/src" + - "./common:/src/common" + ports: + - 8012:8012 + sentseg-ru: + volumes: + - "./annotators/sentseg_ru:/src" + - "~/.deeppavlov:/root/.deeppavlov" + ports: + - 8011:8011 + intent-catcher-ru: + volumes: + - "./annotators/IntentCatcherTransformers:/src" + - "./common:/src/common" + - "~/.deeppavlov:/root/.deeppavlov" + ports: + - 8014:8014 + badlisted-words-ru: + volumes: + - "./annotators/BadlistedWordsDetector_ru:/src" + - "./common:/src/common" + ports: + - 8018:8018 + combined-classification-ru: + volumes: + - "./annotators/combined_classification_ru:/src" + - "~/.deeppavlov/cache:/root/.cache" + - "./common:/src/common" + ports: + - 8198:8198 + mongo: + ports: + - 27017:27017 + # # you can use persistent local volume if you need + # volumes: + # - 
./venv/data/db_data:/root/data/db + dialogpt-ru: + volumes: + - "./services/dialogpt_RU:/src" + - "~/.deeppavlov/cache:/root/.cache" + ports: + - 8125:8125 + dff-generative-ru-skill: + volumes: + - "./skills/dff_generative_skill:/src" + - "./common:/src/common" + ports: + - 8092:8092 + dialogrpt-ru: + volumes: + - "./services/dialogrpt_ru:/src" + - "~/.deeppavlov/cache:/root/.cache" + ports: + - 8122:8122 + seq2seq-persona-based-ru: + volumes: + - "./services/seq2seq_persona_based:/src" + - "./common:/src/common" + - "~/.deeppavlov/cache:/root/.cache" + ports: + - 8140:8140 + relative-persona-extractor-ru: + volumes: + - "./annotators/relative_persona_extractor:/src" + - "./common:/src/common" + ports: + - 8133:8133 +version: "3.7" diff --git a/assistant_dists/dream_russian_persona_based/docker-compose.override.yml b/assistant_dists/dream_russian_persona_based/docker-compose.override.yml new file mode 100644 index 0000000000..f0910707d7 --- /dev/null +++ b/assistant_dists/dream_russian_persona_based/docker-compose.override.yml @@ -0,0 +1,263 @@ +services: + agent: + command: sh -c 'bin/wait && python -m deeppavlov_agent.run agent.pipeline_config=assistant_dists/dream_russian_persona_based/pipeline_conf.json' + environment: + WAIT_HOSTS: "convers-evaluation-selector-ru:8009, + dff-intent-responder-ru-skill:8012, intent-catcher-ru:8014, badlisted-words-ru:8018, + sentseg-ru:8011, dff-generative-ru-skill:8092, dialogpt-ru:8125, + dialogrpt-ru:8122, combined-classification-ru:8198, + relative-persona-extractor-ru:8133, seq2seq-persona-based-ru:8140" + WAIT_HOSTS_TIMEOUT: ${WAIT_TIMEOUT:-1200} + HIGH_PRIORITY_INTENTS: 1 + RESTRICTION_FOR_SENSITIVE_CASE: 1 + ALWAYS_TURN_ON_ALL_SKILLS: 0 + LANGUAGE: RU + FALLBACK_FILE: fallbacks_dream_ru.json + + dff-program-y-ru-skill: + env_file: [ .env_ru ] + build: + args: + SERVICE_PORT: 8008 + SERVICE_NAME: dff_program_y_skill + LANGUAGE: RU + context: . + dockerfile: ./skills/dff_program_y_skill/Dockerfile + command: gunicorn --workers=1 server:app -b 0.0.0.0:8008 --reload + deploy: + resources: + limits: + memory: 1024M + reservations: + memory: 1024M + + convers-evaluation-selector-ru: + env_file: [ .env_ru ] + build: + args: + TAG_BASED_SELECTION: 1 + CALL_BY_NAME_PROBABILITY: 0.5 + PROMPT_PROBA: 0.1 + ACKNOWLEDGEMENT_PROBA: 0.3 + PRIORITIZE_WITH_REQUIRED_ACT: 0 + PRIORITIZE_NO_DIALOG_BREAKDOWN: 0 + PRIORITIZE_WITH_SAME_TOPIC_ENTITY: 0 + IGNORE_DISLIKED_SKILLS: 0 + GREETING_FIRST: 1 + RESTRICTION_FOR_SENSITIVE_CASE: 1 + PRIORITIZE_PROMTS_WHEN_NO_SCRIPTS: 1 + MAX_TURNS_WITHOUT_SCRIPTS: 7 + ADD_ACKNOWLEDGMENTS_IF_POSSIBLE: 1 + PRIORITIZE_SCRIPTED_SKILLS: 0 + CONFIDENCE_STRENGTH: 0.8 + CONV_EVAL_STRENGTH: 0.4 + PRIORITIZE_HUMAN_INITIATIVE: 1 + QUESTION_TO_QUESTION_DOWNSCORE_COEF: 0.8 + LANGUAGE: RU + FALLBACK_FILE: fallbacks_dream_ru.json + context: . + dockerfile: ./response_selectors/convers_evaluation_based_selector/Dockerfile + command: flask run -h 0.0.0.0 -p 8009 + environment: + - FLASK_APP=server + deploy: + resources: + limits: + memory: 256M + reservations: + memory: 256M + + dff-intent-responder-ru-skill: + env_file: [ .env_ru ] + build: + args: + SERVICE_PORT: 8012 + SERVICE_NAME: dff_intent_responder_skill + INTENT_RESPONSE_PHRASES_FNAME: intent_response_phrases_RU.json + LANGUAGE: RU + context: . 
+ dockerfile: ./skills/dff_intent_responder_skill/Dockerfile + command: gunicorn --workers=1 server:app -b 0.0.0.0:8012 --reload + deploy: + resources: + limits: + memory: 128M + reservations: + memory: 128M + + sentseg-ru: + env_file: [ .env_ru ] + build: + args: + CONFIG: sentseg_ru_bert_torch.json + context: ./annotators/sentseg_ru + dockerfile: Dockerfile-test + command: flask run -h 0.0.0.0 -p 8011 + environment: + - CUDA_VISIBLE_DEVICES=0 + - FLASK_APP=server + deploy: + resources: + limits: + memory: 3G + reservations: + memory: 3G + + intent-catcher-ru: + env_file: [ .env_ru ] + build: + context: . + dockerfile: ./annotators/IntentCatcherTransformers/Dockerfile + args: + SERVICE_PORT: 8014 + CONFIG_NAME: intents_model_dp_config_RU.json + INTENT_PHRASES_PATH: intent_phrases_RU.json + command: python -m flask run -h 0.0.0.0 -p 8014 + environment: + - FLASK_APP=server + - CUDA_VISIBLE_DEVICES=0 + deploy: + resources: + limits: + memory: 3.5G + reservations: + memory: 3.5G + + badlisted-words-ru: + env_file: [ .env_ru ] + build: + args: + SERVICE_PORT: 8018 + SERVICE_NAME: badlisted_words + context: annotators/BadlistedWordsDetector_ru/ + command: flask run -h 0.0.0.0 -p 8018 + environment: + - FLASK_APP=server + deploy: + resources: + limits: + memory: 128M + reservations: + memory: 128M + + combined-classification-ru: + env_file: [ .env_ru ] + build: + args: + SERVICE_PORT: 8198 + SERVICE_NAME: combined_classification_ru + CONFIG: combined_classifier_ru.json + context: . + dockerfile: ./annotators/combined_classification_ru/Dockerfile + command: gunicorn --workers=1 server:app -b 0.0.0.0:8198 --timeout 600 + environment: + - CUDA_VISIBLE_DEVICES=0 + deploy: + resources: + limits: + memory: 2G + reservations: + memory: 2G + + dialogpt-ru: + env_file: [ .env_ru ] + build: + context: ./services/dialogpt_RU/ + args: + SERVICE_PORT: 8125 + PRETRAINED_MODEL_NAME_OR_PATH: DeepPavlov/rudialogpt3_medium_based_on_gpt2_v2 + LANGUAGE: RU + MAX_HISTORY_DEPTH: 3 + command: flask run -h 0.0.0.0 -p 8125 + environment: + - CUDA_VISIBLE_DEVICES=0 + - FLASK_APP=server + deploy: + resources: + limits: + memory: 3G + reservations: + memory: 3G + + dff-generative-ru-skill: + env_file: [ .env_ru ] + build: + args: + SERVICE_PORT: 8092 + SERVICE_NAME: dff_generative_skill + LANGUAGE: RU + GENERATIVE_SERVICE_URL: http://dialogpt-ru:8125/respond + context: . + dockerfile: ./skills/dff_generative_skill/Dockerfile + command: gunicorn --workers=1 server:app -b 0.0.0.0:8092 --reload + deploy: + resources: + limits: + memory: 128M + reservations: + memory: 128M + + dialogrpt-ru: + env_file: [ .env_ru ] + build: + context: ./services/dialogrpt_ru/ + args: + SERVICE_PORT: 8122 + PRETRAINED_MODEL_FNAME: dialogrpt_ru_ckpt_v0.pth + TOKENIZER_NAME_OR_PATH: DeepPavlov/rudialogpt3_medium_based_on_gpt2_v2 + command: flask run -h 0.0.0.0 -p 8122 + environment: + - CUDA_VISIBLE_DEVICES=0 + - FLASK_APP=server + deploy: + resources: + limits: + memory: 4G + reservations: + memory: 4G + + seq2seq-persona-based-ru: + env_file: [ .env_ru ] + build: + args: + SERVICE_PORT: 8140 + SERVICE_NAME: seq2seq_persona_based + PRETRAINED_MODEL_NAME_OR_PATH: DeepPavlov/mbart-large-50-ru-persona-chat + PAIR_DIALOG_HISTORY_LENGTH: 2 + CHAT_EVERY_SENT_MAX_LENGTH: 25 + PERSONA_EVERY_SENT_MAX_LENGTH: 19 + GENERATION_PARAMS_CONFIG: bart-large-ru-persona-chat_v1.json + context: . 
+ dockerfile: ./services/seq2seq_persona_based/Dockerfile + command: flask run -h 0.0.0.0 -p 8140 + environment: + - CUDA_VISIBLE_DEVICES=0 + - FLASK_APP=server + deploy: + resources: + limits: + memory: 2G + reservations: + memory: 2G + + relative-persona-extractor-ru: + env_file: [ .env_ru ] + build: + args: + SERVICE_PORT: 8133 + SERVICE_NAME: relative_persona_extractor + SENTENCE_RANKER_SERVICE_URL: http://dialogrpt-ru:8122/respond + N_SENTENCES_TO_RETURN: 3 + context: . + dockerfile: ./annotators/relative_persona_extractor/Dockerfile + command: flask run -h 0.0.0.0 -p 8133 + environment: + - FLASK_APP=server + deploy: + resources: + limits: + memory: 100M + reservations: + memory: 100M + +version: '3.7' diff --git a/assistant_dists/dream_russian_persona_based/pipeline_conf.json b/assistant_dists/dream_russian_persona_based/pipeline_conf.json new file mode 100644 index 0000000000..9a81b1e8a8 --- /dev/null +++ b/assistant_dists/dream_russian_persona_based/pipeline_conf.json @@ -0,0 +1,387 @@ +{ + "connectors": { + "sentseg": { + "protocol": "http", + "timeout": 1.5, + "url": "http://sentseg-ru:8011/sentseg" + }, + "ner": { + "protocol": "http", + "timeout": 1.5, + "url": "http://ner-ru:8021/ner" + } + }, + "services": { + "last_chance_service": { + "connector": { + "protocol": "python", + "class_name": "PredefinedTextConnector", + "response_text": "Извини, что-то пошло не так в моем мозгу. Пожалуйста, повтори предыдущую реплику.", + "annotations": { + "sentseg": { + "punct_sent": "Извини, что-то пошло не так в моем мозгу. Пожалуйста, повтори предыдущую реплику.", + "segments": [ + "Извини, что-то пошло не так в моем мозгу.", + "Пожалуйста, повтори предыдущую реплику." + ] + }, + "ner": [ + [] + ] + } + }, + "state_manager_method": "add_bot_utterance_last_chance", + "tags": [ + "last_chance" + ], + "is_enabled": true, + "source": { + "component": "components/sdjkfn90234rnklcg.yml", + "service": "services/agent_services/service_configs/dream_russian_persona_based" + } + }, + "timeout_service": { + "connector": { + "protocol": "python", + "class_name": "PredefinedTextConnector", + "response_text": "Извини, что-то пошло не так в моем мозгу. Пожалуйста, повтори предыдущую реплику.", + "annotations": { + "sentseg": { + "punct_sent": "Извини, что-то пошло не так в моем мозгу. Пожалуйста, повтори предыдущую реплику.", + "segments": [ + "Извини, что-то пошло не так в моем мозгу.", + "Пожалуйста, повтори предыдущую реплику." 
+ ] + }, + "ner": [ + [] + ] + } + }, + "state_manager_method": "add_bot_utterance_last_chance", + "tags": [ + "timeout" + ], + "is_enabled": true, + "source": { + "component": "components/SDKFjnoqikjh8v7gdsf.yml", + "service": "services/agent_services/service_configs/dream_russian_persona_based" + } + }, + "annotators": { + "sentseg": { + "connector": { + "protocol": "http", + "timeout": 1.5, + "url": "http://sentseg-ru:8011/sentseg" + }, + "dialog_formatter": "state_formatters.dp_formatters:preproc_last_human_utt_dialog", + "response_formatter": "state_formatters.dp_formatters:simple_formatter_service", + "previous_services": [ + "annotators.spelling_preprocessing" + ], + "state_manager_method": "add_annotation", + "is_enabled": true, + "source": { + "component": "components/Nw2SnrS4DGoALg6Cu58Zw.yml", + "service": "annotators/sentseg_ru/service_configs/sentseg-ru" + } + }, + "badlisted_words": { + "connector": { + "protocol": "http", + "timeout": 1.0, + "url": "http://badlisted-words-ru:8018/badlisted_words" + }, + "dialog_formatter": "state_formatters.dp_formatters:preproc_and_tokenized_last_human_utt_dialog", + "response_formatter": "state_formatters.dp_formatters:simple_formatter_service", + "previous_services": [ + "annotators.spelling_preprocessing", + "annotators.spacy_annotator" + ], + "state_manager_method": "add_annotation", + "is_enabled": true, + "source": { + "component": "components/WzIX0HJ0Xyo4V8ItwLew.yml", + "service": "annotators/BadlistedWordsDetector_ru/service_configs/badlisted-words-ru" + } + }, + "combined_classification_ru": { + "connector": { + "protocol": "http", + "timeout": 1.0, + "url": "http://combined-classification-ru:8198/model" + }, + "dialog_formatter": "state_formatters.dp_formatters:preproc_last_human_utt_dialog", + "response_formatter": "state_formatters.dp_formatters:simple_formatter_service", + "previous_services": [ + "annotators.spelling_preprocessing" + ], + "state_manager_method": "add_annotation", + "is_enabled": true, + "source": { + "component": "components/3dcc0944c3e0.yml", + "service": "annotators/combined_classification_ru/service_configs/combined-classification-ru" + } + }, + "intent_catcher": { + "connector": { + "protocol": "http", + "timeout": 1.0, + "url": "http://intent-catcher-ru:8014/detect" + }, + "dialog_formatter": "state_formatters.dp_formatters:last_utt_sentseg_segments_dialog", + "response_formatter": "state_formatters.dp_formatters:simple_formatter_service", + "previous_services": [ + "annotators.spelling_preprocessing", + "annotators.sentseg" + ], + "state_manager_method": "add_annotation", + "is_enabled": true, + "source": { + "component": "components/v2RpT5Ryi2ke97ANZxRnJg.yml", + "service": "annotators/IntentCatcherTransformers/service_configs/intent-catcher-ru" + } + }, + "relative_persona_extractor": { + "connector": { + "protocol": "http", + "timeout": 2.0, + "url": "http://relative-persona-extractor-ru:8133/respond" + }, + "dialog_formatter": "state_formatters.dp_formatters:context_formatter_dialog", + "response_formatter": "state_formatters.dp_formatters:simple_formatter_service", + "previous_services": [ + "annotators.spelling_preprocessing" + ], + "state_manager_method": "add_annotation", + "is_enabled": true, + "source": { + "component": "components/jkbn98dsfhg3qtfi.yml", + "service": "annotators/relative_persona_extractor/service_configs/relative-persona-extractor-ru" + } + } + }, + "response_annotators": { + "sentseg": { + "connector": { + "protocol": "http", + "timeout": 1.5, + "url": 
"http://sentseg-ru:8011/sentseg" + }, + "dialog_formatter": "state_formatters.dp_formatters:last_bot_utt_dialog", + "response_formatter": "state_formatters.dp_formatters:simple_formatter_service", + "previous_services": [ + "response_annotator_selectors" + ], + "state_manager_method": "add_annotation_prev_bot_utt", + "is_enabled": true, + "source": { + "component": "components/4SJxJlf3E8TTM94Uiqt5Xg.yml", + "service": "annotators/sentseg_ru/service_configs/sentseg-ru" + } + } + }, + "response_annotator_selectors": { + "connector": { + "protocol": "python", + "class_name": "skill_selectors.post_annotator_selector.connector:PostAnnotatorSelectorConnector", + "annotator_names": [ + "sentseg" + ] + }, + "response_formatter": "state_formatters.dp_formatters:simple_formatter_service", + "tags": [ + "selector" + ], + "is_enabled": true, + "source": { + "component": "components/LXrJDIf43gwNmPMNXG5Eg.yml", + "service": "services/response_annotator_selectors/service_configs/agent" + } + }, + "candidate_annotators": { + "badlisted_words": { + "connector": { + "protocol": "http", + "timeout": 1.0, + "url": "http://badlisted-words-ru:8018/badlisted_words_batch" + }, + "dialog_formatter": "state_formatters.dp_formatters:hypotheses_list", + "response_formatter": "state_formatters.dp_formatters:simple_formatter_service", + "previous_services": [ + "candidate_annotators.spacy_annotator" + ], + "state_manager_method": "add_hypothesis_annotation_batch", + "is_enabled": true, + "source": { + "component": "components/gySZ51dyqYi9TOFr6zY5kA.yml", + "service": "annotators/BadlistedWordsDetector_ru/service_configs/badlisted-words-ru" + } + }, + "combined_classification_ru": { + "connector": { + "protocol": "http", + "timeout": 1.0, + "url": "http://combined-classification-ru:8198/batch_model" + }, + "dialog_formatter": "state_formatters.dp_formatters:hypothesis_histories_list", + "response_formatter": "state_formatters.dp_formatters:simple_formatter_service", + "previous_services": [ + "skills" + ], + "state_manager_method": "add_hypothesis_annotation_batch", + "is_enabled": true, + "source": { + "component": "components/5eb485871be3.yml", + "service": "annotators/combined_classification_ru/service_configs/combined-classification-ru" + } + }, + "dialogrpt": { + "connector": { + "protocol": "http", + "timeout": 1.0, + "url": "http://dialogrpt-ru:8122/respond" + }, + "dialog_formatter": "state_formatters.dp_formatters:hypotheses_with_context_list", + "response_formatter": "state_formatters.dp_formatters:simple_formatter_service", + "previous_services": [ + "skills" + ], + "state_manager_method": "add_hypothesis_annotation_batch", + "is_enabled": true, + "source": { + "component": "components/DUW2u7FwPMyhBOWrK1HTg.yml", + "service": "services/dialogrpt_ru/service_configs/dialogrpt-ru" + } + } + }, + "skill_selectors": { + "rule_based_selector": { + "connector": { + "protocol": "python", + "class_name": "skill_selectors.rule_based_selector.connector:RuleBasedSkillSelectorConnector" + }, + "dialog_formatter": "state_formatters.dp_formatters:base_skill_selector_formatter_dialog", + "response_formatter": "state_formatters.dp_formatters:simple_formatter_service", + "previous_services": [ + "annotators" + ], + "tags": [ + "selector" + ], + "is_enabled": true, + "source": { + "component": "components/xSwFvtAUdvtQosvzpb7oMg.yml", + "service": "skill_selectors/rule_based_selector/service_configs/agent" + } + } + }, + "skills": { + "dff_intent_responder_skill": { + "connector": { + "protocol": "http", + "timeout": 2.0, + 
"url": "http://dff-intent-responder-ru-skill:8012/respond" + }, + "dialog_formatter": "state_formatters.dp_formatters:dff_intent_responder_skill_formatter", + "response_formatter": "state_formatters.dp_formatters:skill_with_attributes_formatter_service", + "previous_services": [ + "skill_selectors" + ], + "state_manager_method": "add_hypothesis", + "is_enabled": true, + "source": { + "component": "components/qJPPqfjKviUyvDTCn2Xw.yml", + "service": "skills/dff_intent_responder_skill/service_configs/dff-intent-responder-ru-skill" + } + }, + "dummy_skill": { + "connector": { + "protocol": "python", + "class_name": "skills.dummy_skill.connector:DummySkillConnector" + }, + "dialog_formatter": "state_formatters.dp_formatters:utt_sentrewrite_modified_last_dialog", + "response_formatter": "state_formatters.dp_formatters:skill_with_attributes_formatter_service", + "previous_services": [ + "skill_selectors" + ], + "state_manager_method": "add_hypothesis", + "is_enabled": true, + "source": { + "component": "components/uYkoK0vRp4bbIg9akI1yw.yml", + "service": "skills/dummy_skill/service_configs/agent" + } + }, + "dff_generative_skill": { + "connector": { + "protocol": "http", + "timeout": 4.0, + "url": "http://dff-generative-ru-skill:8092/respond" + }, + "dialog_formatter": "state_formatters.dp_formatters:dff_generative_skill_formatter", + "response_formatter": "state_formatters.dp_formatters:skill_with_attributes_formatter_service", + "previous_services": [ + "skill_selectors" + ], + "state_manager_method": "add_hypothesis", + "is_enabled": true, + "source": { + "component": "components/FJpakzPR4IC8F1sos7FSg.yml", + "service": "skills/dff_generative_skill/service_configs/dff-generative-ru-skill" + } + }, + "seq2seq_persona_based": { + "connector": { + "protocol": "http", + "timeout": 3.0, + "url": "http://seq2seq-persona-based-ru:8140/respond" + }, + "dialog_formatter": "state_formatters.dp_formatters:persona_bot_formatter", + "response_formatter": "state_formatters.dp_formatters:skill_with_attributes_formatter_service", + "previous_services": [ + "skill_selectors" + ], + "state_manager_method": "add_hypothesis", + "is_enabled": true, + "source": { + "component": "components/askjdfh9023rhuidfg.yml", + "service": "services/seq2seq_persona_based/service_configs/seq2seq-persona-based-ru" + } + } + }, + "response_selectors": { + "response_selector": { + "connector": { + "protocol": "http", + "timeout": 1.0, + "url": "http://convers-evaluation-selector-ru:8009/respond" + }, + "dialog_formatter": "state_formatters.dp_formatters:full_history_dialog", + "response_formatter": "state_formatters.dp_formatters:base_response_selector_formatter_service", + "previous_services": [ + "candidate_annotators" + ], + "tags": [ + "selector" + ], + "state_manager_method": "add_bot_utterance", + "is_enabled": true, + "source": { + "component": "components/SFiPofc4cGq6IRlit38e8A.yml", + "service": "response_selectors/convers_evaluation_based_selector/service_configs/convers-evaluation-selector-ru" + } + } + } + }, + "metadata": { + "display_name": "Dream Russian Persona-based", + "author": "DeepPavlov", + "description": "Russian version of DeepPavlov Dream Socialbot. This is a generative-based socialbot that uses Russian DialoGPT by DeepPavlov to generate most of the responses. 
It also contains intent catcher and responder components to cover special user requests.", + "version": "0.1.0", + "date_created": "2022-12-12T12:12:00", + "ram_usage": "50 GB", + "gpu_usage": "50 GB", + "disk_usage": "50 GB" + } +} diff --git a/assistant_dists/dream_russian_persona_based/proxy.yml b/assistant_dists/dream_russian_persona_based/proxy.yml new file mode 100644 index 0000000000..a0c4893194 --- /dev/null +++ b/assistant_dists/dream_russian_persona_based/proxy.yml @@ -0,0 +1,12 @@ +services: + + dialogrpt-ru: + command: [ "nginx", "-g", "daemon off;" ] + build: + context: dp/proxy/ + dockerfile: Dockerfile + environment: + - PROXY_PASS=proxy.deeppavlov.ai:8122 + - PORT=8122 + +version: "3.7" diff --git a/assistant_dists/dream_russian_persona_based/telegram.yml b/assistant_dists/dream_russian_persona_based/telegram.yml new file mode 100644 index 0000000000..9eda333817 --- /dev/null +++ b/assistant_dists/dream_russian_persona_based/telegram.yml @@ -0,0 +1,17 @@ +services: + agent-tg: + command: sh -c 'bin/wait && python -m deeppavlov_agent.run agent.channel=telegram agent.telegram_token=$TG_TOKEN agent.pipeline_config=assistant_dists/dream_russian_persona_based/pipeline_conf.json agent.db_config=assistant_dists/dream_russian_persona_based/db_conf.json' + env_file: [.env] + build: + context: ./ + dockerfile: dockerfile_agent + deploy: + resources: + limits: + memory: 4G + reservations: + memory: 2G + volumes: + - ".:/dp-agent" + +version: '3.7' diff --git a/assistant_dists/dream_russian_robot/pipeline_conf.json b/assistant_dists/dream_russian_robot/pipeline_conf.json index 1e99b34424..16cbcfe2a5 100644 --- a/assistant_dists/dream_russian_robot/pipeline_conf.json +++ b/assistant_dists/dream_russian_robot/pipeline_conf.json @@ -423,6 +423,9 @@ "previous_services": [ "candidate_annotators" ], + "tags": [ + "selector" + ], "state_manager_method": "add_bot_utterance", "is_enabled": true, "source": { diff --git a/assistant_dists/dream_script_based/pipeline_conf.json b/assistant_dists/dream_script_based/pipeline_conf.json index 8373b09ff8..2929518432 100644 --- a/assistant_dists/dream_script_based/pipeline_conf.json +++ b/assistant_dists/dream_script_based/pipeline_conf.json @@ -1498,6 +1498,9 @@ "previous_services": [ "candidate_annotators" ], + "tags": [ + "selector" + ], "state_manager_method": "add_bot_utterance", "is_enabled": true, "source": { diff --git a/assistant_dists/dream_sfc/pipeline_conf.json b/assistant_dists/dream_sfc/pipeline_conf.json index b90eb31b94..36d63675a6 100644 --- a/assistant_dists/dream_sfc/pipeline_conf.json +++ b/assistant_dists/dream_sfc/pipeline_conf.json @@ -1519,6 +1519,9 @@ "previous_services": [ "candidate_annotators" ], + "tags": [ + "selector" + ], "state_manager_method": "add_bot_utterance", "is_enabled": true, "source": { diff --git a/assistant_dists/fairytale_assistant/pipeline_conf.json b/assistant_dists/fairytale_assistant/pipeline_conf.json index b448afa490..ad9c8a6564 100644 --- a/assistant_dists/fairytale_assistant/pipeline_conf.json +++ b/assistant_dists/fairytale_assistant/pipeline_conf.json @@ -256,6 +256,9 @@ "previous_services": [ "candidate_annotators" ], + "tags": [ + "selector" + ], "state_manager_method": "add_bot_utterance", "is_enabled": true, "source": { diff --git a/assistant_dists/fairytale_ru_assistant/pipeline_conf.json b/assistant_dists/fairytale_ru_assistant/pipeline_conf.json index a50480def1..afefb77c68 100644 --- a/assistant_dists/fairytale_ru_assistant/pipeline_conf.json +++ 
b/assistant_dists/fairytale_ru_assistant/pipeline_conf.json @@ -198,6 +198,9 @@ "previous_services": [ "candidate_annotators" ], + "tags": [ + "selector" + ], "state_manager_method": "add_bot_utterance", "is_enabled": true, "source": { diff --git a/assistant_dists/fashion_stylist_assistant/pipeline_conf.json b/assistant_dists/fashion_stylist_assistant/pipeline_conf.json index 98036d6359..549b140f85 100644 --- a/assistant_dists/fashion_stylist_assistant/pipeline_conf.json +++ b/assistant_dists/fashion_stylist_assistant/pipeline_conf.json @@ -256,6 +256,9 @@ "previous_services": [ "candidate_annotators" ], + "tags": [ + "selector" + ], "state_manager_method": "add_bot_utterance", "is_enabled": true, "source": { diff --git a/assistant_dists/google_api/pipeline_conf.json b/assistant_dists/google_api/pipeline_conf.json index 4195c3d9dd..77942fc5e4 100644 --- a/assistant_dists/google_api/pipeline_conf.json +++ b/assistant_dists/google_api/pipeline_conf.json @@ -199,6 +199,9 @@ "previous_services": [ "candidate_annotators" ], + "tags": [ + "selector" + ], "state_manager_method": "add_bot_utterance", "is_enabled": true, "source": { diff --git a/assistant_dists/journalist_helper_ru_assistant/pipeline_conf.json b/assistant_dists/journalist_helper_ru_assistant/pipeline_conf.json index 28c8c0238d..c662cab122 100644 --- a/assistant_dists/journalist_helper_ru_assistant/pipeline_conf.json +++ b/assistant_dists/journalist_helper_ru_assistant/pipeline_conf.json @@ -198,6 +198,9 @@ "previous_services": [ "candidate_annotators" ], + "tags": [ + "selector" + ], "state_manager_method": "add_bot_utterance", "is_enabled": true, "source": { diff --git a/assistant_dists/life_coaching_assistant/pipeline_conf.json b/assistant_dists/life_coaching_assistant/pipeline_conf.json index 5549626b74..53f122e91c 100644 --- a/assistant_dists/life_coaching_assistant/pipeline_conf.json +++ b/assistant_dists/life_coaching_assistant/pipeline_conf.json @@ -256,6 +256,9 @@ "previous_services": [ "candidate_annotators" ], + "tags": [ + "selector" + ], "state_manager_method": "add_bot_utterance", "is_enabled": true, "source": { diff --git a/assistant_dists/marketing_assistant/pipeline_conf.json b/assistant_dists/marketing_assistant/pipeline_conf.json index d30747532e..00fb647cc5 100644 --- a/assistant_dists/marketing_assistant/pipeline_conf.json +++ b/assistant_dists/marketing_assistant/pipeline_conf.json @@ -256,6 +256,9 @@ "previous_services": [ "candidate_annotators" ], + "tags": [ + "selector" + ], "state_manager_method": "add_bot_utterance", "is_enabled": true, "source": { diff --git a/assistant_dists/multiskill_ai_assistant/docker-compose.override.yml b/assistant_dists/multiskill_ai_assistant/docker-compose.override.yml index 8391073bc5..2a5437de8f 100644 --- a/assistant_dists/multiskill_ai_assistant/docker-compose.override.yml +++ b/assistant_dists/multiskill_ai_assistant/docker-compose.override.yml @@ -18,6 +18,7 @@ services: env_file: [ .env ] build: context: ./annotators/SentSeg/ + dockerfile: Dockerfile-test command: flask run -h 0.0.0.0 -p 8011 environment: - FLASK_APP=server diff --git a/assistant_dists/multiskill_ai_assistant/pipeline_conf.json b/assistant_dists/multiskill_ai_assistant/pipeline_conf.json index 440b35f329..8f7736e096 100644 --- a/assistant_dists/multiskill_ai_assistant/pipeline_conf.json +++ b/assistant_dists/multiskill_ai_assistant/pipeline_conf.json @@ -340,6 +340,9 @@ "previous_services": [ "candidate_annotators" ], + "tags": [ + "selector" + ], "state_manager_method": "add_bot_utterance", 
"is_enabled": true, "source": { diff --git a/assistant_dists/multiskill_ru_assistant/pipeline_conf.json b/assistant_dists/multiskill_ru_assistant/pipeline_conf.json index 340c6b78ae..90e31e627a 100644 --- a/assistant_dists/multiskill_ru_assistant/pipeline_conf.json +++ b/assistant_dists/multiskill_ru_assistant/pipeline_conf.json @@ -240,6 +240,9 @@ "previous_services": [ "candidate_annotators" ], + "tags": [ + "selector" + ], "state_manager_method": "add_bot_utterance", "is_enabled": true, "source": { diff --git a/assistant_dists/nutrition_assistant/pipeline_conf.json b/assistant_dists/nutrition_assistant/pipeline_conf.json index 72c2f9ce53..c16a1bc306 100644 --- a/assistant_dists/nutrition_assistant/pipeline_conf.json +++ b/assistant_dists/nutrition_assistant/pipeline_conf.json @@ -256,6 +256,9 @@ "previous_services": [ "candidate_annotators" ], + "tags": [ + "selector" + ], "state_manager_method": "add_bot_utterance", "is_enabled": true, "source": { diff --git a/assistant_dists/storyteller_ru_assistant/pipeline_conf.json b/assistant_dists/storyteller_ru_assistant/pipeline_conf.json index 7909f3ada4..fb7737f678 100644 --- a/assistant_dists/storyteller_ru_assistant/pipeline_conf.json +++ b/assistant_dists/storyteller_ru_assistant/pipeline_conf.json @@ -219,6 +219,9 @@ "previous_services": [ "candidate_annotators" ], + "tags": [ + "selector" + ], "state_manager_method": "add_bot_utterance", "is_enabled": true, "source": { diff --git a/assistant_dists/universal_prompted_assistant/pipeline_conf.json b/assistant_dists/universal_prompted_assistant/pipeline_conf.json index e8dbec6406..71b5da25b6 100644 --- a/assistant_dists/universal_prompted_assistant/pipeline_conf.json +++ b/assistant_dists/universal_prompted_assistant/pipeline_conf.json @@ -222,6 +222,9 @@ "previous_services": [ "candidate_annotators" ], + "tags": [ + "selector" + ], "state_manager_method": "add_bot_utterance", "is_enabled": true, "source": { diff --git a/assistant_dists/universal_ru_prompted_assistant/pipeline_conf.json b/assistant_dists/universal_ru_prompted_assistant/pipeline_conf.json index 1ca965dfe0..43314dc578 100644 --- a/assistant_dists/universal_ru_prompted_assistant/pipeline_conf.json +++ b/assistant_dists/universal_ru_prompted_assistant/pipeline_conf.json @@ -164,6 +164,9 @@ "previous_services": [ "candidate_annotators" ], + "tags": [ + "selector" + ], "state_manager_method": "add_bot_utterance", "is_enabled": true, "source": { diff --git a/common/generative_configs/openai-text-davinci-003.json b/common/generative_configs/openai-text-davinci-003.json index 107e944b80..6e48754abc 100644 --- a/common/generative_configs/openai-text-davinci-003.json +++ b/common/generative_configs/openai-text-davinci-003.json @@ -1,7 +1,7 @@ { - "max_tokens": 64, + "max_tokens": 256, "temperature": 0.4, "top_p": 1.0, "frequency_penalty": 0, "presence_penalty": 0 -} \ No newline at end of file +} diff --git a/components.tsv b/components.tsv index 30e5027317..7abd795e6b 100644 --- a/components.tsv +++ b/components.tsv @@ -135,14 +135,14 @@ 8130 transformers-lm-gptj 8131 openai-api-davinci3 8132 gpt2-multilingual -8133 relative-persona-extractor +8133 relative-persona-extractor,relative-persona-extractor-ru 8134 dff-dream-persona-gpt-j-prompted-skill 8135 prompt-selector,prompt-selector-ru 8136 property-extraction 8137 dff-dream-persona-chatgpt-prompted-skill 8138 dff-marketing-prompted-skill 8139 dff-fairytale-prompted-skill -8140 seq2seq-persona-based +8140 seq2seq-persona-based,seq2seq-persona-based-ru 8141 
dff-nutrition-prompted-skill 8142 dff-life-coaching-prompted-skill 8143 dff-ai-faq-prompted-skill diff --git a/components/3vsfbB89yVpQm5OVW0YcvQ.yml b/components/3vsfbB89yVpQm5OVW0YcvQ.yml index affe24b240..e2aa6cd1d1 100644 --- a/components/3vsfbB89yVpQm5OVW0YcvQ.yml +++ b/components/3vsfbB89yVpQm5OVW0YcvQ.yml @@ -19,7 +19,8 @@ previous_services: - candidate_annotators required_previous_services: null state_manager_method: add_bot_utterance -tags: null +tags: +- selector endpoint: respond service: response_selectors/convers_evaluation_based_selector/service_configs/scripts-priority-selector date_created: '2023-03-16T09:45:32' diff --git a/components/KX4drAocVa5APcivWHeBNQ.yml b/components/KX4drAocVa5APcivWHeBNQ.yml index 0b79c0f1f8..0a500cfbbe 100644 --- a/components/KX4drAocVa5APcivWHeBNQ.yml +++ b/components/KX4drAocVa5APcivWHeBNQ.yml @@ -18,7 +18,8 @@ previous_services: - response_annotators required_previous_services: null state_manager_method: add_bot_utterance -tags: null +tags: +- selector endpoint: respond service: response_selectors/confidence_based_response_selector/service_configs/confidence-based-response-selector date_created: '2023-03-16T09:45:32' diff --git a/components/SDKFjnoqikjh8v7gdsf.yml b/components/SDKFjnoqikjh8v7gdsf.yml new file mode 100644 index 0000000000..e5d84405b6 --- /dev/null +++ b/components/SDKFjnoqikjh8v7gdsf.yml @@ -0,0 +1,32 @@ +name: timeout_service +display_name: Timeout Service +component_type: null +model_type: null +is_customizable: false +author: publisher@deeppavlov.ai +description: Timeout Service +ram_usage: 100M +gpu_usage: null +group: timeout_service +connector: + protocol: python + class_name: PredefinedTextConnector + response_text: Извини, что-то пошло не так в моем мозгу. Пожалуйста, повтори предыдущую + реплику. + annotations: + sentseg: + punct_sent: Извини, что-то пошло не так в моем мозгу. Пожалуйста, повтори предыдущую + реплику. + segments: + - Извини, что-то пошло не так в моем мозгу. + - Пожалуйста, повтори предыдущую реплику. 
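Note on the `PredefinedTextConnector` entries introduced in this diff (the timeout service above and the last-chance services further down): the connector short-circuits the pipeline with a canned utterance and ships pre-computed `sentseg` annotations alongside it, presumably so that downstream components that read sentence segmentation still find the fields they expect when the fallback fires. A rough illustration of the response shape implied by the YAML keys (values copied from the config above; this is not the connector's actual implementation):

```python
# Illustrative only: approximate payload of the Russian timeout/last-chance fallback,
# mirroring the response_text / annotations keys configured in the YAML above.
FALLBACK_RESPONSE = {
    "text": "Извини, что-то пошло не так в моем мозгу. Пожалуйста, повтори предыдущую реплику.",
    "annotations": {
        "sentseg": {
            "punct_sent": "Извини, что-то пошло не так в моем мозгу. Пожалуйста, повтори предыдущую реплику.",
            "segments": [
                "Извини, что-то пошло не так в моем мозгу.",
                "Пожалуйста, повтори предыдущую реплику.",
            ],
        }
    },
}
```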
+dialog_formatter: null +response_formatter: null +previous_services: null +required_previous_services: null +state_manager_method: add_bot_utterance_last_chance +tags: +- timeout +endpoint: respond +service: services/agent_services/service_configs/dream_russian_persona_based +date_created: '2023-03-04T19:27:44' diff --git a/components/SFiPofc4cGq6IRlit38e8A.yml b/components/SFiPofc4cGq6IRlit38e8A.yml index 5964d37016..38d48ac58b 100644 --- a/components/SFiPofc4cGq6IRlit38e8A.yml +++ b/components/SFiPofc4cGq6IRlit38e8A.yml @@ -19,7 +19,8 @@ previous_services: - candidate_annotators required_previous_services: null state_manager_method: add_bot_utterance -tags: null +tags: +- selector endpoint: respond service: response_selectors/convers_evaluation_based_selector/service_configs/convers-evaluation-selector-ru date_created: '2023-03-16T09:45:32' diff --git a/components/YJzc7NwGrLmKp6gfZJh7X1.yml b/components/YJzc7NwGrLmKp6gfZJh7X1.yml index ef3df4557b..a0b90d259c 100644 --- a/components/YJzc7NwGrLmKp6gfZJh7X1.yml +++ b/components/YJzc7NwGrLmKp6gfZJh7X1.yml @@ -19,7 +19,8 @@ previous_services: - candidate_annotators required_previous_services: null state_manager_method: add_bot_utterance -tags: null +tags: +- selector endpoint: respond service: response_selectors/ranking_based_response_selector/service_configs/ranking-based-response-selector date_created: '2023-03-16T09:45:32' diff --git a/components/YJzc7NwGrLmKp6gfZJh7Xg.yml b/components/YJzc7NwGrLmKp6gfZJh7Xg.yml index 0b61833096..cec9a3b4fa 100644 --- a/components/YJzc7NwGrLmKp6gfZJh7Xg.yml +++ b/components/YJzc7NwGrLmKp6gfZJh7Xg.yml @@ -19,7 +19,8 @@ previous_services: - candidate_annotators required_previous_services: null state_manager_method: add_bot_utterance -tags: null +tags: +- selector endpoint: respond service: response_selectors/convers_evaluation_based_selector/service_configs/no-restrictions-selector date_created: '2023-03-16T09:45:32' diff --git a/components/askjdfh9023rhuidfg.yml b/components/askjdfh9023rhuidfg.yml new file mode 100644 index 0000000000..ad7633178b --- /dev/null +++ b/components/askjdfh9023rhuidfg.yml @@ -0,0 +1,26 @@ +name: seq2seq_persona_based +display_name: Russian Seq2seq Persona-based +component_type: Script-based with NNs +model_type: NN-based +is_customizable: false +author: publisher@deeppavlov.ai +description: Russian generative service based on Transformers seq2seq model, the model was + pre-trained on the PersonaChat dataset to generate a response conditioned on a several + sentences of the socialbot's persona +ram_usage: 1.5G +gpu_usage: 1.5G +group: skills +connector: + protocol: http + timeout: 3.0 + url: http://seq2seq-persona-based-ru:8140/respond +dialog_formatter: state_formatters.dp_formatters:persona_bot_formatter +response_formatter: state_formatters.dp_formatters:skill_with_attributes_formatter_service +previous_services: +- skill_selectors +required_previous_services: null +state_manager_method: add_hypothesis +tags: null +endpoint: respond +service: services/seq2seq_persona_based/service_configs/seq2seq-persona-based-ru +date_created: '2023-03-16T09:45:32' diff --git a/components/hE12LfxAkX3K9gU0nU4yE2.yml b/components/hE12LfxAkX3K9gU0nU4yE2.yml index b5a16fb5f2..0978e91f83 100644 --- a/components/hE12LfxAkX3K9gU0nU4yE2.yml +++ b/components/hE12LfxAkX3K9gU0nU4yE2.yml @@ -19,7 +19,8 @@ previous_services: - candidate_annotators required_previous_services: null state_manager_method: add_bot_utterance -tags: null +tags: +- selector endpoint: respond service: 
response_selectors/ranking_based_response_selector/service_configs/ranking-based-response-selector-ru date_created: '2023-03-16T09:45:32' diff --git a/components/hjbdfiugef7h3niknto59u9dgf.yml b/components/hjbdfiugef7h3niknto59u9dgf.yml index 5e44ff2c2c..edb2dca9d1 100644 --- a/components/hjbdfiugef7h3niknto59u9dgf.yml +++ b/components/hjbdfiugef7h3niknto59u9dgf.yml @@ -21,7 +21,8 @@ previous_services: - candidate_annotators required_previous_services: null state_manager_method: add_bot_utterance -tags: null +tags: +- selector endpoint: respond service: response_selectors/ranking_and_intent_based_response_selector/service_configs/ranking-and-intent-based-response-selector-ru date_created: '2023-03-16T09:45:32' diff --git a/components/jkbn98dsfhg3qtfi.yml b/components/jkbn98dsfhg3qtfi.yml new file mode 100644 index 0000000000..5ca4f5f95b --- /dev/null +++ b/components/jkbn98dsfhg3qtfi.yml @@ -0,0 +1,25 @@ +name: relative_persona_extractor +display_name: Russian Relative Persona Extractor +component_type: null +model_type: Dictionary/Pattern-based +is_customizable: false +author: publisher@deeppavlov.ai +description: Russian annotator utilizing Sentence Ranker to rank persona sentences and selecting + `N_SENTENCES_TO_RETURN` the most relevant sentences +ram_usage: 100M +gpu_usage: null +group: annotators +connector: + protocol: http + timeout: 2.0 + url: http://relative-persona-extractor-ru:8133/respond +dialog_formatter: state_formatters.dp_formatters:context_formatter_dialog +response_formatter: state_formatters.dp_formatters:simple_formatter_service +previous_services: +- annotators.spelling_preprocessing +required_previous_services: null +state_manager_method: add_annotation +tags: null +endpoint: respond +service: annotators/relative_persona_extractor/service_configs/relative-persona-extractor-ru +date_created: '2023-03-16T09:45:32' diff --git a/components/ksDjnfoiwur902hriwnefkwfi2.yml b/components/ksDjnfoiwur902hriwnefkwfi2.yml index 13626f8afe..11a72837c7 100644 --- a/components/ksDjnfoiwur902hriwnefkwfi2.yml +++ b/components/ksDjnfoiwur902hriwnefkwfi2.yml @@ -21,7 +21,8 @@ previous_services: - candidate_annotators required_previous_services: null state_manager_method: add_bot_utterance -tags: null +tags: +- selector endpoint: respond service: response_selectors/ranking_and_intent_based_response_selector/service_configs/ranking-and-intent-based-response-selector date_created: '2023-03-16T09:45:32' diff --git a/components/ly2AVNtIcJpTWz1qJ1mvKQ.yml b/components/ly2AVNtIcJpTWz1qJ1mvKQ.yml index dfdb8ef3a9..132a046661 100644 --- a/components/ly2AVNtIcJpTWz1qJ1mvKQ.yml +++ b/components/ly2AVNtIcJpTWz1qJ1mvKQ.yml @@ -19,7 +19,8 @@ previous_services: - candidate_annotators required_previous_services: null state_manager_method: add_bot_utterance -tags: null +tags: +- selector endpoint: respond service: response_selectors/convers_evaluation_based_selector/service_configs/convers-evaluation-selector date_created: '2023-03-16T09:45:32' diff --git a/components/pmXHLkbPWpFqq37vtWCuS.yml b/components/pmXHLkbPWpFqq37vtWCuS.yml new file mode 100644 index 0000000000..218643b21a --- /dev/null +++ b/components/pmXHLkbPWpFqq37vtWCuS.yml @@ -0,0 +1,24 @@ +name: dff_user_kg_skill +display_name: User KG Skill +component_type: Script-based w/o NNs +model_type: Dictionary/Pattern-based +is_customizable: false +author: publisher@deeppavlov.ai +description: Looks up entities detected by Entity Detection in Knowledge Graph and uses these entities in bot's responses +ram_usage: 128M +gpu_usage: null +group: 
skills +connector: + protocol: http + timeout: 2.0 + url: http://dff-user-kg-skill:8028/respond +dialog_formatter: state_formatters.dp_formatters:dff_user_kg_skill_formatter +response_formatter: state_formatters.dp_formatters:skill_with_attributes_formatter_service +previous_services: +- skill_selectors +required_previous_services: null +state_manager_method: add_hypothesis +tags: null +endpoint: respond +service: skills/dff_user_kg_skill/service_configs/dff-user-kg-skill +date_created: '2023-06-22T09:45:32' diff --git a/components/sdjkfn90234rnklcg.yml b/components/sdjkfn90234rnklcg.yml new file mode 100644 index 0000000000..9df1f03f51 --- /dev/null +++ b/components/sdjkfn90234rnklcg.yml @@ -0,0 +1,32 @@ +name: last_chance_service +display_name: Last Chance Service +component_type: null +model_type: null +is_customizable: false +author: publisher@deeppavlov.ai +description: Last Chance Service +ram_usage: 100M +gpu_usage: null +group: last_chance_service +connector: + protocol: python + class_name: PredefinedTextConnector + response_text: Извини, что-то пошло не так в моем мозгу. Пожалуйста, повтори предыдущую + реплику. + annotations: + sentseg: + punct_sent: Извини, что-то пошло не так в моем мозгу. Пожалуйста, повтори предыдущую + реплику. + segments: + - Извини, что-то пошло не так в моем мозгу. + - Пожалуйста, повтори предыдущую реплику. +dialog_formatter: null +response_formatter: null +previous_services: null +required_previous_services: null +state_manager_method: add_bot_utterance_last_chance +tags: +- last_chance +endpoint: respond +service: services/agent_services/service_configs/dream_russian_persona_based +date_created: '2023-03-04T19:27:44' diff --git a/components/vCZorqdcCVBI.yml b/components/vCZorqdcCVBI.yml new file mode 100644 index 0000000000..de2ea3ed18 --- /dev/null +++ b/components/vCZorqdcCVBI.yml @@ -0,0 +1,32 @@ +name: last_chance_service +display_name: Last Chance Service +component_type: null +model_type: null +is_customizable: false +author: publisher@deeppavlov.ai +description: Last Chance Service +ram_usage: 100M +gpu_usage: null +group: last_chance_service +connector: + protocol: python + class_name: PredefinedTextConnector + response_text: Sorry, something went wrong inside. Please tell me, what did you + say. + annotations: + sentseg: + punct_sent: Sorry, something went wrong inside. Please tell me, what did you + say. + segments: + - Sorry, something went wrong inside. + - Please tell me, what did you say. 
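The Russian Relative Persona Extractor registered earlier in this diff (components/jkbn98dsfhg3qtfi.yml) is described as using a Sentence Ranker to rank persona sentences and return the `N_SENTENCES_TO_RETURN` most relevant ones. A minimal sketch of that idea, assuming a hypothetical ranker endpoint and request/response shape rather than the annotator's real code:

```python
# Sketch only: rank persona sentences against the recent dialog context and keep the top N.
# The ranker URL and its payload format are assumptions, not the deployed service's API.
import requests

SENTENCE_RANKER_URL = "http://sentence-ranker:8128/respond"  # hypothetical endpoint
N_SENTENCES_TO_RETURN = 3


def most_relevant_persona_sentences(context: str, persona: list) -> list:
    # One [context, candidate] pair per persona sentence; assume one score per pair comes back.
    pairs = [[context, sentence] for sentence in persona]
    scores = requests.post(SENTENCE_RANKER_URL, json={"sentence_pairs": pairs}, timeout=2).json()[0]["batch"]
    ranked = sorted(zip(persona, scores), key=lambda pair: pair[1], reverse=True)
    return [sentence for sentence, _ in ranked[:N_SENTENCES_TO_RETURN]]
```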
+dialog_formatter: null +response_formatter: null +previous_services: null +required_previous_services: null +state_manager_method: add_bot_utterance_last_chance +tags: +- last_chance +endpoint: respond +service: services/agent_services/service_configs/dream_kg +date_created: '2023-10-13T19:27:44' diff --git a/components/zTK24mc7QujCiFRxGdi2Lg.yml b/components/zTK24mc7QujCiFRxGdi2Lg.yml index 2475db814e..b667f55fc8 100644 --- a/components/zTK24mc7QujCiFRxGdi2Lg.yml +++ b/components/zTK24mc7QujCiFRxGdi2Lg.yml @@ -19,7 +19,8 @@ previous_services: - candidate_annotators required_previous_services: null state_manager_method: add_bot_utterance -tags: null +tags: +- selector endpoint: respond service: response_selectors/convers_evaluation_based_selector/service_configs/convers-evaluation-no-scripts-selector date_created: '2023-03-16T09:45:32' diff --git a/response_selectors/ranking_based_response_selector/conftest.py b/response_selectors/ranking_based_response_selector/conftest.py new file mode 100644 index 0000000000..e5826847e1 --- /dev/null +++ b/response_selectors/ranking_based_response_selector/conftest.py @@ -0,0 +1,27 @@ +import pytest + + +def pytest_addoption(parser): + parser.addoption("--uri", action="store", default="http://0.0.0.0") + parser.addoption("--port", action="store", default="8002") + parser.addoption("--handle", action="store", default="respond") + + +@pytest.fixture +def uri(request) -> str: + return request.config.getoption("--uri") + + +@pytest.fixture +def port(request) -> str: + return request.config.getoption("--port") + + +@pytest.fixture +def handle(request) -> str: + return request.config.getoption("--handle") + + +@pytest.fixture +def url(uri, port, handle) -> str: + return f"{uri}:{port}/{handle}" diff --git a/response_selectors/ranking_based_response_selector/test.py b/response_selectors/ranking_based_response_selector/test.py index 88d88655bf..78245293b8 100644 --- a/response_selectors/ranking_based_response_selector/test.py +++ b/response_selectors/ranking_based_response_selector/test.py @@ -1,19 +1,11 @@ import requests import json -from os import getenv -SERVICE_PORT = getenv("SERVICE_PORT") - - -def main(): +def test_selector(url: str): with open("test_data.json", "r") as f: data = json.load(f) # To skip "Oh, and remember this dialog's id" that raises error due to absence of 'dialog_id' field in test_data. 
data["dialogs"][0]["human_utterances"].append(data["dialogs"][0]["human_utterances"][0]) - result = requests.post(f"http://0.0.0.0:{SERVICE_PORT}/respond", json=data).json() - assert result[0][0] in ["program_y", "movie_tfidf_retrieval"], print(result) - - -if __name__ == "__main__": - main() + result = requests.post(url, json=data).json() + assert result[0][0] == "program_y" diff --git a/services/agent_services/service_configs/dream_russian_persona_based/environment.yml b/services/agent_services/service_configs/dream_russian_persona_based/environment.yml new file mode 100644 index 0000000000..4e914d8c70 --- /dev/null +++ b/services/agent_services/service_configs/dream_russian_persona_based/environment.yml @@ -0,0 +1,7 @@ +WAIT_HOSTS: '' +WAIT_HOSTS_TIMEOUT: ${WAIT_TIMEOUT:-1200} +HIGH_PRIORITY_INTENTS: 1 +RESTRICTION_FOR_SENSITIVE_CASE: 1 +ALWAYS_TURN_ON_ALL_SKILLS: 0 +LANGUAGE: RU +FALLBACK_FILE: fallbacks_dream_ru.json diff --git a/services/agent_services/service_configs/dream_russian_persona_based/service.yml b/services/agent_services/service_configs/dream_russian_persona_based/service.yml new file mode 100644 index 0000000000..6bfc161626 --- /dev/null +++ b/services/agent_services/service_configs/dream_russian_persona_based/service.yml @@ -0,0 +1,18 @@ +name: agent +endpoints: +- respond +compose: + command: sh -c 'bin/wait && python -m deeppavlov_agent.run agent.pipeline_config=assistant_dists/dream_russian_persona_based/pipeline_conf.json' + environment: + WAIT_HOSTS: '' + WAIT_HOSTS_TIMEOUT: ${WAIT_TIMEOUT:-1200} + HIGH_PRIORITY_INTENTS: 1 + RESTRICTION_FOR_SENSITIVE_CASE: 1 + ALWAYS_TURN_ON_ALL_SKILLS: 0 + LANGUAGE: RU + FALLBACK_FILE: fallbacks_dream_ru.json + volumes: + - .:/dp-agent + ports: + - 4242:4242 +proxy: null diff --git a/services/knowledge_grounding/conftest.py b/services/knowledge_grounding/conftest.py new file mode 100644 index 0000000000..c0dea73e63 --- /dev/null +++ b/services/knowledge_grounding/conftest.py @@ -0,0 +1,10 @@ +import pytest + + +def pytest_addoption(parser): + parser.addoption("--url", action="store", default="http://0.0.0.0:8083/respond") + + +@pytest.fixture +def url(request) -> str: + return request.config.getoption("--url") diff --git a/services/knowledge_grounding/test.py b/services/knowledge_grounding/test.py index e9068e351d..73a25ca6d8 100644 --- a/services/knowledge_grounding/test.py +++ b/services/knowledge_grounding/test.py @@ -1,27 +1,33 @@ +import allure +import pytest import requests -def test_knowledge_grounding(): - url = "http://0.0.0.0:8083/respond" - - checked_sentence1 = ( - "When Mabel visited their home to play the piano, " - "she occasionally glimpsed a flitting swirl of white in the next room, " - "sometimes even received a note of thanks for calling, but she never actually " - "spoke with the reclusive, almost spectral Emily." - ) - knowledge1 = ( - "The real-life soap opera behind the publication of Emily Dickinson’s poems\n" - "When Mabel visited their home to play the piano, she occasionally glimpsed " - "a flitting swirl of white in the next room, sometimes even received a note of " - "thanks for calling, but she never actually spoke with the reclusive, almost spectral Emily." - ) - text1 = "Yeah she was an icon she died in 1886 at the tender age of 55." - - checked_sentence2 = "Penguins are a group of aquatic flightless birds." - knowledge2 = "Penguins are a group of aquatic flightless birds." - text2 = "Who are penguins?" 
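The test refactors in this diff share one pattern: hard-coded hosts and ports are replaced by a `url` fixture whose value comes from command-line options declared in a per-service conftest.py, so the same test can be pointed at a local container or a remote proxy. As a usage sketch, the refactored knowledge_grounding test could be run against a specific deployment roughly like this (the URL shown is simply the conftest default; assumed to be invoked from the repository root):

```python
# Equivalent shell form (assumed invocation from the repo root):
#   python -m pytest services/knowledge_grounding/test.py --url http://0.0.0.0:8083/respond
import sys

import pytest

sys.exit(
    pytest.main(["services/knowledge_grounding/test.py", "--url", "http://0.0.0.0:8083/respond"])
)
```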
- +@allure.description("""Knowledge grounding multi-language test""") +@pytest.mark.parametrize( + "checked_sentence, knowledge, text, expected", + [ + ( + "When Mabel visited their home to play the piano, " + "she occasionally glimpsed a flitting swirl of white in the next room, " + "sometimes even received a note of thanks for calling, but she never actually " + "spoke with the reclusive, almost spectral Emily.", + "The real-life soap opera behind the publication of Emily Dickinson’s poems\n" + "When Mabel visited their home to play the piano, she occasionally glimpsed " + "a flitting swirl of white in the next room, sometimes even received a note of " + "thanks for calling, but she never actually spoke with the reclusive, almost spectral Emily.", + "Yeah she was an icon she died in 1886 at the tender age of 55.", + True, + ), + ( + "Penguins are a group of aquatic flightless birds.", + "Penguins are a group of aquatic flightless birds.", + "Who are penguins?", + True, + ), + ], +) +def test_knowledge_grounding(url: str, checked_sentence, knowledge, text, expected): history = ( "Do you know who Emily Dickson is?\n" 'Emily Dickinson? The poet? I do! "Tell all the truth, but tell it slant" ' @@ -30,14 +36,8 @@ def test_knowledge_grounding(): request_data = { "batch": [ - {"checked_sentence": checked_sentence1, "knowledge": knowledge1, "text": text1, "history": history}, - {"checked_sentence": checked_sentence2, "knowledge": knowledge2, "text": text2, "history": history}, + {"checked_sentence": checked_sentence, "knowledge": knowledge, "text": text, "history": history}, ] } results = requests.post(url, json=request_data).json() - assert all(results), f"Got empty string among results" - print("Got\n{}\nSuccess".format(results)) - - -if __name__ == "__main__": - test_knowledge_grounding() + assert all(results) diff --git a/services/seq2seq_persona_based/bart-large-ru-persona-chat_v1.json b/services/seq2seq_persona_based/bart-large-ru-persona-chat_v1.json new file mode 100644 index 0000000000..38a0162fa5 --- /dev/null +++ b/services/seq2seq_persona_based/bart-large-ru-persona-chat_v1.json @@ -0,0 +1,5 @@ +{ + "max_new_tokens": 256, + "penalty_alpha": 0.15, + "top_k": 10 +} \ No newline at end of file diff --git a/services/seq2seq_persona_based/service_configs/seq2seq-persona-based-ru/environment.yml b/services/seq2seq_persona_based/service_configs/seq2seq-persona-based-ru/environment.yml new file mode 100644 index 0000000000..b99231cca8 --- /dev/null +++ b/services/seq2seq_persona_based/service_configs/seq2seq-persona-based-ru/environment.yml @@ -0,0 +1,9 @@ +SERVICE_PORT: 8140 +SERVICE_NAME: seq2seq_persona_based +PRETRAINED_MODEL_NAME_OR_PATH: DeepPavlov/mbart-large-50-ru-persona-chat +PAIR_DIALOG_HISTORY_LENGTH: 2 +CHAT_EVERY_SENT_MAX_LENGTH: 25 +PERSONA_EVERY_SENT_MAX_LENGTH: 19 +GENERATION_PARAMS_CONFIG: bart-large-ru-persona-chat_v1.json +CUDA_VISIBLE_DEVICES: '0' +FLASK_APP: server diff --git a/services/seq2seq_persona_based/service_configs/seq2seq-persona-based-ru/service.yml b/services/seq2seq_persona_based/service_configs/seq2seq-persona-based-ru/service.yml new file mode 100644 index 0000000000..13295e7f1c --- /dev/null +++ b/services/seq2seq_persona_based/service_configs/seq2seq-persona-based-ru/service.yml @@ -0,0 +1,35 @@ +name: seq2seq-persona-based-ru +endpoints: +- respond +compose: + env_file: + - .env + build: + args: + SERVICE_PORT: 8140 + SERVICE_NAME: seq2seq_persona_based + PRETRAINED_MODEL_NAME_OR_PATH: DeepPavlov/bart-base-en-persona-chat + PAIR_DIALOG_HISTORY_LENGTH: 
2 + CHAT_EVERY_SENT_MAX_LENGTH: 25 + PERSONA_EVERY_SENT_MAX_LENGTH: 19 + GENERATION_PARAMS_CONFIG: bart-base-en-persona-chat_v1.json + CUDA_VISIBLE_DEVICES: '0' + FLASK_APP: server + context: . + dockerfile: ./services/seq2seq_persona_based/Dockerfile + command: flask run -h 0.0.0.0 -p 8140 + environment: + - CUDA_VISIBLE_DEVICES=0 + - FLASK_APP=server + deploy: + resources: + limits: + memory: 2G + reservations: + memory: 2G + volumes: + - ./services/seq2seq_persona_based:/src + - ./common:/src/common + - ~/.deeppavlov/cache:/root/.cache + ports: + - 8140:8140 diff --git a/services/text_qa/Dockerfile-test b/services/text_qa/Dockerfile-test new file mode 100644 index 0000000000..5ce4b61a30 --- /dev/null +++ b/services/text_qa/Dockerfile-test @@ -0,0 +1,23 @@ +FROM deeppavlov/deeppavlov:1.2.0-gpu + +RUN apt-get update && apt-get install git -y + +ARG CONFIG +ARG SERVICE_PORT +ARG SED_ARG=" | " + +ENV CONFIG=$CONFIG +ENV SERVICE_PORT=$SERVICE_PORT + +COPY . /src +COPY ./tests/requirements.txt /src/requirements.txt +WORKDIR /src + +RUN pip freeze | grep deeppavlov && \ + pip install -r /src/requirements.txt && \ + python -m deeppavlov install $CONFIG && \ + python -m spacy download en_core_web_sm + +RUN sed -i "s|$SED_ARG|g" "$CONFIG" + +CMD gunicorn --workers=1 --timeout 500 server:app -b 0.0.0.0:8078 diff --git a/services/text_qa/test.sh b/services/text_qa/test.sh index de91d5956c..83ef8ef33d 100755 --- a/services/text_qa/test.sh +++ b/services/text_qa/test.sh @@ -1,3 +1,3 @@ #!/bin/bash -python test_text_qa.py +python -m pytest tests/test_text_qa.py diff --git a/services/text_qa/test_text_qa.py b/services/text_qa/test_text_qa.py deleted file mode 100644 index 29266ca6f0..0000000000 --- a/services/text_qa/test_text_qa.py +++ /dev/null @@ -1,64 +0,0 @@ -import os -import requests - - -language = os.getenv("LANGUAGE", "EN") - - -def main(): - url = "http://0.0.0.0:8078/model" - - request_data = { - "RU": [ - { - "question_raw": ["Где живут кенгуру?"], - "top_facts": [["Кенгуру являются коренными обитателями Австралии."]], - }, - { - "question_raw": ["Кто придумал сверточную сеть?"], - "top_facts": [ - [ - "Свёрточная нейронная сеть - архитектура искусственных нейронных сетей, " - "предложенная Яном Лекуном в 1988 году." - ] - ], - }, - ], - "EN": [ - { - "question_raw": ["Who was the first man in space?"], - "top_facts": [ - [ - "Yuri Gagarin was a Russian pilot and cosmonaut who became the first human to " - "journey into outer space." - ] - ], - }, - { - "question_raw": ["Who played Sheldon Cooper in The Big Bang Theory?"], - "top_facts": [ - [ - "Sheldon Lee Cooper is a fictional character in the CBS television series " - "The Big Bang Theory and its spinoff series Young Sheldon, portrayed by actors " - "Jim Parsons in The Big Bang Theory." 
- ] - ], - }, - ], - } - gold_results = {"RU": ["Австралии", "Яном Лекуном"], "EN": ["Yuri Gagarin", "Jim Parsons"]} - count = 0 - for data, gold_ans in zip(request_data[language], gold_results[language]): - result = requests.post(url, json=data).json() - res_ans = result[0][0] - if res_ans == gold_ans: - count += 1 - else: - print(f"Got {result}, but expected: {gold_ans}") - - assert count == len(request_data) - print("Success") - - -if __name__ == "__main__": - main() diff --git a/services/text_qa/tests/conftest.py b/services/text_qa/tests/conftest.py new file mode 100644 index 0000000000..3e966a7797 --- /dev/null +++ b/services/text_qa/tests/conftest.py @@ -0,0 +1,27 @@ +import pytest + + +def pytest_addoption(parser): + parser.addoption("--uri", action="store", default="0.0.0.0") + parser.addoption("--port", action="store", default=8078) + parser.addoption("--handle", action="store", default="model") + + +@pytest.fixture +def uri(request) -> str: + return request.config.getoption("--uri") + + +@pytest.fixture +def port(request) -> int: + return request.config.getoption("--port") + + +@pytest.fixture +def handle(request) -> str: + return request.config.getoption("--handle") + + +@pytest.fixture +def url(uri, port, handle) -> str: + return f"http://{uri}:{port}/{handle}" diff --git a/services/text_qa/tests/requirements.txt b/services/text_qa/tests/requirements.txt new file mode 100644 index 0000000000..2bd7502847 --- /dev/null +++ b/services/text_qa/tests/requirements.txt @@ -0,0 +1,8 @@ +sentry-sdk[flask]==0.14.1 +gunicorn==19.9.0 +pyOpenSSL==22.0.0 +transformers==4.10.1 +https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.3.0/en_core_web_sm-3.3.0-py3-none-any.whl +overrides==3.1.0 +pytest==7.4.2 +allure-pytest==2.13.2 \ No newline at end of file diff --git a/services/text_qa/tests/test_text_qa.py b/services/text_qa/tests/test_text_qa.py new file mode 100644 index 0000000000..9b90c06a9b --- /dev/null +++ b/services/text_qa/tests/test_text_qa.py @@ -0,0 +1,87 @@ +import os +import requests + +import allure +import pytest + + +language = os.getenv("LANGUAGE", "EN") + + +@allure.description("""Test text-qa with questions in English""") +@pytest.mark.skipif(language == "RU", reason="no need to test russian questions") +@pytest.mark.parametrize( + "request_data, gold_result", + [ + ( + { + "EN": { + "question_raw": ["Who was the first man in space?"], + "top_facts": [ + [ + "Yuri Gagarin was a Russian pilot and cosmonaut who became the first human to " + "journey into outer space." + ] + ], + } + }, + "Yuri Gagarin", + ), + ( + { + "EN": { + "question_raw": ["Who played Sheldon Cooper in The Big Bang Theory?"], + "top_facts": [ + [ + "Sheldon Lee Cooper is a fictional character in the CBS television series " + "The Big Bang Theory and its spinoff series Young Sheldon, portrayed by actors " + "Jim Parsons in The Big Bang Theory." 
+ ] + ], + }, + }, + "Jim Parsons", + ), + ], +) +def test_text_qa(url: str, request_data, gold_result): + result = requests.post(url, json=request_data[language]).json() + print(result) + res_ans = result[0][0] + assert res_ans == gold_result + + +@allure.description("""Test text-qa with questions in Russian""") +@pytest.mark.skipif(language == "EN", reason="no need to test english questions") +@pytest.mark.parametrize( + "request_data, gold_result", + [ + ( + { + "RU": { + "question_raw": ["Где живут кенгуру?"], + "top_facts": [["Кенгуру являются коренными обитателями Австралии."]], + }, + }, + "Австралии", + ), + ( + { + "RU": { + "question_raw": ["Кто придумал сверточную сеть?"], + "top_facts": [ + [ + "Свёрточная нейронная сеть - архитектура искусственных нейронных сетей, " + "предложенная Яном Лекуном в 1988 году." + ] + ], + }, + }, + "Яном Лекуном", + ), + ], +) +def test_text_qa_ru(url: str, request_data, gold_result): + result = requests.post(url, json=request_data[language]).json() + res_ans = result[0][0] + assert res_ans == gold_result diff --git a/tests/runtests.sh b/tests/runtests.sh index 370d4b7bb4..fb543d50a7 100755 --- a/tests/runtests.sh +++ b/tests/runtests.sh @@ -56,7 +56,7 @@ function dockercompose_cmd() { # if [[ "$DEVICE" == "cpu" ]]; then # DOCKER_COMPOSE_CMD="docker-compose -f docker-compose.yml -f dev.yml -f cpu.yml -f proxy.yml -f s3.yml -p test" # else - DOCKER_COMPOSE_CMD="docker-compose --no-ansi -p test -f docker-compose.yml -f assistant_dists/dream/docker-compose.override.yml -f assistant_dists/dream/test.yml" + DOCKER_COMPOSE_CMD="docker-compose --no-ansi -p test -f docker-compose.yml -f assistant_dists/dream/docker-compose-test.override.yml -f assistant_dists/dream/test.yml" # fi eval '$DOCKER_COMPOSE_CMD "$@"' if [[ $? != 0 ]]; then