Skip to content

Commit

Permalink
test: add integration tests
Browse files Browse the repository at this point in the history
  • Loading branch information
alexgarel committed Aug 26, 2024
1 parent af191c0 commit 819d078
Show file tree
Hide file tree
Showing 17 changed files with 487 additions and 100 deletions.
2 changes: 1 addition & 1 deletion app/_import.py
Original file line number Diff line number Diff line change
Expand Up @@ -321,7 +321,7 @@ def import_parallel(
"""
processor = DocumentProcessor(config)
# open a connection for this process
es = connection.get_es_client(timeout=120, retry_on_timeout=True)
es = connection.get_es_client(request_timeout=120, retry_on_timeout=True)
# Note that bulk works better than parallel bulk for our usecase.
# The preprocessing in this file is non-trivial, so it's better to
# parallelize that. If we then do parallel_bulk here, this causes queueing
Expand Down
2 changes: 1 addition & 1 deletion app/cli/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -158,7 +158,7 @@ def import_taxonomies(
index_id, index_config = _get_index_config(config_path, index_id)

# open a connection for this process
connection.get_es_client(timeout=120, retry_on_timeout=True)
connection.get_es_client(request_timeout=120, retry_on_timeout=True)

if skip_indexing:
logger.info("Skipping indexing of taxonomies")
Expand Down
25 changes: 17 additions & 8 deletions app/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,14 @@
from typing import Annotated, Any

import yaml
from pydantic import BaseModel, Field, HttpUrl, field_validator, model_validator
from pydantic import (
BaseModel,
Field,
FileUrl,
HttpUrl,
field_validator,
model_validator,
)
from pydantic.json_schema import GenerateJsonSchema
from pydantic_settings import BaseSettings

Expand Down Expand Up @@ -196,7 +203,7 @@ class TaxonomySourceConfig(BaseModel):
),
]
url: Annotated[
HttpUrl,
FileUrl | HttpUrl,
Field(
description=cd_(
"""URL of the taxonomy.
Expand Down Expand Up @@ -879,11 +886,18 @@ def from_yaml(cls, path: Path) -> "Config":
# CONFIG is a global variable that contains the search-a-licious configuration
# used. It is specified by the envvar CONFIG_PATH.
CONFIG: Config | None = None


def set_global_config(config_path: Path):
global CONFIG
CONFIG = Config.from_yaml(config_path)


if settings.config_path:
if not settings.config_path.is_file():
raise RuntimeError(f"config file does not exist: {settings.config_path}")

CONFIG = Config.from_yaml(settings.config_path)
set_global_config(settings.config_path)


def check_config_is_defined():
Expand All @@ -893,8 +907,3 @@ def check_config_is_defined():
"No configuration is configured, set envvar "
"CONFIG_PATH with the path of the yaml configuration file"
)


def set_global_config(config_path: Path):
global CONFIG
CONFIG = Config.from_yaml(config_path)
3 changes: 2 additions & 1 deletion app/es_query_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,8 @@ def build_full_text_query(query: str, config: IndexConfig, query_langs: list[str
},
)
)

# TODO: see whether we should instead use a multi_match query
# with individual field boosts and with match_phrase ?
multi_match_query = Q("multi_match", query=query, fields=fields)

if match_phrase_boost_queries:
Expand Down
4 changes: 1 addition & 3 deletions app/postprocessing.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,13 +20,11 @@ def process(self, response: Response, projection: set[str] | None) -> JSONType:
"is_count_exact": response.hits.total["relation"] == "eq",
}
hits = []
import pdb

pdb.set_trace()
for hit in response.hits:
result = hit.to_dict()
result["_score"] = hit.meta.score

# TODO make it an unsplit option or move to specific off post processing
for fname in self.config.text_lang_fields:
if fname not in result:
continue
Expand Down
9 changes: 6 additions & 3 deletions app/query.py
Original file line number Diff line number Diff line change
Expand Up @@ -230,6 +230,8 @@ def add_languages_suffix(
This matches in one language OR another
"""
if analysis.luqum_tree is None:
return analysis
transformer = LanguageSuffixTransformer(
lang_fields=set(config.lang_fields), langs=langs
)
Expand Down Expand Up @@ -267,9 +269,10 @@ def build_es_query(
config = params.index_config
es_query = Search(index=config.index.name)
# main query
es_query = es_query.query(
es_query_builder(analysis.luqum_tree, params.index_config, params.langs)
)
if analysis.luqum_tree is not None:
es_query = es_query.query(
es_query_builder(analysis.luqum_tree, params.index_config, params.langs)
)

agg_fields = set(params.facets) if params.facets is not None else set()
if params.charts is not None:
Expand Down
6 changes: 6 additions & 0 deletions app/taxonomy.py
Original file line number Diff line number Diff line change
Expand Up @@ -340,6 +340,12 @@ def get_taxonomy(
~/.cache/openfoodfacts/taxonomy
:return: a Taxonomy
"""
if taxonomy_url.startswith("file://"):
# just use the file, it's already local
fpath = taxonomy_url[len("file://") :]
if not fpath.startswith("/"):
raise RuntimeError("Relative path (not yet) supported for taxonomy url")
return Taxonomy.from_path(fpath.rstrip("/"))
filename = f"{taxonomy_name}.json"

cache_dir = DEFAULT_CACHE_DIR if cache_dir is None else cache_dir
Expand Down
Empty file added tests/int/__init__.py
Empty file.
38 changes: 38 additions & 0 deletions tests/int/conftest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
import pathlib
import time

import elasticsearch
import pytest

import app.config
import app.utils.connection

DATA_DIR = pathlib.Path(__file__).parent / "data"
DEFAULT_CONFIG_PATH = DATA_DIR / "test_off.yml"


@pytest.fixture(scope="module")
def test_off_config():
    """Point the global search-a-licious configuration at the test OFF config.

    Loads DEFAULT_CONFIG_PATH once per test module via
    app.config.set_global_config.
    """
    config_path = DEFAULT_CONFIG_PATH
    app.config.set_global_config(config_path)


@pytest.fixture
def es_connection(test_off_config):
    """Return an Elasticsearch client, waiting for the cluster to be ready.

    Retries roughly every second, for up to 10 attempts, while Elasticsearch
    is unreachable or the cluster health is not yet "green"; after that the
    last ConnectionError is re-raised.

    Bug fix: the original looped on ``while es is None`` — if the client was
    created but the health check then raised, ``es`` was no longer None, so
    the loop exited and the fixture silently returned None instead of
    retrying. Looping unconditionally and returning only on success fixes
    that.
    """
    waited = 0
    while True:
        try:
            es = app.utils.connection.get_es_client()
            health = es.cluster.health()
            if health.get("status") != "green":
                # treat a non-green cluster like an unreachable one: retry
                raise elasticsearch.exceptions.ConnectionError(
                    "Elasticsearch not ready"
                )
            return es
        except elasticsearch.exceptions.ConnectionError:
            waited += 1
            if waited > 10:
                raise
            time.sleep(1)
1 change: 1 addition & 0 deletions tests/int/data/test_categories.full.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions tests/int/data/test_labels.full.json

Large diffs are not rendered by default.

67 changes: 67 additions & 0 deletions tests/int/data/test_off.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
# This is a small configuration for integration tests using OFF like data
# NOTE(review): nesting below was reconstructed from a whitespace-flattened
# copy of this file — verify against the search-a-licious config schema.
indices:
  "test_off":
    index:
      id_field_name: code
      last_modified_field_name: last_modified_t
      # NOTE(review): index name "openfoodfacts" for a test config — confirm
      # this does not clash with a real index on the test cluster
      name: openfoodfacts
      number_of_replicas: 1
      number_of_shards: 1
    fields:
      code:
        required: true
        type: keyword
      product_name:
        full_text_search: true
        type: text_lang
      categories:
        full_text_search: true
        input_field: categories_tags
        taxonomy_name: categories
        type: taxonomy
        bucket_agg: true
      labels:
        full_text_search: true
        input_field: labels_tags
        taxonomy_name: labels
        type: taxonomy
        bucket_agg: true
      unique_scans_n:
        type: integer
      nova_groups:
        type: keyword
        bucket_agg: true
      last_modified_t:
        type: date
      created_t:
        type: date
      nutriments:
        type: object
      completeness:
        type: float
    lang_separator: _
    match_phrase_boost: 2.0
    # todo ?
    preprocessor: tests.int.helpers.TestDocumentPreprocessor
    document_fetcher: tests.int.helpers.TestDocumentFetcher
    result_processor: tests.int.helpers.TestResultProcessor
    split_separator: ','
    redis_stream_name: product_updates_off
    primary_color: "#341100"
    accent_color: "#ff8714"
    taxonomy:
      sources:
        # file:// URLs: taxonomies are read from local files, not downloaded
        - name: categories
          url: file:///opt/search/tests/int/data/test_categories.full.json
        - name: labels
          url: file:///opt/search/tests/int/data/test_labels.full.json
      index:
        number_of_replicas: 1
        number_of_shards: 4
        name: test_off_taxonomy
    supported_langs:
      # a specific language to put the main language entry
      - main
      - en
      - fr
default_index: "test_off"
Loading

0 comments on commit 819d078

Please sign in to comment.