diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 41534b0..4f00cdf 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -38,6 +38,12 @@ jobs: host node port: 9300 node port: 9300 discovery type: 'single-node' + - name: Install and run OpenSearch 📦 + uses: esmarkowski/opensearch-github-action@v1.0.0 + with: + version: 2.18.0 + security-disabled: true + port: 9209 - name: Run unit tests run: | pytest diff --git a/README.md b/README.md index ad770cc..995eff8 100644 --- a/README.md +++ b/README.md @@ -89,6 +89,8 @@ There are a number of pre-defined backends available, where parsed expressions c * Django * sqlalchemy * (Geo)Pandas +* Elasticsearch +* OpenSearch * Pure Python object filtering The usage of those are described in their own documentation. diff --git a/pygeofilter/backends/opensearch/__init__.py b/pygeofilter/backends/opensearch/__init__.py new file mode 100644 index 0000000..83c29b5 --- /dev/null +++ b/pygeofilter/backends/opensearch/__init__.py @@ -0,0 +1,33 @@ +# ------------------------------------------------------------------------------ +# +# Project: pygeofilter +# Authors: Fabian Schindler +# +# ------------------------------------------------------------------------------ +# Copyright (C) 2022 EOX IT Services GmbH +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies of this Software or works derived from this Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. +# ------------------------------------------------------------------------------ + +""" OpenSearch backend for pygeofilter. +""" + +from .evaluate import to_filter + +__all__ = ["to_filter"] diff --git a/pygeofilter/backends/opensearch/evaluate.py b/pygeofilter/backends/opensearch/evaluate.py new file mode 100644 index 0000000..40e0e46 --- /dev/null +++ b/pygeofilter/backends/opensearch/evaluate.py @@ -0,0 +1,312 @@ +# ------------------------------------------------------------------------------ +# +# Project: pygeofilter +# Authors: Fabian Schindler +# +# ------------------------------------------------------------------------------ +# Copyright (C) 2022 EOX IT Services GmbH +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies of this Software or works derived from this Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. +# ------------------------------------------------------------------------------ + +""" +OpenSearch filter evaluator. + +Uses opensearch-dsl package to create filter objects. +""" + + +# pylint: disable=E1130,C0103,W0223 + +from datetime import date, datetime +from typing import Dict, Optional, Union + +from opensearch_dsl import Q +from packaging.version import Version + +from ... import ast, values +from ..evaluator import Evaluator, handle +from .util import like_to_wildcard + +VERSION_7_10_0 = Version("7.10.0") + + +COMPARISON_OP_MAP = { + ast.ComparisonOp.LT: "lt", + ast.ComparisonOp.LE: "lte", + ast.ComparisonOp.GT: "gt", + ast.ComparisonOp.GE: "gte", +} + + +ARITHMETIC_OP_MAP = { + ast.ArithmeticOp.ADD: "+", + ast.ArithmeticOp.SUB: "-", + ast.ArithmeticOp.MUL: "*", + ast.ArithmeticOp.DIV: "/", +} + + +class OpenSearchDSLEvaluator(Evaluator): + """A filter evaluator for OpenSearch DSL.""" + + def __init__( + self, + attribute_map: Optional[Dict[str, str]] = None, + version: Optional[Version] = None, + ): + self.attribute_map = attribute_map + self.version = version or Version("7.1.0") + + @handle(ast.Not) + def not_(self, _, sub): + """Inverts a filter object.""" + return ~sub + + @handle(ast.And) + def and_(self, _, lhs, rhs): + """Joins two filter objects with an `and` operator.""" + return lhs & rhs + + @handle(ast.Or) + def or_(self, _, lhs, rhs): + """Joins two filter objects with an `or` operator.""" + return lhs | rhs + + @handle(ast.Equal, ast.NotEqual) + def equality(self, node, lhs, rhs): + """Creates a match filter.""" + q = Q("match", **{lhs: rhs}) + if node.op == ast.ComparisonOp.NE: + q = ~q + return q + + @handle(ast.LessThan, ast.LessEqual, ast.GreaterThan, ast.GreaterEqual) + def comparison(self, node, lhs, rhs): + """Creates a `range` filter.""" + return Q("range", **{lhs: {COMPARISON_OP_MAP[node.op]: rhs}}) + + @handle(ast.Between) + def between(self, node: ast.Between, lhs, low, high): + """Creates a `range` filter.""" + q = Q("range", **{lhs: {"gte": low, "lte": high}}) + if node.not_: + q = ~q + return q + + @handle(ast.Like) + def like(self, node: ast.Like, lhs): + """Transforms the provided LIKE pattern to an OpenSearch wildcard + pattern. Thus, this only works properly on "wildcard" fields. + Ignores case-sensitivity when OpenSearch version is below 7.10.0. + """ + pattern = like_to_wildcard( + node.pattern, node.wildcard, node.singlechar, node.escapechar + ) + expr: Dict[str, Union[str, bool]] = { + "value": pattern, + } + if self.version >= VERSION_7_10_0: + expr["case_insensitive"] = node.nocase + + q = Q("wildcard", **{lhs: expr}) + if node.not_: + q = ~q + return q + + @handle(ast.In) + def in_(self, node, lhs, *options): + """Creates a `terms` filter.""" + q = Q("terms", **{lhs: options}) + if node.not_: + q = ~q + return q + + @handle(ast.IsNull) + def null(self, node: ast.IsNull, lhs): + """Performs a null check, by using the `exists` query on the given + field. + """ + q = Q("exists", field=lhs) + if not node.not_: + q = ~q + return q + + @handle(ast.Exists) + def exists(self, node: ast.Exists, lhs): + """Performs an existense check, by using the `exists` query on the + given field + """ + q = Q("exists", field=lhs) + if node.not_: + q = ~q + return q + + @handle(ast.TemporalPredicate, subclasses=True) + def temporal(self, node: ast.TemporalPredicate, lhs, rhs): + """Creates a filter to match the given temporal predicate""" + op = node.op + if isinstance(rhs, (date, datetime)): + low = high = rhs + else: + low, high = rhs + + query = "range" + not_ = False + predicate: Dict[str, Union[date, datetime, str]] + if op == ast.TemporalComparisonOp.DISJOINT: + not_ = True + predicate = {"gte": low, "lte": high} + elif op == ast.TemporalComparisonOp.AFTER: + predicate = {"gt": high} + elif op == ast.TemporalComparisonOp.BEFORE: + predicate = {"lt": low} + elif ( + op == ast.TemporalComparisonOp.TOVERLAPS + or op == ast.TemporalComparisonOp.OVERLAPPEDBY + ): + predicate = {"gte": low, "lte": high} + elif op == ast.TemporalComparisonOp.BEGINS: + query = "term" + predicate = {"value": low} + elif op == ast.TemporalComparisonOp.BEGUNBY: + query = "term" + predicate = {"value": high} + elif op == ast.TemporalComparisonOp.DURING: + predicate = {"gt": low, "lt": high, "relation": "WITHIN"} + elif op == ast.TemporalComparisonOp.TCONTAINS: + predicate = {"gt": low, "lt": high, "relation": "CONTAINS"} + # elif op == ast.TemporalComparisonOp.ENDS: + # pass + # elif op == ast.TemporalComparisonOp.ENDEDBY: + # pass + # elif op == ast.TemporalComparisonOp.TEQUALS: + # pass + # elif op == ast.TemporalComparisonOp.BEFORE_OR_DURING: + # pass + # elif op == ast.TemporalComparisonOp.DURING_OR_AFTER: + # pass + else: + raise NotImplementedError(f"Unsupported temporal operator: {op}") + + q = Q( + query, + **{lhs: predicate}, + ) + if not_: + q = ~q + return q + + @handle( + ast.GeometryIntersects, + ast.GeometryDisjoint, + ast.GeometryWithin, + ast.GeometryContains, + ) + def spatial_comparison(self, node: ast.SpatialComparisonPredicate, lhs: str, rhs): + """Creates a geo_shape query for the give spatial comparison + predicate. + """ + return Q( + "geo_shape", + **{ + lhs: { + "shape": rhs, + "relation": node.op.value.lower(), + }, + }, + ) + + @handle(ast.BBox) + def bbox(self, node: ast.BBox, lhs): + """Performs a geo_shape query for the given bounding box. + Ignores CRS parameter, as it is not supported by OpenSearch. + """ + return Q( + "geo_shape", + **{ + lhs: { + "shape": self.envelope( + values.Envelope(node.minx, node.maxx, node.miny, node.maxy) + ), + "relation": "intersects", + }, + }, + ) + + @handle(ast.Attribute) + def attribute(self, node: ast.Attribute): + """Attribute mapping from filter fields to OpenSearch fields. + If an attribute mapping is provided, it is used to look up the + field name from there. + """ + if self.attribute_map is not None: + return self.attribute_map[node.name] + return node.name + + # @handle(ast.Arithmetic, subclasses=True) + # def arithmetic(self, node: ast.Arithmetic, lhs, rhs): + # op = ARITHMETIC_OP_MAP[node.op] + # return f"({lhs} {op} {rhs})" + + # @handle(ast.Function) + # def function(self, node, *arguments): + # func = self.function_map[node.name] + # return f"{func}({','.join(arguments)})" + + @handle(*values.LITERALS) + def literal(self, node): + """Literal values are directly passed to opensearch-dsl""" + return node + + @handle(values.Geometry) + def geometry(self, node: values.Geometry): + """Geometry values are converted to a GeoJSON object""" + return node.geometry + + @handle(values.Envelope) + def envelope(self, node: values.Envelope): + """Envelope values are converted to an GeoJSON OpenSearch + extension object.""" + return { + "type": "envelope", + "coordinates": [ + [ + min(node.x1, node.x2), + max(node.y1, node.y2), + ], + [ + max(node.x1, node.x2), + min(node.y1, node.y2), + ], + ], + } + + +def to_filter( + root, + attribute_map: Optional[Dict[str, str]] = None, + version: Optional[str] = None, +): + """Shorthand function to convert a pygeofilter AST to an OpenSearch + filter structure. + """ + return OpenSearchDSLEvaluator( + attribute_map, Version(version) if version else None + ).evaluate(root) diff --git a/pygeofilter/backends/opensearch/util.py b/pygeofilter/backends/opensearch/util.py new file mode 100644 index 0000000..02dce60 --- /dev/null +++ b/pygeofilter/backends/opensearch/util.py @@ -0,0 +1,63 @@ +# ------------------------------------------------------------------------------ +# +# Project: pygeofilter +# Authors: Fabian Schindler +# +# ------------------------------------------------------------------------------ +# Copyright (C) 2022 EOX IT Services GmbH +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies of this Software or works derived from this Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. +# ------------------------------------------------------------------------------ + +""" General utilities for the OpenSearch backend. +""" + +import re + + +def like_to_wildcard( + value: str, wildcard: str, single_char: str, escape_char: str = "\\" +) -> str: + """Adapts a "LIKE" pattern to create an OpenSearch "wildcard" + pattern. + """ + + x_wildcard = re.escape(wildcard) + x_single_char = re.escape(single_char) + + if escape_char == "\\": + x_escape_char = "\\\\\\\\" + else: + x_escape_char = re.escape(escape_char) + + if wildcard != "*": + value = re.sub( + f"(?=0.2"], }, classifiers=[ diff --git a/tests/backends/opensearch/__init__.py b/tests/backends/opensearch/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/backends/opensearch/test_evaluate.py b/tests/backends/opensearch/test_evaluate.py new file mode 100644 index 0000000..0e372b5 --- /dev/null +++ b/tests/backends/opensearch/test_evaluate.py @@ -0,0 +1,298 @@ +# pylint: disable=W0621,C0114,C0115,C0116 + +import pytest +from opensearch_dsl import ( + Date, + DateRange, + Document, + Field, + Float, + GeoPoint, + GeoShape, + Index, + InnerDoc, + Integer, + Nested, + Range, + Text, + connections, +) + +from pygeofilter import ast +from pygeofilter.backends.opensearch import to_filter +from pygeofilter.parsers.ecql import parse +from pygeofilter.util import parse_datetime + + +class Wildcard(Field): + name = "wildcard" + + +class RecordMeta(InnerDoc): + float_meta_attribute = Float() + int_meta_attribute = Integer() + str_meta_attribute = Text() + datetime_meta_attribute = Date() + + +class Record(Document): + identifier = Text() + geometry = GeoShape() + center = GeoPoint() + float_attribute = Float() + int_attribute = Integer() + str_attribute = Wildcard() + maybe_str_attribute = Text() + datetime_attribute = Date() + daterange_attribute = DateRange() + record_metas = Nested(RecordMeta) + + class Index: + name = "record" + + +@pytest.fixture(autouse=True, scope="session") +def connection(): + connections.create_connection( + hosts=["http://localhost:9209"], + ) + + +@pytest.fixture(autouse=True, scope="session") +def index(connection): + Record.init() + index = Index(Record.Index.name) + yield index + index.delete() + + +@pytest.fixture(autouse=True, scope="session") +def data(index): + """Fixture to add initial data to the search index.""" + record_a = Record( + identifier="A", + geometry="MULTIPOLYGON(((0 0, 0 5, 5 5,5 0,0 0)))", + center="POINT(2.5 2.5)", + float_attribute=0.0, + int_attribute=5, + str_attribute="this is a test", + maybe_str_attribute=None, + datetime_attribute=parse_datetime("2000-01-01T00:00:00Z"), + daterange_attribute=Range( + gte=parse_datetime("2000-01-01T00:00:00Z"), + lte=parse_datetime("2000-01-02T00:00:00Z"), + ), + ) + record_a.save() + + record_b = Record( + identifier="B", + geometry="MULTIPOLYGON(((5 5, 5 10, 10 10,10 5,5 5)))", + center="POINT(7.5 7.5)", + float_attribute=30.0, + int_attribute=None, + str_attribute="this is another test", + maybe_str_attribute="some value", + datetime_attribute=parse_datetime("2000-01-01T00:00:10Z"), + daterange_attribute=Range( + gte=parse_datetime("2000-01-04T00:00:00Z"), + lte=parse_datetime("2000-01-05T00:00:00Z"), + ), + ) + record_b.save() + index.refresh() + + yield [record_a, record_b] + + +def filter_(ast_): + query = to_filter(ast_, version="8.2") + print(query) + result = Record.search().query(query).execute() + print([r.identifier for r in result]) + return result + + +def test_comparison(data): + result = filter_(parse("int_attribute = 5")) + assert len(result) == 1 and result[0].identifier == data[0].identifier + + result = filter_(parse("float_attribute < 6")) + assert len(result) == 1 and result[0].identifier == data[0].identifier + + result = filter_(parse("float_attribute > 6")) + assert len(result) == 1 and result[0].identifier == data[1].identifier + + result = filter_(parse("int_attribute <= 5")) + assert len(result) == 1 and result[0].identifier == data[0].identifier + + result = filter_(parse("float_attribute >= 8")) + assert len(result) == 1 and result[0].identifier == data[1].identifier + + result = filter_(parse("float_attribute <> 0.0")) + assert len(result) == 1 and result[0].identifier == data[1].identifier + + +def test_combination(data): + result = filter_(parse("int_attribute = 5 AND float_attribute < 6.0")) + assert len(result) == 1 and result[0].identifier == data[0].identifier + + result = filter_(parse("int_attribute = 6 OR float_attribute < 6.0")) + assert len(result) == 1 and result[0].identifier == data[0].identifier + + +def test_between(data): + result = filter_(parse("float_attribute BETWEEN -1 AND 1")) + assert len(result) == 1 and result[0].identifier == data[0].identifier + + result = filter_(parse("int_attribute NOT BETWEEN 4 AND 6")) + assert len(result) == 1 and result[0].identifier == data[1].identifier + + +def test_like(data): + result = filter_(parse("str_attribute LIKE 'this is a test'")) + assert len(result) == 1 and result[0].identifier == data[0].identifier + + result = filter_(parse("str_attribute LIKE 'this is % test'")) + assert len(result) == 2 + + result = filter_(parse("str_attribute NOT LIKE '% another test'")) + assert len(result) == 1 and result[0].identifier == data[0].identifier + + result = filter_(parse("str_attribute NOT LIKE 'this is . test'")) + assert len(result) == 1 and result[0].identifier == data[1].identifier + + result = filter_(parse("str_attribute ILIKE 'THIS IS . TEST'")) + assert len(result) == 1 and result[0].identifier == data[0].identifier + + result = filter_(parse("str_attribute ILIKE 'THIS IS % TEST'")) + assert len(result) == 2 + + +def test_in(data): + result = filter_(parse("int_attribute IN ( 1, 2, 3, 4, 5 )")) + assert len(result) == 1 and result[0].identifier == data[0].identifier + + result = filter_(parse("int_attribute NOT IN ( 1, 2, 3, 4, 5 )")) + assert len(result) == 1 and result[0].identifier == data[1].identifier + + +def test_null(data): + result = filter_(parse("maybe_str_attribute IS NULL")) + assert len(result) == 1 and result[0].identifier == data[0].identifier + + result = filter_(parse("maybe_str_attribute IS NOT NULL")) + assert len(result) == 1 and result[0].identifier == data[1].identifier + + +def test_has_attr(): + result = filter_(parse("extra_attr EXISTS")) + assert len(result) == 0 + + result = filter_(parse("extra_attr DOES-NOT-EXIST")) + assert len(result) == 2 + + +def test_temporal(data): + result = filter_( + ast.TimeDisjoint( + ast.Attribute("datetime_attribute"), + [ + parse_datetime("2000-01-01T00:00:05.00Z"), + parse_datetime("2000-01-01T00:00:15.00Z"), + ], + ) + ) + assert len(result) == 1 and result[0].identifier == data[0].identifier + + result = filter_( + parse("datetime_attribute BEFORE 2000-01-01T00:00:05.00Z"), + ) + assert len(result) == 1 and result[0].identifier == data[0].identifier + + result = filter_( + parse("datetime_attribute AFTER 2000-01-01T00:00:05.00Z"), + ) + assert len(result) == 1 and result[0].identifier == data[1].identifier + + +# def test_array(): +# result = filter_( +# ast.ArrayEquals( +# ast.Attribute('array_attr'), +# [2, 3], +# ), +# data +# ) +# assert len(result) == 1 and result[0] is data[0] + +# result = filter_( +# ast.ArrayContains( +# ast.Attribute('array_attr'), +# [1, 2, 3, 4], +# ), +# data +# ) +# assert len(result) == 1 and result[0] is data[1] + +# result = filter_( +# ast.ArrayContainedBy( +# ast.Attribute('array_attr'), +# [1, 2, 3, 4], +# ), +# data +# ) +# assert len(result) == 1 and result[0] is data[0] + +# result = filter_( +# ast.ArrayOverlaps( +# ast.Attribute('array_attr'), +# [5, 6, 7], +# ), +# data +# ) +# assert len(result) == 1 and result[0] is data[1] + + +def test_spatial(data): + result = filter_( + parse("INTERSECTS(geometry, ENVELOPE (0.0 1.0 0.0 1.0))"), + ) + assert len(result) == 1 and result[0].identifier == data[0].identifier + + # TODO: test more spatial queries + + result = filter_( + parse("BBOX(center, 2, 2, 3, 3)"), + ) + assert len(result) == 1 and result[0].identifier == data[0].identifier + + +# def test_arithmetic(): +# result = filter_( +# parse('int_attr = float_attr - 0.5'), +# data, +# ) +# assert len(result) == 2 + +# result = filter_( +# parse('int_attr = 5 + 20 / 2 - 10'), +# data, +# ) +# assert len(result) == 1 and result[0] is data[0] + + +# def test_function(): +# result = filter_( +# parse('sin(float_attr) BETWEEN -0.75 AND -0.70'), +# data, +# ) +# assert len(result) == 1 and result[0] is data[0] + + +# def test_nested(): +# result = filter_( +# parse('"nested_attr.str_attr" = \'this is a test\''), +# data, +# ) +# assert len(result) == 1 and result[0] is data[0] diff --git a/tests/parsers/cql2_text/test_parser.py b/tests/parsers/cql2_text/test_parser.py index 966fbf8..0a21436 100644 --- a/tests/parsers/cql2_text/test_parser.py +++ b/tests/parsers/cql2_text/test_parser.py @@ -9,6 +9,7 @@ def test_attribute_eq_true_uppercase(): True, ) + def test_attribute_eq_true_lowercase(): result = parse("attr = true") assert result == ast.Equal( diff --git a/tests/parsers/ecql/test_parser.py b/tests/parsers/ecql/test_parser.py index 9fa7fd9..9d12010 100644 --- a/tests/parsers/ecql/test_parser.py +++ b/tests/parsers/ecql/test_parser.py @@ -41,6 +41,7 @@ def test_namespace_attribute_eq_literal(): "A", ) + def test_prefixed_attribute_eq_literal(): result = parse("properties.ns:attr = 'A'") assert result == ast.Equal( @@ -48,6 +49,7 @@ def test_prefixed_attribute_eq_literal(): "A", ) + def test_attribute_eq_literal(): result = parse("attr = 'A'") assert result == ast.Equal(