diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 425b43e6..488d041d 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -38,6 +38,12 @@ jobs: host node port: 9300 node port: 9300 discovery type: 'single-node' + - name: Install and run Solr 📦 + uses: OSGeo/solr-action@main + with: + solr_version: 9.8.1 + host_port: 8983 + container_port: 8983 - name: Install and run OpenSearch 📦 uses: esmarkowski/opensearch-github-action@v1.0.0 with: diff --git a/pygeofilter/backends/solr/__init__.py b/pygeofilter/backends/solr/__init__.py new file mode 100644 index 00000000..090a8193 --- /dev/null +++ b/pygeofilter/backends/solr/__init__.py @@ -0,0 +1,33 @@ +# ------------------------------------------------------------------------------ +# +# Project: pygeofilter +# Authors: Magnar Martinsen +# +# ------------------------------------------------------------------------------ +# Copyright (C) 2025 Norwegian Meteorological Institute +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies of this Software or works derived from this Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. +# ------------------------------------------------------------------------------ + +""" Apache Solr backend for pygeofilter +""" + +from .evaluate import to_filter + +__all__ = ["to_filter"] diff --git a/pygeofilter/backends/solr/evaluate.py b/pygeofilter/backends/solr/evaluate.py new file mode 100644 index 00000000..abf7b16f --- /dev/null +++ b/pygeofilter/backends/solr/evaluate.py @@ -0,0 +1,306 @@ +# ------------------------------------------------------------------------------ +# +# Project: pygeofilter +# Authors: Magnar Martinsen +# +# ------------------------------------------------------------------------------ +# Copyright (C) 2025 Norwegian Meteorological Institute +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies of this Software or works derived from this Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
# IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
# ------------------------------------------------------------------------------

"""
Apache Solr filter evaluator.

Translates a pygeofilter AST into the ``dict`` payload of a Solr JSON
request, using plain Python string building only.
"""


# pylint: disable=E1130,C0103,W0223

from datetime import date, datetime, timezone
from typing import Dict, Optional

from packaging.version import Version

from ... import ast, values
from ..evaluator import Evaluator, handle
from .util import like_to_wildcard

VERSION_9_8_1 = Version("9.8.1")

# Textual date-time form used for every temporal value sent to Solr.
SOLR_DATETIME_FORMAT = "%Y-%m-%dT%H:%M:%SZ"

# ``str.format`` templates; doubled braces are literal Solr range brackets
# (``{`` / ``}`` = exclusive bound, ``[`` / ``]`` = inclusive bound).
COMPARISON_OP_MAP = {
    ast.ComparisonOp.EQ: "{lhs}:{rhs}",
    ast.ComparisonOp.NE: "-{lhs}:{rhs}",
    ast.ComparisonOp.GT: "{lhs}:{{{rhs} TO *]",
    ast.ComparisonOp.GE: "{lhs}:[{rhs} TO *]",
    ast.ComparisonOp.LT: "{lhs}:[* TO {rhs}}}",
    ast.ComparisonOp.LE: "{lhs}:[* TO {rhs}]",
}

# Kept for the (not yet implemented) arithmetic handler.
ARITHMETIC_OP_MAP = {
    ast.ArithmeticOp.ADD: "+",
    ast.ArithmeticOp.SUB: "-",
    ast.ArithmeticOp.MUL: "*",
    ast.ArithmeticOp.DIV: "/",
}

# Templates for the supported temporal predicates.  Single-instant matches
# (BEGINS / BEGUNBY) quote the value: an unquoted date-time contains ``:``
# characters, which the Lucene syntax reads as field separators.
# ENDS, ENDEDBY, TEQUALS, BEFORE_OR_DURING and DURING_OR_AFTER are not
# supported yet and raise ``NotImplementedError``.
TEMPORAL_OP_MAP = {
    ast.TemporalComparisonOp.DISJOINT: "-{lhs}:[{low} TO {high}]",
    ast.TemporalComparisonOp.AFTER: "{lhs}:{{{high} TO *]",
    ast.TemporalComparisonOp.BEFORE: "{lhs}:[* TO {low}}}",
    ast.TemporalComparisonOp.TOVERLAPS: "{lhs}:[{low} TO {high}]",
    ast.TemporalComparisonOp.OVERLAPPEDBY: "{lhs}:[{low} TO {high}]",
    ast.TemporalComparisonOp.BEGINS: '{lhs}:"{low}"',
    ast.TemporalComparisonOp.BEGUNBY: '{lhs}:"{high}"',
    ast.TemporalComparisonOp.DURING: "{lhs}:{{{low} TO {high}}}",
    ast.TemporalComparisonOp.TCONTAINS: "{lhs}:[{low} TO {high}]",
}


def _escape_quoted(text: str) -> str:
    """Escape backslashes and double quotes for use inside ``"..."``."""
    return text.replace("\\", "\\\\").replace('"', '\\"')


def _format_datetime(value) -> str:
    """Format a ``date``/``datetime`` using ``SOLR_DATETIME_FORMAT``.

    Timezone-aware datetimes are converted to UTC first, because the format
    always ends in a literal ``Z``.  Naive values are formatted unchanged.
    """
    if isinstance(value, datetime) and value.utcoffset() is not None:
        value = value.astimezone(timezone.utc)
    return value.strftime(SOLR_DATETIME_FORMAT)


def handle_combination_query(q):
    """Return the raw query string carried by an evaluated sub-filter.

    :param q: a ``SolrDSLQuery`` (or any mapping with a ``"query"`` entry),
        or an already rendered value.
    :returns: the query string; non-mapping inputs are returned as ``str(q)``.
    :raises ValueError: if a mapping carries no usable ``"query"`` entry
        (previously this silently produced ``None``, which ended up in the
        generated query as the text ``None``).
    """
    if isinstance(q, dict):
        query = q.get("query")
        if not query:
            raise ValueError(f"Sub-filter carries no query string: {q!r}")
        return query
    return str(q)


def _as_operand(q) -> str:
    """Render a sub-filter so it can be safely embedded in a larger query.

    * plain clauses are parenthesised so nested AND/OR keep the precedence
      of the AST;
    * clauses starting with a local-params prefix (``{!parser ...}``) are
      wrapped as ``_query_:"..."`` because the prefix is only honoured at
      the very start of a query string;
    * pure negative clauses are anchored as ``(*:* -clause)`` so that they
      still match documents when nested.
    """
    text = handle_combination_query(q)
    negated = False
    if text.startswith("{!"):
        params, _, body = text.partition("}")
        if body.startswith("-"):
            negated = True
            body = body[1:]
        nested = f"{params}}}{body}"
        text = f'_query_:"{_escape_quoted(nested)}"'
    elif text.startswith("-"):
        negated = True
        text = text[1:]
    else:
        text = f"({text})"
    return f"(*:* -{text})" if negated else text


class SolrDSLQuery(dict):
    """Payload of a Solr JSON request: ``{"query": ..., "filter": ...}``.

    :param query: the main query string; defaults to match-all (``*:*``).
    :param filter: optional filter entry, only stored when not ``None``.
    """

    def __init__(self, query='*:*', filter=None):  # pylint: disable=W0622
        super().__init__()
        self['query'] = query
        if filter is not None:
            self['filter'] = filter


class SOLRDSLEvaluator(Evaluator):
    """A filter evaluator for Apache Solr"""

    def __init__(
        self,
        attribute_map: Optional[Dict[str, str]] = None,
        version: Optional[Version] = None,
    ):
        """
        :param attribute_map: optional mapping from filter attribute names
            to Solr field names; when given, every attribute must be in it.
        :param version: target Solr version, defaults to 9.8.1.
        """
        self.attribute_map = attribute_map
        self.version = version or VERSION_9_8_1

    @handle(ast.And)
    def and_(self, _, lhs, rhs):
        """Joins two filter objects with an `and` operator."""
        return SolrDSLQuery(f"{_as_operand(lhs)} AND {_as_operand(rhs)}")

    @handle(ast.Or)
    def or_(self, _, lhs, rhs):
        """Joins two filter objects with an `or` operator."""
        return SolrDSLQuery(f"{_as_operand(lhs)} OR {_as_operand(rhs)}")

    @handle(ast.LessThan, ast.LessEqual, ast.GreaterThan, ast.GreaterEqual)
    def comparison(self, node, lhs, rhs):
        """Creates a `range` filter."""
        return SolrDSLQuery(COMPARISON_OP_MAP[node.op].format(lhs=lhs, rhs=rhs))

    @handle(ast.Between)
    def between(self, node: ast.Between, lhs, low, high):
        """Creates an inclusive `range` filter, negated for NOT BETWEEN."""
        q = f"{lhs}:[{low} TO {high}]"
        if node.not_:
            q = f"-{q}"
        return SolrDSLQuery(q)

    @handle(ast.In)
    def in_(self, node, lhs, *options):
        """Creates a `terms` filter: ``field:(a OR b OR ...)``."""
        options_str = " OR ".join(str(option) for option in options)
        q = f"{lhs}:({options_str})"
        if node.not_:
            q = f"-{q}"
        return SolrDSLQuery(q)

    @handle(ast.IsNull)
    def null(self, node: ast.IsNull, lhs):
        """Performs a null check."""
        if node.not_:
            return SolrDSLQuery(f"{lhs}:*")
        # A pure negative clause is anchored on match-all.
        return SolrDSLQuery(f"(*:* -{lhs}:*)")

    @handle(ast.Exists)
    def exists(self, node: ast.Exists, lhs):
        """Performs an existence check."""
        q = f"{lhs}:[* TO *]"
        if node.not_:
            q = f"-{q}"
        return SolrDSLQuery(q)

    @handle(ast.Attribute)
    def attribute(self, node: ast.Attribute):
        """Attribute mapping from filter fields to Solr fields.
        If an attribute mapping is provided, it is used to look up the
        field name from there.

        :raises KeyError: if a mapping is set and lacks ``node.name``.
        """
        if self.attribute_map is not None:
            return self.attribute_map[node.name]
        return node.name

    @handle(*values.LITERALS)
    def literal(self, node):
        """Literal values are passed to Solr.

        Strings are double-quoted (with ``\\`` and ``"`` escaped) so that
        values containing whitespace or query syntax stay a single term.
        All other literals are returned unchanged; temporal values are
        formatted later by the handler that consumes them.
        """
        if isinstance(node, str):
            return f'"{_escape_quoted(node)}"'
        return node

    @handle(ast.Not)
    def not_(self, _, sub):
        """Inverts a filter object.

        ``sub`` is a ``SolrDSLQuery``; its query string is unwrapped and
        grouped before being negated (formatting the dict itself would put
        its ``repr`` into the query).
        """
        return SolrDSLQuery(f"-{_as_operand(sub)}")

    @handle(ast.Like)
    def like(self, node: ast.Like, lhs):
        """Transforms the provided LIKE pattern to a Solr wildcard
        pattern. This only works properly on fields that are not tokenized.

        Patterns containing wildcards use the ``complexphrase`` parser.
        ``NOT LIKE`` is honoured for every pattern shape and the value is
        always quoted.  Only the first ``*`` splits the pattern; further
        wildcards stay inside the trailing part.
        NOTE(review): ``node.nocase`` is not used here, so case handling
        presumably depends on the field's analysis chain - confirm.
        """
        pattern = like_to_wildcard(
            node.pattern, node.wildcard, node.singlechar, node.escapechar
        )
        negation = "-" if node.not_ else ""
        if '*' in pattern:
            head, tail = (part.strip() for part in pattern.split('*', 1))
            if not head:
                phrase = f'"*{tail}"'
            elif not tail:
                phrase = f'"{head}*"'
            else:
                phrase = f'"{head}"*"{tail}"'
            q = f"{{!complexphrase}}{negation}{lhs}:{phrase}"
        elif '?' in pattern:
            q = f"{{!complexphrase}}{negation}{lhs}:\"{pattern}\""
        else:
            q = f"{negation}{lhs}:\"{pattern}\""
        return SolrDSLQuery(q)

    @handle(values.Geometry)
    def geometry(self, node: values.Geometry):
        """Returns the geometry object carried by the node, unchanged.

        NOTE(review): ``spatial_comparison`` interpolates this value with
        ``str()``; unless it already renders as WKT the resulting query is
        presumably not valid - confirm and convert to WKT if needed.
        """
        return node.geometry

    @handle(ast.Equal, ast.NotEqual)
    def equality(self, node, lhs, rhs):
        """Creates a match filter."""
        return SolrDSLQuery(COMPARISON_OP_MAP[node.op].format(lhs=lhs, rhs=rhs))

    @handle(ast.TemporalPredicate, subclasses=True)
    def temporal(self, node: ast.TemporalPredicate, lhs, rhs):
        """Creates a filter to match the given temporal predicate.

        :param rhs: a single ``date``/``datetime`` or a two-item sequence
            ``(low, high)`` of such values.
        :raises NotImplementedError: for predicates without a template in
            ``TEMPORAL_OP_MAP``.
        """
        if isinstance(rhs, (date, datetime)):
            low = high = _format_datetime(rhs)
        else:
            low, high = _format_datetime(rhs[0]), _format_datetime(rhs[1])

        try:
            template = TEMPORAL_OP_MAP[node.op]
        except KeyError:
            raise NotImplementedError(
                f"Unsupported temporal operator: {node.op}"
            ) from None

        return SolrDSLQuery(template.format(lhs=lhs, low=low, high=high))

    @handle(
        ast.GeometryIntersects,
        ast.GeometryDisjoint,
        ast.GeometryWithin,
        ast.GeometryContains,
        ast.GeometryEquals
    )
    def spatial_comparison(self, node: ast.SpatialComparisonPredicate, lhs: str, rhs):
        """Creates a spatial query for the given spatial comparison
        predicate.
        """
        # Solr needs the operator with a capitalized first letter.
        op = node.op.value.capitalize()
        query = f"{{!field f={lhs}}}{op}({rhs})"
        return SolrDSLQuery(query)

    @handle(ast.BBox)
    def bbox(self, node: ast.BBox, lhs):
        """Performs a spatial query for the given bounding box.
        Ignores CRS parameter, as it is not supported by Solr.
        """
        bbox = self.envelope(
            values.Envelope(node.minx, node.maxx, node.miny, node.maxy)
        )
        query = f"{{!field f={lhs}}}Intersects({bbox})"
        return SolrDSLQuery(query)

    # TODO: arithmetic (ast.Arithmetic) and function (ast.Function) nodes
    # are not supported by this backend yet.

    @handle(values.Envelope)
    def envelope(self, node: values.Envelope):
        """Envelope values are converted to a WKT ENVELOPE for Solr.

        The argument order is ``minX, maxX, maxY, minY``.
        """
        min_x = float(min(node.x1, node.x2))
        max_x = float(max(node.x1, node.x2))
        min_y = float(min(node.y1, node.y2))
        max_y = float(max(node.y1, node.y2))
        return f"ENVELOPE({min_x}, {max_x}, {max_y}, {min_y})"


def to_filter(
    root,
    attribute_map: Optional[Dict[str, str]] = None,
    version: Optional[str] = None,
):
    """Shorthand function to convert a pygeofilter AST to an Apache Solr
    filter structure.

    :param root: root node of the pygeofilter AST.
    :param attribute_map: optional attribute-name to Solr-field mapping.
    :param version: optional Solr version string, e.g. ``"9.8.1"``.
    :returns: the evaluated filter, a ``SolrDSLQuery`` for predicate roots.
    """
    return SOLRDSLEvaluator(
        attribute_map, Version(version) if version else None
    ).evaluate(root)
+""" + +import re + + +def like_to_wildcard( + value: str, wildcard: str, single_char: str, escape_char: str = "\\" +) -> str: + """Adapts a "LIKE" pattern to create an elasticsearch "wildcard" + pattern. + """ + + x_wildcard = re.escape(wildcard) + x_single_char = re.escape(single_char) + + if escape_char == "\\": + x_escape_char = "\\\\\\\\" + else: + x_escape_char = re.escape(escape_char) + + if wildcard != "*": + value = re.sub( + f"(? +# Authors: Magnar Martinsen +# +# ------------------------------------------------------------------------------ +# Copyright (C) 2025 Norwegian Meteorological Institute +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies of this Software or works derived from this Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. 
# ------------------------------------------------------------------------------

"""Sample script to test Solr queries.

Parses a set of ECQL expressions, converts each one to a Solr filter and
prints both the AST and the resulting filter.
"""

from pygeofilter import ast
from pygeofilter.backends.solr import to_filter
from pygeofilter.parsers.ecql import parse
from pygeofilter.util import parse_datetime

# (label, ECQL expression) pairs, printed in this order.
ECQL_SAMPLES = (
    ("AND", "title = 'test' AND description = 'test2'"),
    ("OR", "title = 'test' OR description = 'test2'"),
    ("Equals =", "int_attribute = 5"),
    ("NOT EQUAL <>", "int_attribute <> 0.0"),
    ("LessThan <", "float_attribute < 6"),
    ("GreaterThan >", "float_attribute > 6"),
    ("LessEqual <=", "int_attribute <= 6"),
    ("GreaterEqual >=", "float_attribute >= 8"),
    ("Combination AND", "int_attribute = 5 AND float_attribute < 6.0"),
    ("Combination OR", "int_attribute = 6 OR float_attribute < 6.0"),
    ("BETWEEN", "float_attribute BETWEEN -1 AND 1"),
    ("NOT BETWEEN", "int_attribute NOT BETWEEN 4 AND 6"),
    ("IS_NULL", "maybe_str_attribute IS NULL"),
    ("IS_NOT_NULL", "maybe_str_attribute IS NOT NULL"),
    ("IN", "int_attribute IN ( 1, 2, 3, 4, 5 )"),
    ("NOT IN", "int_attribute NOT IN ( 1, 2, 3, 4, 5 )"),
    ("LIKE", "str_attribute LIKE 'this is a test'"),
    ("LIKE %", "str_attribute LIKE 'this is % test'"),
    ("NOT LIKE %", "str_attribute NOT LIKE '% another test'"),
    ("NOT LIKE .", "str_attribute NOT LIKE 'this is . test'"),
    ("ILIKE .", "str_attribute ILIKE 'THIS IS . TEST'"),
    ("ILIKE %", "str_attribute ILIKE 'THIS IS % TEST'"),
    ("EXISTS", "extra_attr EXISTS"),
    ("DOES-NOT-EXIST", "extra_attr DOES-NOT-EXIST"),
    ("datetime attribute BEFORE",
     "datetime_attribute BEFORE 2000-01-01T00:00:05.00Z"),
    ("datetime attribute AFTER",
     "datetime_attribute AFTER 2000-01-01T00:00:05.00Z"),
    ("Spatial Intersects", "INTERSECTS(geometry, ENVELOPE (0.0 1.0 0.0 1.0))"),
    ("Spatial Disjoint", "DISJOINT(geometry, ENVELOPE (0.0 1.0 0.0 1.0))"),
    ("Spatial Within", "WITHIN(geometry, ENVELOPE (0.0 1.0 0.0 1.0))"),
    ("Spatial Contains", "CONTAINS(geometry, ENVELOPE (0.0 1.0 0.0 1.0))"),
    ("Spatial Equals", "EQUALS(geometry, ENVELOPE (0.0 1.0 0.0 1.0))"),
    ("Spatial BBOX", "BBOX(center, 2, 2, 3, 3)"),
)


def show(label, root):
    """Print the AST ``root`` and the Solr filter generated from it."""
    print(f'Testing {label}')
    print(f'AST {label}: ', root)
    print(f'SOLR filter {label}: ', to_filter(root))
    print('\n')


def main():
    """Run every sample expression through the Solr backend."""
    for label, expression in ECQL_SAMPLES:
        show(label, parse(expression))

    # Built by hand from AST nodes, as the test-suite does for this
    # predicate.  The parsed tree is never bound to the name ``ast`` here,
    # so the ``ast`` module stays usable.
    show(
        'datetime attribute DISJOINT',
        ast.TimeDisjoint(
            ast.Attribute("datetime_attribute"),
            [
                parse_datetime("2000-01-01T00:00:05.00Z"),
                parse_datetime("2000-01-01T00:00:15.00Z"),
            ],
        ),
    )


if __name__ == "__main__":
    main()
# ------------------------------------------------------------------------------
#
# Project: pygeofilter
# Authors: Magnar Martinsen
#
# ------------------------------------------------------------------------------
# Copyright (C) 2025 Norwegian Meteorological Institute
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies of this Software or works derived from this Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
# ------------------------------------------------------------------------------

# pylint: disable=W0621,C0114,C0115,C0116

import json

import pytest
import requests

from pygeofilter import ast
from pygeofilter.backends.solr import to_filter
from pygeofilter.parsers.ecql import parse
from pygeofilter.util import parse_datetime

SOLR_BASE_URL = "http://localhost:8983/solr/test"  # replace with your Solr URL
SOLR_SCHEMA_URL = "http://localhost:8983/api/cores/test/schema"
HEADERS = {
    'Content-type': 'application/json',
}
# Seconds; keeps the suite from hanging when Solr is unreachable.
REQUEST_TIMEOUT = 30

# input documents for testing
INPUT_DOCS = [
    {
        "id": "A",
        "geometry_jts": "MULTIPOLYGON(((0 0, 0 5, 5 5,5 0,0 0)))",
        "geometry_geo3d": "MULTIPOLYGON(((5 0, 5 5, 0 5, 0 0, 5 0)))",
        "center": "POINT(2.5 2.5)",
        "float_attribute": 0.0,
        "int_attribute": 5,
        "str_attribute": "this is a test",
        "datetime_attribute": "2000-01-01T00:00:00Z",
        "daterange_attribute": "[2000-01-01T00:00:00Z TO 2000-01-02T00:00:00Z]",
    },
    {
        "id": "B",
        "geometry_jts": "MULTIPOLYGON(((5 5, 5 10, 10 10,10 5,5 5)))",
        "geometry_geo3d": "MULTIPOLYGON(((10 5, 10 10, 5 10, 5 5, 10 5)))",
        "center": "POINT(7.5 7.5)",
        "float_attribute": 30.0,
        "str_attribute": "this is another test",
        "maybe_str_attribute": "some value",
        "datetime_attribute": "2000-01-01T00:00:10Z",
        "daterange_attribute": "[2000-01-04T00:00:00Z TO 2000-01-05T00:00:00Z]",
    },
]

# Field types required by the spatial and date-range test fields.
FIELD_TYPES = [
    {
        "name": "spatial_geo3d",
        "class": "solr.SpatialRecursivePrefixTreeFieldType",
        "geo": "true",
        "spatialContextFactory": "Geo3D",
        "prefixTree": "s2",
        "planetModel": "WGS84",
    },
    {
        "name": "spatial_jts",
        "class": "solr.SpatialRecursivePrefixTreeFieldType",
        "autoIndex": "true",
        "spatialContextFactory": "JTS",
        "validationRule": "repairBuffer0",
        "distErrPct": "0.025",
        "maxDistErr": "0.001",
        "distanceUnits": "kilometers",
    },
    {"name": "date_range", "class": "solr.DateRangeField"},
]

# Fields used by the test documents and queries.
FIELDS = [
    {"name": "extra_attr", "type": "string"},
    {"name": "float_attribute", "type": "pdouble"},
    {"name": "int_attribute", "type": "pint"},
    {"name": "datetime_attribute", "type": "pdate"},
    {"name": "str_attribute", "type": "text_general"},
    {"name": "center", "type": "location"},
    {"name": "geometry_jts", "type": "spatial_jts", "multiValued": "false"},
    {"name": "geometry_geo3d", "type": "spatial_geo3d", "multiValued": "false"},
    {"name": "daterange_attribute", "type": "date_range"},
]


def _post_json(url, payload):
    """POST ``payload`` as JSON to ``url`` and return the response."""
    return requests.post(
        url, data=json.dumps(payload), headers=HEADERS, timeout=REQUEST_TIMEOUT
    )


def _search(payload):
    """Run a JSON query against the test core and return the matching docs.

    Fails with the full Solr response when the query is rejected, instead
    of a bare ``KeyError`` on the missing ``response`` entry.
    """
    body = _post_json(SOLR_BASE_URL + '/query', payload).json()
    assert body['responseHeader']['status'] == 0, body
    return body['response']['docs']


@pytest.fixture(autouse=True, scope="session")
def prepare():
    """Prepare the Solr instance. Add the fields needed for testing.

    The core ``test`` itself is expected to exist already.
    """
    # Schema responses are deliberately not checked: on a re-run the types
    # and fields already exist and Solr answers with an error.
    for field_type in FIELD_TYPES:
        _post_json(SOLR_SCHEMA_URL, {"add-field-type": field_type})
    for field in FIELDS:
        _post_json(SOLR_SCHEMA_URL, {"add-field": field})

    yield 'ok'

    # The previous teardown called ``/solr/test/admin/cores`` (the core
    # admin API lives at ``/solr/admin/cores``), so it never cleaned up.
    # This fixture does not create the core, so it removes the indexed
    # test documents rather than unloading the core.
    _post_json(
        SOLR_BASE_URL + '/update?commit=true',
        {"delete": [doc["id"] for doc in INPUT_DOCS]},
    )


@pytest.fixture(autouse=True, scope="session")
def index(prepare):
    """Index the test documents and commit them."""
    response = _post_json(SOLR_BASE_URL + '/update?commit=true', INPUT_DOCS)
    response.raise_for_status()


@pytest.fixture(autouse=True, scope="session")
def data(index):
    """Fixture yielding the indexed records, in ``INPUT_DOCS`` order."""
    yield [_search({"query": f"id:{doc['id']}"})[0] for doc in INPUT_DOCS]


def filter_(ast_):
    """Evaluate ``ast_`` with the Solr backend and return matching docs."""
    query = to_filter(ast_, version="9.8.1")
    print(query)
    return _search(query)


def test_comparison(data):
    result = filter_(parse("int_attribute = 5"))
    assert len(result) == 1 and result[0]['id'] == data[0]['id']

    result = filter_(parse("float_attribute < 6.0"))
    assert len(result) == 1 and result[0]['id'] == data[0]['id']

    result = filter_(parse("float_attribute > 6.0"))
    assert len(result) == 1 and result[0]['id'] == data[1]['id']

    result = filter_(parse("int_attribute <= 5"))
    assert len(result) == 1 and result[0]['id'] == data[0]['id']

    result = filter_(parse("float_attribute >= 8.0"))
    assert len(result) == 1 and result[0]['id'] == data[1]['id']

    result = filter_(parse("float_attribute <> 0.0"))
    assert len(result) == 1 and result[0]['id'] == data[1]['id']


def test_combination(data):
    result = filter_(parse("int_attribute = 5 AND float_attribute < 6.0"))
    assert len(result) == 1 and result[0]['id'] == data[0]['id']

    result = filter_(parse("int_attribute = 6 OR float_attribute < 6.0"))
    assert len(result) == 1 and result[0]['id'] == data[0]['id']


def test_between(data):
    result = filter_(parse("float_attribute BETWEEN -1 AND 1"))
    assert len(result) == 1 and result[0]['id'] == data[0]['id']

    result = filter_(parse("int_attribute NOT BETWEEN 4 AND 6"))
    assert len(result) == 1 and result[0]['id'] == data[1]['id']


def test_like(data):
    result = filter_(parse("str_attribute LIKE 'this is a test'"))
    assert len(result) == 1 and result[0]['id'] == data[0]['id']

    result = filter_(parse("str_attribute LIKE 'this is % test'"))
    assert len(result) == 2

    result = filter_(parse("str_attribute NOT LIKE '% another test'"))
    assert len(result) == 1 and result[0]['id'] == data[0]['id']

    result = filter_(parse("str_attribute NOT LIKE 'this is . test'"))
    assert len(result) == 1 and result[0]['id'] == data[1]['id']

    result = filter_(parse("str_attribute ILIKE 'THIS IS . TEST'"))
    assert len(result) == 1 and result[0]['id'] == data[0]['id']

    result = filter_(parse("str_attribute ILIKE 'THIS IS % TEST'"))
    assert len(result) == 2


def test_in(data):
    result = filter_(parse("int_attribute IN ( 1, 2, 3, 4, 5 )"))
    assert len(result) == 1 and result[0]['id'] == data[0]['id']

    result = filter_(parse("int_attribute NOT IN ( 1, 2, 3, 4, 5 )"))
    assert len(result) == 1 and result[0]['id'] == data[1]['id']


def test_null(data):
    result = filter_(parse("maybe_str_attribute IS NULL"))
    assert len(result) == 1 and result[0]['id'] == data[0]['id']

    result = filter_(parse("maybe_str_attribute IS NOT NULL"))
    assert len(result) == 1 and result[0]['id'] == data[1]['id']


def test_has_attr():
    result = filter_(parse("extra_attr EXISTS"))
    assert len(result) == 0

    result = filter_(parse("extra_attr DOES-NOT-EXIST"))
    assert len(result) == 2


def test_temporal(data):
    result = filter_(
        ast.TimeDisjoint(
            ast.Attribute("datetime_attribute"),
            [
                parse_datetime("2000-01-01T00:00:05.00Z"),
                parse_datetime("2000-01-01T00:00:15.00Z"),
            ],
        )
    )
    assert len(result) == 1 and result[0]['id'] == data[0]['id']

    result = filter_(
        parse("datetime_attribute BEFORE 2000-01-01T00:00:05.00Z"),
    )
    assert len(result) == 1 and result[0]['id'] == data[0]['id']

    result = filter_(
        parse("datetime_attribute AFTER 2000-01-01T00:00:05.00Z"),
    )
    assert len(result) == 1 and result[0]['id'] == data[1]['id']


def test_spatial(data):
    result = filter_(
        parse("INTERSECTS(geometry_jts, ENVELOPE (0.0 1.0 0.0 1.0))"))
    assert len(result) == 1 and result[0]['id'] == data[0]['id']

    # TODO: Figure out why geo3d is giving the wrong result
    result = filter_(
        parse("INTERSECTS(geometry_geo3d, ENVELOPE (0.0 1.0 0.0 1.0))"))
    assert len(result) == 1 and result[0]['id'] == data[0]['id']

    # TODO: test more spatial queries

    result = filter_(
        parse("BBOX(center, 2, 2, 3, 3)"),
    )
    assert len(result) == 1 and result[0]['id'] == data[0]['id']


# TODO: add tests for array predicates, arithmetic, functions and nested
# attributes once the Solr backend supports them.
# including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies of this Software or works derived from this Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
# ------------------------------------------------------------------------------

from pygeofilter.backends.solr.util import like_to_wildcard


def test_like_to_wildcard():
    """Check the output of ``like_to_wildcard`` for two sample patterns.

    Both calls pass ``"*"`` and ``"."`` as the second and third arguments.
    """
    # A "." in the pattern is expected to come back as "?".
    converted = like_to_wildcard("This . a test", "*", ".")
    assert converted == "This ? a test"

    # A "*" in the pattern is expected to come back unchanged.
    unchanged = like_to_wildcard("This * a test", "*", ".")
    assert unchanged == "This * a test"