From e5eddd7204e6da64526a9b8441619d48c599576e Mon Sep 17 00:00:00 2001
From: Lars Marius Garshol
Date: Mon, 28 Jan 2019 11:58:37 +0100
Subject: [PATCH] Optimize contains() on large arrays

---
 .../spt/data/jslt/impl/AbstractFunction.java  | 33 +++++++++++
 .../spt/data/jslt/impl/BuiltinFunctions.java  |  7 ---
 .../data/jslt/impl/FunctionExpression.java    | 34 +++++++++++
 .../impl/OptimizedStaticContainsFunction.java | 58 +++++++++++++++++++
 4 files changed, 125 insertions(+), 7 deletions(-)
 create mode 100644 src/main/java/com/schibsted/spt/data/jslt/impl/AbstractFunction.java
 create mode 100644 src/main/java/com/schibsted/spt/data/jslt/impl/OptimizedStaticContainsFunction.java

diff --git a/src/main/java/com/schibsted/spt/data/jslt/impl/AbstractFunction.java b/src/main/java/com/schibsted/spt/data/jslt/impl/AbstractFunction.java
new file mode 100644
index 00000000..fefdafa4
--- /dev/null
+++ b/src/main/java/com/schibsted/spt/data/jslt/impl/AbstractFunction.java
@@ -0,0 +1,33 @@
+
+// Copyright 2018 Schibsted Marketplaces Products & Technology As
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package com.schibsted.spt.data.jslt.impl;
+
+import com.schibsted.spt.data.jslt.Function;
+
+/**
+ * Base class for function implementations: an AbstractCallable that
+ * also implements the Function interface. It is a top-level class so
+ * that functions defined outside BuiltinFunctions can extend it.
+ */
+public abstract class AbstractFunction extends AbstractCallable implements Function {
+
+  /**
+   * Passes name, min and max straight on to AbstractCallable.
+   */
+  public AbstractFunction(String name, int min, int max) {
+    super(name, min, max);
+  }
+}
diff --git a/src/main/java/com/schibsted/spt/data/jslt/impl/BuiltinFunctions.java b/src/main/java/com/schibsted/spt/data/jslt/impl/BuiltinFunctions.java
index f28d17a2..ebf94d49 100644
--- a/src/main/java/com/schibsted/spt/data/jslt/impl/BuiltinFunctions.java
+++ b/src/main/java/com/schibsted/spt/data/jslt/impl/BuiltinFunctions.java
@@ -117,13 +117,6 @@ public class BuiltinFunctions {
     macros.put("fallback", new BuiltinFunctions.Fallback());
   }
 
-  private static abstract class AbstractFunction extends AbstractCallable implements Function {
-
-    public AbstractFunction(String name, int min, int max) {
-      super(name, min, max);
-    }
-  }
-
   private static abstract class AbstractMacro extends AbstractCallable implements Macro {
 
     public AbstractMacro(String name, int min, int max) {
diff --git a/src/main/java/com/schibsted/spt/data/jslt/impl/FunctionExpression.java b/src/main/java/com/schibsted/spt/data/jslt/impl/FunctionExpression.java
index 20418a34..84668e79 100644
--- a/src/main/java/com/schibsted/spt/data/jslt/impl/FunctionExpression.java
+++ b/src/main/java/com/schibsted/spt/data/jslt/impl/FunctionExpression.java
@@ -55,4 +55,38 @@ public JsonNode apply(Scope scope, JsonNode input) {
     else
       return function.call(input, params);
   }
+
+  // contains() calls are only rewritten when the literal array has
+  // strictly more elements than this
+  private static final int OPTIMIZE_ARRAY_CONTAINS_MIN = 10;
+
+  /**
+   * Runs the inherited optimization, then, for a two-argument call to
+   * the built-in contains() whose second argument is a literal array
+   * with more than OPTIMIZE_ARRAY_CONTAINS_MIN elements, passes an
+   * OptimizedStaticContainsFunction built from that array to resolve().
+   *
+   * @return this expression
+   */
+  @Override
+  public ExpressionNode optimize() {
+    super.optimize();
+
+    // if the second argument to contains() is an array with a large
+    // number of elements, don't do a linear search. instead, use an
+    // optimized version of the function that uses a HashSet
+    if (function == BuiltinFunctions.functions.get("contains") &&
+        arguments.length == 2 &&
+        (arguments[1] instanceof LiteralExpression)) {
+
+      // the literal is evaluated with neither a scope nor an input
+      JsonNode literal = arguments[1].apply(null, null);
+      if (literal.isArray() && literal.size() > OPTIMIZE_ARRAY_CONTAINS_MIN) {
+        // we use resolve to make sure all references are updated
+        resolve(new OptimizedStaticContainsFunction(literal));
+      }
+    }
+
+    return this;
+  }
 }
diff --git a/src/main/java/com/schibsted/spt/data/jslt/impl/OptimizedStaticContainsFunction.java b/src/main/java/com/schibsted/spt/data/jslt/impl/OptimizedStaticContainsFunction.java
new file mode 100644
index 00000000..24a58768
--- /dev/null
+++ b/src/main/java/com/schibsted/spt/data/jslt/impl/OptimizedStaticContainsFunction.java
@@ -0,0 +1,58 @@
+
+// Copyright 2019 Schibsted Marketplaces Products & Technology As
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package com.schibsted.spt.data.jslt.impl;
+
+import java.util.Set;
+import java.util.HashSet;
+import com.fasterxml.jackson.databind.JsonNode;
+import com.fasterxml.jackson.databind.node.BooleanNode;
+
+/**
+ * An optimized version of contains(a, b) which is used when b is an
+ * array literal with a large number of values, so that a linear
+ * search becomes a performance drag. The values are put in a HashSet
+ * when the function is created, so each call is one set lookup.
+ */
+public class OptimizedStaticContainsFunction extends AbstractFunction {
+  private final Set<JsonNode> values;
+
+  /**
+   * @param array the array literal to search in; its elements are
+   *              added to the set, the array node itself is not kept
+   */
+  public OptimizedStaticContainsFunction(JsonNode array) {
+    super("optimized-static-contains", 2, 2);
+
+    this.values = new HashSet<JsonNode>();
+    for (JsonNode element : array)
+      values.add(element);
+  }
+
+  /**
+   * Looks up arguments[0] in the set. arguments[1] is ignored, since
+   * the array was fixed when this function was constructed.
+   *
+   * @return BooleanNode.TRUE if the value is in the set, otherwise
+   *         BooleanNode.FALSE
+   */
+  @Override
+  public JsonNode call(JsonNode input, JsonNode[] arguments) {
+    if (values.contains(arguments[0]))
+      return BooleanNode.TRUE;
+    else
+      return BooleanNode.FALSE;
+  }
+}