From 7bce281284a5ed2b0242f3edb0db366c537cb971 Mon Sep 17 00:00:00 2001 From: Chris K Wensel Date: Tue, 19 Sep 2023 11:45:57 -0700 Subject: [PATCH] switch to improved statement parser this allows support for parameterized intrinsic functions --- docs/OPERATIONS.md | 117 ++++++++++++ tessellate-main/build.gradle.kts | 2 + .../tessellate/model/CoerceOp.java | 43 ----- .../clusterless/tessellate/model/CopyOp.java | 41 ---- .../tessellate/model/DiscardOp.java | 47 ----- .../tessellate/model/InsertOp.java | 66 ------- .../tessellate/model/RenameOp.java | 41 ---- .../tessellate/model/Transform.java | 24 +-- .../tessellate/parser/BaseParser.java | 28 +++ .../tessellate/parser/FieldParser.java | 25 +-- .../tessellate/parser/FieldsParser.java | 22 ++- .../tessellate/parser/Printer.java | 36 ++++ .../tessellate/parser/StatementParser.java | 176 ++++++++++++++++++ .../tessellate/parser/ast/Assignment.java | 47 +++++ .../TransformOp.java => parser/ast/Exp.java} | 7 +- .../tessellate/parser/{ => ast}/Field.java | 10 +- .../parser/{ => ast}/FieldName.java | 2 +- .../parser/{ => ast}/FieldOrdinal.java | 2 +- .../tessellate/parser/{ => ast}/FieldRef.java | 2 +- .../parser/{ => ast}/FieldType.java | 10 +- .../parser/{ => ast}/FieldTypeName.java | 7 +- .../parser/{ => ast}/FieldTypeParam.java | 9 +- .../tessellate/parser/ast/Intrinsic.java | 36 ++++ .../tessellate/parser/ast/IntrinsicName.java | 25 +++ .../parser/ast/IntrinsicParams.java | 32 ++++ .../tessellate/parser/ast/NAryOperation.java | 21 +++ .../clusterless/tessellate/parser/ast/Op.java | 44 +++++ .../tessellate/parser/ast/Operation.java | 78 ++++++++ .../tessellate/parser/ast/Statement.java | 18 ++ .../tessellate/parser/ast/UnaryOperation.java | 23 +++ .../tessellate/pipeline/Pipeline.java | 88 +++------ .../tessellate/pipeline/PipelineContext.java | 38 ++++ .../tessellate/pipeline/Transformer.java | 112 +++++++++++ .../tessellate/pipeline/Transforms.java | 43 ----- ...dsParserTest.java => FieldParserTest.java} | 2 +- 
.../parser/StatementParserTest.java | 79 ++++++++ .../pipeline/PipelineOptionsMergerTest.java | 6 +- .../pipeline/PipelineParseTest.java | 20 +- .../test/resources/config/pipeline-mvel.json | 29 +++ .../src/test/resources/config/pipeline.json | 2 +- 40 files changed, 1034 insertions(+), 426 deletions(-) create mode 100644 docs/OPERATIONS.md delete mode 100644 tessellate-main/src/main/java/io/clusterless/tessellate/model/CoerceOp.java delete mode 100644 tessellate-main/src/main/java/io/clusterless/tessellate/model/CopyOp.java delete mode 100644 tessellate-main/src/main/java/io/clusterless/tessellate/model/DiscardOp.java delete mode 100644 tessellate-main/src/main/java/io/clusterless/tessellate/model/InsertOp.java delete mode 100644 tessellate-main/src/main/java/io/clusterless/tessellate/model/RenameOp.java create mode 100644 tessellate-main/src/main/java/io/clusterless/tessellate/parser/BaseParser.java create mode 100644 tessellate-main/src/main/java/io/clusterless/tessellate/parser/Printer.java create mode 100644 tessellate-main/src/main/java/io/clusterless/tessellate/parser/StatementParser.java create mode 100644 tessellate-main/src/main/java/io/clusterless/tessellate/parser/ast/Assignment.java rename tessellate-main/src/main/java/io/clusterless/tessellate/{model/TransformOp.java => parser/ast/Exp.java} (65%) rename tessellate-main/src/main/java/io/clusterless/tessellate/parser/{ => ast}/Field.java (74%) rename tessellate-main/src/main/java/io/clusterless/tessellate/parser/{ => ast}/FieldName.java (96%) rename tessellate-main/src/main/java/io/clusterless/tessellate/parser/{ => ast}/FieldOrdinal.java (94%) rename tessellate-main/src/main/java/io/clusterless/tessellate/parser/{ => ast}/FieldRef.java (89%) rename tessellate-main/src/main/java/io/clusterless/tessellate/parser/{ => ast}/FieldType.java (74%) rename tessellate-main/src/main/java/io/clusterless/tessellate/parser/{ => ast}/FieldTypeName.java (80%) rename 
tessellate-main/src/main/java/io/clusterless/tessellate/parser/{ => ast}/FieldTypeParam.java (75%) create mode 100644 tessellate-main/src/main/java/io/clusterless/tessellate/parser/ast/Intrinsic.java create mode 100644 tessellate-main/src/main/java/io/clusterless/tessellate/parser/ast/IntrinsicName.java create mode 100644 tessellate-main/src/main/java/io/clusterless/tessellate/parser/ast/IntrinsicParams.java create mode 100644 tessellate-main/src/main/java/io/clusterless/tessellate/parser/ast/NAryOperation.java create mode 100644 tessellate-main/src/main/java/io/clusterless/tessellate/parser/ast/Op.java create mode 100644 tessellate-main/src/main/java/io/clusterless/tessellate/parser/ast/Operation.java create mode 100644 tessellate-main/src/main/java/io/clusterless/tessellate/parser/ast/Statement.java create mode 100644 tessellate-main/src/main/java/io/clusterless/tessellate/parser/ast/UnaryOperation.java create mode 100644 tessellate-main/src/main/java/io/clusterless/tessellate/pipeline/PipelineContext.java create mode 100644 tessellate-main/src/main/java/io/clusterless/tessellate/pipeline/Transformer.java delete mode 100644 tessellate-main/src/main/java/io/clusterless/tessellate/pipeline/Transforms.java rename tessellate-main/src/test/java/io/clusterless/tessellate/parser/{FieldsParserTest.java => FieldParserTest.java} (98%) create mode 100644 tessellate-main/src/test/java/io/clusterless/tessellate/parser/StatementParserTest.java create mode 100644 tessellate-main/src/test/resources/config/pipeline-mvel.json diff --git a/docs/OPERATIONS.md b/docs/OPERATIONS.md new file mode 100644 index 0000000..053e0af --- /dev/null +++ b/docs/OPERATIONS.md @@ -0,0 +1,117 @@ +# Operation Declaration Syntax + +The pipeline declaration, in part, declares the required operation against the schema and tuples being processed. + +A field can be dropped or renamed, or the field type can be changed. 
During processing, the field is physically dropped +from the processing stream (after being used upstream in a calculation), or the value in the field is coerced into a new +type. + +Insertion of field values can be done statically (by injecting a literal value), or via a function that takes other +fields as input parameters (`concat(firstName, " ", lastName)`). + +Tuples (records) can be filtered and dropped, or a tuple value can be used to generate a set of new tuples (one +for each value in a json list). + +Since the tessellate pipeline declaration is simple json, we need to introduce a new syntax that can accommodate the +different types of operations introduced above. + +```json +{ + "operations": [ + "op1", + "op2", + "etc" + ] +} +``` + +# Operations + +## Single line syntax + +```bnf +operation = field-parameters expression operator field-results + +field-parameters = field *( "+" field ) + +field = (field-name / field-ordinal) [ "|" type ] + +expression = + +``` + +## Transforms + +- `=>` - literal assignment + - `value => intoFieldname|asType` +- `+>` - retain after copy or evaluate + - `fromFieldname +> toFieldname|asType` + - `fromField1|type+fromField2|type !{java} +> toFieldname|asType` + - `fromFieldname @[json pointer] +> toFieldname|asType` + - `fromFieldname ~/regex matcher/ +> toFieldname|Boolean` +- `->` - discard after copy or evaluate + - `fieldname ->` + - `fromFieldname -> toFieldname|asType` + - `fromField1|type+fromField2|type !{java} -> toFieldname|asType` + - `fromFieldname @[json pointer] -> toFieldname|asType` + - `fromFieldname ~/regex matcher/ -> toFieldname|Boolean` +- `[none]`- inplace coercion + - `fieldname|asType` + +## Filters + +The expression will be evaluated and if it returns `true`, the tuple will be retained, if `false`, the whole tuple is +discarded. Or the pattern will be applied and if a match is found, the tuple will be retained, if not matched, the whole +tuple is discarded.
+ +- java expression - + - `fromField1|type+fromField2|type !{java}` +- regular expression (regex) - + - `fromField ~/regex/` +- pointer expression (json pointer) - + - `fromField @[path]` + - `fromField @[path] ~/regex/` + +## Simple Transforms + +- insert - insert a literal value into a field + - `value=>intoField|type` +- coerce - transform a field to a new type + - `field|newType` +- copy - copy a field value to a new field, optionally coercing its type + - `fromField+>toField|type` +- rename - rename a field, optionally coercing its type + - `fromField->toField|type` +- discard - remove a field + - `field->` + +## Expressions + +- `!{java}` - `java` is a java expression +- `~/regex/` - `regex` is a regular expression +- `@[json pointer]` - [JSON Pointer](https://tools.ietf.org/html/draft-ietf-appsawg-json-pointer-03) having support for + wildcards (`/*/`) and descent (`/**/`). +- `^intrinsic{param1:value1, param2:value2}` - `intrinsic` is a built-in function, with optional parameters + +## Intrinsic Transforms + +- any - `true` if any input is true, otherwise `false` or `null` if used as a predicate + - `fromField1+fromField2+fromFieldN ^any{} +> intoField|Boolean` +- all - `true` if all inputs are true, otherwise `false` or `null` if used as a predicate + - `fromField1+fromField2+fromFieldN ^all{} +> intoField|Boolean` +- tsid - create a unique long id + - `^tsid{node:...,nodeCount:...,signed:true/false,epoch:...} +> intoField|type` + - type = long, string +- siphash + - `fromField1+fromField2+fromFieldN ^siphash{} +> intoField|type` + +## Expression Transforms + +- java expression - + - `fromField1|type+fromField2|type !{java} +> toField` // add new derived field + - `fromField1|type+fromField2|type !{java} -> toField` // add new derived field, discard arguments +- regular expression (regex) - + - `fromField ~/regex/ +> toField` +- pointer expression (json pointer) - + - `fromField @[json pointer] +> toField` + - `fromField @[json pointer] ~/regex/ +>
toField` diff --git a/tessellate-main/build.gradle.kts b/tessellate-main/build.gradle.kts index baa73e0..0653ea4 100644 --- a/tessellate-main/build.gradle.kts +++ b/tessellate-main/build.gradle.kts @@ -119,6 +119,8 @@ dependencies { testImplementation("uk.org.webcompere:system-stubs-jupiter:$systemStubs") testImplementation("org.mockito:mockito-inline:5.1.1") + testImplementation("org.assertj:assertj-core:3.24.2") + // https://mvnrepository.com/artifact/software.amazon.awssdk val awsSdk2 = "2.20.69" integrationTestImplementation("software.amazon.awssdk:s3:$awsSdk2") diff --git a/tessellate-main/src/main/java/io/clusterless/tessellate/model/CoerceOp.java b/tessellate-main/src/main/java/io/clusterless/tessellate/model/CoerceOp.java deleted file mode 100644 index 78b69b2..0000000 --- a/tessellate-main/src/main/java/io/clusterless/tessellate/model/CoerceOp.java +++ /dev/null @@ -1,43 +0,0 @@ -/* - * Copyright (c) 2023 Chris K Wensel . All Rights Reserved. - * - * This Source Code Form is subject to the terms of the Mozilla Public - * License, v. 2.0. If a copy of the MPL was not distributed with this - * file, You can obtain one at http://mozilla.org/MPL/2.0/. - */ - -package io.clusterless.tessellate.model; - -import com.fasterxml.jackson.annotation.JsonCreator; -import io.clusterless.tessellate.pipeline.Transforms; - -/** - *
- * ts|DateTime|yyyyMMdd
- * 
- */ -public class CoerceOp implements TransformOp, Model { - Field field; - - @JsonCreator - public CoerceOp(String field) { - this.field = new Field(field); - } - - public CoerceOp(Field field) { - this.field = field; - } - - public Field field() { - return field; - } - - @Override - public Transforms transform() { - return Transforms.coerce; - } - - public String toString() { - return field.toString(); - } -} diff --git a/tessellate-main/src/main/java/io/clusterless/tessellate/model/CopyOp.java b/tessellate-main/src/main/java/io/clusterless/tessellate/model/CopyOp.java deleted file mode 100644 index 0af15a5..0000000 --- a/tessellate-main/src/main/java/io/clusterless/tessellate/model/CopyOp.java +++ /dev/null @@ -1,41 +0,0 @@ -/* - * Copyright (c) 2023 Chris K Wensel . All Rights Reserved. - * - * This Source Code Form is subject to the terms of the Mozilla Public - * License, v. 2.0. If a copy of the MPL was not distributed with this - * file, You can obtain one at http://mozilla.org/MPL/2.0/. - */ - -package io.clusterless.tessellate.model; - -import com.fasterxml.jackson.annotation.JsonCreator; -import io.clusterless.tessellate.pipeline.Transforms; -import org.jetbrains.annotations.NotNull; - -/** - *
- * ts+>ymd|DateTime|yyyyMMdd
- * 
- */ -public class CopyOp extends Translate implements TransformOp { - @JsonCreator - public CopyOp(String partition) { - super(partition); - } - - @Override - @NotNull - protected String translate() { - return "[+]>"; - } - - @Override - protected boolean requiresFrom() { - return true; - } - - @Override - public Transforms transform() { - return Transforms.copy; - } -} diff --git a/tessellate-main/src/main/java/io/clusterless/tessellate/model/DiscardOp.java b/tessellate-main/src/main/java/io/clusterless/tessellate/model/DiscardOp.java deleted file mode 100644 index 936d5ec..0000000 --- a/tessellate-main/src/main/java/io/clusterless/tessellate/model/DiscardOp.java +++ /dev/null @@ -1,47 +0,0 @@ -/* - * Copyright (c) 2023 Chris K Wensel . All Rights Reserved. - * - * This Source Code Form is subject to the terms of the Mozilla Public - * License, v. 2.0. If a copy of the MPL was not distributed with this - * file, You can obtain one at http://mozilla.org/MPL/2.0/. - */ - -package io.clusterless.tessellate.model; - -import com.fasterxml.jackson.annotation.JsonCreator; -import io.clusterless.tessellate.pipeline.Transforms; - -/** - *
- * ts->
- * 
- */ -public class DiscardOp implements TransformOp, Model { - private final String declaration; - private final Field field; - - @JsonCreator - public DiscardOp(String declaration) { - this.declaration = declaration; - - String[] split = this.declaration.split("->"); - this.field = new Field(split[0]); - } - - public Field field() { - return field; - } - - public String declaration() { - return declaration; - } - - public String toString() { - return declaration; - } - - @Override - public Transforms transform() { - return Transforms.discard; - } -} diff --git a/tessellate-main/src/main/java/io/clusterless/tessellate/model/InsertOp.java b/tessellate-main/src/main/java/io/clusterless/tessellate/model/InsertOp.java deleted file mode 100644 index cefdc5e..0000000 --- a/tessellate-main/src/main/java/io/clusterless/tessellate/model/InsertOp.java +++ /dev/null @@ -1,66 +0,0 @@ -/* - * Copyright (c) 2023 Chris K Wensel . All Rights Reserved. - * - * This Source Code Form is subject to the terms of the Mozilla Public - * License, v. 2.0. If a copy of the MPL was not distributed with this - * file, You can obtain one at http://mozilla.org/MPL/2.0/. - */ - -package io.clusterless.tessellate.model; - -import com.fasterxml.jackson.annotation.JsonCreator; -import com.fasterxml.jackson.annotation.JsonIgnore; -import io.clusterless.tessellate.pipeline.Transforms; -import org.jetbrains.annotations.NotNull; - -/** - *
- * value=>field|type
- * 
- */ -public class InsertOp implements TransformOp, Model { - private String declaration; - @JsonIgnore - private Field field; - @JsonIgnore - private String value; - - @JsonCreator - public InsertOp(String declaration) { - this.declaration = declaration; - String[] split = declaration.split(translate()); - - if (split.length != 2) { - throw new IllegalArgumentException("invalid " + transform().name() + " declaration, expects 'value" + translate() + "field`, got: " + declaration); - } - - this.field = new Field(split[1]); - this.value = split[0]; - } - - @NotNull - protected String translate() { - return "=>"; - } - - public String declaration() { - return declaration; - } - - public Field field() { - return field; - } - - public String value() { - return value; - } - - public String toString() { - return declaration; - } - - @Override - public Transforms transform() { - return Transforms.insert; - } -} diff --git a/tessellate-main/src/main/java/io/clusterless/tessellate/model/RenameOp.java b/tessellate-main/src/main/java/io/clusterless/tessellate/model/RenameOp.java deleted file mode 100644 index 7dd03a7..0000000 --- a/tessellate-main/src/main/java/io/clusterless/tessellate/model/RenameOp.java +++ /dev/null @@ -1,41 +0,0 @@ -/* - * Copyright (c) 2023 Chris K Wensel . All Rights Reserved. - * - * This Source Code Form is subject to the terms of the Mozilla Public - * License, v. 2.0. If a copy of the MPL was not distributed with this - * file, You can obtain one at http://mozilla.org/MPL/2.0/. - */ - -package io.clusterless.tessellate.model; - -import com.fasterxml.jackson.annotation.JsonCreator; -import io.clusterless.tessellate.pipeline.Transforms; -import org.jetbrains.annotations.NotNull; - -/** - *
- * ts->ymd|DateTime|yyyyMMdd
- * 
- */ -public class RenameOp extends Translate implements TransformOp { - @JsonCreator - public RenameOp(String partition) { - super(partition); - } - - @Override - @NotNull - protected String translate() { - return "->"; - } - - @Override - protected boolean requiresFrom() { - return true; - } - - @Override - public Transforms transform() { - return Transforms.rename; - } -} diff --git a/tessellate-main/src/main/java/io/clusterless/tessellate/model/Transform.java b/tessellate-main/src/main/java/io/clusterless/tessellate/model/Transform.java index b1e2aed..eb47ec2 100644 --- a/tessellate-main/src/main/java/io/clusterless/tessellate/model/Transform.java +++ b/tessellate-main/src/main/java/io/clusterless/tessellate/model/Transform.java @@ -11,7 +11,8 @@ import com.fasterxml.jackson.annotation.JsonCreator; import com.fasterxml.jackson.annotation.JsonSetter; import com.fasterxml.jackson.annotation.JsonValue; -import io.clusterless.tessellate.pipeline.Transforms; +import io.clusterless.tessellate.parser.StatementParser; +import io.clusterless.tessellate.parser.ast.Statement; import java.util.ArrayList; import java.util.List; @@ -26,7 +27,7 @@ * - insert - value=ymd|DateTime|yyyyMMdd */ public class Transform implements Model { - private List transforms = new ArrayList<>(); + private List statements = new ArrayList<>(); public Transform(String... transforms) { this(List.of(transforms)); @@ -34,28 +35,23 @@ public Transform(String... 
transforms) { @JsonCreator public Transform(List transforms) { - transforms.forEach(this::addTransform); + transforms.forEach(this::addStatement); } public Transform() { } @JsonSetter - public void addTransform(String transform) { - for (Transforms value : Transforms.values()) { - if (value.matches(transform)) { - transforms.add(value.transform(transform)); - return; - } - } + public void addStatement(String statement) { + statements.add(StatementParser.parse(statement)); } - public List transformOps() { - return transforms; + public List statements() { + return statements; } @JsonValue - public List transforms() { - return transforms.stream().map(Object::toString).collect(Collectors.toList()); + public List statementsToString() { + return statements.stream().map(Object::toString).collect(Collectors.toList()); } } diff --git a/tessellate-main/src/main/java/io/clusterless/tessellate/parser/BaseParser.java b/tessellate-main/src/main/java/io/clusterless/tessellate/parser/BaseParser.java new file mode 100644 index 0000000..33b6a8b --- /dev/null +++ b/tessellate-main/src/main/java/io/clusterless/tessellate/parser/BaseParser.java @@ -0,0 +1,28 @@ +/* + * Copyright (c) 2023 Chris K Wensel . All Rights Reserved. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ */ + +package io.clusterless.tessellate.parser; + +import org.jparsec.Parser; +import org.jparsec.error.ParserException; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class BaseParser { + private static final Logger LOG = LoggerFactory.getLogger(BaseParser.class); + + static T parse(Parser parser, String field) { + try { + return parser.parse(field, Parser.Mode.DEBUG); + } catch (ParserException e) { + LOG.error("unable to parse: {}, got: {}", field, e.getMessage().replace("\n", "")); + + throw new RuntimeException(e); + } + } +} diff --git a/tessellate-main/src/main/java/io/clusterless/tessellate/parser/FieldParser.java b/tessellate-main/src/main/java/io/clusterless/tessellate/parser/FieldParser.java index d2cc0dc..0310104 100644 --- a/tessellate-main/src/main/java/io/clusterless/tessellate/parser/FieldParser.java +++ b/tessellate-main/src/main/java/io/clusterless/tessellate/parser/FieldParser.java @@ -8,14 +8,12 @@ package io.clusterless.tessellate.parser; +import io.clusterless.tessellate.parser.ast.*; import org.jparsec.Parser; import org.jparsec.Parsers; import org.jparsec.Scanners; -import org.jparsec.error.ParserException; import org.jparsec.pattern.CharPredicate; import org.jparsec.pattern.CharPredicates; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; import java.util.List; import java.util.Optional; @@ -24,7 +22,6 @@ public class FieldParser { - private static final Logger LOG = LoggerFactory.getLogger(FieldParser.class); private static final CharPredicate FIELD_NAME_EXTRA = CharPredicates.among("~@#$%^&_"); private static final CharPredicate PARAM_EXTRA = CharPredicates.not(among("|+")); // | delimits params, + delimits fields private static final Parser FIELD_NAME = @@ -77,30 +74,18 @@ public class FieldParser { private static final Parser types = Parsers.sequence(typeName, typeParam, FieldType::new); - private static final Parser fullFieldDeclaration = + public static final Parser fullFieldDeclaration = 
Parsers.sequence(Parsers.or(FIELD_NAME, FIELD_ORDINAL), types.asOptional(), Field::new); private static final Parser FIELD_DELIM = Parsers.sequence(Scanners.many(IS_WHITESPACE), Scanners.isChar('+'), Scanners.many(IS_WHITESPACE)); - private static final Parser> fieldList = + public static final Parser> fieldList = fullFieldDeclaration.sepBy(FIELD_DELIM); public static Field parseField(String field) { - try { - return fullFieldDeclaration.parse(field, Parser.Mode.DEBUG); - } catch (ParserException e) { - LOG.error("unable to parse: {}, got: {}, tree: {}", field, e.getMessage(), e.getParseTree()); - - throw new RuntimeException(e); - } + return BaseParser.parse(fullFieldDeclaration, field); } public static List parseFieldList(String field) { - try { - return fieldList.parse(field, Parser.Mode.DEBUG); - } catch (ParserException e) { - LOG.error("unable to parse: {}, got: {}, tree: {}", field, e.getMessage(), e.getParseTree()); - - throw new RuntimeException(e); - } + return BaseParser.parse(fieldList, field); } } diff --git a/tessellate-main/src/main/java/io/clusterless/tessellate/parser/FieldsParser.java b/tessellate-main/src/main/java/io/clusterless/tessellate/parser/FieldsParser.java index b93da35..423fbea 100644 --- a/tessellate-main/src/main/java/io/clusterless/tessellate/parser/FieldsParser.java +++ b/tessellate-main/src/main/java/io/clusterless/tessellate/parser/FieldsParser.java @@ -14,15 +14,20 @@ import cascading.tuple.type.CoercibleType; import cascading.tuple.type.DateType; import cascading.tuple.type.InstantType; +import io.clusterless.tessellate.parser.ast.Field; +import io.clusterless.tessellate.parser.ast.FieldType; +import io.clusterless.tessellate.parser.ast.FieldTypeParam; import io.clusterless.tessellate.temporal.IntervalUnits; import io.clusterless.tessellate.type.WrappedCoercibleType; import io.clusterless.tessellate.util.JSONUtil; +import org.jetbrains.annotations.NotNull; import java.lang.reflect.Type; import java.time.ZoneId; import 
java.time.format.DateTimeFormatter; import java.time.temporal.ChronoUnit; import java.time.temporal.TemporalUnit; +import java.util.List; import java.util.Locale; import java.util.TimeZone; @@ -74,7 +79,22 @@ public Fields parseSingleFields(String value, Type defaultType) { Field parsed = FieldParser.parseField(value); - Comparable name = parsed.fieldRef.asComparable(); + return asFields(parsed, defaultType); + } + + public Fields asFields(List fields) { + return fields.stream() + .map(this::asFields) + .reduce(Fields.NONE, Fields::append); + } + + @NotNull + public Fields asFields(Field parsed) { + return asFields(parsed, null); + } + + public Fields asFields(Field parsed, Type defaultType) { + Comparable name = parsed.fieldRef().asComparable(); if (parsed.fieldType().isEmpty()) { return defaultType == null ? new Fields(name) : new Fields(name, defaultType); } diff --git a/tessellate-main/src/main/java/io/clusterless/tessellate/parser/Printer.java b/tessellate-main/src/main/java/io/clusterless/tessellate/parser/Printer.java new file mode 100644 index 0000000..1775d38 --- /dev/null +++ b/tessellate-main/src/main/java/io/clusterless/tessellate/parser/Printer.java @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2023 Chris K Wensel . All Rights Reserved. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + */ + +package io.clusterless.tessellate.parser; + +import io.clusterless.tessellate.parser.ast.Field; + +import java.util.List; +import java.util.stream.Collectors; + +public class Printer { + public static String withParams(Object name, Object param) { + return name + (param != null ? 
"|" + param : ""); + } + + public static String fields(List fields) { + return fields.stream().map(Field::toString).collect(Collectors.joining("+")); + } + + public static String literal(String literal) { + if (literal == null) { + return null; + } + + if (literal.matches(".*[,{}'\": ]+.*")) { + return String.format("'%s'", literal.replace("'", "''")); + } + + return literal; + } +} diff --git a/tessellate-main/src/main/java/io/clusterless/tessellate/parser/StatementParser.java b/tessellate-main/src/main/java/io/clusterless/tessellate/parser/StatementParser.java new file mode 100644 index 0000000..2376a1b --- /dev/null +++ b/tessellate-main/src/main/java/io/clusterless/tessellate/parser/StatementParser.java @@ -0,0 +1,176 @@ +/* + * Copyright (c) 2023 Chris K Wensel . All Rights Reserved. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ */ + +package io.clusterless.tessellate.parser; + +import io.clusterless.tessellate.parser.ast.*; +import org.jparsec.Parser; +import org.jparsec.Parsers; +import org.jparsec.Scanners; +import org.jparsec.Terminals; +import org.jparsec.pattern.CharPredicates; +import org.jparsec.pattern.Patterns; + +import java.util.Map; +import java.util.stream.Collectors; + +import static org.jparsec.Parsers.EOF; +import static org.jparsec.Parsers.sequence; +import static org.jparsec.pattern.CharPredicates.*; + +public class StatementParser { + + private static Parser op(String op) { + return Patterns.string(op).toScanner(op); + + } + + private static final Parser QUOTED_LITERAL = Parsers.or( + Terminals.StringLiteral.DOUBLE_QUOTE_TOKENIZER, + Terminals.StringLiteral.SINGLE_QUOTE_TOKENIZER); + + public static final Parser ASSIGNMENT = op("=>").source().map(Op::new); // literal arguments only + public static final Parser RETAIN = op("+>").source().map(Op::new); + public static final Parser DISCARD = op("->").source().map(Op::new); + + public static Parser operators = + Parsers.or( + ASSIGNMENT, + RETAIN, + DISCARD + ).source().map(Op::new); + + public static Parser VARIABLE_OPS = + Parsers.or( + RETAIN, + DISCARD + ).source().map(Op::new); + + + private static final Parser PARAM_DELIM = Parsers.sequence(Scanners.many(IS_WHITESPACE), Scanners.isChar(','), Scanners.many(IS_WHITESPACE)); + + public static final Parser LITERAL_VALUE = + Parsers.or( + QUOTED_LITERAL, + Parsers.sequence( + Scanners.isChar(notAmong("'\"")), + Parsers.sequence( + operators.not(), + Scanners.isChar( + CharPredicates.and(ALWAYS, CharPredicates.not(IS_WHITESPACE), CharPredicates.notAmong(",:{}")) + ) + ).many() + ).source() + ).between(Scanners.many(IS_WHITESPACE), Scanners.many(IS_WHITESPACE)); + + private static final Parser> PARAM_ENTRY = + Parsers.sequence( + Scanners.isChar(CharPredicates.IS_ALPHA) + .followedBy(Scanners.many1(IS_ALPHA_NUMERIC_)).source() + .followedBy(Scanners.many(IS_WHITESPACE)) + 
.followedBy(Scanners.isChar(':') + .followedBy(Scanners.many(IS_WHITESPACE))), + LITERAL_VALUE + .followedBy(Scanners.many(IS_WHITESPACE)), + (Map::entry) + ); + + private static final Parser> PARAMS = + PARAM_ENTRY.sepBy(PARAM_DELIM).map(l -> l.stream().collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue))); + + private static final Parser INTRINSIC_NAME = sequence( + Scanners.isChar('^'), + Scanners.many1(CharPredicates.IS_ALPHA).source(), + (unused, intrinsic) -> new IntrinsicName(intrinsic) + ); + public static final Parser INTRINSIC_PARAMS = sequence( + Scanners.isChar('{'), + Scanners.many(IS_WHITESPACE), + PARAMS.asOptional(), + Scanners.many(IS_WHITESPACE), + Scanners.isChar('}'), + (unused, unused2, params, unused3, unused4) -> new IntrinsicParams(params) + ); + + // ^intrinsic{param1:value1, param2:value2} + public static Parser INTRINSIC = + sequence( + INTRINSIC_NAME, + INTRINSIC_PARAMS, + Intrinsic::new + ); + + public static Parser INTRINSIC_OPERATION_WITH_ARGUMENTS = + sequence( + FieldParser.fieldList.followedBy(Scanners.many(IS_WHITESPACE)), + INTRINSIC.followedBy(Scanners.many(IS_WHITESPACE)), + VARIABLE_OPS.followedBy(Scanners.many(IS_WHITESPACE)), + FieldParser.fieldList.followedBy(EOF), + NAryOperation::new + ); + + public static Parser INTRINSIC_OPERATION_WITHOUT_ARGUMENTS = + sequence( + INTRINSIC.followedBy(Scanners.many(IS_WHITESPACE)), + VARIABLE_OPS.followedBy(Scanners.many(IS_WHITESPACE)), + FieldParser.fieldList.followedBy(EOF), + NAryOperation::new + ); + + public static Parser TRANSFORM_COERCE = FieldParser.fullFieldDeclaration.followedBy(EOF) + .map(UnaryOperation::new); + + public static Parser TRANSFORM_DISCARD = + sequence( + FieldParser.fullFieldDeclaration.followedBy(Scanners.many(IS_WHITESPACE)), + DISCARD.followedBy(Scanners.many(IS_WHITESPACE)).followedBy(EOF), + UnaryOperation::new + ); + + public static Parser TRANSFORM_RENAME = + sequence( + 
FieldParser.fullFieldDeclaration.followedBy(Scanners.many(IS_WHITESPACE)), + DISCARD.followedBy(Scanners.many(IS_WHITESPACE)), + FieldParser.fullFieldDeclaration.followedBy(EOF), + UnaryOperation::new + ); + + public static Parser TRANSFORM_COPY = + sequence( + FieldParser.fullFieldDeclaration.followedBy(Scanners.many(IS_WHITESPACE)), + RETAIN.followedBy(Scanners.many(IS_WHITESPACE)), + FieldParser.fullFieldDeclaration.followedBy(EOF), + UnaryOperation::new + ); + + public static Parser LITERAL_ASSIGNMENT = + sequence( + LITERAL_VALUE, + ASSIGNMENT.followedBy(Scanners.many(IS_WHITESPACE)), + FieldParser.fullFieldDeclaration.followedBy(EOF), + Assignment::new + ); + + public static Parser STATEMENTS = Parsers.or( + LITERAL_ASSIGNMENT, + TRANSFORM_COERCE, + TRANSFORM_DISCARD, + TRANSFORM_RENAME, + TRANSFORM_COPY, + INTRINSIC_OPERATION_WITH_ARGUMENTS, + INTRINSIC_OPERATION_WITHOUT_ARGUMENTS + ); + + public static T parse(String parse) { + return (T) BaseParser.parse(STATEMENTS, parse); + } + + public static String parseLiteral(String parse) { + return BaseParser.parse(LITERAL_VALUE.followedBy(EOF), parse); + } +} diff --git a/tessellate-main/src/main/java/io/clusterless/tessellate/parser/ast/Assignment.java b/tessellate-main/src/main/java/io/clusterless/tessellate/parser/ast/Assignment.java new file mode 100644 index 0000000..c005a5e --- /dev/null +++ b/tessellate-main/src/main/java/io/clusterless/tessellate/parser/ast/Assignment.java @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2023 Chris K Wensel . All Rights Reserved. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ */ + +package io.clusterless.tessellate.parser.ast; + +import com.google.common.base.Joiner; +import io.clusterless.tessellate.parser.Printer; + +public class Assignment implements Statement { + String literal; + Op op; + Field result; + + public Assignment(String literal, Op op, Field result) { + this.literal = literal; + this.op = op; + this.result = result; + } + + public String literal() { + return literal; + } + + public Op op() { + return op; + } + + public Field result() { + return result; + } + + @Override + public String toString() { + return Joiner.on("") + .useForNull("") + .join( + Printer.literal(literal()), + op(), + result + ); + } +} diff --git a/tessellate-main/src/main/java/io/clusterless/tessellate/model/TransformOp.java b/tessellate-main/src/main/java/io/clusterless/tessellate/parser/ast/Exp.java similarity index 65% rename from tessellate-main/src/main/java/io/clusterless/tessellate/model/TransformOp.java rename to tessellate-main/src/main/java/io/clusterless/tessellate/parser/ast/Exp.java index 0de8e16..4c9c9cd 100644 --- a/tessellate-main/src/main/java/io/clusterless/tessellate/model/TransformOp.java +++ b/tessellate-main/src/main/java/io/clusterless/tessellate/parser/ast/Exp.java @@ -6,10 +6,7 @@ * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ -package io.clusterless.tessellate.model; +package io.clusterless.tessellate.parser.ast; -import io.clusterless.tessellate.pipeline.Transforms; - -public interface TransformOp { - Transforms transform(); +public interface Exp { } diff --git a/tessellate-main/src/main/java/io/clusterless/tessellate/parser/Field.java b/tessellate-main/src/main/java/io/clusterless/tessellate/parser/ast/Field.java similarity index 74% rename from tessellate-main/src/main/java/io/clusterless/tessellate/parser/Field.java rename to tessellate-main/src/main/java/io/clusterless/tessellate/parser/ast/Field.java index e9a14ee..3f381e8 100644 --- a/tessellate-main/src/main/java/io/clusterless/tessellate/parser/Field.java +++ b/tessellate-main/src/main/java/io/clusterless/tessellate/parser/ast/Field.java @@ -6,7 +6,9 @@ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ -package io.clusterless.tessellate.parser; +package io.clusterless.tessellate.parser.ast; + +import io.clusterless.tessellate.parser.Printer; import java.util.Optional; @@ -29,10 +31,6 @@ public Optional fieldType() { @Override public String toString() { - final StringBuilder sb = new StringBuilder("Field{"); - sb.append("fieldRef=").append(fieldRef); - sb.append(", fieldType=").append(fieldType); - sb.append('}'); - return sb.toString(); + return Printer.withParams(fieldRef.asComparable(), fieldType); } } diff --git a/tessellate-main/src/main/java/io/clusterless/tessellate/parser/FieldName.java b/tessellate-main/src/main/java/io/clusterless/tessellate/parser/ast/FieldName.java similarity index 96% rename from tessellate-main/src/main/java/io/clusterless/tessellate/parser/FieldName.java rename to tessellate-main/src/main/java/io/clusterless/tessellate/parser/ast/FieldName.java index c5587d5..2a7e26e 100644 --- a/tessellate-main/src/main/java/io/clusterless/tessellate/parser/FieldName.java +++ b/tessellate-main/src/main/java/io/clusterless/tessellate/parser/ast/FieldName.java @@ -6,7 +6,7 @@ * file, You can obtain 
one at http://mozilla.org/MPL/2.0/. */ -package io.clusterless.tessellate.parser; +package io.clusterless.tessellate.parser.ast; import java.util.Objects; diff --git a/tessellate-main/src/main/java/io/clusterless/tessellate/parser/FieldOrdinal.java b/tessellate-main/src/main/java/io/clusterless/tessellate/parser/ast/FieldOrdinal.java similarity index 94% rename from tessellate-main/src/main/java/io/clusterless/tessellate/parser/FieldOrdinal.java rename to tessellate-main/src/main/java/io/clusterless/tessellate/parser/ast/FieldOrdinal.java index fd5017d..13e5ada 100644 --- a/tessellate-main/src/main/java/io/clusterless/tessellate/parser/FieldOrdinal.java +++ b/tessellate-main/src/main/java/io/clusterless/tessellate/parser/ast/FieldOrdinal.java @@ -6,7 +6,7 @@ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ -package io.clusterless.tessellate.parser; +package io.clusterless.tessellate.parser.ast; public class FieldOrdinal implements FieldRef { Integer ordinal; diff --git a/tessellate-main/src/main/java/io/clusterless/tessellate/parser/FieldRef.java b/tessellate-main/src/main/java/io/clusterless/tessellate/parser/ast/FieldRef.java similarity index 89% rename from tessellate-main/src/main/java/io/clusterless/tessellate/parser/FieldRef.java rename to tessellate-main/src/main/java/io/clusterless/tessellate/parser/ast/FieldRef.java index f03fb5f..60590d0 100644 --- a/tessellate-main/src/main/java/io/clusterless/tessellate/parser/FieldRef.java +++ b/tessellate-main/src/main/java/io/clusterless/tessellate/parser/ast/FieldRef.java @@ -6,7 +6,7 @@ * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ -package io.clusterless.tessellate.parser; +package io.clusterless.tessellate.parser.ast; public interface FieldRef { diff --git a/tessellate-main/src/main/java/io/clusterless/tessellate/parser/FieldType.java b/tessellate-main/src/main/java/io/clusterless/tessellate/parser/ast/FieldType.java similarity index 74% rename from tessellate-main/src/main/java/io/clusterless/tessellate/parser/FieldType.java rename to tessellate-main/src/main/java/io/clusterless/tessellate/parser/ast/FieldType.java index d0455a3..35f5b40 100644 --- a/tessellate-main/src/main/java/io/clusterless/tessellate/parser/FieldType.java +++ b/tessellate-main/src/main/java/io/clusterless/tessellate/parser/ast/FieldType.java @@ -6,7 +6,9 @@ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ -package io.clusterless.tessellate.parser; +package io.clusterless.tessellate.parser.ast; + +import io.clusterless.tessellate.parser.Printer; import java.util.Optional; @@ -29,10 +31,6 @@ public Optional param() { @Override public String toString() { - final StringBuilder sb = new StringBuilder("FieldType{"); - sb.append("name='").append(name).append('\''); - sb.append(", param=").append(param); - sb.append('}'); - return sb.toString(); + return Printer.withParams(name, param); } } diff --git a/tessellate-main/src/main/java/io/clusterless/tessellate/parser/FieldTypeName.java b/tessellate-main/src/main/java/io/clusterless/tessellate/parser/ast/FieldTypeName.java similarity index 80% rename from tessellate-main/src/main/java/io/clusterless/tessellate/parser/FieldTypeName.java rename to tessellate-main/src/main/java/io/clusterless/tessellate/parser/ast/FieldTypeName.java index 3afa974..67934ca 100644 --- a/tessellate-main/src/main/java/io/clusterless/tessellate/parser/FieldTypeName.java +++ b/tessellate-main/src/main/java/io/clusterless/tessellate/parser/ast/FieldTypeName.java @@ -6,7 +6,7 @@ * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ -package io.clusterless.tessellate.parser; +package io.clusterless.tessellate.parser.ast; import java.util.Objects; @@ -36,9 +36,6 @@ public int hashCode() { @Override public String toString() { - final StringBuilder sb = new StringBuilder("FieldName{"); - sb.append("name='").append(name).append('\''); - sb.append('}'); - return sb.toString(); + return name; } } diff --git a/tessellate-main/src/main/java/io/clusterless/tessellate/parser/FieldTypeParam.java b/tessellate-main/src/main/java/io/clusterless/tessellate/parser/ast/FieldTypeParam.java similarity index 75% rename from tessellate-main/src/main/java/io/clusterless/tessellate/parser/FieldTypeParam.java rename to tessellate-main/src/main/java/io/clusterless/tessellate/parser/ast/FieldTypeParam.java index 55f7c7d..31b0b12 100644 --- a/tessellate-main/src/main/java/io/clusterless/tessellate/parser/FieldTypeParam.java +++ b/tessellate-main/src/main/java/io/clusterless/tessellate/parser/ast/FieldTypeParam.java @@ -6,7 +6,9 @@ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ -package io.clusterless.tessellate.parser; +package io.clusterless.tessellate.parser.ast; + +import io.clusterless.tessellate.parser.Printer; import java.util.Optional; @@ -29,9 +31,6 @@ public String param2() { @Override public String toString() { - final StringBuilder sb = new StringBuilder("FieldTypeParam{"); - sb.append("param='").append(param1).append('\''); - sb.append('}'); - return sb.toString(); + return Printer.withParams(param1, param2); } } diff --git a/tessellate-main/src/main/java/io/clusterless/tessellate/parser/ast/Intrinsic.java b/tessellate-main/src/main/java/io/clusterless/tessellate/parser/ast/Intrinsic.java new file mode 100644 index 0000000..8b1261c --- /dev/null +++ b/tessellate-main/src/main/java/io/clusterless/tessellate/parser/ast/Intrinsic.java @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2023 Chris K Wensel . All Rights Reserved. 
+ * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + */ + +package io.clusterless.tessellate.parser.ast; + +public class Intrinsic implements Exp { + IntrinsicName name; + IntrinsicParams params; + + public Intrinsic(IntrinsicName name, IntrinsicParams params) { + this.name = name; + this.params = params; + } + + public IntrinsicName name() { + return name; + } + + public IntrinsicParams params() { + return params; + } + + @Override + public String toString() { + final StringBuilder sb = new StringBuilder("Intrinsic{"); + sb.append("name=").append(name); + sb.append(", params=").append(params); + sb.append('}'); + return sb.toString(); + } +} diff --git a/tessellate-main/src/main/java/io/clusterless/tessellate/parser/ast/IntrinsicName.java b/tessellate-main/src/main/java/io/clusterless/tessellate/parser/ast/IntrinsicName.java new file mode 100644 index 0000000..9c21046 --- /dev/null +++ b/tessellate-main/src/main/java/io/clusterless/tessellate/parser/ast/IntrinsicName.java @@ -0,0 +1,25 @@ +/* + * Copyright (c) 2023 Chris K Wensel . All Rights Reserved. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ */ + +package io.clusterless.tessellate.parser.ast; + +public class IntrinsicName { + String name; + + public IntrinsicName(String name) { + this.name = name; + } + + @Override + public String toString() { + final StringBuilder sb = new StringBuilder("IntrinsicName{"); + sb.append("name='").append(name).append('\''); + sb.append('}'); + return sb.toString(); + } +} diff --git a/tessellate-main/src/main/java/io/clusterless/tessellate/parser/ast/IntrinsicParams.java b/tessellate-main/src/main/java/io/clusterless/tessellate/parser/ast/IntrinsicParams.java new file mode 100644 index 0000000..2b88a8d --- /dev/null +++ b/tessellate-main/src/main/java/io/clusterless/tessellate/parser/ast/IntrinsicParams.java @@ -0,0 +1,32 @@ +/* + * Copyright (c) 2023 Chris K Wensel . All Rights Reserved. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + */ + +package io.clusterless.tessellate.parser.ast; + +import java.util.Map; +import java.util.Optional; + +public class IntrinsicParams { + Map params; + + public IntrinsicParams(Optional> params) { + this.params = params.orElse(null); + } + + public Map params() { + return params; + } + + @Override + public String toString() { + final StringBuilder sb = new StringBuilder("IntrinsicParams{"); + sb.append("params=").append(params); + sb.append('}'); + return sb.toString(); + } +} diff --git a/tessellate-main/src/main/java/io/clusterless/tessellate/parser/ast/NAryOperation.java b/tessellate-main/src/main/java/io/clusterless/tessellate/parser/ast/NAryOperation.java new file mode 100644 index 0000000..6a30aa5 --- /dev/null +++ b/tessellate-main/src/main/java/io/clusterless/tessellate/parser/ast/NAryOperation.java @@ -0,0 +1,21 @@ +/* + * Copyright (c) 2023 Chris K Wensel . All Rights Reserved. 
+ * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + */ + +package io.clusterless.tessellate.parser.ast; + +import java.util.List; + +public class NAryOperation extends Operation { + public NAryOperation(List arguments, Exp exp, Op op, List results) { + super(arguments, exp, op, results); + } + + public NAryOperation(Exp exp, Op op, List results) { + super(exp, op, results); + } +} diff --git a/tessellate-main/src/main/java/io/clusterless/tessellate/parser/ast/Op.java b/tessellate-main/src/main/java/io/clusterless/tessellate/parser/ast/Op.java new file mode 100644 index 0000000..1c0f633 --- /dev/null +++ b/tessellate-main/src/main/java/io/clusterless/tessellate/parser/ast/Op.java @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2023 Chris K Wensel . All Rights Reserved. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ */ + +package io.clusterless.tessellate.parser.ast; + +import java.util.Objects; + +public class Op { + String op = ""; + + public Op() { + } + + public Op(String op) { + this.op = op; + } + + public String op() { + return op; + } + + @Override + public String toString() { + return op; + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + Op op1 = (Op) o; + return Objects.equals(op, op1.op); + } + + @Override + public int hashCode() { + return Objects.hash(op); + } +} diff --git a/tessellate-main/src/main/java/io/clusterless/tessellate/parser/ast/Operation.java b/tessellate-main/src/main/java/io/clusterless/tessellate/parser/ast/Operation.java new file mode 100644 index 0000000..576b770 --- /dev/null +++ b/tessellate-main/src/main/java/io/clusterless/tessellate/parser/ast/Operation.java @@ -0,0 +1,78 @@ +/* + * Copyright (c) 2023 Chris K Wensel . All Rights Reserved. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ */ + +package io.clusterless.tessellate.parser.ast; + +import com.google.common.base.Joiner; +import io.clusterless.tessellate.parser.Printer; + +import java.util.Collections; +import java.util.List; + +public class Operation implements Statement { + List arguments = Collections.emptyList(); + Exp exp; + Op op = new Op(); + List results = Collections.emptyList(); + + public Operation(List arguments, Exp exp, Op op, List results) { + this.arguments = arguments; + this.exp = exp; + this.op = op; + this.results = results; + } + + public Operation(Exp exp, Op op, List results) { + this.exp = exp; + this.op = op; + this.results = results; + } + + public Operation(Field field) { + this.arguments = List.of(field); + } + + public Operation(Field argument, Op op, Field result) { + this.arguments = List.of(argument); + this.op = op; + this.results = List.of(result); + } + + public Operation(Field arguments, Op op) { + this.arguments = List.of(arguments); + this.op = op; + } + + public List arguments() { + return arguments; + } + + public T exp() { + return (T) exp; + } + + public Op op() { + return op; + } + + public List results() { + return results; + } + + @Override + public String toString() { + return Joiner.on("") + .useForNull("") + .join( + Printer.fields(arguments), + exp(), + op(), + Printer.fields(results) + ); + } +} diff --git a/tessellate-main/src/main/java/io/clusterless/tessellate/parser/ast/Statement.java b/tessellate-main/src/main/java/io/clusterless/tessellate/parser/ast/Statement.java new file mode 100644 index 0000000..5d9c69a --- /dev/null +++ b/tessellate-main/src/main/java/io/clusterless/tessellate/parser/ast/Statement.java @@ -0,0 +1,18 @@ +/* + * Copyright (c) 2023 Chris K Wensel . All Rights Reserved. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ */ + +package io.clusterless.tessellate.parser.ast; + +public interface Statement { + + Op op(); + + default boolean isOperation() { + return this instanceof Operation; + } +} diff --git a/tessellate-main/src/main/java/io/clusterless/tessellate/parser/ast/UnaryOperation.java b/tessellate-main/src/main/java/io/clusterless/tessellate/parser/ast/UnaryOperation.java new file mode 100644 index 0000000..71290e3 --- /dev/null +++ b/tessellate-main/src/main/java/io/clusterless/tessellate/parser/ast/UnaryOperation.java @@ -0,0 +1,23 @@ +/* + * Copyright (c) 2023 Chris K Wensel . All Rights Reserved. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + */ + +package io.clusterless.tessellate.parser.ast; + +public class UnaryOperation extends Operation { + public UnaryOperation(Field argument, Op op, Field result) { + super(argument, op, result); + } + + public UnaryOperation(Field arguments, Op op) { + super(arguments, op); + } + + public UnaryOperation(Field field) { + super(field); + } +} diff --git a/tessellate-main/src/main/java/io/clusterless/tessellate/pipeline/Pipeline.java b/tessellate-main/src/main/java/io/clusterless/tessellate/pipeline/Pipeline.java index 74f64e9..1f2b010 100644 --- a/tessellate-main/src/main/java/io/clusterless/tessellate/pipeline/Pipeline.java +++ b/tessellate-main/src/main/java/io/clusterless/tessellate/pipeline/Pipeline.java @@ -14,21 +14,20 @@ import cascading.flow.local.LocalFlowProcess; import cascading.flow.stream.duct.DuctException; import cascading.operation.Debug; -import cascading.operation.Insert; import cascading.operation.regex.RegexParser; import cascading.pipe.Each; import cascading.pipe.Pipe; import cascading.pipe.assembly.Coerce; import cascading.pipe.assembly.Copy; -import cascading.pipe.assembly.Discard; -import cascading.pipe.assembly.Rename; import cascading.tap.Tap; 
import cascading.tuple.Fields; -import cascading.tuple.coerce.Coercions; import io.clusterless.tessellate.factory.*; -import io.clusterless.tessellate.model.*; +import io.clusterless.tessellate.model.Partition; +import io.clusterless.tessellate.model.PipelineDef; +import io.clusterless.tessellate.model.Schema; import io.clusterless.tessellate.options.PipelineOptions; import io.clusterless.tessellate.options.PrintOptions; +import io.clusterless.tessellate.parser.ast.Statement; import io.clusterless.tessellate.printer.SchemaPrinter; import io.clusterless.tessellate.util.Format; import io.clusterless.tessellate.util.Models; @@ -123,68 +122,23 @@ public void build() throws IOException { } // get source fields here so that any partition fields will be captured - Fields currentFields = sourceTap.getSourceFields(); - logCurrentFields(currentFields); + PipelineContext context = new PipelineContext(LOG, sourceTap.getSourceFields(), new Pipe("head")); - Pipe pipe = new Pipe("head"); + logCurrentFields(context.currentFields); Schema sourceSchema = pipelineDef.source().schema(); if (sourceSchema.format() == Format.regex) { Fields declaredFields = Models.fieldAsFields(sourceSchema.declared(), String.class, Fields.ALL); - pipe = new Each(pipe, new Fields("line"), new RegexParser(declaredFields, sourceSchema.pattern()), Fields.SWAP); + Pipe pipe = new Each(context.pipe, new Fields("line"), new RegexParser(declaredFields, sourceSchema.pattern()), Fields.SWAP); LOG.info("parsing lines with regex: {}", sourceSchema.pattern()); - currentFields = currentFields.subtract(new Fields("line")).append(declaredFields); + Fields currentFields = context.currentFields.subtract(new Fields("line")).append(declaredFields); logCurrentFields(currentFields); + context.update(currentFields, pipe); } // todo: group like transforms together if there are no interdependencies - for (TransformOp transformOp : pipelineDef.transform().transformOps()) { - switch (transformOp.transform()) { - case insert: - 
InsertOp insertOp = (InsertOp) transformOp; - Fields insertFields = insertOp.field().fields(); - String value = insertOp.value() == null || ((String) insertOp.value()).isEmpty() ? null : insertOp.value(); - Object literal = Coercions.coerce(value, insertFields.getType(0)); - LOG.info("transform insert: fields: {}, value: {}", insertFields, literal); - pipe = new Each(pipe, new Insert(insertFields, literal), Fields.ALL); - currentFields = currentFields.append(insertFields); - logCurrentFields(currentFields); - break; - case coerce: - CoerceOp coerceOp = (CoerceOp) transformOp; - Fields coerceFields = coerceOp.field().fields(); - LOG.info("transform coerce: fields: {}", coerceFields); - pipe = new Coerce(pipe, coerceFields); - currentFields = currentFields.rename(coerceFields, coerceFields); // change the type information - logCurrentFields(currentFields); - break; - case copy: - CopyOp copyOp = (CopyOp) transformOp; - Fields copyFromFields = copyOp.from().orElseThrow().fields(); - Fields copyToFields = copyOp.to().fields(); - LOG.info("transform copy: from: {}, to: {}", copyFromFields, copyToFields); - pipe = new Copy(pipe, copyFromFields, copyToFields); - currentFields = currentFields.append(copyToFields); - logCurrentFields(currentFields); - break; - case rename: - RenameOp renameOp = (RenameOp) transformOp; - Fields renameFromFields = renameOp.from().orElseThrow().fields(); - Fields renameToFields = renameOp.to().fields(); - LOG.info("transform rename: from: {}, to: {}", renameFromFields, renameToFields); - pipe = new Rename(pipe, renameFromFields, renameToFields); - currentFields = currentFields.rename(renameFromFields, renameToFields); - logCurrentFields(currentFields); - break; - case discard: - DiscardOp discardOp = (DiscardOp) transformOp; - Fields discardFields = discardOp.field().fields(); - LOG.info("transform discard: fields: {}", discardFields); - pipe = new Discard(pipe, discardFields); - currentFields = currentFields.subtract(discardFields); - 
logCurrentFields(currentFields); - break; - } + for (Statement statement : pipelineDef.transform().statements()) { + context = new Transformer(statement).resolve(context); } Fields partitionFields = Fields.NONE; @@ -193,14 +147,16 @@ public void build() throws IOException { // todo: honor the -> and +> operators when declaring partitions for (Partition partition : pipelineDef().sink().partitions()) { if (partition.from().isPresent()) { - pipe = new Copy(pipe, partition.from().get().fields(), partition.to().fields()); + Pipe pipe = new Copy(context.pipe, partition.from().get().fields(), partition.to().fields()); partitionFields = partitionFields.append(partition.to().fields()); - } else if (currentFields.contains(partition.to().fields())) { + context.update(context.currentFields, pipe); + } else if (context.currentFields.contains(partition.to().fields())) { partitionFields = partitionFields.append(partition.to().fields()); } else { - pipe = new Coerce(pipe, partition.to().fields()); + Pipe pipe = new Coerce(context.pipe, partition.to().fields()); // change the type information partitionFields = partitionFields.rename(partition.to().fields(), partition.to().fields()); + context.update(context.currentFields, pipe); } } } @@ -209,22 +165,22 @@ public void build() throws IOException { // watch the progress on the console if (pipelineOptions().debug()) { - pipe = new Each(pipe, new Debug(true)); + context.update(context.currentFields, new Each(context.pipe, new Debug(true))); } - LOG.info("sinking into fields: {}", currentFields); + LOG.info("sinking into fields: {}", context.currentFields); SinkFactory sinkFactory = TapFactories.findSinkFactory(pipelineDef.sink()); sinkFactory.applyGlobalProperties(commonProperties); - Tap sinkTap = sinkFactory.getSink(pipelineOptions, pipelineDef.sink(), currentFields); + Tap sinkTap = sinkFactory.getSink(pipelineOptions, pipelineDef.sink(), context.currentFields); flow = new LocalFlowConnector(commonProperties).connect(flowDef() 
.setName("pipeline") - .addSource(pipe, sourceTap) - .addSink(pipe, sinkTap) - .addTail(pipe)); + .addSource(context.pipe, sourceTap) + .addSink(context.pipe, sinkTap) + .addTail(context.pipe)); state = State.READY; } diff --git a/tessellate-main/src/main/java/io/clusterless/tessellate/pipeline/PipelineContext.java b/tessellate-main/src/main/java/io/clusterless/tessellate/pipeline/PipelineContext.java new file mode 100644 index 0000000..3bc2d62 --- /dev/null +++ b/tessellate-main/src/main/java/io/clusterless/tessellate/pipeline/PipelineContext.java @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2023 Chris K Wensel . All Rights Reserved. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + */ + +package io.clusterless.tessellate.pipeline; + +import cascading.pipe.Pipe; +import cascading.tuple.Fields; +import org.slf4j.Logger; + + +public class PipelineContext { + public final Logger log; + public Fields currentFields; + public Pipe pipe; + + public PipelineContext(Logger log, Fields currentFields, Pipe pipe) { + this.log = log; + this.currentFields = currentFields; + this.pipe = pipe; + } + + public PipelineContext update(Fields currentFields, Pipe pipe) { + this.currentFields = currentFields; + this.pipe = pipe; + logCurrentFields(this.currentFields); + + return this; + } + + public void logCurrentFields(Fields currentFields) { + log.info("current fields: {}", currentFields); + } +} diff --git a/tessellate-main/src/main/java/io/clusterless/tessellate/pipeline/Transformer.java b/tessellate-main/src/main/java/io/clusterless/tessellate/pipeline/Transformer.java new file mode 100644 index 0000000..8c1d109 --- /dev/null +++ b/tessellate-main/src/main/java/io/clusterless/tessellate/pipeline/Transformer.java @@ -0,0 +1,112 @@ +/* + * Copyright (c) 2023 Chris K Wensel . All Rights Reserved. 
+ * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + */ + +package io.clusterless.tessellate.pipeline; + +import cascading.operation.Insert; +import cascading.pipe.Each; +import cascading.pipe.Pipe; +import cascading.pipe.assembly.Coerce; +import cascading.pipe.assembly.Copy; +import cascading.pipe.assembly.Discard; +import cascading.pipe.assembly.Rename; +import cascading.tuple.Fields; +import cascading.tuple.coerce.Coercions; +import io.clusterless.tessellate.parser.FieldsParser; +import io.clusterless.tessellate.parser.ast.*; + +import java.util.List; + +public class Transformer { + private final FieldsParser fieldsParser; + Statement statement; + + public Transformer(Statement statement) { + this.statement = statement; + fieldsParser = FieldsParser.INSTANCE; + } + + PipelineContext resolve(PipelineContext context) { + + switch (statement.op().op()) { + case "": + return handleCoerce(context); + + case "=>": + return handleAssignment(context); + + case "+>": + return copyAndEval(context); + + case "->": + return discardAndEval(context); + + default: + throw new IllegalStateException("Unexpected value: " + statement.op().op()); + } + } + + private PipelineContext discardAndEval(PipelineContext context) { + Operation operation = (Operation) statement; + + if (operation.exp() != null) { + throw new UnsupportedOperationException("unsupported: " + operation); + } + + Fields fromFields = fieldsParser.asFields(operation.arguments()); + Fields toFields = fieldsParser.asFields(operation.results()); + + if (toFields.isNone()) { + context.log.info("transform discard: fields: {}", fromFields); + Pipe pipe = new Discard(context.pipe, fromFields); + Fields currentFields = context.currentFields.subtract(fromFields); + return context.update(currentFields, pipe); + } else { + context.log.info("transform rename: from: {}, 
to: {}", fromFields, toFields); + Pipe pipe = new Rename(context.pipe, fromFields, toFields); + Fields currentFields = context.currentFields.rename(fromFields, toFields); + return context.update(currentFields, pipe); + } + } + + private PipelineContext copyAndEval(PipelineContext context) { + Operation operation = (Operation) statement; + + if (operation.exp() != null) { + throw new UnsupportedOperationException("unsupported: " + operation); + } + + Fields fromFields = fieldsParser.asFields(operation.arguments()); + Fields toFields = fieldsParser.asFields(operation.results()); + context.log.info("transform copy: from: {}, to: {}", fromFields, toFields); + Pipe pipe = new Copy(context.pipe, fromFields, toFields); + Fields currentFields = context.currentFields.append(toFields); + return context.update(currentFields, pipe); + } + + private PipelineContext handleCoerce(PipelineContext context) { + List arguments = ((UnaryOperation) statement).arguments(); + Fields coerceFields = fieldsParser.asFields(arguments); + context.log.info("transform coerce: fields: {}", coerceFields); + Pipe pipe = new Coerce(context.pipe, coerceFields); + Fields currentFields = context.currentFields.rename(coerceFields, coerceFields); // change the type information + + return context.update(currentFields, pipe); + } + + private PipelineContext handleAssignment(PipelineContext context) { + String value = ((Assignment) statement).literal(); + Fields insertFields = fieldsParser.asFields(((Assignment) statement).result(), null); + Object literal = Coercions.coerce(value, insertFields.getType(0)); + context.log.info("transform insert: fields: {}, value: {}", insertFields, literal); + Pipe pipe = new Each(context.pipe, new Insert(insertFields, literal), Fields.ALL); + Fields currentFields = context.currentFields.append(insertFields); + + return context.update(currentFields, pipe); + } +} diff --git a/tessellate-main/src/main/java/io/clusterless/tessellate/pipeline/Transforms.java 
b/tessellate-main/src/main/java/io/clusterless/tessellate/pipeline/Transforms.java deleted file mode 100644 index 33c5953..0000000 --- a/tessellate-main/src/main/java/io/clusterless/tessellate/pipeline/Transforms.java +++ /dev/null @@ -1,43 +0,0 @@ -/* - * Copyright (c) 2023 Chris K Wensel . All Rights Reserved. - * - * This Source Code Form is subject to the terms of the Mozilla Public - * License, v. 2.0. If a copy of the MPL was not distributed with this - * file, You can obtain one at http://mozilla.org/MPL/2.0/. - */ - -package io.clusterless.tessellate.pipeline; - -import io.clusterless.tessellate.model.*; - -import java.util.function.Function; - -public enum Transforms { - insert("=>", "^.+[=]>.+$", InsertOp::new), - copy("+>", "^.+[+]>.+$", CopyOp::new), - rename("->", "^.+[-]>.+$", RenameOp::new), - discard("->", "^.+[-]>$", DiscardOp::new), - coerce("", "^.+$", CoerceOp::new); - - private final String operator; - private final String match; - private final Function transform; - - Transforms(String operator, String match, Function transform) { - this.operator = operator; - this.match = match; - this.transform = transform; - } - - public String operator() { - return operator; - } - - public boolean matches(String expression) { - return expression.matches(match); - } - - public TransformOp transform(String expression) { - return transform.apply(expression); - } -} diff --git a/tessellate-main/src/test/java/io/clusterless/tessellate/parser/FieldsParserTest.java b/tessellate-main/src/test/java/io/clusterless/tessellate/parser/FieldParserTest.java similarity index 98% rename from tessellate-main/src/test/java/io/clusterless/tessellate/parser/FieldsParserTest.java rename to tessellate-main/src/test/java/io/clusterless/tessellate/parser/FieldParserTest.java index 143009c..09c9d6e 100644 --- a/tessellate-main/src/test/java/io/clusterless/tessellate/parser/FieldsParserTest.java +++ 
b/tessellate-main/src/test/java/io/clusterless/tessellate/parser/FieldParserTest.java @@ -13,7 +13,7 @@ import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertNotNull; -public class FieldsParserTest { +public class FieldParserTest { @Test void parseFields() { assertNotNull(FieldParser.parseField("@field")); diff --git a/tessellate-main/src/test/java/io/clusterless/tessellate/parser/StatementParserTest.java b/tessellate-main/src/test/java/io/clusterless/tessellate/parser/StatementParserTest.java new file mode 100644 index 0000000..bc03838 --- /dev/null +++ b/tessellate-main/src/test/java/io/clusterless/tessellate/parser/StatementParserTest.java @@ -0,0 +1,79 @@ +/* + * Copyright (c) 2023 Chris K Wensel . All Rights Reserved. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ */ + +package io.clusterless.tessellate.parser; + +import io.clusterless.tessellate.parser.ast.Intrinsic; +import io.clusterless.tessellate.parser.ast.Operation; +import org.junit.jupiter.api.Test; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.junit.jupiter.api.Assertions.assertNotNull; + +public class StatementParserTest { + + @Test + void literals() { + assertThat(StatementParser.parseLiteral("foo")).isEqualTo("foo"); + assertThat(StatementParser.parseLiteral(" foo ")).isEqualTo("foo"); + assertThat(StatementParser.parseLiteral("\"f oo\"")).isEqualTo("f oo"); + assertThat(StatementParser.parseLiteral(" \"f oo\" ")).isEqualTo("f oo"); + assertThat(StatementParser.parseLiteral("\"f\\\"oo\"")).isEqualTo("f\"oo"); + assertThat(StatementParser.parseLiteral(" \"f\\\"oo\" ")).isEqualTo("f\"oo"); + assertThat(StatementParser.parseLiteral("'f oo'")).isEqualTo("f oo"); + assertThat(StatementParser.parseLiteral(" 'f oo' ")).isEqualTo("f oo"); + assertThat(StatementParser.parseLiteral("'f''oo'")).isEqualTo("f'oo"); + assertThat(StatementParser.parseLiteral(" 'f''oo' ")).isEqualTo("f'oo"); + assertThat(StatementParser.parseLiteral("'f{oo'")).isEqualTo("f{oo"); + assertThat(StatementParser.parseLiteral("'f}oo'")).isEqualTo("f}oo"); + assertThat(StatementParser.parseLiteral("'f:oo'")).isEqualTo("f:oo"); + } + + @Test + void parse() { + assertNotNull(StatementParser.parse("fromField1+fromField2+fromFieldN ^siphash{} +> intoField|type")); + assertNotNull(StatementParser.parse("fromField1 + fromField2 + fromFieldN ^siphash{} +> intoField|type")); + assertNotNull(StatementParser.parse("^tsid{node:1,nodeCount:10,signed:true,epoch:123} +> intoField|type")); + + assertNotNull(StatementParser.parse("fromField1+fromField2+fromFieldN ^siphash{} -> intoField|type")); + assertNotNull(StatementParser.parse("fromField1 + fromField2 + fromFieldN ^siphash{} -> intoField|type")); + assertNotNull(StatementParser.parse("fromField1 + fromField2 + fromFieldN 
^siphash{prefix:'{:}'} -> intoField|type")); + assertNotNull(StatementParser.parse("^tsid{node:1,nodeCount:10,signed:true,epoch:123} -> intoField|type")); + + assertNotNull(StatementParser.parse("five => intoField|type")); + } + + @Test + void transforms() { + assertNotNull(StatementParser.parse("one")); + assertNotNull(StatementParser.parse("one|string")); + assertNotNull(StatementParser.parse("two->@two")); + assertNotNull(StatementParser.parse("three+>@three|DateTime|yyyyMMdd")); + assertNotNull(StatementParser.parse("four->")); + assertNotNull(StatementParser.parse("\"five\"=>_five")); + assertNotNull(StatementParser.parse("five=>_five")); + assertNotNull(StatementParser.parse("1689820455=>six|DateTime|yyyyMMdd")); + } + + @Test + void intrinsic() { + Operation operation = StatementParser.parse("fromField1+fromField2+fromFieldN ^siphash{prefix:\"{sip-\",postfix:\"-xx\",returnNull:true} +> intoField|string"); + assertThat(operation.arguments()) + .hasSize(3); + assertThat(operation + .exp() + .params() + .params()) + .containsEntry("prefix", "{sip-") // confirm { is retained + .containsEntry("postfix", "-xx") + .containsEntry("returnNull", "true"); + + assertThat(operation.results()) + .hasSize(1); + } +} diff --git a/tessellate-main/src/test/java/io/clusterless/tessellate/pipeline/PipelineOptionsMergerTest.java b/tessellate-main/src/test/java/io/clusterless/tessellate/pipeline/PipelineOptionsMergerTest.java index 604b79c..e6fc2a8 100644 --- a/tessellate-main/src/test/java/io/clusterless/tessellate/pipeline/PipelineOptionsMergerTest.java +++ b/tessellate-main/src/test/java/io/clusterless/tessellate/pipeline/PipelineOptionsMergerTest.java @@ -10,10 +10,10 @@ import com.adelean.inject.resources.junit.jupiter.GivenTextResource; import com.adelean.inject.resources.junit.jupiter.TestWithResources; -import io.clusterless.tessellate.model.InsertOp; import io.clusterless.tessellate.model.PipelineDef; import io.clusterless.tessellate.options.PipelineOptions; import 
io.clusterless.tessellate.options.PipelineOptionsMerge; +import io.clusterless.tessellate.parser.ast.Assignment; import io.clusterless.tessellate.util.JSONUtil; import org.junit.jupiter.api.Test; @@ -26,7 +26,7 @@ @TestWithResources public class PipelineOptionsMergerTest { @Test - void usingOptions(@GivenTextResource("/config/pipeline.json") String pipelineJson) throws IOException { + void usingOptions(@GivenTextResource("/config/pipeline-mvel.json") String pipelineJson) throws IOException { List inputs = List.of(URI.create("s3://foo/input")); URI output = URI.create("s3://foo/output"); @@ -41,7 +41,7 @@ void usingOptions(@GivenTextResource("/config/pipeline.json") String pipelineJso assertEquals(inputs, merged.source().inputs()); assertEquals(output, merged.sink().output()); - assertEquals("1689820455", ((InsertOp) merged.transform().transformOps().get(5)).value()); + assertEquals("1689820455", ((Assignment) merged.transform().statements().get(5)).literal()); } @Test diff --git a/tessellate-main/src/test/java/io/clusterless/tessellate/pipeline/PipelineParseTest.java b/tessellate-main/src/test/java/io/clusterless/tessellate/pipeline/PipelineParseTest.java index 7c71348..3fbbf41 100644 --- a/tessellate-main/src/test/java/io/clusterless/tessellate/pipeline/PipelineParseTest.java +++ b/tessellate-main/src/test/java/io/clusterless/tessellate/pipeline/PipelineParseTest.java @@ -15,7 +15,10 @@ import com.adelean.inject.resources.junit.jupiter.GivenTextResource; import com.adelean.inject.resources.junit.jupiter.TestWithResources; import com.fasterxml.jackson.core.JsonProcessingException; -import io.clusterless.tessellate.model.*; +import io.clusterless.tessellate.model.Partition; +import io.clusterless.tessellate.model.PipelineDef; +import io.clusterless.tessellate.model.Transform; +import io.clusterless.tessellate.parser.ast.Op; import io.clusterless.tessellate.temporal.IntervalDateTimeFormatter; import io.clusterless.tessellate.type.WrappedCoercibleType; import 
io.clusterless.tessellate.util.JSONUtil; @@ -25,7 +28,6 @@ import java.util.TimeZone; import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertInstanceOf; @TestWithResources public class PipelineParseTest { @@ -49,12 +51,12 @@ void name(@GivenTextResource("config/pipeline.json") String pipelineJson) throws Transform transform = pipeline.transform(); - assertEquals(6, transform.transformOps().size()); - assertInstanceOf(CoerceOp.class, transform.transformOps().get(0)); - assertInstanceOf(RenameOp.class, transform.transformOps().get(1)); - assertInstanceOf(CopyOp.class, transform.transformOps().get(2)); - assertInstanceOf(DiscardOp.class, transform.transformOps().get(3)); - assertInstanceOf(InsertOp.class, transform.transformOps().get(4)); - assertInstanceOf(InsertOp.class, transform.transformOps().get(5)); + assertEquals(6, transform.statements().size()); + assertEquals(new Op(), transform.statements().get(0).op()); + assertEquals(new Op("->"), transform.statements().get(1).op()); + assertEquals(new Op("+>"), transform.statements().get(2).op()); + assertEquals(new Op("->"), transform.statements().get(3).op()); + assertEquals(new Op("=>"), transform.statements().get(4).op()); + assertEquals(new Op("=>"), transform.statements().get(5).op()); } } diff --git a/tessellate-main/src/test/resources/config/pipeline-mvel.json b/tessellate-main/src/test/resources/config/pipeline-mvel.json new file mode 100644 index 0000000..a56c243 --- /dev/null +++ b/tessellate-main/src/test/resources/config/pipeline-mvel.json @@ -0,0 +1,29 @@ +{ + "source": { + "inputs": [ + "s3://bucket/path" + ], + "schema": { + "declared": [ + "one|int", + "two|int", + "three|int|-", + "four|Instant|twelfths" + ], + "format": "csv" + }, + "partitions": [ + "one", + "two+>@two", + "three+>@three|DateTime|yyyyMMdd" + ] + }, + "transform": [ + "one", + "two->@two", + "three+>@three|DateTime|yyyyMMdd", + "four->", + "five=>_five", + 
"@{1689820455}=>six|DateTime|yyyyMMdd" + ] +} diff --git a/tessellate-main/src/test/resources/config/pipeline.json b/tessellate-main/src/test/resources/config/pipeline.json index a56c243..6654804 100644 --- a/tessellate-main/src/test/resources/config/pipeline.json +++ b/tessellate-main/src/test/resources/config/pipeline.json @@ -24,6 +24,6 @@ "three+>@three|DateTime|yyyyMMdd", "four->", "five=>_five", - "@{1689820455}=>six|DateTime|yyyyMMdd" + "1689820455=>six|DateTime|yyyyMMdd" ] }