Skip to content

Commit

Permalink
parser for field declarations
Browse files Browse the repository at this point in the history
  • Loading branch information
cwensel committed Sep 20, 2023
1 parent 9b00969 commit e42b858
Show file tree
Hide file tree
Showing 13 changed files with 422 additions and 16 deletions.
11 changes: 9 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,9 @@ Usage:
- `field-hash` is a hash of the schema: the field names and field types
- `guid` is a random UUID or a provided value

### Supported single field transforms
### Supported operations

#### Transforms

- insert - insert a literal value into a field
- `value=>intoField|type`
Expand All @@ -104,6 +106,11 @@ Usage:
- discard - remove a field
- `field->`

#### Functions

- tsid - create a unique long id
- `!tsid{node:...,nodeCount:...,signed:true/false,epoch:...}+>intoField|type`

### Supported types

- `String`
Expand All @@ -115,7 +122,7 @@ Usage:
- `Float`
- `double` - `null` coerced to `0`
- `Double`
- `boolean` - `null` coerced to `0`
- `boolean` - `null` coerced to `false`
- `Boolean`
- `DateTime|format` - canonical type is `Long`, format defaults to `yyyy-MM-dd HH:mm:ss.SSSSSS z`
- `Instant|format` - canonical type is `java.time.Instant`, supports nanos precision, format defaults to ISO-8601
Expand Down
2 changes: 2 additions & 0 deletions tessellate-main/build.gradle.kts
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,8 @@ dependencies {
implementation("org.fusesource.jansi:jansi:2.4.0")
implementation("com.github.hal4j:uritemplate:1.3.1")

implementation("org.jparsec:jparsec:3.1")

testImplementation("net.wensel:cascading-core:$cascading:tests")

// https://github.com/hosuaby/inject-resources
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
import cascading.tuple.Fields;
import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonIgnore;
import io.clusterless.tessellate.util.FieldsParser;
import io.clusterless.tessellate.parser.FieldsParser;

import java.util.Objects;

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
/*
* Copyright (c) 2023 Chris K Wensel <[email protected]>. All Rights Reserved.
*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
*/

package io.clusterless.tessellate.parser;

import java.util.Optional;

/**
 * A parsed field declaration: a reference to the field (by name or ordinal)
 * together with an optional type declaration.
 */
public class Field {
    /** Reference identifying the field. */
    FieldRef fieldRef;
    /** Declared type of the field, or {@code null} when no type was declared. */
    FieldType fieldType;

    /**
     * Creates a field declaration.
     *
     * @param fieldRef  the reference identifying the field
     * @param fieldType the declared type; an empty {@code Optional} means no type was declared
     */
    public Field(FieldRef fieldRef, Optional<FieldType> fieldType) {
        this.fieldRef = fieldRef;
        // The optional is unwrapped for storage and re-wrapped by the accessor.
        this.fieldType = fieldType.isPresent() ? fieldType.get() : null;
    }

    /**
     * @return the reference identifying the field
     */
    public FieldRef fieldRef() {
        return this.fieldRef;
    }

    /**
     * @return the declared type, or an empty {@code Optional} when none was declared
     */
    public Optional<FieldType> fieldType() {
        if (this.fieldType == null) {
            return Optional.empty();
        }

        return Optional.of(this.fieldType);
    }

    @Override
    public String toString() {
        return "Field{"
                + "fieldRef=" + fieldRef
                + ", fieldType=" + fieldType
                + '}';
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
/*
* Copyright (c) 2023 Chris K Wensel <[email protected]>. All Rights Reserved.
*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
*/

package io.clusterless.tessellate.parser;

import java.util.Objects;

/**
 * A {@link FieldRef} that identifies a field by its name.
 * <p>
 * Two instances are equal when their names are equal.
 */
public class FieldName implements FieldRef {
    /** The field name as a string. */
    String name;

    /**
     * Creates a reference from the given character sequence.
     *
     * @param name the field name; its {@code toString()} value is stored
     */
    public FieldName(CharSequence name) {
        this.name = name.toString();
    }

    /**
     * @return always {@code false}, this reference is a name and not a position
     */
    @Override
    public boolean isOrdinal() {
        return false;
    }

    /**
     * @return the field name
     */
    @Override
    public Comparable<?> asComparable() {
        return name;
    }

    @Override
    public boolean equals(Object o) {
        if (o == this) {
            return true;
        }

        if (o == null || o.getClass() != getClass()) {
            return false;
        }

        return Objects.equals(name, ((FieldName) o).name);
    }

    @Override
    public int hashCode() {
        return Objects.hash(name);
    }

    @Override
    public String toString() {
        return "FieldName{" + "name='" + name + '\'' + '}';
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
/*
* Copyright (c) 2023 Chris K Wensel <[email protected]>. All Rights Reserved.
*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
*/

package io.clusterless.tessellate.parser;

/**
 * A {@link FieldRef} that identifies a field by its integer position.
 * <p>
 * Two instances are equal when their ordinals are equal, mirroring the
 * value-based equality of {@link FieldName}.
 */
public class FieldOrdinal implements FieldRef {
    /** The parsed ordinal value. */
    Integer ordinal;

    /**
     * Creates a reference by parsing the given digits as a decimal integer.
     *
     * @param ordinal the textual ordinal
     * @throws NumberFormatException if the text is not a parsable {@code int}
     */
    public FieldOrdinal(CharSequence ordinal) {
        this.ordinal = Integer.parseInt(ordinal.toString());
    }

    /**
     * @return always {@code true}, this reference is a position and not a name
     */
    @Override
    public boolean isOrdinal() {
        return true;
    }

    /**
     * @return the ordinal as an {@code Integer}
     */
    @Override
    public Comparable<?> asComparable() {
        return ordinal;
    }

    @Override
    public boolean equals(Object o) {
        if (this == o) {
            return true;
        }

        if (o == null || getClass() != o.getClass()) {
            return false;
        }

        FieldOrdinal that = (FieldOrdinal) o;

        // Fully qualified because this file has no import block.
        return java.util.Objects.equals(ordinal, that.ordinal);
    }

    @Override
    public int hashCode() {
        return java.util.Objects.hash(ordinal);
    }

    @Override
    public String toString() {
        final StringBuilder sb = new StringBuilder("FieldOrdinal{");
        sb.append("ordinal=").append(ordinal);
        sb.append('}');
        return sb.toString();
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
/*
* Copyright (c) 2023 Chris K Wensel <[email protected]>. All Rights Reserved.
*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
*/

package io.clusterless.tessellate.parser;

import org.jparsec.Parser;
import org.jparsec.Parsers;
import org.jparsec.Scanners;
import org.jparsec.error.ParserException;
import org.jparsec.pattern.CharPredicate;
import org.jparsec.pattern.CharPredicates;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.util.List;
import java.util.Optional;

import static org.jparsec.pattern.CharPredicates.*;


/**
 * Parser for field declarations of the form {@code ref[|typeName[|param[|param]]]},
 * where {@code ref} is either a field name or a numeric ordinal, and for
 * {@code +} delimited lists of such declarations.
 */
public class FieldParser {
    private static final Logger LOG = LoggerFactory.getLogger(FieldParser.class);

    /** Non-alphanumeric characters permitted anywhere in a field name. */
    private static final CharPredicate FIELD_NAME_EXTRA = CharPredicates.among("~@#$%^&_");

    /** Any character except the delimiters: | delimits params, + delimits fields. */
    private static final CharPredicate PARAM_EXTRA = CharPredicates.not(CharPredicates.among("|+"));

    /** Matches the single {@code |} that introduces a type name or a type parameter. */
    private static final Parser<Void> PIPE = Scanners.isChar('|');

    /** Matches zero or more whitespace characters. */
    private static final Parser<Void> OPTIONAL_WHITESPACE = Scanners.many(CharPredicates.IS_WHITESPACE);

    /** A name starts with a letter or an extra character, digits are allowed afterwards. */
    private static final Parser<FieldName> FIELD_NAME =
            Scanners.isChar(CharPredicates.or(CharPredicates.IS_ALPHA, FIELD_NAME_EXTRA))
                    .followedBy(Scanners.many(CharPredicates.or(CharPredicates.IS_ALPHA_NUMERIC, FIELD_NAME_EXTRA)))
                    .source()
                    .map(FieldName::new);

    /** An ordinal is one or more decimal digits. */
    private static final Parser<FieldOrdinal> FIELD_ORDINAL =
            Scanners.many1(CharPredicates.range('0', '9'))
                    .source()
                    .map(FieldOrdinal::new);

    /** A type name starts with a letter, followed by letters, digits or underscores. */
    private static final Parser<FieldTypeName> TYPE_NAME =
            Scanners.isChar(CharPredicates.IS_ALPHA)
                    .followedBy(Scanners.many(CharPredicates.IS_ALPHA_NUMERIC_))
                    .source()
                    .map(FieldTypeName::new);

    /**
     * A type parameter starts with a letter or {@code -} (null token for primitive values)
     * and continues until the next {@code |} or {@code +}.
     */
    private static final Parser<String> TYPE_PARAM =
            Scanners.isChar(CharPredicates.or(CharPredicates.IS_ALPHA, CharPredicates.isChar('-')))
                    .followedBy(Scanners.many(CharPredicates.or(CharPredicates.IS_ALPHA_NUMERIC_, PARAM_EXTRA)))
                    .source();

    /** Optional {@code |param} with an optional second {@code |param}. */
    private static final Parser<Optional<FieldTypeParam>> typeParam =
            Parsers.sequence(
                            PIPE,
                            TYPE_PARAM,
                            Parsers.sequence(PIPE, TYPE_PARAM).asOptional(),
                            (pipe, first, second) -> new FieldTypeParam(first, second)
                    )
                    .asOptional();

    /** {@code |typeName}. */
    private static final Parser<FieldTypeName> typeName = Parsers.sequence(PIPE, TYPE_NAME);

    /** A type name followed by its optional parameters. */
    private static final Parser<FieldType> types =
            Parsers.sequence(typeName, typeParam, FieldType::new);

    /** A field name or ordinal followed by an optional type declaration. */
    private static final Parser<Field> fullFieldDeclaration =
            Parsers.sequence(Parsers.or(FIELD_NAME, FIELD_ORDINAL), types.asOptional(), Field::new);

    /** A {@code +} surrounded by optional whitespace. */
    private static final Parser<Void> FIELD_DELIM =
            Parsers.sequence(OPTIONAL_WHITESPACE, Scanners.isChar('+'), OPTIONAL_WHITESPACE);

    /** Zero or more field declarations separated by {@link #FIELD_DELIM}. */
    private static final Parser<List<Field>> fieldList =
            fullFieldDeclaration.sepBy(FIELD_DELIM);

    /**
     * Parses a single field declaration.
     *
     * @param field the declaration text
     * @return the parsed field
     * @throws RuntimeException wrapping the {@link ParserException} when the text cannot be parsed
     */
    public static Field parseField(String field) {
        return parseWith(fullFieldDeclaration, field);
    }

    /**
     * Parses a {@code +} delimited list of field declarations.
     *
     * @param field the declaration list text
     * @return the parsed fields
     * @throws RuntimeException wrapping the {@link ParserException} when the text cannot be parsed
     */
    public static List<Field> parseFieldList(String field) {
        return parseWith(fieldList, field);
    }

    /**
     * Runs the given parser in debug mode so a failure can be logged with its parse tree,
     * then rethrows the failure unchecked.
     */
    private static <T> T parseWith(Parser<T> parser, String field) {
        try {
            return parser.parse(field, Parser.Mode.DEBUG);
        } catch (ParserException e) {
            LOG.error("unable to parse: {}, got: {}, tree: {}", field, e.getMessage(), e.getParseTree());

            throw new RuntimeException(e);
        }
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
/*
* Copyright (c) 2023 Chris K Wensel <[email protected]>. All Rights Reserved.
*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
*/

package io.clusterless.tessellate.parser;

/**
 * A reference to a field, either by name ({@link FieldName}) or by position
 * ({@link FieldOrdinal}).
 */
public interface FieldRef {

    /**
     * Tells whether this reference identifies the field by position.
     *
     * @return {@code true} for a positional reference, {@code false} for a named one
     */
    boolean isOrdinal();

    /**
     * Exposes the underlying reference value.
     *
     * @return the referenced value as a {@code Comparable}
     */
    Comparable<?> asComparable();
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
/*
* Copyright (c) 2023 Chris K Wensel <[email protected]>. All Rights Reserved.
*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
*/

package io.clusterless.tessellate.parser;

import java.util.Optional;

/**
 * A parsed type declaration: a type name together with optional type parameters.
 */
public class FieldType {
    /** The declared type name. */
    FieldTypeName name;
    /** The type parameters, or {@code null} when none were declared. */
    FieldTypeParam param;

    /**
     * Creates a type declaration.
     *
     * @param name  the type name
     * @param param the type parameters; an empty {@code Optional} means none were declared
     */
    public FieldType(FieldTypeName name, Optional<FieldTypeParam> param) {
        this.name = name;
        // The optional is unwrapped for storage and re-wrapped by the accessor.
        this.param = param.isPresent() ? param.get() : null;
    }

    /**
     * @return the type name
     */
    public FieldTypeName name() {
        return this.name;
    }

    /**
     * @return the type parameters, or an empty {@code Optional} when none were declared
     */
    public Optional<FieldTypeParam> param() {
        if (this.param == null) {
            return Optional.empty();
        }

        return Optional.of(this.param);
    }

    @Override
    public String toString() {
        return "FieldType{"
                + "name='" + name + '\''
                + ", param=" + param
                + '}';
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
/*
* Copyright (c) 2023 Chris K Wensel <[email protected]>. All Rights Reserved.
*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
*/

package io.clusterless.tessellate.parser;

import java.util.Objects;

/**
 * The name portion of a field type declaration.
 * <p>
 * Two instances are equal when their names are equal.
 */
public class FieldTypeName {
    /** The type name as a string. */
    String name;

    /**
     * Creates a type name from the given character sequence.
     *
     * @param name the type name; its {@code toString()} value is stored
     * @throws NullPointerException if {@code name} is {@code null}
     */
    public FieldTypeName(CharSequence name) {
        // A null argument already failed with a NullPointerException; this only labels it.
        this.name = Objects.requireNonNull(name, "name").toString();
    }

    /**
     * @return the type name
     */
    public String name() {
        return name;
    }

    @Override
    public boolean equals(Object o) {
        if (this == o) {
            return true;
        }

        if (o == null || getClass() != o.getClass()) {
            return false;
        }

        FieldTypeName that = (FieldTypeName) o;

        return Objects.equals(name, that.name);
    }

    @Override
    public int hashCode() {
        return Objects.hash(name);
    }

    @Override
    public String toString() {
        // Previously printed "FieldName{", which made this type indistinguishable
        // from FieldName in logs and parse traces.
        final StringBuilder sb = new StringBuilder("FieldTypeName{");
        sb.append("name='").append(name).append('\'');
        sb.append('}');
        return sb.toString();
    }
}
Loading

0 comments on commit e42b858

Please sign in to comment.