Skip to content

Commit

Permalink
RD-9091: XML reader (#97)
Browse files Browse the repository at this point in the history
Co-authored-by: alexzerntev <[email protected]>
  • Loading branch information
bgaidioz and alexzerntev authored Aug 23, 2023
1 parent 9292167 commit b976f37
Show file tree
Hide file tree
Showing 48 changed files with 2,851 additions and 111 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -12,15 +12,245 @@

package raw.compiler.rql2.truffle.builtin

import com.oracle.truffle.api.frame.FrameDescriptor
import raw.compiler.base.source.Type
import raw.compiler.rql2.Rql2TypeUtils.removeProp
import raw.compiler.rql2.builtin.{ParseXmlEntry, ReadXmlEntry}
import raw.compiler.rql2.source._
import raw.compiler.rql2.truffle.{TruffleArg, TruffleEntryExtension}
import raw.runtime.truffle.ExpressionNode
import raw.runtime.truffle.ast.ProgramExpressionNode
import raw.runtime.truffle.ast.expressions.literals.StringNode
import raw.runtime.truffle.ast.io.json.reader.TryableTopLevelWrapper
import raw.runtime.truffle.ast.io.xml.parser._
import raw.runtime.truffle.{ExpressionNode, RawLanguage}
import raw.runtime.truffle.ast.expressions.iterable.list.{ListFromNodeGen, ListFromUnsafeNodeGen}
import raw.runtime.truffle.ast.expressions.option.OptionSomeNodeGen

/**
 * Truffle code generation for the `ReadXmlEntry` builtin.
 *
 * Produces the node tree that reads XML and parses it into a value of the requested type, using the
 * parser built by `XmlRecurse.recurseXmlParser`.
 */
class TruffleReadXmlEntry extends ReadXmlEntry with TruffleEntryExtension {

  /**
   * Builds the Truffle expression that reads XML as a value of type `t`.
   *
   * The named arguments `encoding`, `timeFormat`, `dateFormat` and `timestampFormat` are optional; when one is
   * missing, a string literal holding the default shown below is used instead. The first unnamed argument is
   * handed to the reader node as its first constructor argument (presumably the location to read from - confirm
   * against the node's constructor).
   *
   * @param t    the type of the value to produce; iterables and lists are read with a collection reader applied
   *             to their item type, anything else with a single-value reader
   * @param args the call arguments; at least one unnamed argument is expected (`head` fails otherwise)
   * @return the root expression node of the reader
   */
  override def toTruffle(t: Type, args: Seq[TruffleArg]): ExpressionNode = {
    val (unnamedArgs, namedArgs) = args.partition(_.idn.isEmpty)

    // Expression of the named argument `name` or, when it was not passed, a string literal holding `default`.
    def namedOrDefault(name: String, default: String): ExpressionNode =
      namedArgs
        .collectFirst { case arg if arg.idn.contains(name) => arg.e }
        .getOrElse(new StringNode(default))

    val encoding = namedOrDefault("encoding", "utf-8")
    val timeFormatExp = namedOrDefault("timeFormat", "HH:mm[:ss[.SSS]]")
    val dateFormatExp = namedOrDefault("dateFormat", "yyyy-M-d")
    val timestampFormatExp = namedOrDefault("timestampFormat", "yyyy-M-d['T'][ ]HH:mm[:ss[.SSS]]")

    // Reader that parses every item of the XML input with the parser of `itemType`.
    def collectionReader(itemType: Type): XmlReadCollectionNode = new XmlReadCollectionNode(
      unnamedArgs.head.e,
      encoding,
      dateFormatExp,
      timeFormatExp,
      timestampFormatExp,
      XmlRecurse.recurseXmlParser(itemType.asInstanceOf[Rql2TypeWithProperties])
    )

    t match {
      case Rql2IterableType(innerType, props) =>
        val parseNode = collectionReader(innerType)
        if (props.contains(Rql2IsTryableTypeProperty())) {
          // The wrapper lives in the JSON reader package. It will probably need to be either shared between
          // JSON and XML, or copied.
          new TryableTopLevelWrapper(parseNode)
        } else {
          parseNode
        }
      case Rql2ListType(innerType: Rql2Type, props) =>
        // Lists reuse the collection reader and are then materialized by one of the two list-building nodes,
        // chosen by whether the list type is tryable.
        val innerParser = collectionReader(innerType)
        if (props.contains(Rql2IsTryableTypeProperty())) {
          ListFromNodeGen.create(innerParser, innerType)
        } else {
          ListFromUnsafeNodeGen.create(innerParser, innerType)
        }
      case _ =>
        val valueType = t.asInstanceOf[Rql2TypeWithProperties]
        val parseNode = new XmlReadValueNode(
          unnamedArgs.head.e,
          encoding,
          dateFormatExp,
          timeFormatExp,
          timestampFormatExp,
          XmlRecurse.recurseXmlParser(valueType)
        )
        if (valueType.props.contains(Rql2IsTryableTypeProperty())) {
          // The wrapper lives in the JSON reader package. It will probably need to be either shared between
          // JSON and XML, or copied.
          new TryableTopLevelWrapper(parseNode)
        } else {
          parseNode
        }
    }
  }
}

/**
 * Truffle code generation for the `ParseXmlEntry` builtin.
 *
 * Produces the node tree that parses XML into a value of the requested type, using the parser built by
 * `XmlRecurse.recurseXmlParser`.
 */
class TruffleParseXmlEntry extends ParseXmlEntry with TruffleEntryExtension {

  /**
   * Builds the Truffle expression that parses XML as a value of type `t`.
   *
   * The named arguments `timeFormat`, `dateFormat` and `timestampFormat` are optional; when one is missing, a
   * string literal holding the default shown below is used instead. The first unnamed argument is handed to the
   * parse node as its first constructor argument (presumably the XML text to parse - confirm against the node's
   * constructor).
   *
   * @param t    the type of the value to produce; iterables and lists are parsed with a collection parser applied
   *             to their item type, anything else with a single-value parser
   * @param args the call arguments; at least one unnamed argument is expected (`head` fails otherwise)
   * @return the root expression node of the parser
   */
  override def toTruffle(t: Type, args: Seq[TruffleArg]): ExpressionNode = {
    val (unnamedArgs, namedArgs) = args.partition(_.idn.isEmpty)

    // Expression of the named argument `name` or, when it was not passed, a string literal holding `default`.
    def namedOrDefault(name: String, default: String): ExpressionNode =
      namedArgs
        .collectFirst { case arg if arg.idn.contains(name) => arg.e }
        .getOrElse(new StringNode(default))

    val timeFormatExp = namedOrDefault("timeFormat", "HH:mm[:ss[.SSS]]")
    val dateFormatExp = namedOrDefault("dateFormat", "yyyy-M-d")
    val timestampFormatExp = namedOrDefault("timestampFormat", "yyyy-M-d['T'][ ]HH:mm[:ss[.SSS]]")

    // Node that parses every item of the XML input with the parser of `itemType`.
    def collectionParser(itemType: Type): XmlParseCollectionNode = new XmlParseCollectionNode(
      unnamedArgs.head.e,
      dateFormatExp,
      timeFormatExp,
      timestampFormatExp,
      XmlRecurse.recurseXmlParser(itemType.asInstanceOf[Rql2TypeWithProperties])
    )

    t match {
      case Rql2IterableType(innerType, props) =>
        val parseNode = collectionParser(innerType)
        if (props.contains(Rql2IsTryableTypeProperty())) {
          // The wrapper lives in the JSON reader package. It will probably need to be either shared between
          // JSON and XML, or copied.
          new TryableTopLevelWrapper(parseNode)
        } else {
          parseNode
        }
      case Rql2ListType(innerType: Rql2Type, props) =>
        // Lists reuse the collection parser and are then materialized by one of the two list-building nodes,
        // chosen by whether the list type is tryable.
        val innerParser = collectionParser(innerType)
        if (props.contains(Rql2IsTryableTypeProperty())) {
          ListFromNodeGen.create(innerParser, innerType)
        } else {
          ListFromUnsafeNodeGen.create(innerParser, innerType)
        }
      case _ =>
        val valueType = t.asInstanceOf[Rql2TypeWithProperties]
        val parseNode = new XmlParseValueNode(
          unnamedArgs.head.e,
          dateFormatExp,
          timeFormatExp,
          timestampFormatExp,
          XmlRecurse.recurseXmlParser(valueType)
        )
        if (valueType.props.contains(Rql2IsTryableTypeProperty())) {
          // The wrapper lives in the JSON reader package. It will probably need to be either shared between
          // JSON and XML, or copied.
          new TryableTopLevelWrapper(parseNode)
        } else {
          parseNode
        }
    }
  }
}

/**
 * Builds the tree of Truffle nodes that parses XML content into a value of a given type.
 */
object XmlRecurse {

  val lang: RawLanguage = RawLanguage.getCurrentContext.getLanguage
  val frameDescriptor = new FrameDescriptor()

  // TODO each node should become XML
  /**
   * Creates the parser program for `tipe`.
   *
   * @param tipe the type to parse
   * @return a program node wrapping the parser built for `tipe`; the top-level parser is built with the
   *         field name "*", which is neither an attribute name nor the text-content name
   */
  def recurseXmlParser(tipe: Rql2TypeWithProperties): ProgramExpressionNode = {

    // Node parsing a primitive value _from a string_. It runs after that string has been extracted from
    // the element, the attribute or the text content.
    def primitiveParserNode(tipe: Rql2TypeWithProperties) = tipe match {
      case _: Rql2UndefinedType => UndefinedParseXmlNodeGen.create()
      case Rql2ByteType(_) => ByteParseXmlNodeGen.create()
      case Rql2ShortType(_) => ShortParseXmlNodeGen.create()
      case Rql2IntType(_) => IntParseXmlNodeGen.create()
      case Rql2LongType(_) => LongParseXmlNodeGen.create()
      case Rql2FloatType(_) => FloatParseXmlNodeGen.create()
      case Rql2DoubleType(_) => DoubleParseXmlNodeGen.create()
      case Rql2DecimalType(_) => DecimalParseXmlNodeGen.create()
      case Rql2StringType(_) => StringParseXmlNodeGen.create()
      case Rql2BoolType(_) => BoolParseXmlNodeGen.create()
      case Rql2DateType(_) => DateParseXmlNodeGen.create()
      case Rql2TimeType(_) => TimeParseXmlNodeGen.create()
      case Rql2TimestampType(_) => TimestampParseXmlNodeGen.create()
    }

    // Builds the parser of a value of type `tipe` found under the record field `fieldName`.
    def recurse(tipe: Rql2TypeWithProperties, fieldName: String): ExpressionNode = {
      // Field names starting with '@' are XML attributes, "#text" is the text content of the element.
      val fromAttribute = fieldName.startsWith("@")
      val fromText = fieldName == "#text"

      // Wraps a string parser in the node matching where the string comes from: attribute, text or element.
      def atSource(stringParser: ProgramExpressionNode): ExpressionNode =
        if (fromAttribute) new AttributeParsePrimitiveXmlNode(stringParser)
        else if (fromText) new TextParseXmlPrimitiveNode(stringParser)
        else new ElementParseXmlPrimitiveNode(stringParser)

      tipe match {
        case tryable if tryable.props.contains(Rql2IsTryableTypeProperty()) =>
          // Tryable is handled first, so that it catches errors raised while parsing compound XML elements
          // as well as XML attributes or XML "text" content.
          val withoutTryable =
            removeProp(tryable, Rql2IsTryableTypeProperty()).asInstanceOf[Rql2TypeWithProperties]
          val guarded = new ProgramExpressionNode(lang, frameDescriptor, recurse(withoutTryable, fieldName))
          // Attributes recover from errors differently.
          if (fromAttribute) new TryableParseAttributeXmlNode(guarded)
          else new TryableParseXmlNode(guarded)

        case nullable if nullable.props.contains(Rql2IsNullableTypeProperty()) =>
          val withoutNullable =
            removeProp(nullable, Rql2IsNullableTypeProperty()).asInstanceOf[Rql2TypeWithProperties]
          withoutNullable match {
            case (_: Rql2PrimitiveType | _: Rql2UndefinedType) =>
              // Nullable primitive: the "nullable parser" checks whether the content is empty and, if it is
              // not, applies the primitive parser. 'undefined' is treated like a primitive because the
              // nullable parser checks for the empty string and otherwise calls the undefined parser, which
              // throws.
              val stringParser =
                new ProgramExpressionNode(lang, frameDescriptor, primitiveParserNode(withoutNullable))
              val optionParser =
                new ProgramExpressionNode(lang, frameDescriptor, new OptionParseXmlTextNode(stringParser))
              atSource(optionParser)
            case _ =>
              // Other nullables (e.g. records, lists) cannot be null once something is found. For an empty
              // element (e.g. <person/>) there is a start tag and an end tag, and it is the fields that are
              // not found and made null.
              OptionSomeNodeGen.create(recurse(withoutNullable, fieldName))
          }

        case Rql2OrType(tipes, _) =>
          val alternatives = tipes.map { option =>
            val optionParser = recurse(option.asInstanceOf[Rql2TypeWithProperties], fieldName)
            new ProgramExpressionNode(lang, frameDescriptor, optionParser)
          }.toArray
          new OrTypeParseXml(alternatives)

        case Rql2ListType(innerType, _) =>
          // A list is parsed with the parser of its items, and wrapped in a list afterwards.
          recurse(innerType.asInstanceOf[Rql2TypeWithProperties], fieldName)

        case Rql2IterableType(innerType, _) =>
          // An iterable is parsed with the parser of its items, and wrapped in an iterable afterwards.
          recurse(innerType.asInstanceOf[Rql2TypeWithProperties], fieldName)

        case Rql2RecordType(atts, _) =>
          // One parser per field, each built with the name of its field.
          val fieldParsers = atts.map { att =>
            val fieldParser = recurse(att.tipe.asInstanceOf[Rql2TypeWithProperties], att.idn)
            new ProgramExpressionNode(lang, frameDescriptor, fieldParser)
          }.toArray
          val fieldNames = atts.map(_.idn).toArray
          val fieldTypes = atts.map(_.tipe.asInstanceOf[Rql2TypeWithProperties]).toArray
          new RecordParseXmlNode(fieldParsers, fieldNames, fieldTypes)

        case (_: Rql2PrimitiveType | _: Rql2UndefinedType) =>
          // Non-nullable primitive: the string parser is applied to the element, attribute or text.
          val stringParser = new ProgramExpressionNode(lang, frameDescriptor, primitiveParserNode(tipe))
          atSource(stringParser)
      }
    }

    new ProgramExpressionNode(lang, frameDescriptor, recurse(tipe, "*"))
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -49,4 +49,4 @@ import raw.testing.tags.TruffleTests
@TruffleTests class TimestampPackageTruffleTest extends TruffleCompilerTestContext with TimestampPackageTest
@TruffleTests class TryPackageTruffleTest extends TruffleCompilerTestContext with TryPackageTest
@TruffleTests class TypePackageTruffleTest extends TruffleCompilerTestContext with TypePackageTest
// class XmlPackageTruffleTest extends TruffleCompilerTestContext with XmlPackageTest
@TruffleTests class XmlPackageTruffleTest extends TruffleCompilerTestContext with XmlPackageTest
Original file line number Diff line number Diff line change
Expand Up @@ -25,14 +25,12 @@ import raw.testing.tags.TruffleTests

@TruffleTests class RD5884TruffleTest extends TruffleCompilerTestContext with RD5884Test
@TruffleTests class RD8266TruffleTest extends TruffleCompilerTestContext with RD8266Test
// needs Xml
// @TruffleTests class RD5448TruffleTest extends TruffleCompilerTestContext with RD5448Test
@TruffleTests class RD5448TruffleTest extends TruffleCompilerTestContext with RD5448Test
@TruffleTests class RD5238TruffleTest extends TruffleCompilerTestContext with RD5238Test
@TruffleTests class RD3742TruffleTest extends TruffleCompilerTestContext with RD3742Test
@TruffleTests class RD3784TruffleTest extends TruffleCompilerTestContext with RD3784Test

// needs XML
// class RD5784TruffleTest extends TruffleCompilerTestContext with RD5784Test
@TruffleTests class RD5784TruffleTest extends TruffleCompilerTestContext with RD5784Test
@TruffleTests class RD4981TruffleTest extends TruffleCompilerTestContext with RD4981Test
@TruffleTests class RD5920TruffleTest extends TruffleCompilerTestContext with RD5920Test
@TruffleTests class RD572TruffleTest extends TruffleCompilerTestContext with RD572Test
Expand All @@ -43,40 +41,34 @@ import raw.testing.tags.TruffleTests

@TruffleTests class RD5714TruffleTest extends TruffleCompilerTestContext with RD5714Test

// XML
// class RD5697TruffleTest extends TruffleCompilerTestContext with RD5697Test
@TruffleTests class RD5697TruffleTest extends TruffleCompilerTestContext with RD5697Test
@TruffleTests class RD5644TruffleTest extends TruffleCompilerTestContext with RD5644Test

@TruffleTests class RD8993TruffleTest extends TruffleCompilerTestContext with RD8993Test
@TruffleTests class RD4529TruffleTest extends TruffleCompilerTestContext with RD4529Test

@TruffleTests class RD5502TruffleTest extends TruffleCompilerTestContext with RD5502Test

// XML
// class RD5893TruffleTest extends TruffleCompilerTestContext with RD5893Test
@TruffleTests class RD5893TruffleTest extends TruffleCompilerTestContext with RD5893Test

@TruffleTests class RD5746TruffleTest extends TruffleCompilerTestContext with RD5746Test

// Databases
@TruffleTests class RD3084TruffleTest extends TruffleCompilerTestContext with RD3084Test

@TruffleTests class RD8764TruffleTest extends TruffleCompilerTestContext with RD8764Test
@TruffleTests class RD5979TruffleTest extends TruffleCompilerTestContext with RD5979Test

// XML
// class RD5679TruffleTest extends TruffleCompilerTestContext with RD5679Test
@TruffleTests class RD5679TruffleTest extends TruffleCompilerTestContext with RD5679Test

@TruffleTests class RD5775TruffleTest extends TruffleCompilerTestContext with RD5775Test
@TruffleTests class RD5851TruffleTest extends TruffleCompilerTestContext with RD5851Test

@TruffleTests class RD5412TruffleTest extends TruffleCompilerTestContext with RD5412Test

// 'text' format not supported
@TruffleTests class RD5971TruffleTest extends TruffleCompilerTestContext with RD5971Test
@TruffleTests class RD4445TruffleTest extends TruffleCompilerTestContext with RD4445Test

// XML
// class RD5968TruffleTest extends TruffleCompilerTestContext with RD5968Test
@TruffleTests class RD5968TruffleTest extends TruffleCompilerTestContext with RD5968Test

@TruffleTests class RD5691TruffleTest extends TruffleCompilerTestContext with RD5691Test
@TruffleTests class RD8935TruffleTest extends TruffleCompilerTestContext with RD8935Test
Expand All @@ -90,8 +82,7 @@ import raw.testing.tags.TruffleTests
@TruffleTests class RD4741TruffleTest extends TruffleCompilerTestContext with RD4741Test
@TruffleTests class RD5925TruffleTest extends TruffleCompilerTestContext with RD5925Test

// needs XML
// class RD5921TruffleTest extends TruffleCompilerTestContext with RD5921Test
@TruffleTests class RD5921TruffleTest extends TruffleCompilerTestContext with RD5921Test

@TruffleTests class RD5932TruffleTest extends TruffleCompilerTestContext with RD5932Test

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,6 @@ import raw.testing.tags.TruffleTests
@TruffleTests class BinaryExpLtTruffleTest extends TruffleCompilerTestContext with BinaryExpLtTest
@TruffleTests class PackageNameTruffleTest extends TruffleCompilerTestContext with PackageNameTest

// Xml reader
// class StagedCompilerTruffleTest extends TruffleCompilerTestContext with StagedCompilerTest
@TruffleTests class StagedCompilerTruffleTest extends TruffleCompilerTestContext with StagedCompilerTest

@TruffleTests class LetFunTruffleTest extends TruffleCompilerTestContext with LetFunTest
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,10 @@ class InferAndReadXmlEntry extends SugarEntryExtension with XmlEntryExtensionHel
XmlInputFormatDescriptor(dataType, _, _, _, _)
) = inputFormatDescriptor
) yield {
addProp(inferTypeToRql2Type(dataType, false, false), Rql2IsTryableTypeProperty())
inferTypeToRql2Type(dataType, false, false) match {
case Rql2IterableType(inner, _) => Rql2IterableType(inner)
case t => addProp(t, Rql2IsTryableTypeProperty())
}
}
}

Expand Down Expand Up @@ -171,11 +174,24 @@ trait XmlEntryExtensionHelper extends EntryExtensionHelper {
// on list and iterables we are removing the nullability/tryability
case t: Rql2IterableType => validateXmlType(t.innerType).right.map(inner => Rql2IterableType(inner))
case t: Rql2ListType => validateXmlType(t.innerType).right.map(inner => Rql2ListType(inner))
case t: Rql2OrType =>
val validated = t.tipes.map(validateXmlType)
val errors = validated.collect { case Left(error) => error }
case Rql2OrType(options, props) =>
// inner types may have 'tryable' or 'nullable' flags:
// * tryable is removed because a tryable-whatever option would always successfully parse
// as a failed whatever, and other parsers would never be tested.
// * nullable is removed too because it's unclear which nullable is parsed, and
// it is more consistent to move that property to the or-type itself (done below)
val validation = options
.map(resetProps(_, Set.empty)) // strip the error property of or-type options + remove nullability
.map(validateXmlType)
val errors = validation.collect { case Left(error) => error }
if (errors.nonEmpty) Left(errors.flatten)
else Right(Rql2OrType(validated.map(_.right.get), t.props))
else {
val validOptions = validation.collect { case Right(t) => t }
val nullable =
options.exists { case t: Rql2TypeWithProperties => t.props.contains(Rql2IsNullableTypeProperty()) }
val finalProps = if (nullable) props + Rql2IsNullableTypeProperty() else props
Right(Rql2OrType(validOptions, finalProps))
}
case t: Rql2PrimitiveType => Right(t)
case t: Rql2UndefinedType => Right(t)
case t => Left(Seq(UnsupportedType(t, t, None)))
Expand Down
Loading

0 comments on commit b976f37

Please sign in to comment.