Skip to content

Commit

Permalink
RD-9471: Long type missing in CSV input (#119)
Browse files Browse the repository at this point in the history
* Fixed a test case to trigger `LongType` to be inferred.
* Added a comment explaining why not all types are expected.
* Deleted leftover dead code for float and byte.
  • Loading branch information
bgaidioz authored Aug 25, 2023
1 parent a73e15d commit 4af158d
Show file tree
Hide file tree
Showing 5 changed files with 16 additions and 79 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -143,9 +143,11 @@ class CsvColumnParser(
}
case r: Rql2TypeWithProperties =>
assert(r.props.isEmpty)
// These would be types returned by the inferrer. Not all types are expected
// from the inferrer.
r match {
case _: Rql2IntType => new IntParseCsvNode()
case _: Rql2FloatType => new FloatParseCsvNode()
case _: Rql2LongType => new LongParseCsvNode()
case _: Rql2DoubleType => new DoubleParseCsvNode()
case _: Rql2DecimalType => new DecimalParseCsvNode()
case _: Rql2BoolType => new BoolParseCsvNode()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,17 +22,18 @@ trait CsvOutputTest extends CompilerTestContext {

option("output-format", "csv")

private val csvWithAllTypes =
tempFile("""byteCol;shortCol;intCol;longCol;floatCol;doubleCol;decimalCol;boolCol;dateCol;timeCol;timestampCol
private val csvWithAllTypes = tempFile(
"""byteCol;shortCol;intCol;longCol;floatCol;doubleCol;decimalCol;boolCol;dateCol;timeCol;timestampCol
|1;10;100;1000;3.14;6.28;9.42;true;2023-12-25;01:02:03;2023-12-25T01:02:03
|120;2500;25000;250000;30.14;60.28;90.42;false;2023-02-05;11:12:13;2023-02-05T11:12:13""".stripMargin)
|120;2500;25000;9223372036854775807;30.14;60.28;90.42;false;2023-02-05;11:12:13;2023-02-05T11:12:13""".stripMargin
)

test("""[
|{byteCol: Int.From("1"), shortCol:Int.From("10"), intCol: Int.From("100"), longCol: Int.From("1000"),
| floatCol: Double.From("3.14"), doubleCol: Double.From("6.28"), decimalCol: Double.From("9.42"), boolCol: true,
| dateCol: Date.Parse("12/25/2023", "M/d/yyyy"), timeCol: Time.Parse("01:02:03", "H:m:s"),
| timestampCol: Timestamp.Parse("12/25/2023 01:02:03", "M/d/yyyy H:m:s"), binaryCol: Binary.FromString("Hello World!")},
|{byteCol: Int.From("120"), shortCol:Int.From("2500"), intCol: Int.From("25000"), longCol: Int.From("250000"),
|{byteCol: Int.From("120"), shortCol:Int.From("2500"), intCol: Int.From("25000"), longCol: Long.From("9223372036854775807"),
| floatCol: Double.From("30.14"), doubleCol: Double.From("60.28"), decimalCol: Double.From("90.42"), boolCol: false,
| dateCol: Date.Parse("2/5/2023", "M/d/yyyy"), timeCol: Time.Parse("11:12:13", "H:m:s"),
| timestampCol: Timestamp.Parse("2/5/2023 11:12:13", "M/d/yyyy H:m:s"), binaryCol: Binary.FromString("Olala!")}
Expand All @@ -43,7 +44,7 @@ trait CsvOutputTest extends CompilerTestContext {
path should contain(
"""byteCol,shortCol,intCol,longCol,floatCol,doubleCol,decimalCol,boolCol,dateCol,timeCol,timestampCol,binaryCol
|1,10,100,1000,3.14,6.28,9.42,true,2023-12-25,01:02:03,2023-12-25T01:02:03,SGVsbG8gV29ybGQh
|120,2500,25000,250000,30.14,60.28,90.42,false,2023-02-05,11:12:13,2023-02-05T11:12:13,T2xhbGEh
|120,2500,25000,9223372036854775807,30.14,60.28,90.42,false,2023-02-05,11:12:13,2023-02-05T11:12:13,T2xhbGEh
|""".stripMargin
)
} finally {
Expand All @@ -58,7 +59,7 @@ trait CsvOutputTest extends CompilerTestContext {
path should contain(
"""byteCol,shortCol,intCol,longCol,floatCol,doubleCol,decimalCol,boolCol,dateCol,timeCol,timestampCol
|1,10,100,1000,3.14,6.28,9.42,true,2023-12-25,01:02:03,2023-12-25T01:02:03
|120,2500,25000,250000,30.14,60.28,90.42,false,2023-02-05,11:12:13,2023-02-05T11:12:13
|120,2500,25000,9223372036854775807,30.14,60.28,90.42,false,2023-02-05,11:12:13,2023-02-05T11:12:13
|""".stripMargin
)
} finally {
Expand Down Expand Up @@ -87,7 +88,7 @@ trait CsvOutputTest extends CompilerTestContext {
path should contain(
"""byteCol,shortCol,intCol,longCol,floatCol,doubleCol,decimalCol,boolCol,dateCol,timeCol,timestampCol
|1,10,100,1000,3.14,6.28,9.42,true,2023-12-25,01:02:03,2023-12-25T01:02:03
|120,2500,25000,250000,30.14,60.28,90.42,false,2023-02-05,11:12:13,2023-02-05T11:12:13
|120,2500,25000,9223372036854775807,30.14,60.28,90.42,false,2023-02-05,11:12:13,2023-02-05T11:12:13
|""".stripMargin
)
} finally {
Expand Down Expand Up @@ -118,15 +119,15 @@ trait CsvOutputTest extends CompilerTestContext {
snapi"""byteCol,shortCol,intCol,longCol,floatCol,doubleCol,decimalCol,boolCol,dateCol,timeCol,timestampCol
|"failed to parse CSV (url: $csvWithAllTypes: line 1, col 1), cannot parse 'byteCol' as a byte","failed to parse CSV (url: $csvWithAllTypes: line 1, col 9), cannot parse 'shortCol' as a short","failed to parse CSV (url: $csvWithAllTypes: line 1, col 18), cannot parse 'intCol' as an int","failed to parse CSV (url: $csvWithAllTypes: line 1, col 25), cannot parse 'longCol' as a long","failed to parse CSV (url: $csvWithAllTypes: line 1, col 33), cannot parse 'floatCol' as a float","failed to parse CSV (url: $csvWithAllTypes: line 1, col 42), cannot parse 'doubleCol' as a double","failed to parse CSV (url: $csvWithAllTypes: line 1, col 52), cannot parse 'decimalCol' as a decimal","failed to parse CSV (url: $csvWithAllTypes: line 1, col 63), cannot parse 'boolCol' as a bool","failed to parse CSV (url: $csvWithAllTypes: line 1, col 71), string 'dateCol' does not match date template 'yyyy-M-d'","failed to parse CSV (url: $csvWithAllTypes: line 1, col 79), string 'timeCol' does not match time template 'HH:mm[:ss[.SSS]]'","failed to parse CSV (url: $csvWithAllTypes: line 1, col 87), string 'timestampCol' does not match timestamp template 'HH:mm[:ss[.SSS]]'"
|1,10,100,1000,3.14,6.28,9.42,true,2023-12-25,01:02:03,2023-12-25T01:02:03
|120,2500,25000,250000,30.14,60.28,90.42,false,2023-02-05,11:12:13,2023-02-05T11:12:13
|120,2500,25000,9223372036854775807,30.14,60.28,90.42,false,2023-02-05,11:12:13,2023-02-05T11:12:13
|""".stripMargin
)
} else {
path should contain(
"""byteCol,shortCol,intCol,longCol,floatCol,doubleCol,decimalCol,boolCol,dateCol,timeCol,timestampCol
|"failed to parse CSV (line 1, col 1), cannot cast 'byteCol' to byte","failed to parse CSV (line 1, col 2), cannot cast 'shortCol' to short","failed to parse CSV (line 1, col 3), cannot cast 'intCol' to int","failed to parse CSV (line 1, col 4), cannot cast 'longCol' to long","failed to parse CSV (line 1, col 5), cannot cast 'floatCol' to float","failed to parse CSV (line 1, col 6), cannot cast 'doubleCol' to double","failed to parse CSV (line 1, col 7), Character d is neither a decimal digit number, decimal point, nor \"e\" notation exponential mark.","failed to parse CSV (line 1, col 8), cannot cast 'boolCol' to boolean","failed to parse CSV (line 1, col 9), string 'dateCol' does not match date template 'yyyy-M-d'","failed to parse CSV (line 1, col 10), string 'timeCol' does not match time template 'HH:mm[:ss[.SSS]]'","failed to parse CSV (line 1, col 11), string 'timestampCol' does not match timestamp template 'yyyy-M-d['T'][ ]HH:mm[:ss[.SSS]]'"
|1,10,100,1000,3.14,6.28,9.42,true,2023-12-25,01:02:03,2023-12-25T01:02:03
|120,2500,25000,250000,30.14,60.28,90.42,false,2023-02-05,11:12:13,2023-02-05T11:12:13
|120,2500,25000,9223372036854775807,30.14,60.28,90.42,false,2023-02-05,11:12:13,2023-02-05T11:12:13
|""".stripMargin
)
}
Expand Down

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,12 @@
import com.oracle.truffle.api.nodes.NodeInfo;
import raw.runtime.truffle.ExpressionNode;

@NodeInfo(shortName = "ByteParseCsv")
public class ByteParseCsvNode extends ExpressionNode {
@NodeInfo(shortName = "IntParseCsv")
public class LongParseCsvNode extends ExpressionNode {

public Object executeGeneric(VirtualFrame frame) {
Object[] args = frame.getArguments();
RawTruffleCsvParser parser = (RawTruffleCsvParser) args[0];
return parser.getByte(this);
return parser.getLong(this);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -202,22 +202,6 @@ Object getOptionByte(ExpressionNode location) {
}
}

/**
 * Reads the current token of {@code jacksonParser} as a {@code short}.
 *
 * @param location the expression node passed on to any exception raised here
 * @return the short value of the current token
 * @throws CsvParserRawTruffleException if the token cannot be converted to a short; the message
 *     quotes the offending text
 * @throws CsvReaderRawTruffleException if an {@code IOException} occurs while reading
 */
@CompilerDirectives.TruffleBoundary
short getShort(ExpressionNode location) {
// Outer try: any IOException, including one raised while building the parse error below, is
// reported as a reader failure.
try {
// Inner try: a JsonProcessingException is reported as a parse failure on the current token.
try {
return jacksonParser.getShortValue();
} catch (JsonProcessingException ex) {
String malformed =
jacksonParser.getText(); // should not throw: the token has already been read
throw new CsvParserRawTruffleException(
String.format("cannot parse '%s' as a short", malformed), this, stream, location);
}
} catch (IOException ex) {
throw new CsvReaderRawTruffleException(stream, ex, location);
}
}

@CompilerDirectives.TruffleBoundary
Object getOptionShort(ExpressionNode location) {
try {
Expand Down Expand Up @@ -319,29 +303,6 @@ Object getOptionLong(ExpressionNode location) {
}
}

/**
 * Reads the current token of {@code jacksonParser} as a {@code float}.
 *
 * <p>The token text is first compared against every entry of {@code nans}; an exact match yields
 * {@code Float.NaN} without attempting a numeric conversion.
 *
 * @param location the expression node passed on to any exception raised here
 * @return {@code Float.NaN} if the token equals one of the {@code nans} entries, otherwise the
 *     float value of the current token
 * @throws CsvParserRawTruffleException if the token cannot be converted to a float; the message
 *     quotes the offending text
 * @throws CsvReaderRawTruffleException if an {@code IOException} occurs while reading
 */
@CompilerDirectives.TruffleBoundary
float getFloat(ExpressionNode location) {
// Outer try: any IOException, including one raised by getText() in the handler below, is
// reported as a reader failure.
try {
// Inner try: a JsonProcessingException is reported as a parse failure on the current token.
try {
String token = jacksonParser.getText();
// NOTE(review): `nans` is declared outside this view; presumably the configured spellings
// of NaN. Confirm against the field declaration.
for (String nanToken : nans) {
if (token.equals(nanToken)) {
return Float.NaN;
}
}
return jacksonParser.getFloatValue();
} catch (JsonProcessingException ex) {
throw new CsvParserRawTruffleException(
String.format("cannot parse '%s' as a float", jacksonParser.getText()),
this,
stream,
location);
}
} catch (IOException ex) {
throw new CsvReaderRawTruffleException(stream, ex, location);
}
}

@CompilerDirectives.TruffleBoundary
Object getOptionFloat(ExpressionNode location) {
try {
Expand Down

0 comments on commit 4af158d

Please sign in to comment.