From c77fc4cfe9393943db607dfe5a1e6a608e37126c Mon Sep 17 00:00:00 2001 From: Corey Kosak Date: Mon, 4 Nov 2024 22:00:13 -0500 Subject: [PATCH] Support non-space delimiter and add a test for it --- .../io/deephaven/csv/reading/CsvReader.java | 2 +- .../csv/reading/cells/FixedCellGrabber.java | 6 +++-- .../java/io/deephaven/csv/CsvReaderTest.java | 26 +++++++++++++++++++ 3 files changed, 31 insertions(+), 3 deletions(-) diff --git a/src/main/java/io/deephaven/csv/reading/CsvReader.java b/src/main/java/io/deephaven/csv/reading/CsvReader.java index 68899ae..a6fb63d 100644 --- a/src/main/java/io/deephaven/csv/reading/CsvReader.java +++ b/src/main/java/io/deephaven/csv/reading/CsvReader.java @@ -107,7 +107,7 @@ private static Result fixedReadLogic( final String[] headers = FixedHeaderFinder.determineHeadersToUse(specs, lineGrabber, columnWidths); final int numCols = headers.length; final CellGrabber grabber = new FixedCellGrabber(lineGrabber, columnWidths.getValue(), - specs.ignoreSurroundingSpaces(), specs.useUtf32CountingConvention()); + specs.ignoreSurroundingSpaces(), (byte)specs.delimiter(), specs.useUtf32CountingConvention()); return commonReadLogic(specs, grabber, null, numCols, numCols, headers, sinkFactory); } diff --git a/src/main/java/io/deephaven/csv/reading/cells/FixedCellGrabber.java b/src/main/java/io/deephaven/csv/reading/cells/FixedCellGrabber.java index 1732010..5402fd1 100644 --- a/src/main/java/io/deephaven/csv/reading/cells/FixedCellGrabber.java +++ b/src/main/java/io/deephaven/csv/reading/cells/FixedCellGrabber.java @@ -29,6 +29,7 @@ public static CellGrabber makeLineGrabber(InputStream stream) { private final CellGrabber lineGrabber; private final int[] columnWidths; private final boolean ignoreSurroundingSpaces; + private final byte delimiterAsByte; private final boolean utf32CountingMode; private final ByteSlice rowText; private boolean needsUnderlyingRefresh; @@ -38,10 +39,11 @@ public static CellGrabber makeLineGrabber(InputStream stream) { /** Constructor. */ public FixedCellGrabber(final CellGrabber lineGrabber, final int[] columnWidths, boolean ignoreSurroundingSpaces, - boolean utf32CountingMode) { + byte delimiterAsByte, boolean utf32CountingMode) { this.lineGrabber = lineGrabber; this.columnWidths = columnWidths; this.ignoreSurroundingSpaces = ignoreSurroundingSpaces; + this.delimiterAsByte = delimiterAsByte; this.utf32CountingMode = utf32CountingMode; this.rowText = new ByteSlice(); this.needsUnderlyingRefresh = true; @@ -77,7 +79,7 @@ public void grabNext(ByteSlice dest, MutableBoolean lastInRow, MutableBoolean en endOfInput.setValue(false); if (ignoreSurroundingSpaces) { - ReaderUtil.trimWhitespace(dest); + dest.trimPadding(delimiterAsByte); } } diff --git a/src/test/java/io/deephaven/csv/CsvReaderTest.java b/src/test/java/io/deephaven/csv/CsvReaderTest.java index fb48d22..f5c3da4 100644 --- a/src/test/java/io/deephaven/csv/CsvReaderTest.java +++ b/src/test/java/io/deephaven/csv/CsvReaderTest.java @@ -1979,6 +1979,32 @@ public void fixedColumnWidthsShortRows(boolean allowMissingColumns) throws CsvRe } } + /** + * We support other ASCII delimiters. In fixed-width mode, the meaning of "ignoreSurroundingSpaces" is expanded + * to mean "ignore surrounding delimiters". + */ + @Test + public void alternateDelimiter() throws CsvReaderException { + final String input = + "" + + "Sym___Type_____Price___SecurityId\n" + + "GOOG__Dividend_0.25____200\n" + + "T_____Dividend_0.15____300\n" + + "Z_____Dividend_0.18____500\n"; + + final ColumnSet expected = + ColumnSet.of( + Column.ofRefs("Sym", "GOOG", "T", "Z"), + Column.ofRefs("Type", "Dividend", "Dividend", "Dividend"), + Column.ofValues("Price", 0.25, 0.15, 0.18), + Column.ofValues("SecurityId", 200, 300, 500)); + + final CsvSpecs specs = + defaultCsvBuilder().hasFixedWidthColumns(true).delimiter('_').ignoreSurroundingSpaces(true).build(); + + invokeTest(specs, input, expected); + } + /** * If there is no header row, the caller needs to specify column widths. */