Skip to content

Commit

Permalink
Support non-space delimiter and add a test for it
Browse files: browse the repository at this point in the history
  • Loading branch information
kosak committed Nov 5, 2024
1 parent ed02a8e commit c77fc4c
Show file tree
Hide file tree
Showing 3 changed files with 31 additions and 3 deletions.
2 changes: 1 addition & 1 deletion src/main/java/io/deephaven/csv/reading/CsvReader.java
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@ private static Result fixedReadLogic(
final String[] headers = FixedHeaderFinder.determineHeadersToUse(specs, lineGrabber, columnWidths);
final int numCols = headers.length;
final CellGrabber grabber = new FixedCellGrabber(lineGrabber, columnWidths.getValue(),
specs.ignoreSurroundingSpaces(), specs.useUtf32CountingConvention());
specs.ignoreSurroundingSpaces(), (byte)specs.delimiter(), specs.useUtf32CountingConvention());
return commonReadLogic(specs, grabber, null, numCols, numCols, headers, sinkFactory);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ public static CellGrabber makeLineGrabber(InputStream stream) {
private final CellGrabber lineGrabber;
private final int[] columnWidths;
private final boolean ignoreSurroundingSpaces;
private final byte delimiterAsByte;
private final boolean utf32CountingMode;
private final ByteSlice rowText;
private boolean needsUnderlyingRefresh;
Expand All @@ -38,10 +39,11 @@ public static CellGrabber makeLineGrabber(InputStream stream) {

/** Constructor. */
public FixedCellGrabber(final CellGrabber lineGrabber, final int[] columnWidths, boolean ignoreSurroundingSpaces,
boolean utf32CountingMode) {
byte delimiterAsByte, boolean utf32CountingMode) {
this.lineGrabber = lineGrabber;
this.columnWidths = columnWidths;
this.ignoreSurroundingSpaces = ignoreSurroundingSpaces;
this.delimiterAsByte = delimiterAsByte;
this.utf32CountingMode = utf32CountingMode;
this.rowText = new ByteSlice();
this.needsUnderlyingRefresh = true;
Expand Down Expand Up @@ -77,7 +79,7 @@ public void grabNext(ByteSlice dest, MutableBoolean lastInRow, MutableBoolean en
endOfInput.setValue(false);

if (ignoreSurroundingSpaces) {
ReaderUtil.trimWhitespace(dest);
dest.trimPadding(delimiterAsByte);
}
}

Expand Down
26 changes: 26 additions & 0 deletions src/test/java/io/deephaven/csv/CsvReaderTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -1979,6 +1979,32 @@ public void fixedColumnWidthsShortRows(boolean allowMissingColumns) throws CsvRe
}
}

/**
 * Fixed-width parsing with an ASCII delimiter other than space. Every column of the input is padded with '_',
 * and the specs set both {@code delimiter('_')} and {@code ignoreSurroundingSpaces(true)}. The expected cells
 * carry no padding: in fixed-width mode, "ignoreSurroundingSpaces" is widened to mean "ignore surrounding
 * delimiters".
 *
 * @throws CsvReaderException declared by the test signature; not expected for this input
 */
@Test
public void alternateDelimiter() throws CsvReaderException {
    // Fixed-width configuration whose padding character is '_' rather than ' '.
    final CsvSpecs underscoreSpecs = defaultCsvBuilder()
            .hasFixedWidthColumns(true)
            .delimiter('_')
            .ignoreSurroundingSpaces(true)
            .build();

    // Header row followed by three data rows, all underscore-padded.
    final String paddedText = "Sym___Type_____Price___SecurityId\n"
            + "GOOG__Dividend_0.25____200\n"
            + "T_____Dividend_0.15____300\n"
            + "Z_____Dividend_0.18____500\n";

    // Parsed result with the underscore padding stripped from every cell.
    final ColumnSet strippedColumns = ColumnSet.of(
            Column.ofRefs("Sym", "GOOG", "T", "Z"),
            Column.ofRefs("Type", "Dividend", "Dividend", "Dividend"),
            Column.ofValues("Price", 0.25, 0.15, 0.18),
            Column.ofValues("SecurityId", 200, 300, 500));

    invokeTest(underscoreSpecs, paddedText, strippedColumns);
}

/**
* If there is no header row, the caller needs to specify column widths.
*/
Expand Down

0 comments on commit c77fc4c

Please sign in to comment.