From 66cd8a712aa509d9e6545eba5cb71fddfe878cc7 Mon Sep 17 00:00:00 2001 From: Corey Kosak Date: Sun, 3 Nov 2024 20:57:15 -0500 Subject: [PATCH] spotless --- src/main/java/io/deephaven/csv/CsvSpecs.java | 21 ++++--- .../io/deephaven/csv/reading/CsvReader.java | 5 +- .../io/deephaven/csv/reading/ReaderUtil.java | 10 ++-- .../csv/reading/cells/FixedCellGrabber.java | 22 ++++---- .../reading/headers/FixedHeaderFinder.java | 20 ++++--- .../java/io/deephaven/csv/CsvReaderTest.java | 56 ++++++++++--------- 6 files changed, 70 insertions(+), 64 deletions(-) diff --git a/src/main/java/io/deephaven/csv/CsvSpecs.java b/src/main/java/io/deephaven/csv/CsvSpecs.java index 53d34224..361f3ec5 100644 --- a/src/main/java/io/deephaven/csv/CsvSpecs.java +++ b/src/main/java/io/deephaven/csv/CsvSpecs.java @@ -126,23 +126,22 @@ public interface Builder { * When {@link #hasFixedWidthColumns} is set, the library either determines the column widths from the header * row (provided {@link #hasHeaderRow} is set), or the column widths can be specified explictly by the caller. * If the caller wants to specify them explicitly, they can use this method. + * * @param fixedColumnWidths The caller-specified widths of the columns. */ Builder fixedColumnWidths(Iterable fixedColumnWidths); /** - * This setting controls what units fixed width columns are measured in. - * When true, fixed width columns are measured in Unicode code points. - * When false, fixed width columns are measured in UTF-16 units (aka Java chars). - * The difference arises when encountering characters outside the Unicode Basic Multilingual Plane. - * For example, the Unicode code point 💔 (U+1F494) is one Unicode code point, but takes - * two Java chars to represent. Along these lines, the string 💔💔💔 would fit in a column of width 3 - * when utf32CountingMode is true, but would require a column width of at least 6 when utf32CountingMode - * is false. + * This setting controls what units fixed width columns are measured in.
When true, fixed width columns are + * measured in Unicode code points. When false, fixed width columns are measured in UTF-16 units (aka Java + * chars). The difference arises when encountering characters outside the Unicode Basic Multilingual Plane. For + * example, the Unicode code point 💔 (U+1F494) is one Unicode code point, but takes two Java chars to + * represent. Along these lines, the string 💔💔💔 would fit in a column of width 3 when utf32CountingMode is + * true, but would require a column width of at least 6 when utf32CountingMode is false. * - * The default setting of true is arguably more natural for users (the number of characters they see - * matches the visual width of the column). But some programs may want the value of false because they - * are counting Java chars. + * The default setting of true is arguably more natural for users (the number of characters they see matches the + * visual width of the column). But some programs may want the value of false because they are counting Java + * chars. */ Builder useUtf32CountingConvention(boolean useUtf32CountingConvention); diff --git a/src/main/java/io/deephaven/csv/reading/CsvReader.java b/src/main/java/io/deephaven/csv/reading/CsvReader.java index 822ce4dd..68899ae7 100644 --- a/src/main/java/io/deephaven/csv/reading/CsvReader.java +++ b/src/main/java/io/deephaven/csv/reading/CsvReader.java @@ -65,9 +65,8 @@ private CsvReader() {} */ public static Result read(final CsvSpecs specs, final InputStream stream, final SinkFactory sinkFactory) throws CsvReaderException { - return specs.hasFixedWidthColumns() ? - fixedReadLogic(specs, stream, sinkFactory) : - delimitedReadLogic(specs, stream, sinkFactory); + return specs.hasFixedWidthColumns() ?
fixedReadLogic(specs, stream, sinkFactory) + : delimitedReadLogic(specs, stream, sinkFactory); } private static Result delimitedReadLogic( diff --git a/src/main/java/io/deephaven/csv/reading/ReaderUtil.java b/src/main/java/io/deephaven/csv/reading/ReaderUtil.java index a11e1651..46859959 100644 --- a/src/main/java/io/deephaven/csv/reading/ReaderUtil.java +++ b/src/main/java/io/deephaven/csv/reading/ReaderUtil.java @@ -32,12 +32,12 @@ public static void trimWhitespace(final ByteSlice cs) { } /** - * Get the expected length of a UTF-8 sequence, given its first byte, and its - * corresponding length in the specified units (UTF-16 or UTF-32). + * Get the expected length of a UTF-8 sequence, given its first byte, and its corresponding length in the specified + * units (UTF-16 or UTF-32). + * * @param firstByte The first byte of the UTF-8 sequence. - * @param numBytes The number of remaining bytes in the input field (including firstByte). If the UTF-8 - * sequence specifies a number of bytes larger than the number of remaining bytes, an - * exception is thrown. + * @param numBytes The number of remaining bytes in the input field (including firstByte). If the UTF-8 sequence + * specifies a number of bytes larger than the number of remaining bytes, an exception is thrown. * @param useUtf32CountingConvention Whether 'charCountResult' should be in units of UTF-32 or UTF-16. * @param charCountResult The number of UTF-32 or UTF-16 units specified by the UTF-8 character. * @return The length of the UTF-8 sequence. 
diff --git a/src/main/java/io/deephaven/csv/reading/cells/FixedCellGrabber.java b/src/main/java/io/deephaven/csv/reading/cells/FixedCellGrabber.java index 32225925..8a551c36 100644 --- a/src/main/java/io/deephaven/csv/reading/cells/FixedCellGrabber.java +++ b/src/main/java/io/deephaven/csv/reading/cells/FixedCellGrabber.java @@ -9,19 +9,20 @@ import java.io.InputStream; /** - * This class uses an underlying DelimitedCellGrabber to grab whole lines at a time from the input stream, - * and then it breaks them into fixed-sized cells to return to the caller. + * This class uses an underlying DelimitedCellGrabber to grab whole lines at a time from the input stream, and then it + * breaks them into fixed-sized cells to return to the caller. */ public class FixedCellGrabber implements CellGrabber { /** - * Makes a degenerate CellGrabber that has no delimiters or quotes and therefore returns whole lines. - * This is a somewhat quick-and-dirty way to reuse the buffering and newline logic in DelimitedCellGrabber - * without rewriting it. + * Makes a degenerate CellGrabber that has no delimiters or quotes and therefore returns whole lines. This is a + * somewhat quick-and-dirty way to reuse the buffering and newline logic in DelimitedCellGrabber without rewriting + * it. + * * @param stream The underlying stream. * @return The "line grabber" */ public static CellGrabber makeLineGrabber(InputStream stream) { - final byte IllegalUtf8 = (byte)0xff; + final byte IllegalUtf8 = (byte) 0xff; return new DelimitedCellGrabber(stream, IllegalUtf8, IllegalUtf8, true, false); } @@ -37,7 +38,7 @@ public static CellGrabber makeLineGrabber(InputStream stream) { /** Constructor. 
*/ public FixedCellGrabber(final CellGrabber lineGrabber, final int[] columnWidths, boolean ignoreSurroundingSpaces, - boolean utf32CountingMode) { + boolean utf32CountingMode) { this.lineGrabber = lineGrabber; this.columnWidths = columnWidths; this.ignoreSurroundingSpaces = ignoreSurroundingSpaces; @@ -50,7 +51,8 @@ public FixedCellGrabber(final CellGrabber lineGrabber, final int[] columnWidths, } @Override - public void grabNext(ByteSlice dest, MutableBoolean lastInRow, MutableBoolean endOfInput) throws CsvReaderException { + public void grabNext(ByteSlice dest, MutableBoolean lastInRow, MutableBoolean endOfInput) + throws CsvReaderException { if (needsUnderlyingRefresh) { // Underlying row used up, and all columns provided. Ask underlying CellGrabber for the next line. lineGrabber.grabNext(rowText, dummy1, endOfInput); @@ -66,7 +68,7 @@ public void grabNext(ByteSlice dest, MutableBoolean lastInRow, MutableBoolean en } // There is data to return. Count off N characters. The final column gets all remaining characters. - final boolean lastCol = colIndex == columnWidths.length - 1; + final boolean lastCol = colIndex == columnWidths.length - 1; final int numCharsToTake = lastCol ? 
Integer.MAX_VALUE : columnWidths[colIndex]; takeNCharactersInCharset(rowText, dest, numCharsToTake, utf32CountingMode, dummy2); ++colIndex; @@ -80,7 +82,7 @@ public void grabNext(ByteSlice dest, MutableBoolean lastInRow, MutableBoolean en } private static void takeNCharactersInCharset(ByteSlice src, ByteSlice dest, int numCharsToTake, - boolean utf32CountingMode, MutableInt tempInt) { + boolean utf32CountingMode, MutableInt tempInt) { final byte[] data = src.data(); final int cellBegin = src.begin(); int current = cellBegin; diff --git a/src/main/java/io/deephaven/csv/reading/headers/FixedHeaderFinder.java b/src/main/java/io/deephaven/csv/reading/headers/FixedHeaderFinder.java index e9ad7b5e..cfea90ac 100644 --- a/src/main/java/io/deephaven/csv/reading/headers/FixedHeaderFinder.java +++ b/src/main/java/io/deephaven/csv/reading/headers/FixedHeaderFinder.java @@ -44,17 +44,19 @@ public static String[] determineHeadersToUse( } --skipCount; } - final byte paddingByte = (byte)specs.delimiter(); + final byte paddingByte = (byte) specs.delimiter(); if (columnWidthsToUse.length == 0) { // UNITS: UTF8 CHARACTERS columnWidthsToUse = inferColumnWidths(headerRow, paddingByte, specs.useUtf32CountingConvention()); } // DESIRED UNITS: UTF8 CHARACTERS - headersToUse = extractHeaders(headerRow, columnWidthsToUse, paddingByte, specs.useUtf32CountingConvention()); + headersToUse = + extractHeaders(headerRow, columnWidthsToUse, paddingByte, specs.useUtf32CountingConvention()); } else { if (columnWidthsToUse.length == 0) { - throw new CsvReaderException("Can't proceed because hasHeaderRow is false but fixedColumnWidths is unspecified"); + throw new CsvReaderException( + "Can't proceed because hasHeaderRow is false but fixedColumnWidths is unspecified"); } headersToUse = ReaderUtil.makeSyntheticHeaders(columnWidthsToUse.length); } @@ -99,7 +101,8 @@ private static int[] inferColumnWidths(ByteSlice row, byte delimiterAsByte, bool boolean thisCharIsDelimiter = ch == delimiterAsByte; if 
(currentIndex == row.begin() && thisCharIsDelimiter) { throw new IllegalArgumentException( - String.format("Header row cannot start with the delimiter character '%c'", (char)delimiterAsByte)); + String.format("Header row cannot start with the delimiter character '%c'", + (char) delimiterAsByte)); } if (!thisCharIsDelimiter && prevCharIsDelimiter) { columnWidths.add(numChars); @@ -115,7 +118,7 @@ private static int[] inferColumnWidths(ByteSlice row, byte delimiterAsByte, bool // UNITS: UTF8 CHARACTERS private static String[] extractHeaders(ByteSlice row, int[] columnWidths, byte paddingByte, - boolean utf32CountingMode) { + boolean utf32CountingMode) { final int numCols = columnWidths.length; if (numCols == 0) { return new String[0]; @@ -140,11 +143,12 @@ private static String[] extractHeaders(ByteSlice row, int[] columnWidths, byte p } private static int charWidthsToByteWidths(ByteSlice row, int[] charWidths, boolean utf32CountingMode, - int[] byteWidths) { + int[] byteWidths) { int numCols = charWidths.length; if (byteWidths.length != numCols) { - throw new IllegalArgumentException(String.format("Expected charWidths.length (%d) == byteWidths.length (%d)", - charWidths.length, byteWidths.length)); + throw new IllegalArgumentException( + String.format("Expected charWidths.length (%d) == byteWidths.length (%d)", + charWidths.length, byteWidths.length)); } final MutableInt charCountResult = new MutableInt(); final byte[] data = row.data(); diff --git a/src/test/java/io/deephaven/csv/CsvReaderTest.java b/src/test/java/io/deephaven/csv/CsvReaderTest.java index 7d852b1b..fa31dde7 100644 --- a/src/test/java/io/deephaven/csv/CsvReaderTest.java +++ b/src/test/java/io/deephaven/csv/CsvReaderTest.java @@ -1870,8 +1870,8 @@ public void colnumPassedThrough() throws CsvReaderException { } /** - * Addresses A user requested that the library - * be able to read files like this. + * Addresses A user requested that the library be + * able to read files like this. 
*/ @Test public void bug212() throws CsvReaderException { @@ -1890,7 +1890,8 @@ public void bug212() throws CsvReaderException { Column.ofRefs("NAME", "argo-events", "argo-workflows", "argocd", "beta"), Column.ofRefs("STATUS", "Not Active", "Active", "Active", "Not Active"), Column.ofRefs("AGE", "2y77d", "2y77d", "5y18d", "4y235d"), - Column.ofRefs("LABELS", "app.kubernetes.io/instance=argo-events,kubernetes.io/metadata.name=argo-events", + Column.ofRefs("LABELS", + "app.kubernetes.io/instance=argo-events,kubernetes.io/metadata.name=argo-events", "app.kubernetes.io/instance=argo-workflows,kubernetes.io/metadata.name=argo-workflows", "kubernetes.io/metadata.name=argocd", "kubernetes.io/metadata.name=beta")); @@ -1914,13 +1915,15 @@ public void simpleFixedColumnWidths() throws CsvReaderException { Column.ofValues("Price", 0.25, 0.15, 0.18), Column.ofValues("SecurityId", 200, 300, 500)); - final CsvSpecs specs = defaultCsvBuilder().hasFixedWidthColumns(true).delimiter(' ').ignoreSurroundingSpaces(true).build(); + final CsvSpecs specs = + defaultCsvBuilder().hasFixedWidthColumns(true).delimiter(' ').ignoreSurroundingSpaces(true).build(); invokeTest(specs, input, expected); } /** * We allow data fields to fill the whole cell, without a padding character + * * @throws CsvReaderException */ @Test @@ -1938,7 +1941,8 @@ public void fixedColumnWidthsFullCell() throws CsvReaderException { Column.ofValues("Price", 0.25, 0.15), Column.ofValues("SecurityId", 200, 300)); - final CsvSpecs specs = defaultCsvBuilder().hasFixedWidthColumns(true).delimiter(' ').ignoreSurroundingSpaces(true).build(); + final CsvSpecs specs = + defaultCsvBuilder().hasFixedWidthColumns(true).delimiter(' ').ignoreSurroundingSpaces(true).build(); invokeTest(specs, input, expected); } @@ -1963,7 +1967,7 @@ public void fixedColumnWidthsShortRows(boolean allowMissingColumns) throws CsvRe Column.ofValues("Price", Sentinels.NULL_DOUBLE, 0.15, 0.18, Sentinels.NULL_DOUBLE), Column.ofValues("SecurityId", 
Sentinels.NULL_INT, 300, 500, Sentinels.NULL_INT)); - final CsvSpecs specs = defaultCsvBuilder().hasFixedWidthColumns(true).delimiter(' ') + final CsvSpecs specs = defaultCsvBuilder().hasFixedWidthColumns(true).delimiter(' ') .ignoreSurroundingSpaces(true).allowMissingColumns(allowMissingColumns).build(); if (allowMissingColumns) { @@ -1975,11 +1979,11 @@ public void fixedColumnWidthsShortRows(boolean allowMissingColumns) throws CsvRe } /** - * All six Unicode characters ♡♥❥❦◑╳ are in the Basic Multilingual Plane and can all be represented - * with a single Java char. Therefore, they are counted the same with both counting conventions. + * All six Unicode characters ♡♥❥❦◑╳ are in the Basic Multilingual Plane and can all be represented with a single + * Java char. Therefore, they are counted the same with both counting conventions. */ @ParameterizedTest - @ValueSource(booleans = {false, true}) + @ValueSource(booleans = {false, true}) public void countsBMPCharactersTheSame(boolean useUtf32CountingConvention) throws CsvReaderException { final String input = "" @@ -2001,13 +2005,13 @@ public void countsBMPCharactersTheSame(boolean useUtf32CountingConvention) throw } /** - * All six Unicode characters 🥰😻🧡💓💕💖 are _outside_ the Basic Multilingual Plane and all are represented - * with two Java chars. The Sym column has a width of six. They will fit in the "Sym" column if the caller - * uses UTF-32 counting convention. They will not fit in the column if the caller uses the UTF-16 counting - * convention (because it takes 12 Java chars to express them). + * All six Unicode characters 🥰😻🧡💓💕💖 are _outside_ the Basic Multilingual Plane and all are represented with + * two Java chars. The Sym column has a width of six. They will fit in the "Sym" column if the caller uses UTF-32 + * counting convention.
They will not fit in the column if the caller uses the UTF-16 counting convention (because + * it takes 12 Java chars to express them). */ @ParameterizedTest - @ValueSource(booleans = {false, true}) + @ValueSource(booleans = {false, true}) public void countsNonBMPCharactersDifferently(boolean useUtf32CountingConvention) throws CsvReaderException { final String input = "" @@ -2034,17 +2038,15 @@ public void countsNonBMPCharactersDifferently(boolean useUtf32CountingConvention } /** - * Using Unicode characters as column headers. We give one column a header with characters from the BMP - * and one with characters outside the BMP and show how the behavior differs depending on the - * useUtf32CountingConvention flag. - * ╔═╗ - * All six Unicode characters 🥰😻🧡💓💕💖 are _outside_ the Basic Multilingual Plane and all are represented - * with two Java chars. The Sym column has a width of six. They will fit in the "Sym" column if the caller - * uses UTF-32 counting convention. They will not fit in the column if the caller uses the UTF-16 counting - * convention (because it takes 12 Java chars to express them). + * Using Unicode characters as column headers. We give one column a header with characters from the BMP and one with + * characters outside the BMP and show how the behavior differs depending on the useUtf32CountingConvention flag. + * ╔═╗ All six Unicode characters 🥰😻🧡💓💕💖 are _outside_ the Basic Multilingual Plane and all are represented + * with two Java chars. The Sym column has a width of six. They will fit in the "Sym" column if the caller uses + * UTF-32 counting convention. They will not fit in the column if the caller uses the UTF-16 counting convention + * (because it takes 12 Java chars to express them).
*/ @ParameterizedTest - @ValueSource(booleans = {false, true}) + @ValueSource(booleans = {false, true}) public void unicodeColumnHeaders(boolean useUtf32CountingConvention) throws CsvReaderException { // In the UTF-32 counting convention, this is a column of width 4 (three Unicode characters plus the space) // followed by a column of width 5. The first cell of the data would therefore be "abc", and the next cell @@ -2077,12 +2079,12 @@ public void unicodeColumnHeaders(boolean useUtf32CountingConvention) throws CsvR } /** - * If the library is configured for the UTF-16 counting convention, and there is only one unit of space left - * in the field, and the next character is a character outside the Basic Multilingual Plane that requires two units, - * the library will include that character in the next field rather than this one. + * If the library is configured for the UTF-16 counting convention, and there is only one unit of space left in the + * field, and the next character is a character outside the Basic Multilingual Plane that requires two units, the + * library will include that character in the next field rather than this one. */ @ParameterizedTest - @ValueSource(booleans = {false, true}) + @ValueSource(booleans = {false, true}) public void brokenSurrogatePair(boolean useUtf32CountingConvention) throws CsvReaderException { // This test has a column of width 3 (three characters plus the space) // followed by a column of width 2.