spotless

deephaven · Nov 4, 2024 · 66cd8a7 · 66cd8a7
1 parent 8bd5ae1
commit 66cd8a7
Show file tree

Hide file tree

Showing 6 changed files with 70 additions and 64 deletions.
diff --git a/src/main/java/io/deephaven/csv/CsvSpecs.java b/src/main/java/io/deephaven/csv/CsvSpecs.java
@@ -126,23 +126,22 @@ public interface Builder {
          * When {@link #hasFixedWidthColumns} is set, the library either determines the column widths from the header
          * row (provided {@link #hasHeaderRow} is set), or the column widths can be specified explicitly by the caller.
          * If the caller wants to specify them explicitly, they can use this method.
+         * 
          * @param fixedColumnWidths The caller-specified widths of the columns.
          */
         Builder fixedColumnWidths(Iterable<Integer> fixedColumnWidths);
 
         /**
-         * This setting controls what units fixed width columns are measured in.
-         * When true, fixed width columns are measured in Unicode code points.
-         * When false, fixed width columns are measured in UTF-16 units (aka Java chars).
-         * The difference arises when encountering characters outside the Unicode Basic Multilingual Plane.
-         * For example, the Unicode code point 💔 (U+1F494) is one Unicode code point, but takes
-         * two Java chars to represent. Along these lines, the string 💔💔💔 would fit in a column of width 3
-         * when utf32CountingMode is true, but would require a column width of at least 6 when utf32CountingMode
-         * is false.
+         * This setting controls what units fixed width columns are measured in. When true, fixed width columns are
+         * measured in Unicode code points. When false, fixed width columns are measured in UTF-16 units (aka Java
+         * chars). The difference arises when encountering characters outside the Unicode Basic Multilingual Plane. For
+         * example, the Unicode code point 💔 (U+1F494) is one Unicode code point, but takes two Java chars to
+         * represent. Along these lines, the string 💔💔💔 would fit in a column of width 3 when utf32CountingMode is
+         * true, but would require a column width of at least 6 when utf32CountingMode is false.
          *
-         * The default setting of true is arguably more natural for users (the number of characters they see
-         * matches the visual width of the column). But some programs may want the value of false because they
-         * are counting Java chars.
+         * The default setting of true is arguably more natural for users (the number of characters they see matches the
+         * visual width of the column). But some programs may want the value of false because they are counting Java
+         * chars.
          */
         Builder useUtf32CountingConvention(boolean useUtf32CountingConvention);
 

diff --git a/src/main/java/io/deephaven/csv/reading/CsvReader.java b/src/main/java/io/deephaven/csv/reading/CsvReader.java
@@ -65,9 +65,8 @@ private CsvReader() {}
      */
     public static Result read(final CsvSpecs specs, final InputStream stream, final SinkFactory sinkFactory)
             throws CsvReaderException {
-        return specs.hasFixedWidthColumns() ?
-                fixedReadLogic(specs, stream, sinkFactory) :
-                delimitedReadLogic(specs, stream, sinkFactory);
+        return specs.hasFixedWidthColumns() ? fixedReadLogic(specs, stream, sinkFactory)
+                : delimitedReadLogic(specs, stream, sinkFactory);
     }
 
     private static Result delimitedReadLogic(

diff --git a/src/main/java/io/deephaven/csv/reading/ReaderUtil.java b/src/main/java/io/deephaven/csv/reading/ReaderUtil.java
@@ -32,12 +32,12 @@ public static void trimWhitespace(final ByteSlice cs) {
     }
 
     /**
-     * Get the expected length of a UTF-8 sequence, given its first byte, and its
-     * corresponding length in the specified units (UTF-16 or UTF-32).
+     * Get the expected length of a UTF-8 sequence, given its first byte, and its corresponding length in the specified
+     * units (UTF-16 or UTF-32).
+     * 
      * @param firstByte The first byte of the UTF-8 sequence.
-     * @param numBytes The number of remaining bytes in the input field (including firstByte). If the UTF-8
-     *                 sequence specifies a number of bytes larger than the number of remaining bytes, an
-     *                 exception is thrown.
+     * @param numBytes The number of remaining bytes in the input field (including firstByte). If the UTF-8 sequence
+     *        specifies a number of bytes larger than the number of remaining bytes, an exception is thrown.
      * @param useUtf32CountingConvention Whether 'charCountResult' should be in units of UTF-32 or UTF-16.
      * @param charCountResult The number of UTF-32 or UTF-16 units specified by the UTF-8 character.
      * @return The length of the UTF-8 sequence.

diff --git a/src/main/java/io/deephaven/csv/reading/cells/FixedCellGrabber.java b/src/main/java/io/deephaven/csv/reading/cells/FixedCellGrabber.java
@@ -9,19 +9,20 @@
 import java.io.InputStream;
 
 /**
- * This class uses an underlying DelimitedCellGrabber to grab whole lines at a time from the input stream,
- * and then it breaks them into fixed-sized cells to return to the caller.
+ * This class uses an underlying DelimitedCellGrabber to grab whole lines at a time from the input stream, and then it
+ * breaks them into fixed-sized cells to return to the caller.
  */
 public class FixedCellGrabber implements CellGrabber {
     /**
-     * Makes a degenerate CellGrabber that has no delimiters or quotes and therefore returns whole lines.
-     * This is a somewhat quick-and-dirty way to reuse the buffering and newline logic in DelimitedCellGrabber
-     * without rewriting it.
+     * Makes a degenerate CellGrabber that has no delimiters or quotes and therefore returns whole lines. This is a
+     * somewhat quick-and-dirty way to reuse the buffering and newline logic in DelimitedCellGrabber without rewriting
+     * it.
+     * 
      * @param stream The underlying stream.
      * @return The "line grabber"
      */
     public static CellGrabber makeLineGrabber(InputStream stream) {
-        final byte IllegalUtf8 = (byte)0xff;
+        final byte IllegalUtf8 = (byte) 0xff;
         return new DelimitedCellGrabber(stream, IllegalUtf8, IllegalUtf8, true, false);
     }
 
@@ -37,7 +38,7 @@ public static CellGrabber makeLineGrabber(InputStream stream) {
 
     /** Constructor. */
     public FixedCellGrabber(final CellGrabber lineGrabber, final int[] columnWidths, boolean ignoreSurroundingSpaces,
-                            boolean utf32CountingMode) {
+            boolean utf32CountingMode) {
         this.lineGrabber = lineGrabber;
         this.columnWidths = columnWidths;
         this.ignoreSurroundingSpaces = ignoreSurroundingSpaces;
@@ -50,7 +51,8 @@ public FixedCellGrabber(final CellGrabber lineGrabber, final int[] columnWidths,
     }
 
     @Override
-    public void grabNext(ByteSlice dest, MutableBoolean lastInRow, MutableBoolean endOfInput) throws CsvReaderException {
+    public void grabNext(ByteSlice dest, MutableBoolean lastInRow, MutableBoolean endOfInput)
+            throws CsvReaderException {
         if (needsUnderlyingRefresh) {
             // Underlying row used up, and all columns provided. Ask underlying CellGrabber for the next line.
             lineGrabber.grabNext(rowText, dummy1, endOfInput);
@@ -66,7 +68,7 @@ public void grabNext(ByteSlice dest, MutableBoolean lastInRow, MutableBoolean en
         }
 
         // There is data to return. Count off N characters. The final column gets all remaining characters.
-        final boolean lastCol  = colIndex == columnWidths.length - 1;
+        final boolean lastCol = colIndex == columnWidths.length - 1;
         final int numCharsToTake = lastCol ? Integer.MAX_VALUE : columnWidths[colIndex];
         takeNCharactersInCharset(rowText, dest, numCharsToTake, utf32CountingMode, dummy2);
         ++colIndex;
@@ -80,7 +82,7 @@ public void grabNext(ByteSlice dest, MutableBoolean lastInRow, MutableBoolean en
     }
 
     private static void takeNCharactersInCharset(ByteSlice src, ByteSlice dest, int numCharsToTake,
-                                                 boolean utf32CountingMode, MutableInt tempInt) {
+            boolean utf32CountingMode, MutableInt tempInt) {
         final byte[] data = src.data();
         final int cellBegin = src.begin();
         int current = cellBegin;

diff --git a/src/main/java/io/deephaven/csv/reading/headers/FixedHeaderFinder.java b/src/main/java/io/deephaven/csv/reading/headers/FixedHeaderFinder.java
@@ -44,17 +44,19 @@ public static String[] determineHeadersToUse(
                 }
                 --skipCount;
             }
-            final byte paddingByte = (byte)specs.delimiter();
+            final byte paddingByte = (byte) specs.delimiter();
             if (columnWidthsToUse.length == 0) {
                 // UNITS: UTF8 CHARACTERS
                 columnWidthsToUse = inferColumnWidths(headerRow, paddingByte, specs.useUtf32CountingConvention());
             }
 
             // DESIRED UNITS: UTF8 CHARACTERS
-            headersToUse = extractHeaders(headerRow, columnWidthsToUse, paddingByte, specs.useUtf32CountingConvention());
+            headersToUse =
+                    extractHeaders(headerRow, columnWidthsToUse, paddingByte, specs.useUtf32CountingConvention());
         } else {
             if (columnWidthsToUse.length == 0) {
-                throw new CsvReaderException("Can't proceed because hasHeaderRow is false but fixedColumnWidths is unspecified");
+                throw new CsvReaderException(
+                        "Can't proceed because hasHeaderRow is false but fixedColumnWidths is unspecified");
             }
             headersToUse = ReaderUtil.makeSyntheticHeaders(columnWidthsToUse.length);
         }
@@ -99,7 +101,8 @@ private static int[] inferColumnWidths(ByteSlice row, byte delimiterAsByte, bool
             boolean thisCharIsDelimiter = ch == delimiterAsByte;
             if (currentIndex == row.begin() && thisCharIsDelimiter) {
                 throw new IllegalArgumentException(
-                        String.format("Header row cannot start with the delimiter character '%c'", (char)delimiterAsByte));
+                        String.format("Header row cannot start with the delimiter character '%c'",
+                                (char) delimiterAsByte));
             }
             if (!thisCharIsDelimiter && prevCharIsDelimiter) {
                 columnWidths.add(numChars);
@@ -115,7 +118,7 @@ private static int[] inferColumnWidths(ByteSlice row, byte delimiterAsByte, bool
 
     // UNITS: UTF8 CHARACTERS
     private static String[] extractHeaders(ByteSlice row, int[] columnWidths, byte paddingByte,
-                                           boolean utf32CountingMode) {
+            boolean utf32CountingMode) {
         final int numCols = columnWidths.length;
         if (numCols == 0) {
             return new String[0];
@@ -140,11 +143,12 @@ private static String[] extractHeaders(ByteSlice row, int[] columnWidths, byte p
     }
 
     private static int charWidthsToByteWidths(ByteSlice row, int[] charWidths, boolean utf32CountingMode,
-                                              int[] byteWidths) {
+            int[] byteWidths) {
         int numCols = charWidths.length;
         if (byteWidths.length != numCols) {
-            throw new IllegalArgumentException(String.format("Expected charWidths.length (%d) == byteWidths.length (%d)",
-                    charWidths.length, byteWidths.length));
+            throw new IllegalArgumentException(
+                    String.format("Expected charWidths.length (%d) == byteWidths.length (%d)",
+                            charWidths.length, byteWidths.length));
         }
         final MutableInt charCountResult = new MutableInt();
         final byte[] data = row.data();

diff --git a/src/test/java/io/deephaven/csv/CsvReaderTest.java b/src/test/java/io/deephaven/csv/CsvReaderTest.java
@@ -1870,8 +1870,8 @@ public void colnumPassedThrough() throws CsvReaderException {
     }
 
     /**
-     * Addresses <a href="https://github.com/deephaven/deephaven-csv/issues/212"> A user requested that the library
-     * be able to read files like this.
+     * Addresses <a href="https://github.com/deephaven/deephaven-csv/issues/212">issue 212</a>. A user requested that
+     * the library be able to read files like this.
      */
     @Test
     public void bug212() throws CsvReaderException {
@@ -1890,7 +1890,8 @@ public void bug212() throws CsvReaderException {
                 Column.ofRefs("NAME", "argo-events", "argo-workflows", "argocd", "beta"),
                 Column.ofRefs("STATUS", "Not Active", "Active", "Active", "Not Active"),
                 Column.ofRefs("AGE", "2y77d", "2y77d", "5y18d", "4y235d"),
-                Column.ofRefs("LABELS", "app.kubernetes.io/instance=argo-events,kubernetes.io/metadata.name=argo-events",
+                Column.ofRefs("LABELS",
+                        "app.kubernetes.io/instance=argo-events,kubernetes.io/metadata.name=argo-events",
                         "app.kubernetes.io/instance=argo-workflows,kubernetes.io/metadata.name=argo-workflows",
                         "kubernetes.io/metadata.name=argocd",
                         "kubernetes.io/metadata.name=beta"));
@@ -1914,13 +1915,15 @@ public void simpleFixedColumnWidths() throws CsvReaderException {
                         Column.ofValues("Price", 0.25, 0.15, 0.18),
                         Column.ofValues("SecurityId", 200, 300, 500));
 
-        final CsvSpecs specs = defaultCsvBuilder().hasFixedWidthColumns(true).delimiter(' ').ignoreSurroundingSpaces(true).build();
+        final CsvSpecs specs =
+                defaultCsvBuilder().hasFixedWidthColumns(true).delimiter(' ').ignoreSurroundingSpaces(true).build();
 
         invokeTest(specs, input, expected);
     }
 
     /**
      * We allow data fields to fill the whole cell, without a padding character.
+     * 
      * @throws CsvReaderException
      */
     @Test
@@ -1938,7 +1941,8 @@ public void fixedColumnWidthsFullCell() throws CsvReaderException {
                         Column.ofValues("Price", 0.25, 0.15),
                         Column.ofValues("SecurityId", 200, 300));
 
-        final CsvSpecs specs = defaultCsvBuilder().hasFixedWidthColumns(true).delimiter(' ').ignoreSurroundingSpaces(true).build();
+        final CsvSpecs specs =
+                defaultCsvBuilder().hasFixedWidthColumns(true).delimiter(' ').ignoreSurroundingSpaces(true).build();
         invokeTest(specs, input, expected);
     }
 
@@ -1963,7 +1967,7 @@ public void fixedColumnWidthsShortRows(boolean allowMissingColumns) throws CsvRe
                         Column.ofValues("Price", Sentinels.NULL_DOUBLE, 0.15, 0.18, Sentinels.NULL_DOUBLE),
                         Column.ofValues("SecurityId", Sentinels.NULL_INT, 300, 500, Sentinels.NULL_INT));
 
-        final CsvSpecs specs  = defaultCsvBuilder().hasFixedWidthColumns(true).delimiter(' ')
+        final CsvSpecs specs = defaultCsvBuilder().hasFixedWidthColumns(true).delimiter(' ')
                 .ignoreSurroundingSpaces(true).allowMissingColumns(allowMissingColumns).build();
 
         if (allowMissingColumns) {
@@ -1975,11 +1979,11 @@ public void fixedColumnWidthsShortRows(boolean allowMissingColumns) throws CsvRe
     }
 
     /**
-     * All six Unicode characters ♡♥❥❦◑╳ are in the Basic Multilingual Plane and can all be represented
-     * with a single Java char. Therefore, they are counted the same with both counting conventions.
+     * All six Unicode characters ♡♥❥❦◑╳ are in the Basic Multilingual Plane and can all be represented with a single
+     * Java char. Therefore, they are counted the same with both counting conventions.
      */
     @ParameterizedTest
-    @ValueSource(booleans =  {false, true})
+    @ValueSource(booleans = {false, true})
     public void countsBMPCharactersTheSame(boolean useUtf32CountingConvention) throws CsvReaderException {
         final String input =
                 ""
@@ -2001,13 +2005,13 @@ public void countsBMPCharactersTheSame(boolean useUtf32CountingConvention) throw
     }
 
     /**
-     * All six Unicode characters 🥰😻🧡💓💕💖 are _outside_ the Basic Multilingual Plane and all are represented
-     * with two Java chars. The Sym column has a width of six. They will fit in the "Sym" column if the caller
-     * uses UTF-32 counting convention. They will not fit in the column if the caller uses the UTF-16 counting
-     * convention (because it takes 12 Java chars to express them).
+     * All six Unicode characters 🥰😻🧡💓💕💖 are _outside_ the Basic Multilingual Plane and all are represented with
+     * two Java chars. The Sym column has a width of six. They will fit in the "Sym" column if the caller uses UTF-32
+     * counting convention. They will not fit in the column if the caller uses the UTF-16 counting convention (because
+     * it takes 12 Java chars to express them).
      */
     @ParameterizedTest
-    @ValueSource(booleans =  {false, true})
+    @ValueSource(booleans = {false, true})
     public void countsNonBMPCharactersDifferently(boolean useUtf32CountingConvention) throws CsvReaderException {
         final String input =
                 ""
@@ -2034,17 +2038,15 @@ public void countsNonBMPCharactersDifferently(boolean useUtf32CountingConvention
     }
 
     /**
-     * Using Unicode characters as column headers. We give one column a header with characters from the BMP
-     * and one with characters outside the BMP and show how the behavior differs depending on the
-     * useUtf32CountingConvention flag.
-     * ╔═╗
-     * All six Unicode characters 🥰😻🧡💓💕💖 are _outside_ the Basic Multilingual Plane and all are represented
-     * with two Java chars. The Sym column has a width of six. They will fit in the "Sym" column if the caller
-     * uses UTF-32 counting convention. They will not fit in the column if the caller uses the UTF-16 counting
-     * convention (because it takes 12 Java chars to express them).
+     * Using Unicode characters as column headers. We give one column a header with characters from the BMP and one with
+     * characters outside the BMP and show how the behavior differs depending on the useUtf32CountingConvention flag.
+     * ╔═╗ All six Unicode characters 🥰😻🧡💓💕💖 are _outside_ the Basic Multilingual Plane and all are represented
+     * with two Java chars. The Sym column has a width of six. They will fit in the "Sym" column if the caller uses
+     * UTF-32 counting convention. They will not fit in the column if the caller uses the UTF-16 counting convention
+     * (because it takes 12 Java chars to express them).
      */
     @ParameterizedTest
-    @ValueSource(booleans =  {false, true})
+    @ValueSource(booleans = {false, true})
     public void unicodeColumnHeaders(boolean useUtf32CountingConvention) throws CsvReaderException {
         // In the UTF-32 counting convention, this is a column of width 4 (three Unicode characters plus the space)
         // followed by a column of width 5. The first cell of the data would therefore be "abc", and the next cell
@@ -2077,12 +2079,12 @@ public void unicodeColumnHeaders(boolean useUtf32CountingConvention) throws CsvR
     }
 
     /**
-     * If the library is configured for the UTF-16 counting convention, and there is only one unit of space left
-     * in the field, and the next character is a character outside the Basic Multilingual Plane that requires two units,
-     * the library will include that character in the next field rather than this one.
+     * If the library is configured for the UTF-16 counting convention, and there is only one unit of space left in the
+     * field, and the next character is a character outside the Basic Multilingual Plane that requires two units, the
+     * library will include that character in the next field rather than this one.
      */
     @ParameterizedTest
-    @ValueSource(booleans =  {false, true})
+    @ValueSource(booleans = {false, true})
     public void brokenSurrogatePair(boolean useUtf32CountingConvention) throws CsvReaderException {
         // This test has a column of width 3 (three characters plus the space)
         // followed by a column of width 2.