From 757058be5e82fafefb6254277da8ed9de0530525 Mon Sep 17 00:00:00 2001 From: Shivam Malhotra Date: Thu, 16 May 2024 16:24:26 -0500 Subject: [PATCH] Bugfix in URI conversion when working with directories (#5493) --- .../java/io/deephaven/base/FileUtils.java | 49 +++++++------ .../java/io/deephaven/base/FileUtilsTest.java | 68 +++++++++++++++++++ .../parquet/base/ParquetFileWriter.java | 1 - .../TrackedSeekableChannelsProvider.java | 2 +- ...TrackedSeekableChannelsProviderPlugin.java | 4 +- 5 files changed, 101 insertions(+), 23 deletions(-) create mode 100644 Base/src/test/java/io/deephaven/base/FileUtilsTest.java diff --git a/Base/src/main/java/io/deephaven/base/FileUtils.java b/Base/src/main/java/io/deephaven/base/FileUtils.java index da11eef1289..721ae77b21b 100644 --- a/Base/src/main/java/io/deephaven/base/FileUtils.java +++ b/Base/src/main/java/io/deephaven/base/FileUtils.java @@ -39,6 +39,8 @@ public boolean accept(File dir, String name) { public static final Pattern REPEATED_URI_SEPARATOR_PATTERN = Pattern.compile("//+"); + public static final String FILE_URI_SCHEME = "file"; + /** * Cleans the specified path. All files and subdirectories in the path will be deleted. (ie you'll be left with an * empty directory). @@ -82,7 +84,7 @@ public static void deleteRecursively(File file) { /** * Move files accepted by a filter from their relative path under source to the same relative path under * destination. Creates missing destination subdirectories as needed. - * + * * @param source Must be a directory. * @param destination Must be a directory if it exists. * @param filter Applied to normal files, only. We recurse on directories automatically. @@ -129,7 +131,7 @@ private static void moveRecursivelyInternal(File source, File destination, FileF /** * Recursive delete method that copes with .nfs files. Uses the file's parent as the trash directory. - * + * * @param file */ public static void deleteRecursivelyOnNFS(File file) { @@ -138,7 +140,7 @@ public static void deleteRecursivelyOnNFS(File file) { /** * Recursive delete method that copes with .nfs files. - * + * * @param trashFile Filename to move regular files to before deletion. .nfs files may be created in its parent * directory. * @param fileToBeDeleted File or directory at which to begin recursive deletion. @@ -169,7 +171,7 @@ public static void deleteRecursivelyOnNFS(final File trashFile, final File fileT /** * Scan directory recursively to find all files - * + * * @param dir * @return */ @@ -282,21 +284,36 @@ public static URI convertToURI(final String source, final boolean isDirectory) { URI uri; try { uri = new URI(source); + if (uri.getScheme() == null) { + // Convert to a "file" URI + return convertToURI(new File(source), isDirectory); + } + if (uri.getScheme().equals(FILE_URI_SCHEME)) { + return convertToURI(new File(uri), isDirectory); + } + String path = uri.getPath(); + final boolean endsWithSlash = path.charAt(path.length() - 1) == URI_SEPARATOR_CHAR; + if (!isDirectory && endsWithSlash) { + throw new IllegalArgumentException("Non-directory URI should not end with a slash: " + uri); + } + boolean isUpdated = false; + if (isDirectory && !endsWithSlash) { + path = path + URI_SEPARATOR_CHAR; + isUpdated = true; + } // Replace two or more consecutive slashes in the path with a single slash - final String path = uri.getPath(); if (path.contains(REPEATED_URI_SEPARATOR)) { - final String canonicalizedPath = REPEATED_URI_SEPARATOR_PATTERN.matcher(path).replaceAll(URI_SEPARATOR); - uri = new URI(uri.getScheme(), uri.getUserInfo(), uri.getHost(), uri.getPort(), canonicalizedPath, - uri.getQuery(), uri.getFragment()); + path = REPEATED_URI_SEPARATOR_PATTERN.matcher(path).replaceAll(URI_SEPARATOR); + isUpdated = true; + } + if (isUpdated) { + uri = new URI(uri.getScheme(), uri.getUserInfo(), uri.getHost(), uri.getPort(), path, uri.getQuery(), + uri.getFragment()); } } catch (final URISyntaxException e) { // If the URI is invalid, assume it's a file path return convertToURI(new File(source), isDirectory); } - if (uri.getScheme() == null) { - // Convert to a "file" URI - return convertToURI(new File(source), isDirectory); - } return uri; } @@ -314,17 +331,11 @@ public static URI convertToURI(final File file, final boolean isDirectory) { if (File.separatorChar != URI_SEPARATOR_CHAR) { absPath = absPath.replace(File.separatorChar, URI_SEPARATOR_CHAR); } - if (absPath.charAt(0) != URI_SEPARATOR_CHAR) { - absPath = URI_SEPARATOR_CHAR + absPath; - } if (isDirectory && absPath.charAt(absPath.length() - 1) != URI_SEPARATOR_CHAR) { absPath = absPath + URI_SEPARATOR_CHAR; } - if (absPath.startsWith(REPEATED_URI_SEPARATOR)) { - absPath = REPEATED_URI_SEPARATOR + absPath; - } try { - return new URI("file", null, absPath, null); + return new URI(FILE_URI_SCHEME, null, absPath, null); } catch (final URISyntaxException e) { throw new IllegalStateException("Failed to convert file to URI: " + file, e); } diff --git a/Base/src/test/java/io/deephaven/base/FileUtilsTest.java b/Base/src/test/java/io/deephaven/base/FileUtilsTest.java new file mode 100644 index 00000000000..fc00e5802cb --- /dev/null +++ b/Base/src/test/java/io/deephaven/base/FileUtilsTest.java @@ -0,0 +1,68 @@ +// +// Copyright (c) 2016-2024 Deephaven Data Labs and Patent Pending +// +package io.deephaven.base; + +import java.io.File; +import java.io.IOException; +import java.net.URISyntaxException; +import java.nio.file.Path; + +import junit.framework.TestCase; +import org.junit.Assert; + +public class FileUtilsTest extends TestCase { + + public void testConvertToFileURI() throws IOException { + final File currentDir = new File("").getAbsoluteFile(); + fileUriTestHelper(currentDir.toString(), true, currentDir.toURI().toString()); + + final File someFile = new File(currentDir, "tempFile"); + fileUriTestHelper(someFile.getPath(), false, someFile.toURI().toString()); + + // Check if trailing slash gets added for a directory + final String expectedDirURI = "file:" + currentDir.getPath() + "/path/to/directory/"; + fileUriTestHelper(currentDir.getPath() + "/path/to/directory", true, expectedDirURI); + + // Check if multiple slashes get normalized + fileUriTestHelper(currentDir.getPath() + "////path///to////directory////", true, expectedDirURI); + + // Check if multiple slashes in the beginning get normalized + fileUriTestHelper("////" + currentDir.getPath() + "/path/to/directory", true, expectedDirURI); + + // Check for bad inputs for files with trailing slashes + final String expectedFileURI = someFile.toURI().toString(); + fileUriTestHelper(someFile.getPath() + "/", false, expectedFileURI); + Assert.assertEquals(expectedFileURI, + FileUtils.convertToURI("file:" + someFile.getPath() + "/", false).toString()); + } + + private static void fileUriTestHelper(final String filePath, final boolean isDirectory, + final String expectedURIString) { + Assert.assertEquals(expectedURIString, FileUtils.convertToURI(filePath, isDirectory).toString()); + Assert.assertEquals(expectedURIString, FileUtils.convertToURI(new File(filePath), isDirectory).toString()); + Assert.assertEquals(expectedURIString, FileUtils.convertToURI(Path.of(filePath), isDirectory).toString()); + } + + public void testConvertToS3URI() throws URISyntaxException { + Assert.assertEquals("s3://bucket/key", FileUtils.convertToURI("s3://bucket/key", false).toString()); + + // Check if trailing slash gets added for a directory + Assert.assertEquals("s3://bucket/key/".toString(), FileUtils.convertToURI("s3://bucket/key", true).toString()); + + // Check if multiple slashes get normalized + Assert.assertEquals("s3://bucket/key/", FileUtils.convertToURI("s3://bucket///key///", true).toString()); + + try { + FileUtils.convertToURI("", false); + Assert.fail("Expected IllegalArgumentException"); + } catch (IllegalArgumentException expected) { + } + + try { + FileUtils.convertToURI("s3://bucket/key/", false); + Assert.fail("Expected IllegalArgumentException"); + } catch (IllegalArgumentException expected) { + } + } +} diff --git a/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/ParquetFileWriter.java b/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/ParquetFileWriter.java index f050c119dce..81dc13a4430 100644 --- a/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/ParquetFileWriter.java +++ b/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/ParquetFileWriter.java @@ -18,7 +18,6 @@ import org.apache.parquet.schema.MessageType; import org.jetbrains.annotations.NotNull; -import java.io.File; import java.io.IOException; import java.util.ArrayList; import java.util.HashMap; diff --git a/extensions/trackedfile/src/main/java/io/deephaven/extensions/trackedfile/TrackedSeekableChannelsProvider.java b/extensions/trackedfile/src/main/java/io/deephaven/extensions/trackedfile/TrackedSeekableChannelsProvider.java index 38c17439e6b..4aec474721d 100644 --- a/extensions/trackedfile/src/main/java/io/deephaven/extensions/trackedfile/TrackedSeekableChannelsProvider.java +++ b/extensions/trackedfile/src/main/java/io/deephaven/extensions/trackedfile/TrackedSeekableChannelsProvider.java @@ -27,7 +27,7 @@ import java.util.concurrent.atomic.AtomicIntegerFieldUpdater; import java.util.stream.Stream; -import static io.deephaven.extensions.trackedfile.TrackedSeekableChannelsProviderPlugin.FILE_URI_SCHEME; +import static io.deephaven.base.FileUtils.FILE_URI_SCHEME; /** * {@link SeekableChannelsProvider} implementation that is constrained by a Deephaven {@link TrackedFileHandleFactory}. diff --git a/extensions/trackedfile/src/main/java/io/deephaven/extensions/trackedfile/TrackedSeekableChannelsProviderPlugin.java b/extensions/trackedfile/src/main/java/io/deephaven/extensions/trackedfile/TrackedSeekableChannelsProviderPlugin.java index f8098f4c717..32ec7a3564a 100644 --- a/extensions/trackedfile/src/main/java/io/deephaven/extensions/trackedfile/TrackedSeekableChannelsProviderPlugin.java +++ b/extensions/trackedfile/src/main/java/io/deephaven/extensions/trackedfile/TrackedSeekableChannelsProviderPlugin.java @@ -12,14 +12,14 @@ import java.net.URI; +import static io.deephaven.base.FileUtils.FILE_URI_SCHEME; + /** * {@link SeekableChannelsProviderPlugin} implementation used for reading files from local disk. */ @AutoService(SeekableChannelsProviderPlugin.class) public final class TrackedSeekableChannelsProviderPlugin implements SeekableChannelsProviderPlugin { - static final String FILE_URI_SCHEME = "file"; - @Override public boolean isCompatible(@NotNull final URI uri, @Nullable final Object object) { return FILE_URI_SCHEME.equals(uri.getScheme());