From 72b39b1fb3c4f54d4ceb1efa8a6a23f8383525c4 Mon Sep 17 00:00:00 2001 From: Jiadong Bai Date: Wed, 15 Jan 2025 15:25:37 -0800 Subject: [PATCH 01/47] add initial lake fs based implementation --- core/build.sbt | 2 + core/file-storage-service/build.sbt | 61 ++++++++++ .../project/build.properties | 1 + .../storage/LakeFSApiClientInstance.scala | 57 +++++++++ .../ics/amber/storage/LakeFSFileStorage.scala | 112 ++++++++++++++++++ .../amber/storage/LakeFSFileStorageSpec.scala | 90 ++++++++++++++ 6 files changed, 323 insertions(+) create mode 100644 core/file-storage-service/build.sbt create mode 100644 core/file-storage-service/project/build.properties create mode 100644 core/file-storage-service/src/main/scala/edu/uci/ics/amber/storage/LakeFSApiClientInstance.scala create mode 100644 core/file-storage-service/src/main/scala/edu/uci/ics/amber/storage/LakeFSFileStorage.scala create mode 100644 core/file-storage-service/src/test/scala/edu/uci/ics/amber/storage/LakeFSFileStorageSpec.scala diff --git a/core/build.sbt b/core/build.sbt index 2f0ec2d44f..606d9242d6 100644 --- a/core/build.sbt +++ b/core/build.sbt @@ -3,6 +3,8 @@ lazy val WorkflowCore = (project in file("workflow-core")) .dependsOn(DAO) .configs(Test) .dependsOn(DAO % "test->test") // test scope dependency +lazy val FileStorageService = (project in file("file-storage-service")) + .dependsOn(WorkflowCore) lazy val WorkflowOperator = (project in file("workflow-operator")).dependsOn(WorkflowCore) lazy val WorkflowCompilingService = (project in file("workflow-compiling-service")) .dependsOn(WorkflowOperator) diff --git a/core/file-storage-service/build.sbt b/core/file-storage-service/build.sbt new file mode 100644 index 0000000000..2221d77816 --- /dev/null +++ b/core/file-storage-service/build.sbt @@ -0,0 +1,61 @@ +import scala.collection.Seq + +name := "file-storage-service" +organization := "edu.uci.ics" +version := "0.1.0" +scalaVersion := "2.13.12" + + +// Enable semanticdb for Scalafix +ThisBuild / semanticdbEnabled := true +ThisBuild / semanticdbVersion := scalafixSemanticdb.revision + +// Manage dependency conflicts by always using the latest revision +ThisBuild / conflictManager := ConflictManager.latestRevision + +// Restrict parallel execution of tests to avoid conflicts +Global / concurrentRestrictions += Tags.limit(Tags.Test, 1) + +///////////////////////////////////////////////////////////////////////////// +// Compiler Options +///////////////////////////////////////////////////////////////////////////// + +// Scala compiler options +Compile / scalacOptions ++= Seq( + "-Xelide-below", "WARNING", // Turn on optimizations with "WARNING" as the threshold + "-feature", // Check feature warnings + "-deprecation", // Check deprecation warnings + "-Ywarn-unused:imports" // Check for unused imports +) + +///////////////////////////////////////////////////////////////////////////// +// Version Variables +///////////////////////////////////////////////////////////////////////////// + +val dropwizardVersion = "4.0.7" +val mockitoVersion = "5.4.0" +val assertjVersion = "3.24.2" + +///////////////////////////////////////////////////////////////////////////// +// Test-related Dependencies +///////////////////////////////////////////////////////////////////////////// + +libraryDependencies ++= Seq( + "org.scalamock" %% "scalamock" % "5.2.0" % Test, // ScalaMock + "org.scalatest" %% "scalatest" % "3.2.17" % Test, // ScalaTest + "io.dropwizard" % "dropwizard-testing" % dropwizardVersion % Test, // Dropwizard Testing + "org.mockito" % 
"mockito-core" % mockitoVersion % Test, // Mockito for mocking + "org.assertj" % "assertj-core" % assertjVersion % Test, // AssertJ for assertions + "com.novocode" % "junit-interface" % "0.11" % Test // SBT interface for JUnit +) + +///////////////////////////////////////////////////////////////////////////// +// Dependencies +///////////////////////////////////////////////////////////////////////////// + +// Core Dependencies +libraryDependencies ++= Seq( + "io.dropwizard" % "dropwizard-core" % dropwizardVersion, + "com.fasterxml.jackson.module" %% "jackson-module-scala" % "2.15.2", + "io.lakefs" % "sdk" % "1.48.0" +) diff --git a/core/file-storage-service/project/build.properties b/core/file-storage-service/project/build.properties new file mode 100644 index 0000000000..fe69360b7c --- /dev/null +++ b/core/file-storage-service/project/build.properties @@ -0,0 +1 @@ +sbt.version = 1.10.7 diff --git a/core/file-storage-service/src/main/scala/edu/uci/ics/amber/storage/LakeFSApiClientInstance.scala b/core/file-storage-service/src/main/scala/edu/uci/ics/amber/storage/LakeFSApiClientInstance.scala new file mode 100644 index 0000000000..5bfe8d4045 --- /dev/null +++ b/core/file-storage-service/src/main/scala/edu/uci/ics/amber/storage/LakeFSApiClientInstance.scala @@ -0,0 +1,57 @@ +package edu.uci.ics.amber.storage + +import io.lakefs.clients.sdk.{ApiClient, ServerConfiguration, ServerVariable} + +import java.util +import scala.jdk.CollectionConverters._ + +/** + * LakeFSApiClientInstance is a singleton that manages the LakeFS ApiClient instance. + * - Provides a single shared ApiClient for all LakeFS operations in the Texera application. + * - Lazily initializes the client on first access. + * - Supports replacing the client instance primarily for testing or reconfiguration. + */ +object LakeFSApiClientInstance { + + private var instance: Option[ApiClient] = None + + // Constant server configuration list + private val servers: List[ServerConfiguration] = List( + new ServerConfiguration( + "http://127.0.0.1:8000/api/v1", + "lakeFS API server endpoint", + new util.HashMap[String, ServerVariable]() + ) + ) + + private val username: String = "AKIAIOSFOLQUICKSTART" + private val password: String = "wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY" + + /** + * Retrieves the singleton LakeFS ApiClient instance. + * - If the client is not initialized, it is lazily created using the configured properties. + * @return the ApiClient instance. + */ + def getInstance(): ApiClient = { + instance match { + case Some(client) => client + case None => + val apiClient = new ApiClient() + apiClient.setUsername(username) + apiClient.setPassword(password) + apiClient.setServers(servers.asJava) + instance = Some(apiClient) + apiClient + } + } + + /** + * Replaces the existing LakeFS ApiClient instance. + * - This method is useful for testing or dynamically updating the client. + * + * @param apiClient the new ApiClient instance to replace the current one. 
+ */ + def replaceInstance(apiClient: ApiClient): Unit = { + instance = Some(apiClient) + } +} \ No newline at end of file diff --git a/core/file-storage-service/src/main/scala/edu/uci/ics/amber/storage/LakeFSFileStorage.scala b/core/file-storage-service/src/main/scala/edu/uci/ics/amber/storage/LakeFSFileStorage.scala new file mode 100644 index 0000000000..28aa17ef3e --- /dev/null +++ b/core/file-storage-service/src/main/scala/edu/uci/ics/amber/storage/LakeFSFileStorage.scala @@ -0,0 +1,112 @@ +package edu.uci.ics.amber.storage + +import io.lakefs.clients.sdk._ +import io.lakefs.clients.sdk.model._ + +import java.io.{File, FileOutputStream, InputStream, OutputStream} +import java.nio.file.{Files, Path} +import scala.jdk.CollectionConverters._ + +/** + * LakeFSFileStorage provides high-level file storage operations using LakeFS, + * similar to Git operations for version control and file management. + */ +object LakeFSFileStorage { + + // Lazy initialization of LakeFS API clients + private lazy val apiClient: ApiClient = LakeFSApiClientInstance.getInstance() + private lazy val repoApi: RepositoriesApi = new RepositoriesApi(apiClient) + private lazy val objectsApi: ObjectsApi = new ObjectsApi(apiClient) + private lazy val commitsApi: CommitsApi = new CommitsApi(apiClient) + + /** + * Initializes a new repository in LakeFS. + * + * @param repoName Name of the repository. + * @param storageNamespace Storage path (e.g., "s3://bucket-name/"). + * @param defaultBranch Default branch name, usually "main". + */ + def initRepo(repoName: String, storageNamespace: String, defaultBranch: String = "main"): Repository = { + val repo = new RepositoryCreation() + .name(repoName) + .storageNamespace(storageNamespace) + .defaultBranch(defaultBranch) + + repoApi.createRepository(repo).execute() + } + + /** + * Writes a file to the repository (similar to Git add). + * Converts the InputStream to a temporary file for upload. + * + * @param repoName Repository name. + * @param branch Branch name. + * @param filePath Path in the repository. + * @param inputStream File content stream. + */ + def writeFileToRepo(repoName: String, branch: String, filePath: String, inputStream: InputStream): ObjectStats = { + val tempFilePath = Files.createTempFile("lakefs-upload-", ".tmp") + val tempFileStream = new FileOutputStream(tempFilePath.toFile) + val buffer = new Array[Byte](1024) + + // Create an iterator to repeatedly call inputStream.read, and direct buffered data to file + Iterator + .continually(inputStream.read(buffer)) + .takeWhile(_ != -1) + .foreach(tempFileStream.write(buffer, 0, _)) + + inputStream.close() + tempFileStream.close() + + // Upload the temporary file to LakeFS + objectsApi.uploadObject(repoName, branch, filePath).content(tempFilePath.toFile).execute() + } + + /** + * Removes a file from the repository (similar to Git rm). + * + * @param repoName Repository name. + * @param branch Branch name. + * @param filePath Path in the repository to delete. + */ + def removeFileFromRepo(repoName: String, branch: String, filePath: String): Unit = { + objectsApi.deleteObject(repoName, branch, filePath).execute() + } + + /** + * Executes operations and creates a commit (similar to a transactional commit). + * + * @param repoName Repository name. + * @param branch Branch name. + * @param commitMessage Commit message. + * @param operations File operations to perform before committing. 
+ */ + def withCreateVersion(repoName: String, branch: String, commitMessage: String)(operations: => Unit): Unit = { + operations + val commit = new CommitCreation() + .message(commitMessage) + + commitsApi.commit(repoName, branch, commit).execute() + } + + /** + * Retrieves file content from a specific commit and path. + * + * @param repoName Repository name. + * @param commitHash Commit hash of the version. + * @param filePath Path to the file in the repository. + * @param outputStream OutputStream to write the content. + */ + def retrieveFileContent(repoName: String, commitHash: String, filePath: String, outputStream: OutputStream): File = { + objectsApi.getObject(repoName, commitHash, filePath).execute() + } + + /** + * Deletes an entire repository. + * + * @param repoName Name of the repository to delete. + */ + def deleteRepo(repoName: String): Unit = { + repoApi.deleteRepository(repoName).execute() + } +} \ No newline at end of file diff --git a/core/file-storage-service/src/test/scala/edu/uci/ics/amber/storage/LakeFSFileStorageSpec.scala b/core/file-storage-service/src/test/scala/edu/uci/ics/amber/storage/LakeFSFileStorageSpec.scala new file mode 100644 index 0000000000..ffac598072 --- /dev/null +++ b/core/file-storage-service/src/test/scala/edu/uci/ics/amber/storage/LakeFSFileStorageSpec.scala @@ -0,0 +1,90 @@ +package edu.uci.ics.amber.storage + +import org.scalatest.BeforeAndAfter +import org.scalatest.flatspec.AnyFlatSpec +import org.scalatest.matchers.should.Matchers + +import java.io.{ByteArrayInputStream, ByteArrayOutputStream} +import java.nio.file.{Files, Path} +import scala.jdk.CollectionConverters._ + +class LakeFSFileStorageSpec extends AnyFlatSpec with Matchers with BeforeAndAfter { + + val repoName = "test-repo" + val branchName = "main" + val filePath = "testFile.txt" + + var commitHashes: List[String] = List.empty + + val testFileContentV1 = "This is test file version 1" + val testFileContentV2 = "This is test file version 2" + + before { + // Initialize the repository + LakeFSFileStorage.initRepo(repoName, "s3://test-bucket/") + } + + after { + // Delete the repository + LakeFSFileStorage.deleteRepo(repoName) + } + + private def writeFile(content: String): Unit = { + val inputStream = new ByteArrayInputStream(content.getBytes) + LakeFSFileStorage.writeFileToRepo(repoName, branchName, filePath, inputStream) + } + + "LakeFSFileStorage" should "write and retrieve file content across versions" in { + // Version 1 + LakeFSFileStorage.withCreateVersion(repoName, branchName, "Version 1") { + writeFile(testFileContentV1) + } + + // Version 2 + LakeFSFileStorage.withCreateVersion(repoName, branchName, "Version 2") { + writeFile(testFileContentV2) + } + + // Retrieve version 1 + val outputV1 = new ByteArrayOutputStream() + LakeFSFileStorage.retrieveFileContent(repoName, branchName, filePath, outputV1) + outputV1.toString should equal(testFileContentV2) // Latest content should be V2 + + // (Optional) Verify version 1 content by using specific commit hash if available + } + + it should "remove a file and verify its absence" in { + // Write and commit version 1 + LakeFSFileStorage.withCreateVersion(repoName, branchName, "Add file") { + writeFile(testFileContentV1) + } + + // Remove the file and commit + LakeFSFileStorage.withCreateVersion(repoName, branchName, "Remove file") { + LakeFSFileStorage.removeFileFromRepo(repoName, branchName, filePath) + } + + // Attempt to retrieve the deleted file (expect failure) + val output = new ByteArrayOutputStream() + intercept[Exception] { 
+ LakeFSFileStorage.retrieveFileContent(repoName, branchName, filePath, output) + } + } + + it should "handle multiple versions correctly" in { + // Version 1 + LakeFSFileStorage.withCreateVersion(repoName, branchName, "Version 1") { + writeFile(testFileContentV1) + } + + // Version 2 + LakeFSFileStorage.withCreateVersion(repoName, branchName, "Version 2") { + writeFile(testFileContentV2) + } + + // Retrieve the latest version content (should be V2) + val outputLatest = new ByteArrayOutputStream() + LakeFSFileStorage.retrieveFileContent(repoName, branchName, filePath, outputLatest) + outputLatest.toString should equal(testFileContentV2) + } +} \ No newline at end of file From 47fe1aba9103d62984f11534b018a7239e271953 Mon Sep 17 00:00:00 2001 From: Jiadong Bai Date: Wed, 15 Jan 2025 21:46:25 -0800 Subject: [PATCH 02/47] move lakefs logic to workflow core --- core/file-storage-service/build.sbt | 61 --------- .../project/build.properties | 1 - .../amber/storage/LakeFSFileStorageSpec.scala | 90 ------------- core/workflow-core/build.sbt | 1 + .../storage/LakeFSApiClientInstance.scala | 2 +- .../core}/storage/LakeFSFileStorage.scala | 27 ++-- .../amber/core/storage/VFSURIFactory.scala | 10 ++ .../storage/model/LakeFSFileDocument.scala | 5 + .../amber/storage/LakeFSFileStorageSpec.scala | 122 ++++++++++++++++++ 9 files changed, 157 insertions(+), 162 deletions(-) delete mode 100644 core/file-storage-service/build.sbt delete mode 100644 core/file-storage-service/project/build.properties delete mode 100644 core/file-storage-service/src/test/scala/edu/uci/ics/amber/storage/LakeFSFileStorageSpec.scala rename core/{file-storage-service/src/main/scala/edu/uci/ics/amber => workflow-core/src/main/scala/edu/uci/ics/amber/core}/storage/LakeFSApiClientInstance.scala (97%) rename core/{file-storage-service/src/main/scala/edu/uci/ics/amber => workflow-core/src/main/scala/edu/uci/ics/amber/core}/storage/LakeFSFileStorage.scala (81%) create mode 100644 core/workflow-core/src/main/scala/edu/uci/ics/amber/core/storage/model/LakeFSFileDocument.scala create mode 100644 core/workflow-core/src/test/scala/edu/uci/ics/amber/storage/LakeFSFileStorageSpec.scala diff --git a/core/file-storage-service/build.sbt b/core/file-storage-service/build.sbt deleted file mode 100644 index 2221d77816..0000000000 --- a/core/file-storage-service/build.sbt +++ /dev/null @@ -1,61 +0,0 @@ -import scala.collection.Seq - -name := "file-storage-service" -organization := "edu.uci.ics" -version := "0.1.0" -scalaVersion := "2.13.12" - - -// Enable semanticdb for Scalafix -ThisBuild / semanticdbEnabled := true -ThisBuild / semanticdbVersion := scalafixSemanticdb.revision - -// Manage dependency conflicts by always using the latest revision -ThisBuild / conflictManager := ConflictManager.latestRevision - -// Restrict parallel execution of tests to avoid conflicts -Global / concurrentRestrictions += Tags.limit(Tags.Test, 1) - -///////////////////////////////////////////////////////////////////////////// -// Compiler Options -///////////////////////////////////////////////////////////////////////////// - -// Scala compiler options -Compile / scalacOptions ++= Seq( - "-Xelide-below", "WARNING", // Turn on optimizations with "WARNING" as the threshold - "-feature", // Check feature warnings - "-deprecation", // Check deprecation warnings - "-Ywarn-unused:imports" // Check for unused imports -) - -///////////////////////////////////////////////////////////////////////////// -// Version Variables 
-///////////////////////////////////////////////////////////////////////////// - -val dropwizardVersion = "4.0.7" -val mockitoVersion = "5.4.0" -val assertjVersion = "3.24.2" - -///////////////////////////////////////////////////////////////////////////// -// Test-related Dependencies -///////////////////////////////////////////////////////////////////////////// - -libraryDependencies ++= Seq( - "org.scalamock" %% "scalamock" % "5.2.0" % Test, // ScalaMock - "org.scalatest" %% "scalatest" % "3.2.17" % Test, // ScalaTest - "io.dropwizard" % "dropwizard-testing" % dropwizardVersion % Test, // Dropwizard Testing - "org.mockito" % "mockito-core" % mockitoVersion % Test, // Mockito for mocking - "org.assertj" % "assertj-core" % assertjVersion % Test, // AssertJ for assertions - "com.novocode" % "junit-interface" % "0.11" % Test // SBT interface for JUnit -) - -///////////////////////////////////////////////////////////////////////////// -// Dependencies -///////////////////////////////////////////////////////////////////////////// - -// Core Dependencies -libraryDependencies ++= Seq( - "io.dropwizard" % "dropwizard-core" % dropwizardVersion, - "com.fasterxml.jackson.module" %% "jackson-module-scala" % "2.15.2", - "io.lakefs" % "sdk" % "1.48.0" -) diff --git a/core/file-storage-service/project/build.properties b/core/file-storage-service/project/build.properties deleted file mode 100644 index fe69360b7c..0000000000 --- a/core/file-storage-service/project/build.properties +++ /dev/null @@ -1 +0,0 @@ -sbt.version = 1.10.7 diff --git a/core/file-storage-service/src/test/scala/edu/uci/ics/amber/storage/LakeFSFileStorageSpec.scala b/core/file-storage-service/src/test/scala/edu/uci/ics/amber/storage/LakeFSFileStorageSpec.scala deleted file mode 100644 index ffac598072..0000000000 --- a/core/file-storage-service/src/test/scala/edu/uci/ics/amber/storage/LakeFSFileStorageSpec.scala +++ /dev/null @@ -1,90 +0,0 @@ -package edu.uci.ics.amber.storage - -import org.scalatest.BeforeAndAfter -import org.scalatest.flatspec.AnyFlatSpec -import org.scalatest.matchers.should.Matchers - -import java.io.{ByteArrayInputStream, ByteArrayOutputStream} -import java.nio.file.{Files, Path} -import scala.jdk.CollectionConverters._ - -class LakeFSFileStorageSpec extends AnyFlatSpec with Matchers with BeforeAndAfter { - - val repoName = "test-repo" - val branchName = "main" - val filePath = "testFile.txt" - - var commitHashes: List[String] = List.empty - - val testFileContentV1 = "This is test file version 1" - val testFileContentV2 = "This is test file version 2" - - before { - // Initialize the repository - LakeFSFileStorage.initRepo(repoName, "s3://test-bucket/") - } - - after { - // Delete the repository - LakeFSFileStorage.deleteRepo(repoName) - } - - private def writeFile(content: String): Unit = { - val inputStream = new ByteArrayInputStream(content.getBytes) - LakeFSFileStorage.writeFileToRepo(repoName, branchName, filePath, inputStream) - } - - "LakeFSFileStorage" should "write and retrieve file content across versions" in { - // Version 1 - LakeFSFileStorage.withCreateVersion(repoName, branchName, "Version 1") { - writeFile(testFileContentV1) - } - - // Version 2 - LakeFSFileStorage.withCreateVersion(repoName, branchName, "Version 2") { - writeFile(testFileContentV2) - } - - // Retrieve version 1 - val outputV1 = new ByteArrayOutputStream() - LakeFSFileStorage.retrieveFileContent(repoName, branchName, filePath, outputV1) - outputV1.toString should equal(testFileContentV2) // Latest content should be V2 - - // 
(Optional) Verify version 1 content by using specific commit hash if available - } - - it should "remove a file and verify its absence" in { - // Write and commit version 1 - LakeFSFileStorage.withCreateVersion(repoName, branchName, "Add file") { - writeFile(testFileContentV1) - } - - // Remove the file and commit - LakeFSFileStorage.withCreateVersion(repoName, branchName, "Remove file") { - LakeFSFileStorage.removeFileFromRepo(repoName, branchName, filePath) - } - - // Attempt to retrieve the deleted file (expect failure) - val output = new ByteArrayOutputStream() - intercept[Exception] { - LakeFSFileStorage.retrieveFileContent(repoName, branchName, filePath, output) - } - } - - it should "handle multiple versions correctly" in { - // Version 1 - LakeFSFileStorage.withCreateVersion(repoName, branchName, "Version 1") { - writeFile(testFileContentV1) - } - - // Version 2 - LakeFSFileStorage.withCreateVersion(repoName, branchName, "Version 2") { - writeFile(testFileContentV2) - } - - // Retrieve the latest version content (should be V2) - val outputLatest = new ByteArrayOutputStream() - LakeFSFileStorage.retrieveFileContent(repoName, branchName, filePath, outputLatest) - outputLatest.toString should equal(testFileContentV2) - } -} \ No newline at end of file diff --git a/core/workflow-core/build.sbt b/core/workflow-core/build.sbt index e3a2ebb7ee..34290d1ce5 100644 --- a/core/workflow-core/build.sbt +++ b/core/workflow-core/build.sbt @@ -173,4 +173,5 @@ libraryDependencies ++= Seq( "org.eclipse.jgit" % "org.eclipse.jgit" % "5.13.0.202109080827-r", // jgit "org.yaml" % "snakeyaml" % "1.30", // yaml reader (downgrade to 1.30 due to dropwizard 1.3.23 required by amber) "org.apache.commons" % "commons-vfs2" % "2.9.0", // for FileResolver throw VFS-related exceptions + "io.lakefs" % "sdk" % "1.48.0" ) \ No newline at end of file diff --git a/core/file-storage-service/src/main/scala/edu/uci/ics/amber/storage/LakeFSApiClientInstance.scala b/core/workflow-core/src/main/scala/edu/uci/ics/amber/core/storage/LakeFSApiClientInstance.scala similarity index 97% rename from core/file-storage-service/src/main/scala/edu/uci/ics/amber/storage/LakeFSApiClientInstance.scala rename to core/workflow-core/src/main/scala/edu/uci/ics/amber/core/storage/LakeFSApiClientInstance.scala index 5bfe8d4045..c8bd94ddf0 100644 --- a/core/file-storage-service/src/main/scala/edu/uci/ics/amber/storage/LakeFSApiClientInstance.scala +++ b/core/workflow-core/src/main/scala/edu/uci/ics/amber/core/storage/LakeFSApiClientInstance.scala @@ -1,4 +1,4 @@ -package edu.uci.ics.amber.storage +package edu.uci.ics.amber.core.storage import io.lakefs.clients.sdk.{ApiClient, ServerConfiguration, ServerVariable} diff --git a/core/file-storage-service/src/main/scala/edu/uci/ics/amber/storage/LakeFSFileStorage.scala b/core/workflow-core/src/main/scala/edu/uci/ics/amber/core/storage/LakeFSFileStorage.scala similarity index 81% rename from core/file-storage-service/src/main/scala/edu/uci/ics/amber/storage/LakeFSFileStorage.scala rename to core/workflow-core/src/main/scala/edu/uci/ics/amber/core/storage/LakeFSFileStorage.scala index 28aa17ef3e..cf8df02958 100644 --- a/core/file-storage-service/src/main/scala/edu/uci/ics/amber/storage/LakeFSFileStorage.scala +++ b/core/workflow-core/src/main/scala/edu/uci/ics/amber/core/storage/LakeFSFileStorage.scala @@ -1,10 +1,10 @@ -package edu.uci.ics.amber.storage +package edu.uci.ics.amber.core.storage import io.lakefs.clients.sdk._ import io.lakefs.clients.sdk.model._ -import java.io.{File, FileOutputStream, 
InputStream, OutputStream} -import java.nio.file.{Files, Path} +import java.io.{File, FileOutputStream, InputStream} +import java.nio.file.Files import scala.jdk.CollectionConverters._ /** @@ -18,6 +18,7 @@ object LakeFSFileStorage { private lazy val repoApi: RepositoriesApi = new RepositoriesApi(apiClient) private lazy val objectsApi: ObjectsApi = new ObjectsApi(apiClient) private lazy val commitsApi: CommitsApi = new CommitsApi(apiClient) + private lazy val refsApi: RefsApi = new RefsApi(apiClient) /** * Initializes a new repository in LakeFS. @@ -31,6 +32,7 @@ object LakeFSFileStorage { .name(repoName) .storageNamespace(storageNamespace) .defaultBranch(defaultBranch) + .sampleData(false) repoApi.createRepository(repo).execute() } @@ -76,12 +78,12 @@ object LakeFSFileStorage { /** * Executes operations and creates a commit (similar to a transactional commit). * - * @param repoName Repository name. - * @param branch Branch name. + * @param repoName Repository name. + * @param branch Branch name. * @param commitMessage Commit message. - * @param operations File operations to perform before committing. + * @param operations File operations to perform before committing. */ - def withCreateVersion(repoName: String, branch: String, commitMessage: String)(operations: => Unit): Unit = { + def withCreateVersion(repoName: String, branch: String, commitMessage: String)(operations: => Unit): Commit = { operations val commit = new CommitCreation() .message(commitMessage) @@ -95,9 +97,8 @@ object LakeFSFileStorage { * @param repoName Repository name. * @param commitHash Commit hash of the version. * @param filePath Path to the file in the repository. - * @param outputStream OutputStream to write the content. */ - def retrieveFileContent(repoName: String, commitHash: String, filePath: String, outputStream: OutputStream): File = { + def retrieveFileContent(repoName: String, commitHash: String, filePath: String): File = { objectsApi.getObject(repoName, commitHash, filePath).execute() } @@ -109,4 +110,12 @@ object LakeFSFileStorage { def deleteRepo(repoName: String): Unit = { repoApi.deleteRepository(repoName).execute() } + + def retrieveVersionsOfRepository(repoName: String, branchName: String): List[Commit] = { + refsApi.logCommits(repoName, branchName).execute().getResults.asScala.toList + } + + def retrieveObjectsOfVersion(repoName: String, commitHash: String): List[ObjectStats] = { + objectsApi.listObjects(repoName, commitHash).execute().getResults.asScala.toList + } } \ No newline at end of file diff --git a/core/workflow-core/src/main/scala/edu/uci/ics/amber/core/storage/VFSURIFactory.scala b/core/workflow-core/src/main/scala/edu/uci/ics/amber/core/storage/VFSURIFactory.scala index b822144a2a..51d162ed40 100644 --- a/core/workflow-core/src/main/scala/edu/uci/ics/amber/core/storage/VFSURIFactory.scala +++ b/core/workflow-core/src/main/scala/edu/uci/ics/amber/core/storage/VFSURIFactory.scala @@ -6,6 +6,7 @@ import edu.uci.ics.amber.core.virtualidentity.{ WorkflowIdentity } import edu.uci.ics.amber.core.workflow.PortIdentity +import io.lakefs.clients.sdk.model.ObjectStats import java.net.URI @@ -17,6 +18,15 @@ object VFSResourceType extends Enumeration { object VFSURIFactory { val VFS_FILE_URI_SCHEME = "vfs" + val LAKEFS_FILE_URI_SCHEME = "lakefs" + + def createLakeFSObjectURI(repoName: String, commitHash: String, obj: ObjectStats): String = { + s"${LAKEFS_FILE_URI_SCHEME}://$repoName/$commitHash/${obj.getPath}" + } + + def decodeLakeFSURI(uri: URI): (String, String, String, String) = { + + } /** * 
Parses a VFS URI and extracts its components diff --git a/core/workflow-core/src/main/scala/edu/uci/ics/amber/core/storage/model/LakeFSFileDocument.scala b/core/workflow-core/src/main/scala/edu/uci/ics/amber/core/storage/model/LakeFSFileDocument.scala new file mode 100644 index 0000000000..7dc3dd9e63 --- /dev/null +++ b/core/workflow-core/src/main/scala/edu/uci/ics/amber/core/storage/model/LakeFSFileDocument.scala @@ -0,0 +1,5 @@ +package edu.uci.ics.amber.core.storage.model + +class LakeFSFileDocument { + +} diff --git a/core/workflow-core/src/test/scala/edu/uci/ics/amber/storage/LakeFSFileStorageSpec.scala b/core/workflow-core/src/test/scala/edu/uci/ics/amber/storage/LakeFSFileStorageSpec.scala new file mode 100644 index 0000000000..8d34feb2c8 --- /dev/null +++ b/core/workflow-core/src/test/scala/edu/uci/ics/amber/storage/LakeFSFileStorageSpec.scala @@ -0,0 +1,122 @@ +package edu.uci.ics.amber.storage + +import edu.uci.ics.amber.core.storage.LakeFSFileStorage +import org.scalatest.BeforeAndAfterAll +import org.scalatest.flatspec.AnyFlatSpec +import org.scalatest.matchers.should.Matchers + +import java.io.{ByteArrayInputStream, File} +import java.nio.file.Files +import java.util.UUID + +class LakeFSFileStorageSpec extends AnyFlatSpec with Matchers with BeforeAndAfterAll { + + val repoName: String = UUID.randomUUID().toString + val branchName = "main" + + val fileContent1 = "Content of file 1" + val fileContent2 = "Content of file 2" + val fileContent3 = "Content of file 3" + val fileContent4 = "Content of file 4" + + val filePaths: Seq[String] = Seq( + "dir1/file1.txt", + "dir1/file2.txt", + "dir1/subdir1/file3.txt", + "dir2/file4.txt" + ) + + override def beforeAll(): Unit = { + LakeFSFileStorage.initRepo(repoName, s"local://$repoName") + } + + override def afterAll(): Unit = { + LakeFSFileStorage.deleteRepo(repoName) + } + + private def writeFile(filePath: String, content: String): Unit = { + val inputStream = new ByteArrayInputStream(content.getBytes) + LakeFSFileStorage.writeFileToRepo(repoName, branchName, filePath, inputStream) + } + + private def readFileContent(file: File): String = { + new String(Files.readAllBytes(file.toPath)) + } + + private def findCommitByMessage(message: String): Option[String] = { + LakeFSFileStorage.retrieveVersionsOfRepository(repoName, branchName) + .find(_.getMessage == message) + .map(_.getId) + } + + "LakeFSFileStorage" should "write multiple files and verify contents across versions" in { + // Version 1: Add file1.txt and file2.txt + LakeFSFileStorage.withCreateVersion(repoName, branchName, "Add file1 and file2") { + writeFile(filePaths(0), fileContent1) + writeFile(filePaths(1), fileContent2) + } + + // Version 2: Add file3.txt + LakeFSFileStorage.withCreateVersion(repoName, branchName, "Add file3") { + writeFile(filePaths(2), fileContent3) + } + + // Version 3: Add file4.txt + LakeFSFileStorage.withCreateVersion(repoName, branchName, "Add file4") { + writeFile(filePaths(3), fileContent4) + } + + // Validate Version 1 + val commitV1 = findCommitByMessage("Add file1 and file2").get + val objectsV1 = LakeFSFileStorage.retrieveObjectsOfVersion(repoName, commitV1).map(_.getPath) + objectsV1 should contain allElementsOf Seq(filePaths(0), filePaths(1)) + objectsV1 should not contain filePaths(2) + + // Validate Version 2 + val commitV2 = findCommitByMessage("Add file3").get + val objectsV2 = LakeFSFileStorage.retrieveObjectsOfVersion(repoName, commitV2).map(_.getPath) + objectsV2 should contain allElementsOf Seq(filePaths(0), filePaths(1), filePaths(2)) 
+ objectsV2 should not contain filePaths(3) + + // Validate Version 3 + val commitV3 = findCommitByMessage("Add file4").get + val objects = LakeFSFileStorage.retrieveObjectsOfVersion(repoName, commitV3) + val objectsV3 = LakeFSFileStorage.retrieveObjectsOfVersion(repoName, commitV3).map(_.getPath) + objectsV3 should contain allElementsOf filePaths + + // Verify content of file4.txt in the latest commit + val file4 = LakeFSFileStorage.retrieveFileContent(repoName, commitV3, filePaths(3)) + readFileContent(file4) should equal(fileContent4) + } + + it should "remove a file and verify its absence in the next version" in { + // Delete file2.txt and commit the change + LakeFSFileStorage.withCreateVersion(repoName, branchName, "Remove file2.txt") { + LakeFSFileStorage.removeFileFromRepo(repoName, branchName, filePaths(1)) + } + + // Locate the commit by message + val deleteCommit = findCommitByMessage("Remove file2.txt").get + + // Verify file2.txt is absent in the latest commit + val objectsAfterDeletion = LakeFSFileStorage.retrieveObjectsOfVersion(repoName, deleteCommit).map(_.getPath) + objectsAfterDeletion should not contain filePaths(1) + + // Verify file1.txt is still present + val file1 = LakeFSFileStorage.retrieveFileContent(repoName, deleteCommit, filePaths(0)) + readFileContent(file1) should equal(fileContent1) + } + + it should "maintain hierarchical structure in file retrieval" in { + // Get the latest commit + val latestCommit = LakeFSFileStorage.retrieveVersionsOfRepository(repoName, branchName).head.getId + + // Retrieve all objects + val objects = LakeFSFileStorage.retrieveObjectsOfVersion(repoName, latestCommit) + val objectPaths = objects.map(_.getPath) + + // Verify nested directories are intact + objectPaths should contain("dir1/subdir1/file3.txt") + objectPaths should contain("dir2/file4.txt") + } +} \ No newline at end of file From 013cc566bd32f26829a15adb34982e8619d2ff27 Mon Sep 17 00:00:00 2001 From: Jiadong Bai Date: Wed, 15 Jan 2025 22:10:22 -0800 Subject: [PATCH 03/47] add uri related and lake fs document --- .../ics/amber/core/storage/FileResolver.scala | 44 +++++++++- .../amber/core/storage/VFSURIFactory.scala | 9 -- .../storage/model/LakeFSFileDocument.scala | 83 ++++++++++++++++++- 3 files changed, 124 insertions(+), 12 deletions(-) diff --git a/core/workflow-core/src/main/scala/edu/uci/ics/amber/core/storage/FileResolver.scala b/core/workflow-core/src/main/scala/edu/uci/ics/amber/core/storage/FileResolver.scala index 18a5acd0c6..2ef3da1bff 100644 --- a/core/workflow-core/src/main/scala/edu/uci/ics/amber/core/storage/FileResolver.scala +++ b/core/workflow-core/src/main/scala/edu/uci/ics/amber/core/storage/FileResolver.scala @@ -20,6 +20,7 @@ import scala.util.{Success, Try} object FileResolver { val DATASET_FILE_URI_SCHEME = "dataset" + val LAKEFS_FILE_URI_SCHEME = "lakefs" /** * Resolves a given fileName to either a file on the local file system or a dataset file. @@ -32,7 +33,7 @@ object FileResolver { if (isFileResolved(fileName)) { return new URI(fileName) } - val resolvers: Seq[String => URI] = Seq(localResolveFunc, datasetResolveFunc) + val resolvers: Seq[String => URI] = Seq(localResolveFunc, datasetResolveFunc, lakeFSResolveFunc) // Try each resolver function in sequence resolvers @@ -135,6 +136,47 @@ object FileResolver { } } + /** + * Resolves a LakeFS file. 
+ * + * Expected input: /repoName/commitHash/objectPath (objectPath can have nested directories) + * Resolved as: lakefs://repoName/commitHash/objectPath + */ + private def lakeFSResolveFunc(fileName: String): URI = { + // Ensure the URI has the lakefs scheme + val fullUri = if (isFileResolved(fileName)) { + new URI(fileName) + } else { + new URI(s"$LAKEFS_FILE_URI_SCHEME://${fileName.stripPrefix("/")}") + } + + // Validate the scheme + if (fullUri.getScheme != LAKEFS_FILE_URI_SCHEME) { + throw new FileNotFoundException(s"Invalid LakeFS scheme: ${fullUri.getScheme}") + } + + // Split the path and extract repoName, commitHash, and objectPath + val filePath = Paths.get(fullUri.getPath.stripPrefix("/")) + val pathSegments = (0 until filePath.getNameCount).map(filePath.getName(_).toString).toArray + + if (pathSegments.length < 3) { + throw new FileNotFoundException(s"Invalid LakeFS URI format: $fileName") + } + + val repoName = pathSegments(0) // repoName + val commitHash = pathSegments(1) // commitHash + val objectPath = Paths.get(pathSegments.drop(2).head, pathSegments.drop(2).tail: _*).toString + + try { + // Verify that the object exists in LakeFS + LakeFSFileStorage.retrieveFileContent(repoName, commitHash, objectPath) + fullUri // Return the constructed URI if the object exists + } catch { + case _: Exception => + throw new FileNotFoundException(s"LakeFS file not found: $fileName") + } + } + /** * Checks if a given file path has a valid scheme. * diff --git a/core/workflow-core/src/main/scala/edu/uci/ics/amber/core/storage/VFSURIFactory.scala b/core/workflow-core/src/main/scala/edu/uci/ics/amber/core/storage/VFSURIFactory.scala index 51d162ed40..ab9487a840 100644 --- a/core/workflow-core/src/main/scala/edu/uci/ics/amber/core/storage/VFSURIFactory.scala +++ b/core/workflow-core/src/main/scala/edu/uci/ics/amber/core/storage/VFSURIFactory.scala @@ -18,15 +18,6 @@ object VFSResourceType extends Enumeration { object VFSURIFactory { val VFS_FILE_URI_SCHEME = "vfs" - val LAKEFS_FILE_URI_SCHEME = "lakefs" - - def createLakeFSObjectURI(repoName: String, commitHash: String, obj: ObjectStats): String = { - s"${LAKEFS_FILE_URI_SCHEME}://$repoName/$commitHash/${obj.getPath}" - } - - def decodeLakeFSURI(uri: URI): (String, String, String, String) = { - - } /** * Parses a VFS URI and extracts its components diff --git a/core/workflow-core/src/main/scala/edu/uci/ics/amber/core/storage/model/LakeFSFileDocument.scala b/core/workflow-core/src/main/scala/edu/uci/ics/amber/core/storage/model/LakeFSFileDocument.scala index 7dc3dd9e63..1e3c8e6fb9 100644 --- a/core/workflow-core/src/main/scala/edu/uci/ics/amber/core/storage/model/LakeFSFileDocument.scala +++ b/core/workflow-core/src/main/scala/edu/uci/ics/amber/core/storage/model/LakeFSFileDocument.scala @@ -1,5 +1,84 @@ package edu.uci.ics.amber.core.storage.model -class LakeFSFileDocument { +import edu.uci.ics.amber.core.storage.LakeFSFileStorage +import org.apache.commons.vfs2.FileNotFoundException -} +import java.io.{File, InputStream} +import java.net.{URI, URLDecoder} +import java.nio.charset.StandardCharsets +import java.nio.file.{Files, Paths} +import scala.jdk.CollectionConverters.IteratorHasAsScala + +/** + * LakeFSFileDocument manages file operations on LakeFS. 
+ * + * @param uri The LakeFS file URI in the format: lakefs://repoName/commitHash/objectPath + */ +private[storage] class LakeFSFileDocument(uri: URI) extends VirtualDocument[Nothing] { + + // Utility function to parse and decode URI into components + private def parseUri(uri: URI): (String, String, String) = { + val segments = Paths.get(uri.getPath).iterator().asScala.map(_.toString).toArray + + if (segments.length < 3) + throw new IllegalArgumentException(s"Invalid LakeFS URI format: $uri") + + val repoName = URLDecoder.decode(segments(0), StandardCharsets.UTF_8) + val commitHash = URLDecoder.decode(segments(1), StandardCharsets.UTF_8) + val decodedPathSegments = segments.drop(2).map(part => URLDecoder.decode(part, StandardCharsets.UTF_8)) + val objectPath = Paths.get(decodedPathSegments.head, decodedPathSegments.tail: _*).toString + + (repoName, commitHash, objectPath) + } + + // Extract repoName, commitHash, and objectPath from the URI + private val (repoName, commitHash, objectPath) = parseUri(uri) + + // Cache for the temporary file + private var tempFile: Option[File] = None + + /** + * Returns the URI of the LakeFS file. + */ + override def getURI: URI = uri + + /** + * Provides an InputStream of the LakeFS file content. + */ + override def asInputStream(): InputStream = { + try { + Files.newInputStream(LakeFSFileStorage.retrieveFileContent(repoName, commitHash, objectPath).toPath) + } catch { + case _: Exception => + throw new FileNotFoundException(s"Failed to retrieve file from LakeFS: $uri") + } + } + + /** + * Provides a local File object of the LakeFS file by downloading it temporarily. + */ + override def asFile(): File = { + tempFile match { + case Some(file) => file + case None => + tempFile = Some(LakeFSFileStorage.retrieveFileContent(repoName, commitHash, objectPath)) + tempFile.get + } + } + + /** + * Deletes the temporary file and the object from LakeFS. 
+ */ + override def clear(): Unit = { + // Delete temporary local file + tempFile.foreach(file => Files.deleteIfExists(file.toPath)) + + // Delete the object from LakeFS + try { + LakeFSFileStorage.removeFileFromRepo(repoName, commitHash, objectPath) + } catch { + case _: Exception => + throw new FileNotFoundException(s"Failed to delete file from LakeFS: $uri") + } + } +} \ No newline at end of file From 9bcb8e4769ddc12f6f0f0fbb52c4308039552b80 Mon Sep 17 00:00:00 2001 From: Jiadong Bai Date: Thu, 16 Jan 2025 12:40:29 -0800 Subject: [PATCH 04/47] fix bugs --- .../amber/core/storage/DocumentFactory.scala | 2 + .../ics/amber/core/storage/FileResolver.scala | 20 ++-- .../storage/LakeFSApiClientInstance.scala | 30 +++--- .../core/storage/LakeFSFileStorage.scala | 99 ++++++++++--------- .../amber/core/storage/VFSURIFactory.scala | 1 - .../storage/model/LakeFSFileDocument.scala | 47 +++++---- .../amber/storage/LakeFSFileStorageSpec.scala | 11 ++- 7 files changed, 109 insertions(+), 101 deletions(-) diff --git a/core/workflow-core/src/main/scala/edu/uci/ics/amber/core/storage/DocumentFactory.scala b/core/workflow-core/src/main/scala/edu/uci/ics/amber/core/storage/DocumentFactory.scala index 5d057a8743..2222e5f278 100644 --- a/core/workflow-core/src/main/scala/edu/uci/ics/amber/core/storage/DocumentFactory.scala +++ b/core/workflow-core/src/main/scala/edu/uci/ics/amber/core/storage/DocumentFactory.scala @@ -29,6 +29,8 @@ object DocumentFactory { fileUri.getScheme match { case DATASET_FILE_URI_SCHEME => new DatasetFileDocument(fileUri) case "file" => new ReadonlyLocalFileDocument(fileUri) + case LAKEFS_FILE_URI_SCHEME => + new LakeFSFileDocument(fileUri) case unsupportedScheme => throw new UnsupportedOperationException( s"Unsupported URI scheme: $unsupportedScheme for creating the ReadonlyDocument" diff --git a/core/workflow-core/src/main/scala/edu/uci/ics/amber/core/storage/FileResolver.scala b/core/workflow-core/src/main/scala/edu/uci/ics/amber/core/storage/FileResolver.scala index 2ef3da1bff..821f8d92be 100644 --- a/core/workflow-core/src/main/scala/edu/uci/ics/amber/core/storage/FileResolver.scala +++ b/core/workflow-core/src/main/scala/edu/uci/ics/amber/core/storage/FileResolver.scala @@ -137,11 +137,11 @@ object FileResolver { } /** - * Resolves a LakeFS file. - * - * Expected input: /repoName/commitHash/objectPath (objectPath can have nested directories) - * Resolved as: lakefs://repoName/commitHash/objectPath - */ + * Resolves a LakeFS file. 
+ * + * Expected input: /repoName/commitHash/objectPath (objectPath can have nested directories) + * Resolved as: lakefs://repoName/commitHash/objectPath + */ private def lakeFSResolveFunc(fileName: String): URI = { // Ensure the URI has the lakefs scheme val fullUri = if (isFileResolved(fileName)) { @@ -159,13 +159,9 @@ object FileResolver { val filePath = Paths.get(fullUri.getPath.stripPrefix("/")) val pathSegments = (0 until filePath.getNameCount).map(filePath.getName(_).toString).toArray - if (pathSegments.length < 3) { - throw new FileNotFoundException(s"Invalid LakeFS URI format: $fileName") - } - - val repoName = pathSegments(0) // repoName - val commitHash = pathSegments(1) // commitHash - val objectPath = Paths.get(pathSegments.drop(2).head, pathSegments.drop(2).tail: _*).toString + val repoName = fullUri.getHost // repoName + val commitHash = pathSegments.head // commitHash + val objectPath = Paths.get(pathSegments.drop(1).head, pathSegments.drop(1).tail: _*).toString try { // Verify that the object exists in LakeFS diff --git a/core/workflow-core/src/main/scala/edu/uci/ics/amber/core/storage/LakeFSApiClientInstance.scala b/core/workflow-core/src/main/scala/edu/uci/ics/amber/core/storage/LakeFSApiClientInstance.scala index c8bd94ddf0..c8d10aeff4 100644 --- a/core/workflow-core/src/main/scala/edu/uci/ics/amber/core/storage/LakeFSApiClientInstance.scala +++ b/core/workflow-core/src/main/scala/edu/uci/ics/amber/core/storage/LakeFSApiClientInstance.scala @@ -6,11 +6,11 @@ import java.util import scala.jdk.CollectionConverters._ /** - * LakeFSApiClientInstance is a singleton that manages the LakeFS ApiClient instance. - * - Provides a single shared ApiClient for all LakeFS operations in the Texera application. - * - Lazily initializes the client on first access. - * - Supports replacing the client instance primarily for testing or reconfiguration. - */ + * LakeFSApiClientInstance is a singleton that manages the LakeFS ApiClient instance. + * - Provides a single shared ApiClient for all LakeFS operations in the Texera application. + * - Lazily initializes the client on first access. + * - Supports replacing the client instance primarily for testing or reconfiguration. + */ object LakeFSApiClientInstance { private var instance: Option[ApiClient] = None @@ -28,10 +28,10 @@ object LakeFSApiClientInstance { private val password: String = "wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY" /** - * Retrieves the singleton LakeFS ApiClient instance. - * - If the client is not initialized, it is lazily created using the configured properties. - * @return the ApiClient instance. - */ + * Retrieves the singleton LakeFS ApiClient instance. + * - If the client is not initialized, it is lazily created using the configured properties. + * @return the ApiClient instance. + */ def getInstance(): ApiClient = { instance match { case Some(client) => client @@ -46,12 +46,12 @@ object LakeFSApiClientInstance { } /** - * Replaces the existing LakeFS ApiClient instance. - * - This method is useful for testing or dynamically updating the client. - * - * @param apiClient the new ApiClient instance to replace the current one. - */ + * Replaces the existing LakeFS ApiClient instance. + * - This method is useful for testing or dynamically updating the client. + * + * @param apiClient the new ApiClient instance to replace the current one. 
+ */ def replaceInstance(apiClient: ApiClient): Unit = { instance = Some(apiClient) } -} \ No newline at end of file +} diff --git a/core/workflow-core/src/main/scala/edu/uci/ics/amber/core/storage/LakeFSFileStorage.scala b/core/workflow-core/src/main/scala/edu/uci/ics/amber/core/storage/LakeFSFileStorage.scala index cf8df02958..f2aee35d4e 100644 --- a/core/workflow-core/src/main/scala/edu/uci/ics/amber/core/storage/LakeFSFileStorage.scala +++ b/core/workflow-core/src/main/scala/edu/uci/ics/amber/core/storage/LakeFSFileStorage.scala @@ -8,9 +8,9 @@ import java.nio.file.Files import scala.jdk.CollectionConverters._ /** - * LakeFSFileStorage provides high-level file storage operations using LakeFS, - * similar to Git operations for version control and file management. - */ + * LakeFSFileStorage provides high-level file storage operations using LakeFS, + * similar to Git operations for version control and file management. + */ object LakeFSFileStorage { // Lazy initialization of LakeFS API clients @@ -21,13 +21,17 @@ object LakeFSFileStorage { private lazy val refsApi: RefsApi = new RefsApi(apiClient) /** - * Initializes a new repository in LakeFS. - * - * @param repoName Name of the repository. - * @param storageNamespace Storage path (e.g., "s3://bucket-name/"). - * @param defaultBranch Default branch name, usually "main". - */ - def initRepo(repoName: String, storageNamespace: String, defaultBranch: String = "main"): Repository = { + * Initializes a new repository in LakeFS. + * + * @param repoName Name of the repository. + * @param storageNamespace Storage path (e.g., "s3://bucket-name/"). + * @param defaultBranch Default branch name, usually "main". + */ + def initRepo( + repoName: String, + storageNamespace: String, + defaultBranch: String = "main" + ): Repository = { val repo = new RepositoryCreation() .name(repoName) .storageNamespace(storageNamespace) @@ -38,15 +42,20 @@ object LakeFSFileStorage { } /** - * Writes a file to the repository (similar to Git add). - * Converts the InputStream to a temporary file for upload. - * - * @param repoName Repository name. - * @param branch Branch name. - * @param filePath Path in the repository. - * @param inputStream File content stream. - */ - def writeFileToRepo(repoName: String, branch: String, filePath: String, inputStream: InputStream): ObjectStats = { + * Writes a file to the repository (similar to Git add). + * Converts the InputStream to a temporary file for upload. + * + * @param repoName Repository name. + * @param branch Branch name. + * @param filePath Path in the repository. + * @param inputStream File content stream. + */ + def writeFileToRepo( + repoName: String, + branch: String, + filePath: String, + inputStream: InputStream + ): ObjectStats = { val tempFilePath = Files.createTempFile("lakefs-upload-", ".tmp") val tempFileStream = new FileOutputStream(tempFilePath.toFile) val buffer = new Array[Byte](1024) @@ -65,25 +74,27 @@ object LakeFSFileStorage { } /** - * Removes a file from the repository (similar to Git rm). - * - * @param repoName Repository name. - * @param branch Branch name. - * @param filePath Path in the repository to delete. - */ + * Removes a file from the repository (similar to Git rm). + * + * @param repoName Repository name. + * @param branch Branch name. + * @param filePath Path in the repository to delete. 
+ */ def removeFileFromRepo(repoName: String, branch: String, filePath: String): Unit = { objectsApi.deleteObject(repoName, branch, filePath).execute() } /** - * Executes operations and creates a commit (similar to a transactional commit). - * - * @param repoName Repository name. - * @param branch Branch name. - * @param commitMessage Commit message. - * @param operations File operations to perform before committing. - */ - def withCreateVersion(repoName: String, branch: String, commitMessage: String)(operations: => Unit): Commit = { + * Executes operations and creates a commit (similar to a transactional commit). + * + * @param repoName Repository name. + * @param branch Branch name. + * @param commitMessage Commit message. + * @param operations File operations to perform before committing. + */ + def withCreateVersion(repoName: String, branch: String, commitMessage: String)( + operations: => Unit + ): Commit = { operations val commit = new CommitCreation() .message(commitMessage) @@ -92,21 +103,21 @@ object LakeFSFileStorage { } /** - * Retrieves file content from a specific commit and path. - * - * @param repoName Repository name. - * @param commitHash Commit hash of the version. - * @param filePath Path to the file in the repository. - */ + * Retrieves file content from a specific commit and path. + * + * @param repoName Repository name. + * @param commitHash Commit hash of the version. + * @param filePath Path to the file in the repository. + */ def retrieveFileContent(repoName: String, commitHash: String, filePath: String): File = { objectsApi.getObject(repoName, commitHash, filePath).execute() } /** - * Deletes an entire repository. - * - * @param repoName Name of the repository to delete. - */ + * Deletes an entire repository. + * + * @param repoName Name of the repository to delete. 
+ */ def deleteRepo(repoName: String): Unit = { repoApi.deleteRepository(repoName).execute() } @@ -118,4 +129,4 @@ object LakeFSFileStorage { def retrieveObjectsOfVersion(repoName: String, commitHash: String): List[ObjectStats] = { objectsApi.listObjects(repoName, commitHash).execute().getResults.asScala.toList } -} \ No newline at end of file +} diff --git a/core/workflow-core/src/main/scala/edu/uci/ics/amber/core/storage/VFSURIFactory.scala b/core/workflow-core/src/main/scala/edu/uci/ics/amber/core/storage/VFSURIFactory.scala index ab9487a840..b822144a2a 100644 --- a/core/workflow-core/src/main/scala/edu/uci/ics/amber/core/storage/VFSURIFactory.scala +++ b/core/workflow-core/src/main/scala/edu/uci/ics/amber/core/storage/VFSURIFactory.scala @@ -6,7 +6,6 @@ import edu.uci.ics.amber.core.virtualidentity.{ WorkflowIdentity } import edu.uci.ics.amber.core.workflow.PortIdentity -import io.lakefs.clients.sdk.model.ObjectStats import java.net.URI diff --git a/core/workflow-core/src/main/scala/edu/uci/ics/amber/core/storage/model/LakeFSFileDocument.scala b/core/workflow-core/src/main/scala/edu/uci/ics/amber/core/storage/model/LakeFSFileDocument.scala index 1e3c8e6fb9..85be8dace7 100644 --- a/core/workflow-core/src/main/scala/edu/uci/ics/amber/core/storage/model/LakeFSFileDocument.scala +++ b/core/workflow-core/src/main/scala/edu/uci/ics/amber/core/storage/model/LakeFSFileDocument.scala @@ -4,29 +4,24 @@ import edu.uci.ics.amber.core.storage.LakeFSFileStorage import org.apache.commons.vfs2.FileNotFoundException import java.io.{File, InputStream} -import java.net.{URI, URLDecoder} -import java.nio.charset.StandardCharsets +import java.net.URI import java.nio.file.{Files, Paths} -import scala.jdk.CollectionConverters.IteratorHasAsScala /** - * LakeFSFileDocument manages file operations on LakeFS. - * - * @param uri The LakeFS file URI in the format: lakefs://repoName/commitHash/objectPath - */ + * LakeFSFileDocument manages file operations on LakeFS. + * + * @param uri The LakeFS file URI in the format: lakefs://repoName/commitHash/objectPath + */ private[storage] class LakeFSFileDocument(uri: URI) extends VirtualDocument[Nothing] { // Utility function to parse and decode URI into components private def parseUri(uri: URI): (String, String, String) = { - val segments = Paths.get(uri.getPath).iterator().asScala.map(_.toString).toArray + val filePath = Paths.get(uri.getPath.stripPrefix("/")) + val segments = (0 until filePath.getNameCount).map(filePath.getName(_).toString).toArray - if (segments.length < 3) - throw new IllegalArgumentException(s"Invalid LakeFS URI format: $uri") - - val repoName = URLDecoder.decode(segments(0), StandardCharsets.UTF_8) - val commitHash = URLDecoder.decode(segments(1), StandardCharsets.UTF_8) - val decodedPathSegments = segments.drop(2).map(part => URLDecoder.decode(part, StandardCharsets.UTF_8)) - val objectPath = Paths.get(decodedPathSegments.head, decodedPathSegments.tail: _*).toString + val repoName = uri.getHost // repoName + val commitHash = segments.head // commitHash + val objectPath = Paths.get(segments.drop(1).head, segments.drop(1).tail: _*).toString (repoName, commitHash, objectPath) } @@ -38,16 +33,18 @@ private[storage] class LakeFSFileDocument(uri: URI) extends VirtualDocument[Noth private var tempFile: Option[File] = None /** - * Returns the URI of the LakeFS file. - */ + * Returns the URI of the LakeFS file. + */ override def getURI: URI = uri /** - * Provides an InputStream of the LakeFS file content. 
- */ + * Provides an InputStream of the LakeFS file content. + */ override def asInputStream(): InputStream = { try { - Files.newInputStream(LakeFSFileStorage.retrieveFileContent(repoName, commitHash, objectPath).toPath) + Files.newInputStream( + LakeFSFileStorage.retrieveFileContent(repoName, commitHash, objectPath).toPath + ) } catch { case _: Exception => throw new FileNotFoundException(s"Failed to retrieve file from LakeFS: $uri") @@ -55,8 +52,8 @@ private[storage] class LakeFSFileDocument(uri: URI) extends VirtualDocument[Noth } /** - * Provides a local File object of the LakeFS file by downloading it temporarily. - */ + * Provides a local File object of the LakeFS file by downloading it temporarily. + */ override def asFile(): File = { tempFile match { case Some(file) => file @@ -67,8 +64,8 @@ private[storage] class LakeFSFileDocument(uri: URI) extends VirtualDocument[Noth } /** - * Deletes the temporary file and the object from LakeFS. - */ + * Deletes the temporary file and the object from LakeFS. + */ override def clear(): Unit = { // Delete temporary local file tempFile.foreach(file => Files.deleteIfExists(file.toPath)) @@ -81,4 +78,4 @@ private[storage] class LakeFSFileDocument(uri: URI) extends VirtualDocument[Noth throw new FileNotFoundException(s"Failed to delete file from LakeFS: $uri") } } -} \ No newline at end of file +} diff --git a/core/workflow-core/src/test/scala/edu/uci/ics/amber/storage/LakeFSFileStorageSpec.scala b/core/workflow-core/src/test/scala/edu/uci/ics/amber/storage/LakeFSFileStorageSpec.scala index 8d34feb2c8..89c84ed5cb 100644 --- a/core/workflow-core/src/test/scala/edu/uci/ics/amber/storage/LakeFSFileStorageSpec.scala +++ b/core/workflow-core/src/test/scala/edu/uci/ics/amber/storage/LakeFSFileStorageSpec.scala @@ -44,7 +44,8 @@ class LakeFSFileStorageSpec extends AnyFlatSpec with Matchers with BeforeAndAfte } private def findCommitByMessage(message: String): Option[String] = { - LakeFSFileStorage.retrieveVersionsOfRepository(repoName, branchName) + LakeFSFileStorage + .retrieveVersionsOfRepository(repoName, branchName) .find(_.getMessage == message) .map(_.getId) } @@ -99,7 +100,8 @@ class LakeFSFileStorageSpec extends AnyFlatSpec with Matchers with BeforeAndAfte val deleteCommit = findCommitByMessage("Remove file2.txt").get // Verify file2.txt is absent in the latest commit - val objectsAfterDeletion = LakeFSFileStorage.retrieveObjectsOfVersion(repoName, deleteCommit).map(_.getPath) + val objectsAfterDeletion = + LakeFSFileStorage.retrieveObjectsOfVersion(repoName, deleteCommit).map(_.getPath) objectsAfterDeletion should not contain filePaths(1) // Verify file1.txt is still present @@ -109,7 +111,8 @@ class LakeFSFileStorageSpec extends AnyFlatSpec with Matchers with BeforeAndAfte it should "maintain hierarchical structure in file retrieval" in { // Get the latest commit - val latestCommit = LakeFSFileStorage.retrieveVersionsOfRepository(repoName, branchName).head.getId + val latestCommit = + LakeFSFileStorage.retrieveVersionsOfRepository(repoName, branchName).head.getId // Retrieve all objects val objects = LakeFSFileStorage.retrieveObjectsOfVersion(repoName, latestCommit) @@ -119,4 +122,4 @@ class LakeFSFileStorageSpec extends AnyFlatSpec with Matchers with BeforeAndAfte objectPaths should contain("dir1/subdir1/file3.txt") objectPaths should contain("dir2/file4.txt") } -} \ No newline at end of file +} From df0b5de83b22ddce11503a5df18036ab66c54880 Mon Sep 17 00:00:00 2001 From: Jiadong Bai Date: Sun, 16 Feb 2025 13:57:34 -0800 Subject: [PATCH 05/47] a 
compilable version --- core/build.sbt | 13 +- core/file-service/build.sbt | 67 ++ .../src/main/resources/auth-config.yaml | 6 + .../main/resources/file-service-config.yaml | 22 + .../uci/ics/texera/service/FileService.scala | 47 + .../service/FileServiceConfiguration.scala | 5 + .../ics/texera/service/auth/AuthConfig.scala | 34 + .../uci/ics/texera/service/auth/JwtAuth.scala | 55 + .../ics/texera/service/auth/SessionUser.scala | 21 + .../service/auth/UserAuthenticator.scala | 31 + .../service/auth/UserRoleAuthorizer.scala | 15 + .../resource/DatasetAccessResource.scala | 194 ++++ .../service/resource/DatasetResource.scala | 961 ++++++++++++++++++ .../texera/service/type/DatasetFileNode.scala | 131 +++ .../amber/core/storage/DocumentFactory.scala | 2 +- .../edu/uci/ics/amber/util/PathUtils.scala | 2 + 16 files changed, 1603 insertions(+), 3 deletions(-) create mode 100644 core/file-service/build.sbt create mode 100644 core/file-service/src/main/resources/auth-config.yaml create mode 100644 core/file-service/src/main/resources/file-service-config.yaml create mode 100644 core/file-service/src/main/scala/edu/uci/ics/texera/service/FileService.scala create mode 100644 core/file-service/src/main/scala/edu/uci/ics/texera/service/FileServiceConfiguration.scala create mode 100644 core/file-service/src/main/scala/edu/uci/ics/texera/service/auth/AuthConfig.scala create mode 100644 core/file-service/src/main/scala/edu/uci/ics/texera/service/auth/JwtAuth.scala create mode 100644 core/file-service/src/main/scala/edu/uci/ics/texera/service/auth/SessionUser.scala create mode 100644 core/file-service/src/main/scala/edu/uci/ics/texera/service/auth/UserAuthenticator.scala create mode 100644 core/file-service/src/main/scala/edu/uci/ics/texera/service/auth/UserRoleAuthorizer.scala create mode 100644 core/file-service/src/main/scala/edu/uci/ics/texera/service/resource/DatasetAccessResource.scala create mode 100644 core/file-service/src/main/scala/edu/uci/ics/texera/service/resource/DatasetResource.scala create mode 100644 core/file-service/src/main/scala/edu/uci/ics/texera/service/type/DatasetFileNode.scala diff --git a/core/build.sbt b/core/build.sbt index 606d9242d6..5cccdfc48e 100644 --- a/core/build.sbt +++ b/core/build.sbt @@ -3,8 +3,17 @@ lazy val WorkflowCore = (project in file("workflow-core")) .dependsOn(DAO) .configs(Test) .dependsOn(DAO % "test->test") // test scope dependency -lazy val FileStorageService = (project in file("file-storage-service")) +lazy val FileService = (project in file("file-service")) .dependsOn(WorkflowCore) + .settings( + dependencyOverrides ++= Seq( + // override it as io.dropwizard 4 require 2.16.1 or higher + "com.fasterxml.jackson.module" %% "jackson-module-scala" % "2.16.1", + "com.fasterxml.jackson.core" % "jackson-databind" % "2.16.1", + "org.glassfish.jersey.core" % "jersey-common" % "3.0.12" + ) + ) + lazy val WorkflowOperator = (project in file("workflow-operator")).dependsOn(WorkflowCore) lazy val WorkflowCompilingService = (project in file("workflow-compiling-service")) .dependsOn(WorkflowOperator) @@ -35,7 +44,7 @@ lazy val WorkflowExecutionService = (project in file("amber")) // root project definition lazy val CoreProject = (project in file(".")) - .aggregate(DAO, WorkflowCore, WorkflowOperator, WorkflowCompilingService, WorkflowExecutionService) + .aggregate(DAO, WorkflowCore, FileService, WorkflowOperator, WorkflowCompilingService, WorkflowExecutionService) .settings( name := "core", version := "0.1.0", diff --git a/core/file-service/build.sbt 
b/core/file-service/build.sbt new file mode 100644 index 0000000000..d2bb7c15cb --- /dev/null +++ b/core/file-service/build.sbt @@ -0,0 +1,67 @@ +import scala.collection.Seq + +name := "file-service" +organization := "edu.uci.ics" +version := "0.1.0" +scalaVersion := "2.13.12" + + +// Enable semanticdb for Scalafix +ThisBuild / semanticdbEnabled := true +ThisBuild / semanticdbVersion := scalafixSemanticdb.revision + +// Manage dependency conflicts by always using the latest revision +ThisBuild / conflictManager := ConflictManager.latestRevision + +// Restrict parallel execution of tests to avoid conflicts +Global / concurrentRestrictions += Tags.limit(Tags.Test, 1) + +///////////////////////////////////////////////////////////////////////////// +// Compiler Options +///////////////////////////////////////////////////////////////////////////// + +// Scala compiler options +Compile / scalacOptions ++= Seq( + "-Xelide-below", "WARNING", // Turn on optimizations with "WARNING" as the threshold + "-feature", // Check feature warnings + "-deprecation", // Check deprecation warnings + "-Ywarn-unused:imports" // Check for unused imports +) + +///////////////////////////////////////////////////////////////////////////// +// Version Variables +///////////////////////////////////////////////////////////////////////////// + +val dropwizardVersion = "4.0.7" +val mockitoVersion = "5.4.0" +val assertjVersion = "3.24.2" + +///////////////////////////////////////////////////////////////////////////// +// Test-related Dependencies +///////////////////////////////////////////////////////////////////////////// + +libraryDependencies ++= Seq( + "org.scalamock" %% "scalamock" % "5.2.0" % Test, // ScalaMock + "org.scalatest" %% "scalatest" % "3.2.17" % Test, // ScalaTest + "io.dropwizard" % "dropwizard-testing" % dropwizardVersion % Test, // Dropwizard Testing + "org.mockito" % "mockito-core" % mockitoVersion % Test, // Mockito for mocking + "org.assertj" % "assertj-core" % assertjVersion % Test, // AssertJ for assertions + "com.novocode" % "junit-interface" % "0.11" % Test // SBT interface for JUnit +) + +///////////////////////////////////////////////////////////////////////////// +// Dependencies +///////////////////////////////////////////////////////////////////////////// + +// Core Dependencies +libraryDependencies ++= Seq( + "io.dropwizard" % "dropwizard-core" % dropwizardVersion, + "io.dropwizard" % "dropwizard-auth" % dropwizardVersion, // Dropwizard Authentication module + "com.fasterxml.jackson.module" %% "jackson-module-scala" % "2.15.2", + "org.glassfish.jersey.media" % "jersey-media-multipart" % "3.1.10", + "jakarta.ws.rs" % "jakarta.ws.rs-api" % "3.1.0", // Ensure Jakarta JAX-RS API is available + "com.github.toastshaman" % "dropwizard-auth-jwt" % "1.1.2-0", + "org.bitbucket.b_c" % "jose4j" % "0.9.6", + "org.playframework" %% "play-json" % "3.1.0-M1", + "io.lakefs" % "sdk" % "1.48.0" +) diff --git a/core/file-service/src/main/resources/auth-config.yaml b/core/file-service/src/main/resources/auth-config.yaml new file mode 100644 index 0000000000..a9cd08a7b1 --- /dev/null +++ b/core/file-service/src/main/resources/auth-config.yaml @@ -0,0 +1,6 @@ +auth: + jwt: + exp-in-days: 30 + # generate the secret again for each deployment using the following: + # 'openssl rand -hex 16' or 'xxd -l16 -ps /dev/urandom' + 256-bit-secret: "8a1b2c3d4e5f6a7b8c9d0e1f2a3b4c5d" \ No newline at end of file diff --git a/core/file-service/src/main/resources/file-service-config.yaml 
b/core/file-service/src/main/resources/file-service-config.yaml new file mode 100644 index 0000000000..01396a8910 --- /dev/null +++ b/core/file-service/src/main/resources/file-service-config.yaml @@ -0,0 +1,22 @@ +server: + applicationConnectors: + - type: http + port: 9092 + adminConnectors: [] + +logging: + level: INFO + loggers: + "io.dropwizard": INFO + appenders: + - type: console + - type: file + currentLogFilename: log/file-service.log + threshold: ALL + queueSize: 512 + discardingThreshold: 0 + archive: true + archivedLogFilenamePattern: log/file-service-%d{yyyy-MM-dd}.log.gz + archivedFileCount: 7 + bufferSize: 8KiB + immediateFlush: true \ No newline at end of file diff --git a/core/file-service/src/main/scala/edu/uci/ics/texera/service/FileService.scala b/core/file-service/src/main/scala/edu/uci/ics/texera/service/FileService.scala new file mode 100644 index 0000000000..6b755b8c54 --- /dev/null +++ b/core/file-service/src/main/scala/edu/uci/ics/texera/service/FileService.scala @@ -0,0 +1,47 @@ +package edu.uci.ics.texera.service + +import io.dropwizard.core.Application +import io.dropwizard.core.setup.{Bootstrap, Environment} +import com.fasterxml.jackson.module.scala.DefaultScalaModule +import edu.uci.ics.amber.util.PathUtils.fileServicePath +import edu.uci.ics.texera.service.auth.JwtAuth +import edu.uci.ics.texera.service.resource.{DatasetAccessResource, DatasetResource} +import org.eclipse.jetty.server.session.SessionHandler +import org.glassfish.jersey.media.multipart.MultiPartFeature + +class FileService extends Application[FileServiceConfiguration] { + override def initialize(bootstrap: Bootstrap[FileServiceConfiguration]): Unit = { + // Register Scala module to Dropwizard default object mapper + bootstrap.getObjectMapper.registerModule(DefaultScalaModule) + } + + override def run(configuration: FileServiceConfiguration, environment: Environment): Unit = { + // Serve backend at /api + environment.jersey.setUrlPattern("/api/*") + environment.jersey.register(classOf[SessionHandler]) + environment.servlets.setSessionHandler(new SessionHandler) + environment.jersey.register(classOf[MultiPartFeature]) + // Register JWT authentication + JwtAuth.setupJwtAuth(environment) + + // Register multipart feature for file uploads + environment.jersey.register(classOf[DatasetResource]) + environment.jersey.register(classOf[DatasetAccessResource]) + } +} + +object FileService { + def main(args: Array[String]): Unit = { + // Set the configuration file's path + val configFilePath = fileServicePath + .resolve("src") + .resolve("main") + .resolve("resources") + .resolve("file-service-config.yaml") + .toAbsolutePath + .toString + + // Start the Dropwizard application + new FileService().run("server", configFilePath) + } +} diff --git a/core/file-service/src/main/scala/edu/uci/ics/texera/service/FileServiceConfiguration.scala b/core/file-service/src/main/scala/edu/uci/ics/texera/service/FileServiceConfiguration.scala new file mode 100644 index 0000000000..be3a700ce9 --- /dev/null +++ b/core/file-service/src/main/scala/edu/uci/ics/texera/service/FileServiceConfiguration.scala @@ -0,0 +1,5 @@ +package edu.uci.ics.texera.service + +import io.dropwizard.core.Configuration + +class FileServiceConfiguration extends Configuration {} diff --git a/core/file-service/src/main/scala/edu/uci/ics/texera/service/auth/AuthConfig.scala b/core/file-service/src/main/scala/edu/uci/ics/texera/service/auth/AuthConfig.scala new file mode 100644 index 0000000000..2e6e78192e --- /dev/null +++ 
b/core/file-service/src/main/scala/edu/uci/ics/texera/service/auth/AuthConfig.scala @@ -0,0 +1,34 @@ +package edu.uci.ics.texera.service.auth + +import org.yaml.snakeyaml.Yaml + +import java.util.{Map => JMap} +import scala.jdk.CollectionConverters._ + +object AuthConfig { + private val conf: Map[String, Any] = { + val yaml = new Yaml() + val inputStream = getClass.getClassLoader.getResourceAsStream("auth-config.yaml") + val javaConf = yaml.load(inputStream).asInstanceOf[JMap[String, Any]].asScala.toMap + + val authMap = javaConf("auth").asInstanceOf[JMap[String, Any]].asScala.toMap + val jwtMap = authMap("jwt").asInstanceOf[JMap[String, Any]].asScala.toMap + + javaConf.updated( + "auth", + authMap.updated("jwt", jwtMap) + ) + } + + // Read JWT expiration time + val jwtExpirationDays: Int = conf("auth") + .asInstanceOf[Map[String, Any]]("jwt") + .asInstanceOf[Map[String, Any]]("exp-in-days") + .asInstanceOf[Int] + + // Read JWT secret key + val jwtSecretKey: String = conf("auth") + .asInstanceOf[Map[String, Any]]("jwt") + .asInstanceOf[Map[String, Any]]("256-bit-secret") + .asInstanceOf[String] +} diff --git a/core/file-service/src/main/scala/edu/uci/ics/texera/service/auth/JwtAuth.scala b/core/file-service/src/main/scala/edu/uci/ics/texera/service/auth/JwtAuth.scala new file mode 100644 index 0000000000..9df68d19f3 --- /dev/null +++ b/core/file-service/src/main/scala/edu/uci/ics/texera/service/auth/JwtAuth.scala @@ -0,0 +1,55 @@ +package edu.uci.ics.texera.service.auth + +import com.github.toastshaman.dropwizard.auth.jwt.JwtAuthFilter +import io.dropwizard.auth.AuthDynamicFeature +import io.dropwizard.auth.AuthValueFactoryProvider +import io.dropwizard.auth.PrincipalImpl +import io.dropwizard.core.setup.Environment +import org.jose4j.jwt.consumer.{JwtConsumer, JwtConsumerBuilder} +import org.jose4j.keys.HmacKey + +import java.util.Random + +object JwtAuth { + private val TOKEN_SECRET: String = AuthConfig.jwtSecretKey.toLowerCase() match { + case "random" => getRandomHexString + case _ => AuthConfig.jwtSecretKey + } + private val TOKEN_EXPIRATION_DAYS = AuthConfig.jwtExpirationDays + + // JWT Consumer for verification + private val jwtConsumer: JwtConsumer = new JwtConsumerBuilder() + .setAllowedClockSkewInSeconds(30) + .setRequireExpirationTime() + .setRequireSubject() + .setVerificationKey(new HmacKey(TOKEN_SECRET.getBytes)) + .setRelaxVerificationKeyValidation() + .build() + + // Register authentication middleware in Dropwizard + def setupJwtAuth(environment: Environment): Unit = { + environment.jersey.register( + new AuthDynamicFeature( + new JwtAuthFilter.Builder[SessionUser]() + .setJwtConsumer(jwtConsumer) + .setRealm("realm") + .setPrefix("Bearer") + .setAuthenticator(UserAuthenticator) + .setAuthorizer(UserRoleAuthorizer) + .buildAuthFilter() + ) + ) + + // Allow @Auth annotation injection for secured endpoints + environment.jersey.register(new AuthValueFactoryProvider.Binder(classOf[PrincipalImpl])) + } + + private def getRandomHexString: String = { + val bytes = 32 + val r = new Random() + val sb = new StringBuffer + while (sb.length < bytes) + sb.append(Integer.toHexString(r.nextInt())) + sb.toString.substring(0, bytes) + } +} diff --git a/core/file-service/src/main/scala/edu/uci/ics/texera/service/auth/SessionUser.scala b/core/file-service/src/main/scala/edu/uci/ics/texera/service/auth/SessionUser.scala new file mode 100644 index 0000000000..ec564b82be --- /dev/null +++ b/core/file-service/src/main/scala/edu/uci/ics/texera/service/auth/SessionUser.scala @@ -0,0 +1,21 @@ 
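+// SessionUser wraps the jOOQ-generated User POJO as a java.security.Principal, so the
+// JwtAuthFilter registered in JwtAuth above can hand the authenticated user to resources
+// through Dropwizard's @Auth injection. (Descriptive note added for readability.)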
+package edu.uci.ics.texera.service.auth + +import edu.uci.ics.texera.dao.jooq.generated.enums.UserRole +import edu.uci.ics.texera.dao.jooq.generated.tables.pojos.User +import org.jooq.types.UInteger + +import java.security.Principal + +class SessionUser(val user: User) extends Principal { + def getUser: User = user + + override def getName: String = user.getName + + def getUid: UInteger = user.getUid + + def getEmail: String = user.getEmail + + def getGoogleId: String = user.getGoogleId + + def isRoleOf(role: UserRole): Boolean = user.getRole == role +} diff --git a/core/file-service/src/main/scala/edu/uci/ics/texera/service/auth/UserAuthenticator.scala b/core/file-service/src/main/scala/edu/uci/ics/texera/service/auth/UserAuthenticator.scala new file mode 100644 index 0000000000..a1df7944df --- /dev/null +++ b/core/file-service/src/main/scala/edu/uci/ics/texera/service/auth/UserAuthenticator.scala @@ -0,0 +1,31 @@ +package edu.uci.ics.texera.service.auth + +import com.typesafe.scalalogging.LazyLogging +import edu.uci.ics.texera.dao.jooq.generated.enums.UserRole +import edu.uci.ics.texera.dao.jooq.generated.tables.pojos.User +import io.dropwizard.auth.Authenticator +import org.jooq.types.UInteger +import org.jose4j.jwt.consumer.JwtContext + +import java.util.Optional + +object UserAuthenticator extends Authenticator[JwtContext, SessionUser] with LazyLogging { + override def authenticate(context: JwtContext): Optional[SessionUser] = { + // This method will be called once the token's signature has been verified, + // including the token secret and the expiration time + try { + val userName = context.getJwtClaims.getSubject + val email = context.getJwtClaims.getClaimValue("email").asInstanceOf[String] + val userId = UInteger.valueOf(context.getJwtClaims.getClaimValue("userId").asInstanceOf[Long]) + val role = UserRole.valueOf(context.getJwtClaims.getClaimValue("role").asInstanceOf[String]) + val googleId = context.getJwtClaims.getClaimValue("googleId").asInstanceOf[String] + val user = new User(userId, userName, email, null, googleId, role, null) + Optional.of(new SessionUser(user)) + } catch { + case e: Exception => + logger.error("Failed to authenticate the JwtContext", e) + Optional.empty() + } + + } +} diff --git a/core/file-service/src/main/scala/edu/uci/ics/texera/service/auth/UserRoleAuthorizer.scala b/core/file-service/src/main/scala/edu/uci/ics/texera/service/auth/UserRoleAuthorizer.scala new file mode 100644 index 0000000000..3df41f6307 --- /dev/null +++ b/core/file-service/src/main/scala/edu/uci/ics/texera/service/auth/UserRoleAuthorizer.scala @@ -0,0 +1,15 @@ +package edu.uci.ics.texera.service.auth + +import edu.uci.ics.texera.dao.jooq.generated.enums.UserRole +import io.dropwizard.auth.Authorizer +import jakarta.ws.rs.container.ContainerRequestContext + +object UserRoleAuthorizer extends Authorizer[SessionUser] { + override def authorize( + user: SessionUser, + role: String, + requestContext: ContainerRequestContext + ): Boolean = { + user.isRoleOf(UserRole.valueOf(role)) + } +} diff --git a/core/file-service/src/main/scala/edu/uci/ics/texera/service/resource/DatasetAccessResource.scala b/core/file-service/src/main/scala/edu/uci/ics/texera/service/resource/DatasetAccessResource.scala new file mode 100644 index 0000000000..cf0f0cf404 --- /dev/null +++ b/core/file-service/src/main/scala/edu/uci/ics/texera/service/resource/DatasetAccessResource.scala @@ -0,0 +1,194 @@ +package edu.uci.ics.texera.service.resource + +import edu.uci.ics.amber.core.storage.StorageConfig +import 
edu.uci.ics.texera.dao.SqlServer +import edu.uci.ics.texera.dao.SqlServer.withTransaction +import edu.uci.ics.texera.dao.jooq.generated.Tables.USER +import edu.uci.ics.texera.dao.jooq.generated.enums.DatasetUserAccessPrivilege +import edu.uci.ics.texera.dao.jooq.generated.tables.DatasetUserAccess.DATASET_USER_ACCESS +import edu.uci.ics.texera.dao.jooq.generated.tables.daos.{DatasetDao, DatasetUserAccessDao, UserDao} +import edu.uci.ics.texera.dao.jooq.generated.tables.pojos.{DatasetUserAccess, User} +import edu.uci.ics.texera.service.resource.DatasetAccessResource.{AccessEntry, context, getOwner} +import org.jooq.{DSLContext, EnumType} +import org.jooq.types.UInteger + +import java.util +import javax.annotation.security.RolesAllowed +import javax.ws.rs._ +import javax.ws.rs.core.{MediaType, Response} + +object DatasetAccessResource { + private lazy val context: DSLContext = SqlServer + .getInstance(StorageConfig.jdbcUrl, StorageConfig.jdbcUsername, StorageConfig.jdbcPassword) + .createDSLContext() + + def isDatasetPublic(ctx: DSLContext, did: UInteger): Boolean = { + val datasetDao = new DatasetDao(ctx.configuration()) + Option(datasetDao.fetchOneByDid(did)) + .flatMap(dataset => Option(dataset.getIsPublic)) + .contains(1.toByte) + } + + def userHasReadAccess(ctx: DSLContext, did: UInteger, uid: UInteger): Boolean = { + isDatasetPublic(ctx, did) || + userHasWriteAccess(ctx, did, uid) || + getDatasetUserAccessPrivilege(ctx, did, uid) == DatasetUserAccessPrivilege.READ + } + + def userOwnDataset(ctx: DSLContext, did: UInteger, uid: UInteger): Boolean = { + val datasetDao = new DatasetDao(ctx.configuration()) + + Option(datasetDao.fetchOneByDid(did)) + .exists(_.getOwnerUid == uid) + } + + def userHasWriteAccess(ctx: DSLContext, did: UInteger, uid: UInteger): Boolean = { + userOwnDataset(ctx, did, uid) || + getDatasetUserAccessPrivilege(ctx, did, uid) == DatasetUserAccessPrivilege.WRITE + } + + def getDatasetUserAccessPrivilege( + ctx: DSLContext, + did: UInteger, + uid: UInteger + ): DatasetUserAccessPrivilege = { + Option( + ctx + .select(DATASET_USER_ACCESS.PRIVILEGE) + .from(DATASET_USER_ACCESS) + .where( + DATASET_USER_ACCESS.DID + .eq(did) + .and(DATASET_USER_ACCESS.UID.eq(uid)) + ) + .fetchOneInto(classOf[DatasetUserAccessPrivilege]) + ).getOrElse(DatasetUserAccessPrivilege.NONE) + } + + def getOwner(ctx: DSLContext, did: UInteger): User = { + val datasetDao = new DatasetDao(ctx.configuration()) + val userDao = new UserDao(ctx.configuration()) + + Option(datasetDao.fetchOneByDid(did)) + .flatMap(dataset => Option(dataset.getOwnerUid)) + .map(ownerUid => userDao.fetchOneByUid(ownerUid)) + .orNull + } + + case class AccessEntry(email: String, name: String, privilege: EnumType) {} + +} + +@Produces(Array(MediaType.APPLICATION_JSON)) +@RolesAllowed(Array("REGULAR", "ADMIN")) +@Path("/access/dataset") +class DatasetAccessResource { + + /** + * This method returns the owner of a dataset + * + * @param did , dataset id + * @return ownerEmail, the owner's email + */ + @GET + @Path("/owner/{did}") + def getOwnerEmailOfDataset(@PathParam("did") did: UInteger): String = { + var email = "" + withTransaction(context) { ctx => + val owner = getOwner(ctx, did) + if (owner != null) { + email = owner.getEmail + } + } + email + } + + /** + * Returns information about all current shared access of the given dataset + * + * @param did dataset id + * @return a List of email/name/permission + */ + @GET + @Path("/list/{did}") + def getAccessList( + @PathParam("did") did: UInteger + ): util.List[AccessEntry] = 
{ + withTransaction(context) { ctx => + val datasetDao = new DatasetDao(ctx.configuration()) + ctx + .select( + USER.EMAIL, + USER.NAME, + DATASET_USER_ACCESS.PRIVILEGE + ) + .from(DATASET_USER_ACCESS) + .join(USER) + .on(USER.UID.eq(DATASET_USER_ACCESS.UID)) + .where( + DATASET_USER_ACCESS.DID + .eq(did) + .and(DATASET_USER_ACCESS.UID.notEqual(datasetDao.fetchOneByDid(did).getOwnerUid)) + ) + .fetchInto(classOf[AccessEntry]) + } + } + + /** + * This method shares a dataset to a user with a specific access type + * + * @param did the given dataset + * @param email the email which the access is given to + * @param privilege the type of Access given to the target user + * @return rejection if user not permitted to share the workflow or Success Message + */ + @PUT + @Path("/grant/{did}/{email}/{privilege}") + def grantAccess( + @PathParam("did") did: UInteger, + @PathParam("email") email: String, + @PathParam("privilege") privilege: String + ): Response = { + withTransaction(context) { ctx => + val datasetUserAccessDao = new DatasetUserAccessDao(ctx.configuration()) + val userDao = new UserDao(ctx.configuration()) + datasetUserAccessDao.merge( + new DatasetUserAccess( + did, + userDao.fetchOneByEmail(email).getUid, + DatasetUserAccessPrivilege.valueOf(privilege) + ) + ) + Response.ok().build() + } + } + + /** + * This method revoke the user's access of the given dataset + * + * @param did the given dataset + * @param email the email of the use whose access is about to be removed + * @return message indicating a success message + */ + @DELETE + @Path("/revoke/{did}/{email}") + def revokeAccess( + @PathParam("did") did: UInteger, + @PathParam("email") email: String + ): Response = { + withTransaction(context) { ctx => + val userDao = new UserDao(ctx.configuration()) + + ctx + .delete(DATASET_USER_ACCESS) + .where( + DATASET_USER_ACCESS.UID + .eq(userDao.fetchOneByEmail(email).getUid) + .and(DATASET_USER_ACCESS.DID.eq(did)) + ) + .execute() + + Response.ok().build() + } + } +} diff --git a/core/file-service/src/main/scala/edu/uci/ics/texera/service/resource/DatasetResource.scala b/core/file-service/src/main/scala/edu/uci/ics/texera/service/resource/DatasetResource.scala new file mode 100644 index 0000000000..3d0b2f2e02 --- /dev/null +++ b/core/file-service/src/main/scala/edu/uci/ics/texera/service/resource/DatasetResource.scala @@ -0,0 +1,961 @@ +package edu.uci.ics.texera.service.resource + +import edu.uci.ics.amber.core.storage.{DocumentFactory, FileResolver, StorageConfig} +import edu.uci.ics.amber.core.storage.util.dataset.{ + GitVersionControlLocalFileStorage, + PhysicalFileNode +} +import edu.uci.ics.amber.util.PathUtils +import edu.uci.ics.texera.dao.SqlServer +import edu.uci.ics.texera.dao.SqlServer.withTransaction +import edu.uci.ics.texera.dao.jooq.generated.enums.DatasetUserAccessPrivilege +import edu.uci.ics.texera.dao.jooq.generated.tables.Dataset.DATASET +import edu.uci.ics.texera.dao.jooq.generated.tables.DatasetUserAccess.DATASET_USER_ACCESS +import edu.uci.ics.texera.dao.jooq.generated.tables.DatasetVersion.DATASET_VERSION +import edu.uci.ics.texera.dao.jooq.generated.tables.User.USER +import edu.uci.ics.texera.dao.jooq.generated.tables.daos.{ + DatasetDao, + DatasetUserAccessDao, + DatasetVersionDao +} +import edu.uci.ics.texera.dao.jooq.generated.tables.pojos.{ + Dataset, + DatasetUserAccess, + DatasetVersion, + User +} +import edu.uci.ics.texera.service.`type`.DatasetFileNode +import edu.uci.ics.texera.service.auth.SessionUser +import 
edu.uci.ics.texera.service.resource.DatasetAccessResource.{ + getDatasetUserAccessPrivilege, + getOwner, + isDatasetPublic, + userHasReadAccess, + userHasWriteAccess, + userOwnDataset +} +import edu.uci.ics.texera.service.resource.DatasetResource.{ + DATASET_IS_PRIVATE, + DATASET_IS_PUBLIC, + DashboardDataset, + DashboardDatasetVersion, + DatasetDescriptionModification, + DatasetIDs, + DatasetNameModification, + DatasetOperation, + DatasetVersionRootFileNodesResponse, + FILE_OPERATION_REMOVE_PREFIX, + FILE_OPERATION_UPLOAD_PREFIX, + applyDatasetOperationToCreateNewVersion, + calculateDatasetVersionSize, + context, + getDatasetByID, + getDatasetVersionByID, + getLatestDatasetVersion +} +import io.dropwizard.auth.Auth +import jakarta.annotation.security.RolesAllowed +import jakarta.ws.rs._ +import jakarta.ws.rs.core.{MediaType, Response, StreamingOutput} +import org.apache.commons.lang3.StringUtils +import org.glassfish.jersey.media.multipart.{FormDataMultiPart, FormDataParam} +import org.jooq.types.UInteger +import org.jooq.{DSLContext, EnumType} +import play.api.libs.json.Json + +import java.io.{IOException, InputStream, OutputStream} +import java.net.{URI, URLDecoder} +import java.nio.charset.StandardCharsets +import java.nio.file.Files +import java.util.Optional +import java.util.concurrent.locks.ReentrantLock +import java.util.zip.{ZipEntry, ZipOutputStream} +import scala.collection.mutable +import scala.collection.mutable.ListBuffer +import scala.jdk.CollectionConverters._ +import scala.jdk.OptionConverters._ +import scala.util.control.NonFatal +import scala.util.{Failure, Success, Try, Using} + +object DatasetResource { + private val DATASET_IS_PUBLIC: Byte = 1 + private val DATASET_IS_PRIVATE: Byte = 0 + private val FILE_OPERATION_UPLOAD_PREFIX = "file:upload:" + private val FILE_OPERATION_REMOVE_PREFIX = "file:remove" + + private val datasetLocks: scala.collection.concurrent.Map[UInteger, ReentrantLock] = + new scala.collection.concurrent.TrieMap[UInteger, ReentrantLock]() + + private val context = SqlServer + .getInstance(StorageConfig.jdbcUrl, StorageConfig.jdbcUsername, StorageConfig.jdbcPassword) + .createDSLContext() + + /** + * fetch the size of a certain dataset version. + * @param did the target dataset id + * @param versionHash the hash of the version. 
If None, fetch the latest version + * @return + */ + def calculateDatasetVersionSize(did: UInteger, versionHash: Option[String] = None): Long = { + + /** + * Internal util to calculate the size from the physical nodes + */ + def calculateSizeFromPhysicalNodes(nodes: java.util.Set[PhysicalFileNode]): Long = { + nodes.asScala.foldLeft(0L) { (totalSize, node) => + totalSize + (if (node.isDirectory) { + calculateSizeFromPhysicalNodes(node.getChildren) + } else { + node.getSize + }) + } + } + + Try { + val datasetPath = PathUtils.getDatasetPath(did) + val hash = versionHash.getOrElse { + getLatestDatasetVersion(context, did) + .map(_.getVersionHash) + .getOrElse(throw new NoSuchElementException("No versions found for this dataset")) + } + + val fileNodes = GitVersionControlLocalFileStorage.retrieveRootFileNodesOfVersion( + datasetPath, + hash + ) + + calculateSizeFromPhysicalNodes(fileNodes) + } match { + case Success(size) => size + case Failure(exception) => + val errorMessage = versionHash.map(_ => "dataset version").getOrElse("dataset") + println(s"Error calculating $errorMessage size: ${exception.getMessage}") + 0L + } + } + + /** + * Helper function to get the dataset from DB using did + */ + private def getDatasetByID(ctx: DSLContext, did: UInteger): Dataset = { + val datasetDao = new DatasetDao(ctx.configuration()) + val dataset = datasetDao.fetchOneByDid(did) + if (dataset == null) { + throw new NotFoundException(f"Dataset $did not found") + } + dataset + } + + /** + * Helper function to get the dataset version from DB using dvid + */ + private def getDatasetVersionByID( + ctx: DSLContext, + dvid: UInteger + ): DatasetVersion = { + val datasetVersionDao = new DatasetVersionDao(ctx.configuration()) + val version = datasetVersionDao.fetchOneByDvid(dvid) + if (version == null) { + throw new NotFoundException("Dataset Version not found") + } + version + } + + /** + * Helper function to get the latest dataset version from the DB + */ + private def getLatestDatasetVersion( + ctx: DSLContext, + did: UInteger + ): Option[DatasetVersion] = { + ctx + .selectFrom(DATASET_VERSION) + .where(DATASET_VERSION.DID.eq(did)) + .orderBy(DATASET_VERSION.CREATION_TIME.desc()) + .limit(1) + .fetchOptionalInto(classOf[DatasetVersion]) + .toScala + } + + // DatasetOperation defines the operations that will be applied when creating a new dataset version + private case class DatasetOperation( + filesToAdd: Map[java.nio.file.Path, InputStream], + filesToRemove: List[URI] + ) + + /** + * Create a new dataset version by adding new files + * @param did the target dataset id + * @param user the user submitting the request + * @param filesToAdd the map containing the files to add + * @return the created dataset version + */ + def createNewDatasetVersionByAddingFiles( + did: UInteger, + user: User, + filesToAdd: Map[java.nio.file.Path, InputStream] + ): Option[DashboardDatasetVersion] = { + applyDatasetOperationToCreateNewVersion( + context, + did, + user.getUid, + user.getEmail, + "", + DatasetOperation(filesToAdd, List()) + ) + } + + // apply the dataset operation to create a new dataset version + // it returns the created dataset version if creation succeed, else return None + // concurrency control is performed here: the thread has to have the lock in order to create the new version + private def applyDatasetOperationToCreateNewVersion( + ctx: DSLContext, + did: UInteger, + uid: UInteger, + ownerEmail: String, + userProvidedVersionName: String, + datasetOperation: DatasetOperation + ): 
Option[DashboardDatasetVersion] = { + // Helper function to generate the dataset version name + // the format of dataset version name is: v{#n} - {user provided dataset version name}. e.g. v10 - new version + def generateDatasetVersionName( + ctx: DSLContext, + did: UInteger, + userProvidedVersionName: String + ): String = { + val numberOfExistingVersions = ctx + .selectFrom(DATASET_VERSION) + .where(DATASET_VERSION.DID.eq(did)) + .fetch() + .size() + + val sanitizedUserProvidedVersionName = + StringUtils.replaceEach(userProvidedVersionName, Array("/", "\\"), Array("", "")) + val res = if (sanitizedUserProvidedVersionName == "") { + "v" + (numberOfExistingVersions + 1).toString + } else { + "v" + (numberOfExistingVersions + 1).toString + " - " + sanitizedUserProvidedVersionName + } + + res + } + + // Acquire or Create the lock for dataset of {did} + val lock = DatasetResource.datasetLocks.getOrElseUpdate(did, new ReentrantLock()) + + if (lock.isLocked) { + return None + } + lock.lock() + try { + val dataset = getDatasetByID(ctx, did) + val datasetPath = PathUtils.getDatasetPath(did) + if (datasetOperation.filesToAdd.isEmpty && datasetOperation.filesToRemove.isEmpty) { + return None + } + val datasetName = dataset.getName + val versionName = generateDatasetVersionName(ctx, did, userProvidedVersionName) + val commitHash = GitVersionControlLocalFileStorage.withCreateVersion( + datasetPath, + versionName, + () => { + datasetOperation.filesToAdd.foreach { + case (filePath, fileStream) => + GitVersionControlLocalFileStorage.writeFileToRepo(datasetPath, filePath, fileStream) + } + + datasetOperation.filesToRemove.foreach { fileUri => + DocumentFactory.openDocument(fileUri)._1.clear() + } + } + ) + + // create the DatasetVersion that persists in the DB + val datasetVersion = new DatasetVersion() + + datasetVersion.setName(versionName) + datasetVersion.setDid(did) + datasetVersion.setCreatorUid(uid) + datasetVersion.setVersionHash(commitHash) + + val physicalFileNodes = + GitVersionControlLocalFileStorage.retrieveRootFileNodesOfVersion(datasetPath, commitHash) + Some( + DashboardDatasetVersion( + // insert the dataset version into DB, and fetch the newly-inserted one. 
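+ // (ctx.newRecord(DATASET_VERSION, datasetVersion) copies the POJO's fields into a jOOQ
+ // record; .returning().fetchOne() reads the inserted row back so DB-generated columns,
+ // such as the version id and creation time, are populated before .into(...) maps the
+ // result back to a DatasetVersion POJO.)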
+ ctx + .insertInto(DATASET_VERSION) // Assuming DATASET is the table reference + .set(ctx.newRecord(DATASET_VERSION, datasetVersion)) + .returning() // Assuming ID is the primary key column + .fetchOne() + .into(classOf[DatasetVersion]), + DatasetFileNode.fromPhysicalFileNodes( + Map( + (ownerEmail, datasetName, versionName) -> physicalFileNodes.asScala.toList + ) + ) + ) + ) + } finally { + // Release the lock + lock.unlock() + } + } + + case class DashboardDataset( + dataset: Dataset, + ownerEmail: String, + accessPrivilege: EnumType, + isOwner: Boolean, + versions: List[DashboardDatasetVersion], + size: Long + ) + case class DashboardDatasetVersion( + datasetVersion: DatasetVersion, + fileNodes: List[DatasetFileNode] + ) + + case class DatasetIDs(dids: List[UInteger]) + + case class DatasetNameModification(did: UInteger, name: String) + + case class DatasetDescriptionModification(did: UInteger, description: String) + + case class DatasetVersionRootFileNodesResponse( + fileNodes: List[DatasetFileNode], + size: Long + ) +} + +@Produces(Array(MediaType.APPLICATION_JSON, "image/jpeg", "application/pdf")) +@Path("/dataset") +class DatasetResource { + private val ERR_USER_HAS_NO_ACCESS_TO_DATASET_MESSAGE = "User has no read access to this dataset" + private val ERR_DATASET_VERSION_NOT_FOUND_MESSAGE = "The version of the dataset not found" + private val ERR_DATASET_CREATION_FAILED_MESSAGE = + "Dataset creation is failed. Please make sure to upload files in order to create the initial version of dataset" + + /** + * Helper function to get the dataset from DB with additional information including user access privilege and owner email + */ + private def getDashboardDataset( + ctx: DSLContext, + did: UInteger, + uid: Option[UInteger], + isPublic: Boolean = false + ): DashboardDataset = { + if ( + (isPublic && !isDatasetPublic(ctx, did)) || + (!isPublic && (!userHasReadAccess(ctx, did, uid.get))) + ) { + throw new ForbiddenException(ERR_USER_HAS_NO_ACCESS_TO_DATASET_MESSAGE) + } + + val targetDataset = getDatasetByID(ctx, did) + val userAccessPrivilege = + if (isPublic) DatasetUserAccessPrivilege.NONE + else getDatasetUserAccessPrivilege(ctx, did, uid.get) + val isOwner = !isPublic && (targetDataset.getOwnerUid == uid.get) + + DashboardDataset( + targetDataset, + getOwner(ctx, did).getEmail, + userAccessPrivilege, + isOwner, + List(), + calculateDatasetVersionSize(did) + ) + } + + @POST + @RolesAllowed(Array("REGULAR", "ADMIN")) + @Path("/create") + @Consumes(Array(MediaType.MULTIPART_FORM_DATA)) + def createDataset( + @Auth user: SessionUser, + @FormDataParam("datasetName") datasetName: String, + @FormDataParam("datasetDescription") datasetDescription: String, + @FormDataParam("isDatasetPublic") isDatasetPublic: String, + @FormDataParam("initialVersionName") initialVersionName: String, + files: FormDataMultiPart + ): DashboardDataset = { + + withTransaction(context) { ctx => + val uid = user.getUid + val datasetDao: DatasetDao = new DatasetDao(ctx.configuration()) + val datasetOfUserDao: DatasetUserAccessDao = new DatasetUserAccessDao(ctx.configuration()) + + // do the name duplication check + val userExistingDatasetNames = datasetDao.fetchByOwnerUid(uid).asScala.map(_.getName) + if (userExistingDatasetNames.contains(datasetName)) { + throw new BadRequestException("Dataset with the same name already exists") + } + + val dataset: Dataset = new Dataset() + dataset.setName(datasetName) + dataset.setDescription(datasetDescription) + dataset.setIsPublic(isDatasetPublic.toByte) + 
dataset.setOwnerUid(uid) + + val createdDataset = ctx + .insertInto(DATASET) + .set(ctx.newRecord(DATASET, dataset)) + .returning() + .fetchOne() + + val did = createdDataset.getDid + val datasetPath = PathUtils.getDatasetPath(did) + + val datasetUserAccess = new DatasetUserAccess() + datasetUserAccess.setDid(createdDataset.getDid) + datasetUserAccess.setUid(uid) + datasetUserAccess.setPrivilege(DatasetUserAccessPrivilege.WRITE) + datasetOfUserDao.insert(datasetUserAccess) + + // initialize the dataset directory + GitVersionControlLocalFileStorage.initRepo(datasetPath) +// createdVersion match { +// case Some(_) => +// case None => +// // none means creation failed, user does not submit any files when creating the dataset +// throw new BadRequestException(ERR_DATASET_CREATION_FAILED_MESSAGE) +// } + + DashboardDataset( + new Dataset( + createdDataset.getDid, + createdDataset.getOwnerUid, + createdDataset.getName, + createdDataset.getIsPublic, + createdDataset.getDescription, + createdDataset.getCreationTime + ), + user.getEmail, + DatasetUserAccessPrivilege.WRITE, + isOwner = true, + versions = List(), + size = calculateDatasetVersionSize(did) + ) + } + } + + @POST + @RolesAllowed(Array("REGULAR", "ADMIN")) + @Path("/delete") + def deleteDataset(datasetIDs: DatasetIDs, @Auth user: SessionUser): Response = { + val uid = user.getUid + withTransaction(context) { ctx => + val datasetDao = new DatasetDao(ctx.configuration()) + for (did <- datasetIDs.dids) { + if (!userOwnDataset(ctx, did, uid)) { + // throw the exception that user has no access to certain dataset + throw new ForbiddenException(ERR_USER_HAS_NO_ACCESS_TO_DATASET_MESSAGE) + } + // delete the dataset repo from the filesystem + GitVersionControlLocalFileStorage.deleteRepo(PathUtils.getDatasetPath(did)) + + // delete the dataset from the DB + datasetDao.deleteById(did) + } + + Response.ok().build() + } + } + + @POST + @Consumes(Array(MediaType.APPLICATION_JSON)) + @Produces(Array(MediaType.APPLICATION_JSON)) + @RolesAllowed(Array("REGULAR", "ADMIN")) + @Path("/update/name") + def updateDatasetName( + modificator: DatasetNameModification, + @Auth sessionUser: SessionUser + ): Response = { + withTransaction(context) { ctx => + val datasetDao = new DatasetDao(ctx.configuration()) + val uid = sessionUser.getUid + val did = modificator.did + val name = modificator.name + if (!userHasWriteAccess(ctx, did, uid)) { + throw new ForbiddenException(ERR_USER_HAS_NO_ACCESS_TO_DATASET_MESSAGE) + } + + val existedDataset = getDatasetByID(ctx, did) + existedDataset.setName(name) + datasetDao.update(existedDataset) + Response.ok().build() + } + } + + @POST + @Consumes(Array(MediaType.APPLICATION_JSON)) + @Produces(Array(MediaType.APPLICATION_JSON)) + @RolesAllowed(Array("REGULAR", "ADMIN")) + @Path("/update/description") + def updateDatasetDescription( + modificator: DatasetDescriptionModification, + @Auth sessionUser: SessionUser + ): Response = { + withTransaction(context) { ctx => + val datasetDao = new DatasetDao(ctx.configuration()) + val uid = sessionUser.getUid + val did = modificator.did + val description = modificator.description + + if (!userHasWriteAccess(ctx, did, uid)) { + throw new ForbiddenException(ERR_USER_HAS_NO_ACCESS_TO_DATASET_MESSAGE) + } + + val existedDataset = getDatasetByID(ctx, did) + existedDataset.setDescription(description) + datasetDao.update(existedDataset) + Response.ok().build() + } + } + + @POST + @RolesAllowed(Array("REGULAR", "ADMIN")) + @Path("/{did}/update/publicity") + def toggleDatasetPublicity( + 
@PathParam("did") did: UInteger, + @Auth sessionUser: SessionUser + ): Response = { + withTransaction(context) { ctx => + val datasetDao = new DatasetDao(ctx.configuration()) + val uid = sessionUser.getUid + + if (!userHasWriteAccess(ctx, did, uid)) { + throw new ForbiddenException(ERR_USER_HAS_NO_ACCESS_TO_DATASET_MESSAGE) + } + + val existedDataset = getDatasetByID(ctx, did) + if (existedDataset.getIsPublic == DATASET_IS_PUBLIC) { + existedDataset.setIsPublic(DATASET_IS_PRIVATE) + } else { + existedDataset.setIsPublic(DATASET_IS_PUBLIC) + } + + datasetDao.update(existedDataset) + Response.ok().build() + } + } + + @POST + @RolesAllowed(Array("REGULAR", "ADMIN")) + @Path("/{did}/version/create") + @Consumes(Array(MediaType.MULTIPART_FORM_DATA)) + def createDatasetVersion( + @PathParam("did") did: UInteger, + @FormDataParam("versionName") versionName: String, + @Auth user: SessionUser + ): Unit = { + val uid = user.getUid + } + + /** + * This method returns a list of DashboardDatasets objects that are accessible by current user. + * + * @param user the session user + * @return list of user accessible DashboardDataset objects + */ + @GET + @RolesAllowed(Array("REGULAR", "ADMIN")) + @Path("/list") + def listDatasets( + @Auth user: SessionUser + ): List[DashboardDataset] = { + val uid = user.getUid + withTransaction(context)(ctx => { + var accessibleDatasets: ListBuffer[DashboardDataset] = ListBuffer() + // first fetch all datasets user have explicit access to + accessibleDatasets = ListBuffer.from( + ctx + .select() + .from( + DATASET + .leftJoin(DATASET_USER_ACCESS) + .on(DATASET_USER_ACCESS.DID.eq(DATASET.DID)) + .leftJoin(USER) + .on(USER.UID.eq(DATASET.OWNER_UID)) + ) + .where(DATASET_USER_ACCESS.UID.eq(uid)) + .fetch() + .map(record => { + val dataset = record.into(DATASET).into(classOf[Dataset]) + val datasetAccess = record.into(DATASET_USER_ACCESS).into(classOf[DatasetUserAccess]) + val ownerEmail = record.into(USER).getEmail + DashboardDataset( + isOwner = dataset.getOwnerUid == uid, + dataset = dataset, + accessPrivilege = datasetAccess.getPrivilege, + versions = List(), + ownerEmail = ownerEmail, + size = calculateDatasetVersionSize(dataset.getDid) + ) + }) + .asScala + ) + + // then we fetch the public datasets and merge it as a part of the result if not exist + val publicDatasets = ctx + .select() + .from( + DATASET + .leftJoin(USER) + .on(USER.UID.eq(DATASET.OWNER_UID)) + ) + .where(DATASET.IS_PUBLIC.eq(DATASET_IS_PUBLIC)) + .fetch() + .map(record => { + val dataset = record.into(DATASET).into(classOf[Dataset]) + val ownerEmail = record.into(USER).getEmail + DashboardDataset( + isOwner = false, + dataset = dataset, + accessPrivilege = DatasetUserAccessPrivilege.READ, + versions = List(), + ownerEmail = ownerEmail, + size = calculateDatasetVersionSize(dataset.getDid) + ) + }) + publicDatasets.forEach { publicDataset => + if (!accessibleDatasets.exists(_.dataset.getDid == publicDataset.dataset.getDid)) { + val dashboardDataset = DashboardDataset( + isOwner = false, + dataset = publicDataset.dataset, + ownerEmail = publicDataset.ownerEmail, + accessPrivilege = DatasetUserAccessPrivilege.READ, + versions = List(), + size = calculateDatasetVersionSize(publicDataset.dataset.getDid) + ) + accessibleDatasets = accessibleDatasets :+ dashboardDataset + } + } + + accessibleDatasets.toList + }) + } + + @GET + @RolesAllowed(Array("REGULAR", "ADMIN")) + @Path("/{did}/version/list") + def getDatasetVersionList( + @PathParam("did") did: UInteger, + @Auth user: SessionUser + ): 
List[DatasetVersion] = { + val uid = user.getUid + withTransaction(context)(ctx => { + if (!userHasReadAccess(ctx, did, uid)) { + throw new ForbiddenException(ERR_USER_HAS_NO_ACCESS_TO_DATASET_MESSAGE) + } + fetchDatasetVersions(ctx, did) + }) + } + + @GET + @Path("/{did}/publicVersion/list") + def getPublicDatasetVersionList( + @PathParam("did") did: UInteger + ): List[DatasetVersion] = { + withTransaction(context)(ctx => { + if (!isDatasetPublic(ctx, did)) { + throw new ForbiddenException(ERR_USER_HAS_NO_ACCESS_TO_DATASET_MESSAGE) + } + fetchDatasetVersions(ctx, did) + }) + } + + @GET + @RolesAllowed(Array("REGULAR", "ADMIN")) + @Path("/{did}/version/latest") + def retrieveLatestDatasetVersion( + @PathParam("did") did: UInteger, + @Auth user: SessionUser + ): DashboardDatasetVersion = { + val uid = user.getUid + withTransaction(context)(ctx => { + if (!userHasReadAccess(ctx, did, uid)) { + throw new ForbiddenException(ERR_USER_HAS_NO_ACCESS_TO_DATASET_MESSAGE) + } + val dataset = getDatasetByID(ctx, did) + val latestVersion = getLatestDatasetVersion(ctx, did).getOrElse( + throw new NotFoundException(ERR_DATASET_VERSION_NOT_FOUND_MESSAGE) + ) + val datasetPath = PathUtils.getDatasetPath(did) + + val ownerNode = DatasetFileNode + .fromPhysicalFileNodes( + Map( + (user.getEmail, dataset.getName, latestVersion.getName) -> + GitVersionControlLocalFileStorage + .retrieveRootFileNodesOfVersion( + datasetPath, + latestVersion.getVersionHash + ) + .asScala + .toList + ) + ) + .head + + DashboardDatasetVersion( + latestVersion, + ownerNode.children.get + .find(_.getName == dataset.getName) + .head + .children + .get + .find(_.getName == latestVersion.getName) + .head + .children + .get + ) + }) + } + + @GET + @RolesAllowed(Array("REGULAR", "ADMIN")) + @Path("/{did}/version/{dvid}/rootFileNodes") + def retrieveDatasetVersionRootFileNodes( + @PathParam("did") did: UInteger, + @PathParam("dvid") dvid: UInteger, + @Auth user: SessionUser + ): DatasetVersionRootFileNodesResponse = { + val uid = user.getUid + withTransaction(context)(ctx => + fetchDatasetVersionRootFileNodes(ctx, did, dvid, Some(uid), isPublic = false) + ) + } + + @GET + @Path("/{did}/publicVersion/{dvid}/rootFileNodes") + def retrievePublicDatasetVersionRootFileNodes( + @PathParam("did") did: UInteger, + @PathParam("dvid") dvid: UInteger + ): DatasetVersionRootFileNodesResponse = { + withTransaction(context)(ctx => + fetchDatasetVersionRootFileNodes(ctx, did, dvid, None, isPublic = true) + ) + } + + @GET + @RolesAllowed(Array("REGULAR", "ADMIN")) + @Path("/{did}") + def getDataset( + @PathParam("did") did: UInteger, + @Auth user: SessionUser + ): DashboardDataset = { + val uid = user.getUid + withTransaction(context)(ctx => fetchDataset(ctx, did, Some(uid), isPublic = false)) + } + + @GET + @Path("/public/{did}") + def getPublicDataset( + @PathParam("did") did: UInteger + ): DashboardDataset = { + withTransaction(context)(ctx => fetchDataset(ctx, did, None, isPublic = true)) + } + + @GET + @Path("/file") + def retrieveDatasetSingleFile( + @QueryParam("path") pathStr: String + ): Response = { + val decodedPathStr = URLDecoder.decode(pathStr, StandardCharsets.UTF_8.name()) + + withTransaction(context)(ctx => { + val fileUri = FileResolver.resolve(decodedPathStr) + val streamingOutput = new StreamingOutput() { + override def write(output: OutputStream): Unit = { + val inputStream = DocumentFactory.openReadonlyDocument(fileUri).asInputStream() + try { + val buffer = new Array[Byte](8192) // buffer size + var bytesRead = 
inputStream.read(buffer) + while (bytesRead != -1) { + output.write(buffer, 0, bytesRead) + bytesRead = inputStream.read(buffer) + } + } finally { + inputStream.close() + } + } + } + + val contentType = decodedPathStr.split("\\.").lastOption.map(_.toLowerCase) match { + case Some("jpg") | Some("jpeg") => "image/jpeg" + case Some("png") => "image/png" + case Some("csv") => "text/csv" + case Some("md") => "text/markdown" + case Some("txt") => "text/plain" + case Some("html") | Some("htm") => "text/html" + case Some("json") => "application/json" + case Some("pdf") => "application/pdf" + case Some("doc") | Some("docx") => "application/msword" + case Some("xls") | Some("xlsx") => "application/vnd.ms-excel" + case Some("ppt") | Some("pptx") => "application/vnd.ms-powerpoint" + case Some("mp4") => "video/mp4" + case Some("mp3") => "audio/mpeg" + case _ => "application/octet-stream" // default binary format + } + + Response.ok(streamingOutput).`type`(contentType).build() + }) + } + + /** + * Retrieves a ZIP file for a specific dataset version or the latest version. + * + * @param did The dataset ID (used when getLatest is true). + * @param dvid The dataset version ID, if given, retrieve this version; if not given, retrieve the latest version + * @param user The session user. + * @return A Response containing the dataset version as a ZIP file. + */ + @GET + @RolesAllowed(Array("REGULAR", "ADMIN")) + @Path("/version-zip") + def retrieveDatasetVersionZip( + @QueryParam("did") did: UInteger, + @QueryParam("dvid") dvid: Optional[Integer], + @Auth user: SessionUser + ): Response = { + if (!userHasReadAccess(context, did, user.getUid)) { + throw new ForbiddenException(ERR_USER_HAS_NO_ACCESS_TO_DATASET_MESSAGE) + } + val dataset = getDatasetByID(context, did) + val version = if (dvid.isEmpty) { + getLatestDatasetVersion(context, did).getOrElse( + throw new NotFoundException(ERR_DATASET_VERSION_NOT_FOUND_MESSAGE) + ) + } else { + getDatasetVersionByID(context, UInteger.valueOf(dvid.get)) + } + val targetDatasetPath = PathUtils.getDatasetPath(dataset.getDid) + val fileNodes = GitVersionControlLocalFileStorage.retrieveRootFileNodesOfVersion( + targetDatasetPath, + version.getVersionHash + ) + + val streamingOutput = new StreamingOutput { + override def write(outputStream: OutputStream): Unit = { + Using(new ZipOutputStream(outputStream)) { zipOutputStream => + def addFileNodeToZip(fileNode: PhysicalFileNode): Unit = { + val relativePath = fileNode.getRelativePath.toString + + if (fileNode.isDirectory) { + // For directories, add a ZIP entry with a trailing slash + zipOutputStream.putNextEntry(new ZipEntry(relativePath + "/")) + zipOutputStream.closeEntry() + + // Recursively add children + fileNode.getChildren.asScala.foreach(addFileNodeToZip) + } else { + // For files, add the file content + try { + zipOutputStream.putNextEntry(new ZipEntry(relativePath)) + Using(Files.newInputStream(fileNode.getAbsolutePath)) { inputStream => + inputStream.transferTo(zipOutputStream) + } + } catch { + case e: IOException => + throw new WebApplicationException(s"Error processing file: $relativePath", e) + } finally { + zipOutputStream.closeEntry() + } + } + } + + // Start the recursive process for each root file node + fileNodes.asScala.foreach(addFileNodeToZip) + }.recover { + case e: IOException => + throw new WebApplicationException("Error creating ZIP output stream", e) + case NonFatal(e) => + throw new WebApplicationException("Unexpected error while creating ZIP", e) + } + } + } + + Response + .ok(streamingOutput) + 
.header( + "Content-Disposition", + s"attachment; filename=${dataset.getName}-${version.getName}.zip" + ) + .`type`("application/zip") + .build() + } + + @GET + @Path("/datasetUserAccess") + def datasetUserAccess( + @QueryParam("did") did: UInteger + ): java.util.List[UInteger] = { + val records = context + .select(DATASET_USER_ACCESS.UID) + .from(DATASET_USER_ACCESS) + .where(DATASET_USER_ACCESS.DID.eq(did)) + .fetch() + + records.getValues(DATASET_USER_ACCESS.UID) + } + + private def fetchDatasetVersions(ctx: DSLContext, did: UInteger): List[DatasetVersion] = { + ctx + .selectFrom(DATASET_VERSION) + .where(DATASET_VERSION.DID.eq(did)) + .orderBy(DATASET_VERSION.CREATION_TIME.desc()) // Change to .asc() for ascending order + .fetchInto(classOf[DatasetVersion]) + .asScala + .toList + } + + private def fetchDatasetVersionRootFileNodes( + ctx: DSLContext, + did: UInteger, + dvid: UInteger, + uid: Option[UInteger], + isPublic: Boolean + ): DatasetVersionRootFileNodesResponse = { + val dataset = getDashboardDataset(ctx, did, uid, isPublic) + val targetDatasetPath = PathUtils.getDatasetPath(did) + val datasetVersion = getDatasetVersionByID(ctx, dvid) + val datasetName = dataset.dataset.getName + val fileNodes = GitVersionControlLocalFileStorage.retrieveRootFileNodesOfVersion( + targetDatasetPath, + datasetVersion.getVersionHash + ) + val versionHash = datasetVersion.getVersionHash + val size = calculateDatasetVersionSize(did, Some(versionHash)) + + val ownerFileNode = DatasetFileNode + .fromPhysicalFileNodes( + Map((dataset.ownerEmail, datasetName, datasetVersion.getName) -> fileNodes.asScala.toList) + ) + .head + + DatasetVersionRootFileNodesResponse( + ownerFileNode.children.get + .find(_.getName == datasetName) + .head + .children + .get + .find(_.getName == datasetVersion.getName) + .head + .children + .get, + size + ) + } + + private def fetchDataset( + ctx: DSLContext, + did: UInteger, + uid: Option[UInteger], + isPublic: Boolean + ): DashboardDataset = { + val dashboardDataset = getDashboardDataset(ctx, did, uid, isPublic) + val size = calculateDatasetVersionSize(did) + dashboardDataset.copy(size = size) + } +} diff --git a/core/file-service/src/main/scala/edu/uci/ics/texera/service/type/DatasetFileNode.scala b/core/file-service/src/main/scala/edu/uci/ics/texera/service/type/DatasetFileNode.scala new file mode 100644 index 0000000000..365ac7310f --- /dev/null +++ b/core/file-service/src/main/scala/edu/uci/ics/texera/service/type/DatasetFileNode.scala @@ -0,0 +1,131 @@ +package edu.uci.ics.texera.service.`type` + +import edu.uci.ics.amber.core.storage.util.dataset.PhysicalFileNode + +import java.util +import scala.collection.mutable + +// DatasetFileNode represents a unique file in dataset, its full path is in the format of: +// /ownerEmail/datasetName/versionName/fileRelativePath +// e.g. 
/bob@texera.com/twitterDataset/v1/california/irvine/tw1.csv +// ownerName is bob@texera.com; datasetName is twitterDataset, versionName is v1, fileRelativePath is california/irvine/tw1.csv +class DatasetFileNode( + val name: String, // direct name of this node + val nodeType: String, // "file" or "directory" + val parent: DatasetFileNode, // the parent node + val ownerEmail: String, + val size: Option[Long] = None, // size of the file in bytes, None if directory + var children: Option[List[DatasetFileNode]] = None // Only populated if 'type' is 'directory' +) { + + // Ensure the type is either "file" or "directory" + require(nodeType == "file" || nodeType == "directory", "type must be 'file' or 'directory'") + + // Getters for the parameters + def getName: String = name + + def getNodeType: String = nodeType + + def getParent: DatasetFileNode = parent + + def getOwnerEmail: String = ownerEmail + + def getSize: Option[Long] = size + + def getChildren: List[DatasetFileNode] = children.getOrElse(List()) + + // Method to get the full file path + def getFilePath: String = { + val pathComponents = new mutable.ArrayBuffer[String]() + var currentNode: DatasetFileNode = this + while (currentNode != null) { + if (currentNode.parent != null) { // Skip the root node to avoid double slashes + pathComponents.prepend(currentNode.name) + } + currentNode = currentNode.parent + } + "/" + pathComponents.mkString("/") + } +} + +object DatasetFileNode { + def fromPhysicalFileNodes( + map: Map[(String, String, String), List[PhysicalFileNode]] + ): List[DatasetFileNode] = { + val rootNode = new DatasetFileNode("/", "directory", null, "") + val ownerNodes = mutable.Map[String, DatasetFileNode]() + + map.foreach { + case ((ownerEmail, datasetName, versionName), physicalNodes) => + val ownerNode = ownerNodes.getOrElseUpdate( + ownerEmail, { + val newNode = new DatasetFileNode(ownerEmail, "directory", rootNode, ownerEmail) + rootNode.children = Some(rootNode.getChildren :+ newNode) + newNode + } + ) + + val datasetNode = ownerNode.getChildren.find(_.getName == datasetName).getOrElse { + val newNode = new DatasetFileNode(datasetName, "directory", ownerNode, ownerEmail) + ownerNode.children = Some(ownerNode.getChildren :+ newNode) + newNode + } + + val versionNode = datasetNode.getChildren.find(_.getName == versionName).getOrElse { + val newNode = new DatasetFileNode(versionName, "directory", datasetNode, ownerEmail) + datasetNode.children = Some(datasetNode.getChildren :+ newNode) + newNode + } + + physicalNodes.foreach(node => addNodeToTree(versionNode, node, ownerEmail)) + } + + // Sorting function to sort children of a node alphabetically in descending order + def sortChildren(node: DatasetFileNode): Unit = { + node.children = Some(node.getChildren.sortBy(_.getName)(Ordering.String.reverse)) + node.getChildren.foreach(sortChildren) + } + + // Apply the sorting to the root node + sortChildren(rootNode) + + rootNode.getChildren + } + + private def addNodeToTree( + parentNode: DatasetFileNode, + physicalNode: PhysicalFileNode, + ownerEmail: String + ): Unit = { + val queue = new util.LinkedList[(DatasetFileNode, PhysicalFileNode)]() + queue.add((parentNode, physicalNode)) + + while (!queue.isEmpty) { + val (currentParent, currentPhysicalNode) = queue.poll() + val relativePath = currentPhysicalNode.getRelativePath.toString.split("/").toList + val nodeName = relativePath.last + + val fileType = + if (currentPhysicalNode.isDirectory) "directory" else "file" + val fileSize = + if (fileType == "file") 
Some(currentPhysicalNode.getSize) else None + val existingNode = currentParent.getChildren.find(child => + child.getName == nodeName && child.getNodeType == fileType + ) + val fileNode = existingNode.getOrElse { + val newNode = new DatasetFileNode( + nodeName, + fileType, + currentParent, + ownerEmail, + fileSize + ) + currentParent.children = Some(currentParent.getChildren :+ newNode) + newNode + } + + // Add children of the current physical node to the queue + currentPhysicalNode.getChildren.forEach(child => queue.add((fileNode, child))) + } + } +} diff --git a/core/workflow-core/src/main/scala/edu/uci/ics/amber/core/storage/DocumentFactory.scala b/core/workflow-core/src/main/scala/edu/uci/ics/amber/core/storage/DocumentFactory.scala index 2222e5f278..991ba17a56 100644 --- a/core/workflow-core/src/main/scala/edu/uci/ics/amber/core/storage/DocumentFactory.scala +++ b/core/workflow-core/src/main/scala/edu/uci/ics/amber/core/storage/DocumentFactory.scala @@ -1,6 +1,6 @@ package edu.uci.ics.amber.core.storage -import edu.uci.ics.amber.core.storage.FileResolver.DATASET_FILE_URI_SCHEME +import edu.uci.ics.amber.core.storage.FileResolver.{DATASET_FILE_URI_SCHEME, LAKEFS_FILE_URI_SCHEME} import edu.uci.ics.amber.core.storage.model._ import edu.uci.ics.amber.core.storage.VFSResourceType._ import edu.uci.ics.amber.core.storage.VFSURIFactory.{VFS_FILE_URI_SCHEME, decodeURI} diff --git a/core/workflow-core/src/main/scala/edu/uci/ics/amber/util/PathUtils.scala b/core/workflow-core/src/main/scala/edu/uci/ics/amber/util/PathUtils.scala index f88f29d0b3..b7d34e134b 100644 --- a/core/workflow-core/src/main/scala/edu/uci/ics/amber/util/PathUtils.scala +++ b/core/workflow-core/src/main/scala/edu/uci/ics/amber/util/PathUtils.scala @@ -37,6 +37,8 @@ object PathUtils { lazy val workflowCompilingServicePath: Path = corePath.resolve("workflow-compiling-service") + lazy val fileServicePath: Path = corePath.resolve("file-service") + private lazy val datasetsRootPath = corePath.resolve("amber").resolve("user-resources").resolve("datasets") From 9242a5aaaef02967f51755eae003839a908f1139 Mon Sep 17 00:00:00 2001 From: Jiadong Bai Date: Sun, 16 Feb 2025 14:01:25 -0800 Subject: [PATCH 06/47] a runnable version --- .../uci/ics/texera/service/FileService.scala | 2 +- .../service/resource/DatasetResource.scala | 454 +++++++++--------- 2 files changed, 228 insertions(+), 228 deletions(-) diff --git a/core/file-service/src/main/scala/edu/uci/ics/texera/service/FileService.scala b/core/file-service/src/main/scala/edu/uci/ics/texera/service/FileService.scala index 6b755b8c54..52794ebfcb 100644 --- a/core/file-service/src/main/scala/edu/uci/ics/texera/service/FileService.scala +++ b/core/file-service/src/main/scala/edu/uci/ics/texera/service/FileService.scala @@ -20,7 +20,7 @@ class FileService extends Application[FileServiceConfiguration] { environment.jersey.setUrlPattern("/api/*") environment.jersey.register(classOf[SessionHandler]) environment.servlets.setSessionHandler(new SessionHandler) - environment.jersey.register(classOf[MultiPartFeature]) +// environment.jersey.register(classOf[MultiPartFeature]) // Register JWT authentication JwtAuth.setupJwtAuth(environment) diff --git a/core/file-service/src/main/scala/edu/uci/ics/texera/service/resource/DatasetResource.scala b/core/file-service/src/main/scala/edu/uci/ics/texera/service/resource/DatasetResource.scala index 3d0b2f2e02..a9c3a83b43 100644 --- a/core/file-service/src/main/scala/edu/uci/ics/texera/service/resource/DatasetResource.scala +++ 
b/core/file-service/src/main/scala/edu/uci/ics/texera/service/resource/DatasetResource.scala @@ -367,151 +367,151 @@ class DatasetResource { ) } - @POST - @RolesAllowed(Array("REGULAR", "ADMIN")) - @Path("/create") - @Consumes(Array(MediaType.MULTIPART_FORM_DATA)) - def createDataset( - @Auth user: SessionUser, - @FormDataParam("datasetName") datasetName: String, - @FormDataParam("datasetDescription") datasetDescription: String, - @FormDataParam("isDatasetPublic") isDatasetPublic: String, - @FormDataParam("initialVersionName") initialVersionName: String, - files: FormDataMultiPart - ): DashboardDataset = { - - withTransaction(context) { ctx => - val uid = user.getUid - val datasetDao: DatasetDao = new DatasetDao(ctx.configuration()) - val datasetOfUserDao: DatasetUserAccessDao = new DatasetUserAccessDao(ctx.configuration()) - - // do the name duplication check - val userExistingDatasetNames = datasetDao.fetchByOwnerUid(uid).asScala.map(_.getName) - if (userExistingDatasetNames.contains(datasetName)) { - throw new BadRequestException("Dataset with the same name already exists") - } - - val dataset: Dataset = new Dataset() - dataset.setName(datasetName) - dataset.setDescription(datasetDescription) - dataset.setIsPublic(isDatasetPublic.toByte) - dataset.setOwnerUid(uid) - - val createdDataset = ctx - .insertInto(DATASET) - .set(ctx.newRecord(DATASET, dataset)) - .returning() - .fetchOne() - - val did = createdDataset.getDid - val datasetPath = PathUtils.getDatasetPath(did) - - val datasetUserAccess = new DatasetUserAccess() - datasetUserAccess.setDid(createdDataset.getDid) - datasetUserAccess.setUid(uid) - datasetUserAccess.setPrivilege(DatasetUserAccessPrivilege.WRITE) - datasetOfUserDao.insert(datasetUserAccess) - - // initialize the dataset directory - GitVersionControlLocalFileStorage.initRepo(datasetPath) -// createdVersion match { -// case Some(_) => -// case None => -// // none means creation failed, user does not submit any files when creating the dataset -// throw new BadRequestException(ERR_DATASET_CREATION_FAILED_MESSAGE) +// @POST +// @RolesAllowed(Array("REGULAR", "ADMIN")) +// @Path("/create") +// @Consumes(Array(MediaType.MULTIPART_FORM_DATA)) +// def createDataset( +// @Auth user: SessionUser, +// @FormDataParam("datasetName") datasetName: String, +// @FormDataParam("datasetDescription") datasetDescription: String, +// @FormDataParam("isDatasetPublic") isDatasetPublic: String, +// @FormDataParam("initialVersionName") initialVersionName: String, +// files: FormDataMultiPart +// ): DashboardDataset = { +// +// withTransaction(context) { ctx => +// val uid = user.getUid +// val datasetDao: DatasetDao = new DatasetDao(ctx.configuration()) +// val datasetOfUserDao: DatasetUserAccessDao = new DatasetUserAccessDao(ctx.configuration()) +// +// // do the name duplication check +// val userExistingDatasetNames = datasetDao.fetchByOwnerUid(uid).asScala.map(_.getName) +// if (userExistingDatasetNames.contains(datasetName)) { +// throw new BadRequestException("Dataset with the same name already exists") // } - - DashboardDataset( - new Dataset( - createdDataset.getDid, - createdDataset.getOwnerUid, - createdDataset.getName, - createdDataset.getIsPublic, - createdDataset.getDescription, - createdDataset.getCreationTime - ), - user.getEmail, - DatasetUserAccessPrivilege.WRITE, - isOwner = true, - versions = List(), - size = calculateDatasetVersionSize(did) - ) - } - } - - @POST - @RolesAllowed(Array("REGULAR", "ADMIN")) - @Path("/delete") - def deleteDataset(datasetIDs: DatasetIDs, @Auth 
user: SessionUser): Response = { - val uid = user.getUid - withTransaction(context) { ctx => - val datasetDao = new DatasetDao(ctx.configuration()) - for (did <- datasetIDs.dids) { - if (!userOwnDataset(ctx, did, uid)) { - // throw the exception that user has no access to certain dataset - throw new ForbiddenException(ERR_USER_HAS_NO_ACCESS_TO_DATASET_MESSAGE) - } - // delete the dataset repo from the filesystem - GitVersionControlLocalFileStorage.deleteRepo(PathUtils.getDatasetPath(did)) - - // delete the dataset from the DB - datasetDao.deleteById(did) - } - - Response.ok().build() - } - } - - @POST - @Consumes(Array(MediaType.APPLICATION_JSON)) - @Produces(Array(MediaType.APPLICATION_JSON)) - @RolesAllowed(Array("REGULAR", "ADMIN")) - @Path("/update/name") - def updateDatasetName( - modificator: DatasetNameModification, - @Auth sessionUser: SessionUser - ): Response = { - withTransaction(context) { ctx => - val datasetDao = new DatasetDao(ctx.configuration()) - val uid = sessionUser.getUid - val did = modificator.did - val name = modificator.name - if (!userHasWriteAccess(ctx, did, uid)) { - throw new ForbiddenException(ERR_USER_HAS_NO_ACCESS_TO_DATASET_MESSAGE) - } - - val existedDataset = getDatasetByID(ctx, did) - existedDataset.setName(name) - datasetDao.update(existedDataset) - Response.ok().build() - } - } - - @POST - @Consumes(Array(MediaType.APPLICATION_JSON)) - @Produces(Array(MediaType.APPLICATION_JSON)) - @RolesAllowed(Array("REGULAR", "ADMIN")) - @Path("/update/description") - def updateDatasetDescription( - modificator: DatasetDescriptionModification, - @Auth sessionUser: SessionUser - ): Response = { - withTransaction(context) { ctx => - val datasetDao = new DatasetDao(ctx.configuration()) - val uid = sessionUser.getUid - val did = modificator.did - val description = modificator.description - - if (!userHasWriteAccess(ctx, did, uid)) { - throw new ForbiddenException(ERR_USER_HAS_NO_ACCESS_TO_DATASET_MESSAGE) - } - - val existedDataset = getDatasetByID(ctx, did) - existedDataset.setDescription(description) - datasetDao.update(existedDataset) - Response.ok().build() - } - } +// +// val dataset: Dataset = new Dataset() +// dataset.setName(datasetName) +// dataset.setDescription(datasetDescription) +// dataset.setIsPublic(isDatasetPublic.toByte) +// dataset.setOwnerUid(uid) +// +// val createdDataset = ctx +// .insertInto(DATASET) +// .set(ctx.newRecord(DATASET, dataset)) +// .returning() +// .fetchOne() +// +// val did = createdDataset.getDid +// val datasetPath = PathUtils.getDatasetPath(did) +// +// val datasetUserAccess = new DatasetUserAccess() +// datasetUserAccess.setDid(createdDataset.getDid) +// datasetUserAccess.setUid(uid) +// datasetUserAccess.setPrivilege(DatasetUserAccessPrivilege.WRITE) +// datasetOfUserDao.insert(datasetUserAccess) +// +// // initialize the dataset directory +// GitVersionControlLocalFileStorage.initRepo(datasetPath) +//// createdVersion match { +//// case Some(_) => +//// case None => +//// // none means creation failed, user does not submit any files when creating the dataset +//// throw new BadRequestException(ERR_DATASET_CREATION_FAILED_MESSAGE) +//// } +// +// DashboardDataset( +// new Dataset( +// createdDataset.getDid, +// createdDataset.getOwnerUid, +// createdDataset.getName, +// createdDataset.getIsPublic, +// createdDataset.getDescription, +// createdDataset.getCreationTime +// ), +// user.getEmail, +// DatasetUserAccessPrivilege.WRITE, +// isOwner = true, +// versions = List(), +// size = calculateDatasetVersionSize(did) +// ) +// } 
+// } + +// @POST +// @RolesAllowed(Array("REGULAR", "ADMIN")) +// @Path("/delete") +// def deleteDataset(datasetIDs: DatasetIDs, @Auth user: SessionUser): Response = { +// val uid = user.getUid +// withTransaction(context) { ctx => +// val datasetDao = new DatasetDao(ctx.configuration()) +// for (did <- datasetIDs.dids) { +// if (!userOwnDataset(ctx, did, uid)) { +// // throw the exception that user has no access to certain dataset +// throw new ForbiddenException(ERR_USER_HAS_NO_ACCESS_TO_DATASET_MESSAGE) +// } +// // delete the dataset repo from the filesystem +// GitVersionControlLocalFileStorage.deleteRepo(PathUtils.getDatasetPath(did)) +// +// // delete the dataset from the DB +// datasetDao.deleteById(did) +// } +// +// Response.ok().build() +// } +// } + +// @POST +// @Consumes(Array(MediaType.APPLICATION_JSON)) +// @Produces(Array(MediaType.APPLICATION_JSON)) +// @RolesAllowed(Array("REGULAR", "ADMIN")) +// @Path("/update/name") +// def updateDatasetName( +// modificator: DatasetNameModification, +// @Auth sessionUser: SessionUser +// ): Response = { +// withTransaction(context) { ctx => +// val datasetDao = new DatasetDao(ctx.configuration()) +// val uid = sessionUser.getUid +// val did = modificator.did +// val name = modificator.name +// if (!userHasWriteAccess(ctx, did, uid)) { +// throw new ForbiddenException(ERR_USER_HAS_NO_ACCESS_TO_DATASET_MESSAGE) +// } +// +// val existedDataset = getDatasetByID(ctx, did) +// existedDataset.setName(name) +// datasetDao.update(existedDataset) +// Response.ok().build() +// } +// } + +// @POST +// @Consumes(Array(MediaType.APPLICATION_JSON)) +// @Produces(Array(MediaType.APPLICATION_JSON)) +// @RolesAllowed(Array("REGULAR", "ADMIN")) +// @Path("/update/description") +// def updateDatasetDescription( +// modificator: DatasetDescriptionModification, +// @Auth sessionUser: SessionUser +// ): Response = { +// withTransaction(context) { ctx => +// val datasetDao = new DatasetDao(ctx.configuration()) +// val uid = sessionUser.getUid +// val did = modificator.did +// val description = modificator.description +// +// if (!userHasWriteAccess(ctx, did, uid)) { +// throw new ForbiddenException(ERR_USER_HAS_NO_ACCESS_TO_DATASET_MESSAGE) +// } +// +// val existedDataset = getDatasetByID(ctx, did) +// existedDataset.setDescription(description) +// datasetDao.update(existedDataset) +// Response.ok().build() +// } +// } @POST @RolesAllowed(Array("REGULAR", "ADMIN")) @@ -552,89 +552,89 @@ class DatasetResource { val uid = user.getUid } - /** - * This method returns a list of DashboardDatasets objects that are accessible by current user. 
- * - * @param user the session user - * @return list of user accessible DashboardDataset objects - */ - @GET - @RolesAllowed(Array("REGULAR", "ADMIN")) - @Path("/list") - def listDatasets( - @Auth user: SessionUser - ): List[DashboardDataset] = { - val uid = user.getUid - withTransaction(context)(ctx => { - var accessibleDatasets: ListBuffer[DashboardDataset] = ListBuffer() - // first fetch all datasets user have explicit access to - accessibleDatasets = ListBuffer.from( - ctx - .select() - .from( - DATASET - .leftJoin(DATASET_USER_ACCESS) - .on(DATASET_USER_ACCESS.DID.eq(DATASET.DID)) - .leftJoin(USER) - .on(USER.UID.eq(DATASET.OWNER_UID)) - ) - .where(DATASET_USER_ACCESS.UID.eq(uid)) - .fetch() - .map(record => { - val dataset = record.into(DATASET).into(classOf[Dataset]) - val datasetAccess = record.into(DATASET_USER_ACCESS).into(classOf[DatasetUserAccess]) - val ownerEmail = record.into(USER).getEmail - DashboardDataset( - isOwner = dataset.getOwnerUid == uid, - dataset = dataset, - accessPrivilege = datasetAccess.getPrivilege, - versions = List(), - ownerEmail = ownerEmail, - size = calculateDatasetVersionSize(dataset.getDid) - ) - }) - .asScala - ) - - // then we fetch the public datasets and merge it as a part of the result if not exist - val publicDatasets = ctx - .select() - .from( - DATASET - .leftJoin(USER) - .on(USER.UID.eq(DATASET.OWNER_UID)) - ) - .where(DATASET.IS_PUBLIC.eq(DATASET_IS_PUBLIC)) - .fetch() - .map(record => { - val dataset = record.into(DATASET).into(classOf[Dataset]) - val ownerEmail = record.into(USER).getEmail - DashboardDataset( - isOwner = false, - dataset = dataset, - accessPrivilege = DatasetUserAccessPrivilege.READ, - versions = List(), - ownerEmail = ownerEmail, - size = calculateDatasetVersionSize(dataset.getDid) - ) - }) - publicDatasets.forEach { publicDataset => - if (!accessibleDatasets.exists(_.dataset.getDid == publicDataset.dataset.getDid)) { - val dashboardDataset = DashboardDataset( - isOwner = false, - dataset = publicDataset.dataset, - ownerEmail = publicDataset.ownerEmail, - accessPrivilege = DatasetUserAccessPrivilege.READ, - versions = List(), - size = calculateDatasetVersionSize(publicDataset.dataset.getDid) - ) - accessibleDatasets = accessibleDatasets :+ dashboardDataset - } - } - - accessibleDatasets.toList - }) - } +// /** +// * This method returns a list of DashboardDatasets objects that are accessible by current user. 
+// * +// * @param user the session user +// * @return list of user accessible DashboardDataset objects +// */ +// @GET +// @RolesAllowed(Array("REGULAR", "ADMIN")) +// @Path("/list") +// def listDatasets( +// @Auth user: SessionUser +// ): List[DashboardDataset] = { +// val uid = user.getUid +// withTransaction(context)(ctx => { +// var accessibleDatasets: ListBuffer[DashboardDataset] = ListBuffer() +// // first fetch all datasets user have explicit access to +// accessibleDatasets = ListBuffer.from( +// ctx +// .select() +// .from( +// DATASET +// .leftJoin(DATASET_USER_ACCESS) +// .on(DATASET_USER_ACCESS.DID.eq(DATASET.DID)) +// .leftJoin(USER) +// .on(USER.UID.eq(DATASET.OWNER_UID)) +// ) +// .where(DATASET_USER_ACCESS.UID.eq(uid)) +// .fetch() +// .map(record => { +// val dataset = record.into(DATASET).into(classOf[Dataset]) +// val datasetAccess = record.into(DATASET_USER_ACCESS).into(classOf[DatasetUserAccess]) +// val ownerEmail = record.into(USER).getEmail +// DashboardDataset( +// isOwner = dataset.getOwnerUid == uid, +// dataset = dataset, +// accessPrivilege = datasetAccess.getPrivilege, +// versions = List(), +// ownerEmail = ownerEmail, +// size = calculateDatasetVersionSize(dataset.getDid) +// ) +// }) +// .asScala +// ) +// +// // then we fetch the public datasets and merge it as a part of the result if not exist +// val publicDatasets = ctx +// .select() +// .from( +// DATASET +// .leftJoin(USER) +// .on(USER.UID.eq(DATASET.OWNER_UID)) +// ) +// .where(DATASET.IS_PUBLIC.eq(DATASET_IS_PUBLIC)) +// .fetch() +// .map(record => { +// val dataset = record.into(DATASET).into(classOf[Dataset]) +// val ownerEmail = record.into(USER).getEmail +// DashboardDataset( +// isOwner = false, +// dataset = dataset, +// accessPrivilege = DatasetUserAccessPrivilege.READ, +// versions = List(), +// ownerEmail = ownerEmail, +// size = calculateDatasetVersionSize(dataset.getDid) +// ) +// }) +// publicDatasets.forEach { publicDataset => +// if (!accessibleDatasets.exists(_.dataset.getDid == publicDataset.dataset.getDid)) { +// val dashboardDataset = DashboardDataset( +// isOwner = false, +// dataset = publicDataset.dataset, +// ownerEmail = publicDataset.ownerEmail, +// accessPrivilege = DatasetUserAccessPrivilege.READ, +// versions = List(), +// size = calculateDatasetVersionSize(publicDataset.dataset.getDid) +// ) +// accessibleDatasets = accessibleDatasets :+ dashboardDataset +// } +// } +// +// accessibleDatasets.toList +// }) +// } @GET @RolesAllowed(Array("REGULAR", "ADMIN")) From 59b46a1e639045c935544f43d901d3181970bd9a Mon Sep 17 00:00:00 2001 From: Jiadong Bai Date: Thu, 20 Feb 2025 16:38:50 -0800 Subject: [PATCH 07/47] finish jwt auth --- .../uci/ics/texera/service/FileService.scala | 15 +++-- .../uci/ics/texera/service/auth/JwtAuth.scala | 1 - .../texera/service/auth/JwtAuthFilter.scala | 40 +++++++++++++ .../ics/texera/service/auth/JwtParser.scala | 59 +++++++++++++++++++ .../service/auth/UserAuthenticator.scala | 30 +++++----- .../service/resource/DatasetResource.scala | 34 +++++------ 6 files changed, 141 insertions(+), 38 deletions(-) create mode 100644 core/file-service/src/main/scala/edu/uci/ics/texera/service/auth/JwtAuthFilter.scala create mode 100644 core/file-service/src/main/scala/edu/uci/ics/texera/service/auth/JwtParser.scala diff --git a/core/file-service/src/main/scala/edu/uci/ics/texera/service/FileService.scala b/core/file-service/src/main/scala/edu/uci/ics/texera/service/FileService.scala index 52794ebfcb..6896dc5787 100644 --- 
a/core/file-service/src/main/scala/edu/uci/ics/texera/service/FileService.scala +++ b/core/file-service/src/main/scala/edu/uci/ics/texera/service/FileService.scala @@ -4,10 +4,10 @@ import io.dropwizard.core.Application import io.dropwizard.core.setup.{Bootstrap, Environment} import com.fasterxml.jackson.module.scala.DefaultScalaModule import edu.uci.ics.amber.util.PathUtils.fileServicePath -import edu.uci.ics.texera.service.auth.JwtAuth +import edu.uci.ics.texera.service.auth.{JwtAuthFilter, SessionUser} import edu.uci.ics.texera.service.resource.{DatasetAccessResource, DatasetResource} +import io.dropwizard.auth.AuthDynamicFeature import org.eclipse.jetty.server.session.SessionHandler -import org.glassfish.jersey.media.multipart.MultiPartFeature class FileService extends Application[FileServiceConfiguration] { override def initialize(bootstrap: Bootstrap[FileServiceConfiguration]): Unit = { @@ -20,9 +20,14 @@ class FileService extends Application[FileServiceConfiguration] { environment.jersey.setUrlPattern("/api/*") environment.jersey.register(classOf[SessionHandler]) environment.servlets.setSessionHandler(new SessionHandler) -// environment.jersey.register(classOf[MultiPartFeature]) - // Register JWT authentication - JwtAuth.setupJwtAuth(environment) + + // Register JWT authentication filter + environment.jersey.register(new AuthDynamicFeature(classOf[JwtAuthFilter])) + + // Enable @Auth annotation for injecting SessionUser + environment.jersey.register( + new io.dropwizard.auth.AuthValueFactoryProvider.Binder(classOf[SessionUser]) + ) // Register multipart feature for file uploads environment.jersey.register(classOf[DatasetResource]) diff --git a/core/file-service/src/main/scala/edu/uci/ics/texera/service/auth/JwtAuth.scala b/core/file-service/src/main/scala/edu/uci/ics/texera/service/auth/JwtAuth.scala index 9df68d19f3..07f949eef7 100644 --- a/core/file-service/src/main/scala/edu/uci/ics/texera/service/auth/JwtAuth.scala +++ b/core/file-service/src/main/scala/edu/uci/ics/texera/service/auth/JwtAuth.scala @@ -34,7 +34,6 @@ object JwtAuth { .setJwtConsumer(jwtConsumer) .setRealm("realm") .setPrefix("Bearer") - .setAuthenticator(UserAuthenticator) .setAuthorizer(UserRoleAuthorizer) .buildAuthFilter() ) diff --git a/core/file-service/src/main/scala/edu/uci/ics/texera/service/auth/JwtAuthFilter.scala b/core/file-service/src/main/scala/edu/uci/ics/texera/service/auth/JwtAuthFilter.scala new file mode 100644 index 0000000000..c955f2945b --- /dev/null +++ b/core/file-service/src/main/scala/edu/uci/ics/texera/service/auth/JwtAuthFilter.scala @@ -0,0 +1,40 @@ +package edu.uci.ics.texera.service.auth + +import jakarta.ws.rs.container.{ContainerRequestContext, ContainerRequestFilter} +import jakarta.ws.rs.core.{HttpHeaders, SecurityContext} +import jakarta.ws.rs.ext.Provider +import jakarta.ws.rs.container.ResourceInfo +import jakarta.ws.rs.core.Context + +import java.security.Principal +import com.typesafe.scalalogging.LazyLogging +import edu.uci.ics.texera.dao.jooq.generated.enums.UserRole + +@Provider +class JwtAuthFilter extends ContainerRequestFilter with LazyLogging { + + @Context + private var resourceInfo: ResourceInfo = _ + + override def filter(requestContext: ContainerRequestContext): Unit = { + val authHeader = requestContext.getHeaderString(HttpHeaders.AUTHORIZATION) + + if (authHeader != null && authHeader.startsWith("Bearer ")) { + val token = authHeader.substring(7) // Remove "Bearer " prefix + val userOpt = JwtParser.parseToken(token) + + if (userOpt.isPresent) { + val user = 
userOpt.get() + requestContext.setSecurityContext(new SecurityContext { + override def getUserPrincipal: Principal = user + override def isUserInRole(role: String): Boolean = + user.isRoleOf(UserRole.valueOf(role)) + override def isSecure: Boolean = false + override def getAuthenticationScheme: String = "Bearer" + }) + } else { + logger.warn("Invalid JWT: Unable to parse token") + } + } + } +} diff --git a/core/file-service/src/main/scala/edu/uci/ics/texera/service/auth/JwtParser.scala b/core/file-service/src/main/scala/edu/uci/ics/texera/service/auth/JwtParser.scala new file mode 100644 index 0000000000..2c4392d9bb --- /dev/null +++ b/core/file-service/src/main/scala/edu/uci/ics/texera/service/auth/JwtParser.scala @@ -0,0 +1,59 @@ +package edu.uci.ics.texera.service.auth + +import edu.uci.ics.texera.dao.jooq.generated.enums.UserRole +import edu.uci.ics.texera.dao.jooq.generated.tables.pojos.User +import org.jooq.types.UInteger +import org.jose4j.jwt.consumer.{JwtConsumer, JwtConsumerBuilder} +import org.jose4j.keys.HmacKey +import org.jose4j.lang.UnresolvableKeyException +import com.typesafe.scalalogging.LazyLogging +import org.jose4j.jwt.JwtClaims + +import java.nio.charset.StandardCharsets +import java.util.Optional + +object JwtParser extends LazyLogging { + + private val TOKEN_SECRET = AuthConfig.jwtSecretKey.toLowerCase() match { + case "random" => getRandomHexString + case _ => AuthConfig.jwtSecretKey + } + + private val jwtConsumer: JwtConsumer = new JwtConsumerBuilder() + .setAllowedClockSkewInSeconds(30) + .setRequireExpirationTime() + .setRequireSubject() + .setVerificationKey(new HmacKey(TOKEN_SECRET.getBytes(StandardCharsets.UTF_8))) + .setRelaxVerificationKeyValidation() + .build() + + def parseToken(token: String): Optional[SessionUser] = { + try { + val jwtClaims: JwtClaims = jwtConsumer.processToClaims(token) + val userName = jwtClaims.getSubject + val email = jwtClaims.getClaimValue("email", classOf[String]) + val userId = UInteger.valueOf(jwtClaims.getClaimValue("userId").asInstanceOf[Long]) + val role = UserRole.valueOf(jwtClaims.getClaimValue("role").asInstanceOf[String]) + val googleId = jwtClaims.getClaimValue("googleId", classOf[String]) + + val user = new User(userId, userName, email, null, googleId, role, null) + Optional.of(new SessionUser(user)) + } catch { + case _: UnresolvableKeyException => + logger.error("Invalid JWT Signature") + Optional.empty() + case e: Exception => + logger.error(s"Failed to parse JWT: ${e.getMessage}") + Optional.empty() + } + } + + private def getRandomHexString: String = { + val bytes = 32 + val r = new scala.util.Random() + val sb = new StringBuilder + while (sb.length < bytes) + sb.append(Integer.toHexString(r.nextInt())) + sb.toString.substring(0, bytes) + } +} diff --git a/core/file-service/src/main/scala/edu/uci/ics/texera/service/auth/UserAuthenticator.scala b/core/file-service/src/main/scala/edu/uci/ics/texera/service/auth/UserAuthenticator.scala index a1df7944df..82e9ebafa2 100644 --- a/core/file-service/src/main/scala/edu/uci/ics/texera/service/auth/UserAuthenticator.scala +++ b/core/file-service/src/main/scala/edu/uci/ics/texera/service/auth/UserAuthenticator.scala @@ -1,31 +1,33 @@ package edu.uci.ics.texera.service.auth -import com.typesafe.scalalogging.LazyLogging import edu.uci.ics.texera.dao.jooq.generated.enums.UserRole import edu.uci.ics.texera.dao.jooq.generated.tables.pojos.User import io.dropwizard.auth.Authenticator import org.jooq.types.UInteger -import org.jose4j.jwt.consumer.JwtContext import 
java.util.Optional +import org.jose4j.jwt.consumer.JwtContext -object UserAuthenticator extends Authenticator[JwtContext, SessionUser] with LazyLogging { +class UserAuthenticator extends Authenticator[JwtContext, SessionUser] { override def authenticate(context: JwtContext): Optional[SessionUser] = { - // This method will be called once the token's signature has been verified, - // including the token secret and the expiration time try { - val userName = context.getJwtClaims.getSubject - val email = context.getJwtClaims.getClaimValue("email").asInstanceOf[String] - val userId = UInteger.valueOf(context.getJwtClaims.getClaimValue("userId").asInstanceOf[Long]) - val role = UserRole.valueOf(context.getJwtClaims.getClaimValue("role").asInstanceOf[String]) - val googleId = context.getJwtClaims.getClaimValue("googleId").asInstanceOf[String] - val user = new User(userId, userName, email, null, googleId, role, null) - Optional.of(new SessionUser(user)) + val jwtClaims = context.getJwtClaims + + val userName = jwtClaims.getSubject + val email = jwtClaims.getClaimValue("email", classOf[String]) + val userId = UInteger.valueOf(jwtClaims.getClaimValue("userId").asInstanceOf[Long]) + val role = UserRole.valueOf(jwtClaims.getClaimValue("role").asInstanceOf[String]) + val googleId = jwtClaims.getClaimValue("googleId", classOf[String]) + + // Construct a SessionUser object + val user = new SessionUser( + new User(userId, userName, email, null, googleId, role, null) + ) + Optional.of(user) } catch { case e: Exception => - logger.error("Failed to authenticate the JwtContext", e) + e.printStackTrace() Optional.empty() } - } } diff --git a/core/file-service/src/main/scala/edu/uci/ics/texera/service/resource/DatasetResource.scala b/core/file-service/src/main/scala/edu/uci/ics/texera/service/resource/DatasetResource.scala index a9c3a83b43..1eecf25119 100644 --- a/core/file-service/src/main/scala/edu/uci/ics/texera/service/resource/DatasetResource.scala +++ b/core/file-service/src/main/scala/edu/uci/ics/texera/service/resource/DatasetResource.scala @@ -9,18 +9,14 @@ import edu.uci.ics.amber.util.PathUtils import edu.uci.ics.texera.dao.SqlServer import edu.uci.ics.texera.dao.SqlServer.withTransaction import edu.uci.ics.texera.dao.jooq.generated.enums.DatasetUserAccessPrivilege -import edu.uci.ics.texera.dao.jooq.generated.tables.Dataset.DATASET import edu.uci.ics.texera.dao.jooq.generated.tables.DatasetUserAccess.DATASET_USER_ACCESS import edu.uci.ics.texera.dao.jooq.generated.tables.DatasetVersion.DATASET_VERSION -import edu.uci.ics.texera.dao.jooq.generated.tables.User.USER import edu.uci.ics.texera.dao.jooq.generated.tables.daos.{ DatasetDao, - DatasetUserAccessDao, DatasetVersionDao } import edu.uci.ics.texera.dao.jooq.generated.tables.pojos.{ Dataset, - DatasetUserAccess, DatasetVersion, User } @@ -31,22 +27,14 @@ import edu.uci.ics.texera.service.resource.DatasetAccessResource.{ getOwner, isDatasetPublic, userHasReadAccess, - userHasWriteAccess, - userOwnDataset + userHasWriteAccess } import edu.uci.ics.texera.service.resource.DatasetResource.{ DATASET_IS_PRIVATE, DATASET_IS_PUBLIC, DashboardDataset, DashboardDatasetVersion, - DatasetDescriptionModification, - DatasetIDs, - DatasetNameModification, - DatasetOperation, DatasetVersionRootFileNodesResponse, - FILE_OPERATION_REMOVE_PREFIX, - FILE_OPERATION_UPLOAD_PREFIX, - applyDatasetOperationToCreateNewVersion, calculateDatasetVersionSize, context, getDatasetByID, @@ -58,10 +46,9 @@ import jakarta.annotation.security.RolesAllowed import jakarta.ws.rs._ import 
jakarta.ws.rs.core.{MediaType, Response, StreamingOutput} import org.apache.commons.lang3.StringUtils -import org.glassfish.jersey.media.multipart.{FormDataMultiPart, FormDataParam} +import org.glassfish.jersey.media.multipart.FormDataParam import org.jooq.types.UInteger import org.jooq.{DSLContext, EnumType} -import play.api.libs.json.Json import java.io.{IOException, InputStream, OutputStream} import java.net.{URI, URLDecoder} @@ -70,8 +57,6 @@ import java.nio.file.Files import java.util.Optional import java.util.concurrent.locks.ReentrantLock import java.util.zip.{ZipEntry, ZipOutputStream} -import scala.collection.mutable -import scala.collection.mutable.ListBuffer import scala.jdk.CollectionConverters._ import scala.jdk.OptionConverters._ import scala.util.control.NonFatal @@ -552,7 +537,20 @@ class DatasetResource { val uid = user.getUid } -// /** + @GET + @RolesAllowed(Array("REGULAR", "ADMIN")) + @Path("/test") + def sessionTest( + @Auth user: SessionUser + ): Response = { + println(user.getName) + println(user.getEmail) + println(user.getGoogleId) + + Response.ok().build() + } + + // /** // * This method returns a list of DashboardDatasets objects that are accessible by current user. // * // * @param user the session user From d89e9c383177e3ca9abaa9b6a9306653b5c07812 Mon Sep 17 00:00:00 2001 From: Jiadong Bai Date: Sat, 22 Feb 2025 12:27:55 -0800 Subject: [PATCH 08/47] make the backend work --- .../src/main/resources/docker-compose.yml | 39 ++ .../src/main/resources/lakefs-config.yaml | 20 + .../src/main/resources/minio-config.yml | 15 + .../uci/ics/texera/service/FileService.scala | 8 + .../uci/ics/texera/service/auth/JwtAuth.scala | 54 --- .../texera/service/auth/JwtAuthFilter.scala | 4 +- .../ics/texera/service/auth/JwtParser.scala | 8 +- .../ics/texera/service/auth/SessionUser.scala | 6 +- .../service/auth/UserAuthenticator.scala | 33 -- .../service/auth/UserRoleAuthorizer.scala | 15 - .../resource/DatasetAccessResource.scala | 39 +- .../service/resource/DatasetResource.scala | 404 ++++++++---------- .../type/{ => dataset}/DatasetFileNode.scala | 0 .../storage/LakeFSApiClientInstance.scala | 10 +- .../core/storage/LakeFSFileStorage.scala | 15 +- 15 files changed, 302 insertions(+), 368 deletions(-) create mode 100644 core/file-service/src/main/resources/docker-compose.yml create mode 100644 core/file-service/src/main/resources/lakefs-config.yaml create mode 100644 core/file-service/src/main/resources/minio-config.yml delete mode 100644 core/file-service/src/main/scala/edu/uci/ics/texera/service/auth/JwtAuth.scala delete mode 100644 core/file-service/src/main/scala/edu/uci/ics/texera/service/auth/UserAuthenticator.scala delete mode 100644 core/file-service/src/main/scala/edu/uci/ics/texera/service/auth/UserRoleAuthorizer.scala rename core/file-service/src/main/scala/edu/uci/ics/texera/service/type/{ => dataset}/DatasetFileNode.scala (100%) diff --git a/core/file-service/src/main/resources/docker-compose.yml b/core/file-service/src/main/resources/docker-compose.yml new file mode 100644 index 0000000000..cb34e310c0 --- /dev/null +++ b/core/file-service/src/main/resources/docker-compose.yml @@ -0,0 +1,39 @@ +version: '3.8' + +services: + minio: + image: minio/minio:latest + container_name: minio + ports: + - "9500:9000" # MinIO API (use this in LakeFS config) + - "9501:9001" # MinIO Console UI + environment: + - MINIO_ROOT_USER=texera_minio + - MINIO_ROOT_PASSWORD=password + volumes: + - /Users/baijiadong/Desktop/chenlab/texera/core/file-service/src/main/user-resources/minio:/data 
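+    # start MinIO serving the mounted /data volume; the console UI listens on container port 9001 (published to the host as 9501 above)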
+ command: server --console-address ":9001" /data + + lakefs: + image: treeverse/lakefs:latest + container_name: lakefs + depends_on: + - minio # Ensure MinIO starts first + ports: + - "8000:8000" # LakeFS API/UI + environment: + # PostgreSQL Connection (External DB) + - LAKEFS_DATABASE_TYPE=postgres + - LAKEFS_DATABASE_POSTGRES_CONNECTION_STRING=postgresql://texera_lakefs_admin:password@host.docker.internal:5432/texera_lakefs + + # Authentication + - LAKEFS_AUTH_ENCRYPT_SECRET_KEY=random_string_for_lakefs + + # MinIO Storage Configuration for LakeFS + - LAKEFS_BLOCKSTORE_TYPE=s3 + - LAKEFS_BLOCKSTORE_S3_FORCE_PATH_STYLE=true + - LAKEFS_BLOCKSTORE_S3_ENDPOINT=http://minio:9000 # MinIO internal service URL + - LAKEFS_BLOCKSTORE_S3_DISCOVER_BUCKET_REGION=false + - LAKEFS_BLOCKSTORE_S3_CREDENTIALS_ACCESS_KEY_ID=texera_minio + - LAKEFS_BLOCKSTORE_S3_CREDENTIALS_SECRET_ACCESS_KEY=password + command: run \ No newline at end of file diff --git a/core/file-service/src/main/resources/lakefs-config.yaml b/core/file-service/src/main/resources/lakefs-config.yaml new file mode 100644 index 0000000000..bc3e4dc707 --- /dev/null +++ b/core/file-service/src/main/resources/lakefs-config.yaml @@ -0,0 +1,20 @@ +--- +database: + type: "postgres" + postgres: + connection_string: "postgresql://texera_lakefs_admin:password@localhost:5432/texera_lakefs" + +auth: + encrypt: + # Replace this with a randomly-generated string. Keep it safe! + secret_key: "random_string_for_lakefs" + +blockstore: + type: s3 + s3: + force_path_style: true + endpoint: http://localhost:9500 # MinIO API now runs on port 9500 + discover_bucket_region: false + credentials: + access_key_id: "texera_minio" # Matches MINIO_ROOT_USER from docker-compose + secret_access_key: "password" # Matches MINIO_ROOT_PASSWORD from docker-compose \ No newline at end of file diff --git a/core/file-service/src/main/resources/minio-config.yml b/core/file-service/src/main/resources/minio-config.yml new file mode 100644 index 0000000000..a4ee5aace8 --- /dev/null +++ b/core/file-service/src/main/resources/minio-config.yml @@ -0,0 +1,15 @@ +version: '3.8' + +services: + minio: + image: minio/minio:latest + container_name: minio + ports: + - "9500:9000" # MinIO API + - "9501:9001" # MinIO Console UI + environment: + - MINIO_ROOT_USER=texera_minio + - MINIO_ROOT_PASSWORD=password + volumes: + - /Users/baijiadong/Desktop/chenlab/texera/core/file-service/src/main/user-resources/minio:/data + command: server --console-address ":9001" /data \ No newline at end of file diff --git a/core/file-service/src/main/scala/edu/uci/ics/texera/service/FileService.scala b/core/file-service/src/main/scala/edu/uci/ics/texera/service/FileService.scala index 6896dc5787..feca85ba10 100644 --- a/core/file-service/src/main/scala/edu/uci/ics/texera/service/FileService.scala +++ b/core/file-service/src/main/scala/edu/uci/ics/texera/service/FileService.scala @@ -3,7 +3,9 @@ package edu.uci.ics.texera.service import io.dropwizard.core.Application import io.dropwizard.core.setup.{Bootstrap, Environment} import com.fasterxml.jackson.module.scala.DefaultScalaModule +import edu.uci.ics.amber.core.storage.StorageConfig import edu.uci.ics.amber.util.PathUtils.fileServicePath +import edu.uci.ics.texera.dao.SqlServer import edu.uci.ics.texera.service.auth.{JwtAuthFilter, SessionUser} import edu.uci.ics.texera.service.resource.{DatasetAccessResource, DatasetResource} import io.dropwizard.auth.AuthDynamicFeature @@ -18,6 +20,12 @@ class FileService extends Application[FileServiceConfiguration] { override 
def run(configuration: FileServiceConfiguration, environment: Environment): Unit = { // Serve backend at /api environment.jersey.setUrlPattern("/api/*") + SqlServer.initConnection( + StorageConfig.jdbcUrl, + StorageConfig.jdbcUsername, + StorageConfig.jdbcPassword + ) + environment.jersey.register(classOf[SessionHandler]) environment.servlets.setSessionHandler(new SessionHandler) diff --git a/core/file-service/src/main/scala/edu/uci/ics/texera/service/auth/JwtAuth.scala b/core/file-service/src/main/scala/edu/uci/ics/texera/service/auth/JwtAuth.scala deleted file mode 100644 index 07f949eef7..0000000000 --- a/core/file-service/src/main/scala/edu/uci/ics/texera/service/auth/JwtAuth.scala +++ /dev/null @@ -1,54 +0,0 @@ -package edu.uci.ics.texera.service.auth - -import com.github.toastshaman.dropwizard.auth.jwt.JwtAuthFilter -import io.dropwizard.auth.AuthDynamicFeature -import io.dropwizard.auth.AuthValueFactoryProvider -import io.dropwizard.auth.PrincipalImpl -import io.dropwizard.core.setup.Environment -import org.jose4j.jwt.consumer.{JwtConsumer, JwtConsumerBuilder} -import org.jose4j.keys.HmacKey - -import java.util.Random - -object JwtAuth { - private val TOKEN_SECRET: String = AuthConfig.jwtSecretKey.toLowerCase() match { - case "random" => getRandomHexString - case _ => AuthConfig.jwtSecretKey - } - private val TOKEN_EXPIRATION_DAYS = AuthConfig.jwtExpirationDays - - // JWT Consumer for verification - private val jwtConsumer: JwtConsumer = new JwtConsumerBuilder() - .setAllowedClockSkewInSeconds(30) - .setRequireExpirationTime() - .setRequireSubject() - .setVerificationKey(new HmacKey(TOKEN_SECRET.getBytes)) - .setRelaxVerificationKeyValidation() - .build() - - // Register authentication middleware in Dropwizard - def setupJwtAuth(environment: Environment): Unit = { - environment.jersey.register( - new AuthDynamicFeature( - new JwtAuthFilter.Builder[SessionUser]() - .setJwtConsumer(jwtConsumer) - .setRealm("realm") - .setPrefix("Bearer") - .setAuthorizer(UserRoleAuthorizer) - .buildAuthFilter() - ) - ) - - // Allow @Auth annotation injection for secured endpoints - environment.jersey.register(new AuthValueFactoryProvider.Binder(classOf[PrincipalImpl])) - } - - private def getRandomHexString: String = { - val bytes = 32 - val r = new Random() - val sb = new StringBuffer - while (sb.length < bytes) - sb.append(Integer.toHexString(r.nextInt())) - sb.toString.substring(0, bytes) - } -} diff --git a/core/file-service/src/main/scala/edu/uci/ics/texera/service/auth/JwtAuthFilter.scala b/core/file-service/src/main/scala/edu/uci/ics/texera/service/auth/JwtAuthFilter.scala index c955f2945b..a6a153ea9b 100644 --- a/core/file-service/src/main/scala/edu/uci/ics/texera/service/auth/JwtAuthFilter.scala +++ b/core/file-service/src/main/scala/edu/uci/ics/texera/service/auth/JwtAuthFilter.scala @@ -8,7 +8,7 @@ import jakarta.ws.rs.core.Context import java.security.Principal import com.typesafe.scalalogging.LazyLogging -import edu.uci.ics.texera.dao.jooq.generated.enums.UserRole +import edu.uci.ics.texera.dao.jooq.generated.enums.UserRoleEnum @Provider class JwtAuthFilter extends ContainerRequestFilter with LazyLogging { @@ -28,7 +28,7 @@ class JwtAuthFilter extends ContainerRequestFilter with LazyLogging { requestContext.setSecurityContext(new SecurityContext { override def getUserPrincipal: Principal = user override def isUserInRole(role: String): Boolean = - user.isRoleOf(UserRole.valueOf(role)) + user.isRoleOf(UserRoleEnum.valueOf(role)) override def isSecure: Boolean = false override def 
getAuthenticationScheme: String = "Bearer" }) diff --git a/core/file-service/src/main/scala/edu/uci/ics/texera/service/auth/JwtParser.scala b/core/file-service/src/main/scala/edu/uci/ics/texera/service/auth/JwtParser.scala index 2c4392d9bb..570d0d9266 100644 --- a/core/file-service/src/main/scala/edu/uci/ics/texera/service/auth/JwtParser.scala +++ b/core/file-service/src/main/scala/edu/uci/ics/texera/service/auth/JwtParser.scala @@ -1,12 +1,12 @@ package edu.uci.ics.texera.service.auth -import edu.uci.ics.texera.dao.jooq.generated.enums.UserRole import edu.uci.ics.texera.dao.jooq.generated.tables.pojos.User import org.jooq.types.UInteger import org.jose4j.jwt.consumer.{JwtConsumer, JwtConsumerBuilder} import org.jose4j.keys.HmacKey import org.jose4j.lang.UnresolvableKeyException import com.typesafe.scalalogging.LazyLogging +import edu.uci.ics.texera.dao.jooq.generated.enums.UserRoleEnum import org.jose4j.jwt.JwtClaims import java.nio.charset.StandardCharsets @@ -32,11 +32,11 @@ object JwtParser extends LazyLogging { val jwtClaims: JwtClaims = jwtConsumer.processToClaims(token) val userName = jwtClaims.getSubject val email = jwtClaims.getClaimValue("email", classOf[String]) - val userId = UInteger.valueOf(jwtClaims.getClaimValue("userId").asInstanceOf[Long]) - val role = UserRole.valueOf(jwtClaims.getClaimValue("role").asInstanceOf[String]) + val userId = jwtClaims.getClaimValue("userId").asInstanceOf[Long].toInt + val role = UserRoleEnum.valueOf(jwtClaims.getClaimValue("role").asInstanceOf[String]) val googleId = jwtClaims.getClaimValue("googleId", classOf[String]) - val user = new User(userId, userName, email, null, googleId, role, null) + val user = new User(userId, userName, email, null, googleId, null, role) Optional.of(new SessionUser(user)) } catch { case _: UnresolvableKeyException => diff --git a/core/file-service/src/main/scala/edu/uci/ics/texera/service/auth/SessionUser.scala b/core/file-service/src/main/scala/edu/uci/ics/texera/service/auth/SessionUser.scala index ec564b82be..39a9f25aef 100644 --- a/core/file-service/src/main/scala/edu/uci/ics/texera/service/auth/SessionUser.scala +++ b/core/file-service/src/main/scala/edu/uci/ics/texera/service/auth/SessionUser.scala @@ -1,6 +1,6 @@ package edu.uci.ics.texera.service.auth -import edu.uci.ics.texera.dao.jooq.generated.enums.UserRole +import edu.uci.ics.texera.dao.jooq.generated.enums.UserRoleEnum import edu.uci.ics.texera.dao.jooq.generated.tables.pojos.User import org.jooq.types.UInteger @@ -11,11 +11,11 @@ class SessionUser(val user: User) extends Principal { override def getName: String = user.getName - def getUid: UInteger = user.getUid + def getUid: Integer = user.getUid def getEmail: String = user.getEmail def getGoogleId: String = user.getGoogleId - def isRoleOf(role: UserRole): Boolean = user.getRole == role + def isRoleOf(role: UserRoleEnum): Boolean = user.getRole == role } diff --git a/core/file-service/src/main/scala/edu/uci/ics/texera/service/auth/UserAuthenticator.scala b/core/file-service/src/main/scala/edu/uci/ics/texera/service/auth/UserAuthenticator.scala deleted file mode 100644 index 82e9ebafa2..0000000000 --- a/core/file-service/src/main/scala/edu/uci/ics/texera/service/auth/UserAuthenticator.scala +++ /dev/null @@ -1,33 +0,0 @@ -package edu.uci.ics.texera.service.auth - -import edu.uci.ics.texera.dao.jooq.generated.enums.UserRole -import edu.uci.ics.texera.dao.jooq.generated.tables.pojos.User -import io.dropwizard.auth.Authenticator -import org.jooq.types.UInteger - -import java.util.Optional -import 
org.jose4j.jwt.consumer.JwtContext - -class UserAuthenticator extends Authenticator[JwtContext, SessionUser] { - override def authenticate(context: JwtContext): Optional[SessionUser] = { - try { - val jwtClaims = context.getJwtClaims - - val userName = jwtClaims.getSubject - val email = jwtClaims.getClaimValue("email", classOf[String]) - val userId = UInteger.valueOf(jwtClaims.getClaimValue("userId").asInstanceOf[Long]) - val role = UserRole.valueOf(jwtClaims.getClaimValue("role").asInstanceOf[String]) - val googleId = jwtClaims.getClaimValue("googleId", classOf[String]) - - // Construct a SessionUser object - val user = new SessionUser( - new User(userId, userName, email, null, googleId, role, null) - ) - Optional.of(user) - } catch { - case e: Exception => - e.printStackTrace() - Optional.empty() - } - } -} diff --git a/core/file-service/src/main/scala/edu/uci/ics/texera/service/auth/UserRoleAuthorizer.scala b/core/file-service/src/main/scala/edu/uci/ics/texera/service/auth/UserRoleAuthorizer.scala deleted file mode 100644 index 3df41f6307..0000000000 --- a/core/file-service/src/main/scala/edu/uci/ics/texera/service/auth/UserRoleAuthorizer.scala +++ /dev/null @@ -1,15 +0,0 @@ -package edu.uci.ics.texera.service.auth - -import edu.uci.ics.texera.dao.jooq.generated.enums.UserRole -import io.dropwizard.auth.Authorizer -import jakarta.ws.rs.container.ContainerRequestContext - -object UserRoleAuthorizer extends Authorizer[SessionUser] { - override def authorize( - user: SessionUser, - role: String, - requestContext: ContainerRequestContext - ): Boolean = { - user.isRoleOf(UserRole.valueOf(role)) - } -} diff --git a/core/file-service/src/main/scala/edu/uci/ics/texera/service/resource/DatasetAccessResource.scala b/core/file-service/src/main/scala/edu/uci/ics/texera/service/resource/DatasetAccessResource.scala index cf0f0cf404..84834cfec1 100644 --- a/core/file-service/src/main/scala/edu/uci/ics/texera/service/resource/DatasetAccessResource.scala +++ b/core/file-service/src/main/scala/edu/uci/ics/texera/service/resource/DatasetAccessResource.scala @@ -4,13 +4,12 @@ import edu.uci.ics.amber.core.storage.StorageConfig import edu.uci.ics.texera.dao.SqlServer import edu.uci.ics.texera.dao.SqlServer.withTransaction import edu.uci.ics.texera.dao.jooq.generated.Tables.USER -import edu.uci.ics.texera.dao.jooq.generated.enums.DatasetUserAccessPrivilege +import edu.uci.ics.texera.dao.jooq.generated.enums.PrivilegeEnum import edu.uci.ics.texera.dao.jooq.generated.tables.DatasetUserAccess.DATASET_USER_ACCESS import edu.uci.ics.texera.dao.jooq.generated.tables.daos.{DatasetDao, DatasetUserAccessDao, UserDao} import edu.uci.ics.texera.dao.jooq.generated.tables.pojos.{DatasetUserAccess, User} import edu.uci.ics.texera.service.resource.DatasetAccessResource.{AccessEntry, context, getOwner} import org.jooq.{DSLContext, EnumType} -import org.jooq.types.UInteger import java.util import javax.annotation.security.RolesAllowed @@ -19,39 +18,39 @@ import javax.ws.rs.core.{MediaType, Response} object DatasetAccessResource { private lazy val context: DSLContext = SqlServer - .getInstance(StorageConfig.jdbcUrl, StorageConfig.jdbcUsername, StorageConfig.jdbcPassword) + .getInstance() .createDSLContext() - def isDatasetPublic(ctx: DSLContext, did: UInteger): Boolean = { + def isDatasetPublic(ctx: DSLContext, did: Integer): Boolean = { val datasetDao = new DatasetDao(ctx.configuration()) Option(datasetDao.fetchOneByDid(did)) .flatMap(dataset => Option(dataset.getIsPublic)) .contains(1.toByte) } - def userHasReadAccess(ctx: 
DSLContext, did: UInteger, uid: UInteger): Boolean = { + def userHasReadAccess(ctx: DSLContext, did: Integer, uid: Integer): Boolean = { isDatasetPublic(ctx, did) || userHasWriteAccess(ctx, did, uid) || - getDatasetUserAccessPrivilege(ctx, did, uid) == DatasetUserAccessPrivilege.READ + getDatasetUserAccessPrivilege(ctx, did, uid) == PrivilegeEnum.READ } - def userOwnDataset(ctx: DSLContext, did: UInteger, uid: UInteger): Boolean = { + def userOwnDataset(ctx: DSLContext, did: Integer, uid: Integer): Boolean = { val datasetDao = new DatasetDao(ctx.configuration()) Option(datasetDao.fetchOneByDid(did)) .exists(_.getOwnerUid == uid) } - def userHasWriteAccess(ctx: DSLContext, did: UInteger, uid: UInteger): Boolean = { + def userHasWriteAccess(ctx: DSLContext, did: Integer, uid: Integer): Boolean = { userOwnDataset(ctx, did, uid) || - getDatasetUserAccessPrivilege(ctx, did, uid) == DatasetUserAccessPrivilege.WRITE + getDatasetUserAccessPrivilege(ctx, did, uid) == PrivilegeEnum.WRITE } def getDatasetUserAccessPrivilege( ctx: DSLContext, - did: UInteger, - uid: UInteger - ): DatasetUserAccessPrivilege = { + did: Integer, + uid: Integer + ): PrivilegeEnum = { Option( ctx .select(DATASET_USER_ACCESS.PRIVILEGE) @@ -61,11 +60,11 @@ object DatasetAccessResource { .eq(did) .and(DATASET_USER_ACCESS.UID.eq(uid)) ) - .fetchOneInto(classOf[DatasetUserAccessPrivilege]) - ).getOrElse(DatasetUserAccessPrivilege.NONE) + .fetchOneInto(classOf[PrivilegeEnum]) + ).getOrElse(PrivilegeEnum.NONE) } - def getOwner(ctx: DSLContext, did: UInteger): User = { + def getOwner(ctx: DSLContext, did: Integer): User = { val datasetDao = new DatasetDao(ctx.configuration()) val userDao = new UserDao(ctx.configuration()) @@ -92,7 +91,7 @@ class DatasetAccessResource { */ @GET @Path("/owner/{did}") - def getOwnerEmailOfDataset(@PathParam("did") did: UInteger): String = { + def getOwnerEmailOfDataset(@PathParam("did") did: Integer): String = { var email = "" withTransaction(context) { ctx => val owner = getOwner(ctx, did) @@ -112,7 +111,7 @@ class DatasetAccessResource { @GET @Path("/list/{did}") def getAccessList( - @PathParam("did") did: UInteger + @PathParam("did") did: Integer ): util.List[AccessEntry] = { withTransaction(context) { ctx => val datasetDao = new DatasetDao(ctx.configuration()) @@ -145,7 +144,7 @@ class DatasetAccessResource { @PUT @Path("/grant/{did}/{email}/{privilege}") def grantAccess( - @PathParam("did") did: UInteger, + @PathParam("did") did: Integer, @PathParam("email") email: String, @PathParam("privilege") privilege: String ): Response = { @@ -156,7 +155,7 @@ class DatasetAccessResource { new DatasetUserAccess( did, userDao.fetchOneByEmail(email).getUid, - DatasetUserAccessPrivilege.valueOf(privilege) + PrivilegeEnum.valueOf(privilege) ) ) Response.ok().build() @@ -173,7 +172,7 @@ class DatasetAccessResource { @DELETE @Path("/revoke/{did}/{email}") def revokeAccess( - @PathParam("did") did: UInteger, + @PathParam("did") did: Integer, @PathParam("email") email: String ): Response = { withTransaction(context) { ctx => diff --git a/core/file-service/src/main/scala/edu/uci/ics/texera/service/resource/DatasetResource.scala b/core/file-service/src/main/scala/edu/uci/ics/texera/service/resource/DatasetResource.scala index 1eecf25119..79f638ce1d 100644 --- a/core/file-service/src/main/scala/edu/uci/ics/texera/service/resource/DatasetResource.scala +++ b/core/file-service/src/main/scala/edu/uci/ics/texera/service/resource/DatasetResource.scala @@ -1,53 +1,27 @@ package edu.uci.ics.texera.service.resource -import 
edu.uci.ics.amber.core.storage.{DocumentFactory, FileResolver, StorageConfig} -import edu.uci.ics.amber.core.storage.util.dataset.{ - GitVersionControlLocalFileStorage, - PhysicalFileNode -} +import edu.uci.ics.amber.core.storage.{DocumentFactory, FileResolver, LakeFSFileStorage, StorageConfig} +import edu.uci.ics.amber.core.storage.util.dataset.{GitVersionControlLocalFileStorage, PhysicalFileNode} import edu.uci.ics.amber.util.PathUtils import edu.uci.ics.texera.dao.SqlServer import edu.uci.ics.texera.dao.SqlServer.withTransaction -import edu.uci.ics.texera.dao.jooq.generated.enums.DatasetUserAccessPrivilege +import edu.uci.ics.texera.dao.jooq.generated.enums.PrivilegeEnum +import edu.uci.ics.texera.dao.jooq.generated.tables.Dataset.DATASET import edu.uci.ics.texera.dao.jooq.generated.tables.DatasetUserAccess.DATASET_USER_ACCESS import edu.uci.ics.texera.dao.jooq.generated.tables.DatasetVersion.DATASET_VERSION -import edu.uci.ics.texera.dao.jooq.generated.tables.daos.{ - DatasetDao, - DatasetVersionDao -} -import edu.uci.ics.texera.dao.jooq.generated.tables.pojos.{ - Dataset, - DatasetVersion, - User -} +import edu.uci.ics.texera.dao.jooq.generated.tables.daos.{DatasetDao, DatasetUserAccessDao, DatasetVersionDao} +import edu.uci.ics.texera.dao.jooq.generated.tables.pojos.{Dataset, DatasetUserAccess, DatasetVersion, User} import edu.uci.ics.texera.service.`type`.DatasetFileNode import edu.uci.ics.texera.service.auth.SessionUser -import edu.uci.ics.texera.service.resource.DatasetAccessResource.{ - getDatasetUserAccessPrivilege, - getOwner, - isDatasetPublic, - userHasReadAccess, - userHasWriteAccess -} -import edu.uci.ics.texera.service.resource.DatasetResource.{ - DATASET_IS_PRIVATE, - DATASET_IS_PUBLIC, - DashboardDataset, - DashboardDatasetVersion, - DatasetVersionRootFileNodesResponse, - calculateDatasetVersionSize, - context, - getDatasetByID, - getDatasetVersionByID, - getLatestDatasetVersion -} +import edu.uci.ics.texera.service.resource.DatasetAccessResource.{getDatasetUserAccessPrivilege, getOwner, isDatasetPublic, userHasReadAccess, userHasWriteAccess, userOwnDataset} +import edu.uci.ics.texera.service.resource.DatasetResource.{DATASET_IS_PRIVATE, DATASET_IS_PUBLIC, DashboardDataset, DashboardDatasetVersion, DatasetDescriptionModification, DatasetIDs, DatasetNameModification, DatasetVersionRootFileNodesResponse, calculateDatasetVersionSize, context, getDatasetByID, getDatasetVersionByID, getLatestDatasetVersion} import io.dropwizard.auth.Auth +import io.lakefs.clients.sdk.ApiException import jakarta.annotation.security.RolesAllowed import jakarta.ws.rs._ import jakarta.ws.rs.core.{MediaType, Response, StreamingOutput} import org.apache.commons.lang3.StringUtils import org.glassfish.jersey.media.multipart.FormDataParam -import org.jooq.types.UInteger import org.jooq.{DSLContext, EnumType} import java.io.{IOException, InputStream, OutputStream} @@ -68,11 +42,11 @@ object DatasetResource { private val FILE_OPERATION_UPLOAD_PREFIX = "file:upload:" private val FILE_OPERATION_REMOVE_PREFIX = "file:remove" - private val datasetLocks: scala.collection.concurrent.Map[UInteger, ReentrantLock] = - new scala.collection.concurrent.TrieMap[UInteger, ReentrantLock]() + private val datasetLocks: scala.collection.concurrent.Map[Integer, ReentrantLock] = + new scala.collection.concurrent.TrieMap[Integer, ReentrantLock]() private val context = SqlServer - .getInstance(StorageConfig.jdbcUrl, StorageConfig.jdbcUsername, StorageConfig.jdbcPassword) + .getInstance() .createDSLContext() /** @@ -81,7 
+55,7 @@ object DatasetResource { * @param versionHash the hash of the version. If None, fetch the latest version * @return */ - def calculateDatasetVersionSize(did: UInteger, versionHash: Option[String] = None): Long = { + def calculateDatasetVersionSize(did: Integer, versionHash: Option[String] = None): Long = { /** * Internal util to calculate the size from the physical nodes @@ -122,7 +96,7 @@ object DatasetResource { /** * Helper function to get the dataset from DB using did */ - private def getDatasetByID(ctx: DSLContext, did: UInteger): Dataset = { + private def getDatasetByID(ctx: DSLContext, did: Integer): Dataset = { val datasetDao = new DatasetDao(ctx.configuration()) val dataset = datasetDao.fetchOneByDid(did) if (dataset == null) { @@ -136,7 +110,7 @@ object DatasetResource { */ private def getDatasetVersionByID( ctx: DSLContext, - dvid: UInteger + dvid: Integer ): DatasetVersion = { val datasetVersionDao = new DatasetVersionDao(ctx.configuration()) val version = datasetVersionDao.fetchOneByDvid(dvid) @@ -151,7 +125,7 @@ object DatasetResource { */ private def getLatestDatasetVersion( ctx: DSLContext, - did: UInteger + did: Integer ): Option[DatasetVersion] = { ctx .selectFrom(DATASET_VERSION) @@ -176,7 +150,7 @@ object DatasetResource { * @return the created dataset version */ def createNewDatasetVersionByAddingFiles( - did: UInteger, + did: Integer, user: User, filesToAdd: Map[java.nio.file.Path, InputStream] ): Option[DashboardDatasetVersion] = { @@ -195,8 +169,8 @@ object DatasetResource { // concurrency control is performed here: the thread has to have the lock in order to create the new version private def applyDatasetOperationToCreateNewVersion( ctx: DSLContext, - did: UInteger, - uid: UInteger, + did: Integer, + uid: Integer, ownerEmail: String, userProvidedVersionName: String, datasetOperation: DatasetOperation @@ -205,7 +179,7 @@ object DatasetResource { // the format of dataset version name is: v{#n} - {user provided dataset version name}. e.g. 
v10 - new version def generateDatasetVersionName( ctx: DSLContext, - did: UInteger, + did: Integer, userProvidedVersionName: String ): String = { val numberOfExistingVersions = ctx @@ -300,11 +274,11 @@ object DatasetResource { fileNodes: List[DatasetFileNode] ) - case class DatasetIDs(dids: List[UInteger]) + case class DatasetIDs(dids: List[Integer]) - case class DatasetNameModification(did: UInteger, name: String) + case class DatasetNameModification(did: Integer, name: String) - case class DatasetDescriptionModification(did: UInteger, description: String) + case class DatasetDescriptionModification(did: Integer, description: String) case class DatasetVersionRootFileNodesResponse( fileNodes: List[DatasetFileNode], @@ -325,8 +299,8 @@ class DatasetResource { */ private def getDashboardDataset( ctx: DSLContext, - did: UInteger, - uid: Option[UInteger], + did: Integer, + uid: Option[Integer], isPublic: Boolean = false ): DashboardDataset = { if ( @@ -338,7 +312,7 @@ class DatasetResource { val targetDataset = getDatasetByID(ctx, did) val userAccessPrivilege = - if (isPublic) DatasetUserAccessPrivilege.NONE + if (isPublic) PrivilegeEnum.NONE else getDatasetUserAccessPrivilege(ctx, did, uid.get) val isOwner = !isPublic && (targetDataset.getOwnerUid == uid.get) @@ -352,157 +326,130 @@ class DatasetResource { ) } -// @POST -// @RolesAllowed(Array("REGULAR", "ADMIN")) -// @Path("/create") -// @Consumes(Array(MediaType.MULTIPART_FORM_DATA)) -// def createDataset( -// @Auth user: SessionUser, -// @FormDataParam("datasetName") datasetName: String, -// @FormDataParam("datasetDescription") datasetDescription: String, -// @FormDataParam("isDatasetPublic") isDatasetPublic: String, -// @FormDataParam("initialVersionName") initialVersionName: String, -// files: FormDataMultiPart -// ): DashboardDataset = { -// -// withTransaction(context) { ctx => -// val uid = user.getUid -// val datasetDao: DatasetDao = new DatasetDao(ctx.configuration()) -// val datasetOfUserDao: DatasetUserAccessDao = new DatasetUserAccessDao(ctx.configuration()) -// -// // do the name duplication check -// val userExistingDatasetNames = datasetDao.fetchByOwnerUid(uid).asScala.map(_.getName) -// if (userExistingDatasetNames.contains(datasetName)) { -// throw new BadRequestException("Dataset with the same name already exists") -// } -// -// val dataset: Dataset = new Dataset() -// dataset.setName(datasetName) -// dataset.setDescription(datasetDescription) -// dataset.setIsPublic(isDatasetPublic.toByte) -// dataset.setOwnerUid(uid) -// -// val createdDataset = ctx -// .insertInto(DATASET) -// .set(ctx.newRecord(DATASET, dataset)) -// .returning() -// .fetchOne() -// -// val did = createdDataset.getDid -// val datasetPath = PathUtils.getDatasetPath(did) -// -// val datasetUserAccess = new DatasetUserAccess() -// datasetUserAccess.setDid(createdDataset.getDid) -// datasetUserAccess.setUid(uid) -// datasetUserAccess.setPrivilege(DatasetUserAccessPrivilege.WRITE) -// datasetOfUserDao.insert(datasetUserAccess) -// -// // initialize the dataset directory -// GitVersionControlLocalFileStorage.initRepo(datasetPath) -//// createdVersion match { -//// case Some(_) => -//// case None => -//// // none means creation failed, user does not submit any files when creating the dataset -//// throw new BadRequestException(ERR_DATASET_CREATION_FAILED_MESSAGE) -//// } -// -// DashboardDataset( -// new Dataset( -// createdDataset.getDid, -// createdDataset.getOwnerUid, -// createdDataset.getName, -// createdDataset.getIsPublic, -// 
createdDataset.getDescription, -// createdDataset.getCreationTime -// ), -// user.getEmail, -// DatasetUserAccessPrivilege.WRITE, -// isOwner = true, -// versions = List(), -// size = calculateDatasetVersionSize(did) -// ) -// } -// } + @POST + @RolesAllowed(Array("REGULAR", "ADMIN")) + @Path("/create") + @Consumes(Array(MediaType.MULTIPART_FORM_DATA)) + def createDataset( + @Auth user: SessionUser, + @FormDataParam("datasetName") datasetName: String, + @FormDataParam("datasetDescription") datasetDescription: String, + @FormDataParam("isDatasetPublic") isDatasetPublic: String, + ): DashboardDataset = { -// @POST -// @RolesAllowed(Array("REGULAR", "ADMIN")) -// @Path("/delete") -// def deleteDataset(datasetIDs: DatasetIDs, @Auth user: SessionUser): Response = { -// val uid = user.getUid -// withTransaction(context) { ctx => -// val datasetDao = new DatasetDao(ctx.configuration()) -// for (did <- datasetIDs.dids) { -// if (!userOwnDataset(ctx, did, uid)) { -// // throw the exception that user has no access to certain dataset -// throw new ForbiddenException(ERR_USER_HAS_NO_ACCESS_TO_DATASET_MESSAGE) -// } -// // delete the dataset repo from the filesystem -// GitVersionControlLocalFileStorage.deleteRepo(PathUtils.getDatasetPath(did)) -// -// // delete the dataset from the DB -// datasetDao.deleteById(did) -// } -// -// Response.ok().build() -// } -// } + withTransaction(context) { ctx => + val uid = user.getUid + val datasetDao: DatasetDao = new DatasetDao(ctx.configuration()) + val datasetOfUserDao: DatasetUserAccessDao = new DatasetUserAccessDao(ctx.configuration()) -// @POST -// @Consumes(Array(MediaType.APPLICATION_JSON)) -// @Produces(Array(MediaType.APPLICATION_JSON)) -// @RolesAllowed(Array("REGULAR", "ADMIN")) -// @Path("/update/name") -// def updateDatasetName( -// modificator: DatasetNameModification, -// @Auth sessionUser: SessionUser -// ): Response = { -// withTransaction(context) { ctx => -// val datasetDao = new DatasetDao(ctx.configuration()) -// val uid = sessionUser.getUid -// val did = modificator.did -// val name = modificator.name -// if (!userHasWriteAccess(ctx, did, uid)) { -// throw new ForbiddenException(ERR_USER_HAS_NO_ACCESS_TO_DATASET_MESSAGE) -// } -// -// val existedDataset = getDatasetByID(ctx, did) -// existedDataset.setName(name) -// datasetDao.update(existedDataset) -// Response.ok().build() -// } -// } + // do the name duplication check + if (!datasetDao.fetchByName(datasetName).isEmpty) { + throw new BadRequestException("Dataset with the same name already exists") + } -// @POST -// @Consumes(Array(MediaType.APPLICATION_JSON)) -// @Produces(Array(MediaType.APPLICATION_JSON)) -// @RolesAllowed(Array("REGULAR", "ADMIN")) -// @Path("/update/description") -// def updateDatasetDescription( -// modificator: DatasetDescriptionModification, -// @Auth sessionUser: SessionUser -// ): Response = { -// withTransaction(context) { ctx => -// val datasetDao = new DatasetDao(ctx.configuration()) -// val uid = sessionUser.getUid -// val did = modificator.did -// val description = modificator.description -// -// if (!userHasWriteAccess(ctx, did, uid)) { -// throw new ForbiddenException(ERR_USER_HAS_NO_ACCESS_TO_DATASET_MESSAGE) -// } -// -// val existedDataset = getDatasetByID(ctx, did) -// existedDataset.setDescription(description) -// datasetDao.update(existedDataset) -// Response.ok().build() -// } -// } + // Try to initialize the repository in LakeFS + try { + LakeFSFileStorage.initRepo(datasetName) + } catch { + case e: Exception => + throw new 
WebApplicationException(s"Failed to initialize repository in LakeFS: ${e.getMessage}") + } + + // insert the dataset into database + val dataset: Dataset = new Dataset() + dataset.setName(datasetName) + dataset.setDescription(datasetDescription) + dataset.setIsPublic(isDatasetPublic.toBoolean) + dataset.setOwnerUid(uid) + + val createdDataset = ctx + .insertInto(DATASET) + .set(ctx.newRecord(DATASET, dataset)) + .returning() + .fetchOne() + + // insert requester as the write access of the dataset + val datasetUserAccess = new DatasetUserAccess() + datasetUserAccess.setDid(createdDataset.getDid) + datasetUserAccess.setUid(uid) + datasetUserAccess.setPrivilege(PrivilegeEnum.WRITE) + datasetOfUserDao.insert(datasetUserAccess) + + DashboardDataset( + new Dataset( + createdDataset.getDid, + createdDataset.getOwnerUid, + createdDataset.getName, + createdDataset.getIsPublic, + createdDataset.getDescription, + createdDataset.getCreationTime + ), + user.getEmail, + PrivilegeEnum.WRITE, + isOwner = true, + versions = List(), + size = 0 + ) + } + } + + @POST + @RolesAllowed(Array("REGULAR", "ADMIN")) + @Path("/delete") + def deleteDataset(datasetName: String, @Auth user: SessionUser): Response = { + val uid = user.getUid + withTransaction(context) { ctx => + val datasetDao = new DatasetDao(ctx.configuration()) + val dataset = datasetDao.fetchByName(datasetName).asScala.toList + if (dataset.isEmpty || !userOwnDataset(ctx, dataset.head.getDid, uid)) { + // throw the exception that user has no access to certain dataset + throw new ForbiddenException(ERR_USER_HAS_NO_ACCESS_TO_DATASET_MESSAGE) + } + try { + LakeFSFileStorage.deleteRepo(datasetName) + } catch { + case e: Exception => + throw new WebApplicationException(s"Failed to delete a repository in LakeFS: ${e.getMessage}", e) + } + + // delete the dataset from the DB + datasetDao.deleteById(dataset.head.getDid) + + Response.ok().build() + } + } + + @POST + @Consumes(Array(MediaType.APPLICATION_JSON)) + @Produces(Array(MediaType.APPLICATION_JSON)) + @RolesAllowed(Array("REGULAR", "ADMIN")) + @Path("/update/description") + def updateDatasetDescription( + modificator: DatasetDescriptionModification, + @Auth sessionUser: SessionUser + ): Response = { + withTransaction(context) { ctx => + val datasetDao = new DatasetDao(ctx.configuration()) + val uid = sessionUser.getUid + val did = modificator.did + val description = modificator.description + + if (!userHasWriteAccess(ctx, did, uid)) { + throw new ForbiddenException(ERR_USER_HAS_NO_ACCESS_TO_DATASET_MESSAGE) + } + + val existedDataset = getDatasetByID(ctx, did) + existedDataset.setDescription(description) + datasetDao.update(existedDataset) + Response.ok().build() + } + } @POST @RolesAllowed(Array("REGULAR", "ADMIN")) @Path("/{did}/update/publicity") def toggleDatasetPublicity( - @PathParam("did") did: UInteger, + @PathParam("did") did: Integer, @Auth sessionUser: SessionUser ): Response = { withTransaction(context) { ctx => @@ -514,11 +461,7 @@ class DatasetResource { } val existedDataset = getDatasetByID(ctx, did) - if (existedDataset.getIsPublic == DATASET_IS_PUBLIC) { - existedDataset.setIsPublic(DATASET_IS_PRIVATE) - } else { - existedDataset.setIsPublic(DATASET_IS_PUBLIC) - } + existedDataset.setIsPublic(!existedDataset.getIsPublic) datasetDao.update(existedDataset) Response.ok().build() @@ -527,10 +470,10 @@ class DatasetResource { @POST @RolesAllowed(Array("REGULAR", "ADMIN")) - @Path("/{did}/version/create") + @Path("/{name}/version/create") @Consumes(Array(MediaType.MULTIPART_FORM_DATA)) def 
createDatasetVersion( - @PathParam("did") did: UInteger, + @PathParam("name") name: String, @FormDataParam("versionName") versionName: String, @Auth user: SessionUser ): Unit = { @@ -543,10 +486,6 @@ class DatasetResource { def sessionTest( @Auth user: SessionUser ): Response = { - println(user.getName) - println(user.getEmail) - println(user.getGoogleId) - Response.ok().build() } @@ -610,7 +549,7 @@ class DatasetResource { // DashboardDataset( // isOwner = false, // dataset = dataset, -// accessPrivilege = DatasetUserAccessPrivilege.READ, +// accessPrivilege = PrivilegeEnum.READ, // versions = List(), // ownerEmail = ownerEmail, // size = calculateDatasetVersionSize(dataset.getDid) @@ -622,7 +561,7 @@ class DatasetResource { // isOwner = false, // dataset = publicDataset.dataset, // ownerEmail = publicDataset.ownerEmail, -// accessPrivilege = DatasetUserAccessPrivilege.READ, +// accessPrivilege = PrivilegeEnum.READ, // versions = List(), // size = calculateDatasetVersionSize(publicDataset.dataset.getDid) // ) @@ -636,24 +575,27 @@ class DatasetResource { @GET @RolesAllowed(Array("REGULAR", "ADMIN")) - @Path("/{did}/version/list") + @Path("/{name}/version/list") def getDatasetVersionList( - @PathParam("did") did: UInteger, + @PathParam("name") name: String, @Auth user: SessionUser ): List[DatasetVersion] = { val uid = user.getUid withTransaction(context)(ctx => { - if (!userHasReadAccess(ctx, did, uid)) { + val datasetDao = new DatasetDao(ctx.configuration()) + val datasets = datasetDao.fetchByName(name).asScala + if (datasets.isEmpty || !userHasReadAccess(ctx, datasets.head.getDid, uid)) { throw new ForbiddenException(ERR_USER_HAS_NO_ACCESS_TO_DATASET_MESSAGE) } - fetchDatasetVersions(ctx, did) + fetchDatasetVersions(ctx, datasets.head.getDid) }) } + // TODO: change did to name @GET - @Path("/{did}/publicVersion/list") + @Path("/{name}/publicVersion/list") def getPublicDatasetVersionList( - @PathParam("did") did: UInteger + @PathParam("name") did: Integer ): List[DatasetVersion] = { withTransaction(context)(ctx => { if (!isDatasetPublic(ctx, did)) { @@ -667,7 +609,7 @@ class DatasetResource { @RolesAllowed(Array("REGULAR", "ADMIN")) @Path("/{did}/version/latest") def retrieveLatestDatasetVersion( - @PathParam("did") did: UInteger, + @PathParam("did") did: Integer, @Auth user: SessionUser ): DashboardDatasetVersion = { val uid = user.getUid @@ -715,8 +657,8 @@ class DatasetResource { @RolesAllowed(Array("REGULAR", "ADMIN")) @Path("/{did}/version/{dvid}/rootFileNodes") def retrieveDatasetVersionRootFileNodes( - @PathParam("did") did: UInteger, - @PathParam("dvid") dvid: UInteger, + @PathParam("did") did: Integer, + @PathParam("dvid") dvid: Integer, @Auth user: SessionUser ): DatasetVersionRootFileNodesResponse = { val uid = user.getUid @@ -728,8 +670,8 @@ class DatasetResource { @GET @Path("/{did}/publicVersion/{dvid}/rootFileNodes") def retrievePublicDatasetVersionRootFileNodes( - @PathParam("did") did: UInteger, - @PathParam("dvid") dvid: UInteger + @PathParam("did") did: Integer, + @PathParam("dvid") dvid: Integer ): DatasetVersionRootFileNodesResponse = { withTransaction(context)(ctx => fetchDatasetVersionRootFileNodes(ctx, did, dvid, None, isPublic = true) @@ -740,7 +682,7 @@ class DatasetResource { @RolesAllowed(Array("REGULAR", "ADMIN")) @Path("/{did}") def getDataset( - @PathParam("did") did: UInteger, + @PathParam("did") did: Integer, @Auth user: SessionUser ): DashboardDataset = { val uid = user.getUid @@ -750,7 +692,7 @@ class DatasetResource { @GET @Path("/public/{did}") def 
getPublicDataset( - @PathParam("did") did: UInteger + @PathParam("did") did: Integer ): DashboardDataset = { withTransaction(context)(ctx => fetchDataset(ctx, did, None, isPublic = true)) } @@ -813,7 +755,7 @@ class DatasetResource { @RolesAllowed(Array("REGULAR", "ADMIN")) @Path("/version-zip") def retrieveDatasetVersionZip( - @QueryParam("did") did: UInteger, + @QueryParam("did") did: Integer, @QueryParam("dvid") dvid: Optional[Integer], @Auth user: SessionUser ): Response = { @@ -826,7 +768,7 @@ class DatasetResource { throw new NotFoundException(ERR_DATASET_VERSION_NOT_FOUND_MESSAGE) ) } else { - getDatasetVersionByID(context, UInteger.valueOf(dvid.get)) + getDatasetVersionByID(context, Integer.valueOf(dvid.get)) } val targetDatasetPath = PathUtils.getDatasetPath(dataset.getDid) val fileNodes = GitVersionControlLocalFileStorage.retrieveRootFileNodesOfVersion( @@ -887,8 +829,8 @@ class DatasetResource { @GET @Path("/datasetUserAccess") def datasetUserAccess( - @QueryParam("did") did: UInteger - ): java.util.List[UInteger] = { + @QueryParam("did") did: Integer + ): java.util.List[Integer] = { val records = context .select(DATASET_USER_ACCESS.UID) .from(DATASET_USER_ACCESS) @@ -898,7 +840,7 @@ class DatasetResource { records.getValues(DATASET_USER_ACCESS.UID) } - private def fetchDatasetVersions(ctx: DSLContext, did: UInteger): List[DatasetVersion] = { + private def fetchDatasetVersions(ctx: DSLContext, did: Integer): List[DatasetVersion] = { ctx .selectFrom(DATASET_VERSION) .where(DATASET_VERSION.DID.eq(did)) @@ -910,9 +852,9 @@ class DatasetResource { private def fetchDatasetVersionRootFileNodes( ctx: DSLContext, - did: UInteger, - dvid: UInteger, - uid: Option[UInteger], + did: Integer, + dvid: Integer, + uid: Option[Integer], isPublic: Boolean ): DatasetVersionRootFileNodesResponse = { val dataset = getDashboardDataset(ctx, did, uid, isPublic) @@ -948,8 +890,8 @@ class DatasetResource { private def fetchDataset( ctx: DSLContext, - did: UInteger, - uid: Option[UInteger], + did: Integer, + uid: Option[Integer], isPublic: Boolean ): DashboardDataset = { val dashboardDataset = getDashboardDataset(ctx, did, uid, isPublic) diff --git a/core/file-service/src/main/scala/edu/uci/ics/texera/service/type/DatasetFileNode.scala b/core/file-service/src/main/scala/edu/uci/ics/texera/service/type/dataset/DatasetFileNode.scala similarity index 100% rename from core/file-service/src/main/scala/edu/uci/ics/texera/service/type/DatasetFileNode.scala rename to core/file-service/src/main/scala/edu/uci/ics/texera/service/type/dataset/DatasetFileNode.scala diff --git a/core/workflow-core/src/main/scala/edu/uci/ics/amber/core/storage/LakeFSApiClientInstance.scala b/core/workflow-core/src/main/scala/edu/uci/ics/amber/core/storage/LakeFSApiClientInstance.scala index c8d10aeff4..b19004285d 100644 --- a/core/workflow-core/src/main/scala/edu/uci/ics/amber/core/storage/LakeFSApiClientInstance.scala +++ b/core/workflow-core/src/main/scala/edu/uci/ics/amber/core/storage/LakeFSApiClientInstance.scala @@ -24,8 +24,9 @@ object LakeFSApiClientInstance { ) ) - private val username: String = "AKIAIOSFOLQUICKSTART" - private val password: String = "wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY" + private val apiSecret: String = "random_string_for_lakefs" + private val accessKeyID: String = "AKIAJIWZ57BWHNDAGMPQ" + private val secretAccessKey: String = "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" /** * Retrieves the singleton LakeFS ApiClient instance. 
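The hard-coded access key ID and secret access key introduced above are the key pair issued by the lakeFS server at setup, and the new apiSecret appears to mirror the server's auth.encrypt.secret_key; all three are placeholder values that must match the deployed lakeFS instance. The hunk below wires them into the SDK client.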
@@ -37,8 +38,9 @@ object LakeFSApiClientInstance { case Some(client) => client case None => val apiClient = new ApiClient() - apiClient.setUsername(username) - apiClient.setPassword(password) + apiClient.setApiKey(apiSecret) + apiClient.setUsername(accessKeyID) + apiClient.setPassword(secretAccessKey) apiClient.setServers(servers.asJava) instance = Some(apiClient) apiClient diff --git a/core/workflow-core/src/main/scala/edu/uci/ics/amber/core/storage/LakeFSFileStorage.scala b/core/workflow-core/src/main/scala/edu/uci/ics/amber/core/storage/LakeFSFileStorage.scala index f2aee35d4e..1cbca33ec4 100644 --- a/core/workflow-core/src/main/scala/edu/uci/ics/amber/core/storage/LakeFSFileStorage.scala +++ b/core/workflow-core/src/main/scala/edu/uci/ics/amber/core/storage/LakeFSFileStorage.scala @@ -20,18 +20,29 @@ object LakeFSFileStorage { private lazy val commitsApi: CommitsApi = new CommitsApi(apiClient) private lazy val refsApi: RefsApi = new RefsApi(apiClient) + private val storageNamespaceURI: String = "s3://texera-dataset" /** * Initializes a new repository in LakeFS. * * @param repoName Name of the repository. - * @param storageNamespace Storage path (e.g., "s3://bucket-name/"). * @param defaultBranch Default branch name, usually "main". */ def initRepo( repoName: String, - storageNamespace: String, defaultBranch: String = "main" ): Repository = { + val repoNamePattern = "^[a-z0-9][a-z0-9-]{2,62}$".r + + // Validate repoName + if (!repoNamePattern.matches(repoName)) { + throw new IllegalArgumentException( + s"Invalid dataset name: '$repoName'. " + + "Dataset names must be 3-63 characters long, " + + "contain only lowercase letters, numbers, and hyphens, " + + "and cannot start or end with a hyphen." + ) + } + val storageNamespace = s"$storageNamespaceURI/$repoName" val repo = new RepositoryCreation() .name(repoName) .storageNamespace(storageNamespace) From f180f0c6fad90dcfc35053abce198a5f6120be96 Mon Sep 17 00:00:00 2001 From: Jiadong Bai Date: Sat, 22 Feb 2025 17:53:16 -0800 Subject: [PATCH 09/47] keep refactoring the dataset resource --- .../service/resource/DatasetResource.scala | 434 +++++++++--------- .../type/dataset/DatasetFileNode.scala | 81 ++++ .../core/storage/LakeFSFileStorage.scala | 11 +- 3 files changed, 297 insertions(+), 229 deletions(-) diff --git a/core/file-service/src/main/scala/edu/uci/ics/texera/service/resource/DatasetResource.scala b/core/file-service/src/main/scala/edu/uci/ics/texera/service/resource/DatasetResource.scala index 79f638ce1d..6b316f71f9 100644 --- a/core/file-service/src/main/scala/edu/uci/ics/texera/service/resource/DatasetResource.scala +++ b/core/file-service/src/main/scala/edu/uci/ics/texera/service/resource/DatasetResource.scala @@ -1,22 +1,56 @@ package edu.uci.ics.texera.service.resource -import edu.uci.ics.amber.core.storage.{DocumentFactory, FileResolver, LakeFSFileStorage, StorageConfig} -import edu.uci.ics.amber.core.storage.util.dataset.{GitVersionControlLocalFileStorage, PhysicalFileNode} +import edu.uci.ics.amber.core.storage.{ + DocumentFactory, + FileResolver, + LakeFSFileStorage, + StorageConfig +} +import edu.uci.ics.amber.core.storage.util.dataset.{ + GitVersionControlLocalFileStorage, + PhysicalFileNode +} import edu.uci.ics.amber.util.PathUtils import edu.uci.ics.texera.dao.SqlServer import edu.uci.ics.texera.dao.SqlServer.withTransaction import edu.uci.ics.texera.dao.jooq.generated.enums.PrivilegeEnum +import edu.uci.ics.texera.dao.jooq.generated.tables.User.USER import edu.uci.ics.texera.dao.jooq.generated.tables.Dataset.DATASET 
import edu.uci.ics.texera.dao.jooq.generated.tables.DatasetUserAccess.DATASET_USER_ACCESS import edu.uci.ics.texera.dao.jooq.generated.tables.DatasetVersion.DATASET_VERSION -import edu.uci.ics.texera.dao.jooq.generated.tables.daos.{DatasetDao, DatasetUserAccessDao, DatasetVersionDao} -import edu.uci.ics.texera.dao.jooq.generated.tables.pojos.{Dataset, DatasetUserAccess, DatasetVersion, User} +import edu.uci.ics.texera.dao.jooq.generated.tables.daos.{ + DatasetDao, + DatasetUserAccessDao, + DatasetVersionDao +} +import edu.uci.ics.texera.dao.jooq.generated.tables.pojos.{ + Dataset, + DatasetUserAccess, + DatasetVersion, + User +} import edu.uci.ics.texera.service.`type`.DatasetFileNode import edu.uci.ics.texera.service.auth.SessionUser -import edu.uci.ics.texera.service.resource.DatasetAccessResource.{getDatasetUserAccessPrivilege, getOwner, isDatasetPublic, userHasReadAccess, userHasWriteAccess, userOwnDataset} -import edu.uci.ics.texera.service.resource.DatasetResource.{DATASET_IS_PRIVATE, DATASET_IS_PUBLIC, DashboardDataset, DashboardDatasetVersion, DatasetDescriptionModification, DatasetIDs, DatasetNameModification, DatasetVersionRootFileNodesResponse, calculateDatasetVersionSize, context, getDatasetByID, getDatasetVersionByID, getLatestDatasetVersion} +import edu.uci.ics.texera.service.resource.DatasetAccessResource.{ + getDatasetUserAccessPrivilege, + getOwner, + isDatasetPublic, + userHasReadAccess, + userHasWriteAccess, + userOwnDataset +} +import edu.uci.ics.texera.service.resource.DatasetResource.{ + DashboardDataset, + DashboardDatasetVersion, + DatasetDescriptionModification, + DatasetVersionRootFileNodesResponse, + calculateDatasetVersionSize, + context, + getDatasetByID, + getDatasetVersionByID, + getLatestDatasetVersion +} import io.dropwizard.auth.Auth -import io.lakefs.clients.sdk.ApiException import jakarta.annotation.security.RolesAllowed import jakarta.ws.rs._ import jakarta.ws.rs.core.{MediaType, Response, StreamingOutput} @@ -31,66 +65,49 @@ import java.nio.file.Files import java.util.Optional import java.util.concurrent.locks.ReentrantLock import java.util.zip.{ZipEntry, ZipOutputStream} +import scala.collection.mutable.ListBuffer import scala.jdk.CollectionConverters._ import scala.jdk.OptionConverters._ import scala.util.control.NonFatal import scala.util.{Failure, Success, Try, Using} object DatasetResource { - private val DATASET_IS_PUBLIC: Byte = 1 - private val DATASET_IS_PRIVATE: Byte = 0 - private val FILE_OPERATION_UPLOAD_PREFIX = "file:upload:" - private val FILE_OPERATION_REMOVE_PREFIX = "file:remove" - - private val datasetLocks: scala.collection.concurrent.Map[Integer, ReentrantLock] = - new scala.collection.concurrent.TrieMap[Integer, ReentrantLock]() - private val context = SqlServer .getInstance() .createDSLContext() /** - * fetch the size of a certain dataset version. - * @param did the target dataset id - * @param versionHash the hash of the version. If None, fetch the latest version - * @return + * Fetch the size of a certain dataset version. + * + * @param name The target dataset's name (LakeFS repository name). + * @param versionHash The hash of the version. If None, fetch the latest version. + * @return The total size of all objects in the dataset version. + * @throws NoSuchElementException If the version hash is not found in the repository. 
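+   * @example A hypothetical call, for illustration only (dataset name and commit hash are not from this patch):
+   *          calculateDatasetVersionSize("sales-data")                   // size of the latest commit
+   *          calculateDatasetVersionSize("sales-data", Some(commitHash)) // size of a specific commit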
*/ - def calculateDatasetVersionSize(did: Integer, versionHash: Option[String] = None): Long = { - - /** - * Internal util to calculate the size from the physical nodes - */ - def calculateSizeFromPhysicalNodes(nodes: java.util.Set[PhysicalFileNode]): Long = { - nodes.asScala.foldLeft(0L) { (totalSize, node) => - totalSize + (if (node.isDirectory) { - calculateSizeFromPhysicalNodes(node.getChildren) - } else { - node.getSize - }) - } + def calculateDatasetVersionSize(name: String, versionHash: Option[String] = None): Long = { + // Retrieve all commits (versions) of the dataset repository + val commits = LakeFSFileStorage.retrieveVersionsOfRepository(name) + + // Determine the target commit + val targetCommit = versionHash match { + case Some(hash) => + commits + .find(_.getId == hash) + .getOrElse( + throw new NoSuchElementException( + s"Version hash '$hash' not found in repository '$name'" + ) + ) + case None => + commits.headOption // The latest commit (commits are sorted from latest to earliest) + .getOrElse(throw new NoSuchElementException(s"No versions found for dataset '$name'")) } - Try { - val datasetPath = PathUtils.getDatasetPath(did) - val hash = versionHash.getOrElse { - getLatestDatasetVersion(context, did) - .map(_.getVersionHash) - .getOrElse(throw new NoSuchElementException("No versions found for this dataset")) - } - - val fileNodes = GitVersionControlLocalFileStorage.retrieveRootFileNodesOfVersion( - datasetPath, - hash - ) + // Retrieve objects of the target version and sum up their sizes + val objects = LakeFSFileStorage.retrieveObjectsOfVersion(name, targetCommit.getId) - calculateSizeFromPhysicalNodes(fileNodes) - } match { - case Success(size) => size - case Failure(exception) => - val errorMessage = versionHash.map(_ => "dataset version").getOrElse("dataset") - println(s"Error calculating $errorMessage size: ${exception.getMessage}") - 0L - } + // Sum the sizes of all objects in the dataset version + objects.map(_.getSizeBytes.longValue()).sum } /** @@ -199,75 +216,61 @@ object DatasetResource { res } - // Acquire or Create the lock for dataset of {did} - val lock = DatasetResource.datasetLocks.getOrElseUpdate(did, new ReentrantLock()) - - if (lock.isLocked) { + val dataset = getDatasetByID(ctx, did) + val datasetPath = PathUtils.getDatasetPath(did) + if (datasetOperation.filesToAdd.isEmpty && datasetOperation.filesToRemove.isEmpty) { return None } - lock.lock() - try { - val dataset = getDatasetByID(ctx, did) - val datasetPath = PathUtils.getDatasetPath(did) - if (datasetOperation.filesToAdd.isEmpty && datasetOperation.filesToRemove.isEmpty) { - return None - } - val datasetName = dataset.getName - val versionName = generateDatasetVersionName(ctx, did, userProvidedVersionName) - val commitHash = GitVersionControlLocalFileStorage.withCreateVersion( - datasetPath, - versionName, - () => { - datasetOperation.filesToAdd.foreach { - case (filePath, fileStream) => - GitVersionControlLocalFileStorage.writeFileToRepo(datasetPath, filePath, fileStream) - } + val datasetName = dataset.getName + val versionName = generateDatasetVersionName(ctx, did, userProvidedVersionName) + val commitHash = GitVersionControlLocalFileStorage.withCreateVersion( + datasetPath, + versionName, + () => { + datasetOperation.filesToAdd.foreach { + case (filePath, fileStream) => + GitVersionControlLocalFileStorage.writeFileToRepo(datasetPath, filePath, fileStream) + } - datasetOperation.filesToRemove.foreach { fileUri => - DocumentFactory.openDocument(fileUri)._1.clear() - } + 
datasetOperation.filesToRemove.foreach { fileUri => + DocumentFactory.openDocument(fileUri)._1.clear() } - ) + } + ) - // create the DatasetVersion that persists in the DB - val datasetVersion = new DatasetVersion() - - datasetVersion.setName(versionName) - datasetVersion.setDid(did) - datasetVersion.setCreatorUid(uid) - datasetVersion.setVersionHash(commitHash) - - val physicalFileNodes = - GitVersionControlLocalFileStorage.retrieveRootFileNodesOfVersion(datasetPath, commitHash) - Some( - DashboardDatasetVersion( - // insert the dataset version into DB, and fetch the newly-inserted one. - ctx - .insertInto(DATASET_VERSION) // Assuming DATASET is the table reference - .set(ctx.newRecord(DATASET_VERSION, datasetVersion)) - .returning() // Assuming ID is the primary key column - .fetchOne() - .into(classOf[DatasetVersion]), - DatasetFileNode.fromPhysicalFileNodes( - Map( - (ownerEmail, datasetName, versionName) -> physicalFileNodes.asScala.toList - ) + // create the DatasetVersion that persists in the DB + val datasetVersion = new DatasetVersion() + + datasetVersion.setName(versionName) + datasetVersion.setDid(did) + datasetVersion.setCreatorUid(uid) + datasetVersion.setVersionHash(commitHash) + + val physicalFileNodes = + GitVersionControlLocalFileStorage.retrieveRootFileNodesOfVersion(datasetPath, commitHash) + Some( + DashboardDatasetVersion( + // insert the dataset version into DB, and fetch the newly-inserted one. + ctx + .insertInto(DATASET_VERSION) // Assuming DATASET is the table reference + .set(ctx.newRecord(DATASET_VERSION, datasetVersion)) + .returning() // Assuming ID is the primary key column + .fetchOne() + .into(classOf[DatasetVersion]), + DatasetFileNode.fromPhysicalFileNodes( + Map( + (ownerEmail, datasetName, versionName) -> physicalFileNodes.asScala.toList ) ) ) - } finally { - // Release the lock - lock.unlock() - } + ) } case class DashboardDataset( dataset: Dataset, ownerEmail: String, accessPrivilege: EnumType, - isOwner: Boolean, - versions: List[DashboardDatasetVersion], - size: Long + isOwner: Boolean ) case class DashboardDatasetVersion( datasetVersion: DatasetVersion, @@ -276,9 +279,7 @@ object DatasetResource { case class DatasetIDs(dids: List[Integer]) - case class DatasetNameModification(did: Integer, name: String) - - case class DatasetDescriptionModification(did: Integer, description: String) + case class DatasetDescriptionModification(name: String, description: String) case class DatasetVersionRootFileNodesResponse( fileNodes: List[DatasetFileNode], @@ -289,7 +290,7 @@ object DatasetResource { @Produces(Array(MediaType.APPLICATION_JSON, "image/jpeg", "application/pdf")) @Path("/dataset") class DatasetResource { - private val ERR_USER_HAS_NO_ACCESS_TO_DATASET_MESSAGE = "User has no read access to this dataset" + private val ERR_USER_HAS_NO_ACCESS_TO_DATASET_MESSAGE = "User has no access to this dataset" private val ERR_DATASET_VERSION_NOT_FOUND_MESSAGE = "The version of the dataset not found" private val ERR_DATASET_CREATION_FAILED_MESSAGE = "Dataset creation is failed. 
Please make sure to upload files in order to create the initial version of dataset" @@ -320,9 +321,7 @@ class DatasetResource { targetDataset, getOwner(ctx, did).getEmail, userAccessPrivilege, - isOwner, - List(), - calculateDatasetVersionSize(did) + isOwner ) } @@ -334,7 +333,7 @@ class DatasetResource { @Auth user: SessionUser, @FormDataParam("datasetName") datasetName: String, @FormDataParam("datasetDescription") datasetDescription: String, - @FormDataParam("isDatasetPublic") isDatasetPublic: String, + @FormDataParam("isDatasetPublic") isDatasetPublic: String ): DashboardDataset = { withTransaction(context) { ctx => @@ -352,7 +351,9 @@ class DatasetResource { LakeFSFileStorage.initRepo(datasetName) } catch { case e: Exception => - throw new WebApplicationException(s"Failed to initialize repository in LakeFS: ${e.getMessage}") + throw new WebApplicationException( + s"Failed to initialize repository in LakeFS: ${e.getMessage}" + ) } // insert the dataset into database @@ -386,9 +387,7 @@ class DatasetResource { ), user.getEmail, PrivilegeEnum.WRITE, - isOwner = true, - versions = List(), - size = 0 + isOwner = true ) } } @@ -409,7 +408,10 @@ class DatasetResource { LakeFSFileStorage.deleteRepo(datasetName) } catch { case e: Exception => - throw new WebApplicationException(s"Failed to delete a repository in LakeFS: ${e.getMessage}", e) + throw new WebApplicationException( + s"Failed to delete a repository in LakeFS: ${e.getMessage}", + e + ) } // delete the dataset from the DB @@ -429,18 +431,17 @@ class DatasetResource { @Auth sessionUser: SessionUser ): Response = { withTransaction(context) { ctx => - val datasetDao = new DatasetDao(ctx.configuration()) val uid = sessionUser.getUid - val did = modificator.did - val description = modificator.description - if (!userHasWriteAccess(ctx, did, uid)) { + val datasetDao = new DatasetDao(ctx.configuration()) + val datasets = datasetDao.fetchByName(modificator.name).asScala.toList + if (datasets.isEmpty || !userHasWriteAccess(ctx, datasets.head.getDid, uid)) { throw new ForbiddenException(ERR_USER_HAS_NO_ACCESS_TO_DATASET_MESSAGE) } - val existedDataset = getDatasetByID(ctx, did) - existedDataset.setDescription(description) - datasetDao.update(existedDataset) + val datasetToChange = datasets.head + datasetToChange.setDescription(modificator.description) + datasetDao.update(datasetToChange) Response.ok().build() } } @@ -478,100 +479,86 @@ class DatasetResource { @Auth user: SessionUser ): Unit = { val uid = user.getUid + // TODO: finish it } + /** + * This method returns a list of DashboardDatasets objects that are accessible by current user. 
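+   * The result is the union of datasets the user has explicit access to and all public datasets,
+   * de-duplicated by dataset id, as built below.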
+ * + * @param user the session user + * @return list of user accessible DashboardDataset objects + */ @GET @RolesAllowed(Array("REGULAR", "ADMIN")) - @Path("/test") - def sessionTest( + @Path("/list") + def listDatasets( @Auth user: SessionUser - ): Response = { - Response.ok().build() - } + ): List[DashboardDataset] = { + val uid = user.getUid + withTransaction(context)(ctx => { + var accessibleDatasets: ListBuffer[DashboardDataset] = ListBuffer() + // first fetch all datasets user have explicit access to + accessibleDatasets = ListBuffer.from( + ctx + .select() + .from( + DATASET + .leftJoin(DATASET_USER_ACCESS) + .on(DATASET_USER_ACCESS.DID.eq(DATASET.DID)) + .leftJoin(USER) + .on(USER.UID.eq(DATASET.OWNER_UID)) + ) + .where(DATASET_USER_ACCESS.UID.eq(uid)) + .fetch() + .map(record => { + val dataset = record.into(DATASET).into(classOf[Dataset]) + val datasetAccess = record.into(DATASET_USER_ACCESS).into(classOf[DatasetUserAccess]) + val ownerEmail = record.into(USER).getEmail + DashboardDataset( + isOwner = dataset.getOwnerUid == uid, + dataset = dataset, + accessPrivilege = datasetAccess.getPrivilege, + ownerEmail = ownerEmail + ) + }) + .asScala + ) - // /** -// * This method returns a list of DashboardDatasets objects that are accessible by current user. -// * -// * @param user the session user -// * @return list of user accessible DashboardDataset objects -// */ -// @GET -// @RolesAllowed(Array("REGULAR", "ADMIN")) -// @Path("/list") -// def listDatasets( -// @Auth user: SessionUser -// ): List[DashboardDataset] = { -// val uid = user.getUid -// withTransaction(context)(ctx => { -// var accessibleDatasets: ListBuffer[DashboardDataset] = ListBuffer() -// // first fetch all datasets user have explicit access to -// accessibleDatasets = ListBuffer.from( -// ctx -// .select() -// .from( -// DATASET -// .leftJoin(DATASET_USER_ACCESS) -// .on(DATASET_USER_ACCESS.DID.eq(DATASET.DID)) -// .leftJoin(USER) -// .on(USER.UID.eq(DATASET.OWNER_UID)) -// ) -// .where(DATASET_USER_ACCESS.UID.eq(uid)) -// .fetch() -// .map(record => { -// val dataset = record.into(DATASET).into(classOf[Dataset]) -// val datasetAccess = record.into(DATASET_USER_ACCESS).into(classOf[DatasetUserAccess]) -// val ownerEmail = record.into(USER).getEmail -// DashboardDataset( -// isOwner = dataset.getOwnerUid == uid, -// dataset = dataset, -// accessPrivilege = datasetAccess.getPrivilege, -// versions = List(), -// ownerEmail = ownerEmail, -// size = calculateDatasetVersionSize(dataset.getDid) -// ) -// }) -// .asScala -// ) -// -// // then we fetch the public datasets and merge it as a part of the result if not exist -// val publicDatasets = ctx -// .select() -// .from( -// DATASET -// .leftJoin(USER) -// .on(USER.UID.eq(DATASET.OWNER_UID)) -// ) -// .where(DATASET.IS_PUBLIC.eq(DATASET_IS_PUBLIC)) -// .fetch() -// .map(record => { -// val dataset = record.into(DATASET).into(classOf[Dataset]) -// val ownerEmail = record.into(USER).getEmail -// DashboardDataset( -// isOwner = false, -// dataset = dataset, -// accessPrivilege = PrivilegeEnum.READ, -// versions = List(), -// ownerEmail = ownerEmail, -// size = calculateDatasetVersionSize(dataset.getDid) -// ) -// }) -// publicDatasets.forEach { publicDataset => -// if (!accessibleDatasets.exists(_.dataset.getDid == publicDataset.dataset.getDid)) { -// val dashboardDataset = DashboardDataset( -// isOwner = false, -// dataset = publicDataset.dataset, -// ownerEmail = publicDataset.ownerEmail, -// accessPrivilege = PrivilegeEnum.READ, -// versions = List(), -// size = 
calculateDatasetVersionSize(publicDataset.dataset.getDid) -// ) -// accessibleDatasets = accessibleDatasets :+ dashboardDataset -// } -// } -// -// accessibleDatasets.toList -// }) -// } + // then we fetch the public datasets and merge it as a part of the result if not exist + val publicDatasets = ctx + .select() + .from( + DATASET + .leftJoin(USER) + .on(USER.UID.eq(DATASET.OWNER_UID)) + ) + .where(DATASET.IS_PUBLIC.eq(true)) + .fetch() + .map(record => { + val dataset = record.into(DATASET).into(classOf[Dataset]) + val ownerEmail = record.into(USER).getEmail + DashboardDataset( + isOwner = false, + dataset = dataset, + accessPrivilege = PrivilegeEnum.READ, + ownerEmail = ownerEmail + ) + }) + publicDatasets.forEach { publicDataset => + if (!accessibleDatasets.exists(_.dataset.getDid == publicDataset.dataset.getDid)) { + val dashboardDataset = DashboardDataset( + isOwner = false, + dataset = publicDataset.dataset, + ownerEmail = publicDataset.ownerEmail, + accessPrivilege = PrivilegeEnum.READ + ) + accessibleDatasets = accessibleDatasets :+ dashboardDataset + } + } + + accessibleDatasets.toList + }) + } @GET @RolesAllowed(Array("REGULAR", "ADMIN")) @@ -621,19 +608,13 @@ class DatasetResource { val latestVersion = getLatestDatasetVersion(ctx, did).getOrElse( throw new NotFoundException(ERR_DATASET_VERSION_NOT_FOUND_MESSAGE) ) - val datasetPath = PathUtils.getDatasetPath(did) val ownerNode = DatasetFileNode - .fromPhysicalFileNodes( + .fromLakeFSRepositoryCommittedObjects( Map( (user.getEmail, dataset.getName, latestVersion.getName) -> - GitVersionControlLocalFileStorage - .retrieveRootFileNodesOfVersion( - datasetPath, - latestVersion.getVersionHash - ) - .asScala - .toList + LakeFSFileStorage + .retrieveObjectsOfVersion(dataset.getName, latestVersion.getVersionHash) ) ) .head @@ -866,7 +847,7 @@ class DatasetResource { datasetVersion.getVersionHash ) val versionHash = datasetVersion.getVersionHash - val size = calculateDatasetVersionSize(did, Some(versionHash)) + val size = calculateDatasetVersionSize(datasetName, Some(versionHash)) val ownerFileNode = DatasetFileNode .fromPhysicalFileNodes( @@ -895,7 +876,6 @@ class DatasetResource { isPublic: Boolean ): DashboardDataset = { val dashboardDataset = getDashboardDataset(ctx, did, uid, isPublic) - val size = calculateDatasetVersionSize(did) - dashboardDataset.copy(size = size) + dashboardDataset } } diff --git a/core/file-service/src/main/scala/edu/uci/ics/texera/service/type/dataset/DatasetFileNode.scala b/core/file-service/src/main/scala/edu/uci/ics/texera/service/type/dataset/DatasetFileNode.scala index 365ac7310f..22bef96d08 100644 --- a/core/file-service/src/main/scala/edu/uci/ics/texera/service/type/dataset/DatasetFileNode.scala +++ b/core/file-service/src/main/scala/edu/uci/ics/texera/service/type/dataset/DatasetFileNode.scala @@ -1,6 +1,7 @@ package edu.uci.ics.texera.service.`type` import edu.uci.ics.amber.core.storage.util.dataset.PhysicalFileNode +import io.lakefs.clients.sdk.model.ObjectStats import java.util import scala.collection.mutable @@ -49,6 +50,86 @@ class DatasetFileNode( } object DatasetFileNode { + + /** + * Converts a map of LakeFS committed objects into a structured dataset file node tree. + * + * @param map A mapping from `(ownerEmail, datasetName, versionName)` to a list of committed objects. + * @return A list of root-level dataset file nodes. 
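+   * Example (hypothetical input): objects with paths "a/b.csv" and "a/c/d.txt" under the key
+   * (ownerEmail, datasetName, versionName) yield the tree
+   * ownerEmail / datasetName / versionName / a / { b.csv, c / d.txt }.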
+ */ + def fromLakeFSRepositoryCommittedObjects( + map: Map[(String, String, String), List[ObjectStats]] + ): List[DatasetFileNode] = { + val rootNode = new DatasetFileNode("/", "directory", null, "") + + // Owner level nodes map + val ownerNodes = mutable.Map[String, DatasetFileNode]() + + map.foreach { + case ((ownerEmail, datasetName, versionName), objects) => + val ownerNode = ownerNodes.getOrElseUpdate( + ownerEmail, { + val newNode = new DatasetFileNode(ownerEmail, "directory", rootNode, ownerEmail) + rootNode.children = Some(rootNode.getChildren :+ newNode) + newNode + } + ) + + val datasetNode = ownerNode.getChildren.find(_.getName == datasetName).getOrElse { + val newNode = new DatasetFileNode(datasetName, "directory", ownerNode, ownerEmail) + ownerNode.children = Some(ownerNode.getChildren :+ newNode) + newNode + } + + val versionNode = datasetNode.getChildren.find(_.getName == versionName).getOrElse { + val newNode = new DatasetFileNode(versionName, "directory", datasetNode, ownerEmail) + datasetNode.children = Some(datasetNode.getChildren :+ newNode) + newNode + } + + // Directory map for efficient lookups + val directoryMap = mutable.Map[String, DatasetFileNode]() + directoryMap("") = versionNode // Root of the dataset version + + // Process each object (file or directory) from LakeFS + objects.foreach { obj => + val pathParts = obj.getPath.split("/").toList + var currentPath = "" + var parentNode: DatasetFileNode = versionNode + + pathParts.foreach { part => + currentPath = if (currentPath.isEmpty) part else s"$currentPath/$part" + + val isFile = pathParts.last == part + val nodeType = if (isFile) "file" else "directory" + val fileSize = if (isFile) Some(obj.getSizeBytes.longValue()) else None + + val existingNode = directoryMap.get(currentPath) + + val node = existingNode.getOrElse { + val newNode = new DatasetFileNode(part, nodeType, parentNode, ownerEmail, fileSize) + parentNode.children = Some(parentNode.getChildren :+ newNode) + if (!isFile) directoryMap(currentPath) = newNode + newNode + } + + parentNode = node // Move parent reference deeper for next iteration + } + } + } + + // Sorting function to sort children of a node alphabetically in descending order + def sortChildren(node: DatasetFileNode): Unit = { + node.children = Some(node.getChildren.sortBy(_.getName)(Ordering.String.reverse)) + node.getChildren.foreach(sortChildren) + } + + // Apply the sorting to the root node + sortChildren(rootNode) + + rootNode.getChildren + } + def fromPhysicalFileNodes( map: Map[(String, String, String), List[PhysicalFileNode]] ): List[DatasetFileNode] = { diff --git a/core/workflow-core/src/main/scala/edu/uci/ics/amber/core/storage/LakeFSFileStorage.scala b/core/workflow-core/src/main/scala/edu/uci/ics/amber/core/storage/LakeFSFileStorage.scala index 1cbca33ec4..def2c3b4ed 100644 --- a/core/workflow-core/src/main/scala/edu/uci/ics/amber/core/storage/LakeFSFileStorage.scala +++ b/core/workflow-core/src/main/scala/edu/uci/ics/amber/core/storage/LakeFSFileStorage.scala @@ -21,6 +21,7 @@ object LakeFSFileStorage { private lazy val refsApi: RefsApi = new RefsApi(apiClient) private val storageNamespaceURI: String = "s3://texera-dataset" + /** * Initializes a new repository in LakeFS. 
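   * For example (hypothetical name), initRepo("sales-data") creates repository "sales-data"
   * under s3://texera-dataset/sales-data on the default branch "main"; the name must pass the
   * validation added earlier (3-63 characters of lowercase letters, digits, and hyphens).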
* @@ -133,8 +134,14 @@ object LakeFSFileStorage { repoApi.deleteRepository(repoName).execute() } - def retrieveVersionsOfRepository(repoName: String, branchName: String): List[Commit] = { - refsApi.logCommits(repoName, branchName).execute().getResults.asScala.toList + def retrieveVersionsOfRepository(repoName: String, branchName: String = "main"): List[Commit] = { + refsApi + .logCommits(repoName, branchName) + .execute() + .getResults + .asScala + .toList + .sortBy(_.getCreationDate)(Ordering[java.lang.Long].reverse) // Sort in descending order } def retrieveObjectsOfVersion(repoName: String, commitHash: String): List[ObjectStats] = { From c329e3f68b0d4e6792778cd851dc136625c1c4ba Mon Sep 17 00:00:00 2001 From: Jiadong Bai Date: Sat, 22 Feb 2025 18:30:42 -0800 Subject: [PATCH 10/47] succinct the config parsing --- ...nfig.yaml => file-service-web-config.yaml} | 0 .../src/main/resources/lakefs-config.yaml | 20 ------- .../uci/ics/texera/service/FileService.scala | 2 +- .../src/main/resources/storage-config.yaml | 13 ++++ .../storage/LakeFSApiClientInstance.scala | 59 ------------------- .../core/storage/LakeFSFileStorage.scala | 19 +++++- .../amber/core/storage/StorageConfig.scala | 25 ++++++++ 7 files changed, 56 insertions(+), 82 deletions(-) rename core/file-service/src/main/resources/{file-service-config.yaml => file-service-web-config.yaml} (100%) delete mode 100644 core/file-service/src/main/resources/lakefs-config.yaml delete mode 100644 core/workflow-core/src/main/scala/edu/uci/ics/amber/core/storage/LakeFSApiClientInstance.scala diff --git a/core/file-service/src/main/resources/file-service-config.yaml b/core/file-service/src/main/resources/file-service-web-config.yaml similarity index 100% rename from core/file-service/src/main/resources/file-service-config.yaml rename to core/file-service/src/main/resources/file-service-web-config.yaml diff --git a/core/file-service/src/main/resources/lakefs-config.yaml b/core/file-service/src/main/resources/lakefs-config.yaml deleted file mode 100644 index bc3e4dc707..0000000000 --- a/core/file-service/src/main/resources/lakefs-config.yaml +++ /dev/null @@ -1,20 +0,0 @@ ---- -database: - type: "postgres" - postgres: - connection_string: "postgresql://texera_lakefs_admin:password@localhost:5432/texera_lakefs" - -auth: - encrypt: - # Replace this with a randomly-generated string. Keep it safe! 
- secret_key: "random_string_for_lakefs" - -blockstore: - type: s3 - s3: - force_path_style: true - endpoint: http://localhost:9500 # MinIO API now runs on port 9500 - discover_bucket_region: false - credentials: - access_key_id: "texera_minio" # Matches MINIO_ROOT_USER from docker-compose - secret_access_key: "password" # Matches MINIO_ROOT_PASSWORD from docker-compose \ No newline at end of file diff --git a/core/file-service/src/main/scala/edu/uci/ics/texera/service/FileService.scala b/core/file-service/src/main/scala/edu/uci/ics/texera/service/FileService.scala index feca85ba10..fd0ee2f83e 100644 --- a/core/file-service/src/main/scala/edu/uci/ics/texera/service/FileService.scala +++ b/core/file-service/src/main/scala/edu/uci/ics/texera/service/FileService.scala @@ -50,7 +50,7 @@ object FileService { .resolve("src") .resolve("main") .resolve("resources") - .resolve("file-service-config.yaml") + .resolve("file-service-web-config.yaml") .toAbsolutePath .toString diff --git a/core/workflow-core/src/main/resources/storage-config.yaml b/core/workflow-core/src/main/resources/storage-config.yaml index db01f33d90..0f9a69cb0c 100644 --- a/core/workflow-core/src/main/resources/storage-config.yaml +++ b/core/workflow-core/src/main/resources/storage-config.yaml @@ -26,6 +26,19 @@ storage: num-retries: 10 min-wait-ms: 100 # 0.1s max-wait-ms: 10000 # 10s + + lakefs: + endpoint: "http://127.0.0.1:8000/api/v1" + auth: + api-secret: "random_string_for_lakefs" + username: "AKIAJIWZ57BWHNDAGMPQ" + password: "Y5e/aFeE+ZM1AahSCCEvH+GXkFZq4Y3qihExq2fw" + s3: + endpoint: "http://localhost:9500" + auth: + username: "texera_minio" + password: "password" + jdbc: url: "jdbc:postgresql://localhost:5432/texera_db?currentSchema=texera_db,public" username: "postgres" diff --git a/core/workflow-core/src/main/scala/edu/uci/ics/amber/core/storage/LakeFSApiClientInstance.scala b/core/workflow-core/src/main/scala/edu/uci/ics/amber/core/storage/LakeFSApiClientInstance.scala deleted file mode 100644 index b19004285d..0000000000 --- a/core/workflow-core/src/main/scala/edu/uci/ics/amber/core/storage/LakeFSApiClientInstance.scala +++ /dev/null @@ -1,59 +0,0 @@ -package edu.uci.ics.amber.core.storage - -import io.lakefs.clients.sdk.{ApiClient, ServerConfiguration, ServerVariable} - -import java.util -import scala.jdk.CollectionConverters._ - -/** - * LakeFSApiClientInstance is a singleton that manages the LakeFS ApiClient instance. - * - Provides a single shared ApiClient for all LakeFS operations in the Texera application. - * - Lazily initializes the client on first access. - * - Supports replacing the client instance primarily for testing or reconfiguration. - */ -object LakeFSApiClientInstance { - - private var instance: Option[ApiClient] = None - - // Constant server configuration list - private val servers: List[ServerConfiguration] = List( - new ServerConfiguration( - "http://127.0.0.1:8000/api/v1", - "lakeFS API server endpoint", - new util.HashMap[String, ServerVariable]() - ) - ) - - private val apiSecret: String = "random_string_for_lakefs" - private val accessKeyID: String = "AKIAJIWZ57BWHNDAGMPQ" - private val secretAccessKey: String = "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" - - /** - * Retrieves the singleton LakeFS ApiClient instance. - * - If the client is not initialized, it is lazily created using the configured properties. - * @return the ApiClient instance. 
- */ - def getInstance(): ApiClient = { - instance match { - case Some(client) => client - case None => - val apiClient = new ApiClient() - apiClient.setApiKey(apiSecret) - apiClient.setUsername(accessKeyID) - apiClient.setPassword(secretAccessKey) - apiClient.setServers(servers.asJava) - instance = Some(apiClient) - apiClient - } - } - - /** - * Replaces the existing LakeFS ApiClient instance. - * - This method is useful for testing or dynamically updating the client. - * - * @param apiClient the new ApiClient instance to replace the current one. - */ - def replaceInstance(apiClient: ApiClient): Unit = { - instance = Some(apiClient) - } -} diff --git a/core/workflow-core/src/main/scala/edu/uci/ics/amber/core/storage/LakeFSFileStorage.scala b/core/workflow-core/src/main/scala/edu/uci/ics/amber/core/storage/LakeFSFileStorage.scala index def2c3b4ed..e0160b43cf 100644 --- a/core/workflow-core/src/main/scala/edu/uci/ics/amber/core/storage/LakeFSFileStorage.scala +++ b/core/workflow-core/src/main/scala/edu/uci/ics/amber/core/storage/LakeFSFileStorage.scala @@ -6,6 +6,7 @@ import io.lakefs.clients.sdk.model._ import java.io.{File, FileOutputStream, InputStream} import java.nio.file.Files import scala.jdk.CollectionConverters._ +import edu.uci.ics.amber.core.storage.StorageConfig /** * LakeFSFileStorage provides high-level file storage operations using LakeFS, @@ -13,8 +14,22 @@ import scala.jdk.CollectionConverters._ */ object LakeFSFileStorage { - // Lazy initialization of LakeFS API clients - private lazy val apiClient: ApiClient = LakeFSApiClientInstance.getInstance() + private lazy val apiClient: ApiClient = { + val client = new ApiClient() + client.setApiKey(StorageConfig.lakefsPassword) + client.setUsername(StorageConfig.lakefsUsername) + client.setPassword(StorageConfig.lakefsPassword) + client.setServers( + List( + new ServerConfiguration( + StorageConfig.lakefsEndpoint, + "LakeFS API server endpoint", + new java.util.HashMap[String, ServerVariable]() + ) + ).asJava + ) + client + } private lazy val repoApi: RepositoriesApi = new RepositoriesApi(apiClient) private lazy val objectsApi: ObjectsApi = new ObjectsApi(apiClient) private lazy val commitsApi: CommitsApi = new CommitsApi(apiClient) diff --git a/core/workflow-core/src/main/scala/edu/uci/ics/amber/core/storage/StorageConfig.scala b/core/workflow-core/src/main/scala/edu/uci/ics/amber/core/storage/StorageConfig.scala index c27a1d5f98..992b5a4ead 100644 --- a/core/workflow-core/src/main/scala/edu/uci/ics/amber/core/storage/StorageConfig.scala +++ b/core/workflow-core/src/main/scala/edu/uci/ics/amber/core/storage/StorageConfig.scala @@ -23,6 +23,10 @@ object StorageConfig { val icebergCommitMap = icebergTableMap("commit").asInstanceOf[JMap[String, Any]].asScala.toMap val icebergRetryMap = icebergCommitMap("retry").asInstanceOf[JMap[String, Any]].asScala.toMap val jdbcMap = storageMap("jdbc").asInstanceOf[JMap[String, Any]].asScala.toMap + val lakefsMap = storageMap("lakefs").asInstanceOf[JMap[String, Any]].asScala.toMap + val lakefsAuthMap = lakefsMap("auth").asInstanceOf[JMap[String, Any]].asScala.toMap + val s3Map = storageMap("s3").asInstanceOf[JMap[String, Any]].asScala.toMap + val s3AuthMap = s3Map("auth").asInstanceOf[JMap[String, Any]].asScala.toMap javaConf.updated( "storage", @@ -44,6 +48,8 @@ object StorageConfig { ) ) .updated("jdbc", jdbcMap) + .updated("lakefs", lakefsMap.updated("auth", lakefsAuthMap)) + .updated("s3", s3Map.updated("auth", s3AuthMap)) ) } @@ -67,6 +73,7 @@ object StorageConfig { .asInstanceOf[Map[String, 
Any]]("commit-batch-size") .asInstanceOf[Int] + // Iceberg table configurations val icebergTableResultNamespace: String = conf("storage") .asInstanceOf[Map[String, Any]]("iceberg") .asInstanceOf[Map[String, Any]]("table") @@ -169,4 +176,22 @@ object StorageConfig { // File storage configurations val fileStorageDirectoryPath: Path = corePath.resolve("amber").resolve("user-resources").resolve("workflow-results") + + // LakeFS configurations + val lakefsEndpoint: String = conf("storage") + .asInstanceOf[Map[String, Any]]("lakefs") + .asInstanceOf[Map[String, Any]]("endpoint") + .asInstanceOf[String] + + val lakefsUsername: String = conf("storage") + .asInstanceOf[Map[String, Any]]("lakefs") + .asInstanceOf[Map[String, Any]]("auth") + .asInstanceOf[Map[String, Any]]("username") + .asInstanceOf[String] + + val lakefsPassword: String = conf("storage") + .asInstanceOf[Map[String, Any]]("lakefs") + .asInstanceOf[Map[String, Any]]("auth") + .asInstanceOf[Map[String, Any]]("password") + .asInstanceOf[String] } From f45b60221e6fca727b83e27cdfe875464b79b125 Mon Sep 17 00:00:00 2001 From: Jiadong Bai Date: Sun, 23 Feb 2025 23:19:11 -0800 Subject: [PATCH 11/47] test more APIs and closing to finish --- .../uci/ics/texera/service/FileService.scala | 8 + .../service/resource/DatasetResource.scala | 346 +++++++++--------- .../type/dataset/DatasetFileNode.scala | 16 + .../type/serde/DatasetFileNodeSerializer.java | 44 +++ core/gui/proxy.config.json | 5 + core/gui/src/app/app.module.ts | 6 + .../app/common/type/dataset-staged-object.ts | 7 + .../dataset-detail.component.html | 26 +- .../dataset-detail.component.scss | 5 + ...dataset-staged-objects-list.component.html | 10 + ...dataset-staged-objects-list.component.scss | 0 ...r-dataset-staged-objects-list.component.ts | 30 ++ .../service/user/dataset/dataset.service.ts | 11 + core/workflow-core/build.sbt | 5 +- .../src/main/resources/storage-config.yaml | 4 + .../core/storage/LakeFSFileStorage.scala | 24 ++ .../ics/amber/core/storage/S3Storage.scala | 172 +++++++++ .../amber/core/storage/StorageConfig.scala | 37 +- 18 files changed, 577 insertions(+), 179 deletions(-) create mode 100644 core/file-service/src/main/scala/edu/uci/ics/texera/service/type/serde/DatasetFileNodeSerializer.java create mode 100644 core/gui/src/app/common/type/dataset-staged-object.ts create mode 100644 core/gui/src/app/dashboard/component/user/user-dataset/user-dataset-explorer/user-dataset-staged-objects-list/user-dataset-staged-objects-list.component.html create mode 100644 core/gui/src/app/dashboard/component/user/user-dataset/user-dataset-explorer/user-dataset-staged-objects-list/user-dataset-staged-objects-list.component.scss create mode 100644 core/gui/src/app/dashboard/component/user/user-dataset/user-dataset-explorer/user-dataset-staged-objects-list/user-dataset-staged-objects-list.component.ts create mode 100644 core/workflow-core/src/main/scala/edu/uci/ics/amber/core/storage/S3Storage.scala diff --git a/core/file-service/src/main/scala/edu/uci/ics/texera/service/FileService.scala b/core/file-service/src/main/scala/edu/uci/ics/texera/service/FileService.scala index fd0ee2f83e..edd1d6d1e5 100644 --- a/core/file-service/src/main/scala/edu/uci/ics/texera/service/FileService.scala +++ b/core/file-service/src/main/scala/edu/uci/ics/texera/service/FileService.scala @@ -1,11 +1,14 @@ package edu.uci.ics.texera.service +import com.fasterxml.jackson.databind.module.SimpleModule import io.dropwizard.core.Application import io.dropwizard.core.setup.{Bootstrap, Environment} import 
com.fasterxml.jackson.module.scala.DefaultScalaModule import edu.uci.ics.amber.core.storage.StorageConfig import edu.uci.ics.amber.util.PathUtils.fileServicePath import edu.uci.ics.texera.dao.SqlServer +import edu.uci.ics.texera.service.`type`.DatasetFileNode +import edu.uci.ics.texera.service.`type`.serde.DatasetFileNodeSerializer import edu.uci.ics.texera.service.auth.{JwtAuthFilter, SessionUser} import edu.uci.ics.texera.service.resource.{DatasetAccessResource, DatasetResource} import io.dropwizard.auth.AuthDynamicFeature @@ -15,6 +18,11 @@ class FileService extends Application[FileServiceConfiguration] { override def initialize(bootstrap: Bootstrap[FileServiceConfiguration]): Unit = { // Register Scala module to Dropwizard default object mapper bootstrap.getObjectMapper.registerModule(DefaultScalaModule) + + // register a new custom module and add the custom serializer into it + val customSerializerModule = new SimpleModule("CustomSerializers") + customSerializerModule.addSerializer(classOf[DatasetFileNode], new DatasetFileNodeSerializer()) + bootstrap.getObjectMapper.registerModule(customSerializerModule) } override def run(configuration: FileServiceConfiguration, environment: Environment): Unit = { diff --git a/core/file-service/src/main/scala/edu/uci/ics/texera/service/resource/DatasetResource.scala b/core/file-service/src/main/scala/edu/uci/ics/texera/service/resource/DatasetResource.scala index 6b316f71f9..e993e82e5b 100644 --- a/core/file-service/src/main/scala/edu/uci/ics/texera/service/resource/DatasetResource.scala +++ b/core/file-service/src/main/scala/edu/uci/ics/texera/service/resource/DatasetResource.scala @@ -4,6 +4,7 @@ import edu.uci.ics.amber.core.storage.{ DocumentFactory, FileResolver, LakeFSFileStorage, + S3Storage, StorageConfig } import edu.uci.ics.amber.core.storage.util.dataset.{ @@ -44,6 +45,7 @@ import edu.uci.ics.texera.service.resource.DatasetResource.{ DashboardDatasetVersion, DatasetDescriptionModification, DatasetVersionRootFileNodesResponse, + Diff, calculateDatasetVersionSize, context, getDatasetByID, @@ -159,113 +161,6 @@ object DatasetResource { filesToRemove: List[URI] ) - /** - * Create a new dataset version by adding new files - * @param did the target dataset id - * @param user the user submitting the request - * @param filesToAdd the map containing the files to add - * @return the created dataset version - */ - def createNewDatasetVersionByAddingFiles( - did: Integer, - user: User, - filesToAdd: Map[java.nio.file.Path, InputStream] - ): Option[DashboardDatasetVersion] = { - applyDatasetOperationToCreateNewVersion( - context, - did, - user.getUid, - user.getEmail, - "", - DatasetOperation(filesToAdd, List()) - ) - } - - // apply the dataset operation to create a new dataset version - // it returns the created dataset version if creation succeed, else return None - // concurrency control is performed here: the thread has to have the lock in order to create the new version - private def applyDatasetOperationToCreateNewVersion( - ctx: DSLContext, - did: Integer, - uid: Integer, - ownerEmail: String, - userProvidedVersionName: String, - datasetOperation: DatasetOperation - ): Option[DashboardDatasetVersion] = { - // Helper function to generate the dataset version name - // the format of dataset version name is: v{#n} - {user provided dataset version name}. e.g. 
v10 - new version - def generateDatasetVersionName( - ctx: DSLContext, - did: Integer, - userProvidedVersionName: String - ): String = { - val numberOfExistingVersions = ctx - .selectFrom(DATASET_VERSION) - .where(DATASET_VERSION.DID.eq(did)) - .fetch() - .size() - - val sanitizedUserProvidedVersionName = - StringUtils.replaceEach(userProvidedVersionName, Array("/", "\\"), Array("", "")) - val res = if (sanitizedUserProvidedVersionName == "") { - "v" + (numberOfExistingVersions + 1).toString - } else { - "v" + (numberOfExistingVersions + 1).toString + " - " + sanitizedUserProvidedVersionName - } - - res - } - - val dataset = getDatasetByID(ctx, did) - val datasetPath = PathUtils.getDatasetPath(did) - if (datasetOperation.filesToAdd.isEmpty && datasetOperation.filesToRemove.isEmpty) { - return None - } - val datasetName = dataset.getName - val versionName = generateDatasetVersionName(ctx, did, userProvidedVersionName) - val commitHash = GitVersionControlLocalFileStorage.withCreateVersion( - datasetPath, - versionName, - () => { - datasetOperation.filesToAdd.foreach { - case (filePath, fileStream) => - GitVersionControlLocalFileStorage.writeFileToRepo(datasetPath, filePath, fileStream) - } - - datasetOperation.filesToRemove.foreach { fileUri => - DocumentFactory.openDocument(fileUri)._1.clear() - } - } - ) - - // create the DatasetVersion that persists in the DB - val datasetVersion = new DatasetVersion() - - datasetVersion.setName(versionName) - datasetVersion.setDid(did) - datasetVersion.setCreatorUid(uid) - datasetVersion.setVersionHash(commitHash) - - val physicalFileNodes = - GitVersionControlLocalFileStorage.retrieveRootFileNodesOfVersion(datasetPath, commitHash) - Some( - DashboardDatasetVersion( - // insert the dataset version into DB, and fetch the newly-inserted one. - ctx - .insertInto(DATASET_VERSION) // Assuming DATASET is the table reference - .set(ctx.newRecord(DATASET_VERSION, datasetVersion)) - .returning() // Assuming ID is the primary key column - .fetchOne() - .into(classOf[DatasetVersion]), - DatasetFileNode.fromPhysicalFileNodes( - Map( - (ownerEmail, datasetName, versionName) -> physicalFileNodes.asScala.toList - ) - ) - ) - ) - } - case class DashboardDataset( dataset: Dataset, ownerEmail: String, @@ -277,7 +172,12 @@ object DatasetResource { fileNodes: List[DatasetFileNode] ) - case class DatasetIDs(dids: List[Integer]) + case class Diff( + path: String, + pathType: String, + diffType: String, // "added", "removed", "changed", etc. 
+ sizeBytes: Option[Long] // Size of the changed file (None for directories) + ) case class DatasetDescriptionModification(name: String, description: String) @@ -301,27 +201,20 @@ class DatasetResource { private def getDashboardDataset( ctx: DSLContext, did: Integer, - uid: Option[Integer], - isPublic: Boolean = false + requesterUid: Option[Integer] ): DashboardDataset = { - if ( - (isPublic && !isDatasetPublic(ctx, did)) || - (!isPublic && (!userHasReadAccess(ctx, did, uid.get))) - ) { + val targetDataset = getDatasetByID(ctx, did) + if (requesterUid.isDefined && !userHasReadAccess(ctx, did, requesterUid.get)) { throw new ForbiddenException(ERR_USER_HAS_NO_ACCESS_TO_DATASET_MESSAGE) } - val targetDataset = getDatasetByID(ctx, did) - val userAccessPrivilege = - if (isPublic) PrivilegeEnum.NONE - else getDatasetUserAccessPrivilege(ctx, did, uid.get) - val isOwner = !isPublic && (targetDataset.getOwnerUid == uid.get) + val userAccessPrivilege = getDatasetUserAccessPrivilege(ctx, did, requesterUid.get) DashboardDataset( targetDataset, getOwner(ctx, did).getEmail, userAccessPrivilege, - isOwner + targetDataset.getOwnerUid == requesterUid.get ) } @@ -392,6 +285,89 @@ class DatasetResource { } } + @POST + @RolesAllowed(Array("REGULAR", "ADMIN")) + @Path("/{did}/version/create") + @Consumes(Array(MediaType.APPLICATION_JSON)) + def createDatasetVersion( + versionName: String, + @PathParam("did") did: Integer, + @Auth user: SessionUser + ): DashboardDatasetVersion = { + val uid = user.getUid + withTransaction(context) { ctx => + if (!userHasWriteAccess(ctx, did, uid)) { + throw new ForbiddenException(ERR_USER_HAS_NO_ACCESS_TO_DATASET_MESSAGE) + } + + val dataset = getDatasetByID(ctx, did) + val datasetName = dataset.getName + + // Check if there are any changes in LakeFS before creating a new version + val diffs = LakeFSFileStorage.retrieveUncommittedObjects(repoName = datasetName) + + if (diffs.isEmpty) { + throw new WebApplicationException( + "No changes detected in dataset. Version creation aborted.", + Response.Status.BAD_REQUEST + ) + } + + // Generate a new version name + val versionCount = ctx + .selectCount() + .from(DATASET_VERSION) + .where(DATASET_VERSION.DID.eq(did)) + .fetchOne(0, classOf[Int]) + + val sanitizedVersionName = Option(versionName).filter(_.nonEmpty).getOrElse("") + val newVersionName = if (sanitizedVersionName.isEmpty) { + s"v${versionCount + 1}" + } else { + s"v${versionCount + 1} - $sanitizedVersionName" + } + + // Create a commit in LakeFS + val commit = LakeFSFileStorage.createCommit( + repoName = datasetName, + branch = "main", + commitMessage = s"Created dataset version: $newVersionName" + ) + + if (commit == null || commit.getId == null) { + throw new WebApplicationException( + "Failed to create commit in LakeFS. 
Version creation aborted.", + Response.Status.INTERNAL_SERVER_ERROR + ) + } + + // Create a new dataset version entry in the database + val datasetVersion = new DatasetVersion() + datasetVersion.setDid(did) + datasetVersion.setCreatorUid(uid) + datasetVersion.setName(newVersionName) + datasetVersion.setVersionHash(commit.getId) // Store LakeFS version hash + + val insertedVersion = ctx + .insertInto(DATASET_VERSION) + .set(ctx.newRecord(DATASET_VERSION, datasetVersion)) + .returning() + .fetchOne() + .into(classOf[DatasetVersion]) + + // Retrieve committed file structure + val fileNodes = LakeFSFileStorage.retrieveObjectsOfVersion(datasetName, commit.getId) + + DashboardDatasetVersion( + insertedVersion, + DatasetFileNode + .fromLakeFSRepositoryCommittedObjects( + Map((user.getEmail, datasetName, newVersionName) -> fileNodes) + ) + ) + } + } + @POST @RolesAllowed(Array("REGULAR", "ADMIN")) @Path("/delete") @@ -408,12 +384,15 @@ class DatasetResource { LakeFSFileStorage.deleteRepo(datasetName) } catch { case e: Exception => - throw new WebApplicationException( - s"Failed to delete a repository in LakeFS: ${e.getMessage}", - e - ) +// throw new WebApplicationException( +// s"Failed to delete a repository in LakeFS: ${e.getMessage}", +// e +// ) } + // delete the directory on S3 + S3Storage.deleteDirectory(StorageConfig.lakefsBlockStorageBucketName, datasetName) + // delete the dataset from the DB datasetDao.deleteById(dataset.head.getDid) @@ -469,17 +448,66 @@ class DatasetResource { } } - @POST + @GET @RolesAllowed(Array("REGULAR", "ADMIN")) - @Path("/{name}/version/create") - @Consumes(Array(MediaType.MULTIPART_FORM_DATA)) - def createDatasetVersion( - @PathParam("name") name: String, - @FormDataParam("versionName") versionName: String, + @Path("/{did}/diff") + def getDatasetDiff( + @PathParam("did") did: Integer, @Auth user: SessionUser - ): Unit = { + ): List[Diff] = { val uid = user.getUid - // TODO: finish it + withTransaction(context) { ctx => + if (!userHasReadAccess(ctx, did, uid)) { + throw new ForbiddenException(ERR_USER_HAS_NO_ACCESS_TO_DATASET_MESSAGE) + } + + // Retrieve staged (uncommitted) changes from LakeFS + val dataset = getDatasetByID(ctx, did) + val lakefsDiffs = LakeFSFileStorage.retrieveUncommittedObjects(dataset.getName) + + // Convert LakeFS Diff objects to our custom Diff case class + lakefsDiffs.map(d => + new Diff( + d.getPath, + d.getPathType.getValue, + d.getType.getValue, + Option(d.getSizeBytes).map(_.longValue()) + ) + ) + } + } + + @GET + @RolesAllowed(Array("REGULAR", "ADMIN")) + @Path("/{did}/presign") + def getPresignedUrl( + @PathParam("did") did: Integer, + @QueryParam("type") operationType: String, + @Auth user: SessionUser + ): Response = { + val uid = user.getUid + withTransaction(context) { ctx => + if (!userHasReadAccess(ctx, did, uid)) { + throw new ForbiddenException(ERR_USER_HAS_NO_ACCESS_TO_DATASET_MESSAGE) + } + + val dataset = getDatasetByID(ctx, did) + val datasetName = dataset.getName + val bucketName = StorageConfig.lakefsBlockStorageBucketName + + val key = s"$datasetName/${java.util.UUID.randomUUID().toString}" // Generate unique key + + val presignedUrl = operationType match { + case "download" => + S3Storage.generatePresignedDownloadUrl(bucketName, key).toString + case "upload" => + S3Storage.generatePresignedUploadUrl(bucketName, key).toString + case _ => + throw new BadRequestException("Invalid type parameter. 
Use 'download' or 'upload'.") + } + + Response.ok(Map("presignedUrl" -> presignedUrl)).build() + } } /** @@ -562,19 +590,18 @@ class DatasetResource { @GET @RolesAllowed(Array("REGULAR", "ADMIN")) - @Path("/{name}/version/list") + @Path("/{did}/version/list") def getDatasetVersionList( - @PathParam("name") name: String, + @PathParam("did") did: Integer, @Auth user: SessionUser ): List[DatasetVersion] = { val uid = user.getUid withTransaction(context)(ctx => { - val datasetDao = new DatasetDao(ctx.configuration()) - val datasets = datasetDao.fetchByName(name).asScala - if (datasets.isEmpty || !userHasReadAccess(ctx, datasets.head.getDid, uid)) { + val dataset = getDatasetByID(ctx, did) + if (!userHasReadAccess(ctx, dataset.getDid, uid)) { throw new ForbiddenException(ERR_USER_HAS_NO_ACCESS_TO_DATASET_MESSAGE) } - fetchDatasetVersions(ctx, datasets.head.getDid) + fetchDatasetVersions(ctx, dataset.getDid) }) } @@ -643,9 +670,7 @@ class DatasetResource { @Auth user: SessionUser ): DatasetVersionRootFileNodesResponse = { val uid = user.getUid - withTransaction(context)(ctx => - fetchDatasetVersionRootFileNodes(ctx, did, dvid, Some(uid), isPublic = false) - ) + withTransaction(context)(ctx => fetchDatasetVersionRootFileNodes(ctx, did, dvid, Some(uid))) } @GET @@ -654,9 +679,7 @@ class DatasetResource { @PathParam("did") did: Integer, @PathParam("dvid") dvid: Integer ): DatasetVersionRootFileNodesResponse = { - withTransaction(context)(ctx => - fetchDatasetVersionRootFileNodes(ctx, did, dvid, None, isPublic = true) - ) + withTransaction(context)(ctx => fetchDatasetVersionRootFileNodes(ctx, did, dvid, None)) } @GET @@ -667,7 +690,7 @@ class DatasetResource { @Auth user: SessionUser ): DashboardDataset = { val uid = user.getUid - withTransaction(context)(ctx => fetchDataset(ctx, did, Some(uid), isPublic = false)) + withTransaction(context)(ctx => getDashboardDataset(ctx, did, Some(uid))) } @GET @@ -675,7 +698,7 @@ class DatasetResource { def getPublicDataset( @PathParam("did") did: Integer ): DashboardDataset = { - withTransaction(context)(ctx => fetchDataset(ctx, did, None, isPublic = true)) + withTransaction(context)(ctx => getDashboardDataset(ctx, did, None)) } @GET @@ -835,23 +858,18 @@ class DatasetResource { ctx: DSLContext, did: Integer, dvid: Integer, - uid: Option[Integer], - isPublic: Boolean + uid: Option[Integer] ): DatasetVersionRootFileNodesResponse = { - val dataset = getDashboardDataset(ctx, did, uid, isPublic) - val targetDatasetPath = PathUtils.getDatasetPath(did) + val dataset = getDashboardDataset(ctx, did, uid) val datasetVersion = getDatasetVersionByID(ctx, dvid) val datasetName = dataset.dataset.getName - val fileNodes = GitVersionControlLocalFileStorage.retrieveRootFileNodesOfVersion( - targetDatasetPath, - datasetVersion.getVersionHash - ) - val versionHash = datasetVersion.getVersionHash - val size = calculateDatasetVersionSize(datasetName, Some(versionHash)) val ownerFileNode = DatasetFileNode - .fromPhysicalFileNodes( - Map((dataset.ownerEmail, datasetName, datasetVersion.getName) -> fileNodes.asScala.toList) + .fromLakeFSRepositoryCommittedObjects( + Map( + (dataset.ownerEmail, datasetName, datasetVersion.getName) -> LakeFSFileStorage + .retrieveObjectsOfVersion(datasetName, datasetVersion.getVersionHash) + ) ) .head @@ -865,17 +883,7 @@ class DatasetResource { .head .children .get, - size + DatasetFileNode.calculateTotalSize(List(ownerFileNode)) ) } - - private def fetchDataset( - ctx: DSLContext, - did: Integer, - uid: Option[Integer], - isPublic: Boolean - ): 
DashboardDataset = { - val dashboardDataset = getDashboardDataset(ctx, did, uid, isPublic) - dashboardDataset - } } diff --git a/core/file-service/src/main/scala/edu/uci/ics/texera/service/type/dataset/DatasetFileNode.scala b/core/file-service/src/main/scala/edu/uci/ics/texera/service/type/dataset/DatasetFileNode.scala index 22bef96d08..10d89f8e23 100644 --- a/core/file-service/src/main/scala/edu/uci/ics/texera/service/type/dataset/DatasetFileNode.scala +++ b/core/file-service/src/main/scala/edu/uci/ics/texera/service/type/dataset/DatasetFileNode.scala @@ -209,4 +209,20 @@ object DatasetFileNode { currentPhysicalNode.getChildren.forEach(child => queue.add((fileNode, child))) } } + + /** + * Traverses a given list of DatasetFileNode and returns the total size of all files. + * + * @param nodes List of root-level DatasetFileNode. + * @return Total size in bytes. + */ + def calculateTotalSize(nodes: List[DatasetFileNode]): Long = { + def traverse(node: DatasetFileNode): Long = { + val fileSize = node.getSize.getOrElse(0L) + val childrenSize = node.getChildren.map(traverse).sum + fileSize + childrenSize + } + + nodes.map(traverse).sum + } } diff --git a/core/file-service/src/main/scala/edu/uci/ics/texera/service/type/serde/DatasetFileNodeSerializer.java b/core/file-service/src/main/scala/edu/uci/ics/texera/service/type/serde/DatasetFileNodeSerializer.java new file mode 100644 index 0000000000..2d2da04fb1 --- /dev/null +++ b/core/file-service/src/main/scala/edu/uci/ics/texera/service/type/serde/DatasetFileNodeSerializer.java @@ -0,0 +1,44 @@ +package edu.uci.ics.texera.service.type.serde; + +import com.fasterxml.jackson.core.JsonGenerator; +import com.fasterxml.jackson.databind.SerializerProvider; +import com.fasterxml.jackson.databind.ser.std.StdSerializer; +import edu.uci.ics.texera.service.type.DatasetFileNode; +import scala.collection.JavaConverters; +import scala.collection.immutable.List; + +import java.io.IOException; + +// this class is used to serialize the FileNode as JSON. So that FileNodes can be inspected by the frontend through JSON. 
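+// Each node is written with its name, type, parentDir, and ownerEmail; file nodes additionally
+// carry a "size" field, while directory nodes recursively serialize their "children" array.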
+public class DatasetFileNodeSerializer extends StdSerializer { + + public DatasetFileNodeSerializer() { + this(null); + } + + public DatasetFileNodeSerializer(Class t) { + super(t); + } + + @Override + public void serialize(DatasetFileNode value, JsonGenerator gen, SerializerProvider provider) throws IOException { + gen.writeStartObject(); + gen.writeStringField("name", value.getName()); + gen.writeStringField("type", value.getNodeType()); + gen.writeStringField("parentDir", value.getParent().getFilePath()); + gen.writeStringField("ownerEmail", value.getOwnerEmail()); + if (value.getNodeType().equals("file")) { + gen.writeObjectField("size", value.getSize()); + } + if (value.getNodeType().equals("directory")) { + gen.writeFieldName("children"); + gen.writeStartArray(); + List children = value.getChildren(); + for (DatasetFileNode child : JavaConverters.seqAsJavaList(children)) { + serialize(child, gen, provider); // Recursively serialize children + } + gen.writeEndArray(); + } + gen.writeEndObject(); + } +} diff --git a/core/gui/proxy.config.json b/core/gui/proxy.config.json index 29ce58446f..813650d297 100755 --- a/core/gui/proxy.config.json +++ b/core/gui/proxy.config.json @@ -4,6 +4,11 @@ "secure": false, "changeOrigin": true }, + "/api/dataset": { + "target": "http://localhost:9092", + "secure": false, + "changeOrigin": true + }, "/api": { "target": "http://localhost:8080", "secure": false, diff --git a/core/gui/src/app/app.module.ts b/core/gui/src/app/app.module.ts index 38f59a986a..a51377c32c 100644 --- a/core/gui/src/app/app.module.ts +++ b/core/gui/src/app/app.module.ts @@ -142,6 +142,10 @@ import { SocialLoginModule, SocialAuthServiceConfig, GoogleSigninButtonModule } import { GoogleLoginProvider } from "@abacritt/angularx-social-login"; import { lastValueFrom } from "rxjs"; import { HubSearchResultComponent } from "./hub/component/hub-search-result/hub-search-result.component"; +import { + UserDatasetStagedObjectsListComponent +} from "./dashboard/component/user/user-dataset/user-dataset-explorer/user-dataset-staged-objects-list/user-dataset-staged-objects-list.component"; +import {NzEmptyModule} from "ng-zorro-antd/empty"; registerLocaleData(en); @@ -201,6 +205,7 @@ registerLocaleData(en); UserDatasetVersionFiletreeComponent, UserDatasetListItemComponent, UserDatasetFileRendererComponent, + UserDatasetStagedObjectsListComponent, NzModalCommentBoxComponent, LeftPanelComponent, LocalLoginComponent, @@ -291,6 +296,7 @@ registerLocaleData(en); TreeModule, SocialLoginModule, GoogleSigninButtonModule, + NzEmptyModule, ], providers: [ provideNzI18n(en_US), diff --git a/core/gui/src/app/common/type/dataset-staged-object.ts b/core/gui/src/app/common/type/dataset-staged-object.ts new file mode 100644 index 0000000000..f69cc183f1 --- /dev/null +++ b/core/gui/src/app/common/type/dataset-staged-object.ts @@ -0,0 +1,7 @@ +// Represents a staged dataset object change, corresponding to backend Diff +export interface DatasetStagedObject { + fileRelativePath: string; + pathType: "file" | "directory"; + diffType: "added" | "removed" | "changed"; + sizeBytes?: number; // Optional, only present for files +} diff --git a/core/gui/src/app/dashboard/component/user/user-dataset/user-dataset-explorer/dataset-detail.component.html b/core/gui/src/app/dashboard/component/user/user-dataset/user-dataset-explorer/dataset-detail.component.html index bd134b605c..13570ec274 100644 --- a/core/gui/src/app/dashboard/component/user/user-dataset/user-dataset-explorer/dataset-detail.component.html +++ 
b/core/gui/src/app/dashboard/component/user/user-dataset/user-dataset-explorer/dataset-detail.component.html @@ -127,14 +127,24 @@

- - + + + + + + + + + + + + + + + + + diff --git a/core/gui/src/app/dashboard/component/user/user-dataset/user-dataset-explorer/user-dataset-staged-objects-list/user-dataset-staged-objects-list.component.scss b/core/gui/src/app/dashboard/component/user/user-dataset/user-dataset-explorer/user-dataset-staged-objects-list/user-dataset-staged-objects-list.component.scss new file mode 100644 index 0000000000..e69de29bb2 diff --git a/core/gui/src/app/dashboard/component/user/user-dataset/user-dataset-explorer/user-dataset-staged-objects-list/user-dataset-staged-objects-list.component.ts b/core/gui/src/app/dashboard/component/user/user-dataset/user-dataset-explorer/user-dataset-staged-objects-list/user-dataset-staged-objects-list.component.ts new file mode 100644 index 0000000000..da3da6387c --- /dev/null +++ b/core/gui/src/app/dashboard/component/user/user-dataset/user-dataset-explorer/user-dataset-staged-objects-list/user-dataset-staged-objects-list.component.ts @@ -0,0 +1,30 @@ +import { Component, Input, OnInit } from "@angular/core"; +import {DatasetStagedObject} from "../../../../../../common/type/dataset-staged-object"; +import {DatasetService} from "../../../../../service/user/dataset/dataset.service"; +import {untilDestroyed} from "@ngneat/until-destroy"; +import {pipe} from "rxjs"; + +@Component({ + selector: "texera-dataset-staged-objects-list", + templateUrl: "./user-dataset-staged-objects-list.component.html", + styleUrls: ["./user-dataset-staged-objects-list.component.scss"] +}) +export class UserDatasetStagedObjectsListComponent implements OnInit { + + @Input() did?: number; // Dataset ID, required input from parent component + datasetStagedObjects: DatasetStagedObject[] = []; + + constructor(private datasetService: DatasetService) {} + + + ngOnInit(): void { + console.log("did: ", this.did); + if (this.did != undefined) { + this.datasetService.getDatasetDiff(this.did) + .subscribe((diffs) => { + console.log("Received dataset diff:", diffs); + this.datasetStagedObjects = diffs; + }); + } + } +} diff --git a/core/gui/src/app/dashboard/service/user/dataset/dataset.service.ts b/core/gui/src/app/dashboard/service/user/dataset/dataset.service.ts index ddcef98190..a56bc56da7 100644 --- a/core/gui/src/app/dashboard/service/user/dataset/dataset.service.ts +++ b/core/gui/src/app/dashboard/service/user/dataset/dataset.service.ts @@ -7,6 +7,7 @@ import { Observable } from "rxjs"; import { DashboardDataset } from "../../../type/dashboard-dataset.interface"; import { FileUploadItem } from "../../../type/dashboard-file.interface"; import { DatasetFileNode } from "../../../../common/type/datasetVersionFileTree"; +import {DatasetStagedObject} from "../../../../common/type/dataset-staged-object"; export const DATASET_BASE_URL = "dataset"; export const DATASET_CREATE_URL = DATASET_BASE_URL + "/create"; @@ -118,6 +119,16 @@ export class DatasetService { ); } + /** + * Retrieves the list of uncommitted dataset changes (diffs). + * @param did Dataset ID + */ + public getDatasetDiff(did: number): Observable { + return this.http.get( + `${AppSettings.getApiEndpoint()}/${DATASET_BASE_URL}/${did}/diff` + ); + } + /** * retrieve a list of versions of a dataset. The list is sorted so that the latest versions are at front. 
* @param did diff --git a/core/workflow-core/build.sbt b/core/workflow-core/build.sbt index 34290d1ce5..3c8d056c10 100644 --- a/core/workflow-core/build.sbt +++ b/core/workflow-core/build.sbt @@ -173,5 +173,8 @@ libraryDependencies ++= Seq( "org.eclipse.jgit" % "org.eclipse.jgit" % "5.13.0.202109080827-r", // jgit "org.yaml" % "snakeyaml" % "1.30", // yaml reader (downgrade to 1.30 due to dropwizard 1.3.23 required by amber) "org.apache.commons" % "commons-vfs2" % "2.9.0", // for FileResolver throw VFS-related exceptions - "io.lakefs" % "sdk" % "1.48.0" + "io.lakefs" % "sdk" % "1.48.0", + "software.amazon.awssdk" % "s3" % "2.29.51", + "software.amazon.awssdk" % "auth" % "2.29.51", + "software.amazon.awssdk" % "regions" % "2.29.51", ) \ No newline at end of file diff --git a/core/workflow-core/src/main/resources/storage-config.yaml b/core/workflow-core/src/main/resources/storage-config.yaml index 0f9a69cb0c..49df4810b0 100644 --- a/core/workflow-core/src/main/resources/storage-config.yaml +++ b/core/workflow-core/src/main/resources/storage-config.yaml @@ -33,6 +33,10 @@ storage: api-secret: "random_string_for_lakefs" username: "AKIAJIWZ57BWHNDAGMPQ" password: "Y5e/aFeE+ZM1AahSCCEvH+GXkFZq4Y3qihExq2fw" + block-storage: + type: "s3" + bucket-name: "texera-dataset" + s3: endpoint: "http://localhost:9500" auth: diff --git a/core/workflow-core/src/main/scala/edu/uci/ics/amber/core/storage/LakeFSFileStorage.scala b/core/workflow-core/src/main/scala/edu/uci/ics/amber/core/storage/LakeFSFileStorage.scala index e0160b43cf..23dbcab8c8 100644 --- a/core/workflow-core/src/main/scala/edu/uci/ics/amber/core/storage/LakeFSFileStorage.scala +++ b/core/workflow-core/src/main/scala/edu/uci/ics/amber/core/storage/LakeFSFileStorage.scala @@ -32,6 +32,7 @@ object LakeFSFileStorage { } private lazy val repoApi: RepositoriesApi = new RepositoriesApi(apiClient) private lazy val objectsApi: ObjectsApi = new ObjectsApi(apiClient) + private lazy val branchesApi: BranchesApi = new BranchesApi(apiClient) private lazy val commitsApi: CommitsApi = new CommitsApi(apiClient) private lazy val refsApi: RefsApi = new RefsApi(apiClient) @@ -162,4 +163,27 @@ object LakeFSFileStorage { def retrieveObjectsOfVersion(repoName: String, commitHash: String): List[ObjectStats] = { objectsApi.listObjects(repoName, commitHash).execute().getResults.asScala.toList } + + /** + * Retrieves a list of uncommitted (staged) objects in a repository branch. + * + * @param repoName Repository name. + * @param branchName Branch name (defaults to "main"). + * @return List of uncommitted object stats. 
+ */ + def retrieveUncommittedObjects(repoName: String, branchName: String = "main"): List[Diff] = { + branchesApi + .diffBranch(repoName, branchName) + .execute() + .getResults + .asScala + .toList + } + + def createCommit(repoName: String, branch: String, commitMessage: String): Commit = { + val commit = new CommitCreation() + .message(commitMessage) + commitsApi.commit(repoName, branch, commit).execute() + } + } diff --git a/core/workflow-core/src/main/scala/edu/uci/ics/amber/core/storage/S3Storage.scala b/core/workflow-core/src/main/scala/edu/uci/ics/amber/core/storage/S3Storage.scala new file mode 100644 index 0000000000..99f75cb9b2 --- /dev/null +++ b/core/workflow-core/src/main/scala/edu/uci/ics/amber/core/storage/S3Storage.scala @@ -0,0 +1,172 @@ +package edu.uci.ics.amber.core.storage + +import software.amazon.awssdk.auth.credentials.{AwsBasicCredentials, StaticCredentialsProvider} +import software.amazon.awssdk.awscore.presigner.PresignedRequest +import software.amazon.awssdk.core.SdkSystemSetting +import software.amazon.awssdk.core.sync.RequestBody +import software.amazon.awssdk.regions.Region +import software.amazon.awssdk.services.s3.{S3Client, S3Configuration} +import software.amazon.awssdk.services.s3.model._ +import software.amazon.awssdk.services.s3.presigner.S3Presigner +import software.amazon.awssdk.services.s3.presigner.model._ + +import java.io.{File, FileInputStream, InputStream} +import java.net.URL +import java.nio.file.{Files, Paths} +import java.security.MessageDigest +import java.time.Duration +import java.util.Base64 +import scala.jdk.CollectionConverters._ + +/** + * S3Storage provides an abstraction for S3-compatible storage (e.g., MinIO). + * - Uses credentials and endpoint from StorageConfig. + * - Supports object upload, download, listing, and deletion. + */ +object S3Storage { + // Initialize MinIO-compatible S3 Client + private lazy val s3Client: S3Client = { + val credentials = AwsBasicCredentials.create(StorageConfig.s3Username, StorageConfig.s3Password) + S3Client + .builder() + .region(Region.US_WEST_2) // MinIO doesn't require region, but AWS SDK enforces one + .credentialsProvider(StaticCredentialsProvider.create(credentials)) + .endpointOverride(java.net.URI.create(StorageConfig.s3Endpoint)) // MinIO URL + .serviceConfiguration( + S3Configuration.builder().pathStyleAccessEnabled(true).build() + ) // Needed for MinIO + .build() + } + + private lazy val s3Presigner: S3Presigner = { + val credentials = AwsBasicCredentials.create(StorageConfig.s3Username, StorageConfig.s3Password) + S3Presigner + .builder() + .region(Region.US_WEST_2) + .credentialsProvider(StaticCredentialsProvider.create(credentials)) + .endpointOverride(java.net.URI.create(StorageConfig.s3Endpoint)) + .build() + } + + /** + * Deletes a bucket from MinIO. + * + * @param bucketName Target MinIO bucket to delete. + */ + def deleteBucket(bucketName: String): Unit = { + val request = DeleteBucketRequest + .builder() + .bucket(bucketName) + .build() + + s3Client.deleteBucket(request) + } + + /** + * Deletes a directory (all objects under a given prefix) from a bucket. + * + * @param bucketName Target S3/MinIO bucket. + * @param directoryPrefix The directory to delete (must end with `/`). 
+ */ + def deleteDirectory(bucketName: String, directoryPrefix: String): Unit = { + // Ensure the directory prefix ends with `/` to avoid accidental deletions + val prefix = if (directoryPrefix.endsWith("/")) directoryPrefix else directoryPrefix + "/" + + // List objects under the given prefix + val listRequest = ListObjectsV2Request + .builder() + .bucket(bucketName) + .prefix(prefix) + .build() + + val listResponse = s3Client.listObjectsV2(listRequest) + + // Extract object keys + val objectKeys = listResponse.contents().asScala.map(_.key()) + + if (objectKeys.nonEmpty) { + val objectsToDelete = + objectKeys.map(key => ObjectIdentifier.builder().key(key).build()).asJava + + val deleteRequest = Delete + .builder() + .objects(objectsToDelete) + .build() + + // Compute MD5 checksum for MinIO if required + val md5Hash = MessageDigest + .getInstance("MD5") + .digest(deleteRequest.toString.getBytes("UTF-8")) + + val contentMD5 = Base64.getEncoder.encodeToString(md5Hash) + + // Convert object keys to S3 DeleteObjectsRequest format + val deleteObjectsRequest = DeleteObjectsRequest + .builder() + .bucket(bucketName) + .delete(deleteRequest) + .build() + + // Perform batch deletion + s3Client.deleteObjects(deleteObjectsRequest) + } + } + + /** + * Generates a pre-signed URL for uploading a file. + * + * @param bucketName Target MinIO bucket. + * @param key Object key (path in MinIO). + * @param expiration Expiration duration (default: 15 minutes). + * @return URL string that can be used for upload. + */ + def generatePresignedUploadUrl( + bucketName: String, + key: String, + expiration: Duration = Duration.ofMinutes(15) + ): URL = { + val request = PutObjectPresignRequest + .builder() + .signatureDuration(expiration) + .putObjectRequest( + PutObjectRequest + .builder() + .bucket(bucketName) + .key(key) + .build() + ) + .build() + + val presignedRequest: PresignedPutObjectRequest = s3Presigner.presignPutObject(request) + presignedRequest.url() + } + + /** + * Generates a pre-signed URL for downloading a file. + * + * @param bucketName Target MinIO bucket. + * @param key Object key (path in MinIO). + * @param expiration Expiration duration (default: 15 minutes). + * @return URL string that can be used for download. 
+ */ + def generatePresignedDownloadUrl( + bucketName: String, + key: String, + expiration: Duration = Duration.ofMinutes(15) + ): URL = { + val request = GetObjectPresignRequest + .builder() + .signatureDuration(expiration) + .getObjectRequest( + GetObjectRequest + .builder() + .bucket(bucketName) + .key(key) + .build() + ) + .build() + + val presignedRequest: PresignedGetObjectRequest = s3Presigner.presignGetObject(request) + presignedRequest.url() + } +} diff --git a/core/workflow-core/src/main/scala/edu/uci/ics/amber/core/storage/StorageConfig.scala b/core/workflow-core/src/main/scala/edu/uci/ics/amber/core/storage/StorageConfig.scala index 992b5a4ead..184272519a 100644 --- a/core/workflow-core/src/main/scala/edu/uci/ics/amber/core/storage/StorageConfig.scala +++ b/core/workflow-core/src/main/scala/edu/uci/ics/amber/core/storage/StorageConfig.scala @@ -25,6 +25,8 @@ object StorageConfig { val jdbcMap = storageMap("jdbc").asInstanceOf[JMap[String, Any]].asScala.toMap val lakefsMap = storageMap("lakefs").asInstanceOf[JMap[String, Any]].asScala.toMap val lakefsAuthMap = lakefsMap("auth").asInstanceOf[JMap[String, Any]].asScala.toMap + val lakefsBlockStorageMap = + lakefsMap("block-storage").asInstanceOf[JMap[String, Any]].asScala.toMap val s3Map = storageMap("s3").asInstanceOf[JMap[String, Any]].asScala.toMap val s3AuthMap = s3Map("auth").asInstanceOf[JMap[String, Any]].asScala.toMap @@ -48,7 +50,10 @@ object StorageConfig { ) ) .updated("jdbc", jdbcMap) - .updated("lakefs", lakefsMap.updated("auth", lakefsAuthMap)) + .updated( + "lakefs", + lakefsMap.updated("auth", lakefsAuthMap).updated("block-storage", lakefsBlockStorageMap) + ) .updated("s3", s3Map.updated("auth", s3AuthMap)) ) } @@ -194,4 +199,34 @@ object StorageConfig { .asInstanceOf[Map[String, Any]]("auth") .asInstanceOf[Map[String, Any]]("password") .asInstanceOf[String] + + // LakeFS Block Storage configurations + val lakefsBlockStorageType: String = conf("storage") + .asInstanceOf[Map[String, Any]]("lakefs") + .asInstanceOf[Map[String, Any]]("block-storage") + .asInstanceOf[Map[String, Any]]("type") + .asInstanceOf[String] + + val lakefsBlockStorageBucketName: String = conf("storage") + .asInstanceOf[Map[String, Any]]("lakefs") + .asInstanceOf[Map[String, Any]]("block-storage") + .asInstanceOf[Map[String, Any]]("bucket-name") + .asInstanceOf[String] + + val s3Endpoint: String = conf("storage") + .asInstanceOf[Map[String, Any]]("s3") + .asInstanceOf[Map[String, Any]]("endpoint") + .asInstanceOf[String] + + val s3Username: String = conf("storage") + .asInstanceOf[Map[String, Any]]("s3") + .asInstanceOf[Map[String, Any]]("auth") + .asInstanceOf[Map[String, Any]]("username") + .asInstanceOf[String] + + val s3Password: String = conf("storage") + .asInstanceOf[Map[String, Any]]("s3") + .asInstanceOf[Map[String, Any]]("auth") + .asInstanceOf[Map[String, Any]]("password") + .asInstanceOf[String] } From 1ae1c13650ee46e20d59d1e5ee781e51a797bb93 Mon Sep 17 00:00:00 2001 From: Jiadong Bai Date: Mon, 24 Feb 2025 16:53:30 -0800 Subject: [PATCH 12/47] fix dataset creation and version creation --- .../service/resource/DatasetResource.scala | 135 +- core/gui/package.json | 1 + core/gui/proxy.config.json | 8 + core/gui/src/app/app.module.ts | 6 +- .../dataset-detail.component.html | 13 +- .../dataset-detail.component.scss | 4 +- .../dataset-detail.component.ts | 18 +- ...dataset-staged-objects-list.component.html | 8 +- ...r-dataset-staged-objects-list.component.ts | 21 +- ...ser-dataset-version-creator.component.html | 2 +- 
.../user-dataset-version-creator.component.ts | 20 +- .../service/user/dataset/dataset.service.ts | 70 +- core/gui/yarn.lock | 1261 ++++++++++++++++- .../src/main/resources/storage-config.yaml | 2 + .../ics/amber/core/storage/FileResolver.scala | 6 +- .../core/storage/LakeFSFileStorage.scala | 11 + .../ics/amber/core/storage/S3Storage.scala | 102 +- .../amber/core/storage/StorageConfig.scala | 10 + .../storage/model/DatasetFileDocument.scala | 26 +- .../core/storage/model/S3Compatible.scala | 11 + 20 files changed, 1518 insertions(+), 217 deletions(-) create mode 100644 core/workflow-core/src/main/scala/edu/uci/ics/amber/core/storage/model/S3Compatible.scala diff --git a/core/file-service/src/main/scala/edu/uci/ics/texera/service/resource/DatasetResource.scala b/core/file-service/src/main/scala/edu/uci/ics/texera/service/resource/DatasetResource.scala index e993e82e5b..3cd3d80e19 100644 --- a/core/file-service/src/main/scala/edu/uci/ics/texera/service/resource/DatasetResource.scala +++ b/core/file-service/src/main/scala/edu/uci/ics/texera/service/resource/DatasetResource.scala @@ -1,16 +1,8 @@ package edu.uci.ics.texera.service.resource -import edu.uci.ics.amber.core.storage.{ - DocumentFactory, - FileResolver, - LakeFSFileStorage, - S3Storage, - StorageConfig -} -import edu.uci.ics.amber.core.storage.util.dataset.{ - GitVersionControlLocalFileStorage, - PhysicalFileNode -} +import edu.uci.ics.amber.core.storage.model.S3Compatible +import edu.uci.ics.amber.core.storage.{DocumentFactory, FileResolver, LakeFSFileStorage, S3Storage, StorageConfig} +import edu.uci.ics.amber.core.storage.util.dataset.{GitVersionControlLocalFileStorage, PhysicalFileNode} import edu.uci.ics.amber.util.PathUtils import edu.uci.ics.texera.dao.SqlServer import edu.uci.ics.texera.dao.SqlServer.withTransaction @@ -19,39 +11,12 @@ import edu.uci.ics.texera.dao.jooq.generated.tables.User.USER import edu.uci.ics.texera.dao.jooq.generated.tables.Dataset.DATASET import edu.uci.ics.texera.dao.jooq.generated.tables.DatasetUserAccess.DATASET_USER_ACCESS import edu.uci.ics.texera.dao.jooq.generated.tables.DatasetVersion.DATASET_VERSION -import edu.uci.ics.texera.dao.jooq.generated.tables.daos.{ - DatasetDao, - DatasetUserAccessDao, - DatasetVersionDao -} -import edu.uci.ics.texera.dao.jooq.generated.tables.pojos.{ - Dataset, - DatasetUserAccess, - DatasetVersion, - User -} +import edu.uci.ics.texera.dao.jooq.generated.tables.daos.{DatasetDao, DatasetUserAccessDao, DatasetVersionDao} +import edu.uci.ics.texera.dao.jooq.generated.tables.pojos.{Dataset, DatasetUserAccess, DatasetVersion, User} import edu.uci.ics.texera.service.`type`.DatasetFileNode import edu.uci.ics.texera.service.auth.SessionUser -import edu.uci.ics.texera.service.resource.DatasetAccessResource.{ - getDatasetUserAccessPrivilege, - getOwner, - isDatasetPublic, - userHasReadAccess, - userHasWriteAccess, - userOwnDataset -} -import edu.uci.ics.texera.service.resource.DatasetResource.{ - DashboardDataset, - DashboardDatasetVersion, - DatasetDescriptionModification, - DatasetVersionRootFileNodesResponse, - Diff, - calculateDatasetVersionSize, - context, - getDatasetByID, - getDatasetVersionByID, - getLatestDatasetVersion -} +import edu.uci.ics.texera.service.resource.DatasetAccessResource.{getDatasetUserAccessPrivilege, getOwner, isDatasetPublic, userHasReadAccess, userHasWriteAccess, userOwnDataset} +import edu.uci.ics.texera.service.resource.DatasetResource.{DashboardDataset, DashboardDatasetVersion, DatasetDescriptionModification, 
DatasetVersionRootFileNodesResponse, Diff, calculateDatasetVersionSize, context, getDatasetByID, getDatasetVersionByID, getLatestDatasetVersion} import io.dropwizard.auth.Auth import jakarta.annotation.security.RolesAllowed import jakarta.ws.rs._ @@ -288,7 +253,7 @@ class DatasetResource { @POST @RolesAllowed(Array("REGULAR", "ADMIN")) @Path("/{did}/version/create") - @Consumes(Array(MediaType.APPLICATION_JSON)) + @Consumes(Array(MediaType.TEXT_PLAIN)) def createDatasetVersion( versionName: String, @PathParam("did") did: Integer, @@ -384,10 +349,10 @@ class DatasetResource { LakeFSFileStorage.deleteRepo(datasetName) } catch { case e: Exception => -// throw new WebApplicationException( -// s"Failed to delete a repository in LakeFS: ${e.getMessage}", -// e -// ) + throw new WebApplicationException( + s"Failed to delete a repository in LakeFS: ${e.getMessage}", + e + ) } // delete the directory on S3 @@ -425,6 +390,49 @@ class DatasetResource { } } + @GET + @RolesAllowed(Array("REGULAR", "ADMIN")) + @Path("presign") + def getPresignedUrl( + @QueryParam("type") operationType: String, + @QueryParam("key") encodedUrl: String, + @QueryParam("multipart") multipart: Optional[Boolean], + @QueryParam("contentType") contentType: Optional[String], + @Auth user: SessionUser + ): Response = { + val uid = user.getUid + withTransaction(context) { ctx => + // TODO: bring the access control back + val decodedPathStr = URLDecoder.decode(encodedUrl, StandardCharsets.UTF_8.name()) + val fileUri = FileResolver.resolve(decodedPathStr) + val document = DocumentFactory.openReadonlyDocument(fileUri).asInstanceOf[S3Compatible] + + val objectKey = s"${document.getVersionHash()}/${document.getObjectRelativePath()}" + + val presignedUrl = operationType match { + case "download" => +// LakeFSFileStorage.retrieveFilePresignedUrl(document.getRepoName(), document.getVersionHash(), document.getObjectRelativePath()) + S3Storage.generatePresignedDownloadUrl(document.getBucketName(), objectKey).toString + + case "upload" => + if (multipart.toScala.contains(true)) { + // Generate presigned URLs for multipart upload (initiate the multipart upload) + val uploadId = S3Storage.initiateMultipartUpload(document.getBucketName(), document.getObjectRelativePath(), contentType.toScala) + Response.ok(Map("uploadId" -> uploadId, "key" -> document.getObjectRelativePath())).build() + } else { + // Generate presigned URL for a single-part upload + S3Storage.generatePresignedUploadUrl(document.getBucketName(), document.getObjectRelativePath(), multipart = false, contentType.toScala).toString + } + + case _ => + throw new BadRequestException("Invalid type parameter. 
Use 'download' or 'upload'.") + } + + Response.ok(Map("presignedUrl" -> presignedUrl)).build() + } + } + + @POST @RolesAllowed(Array("REGULAR", "ADMIN")) @Path("/{did}/update/publicity") @@ -477,39 +485,6 @@ class DatasetResource { } } - @GET - @RolesAllowed(Array("REGULAR", "ADMIN")) - @Path("/{did}/presign") - def getPresignedUrl( - @PathParam("did") did: Integer, - @QueryParam("type") operationType: String, - @Auth user: SessionUser - ): Response = { - val uid = user.getUid - withTransaction(context) { ctx => - if (!userHasReadAccess(ctx, did, uid)) { - throw new ForbiddenException(ERR_USER_HAS_NO_ACCESS_TO_DATASET_MESSAGE) - } - - val dataset = getDatasetByID(ctx, did) - val datasetName = dataset.getName - val bucketName = StorageConfig.lakefsBlockStorageBucketName - - val key = s"$datasetName/${java.util.UUID.randomUUID().toString}" // Generate unique key - - val presignedUrl = operationType match { - case "download" => - S3Storage.generatePresignedDownloadUrl(bucketName, key).toString - case "upload" => - S3Storage.generatePresignedUploadUrl(bucketName, key).toString - case _ => - throw new BadRequestException("Invalid type parameter. Use 'download' or 'upload'.") - } - - Response.ok(Map("presignedUrl" -> presignedUrl)).build() - } - } - /** * This method returns a list of DashboardDatasets objects that are accessible by current user. * diff --git a/core/gui/package.json b/core/gui/package.json index 59a9aa1b64..4cb6a7f87c 100644 --- a/core/gui/package.json +++ b/core/gui/package.json @@ -34,6 +34,7 @@ "@angular/platform-browser-dynamic": "16.2.12", "@angular/router": "16.2.12", "@auth0/angular-jwt": "5.1.0", + "@aws-sdk/client-s3": "^3.750.0", "@codingame/monaco-vscode-java-default-extension": "8.0.4", "@codingame/monaco-vscode-python-default-extension": "8.0.4", "@codingame/monaco-vscode-r-default-extension": "8.0.4", diff --git a/core/gui/proxy.config.json b/core/gui/proxy.config.json index 813650d297..74cf828a95 100755 --- a/core/gui/proxy.config.json +++ b/core/gui/proxy.config.json @@ -25,5 +25,13 @@ "ws": true, "secure": false, "changeOrigin": false + }, + "/lakefs": { + "target": "http://localhost:8000", + "secure": false, + "changeOrigin": true, + "pathRewrite": { + "^/lakefs": "" + } } } diff --git a/core/gui/src/app/app.module.ts b/core/gui/src/app/app.module.ts index a51377c32c..c67856a96b 100644 --- a/core/gui/src/app/app.module.ts +++ b/core/gui/src/app/app.module.ts @@ -142,10 +142,8 @@ import { SocialLoginModule, SocialAuthServiceConfig, GoogleSigninButtonModule } import { GoogleLoginProvider } from "@abacritt/angularx-social-login"; import { lastValueFrom } from "rxjs"; import { HubSearchResultComponent } from "./hub/component/hub-search-result/hub-search-result.component"; -import { - UserDatasetStagedObjectsListComponent -} from "./dashboard/component/user/user-dataset/user-dataset-explorer/user-dataset-staged-objects-list/user-dataset-staged-objects-list.component"; -import {NzEmptyModule} from "ng-zorro-antd/empty"; +import { UserDatasetStagedObjectsListComponent } from "./dashboard/component/user/user-dataset/user-dataset-explorer/user-dataset-staged-objects-list/user-dataset-staged-objects-list.component"; +import { NzEmptyModule } from "ng-zorro-antd/empty"; registerLocaleData(en); diff --git a/core/gui/src/app/dashboard/component/user/user-dataset/user-dataset-explorer/dataset-detail.component.html b/core/gui/src/app/dashboard/component/user/user-dataset/user-dataset-explorer/dataset-detail.component.html index 13570ec274..3c65ed303e 100644 --- 
a/core/gui/src/app/dashboard/component/user/user-dataset/user-dataset-explorer/dataset-detail.component.html +++ b/core/gui/src/app/dashboard/component/user/user-dataset/user-dataset-explorer/dataset-detail.component.html @@ -129,8 +129,12 @@

[hunk body not recoverable: the HTML template markup was stripped during extraction]
@@ -171,7 +172,7 @@
[hunk body not recoverable: the HTML template markup was stripped during extraction; only the visible label "Create a New Version" survives]
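For reference, a minimal sketch (not part of either patch) of the staged-changes-then-commit flow that the dataset-detail page above drives, written against the LakeFSFileStorage helpers added in workflow-core; the repository name "my-dataset" is a placeholder, and "main" matches the branch hard-coded in createDatasetVersion.

import edu.uci.ics.amber.core.storage.LakeFSFileStorage

// List what is currently staged on the dataset's branch (the same call the /{did}/diff endpoint uses).
val staged = LakeFSFileStorage.retrieveUncommittedObjects(repoName = "my-dataset")
staged.foreach(d => println(s"${d.getType.getValue}: ${d.getPath}"))

// Only commit when something actually changed, mirroring the guard in createDatasetVersion.
if (staged.nonEmpty) {
  val commit = LakeFSFileStorage.createCommit(
    repoName = "my-dataset",
    branch = "main",
    commitMessage = "Created dataset version: v2 - example"
  )
  println(s"new version hash: ${commit.getId}")
}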
diff --git a/core/gui/src/app/dashboard/component/user/user-dataset/user-dataset-explorer/dataset-detail.component.scss b/core/gui/src/app/dashboard/component/user/user-dataset/user-dataset-explorer/dataset-detail.component.scss index dbb24b2b40..1c9eb23bdc 100644 --- a/core/gui/src/app/dashboard/component/user/user-dataset/user-dataset-explorer/dataset-detail.component.scss +++ b/core/gui/src/app/dashboard/component/user/user-dataset/user-dataset-explorer/dataset-detail.component.scss @@ -141,6 +141,6 @@ nz-select { } nz-tabset { - width: 95%; // Take up 90% of the container width - margin: 0 auto; // Center it horizontally + width: 95%; // Take up 90% of the container width + margin: 0 auto; // Center it horizontally } diff --git a/core/gui/src/app/dashboard/component/user/user-dataset/user-dataset-explorer/dataset-detail.component.ts b/core/gui/src/app/dashboard/component/user/user-dataset/user-dataset-explorer/dataset-detail.component.ts index 81b192b820..d5fda2544f 100644 --- a/core/gui/src/app/dashboard/component/user/user-dataset/user-dataset-explorer/dataset-detail.component.ts +++ b/core/gui/src/app/dashboard/component/user/user-dataset/user-dataset-explorer/dataset-detail.component.ts @@ -143,15 +143,9 @@ export class DatasetDetailComponent implements OnInit { renderVersionCreatorSider() { if (this.did) { - this.datasetService - .retrieveDatasetLatestVersion(this.did) - .pipe(untilDestroyed(this)) - .subscribe(latestVersion => { - this.versionCreatorBaseVersion = latestVersion; - this.isCreatingDataset = false; - this.isCreatingVersion = true; - this.siderWidth = this.MAX_SIDER_WIDTH; - }); + this.isCreatingDataset = false; + this.isCreatingVersion = true; + this.siderWidth = this.MAX_SIDER_WIDTH; } } @@ -229,8 +223,10 @@ export class DatasetDetailComponent implements OnInit { this.versions = versionNames; // by default, the selected version is the 1st element in the retrieved list // which is guaranteed(by the backend) to be the latest created version. 
- this.selectedVersion = this.versions[0]; - this.onVersionSelected(this.selectedVersion); + if (this.versions.length > 0) { + this.selectedVersion = this.versions[0]; + this.onVersionSelected(this.selectedVersion); + } }); } } diff --git a/core/gui/src/app/dashboard/component/user/user-dataset/user-dataset-explorer/user-dataset-staged-objects-list/user-dataset-staged-objects-list.component.html b/core/gui/src/app/dashboard/component/user/user-dataset/user-dataset-explorer/user-dataset-staged-objects-list/user-dataset-staged-objects-list.component.html index 1d9be22325..003f90d813 100644 --- a/core/gui/src/app/dashboard/component/user/user-dataset/user-dataset-explorer/user-dataset-staged-objects-list/user-dataset-staged-objects-list.component.html +++ b/core/gui/src/app/dashboard/component/user/user-dataset/user-dataset-explorer/user-dataset-staged-objects-list/user-dataset-staged-objects-list.component.html @@ -1,4 +1,6 @@ - + - + diff --git a/core/gui/src/app/dashboard/component/user/user-dataset/user-dataset-explorer/user-dataset-staged-objects-list/user-dataset-staged-objects-list.component.ts b/core/gui/src/app/dashboard/component/user/user-dataset/user-dataset-explorer/user-dataset-staged-objects-list/user-dataset-staged-objects-list.component.ts index da3da6387c..30f71ea947 100644 --- a/core/gui/src/app/dashboard/component/user/user-dataset/user-dataset-explorer/user-dataset-staged-objects-list/user-dataset-staged-objects-list.component.ts +++ b/core/gui/src/app/dashboard/component/user/user-dataset/user-dataset-explorer/user-dataset-staged-objects-list/user-dataset-staged-objects-list.component.ts @@ -1,30 +1,27 @@ import { Component, Input, OnInit } from "@angular/core"; -import {DatasetStagedObject} from "../../../../../../common/type/dataset-staged-object"; -import {DatasetService} from "../../../../../service/user/dataset/dataset.service"; -import {untilDestroyed} from "@ngneat/until-destroy"; -import {pipe} from "rxjs"; +import { DatasetStagedObject } from "../../../../../../common/type/dataset-staged-object"; +import { DatasetService } from "../../../../../service/user/dataset/dataset.service"; +import { untilDestroyed } from "@ngneat/until-destroy"; +import { pipe } from "rxjs"; @Component({ selector: "texera-dataset-staged-objects-list", templateUrl: "./user-dataset-staged-objects-list.component.html", - styleUrls: ["./user-dataset-staged-objects-list.component.scss"] + styleUrls: ["./user-dataset-staged-objects-list.component.scss"], }) export class UserDatasetStagedObjectsListComponent implements OnInit { - @Input() did?: number; // Dataset ID, required input from parent component datasetStagedObjects: DatasetStagedObject[] = []; constructor(private datasetService: DatasetService) {} - ngOnInit(): void { console.log("did: ", this.did); if (this.did != undefined) { - this.datasetService.getDatasetDiff(this.did) - .subscribe((diffs) => { - console.log("Received dataset diff:", diffs); - this.datasetStagedObjects = diffs; - }); + this.datasetService.getDatasetDiff(this.did).subscribe(diffs => { + console.log("Received dataset diff:", diffs); + this.datasetStagedObjects = diffs; + }); } } } diff --git a/core/gui/src/app/dashboard/component/user/user-dataset/user-dataset-explorer/user-dataset-version-creator/user-dataset-version-creator.component.html b/core/gui/src/app/dashboard/component/user/user-dataset/user-dataset-explorer/user-dataset-version-creator/user-dataset-version-creator.component.html index 5a9401b2be..5b63bff4f6 100644 --- 
a/core/gui/src/app/dashboard/component/user/user-dataset/user-dataset-explorer/user-dataset-version-creator/user-dataset-version-creator.component.html +++ b/core/gui/src/app/dashboard/component/user/user-dataset/user-dataset-explorer/user-dataset-version-creator/user-dataset-version-creator.component.html @@ -21,7 +21,7 @@ + [previouslyUploadFiles]="[]">
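As the template change above suggests, the version creator no longer ships file bytes through a multipart form: files are expected to reach the block store directly through presigned URLs, and the component only posts the version name. A minimal sketch, assuming the PATCH 01 two-argument signature of S3Storage.generatePresignedUploadUrl (a later patch in this series extends it with multipart/contentType parameters); the bucket and object key below are placeholders.

import java.net.HttpURLConnection
import java.nio.file.{Files, Paths}
import edu.uci.ics.amber.core.storage.S3Storage

// Ask the storage layer for a short-lived PUT URL for one object in the dataset bucket.
val url = S3Storage.generatePresignedUploadUrl("texera-dataset", "my-dataset/data.csv")

// PUT the file bytes straight to the presigned URL; no multipart form data is involved.
val conn = url.openConnection().asInstanceOf[HttpURLConnection]
conn.setRequestMethod("PUT")
conn.setDoOutput(true)
val out = conn.getOutputStream
out.write(Files.readAllBytes(Paths.get("data.csv")))
out.close()
println(s"upload status: ${conn.getResponseCode}")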
diff --git a/core/gui/src/app/dashboard/component/user/user-dataset/user-dataset-explorer/user-dataset-version-creator/user-dataset-version-creator.component.ts b/core/gui/src/app/dashboard/component/user/user-dataset/user-dataset-explorer/user-dataset-version-creator/user-dataset-version-creator.component.ts index bfa34d0462..80e29bb611 100644 --- a/core/gui/src/app/dashboard/component/user/user-dataset/user-dataset-explorer/user-dataset-version-creator/user-dataset-version-creator.component.ts +++ b/core/gui/src/app/dashboard/component/user/user-dataset/user-dataset-explorer/user-dataset-version-creator/user-dataset-version-creator.component.ts @@ -20,7 +20,7 @@ export class UserDatasetVersionCreatorComponent implements OnInit { isCreatingVersion: boolean = false; @Input() - baseVersion: DatasetVersion | undefined; + did: number | undefined; // this emits the ID of the newly created version/dataset, will emit 0 if creation is failed. @Output() @@ -128,18 +128,18 @@ export class UserDatasetVersionCreatorComponent implements OnInit { return; // Stop further execution if the form is not valid } - if (this.newUploadFiles.length == 0 && this.removedFilePaths.length == 0) { - this.notificationService.error( - `Please either upload new file(s) or remove old file(s) when creating a new ${this.isCreatingVersion ? "Version" : "Dataset"}` - ); - return; - } + // if (this.newUploadFiles.length == 0 && this.removedFilePaths.length == 0) { + // this.notificationService.error( + // `Please either upload new file(s) or remove old file(s) when creating a new ${this.isCreatingVersion ? "Version" : "Dataset"}` + // ); + // return; + // } this.isUploading = true; - if (this.isCreatingVersion && this.baseVersion) { + if (this.isCreatingVersion && this.did) { const versionName = this.form.get("versionDescription")?.value; this.datasetService - .createDatasetVersion(this.baseVersion?.did, versionName, this.removedFilePaths, this.newUploadFiles) + .createDatasetVersion(this.did, versionName) .pipe(untilDestroyed(this)) .subscribe({ next: res => { @@ -169,7 +169,7 @@ export class UserDatasetVersionCreatorComponent implements OnInit { // do the name sanitization this.datasetService - .createDataset(ds, initialVersionName, this.newUploadFiles) + .createDataset(ds) .pipe(untilDestroyed(this)) .subscribe({ next: res => { diff --git a/core/gui/src/app/dashboard/service/user/dataset/dataset.service.ts b/core/gui/src/app/dashboard/service/user/dataset/dataset.service.ts index a56bc56da7..32930510d8 100644 --- a/core/gui/src/app/dashboard/service/user/dataset/dataset.service.ts +++ b/core/gui/src/app/dashboard/service/user/dataset/dataset.service.ts @@ -1,13 +1,13 @@ import { Injectable } from "@angular/core"; import { HttpClient, HttpParams } from "@angular/common/http"; -import { map } from "rxjs/operators"; +import { map, switchMap } from "rxjs/operators"; import { Dataset, DatasetVersion } from "../../../../common/type/dataset"; import { AppSettings } from "../../../../common/app-setting"; -import { Observable } from "rxjs"; +import { from, Observable } from "rxjs"; import { DashboardDataset } from "../../../type/dashboard-dataset.interface"; import { FileUploadItem } from "../../../type/dashboard-file.interface"; import { DatasetFileNode } from "../../../../common/type/datasetVersionFileTree"; -import {DatasetStagedObject} from "../../../../common/type/dataset-staged-object"; +import { DatasetStagedObject } from "../../../../common/type/dataset-staged-object"; export const DATASET_BASE_URL = "dataset"; export const 
DATASET_CREATE_URL = DATASET_BASE_URL + "/create"; @@ -33,20 +33,11 @@ export const DATASET_GET_OWNERS_URL = DATASET_BASE_URL + "/datasetUserAccess"; export class DatasetService { constructor(private http: HttpClient) {} - public createDataset( - dataset: Dataset, - initialVersionName: string, - filesToBeUploaded: FileUploadItem[] - ): Observable { + public createDataset(dataset: Dataset): Observable { const formData = new FormData(); formData.append("datasetName", dataset.name); formData.append("datasetDescription", dataset.description); - formData.append("isDatasetPublic", dataset.isPublic.toString()); - formData.append("initialVersionName", initialVersionName); - - filesToBeUploaded.forEach(file => { - formData.append(`file:upload:${file.name}`, file.file); - }); + formData.append("isDatasetPublic", dataset.isPublic ? "true" : "false"); return this.http.post(`${AppSettings.getApiEndpoint()}/${DATASET_CREATE_URL}`, formData); } @@ -58,11 +49,25 @@ export class DatasetService { return this.http.get(apiUrl); } - public retrieveDatasetVersionSingleFile(path: string): Observable { - const encodedPath = encodeURIComponent(path); - return this.http.get(`${AppSettings.getApiEndpoint()}/${DATASET_BASE_URL}/file?path=${encodedPath}`, { - responseType: "blob", - }); + /** + * Retrieves a single file from a dataset version using a pre-signed URL. + * @param filePath Relative file path within the dataset. + * @returns Observable + */ + public retrieveDatasetVersionSingleFile(filePath: string): Observable { + return this.http + .get<{ + presignedUrl: string; + }>(`${AppSettings.getApiEndpoint()}/dataset/presign?type=download&key=${encodeURIComponent(filePath)}`) + .pipe( + switchMap(({ presignedUrl }) => { + const url = new URL(presignedUrl); + + let repoName = url.hostname.split(".")[0]; + let newUrl = `lakefs/${repoName}${url.pathname}${url.search}`; + return this.http.get(newUrl, { responseType: "blob" }); + }) + ); } /** @@ -88,29 +93,14 @@ export class DatasetService { public retrieveAccessibleDatasets(): Observable { return this.http.get(`${AppSettings.getApiEndpoint()}/${DATASET_BASE_URL}`); } - public createDatasetVersion( - did: number, - newVersion: string, - removedFilePaths: string[], - filesToBeUploaded: FileUploadItem[] - ): Observable { - const formData = new FormData(); - formData.append("versionName", newVersion); - - if (removedFilePaths.length > 0) { - const removedFilesString = JSON.stringify(removedFilePaths); - formData.append("file:remove", removedFilesString); - } - - filesToBeUploaded.forEach(file => { - formData.append(`file:upload:${file.name}`, file.file); - }); - + public createDatasetVersion(did: number, newVersion: string): Observable { return this.http .post<{ datasetVersion: DatasetVersion; fileNodes: DatasetFileNode[]; - }>(`${AppSettings.getApiEndpoint()}/${DATASET_BASE_URL}/${did}/version/create`, formData) + }>(`${AppSettings.getApiEndpoint()}/${DATASET_BASE_URL}/${did}/version/create`, newVersion, { + headers: { "Content-Type": "text/plain" }, + }) .pipe( map(response => { response.datasetVersion.fileNodes = response.fileNodes; @@ -124,9 +114,7 @@ export class DatasetService { * @param did Dataset ID */ public getDatasetDiff(did: number): Observable { - return this.http.get( - `${AppSettings.getApiEndpoint()}/${DATASET_BASE_URL}/${did}/diff` - ); + return this.http.get(`${AppSettings.getApiEndpoint()}/${DATASET_BASE_URL}/${did}/diff`); } /** diff --git a/core/gui/yarn.lock b/core/gui/yarn.lock index 050ab8bfee..b2d1f4a74e 100644 --- a/core/gui/yarn.lock +++ 
b/core/gui/yarn.lock @@ -701,6 +701,643 @@ __metadata: languageName: node linkType: hard +"@aws-crypto/crc32@npm:5.2.0": + version: 5.2.0 + resolution: "@aws-crypto/crc32@npm:5.2.0" + dependencies: + "@aws-crypto/util": "npm:^5.2.0" + "@aws-sdk/types": "npm:^3.222.0" + tslib: "npm:^2.6.2" + checksum: 10c0/eab9581d3363af5ea498ae0e72de792f54d8890360e14a9d8261b7b5c55ebe080279fb2556e07994d785341cdaa99ab0b1ccf137832b53b5904cd6928f2b094b + languageName: node + linkType: hard + +"@aws-crypto/crc32c@npm:5.2.0": + version: 5.2.0 + resolution: "@aws-crypto/crc32c@npm:5.2.0" + dependencies: + "@aws-crypto/util": "npm:^5.2.0" + "@aws-sdk/types": "npm:^3.222.0" + tslib: "npm:^2.6.2" + checksum: 10c0/223efac396cdebaf5645568fa9a38cd0c322c960ae1f4276bedfe2e1031d0112e49d7d39225d386354680ecefae29f39af469a84b2ddfa77cb6692036188af77 + languageName: node + linkType: hard + +"@aws-crypto/sha1-browser@npm:5.2.0": + version: 5.2.0 + resolution: "@aws-crypto/sha1-browser@npm:5.2.0" + dependencies: + "@aws-crypto/supports-web-crypto": "npm:^5.2.0" + "@aws-crypto/util": "npm:^5.2.0" + "@aws-sdk/types": "npm:^3.222.0" + "@aws-sdk/util-locate-window": "npm:^3.0.0" + "@smithy/util-utf8": "npm:^2.0.0" + tslib: "npm:^2.6.2" + checksum: 10c0/51fed0bf078c10322d910af179871b7d299dde5b5897873ffbeeb036f427e5d11d23db9794439226544b73901920fd19f4d86bbc103ed73cc0cfdea47a83c6ac + languageName: node + linkType: hard + +"@aws-crypto/sha256-browser@npm:5.2.0": + version: 5.2.0 + resolution: "@aws-crypto/sha256-browser@npm:5.2.0" + dependencies: + "@aws-crypto/sha256-js": "npm:^5.2.0" + "@aws-crypto/supports-web-crypto": "npm:^5.2.0" + "@aws-crypto/util": "npm:^5.2.0" + "@aws-sdk/types": "npm:^3.222.0" + "@aws-sdk/util-locate-window": "npm:^3.0.0" + "@smithy/util-utf8": "npm:^2.0.0" + tslib: "npm:^2.6.2" + checksum: 10c0/05f6d256794df800fe9aef5f52f2ac7415f7f3117d461f85a6aecaa4e29e91527b6fd503681a17136fa89e9dd3d916e9c7e4cfb5eba222875cb6c077bdc1d00d + languageName: node + linkType: hard + +"@aws-crypto/sha256-js@npm:5.2.0, @aws-crypto/sha256-js@npm:^5.2.0": + version: 5.2.0 + resolution: "@aws-crypto/sha256-js@npm:5.2.0" + dependencies: + "@aws-crypto/util": "npm:^5.2.0" + "@aws-sdk/types": "npm:^3.222.0" + tslib: "npm:^2.6.2" + checksum: 10c0/6c48701f8336341bb104dfde3d0050c89c288051f6b5e9bdfeb8091cf3ffc86efcd5c9e6ff2a4a134406b019c07aca9db608128f8d9267c952578a3108db9fd1 + languageName: node + linkType: hard + +"@aws-crypto/supports-web-crypto@npm:^5.2.0": + version: 5.2.0 + resolution: "@aws-crypto/supports-web-crypto@npm:5.2.0" + dependencies: + tslib: "npm:^2.6.2" + checksum: 10c0/4d2118e29d68ca3f5947f1e37ce1fbb3239a0c569cc938cdc8ab8390d595609b5caf51a07c9e0535105b17bf5c52ea256fed705a07e9681118120ab64ee73af2 + languageName: node + linkType: hard + +"@aws-crypto/util@npm:5.2.0, @aws-crypto/util@npm:^5.2.0": + version: 5.2.0 + resolution: "@aws-crypto/util@npm:5.2.0" + dependencies: + "@aws-sdk/types": "npm:^3.222.0" + "@smithy/util-utf8": "npm:^2.0.0" + tslib: "npm:^2.6.2" + checksum: 10c0/0362d4c197b1fd64b423966945130207d1fe23e1bb2878a18e361f7743c8d339dad3f8729895a29aa34fff6a86c65f281cf5167c4bf253f21627ae80b6dd2951 + languageName: node + linkType: hard + +"@aws-sdk/client-s3@npm:^3.750.0": + version: 3.750.0 + resolution: "@aws-sdk/client-s3@npm:3.750.0" + dependencies: + "@aws-crypto/sha1-browser": "npm:5.2.0" + "@aws-crypto/sha256-browser": "npm:5.2.0" + "@aws-crypto/sha256-js": "npm:5.2.0" + "@aws-sdk/core": "npm:3.750.0" + "@aws-sdk/credential-provider-node": "npm:3.750.0" + "@aws-sdk/middleware-bucket-endpoint": "npm:3.734.0" + 
"@aws-sdk/middleware-expect-continue": "npm:3.734.0" + "@aws-sdk/middleware-flexible-checksums": "npm:3.750.0" + "@aws-sdk/middleware-host-header": "npm:3.734.0" + "@aws-sdk/middleware-location-constraint": "npm:3.734.0" + "@aws-sdk/middleware-logger": "npm:3.734.0" + "@aws-sdk/middleware-recursion-detection": "npm:3.734.0" + "@aws-sdk/middleware-sdk-s3": "npm:3.750.0" + "@aws-sdk/middleware-ssec": "npm:3.734.0" + "@aws-sdk/middleware-user-agent": "npm:3.750.0" + "@aws-sdk/region-config-resolver": "npm:3.734.0" + "@aws-sdk/signature-v4-multi-region": "npm:3.750.0" + "@aws-sdk/types": "npm:3.734.0" + "@aws-sdk/util-endpoints": "npm:3.743.0" + "@aws-sdk/util-user-agent-browser": "npm:3.734.0" + "@aws-sdk/util-user-agent-node": "npm:3.750.0" + "@aws-sdk/xml-builder": "npm:3.734.0" + "@smithy/config-resolver": "npm:^4.0.1" + "@smithy/core": "npm:^3.1.4" + "@smithy/eventstream-serde-browser": "npm:^4.0.1" + "@smithy/eventstream-serde-config-resolver": "npm:^4.0.1" + "@smithy/eventstream-serde-node": "npm:^4.0.1" + "@smithy/fetch-http-handler": "npm:^5.0.1" + "@smithy/hash-blob-browser": "npm:^4.0.1" + "@smithy/hash-node": "npm:^4.0.1" + "@smithy/hash-stream-node": "npm:^4.0.1" + "@smithy/invalid-dependency": "npm:^4.0.1" + "@smithy/md5-js": "npm:^4.0.1" + "@smithy/middleware-content-length": "npm:^4.0.1" + "@smithy/middleware-endpoint": "npm:^4.0.5" + "@smithy/middleware-retry": "npm:^4.0.6" + "@smithy/middleware-serde": "npm:^4.0.2" + "@smithy/middleware-stack": "npm:^4.0.1" + "@smithy/node-config-provider": "npm:^4.0.1" + "@smithy/node-http-handler": "npm:^4.0.2" + "@smithy/protocol-http": "npm:^5.0.1" + "@smithy/smithy-client": "npm:^4.1.5" + "@smithy/types": "npm:^4.1.0" + "@smithy/url-parser": "npm:^4.0.1" + "@smithy/util-base64": "npm:^4.0.0" + "@smithy/util-body-length-browser": "npm:^4.0.0" + "@smithy/util-body-length-node": "npm:^4.0.0" + "@smithy/util-defaults-mode-browser": "npm:^4.0.6" + "@smithy/util-defaults-mode-node": "npm:^4.0.6" + "@smithy/util-endpoints": "npm:^3.0.1" + "@smithy/util-middleware": "npm:^4.0.1" + "@smithy/util-retry": "npm:^4.0.1" + "@smithy/util-stream": "npm:^4.1.1" + "@smithy/util-utf8": "npm:^4.0.0" + "@smithy/util-waiter": "npm:^4.0.2" + tslib: "npm:^2.6.2" + checksum: 10c0/502099eb11b014a5a13ad3b363fa33aa4706c4b9717b64b76fdf30cc8b0b907ce6dc4fbfc751deddfc978c88a46545e2ba310da2dec6c138cf59f7cfe2ed70f4 + languageName: node + linkType: hard + +"@aws-sdk/client-sso@npm:3.750.0": + version: 3.750.0 + resolution: "@aws-sdk/client-sso@npm:3.750.0" + dependencies: + "@aws-crypto/sha256-browser": "npm:5.2.0" + "@aws-crypto/sha256-js": "npm:5.2.0" + "@aws-sdk/core": "npm:3.750.0" + "@aws-sdk/middleware-host-header": "npm:3.734.0" + "@aws-sdk/middleware-logger": "npm:3.734.0" + "@aws-sdk/middleware-recursion-detection": "npm:3.734.0" + "@aws-sdk/middleware-user-agent": "npm:3.750.0" + "@aws-sdk/region-config-resolver": "npm:3.734.0" + "@aws-sdk/types": "npm:3.734.0" + "@aws-sdk/util-endpoints": "npm:3.743.0" + "@aws-sdk/util-user-agent-browser": "npm:3.734.0" + "@aws-sdk/util-user-agent-node": "npm:3.750.0" + "@smithy/config-resolver": "npm:^4.0.1" + "@smithy/core": "npm:^3.1.4" + "@smithy/fetch-http-handler": "npm:^5.0.1" + "@smithy/hash-node": "npm:^4.0.1" + "@smithy/invalid-dependency": "npm:^4.0.1" + "@smithy/middleware-content-length": "npm:^4.0.1" + "@smithy/middleware-endpoint": "npm:^4.0.5" + "@smithy/middleware-retry": "npm:^4.0.6" + "@smithy/middleware-serde": "npm:^4.0.2" + "@smithy/middleware-stack": "npm:^4.0.1" + "@smithy/node-config-provider": 
"npm:^4.0.1" + "@smithy/node-http-handler": "npm:^4.0.2" + "@smithy/protocol-http": "npm:^5.0.1" + "@smithy/smithy-client": "npm:^4.1.5" + "@smithy/types": "npm:^4.1.0" + "@smithy/url-parser": "npm:^4.0.1" + "@smithy/util-base64": "npm:^4.0.0" + "@smithy/util-body-length-browser": "npm:^4.0.0" + "@smithy/util-body-length-node": "npm:^4.0.0" + "@smithy/util-defaults-mode-browser": "npm:^4.0.6" + "@smithy/util-defaults-mode-node": "npm:^4.0.6" + "@smithy/util-endpoints": "npm:^3.0.1" + "@smithy/util-middleware": "npm:^4.0.1" + "@smithy/util-retry": "npm:^4.0.1" + "@smithy/util-utf8": "npm:^4.0.0" + tslib: "npm:^2.6.2" + checksum: 10c0/a7f2688697dfa9bad799cbd984295e9b685431ec8da13bdf12b8bf5e6c218e3caae231eba147f216d6cf6607c15ded0895535740131986a44e0ca121a095942e + languageName: node + linkType: hard + +"@aws-sdk/core@npm:3.750.0": + version: 3.750.0 + resolution: "@aws-sdk/core@npm:3.750.0" + dependencies: + "@aws-sdk/types": "npm:3.734.0" + "@smithy/core": "npm:^3.1.4" + "@smithy/node-config-provider": "npm:^4.0.1" + "@smithy/property-provider": "npm:^4.0.1" + "@smithy/protocol-http": "npm:^5.0.1" + "@smithy/signature-v4": "npm:^5.0.1" + "@smithy/smithy-client": "npm:^4.1.5" + "@smithy/types": "npm:^4.1.0" + "@smithy/util-middleware": "npm:^4.0.1" + fast-xml-parser: "npm:4.4.1" + tslib: "npm:^2.6.2" + checksum: 10c0/45e45a8ea152a10972aa6f54bfda8da9ca70edcf11b793b900b216a3244b149f6bf79a9ee1aaca8d4244511229045e883a6d469fd6e425e58a874bfd5660bee3 + languageName: node + linkType: hard + +"@aws-sdk/credential-provider-env@npm:3.750.0": + version: 3.750.0 + resolution: "@aws-sdk/credential-provider-env@npm:3.750.0" + dependencies: + "@aws-sdk/core": "npm:3.750.0" + "@aws-sdk/types": "npm:3.734.0" + "@smithy/property-provider": "npm:^4.0.1" + "@smithy/types": "npm:^4.1.0" + tslib: "npm:^2.6.2" + checksum: 10c0/19d4c486ef8e3acc7b249d9e617390edfcdf42b5f75ab10ac6d2491681aa9c3e835dea99c41d51a09433011843f20c84a724fbee8ef8fd01f4313c2689b8383a + languageName: node + linkType: hard + +"@aws-sdk/credential-provider-http@npm:3.750.0": + version: 3.750.0 + resolution: "@aws-sdk/credential-provider-http@npm:3.750.0" + dependencies: + "@aws-sdk/core": "npm:3.750.0" + "@aws-sdk/types": "npm:3.734.0" + "@smithy/fetch-http-handler": "npm:^5.0.1" + "@smithy/node-http-handler": "npm:^4.0.2" + "@smithy/property-provider": "npm:^4.0.1" + "@smithy/protocol-http": "npm:^5.0.1" + "@smithy/smithy-client": "npm:^4.1.5" + "@smithy/types": "npm:^4.1.0" + "@smithy/util-stream": "npm:^4.1.1" + tslib: "npm:^2.6.2" + checksum: 10c0/3324dbd96f6daebf71fd422819bdd35778e49bc697ed1b638b4572da89c45946029135603d1be855f01e01961627eee3361c5be3e20994467413dc3ae7fa45a0 + languageName: node + linkType: hard + +"@aws-sdk/credential-provider-ini@npm:3.750.0": + version: 3.750.0 + resolution: "@aws-sdk/credential-provider-ini@npm:3.750.0" + dependencies: + "@aws-sdk/core": "npm:3.750.0" + "@aws-sdk/credential-provider-env": "npm:3.750.0" + "@aws-sdk/credential-provider-http": "npm:3.750.0" + "@aws-sdk/credential-provider-process": "npm:3.750.0" + "@aws-sdk/credential-provider-sso": "npm:3.750.0" + "@aws-sdk/credential-provider-web-identity": "npm:3.750.0" + "@aws-sdk/nested-clients": "npm:3.750.0" + "@aws-sdk/types": "npm:3.734.0" + "@smithy/credential-provider-imds": "npm:^4.0.1" + "@smithy/property-provider": "npm:^4.0.1" + "@smithy/shared-ini-file-loader": "npm:^4.0.1" + "@smithy/types": "npm:^4.1.0" + tslib: "npm:^2.6.2" + checksum: 
10c0/f25c297e2717bdf09ae167051be8080079810d4634d2125fb3118f9bcf2d65d41e7f765fdbe857e1f1298833dc0433c18c545146ea9ef2192c25a35842bce881 + languageName: node + linkType: hard + +"@aws-sdk/credential-provider-node@npm:3.750.0": + version: 3.750.0 + resolution: "@aws-sdk/credential-provider-node@npm:3.750.0" + dependencies: + "@aws-sdk/credential-provider-env": "npm:3.750.0" + "@aws-sdk/credential-provider-http": "npm:3.750.0" + "@aws-sdk/credential-provider-ini": "npm:3.750.0" + "@aws-sdk/credential-provider-process": "npm:3.750.0" + "@aws-sdk/credential-provider-sso": "npm:3.750.0" + "@aws-sdk/credential-provider-web-identity": "npm:3.750.0" + "@aws-sdk/types": "npm:3.734.0" + "@smithy/credential-provider-imds": "npm:^4.0.1" + "@smithy/property-provider": "npm:^4.0.1" + "@smithy/shared-ini-file-loader": "npm:^4.0.1" + "@smithy/types": "npm:^4.1.0" + tslib: "npm:^2.6.2" + checksum: 10c0/3badb574b7c00d764795672451bb19f3f83c7f2e37ef9c6742063973507c8c445607894e90ba8a84ab7cae07528e275635e479869ae6c14d65f68a234121b119 + languageName: node + linkType: hard + +"@aws-sdk/credential-provider-process@npm:3.750.0": + version: 3.750.0 + resolution: "@aws-sdk/credential-provider-process@npm:3.750.0" + dependencies: + "@aws-sdk/core": "npm:3.750.0" + "@aws-sdk/types": "npm:3.734.0" + "@smithy/property-provider": "npm:^4.0.1" + "@smithy/shared-ini-file-loader": "npm:^4.0.1" + "@smithy/types": "npm:^4.1.0" + tslib: "npm:^2.6.2" + checksum: 10c0/f6bf9e83daaa685afe3bd413a12ef02424467e852526d51bfe210672cc353068394462e9af61eda2403fef6fb1c40e790b98b231ff130e2bf56d7b4621a725e5 + languageName: node + linkType: hard + +"@aws-sdk/credential-provider-sso@npm:3.750.0": + version: 3.750.0 + resolution: "@aws-sdk/credential-provider-sso@npm:3.750.0" + dependencies: + "@aws-sdk/client-sso": "npm:3.750.0" + "@aws-sdk/core": "npm:3.750.0" + "@aws-sdk/token-providers": "npm:3.750.0" + "@aws-sdk/types": "npm:3.734.0" + "@smithy/property-provider": "npm:^4.0.1" + "@smithy/shared-ini-file-loader": "npm:^4.0.1" + "@smithy/types": "npm:^4.1.0" + tslib: "npm:^2.6.2" + checksum: 10c0/32eb0a38c7f7c69a69fddf5411b3d4442226b54bef472316561685ab70438247f2a053255b2e2e3e79862c79b5614293e97fcc30e7e2fa3bf42e841de9f18974 + languageName: node + linkType: hard + +"@aws-sdk/credential-provider-web-identity@npm:3.750.0": + version: 3.750.0 + resolution: "@aws-sdk/credential-provider-web-identity@npm:3.750.0" + dependencies: + "@aws-sdk/core": "npm:3.750.0" + "@aws-sdk/nested-clients": "npm:3.750.0" + "@aws-sdk/types": "npm:3.734.0" + "@smithy/property-provider": "npm:^4.0.1" + "@smithy/types": "npm:^4.1.0" + tslib: "npm:^2.6.2" + checksum: 10c0/7a81561002c5b7a819c279bd2b881271b3bd0f641cef34a57e928eb7c0313d7302343e5a7c36ab452146ce2a03fe3c151d1b781553fab93d27c8478116d87ba2 + languageName: node + linkType: hard + +"@aws-sdk/middleware-bucket-endpoint@npm:3.734.0": + version: 3.734.0 + resolution: "@aws-sdk/middleware-bucket-endpoint@npm:3.734.0" + dependencies: + "@aws-sdk/types": "npm:3.734.0" + "@aws-sdk/util-arn-parser": "npm:3.723.0" + "@smithy/node-config-provider": "npm:^4.0.1" + "@smithy/protocol-http": "npm:^5.0.1" + "@smithy/types": "npm:^4.1.0" + "@smithy/util-config-provider": "npm:^4.0.0" + tslib: "npm:^2.6.2" + checksum: 10c0/f0f98bb478ff469ec3aab0ae5b8122cafc26e4d88efbb1d277429dfd21c70a64eaf996d5cbb7360ff93dcc0e985d75bca5bfcb6a814b1d18ab14c5b912c7c5ad + languageName: node + linkType: hard + +"@aws-sdk/middleware-expect-continue@npm:3.734.0": + version: 3.734.0 + resolution: "@aws-sdk/middleware-expect-continue@npm:3.734.0" + 
dependencies: + "@aws-sdk/types": "npm:3.734.0" + "@smithy/protocol-http": "npm:^5.0.1" + "@smithy/types": "npm:^4.1.0" + tslib: "npm:^2.6.2" + checksum: 10c0/5e6fa03e4b4ef8ff52314a5aea6b7c807e39516ad7c817003c8ef22c4d25de98dc469bab30d6f11a56cba7a968bcdf032373c8c1d074a16ff72ac2cd08f1a5e9 + languageName: node + linkType: hard + +"@aws-sdk/middleware-flexible-checksums@npm:3.750.0": + version: 3.750.0 + resolution: "@aws-sdk/middleware-flexible-checksums@npm:3.750.0" + dependencies: + "@aws-crypto/crc32": "npm:5.2.0" + "@aws-crypto/crc32c": "npm:5.2.0" + "@aws-crypto/util": "npm:5.2.0" + "@aws-sdk/core": "npm:3.750.0" + "@aws-sdk/types": "npm:3.734.0" + "@smithy/is-array-buffer": "npm:^4.0.0" + "@smithy/node-config-provider": "npm:^4.0.1" + "@smithy/protocol-http": "npm:^5.0.1" + "@smithy/types": "npm:^4.1.0" + "@smithy/util-middleware": "npm:^4.0.1" + "@smithy/util-stream": "npm:^4.1.1" + "@smithy/util-utf8": "npm:^4.0.0" + tslib: "npm:^2.6.2" + checksum: 10c0/d1c176d54978d3de1bb71531270e546ead441741547cc2f1aef97445d7af29aefee754df6ee9e85b06ca3528cfce22142c5c9c94f1f6e2bf12bb7c858462a73e + languageName: node + linkType: hard + +"@aws-sdk/middleware-host-header@npm:3.734.0": + version: 3.734.0 + resolution: "@aws-sdk/middleware-host-header@npm:3.734.0" + dependencies: + "@aws-sdk/types": "npm:3.734.0" + "@smithy/protocol-http": "npm:^5.0.1" + "@smithy/types": "npm:^4.1.0" + tslib: "npm:^2.6.2" + checksum: 10c0/56e8501c3beda2961ebba56f1146849594edafa0d33ce2bdb04b62df9732d1218ffe89882333d87d76079798dc575af1756db4d7270916d8d83f8d9ef7c4798e + languageName: node + linkType: hard + +"@aws-sdk/middleware-location-constraint@npm:3.734.0": + version: 3.734.0 + resolution: "@aws-sdk/middleware-location-constraint@npm:3.734.0" + dependencies: + "@aws-sdk/types": "npm:3.734.0" + "@smithy/types": "npm:^4.1.0" + tslib: "npm:^2.6.2" + checksum: 10c0/ec6a10d2545dfbda2806e8dd2244a6be76c97d5fdae2068c461cb61753801ce60079518ad45f3eb559a37042f057636da754cccec751d04d0b94b534d423424e + languageName: node + linkType: hard + +"@aws-sdk/middleware-logger@npm:3.734.0": + version: 3.734.0 + resolution: "@aws-sdk/middleware-logger@npm:3.734.0" + dependencies: + "@aws-sdk/types": "npm:3.734.0" + "@smithy/types": "npm:^4.1.0" + tslib: "npm:^2.6.2" + checksum: 10c0/dc690e546d0411929ff5888cd2dad56b7583f160ce4339f24d4963b9d11022f06da76d5f96c56d2ff2624493885254200788c763f113c26695875b8a229ee9a1 + languageName: node + linkType: hard + +"@aws-sdk/middleware-recursion-detection@npm:3.734.0": + version: 3.734.0 + resolution: "@aws-sdk/middleware-recursion-detection@npm:3.734.0" + dependencies: + "@aws-sdk/types": "npm:3.734.0" + "@smithy/protocol-http": "npm:^5.0.1" + "@smithy/types": "npm:^4.1.0" + tslib: "npm:^2.6.2" + checksum: 10c0/e46e5f99895a4370141b3439c58b94670fddd01d18bbda43a621cb0a5f2bb3384db66757f16da49815af52d29f2cfb8c5d12e273853ad34c919f4f71d078572f + languageName: node + linkType: hard + +"@aws-sdk/middleware-sdk-s3@npm:3.750.0": + version: 3.750.0 + resolution: "@aws-sdk/middleware-sdk-s3@npm:3.750.0" + dependencies: + "@aws-sdk/core": "npm:3.750.0" + "@aws-sdk/types": "npm:3.734.0" + "@aws-sdk/util-arn-parser": "npm:3.723.0" + "@smithy/core": "npm:^3.1.4" + "@smithy/node-config-provider": "npm:^4.0.1" + "@smithy/protocol-http": "npm:^5.0.1" + "@smithy/signature-v4": "npm:^5.0.1" + "@smithy/smithy-client": "npm:^4.1.5" + "@smithy/types": "npm:^4.1.0" + "@smithy/util-config-provider": "npm:^4.0.0" + "@smithy/util-middleware": "npm:^4.0.1" + "@smithy/util-stream": "npm:^4.1.1" + "@smithy/util-utf8": "npm:^4.0.0" + 
tslib: "npm:^2.6.2" + checksum: 10c0/f7e5e08c4ae895577f767060a7bc5cd7d9c24f105b66c44e906015932fcd4071c2e6c76e9e9df3790b8d4e72746a0f9dc628e8b7477fdafb81c8de8ccce1a24b + languageName: node + linkType: hard + +"@aws-sdk/middleware-ssec@npm:3.734.0": + version: 3.734.0 + resolution: "@aws-sdk/middleware-ssec@npm:3.734.0" + dependencies: + "@aws-sdk/types": "npm:3.734.0" + "@smithy/types": "npm:^4.1.0" + tslib: "npm:^2.6.2" + checksum: 10c0/ba1d0f202ef0e58d82895bbe71dcb4520f0eaf958ebc37baa3383e42729091fca2f927ec3482478b0ece35ae001c72da9afb71c83504e0aba6df4074a6a2187a + languageName: node + linkType: hard + +"@aws-sdk/middleware-user-agent@npm:3.750.0": + version: 3.750.0 + resolution: "@aws-sdk/middleware-user-agent@npm:3.750.0" + dependencies: + "@aws-sdk/core": "npm:3.750.0" + "@aws-sdk/types": "npm:3.734.0" + "@aws-sdk/util-endpoints": "npm:3.743.0" + "@smithy/core": "npm:^3.1.4" + "@smithy/protocol-http": "npm:^5.0.1" + "@smithy/types": "npm:^4.1.0" + tslib: "npm:^2.6.2" + checksum: 10c0/24e5636b40370b778631b4af6381082318ad3de64b5566805215b0242e4f58b089ab2cb2c8c915b12b007ac8a7477a37db71c5d0fbd40b1452fccd68e17f984c + languageName: node + linkType: hard + +"@aws-sdk/nested-clients@npm:3.750.0": + version: 3.750.0 + resolution: "@aws-sdk/nested-clients@npm:3.750.0" + dependencies: + "@aws-crypto/sha256-browser": "npm:5.2.0" + "@aws-crypto/sha256-js": "npm:5.2.0" + "@aws-sdk/core": "npm:3.750.0" + "@aws-sdk/middleware-host-header": "npm:3.734.0" + "@aws-sdk/middleware-logger": "npm:3.734.0" + "@aws-sdk/middleware-recursion-detection": "npm:3.734.0" + "@aws-sdk/middleware-user-agent": "npm:3.750.0" + "@aws-sdk/region-config-resolver": "npm:3.734.0" + "@aws-sdk/types": "npm:3.734.0" + "@aws-sdk/util-endpoints": "npm:3.743.0" + "@aws-sdk/util-user-agent-browser": "npm:3.734.0" + "@aws-sdk/util-user-agent-node": "npm:3.750.0" + "@smithy/config-resolver": "npm:^4.0.1" + "@smithy/core": "npm:^3.1.4" + "@smithy/fetch-http-handler": "npm:^5.0.1" + "@smithy/hash-node": "npm:^4.0.1" + "@smithy/invalid-dependency": "npm:^4.0.1" + "@smithy/middleware-content-length": "npm:^4.0.1" + "@smithy/middleware-endpoint": "npm:^4.0.5" + "@smithy/middleware-retry": "npm:^4.0.6" + "@smithy/middleware-serde": "npm:^4.0.2" + "@smithy/middleware-stack": "npm:^4.0.1" + "@smithy/node-config-provider": "npm:^4.0.1" + "@smithy/node-http-handler": "npm:^4.0.2" + "@smithy/protocol-http": "npm:^5.0.1" + "@smithy/smithy-client": "npm:^4.1.5" + "@smithy/types": "npm:^4.1.0" + "@smithy/url-parser": "npm:^4.0.1" + "@smithy/util-base64": "npm:^4.0.0" + "@smithy/util-body-length-browser": "npm:^4.0.0" + "@smithy/util-body-length-node": "npm:^4.0.0" + "@smithy/util-defaults-mode-browser": "npm:^4.0.6" + "@smithy/util-defaults-mode-node": "npm:^4.0.6" + "@smithy/util-endpoints": "npm:^3.0.1" + "@smithy/util-middleware": "npm:^4.0.1" + "@smithy/util-retry": "npm:^4.0.1" + "@smithy/util-utf8": "npm:^4.0.0" + tslib: "npm:^2.6.2" + checksum: 10c0/6bb067637b529b7db3e7ad0fd00baa36261b7436fd0ecda645250b2bcb40b4d00c62989a5fe766e190b35cf829dc8cb8b91a56ecc00f3078da3bb6aeadd8bf66 + languageName: node + linkType: hard + +"@aws-sdk/region-config-resolver@npm:3.734.0": + version: 3.734.0 + resolution: "@aws-sdk/region-config-resolver@npm:3.734.0" + dependencies: + "@aws-sdk/types": "npm:3.734.0" + "@smithy/node-config-provider": "npm:^4.0.1" + "@smithy/types": "npm:^4.1.0" + "@smithy/util-config-provider": "npm:^4.0.0" + "@smithy/util-middleware": "npm:^4.0.1" + tslib: "npm:^2.6.2" + checksum: 
10c0/c1e026dcbe9d7529ec5efee979a868d0c868287d68e7e219bd730d887ab1ccf17ef48516477e57325fef55543217496bcfe7ba6d17d9ecad98cf8cf18d5ced63 + languageName: node + linkType: hard + +"@aws-sdk/signature-v4-multi-region@npm:3.750.0": + version: 3.750.0 + resolution: "@aws-sdk/signature-v4-multi-region@npm:3.750.0" + dependencies: + "@aws-sdk/middleware-sdk-s3": "npm:3.750.0" + "@aws-sdk/types": "npm:3.734.0" + "@smithy/protocol-http": "npm:^5.0.1" + "@smithy/signature-v4": "npm:^5.0.1" + "@smithy/types": "npm:^4.1.0" + tslib: "npm:^2.6.2" + checksum: 10c0/b51c9bc6dda0b2ae2f5d75897be67f1408d27def508206b9c62cddd68e2ec7911e91a174b853dbfae7df8b294c01583ab0b936b9ce4acd00ff2e87b538268000 + languageName: node + linkType: hard + +"@aws-sdk/token-providers@npm:3.750.0": + version: 3.750.0 + resolution: "@aws-sdk/token-providers@npm:3.750.0" + dependencies: + "@aws-sdk/nested-clients": "npm:3.750.0" + "@aws-sdk/types": "npm:3.734.0" + "@smithy/property-provider": "npm:^4.0.1" + "@smithy/shared-ini-file-loader": "npm:^4.0.1" + "@smithy/types": "npm:^4.1.0" + tslib: "npm:^2.6.2" + checksum: 10c0/1486ad60eef09bce9d9c118048c27969bdfee25721524a65a1c66e3461a1413e6ca1dedbf51976d8b39168c5045039d9e5a0d841b44aa29293858c07037a1c80 + languageName: node + linkType: hard + +"@aws-sdk/types@npm:3.734.0, @aws-sdk/types@npm:^3.222.0": + version: 3.734.0 + resolution: "@aws-sdk/types@npm:3.734.0" + dependencies: + "@smithy/types": "npm:^4.1.0" + tslib: "npm:^2.6.2" + checksum: 10c0/74313849619b8bce9e6a52c70fcdaa212574a443503c78bccdba77cdc7bc66b8cecefe461852e0bab7376cc2ec3e1891730b1a027be63efb47394115c8ddb856 + languageName: node + linkType: hard + +"@aws-sdk/util-arn-parser@npm:3.723.0": + version: 3.723.0 + resolution: "@aws-sdk/util-arn-parser@npm:3.723.0" + dependencies: + tslib: "npm:^2.6.2" + checksum: 10c0/5d2adfded61acaf222ed21bf8e5a8b067fe469dfaab03a6b69c591a090c48d309b1f3c4fd64826f71ef9883390adb77a9bf884667b242615f221236bc5a8b326 + languageName: node + linkType: hard + +"@aws-sdk/util-endpoints@npm:3.743.0": + version: 3.743.0 + resolution: "@aws-sdk/util-endpoints@npm:3.743.0" + dependencies: + "@aws-sdk/types": "npm:3.734.0" + "@smithy/types": "npm:^4.1.0" + "@smithy/util-endpoints": "npm:^3.0.1" + tslib: "npm:^2.6.2" + checksum: 10c0/9adba3aa9a5a3cadb7f89c7b3424034c5efb7c10c55114ab02e3d069b4112a05a1e8578ff6ed937412f5d5d1a9cdeeac03b80e5b5d47eaf8fb167d031915e424 + languageName: node + linkType: hard + +"@aws-sdk/util-locate-window@npm:^3.0.0": + version: 3.723.0 + resolution: "@aws-sdk/util-locate-window@npm:3.723.0" + dependencies: + tslib: "npm:^2.6.2" + checksum: 10c0/c9c75d3ee06bd1d1edad78bea8324f2d4ad6086803f27731e1f3c25e946bb630c8db2991a5337e4dbeee06507deab9abea80b134ba4e3fbb27471d438a030639 + languageName: node + linkType: hard + +"@aws-sdk/util-user-agent-browser@npm:3.734.0": + version: 3.734.0 + resolution: "@aws-sdk/util-user-agent-browser@npm:3.734.0" + dependencies: + "@aws-sdk/types": "npm:3.734.0" + "@smithy/types": "npm:^4.1.0" + bowser: "npm:^2.11.0" + tslib: "npm:^2.6.2" + checksum: 10c0/7fc8c5e29f3219f8abf1d0cff73dd6bb34f32a235473843e50f61375b1c05f4c49269cd956c9e4623c87c025e1eeef9fc699ae3389665459721bc11e00c25ead + languageName: node + linkType: hard + +"@aws-sdk/util-user-agent-node@npm:3.750.0": + version: 3.750.0 + resolution: "@aws-sdk/util-user-agent-node@npm:3.750.0" + dependencies: + "@aws-sdk/middleware-user-agent": "npm:3.750.0" + "@aws-sdk/types": "npm:3.734.0" + "@smithy/node-config-provider": "npm:^4.0.1" + "@smithy/types": "npm:^4.1.0" + tslib: "npm:^2.6.2" + peerDependencies: + 
aws-crt: ">=1.0.0" + peerDependenciesMeta: + aws-crt: + optional: true + checksum: 10c0/0f903a4830a2d88e962644eb3a11a7d672898224579a3812172cbdabb881338bff08d904801cb9480c006342f7f605cb764c413e5cb09d4ccf5e40b82734b554 + languageName: node + linkType: hard + +"@aws-sdk/xml-builder@npm:3.734.0": + version: 3.734.0 + resolution: "@aws-sdk/xml-builder@npm:3.734.0" + dependencies: + "@smithy/types": "npm:^4.1.0" + tslib: "npm:^2.6.2" + checksum: 10c0/77eb3d603d45a235982a86e5adbc2de727389924cbbd8edb9b13f1a201b15304c57aebb18e00cce909920b3519d0ca71406989b01b6544c87c7b3c4f04d66887 + languageName: node + linkType: hard + "@babel/code-frame@npm:^7.0.0, @babel/code-frame@npm:^7.16.7, @babel/code-frame@npm:^7.22.13, @babel/code-frame@npm:^7.22.5, @babel/code-frame@npm:^7.25.7": version: 7.25.7 resolution: "@babel/code-frame@npm:7.25.7" @@ -4852,6 +5489,602 @@ __metadata: languageName: node linkType: hard +"@smithy/abort-controller@npm:^4.0.1": + version: 4.0.1 + resolution: "@smithy/abort-controller@npm:4.0.1" + dependencies: + "@smithy/types": "npm:^4.1.0" + tslib: "npm:^2.6.2" + checksum: 10c0/1ecd5c3454ced008463e6de826c294f31f6073ba91e22e443e0269ee0854d9376f73ea756b3acf77aa806a9a98e8b2568ce2e7f15ddf0a7816c99b7deefeef57 + languageName: node + linkType: hard + +"@smithy/chunked-blob-reader-native@npm:^4.0.0": + version: 4.0.0 + resolution: "@smithy/chunked-blob-reader-native@npm:4.0.0" + dependencies: + "@smithy/util-base64": "npm:^4.0.0" + tslib: "npm:^2.6.2" + checksum: 10c0/4387f4e8841f20c1c4e689078141de7e6f239e7883be3a02810a023aa30939b15576ee00227b991972d2c5a2f3b6152bcaeca0975c9fa8d3669354c647bd532a + languageName: node + linkType: hard + +"@smithy/chunked-blob-reader@npm:^5.0.0": + version: 5.0.0 + resolution: "@smithy/chunked-blob-reader@npm:5.0.0" + dependencies: + tslib: "npm:^2.6.2" + checksum: 10c0/55ba0fe366ddaa3f93e1faf8a70df0b67efedbd0008922295efe215df09b68df0ba3043293e65b17e7d1be71448d074c2bfc54e5eb6bd18f59b425822c2b9e9a + languageName: node + linkType: hard + +"@smithy/config-resolver@npm:^4.0.1": + version: 4.0.1 + resolution: "@smithy/config-resolver@npm:4.0.1" + dependencies: + "@smithy/node-config-provider": "npm:^4.0.1" + "@smithy/types": "npm:^4.1.0" + "@smithy/util-config-provider": "npm:^4.0.0" + "@smithy/util-middleware": "npm:^4.0.1" + tslib: "npm:^2.6.2" + checksum: 10c0/4ec3486deb3017607ed1b9a42b4b806b78e2c7a00f6dd51b98ccb82d9f7506b206bd9412ec0d2a05e95bc2ac3fbbafe55b1ffce9faccc4086f837645f3f7e64d + languageName: node + linkType: hard + +"@smithy/core@npm:^3.1.4": + version: 3.1.4 + resolution: "@smithy/core@npm:3.1.4" + dependencies: + "@smithy/middleware-serde": "npm:^4.0.2" + "@smithy/protocol-http": "npm:^5.0.1" + "@smithy/types": "npm:^4.1.0" + "@smithy/util-body-length-browser": "npm:^4.0.0" + "@smithy/util-middleware": "npm:^4.0.1" + "@smithy/util-stream": "npm:^4.1.1" + "@smithy/util-utf8": "npm:^4.0.0" + tslib: "npm:^2.6.2" + checksum: 10c0/8c91573fe679eecc160440b66895bb22e1549a320c86066d01ec63aa9bf756e16bb0135e0d48b039b1ccd0f8f6b580d20242d784236b6c5ca566e1cb6bf0901a + languageName: node + linkType: hard + +"@smithy/credential-provider-imds@npm:^4.0.1": + version: 4.0.1 + resolution: "@smithy/credential-provider-imds@npm:4.0.1" + dependencies: + "@smithy/node-config-provider": "npm:^4.0.1" + "@smithy/property-provider": "npm:^4.0.1" + "@smithy/types": "npm:^4.1.0" + "@smithy/url-parser": "npm:^4.0.1" + tslib: "npm:^2.6.2" + checksum: 10c0/76b5d82dfd2924f2b7a701fa159af54d3e9b16a644a210e3a74e5a3776bb28c2ffbdd342ed3f2bb1d2adf401e8144e84614523b1fad245b43e319e1d01fa1652 + 
languageName: node + linkType: hard + +"@smithy/eventstream-codec@npm:^4.0.1": + version: 4.0.1 + resolution: "@smithy/eventstream-codec@npm:4.0.1" + dependencies: + "@aws-crypto/crc32": "npm:5.2.0" + "@smithy/types": "npm:^4.1.0" + "@smithy/util-hex-encoding": "npm:^4.0.0" + tslib: "npm:^2.6.2" + checksum: 10c0/439262fddae863cadad83cc468418294d1d998134619dd67e2836cc93bbfa5b01448e852516046f03b62d0edcd558014b755b1fb0d71b9317268d5c3a5e55bbd + languageName: node + linkType: hard + +"@smithy/eventstream-serde-browser@npm:^4.0.1": + version: 4.0.1 + resolution: "@smithy/eventstream-serde-browser@npm:4.0.1" + dependencies: + "@smithy/eventstream-serde-universal": "npm:^4.0.1" + "@smithy/types": "npm:^4.1.0" + tslib: "npm:^2.6.2" + checksum: 10c0/4766a8a735085dea1ed9aad486fa70cb04908a31843d4e698a28accc373a6dc80bc8abe9834d390f347326458c03424afbd7f7f9e59a66970b839de3d44940e1 + languageName: node + linkType: hard + +"@smithy/eventstream-serde-config-resolver@npm:^4.0.1": + version: 4.0.1 + resolution: "@smithy/eventstream-serde-config-resolver@npm:4.0.1" + dependencies: + "@smithy/types": "npm:^4.1.0" + tslib: "npm:^2.6.2" + checksum: 10c0/4ba8bba39392025389c610ce984b612adfe0ed2b37f926e6ce2acafaf178d04aec395924ff37d2ad9534a28652fc64c4938b66b4bd1d2ff695ac8fcdcc4d356e + languageName: node + linkType: hard + +"@smithy/eventstream-serde-node@npm:^4.0.1": + version: 4.0.1 + resolution: "@smithy/eventstream-serde-node@npm:4.0.1" + dependencies: + "@smithy/eventstream-serde-universal": "npm:^4.0.1" + "@smithy/types": "npm:^4.1.0" + tslib: "npm:^2.6.2" + checksum: 10c0/ed451ed4483ca62cb450a7540e43ba99b816e32da7bd306d14ea49dd3ceb8a37f791578a0e5d21caf9b9f75c36c69e025c7add117cf8b0510ad3ef32ac38b08c + languageName: node + linkType: hard + +"@smithy/eventstream-serde-universal@npm:^4.0.1": + version: 4.0.1 + resolution: "@smithy/eventstream-serde-universal@npm:4.0.1" + dependencies: + "@smithy/eventstream-codec": "npm:^4.0.1" + "@smithy/types": "npm:^4.1.0" + tslib: "npm:^2.6.2" + checksum: 10c0/8a1261fca8df7559bf78234f961903281b8602ffdbe0ff25f506cba25f013e4bb93bd8380703224fe63aeaf66e13bfebbdaf8083f38628750fc5f3c4ee07dff8 + languageName: node + linkType: hard + +"@smithy/fetch-http-handler@npm:^5.0.1": + version: 5.0.1 + resolution: "@smithy/fetch-http-handler@npm:5.0.1" + dependencies: + "@smithy/protocol-http": "npm:^5.0.1" + "@smithy/querystring-builder": "npm:^4.0.1" + "@smithy/types": "npm:^4.1.0" + "@smithy/util-base64": "npm:^4.0.0" + tslib: "npm:^2.6.2" + checksum: 10c0/5123f6119de50d4c992ebf29b769382d7000db4ed8f564680c5727e2a8beb71664198eb2eaf7cb6152ab777f654d54cf9bff5a4658e1cfdeef2987eeea7f1149 + languageName: node + linkType: hard + +"@smithy/hash-blob-browser@npm:^4.0.1": + version: 4.0.1 + resolution: "@smithy/hash-blob-browser@npm:4.0.1" + dependencies: + "@smithy/chunked-blob-reader": "npm:^5.0.0" + "@smithy/chunked-blob-reader-native": "npm:^4.0.0" + "@smithy/types": "npm:^4.1.0" + tslib: "npm:^2.6.2" + checksum: 10c0/16c61fe0ff52074aa374a439955f0ea0a6c6fb64744b55c840f29db1da05cefb340a6d1d4b2a7708ca6f447e972015a95bdfef4fc5361d0bc7c2c3b5cd4c1ca8 + languageName: node + linkType: hard + +"@smithy/hash-node@npm:^4.0.1": + version: 4.0.1 + resolution: "@smithy/hash-node@npm:4.0.1" + dependencies: + "@smithy/types": "npm:^4.1.0" + "@smithy/util-buffer-from": "npm:^4.0.0" + "@smithy/util-utf8": "npm:^4.0.0" + tslib: "npm:^2.6.2" + checksum: 10c0/d84be63a2c8a4aafa3b9f23ae76c9cf92a31fa7c49c85930424da1335259b29f6333c5c82d2e7bf689549290ffd0d995043c9ea6f05b0b2a8dfad1f649eac43f + languageName: node + 
linkType: hard + +"@smithy/hash-stream-node@npm:^4.0.1": + version: 4.0.1 + resolution: "@smithy/hash-stream-node@npm:4.0.1" + dependencies: + "@smithy/types": "npm:^4.1.0" + "@smithy/util-utf8": "npm:^4.0.0" + tslib: "npm:^2.6.2" + checksum: 10c0/c214460da504008905dff7c654cc8b49dfcb060fedef77e63fc36e3c71972be39b018e4a5618e3efb654a6b63a604975521c161ae4614d2580a4c821dfb6e1d5 + languageName: node + linkType: hard + +"@smithy/invalid-dependency@npm:^4.0.1": + version: 4.0.1 + resolution: "@smithy/invalid-dependency@npm:4.0.1" + dependencies: + "@smithy/types": "npm:^4.1.0" + tslib: "npm:^2.6.2" + checksum: 10c0/74bebdffb6845f6060eed482ad6e921df66af90d2f8c63f39a3bb334fa68a3e3aa8bd5cd7aa5f65628857e235e113895433895db910ba290633daa0df5725eb7 + languageName: node + linkType: hard + +"@smithy/is-array-buffer@npm:^2.2.0": + version: 2.2.0 + resolution: "@smithy/is-array-buffer@npm:2.2.0" + dependencies: + tslib: "npm:^2.6.2" + checksum: 10c0/2f2523cd8cc4538131e408eb31664983fecb0c8724956788b015aaf3ab85a0c976b50f4f09b176f1ed7bbe79f3edf80743be7a80a11f22cd9ce1285d77161aaf + languageName: node + linkType: hard + +"@smithy/is-array-buffer@npm:^4.0.0": + version: 4.0.0 + resolution: "@smithy/is-array-buffer@npm:4.0.0" + dependencies: + tslib: "npm:^2.6.2" + checksum: 10c0/ae393fbd5944d710443cd5dd225d1178ef7fb5d6259c14f3e1316ec75e401bda6cf86f7eb98bfd38e5ed76e664b810426a5756b916702cbd418f0933e15e7a3b + languageName: node + linkType: hard + +"@smithy/md5-js@npm:^4.0.1": + version: 4.0.1 + resolution: "@smithy/md5-js@npm:4.0.1" + dependencies: + "@smithy/types": "npm:^4.1.0" + "@smithy/util-utf8": "npm:^4.0.0" + tslib: "npm:^2.6.2" + checksum: 10c0/b5e3fa1d31832535b3a35d0a52ebf983da7cf1a1658b6a7f8bcc948cde808eb361696575d78e5e5df92f3c9b9569b5a1f2d1dff7b465d0a803fa901e0286599d + languageName: node + linkType: hard + +"@smithy/middleware-content-length@npm:^4.0.1": + version: 4.0.1 + resolution: "@smithy/middleware-content-length@npm:4.0.1" + dependencies: + "@smithy/protocol-http": "npm:^5.0.1" + "@smithy/types": "npm:^4.1.0" + tslib: "npm:^2.6.2" + checksum: 10c0/3dfbfe658cc8636e9e923a10151a32c6234897c4a86856e55fe4fadc322b3f3e977e50d15553afcb34cadb213de2d95a82af9c8f735e758f4dc21a031e8ecb17 + languageName: node + linkType: hard + +"@smithy/middleware-endpoint@npm:^4.0.5": + version: 4.0.5 + resolution: "@smithy/middleware-endpoint@npm:4.0.5" + dependencies: + "@smithy/core": "npm:^3.1.4" + "@smithy/middleware-serde": "npm:^4.0.2" + "@smithy/node-config-provider": "npm:^4.0.1" + "@smithy/shared-ini-file-loader": "npm:^4.0.1" + "@smithy/types": "npm:^4.1.0" + "@smithy/url-parser": "npm:^4.0.1" + "@smithy/util-middleware": "npm:^4.0.1" + tslib: "npm:^2.6.2" + checksum: 10c0/4573b7fb9525c3b887050183dc0c31bb6fd2801c98a8e94984474634e940a5efd73bbfc49c50d90245089112519bfcdbd8b5c2f279b2f4e64bd8df2203d5221c + languageName: node + linkType: hard + +"@smithy/middleware-retry@npm:^4.0.6": + version: 4.0.6 + resolution: "@smithy/middleware-retry@npm:4.0.6" + dependencies: + "@smithy/node-config-provider": "npm:^4.0.1" + "@smithy/protocol-http": "npm:^5.0.1" + "@smithy/service-error-classification": "npm:^4.0.1" + "@smithy/smithy-client": "npm:^4.1.5" + "@smithy/types": "npm:^4.1.0" + "@smithy/util-middleware": "npm:^4.0.1" + "@smithy/util-retry": "npm:^4.0.1" + tslib: "npm:^2.6.2" + uuid: "npm:^9.0.1" + checksum: 10c0/395888b3ae39b4bfa91b145f77f72a31de63a5e1fe7bbefb6a8ce0596b6843f92cf640421cf3e802746e6432946035d61e5e665d0dc1bdc9c70ce318b6347c45 + languageName: node + linkType: hard + +"@smithy/middleware-serde@npm:^4.0.2": + 
version: 4.0.2 + resolution: "@smithy/middleware-serde@npm:4.0.2" + dependencies: + "@smithy/types": "npm:^4.1.0" + tslib: "npm:^2.6.2" + checksum: 10c0/b1efee86ecc37a063bdfdb89cf691c9b9627502473f2caa0c964c0648f7b550b7a49755a9b13cdfc11aebf1641cf3ae6f8b5f1895a20241960504936da9b3138 + languageName: node + linkType: hard + +"@smithy/middleware-stack@npm:^4.0.1": + version: 4.0.1 + resolution: "@smithy/middleware-stack@npm:4.0.1" + dependencies: + "@smithy/types": "npm:^4.1.0" + tslib: "npm:^2.6.2" + checksum: 10c0/b7f710e263e37a8c80c8d31c7d8fe5f66dec2955cde412054eefcc8df53905e1e2e53a01fd7930eb82c82a3a28eadd00e69f07dfc6e793b1d9272db58a982e9b + languageName: node + linkType: hard + +"@smithy/node-config-provider@npm:^4.0.1": + version: 4.0.1 + resolution: "@smithy/node-config-provider@npm:4.0.1" + dependencies: + "@smithy/property-provider": "npm:^4.0.1" + "@smithy/shared-ini-file-loader": "npm:^4.0.1" + "@smithy/types": "npm:^4.1.0" + tslib: "npm:^2.6.2" + checksum: 10c0/f8d3b1fe91eeba41426ec57d62cfbeaed027650b5549fb2ba5bc889c1cfb7880d4fdb5a484d231b3fb2a9c9023c1f4e8907a5d18d75b3787481cde9f87c4d9cb + languageName: node + linkType: hard + +"@smithy/node-http-handler@npm:^4.0.2": + version: 4.0.2 + resolution: "@smithy/node-http-handler@npm:4.0.2" + dependencies: + "@smithy/abort-controller": "npm:^4.0.1" + "@smithy/protocol-http": "npm:^5.0.1" + "@smithy/querystring-builder": "npm:^4.0.1" + "@smithy/types": "npm:^4.1.0" + tslib: "npm:^2.6.2" + checksum: 10c0/6a3446dcf3bf006cf55b065edfbe7636f2aa13073f2937e224890902de44b191a5214dce4cb61e98b1ad53889bdbb35386e8810a338bc75ea3743f8d4550a2ad + languageName: node + linkType: hard + +"@smithy/property-provider@npm:^4.0.1": + version: 4.0.1 + resolution: "@smithy/property-provider@npm:4.0.1" + dependencies: + "@smithy/types": "npm:^4.1.0" + tslib: "npm:^2.6.2" + checksum: 10c0/43960a6bdf25944e1cc9d4ee83bf45ab5641f7e2068c46d5015166c0f035b1752e03847d7c15d3c013f5f0467441c9c5a8d6a0428f5401988035867709e4dea3 + languageName: node + linkType: hard + +"@smithy/protocol-http@npm:^5.0.1": + version: 5.0.1 + resolution: "@smithy/protocol-http@npm:5.0.1" + dependencies: + "@smithy/types": "npm:^4.1.0" + tslib: "npm:^2.6.2" + checksum: 10c0/87b157cc86c23f7199acad237e5e0cc309b18a2a4162dfd8f99609f6cca403f832b645535e58173e2933b4d96ec71f2df16d04e1bdcf52b7b0fcbdbc0067de93 + languageName: node + linkType: hard + +"@smithy/querystring-builder@npm:^4.0.1": + version: 4.0.1 + resolution: "@smithy/querystring-builder@npm:4.0.1" + dependencies: + "@smithy/types": "npm:^4.1.0" + "@smithy/util-uri-escape": "npm:^4.0.0" + tslib: "npm:^2.6.2" + checksum: 10c0/21f39e3a79458d343f3dec76b38598c49a34a3c4d1d3c23b6c8895eae2b610fb3c704f995a1730599ef7a881216ea064a25bb7dc8abe5bb1ee50dc6078ad97a4 + languageName: node + linkType: hard + +"@smithy/querystring-parser@npm:^4.0.1": + version: 4.0.1 + resolution: "@smithy/querystring-parser@npm:4.0.1" + dependencies: + "@smithy/types": "npm:^4.1.0" + tslib: "npm:^2.6.2" + checksum: 10c0/10e5aba13fbb9a602299fb92f02142e291ab5c7cd221e0ca542981414533e081abdd7442de335f2267ee4a9ff8eba4d7ba848455df50d2771f0ddb8b7d8f9d8b + languageName: node + linkType: hard + +"@smithy/service-error-classification@npm:^4.0.1": + version: 4.0.1 + resolution: "@smithy/service-error-classification@npm:4.0.1" + dependencies: + "@smithy/types": "npm:^4.1.0" + checksum: 10c0/de015fd140bf4e97da34a2283ce73971eb3b3aae53a257000dce0c99b8974a5e76bae9e517545ef58bd00ca8094c813cd1bcf0696c2c51e731418e2a769c744f + languageName: node + linkType: hard + 
+"@smithy/shared-ini-file-loader@npm:^4.0.1": + version: 4.0.1 + resolution: "@smithy/shared-ini-file-loader@npm:4.0.1" + dependencies: + "@smithy/types": "npm:^4.1.0" + tslib: "npm:^2.6.2" + checksum: 10c0/0f0173dbe61c8dac6847cc2c5115db5f1292c956c7f0559ce7bc8e5ed196a4b102977445ee1adb72206a15226a1098cdea01e92aa8ce19f4343f1135e7d37bcf + languageName: node + linkType: hard + +"@smithy/signature-v4@npm:^5.0.1": + version: 5.0.1 + resolution: "@smithy/signature-v4@npm:5.0.1" + dependencies: + "@smithy/is-array-buffer": "npm:^4.0.0" + "@smithy/protocol-http": "npm:^5.0.1" + "@smithy/types": "npm:^4.1.0" + "@smithy/util-hex-encoding": "npm:^4.0.0" + "@smithy/util-middleware": "npm:^4.0.1" + "@smithy/util-uri-escape": "npm:^4.0.0" + "@smithy/util-utf8": "npm:^4.0.0" + tslib: "npm:^2.6.2" + checksum: 10c0/a7f118642c9641f813098faad355fc5b54ae215fec589fb238d72d44149248c02e32dcfe034000f151ab665450542df88c70d269f9a3233e01a905ec03512514 + languageName: node + linkType: hard + +"@smithy/smithy-client@npm:^4.1.5": + version: 4.1.5 + resolution: "@smithy/smithy-client@npm:4.1.5" + dependencies: + "@smithy/core": "npm:^3.1.4" + "@smithy/middleware-endpoint": "npm:^4.0.5" + "@smithy/middleware-stack": "npm:^4.0.1" + "@smithy/protocol-http": "npm:^5.0.1" + "@smithy/types": "npm:^4.1.0" + "@smithy/util-stream": "npm:^4.1.1" + tslib: "npm:^2.6.2" + checksum: 10c0/7dbb54f2cff8d502ac93b03181e78ca051f1f6028df0643805f3aceefb4bbe492e4a7e4496933a8bfc146eb65879554bf9a17d083351ff2e9302d0494b67fa28 + languageName: node + linkType: hard + +"@smithy/types@npm:^4.1.0": + version: 4.1.0 + resolution: "@smithy/types@npm:4.1.0" + dependencies: + tslib: "npm:^2.6.2" + checksum: 10c0/d8817145ea043c5b29783df747ed47c3a1c584fd9d02bbdb609d38b7cb4dded1197ac214ae112744c86abe0537a314dae0edbc0e752bb639ef2d9fb84c67a9d9 + languageName: node + linkType: hard + +"@smithy/url-parser@npm:^4.0.1": + version: 4.0.1 + resolution: "@smithy/url-parser@npm:4.0.1" + dependencies: + "@smithy/querystring-parser": "npm:^4.0.1" + "@smithy/types": "npm:^4.1.0" + tslib: "npm:^2.6.2" + checksum: 10c0/fc969b55857b3bcdc920f54bbb9b0c88b5c7695ac7100bea1c7038fd4c9a09ebe0fbb38c4839d39acea28da0d8cb4fea71ffbf362d8aec295acbb94c1b45fc86 + languageName: node + linkType: hard + +"@smithy/util-base64@npm:^4.0.0": + version: 4.0.0 + resolution: "@smithy/util-base64@npm:4.0.0" + dependencies: + "@smithy/util-buffer-from": "npm:^4.0.0" + "@smithy/util-utf8": "npm:^4.0.0" + tslib: "npm:^2.6.2" + checksum: 10c0/ad18ec66cc357c189eef358d96876b114faf7086b13e47e009b265d0ff80cec046052500489c183957b3a036768409acdd1a373e01074cc002ca6983f780cffc + languageName: node + linkType: hard + +"@smithy/util-body-length-browser@npm:^4.0.0": + version: 4.0.0 + resolution: "@smithy/util-body-length-browser@npm:4.0.0" + dependencies: + tslib: "npm:^2.6.2" + checksum: 10c0/574a10934024a86556e9dcde1a9776170284326c3dfcc034afa128cc5a33c1c8179fca9cfb622ef8be5f2004316cc3f427badccceb943e829105536ec26306d9 + languageName: node + linkType: hard + +"@smithy/util-body-length-node@npm:^4.0.0": + version: 4.0.0 + resolution: "@smithy/util-body-length-node@npm:4.0.0" + dependencies: + tslib: "npm:^2.6.2" + checksum: 10c0/e91fd3816767606c5f786166ada26440457fceb60f96653b3d624dcf762a8c650e513c275ff3f647cb081c63c283cc178853a7ed9aa224abc8ece4eeeef7a1dd + languageName: node + linkType: hard + +"@smithy/util-buffer-from@npm:^2.2.0": + version: 2.2.0 + resolution: "@smithy/util-buffer-from@npm:2.2.0" + dependencies: + "@smithy/is-array-buffer": "npm:^2.2.0" + tslib: "npm:^2.6.2" + checksum: 
10c0/223d6a508b52ff236eea01cddc062b7652d859dd01d457a4e50365af3de1e24a05f756e19433f6ccf1538544076b4215469e21a4ea83dc1d58d829725b0dbc5a + languageName: node + linkType: hard + +"@smithy/util-buffer-from@npm:^4.0.0": + version: 4.0.0 + resolution: "@smithy/util-buffer-from@npm:4.0.0" + dependencies: + "@smithy/is-array-buffer": "npm:^4.0.0" + tslib: "npm:^2.6.2" + checksum: 10c0/be7cd33b6cb91503982b297716251e67cdca02819a15797632091cadab2dc0b4a147fff0709a0aa9bbc0b82a2644a7ed7c8afdd2194d5093cee2e9605b3a9f6f + languageName: node + linkType: hard + +"@smithy/util-config-provider@npm:^4.0.0": + version: 4.0.0 + resolution: "@smithy/util-config-provider@npm:4.0.0" + dependencies: + tslib: "npm:^2.6.2" + checksum: 10c0/cd9498d5f77a73aadd575084bcb22d2bb5945bac4605d605d36f2efe3f165f2b60f4dc88b7a62c2ed082ffa4b2c2f19621d0859f18399edbc2b5988d92e4649f + languageName: node + linkType: hard + +"@smithy/util-defaults-mode-browser@npm:^4.0.6": + version: 4.0.6 + resolution: "@smithy/util-defaults-mode-browser@npm:4.0.6" + dependencies: + "@smithy/property-provider": "npm:^4.0.1" + "@smithy/smithy-client": "npm:^4.1.5" + "@smithy/types": "npm:^4.1.0" + bowser: "npm:^2.11.0" + tslib: "npm:^2.6.2" + checksum: 10c0/4c1d406f7bde7455649ef70d1f09955e614da8a000ffeceac111aad0ee3daeb126206e88ae169f359da3aace382e2800bc20475438343ff87970682a3fdc6aa2 + languageName: node + linkType: hard + +"@smithy/util-defaults-mode-node@npm:^4.0.6": + version: 4.0.6 + resolution: "@smithy/util-defaults-mode-node@npm:4.0.6" + dependencies: + "@smithy/config-resolver": "npm:^4.0.1" + "@smithy/credential-provider-imds": "npm:^4.0.1" + "@smithy/node-config-provider": "npm:^4.0.1" + "@smithy/property-provider": "npm:^4.0.1" + "@smithy/smithy-client": "npm:^4.1.5" + "@smithy/types": "npm:^4.1.0" + tslib: "npm:^2.6.2" + checksum: 10c0/30209b45ed2f45d8152e4be2bffb1fe6b9a99fb350659170adcef464bd7f926c33651555d0592f1fbe1280432e90d0862061dd486af438afd9b356db20b0986e + languageName: node + linkType: hard + +"@smithy/util-endpoints@npm:^3.0.1": + version: 3.0.1 + resolution: "@smithy/util-endpoints@npm:3.0.1" + dependencies: + "@smithy/node-config-provider": "npm:^4.0.1" + "@smithy/types": "npm:^4.1.0" + tslib: "npm:^2.6.2" + checksum: 10c0/fed80f300e6a6e69873e613cdd12f640d33a19fc09a41e3afd536f7ea36f7785edd96fbd0402b6980a0e5dfc9bcb8b37f503d522b4ef317f31f4fd0100c466ff + languageName: node + linkType: hard + +"@smithy/util-hex-encoding@npm:^4.0.0": + version: 4.0.0 + resolution: "@smithy/util-hex-encoding@npm:4.0.0" + dependencies: + tslib: "npm:^2.6.2" + checksum: 10c0/70dbb3aa1a79aff3329d07a66411ff26398df338bdd8a6d077b438231afe3dc86d9a7022204baddecd8bc633f059d5c841fa916d81dd7447ea79b64148f386d2 + languageName: node + linkType: hard + +"@smithy/util-middleware@npm:^4.0.1": + version: 4.0.1 + resolution: "@smithy/util-middleware@npm:4.0.1" + dependencies: + "@smithy/types": "npm:^4.1.0" + tslib: "npm:^2.6.2" + checksum: 10c0/1dd2b058f392fb6788809f14c2c1d53411f79f6e9f88b515ffd36792f9f5939fe4af96fb5b0486a3d0cd30181783b7a5393dce2e8b83ba62db7c6d3af6572eff + languageName: node + linkType: hard + +"@smithy/util-retry@npm:^4.0.1": + version: 4.0.1 + resolution: "@smithy/util-retry@npm:4.0.1" + dependencies: + "@smithy/service-error-classification": "npm:^4.0.1" + "@smithy/types": "npm:^4.1.0" + tslib: "npm:^2.6.2" + checksum: 10c0/93ef89572651b8a30b9a648292660ae9532508ec6d2577afc62e1d9125fe6d14086e0f70a2981bf9f12256b41a57152368b5ed839cdd2df47ba78dd005615173 + languageName: node + linkType: hard + +"@smithy/util-stream@npm:^4.1.1": + version: 4.1.1 + 
resolution: "@smithy/util-stream@npm:4.1.1" + dependencies: + "@smithy/fetch-http-handler": "npm:^5.0.1" + "@smithy/node-http-handler": "npm:^4.0.2" + "@smithy/types": "npm:^4.1.0" + "@smithy/util-base64": "npm:^4.0.0" + "@smithy/util-buffer-from": "npm:^4.0.0" + "@smithy/util-hex-encoding": "npm:^4.0.0" + "@smithy/util-utf8": "npm:^4.0.0" + tslib: "npm:^2.6.2" + checksum: 10c0/9088e4e9baeac8af4de3bc8694cc57d49b3c9ef45c6441cc572b3d14fb88e0929624070d1528c3afe27ab710a2e0eb4a7c2938d676795b78788ab135b2f66e32 + languageName: node + linkType: hard + +"@smithy/util-uri-escape@npm:^4.0.0": + version: 4.0.0 + resolution: "@smithy/util-uri-escape@npm:4.0.0" + dependencies: + tslib: "npm:^2.6.2" + checksum: 10c0/23984624060756adba8aa4ab1693fe6b387ee5064d8ec4dfd39bb5908c4ee8b9c3f2dc755da9b07505d8e3ce1338c1867abfa74158931e4728bf3cfcf2c05c3d + languageName: node + linkType: hard + +"@smithy/util-utf8@npm:^2.0.0": + version: 2.3.0 + resolution: "@smithy/util-utf8@npm:2.3.0" + dependencies: + "@smithy/util-buffer-from": "npm:^2.2.0" + tslib: "npm:^2.6.2" + checksum: 10c0/e18840c58cc507ca57fdd624302aefd13337ee982754c9aa688463ffcae598c08461e8620e9852a424d662ffa948fc64919e852508028d09e89ced459bd506ab + languageName: node + linkType: hard + +"@smithy/util-utf8@npm:^4.0.0": + version: 4.0.0 + resolution: "@smithy/util-utf8@npm:4.0.0" + dependencies: + "@smithy/util-buffer-from": "npm:^4.0.0" + tslib: "npm:^2.6.2" + checksum: 10c0/28a5a5372cbf0b3d2e32dd16f79b04c2aec6f704cf13789db922e9686fde38dde0171491cfa4c2c201595d54752a319faaeeed3c325329610887694431e28c98 + languageName: node + linkType: hard + +"@smithy/util-waiter@npm:^4.0.2": + version: 4.0.2 + resolution: "@smithy/util-waiter@npm:4.0.2" + dependencies: + "@smithy/abort-controller": "npm:^4.0.1" + "@smithy/types": "npm:^4.1.0" + tslib: "npm:^2.6.2" + checksum: 10c0/36ee71b41923ae58d9246745e3b7497fe45577dbb97f6e15dd07b4fddb4f82f32e0b7604c7b388fc92d5cbe49d9499998eda979a77a4a770c1b25686a5aed4ce + languageName: node + linkType: hard + "@socket.io/component-emitter@npm:~3.1.0": version: 3.1.2 resolution: "@socket.io/component-emitter@npm:3.1.2" @@ -7013,6 +8246,13 @@ __metadata: languageName: node linkType: hard +"bowser@npm:^2.11.0": + version: 2.11.0 + resolution: "bowser@npm:2.11.0" + checksum: 10c0/04efeecc7927a9ec33c667fa0965dea19f4ac60b3fea60793c2e6cf06c1dcd2f7ae1dbc656f450c5f50783b1c75cf9dc173ba6f3b7db2feee01f8c4b793e1bd3 + languageName: node + linkType: hard + "brace-expansion@npm:^1.1.7": version: 1.1.11 resolution: "brace-expansion@npm:1.1.11" @@ -10299,6 +11539,17 @@ __metadata: languageName: node linkType: hard +"fast-xml-parser@npm:4.4.1": + version: 4.4.1 + resolution: "fast-xml-parser@npm:4.4.1" + dependencies: + strnum: "npm:^1.0.5" + bin: + fxparser: src/cli/cli.js + checksum: 10c0/7f334841fe41bfb0bf5d920904ccad09cefc4b5e61eaf4c225bf1e1bb69ee77ef2147d8942f783ee8249e154d1ca8a858e10bda78a5d78b8bed3f48dcee9bf33 + languageName: node + linkType: hard + "fastq@npm:^1.6.0": version: 1.17.1 resolution: "fastq@npm:1.17.1" @@ -11105,6 +12356,7 @@ __metadata: "@angular/platform-browser-dynamic": "npm:16.2.12" "@angular/router": "npm:16.2.12" "@auth0/angular-jwt": "npm:5.1.0" + "@aws-sdk/client-s3": "npm:^3.750.0" "@codingame/monaco-vscode-java-default-extension": "npm:8.0.4" "@codingame/monaco-vscode-python-default-extension": "npm:8.0.4" "@codingame/monaco-vscode-r-default-extension": "npm:8.0.4" @@ -17372,6 +18624,13 @@ __metadata: languageName: node linkType: hard +"strnum@npm:^1.0.5": + version: 1.1.1 + resolution: "strnum@npm:1.1.1" + checksum: 
10c0/c016034f9896ea99c4a22a8a8142d1ec72dba8d514ddec399f96998d5d2ab9f9e5b6c75c761d9730c3244b794022b1a63ec293f0da41ab0a994e3584020ba1ad
+  languageName: node
+  linkType: hard
+
 "strong-log-transformer@npm:^2.1.0":
   version: 2.1.0
   resolution: "strong-log-transformer@npm:2.1.0"
@@ -18482,7 +19741,7 @@ __metadata:
   languageName: node
   linkType: hard
 
-"uuid@npm:^9.0.0":
+"uuid@npm:^9.0.0, uuid@npm:^9.0.1":
   version: 9.0.1
   resolution: "uuid@npm:9.0.1"
   bin:
diff --git a/core/workflow-core/src/main/resources/storage-config.yaml b/core/workflow-core/src/main/resources/storage-config.yaml
index 49df4810b0..f41780864b 100644
--- a/core/workflow-core/src/main/resources/storage-config.yaml
+++ b/core/workflow-core/src/main/resources/storage-config.yaml
@@ -38,6 +38,8 @@ storage:
     bucket-name: "texera-dataset"
 
   s3:
+    presigned-url-upload-expiration-minutes: 15
+    presigned-url-download-expiration-minutes: 15
     endpoint: "http://localhost:9500"
     auth:
       username: "texera_minio"
diff --git a/core/workflow-core/src/main/scala/edu/uci/ics/amber/core/storage/FileResolver.scala b/core/workflow-core/src/main/scala/edu/uci/ics/amber/core/storage/FileResolver.scala
index 821f8d92be..909f3a81fa 100644
--- a/core/workflow-core/src/main/scala/edu/uci/ics/amber/core/storage/FileResolver.scala
+++ b/core/workflow-core/src/main/scala/edu/uci/ics/amber/core/storage/FileResolver.scala
@@ -118,13 +118,13 @@ object FileResolver {
       }
       .toArray
 
-    // Prepend did and versionHash to the encoded path segments
+    // Prepend dataset name and versionHash to the encoded path segments
     val allPathSegments = Array(
-      dataset.getDid.intValue().toString,
+      datasetName,
       datasetVersion.getVersionHash
    ) ++ encodedFileRelativePath
 
-    // Build the the format /{did}/{versionHash}/{fileRelativePath}, both Linux and Windows use forward slash as the splitter
+    // Build the format /{datasetName}/{versionHash}/{fileRelativePath}, both Linux and Windows use forward slash as the splitter
     val uriSplitter = "/"
     val encodedPath = uriSplitter + allPathSegments.mkString(uriSplitter)
 
diff --git a/core/workflow-core/src/main/scala/edu/uci/ics/amber/core/storage/LakeFSFileStorage.scala b/core/workflow-core/src/main/scala/edu/uci/ics/amber/core/storage/LakeFSFileStorage.scala
index 23dbcab8c8..b4f33768f1 100644
--- a/core/workflow-core/src/main/scala/edu/uci/ics/amber/core/storage/LakeFSFileStorage.scala
+++ b/core/workflow-core/src/main/scala/edu/uci/ics/amber/core/storage/LakeFSFileStorage.scala
@@ -141,6 +141,17 @@ object LakeFSFileStorage {
     objectsApi.getObject(repoName, commitHash, filePath).execute()
   }
 
+  /**
+   * Retrieves a presigned URL for the file at a specific commit and path.
+   *
+   * @param repoName   Repository name.
+   * @param commitHash Commit hash of the version.
+   * @param filePath   Path to the file in the repository.
+   */
+  def retrieveFilePresignedUrl(repoName: String, commitHash: String, filePath: String): String = {
+    objectsApi.getObject(repoName, commitHash, filePath).presign(true).execute().getPath
+  }
+
   /**
    * Deletes an entire repository.
    *
diff --git a/core/workflow-core/src/main/scala/edu/uci/ics/amber/core/storage/S3Storage.scala b/core/workflow-core/src/main/scala/edu/uci/ics/amber/core/storage/S3Storage.scala
index 99f75cb9b2..2e7129f4ba 100644
--- a/core/workflow-core/src/main/scala/edu/uci/ics/amber/core/storage/S3Storage.scala
+++ b/core/workflow-core/src/main/scala/edu/uci/ics/amber/core/storage/S3Storage.scala
@@ -113,50 +113,80 @@ object S3Storage {
   }
 
   /**
-   * Generates a pre-signed URL for uploading a file.
-   *
-   * @param bucketName Target MinIO bucket.
-   * @param key Object key (path in MinIO).
-   * @param expiration Expiration duration (default: 15 minutes).
-   * @return URL string that can be used for upload.
-   */
+   * Generates a pre-signed URL for uploading a file.
+   * Supports both single and multipart uploads.
+   *
+   * @param bucketName Target S3 bucket.
+   * @param key Object key (file path).
+   * Expiration is taken from StorageConfig.s3PresignedUrlUploadExpirationMinutes.
+   * @param multipart Whether the upload is multipart.
+   * @param contentType Optional content type.
+   * @return Either a pre-signed URL for single-part uploads or an upload ID for multipart uploads.
+   */
   def generatePresignedUploadUrl(
-      bucketName: String,
-      key: String,
-      expiration: Duration = Duration.ofMinutes(15)
-  ): URL = {
-    val request = PutObjectPresignRequest
-      .builder()
-      .signatureDuration(expiration)
-      .putObjectRequest(
-        PutObjectRequest
-          .builder()
-          .bucket(bucketName)
-          .key(key)
-          .build()
-      )
-      .build()
+      bucketName: String,
+      key: String,
+      multipart: Boolean,
+      contentType: Option[String] = None
+  ): Either[URL, String] = {
+    if (multipart) {
+      // Initialize multipart upload
+      val uploadId = initiateMultipartUpload(bucketName, key, contentType)
+      Right(uploadId)
+    } else {
+      // Generate pre-signed URL for single-part upload
+      val requestBuilder = PutObjectRequest
+        .builder()
+        .bucket(bucketName)
+        .key(key)
 
-    val presignedRequest: PresignedPutObjectRequest = s3Presigner.presignPutObject(request)
-    presignedRequest.url()
+      contentType.foreach(requestBuilder.contentType)
+
+      val request = PutObjectPresignRequest
+        .builder()
+        .signatureDuration(Duration.ofMinutes(StorageConfig.s3PresignedUrlUploadExpirationMinutes))
+        .putObjectRequest(requestBuilder.build())
+        .build()
+
+      val presignedRequest: PresignedPutObjectRequest = s3Presigner.presignPutObject(request)
+      Left(presignedRequest.url())
+    }
   }
 
   /**
-   * Generates a pre-signed URL for downloading a file.
-   *
-   * @param bucketName Target MinIO bucket.
-   * @param key Object key (path in MinIO).
-   * @param expiration Expiration duration (default: 15 minutes).
-   * @return URL string that can be used for download.
-   */
+   * Initiates a multipart upload and returns the upload ID.
+   *
+   * @param bucket S3 bucket name.
+   * @param key Object key (file path).
+   * @param contentType Optional content type.
+   * @return Upload ID for multipart upload.
+   */
+  def initiateMultipartUpload(bucket: String, key: String, contentType: Option[String] = None): String = {
+    val requestBuilder = CreateMultipartUploadRequest.builder()
+      .bucket(bucket)
+      .key(key)
+
+    contentType.foreach(requestBuilder.contentType)
+
+    val response = s3Client.createMultipartUpload(requestBuilder.build())
+    response.uploadId()
+  }
+
+  /**
+   * Generates a pre-signed URL for downloading a file.
+   *
+   * @param bucketName Target S3 bucket.
+   * @param key Object key (file path).
+   * Expiration is taken from StorageConfig.s3PresignedUrlDownloadExpirationMinutes.
+   * @return URL string for download.
+   */
   def generatePresignedDownloadUrl(
-      bucketName: String,
-      key: String,
-      expiration: Duration = Duration.ofMinutes(15)
-  ): URL = {
+      bucketName: String,
+      key: String,
+  ): URL = {
     val request = GetObjectPresignRequest
       .builder()
-      .signatureDuration(expiration)
+      .signatureDuration(Duration.ofMinutes(StorageConfig.s3PresignedUrlDownloadExpirationMinutes))
       .getObjectRequest(
         GetObjectRequest
           .builder()
diff --git a/core/workflow-core/src/main/scala/edu/uci/ics/amber/core/storage/StorageConfig.scala b/core/workflow-core/src/main/scala/edu/uci/ics/amber/core/storage/StorageConfig.scala
index 184272519a..99b8a7b739 100644
--- a/core/workflow-core/src/main/scala/edu/uci/ics/amber/core/storage/StorageConfig.scala
+++ b/core/workflow-core/src/main/scala/edu/uci/ics/amber/core/storage/StorageConfig.scala
@@ -218,6 +218,16 @@ object StorageConfig {
     .asInstanceOf[Map[String, Any]]("endpoint")
     .asInstanceOf[String]
 
+  val s3PresignedUrlUploadExpirationMinutes: Int = conf("storage")
+    .asInstanceOf[Map[String, Any]]("s3")
+    .asInstanceOf[Map[String, Any]]("presigned-url-upload-expiration-minutes")
+    .asInstanceOf[Int]
+
+  val s3PresignedUrlDownloadExpirationMinutes: Int = conf("storage")
+    .asInstanceOf[Map[String, Any]]("s3")
+    .asInstanceOf[Map[String, Any]]("presigned-url-download-expiration-minutes")
+    .asInstanceOf[Int]
+
   val s3Username: String = conf("storage")
     .asInstanceOf[Map[String, Any]]("s3")
     .asInstanceOf[Map[String, Any]]("auth")
diff --git a/core/workflow-core/src/main/scala/edu/uci/ics/amber/core/storage/model/DatasetFileDocument.scala b/core/workflow-core/src/main/scala/edu/uci/ics/amber/core/storage/model/DatasetFileDocument.scala
index 0f65191e08..35b268db97 100644
--- a/core/workflow-core/src/main/scala/edu/uci/ics/amber/core/storage/model/DatasetFileDocument.scala
+++ b/core/workflow-core/src/main/scala/edu/uci/ics/amber/core/storage/model/DatasetFileDocument.scala
@@ -1,5 +1,6 @@
 package edu.uci.ics.amber.core.storage.model
 
+import edu.uci.ics.amber.core.storage.{S3Storage, StorageConfig}
 import edu.uci.ics.amber.core.storage.util.dataset.GitVersionControlLocalFileStorage
 import edu.uci.ics.amber.util.PathUtils
 
@@ -9,31 +10,32 @@ import java.nio.charset.StandardCharsets
 import java.nio.file.{Files, Path, Paths}
 import scala.jdk.CollectionConverters.IteratorHasAsScala
 
-private[storage] class DatasetFileDocument(uri: URI) extends VirtualDocument[Nothing] {
+private[storage] class DatasetFileDocument(uri: URI) extends VirtualDocument[Nothing] with S3Compatible {
   // Utility function to parse and decode URI segments into individual components
-  private def parseUri(uri: URI): (Int, String, Path) = {
+  private def parseUri(uri: URI): (String, String, Path) = {
     val segments = Paths.get(uri.getPath).iterator().asScala.map(_.toString).toArray
     if (segments.length < 3)
       throw new IllegalArgumentException("URI format is incorrect")
 
-    val did = segments(0).toInt
+    // TODO: consider whether use dataset name or did
+    val datasetName = segments(0)
     val datasetVersionHash = URLDecoder.decode(segments(1), StandardCharsets.UTF_8)
     val decodedRelativeSegments =
       segments.drop(2).map(part => URLDecoder.decode(part, StandardCharsets.UTF_8))
     val fileRelativePath = Paths.get(decodedRelativeSegments.head, decodedRelativeSegments.tail: _*)
 
-    (did, datasetVersionHash, fileRelativePath)
+    (datasetName, datasetVersionHash, fileRelativePath)
   }
 
   // Extract components from URI using the utility function
-  private val (did, datasetVersionHash, fileRelativePath) = parseUri(uri)
+  private val (datasetName, datasetVersionHash, fileRelativePath) = parseUri(uri)
 
   private var tempFile: Option[File] = None
 
   override def getURI: URI = uri
 
   override def asInputStream(): InputStream = {
-    val datasetAbsolutePath = PathUtils.getDatasetPath(Integer.valueOf(did))
+    val datasetAbsolutePath = PathUtils.getDatasetPath(0)
     GitVersionControlLocalFileStorage
       .retrieveFileContentOfVersionAsInputStream(
         datasetAbsolutePath,
@@ -75,8 +77,16 @@ private[storage] class DatasetFileDocument(uri: URI) extends VirtualDocument[Not
     }
     // then remove the dataset file
     GitVersionControlLocalFileStorage.removeFileFromRepo(
-      PathUtils.getDatasetPath(Integer.valueOf(did)),
-      PathUtils.getDatasetPath(Integer.valueOf(did)).resolve(fileRelativePath)
+      PathUtils.getDatasetPath(0),
+      PathUtils.getDatasetPath(0).resolve(fileRelativePath)
     )
   }
+
+  override def getVersionHash(): String = datasetVersionHash
+
+  override def getRepoName(): String = datasetName
+
+  override def getBucketName(): String = StorageConfig.lakefsBlockStorageBucketName
+
+  override def getObjectRelativePath(): String = fileRelativePath.toString
 }
diff --git a/core/workflow-core/src/main/scala/edu/uci/ics/amber/core/storage/model/S3Compatible.scala b/core/workflow-core/src/main/scala/edu/uci/ics/amber/core/storage/model/S3Compatible.scala
new file mode 100644
index 0000000000..d2209b92ed
--- /dev/null
+++ b/core/workflow-core/src/main/scala/edu/uci/ics/amber/core/storage/model/S3Compatible.scala
@@ -0,0 +1,11 @@
+package edu.uci.ics.amber.core.storage.model
+
+trait S3Compatible {
+  def getRepoName(): String
+
+  def getBucketName(): String
+
+  def getVersionHash(): String
+
+  def getObjectRelativePath(): String
+}

From 2ed989d1f01232951137639983a3862dc2aed41a Mon Sep 17 00:00:00 2001
From: Jiadong Bai
Date: Tue, 25 Feb 2025 12:48:23 -0800
Subject: [PATCH 13/47] fix the presigned get

---
 core/file-service/src/main/resources/docker-compose.yml | 1 +
 .../ics/texera/service/resource/DatasetResource.scala   | 4 ++--
 .../user-dataset-file-renderer.component.ts             | 9 ++++++++-
 .../dashboard/service/user/dataset/dataset.service.ts   | 6 +-----
 .../uci/ics/amber/core/storage/LakeFSFileStorage.scala  | 2 +-
 5 files changed, 13 insertions(+), 9 deletions(-)

diff --git a/core/file-service/src/main/resources/docker-compose.yml b/core/file-service/src/main/resources/docker-compose.yml
index cb34e310c0..4c69ba52c7 100644
--- a/core/file-service/src/main/resources/docker-compose.yml
+++ b/core/file-service/src/main/resources/docker-compose.yml
@@ -34,6 +34,7 @@ services:
       - LAKEFS_BLOCKSTORE_S3_FORCE_PATH_STYLE=true
       - LAKEFS_BLOCKSTORE_S3_ENDPOINT=http://minio:9000 # MinIO internal service URL
       - LAKEFS_BLOCKSTORE_S3_DISCOVER_BUCKET_REGION=false
+      - LAKEFS_BLOCKSTORE_S3_PRE_SIGNED_ENDPOINT=http://localhost:9500
       - LAKEFS_BLOCKSTORE_S3_CREDENTIALS_ACCESS_KEY_ID=texera_minio
      - LAKEFS_BLOCKSTORE_S3_CREDENTIALS_SECRET_ACCESS_KEY=password
     command: run
\ No newline at end of file
diff --git a/core/file-service/src/main/scala/edu/uci/ics/texera/service/resource/DatasetResource.scala b/core/file-service/src/main/scala/edu/uci/ics/texera/service/resource/DatasetResource.scala
index 3cd3d80e19..63f7d943a4 100644
--- a/core/file-service/src/main/scala/edu/uci/ics/texera/service/resource/DatasetResource.scala
+++ b/core/file-service/src/main/scala/edu/uci/ics/texera/service/resource/DatasetResource.scala
@@ -411,8 +411,8 @@ class DatasetResource {
 
       val presignedUrl = operationType match {
         case "download" =>
-//          LakeFSFileStorage.retrieveFilePresignedUrl(document.getRepoName(), document.getVersionHash(), document.getObjectRelativePath())
-          S3Storage.generatePresignedDownloadUrl(document.getBucketName(), objectKey).toString
+          LakeFSFileStorage.retrieveFilePresignedUrl(document.getRepoName(), document.getVersionHash(), document.getObjectRelativePath())
+//          S3Storage.generatePresignedDownloadUrl(document.getBucketName(), objectKey).toString
 
         case "upload" =>
           if (multipart.toScala.contains(true)) {
diff --git a/core/gui/src/app/dashboard/component/user/user-dataset/user-dataset-explorer/user-dataset-file-renderer/user-dataset-file-renderer.component.ts b/core/gui/src/app/dashboard/component/user/user-dataset/user-dataset-explorer/user-dataset-file-renderer/user-dataset-file-renderer.component.ts
index b46160732b..659289a88d 100644
--- a/core/gui/src/app/dashboard/component/user/user-dataset/user-dataset-explorer/user-dataset-file-renderer/user-dataset-file-renderer.component.ts
+++ b/core/gui/src/app/dashboard/component/user/user-dataset/user-dataset-explorer/user-dataset-file-renderer/user-dataset-file-renderer.component.ts
@@ -24,6 +24,13 @@ export const MIME_TYPES = {
   OCTET_STREAM: "application/octet-stream", // Default binary format
 };
 
+export function getMimeType(filename: string): string {
+  const extension = filename.split(".").pop()?.toUpperCase();
+  return extension && MIME_TYPES[extension as keyof typeof MIME_TYPES]
+    ? MIME_TYPES[extension as keyof typeof MIME_TYPES]
+    : MIME_TYPES.OCTET_STREAM;
+}
+
 // the size limits for all preview-supported types
 export const MIME_TYPE_SIZE_LIMITS_MB = {
   [MIME_TYPES.JPEG]: 5 * 1024 * 1024, // 5 MB
@@ -136,7 +143,7 @@ export class UserDatasetFileRendererComponent implements OnInit, OnChanges, OnDe
         .subscribe({
           next: blob => {
             this.isLoading = false;
-            const blobMimeType = blob.type;
+            const blobMimeType = getMimeType(this.filePath);
             if (!this.isPreviewSupported(blobMimeType)) {
               this.onFileTypePreviewUnsupported();
               return;
diff --git a/core/gui/src/app/dashboard/service/user/dataset/dataset.service.ts b/core/gui/src/app/dashboard/service/user/dataset/dataset.service.ts
index 32930510d8..cb3c224fc6 100644
--- a/core/gui/src/app/dashboard/service/user/dataset/dataset.service.ts
+++ b/core/gui/src/app/dashboard/service/user/dataset/dataset.service.ts
@@ -61,11 +61,7 @@ export class DatasetService {
       }>(`${AppSettings.getApiEndpoint()}/dataset/presign?type=download&key=${encodeURIComponent(filePath)}`)
       .pipe(
         switchMap(({ presignedUrl }) => {
-          const url = new URL(presignedUrl);
-
-          let repoName = url.hostname.split(".")[0];
-          let newUrl = `lakefs/${repoName}${url.pathname}${url.search}`;
-          return this.http.get(newUrl, { responseType: "blob" });
+          return this.http.get(presignedUrl, { responseType: "blob" });
         })
       );
   }
diff --git a/core/workflow-core/src/main/scala/edu/uci/ics/amber/core/storage/LakeFSFileStorage.scala b/core/workflow-core/src/main/scala/edu/uci/ics/amber/core/storage/LakeFSFileStorage.scala
index b4f33768f1..80791a167b 100644
--- a/core/workflow-core/src/main/scala/edu/uci/ics/amber/core/storage/LakeFSFileStorage.scala
+++ b/core/workflow-core/src/main/scala/edu/uci/ics/amber/core/storage/LakeFSFileStorage.scala
@@ -149,7 +149,7 @@ object LakeFSFileStorage {
    * @param filePath Path to the file in the repository.
*/ def retrieveFilePresignedUrl(repoName: String, commitHash: String, filePath: String): String = { - objectsApi.getObject(repoName, commitHash, filePath).presign(true).execute().getPath + objectsApi.statObject(repoName, commitHash, filePath).presign(true).execute().getPhysicalAddress } /** From c31cbcc5b14a0a66480ab0576583bfa3a5e043b7 Mon Sep 17 00:00:00 2001 From: Jiadong Bai Date: Tue, 25 Feb 2025 15:43:32 -0800 Subject: [PATCH 14/47] closing to finish the upload --- .../service/resource/DatasetResource.scala | 92 +++++++++----- .../service/user/dataset/dataset.service.ts | 99 +++++++++++++++- .../core/storage/LakeFSFileStorage.scala | 32 +++-- .../ics/amber/core/storage/S3Storage.scala | 112 ------------------ .../storage/model/DatasetFileDocument.scala | 8 +- .../amber/core/storage/model/OnDataset.scala | 9 ++ .../core/storage/model/S3Compatible.scala | 11 -- 7 files changed, 197 insertions(+), 166 deletions(-) create mode 100644 core/workflow-core/src/main/scala/edu/uci/ics/amber/core/storage/model/OnDataset.scala delete mode 100644 core/workflow-core/src/main/scala/edu/uci/ics/amber/core/storage/model/S3Compatible.scala diff --git a/core/file-service/src/main/scala/edu/uci/ics/texera/service/resource/DatasetResource.scala b/core/file-service/src/main/scala/edu/uci/ics/texera/service/resource/DatasetResource.scala index 63f7d943a4..edc317c221 100644 --- a/core/file-service/src/main/scala/edu/uci/ics/texera/service/resource/DatasetResource.scala +++ b/core/file-service/src/main/scala/edu/uci/ics/texera/service/resource/DatasetResource.scala @@ -1,6 +1,6 @@ package edu.uci.ics.texera.service.resource -import edu.uci.ics.amber.core.storage.model.S3Compatible +import edu.uci.ics.amber.core.storage.model.OnDataset import edu.uci.ics.amber.core.storage.{DocumentFactory, FileResolver, LakeFSFileStorage, S3Storage, StorageConfig} import edu.uci.ics.amber.core.storage.util.dataset.{GitVersionControlLocalFileStorage, PhysicalFileNode} import edu.uci.ics.amber.util.PathUtils @@ -392,43 +392,81 @@ class DatasetResource { @GET @RolesAllowed(Array("REGULAR", "ADMIN")) - @Path("presign") + @Path("/presign") def getPresignedUrl( - @QueryParam("type") operationType: String, @QueryParam("key") encodedUrl: String, - @QueryParam("multipart") multipart: Optional[Boolean], - @QueryParam("contentType") contentType: Optional[String], @Auth user: SessionUser ): Response = { val uid = user.getUid withTransaction(context) { ctx => - // TODO: bring the access control back val decodedPathStr = URLDecoder.decode(encodedUrl, StandardCharsets.UTF_8.name()) val fileUri = FileResolver.resolve(decodedPathStr) - val document = DocumentFactory.openReadonlyDocument(fileUri).asInstanceOf[S3Compatible] - - val objectKey = s"${document.getVersionHash()}/${document.getObjectRelativePath()}" - - val presignedUrl = operationType match { - case "download" => - LakeFSFileStorage.retrieveFilePresignedUrl(document.getRepoName(), document.getVersionHash(), document.getObjectRelativePath()) -// S3Storage.generatePresignedDownloadUrl(document.getBucketName(), objectKey).toString - - case "upload" => - if (multipart.toScala.contains(true)) { - // Generate presigned URLs for multipart upload (initiate the multipart upload) - val uploadId = S3Storage.initiateMultipartUpload(document.getBucketName(), document.getObjectRelativePath(), contentType.toScala) - Response.ok(Map("uploadId" -> uploadId, "key" -> document.getObjectRelativePath())).build() - } else { - // Generate presigned URL for a single-part upload - 
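For orientation, a minimal sketch of how the LakeFS-backed presign path introduced here can be exercised; the repository name, commit hash, and object path are hypothetical:

    // Sketch only: mirrors the statObject(...).presign(true) helper the reworked
    // /presign endpoint calls (retrieveFilePresignedUrl, later renamed getFilePresignedUrl).
    val url: String = LakeFSFileStorage.getFilePresignedUrl(
      "my-dataset",    // repository (one per dataset)
      "0f3c2a9",       // commit hash identifying the dataset version
      "dir1/file1.txt" // object path inside that version
    )
    // `url` is the object's presigned physical address on the block store; the browser
    // can GET it directly, which is why dataset.service.ts now fetches it as-is.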
S3Storage.generatePresignedUploadUrl(document.getBucketName(), document.getObjectRelativePath(), multipart = false, contentType.toScala).toString - } + val document = DocumentFactory.openReadonlyDocument(fileUri).asInstanceOf[OnDataset] - case _ => - throw new BadRequestException("Invalid type parameter. Use 'download' or 'upload'.") + val datasetDao = new DatasetDao(ctx.configuration()) + val datasets = datasetDao.fetchByName(document.getDatasetName()).asScala.toList + + if (datasets.isEmpty || !userHasReadAccess(ctx, datasets.head.getDid, uid)) { + throw new ForbiddenException(ERR_USER_HAS_NO_ACCESS_TO_DATASET_MESSAGE) + } + + Response.ok(Map("presignedUrl" -> LakeFSFileStorage.getFilePresignedUrl(document.getDatasetName(), document.getVersionHash(), document.getFileRelativePath()))).build() + } + } + + @POST + @RolesAllowed(Array("REGULAR", "ADMIN")) + @Path("/{did}/multipart-upload") + def multipartUpload( + @PathParam("did") did: Integer, + @QueryParam("type") operationType: String, + @QueryParam("key") encodedUrl: String, + @QueryParam("uploadId") uploadId: Optional[String], + @QueryParam("numParts") numParts: Optional[Integer], + @Auth user: SessionUser + ): Response = { + val uid = user.getUid + + withTransaction(context) { ctx => + if (!userHasWriteAccess(ctx, did, uid)) { + throw new ForbiddenException(ERR_USER_HAS_NO_ACCESS_TO_DATASET_MESSAGE) } + val datasetName = getDatasetByID(ctx, did).getName + + // Decode the file path + val filePath = URLDecoder.decode(encodedUrl, StandardCharsets.UTF_8.name()) + + operationType.toLowerCase match { + case "init" => + // Ensure numParts is provided for initiation + val numPartsValue = numParts.toScala.getOrElse(throw new BadRequestException("numParts is required for initialization")) + + // Initiate multipart upload and retrieve presigned URLs + val presignedResponse = LakeFSFileStorage.initiatePresignedMultipartUploads(datasetName, filePath, numPartsValue) + + Response.ok(Map("uploadId" -> presignedResponse.getUploadId, "presignedUrls" -> presignedResponse.getPresignedUrls)).build() - Response.ok(Map("presignedUrl" -> presignedUrl)).build() + case "finish" => + // Ensure uploadId is provided for completion + val uploadIdValue = uploadId.toScala.getOrElse(throw new BadRequestException("uploadId is required for completion")) + + // Complete the multipart upload + val objectStats = LakeFSFileStorage.completePresignedMultipartUploads(datasetName, filePath, uploadIdValue) + + Response.ok(Map("message" -> "Multipart upload completed successfully", "filePath" -> objectStats.getPath())).build() + + case "abort" => + // Ensure uploadId is provided for abortion + val uploadIdValue = uploadId.toScala.getOrElse(throw new BadRequestException("uploadId is required for abortion")) + + // Abort the multipart upload + LakeFSFileStorage.abortPresignedMultipartUploads(datasetName, filePath, uploadIdValue) + + Response.ok(Map("message" -> "Multipart upload aborted successfully")).build() + + case _ => + throw new BadRequestException("Invalid type parameter. 
Use 'init', 'finish', or 'abort'.") + } } } diff --git a/core/gui/src/app/dashboard/service/user/dataset/dataset.service.ts b/core/gui/src/app/dashboard/service/user/dataset/dataset.service.ts index cb3c224fc6..625cf04072 100644 --- a/core/gui/src/app/dashboard/service/user/dataset/dataset.service.ts +++ b/core/gui/src/app/dashboard/service/user/dataset/dataset.service.ts @@ -1,13 +1,14 @@ import { Injectable } from "@angular/core"; import { HttpClient, HttpParams } from "@angular/common/http"; -import { map, switchMap } from "rxjs/operators"; +import {catchError, map, switchMap, tap} from "rxjs/operators"; import { Dataset, DatasetVersion } from "../../../../common/type/dataset"; import { AppSettings } from "../../../../common/app-setting"; -import { from, Observable } from "rxjs"; +import {forkJoin, from, Observable, throwError} from "rxjs"; import { DashboardDataset } from "../../../type/dashboard-dataset.interface"; import { FileUploadItem } from "../../../type/dashboard-file.interface"; import { DatasetFileNode } from "../../../../common/type/datasetVersionFileTree"; import { DatasetStagedObject } from "../../../../common/type/dataset-staged-object"; +import {S3Client} from "@aws-sdk/client-s3"; export const DATASET_BASE_URL = "dataset"; export const DATASET_CREATE_URL = DATASET_BASE_URL + "/create"; @@ -27,11 +28,13 @@ export const DATASET_PUBLIC_VERSION_BASE_URL = "publicVersion"; export const DATASET_PUBLIC_VERSION_RETRIEVE_LIST_URL = DATASET_PUBLIC_VERSION_BASE_URL + "/list"; export const DATASET_GET_OWNERS_URL = DATASET_BASE_URL + "/datasetUserAccess"; +const MULTIPART_UPLOAD_PART_SIZE_MB = 50 * 1024 * 1024; // 50MB per part + @Injectable({ providedIn: "root", }) export class DatasetService { - constructor(private http: HttpClient) {} + constructor(private s3Client: S3Client, private http: HttpClient) {} public createDataset(dataset: Dataset): Observable { const formData = new FormData(); @@ -58,7 +61,7 @@ export class DatasetService { return this.http .get<{ presignedUrl: string; - }>(`${AppSettings.getApiEndpoint()}/dataset/presign?type=download&key=${encodeURIComponent(filePath)}`) + }>(`${AppSettings.getApiEndpoint()}/dataset/presign?key=${encodeURIComponent(filePath)}`) .pipe( switchMap(({ presignedUrl }) => { return this.http.get(presignedUrl, { responseType: "blob" }); @@ -105,6 +108,94 @@ export class DatasetService { ); } + /** + * Handles multipart upload for large files using RxJS. 
+ * @param did Dataset ID + * @param filePath Path of the file within the dataset + * @param file File object to be uploaded + */ + public multipartUpload(did: number, filePath: string, file: File): Observable { + const partCount = Math.ceil(file.size / MULTIPART_UPLOAD_PART_SIZE_MB); + + return this.initiateMultipartUpload(did, filePath, partCount).pipe( + switchMap(initiateResponse => { + const uploadId = initiateResponse.uploadId; + if (!uploadId) { + return throwError(() => new Error("Failed to initiate multipart upload")); + } + + console.log(`Started multipart upload for ${filePath} with UploadId: ${uploadId}`); + + const uploadObservables = initiateResponse.presignedUrls.map((url, index) => { + const start = index * MULTIPART_UPLOAD_PART_SIZE_MB; + const end = Math.min(start + MULTIPART_UPLOAD_PART_SIZE_MB, file.size); + const chunk = file.slice(start, end); + + return from(fetch(url, { method: "PUT", body: chunk })).pipe( + switchMap(response => { + if (!response.ok) { + return throwError(() => new Error(`Failed to upload part ${index + 1}`)); + } + console.log(`Uploaded part ${index + 1} of ${partCount}`); + return from(Promise.resolve()); + }) + ); + }); + + return forkJoin(uploadObservables).pipe( + switchMap(() => this.finalizeMultipartUpload(did, filePath, uploadId, false)), + tap(() => console.log(`Multipart upload for ${filePath} completed successfully!`)), + catchError(error => { + console.error(`Multipart upload failed for ${filePath}`, error); + return this.finalizeMultipartUpload(did, filePath, uploadId, true).pipe( + tap(() => console.error(`Upload aborted for ${filePath}`)), + switchMap(() => throwError(() => error)) + ); + }) + ); + }) + ); + } + + /** + * Initiates a multipart upload and retrieves presigned URLs for each part. + * @param did Dataset ID + * @param filePath File path within the dataset + * @param numParts Number of parts for the multipart upload + */ + private initiateMultipartUpload(did: number, filePath: string, numParts: number): Observable<{ uploadId: string; presignedUrls: string[] }> { + const params = new HttpParams() + .set("type", "init") + .set("key", encodeURIComponent(filePath)) + .set("numParts", numParts.toString()); + + return this.http.post<{ uploadId: string; presignedUrls: string[] }>( + `${AppSettings.getApiEndpoint()}/${DATASET_BASE_URL}/${did}/multipart-upload`, + {}, + { params } + ); + } + + /** + * Completes or aborts a multipart upload. + * @param did Dataset ID + * @param filePath File path within the dataset + * @param uploadId Upload ID returned from the initiation step + * @param isAbort Whether to abort (true) or complete (false) the upload + */ + private finalizeMultipartUpload(did: number, filePath: string, uploadId: string, isAbort: boolean = false): Observable { + const params = new HttpParams() + .set("type", isAbort ? "abort" : "finish") + .set("key", encodeURIComponent(filePath)) + .set("uploadId", uploadId); + + return this.http.post( + `${AppSettings.getApiEndpoint()}/${DATASET_BASE_URL}/${did}/multipart-upload`, + {}, + { params } + ); + } + /** * Retrieves the list of uncommitted dataset changes (diffs). 
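To make the chunking arithmetic in multipartUpload above concrete, here is a worked example with a hypothetical 120 MB file; note that MULTIPART_UPLOAD_PART_SIZE_MB, despite its name, holds a byte count (50 * 1024 * 1024):

    // Hypothetical sizes, matching the slicing logic of multipartUpload above.
    const partSize = 50 * 1024 * 1024;                // 52,428,800 bytes
    const fileSize = 120 * 1024 * 1024;               // 125,829,120 bytes
    const partCount = Math.ceil(fileSize / partSize); // 3
    // Part 1 covers bytes [0, 50 MB), part 2 [50 MB, 100 MB), part 3 [100 MB, 120 MB);
    // each slice is PUT to the matching presigned URL returned by the init call.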
* @param did Dataset ID diff --git a/core/workflow-core/src/main/scala/edu/uci/ics/amber/core/storage/LakeFSFileStorage.scala b/core/workflow-core/src/main/scala/edu/uci/ics/amber/core/storage/LakeFSFileStorage.scala index 80791a167b..7dc9506031 100644 --- a/core/workflow-core/src/main/scala/edu/uci/ics/amber/core/storage/LakeFSFileStorage.scala +++ b/core/workflow-core/src/main/scala/edu/uci/ics/amber/core/storage/LakeFSFileStorage.scala @@ -35,9 +35,11 @@ object LakeFSFileStorage { private lazy val branchesApi: BranchesApi = new BranchesApi(apiClient) private lazy val commitsApi: CommitsApi = new CommitsApi(apiClient) private lazy val refsApi: RefsApi = new RefsApi(apiClient) + private lazy val experimentalApi: ExperimentalApi = new ExperimentalApi(apiClient) - private val storageNamespaceURI: String = "s3://texera-dataset" + private val storageNamespaceURI: String = s"${StorageConfig.lakefsBlockStorageType}://${StorageConfig.lakefsBlockStorageBucketName}" + private val branchName: String = "main" /** * Initializes a new repository in LakeFS. * @@ -46,7 +48,6 @@ object LakeFSFileStorage { */ def initRepo( repoName: String, - defaultBranch: String = "main" ): Repository = { val repoNamePattern = "^[a-z0-9][a-z0-9-]{2,62}$".r @@ -63,7 +64,7 @@ object LakeFSFileStorage { val repo = new RepositoryCreation() .name(repoName) .storageNamespace(storageNamespace) - .defaultBranch(defaultBranch) + .defaultBranch(branchName) .sampleData(false) repoApi.createRepository(repo).execute() @@ -148,10 +149,28 @@ object LakeFSFileStorage { * @param commitHash Commit hash of the version. * @param filePath Path to the file in the repository. */ - def retrieveFilePresignedUrl(repoName: String, commitHash: String, filePath: String): String = { + def getFilePresignedUrl(repoName: String, commitHash: String, filePath: String): String = { objectsApi.statObject(repoName, commitHash, filePath).presign(true).execute().getPhysicalAddress } + /** + * + */ + def initiatePresignedMultipartUploads(repoName: String, filePath: String, numberOfParts: Int): PresignMultipartUpload = { + experimentalApi.createPresignMultipartUpload(repoName, branchName, filePath).parts(numberOfParts).execute() + + } + + def completePresignedMultipartUploads(repoName: String, filePath: String, uploadId: String): ObjectStats = { + val completePresignMultipartUpload: CompletePresignMultipartUpload = new CompletePresignMultipartUpload() + experimentalApi.completePresignMultipartUpload(repoName, branchName, uploadId, filePath).execute() + } + + def abortPresignedMultipartUploads(repoName: String, filePath: String, uploadId: String): Unit = { + experimentalApi.abortPresignMultipartUpload(repoName, branchName, uploadId, filePath).execute() + } + + /** * Deletes an entire repository. * @@ -161,7 +180,7 @@ object LakeFSFileStorage { repoApi.deleteRepository(repoName).execute() } - def retrieveVersionsOfRepository(repoName: String, branchName: String = "main"): List[Commit] = { + def retrieveVersionsOfRepository(repoName: String): List[Commit] = { refsApi .logCommits(repoName, branchName) .execute() @@ -179,10 +198,9 @@ object LakeFSFileStorage { * Retrieves a list of uncommitted (staged) objects in a repository branch. * * @param repoName Repository name. - * @param branchName Branch name (defaults to "main"). * @return List of uncommitted object stats. 
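Stepping back to the three multipart helpers added to LakeFSFileStorage above, a sketch of the order in which the resource layer drives them; the repository and path are hypothetical, and the signatures are the ones at this point in the series, before the part list and physical address are threaded through:

    // 1. "init": create the upload and hand the presigned part URLs to the client.
    val init = LakeFSFileStorage.initiatePresignedMultipartUploads("my-dataset", "data/large.csv", 3)
    val uploadId = init.getUploadId      // returned to the client together with
    val urls = init.getPresignedUrls     // one presigned URL per part
    // 2. The client PUTs one chunk of the file to each URL.
    // 3. "finish" (or "abort" on failure), keyed by the same uploadId:
    LakeFSFileStorage.completePresignedMultipartUploads("my-dataset", "data/large.csv", uploadId)
    // LakeFSFileStorage.abortPresignedMultipartUploads("my-dataset", "data/large.csv", uploadId)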
*/ - def retrieveUncommittedObjects(repoName: String, branchName: String = "main"): List[Diff] = { + def retrieveUncommittedObjects(repoName: String): List[Diff] = { branchesApi .diffBranch(repoName, branchName) .execute() diff --git a/core/workflow-core/src/main/scala/edu/uci/ics/amber/core/storage/S3Storage.scala b/core/workflow-core/src/main/scala/edu/uci/ics/amber/core/storage/S3Storage.scala index 2e7129f4ba..6803e13e30 100644 --- a/core/workflow-core/src/main/scala/edu/uci/ics/amber/core/storage/S3Storage.scala +++ b/core/workflow-core/src/main/scala/edu/uci/ics/amber/core/storage/S3Storage.scala @@ -38,30 +38,6 @@ object S3Storage { .build() } - private lazy val s3Presigner: S3Presigner = { - val credentials = AwsBasicCredentials.create(StorageConfig.s3Username, StorageConfig.s3Password) - S3Presigner - .builder() - .region(Region.US_WEST_2) - .credentialsProvider(StaticCredentialsProvider.create(credentials)) - .endpointOverride(java.net.URI.create(StorageConfig.s3Endpoint)) - .build() - } - - /** - * Deletes a bucket from MinIO. - * - * @param bucketName Target MinIO bucket to delete. - */ - def deleteBucket(bucketName: String): Unit = { - val request = DeleteBucketRequest - .builder() - .bucket(bucketName) - .build() - - s3Client.deleteBucket(request) - } - /** * Deletes a directory (all objects under a given prefix) from a bucket. * @@ -111,92 +87,4 @@ object S3Storage { s3Client.deleteObjects(deleteObjectsRequest) } } - - /** - * Generates a pre-signed URL for uploading a file. - * Supports both single and multipart uploads. - * - * @param bucketName Target S3 bucket. - * @param key Object key (file path). - * @param expiration Expiration duration. - * @param multipart Whether the upload is multipart. - * @param contentType Optional content type. - * @return Either a pre-signed URL for single-part uploads or an upload ID for multipart uploads. - */ - def generatePresignedUploadUrl( - bucketName: String, - key: String, - multipart: Boolean, - contentType: Option[String] = None - ): Either[URL, String] = { - if (multipart) { - // Initialize multipart upload - val uploadId = initiateMultipartUpload(bucketName, key, contentType) - Right(uploadId) - } else { - // Generate pre-signed URL for single-part upload - val requestBuilder = PutObjectRequest - .builder() - .bucket(bucketName) - .key(key) - - contentType.foreach(requestBuilder.contentType) - - val request = PutObjectPresignRequest - .builder() - .signatureDuration(Duration.ofMinutes(StorageConfig.s3PresignedUrlUploadExpirationMinutes)) - .putObjectRequest(requestBuilder.build()) - .build() - - val presignedRequest: PresignedPutObjectRequest = s3Presigner.presignPutObject(request) - Left(presignedRequest.url()) - } - } - - /** - * Initiates a multipart upload and returns the upload ID. - * - * @param bucket S3 bucket name. - * @param key Object key (file path). - * @param contentType Optional content type. - * @return Upload ID for multipart upload. - */ - def initiateMultipartUpload(bucket: String, key: String, contentType: Option[String] = None): String = { - val requestBuilder = CreateMultipartUploadRequest.builder() - .bucket(bucket) - .key(key) - - contentType.foreach(requestBuilder.contentType) - - val response = s3Client.createMultipartUpload(requestBuilder.build()) - response.uploadId() - } - - /** - * Generates a pre-signed URL for downloading a file. - * - * @param bucketName Target S3 bucket. - * @param key Object key (file path). - * @param expiration Expiration duration. - * @return URL string for download. 
- */ - def generatePresignedDownloadUrl( - bucketName: String, - key: String, - ): URL = { - val request = GetObjectPresignRequest - .builder() - .signatureDuration(Duration.ofMinutes(StorageConfig.s3PresignedUrlDownloadExpirationMinutes)) - .getObjectRequest( - GetObjectRequest - .builder() - .bucket(bucketName) - .key(key) - .build() - ) - .build() - - val presignedRequest: PresignedGetObjectRequest = s3Presigner.presignGetObject(request) - presignedRequest.url() - } } diff --git a/core/workflow-core/src/main/scala/edu/uci/ics/amber/core/storage/model/DatasetFileDocument.scala b/core/workflow-core/src/main/scala/edu/uci/ics/amber/core/storage/model/DatasetFileDocument.scala index 35b268db97..c41252f219 100644 --- a/core/workflow-core/src/main/scala/edu/uci/ics/amber/core/storage/model/DatasetFileDocument.scala +++ b/core/workflow-core/src/main/scala/edu/uci/ics/amber/core/storage/model/DatasetFileDocument.scala @@ -10,7 +10,7 @@ import java.nio.charset.StandardCharsets import java.nio.file.{Files, Path, Paths} import scala.jdk.CollectionConverters.IteratorHasAsScala -private[storage] class DatasetFileDocument(uri: URI) extends VirtualDocument[Nothing] with S3Compatible { +private[storage] class DatasetFileDocument(uri: URI) extends VirtualDocument[Nothing] with OnDataset { // Utility function to parse and decode URI segments into individual components private def parseUri(uri: URI): (String, String, Path) = { val segments = Paths.get(uri.getPath).iterator().asScala.map(_.toString).toArray @@ -84,9 +84,7 @@ private[storage] class DatasetFileDocument(uri: URI) extends VirtualDocument[Not override def getVersionHash(): String = datasetVersionHash - override def getRepoName(): String = datasetName + override def getDatasetName(): String = datasetName - override def getBucketName(): String = StorageConfig.lakefsBlockStorageBucketName - - override def getObjectRelativePath(): String = fileRelativePath.toString + override def getFileRelativePath(): String = fileRelativePath.toString } diff --git a/core/workflow-core/src/main/scala/edu/uci/ics/amber/core/storage/model/OnDataset.scala b/core/workflow-core/src/main/scala/edu/uci/ics/amber/core/storage/model/OnDataset.scala new file mode 100644 index 0000000000..e628f47134 --- /dev/null +++ b/core/workflow-core/src/main/scala/edu/uci/ics/amber/core/storage/model/OnDataset.scala @@ -0,0 +1,9 @@ +package edu.uci.ics.amber.core.storage.model + +trait OnDataset { + def getDatasetName(): String + + def getVersionHash(): String + + def getFileRelativePath(): String +} diff --git a/core/workflow-core/src/main/scala/edu/uci/ics/amber/core/storage/model/S3Compatible.scala b/core/workflow-core/src/main/scala/edu/uci/ics/amber/core/storage/model/S3Compatible.scala deleted file mode 100644 index d2209b92ed..0000000000 --- a/core/workflow-core/src/main/scala/edu/uci/ics/amber/core/storage/model/S3Compatible.scala +++ /dev/null @@ -1,11 +0,0 @@ -package edu.uci.ics.amber.core.storage.model - -trait S3Compatible { - def getRepoName(): String - - def getBucketName(): String - - def getVersionHash(): String - - def getObjectRelativePath(): String -} From 02a4057e4a3835937207fa8c5d61971ab2d7d8b0 Mon Sep 17 00:00:00 2001 From: Jiadong Bai Date: Tue, 25 Feb 2025 19:05:10 -0800 Subject: [PATCH 15/47] refactor dataset frontend --- core/gui/src/app/app.module.ts | 2 + .../app/common/type/dataset-staged-object.ts | 2 +- .../files-uploader.component.html | 38 ++++++++--------- .../files-uploader.component.ts | 8 ++-- .../dataset-detail.component.html | 41 
+++++++++---------- .../dataset-detail.component.ts | 11 ++++- ...dataset-staged-objects-list.component.html | 28 +++++++------ ...dataset-staged-objects-list.component.scss | 6 +++ .../service/user/dataset/dataset.service.ts | 6 ++- 9 files changed, 82 insertions(+), 60 deletions(-) diff --git a/core/gui/src/app/app.module.ts b/core/gui/src/app/app.module.ts index c67856a96b..6c458c337c 100644 --- a/core/gui/src/app/app.module.ts +++ b/core/gui/src/app/app.module.ts @@ -144,6 +144,7 @@ import { lastValueFrom } from "rxjs"; import { HubSearchResultComponent } from "./hub/component/hub-search-result/hub-search-result.component"; import { UserDatasetStagedObjectsListComponent } from "./dashboard/component/user/user-dataset/user-dataset-explorer/user-dataset-staged-objects-list/user-dataset-staged-objects-list.component"; import { NzEmptyModule } from "ng-zorro-antd/empty"; +import {NzDividerModule} from "ng-zorro-antd/divider"; registerLocaleData(en); @@ -295,6 +296,7 @@ registerLocaleData(en); SocialLoginModule, GoogleSigninButtonModule, NzEmptyModule, + NzDividerModule, ], providers: [ provideNzI18n(en_US), diff --git a/core/gui/src/app/common/type/dataset-staged-object.ts b/core/gui/src/app/common/type/dataset-staged-object.ts index f69cc183f1..1d3d16dd16 100644 --- a/core/gui/src/app/common/type/dataset-staged-object.ts +++ b/core/gui/src/app/common/type/dataset-staged-object.ts @@ -1,6 +1,6 @@ // Represents a staged dataset object change, corresponding to backend Diff export interface DatasetStagedObject { - fileRelativePath: string; + path: string; pathType: "file" | "directory"; diffType: "added" | "removed" | "changed"; sizeBytes?: number; // Optional, only present for files diff --git a/core/gui/src/app/dashboard/component/user/files-uploader/files-uploader.component.html b/core/gui/src/app/dashboard/component/user/files-uploader/files-uploader.component.html index eb0f9f37ed..a1c6e3091b 100644 --- a/core/gui/src/app/dashboard/component/user/files-uploader/files-uploader.component.html +++ b/core/gui/src/app/dashboard/component/user/files-uploader/files-uploader.component.html @@ -29,23 +29,23 @@ -
[files-uploader.component.html hunk: the Angular markup did not survive text extraction; only diff markers and visible text remain. The visible text in the changed block is the "Previous Uploads" and "New Uploads" section headings.]
+ + + + + + + + + + + + + + + + + + + diff --git a/core/gui/src/app/dashboard/component/user/files-uploader/files-uploader.component.ts b/core/gui/src/app/dashboard/component/user/files-uploader/files-uploader.component.ts index c97f1ae943..28377b8d58 100644 --- a/core/gui/src/app/dashboard/component/user/files-uploader/files-uploader.component.ts +++ b/core/gui/src/app/dashboard/component/user/files-uploader/files-uploader.component.ts @@ -87,9 +87,9 @@ export class FilesUploaderComponent { .filter((item): item is FileUploadItem => item !== null); if (successfulUploads.length > 0) { - successfulUploads.forEach(fileUploadItem => { - this.addFileToNewUploadsFileTree(fileUploadItem.name, fileUploadItem); - }); + // successfulUploads.forEach(fileUploadItem => { + // this.addFileToNewUploadsFileTree(fileUploadItem.name, fileUploadItem); + // }); const successMessage = `${successfulUploads.length} file${successfulUploads.length > 1 ? "s" : ""} selected successfully!`; this.showFileUploadBanner("success", successMessage); } @@ -100,7 +100,7 @@ export class FilesUploaderComponent { this.showFileUploadBanner("error", errorMessage); } - this.uploadedFiles.emit(Array.from(this.newUploadNodeToFileItems.values())); + this.uploadedFiles.emit(successfulUploads); }) .catch(error => { this.showFileUploadBanner("error", `Unexpected error: ${error.message}`); diff --git a/core/gui/src/app/dashboard/component/user/user-dataset/user-dataset-explorer/dataset-detail.component.html b/core/gui/src/app/dashboard/component/user/user-dataset/user-dataset-explorer/dataset-detail.component.html index 3c65ed303e..ccf4138174 100644 --- a/core/gui/src/app/dashboard/component/user/user-dataset/user-dataset-explorer/dataset-detail.component.html +++ b/core/gui/src/app/dashboard/component/user/user-dataset/user-dataset-explorer/dataset-detail.component.html @@ -127,25 +127,18 @@

[dataset-detail.component.html hunks (including @@ -227,6 +218,14 @@): the Angular markup did not survive text extraction. Visible text in the changed region includes "Create a New Version", "Dataset Explorer", "Choose a Version:", and the binding (selectedTreeNode)="onVersionFileTreeNodeSelected($event)".]
diff --git a/core/gui/src/app/dashboard/component/user/user-dataset/user-dataset-explorer/dataset-detail.component.ts b/core/gui/src/app/dashboard/component/user/user-dataset/user-dataset-explorer/dataset-detail.component.ts index 5647408f17..edb6b6deea 100644 --- a/core/gui/src/app/dashboard/component/user/user-dataset/user-dataset-explorer/dataset-detail.component.ts +++ b/core/gui/src/app/dashboard/component/user/user-dataset/user-dataset-explorer/dataset-detail.component.ts @@ -13,8 +13,8 @@ import { DASHBOARD_USER_DATASET } from "../../../../../app-routing.constant"; import { UserService } from "../../../../../common/service/user/user.service"; import { isDefined } from "../../../../../common/util/predicate"; import { HubService } from "../../../../../hub/service/hub.service"; -import {FileUploadItem} from "../../../../type/dashboard-file.interface"; -import {file} from "jszip"; +import { FileUploadItem } from "../../../../type/dashboard-file.interface"; +import { file } from "jszip"; export const THROTTLE_TIME_MS = 1000; @@ -292,8 +292,15 @@ export class DatasetDetailComponent implements OnInit { onNewUploadFilesChanged(files: FileUploadItem[]) { if (this.did) { + const did = this.did; files.map(file => { - }) + this.datasetService + .multipartUpload(did, file.name, file.file) + .pipe(untilDestroyed(this)) + .subscribe(res => { + console.log("Multipart upload: ", res); + }); + }); } } diff --git a/core/gui/src/app/dashboard/service/user/dataset/dataset.service.ts b/core/gui/src/app/dashboard/service/user/dataset/dataset.service.ts index 9e4e52d099..52692e4e31 100644 --- a/core/gui/src/app/dashboard/service/user/dataset/dataset.service.ts +++ b/core/gui/src/app/dashboard/service/user/dataset/dataset.service.ts @@ -34,10 +34,7 @@ const MULTIPART_UPLOAD_PART_SIZE_MB = 50 * 1024 * 1024; // 50MB per part providedIn: "root", }) export class DatasetService { - private s3Client: S3Client; - constructor(private http: HttpClient) { - this.s3Client = new S3Client(); } public createDataset(dataset: Dataset): Observable { @@ -130,6 +127,9 @@ export class DatasetService { console.log(`Started multipart upload for ${filePath} with UploadId: ${uploadId}`); + // Array to store part numbers and ETags + const uploadedParts: { PartNumber: number; ETag: string }[] = []; + const uploadObservables = initiateResponse.presignedUrls.map((url, index) => { const start = index * MULTIPART_UPLOAD_PART_SIZE_MB; const end = Math.min(start + MULTIPART_UPLOAD_PART_SIZE_MB, file.size); @@ -140,18 +140,24 @@ export class DatasetService { if (!response.ok) { return throwError(() => new Error(`Failed to upload part ${index + 1}`)); } - console.log(`Uploaded part ${index + 1} of ${partCount}`); + const etag = response.headers.get("ETag")?.replace(/"/g, ""); // Extract and clean ETag + if (!etag) { + return throwError(() => new Error(`Missing ETag for part ${index + 1}`)); + } + + uploadedParts.push({ PartNumber: index + 1, ETag: etag }); + console.log(`Uploaded part ${index + 1} of ${partCount}, ETag: ${etag}`); return from(Promise.resolve()); }) ); }); return forkJoin(uploadObservables).pipe( - switchMap(() => this.finalizeMultipartUpload(did, filePath, uploadId, false)), + switchMap(() => this.finalizeMultipartUpload(did, filePath, uploadId, uploadedParts, initiateResponse.physicalAddress, false)), tap(() => console.log(`Multipart upload for ${filePath} completed successfully!`)), catchError(error => { console.error(`Multipart upload failed for ${filePath}`, error); - return this.finalizeMultipartUpload(did, filePath, 
uploadId, true).pipe( + return this.finalizeMultipartUpload(did, filePath, uploadId, uploadedParts, initiateResponse.physicalAddress, true).pipe( tap(() => console.error(`Upload aborted for ${filePath}`)), switchMap(() => throwError(() => error)) ); @@ -167,13 +173,13 @@ export class DatasetService { * @param filePath File path within the dataset * @param numParts Number of parts for the multipart upload */ - private initiateMultipartUpload(did: number, filePath: string, numParts: number): Observable<{ uploadId: string; presignedUrls: string[] }> { + private initiateMultipartUpload(did: number, filePath: string, numParts: number): Observable<{ uploadId: string; presignedUrls: string[]; physicalAddress: string }> { const params = new HttpParams() .set("type", "init") .set("key", encodeURIComponent(filePath)) .set("numParts", numParts.toString()); - return this.http.post<{ uploadId: string; presignedUrls: string[] }>( + return this.http.post<{ uploadId: string; presignedUrls: string[]; physicalAddress: string }>( `${AppSettings.getApiEndpoint()}/${DATASET_BASE_URL}/${did}/multipart-upload`, {}, { params } @@ -181,21 +187,24 @@ export class DatasetService { } /** - * Completes or aborts a multipart upload. - * @param did Dataset ID - * @param filePath File path within the dataset - * @param uploadId Upload ID returned from the initiation step - * @param isAbort Whether to abort (true) or complete (false) the upload + * Completes or aborts a multipart upload, sending part numbers and ETags to the backend. */ - private finalizeMultipartUpload(did: number, filePath: string, uploadId: string, isAbort: boolean = false): Observable { + private finalizeMultipartUpload( + did: number, + filePath: string, + uploadId: string, + parts: { PartNumber: number; ETag: string }[], + physicalAddress: string, + isAbort: boolean + ): Observable { const params = new HttpParams() .set("type", isAbort ? "abort" : "finish") .set("key", encodeURIComponent(filePath)) .set("uploadId", uploadId); return this.http.post( - `${AppSettings.getApiEndpoint()}/${DATASET_BASE_URL}/${did}/multipart-upload`, - {}, + `${AppSettings.getApiEndpoint()}/dataset/${did}/multipart-upload`, + { parts, physicalAddress }, { params } ); } diff --git a/core/workflow-core/src/main/scala/edu/uci/ics/amber/core/storage/LakeFSFileStorage.scala b/core/workflow-core/src/main/scala/edu/uci/ics/amber/core/storage/LakeFSFileStorage.scala index 7dc9506031..fc1e2beefd 100644 --- a/core/workflow-core/src/main/scala/edu/uci/ics/amber/core/storage/LakeFSFileStorage.scala +++ b/core/workflow-core/src/main/scala/edu/uci/ics/amber/core/storage/LakeFSFileStorage.scala @@ -37,9 +37,11 @@ object LakeFSFileStorage { private lazy val refsApi: RefsApi = new RefsApi(apiClient) private lazy val experimentalApi: ExperimentalApi = new ExperimentalApi(apiClient) - private val storageNamespaceURI: String = s"${StorageConfig.lakefsBlockStorageType}://${StorageConfig.lakefsBlockStorageBucketName}" + private val storageNamespaceURI: String = + s"${StorageConfig.lakefsBlockStorageType}://${StorageConfig.lakefsBlockStorageBucketName}" private val branchName: String = "main" + /** * Initializes a new repository in LakeFS. * @@ -47,7 +49,7 @@ object LakeFSFileStorage { * @param defaultBranch Default branch name, usually "main". */ def initRepo( - repoName: String, + repoName: String ): Repository = { val repoNamePattern = "^[a-z0-9][a-z0-9-]{2,62}$".r @@ -143,33 +145,70 @@ object LakeFSFileStorage { } /** - * Retrieves file content from a specific commit and path. 
- * - * @param repoName Repository name. - * @param commitHash Commit hash of the version. - * @param filePath Path to the file in the repository. - */ + * Retrieves file content from a specific commit and path. + * + * @param repoName Repository name. + * @param commitHash Commit hash of the version. + * @param filePath Path to the file in the repository. + */ def getFilePresignedUrl(repoName: String, commitHash: String, filePath: String): String = { objectsApi.statObject(repoName, commitHash, filePath).presign(true).execute().getPhysicalAddress } /** - * - */ - def initiatePresignedMultipartUploads(repoName: String, filePath: String, numberOfParts: Int): PresignMultipartUpload = { - experimentalApi.createPresignMultipartUpload(repoName, branchName, filePath).parts(numberOfParts).execute() + */ + def initiatePresignedMultipartUploads( + repoName: String, + filePath: String, + numberOfParts: Int + ): PresignMultipartUpload = { + experimentalApi + .createPresignMultipartUpload(repoName, branchName, filePath) + .parts(numberOfParts) + .execute() } - def completePresignedMultipartUploads(repoName: String, filePath: String, uploadId: String): ObjectStats = { - val completePresignMultipartUpload: CompletePresignMultipartUpload = new CompletePresignMultipartUpload() - experimentalApi.completePresignMultipartUpload(repoName, branchName, uploadId, filePath).execute() + def completePresignedMultipartUploads( + repoName: String, + filePath: String, + uploadId: String, + partsList: List[(Int, String)], + physicalAddress: String + ): ObjectStats = { + val completePresignMultipartUpload: CompletePresignMultipartUpload = + new CompletePresignMultipartUpload() + completePresignMultipartUpload.setParts( + partsList + .map(part => { + val newUploadPart = new UploadPart + newUploadPart.setPartNumber(part._1) + newUploadPart.setEtag(part._2) + newUploadPart + }) + .asJava + ) + completePresignMultipartUpload.setPhysicalAddress(physicalAddress) + experimentalApi + .completePresignMultipartUpload(repoName, branchName, uploadId, filePath) + .completePresignMultipartUpload(completePresignMultipartUpload) + .execute() } - def abortPresignedMultipartUploads(repoName: String, filePath: String, uploadId: String): Unit = { - experimentalApi.abortPresignMultipartUpload(repoName, branchName, uploadId, filePath).execute() - } + def abortPresignedMultipartUploads( + repoName: String, + filePath: String, + uploadId: String, + physicalAddress: String + ): Unit = { + val abortPresignMultipartUpload: AbortPresignMultipartUpload = new AbortPresignMultipartUpload + abortPresignMultipartUpload.setPhysicalAddress(physicalAddress) + experimentalApi + .abortPresignMultipartUpload(repoName, branchName, uploadId, filePath) + .abortPresignMultipartUpload(abortPresignMultipartUpload) + .execute() + } /** * Deletes an entire repository. 
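A usage sketch for the revised completion call above, showing the shape of the parts list the client reports back; every concrete value below is hypothetical:

    // uploadId and physicalAddress are carried over from the earlier init
    // (createPresignMultipartUpload) response; each tuple pairs a part number
    // with the ETag the object store returned for that part's PUT.
    val uploadId = "upload-123"
    val physicalAddress = "s3://texera-dataset/_staging/abc"
    val stats = LakeFSFileStorage.completePresignedMultipartUploads(
      "my-dataset",
      "data/large.csv",
      uploadId,
      List((1, "0a1b2c"), (2, "3d4e5f")),
      physicalAddress
    )
    // stats.getPath is what the resource layer echoes back in its "finish" response.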
diff --git a/core/workflow-core/src/main/scala/edu/uci/ics/amber/core/storage/model/DatasetFileDocument.scala b/core/workflow-core/src/main/scala/edu/uci/ics/amber/core/storage/model/DatasetFileDocument.scala index c41252f219..7d7f41434b 100644 --- a/core/workflow-core/src/main/scala/edu/uci/ics/amber/core/storage/model/DatasetFileDocument.scala +++ b/core/workflow-core/src/main/scala/edu/uci/ics/amber/core/storage/model/DatasetFileDocument.scala @@ -10,7 +10,9 @@ import java.nio.charset.StandardCharsets import java.nio.file.{Files, Path, Paths} import scala.jdk.CollectionConverters.IteratorHasAsScala -private[storage] class DatasetFileDocument(uri: URI) extends VirtualDocument[Nothing] with OnDataset { +private[storage] class DatasetFileDocument(uri: URI) + extends VirtualDocument[Nothing] + with OnDataset { // Utility function to parse and decode URI segments into individual components private def parseUri(uri: URI): (String, String, Path) = { val segments = Paths.get(uri.getPath).iterator().asScala.map(_.toString).toArray From 7d702f2eaf1abc84286f10ed5a9e71832e308a3f Mon Sep 17 00:00:00 2001 From: Jiadong Bai Date: Wed, 26 Feb 2025 15:26:45 -0800 Subject: [PATCH 17/47] closing to finish the gui --- .../ics/texera/service/auth/JwtParser.scala | 1 - .../ics/texera/service/auth/SessionUser.scala | 1 - .../resource/DatasetAccessResource.scala | 1 - .../service/resource/DatasetResource.scala | 71 +++++++++- .../app/common/type/datasetVersionFileTree.ts | 16 +++ .../dataset-detail.component.html | 125 ++++++++++-------- .../dataset-detail.component.scss | 26 ++-- .../dataset-detail.component.ts | 32 ++++- ...dataset-staged-objects-list.component.html | 28 +++- ...dataset-staged-objects-list.component.scss | 32 +++++ ...r-dataset-staged-objects-list.component.ts | 40 +++++- ...er-dataset-version-filetree.component.scss | 2 +- .../service/user/dataset/dataset.service.ts | 36 +++++ .../core/storage/LakeFSFileStorage.scala | 12 ++ .../ics/amber/core/storage/S3Storage.scala | 9 -- .../storage/model/DatasetFileDocument.scala | 1 - 16 files changed, 336 insertions(+), 97 deletions(-) diff --git a/core/file-service/src/main/scala/edu/uci/ics/texera/service/auth/JwtParser.scala b/core/file-service/src/main/scala/edu/uci/ics/texera/service/auth/JwtParser.scala index 570d0d9266..97732a5192 100644 --- a/core/file-service/src/main/scala/edu/uci/ics/texera/service/auth/JwtParser.scala +++ b/core/file-service/src/main/scala/edu/uci/ics/texera/service/auth/JwtParser.scala @@ -1,7 +1,6 @@ package edu.uci.ics.texera.service.auth import edu.uci.ics.texera.dao.jooq.generated.tables.pojos.User -import org.jooq.types.UInteger import org.jose4j.jwt.consumer.{JwtConsumer, JwtConsumerBuilder} import org.jose4j.keys.HmacKey import org.jose4j.lang.UnresolvableKeyException diff --git a/core/file-service/src/main/scala/edu/uci/ics/texera/service/auth/SessionUser.scala b/core/file-service/src/main/scala/edu/uci/ics/texera/service/auth/SessionUser.scala index 39a9f25aef..79a05eccbd 100644 --- a/core/file-service/src/main/scala/edu/uci/ics/texera/service/auth/SessionUser.scala +++ b/core/file-service/src/main/scala/edu/uci/ics/texera/service/auth/SessionUser.scala @@ -2,7 +2,6 @@ package edu.uci.ics.texera.service.auth import edu.uci.ics.texera.dao.jooq.generated.enums.UserRoleEnum import edu.uci.ics.texera.dao.jooq.generated.tables.pojos.User -import org.jooq.types.UInteger import java.security.Principal diff --git a/core/file-service/src/main/scala/edu/uci/ics/texera/service/resource/DatasetAccessResource.scala 
b/core/file-service/src/main/scala/edu/uci/ics/texera/service/resource/DatasetAccessResource.scala index 84834cfec1..877a1c05cd 100644 --- a/core/file-service/src/main/scala/edu/uci/ics/texera/service/resource/DatasetAccessResource.scala +++ b/core/file-service/src/main/scala/edu/uci/ics/texera/service/resource/DatasetAccessResource.scala @@ -1,6 +1,5 @@ package edu.uci.ics.texera.service.resource -import edu.uci.ics.amber.core.storage.StorageConfig import edu.uci.ics.texera.dao.SqlServer import edu.uci.ics.texera.dao.SqlServer.withTransaction import edu.uci.ics.texera.dao.jooq.generated.Tables.USER diff --git a/core/file-service/src/main/scala/edu/uci/ics/texera/service/resource/DatasetResource.scala b/core/file-service/src/main/scala/edu/uci/ics/texera/service/resource/DatasetResource.scala index 0272951561..c7736d9766 100644 --- a/core/file-service/src/main/scala/edu/uci/ics/texera/service/resource/DatasetResource.scala +++ b/core/file-service/src/main/scala/edu/uci/ics/texera/service/resource/DatasetResource.scala @@ -28,8 +28,7 @@ import edu.uci.ics.texera.dao.jooq.generated.tables.daos.{ import edu.uci.ics.texera.dao.jooq.generated.tables.pojos.{ Dataset, DatasetUserAccess, - DatasetVersion, - User + DatasetVersion } import edu.uci.ics.texera.service.`type`.DatasetFileNode import edu.uci.ics.texera.service.auth.SessionUser @@ -47,7 +46,6 @@ import edu.uci.ics.texera.service.resource.DatasetResource.{ DatasetDescriptionModification, DatasetVersionRootFileNodesResponse, Diff, - calculateDatasetVersionSize, context, getDatasetByID, getDatasetVersionByID, @@ -57,7 +55,6 @@ import io.dropwizard.auth.Auth import jakarta.annotation.security.RolesAllowed import jakarta.ws.rs._ import jakarta.ws.rs.core.{MediaType, Response, StreamingOutput} -import org.apache.commons.lang3.StringUtils import org.glassfish.jersey.media.multipart.FormDataParam import org.jooq.{DSLContext, EnumType} @@ -66,13 +63,12 @@ import java.net.{URI, URLDecoder} import java.nio.charset.StandardCharsets import java.nio.file.Files import java.util.Optional -import java.util.concurrent.locks.ReentrantLock import java.util.zip.{ZipEntry, ZipOutputStream} import scala.collection.mutable.ListBuffer import scala.jdk.CollectionConverters._ import scala.jdk.OptionConverters._ import scala.util.control.NonFatal -import scala.util.{Failure, Success, Try, Using} +import scala.util.Using object DatasetResource { private val context = SqlServer @@ -460,6 +456,38 @@ class DatasetResource { } } + @DELETE + @RolesAllowed(Array("REGULAR", "ADMIN")) + @Path("/{did}/file") + @Consumes(Array(MediaType.APPLICATION_JSON)) + def deleteDatasetFile( + @PathParam("did") did: Integer, + @QueryParam("filePath") encodedFilePath: String, + @Auth user: SessionUser + ): Response = { + val uid = user.getUid + withTransaction(context) { ctx => + if (!userHasWriteAccess(ctx, did, uid)) { + throw new ForbiddenException(ERR_USER_HAS_NO_ACCESS_TO_DATASET_MESSAGE) + } + val datasetName = getDatasetByID(ctx, did).getName + + // Decode the file path + val filePath = URLDecoder.decode(encodedFilePath, StandardCharsets.UTF_8.name()) + // Try to initialize the repository in LakeFS + try { + LakeFSFileStorage.deleteObject(datasetName, filePath) + } catch { + case e: Exception => + throw new WebApplicationException( + s"Failed to delete the file from repo in LakeFS: ${e.getMessage}" + ) + } + + Response.ok().build() + } + } + @POST @RolesAllowed(Array("REGULAR", "ADMIN")) @Path("/{did}/multipart-upload") @@ -634,6 +662,37 @@ class DatasetResource { } } + @PUT + 
@RolesAllowed(Array("REGULAR", "ADMIN")) + @Path("/{did}/diff") + @Consumes(Array(MediaType.APPLICATION_JSON)) + def resetDatasetFileDiff( + @PathParam("did") did: Integer, + @QueryParam("filePath") encodedFilePath: String, + @Auth user: SessionUser + ): Response = { + val uid = user.getUid + withTransaction(context) { ctx => + if (!userHasWriteAccess(ctx, did, uid)) { + throw new ForbiddenException(ERR_USER_HAS_NO_ACCESS_TO_DATASET_MESSAGE) + } + val datasetName = getDatasetByID(ctx, did).getName + + // Decode the file path + val filePath = URLDecoder.decode(encodedFilePath, StandardCharsets.UTF_8.name()) + // Try to reset the file change in LakeFS + try { + LakeFSFileStorage.resertObjectUploadOrDeletion(datasetName, filePath) + } catch { + case e: Exception => + throw new WebApplicationException( + s"Failed to reset the changes from repo in LakeFS: ${e.getMessage}" + ) + } + Response.ok().build() + } + } + /** * This method returns a list of DashboardDatasets objects that are accessible by current user. * diff --git a/core/gui/src/app/common/type/datasetVersionFileTree.ts b/core/gui/src/app/common/type/datasetVersionFileTree.ts index f0cef32e12..39509d6eef 100644 --- a/core/gui/src/app/common/type/datasetVersionFileTree.ts +++ b/core/gui/src/app/common/type/datasetVersionFileTree.ts @@ -11,6 +11,22 @@ export function getFullPathFromDatasetFileNode(node: DatasetFileNode): string { return `${node.parentDir}/${node.name}`; } +/** + * Returns the relative path of a DatasetFileNode by stripping the first three segments. + * @param node The DatasetFileNode whose relative path is needed. + * @returns The relative path (without the first three segments and without a leading slash). + */ +export function getRelativePathFromDatasetFileNode(node: DatasetFileNode): string { + const fullPath = getFullPathFromDatasetFileNode(node); // Get the full path + const pathSegments = fullPath.split("/").filter(segment => segment.length > 0); // Split and remove empty segments + + if (pathSegments.length <= 3) { + return ""; // If there are 3 or fewer segments, return an empty string (no relative path exists) + } + + return pathSegments.slice(3).join("/"); // Join remaining segments as the relative path +} + export function getPathsUnderOrEqualDatasetFileNode(node: DatasetFileNode): string[] { // Helper function to recursively gather paths const gatherPaths = (node: DatasetFileNode): string[] => { diff --git a/core/gui/src/app/dashboard/component/user/user-dataset/user-dataset-explorer/dataset-detail.component.html b/core/gui/src/app/dashboard/component/user/user-dataset/user-dataset-explorer/dataset-detail.component.html index ccf4138174..fce774e870 100644 --- a/core/gui/src/app/dashboard/component/user/user-dataset/user-dataset-explorer/dataset-detail.component.html +++ b/core/gui/src/app/dashboard/component/user/user-dataset/user-dataset-explorer/dataset-detail.component.html @@ -174,66 +174,77 @@
[dataset-detail.component.html hunk: the Angular markup did not survive text extraction. Visible text in the changed region includes "Create a New Version", "Choose a Version:", and "Version Size: {{ formatSize(currentDatasetVersionSize) }}".]
diff --git a/core/gui/src/app/dashboard/component/user/user-dataset/user-dataset-explorer/dataset-detail.component.scss b/core/gui/src/app/dashboard/component/user/user-dataset/user-dataset-explorer/dataset-detail.component.scss index 1c9eb23bdc..a098efafde 100644 --- a/core/gui/src/app/dashboard/component/user/user-dataset/user-dataset-explorer/dataset-detail.component.scss +++ b/core/gui/src/app/dashboard/component/user/user-dataset/user-dataset-explorer/dataset-detail.component.scss @@ -1,15 +1,17 @@ -.rounded-button { - display: inline-flex; /* Use flexbox for alignment */ +.create-dataset-version-button { + display: flex; /* Use flexbox for centering */ align-items: center; /* Center vertically */ justify-content: center; /* Center horizontally */ color: white; border: none; - padding: 10px 20px; /* Adjust padding as needed */ + padding: 12px 40px; /* Increase padding for a wider button */ border-radius: 25px; cursor: pointer; transition: background-color 0.3s; - margin-top: 50px; - margin-left: 20px; + margin: 50px auto 0 auto; /* Auto margins for horizontal centering */ + width: 200px; /* Adjust width as needed */ + font-size: 1.1em; /* Make text slightly bigger */ + font-weight: bold; /* Optional: Make text bold */ } .version-storage { @@ -140,7 +142,15 @@ nz-select { color: red; } -nz-tabset { - width: 95%; // Take up 90% of the container width - margin: 0 auto; // Center it horizontally +nz-collapse { + margin-bottom: 10px; +} + +::ng-deep .ant-collapse-header { + font-weight: bold !important; // Make the text bolder + font-size: 1.2em !important; // Increase font size (adjust as needed) + text-align: center !important; // Center the text + display: flex; + justify-content: center; + align-items: center; } diff --git a/core/gui/src/app/dashboard/component/user/user-dataset/user-dataset-explorer/dataset-detail.component.ts b/core/gui/src/app/dashboard/component/user/user-dataset/user-dataset-explorer/dataset-detail.component.ts index edb6b6deea..8004089fa9 100644 --- a/core/gui/src/app/dashboard/component/user/user-dataset/user-dataset-explorer/dataset-detail.component.ts +++ b/core/gui/src/app/dashboard/component/user/user-dataset/user-dataset-explorer/dataset-detail.component.ts @@ -1,9 +1,14 @@ -import { Component, OnInit } from "@angular/core"; +import { Component, EventEmitter, OnInit, Output } from "@angular/core"; import { ActivatedRoute, Router } from "@angular/router"; import { UntilDestroy, untilDestroyed } from "@ngneat/until-destroy"; import { DatasetService } from "../../../../service/user/dataset/dataset.service"; import { NzResizeEvent } from "ng-zorro-antd/resizable"; -import { DatasetFileNode, getFullPathFromDatasetFileNode } from "../../../../../common/type/datasetVersionFileTree"; +import { + DatasetFileNode, + getFullPathFromDatasetFileNode, + getPathsUnderOrEqualDatasetFileNode, + getRelativePathFromDatasetFileNode, +} from "../../../../../common/type/datasetVersionFileTree"; import { DatasetVersion } from "../../../../../common/type/dataset"; import { switchMap, throttleTime } from "rxjs/operators"; import { NotificationService } from "../../../../../common/service/notification/notification.service"; @@ -53,6 +58,8 @@ export class DatasetDetailComponent implements OnInit { public viewCount: number = 0; public displayPreciseViewCount = false; + @Output() userMakeChanges = new EventEmitter(); + constructor( private route: ActivatedRoute, private router: Router, @@ -298,12 +305,31 @@ export class DatasetDetailComponent implements OnInit { .multipartUpload(did, 
file.name, file.file) .pipe(untilDestroyed(this)) .subscribe(res => { - console.log("Multipart upload: ", res); + this.userMakeChanges.emit(); }); }); } } + onPreviouslyUploadedFileDeleted(node: DatasetFileNode) { + if (this.did) { + this.datasetService + .deleteDatasetFile(this.did, getRelativePathFromDatasetFileNode(node)) + .pipe(untilDestroyed(this)) + .subscribe({ + next: (res: Response) => { + this.notificationService.success( + `File ${node.name} is successfully deleted. You may finalize it or revert it at the "Create Version" panel` + ); + this.userMakeChanges.emit(); + }, + error: (err: unknown) => { + this.notificationService.error("Failed to delete the file"); + }, + }); + } + } + // alias for formatSize formatSize = formatSize; diff --git a/core/gui/src/app/dashboard/component/user/user-dataset/user-dataset-explorer/user-dataset-staged-objects-list/user-dataset-staged-objects-list.component.html b/core/gui/src/app/dashboard/component/user/user-dataset/user-dataset-explorer/user-dataset-staged-objects-list/user-dataset-staged-objects-list.component.html index 22f02f63bf..c80841eefb 100644 --- a/core/gui/src/app/dashboard/component/user/user-dataset/user-dataset-explorer/user-dataset-staged-objects-list/user-dataset-staged-objects-list.component.html +++ b/core/gui/src/app/dashboard/component/user/user-dataset/user-dataset-explorer/user-dataset-staged-objects-list/user-dataset-staged-objects-list.component.html @@ -5,12 +5,34 @@ nzSize="small" *ngIf="datasetStagedObjects.length > 0"> - {{ obj.diffType }} - {{obj.path}} + + {{ obj.diffType }} + + + {{ obj.path }} + + + + + nzNotFoundContent="No pending changes">
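The staged-objects list now re-fetches whenever the parent signals a change through its userMakeChangesEvent input. The template wiring is not recoverable from this extract, but given the selector and the bindings declared in the component classes above, it would presumably look something like the following hypothetical usage in dataset-detail.component.html:

    <!-- Hypothetical binding: did and userMakeChanges come from DatasetDetailComponent. -->
    <texera-dataset-staged-objects-list
      [did]="did"
      [userMakeChangesEvent]="userMakeChanges">
    </texera-dataset-staged-objects-list>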
diff --git a/core/gui/src/app/dashboard/component/user/user-dataset/user-dataset-explorer/user-dataset-staged-objects-list/user-dataset-staged-objects-list.component.scss b/core/gui/src/app/dashboard/component/user/user-dataset/user-dataset-explorer/user-dataset-staged-objects-list/user-dataset-staged-objects-list.component.scss index 1d19d1c816..f404f3db1c 100644 --- a/core/gui/src/app/dashboard/component/user/user-dataset/user-dataset-explorer/user-dataset-staged-objects-list/user-dataset-staged-objects-list.component.scss +++ b/core/gui/src/app/dashboard/component/user/user-dataset/user-dataset-explorer/user-dataset-staged-objects-list/user-dataset-staged-objects-list.component.scss @@ -4,3 +4,35 @@ overflow-y: auto; /* Enables vertical scrolling when content exceeds max-height */ overflow-x: auto; /* Prevents horizontal scrolling */ } + +.truncate-file-path { + display: inline-block; + max-width: 150px; /* Adjust width as needed */ + white-space: nowrap; + overflow: hidden; + text-overflow: ellipsis; + direction: rtl; /* Makes ellipsis appear at the beginning */ + unicode-bidi: plaintext; /* Prevents text from flipping in RTL mode */ +} + +.truncate-file-path:hover::after { + content: attr(data-fullpath); /* Show full path as tooltip */ + position: absolute; + background: rgba(0, 0, 0, 0.8); + color: white; + padding: 5px; + border-radius: 4px; + white-space: nowrap; + z-index: 10; + transform: translateY(-25px); +} + +.delete-button { + width: 24px; /* Minimum width for button */ + height: 24px; /* Keep the button small */ + padding: 0; /* Remove extra padding */ + display: flex; + align-items: center; + justify-content: center; + min-width: unset; /* Prevents unnecessary expansion */ +} diff --git a/core/gui/src/app/dashboard/component/user/user-dataset/user-dataset-explorer/user-dataset-staged-objects-list/user-dataset-staged-objects-list.component.ts b/core/gui/src/app/dashboard/component/user/user-dataset/user-dataset-explorer/user-dataset-staged-objects-list/user-dataset-staged-objects-list.component.ts index 30f71ea947..d7b53cbdc2 100644 --- a/core/gui/src/app/dashboard/component/user/user-dataset/user-dataset-explorer/user-dataset-staged-objects-list/user-dataset-staged-objects-list.component.ts +++ b/core/gui/src/app/dashboard/component/user/user-dataset/user-dataset-explorer/user-dataset-staged-objects-list/user-dataset-staged-objects-list.component.ts @@ -1,8 +1,7 @@ -import { Component, Input, OnInit } from "@angular/core"; +import { Component, EventEmitter, Input, OnInit } from "@angular/core"; import { DatasetStagedObject } from "../../../../../../common/type/dataset-staged-object"; import { DatasetService } from "../../../../../service/user/dataset/dataset.service"; -import { untilDestroyed } from "@ngneat/until-destroy"; -import { pipe } from "rxjs"; +import { NotificationService } from "../../../../../../common/service/notification/notification.service"; @Component({ selector: "texera-dataset-staged-objects-list", @@ -10,13 +9,28 @@ import { pipe } from "rxjs"; styleUrls: ["./user-dataset-staged-objects-list.component.scss"], }) export class UserDatasetStagedObjectsListComponent implements OnInit { - @Input() did?: number; // Dataset ID, required input from parent component + @Input() did?: number; // Dataset ID + @Input() set userMakeChangesEvent(event: EventEmitter) { + if (event) { + event.subscribe(() => { + console.log("Upload success event received, reloading staged objects..."); + this.fetchDatasetStagedObjects(); + }); + } + } + datasetStagedObjects: 
DatasetStagedObject[] = []; - constructor(private datasetService: DatasetService) {} + constructor( + private datasetService: DatasetService, + private notificationService: NotificationService + ) {} ngOnInit(): void { - console.log("did: ", this.did); + this.fetchDatasetStagedObjects(); + } + + private fetchDatasetStagedObjects(): void { if (this.did != undefined) { this.datasetService.getDatasetDiff(this.did).subscribe(diffs => { console.log("Received dataset diff:", diffs); @@ -24,4 +38,18 @@ export class UserDatasetStagedObjectsListComponent implements OnInit { }); } } + + onObjectReverted(objDiff: DatasetStagedObject) { + if (this.did) { + this.datasetService.resetDatasetFileDiff(this.did, objDiff.path).subscribe({ + next: (res: Response) => { + this.notificationService.success(`"${objDiff.diffType} ${objDiff.path}" is successfully reverted`); + this.fetchDatasetStagedObjects(); + }, + error: (err: unknown) => { + this.notificationService.error("Failed to delete the file"); + }, + }); + } + } } diff --git a/core/gui/src/app/dashboard/component/user/user-dataset/user-dataset-explorer/user-dataset-version-filetree/user-dataset-version-filetree.component.scss b/core/gui/src/app/dashboard/component/user/user-dataset/user-dataset-explorer/user-dataset-version-filetree/user-dataset-version-filetree.component.scss index fe9407f78a..a20082ce2c 100644 --- a/core/gui/src/app/dashboard/component/user/user-dataset/user-dataset-explorer/user-dataset-version-filetree/user-dataset-version-filetree.component.scss +++ b/core/gui/src/app/dashboard/component/user/user-dataset/user-dataset-explorer/user-dataset-version-filetree/user-dataset-version-filetree.component.scss @@ -10,7 +10,7 @@ /* Styles for the file tree container */ .file-tree-container { - max-height: 500px; /* Adjust the max-height as needed */ + max-height: 200px; /* Adjust the max-height as needed */ overflow-y: auto; /* Enables vertical scrolling when content exceeds max-height */ overflow-x: auto; /* Prevents horizontal scrolling */ } diff --git a/core/gui/src/app/dashboard/service/user/dataset/dataset.service.ts b/core/gui/src/app/dashboard/service/user/dataset/dataset.service.ts index 52692e4e31..4c0c5032a4 100644 --- a/core/gui/src/app/dashboard/service/user/dataset/dataset.service.ts +++ b/core/gui/src/app/dashboard/service/user/dataset/dataset.service.ts @@ -209,6 +209,42 @@ export class DatasetService { ); } + /** + * Resets a dataset file difference in LakeFS. + * @param did Dataset ID + * @param filePath File path to reset + */ + public resetDatasetFileDiff(did: number, filePath: string): Observable { + const apiUrl = `${AppSettings.getApiEndpoint()}/${DATASET_BASE_URL}/${did}/diff`; + const params = new HttpParams().set("filePath", encodeURIComponent(filePath)); + + return this.http.put(apiUrl, {}, { params }).pipe( + tap(() => console.log(`Reset file diff for dataset ${did}, file: ${filePath}`)), + catchError(error => { + console.error(`Failed to reset file diff for ${filePath}:`, error); + return throwError(() => error); + }) + ); + } + + /** + * Deletes a dataset file from LakeFS. 
+ * @param did Dataset ID + * @param filePath File path to delete + */ + public deleteDatasetFile(did: number, filePath: string): Observable { + const apiUrl = `${AppSettings.getApiEndpoint()}/${DATASET_BASE_URL}/${did}/file`; + const params = new HttpParams().set("filePath", encodeURIComponent(filePath)); + + return this.http.delete(apiUrl, { params }).pipe( + tap(() => console.log(`Deleted file from dataset ${did}, file: ${filePath}`)), + catchError(error => { + console.error(`Failed to delete file ${filePath}:`, error); + return throwError(() => error); + }) + ); + } + /** * Retrieves the list of uncommitted dataset changes (diffs). * @param did Dataset ID diff --git a/core/workflow-core/src/main/scala/edu/uci/ics/amber/core/storage/LakeFSFileStorage.scala b/core/workflow-core/src/main/scala/edu/uci/ics/amber/core/storage/LakeFSFileStorage.scala index fc1e2beefd..5895180d77 100644 --- a/core/workflow-core/src/main/scala/edu/uci/ics/amber/core/storage/LakeFSFileStorage.scala +++ b/core/workflow-core/src/main/scala/edu/uci/ics/amber/core/storage/LakeFSFileStorage.scala @@ -7,6 +7,7 @@ import java.io.{File, FileOutputStream, InputStream} import java.nio.file.Files import scala.jdk.CollectionConverters._ import edu.uci.ics.amber.core.storage.StorageConfig +import io.lakefs.clients.sdk.model.ResetCreation.TypeEnum /** * LakeFSFileStorage provides high-level file storage operations using LakeFS, @@ -254,4 +255,15 @@ object LakeFSFileStorage { commitsApi.commit(repoName, branch, commit).execute() } + def deleteObject(repoName: String, filePath: String): Unit = { + objectsApi.deleteObject(repoName, branchName, filePath).execute() + } + + def resertObjectUploadOrDeletion(repoName: String, filePath: String): Unit = { + val resetCreation: ResetCreation = new ResetCreation + resetCreation.setType(TypeEnum.OBJECT) + resetCreation.setPath(filePath) + + branchesApi.resetBranch(repoName, branchName, resetCreation).execute() + } } diff --git a/core/workflow-core/src/main/scala/edu/uci/ics/amber/core/storage/S3Storage.scala b/core/workflow-core/src/main/scala/edu/uci/ics/amber/core/storage/S3Storage.scala index 6803e13e30..4b1ac12eee 100644 --- a/core/workflow-core/src/main/scala/edu/uci/ics/amber/core/storage/S3Storage.scala +++ b/core/workflow-core/src/main/scala/edu/uci/ics/amber/core/storage/S3Storage.scala @@ -1,20 +1,11 @@ package edu.uci.ics.amber.core.storage import software.amazon.awssdk.auth.credentials.{AwsBasicCredentials, StaticCredentialsProvider} -import software.amazon.awssdk.awscore.presigner.PresignedRequest -import software.amazon.awssdk.core.SdkSystemSetting -import software.amazon.awssdk.core.sync.RequestBody import software.amazon.awssdk.regions.Region import software.amazon.awssdk.services.s3.{S3Client, S3Configuration} import software.amazon.awssdk.services.s3.model._ -import software.amazon.awssdk.services.s3.presigner.S3Presigner -import software.amazon.awssdk.services.s3.presigner.model._ -import java.io.{File, FileInputStream, InputStream} -import java.net.URL -import java.nio.file.{Files, Paths} import java.security.MessageDigest -import java.time.Duration import java.util.Base64 import scala.jdk.CollectionConverters._ diff --git a/core/workflow-core/src/main/scala/edu/uci/ics/amber/core/storage/model/DatasetFileDocument.scala b/core/workflow-core/src/main/scala/edu/uci/ics/amber/core/storage/model/DatasetFileDocument.scala index 7d7f41434b..cbef13131d 100644 --- a/core/workflow-core/src/main/scala/edu/uci/ics/amber/core/storage/model/DatasetFileDocument.scala +++ 
b/core/workflow-core/src/main/scala/edu/uci/ics/amber/core/storage/model/DatasetFileDocument.scala @@ -1,6 +1,5 @@ package edu.uci.ics.amber.core.storage.model -import edu.uci.ics.amber.core.storage.{S3Storage, StorageConfig} import edu.uci.ics.amber.core.storage.util.dataset.GitVersionControlLocalFileStorage import edu.uci.ics.amber.util.PathUtils From a71c2a3245f42be0837bf4c946d5751160818e8e Mon Sep 17 00:00:00 2001 From: Jiadong Bai Date: Wed, 26 Feb 2025 16:06:18 -0800 Subject: [PATCH 18/47] delete the lakefs test as the test environment don't have it --- .../amber/storage/LakeFSFileStorageSpec.scala | 125 ------------------ 1 file changed, 125 deletions(-) delete mode 100644 core/workflow-core/src/test/scala/edu/uci/ics/amber/storage/LakeFSFileStorageSpec.scala diff --git a/core/workflow-core/src/test/scala/edu/uci/ics/amber/storage/LakeFSFileStorageSpec.scala b/core/workflow-core/src/test/scala/edu/uci/ics/amber/storage/LakeFSFileStorageSpec.scala deleted file mode 100644 index 89c84ed5cb..0000000000 --- a/core/workflow-core/src/test/scala/edu/uci/ics/amber/storage/LakeFSFileStorageSpec.scala +++ /dev/null @@ -1,125 +0,0 @@ -package edu.uci.ics.amber.storage - -import edu.uci.ics.amber.core.storage.LakeFSFileStorage -import org.scalatest.BeforeAndAfterAll -import org.scalatest.flatspec.AnyFlatSpec -import org.scalatest.matchers.should.Matchers - -import java.io.{ByteArrayInputStream, File} -import java.nio.file.Files -import java.util.UUID - -class LakeFSFileStorageSpec extends AnyFlatSpec with Matchers with BeforeAndAfterAll { - - val repoName: String = UUID.randomUUID().toString - val branchName = "main" - - val fileContent1 = "Content of file 1" - val fileContent2 = "Content of file 2" - val fileContent3 = "Content of file 3" - val fileContent4 = "Content of file 4" - - val filePaths: Seq[String] = Seq( - "dir1/file1.txt", - "dir1/file2.txt", - "dir1/subdir1/file3.txt", - "dir2/file4.txt" - ) - - override def beforeAll(): Unit = { - LakeFSFileStorage.initRepo(repoName, s"local://$repoName") - } - - override def afterAll(): Unit = { - LakeFSFileStorage.deleteRepo(repoName) - } - - private def writeFile(filePath: String, content: String): Unit = { - val inputStream = new ByteArrayInputStream(content.getBytes) - LakeFSFileStorage.writeFileToRepo(repoName, branchName, filePath, inputStream) - } - - private def readFileContent(file: File): String = { - new String(Files.readAllBytes(file.toPath)) - } - - private def findCommitByMessage(message: String): Option[String] = { - LakeFSFileStorage - .retrieveVersionsOfRepository(repoName, branchName) - .find(_.getMessage == message) - .map(_.getId) - } - - "LakeFSFileStorage" should "write multiple files and verify contents across versions" in { - // Version 1: Add file1.txt and file2.txt - LakeFSFileStorage.withCreateVersion(repoName, branchName, "Add file1 and file2") { - writeFile(filePaths(0), fileContent1) - writeFile(filePaths(1), fileContent2) - } - - // Version 2: Add file3.txt - LakeFSFileStorage.withCreateVersion(repoName, branchName, "Add file3") { - writeFile(filePaths(2), fileContent3) - } - - // Version 3: Add file4.txt - LakeFSFileStorage.withCreateVersion(repoName, branchName, "Add file4") { - writeFile(filePaths(3), fileContent4) - } - - // Validate Version 1 - val commitV1 = findCommitByMessage("Add file1 and file2").get - val objectsV1 = LakeFSFileStorage.retrieveObjectsOfVersion(repoName, commitV1).map(_.getPath) - objectsV1 should contain allElementsOf Seq(filePaths(0), filePaths(1)) - objectsV1 should not contain 
filePaths(2) - - // Validate Version 2 - val commitV2 = findCommitByMessage("Add file3").get - val objectsV2 = LakeFSFileStorage.retrieveObjectsOfVersion(repoName, commitV2).map(_.getPath) - objectsV2 should contain allElementsOf Seq(filePaths(0), filePaths(1), filePaths(2)) - objectsV2 should not contain filePaths(3) - - // Validate Version 3 - val commitV3 = findCommitByMessage("Add file4").get - val objects = LakeFSFileStorage.retrieveObjectsOfVersion(repoName, commitV3) - val objectsV3 = LakeFSFileStorage.retrieveObjectsOfVersion(repoName, commitV3).map(_.getPath) - objectsV3 should contain allElementsOf filePaths - - // Verify content of file4.txt in the latest commit - val file4 = LakeFSFileStorage.retrieveFileContent(repoName, commitV3, filePaths(3)) - readFileContent(file4) should equal(fileContent4) - } - - it should "remove a file and verify its absence in the next version" in { - // Delete file2.txt and commit the change - LakeFSFileStorage.withCreateVersion(repoName, branchName, "Remove file2.txt") { - LakeFSFileStorage.removeFileFromRepo(repoName, branchName, filePaths(1)) - } - - // Locate the commit by message - val deleteCommit = findCommitByMessage("Remove file2.txt").get - - // Verify file2.txt is absent in the latest commit - val objectsAfterDeletion = - LakeFSFileStorage.retrieveObjectsOfVersion(repoName, deleteCommit).map(_.getPath) - objectsAfterDeletion should not contain filePaths(1) - - // Verify file1.txt is still present - val file1 = LakeFSFileStorage.retrieveFileContent(repoName, deleteCommit, filePaths(0)) - readFileContent(file1) should equal(fileContent1) - } - - it should "maintain hierarchical structure in file retrieval" in { - // Get the latest commit - val latestCommit = - LakeFSFileStorage.retrieveVersionsOfRepository(repoName, branchName).head.getId - - // Retrieve all objects - val objects = LakeFSFileStorage.retrieveObjectsOfVersion(repoName, latestCommit) - val objectPaths = objects.map(_.getPath) - - // Verify nested directories are intact - objectPaths should contain("dir1/subdir1/file3.txt") - objectPaths should contain("dir2/file4.txt") - } -} From cc1718d7cfa353b8b385be3522963e40dfca5982 Mon Sep 17 00:00:00 2001 From: Jiadong Bai Date: Wed, 26 Feb 2025 17:02:08 -0800 Subject: [PATCH 19/47] keep improving the backend and frontend --- .../component/dashboard.component.html | 2 +- .../component/dashboard.component.ts | 3 ++ .../files-uploader.component.html | 19 --------- .../dataset-detail.component.html | 19 +++++---- .../dataset-detail.component.scss | 4 ++ .../dataset-detail.component.ts | 7 ++++ ...dataset-staged-objects-list.component.scss | 2 +- ...r-dataset-staged-objects-list.component.ts | 42 ++++++++++++------- ...ser-dataset-version-creator.component.html | 7 ---- ...ser-dataset-version-creator.component.scss | 1 + .../user-dataset-version-creator.component.ts | 9 ---- .../service/user/dataset/dataset.service.ts | 41 +++++++++++++----- .../src/environments/environment.default.ts | 2 +- .../core/storage/LakeFSFileStorage.scala | 8 +++- 14 files changed, 93 insertions(+), 73 deletions(-) diff --git a/core/gui/src/app/dashboard/component/dashboard.component.html b/core/gui/src/app/dashboard/component/dashboard.component.html index 6b4485eafd..f8b3704cf1 100644 --- a/core/gui/src/app/dashboard/component/dashboard.component.html +++ b/core/gui/src/app/dashboard/component/dashboard.component.html @@ -164,7 +164,7 @@ *ngIf="!isLogin && googleLogin" type="standard" size="large" - [width]="200"> + width="200"> diff --git 
a/core/gui/src/app/dashboard/component/dashboard.component.ts b/core/gui/src/app/dashboard/component/dashboard.component.ts index 2df0680c6b..1bc3fedb9a 100644 --- a/core/gui/src/app/dashboard/component/dashboard.component.ts +++ b/core/gui/src/app/dashboard/component/dashboard.component.ts @@ -20,6 +20,7 @@ import { } from "../../app-routing.constant"; import { environment } from "../../../environments/environment"; import { Version } from "../../../environments/version"; +import { types } from "sass"; @Component({ selector: "texera-dashboard", @@ -145,4 +146,6 @@ export class DashboardComponent implements OnInit { } protected readonly DASHBOARD_ABOUT = DASHBOARD_ABOUT; + protected readonly types = types; + protected readonly String = String; } diff --git a/core/gui/src/app/dashboard/component/user/files-uploader/files-uploader.component.html b/core/gui/src/app/dashboard/component/user/files-uploader/files-uploader.component.html index 1cba750525..ee8aa6bd97 100644 --- a/core/gui/src/app/dashboard/component/user/files-uploader/files-uploader.component.html +++ b/core/gui/src/app/dashboard/component/user/files-uploader/files-uploader.component.html @@ -29,23 +29,4 @@ - - - - - - - - - - - - - - - - - - - diff --git a/core/gui/src/app/dashboard/component/user/user-dataset/user-dataset-explorer/dataset-detail.component.html b/core/gui/src/app/dashboard/component/user/user-dataset/user-dataset-explorer/dataset-detail.component.html index fce774e870..773caedf48 100644 --- a/core/gui/src/app/dashboard/component/user/user-dataset/user-dataset-explorer/dataset-detail.component.html +++ b/core/gui/src/app/dashboard/component/user/user-dataset/user-dataset-explorer/dataset-detail.component.html @@ -71,6 +71,7 @@

Choose a Version:

- + + [userMakeChangesEvent]="userMakeChanges" + (stagedObjectsChanged)="onStagedObjectsUpdated($event)"> + + + (); @@ -307,17 +308,37 @@ export class DatasetDetailComponent implements OnInit { onNewUploadFilesChanged(files: FileUploadItem[]) { if (this.did) { const did = this.did; - files.map(file => { + files.forEach(file => { this.datasetService .multipartUpload(did, file.name, file.file) .pipe(untilDestroyed(this)) - .subscribe(res => { - this.userMakeChanges.emit(); + .subscribe({ + next: res => { + this.uploadProgress = res; // Update the progress UI + }, + error: () => { + this.uploadProgress = { filePath: file.name, percentage: 100, status: "abort" }; + setTimeout(() => (this.uploadProgress = null), 3000); // Auto-hide after 3s + }, + complete: () => { + this.uploadProgress = { filePath: file.name, percentage: 100, status: "finish" }; + // Emit event to refresh dataset-staged-objects-list + this.userMakeChanges.emit(); + setTimeout(() => (this.uploadProgress = null), 3000); // Auto-hide after 3s + }, }); }); } } + clearUploadProgress() { + this.uploadProgress = null; + } + + getUploadStatus(status: string): "active" | "exception" | "success" { + return status === "uploading" ? "active" : status === "abort" ? "exception" : "success"; + } + onPreviouslyUploadedFileDeleted(node: DatasetFileNode) { if (this.did) { this.datasetService diff --git a/core/gui/src/app/dashboard/component/user/user-dataset/user-dataset-explorer/user-dataset-staged-objects-list/user-dataset-staged-objects-list.component.html b/core/gui/src/app/dashboard/component/user/user-dataset/user-dataset-explorer/user-dataset-staged-objects-list/user-dataset-staged-objects-list.component.html index c80841eefb..1a6d785440 100644 --- a/core/gui/src/app/dashboard/component/user/user-dataset/user-dataset-explorer/user-dataset-staged-objects-list/user-dataset-staged-objects-list.component.html +++ b/core/gui/src/app/dashboard/component/user/user-dataset/user-dataset-explorer/user-dataset-staged-objects-list/user-dataset-staged-objects-list.component.html @@ -1,7 +1,6 @@
diff --git a/core/gui/src/app/dashboard/service/user/dataset/dataset.service.ts b/core/gui/src/app/dashboard/service/user/dataset/dataset.service.ts index 4460dbc530..1c723b0923 100644 --- a/core/gui/src/app/dashboard/service/user/dataset/dataset.service.ts +++ b/core/gui/src/app/dashboard/service/user/dataset/dataset.service.ts @@ -3,7 +3,7 @@ import { HttpClient, HttpParams } from "@angular/common/http"; import { catchError, map, switchMap, tap } from "rxjs/operators"; import { Dataset, DatasetVersion } from "../../../../common/type/dataset"; import { AppSettings } from "../../../../common/app-setting"; -import { forkJoin, from, Observable, throwError } from "rxjs"; +import { EMPTY, forkJoin, from, Observable, of, throwError } from "rxjs"; import { DashboardDataset } from "../../../type/dashboard-dataset.interface"; import { FileUploadItem } from "../../../type/dashboard-file.interface"; import { DatasetFileNode } from "../../../../common/type/datasetVersionFileTree"; @@ -115,72 +115,94 @@ export class DatasetService { * @param filePath Path of the file within the dataset * @param file File object to be uploaded */ - public multipartUpload(did: number, filePath: string, file: File): Observable { + public multipartUpload( + did: number, + filePath: string, + file: File + ): Observable<{ filePath: string; percentage: number; status: "uploading" | "finished" | "aborted" }> { const partCount = Math.ceil(file.size / MULTIPART_UPLOAD_PART_SIZE_MB); - return this.initiateMultipartUpload(did, filePath, partCount).pipe( - switchMap(initiateResponse => { - const uploadId = initiateResponse.uploadId; - if (!uploadId) { - return throwError(() => new Error("Failed to initiate multipart upload")); - } - - console.log(`Started multipart upload for ${filePath} with UploadId: ${uploadId}`); - - // Array to store part numbers and ETags - const uploadedParts: { PartNumber: number; ETag: string }[] = []; - - const uploadObservables = initiateResponse.presignedUrls.map((url, index) => { - const start = index * MULTIPART_UPLOAD_PART_SIZE_MB; - const end = Math.min(start + MULTIPART_UPLOAD_PART_SIZE_MB, file.size); - const chunk = file.slice(start, end); - - return from(fetch(url, { method: "PUT", body: chunk })).pipe( - switchMap(response => { - if (!response.ok) { - return throwError(() => new Error(`Failed to upload part ${index + 1}`)); - } - const etag = response.headers.get("ETag")?.replace(/"/g, ""); // Extract and clean ETag - if (!etag) { - return throwError(() => new Error(`Missing ETag for part ${index + 1}`)); - } - - uploadedParts.push({ PartNumber: index + 1, ETag: etag }); - console.log(`Uploaded part ${index + 1} of ${partCount}, ETag: ${etag}`); - return from(Promise.resolve()); - }) - ); - }); - - return forkJoin(uploadObservables).pipe( - switchMap(() => - this.finalizeMultipartUpload( - did, - filePath, - uploadId, - uploadedParts, - initiateResponse.physicalAddress, - false - ) - ), - tap(() => console.log(`Multipart upload for ${filePath} completed successfully!`)), - catchError((error: unknown) => { - console.error(`Multipart upload failed for ${filePath}`, error); - return this.finalizeMultipartUpload( - did, - filePath, - uploadId, - uploadedParts, - initiateResponse.physicalAddress, - true - ).pipe( - tap(() => console.error(`Upload aborted for ${filePath}`)), - switchMap(() => throwError(() => error)) + return new Observable(observer => { + this.initiateMultipartUpload(did, filePath, partCount) + .pipe( + switchMap(initiateResponse => { + const uploadId = initiateResponse.uploadId; + 
if (!uploadId) { + observer.error(new Error("Failed to initiate multipart upload")); + return EMPTY; + } + + const uploadedParts: { PartNumber: number; ETag: string }[] = []; + let uploadedCount = 0; // Track uploaded parts + + const uploadObservables = initiateResponse.presignedUrls.map((url, index) => { + const start = index * MULTIPART_UPLOAD_PART_SIZE_MB; + const end = Math.min(start + MULTIPART_UPLOAD_PART_SIZE_MB, file.size); + const chunk = file.slice(start, end); + + return from(fetch(url, { method: "PUT", body: chunk })).pipe( + switchMap(response => { + if (!response.ok) { + return throwError(() => new Error(`Failed to upload part ${index + 1}`)); + } + const etag = response.headers.get("ETag")?.replace(/"/g, ""); + if (!etag) { + return throwError(() => new Error(`Missing ETag for part ${index + 1}`)); + } + + uploadedParts.push({ PartNumber: index + 1, ETag: etag }); + uploadedCount++; + + // Emit upload progress + observer.next({ + filePath, + percentage: Math.round((uploadedCount / partCount) * 100), + status: "uploading", + }); + + return of(null); + }) + ); + }); + + return forkJoin(uploadObservables).pipe( + switchMap(() => + this.finalizeMultipartUpload( + did, + filePath, + uploadId, + uploadedParts, + initiateResponse.physicalAddress, + false + ) + ), + tap(() => { + observer.next({ filePath, percentage: 100, status: "finished" }); + observer.complete(); + }), + catchError((error: unknown) => { + observer.next({ + filePath, + percentage: Math.round((uploadedCount / partCount) * 100), + status: "aborted", + }); + + return this.finalizeMultipartUpload( + did, + filePath, + uploadId, + uploadedParts, + initiateResponse.physicalAddress, + true + ).pipe(switchMap(() => throwError(() => error))); + }) ); }) - ); - }) - ); + ) + .subscribe({ + error: (err: unknown) => observer.error(err), + }); + }); } /** From 893df7177f3e456e427c3401645b51281c58d07c Mon Sep 17 00:00:00 2001 From: Jiadong Bai Date: Fri, 28 Feb 2025 15:04:59 -0800 Subject: [PATCH 23/47] clean up the frontend --- .../dataset-detail.component.html | 27 ++---- .../dataset-detail.component.ts | 86 +++++++------------ ...ser-dataset-version-creator.component.html | 8 +- .../user-dataset-version-creator.component.ts | 61 +++++-------- .../user-dataset/user-dataset.component.ts | 33 ++++++- 5 files changed, 92 insertions(+), 123 deletions(-) diff --git a/core/gui/src/app/dashboard/component/user/user-dataset/user-dataset-explorer/dataset-detail.component.html b/core/gui/src/app/dashboard/component/user/user-dataset/user-dataset-explorer/dataset-detail.component.html index ebe925d808..7c653d87df 100644 --- a/core/gui/src/app/dashboard/component/user/user-dataset/user-dataset-explorer/dataset-detail.component.html +++ b/core/gui/src/app/dashboard/component/user/user-dataset/user-dataset-explorer/dataset-detail.component.html @@ -1,4 +1,4 @@ -
+

Dataset: {{datasetName}}

Dataset: {{datasetName}}

- +

@@ -130,11 +130,11 @@

nzTheme="outline">

-
-
-
Create a New Dataset
-
Create a New Version
-
-
- - -
-
-
+
{ - const param = params["did"]; - if (param !== "create") { - this.did = param; - this.renderDatasetViewSider(); - this.retrieveDatasetInfo(); - this.retrieveDatasetVersionList(); - } else { - this.renderDatasetCreatorSider(); - } + this.did = params["did"]; + this.retrieveDatasetInfo(); + this.retrieveDatasetVersionList(); return this.route.data; // or some other observable }), untilDestroyed(this) @@ -144,49 +140,34 @@ export class DatasetDetailComponent implements OnInit { }); } - renderDatasetViewSider() { - this.isCreatingVersion = false; - this.isCreatingDataset = false; - } - renderDatasetCreatorSider() { - this.isCreatingVersion = false; - this.isCreatingDataset = true; - this.siderWidth = this.MAX_SIDER_WIDTH; - } - - renderVersionCreatorSider() { + public onClickOpenVersionCreator() { if (this.did) { - this.isCreatingDataset = false; - this.isCreatingVersion = true; - this.siderWidth = this.MAX_SIDER_WIDTH; - } - } - - public onCreationFinished(creationID: number) { - if (creationID != 0) { - // creation succeed - if (this.isCreatingVersion) { - this.retrieveDatasetVersionList(); - this.renderDatasetViewSider(); - } else { - this.router.navigate([`${DASHBOARD_USER_DATASET}/${creationID}`]); - } - } else { - // creation failed - if (this.isCreatingVersion) { - this.isCreatingVersion = false; - this.isCreatingDataset = false; - this.retrieveDatasetVersionList(); - } else { - this.router.navigate([DASHBOARD_USER_DATASET]); - } + const modal = this.modalService.create({ + nzTitle: "Create New Dataset Version", + nzContent: UserDatasetVersionCreatorComponent, + nzFooter: null, + nzData: { + isCreatingVersion: true, + did: this.did, + }, + nzBodyStyle: { + resize: "both", + overflow: "auto", + minHeight: "200px", + minWidth: "550px", + maxWidth: "90vw", + maxHeight: "80vh", + }, + nzWidth: "fit-content", + }); + modal.afterClose.pipe(untilDestroyed(this)).subscribe(result => { + if (result != null) { + this.retrieveDatasetVersionList(); + } + }); } } - public onClickOpenVersionCreator() { - this.renderVersionCreatorSider(); - } - onPublicStatusChange(checked: boolean): void { // Handle the change in dataset public status if (this.did) { @@ -297,10 +278,6 @@ export class DatasetDetailComponent implements OnInit { this.loadFileContent(node); } - isDisplayingDataset(): boolean { - return !this.isCreatingDataset && !this.isCreatingVersion; - } - userHasWriteAccess(): boolean { return this.userDatasetAccessLevel == "WRITE"; } @@ -322,7 +299,6 @@ export class DatasetDetailComponent implements OnInit { }, complete: () => { this.uploadProgress = { filePath: file.name, percentage: 100, status: "finish" }; - // Emit event to refresh dataset-staged-objects-list this.userMakeChanges.emit(); setTimeout(() => (this.uploadProgress = null), 3000); // Auto-hide after 3s }, diff --git a/core/gui/src/app/dashboard/component/user/user-dataset/user-dataset-explorer/user-dataset-version-creator/user-dataset-version-creator.component.html b/core/gui/src/app/dashboard/component/user/user-dataset/user-dataset-explorer/user-dataset-version-creator/user-dataset-version-creator.component.html index b7e6d4fb4c..4ed8ec4879 100644 --- a/core/gui/src/app/dashboard/component/user/user-dataset/user-dataset-explorer/user-dataset-version-creator/user-dataset-version-creator.component.html +++ b/core/gui/src/app/dashboard/component/user/user-dataset/user-dataset-explorer/user-dataset-version-creator/user-dataset-version-creator.component.html @@ -1,11 +1,11 @@
+ [ngClass]="{'disabled-backdrop': isCreating}"> Create @@ -32,7 +32,7 @@ nzType="default" (click)="onClickCancel()" class="cancel-btn" - [disabled]="isUploading"> + [disabled]="isCreating"> Cancel diff --git a/core/gui/src/app/dashboard/component/user/user-dataset/user-dataset-explorer/user-dataset-version-creator/user-dataset-version-creator.component.ts b/core/gui/src/app/dashboard/component/user/user-dataset/user-dataset-explorer/user-dataset-version-creator/user-dataset-version-creator.component.ts index d7f1e7bed2..8f1225adc7 100644 --- a/core/gui/src/app/dashboard/component/user/user-dataset/user-dataset-explorer/user-dataset-version-creator/user-dataset-version-creator.component.ts +++ b/core/gui/src/app/dashboard/component/user/user-dataset/user-dataset-explorer/user-dataset-version-creator/user-dataset-version-creator.component.ts @@ -1,4 +1,4 @@ -import { Component, EventEmitter, Input, OnInit, Output } from "@angular/core"; +import { Component, EventEmitter, inject, Input, OnInit, Output } from "@angular/core"; import { FormBuilder, FormGroup, Validators } from "@angular/forms"; import { FormlyFieldConfig } from "@ngx-formly/core"; import { DatasetService } from "../../../../../service/user/dataset/dataset.service"; @@ -8,6 +8,7 @@ import { UntilDestroy, untilDestroyed } from "@ngneat/until-destroy"; import { NotificationService } from "../../../../../../common/service/notification/notification.service"; import sanitize from "sanitize-filename"; import { HttpErrorResponse } from "@angular/common/http"; +import { NZ_MODAL_DATA, NzModalRef } from "ng-zorro-antd/modal"; @UntilDestroy() @Component({ @@ -16,22 +17,12 @@ import { HttpErrorResponse } from "@angular/common/http"; styleUrls: ["./user-dataset-version-creator.component.scss"], }) export class UserDatasetVersionCreatorComponent implements OnInit { - @Input() - isCreatingVersion: boolean = false; + readonly isCreatingVersion: boolean = inject(NZ_MODAL_DATA).isCreatingVersion; - @Input() - did: number | undefined; - - // this emits the ID of the newly created version/dataset, will emit 0 if creation is failed. - @Output() - datasetOrVersionCreationID: EventEmitter = new EventEmitter(); + readonly did: number = inject(NZ_MODAL_DATA)?.did ?? undefined; isCreateButtonDisabled: boolean = false; - newUploadFiles: FileUploadItem[] = []; - - removedFilePaths: string[] = []; - public form: FormGroup = new FormGroup({}); model: any = {}; fields: FormlyFieldConfig[] = []; @@ -41,9 +32,10 @@ export class UserDatasetVersionCreatorComponent implements OnInit { isDatasetNameSanitized: boolean = false; // boolean to control if is uploading - isUploading: boolean = false; + isCreating: boolean = false; constructor( + private modalRef: NzModalRef, private datasetService: DatasetService, private notificationService: NotificationService, private formBuilder: FormBuilder @@ -108,7 +100,7 @@ export class UserDatasetVersionCreatorComponent implements OnInit { } onClickCancel() { - this.datasetOrVersionCreationID.emit(0); + this.modalRef.close(null); } onClickCreate() { @@ -119,14 +111,7 @@ export class UserDatasetVersionCreatorComponent implements OnInit { return; // Stop further execution if the form is not valid } - // if (this.newUploadFiles.length == 0 && this.removedFilePaths.length == 0) { - // this.notificationService.error( - // `Please either upload new file(s) or remove old file(s) when creating a new ${this.isCreatingVersion ? 
"Version" : "Dataset"}` - // ); - // return; - // } - - this.isUploading = true; + this.isCreating = true; if (this.isCreatingVersion && this.did) { const versionName = this.form.get("versionDescription")?.value; this.datasetService @@ -135,13 +120,16 @@ export class UserDatasetVersionCreatorComponent implements OnInit { .subscribe({ next: res => { this.notificationService.success("Version Created"); - this.datasetOrVersionCreationID.emit(res.dvid); - this.isUploading = false; + this.isCreating = false; + // creation succeed, emit created version + this.modalRef.close(res); }, error: (res: unknown) => { const err = res as HttpErrorResponse; this.notificationService.error(`Version creation failed: ${err.error.message}`); - this.isUploading = false; + this.isCreating = false; + // creation failed, emit null value + this.modalRef.close(null); }, }); } else { @@ -155,10 +143,6 @@ export class UserDatasetVersionCreatorComponent implements OnInit { creationTime: undefined, versionHierarchy: undefined, }; - const initialVersionName = this.form.get("versionDescription")?.value; - - // do the name sanitization - this.datasetService .createDataset(ds) .pipe(untilDestroyed(this)) @@ -167,13 +151,16 @@ export class UserDatasetVersionCreatorComponent implements OnInit { this.notificationService.success( `Dataset '${ds.name}' Created. ${this.isDatasetNameSanitized ? "We have sanitized your provided dataset name for the compatibility reason" : ""}` ); - this.datasetOrVersionCreationID.emit(res.dataset.did); - this.isUploading = false; + this.isCreating = false; + // if creation succeed, emit the created dashboard dataset + this.modalRef.close(res); }, error: (res: unknown) => { const err = res as HttpErrorResponse; this.notificationService.error(`Dataset ${ds.name} creation failed: ${err.error.message}`); - this.isUploading = false; + this.isCreating = false; + // if creation failed, emit null value + this.modalRef.close(null); }, }); } @@ -183,12 +170,4 @@ export class UserDatasetVersionCreatorComponent implements OnInit { // Handle the change in dataset public status this.isDatasetPublic = newValue; } - - onNewUploadFilesChanged(files: FileUploadItem[]) { - this.newUploadFiles = files; - } - - onRemovingFilePathsChanged(paths: string[]) { - this.removedFilePaths = this.removedFilePaths.concat(paths); - } } diff --git a/core/gui/src/app/dashboard/component/user/user-dataset/user-dataset.component.ts b/core/gui/src/app/dashboard/component/user/user-dataset/user-dataset.component.ts index e1bbb7a67b..9250126ee6 100644 --- a/core/gui/src/app/dashboard/component/user/user-dataset/user-dataset.component.ts +++ b/core/gui/src/app/dashboard/component/user/user-dataset/user-dataset.component.ts @@ -9,7 +9,12 @@ import { DashboardEntry, UserInfo } from "../../../type/dashboard-entry"; import { SearchResultsComponent } from "../search-results/search-results.component"; import { FiltersComponent } from "../filters/filters.component"; import { firstValueFrom } from "rxjs"; -import { DASHBOARD_USER_DATASET_CREATE } from "../../../../app-routing.constant"; +import { DASHBOARD_USER_DATASET, DASHBOARD_USER_DATASET_CREATE } from "../../../../app-routing.constant"; +import { NzModalService } from "ng-zorro-antd/modal"; +import { FileSelectionComponent } from "../../../../workspace/component/file-selection/file-selection.component"; +import { DatasetFileNode, getFullPathFromDatasetFileNode } from "../../../../common/type/datasetVersionFileTree"; +import { UserDatasetVersionCreatorComponent } from 
"./user-dataset-explorer/user-dataset-version-creator/user-dataset-version-creator.component"; +import { DashboardDataset } from "../../../type/dashboard-dataset.interface"; @UntilDestroy() @Component({ @@ -50,6 +55,7 @@ export class UserDatasetComponent implements AfterViewInit { private masterFilterList: ReadonlyArray | null = null; constructor( + private modalService: NzModalService, private userService: UserService, private router: Router, private searchService: SearchService, @@ -152,7 +158,30 @@ export class UserDatasetComponent implements AfterViewInit { } public onClickOpenDatasetAddComponent(): void { - this.router.navigate([DASHBOARD_USER_DATASET_CREATE]); + const modal = this.modalService.create({ + nzTitle: "Create New Dataset", + nzContent: UserDatasetVersionCreatorComponent, + nzFooter: null, + nzData: { + isCreatingVersion: false, + }, + nzBodyStyle: { + resize: "both", + overflow: "auto", + minHeight: "200px", + minWidth: "550px", + maxWidth: "90vw", + maxHeight: "80vh", + }, + nzWidth: "fit-content", + }); + // Handle the selection from the modal + modal.afterClose.pipe(untilDestroyed(this)).subscribe(result => { + if (result != null) { + const dashboardDataset: DashboardDataset = result as DashboardDataset; + this.router.navigate([`${DASHBOARD_USER_DATASET}/${dashboardDataset.dataset.did}`]); + } + }); } public deleteDataset(entry: DashboardEntry): void { From 559b9541d8e92fdbd4d5eaa8c0ebd095232f6b7c Mon Sep 17 00:00:00 2001 From: Jiadong Bai Date: Sat, 1 Mar 2025 15:30:42 -0800 Subject: [PATCH 24/47] finish the export --- .../user/dataset/DatasetResource.scala | 25 +--- .../web/service/ResultExportService.scala | 139 +++++++++++------- .../service/resource/DatasetResource.scala | 45 +++++- .../dataset-detail.component.ts | 2 +- .../service/user/dataset/dataset.service.ts | 30 ++-- .../core/storage/LakeFSFileStorage.scala | 20 ++- .../storage/model/DatasetFileDocument.scala | 18 ++- 7 files changed, 172 insertions(+), 107 deletions(-) diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/web/resource/dashboard/user/dataset/DatasetResource.scala b/core/amber/src/main/scala/edu/uci/ics/texera/web/resource/dashboard/user/dataset/DatasetResource.scala index 27bbedc224..485c98d962 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/web/resource/dashboard/user/dataset/DatasetResource.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/web/resource/dashboard/user/dataset/DatasetResource.scala @@ -22,8 +22,7 @@ import edu.uci.ics.texera.dao.jooq.generated.tables.daos.{ import edu.uci.ics.texera.dao.jooq.generated.tables.pojos.{ Dataset, DatasetUserAccess, - DatasetVersion, - User + DatasetVersion } import edu.uci.ics.texera.web.resource.dashboard.user.dataset.DatasetAccessResource._ import edu.uci.ics.texera.web.resource.dashboard.user.dataset.DatasetResource.{context, _} @@ -200,28 +199,6 @@ object DatasetResource { DatasetOperation(filesToAdd.toMap, filesToRemove.toList) } - /** - * Create a new dataset version by adding new files - * @param did the target dataset id - * @param user the user submitting the request - * @param filesToAdd the map containing the files to add - * @return the created dataset version - */ - def createNewDatasetVersionByAddingFiles( - did: Integer, - user: User, - filesToAdd: Map[java.nio.file.Path, InputStream] - ): Option[DashboardDatasetVersion] = { - applyDatasetOperationToCreateNewVersion( - context, - did, - user.getUid, - user.getEmail, - "", - DatasetOperation(filesToAdd, List()) - ) - } - // apply the dataset operation to create 
a new dataset version // it returns the created dataset version if creation succeed, else return None // concurrency control is performed here: the thread has to have the lock in order to create the new version diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/web/service/ResultExportService.scala b/core/amber/src/main/scala/edu/uci/ics/texera/web/service/ResultExportService.scala index f0a16ef37f..e60f07b564 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/web/service/ResultExportService.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/web/service/ResultExportService.scala @@ -10,13 +10,13 @@ import edu.uci.ics.amber.util.{ArrowUtils, PathUtils} import edu.uci.ics.texera.dao.jooq.generated.tables.pojos.User import edu.uci.ics.texera.web.model.websocket.request.ResultExportRequest import edu.uci.ics.texera.web.model.websocket.response.ResultExportResponse -import edu.uci.ics.texera.web.resource.dashboard.user.dataset.DatasetResource.createNewDatasetVersionByAddingFiles import edu.uci.ics.texera.web.resource.dashboard.user.workflow.{ WorkflowExecutionsResource, WorkflowVersionResource } import edu.uci.ics.texera.web.service.WorkflowExecutionService.getLatestExecutionId + import java.io.{FilterOutputStream, IOException, OutputStream, PipedInputStream, PipedOutputStream} import java.nio.channels.Channels import java.nio.charset.StandardCharsets @@ -33,7 +33,14 @@ import org.apache.arrow.vector.ipc.ArrowFileWriter import org.apache.commons.lang3.StringUtils import javax.ws.rs.WebApplicationException import javax.ws.rs.core.StreamingOutput +import java.io.OutputStream +import java.nio.channels.Channels +import scala.util.Using +import edu.uci.ics.amber.core.workflow.PortIdentity +import edu.uci.ics.texera.web.auth.JwtAuth +import edu.uci.ics.texera.web.auth.JwtAuth.{TOKEN_EXPIRE_TIME_IN_DAYS, dayToMin, jwtClaims} +import java.net.{HttpURLConnection, URL, URLEncoder} /** * A simple wrapper that ignores 'close()' calls on the underlying stream. * This allows each operator's writer to call close() without ending the entire ZipOutputStream. 
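The body of this close-ignoring wrapper is elided from the hunk context above, so the following is only a minimal sketch of what such a FilterOutputStream subclass typically looks like; the class name NonClosingOutputStream and its exact placement are assumptions, not taken from the patch.

import java.io.{FilterOutputStream, OutputStream}

class NonClosingOutputStream(out: OutputStream) extends FilterOutputStream(out) {
  // Delegate writes directly to the wrapped stream instead of the byte-by-byte default.
  override def write(b: Int): Unit = out.write(b)
  override def write(b: Array[Byte], off: Int, len: Int): Unit = out.write(b, off, len)
  override def flush(): Unit = out.flush()
  // Ignore close(): only flush, so a per-operator writer can "close" its view of the
  // stream without terminating the enclosing ZipOutputStream.
  override def close(): Unit = out.flush()
}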
@@ -52,6 +59,14 @@ object ResultExportService { // Matches the remote's approach for a thread pool final private val pool: ThreadPoolExecutor = Executors.newFixedThreadPool(3).asInstanceOf[ThreadPoolExecutor] + + lazy val fileServiceUploadOneFileToDatasetEndpoint: String = + sys.env + .getOrElse( + "FILE_SERVICE_UPLOAD_ONE_FILE_TO_DATASET_ENDPOINT", + "http://localhost:9092/api/dataset/did/upload" + ) + .trim } class ResultExportService(workflowIdentity: WorkflowIdentity) { @@ -156,23 +171,22 @@ class ResultExportService(workflowIdentity: WorkflowIdentity) { results: Iterable[Tuple], headers: List[String] ): (Option[String], Option[String]) = { + val fileName = generateFileName(request, operatorId, "csv") try { - val pipedOutputStream = new PipedOutputStream() - val pipedInputStream = new PipedInputStream(pipedOutputStream) - pool.submit(new Runnable { - override def run(): Unit = { - val writer = CSVWriter.open(pipedOutputStream) + saveToDatasets( + request, + user, + outputStream => { + val writer = CSVWriter.open(outputStream) writer.writeRow(headers) results.foreach { tuple => writer.writeRow(tuple.getFields.toIndexedSeq) } writer.close() - } - }) - - val fileName = generateFileName(request, operatorId, "csv") - saveToDatasets(request, user, pipedInputStream, fileName) + }, + fileName + ) (Some(s"CSV export done for operator $operatorId -> file: $fileName"), None) } catch { case ex: Exception => @@ -202,23 +216,16 @@ class ResultExportService(workflowIdentity: WorkflowIdentity) { val field = selectedRow.getField(columnIndex) val dataBytes: Array[Byte] = convertFieldToBytes(field) - val pipedOutputStream = new PipedOutputStream() - val pipedInputStream = new PipedInputStream(pipedOutputStream) - - pool.submit(new Runnable { - override def run(): Unit = { - pipedOutputStream.write(dataBytes) - pipedOutputStream.close() - } - }) - saveToDatasets(request, user, pipedInputStream, fileName) - (Some(s"Data export done for operator $operatorId -> file: $fileName"), None) - } catch { - case ex: Exception => - (None, Some(s"Data export failed for operator $operatorId: ${ex.getMessage}")) - } - } + saveToDatasets( + request, + user, + outputStream => { + outputStream.write(dataBytes) + outputStream.close() + }, + fileName + ) private def convertFieldToBytes(field: Any): Array[Byte] = { field match { @@ -242,24 +249,24 @@ class ResultExportService(workflowIdentity: WorkflowIdentity) { } try { - val pipedOutputStream = new PipedOutputStream() - val pipedInputStream = new PipedInputStream(pipedOutputStream) - val allocator = new RootAllocator() - - pool.submit(() => { - Using.Manager { use => - val (writer, root) = createArrowWriter(results, allocator, pipedOutputStream) - use(writer) - use(root) - use(allocator) - use(pipedOutputStream) - - writeArrowData(writer, root, results) - } - }) - val fileName = generateFileName(request, operatorId, "arrow") - saveToDatasets(request, user, pipedInputStream, fileName) + + saveToDatasets( + request, + user, + outputStream => { + val allocator = new RootAllocator() + Using.Manager { use => + val (writer, root) = createArrowWriter(results, allocator, outputStream) + use(writer) + use(root) + use(allocator) + + writeArrowData(writer, root, results) + } + }, + fileName + ) (Some(s"Arrow file export done for operator $operatorId -> file: $fileName"), None) } catch { @@ -333,17 +340,47 @@ class ResultExportService(workflowIdentity: WorkflowIdentity) { private def saveToDatasets( request: ResultExportRequest, user: User, - pipedInputStream: PipedInputStream, + 
fileWriter: OutputStream => Unit, // Pass function that writes data fileName: String ): Unit = { request.datasetIds.foreach { did => - val datasetPath = PathUtils.getDatasetPath(did) - val filePath = datasetPath.resolve(fileName) - createNewDatasetVersionByAddingFiles( - did, - user, - Map(filePath -> pipedInputStream) + val encodedFilePath = URLEncoder.encode(fileName, StandardCharsets.UTF_8.name()) + val message = URLEncoder.encode( + s"Export from workflow ${request.workflowName}", + StandardCharsets.UTF_8.name() ) + + val uploadUrl = s"$fileServiceUploadOneFileToDatasetEndpoint" + .replace("did", did.toString) + s"?filePath=$encodedFilePath&message=$message" + + var connection: HttpURLConnection = null + try { + val url = new URL(uploadUrl) + connection = url.openConnection().asInstanceOf[HttpURLConnection] + connection.setDoOutput(true) + connection.setRequestMethod("POST") + connection.setRequestProperty("Content-Type", "application/octet-stream") + connection.setRequestProperty( + "Authorization", + s"Bearer ${JwtAuth.jwtToken(jwtClaims(user, dayToMin(TOKEN_EXPIRE_TIME_IN_DAYS)))}" + ) + + // Get output stream from connection + val outputStream = connection.getOutputStream + fileWriter(outputStream) // Write directly to HTTP request output stream + outputStream.close() + + // Check response + val responseCode = connection.getResponseCode + if (responseCode != HttpURLConnection.HTTP_OK) { + throw new RuntimeException(s"Failed to upload file. Server responded with: $responseCode") + } + } catch { + case e: Exception => + throw new RuntimeException(s"Error uploading file to dataset $did: ${e.getMessage}", e) + } finally { + if (connection != null) connection.disconnect() + } } } diff --git a/core/file-service/src/main/scala/edu/uci/ics/texera/service/resource/DatasetResource.scala b/core/file-service/src/main/scala/edu/uci/ics/texera/service/resource/DatasetResource.scala index 5eae9b0a70..3f7e9480e7 100644 --- a/core/file-service/src/main/scala/edu/uci/ics/texera/service/resource/DatasetResource.scala +++ b/core/file-service/src/main/scala/edu/uci/ics/texera/service/resource/DatasetResource.scala @@ -422,9 +422,39 @@ class DatasetResource { } } + @POST + @RolesAllowed(Array("REGULAR", "ADMIN")) + @Path("/{did}/upload") + @Consumes(Array(MediaType.APPLICATION_OCTET_STREAM)) + def uploadOneFileToDataset( + @PathParam("did") did: Integer, + @QueryParam("filePath") encodedFilePath: String, + @QueryParam("message") message: String, + fileStream: InputStream, + @Auth user: SessionUser + ): Response = { + val uid = user.getUid + + withTransaction(context) { ctx => + // Verify the user has write access + if (!userHasWriteAccess(ctx, did, uid)) { + throw new ForbiddenException(ERR_USER_HAS_NO_ACCESS_TO_DATASET_MESSAGE) + } + + // Retrieve dataset name + val dataset = getDatasetByID(ctx, did) + val datasetName = dataset.getName + // Decode file path + val filePath = URLDecoder.decode(encodedFilePath, StandardCharsets.UTF_8.name()) + // TODO: in the future consider using multipart to upload this stream more faster + LakeFSFileStorage.writeFileToRepo(datasetName, filePath, fileStream) + Response.ok(Map("message" -> "File uploaded successfully")).build() + } + } + @GET @RolesAllowed(Array("REGULAR", "ADMIN")) - @Path("/presign") + @Path("/presign-download") def getPresignedUrl( @QueryParam("filePath") encodedUrl: String, @QueryParam("datasetName") datasetName: String, @@ -519,12 +549,12 @@ class DatasetResource { @POST @RolesAllowed(Array("REGULAR", "ADMIN")) - @Path("/{did}/multipart-upload") + 
@Path("/multipart-upload") @Consumes(Array(MediaType.APPLICATION_JSON)) def multipartUpload( - @PathParam("did") did: Integer, + @QueryParam("datasetName") datasetName: String, @QueryParam("type") operationType: String, - @QueryParam("key") encodedUrl: String, + @QueryParam("filePath") encodedUrl: String, @QueryParam("uploadId") uploadId: Optional[String], @QueryParam("numParts") numParts: Optional[Integer], payload: Map[ @@ -536,10 +566,11 @@ class DatasetResource { val uid = user.getUid withTransaction(context) { ctx => - if (!userHasWriteAccess(ctx, did, uid)) { + val datasetDao = new DatasetDao(ctx.configuration()) + val datasets = datasetDao.fetchByName(datasetName).asScala.toList + if (datasets.isEmpty || !userHasWriteAccess(ctx, datasets.head.getDid, uid)) { throw new ForbiddenException(ERR_USER_HAS_NO_ACCESS_TO_DATASET_MESSAGE) } - val datasetName = getDatasetByID(ctx, did).getName // Decode the file path val filePath = URLDecoder.decode(encodedUrl, StandardCharsets.UTF_8.name()) @@ -711,7 +742,7 @@ class DatasetResource { val filePath = URLDecoder.decode(encodedFilePath, StandardCharsets.UTF_8.name()) // Try to reset the file change in LakeFS try { - LakeFSFileStorage.resertObjectUploadOrDeletion(datasetName, filePath) + LakeFSFileStorage.resetObjectUploadOrDeletion(datasetName, filePath) } catch { case e: Exception => throw new WebApplicationException( diff --git a/core/gui/src/app/dashboard/component/user/user-dataset/user-dataset-explorer/dataset-detail.component.ts b/core/gui/src/app/dashboard/component/user/user-dataset/user-dataset-explorer/dataset-detail.component.ts index 880024a46e..108eb5982d 100644 --- a/core/gui/src/app/dashboard/component/user/user-dataset/user-dataset-explorer/dataset-detail.component.ts +++ b/core/gui/src/app/dashboard/component/user/user-dataset/user-dataset-explorer/dataset-detail.component.ts @@ -287,7 +287,7 @@ export class DatasetDetailComponent implements OnInit { const did = this.did; files.forEach(file => { this.datasetService - .multipartUpload(did, file.name, file.file) + .multipartUpload(this.datasetName, file.name, file.file) .pipe(untilDestroyed(this)) .subscribe({ next: res => { diff --git a/core/gui/src/app/dashboard/service/user/dataset/dataset.service.ts b/core/gui/src/app/dashboard/service/user/dataset/dataset.service.ts index 1c723b0923..1f14465f5c 100644 --- a/core/gui/src/app/dashboard/service/user/dataset/dataset.service.ts +++ b/core/gui/src/app/dashboard/service/user/dataset/dataset.service.ts @@ -61,7 +61,9 @@ export class DatasetService { return this.http .get<{ presignedUrl: string; - }>(`${AppSettings.getApiEndpoint()}/dataset/presign?filePath=${encodeURIComponent(filePath)}`) + }>( + `${AppSettings.getApiEndpoint()}/${DATASET_BASE_URL}/presign-download?filePath=${encodeURIComponent(filePath)}` + ) .pipe( switchMap(({ presignedUrl }) => { return this.http.get(presignedUrl, { responseType: "blob" }); @@ -111,19 +113,19 @@ export class DatasetService { /** * Handles multipart upload for large files using RxJS. 
- * @param did Dataset ID + * @param datasetName Dataset Name * @param filePath Path of the file within the dataset * @param file File object to be uploaded */ public multipartUpload( - did: number, + datasetName: string, filePath: string, file: File ): Observable<{ filePath: string; percentage: number; status: "uploading" | "finished" | "aborted" }> { const partCount = Math.ceil(file.size / MULTIPART_UPLOAD_PART_SIZE_MB); return new Observable(observer => { - this.initiateMultipartUpload(did, filePath, partCount) + this.initiateMultipartUpload(datasetName, filePath, partCount) .pipe( switchMap(initiateResponse => { const uploadId = initiateResponse.uploadId; @@ -168,7 +170,7 @@ export class DatasetService { return forkJoin(uploadObservables).pipe( switchMap(() => this.finalizeMultipartUpload( - did, + datasetName, filePath, uploadId, uploadedParts, @@ -188,7 +190,7 @@ export class DatasetService { }); return this.finalizeMultipartUpload( - did, + datasetName, filePath, uploadId, uploadedParts, @@ -207,22 +209,23 @@ export class DatasetService { /** * Initiates a multipart upload and retrieves presigned URLs for each part. - * @param did Dataset ID + * @param datasetName Dataset Name * @param filePath File path within the dataset * @param numParts Number of parts for the multipart upload */ private initiateMultipartUpload( - did: number, + datasetName: string, filePath: string, numParts: number ): Observable<{ uploadId: string; presignedUrls: string[]; physicalAddress: string }> { const params = new HttpParams() .set("type", "init") - .set("key", encodeURIComponent(filePath)) + .set("datasetName", datasetName) + .set("filePath", encodeURIComponent(filePath)) .set("numParts", numParts.toString()); return this.http.post<{ uploadId: string; presignedUrls: string[]; physicalAddress: string }>( - `${AppSettings.getApiEndpoint()}/${DATASET_BASE_URL}/${did}/multipart-upload`, + `${AppSettings.getApiEndpoint()}/${DATASET_BASE_URL}/multipart-upload`, {}, { params } ); @@ -232,7 +235,7 @@ export class DatasetService { * Completes or aborts a multipart upload, sending part numbers and ETags to the backend. */ private finalizeMultipartUpload( - did: number, + datasetName: string, filePath: string, uploadId: string, parts: { PartNumber: number; ETag: string }[], @@ -241,11 +244,12 @@ export class DatasetService { ): Observable { const params = new HttpParams() .set("type", isAbort ? 
"abort" : "finish") - .set("key", encodeURIComponent(filePath)) + .set("datasetName", datasetName) + .set("filePath", encodeURIComponent(filePath)) .set("uploadId", uploadId); return this.http.post( - `${AppSettings.getApiEndpoint()}/dataset/${did}/multipart-upload`, + `${AppSettings.getApiEndpoint()}/${DATASET_BASE_URL}/multipart-upload`, { parts, physicalAddress }, { params } ); diff --git a/core/workflow-core/src/main/scala/edu/uci/ics/amber/core/storage/LakeFSFileStorage.scala b/core/workflow-core/src/main/scala/edu/uci/ics/amber/core/storage/LakeFSFileStorage.scala index f46a225c6a..07da708723 100644 --- a/core/workflow-core/src/main/scala/edu/uci/ics/amber/core/storage/LakeFSFileStorage.scala +++ b/core/workflow-core/src/main/scala/edu/uci/ics/amber/core/storage/LakeFSFileStorage.scala @@ -36,6 +36,7 @@ object LakeFSFileStorage { private lazy val branchesApi: BranchesApi = new BranchesApi(apiClient) private lazy val commitsApi: CommitsApi = new CommitsApi(apiClient) private lazy val refsApi: RefsApi = new RefsApi(apiClient) + private lazy val stagingApi: StagingApi = new StagingApi(apiClient) private lazy val experimentalApi: ExperimentalApi = new ExperimentalApi(apiClient) private val storageNamespaceURI: String = @@ -84,13 +85,12 @@ object LakeFSFileStorage { */ def writeFileToRepo( repoName: String, - branch: String, filePath: String, inputStream: InputStream ): ObjectStats = { val tempFilePath = Files.createTempFile("lakefs-upload-", ".tmp") val tempFileStream = new FileOutputStream(tempFilePath.toFile) - val buffer = new Array[Byte](1024) + val buffer = new Array[Byte](8192) // Create an iterator to repeatedly call inputStream.read, and direct buffered data to file Iterator @@ -102,7 +102,7 @@ object LakeFSFileStorage { tempFileStream.close() // Upload the temporary file to LakeFS - objectsApi.uploadObject(repoName, branch, filePath).content(tempFilePath.toFile).execute() + objectsApi.uploadObject(repoName, branchName, filePath).content(tempFilePath.toFile).execute() } /** @@ -124,14 +124,14 @@ object LakeFSFileStorage { * @param commitMessage Commit message. * @param operations File operations to perform before committing. 
*/ - def withCreateVersion(repoName: String, branch: String, commitMessage: String)( + def withCreateVersion(repoName: String, commitMessage: String)( operations: => Unit ): Commit = { operations val commit = new CommitCreation() .message(commitMessage) - commitsApi.commit(repoName, branch, commit).execute() + commitsApi.commit(repoName, branchName, commit).execute() } /** @@ -156,6 +156,14 @@ object LakeFSFileStorage { objectsApi.statObject(repoName, commitHash, filePath).presign(true).execute().getPhysicalAddress } + def getFilePresignedUploadUrl(repoName: String, filePath: String): String = { + stagingApi + .getPhysicalAddress(repoName, branchName, filePath) + .presign(true) + .execute() + .getPresignedUrl + } + /** */ def initiatePresignedMultipartUploads( @@ -265,7 +273,7 @@ object LakeFSFileStorage { objectsApi.deleteObject(repoName, branchName, filePath).execute() } - def resertObjectUploadOrDeletion(repoName: String, filePath: String): Unit = { + def resetObjectUploadOrDeletion(repoName: String, filePath: String): Unit = { val resetCreation: ResetCreation = new ResetCreation resetCreation.setType(TypeEnum.OBJECT) resetCreation.setPath(filePath) diff --git a/core/workflow-core/src/main/scala/edu/uci/ics/amber/core/storage/model/DatasetFileDocument.scala b/core/workflow-core/src/main/scala/edu/uci/ics/amber/core/storage/model/DatasetFileDocument.scala index ac2fa487e3..73308246dd 100644 --- a/core/workflow-core/src/main/scala/edu/uci/ics/amber/core/storage/model/DatasetFileDocument.scala +++ b/core/workflow-core/src/main/scala/edu/uci/ics/amber/core/storage/model/DatasetFileDocument.scala @@ -1,7 +1,10 @@ package edu.uci.ics.amber.core.storage.model import edu.uci.ics.amber.core.storage.LakeFSFileStorage -import edu.uci.ics.amber.core.storage.model.DatasetFileDocument.{fileServiceEndpoint, userJwtToken} +import edu.uci.ics.amber.core.storage.model.DatasetFileDocument.{ + fileServiceGetPresignURLEndpoint, + userJwtToken +} import edu.uci.ics.amber.core.storage.util.dataset.GitVersionControlLocalFileStorage import edu.uci.ics.amber.util.PathUtils @@ -14,8 +17,13 @@ import scala.jdk.CollectionConverters.IteratorHasAsScala object DatasetFileDocument { lazy val userJwtToken: String = sys.env.getOrElse("USER_JWT_TOKEN", "").trim - lazy val fileServiceEndpoint: String = - sys.env.getOrElse("FILE_SERVICE_ENDPOINT", "http://localhost:9092/api/dataset/presign").trim + lazy val fileServiceGetPresignURLEndpoint: String = + sys.env + .getOrElse( + "FILE_SERVICE_GET_PRESIGNED_URL_ENDPOINT", + "http://localhost:9092/api/dataset/presign-download" + ) + .trim } private[storage] class DatasetFileDocument(uri: URI) @@ -56,7 +64,7 @@ private[storage] class DatasetFileDocument(uri: URI) // Step 1: Get the presigned URL from the file service val presignRequestUrl = - s"$fileServiceEndpoint?datasetName=${getDatasetName()}&commitHash=${getVersionHash()}&filePath=${URLEncoder + s"$fileServiceGetPresignURLEndpoint?datasetName=${getDatasetName()}&commitHash=${getVersionHash()}&filePath=${URLEncoder .encode(getFileRelativePath(), StandardCharsets.UTF_8.name())}" val connection = new URL(presignRequestUrl).openConnection().asInstanceOf[HttpURLConnection] @@ -84,7 +92,7 @@ private[storage] class DatasetFileDocument(uri: URI) } catch { case e: Exception => throw new RuntimeException( - s"Failed to retrieve presigned URL from $fileServiceEndpoint: ${e.getMessage}", + s"Failed to retrieve presigned URL from $fileServiceGetPresignURLEndpoint: ${e.getMessage}", e ) } finally { From 
e05007049e259faf281255daa8d525fb0760f37f Mon Sep 17 00:00:00 2001 From: Jiadong Bai Date: Sat, 1 Mar 2025 22:12:37 -0800 Subject: [PATCH 25/47] finalize the sharing feature --- .../ics/texera/web/TexeraWebApplication.scala | 16 +- .../resource/DatasetAccessResource.scala | 2 +- .../service/resource/DatasetResource.scala | 148 +----------------- core/gui/src/app/common/type/dataset.ts | 2 +- .../user/list-item/list-item.component.html | 11 +- .../share-access/share-access.component.ts | 2 +- .../dataset-detail.component.html | 29 ++-- .../dataset-detail.component.scss | 13 -- .../dataset-detail.component.ts | 12 +- .../user-dataset-file-renderer.component.ts | 1 + ...ser-dataset-version-creator.component.html | 2 +- .../user-dataset-version-creator.component.ts | 2 +- 12 files changed, 38 insertions(+), 202 deletions(-) diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/web/TexeraWebApplication.scala b/core/amber/src/main/scala/edu/uci/ics/texera/web/TexeraWebApplication.scala index be2e9691ee..7c237ad7f9 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/web/TexeraWebApplication.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/web/TexeraWebApplication.scala @@ -1,6 +1,5 @@ package edu.uci.ics.texera.web -import com.fasterxml.jackson.databind.module.SimpleModule import com.fasterxml.jackson.module.scala.DefaultScalaModule import com.github.dirkraft.dropwizard.fileassets.FileAssetsBundle import com.typesafe.scalalogging.LazyLogging @@ -17,14 +16,7 @@ import edu.uci.ics.texera.web.resource.dashboard.DashboardResource import edu.uci.ics.texera.web.resource.dashboard.admin.execution.AdminExecutionResource import edu.uci.ics.texera.web.resource.dashboard.admin.user.AdminUserResource import edu.uci.ics.texera.web.resource.dashboard.hub.HubResource -import edu.uci.ics.texera.web.resource.dashboard.user.dataset.`type`.{ - DatasetFileNode, - DatasetFileNodeSerializer -} -import edu.uci.ics.texera.web.resource.dashboard.user.dataset.{ - DatasetAccessResource, - DatasetResource -} +import edu.uci.ics.texera.web.resource.dashboard.user.dataset.DatasetAccessResource import edu.uci.ics.texera.web.resource.dashboard.user.project.{ ProjectAccessResource, ProjectResource, @@ -89,11 +81,6 @@ class TexeraWebApplication bootstrap.addBundle(new WebsocketBundle(classOf[CollaborationResource])) // register scala module to dropwizard default object mapper bootstrap.getObjectMapper.registerModule(DefaultScalaModule) - - // register a new custom module and add the custom serializer into it - val customSerializerModule = new SimpleModule("CustomSerializers") - customSerializerModule.addSerializer(classOf[DatasetFileNode], new DatasetFileNodeSerializer()) - bootstrap.getObjectMapper.registerModule(customSerializerModule) } override def run(configuration: TexeraWebConfiguration, environment: Environment): Unit = { @@ -146,7 +133,6 @@ class TexeraWebApplication environment.jersey.register(classOf[ResultResource]) environment.jersey.register(classOf[HubResource]) environment.jersey.register(classOf[WorkflowVersionResource]) - environment.jersey.register(classOf[DatasetResource]) environment.jersey.register(classOf[DatasetAccessResource]) environment.jersey.register(classOf[ProjectResource]) environment.jersey.register(classOf[ProjectAccessResource]) diff --git a/core/file-service/src/main/scala/edu/uci/ics/texera/service/resource/DatasetAccessResource.scala b/core/file-service/src/main/scala/edu/uci/ics/texera/service/resource/DatasetAccessResource.scala index 877a1c05cd..3242a26026 100644 --- 
a/core/file-service/src/main/scala/edu/uci/ics/texera/service/resource/DatasetAccessResource.scala +++ b/core/file-service/src/main/scala/edu/uci/ics/texera/service/resource/DatasetAccessResource.scala @@ -24,7 +24,7 @@ object DatasetAccessResource { val datasetDao = new DatasetDao(ctx.configuration()) Option(datasetDao.fetchOneByDid(did)) .flatMap(dataset => Option(dataset.getIsPublic)) - .contains(1.toByte) + .contains(true) } def userHasReadAccess(ctx: DSLContext, did: Integer, uid: Integer): Boolean = { diff --git a/core/file-service/src/main/scala/edu/uci/ics/texera/service/resource/DatasetResource.scala b/core/file-service/src/main/scala/edu/uci/ics/texera/service/resource/DatasetResource.scala index 3f7e9480e7..64b5c81598 100644 --- a/core/file-service/src/main/scala/edu/uci/ics/texera/service/resource/DatasetResource.scala +++ b/core/file-service/src/main/scala/edu/uci/ics/texera/service/resource/DatasetResource.scala @@ -8,11 +8,6 @@ import edu.uci.ics.amber.core.storage.{ S3Storage, StorageConfig } -import edu.uci.ics.amber.core.storage.util.dataset.{ - GitVersionControlLocalFileStorage, - PhysicalFileNode -} -import edu.uci.ics.amber.util.PathUtils import edu.uci.ics.texera.dao.SqlServer import edu.uci.ics.texera.dao.SqlServer.withTransaction import edu.uci.ics.texera.dao.jooq.generated.enums.PrivilegeEnum @@ -58,57 +53,19 @@ import jakarta.ws.rs.core.{MediaType, Response, StreamingOutput} import org.glassfish.jersey.media.multipart.FormDataParam import org.jooq.{DSLContext, EnumType} -import java.io.{IOException, InputStream, OutputStream} -import java.net.{URI, URLDecoder} +import java.io.{InputStream, OutputStream} +import java.net.URLDecoder import java.nio.charset.StandardCharsets -import java.nio.file.Files import java.util.Optional -import java.util.zip.{ZipEntry, ZipOutputStream} import scala.collection.mutable.ListBuffer import scala.jdk.CollectionConverters._ import scala.jdk.OptionConverters._ -import scala.util.control.NonFatal -import scala.util.Using object DatasetResource { private val context = SqlServer .getInstance() .createDSLContext() - /** - * Fetch the size of a certain dataset version. - * - * @param name The target dataset's name (LakeFS repository name). - * @param versionHash The hash of the version. If None, fetch the latest version. - * @return The total size of all objects in the dataset version. - * @throws NoSuchElementException If the version hash is not found in the repository. 
- */ - def calculateDatasetVersionSize(name: String, versionHash: Option[String] = None): Long = { - // Retrieve all commits (versions) of the dataset repository - val commits = LakeFSFileStorage.retrieveVersionsOfRepository(name) - - // Determine the target commit - val targetCommit = versionHash match { - case Some(hash) => - commits - .find(_.getId == hash) - .getOrElse( - throw new NoSuchElementException( - s"Version hash '$hash' not found in repository '$name'" - ) - ) - case None => - commits.headOption // The latest commit (commits are sorted from latest to earliest) - .getOrElse(throw new NoSuchElementException(s"No versions found for dataset '$name'")) - } - - // Retrieve objects of the target version and sum up their sizes - val objects = LakeFSFileStorage.retrieveObjectsOfVersion(name, targetCommit.getId) - - // Sum the sizes of all objects in the dataset version - objects.map(_.getSizeBytes.longValue()).sum - } - /** * Helper function to get the dataset from DB using did */ @@ -152,12 +109,6 @@ object DatasetResource { .toScala } - // DatasetOperation defines the operations that will be applied when creating a new dataset version - private case class DatasetOperation( - filesToAdd: Map[java.nio.file.Path, InputStream], - filesToRemove: List[URI] - ) - case class DashboardDataset( dataset: Dataset, ownerEmail: String, @@ -176,7 +127,7 @@ object DatasetResource { sizeBytes: Option[Long] // Size of the changed file (None for directories) ) - case class DatasetDescriptionModification(name: String, description: String) + case class DatasetDescriptionModification(did: Integer, description: String) case class DatasetVersionRootFileNodesResponse( fileNodes: List[DatasetFileNode], @@ -408,16 +359,14 @@ class DatasetResource { ): Response = { withTransaction(context) { ctx => val uid = sessionUser.getUid - val datasetDao = new DatasetDao(ctx.configuration()) - val datasets = datasetDao.fetchByName(modificator.name).asScala.toList - if (datasets.isEmpty || !userHasWriteAccess(ctx, datasets.head.getDid, uid)) { + val dataset = getDatasetByID(ctx, modificator.did) + if (!userHasWriteAccess(ctx, modificator.did, uid)) { throw new ForbiddenException(ERR_USER_HAS_NO_ACCESS_TO_DATASET_MESSAGE) } - val datasetToChange = datasets.head - datasetToChange.setDescription(modificator.description) - datasetDao.update(datasetToChange) + dataset.setDescription(modificator.description) + datasetDao.update(dataset) Response.ok().build() } } @@ -990,89 +939,6 @@ class DatasetResource { }) } - /** - * Retrieves a ZIP file for a specific dataset version or the latest version. - * - * @param did The dataset ID (used when getLatest is true). - * @param dvid The dataset version ID, if given, retrieve this version; if not given, retrieve the latest version - * @param user The session user. - * @return A Response containing the dataset version as a ZIP file. 
- */ - @GET - @RolesAllowed(Array("REGULAR", "ADMIN")) - @Path("/version-zip") - def retrieveDatasetVersionZip( - @QueryParam("did") did: Integer, - @QueryParam("dvid") dvid: Optional[Integer], - @Auth user: SessionUser - ): Response = { - if (!userHasReadAccess(context, did, user.getUid)) { - throw new ForbiddenException(ERR_USER_HAS_NO_ACCESS_TO_DATASET_MESSAGE) - } - val dataset = getDatasetByID(context, did) - val version = if (dvid.isEmpty) { - getLatestDatasetVersion(context, did).getOrElse( - throw new NotFoundException(ERR_DATASET_VERSION_NOT_FOUND_MESSAGE) - ) - } else { - getDatasetVersionByID(context, Integer.valueOf(dvid.get)) - } - val targetDatasetPath = PathUtils.getDatasetPath(dataset.getDid) - val fileNodes = GitVersionControlLocalFileStorage.retrieveRootFileNodesOfVersion( - targetDatasetPath, - version.getVersionHash - ) - - val streamingOutput = new StreamingOutput { - override def write(outputStream: OutputStream): Unit = { - Using(new ZipOutputStream(outputStream)) { zipOutputStream => - def addFileNodeToZip(fileNode: PhysicalFileNode): Unit = { - val relativePath = fileNode.getRelativePath.toString - - if (fileNode.isDirectory) { - // For directories, add a ZIP entry with a trailing slash - zipOutputStream.putNextEntry(new ZipEntry(relativePath + "/")) - zipOutputStream.closeEntry() - - // Recursively add children - fileNode.getChildren.asScala.foreach(addFileNodeToZip) - } else { - // For files, add the file content - try { - zipOutputStream.putNextEntry(new ZipEntry(relativePath)) - Using(Files.newInputStream(fileNode.getAbsolutePath)) { inputStream => - inputStream.transferTo(zipOutputStream) - } - } catch { - case e: IOException => - throw new WebApplicationException(s"Error processing file: $relativePath", e) - } finally { - zipOutputStream.closeEntry() - } - } - } - - // Start the recursive process for each root file node - fileNodes.asScala.foreach(addFileNodeToZip) - }.recover { - case e: IOException => - throw new WebApplicationException("Error creating ZIP output stream", e) - case NonFatal(e) => - throw new WebApplicationException("Unexpected error while creating ZIP", e) - } - } - } - - Response - .ok(streamingOutput) - .header( - "Content-Disposition", - s"attachment; filename=${dataset.getName}-${version.getName}.zip" - ) - .`type`("application/zip") - .build() - } - @GET @Path("/datasetUserAccess") def datasetUserAccess( diff --git a/core/gui/src/app/common/type/dataset.ts b/core/gui/src/app/common/type/dataset.ts index be53032944..195f76d671 100644 --- a/core/gui/src/app/common/type/dataset.ts +++ b/core/gui/src/app/common/type/dataset.ts @@ -14,7 +14,7 @@ export interface Dataset { did: number | undefined; ownerUid: number | undefined; name: string; - isPublic: number; + isPublic: boolean; storagePath: string | undefined; description: string; creationTime: number | undefined; diff --git a/core/gui/src/app/dashboard/component/user/list-item/list-item.component.html b/core/gui/src/app/dashboard/component/user/list-item/list-item.component.html index 0a86c22b30..89b6f898c4 100644 --- a/core/gui/src/app/dashboard/component/user/list-item/list-item.component.html +++ b/core/gui/src/app/dashboard/component/user/list-item/list-item.component.html @@ -116,11 +116,12 @@ nz-col nzFlex="75px" class="resource-info"> -
- Size: {{ formatSize(entry.size) }} -
+ + + + + +
{ - this.isPublic = dashboardDataset.dataset.isPublic === 1; + this.isPublic = dashboardDataset.dataset.isPublic; }); } } diff --git a/core/gui/src/app/dashboard/component/user/user-dataset/user-dataset-explorer/dataset-detail.component.html b/core/gui/src/app/dashboard/component/user/user-dataset/user-dataset-explorer/dataset-detail.component.html index 7c653d87df..85397678ca 100644 --- a/core/gui/src/app/dashboard/component/user/user-dataset/user-dataset-explorer/dataset-detail.component.html +++ b/core/gui/src/app/dashboard/component/user/user-dataset/user-dataset-explorer/dataset-detail.component.html @@ -179,18 +179,19 @@
Choose a Version:
[nzValue]="version" [nzLabel]="version.name"> - + + + + + + + + + + + + +
Choose a Version: - + diff --git a/core/gui/src/app/dashboard/component/user/user-dataset/user-dataset-explorer/dataset-detail.component.scss b/core/gui/src/app/dashboard/component/user/user-dataset/user-dataset-explorer/dataset-detail.component.scss index 2102d746d8..babf3831f8 100644 --- a/core/gui/src/app/dashboard/component/user/user-dataset/user-dataset-explorer/dataset-detail.component.scss +++ b/core/gui/src/app/dashboard/component/user/user-dataset/user-dataset-explorer/dataset-detail.component.scss @@ -142,10 +142,6 @@ nz-select { color: red; } -nz-collapse { - margin-bottom: 10px; -} - .empty-version-indicator { margin-top: 15%; } @@ -153,12 +149,3 @@ nz-collapse { .upload-progress-container { margin-left: 20px; } - -//::ng-deep .ant-collapse-header { -// font-weight: bold !important; // Make the text bolder -// font-size: 1.2em !important; // Increase font size (adjust as needed) -// text-align: center !important; // Center the text -// display: flex; -// justify-content: center; -// align-items: center; -//} diff --git a/core/gui/src/app/dashboard/component/user/user-dataset/user-dataset-explorer/dataset-detail.component.ts b/core/gui/src/app/dashboard/component/user/user-dataset/user-dataset-explorer/dataset-detail.component.ts index 108eb5982d..befc32975f 100644 --- a/core/gui/src/app/dashboard/component/user/user-dataset/user-dataset-explorer/dataset-detail.component.ts +++ b/core/gui/src/app/dashboard/component/user/user-dataset/user-dataset-explorer/dataset-detail.component.ts @@ -163,6 +163,7 @@ export class DatasetDetailComponent implements OnInit { modal.afterClose.pipe(untilDestroyed(this)).subscribe(result => { if (result != null) { this.retrieveDatasetVersionList(); + this.userMakeChanges.emit(); } }); } @@ -200,7 +201,7 @@ export class DatasetDetailComponent implements OnInit { this.datasetName = dataset.name; this.datasetDescription = dataset.description; this.userDatasetAccessLevel = dashboardDataset.accessPrivilege; - this.datasetIsPublic = dataset.isPublic === 1; + this.datasetIsPublic = dataset.isPublic; if (typeof dataset.creationTime === "number") { this.datasetCreationTime = new Date(dataset.creationTime).toString(); } @@ -236,15 +237,6 @@ export class DatasetDetailComponent implements OnInit { this.downloadService.downloadSingleFile(this.currentDisplayedFileName).pipe(untilDestroyed(this)).subscribe(); }; - onClickDownloadVersionAsZip = (): void => { - if (!this.did || !this.selectedVersion?.dvid) return; - - this.downloadService - .downloadDatasetVersion(this.did, this.selectedVersion.dvid, this.datasetName, this.selectedVersion.name) - .pipe(untilDestroyed(this)) - .subscribe(); - }; - onClickScaleTheView() { this.isMaximized = !this.isMaximized; } diff --git a/core/gui/src/app/dashboard/component/user/user-dataset/user-dataset-explorer/user-dataset-file-renderer/user-dataset-file-renderer.component.ts b/core/gui/src/app/dashboard/component/user/user-dataset/user-dataset-explorer/user-dataset-file-renderer/user-dataset-file-renderer.component.ts index 659289a88d..7f649b821b 100644 --- a/core/gui/src/app/dashboard/component/user/user-dataset/user-dataset-explorer/user-dataset-file-renderer/user-dataset-file-renderer.component.ts +++ b/core/gui/src/app/dashboard/component/user/user-dataset/user-dataset-explorer/user-dataset-file-renderer/user-dataset-file-renderer.component.ts @@ -9,6 +9,7 @@ import { NotificationService } from "../../../../../../common/service/notificati export const MIME_TYPES = { JPEG: "image/jpeg", + JPG: "image/jpeg", PNG: "image/png", 
CSV: "text/csv", TXT: "text/plain", diff --git a/core/gui/src/app/dashboard/component/user/user-dataset/user-dataset-explorer/user-dataset-version-creator/user-dataset-version-creator.component.html b/core/gui/src/app/dashboard/component/user/user-dataset/user-dataset-explorer/user-dataset-version-creator/user-dataset-version-creator.component.html index 4ed8ec4879..477d1ba61c 100644 --- a/core/gui/src/app/dashboard/component/user/user-dataset/user-dataset-explorer/user-dataset-version-creator/user-dataset-version-creator.component.html +++ b/core/gui/src/app/dashboard/component/user/user-dataset/user-dataset-explorer/user-dataset-version-creator/user-dataset-version-creator.component.html @@ -2,7 +2,7 @@ + nzTip="Creating...">
diff --git a/core/gui/src/app/dashboard/component/user/user-dataset/user-dataset-explorer/user-dataset-version-creator/user-dataset-version-creator.component.ts b/core/gui/src/app/dashboard/component/user/user-dataset/user-dataset-explorer/user-dataset-version-creator/user-dataset-version-creator.component.ts index 8f1225adc7..f7a49244f5 100644 --- a/core/gui/src/app/dashboard/component/user/user-dataset/user-dataset-explorer/user-dataset-version-creator/user-dataset-version-creator.component.ts +++ b/core/gui/src/app/dashboard/component/user/user-dataset/user-dataset-explorer/user-dataset-version-creator/user-dataset-version-creator.component.ts @@ -136,7 +136,7 @@ export class UserDatasetVersionCreatorComponent implements OnInit { const ds: Dataset = { name: this.datasetNameSanitization(this.form.get("name")?.value), description: this.form.get("description")?.value, - isPublic: this.isDatasetPublic ? 1 : 0, + isPublic: this.isDatasetPublic, did: undefined, ownerUid: undefined, storagePath: undefined, From a2f39e4dabeeb09809e789c9bee296b3b0c73bff Mon Sep 17 00:00:00 2001 From: Jiadong Bai Date: Sun, 2 Mar 2025 14:39:14 -0800 Subject: [PATCH 26/47] fix the delete --- .../src/main/resources/docker-compose.yml | 28 ++++++------ .../uci/ics/texera/service/FileService.scala | 9 +++- .../service/resource/DatasetResource.scala | 18 ++++---- .../user-dataset/user-dataset.component.ts | 2 +- .../service/user/dataset/dataset.service.ts | 33 ++++---------- .../src/main/resources/storage-config.yaml | 8 ++-- .../core/storage/LakeFSFileStorage.scala | 10 +++++ .../ics/amber/core/storage/S3Storage.scala | 43 ++++++++++++++++++- .../amber/core/storage/StorageConfig.scala | 10 ----- 9 files changed, 94 insertions(+), 67 deletions(-) diff --git a/core/file-service/src/main/resources/docker-compose.yml b/core/file-service/src/main/resources/docker-compose.yml index 4c69ba52c7..ff85a7b9ca 100644 --- a/core/file-service/src/main/resources/docker-compose.yml +++ b/core/file-service/src/main/resources/docker-compose.yml @@ -1,40 +1,40 @@ -version: '3.8' - services: minio: image: minio/minio:latest container_name: minio ports: - - "9500:9000" # MinIO API (use this in LakeFS config) - - "9501:9001" # MinIO Console UI + - "9000:9000" # MinIO API (use this in LakeFS config) + - "9001:9001" # MinIO Console UI environment: - MINIO_ROOT_USER=texera_minio - MINIO_ROOT_PASSWORD=password - volumes: - - /Users/baijiadong/Desktop/chenlab/texera/core/file-service/src/main/user-resources/minio:/data command: server --console-address ":9001" /data + profiles: + - local-lakefs lakefs: image: treeverse/lakefs:latest container_name: lakefs depends_on: - - minio # Ensure MinIO starts first + - minio ports: - "8000:8000" # LakeFS API/UI environment: - # PostgreSQL Connection (External DB) - - LAKEFS_DATABASE_TYPE=postgres - - LAKEFS_DATABASE_POSTGRES_CONNECTION_STRING=postgresql://texera_lakefs_admin:password@host.docker.internal:5432/texera_lakefs - + # LakeFS metadata store + - LAKEFS_DATABASE_TYPE=local # Authentication - LAKEFS_AUTH_ENCRYPT_SECRET_KEY=random_string_for_lakefs - # MinIO Storage Configuration for LakeFS - LAKEFS_BLOCKSTORE_TYPE=s3 - LAKEFS_BLOCKSTORE_S3_FORCE_PATH_STYLE=true - LAKEFS_BLOCKSTORE_S3_ENDPOINT=http://minio:9000 # MinIO internal service URL - LAKEFS_BLOCKSTORE_S3_DISCOVER_BUCKET_REGION=false - - LAKEFS_BLOCKSTORE_S3_PRE_SIGNED_ENDPOINT=http://localhost:9500 + - LAKEFS_BLOCKSTORE_S3_PRE_SIGNED_ENDPOINT=http://localhost:9000 - LAKEFS_BLOCKSTORE_S3_CREDENTIALS_ACCESS_KEY_ID=texera_minio - 
LAKEFS_BLOCKSTORE_S3_CREDENTIALS_SECRET_ACCESS_KEY=password - command: run \ No newline at end of file + # LakeFS Installation Credentials + - LAKEFS_INSTALLATION_USER_NAME=texera-admin + - LAKEFS_INSTALLATION_ACCESS_KEY_ID=AKIAIOSFOLKFSSAMPLES + - LAKEFS_INSTALLATION_SECRET_ACCESS_KEY=wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY + profiles: + - local-lakefs \ No newline at end of file diff --git a/core/file-service/src/main/scala/edu/uci/ics/texera/service/FileService.scala b/core/file-service/src/main/scala/edu/uci/ics/texera/service/FileService.scala index edd1d6d1e5..09af165333 100644 --- a/core/file-service/src/main/scala/edu/uci/ics/texera/service/FileService.scala +++ b/core/file-service/src/main/scala/edu/uci/ics/texera/service/FileService.scala @@ -4,7 +4,7 @@ import com.fasterxml.jackson.databind.module.SimpleModule import io.dropwizard.core.Application import io.dropwizard.core.setup.{Bootstrap, Environment} import com.fasterxml.jackson.module.scala.DefaultScalaModule -import edu.uci.ics.amber.core.storage.StorageConfig +import edu.uci.ics.amber.core.storage.{LakeFSFileStorage, S3Storage, StorageConfig} import edu.uci.ics.amber.util.PathUtils.fileServicePath import edu.uci.ics.texera.dao.SqlServer import edu.uci.ics.texera.service.`type`.DatasetFileNode @@ -19,7 +19,7 @@ class FileService extends Application[FileServiceConfiguration] { // Register Scala module to Dropwizard default object mapper bootstrap.getObjectMapper.registerModule(DefaultScalaModule) - // register a new custom module and add the custom serializer into it + // register a new custom module just for DatasetFileNode serde/deserde val customSerializerModule = new SimpleModule("CustomSerializers") customSerializerModule.addSerializer(classOf[DatasetFileNode], new DatasetFileNodeSerializer()) bootstrap.getObjectMapper.registerModule(customSerializerModule) @@ -34,6 +34,11 @@ class FileService extends Application[FileServiceConfiguration] { StorageConfig.jdbcPassword ) + // check if the texera dataset bucket exists, if not create it + S3Storage.createBucketIfNotExist(StorageConfig.lakefsBlockStorageBucketName) + // check if we can connect to the lakeFS service + LakeFSFileStorage.healthCheck() + environment.jersey.register(classOf[SessionHandler]) environment.servlets.setSessionHandler(new SessionHandler) diff --git a/core/file-service/src/main/scala/edu/uci/ics/texera/service/resource/DatasetResource.scala b/core/file-service/src/main/scala/edu/uci/ics/texera/service/resource/DatasetResource.scala index 64b5c81598..ef8f8a81a4 100644 --- a/core/file-service/src/main/scala/edu/uci/ics/texera/service/resource/DatasetResource.scala +++ b/core/file-service/src/main/scala/edu/uci/ics/texera/service/resource/DatasetResource.scala @@ -316,20 +316,20 @@ class DatasetResource { } } - @POST + @DELETE @RolesAllowed(Array("REGULAR", "ADMIN")) - @Path("/delete") - def deleteDataset(datasetName: String, @Auth user: SessionUser): Response = { + @Path("/{did}") + def deleteDataset(@PathParam("did") did: Integer, @Auth user: SessionUser): Response = { val uid = user.getUid withTransaction(context) { ctx => val datasetDao = new DatasetDao(ctx.configuration()) - val dataset = datasetDao.fetchByName(datasetName).asScala.toList - if (dataset.isEmpty || !userOwnDataset(ctx, dataset.head.getDid, uid)) { + val dataset = getDatasetByID(ctx, did) + if (!userOwnDataset(ctx, dataset.getDid, uid)) { // throw the exception that user has no access to certain dataset throw new ForbiddenException(ERR_USER_HAS_NO_ACCESS_TO_DATASET_MESSAGE) } try { - 
LakeFSFileStorage.deleteRepo(datasetName) + LakeFSFileStorage.deleteRepo(dataset.getName) } catch { case e: Exception => throw new WebApplicationException( @@ -339,10 +339,12 @@ class DatasetResource { } // delete the directory on S3 - S3Storage.deleteDirectory(StorageConfig.lakefsBlockStorageBucketName, datasetName) + if (S3Storage.directoryExists(StorageConfig.lakefsBlockStorageBucketName, dataset.getName)) { + S3Storage.deleteDirectory(StorageConfig.lakefsBlockStorageBucketName, dataset.getName) + } // delete the dataset from the DB - datasetDao.deleteById(dataset.head.getDid) + datasetDao.deleteById(dataset.getDid) Response.ok().build() } diff --git a/core/gui/src/app/dashboard/component/user/user-dataset/user-dataset.component.ts b/core/gui/src/app/dashboard/component/user/user-dataset/user-dataset.component.ts index 9250126ee6..ca1b69da63 100644 --- a/core/gui/src/app/dashboard/component/user/user-dataset/user-dataset.component.ts +++ b/core/gui/src/app/dashboard/component/user/user-dataset/user-dataset.component.ts @@ -189,7 +189,7 @@ export class UserDatasetComponent implements AfterViewInit { return; } this.datasetService - .deleteDatasets([entry.dataset.dataset.did]) + .deleteDatasets(entry.dataset.dataset.did) .pipe(untilDestroyed(this)) .subscribe(_ => { this.searchResultsComponent.entries = this.searchResultsComponent.entries.filter( diff --git a/core/gui/src/app/dashboard/service/user/dataset/dataset.service.ts b/core/gui/src/app/dashboard/service/user/dataset/dataset.service.ts index 1f14465f5c..3a09aa5fbb 100644 --- a/core/gui/src/app/dashboard/service/user/dataset/dataset.service.ts +++ b/core/gui/src/app/dashboard/service/user/dataset/dataset.service.ts @@ -9,6 +9,7 @@ import { FileUploadItem } from "../../../type/dashboard-file.interface"; import { DatasetFileNode } from "../../../../common/type/datasetVersionFileTree"; import { DatasetStagedObject } from "../../../../common/type/dataset-staged-object"; import { S3Client } from "@aws-sdk/client-s3"; +import { environment } from "../../../../../environments/environment"; export const DATASET_BASE_URL = "dataset"; export const DATASET_CREATE_URL = DATASET_BASE_URL + "/create"; @@ -28,8 +29,6 @@ export const DATASET_PUBLIC_VERSION_BASE_URL = "publicVersion"; export const DATASET_PUBLIC_VERSION_RETRIEVE_LIST_URL = DATASET_PUBLIC_VERSION_BASE_URL + "/list"; export const DATASET_GET_OWNERS_URL = DATASET_BASE_URL + "/datasetUserAccess"; -const MULTIPART_UPLOAD_PART_SIZE_MB = 50 * 1024 * 1024; // 50MB per part - @Injectable({ providedIn: "root", }) @@ -122,7 +121,7 @@ export class DatasetService { filePath: string, file: File ): Observable<{ filePath: string; percentage: number; status: "uploading" | "finished" | "aborted" }> { - const partCount = Math.ceil(file.size / MULTIPART_UPLOAD_PART_SIZE_MB); + const partCount = Math.ceil(file.size / environment.multipartUploadChunkSizeByte); return new Observable(observer => { this.initiateMultipartUpload(datasetName, filePath, partCount) @@ -138,8 +137,8 @@ export class DatasetService { let uploadedCount = 0; // Track uploaded parts const uploadObservables = initiateResponse.presignedUrls.map((url, index) => { - const start = index * MULTIPART_UPLOAD_PART_SIZE_MB; - const end = Math.min(start + MULTIPART_UPLOAD_PART_SIZE_MB, file.size); + const start = index * environment.multipartUploadChunkSizeByte; + const end = Math.min(start + environment.multipartUploadChunkSizeByte, file.size); const chunk = file.slice(start, end); return from(fetch(url, { method: "PUT", body: chunk })).pipe( @@ 
-261,16 +260,9 @@ export class DatasetService { * @param filePath File path to reset */ public resetDatasetFileDiff(did: number, filePath: string): Observable { - const apiUrl = `${AppSettings.getApiEndpoint()}/${DATASET_BASE_URL}/${did}/diff`; const params = new HttpParams().set("filePath", encodeURIComponent(filePath)); - return this.http.put(apiUrl, {}, { params }).pipe( - tap(() => console.log(`Reset file diff for dataset ${did}, file: ${filePath}`)), - catchError((error: unknown) => { - console.error(`Failed to reset file diff for ${filePath}:`, error); - return throwError(() => error); - }) - ); + return this.http.put(`${AppSettings.getApiEndpoint()}/${DATASET_BASE_URL}/${did}/diff`, {}, { params }); } /** @@ -279,16 +271,9 @@ export class DatasetService { * @param filePath File path to delete */ public deleteDatasetFile(did: number, filePath: string): Observable { - const apiUrl = `${AppSettings.getApiEndpoint()}/${DATASET_BASE_URL}/${did}/file`; const params = new HttpParams().set("filePath", encodeURIComponent(filePath)); - return this.http.delete(apiUrl, { params }).pipe( - tap(() => console.log(`Deleted file from dataset ${did}, file: ${filePath}`)), - catchError((error: unknown) => { - console.error(`Failed to delete file ${filePath}:`, error); - return throwError(() => error); - }) - ); + return this.http.delete(`${AppSettings.getApiEndpoint()}/${DATASET_BASE_URL}/${did}/file`, { params }); } /** @@ -346,10 +331,8 @@ export class DatasetService { return this.http.get<{ fileNodes: DatasetFileNode[]; size: number }>(apiUrl); } - public deleteDatasets(dids: number[]): Observable { - return this.http.post(`${AppSettings.getApiEndpoint()}/${DATASET_DELETE_URL}`, { - dids: dids, - }); + public deleteDatasets(did: number): Observable { + return this.http.delete(`${AppSettings.getApiEndpoint()}/${DATASET_BASE_URL}/${did}`); } public updateDatasetName(did: number, name: string): Observable { diff --git a/core/workflow-core/src/main/resources/storage-config.yaml b/core/workflow-core/src/main/resources/storage-config.yaml index f41780864b..3678025298 100644 --- a/core/workflow-core/src/main/resources/storage-config.yaml +++ b/core/workflow-core/src/main/resources/storage-config.yaml @@ -31,16 +31,14 @@ storage: endpoint: "http://127.0.0.1:8000/api/v1" auth: api-secret: "random_string_for_lakefs" - username: "AKIAJIWZ57BWHNDAGMPQ" - password: "Y5e/aFeE+ZM1AahSCCEvH+GXkFZq4Y3qihExq2fw" + username: "AKIAIOSFOLKFSSAMPLES" + password: "wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY" block-storage: type: "s3" bucket-name: "texera-dataset" s3: - presigned-url-upload-expiration-minutes: 15 - presigned-url-download-expiration-minutes: 15 - endpoint: "http://localhost:9500" + endpoint: "http://localhost:9000" auth: username: "texera_minio" password: "password" diff --git a/core/workflow-core/src/main/scala/edu/uci/ics/amber/core/storage/LakeFSFileStorage.scala b/core/workflow-core/src/main/scala/edu/uci/ics/amber/core/storage/LakeFSFileStorage.scala index 07da708723..47f9c2e4d1 100644 --- a/core/workflow-core/src/main/scala/edu/uci/ics/amber/core/storage/LakeFSFileStorage.scala +++ b/core/workflow-core/src/main/scala/edu/uci/ics/amber/core/storage/LakeFSFileStorage.scala @@ -38,12 +38,22 @@ object LakeFSFileStorage { private lazy val refsApi: RefsApi = new RefsApi(apiClient) private lazy val stagingApi: StagingApi = new StagingApi(apiClient) private lazy val experimentalApi: ExperimentalApi = new ExperimentalApi(apiClient) + private lazy val healthCheckApi: HealthCheckApi = new HealthCheckApi(apiClient) 
private val storageNamespaceURI: String = s"${StorageConfig.lakefsBlockStorageType}://${StorageConfig.lakefsBlockStorageBucketName}" private val branchName: String = "main" + def healthCheck(): Unit = { + try { + this.healthCheckApi.healthCheck().execute() + } catch { + case e: Exception => + throw new RuntimeException(s"Failed to connect to lake fs server: ${e.getMessage}") + } + } + /** * Initializes a new repository in LakeFS. * diff --git a/core/workflow-core/src/main/scala/edu/uci/ics/amber/core/storage/S3Storage.scala b/core/workflow-core/src/main/scala/edu/uci/ics/amber/core/storage/S3Storage.scala index 4b1ac12eee..58248b37a1 100644 --- a/core/workflow-core/src/main/scala/edu/uci/ics/amber/core/storage/S3Storage.scala +++ b/core/workflow-core/src/main/scala/edu/uci/ics/amber/core/storage/S3Storage.scala @@ -29,6 +29,47 @@ object S3Storage { .build() } + /** + * Checks if a directory (prefix) exists within an S3 bucket. + * + * @param bucketName The bucket name. + * @param directoryPrefix The directory (prefix) to check (must end with `/`). + * @return True if the directory contains at least one object, False otherwise. + */ + def directoryExists(bucketName: String, directoryPrefix: String): Boolean = { + // Ensure the prefix ends with `/` to correctly match directories + val normalizedPrefix = + if (directoryPrefix.endsWith("/")) directoryPrefix else directoryPrefix + "/" + + val listRequest = ListObjectsV2Request + .builder() + .bucket(bucketName) + .prefix(normalizedPrefix) + .maxKeys(1) // Only check if at least one object exists + .build() + + val listResponse = s3Client.listObjectsV2(listRequest) + !listResponse.contents().isEmpty // If contents exist, directory exists + } + + /** + * Creates an S3 bucket if it does not already exist. + * + * @param bucketName The name of the bucket to create. + */ + def createBucketIfNotExist(bucketName: String): Unit = { + try { + // Check if the bucket already exists + s3Client.headBucket(HeadBucketRequest.builder().bucket(bucketName).build()) + } catch { + case _: NoSuchBucketException | _: S3Exception => + // If the bucket does not exist, create it + val createBucketRequest = CreateBucketRequest.builder().bucket(bucketName).build() + s3Client.createBucket(createBucketRequest) + println(s"Bucket '$bucketName' created successfully.") + } + } + /** * Deletes a directory (all objects under a given prefix) from a bucket. 
* @@ -65,8 +106,6 @@ object S3Storage { .getInstance("MD5") .digest(deleteRequest.toString.getBytes("UTF-8")) - val contentMD5 = Base64.getEncoder.encodeToString(md5Hash) - // Convert object keys to S3 DeleteObjectsRequest format val deleteObjectsRequest = DeleteObjectsRequest .builder() diff --git a/core/workflow-core/src/main/scala/edu/uci/ics/amber/core/storage/StorageConfig.scala b/core/workflow-core/src/main/scala/edu/uci/ics/amber/core/storage/StorageConfig.scala index 99b8a7b739..184272519a 100644 --- a/core/workflow-core/src/main/scala/edu/uci/ics/amber/core/storage/StorageConfig.scala +++ b/core/workflow-core/src/main/scala/edu/uci/ics/amber/core/storage/StorageConfig.scala @@ -218,16 +218,6 @@ object StorageConfig { .asInstanceOf[Map[String, Any]]("endpoint") .asInstanceOf[String] - val s3PresignedUrlUploadExpirationMinutes: Int = conf("storage") - .asInstanceOf[Map[String, Any]]("s3") - .asInstanceOf[Map[String, Any]]("presigned-url-upload-expiration-minutes") - .asInstanceOf[Int] - - val s3PresignedUrlDownloadExpirationMinutes: Int = conf("storage") - .asInstanceOf[Map[String, Any]]("s3") - .asInstanceOf[Map[String, Any]]("presigned-url-download-expiration-minutes") - .asInstanceOf[Int] - val s3Username: String = conf("storage") .asInstanceOf[Map[String, Any]]("s3") .asInstanceOf[Map[String, Any]]("auth") From 9daac7dfd9362bddab33a6008232f44bd9069864 Mon Sep 17 00:00:00 2001 From: Jiadong Bai Date: Sun, 2 Mar 2025 23:17:05 -0800 Subject: [PATCH 27/47] recover the frontend change --- core/gui/src/environments/environment.default.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/gui/src/environments/environment.default.ts b/core/gui/src/environments/environment.default.ts index 7842d4d28d..52560cdcfe 100644 --- a/core/gui/src/environments/environment.default.ts +++ b/core/gui/src/environments/environment.default.ts @@ -76,7 +76,7 @@ export const defaultEnvironment = { /** * the file size limit for dataset upload */ - singleFileUploadMaximumSizeMB: 20480, + singleFileUploadMaximumSizeMB: 20, /** * default data transfer batch size for workflows From 049e9a969a97a3de8ff6b8ec51d956d74cc96260 Mon Sep 17 00:00:00 2001 From: Jiadong Bai Date: Sun, 2 Mar 2025 23:18:47 -0800 Subject: [PATCH 28/47] fix test --- .../scala/edu/uci/ics/amber/storage/FileResolverSpec.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/core/workflow-core/src/test/scala/edu/uci/ics/amber/storage/FileResolverSpec.scala b/core/workflow-core/src/test/scala/edu/uci/ics/amber/storage/FileResolverSpec.scala index 54c833fa6a..8442a995f3 100644 --- a/core/workflow-core/src/test/scala/edu/uci/ics/amber/storage/FileResolverSpec.scala +++ b/core/workflow-core/src/test/scala/edu/uci/ics/amber/storage/FileResolverSpec.scala @@ -91,10 +91,10 @@ class FileResolverSpec val dataset1TxtUri = FileResolver.resolve(dataset1TxtFilePath) assert( - datasetACsvUri.toString == f"${FileResolver.DATASET_FILE_URI_SCHEME}:///${testDataset.getDid}/${testDatasetVersion2.getVersionHash}/directory/a.csv" + datasetACsvUri.toString == f"${FileResolver.DATASET_FILE_URI_SCHEME}:///${testDataset.getName}/${testDatasetVersion2.getVersionHash}/directory/a.csv" ) assert( - dataset1TxtUri.toString == f"${FileResolver.DATASET_FILE_URI_SCHEME}:///${testDataset.getDid}/${testDatasetVersion1.getVersionHash}/1.txt" + dataset1TxtUri.toString == f"${FileResolver.DATASET_FILE_URI_SCHEME}:///${testDataset.getName}/${testDatasetVersion1.getVersionHash}/1.txt" ) } From 3df767adb3cd140dc5d24886911f8c7815d8c9ab Mon Sep 
17 00:00:00 2001 From: Jiadong Bai Date: Mon, 3 Mar 2025 09:48:12 -0800 Subject: [PATCH 29/47] fix backend dependency and fix frontend --- core/file-service/build.sbt | 4 +- .../uci/ics/texera/service/FileService.scala | 8 ++-- .../service/resource/DatasetResource.scala | 48 +++++++++---------- .../service/util/S3StorageClient.scala} | 6 +-- ...dataset-staged-objects-list.component.scss | 16 +------ core/workflow-core/build.sbt | 5 +- .../storage/model/DatasetFileDocument.scala | 4 +- .../LakeFSStorageClient.scala} | 4 +- 8 files changed, 40 insertions(+), 55 deletions(-) rename core/{workflow-core/src/main/scala/edu/uci/ics/amber/core/storage/S3Storage.scala => file-service/src/main/scala/edu/uci/ics/texera/service/util/S3StorageClient.scala} (97%) rename core/workflow-core/src/main/scala/edu/uci/ics/amber/core/storage/{LakeFSFileStorage.scala => util/LakeFSStorageClient.scala} (99%) diff --git a/core/file-service/build.sbt b/core/file-service/build.sbt index d2bb7c15cb..bcb6928c4a 100644 --- a/core/file-service/build.sbt +++ b/core/file-service/build.sbt @@ -63,5 +63,7 @@ libraryDependencies ++= Seq( "com.github.toastshaman" % "dropwizard-auth-jwt" % "1.1.2-0", "org.bitbucket.b_c" % "jose4j" % "0.9.6", "org.playframework" %% "play-json" % "3.1.0-M1", - "io.lakefs" % "sdk" % "1.48.0" + "software.amazon.awssdk" % "s3" % "2.29.51", + "software.amazon.awssdk" % "auth" % "2.29.51", + "software.amazon.awssdk" % "regions" % "2.29.51", ) diff --git a/core/file-service/src/main/scala/edu/uci/ics/texera/service/FileService.scala b/core/file-service/src/main/scala/edu/uci/ics/texera/service/FileService.scala index 09af165333..de9a40727b 100644 --- a/core/file-service/src/main/scala/edu/uci/ics/texera/service/FileService.scala +++ b/core/file-service/src/main/scala/edu/uci/ics/texera/service/FileService.scala @@ -4,13 +4,15 @@ import com.fasterxml.jackson.databind.module.SimpleModule import io.dropwizard.core.Application import io.dropwizard.core.setup.{Bootstrap, Environment} import com.fasterxml.jackson.module.scala.DefaultScalaModule -import edu.uci.ics.amber.core.storage.{LakeFSFileStorage, S3Storage, StorageConfig} +import edu.uci.ics.amber.core.storage.StorageConfig +import edu.uci.ics.amber.core.storage.util.LakeFSStorageClient import edu.uci.ics.amber.util.PathUtils.fileServicePath import edu.uci.ics.texera.dao.SqlServer import edu.uci.ics.texera.service.`type`.DatasetFileNode import edu.uci.ics.texera.service.`type`.serde.DatasetFileNodeSerializer import edu.uci.ics.texera.service.auth.{JwtAuthFilter, SessionUser} import edu.uci.ics.texera.service.resource.{DatasetAccessResource, DatasetResource} +import edu.uci.ics.texera.service.util.{S3StorageClient} import io.dropwizard.auth.AuthDynamicFeature import org.eclipse.jetty.server.session.SessionHandler @@ -35,9 +37,9 @@ class FileService extends Application[FileServiceConfiguration] { ) // check if the texera dataset bucket exists, if not create it - S3Storage.createBucketIfNotExist(StorageConfig.lakefsBlockStorageBucketName) + S3StorageClient.createBucketIfNotExist(StorageConfig.lakefsBlockStorageBucketName) // check if we can connect to the lakeFS service - LakeFSFileStorage.healthCheck() + LakeFSStorageClient.healthCheck() environment.jersey.register(classOf[SessionHandler]) environment.servlets.setSessionHandler(new SessionHandler) diff --git a/core/file-service/src/main/scala/edu/uci/ics/texera/service/resource/DatasetResource.scala b/core/file-service/src/main/scala/edu/uci/ics/texera/service/resource/DatasetResource.scala index 
ef8f8a81a4..037a8d03d3 100644 --- a/core/file-service/src/main/scala/edu/uci/ics/texera/service/resource/DatasetResource.scala +++ b/core/file-service/src/main/scala/edu/uci/ics/texera/service/resource/DatasetResource.scala @@ -1,13 +1,8 @@ package edu.uci.ics.texera.service.resource import edu.uci.ics.amber.core.storage.model.OnDataset -import edu.uci.ics.amber.core.storage.{ - DocumentFactory, - FileResolver, - LakeFSFileStorage, - S3Storage, - StorageConfig -} +import edu.uci.ics.amber.core.storage.util.LakeFSStorageClient +import edu.uci.ics.amber.core.storage.{DocumentFactory, FileResolver, StorageConfig} import edu.uci.ics.texera.dao.SqlServer import edu.uci.ics.texera.dao.SqlServer.withTransaction import edu.uci.ics.texera.dao.jooq.generated.enums.PrivilegeEnum @@ -46,6 +41,7 @@ import edu.uci.ics.texera.service.resource.DatasetResource.{ getDatasetVersionByID, getLatestDatasetVersion } +import edu.uci.ics.texera.service.util.S3StorageClient import io.dropwizard.auth.Auth import jakarta.annotation.security.RolesAllowed import jakarta.ws.rs._ @@ -189,7 +185,7 @@ class DatasetResource { // Try to initialize the repository in LakeFS try { - LakeFSFileStorage.initRepo(datasetName) + LakeFSStorageClient.initRepo(datasetName) } catch { case e: Exception => throw new WebApplicationException( @@ -252,7 +248,7 @@ class DatasetResource { val datasetName = dataset.getName // Check if there are any changes in LakeFS before creating a new version - val diffs = LakeFSFileStorage.retrieveUncommittedObjects(repoName = datasetName) + val diffs = LakeFSStorageClient.retrieveUncommittedObjects(repoName = datasetName) if (diffs.isEmpty) { throw new WebApplicationException( @@ -276,7 +272,7 @@ class DatasetResource { } // Create a commit in LakeFS - val commit = LakeFSFileStorage.createCommit( + val commit = LakeFSStorageClient.createCommit( repoName = datasetName, branch = "main", commitMessage = s"Created dataset version: $newVersionName" @@ -304,7 +300,7 @@ class DatasetResource { .into(classOf[DatasetVersion]) // Retrieve committed file structure - val fileNodes = LakeFSFileStorage.retrieveObjectsOfVersion(datasetName, commit.getId) + val fileNodes = LakeFSStorageClient.retrieveObjectsOfVersion(datasetName, commit.getId) DashboardDatasetVersion( insertedVersion, @@ -329,7 +325,7 @@ class DatasetResource { throw new ForbiddenException(ERR_USER_HAS_NO_ACCESS_TO_DATASET_MESSAGE) } try { - LakeFSFileStorage.deleteRepo(dataset.getName) + LakeFSStorageClient.deleteRepo(dataset.getName) } catch { case e: Exception => throw new WebApplicationException( @@ -339,8 +335,10 @@ class DatasetResource { } // delete the directory on S3 - if (S3Storage.directoryExists(StorageConfig.lakefsBlockStorageBucketName, dataset.getName)) { - S3Storage.deleteDirectory(StorageConfig.lakefsBlockStorageBucketName, dataset.getName) + if ( + S3StorageClient.directoryExists(StorageConfig.lakefsBlockStorageBucketName, dataset.getName) + ) { + S3StorageClient.deleteDirectory(StorageConfig.lakefsBlockStorageBucketName, dataset.getName) } // delete the dataset from the DB @@ -398,7 +396,7 @@ class DatasetResource { // Decode file path val filePath = URLDecoder.decode(encodedFilePath, StandardCharsets.UTF_8.name()) // TODO: in the future consider using multipart to upload this stream more faster - LakeFSFileStorage.writeFileToRepo(datasetName, filePath, fileStream) + LakeFSStorageClient.writeFileToRepo(datasetName, filePath, fileStream) Response.ok(Map("message" -> "File uploaded successfully")).build() } } @@ -435,7 +433,7 @@ class 
DatasetResource { throw new ForbiddenException(ERR_USER_HAS_NO_ACCESS_TO_DATASET_MESSAGE) } - val url = LakeFSFileStorage.getFilePresignedUrl(dsName, commit, decodedPathStr) + val url = LakeFSStorageClient.getFilePresignedUrl(dsName, commit, decodedPathStr) Response.ok(Map("presignedUrl" -> url)).build() } @@ -454,7 +452,7 @@ class DatasetResource { Response .ok( Map( - "presignedUrl" -> LakeFSFileStorage.getFilePresignedUrl( + "presignedUrl" -> LakeFSStorageClient.getFilePresignedUrl( document.getDatasetName(), document.getVersionHash(), document.getFileRelativePath() @@ -486,7 +484,7 @@ class DatasetResource { val filePath = URLDecoder.decode(encodedFilePath, StandardCharsets.UTF_8.name()) // Try to initialize the repository in LakeFS try { - LakeFSFileStorage.deleteObject(datasetName, filePath) + LakeFSStorageClient.deleteObject(datasetName, filePath) } catch { case e: Exception => throw new WebApplicationException( @@ -532,7 +530,7 @@ class DatasetResource { throw new BadRequestException("numParts is required for initialization") ) - val presignedResponse = LakeFSFileStorage.initiatePresignedMultipartUploads( + val presignedResponse = LakeFSStorageClient.initiatePresignedMultipartUploads( datasetName, filePath, numPartsValue @@ -577,7 +575,7 @@ class DatasetResource { } // Complete the multipart upload with parts and physical address - val objectStats = LakeFSFileStorage.completePresignedMultipartUploads( + val objectStats = LakeFSStorageClient.completePresignedMultipartUploads( datasetName, filePath, uploadIdValue, @@ -606,7 +604,7 @@ class DatasetResource { } // Abort the multipart upload - LakeFSFileStorage.abortPresignedMultipartUploads( + LakeFSStorageClient.abortPresignedMultipartUploads( datasetName, filePath, uploadIdValue, @@ -659,7 +657,7 @@ class DatasetResource { // Retrieve staged (uncommitted) changes from LakeFS val dataset = getDatasetByID(ctx, did) - val lakefsDiffs = LakeFSFileStorage.retrieveUncommittedObjects(dataset.getName) + val lakefsDiffs = LakeFSStorageClient.retrieveUncommittedObjects(dataset.getName) // Convert LakeFS Diff objects to our custom Diff case class lakefsDiffs.map(d => @@ -693,7 +691,7 @@ class DatasetResource { val filePath = URLDecoder.decode(encodedFilePath, StandardCharsets.UTF_8.name()) // Try to reset the file change in LakeFS try { - LakeFSFileStorage.resetObjectUploadOrDeletion(datasetName, filePath) + LakeFSStorageClient.resetObjectUploadOrDeletion(datasetName, filePath) } catch { case e: Exception => throw new WebApplicationException( @@ -834,7 +832,7 @@ class DatasetResource { .fromLakeFSRepositoryCommittedObjects( Map( (user.getEmail, dataset.getName, latestVersion.getName) -> - LakeFSFileStorage + LakeFSStorageClient .retrieveObjectsOfVersion(dataset.getName, latestVersion.getVersionHash) ) ) @@ -978,7 +976,7 @@ class DatasetResource { val ownerFileNode = DatasetFileNode .fromLakeFSRepositoryCommittedObjects( Map( - (dataset.ownerEmail, datasetName, datasetVersion.getName) -> LakeFSFileStorage + (dataset.ownerEmail, datasetName, datasetVersion.getName) -> LakeFSStorageClient .retrieveObjectsOfVersion(datasetName, datasetVersion.getVersionHash) ) ) diff --git a/core/workflow-core/src/main/scala/edu/uci/ics/amber/core/storage/S3Storage.scala b/core/file-service/src/main/scala/edu/uci/ics/texera/service/util/S3StorageClient.scala similarity index 97% rename from core/workflow-core/src/main/scala/edu/uci/ics/amber/core/storage/S3Storage.scala rename to core/file-service/src/main/scala/edu/uci/ics/texera/service/util/S3StorageClient.scala 
index 58248b37a1..7017391111 100644 --- a/core/workflow-core/src/main/scala/edu/uci/ics/amber/core/storage/S3Storage.scala +++ b/core/file-service/src/main/scala/edu/uci/ics/texera/service/util/S3StorageClient.scala @@ -1,12 +1,12 @@ -package edu.uci.ics.amber.core.storage +package edu.uci.ics.texera.service.util +import edu.uci.ics.amber.core.storage.StorageConfig import software.amazon.awssdk.auth.credentials.{AwsBasicCredentials, StaticCredentialsProvider} import software.amazon.awssdk.regions.Region import software.amazon.awssdk.services.s3.{S3Client, S3Configuration} import software.amazon.awssdk.services.s3.model._ import java.security.MessageDigest -import java.util.Base64 import scala.jdk.CollectionConverters._ /** @@ -14,7 +14,7 @@ import scala.jdk.CollectionConverters._ * - Uses credentials and endpoint from StorageConfig. * - Supports object upload, download, listing, and deletion. */ -object S3Storage { +object S3StorageClient { // Initialize MinIO-compatible S3 Client private lazy val s3Client: S3Client = { val credentials = AwsBasicCredentials.create(StorageConfig.s3Username, StorageConfig.s3Password) diff --git a/core/gui/src/app/dashboard/component/user/user-dataset/user-dataset-explorer/user-dataset-staged-objects-list/user-dataset-staged-objects-list.component.scss b/core/gui/src/app/dashboard/component/user/user-dataset/user-dataset-explorer/user-dataset-staged-objects-list/user-dataset-staged-objects-list.component.scss index c83d55648c..858dc36eae 100644 --- a/core/gui/src/app/dashboard/component/user/user-dataset/user-dataset-explorer/user-dataset-staged-objects-list/user-dataset-staged-objects-list.component.scss +++ b/core/gui/src/app/dashboard/component/user/user-dataset/user-dataset-explorer/user-dataset-staged-objects-list/user-dataset-staged-objects-list.component.scss @@ -7,24 +7,10 @@ .truncate-file-path { display: inline-block; - max-width: 200px; /* Adjust width as needed */ + max-width: 250px; /* Adjust width as needed */ white-space: nowrap; overflow: hidden; text-overflow: ellipsis; - direction: rtl; /* Makes ellipsis appear at the beginning */ - unicode-bidi: plaintext; /* Prevents text from flipping in RTL mode */ -} - -.truncate-file-path:hover::after { - content: attr(data-fullpath); /* Show full path as tooltip */ - position: absolute; - background: rgba(0, 0, 0, 0.8); - color: white; - padding: 5px; - border-radius: 4px; - white-space: nowrap; - z-index: 10; - transform: translateY(-25px); } .delete-button { diff --git a/core/workflow-core/build.sbt b/core/workflow-core/build.sbt index 3c8d056c10..2481bbd24c 100644 --- a/core/workflow-core/build.sbt +++ b/core/workflow-core/build.sbt @@ -173,8 +173,5 @@ libraryDependencies ++= Seq( "org.eclipse.jgit" % "org.eclipse.jgit" % "5.13.0.202109080827-r", // jgit "org.yaml" % "snakeyaml" % "1.30", // yaml reader (downgrade to 1.30 due to dropwizard 1.3.23 required by amber) "org.apache.commons" % "commons-vfs2" % "2.9.0", // for FileResolver throw VFS-related exceptions - "io.lakefs" % "sdk" % "1.48.0", - "software.amazon.awssdk" % "s3" % "2.29.51", - "software.amazon.awssdk" % "auth" % "2.29.51", - "software.amazon.awssdk" % "regions" % "2.29.51", + "io.lakefs" % "sdk" % "1.48.0", // for lakeFS api calls ) \ No newline at end of file diff --git a/core/workflow-core/src/main/scala/edu/uci/ics/amber/core/storage/model/DatasetFileDocument.scala b/core/workflow-core/src/main/scala/edu/uci/ics/amber/core/storage/model/DatasetFileDocument.scala index 73308246dd..862056c040 100644 --- 
a/core/workflow-core/src/main/scala/edu/uci/ics/amber/core/storage/model/DatasetFileDocument.scala +++ b/core/workflow-core/src/main/scala/edu/uci/ics/amber/core/storage/model/DatasetFileDocument.scala @@ -1,10 +1,10 @@ package edu.uci.ics.amber.core.storage.model -import edu.uci.ics.amber.core.storage.LakeFSFileStorage import edu.uci.ics.amber.core.storage.model.DatasetFileDocument.{ fileServiceGetPresignURLEndpoint, userJwtToken } +import edu.uci.ics.amber.core.storage.util.LakeFSStorageClient import edu.uci.ics.amber.core.storage.util.dataset.GitVersionControlLocalFileStorage import edu.uci.ics.amber.util.PathUtils @@ -54,7 +54,7 @@ private[storage] class DatasetFileDocument(uri: URI) override def asInputStream(): InputStream = { if (userJwtToken.isEmpty) { - val presignUrl = LakeFSFileStorage.getFilePresignedUrl( + val presignUrl = LakeFSStorageClient.getFilePresignedUrl( getDatasetName(), getVersionHash(), getFileRelativePath() diff --git a/core/workflow-core/src/main/scala/edu/uci/ics/amber/core/storage/LakeFSFileStorage.scala b/core/workflow-core/src/main/scala/edu/uci/ics/amber/core/storage/util/LakeFSStorageClient.scala similarity index 99% rename from core/workflow-core/src/main/scala/edu/uci/ics/amber/core/storage/LakeFSFileStorage.scala rename to core/workflow-core/src/main/scala/edu/uci/ics/amber/core/storage/util/LakeFSStorageClient.scala index 47f9c2e4d1..f282639483 100644 --- a/core/workflow-core/src/main/scala/edu/uci/ics/amber/core/storage/LakeFSFileStorage.scala +++ b/core/workflow-core/src/main/scala/edu/uci/ics/amber/core/storage/util/LakeFSStorageClient.scala @@ -1,4 +1,4 @@ -package edu.uci.ics.amber.core.storage +package edu.uci.ics.amber.core.storage.util import io.lakefs.clients.sdk._ import io.lakefs.clients.sdk.model._ @@ -13,7 +13,7 @@ import io.lakefs.clients.sdk.model.ResetCreation.TypeEnum * LakeFSFileStorage provides high-level file storage operations using LakeFS, * similar to Git operations for version control and file management. 
*/ -object LakeFSFileStorage { +object LakeFSStorageClient { private lazy val apiClient: ApiClient = { val client = new ApiClient() From 07cb8cc66799c74eebd4b5a42fca44a611177b51 Mon Sep 17 00:00:00 2001 From: Jiadong Bai Date: Mon, 3 Mar 2025 10:18:00 -0800 Subject: [PATCH 30/47] cleanup the storage config --- .../src/main/resources/storage-config.yaml | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/core/workflow-core/src/main/resources/storage-config.yaml b/core/workflow-core/src/main/resources/storage-config.yaml index 3678025298..829149b87d 100644 --- a/core/workflow-core/src/main/resources/storage-config.yaml +++ b/core/workflow-core/src/main/resources/storage-config.yaml @@ -26,13 +26,13 @@ storage: num-retries: 10 min-wait-ms: 100 # 0.1s max-wait-ms: 10000 # 10s - + # configuration of the LakeFS & S3 for dataset storage lakefs: - endpoint: "http://127.0.0.1:8000/api/v1" + endpoint: "http://localhost:8000/api/v1" auth: - api-secret: "random_string_for_lakefs" - username: "AKIAIOSFOLKFSSAMPLES" - password: "wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY" + api-secret: "" + username: "" + password: "" block-storage: type: "s3" bucket-name: "texera-dataset" @@ -40,8 +40,8 @@ storage: s3: endpoint: "http://localhost:9000" auth: - username: "texera_minio" - password: "password" + username: "" + password: "" jdbc: url: "jdbc:postgresql://localhost:5432/texera_db?currentSchema=texera_db,public" From 685f034e33c7d8ea5cc6d322dcef57bb032c0afb Mon Sep 17 00:00:00 2001 From: Jiadong Bai Date: Mon, 3 Mar 2025 10:18:06 -0800 Subject: [PATCH 31/47] add more comments --- .../ics/amber/core/storage/model/DatasetFileDocument.scala | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/core/workflow-core/src/main/scala/edu/uci/ics/amber/core/storage/model/DatasetFileDocument.scala b/core/workflow-core/src/main/scala/edu/uci/ics/amber/core/storage/model/DatasetFileDocument.scala index 862056c040..59065b2184 100644 --- a/core/workflow-core/src/main/scala/edu/uci/ics/amber/core/storage/model/DatasetFileDocument.scala +++ b/core/workflow-core/src/main/scala/edu/uci/ics/amber/core/storage/model/DatasetFileDocument.scala @@ -15,8 +15,12 @@ import java.nio.file.{Files, Path, Paths} import scala.jdk.CollectionConverters.IteratorHasAsScala object DatasetFileDocument { + // Since requests need to be sent to the FileService in order to read the file, we store USER_JWT_TOKEN in the environment vars + // This variable should be NON-EMPTY in the dynamic-computing-unit architecture, i.e. each user-created computing unit should store user's jwt token. + // In the local development or other architectures, this token can be empty. lazy val userJwtToken: String = sys.env.getOrElse("USER_JWT_TOKEN", "").trim + // The endpoint of getting presigned url from the file service, also stored in the environment vars. 
lazy val fileServiceGetPresignURLEndpoint: String = sys.env .getOrElse( From 9caae5ddc9fe48c002986fc6b8f9fd7b156f1e10 Mon Sep 17 00:00:00 2001 From: Jiadong Bai Date: Mon, 3 Mar 2025 10:19:12 -0800 Subject: [PATCH 32/47] save the multipart chunk change on frontend --- core/gui/src/environments/environment.default.ts | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/core/gui/src/environments/environment.default.ts b/core/gui/src/environments/environment.default.ts index 52560cdcfe..40de532fb0 100644 --- a/core/gui/src/environments/environment.default.ts +++ b/core/gui/src/environments/environment.default.ts @@ -78,6 +78,11 @@ export const defaultEnvironment = { */ singleFileUploadMaximumSizeMB: 20, + /** + * the size of each chunk during the multipart upload of file + */ + multipartUploadChunkSizeByte: 50 * 1024 * 1024, // 50 MB + /** * default data transfer batch size for workflows */ From 9471174c3766f9a706701316dcbcc60ae0e6c628 Mon Sep 17 00:00:00 2001 From: Jiadong Bai Date: Mon, 3 Mar 2025 10:38:32 -0800 Subject: [PATCH 33/47] recover gui changes --- core/gui/src/app/dashboard/component/dashboard.component.html | 2 +- core/gui/src/app/dashboard/component/dashboard.component.ts | 3 --- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/core/gui/src/app/dashboard/component/dashboard.component.html b/core/gui/src/app/dashboard/component/dashboard.component.html index f8b3704cf1..6b4485eafd 100644 --- a/core/gui/src/app/dashboard/component/dashboard.component.html +++ b/core/gui/src/app/dashboard/component/dashboard.component.html @@ -164,7 +164,7 @@ *ngIf="!isLogin && googleLogin" type="standard" size="large" - width="200"> + [width]="200">
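Note on the multipartUploadChunkSizeByte value introduced in environment.default.ts above: it drives the multipart upload in dataset.service.ts (patch 26), where the part count is Math.ceil(file.size / chunkSize) and each part is a file.slice(start, end) PUT to its presigned URL. A minimal standalone sketch of that slicing step, with chunkSizeByte as an assumed stand-in for the environment setting (not code from the patches):

    // Sketch only: split a File into fixed-size chunks, mirroring the
    // partCount / file.slice(start, end) logic in dataset.service.ts.
    // chunkSizeByte stands in for environment.multipartUploadChunkSizeByte (50 MB by default).
    function splitIntoChunks(file: File, chunkSizeByte: number = 50 * 1024 * 1024): Blob[] {
      const partCount = Math.ceil(file.size / chunkSizeByte);
      const chunks: Blob[] = [];
      for (let index = 0; index < partCount; index++) {
        const start = index * chunkSizeByte;
        const end = Math.min(start + chunkSizeByte, file.size);
        chunks.push(file.slice(start, end)); // chunks[index] would be uploaded to presignedUrls[index]
      }
      return chunks;
    }
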
diff --git a/core/gui/src/app/dashboard/component/dashboard.component.ts b/core/gui/src/app/dashboard/component/dashboard.component.ts index 1bc3fedb9a..2df0680c6b 100644 --- a/core/gui/src/app/dashboard/component/dashboard.component.ts +++ b/core/gui/src/app/dashboard/component/dashboard.component.ts @@ -20,7 +20,6 @@ import { } from "../../app-routing.constant"; import { environment } from "../../../environments/environment"; import { Version } from "../../../environments/version"; -import { types } from "sass"; @Component({ selector: "texera-dashboard", @@ -146,6 +145,4 @@ export class DashboardComponent implements OnInit { } protected readonly DASHBOARD_ABOUT = DASHBOARD_ABOUT; - protected readonly types = types; - protected readonly String = String; } From 6eef90301efc6b94e1038e3aaed1ad453f954a12 Mon Sep 17 00:00:00 2001 From: Jiadong Bai Date: Tue, 4 Mar 2025 12:28:09 -0800 Subject: [PATCH 34/47] do the rebase --- .../ics/texera/web/service/ResultExportService.scala | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/web/service/ResultExportService.scala b/core/amber/src/main/scala/edu/uci/ics/texera/web/service/ResultExportService.scala index e60f07b564..ce7fc70ea0 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/web/service/ResultExportService.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/web/service/ResultExportService.scala @@ -17,7 +17,7 @@ import edu.uci.ics.texera.web.resource.dashboard.user.workflow.{ import edu.uci.ics.texera.web.service.WorkflowExecutionService.getLatestExecutionId -import java.io.{FilterOutputStream, IOException, OutputStream, PipedInputStream, PipedOutputStream} +import java.io.{FilterOutputStream, IOException, OutputStream} import java.nio.channels.Channels import java.nio.charset.StandardCharsets import java.time.LocalDateTime @@ -33,10 +33,6 @@ import org.apache.arrow.vector.ipc.ArrowFileWriter import org.apache.commons.lang3.StringUtils import javax.ws.rs.WebApplicationException import javax.ws.rs.core.StreamingOutput -import java.io.OutputStream -import java.nio.channels.Channels -import scala.util.Using -import edu.uci.ics.amber.core.workflow.PortIdentity import edu.uci.ics.texera.web.auth.JwtAuth import edu.uci.ics.texera.web.auth.JwtAuth.{TOKEN_EXPIRE_TIME_IN_DAYS, dayToMin, jwtClaims} @@ -226,6 +222,11 @@ class ResultExportService(workflowIdentity: WorkflowIdentity) { }, fileName ) + (Some(s"Data export done for operator $operatorId -> file: $fileName"), None) + } catch { + case ex: Exception => + (None, Some(s"Data export failed for operator $operatorId: ${ex.getMessage}"))} + } private def convertFieldToBytes(field: Any): Array[Byte] = { field match { From d96296bec80d34fcc3af18cae937fbb368f20bd4 Mon Sep 17 00:00:00 2001 From: Jiadong Bai Date: Tue, 4 Mar 2025 13:26:05 -0800 Subject: [PATCH 35/47] add the flag for controlling whether to select files from dataset --- .../input-autocomplete/input-autocomplete.component.ts | 2 +- core/gui/src/environments/environment.default.ts | 6 ++++++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/core/gui/src/app/workspace/component/input-autocomplete/input-autocomplete.component.ts b/core/gui/src/app/workspace/component/input-autocomplete/input-autocomplete.component.ts index 008f5c81b6..24bce4c1fa 100644 --- a/core/gui/src/app/workspace/component/input-autocomplete/input-autocomplete.component.ts +++ b/core/gui/src/app/workspace/component/input-autocomplete/input-autocomplete.component.ts @@ -50,7 +50,7 
@@ export class InputAutoCompleteComponent extends FieldType {
 }
 
   get isFileSelectionEnabled(): boolean {
-    return environment.userSystemEnabled;
+    return environment.userSystemEnabled && environment.selectingFilesFromDatasetsEnabled;
   }
 
   get selectedFilePath(): string | null {
diff --git a/core/gui/src/environments/environment.default.ts b/core/gui/src/environments/environment.default.ts
index 40de532fb0..9f8b015a22 100644
--- a/core/gui/src/environments/environment.default.ts
+++ b/core/gui/src/environments/environment.default.ts
@@ -28,6 +28,12 @@ export const defaultEnvironment = {
    */
   userSystemEnabled: false,
 
+  /**
+   * whether selecting files from datasets (instead of the local file system) is enabled.
+   * The user system must be enabled to make this flag work!
+   */
+  selectingFilesFromDatasetsEnabled: true,
+
   /**
    * whether local login is enabled
    */

From 5e6f9a898a20ed4363a8f336ba2171bed78ed535 Mon Sep 17 00:00:00 2001
From: Jiadong Bai
Date: Tue, 4 Mar 2025 13:31:05 -0800
Subject: [PATCH 36/47] add default values for lakeFS+S3

---
 .../src/main/resources/storage-config.yaml         | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/core/workflow-core/src/main/resources/storage-config.yaml b/core/workflow-core/src/main/resources/storage-config.yaml
index 829149b87d..4c859f0f6b 100644
--- a/core/workflow-core/src/main/resources/storage-config.yaml
+++ b/core/workflow-core/src/main/resources/storage-config.yaml
@@ -26,13 +26,14 @@ storage:
       num-retries: 10
       min-wait-ms: 100 # 0.1s
       max-wait-ms: 10000 # 10s
-  # configuration of the LakeFS & S3 for dataset storage
+  # Configuration of LakeFS & S3 for dataset storage;
+  # default values are provided for each field, and you don't need to change them if you deployed LakeFS+S3 via the docker-compose.yml in file-service/src/main/resources/docker-compose.yml
   lakefs:
     endpoint: "http://localhost:8000/api/v1"
     auth:
-      api-secret: ""
-      username: ""
-      password: ""
+      api-secret: "random_string_for_lakefs"
+      username: "AKIAIOSFOLKFSSAMPLES"
+      password: "wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY"
     block-storage:
       type: "s3"
       bucket-name: "texera-dataset"
@@ -40,8 +41,8 @@
   s3:
     endpoint: "http://localhost:9000"
     auth:
-      username: ""
-      password: ""
+      username: "texera_minio"
+      password: "password"
 
 jdbc:
     url: "jdbc:postgresql://localhost:5432/texera_db?currentSchema=texera_db,public"

From fd84702f1cd7150fbed7632e9e1eeada4576ed47 Mon Sep 17 00:00:00 2001
From: Jiadong Bai
Date: Tue, 4 Mar 2025 15:14:47 -0800
Subject: [PATCH 37/47] fmt

---
 .../uci/ics/texera/web/service/ResultExportService.scala | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/web/service/ResultExportService.scala b/core/amber/src/main/scala/edu/uci/ics/texera/web/service/ResultExportService.scala
index ce7fc70ea0..b9998f6f3c 100644
--- a/core/amber/src/main/scala/edu/uci/ics/texera/web/service/ResultExportService.scala
+++ b/core/amber/src/main/scala/edu/uci/ics/texera/web/service/ResultExportService.scala
@@ -6,7 +6,7 @@ import edu.uci.ics.amber.core.storage.model.VirtualDocument
 import edu.uci.ics.amber.core.tuple.Tuple
 import edu.uci.ics.amber.core.virtualidentity.{OperatorIdentity, WorkflowIdentity}
 import edu.uci.ics.amber.core.workflow.PortIdentity
-import edu.uci.ics.amber.util.{ArrowUtils, PathUtils}
+import edu.uci.ics.amber.util.ArrowUtils
 import edu.uci.ics.texera.dao.jooq.generated.tables.pojos.User
 import edu.uci.ics.texera.web.model.websocket.request.ResultExportRequest
 import
edu.uci.ics.texera.web.model.websocket.response.ResultExportResponse @@ -16,7 +16,6 @@ import edu.uci.ics.texera.web.resource.dashboard.user.workflow.{ } import edu.uci.ics.texera.web.service.WorkflowExecutionService.getLatestExecutionId - import java.io.{FilterOutputStream, IOException, OutputStream} import java.nio.channels.Channels import java.nio.charset.StandardCharsets @@ -37,6 +36,7 @@ import edu.uci.ics.texera.web.auth.JwtAuth import edu.uci.ics.texera.web.auth.JwtAuth.{TOKEN_EXPIRE_TIME_IN_DAYS, dayToMin, jwtClaims} import java.net.{HttpURLConnection, URL, URLEncoder} + /** * A simple wrapper that ignores 'close()' calls on the underlying stream. * This allows each operator's writer to call close() without ending the entire ZipOutputStream. @@ -212,7 +212,6 @@ class ResultExportService(workflowIdentity: WorkflowIdentity) { val field = selectedRow.getField(columnIndex) val dataBytes: Array[Byte] = convertFieldToBytes(field) - saveToDatasets( request, user, @@ -225,7 +224,8 @@ class ResultExportService(workflowIdentity: WorkflowIdentity) { (Some(s"Data export done for operator $operatorId -> file: $fileName"), None) } catch { case ex: Exception => - (None, Some(s"Data export failed for operator $operatorId: ${ex.getMessage}"))} + (None, Some(s"Data export failed for operator $operatorId: ${ex.getMessage}")) + } } private def convertFieldToBytes(field: Any): Array[Byte] = { From 86c9fcbc9919631ce7c462ef1e89b092918eccba Mon Sep 17 00:00:00 2001 From: Jiadong Bai Date: Tue, 4 Mar 2025 15:42:24 -0800 Subject: [PATCH 38/47] add file service to part of the scripts --- core/file-service/build.sbt | 2 +- core/scripts/build-services.sh | 3 +++ core/scripts/deploy-daemon.sh | 9 +++++++++ core/scripts/file-service.sh | 1 + core/scripts/terminate-daemon.sh | 9 +++++++-- 5 files changed, 21 insertions(+), 3 deletions(-) create mode 100755 core/scripts/file-service.sh diff --git a/core/file-service/build.sbt b/core/file-service/build.sbt index bcb6928c4a..454ff4145b 100644 --- a/core/file-service/build.sbt +++ b/core/file-service/build.sbt @@ -5,6 +5,7 @@ organization := "edu.uci.ics" version := "0.1.0" scalaVersion := "2.13.12" +enablePlugins(JavaAppPackaging) // Enable semanticdb for Scalafix ThisBuild / semanticdbEnabled := true @@ -60,7 +61,6 @@ libraryDependencies ++= Seq( "com.fasterxml.jackson.module" %% "jackson-module-scala" % "2.15.2", "org.glassfish.jersey.media" % "jersey-media-multipart" % "3.1.10", "jakarta.ws.rs" % "jakarta.ws.rs-api" % "3.1.0", // Ensure Jakarta JAX-RS API is available - "com.github.toastshaman" % "dropwizard-auth-jwt" % "1.1.2-0", "org.bitbucket.b_c" % "jose4j" % "0.9.6", "org.playframework" %% "play-json" % "3.1.0-M1", "software.amazon.awssdk" % "s3" % "2.29.51", diff --git a/core/scripts/build-services.sh b/core/scripts/build-services.sh index 47f273640b..b18585ea79 100755 --- a/core/scripts/build-services.sh +++ b/core/scripts/build-services.sh @@ -2,5 +2,8 @@ sbt clean dist unzip workflow-compiling-service/target/universal/workflow-compiling-service-0.1.0.zip -d target/ rm workflow-compiling-service/target/universal/workflow-compiling-service-0.1.0.zip +unzip file-service/target/universal/file-service-0.1.0.zip -d target/ +rm file-service/target/universal/file-service-0.1.0.zip + unzip amber/target/universal/texera-0.1-SNAPSHOT.zip -d amber/target/ rm amber/target/universal/texera-0.1-SNAPSHOT.zip diff --git a/core/scripts/deploy-daemon.sh b/core/scripts/deploy-daemon.sh index 0e17773f7a..ae8f5f996f 100755 --- a/core/scripts/deploy-daemon.sh +++ 
b/core/scripts/deploy-daemon.sh @@ -42,6 +42,15 @@ done echo "${green}WorkflowCompilingService launched at $(pgrep -f TexeraWorkflowCompilingService)${reset}" echo +echo "${green}Starting FileService in daemon...${reset}" +setsid nohup ./scripts/file-service.sh >/dev/null 2>&1 & +echo "${green}Waiting FileService to launch on 9092...${reset}" +while ! nc -z localhost 9092; do + sleep 0.1 # wait 100ms before check again +done +echo "${green}FileService launched at $(pgrep -f FileService)${reset}" +echo + echo "${green}Starting WorkflowComputingUnit in daemon...${reset}" setsid nohup ./scripts/workflow-computing-unit.sh >/dev/null 2>&1 & echo "${green}Waiting WorkflowComputingUnit to launch on 8085...${reset}" diff --git a/core/scripts/file-service.sh b/core/scripts/file-service.sh new file mode 100755 index 0000000000..c3b75743c4 --- /dev/null +++ b/core/scripts/file-service.sh @@ -0,0 +1 @@ +target/file-service-0.1.0/bin/file-service \ No newline at end of file diff --git a/core/scripts/terminate-daemon.sh b/core/scripts/terminate-daemon.sh index c4efeb8b75..306c27aa18 100755 --- a/core/scripts/terminate-daemon.sh +++ b/core/scripts/terminate-daemon.sh @@ -11,11 +11,16 @@ kill -9 $(pgrep -f WorkflowCompilingService) echo "${green}Terminated.${reset}" echo +echo "${red}Terminating FileService at $(pgrep -f FileService)...${reset}" +kill -9 $(pgrep -f FileService) +echo "${green}Terminated.${reset}" +echo + echo "${red}Terminating TexeraWebApplication at $(pgrep -f TexeraWebApplication)...${reset}" kill -9 $(pgrep -f TexeraWebApplication) echo "${green}Terminated.${reset}" echo -echo "${red}Terminating TexeraRunWorker at $(pgrep -f TexeraRunWorker)...${reset}" -kill -9 $(pgrep -f TexeraRunWorker) +echo "${red}Terminating ComputingUnitMaster at $(pgrep -f ComputingUnitMaster)...${reset}" +kill -9 $(pgrep -f ComputingUnitMaster) echo "${green}Terminated.${reset}" From 2b277fb4e4a01f58d164e726b34cc853d1529c03 Mon Sep 17 00:00:00 2001 From: Jiadong Bai Date: Thu, 6 Mar 2025 11:45:16 -0800 Subject: [PATCH 39/47] resolve comments and fix the py udf document --- .../python/core/storage/dataset/__init__.py | 5 - .../src/main/python/pytexera/__init__.py | 2 + .../main/python/pytexera/storage/__init__.py | 5 + .../storage}/dataset_file_document.py | 7 +- .../texera/service/util/S3StorageClient.scala | 2 +- core/gui/package.json | 1 - .../dataset-detail.component.scss | 2 +- ...dataset-staged-objects-list.component.html | 2 +- .../service/user/dataset/dataset.service.ts | 2 - core/gui/yarn.lock | 1261 +---------------- core/workflow-core/build.sbt | 2 +- .../src/main/resources/storage-config.yaml | 1 + .../amber/core/storage/StorageConfig.scala | 5 + 13 files changed, 21 insertions(+), 1276 deletions(-) delete mode 100644 core/amber/src/main/python/core/storage/dataset/__init__.py create mode 100644 core/amber/src/main/python/pytexera/storage/__init__.py rename core/amber/src/main/python/{core/storage/dataset => pytexera/storage}/dataset_file_document.py (91%) diff --git a/core/amber/src/main/python/core/storage/dataset/__init__.py b/core/amber/src/main/python/core/storage/dataset/__init__.py deleted file mode 100644 index b1f44d0c84..0000000000 --- a/core/amber/src/main/python/core/storage/dataset/__init__.py +++ /dev/null @@ -1,5 +0,0 @@ -from core.storage.dataset.dataset_file_document import DatasetFileDocument - -__all__ = [ - "DatasetFileDocument" -] \ No newline at end of file diff --git a/core/amber/src/main/python/pytexera/__init__.py b/core/amber/src/main/python/pytexera/__init__.py index 
28ed737411..a4a7a9c5ce 100644 --- a/core/amber/src/main/python/pytexera/__init__.py +++ b/core/amber/src/main/python/pytexera/__init__.py @@ -3,6 +3,7 @@ from typing import Iterator, Optional, Union from pyamber import * +from .storage.dataset_file_document import DatasetFileDocument from .udf.udf_operator import ( UDFOperatorV2, UDFTableOperator, @@ -22,6 +23,7 @@ "UDFTableOperator", "UDFBatchOperator", "UDFSourceOperator", + "DatasetFileDocument", # export external tools to be used "overrides", "logger", diff --git a/core/amber/src/main/python/pytexera/storage/__init__.py b/core/amber/src/main/python/pytexera/storage/__init__.py new file mode 100644 index 0000000000..cbda25ba41 --- /dev/null +++ b/core/amber/src/main/python/pytexera/storage/__init__.py @@ -0,0 +1,5 @@ +from .dataset_file_document import DatasetFileDocument + +__all__ = [ + "DatasetFileDocument" +] \ No newline at end of file diff --git a/core/amber/src/main/python/core/storage/dataset/dataset_file_document.py b/core/amber/src/main/python/pytexera/storage/dataset_file_document.py similarity index 91% rename from core/amber/src/main/python/core/storage/dataset/dataset_file_document.py rename to core/amber/src/main/python/pytexera/storage/dataset_file_document.py index 3d2e204df0..01c91db4c2 100644 --- a/core/amber/src/main/python/core/storage/dataset/dataset_file_document.py +++ b/core/amber/src/main/python/pytexera/storage/dataset_file_document.py @@ -21,16 +21,15 @@ def __init__(self, file_path: str): self.owner_email = parts[0] self.dataset_name = parts[1] self.version_name = parts[2] - self.file_relative_path = "/".join(parts[3:]) # Remaining path as fileRelativePath + self.file_relative_path = "/".join(parts[3:]) - # Load environment variables self.jwt_token = os.getenv("USER_JWT_TOKEN") self.presign_endpoint = os.getenv("PRESIGN_API_ENDPOINT") if not self.jwt_token: raise ValueError("JWT token is required but not set in environment variables.") if not self.presign_endpoint: - self.presign_endpoint = "http://localhost:9092/api/dataset/presign" + self.presign_endpoint = "http://localhost:9092/api/dataset/presign-download" def get_presigned_url(self) -> str: """ @@ -64,4 +63,4 @@ def read_file(self) -> io.BytesIO: if response.status_code != 200: raise RuntimeError(f"Failed to retrieve file content: {response.status_code} {response.text}") - return io.BytesIO(response.content) # Returns file-like object \ No newline at end of file + return io.BytesIO(response.content) \ No newline at end of file diff --git a/core/file-service/src/main/scala/edu/uci/ics/texera/service/util/S3StorageClient.scala b/core/file-service/src/main/scala/edu/uci/ics/texera/service/util/S3StorageClient.scala index 7017391111..843314cd3a 100644 --- a/core/file-service/src/main/scala/edu/uci/ics/texera/service/util/S3StorageClient.scala +++ b/core/file-service/src/main/scala/edu/uci/ics/texera/service/util/S3StorageClient.scala @@ -20,8 +20,8 @@ object S3StorageClient { val credentials = AwsBasicCredentials.create(StorageConfig.s3Username, StorageConfig.s3Password) S3Client .builder() - .region(Region.US_WEST_2) // MinIO doesn't require region, but AWS SDK enforces one .credentialsProvider(StaticCredentialsProvider.create(credentials)) + .region(Region.of(StorageConfig.s3Region)) .endpointOverride(java.net.URI.create(StorageConfig.s3Endpoint)) // MinIO URL .serviceConfiguration( S3Configuration.builder().pathStyleAccessEnabled(true).build() diff --git a/core/gui/package.json b/core/gui/package.json index 4cb6a7f87c..59a9aa1b64 100644 --- 
a/core/gui/package.json +++ b/core/gui/package.json @@ -34,7 +34,6 @@ "@angular/platform-browser-dynamic": "16.2.12", "@angular/router": "16.2.12", "@auth0/angular-jwt": "5.1.0", - "@aws-sdk/client-s3": "^3.750.0", "@codingame/monaco-vscode-java-default-extension": "8.0.4", "@codingame/monaco-vscode-python-default-extension": "8.0.4", "@codingame/monaco-vscode-r-default-extension": "8.0.4", diff --git a/core/gui/src/app/dashboard/component/user/user-dataset/user-dataset-explorer/dataset-detail.component.scss b/core/gui/src/app/dashboard/component/user/user-dataset/user-dataset-explorer/dataset-detail.component.scss index babf3831f8..f2c6e53cfb 100644 --- a/core/gui/src/app/dashboard/component/user/user-dataset/user-dataset-explorer/dataset-detail.component.scss +++ b/core/gui/src/app/dashboard/component/user/user-dataset/user-dataset-explorer/dataset-detail.component.scss @@ -10,7 +10,7 @@ transition: background-color 0.3s; margin: 50px auto 0 auto; /* Auto margins for horizontal centering */ width: 200px; /* Adjust width as needed */ - font-size: 1.1em; /* Make text slightly bigger */ + font-size: 18px; /* Make text slightly bigger */ font-weight: bold; /* Optional: Make text bold */ } diff --git a/core/gui/src/app/dashboard/component/user/user-dataset/user-dataset-explorer/user-dataset-staged-objects-list/user-dataset-staged-objects-list.component.html b/core/gui/src/app/dashboard/component/user/user-dataset/user-dataset-explorer/user-dataset-staged-objects-list/user-dataset-staged-objects-list.component.html index 1a6d785440..f4ec6203ba 100644 --- a/core/gui/src/app/dashboard/component/user/user-dataset/user-dataset-explorer/user-dataset-staged-objects-list/user-dataset-staged-objects-list.component.html +++ b/core/gui/src/app/dashboard/component/user/user-dataset/user-dataset-explorer/user-dataset-staged-objects-list/user-dataset-staged-objects-list.component.html @@ -25,7 +25,7 @@ (click)="onObjectReverted(obj)"> diff --git a/core/gui/src/app/dashboard/service/user/dataset/dataset.service.ts b/core/gui/src/app/dashboard/service/user/dataset/dataset.service.ts index 3a09aa5fbb..569cb1c78b 100644 --- a/core/gui/src/app/dashboard/service/user/dataset/dataset.service.ts +++ b/core/gui/src/app/dashboard/service/user/dataset/dataset.service.ts @@ -5,10 +5,8 @@ import { Dataset, DatasetVersion } from "../../../../common/type/dataset"; import { AppSettings } from "../../../../common/app-setting"; import { EMPTY, forkJoin, from, Observable, of, throwError } from "rxjs"; import { DashboardDataset } from "../../../type/dashboard-dataset.interface"; -import { FileUploadItem } from "../../../type/dashboard-file.interface"; import { DatasetFileNode } from "../../../../common/type/datasetVersionFileTree"; import { DatasetStagedObject } from "../../../../common/type/dataset-staged-object"; -import { S3Client } from "@aws-sdk/client-s3"; import { environment } from "../../../../../environments/environment"; export const DATASET_BASE_URL = "dataset"; diff --git a/core/gui/yarn.lock b/core/gui/yarn.lock index b2d1f4a74e..050ab8bfee 100644 --- a/core/gui/yarn.lock +++ b/core/gui/yarn.lock @@ -701,643 +701,6 @@ __metadata: languageName: node linkType: hard -"@aws-crypto/crc32@npm:5.2.0": - version: 5.2.0 - resolution: "@aws-crypto/crc32@npm:5.2.0" - dependencies: - "@aws-crypto/util": "npm:^5.2.0" - "@aws-sdk/types": "npm:^3.222.0" - tslib: "npm:^2.6.2" - checksum: 10c0/eab9581d3363af5ea498ae0e72de792f54d8890360e14a9d8261b7b5c55ebe080279fb2556e07994d785341cdaa99ab0b1ccf137832b53b5904cd6928f2b094b - 
languageName: node - linkType: hard - -"@aws-crypto/crc32c@npm:5.2.0": - version: 5.2.0 - resolution: "@aws-crypto/crc32c@npm:5.2.0" - dependencies: - "@aws-crypto/util": "npm:^5.2.0" - "@aws-sdk/types": "npm:^3.222.0" - tslib: "npm:^2.6.2" - checksum: 10c0/223efac396cdebaf5645568fa9a38cd0c322c960ae1f4276bedfe2e1031d0112e49d7d39225d386354680ecefae29f39af469a84b2ddfa77cb6692036188af77 - languageName: node - linkType: hard - -"@aws-crypto/sha1-browser@npm:5.2.0": - version: 5.2.0 - resolution: "@aws-crypto/sha1-browser@npm:5.2.0" - dependencies: - "@aws-crypto/supports-web-crypto": "npm:^5.2.0" - "@aws-crypto/util": "npm:^5.2.0" - "@aws-sdk/types": "npm:^3.222.0" - "@aws-sdk/util-locate-window": "npm:^3.0.0" - "@smithy/util-utf8": "npm:^2.0.0" - tslib: "npm:^2.6.2" - checksum: 10c0/51fed0bf078c10322d910af179871b7d299dde5b5897873ffbeeb036f427e5d11d23db9794439226544b73901920fd19f4d86bbc103ed73cc0cfdea47a83c6ac - languageName: node - linkType: hard - -"@aws-crypto/sha256-browser@npm:5.2.0": - version: 5.2.0 - resolution: "@aws-crypto/sha256-browser@npm:5.2.0" - dependencies: - "@aws-crypto/sha256-js": "npm:^5.2.0" - "@aws-crypto/supports-web-crypto": "npm:^5.2.0" - "@aws-crypto/util": "npm:^5.2.0" - "@aws-sdk/types": "npm:^3.222.0" - "@aws-sdk/util-locate-window": "npm:^3.0.0" - "@smithy/util-utf8": "npm:^2.0.0" - tslib: "npm:^2.6.2" - checksum: 10c0/05f6d256794df800fe9aef5f52f2ac7415f7f3117d461f85a6aecaa4e29e91527b6fd503681a17136fa89e9dd3d916e9c7e4cfb5eba222875cb6c077bdc1d00d - languageName: node - linkType: hard - -"@aws-crypto/sha256-js@npm:5.2.0, @aws-crypto/sha256-js@npm:^5.2.0": - version: 5.2.0 - resolution: "@aws-crypto/sha256-js@npm:5.2.0" - dependencies: - "@aws-crypto/util": "npm:^5.2.0" - "@aws-sdk/types": "npm:^3.222.0" - tslib: "npm:^2.6.2" - checksum: 10c0/6c48701f8336341bb104dfde3d0050c89c288051f6b5e9bdfeb8091cf3ffc86efcd5c9e6ff2a4a134406b019c07aca9db608128f8d9267c952578a3108db9fd1 - languageName: node - linkType: hard - -"@aws-crypto/supports-web-crypto@npm:^5.2.0": - version: 5.2.0 - resolution: "@aws-crypto/supports-web-crypto@npm:5.2.0" - dependencies: - tslib: "npm:^2.6.2" - checksum: 10c0/4d2118e29d68ca3f5947f1e37ce1fbb3239a0c569cc938cdc8ab8390d595609b5caf51a07c9e0535105b17bf5c52ea256fed705a07e9681118120ab64ee73af2 - languageName: node - linkType: hard - -"@aws-crypto/util@npm:5.2.0, @aws-crypto/util@npm:^5.2.0": - version: 5.2.0 - resolution: "@aws-crypto/util@npm:5.2.0" - dependencies: - "@aws-sdk/types": "npm:^3.222.0" - "@smithy/util-utf8": "npm:^2.0.0" - tslib: "npm:^2.6.2" - checksum: 10c0/0362d4c197b1fd64b423966945130207d1fe23e1bb2878a18e361f7743c8d339dad3f8729895a29aa34fff6a86c65f281cf5167c4bf253f21627ae80b6dd2951 - languageName: node - linkType: hard - -"@aws-sdk/client-s3@npm:^3.750.0": - version: 3.750.0 - resolution: "@aws-sdk/client-s3@npm:3.750.0" - dependencies: - "@aws-crypto/sha1-browser": "npm:5.2.0" - "@aws-crypto/sha256-browser": "npm:5.2.0" - "@aws-crypto/sha256-js": "npm:5.2.0" - "@aws-sdk/core": "npm:3.750.0" - "@aws-sdk/credential-provider-node": "npm:3.750.0" - "@aws-sdk/middleware-bucket-endpoint": "npm:3.734.0" - "@aws-sdk/middleware-expect-continue": "npm:3.734.0" - "@aws-sdk/middleware-flexible-checksums": "npm:3.750.0" - "@aws-sdk/middleware-host-header": "npm:3.734.0" - "@aws-sdk/middleware-location-constraint": "npm:3.734.0" - "@aws-sdk/middleware-logger": "npm:3.734.0" - "@aws-sdk/middleware-recursion-detection": "npm:3.734.0" - "@aws-sdk/middleware-sdk-s3": "npm:3.750.0" - "@aws-sdk/middleware-ssec": "npm:3.734.0" - 
"@aws-sdk/middleware-user-agent": "npm:3.750.0" - "@aws-sdk/region-config-resolver": "npm:3.734.0" - "@aws-sdk/signature-v4-multi-region": "npm:3.750.0" - "@aws-sdk/types": "npm:3.734.0" - "@aws-sdk/util-endpoints": "npm:3.743.0" - "@aws-sdk/util-user-agent-browser": "npm:3.734.0" - "@aws-sdk/util-user-agent-node": "npm:3.750.0" - "@aws-sdk/xml-builder": "npm:3.734.0" - "@smithy/config-resolver": "npm:^4.0.1" - "@smithy/core": "npm:^3.1.4" - "@smithy/eventstream-serde-browser": "npm:^4.0.1" - "@smithy/eventstream-serde-config-resolver": "npm:^4.0.1" - "@smithy/eventstream-serde-node": "npm:^4.0.1" - "@smithy/fetch-http-handler": "npm:^5.0.1" - "@smithy/hash-blob-browser": "npm:^4.0.1" - "@smithy/hash-node": "npm:^4.0.1" - "@smithy/hash-stream-node": "npm:^4.0.1" - "@smithy/invalid-dependency": "npm:^4.0.1" - "@smithy/md5-js": "npm:^4.0.1" - "@smithy/middleware-content-length": "npm:^4.0.1" - "@smithy/middleware-endpoint": "npm:^4.0.5" - "@smithy/middleware-retry": "npm:^4.0.6" - "@smithy/middleware-serde": "npm:^4.0.2" - "@smithy/middleware-stack": "npm:^4.0.1" - "@smithy/node-config-provider": "npm:^4.0.1" - "@smithy/node-http-handler": "npm:^4.0.2" - "@smithy/protocol-http": "npm:^5.0.1" - "@smithy/smithy-client": "npm:^4.1.5" - "@smithy/types": "npm:^4.1.0" - "@smithy/url-parser": "npm:^4.0.1" - "@smithy/util-base64": "npm:^4.0.0" - "@smithy/util-body-length-browser": "npm:^4.0.0" - "@smithy/util-body-length-node": "npm:^4.0.0" - "@smithy/util-defaults-mode-browser": "npm:^4.0.6" - "@smithy/util-defaults-mode-node": "npm:^4.0.6" - "@smithy/util-endpoints": "npm:^3.0.1" - "@smithy/util-middleware": "npm:^4.0.1" - "@smithy/util-retry": "npm:^4.0.1" - "@smithy/util-stream": "npm:^4.1.1" - "@smithy/util-utf8": "npm:^4.0.0" - "@smithy/util-waiter": "npm:^4.0.2" - tslib: "npm:^2.6.2" - checksum: 10c0/502099eb11b014a5a13ad3b363fa33aa4706c4b9717b64b76fdf30cc8b0b907ce6dc4fbfc751deddfc978c88a46545e2ba310da2dec6c138cf59f7cfe2ed70f4 - languageName: node - linkType: hard - -"@aws-sdk/client-sso@npm:3.750.0": - version: 3.750.0 - resolution: "@aws-sdk/client-sso@npm:3.750.0" - dependencies: - "@aws-crypto/sha256-browser": "npm:5.2.0" - "@aws-crypto/sha256-js": "npm:5.2.0" - "@aws-sdk/core": "npm:3.750.0" - "@aws-sdk/middleware-host-header": "npm:3.734.0" - "@aws-sdk/middleware-logger": "npm:3.734.0" - "@aws-sdk/middleware-recursion-detection": "npm:3.734.0" - "@aws-sdk/middleware-user-agent": "npm:3.750.0" - "@aws-sdk/region-config-resolver": "npm:3.734.0" - "@aws-sdk/types": "npm:3.734.0" - "@aws-sdk/util-endpoints": "npm:3.743.0" - "@aws-sdk/util-user-agent-browser": "npm:3.734.0" - "@aws-sdk/util-user-agent-node": "npm:3.750.0" - "@smithy/config-resolver": "npm:^4.0.1" - "@smithy/core": "npm:^3.1.4" - "@smithy/fetch-http-handler": "npm:^5.0.1" - "@smithy/hash-node": "npm:^4.0.1" - "@smithy/invalid-dependency": "npm:^4.0.1" - "@smithy/middleware-content-length": "npm:^4.0.1" - "@smithy/middleware-endpoint": "npm:^4.0.5" - "@smithy/middleware-retry": "npm:^4.0.6" - "@smithy/middleware-serde": "npm:^4.0.2" - "@smithy/middleware-stack": "npm:^4.0.1" - "@smithy/node-config-provider": "npm:^4.0.1" - "@smithy/node-http-handler": "npm:^4.0.2" - "@smithy/protocol-http": "npm:^5.0.1" - "@smithy/smithy-client": "npm:^4.1.5" - "@smithy/types": "npm:^4.1.0" - "@smithy/url-parser": "npm:^4.0.1" - "@smithy/util-base64": "npm:^4.0.0" - "@smithy/util-body-length-browser": "npm:^4.0.0" - "@smithy/util-body-length-node": "npm:^4.0.0" - "@smithy/util-defaults-mode-browser": "npm:^4.0.6" - 
"@smithy/util-defaults-mode-node": "npm:^4.0.6" - "@smithy/util-endpoints": "npm:^3.0.1" - "@smithy/util-middleware": "npm:^4.0.1" - "@smithy/util-retry": "npm:^4.0.1" - "@smithy/util-utf8": "npm:^4.0.0" - tslib: "npm:^2.6.2" - checksum: 10c0/a7f2688697dfa9bad799cbd984295e9b685431ec8da13bdf12b8bf5e6c218e3caae231eba147f216d6cf6607c15ded0895535740131986a44e0ca121a095942e - languageName: node - linkType: hard - -"@aws-sdk/core@npm:3.750.0": - version: 3.750.0 - resolution: "@aws-sdk/core@npm:3.750.0" - dependencies: - "@aws-sdk/types": "npm:3.734.0" - "@smithy/core": "npm:^3.1.4" - "@smithy/node-config-provider": "npm:^4.0.1" - "@smithy/property-provider": "npm:^4.0.1" - "@smithy/protocol-http": "npm:^5.0.1" - "@smithy/signature-v4": "npm:^5.0.1" - "@smithy/smithy-client": "npm:^4.1.5" - "@smithy/types": "npm:^4.1.0" - "@smithy/util-middleware": "npm:^4.0.1" - fast-xml-parser: "npm:4.4.1" - tslib: "npm:^2.6.2" - checksum: 10c0/45e45a8ea152a10972aa6f54bfda8da9ca70edcf11b793b900b216a3244b149f6bf79a9ee1aaca8d4244511229045e883a6d469fd6e425e58a874bfd5660bee3 - languageName: node - linkType: hard - -"@aws-sdk/credential-provider-env@npm:3.750.0": - version: 3.750.0 - resolution: "@aws-sdk/credential-provider-env@npm:3.750.0" - dependencies: - "@aws-sdk/core": "npm:3.750.0" - "@aws-sdk/types": "npm:3.734.0" - "@smithy/property-provider": "npm:^4.0.1" - "@smithy/types": "npm:^4.1.0" - tslib: "npm:^2.6.2" - checksum: 10c0/19d4c486ef8e3acc7b249d9e617390edfcdf42b5f75ab10ac6d2491681aa9c3e835dea99c41d51a09433011843f20c84a724fbee8ef8fd01f4313c2689b8383a - languageName: node - linkType: hard - -"@aws-sdk/credential-provider-http@npm:3.750.0": - version: 3.750.0 - resolution: "@aws-sdk/credential-provider-http@npm:3.750.0" - dependencies: - "@aws-sdk/core": "npm:3.750.0" - "@aws-sdk/types": "npm:3.734.0" - "@smithy/fetch-http-handler": "npm:^5.0.1" - "@smithy/node-http-handler": "npm:^4.0.2" - "@smithy/property-provider": "npm:^4.0.1" - "@smithy/protocol-http": "npm:^5.0.1" - "@smithy/smithy-client": "npm:^4.1.5" - "@smithy/types": "npm:^4.1.0" - "@smithy/util-stream": "npm:^4.1.1" - tslib: "npm:^2.6.2" - checksum: 10c0/3324dbd96f6daebf71fd422819bdd35778e49bc697ed1b638b4572da89c45946029135603d1be855f01e01961627eee3361c5be3e20994467413dc3ae7fa45a0 - languageName: node - linkType: hard - -"@aws-sdk/credential-provider-ini@npm:3.750.0": - version: 3.750.0 - resolution: "@aws-sdk/credential-provider-ini@npm:3.750.0" - dependencies: - "@aws-sdk/core": "npm:3.750.0" - "@aws-sdk/credential-provider-env": "npm:3.750.0" - "@aws-sdk/credential-provider-http": "npm:3.750.0" - "@aws-sdk/credential-provider-process": "npm:3.750.0" - "@aws-sdk/credential-provider-sso": "npm:3.750.0" - "@aws-sdk/credential-provider-web-identity": "npm:3.750.0" - "@aws-sdk/nested-clients": "npm:3.750.0" - "@aws-sdk/types": "npm:3.734.0" - "@smithy/credential-provider-imds": "npm:^4.0.1" - "@smithy/property-provider": "npm:^4.0.1" - "@smithy/shared-ini-file-loader": "npm:^4.0.1" - "@smithy/types": "npm:^4.1.0" - tslib: "npm:^2.6.2" - checksum: 10c0/f25c297e2717bdf09ae167051be8080079810d4634d2125fb3118f9bcf2d65d41e7f765fdbe857e1f1298833dc0433c18c545146ea9ef2192c25a35842bce881 - languageName: node - linkType: hard - -"@aws-sdk/credential-provider-node@npm:3.750.0": - version: 3.750.0 - resolution: "@aws-sdk/credential-provider-node@npm:3.750.0" - dependencies: - "@aws-sdk/credential-provider-env": "npm:3.750.0" - "@aws-sdk/credential-provider-http": "npm:3.750.0" - "@aws-sdk/credential-provider-ini": "npm:3.750.0" - 
"@aws-sdk/credential-provider-process": "npm:3.750.0" - "@aws-sdk/credential-provider-sso": "npm:3.750.0" - "@aws-sdk/credential-provider-web-identity": "npm:3.750.0" - "@aws-sdk/types": "npm:3.734.0" - "@smithy/credential-provider-imds": "npm:^4.0.1" - "@smithy/property-provider": "npm:^4.0.1" - "@smithy/shared-ini-file-loader": "npm:^4.0.1" - "@smithy/types": "npm:^4.1.0" - tslib: "npm:^2.6.2" - checksum: 10c0/3badb574b7c00d764795672451bb19f3f83c7f2e37ef9c6742063973507c8c445607894e90ba8a84ab7cae07528e275635e479869ae6c14d65f68a234121b119 - languageName: node - linkType: hard - -"@aws-sdk/credential-provider-process@npm:3.750.0": - version: 3.750.0 - resolution: "@aws-sdk/credential-provider-process@npm:3.750.0" - dependencies: - "@aws-sdk/core": "npm:3.750.0" - "@aws-sdk/types": "npm:3.734.0" - "@smithy/property-provider": "npm:^4.0.1" - "@smithy/shared-ini-file-loader": "npm:^4.0.1" - "@smithy/types": "npm:^4.1.0" - tslib: "npm:^2.6.2" - checksum: 10c0/f6bf9e83daaa685afe3bd413a12ef02424467e852526d51bfe210672cc353068394462e9af61eda2403fef6fb1c40e790b98b231ff130e2bf56d7b4621a725e5 - languageName: node - linkType: hard - -"@aws-sdk/credential-provider-sso@npm:3.750.0": - version: 3.750.0 - resolution: "@aws-sdk/credential-provider-sso@npm:3.750.0" - dependencies: - "@aws-sdk/client-sso": "npm:3.750.0" - "@aws-sdk/core": "npm:3.750.0" - "@aws-sdk/token-providers": "npm:3.750.0" - "@aws-sdk/types": "npm:3.734.0" - "@smithy/property-provider": "npm:^4.0.1" - "@smithy/shared-ini-file-loader": "npm:^4.0.1" - "@smithy/types": "npm:^4.1.0" - tslib: "npm:^2.6.2" - checksum: 10c0/32eb0a38c7f7c69a69fddf5411b3d4442226b54bef472316561685ab70438247f2a053255b2e2e3e79862c79b5614293e97fcc30e7e2fa3bf42e841de9f18974 - languageName: node - linkType: hard - -"@aws-sdk/credential-provider-web-identity@npm:3.750.0": - version: 3.750.0 - resolution: "@aws-sdk/credential-provider-web-identity@npm:3.750.0" - dependencies: - "@aws-sdk/core": "npm:3.750.0" - "@aws-sdk/nested-clients": "npm:3.750.0" - "@aws-sdk/types": "npm:3.734.0" - "@smithy/property-provider": "npm:^4.0.1" - "@smithy/types": "npm:^4.1.0" - tslib: "npm:^2.6.2" - checksum: 10c0/7a81561002c5b7a819c279bd2b881271b3bd0f641cef34a57e928eb7c0313d7302343e5a7c36ab452146ce2a03fe3c151d1b781553fab93d27c8478116d87ba2 - languageName: node - linkType: hard - -"@aws-sdk/middleware-bucket-endpoint@npm:3.734.0": - version: 3.734.0 - resolution: "@aws-sdk/middleware-bucket-endpoint@npm:3.734.0" - dependencies: - "@aws-sdk/types": "npm:3.734.0" - "@aws-sdk/util-arn-parser": "npm:3.723.0" - "@smithy/node-config-provider": "npm:^4.0.1" - "@smithy/protocol-http": "npm:^5.0.1" - "@smithy/types": "npm:^4.1.0" - "@smithy/util-config-provider": "npm:^4.0.0" - tslib: "npm:^2.6.2" - checksum: 10c0/f0f98bb478ff469ec3aab0ae5b8122cafc26e4d88efbb1d277429dfd21c70a64eaf996d5cbb7360ff93dcc0e985d75bca5bfcb6a814b1d18ab14c5b912c7c5ad - languageName: node - linkType: hard - -"@aws-sdk/middleware-expect-continue@npm:3.734.0": - version: 3.734.0 - resolution: "@aws-sdk/middleware-expect-continue@npm:3.734.0" - dependencies: - "@aws-sdk/types": "npm:3.734.0" - "@smithy/protocol-http": "npm:^5.0.1" - "@smithy/types": "npm:^4.1.0" - tslib: "npm:^2.6.2" - checksum: 10c0/5e6fa03e4b4ef8ff52314a5aea6b7c807e39516ad7c817003c8ef22c4d25de98dc469bab30d6f11a56cba7a968bcdf032373c8c1d074a16ff72ac2cd08f1a5e9 - languageName: node - linkType: hard - -"@aws-sdk/middleware-flexible-checksums@npm:3.750.0": - version: 3.750.0 - resolution: "@aws-sdk/middleware-flexible-checksums@npm:3.750.0" - dependencies: - 
"@aws-crypto/crc32": "npm:5.2.0" - "@aws-crypto/crc32c": "npm:5.2.0" - "@aws-crypto/util": "npm:5.2.0" - "@aws-sdk/core": "npm:3.750.0" - "@aws-sdk/types": "npm:3.734.0" - "@smithy/is-array-buffer": "npm:^4.0.0" - "@smithy/node-config-provider": "npm:^4.0.1" - "@smithy/protocol-http": "npm:^5.0.1" - "@smithy/types": "npm:^4.1.0" - "@smithy/util-middleware": "npm:^4.0.1" - "@smithy/util-stream": "npm:^4.1.1" - "@smithy/util-utf8": "npm:^4.0.0" - tslib: "npm:^2.6.2" - checksum: 10c0/d1c176d54978d3de1bb71531270e546ead441741547cc2f1aef97445d7af29aefee754df6ee9e85b06ca3528cfce22142c5c9c94f1f6e2bf12bb7c858462a73e - languageName: node - linkType: hard - -"@aws-sdk/middleware-host-header@npm:3.734.0": - version: 3.734.0 - resolution: "@aws-sdk/middleware-host-header@npm:3.734.0" - dependencies: - "@aws-sdk/types": "npm:3.734.0" - "@smithy/protocol-http": "npm:^5.0.1" - "@smithy/types": "npm:^4.1.0" - tslib: "npm:^2.6.2" - checksum: 10c0/56e8501c3beda2961ebba56f1146849594edafa0d33ce2bdb04b62df9732d1218ffe89882333d87d76079798dc575af1756db4d7270916d8d83f8d9ef7c4798e - languageName: node - linkType: hard - -"@aws-sdk/middleware-location-constraint@npm:3.734.0": - version: 3.734.0 - resolution: "@aws-sdk/middleware-location-constraint@npm:3.734.0" - dependencies: - "@aws-sdk/types": "npm:3.734.0" - "@smithy/types": "npm:^4.1.0" - tslib: "npm:^2.6.2" - checksum: 10c0/ec6a10d2545dfbda2806e8dd2244a6be76c97d5fdae2068c461cb61753801ce60079518ad45f3eb559a37042f057636da754cccec751d04d0b94b534d423424e - languageName: node - linkType: hard - -"@aws-sdk/middleware-logger@npm:3.734.0": - version: 3.734.0 - resolution: "@aws-sdk/middleware-logger@npm:3.734.0" - dependencies: - "@aws-sdk/types": "npm:3.734.0" - "@smithy/types": "npm:^4.1.0" - tslib: "npm:^2.6.2" - checksum: 10c0/dc690e546d0411929ff5888cd2dad56b7583f160ce4339f24d4963b9d11022f06da76d5f96c56d2ff2624493885254200788c763f113c26695875b8a229ee9a1 - languageName: node - linkType: hard - -"@aws-sdk/middleware-recursion-detection@npm:3.734.0": - version: 3.734.0 - resolution: "@aws-sdk/middleware-recursion-detection@npm:3.734.0" - dependencies: - "@aws-sdk/types": "npm:3.734.0" - "@smithy/protocol-http": "npm:^5.0.1" - "@smithy/types": "npm:^4.1.0" - tslib: "npm:^2.6.2" - checksum: 10c0/e46e5f99895a4370141b3439c58b94670fddd01d18bbda43a621cb0a5f2bb3384db66757f16da49815af52d29f2cfb8c5d12e273853ad34c919f4f71d078572f - languageName: node - linkType: hard - -"@aws-sdk/middleware-sdk-s3@npm:3.750.0": - version: 3.750.0 - resolution: "@aws-sdk/middleware-sdk-s3@npm:3.750.0" - dependencies: - "@aws-sdk/core": "npm:3.750.0" - "@aws-sdk/types": "npm:3.734.0" - "@aws-sdk/util-arn-parser": "npm:3.723.0" - "@smithy/core": "npm:^3.1.4" - "@smithy/node-config-provider": "npm:^4.0.1" - "@smithy/protocol-http": "npm:^5.0.1" - "@smithy/signature-v4": "npm:^5.0.1" - "@smithy/smithy-client": "npm:^4.1.5" - "@smithy/types": "npm:^4.1.0" - "@smithy/util-config-provider": "npm:^4.0.0" - "@smithy/util-middleware": "npm:^4.0.1" - "@smithy/util-stream": "npm:^4.1.1" - "@smithy/util-utf8": "npm:^4.0.0" - tslib: "npm:^2.6.2" - checksum: 10c0/f7e5e08c4ae895577f767060a7bc5cd7d9c24f105b66c44e906015932fcd4071c2e6c76e9e9df3790b8d4e72746a0f9dc628e8b7477fdafb81c8de8ccce1a24b - languageName: node - linkType: hard - -"@aws-sdk/middleware-ssec@npm:3.734.0": - version: 3.734.0 - resolution: "@aws-sdk/middleware-ssec@npm:3.734.0" - dependencies: - "@aws-sdk/types": "npm:3.734.0" - "@smithy/types": "npm:^4.1.0" - tslib: "npm:^2.6.2" - checksum: 
10c0/ba1d0f202ef0e58d82895bbe71dcb4520f0eaf958ebc37baa3383e42729091fca2f927ec3482478b0ece35ae001c72da9afb71c83504e0aba6df4074a6a2187a - languageName: node - linkType: hard - -"@aws-sdk/middleware-user-agent@npm:3.750.0": - version: 3.750.0 - resolution: "@aws-sdk/middleware-user-agent@npm:3.750.0" - dependencies: - "@aws-sdk/core": "npm:3.750.0" - "@aws-sdk/types": "npm:3.734.0" - "@aws-sdk/util-endpoints": "npm:3.743.0" - "@smithy/core": "npm:^3.1.4" - "@smithy/protocol-http": "npm:^5.0.1" - "@smithy/types": "npm:^4.1.0" - tslib: "npm:^2.6.2" - checksum: 10c0/24e5636b40370b778631b4af6381082318ad3de64b5566805215b0242e4f58b089ab2cb2c8c915b12b007ac8a7477a37db71c5d0fbd40b1452fccd68e17f984c - languageName: node - linkType: hard - -"@aws-sdk/nested-clients@npm:3.750.0": - version: 3.750.0 - resolution: "@aws-sdk/nested-clients@npm:3.750.0" - dependencies: - "@aws-crypto/sha256-browser": "npm:5.2.0" - "@aws-crypto/sha256-js": "npm:5.2.0" - "@aws-sdk/core": "npm:3.750.0" - "@aws-sdk/middleware-host-header": "npm:3.734.0" - "@aws-sdk/middleware-logger": "npm:3.734.0" - "@aws-sdk/middleware-recursion-detection": "npm:3.734.0" - "@aws-sdk/middleware-user-agent": "npm:3.750.0" - "@aws-sdk/region-config-resolver": "npm:3.734.0" - "@aws-sdk/types": "npm:3.734.0" - "@aws-sdk/util-endpoints": "npm:3.743.0" - "@aws-sdk/util-user-agent-browser": "npm:3.734.0" - "@aws-sdk/util-user-agent-node": "npm:3.750.0" - "@smithy/config-resolver": "npm:^4.0.1" - "@smithy/core": "npm:^3.1.4" - "@smithy/fetch-http-handler": "npm:^5.0.1" - "@smithy/hash-node": "npm:^4.0.1" - "@smithy/invalid-dependency": "npm:^4.0.1" - "@smithy/middleware-content-length": "npm:^4.0.1" - "@smithy/middleware-endpoint": "npm:^4.0.5" - "@smithy/middleware-retry": "npm:^4.0.6" - "@smithy/middleware-serde": "npm:^4.0.2" - "@smithy/middleware-stack": "npm:^4.0.1" - "@smithy/node-config-provider": "npm:^4.0.1" - "@smithy/node-http-handler": "npm:^4.0.2" - "@smithy/protocol-http": "npm:^5.0.1" - "@smithy/smithy-client": "npm:^4.1.5" - "@smithy/types": "npm:^4.1.0" - "@smithy/url-parser": "npm:^4.0.1" - "@smithy/util-base64": "npm:^4.0.0" - "@smithy/util-body-length-browser": "npm:^4.0.0" - "@smithy/util-body-length-node": "npm:^4.0.0" - "@smithy/util-defaults-mode-browser": "npm:^4.0.6" - "@smithy/util-defaults-mode-node": "npm:^4.0.6" - "@smithy/util-endpoints": "npm:^3.0.1" - "@smithy/util-middleware": "npm:^4.0.1" - "@smithy/util-retry": "npm:^4.0.1" - "@smithy/util-utf8": "npm:^4.0.0" - tslib: "npm:^2.6.2" - checksum: 10c0/6bb067637b529b7db3e7ad0fd00baa36261b7436fd0ecda645250b2bcb40b4d00c62989a5fe766e190b35cf829dc8cb8b91a56ecc00f3078da3bb6aeadd8bf66 - languageName: node - linkType: hard - -"@aws-sdk/region-config-resolver@npm:3.734.0": - version: 3.734.0 - resolution: "@aws-sdk/region-config-resolver@npm:3.734.0" - dependencies: - "@aws-sdk/types": "npm:3.734.0" - "@smithy/node-config-provider": "npm:^4.0.1" - "@smithy/types": "npm:^4.1.0" - "@smithy/util-config-provider": "npm:^4.0.0" - "@smithy/util-middleware": "npm:^4.0.1" - tslib: "npm:^2.6.2" - checksum: 10c0/c1e026dcbe9d7529ec5efee979a868d0c868287d68e7e219bd730d887ab1ccf17ef48516477e57325fef55543217496bcfe7ba6d17d9ecad98cf8cf18d5ced63 - languageName: node - linkType: hard - -"@aws-sdk/signature-v4-multi-region@npm:3.750.0": - version: 3.750.0 - resolution: "@aws-sdk/signature-v4-multi-region@npm:3.750.0" - dependencies: - "@aws-sdk/middleware-sdk-s3": "npm:3.750.0" - "@aws-sdk/types": "npm:3.734.0" - "@smithy/protocol-http": "npm:^5.0.1" - "@smithy/signature-v4": "npm:^5.0.1" - 
"@smithy/types": "npm:^4.1.0" - tslib: "npm:^2.6.2" - checksum: 10c0/b51c9bc6dda0b2ae2f5d75897be67f1408d27def508206b9c62cddd68e2ec7911e91a174b853dbfae7df8b294c01583ab0b936b9ce4acd00ff2e87b538268000 - languageName: node - linkType: hard - -"@aws-sdk/token-providers@npm:3.750.0": - version: 3.750.0 - resolution: "@aws-sdk/token-providers@npm:3.750.0" - dependencies: - "@aws-sdk/nested-clients": "npm:3.750.0" - "@aws-sdk/types": "npm:3.734.0" - "@smithy/property-provider": "npm:^4.0.1" - "@smithy/shared-ini-file-loader": "npm:^4.0.1" - "@smithy/types": "npm:^4.1.0" - tslib: "npm:^2.6.2" - checksum: 10c0/1486ad60eef09bce9d9c118048c27969bdfee25721524a65a1c66e3461a1413e6ca1dedbf51976d8b39168c5045039d9e5a0d841b44aa29293858c07037a1c80 - languageName: node - linkType: hard - -"@aws-sdk/types@npm:3.734.0, @aws-sdk/types@npm:^3.222.0": - version: 3.734.0 - resolution: "@aws-sdk/types@npm:3.734.0" - dependencies: - "@smithy/types": "npm:^4.1.0" - tslib: "npm:^2.6.2" - checksum: 10c0/74313849619b8bce9e6a52c70fcdaa212574a443503c78bccdba77cdc7bc66b8cecefe461852e0bab7376cc2ec3e1891730b1a027be63efb47394115c8ddb856 - languageName: node - linkType: hard - -"@aws-sdk/util-arn-parser@npm:3.723.0": - version: 3.723.0 - resolution: "@aws-sdk/util-arn-parser@npm:3.723.0" - dependencies: - tslib: "npm:^2.6.2" - checksum: 10c0/5d2adfded61acaf222ed21bf8e5a8b067fe469dfaab03a6b69c591a090c48d309b1f3c4fd64826f71ef9883390adb77a9bf884667b242615f221236bc5a8b326 - languageName: node - linkType: hard - -"@aws-sdk/util-endpoints@npm:3.743.0": - version: 3.743.0 - resolution: "@aws-sdk/util-endpoints@npm:3.743.0" - dependencies: - "@aws-sdk/types": "npm:3.734.0" - "@smithy/types": "npm:^4.1.0" - "@smithy/util-endpoints": "npm:^3.0.1" - tslib: "npm:^2.6.2" - checksum: 10c0/9adba3aa9a5a3cadb7f89c7b3424034c5efb7c10c55114ab02e3d069b4112a05a1e8578ff6ed937412f5d5d1a9cdeeac03b80e5b5d47eaf8fb167d031915e424 - languageName: node - linkType: hard - -"@aws-sdk/util-locate-window@npm:^3.0.0": - version: 3.723.0 - resolution: "@aws-sdk/util-locate-window@npm:3.723.0" - dependencies: - tslib: "npm:^2.6.2" - checksum: 10c0/c9c75d3ee06bd1d1edad78bea8324f2d4ad6086803f27731e1f3c25e946bb630c8db2991a5337e4dbeee06507deab9abea80b134ba4e3fbb27471d438a030639 - languageName: node - linkType: hard - -"@aws-sdk/util-user-agent-browser@npm:3.734.0": - version: 3.734.0 - resolution: "@aws-sdk/util-user-agent-browser@npm:3.734.0" - dependencies: - "@aws-sdk/types": "npm:3.734.0" - "@smithy/types": "npm:^4.1.0" - bowser: "npm:^2.11.0" - tslib: "npm:^2.6.2" - checksum: 10c0/7fc8c5e29f3219f8abf1d0cff73dd6bb34f32a235473843e50f61375b1c05f4c49269cd956c9e4623c87c025e1eeef9fc699ae3389665459721bc11e00c25ead - languageName: node - linkType: hard - -"@aws-sdk/util-user-agent-node@npm:3.750.0": - version: 3.750.0 - resolution: "@aws-sdk/util-user-agent-node@npm:3.750.0" - dependencies: - "@aws-sdk/middleware-user-agent": "npm:3.750.0" - "@aws-sdk/types": "npm:3.734.0" - "@smithy/node-config-provider": "npm:^4.0.1" - "@smithy/types": "npm:^4.1.0" - tslib: "npm:^2.6.2" - peerDependencies: - aws-crt: ">=1.0.0" - peerDependenciesMeta: - aws-crt: - optional: true - checksum: 10c0/0f903a4830a2d88e962644eb3a11a7d672898224579a3812172cbdabb881338bff08d904801cb9480c006342f7f605cb764c413e5cb09d4ccf5e40b82734b554 - languageName: node - linkType: hard - -"@aws-sdk/xml-builder@npm:3.734.0": - version: 3.734.0 - resolution: "@aws-sdk/xml-builder@npm:3.734.0" - dependencies: - "@smithy/types": "npm:^4.1.0" - tslib: "npm:^2.6.2" - checksum: 
10c0/77eb3d603d45a235982a86e5adbc2de727389924cbbd8edb9b13f1a201b15304c57aebb18e00cce909920b3519d0ca71406989b01b6544c87c7b3c4f04d66887 - languageName: node - linkType: hard - "@babel/code-frame@npm:^7.0.0, @babel/code-frame@npm:^7.16.7, @babel/code-frame@npm:^7.22.13, @babel/code-frame@npm:^7.22.5, @babel/code-frame@npm:^7.25.7": version: 7.25.7 resolution: "@babel/code-frame@npm:7.25.7" @@ -5489,602 +4852,6 @@ __metadata: languageName: node linkType: hard -"@smithy/abort-controller@npm:^4.0.1": - version: 4.0.1 - resolution: "@smithy/abort-controller@npm:4.0.1" - dependencies: - "@smithy/types": "npm:^4.1.0" - tslib: "npm:^2.6.2" - checksum: 10c0/1ecd5c3454ced008463e6de826c294f31f6073ba91e22e443e0269ee0854d9376f73ea756b3acf77aa806a9a98e8b2568ce2e7f15ddf0a7816c99b7deefeef57 - languageName: node - linkType: hard - -"@smithy/chunked-blob-reader-native@npm:^4.0.0": - version: 4.0.0 - resolution: "@smithy/chunked-blob-reader-native@npm:4.0.0" - dependencies: - "@smithy/util-base64": "npm:^4.0.0" - tslib: "npm:^2.6.2" - checksum: 10c0/4387f4e8841f20c1c4e689078141de7e6f239e7883be3a02810a023aa30939b15576ee00227b991972d2c5a2f3b6152bcaeca0975c9fa8d3669354c647bd532a - languageName: node - linkType: hard - -"@smithy/chunked-blob-reader@npm:^5.0.0": - version: 5.0.0 - resolution: "@smithy/chunked-blob-reader@npm:5.0.0" - dependencies: - tslib: "npm:^2.6.2" - checksum: 10c0/55ba0fe366ddaa3f93e1faf8a70df0b67efedbd0008922295efe215df09b68df0ba3043293e65b17e7d1be71448d074c2bfc54e5eb6bd18f59b425822c2b9e9a - languageName: node - linkType: hard - -"@smithy/config-resolver@npm:^4.0.1": - version: 4.0.1 - resolution: "@smithy/config-resolver@npm:4.0.1" - dependencies: - "@smithy/node-config-provider": "npm:^4.0.1" - "@smithy/types": "npm:^4.1.0" - "@smithy/util-config-provider": "npm:^4.0.0" - "@smithy/util-middleware": "npm:^4.0.1" - tslib: "npm:^2.6.2" - checksum: 10c0/4ec3486deb3017607ed1b9a42b4b806b78e2c7a00f6dd51b98ccb82d9f7506b206bd9412ec0d2a05e95bc2ac3fbbafe55b1ffce9faccc4086f837645f3f7e64d - languageName: node - linkType: hard - -"@smithy/core@npm:^3.1.4": - version: 3.1.4 - resolution: "@smithy/core@npm:3.1.4" - dependencies: - "@smithy/middleware-serde": "npm:^4.0.2" - "@smithy/protocol-http": "npm:^5.0.1" - "@smithy/types": "npm:^4.1.0" - "@smithy/util-body-length-browser": "npm:^4.0.0" - "@smithy/util-middleware": "npm:^4.0.1" - "@smithy/util-stream": "npm:^4.1.1" - "@smithy/util-utf8": "npm:^4.0.0" - tslib: "npm:^2.6.2" - checksum: 10c0/8c91573fe679eecc160440b66895bb22e1549a320c86066d01ec63aa9bf756e16bb0135e0d48b039b1ccd0f8f6b580d20242d784236b6c5ca566e1cb6bf0901a - languageName: node - linkType: hard - -"@smithy/credential-provider-imds@npm:^4.0.1": - version: 4.0.1 - resolution: "@smithy/credential-provider-imds@npm:4.0.1" - dependencies: - "@smithy/node-config-provider": "npm:^4.0.1" - "@smithy/property-provider": "npm:^4.0.1" - "@smithy/types": "npm:^4.1.0" - "@smithy/url-parser": "npm:^4.0.1" - tslib: "npm:^2.6.2" - checksum: 10c0/76b5d82dfd2924f2b7a701fa159af54d3e9b16a644a210e3a74e5a3776bb28c2ffbdd342ed3f2bb1d2adf401e8144e84614523b1fad245b43e319e1d01fa1652 - languageName: node - linkType: hard - -"@smithy/eventstream-codec@npm:^4.0.1": - version: 4.0.1 - resolution: "@smithy/eventstream-codec@npm:4.0.1" - dependencies: - "@aws-crypto/crc32": "npm:5.2.0" - "@smithy/types": "npm:^4.1.0" - "@smithy/util-hex-encoding": "npm:^4.0.0" - tslib: "npm:^2.6.2" - checksum: 10c0/439262fddae863cadad83cc468418294d1d998134619dd67e2836cc93bbfa5b01448e852516046f03b62d0edcd558014b755b1fb0d71b9317268d5c3a5e55bbd - 
languageName: node - linkType: hard - -"@smithy/eventstream-serde-browser@npm:^4.0.1": - version: 4.0.1 - resolution: "@smithy/eventstream-serde-browser@npm:4.0.1" - dependencies: - "@smithy/eventstream-serde-universal": "npm:^4.0.1" - "@smithy/types": "npm:^4.1.0" - tslib: "npm:^2.6.2" - checksum: 10c0/4766a8a735085dea1ed9aad486fa70cb04908a31843d4e698a28accc373a6dc80bc8abe9834d390f347326458c03424afbd7f7f9e59a66970b839de3d44940e1 - languageName: node - linkType: hard - -"@smithy/eventstream-serde-config-resolver@npm:^4.0.1": - version: 4.0.1 - resolution: "@smithy/eventstream-serde-config-resolver@npm:4.0.1" - dependencies: - "@smithy/types": "npm:^4.1.0" - tslib: "npm:^2.6.2" - checksum: 10c0/4ba8bba39392025389c610ce984b612adfe0ed2b37f926e6ce2acafaf178d04aec395924ff37d2ad9534a28652fc64c4938b66b4bd1d2ff695ac8fcdcc4d356e - languageName: node - linkType: hard - -"@smithy/eventstream-serde-node@npm:^4.0.1": - version: 4.0.1 - resolution: "@smithy/eventstream-serde-node@npm:4.0.1" - dependencies: - "@smithy/eventstream-serde-universal": "npm:^4.0.1" - "@smithy/types": "npm:^4.1.0" - tslib: "npm:^2.6.2" - checksum: 10c0/ed451ed4483ca62cb450a7540e43ba99b816e32da7bd306d14ea49dd3ceb8a37f791578a0e5d21caf9b9f75c36c69e025c7add117cf8b0510ad3ef32ac38b08c - languageName: node - linkType: hard - -"@smithy/eventstream-serde-universal@npm:^4.0.1": - version: 4.0.1 - resolution: "@smithy/eventstream-serde-universal@npm:4.0.1" - dependencies: - "@smithy/eventstream-codec": "npm:^4.0.1" - "@smithy/types": "npm:^4.1.0" - tslib: "npm:^2.6.2" - checksum: 10c0/8a1261fca8df7559bf78234f961903281b8602ffdbe0ff25f506cba25f013e4bb93bd8380703224fe63aeaf66e13bfebbdaf8083f38628750fc5f3c4ee07dff8 - languageName: node - linkType: hard - -"@smithy/fetch-http-handler@npm:^5.0.1": - version: 5.0.1 - resolution: "@smithy/fetch-http-handler@npm:5.0.1" - dependencies: - "@smithy/protocol-http": "npm:^5.0.1" - "@smithy/querystring-builder": "npm:^4.0.1" - "@smithy/types": "npm:^4.1.0" - "@smithy/util-base64": "npm:^4.0.0" - tslib: "npm:^2.6.2" - checksum: 10c0/5123f6119de50d4c992ebf29b769382d7000db4ed8f564680c5727e2a8beb71664198eb2eaf7cb6152ab777f654d54cf9bff5a4658e1cfdeef2987eeea7f1149 - languageName: node - linkType: hard - -"@smithy/hash-blob-browser@npm:^4.0.1": - version: 4.0.1 - resolution: "@smithy/hash-blob-browser@npm:4.0.1" - dependencies: - "@smithy/chunked-blob-reader": "npm:^5.0.0" - "@smithy/chunked-blob-reader-native": "npm:^4.0.0" - "@smithy/types": "npm:^4.1.0" - tslib: "npm:^2.6.2" - checksum: 10c0/16c61fe0ff52074aa374a439955f0ea0a6c6fb64744b55c840f29db1da05cefb340a6d1d4b2a7708ca6f447e972015a95bdfef4fc5361d0bc7c2c3b5cd4c1ca8 - languageName: node - linkType: hard - -"@smithy/hash-node@npm:^4.0.1": - version: 4.0.1 - resolution: "@smithy/hash-node@npm:4.0.1" - dependencies: - "@smithy/types": "npm:^4.1.0" - "@smithy/util-buffer-from": "npm:^4.0.0" - "@smithy/util-utf8": "npm:^4.0.0" - tslib: "npm:^2.6.2" - checksum: 10c0/d84be63a2c8a4aafa3b9f23ae76c9cf92a31fa7c49c85930424da1335259b29f6333c5c82d2e7bf689549290ffd0d995043c9ea6f05b0b2a8dfad1f649eac43f - languageName: node - linkType: hard - -"@smithy/hash-stream-node@npm:^4.0.1": - version: 4.0.1 - resolution: "@smithy/hash-stream-node@npm:4.0.1" - dependencies: - "@smithy/types": "npm:^4.1.0" - "@smithy/util-utf8": "npm:^4.0.0" - tslib: "npm:^2.6.2" - checksum: 10c0/c214460da504008905dff7c654cc8b49dfcb060fedef77e63fc36e3c71972be39b018e4a5618e3efb654a6b63a604975521c161ae4614d2580a4c821dfb6e1d5 - languageName: node - linkType: hard - 
-"@smithy/invalid-dependency@npm:^4.0.1": - version: 4.0.1 - resolution: "@smithy/invalid-dependency@npm:4.0.1" - dependencies: - "@smithy/types": "npm:^4.1.0" - tslib: "npm:^2.6.2" - checksum: 10c0/74bebdffb6845f6060eed482ad6e921df66af90d2f8c63f39a3bb334fa68a3e3aa8bd5cd7aa5f65628857e235e113895433895db910ba290633daa0df5725eb7 - languageName: node - linkType: hard - -"@smithy/is-array-buffer@npm:^2.2.0": - version: 2.2.0 - resolution: "@smithy/is-array-buffer@npm:2.2.0" - dependencies: - tslib: "npm:^2.6.2" - checksum: 10c0/2f2523cd8cc4538131e408eb31664983fecb0c8724956788b015aaf3ab85a0c976b50f4f09b176f1ed7bbe79f3edf80743be7a80a11f22cd9ce1285d77161aaf - languageName: node - linkType: hard - -"@smithy/is-array-buffer@npm:^4.0.0": - version: 4.0.0 - resolution: "@smithy/is-array-buffer@npm:4.0.0" - dependencies: - tslib: "npm:^2.6.2" - checksum: 10c0/ae393fbd5944d710443cd5dd225d1178ef7fb5d6259c14f3e1316ec75e401bda6cf86f7eb98bfd38e5ed76e664b810426a5756b916702cbd418f0933e15e7a3b - languageName: node - linkType: hard - -"@smithy/md5-js@npm:^4.0.1": - version: 4.0.1 - resolution: "@smithy/md5-js@npm:4.0.1" - dependencies: - "@smithy/types": "npm:^4.1.0" - "@smithy/util-utf8": "npm:^4.0.0" - tslib: "npm:^2.6.2" - checksum: 10c0/b5e3fa1d31832535b3a35d0a52ebf983da7cf1a1658b6a7f8bcc948cde808eb361696575d78e5e5df92f3c9b9569b5a1f2d1dff7b465d0a803fa901e0286599d - languageName: node - linkType: hard - -"@smithy/middleware-content-length@npm:^4.0.1": - version: 4.0.1 - resolution: "@smithy/middleware-content-length@npm:4.0.1" - dependencies: - "@smithy/protocol-http": "npm:^5.0.1" - "@smithy/types": "npm:^4.1.0" - tslib: "npm:^2.6.2" - checksum: 10c0/3dfbfe658cc8636e9e923a10151a32c6234897c4a86856e55fe4fadc322b3f3e977e50d15553afcb34cadb213de2d95a82af9c8f735e758f4dc21a031e8ecb17 - languageName: node - linkType: hard - -"@smithy/middleware-endpoint@npm:^4.0.5": - version: 4.0.5 - resolution: "@smithy/middleware-endpoint@npm:4.0.5" - dependencies: - "@smithy/core": "npm:^3.1.4" - "@smithy/middleware-serde": "npm:^4.0.2" - "@smithy/node-config-provider": "npm:^4.0.1" - "@smithy/shared-ini-file-loader": "npm:^4.0.1" - "@smithy/types": "npm:^4.1.0" - "@smithy/url-parser": "npm:^4.0.1" - "@smithy/util-middleware": "npm:^4.0.1" - tslib: "npm:^2.6.2" - checksum: 10c0/4573b7fb9525c3b887050183dc0c31bb6fd2801c98a8e94984474634e940a5efd73bbfc49c50d90245089112519bfcdbd8b5c2f279b2f4e64bd8df2203d5221c - languageName: node - linkType: hard - -"@smithy/middleware-retry@npm:^4.0.6": - version: 4.0.6 - resolution: "@smithy/middleware-retry@npm:4.0.6" - dependencies: - "@smithy/node-config-provider": "npm:^4.0.1" - "@smithy/protocol-http": "npm:^5.0.1" - "@smithy/service-error-classification": "npm:^4.0.1" - "@smithy/smithy-client": "npm:^4.1.5" - "@smithy/types": "npm:^4.1.0" - "@smithy/util-middleware": "npm:^4.0.1" - "@smithy/util-retry": "npm:^4.0.1" - tslib: "npm:^2.6.2" - uuid: "npm:^9.0.1" - checksum: 10c0/395888b3ae39b4bfa91b145f77f72a31de63a5e1fe7bbefb6a8ce0596b6843f92cf640421cf3e802746e6432946035d61e5e665d0dc1bdc9c70ce318b6347c45 - languageName: node - linkType: hard - -"@smithy/middleware-serde@npm:^4.0.2": - version: 4.0.2 - resolution: "@smithy/middleware-serde@npm:4.0.2" - dependencies: - "@smithy/types": "npm:^4.1.0" - tslib: "npm:^2.6.2" - checksum: 10c0/b1efee86ecc37a063bdfdb89cf691c9b9627502473f2caa0c964c0648f7b550b7a49755a9b13cdfc11aebf1641cf3ae6f8b5f1895a20241960504936da9b3138 - languageName: node - linkType: hard - -"@smithy/middleware-stack@npm:^4.0.1": - version: 4.0.1 - resolution: 
"@smithy/middleware-stack@npm:4.0.1" - dependencies: - "@smithy/types": "npm:^4.1.0" - tslib: "npm:^2.6.2" - checksum: 10c0/b7f710e263e37a8c80c8d31c7d8fe5f66dec2955cde412054eefcc8df53905e1e2e53a01fd7930eb82c82a3a28eadd00e69f07dfc6e793b1d9272db58a982e9b - languageName: node - linkType: hard - -"@smithy/node-config-provider@npm:^4.0.1": - version: 4.0.1 - resolution: "@smithy/node-config-provider@npm:4.0.1" - dependencies: - "@smithy/property-provider": "npm:^4.0.1" - "@smithy/shared-ini-file-loader": "npm:^4.0.1" - "@smithy/types": "npm:^4.1.0" - tslib: "npm:^2.6.2" - checksum: 10c0/f8d3b1fe91eeba41426ec57d62cfbeaed027650b5549fb2ba5bc889c1cfb7880d4fdb5a484d231b3fb2a9c9023c1f4e8907a5d18d75b3787481cde9f87c4d9cb - languageName: node - linkType: hard - -"@smithy/node-http-handler@npm:^4.0.2": - version: 4.0.2 - resolution: "@smithy/node-http-handler@npm:4.0.2" - dependencies: - "@smithy/abort-controller": "npm:^4.0.1" - "@smithy/protocol-http": "npm:^5.0.1" - "@smithy/querystring-builder": "npm:^4.0.1" - "@smithy/types": "npm:^4.1.0" - tslib: "npm:^2.6.2" - checksum: 10c0/6a3446dcf3bf006cf55b065edfbe7636f2aa13073f2937e224890902de44b191a5214dce4cb61e98b1ad53889bdbb35386e8810a338bc75ea3743f8d4550a2ad - languageName: node - linkType: hard - -"@smithy/property-provider@npm:^4.0.1": - version: 4.0.1 - resolution: "@smithy/property-provider@npm:4.0.1" - dependencies: - "@smithy/types": "npm:^4.1.0" - tslib: "npm:^2.6.2" - checksum: 10c0/43960a6bdf25944e1cc9d4ee83bf45ab5641f7e2068c46d5015166c0f035b1752e03847d7c15d3c013f5f0467441c9c5a8d6a0428f5401988035867709e4dea3 - languageName: node - linkType: hard - -"@smithy/protocol-http@npm:^5.0.1": - version: 5.0.1 - resolution: "@smithy/protocol-http@npm:5.0.1" - dependencies: - "@smithy/types": "npm:^4.1.0" - tslib: "npm:^2.6.2" - checksum: 10c0/87b157cc86c23f7199acad237e5e0cc309b18a2a4162dfd8f99609f6cca403f832b645535e58173e2933b4d96ec71f2df16d04e1bdcf52b7b0fcbdbc0067de93 - languageName: node - linkType: hard - -"@smithy/querystring-builder@npm:^4.0.1": - version: 4.0.1 - resolution: "@smithy/querystring-builder@npm:4.0.1" - dependencies: - "@smithy/types": "npm:^4.1.0" - "@smithy/util-uri-escape": "npm:^4.0.0" - tslib: "npm:^2.6.2" - checksum: 10c0/21f39e3a79458d343f3dec76b38598c49a34a3c4d1d3c23b6c8895eae2b610fb3c704f995a1730599ef7a881216ea064a25bb7dc8abe5bb1ee50dc6078ad97a4 - languageName: node - linkType: hard - -"@smithy/querystring-parser@npm:^4.0.1": - version: 4.0.1 - resolution: "@smithy/querystring-parser@npm:4.0.1" - dependencies: - "@smithy/types": "npm:^4.1.0" - tslib: "npm:^2.6.2" - checksum: 10c0/10e5aba13fbb9a602299fb92f02142e291ab5c7cd221e0ca542981414533e081abdd7442de335f2267ee4a9ff8eba4d7ba848455df50d2771f0ddb8b7d8f9d8b - languageName: node - linkType: hard - -"@smithy/service-error-classification@npm:^4.0.1": - version: 4.0.1 - resolution: "@smithy/service-error-classification@npm:4.0.1" - dependencies: - "@smithy/types": "npm:^4.1.0" - checksum: 10c0/de015fd140bf4e97da34a2283ce73971eb3b3aae53a257000dce0c99b8974a5e76bae9e517545ef58bd00ca8094c813cd1bcf0696c2c51e731418e2a769c744f - languageName: node - linkType: hard - -"@smithy/shared-ini-file-loader@npm:^4.0.1": - version: 4.0.1 - resolution: "@smithy/shared-ini-file-loader@npm:4.0.1" - dependencies: - "@smithy/types": "npm:^4.1.0" - tslib: "npm:^2.6.2" - checksum: 10c0/0f0173dbe61c8dac6847cc2c5115db5f1292c956c7f0559ce7bc8e5ed196a4b102977445ee1adb72206a15226a1098cdea01e92aa8ce19f4343f1135e7d37bcf - languageName: node - linkType: hard - -"@smithy/signature-v4@npm:^5.0.1": - version: 5.0.1 - 
resolution: "@smithy/signature-v4@npm:5.0.1" - dependencies: - "@smithy/is-array-buffer": "npm:^4.0.0" - "@smithy/protocol-http": "npm:^5.0.1" - "@smithy/types": "npm:^4.1.0" - "@smithy/util-hex-encoding": "npm:^4.0.0" - "@smithy/util-middleware": "npm:^4.0.1" - "@smithy/util-uri-escape": "npm:^4.0.0" - "@smithy/util-utf8": "npm:^4.0.0" - tslib: "npm:^2.6.2" - checksum: 10c0/a7f118642c9641f813098faad355fc5b54ae215fec589fb238d72d44149248c02e32dcfe034000f151ab665450542df88c70d269f9a3233e01a905ec03512514 - languageName: node - linkType: hard - -"@smithy/smithy-client@npm:^4.1.5": - version: 4.1.5 - resolution: "@smithy/smithy-client@npm:4.1.5" - dependencies: - "@smithy/core": "npm:^3.1.4" - "@smithy/middleware-endpoint": "npm:^4.0.5" - "@smithy/middleware-stack": "npm:^4.0.1" - "@smithy/protocol-http": "npm:^5.0.1" - "@smithy/types": "npm:^4.1.0" - "@smithy/util-stream": "npm:^4.1.1" - tslib: "npm:^2.6.2" - checksum: 10c0/7dbb54f2cff8d502ac93b03181e78ca051f1f6028df0643805f3aceefb4bbe492e4a7e4496933a8bfc146eb65879554bf9a17d083351ff2e9302d0494b67fa28 - languageName: node - linkType: hard - -"@smithy/types@npm:^4.1.0": - version: 4.1.0 - resolution: "@smithy/types@npm:4.1.0" - dependencies: - tslib: "npm:^2.6.2" - checksum: 10c0/d8817145ea043c5b29783df747ed47c3a1c584fd9d02bbdb609d38b7cb4dded1197ac214ae112744c86abe0537a314dae0edbc0e752bb639ef2d9fb84c67a9d9 - languageName: node - linkType: hard - -"@smithy/url-parser@npm:^4.0.1": - version: 4.0.1 - resolution: "@smithy/url-parser@npm:4.0.1" - dependencies: - "@smithy/querystring-parser": "npm:^4.0.1" - "@smithy/types": "npm:^4.1.0" - tslib: "npm:^2.6.2" - checksum: 10c0/fc969b55857b3bcdc920f54bbb9b0c88b5c7695ac7100bea1c7038fd4c9a09ebe0fbb38c4839d39acea28da0d8cb4fea71ffbf362d8aec295acbb94c1b45fc86 - languageName: node - linkType: hard - -"@smithy/util-base64@npm:^4.0.0": - version: 4.0.0 - resolution: "@smithy/util-base64@npm:4.0.0" - dependencies: - "@smithy/util-buffer-from": "npm:^4.0.0" - "@smithy/util-utf8": "npm:^4.0.0" - tslib: "npm:^2.6.2" - checksum: 10c0/ad18ec66cc357c189eef358d96876b114faf7086b13e47e009b265d0ff80cec046052500489c183957b3a036768409acdd1a373e01074cc002ca6983f780cffc - languageName: node - linkType: hard - -"@smithy/util-body-length-browser@npm:^4.0.0": - version: 4.0.0 - resolution: "@smithy/util-body-length-browser@npm:4.0.0" - dependencies: - tslib: "npm:^2.6.2" - checksum: 10c0/574a10934024a86556e9dcde1a9776170284326c3dfcc034afa128cc5a33c1c8179fca9cfb622ef8be5f2004316cc3f427badccceb943e829105536ec26306d9 - languageName: node - linkType: hard - -"@smithy/util-body-length-node@npm:^4.0.0": - version: 4.0.0 - resolution: "@smithy/util-body-length-node@npm:4.0.0" - dependencies: - tslib: "npm:^2.6.2" - checksum: 10c0/e91fd3816767606c5f786166ada26440457fceb60f96653b3d624dcf762a8c650e513c275ff3f647cb081c63c283cc178853a7ed9aa224abc8ece4eeeef7a1dd - languageName: node - linkType: hard - -"@smithy/util-buffer-from@npm:^2.2.0": - version: 2.2.0 - resolution: "@smithy/util-buffer-from@npm:2.2.0" - dependencies: - "@smithy/is-array-buffer": "npm:^2.2.0" - tslib: "npm:^2.6.2" - checksum: 10c0/223d6a508b52ff236eea01cddc062b7652d859dd01d457a4e50365af3de1e24a05f756e19433f6ccf1538544076b4215469e21a4ea83dc1d58d829725b0dbc5a - languageName: node - linkType: hard - -"@smithy/util-buffer-from@npm:^4.0.0": - version: 4.0.0 - resolution: "@smithy/util-buffer-from@npm:4.0.0" - dependencies: - "@smithy/is-array-buffer": "npm:^4.0.0" - tslib: "npm:^2.6.2" - checksum: 
10c0/be7cd33b6cb91503982b297716251e67cdca02819a15797632091cadab2dc0b4a147fff0709a0aa9bbc0b82a2644a7ed7c8afdd2194d5093cee2e9605b3a9f6f - languageName: node - linkType: hard - -"@smithy/util-config-provider@npm:^4.0.0": - version: 4.0.0 - resolution: "@smithy/util-config-provider@npm:4.0.0" - dependencies: - tslib: "npm:^2.6.2" - checksum: 10c0/cd9498d5f77a73aadd575084bcb22d2bb5945bac4605d605d36f2efe3f165f2b60f4dc88b7a62c2ed082ffa4b2c2f19621d0859f18399edbc2b5988d92e4649f - languageName: node - linkType: hard - -"@smithy/util-defaults-mode-browser@npm:^4.0.6": - version: 4.0.6 - resolution: "@smithy/util-defaults-mode-browser@npm:4.0.6" - dependencies: - "@smithy/property-provider": "npm:^4.0.1" - "@smithy/smithy-client": "npm:^4.1.5" - "@smithy/types": "npm:^4.1.0" - bowser: "npm:^2.11.0" - tslib: "npm:^2.6.2" - checksum: 10c0/4c1d406f7bde7455649ef70d1f09955e614da8a000ffeceac111aad0ee3daeb126206e88ae169f359da3aace382e2800bc20475438343ff87970682a3fdc6aa2 - languageName: node - linkType: hard - -"@smithy/util-defaults-mode-node@npm:^4.0.6": - version: 4.0.6 - resolution: "@smithy/util-defaults-mode-node@npm:4.0.6" - dependencies: - "@smithy/config-resolver": "npm:^4.0.1" - "@smithy/credential-provider-imds": "npm:^4.0.1" - "@smithy/node-config-provider": "npm:^4.0.1" - "@smithy/property-provider": "npm:^4.0.1" - "@smithy/smithy-client": "npm:^4.1.5" - "@smithy/types": "npm:^4.1.0" - tslib: "npm:^2.6.2" - checksum: 10c0/30209b45ed2f45d8152e4be2bffb1fe6b9a99fb350659170adcef464bd7f926c33651555d0592f1fbe1280432e90d0862061dd486af438afd9b356db20b0986e - languageName: node - linkType: hard - -"@smithy/util-endpoints@npm:^3.0.1": - version: 3.0.1 - resolution: "@smithy/util-endpoints@npm:3.0.1" - dependencies: - "@smithy/node-config-provider": "npm:^4.0.1" - "@smithy/types": "npm:^4.1.0" - tslib: "npm:^2.6.2" - checksum: 10c0/fed80f300e6a6e69873e613cdd12f640d33a19fc09a41e3afd536f7ea36f7785edd96fbd0402b6980a0e5dfc9bcb8b37f503d522b4ef317f31f4fd0100c466ff - languageName: node - linkType: hard - -"@smithy/util-hex-encoding@npm:^4.0.0": - version: 4.0.0 - resolution: "@smithy/util-hex-encoding@npm:4.0.0" - dependencies: - tslib: "npm:^2.6.2" - checksum: 10c0/70dbb3aa1a79aff3329d07a66411ff26398df338bdd8a6d077b438231afe3dc86d9a7022204baddecd8bc633f059d5c841fa916d81dd7447ea79b64148f386d2 - languageName: node - linkType: hard - -"@smithy/util-middleware@npm:^4.0.1": - version: 4.0.1 - resolution: "@smithy/util-middleware@npm:4.0.1" - dependencies: - "@smithy/types": "npm:^4.1.0" - tslib: "npm:^2.6.2" - checksum: 10c0/1dd2b058f392fb6788809f14c2c1d53411f79f6e9f88b515ffd36792f9f5939fe4af96fb5b0486a3d0cd30181783b7a5393dce2e8b83ba62db7c6d3af6572eff - languageName: node - linkType: hard - -"@smithy/util-retry@npm:^4.0.1": - version: 4.0.1 - resolution: "@smithy/util-retry@npm:4.0.1" - dependencies: - "@smithy/service-error-classification": "npm:^4.0.1" - "@smithy/types": "npm:^4.1.0" - tslib: "npm:^2.6.2" - checksum: 10c0/93ef89572651b8a30b9a648292660ae9532508ec6d2577afc62e1d9125fe6d14086e0f70a2981bf9f12256b41a57152368b5ed839cdd2df47ba78dd005615173 - languageName: node - linkType: hard - -"@smithy/util-stream@npm:^4.1.1": - version: 4.1.1 - resolution: "@smithy/util-stream@npm:4.1.1" - dependencies: - "@smithy/fetch-http-handler": "npm:^5.0.1" - "@smithy/node-http-handler": "npm:^4.0.2" - "@smithy/types": "npm:^4.1.0" - "@smithy/util-base64": "npm:^4.0.0" - "@smithy/util-buffer-from": "npm:^4.0.0" - "@smithy/util-hex-encoding": "npm:^4.0.0" - "@smithy/util-utf8": "npm:^4.0.0" - tslib: "npm:^2.6.2" - checksum: 
10c0/9088e4e9baeac8af4de3bc8694cc57d49b3c9ef45c6441cc572b3d14fb88e0929624070d1528c3afe27ab710a2e0eb4a7c2938d676795b78788ab135b2f66e32 - languageName: node - linkType: hard - -"@smithy/util-uri-escape@npm:^4.0.0": - version: 4.0.0 - resolution: "@smithy/util-uri-escape@npm:4.0.0" - dependencies: - tslib: "npm:^2.6.2" - checksum: 10c0/23984624060756adba8aa4ab1693fe6b387ee5064d8ec4dfd39bb5908c4ee8b9c3f2dc755da9b07505d8e3ce1338c1867abfa74158931e4728bf3cfcf2c05c3d - languageName: node - linkType: hard - -"@smithy/util-utf8@npm:^2.0.0": - version: 2.3.0 - resolution: "@smithy/util-utf8@npm:2.3.0" - dependencies: - "@smithy/util-buffer-from": "npm:^2.2.0" - tslib: "npm:^2.6.2" - checksum: 10c0/e18840c58cc507ca57fdd624302aefd13337ee982754c9aa688463ffcae598c08461e8620e9852a424d662ffa948fc64919e852508028d09e89ced459bd506ab - languageName: node - linkType: hard - -"@smithy/util-utf8@npm:^4.0.0": - version: 4.0.0 - resolution: "@smithy/util-utf8@npm:4.0.0" - dependencies: - "@smithy/util-buffer-from": "npm:^4.0.0" - tslib: "npm:^2.6.2" - checksum: 10c0/28a5a5372cbf0b3d2e32dd16f79b04c2aec6f704cf13789db922e9686fde38dde0171491cfa4c2c201595d54752a319faaeeed3c325329610887694431e28c98 - languageName: node - linkType: hard - -"@smithy/util-waiter@npm:^4.0.2": - version: 4.0.2 - resolution: "@smithy/util-waiter@npm:4.0.2" - dependencies: - "@smithy/abort-controller": "npm:^4.0.1" - "@smithy/types": "npm:^4.1.0" - tslib: "npm:^2.6.2" - checksum: 10c0/36ee71b41923ae58d9246745e3b7497fe45577dbb97f6e15dd07b4fddb4f82f32e0b7604c7b388fc92d5cbe49d9499998eda979a77a4a770c1b25686a5aed4ce - languageName: node - linkType: hard - "@socket.io/component-emitter@npm:~3.1.0": version: 3.1.2 resolution: "@socket.io/component-emitter@npm:3.1.2" @@ -8246,13 +7013,6 @@ __metadata: languageName: node linkType: hard -"bowser@npm:^2.11.0": - version: 2.11.0 - resolution: "bowser@npm:2.11.0" - checksum: 10c0/04efeecc7927a9ec33c667fa0965dea19f4ac60b3fea60793c2e6cf06c1dcd2f7ae1dbc656f450c5f50783b1c75cf9dc173ba6f3b7db2feee01f8c4b793e1bd3 - languageName: node - linkType: hard - "brace-expansion@npm:^1.1.7": version: 1.1.11 resolution: "brace-expansion@npm:1.1.11" @@ -11539,17 +10299,6 @@ __metadata: languageName: node linkType: hard -"fast-xml-parser@npm:4.4.1": - version: 4.4.1 - resolution: "fast-xml-parser@npm:4.4.1" - dependencies: - strnum: "npm:^1.0.5" - bin: - fxparser: src/cli/cli.js - checksum: 10c0/7f334841fe41bfb0bf5d920904ccad09cefc4b5e61eaf4c225bf1e1bb69ee77ef2147d8942f783ee8249e154d1ca8a858e10bda78a5d78b8bed3f48dcee9bf33 - languageName: node - linkType: hard - "fastq@npm:^1.6.0": version: 1.17.1 resolution: "fastq@npm:1.17.1" @@ -12356,7 +11105,6 @@ __metadata: "@angular/platform-browser-dynamic": "npm:16.2.12" "@angular/router": "npm:16.2.12" "@auth0/angular-jwt": "npm:5.1.0" - "@aws-sdk/client-s3": "npm:^3.750.0" "@codingame/monaco-vscode-java-default-extension": "npm:8.0.4" "@codingame/monaco-vscode-python-default-extension": "npm:8.0.4" "@codingame/monaco-vscode-r-default-extension": "npm:8.0.4" @@ -18624,13 +17372,6 @@ __metadata: languageName: node linkType: hard -"strnum@npm:^1.0.5": - version: 1.1.1 - resolution: "strnum@npm:1.1.1" - checksum: 10c0/c016034f9896ea99c4a22a8a8142d1ec72dba8d514ddec399f96998d5d2ab9f9e5b6c75c761d9730c3244b794022b1a63ec293f0da41ab0a994e3584020ba1ad - languageName: node - linkType: hard - "strong-log-transformer@npm:^2.1.0": version: 2.1.0 resolution: "strong-log-transformer@npm:2.1.0" @@ -19741,7 +18482,7 @@ __metadata: languageName: node linkType: hard -"uuid@npm:^9.0.0, uuid@npm:^9.0.1": 
+"uuid@npm:^9.0.0": version: 9.0.1 resolution: "uuid@npm:9.0.1" bin: diff --git a/core/workflow-core/build.sbt b/core/workflow-core/build.sbt index 2481bbd24c..4cacebc207 100644 --- a/core/workflow-core/build.sbt +++ b/core/workflow-core/build.sbt @@ -173,5 +173,5 @@ libraryDependencies ++= Seq( "org.eclipse.jgit" % "org.eclipse.jgit" % "5.13.0.202109080827-r", // jgit "org.yaml" % "snakeyaml" % "1.30", // yaml reader (downgrade to 1.30 due to dropwizard 1.3.23 required by amber) "org.apache.commons" % "commons-vfs2" % "2.9.0", // for FileResolver throw VFS-related exceptions - "io.lakefs" % "sdk" % "1.48.0", // for lakeFS api calls + "io.lakefs" % "sdk" % "1.51.0", // for lakeFS api calls ) \ No newline at end of file diff --git a/core/workflow-core/src/main/resources/storage-config.yaml b/core/workflow-core/src/main/resources/storage-config.yaml index 4c859f0f6b..3ca54a4969 100644 --- a/core/workflow-core/src/main/resources/storage-config.yaml +++ b/core/workflow-core/src/main/resources/storage-config.yaml @@ -40,6 +40,7 @@ storage: s3: endpoint: "http://localhost:9000" + region: "us-west-2" auth: username: "texera_minio" password: "password" diff --git a/core/workflow-core/src/main/scala/edu/uci/ics/amber/core/storage/StorageConfig.scala b/core/workflow-core/src/main/scala/edu/uci/ics/amber/core/storage/StorageConfig.scala index 184272519a..51526d00c5 100644 --- a/core/workflow-core/src/main/scala/edu/uci/ics/amber/core/storage/StorageConfig.scala +++ b/core/workflow-core/src/main/scala/edu/uci/ics/amber/core/storage/StorageConfig.scala @@ -218,6 +218,11 @@ object StorageConfig { .asInstanceOf[Map[String, Any]]("endpoint") .asInstanceOf[String] + val s3Region: String = conf("storage") + .asInstanceOf[Map[String, Any]]("s3") + .asInstanceOf[Map[String, Any]]("region") + .asInstanceOf[String] + val s3Username: String = conf("storage") .asInstanceOf[Map[String, Any]]("s3") .asInstanceOf[Map[String, Any]]("auth") From fb072393c7c77f4ca5c65bd82e0fc8cc6600e52b Mon Sep 17 00:00:00 2001 From: Jiadong Bai Date: Thu, 6 Mar 2025 14:39:27 -0800 Subject: [PATCH 40/47] fmt python --- .../main/python/pytexera/storage/__init__.py | 4 +--- .../pytexera/storage/dataset_file_document.py | 22 ++++++++++++++----- 2 files changed, 17 insertions(+), 9 deletions(-) diff --git a/core/amber/src/main/python/pytexera/storage/__init__.py b/core/amber/src/main/python/pytexera/storage/__init__.py index cbda25ba41..da91bba69d 100644 --- a/core/amber/src/main/python/pytexera/storage/__init__.py +++ b/core/amber/src/main/python/pytexera/storage/__init__.py @@ -1,5 +1,3 @@ from .dataset_file_document import DatasetFileDocument -__all__ = [ - "DatasetFileDocument" -] \ No newline at end of file +__all__ = ["DatasetFileDocument"] diff --git a/core/amber/src/main/python/pytexera/storage/dataset_file_document.py b/core/amber/src/main/python/pytexera/storage/dataset_file_document.py index 01c91db4c2..4aeb20df80 100644 --- a/core/amber/src/main/python/pytexera/storage/dataset_file_document.py +++ b/core/amber/src/main/python/pytexera/storage/dataset_file_document.py @@ -16,7 +16,9 @@ def __init__(self, file_path: str): """ parts = file_path.strip("/").split("/") if len(parts) < 4: - raise ValueError("Invalid file path format. Expected: /ownerEmail/datasetName/versionName/fileRelativePath") + raise ValueError( + "Invalid file path format. 
Expected: /ownerEmail/datasetName/versionName/fileRelativePath" + ) self.owner_email = parts[0] self.dataset_name = parts[1] @@ -27,7 +29,9 @@ def __init__(self, file_path: str): self.presign_endpoint = os.getenv("PRESIGN_API_ENDPOINT") if not self.jwt_token: - raise ValueError("JWT token is required but not set in environment variables.") + raise ValueError( + "JWT token is required but not set in environment variables." + ) if not self.presign_endpoint: self.presign_endpoint = "http://localhost:9092/api/dataset/presign-download" @@ -39,14 +43,18 @@ def get_presigned_url(self) -> str: :raises: RuntimeError if the request fails. """ headers = {"Authorization": f"Bearer {self.jwt_token}"} - encoded_file_path = urllib.parse.quote(f"/{self.owner_email}/{self.dataset_name}/{self.version_name}/{self.file_relative_path}") + encoded_file_path = urllib.parse.quote( + f"/{self.owner_email}/{self.dataset_name}/{self.version_name}/{self.file_relative_path}" + ) params = {"filePath": encoded_file_path} response = requests.get(self.presign_endpoint, headers=headers, params=params) if response.status_code != 200: - raise RuntimeError(f"Failed to get presigned URL: {response.status_code} {response.text}") + raise RuntimeError( + f"Failed to get presigned URL: {response.status_code} {response.text}" + ) return response.json().get("presignedUrl") @@ -61,6 +69,8 @@ def read_file(self) -> io.BytesIO: response = requests.get(presigned_url) if response.status_code != 200: - raise RuntimeError(f"Failed to retrieve file content: {response.status_code} {response.text}") + raise RuntimeError( + f"Failed to retrieve file content: {response.status_code} {response.text}" + ) - return io.BytesIO(response.content) \ No newline at end of file + return io.BytesIO(response.content) From 6b4c960515fbebf267db8ae5c12b27948b9420b3 Mon Sep 17 00:00:00 2001 From: Jiadong Bai Date: Thu, 6 Mar 2025 15:06:42 -0800 Subject: [PATCH 41/47] fmt and fix the version of docker compose --- .../src/main/python/pytexera/storage/dataset_file_document.py | 2 -- core/file-service/src/main/resources/docker-compose.yml | 4 ++-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/core/amber/src/main/python/pytexera/storage/dataset_file_document.py b/core/amber/src/main/python/pytexera/storage/dataset_file_document.py index 4aeb20df80..161894534b 100644 --- a/core/amber/src/main/python/pytexera/storage/dataset_file_document.py +++ b/core/amber/src/main/python/pytexera/storage/dataset_file_document.py @@ -2,8 +2,6 @@ import io import requests import urllib.parse -from urllib.parse import urljoin -from typing import Optional class DatasetFileDocument: diff --git a/core/file-service/src/main/resources/docker-compose.yml b/core/file-service/src/main/resources/docker-compose.yml index ff85a7b9ca..4c254843f7 100644 --- a/core/file-service/src/main/resources/docker-compose.yml +++ b/core/file-service/src/main/resources/docker-compose.yml @@ -1,6 +1,6 @@ services: minio: - image: minio/minio:latest + image: minio/minio:RELEASE.2025-02-28T09-55-16Z container_name: minio ports: - "9000:9000" # MinIO API (use this in LakeFS config) @@ -13,7 +13,7 @@ services: - local-lakefs lakefs: - image: treeverse/lakefs:latest + image: treeverse/lakefs:1.51 container_name: lakefs depends_on: - minio From f8685ab10687885b86c2a34837e5d76ce1179dad Mon Sep 17 00:00:00 2001 From: Jiadong Bai Date: Thu, 6 Mar 2025 18:05:41 -0800 Subject: [PATCH 42/47] try to fix the cors issue --- core/gui/proxy.config.json | 8 -------- 
.../app/dashboard/service/user/dataset/dataset.service.ts | 7 ++++++- 2 files changed, 6 insertions(+), 9 deletions(-) diff --git a/core/gui/proxy.config.json b/core/gui/proxy.config.json index 74cf828a95..813650d297 100755 --- a/core/gui/proxy.config.json +++ b/core/gui/proxy.config.json @@ -25,13 +25,5 @@ "ws": true, "secure": false, "changeOrigin": false - }, - "/lakefs": { - "target": "http://localhost:8000", - "secure": false, - "changeOrigin": true, - "pathRewrite": { - "^/lakefs": "" - } } } diff --git a/core/gui/src/app/dashboard/service/user/dataset/dataset.service.ts b/core/gui/src/app/dashboard/service/user/dataset/dataset.service.ts index 569cb1c78b..1d7f44e748 100644 --- a/core/gui/src/app/dashboard/service/user/dataset/dataset.service.ts +++ b/core/gui/src/app/dashboard/service/user/dataset/dataset.service.ts @@ -139,7 +139,12 @@ export class DatasetService { const end = Math.min(start + environment.multipartUploadChunkSizeByte, file.size); const chunk = file.slice(start, end); - return from(fetch(url, { method: "PUT", body: chunk })).pipe( + return from(fetch(url, { + method: "PUT", body: chunk, headers: { + "Access-Control-Allow-Origin": "*", // Ensures CORS works + "Access-Control-Allow-Methods": "PUT, OPTIONS", + "Access-Control-Allow-Headers": "*" + } })).pipe( switchMap(response => { if (!response.ok) { return throwError(() => new Error(`Failed to upload part ${index + 1}`)); From 2d9eca10513980915e2b639bd1a0a3c8f4462648 Mon Sep 17 00:00:00 2001 From: Jiadong Bai Date: Thu, 6 Mar 2025 22:05:58 -0800 Subject: [PATCH 43/47] fmt py file --- .../main/python/pytexera/storage/dataset_file_document.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/core/amber/src/main/python/pytexera/storage/dataset_file_document.py b/core/amber/src/main/python/pytexera/storage/dataset_file_document.py index 161894534b..059c8966a4 100644 --- a/core/amber/src/main/python/pytexera/storage/dataset_file_document.py +++ b/core/amber/src/main/python/pytexera/storage/dataset_file_document.py @@ -9,13 +9,15 @@ def __init__(self, file_path: str): """ Parses the file path into dataset metadata. - :param file_path: Expected format - "/ownerEmail/datasetName/versionName/fileRelativePath" - Example: "/bob@texera.com/twitterDataset/v1/california/irvine/tw1.csv" + :param file_path: + Expected format - "/ownerEmail/datasetName/versionName/fileRelativePath" + Example: "/bob@texera.com/twitterDataset/v1/california/irvine/tw1.csv" """ parts = file_path.strip("/").split("/") if len(parts) < 4: raise ValueError( - "Invalid file path format. Expected: /ownerEmail/datasetName/versionName/fileRelativePath" + "Invalid file path format. 
" + "Expected: /ownerEmail/datasetName/versionName/fileRelativePath" ) self.owner_email = parts[0] From a422c1d1f96d74f4a07ff873407628627b260b6b Mon Sep 17 00:00:00 2001 From: Jiadong Bai Date: Thu, 6 Mar 2025 22:06:17 -0800 Subject: [PATCH 44/47] add header for put --- .../service/user/dataset/dataset.service.ts | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/core/gui/src/app/dashboard/service/user/dataset/dataset.service.ts b/core/gui/src/app/dashboard/service/user/dataset/dataset.service.ts index 1d7f44e748..c767ec8f9b 100644 --- a/core/gui/src/app/dashboard/service/user/dataset/dataset.service.ts +++ b/core/gui/src/app/dashboard/service/user/dataset/dataset.service.ts @@ -139,12 +139,17 @@ export class DatasetService { const end = Math.min(start + environment.multipartUploadChunkSizeByte, file.size); const chunk = file.slice(start, end); - return from(fetch(url, { - method: "PUT", body: chunk, headers: { - "Access-Control-Allow-Origin": "*", // Ensures CORS works - "Access-Control-Allow-Methods": "PUT, OPTIONS", - "Access-Control-Allow-Headers": "*" - } })).pipe( + return from( + fetch(url, { + method: "PUT", + body: chunk, + headers: { + "Access-Control-Allow-Origin": "*", // Ensures CORS works + "Access-Control-Allow-Methods": "PUT, OPTIONS", + "Access-Control-Allow-Headers": "*", + }, + }) + ).pipe( switchMap(response => { if (!response.ok) { return throwError(() => new Error(`Failed to upload part ${index + 1}`)); From e23242320c1ae7ecbbaf3e192df17d90271aab4a Mon Sep 17 00:00:00 2001 From: Jiadong Bai Date: Fri, 7 Mar 2025 10:27:08 -0800 Subject: [PATCH 45/47] fmt UDF --- .../pytexera/storage/dataset_file_document.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/core/amber/src/main/python/pytexera/storage/dataset_file_document.py b/core/amber/src/main/python/pytexera/storage/dataset_file_document.py index 059c8966a4..f4496c78b9 100644 --- a/core/amber/src/main/python/pytexera/storage/dataset_file_document.py +++ b/core/amber/src/main/python/pytexera/storage/dataset_file_document.py @@ -44,7 +44,10 @@ def get_presigned_url(self) -> str: """ headers = {"Authorization": f"Bearer {self.jwt_token}"} encoded_file_path = urllib.parse.quote( - f"/{self.owner_email}/{self.dataset_name}/{self.version_name}/{self.file_relative_path}" + f"/{self.owner_email}" + f"/{self.dataset_name}" + f"/{self.version_name}" + f"/{self.file_relative_path}" ) params = {"filePath": encoded_file_path} @@ -53,7 +56,8 @@ def get_presigned_url(self) -> str: if response.status_code != 200: raise RuntimeError( - f"Failed to get presigned URL: {response.status_code} {response.text}" + f"Failed to get presigned URL: " + f"{response.status_code} {response.text}" ) return response.json().get("presignedUrl") @@ -62,7 +66,7 @@ def read_file(self) -> io.BytesIO: """ Reads the file content from the presigned URL. - :return: A file-like object (io.BytesIO) for compatibility with various Python libraries. + :return: A file-like object. :raises: RuntimeError if the retrieval fails. 
""" presigned_url = self.get_presigned_url() @@ -70,7 +74,8 @@ def read_file(self) -> io.BytesIO: if response.status_code != 200: raise RuntimeError( - f"Failed to retrieve file content: {response.status_code} {response.text}" + f"Failed to retrieve file content: " + f"{response.status_code} {response.text}" ) return io.BytesIO(response.content) From 21a8969e2e62507d7dbb5f008351107f6aee1cdc Mon Sep 17 00:00:00 2001 From: Jiadong Bai Date: Sun, 9 Mar 2025 14:53:21 -0700 Subject: [PATCH 46/47] keep refining --- core/file-service/build.sbt | 1 - .../src/main/resources/docker-compose.yml | 4 ++ .../service/resource/DatasetResource.scala | 38 +++++++++++-------- .../texera/service/util/S3StorageClient.scala | 2 +- core/gui/package.json | 1 - .../user-dataset-version-creator.component.ts | 14 ++++--- .../service/user/dataset/dataset.service.ts | 11 +++--- core/gui/yarn.lock | 26 ------------- 8 files changed, 42 insertions(+), 55 deletions(-) diff --git a/core/file-service/build.sbt b/core/file-service/build.sbt index 454ff4145b..984de4d3ba 100644 --- a/core/file-service/build.sbt +++ b/core/file-service/build.sbt @@ -59,7 +59,6 @@ libraryDependencies ++= Seq( "io.dropwizard" % "dropwizard-core" % dropwizardVersion, "io.dropwizard" % "dropwizard-auth" % dropwizardVersion, // Dropwizard Authentication module "com.fasterxml.jackson.module" %% "jackson-module-scala" % "2.15.2", - "org.glassfish.jersey.media" % "jersey-media-multipart" % "3.1.10", "jakarta.ws.rs" % "jakarta.ws.rs-api" % "3.1.0", // Ensure Jakarta JAX-RS API is available "org.bitbucket.b_c" % "jose4j" % "0.9.6", "org.playframework" %% "play-json" % "3.1.0-M1", diff --git a/core/file-service/src/main/resources/docker-compose.yml b/core/file-service/src/main/resources/docker-compose.yml index 4c254843f7..a8b10b8db9 100644 --- a/core/file-service/src/main/resources/docker-compose.yml +++ b/core/file-service/src/main/resources/docker-compose.yml @@ -9,6 +9,10 @@ services: - MINIO_ROOT_USER=texera_minio - MINIO_ROOT_PASSWORD=password command: server --console-address ":9001" /data +# By uncommenting the below, you can mount the actual data to your local directory. +# This is recommended as this can ensure the data persistence even if the container is deleted. 
+# volumes: +# - /your/local/directory:/data # Mount MinIO data directory profiles: - local-lakefs diff --git a/core/file-service/src/main/scala/edu/uci/ics/texera/service/resource/DatasetResource.scala b/core/file-service/src/main/scala/edu/uci/ics/texera/service/resource/DatasetResource.scala index 037a8d03d3..0ae21b1334 100644 --- a/core/file-service/src/main/scala/edu/uci/ics/texera/service/resource/DatasetResource.scala +++ b/core/file-service/src/main/scala/edu/uci/ics/texera/service/resource/DatasetResource.scala @@ -31,6 +31,7 @@ import edu.uci.ics.texera.service.resource.DatasetAccessResource.{ userOwnDataset } import edu.uci.ics.texera.service.resource.DatasetResource.{ + CreateDatasetRequest, DashboardDataset, DashboardDatasetVersion, DatasetDescriptionModification, @@ -46,7 +47,6 @@ import io.dropwizard.auth.Auth import jakarta.annotation.security.RolesAllowed import jakarta.ws.rs._ import jakarta.ws.rs.core.{MediaType, Response, StreamingOutput} -import org.glassfish.jersey.media.multipart.FormDataParam import org.jooq.{DSLContext, EnumType} import java.io.{InputStream, OutputStream} @@ -116,6 +116,12 @@ object DatasetResource { fileNodes: List[DatasetFileNode] ) + case class CreateDatasetRequest( + datasetName: String, + datasetDescription: String, + isDatasetPublic: Boolean + ) + case class Diff( path: String, pathType: String, @@ -165,39 +171,41 @@ class DatasetResource { @POST @RolesAllowed(Array("REGULAR", "ADMIN")) @Path("/create") - @Consumes(Array(MediaType.MULTIPART_FORM_DATA)) + @Consumes(Array(MediaType.APPLICATION_JSON)) def createDataset( - @Auth user: SessionUser, - @FormDataParam("datasetName") datasetName: String, - @FormDataParam("datasetDescription") datasetDescription: String, - @FormDataParam("isDatasetPublic") isDatasetPublic: String + request: CreateDatasetRequest, + @Auth user: SessionUser ): DashboardDataset = { withTransaction(context) { ctx => val uid = user.getUid val datasetDao: DatasetDao = new DatasetDao(ctx.configuration()) - val datasetOfUserDao: DatasetUserAccessDao = new DatasetUserAccessDao(ctx.configuration()) + val datasetUserAccessDao: DatasetUserAccessDao = new DatasetUserAccessDao(ctx.configuration()) + + val datasetName = request.datasetName + val datasetDescription = request.datasetDescription + val isDatasetPublic = request.isDatasetPublic - // do the name duplication check + // Check if a dataset with the same name already exists if (!datasetDao.fetchByName(datasetName).isEmpty) { throw new BadRequestException("Dataset with the same name already exists") } - // Try to initialize the repository in LakeFS + // Initialize the repository in LakeFS try { LakeFSStorageClient.initRepo(datasetName) } catch { case e: Exception => throw new WebApplicationException( - s"Failed to initialize repository in LakeFS: ${e.getMessage}" + s"Failed to create the dataset: ${e.getMessage}" ) } - // insert the dataset into database - val dataset: Dataset = new Dataset() + // Insert the dataset into the database + val dataset = new Dataset() dataset.setName(datasetName) dataset.setDescription(datasetDescription) - dataset.setIsPublic(isDatasetPublic.toBoolean) + dataset.setIsPublic(isDatasetPublic) dataset.setOwnerUid(uid) val createdDataset = ctx @@ -206,12 +214,12 @@ class DatasetResource { .returning() .fetchOne() - // insert requester as the write access of the dataset + // Insert the requester as the WRITE access user for this dataset val datasetUserAccess = new DatasetUserAccess() datasetUserAccess.setDid(createdDataset.getDid) 
datasetUserAccess.setUid(uid) datasetUserAccess.setPrivilege(PrivilegeEnum.WRITE) - datasetOfUserDao.insert(datasetUserAccess) + datasetUserAccessDao.insert(datasetUserAccess) DashboardDataset( new Dataset( diff --git a/core/file-service/src/main/scala/edu/uci/ics/texera/service/util/S3StorageClient.scala b/core/file-service/src/main/scala/edu/uci/ics/texera/service/util/S3StorageClient.scala index 843314cd3a..de1e70bbc7 100644 --- a/core/file-service/src/main/scala/edu/uci/ics/texera/service/util/S3StorageClient.scala +++ b/core/file-service/src/main/scala/edu/uci/ics/texera/service/util/S3StorageClient.scala @@ -25,7 +25,7 @@ object S3StorageClient { .endpointOverride(java.net.URI.create(StorageConfig.s3Endpoint)) // MinIO URL .serviceConfiguration( S3Configuration.builder().pathStyleAccessEnabled(true).build() - ) // Needed for MinIO + ) .build() } diff --git a/core/gui/package.json b/core/gui/package.json index 59a9aa1b64..20b02f092b 100644 --- a/core/gui/package.json +++ b/core/gui/package.json @@ -80,7 +80,6 @@ "read-excel-file": "5.7.1", "ring-buffer-ts": "1.0.3", "rxjs": "7.8.1", - "sanitize-filename": "1.6.3", "tinyqueue": "2.0.3", "ts-proto": "2.2.0", "tslib": "2.3.1", diff --git a/core/gui/src/app/dashboard/component/user/user-dataset/user-dataset-explorer/user-dataset-version-creator/user-dataset-version-creator.component.ts b/core/gui/src/app/dashboard/component/user/user-dataset/user-dataset-explorer/user-dataset-version-creator/user-dataset-version-creator.component.ts index f7a49244f5..d062264c81 100644 --- a/core/gui/src/app/dashboard/component/user/user-dataset/user-dataset-explorer/user-dataset-version-creator/user-dataset-version-creator.component.ts +++ b/core/gui/src/app/dashboard/component/user/user-dataset/user-dataset-explorer/user-dataset-version-creator/user-dataset-version-creator.component.ts @@ -2,11 +2,9 @@ import { Component, EventEmitter, inject, Input, OnInit, Output } from "@angular import { FormBuilder, FormGroup, Validators } from "@angular/forms"; import { FormlyFieldConfig } from "@ngx-formly/core"; import { DatasetService } from "../../../../../service/user/dataset/dataset.service"; -import { FileUploadItem } from "../../../../../type/dashboard-file.interface"; -import { Dataset, DatasetVersion } from "../../../../../../common/type/dataset"; +import { Dataset } from "../../../../../../common/type/dataset"; import { UntilDestroy, untilDestroyed } from "@ngneat/until-destroy"; import { NotificationService } from "../../../../../../common/service/notification/notification.service"; -import sanitize from "sanitize-filename"; import { HttpErrorResponse } from "@angular/common/http"; import { NZ_MODAL_DATA, NzModalRef } from "ng-zorro-antd/modal"; @@ -85,10 +83,16 @@ export class UserDatasetVersionCreatorComponent implements OnInit { } datasetNameSanitization(datasetName: string): string { - const sanitizedDatasetName = sanitize(datasetName); - if (sanitizedDatasetName != datasetName) { + // Remove leading spaces + let sanitizedDatasetName = datasetName.trimStart(); + + // Replace all characters that are not letters (a-z, A-Z), numbers (0-9) with a short dash "-" + sanitizedDatasetName = sanitizedDatasetName.replace(/[^a-zA-Z0-9]+/g, "-"); + + if (sanitizedDatasetName !== datasetName) { this.isDatasetNameSanitized = true; } + return sanitizedDatasetName; } diff --git a/core/gui/src/app/dashboard/service/user/dataset/dataset.service.ts b/core/gui/src/app/dashboard/service/user/dataset/dataset.service.ts index c767ec8f9b..fdecc06362 100644 --- 
a/core/gui/src/app/dashboard/service/user/dataset/dataset.service.ts +++ b/core/gui/src/app/dashboard/service/user/dataset/dataset.service.ts @@ -34,12 +34,11 @@ export class DatasetService { constructor(private http: HttpClient) {} public createDataset(dataset: Dataset): Observable { - const formData = new FormData(); - formData.append("datasetName", dataset.name); - formData.append("datasetDescription", dataset.description); - formData.append("isDatasetPublic", dataset.isPublic ? "true" : "false"); - - return this.http.post(`${AppSettings.getApiEndpoint()}/${DATASET_CREATE_URL}`, formData); + return this.http.post(`${AppSettings.getApiEndpoint()}/${DATASET_CREATE_URL}`, { + datasetName: dataset.name, + datasetDescription: dataset.description, + isDatasetPublic: dataset.isPublic, + }); } public getDataset(did: number, isLogin: boolean = true): Observable { diff --git a/core/gui/yarn.lock b/core/gui/yarn.lock index 050ab8bfee..eefc75f7e6 100644 --- a/core/gui/yarn.lock +++ b/core/gui/yarn.lock @@ -11191,7 +11191,6 @@ __metadata: ring-buffer-ts: "npm:1.0.3" rxjs: "npm:7.8.1" rxjs-marbles: "npm:7.0.1" - sanitize-filename: "npm:1.6.3" sass: "npm:1.71.1" style-loader: "npm:3.3.4" tinyqueue: "npm:2.0.3" @@ -16514,15 +16513,6 @@ __metadata: languageName: node linkType: hard -"sanitize-filename@npm:1.6.3": - version: 1.6.3 - resolution: "sanitize-filename@npm:1.6.3" - dependencies: - truncate-utf8-bytes: "npm:^1.0.0" - checksum: 10c0/16ff47556a6e54e228c28db096bedd303da67b030d4bea4925fd71324932d6b02c7b0446f00ad33987b25b6414f24ae968e01a1a1679ce599542e82c4b07eb1f - languageName: node - linkType: hard - "sass-loader@npm:13.3.2": version: 13.3.2 resolution: "sass-loader@npm:13.3.2" @@ -17763,15 +17753,6 @@ __metadata: languageName: node linkType: hard -"truncate-utf8-bytes@npm:^1.0.0": - version: 1.0.2 - resolution: "truncate-utf8-bytes@npm:1.0.2" - dependencies: - utf8-byte-length: "npm:^1.0.1" - checksum: 10c0/af2b431fc4314f119b551e5fccfad49d4c0ef82e13ba9ca61be6567801195b08e732ce9643542e8ad1b3df44f3df2d7345b3dd34f723954b6bb43a14584d6b3c - languageName: node - linkType: hard - "ts-api-utils@npm:^1.0.1, ts-api-utils@npm:^1.3.0": version: 1.3.0 resolution: "ts-api-utils@npm:1.3.0" @@ -18436,13 +18417,6 @@ __metadata: languageName: node linkType: hard -"utf8-byte-length@npm:^1.0.1": - version: 1.0.5 - resolution: "utf8-byte-length@npm:1.0.5" - checksum: 10c0/e69bda3299608f4cc75976da9fb74ac94801a58b9ca29fdad03a20ec952e7477d7f226c12716b5f36bd4cff8151d1d152d02ee1df3752f017d4b2c725ce3e47a - languageName: node - linkType: hard - "util-deprecate@npm:^1.0.1, util-deprecate@npm:^1.0.2, util-deprecate@npm:~1.0.1": version: 1.0.2 resolution: "util-deprecate@npm:1.0.2" From 6accd5b1225df9ddb4a9799f529065eff8c53219 Mon Sep 17 00:00:00 2001 From: Jiadong Bai Date: Sun, 9 Mar 2025 22:20:46 -0700 Subject: [PATCH 47/47] update the docker compose --- .../src/main/resources/docker-compose.yml | 69 ++++++++++++++----- 1 file changed, 50 insertions(+), 19 deletions(-) diff --git a/core/file-service/src/main/resources/docker-compose.yml b/core/file-service/src/main/resources/docker-compose.yml index a8b10b8db9..b50ab5f1a7 100644 --- a/core/file-service/src/main/resources/docker-compose.yml +++ b/core/file-service/src/main/resources/docker-compose.yml @@ -1,44 +1,75 @@ +version: "3.5" +name: texera-lakefs services: minio: image: minio/minio:RELEASE.2025-02-28T09-55-16Z container_name: minio ports: - - "9000:9000" # MinIO API (use this in LakeFS config) - - "9001:9001" # MinIO Console UI + - "9000:9000" + - "9001:9001" 
environment: - MINIO_ROOT_USER=texera_minio - MINIO_ROOT_PASSWORD=password command: server --console-address ":9001" /data -# By uncommenting the below, you can mount the actual data to your local directory. -# This is recommended as this can ensure the data persistence even if the container is deleted. +# Below lines are recommended to uncomment in order to persist your data even if the container dies # volumes: -# - /your/local/directory:/data # Mount MinIO data directory - profiles: - - local-lakefs +# - /path/to/your/local/directory:/data + + postgres: + image: postgres:15 + container_name: postgres + restart: always + environment: + - POSTGRES_DB=texera_lakefs + - POSTGRES_USER=texera_lakefs_admin + - POSTGRES_PASSWORD=password + healthcheck: + test: ["CMD", "pg_isready", "-U", "texera_lakefs_admin"] + interval: 10s + retries: 5 + start_period: 5s +# Ditto +# volumes: +# - /path/to/your/local/directory:/var/lib/postgresql/data lakefs: image: treeverse/lakefs:1.51 container_name: lakefs depends_on: - - minio + postgres: + condition: service_healthy + minio: + condition: service_started ports: - - "8000:8000" # LakeFS API/UI + - "8000:8000" environment: - # LakeFS metadata store - - LAKEFS_DATABASE_TYPE=local - # Authentication - - LAKEFS_AUTH_ENCRYPT_SECRET_KEY=random_string_for_lakefs - # MinIO Storage Configuration for LakeFS - LAKEFS_BLOCKSTORE_TYPE=s3 - LAKEFS_BLOCKSTORE_S3_FORCE_PATH_STYLE=true - - LAKEFS_BLOCKSTORE_S3_ENDPOINT=http://minio:9000 # MinIO internal service URL - - LAKEFS_BLOCKSTORE_S3_DISCOVER_BUCKET_REGION=false + - LAKEFS_BLOCKSTORE_S3_ENDPOINT=http://minio:9000 - LAKEFS_BLOCKSTORE_S3_PRE_SIGNED_ENDPOINT=http://localhost:9000 - LAKEFS_BLOCKSTORE_S3_CREDENTIALS_ACCESS_KEY_ID=texera_minio - LAKEFS_BLOCKSTORE_S3_CREDENTIALS_SECRET_ACCESS_KEY=password - # LakeFS Installation Credentials + - LAKEFS_AUTH_ENCRYPT_SECRET_KEY=random_string_for_lakefs + - LAKEFS_LOGGING_LEVEL=INFO + - LAKEFS_STATS_ENABLED=1 + - LAKEFS_DATABASE_TYPE=postgres + - LAKEFS_DATABASE_POSTGRES_CONNECTION_STRING=postgres://texera_lakefs_admin:password@postgres:5432/texera_lakefs?sslmode=disable - LAKEFS_INSTALLATION_USER_NAME=texera-admin - LAKEFS_INSTALLATION_ACCESS_KEY_ID=AKIAIOSFOLKFSSAMPLES - LAKEFS_INSTALLATION_SECRET_ACCESS_KEY=wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY - profiles: - - local-lakefs \ No newline at end of file + entrypoint: ["/bin/sh", "-c"] + command: + - | + lakefs setup --user-name "$$LAKEFS_INSTALLATION_USER_NAME" --access-key-id "$$LAKEFS_INSTALLATION_ACCESS_KEY_ID" --secret-access-key "$$LAKEFS_INSTALLATION_SECRET_ACCESS_KEY" || true + lakefs run & + echo "---- lakeFS Web UI ----" + echo "http://127.0.0.1:8000/" + echo "" + echo "Access Key ID : $$LAKEFS_INSTALLATION_ACCESS_KEY_ID" + echo "Secret Access Key: $$LAKEFS_INSTALLATION_SECRET_ACCESS_KEY" + echo "" + wait + +networks: + default: + name: texera-lakefs \ No newline at end of file
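
For reference, a minimal sketch of how the DatasetFileDocument class added in this series might be consumed from a Python UDF. It assumes the JWT and PRESIGN_API_ENDPOINT environment variables that the class reads are already set, and that pandas is available in the UDF environment (an assumption, not something these patches install); the sample path is the one given in the class docstring.

    from io import BytesIO

    import pandas as pd  # assumption: pandas is available in the UDF environment

    from pytexera.storage import DatasetFileDocument

    # Path format: /ownerEmail/datasetName/versionName/fileRelativePath
    doc = DatasetFileDocument(
        "/bob@texera.com/twitterDataset/v1/california/irvine/tw1.csv"
    )

    # read_file() resolves a presigned URL through the file service and
    # returns the object content as an in-memory BytesIO buffer.
    buffer: BytesIO = doc.read_file()
    df = pd.read_csv(buffer)
    print(df.head())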
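The switch from multipart form data to a JSON body in DatasetResource.createDataset (patch 46) means a dataset-creation request now carries datasetName, datasetDescription, and isDatasetPublic as JSON fields, matching the updated Angular service. A hedged sketch of the equivalent call outside the Angular client follows; the service URL and the Bearer-token scheme are assumptions inferred from the presign-download default endpoint and the Dropwizard auth annotations, not values confirmed in this section.

    import requests

    # Assumed endpoint: defaults elsewhere in this series suggest the file
    # service listens on localhost:9092 under /api/dataset (assumption).
    CREATE_URL = "http://localhost:9092/api/dataset/create"

    jwt_token = "<texera-user-jwt>"  # placeholder for a valid session token

    payload = {
        "datasetName": "twitterDataset",
        "datasetDescription": "Tweets collected for the demo workflow",
        "isDatasetPublic": False,
    }

    response = requests.post(
        CREATE_URL,
        json=payload,
        headers={"Authorization": f"Bearer {jwt_token}"},
    )
    response.raise_for_status()
    print(response.json())  # serialized DashboardDataset on success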
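The multipart-upload path touched in patches 42 and 44 PUTs each file chunk directly to a presigned URL and treats any non-OK response as a failed part. A rough Python equivalent of that per-chunk loop, useful for scripting bulk uploads, might look as follows; the chunk size and the way the presigned URLs are obtained are assumptions, since environment.multipartUploadChunkSizeByte and the presign-upload endpoint are defined outside this section.

    from typing import List

    import requests

    # assumption: mirrors environment.multipartUploadChunkSizeByte
    CHUNK_SIZE_BYTES = 50 * 1024 * 1024

    def upload_parts(local_path: str, presigned_urls: List[str]) -> None:
        """PUT one chunk of the local file to each presigned URL, in order."""
        with open(local_path, "rb") as f:
            for index, url in enumerate(presigned_urls):
                chunk = f.read(CHUNK_SIZE_BYTES)
                response = requests.put(url, data=chunk)
                if not response.ok:
                    # same failure semantics as the Angular client
                    raise RuntimeError(f"Failed to upload part {index + 1}")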