[SPARK-50947][PYTHON][SQL][CONNECT] Assign appropriate error class and SparkException for duplicated artifacts #49598

Closed · wants to merge 2 commits
6 changes: 6 additions & 0 deletions common/utils/src/main/resources/error/error-conditions.json
@@ -1229,6 +1229,12 @@
     ],
     "sqlState" : "42710"
   },
+  "DUPLICATED_ARTIFACT" : {
+    "message" : [
+      "Duplicate Artifact: <normalizedRemoteRelativePath>. Artifacts cannot be overwritten."
+    ],
+    "sqlState" : "42713"
+  },
   "DUPLICATE_ASSIGNMENTS" : {
     "message" : [
       "The columns or variables <nameList> appear more than once as assignment targets."
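Note: the <normalizedRemoteRelativePath> placeholder in the new DUPLICATED_ARTIFACT template is filled from the message parameters supplied at the throw site (see the ArtifactManager change below), so the rendered text matches the previously hard-coded message. A minimal, illustrative Python sketch of that substitution, not Spark's actual error-framework code:

    # Illustrative only: how a <param> placeholder in an error template is filled
    # from message parameters. Spark does this internally via its error framework.
    import re

    template = "Duplicate Artifact: <normalizedRemoteRelativePath>. Artifacts cannot be overwritten."
    params = {"normalizedRemoteRelativePath": "pyfiles/my_pyfile.py"}

    rendered = re.sub(r"<(\w+)>", lambda m: params[m.group(1)], template)
    print(rendered)
    # Duplicate Artifact: pyfiles/my_pyfile.py. Artifacts cannot be overwritten.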
12 changes: 8 additions & 4 deletions python/pyspark/sql/tests/connect/client/test_artifact.py
@@ -29,7 +29,7 @@
 if should_test_connect:
     from pyspark.sql.connect.client.artifact import ArtifactManager
     from pyspark.sql.connect.client import DefaultChannelBuilder
-    from pyspark.errors.exceptions.connect import SparkConnectGrpcException
+    from pyspark.errors import SparkRuntimeException


 class ArtifactTestsMixin:
@@ -73,11 +73,15 @@ def test_artifacts_cannot_be_overwritten(self):
         with open(pyfile_path, "w+") as f:
             f.write("my_func = lambda: 11")

-        with self.assertRaisesRegex(
-            SparkConnectGrpcException, "\\(java.lang.RuntimeException\\) Duplicate Artifact"
-        ):
+        with self.assertRaises(SparkRuntimeException) as pe:
             self.spark.addArtifacts(pyfile_path, pyfile=True)

+        self.check_error(
+            exception=pe.exception,
+            errorClass="DUPLICATED_ARTIFACT",
+            messageParameters={"normalizedRemoteRelativePath": "pyfiles/my_pyfile.py"},
+        )
+
     def check_add_zipped_package(self, spark_session):
         with tempfile.TemporaryDirectory(prefix="check_add_zipped_package") as d:
             package_path = os.path.join(d, "my_zipfile")
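The test now asserts on the structured error class and message parameters through check_error instead of regex-matching a "(java.lang.RuntimeException)" string forwarded over gRPC. A rough sketch of what that style of check amounts to; assert_spark_error is an illustrative helper name, not PySpark's actual check_error implementation, and getErrorClass/getMessageParameters are the accessors exposed by PySpark error classes:

    # Sketch of a check_error-style assertion: compare the structured fields carried
    # by the exception rather than the rendered message text. Helper name is illustrative.
    def assert_spark_error(exc, error_class, message_parameters):
        assert exc.getErrorClass() == error_class, exc.getErrorClass()
        assert exc.getMessageParameters() == message_parameters, exc.getMessageParameters()

    # Against the exception captured above:
    # assert_spark_error(
    #     pe.exception,
    #     "DUPLICATED_ARTIFACT",
    #     {"normalizedRemoteRelativePath": "pyfiles/my_pyfile.py"},
    # )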
@@ -31,7 +31,7 @@ import scala.reflect.ClassTag
 import org.apache.commons.io.{FilenameUtils, FileUtils}
 import org.apache.hadoop.fs.{LocalFileSystem, Path => FSPath}

-import org.apache.spark.{JobArtifactSet, JobArtifactState, SparkContext, SparkEnv, SparkException, SparkUnsupportedOperationException}
+import org.apache.spark.{JobArtifactSet, JobArtifactState, SparkContext, SparkEnv, SparkException, SparkRuntimeException, SparkUnsupportedOperationException}
 import org.apache.spark.internal.{Logging, LogKeys, MDC}
 import org.apache.spark.internal.config.{CONNECT_SCALA_UDF_STUB_PREFIXES, EXECUTOR_USER_CLASS_PATH_FIRST}
 import org.apache.spark.sql.{Artifact, SparkSession}
@@ -216,8 +216,10 @@ class ArtifactManager(session: SparkSession) extends AutoCloseable with Logging
         return
       }

-      throw new RuntimeException(s"Duplicate Artifact: $normalizedRemoteRelativePath. " +
-        "Artifacts cannot be overwritten.")
+      throw new SparkRuntimeException(
+        "DUPLICATED_ARTIFACT",
+        Map("normalizedRemoteRelativePath" -> normalizedRemoteRelativePath.toString)
+      )
     }
     transferFile(serverLocalStagingPath, target, deleteSource = deleteStagedFile)

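End to end, a Connect client that re-uploads an artifact with the same name but different contents now receives a typed SparkRuntimeException carrying the DUPLICATED_ARTIFACT error class, instead of a generic SparkConnectGrpcException wrapping a java.lang.RuntimeException. A hedged usage sketch mirroring the test above; it assumes an active Spark Connect session named spark, and the local file path and contents are illustrative:

    # Assumes `spark` is an active Spark Connect session; the path below is illustrative.
    from pyspark.errors import SparkRuntimeException

    path = "/tmp/my_pyfile.py"
    with open(path, "w") as f:
        f.write("my_func = lambda: 10")
    spark.addArtifacts(path, pyfile=True)

    # Re-uploading the same artifact name with different contents is rejected
    # rather than silently overwriting the server-side copy.
    with open(path, "w") as f:
        f.write("my_func = lambda: 11")
    try:
        spark.addArtifacts(path, pyfile=True)
    except SparkRuntimeException as e:
        print(e.getErrorClass())         # DUPLICATED_ARTIFACT
        print(e.getMessageParameters())  # {'normalizedRemoteRelativePath': 'pyfiles/my_pyfile.py'}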