diff --git a/leonardo-example.conf b/leonardo-example.conf index 26fb2c5bddf..6bcc0f95927 100644 --- a/leonardo-example.conf +++ b/leonardo-example.conf @@ -6,6 +6,9 @@ # Google Cloud dataproc configuration dataproc { dataprocDefaultRegion = "YOUR_REGION" # the google region for your dataproc + #dataprocZone = "YOUR_ZONE" # Optional. The Google zone for the dataproc cluster. + # If not specified, Dataproc will automatically choose a zone + # within the configured region. leoGoogleProject = "GOOGLE_PROJECT" # the name of the google project to use during cluster startup # *this is not the project the cluster will be created in clusterUrlBase = "https://YOUR_DOMAIN/notebooks" # the base url to access your cluster diff --git a/src/main/scala/org/broadinstitute/dsde/workbench/leonardo/Boot.scala b/src/main/scala/org/broadinstitute/dsde/workbench/leonardo/Boot.scala index d3585d79b91..7008e63c674 100644 --- a/src/main/scala/org/broadinstitute/dsde/workbench/leonardo/Boot.scala +++ b/src/main/scala/org/broadinstitute/dsde/workbench/leonardo/Boot.scala @@ -75,7 +75,7 @@ object Boot extends App with LazyLogging { } val (leoServiceAccountEmail, leoServiceAccountPemFile) = serviceAccountProvider.getLeoServiceAccountAndKey - val gdDAO = new HttpGoogleDataprocDAO(dataprocConfig.applicationName, Pem(leoServiceAccountEmail, leoServiceAccountPemFile), "google", NetworkTag(dataprocConfig.networkTag), dataprocConfig.vpcNetwork.map(VPCNetworkName), dataprocConfig.vpcSubnet.map(VPCSubnetName), dataprocConfig.dataprocDefaultRegion, dataprocConfig.defaultExecutionTimeout) + val gdDAO = new HttpGoogleDataprocDAO(dataprocConfig.applicationName, Pem(leoServiceAccountEmail, leoServiceAccountPemFile), "google", NetworkTag(dataprocConfig.networkTag), dataprocConfig.vpcNetwork.map(VPCNetworkName), dataprocConfig.vpcSubnet.map(VPCSubnetName), dataprocConfig.dataprocDefaultRegion, dataprocConfig.dataprocZone, dataprocConfig.defaultExecutionTimeout) val googleComputeDAO = new HttpGoogleComputeDAO(dataprocConfig.applicationName, Pem(leoServiceAccountEmail, leoServiceAccountPemFile), "google") val googleIamDAO = new HttpGoogleIamDAO(dataprocConfig.applicationName, Pem(leoServiceAccountEmail, leoServiceAccountPemFile), "google") val googleStorageDAO = new HttpGoogleStorageDAO(dataprocConfig.applicationName, Pem(leoServiceAccountEmail, leoServiceAccountPemFile), "google") diff --git a/src/main/scala/org/broadinstitute/dsde/workbench/leonardo/config/DataprocConfig.scala b/src/main/scala/org/broadinstitute/dsde/workbench/leonardo/config/DataprocConfig.scala index a74d3d1c64e..ff5175ae349 100644 --- a/src/main/scala/org/broadinstitute/dsde/workbench/leonardo/config/DataprocConfig.scala +++ b/src/main/scala/org/broadinstitute/dsde/workbench/leonardo/config/DataprocConfig.scala @@ -7,6 +7,7 @@ import scala.concurrent.duration.FiniteDuration case class DataprocConfig( applicationName: String, dataprocDefaultRegion: String, + dataprocZone: Option[String], leoGoogleProject: GoogleProject, dataprocDockerImage: String, clusterUrlBase: String, diff --git a/src/main/scala/org/broadinstitute/dsde/workbench/leonardo/config/package.scala b/src/main/scala/org/broadinstitute/dsde/workbench/leonardo/config/package.scala index 42750ec6d8d..4a4f20452fa 100644 --- a/src/main/scala/org/broadinstitute/dsde/workbench/leonardo/config/package.scala +++ b/src/main/scala/org/broadinstitute/dsde/workbench/leonardo/config/package.scala @@ -21,6 +21,7 @@ package object config { DataprocConfig( config.getString("applicationName"), config.getString("dataprocDefaultRegion"), + config.getAs[String]("dataprocZone"), GoogleProject(config.getString("leoGoogleProject")), config.getString("dataprocDockerImage"), config.getString("clusterUrlBase"), diff --git a/src/main/scala/org/broadinstitute/dsde/workbench/leonardo/dao/google/HttpGoogleDataprocDAO.scala b/src/main/scala/org/broadinstitute/dsde/workbench/leonardo/dao/google/HttpGoogleDataprocDAO.scala index 86cd0ce96d8..ccee07cb172 100644 --- a/src/main/scala/org/broadinstitute/dsde/workbench/leonardo/dao/google/HttpGoogleDataprocDAO.scala +++ b/src/main/scala/org/broadinstitute/dsde/workbench/leonardo/dao/google/HttpGoogleDataprocDAO.scala @@ -38,6 +38,7 @@ class HttpGoogleDataprocDAO(appName: String, vpcNetwork: Option[VPCNetworkName], vpcSubnet: Option[VPCSubnetName], defaultRegion: String, + zoneOpt: Option[String], defaultExecutionTimeout: FiniteDuration) (implicit override val system: ActorSystem, override val executionContext: ExecutionContext) extends AbstractHttpGoogleDAO(appName, googleCredentialMode, workbenchMetricBaseName) with GoogleDataprocDAO { @@ -244,6 +245,11 @@ class HttpGoogleDataprocDAO(appName: String, .setMachineTypeUri(machineConfig.masterMachineType.get) .setDiskConfig(new DiskConfig().setBootDiskSizeGb(machineConfig.masterDiskSize.get)) + // Set the zone, if specified. If not specified, Dataproc will pick a zone within the configured region. + zoneOpt.foreach { zone => + gceClusterConfig.setZoneUri(zone) + } + // Create a Cluster Config and give it the GceClusterConfig, the NodeInitializationAction and the InstanceGroupConfig createClusterConfig(machineConfig, credentialsFileName) .setGceClusterConfig(gceClusterConfig) diff --git a/src/test/resources/reference.conf b/src/test/resources/reference.conf index c133bd126bf..2fcf525bd84 100644 --- a/src/test/resources/reference.conf +++ b/src/test/resources/reference.conf @@ -32,6 +32,7 @@ dataproc { applicationName = "test:leonardo" serviceAccountEmail = "test@test.com" dataprocDefaultRegion = "testregion" + dataprocZone = "test-zone" leoGoogleProject = "test-bucket" dataprocDockerImage = "testrepo/test" clusterUrlBase = "http://leonardo/"