Skip to content

Commit

Permalink
Make dataproc zone configurable (#760)
Browse files Browse the repository at this point in the history
  • Loading branch information
rtitle authored Jan 29, 2019
1 parent ba52be0 commit 311df2b
Show file tree
Hide file tree
Showing 6 changed files with 13 additions and 1 deletion.
3 changes: 3 additions & 0 deletions leonardo-example.conf
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,9 @@
# Google Cloud dataproc configuration
dataproc {
dataprocDefaultRegion = "YOUR_REGION" # the Google region in which your Dataproc clusters are created
#dataprocZone = "YOUR_ZONE" # Optional. The Google zone for the dataproc cluster.
# If not specified, Dataproc will automatically choose a zone
# within the configured region.
leoGoogleProject = "GOOGLE_PROJECT" # the name of the Google project to use during cluster startup
# Note: this is not the project the cluster will be created in
clusterUrlBase = "https://YOUR_DOMAIN/notebooks" # the base url to access your cluster
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ object Boot extends App with LazyLogging {
}

val (leoServiceAccountEmail, leoServiceAccountPemFile) = serviceAccountProvider.getLeoServiceAccountAndKey
val gdDAO = new HttpGoogleDataprocDAO(dataprocConfig.applicationName, Pem(leoServiceAccountEmail, leoServiceAccountPemFile), "google", NetworkTag(dataprocConfig.networkTag), dataprocConfig.vpcNetwork.map(VPCNetworkName), dataprocConfig.vpcSubnet.map(VPCSubnetName), dataprocConfig.dataprocDefaultRegion, dataprocConfig.defaultExecutionTimeout)
val gdDAO = new HttpGoogleDataprocDAO(dataprocConfig.applicationName, Pem(leoServiceAccountEmail, leoServiceAccountPemFile), "google", NetworkTag(dataprocConfig.networkTag), dataprocConfig.vpcNetwork.map(VPCNetworkName), dataprocConfig.vpcSubnet.map(VPCSubnetName), dataprocConfig.dataprocDefaultRegion, dataprocConfig.dataprocZone, dataprocConfig.defaultExecutionTimeout)
val googleComputeDAO = new HttpGoogleComputeDAO(dataprocConfig.applicationName, Pem(leoServiceAccountEmail, leoServiceAccountPemFile), "google")
val googleIamDAO = new HttpGoogleIamDAO(dataprocConfig.applicationName, Pem(leoServiceAccountEmail, leoServiceAccountPemFile), "google")
val googleStorageDAO = new HttpGoogleStorageDAO(dataprocConfig.applicationName, Pem(leoServiceAccountEmail, leoServiceAccountPemFile), "google")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import scala.concurrent.duration.FiniteDuration
case class DataprocConfig(
applicationName: String,
dataprocDefaultRegion: String,
dataprocZone: Option[String],
leoGoogleProject: GoogleProject,
dataprocDockerImage: String,
clusterUrlBase: String,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ package object config {
DataprocConfig(
config.getString("applicationName"),
config.getString("dataprocDefaultRegion"),
config.getAs[String]("dataprocZone"),
GoogleProject(config.getString("leoGoogleProject")),
config.getString("dataprocDockerImage"),
config.getString("clusterUrlBase"),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ class HttpGoogleDataprocDAO(appName: String,
vpcNetwork: Option[VPCNetworkName],
vpcSubnet: Option[VPCSubnetName],
defaultRegion: String,
zoneOpt: Option[String],
defaultExecutionTimeout: FiniteDuration)
(implicit override val system: ActorSystem, override val executionContext: ExecutionContext)
extends AbstractHttpGoogleDAO(appName, googleCredentialMode, workbenchMetricBaseName) with GoogleDataprocDAO {
Expand Down Expand Up @@ -244,6 +245,11 @@ class HttpGoogleDataprocDAO(appName: String,
.setMachineTypeUri(machineConfig.masterMachineType.get)
.setDiskConfig(new DiskConfig().setBootDiskSizeGb(machineConfig.masterDiskSize.get))

// Set the zone, if specified. If not specified, Dataproc will pick a zone within the configured region.
zoneOpt.foreach { zone =>
gceClusterConfig.setZoneUri(zone)
}

// Create a Cluster Config and give it the GceClusterConfig, the NodeInitializationAction and the InstanceGroupConfig
createClusterConfig(machineConfig, credentialsFileName)
.setGceClusterConfig(gceClusterConfig)
Expand Down
1 change: 1 addition & 0 deletions src/test/resources/reference.conf
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ dataproc {
applicationName = "test:leonardo"
serviceAccountEmail = "[email protected]"
dataprocDefaultRegion = "testregion"
dataprocZone = "test-zone"
leoGoogleProject = "test-bucket"
dataprocDockerImage = "testrepo/test"
clusterUrlBase = "http://leonardo/"
Expand Down

0 comments on commit 311df2b

Please sign in to comment.