diff --git a/CHANGELOG.md b/CHANGELOG.md index af07d99a14..4614134b48 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -35,6 +35,7 @@ be found [here](https://cromwell.readthedocs.io/en/stable/backends/HPC/#optional - Fixed `google_project` and `google_compute_service_account` workflow options not taking effect when using GCP Batch backend - Added a way to use a custom LogsPolicy for the job execution, setting `backend.providers.batch.config.batch.logs-policy` to "CLOUD_LOGGING" (default) keeps the current behavior, or, set it to "PATH" to save the logs into the the mounted disk, at the end, this log file gets copied to the google cloud storage bucket with "task.log" as the name. - When "CLOUD_LOGGING" is used, many more Cromwell / WDL labels for workflow, root workflow, call, shard etc. are now assigned to GCP Batch log entries. +- Fixed subnet selection for networks that use custom subnet creation ### Improved handling of Life Sciences API quota errors diff --git a/docs/backends/GCPBatch.md b/docs/backends/GCPBatch.md index 4395f76cc0..3071b1e7cf 100644 --- a/docs/backends/GCPBatch.md +++ b/docs/backends/GCPBatch.md @@ -253,14 +253,17 @@ backend { ``` The `network-name` and `subnetwork-name` should reference the name of your private network and subnetwork within that -network respectively. The `subnetwork-name` is an optional config. Note that in the -PAPI v2 backend `subnetwork-name` was an optional configuration parameter which accepted a `*` wildcard for choosing the -appropriate subnetwork region, but in GCP Batch the `subnetwork-name` specification can be omitted -and GCP Batch will choose the appropriate subnetwork automatically. - -For example, if your `virtual-private-cloud` config looks like the one above, then Cromwell will use the value of the -configuration key, which is `vpc-network` here, as the name of private network and run the jobs on this network. -If the network name is not present in the config Cromwell will fall back to trying to run jobs on the default network. +network respectively. For example, if your `virtual-private-cloud` config looks like the one above, then Cromwell will +use the value of the configuration key, which is `vpc-network` here, as the name of private network and run the jobs on +this network. If the network name is not present in the config Cromwell will fall back to trying to run jobs on the +default network. + +`subnetwork-name` is an optional configuration parameter which accepts a `*` wildcard for choosing the appropriate +subnetwork region. If your network is using "auto" subnet creation, `subnetwork-name`specification can be omitted and +GCP Batch will choose the appropriate subnetwork automatically. If the network's subnet creation strategy is "custom," +the full subnetwork name (with `*` for region) must be supplied (ex. `"projects/${projectId}/regions/*/subnetworks/subnetwork"`). +Note that wildcard regions are not supported by GCP Batch, Cromwell will replace `*` with the correct region at job +creation time. If the `network-name` or `subnetwork-name` values contain the string `${projectId}` then that value will be replaced by Cromwell with the name of the project running GCP Batch. diff --git a/src/ci/resources/gcp_batch_shared_application.inc.conf b/src/ci/resources/gcp_batch_shared_application.inc.conf index 02b59b4c26..8c29e488d0 100644 --- a/src/ci/resources/gcp_batch_shared_application.inc.conf +++ b/src/ci/resources/gcp_batch_shared_application.inc.conf @@ -120,11 +120,10 @@ backend { virtual-private-cloud { # integration testing: # - fully qualified name - # - hardcoded project id + # - templated project and subnet region # - does not end with `/` - network-name = "projects/broad-dsde-cromwell-dev/global/networks/cromwell-ci-gcpbatch-vpc-network" - # For GCP Batch we do not reference the subnetwork name, Batch has to work that out for itself in order to - # enable running jobs in regions that are different from the region of the GCP Batch to which we send jobs. + network-name = "projects/${projectId}/global/networks/cromwell-ci-gcpbatch-vpc-network" + subnetwork-name = "projects/${projectId}/regions/*/subnetworks/cromwell-ci-gcpbatch-vpc-network" } # Have the engine authenticate to docker.io. See BT-141 for more info. diff --git a/supportedBackends/google/batch/src/main/scala/cromwell/backend/google/batch/api/GcpBatchRequestFactoryImpl.scala b/supportedBackends/google/batch/src/main/scala/cromwell/backend/google/batch/api/GcpBatchRequestFactoryImpl.scala index d058950a07..27c8e10469 100644 --- a/supportedBackends/google/batch/src/main/scala/cromwell/backend/google/batch/api/GcpBatchRequestFactoryImpl.scala +++ b/supportedBackends/google/batch/src/main/scala/cromwell/backend/google/batch/api/GcpBatchRequestFactoryImpl.scala @@ -56,8 +56,13 @@ class GcpBatchRequestFactoryImpl()(implicit gcsTransferConfiguration: GcsTransfe .setNoExternalIpAddress(data.gcpBatchParameters.runtimeAttributes.noAddress) .setNetwork(vpcAndSubnetworkProjectLabelValues.networkName(data.createParameters.projectId)) + // When selecting a subnet region, prefer zones set in runtime attrs, then fall back to + // the region the host google project is in. Note that zones in runtime attrs will always + // be in a single region. + val region = zonesToRegion(data.createParameters.runtimeAttributes.zones).getOrElse(data.gcpBatchParameters.region) + vpcAndSubnetworkProjectLabelValues - .subnetNameOption(data.createParameters.projectId) + .subnetNameOption(projectId = data.createParameters.projectId, region = region) .foreach(network.setSubnetwork) network @@ -164,9 +169,9 @@ class GcpBatchRequestFactoryImpl()(implicit gcsTransferConfiguration: GcsTransfe override def submitRequest(data: GcpBatchRequest, jobLogger: JobLogger): CreateJobRequest = { - val runtimeAttributes = data.gcpBatchParameters.runtimeAttributes val createParameters = data.createParameters - val retryCount = data.gcpBatchParameters.runtimeAttributes.preemptible + val runtimeAttributes = createParameters.runtimeAttributes + val retryCount = runtimeAttributes.preemptible val allDisksToBeMounted: Seq[GcpBatchAttachedDisk] = createParameters.disks ++ createParameters.referenceDisksForLocalizationOpt.getOrElse(List.empty) val gcpBootDiskSizeMb = convertGbToMib(runtimeAttributes) @@ -252,7 +257,7 @@ class GcpBatchRequestFactoryImpl()(implicit gcsTransferConfiguration: GcsTransfe ) val instancePolicy = createInstancePolicy(cpuPlatform = cpuPlatform, spotModel, accelerators, allDisks, machineType = machineType) - val locationPolicy = LocationPolicy.newBuilder.addAllowedLocations(zones).build + val locationPolicy = LocationPolicy.newBuilder.addAllAllowedLocations(zones.asJava).build val allocationPolicy = createAllocationPolicy(data, locationPolicy, instancePolicy.build, networkPolicy, gcpSa, accelerators) val logsPolicy = data.gcpBatchParameters.batchAttributes.logsPolicy match { diff --git a/supportedBackends/google/batch/src/main/scala/cromwell/backend/google/batch/models/VpcAndSubnetworkProjectLabelValues.scala b/supportedBackends/google/batch/src/main/scala/cromwell/backend/google/batch/models/VpcAndSubnetworkProjectLabelValues.scala index 3ba9ea0f95..096ee1aa44 100644 --- a/supportedBackends/google/batch/src/main/scala/cromwell/backend/google/batch/models/VpcAndSubnetworkProjectLabelValues.scala +++ b/supportedBackends/google/batch/src/main/scala/cromwell/backend/google/batch/models/VpcAndSubnetworkProjectLabelValues.scala @@ -20,11 +20,13 @@ final case class VpcAndSubnetworkProjectLabelValues(vpcName: String, subnetNameO /** * Replaces the string `\${projectId}` in the subnet name if found. + * Replace wildcard character used in terra configuration for subnetworks with appropriate region */ - def subnetNameOption(projectId: String): Option[String] = - subnetNameOpt map { _.replace(ProjectIdToken, projectId) } + def subnetNameOption(projectId: String, region: String): Option[String] = + subnetNameOpt map { _.replace(ProjectIdToken, projectId) } map { _.replace(regionWildcard, "regions/" + region) } } object VpcAndSubnetworkProjectLabelValues { private val ProjectIdToken = s"$${projectId}" + private val regionWildcard = s"regions/*" } diff --git a/supportedBackends/google/batch/src/main/scala/cromwell/backend/google/batch/util/BatchUtilityConversions.scala b/supportedBackends/google/batch/src/main/scala/cromwell/backend/google/batch/util/BatchUtilityConversions.scala index 65ef5a5e04..239424ed73 100644 --- a/supportedBackends/google/batch/src/main/scala/cromwell/backend/google/batch/util/BatchUtilityConversions.scala +++ b/supportedBackends/google/batch/src/main/scala/cromwell/backend/google/batch/util/BatchUtilityConversions.scala @@ -9,8 +9,13 @@ import wom.format.MemorySize trait BatchUtilityConversions { // construct zones string - def toZonesPath(zones: Vector[String]): String = - zones.map(zone => "zones/" + zone).mkString(" ") + def toZonesPath(zones: Vector[String]): Vector[String] = + zones.map(zone => "zones/" + zone) + + // Gets first zone in vector and removes last two characters of zone to construct region + // Ex. us-central1-a -> us-central1 + def zonesToRegion(zones: Vector[String]): Option[String] = + zones.headOption.map(_.dropRight(2)) // lowercase text to match gcp label requirements def toLabel(text: String): String = diff --git a/supportedBackends/google/batch/src/test/scala/cromwell/backend/google/batch/models/GcpBatchVpcAndSubnetworkProjectLabelValuesSpec.scala b/supportedBackends/google/batch/src/test/scala/cromwell/backend/google/batch/models/GcpBatchVpcAndSubnetworkProjectLabelValuesSpec.scala index c129201737..04e37fcce2 100644 --- a/supportedBackends/google/batch/src/test/scala/cromwell/backend/google/batch/models/GcpBatchVpcAndSubnetworkProjectLabelValuesSpec.scala +++ b/supportedBackends/google/batch/src/test/scala/cromwell/backend/google/batch/models/GcpBatchVpcAndSubnetworkProjectLabelValuesSpec.scala @@ -9,6 +9,7 @@ class GcpBatchVpcAndSubnetworkProjectLabelValuesSpec extends AnyFlatSpec with Ma behavior of "VpcAndSubnetworkProjectLabelValues" private val myProjectId = "my-project" + private val myRegion = "us-central1" private val labelsTests = Table( ("description", "network", "subnetOption", "networkName", "subnetNameOption"), @@ -29,6 +30,13 @@ class GcpBatchVpcAndSubnetworkProjectLabelValuesSpec extends AnyFlatSpec with Ma Option(s"slashed/$${projectId}/sub"), "slashed/net", Option("slashed/my-project/sub") + ), + ( + "a subnet with a project token and unspecified region", + "slashed/net", + Option(s"slashed/$${projectId}/regions/*/subnet"), + "slashed/net", + Option(s"slashed/my-project/regions/us-central1/subnet") ) ) @@ -36,7 +44,7 @@ class GcpBatchVpcAndSubnetworkProjectLabelValuesSpec extends AnyFlatSpec with Ma it should description in { val labels = VpcAndSubnetworkProjectLabelValues(network, subnetOption) labels.networkName(myProjectId) should be(networkName) - labels.subnetNameOption(myProjectId) should be(subnetNameOption) + labels.subnetNameOption(myProjectId, myRegion) should be(subnetNameOption) } } }