#
# Copyright 2020-2022 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
fhirdata:
  # The base URL of the source FHIR server. If `dbConfig` is not set, resources
  # are fetched from this URL through the FHIR Search API.
  # Equivalent to the pipeline `fhirServerUrl` parameter.
  fhirServerUrl: "http://172.17.0.1:8091/fhir"
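  # To sanity-check this URL, the server's CapabilityStatement can be fetched
  # from the standard FHIR metadata endpoint, e.g.:
  #   curl http://172.17.0.1:8091/fhir/metadata
  # (adjust the host and port to match your deployment).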
  # The path to the file containing JDBC settings for connecting to a HAPI FHIR
  # server database. If this is set, resources are fetched directly from the
  # database and `fhirServerUrl` is ignored.
  # Equivalent to the pipeline `fhirDatabaseConfigPath` parameter.
  # dbConfig: "config/hapi-postgres-config.json"
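  # A minimal sketch of such a JDBC settings file, assuming a PostgreSQL
  # backend (values are placeholders; check the sample
  # hapi-postgres-config.json shipped with this repo for the authoritative
  # field names):
  #   {
  #     "jdbcDriverClass": "org.postgresql.Driver",
  #     "databaseHostName": "172.17.0.1",
  #     "databasePort": "5432",
  #     "databaseUser": "admin",
  #     "databasePassword": "admin",
  #     "databaseName": "hapi"
  #   }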
  # The path to output Parquet files to. The last portion of the path is used
  # as a prefix for naming the directory that contains per-resource
  # directories; a timestamp is appended to it. For example,
  # "config/controller_DWH_ORIG" will create something like
  # ./config/controller_DWH_ORIG_TIMESTAMP_2023-01-27T23-55-39.295824Z
  # Similar to the pipeline `outputParquetPath` parameter.
  #
  # For GCS buckets, dwhRootPrefix must be of the format
  # "gs://<bucket>/<baseDirPath>/<prefix>". <baseDirPath> is optional
  # for GCS buckets and may contain 0 or more directory names.
  #
  # For *nix file systems, dwhRootPrefix must be of the format
  # "/<baseDirPath>/<prefix>" and can be absolute or relative. <baseDirPath>
  # is required for *nix, and must contain 1 or more directory names.
  # dwhRootPrefix: "gs://fhir-test-analytics/config/controller_DWH_ORIG"
  dwhRootPrefix: "config/controller_DWH_ORIG"
  # The schedule for automatic incremental pipeline runs.
  # Uses the Spring CronExpression format, i.e.,
  # "second minute hour day-of-the-month month day-of-the-week", so:
  # "0 0 * * * *" means top of every hour;
  # "*/40 * * * * *" means at seconds 0 and 40 of every minute.
  # Scheduling very frequent runs is resource intensive.
  incrementalSchedule: "0 0 * * * *"
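  # As another illustration (not one of the defaults above), a nightly run
  # at 02:00 would be: "0 0 2 * * *".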
  # The schedule for automatic DWH snapshot purging. There is no benefit
  # to scheduling the purge job more frequently than incremental runs.
  # Uses the Spring CronExpression format.
  purgeSchedule: "0 30 * * * *"
  # The number of DWH snapshots to retain when the purge job runs.
  # This must be > 0 or the purge job will not run. If a pipeline run fails
  # for any reason, any partial output must be manually removed.
  numOfDwhSnapshotsToRetain: 2
  # The comma-separated list of FHIR resources to fetch/monitor.
  # Equivalent to the pipeline `resourceList` parameter.
  resourceList: "Patient,Encounter,Observation"
  # The maximum number of Flink workers to use. Each worker may have
  # `numThreads` parallel threads.
  maxWorkers: 1
  # If a positive number, the number of threads to use per worker.
  # Otherwise, the number of threads is equal to the number of cores.
  numThreads: -1
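  # Since each worker may run `numThreads` threads, total parallelism can
  # reach maxWorkers * numThreads; e.g., maxWorkers: 2 with numThreads: 4
  # would allow up to 8 concurrent threads (illustrative values, not the
  # defaults above).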
  # Whether resource tables should be automatically created on a
  # Hive/Spark server. Primarily meant for single-machine deployment.
  createHiveResourceTables: false
  # Path to a file with the settings used to create tables.
  # Required if createHiveResourceTables is `true`.
  thriftserverHiveConfig: "config/thriftserver-hive-config.json"
  # The JDBC driver to use when creating tables.
  hiveJdbcDriver: "org.apache.hive.jdbc.HiveDriver"
# Enables Spring Boot Actuator endpoints; use "*" to expose all endpoints,
# or a comma-separated list to expose selected ones.
management:
  endpoints:
    web:
      exposure:
        include: health,info,metrics,prometheus,pipeline-metrics
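# With these endpoints exposed, health and metrics can be inspected over
# HTTP via the standard Actuator paths, e.g. (assuming the controller
# serves its default web port of 8080; adjust to your deployment):
#   curl http://localhost:8080/actuator/health
#   curl http://localhost:8080/actuator/prometheus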