From dd2df36480fb95f0fcd03613949a3e4670829cf3 Mon Sep 17 00:00:00 2001 From: Cody Fincher <204685+cofin@users.noreply.github.com> Date: Thu, 20 Jun 2024 10:03:12 -0500 Subject: [PATCH] feat: `dotenv` support (#143) * feat: dotenv support * feat: Load dotenv file in offload and connect * feat: Load dotenv file in agg_validate and programmatic calls --------- Co-authored-by: nj1973 --- Makefile | 2 +- README.md | 6 +- bin/connect | 11 +- bin/offload | 8 +- pyproject.toml | 3 + src/goe/config/config_checks.py | 40 ------ src/goe/config/config_file.py | 53 ++++++++ src/goe/config/orchestration_config.py | 7 +- src/goe/connect/connect.py | 50 +++---- src/goe/goe.py | 4 - src/goe/listener/prestart.py | 4 +- src/goe/scripts/agg_validate.py | 6 +- src/goe/util/misc_functions.py | 12 -- templates/conf/Makefile | 6 +- templates/conf/offload.env.template.bigquery | 48 +++---- templates/conf/offload.env.template.common | 88 ++++++------ templates/conf/offload.env.template.hadoop | 111 ++++++++------- templates/conf/offload.env.template.listener | 28 ++-- templates/conf/offload.env.template.mssql | 8 +- templates/conf/offload.env.template.netezza | 8 +- templates/conf/offload.env.template.oracle | 28 ++-- templates/conf/offload.env.template.snowflake | 54 ++++---- templates/conf/offload.env.template.synapse | 56 ++++---- templates/conf/offload.env.template.teradata | 16 +-- .../integration/scenarios/scenario_runner.py | 10 -- .../unit/config/test_orchestration_config.py | 127 +++++++++++------- tools/goe-shell-functions.sh | 5 - 27 files changed, 398 insertions(+), 401 deletions(-) delete mode 100644 src/goe/config/config_checks.py create mode 100644 src/goe/config/config_file.py diff --git a/Makefile b/Makefile index c54dcf7b..cc28fa41 100644 --- a/Makefile +++ b/Makefile @@ -17,7 +17,7 @@ SHELL := /bin/bash TARGET_DIR=target/offload OFFLOAD_VERSION=$(shell cat version) -GOE_WHEEL="goe-$(shell echo $(OFFLOAD_VERSION) | tr 'A-Z-' 'a-z.')0-py3-none-any.whl" +GOE_WHEEL=goe-$(shell echo $(OFFLOAD_VERSION) | tr 'A-Z-' 'a-z.')0-py3-none-any.whl BUILD=$(strip $(shell git rev-parse --short HEAD)) diff --git a/README.md b/README.md index 5e21e60a..c08a12c3 100644 --- a/README.md +++ b/README.md @@ -62,9 +62,8 @@ If using Dataproc Batches to provide Spark: ## Install database objects To install supporting database objects you need access to a database admin account that can create users, grant them system privileges and create objects in the schemas created. SYSTEM has been used in the example below but this is *not* a necessity: ``` -. ${OFFLOAD_HOME}/conf/offload.env cd ${OFFLOAD_HOME}/setup -sqlplus system@${ORA_CONN} +sqlplus system@yourhost:1521/yoursvc @install_offload ``` @@ -119,9 +118,8 @@ make install-dev-extras ``` # Running commands -Source the correct environment: +Activate the GOE Python virtual environment: ``` -. ${OFFLOAD_HOME}/conf/offload.env source ./.venv/bin/activate ``` diff --git a/bin/connect b/bin/connect index a3bb769c..72864575 100755 --- a/bin/connect +++ b/bin/connect @@ -15,15 +15,10 @@ # limitations under the License. -import os -import sys - -if not os.environ.get("OFFLOAD_HOME"): - print("OFFLOAD_HOME environment variable missing") - print("You should source environment variables first, eg: . 
../conf/offload.env") - sys.exit(1) - +from goe.config.config_file import check_config_path from goe.connect.connect import connect + if __name__ == "__main__": + check_config_path() connect() diff --git a/bin/offload b/bin/offload index 635d084c..1b4fee61 100755 --- a/bin/offload +++ b/bin/offload @@ -14,10 +14,9 @@ # See the License for the specific language governing permissions and # limitations under the License. - import sys -from goe.config.config_checks import check_cli_path +from goe.config import config_file from goe.goe import ( get_options, OFFLOAD_OP_NAME, @@ -26,12 +25,11 @@ from goe.offload.offload import OffloadOptionError, get_offload_options from goe.orchestration.cli_entry_points import offload_by_cli -check_cli_path() - - if __name__ == "__main__": + config_file.check_config_path() options = None try: + config_file.load_env() opt = get_options(operation_name=OFFLOAD_OP_NAME) get_offload_options(opt) options, _ = opt.parse_args() diff --git a/pyproject.toml b/pyproject.toml index 0a865019..1e9f2ad3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -59,6 +59,9 @@ dependencies = [ "google-cloud-bigquery", "google-cloud-kms", + # Env loading + "python-dotenv", + # GOE Listener packages "fastapi==0.77.0", "uvicorn==0.17.6", diff --git a/src/goe/config/config_checks.py b/src/goe/config/config_checks.py deleted file mode 100644 index 5f121359..00000000 --- a/src/goe/config/config_checks.py +++ /dev/null @@ -1,40 +0,0 @@ -#! /usr/bin/env python3 - -# Copyright 2016 The GOE Authors. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -""" Function calls checking orchestration configuration. -""" - -import os -import sys - - -def check_cli_path(): - """Check OFFLOAD_HOME in top level command wrappers - This should be imported and called as the first GOE import, for example: - - import os - - from goe.config.config_checks import check_cli_path - check_cli_path() - - import goe.other.libraries.if.required - """ - if not os.environ.get("OFFLOAD_HOME"): - print("OFFLOAD_HOME environment variable missing") - print( - "You should source environment variables first, eg: . ../conf/offload.env" - ) - sys.exit(1) diff --git a/src/goe/config/config_file.py b/src/goe/config/config_file.py new file mode 100644 index 00000000..e7184dde --- /dev/null +++ b/src/goe/config/config_file.py @@ -0,0 +1,53 @@ +# Copyright 2024 The GOE Authors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import os +import re +import sys +from typing import Optional + +from dotenv import load_dotenv + + +CONFIG_FILE_NAME = "offload.env" +KEY_VALUE_PATTERN = re.compile(r"#?[ ]*([A-Z_0-9]+)=(.*)") + + +def check_config_path(): + """Check OFFLOAD_HOME in top level command wrappers""" + if not os.environ.get("OFFLOAD_HOME"): + print("OFFLOAD_HOME environment variable missing") + sys.exit(1) + + +def get_environment_file_path(): + return os.path.join(os.environ.get("OFFLOAD_HOME"), "conf", CONFIG_FILE_NAME) + + +def load_env(path: str = None): + """Load GOE environment from a configuration file. + + By default this is a fixed location: $OFFLOAD_HOME/conf/offload.env. + In time this will become a parameter and support cloud storage locations. + """ + if not path: + path = get_environment_file_path() + + load_dotenv(path) + + +def env_key_value_pair(line_from_file: str) -> Optional[tuple]: + """Used by connect to get the key names from a configuration file""" + m = KEY_VALUE_PATTERN.match(line_from_file) + return m.groups() if m else None diff --git a/src/goe/config/orchestration_config.py b/src/goe/config/orchestration_config.py index 72d99542..2cc47d0d 100644 --- a/src/goe/config/orchestration_config.py +++ b/src/goe/config/orchestration_config.py @@ -22,7 +22,7 @@ import logging from typing import Optional -from goe.config import orchestration_defaults +from goe.config import config_file, orchestration_defaults from goe.config.config_validation_functions import ( OrchestrationConfigException, check_offload_fs_scheme_supported_in_backend, @@ -349,12 +349,15 @@ def as_defaults(do_not_connect=False): return OrchestrationConfig.from_dict({}, do_not_connect=do_not_connect) @staticmethod - def from_dict(config_dict, do_not_connect=False): + def from_dict(config_dict: dict, do_not_connect=False): assert isinstance(config_dict, dict) unexpected_keys = [k for k in config_dict if k not in EXPECTED_CONFIG_ARGS] assert not unexpected_keys, ( "Unexpected OrchestrationConfig keys: %s" % unexpected_keys ) + # Load environment for defaults. + config_file.load_env() + # Build config from config_dict. 
return OrchestrationConfig( do_not_connect=do_not_connect, ansi=config_dict.get("ansi", orchestration_defaults.ansi_default()), diff --git a/src/goe/connect/connect.py b/src/goe/connect/connect.py index 6f801a02..5995a6b4 100755 --- a/src/goe/connect/connect.py +++ b/src/goe/connect/connect.py @@ -15,28 +15,15 @@ from datetime import datetime from optparse import SUPPRESS_HELP import os -import re import subprocess import sys import traceback from typing import Optional from getpass import getuser -from goe.goe import ( - get_common_options, - get_log_fh, - get_log_fh_name, - init, - init_log, - log_command_line, - log_timestamp, - version, - OptionValueError, - verbose, - CONFIG_FILE_NAME, -) + from goe.config.orchestration_config import OrchestrationConfig -from goe.config import orchestration_defaults +from goe.config import config_file, orchestration_defaults from goe.connect.connect_backend import ( is_hadoop_environment, run_backend_tests, @@ -63,6 +50,18 @@ test_credential_api_alias, ) from goe.filesystem.goe_dfs_factory import get_dfs_from_options +from goe.goe import ( + get_common_options, + get_log_fh, + get_log_fh_name, + init, + init_log, + log_command_line, + log_timestamp, + version, + OptionValueError, + verbose, +) from goe.offload.offload_messages import OffloadMessages from goe.offload.offload_transport_functions import ssh_cmd_prefix from goe.orchestration import orchestration_constants @@ -206,11 +205,9 @@ def get_environment_file_name(orchestration_config): backend_id = "hadoop" else: backend_id = orchestration_config.target.lower() - return "-".join([frontend_id, backend_id, CONFIG_FILE_NAME + ".template"]) - - -def get_environment_file_path(): - return os.path.join(os.environ.get("OFFLOAD_HOME"), "conf", CONFIG_FILE_NAME) + return "-".join( + [frontend_id, backend_id, config_file.CONFIG_FILE_NAME + ".template"] + ) def get_template_file_path(orchestration_config): @@ -222,7 +219,7 @@ def test_conf_perms(): test_name = "Configuration file permissions" test_header(test_name) hint = "Expected permissions are 640" - environment_file = get_environment_file_path() + environment_file = config_file.get_environment_file_path() perms = oct(os.stat(environment_file).st_mode & 0o777) # Removing oct prefix deemed safe for display only. 
detail("%s has permissions: %s" % (environment_file, perms[2:])) @@ -310,8 +307,9 @@ def dict_from_environment_file(environment_file): d = {} with open(environment_file) as f: for line in f: - if re.match("^(#.*e|e)xport(.*)", line): - (k, v) = re.sub("^(#.*e|e)xport ", "", line).split("=", 1)[0], line + kv = config_file.env_key_value_pair(line) + if kv: + k, v = kv d[k] = v return d @@ -396,7 +394,8 @@ def check_environment(options, orchestration_config): section_header("Configuration") check_offload_env( - get_environment_file_path(), get_template_file_path(orchestration_config) + config_file.get_environment_file_path(), + get_template_file_path(orchestration_config), ) test_conf_perms() @@ -515,6 +514,7 @@ def get_connect_opts(): def connect(): options = None try: + config_file.load_env() opt = get_connect_opts() options, args = opt.parse_args() @@ -540,7 +540,7 @@ def connect(): if options.upgrade_environment_file: upgrade_environment_file( - get_environment_file_path(), + config_file.get_environment_file_path(), get_template_file_path(orchestration_config), ) else: diff --git a/src/goe/goe.py b/src/goe/goe.py index 976e4c36..ccd92328 100644 --- a/src/goe/goe.py +++ b/src/goe/goe.py @@ -178,8 +178,6 @@ OFFLOAD_OP_NAME = "offload" -CONFIG_FILE_NAME = "offload.env" - # Used in test to identify specific warnings HYBRID_SCHEMA_STEPS_DUE_TO_HWM_CHANGE_MESSAGE_TEXT = ( "Including post transport steps due to HWM change" @@ -3016,8 +3014,6 @@ class GOEOptionTypes(Option): def get_common_options(usage=None): opt = OptionParser(usage=usage, option_class=GOEOptionTypes) - opt.add_option("-c", type="posint", help=SUPPRESS_HELP) - opt.add_option( "--version", dest="version", diff --git a/src/goe/listener/prestart.py b/src/goe/listener/prestart.py index 9eb6246e..5a43878a 100644 --- a/src/goe/listener/prestart.py +++ b/src/goe/listener/prestart.py @@ -21,9 +21,9 @@ def validate_config(): """""" try: # GOE - from goe.config.config_checks import check_cli_path # noqa: WPS433 F401 + from goe.config.config_file import check_config_path # noqa: WPS433 F401 - check_cli_path() + check_config_path() except Exception: print( "failed to validate configuration. Please check your installation" diff --git a/src/goe/scripts/agg_validate.py b/src/goe/scripts/agg_validate.py index 7e22694b..acfdf6a5 100755 --- a/src/goe/scripts/agg_validate.py +++ b/src/goe/scripts/agg_validate.py @@ -24,7 +24,7 @@ import re import sys -from goe.config import config_descriptions, orchestration_defaults +from goe.config import config_descriptions, config_file, orchestration_defaults from goe.config.orchestration_config import OrchestrationConfig from goe.offload.offload_validation import ( CrossDbValidator, @@ -39,7 +39,6 @@ is_number, is_pos_int, parse_python_from_string, - check_offload_env, ) from goe.util.goe_log import log_exception @@ -256,7 +255,8 @@ def main(): MAIN ROUTINE """ - check_offload_env() + config_file.check_config_path() + config_file.load_env() args = parse_args() init(args) diff --git a/src/goe/util/misc_functions.py b/src/goe/util/misc_functions.py index 6cc7635c..955ae8d7 100644 --- a/src/goe/util/misc_functions.py +++ b/src/goe/util/misc_functions.py @@ -334,18 +334,6 @@ def get_option(options, name, repl=None): return getattr(options, name) if (options and hasattr(options, name)) else repl -def check_offload_env(): - """Check offload environment, - i.e. 
OFFLOAD_HOME is set correctly - """ - if not os.environ.get("OFFLOAD_HOME"): - print("OFFLOAD_HOME environment variable missing") - print( - "You should source environment variables first, eg: . ../conf/offload.env" - ) - sys.exit(1) - - def str_floatlike(maybe_float): """Remove unnecessary 0s from the float or 'float like string' Warning: Relies on str() conversion for floats, which truncates floats diff --git a/templates/conf/Makefile b/templates/conf/Makefile index 43219cd2..cb19771f 100644 --- a/templates/conf/Makefile +++ b/templates/conf/Makefile @@ -47,7 +47,7 @@ $(TARGET_CONF_DIR)/oracle-snowflake-offload.env.template: $(TARGET_CONF_DIR) off tail -n +$(LICENCE_LENGTH) offload.env.template.oracle >> $(TARGET_CONF_DIR)/oracle-snowflake-offload.env.template tail -n +$(LICENCE_LENGTH) offload.env.template.snowflake >> $(TARGET_CONF_DIR)/oracle-snowflake-offload.env.template tail -n +$(LICENCE_LENGTH) offload.env.template.listener >> $(TARGET_CONF_DIR)/oracle-snowflake-offload.env.template - sed -i 's/^export OFFLOAD_TRANSPORT_SPARK_SUBMIT_EXECUTABLE=/export OFFLOAD_TRANSPORT_SPARK_SUBMIT_EXECUTABLE=spark-submit/' \ + sed -i 's/^OFFLOAD_TRANSPORT_SPARK_SUBMIT_EXECUTABLE=/OFFLOAD_TRANSPORT_SPARK_SUBMIT_EXECUTABLE=spark-submit/' \ $(TARGET_CONF_DIR)/oracle-snowflake-offload.env.template $(TARGET_CONF_DIR)/oracle-synapse-offload.env.template: $(TARGET_CONF_DIR) offload.env.template.common offload.env.template.oracle offload.env.template.synapse offload.env.template.listener @@ -73,7 +73,7 @@ $(TARGET_CONF_DIR)/teradata-snowflake-offload.env.template: $(TARGET_CONF_DIR) o tail -n +$(LICENCE_LENGTH) offload.env.template.teradata >> $(TARGET_CONF_DIR)/teradata-snowflake-offload.env.template tail -n +$(LICENCE_LENGTH) offload.env.template.snowflake >> $(TARGET_CONF_DIR)/teradata-snowflake-offload.env.template tail -n +$(LICENCE_LENGTH) offload.env.template.listener >> $(TARGET_CONF_DIR)/teradata-snowflake-offload.env.template - sed -i 's/^export OFFLOAD_TRANSPORT_SPARK_SUBMIT_EXECUTABLE=/export OFFLOAD_TRANSPORT_SPARK_SUBMIT_EXECUTABLE=spark-submit/' \ + sed -i 's/^OFFLOAD_TRANSPORT_SPARK_SUBMIT_EXECUTABLE=/OFFLOAD_TRANSPORT_SPARK_SUBMIT_EXECUTABLE=spark-submit/' \ $(TARGET_CONF_DIR)/teradata-snowflake-offload.env.template $(TARGET_CONF_DIR)/teradata-synapse-offload.env.template: $(TARGET_CONF_DIR) offload.env.template.common offload.env.template.teradata offload.env.template.synapse @@ -81,7 +81,7 @@ $(TARGET_CONF_DIR)/teradata-synapse-offload.env.template: $(TARGET_CONF_DIR) off tail -n +$(LICENCE_LENGTH) offload.env.template.teradata >> $(TARGET_CONF_DIR)/teradata-synapse-offload.env.template tail -n +$(LICENCE_LENGTH) offload.env.template.synapse >> $(TARGET_CONF_DIR)/teradata-synapse-offload.env.template tail -n +$(LICENCE_LENGTH) offload.env.template.listener >> $(TARGET_CONF_DIR)/teradata-synapse-offload.env.template - sed -i 's/^export OFFLOAD_TRANSPORT_SPARK_SUBMIT_EXECUTABLE=/export OFFLOAD_TRANSPORT_SPARK_SUBMIT_EXECUTABLE=spark-submit/' \ + sed -i 's/^OFFLOAD_TRANSPORT_SPARK_SUBMIT_EXECUTABLE=/OFFLOAD_TRANSPORT_SPARK_SUBMIT_EXECUTABLE=spark-submit/' \ $(TARGET_CONF_DIR)/teradata-synapse-offload.env.template $(TARGET_CONF_DIR): diff --git a/templates/conf/offload.env.template.bigquery b/templates/conf/offload.env.template.bigquery index 7f35e1df..f58bf0e5 100644 --- a/templates/conf/offload.env.template.bigquery +++ b/templates/conf/offload.env.template.bigquery @@ -17,67 +17,67 @@ # =========================================================================================== # Path 
to Google service account private key JSON file -# export GOOGLE_APPLICATION_CREDENTIALS= +# GOOGLE_APPLICATION_CREDENTIALS= # Backend distribution: -export BACKEND_DISTRIBUTION=GCP +BACKEND_DISTRIBUTION=GCP # Orchestration query engine -export QUERY_ENGINE=BIGQUERY +QUERY_ENGINE=BIGQUERY # Google BigQuery location to use when creating a dataset, this has no impact other than when creating datasets. # The default is to use the BigQuery default. # Note the dataset location must be compatible with that of the bucket specified in OFFLOAD_FS_CONTAINER -export BIGQUERY_DATASET_LOCATION= +BIGQUERY_DATASET_LOCATION= # Project to use for BigQuery table references. # The default is to use the default project for the authenticated user/service account. -export BIGQUERY_DATASET_PROJECT= +BIGQUERY_DATASET_PROJECT= # Google Cloud Key Management Service crytopgraphic key information for customer-managed encryption keys (CMEK) # GOOGLE_KMS_KEY_RING_PROJECT only needs to be set if the KMS project differs from the default # project for the authenticated user/service account. -export GOOGLE_KMS_KEY_RING_PROJECT= -export GOOGLE_KMS_KEY_RING_LOCATION= -export GOOGLE_KMS_KEY_RING_NAME= -export GOOGLE_KMS_KEY_NAME= +GOOGLE_KMS_KEY_RING_PROJECT= +GOOGLE_KMS_KEY_RING_LOCATION= +GOOGLE_KMS_KEY_RING_NAME= +GOOGLE_KMS_KEY_NAME= # Google Dataproc cluster name -export GOOGLE_DATAPROC_CLUSTER= +GOOGLE_DATAPROC_CLUSTER= # Google Dataproc/Dataproc Batches project -export GOOGLE_DATAPROC_PROJECT= +GOOGLE_DATAPROC_PROJECT= # Google Dataproc/Dataproc Batches region -export GOOGLE_DATAPROC_REGION= +GOOGLE_DATAPROC_REGION= # Google Dataproc/Dataproc Batches service account -export GOOGLE_DATAPROC_SERVICE_ACCOUNT= +GOOGLE_DATAPROC_SERVICE_ACCOUNT= # Google Dataproc Batches version, leave blank to disable Dataproc Batches -export GOOGLE_DATAPROC_BATCHES_VERSION= +GOOGLE_DATAPROC_BATCHES_VERSION= # Google Dataproc Batches subnet # GOOGLE_DATAPROC_BATCHES_SUBNET defines a full subnet URI, for example: # projects/my-project/regions/my-region/subnetworks/my-subnet -export GOOGLE_DATAPROC_BATCHES_SUBNET= +GOOGLE_DATAPROC_BATCHES_SUBNET= # Google Dataproc Batches TTL -export GOOGLE_DATAPROC_BATCHES_TTL=2d +GOOGLE_DATAPROC_BATCHES_TTL=2d # Filesystem type for Offloaded tables # When offloading a table to cloud storage the table LOCATION will be structured as below: # ${OFFLOAD_FS_SCHEME}://${OFFLOAD_FS_CONTAINER}/${OFFLOAD_FS_PREFIX}/db_name/table_name/ -export OFFLOAD_FS_SCHEME=gs +OFFLOAD_FS_SCHEME=gs # The path with which to prefix offloaded table paths. -export OFFLOAD_FS_PREFIX=goe +OFFLOAD_FS_PREFIX=goe # A valid bucket or container name when offloading to cloud storage -export OFFLOAD_FS_CONTAINER= +OFFLOAD_FS_CONTAINER= # File format for staged data during an Offload (supported values: AVRO and PARQUET) -export OFFLOAD_STAGING_FORMAT=AVRO +OFFLOAD_STAGING_FORMAT=AVRO # Key/value pairs, in JSON format, defining session query parameters for the orchestration backend query engine. # These take effect for all queries issued to the query engine, e.g: -# export OFFLOAD_BACKEND_SESSION_PARAMETERS="{\"parameter_name\": \"some.value\"}" -#export OFFLOAD_BACKEND_SESSION_PARAMETERS= +# OFFLOAD_BACKEND_SESSION_PARAMETERS="{\"parameter_name\": \"some.value\"}" +#OFFLOAD_BACKEND_SESSION_PARAMETERS= # Case conversion to be applied to any backend identifier names created by GOE (supported values: UPPER, LOWER and NO_MODIFY). 
-export BACKEND_IDENTIFIER_CASE=LOWER +BACKEND_IDENTIFIER_CASE=LOWER # Authentication mechanism for Spark ThriftServer -export HIVE_SERVER_AUTH_MECHANISM=PLAIN +HIVE_SERVER_AUTH_MECHANISM=PLAIN diff --git a/templates/conf/offload.env.template.common b/templates/conf/offload.env.template.common index 2ad954e5..9a6c807d 100644 --- a/templates/conf/offload.env.template.common +++ b/templates/conf/offload.env.template.common @@ -1,5 +1,3 @@ -#!/bin/bash - # Copyright 2016 The GOE Authors. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -16,91 +14,91 @@ # SSL settings: # SSL_ACTIVE identifies when the backend SQL engine is using SSL -#export SSL_ACTIVE=true -#export SSL_TRUSTED_CERTS= +#SSL_ACTIVE=true +#SSL_TRUSTED_CERTS= # Path to GOE encryption key file if using encrypted passwords -#export PASSWORD_KEY_FILE= +#PASSWORD_KEY_FILE= # Offload Transport Settings: # The method used to transport data from an RDBMS frontend to a backend, defaults to AUTO # Valid values are AUTO, GOE, GCP and SQOOP -export OFFLOAD_TRANSPORT=AUTO +OFFLOAD_TRANSPORT=AUTO # User to authenticate as for executing Offload Transport commands such as SSH for spark-submit or Sqoop commands, or Livy API calls -export OFFLOAD_TRANSPORT_USER=${USER} +OFFLOAD_TRANSPORT_USER=${USER} # Degree of transport parallelism -export OFFLOAD_TRANSPORT_PARALLELISM=2 +OFFLOAD_TRANSPORT_PARALLELISM=2 # OFFLOAD_TRANSPORT_CMD_HOST host for running data transport commands such as spark-submit or Sqoop commands -export OFFLOAD_TRANSPORT_CMD_HOST="localhost" +OFFLOAD_TRANSPORT_CMD_HOST="localhost" # Control whether parallel data transport tasks should have a consistent point in time when reading RDBMS data -export OFFLOAD_TRANSPORT_CONSISTENT_READ=true +OFFLOAD_TRANSPORT_CONSISTENT_READ=true # Number of records to fetch in a single batch from the RDBMS during Offload -export OFFLOAD_TRANSPORT_FETCH_SIZE= +OFFLOAD_TRANSPORT_FETCH_SIZE= # Maximum table size to use Query Import transport method -export OFFLOAD_TRANSPORT_SMALL_TABLE_THRESHOLD=20M +OFFLOAD_TRANSPORT_SMALL_TABLE_THRESHOLD=20M # OFFLOAD_TRANSPORT_SPARK_THRIFT_HOST host(s) where the Spark Thrift Server is running # OFFLOAD_TRANSPORT_SPARK_THRIFT_HOST can be a comma-separated list to randomly choose from, eg. hdp21,hdp22,hdp23 -export OFFLOAD_TRANSPORT_SPARK_THRIFT_HOST= -export OFFLOAD_TRANSPORT_SPARK_THRIFT_PORT= +OFFLOAD_TRANSPORT_SPARK_THRIFT_HOST= +OFFLOAD_TRANSPORT_SPARK_THRIFT_PORT= # The executable to use for submitting Spark applications. Can be empty, spark-submit or spark2-submit -export OFFLOAD_TRANSPORT_SPARK_SUBMIT_EXECUTABLE= +OFFLOAD_TRANSPORT_SPARK_SUBMIT_EXECUTABLE= # The master URL for the Spark cluster, only used for non-Hadoop Spark clusters, if empty Spark will use default settings -export OFFLOAD_TRANSPORT_SPARK_SUBMIT_MASTER_URL="spark://${OFFLOAD_TRANSPORT_CMD_HOST}:7077" +OFFLOAD_TRANSPORT_SPARK_SUBMIT_MASTER_URL="spark://${OFFLOAD_TRANSPORT_CMD_HOST}:7077" # Yarn queue name for GOE Spark jobs -export OFFLOAD_TRANSPORT_SPARK_QUEUE_NAME= +OFFLOAD_TRANSPORT_SPARK_QUEUE_NAME= # Override JVM flags for spark-submit command, inserted right after "spark-submit", e.g.: # "-Dhadoop.security.credential.provider.path=jceks://hdfs/user/goe/dbname.dbuser.pwd.m.jceks" # For Oracle wallet usage this may be useful as below: # "-Doracle.net.tns_admin=. 
-Doracle.net.wallet_location=(SOURCE=(METHOD=FILE)(METHOD_DATA=(DIRECTORY=.)))" # This setting is ignored for OFFLOAD_TRANSPORT values that do not utilise Spark -export OFFLOAD_TRANSPORT_SPARK_OVERRIDES= +OFFLOAD_TRANSPORT_SPARK_OVERRIDES= # Key/value pairs, in JSON format, to override Spark property defaults, e.g.: -# export OFFLOAD_TRANSPORT_SPARK_PROPERTIES='{"spark.extraListeners": "GOETaskListener", "spark.executor.memory": "4G"}' +# OFFLOAD_TRANSPORT_SPARK_PROPERTIES='{"spark.extraListeners": "GOETaskListener", "spark.executor.memory": "4G"}' # spark.extraListeners: GOETaskListener is required for Offload Transport verification. Extra listeners may be added to the JSON below. -export OFFLOAD_TRANSPORT_SPARK_PROPERTIES='{"spark.extraListeners": "GOETaskListener", "spark.jars.packages": "com.oracle.database.jdbc:ojdbc11:23.2.0.0,org.apache.spark:spark-avro_2.12:3.3.0"}' +OFFLOAD_TRANSPORT_SPARK_PROPERTIES='{"spark.extraListeners": "GOETaskListener", "spark.jars.packages": "com.oracle.database.jdbc:ojdbc11:23.2.0.0,org.apache.spark:spark-avro_2.12:3.3.0"}' # CSV of files to be passed to Spark. Does not apply to Thriftserver or Livy transport methods. -export OFFLOAD_TRANSPORT_SPARK_FILES= +OFFLOAD_TRANSPORT_SPARK_FILES= # CSV of JAR files to be passed to Spark. Does not apply to Thriftserver or Livy transport methods. -export OFFLOAD_TRANSPORT_SPARK_JARS= +OFFLOAD_TRANSPORT_SPARK_JARS= # URL for Livy/Spark REST API, e.g.: # http://fqdn-n.example.com:port -export OFFLOAD_TRANSPORT_LIVY_API_URL= +OFFLOAD_TRANSPORT_LIVY_API_URL= # OFFLOAD_TRANSPORT_LIVY_API_VERIFY_SSL is used to enable SSL for REST API calls. There are 4 states: # Empty: Do not use SSL # TRUE: Use SSL & verify certificate against known certificates # FALSE: Use SSL & do not verify certificate # /some/path/here/cert-bundle.crt: Use SSL & verify certificate against path to certificate bundle -export OFFLOAD_TRANSPORT_LIVY_API_VERIFY_SSL= +OFFLOAD_TRANSPORT_LIVY_API_VERIFY_SSL= # Idle timeout (in seconds) for Spark client sessions created in Livy -export OFFLOAD_TRANSPORT_LIVY_IDLE_SESSION_TIMEOUT= +OFFLOAD_TRANSPORT_LIVY_IDLE_SESSION_TIMEOUT= # OFFLOAD_TRANSPORT_LIVY_MAX_SESSIONS is used to limit the number of Livy sessions Offload will create # Sessions are re-used when idle, new sessions are only created when no idle sessions are available -export OFFLOAD_TRANSPORT_LIVY_MAX_SESSIONS= +OFFLOAD_TRANSPORT_LIVY_MAX_SESSIONS= # Database connection details for data transport if different to ORA_CONN -export OFFLOAD_TRANSPORT_DSN= +OFFLOAD_TRANSPORT_DSN= # Key/value pairs, in JSON format, to supply Oracle ALTER SESSION parameter values # These only take effect during data transport, e.g.: -# export OFFLOAD_TRANSPORT_RDBMS_SESSION_PARAMETERS='{"cell_offload_processing": "false"}' -export OFFLOAD_TRANSPORT_RDBMS_SESSION_PARAMETERS= +# OFFLOAD_TRANSPORT_RDBMS_SESSION_PARAMETERS='{"cell_offload_processing": "false"}' +OFFLOAD_TRANSPORT_RDBMS_SESSION_PARAMETERS= # Polling interval in seconds for validation of Spark transport row count. # A value of -1 disables retrieval of RDBMS SQL statistics. # A value of 0 disables polling resulting in a single capture of SQL statistics after Offload Transport. # A value greater than 0 polls RDBMS SQL statistics using the specified interval. -#export OFFLOAD_TRANSPORT_VALIDATION_POLLING_INTERVAL=0 +#OFFLOAD_TRANSPORT_VALIDATION_POLLING_INTERVAL=0 # info/detail/debug, default info -export LOG_LEVEL=info +LOG_LEVEL=info # Restrict default size of RDBMS partitions offloaded per cycle. [\d.]+[MG] eg. 
100M, 1G, 1.5G -#export MAX_OFFLOAD_CHUNK_SIZE= +#MAX_OFFLOAD_CHUNK_SIZE= # Restrict default number of RDBMS partitions offloaded per cycle. -#export MAX_OFFLOAD_CHUNK_COUNT= +#MAX_OFFLOAD_CHUNK_COUNT= # Default degree of parallelism to use for the RDBMS query executed when validating an offload. # Values or 0 or 1 will execute the query without parallelism. # Values > 1 will force a parallel query of the given degree. # If unset, the RDBMS query will fall back to using the behavior specified by RDBMS defaults. -#export OFFLOAD_VERIFY_PARALLELISM= +#OFFLOAD_VERIFY_PARALLELISM= # =========================================================================================== # Advanced common settings: you probably do not need to modify these lines @@ -110,20 +108,14 @@ export LOG_LEVEL=info # if undefined, the DB_UNIQUE_NAME will be used, giving _ # if defined but empty, no prefix is used, giving # otherwise, databases will be named _ -export DB_NAME_PREFIX= - -# Paths -export PATH=${OFFLOAD_HOME}/bin:$PATH:$KERBEROS_PATH - -# Override config path, defaults to OFFLOAD_HOME/conf -export OFFLOAD_CONFDIR=${OFFLOAD_HOME}/conf +DB_NAME_PREFIX= # Override log path, defaults to OFFLOAD_HOME/log # Also supports Google Cloud Storage paths, e.g.: gs://my-bucket/my-prefix -#export OFFLOAD_LOGDIR= +#OFFLOAD_LOGDIR= # Default number of external table location files for parallel data retrieval -export NUM_LOCATION_FILES=16 +NUM_LOCATION_FILES=16 # Default method of generation for backend stats after an Offload, Incremental Update Extraction or Compaction (supported values: NATIVE, HISTORY, COPY, NONE). # Can override with command-line options if required. @@ -131,16 +123,12 @@ export NUM_LOCATION_FILES=16 # - HISTORY: Gather stats on all partitions without stats. Only applicable to an Offload on Hive (for Impala, HISTORY will be converted to NATIVE) # - COPY: Copy RDBMS stats to the backend table using ALTER TABLE commands. Only applicable to an Offload on Impala # - NONE: Don't compute or copy any stats -#export OFFLOAD_STATS_METHOD=COPY +#OFFLOAD_STATS_METHOD=COPY # Compress load table data during an Offload. This can be useful when staging to cloud storage. -#export OFFLOAD_COMPRESS_LOAD_TABLE=true +#OFFLOAD_COMPRESS_LOAD_TABLE=true # Propagate NOT NULL constraints to the backend system during Offload # - AUTO: Propagate NOT NULL constraints to the backend system # - NONE: Don't copy any NOT NULL constraints -export OFFLOAD_NOT_NULL_PROPAGATION=AUTO - -# Distribute data by partition key(s) during the final INSERT operation of an offload. Hive only (will be ignored for Impala). -# Defaults to true -export OFFLOAD_DISTRIBUTE_ENABLED=true +OFFLOAD_NOT_NULL_PROPAGATION=AUTO diff --git a/templates/conf/offload.env.template.hadoop b/templates/conf/offload.env.template.hadoop index 56530b59..e69b788c 100644 --- a/templates/conf/offload.env.template.hadoop +++ b/templates/conf/offload.env.template.hadoop @@ -17,12 +17,12 @@ # =========================================================================================== # Hadoop/FS settings: -export HADOOP_SSH_USER=${OFFLOAD_TRANSPORT_USER} -export HDFS_HOME=/user/goe +HADOOP_SSH_USER=${OFFLOAD_TRANSPORT_USER} +HDFS_HOME=/user/goe # The HDFS location for both temporary load and final offloaded tables. OFFLOAD_FS_PREFIX, when set, takes # precedence over HDFS_DATA for Offloaded tables -export HDFS_DATA=${HDFS_HOME}/offload -export HDFS_LOAD=${HDFS_DATA} +HDFS_DATA=${HDFS_HOME}/offload +HDFS_LOAD=${HDFS_DATA} # Filesystem type for Offloaded tables. 
Valid values are: # inherit: Do not include a LOCATION clause when creating a table, inherit the value from the container database @@ -32,14 +32,18 @@ export HDFS_LOAD=${HDFS_DATA} # abfs(s): Use a LOCATION clause to store table data in Microsoft Azure Data Lake Storage Generation 2. This must be correctly configured in the backend system configuration # When offloading a table to cloud storage the table LOCATION will be structured as below: # ${OFFLOAD_FS_SCHEME}://${OFFLOAD_FS_CONTAINER}/${OFFLOAD_FS_PREFIX}/db_name/table_name/ -export OFFLOAD_FS_SCHEME=inherit +OFFLOAD_FS_SCHEME=inherit # The path with which to prefix offloaded table paths. Takes precedence over HDFS_DATA when OFFLOAD_FS_SCHEME != "inherit" -export OFFLOAD_FS_PREFIX=${HDFS_DATA} +OFFLOAD_FS_PREFIX=${HDFS_DATA} # A valid bucket or container name when offloading to cloud storage -export OFFLOAD_FS_CONTAINER= +OFFLOAD_FS_CONTAINER= + +# Distribute data by partition key(s) during the final INSERT operation of an offload. Hive only (will be ignored for Impala). +# Defaults to true +OFFLOAD_DISTRIBUTE_ENABLED=true # HDFS client configuration file location -#export LIBHDFS3_CONF=${OFFLOAD_HOME}/conf/hdfs-client.xml +#LIBHDFS3_CONF=${OFFLOAD_HOME}/conf/hdfs-client.xml # WEBHDFS_HOST/PORT can be used to optimize HDFS activities removing JVM start-up overhead by utilising WebHDFS # WEBHDFS_HOST can be a comma-separated list of hosts if HDFS is configured for High Availability @@ -49,33 +53,33 @@ export OFFLOAD_FS_CONTAINER= # TRUE: Use SSL & verify Hadoop certificate against known certificates # FALSE: Use SSL & do not verify Hadoop certificate # /some/path/here/cert-bundle.crt: Use SSL & verify Hadoop certificate against path to certificate bundle -#export WEBHDFS_HOST= -#export WEBHDFS_PORT= -export WEBHDFS_VERIFY_SSL= +#WEBHDFS_HOST= +#WEBHDFS_PORT= +WEBHDFS_VERIFY_SSL= # Impala/Hive connection settings: # (HIVE_SERVER_HOST/PORT are used for both Impala and Hive connections) # HIVE_SERVER_HOST can be a comma-separated list of hosts to randomly choose from, e.g.: hdp21,hdp22,hdp23 # Default Impala port is 21050, default Hive port is 10000 -export HIVE_SERVER_HOST= -export HIVE_SERVER_PORT=21050 -export HIVE_SERVER_USER=goe -export HIVE_SERVER_PASS= +HIVE_SERVER_HOST= +HIVE_SERVER_PORT=21050 +HIVE_SERVER_USER=goe +HIVE_SERVER_PASS= # Use HTTP transport for HiveServer2 connections -#export HIVE_SERVER_HTTP_TRANSPORT=true +#HIVE_SERVER_HTTP_TRANSPORT=true # Path component of URL endpoint when connecting to HiveServer2 in HTTP mode -#export HIVE_SERVER_HTTP_PATH= +#HIVE_SERVER_HTTP_PATH= # Path to LDAP password file -#export HIVE_SERVER_LDAP_PASSWORD_FILE= +#HIVE_SERVER_LDAP_PASSWORD_FILE= # Hive connection timeout in seconds -export HIVE_SERVER_TIMEOUT=3600 +HIVE_SERVER_TIMEOUT=3600 # Authentication mechanism for HiveServer2 # In non-kerberized environments, should be set to: # impala: NOSASL # hive: value of hive-site.xml: hive.server2.authentication # Ignored in kerberized or LDAP environments -#export HIVE_SERVER_AUTH_MECHANISM=NOSASL +#HIVE_SERVER_AUTH_MECHANISM=NOSASL # Kerberos settings: # KERBEROS_SERVICE is the SQL engine Kerberos service (usually 'impala' or 'hive') @@ -84,20 +88,23 @@ export HIVE_SERVER_TIMEOUT=3600 # if KERBEROS_KEYTAB is provided, KERBEROS_PRINCIPAL should also be provided # it is the kerberos user to authenticate as. 
ie $kinit -kt KERBEROS_KEYTAB KERBEROS_PRINCIPAL should succeed # Set KERBEROS_PATH if your Kerberos utilities (like kinit) reside in some non-standard directory -export KERBEROS_SERVICE= -export KERBEROS_KEYTAB= -export KERBEROS_PRINCIPAL= -export KERBEROS_PATH=/usr/kerberos/bin +KERBEROS_SERVICE= +KERBEROS_KEYTAB= +KERBEROS_PRINCIPAL= +KERBEROS_PATH=/usr/kerberos/bin # KERBEROS_TICKET_CACHE_PATH is required to use the libhdfs3-based HDFS result cache in a kerberized cluster # For example: /tmp/krb5cc_12345 -export KERBEROS_TICKET_CACHE_PATH= +KERBEROS_TICKET_CACHE_PATH= + +# Paths +PATH=${PATH}:${KERBEROS_PATH} # Integrate with a data governance backend. If the URL is blank then integration is disabled. Format: # http://fqdn-n.example.com:port/api -#export DATA_GOVERNANCE_API_URL= -#export DATA_GOVERNANCE_API_USER= -#export DATA_GOVERNANCE_API_PASS= -export DATA_GOVERNANCE_BACKEND=navigator +#DATA_GOVERNANCE_API_URL= +#DATA_GOVERNANCE_API_USER= +#DATA_GOVERNANCE_API_PASS= +DATA_GOVERNANCE_BACKEND=navigator # CLOUDERA_NAVIGATOR_HIVE_SOURCE_ID is mandatory when integrating with Cloudera Navigator # This can be determined by taking the "identity" value from the output of a curl command like the example below: # curl "${DATA_GOVERNANCE_API_URL}//entities?query=((type:SOURCE)AND(sourceType:Hive)AND(clusterName:))" -u username:password -X GET @@ -105,68 +112,68 @@ export DATA_GOVERNANCE_BACKEND=navigator # - is the correct version, e.g. "v13". This can be identified using: curl "${DATA_GOVERNANCE_API_URL}/version" -X GET # - is the name of the cluster as shown in Navigator # If multiple entities are listed then choose the correct source for the Hive service you intend to use -#export CLOUDERA_NAVIGATOR_HIVE_SOURCE_ID= -export DATA_GOVERNANCE_AUTO_TAGS='GOE,+RDBMS_NAME' +#CLOUDERA_NAVIGATOR_HIVE_SOURCE_ID= +DATA_GOVERNANCE_AUTO_TAGS='GOE,+RDBMS_NAME' # Custom tags can be defined with a comma-separated string in DATA_GOVERNANCE_CUSTOM_TAGS -export DATA_GOVERNANCE_CUSTOM_TAGS= -export DATA_GOVERNANCE_AUTO_PROPERTIES='+GOE_OBJECT_TYPE,+SOURCE_RDBMS_TABLE,+TARGET_RDBMS_TABLE,+INITIAL_OPERATION_DATETIME,+LATEST_OPERATION_DATETIME' +DATA_GOVERNANCE_CUSTOM_TAGS= +DATA_GOVERNANCE_AUTO_PROPERTIES='+GOE_OBJECT_TYPE,+SOURCE_RDBMS_TABLE,+TARGET_RDBMS_TABLE,+INITIAL_OPERATION_DATETIME,+LATEST_OPERATION_DATETIME' # Custom properties can be included in GOE metadata via key/value pairs defined in DATA_GOVERNANCE_CUSTOM_PROPERTIES -#export DATA_GOVERNANCE_CUSTOM_PROPERTIES='{"key": "value", etc}' +#DATA_GOVERNANCE_CUSTOM_PROPERTIES='{"key": "value", etc}' # =========================================================================================== # Advanced settings: you probably do not need to modify the lines below # =========================================================================================== # HDFS_CMD_HOST overrides HIVE_SERVER_HOST for HDFS operations only -export HDFS_CMD_HOST= +HDFS_CMD_HOST= # Databases are named and _load # HDFS_DB_PATH_SUFFIX defaults to .db, giving .db and _load.db # Uncomment the following line if the .db is causing problems -#export HDFS_DB_PATH_SUFFIX= +#HDFS_DB_PATH_SUFFIX= # Backend database and table name of the in-list-join table # can be created and populated with ./connect --create-sequence-table # defaults to default.goe_sequence -#export IN_LIST_JOIN_TABLE="default.goe_sequence" -#export IN_LIST_JOIN_TABLE_SIZE="10000" +#IN_LIST_JOIN_TABLE="default.goe_sequence" +#IN_LIST_JOIN_TABLE_SIZE="10000" # Backend distribution 'override' (supported values: CDH, HDP, 
EMR, MAPR) -export BACKEND_DISTRIBUTION=CDH +BACKEND_DISTRIBUTION=CDH # Case conversion to be applied to any backend identifier names created by GOE (supported values: UPPER, LOWER and NO_MODIFY). -export BACKEND_IDENTIFIER_CASE=LOWER +BACKEND_IDENTIFIER_CASE=LOWER # Key/value pairs, in JSON format, defining session query parameters for the orchestration backend query engine. # These take effect for all queries issued to the query engine, e.g: -# export OFFLOAD_BACKEND_SESSION_PARAMETERS="{\"request_pool\": \"goe.pool\"}" -#export OFFLOAD_BACKEND_SESSION_PARAMETERS= +# OFFLOAD_BACKEND_SESSION_PARAMETERS="{\"request_pool\": \"goe.pool\"}" +#OFFLOAD_BACKEND_SESSION_PARAMETERS= # Orchestration query engine (supported values: IMPALA, HIVE) -export QUERY_ENGINE=IMPALA +QUERY_ENGINE=IMPALA # Comma-delimited list of HiveServer2 session parameters to set # BATCH_SIZE=16384 is a strongly recommended performance setting -# eg. export HS2_SESSION_PARAMS="BATCH_SIZE=16384,MEM_LIMIT=2G" -export HS2_SESSION_PARAMS="BATCH_SIZE=16384" +# eg. HS2_SESSION_PARAMS="BATCH_SIZE=16384,MEM_LIMIT=2G" +HS2_SESSION_PARAMS="BATCH_SIZE=16384" # An alias provided by Hadoop "credential provided API" to be used for RDBMS authentication -# export OFFLOAD_TRANSPORT_PASSWORD_ALIAS= +# OFFLOAD_TRANSPORT_PASSWORD_ALIAS= # # The credential provider path to be used in conjunction with OFFLOAD_TRANSPORT_PASSWORD_ALIAS, e.g.: # # "jceks://hdfs/user/goe/dbname.dbuser.pwd.m.jceks" # # Only required if the path is not configured in Hadoop configuration defaults -# export OFFLOAD_TRANSPORT_CREDENTIAL_PROVIDER_PATH= +# OFFLOAD_TRANSPORT_CREDENTIAL_PROVIDER_PATH= # Sqoop settings: # You should be using OraOOP optimizations for Sqoop (included in standard Apache Sqoop # from v1.4.5), but if you're not, then you need to disable direct path mode: -export SQOOP_DISABLE_DIRECT=false +SQOOP_DISABLE_DIRECT=false # Override flags for sqoop command, inserted right after "sqoop import", e.g.: # "-Dhadoop.security.credential.provider.path=jceks://hdfs/user/goe/dbname.dbuser.pwd.m.jceks" # This setting is ignored for OFFLOAD_TRANSPORT_METHODs that do not utilise sqoop -export SQOOP_OVERRIDES="-Dsqoop.avro.logical_types.decimal.enable=false" +SQOOP_OVERRIDES="-Dsqoop.avro.logical_types.decimal.enable=false" # Add sqoop command options at the end of the sqoop command -export SQOOP_ADDITIONAL_OPTIONS= +SQOOP_ADDITIONAL_OPTIONS= # HDFS path to Sqoop password file, readable by HADOOP_SSH_USER. 
If not specified, ORA_APP_PASS or OFFLOAD_TRANSPORT_PASSWORD_ALIAS will be used -export SQOOP_PASSWORD_FILE= +SQOOP_PASSWORD_FILE= # Yarn queue name for GOE Sqoop jobs -export SQOOP_QUEUE_NAME= +SQOOP_QUEUE_NAME= diff --git a/templates/conf/offload.env.template.listener b/templates/conf/offload.env.template.listener index 14e1af25..cec28fc8 100644 --- a/templates/conf/offload.env.template.listener +++ b/templates/conf/offload.env.template.listener @@ -16,17 +16,17 @@ # GOE Listener Configuration # =========================================================================================== -export OFFLOAD_LISTENER_SHARED_TOKEN=welcome22 -export OFFLOAD_LISTENER_HEARTBEAT_INTERVAL=60 -#export OFFLOAD_LISTENER_PORT=8005 -export OFFLOAD_LISTENER_PORT= -export OFFLOAD_LISTENER_HOST=0.0.0.0 -export OFFLOAD_LISTENER_REDIS_HOST= -export OFFLOAD_LISTENER_REDIS_PORT= -export OFFLOAD_LISTENER_REDIS_DB= -# export OFFLOAD_LISTENER_REDIS_USERNAME= -# export OFFLOAD_LISTENER_REDIS_PASSWORD= -# export OFFLOAD_LISTENER_REDIS_USE_SSL= -# export OFFLOAD_LISTENER_REDIS_SSL_CERT= -# export OFFLOAD_LISTENER_REDIS_USE_SENTINEL= -# export OFFLOAD_LISTENER_REDIS_SENTINEL_MASTER= +OFFLOAD_LISTENER_SHARED_TOKEN=welcome22 +OFFLOAD_LISTENER_HEARTBEAT_INTERVAL=60 +#OFFLOAD_LISTENER_PORT=8005 +OFFLOAD_LISTENER_PORT= +OFFLOAD_LISTENER_HOST=0.0.0.0 +OFFLOAD_LISTENER_REDIS_HOST= +OFFLOAD_LISTENER_REDIS_PORT= +OFFLOAD_LISTENER_REDIS_DB= +# OFFLOAD_LISTENER_REDIS_USERNAME= +# OFFLOAD_LISTENER_REDIS_PASSWORD= +# OFFLOAD_LISTENER_REDIS_USE_SSL= +# OFFLOAD_LISTENER_REDIS_SSL_CERT= +# OFFLOAD_LISTENER_REDIS_USE_SENTINEL= +# OFFLOAD_LISTENER_REDIS_SENTINEL_MASTER= diff --git a/templates/conf/offload.env.template.mssql b/templates/conf/offload.env.template.mssql index 2f0c8840..f3f02dc8 100644 --- a/templates/conf/offload.env.template.mssql +++ b/templates/conf/offload.env.template.mssql @@ -17,8 +17,8 @@ # =========================================================================================== # Frontend distribution: -export FRONTEND_DISTRIBUTION=MSSQL +FRONTEND_DISTRIBUTION=MSSQL -export MSSQL_CONN=:1521/ -export MSSQL_APP_USER=goe_app -export MSSQL_APP_PASS=goe_app +MSSQL_CONN=:1521/ +MSSQL_APP_USER=goe_app +MSSQL_APP_PASS=goe_app diff --git a/templates/conf/offload.env.template.netezza b/templates/conf/offload.env.template.netezza index c94f4009..9b74038b 100644 --- a/templates/conf/offload.env.template.netezza +++ b/templates/conf/offload.env.template.netezza @@ -17,8 +17,8 @@ # =========================================================================================== # Frontend distribution: -export FRONTEND_DISTRIBUTION=NETEZZA +FRONTEND_DISTRIBUTION=NETEZZA -export NETEZZA_CONN=:1521/ -export NETEZZA_APP_USER=goe_app -export NETEZZA_APP_PASS=goe_app +NETEZZA_CONN=:1521/ +NETEZZA_APP_USER=goe_app +NETEZZA_APP_PASS=goe_app diff --git a/templates/conf/offload.env.template.oracle b/templates/conf/offload.env.template.oracle index 8d2e5072..2179b0e8 100644 --- a/templates/conf/offload.env.template.oracle +++ b/templates/conf/offload.env.template.oracle @@ -17,36 +17,36 @@ # =========================================================================================== # Frontend distribution: -export FRONTEND_DISTRIBUTION=ORACLE +FRONTEND_DISTRIBUTION=ORACLE # Oracle Database connection details - Fully qualified DB service name must be used # if Oracle service name includes domain-names (DB_DOMAIN), e.g. 
ORCL12.org.com -export ORA_CONN=:1521/ -export ORA_ADM_USER=goe_adm -export ORA_ADM_PASS=goe_adm -export ORA_APP_USER=goe_app -export ORA_APP_PASS=goe_app -export ORA_REPO_USER=goe_repo +ORA_CONN=:1521/ +ORA_ADM_USER=goe_adm +ORA_ADM_PASS=goe_adm +ORA_APP_USER=goe_app +ORA_APP_PASS=goe_app +ORA_REPO_USER=goe_repo # NLS_LANG should be set to your Oracle NLS_CHARACTERSET -#export NLS_LANG=.AL32UTF8 +#NLS_LANG=.AL32UTF8 # Instruct offload that RDBMS authentication is via an Oracle wallet. The wallet location should be configured using JVM options. # Sqoop: # The example below assumes credentials for ORA_APP_USER are stored in a wallet owned by OFFLOAD_TRANSPORT_USER in directory "/u01/app/goe/${OFFLOAD_TRANSPORT_USER}_wallet" -# export SQOOP_OVERRIDES="${SQOOP_OVERRIDES} -Doracle.net.wallet_location=/u01/app/goe/${OFFLOAD_TRANSPORT_USER}_wallet -Dmapred.map.child.java.opts='-Doracle.net.wallet_location=/u01/app/goe/${OFFLOAD_TRANSPORT_USER}_wallet'" +# SQOOP_OVERRIDES="${SQOOP_OVERRIDES} -Doracle.net.wallet_location=/u01/app/goe/${OFFLOAD_TRANSPORT_USER}_wallet -Dmapred.map.child.java.opts='-Doracle.net.wallet_location=/u01/app/goe/${OFFLOAD_TRANSPORT_USER}_wallet'" # Spark Thrift Server: # The example below assumes credentials for ORA_APP_USER are stored in a wallet owned by "hive" in directory "/u01/app/goe/hive_wallet" # spark.driver.extraJavaOptions=-Doracle.net.wallet_location=/u01/app/goe/hive_wallet # spark.executor.extraJavaOptions=-Doracle.net.wallet_location=/u01/app/goe/hive_wallet # Spark Livy & spark-submit: # The example below assumes credentials for ORA_APP_USER are stored in a wallet owned by OFFLOAD_TRANSPORT_USER in directory "/u01/app/goe/${OFFLOAD_TRANSPORT_USER}_wallet" -# export OFFLOAD_TRANSPORT_SPARK_PROPERTIES="{\"spark.driver.extraJavaOptions\": \"-Doracle.net.wallet_location=/u01/app/goe/${OFFLOAD_TRANSPORT_USER}_wallet\", \"spark.executor.extraJavaOptions\": \"-Doracle.net.wallet_location=/u01/app/goe/${OFFLOAD_TRANSPORT_USER}_wallet\"}" -export OFFLOAD_TRANSPORT_AUTH_USING_ORACLE_WALLET=false +# OFFLOAD_TRANSPORT_SPARK_PROPERTIES="{\"spark.driver.extraJavaOptions\": \"-Doracle.net.wallet_location=/u01/app/goe/${OFFLOAD_TRANSPORT_USER}_wallet\", \"spark.executor.extraJavaOptions\": \"-Doracle.net.wallet_location=/u01/app/goe/${OFFLOAD_TRANSPORT_USER}_wallet\"}" +OFFLOAD_TRANSPORT_AUTH_USING_ORACLE_WALLET=false # Setting USE_ORACLE_WALLET=true forces OFFLOAD_TRANSPORT_AUTH_USING_ORACLE_WALLET=true -export USE_ORACLE_WALLET=false +USE_ORACLE_WALLET=false # ORA_ADM_CONN must be set to the connection string in Oracle Wallet for the ORA_ADM_USER when USE_ORACLE_WALLET=true -#export ORA_ADM_CONN= +#ORA_ADM_CONN= # Degree of parallelism to use when sampling data for all columns in the source RDBMS table that are either date or timestamp-based or defined as a number without a precision and scale. A value of 0 or 1 disables parallelism. 
-export DATA_SAMPLE_PARALLELISM=0 +DATA_SAMPLE_PARALLELISM=0 diff --git a/templates/conf/offload.env.template.snowflake b/templates/conf/offload.env.template.snowflake index 1378ab30..3dcb2b35 100644 --- a/templates/conf/offload.env.template.snowflake +++ b/templates/conf/offload.env.template.snowflake @@ -17,58 +17,58 @@ # =========================================================================================== # Backend distribution: -export BACKEND_DISTRIBUTION=SNOWFLAKE +BACKEND_DISTRIBUTION=SNOWFLAKE # Orchestration query engine -export QUERY_ENGINE=SNOWFLAKE +QUERY_ENGINE=SNOWFLAKE # Snowflake connection settings: -export SNOWFLAKE_USER= -export SNOWFLAKE_PASS= -export SNOWFLAKE_ACCOUNT= -export SNOWFLAKE_DATABASE= -export SNOWFLAKE_ROLE=GOE_OFFLOAD_ROLE -export SNOWFLAKE_WAREHOUSE= +SNOWFLAKE_USER= +SNOWFLAKE_PASS= +SNOWFLAKE_ACCOUNT= +SNOWFLAKE_DATABASE= +SNOWFLAKE_ROLE=GOE_OFFLOAD_ROLE +SNOWFLAKE_WAREHOUSE= # For key pair authentication provide values for SNOWFLAKE_PEM_FILE/SNOWFLAKE_PEM_PASSPHRASE -#export SNOWFLAKE_PEM_FILE= -#export SNOWFLAKE_PEM_PASSPHRASE= +#SNOWFLAKE_PEM_FILE= +#SNOWFLAKE_PEM_PASSPHRASE= # Offload transport settings -export SNOWFLAKE_INTEGRATION= -export SNOWFLAKE_STAGE=GOE_OFFLOAD_STAGE -export SNOWFLAKE_FILE_FORMAT_PREFIX=GOE_OFFLOAD_FILE_FORMAT +SNOWFLAKE_INTEGRATION= +SNOWFLAKE_STAGE=GOE_OFFLOAD_STAGE +SNOWFLAKE_FILE_FORMAT_PREFIX=GOE_OFFLOAD_FILE_FORMAT # Filesystem type for Offloaded tables # When offloading a table to cloud storage the table LOCATION will be structured as below: # ${OFFLOAD_FS_SCHEME}://${OFFLOAD_FS_CONTAINER}/${OFFLOAD_FS_PREFIX}/db_name/table_name/ -export OFFLOAD_FS_SCHEME=gs +OFFLOAD_FS_SCHEME=gs # The path with which to prefix offloaded table paths. -export OFFLOAD_FS_PREFIX=goe +OFFLOAD_FS_PREFIX=goe # A valid bucket or container name when offloading to cloud storage -export OFFLOAD_FS_CONTAINER= +OFFLOAD_FS_CONTAINER= # Key/value pairs, in JSON format, defining session query parameters for the orchestration backend query engine. 
# These take effect for all queries issued to the query engine, e.g: -# export OFFLOAD_BACKEND_SESSION_PARAMETERS="{\"parameter_name\": \"some.value\"}" -#export OFFLOAD_BACKEND_SESSION_PARAMETERS= +# OFFLOAD_BACKEND_SESSION_PARAMETERS="{\"parameter_name\": \"some.value\"}" +#OFFLOAD_BACKEND_SESSION_PARAMETERS= # File format for staged data during an Offload (supported values: AVRO and PARQUET) -export OFFLOAD_STAGING_FORMAT=PARQUET +OFFLOAD_STAGING_FORMAT=PARQUET # Case conversion to be applied to any backend identifier names created by GOE (supported values: UPPER, LOWER and NO_MODIFY) -export BACKEND_IDENTIFIER_CASE=UPPER +BACKEND_IDENTIFIER_CASE=UPPER # Path to Google service account private key JSON file, required if staging Offload data in Google Cloud Storage -#export GOOGLE_APPLICATION_CREDENTIALS= +#GOOGLE_APPLICATION_CREDENTIALS= # Connection details for S3 storage, may be required if staging Offload data in S3 and not using instance level permissions -#export AWS_ACCESS_KEY_ID= -#export AWS_SECRET_ACCESS_KEY= +#AWS_ACCESS_KEY_ID= +#AWS_SECRET_ACCESS_KEY= # Connection details for Microsoft Azure storage, required if staging Offload data in Azure -#export OFFLOAD_FS_AZURE_ACCOUNT_NAME= -#export OFFLOAD_FS_AZURE_ACCOUNT_KEY= -#export OFFLOAD_FS_AZURE_ACCOUNT_DOMAIN=blob.core.windows.net +#OFFLOAD_FS_AZURE_ACCOUNT_NAME= +#OFFLOAD_FS_AZURE_ACCOUNT_KEY= +#OFFLOAD_FS_AZURE_ACCOUNT_DOMAIN=blob.core.windows.net # Authentication mechanism for Spark ThriftServer -export HIVE_SERVER_AUTH_MECHANISM=PLAIN +HIVE_SERVER_AUTH_MECHANISM=PLAIN diff --git a/templates/conf/offload.env.template.synapse b/templates/conf/offload.env.template.synapse index b833759c..0568db64 100644 --- a/templates/conf/offload.env.template.synapse +++ b/templates/conf/offload.env.template.synapse @@ -17,66 +17,66 @@ # =========================================================================================== # Backend distribution: -export BACKEND_DISTRIBUTION=MSAZURE +BACKEND_DISTRIBUTION=MSAZURE # Orchestration query engine -export QUERY_ENGINE=SYNAPSE +QUERY_ENGINE=SYNAPSE # Synapse connection settings -export SYNAPSE_DATABASE= -export SYNAPSE_SERVER= -export SYNAPSE_PORT=1433 -export SYNAPSE_ROLE=GOE_OFFLOAD_ROLE +SYNAPSE_DATABASE= +SYNAPSE_SERVER= +SYNAPSE_PORT=1433 +SYNAPSE_ROLE=GOE_OFFLOAD_ROLE # Synapse authentication mechanism (supported values: SqlPassword, ActiveDirectoryPassword, ActiveDirectoryMsi, ActiveDirectoryServicePrincipal) -export SYNAPSE_AUTH_MECHANISM=SqlPassword +SYNAPSE_AUTH_MECHANISM=SqlPassword # For SqlPassword/ActiveDirectoryPassword authentication provide values for SYNAPSE_USER/SYNAPSE_PASS -export SYNAPSE_USER= -export SYNAPSE_PASS= +SYNAPSE_USER= +SYNAPSE_PASS= # For ActiveDirectoryMsi authentication with a user-assigned identity provide a value for SYNAPSE_MSI_CLIENT_ID -export SYNAPSE_MSI_CLIENT_ID= +SYNAPSE_MSI_CLIENT_ID= # For ActiveDirectoryServicePrincipal authentication provide values for SYNAPSE_SERVICE_PRINCIPAL_ID/SYNAPSE_SERVICE_PRINCIPAL_SECRET -export SYNAPSE_SERVICE_PRINCIPAL_ID= -export SYNAPSE_SERVICE_PRINCIPAL_SECRET= +SYNAPSE_SERVICE_PRINCIPAL_ID= +SYNAPSE_SERVICE_PRINCIPAL_SECRET= # Offload transport settings -export SYNAPSE_DATA_SOURCE= -export SYNAPSE_FILE_FORMAT= +SYNAPSE_DATA_SOURCE= +SYNAPSE_FILE_FORMAT= # Collation to use for character columns. # Please note that changing this to a value with different behaviour to the frontend system may give unexpected results. 
-export SYNAPSE_COLLATION=Latin1_General_100_BIN2 +SYNAPSE_COLLATION=Latin1_General_100_BIN2 # Filesystem type for Offloaded tables # When offloading a table to cloud storage the table LOCATION will be structured as below: # ${OFFLOAD_FS_SCHEME}://${OFFLOAD_FS_CONTAINER}/${OFFLOAD_FS_PREFIX}/db_name/table_name/ -export OFFLOAD_FS_SCHEME=wasb +OFFLOAD_FS_SCHEME=wasb # The path with which to prefix offloaded table paths. -export OFFLOAD_FS_PREFIX=goe +OFFLOAD_FS_PREFIX=goe # A valid bucket or container name when offloading to cloud storage -export OFFLOAD_FS_CONTAINER= +OFFLOAD_FS_CONTAINER= # Connection details for Microsoft Azure storage -export OFFLOAD_FS_AZURE_ACCOUNT_DOMAIN=blob.core.windows.net -export OFFLOAD_FS_AZURE_ACCOUNT_KEY= -export OFFLOAD_FS_AZURE_ACCOUNT_NAME= +OFFLOAD_FS_AZURE_ACCOUNT_DOMAIN=blob.core.windows.net +OFFLOAD_FS_AZURE_ACCOUNT_KEY= +OFFLOAD_FS_AZURE_ACCOUNT_NAME= # Key/value pairs, in JSON format, defining session query parameters for the orchestration backend query engine. # These take effect for all queries issued to the query engine, e.g: -# export OFFLOAD_BACKEND_SESSION_PARAMETERS="{\"parameter_name\": \"some.value\"}" -#export OFFLOAD_BACKEND_SESSION_PARAMETERS= +# OFFLOAD_BACKEND_SESSION_PARAMETERS="{\"parameter_name\": \"some.value\"}" +#OFFLOAD_BACKEND_SESSION_PARAMETERS= # File format for staged data during an Offload (supported values: PARQUET) -export OFFLOAD_STAGING_FORMAT=PARQUET +OFFLOAD_STAGING_FORMAT=PARQUET # Case conversion to be applied to any backend identifier names created by GOE (supported values: UPPER, LOWER and NO_MODIFY) -export BACKEND_IDENTIFIER_CASE=LOWER +BACKEND_IDENTIFIER_CASE=LOWER # Name of the Microsoft ODBC driver as specified in odbcinst.ini -export BACKEND_ODBC_DRIVER_NAME="ODBC Driver 17 for SQL Server" +BACKEND_ODBC_DRIVER_NAME="ODBC Driver 17 for SQL Server" # Threshold at which RDBMS segments are considered for HASH distribution -export HASH_DISTRIBUTION_THRESHOLD=1G +HASH_DISTRIBUTION_THRESHOLD=1G # Authentication mechanism for Spark ThriftServer -export HIVE_SERVER_AUTH_MECHANISM=PLAIN +HIVE_SERVER_AUTH_MECHANISM=PLAIN diff --git a/templates/conf/offload.env.template.teradata b/templates/conf/offload.env.template.teradata index 8b82dcac..1a5335be 100644 --- a/templates/conf/offload.env.template.teradata +++ b/templates/conf/offload.env.template.teradata @@ -17,15 +17,15 @@ # =========================================================================================== # Frontend distribution: -export FRONTEND_DISTRIBUTION=TERADATA +FRONTEND_DISTRIBUTION=TERADATA # Teradata connection settings -export TERADATA_SERVER= -export TERADATA_ADM_USER=goe_adm -export TERADATA_ADM_PASS= -export TERADATA_APP_USER=goe_app -export TERADATA_APP_PASS= -export TERADATA_REPO_USER=goe_repo +TERADATA_SERVER= +TERADATA_ADM_USER=goe_adm +TERADATA_ADM_PASS= +TERADATA_APP_USER=goe_app +TERADATA_APP_PASS= +TERADATA_REPO_USER=goe_repo # Name of the Teradata ODBC driver as specified in odbcinst.ini -export FRONTEND_ODBC_DRIVER_NAME="Teradata Database ODBC Driver 17.10" +FRONTEND_ODBC_DRIVER_NAME="Teradata Database ODBC Driver 17.10" diff --git a/tests/integration/scenarios/scenario_runner.py b/tests/integration/scenarios/scenario_runner.py index 5d268a36..fd7fe293 100644 --- a/tests/integration/scenarios/scenario_runner.py +++ b/tests/integration/scenarios/scenario_runner.py @@ -13,7 +13,6 @@ # limitations under the License. 
import inspect -import os import time import traceback from typing import TYPE_CHECKING @@ -55,12 +54,6 @@ def get_config_overrides( return base_config -def get_conf_path(): - offload_home = os.environ.get("OFFLOAD_HOME") - assert offload_home, "OFFLOAD_HOME must be set in order to run tests" - return os.path.join(offload_home, "conf") - - def run_offload( option_dict: dict, orchestration_config: "OrchestrationRepoClientInterface", @@ -167,11 +160,8 @@ def create_goe_shell_runner( ) -> str: """Creates a temporary shell script to run a GOE command and returns the name of the script.""" tmp_file = get_temp_path(suffix=".sh") - conf_dir = get_conf_path() - conf_file = os.path.join(conf_dir, "offload.env") with open(tmp_file, "w") as f: f.write("#!/bin/bash\n") - f.write(f". {conf_file}\n") if cwd: f.write(f"cd {cwd}\n") f.write(f"{' '.join(_ for _ in shell_command)}\n") diff --git a/tests/unit/config/test_orchestration_config.py b/tests/unit/config/test_orchestration_config.py index 60111ef0..0f4b436a 100644 --- a/tests/unit/config/test_orchestration_config.py +++ b/tests/unit/config/test_orchestration_config.py @@ -12,15 +12,16 @@ # See the License for the specific language governing permissions and # limitations under the License. -""" TestOrchestrationConfig: Unit test library to test TestOrchestrationConfig class -""" import os -from unittest import TestCase, main, mock +from unittest import mock + +import pytest from goe.config.orchestration_config import ( OrchestrationConfig, EXPECTED_CONFIG_ARGS, ) +from goe.config import config_file from goe.config.config_validation_functions import ( OrchestrationConfigException, verify_json_option, @@ -28,56 +29,78 @@ from tests.unit.test_functions import FAKE_ORACLE_BQ_ENV -class TestOrchestrationConfig(TestCase): - def test_verify_json_option(self): - generic_json = '{"attrib1": "String", "attrib2": 1024, "attrib3": 1.024}' - verify_json_option("TEST_JSON", generic_json) - sample_rdbms_parameters_json = ( - '{"cell_offload_processing": "false", "\\"_serial_direct_read\\"": "true"}' - ) - verify_json_option("TEST_JSON", sample_rdbms_parameters_json) - sample_spark_parameters_json = ( - '{"spark.driver.memory": "512M", "spark.executor.memory": "1024M"}' - ) - verify_json_option("TEST_JSON", sample_spark_parameters_json) - for bad_json in [ - '{cell_offload_processing: "false"}', - '{"cell_offload_processing": False}', - ]: - self.assertRaises( - OrchestrationConfigException, - lambda: verify_json_option("TEST_JSON", bad_json), - ) +def test_verify_json_option(): + generic_json = '{"attrib1": "String", "attrib2": 1024, "attrib3": 1.024}' + verify_json_option("TEST_JSON", generic_json) + sample_rdbms_parameters_json = ( + '{"cell_offload_processing": "false", "\\"_serial_direct_read\\"": "true"}' + ) + verify_json_option("TEST_JSON", sample_rdbms_parameters_json) + sample_spark_parameters_json = ( + '{"spark.driver.memory": "512M", "spark.executor.memory": "1024M"}' + ) + verify_json_option("TEST_JSON", sample_spark_parameters_json) + for bad_json in [ + '{cell_offload_processing: "false"}', + '{"cell_offload_processing": False}', + ]: + with pytest.raises(OrchestrationConfigException): + verify_json_option("TEST_JSON", bad_json) + - def test_as_defaults(self): - k = mock.patch.dict(os.environ, FAKE_ORACLE_BQ_ENV) - k.start() - config = OrchestrationConfig.as_defaults() - k.stop() - # Check that every expected attribute is represented - self.assertFalse( - bool(set(EXPECTED_CONFIG_ARGS) - vars(config).keys()), - set(EXPECTED_CONFIG_ARGS) - 
vars(config).keys(), - ) +def test_as_defaults(): + k = mock.patch.dict(os.environ, FAKE_ORACLE_BQ_ENV) + k.start() + config = OrchestrationConfig.as_defaults() + k.stop() + # Check that every expected attribute is represented + assert not bool(set(EXPECTED_CONFIG_ARGS) - vars(config).keys()), ( + set(EXPECTED_CONFIG_ARGS) - vars(config).keys() + ) - def test_from_dict(self): - k = mock.patch.dict(os.environ, FAKE_ORACLE_BQ_ENV) - k.start() - config = OrchestrationConfig.from_dict({}) - k.stop() - self.assertFalse( - bool(set(EXPECTED_CONFIG_ARGS) - vars(config).keys()), - set(EXPECTED_CONFIG_ARGS) - vars(config).keys(), - ) - k.start() - config = OrchestrationConfig.from_dict({"vverbose": True}) - k.stop() - self.assertFalse( - bool(set(EXPECTED_CONFIG_ARGS) - vars(config).keys()), - set(EXPECTED_CONFIG_ARGS) - vars(config).keys(), - ) - self.assertTrue(config.vverbose) +def test_from_dict(): + k = mock.patch.dict(os.environ, FAKE_ORACLE_BQ_ENV) + k.start() + config = OrchestrationConfig.from_dict({}) + k.stop() + assert not bool(set(EXPECTED_CONFIG_ARGS) - vars(config).keys()), ( + set(EXPECTED_CONFIG_ARGS) - vars(config).keys() + ) + k.start() + config = OrchestrationConfig.from_dict({"vverbose": True}) + k.stop() + assert not bool(set(EXPECTED_CONFIG_ARGS) - vars(config).keys()), ( + set(EXPECTED_CONFIG_ARGS) - vars(config).keys() + ) + assert config.vverbose -if __name__ == "__main__": - main() + +@pytest.mark.parametrize( + "input,expected_key,expected_value", + [ + ("# Copyright 2016 The GOE Authors. All rights reserved.", None, None), + ("#", None, None), + ("# Some comment:", None, None), + ("# Some other comment with = blah", None, None), + ("# Some other comment with=blah", None, None), + ("OFFLOAD_TRANSPORT=AUTO", "OFFLOAD_TRANSPORT", "AUTO"), + ("#OFFLOAD_TRANSPORT=AUTO", "OFFLOAD_TRANSPORT", "AUTO"), + ("# OFFLOAD_TRANSPORT=AUTO", "OFFLOAD_TRANSPORT", "AUTO"), + ("OFFLOAD_TRANSPORT=AUTO=21", "OFFLOAD_TRANSPORT", "AUTO=21"), + ("OFFLOAD_TRANSPORT='AUTO 21'", "OFFLOAD_TRANSPORT", "'AUTO 21'"), + ('OFFLOAD_TRANSPORT="AUTO 21"', "OFFLOAD_TRANSPORT", '"AUTO 21"'), + ], +) +def test_config_file_env_key_value_pair( + input: str, expected_key: str, expected_value: str +): + output = config_file.env_key_value_pair(input) + if expected_key is None: + assert output is None + else: + assert isinstance(output, tuple) + assert len(output) == 2 + k, v = output + assert k == expected_key + assert v == expected_value diff --git a/tools/goe-shell-functions.sh b/tools/goe-shell-functions.sh index 2db8634e..f7580687 100644 --- a/tools/goe-shell-functions.sh +++ b/tools/goe-shell-functions.sh @@ -85,11 +85,6 @@ function source_goe_env { raise_and_exit ${FUNCNAME[0]} 2 "File does not exist: $1/conf/offload.env" fi export OFFLOAD_HOME=$1 - . $1/conf/offload.env - RETVAL=$? - if [[ $RETVAL -gt 0 ]]; then - raise_and_exit ${FUNCNAME[0]} $RETVAL - fi } # ------------------------------------------------------------------------------
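
Usage sketch (illustrative only, based on the diff above): after this change the CLI entry points no longer expect `offload.env` to be sourced into the shell; instead they validate `OFFLOAD_HOME` and load `$OFFLOAD_HOME/conf/offload.env` through `python-dotenv` via the new `goe.config.config_file` module. A minimal sketch of the entry-point pattern applied in `bin/offload`, `connect()` and `agg_validate`:

```python
# Minimal sketch of the entry-point pattern introduced by this patch
# (mirrors bin/offload and src/goe/connect/connect.py above; the option
# parsing step is elided).
from goe.config import config_file

if __name__ == "__main__":
    # Fail fast if OFFLOAD_HOME is unset.
    config_file.check_config_path()
    # Load $OFFLOAD_HOME/conf/offload.env into the process environment
    # using python-dotenv, replacing the old "source offload.env" step.
    config_file.load_env()
    # ... parse options and run the command as before ...
```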