From a5267d20b95fdf98b7be39b1be8913c2a8da58db Mon Sep 17 00:00:00 2001
From: Riivo Parts
Date: Fri, 2 Feb 2024 17:11:08 +0200
Subject: [PATCH] Add recalculate_checksum command

When you clone a database, the repeatable scripts are already recorded in
the change history table, but their checksums no longer match because the
database name has changed. The recalculate_checksum command recalculates
and records those checksums without re-executing the scripts.
---
 CHANGELOG.md        |   6 ++-
 README.md           |  43 +++++++++---
 schemachange/cli.py | 159 ++++++++++++++++++++++++++++++++++++--------
 setup.cfg           |   2 +-
 4 files changed, 173 insertions(+), 37 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index a9209fdf..42d43bd0 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -5,7 +5,11 @@ All notable changes to this project will be documented in this file.
 
+## [3.5.4] - 2024-02-05
+### Changed
+- Added `recalculate_checksum` subcommand, which recalculates the stored checksums of repeatable scripts
+
 ## [3.5.3] - 2023-02-18
 ### Changed
 - Added `undo` subcommand, introducing Undo scripts
 
 ## [3.5.2] - 2023-02-14
 ### Changed
@@ -24,7 +28,7 @@ All notable changes to this project will be documented in this file.
 - Cleaned up argument passing and other repetitive code using dictionary and set comparisons for easy maintenance. (Converted variable names to a consistent snake_case from a mix of kebab-case and snake_case)
 - Fixed change history table processing to allow mixed case names when '"' are used in the name.
 - Moved most error, log and warning messages and query strings to global or class variables.
-- Updated readme to cover new authentication methods
+- Updated readme to cover new authentication methods
 
 ## [3.4.2] - 2022-10-24
 ### Changed
diff --git a/README.md b/README.md
index d23d0f70..55372f99 100644
--- a/README.md
+++ b/README.md
@@ -243,7 +243,7 @@ Default [Password](https://docs.snowflake.com/en/user-guide/python-connector-exa
 [Browser based SSO](https://docs.snowflake.com/en/user-guide/admin-security-fed-auth-use.html#setting-up-browser-based-sso) | `externalbrowser`
 [Programmatic SSO](https://docs.snowflake.com/en/user-guide/admin-security-fed-auth-use.html#native-sso-okta-only) (Okta Only) | Okta URL endpoing for your Okta account typically in the form `https://.okta.com` OR `https://.oktapreview.com`
 
-In the event both authentication criteria for the default authenticator are provided, schemachange will prioritize password authentication over key pair authentication.
+In the event both authentication criteria for the default authenticator are provided, schemachange will prioritize password authentication over key pair authentication.
 
 ### Password Authentication
 The Snowflake user password for `SNOWFLAKE_USER` is required to be set in the environment variable `SNOWFLAKE_PASSWORD` prior to calling the script. schemachange will fail if the `SNOWFLAKE_PASSWORD` environment variable is not set.
@@ -260,20 +260,20 @@ The URL of the authenticator resource that will be receive the POST request.
 * token-response-name
 The Expected name of the JSON element containing the Token in the return response from the authenticator resource.
 * token-request-payload
-The Set of variables passed as a dictionary to the `data` element of the request.
+The Set of variables passed as a dictionary to the `data` element of the request.
 * token-request-headers
-The Set of variables passed as a dictionary to the `headers` element of the request.
+The Set of variables passed as a dictionary to the `headers` element of the request.
-It is recomended to use the YAML file and pass oauth secrets into the configuration using the templating engine instead of the command line option.
+It is recommended to use the YAML file and pass oauth secrets into the configuration using the templating engine instead of the command line option.
 
 ### External Browser Authentication
-External browser authentication can be used for local development by setting the environment variable `SNOWFLAKE_AUTHENTICATOR` to the value `externalbrowser` prior to calling schemachange.
+External browser authentication can be used for local development by setting the environment variable `SNOWFLAKE_AUTHENTICATOR` to the value `externalbrowser` prior to calling schemachange.
 The client will be prompted to authenticate in a browser that pops up. Refer to the [documentation](https://docs.snowflake.com/en/user-guide/admin-security-fed-auth-use.html#setting-up-browser-based-sso) to cache the token to minimize the number of times the browser pops up to authenticate the user.
 
 ### Okta Authentication
-For clients that do not have a browser, can use the popular SaaS Idp option to connect via Okta. This will require the Okta URL that you utilize for SSO.
-Okta authentication can be used setting the environment variable `SNOWFLAKE_AUTHENTICATOR` to the value of your okta endpoint as a fully formed URL ( E.g. `https://.okta.com`) prior to calling schemachange.
+Clients that do not have a browser can use the popular SaaS IdP option to connect via Okta. This will require the Okta URL that you utilize for SSO.
+Okta authentication can be used by setting the environment variable `SNOWFLAKE_AUTHENTICATOR` to the value of your Okta endpoint as a fully formed URL (e.g. `https://.okta.com`) prior to calling schemachange.
 
 _** NOTE**: Please disable Okta MFA for the user who uses Native SSO authentication with client drivers. Please consult your Okta administrator for more information._
@@ -348,14 +348,14 @@ dry-run: false
 # A string to include in the QUERY_TAG that is attached to every SQL statement executed
 query-tag: 'QUERY_TAG'
 
-# Information for Oauth token requests
+# Information for Oauth token requests
 oauthconfig:
   # url Where token request are posted to
   token-provider-url: 'https://login.microsoftonline.com/{{ env_var('AZURE_ORG_GUID', 'default') }}/oauth2/v2.0/token'
   # name of Json entity returned by request
   token-response-name: 'access_token'
   # Headers needed for successful post or other security markings ( multiple labeled items permitted
-  token-request-headers:
+  token-request-headers:
     Content-Type: "application/x-www-form-urlencoded"
     User-Agent: "python/schemachange"
   # Request Payload for Token (it is recommended pass
@@ -438,6 +438,31 @@ Parameter | Description
 --- | ---
 --query-tag | A string to include in the QUERY_TAG that is attached to every SQL statement executed.
 --oauth-config | Define values for the variables to Make Oauth Token requests (e.g. {"token-provider-url": "https//...", "token-request-payload": {"client_id": "GUID_xyz",...},... })'
+#### recalculate_checksum
+This subcommand is used to recalculate the checksums of repeatable migration scripts. It is useful when cloning a database, so that repeatable migrations do not need to be rerun.
+
+`usage: schemachange recalculate_checksum [-h] [--config-folder CONFIG_FOLDER] [-f ROOT_FOLDER] [-m MODULES_FOLDER] [-a SNOWFLAKE_ACCOUNT] [-u SNOWFLAKE_USER] [-r SNOWFLAKE_ROLE] [-w SNOWFLAKE_WAREHOUSE] [-d SNOWFLAKE_DATABASE] [-c CHANGE_HISTORY_TABLE] [--vars VARS] [--create-change-history-table] [-ac] [-v] [--dry-run] [--query-tag QUERY_TAG] [--oauth-config OAUTH_CONFIG]`
+
+Parameter | Description
+--- | ---
+-h, --help | Show the help message and exit
+--config-folder CONFIG_FOLDER | The folder to look in for the schemachange-config.yml file (the default is the current working directory)
+-f ROOT_FOLDER, --root-folder ROOT_FOLDER | The root folder for the database change scripts. The default is the current directory.
+-m MODULES_FOLDER, --modules-folder MODULES_FOLDER | The modules folder for jinja macros and templates to be used across multiple scripts
+-a SNOWFLAKE_ACCOUNT, --snowflake-account SNOWFLAKE_ACCOUNT | The name of the snowflake account (e.g. xy12345.east-us-2.azure).
+-u SNOWFLAKE_USER, --snowflake-user SNOWFLAKE_USER | The name of the snowflake user
+-r SNOWFLAKE_ROLE, --snowflake-role SNOWFLAKE_ROLE | The name of the role to use
+-w SNOWFLAKE_WAREHOUSE, --snowflake-warehouse SNOWFLAKE_WAREHOUSE | The name of the default warehouse to use. Can be overridden in the change scripts.
+-d SNOWFLAKE_DATABASE, --snowflake-database SNOWFLAKE_DATABASE | The name of the default database to use. Can be overridden in the change scripts.
+-c CHANGE_HISTORY_TABLE, --change-history-table CHANGE_HISTORY_TABLE | Used to override the default name of the change history table (which is METADATA.SCHEMACHANGE.CHANGE_HISTORY)
+--vars VARS | Define values for the variables to be replaced in change scripts, given in JSON format (e.g. '{"variable1": "value1", "variable2": "value2"}')
+--create-change-history-table | Create the change history table if it does not exist. The default is 'False'.
+-ac, --autocommit | Enable autocommit feature for DML commands. The default is 'False'.
+-v, --verbose | Display verbose debugging details during execution. The default is 'False'.
+--dry-run | Run schemachange in dry run mode. The default is 'False'.
+--query-tag | A string to include in the QUERY_TAG that is attached to every SQL statement executed.
+--oauth-config | Define values for the variables to Make Oauth Token requests (e.g. {"token-provider-url": "https//...", "token-request-payload": {"client_id": "GUID_xyz",...},... })'
+
 
 #### render
 This subcommand is used to render a single script to the console. It is intended to support the development and troubleshooting of script that use features from the jinja template engine.
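As an illustration, a run against a freshly cloned database could look like the sketch below. The account, role, warehouse, database, change history table, and variable names are placeholders, and `SNOWFLAKE_PASSWORD` is assumed to be set in the environment:

```bash
# Hypothetical invocation: refresh repeatable-script checksums after cloning PROD_DB to CLONED_DB.
# Keep --dry-run for a preview of which scripts would be updated; drop it to record the new checksums.
schemachange recalculate_checksum \
  -f ./migrations \
  -a xy12345.east-us-2.azure \
  -u DEPLOY_USER \
  -r DEPLOY_ROLE \
  -w DEPLOY_WH \
  -d CLONED_DB \
  -c CLONED_DB.SCHEMACHANGE.CHANGE_HISTORY \
  --vars '{"database_name": "CLONED_DB"}' \
  --dry-run
```

With `--dry-run` the command only reports the repeatable scripts whose checksums differ; without it, the new checksums are written to the change history table and the scripts themselves are still not executed.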
diff --git a/schemachange/cli.py b/schemachange/cli.py
index 2a81a58e..83c2fc5a 100644
--- a/schemachange/cli.py
+++ b/schemachange/cli.py
@@ -19,9 +19,9 @@
 from jinja2.loaders import BaseLoader
 from pandas import DataFrame
 
-#region Global Variables
+#region Global Variables
 # metadata
-_schemachange_version = '3.5.3'
+_schemachange_version = '3.5.4'
 _config_file_name = 'schemachange-config.yml'
 _metadata_database_name = 'METADATA'
 _metadata_schema_name = 'SCHEMACHANGE'
@@ -48,7 +48,7 @@
   + "{snowflake_role}\nUsing default warehouse {snowflake_warehouse}\nUsing default " \
   + "database {snowflake_database}"
 _log_ch_use = "Using change history table {database_name}.{schema_name}.{table_name} " \
-  + "(last altered {last_altered})"
+  + "(last altered {last_altered})"
 _log_ch_create = "Created change history table {database_name}.{schema_name}.{table_name}"
 _err_ch_missing = "Unable to find change history table {database_name}.{schema_name}.{table_name}"
 _log_ch_max_version = "Max applied change script version: {max_published_version_display}"
@@ -58,6 +58,7 @@
   + "execution"
 _log_apply = "Applying change script {script_name}"
 _log_undo = "Applying undo script {script_name}"
+_log_recalculate = "Recalculating checksum for change script {script_name}"
 _log_apply_set_complete = "Successfully applied {scripts_applied} change scripts (skipping " \
   + "{scripts_skipped}) \nCompleted successfully"
 _log_undo_set_complete = "Successfully applied {scripts_applied} undo scripts"
@@ -66,7 +67,7 @@
   + "please use a different name"
 _err_invalid_folder = "Invalid {folder_type} folder: {path}"
 _err_dup_scripts = "The script name {script_name} exists more than once (first_instance " \
-  + "{first_path}, second instance {script_full_path})"
+  + "{first_path}, second instance {script_full_path})"
 _err_dup_scripts_version = "The script version {script_version} exists more than once " \
   + "(second instance {script_full_path})"
 _err_dup_undo_scripts_version = "The undo version {script_version} exists more than once " \
@@ -271,7 +272,7 @@ def authenticate(self):
     snowflake_password = None
     if os.getenv("SNOWFLAKE_PASSWORD") is not None and os.getenv("SNOWFLAKE_PASSWORD"):
       snowflake_password = os.getenv("SNOWFLAKE_PASSWORD")
-
+
     # Check legacy/deprecated env variable
     if os.getenv("SNOWSQL_PWD") is not None and os.getenv("SNOWSQL_PWD"):
       if snowflake_password:
@@ -312,20 +313,20 @@ def authenticate(self):
         self.conArgs['private_key'] = pkb
         self.conArgs['authenticator'] = 'snowflake'
 
-
-    elif os.getenv("SNOWFLAKE_AUTHENTICATOR") == 'oauth' and os.getenv("SNOWFLAKE_AUTHENTICATOR"):
+
+    elif os.getenv("SNOWFLAKE_AUTHENTICATOR") == 'oauth' and os.getenv("SNOWFLAKE_AUTHENTICATOR"):
       oauth_token = self.get_oauth_token()
-
+
       if self.verbose:
         print( _log_auth_type % 'Oauth Access Token')
       self.conArgs['token'] = oauth_token
       self.conArgs['authenticator'] = 'oauth'
-
+
     elif os.getenv("SNOWFLAKE_AUTHENTICATOR") == 'externalbrowser' and os.getenv("SNOWFLAKE_AUTHENTICATOR"):
       self.conArgs['authenticator'] = 'externalbrowser'
       if self.verbose:
         print(_log_auth_type % 'External Browser')
-
+
     elif os.getenv("SNOWFLAKE_AUTHENTICATOR").lower()[:8]=='https://' \
       and os.getenv("SNOWFLAKE_AUTHENTICATOR"):
       okta = os.getenv("SNOWFLAKE_AUTHENTICATOR")
@@ -468,7 +469,7 @@ def record_change_script(self, script, script_content, change_history_table, exe
     # Compose and execute the insert statement to the log file
     query = self._q_ch_log.format(**frmt_args)
     self.execute_snowflake_query(query)
 
-
+
 def deploy_command(config):
   req_args = set(['snowflake_account','snowflake_user','snowflake_role','snowflake_warehouse'])
@@ -613,6 +614,90 @@ def undo_command(config):
   print(_log_undo_set_complete.format(scripts_applied=scripts_applied))
 
+
+def recalculate_checksum_command(config):
+  req_args = set(['snowflake_account','snowflake_user','snowflake_role','snowflake_warehouse'])
+  validate_auth_config(config, req_args)
+
+  # Log some additional details
+  if config['dry_run']:
+    print("Running in dry-run mode")
+  print(_log_config_details.format(**config))
+
+  # Connect to Snowflake and maintain the connection
+  session = SnowflakeSchemachangeSession(config)
+
+  scripts_skipped = 0
+  scripts_applied = 0
+
+  # Deal with the change history table (create if specified)
+  change_history_table = get_change_history_table_details(config['change_history_table'])
+  change_history_metadata = session.fetch_change_history_metadata(change_history_table)
+  if change_history_metadata:
+    print(_log_ch_use.format(last_altered=change_history_metadata['last_altered'], **change_history_table))
+  elif config['create_change_history_table']:
+    # Create the change history table (and containing objects) if it doesn't exist.
+    if not config['dry_run']:
+      session.create_change_history_table_if_missing(change_history_table)
+    print(_log_ch_create.format(**change_history_table))
+  else:
+    raise ValueError(_err_ch_missing.format(**change_history_table))
+
+  # Find the max published version
+  max_published_version = ''
+
+  change_history = None
+  r_scripts_checksum = None
+  if (config['dry_run'] and change_history_metadata) or not config['dry_run']:
+    change_history = session.fetch_change_history(change_history_table)
+    r_scripts_checksum = session.fetch_r_scripts_checksum(change_history_table)
+
+  if change_history:
+    max_published_version = change_history[0]
+  max_published_version_display = max_published_version
+  if max_published_version_display == '':
+    max_published_version_display = 'None'
+  print(_log_ch_max_version.format(max_published_version_display=max_published_version_display))
+
+  # Find all scripts in the root folder (recursively) and sort them correctly
+  all_scripts = get_all_scripts_recursively(config['root_folder'], config['verbose'])
+  all_script_names = list(all_scripts.keys())
+  # Only repeatable (R) scripts carry checksums, so keep just those and sort them
+  all_script_names_sorted = sorted_alphanumeric([script for script in all_script_names if script[0] == 'R'])
+
+  # Loop through each repeatable script and refresh its checksum where needed
+  for script_name in all_script_names_sorted:
+    script = all_scripts[script_name]
+
+    # Always process with jinja engine
+    jinja_processor = JinjaTemplateProcessor(project_root = config['root_folder'], modules_folder = config['modules_folder'])
+    content = jinja_processor.render(jinja_processor.relpath(script['script_full_path']), config['vars'], config['verbose'])
+
+    # Only update R scripts whose checksum changed compared to the last execution of schemachange
+    if script_name[0] == 'R':
+      # Compute the checksum for the script
+      checksum_current = hashlib.sha224(content.encode('utf-8')).hexdigest()
+
+      # Check if the R script was already executed
+      if (r_scripts_checksum is not None) and script_name in list(r_scripts_checksum['script_name']):
+        checksum_last = list(r_scripts_checksum.loc[r_scripts_checksum['script_name'] == script_name, 'checksum'])[0]
+      else:
+        checksum_last = ''
+
+      # Skip the script if its checksum has not changed
+      if checksum_current == checksum_last:
+        if config['verbose']:
+          print(_log_skip_r.format(**script))
+        scripts_skipped += 1
+        continue
+
+      print(_log_recalculate.format(**script))
+
+      if not config['dry_run']:
+        session.record_change_script(script, content, change_history_table, 0)  # record the new checksum only; execution time 0, script not executed
+      scripts_applied += 1
+
+  print(_log_apply_set_complete.format(scripts_applied=scripts_applied, scripts_skipped=scripts_skipped))
+
 
 def render_command(config, script_path):
   """
   Renders the provided script.
@@ -622,7 +707,7 @@ def render_command(config, script_path):
   # Validate the script file path
   script_path = os.path.abspath(script_path)
   if not os.path.isfile(script_path):
-    raise ValueError(_err_invalid_folder.format(folder_type='script_path', path=script_path))
+    raise ValueError(_err_invalid_folder.format(folder_type='script_path', path=script_path))
   # Always process with jinja engine
   jinja_processor = JinjaTemplateProcessor(project_root = config['root_folder'], \
     modules_folder = config['modules_folder'])
@@ -713,10 +798,10 @@ def get_schemachange_config(config_file_path, root_folder, modules_folder, snowf
 
   # Validate folder paths
   if 'root_folder' in config:
-    config['root_folder'] = os.path.abspath(config['root_folder'])
+    config['root_folder'] = os.path.abspath(config['root_folder'])
     if not os.path.isdir(config['root_folder']):
       raise ValueError(_err_invalid_folder.format(folder_type='root', path=config['root_folder']))
-
+
   if config['modules_folder']:
     config['modules_folder'] = os.path.abspath(config['modules_folder'])
     if not os.path.isdir(config['modules_folder']):
@@ -810,7 +895,7 @@ def get_all_scripts_recursively(root_directory, verbose):
       # Throw an error if the same version exists more than once
       if script_type == 'V':
         if script['script_version'] in all_versions:
-          raise ValueError(_err_dup_scripts_version.format(**script))
+          raise ValueError(_err_dup_scripts_version.format(**script))
         all_versions.append(script['script_version'])
 
       if script_type == 'U':
@@ -828,22 +913,22 @@ def get_change_history_table_details(change_history_table):
   # Start with the global defaults
   details = dict()
-  details['database_name'] = _metadata_database_name
-  details['schema_name'] = _metadata_schema_name
-  details['table_name'] = _metadata_table_name
+  details['database_name'] = _metadata_database_name
+  details['schema_name'] = _metadata_schema_name
+  details['table_name'] = _metadata_table_name
 
   # Then override the defaults if requested. The name could be in one, two or three part notation.
   if change_history_table is not None:
     table_name_parts = change_history_table.strip().split('.')
-    if len(table_name_parts) == 1:
-      details['table_name'] = table_name_parts[0]
+    if len(table_name_parts) == 1:
+      details['table_name'] = table_name_parts[0]
     elif len(table_name_parts) == 2:
-      details['table_name'] = table_name_parts[1]
-      details['schema_name'] = table_name_parts[0]
+      details['table_name'] = table_name_parts[1]
+      details['schema_name'] = table_name_parts[0]
     elif len(table_name_parts) == 3:
-      details['table_name'] = table_name_parts[2]
-      details['schema_name'] = table_name_parts[1]
-      details['database_name'] = table_name_parts[0]
+      details['table_name'] = table_name_parts[2]
+      details['schema_name'] = table_name_parts[1]
+      details['database_name'] = table_name_parts[0]
     else:
       raise ValueError(_err_invalid_cht % change_history_table)
   #if the object name does not include '"' raise to upper case on return
@@ -894,6 +979,24 @@ def main(argv=sys.argv):
   parser = argparse.ArgumentParser(prog = 'schemachange', description = 'Apply schema changes to a Snowflake account. Full readme at https://github.com/Snowflake-Labs/schemachange', formatter_class = argparse.RawTextHelpFormatter)
   subcommands = parser.add_subparsers(dest='subcommand')
 
+  parser_recalculate = subcommands.add_parser("recalculate_checksum")
+  parser_recalculate.add_argument('--config-folder', type = str, default = '.', help = 'The folder to look in for the schemachange-config.yml file (the default is the current working directory)', required = False)
+  parser_recalculate.add_argument('-f', '--root-folder', type = str, help = 'The root folder for the database change scripts', required = False)
+  parser_recalculate.add_argument('-m', '--modules-folder', type = str, help = 'The modules folder for jinja macros and templates to be used across multiple scripts', required = False)
+  parser_recalculate.add_argument('-a', '--snowflake-account', type = str, help = 'The name of the snowflake account (e.g. xy12345.east-us-2.azure)', required = False)
+  parser_recalculate.add_argument('-u', '--snowflake-user', type = str, help = 'The name of the snowflake user', required = False)
+  parser_recalculate.add_argument('-r', '--snowflake-role', type = str, help = 'The name of the default role to use', required = False)
+  parser_recalculate.add_argument('-w', '--snowflake-warehouse', type = str, help = 'The name of the default warehouse to use. Can be overridden in the change scripts.', required = False)
+  parser_recalculate.add_argument('-d', '--snowflake-database', type = str, help = 'The name of the default database to use. Can be overridden in the change scripts.', required = False)
+  parser_recalculate.add_argument('-c', '--change-history-table', type = str, help = 'Used to override the default name of the change history table (the default is METADATA.SCHEMACHANGE.CHANGE_HISTORY)', required = False)
+  parser_recalculate.add_argument('--vars', type = json.loads, help = 'Define values for the variables to be replaced in change scripts, given in JSON format (e.g. {"variable1": "value1", "variable2": "value2"})', required = False)
+  parser_recalculate.add_argument('--create-change-history-table', action='store_true', help = 'Create the change history schema and table, if they do not exist (the default is False)', required = False)
+  parser_recalculate.add_argument('-ac', '--autocommit', action='store_true', help = 'Enable autocommit feature for DML commands (the default is False)', required = False)
+  parser_recalculate.add_argument('-v','--verbose', action='store_true', help = 'Display verbose debugging details during execution (the default is False)', required = False)
+  parser_recalculate.add_argument('--dry-run', action='store_true', help = 'Run schemachange in dry run mode (the default is False)', required = False)
+  parser_recalculate.add_argument('--query-tag', type = str, help = 'The string to add to the Snowflake QUERY_TAG session value for each query executed', required = False)
+  parser_recalculate.add_argument('--oauth-config', type = json.loads, help = 'Define values for the variables to Make Oauth Token requests (e.g. {"token-provider-url": "https//...", "token-request-payload": {"client_id": "GUID_xyz",...},... })', required = False)
+
   parser_undo = subcommands.add_parser("undo")
   parser_undo.add_argument('--config-folder', type = str, default = '.', help = 'The folder to look in for the schemachange-config.yml file (the default is the current working directory)', required = False)
   parser_undo.add_argument('-s', '--step', type = int, default = 1, help = 'Amount of versioned migrations to be undone in the reverse of their applied order', required = False)
   parser_undo.add_argument('-f', '--root-folder', type = str, help = 'The root folder for the database change scripts', required = False)
   parser_undo.add_argument('-m', '--modules-folder', type = str, help = 'The modules folder for jinja macros and templates to be used across multiple scripts', required = False)
@@ -942,7 +1045,7 @@ def main(argv=sys.argv):
   # The original parameters did not support subcommands. Check if a subcommand has been supplied
   # if not default to deploy to match original behaviour.
   args = argv[1:]
-  if len(args) == 0 or not any(subcommand in args[0].upper() for subcommand in ["DEPLOY", "RENDER", "UNDO"]):
+  if len(args) == 0 or not any(subcommand in args[0].upper() for subcommand in ["DEPLOY", "RENDER", "UNDO", "RECALCULATE_CHECKSUM"]):
     args = ["deploy"] + args
 
   args = parser.parse_args(args)
@@ -964,6 +1067,8 @@
       "create_change_history_table":None,"autocommit":None,"dry_run":None,"query_tag":None,"oauth_config":None,"step":None }
   elif args.subcommand == 'undo':
     renderoveride = {"create_change_history_table":None}
+  elif args.subcommand == 'recalculate_checksum':
+    renderoveride = {"step":None}
   elif args.subcommand == 'deploy':
     renderoveride = {"step":None}
 
@@ -997,6 +1102,8 @@
     render_command(config, args.script)
   elif args.subcommand == 'undo':
     undo_command(config)
+  elif args.subcommand == 'recalculate_checksum':
+    recalculate_checksum_command(config)
   else:
     deploy_command(config)
 
diff --git a/setup.cfg b/setup.cfg
index 857fc9df..f5c003de 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -1,6 +1,6 @@
 [metadata]
 name = schemachange
-version = 3.5.3
+version = 3.5.4
 author = jamesweakley/jeremiahhansen
 description = A Database Change Management tool for Snowflake
 long_description = file: README.md
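For reference, the logic the new subcommand applies to each repeatable script reduces to: render the script with Jinja, hash the rendered content with SHA-224, and record the new checksum when it differs from the stored one, without executing the script. The sketch below condenses that flow; the dictionary-based checksum store and the script names and contents are hypothetical stand-ins for the change history table.

```python
# Minimal, self-contained sketch of the recalculate_checksum flow (no Snowflake session).
# stored_checksums stands in for the checksums read from the change history table.
import hashlib

def recalculate(rendered_scripts, stored_checksums):
  updated = {}
  for script_name, content in rendered_scripts.items():
    if not script_name.startswith('R'):
      continue  # only repeatable (R) scripts carry checksums
    checksum_current = hashlib.sha224(content.encode('utf-8')).hexdigest()
    if checksum_current == stored_checksums.get(script_name, ''):
      continue  # unchanged: the script would be skipped
    updated[script_name] = checksum_current  # the real command records this row with execution time 0
  return updated

# After cloning, the rendered content differs (e.g. a jinja variable holding the database name changed),
# so the stale checksum is replaced without rerunning the script itself.
print(recalculate(
  {'R__all_views.sql': 'CREATE OR REPLACE VIEW CLONED_DB.PUBLIC.V1 AS SELECT 1;'},
  {'R__all_views.sql': 'stale-checksum-from-the-source-database'},
))
```

This mirrors the checksum comparison in `recalculate_checksum_command`, which applies the same `hashlib.sha224(...).hexdigest()` call to the Jinja-rendered script content before deciding whether to record a new row.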