diff --git a/CHANGELOG.md b/CHANGELOG.md index 3ec0cd51..de531934 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,7 +12,11 @@ All notable changes to this project will be documented in this file. ## [3.5.3] - 2023-02-18 ### Changed -- Added `undo` subcommand, introducing Undo scripts +- Added `undo` subcommand, introducing Undo scripts + +## [3.5.4] - 2024-02-05 +### Changed +- Added `recalculate_checksum` subcommand, introducing recalculate checksum scripts ## [3.5.2] - 2023-02-14 ### Changed diff --git a/README.md b/README.md index d23d0f70..55372f99 100644 --- a/README.md +++ b/README.md @@ -243,7 +243,7 @@ Default [Password](https://docs.snowflake.com/en/user-guide/python-connector-exa [Browser based SSO](https://docs.snowflake.com/en/user-guide/admin-security-fed-auth-use.html#setting-up-browser-based-sso) | `externalbrowser` [Programmatic SSO](https://docs.snowflake.com/en/user-guide/admin-security-fed-auth-use.html#native-sso-okta-only) (Okta Only) | Okta URL endpoing for your Okta account typically in the form `https://.okta.com` OR `https://.oktapreview.com` -In the event both authentication criteria for the default authenticator are provided, schemachange will prioritize password authentication over key pair authentication. +In the event both authentication criteria for the default authenticator are provided, schemachange will prioritize password authentication over key pair authentication. ### Password Authentication The Snowflake user password for `SNOWFLAKE_USER` is required to be set in the environment variable `SNOWFLAKE_PASSWORD` prior to calling the script. schemachange will fail if the `SNOWFLAKE_PASSWORD` environment variable is not set. @@ -260,20 +260,20 @@ The URL of the authenticator resource that will be receive the POST request. * token-response-name The Expected name of the JSON element containing the Token in the return response from the authenticator resource. 
* token-request-payload -The Set of variables passed as a dictionary to the `data` element of the request. +The Set of variables passed as a dictionary to the `data` element of the request. * token-request-headers -The Set of variables passed as a dictionary to the `headers` element of the request. +The Set of variables passed as a dictionary to the `headers` element of the request. -It is recomended to use the YAML file and pass oauth secrets into the configuration using the templating engine instead of the command line option. +It is recommended to use the YAML file and pass oauth secrets into the configuration using the templating engine instead of the command line option. ### External Browser Authentication -External browser authentication can be used for local development by setting the environment variable `SNOWFLAKE_AUTHENTICATOR` to the value `externalbrowser` prior to calling schemachange. +External browser authentication can be used for local development by setting the environment variable `SNOWFLAKE_AUTHENTICATOR` to the value `externalbrowser` prior to calling schemachange. The client will be prompted to authenticate in a browser that pops up. Refer to the [documentation](https://docs.snowflake.com/en/user-guide/admin-security-fed-auth-use.html#setting-up-browser-based-sso) to cache the token to minimize the number of times the browser pops up to authenticate the user. ### Okta Authentication -For clients that do not have a browser, can use the popular SaaS Idp option to connect via Okta. This will require the Okta URL that you utilize for SSO. -Okta authentication can be used setting the environment variable `SNOWFLAKE_AUTHENTICATOR` to the value of your okta endpoint as a fully formed URL ( E.g. `https://.okta.com`) prior to calling schemachange. +Clients that do not have a browser can use the popular SaaS Idp option to connect via Okta. This will require the Okta URL that you utilize for SSO. 
+Okta authentication can be used by setting the environment variable `SNOWFLAKE_AUTHENTICATOR` to the value of your okta endpoint as a fully formed URL ( E.g. `https://.okta.com`) prior to calling schemachange. _** NOTE**: Please disable Okta MFA for the user who uses Native SSO authentication with client drivers. Please consult your Okta administrator for more information._ @@ -348,14 +348,14 @@ dry-run: false # A string to include in the QUERY_TAG that is attached to every SQL statement executed query-tag: 'QUERY_TAG' -# Information for Oauth token requests +# Information for Oauth token requests oauthconfig: # url Where token request are posted to token-provider-url: 'https://login.microsoftonline.com/{{ env_var('AZURE_ORG_GUID', 'default') }}/oauth2/v2.0/token' # name of Json entity returned by request token-response-name: 'access_token' # Headers needed for successful post or other security markings ( multiple labeled items permitted - token-request-headers: + token-request-headers: Content-Type: "application/x-www-form-urlencoded" User-Agent: "python/schemachange" # Request Payload for Token (it is recommended pass @@ -438,6 +438,31 @@ Parameter | Description --query-tag | A string to include in the QUERY_TAG that is attached to every SQL statement executed. --oauth-config | Define values for the variables to Make Oauth Token requests (e.g. {"token-provider-url": "https//...", "token-request-payload": {"client_id": "GUID_xyz",...},... })' +#### recalculate_checksum +This subcommand is used to recalculate repeatable migration checksums. It is useful when cloning a database to ensure that you don't need to rerun repeatable migrations. 
+ +`usage: schemachange recalculate_checksum [-h] [--config-folder CONFIG_FOLDER] [-f ROOT_FOLDER] [-m MODULES_FOLDER] [-a SNOWFLAKE_ACCOUNT] [-u SNOWFLAKE_USER] [-r SNOWFLAKE_ROLE] [-w SNOWFLAKE_WAREHOUSE] [-d SNOWFLAKE_DATABASE] [-c CHANGE_HISTORY_TABLE] [--vars VARS] [--create-change-history-table] [-ac] [-v] [--dry-run] [--query-tag QUERY_TAG]` + +Parameter | Description +--- | --- +-h, --help | Show the help message and exit +--config-folder CONFIG_FOLDER | The folder to look in for the schemachange-config.yml file (the default is the current working directory) +-f ROOT_FOLDER, --root-folder ROOT_FOLDER | The root folder for the database change scripts. The default is the current directory. +-m MODULES_FOLDER, --modules-folder MODULES_FOLDER | The modules folder for jinja macros and templates to be used across multiple scripts +-a SNOWFLAKE_ACCOUNT, --snowflake-account SNOWFLAKE_ACCOUNT | The name of the snowflake account (e.g. xy12345.east-us-2.azure). +-u SNOWFLAKE_USER, --snowflake-user SNOWFLAKE_USER | The name of the snowflake user +-r SNOWFLAKE_ROLE, --snowflake-role SNOWFLAKE_ROLE | The name of the role to use +-w SNOWFLAKE_WAREHOUSE, --snowflake-warehouse SNOWFLAKE_WAREHOUSE | The name of the default warehouse to use. Can be overridden in the change scripts. +-d SNOWFLAKE_DATABASE, --snowflake-database SNOWFLAKE_DATABASE | The name of the default database to use. Can be overridden in the change scripts. +-c CHANGE_HISTORY_TABLE, --change-history-table CHANGE_HISTORY_TABLE | Used to override the default name of the change history table (which is METADATA.SCHEMACHANGE.CHANGE_HISTORY) +--vars VARS | Define values for the variables to be replaced in change scripts, given in JSON format (e.g. '{"variable1": "value1", "variable2": "value2"}') +--create-change-history-table | Create the change history table if it does not exist. The default is 'False'. +-ac, --autocommit | Enable autocommit feature for DML commands. The default is 'False'. 
+-v, --verbose | Display verbose debugging details during execution. The default is 'False'. +--dry-run | Run schemachange in dry run mode. The default is 'False'. +--query-tag | A string to include in the QUERY_TAG that is attached to every SQL statement executed. +--oauth-config | Define values for the variables to Make Oauth Token requests (e.g. {"token-provider-url": "https//...", "token-request-payload": {"client_id": "GUID_xyz",...},... })' + #### render This subcommand is used to render a single script to the console. It is intended to support the development and troubleshooting of script that use features from the jinja template engine. diff --git a/schemachange/cli.py b/schemachange/cli.py index 8e053a77..83c2fc5a 100644 --- a/schemachange/cli.py +++ b/schemachange/cli.py @@ -58,6 +58,7 @@ + "execution" _log_apply = "Applying change script {script_name}" _log_undo = "Applying undo script {script_name}" +_log_recalculate = "Recalculate checksum for change script {script_name}" _log_apply_set_complete = "Successfully applied {scripts_applied} change scripts (skipping " \ + "{scripts_skipped}) \nCompleted successfully" _log_undo_set_complete = "Successfully applied {scripts_applied} undo scripts" @@ -613,6 +614,90 @@ def undo_command(config): print(_log_undo_set_complete.format(scripts_applied=scripts_applied)) +def recalculate_checksum_command(config): + req_args = set(['snowflake_account','snowflake_user','snowflake_role','snowflake_warehouse']) + validate_auth_config(config, req_args) + + # Log some additional details + if config['dry_run']: + print("Running in dry-run mode") + print(_log_config_details.format(**config)) + + #connect to snowflake and maintain connection + session = SnowflakeSchemachangeSession(config) + + scripts_skipped = 0 + scripts_applied = 0 + + # Deal with the change history table (create if specified) + change_history_table = get_change_history_table_details(config['change_history_table']) + change_history_metadata = 
session.fetch_change_history_metadata(change_history_table) + if change_history_metadata: + print(_log_ch_use.format(last_altered=change_history_metadata['last_altered'], **change_history_table)) + elif config['create_change_history_table']: + # Create the change history table (and containing objects) if it doesn't exist. + if not config['dry_run']: + session.create_change_history_table_if_missing(change_history_table) + print(_log_ch_create.format(**change_history_table)) + else: + raise ValueError(_err_ch_missing.format(**change_history_table)) + + # Find the max published version + max_published_version = '' + + change_history = None + r_scripts_checksum = None + if (config['dry_run'] and change_history_metadata) or not config['dry_run']: + change_history = session.fetch_change_history(change_history_table) + r_scripts_checksum = session.fetch_r_scripts_checksum(change_history_table) + + if change_history: + max_published_version = change_history[0] + max_published_version_display = max_published_version + if max_published_version_display == '': + max_published_version_display = 'None' + print(_log_ch_max_version.format(max_published_version_display=max_published_version_display)) + + # Find all scripts in the root folder (recursively) and sort them correctly + all_scripts = get_all_scripts_recursively(config['root_folder'], config['verbose']) + all_script_names = list(all_scripts.keys()) + # Only repeatable (R) scripts are relevant here, since only they carry checksums. 
+ all_script_names_sorted = sorted_alphanumeric([script for script in all_script_names if script[0] == 'R']) + + # Loop through each script in order and apply any required changes + for script_name in all_script_names_sorted: + script = all_scripts[script_name] + + # Always process with jinja engine + jinja_processor = JinjaTemplateProcessor(project_root = config['root_folder'], modules_folder = config['modules_folder']) + content = jinja_processor.render(jinja_processor.relpath(script['script_full_path']), config['vars'], config['verbose']) + + # Apply only R scripts where the checksum changed compared to the last execution of snowchange + if script_name[0] == 'R': + # Compute the checksum for the script + checksum_current = hashlib.sha224(content.encode('utf-8')).hexdigest() + + # check if R file was already executed + if (r_scripts_checksum is not None) and script_name in list(r_scripts_checksum['script_name']): + checksum_last = list(r_scripts_checksum.loc[r_scripts_checksum['script_name'] == script_name, 'checksum'])[0] + else: + checksum_last = '' + + # check if there is a change of the checksum in the script + if checksum_current == checksum_last: + if config['verbose']: + print(_log_skip_r.format(**script)) + scripts_skipped += 1 + continue + + print(_log_recalculate.format(**script)) + + if not config['dry_run']: + session.record_change_script(script, content, change_history_table, 0) + scripts_applied += 1 + + print(_log_apply_set_complete.format(scripts_applied=scripts_applied, scripts_skipped=scripts_skipped)) + def render_command(config, script_path): """ Renders the provided script. @@ -894,6 +979,24 @@ def main(argv=sys.argv): parser = argparse.ArgumentParser(prog = 'schemachange', description = 'Apply schema changes to a Snowflake account. 
Full readme at https://github.com/Snowflake-Labs/schemachange', formatter_class = argparse.RawTextHelpFormatter) subcommands = parser.add_subparsers(dest='subcommand') + parser_undo = subcommands.add_parser("recalculate_checksum") + parser_undo.add_argument('--config-folder', type = str, default = '.', help = 'The folder to look in for the schemachange-config.yml file (the default is the current working directory)', required = False) + parser_undo.add_argument('-s', '--step', type = int, default = 1, help = 'Amount of versioned migrations to be undone in the reverse of their applied order', required = False) + parser_undo.add_argument('-f', '--root-folder', type = str, help = 'The root folder for the database change scripts', required = False) + parser_undo.add_argument('-m', '--modules-folder', type = str, help = 'The modules folder for jinja macros and templates to be used across multiple scripts', required = False) + parser_undo.add_argument('-a', '--snowflake-account', type = str, help = 'The name of the snowflake account (e.g. xy12345.east-us-2.azure)', required = False) + parser_undo.add_argument('-u', '--snowflake-user', type = str, help = 'The name of the snowflake user', required = False) + parser_undo.add_argument('-r', '--snowflake-role', type = str, help = 'The name of the default role to use', required = False) + parser_undo.add_argument('-w', '--snowflake-warehouse', type = str, help = 'The name of the default warehouse to use. Can be overridden in the change scripts.', required = False) + parser_undo.add_argument('-d', '--snowflake-database', type = str, help = 'The name of the default database to use. 
Can be overridden in the change scripts.', required = False) + parser_undo.add_argument('-c', '--change-history-table', type = str, help = 'Used to override the default name of the change history table (the default is METADATA.SCHEMACHANGE.CHANGE_HISTORY)', required = False) + parser_undo.add_argument('--vars', type = json.loads, help = 'Define values for the variables to replaced in change scripts, given in JSON format (e.g. {"variable1": "value1", "variable2": "value2"})', required = False) + parser_undo.add_argument('-ac', '--autocommit', action='store_true', help = 'Enable autocommit feature for DML commands (the default is False)', required = False) + parser_undo.add_argument('-v','--verbose', action='store_true', help = 'Display verbose debugging details during execution (the default is False)', required = False) + parser_undo.add_argument('--dry-run', action='store_true', help = 'Run schemachange in dry run mode (the default is False)', required = False) + parser_undo.add_argument('--query-tag', type = str, help = 'The string to add to the Snowflake QUERY_TAG session value for each query executed', required = False) + parser_undo.add_argument('--oauth-config', type = json.loads, help = 'Define values for the variables to Make Oauth Token requests (e.g. {"token-provider-url": "https//...", "token-request-payload": {"client_id": "GUID_xyz",...},... })', required = False) + parser_undo = subcommands.add_parser("undo") parser_undo.add_argument('--config-folder', type = str, default = '.', help = 'The folder to look in for the schemachange-config.yml file (the default is the current working directory)', required = False) parser_undo.add_argument('-s', '--step', type = int, default = 1, help = 'Amount of versioned migrations to be undone in the reverse of their applied order', required = False) @@ -942,7 +1045,7 @@ def main(argv=sys.argv): # The original parameters did not support subcommands. 
Check if a subcommand has been supplied # if not default to deploy to match original behaviour. args = argv[1:] - if len(args) == 0 or not any(subcommand in args[0].upper() for subcommand in ["DEPLOY", "RENDER", "UNDO"]): + if len(args) == 0 or not any(subcommand in args[0].upper() for subcommand in ["DEPLOY", "RENDER", "UNDO", "RECALCULATE_CHECKSUM"]): args = ["deploy"] + args args = parser.parse_args(args) @@ -964,6 +1067,8 @@ def main(argv=sys.argv): "create_change_history_table":None,"autocommit":None,"dry_run":None,"query_tag":None,"oauth_config":None,"step":None } elif args.subcommand == 'undo': renderoveride = {"create_change_history_table":None} + elif args.subcommand == 'recalculate_checksum': + renderoveride = {"create_change_history_table":None} elif args.subcommand == 'deploy': renderoveride = {"step":None} @@ -997,6 +1102,8 @@ def main(argv=sys.argv): render_command(config, args.script) elif args.subcommand == 'undo': undo_command(config) + elif args.subcommand == 'recalculate_checksum': + recalculate_checksum_command(config) else: deploy_command(config)