From 8cd0e73dd6aae3b1d02539e767dcfabe0adae35d Mon Sep 17 00:00:00 2001 From: achumagin Date: Fri, 1 Mar 2024 20:21:40 +0300 Subject: [PATCH] feat: simplify contract generator --- .gitignore | 4 +++- configuration/configuration.yml | 8 ++++---- data/.gitkeep | 0 docker/docker-compose.yml | 7 +++++++ scripts/data_contract_generator.py | 5 +---- scripts/db_operations.py | 18 +++++++++--------- scripts/main_script.py | 3 ++- 7 files changed, 26 insertions(+), 19 deletions(-) create mode 100644 data/.gitkeep diff --git a/.gitignore b/.gitignore index ffd0f9c..82c4448 100644 --- a/.gitignore +++ b/.gitignore @@ -8,6 +8,7 @@ __pycache__/ venv/ env/ + # IDE-specific files .vscode/ .idea/ @@ -76,4 +77,5 @@ yarn-error.log .Pipfile.lock .Pipfile -data/*data_contract.yml \ No newline at end of file +data/*data_contract.yml +.env \ No newline at end of file diff --git a/configuration/configuration.yml b/configuration/configuration.yml index 34fd04c..213ef6f 100644 --- a/configuration/configuration.yml +++ b/configuration/configuration.yml @@ -3,7 +3,7 @@ data_source vertica_local: connection: host: ${vertica_host} port: '5433' - username: dbadmin - password: foo123 - database: Vmart - schema: public \ No newline at end of file + username: ${vertica_user} + password: ${vertica_password} + database: ${vertica_db} + schema: ${vertica_schema} \ No newline at end of file diff --git a/data/.gitkeep b/data/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/docker/docker-compose.yml b/docker/docker-compose.yml index a285b39..934f2cf 100644 --- a/docker/docker-compose.yml +++ b/docker/docker-compose.yml @@ -10,7 +10,14 @@ services: build: context: .. dockerfile: ./docker/Dockerfile + volumes: + - /Users/achumagin/depot/data_qa/etg/soda-contract/data:/app/data environment: - vertica_host=vertica + - vertica_port=5433 + - vertica_user=dbadmin + - vertica_password=foo123 + - vertica_db=Vmart + - vertica_schema=public depends_on: - vertica diff --git a/scripts/data_contract_generator.py b/scripts/data_contract_generator.py index 6b4b047..191de59 100644 --- a/scripts/data_contract_generator.py +++ b/scripts/data_contract_generator.py @@ -29,10 +29,7 @@ def generate_data_contract(column_info): column_name, data_type, is_nullable = column column_data = { 'name': column_name, - 'data_type': re.sub( - r'(varchar|char)\s*\(\s*\d+\s*\)', - lambda match: match.group(1).capitalize(), - data_type), + 'data_type': data_type, 'not_null': not is_nullable } diff --git a/scripts/db_operations.py b/scripts/db_operations.py index a33a2c4..45e2679 100644 --- a/scripts/db_operations.py +++ b/scripts/db_operations.py @@ -38,14 +38,13 @@ def create_vertica_connection(config, max_retries=3): """ conn_info = { 'host': os.environ.get('vertica_host', config.get('host')), - 'port': config.get('port'), - 'user': config.get('username'), - 'password': config.get('password'), - 'database': config.get('database'), - 'schema': config.get('schema'), + 'port': os.environ.get('port',config.get('port')), + 'user': os.environ.get('vertica_user',config.get('username')), + 'password': os.environ.get('vertica_password',config.get('password')), + 'database': os.environ.get('vertica_db',config.get('database')), + 'schema': os.environ.get('vertica_schema',config.get('schema')), 'ssl': False # Set to True if using SSL } - for i in range(max_retries): print(conn_info['host']) try: @@ -57,7 +56,7 @@ def create_vertica_connection(config, max_retries=3): raise -def get_vertica_table_structure(table_name, connection): +def get_vertica_table_structure(schema_name, table_name, connection): """ Get the structure of a Vertica table. @@ -70,9 +69,10 @@ def get_vertica_table_structure(table_name, connection): """ with connection as conn: query = ( - "SELECT column_name, data_type, is_nullable " - f"FROM columns WHERE table_name='{table_name}'" + "SELECT c.column_name, t.type_name, c.is_nullable " + f"FROM columns c JOIN types t ON c.data_type_id = t.type_id WHERE table_name='{table_name}' and table_schema='{schema_name}'" ) + conn.cursor().execute(query) table_structure = conn.cursor().fetchall() diff --git a/scripts/main_script.py b/scripts/main_script.py index e717e6d..390aac8 100644 --- a/scripts/main_script.py +++ b/scripts/main_script.py @@ -16,6 +16,7 @@ def main(): and run checks on the generated file. """ table_name = 'customer_dimension' + schema_name = 'public' config_path = 'configuration/configuration.yml' data_contract_path = f"data/{table_name}_data_contract.yml" @@ -25,7 +26,7 @@ def main(): connection = create_vertica_connection(vertica_config) print("Successfully connected to Vertica!") - column_info = get_vertica_table_structure(table_name, connection) + column_info = get_vertica_table_structure(schema_name, table_name, connection) data_contract_yaml = generate_data_contract(column_info) with open(data_contract_path, 'w', encoding='utf-8') as yaml_file: