Skip to content

Commit

Permalink
feat: simplify contract generator
Browse files: browse the repository at this point in the history
  • Loading branch information
a-chumagin committed Mar 1, 2024
1 parent 5c2456f commit 8cd0e73
Show file tree
Hide file tree
Showing 7 changed files with 26 additions and 19 deletions.
4 changes: 3 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ __pycache__/
venv/
env/


# IDE-specific files
.vscode/
.idea/
Expand Down Expand Up @@ -76,4 +77,5 @@ yarn-error.log
.Pipfile.lock
.Pipfile

data/*data_contract.yml
data/*data_contract.yml
.env
8 changes: 4 additions & 4 deletions configuration/configuration.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ data_source vertica_local:
connection:
host: ${vertica_host}
port: '5433'
username: dbadmin
password: foo123
database: Vmart
schema: public
username: ${vertica_user}
password: ${vertica_password}
database: ${vertica_db}
schema: ${vertica_schema}
Empty file added data/.gitkeep
Empty file.
7 changes: 7 additions & 0 deletions docker/docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,14 @@ services:
build:
context: ..
dockerfile: ./docker/Dockerfile
volumes:
- /Users/achumagin/depot/data_qa/etg/soda-contract/data:/app/data
environment:
- vertica_host=vertica
- vertica_port=5433
- vertica_user=dbadmin
- vertica_password=foo123
- vertica_db=Vmart
- vertica_schema=public
depends_on:
- vertica
5 changes: 1 addition & 4 deletions scripts/data_contract_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,10 +29,7 @@ def generate_data_contract(column_info):
column_name, data_type, is_nullable = column
column_data = {
'name': column_name,
'data_type': re.sub(
r'(varchar|char)\s*\(\s*\d+\s*\)',
lambda match: match.group(1).capitalize(),
data_type),
'data_type': data_type,
'not_null': not is_nullable
}

Expand Down
18 changes: 9 additions & 9 deletions scripts/db_operations.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,14 +38,13 @@ def create_vertica_connection(config, max_retries=3):
"""
conn_info = {
'host': os.environ.get('vertica_host', config.get('host')),
'port': config.get('port'),
'user': config.get('username'),
'password': config.get('password'),
'database': config.get('database'),
'schema': config.get('schema'),
'port': os.environ.get('port',config.get('port')),
'user': os.environ.get('vertica_user',config.get('username')),
'password': os.environ.get('vertica_password',config.get('password')),
'database': os.environ.get('vertica_db',config.get('database')),
'schema': os.environ.get('vertica_schema',config.get('schema')),
'ssl': False # Set to True if using SSL
}

for i in range(max_retries):
print(conn_info['host'])
try:
Expand All @@ -57,7 +56,7 @@ def create_vertica_connection(config, max_retries=3):
raise


def get_vertica_table_structure(table_name, connection):
def get_vertica_table_structure(schema_name, table_name, connection):
"""
Get the structure of a Vertica table.
Expand All @@ -70,9 +69,10 @@ def get_vertica_table_structure(table_name, connection):
"""
with connection as conn:
query = (
"SELECT column_name, data_type, is_nullable "
f"FROM columns WHERE table_name='{table_name}'"
"SELECT c.column_name, t.type_name, c.is_nullable "
f"FROM columns c JOIN types t ON c.data_type_id = t.type_id WHERE table_name='{table_name}' and table_schema='{schema_name}'"
)

conn.cursor().execute(query)
table_structure = conn.cursor().fetchall()

Expand Down
3 changes: 2 additions & 1 deletion scripts/main_script.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ def main():
and run checks on the generated file.
"""
table_name = 'customer_dimension'
schema_name = 'public'
config_path = 'configuration/configuration.yml'
data_contract_path = f"data/{table_name}_data_contract.yml"

Expand All @@ -25,7 +26,7 @@ def main():
connection = create_vertica_connection(vertica_config)
print("Successfully connected to Vertica!")

column_info = get_vertica_table_structure(table_name, connection)
column_info = get_vertica_table_structure(schema_name, table_name, connection)
data_contract_yaml = generate_data_contract(column_info)

with open(data_contract_path, 'w', encoding='utf-8') as yaml_file:
Expand Down

0 comments on commit 8cd0e73

Please sign in to comment.