From c3c6ac64d9c71a3a3c207daec22f899cb3662aaa Mon Sep 17 00:00:00 2001 From: Brian Mesick Date: Thu, 4 Jan 2024 14:03:54 -0500 Subject: [PATCH] refactor: Use named collections for dictionaries This allows us to change admin passwords and keep credentials out of the migrations. --- .../clickhouse-named-collection-config | 30 ++ tutoraspects/patches/clickhouse-server-config | 31 ++ tutoraspects/patches/clickhouse-user-config | 18 ++ .../patches/kustomization-configmapgenerator | 2 + tutoraspects/plugin.py | 47 --- .../versions/0010_course_dictionaries.py | 8 +- .../versions/0014_add_course_names_fields.py | 8 +- .../versions/0015_add_course_key_blocks.py | 8 +- .../0017_add_graded_course_block_names.py | 8 +- .../versions/0023_extend_display_names.py | 8 +- .../alembic/versions/0028_user_pii.py | 4 +- .../versions/0029_use_named_collections.py | 282 ++++++++++++++++++ .../apps/clickhouse/config/docker_config.xml | 2 +- .../clickhouse/config/named_collections.xml | 5 + .../apps/clickhouse/users/user_config.xml | 2 +- .../jobs/init/clickhouse/init-clickhouse.sh | 2 +- 16 files changed, 382 insertions(+), 83 deletions(-) create mode 100644 tutoraspects/patches/clickhouse-named-collection-config create mode 100644 tutoraspects/patches/clickhouse-server-config create mode 100644 tutoraspects/patches/clickhouse-user-config create mode 100644 tutoraspects/templates/aspects/apps/aspects/migrations/alembic/versions/0029_use_named_collections.py create mode 100644 tutoraspects/templates/aspects/apps/clickhouse/config/named_collections.xml diff --git a/tutoraspects/patches/clickhouse-named-collection-config b/tutoraspects/patches/clickhouse-named-collection-config new file mode 100644 index 000000000..1b38c94a8 --- /dev/null +++ b/tutoraspects/patches/clickhouse-named-collection-config @@ -0,0 +1,30 @@ + + + localhost + {{ CLICKHOUSE_INTERNAL_NATIVE_PORT }} + {{ ASPECTS_XAPI_DATABASE }} + {{ ASPECTS_CLICKHOUSE_REPORT_USER }} + {{ ASPECTS_CLICKHOUSE_REPORT_PASSWORD }} + {% if CLICKHOUSE_SECURE_CONNECTION %}1{% else %}0{% endif %} + + + localhost + {{ CLICKHOUSE_INTERNAL_NATIVE_PORT }} + {{ ASPECTS_EVENT_SINK_DATABASE }} + {{ ASPECTS_CLICKHOUSE_REPORT_USER }} + {{ ASPECTS_CLICKHOUSE_REPORT_PASSWORD }} + {% if CLICKHOUSE_SECURE_CONNECTION %}1{% else %}0{% endif %} + diff --git a/tutoraspects/patches/clickhouse-server-config b/tutoraspects/patches/clickhouse-server-config new file mode 100644 index 000000000..7ec0b8ab3 --- /dev/null +++ b/tutoraspects/patches/clickhouse-server-config @@ -0,0 +1,31 @@ + + + +{{CLICKHOUSE_INTERNAL_HTTP_PORT}} + + +{{CLICKHOUSE_INTERNAL_NATIVE_PORT}} + +:: +0.0.0.0 +1 diff --git a/tutoraspects/patches/clickhouse-user-config b/tutoraspects/patches/clickhouse-user-config new file mode 100644 index 000000000..535c8cbc1 --- /dev/null +++ b/tutoraspects/patches/clickhouse-user-config @@ -0,0 +1,18 @@ + + + + 1048576 + 1048576 + + diff --git a/tutoraspects/patches/kustomization-configmapgenerator b/tutoraspects/patches/kustomization-configmapgenerator index 5272ae682..e1a962f2a 100644 --- a/tutoraspects/patches/kustomization-configmapgenerator +++ b/tutoraspects/patches/kustomization-configmapgenerator @@ -37,10 +37,12 @@ - name: clickhouse-settings files: - plugins/aspects/apps/clickhouse/config/docker_config.xml + - plugins/aspects/apps/clickhouse/config/named_collections.xml - plugins/aspects/apps/clickhouse/users/user_config.xml options: labels: app.kubernetes.io/name: clickhouse + {% endif %} {% if RUN_RALPH %} diff --git a/tutoraspects/plugin.py b/tutoraspects/plugin.py index 140866516..4582dd9cf 100644 --- a/tutoraspects/plugin.py +++ b/tutoraspects/plugin.py @@ -174,53 +174,6 @@ "{% endif %}", ), ("CLICKHOUSE_K8S_VOLUME_SIZE", "10Gi"), - # This can be used to override some configuration values in - # via "docker_config.xml" file, which will be read from a - # mount on /etc/clickhouse-server/config.d/ on startup. - # See https://clickhouse.com/docs/en/operations/configuration-files - # - # This default allows connecting to Clickhouse when run as a - # standalone docker container, instead of through docker-compose. - ( - "CLICKHOUSE_EXTRA_XML_CONFIG", - """ - - {{CLICKHOUSE_INTERNAL_HTTP_PORT}} - - - {{CLICKHOUSE_INTERNAL_NATIVE_PORT}} - - :: - 0.0.0.0 - 1 - """, - ), - # Override configuration in users.xml. Similar to CLICKHOUSE_EXTRA_XML_CONFIG, - # this will be read from a mount on /etc/clickhouse-server/users.d/ - # on startup - # The http settings revert back to the value from versions pre-23.6, - # when the default was changed from 1Mb to 128Kb - ( - "CLICKHOUSE_EXTRA_USERS_XML_CONFIG", - """ - - - 1048576 - 1048576 - - - """, - ), ( "CLICKHOUSE_URL", "{{CLICKHOUSE_HOST}}:{{CLICKHOUSE_INTERNAL_HTTP_PORT}}", diff --git a/tutoraspects/templates/aspects/apps/aspects/migrations/alembic/versions/0010_course_dictionaries.py b/tutoraspects/templates/aspects/apps/aspects/migrations/alembic/versions/0010_course_dictionaries.py index 35cf13bee..8ef2b387c 100644 --- a/tutoraspects/templates/aspects/apps/aspects/migrations/alembic/versions/0010_course_dictionaries.py +++ b/tutoraspects/templates/aspects/apps/aspects/migrations/alembic/versions/0010_course_dictionaries.py @@ -40,9 +40,7 @@ def upgrade(): ) PRIMARY KEY course_key SOURCE(CLICKHOUSE( - user '{{ CLICKHOUSE_ADMIN_USER }}' - password '{{ CLICKHOUSE_ADMIN_PASSWORD }}' - db '{{ ASPECTS_EVENT_SINK_DATABASE }}' + name local_ch_event_sink query 'with most_recent_overviews as ( select org, course_key, max(modified) as last_modified from {{ ASPECTS_EVENT_SINK_DATABASE }}.course_overviews @@ -82,9 +80,7 @@ def upgrade(): ) PRIMARY KEY location SOURCE(CLICKHOUSE( - user '{{ CLICKHOUSE_ADMIN_USER }}' - password '{{ CLICKHOUSE_ADMIN_PASSWORD }}' - db '{{ ASPECTS_EVENT_SINK_DATABASE }}' + name local_ch_event_sink query 'with most_recent_blocks as ( select org, course_key, location, max(edited_on) as last_modified from {{ ASPECTS_EVENT_SINK_DATABASE }}.course_blocks diff --git a/tutoraspects/templates/aspects/apps/aspects/migrations/alembic/versions/0014_add_course_names_fields.py b/tutoraspects/templates/aspects/apps/aspects/migrations/alembic/versions/0014_add_course_names_fields.py index 69e4dc773..85a97dafa 100644 --- a/tutoraspects/templates/aspects/apps/aspects/migrations/alembic/versions/0014_add_course_names_fields.py +++ b/tutoraspects/templates/aspects/apps/aspects/migrations/alembic/versions/0014_add_course_names_fields.py @@ -40,9 +40,7 @@ def upgrade(): ) PRIMARY KEY course_key SOURCE(CLICKHOUSE( - user '{{ CLICKHOUSE_ADMIN_USER }}' - password '{{ CLICKHOUSE_ADMIN_PASSWORD }}' - db '{{ ASPECTS_EVENT_SINK_DATABASE }}' + name local_ch_event_sink query 'with most_recent_overviews as ( select org, course_key, max(modified) as last_modified from {{ ASPECTS_EVENT_SINK_DATABASE }}.course_overviews @@ -102,9 +100,7 @@ def downgrade(): ) PRIMARY KEY course_key SOURCE(CLICKHOUSE( - user '{{ CLICKHOUSE_ADMIN_USER }}' - password '{{ CLICKHOUSE_ADMIN_PASSWORD }}' - db '{{ ASPECTS_EVENT_SINK_DATABASE }}' + name local_ch_event_sink query 'with most_recent_overviews as ( select org, course_key, max(modified) as last_modified from {{ ASPECTS_EVENT_SINK_DATABASE }}.course_overviews diff --git a/tutoraspects/templates/aspects/apps/aspects/migrations/alembic/versions/0015_add_course_key_blocks.py b/tutoraspects/templates/aspects/apps/aspects/migrations/alembic/versions/0015_add_course_key_blocks.py index 5fae2c76a..84768699f 100644 --- a/tutoraspects/templates/aspects/apps/aspects/migrations/alembic/versions/0015_add_course_key_blocks.py +++ b/tutoraspects/templates/aspects/apps/aspects/migrations/alembic/versions/0015_add_course_key_blocks.py @@ -43,9 +43,7 @@ def upgrade(): ) PRIMARY KEY location SOURCE(CLICKHOUSE( - user '{{ CLICKHOUSE_ADMIN_USER }}' - password '{{ CLICKHOUSE_ADMIN_PASSWORD }}' - db '{{ ASPECTS_EVENT_SINK_DATABASE }}' + name local_ch_event_sink query 'with most_recent_blocks as ( select org, course_key, location, max(edited_on) as last_modified from {{ ASPECTS_EVENT_SINK_DATABASE }}.course_blocks @@ -93,9 +91,7 @@ def downgrade(): ) PRIMARY KEY location SOURCE(CLICKHOUSE( - user '{{ CLICKHOUSE_ADMIN_USER }}' - password '{{ CLICKHOUSE_ADMIN_PASSWORD }}' - db '{{ ASPECTS_EVENT_SINK_DATABASE }}' + name local_ch_event_sink query 'with most_recent_blocks as ( select org, course_key, location, max(edited_on) as last_modified from {{ ASPECTS_EVENT_SINK_DATABASE }}.course_blocks diff --git a/tutoraspects/templates/aspects/apps/aspects/migrations/alembic/versions/0017_add_graded_course_block_names.py b/tutoraspects/templates/aspects/apps/aspects/migrations/alembic/versions/0017_add_graded_course_block_names.py index c20298d13..4e6657a3c 100644 --- a/tutoraspects/templates/aspects/apps/aspects/migrations/alembic/versions/0017_add_graded_course_block_names.py +++ b/tutoraspects/templates/aspects/apps/aspects/migrations/alembic/versions/0017_add_graded_course_block_names.py @@ -43,9 +43,7 @@ def upgrade(): ) PRIMARY KEY location SOURCE(CLICKHOUSE( - user '{{ CLICKHOUSE_ADMIN_USER }}' - password '{{ CLICKHOUSE_ADMIN_PASSWORD }}' - db '{{ ASPECTS_EVENT_SINK_DATABASE }}' + name local_ch_event_sink query "with most_recent_blocks as ( select org, course_key, location, max(edited_on) as last_modified from {{ ASPECTS_EVENT_SINK_DATABASE }}.course_blocks @@ -96,9 +94,7 @@ def downgrade(): ) PRIMARY KEY location SOURCE(CLICKHOUSE( - user '{{ CLICKHOUSE_ADMIN_USER }}' - password '{{ CLICKHOUSE_ADMIN_PASSWORD }}' - db '{{ ASPECTS_EVENT_SINK_DATABASE }}' + name local_ch_event_sink query 'with most_recent_blocks as ( select org, course_key, location, max(edited_on) as last_modified from {{ ASPECTS_EVENT_SINK_DATABASE }}.course_blocks diff --git a/tutoraspects/templates/aspects/apps/aspects/migrations/alembic/versions/0023_extend_display_names.py b/tutoraspects/templates/aspects/apps/aspects/migrations/alembic/versions/0023_extend_display_names.py index 24a35eed8..ea18451c4 100644 --- a/tutoraspects/templates/aspects/apps/aspects/migrations/alembic/versions/0023_extend_display_names.py +++ b/tutoraspects/templates/aspects/apps/aspects/migrations/alembic/versions/0023_extend_display_names.py @@ -124,9 +124,7 @@ def upgrade(): ) PRIMARY KEY location SOURCE(CLICKHOUSE( - user '{{ CLICKHOUSE_ADMIN_USER }}' - password '{{ CLICKHOUSE_ADMIN_PASSWORD }}' - db '{{ ASPECTS_EVENT_SINK_DATABASE }}' + name local_ch_event_sink query " select location, @@ -173,9 +171,7 @@ def downgrade(): ) PRIMARY KEY location SOURCE(CLICKHOUSE( - user '{{ CLICKHOUSE_ADMIN_USER }}' - password '{{ CLICKHOUSE_ADMIN_PASSWORD }}' - db '{{ ASPECTS_EVENT_SINK_DATABASE }}' + name local_ch_event_sink query "with most_recent_blocks as ( select org, course_key, location, max(edited_on) as last_modified from {{ ASPECTS_EVENT_SINK_DATABASE }}.{{ ASPECTS_EVENT_SINK_NODES_TABLE }} diff --git a/tutoraspects/templates/aspects/apps/aspects/migrations/alembic/versions/0028_user_pii.py b/tutoraspects/templates/aspects/apps/aspects/migrations/alembic/versions/0028_user_pii.py index 25f4cdae1..d31019d3e 100644 --- a/tutoraspects/templates/aspects/apps/aspects/migrations/alembic/versions/0028_user_pii.py +++ b/tutoraspects/templates/aspects/apps/aspects/migrations/alembic/versions/0028_user_pii.py @@ -56,9 +56,7 @@ def upgrade(): ) PRIMARY KEY (user_id, external_user_id) SOURCE(CLICKHOUSE( - user '{{ CLICKHOUSE_ADMIN_USER }}' - password '{{ CLICKHOUSE_ADMIN_PASSWORD }}' - db '{{ ASPECTS_EVENT_SINK_DATABASE }}' + name local_ch_event_sink query " with most_recent_user_profile as ( select diff --git a/tutoraspects/templates/aspects/apps/aspects/migrations/alembic/versions/0029_use_named_collections.py b/tutoraspects/templates/aspects/apps/aspects/migrations/alembic/versions/0029_use_named_collections.py new file mode 100644 index 000000000..47970cb14 --- /dev/null +++ b/tutoraspects/templates/aspects/apps/aspects/migrations/alembic/versions/0029_use_named_collections.py @@ -0,0 +1,282 @@ +""" +Update all existing dictionaries to use named collections. + +In Jan 2024 we updated old Alembic migrations to use these, but any database +created before that will need these changes. This allowed us to get the +database credentials out of the migrations (and therefore not break everything +when changing the ClickHouse admin password). For databases created after 2024 +these should change nothing. +""" + +from alembic import op + + +revision = "0029" +down_revision = "0028" +branch_labels = None +depends_on = None +on_cluster = " ON CLUSTER '{{CLICKHOUSE_CLUSTER_NAME}}' " if "{{CLICKHOUSE_CLUSTER_NAME}}" else "" +engine = "ReplicatedReplacingMergeTree" if "{{CLICKHOUSE_CLUSTER_NAME}}" else "ReplacingMergeTree" + + +def upgrade(): + # We include these drop statements here because "CREATE OR REPLACE DICTIONARY" + # currently throws a file rename error and you can't drop a dictionary with a + # table referring to it. + + ####################### + # user_pii_dict changes + op.execute( + f""" + DROP TABLE IF EXISTS {{ ASPECTS_EVENT_SINK_DATABASE }}.user_pii + {on_cluster} + """ + ) + op.execute( + f""" + DROP DICTIONARY IF EXISTS {{ ASPECTS_EVENT_SINK_DATABASE }}.user_pii_dict + {on_cluster} + """ + ) + op.execute( + f""" + CREATE DICTIONARY {{ ASPECTS_EVENT_SINK_DATABASE }}.user_pii_dict + {on_cluster} + ( + user_id Int32, + external_user_id UUID, + external_id_type String, + username String, + name String, + meta String, + courseware String, + language String, + location String, + year_of_birth String, + gender String, + level_of_education String, + mailing_address String, + city String, + country String, + state String, + goals String, + bio String, + profile_image_uploaded_at String, + phone_number String + ) + PRIMARY KEY (user_id, external_user_id) + SOURCE(CLICKHOUSE( + name local_ch_event_sink + query " + with most_recent_user_profile as ( + select + user_id, + name, + meta, + courseware, + language, + location, + year_of_birth, + gender, + level_of_education, + mailing_address, + city, + country, + state, + goals, + bio, + profile_image_uploaded_at, + phone_number, + ROW_NUMBER() OVER (PARTITION BY user_id ORDER BY (id, time_last_dumped) DESC) as rn + from {{ ASPECTS_EVENT_SINK_DATABASE }}.{{ ASPECTS_EVENT_SINK_USER_PROFILE_TABLE }} + ) + select + mrup.user_id as user_id, + external_user_id, + external_id_type, + username, + name, + meta, + courseware, + language, + location, + year_of_birth, + gender, + level_of_education, + mailing_address, + city, + country, + state, + goals, + bio, + profile_image_uploaded_at, + phone_number + FROM {{ ASPECTS_EVENT_SINK_DATABASE }}.{{ ASPECTS_EVENT_SINK_EXTERNAL_ID_TABLE }} ex + RIGHT OUTER JOIN most_recent_user_profile mrup ON + mrup.user_id = ex.user_id and ( + ex.external_id_type = 'xapi' OR + ex.external_id_type is NULL + ) + WHERE mrup.rn = 1 + " + )) + LAYOUT(COMPLEX_KEY_SPARSE_HASHED()) + LIFETIME({{ ASPECTS_PII_CACHE_LIFETIME }}) + """ + ) + op.execute( + f""" + CREATE TABLE IF NOT EXISTS {{ ASPECTS_EVENT_SINK_DATABASE }}.user_pii + {on_cluster} + ( + user_id Int32, + external_user_id UUID, + external_id_type String, + username String, + name String, + meta String, + courseware String, + language String, + location String, + year_of_birth String, + gender String, + level_of_education String, + mailing_address String, + city String, + country String, + state String, + goals String, + bio String, + profile_image_uploaded_at String, + phone_number String + ) engine = Dictionary({{ ASPECTS_EVENT_SINK_DATABASE }}.user_pii_dict); + """ + ) + + # course_names_dict changes + ########################### + op.execute( + f""" + DROP TABLE IF EXISTS {{ ASPECTS_EVENT_SINK_DATABASE }}.course_names + {on_cluster} + """ + ) + op.execute( + f""" + DROP DICTIONARY IF EXISTS {{ ASPECTS_EVENT_SINK_DATABASE }}.course_names_dict + {on_cluster} + """ + ) + op.execute( + f""" + CREATE DICTIONARY {{ ASPECTS_EVENT_SINK_DATABASE }}.course_names_dict + {on_cluster} + ( + course_key String, + course_name String, + course_run String, + org String + ) + PRIMARY KEY course_key + SOURCE(CLICKHOUSE( + name local_ch_event_sink + query 'with most_recent_overviews as ( + select org, course_key, max(modified) as last_modified + from {{ ASPECTS_EVENT_SINK_DATABASE }}.course_overviews + group by org, course_key + ) + select + course_key, + display_name, + splitByString(\\'+\\', course_key)[-1] as course_run, + org + from {{ ASPECTS_EVENT_SINK_DATABASE }}.course_overviews co + inner join most_recent_overviews mro on + co.org = mro.org and + co.course_key = mro.course_key and + co.modified = mro.last_modified + ' + )) + LAYOUT(COMPLEX_KEY_HASHED()) + LIFETIME(120); + """ + ) + op.execute( + f""" + CREATE TABLE {{ ASPECTS_EVENT_SINK_DATABASE }}.course_names + {on_cluster} + ( + course_key String, + course_name String, + course_run String, + org String + ) engine = Dictionary({{ ASPECTS_EVENT_SINK_DATABASE }}.course_names_dict); + """ + ) + + # course_block_names_dict changes + ################################## + op.execute( + f""" + DROP TABLE IF EXISTS {{ ASPECTS_EVENT_SINK_DATABASE }}.course_block_names + {on_cluster} + """ + ) + op.execute( + f""" + DROP DICTIONARY IF EXISTS {{ ASPECTS_EVENT_SINK_DATABASE }}.course_block_names_dict + {on_cluster} + """ + ) + op.execute( + f""" + CREATE DICTIONARY {{ ASPECTS_EVENT_SINK_DATABASE }}.course_block_names_dict + {on_cluster} + ( + location String, + block_name String, + course_key String, + graded Bool, + display_name_with_location String + ) + PRIMARY KEY location + SOURCE(CLICKHOUSE( + name local_ch_event_sink + query " + select + location, + display_name, + course_key, + graded, + display_name_with_location + from {{ ASPECTS_EVENT_SINK_DATABASE }}.{{ ASPECTS_EVENT_SINK_RECENT_BLOCKS_TABLE }} + final + " + )) + LAYOUT(COMPLEX_KEY_SPARSE_HASHED()) + LIFETIME(120); + """ + ) + + op.execute( + f""" + CREATE OR REPLACE TABLE {{ ASPECTS_EVENT_SINK_DATABASE }}.course_block_names + {on_cluster} + ( + location String, + block_name String, + course_key String, + graded Bool, + display_name_with_location String + ) engine = Dictionary({{ ASPECTS_EVENT_SINK_DATABASE }}.course_block_names_dict) + ; + """ + ) + + +def downgrade(): + """ + We can't downgrade these without adding the credentials back, so right now + we do nothing. Earlier migrations will drop these as appropriate. + """ + pass diff --git a/tutoraspects/templates/aspects/apps/clickhouse/config/docker_config.xml b/tutoraspects/templates/aspects/apps/clickhouse/config/docker_config.xml index f7c78ffbf..5939d129e 100644 --- a/tutoraspects/templates/aspects/apps/clickhouse/config/docker_config.xml +++ b/tutoraspects/templates/aspects/apps/clickhouse/config/docker_config.xml @@ -1,3 +1,3 @@ - {{ CLICKHOUSE_EXTRA_XML_CONFIG }} + {{patch("clickhouse-server-config")}} diff --git a/tutoraspects/templates/aspects/apps/clickhouse/config/named_collections.xml b/tutoraspects/templates/aspects/apps/clickhouse/config/named_collections.xml new file mode 100644 index 000000000..a6a072135 --- /dev/null +++ b/tutoraspects/templates/aspects/apps/clickhouse/config/named_collections.xml @@ -0,0 +1,5 @@ + + + {{patch("clickhouse-named-collection-config")}} + + diff --git a/tutoraspects/templates/aspects/apps/clickhouse/users/user_config.xml b/tutoraspects/templates/aspects/apps/clickhouse/users/user_config.xml index 0eed7a38c..ab5975552 100644 --- a/tutoraspects/templates/aspects/apps/clickhouse/users/user_config.xml +++ b/tutoraspects/templates/aspects/apps/clickhouse/users/user_config.xml @@ -1,3 +1,3 @@ - {{ CLICKHOUSE_EXTRA_USERS_XML_CONFIG }} + {{patch("clickhouse-user-config")}} diff --git a/tutoraspects/templates/aspects/jobs/init/clickhouse/init-clickhouse.sh b/tutoraspects/templates/aspects/jobs/init/clickhouse/init-clickhouse.sh index fc58bf662..74fdee54d 100644 --- a/tutoraspects/templates/aspects/jobs/init/clickhouse/init-clickhouse.sh +++ b/tutoraspects/templates/aspects/jobs/init/clickhouse/init-clickhouse.sh @@ -61,7 +61,7 @@ GRANT {{ ON_CLUSTER }} SELECT ON system.events TO '{{ ASPECTS_CLICKHOUSE_REPORT_ GRANT {{ ON_CLUSTER }} SELECT ON system.metrics TO '{{ ASPECTS_CLICKHOUSE_REPORT_USER }}'; GRANT {{ ON_CLUSTER }} SELECT ON system.replication_queue TO '{{ ASPECTS_CLICKHOUSE_REPORT_USER }}'; - +-- Patch from clickhouse-extra-sql follows... {{ patch("clickhouse-extra-sql") }} EOF