From cec477ffe9cabc5a965db68593c99f8cc0d2fae7 Mon Sep 17 00:00:00 2001 From: Sean Linsley Date: Sat, 26 Oct 2024 17:25:47 -0500 Subject: [PATCH] Improve performance for databases with many partitions --- .../clean_dump.rb | 46 ++++++++++--------- test/data/partitions.sql | 19 ++++++++ test/expectations/partitions.sql | 1 - 3 files changed, 43 insertions(+), 23 deletions(-) diff --git a/lib/activerecord-clean-db-structure/clean_dump.rb b/lib/activerecord-clean-db-structure/clean_dump.rb index 5796f83..d0d9950 100644 --- a/lib/activerecord-clean-db-structure/clean_dump.rb +++ b/lib/activerecord-clean-db-structure/clean_dump.rb @@ -8,6 +8,13 @@ def initialize(dump, options = {}) end def run + clean_partition_tables # Must be first because it makes assumptions about string format + clean + clean_inherited_tables + clean_options + end + + def clean # Remove trailing whitespace dump.gsub!(/[ \t]+$/, '') dump.gsub!(/\A\n/, '') @@ -57,8 +64,9 @@ def run dump.gsub!(/^-- .*_id_seq; Type: SEQUENCE.*/, '') dump.gsub!(/^-- Name: (\w+\s+)?\w+_pkey; Type: CONSTRAINT$/, '') end + end - # Remove inherited tables + def clean_inherited_tables inherited_tables_regexp = /-- Name: ([\w\.]+); Type: TABLE\n\n[^;]+?INHERITS \([\w\.]+\);/m inherited_tables = dump.scan(inherited_tables_regexp).map(&:first) dump.gsub!(inherited_tables_regexp, '') @@ -71,8 +79,9 @@ def run end dump.gsub!(index_regexp, '') end + end - # Remove partitioned tables + def clean_partition_tables partitioned_tables = [] # Postgres 12 pg_dump will output separate ATTACH PARTITION statements (even when run against an 11 or older server) @@ -83,30 +92,23 @@ def run partitioned_tables_regexp2 = /-- Name: ([\w\.]+); Type: TABLE\n\n[^;]+?PARTITION OF [\w\.]+\n[^;]+?;/m partitioned_tables += dump.scan(partitioned_tables_regexp2).map(&:first) - partitioned_tables.each do |partitioned_table| - _partitioned_schema_name, partitioned_table_name_only = partitioned_table.split('.', 2) - dump.gsub!(/-- Name: #{partitioned_table_name_only}; Type: TABLE ATTACH/, '') - dump.gsub!(/-- Name: #{partitioned_table_name_only}; Type: TABLE/, '') - dump.gsub!(/CREATE TABLE #{partitioned_table} \([^;]+;/m, '') - dump.gsub!(/ALTER TABLE ONLY ([\w_\.]+) ATTACH PARTITION #{partitioned_table}[^;]+;/m, '') - - dump.gsub!(/ALTER TABLE ONLY ([\w_]+\.)?#{partitioned_table}[^;]+;/, '') - dump.gsub!(/-- Name: #{partitioned_table} [^;]+; Type: DEFAULT/, '') - - index_regexp = /CREATE (UNIQUE )?INDEX ([\w_]+) ON ([\w_]+\.)?#{partitioned_table}[^;]+;/m - dump.scan(index_regexp).each do |m| - partitioned_table_index = m[1] - dump.gsub!("-- Name: #{partitioned_table_index}; Type: INDEX ATTACH", '') - dump.gsub!("-- Name: #{partitioned_table_index}; Type: INDEX", '') - dump.gsub!(/ALTER INDEX ([\w_\.]+) ATTACH PARTITION ([\w_]+\.)?#{partitioned_table_index};/, '') - end - dump.gsub!(index_regexp, '') - dump.gsub!(/-- Name: #{partitioned_table_name_only}_pkey; Type: INDEX ATTACH/, '') - dump.gsub!(/ALTER INDEX ([\w_]+\.)?[\w_]+_pkey ATTACH PARTITION #{partitioned_table}_pkey;/, '') + # We assume that a comment + schema statement pair has 3 trailing newlines. + # This makes it easier to drop both the comment and statement at once. + statements = dump.split("\n\n\n") + names = [] + partitioned_tables.each { |table| names << table.split('.', 2)[1] } + if names.any? + dump.scan(/CREATE (UNIQUE )?INDEX ([\w_]+) ON ([\w_]+\.)?(#{names.join('|')})[^;]+;/m).each { |m| names << m[1] } end + statements.reject! { |stmt| names.any? { |name| stmt.include?(name) } } + @dump = statements.join("\n\n") + @dump << "\n" if @dump[-1] != "\n" + # This is mostly done to allow restoring Postgres 11 output on Postgres 10 dump.gsub!(/CREATE INDEX ([\w]+) ON ONLY/, 'CREATE INDEX \\1 ON') + end + def clean_options if options[:order_schema_migrations_values] == true schema_migrations_cleanup else diff --git a/test/data/partitions.sql b/test/data/partitions.sql index 3992701..d7fdb3d 100644 --- a/test/data/partitions.sql +++ b/test/data/partitions.sql @@ -10,18 +10,21 @@ CREATE TABLE public.autovacuum_run_stats_35d ( ) PARTITION BY RANGE (occurred_at); + -- -- Name: index_autovacuum_run_stats_35d_on_schema_table_id_occurred_at; Type: INDEX -- CREATE INDEX index_autovacuum_run_stats_35d_on_schema_table_id_occurred_at ON public.autovacuum_run_stats_35d USING btree (schema_table_id, occurred_at); + -- -- Name: index_autovacuum_run_stats_35d_on_server_id_and_occurred_at; Type: INDEX -- CREATE INDEX index_autovacuum_run_stats_35d_on_server_id_and_occurred_at ON public.autovacuum_run_stats_35d USING btree (server_id, occurred_at); + -- -- Name: autovacuum_run_stats_35d_20241026; Type: TABLE; Schema: public; Owner: - -- @@ -33,12 +36,14 @@ CREATE TABLE public.autovacuum_run_stats_35d_20241026 ( occurred_at timestamp with time zone NOT NULL ); + -- -- Name: autovacuum_run_stats_35d_20241026; Type: TABLE ATTACH; Schema: public; Owner: - -- ALTER TABLE ONLY public.autovacuum_run_stats_35d ATTACH PARTITION public.autovacuum_run_stats_35d_20241026 FOR VALUES FROM ('2024-10-25 19:00:00-05') TO ('2024-10-26 19:00:00-05'); + -- -- Name: autovacuum_run_stats_35d_20241026 autovacuum_run_stats_35d_20241026_pkey; Type: CONSTRAINT; Schema: public; Owner: - -- @@ -46,24 +51,28 @@ ALTER TABLE ONLY public.autovacuum_run_stats_35d ATTACH PARTITION public.autovac ALTER TABLE ONLY public.autovacuum_run_stats_35d_20241026 ADD CONSTRAINT autovacuum_run_stats_35d_20241026_pkey PRIMARY KEY (autovacuum_run_stats_id); + -- -- Name: autovacuum_run_stats_35d_20241026_server_id_occurred_at_idx; Type: INDEX; Schema: public; Owner: - -- CREATE INDEX autovacuum_run_stats_35d_20241026_server_id_occurred_at_idx ON public.autovacuum_run_stats_35d_20241026 USING btree (server_id, occurred_at); + -- -- Name: autovacuum_run_stats_35d_2024_schema_table_id_occurred_at_idx25; Type: INDEX; Schema: public; Owner: - -- CREATE INDEX autovacuum_run_stats_35d_2024_schema_table_id_occurred_at_idx25 ON public.autovacuum_run_stats_35d_20241026 USING btree (schema_table_id, occurred_at); + -- -- Name: autovacuum_run_stats_35d_20241026_server_id_occurred_at_idx; Type: INDEX ATTACH; Schema: public; Owner: - -- ALTER INDEX public.index_autovacuum_run_stats_35d_on_server_id_and_occurred_at ATTACH PARTITION public.autovacuum_run_stats_35d_20241026_server_id_occurred_at_idx; + -- -- Name: schema_table_infos_35d; Type: TABLE; Schema: public; Owner: - -- @@ -76,9 +85,14 @@ CREATE TABLE public.schema_table_infos_35d ( PARTITION BY RANGE (collected_at); +-- +-- Name: schema_table_infos_35d schema_table_infos_35d_pkey; Type: CONSTRAINT; Schema: public; Owner: - +-- + ALTER TABLE ONLY public.schema_table_infos_35d ADD CONSTRAINT schema_table_infos_35d_pkey PRIMARY KEY (schema_table_id, collected_at); + -- -- Name: schema_table_infos_35d_20240920; Type: TABLE; Schema: public; Owner: - -- @@ -89,12 +103,14 @@ CREATE TABLE public.schema_table_infos_35d_20240920 ( server_id uuid NOT NULL ); + -- -- Name: schema_table_infos_35d_20240920; Type: TABLE ATTACH; Schema: public; Owner: - -- ALTER TABLE ONLY public.schema_table_infos_35d ATTACH PARTITION public.schema_table_infos_35d_20240920 FOR VALUES FROM ('2024-09-19 19:00:00-05') TO ('2024-09-20 19:00:00-05'); + -- -- Name: schema_table_infos_35d_20240920 schema_table_infos_35d_20240920_pkey; Type: CONSTRAINT; Schema: public; Owner: - -- @@ -102,18 +118,21 @@ ALTER TABLE ONLY public.schema_table_infos_35d ATTACH PARTITION public.schema_ta ALTER TABLE ONLY public.schema_table_infos_35d_20240920 ADD CONSTRAINT schema_table_infos_35d_20240920_pkey PRIMARY KEY (schema_table_id, collected_at); + -- -- Name: schema_table_infos_35d_20240920_server_id_idx; Type: INDEX; Schema: public; Owner: - -- CREATE INDEX schema_table_infos_35d_20240920_server_id_idx ON public.schema_table_infos_35d_20240920 USING btree (server_id); + -- -- Name: schema_table_infos_35d_2024092_schema_table_id_collected_at_idx; Type: INDEX; Schema: public; Owner: - -- CREATE INDEX schema_table_infos_35d_2024092_schema_table_id_collected_at_idx ON public.schema_table_infos_35d_20240920 USING btree (schema_table_id, collected_at DESC); + -- -- Name: schema_table_infos_35d_20240920_pkey; Type: INDEX ATTACH; Schema: public; Owner: - -- diff --git a/test/expectations/partitions.sql b/test/expectations/partitions.sql index 04ad7e4..bc222f5 100644 --- a/test/expectations/partitions.sql +++ b/test/expectations/partitions.sql @@ -28,4 +28,3 @@ PARTITION BY RANGE (collected_at); ALTER TABLE ONLY public.schema_table_infos_35d ADD CONSTRAINT schema_table_infos_35d_pkey PRIMARY KEY (schema_table_id, collected_at); -