Skip to content

Commit

Permalink
Improve performance for databases with many partitions
Browse files Browse the repository at this point in the history
  • Loading branch information
seanlinsley committed Oct 26, 2024
1 parent 806292c commit cec477f
Show file tree
Hide file tree
Showing 3 changed files with 43 additions and 23 deletions.
46 changes: 24 additions & 22 deletions lib/activerecord-clean-db-structure/clean_dump.rb
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,13 @@ def initialize(dump, options = {})
end

# Runs every cleanup pass over the dump, in a fixed order.
#
# Order matters: clean_partition_tables splits the dump on "\n\n\n" to treat
# each comment header and its statement as one unit, so it has to run before
# any other pass rewrites whitespace or blanks out lines.
def run
clean_partition_tables # Must be first because it makes assumptions about string format
# General cleanup done with in-place gsub! calls on the dump (trailing whitespace, leading newline, ...)
clean
# Removes table definitions that use INHERITS
clean_inherited_tables
# Option-driven cleanup (e.g. options[:order_schema_migrations_values])
clean_options
end

def clean
# Remove trailing whitespace
dump.gsub!(/[ \t]+$/, '')
dump.gsub!(/\A\n/, '')
Expand Down Expand Up @@ -57,8 +64,9 @@ def run
dump.gsub!(/^-- .*_id_seq; Type: SEQUENCE.*/, '')
dump.gsub!(/^-- Name: (\w+\s+)?\w+_pkey; Type: CONSTRAINT$/, '')
end
end

# Remove inherited tables
def clean_inherited_tables
inherited_tables_regexp = /-- Name: ([\w\.]+); Type: TABLE\n\n[^;]+?INHERITS \([\w\.]+\);/m
inherited_tables = dump.scan(inherited_tables_regexp).map(&:first)
dump.gsub!(inherited_tables_regexp, '')
Expand All @@ -71,8 +79,9 @@ def run
end
dump.gsub!(index_regexp, '')
end
end

# Remove partitioned tables
def clean_partition_tables
partitioned_tables = []

# Postgres 12 pg_dump will output separate ATTACH PARTITION statements (even when run against an 11 or older server)
Expand All @@ -83,30 +92,23 @@ def run
partitioned_tables_regexp2 = /-- Name: ([\w\.]+); Type: TABLE\n\n[^;]+?PARTITION OF [\w\.]+\n[^;]+?;/m
partitioned_tables += dump.scan(partitioned_tables_regexp2).map(&:first)

partitioned_tables.each do |partitioned_table|
_partitioned_schema_name, partitioned_table_name_only = partitioned_table.split('.', 2)
dump.gsub!(/-- Name: #{partitioned_table_name_only}; Type: TABLE ATTACH/, '')
dump.gsub!(/-- Name: #{partitioned_table_name_only}; Type: TABLE/, '')
dump.gsub!(/CREATE TABLE #{partitioned_table} \([^;]+;/m, '')
dump.gsub!(/ALTER TABLE ONLY ([\w_\.]+) ATTACH PARTITION #{partitioned_table}[^;]+;/m, '')

dump.gsub!(/ALTER TABLE ONLY ([\w_]+\.)?#{partitioned_table}[^;]+;/, '')
dump.gsub!(/-- Name: #{partitioned_table} [^;]+; Type: DEFAULT/, '')

index_regexp = /CREATE (UNIQUE )?INDEX ([\w_]+) ON ([\w_]+\.)?#{partitioned_table}[^;]+;/m
dump.scan(index_regexp).each do |m|
partitioned_table_index = m[1]
dump.gsub!("-- Name: #{partitioned_table_index}; Type: INDEX ATTACH", '')
dump.gsub!("-- Name: #{partitioned_table_index}; Type: INDEX", '')
dump.gsub!(/ALTER INDEX ([\w_\.]+) ATTACH PARTITION ([\w_]+\.)?#{partitioned_table_index};/, '')
end
dump.gsub!(index_regexp, '')
dump.gsub!(/-- Name: #{partitioned_table_name_only}_pkey; Type: INDEX ATTACH/, '')
dump.gsub!(/ALTER INDEX ([\w_]+\.)?[\w_]+_pkey ATTACH PARTITION #{partitioned_table}_pkey;/, '')
# We assume that a comment + schema statement pair has 3 trailing newlines.
# This makes it easier to drop both the comment and statement at once.
statements = dump.split("\n\n\n")
names = []
partitioned_tables.each { |table| names << table.split('.', 2)[1] }
if names.any?
dump.scan(/CREATE (UNIQUE )?INDEX ([\w_]+) ON ([\w_]+\.)?(#{names.join('|')})[^;]+;/m).each { |m| names << m[1] }
end
statements.reject! { |stmt| names.any? { |name| stmt.include?(name) } }
@dump = statements.join("\n\n")
@dump << "\n" if @dump[-1] != "\n"

# This is mostly done to allow restoring Postgres 11 output on Postgres 10
dump.gsub!(/CREATE INDEX ([\w]+) ON ONLY/, 'CREATE INDEX \\1 ON')
end

def clean_options
if options[:order_schema_migrations_values] == true
schema_migrations_cleanup
else
Expand Down
19 changes: 19 additions & 0 deletions test/data/partitions.sql
Original file line number Diff line number Diff line change
Expand Up @@ -10,18 +10,21 @@ CREATE TABLE public.autovacuum_run_stats_35d (
)
PARTITION BY RANGE (occurred_at);


--
-- Name: index_autovacuum_run_stats_35d_on_schema_table_id_occurred_at; Type: INDEX
--

CREATE INDEX index_autovacuum_run_stats_35d_on_schema_table_id_occurred_at ON public.autovacuum_run_stats_35d USING btree (schema_table_id, occurred_at);


--
-- Name: index_autovacuum_run_stats_35d_on_server_id_and_occurred_at; Type: INDEX
--

CREATE INDEX index_autovacuum_run_stats_35d_on_server_id_and_occurred_at ON public.autovacuum_run_stats_35d USING btree (server_id, occurred_at);


--
-- Name: autovacuum_run_stats_35d_20241026; Type: TABLE; Schema: public; Owner: -
--
Expand All @@ -33,37 +36,43 @@ CREATE TABLE public.autovacuum_run_stats_35d_20241026 (
occurred_at timestamp with time zone NOT NULL
);


--
-- Name: autovacuum_run_stats_35d_20241026; Type: TABLE ATTACH; Schema: public; Owner: -
--

ALTER TABLE ONLY public.autovacuum_run_stats_35d ATTACH PARTITION public.autovacuum_run_stats_35d_20241026 FOR VALUES FROM ('2024-10-25 19:00:00-05') TO ('2024-10-26 19:00:00-05');


--
-- Name: autovacuum_run_stats_35d_20241026 autovacuum_run_stats_35d_20241026_pkey; Type: CONSTRAINT; Schema: public; Owner: -
--

ALTER TABLE ONLY public.autovacuum_run_stats_35d_20241026
ADD CONSTRAINT autovacuum_run_stats_35d_20241026_pkey PRIMARY KEY (autovacuum_run_stats_id);


--
-- Name: autovacuum_run_stats_35d_20241026_server_id_occurred_at_idx; Type: INDEX; Schema: public; Owner: -
--

CREATE INDEX autovacuum_run_stats_35d_20241026_server_id_occurred_at_idx ON public.autovacuum_run_stats_35d_20241026 USING btree (server_id, occurred_at);


--
-- Name: autovacuum_run_stats_35d_2024_schema_table_id_occurred_at_idx25; Type: INDEX; Schema: public; Owner: -
--

CREATE INDEX autovacuum_run_stats_35d_2024_schema_table_id_occurred_at_idx25 ON public.autovacuum_run_stats_35d_20241026 USING btree (schema_table_id, occurred_at);


--
-- Name: autovacuum_run_stats_35d_20241026_server_id_occurred_at_idx; Type: INDEX ATTACH; Schema: public; Owner: -
--

ALTER INDEX public.index_autovacuum_run_stats_35d_on_server_id_and_occurred_at ATTACH PARTITION public.autovacuum_run_stats_35d_20241026_server_id_occurred_at_idx;


--
-- Name: schema_table_infos_35d; Type: TABLE; Schema: public; Owner: -
--
Expand All @@ -76,9 +85,14 @@ CREATE TABLE public.schema_table_infos_35d (
PARTITION BY RANGE (collected_at);


--
-- Name: schema_table_infos_35d schema_table_infos_35d_pkey; Type: CONSTRAINT; Schema: public; Owner: -
--

ALTER TABLE ONLY public.schema_table_infos_35d
ADD CONSTRAINT schema_table_infos_35d_pkey PRIMARY KEY (schema_table_id, collected_at);


--
-- Name: schema_table_infos_35d_20240920; Type: TABLE; Schema: public; Owner: -
--
Expand All @@ -89,31 +103,36 @@ CREATE TABLE public.schema_table_infos_35d_20240920 (
server_id uuid NOT NULL
);


--
-- Name: schema_table_infos_35d_20240920; Type: TABLE ATTACH; Schema: public; Owner: -
--

ALTER TABLE ONLY public.schema_table_infos_35d ATTACH PARTITION public.schema_table_infos_35d_20240920 FOR VALUES FROM ('2024-09-19 19:00:00-05') TO ('2024-09-20 19:00:00-05');


--
-- Name: schema_table_infos_35d_20240920 schema_table_infos_35d_20240920_pkey; Type: CONSTRAINT; Schema: public; Owner: -
--

ALTER TABLE ONLY public.schema_table_infos_35d_20240920
ADD CONSTRAINT schema_table_infos_35d_20240920_pkey PRIMARY KEY (schema_table_id, collected_at);


--
-- Name: schema_table_infos_35d_20240920_server_id_idx; Type: INDEX; Schema: public; Owner: -
--

CREATE INDEX schema_table_infos_35d_20240920_server_id_idx ON public.schema_table_infos_35d_20240920 USING btree (server_id);


--
-- Name: schema_table_infos_35d_2024092_schema_table_id_collected_at_idx; Type: INDEX; Schema: public; Owner: -
--

CREATE INDEX schema_table_infos_35d_2024092_schema_table_id_collected_at_idx ON public.schema_table_infos_35d_20240920 USING btree (schema_table_id, collected_at DESC);


--
-- Name: schema_table_infos_35d_20240920_pkey; Type: INDEX ATTACH; Schema: public; Owner: -
--
Expand Down
1 change: 0 additions & 1 deletion test/expectations/partitions.sql
Original file line number Diff line number Diff line change
Expand Up @@ -28,4 +28,3 @@ PARTITION BY RANGE (collected_at);

ALTER TABLE ONLY public.schema_table_infos_35d
ADD CONSTRAINT schema_table_infos_35d_pkey PRIMARY KEY (schema_table_id, collected_at);

0 comments on commit cec477f

Please sign in to comment.