Skip to content

Commit

Permalink
Cleanup and optimize DB schema
Browse files Browse the repository at this point in the history
The database schema was lacking many integrity checks and indexes.
Correct it by first applying a migration that removes all old/stale
data, then creating those indexes.

The driving reason for this is the very slow performance of processing
(specially aggregation) on the new mezuro.org servers. It will hopefully
remove (or at least heavily improve) the superlinear slowdown when the
number of metrics rises, as observed in #207.
  • Loading branch information
danielkza committed Jul 22, 2016
1 parent ae8a0dd commit e2b61ac
Show file tree
Hide file tree
Showing 9 changed files with 128 additions and 1 deletion.
1 change: 1 addition & 0 deletions CHANGELOG.rdoc
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ KalibroProcessor is the processing web service for Mezuro.

== Unreleased

* Optimize database structure by adding foreign keys and indexes where needed
* Fix possible inconsistency in module result creation (lack of transaction)
* Improve acceptance tests by fixing small bugs and adding processing times
* Introduce performance tests for Aggregator
Expand Down
73 changes: 73 additions & 0 deletions db/migrate/20160720185407_clean_inconsistencies.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
class CleanInconsistencies < ActiveRecord::Migration
def self.up
# Unset project reference for repositories with non-existing projects
execute <<-SQL
UPDATE repositories AS r
SET project_id = NULL
WHERE project_id = 0 OR NOT EXISTS (
SELECT 1 FROM projects AS p WHERE p.id = r.project_id
)
SQL

# Delete processings with non-existing repositories
execute <<-SQL
DELETE FROM processings AS p
WHERE NOT EXISTS(
SELECT 1 FROM repositories AS r WHERE r.id = p.repository_id
)
SQL

# Delete process times with non-existing processings
execute <<-SQL
DELETE FROM process_times AS t
WHERE NOT EXISTS (
SELECT 1 FROM processings AS p WHERE p.id = t.processing_id
)
SQL

# Delete module results with non-existing processings
execute <<-SQL
DELETE FROM module_results AS m
WHERE NOT EXISTS (
SELECT 1 FROM processings AS p WHERE p.id = m.processing_id
)
SQL

# Delete kalibro modules with non-existing module results
execute <<-SQL
DELETE FROM kalibro_modules AS k
WHERE NOT EXISTS (
SELECT 1 FROM module_results AS m WHERE m.id = k.module_result_id
)
SQL

# Delete metric results with non-existing module results
execute <<-SQL
DELETE FROM metric_results AS met
WHERE NOT EXISTS (
SELECT 1 FROM module_results AS mod WHERE mod.id = met.module_result_id
)
SQL

# Delete duplicate metric_results. Group them by (module_result, metric_configuration),
# then delete all but the one with the highest ID
# The double wrapping on the inner query is necessary because window functions
# cannot be used in WHERE in PostgreSQL.
execute <<-SQL
DELETE FROM metric_results
WHERE id IN (
SELECT t.id FROM (
SELECT id, ROW_NUMBER() OVER (PARTITION BY module_result_id, metric_configuration_id, "type"
ORDER BY id DESC) AS rnum
FROM metric_results
WHERE "type" = 'TreeMetricResult'
) AS t
WHERE t.rnum > 1
)
SQL
end

def self.down
raise ActiveRecord::IrreversibleMigration
end
end
6 changes: 6 additions & 0 deletions db/migrate/20160720185408_add_indexes_to_kalibro_modules.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
class AddIndexesToKalibroModules < ActiveRecord::Migration
def change
add_foreign_key :kalibro_modules, :module_results, on_delete: :cascade
add_index :kalibro_modules, [:long_name, :granularity]
end
end
6 changes: 6 additions & 0 deletions db/migrate/20160720185409_add_indexes_to_module_results.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
class AddIndexesToModuleResults < ActiveRecord::Migration
def change
add_foreign_key :module_results, :module_results, column: 'parent_id'
add_foreign_key :module_results, :processings, on_delete: :cascade
end
end
11 changes: 11 additions & 0 deletions db/migrate/20160720185410_add_indexes_to_metric_results.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
class AddIndexesToMetricResults < ActiveRecord::Migration
def change
add_foreign_key :metric_results, :module_results, on_delete: :cascade
add_index :metric_results, :type
add_index :metric_results, :module_result_id
add_index :metric_results, :metric_configuration_id
add_index :metric_results, [:module_result_id, :metric_configuration_id],
unique: true, where: "type = 'TreeMetricResult'",
name: 'metric_results_module_res_metric_cfg_uniq_idx'
end
end
6 changes: 6 additions & 0 deletions db/migrate/20160720185411_add_indexes_to_processings.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
class AddIndexesToProcessings < ActiveRecord::Migration
def change
add_foreign_key :processings, :repositories
add_foreign_key :processings, :module_results, column: 'root_module_result_id'
end
end
5 changes: 5 additions & 0 deletions db/migrate/20160720185412_add_indexes_to_process_times.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
class AddIndexesToProcessTimes < ActiveRecord::Migration
def change
add_foreign_key :process_times, :processings, on_delete: :cascade
end
end
5 changes: 5 additions & 0 deletions db/migrate/20160720185413_add_indexes_to_repositories.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
class AddIndexesToRepositories < ActiveRecord::Migration
def change
add_foreign_key :repositories, :projects
end
end
16 changes: 15 additions & 1 deletion db/schema.rb
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
#
# It's strongly recommended that you check this file into your version control system.

ActiveRecord::Schema.define(version: 20151002172231) do
ActiveRecord::Schema.define(version: 20160720185413) do

# These are extensions that must be enabled in order to support this database
enable_extension "plpgsql"
Expand Down Expand Up @@ -40,6 +40,8 @@
t.integer "module_result_id"
end

add_index "kalibro_modules", ["long_name", "granularity"], name: "index_kalibro_modules_on_long_name_and_granularity", using: :btree

create_table "metric_results", force: :cascade do |t|
t.integer "module_result_id"
t.integer "metric_configuration_id"
Expand All @@ -52,7 +54,11 @@
t.integer "related_hotspot_metric_results_id"
end

add_index "metric_results", ["metric_configuration_id"], name: "index_metric_results_on_metric_configuration_id", using: :btree
add_index "metric_results", ["module_result_id", "metric_configuration_id"], name: "metric_results_module_res_metric_cfg_uniq_idx", unique: true, where: "((type)::text = 'TreeMetricResult'::text)", using: :btree
add_index "metric_results", ["module_result_id"], name: "index_metric_results_on_module_result_id", using: :btree
add_index "metric_results", ["related_hotspot_metric_results_id"], name: "index_metric_results_on_related_hotspot_metric_results_id", using: :btree
add_index "metric_results", ["type"], name: "index_metric_results_on_type", using: :btree

create_table "module_results", force: :cascade do |t|
t.float "grade"
Expand Down Expand Up @@ -106,5 +112,13 @@
t.string "branch", default: "master", null: false
end

add_foreign_key "kalibro_modules", "module_results", on_delete: :cascade
add_foreign_key "metric_results", "module_results", on_delete: :cascade
add_foreign_key "metric_results", "related_hotspot_metric_results", column: "related_hotspot_metric_results_id"
add_foreign_key "module_results", "module_results", column: "parent_id"
add_foreign_key "module_results", "processings", on_delete: :cascade
add_foreign_key "process_times", "processings", on_delete: :cascade
add_foreign_key "processings", "module_results", column: "root_module_result_id"
add_foreign_key "processings", "repositories"
add_foreign_key "repositories", "projects"
end

0 comments on commit e2b61ac

Please sign in to comment.