Skip to content

Commit

Permalink
adds migration for path index unique on harvest items + removing exis…
Browse files Browse the repository at this point in the history
…ting duplicates

adds migration test
fixes cascading factory bot in harvest item request spec
  • Loading branch information
andrew-1234 committed Feb 12, 2025
1 parent cd593a4 commit 3113a32
Show file tree
Hide file tree
Showing 6 changed files with 250 additions and 16 deletions.
2 changes: 1 addition & 1 deletion app/models/harvest_item.rb
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ class HarvestItem < ApplicationRecord
# If the file has fixable mistakes they can be changed by the user here
# (e.g. missing utc offset / site_id for a folder)
STATUS_METADATA_GATHERED = :metadata_gathered
# the file is not valid for some reason we now about (missing utc offset which the user didn't fix)
# the file is not valid for some reason we know about (missing utc offset which the user didn't fix)
STATUS_FAILED = :failed
# successfully harvested the file, there will be an audio_recording that is available now
STATUS_COMPLETED = :completed
Expand Down
36 changes: 36 additions & 0 deletions db/migrate/20250211012005_add_path_unique_to_harvest_items.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
# frozen_string_literal: true

# Remove duplicate paths from harvest_items and add unique constraint to path
class AddPathUniqueToHarvestItems < ActiveRecord::Migration[7.2]
# def change
# reversible do |change|
# change.up do
# execute <<~SQL.squish
# DELETE FROM harvest_items
# WHERE id IN (
# SELECT id
# FROM (
# SELECT id,
# ROW_NUMBER() OVER (PARTITION BY path ORDER BY created_at DESC) AS rn
# FROM harvest_items
# ) AS ranked
# WHERE ranked.rn > 1
# );
# SQL

# remove_index :harvest_items, :path
# add_index :harvest_items, :path, unique: true
# end

# change.down do
# remove_index :harvest_items, :path
# add_index :harvest_items, :path

# Rails.logger.warn(
# 'This migration is backwards compatible but does not support reverse migration.
# `Path` unique constraint reverted but no data has been changed.'
# )
# end
# end
# end
end
5 changes: 3 additions & 2 deletions db/structure.sql
Original file line number Diff line number Diff line change
Expand Up @@ -701,8 +701,8 @@ CREATE TABLE public.audio_events (
channel integer,
provenance_id integer,
score numeric,
import_file_index integer,
audio_event_import_file_id bigint
audio_event_import_file_id integer,
import_file_index integer
);


Expand Down Expand Up @@ -4350,6 +4350,7 @@ ALTER TABLE ONLY public.tags
SET search_path TO "$user", public;

INSERT INTO "schema_migrations" (version) VALUES
('20250211012005'),
('20250120064731'),
('20250113012304'),
('20241106015941'),
Expand Down
2 changes: 2 additions & 0 deletions spec/factories/audio_recording_factory.rb
Original file line number Diff line number Diff line change
Expand Up @@ -66,10 +66,12 @@
status { 'ready' }

creator

uploader do
# admin user
User.find_by(roles_mask: 1)
end

site

trait :status_new do
Expand Down
180 changes: 180 additions & 0 deletions spec/migrations/add_path_unique_to_harvest_items_spec.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,180 @@
# frozen_string_literal: true

# matches against the migration name
# require_migration!

describe 'AddPathUniqueToHarvestItem' do
let(:user) do
{
id: 1,
user_name: 'user1',
email: '[email protected]',
encrypted_password: 'drowssap'
}
end

let(:project) do
{
id: 1,
name: 'project1',
creator_id: 1
}
end

let(:site) do
{
id: 1,
name: 'site1',
creator_id: 1
}
end

let(:audio_recording) do
{
id: 1,
data_length_bytes: 1000,
duration_seconds: 100.0,
file_hash: 'hash1',
media_type: 'flac',
recorded_date: '2025-02-11 09:21:10.782309000 +0200',
uuid: 'uuid1',
creator_id: 1,
site_id: 1,
uploader_id: 1
}
end

let(:harvest) do
{
id: 1,
project_id: 1,
created_at: '2025-02-12 12:21:10+1000',
updated_at: '2025-02-12 12:21:10+1000'
}
end

before do
stub_const('USER', user)
stub_const('PROJECT', project)
stub_const('SITE', site)
stub_const('AUDIO_RECORDING', audio_recording)
stub_const('HARVEST', harvest)

ActiveRecord::Base.connection.execute(
<<~SQL.squish
BEGIN;
DELETE FROM users;
DELETE FROM projects;
DELETE FROM sites;
DELETE FROM audio_recordings;
DELETE FROM harvests;
DELETE FROM harvest_items;
INSERT INTO users (id, user_name, email, encrypted_password)
VALUES (
#{USER[:id]},
#{ActiveRecord::Base.connection.quote(USER[:user_name])},
#{ActiveRecord::Base.connection.quote(USER[:email])},
#{ActiveRecord::Base.connection.quote(USER[:encrypted_password])});
INSERT INTO projects (id, name, creator_id)
VALUES (
#{PROJECT[:id]},
#{ActiveRecord::Base.connection.quote(PROJECT[:name])},
#{PROJECT[:creator_id]});
INSERT INTO sites (id, name, creator_id)
VALUES (
#{SITE[:id]},
#{ActiveRecord::Base.connection.quote(SITE[:name])},
#{SITE[:creator_id]});
INSERT INTO audio_recordings (
id, data_length_bytes, duration_seconds, file_hash,
media_type, recorded_date, uuid, creator_id,
site_id, uploader_id
)
VALUES (
#{AUDIO_RECORDING[:id]},
#{AUDIO_RECORDING[:data_length_bytes]},
#{AUDIO_RECORDING[:duration_seconds]},
#{ActiveRecord::Base.connection.quote(AUDIO_RECORDING[:file_hash])},
#{ActiveRecord::Base.connection.quote(AUDIO_RECORDING[:media_type])},
#{ActiveRecord::Base.connection.quote(AUDIO_RECORDING[:recorded_date])},
#{ActiveRecord::Base.connection.quote(AUDIO_RECORDING[:uuid])},
#{AUDIO_RECORDING[:creator_id]},
#{AUDIO_RECORDING[:site_id]},
#{AUDIO_RECORDING[:uploader_id]}
);
INSERT INTO harvests (id, project_id, created_at, updated_at)
VALUES (
#{HARVEST[:id]},
#{HARVEST[:project_id]},
#{ActiveRecord::Base.connection.quote(HARVEST[:created_at])},
#{ActiveRecord::Base.connection.quote(HARVEST[:updated_at])});
COMMIT;
SQL
)

stub_const('HARVEST_ITEMS', create_harvest_items)

harvest_values = ''
HARVEST_ITEMS.each.with_index(1) do |example, index|
delimit = index == HARVEST_ITEMS.length ? '' : ",\n"
harvest_values += "(#{example})#{delimit}"
end

ActiveRecord::Base.connection.execute(
<<~SQL.squish
INSERT INTO harvest_items (id, path, status, info, audio_recording_id, uploader_id, harvest_id, deleted, created_at, updated_at)
VALUES #{harvest_values};
SQL
)
end

it 'has an irrelevant test' do
hi = create(:harvest_item)

expect(hi).to be_valid
end

# Create duplicate harvest items for combinations of status and path
# @param entries_per_combination [Integer]
# @param blob [Hash] the jsonb data to be stored in the info column
# @return [Array<String>] values for each harvest item
def create_harvest_items(entries_per_combination: 3, blob: { error: 'null' })
statuses = [
"'metadata_gathered'",
"'failed'",
"'failed'"
]
paths = [
"'harvest_1/path/one'",
"'harvest_1/path/two'",
"'harvest_1/path/three'"
]

json_data = ActiveRecord::Base.connection.quote(blob.to_json)
datetime = ActiveRecord::Base.sanitize_sql_array(["'#{Time.zone.now}'"])

cases = statuses.zip(paths).flat_map { |status, path|
Array.new(entries_per_combination) {
# path, status, info, audio_recording_id, uploader_id, harvest_id, deleted, created_at, updated_at
[path, status, json_data, 'NULL', 1, 1, false, datetime, datetime]
}
}

# append a status 'completed' harvest as a duplicate
cases << ["'harvest_1/path/three'", "'completed'", json_data, 1, 1, 1, true, datetime, datetime]

# prepend a value for 'id' to each sub-array (representing a harvest 'item')
# collapse each 'item', into a single string
cases.map!.with_index(1) { |item, index|
item.unshift(index)
item.join(', ')
}.freeze
end
end
41 changes: 28 additions & 13 deletions spec/requests/harvest_items_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -8,15 +8,24 @@
prepare_harvest

before do
@hi1 = create_with_validations(fixable: 3, not_fixable: 1)
@hi2 = create_with_validations(fixable: 0, not_fixable: 1)
@hi3 = create_with_validations(fixable: 1, not_fixable: 0)
@hi4 = create_with_validations(fixable: 0, not_fixable: 1, sub_directories: 'a/b/c')
@hi5 = create_with_validations(fixable: 0, not_fixable: 0, sub_directories: 'a/b/c')
@hi6 = create_with_validations(fixable: 0, not_fixable: 0, sub_directories: 'a/b/d')
@hi7 = create_with_validations(fixable: 1, not_fixable: 0, sub_directories: 'a/b')
@hi8 = create_with_validations(fixable: 1, not_fixable: 0, sub_directories: 'a/b')
@hi9 = create_with_validations(fixable: 1, not_fixable: 0, sub_directories: 'z/b')
@hi1 = create_with_validations(fixable: 3, not_fixable: 1,
audio: create(:audio_recording, site: site, creator: owner_user, uploader: admin_user))
@hi2 = create_with_validations(fixable: 0, not_fixable: 1,
audio: create(:audio_recording, site: site, creator: owner_user, uploader: admin_user))
@hi3 = create_with_validations(fixable: 1, not_fixable: 0,
audio: create(:audio_recording, site: site, creator: owner_user, uploader: admin_user))
@hi4 = create_with_validations(fixable: 0, not_fixable: 1, sub_directories: 'a/b/c',
audio: create(:audio_recording, site: site, creator: owner_user, uploader: admin_user))
@hi5 = create_with_validations(fixable: 0, not_fixable: 0, sub_directories: 'a/b/c',
audio: create(:audio_recording, site: site, creator: owner_user, uploader: admin_user))
@hi6 = create_with_validations(fixable: 0, not_fixable: 0, sub_directories: 'a/b/d',
audio: create(:audio_recording, site: site, creator: owner_user, uploader: admin_user))
@hi7 = create_with_validations(fixable: 1, not_fixable: 0, sub_directories: 'a/b',
audio: create(:audio_recording, site: site, creator: owner_user, uploader: admin_user))
@hi8 = create_with_validations(fixable: 1, not_fixable: 0, sub_directories: 'a/b',
audio: create(:audio_recording, site: site, creator: owner_user, uploader: admin_user))
@hi9 = create_with_validations(fixable: 1, not_fixable: 0, sub_directories: 'z/b',
audio: create(:audio_recording, site: site, creator: owner_user, uploader: admin_user))
end

it 'can query for harvest items (root path)' do
Expand Down Expand Up @@ -140,7 +149,7 @@

expect(results.headers).to eq ['id', 'harvest_id', 'path', 'status', 'audio_recording_id']
rows = results.map(&:to_h)
HarvestItem.all.each { |item|
HarvestItem.find_each { |item|
expect(rows).to include(a_hash_including(
'id' => item.id.to_s,
'harvest_id' => item.harvest_id.to_s,
Expand Down Expand Up @@ -247,7 +256,7 @@
end
end

def create_with_validations(fixable: 0, not_fixable: 0, sub_directories: nil)
def create_with_validations(fixable: 0, not_fixable: 0, sub_directories: nil, audio: nil)
validations = []
fixable.times do
validations << BawWorkers::Jobs::Harvest::ValidationResult.new(
Expand All @@ -270,7 +279,13 @@ def create_with_validations(fixable: 0, not_fixable: 0, sub_directories: nil)

path = generate_recording_name(Time.zone.now)
path = File.join(*[harvest.upload_directory_name, sub_directories, path].compact)

create(:harvest_item, path:, status: HarvestItem::STATUS_METADATA_GATHERED, info:, harvest:)
create(
:harvest_item,
path:,
status: HarvestItem::STATUS_METADATA_GATHERED,
info:, harvest:,
uploader: owner_user,
audio_recording: audio
)
end
end

0 comments on commit 3113a32

Please sign in to comment.