Skip to content

Commit

Permalink
Seed schools database with a sample of 1000 schools
Browse files Browse the repository at this point in the history
  • Loading branch information
martyn-w committed Nov 7, 2024
1 parent 554c716 commit 445ed83
Show file tree
Hide file tree
Showing 6 changed files with 75 additions and 1 deletion.
18 changes: 18 additions & 0 deletions app/models/bookings/data/gias_data_file.rb
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ class GiasDataFile
EXPECTED_HEADER = '"URN","LA (code)","LA (name)","EstablishmentNumber","EstablishmentName"'.freeze
EXPECTED_FIRST_ROW = %r(\A\d{3}\d+,)
TEMP_PATH = Rails.root.join('tmp', 'gias').freeze
SAMPLE_COUNT = 1000
attr_reader :today

def initialize
Expand All @@ -14,6 +15,10 @@ def todays_file
@todays_file ||= TEMP_PATH.join "edubase-#{today}.csv"
end

# Location of today's sample CSV inside TEMP_PATH ("sample-<today>.csv").
# The value is computed once and then reused on later calls.
def todays_sample_file
  return @todays_sample_file if @todays_sample_file

  @todays_sample_file = TEMP_PATH.join("sample-#{today}.csv")
end

def remove_todays_file!
Rails.logger.debug("Deleting todays edubase data")

Expand All @@ -34,6 +39,10 @@ def path
already_downloaded? ? todays_file : fetch_file
end

# Path to today's sample file: the existing file is reused when one is
# already on disk, otherwise a new sample is generated via sample_file.
def sample_path
  return todays_sample_file if already_sampled?

  sample_file
end

# URL of the public edubase download for the date held in `today`.
def source_url
  filename = "edubasealldata#{today}.csv"
  "https://ea-edubase-api-prod.azurewebsites.net/edubase/downloads/public/#{filename}"
end
Expand Down Expand Up @@ -74,6 +83,10 @@ def already_downloaded?
File.exist? todays_file
end

# True when today's sample file is already present on disk.
def already_sampled?
  sample = todays_sample_file
  File.exist?(sample)
end

def fetch_file
create_temp_dir

Expand All @@ -85,6 +98,11 @@ def fetch_file
end
end

# Writes a sample of today's GiaS file to todays_sample_file: the CSV header
# row followed by the first SAMPLE_COUNT data rows.
#
# Returns the sample file's path as a String on success, or nil when the
# `head` command fails (in which case no sample file is left behind).
def sample_file
  source = path.to_s
  target = todays_sample_file.to_s

  # The argument-array form bypasses the shell, so paths containing spaces or
  # shell metacharacters are passed through verbatim; `out:` does the
  # redirection that previously required a single shell command line.
  # The +1 accounts for the header row so the sample holds SAMPLE_COUNT schools.
  written = system("head", "-n", (SAMPLE_COUNT + 1).to_s, source, out: target)
  return target if written

  # The redirect target is created even when the command fails; remove it so a
  # later already_sampled? check does not mistake it for a valid sample.
  File.delete(target) if File.exist?(target)
  nil
end

def download_and_save
Rails.logger.debug("Downloading latest edubase data")

Expand Down
16 changes: 16 additions & 0 deletions app/models/bookings/school_sync.rb
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,10 @@ def import_all
end
end

# Runs SchoolMassImporter over the rows returned by data_sample, passing
# email_override through, and returns whatever the importer's #import returns.
def import_sample
  importer = Bookings::Data::SchoolMassImporter.new(data_sample, email_override)
  importer.import
end

# update any school records that differ from edubase source
def update_all
data_in_batches do |batch|
Expand All @@ -48,6 +52,10 @@ def gias_data_file
Bookings::Data::GiasDataFile.new.path
end

# Path of the sample file, as reported by a fresh GiasDataFile's #sample_path.
def gias_data_sample_file
  data_file = Bookings::Data::GiasDataFile.new
  data_file.sample_path
end

def data_in_batches
rows = []
CSV.foreach(gias_data_file, headers: true, encoding: "ISO-8859-1:UTF-8") do |row|
Expand All @@ -63,4 +71,12 @@ def data_in_batches

true
end

# Reads the whole sample file into memory and returns its rows as an Array.
# The first line is treated as headers, and the content is transcoded from
# ISO-8859-1 to UTF-8 while reading.
def data_sample
  csv_options = { headers: true, encoding: "ISO-8859-1:UTF-8" }
  CSV.foreach(gias_data_sample_file, **csv_options).to_a
end
end
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
class AddDfeSigninOrganisationUuidToBookingsSchools < ActiveRecord::Migration[5.2]
class AddDfESigninOrganisationUuidToBookingsSchools < ActiveRecord::Migration[5.2]
def change
add_column :bookings_schools, :dfe_signin_organisation_uuid, :uuid, null: true
end
Expand Down
1 change: 1 addition & 0 deletions db/seeds.rb
Original file line number Diff line number Diff line change
Expand Up @@ -15,4 +15,5 @@

# rubocop:disable Rails/Output
puts "\nYou can import all 47000 schools using 'bundle exec rails data:schools:mass_import'"
puts "\nYou can import a sample of 1000 schools using 'bundle exec rails data:schools:sample_import'"
# rubocop:enable Rails/Output
7 changes: 7 additions & 0 deletions lib/tasks/data/manage_schools.rake
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,13 @@ namespace :data do
Bookings::SchoolSync.new(email_override: args[:email_override]).import_all
end

desc "Import a sample of GiaS (EduBase) data from local file"
task :sample_import, %i[email_override] => :environment do |_t, args|
  # email_override is optional; default it to nil when the task is run without it.
  args.with_defaults(email_override: nil)

  sync = Bookings::SchoolSync.new(email_override: args[:email_override])
  sync.import_sample
end

desc "Update schools"
task update: :environment do |_t, _args|
Bookings::SchoolSync.new.update_all
Expand Down
32 changes: 32 additions & 0 deletions spec/models/bookings/data/gias_data_file_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,38 @@
end
end

# Exercises #sample_path in both branches: reusing a sample file that is
# already present, and generating a new one when it is not.
describe '#sample_path' do
# Stub sample_file so no sample is actually generated; the stub hands back the
# expected sample path so both branches can be compared against the same value.
before { allow(subject).to receive(:sample_file) { subject.todays_sample_file } }

context 'with existing file' do
# Pretend the sample is already on disk.
before { allow(subject).to receive(:already_sampled?).and_return true }
# let! evaluates eagerly, so sample_path has been called before each example runs.
let!(:sample_path) { subject.sample_path }

it "will return path of todays sample file" do
expect(sample_path).to eql \
Rails.root.join('tmp', 'gias', "sample-#{today}.csv")
end

# The existing file is reused, so the generator must not have been invoked.
it "will return existing file" do
is_expected.not_to have_received(:sample_file)
end
end

context 'without existing file' do
# Pretend no sample exists yet.
before { allow(subject).to receive(:already_sampled?).and_return false }
# let! evaluates eagerly, so sample_path has been called before each example runs.
let!(:sample_path) { subject.sample_path }

it "will return path of todays file" do
expect(sample_path).to eql \
Rails.root.join('tmp', 'gias', "sample-#{today}.csv")
end

# With no sample present, sample_path must delegate to sample_file.
it "will generate a new file" do
is_expected.to have_received(:sample_file)
end
end
end

context '#remove_old_files' do
let(:yesterday) { Time.zone.yesterday.strftime('%Y%m%d') }
let(:today) { Time.zone.today.strftime('%Y%m%d') }
Expand Down

0 comments on commit 445ed83

Please sign in to comment.