From c55c663432cd38cc84448e4332fea3114b775685 Mon Sep 17 00:00:00 2001 From: "Michael J. Giarlo" Date: Tue, 10 Oct 2017 10:49:14 -0700 Subject: [PATCH] Validate local file ingestions against a whitelist of directories Ports samvera/hyrax#1789 to Sufia for inclusion in 7.4.1 release --- .../sufia/create_with_remote_files_actor.rb | 22 +++++++++++ .../sufia/templates/config/sufia.rb | 16 ++++++++ lib/sufia/configuration.rb | 11 ++++++ .../create_with_remote_files_actor_spec.rb | 38 +++++++++++++++++++ spec/lib/sufia/configuration_spec.rb | 1 + 5 files changed, 88 insertions(+) diff --git a/app/actors/sufia/create_with_remote_files_actor.rb b/app/actors/sufia/create_with_remote_files_actor.rb index dc565f2b55..f09e315503 100644 --- a/app/actors/sufia/create_with_remote_files_actor.rb +++ b/app/actors/sufia/create_with_remote_files_actor.rb @@ -19,6 +19,10 @@ def attach_files(remote_files) return true unless remote_files remote_files.each do |file_info| next if file_info.blank? || file_info[:url].blank? + unless validate_remote_url(file_info[:url]) + Rails.logger.error "User #{user.user_key} attempted to ingest file from url #{file_info[:url]}, which doesn't pass validation" + return false + end create_file_from_url(file_info[:url], file_info[:file_name]) end true @@ -44,5 +48,23 @@ def log(user) CurationConcerns::Operation.create!(user: user, operation_type: "Attach Remote File") end + + def validate_remote_url(url) + uri = URI.parse(URI.encode(url)) + if uri.scheme == 'file' + path = File.absolute_path(URI.decode(uri.path)) + whitelisted_ingest_dirs.any? do |dir| + path.start_with?(dir) && path.length > dir.length + end + else + # TODO: It might be a good idea to validate other URLs as well. + # The server can probably access URLs the user can't. + true + end + end + + def whitelisted_ingest_dirs + Sufia.config.whitelisted_ingest_dirs + end end end diff --git a/lib/generators/sufia/templates/config/sufia.rb b/lib/generators/sufia/templates/config/sufia.rb index 10472e5a12..98dc03e327 100644 --- a/lib/generators/sufia/templates/config/sufia.rb +++ b/lib/generators/sufia/templates/config/sufia.rb @@ -101,6 +101,22 @@ # If you use a multi-server architecture, this MUST be a shared volume. # config.derivatives_path = File.join(Rails.root, 'tmp', 'derivatives') + ## Whitelist all directories which can be used to ingest from the local file + # system. + # + # Any file, and only those, that is anywhere under one of the specified + # directories can be used by CreateWithRemoteFilesActor to add local files + # to works. Files uploaded by the user are handled separately and the + # temporary directory for those need not be included here. + # + # Default value includes BrowseEverything.config['file_system'][:home] if it + # is set, otherwise default is an empty list. You should only need to change + # this if you have custom ingestions using CreateWithRemoteFilesActor to + # ingest files from the file system that are not part of the BrowseEverything + # mount point. + # + # config.whitelisted_ingest_dirs = [] + # If browse-everything has been configured, load the configs. Otherwise, set to nil. begin if defined? BrowseEverything diff --git a/lib/sufia/configuration.rb b/lib/sufia/configuration.rb index 0767f3390a..6a48f51481 100644 --- a/lib/sufia/configuration.rb +++ b/lib/sufia/configuration.rb @@ -155,5 +155,16 @@ def subject_prefix def model_to_create @model_to_create ||= ->(_attributes) { Sufia.primary_work_type.model_name.name } end + + attr_writer :whitelisted_ingest_dirs + # List of directories which can be used for local file system ingestion. + def whitelisted_ingest_dirs + @whitelisted_ingest_dirs ||= \ + if defined? BrowseEverything + Array.wrap(BrowseEverything.config['file_system'].try(:[], :home)).compact + else + [] + end + end end end diff --git a/spec/actors/sufia/create_with_remote_files_actor_spec.rb b/spec/actors/sufia/create_with_remote_files_actor_spec.rb index d4bdcc6e5f..9ee46cbf1e 100644 --- a/spec/actors/sufia/create_with_remote_files_actor_spec.rb +++ b/spec/actors/sufia/create_with_remote_files_actor_spec.rb @@ -51,11 +51,27 @@ file_name: "here.txt" }] end + before do + allow(Sufia.config).to receive(:whitelisted_ingest_dirs).and_return(["/local/file/"]) + end + it "attaches files" do expect(IngestLocalFileJob).to receive(:perform_later).with(FileSet, "/local/file/here.txt", user) expect(actor.create(attributes)).to be true end + context "with files from non-whitelisted directories" do + let(:file) { "file:///local/otherdir/test.txt" } + + # rubocop:disable RSpec/AnyInstance + it "doesn't attach files" do + expect_any_instance_of(described_class).to receive(:validate_remote_url).and_call_original + expect(IngestLocalFileJob).not_to receive(:perform_later) + expect(actor.create(attributes)).to be false + end + # rubocop:enable RSpec/AnyInstance + end + context "with spaces" do let(:file) { "file:///local/file/ pigs .txt" } it "attaches files" do @@ -64,4 +80,26 @@ end end end + + describe "#validate_remote_url" do + before do + allow(Sufia.config).to receive(:whitelisted_ingest_dirs).and_return(['/test/', '/local/file/']) + end + + it "accepts file: urls in whitelisted directories" do + expect(actor.actor.send(:validate_remote_url, "file:///local/file/test.txt")).to be true + expect(actor.actor.send(:validate_remote_url, "file:///local/file/subdirectory/test.txt")).to be true + expect(actor.actor.send(:validate_remote_url, "file:///test/test.txt")).to be true + end + + it "rejects file: urls outside whitelisted directories" do + expect(actor.actor.send(:validate_remote_url, "file:///tmp/test.txt")).to be false + expect(actor.actor.send(:validate_remote_url, "file:///test/../tmp/test.txt")).to be false + expect(actor.actor.send(:validate_remote_url, "file:///test/")).to be false + end + + it "accepts other types of urls" do + expect(actor.actor.send(:validate_remote_url, "https://example.com/test.txt")).to be true + end + end end diff --git a/spec/lib/sufia/configuration_spec.rb b/spec/lib/sufia/configuration_spec.rb index d36935e27e..f7b06d8e7a 100644 --- a/spec/lib/sufia/configuration_spec.rb +++ b/spec/lib/sufia/configuration_spec.rb @@ -28,4 +28,5 @@ it { is_expected.to respond_to(:contact_email) } it { is_expected.to respond_to(:subject_prefix) } it { is_expected.to respond_to(:model_to_create) } + it { is_expected.to respond_to(:whitelisted_ingest_dirs) } end