Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

enhance: reject encrypted pdf #436

Merged
merged 4 commits into from
Apr 27, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions Gemfile
Original file line number Diff line number Diff line change
Expand Up @@ -103,3 +103,6 @@ gem 'sidekiq-cron'

# Redis for sidekiq, caching, and action cable (eventually)
gem 'redis'

# PDF reader for validating PDF file submissions
gem 'pdf-reader'
13 changes: 13 additions & 0 deletions Gemfile.lock
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
GEM
remote: https://rubygems.org/
specs:
Ascii85 (1.1.0)
actioncable (7.1.2)
actionpack (= 7.1.2)
activesupport (= 7.1.2)
Expand Down Expand Up @@ -78,6 +79,7 @@ GEM
addressable (2.8.6)
public_suffix (>= 2.0.2, < 6.0)
aes_key_wrap (1.1.0)
afm (0.2.2)
amq-protocol (2.3.2)
ast (2.4.2)
backport (1.2.0)
Expand Down Expand Up @@ -190,6 +192,7 @@ GEM
grape-swagger-rails (0.4.0)
railties (>= 6.0.6.1)
hashdiff (1.1.0)
hashery (2.1.2)
hirb (0.7.3)
http-accept (1.7.0)
http-cookie (1.0.5)
Expand Down Expand Up @@ -269,6 +272,12 @@ GEM
parser (3.2.2.4)
ast (~> 2.4.1)
racc
pdf-reader (2.12.0)
Ascii85 (~> 1.0)
afm (~> 0.2.1)
hashery (~> 2.0)
ruby-rc4
ttfunk
pkg-config (1.5.6)
prism (0.24.0)
psych (5.1.2)
Expand Down Expand Up @@ -395,6 +404,7 @@ GEM
sorbet-runtime (>= 0.5.10782)
ruby-ole (1.2.12.2)
ruby-progressbar (1.13.0)
ruby-rc4 (0.1.5)
ruby-saml (1.13.0)
nokogiri (>= 1.10.5)
rexml
Expand Down Expand Up @@ -453,6 +463,8 @@ GEM
thor (1.3.0)
tilt (2.3.0)
timeout (0.4.1)
ttfunk (1.8.0)
bigdecimal (~> 3.1)
typhoeus (1.4.1)
ethon (>= 0.9.0)
tzinfo (2.0.6)
Expand Down Expand Up @@ -503,6 +515,7 @@ DEPENDENCIES
moss_ruby (>= 1.1.4)
mysql2
net-smtp
pdf-reader
puma
rack-cors
rails (~> 7.0)
Expand Down
5 changes: 3 additions & 2 deletions app/api/submission/portfolio_api.rb
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,9 @@ class PortfolioApi < Grape::API
kind = params[:kind]

# Check that the file is OK to accept
unless FileHelper.accept_file(file, name, kind)
error!({ error: "'#{file[:filename]}' is not a valid #{kind} file" }, 403)
file_result = FileHelper.accept_file(file, name, kind)
unless file_result[:accepted]
error!({ error: "'#{file[:filename]}': #{file_result[:msg]}" }, 403)
end

# Move file into place
Expand Down
5 changes: 3 additions & 2 deletions app/api/task_comments_api.rb
Original file line number Diff line number Diff line change
Expand Up @@ -46,8 +46,9 @@ class TaskCommentsApi < Grape::API
error!({ error: 'Comment text is empty, unable to add new comment' }, 403) unless text_comment.present?
result = task.add_text_comment(current_user, text_comment, reply_to_id)
else
unless FileHelper.accept_file(attached_file, 'comment attachment - TaskComment', 'comment_attachment')
error!({ error: 'Please upload only images, audio or PDF documents' }, 403)
file_result = FileHelper.accept_file(attached_file, 'comment attachment - TaskComment', 'comment_attachment')
unless file_result[:accepted]
error!({ error: "File is not an accptable format: #{file_result[:msg]}" }, 403)
end

result = task.add_comment_with_attachment(current_user, attached_file, reply_to_id)
Expand Down
5 changes: 3 additions & 2 deletions app/api/task_definitions_api.rb
Original file line number Diff line number Diff line change
Expand Up @@ -253,8 +253,9 @@ class TaskDefinitionsApi < Grape::API

file = params[:file]

unless FileHelper.accept_file(file, 'task sheet', 'document')
error!({ error: "'#{file[:name]}' is not a valid #{file[:type]} file" }, 403)
file_result = FileHelper.accept_file(file, 'task sheet', 'document')
unless file_result[:accepted]
error!({ error: "'#{file[:name]}' is not an acceptable format: #{file_result[:msg]}" }, 403)
end

# Actually import...
Expand Down
93 changes: 70 additions & 23 deletions app/helpers/file_helper.rb
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
require 'zip'
require 'tmpdir'
require 'open3'
require 'pdf-reader'

module FileHelper
extend LogHelper
Expand All @@ -19,34 +20,76 @@ def known_extension?(extn)
# - file is passed the file uploaded to Doubtfire (a hash with all relevant data about the file)
#
def accept_file(file, name, kind)
valid = true

case kind
when 'image'
accept = ['image/png', 'image/gif', 'image/bmp', 'image/tiff', 'image/jpeg', 'image/x-ms-bmp']
mime_allow_list = ['image/png', 'image/gif', 'image/bmp', 'image/tiff', 'image/jpeg', 'image/x-ms-bmp']
when 'code'
accept = ['text/x-pascal', 'text/x-c', 'text/x-c++', 'application/csv', 'text/plain', 'text/', 'application/javascript', 'text/html',
mime_allow_list = ['text/x-pascal', 'text/x-c', 'text/x-c++', 'application/csv', 'text/plain', 'text/', 'application/javascript', 'text/html',
'text/css', 'text/x-ruby', 'text/coffeescript', 'text/x-scss', 'application/json', 'text/xml', 'application/xml',
'text/x-yaml', 'application/xml', 'text/x-typescript', 'text/x-vhdl', 'text/x-asm', 'text/x-jack', 'application/x-httpd-php',
'application/tst', 'text/x-cmp', 'text/x-vm', 'application/x-sh', 'application/x-bat', 'application/dat', 'application/x-wine-extension-ini']
when 'document'
accept = [ # -- one day"application/vnd.openxmlformats-officedocument.wordprocessingml.document",
# --"application/msword",
'application/pdf'
]
valid = pdf_valid? file["tempfile"].path
mime_allow_list = [ 'application/pdf' ]
when 'audio'
accept = ['audio/', 'video/webm', 'application/ogg', 'application/octet-stream']
mime_allow_list = ['audio/', 'video/webm', 'application/ogg', 'application/octet-stream']
when 'comment_attachment'
accept = ['audio/', 'video/webm', 'application/ogg', 'image/', 'application/pdf', 'application/octet-stream']
mime_allow_list = ['audio/', 'video/webm', 'application/ogg', 'image/', 'application/pdf', 'application/octet-stream']
when 'video'
accept = ['video/mp4']
mime_allow_list = ['video/mp4']
else
logger.error "Unknown type '#{kind}' provided for '#{name}'"
return false
end

mime_in_list?(file["tempfile"].path, accept) && valid && FileHelper.known_extension?(File.extname(file["tempfile"]).downcase[1..])
extension_check = FileHelper.known_extension?(File.extname(file["tempfile"]).downcase[1..])
unless extension_check
msg = "invalid file extension."
logger.debug "File extension check failed"
return {
accepted: false,
msg: msg
}
end

mime_check = mime_in_list?(file["tempfile"].path, mime_allow_list)
unless mime_check
msg = "invalid file MIME type, file is likely corrupted."
logger.debug "File MIME check failed"
return {
accepted: false,
msg: msg
}
end

# Extra checks for PDF documents
if kind == "document"
pdf_validation_result = validate_pdf(file["tempfile"].path)

if pdf_validation_result[:encrypted]
msg = "PDF file is encrypted, encrypted files are not supported."
logger.debug "PDF file is encrypted"
return {
accepted: false,
msg: msg
}
end

unless pdf_validation_result[:valid]
msg = "PDF file is corrupted."
logger.debug "PDF file is corrupted"
return {
accepted: false,
msg: msg
}
end
end

logger.debug "Uploaded file is accepted"

# All checks are done
{
accepted: true,
msg: "success"
}
end

#
Expand Down Expand Up @@ -339,21 +382,25 @@ def move_files(from_path, to_path, retain_from = false, only_before = nil)
#
# Tests if a PDF is valid / corrupt
#
def pdf_valid?(filename)
# Scan last 1024 bytes for the EOF mark
return false unless File.exist? filename

File.open(filename) do |f|
f.seek -4096, IO::SEEK_END unless f.size <= 4096
f.read.include? '%%EOF'
def validate_pdf(filename)
return { valid: false, encrypted: false } unless File.exist? filename
begin
reader = PDF::Reader.new(filename)
rescue PDF::Reader::MalformedPDFError => e
logger.error "Submitted PDF file #{filename} is invalid: #{e.message}"
return { valid: false, encrypted: false }
rescue PDF::Reader::EncryptedPDFError => e
logger.error "Submitted PDF file #{filename} is encrypted: #{e.message}"
return { valid: false, encrypted: true }
end
{ valid: true, encrypted: false }
end

#
# Copy a PDF into place
#
def copy_pdf(file, dest_path)
if pdf_valid? file
if validate_pdf(file)[:valid]
compress_pdf(file)
FileUtils.cp file, dest_path
true
Expand Down Expand Up @@ -550,7 +597,7 @@ def task_submission_identifier_path_with_timestamp(type, task, timestamp)
module_function :compress_pdf
module_function :qpdf
module_function :move_files
module_function :pdf_valid?
module_function :validate_pdf
module_function :copy_pdf
module_function :read_file_to_str
module_function :path_to_plagarism_html
Expand Down
14 changes: 8 additions & 6 deletions app/models/task.rb
Original file line number Diff line number Diff line change
Expand Up @@ -684,7 +684,8 @@ def add_discussion_comment(user, prompts)
discussion.save!

prompts.each_with_index do |prompt, index|
raise "Unknown comment attachment type" unless FileHelper.accept_file(prompt, "comment attachment discussion audio", "audio")
file_result = FileHelper.accept_file(prompt, "comment attachment discussion audio", "audio")
raise "Comment attachment is not an audio file" unless file_result[:accepted]
raise "Error attaching uploaded file." unless discussion.add_prompt(prompt, index)
end

Expand All @@ -702,11 +703,11 @@ def add_comment_with_attachment(user, tempfile, reply_to_id = nil)
comment.task = self
comment.user = user
comment.reply_to_id = reply_to_id
if FileHelper.accept_file(tempfile, "comment attachment audio test", "audio")
if FileHelper.accept_file(tempfile, "comment attachment audio test", "audio")[:accepted]
comment.content_type = :audio
elsif FileHelper.accept_file(tempfile, "comment attachment image test", "image")
elsif FileHelper.accept_file(tempfile, "comment attachment image test", "image")[:accepted]
comment.content_type = :image
elsif FileHelper.accept_file(tempfile, "comment attachment pdf", "document")
elsif FileHelper.accept_file(tempfile, "comment attachment pdf", "document")[:accepted]
comment.content_type = :pdf
else
raise "Unknown comment attachment type"
Expand Down Expand Up @@ -1209,8 +1210,9 @@ def accept_submission(current_user, files, _student, ui, contributions, trigger,
#
files.each_with_index do |file, index|
logger.debug "Accepting submission (file #{index + 1} of #{files.length}) - checking file type for #{file["tempfile"].path}"
unless FileHelper.accept_file(file, file[:name], file[:type])
ui.error!({ 'error' => "'#{file[:name]}' is not a valid #{file[:type]} file" }, 403)
file_result = FileHelper.accept_file(file, file[:name], file[:type])
unless file_result[:accepted]
ui.error!({ 'error' => "'#{file[:name]}' is invalid: #{file_result[:msg]}" }, 403)
end

if File.size(file["tempfile"].path) > 10_000_000
Expand Down
2 changes: 1 addition & 1 deletion lib/tasks/generate_pdfs.rake
Original file line number Diff line number Diff line change
Expand Up @@ -172,7 +172,7 @@ namespace :submission do

Unit.where('active').each do |u|
u.tasks.where('portfolio_evidence is not NULL').each do |t|
unless FileHelper.pdf_valid?(t.portfolio_evidence_path)
unless FileHelper.validate_pdf(t.portfolio_evidence_path)[:valid]
puts t.portfolio_evidence_path
end
end
Expand Down
68 changes: 68 additions & 0 deletions test/models/task_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -362,4 +362,72 @@ def test_ipynb_to_pdf
unit.destroy!
end

def test_pdf_validation_on_submit
unit = FactoryBot.create(:unit, student_count: 1, task_count: 0)
td = TaskDefinition.new({
unit_id: unit.id,
tutorial_stream: unit.tutorial_streams.first,
name: 'PDF Test Task',
description: 'Test task',
weighting: 4,
target_grade: 0,
start_date: unit.start_date + 1.week,
target_date: unit.start_date + 2.weeks,
abbreviation: 'PDFTestTask',
restrict_status_updates: false,
upload_requirements: [ { "key" => 'file0', "name" => 'A pdf file', "type" => 'document' } ],
plagiarism_warn_pct: 0.8,
is_graded: false,
max_quality_pts: 0
})
td.save!

data_to_post = {
trigger: 'ready_for_feedback'
}

# submit an encrypted (but valid) PDF file and ensure it's rejected immediately
data_to_post = with_file('test_files/submissions/encrypted.pdf', 'application/json', data_to_post)

project = unit.active_projects.first

add_auth_header_for user: unit.main_convenor_user

post "/api/projects/#{project.id}/task_def_id/#{td.id}/submission", data_to_post

assert_equal 403, last_response.status, last_response_body

# submit a corrupted PDF file and ensure it's rejected immediately
data_to_post = with_file('test_files/submissions/corrupted.pdf', 'application/json', data_to_post)

project = unit.active_projects.first

add_auth_header_for user: unit.main_convenor_user

post "/api/projects/#{project.id}/task_def_id/#{td.id}/submission", data_to_post

assert_equal 403, last_response.status, last_response_body

# submit a valid PDF file and ensure it's accepted
data_to_post = with_file('test_files/submissions/valid.pdf', 'application/json', data_to_post)

project = unit.active_projects.first

add_auth_header_for user: unit.main_convenor_user

post "/api/projects/#{project.id}/task_def_id/#{td.id}/submission", data_to_post

assert_equal 201, last_response.status, last_response_body

task = project.task_for_task_definition(td)
assert task.convert_submission_to_pdf
path = task.zip_file_path_for_done_task
assert path
assert File.exist? path
assert File.exist? task.final_pdf_path

td.destroy
assert_not File.exist? path
unit.destroy!
end
end
Binary file added test_files/submissions/corrupted.pdf
Binary file not shown.
Binary file added test_files/submissions/encrypted.pdf
Binary file not shown.
Binary file added test_files/submissions/valid.pdf
Binary file not shown.