Skip to content

Commit

Permalink
Merge pull request #436 from ublefo/feat/reject-encrypted-pdf
Browse files Browse the repository at this point in the history
enhance: reject encrypted pdf
  • Loading branch information
macite authored Apr 27, 2024
2 parents fdcb913 + a042df5 commit 18cc875
Show file tree
Hide file tree
Showing 12 changed files with 172 additions and 36 deletions.
3 changes: 3 additions & 0 deletions Gemfile
Original file line number Diff line number Diff line change
Expand Up @@ -103,3 +103,6 @@ gem 'sidekiq-cron'

# Redis for sidekiq, caching, and action cable (eventually)
gem 'redis'

# PDF reader for validating PDF file submissions
gem 'pdf-reader'
13 changes: 13 additions & 0 deletions Gemfile.lock
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
GEM
remote: https://rubygems.org/
specs:
Ascii85 (1.1.0)
actioncable (7.1.2)
actionpack (= 7.1.2)
activesupport (= 7.1.2)
Expand Down Expand Up @@ -78,6 +79,7 @@ GEM
addressable (2.8.6)
public_suffix (>= 2.0.2, < 6.0)
aes_key_wrap (1.1.0)
afm (0.2.2)
amq-protocol (2.3.2)
ast (2.4.2)
backport (1.2.0)
Expand Down Expand Up @@ -190,6 +192,7 @@ GEM
grape-swagger-rails (0.4.0)
railties (>= 6.0.6.1)
hashdiff (1.1.0)
hashery (2.1.2)
hirb (0.7.3)
http-accept (1.7.0)
http-cookie (1.0.5)
Expand Down Expand Up @@ -269,6 +272,12 @@ GEM
parser (3.2.2.4)
ast (~> 2.4.1)
racc
pdf-reader (2.12.0)
Ascii85 (~> 1.0)
afm (~> 0.2.1)
hashery (~> 2.0)
ruby-rc4
ttfunk
pkg-config (1.5.6)
prism (0.24.0)
psych (5.1.2)
Expand Down Expand Up @@ -395,6 +404,7 @@ GEM
sorbet-runtime (>= 0.5.10782)
ruby-ole (1.2.12.2)
ruby-progressbar (1.13.0)
ruby-rc4 (0.1.5)
ruby-saml (1.13.0)
nokogiri (>= 1.10.5)
rexml
Expand Down Expand Up @@ -453,6 +463,8 @@ GEM
thor (1.3.0)
tilt (2.3.0)
timeout (0.4.1)
ttfunk (1.8.0)
bigdecimal (~> 3.1)
typhoeus (1.4.1)
ethon (>= 0.9.0)
tzinfo (2.0.6)
Expand Down Expand Up @@ -503,6 +515,7 @@ DEPENDENCIES
moss_ruby (>= 1.1.4)
mysql2
net-smtp
pdf-reader
puma
rack-cors
rails (~> 7.0)
Expand Down
5 changes: 3 additions & 2 deletions app/api/submission/portfolio_api.rb
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,9 @@ class PortfolioApi < Grape::API
kind = params[:kind]

# Check that the file is OK to accept
unless FileHelper.accept_file(file, name, kind)
error!({ error: "'#{file[:filename]}' is not a valid #{kind} file" }, 403)
file_result = FileHelper.accept_file(file, name, kind)
unless file_result[:accepted]
error!({ error: "'#{file[:filename]}': #{file_result[:msg]}" }, 403)
end

# Move file into place
Expand Down
5 changes: 3 additions & 2 deletions app/api/task_comments_api.rb
Original file line number Diff line number Diff line change
Expand Up @@ -46,8 +46,9 @@ class TaskCommentsApi < Grape::API
error!({ error: 'Comment text is empty, unable to add new comment' }, 403) unless text_comment.present?
result = task.add_text_comment(current_user, text_comment, reply_to_id)
else
unless FileHelper.accept_file(attached_file, 'comment attachment - TaskComment', 'comment_attachment')
error!({ error: 'Please upload only images, audio or PDF documents' }, 403)
file_result = FileHelper.accept_file(attached_file, 'comment attachment - TaskComment', 'comment_attachment')
unless file_result[:accepted]
error!({ error: "File is not an accptable format: #{file_result[:msg]}" }, 403)
end

result = task.add_comment_with_attachment(current_user, attached_file, reply_to_id)
Expand Down
5 changes: 3 additions & 2 deletions app/api/task_definitions_api.rb
Original file line number Diff line number Diff line change
Expand Up @@ -253,8 +253,9 @@ class TaskDefinitionsApi < Grape::API

file = params[:file]

unless FileHelper.accept_file(file, 'task sheet', 'document')
error!({ error: "'#{file[:name]}' is not a valid #{file[:type]} file" }, 403)
file_result = FileHelper.accept_file(file, 'task sheet', 'document')
unless file_result[:accepted]
error!({ error: "'#{file[:name]}' is not an acceptable format: #{file_result[:msg]}" }, 403)
end

# Actually import...
Expand Down
93 changes: 70 additions & 23 deletions app/helpers/file_helper.rb
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
require 'zip'
require 'tmpdir'
require 'open3'
require 'pdf-reader'

module FileHelper
extend LogHelper
Expand All @@ -19,34 +20,76 @@ def known_extension?(extn)
# - file is passed the file uploaded to Doubtfire (a hash with all relevant data about the file)
#
def accept_file(file, name, kind)
valid = true

case kind
when 'image'
accept = ['image/png', 'image/gif', 'image/bmp', 'image/tiff', 'image/jpeg', 'image/x-ms-bmp']
mime_allow_list = ['image/png', 'image/gif', 'image/bmp', 'image/tiff', 'image/jpeg', 'image/x-ms-bmp']
when 'code'
accept = ['text/x-pascal', 'text/x-c', 'text/x-c++', 'application/csv', 'text/plain', 'text/', 'application/javascript', 'text/html',
mime_allow_list = ['text/x-pascal', 'text/x-c', 'text/x-c++', 'application/csv', 'text/plain', 'text/', 'application/javascript', 'text/html',
'text/css', 'text/x-ruby', 'text/coffeescript', 'text/x-scss', 'application/json', 'text/xml', 'application/xml',
'text/x-yaml', 'application/xml', 'text/x-typescript', 'text/x-vhdl', 'text/x-asm', 'text/x-jack', 'application/x-httpd-php',
'application/tst', 'text/x-cmp', 'text/x-vm', 'application/x-sh', 'application/x-bat', 'application/dat', 'application/x-wine-extension-ini']
when 'document'
accept = [ # -- one day"application/vnd.openxmlformats-officedocument.wordprocessingml.document",
# --"application/msword",
'application/pdf'
]
valid = pdf_valid? file["tempfile"].path
mime_allow_list = [ 'application/pdf' ]
when 'audio'
accept = ['audio/', 'video/webm', 'application/ogg', 'application/octet-stream']
mime_allow_list = ['audio/', 'video/webm', 'application/ogg', 'application/octet-stream']
when 'comment_attachment'
accept = ['audio/', 'video/webm', 'application/ogg', 'image/', 'application/pdf', 'application/octet-stream']
mime_allow_list = ['audio/', 'video/webm', 'application/ogg', 'image/', 'application/pdf', 'application/octet-stream']
when 'video'
accept = ['video/mp4']
mime_allow_list = ['video/mp4']
else
logger.error "Unknown type '#{kind}' provided for '#{name}'"
return false
end

mime_in_list?(file["tempfile"].path, accept) && valid && FileHelper.known_extension?(File.extname(file["tempfile"]).downcase[1..])
extension_check = FileHelper.known_extension?(File.extname(file["tempfile"]).downcase[1..])
unless extension_check
msg = "invalid file extension."
logger.debug "File extension check failed"
return {
accepted: false,
msg: msg
}
end

mime_check = mime_in_list?(file["tempfile"].path, mime_allow_list)
unless mime_check
msg = "invalid file MIME type, file is likely corrupted."
logger.debug "File MIME check failed"
return {
accepted: false,
msg: msg
}
end

# Extra checks for PDF documents
if kind == "document"
pdf_validation_result = validate_pdf(file["tempfile"].path)

if pdf_validation_result[:encrypted]
msg = "PDF file is encrypted, encrypted files are not supported."
logger.debug "PDF file is encrypted"
return {
accepted: false,
msg: msg
}
end

unless pdf_validation_result[:valid]
msg = "PDF file is corrupted."
logger.debug "PDF file is corrupted"
return {
accepted: false,
msg: msg
}
end
end

logger.debug "Uploaded file is accepted"

# All checks are done
{
accepted: true,
msg: "success"
}
end

#
Expand Down Expand Up @@ -339,21 +382,25 @@ def move_files(from_path, to_path, retain_from = false, only_before = nil)
#
# Tests if a PDF is valid / corrupt
#
def pdf_valid?(filename)
# Scan last 1024 bytes for the EOF mark
return false unless File.exist? filename

File.open(filename) do |f|
f.seek -4096, IO::SEEK_END unless f.size <= 4096
f.read.include? '%%EOF'
def validate_pdf(filename)
return { valid: false, encrypted: false } unless File.exist? filename
begin
reader = PDF::Reader.new(filename)
rescue PDF::Reader::MalformedPDFError => e
logger.error "Submitted PDF file #{filename} is invalid: #{e.message}"
return { valid: false, encrypted: false }
rescue PDF::Reader::EncryptedPDFError => e
logger.error "Submitted PDF file #{filename} is encrypted: #{e.message}"
return { valid: false, encrypted: true }
end
{ valid: true, encrypted: false }
end

#
# Copy a PDF into place
#
def copy_pdf(file, dest_path)
if pdf_valid? file
if validate_pdf(file)[:valid]
compress_pdf(file)
FileUtils.cp file, dest_path
true
Expand Down Expand Up @@ -550,7 +597,7 @@ def task_submission_identifier_path_with_timestamp(type, task, timestamp)
module_function :compress_pdf
module_function :qpdf
module_function :move_files
module_function :pdf_valid?
module_function :validate_pdf
module_function :copy_pdf
module_function :read_file_to_str
module_function :path_to_plagarism_html
Expand Down
14 changes: 8 additions & 6 deletions app/models/task.rb
Original file line number Diff line number Diff line change
Expand Up @@ -684,7 +684,8 @@ def add_discussion_comment(user, prompts)
discussion.save!

prompts.each_with_index do |prompt, index|
raise "Unknown comment attachment type" unless FileHelper.accept_file(prompt, "comment attachment discussion audio", "audio")
file_result = FileHelper.accept_file(prompt, "comment attachment discussion audio", "audio")
raise "Comment attachment is not an audio file" unless file_result[:accepted]
raise "Error attaching uploaded file." unless discussion.add_prompt(prompt, index)
end

Expand All @@ -702,11 +703,11 @@ def add_comment_with_attachment(user, tempfile, reply_to_id = nil)
comment.task = self
comment.user = user
comment.reply_to_id = reply_to_id
if FileHelper.accept_file(tempfile, "comment attachment audio test", "audio")
if FileHelper.accept_file(tempfile, "comment attachment audio test", "audio")[:accepted]
comment.content_type = :audio
elsif FileHelper.accept_file(tempfile, "comment attachment image test", "image")
elsif FileHelper.accept_file(tempfile, "comment attachment image test", "image")[:accepted]
comment.content_type = :image
elsif FileHelper.accept_file(tempfile, "comment attachment pdf", "document")
elsif FileHelper.accept_file(tempfile, "comment attachment pdf", "document")[:accepted]
comment.content_type = :pdf
else
raise "Unknown comment attachment type"
Expand Down Expand Up @@ -1211,8 +1212,9 @@ def accept_submission(current_user, files, _student, ui, contributions, trigger,
#
files.each_with_index do |file, index|
logger.debug "Accepting submission (file #{index + 1} of #{files.length}) - checking file type for #{file["tempfile"].path}"
unless FileHelper.accept_file(file, file[:name], file[:type])
ui.error!({ 'error' => "'#{file[:name]}' is not a valid #{file[:type]} file" }, 403)
file_result = FileHelper.accept_file(file, file[:name], file[:type])
unless file_result[:accepted]
ui.error!({ 'error' => "'#{file[:name]}' is invalid: #{file_result[:msg]}" }, 403)
end

if File.size(file["tempfile"].path) > 10_000_000
Expand Down
2 changes: 1 addition & 1 deletion lib/tasks/generate_pdfs.rake
Original file line number Diff line number Diff line change
Expand Up @@ -172,7 +172,7 @@ namespace :submission do

Unit.where('active').each do |u|
u.tasks.where('portfolio_evidence is not NULL').each do |t|
unless FileHelper.pdf_valid?(t.portfolio_evidence_path)
unless FileHelper.validate_pdf(t.portfolio_evidence_path)[:valid]
puts t.portfolio_evidence_path
end
end
Expand Down
68 changes: 68 additions & 0 deletions test/models/task_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -362,4 +362,72 @@ def test_ipynb_to_pdf
unit.destroy!
end

def test_pdf_validation_on_submit
unit = FactoryBot.create(:unit, student_count: 1, task_count: 0)
td = TaskDefinition.new({
unit_id: unit.id,
tutorial_stream: unit.tutorial_streams.first,
name: 'PDF Test Task',
description: 'Test task',
weighting: 4,
target_grade: 0,
start_date: unit.start_date + 1.week,
target_date: unit.start_date + 2.weeks,
abbreviation: 'PDFTestTask',
restrict_status_updates: false,
upload_requirements: [ { "key" => 'file0', "name" => 'A pdf file', "type" => 'document' } ],
plagiarism_warn_pct: 0.8,
is_graded: false,
max_quality_pts: 0
})
td.save!

data_to_post = {
trigger: 'ready_for_feedback'
}

# submit an encrypted (but valid) PDF file and ensure it's rejected immediately
data_to_post = with_file('test_files/submissions/encrypted.pdf', 'application/json', data_to_post)

project = unit.active_projects.first

add_auth_header_for user: unit.main_convenor_user

post "/api/projects/#{project.id}/task_def_id/#{td.id}/submission", data_to_post

assert_equal 403, last_response.status, last_response_body

# submit a corrupted PDF file and ensure it's rejected immediately
data_to_post = with_file('test_files/submissions/corrupted.pdf', 'application/json', data_to_post)

project = unit.active_projects.first

add_auth_header_for user: unit.main_convenor_user

post "/api/projects/#{project.id}/task_def_id/#{td.id}/submission", data_to_post

assert_equal 403, last_response.status, last_response_body

# submit a valid PDF file and ensure it's accepted
data_to_post = with_file('test_files/submissions/valid.pdf', 'application/json', data_to_post)

project = unit.active_projects.first

add_auth_header_for user: unit.main_convenor_user

post "/api/projects/#{project.id}/task_def_id/#{td.id}/submission", data_to_post

assert_equal 201, last_response.status, last_response_body

task = project.task_for_task_definition(td)
assert task.convert_submission_to_pdf
path = task.zip_file_path_for_done_task
assert path
assert File.exist? path
assert File.exist? task.final_pdf_path

td.destroy
assert_not File.exist? path
unit.destroy!
end
end
Binary file added test_files/submissions/corrupted.pdf
Binary file not shown.
Binary file added test_files/submissions/encrypted.pdf
Binary file not shown.
Binary file added test_files/submissions/valid.pdf
Binary file not shown.

0 comments on commit 18cc875

Please sign in to comment.