From dd705009d4e98517b5ee9143ed5759ea83181235 Mon Sep 17 00:00:00 2001 From: JackSCarroll <64730336+JackSCarroll@users.noreply.github.com> Date: Sun, 18 Aug 2024 15:45:58 +1000 Subject: [PATCH 01/15] chore: Add files that werent committed first time --- Gemfile.lock | 4 + .../JPlag/unit_similarity_module_jplag.rb | 237 ++++++++++++++++++ app/models/unit.rb | 5 +- lib/tasks/checks.rake | 60 ++--- 4 files changed, 275 insertions(+), 31 deletions(-) create mode 100644 app/models/similarity/JPlag/unit_similarity_module_jplag.rb diff --git a/Gemfile.lock b/Gemfile.lock index 58ca5d592..000d70b11 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -168,6 +168,7 @@ GEM faraday-net_http (3.1.0) net-http ffi (1.17.0-aarch64-linux-gnu) + ffi (1.17.0-x86_64-linux-gnu) fugit (1.11.0) et-orbi (~> 1, >= 1.2.11) raabro (~> 1.4) @@ -265,6 +266,8 @@ GEM nio4r (2.7.3) nokogiri (1.16.5-aarch64-linux) racc (~> 1.4) + nokogiri (1.16.5-x86_64-linux) + racc (~> 1.4) observer (0.1.2) orm_adapter (0.5.0) parallel (1.24.0) @@ -490,6 +493,7 @@ GEM PLATFORMS aarch64-linux + x86_64-linux DEPENDENCIES better_errors diff --git a/app/models/similarity/JPlag/unit_similarity_module_jplag.rb b/app/models/similarity/JPlag/unit_similarity_module_jplag.rb new file mode 100644 index 000000000..77ef4c5b9 --- /dev/null +++ b/app/models/similarity/JPlag/unit_similarity_module_jplag.rb @@ -0,0 +1,237 @@ +# freeze_string_literal: true + +# Provide moss and tii similarity features in unit class +module UnitSimilarityModuleJPLAG + # + # Last date/time of scan + # + def last_plagarism_scan + if self[:last_plagarism_scan].nil? + DateTime.new(2000, 1, 1) + else + self[:last_plagarism_scan] + end + end + + def force_remove_container + puts "Removing container forcibly: jplag" + `docker container rm -vf jplag` + end + + # Pass tasks on to plagarism detection software and setup links between students + def check_jplag_similarity(force: false) + # Get each task... + return unless active + + # need pwd to restore after cding into submission folder (so the files do not have full path) + pwd = FileUtils.pwd + begin + logger.info "Checking plagiarsm for unit #{code} - #{name} (id=#{id})" + + # submissions_path = File.join(Dir.tmpdir, 'doubtfire', "check-#{id}-#{td.id}") + submissions_path = "student-work/#{code}-#{id}" + # `docker run --volume formatif_student-work_compose:/student-work --volume jplag_data:/jplag --name jplag` + puts "Starting JPLAG container to run on student-work/#{code}-#{id}" + `sudo docker compose -f /workspace/.devcontainer/docker-compose.yml run jplag java -jar /jplag/jplag-5.1.0-jar-with-dependencies.jar #{submissions_path} --similarity-threshold=0.8 -r result` + #`sudo docker compose -f /workspace/.devcontainer/docker-compose.yml run jplag java -jar /jplag/jplag-5.1.0-jar-with-dependencies.jar #{submissions_path} --similarity-threshold=0.8 -r result` + #`sudo docker start jplag && sudo docker exec -ti jplag -jar jplag-5.1.0-jar-with-dependencies.jar #{submissions_path}/#{code}-#{id} --similarity-threshold=0.8 -r result` + #`sudo docker run --name jplag my_jplag_image /bin/bash -c 'java -jar "jplag-5.1.0-jar-with-dependencies.jar" #{submissions_path}/#{code}-#{id} --similarity-threshold=0.8 -r result'` + #force_remove_container + + task_definitions.each do |td| + next if td.moss_language.nil? || td.upload_requirements.nil? || td.upload_requirements.select { |upreq| upreq['type'] == 'code' && upreq['tii_check'] }.empty? + + type_data = td.moss_language.split + next if type_data.nil? || (type_data.length != 2) || (type_data[0] != 'moss') + + # Is there anything to check? + logger.debug "Checking plagiarism for #{td.name} (id=#{td.id})" + tasks = tasks_for_definition(td) + tasks_with_files = tasks.select(&:has_pdf) + + # Skip if not due yet + next if td.due_date > Time.zone.now + + # Skip if no files changed + next unless tasks_with_files.count > 1 && + ( + tasks.where('tasks.file_uploaded_at > ?', last_plagarism_scan).select(&:has_pdf).count > 0 || + td.updated_at > last_plagarism_scan || + force + ) + + # There are new tasks, check these + + logger.debug 'Contacting MOSS for new checks' + + # Create the MossRuby object + # moss_key = Doubtfire::Application.secrets.secret_key_moss + # raise "No moss key set. Check ENV['DF_SECRET_KEY_MOSS'] first." if moss_key.nil? + + # moss = MossRuby.new(moss_key) + + # Set options -- the options will already have these default values + # moss.options[:max_matches] = 7 + # moss.options[:directory_submission] = true + # moss.options[:show_num_matches] = 500 + # moss.options[:experimental_server] = false + # moss.options[:comment] = '' + # moss.options[:language] = type_data[1] + + # begin + # # Create a file hash, with the files to be processed + # to_check = MossRuby.empty_file_hash + # add_done_files_for_plagiarism_check_of(td, submissions_path, to_check, tasks_with_files) +# + # FileUtils.chdir(submissions_path) +# + # # Get server to process files + # logger.debug 'Sending to MOSS...' + # url = moss.check(to_check, ->(_) { print '.' }) +# + # logger.info "MOSS check for #{code} #{td.abbreviation} url: #{url}" +# + # td.plagiarism_report_url = url + # td.plagiarism_updated = true + # td.save + # rescue StandardError => e + # logger.error "Failed to check plagiarism for task #{td.name} (id=#{td.id}). Error: #{e.message}" + # ensure + # FileUtils.chdir(pwd) + # FileUtils.rm_rf submissions_path + # end + end + self.last_plagarism_scan = Time.zone.now + save! + ensure + FileUtils.chdir(pwd) if FileUtils.pwd != pwd + end + + self + end + + def update_plagiarism_stats + moss_key = Doubtfire::Application.secrets.secret_key_moss + raise "No moss key set. Check ENV['DF_SECRET_KEY_MOSS'] first." if moss_key.nil? + + moss = MossRuby.new(moss_key) + + task_definitions.where(plagiarism_updated: true).find_each do |td| + td.plagiarism_updated = false + td.save + + # Get results + url = td.plagiarism_report_url + logger.debug "Processing MOSS results #{url}" + + warn_pct = td.plagiarism_warn_pct || 50 + + results = moss.extract_results(url, warn_pct, ->(line) { puts line }) + + # Use results + results.each do |match| + task_id1 = %r{.*/(\d+)/$}.match(match[0][:filename])[1] + task_id2 = %r{.*/(\d+)/$}.match(match[1][:filename])[1] + + t1 = Task.find(task_id1) + t2 = Task.find(task_id2) + + if t1.nil? || t2.nil? + logger.error "Could not find tasks #{task_id1} or #{task_id2} for plagiarism stats check!" + next + end + + if td.group_set # its a group task + g1_tasks = t1.group_submission.tasks + g2_tasks = t2.group_submission.tasks + + g1_tasks.each do |gt1| + g2_tasks.each do |gt2| + create_plagiarism_link(gt1, gt2, match, warn_pct) + end + end + + else # just link the individuals... + create_plagiarism_link(t1, t2, match, warn_pct) + end + end + end + + self.last_plagarism_scan = Time.zone.now + save! + + self + end + + private + + def create_plagiarism_link(task1, task2, match, warn_pct) + plk1 = MossTaskSimilarity.where(task_id: task1.id, other_task_id: task2.id).first + plk2 = MossTaskSimilarity.where(task_id: task2.id, other_task_id: task1.id).first + + if plk1.nil? || plk2.nil? + # Delete old links between tasks + plk1&.destroy ## will delete its pair + plk2&.destroy + + plk1 = MossTaskSimilarity.create do |plm| + plm.task = task1 + plm.other_task = task2 + plm.pct = match[0][:pct] + plm.flagged = plm.pct >= warn_pct + end + + plk2 = MossTaskSimilarity.create do |plm| + plm.task = task2 + plm.other_task = task1 + plm.pct = match[1][:pct] + plm.flagged = plm.pct >= warn_pct + end + else + # puts "#{plk1.pct} != #{match[0][:pct]}, #{plk1.pct != match[0][:pct]}" + + # Flag is larger than warn pct and larger than previous pct + plk1.flagged = match[0][:pct] >= warn_pct && match[0][:pct] >= plk1.pct + plk2.flagged = match[1][:pct] >= warn_pct && match[1][:pct] >= plk2.pct + + plk1.pct = match[0][:pct] + plk2.pct = match[1][:pct] + end + + plk1.plagiarism_report_url = match[0][:url] + plk2.plagiarism_report_url = match[1][:url] + + plk1.save! + plk2.save! + + FileHelper.save_plagiarism_html(plk1, match[0][:html]) + FileHelper.save_plagiarism_html(plk2, match[1][:html]) + end + + # + # Extract all done files related to a task definition matching a pattern into a given directory. + # Returns an array of files + # + def add_done_files_for_plagiarism_check_of(task_definition, submissions_path, to_check, tasks_with_files) + type_data = task_definition.moss_language.split + return if type_data.nil? || (type_data.length != 2) || (type_data[0] != 'moss') + + # get each code file for each task + task_definition.upload_requirements.each_with_index do |upreq, idx| + # only check code files marked for similarity checks + next unless upreq['type'] == 'code' && upreq['tii_check'] + + pattern = task_definition.glob_for_upload_requirement(idx) + + tasks_with_files.each do |t| + t.extract_file_from_done(submissions_path, pattern, ->(_task, to_path, name) { File.join(to_path.to_s, t.student.username.to_s, name.to_s) }) + end + + # extract files matching each pattern + # -- each pattern + MossRuby.add_file(to_check, "**/#{pattern}") + end + + self + end +end diff --git a/app/models/unit.rb b/app/models/unit.rb index 175e62c79..9a1a2450b 100644 --- a/app/models/unit.rb +++ b/app/models/unit.rb @@ -177,7 +177,10 @@ def role_for(user) scope :set_inactive, -> { where('active = ?', false) } include UnitTiiModule - include UnitSimilarityModule + + ## Change this back to the OG unit_similarity_module once the jplag version is working. i.e., copy the jplag ver into the OG + require_relative 'similarity/JPlag/unit_similarity_module_jplag' + include UnitSimilarityModuleJPLAG def detailed_name "#{name} #{teaching_period.present? ? teaching_period.detailed_name : start_date.strftime('%Y-%m-%d')}" diff --git a/lib/tasks/checks.rake b/lib/tasks/checks.rake index 812a42ecf..96dbe3456 100644 --- a/lib/tasks/checks.rake +++ b/lib/tasks/checks.rake @@ -25,35 +25,35 @@ namespace :submission do FileUtils.rm(rake_plagiarism_executing_marker_file) end - task :simulate_plagiarism, [:num_links] => [:skip_prod, :environment] do |t, args| - if is_executing_plagiarism? - puts 'Skip plagiarism check -- already executing' - logger.info 'Skip plagiarism check -- already executing' - else - match_template = { - url: 'http://moss.stanford.edu/results/375180531/match0-top.html', - pct: Random.rand(70..100), - html: File.read('test_files/link_template.html') - } - match = [match_template, match_template] - # Give me two random distinct students with the same TD - unit = Unit.active_units.first - num_links = (args[:num_links] || 1).to_i - puts "Simulating #{num_links} plagiarism links for #{unit.code}..." - num_links.times do - td = unit.task_definitions.first - t1 = unit.tasks.where(task_definition: td).sample() - t2 = unit.tasks.where(task_definition: td).where.not(project_id: t1.project.id).sample() - if t1.nil? || t2.nil? - puts "Can't find any tasks to simulate. Have you run submission:simulate_signoff?'" - return - end - puts "Plagiarism link for #{td.abbreviation} between #{t1.project.student.name} (project_id=#{t1.project.id}) <-> #{t2.project.student.name} (project_id=#{t2.project.id}) created!" - unit.create_plagiarism_link(t1, t2, match) - unit.create_plagiarism_link(t2, t1, match) - end - end - end + #task :simulate_plagiarism, [:num_links] => [:skip_prod, :environment] do |t, args| + # if is_executing_plagiarism? + # puts 'Skip plagiarism check -- already executing' + # logger.info 'Skip plagiarism check -- already executing' + # else + # match_template = { + # url: 'http://moss.stanford.edu/results/375180531/match0-top.html', + # pct: Random.rand(70..100), + # html: File.read('test_files/link_template.html') + # } + # match = [match_template, match_template] + # # Give me two random distinct students with the same TD + # unit = Unit.active_units.first + # num_links = (args[:num_links] || 1).to_i + # puts "Simulating #{num_links} plagiarism links for #{unit.code}..." + # num_links.times do + # td = unit.task_definitions.first + # t1 = unit.tasks.where(task_definition: td).sample() + # t2 = unit.tasks.where(task_definition: td).where.not(project_id: t1.project.id).sample() + # if t1.nil? || t2.nil? + # puts "Can't find any tasks to simulate. Have you run submission:simulate_signoff?'" + # return + # end + # puts "Plagiarism link for #{td.abbreviation} between #{t1.project.student.name} (project_id=#{t1.project.id}) <-> #{t2.project.student.name} (project_id=#{t2.project.id}) created!" + # unit.create_plagiarism_link(t1, t2, match) + # unit.create_plagiarism_link(t2, t1, match) + # end + # end + #end task check_plagiarism: :environment do if is_executing_plagiarism? @@ -69,7 +69,7 @@ namespace :submission do puts ' ------------------------------------------------------------ ' puts " Starting Plagiarism Check for #{unit.name}" puts ' ------------------------------------------------------------ ' - unit.check_moss_similarity + unit.check_jplag_similarity end puts ' ------------------------------------------------------------ ' puts ' done.' From 469b1aca9fd93883ee9bb3f4dd33057279733621 Mon Sep 17 00:00:00 2001 From: JackSCarroll <64730336+JackSCarroll@users.noreply.github.com> Date: Tue, 20 Aug 2024 16:44:30 +1000 Subject: [PATCH 02/15] refactor: jplag report generation works but only in student-work/new --- .../JPlag/unit_similarity_module_jplag.rb | 18 +++++------------- 1 file changed, 5 insertions(+), 13 deletions(-) diff --git a/app/models/similarity/JPlag/unit_similarity_module_jplag.rb b/app/models/similarity/JPlag/unit_similarity_module_jplag.rb index 77ef4c5b9..729533d53 100644 --- a/app/models/similarity/JPlag/unit_similarity_module_jplag.rb +++ b/app/models/similarity/JPlag/unit_similarity_module_jplag.rb @@ -13,11 +13,6 @@ def last_plagarism_scan end end - def force_remove_container - puts "Removing container forcibly: jplag" - `docker container rm -vf jplag` - end - # Pass tasks on to plagarism detection software and setup links between students def check_jplag_similarity(force: false) # Get each task... @@ -28,15 +23,12 @@ def check_jplag_similarity(force: false) begin logger.info "Checking plagiarsm for unit #{code} - #{name} (id=#{id})" + ### Section for JPLAG WIP ### # submissions_path = File.join(Dir.tmpdir, 'doubtfire', "check-#{id}-#{td.id}") - submissions_path = "student-work/#{code}-#{id}" - # `docker run --volume formatif_student-work_compose:/student-work --volume jplag_data:/jplag --name jplag` - puts "Starting JPLAG container to run on student-work/#{code}-#{id}" - `sudo docker compose -f /workspace/.devcontainer/docker-compose.yml run jplag java -jar /jplag/jplag-5.1.0-jar-with-dependencies.jar #{submissions_path} --similarity-threshold=0.8 -r result` - #`sudo docker compose -f /workspace/.devcontainer/docker-compose.yml run jplag java -jar /jplag/jplag-5.1.0-jar-with-dependencies.jar #{submissions_path} --similarity-threshold=0.8 -r result` - #`sudo docker start jplag && sudo docker exec -ti jplag -jar jplag-5.1.0-jar-with-dependencies.jar #{submissions_path}/#{code}-#{id} --similarity-threshold=0.8 -r result` - #`sudo docker run --name jplag my_jplag_image /bin/bash -c 'java -jar "jplag-5.1.0-jar-with-dependencies.jar" #{submissions_path}/#{code}-#{id} --similarity-threshold=0.8 -r result'` - #force_remove_container + # submissions_path = "/student-work/#{code}-#{id}" + `sudo docker exec jplag java -jar /jplag/jplag-5.1.0-jar-with-dependencies.jar /student-work/new -l csharp --similarity-threshold=0.8 -r /results/result_#{code}` + # puts "Starting JPLAG container to run on student-work/#{code}-#{id}" + # `sudo docker exec jplag java -jar /jplag/jplag-5.1.0-jar-with-dependencies.jar /student-work -l csharp --similarity-threshold=0.8 -r result ` task_definitions.each do |td| next if td.moss_language.nil? || td.upload_requirements.nil? || td.upload_requirements.select { |upreq| upreq['type'] == 'code' && upreq['tii_check'] }.empty? From 28c50d9018190da60eb050ec4257eab4e52166c8 Mon Sep 17 00:00:00 2001 From: JackSCarroll <64730336+JackSCarroll@users.noreply.github.com> Date: Wed, 21 Aug 2024 21:57:13 +1000 Subject: [PATCH 03/15] rework: add code file extraction from done files zips and generate jplag report on those --- .../JPlag/unit_similarity_module_jplag.rb | 54 +++++++++++++++---- 1 file changed, 44 insertions(+), 10 deletions(-) diff --git a/app/models/similarity/JPlag/unit_similarity_module_jplag.rb b/app/models/similarity/JPlag/unit_similarity_module_jplag.rb index 729533d53..f354e55d9 100644 --- a/app/models/similarity/JPlag/unit_similarity_module_jplag.rb +++ b/app/models/similarity/JPlag/unit_similarity_module_jplag.rb @@ -20,17 +20,22 @@ def check_jplag_similarity(force: false) # need pwd to restore after cding into submission folder (so the files do not have full path) pwd = FileUtils.pwd + + # making temp directory for jplag + root_work_dir = Rails.root.join("tmp", "jplag", "#{code}-#{id}") + FileUtils.mkdir_p(root_work_dir) begin logger.info "Checking plagiarsm for unit #{code} - #{name} (id=#{id})" ### Section for JPLAG WIP ### # submissions_path = File.join(Dir.tmpdir, 'doubtfire', "check-#{id}-#{td.id}") - # submissions_path = "/student-work/#{code}-#{id}" - `sudo docker exec jplag java -jar /jplag/jplag-5.1.0-jar-with-dependencies.jar /student-work/new -l csharp --similarity-threshold=0.8 -r /results/result_#{code}` + # puts "Starting JPLAG container to run on student-work/#{code}-#{id}" # `sudo docker exec jplag java -jar /jplag/jplag-5.1.0-jar-with-dependencies.jar /student-work -l csharp --similarity-threshold=0.8 -r result ` task_definitions.each do |td| + tasks_dir = root_work_dir.join(td.id.to_s) + FileUtils.mkdir_p(tasks_dir) next if td.moss_language.nil? || td.upload_requirements.nil? || td.upload_requirements.select { |upreq| upreq['type'] == 'code' && upreq['tii_check'] }.empty? type_data = td.moss_language.split @@ -40,21 +45,28 @@ def check_jplag_similarity(force: false) logger.debug "Checking plagiarism for #{td.name} (id=#{td.id})" tasks = tasks_for_definition(td) tasks_with_files = tasks.select(&:has_pdf) + run_jplag_on_done_files(td, tasks_dir, tasks_with_files) + #tasks_with_files.each do |t| + # submissions_dir = tasks_dir.join(t.student.username) + # FileUtils.mkdir_p(submissions_dir) + # pattern = t.upload_requirements.select + # t.extract_file_from_done(submissions_dir, pattern, ->(_task, to_path, name) { File.join(to_path.to_s, name.to_s) }) + #end # Skip if not due yet - next if td.due_date > Time.zone.now + #next if td.due_date > Time.zone.now # Skip if no files changed - next unless tasks_with_files.count > 1 && - ( - tasks.where('tasks.file_uploaded_at > ?', last_plagarism_scan).select(&:has_pdf).count > 0 || - td.updated_at > last_plagarism_scan || - force - ) + #next unless tasks_with_files.count > 1 && + # ( + # tasks.where('tasks.file_uploaded_at > ?', last_plagarism_scan).select(&:has_pdf).count > 0 || + # td.updated_at > last_plagarism_scan || + # force + # ) # There are new tasks, check these - logger.debug 'Contacting MOSS for new checks' + # logger.debug 'Contacting MOSS for new checks' # Create the MossRuby object # moss_key = Doubtfire::Application.secrets.secret_key_moss @@ -226,4 +238,26 @@ def add_done_files_for_plagiarism_check_of(task_definition, submissions_path, to self end + + def run_jplag_on_done_files(task_definition, tasks_dir, tasks_with_files) + type_data = task_definition.moss_language.split + return if type_data.nil? || (type_data.length != 2) || (type_data[0] != 'moss') + + # get each code file for each task + task_definition.upload_requirements.each_with_index do |upreq, idx| + # only check code files marked for similarity checks + next unless upreq['type'] == 'code' && upreq['tii_check'] + + pattern = task_definition.glob_for_upload_requirement(idx) + + tasks_with_files.each do |t| + t.extract_file_from_done(tasks_dir, pattern, ->(_task, to_path, name) { File.join(to_path.to_s, t.student.username.to_s, name.to_s) }) + end + puts "Starting JPLAG container to run on #{tasks_dir}" + tasks_dir_split = tasks_dir.to_s.split('/workspace/doubtfire-api')[1] + `sudo docker exec jplag java -jar /jplag/jplag-5.1.0-jar-with-dependencies.jar #{tasks_dir_split} -l csharp --similarity-threshold=0.8 -r /results/result` + end + + self + end end From 611bbacbdf5789ced920e2dda06719eb0b9f2e37 Mon Sep 17 00:00:00 2001 From: JackSCarroll <64730336+JackSCarroll@users.noreply.github.com> Date: Fri, 30 Aug 2024 13:24:06 +1000 Subject: [PATCH 04/15] refactor: add jplag function to unit_similarity_module and remove temp jplag ver --- .../similarity/unit_similarity_module.rb | 37 ++++++++++++++++++- 1 file changed, 36 insertions(+), 1 deletion(-) diff --git a/app/models/similarity/unit_similarity_module.rb b/app/models/similarity/unit_similarity_module.rb index c69897f78..4d6286b4b 100644 --- a/app/models/similarity/unit_similarity_module.rb +++ b/app/models/similarity/unit_similarity_module.rb @@ -14,17 +14,26 @@ def last_plagarism_scan end # Pass tasks on to plagarism detection software and setup links between students - def check_moss_similarity(force: false) + def check_similarity(force: false) # Get each task... return unless active # need pwd to restore after cding into submission folder (so the files do not have full path) pwd = FileUtils.pwd + # making temp directory for unit - jplag + root_work_dir = Rails.root.join("tmp", "jplag", "#{code}-#{id}") + unit_code = "#{code}-#{id}" + FileUtils.mkdir_p(root_work_dir) + begin logger.info "Checking plagiarsm for unit #{code} - #{name} (id=#{id})" task_definitions.each do |td| + # making temp directory for each task - jplag + tasks_dir = root_work_dir.join(td.id.to_s) + FileUtils.mkdir_p(tasks_dir) + next if td.moss_language.nil? || td.upload_requirements.nil? || td.upload_requirements.select { |upreq| upreq['type'] == 'code' && upreq['tii_check'] }.empty? type_data = td.moss_language.split @@ -35,6 +44,9 @@ def check_moss_similarity(force: false) tasks = tasks_for_definition(td) tasks_with_files = tasks.select(&:has_pdf) + #JPLAG + run_jplag_on_done_files(td, tasks_dir, tasks_with_files, unit_code) + # Skip if not due yet next if td.due_date > Time.zone.now @@ -222,4 +234,27 @@ def add_done_files_for_plagiarism_check_of(task_definition, tmp_path, to_check, self end + + # JPLAG Function - extracts "done" files for each task and packages them into a directory for JPLAG to run on + def run_jplag_on_done_files(task_definition, tasks_dir, tasks_with_files, unit_code) + type_data = task_definition.moss_language.split + return if type_data.nil? || (type_data.length != 2) || (type_data[0] != 'moss') + + # get each code file for each task + task_definition.upload_requirements.each_with_index do |upreq, idx| + # only check code files marked for similarity checks + next unless upreq['type'] == 'code' && upreq['tii_check'] + + pattern = task_definition.glob_for_upload_requirement(idx) + + tasks_with_files.each do |t| + t.extract_file_from_done(tasks_dir, pattern, ->(_task, to_path, name) { File.join(to_path.to_s, t.student.username.to_s, name.to_s) }) + end + puts "Starting JPLAG container to run on #{tasks_dir}" + tasks_dir_split = tasks_dir.to_s.split('/workspace/doubtfire-api')[1] + `sudo docker exec jplag java -jar /jplag/jplag-5.1.0-jar-with-dependencies.jar #{tasks_dir_split} -l #{type_data[1]} --similarity-threshold=0.8 -r /jplag/results/#{unit_code}_#{task_definition.id}-result` + end + + self + end end From 6fa26f07ab5528b9610d185da90ed07f048e063b Mon Sep 17 00:00:00 2001 From: JackSCarroll <64730336+JackSCarroll@users.noreply.github.com> Date: Thu, 19 Sep 2024 18:53:41 +1000 Subject: [PATCH 05/15] fix: make report file generation make more sense and remove tmp files --- .../similarity/unit_similarity_module.rb | 28 +++++++++++++++++-- 1 file changed, 26 insertions(+), 2 deletions(-) diff --git a/app/models/similarity/unit_similarity_module.rb b/app/models/similarity/unit_similarity_module.rb index 4d6286b4b..b71ac823f 100644 --- a/app/models/similarity/unit_similarity_module.rb +++ b/app/models/similarity/unit_similarity_module.rb @@ -44,7 +44,7 @@ def check_similarity(force: false) tasks = tasks_for_definition(td) tasks_with_files = tasks.select(&:has_pdf) - #JPLAG + # JPLAG run_jplag_on_done_files(td, tasks_dir, tasks_with_files, unit_code) # Skip if not due yet @@ -252,9 +252,33 @@ def run_jplag_on_done_files(task_definition, tasks_dir, tasks_with_files, unit_c end puts "Starting JPLAG container to run on #{tasks_dir}" tasks_dir_split = tasks_dir.to_s.split('/workspace/doubtfire-api')[1] - `sudo docker exec jplag java -jar /jplag/jplag-5.1.0-jar-with-dependencies.jar #{tasks_dir_split} -l #{type_data[1]} --similarity-threshold=0.8 -r /jplag/results/#{unit_code}_#{task_definition.id}-result` + + # Check if the directory exists and create it if it doesn't + results_dir = "/jplag/results/#{unit_code}" + `sudo docker exec jplag sh -c 'if [ ! -d "#{results_dir}" ]; then mkdir -p "#{results_dir}"; fi'` + + # Remove existing result file if it exists + result_file = "#{results_dir}/#{task_definition.id}-result.zip" + `sudo docker exec jplag sh -c 'if [ -f "#{result_file}" ]; then rm "#{result_file}"; fi'` + + case type_data[1] + when 'csharp' + file_lang = 'csharp' + when 'cc' + file_lang = 'cpp' + end + + # Run JPLAG + puts "THE FILE TYPE IN MOSS: #{type_data[1]}" + `sudo docker exec jplag java -jar /jplag/myJplag.jar #{tasks_dir_split} -l #{file_lang} --similarity-threshold=0.8 -nobr -M RUN -r #{results_dir}/#{task_definition.id}-result` end + # Delete the extracted code files from tmp + tmp_dir = Rails.root.join("tmp", "jplag") + puts "Deleting files in: #{tmp_dir}" + puts "Files to delete: #{Dir.glob("#{tmp_dir}/*")}" + FileUtils.rm_rf(Dir.glob("#{tmp_dir}/*")) + self end end From c83919c49074187cabffebba4a8e7e48dbf96319 Mon Sep 17 00:00:00 2001 From: JackSCarroll <64730336+JackSCarroll@users.noreply.github.com> Date: Fri, 20 Sep 2024 20:46:26 +1000 Subject: [PATCH 06/15] fix: make report downloading use new path --- app/api/task_definitions_api.rb | 29 ++++++++++++++++ app/helpers/file_helper.rb | 12 +++++++ app/models/unit.rb | 7 ++-- config/application.rb | 6 ++++ lib/tasks/checks.rake | 60 ++++++++++++++++----------------- 5 files changed, 81 insertions(+), 33 deletions(-) diff --git a/app/api/task_definitions_api.rb b/app/api/task_definitions_api.rb index 03536c9ef..7e95bb8f8 100644 --- a/app/api/task_definitions_api.rb +++ b/app/api/task_definitions_api.rb @@ -614,4 +614,33 @@ class TaskDefinitionsApi < Grape::API stream_file path end + + desc 'Download the JPLAG report for a given task' + params do + requires :unit_id, type: Integer, desc: 'The unit to download JPLAG report for' + requires :task_def_id, type: Integer, desc: 'The task definition to get the JPLAG report of' + end + get '/units/:unit_id/task_definitions/:task_def_id/jplag_report' do + unit = Unit.find(params[:unit_id]) + task_def = unit.task_definitions.find(params[:task_def_id]) + + unless authorise? current_user, unit, :download_jplag_report + error!({ error: 'Not authorised to download JPLAG reports of unit' }, 403) + end + + file_loc = FileHelper.task_jplag_report_path(unit, task_def) + logger.debug "JPLAG report file location: #{file_loc}" + + if file_loc.nil? || !File.exist?(file_loc) + file_loc = Rails.root.join('public', 'resources', 'FileNotFound.pdf') + header['Content-Disposition'] = 'attachment; filename=FileNotFound.pdf' + else + header['Content-Disposition'] = "attachment; filename=#{task_def.abbreviation}-jplag-report.zip" + end + header['Access-Control-Expose-Headers'] = 'Content-Disposition' + + content_type 'application/octet-stream' + + stream_file file_loc + end end diff --git a/app/helpers/file_helper.rb b/app/helpers/file_helper.rb index 2ae3597b2..746075f2f 100644 --- a/app/helpers/file_helper.rb +++ b/app/helpers/file_helper.rb @@ -249,6 +249,16 @@ def student_portfolio_path(unit, username, create = true) File.join(student_portfolio_dir(unit, username, create), FileHelper.sanitized_filename("#{username}-portfolio.pdf")) end + def task_jplag_report_dir(unit) + file_server = Doubtfire::Application.config.jplag_report_dir + dst = "#{file_server}/#{unit.code}-#{unit.id}/" # trust the server config and passed in type for paths + dst + end + + def task_jplag_report_path(unit, task) + File.join(task_jplag_report_dir(unit), FileHelper.sanitized_filename("#{task.id}-result.zip")) + end + def comment_attachment_path(task_comment, attachment_extension) "#{File.join(student_work_dir(:comment, task_comment.task), "#{task_comment.id.to_s}#{attachment_extension}")}" end @@ -658,4 +668,6 @@ def line_wrap(path, width: 160) module_function :known_extension? module_function :pages_in_pdf module_function :line_wrap + module_function :task_jplag_report_dir + module_function :task_jplag_report_path end diff --git a/app/models/unit.rb b/app/models/unit.rb index 9a1a2450b..d53a0932f 100644 --- a/app/models/unit.rb +++ b/app/models/unit.rb @@ -28,6 +28,7 @@ def self.permissions :download_stats, :download_unit_csv, :download_grades, + :download_jplag_report, :exceed_capacity ] @@ -46,6 +47,7 @@ def self.permissions :change_project_enrolment, :download_stats, :download_grades, + :download_jplag_report, :rollover_unit, :exceed_capacity, :perform_overseer_assessment_test @@ -66,6 +68,7 @@ def self.permissions :download_stats, :download_unit_csv, :download_grades, + :download_jplag_report, :exceed_capacity ] @@ -178,9 +181,7 @@ def role_for(user) include UnitTiiModule - ## Change this back to the OG unit_similarity_module once the jplag version is working. i.e., copy the jplag ver into the OG - require_relative 'similarity/JPlag/unit_similarity_module_jplag' - include UnitSimilarityModuleJPLAG + include UnitSimilarityModule def detailed_name "#{name} #{teaching_period.present? ? teaching_period.detailed_name : start_date.strftime('%Y-%m-%d')}" diff --git a/config/application.rb b/config/application.rb index df21df01b..0149682c3 100644 --- a/config/application.rb +++ b/config/application.rb @@ -31,6 +31,12 @@ class Application < Rails::Application # variable. config.student_work_dir = ENV['DF_STUDENT_WORK_DIR'] || "#{Rails.root}/student_work" + # ==> JPLAG report directory + # File server location for storing JPLAG reports. Defaults to `jplag_results` + # directory under root but is overridden using DF_JPLAG_REPORT_DIR environment + # variable. + config.jplag_report_dir = ENV['DF_JPLAG_REPORT_DIR'] || "#{Rails.root}/jplag_results" + # ==> Load credentials from env credentials.secret_key_base = ENV.fetch('DF_SECRET_KEY_BASE', Rails.env.production? ? nil : '9e010ee2f52af762916406fd2ac488c5694a6cc784777136e657511f8bbc7a73f96d59c0a9a778a0d7cf6406f8ecbf77efe4701dfbd63d8248fc7cc7f32dea97') credentials.secret_key_attr = ENV.fetch('DF_SECRET_KEY_ATTR', Rails.env.production? ? nil : 'e69fc5960ca0e8700844a3a25fe80373b41c0a265d342eba06950113f3766fd983bad9ec51bf36eb615d9711bfe1dd90b8e35f01841b323f604ffee857e32055') diff --git a/lib/tasks/checks.rake b/lib/tasks/checks.rake index 96dbe3456..18d783e4f 100644 --- a/lib/tasks/checks.rake +++ b/lib/tasks/checks.rake @@ -25,35 +25,35 @@ namespace :submission do FileUtils.rm(rake_plagiarism_executing_marker_file) end - #task :simulate_plagiarism, [:num_links] => [:skip_prod, :environment] do |t, args| - # if is_executing_plagiarism? - # puts 'Skip plagiarism check -- already executing' - # logger.info 'Skip plagiarism check -- already executing' - # else - # match_template = { - # url: 'http://moss.stanford.edu/results/375180531/match0-top.html', - # pct: Random.rand(70..100), - # html: File.read('test_files/link_template.html') - # } - # match = [match_template, match_template] - # # Give me two random distinct students with the same TD - # unit = Unit.active_units.first - # num_links = (args[:num_links] || 1).to_i - # puts "Simulating #{num_links} plagiarism links for #{unit.code}..." - # num_links.times do - # td = unit.task_definitions.first - # t1 = unit.tasks.where(task_definition: td).sample() - # t2 = unit.tasks.where(task_definition: td).where.not(project_id: t1.project.id).sample() - # if t1.nil? || t2.nil? - # puts "Can't find any tasks to simulate. Have you run submission:simulate_signoff?'" - # return - # end - # puts "Plagiarism link for #{td.abbreviation} between #{t1.project.student.name} (project_id=#{t1.project.id}) <-> #{t2.project.student.name} (project_id=#{t2.project.id}) created!" - # unit.create_plagiarism_link(t1, t2, match) - # unit.create_plagiarism_link(t2, t1, match) - # end - # end - #end + task :simulate_plagiarism, [:num_links] => [:skip_prod, :environment] do |t, args| + if is_executing_plagiarism? + puts 'Skip plagiarism check -- already executing' + logger.info 'Skip plagiarism check -- already executing' + else + match_template = { + url: 'http://moss.stanford.edu/results/375180531/match0-top.html', + pct: Random.rand(70..100), + html: File.read('test_files/link_template.html') + } + match = [match_template, match_template] + # Give me two random distinct students with the same TD + unit = Unit.active_units.first + num_links = (args[:num_links] || 1).to_i + puts "Simulating #{num_links} plagiarism links for #{unit.code}..." + num_links.times do + td = unit.task_definitions.first + t1 = unit.tasks.where(task_definition: td).sample() + t2 = unit.tasks.where(task_definition: td).where.not(project_id: t1.project.id).sample() + if t1.nil? || t2.nil? + puts "Can't find any tasks to simulate. Have you run submission:simulate_signoff?'" + return + end + puts "Plagiarism link for #{td.abbreviation} between #{t1.project.student.name} (project_id=#{t1.project.id}) <-> #{t2.project.student.name} (project_id=#{t2.project.id}) created!" + unit.create_plagiarism_link(t1, t2, match) + unit.create_plagiarism_link(t2, t1, match) + end + end + end task check_plagiarism: :environment do if is_executing_plagiarism? @@ -69,7 +69,7 @@ namespace :submission do puts ' ------------------------------------------------------------ ' puts " Starting Plagiarism Check for #{unit.name}" puts ' ------------------------------------------------------------ ' - unit.check_jplag_similarity + unit.check_similarity end puts ' ------------------------------------------------------------ ' puts ' done.' From 4d0abe36011eb69d2461a7b3d1c47de4f4838c09 Mon Sep 17 00:00:00 2001 From: JackSCarroll <64730336+JackSCarroll@users.noreply.github.com> Date: Fri, 20 Sep 2024 21:15:43 +1000 Subject: [PATCH 07/15] chore: clean files --- .../JPlag/unit_similarity_module_jplag.rb | 263 ------------------ .../similarity/unit_similarity_module.rb | 5 +- 2 files changed, 3 insertions(+), 265 deletions(-) delete mode 100644 app/models/similarity/JPlag/unit_similarity_module_jplag.rb diff --git a/app/models/similarity/JPlag/unit_similarity_module_jplag.rb b/app/models/similarity/JPlag/unit_similarity_module_jplag.rb deleted file mode 100644 index f354e55d9..000000000 --- a/app/models/similarity/JPlag/unit_similarity_module_jplag.rb +++ /dev/null @@ -1,263 +0,0 @@ -# freeze_string_literal: true - -# Provide moss and tii similarity features in unit class -module UnitSimilarityModuleJPLAG - # - # Last date/time of scan - # - def last_plagarism_scan - if self[:last_plagarism_scan].nil? - DateTime.new(2000, 1, 1) - else - self[:last_plagarism_scan] - end - end - - # Pass tasks on to plagarism detection software and setup links between students - def check_jplag_similarity(force: false) - # Get each task... - return unless active - - # need pwd to restore after cding into submission folder (so the files do not have full path) - pwd = FileUtils.pwd - - # making temp directory for jplag - root_work_dir = Rails.root.join("tmp", "jplag", "#{code}-#{id}") - FileUtils.mkdir_p(root_work_dir) - begin - logger.info "Checking plagiarsm for unit #{code} - #{name} (id=#{id})" - - ### Section for JPLAG WIP ### - # submissions_path = File.join(Dir.tmpdir, 'doubtfire', "check-#{id}-#{td.id}") - - # puts "Starting JPLAG container to run on student-work/#{code}-#{id}" - # `sudo docker exec jplag java -jar /jplag/jplag-5.1.0-jar-with-dependencies.jar /student-work -l csharp --similarity-threshold=0.8 -r result ` - - task_definitions.each do |td| - tasks_dir = root_work_dir.join(td.id.to_s) - FileUtils.mkdir_p(tasks_dir) - next if td.moss_language.nil? || td.upload_requirements.nil? || td.upload_requirements.select { |upreq| upreq['type'] == 'code' && upreq['tii_check'] }.empty? - - type_data = td.moss_language.split - next if type_data.nil? || (type_data.length != 2) || (type_data[0] != 'moss') - - # Is there anything to check? - logger.debug "Checking plagiarism for #{td.name} (id=#{td.id})" - tasks = tasks_for_definition(td) - tasks_with_files = tasks.select(&:has_pdf) - run_jplag_on_done_files(td, tasks_dir, tasks_with_files) - #tasks_with_files.each do |t| - # submissions_dir = tasks_dir.join(t.student.username) - # FileUtils.mkdir_p(submissions_dir) - # pattern = t.upload_requirements.select - # t.extract_file_from_done(submissions_dir, pattern, ->(_task, to_path, name) { File.join(to_path.to_s, name.to_s) }) - #end - - # Skip if not due yet - #next if td.due_date > Time.zone.now - - # Skip if no files changed - #next unless tasks_with_files.count > 1 && - # ( - # tasks.where('tasks.file_uploaded_at > ?', last_plagarism_scan).select(&:has_pdf).count > 0 || - # td.updated_at > last_plagarism_scan || - # force - # ) - - # There are new tasks, check these - - # logger.debug 'Contacting MOSS for new checks' - - # Create the MossRuby object - # moss_key = Doubtfire::Application.secrets.secret_key_moss - # raise "No moss key set. Check ENV['DF_SECRET_KEY_MOSS'] first." if moss_key.nil? - - # moss = MossRuby.new(moss_key) - - # Set options -- the options will already have these default values - # moss.options[:max_matches] = 7 - # moss.options[:directory_submission] = true - # moss.options[:show_num_matches] = 500 - # moss.options[:experimental_server] = false - # moss.options[:comment] = '' - # moss.options[:language] = type_data[1] - - # begin - # # Create a file hash, with the files to be processed - # to_check = MossRuby.empty_file_hash - # add_done_files_for_plagiarism_check_of(td, submissions_path, to_check, tasks_with_files) -# - # FileUtils.chdir(submissions_path) -# - # # Get server to process files - # logger.debug 'Sending to MOSS...' - # url = moss.check(to_check, ->(_) { print '.' }) -# - # logger.info "MOSS check for #{code} #{td.abbreviation} url: #{url}" -# - # td.plagiarism_report_url = url - # td.plagiarism_updated = true - # td.save - # rescue StandardError => e - # logger.error "Failed to check plagiarism for task #{td.name} (id=#{td.id}). Error: #{e.message}" - # ensure - # FileUtils.chdir(pwd) - # FileUtils.rm_rf submissions_path - # end - end - self.last_plagarism_scan = Time.zone.now - save! - ensure - FileUtils.chdir(pwd) if FileUtils.pwd != pwd - end - - self - end - - def update_plagiarism_stats - moss_key = Doubtfire::Application.secrets.secret_key_moss - raise "No moss key set. Check ENV['DF_SECRET_KEY_MOSS'] first." if moss_key.nil? - - moss = MossRuby.new(moss_key) - - task_definitions.where(plagiarism_updated: true).find_each do |td| - td.plagiarism_updated = false - td.save - - # Get results - url = td.plagiarism_report_url - logger.debug "Processing MOSS results #{url}" - - warn_pct = td.plagiarism_warn_pct || 50 - - results = moss.extract_results(url, warn_pct, ->(line) { puts line }) - - # Use results - results.each do |match| - task_id1 = %r{.*/(\d+)/$}.match(match[0][:filename])[1] - task_id2 = %r{.*/(\d+)/$}.match(match[1][:filename])[1] - - t1 = Task.find(task_id1) - t2 = Task.find(task_id2) - - if t1.nil? || t2.nil? - logger.error "Could not find tasks #{task_id1} or #{task_id2} for plagiarism stats check!" - next - end - - if td.group_set # its a group task - g1_tasks = t1.group_submission.tasks - g2_tasks = t2.group_submission.tasks - - g1_tasks.each do |gt1| - g2_tasks.each do |gt2| - create_plagiarism_link(gt1, gt2, match, warn_pct) - end - end - - else # just link the individuals... - create_plagiarism_link(t1, t2, match, warn_pct) - end - end - end - - self.last_plagarism_scan = Time.zone.now - save! - - self - end - - private - - def create_plagiarism_link(task1, task2, match, warn_pct) - plk1 = MossTaskSimilarity.where(task_id: task1.id, other_task_id: task2.id).first - plk2 = MossTaskSimilarity.where(task_id: task2.id, other_task_id: task1.id).first - - if plk1.nil? || plk2.nil? - # Delete old links between tasks - plk1&.destroy ## will delete its pair - plk2&.destroy - - plk1 = MossTaskSimilarity.create do |plm| - plm.task = task1 - plm.other_task = task2 - plm.pct = match[0][:pct] - plm.flagged = plm.pct >= warn_pct - end - - plk2 = MossTaskSimilarity.create do |plm| - plm.task = task2 - plm.other_task = task1 - plm.pct = match[1][:pct] - plm.flagged = plm.pct >= warn_pct - end - else - # puts "#{plk1.pct} != #{match[0][:pct]}, #{plk1.pct != match[0][:pct]}" - - # Flag is larger than warn pct and larger than previous pct - plk1.flagged = match[0][:pct] >= warn_pct && match[0][:pct] >= plk1.pct - plk2.flagged = match[1][:pct] >= warn_pct && match[1][:pct] >= plk2.pct - - plk1.pct = match[0][:pct] - plk2.pct = match[1][:pct] - end - - plk1.plagiarism_report_url = match[0][:url] - plk2.plagiarism_report_url = match[1][:url] - - plk1.save! - plk2.save! - - FileHelper.save_plagiarism_html(plk1, match[0][:html]) - FileHelper.save_plagiarism_html(plk2, match[1][:html]) - end - - # - # Extract all done files related to a task definition matching a pattern into a given directory. - # Returns an array of files - # - def add_done_files_for_plagiarism_check_of(task_definition, submissions_path, to_check, tasks_with_files) - type_data = task_definition.moss_language.split - return if type_data.nil? || (type_data.length != 2) || (type_data[0] != 'moss') - - # get each code file for each task - task_definition.upload_requirements.each_with_index do |upreq, idx| - # only check code files marked for similarity checks - next unless upreq['type'] == 'code' && upreq['tii_check'] - - pattern = task_definition.glob_for_upload_requirement(idx) - - tasks_with_files.each do |t| - t.extract_file_from_done(submissions_path, pattern, ->(_task, to_path, name) { File.join(to_path.to_s, t.student.username.to_s, name.to_s) }) - end - - # extract files matching each pattern - # -- each pattern - MossRuby.add_file(to_check, "**/#{pattern}") - end - - self - end - - def run_jplag_on_done_files(task_definition, tasks_dir, tasks_with_files) - type_data = task_definition.moss_language.split - return if type_data.nil? || (type_data.length != 2) || (type_data[0] != 'moss') - - # get each code file for each task - task_definition.upload_requirements.each_with_index do |upreq, idx| - # only check code files marked for similarity checks - next unless upreq['type'] == 'code' && upreq['tii_check'] - - pattern = task_definition.glob_for_upload_requirement(idx) - - tasks_with_files.each do |t| - t.extract_file_from_done(tasks_dir, pattern, ->(_task, to_path, name) { File.join(to_path.to_s, t.student.username.to_s, name.to_s) }) - end - puts "Starting JPLAG container to run on #{tasks_dir}" - tasks_dir_split = tasks_dir.to_s.split('/workspace/doubtfire-api')[1] - `sudo docker exec jplag java -jar /jplag/jplag-5.1.0-jar-with-dependencies.jar #{tasks_dir_split} -l csharp --similarity-threshold=0.8 -r /results/result` - end - - self - end -end diff --git a/app/models/similarity/unit_similarity_module.rb b/app/models/similarity/unit_similarity_module.rb index b71ac823f..5aa6fabce 100644 --- a/app/models/similarity/unit_similarity_module.rb +++ b/app/models/similarity/unit_similarity_module.rb @@ -44,8 +44,6 @@ def check_similarity(force: false) tasks = tasks_for_definition(td) tasks_with_files = tasks.select(&:has_pdf) - # JPLAG - run_jplag_on_done_files(td, tasks_dir, tasks_with_files, unit_code) # Skip if not due yet next if td.due_date > Time.zone.now @@ -60,6 +58,9 @@ def check_similarity(force: false) # There are new tasks, check these + # JPLAG + run_jplag_on_done_files(td, tasks_dir, tasks_with_files, unit_code) + logger.debug 'Contacting MOSS for new checks' # Create the MossRuby object From eb8c0e8ca28358c21ef4c4bf4129dfbee8c535ba Mon Sep 17 00:00:00 2001 From: JackSCarroll <64730336+JackSCarroll@users.noreply.github.com> Date: Fri, 20 Sep 2024 21:20:49 +1000 Subject: [PATCH 08/15] chore: clean files --- app/models/similarity/unit_similarity_module.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/models/similarity/unit_similarity_module.rb b/app/models/similarity/unit_similarity_module.rb index 5aa6fabce..cfb99ae8f 100644 --- a/app/models/similarity/unit_similarity_module.rb +++ b/app/models/similarity/unit_similarity_module.rb @@ -271,7 +271,7 @@ def run_jplag_on_done_files(task_definition, tasks_dir, tasks_with_files, unit_c # Run JPLAG puts "THE FILE TYPE IN MOSS: #{type_data[1]}" - `sudo docker exec jplag java -jar /jplag/myJplag.jar #{tasks_dir_split} -l #{file_lang} --similarity-threshold=0.8 -nobr -M RUN -r #{results_dir}/#{task_definition.id}-result` + `sudo docker exec jplag java -jar /jplag/myJplag.jar #{tasks_dir_split} -l #{file_lang} --similarity-threshold=0.8 -M RUN -r #{results_dir}/#{task_definition.id}-result` end # Delete the extracted code files from tmp From bf1f7d61ef5da75aa239eda0542fa2ef164c329e Mon Sep 17 00:00:00 2001 From: JackSCarroll <64730336+JackSCarroll@users.noreply.github.com> Date: Sat, 21 Sep 2024 15:36:26 +1000 Subject: [PATCH 09/15] chore: remove hard coded path and update file_lang --- .../similarity/unit_similarity_module.rb | 57 +++++++++++-------- 1 file changed, 33 insertions(+), 24 deletions(-) diff --git a/app/models/similarity/unit_similarity_module.rb b/app/models/similarity/unit_similarity_module.rb index cfb99ae8f..9cfcc11d0 100644 --- a/app/models/similarity/unit_similarity_module.rb +++ b/app/models/similarity/unit_similarity_module.rb @@ -44,6 +44,8 @@ def check_similarity(force: false) tasks = tasks_for_definition(td) tasks_with_files = tasks.select(&:has_pdf) + # JPLAG + run_jplag_on_done_files(td, tasks_dir, tasks_with_files, unit_code) # Skip if not due yet next if td.due_date > Time.zone.now @@ -58,8 +60,6 @@ def check_similarity(force: false) # There are new tasks, check these - # JPLAG - run_jplag_on_done_files(td, tasks_dir, tasks_with_files, unit_code) logger.debug 'Contacting MOSS for new checks' @@ -240,6 +240,16 @@ def add_done_files_for_plagiarism_check_of(task_definition, tmp_path, to_check, def run_jplag_on_done_files(task_definition, tasks_dir, tasks_with_files, unit_code) type_data = task_definition.moss_language.split return if type_data.nil? || (type_data.length != 2) || (type_data[0] != 'moss') + similarity_pct = task_definition.plagiarism_warn_pct + return if similarity_pct.nil? + + # Check if the directory exists and create it if it doesn't + results_dir = "/jplag/results/#{unit_code}" + `sudo docker exec jplag sh -c 'if [ ! -d "#{results_dir}" ]; then mkdir -p "#{results_dir}"; fi'` + + # Remove existing result file if it exists + result_file = "#{results_dir}/#{task_definition.id}-result.zip" + `sudo docker exec jplag sh -c 'if [ -f "#{result_file}" ]; then rm "#{result_file}"; fi'` # get each code file for each task task_definition.upload_requirements.each_with_index do |upreq, idx| @@ -251,33 +261,32 @@ def run_jplag_on_done_files(task_definition, tasks_dir, tasks_with_files, unit_c tasks_with_files.each do |t| t.extract_file_from_done(tasks_dir, pattern, ->(_task, to_path, name) { File.join(to_path.to_s, t.student.username.to_s, name.to_s) }) end - puts "Starting JPLAG container to run on #{tasks_dir}" - tasks_dir_split = tasks_dir.to_s.split('/workspace/doubtfire-api')[1] - - # Check if the directory exists and create it if it doesn't - results_dir = "/jplag/results/#{unit_code}" - `sudo docker exec jplag sh -c 'if [ ! -d "#{results_dir}" ]; then mkdir -p "#{results_dir}"; fi'` - - # Remove existing result file if it exists - result_file = "#{results_dir}/#{task_definition.id}-result.zip" - `sudo docker exec jplag sh -c 'if [ -f "#{result_file}" ]; then rm "#{result_file}"; fi'` - - case type_data[1] - when 'csharp' - file_lang = 'csharp' - when 'cc' - file_lang = 'cpp' - end - # Run JPLAG - puts "THE FILE TYPE IN MOSS: #{type_data[1]}" - `sudo docker exec jplag java -jar /jplag/myJplag.jar #{tasks_dir_split} -l #{file_lang} --similarity-threshold=0.8 -M RUN -r #{results_dir}/#{task_definition.id}-result` + logger.info "Starting JPLAG container to run on #{tasks_dir}" + root_dir = Rails.root.to_s + tasks_dir_split = tasks_dir.to_s.split(root_dir)[1] + + # Set the file language based on the type + # Currently only supporting C/C++/C#/Python + # MOSS and JPLAG use different names for some languages, need to be converted + # If new MOSS languages options are added to task-defintion-upload, this will need to be updated + file_lang = case type_data[1] + when 'cc' + 'cpp' + when 'python' + 'python3' + else + type_data[1] + end + + # Run JPLAG on the extracted files + `sudo docker exec jplag java -jar /jplag/myJplag.jar #{tasks_dir_split} -l #{file_lang} --similarity-threshold=#{similarity_pct} -M RUN -r #{results_dir}/#{task_definition.id}-result` end # Delete the extracted code files from tmp tmp_dir = Rails.root.join("tmp", "jplag") - puts "Deleting files in: #{tmp_dir}" - puts "Files to delete: #{Dir.glob("#{tmp_dir}/*")}" + logger.info "Deleting files in: #{tmp_dir}" + logger.info "Files to delete: #{Dir.glob("#{tmp_dir}/*")}" FileUtils.rm_rf(Dir.glob("#{tmp_dir}/*")) self From 1caabe4d61d8ce146b0a98ef8b4e2306c29e06f7 Mon Sep 17 00:00:00 2001 From: JackSCarroll <64730336+JackSCarroll@users.noreply.github.com> Date: Wed, 25 Sep 2024 16:55:47 +1000 Subject: [PATCH 10/15] refactor: rename moss language to jplag language --- app/api/entities/task_definition_entity.rb | 2 +- app/api/task_definitions_api.rb | 8 +- .../similarity/unit_similarity_module.rb | 106 +++++++----------- db/migrate/20240105055902_add_tii_details.rb | 4 +- db/schema.rb | 2 +- 5 files changed, 50 insertions(+), 72 deletions(-) diff --git a/app/api/entities/task_definition_entity.rb b/app/api/entities/task_definition_entity.rb index 94ba180d4..3b85dcc70 100644 --- a/app/api/entities/task_definition_entity.rb +++ b/app/api/entities/task_definition_entity.rb @@ -43,6 +43,6 @@ def staff?(my_role) expose :max_quality_pts expose :overseer_image_id, if: ->(unit, options) { staff?(options[:my_role]) } expose :assessment_enabled, if: ->(unit, options) { staff?(options[:my_role]) } - expose :moss_language, if: ->(unit, options) { staff?(options[:my_role]) } + expose :jplag_language, if: ->(unit, options) { staff?(options[:my_role]) } end end diff --git a/app/api/task_definitions_api.rb b/app/api/task_definitions_api.rb index 7e95bb8f8..cc307c3b8 100644 --- a/app/api/task_definitions_api.rb +++ b/app/api/task_definitions_api.rb @@ -32,7 +32,7 @@ class TaskDefinitionsApi < Grape::API requires :max_quality_pts, type: Integer, desc: 'A range for quality points when quality is assessed' optional :assessment_enabled, type: Boolean, desc: 'Enable or disable assessment' optional :overseer_image_id, type: Integer, desc: 'The id of the Docker image for overseer' - optional :moss_language, type: String, desc: 'The language to use for code similarity checks' + optional :jplag_language, type: String, desc: 'The language to use for code similarity checks' end end post '/units/:unit_id/task_definitions/' do @@ -61,7 +61,7 @@ class TaskDefinitionsApi < Grape::API :max_quality_pts, :assessment_enabled, :overseer_image_id, - :moss_language + :jplag_language ) task_params[:unit_id] = unit.id @@ -110,7 +110,7 @@ class TaskDefinitionsApi < Grape::API optional :max_quality_pts, type: Integer, desc: 'A range for quality points when quality is assessed' optional :assessment_enabled, type: Boolean, desc: 'Enable or disable assessment' optional :overseer_image_id, type: Integer, desc: 'The id of the Docker image name for overseer' - optional :moss_language, type: String, desc: 'The language to use for code similarity checks' + optional :jplag_language, type: String, desc: 'The language to use for code similarity checks' end end put '/units/:unit_id/task_definitions/:id' do @@ -138,7 +138,7 @@ class TaskDefinitionsApi < Grape::API :max_quality_pts, :assessment_enabled, :overseer_image_id, - :moss_language + :jplag_language ) task_params[:upload_requirements] = JSON.parse(params[:task_def][:upload_requirements]) unless params[:task_def][:upload_requirements].nil? diff --git a/app/models/similarity/unit_similarity_module.rb b/app/models/similarity/unit_similarity_module.rb index 9cfcc11d0..8c3be0dd6 100644 --- a/app/models/similarity/unit_similarity_module.rb +++ b/app/models/similarity/unit_similarity_module.rb @@ -34,10 +34,7 @@ def check_similarity(force: false) tasks_dir = root_work_dir.join(td.id.to_s) FileUtils.mkdir_p(tasks_dir) - next if td.moss_language.nil? || td.upload_requirements.nil? || td.upload_requirements.select { |upreq| upreq['type'] == 'code' && upreq['tii_check'] }.empty? - - type_data = td.moss_language.split - next if type_data.nil? || (type_data.length != 2) || (type_data[0] != 'moss') + next if td.jplag_language.nil? || td.upload_requirements.nil? || td.upload_requirements.select { |upreq| upreq['type'] == 'code' && upreq['tii_check'] }.empty? # Is there anything to check? logger.debug "Checking plagiarism for #{td.name} (id=#{td.id})" @@ -60,47 +57,46 @@ def check_similarity(force: false) # There are new tasks, check these - logger.debug 'Contacting MOSS for new checks' # Create the MossRuby object - moss_key = Doubtfire::Application.secrets.secret_key_moss - raise "No moss key set. Check ENV['DF_SECRET_KEY_MOSS'] first." if moss_key.nil? - - moss = MossRuby.new(moss_key) - - # Set options -- the options will already have these default values - moss.options[:max_matches] = 7 - moss.options[:directory_submission] = true - moss.options[:show_num_matches] = 500 - moss.options[:experimental_server] = false - moss.options[:comment] = '' - moss.options[:language] = type_data[1] - - tmp_path = File.join(Dir.tmpdir, 'doubtfire', "check-#{id}-#{td.id}") - - begin - # Create a file hash, with the files to be processed - to_check = MossRuby.empty_file_hash - add_done_files_for_plagiarism_check_of(td, tmp_path, to_check, tasks_with_files) - - FileUtils.chdir(tmp_path) - - # Get server to process files - logger.debug 'Sending to MOSS...' - url = moss.check(to_check, ->(_) { print '.' }) - - logger.info "MOSS check for #{code} #{td.abbreviation} url: #{url}" - - td.plagiarism_report_url = url - td.plagiarism_updated = true - td.save - rescue StandardError => e - logger.error "Failed to check plagiarism for task #{td.name} (id=#{td.id}). Error: #{e.message}" - ensure - FileUtils.chdir(pwd) - FileUtils.rm_rf tmp_path - end + # moss_key = Doubtfire::Application.secrets.secret_key_moss + # raise "No moss key set. Check ENV['DF_SECRET_KEY_MOSS'] first." if moss_key.nil? +# + # moss = MossRuby.new(moss_key) +# + # # Set options -- the options will already have these default values + # moss.options[:max_matches] = 7 + # moss.options[:directory_submission] = true + # moss.options[:show_num_matches] = 500 + # moss.options[:experimental_server] = false + # moss.options[:comment] = '' + # moss.options[:language] = type_data[1] +# + # tmp_path = File.join(Dir.tmpdir, 'doubtfire', "check-#{id}-#{td.id}") +# + # begin + # # Create a file hash, with the files to be processed + # to_check = MossRuby.empty_file_hash + # add_done_files_for_plagiarism_check_of(td, tmp_path, to_check, tasks_with_files) +# + # FileUtils.chdir(tmp_path) +# + # # Get server to process files + # logger.debug 'Sending to MOSS...' + # url = moss.check(to_check, ->(_) { print '.' }) +# + # logger.info "MOSS check for #{code} #{td.abbreviation} url: #{url}" +# + # td.plagiarism_report_url = url + # td.plagiarism_updated = true + # td.save + # rescue StandardError => e + # logger.error "Failed to check plagiarism for task #{td.name} (id=#{td.id}). Error: #{e.message}" + # ensure + # FileUtils.chdir(pwd) + # FileUtils.rm_rf tmp_path + # end end self.last_plagarism_scan = Time.zone.now save! @@ -214,9 +210,6 @@ def create_plagiarism_link(task1, task2, match, warn_pct) # Returns an array of files # def add_done_files_for_plagiarism_check_of(task_definition, tmp_path, to_check, tasks_with_files) - type_data = task_definition.moss_language.split - return if type_data.nil? || (type_data.length != 2) || (type_data[0] != 'moss') - # get each code file for each task task_definition.upload_requirements.each_with_index do |upreq, idx| # only check code files marked for similarity checks @@ -238,18 +231,16 @@ def add_done_files_for_plagiarism_check_of(task_definition, tmp_path, to_check, # JPLAG Function - extracts "done" files for each task and packages them into a directory for JPLAG to run on def run_jplag_on_done_files(task_definition, tasks_dir, tasks_with_files, unit_code) - type_data = task_definition.moss_language.split - return if type_data.nil? || (type_data.length != 2) || (type_data[0] != 'moss') similarity_pct = task_definition.plagiarism_warn_pct return if similarity_pct.nil? # Check if the directory exists and create it if it doesn't results_dir = "/jplag/results/#{unit_code}" - `sudo docker exec jplag sh -c 'if [ ! -d "#{results_dir}" ]; then mkdir -p "#{results_dir}"; fi'` + `docker exec jplag sh -c 'if [ ! -d "#{results_dir}" ]; then mkdir -p "#{results_dir}"; fi'` # Remove existing result file if it exists result_file = "#{results_dir}/#{task_definition.id}-result.zip" - `sudo docker exec jplag sh -c 'if [ -f "#{result_file}" ]; then rm "#{result_file}"; fi'` + `docker exec jplag sh -c 'if [ -f "#{result_file}" ]; then rm "#{result_file}"; fi'` # get each code file for each task task_definition.upload_requirements.each_with_index do |upreq, idx| @@ -265,22 +256,10 @@ def run_jplag_on_done_files(task_definition, tasks_dir, tasks_with_files, unit_c logger.info "Starting JPLAG container to run on #{tasks_dir}" root_dir = Rails.root.to_s tasks_dir_split = tasks_dir.to_s.split(root_dir)[1] - - # Set the file language based on the type - # Currently only supporting C/C++/C#/Python - # MOSS and JPLAG use different names for some languages, need to be converted - # If new MOSS languages options are added to task-defintion-upload, this will need to be updated - file_lang = case type_data[1] - when 'cc' - 'cpp' - when 'python' - 'python3' - else - type_data[1] - end + file_lang = task_definition.jplag_language.to_s # Run JPLAG on the extracted files - `sudo docker exec jplag java -jar /jplag/myJplag.jar #{tasks_dir_split} -l #{file_lang} --similarity-threshold=#{similarity_pct} -M RUN -r #{results_dir}/#{task_definition.id}-result` + `docker exec jplag java -jar /jplag/myJplag.jar #{tasks_dir_split} -l #{file_lang} --similarity-threshold=#{similarity_pct} -M RUN -r #{results_dir}/#{task_definition.id}-result` end # Delete the extracted code files from tmp @@ -288,7 +267,6 @@ def run_jplag_on_done_files(task_definition, tasks_dir, tasks_with_files, unit_c logger.info "Deleting files in: #{tmp_dir}" logger.info "Files to delete: #{Dir.glob("#{tmp_dir}/*")}" FileUtils.rm_rf(Dir.glob("#{tmp_dir}/*")) - self end end diff --git a/db/migrate/20240105055902_add_tii_details.rb b/db/migrate/20240105055902_add_tii_details.rb index a571f80d1..745d30ce4 100644 --- a/db/migrate/20240105055902_add_tii_details.rb +++ b/db/migrate/20240105055902_add_tii_details.rb @@ -7,7 +7,7 @@ def change add_column :units, :tii_group_context_id, :string add_column :task_definitions, :tii_group_id, :string - add_column :task_definitions, :moss_language, :string + add_column :task_definitions, :jplag_language, :string rename_table :plagiarism_match_links, :task_similarities @@ -88,7 +88,7 @@ def change next unless plagiarism_checks.any? - task_definition.update(moss_language: plagiarism_checks.first['type']) + task_definition.update(jplag_language: plagiarism_checks.first['type']) task_definition.upload_requirements.each do |upload_requirement| next unless upload_requirement['type'] == 'code' upload_requirement['tii_check'] = true diff --git a/db/schema.rb b/db/schema.rb index 6daa71ebf..65679b636 100644 --- a/db/schema.rb +++ b/db/schema.rb @@ -249,7 +249,7 @@ t.boolean "assessment_enabled", default: false t.bigint "overseer_image_id" t.string "tii_group_id" - t.string "moss_language" + t.string "jplag_language" t.index ["group_set_id"], name: "index_task_definitions_on_group_set_id" t.index ["overseer_image_id"], name: "index_task_definitions_on_overseer_image_id" t.index ["tutorial_stream_id"], name: "index_task_definitions_on_tutorial_stream_id" From b95fa31e2a98aead5a0f180f9ee4fc292900ea7c Mon Sep 17 00:00:00 2001 From: JackSCarroll <64730336+JackSCarroll@users.noreply.github.com> Date: Wed, 2 Oct 2024 12:19:15 +1000 Subject: [PATCH 11/15] refactor: adjust similarity match code to be suited for jplag --- .../similarity/jplag_task_similarity.rb | 50 +++++ .../task_definition_similarity_module.rb | 6 +- .../similarity/unit_similarity_module.rb | 176 +++++++++++++----- 3 files changed, 179 insertions(+), 53 deletions(-) create mode 100644 app/models/similarity/jplag_task_similarity.rb diff --git a/app/models/similarity/jplag_task_similarity.rb b/app/models/similarity/jplag_task_similarity.rb new file mode 100644 index 000000000..20d5ca1b6 --- /dev/null +++ b/app/models/similarity/jplag_task_similarity.rb @@ -0,0 +1,50 @@ +# frozen_string_literal: true + +class JplagTaskSimilarity < TaskSimilarity + belongs_to :other_task, class_name: 'Task' + + def file_path + FileHelper.path_to_plagarism_html(self) + end + + # + # Ensure file is also deleted + # + before_destroy do |similarity| + if similarity.task.group_task? + other_tasks = similarity.task.group_submission.tasks.reject { |t| t.id == similarity.task.id } + + other_tasks_using_file = other_tasks.select { |t| t.task_similarities.where(other_task_id: similarity.other_task_id).count > 0 } + FileHelper.delete_plagarism_html(similarity) unless other_tasks_using_file.count > 0 + else # individual... so can delete file + FileHelper.delete_plagarism_html(similarity) + end + rescue StandardError => e + logger.error "Error deleting match link for task #{similarity.task.id}. Error: #{e.message}" + end + + after_destroy do |similarity| + similarity.other_similarity&.destroy + end + + def other_similarity + JplagTaskSimilarity.where(task_id: other_task.id, other_task_id: task.id).first unless other_task.nil? + end + + def other_student + other_task&.student + end + + def other_tutor + other_task&.project&.tutor_for(other_task.task_definition) + end + + def other_tutorial + tute = other_task.project.tutorial_for(other_task.task_definition) unless other_task.nil? + tute.nil? ? 'None' : tute.abbreviation + end + + def ready_for_viewer? + true + end +end diff --git a/app/models/similarity/task_definition_similarity_module.rb b/app/models/similarity/task_definition_similarity_module.rb index 5eb5ed802..491a69790 100644 --- a/app/models/similarity/task_definition_similarity_module.rb +++ b/app/models/similarity/task_definition_similarity_module.rb @@ -3,14 +3,14 @@ # Provides moss and tii similarity features in task definitions module TaskDefinitionSimilarityModule def moss_similarities? - MossTaskSimilarity.joins(:task).where('tasks.task_definition_id' => id).count > 0 + JplagTaskSimilarity.joins(:task).where('tasks.task_definition_id' => id).count > 0 end def clear_related_plagiarism # delete old plagiarism links logger.info "Deleting old links for task definition #{id} - #{abbreviation}" - MossTaskSimilarity.joins(:task).where('tasks.task_definition_id' => id).find_each do |plnk| - pair = MossTaskSimilarity.find_by(id: plnk.id) + JplagTaskSimilarity.joins(:task).where('tasks.task_definition_id' => id).find_each do |plnk| + pair = JplagTaskSimilarity.find_by(id: plnk.id) pair.destroy! if pair.present? end end diff --git a/app/models/similarity/unit_similarity_module.rb b/app/models/similarity/unit_similarity_module.rb index 8c3be0dd6..16d6bd71a 100644 --- a/app/models/similarity/unit_similarity_module.rb +++ b/app/models/similarity/unit_similarity_module.rb @@ -43,6 +43,8 @@ def check_similarity(force: false) # JPLAG run_jplag_on_done_files(td, tasks_dir, tasks_with_files, unit_code) + report_path = "#{Doubtfire::Application.config.jplag_report_dir}/#{unit_code}/#{td.id}-result.zip" + create_jplag_plagiarism_link(report_path, td.plagiarism_warn_pct) # Skip if not due yet next if td.due_date > Time.zone.now @@ -62,9 +64,9 @@ def check_similarity(force: false) # Create the MossRuby object # moss_key = Doubtfire::Application.secrets.secret_key_moss # raise "No moss key set. Check ENV['DF_SECRET_KEY_MOSS'] first." if moss_key.nil? -# + # # moss = MossRuby.new(moss_key) -# + # # # Set options -- the options will already have these default values # moss.options[:max_matches] = 7 # moss.options[:directory_submission] = true @@ -72,22 +74,22 @@ def check_similarity(force: false) # moss.options[:experimental_server] = false # moss.options[:comment] = '' # moss.options[:language] = type_data[1] -# + # # tmp_path = File.join(Dir.tmpdir, 'doubtfire', "check-#{id}-#{td.id}") -# + # # begin # # Create a file hash, with the files to be processed # to_check = MossRuby.empty_file_hash # add_done_files_for_plagiarism_check_of(td, tmp_path, to_check, tasks_with_files) -# + # # FileUtils.chdir(tmp_path) -# + # # # Get server to process files # logger.debug 'Sending to MOSS...' # url = moss.check(to_check, ->(_) { print '.' }) -# + # # logger.info "MOSS check for #{code} #{td.abbreviation} url: #{url}" -# + # # td.plagiarism_report_url = url # td.plagiarism_updated = true # td.save @@ -162,48 +164,48 @@ def update_plagiarism_stats private - def create_plagiarism_link(task1, task2, match, warn_pct) - plk1 = MossTaskSimilarity.where(task_id: task1.id, other_task_id: task2.id).first - plk2 = MossTaskSimilarity.where(task_id: task2.id, other_task_id: task1.id).first - - if plk1.nil? || plk2.nil? - # Delete old links between tasks - plk1&.destroy ## will delete its pair - plk2&.destroy - - plk1 = MossTaskSimilarity.create do |plm| - plm.task = task1 - plm.other_task = task2 - plm.pct = match[0][:pct] - plm.flagged = plm.pct >= warn_pct - end - - plk2 = MossTaskSimilarity.create do |plm| - plm.task = task2 - plm.other_task = task1 - plm.pct = match[1][:pct] - plm.flagged = plm.pct >= warn_pct - end - else - # puts "#{plk1.pct} != #{match[0][:pct]}, #{plk1.pct != match[0][:pct]}" - - # Flag is larger than warn pct and larger than previous pct - plk1.flagged = match[0][:pct] >= warn_pct && match[0][:pct] >= plk1.pct - plk2.flagged = match[1][:pct] >= warn_pct && match[1][:pct] >= plk2.pct - - plk1.pct = match[0][:pct] - plk2.pct = match[1][:pct] - end - - plk1.plagiarism_report_url = match[0][:url] - plk2.plagiarism_report_url = match[1][:url] - - plk1.save! - plk2.save! - - FileHelper.save_plagiarism_html(plk1, match[0][:html]) - FileHelper.save_plagiarism_html(plk2, match[1][:html]) - end + # def create_plagiarism_link(task1, task2, match, warn_pct) + # plk1 = MossTaskSimilarity.where(task_id: task1.id, other_task_id: task2.id).first + # plk2 = MossTaskSimilarity.where(task_id: task2.id, other_task_id: task1.id).first + # + # if plk1.nil? || plk2.nil? + # # Delete old links between tasks + # plk1&.destroy ## will delete its pair + # plk2&.destroy + # + # plk1 = MossTaskSimilarity.create do |plm| + # plm.task = task1 + # plm.other_task = task2 + # plm.pct = match[0][:pct] + # plm.flagged = plm.pct >= warn_pct + # end + # + # plk2 = MossTaskSimilarity.create do |plm| + # plm.task = task2 + # plm.other_task = task1 + # plm.pct = match[1][:pct] + # plm.flagged = plm.pct >= warn_pct + # end + # else + # # puts "#{plk1.pct} != #{match[0][:pct]}, #{plk1.pct != match[0][:pct]}" + # + # # Flag is larger than warn pct and larger than previous pct + # plk1.flagged = match[0][:pct] >= warn_pct && match[0][:pct] >= plk1.pct + # plk2.flagged = match[1][:pct] >= warn_pct && match[1][:pct] >= plk2.pct + # + # plk1.pct = match[0][:pct] + # plk2.pct = match[1][:pct] + # end + # + # plk1.plagiarism_report_url = match[0][:url] + # plk2.plagiarism_report_url = match[1][:url] + # + # plk1.save! + # plk2.save! + # + # FileHelper.save_plagiarism_html(plk1, match[0][:html]) + # FileHelper.save_plagiarism_html(plk2, match[1][:html]) + # end # # Extract all done files related to a task definition matching a pattern into a given directory. @@ -269,4 +271,78 @@ def run_jplag_on_done_files(task_definition, tasks_dir, tasks_with_files, unit_c FileUtils.rm_rf(Dir.glob("#{tmp_dir}/*")) self end + + def create_jplag_plagiarism_link(path, warn_pct) + # Extract overview json from report zip + Zip::File.open(path) do |zip_file| + overview_entry = zip_file.find_entry('overview.json') + + if overview_entry + # Read the contents of overview.json + overview_content = overview_entry.get_input_stream.read + + # Parse the JSON into a Ruby hash + overview_data = JSON.parse(overview_content) + + # Iterate over the "top_comparisons" array and collect the required fields + top_comparisons = overview_data['top_comparisons'].map do |comparison| + { + first_submission: comparison['first_submission'], + second_submission: comparison['second_submission'], + max_similarity: comparison['similarities']['MAX'] + } + end + task_path = overview_data['submission_folder_path'] + + # Save the results to the database + top_comparisons.each do |comparison| + + # TODO: Figure out why the task id is even being used. + task_id = task_path.split('/')[-1] + puts "Task ID: #{task_id}" + first_submission = Task.find(task_id) + second_submission = Task.find(task_id) + + if first_submission.nil? || second_submission.nil? + logger.error "Could not find tasks #{comparison[:first_submission]} or #{comparison[:second_submission]} for plagiarism stats check!" + next + end + + # Create a new plagiarism link between the two tasks + plk1 = MossTaskSimilarity.where(task_id: first_submission.id, other_task_id: second_submission.id).first + plk2 = MossTaskSimilarity.where(task_id: second_submission.id, other_task_id: first_submission.id).first + if plk1.nil? || plk2.nil? + # Delete old links between tasks + plk1&.destroy ## will delete its pair + plk2&.destroy + plk1 = JplagTaskSimilarity.create do |plm| + plm.task = first_submission + plm.other_task = second_submission + plm.pct = comparison[:max_similarity] + plm.flagged = plm.pct >= warn_pct + end + plk2 = JplagTaskSimilarity.create do |plm| + plm.task = second_submission + plm.other_task = first_submission + plm.pct = comparison[:max_similarity] + plm.flagged = plm.pct >= warn_pct + end + else + # puts "#{plk1.pct} != #{match[0][:pct]}, #{plk1.pct != match[0][:pct]}" + # Flag is larger than warn pct and larger than previous pct + first_submission.flagged = comparison[:max_similarity] >= warn_pct && comparison[:max_similarity] >= first_submission.pct + second_submission.flagged = comparison[:max_similarity] >= warn_pct && comparison[:max_similarity] >= second_submission.pct + first_submission.pct = comparison[:max_similarity] + second_submission.pct = comparison[:max_similarity] + end + plk1.save! + plk2.save! + end + else + puts 'overview.json not found in the zip file' + end + + self + end + end end From 2a237597b29b49a833fd55e6bcc4a2e9a604b8d0 Mon Sep 17 00:00:00 2001 From: JackSCarroll <64730336+JackSCarroll@users.noreply.github.com> Date: Thu, 3 Oct 2024 23:23:21 +1000 Subject: [PATCH 12/15] refactor: add plagiarism link from jplag reports --- app/models/similarity/moss_task_similarity.rb | 50 ------------------- .../similarity/unit_similarity_module.rb | 36 +++++++------ 2 files changed, 21 insertions(+), 65 deletions(-) delete mode 100644 app/models/similarity/moss_task_similarity.rb diff --git a/app/models/similarity/moss_task_similarity.rb b/app/models/similarity/moss_task_similarity.rb deleted file mode 100644 index bbe066a01..000000000 --- a/app/models/similarity/moss_task_similarity.rb +++ /dev/null @@ -1,50 +0,0 @@ -# frozen_string_literal: true - -class MossTaskSimilarity < TaskSimilarity - belongs_to :other_task, class_name: 'Task' - - def file_path - FileHelper.path_to_plagarism_html(self) - end - - # - # Ensure file is also deleted - # - before_destroy do |similarity| - if similarity.task.group_task? - other_tasks = similarity.task.group_submission.tasks.reject { |t| t.id == similarity.task.id } - - other_tasks_using_file = other_tasks.select { |t| t.task_similarities.where(other_task_id: similarity.other_task_id).count > 0 } - FileHelper.delete_plagarism_html(similarity) unless other_tasks_using_file.count > 0 - else # individual... so can delete file - FileHelper.delete_plagarism_html(similarity) - end - rescue StandardError => e - logger.error "Error deleting match link for task #{similarity.task.id}. Error: #{e.message}" - end - - after_destroy do |similarity| - similarity.other_similarity&.destroy - end - - def other_similarity - MossTaskSimilarity.where(task_id: other_task.id, other_task_id: task.id).first unless other_task.nil? - end - - def other_student - other_task&.student - end - - def other_tutor - other_task&.project&.tutor_for(other_task.task_definition) - end - - def other_tutorial - tute = other_task.project.tutorial_for(other_task.task_definition) unless other_task.nil? - tute.nil? ? 'None' : tute.abbreviation - end - - def ready_for_viewer? - true - end -end diff --git a/app/models/similarity/unit_similarity_module.rb b/app/models/similarity/unit_similarity_module.rb index 16d6bd71a..eabffdc0b 100644 --- a/app/models/similarity/unit_similarity_module.rb +++ b/app/models/similarity/unit_similarity_module.rb @@ -44,7 +44,9 @@ def check_similarity(force: false) # JPLAG run_jplag_on_done_files(td, tasks_dir, tasks_with_files, unit_code) report_path = "#{Doubtfire::Application.config.jplag_report_dir}/#{unit_code}/#{td.id}-result.zip" - create_jplag_plagiarism_link(report_path, td.plagiarism_warn_pct) + warn_pct = td.plagiarism_warn_pct || 50 + puts "Warn PCT: #{warn_pct}" + create_jplag_plagiarism_link(report_path, warn_pct) # Skip if not due yet next if td.due_date > Time.zone.now @@ -289,19 +291,23 @@ def create_jplag_plagiarism_link(path, warn_pct) { first_submission: comparison['first_submission'], second_submission: comparison['second_submission'], - max_similarity: comparison['similarities']['MAX'] + max_similarity: comparison['similarities']['MAX'] * 100 } end - task_path = overview_data['submission_folder_path'] # Save the results to the database top_comparisons.each do |comparison| - - # TODO: Figure out why the task id is even being used. - task_id = task_path.split('/')[-1] - puts "Task ID: #{task_id}" - first_submission = Task.find(task_id) - second_submission = Task.find(task_id) + task1_id = nil + task2_id = nil + zip_file.each do |entry| + if entry.name =~ %r{\Afiles/#{comparison[:first_submission]}/} + task1_id = entry.name.split('/')[2].to_i + elsif entry.name =~ %r{\Afiles/#{comparison[:second_submission]}/} + task2_id = entry.name.split('/')[2].to_i + end + end + first_submission = Task.find(task1_id) if task1_id + second_submission = Task.find(task2_id) if task2_id if first_submission.nil? || second_submission.nil? logger.error "Could not find tasks #{comparison[:first_submission]} or #{comparison[:second_submission]} for plagiarism stats check!" @@ -309,8 +315,8 @@ def create_jplag_plagiarism_link(path, warn_pct) end # Create a new plagiarism link between the two tasks - plk1 = MossTaskSimilarity.where(task_id: first_submission.id, other_task_id: second_submission.id).first - plk2 = MossTaskSimilarity.where(task_id: second_submission.id, other_task_id: first_submission.id).first + plk1 = JplagTaskSimilarity.where(task_id: task1_id, other_task_id: task2_id).first + plk2 = JplagTaskSimilarity.where(task_id: task2_id, other_task_id: task1_id).first if plk1.nil? || plk2.nil? # Delete old links between tasks plk1&.destroy ## will delete its pair @@ -330,10 +336,10 @@ def create_jplag_plagiarism_link(path, warn_pct) else # puts "#{plk1.pct} != #{match[0][:pct]}, #{plk1.pct != match[0][:pct]}" # Flag is larger than warn pct and larger than previous pct - first_submission.flagged = comparison[:max_similarity] >= warn_pct && comparison[:max_similarity] >= first_submission.pct - second_submission.flagged = comparison[:max_similarity] >= warn_pct && comparison[:max_similarity] >= second_submission.pct - first_submission.pct = comparison[:max_similarity] - second_submission.pct = comparison[:max_similarity] + plk1.flagged = comparison[:max_similarity] >= warn_pct && comparison[:max_similarity] >= plk1.pct + plk2.flagged = comparison[:max_similarity] >= warn_pct && comparison[:max_similarity] >= plk2.pct + plk1.pct = comparison[:max_similarity] + plk2.pct = comparison[:max_similarity] end plk1.save! plk2.save! From f9b0ae0b8e5c54f38ebec719b90980a57548728b Mon Sep 17 00:00:00 2001 From: JackSCarroll <64730336+JackSCarroll@users.noreply.github.com> Date: Fri, 11 Oct 2024 17:18:39 +1100 Subject: [PATCH 13/15] refactor: add jplag plagiarism link support for group tasks --- .../entities/task_similarity_entity.rb | 21 +- .../similarity/unit_similarity_module.rb | 249 +++++------------- app/models/task.rb | 2 +- 3 files changed, 64 insertions(+), 208 deletions(-) diff --git a/app/api/similarity/entities/task_similarity_entity.rb b/app/api/similarity/entities/task_similarity_entity.rb index 001f07594..9361246fd 100644 --- a/app/api/similarity/entities/task_similarity_entity.rb +++ b/app/api/similarity/entities/task_similarity_entity.rb @@ -1,9 +1,6 @@ module Similarity module Entities class TaskSimilarityEntity < Grape::Entity - def staff?(my_role) - Role.teaching_staff_ids.include?(my_role.id) unless my_role.nil? - end expose :id expose :type @@ -13,7 +10,7 @@ def staff?(my_role) similarity.ready_for_viewer? end - expose :parts do |similarity, options| + expose :parts do |similarity| path = similarity.file_path has_resource = path.present? && File.exist?(path) @@ -21,24 +18,12 @@ def staff?(my_role) { idx: 0, format: if has_resource - similarity.type == 'MossTaskSimilarity' ? 'html' : 'pdf' + similarity.type == 'JplagTaskSimilarity' ? 'html' : 'pdf' end, - description: "#{similarity.student.name} (#{similarity.student.username}) - #{similarity.pct}%" + description: "#{similarity.other_student.name} (#{similarity.other_student.username}) - #{similarity.pct}% similarity" } ] - # For moss similarity, show staff other student details - if similarity.type == 'MossTaskSimilarity' && staff?(options[:my_role]) - other_path = similarity.other_similarity&.file_path - has_other_resource = other_path.present? && File.exist?(other_path) - - result << { - idx: 1, - format: has_other_resource ? 'html' : nil, - description: "Match: #{similarity.other_student&.name} (#{similarity.other_student&.username}) - #{similarity.other_similarity&.pct}" - } - end - result end end diff --git a/app/models/similarity/unit_similarity_module.rb b/app/models/similarity/unit_similarity_module.rb index eabffdc0b..bac674464 100644 --- a/app/models/similarity/unit_similarity_module.rb +++ b/app/models/similarity/unit_similarity_module.rb @@ -41,15 +41,15 @@ def check_similarity(force: false) tasks = tasks_for_definition(td) tasks_with_files = tasks.select(&:has_pdf) - # JPLAG run_jplag_on_done_files(td, tasks_dir, tasks_with_files, unit_code) report_path = "#{Doubtfire::Application.config.jplag_report_dir}/#{unit_code}/#{td.id}-result.zip" warn_pct = td.plagiarism_warn_pct || 50 puts "Warn PCT: #{warn_pct}" - create_jplag_plagiarism_link(report_path, warn_pct) + process_jplag_plagiarism_report(report_path, warn_pct, td.group_set) # Skip if not due yet - next if td.due_date > Time.zone.now + # TODO: Re-enable this after testing + # next if td.due_date > Time.zone.now # Skip if no files changed next unless tasks_with_files.count > 1 && @@ -59,48 +59,7 @@ def check_similarity(force: false) force ) - # There are new tasks, check these - - logger.debug 'Contacting MOSS for new checks' - - # Create the MossRuby object - # moss_key = Doubtfire::Application.secrets.secret_key_moss - # raise "No moss key set. Check ENV['DF_SECRET_KEY_MOSS'] first." if moss_key.nil? - # - # moss = MossRuby.new(moss_key) - # - # # Set options -- the options will already have these default values - # moss.options[:max_matches] = 7 - # moss.options[:directory_submission] = true - # moss.options[:show_num_matches] = 500 - # moss.options[:experimental_server] = false - # moss.options[:comment] = '' - # moss.options[:language] = type_data[1] - # - # tmp_path = File.join(Dir.tmpdir, 'doubtfire', "check-#{id}-#{td.id}") - # - # begin - # # Create a file hash, with the files to be processed - # to_check = MossRuby.empty_file_hash - # add_done_files_for_plagiarism_check_of(td, tmp_path, to_check, tasks_with_files) - # - # FileUtils.chdir(tmp_path) - # - # # Get server to process files - # logger.debug 'Sending to MOSS...' - # url = moss.check(to_check, ->(_) { print '.' }) - # - # logger.info "MOSS check for #{code} #{td.abbreviation} url: #{url}" - # - # td.plagiarism_report_url = url - # td.plagiarism_updated = true - # td.save - # rescue StandardError => e - # logger.error "Failed to check plagiarism for task #{td.name} (id=#{td.id}). Error: #{e.message}" - # ensure - # FileUtils.chdir(pwd) - # FileUtils.rm_rf tmp_path - # end + # There are new tasks, check these with JPLAG end self.last_plagarism_scan = Time.zone.now save! @@ -111,128 +70,26 @@ def check_similarity(force: false) self end - def update_plagiarism_stats - moss_key = Doubtfire::Application.secrets.secret_key_moss - raise "No moss key set. Check ENV['DF_SECRET_KEY_MOSS'] first." if moss_key.nil? - - moss = MossRuby.new(moss_key) - - task_definitions.where(plagiarism_updated: true).find_each do |td| - td.plagiarism_updated = false - td.save - - # Get results - url = td.plagiarism_report_url - logger.debug "Processing MOSS results #{url}" - - warn_pct = td.plagiarism_warn_pct || 50 - - results = moss.extract_results(url, warn_pct, ->(line) { puts line }) - - # Use results - results.each do |match| - task_id1 = %r{.*/(\d+)/$}.match(match[0][:filename])[1] - task_id2 = %r{.*/(\d+)/$}.match(match[1][:filename])[1] - - t1 = Task.find(task_id1) - t2 = Task.find(task_id2) - - if t1.nil? || t2.nil? - logger.error "Could not find tasks #{task_id1} or #{task_id2} for plagiarism stats check!" - next - end - - if td.group_set # its a group task - g1_tasks = t1.group_submission.tasks - g2_tasks = t2.group_submission.tasks - - g1_tasks.each do |gt1| - g2_tasks.each do |gt2| - create_plagiarism_link(gt1, gt2, match, warn_pct) - end - end - - else # just link the individuals... - create_plagiarism_link(t1, t2, match, warn_pct) - end - end - end - - self.last_plagarism_scan = Time.zone.now - save! - - self - end - private - # def create_plagiarism_link(task1, task2, match, warn_pct) - # plk1 = MossTaskSimilarity.where(task_id: task1.id, other_task_id: task2.id).first - # plk2 = MossTaskSimilarity.where(task_id: task2.id, other_task_id: task1.id).first - # - # if plk1.nil? || plk2.nil? - # # Delete old links between tasks - # plk1&.destroy ## will delete its pair - # plk2&.destroy - # - # plk1 = MossTaskSimilarity.create do |plm| - # plm.task = task1 - # plm.other_task = task2 - # plm.pct = match[0][:pct] - # plm.flagged = plm.pct >= warn_pct - # end - # - # plk2 = MossTaskSimilarity.create do |plm| - # plm.task = task2 - # plm.other_task = task1 - # plm.pct = match[1][:pct] - # plm.flagged = plm.pct >= warn_pct + # Extract all done files related to a task definition matching a pattern into a given directory. + # Returns an array of files + # def add_done_files_for_plagiarism_check_of(task_definition, tmp_path, tasks_with_files) + # # get each code file for each task + # task_definition.upload_requirements.each_with_index do |upreq, idx| + # # only check code files marked for similarity checks + # next unless upreq['type'] == 'code' && upreq['tii_check'] +# + # pattern = task_definition.glob_for_upload_requirement(idx) +# + # tasks_with_files.each do |t| + # t.extract_file_from_done(tmp_path, pattern, ->(_task, to_path, name) { File.join(to_path.to_s, t.student.username.to_s, name.to_s) }) # end - # else - # # puts "#{plk1.pct} != #{match[0][:pct]}, #{plk1.pct != match[0][:pct]}" - # - # # Flag is larger than warn pct and larger than previous pct - # plk1.flagged = match[0][:pct] >= warn_pct && match[0][:pct] >= plk1.pct - # plk2.flagged = match[1][:pct] >= warn_pct && match[1][:pct] >= plk2.pct - # - # plk1.pct = match[0][:pct] - # plk2.pct = match[1][:pct] # end - # - # plk1.plagiarism_report_url = match[0][:url] - # plk2.plagiarism_report_url = match[1][:url] - # - # plk1.save! - # plk2.save! - # - # FileHelper.save_plagiarism_html(plk1, match[0][:html]) - # FileHelper.save_plagiarism_html(plk2, match[1][:html]) +# + # self # end - # - # Extract all done files related to a task definition matching a pattern into a given directory. - # Returns an array of files - # - def add_done_files_for_plagiarism_check_of(task_definition, tmp_path, to_check, tasks_with_files) - # get each code file for each task - task_definition.upload_requirements.each_with_index do |upreq, idx| - # only check code files marked for similarity checks - next unless upreq['type'] == 'code' && upreq['tii_check'] - - pattern = task_definition.glob_for_upload_requirement(idx) - - tasks_with_files.each do |t| - t.extract_file_from_done(tmp_path, pattern, ->(_task, to_path, name) { File.join(to_path.to_s, t.student.username.to_s, name.to_s) }) - end - - # extract files matching each pattern - # -- each pattern - MossRuby.add_file(to_check, "**/#{pattern}") - end - - self - end - # JPLAG Function - extracts "done" files for each task and packages them into a directory for JPLAG to run on def run_jplag_on_done_files(task_definition, tasks_dir, tasks_with_files, unit_code) similarity_pct = task_definition.plagiarism_warn_pct @@ -274,7 +131,7 @@ def run_jplag_on_done_files(task_definition, tasks_dir, tasks_with_files, unit_c self end - def create_jplag_plagiarism_link(path, warn_pct) + def process_jplag_plagiarism_report(path, warn_pct, is_group) # Extract overview json from report zip Zip::File.open(path) do |zip_file| overview_entry = zip_file.find_entry('overview.json') @@ -314,35 +171,18 @@ def create_jplag_plagiarism_link(path, warn_pct) next end - # Create a new plagiarism link between the two tasks - plk1 = JplagTaskSimilarity.where(task_id: task1_id, other_task_id: task2_id).first - plk2 = JplagTaskSimilarity.where(task_id: task2_id, other_task_id: task1_id).first - if plk1.nil? || plk2.nil? - # Delete old links between tasks - plk1&.destroy ## will delete its pair - plk2&.destroy - plk1 = JplagTaskSimilarity.create do |plm| - plm.task = first_submission - plm.other_task = second_submission - plm.pct = comparison[:max_similarity] - plm.flagged = plm.pct >= warn_pct + if is_group # its a group task + g1_tasks = first_submission.group_submission.tasks + g2_tasks = second_submission.group_submission.tasks + g1_tasks.each do |gt1| + g2_tasks.each do |gt2| + next if gt1.student == gt2.student + create_plagiarism_link(gt1, gt2, warn_pct, comparison[:max_similarity]) + end end - plk2 = JplagTaskSimilarity.create do |plm| - plm.task = second_submission - plm.other_task = first_submission - plm.pct = comparison[:max_similarity] - plm.flagged = plm.pct >= warn_pct - end - else - # puts "#{plk1.pct} != #{match[0][:pct]}, #{plk1.pct != match[0][:pct]}" - # Flag is larger than warn pct and larger than previous pct - plk1.flagged = comparison[:max_similarity] >= warn_pct && comparison[:max_similarity] >= plk1.pct - plk2.flagged = comparison[:max_similarity] >= warn_pct && comparison[:max_similarity] >= plk2.pct - plk1.pct = comparison[:max_similarity] - plk2.pct = comparison[:max_similarity] + else # just link the individuals... + create_plagiarism_link(first_submission, second_submission, warn_pct, comparison[:max_similarity]) end - plk1.save! - plk2.save! end else puts 'overview.json not found in the zip file' @@ -351,4 +191,35 @@ def create_jplag_plagiarism_link(path, warn_pct) self end end + + def create_plagiarism_link(task1, task2, warn_pct, max_similarity) + # Create a new plagiarism link between the two tasks + plk1 = JplagTaskSimilarity.where(task_id: task1.id, other_task_id: task2.id).first + plk2 = JplagTaskSimilarity.where(task_id: task2.id, other_task_id: task1.id).first + if plk1.nil? || plk2.nil? + # Delete old links between tasks + plk1&.destroy ## will delete its pair + plk2&.destroy + plk1 = JplagTaskSimilarity.create do |plm| + plm.task = task1 + plm.other_task = task2 + plm.pct = max_similarity + plm.flagged = plm.pct >= warn_pct + end + plk2 = JplagTaskSimilarity.create do |plm| + plm.task = task2 + plm.other_task = task1 + plm.pct = max_similarity + plm.flagged = plm.pct >= warn_pct + end + else + # Flag is larger than warn pct and larger than previous pct + plk1.flagged = max_similarity >= warn_pct && max_similarity >= plk1.pct + plk2.flagged = max_similarity >= warn_pct && max_similarity >= plk2.pct + plk1.pct = max_similarity + plk2.pct = max_similarity + end + plk1.save! + plk2.save! + end end diff --git a/app/models/task.rb b/app/models/task.rb index e75815f90..1fe630f9a 100644 --- a/app/models/task.rb +++ b/app/models/task.rb @@ -116,7 +116,7 @@ def specific_permission_hash(role, perm_hash, _other) has_many :comments, class_name: 'TaskComment', dependent: :destroy, inverse_of: :task has_many :task_similarities, class_name: 'TaskSimilarity', dependent: :destroy, inverse_of: :task - has_many :reverse_task_similarities, class_name: 'MossTaskSimilarity', dependent: :destroy, inverse_of: :other_task, foreign_key: 'other_task_id' + has_many :reverse_task_similarities, class_name: 'JplagTaskSimilarity', dependent: :destroy, inverse_of: :other_task, foreign_key: 'other_task_id' has_many :learning_outcome_task_links, dependent: :destroy # links to learning outcomes has_many :learning_outcomes, through: :learning_outcome_task_links has_many :task_engagements, dependent: :destroy From dab43cb9513872fcb9d12fe4c57b2241885a0654 Mon Sep 17 00:00:00 2001 From: JackSCarroll <64730336+JackSCarroll@users.noreply.github.com> Date: Sat, 12 Oct 2024 23:03:39 +1100 Subject: [PATCH 14/15] refactor: add hasJjplagReport value to task defintion and use for report download endpoint --- app/api/entities/task_definition_entity.rb | 1 + app/api/task_definitions_api.rb | 15 +++++++------ app/helpers/file_helper.rb | 1 + .../similarity/unit_similarity_module.rb | 6 +++--- app/models/task_definition.rb | 21 +++++++++++++++++++ 5 files changed, 33 insertions(+), 11 deletions(-) diff --git a/app/api/entities/task_definition_entity.rb b/app/api/entities/task_definition_entity.rb index 3b85dcc70..edf380b78 100644 --- a/app/api/entities/task_definition_entity.rb +++ b/app/api/entities/task_definition_entity.rb @@ -39,6 +39,7 @@ def staff?(my_role) expose :has_task_sheet?, as: :has_task_sheet expose :has_task_resources?, as: :has_task_resources expose :has_task_assessment_resources?, as: :has_task_assessment_resources, if: ->(unit, options) { staff?(options[:my_role]) } + expose :has_jplag_report?, as: :has_jplag_report, if: ->(unit, options) { staff?(options[:my_role]) } expose :is_graded expose :max_quality_pts expose :overseer_image_id, if: ->(unit, options) { staff?(options[:my_role]) } diff --git a/app/api/task_definitions_api.rb b/app/api/task_definitions_api.rb index cc307c3b8..ea72ced0c 100644 --- a/app/api/task_definitions_api.rb +++ b/app/api/task_definitions_api.rb @@ -628,19 +628,18 @@ class TaskDefinitionsApi < Grape::API error!({ error: 'Not authorised to download JPLAG reports of unit' }, 403) end - file_loc = FileHelper.task_jplag_report_path(unit, task_def) - logger.debug "JPLAG report file location: #{file_loc}" - - if file_loc.nil? || !File.exist?(file_loc) - file_loc = Rails.root.join('public', 'resources', 'FileNotFound.pdf') - header['Content-Disposition'] = 'attachment; filename=FileNotFound.pdf' - else + if task_def.has_jplag_report? + path = FileHelper.task_jplag_report_path(unit, task_def) header['Content-Disposition'] = "attachment; filename=#{task_def.abbreviation}-jplag-report.zip" + else + path = Rails.root.join('public', 'resources', 'FileNotFound.pdf') + content_type 'application/pdf' + header['Content-Disposition'] = 'attachment; filename=FileNotFound.pdf' end header['Access-Control-Expose-Headers'] = 'Content-Disposition' content_type 'application/octet-stream' - stream_file file_loc + stream_file path end end diff --git a/app/helpers/file_helper.rb b/app/helpers/file_helper.rb index 746075f2f..80667ce48 100644 --- a/app/helpers/file_helper.rb +++ b/app/helpers/file_helper.rb @@ -252,6 +252,7 @@ def student_portfolio_path(unit, username, create = true) def task_jplag_report_dir(unit) file_server = Doubtfire::Application.config.jplag_report_dir dst = "#{file_server}/#{unit.code}-#{unit.id}/" # trust the server config and passed in type for paths + dst << sanitized_path("#{unit.code}-#{unit.id}") << '/' dst end diff --git a/app/models/similarity/unit_similarity_module.rb b/app/models/similarity/unit_similarity_module.rb index bac674464..302b6c3c6 100644 --- a/app/models/similarity/unit_similarity_module.rb +++ b/app/models/similarity/unit_similarity_module.rb @@ -42,7 +42,7 @@ def check_similarity(force: false) tasks_with_files = tasks.select(&:has_pdf) run_jplag_on_done_files(td, tasks_dir, tasks_with_files, unit_code) - report_path = "#{Doubtfire::Application.config.jplag_report_dir}/#{unit_code}/#{td.id}-result.zip" + report_path = "#{Doubtfire::Application.config.jplag_report_dir}/#{unit_code}/#{td.abbreviation}-result.zip" warn_pct = td.plagiarism_warn_pct || 50 puts "Warn PCT: #{warn_pct}" process_jplag_plagiarism_report(report_path, warn_pct, td.group_set) @@ -100,7 +100,7 @@ def run_jplag_on_done_files(task_definition, tasks_dir, tasks_with_files, unit_c `docker exec jplag sh -c 'if [ ! -d "#{results_dir}" ]; then mkdir -p "#{results_dir}"; fi'` # Remove existing result file if it exists - result_file = "#{results_dir}/#{task_definition.id}-result.zip" + result_file = "#{results_dir}/#{task_definition.abbreviation}-result.zip" `docker exec jplag sh -c 'if [ -f "#{result_file}" ]; then rm "#{result_file}"; fi'` # get each code file for each task @@ -120,7 +120,7 @@ def run_jplag_on_done_files(task_definition, tasks_dir, tasks_with_files, unit_c file_lang = task_definition.jplag_language.to_s # Run JPLAG on the extracted files - `docker exec jplag java -jar /jplag/myJplag.jar #{tasks_dir_split} -l #{file_lang} --similarity-threshold=#{similarity_pct} -M RUN -r #{results_dir}/#{task_definition.id}-result` + `docker exec jplag java -jar /jplag/myJplag.jar #{tasks_dir_split} -l #{file_lang} --similarity-threshold=#{similarity_pct} -M RUN -r #{results_dir}/#{task_definition.abbreviation}-result` end # Delete the extracted code files from tmp diff --git a/app/models/task_definition.rb b/app/models/task_definition.rb index 7e78bd2a7..18fded61d 100644 --- a/app/models/task_definition.rb +++ b/app/models/task_definition.rb @@ -384,6 +384,10 @@ def has_task_sheet? File.exist? task_sheet end + def has_jplag_report? + File.exist? jplag_report + end + def is_graded? is_graded end @@ -449,6 +453,10 @@ def task_assessment_resources task_assessment_resources_with_abbreviation(abbreviation) end + def jplag_report + task_jplag_report_with_abbreviation(abbreviation) + end + def related_tasks_with_files(consolidate_groups = true) tasks_with_files = tasks.select(&:has_pdf) @@ -537,4 +545,17 @@ def task_assessment_resources_with_abbreviation(abbr) result_with_sanitised_file end end + + def task_jplag_report_with_abbreviation(abbr) + task_path = FileHelper.task_jplag_report_dir unit + + result_with_sanitised_path = "#{task_path}#{FileHelper.sanitized_path(abbr)}-result.zip" + result_with_sanitised_file = "#{task_path}#{FileHelper.sanitized_filename(abbr)}-result.zip" + + if File.exist? result_with_sanitised_path + result_with_sanitised_path + else + result_with_sanitised_file + end + end end From a45eb6baf721b8d2fb95763423425741fc17f91c Mon Sep 17 00:00:00 2001 From: JackSCarroll <64730336+JackSCarroll@users.noreply.github.com> Date: Fri, 18 Oct 2024 20:01:49 +1100 Subject: [PATCH 15/15] fix: fix report download --- app/api/task_definitions_api.rb | 25 +++++++++++++++++++------ app/helpers/file_helper.rb | 3 +-- 2 files changed, 20 insertions(+), 8 deletions(-) diff --git a/app/api/task_definitions_api.rb b/app/api/task_definitions_api.rb index ea72ced0c..6528489cf 100644 --- a/app/api/task_definitions_api.rb +++ b/app/api/task_definitions_api.rb @@ -32,7 +32,7 @@ class TaskDefinitionsApi < Grape::API requires :max_quality_pts, type: Integer, desc: 'A range for quality points when quality is assessed' optional :assessment_enabled, type: Boolean, desc: 'Enable or disable assessment' optional :overseer_image_id, type: Integer, desc: 'The id of the Docker image for overseer' - optional :jplag_language, type: String, desc: 'The language to use for code similarity checks' + optional :jplag_language, type: String, desc: 'The language to use for code similarity checks' end end post '/units/:unit_id/task_definitions/' do @@ -110,7 +110,7 @@ class TaskDefinitionsApi < Grape::API optional :max_quality_pts, type: Integer, desc: 'A range for quality points when quality is assessed' optional :assessment_enabled, type: Boolean, desc: 'Enable or disable assessment' optional :overseer_image_id, type: Integer, desc: 'The id of the Docker image name for overseer' - optional :jplag_language, type: String, desc: 'The language to use for code similarity checks' + optional :jplag_language, type: String, desc: 'The language to use for code similarity checks' end end put '/units/:unit_id/task_definitions/:id' do @@ -623,11 +623,10 @@ class TaskDefinitionsApi < Grape::API get '/units/:unit_id/task_definitions/:task_def_id/jplag_report' do unit = Unit.find(params[:unit_id]) task_def = unit.task_definitions.find(params[:task_def_id]) - unless authorise? current_user, unit, :download_jplag_report error!({ error: 'Not authorised to download JPLAG reports of unit' }, 403) end - + logger.debug "This is the has_jplag_report? #{task_def.has_jplag_report?}" if task_def.has_jplag_report? path = FileHelper.task_jplag_report_path(unit, task_def) header['Content-Disposition'] = "attachment; filename=#{task_def.abbreviation}-jplag-report.zip" @@ -637,9 +636,23 @@ class TaskDefinitionsApi < Grape::API header['Content-Disposition'] = 'attachment; filename=FileNotFound.pdf' end header['Access-Control-Expose-Headers'] = 'Content-Disposition' - content_type 'application/octet-stream' - stream_file path end + + desc 'Get hasJplagReport boolean for a given task' + params do + requires :unit_id, type: Integer, desc: 'The unit to get JPLAG report for' + requires :task_def_id, type: Integer, desc: 'The task definition to get the JPLAG report of' + end + get '/units/:unit_id/task_definitions/:task_def_id/has_jplag_report' do + unit = Unit.find(params[:unit_id]) + task_def = unit.task_definitions.find(params[:task_def_id]) + + unless authorise? current_user, unit, :download_jplag_report + error!({ error: 'Not authorised to download JPLAG reports of unit' }, 403) + end + + task_def.has_jplag_report? + end end diff --git a/app/helpers/file_helper.rb b/app/helpers/file_helper.rb index 80667ce48..b12f1bf4c 100644 --- a/app/helpers/file_helper.rb +++ b/app/helpers/file_helper.rb @@ -252,12 +252,11 @@ def student_portfolio_path(unit, username, create = true) def task_jplag_report_dir(unit) file_server = Doubtfire::Application.config.jplag_report_dir dst = "#{file_server}/#{unit.code}-#{unit.id}/" # trust the server config and passed in type for paths - dst << sanitized_path("#{unit.code}-#{unit.id}") << '/' dst end def task_jplag_report_path(unit, task) - File.join(task_jplag_report_dir(unit), FileHelper.sanitized_filename("#{task.id}-result.zip")) + File.join(task_jplag_report_dir(unit), FileHelper.sanitized_filename("#{task.abbreviation}-result.zip")) end def comment_attachment_path(task_comment, attachment_extension)