fix: ensure ipynb handles markdown, raw, and long output

macite · May 26, 2024 · 955ca0b · 955ca0b
1 parent 5cdff98
commit 955ca0b
Show file tree

Hide file tree

Showing 5 changed files with 100,202 additions and 14 deletions.
diff --git a/app/views/layouts/jupynotex.py b/app/views/layouts/jupynotex.py
@@ -31,6 +31,7 @@
     None: (VERBATIM_BEGIN, VERBATIM_END),
 }
 
+# the tcolorbox formats to use when a cell has an error and when all is OK
 FORMAT_ERROR = r"enhanced,breakable=unlimited,colback=red!5!white,colframe=red!75!"
 FORMAT_OK = (
     r"enhanced,breakable=unlimited,coltitle=red!75!black, colbacktitle=black!10!white, "
@@ -118,7 +119,9 @@ def _proc_src(self, content):
             result.extend(line.replace('```markdown', '```md').strip() for line in source)
             result.extend(MARKDOWN_END)
         elif content['cell_type'] == 'raw':
-            result.extend(_verbatimize(source))
+            result.extend(VERBATIM_BEGIN)
+            result.extend(textwrap.fill(line[:1000] + ' [The rest of this line has been truncated by the system to improve readability.] ' * (len(line) > 1000), width=90, subsequent_indent='    ') for line in source)
+            result.extend(VERBATIM_END)
         else:
             raise ValueError(
                 "Cell type not supported when processing source: {!r}".format(
@@ -144,9 +147,9 @@ def _proc_out(self, content):
                 else:
                     result.extend(_verbatimize(data["text/plain"]))
             elif output_type == 'stream':
-                result.extend(_verbatimize(x.rstrip() for x in item["text"]))
-            elif output_type == 'display_data':
-                result.append(_include_image_content(item['data']))
+                more_content = processor.process_plain_text(item["text"])
+                if len(more_content) > 120:
+                    more_content = more_content[:100] + ["..."] + more_content[-20:]
             elif output_type == 'error':
                 raw_traceback = item['traceback']
                 tback_lines = []
@@ -174,7 +177,6 @@ def get(self, cell_idx):
         output = self._proc_out(content)
         return source, output, content['cell_type'] == 'markdown'
 
-
 def _parse_cells(spec, maxlen):
     """Convert the cells spec to a range of ints."""
     if not spec:
@@ -224,14 +226,16 @@ def main(notebook_path, cells_spec):
             continue
 
         if not md:
-            print(r"\begin{{tcolorbox}}[{}, title=Cell {{{:02d}}}]".format(FORMAT_OK, cell))
+          print(r"\begin{{tcolorbox}}[{}, title=Cell {{{:02d}}}]".format(FORMAT_OK, cell))
+
         print(src)
+
         if out:
             if not md:
-              print(r"\tcbline")
+              print(r"\tcblower")
             print(out)
         if not md:
-            print(r"\end{tcolorbox}")
+          print(r"\end{tcolorbox}")
 
 
 if __name__ == "__main__":

diff --git a/app/views/task/task_pdf.pdf.erb b/app/views/task/task_pdf.pdf.erb
@@ -112,16 +112,21 @@ No Tutor % Supervisor's Name
   <%
     if file[:type] == 'code'
       if (File.extname(file[:path])[1..-1]) != 'ipynb'
-  %>
-\inputminted[breaklines,linenos,breakanywhere,tabsize=4]{<%= Task.pygments_lang(File.extname(file[:path])[1..-1]) %>}{<%= file[:path] %>}
-  <%  else %>
+        pygments_lang = Task.pygments_lang(File.extname(file[:path])[1..-1])
+
+        if file[:truncated] %>
+\begin{tcolorbox}[colback=blue!5!white,colframe=blue!75!black]
+  This file has additional line breaks applied by <%= @doubtfire_product_name %> because it contains lines longer than the configured limit. Lines over 1000 characters long have been truncated to limit PDF page count. The original submission can be retrieved via the "Download Uploaded Files" function.
+\end{tcolorbox}
+<%      end # if truncated %>
+\inputminted[breaklines,linenos,breakanywhere,tabsize=4]{<%= pygments_lang %>}{<%= file[:path] %>}
+<%    else # is ipynb %>
 \jupynotex{<%= file[:path] %>}
-  <%
+<%
       end
     end
-  %>
 
-  <% if file[:type] == 'document' %>
+    if file[:type] == 'document' %>
 % add document paths to document_list to generate lua calls to newpax later
     <% document_list.append(file[:path]) unless !@include_pax %>
     <%

diff --git a/test/models/task_test.rb b/test/models/task_test.rb
@@ -357,6 +357,127 @@ def test_ipynb_to_pdf
     assert File.exist? path
     assert File.exist? task.final_pdf_path
 
+    # Test if latex math was rendered properly
+    reader = PDF::Reader.new(task.final_pdf_path)
+    assert reader.pages.last.text.include? "BMI: bmi =    weigh2\n                  height"
+
+    # ensure neither the truncation notice nor a parsing error appears when the notebook
+    # has no long lines in its source code cells
+    reader.pages.each do |page|
+      assert_not page.text.include? 'The rest of this line has been truncated by the system to improve readability.'
+      assert_not page.text.include? 'ERROR when parsing'
+    end
+
+    # test line wrapping in jupynotex
+    data_to_post = with_file('test_files/submissions/long.ipynb', 'application/json', data_to_post)
+
+    post "/api/projects/#{project.id}/task_def_id/#{td.id}/submission", data_to_post
+
+    assert_equal 201, last_response.status, last_response_body
+
+    # test submission generation
+    assert task.convert_submission_to_pdf
+    path = task.zip_file_path_for_done_task
+    assert path
+    assert File.exist? path
+    assert File.exist? task.final_pdf_path
+
+    # ensure the notice is included when the notebook has a long line in its source code cells
+    reader = PDF::Reader.new(task.final_pdf_path)
+    assert reader.pages[1].text.gsub(/\s+/, " ").include? "[The rest of this line has been truncated by the system to improve readability.]"
+
+    # test excessively long raw data
+    data_to_post = with_file('test_files/submissions/many_lines.ipynb', 'application/json', data_to_post)
+    post "/api/projects/#{project.id}/task_def_id/#{td.id}/submission", data_to_post
+
+    assert_equal 201, last_response.status, last_response_body
+
+    # test submission generation
+    assert task.convert_submission_to_pdf
+    path = task.zip_file_path_for_done_task
+    assert path
+    assert File.exist? path
+    assert File.exist? task.final_pdf_path
+
+    # ensure the excessively long content did not inflate the PDF: expect exactly 4 pages
+    reader = PDF::Reader.new(task.final_pdf_path)
+
+    assert_equal 4, reader.pages.count
+
+    td.destroy
+    assert_not File.exist? path
+    unit.destroy!
+  end
+
+  def test_code_submission_with_long_lines
+    unit = FactoryBot.create(:unit, student_count: 1, task_count: 0)
+    td = TaskDefinition.new({
+        unit_id: unit.id,
+        tutorial_stream: unit.tutorial_streams.first,
+        name: 'Task with super ling lines in code submission',
+        description: 'Code task',
+        weighting: 4,
+        target_grade: 0,
+        start_date: unit.start_date + 1.week,
+        target_date: unit.start_date + 2.weeks,
+        abbreviation: 'Long',
+        restrict_status_updates: false,
+        upload_requirements: [ { "key" => 'file0', "name" => 'long.py', "type" => 'code' } ],
+        plagiarism_warn_pct: 0.8,
+        is_graded: false,
+        max_quality_pts: 0
+      })
+    td.save!
+
+    data_to_post = {
+      trigger: 'ready_for_feedback'
+    }
+
+    data_to_post = with_file('test_files/submissions/long.py', 'application/json', data_to_post)
+
+    project = unit.active_projects.first
+
+    add_auth_header_for user: unit.main_convenor_user
+
+    post "/api/projects/#{project.id}/task_def_id/#{td.id}/submission", data_to_post
+
+    assert_equal 201, last_response.status, last_response_body
+
+    # test submission generation
+    task = project.task_for_task_definition(td)
+    assert task.convert_submission_to_pdf
+    path = task.zip_file_path_for_done_task
+    assert path
+    assert File.exist? path
+    assert File.exist? task.final_pdf_path
+
+    # ensure the notice is included when rendered files are truncated
+    reader = PDF::Reader.new(task.final_pdf_path)
+    assert reader.pages[1].text.include? "This file has additional line breaks applied"
+
+    # submit a normal file and ensure the notice is not included in the PDF
+    data_to_post = {
+      trigger: 'ready_for_feedback'
+    }
+
+    data_to_post = with_file('test_files/submissions/normal.py', 'application/json', data_to_post)
+    project = unit.active_projects.first
+    add_auth_header_for user: unit.main_convenor_user
+    post "/api/projects/#{project.id}/task_def_id/#{td.id}/submission", data_to_post
+    assert_equal 201, last_response.status, last_response_body
+
+    # test submission generation
+    task = project.task_for_task_definition(td)
+    assert task.convert_submission_to_pdf
+    path = task.zip_file_path_for_done_task
+    assert path
+    assert File.exist? path
+    assert File.exist? task.final_pdf_path
+
+    # ensure the notice is not included
+    reader = PDF::Reader.new(task.final_pdf_path)
+    assert_not reader.pages[1].text.include? "This file has additional line breaks applied"
+
     td.destroy
     assert_not File.exist? path
     unit.destroy!