diff --git a/app/controllers/csv_preview_controller.rb b/app/controllers/csv_preview_controller.rb index 4cb66e5c91..a5e0e902c3 100644 --- a/app/controllers/csv_preview_controller.rb +++ b/app/controllers/csv_preview_controller.rb @@ -1,10 +1,8 @@ require "csv" class CsvPreviewController < ApplicationController - MAXIMUM_COLUMNS = 50 - MAXIMUM_ROWS = 1000 - rescue_from GdsApi::HTTPForbidden, with: :access_limited + rescue_from CSV::MalformedCSVError, with: :malformed_csv def show @asset = GdsApi.asset_manager.asset(params[:id]).to_hash @@ -25,35 +23,20 @@ def show redirect_to(parent_document_uri, status: :see_other, allow_other_host: true) and return end - original_error = nil - row_sep = :auto - - begin - csv_preview = CSV.parse(media_download_truncated, encoding: encoding(media_download_truncated), headers: true, row_sep:) - rescue CSV::MalformedCSVError => e - if original_error.nil? - original_error = e - row_sep = "\r\n" - retry - else - render :malformed_csv and return - end - end - - @csv_rows = csv_preview.to_a.map do |row| - row.map { |column| - { - text: column&.encode("UTF-8"), - } - }.take(MAXIMUM_COLUMNS) - end + @csv_rows = CsvPreviewService + .new(GdsApi.asset_manager.media(params[:id], params[:filename]).body) + .csv_rows end +private + def access_limited - render :access_limited, status: :forbidden and return + render :access_limited, status: :forbidden end -private + def malformed_csv + render :malformed_csv + end def asset_path request.path.sub("/preview", "").sub(/^\//, "") @@ -63,48 +46,6 @@ def asset_filename asset_path.split("/").last end - def whitehall_media_download - GdsApi.asset_manager.whitehall_media(asset_path).body - end - - def media_download_truncated - @media_download_truncated ||= truncate_to_maximum_number_of_lines( - GdsApi.asset_manager.media(params[:id], params[:filename]).body, - MAXIMUM_ROWS + 1, - ) - end - - def truncate_to_maximum_number_of_lines(string, maximum_number_of_lines) - string[0..newline_or_last_char_index(string, maximum_number_of_lines - 1)] - end - - def newline_or_last_char_index(string, newline_index) - (0..newline_index).inject(-1) do |current_index| - next_index = string.index("\n", current_index + 1) - return string.length - 1 if next_index.nil? - - next_index - end - end - - def encoding(media) - @encoding ||= if utf_8_encoding?(media) - "UTF-8" - elsif windows_1252_encoding?(media) - "windows-1252" - else - raise FileEncodingError, "File encoding not recognised" - end - end - - def utf_8_encoding?(media) - media.force_encoding("utf-8").valid_encoding? - end - - def windows_1252_encoding?(media) - media.force_encoding("windows-1252").valid_encoding? - end - def draft_asset? @asset["draft"] == true end diff --git a/app/services/csv_preview_service.rb b/app/services/csv_preview_service.rb new file mode 100644 index 0000000000..fe7025f8e7 --- /dev/null +++ b/app/services/csv_preview_service.rb @@ -0,0 +1,80 @@ +class CsvPreviewService + MAXIMUM_COLUMNS = 50 + MAXIMUM_ROWS = 1000 + + def initialize(csv) + @csv = csv + end + + def csv_rows + original_error = nil + row_sep = :auto + + begin + parsed_csv = CSV.parse( + csv_truncated, + encoding: encoding(csv_truncated), + headers: true, + row_sep:, + ) + rescue CSV::MalformedCSVError => e + if original_error.nil? + original_error = e + row_sep = "\r\n" + retry + else + raise + end + end + converted_and_restricted_csv(parsed_csv) + end + +private + + attr_reader :id, :filename + + def newline_or_last_char_index(string, newline_index) + (0..newline_index).inject(-1) do |current_index| + next_index = string.index("\n", current_index + 1) + return string.length - 1 if next_index.nil? + + next_index + end + end + + def truncate_to_maximum_number_of_lines(string, maximum_number_of_lines) + string[0..newline_or_last_char_index(string, maximum_number_of_lines - 1)] + end + + def csv_truncated + @csv_truncated ||= truncate_to_maximum_number_of_lines(@csv, MAXIMUM_ROWS + 1) + end + + def encoding(media) + @encoding ||= if utf_8_encoding?(media) + "UTF-8" + elsif windows_1252_encoding?(media) + "windows-1252" + else + raise FileEncodingError, "File encoding not recognised" + end + end + + def utf_8_encoding?(media) + media.force_encoding("utf-8").valid_encoding? + end + + def windows_1252_encoding?(media) + media.force_encoding("windows-1252").valid_encoding? + end + + def converted_and_restricted_csv(parsed_csv) + @converted_and_restricted_csv ||= parsed_csv.to_a.map do |row| + row.map { |column| + { + text: column&.encode("UTF-8"), + } + }.take(MAXIMUM_COLUMNS) + end + end +end diff --git a/test/functional/csv_preview_controller_test.rb b/test/functional/csv_preview_controller_test.rb deleted file mode 100644 index a52e57214a..0000000000 --- a/test/functional/csv_preview_controller_test.rb +++ /dev/null @@ -1,39 +0,0 @@ -require "test_helper" - -class CsvPreviewControllerTest < ActionController::TestCase - context "#newline_or_last_char_index" do - context "when newline index is less than index of last newline" do - should "return correct index" do - assert_equal 8, subject.send(:newline_or_last_char_index, "a\nb\ncdef\ngh\nijk", 2) - end - end - context "when newline index is equal to index of last newline" do - should "return correct index" do - assert_equal 11, subject.send(:newline_or_last_char_index, "a\nb\ncdef\ngh\n", 3) - end - end - context "when newline index is greater than index of last newline" do - should "return correct index" do - assert_equal 14, subject.send(:newline_or_last_char_index, "a\nb\ncdef\ngh\nijk", 10) - end - end - end - - context "#truncate_to_maximum_number_of_lines" do - context "when requested number of lines is less than actual number of lines" do - should "return correct string" do - assert_equal "a\nb\ncdef\n", subject.send(:truncate_to_maximum_number_of_lines, "a\nb\ncdef\ngh\nijk", 3) - end - end - context "when requested number of lines is equal to actual number of lines" do - should "return correct string" do - assert_equal "a\nb\ncdef\ngh\n", subject.send(:truncate_to_maximum_number_of_lines, "a\nb\ncdef\ngh\n", 4) - end - end - context "when requested number of lines is greater than actual number of lines" do - should "return correct string" do - assert_equal "a\nb\ncdef\ngh\nijk", subject.send(:truncate_to_maximum_number_of_lines, "a\nb\ncdef\ngh\nijk", 10) - end - end - end -end diff --git a/test/unit/services/csv_preview_service_test.rb b/test/unit/services/csv_preview_service_test.rb new file mode 100644 index 0000000000..c837b7c45c --- /dev/null +++ b/test/unit/services/csv_preview_service_test.rb @@ -0,0 +1,114 @@ +require "test_helper" + +class CsvPreviewServiceTest < ActiveSupport::TestCase + setup do + @csv = <<~CSV + header 1,header 2,header 3 + content 1,content 2,content 3 + CSV + + @long_csv = @csv.dup + (CsvPreviewService::MAXIMUM_ROWS + 10).times { @long_csv += "other content 1,other content 2,other content 3\n" } + + @crlf_csv = @csv.gsub(/\n/, "\r\n") + + @csv_with_many_columns = "column" + (CsvPreviewService::MAXIMUM_COLUMNS + 10).times { @csv_with_many_columns += ",column" } + @csv_with_many_columns << "\n" + + @parsed_csv = [ + [{ text: "header 1" }, { text: "header 2" }, { text: "header 3" }], + [{ text: "content 1" }, { text: "content 2" }, { text: "content 3" }], + ] + end + + context "#csv_rows" do + context "with normal CSV" do + subject { CsvPreviewService.new(@csv).csv_rows } + + should "parse the CSV correctly" do + assert_equal @parsed_csv, subject + end + end + + context "with CSV containing CRLF line terminators" do + subject { CsvPreviewService.new(@crlf_csv).csv_rows } + + should "parse the CSV correctly" do + assert_equal @parsed_csv, subject + end + end + + context "with long CSV" do + subject { CsvPreviewService.new(@long_csv).csv_rows } + + should "return only the header row and less or equal to the maximum number of normal rows" do + assert_operator subject.length, :<=, CsvPreviewService::MAXIMUM_ROWS + 1 + end + end + + context "with CSV containing many columns" do + subject { CsvPreviewService.new(@csv_with_many_columns).csv_rows } + + should "return only less than or equal the maximum number of columns" do + assert_operator subject.first.length, :<=, CsvPreviewService::MAXIMUM_COLUMNS + end + end + + context "with CSV in UTF-8" do + subject { CsvPreviewService.new("zażółć gęślą jaźń\n").csv_rows } + + should "parse the CSV correctly" do + assert_equal [[{ text: "zażółć gęślą jaźń" }]], subject + end + end + + context "with CSV in windows-1252" do + subject { CsvPreviewService.new("\xfe\xe6r he feoll his tw\xe6gen gebro\xf0ra\n").csv_rows } + + should "parse the CSV and convert it to UTF-8" do + assert_equal [[{ text: "þær he feoll his twægen gebroðra" }]], subject + end + end + end + + context "#newline_or_last_char_index" do + subject { CsvPreviewService.new("") } + + context "when newline index is less than index of last newline" do + should "return correct index" do + assert_equal 8, subject.send(:newline_or_last_char_index, "a\nb\ncdef\ngh\nijk", 2) + end + end + context "when newline index is equal to index of last newline" do + should "return correct index" do + assert_equal 11, subject.send(:newline_or_last_char_index, "a\nb\ncdef\ngh\n", 3) + end + end + context "when newline index is greater than index of last newline" do + should "return correct index" do + assert_equal 14, subject.send(:newline_or_last_char_index, "a\nb\ncdef\ngh\nijk", 10) + end + end + end + + context "#truncate_to_maximum_number_of_lines" do + subject { CsvPreviewService.new("") } + + context "when requested number of lines is less than actual number of lines" do + should "return correct string" do + assert_equal "a\nb\ncdef\n", subject.send(:truncate_to_maximum_number_of_lines, "a\nb\ncdef\ngh\nijk", 3) + end + end + context "when requested number of lines is equal to actual number of lines" do + should "return correct string" do + assert_equal "a\nb\ncdef\ngh\n", subject.send(:truncate_to_maximum_number_of_lines, "a\nb\ncdef\ngh\n", 4) + end + end + context "when requested number of lines is greater than actual number of lines" do + should "return correct string" do + assert_equal "a\nb\ncdef\ngh\nijk", subject.send(:truncate_to_maximum_number_of_lines, "a\nb\ncdef\ngh\nijk", 10) + end + end + end +end