Skip to content

Commit

Permalink
Merge pull request #3952 from alphagov/2328-csv-previews-are-sometime…
Browse files Browse the repository at this point in the history
…s-broken-l-2

CSV previews are sometimes broken - refactoring of CSV previews controller
  • Loading branch information
unoduetre authored Feb 12, 2024
2 parents 8c08ee5 + 980e8a0 commit 85b9172
Show file tree
Hide file tree
Showing 4 changed files with 204 additions and 108 deletions.
79 changes: 10 additions & 69 deletions app/controllers/csv_preview_controller.rb
Original file line number Diff line number Diff line change
@@ -1,10 +1,8 @@
require "csv"

class CsvPreviewController < ApplicationController
MAXIMUM_COLUMNS = 50
MAXIMUM_ROWS = 1000

rescue_from GdsApi::HTTPForbidden, with: :access_limited
rescue_from CSV::MalformedCSVError, with: :malformed_csv

def show
@asset = GdsApi.asset_manager.asset(params[:id]).to_hash
Expand All @@ -25,35 +23,20 @@ def show
redirect_to(parent_document_uri, status: :see_other, allow_other_host: true) and return
end

original_error = nil
row_sep = :auto

begin
csv_preview = CSV.parse(media_download_truncated, encoding: encoding(media_download_truncated), headers: true, row_sep:)
rescue CSV::MalformedCSVError => e
if original_error.nil?
original_error = e
row_sep = "\r\n"
retry
else
render :malformed_csv and return
end
end

@csv_rows = csv_preview.to_a.map do |row|
row.map { |column|
{
text: column&.encode("UTF-8"),
}
}.take(MAXIMUM_COLUMNS)
end
@csv_rows = CsvPreviewService
.new(GdsApi.asset_manager.media(params[:id], params[:filename]).body)
.csv_rows
end

private

def access_limited
render :access_limited, status: :forbidden and return
render :access_limited, status: :forbidden
end

private
def malformed_csv
render :malformed_csv
end

def asset_path
request.path.sub("/preview", "").sub(/^\//, "")
Expand All @@ -63,48 +46,6 @@ def asset_filename
asset_path.split("/").last
end

def whitehall_media_download
GdsApi.asset_manager.whitehall_media(asset_path).body
end

def media_download_truncated
@media_download_truncated ||= truncate_to_maximum_number_of_lines(
GdsApi.asset_manager.media(params[:id], params[:filename]).body,
MAXIMUM_ROWS + 1,
)
end

def truncate_to_maximum_number_of_lines(string, maximum_number_of_lines)
string[0..newline_or_last_char_index(string, maximum_number_of_lines - 1)]
end

def newline_or_last_char_index(string, newline_index)
(0..newline_index).inject(-1) do |current_index|
next_index = string.index("\n", current_index + 1)
return string.length - 1 if next_index.nil?

next_index
end
end

def encoding(media)
@encoding ||= if utf_8_encoding?(media)
"UTF-8"
elsif windows_1252_encoding?(media)
"windows-1252"
else
raise FileEncodingError, "File encoding not recognised"
end
end

def utf_8_encoding?(media)
media.force_encoding("utf-8").valid_encoding?
end

def windows_1252_encoding?(media)
media.force_encoding("windows-1252").valid_encoding?
end

def draft_asset?
@asset["draft"] == true
end
Expand Down
80 changes: 80 additions & 0 deletions app/services/csv_preview_service.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
class CsvPreviewService
MAXIMUM_COLUMNS = 50
MAXIMUM_ROWS = 1000

def initialize(csv)
@csv = csv
end

def csv_rows
original_error = nil
row_sep = :auto

begin
parsed_csv = CSV.parse(
csv_truncated,
encoding: encoding(csv_truncated),
headers: true,
row_sep:,
)
rescue CSV::MalformedCSVError => e
if original_error.nil?
original_error = e
row_sep = "\r\n"
retry
else
raise
end
end
converted_and_restricted_csv(parsed_csv)
end

private

attr_reader :id, :filename

def newline_or_last_char_index(string, newline_index)
(0..newline_index).inject(-1) do |current_index|
next_index = string.index("\n", current_index + 1)
return string.length - 1 if next_index.nil?

next_index
end
end

def truncate_to_maximum_number_of_lines(string, maximum_number_of_lines)
string[0..newline_or_last_char_index(string, maximum_number_of_lines - 1)]
end

def csv_truncated
@csv_truncated ||= truncate_to_maximum_number_of_lines(@csv, MAXIMUM_ROWS + 1)
end

def encoding(media)
@encoding ||= if utf_8_encoding?(media)
"UTF-8"
elsif windows_1252_encoding?(media)
"windows-1252"
else
raise FileEncodingError, "File encoding not recognised"
end
end

def utf_8_encoding?(media)
media.force_encoding("utf-8").valid_encoding?
end

def windows_1252_encoding?(media)
media.force_encoding("windows-1252").valid_encoding?
end

def converted_and_restricted_csv(parsed_csv)
@converted_and_restricted_csv ||= parsed_csv.to_a.map do |row|
row.map { |column|
{
text: column&.encode("UTF-8"),
}
}.take(MAXIMUM_COLUMNS)
end
end
end
39 changes: 0 additions & 39 deletions test/functional/csv_preview_controller_test.rb

This file was deleted.

114 changes: 114 additions & 0 deletions test/unit/services/csv_preview_service_test.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
require "test_helper"

class CsvPreviewServiceTest < ActiveSupport::TestCase
setup do
@csv = <<~CSV
header 1,header 2,header 3
content 1,content 2,content 3
CSV

@long_csv = @csv.dup
(CsvPreviewService::MAXIMUM_ROWS + 10).times { @long_csv += "other content 1,other content 2,other content 3\n" }

@crlf_csv = @csv.gsub(/\n/, "\r\n")

@csv_with_many_columns = "column"
(CsvPreviewService::MAXIMUM_COLUMNS + 10).times { @csv_with_many_columns += ",column" }
@csv_with_many_columns << "\n"

@parsed_csv = [
[{ text: "header 1" }, { text: "header 2" }, { text: "header 3" }],
[{ text: "content 1" }, { text: "content 2" }, { text: "content 3" }],
]
end

context "#csv_rows" do
context "with normal CSV" do
subject { CsvPreviewService.new(@csv).csv_rows }

should "parse the CSV correctly" do
assert_equal @parsed_csv, subject
end
end

context "with CSV containing CRLF line terminators" do
subject { CsvPreviewService.new(@crlf_csv).csv_rows }

should "parse the CSV correctly" do
assert_equal @parsed_csv, subject
end
end

context "with long CSV" do
subject { CsvPreviewService.new(@long_csv).csv_rows }

should "return only the header row and less or equal to the maximum number of normal rows" do
assert_operator subject.length, :<=, CsvPreviewService::MAXIMUM_ROWS + 1
end
end

context "with CSV containing many columns" do
subject { CsvPreviewService.new(@csv_with_many_columns).csv_rows }

should "return only less than or equal the maximum number of columns" do
assert_operator subject.first.length, :<=, CsvPreviewService::MAXIMUM_COLUMNS
end
end

context "with CSV in UTF-8" do
subject { CsvPreviewService.new("zażółć gęślą jaźń\n").csv_rows }

should "parse the CSV correctly" do
assert_equal [[{ text: "zażółć gęślą jaźń" }]], subject
end
end

context "with CSV in windows-1252" do
subject { CsvPreviewService.new("\xfe\xe6r he feoll his tw\xe6gen gebro\xf0ra\n").csv_rows }

should "parse the CSV and convert it to UTF-8" do
assert_equal [[{ text: "þær he feoll his twægen gebroðra" }]], subject
end
end
end

context "#newline_or_last_char_index" do
subject { CsvPreviewService.new("") }

context "when newline index is less than index of last newline" do
should "return correct index" do
assert_equal 8, subject.send(:newline_or_last_char_index, "a\nb\ncdef\ngh\nijk", 2)
end
end
context "when newline index is equal to index of last newline" do
should "return correct index" do
assert_equal 11, subject.send(:newline_or_last_char_index, "a\nb\ncdef\ngh\n", 3)
end
end
context "when newline index is greater than index of last newline" do
should "return correct index" do
assert_equal 14, subject.send(:newline_or_last_char_index, "a\nb\ncdef\ngh\nijk", 10)
end
end
end

context "#truncate_to_maximum_number_of_lines" do
subject { CsvPreviewService.new("") }

context "when requested number of lines is less than actual number of lines" do
should "return correct string" do
assert_equal "a\nb\ncdef\n", subject.send(:truncate_to_maximum_number_of_lines, "a\nb\ncdef\ngh\nijk", 3)
end
end
context "when requested number of lines is equal to actual number of lines" do
should "return correct string" do
assert_equal "a\nb\ncdef\ngh\n", subject.send(:truncate_to_maximum_number_of_lines, "a\nb\ncdef\ngh\n", 4)
end
end
context "when requested number of lines is greater than actual number of lines" do
should "return correct string" do
assert_equal "a\nb\ncdef\ngh\nijk", subject.send(:truncate_to_maximum_number_of_lines, "a\nb\ncdef\ngh\nijk", 10)
end
end
end
end

0 comments on commit 85b9172

Please sign in to comment.