Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

CSV previews are sometimes broken - refactoring of CSV previews controller #3952

Merged
merged 2 commits into from
Feb 12, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
79 changes: 10 additions & 69 deletions app/controllers/csv_preview_controller.rb
Original file line number Diff line number Diff line change
@@ -1,10 +1,8 @@
require "csv"

class CsvPreviewController < ApplicationController
MAXIMUM_COLUMNS = 50
MAXIMUM_ROWS = 1000

rescue_from GdsApi::HTTPForbidden, with: :access_limited
rescue_from CSV::MalformedCSVError, with: :malformed_csv

def show
@asset = GdsApi.asset_manager.asset(params[:id]).to_hash
Expand All @@ -25,35 +23,20 @@ def show
redirect_to(parent_document_uri, status: :see_other, allow_other_host: true) and return
end

original_error = nil
row_sep = :auto

begin
csv_preview = CSV.parse(media_download_truncated, encoding: encoding(media_download_truncated), headers: true, row_sep:)
rescue CSV::MalformedCSVError => e
if original_error.nil?
original_error = e
row_sep = "\r\n"
retry
else
render :malformed_csv and return
end
end

@csv_rows = csv_preview.to_a.map do |row|
row.map { |column|
{
text: column&.encode("UTF-8"),
}
}.take(MAXIMUM_COLUMNS)
end
@csv_rows = CsvPreviewService
.new(GdsApi.asset_manager.media(params[:id], params[:filename]).body)
.csv_rows
end

private

def access_limited
render :access_limited, status: :forbidden and return
render :access_limited, status: :forbidden
end

private
def malformed_csv
render :malformed_csv
hannako marked this conversation as resolved.
Show resolved Hide resolved
end

def asset_path
request.path.sub("/preview", "").sub(/^\//, "")
Expand All @@ -63,48 +46,6 @@ def asset_filename
asset_path.split("/").last
end

def whitehall_media_download
GdsApi.asset_manager.whitehall_media(asset_path).body
end

def media_download_truncated
@media_download_truncated ||= truncate_to_maximum_number_of_lines(
GdsApi.asset_manager.media(params[:id], params[:filename]).body,
MAXIMUM_ROWS + 1,
)
end

def truncate_to_maximum_number_of_lines(string, maximum_number_of_lines)
string[0..newline_or_last_char_index(string, maximum_number_of_lines - 1)]
end

def newline_or_last_char_index(string, newline_index)
(0..newline_index).inject(-1) do |current_index|
next_index = string.index("\n", current_index + 1)
return string.length - 1 if next_index.nil?

next_index
end
end

def encoding(media)
@encoding ||= if utf_8_encoding?(media)
"UTF-8"
elsif windows_1252_encoding?(media)
"windows-1252"
else
raise FileEncodingError, "File encoding not recognised"
end
end

def utf_8_encoding?(media)
media.force_encoding("utf-8").valid_encoding?
end

def windows_1252_encoding?(media)
media.force_encoding("windows-1252").valid_encoding?
end

def draft_asset?
@asset["draft"] == true
end
Expand Down
80 changes: 80 additions & 0 deletions app/services/csv_preview_service.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
class CsvPreviewService
MAXIMUM_COLUMNS = 50
MAXIMUM_ROWS = 1000

def initialize(csv)
@csv = csv
end

def csv_rows
original_error = nil
row_sep = :auto

begin
parsed_csv = CSV.parse(
csv_truncated,
encoding: encoding(csv_truncated),
headers: true,
row_sep:,
)
rescue CSV::MalformedCSVError => e
if original_error.nil?
original_error = e
row_sep = "\r\n"
retry
else
raise
end
end
converted_and_restricted_csv(parsed_csv)
end

private

attr_reader :id, :filename

def newline_or_last_char_index(string, newline_index)
(0..newline_index).inject(-1) do |current_index|
next_index = string.index("\n", current_index + 1)
return string.length - 1 if next_index.nil?

next_index
end
end

def truncate_to_maximum_number_of_lines(string, maximum_number_of_lines)
string[0..newline_or_last_char_index(string, maximum_number_of_lines - 1)]
end

def csv_truncated
@csv_truncated ||= truncate_to_maximum_number_of_lines(@csv, MAXIMUM_ROWS + 1)
end

def encoding(media)
@encoding ||= if utf_8_encoding?(media)
"UTF-8"
elsif windows_1252_encoding?(media)
"windows-1252"
else
raise FileEncodingError, "File encoding not recognised"
end
end

def utf_8_encoding?(media)
media.force_encoding("utf-8").valid_encoding?
end

def windows_1252_encoding?(media)
media.force_encoding("windows-1252").valid_encoding?
end

def converted_and_restricted_csv(parsed_csv)
@converted_and_restricted_csv ||= parsed_csv.to_a.map do |row|
row.map { |column|
{
text: column&.encode("UTF-8"),
}
}.take(MAXIMUM_COLUMNS)
end
end
end
39 changes: 0 additions & 39 deletions test/functional/csv_preview_controller_test.rb

This file was deleted.

114 changes: 114 additions & 0 deletions test/unit/services/csv_preview_service_test.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
require "test_helper"

class CsvPreviewServiceTest < ActiveSupport::TestCase
setup do
@csv = <<~CSV
header 1,header 2,header 3
content 1,content 2,content 3
CSV

@long_csv = @csv.dup
(CsvPreviewService::MAXIMUM_ROWS + 10).times { @long_csv += "other content 1,other content 2,other content 3\n" }

@crlf_csv = @csv.gsub(/\n/, "\r\n")

@csv_with_many_columns = "column"
(CsvPreviewService::MAXIMUM_COLUMNS + 10).times { @csv_with_many_columns += ",column" }
@csv_with_many_columns << "\n"

@parsed_csv = [
[{ text: "header 1" }, { text: "header 2" }, { text: "header 3" }],
[{ text: "content 1" }, { text: "content 2" }, { text: "content 3" }],
]
end

context "#csv_rows" do
context "with normal CSV" do
subject { CsvPreviewService.new(@csv).csv_rows }

should "parse the CSV correctly" do
assert_equal @parsed_csv, subject
end
end

context "with CSV containing CRLF line terminators" do
subject { CsvPreviewService.new(@crlf_csv).csv_rows }

should "parse the CSV correctly" do
assert_equal @parsed_csv, subject
end
end

context "with long CSV" do
subject { CsvPreviewService.new(@long_csv).csv_rows }

should "return only the header row and less or equal to the maximum number of normal rows" do
assert_operator subject.length, :<=, CsvPreviewService::MAXIMUM_ROWS + 1
end
end

context "with CSV containing many columns" do
subject { CsvPreviewService.new(@csv_with_many_columns).csv_rows }

should "return only less than or equal the maximum number of columns" do
assert_operator subject.first.length, :<=, CsvPreviewService::MAXIMUM_COLUMNS
end
end

context "with CSV in UTF-8" do
subject { CsvPreviewService.new("zażółć gęślą jaźń\n").csv_rows }

should "parse the CSV correctly" do
assert_equal [[{ text: "zażółć gęślą jaźń" }]], subject
end
end

context "with CSV in windows-1252" do
subject { CsvPreviewService.new("\xfe\xe6r he feoll his tw\xe6gen gebro\xf0ra\n").csv_rows }

should "parse the CSV and convert it to UTF-8" do
assert_equal [[{ text: "þær he feoll his twægen gebroðra" }]], subject
end
end
end

context "#newline_or_last_char_index" do
subject { CsvPreviewService.new("") }

context "when newline index is less than index of last newline" do
should "return correct index" do
assert_equal 8, subject.send(:newline_or_last_char_index, "a\nb\ncdef\ngh\nijk", 2)
end
end
context "when newline index is equal to index of last newline" do
should "return correct index" do
assert_equal 11, subject.send(:newline_or_last_char_index, "a\nb\ncdef\ngh\n", 3)
end
end
context "when newline index is greater than index of last newline" do
should "return correct index" do
assert_equal 14, subject.send(:newline_or_last_char_index, "a\nb\ncdef\ngh\nijk", 10)
end
end
end

context "#truncate_to_maximum_number_of_lines" do
subject { CsvPreviewService.new("") }

context "when requested number of lines is less than actual number of lines" do
should "return correct string" do
assert_equal "a\nb\ncdef\n", subject.send(:truncate_to_maximum_number_of_lines, "a\nb\ncdef\ngh\nijk", 3)
end
end
context "when requested number of lines is equal to actual number of lines" do
should "return correct string" do
assert_equal "a\nb\ncdef\ngh\n", subject.send(:truncate_to_maximum_number_of_lines, "a\nb\ncdef\ngh\n", 4)
end
end
context "when requested number of lines is greater than actual number of lines" do
should "return correct string" do
assert_equal "a\nb\ncdef\ngh\nijk", subject.send(:truncate_to_maximum_number_of_lines, "a\nb\ncdef\ngh\nijk", 10)
end
end
end
end
Loading