From 29f35e055d69e3d14c9ead3e1bb1f30c7e7d90a0 Mon Sep 17 00:00:00 2001 From: Iris Lau Date: Mon, 11 Sep 2023 18:07:27 +0100 Subject: [PATCH] CSV previews to work with modern urls Context: Currently whitehall uses a semi-hard-coded url called `legacy-url-path` to identify an asset in asset manager. This url path looks something like `/government/uploads/system/uploads/attachment_data/*`. While other publishing services use asset-id to identify an asset in asset manager. We are working on an epic to make whitehall use the asset-id similar to other publishing service. For csv-preview: Currently with Frontend preview functionality for csv, it uses legacy-url-path to fetch the asset from asset manager but with our changes when the legacy url path is removed then frontend would have to use the asset-id to fetch the asset. The changes we have made will allow the controller to retrieve asset using asset-id or legacy url path based on the params its called with Co-authored-by: Syed Ali --- Gemfile.lock | 6 +- app/controllers/csv_preview_controller.rb | 35 ++++++----- config/routes.rb | 3 +- test/integration/csv_preview_test.rb | 72 ++++++++++++++++++++--- 4 files changed, 91 insertions(+), 25 deletions(-) diff --git a/Gemfile.lock b/Gemfile.lock index 4e8dd6e85f..1c847ac399 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -125,7 +125,7 @@ GEM ffi (1.15.5) filelock (1.1.1) find_a_port (1.0.1) - gds-api-adapters (90.0.0) + gds-api-adapters (91.1.0) addressable link_header null_logger @@ -216,9 +216,9 @@ GEM marcel (1.0.2) matrix (0.4.2) method_source (1.0.0) - mime-types (3.4.1) + mime-types (3.5.1) mime-types-data (~> 3.2015) - mime-types-data (3.2023.0218.1) + mime-types-data (3.2023.0808) mini_mime (1.1.5) mini_portile2 (2.8.4) minitest (5.20.0) diff --git a/app/controllers/csv_preview_controller.rb b/app/controllers/csv_preview_controller.rb index bb5cb0fb10..aac85218ca 100644 --- a/app/controllers/csv_preview_controller.rb +++ b/app/controllers/csv_preview_controller.rb @@ -4,13 +4,12 @@ class CsvPreviewController < ApplicationController MAXIMUM_COLUMNS = 50 MAXIMUM_ROWS = 1000 + before_action :get_asset, only: [:show] + rescue_from GdsApi::HTTPForbidden, with: :access_limited def show - @asset = GdsApi.asset_manager.whitehall_asset(legacy_url_path).to_hash - return error_410 if @asset["deleted"] || @asset["redirect_url"].present? - if draft_asset? && !served_from_draft_host? redirect_to(Plek.find("draft-assets") + request.path, allow_other_host: true) and return end @@ -19,13 +18,13 @@ def show @content_item = GdsApi.content_store.content_item(parent_document_path).to_hash @attachment_metadata = @content_item.dig("details", "attachments").select do |attachment| - attachment["url"] =~ /#{Regexp.escape(legacy_url_path)}$/ + attachment["url"] =~ /#{Regexp.escape(asset_path)}$/ end - original_error = nil row_sep = :auto + download_file = params[:legacy] ? whitehall_media_download : media_download begin - csv_preview = CSV.parse(media, encoding:, headers: true, row_sep:) + csv_preview = CSV.parse(download_file, encoding: encoding(download_file), headers: true, row_sep:) rescue CSV::MalformedCSVError => e if original_error.nil? original_error = e @@ -51,29 +50,37 @@ def access_limited private - def legacy_url_path + def get_asset + @asset = params[:legacy] ? GdsApi.asset_manager.whitehall_asset(asset_path).to_hash : GdsApi.asset_manager.asset(params[:id]).to_hash + end + + def asset_path request.path.sub("/preview", "").sub(/^\//, "") end - def media - GdsApi.asset_manager.whitehall_media(legacy_url_path).body + def whitehall_media_download + GdsApi.asset_manager.whitehall_media(asset_path).body + end + + def media_download + GdsApi.asset_manager.media(params[:id], params[:filename]).body end - def encoding - @encoding ||= if utf_8_encoding? + def encoding(media) + @encoding ||= if utf_8_encoding?(media) "UTF-8" - elsif windows_1252_encoding? + elsif windows_1252_encoding?(media) "windows-1252" else raise FileEncodingError, "File encoding not recognised" end end - def utf_8_encoding? + def utf_8_encoding?(media) media.force_encoding("utf-8").valid_encoding? end - def windows_1252_encoding? + def windows_1252_encoding?(media) media.force_encoding("windows-1252").valid_encoding? end diff --git a/config/routes.rb b/config/routes.rb index 993253b597..ada9afd292 100644 --- a/config/routes.rb +++ b/config/routes.rb @@ -95,7 +95,8 @@ get ":scope/:division", to: "calendar#division", as: :division end - get "/government/uploads/system/uploads/attachment_data/file/:id/:filename.csv/preview", to: "csv_preview#show", filename: /[^\/]+/ + get "/government/uploads/system/uploads/attachment_data/file/:id/:filename.csv/preview", to: "csv_preview#show", filename: /[^\/]+/, legacy: true + get "/media/:id/:filename/preview", to: "csv_preview#show", filename: /[^\/]+/ get "/government/placeholder", to: "placeholder#show" diff --git a/test/integration/csv_preview_test.rb b/test/integration/csv_preview_test.rb index 1baa255af1..e95d1c06a9 100644 --- a/test/integration/csv_preview_test.rb +++ b/test/integration/csv_preview_test.rb @@ -8,14 +8,16 @@ class CsvPreviewTest < ActionDispatch::IntegrationTest attachment_id = 1234 filename = "filename" + asset_manager_id = 4321 legacy_url_path = "government/uploads/system/uploads/attachment_data/file/#{attachment_id}/#{filename}.csv" + non_legacy_url_path = "media/#{asset_manager_id}/#{filename}.csv" parent_document_base_path = "/government/important-guidance" parent_document_url = "https://www.test.gov.uk#{parent_document_base_path}" setup do setup_asset_manager(legacy_url_path, parent_document_url) - setup_content_item(legacy_url_path, parent_document_base_path) + setup_content_item_legacy(legacy_url_path, parent_document_base_path) end context "when visiting the preview" do @@ -86,7 +88,7 @@ class CsvPreviewTest < ActionDispatch::IntegrationTest special_characters_filename = "filename+" special_characters_url_path = "government/uploads/system/uploads/attachment_data/file/12345/#{special_characters_filename}.csv" setup_asset_manager(special_characters_url_path, parent_document_url) - setup_content_item(special_characters_url_path, parent_document_base_path) + setup_content_item_legacy(special_characters_url_path, parent_document_base_path) visit "/#{special_characters_url_path}/preview" end @@ -99,7 +101,7 @@ class CsvPreviewTest < ActionDispatch::IntegrationTest context "when visiting the preview with special characters in the CSV" do setup do setup_asset_manager(legacy_url_path, parent_document_url, use_special_characters_csv: true) - setup_content_item(legacy_url_path, parent_document_base_path) + setup_content_item_legacy(legacy_url_path, parent_document_base_path) visit "/#{legacy_url_path}/preview" end @@ -115,7 +117,7 @@ class CsvPreviewTest < ActionDispatch::IntegrationTest context "when visiting the preview that redirects to other asset" do setup do setup_asset_manager(legacy_url_path, parent_document_url, use_special_characters_csv: true, asset_deleted: true) - setup_content_item(legacy_url_path, parent_document_base_path) + setup_content_item_legacy(legacy_url_path, parent_document_base_path) visit "/#{legacy_url_path}/preview" end @@ -157,7 +159,7 @@ class CsvPreviewTest < ActionDispatch::IntegrationTest context "when asset manager returns a 403 response" do setup do stub_request(:get, "#{ASSET_MANAGER_ENDPOINT}/whitehall_assets/government/uploads/system/uploads/attachment_data/file/#{attachment_id}/#{filename}-2.csv") - .to_return(status: 403) + .to_return(status: 403) visit "/government/uploads/system/uploads/attachment_data/file/#{attachment_id}/#{filename}-2.csv/preview" end @@ -203,12 +205,28 @@ class CsvPreviewTest < ActionDispatch::IntegrationTest end end - def setup_asset_manager(legacy_url_path, parent_document_url, use_special_characters_csv: false, asset_deleted: false) + should "get asset from legacy_url endpoint" do + visit "/#{legacy_url_path}/preview" + assert page.has_title?("Attachment 2 - GOV.UK") + end + + should "get asset from non-legacy_url endpoint" do + setup_asset_manager(legacy_url_path, parent_document_url, asset_manager_id, filename) + setup_content_item_non_legacy(non_legacy_url_path, parent_document_base_path) + + visit "/#{non_legacy_url_path}/preview" + assert page.has_title?("Attachment 2 - GOV.UK") + end + + def setup_asset_manager(legacy_url_path, parent_document_url, asset_manager_id = nil, filename = nil, use_special_characters_csv: false, asset_deleted: false) asset_manager_response = { id: "https://asset-manager.dev.gov.uk/assets/foo", parent_document_url:, deleted: asset_deleted, } + if asset_manager_id + stub_asset_manager_has_an_asset(asset_manager_id, asset_manager_response, filename) + end stub_asset_manager_has_a_whitehall_asset(legacy_url_path, asset_manager_response) csv_file = if use_special_characters_csv @@ -219,9 +237,12 @@ def setup_asset_manager(legacy_url_path, parent_document_url, use_special_charac stub_request(:get, "#{Plek.find('asset-manager')}/#{legacy_url_path}") .to_return(body: csv_file, status: 200) + + stub_request(:get, "#{Plek.find('asset-manager')}/media/#{asset_manager_id}/#{filename}.csv") + .to_return(body: csv_file, status: 200) end - def setup_content_item(legacy_url_path, parent_document_base_path) + def setup_content_item_legacy(legacy_url_path, parent_document_base_path) content_item = { base_path: parent_document_base_path, document_type: "guidance", @@ -258,6 +279,43 @@ def setup_content_item(legacy_url_path, parent_document_base_path) stub_content_store_has_item(parent_document_base_path, content_item) end + def setup_content_item_non_legacy(non_legacy_url_path, parent_document_base_path) + content_item = { + base_path: parent_document_base_path, + document_type: "guidance", + public_updated_at: "2023-05-27T08:00:07.000+00:00", + details: { + attachments: [ + { + title: "Attachment 1", + url: "https://www.gov.uk/media/5678/filename.csv", + file_size: "1024", + }, + { + title: "Attachment 2", + url: "https://www.gov.uk/#{non_legacy_url_path}", + file_size: "2048", + }, + ], + }, + links: { + organisations: [ + { + base_path: "/government/organisations/department-of-publishing", + details: { + brand: "single-identity", + logo: { + crest: "single-identity", + formatted_title: "Department of Publishing", + }, + }, + }, + ], + }, + } + stub_content_store_has_item(parent_document_base_path, content_item) + end + def generate_test_csv(column_count, row_count) csv_headers = CSV.generate_line((1..column_count).map { |column_number| "field_#{column_number}" }) csv_row = CSV.generate_line((1..column_count).map { |column_number| "Value#{column_number}" })