From b0b7d185aa7975244e412e625f8d3aefff04b891 Mon Sep 17 00:00:00 2001 From: kjgarza Date: Fri, 7 Feb 2020 17:38:39 +0100 Subject: [PATCH 1/7] don't mix validation and index pushing --- app/jobs/validation_job.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/jobs/validation_job.rb b/app/jobs/validation_job.rb index 9acf8af..687d56e 100644 --- a/app/jobs/validation_job.rb +++ b/app/jobs/validation_job.rb @@ -13,7 +13,7 @@ def perform(id, options={}) if is_valid message = "[ValidationJob] Subset #{id} of Usage Report #{subset.report.uid} successfully validated." - subset.push_report + # subset.push_report subset.update_column(:aasm, "valid") Rails.logger.info message true From 855ce36cfbf267db677e43face3a5afca0b1bd0d Mon Sep 17 00:00:00 2001 From: kjgarza Date: Fri, 7 Feb 2020 17:38:51 +0100 Subject: [PATCH 2/7] nicer logger levels --- app/controllers/reports_controller.rb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/app/controllers/reports_controller.rb b/app/controllers/reports_controller.rb index 65db046..5736cf7 100755 --- a/app/controllers/reports_controller.rb +++ b/app/controllers/reports_controller.rb @@ -60,7 +60,7 @@ def destroy if @report.destroy head :no_content else - Rails.logger.warn @report.errors.inspect + Rails.logger.error @report.errors.inspect render jsonapi: serialize(@report.errors), status: :unprocessable_entity end end @@ -108,7 +108,7 @@ def create if @report.save render json: @report, status: :created else - Rails.logger.warn @report.errors.inspect + Rails.logger.error @report.errors.inspect render json: @report.errors, status: :unprocessable_entity end end From 145101343f936b6e1db05290835b889b633f2344 Mon Sep 17 00:00:00 2001 From: kjgarza Date: Fri, 7 Feb 2020 17:40:28 +0100 Subject: [PATCH 3/7] nicer logging --- app/models/concerns/queueable.rb | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/app/models/concerns/queueable.rb b/app/models/concerns/queueable.rb index c36a0f7..8313f2f 100644 --- a/app/models/concerns/queueable.rb +++ b/app/models/concerns/queueable.rb @@ -22,7 +22,9 @@ def send_message(body, options={}) message_body: body.to_json, } - sqs.send_message(options) + sent_message = sqs.send_message(options) + Rails.logger.info "[UsageUpdateImportWorker] Report " + report_id + " has been queued." if sent_message.respond_to?("successful") + sent_message end # def queue_report(options={}) From 4b4e1627bd574db0d871e561a0ef570198ba7cf8 Mon Sep 17 00:00:00 2001 From: kjgarza Date: Mon, 10 Feb 2020 17:23:43 +0100 Subject: [PATCH 4/7] logger fixes from lupo --- config/application.rb | 20 ++++++++++++++++++++ config/initializers/_lograge.rb | 24 ------------------------ 2 files changed, 20 insertions(+), 24 deletions(-) delete mode 100644 config/initializers/_lograge.rb diff --git a/config/application.rb b/config/application.rb index 3c51478..3345ba3 100644 --- a/config/application.rb +++ b/config/application.rb @@ -72,6 +72,26 @@ class Application < Rails::Application # secret_key_base is not used by Rails API, as there are no sessions config.secret_key_base = 'blipblapblup' + config.lograge.enabled = true + config.lograge.formatter = Lograge::Formatters::Logstash.new + config.lograge.logger = LogStashLogger.new(type: :stdout) + config.logger = config.lograge.logger ## LogStashLogger needs to be pass to rails logger, see roidrage/lograge#26 + config.log_level = ENV["LOG_LEVEL"].to_sym ## Log level in a config level configuration + + config.lograge.ignore_actions = ["HeartbeatController#index", "IndexController#index"] + config.lograge.ignore_custom = lambda do |event| + event.payload.inspect.length > 100000 + end + config.lograge.base_controller_class = "ActionController::API" + + config.lograge.custom_options = lambda do |event| + exceptions = %w(controller action format id) + { + params: event.payload[:params].except(*exceptions), + uid: event.payload[:uid], + } + end + # configure caching config.cache_store = :dalli_store, nil, { :namespace => ENV['APPLICATION'] } diff --git a/config/initializers/_lograge.rb b/config/initializers/_lograge.rb deleted file mode 100644 index 48b8274..0000000 --- a/config/initializers/_lograge.rb +++ /dev/null @@ -1,24 +0,0 @@ -# frozen_string_literal: true - -Rails.application.configure do - config.lograge.enabled = true - config.lograge.formatter = Lograge::Formatters::Logstash.new - config.lograge.logger = LogStashLogger.new(type: :stdout) - config.lograge.log_level = ENV["LOG_LEVEL"].to_sym - - config.active_job.logger = config.lograge.logger - - config.lograge.ignore_actions = ["HeartbeatController#index", "IndexController#index"] - config.lograge.ignore_custom = lambda do |event| - event.payload.inspect.length > 100000 - end - config.lograge.base_controller_class = "ActionController::API" - - config.lograge.custom_options = lambda do |event| - exceptions = %w(controller action format id) - { - params: event.payload[:params].except(*exceptions), - uid: event.payload[:uid], - } - end -end From b6e22a9448327557f7c0850b43bd3656159eb87d Mon Sep 17 00:00:00 2001 From: kjgarza Date: Mon, 10 Feb 2020 18:00:00 +0100 Subject: [PATCH 5/7] linting --- app/models/report.rb | 68 +++++++++++++++++++++----------------------- 1 file changed, 32 insertions(+), 36 deletions(-) diff --git a/app/models/report.rb b/app/models/report.rb index e7724f1..280fa9e 100755 --- a/app/models/report.rb +++ b/app/models/report.rb @@ -1,5 +1,5 @@ -require 'base64' -require 'digest' +require "base64" +require "digest" class Report < ApplicationRecord self.primary_key = :uid @@ -7,7 +7,7 @@ class Report < ApplicationRecord has_many :report_subsets, autosave: true, dependent: :destroy # has_one_attached :report - COMPRESSED_HASH_MESSAGE = { "code"=>69, "severity"=>"warning", "message"=>"report is compressed using gzip", "help-url"=>"https://github.com/datacite/sashimi", "data"=>"usage data needs to be uncompressed"} + COMPRESSED_HASH_MESSAGE = { "code" => 69, "severity" => "warning", "message" => "report is compressed using gzip", "help-url" => "https://github.com/datacite/sashimi", "data" => "usage data needs to be uncompressed" }.freeze # include validation methods for sushi include Metadatable @@ -16,20 +16,20 @@ class Report < ApplicationRecord include Queueable # attr_accessor :month, :year, :compressed - validates_presence_of :report_id, :created_by, :client_id, :provider_id, :created, :reporting_period + validates_presence_of :report_id, :created_by, :client_id, :provider_id, :created, :reporting_period validates_presence_of :report_datasets, if: :normal_report? - #, :report_datasets + # , :report_datasets validates :uid, uniqueness: true validates :validate_sushi, sushi: { presence: true }, if: :normal_report? attr_readonly :created_by, :month, :year, :client_id, :report_id, :uid # serialize :exceptions, Array before_validation :set_uid, on: :create - after_save :to_compress + after_save :to_compress after_validation :clean_datasets # before_create :set_id after_commit :push_report, if: :normal_report? - after_destroy_commit :destroy_report_events, on: :delete + after_destroy_commit :destroy_report_events, on: :delete # after_commit :validate_report_job, unless: :normal_report? @@ -41,8 +41,8 @@ def destroy_report_events DestroyEventsJob.perform_later(uid) end - def self.destroy_events(uid, options={}) - url = "#{ENV["API_URL"]}/events?" + URI.encode_www_form("subj-id" =>"#{ENV["API_URL"]}/reports/#{uid}") + def self.destroy_events(uid, _options = {}) + url = "#{ENV['API_URL']}/events?" + URI.encode_www_form("subj-id" => "#{ENV['API_URL']}/reports/#{uid}") response = Maremma.get url events = response.fetch("data", []) # TODO add error class @@ -70,51 +70,47 @@ def compress json_report = { "report-header": { - "report-name": self.report_name, - "report-id": self.report_id, - "release": self.release, - "created": self.created, - "created-by": self.created_by, - "reporting-period": self.reporting_period, - "report-filters": self.report_filters, - "report-attributes": self.report_attributes, - "exceptions": self.exceptions, + "report-name": report_name, + "report-id": report_id, + "release": release, + "created": created, + "created-by": created_by, + "reporting-period": reporting_period, + "report-filters": report_filters, + "report-attributes": report_attributes, + "exceptions": exceptions, }, - "report-datasets": self.report_datasets, + "report-datasets": report_datasets, } ActiveSupport::Gzip.compress(json_report.to_json) end - + def self.compressed_report? - return nil if self.exceptions.empty? || self.compressed.nil? + return nil if exceptions.empty? || compressed.nil? # self.exceptions.include?(COMPRESSED_HASH_MESSAGE) - code = self.exceptions.first.fetch("code", "") + code = exceptions.first.fetch("code", "") if code == 69 true - else - nil end end def normal_report? - return nil if compressed_report? || self.report_datasets.nil? + return nil if compressed_report? || report_datasets.nil? true end def compressed_report? - return nil if self.exceptions && self.exceptions.empty? || self.compressed.nil? + return nil if exceptions&.empty? || compressed.nil? # self.exceptions.include?(COMPRESSED_HASH_MESSAGE) - code = self.exceptions.first.fetch("code","") + code = exceptions.first.fetch("code", "") if code == 69 true - else - nil end end @@ -126,9 +122,9 @@ def set_id end def to_compress - if self.compressed.nil? && self.report_subsets.empty? - ReportSubset.create(compressed: compress, report_id: self.uid) - elsif self.report_subsets.empty? + if compressed.nil? && report_subsets.empty? + ReportSubset.create(compressed: compress, report_id: uid) + elsif report_subsets.empty? ReportSubset.create(compressed: compressed, report_id: uid) # ReportSubset.create(compressed: self.compressed, report_id: self.report_id) end @@ -139,15 +135,15 @@ def clean_datasets end def set_uid - return ActionController::ParameterMissing if self.reporting_period.nil? + return ActionController::ParameterMissing if reporting_period.nil? self.uid = SecureRandom.uuid if uid.blank? self.report_filters = report_filters.nil? ? [] : report_filters self.report_attributes = report_attributes.nil? ? [] : report_attributes self.exceptions = exceptions.nil? ? [] : exceptions - # self.report_id = self.uid - month = Date.strptime(self.reporting_period["begin_date"],"%Y-%m-%d").month.to_s - year = Date.strptime(self.reporting_period["begin_date"],"%Y-%m-%d").year.to_s + # self.report_id = self.uid + month = Date.strptime(reporting_period["begin_date"], "%Y-%m-%d").month.to_s + year = Date.strptime(reporting_period["begin_date"], "%Y-%m-%d").year.to_s write_attribute(:month, month) write_attribute(:year, year) end From 31a6bcf75f9dfb80051d91929cf07f87a63ade37 Mon Sep 17 00:00:00 2001 From: kjgarza Date: Mon, 10 Feb 2020 18:42:51 +0100 Subject: [PATCH 6/7] ValidationJob MUST NOT index sub-set reports if not intended addresses https://github.com/datacite/sashimi/issues/115 --- app/jobs/validation_job.rb | 2 +- app/models/report_subset.rb | 11 +++++++---- lib/tasks/reports.rake | 4 ++-- spec/models/report_subset_spec.rb | 15 +++++++++++++-- 4 files changed, 23 insertions(+), 9 deletions(-) diff --git a/app/jobs/validation_job.rb b/app/jobs/validation_job.rb index 687d56e..6c74bd0 100644 --- a/app/jobs/validation_job.rb +++ b/app/jobs/validation_job.rb @@ -13,8 +13,8 @@ def perform(id, options={}) if is_valid message = "[ValidationJob] Subset #{id} of Usage Report #{subset.report.uid} successfully validated." - # subset.push_report subset.update_column(:aasm, "valid") + subset.push_report unless options[:validate_only] Rails.logger.info message true else diff --git a/app/models/report_subset.rb b/app/models/report_subset.rb index 91fa688..8170312 100644 --- a/app/models/report_subset.rb +++ b/app/models/report_subset.rb @@ -1,6 +1,7 @@ -require 'digest' -require 'base64' +require "digest" +require "base64" +### Subset are never updated. there will be always deleted and recreated class ReportSubset < ApplicationRecord # include validation methods for sushi include Queueable @@ -25,7 +26,9 @@ def convert_report_job end def push_report - Rails.logger.debug "[UsageReports] calling queue for #{id}" + return false if aasm != "valid" + + Rails.logger.info "[UsageReports] calling queue for #{id}" body = { report_id: report_subset_url } send_message(body) if ENV["AWS_REGION"].present? @@ -50,7 +53,7 @@ def report_header "report-filters": report.report_filters, "report-attributes": report.report_attributes, "exceptions": report.exceptions, - } + } end def make_checksum diff --git a/lib/tasks/reports.rake b/lib/tasks/reports.rake index 4999d0f..fe60899 100644 --- a/lib/tasks/reports.rake +++ b/lib/tasks/reports.rake @@ -59,7 +59,7 @@ namespace :reports do Report.all.find_each do |report| if report.compressed_report? report.report_subsets.each do |subset| - subset.validate_report_job + subset.validate_report_job(validate_only: true) end end end @@ -82,7 +82,7 @@ namespace :reports do if report.compressed_report? report.report_subsets.each do |subset| - subset.validate_report_job + subset.validate_report_job(validate_only: true) end end end diff --git a/spec/models/report_subset_spec.rb b/spec/models/report_subset_spec.rb index 2a583b2..b8aa80d 100644 --- a/spec/models/report_subset_spec.rb +++ b/spec/models/report_subset_spec.rb @@ -1,11 +1,22 @@ -require 'rails_helper' +require "rails_helper" describe ReportSubset, type: :model do - let(:subject) { create(:report_subset) } + let(:subject) { create(:report_subset) } describe "validations" do # it { should validate_presence_of(:compressed) } it { should validate_presence_of(:report_id) } # it { should validate_presence_of(:checksum) } end + + describe "push_report" do + context "not valid" do + let(:subject) { create(:report_subset, aasm: "not_valid") } + + it "fails" do + response = subject.push_report + expect(response).to eq(false) + end + end + end end From ef39d464c6f8d1db47e1a73a2dde4b4529d6b0b6 Mon Sep 17 00:00:00 2001 From: kjgarza Date: Mon, 10 Feb 2020 19:05:56 +0100 Subject: [PATCH 7/7] rake tasks indexes only usage reports --- lib/tasks/reports.rake | 54 ++++++++++++++++++++---------------------- 1 file changed, 26 insertions(+), 28 deletions(-) diff --git a/lib/tasks/reports.rake b/lib/tasks/reports.rake index fe60899..5be332b 100644 --- a/lib/tasks/reports.rake +++ b/lib/tasks/reports.rake @@ -1,9 +1,11 @@ namespace :reports do - desc 'Index all reports' - task :index => :environment do + desc "Index all reports" + task index: :environment do logger = Logger.new(STDOUT) - - Report.all.find_each do |report| + release = ENV['REPORT_TYPE'] ||= "rd1" + logger.info "indexing reports of type #{release}" if release + + Report.where(release: release).find_each do |report| if report.normal_report? body = { report_id: report.report_url } report.send_message(body) @@ -22,16 +24,16 @@ namespace :reports do end end - desc 'Index single report' - task :index_report => :environment do + desc "Index single report" + task index_report: :environment do logger = Logger.new(STDOUT) - if ENV['REPORT_UUID'].nil? + if ENV["REPORT_UUID"].nil? logger.error "#{ENV['REPORT_UUID']} is required." exit end - report = Report.where(uid: ENV['REPORT_UUID']).first + report = Report.where(uid: ENV["REPORT_UUID"]).first if report.nil? logger.error "Report #{ENV['REPORT_UUID']} not found." exit @@ -54,8 +56,8 @@ namespace :reports do end end - desc 'Validate all reports' - task :validate => :environment do + desc "Validate all reports" + task validate: :environment do Report.all.find_each do |report| if report.compressed_report? report.report_subsets.each do |subset| @@ -65,21 +67,21 @@ namespace :reports do end end - desc 'Validate single report' - task :validate_report => :environment do + desc "Validate single report" + task validate_report: :environment do logger = Logger.new(STDOUT) - - if ENV['REPORT_UUID'].nil? + + if ENV["REPORT_UUID"].nil? logger.error "#{ENV['REPORT_UUID']} is required." exit end - report = Report.where(uid: ENV['REPORT_UUID']).first + report = Report.where(uid: ENV["REPORT_UUID"]).first if report.nil? logger.error "Report #{ENV['REPORT_UUID']} not found." exit end - + if report.compressed_report? report.report_subsets.each do |subset| subset.validate_report_job(validate_only: true) @@ -87,29 +89,27 @@ namespace :reports do end end - desc 'Convert JSON of all reports' - task :convert => :environment do + desc "Convert JSON of all reports" + task convert: :environment do Report.all.find_each do |report| if report.compressed_report? if report.compressed_report? - report.report_subsets.each do |subset| - subset.convert_report_job - end + report.report_subsets.each(&:convert_report_job) end end end end - desc 'Convert JSON single report' - task :convert_report => :environment do + desc "Convert JSON single report" + task convert_report: :environment do logger = Logger.new(STDOUT) - if ENV['REPORT_UUID'].nil? + if ENV["REPORT_UUID"].nil? logger.error "#{ENV['REPORT_UUID']} is required." exit end - report = Report.where(uid: ENV['REPORT_UUID']).first + report = Report.where(uid: ENV["REPORT_UUID"]).first if report.nil? logger.error "Report #{ENV['REPORT_UUID']} not found." exit @@ -117,9 +117,7 @@ namespace :reports do if report.compressed_report? if report.compressed_report? - report.report_subsets.each do |subset| - subset.convert_report_job - end + report.report_subsets.each(&:convert_report_job) end end end