diff --git a/.rubocop.yml b/.rubocop.yml
index 4d0d05840..2ce24c292 100644
--- a/.rubocop.yml
+++ b/.rubocop.yml
@@ -13,6 +13,7 @@ AllCops:
     - 'app/services/report.rb'
    - 'app/services/work_loader.rb'
     - 'app/services/hyrax/actor_factory.rb'
+    - 'app/services/hyrax/user_stat_importer.rb'
     - 'spec/services/hyrax/actor_factory_spec.rb'
     - 'db/**/*'
     - 'lib/tasks/batch.rake'
diff --git a/app/services/hyrax/user_stat_importer.rb b/app/services/hyrax/user_stat_importer.rb
new file mode 100644
index 000000000..470d8d527
--- /dev/null
+++ b/app/services/hyrax/user_stat_importer.rb
@@ -0,0 +1,152 @@
+module Hyrax
+  # Cache work view, file view & file download stats for all users.
+  # This is called by 'rake hyrax:stats:user_stats'.
+  class UserStatImporter
+    UserRecord = Struct.new("UserRecord", :id, :user_key, :last_stats_update)
+
+    def initialize(options = {})
+      if options[:verbose]
+        stdout_logger = Logger.new(STDOUT)
+        stdout_logger.level = Logger::INFO
+        Rails.logger.extend(ActiveSupport::Logger.broadcast(stdout_logger))
+      end
+      @logging = options[:logging]
+      @delay_secs = options[:delay_secs].to_f
+      @number_of_retries = options[:number_of_retries].to_i
+    end
+
+    delegate :depositor_field, to: DepositSearchBuilder
+
+    def import
+      log_message('Begin import of User stats.')
+
+      sorted_users.each do |user|
+        start_date = date_since_last_cache(user)
+        # This user has already been processed today; continue without delay.
+        next if start_date.to_date >= Time.zone.today
+
+        stats = {}
+
+        process_files(stats, user, start_date)
+        process_works(stats, user, start_date)
+        create_or_update_user_stats(stats, user)
+      end
+      log_message('User stats import complete.')
+    end
+
+    # Returns an array of users sorted by the date of their last stats update.
+    # Users that have not been updated recently will be at the top of the array.
+    def sorted_users
+      users = []
+      ::User.find_each do |user|
+        users.push(UserRecord.new(user.id, user.user_key, date_since_last_cache(user)))
+      end
+      users.sort_by(&:last_stats_update)
+    end
+
+    private
+
+    def process_files(stats, user, start_date)
+      file_ids_for_user(user).each do |file_id|
+        file = ::FileSet.find(file_id)
+        view_stats = rescue_and_retry("Retried FileViewStat on #{user} for file #{file_id} too many times.") { FileViewStat.statistics(file, start_date, user.id) }
+        stats = tally_results(view_stats, :views, stats) unless view_stats.blank?
+        delay
+        dl_stats = rescue_and_retry("Retried FileDownloadStat on #{user} for file #{file_id} too many times.") { FileDownloadStat.statistics(file, start_date, user.id) }
+        stats = tally_results(dl_stats, :downloads, stats) unless dl_stats.blank?
+        delay
+      end
+    end
+
+    def process_works(stats, user, start_date)
+      work_ids_for_user(user).each do |work_id|
+        work = Hyrax::WorkRelation.new.find(work_id)
+        work_stats = rescue_and_retry("Retried WorkViewStat on #{user} for work #{work_id} too many times.") { WorkViewStat.statistics(work, start_date, user.id) }
+        stats = tally_results(work_stats, :work_views, stats) unless work_stats.blank?
+        delay
+      end
+    end
+
+    def delay
+      sleep @delay_secs
+    end
+
+    def rescue_and_retry(fail_message)
+      retry_count = 0
+      begin
+        return yield
+      rescue StandardError => e
+        retry_count += 1
+        if retry_count < @number_of_retries
+          delay
+          retry
+        else
+          log_message fail_message
+          log_message "Last exception #{e}"
+        end
+      end
+    end
+
+    def date_since_last_cache(user)
+      last_cached_stat = UserStat.where(user_id: user.id).order(date: :asc).last
+
+      if last_cached_stat
+        last_cached_stat.date + 1.day
+      else
+        Hyrax.config.analytic_start_date
+      end
+    end
+
+    def file_ids_for_user(user)
+      ids = []
+      ::FileSet.search_in_batches("#{depositor_field}:\"#{user.user_key}\"", fl: "id") do |group|
+        ids.concat group.map { |doc| doc["id"] }
+      end
+      ids
+    end
+
+    def work_ids_for_user(user)
+      ids = []
+      Hyrax::WorkRelation.new.search_in_batches("#{depositor_field}:\"#{user.user_key}\"", fl: "id") do |group|
+        ids.concat group.map { |doc| doc["id"] }
+      end
+      ids
+    end
+
+    # For each date, add the view and download counts for this file to the view & download
+    # sub-totals for that day. The resulting hash will look something like this:
+    # {"2014-11-30 00:00:00 UTC" => {:views=>2, :downloads=>5}, "2014-12-01 00:00:00 UTC" => {:views=>4, :downloads=>4}}
+    def tally_results(current_stats, stat_name, total_stats)
+      current_stats.each do |stats|
+        # Exclude the stats from today since they will only cover a partial day's worth of data.
+        break if stats.date == Time.zone.today
+
+        date_key = stats.date.to_s
+        old_count = total_stats[date_key] ? total_stats[date_key].fetch(stat_name) { 0 } : 0
+        new_count = old_count + stats.method(stat_name).call
+
+        old_values = total_stats[date_key] || {}
+        total_stats.store(date_key, old_values)
+        total_stats[date_key].store(stat_name, new_count)
+      end
+      total_stats
+    end
+
+    def create_or_update_user_stats(stats, user)
+      stats.each do |date_string, data|
+        date = Time.zone.parse(date_string)
+
+        user_stat = UserStat.where(user_id: user.id, date: date).first_or_initialize(user_id: user.id, date: date)
+
+        user_stat.file_views = data.fetch(:views, 0)
+        user_stat.file_downloads = data.fetch(:downloads, 0)
+        user_stat.work_views = data.fetch(:work_views, 0)
+        user_stat.save!
+      end
+    end
+
+    def log_message(message)
+      Rails.logger.info "#{self.class}: #{message}" if @logging
+    end
+  end
+end
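
For context, the initializer above reads `verbose`, `logging`, `delay_secs`, and `number_of_retries` from its options hash, and the class comment says the importer is driven by `rake hyrax:stats:user_stats`. The sketch below shows one plausible way a rake task could wire those options through; the file name, task layout, and numeric values are assumptions for illustration, not the task shipped in this change.

```ruby
# lib/tasks/user_stats.rake -- hypothetical wiring, for illustration only.
# The option keys mirror UserStatImporter#initialize in the diff above;
# the values and the task body are assumed, not taken from Hyrax.
namespace :hyrax do
  namespace :stats do
    desc 'Cache per-user work view, file view, and file download stats'
    task user_stats: :environment do
      importer = Hyrax::UserStatImporter.new(
        verbose: true,         # also broadcast Rails.logger INFO output to STDOUT
        logging: true,         # emit log_message progress lines
        delay_secs: 0.25,      # sleep between statistics calls (see #delay)
        number_of_retries: 4   # attempts allowed by #rescue_and_retry
      )
      importer.import
    end
  end
end
```

Because `import` skips any user whose stats were already cached today (`next if start_date.to_date >= Time.zone.today`), a task like this can be scheduled daily without double-counting previously imported days.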