Skip to content

Commit

Permalink
copy user_stat_importer.rb for override
Browse files Browse the repository at this point in the history
  • Loading branch information
jamesvanmil committed Dec 18, 2017
1 parent 2fecf19 commit a5fd013
Showing 1 changed file with 152 additions and 0 deletions.
152 changes: 152 additions & 0 deletions app/services/hyrax/user_stat_importer.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,152 @@
module Hyrax
# Cache work view, file view & file download stats for all users
# this is called by 'rake hyrax:stats:user_stats'
class UserStatImporter
# Lightweight value object built by #sorted_users: the user's id, user_key and the
# date from which that user's stats still need to be imported.
# NOTE(review): passing a String as the first argument to Struct.new also registers
# the struct as the constant Struct::UserRecord.
UserRecord = Struct.new("UserRecord", :id, :user_key, :last_stats_update)

# Builds an importer.
#
# @param options [Hash]
# @option options [Boolean] :verbose when truthy, Rails.logger output is also broadcast to STDOUT at INFO level
# @option options [Boolean] :logging when truthy, #log_message writes to Rails.logger
# @option options [#to_f] :delay_secs seconds slept by #delay (nil becomes 0.0)
# @option options [#to_i] :number_of_retries attempt limit used by #rescue_and_retry (nil becomes 0)
def initialize(options = {})
  if options[:verbose]
    console = Logger.new(STDOUT).tap { |logger| logger.level = Logger::INFO }
    Rails.logger.extend(ActiveSupport::Logger.broadcast(console))
  end

  @logging = options[:logging]
  @delay_secs = options[:delay_secs].to_f
  @number_of_retries = options[:number_of_retries].to_i
end

# Forward #depositor_field to DepositSearchBuilder; the value is the field name used
# in the search queries built by #file_ids_for_user and #work_ids_for_user.
delegate :depositor_field, to: DepositSearchBuilder

# Entry point: walks every user (see #sorted_users), gathers file view, file download
# and work view statistics since that user's last cached date, and persists the
# per-day totals through #create_or_update_user_stats.
def import
  log_message('Begin import of User stats.')

  sorted_users.each do |user|
    since = date_since_last_cache(user)
    # Nothing to do when the next date to import is today or later: this user
    # has already been processed today, so move on without any delay.
    next unless since.to_date < Time.zone.today

    totals = {}

    process_files(totals, user, since)
    process_works(totals, user, since)
    create_or_update_user_stats(totals, user)
  end

  log_message('User stats import complete.')
end

# Builds one UserRecord (id, user_key, date to import from) per ::User and returns
# them as an array sorted ascending by that date, so the users whose stats are the
# most out of date come first.
def sorted_users
  records = ::User.find_each.map do |user|
    UserRecord.new(user.id, user.user_key, date_since_last_cache(user))
  end
  records.sort_by(&:last_stats_update)
end

private

# Adds file view and file download counts to +stats+ for every FileSet id returned
# by #file_ids_for_user. Each statistics lookup goes through #rescue_and_retry and
# is followed by a #delay call.
#
# @param stats [Hash] running per-day totals, updated via #tally_results
# @param user [#id] the user whose files are processed
# @param start_date the date passed on to the statistics lookups
def process_files(stats, user, start_date)
  file_ids_for_user(user).each do |file_id|
    file_set = ::FileSet.find(file_id)

    views_failure = "Retried FileViewStat on #{user} for file #{file_id} too many times."
    views = rescue_and_retry(views_failure) do
      FileViewStat.statistics(file_set, start_date, user.id)
    end
    stats = tally_results(views, :views, stats) if views.present?
    delay

    downloads_failure = "Retried FileDownloadStat on #{user} for file #{file_id} too many times."
    downloads = rescue_and_retry(downloads_failure) do
      FileDownloadStat.statistics(file_set, start_date, user.id)
    end
    stats = tally_results(downloads, :downloads, stats) if downloads.present?
    delay
  end
end

# Adds work view counts to +stats+ for every work id returned by #work_ids_for_user.
# The statistics lookup goes through #rescue_and_retry and is followed by a #delay call.
#
# @param stats [Hash] running per-day totals, updated via #tally_results
# @param user [#id] the user whose works are processed
# @param start_date the date passed on to the statistics lookup
def process_works(stats, user, start_date)
  work_ids_for_user(user).each do |work_id|
    work = Hyrax::WorkRelation.new.find(work_id)

    failure = "Retried WorkViewStat on #{user} for work #{work_id} too many times."
    views = rescue_and_retry(failure) do
      WorkViewStat.statistics(work, start_date, user.id)
    end
    stats = tally_results(views, :work_views, stats) if views.present?
    delay
  end
end

# Pauses for the number of seconds configured through the :delay_secs option
# (stored in @delay_secs by #initialize).
def delay
  sleep(@delay_secs)
end

# Runs the given block, retrying when it raises a StandardError.
#
# The block is attempted until it succeeds or the number of failed attempts reaches
# @number_of_retries (at least one attempt is always made); #delay is called before
# each retry. When the attempts are exhausted, +fail_message+ and the last exception
# are written through #log_message.
#
# @param fail_message [String] message logged once the block has failed too many times
# @yield the operation to attempt
# @return the block's value on success, or nil once the attempts are exhausted
def rescue_and_retry(fail_message)
  attempts = 0
  begin
    yield
  rescue StandardError => e
    attempts += 1
    if attempts < @number_of_retries
      delay
      retry
    end

    log_message fail_message
    log_message "Last exception #{e}"
    # Return nil explicitly. Otherwise the method's value would be whatever the last
    # log_message call returned, which is truthy when logging is enabled; callers
    # guard on `blank?` and would then try to tally a non-enumerable value.
    nil
  end
end

# Returns the date from which stats still need to be imported for +user+: one day
# after the user's most recent UserStat row (by date), or
# Hyrax.config.analytic_start_date when the user has no cached stats.
#
# @param user [#id]
def date_since_last_cache(user)
  latest = UserStat.where(user_id: user.id).order(date: :asc).last
  return Hyrax.config.analytic_start_date unless latest

  latest.date + 1.day
end

# Collects the "id" of every FileSet search document whose depositor field matches
# the user's user_key, reading the results batch by batch.
# NOTE(review): user_key is interpolated into the query inside double quotes without
# escaping; confirm keys can never contain a double quote.
#
# @param user [#user_key]
# @return [Array] the matching ids
def file_ids_for_user(user)
  query = "#{depositor_field}:\"#{user.user_key}\""

  [].tap do |ids|
    ::FileSet.search_in_batches(query, fl: "id") do |batch|
      ids.concat(batch.map { |doc| doc["id"] })
    end
  end
end

# Collects the "id" of every work search document (via Hyrax::WorkRelation) whose
# depositor field matches the user's user_key, reading the results batch by batch.
# NOTE(review): user_key is interpolated into the query inside double quotes without
# escaping; confirm keys can never contain a double quote.
#
# @param user [#user_key]
# @return [Array] the matching ids
def work_ids_for_user(user)
  relation = Hyrax::WorkRelation.new
  query = "#{depositor_field}:\"#{user.user_key}\""

  [].tap do |ids|
    relation.search_in_batches(query, fl: "id") do |batch|
      ids.concat(batch.map { |doc| doc["id"] })
    end
  end
end

# Folds one object's daily statistics into the running per-day totals.
#
# For every entry in +current_stats+, the value of the entry's +stat_name+ method is
# added to total_stats[date][stat_name], where the key is the entry's date as a string.
# The resulting hash looks something like:
#   {"2014-11-30 00:00:00 UTC" => {:views=>2, :downloads=>5},
#    "2014-12-01 00:00:00 UTC" => {:views=>4, :downloads=>4}}
#
# @param current_stats [#each] entries responding to #date and to +stat_name+
# @param stat_name [Symbol] e.g. :views, :downloads or :work_views
# @param total_stats [Hash] accumulator; mutated in place
# @return [Hash] +total_stats+
def tally_results(current_stats, stat_name, total_stats)
  current_stats.each do |entry|
    # Today's numbers are only a partial day's worth of data, so stop as soon as
    # an entry dated today is reached (entries after it are not processed).
    break if entry.date == Time.zone.today

    day = entry.date.to_s
    increment = entry.method(stat_name).call

    day_totals = (total_stats[day] ||= {})
    day_totals[stat_name] = day_totals.fetch(stat_name, 0) + increment
  end
  total_stats
end

# Persists the tallied totals: for each date key in +stats+, finds or initializes the
# UserStat row for (user, date), sets its three counters (0 when a counter is absent
# from the day's data) and saves it with save!.
#
# @param stats [Hash{String => Hash}] per-day totals as built by #tally_results
# @param user [#id]
def create_or_update_user_stats(stats, user)
  stats.each do |date_string, data|
    date = Time.zone.parse(date_string)

    UserStat.where(user_id: user.id, date: date)
            .first_or_initialize(user_id: user.id, date: date)
            .tap do |record|
      record.file_views = data.fetch(:views, 0)
      record.file_downloads = data.fetch(:downloads, 0)
      record.work_views = data.fetch(:work_views, 0)
      record.save!
    end
  end
end

# Writes +message+, prefixed with this object's class name, to Rails.logger at INFO
# level. Does nothing (and returns nil) unless the :logging option was set.
def log_message(message)
  return unless @logging

  Rails.logger.info "#{self.class}: #{message}"
end
end
end

0 comments on commit a5fd013

Please sign in to comment.