Skip to content

Commit

Permalink
Create error_event column to track the context of an error (discarded…
Browse files Browse the repository at this point in the history
…, retried, retry_stopped, etc)
  • Loading branch information
bensheldon committed Jul 4, 2023
1 parent 014b535 commit 47caefd
Show file tree
Hide file tree
Showing 28 changed files with 395 additions and 39 deletions.
27 changes: 27 additions & 0 deletions app/models/concerns/good_job/error_events.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# frozen_string_literal: true
module GoodJob
  # Names the contexts in which an error can be recorded for a job
  # (interrupted, unhandled, handled, retried, retry stopped, discarded) and
  # declares the matching +error_event+ enum on every model that includes
  # this concern.
  module ErrorEvents
    extend ActiveSupport::Concern

    ERROR_EVENT_INTERRUPTED = 'interrupted'
    ERROR_EVENT_UNHANDLED = 'unhandled'
    ERROR_EVENT_HANDLED = 'handled'
    ERROR_EVENT_RETRIED = 'retried'
    ERROR_EVENT_RETRY_STOPPED = 'retry_stopped'
    ERROR_EVENT_DISCARDED = 'discarded'

    # Every known error event name.
    ERROR_EVENTS = [
      ERROR_EVENT_INTERRUPTED,
      ERROR_EVENT_UNHANDLED,
      ERROR_EVENT_HANDLED,
      ERROR_EVENT_RETRIED,
      ERROR_EVENT_RETRY_STOPPED,
      ERROR_EVENT_DISCARDED,
    ].freeze

    included do
      # Each event name is mapped to an explicit integer; keep existing
      # numbers stable when adding new events.
      # NOTE(review): the enum is declared even when the +error_event+ column
      # has not been migrated yet; callers guard writes with
      # +error_event_migrated?+ -- confirm this is safe on all supported Rails versions.
      error_event_values = {
        ERROR_EVENT_INTERRUPTED => 0,
        ERROR_EVENT_UNHANDLED => 1,
        ERROR_EVENT_HANDLED => 2,
        ERROR_EVENT_RETRIED => 3,
        ERROR_EVENT_RETRY_STOPPED => 4,
        ERROR_EVENT_DISCARDED => 5,
      }.freeze

      enum error_event: error_event_values, _prefix: :error_event
    end
  end
end
9 changes: 9 additions & 0 deletions app/models/good_job/base_execution.rb
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@ module GoodJob
# ActiveRecord model to share behavior between {Job} and {Execution} models
# which both read out of the same table.
class BaseExecution < BaseRecord
include ErrorEvents

self.table_name = 'good_jobs'

# With a given class name
Expand Down Expand Up @@ -37,6 +39,13 @@ def coalesce_scheduled_at_created_at
def discrete_support?
GoodJob::DiscreteExecution.migrated?
end

# Reports whether the +error_event+ column is present in +columns_hash+.
# When it is absent, +migration_pending_warning!+ is invoked before
# returning +false+.
# @return [Boolean]
def error_event_migrated?
  if columns_hash["error_event"].present?
    true
  else
    migration_pending_warning!
    false
  end
end
end

# The ActiveJob job class, as a string
Expand Down
9 changes: 9 additions & 0 deletions app/models/good_job/discrete_execution.rb
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@

module GoodJob # :nodoc:
class DiscreteExecution < BaseRecord
include ErrorEvents

self.table_name = 'good_job_executions'

belongs_to :execution, class_name: 'GoodJob::Execution', foreign_key: 'active_job_id', primary_key: 'active_job_id', inverse_of: :discrete_executions, optional: true
Expand All @@ -11,6 +13,13 @@ class DiscreteExecution < BaseRecord

alias_attribute :performed_at, :created_at

# Reports whether the +error_event+ column is present in +columns_hash+.
# Calls +migration_pending_warning!+ when the column is missing.
# @return [Boolean]
def self.error_event_migrated?
  migrated = columns_hash["error_event"].present?
  migration_pending_warning! unless migrated
  migrated
end

# One-based number derived from the 'executions' counter in
# +serialized_params+; a missing counter is treated as 0.
# @return [Integer]
def number
  prior_executions = serialized_params.fetch('executions', 0)
  prior_executions + 1
end
Expand Down
42 changes: 35 additions & 7 deletions app/models/good_job/execution.rb
Original file line number Diff line number Diff line change
Expand Up @@ -373,10 +373,14 @@ def perform
if discrete?
interrupt_error_string = self.class.format_error(GoodJob::InterruptError.new("Interrupted after starting perform at '#{performed_at}'"))
self.error = interrupt_error_string
discrete_executions.where(finished_at: nil).where.not(performed_at: nil).update_all( # rubocop:disable Rails/SkipsModelValidations
self.error_event = ERROR_EVENT_INTERRUPTED if self.class.error_event_migrated?

discrete_execution_attrs = {
error: interrupt_error_string,
finished_at: Time.current
)
finished_at: Time.current,
}
discrete_execution_attrs[:error_event] = GoodJob::DiscreteExecution.error_events[GoodJob::DiscreteExecution::ERROR_EVENT_INTERRUPTED] if self.class.error_event_migrated?
discrete_executions.where(finished_at: nil).where.not(performed_at: nil).update_all(discrete_execution_attrs) # rubocop:disable Rails/SkipsModelValidations
end
end

Expand Down Expand Up @@ -405,15 +409,34 @@ def perform
end
handled_error ||= current_thread.error_on_retry || current_thread.error_on_discard

# Classify how the handled error (if any) was dealt with.
# The no-error case must be checked first: on a successful run both
# +handled_error+ and +current_thread.error_on_discard+ are nil, and
# comparing them would wrongly report the execution as discarded.
error_event = if handled_error.nil?
                nil
              elsif handled_error == current_thread.error_on_discard
                ERROR_EVENT_DISCARDED
              elsif handled_error == current_thread.error_on_retry
                ERROR_EVENT_RETRIED
              elsif handled_error == current_thread.error_on_retry_stopped
                ERROR_EVENT_RETRY_STOPPED
              elsif handled_error
                ERROR_EVENT_HANDLED
              end

instrument_payload.merge!(
value: value,
handled_error: handled_error,
retried: current_thread.execution_retried
retried: current_thread.execution_retried,
error_event: error_event
)
ExecutionResult.new(value: value, handled_error: handled_error, retried: current_thread.execution_retried)
ExecutionResult.new(value: value, handled_error: handled_error, error_event: error_event, retried: current_thread.execution_retried)
rescue StandardError => e
error_event = if e.is_a?(GoodJob::InterruptError)
ERROR_EVENT_INTERRUPTED
elsif e == current_thread.error_on_retry_stopped
ERROR_EVENT_RETRY_STOPPED
else
ERROR_EVENT_UNHANDLED
end

instrument_payload[:unhandled_error] = e
ExecutionResult.new(value: nil, unhandled_error: e)
ExecutionResult.new(value: nil, unhandled_error: e, error_event: error_event)
end
end

Expand All @@ -422,9 +445,14 @@ def perform
if job_error
error_string = self.class.format_error(job_error)
self.error = error_string
discrete_execution.error = error_string if discrete_execution
self.error_event = result.error_event if self.class.error_event_migrated?
if discrete_execution
discrete_execution.error = error_string
discrete_execution.error_event = result.error_event if discrete_execution.class.error_event_migrated?
end
else
self.error = nil
self.error_event = nil if self.class.error_event_migrated?
end

reenqueued = result.retried? || retried_good_job_id.present?
Expand Down
7 changes: 5 additions & 2 deletions app/models/good_job/execution_result.rb
Original file line number Diff line number Diff line change
Expand Up @@ -8,17 +8,20 @@ class ExecutionResult
attr_reader :handled_error
# @return [Exception, nil]
attr_reader :unhandled_error
# @return [Exception, nil]
# @return [String, nil]
attr_reader :error_event
# @return [Boolean, nil]
attr_reader :retried
alias retried? retried

# @param value [Object, nil]
# @param handled_error [Exception, nil]
# @param unhandled_error [Exception, nil]
def initialize(value:, handled_error: nil, unhandled_error: nil, retried: false)
def initialize(value:, handled_error: nil, unhandled_error: nil, error_event: nil, retried: false)
@value = value
@handled_error = handled_error
@unhandled_error = unhandled_error
@error_event = error_event
@retried = retried
end
end
Expand Down
4 changes: 3 additions & 1 deletion app/models/good_job/job.rb
Original file line number Diff line number Diff line change
Expand Up @@ -198,6 +198,7 @@ def retry_job

execution.class.transaction(joinable: false, requires_new: true) do
new_active_job = active_job.retry_job(wait: 0, error: execution.error)
execution.error_event = ERROR_EVENT_RETRIED if execution.error && execution.class.error_event_migrated?
execution.save!
end
end
Expand All @@ -221,7 +222,8 @@ def discard_job(message)
# Marks the execution as finished with the discard error. The +error_event+
# attribute is only written once its column has been migrated, matching the
# guard used everywhere else this attribute is assigned.
update_execution = proc do
  discard_attrs = {
    finished_at: Time.current,
    error: GoodJob::Execution.format_error(job_error),
  }
  discard_attrs[:error_event] = ERROR_EVENT_DISCARDED if execution.class.error_event_migrated?
  execution.update(discard_attrs)
end

Expand Down
2 changes: 1 addition & 1 deletion app/views/good_job/jobs/_executions.erb
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@
</div>
<% if execution.error %>
<div class="mt-3 small">
<strong class="small"><%=t ".error" %>:</strong>
<strong class="small"><%=t "good_job.shared.error" %>:</strong>
<code class="text-wrap text-break m-0 text-black"><%= execution.error %></code>
</div>
<% end %>
Expand Down
21 changes: 17 additions & 4 deletions app/views/good_job/jobs/_table.erb
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@
</div>
</div>
</header>
<label role="row" class="list-group-item list-group-item-warning list-group-item-action py-3 d-none" data-checkbox-toggle-show="job_ids">
<label role="row" class="list-group-item list-group-item-warning list-group-item-action py-2 d-none" data-checkbox-toggle-show="job_ids">
<div class="row">
<div class="col-auto">
<%= check_box_tag "all_job_ids", 1, false, disabled: true, data: { "checkbox-toggle-show": "job_ids"} %>
Expand All @@ -65,6 +65,12 @@
<div class="ms-2">
<%= tag.code link_to(job.id, job_path(job), class: "small text-muted text-decoration-none") %>
<%= tag.h5 tag.code(link_to(job.job_class, job_path(job), class: "text-reset text-decoration-none")), class: "text-reset mb-0" %>
<% if job.error %>
<div class="mt-1 small">
<strong class="small"><%=t "good_job.shared.error" %>:</strong>
<code class="text-wrap text-break m-0 text-black"><%= job.error %></code>
</div>
<% end %>
</div>
</div>
<div class="col-4 col-lg-1 text-lg-center">
Expand All @@ -89,9 +95,16 @@
<% end %>
</div>
<div class="mt-3 mt-lg-0 col">
<div class="d-flex gap-3 align-items-center justify-content-end">
<%= tag.span relative_time(job.last_status_at), class: "small" %>
<%= status_badge job.status %>
<div class="d-flex gap-3 align-items-start justify-content-end">
<%= tag.span relative_time(job.last_status_at), class: "small mt-1" %>
<div>
<%= status_badge job.status %>
<% if job.status == :discarded && job.class.error_event_migrated? && job.error_event %>
<div class="text-black text-center" >
<small><%= t(job.error_event, scope: 'good_job.error_event') %></small>
</div>
<% end %>
</div>

<div class="dropdown float-end">
<button class="d-flex align-items-center btn btn-sm" type="button" id="<%= dom_id(job, :actions) %>" data-bs-toggle="dropdown" aria-expanded="false">
Expand Down
9 changes: 8 additions & 1 deletion config/locales/de.yml
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,13 @@ de:
milliseconds: "%{ms}ms"
minutes: "%{min}m %{sec}s"
seconds: "%{sec}s"
error_event:
discarded: Verworfen
handled: Abgewickelt
interrupted: Unterbrochen
retried: Wiederholt
retry_stopped: Der Wiederholungsversuch wurde gestoppt
unhandled: Unbehandelt
helpers:
relative_time:
future: in %{time}
Expand All @@ -114,7 +121,6 @@ de:
discard:
notice: Auftrag wurde verworfen
executions:
error: Fehler
in_queue: in der Warteschlange
runtime: Laufzeit
title: Ausführungen
Expand Down Expand Up @@ -192,6 +198,7 @@ de:
title: Prozesse
updated: Aktualisiert
shared:
error: Fehler
filter:
all: Alle
all_jobs: Alle Jobs
Expand Down
9 changes: 8 additions & 1 deletion config/locales/en.yml
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,13 @@ en:
milliseconds: "%{ms}ms"
minutes: "%{min}m %{sec}s"
seconds: "%{sec}s"
error_event:
discarded: Discarded
handled: Handled
interrupted: Interrupted
retried: Retried
retry_stopped: Retry stopped
unhandled: Unhandled
helpers:
relative_time:
future: in %{time}
Expand All @@ -114,7 +121,6 @@ en:
discard:
notice: Job has been discarded
executions:
error: Error
in_queue: in queue
runtime: runtime
title: Executions
Expand Down Expand Up @@ -192,6 +198,7 @@ en:
title: Processes
updated: Updated
shared:
error: Error
filter:
all: All
all_jobs: All jobs
Expand Down
9 changes: 8 additions & 1 deletion config/locales/es.yml
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,13 @@ es:
milliseconds: "%{ms}ms"
minutes: "%{min}m %{sec}s"
seconds: "%{sec}s"
error_event:
discarded: Descartado
handled: Manejado
interrupted: Interrumpido
retried: Reintentado
retry_stopped: Reintentar detenido
unhandled: Sin manejar
helpers:
relative_time:
future: en %{time}
Expand All @@ -114,7 +121,6 @@ es:
discard:
notice: La tarea ha sido descartada
executions:
error: Error
in_queue: en cola
runtime: en ejecución
title: Ejecuciones
Expand Down Expand Up @@ -192,6 +198,7 @@ es:
title: Procesos
updated: Actualizado
shared:
error: Error
filter:
all: Todas
all_jobs: Todas las tareas
Expand Down
9 changes: 8 additions & 1 deletion config/locales/fr.yml
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,13 @@ fr:
milliseconds: "%{ms}ms"
minutes: "%{min}m %{sec}s"
seconds: "%{sec}s"
error_event:
discarded: Mis au rebut
handled: Géré
interrupted: Interrompu
retried: Réessayé
retry_stopped: Nouvelle tentative arrêtée
unhandled: Non géré
helpers:
relative_time:
future: dans %{time}
Expand All @@ -114,7 +121,6 @@ fr:
discard:
notice: Le job a été mis au rebut
executions:
error: Erreur
in_queue: Dans la file d'attente
runtime: Durée
title: Exécutions
Expand Down Expand Up @@ -192,6 +198,7 @@ fr:
title: Processus
updated: Mis à jour
shared:
error: Erreur
filter:
all: Tous
all_jobs: Tous les jobs
Expand Down
9 changes: 8 additions & 1 deletion config/locales/ja.yml
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,13 @@ ja:
milliseconds: "%{ms}ミリ秒"
minutes: "%{min}分 %{sec}秒"
seconds: "%{sec}秒"
error_event:
discarded: 廃棄されました
handled: 取り扱い済み
interrupted: 中断されました
retried: 再試行しました
retry_stopped: 再試行が停止されました
unhandled: 未処理
helpers:
relative_time:
future: "%{time}後"
Expand All @@ -114,7 +121,6 @@ ja:
discard:
notice: ジョブが破棄されました
executions:
error: エラー
in_queue: 待機中
runtime: 実行時間
title: 実行
Expand Down Expand Up @@ -192,6 +198,7 @@ ja:
title: プロセス
updated: 更新された
shared:
error: エラー
filter:
all: 全て
all_jobs: 全てのジョブ
Expand Down
Loading

0 comments on commit 47caefd

Please sign in to comment.