diff --git a/.gitignore b/.gitignore index 6bde1836..cb410100 100644 --- a/.gitignore +++ b/.gitignore @@ -1,7 +1,8 @@ *.gem +*~ +.DS_STORE .bundle pkg/* -.DS_STORE spec/redis.config.yml bundle @@ -9,4 +10,6 @@ spec/tmp coverage bin +phantomjs/ + .rspec-local diff --git a/.gitmodules b/.gitmodules index f960b50c..0c543cd9 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,3 +1,3 @@ [submodule "lib/qless/qless-core"] path = lib/qless/qless-core - url = https://github.com/seomoz/qless-core.git + url = https://github.com/backupify/qless-core.git diff --git a/Gemfile b/Gemfile index 8092194e..e8dd6511 100644 --- a/Gemfile +++ b/Gemfile @@ -8,3 +8,11 @@ group :extras do end gem 'thin' # needed by qless-web binary + +group :development do + gem 'byebug', :platforms => [:ruby_20, :ruby_21] + gem 'pry' + gem 'pry-byebug', :platforms => [:ruby_20, :ruby_21] + gem 'pry-stack_explorer' + gem 'cane', :platforms => [:ruby_20, :ruby_21] +end diff --git a/Gemfile.lock b/Gemfile.lock index e1d7bdf0..81a6fcbe 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -11,6 +11,13 @@ GEM ast (1.1.0) atomic (1.1.14) avl_tree (1.1.3) + binding_of_caller (0.7.2) + debug_inspector (>= 0.0.1) + byebug (2.7.0) + columnize (~> 0.3) + debugger-linecache (~> 1.2) + cane (2.6.2) + parallel capybara (1.1.4) mime-types (>= 1.16) nokogiri (>= 1.3.3) @@ -20,8 +27,10 @@ GEM xpath (~> 0.1.4) childprocess (0.3.9) ffi (~> 1.0, >= 1.0.11) + coderay (1.1.0) columnize (0.3.6) daemons (1.1.9) + debug_inspector (0.0.2) debugger (1.6.2) columnize (>= 0.3.1) debugger-linecache (~> 1.2.0) @@ -40,6 +49,7 @@ GEM http_parser.rb (0.5.3) launchy (2.1.2) addressable (~> 2.3) + method_source (0.8.2) metriks (0.9.9.5) atomic (~> 1.0) avl_tree (~> 1.1.2) @@ -50,6 +60,7 @@ GEM multipart-post (1.2.0) nokogiri (1.6.0) mini_portile (~> 0.5.0) + parallel (1.4.1) parser (2.0.0.pre8) ast (~> 1.1) slop (~> 3.4, >= 3.4.5) @@ -60,6 +71,16 @@ GEM http_parser.rb (~> 0.5.3) multi_json (~> 1.0) powerpack (0.0.8) + pry (0.10.1) + coderay (~> 1.1.0) + method_source (~> 0.8.1) + slop (~> 3.4) + pry-byebug (1.3.3) + byebug (~> 2.7) + pry (~> 0.10) + pry-stack_explorer (0.4.9.2) + binding_of_caller (>= 0.7) + pry (>= 0.9.11) rack (1.5.2) rack-protection (1.5.0) rack @@ -119,12 +140,17 @@ PLATFORMS ruby DEPENDENCIES + byebug + cane capybara (~> 1.1.2) debugger faye-websocket (~> 0.4.0) launchy (~> 2.1.0) metriks (~> 0.9) poltergeist (~> 1.0.0) + pry + pry-byebug + pry-stack_explorer qless! rake (~> 10.0) rspec (~> 2.12) diff --git a/README.md b/README.md index eaa5a48c..cc8a710a 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -qless +qless [![Build Status](https://travis-ci.org/seomoz/qless.svg?branch=master)](https://travis-ci.org/seomoz/qless) ===== Qless is a powerful `Redis`-based job queueing system inspired by @@ -162,31 +162,41 @@ it is empty, before trying to pop job off the second queue. The round-robin reserver will pop a job off the first queue, then the second queue, and so on. You could also easily implement your own. -To start a worker, load the qless rake tasks in your Rakefile, and -define a `qless:setup` task: +To start a worker, write a bit of Ruby code that instantiates a +worker and runs it. You could write a rake task to do this, for +example: ``` ruby -require 'qless/tasks' namespace :qless do - task :setup do - require 'my_app/environment' # to ensure all job classes are loaded - - # Set options via environment variables - # The only required option is QUEUES; the - # rest have reasonable defaults. 
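# (For comparison with the new example below: QUEUES names the queues the
# worker should poll, JOB_RESERVER 'Ordered' corresponds to the
# Qless::JobReservers::Ordered class, and INTERVAL is the pause, in
# seconds, between polls for new work.)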
- ENV['REDIS_URL'] ||= 'redis://some-host:7000/3' - ENV['QUEUES'] ||= 'fizz,buzz' - ENV['JOB_RESERVER'] ||= 'Ordered' - ENV['INTERVAL'] ||= '10' # 10 seconds - ENV['VERBOSE'] ||= 'true' - end -end -``` + desc "Run a Qless worker" + task :work do + # Load your application code. All job classes must be loaded. + require 'my_app/environment' -Then run the `qless:work` rake task: + # Require the parts of qless you need + require 'qless' + require 'qless/job_reservers/ordered' + require 'qless/worker' -``` -rake qless:work + # Create a client + client = Qless::Client.new(:host => 'foo.bar.com', :port => 1234) + + # Get the queues you use + queues = %w[ queue_1 queue_2 ].map do |name| + client.queues[name] + end + + # Create a job reserver; different reservers use different + # strategies for which order jobs are popped off of queues + reserver = Qless::JobReservers::Ordered.new(queues) + + # Create a forking worker that uses the given reserver to pop jobs. + worker = Qless::Workers::ForkingWorker.new(reserver) + + # Start the worker! + worker.run + end +end ``` The following signals are supported in the parent process: diff --git a/Rakefile b/Rakefile index 08c371d7..6669b4b1 100644 --- a/Rakefile +++ b/Rakefile @@ -21,7 +21,7 @@ task :check_coverage do end end -task default: [:spec, :check_coverage] +task default: [:spec, :check_coverage, :cane] namespace :core do qless_core_dir = "./lib/qless/qless-core" @@ -82,15 +82,24 @@ namespace :core do task verify: %w[ verify:clean verify:current ] end +desc "Starts a qless console" +task :console do + ENV['PUBLIC_SEQUEL_API'] = 'true' + ENV['NO_NEW_RELIC'] = 'true' + exec "bundle exec pry -r./conf/console" +end + require 'qless/tasks' namespace :qless do - task :setup do + desc "Runs a test worker so you can send signals to it for testing" + task :run_test_worker do require 'qless' + require 'qless/job_reservers/ordered' + require 'qless/worker' queue = Qless::Client.new.queues["example"] queue.client.redis.flushdb - ENV['QUEUES'] = queue.name ENV['VVERBOSE'] = '1' class ExampleJob @@ -105,6 +114,55 @@ namespace :qless do 20.times do |i| queue.put(ExampleJob, sleep: i) end + + reserver = Qless::JobReservers::Ordered.new([queue]) + Qless::Workers::ForkingWorker.new(reserver, log_level: Logger::INFO).run + end +end + + +namespace :cane do + begin + require 'cane/rake_task' + + libs = [ + { name: 'qless', dir: '.', root: '.' 
}, + ] + + libs.each do |lib| + desc "Runs cane code quality checks for #{lib[:name]}" + Cane::RakeTask.new(lib[:name]) do |cane| + cane.no_doc = true + + cane.abc_glob = "#{lib[:dir]}/{lib,spec}/**/*.rb" + cane.abc_max = 15 + cane.abc_exclude = %w[ + Middleware::(anon)#expect_job_to_timeout + Qless::Job#initialize + Qless::Middleware::RequeueExceptions#handle_exception + Qless::Middleware::Timeout#initialize + Qless::WorkerHelpers#run_jobs + Qless::Workers::BaseWorker#initialize + Qless::Workers::BaseWorker#register_signal_handlers + Qless::Workers::ForkingWorker#register_signal_handlers + Qless::Workers::SerialWorker#run + ] + + cane.style_glob = "#{lib[:dir]}/lib/**/*.rb" + cane.style_measure = 100 + cane.style_exclude = %w[ + ] + end + end + + desc "Runs cane code quality checks for all projects" + task all: libs.map { |l| l[:name] } + + rescue LoadError + task :all do + puts "cane is not supported in ruby #{RUBY_VERSION}" + end end end +task cane: "cane:all" diff --git a/conf/console.rb b/conf/console.rb new file mode 100644 index 00000000..2bd10a59 --- /dev/null +++ b/conf/console.rb @@ -0,0 +1,20 @@ +$LOAD_PATH.unshift File.expand_path('../../lib', __FILE__) + +require 'irb/completion' + +QLESS_CONSOLE = true + +require 'qless' + +module StdoutLogger + def logger + @logger ||= Logger.new($stdout) + end +end + +# Load everything! +Dir["./lib/**/*.rb"].sort.each do |f| + require f.gsub("./lib/", "") +end + +require 'pp' diff --git a/exe/install_phantomjs b/exe/install_phantomjs index e0e13b19..9d4c74cc 100755 --- a/exe/install_phantomjs +++ b/exe/install_phantomjs @@ -10,7 +10,7 @@ then brew install phantomjs elif [[ "$os_name" == 'Linux' ]] then - version=phantomjs-1.7.0-linux-i686 + version=phantomjs-1.7.0-linux-x86_64 wget http://phantomjs.googlecode.com/files/$version.tar.bz2 tar xjf $version.tar.bz2 mv $version phantomjs diff --git a/exe/qless-growl b/exe/qless-growl index f81ab916..65788f5a 100755 --- a/exe/qless-growl +++ b/exe/qless-growl @@ -7,7 +7,7 @@ require 'ruby-growl' require 'micro-optparse' @options = Parser.new do |p| - p.banner = 'This agent lets you get campfire notifications for the progress of tracked jobs' + p.banner = 'This agent lets you get growl notifications for the progress of tracked jobs' p.option :growl , 'host for the growl daemon', :default => 'localhost' p.option :app , 'application name for notifications', :default => 'qless' p.option :host , 'host:port for your qless redis instance', :default => 'localhost:6379' diff --git a/lib/qless.rb b/lib/qless.rb index ff1aa255..563e1df6 100644 --- a/lib/qless.rb +++ b/lib/qless.rb @@ -23,6 +23,7 @@ module Qless require 'qless/version' require 'qless/config' require 'qless/queue' +require 'qless/throttle' require 'qless/job' require 'qless/lua_script' require 'qless/failure_formatter' @@ -135,6 +136,24 @@ def [](name) end end + # A class for interacting with throttles. Not meant to be instantiated directly, + # it's accessed through Client#throttles + class ClientThrottles + def initialize(client) + @client = client + end + + def [](name) + Throttle.new(name, @client) + end + + def counts + @client.queues.counts.map do |queue| + Queue.new(queue['name'], @client).throttle + end + end + end + # A class for interacting with events. 
Not meant to be instantiated directly, # it's accessed through Client#events class ClientEvents @@ -169,20 +188,24 @@ def stop # The client for interacting with Qless class Client # Lua script - attr_reader :_qless, :config, :redis, :jobs, :queues, :workers + attr_reader :_qless, :config, :redis, :jobs, :queues, :throttles, :workers attr_accessor :worker_name def initialize(options = {}) + default_options = {:ensure_minimum_version => true} + options = default_options.merge(options) + # This is the redis instance we're connected to. Use connect so REDIS_URL # will be honored @redis = options[:redis] || Redis.connect(options) @options = options - assert_minimum_redis_version('2.5.5') + assert_minimum_redis_version('2.5.5') if @options.delete(:ensure_minimum_version) @config = Config.new(self) @_qless = Qless::LuaScript.new('qless', @redis) @jobs = ClientJobs.new(self) @queues = ClientQueues.new(self) + @throttles = ClientThrottles.new(self) @workers = ClientWorkers.new(self) @worker_name = [Socket.gethostname, Process.pid.to_s].join('-') end diff --git a/lib/qless/failure_formatter.rb b/lib/qless/failure_formatter.rb index 195ebc33..4546c5a1 100644 --- a/lib/qless/failure_formatter.rb +++ b/lib/qless/failure_formatter.rb @@ -15,29 +15,25 @@ def initialize @replacements[ENV['GEM_HOME']] = '' if ENV.key?('GEM_HOME') end + # lib/qless/job.rb#fail shows us that qless, right down to the Lua scripts, + # is set up to expect both a group and a message for a failed job. So we + # can't stop storing failed jobs altogether. But, to save on precious RAM, + # we can stop recording the message, which is the stack traces that we currently + # store. def format(job, error, lines_to_remove = caller(2)) group = "#{job.klass_name}:#{error.class}" - message = "#{truncated_message(error)}\n\n" + - "#{format_failure_backtrace(error.backtrace, lines_to_remove)}" + message = "#{truncated_message(error)}" Failure.new(group, message) end private # TODO: pull this out into a config option. - MAX_ERROR_MESSAGE_SIZE = 10_000 + MAX_ERROR_MESSAGE_SIZE = 100 def truncated_message(error) return error.message if error.message.length <= MAX_ERROR_MESSAGE_SIZE error.message.slice(0, MAX_ERROR_MESSAGE_SIZE) + - '... (truncated due to length)' - end - - def format_failure_backtrace(error_backtrace, lines_to_remove) - (error_backtrace - lines_to_remove).map do |line| - @replacements.reduce(line) do |formatted, (original, new)| - formatted.sub(original, new) - end - end.join("\n") + "\n... 
(truncated due to length)" end end end diff --git a/lib/qless/job.rb b/lib/qless/job.rb index cb8b2eb6..1edfe864 100644 --- a/lib/qless/job.rb +++ b/lib/qless/job.rb @@ -18,7 +18,7 @@ def initialize(client, jid) def klass @klass ||= @klass_name.split('::').reduce(Object) do |context, name| - context.const_get(name) + context.const_get(name, false) end end @@ -44,7 +44,8 @@ class Job < BaseJob attr_reader :klass_name, :tracked, :dependencies, :dependents attr_reader :original_retries, :retries_left, :raw_queue_history attr_reader :state_changed - attr_accessor :data, :priority, :tags + attr_accessor :data, :priority, :tags, :throttles + alias_method(:state_changed?, :state_changed) MiddlewareMisconfiguredError = Class.new(StandardError) @@ -63,6 +64,9 @@ def perform return fail("#{queue_name}-NameError", "Cannot find #{klass_name}") end + # log a real process executing job -- before we start processing + log("started by pid:#{Process.pid}") + middlewares = Job.middlewares_on(klass) if middlewares.last == SupportsMiddleware @@ -99,13 +103,27 @@ def self.build(client, klass, attributes = {}) 'failure' => {}, 'history' => [], 'dependencies' => [], - 'dependents' => [] + 'dependents' => [], + 'throttles' => [], } attributes = defaults.merge(Qless.stringify_hash_keys(attributes)) attributes['data'] = JSON.dump(attributes['data']) new(client, attributes) end + # Converts a hash of job options (as returned by job.to_hash) into the array + # format the qless api expects. + def self.build_opts_array(opts) + result = [] + result << JSON.generate(opts.fetch(:data, {})) + result.concat([opts.fetch(:delay, 0)]) + result.concat(['priority', opts.fetch(:priority, 0)]) + result.concat(['tags', JSON.generate(opts.fetch(:tags, []))]) + result.concat(['retries', opts.fetch(:retries, 5)]) + result.concat(['depends', JSON.generate(opts.fetch(:depends, []))]) + result.concat(['throttles', JSON.generate(opts.fetch(:throttles, []))]) + end + def self.middlewares_on(job_klass) singleton_klass = job_klass.singleton_class singleton_klass.ancestors.select do |ancestor| @@ -116,7 +134,7 @@ def self.middlewares_on(job_klass) def initialize(client, atts) super(client, atts.fetch('jid')) %w{jid data priority tags state tracked - failure dependencies dependents spawned_from_jid}.each do |att| + failure dependencies dependents throttles spawned_from_jid}.each do |att| instance_variable_set(:"@#{att}", atts.fetch(att)) end @@ -168,6 +186,10 @@ def ttl @expires_at - Time.now.to_f end + def throttle_objects + throttles.map { |name| Throttle.new(name, client) } + end + def reconnect_to_redis @client.redis.client.reconnect end @@ -218,7 +240,27 @@ def to_hash retries_left: retries_left, data: data, priority: priority, - tags: tags + tags: tags, + throttles: throttles, + } + end + + # Extract the enqueue options from the job + # @return [Hash] options + # @option options [Integer] :retries + # @option options [Integer] :priority + # @option options [Array] :depends + # @option options [Array] :tags + # @option options [Array] throttles + # @option options [Hash] :data + def enqueue_opts + { + retries: original_retries, + priority: priority, + depends: dependents, + tags: tags, + throttles: throttles, + data: data, } end @@ -226,12 +268,7 @@ def to_hash def requeue(queue, opts = {}) note_state_change :requeue do @client.call('requeue', @client.worker_name, queue, @jid, @klass_name, - JSON.dump(opts.fetch(:data, @data)), - opts.fetch(:delay, 0), - 'priority', opts.fetch(:priority, @priority), - 'tags', JSON.dump(opts.fetch(:tags, 
@tags)), - 'retries', opts.fetch(:retries, @original_retries), - 'depends', JSON.dump(opts.fetch(:depends, @dependencies)) + *self.class.build_opts_array(self.enqueue_opts.merge!(opts)) ) end end diff --git a/lib/qless/lua/qless-lib.lua b/lib/qless/lua/qless-lib.lua index 08c7ae7d..2b8a040c 100644 --- a/lib/qless/lua/qless-lib.lua +++ b/lib/qless/lua/qless-lib.lua @@ -1,4 +1,4 @@ --- Current SHA: 525c39000dc71df53a3502491cb4daf0e1128f1d +-- Current SHA: 20dc687832ad472f0a00899d26c285b893ff466c -- This is a generated file ------------------------------------------------------------------------------- -- Forward declarations to make everything happy @@ -25,6 +25,12 @@ local QlessJob = { } QlessJob.__index = QlessJob +-- throttle forward declaration +local QlessThrottle = { + ns = Qless.ns .. 'th:' +} +QlessThrottle.__index = QlessThrottle + -- RecurringJob forward declaration local QlessRecurringJob = {} QlessRecurringJob.__index = QlessRecurringJob @@ -63,19 +69,69 @@ function Qless.recurring(jid) return job end +-- Return a throttle object +-- throttle objects are used for arbitrary throttling of jobs. +function Qless.throttle(tid) + assert(tid, 'Throttle(): no tid provided') + local throttle = QlessThrottle.data({id = tid}) + setmetatable(throttle, QlessThrottle) + + -- set of jids which have acquired a lock on this throttle. + throttle.locks = { + length = function() + return (redis.call('zcard', QlessThrottle.ns .. tid .. '-locks') or 0) + end, members = function() + return redis.call('zrange', QlessThrottle.ns .. tid .. '-locks', 0, -1) + end, add = function(...) + if #arg > 0 then + redis.call('zadd', QlessThrottle.ns .. tid .. '-locks', unpack(arg)) + end + end, remove = function(...) + if #arg > 0 then + return redis.call('zrem', QlessThrottle.ns .. tid .. '-locks', unpack(arg)) + end + end, pop = function(min, max) + return redis.call('zremrangebyrank', QlessThrottle.ns .. tid .. '-locks', min, max) + end, peek = function(min, max) + return redis.call('zrange', QlessThrottle.ns .. tid .. '-locks', min, max) + end + } + + -- set of jids which are waiting for the throttle to become available. + throttle.pending = { + length = function() + return (redis.call('zcard', QlessThrottle.ns .. tid .. '-pending') or 0) + end, members = function() + return redis.call('zrange', QlessThrottle.ns .. tid .. '-pending', 0, -1) + end, add = function(now, jid) + redis.call('zadd', QlessThrottle.ns .. tid .. '-pending', now, jid) + end, remove = function(...) + if #arg > 0 then + return redis.call('zrem', QlessThrottle.ns .. tid .. '-pending', unpack(arg)) + end + end, pop = function(min, max) + return redis.call('zremrangebyrank', QlessThrottle.ns .. tid .. '-pending', min, max) + end, peek = function(min, max) + return redis.call('zrange', QlessThrottle.ns .. tid .. '-pending', min, max) + end + } + + return throttle +end + -- Failed([group, [start, [limit]]]) -- ------------------------------------ -- If no group is provided, this returns a JSON blob of the counts of the -- various groups of failures known. If a group is provided, it will report up -- to `limit` from `start` of the jobs affected by that issue. --- +-- -- # If no group, then... -- { -- 'group1': 1, -- 'group2': 5, -- ... -- } --- +-- -- # If a group is provided, then... -- { -- 'total': 20, @@ -121,9 +177,9 @@ end ------------------------------------------------------------------------------- -- Return all the job ids currently considered to be in the provided state -- in a particular queue. 
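-- (Per the dispatch in Qless.jobs below, recognized states here include
-- 'running', 'stalled', 'scheduled' and the newly added 'throttled'; a
-- representative call shape, with illustrative arguments, would be
-- Qless.jobs(now, 'throttled', 'my-queue', 0, 25).)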
The response is a list of job ids: --- +-- -- [ --- jid1, +-- jid1, -- jid2, -- ... -- ] @@ -148,6 +204,8 @@ function Qless.jobs(now, state, ...) return queue.locks.peek(now, offset, count) elseif state == 'stalled' then return queue.locks.expired(now, offset, count) + elseif state == 'throttled' then + return queue.throttled.peek(now, offset, count) elseif state == 'scheduled' then queue:check_scheduled(now, queue.scheduled.length()) return queue.scheduled.peek(now, offset, count) @@ -169,7 +227,7 @@ end -- associated with that id, and 'untrack' stops tracking it. In this context, -- tracking is nothing more than saving the job to a list of jobs that are -- considered special. --- +-- -- { -- 'jobs': [ -- { @@ -254,7 +312,7 @@ function Qless.tag(now, command, ...) tags = cjson.decode(tags) local _tags = {} for i,v in ipairs(tags) do _tags[v] = true end - + -- Otherwise, add the job to the sorted set with that tags for i=2,#arg do local tag = arg[i] @@ -262,10 +320,9 @@ function Qless.tag(now, command, ...) _tags[tag] = true table.insert(tags, tag) end - redis.call('zadd', 'ql:t:' .. tag, now, jid) - redis.call('zincrby', 'ql:tags', 1, tag) + Qless.job(jid):insert_tag(now, tag) end - + redis.call('hset', QlessJob.ns .. jid, 'tags', cjson.encode(tags)) return tags else @@ -280,18 +337,17 @@ function Qless.tag(now, command, ...) tags = cjson.decode(tags) local _tags = {} for i,v in ipairs(tags) do _tags[v] = true end - - -- Otherwise, add the job to the sorted set with that tags + + -- Otherwise, remove the job from the sorted set with that tags for i=2,#arg do local tag = arg[i] _tags[tag] = nil - redis.call('zrem', 'ql:t:' .. tag, jid) - redis.call('zincrby', 'ql:tags', -1, tag) + Qless.job(jid):remove_tag(tag) end - + local results = {} for i,tag in ipairs(tags) do if _tags[tag] then table.insert(results, tag) end end - + redis.call('hset', QlessJob.ns .. jid, 'tags', cjson.encode(results)) return results else @@ -321,7 +377,7 @@ end -- Cancel a job from taking place. It will be deleted from the system, and any -- attempts to renew a heartbeat will fail, and any attempts to complete it -- will fail. If you try to get the data on the object, you will get nothing. -function Qless.cancel(...) +function Qless.cancel(now, ...) -- Dependents is a mapping of a job to its dependent jids local dependents = {} for _, jid in ipairs(arg) do @@ -367,12 +423,13 @@ function Qless.cancel(...) -- Remove it from that queue if queue then local queue = Qless.queue(queue) - queue.work.remove(jid) - queue.locks.remove(jid) - queue.scheduled.remove(jid) - queue.depends.remove(jid) + queue:remove_job(jid) end + local job = Qless.job(jid) + + job:throttles_release(now) + -- We should probably go through all our dependencies and remove -- ourselves from the list of dependents for i, j in ipairs(redis.call( @@ -380,9 +437,6 @@ function Qless.cancel(...) redis.call('srem', QlessJob.ns .. j .. '-dependents', jid) end - -- Delete any notion of dependencies it has - redis.call('del', QlessJob.ns .. jid .. '-dependencies') - -- If we're in the failed state, remove all of our data if state == 'failed' then failure = cjson.decode(failure) @@ -400,25 +454,15 @@ function Qless.cancel(...) 'ql:s:stats:' .. bin .. ':' .. queue, 'failed', failed - 1) end - -- Remove it as a job that's tagged with this particular tag - local tags = cjson.decode( - redis.call('hget', QlessJob.ns .. jid, 'tags') or '{}') - for i, tag in ipairs(tags) do - redis.call('zrem', 'ql:t:' .. 
tag, jid) - redis.call('zincrby', 'ql:tags', -1, tag) - end + job:delete() -- If the job was being tracked, we should notify if redis.call('zscore', 'ql:tracked', jid) ~= false then Qless.publish('canceled', jid) end - - -- Just go ahead and delete our data - redis.call('del', QlessJob.ns .. jid) - redis.call('del', QlessJob.ns .. jid .. '-history') end end - + return arg end @@ -490,7 +534,7 @@ function QlessJob:data(...) local job = redis.call( 'hmget', QlessJob.ns .. self.jid, 'jid', 'klass', 'state', 'queue', 'worker', 'priority', 'expires', 'retries', 'remaining', 'data', - 'tags', 'failure', 'spawned_from_jid') + 'tags', 'failure', 'throttles', 'spawned_from_jid') -- Return nil if we haven't found it if not job[1] then @@ -505,18 +549,19 @@ function QlessJob:data(...) worker = job[5] or '', tracked = redis.call( 'zscore', 'ql:tracked', self.jid) ~= false, - priority = tonumber(job[6]), - expires = tonumber(job[7]) or 0, - retries = tonumber(job[8]), - remaining = math.floor(tonumber(job[9])), - data = job[10], - tags = cjson.decode(job[11]), - history = self:history(), - failure = cjson.decode(job[12] or '{}'), - spawned_from_jid = job[13], - dependents = redis.call( + priority = tonumber(job[6]), + expires = tonumber(job[7]) or 0, + retries = tonumber(job[8]), + remaining = math.floor(tonumber(job[9])), + data = job[10], + tags = cjson.decode(job[11]), + history = self:history(), + failure = cjson.decode(job[12] or '{}'), + throttles = cjson.decode(job[13] or '[]'), + spawned_from_jid = job[14], + dependents = redis.call( 'smembers', QlessJob.ns .. self.jid .. '-dependents'), - dependencies = redis.call( + dependencies = redis.call( 'smembers', QlessJob.ns .. self.jid .. '-dependencies') } @@ -535,26 +580,26 @@ end -- Complete a job and optionally put it in another queue, either scheduled or -- to be considered waiting immediately. It can also optionally accept other --- jids on which this job will be considered dependent before it's considered +-- jids on which this job will be considered dependent before it's considered -- valid. -- -- The variable-length arguments may be pairs of the form: --- +-- -- ('next' , queue) : The queue to advance it to next -- ('delay' , delay) : The delay for the next queue -- ('depends', : Json of jobs it depends on in the new queue -- '["jid1", "jid2", ...]') --- -function QlessJob:complete(now, worker, queue, data, ...) +function QlessJob:complete(now, worker, queue, raw_data, ...) assert(worker, 'Complete(): Arg "worker" missing') assert(queue , 'Complete(): Arg "queue" missing') - data = assert(cjson.decode(data), - 'Complete(): Arg "data" missing or not JSON: ' .. tostring(data)) + local data = assert(cjson.decode(raw_data), + 'Complete(): Arg "data" missing or not JSON: ' .. tostring(raw_data)) -- Read in all the optional parameters local options = {} for i = 1, #arg, 2 do options[arg[i]] = arg[i + 1] end - + -- Sanity check on optional args local nextq = options['next'] local delay = assert(tonumber(options['delay'] or 0)) @@ -600,15 +645,15 @@ function QlessJob:complete(now, worker, queue, data, ...) -- update history self:history(now, 'done') - if data then - redis.call('hset', QlessJob.ns .. self.jid, 'data', cjson.encode(data)) + if raw_data then + redis.call('hset', QlessJob.ns .. 
self.jid, 'data', raw_data) end -- Remove the job from the previous queue local queue_obj = Qless.queue(queue) - queue_obj.work.remove(self.jid) - queue_obj.locks.remove(self.jid) - queue_obj.scheduled.remove(self.jid) + queue_obj:remove_job(self.jid) + + self:throttles_release(now) ---------------------------------------------------------- -- This is the massive stats update that we have to do @@ -618,7 +663,7 @@ function QlessJob:complete(now, worker, queue, data, ...) local time = tonumber( redis.call('hget', QlessJob.ns .. self.jid, 'time') or now) local waiting = now - time - Qless.queue(queue):stat(now, 'run', waiting) + queue_obj:stat(now, 'run', waiting) redis.call('hset', QlessJob.ns .. self.jid, 'time', string.format("%.20f", now)) @@ -647,7 +692,7 @@ function QlessJob:complete(now, worker, queue, data, ...) if redis.call('zscore', 'ql:queues', nextq) == false then redis.call('zadd', 'ql:queues', now, nextq) end - + redis.call('hmset', QlessJob.ns .. self.jid, 'state', 'waiting', 'worker', '', @@ -655,7 +700,7 @@ function QlessJob:complete(now, worker, queue, data, ...) 'queue', nextq, 'expires', 0, 'remaining', tonumber(retries)) - + if (delay > 0) and (#depends == 0) then queue_obj.scheduled.add(now + delay, self.jid) return 'scheduled' @@ -703,48 +748,35 @@ function QlessJob:complete(now, worker, queue, data, ...) 'queue', '', 'expires', 0, 'remaining', tonumber(retries)) - + -- Do the completion dance local count = Qless.config.get('jobs-history-count') local time = Qless.config.get('jobs-history') - + -- These are the default values count = tonumber(count or 50000) time = tonumber(time or 7 * 24 * 60 * 60) - + -- Schedule this job for destructination eventually redis.call('zadd', 'ql:completed', now, self.jid) - + -- Now look at the expired job data. First, based on the current time local jids = redis.call('zrangebyscore', 'ql:completed', 0, now - time) -- Any jobs that need to be expired... delete for index, jid in ipairs(jids) do - local tags = cjson.decode( - redis.call('hget', QlessJob.ns .. jid, 'tags') or '{}') - for i, tag in ipairs(tags) do - redis.call('zrem', 'ql:t:' .. tag, jid) - redis.call('zincrby', 'ql:tags', -1, tag) - end - redis.call('del', QlessJob.ns .. jid) - redis.call('del', QlessJob.ns .. jid .. '-history') + Qless.job(jid):delete() end + -- And now remove those from the queued-for-cleanup queue redis.call('zremrangebyscore', 'ql:completed', 0, now - time) - + -- Now take the all by the most recent 'count' ids jids = redis.call('zrange', 'ql:completed', 0, (-1-count)) for index, jid in ipairs(jids) do - local tags = cjson.decode( - redis.call('hget', QlessJob.ns .. jid, 'tags') or '{}') - for i, tag in ipairs(tags) do - redis.call('zrem', 'ql:t:' .. tag, jid) - redis.call('zincrby', 'ql:tags', -1, tag) - end - redis.call('del', QlessJob.ns .. jid) - redis.call('del', QlessJob.ns .. jid .. '-history') + Qless.job(jid):delete() end redis.call('zremrangebyrank', 'ql:completed', 0, (-1-count)) - + -- Alright, if this has any dependents, then we should go ahead -- and unstick those guys. for i, j in ipairs(redis.call( @@ -768,10 +800,10 @@ function QlessJob:complete(now, worker, queue, data, ...) end end end - + -- Delete our dependents key redis.call('del', QlessJob.ns .. self.jid .. '-dependents') - + return 'complete' end end @@ -782,14 +814,14 @@ end -- specific message. By `group`, we mean some phrase that might be one of -- several categorical modes of failure. The `message` is something more -- job-specific, like perhaps a traceback. 
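-- (An illustrative group/message pair, matching the Ruby FailureFormatter
-- changed in this same diff:
--   group   = 'MyJobClass:RuntimeError'
--   message = 'something went wrong ... (truncated due to length)'
-- Similar failures share a group, and the message is now capped at a much
-- smaller size to save Redis memory.)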
--- +-- -- This method should __not__ be used to note that a job has been dropped or -- has failed in a transient way. This method __should__ be used to note that -- a job has something really wrong with it that must be remedied. --- +-- -- The motivation behind the `group` is so that similar errors can be grouped -- together. Optionally, updated data can be provided for the job. A job in --- any state can be marked as failed. If it has been given to a worker as a +-- any state can be marked as failed. If it has been given to a worker as a -- job, then its subsequent requests to heartbeat or complete that job will -- fail. Failed jobs are kept until they are canceled or completed. -- @@ -856,11 +888,9 @@ function QlessJob:fail(now, worker, group, message, data) -- Now remove the instance from the schedule, and work queues for the -- queue it's in local queue_obj = Qless.queue(queue) - queue_obj.work.remove(self.jid) - queue_obj.locks.remove(self.jid) - queue_obj.scheduled.remove(self.jid) + queue_obj:remove_job(self.jid) - -- The reason that this appears here is that the above will fail if the + -- The reason that this appears here is that the above will fail if the -- job doesn't exist if data then redis.call('hset', QlessJob.ns .. self.jid, 'data', cjson.encode(data)) @@ -877,12 +907,14 @@ function QlessJob:fail(now, worker, group, message, data) ['worker'] = worker })) + self:throttles_release(now) + -- Add this group of failure to the list of failures redis.call('sadd', 'ql:failures', group) -- And add this particular instance to the failed groups redis.call('lpush', 'ql:f:' .. group, self.jid) - -- Here is where we'd intcrement stats about the particular stage + -- Here is where we'd increment stats about the particular stage -- and possibly the workers return self.jid @@ -897,7 +929,7 @@ end -- Throws an exception if: -- - the worker is not the worker with a lock on the job -- - the job is not actually running --- +-- -- Otherwise, it returns the number of retries remaining. If the allowed -- retries have been exhausted, then it is automatically failed, and a negative -- number is returned. @@ -910,7 +942,7 @@ function QlessJob:retry(now, queue, worker, delay, group, message) assert(worker, 'Retry(): Arg "worker" missing') delay = assert(tonumber(delay or 0), 'Retry(): Arg "delay" not a number: ' .. tostring(delay)) - + -- Let's see what the old priority, and tags were local oldqueue, state, retries, oldworker, priority, failure = unpack( redis.call('hmget', QlessJob.ns .. self.jid, 'queue', 'state', @@ -935,6 +967,9 @@ function QlessJob:retry(now, queue, worker, delay, group, message) -- Remove it from the locks key of the old queue Qless.queue(oldqueue).locks.remove(self.jid) + -- Release the throttle for the job + self:throttles_release(now) + -- Remove this job from the worker that was previously working it redis.call('zrem', 'ql:w:' .. worker .. ':jobs', self.jid) @@ -943,7 +978,7 @@ function QlessJob:retry(now, queue, worker, delay, group, message) -- queue it's in local group = group or 'failed-retries-' .. queue self:history(now, 'failed', {['group'] = group}) - + redis.call('hmset', QlessJob.ns .. 
self.jid, 'state', 'failed', 'worker', '', 'expires', '') @@ -967,7 +1002,7 @@ function QlessJob:retry(now, queue, worker, delay, group, message) ['worker'] = unpack(self:data('worker')) })) end - + -- Add this type of failure to the list of failures redis.call('sadd', 'ql:failures', group) -- And add this particular instance to the failed types @@ -1119,11 +1154,11 @@ function QlessJob:heartbeat(now, worker, data) redis.call('hmset', QlessJob.ns .. self.jid, 'expires', expires, 'worker', worker) end - + -- Update hwen this job was last updated on that worker -- Add this job to the list of jobs handled by this worker redis.call('zadd', 'ql:w:' .. worker .. ':jobs', expires, self.jid) - + -- And now we should just update the locks local queue = Qless.queue( redis.call('hget', QlessJob.ns .. self.jid, 'queue')) @@ -1269,6 +1304,108 @@ function QlessJob:history(now, what, item) cjson.encode({math.floor(now), what, item})) end end + +function QlessJob:throttles_release(now) + local throttles = redis.call('hget', QlessJob.ns .. self.jid, 'throttles') + throttles = cjson.decode(throttles or '[]') + + for _, tid in ipairs(throttles) do + Qless.throttle(tid):release(now, self.jid) + end +end + +function QlessJob:throttles_available() + for _, tid in ipairs(self:throttles()) do + if not Qless.throttle(tid):available() then + return false + end + end + + return true +end + +function QlessJob:throttles_acquire(now) + if not self:throttles_available() then + return false + end + + for _, tid in ipairs(self:throttles()) do + Qless.throttle(tid):acquire(self.jid) + end + + return true +end + +-- Finds the first unavailable throttle and adds the job to its pending job set. +function QlessJob:throttle(now) + for _, tid in ipairs(self:throttles()) do + local throttle = Qless.throttle(tid) + if not throttle:available() then + throttle:pend(now, self.jid) + return + end + end +end + +function QlessJob:throttles() + -- memoize throttles for the job. + if not self._throttles then + self._throttles = cjson.decode(redis.call('hget', QlessJob.ns .. self.jid, 'throttles') or '[]') + end + + return self._throttles +end + +-- Completely removes all the data +-- associated with this job, use +-- with care. +function QlessJob:delete() + local tags = redis.call('hget', QlessJob.ns .. self.jid, 'tags') or '[]' + tags = cjson.decode(tags) + -- remove the jid from each tag + for i, tag in ipairs(tags) do + self:remove_tag(tag) + end + -- Delete the job's data + redis.call('del', QlessJob.ns .. self.jid) + -- Delete the job's history + redis.call('del', QlessJob.ns .. self.jid .. '-history') + -- Delete any notion of dependencies it has + redis.call('del', QlessJob.ns .. self.jid .. '-dependencies') +end + +-- Inserts the jid into the specified tag. +-- This should probably be moved to its own tag +-- object. +function QlessJob:insert_tag(now, tag) + redis.call('zadd', 'ql:t:' .. tag, now, self.jid) + redis.call('zincrby', 'ql:tags', 1, tag) +end + +-- Removes the jid from the specified tag. +-- this should probably be moved to its own tag +-- object. +function QlessJob:remove_tag(tag) + -- namespace the tag + local namespaced_tag = 'ql:t:' .. tag + + -- Remove the job from the specified tag + redis.call('zrem', namespaced_tag, self.jid) + + -- Check if any tags jids remain in the tag set. + local remaining = redis.call('zcard', namespaced_tag) + + -- If the number of jids in the tagged set + -- is 0 it means we have no jobs with this tag + -- and we should remove it from the set of all tags + -- to prevent memory leaks. 
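    -- (Sketch of the keyspace maintained here and in insert_tag above:
    --   ql:t:<tag>  zset of the jids carrying <tag>, scored by insert time
    --   ql:tags     zset of every known tag, scored by its live jid count)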
+ if tonumber(remaining) == 0 then + redis.call('zrem', 'ql:tags', tag) + else + -- Decrement the tag in the set of all tags. + redis.call('zincrby', 'ql:tags', -1, tag) + end +end ------------------------------------------------------------------------------- -- Queue class ------------------------------------------------------------------------------- @@ -1348,6 +1485,26 @@ function Qless.queue(name) end } + + -- Access to the queue level throttled jobs. + queue.throttled = { + length = function() + return (redis.call('zcard', queue:prefix('throttled')) or 0) + end, peek = function(now, min, max) + return redis.call('zrange', queue:prefix('throttled'), min, max) + end, add = function(...) + if #arg > 0 then + redis.call('zadd', queue:prefix('throttled'), unpack(arg)) + end + end, remove = function(...) + if #arg > 0 then + return redis.call('zrem', queue:prefix('throttled'), unpack(arg)) + end + end, pop = function(min, max) + return redis.call('zremrangebyrank', queue:prefix('throttled'), min, max) + end + } + -- Access to our scheduled jobs queue.scheduled = { peek = function(now, offset, count) @@ -1453,11 +1610,11 @@ function QlessQueue:stats(now, date) local key = 'ql:s:' .. name .. ':' .. bin .. ':' .. queue local count, mean, vk = unpack(redis.call('hmget', key, 'total', 'mean', 'vk')) - + count = tonumber(count) or 0 mean = tonumber(mean) or 0 vk = tonumber(vk) - + results.count = count or 0 results.mean = mean or 0 results.histogram = {} @@ -1507,8 +1664,8 @@ function QlessQueue:peek(now, count) -- Now we've checked __all__ the locks for this queue the could -- have expired, and are no more than the number requested. If - -- we still need values in order to meet the demand, then we - -- should check if any scheduled items, and if so, we should + -- we still need values in order to meet the demand, then we + -- should check if any scheduled items, and if so, we should -- insert them to ensure correctness when pulling off the next -- unit of work. self:check_scheduled(now, count - #jids) @@ -1548,11 +1705,6 @@ function QlessQueue:pop(now, worker, count) count = assert(tonumber(count), 'Pop(): Arg "count" missing or not a number: ' .. tostring(count)) - -- We should find the heartbeat interval for this queue heartbeat - local expires = now + tonumber( - Qless.config.get(self.name .. '-heartbeat') or - Qless.config.get('heartbeat', 60)) - -- If this queue is paused, then return no jobs if self:paused() then return {} @@ -1561,74 +1713,133 @@ function QlessQueue:pop(now, worker, count) -- Make sure we this worker to the list of seen workers redis.call('zadd', 'ql:workers', now, worker) - -- Check our max concurrency, and limit the count - local max_concurrency = tonumber( - Qless.config.get(self.name .. '-max-concurrency', 0)) + local dead_jids = self:invalidate_locks(now, count) or {} + local popped = {} - if max_concurrency > 0 then - -- Allow at most max_concurrency - #running - local allowed = math.max(0, max_concurrency - self.locks.running(now)) - count = math.min(allowed, count) - if count == 0 then - return {} + for index, jid in ipairs(dead_jids) do + local success = self:pop_job(now, worker, Qless.job(jid)) + -- only track jid if a job was popped and it's not a phantom jid + if success then + table.insert(popped, jid) end end - local jids = self:invalidate_locks(now, count) + -- if queue is at max capacity don't pop any further jobs. + if not Qless.throttle(QlessQueue.ns .. 
self.name):available() then + return popped + end + -- Now we've checked __all__ the locks for this queue the could -- have expired, and are no more than the number requested. -- If we still need jobs in order to meet demand, then we should -- look for all the recurring jobs that need jobs run - self:check_recurring(now, count - #jids) + self:check_recurring(now, count - #dead_jids) - -- If we still need values in order to meet the demand, then we - -- should check if any scheduled items, and if so, we should + -- If we still need values in order to meet the demand, then we + -- should check if any scheduled items, and if so, we should -- insert them to ensure correctness when pulling off the next -- unit of work. - self:check_scheduled(now, count - #jids) + self:check_scheduled(now, count - #dead_jids) -- With these in place, we can expand this list of jids based on the work -- queue itself and the priorities therein - table.extend(jids, self.work.peek(count - #jids)) - local state - for index, jid in ipairs(jids) do - local job = Qless.job(jid) - state = unpack(job:data('state')) - job:history(now, 'popped', {worker = worker}) - - -- Update the wait time statistics - local time = tonumber( - redis.call('hget', QlessJob.ns .. jid, 'time') or now) - local waiting = now - time - self:stat(now, 'wait', waiting) - redis.call('hset', QlessJob.ns .. jid, - 'time', string.format("%.20f", now)) - - -- Add this job to the list of jobs handled by this worker - redis.call('zadd', 'ql:w:' .. worker .. ':jobs', expires, jid) - - -- Update the jobs data, and add its locks, and return the job - job:update({ - worker = worker, - expires = expires, - state = 'running' - }) - - self.locks.add(expires, jid) - - local tracked = redis.call('zscore', 'ql:tracked', jid) ~= false - if tracked then - Qless.publish('popped', jid) + -- Since throttles could prevent work queue items from being popped, we can + -- retry a number of times till we find work items that are not throttled + local pop_retry_limit = tonumber( + Qless.config.get(self.name .. 
'-max-pop-retry') or + Qless.config.get('max-pop-retry', 1) + ) + + -- Keep trying to fulfill fulfill jobs from the work queue until we reach + -- the desired count or exhaust our retry limit + while #popped < count and pop_retry_limit > 0 do + + local jids = self.work.peek(count - #popped) or {} + + -- If there is nothing in the work queue, then no need to keep looping + if #jids == 0 then + break end + + + for index, jid in ipairs(jids) do + local job = Qless.job(jid) + if job:throttles_acquire(now) then + local success = self:pop_job(now, worker, job) + -- only track jid if a job was popped and it's not a phantom jid + if success then + table.insert(popped, jid) + end + else + self:throttle(now, job) + end + end + + -- All jobs should have acquired locks or be throttled, + -- ergo, remove all jids from work queue + self.work.remove(unpack(jids)) + + pop_retry_limit = pop_retry_limit - 1 end - -- If we are returning any jobs, then we should remove them from the work - -- queue - self.work.remove(unpack(jids)) + return popped +end - return jids +-- Throttle a job +function QlessQueue:throttle(now, job) + job:throttle(now) + self.throttled.add(now, job.jid) + local state = unpack(job:data('state')) + if state ~= 'throttled' then + job:update({state = 'throttled'}) + job:history(now, 'throttled', {queue = self.name}) + end +end + +function QlessQueue:pop_job(now, worker, job) + local state + local jid = job.jid + local job_state = job:data('state') + -- if the job doesn't exist, short circuit + if not job_state then + return false + end + + state = unpack(job_state) + job:history(now, 'popped', {worker = worker}) + + -- We should find the heartbeat interval for this queue heartbeat + local expires = now + tonumber( + Qless.config.get(self.name .. '-heartbeat') or + Qless.config.get('heartbeat', 60)) + + -- Update the wait time statistics + -- Just does job:data('time') do the same as this? + local time = tonumber(redis.call('hget', QlessJob.ns .. jid, 'time') or now) + local waiting = now - time + self:stat(now, 'wait', waiting) + redis.call('hset', QlessJob.ns .. jid, + 'time', string.format("%.20f", now)) + + -- Add this job to the list of jobs handled by this worker + redis.call('zadd', 'ql:w:' .. worker .. ':jobs', expires, jid) + + -- Update the jobs data, and add its locks, and return the job + job:update({ + worker = worker, + expires = expires, + state = 'running' + }) + + self.locks.add(expires, jid) + + local tracked = redis.call('zscore', 'ql:tracked', jid) ~= false + if tracked then + Qless.publish('popped', jid) + end + return true end -- Update the stats for this queue @@ -1668,7 +1879,7 @@ function QlessQueue:stat(now, stat, val) redis.call('hincrby', key, 'h' .. math.floor(val / 3600), 1) else -- days redis.call('hincrby', key, 'd' .. math.floor(val / 86400), 1) - end + end redis.call('hmset', key, 'total', count, 'mean', mean, 'vk', vk) end @@ -1708,14 +1919,16 @@ function QlessQueue:put(now, worker, jid, klass, raw_data, delay, ...) end -- Sanity check on optional args - retries = assert(tonumber(options['retries'] or retries or 5) , + local retries = assert(tonumber(options['retries'] or retries or 5) , 'Put(): Arg "retries" not a number: ' .. tostring(options['retries'])) - tags = assert(cjson.decode(options['tags'] or tags or '[]' ), + local tags = assert(cjson.decode(options['tags'] or tags or '[]' ), 'Put(): Arg "tags" not JSON' .. 
tostring(options['tags'])) - priority = assert(tonumber(options['priority'] or priority or 0), + local priority = assert(tonumber(options['priority'] or priority or 0), 'Put(): Arg "priority" not a number' .. tostring(options['priority'])) local depends = assert(cjson.decode(options['depends'] or '[]') , 'Put(): Arg "depends" not JSON: ' .. tostring(options['depends'])) + local throttles = assert(cjson.decode(options['throttles'] or '[]'), + 'Put(): Arg "throttles" not JSON array: ' .. tostring(options['throttles'])) -- If the job has old dependencies, determine which dependencies are -- in the new dependencies but not in the old ones, and which are in the @@ -1728,7 +1941,7 @@ function QlessQueue:put(now, worker, jid, klass, raw_data, delay, ...) -- Now find what's in the original, but not the new local original = redis.call( 'smembers', QlessJob.ns .. jid .. '-dependencies') - for _, dep in pairs(original) do + for _, dep in pairs(original) do if new[dep] == nil then -- Remove k as a dependency redis.call('srem', QlessJob.ns .. dep .. '-dependents' , jid) @@ -1748,12 +1961,16 @@ function QlessQueue:put(now, worker, jid, klass, raw_data, delay, ...) job:history(now, 'put', {q = self.name}) -- If this item was previously in another queue, then we should remove it from there + -- and remove the associated throttle if oldqueue then local queue_obj = Qless.queue(oldqueue) - queue_obj.work.remove(jid) - queue_obj.locks.remove(jid) - queue_obj.depends.remove(jid) - queue_obj.scheduled.remove(jid) + queue_obj:remove_job(jid) + local old_qid = QlessQueue.ns .. oldqueue + for index, tname in ipairs(throttles) do + if tname == old_qid then + table.remove(throttles, index) + end + end end -- If this had previously been given out to a worker, make sure to remove it @@ -1782,8 +1999,7 @@ function QlessQueue:put(now, worker, jid, klass, raw_data, delay, ...) -- Add this job to the list of jobs tagged with whatever tags were supplied for i, tag in ipairs(tags) do - redis.call('zadd', 'ql:t:' .. tag, now, jid) - redis.call('zincrby', 'ql:tags', 1, tag) + Qless.job(jid):insert_tag(now, tag) end -- If we're in the failed state, remove all of our data @@ -1802,8 +2018,10 @@ function QlessQueue:put(now, worker, jid, klass, raw_data, delay, ...) redis.call('hincrby', 'ql:s:stats:' .. bin .. ':' .. self.name, 'failed' , -1) end - -- First, let's save its data - redis.call('hmset', QlessJob.ns .. jid, + -- insert default queue throttle + table.insert(throttles, QlessQueue.ns .. self.name) + + data = { 'jid' , jid, 'klass' , klass, 'data' , raw_data, @@ -1815,7 +2033,12 @@ function QlessQueue:put(now, worker, jid, klass, raw_data, delay, ...) 'queue' , self.name, 'retries' , retries, 'remaining', retries, - 'time' , string.format("%.20f", now)) + 'time' , string.format("%.20f", now), + 'throttles', cjson.encode(throttles) + } + + -- First, let's save its data + redis.call('hmset', QlessJob.ns .. jid, unpack(data)) -- These are the jids we legitimately have to wait on for i, j in ipairs(depends) do @@ -1842,16 +2065,21 @@ function QlessQueue:put(now, worker, jid, klass, raw_data, delay, ...) self.scheduled.add(now + delay, jid) end else + -- to avoid false negatives when popping jobs check if the job should be + -- throttled immediately. + local job = Qless.job(jid) if redis.call('scard', QlessJob.ns .. jid .. '-dependencies') > 0 then self.depends.add(now, jid) redis.call('hset', QlessJob.ns .. 
jid, 'state', 'depends') + elseif not job:throttles_available() then + self:throttle(now, job) else self.work.add(now, priority, jid) end end -- Lastly, we're going to make sure that this item is in the - -- set of known queues. We should keep this sorted by the + -- set of known queues. We should keep this sorted by the -- order in which we saw each of these queues if redis.call('zscore', 'ql:queues', self.name) == false then redis.call('zadd', 'ql:queues', now, self.name) @@ -1921,7 +2149,7 @@ function QlessQueue:recur(now, jid, klass, raw_data, spec, ...) if #arg % 2 == 1 then error('Odd number of additional args: ' .. tostring(arg)) end - + -- Read in all the optional parameters local options = {} for i = 3, #arg, 2 do options[arg[i]] = arg[i + 1] end @@ -1937,41 +2165,44 @@ function QlessQueue:recur(now, jid, klass, raw_data, spec, ...) options.backlog = assert(tonumber(options.backlog or 0), 'Recur(): Arg "backlog" not a number: ' .. tostring( options.backlog)) + options.throttles = assert(cjson.decode(options['throttles'] or '{}'), + 'Recur(): Arg "throttles" not JSON array: ' .. tostring(options['throttles'])) local count, old_queue = unpack(redis.call('hmget', 'ql:r:' .. jid, 'count', 'queue')) count = count or 0 - -- If it has previously been in another queue, then we should remove + -- If it has previously been in another queue, then we should remove -- some information about it if old_queue then Qless.queue(old_queue).recurring.remove(jid) end - + -- Do some insertions redis.call('hmset', 'ql:r:' .. jid, - 'jid' , jid, - 'klass' , klass, - 'data' , raw_data, - 'priority', options.priority, - 'tags' , cjson.encode(options.tags or {}), - 'state' , 'recur', - 'queue' , self.name, - 'type' , 'interval', + 'jid' , jid, + 'klass' , klass, + 'data' , raw_data, + 'priority' , options.priority, + 'tags' , cjson.encode(options.tags or {}), + 'state' , 'recur', + 'queue' , self.name, + 'type' , 'interval', -- How many jobs we've spawned from this - 'count' , count, - 'interval', interval, - 'retries' , options.retries, - 'backlog' , options.backlog) + 'count' , count, + 'interval' , interval, + 'retries' , options.retries, + 'backlog' , options.backlog, + 'throttles', cjson.encode(options.throttles or {})) -- Now, we should schedule the next run of the job self.recurring.add(now + offset, jid) - + -- Lastly, we're going to make sure that this item is in the - -- set of known queues. We should keep this sorted by the + -- set of known queues. We should keep this sorted by the -- order in which we saw each of these queues if redis.call('zscore', 'ql:queues', self.name) == false then redis.call('zadd', 'ql:queues', now, self.name) end - + return jid else error('Recur(): schedule type "' .. tostring(spec) .. '" unknown') @@ -1986,6 +2217,14 @@ end ------------------------------------------------------------------------------- -- Housekeeping methods ------------------------------------------------------------------------------- +function QlessQueue:remove_job(jid) + self.work.remove(jid) + self.locks.remove(jid) + self.throttled.remove(jid) + self.depends.remove(jid) + self.scheduled.remove(jid) +end + -- Instantiate any recurring jobs that are ready function QlessQueue:check_recurring(now, count) -- This is how many jobs we've moved so far @@ -1997,9 +2236,11 @@ function QlessQueue:check_recurring(now, count) -- get the last time each of them was run, and then increment -- it by its interval. 
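-- (For example: if a recurring jid has fallen two intervals behind, this
-- pass spawns one child per elapsed interval, their jids built as
-- '<jid>-<count>' from the incremented 'count' field below.)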
While this time is less than now, -- we need to keep putting jobs on the queue - local klass, data, priority, tags, retries, interval, backlog = unpack( + local r = redis.call('hmget', 'ql:r:' .. jid, 'klass', 'data', 'priority', + 'tags', 'retries', 'interval', 'backlog', 'throttles') + local klass, data, priority, tags, retries, interval, backlog, throttles = unpack( redis.call('hmget', 'ql:r:' .. jid, 'klass', 'data', 'priority', - 'tags', 'retries', 'interval', 'backlog')) + 'tags', 'retries', 'interval', 'backlog', 'throttles')) local _tags = cjson.decode(tags) local score = math.floor(tonumber(self.recurring.score(jid))) interval = tonumber(interval) @@ -2017,44 +2258,45 @@ function QlessQueue:check_recurring(now, count) ) end end - - -- We're saving this value so that in the history, we can accurately + + -- We're saving this value so that in the history, we can accurately -- reflect when the job would normally have been scheduled while (score <= now) and (moved < count) do local count = redis.call('hincrby', 'ql:r:' .. jid, 'count', 1) moved = moved + 1 local child_jid = jid .. '-' .. count - + -- Add this job to the list of jobs tagged with whatever tags were -- supplied for i, tag in ipairs(_tags) do - redis.call('zadd', 'ql:t:' .. tag, now, child_jid) - redis.call('zincrby', 'ql:tags', 1, tag) + Qless.job(child_jid):insert_tag(now, tag) end - + -- First, let's save its data redis.call('hmset', QlessJob.ns .. child_jid, - 'jid' , child_jid, - 'klass' , klass, - 'data' , data, - 'priority' , priority, - 'tags' , tags, - 'state' , 'waiting', - 'worker' , '', - 'expires' , 0, - 'queue' , self.name, - 'retries' , retries, - 'remaining' , retries, - 'time' , string.format("%.20f", score), + 'jid' , child_jid, + 'klass' , klass, + 'data' , data, + 'priority' , priority, + 'tags' , tags, + 'state' , 'waiting', + 'worker' , '', + 'expires' , 0, + 'queue' , self.name, + 'retries' , retries, + 'remaining', retries, + 'time' , string.format("%.20f", score), + 'throttles', throttles, 'spawned_from_jid', jid) + Qless.job(child_jid):history(score, 'put', {q = self.name}) - + -- Now, if a delay was provided, and if it's in the future, -- then we'll have to schedule it. Otherwise, we're just -- going to add it to the work queue. self.work.add(score, priority, child_jid) - + score = score + interval self.recurring.add(score, jid) end @@ -2069,7 +2311,7 @@ function QlessQueue:check_scheduled(now, count) -- insert into the work queue local scheduled = self.scheduled.ready(now, 0, count) for index, jid in ipairs(scheduled) do - -- With these in hand, we'll have to go out and find the + -- With these in hand, we'll have to go out and find the -- priorities of these jobs, and then we'll insert them -- into the work queue and then when that's complete, we'll -- remove them from the scheduled queue @@ -2154,7 +2396,7 @@ function QlessQueue:invalidate_locks(now, count) -- See how many remaining retries the job has local remaining = tonumber(redis.call( 'hincrby', QlessJob.ns .. jid, 'remaining', -1)) - + -- This is where we actually have to time out the work if remaining < 0 then -- Now remove the instance from the schedule, and work queues @@ -2162,9 +2404,14 @@ function QlessQueue:invalidate_locks(now, count) self.work.remove(jid) self.locks.remove(jid) self.scheduled.remove(jid) - - local group = 'failed-retries-' .. Qless.job(jid):data()['queue'] + local job = Qless.job(jid) + local job_data = Qless.job(jid):data() + local queue = job_data['queue'] + local group = 'failed-retries-' .. 
queue + + job:throttles_release(now) + job:history(now, 'failed', {group = group}) redis.call('hmset', QlessJob.ns .. jid, 'state', 'failed', 'worker', '', @@ -2178,12 +2425,12 @@ function QlessQueue:invalidate_locks(now, count) ['when'] = now, ['worker'] = unpack(job:data('worker')) })) - + -- Add this type of failure to the list of failures redis.call('sadd', 'ql:failures', group) -- And add this particular instance to the failed types redis.call('lpush', 'ql:f:' .. group, jid) - + if redis.call('zscore', 'ql:tracked', jid) ~= false then Qless.publish('failed', jid) end @@ -2241,6 +2488,7 @@ function QlessQueue.counts(now, name) waiting = queue.work.length(), stalled = stalled, running = queue.locks.length() - stalled, + throttled = queue.throttled.length(), scheduled = queue.scheduled.length(), depends = queue.depends.length(), recurring = queue.recurring.length(), @@ -2461,3 +2709,100 @@ function QlessWorker.counts(now, worker) return response end end +-- Retrieve the data for a throttled resource +function QlessThrottle:data() + -- Default values for the data + local data = { + id = self.id, + maximum = 0 + } + + -- Retrieve data stored in redis + local throttle = redis.call('hmget', QlessThrottle.ns .. self.id, 'id', 'maximum') + + if throttle[2] then + data.maximum = tonumber(throttle[2]) + end + + return data +end + +-- Set the data for a throttled resource +function QlessThrottle:set(data, expiration) + redis.call('hmset', QlessThrottle.ns .. self.id, 'id', self.id, 'maximum', data.maximum) + if expiration > 0 then + redis.call('expire', QlessThrottle.ns .. self.id, expiration) + end +end + +-- Delete a throttled resource +function QlessThrottle:unset() + redis.call('del', QlessThrottle.ns .. self.id) +end + +-- Acquire a throttled resource for a job. +-- Returns true of the job acquired the resource, false otherwise +function QlessThrottle:acquire(jid) + if not self:available() then + return false + end + + self.locks.add(1, jid) + return true +end + +function QlessThrottle:pend(now, jid) + self.pending.add(now, jid) +end + +-- Releases the lock taken by the specified jid. +-- number of jobs released back into the queues is determined by the locks_available method. +function QlessThrottle:release(now, jid) + -- Only attempt to remove from the pending set if the job wasn't found in the + -- locks set + if self.locks.remove(jid) == 0 then + self.pending.remove(jid) + end + + local available_locks = self:locks_available() + if self.pending.length() == 0 or available_locks < 1 then + return + end + + -- subtract one to ensure we pop the correct amount. peek(0, 0) returns the first element + -- peek(0,1) return the first two. + for _, jid in ipairs(self.pending.peek(0, available_locks - 1)) do + local job = Qless.job(jid) + local data = job:data() + local queue = Qless.queue(data['queue']) + + queue.throttled.remove(jid) + queue.work.add(now, data.priority, jid) + end + + -- subtract one to ensure we pop the correct amount. pop(0, 0) pops the first element + -- pop(0,1) pops the first two. + local popped = self.pending.pop(0, available_locks - 1) +end + +-- Returns true if the throttle has locks available, false otherwise. +function QlessThrottle:available() + return self.maximum == 0 or self.locks.length() < self.maximum +end + +-- Returns the TTL of the throttle +function QlessThrottle:ttl() + return redis.call('ttl', QlessThrottle.ns .. self.id) +end + +-- Returns the number of locks available for the throttle. 
+-- calculated by maximum - locks.length(), if the throttle is unlimited +-- then up to 10 jobs are released. +function QlessThrottle:locks_available() + if self.maximum == 0 then + -- Arbitrarily chosen value. might want to make it configurable in the future. + return 10 + end + + return self.maximum - self.locks.length() +end diff --git a/lib/qless/lua/qless.lua b/lib/qless/lua/qless.lua index ae140ce8..0623d1e0 100644 --- a/lib/qless/lua/qless.lua +++ b/lib/qless/lua/qless.lua @@ -1,4 +1,4 @@ --- Current SHA: 525c39000dc71df53a3502491cb4daf0e1128f1d +-- Current SHA: 20dc687832ad472f0a00899d26c285b893ff466c -- This is a generated file local Qless = { ns = 'ql:' @@ -19,6 +19,11 @@ local QlessJob = { } QlessJob.__index = QlessJob +local QlessThrottle = { + ns = Qless.ns .. 'th:' +} +QlessThrottle.__index = QlessThrottle + local QlessRecurringJob = {} QlessRecurringJob.__index = QlessRecurringJob @@ -50,6 +55,52 @@ function Qless.recurring(jid) return job end +function Qless.throttle(tid) + assert(tid, 'Throttle(): no tid provided') + local throttle = QlessThrottle.data({id = tid}) + setmetatable(throttle, QlessThrottle) + + throttle.locks = { + length = function() + return (redis.call('zcard', QlessThrottle.ns .. tid .. '-locks') or 0) + end, members = function() + return redis.call('zrange', QlessThrottle.ns .. tid .. '-locks', 0, -1) + end, add = function(...) + if #arg > 0 then + redis.call('zadd', QlessThrottle.ns .. tid .. '-locks', unpack(arg)) + end + end, remove = function(...) + if #arg > 0 then + return redis.call('zrem', QlessThrottle.ns .. tid .. '-locks', unpack(arg)) + end + end, pop = function(min, max) + return redis.call('zremrangebyrank', QlessThrottle.ns .. tid .. '-locks', min, max) + end, peek = function(min, max) + return redis.call('zrange', QlessThrottle.ns .. tid .. '-locks', min, max) + end + } + + throttle.pending = { + length = function() + return (redis.call('zcard', QlessThrottle.ns .. tid .. '-pending') or 0) + end, members = function() + return redis.call('zrange', QlessThrottle.ns .. tid .. '-pending', 0, -1) + end, add = function(now, jid) + redis.call('zadd', QlessThrottle.ns .. tid .. '-pending', now, jid) + end, remove = function(...) + if #arg > 0 then + return redis.call('zrem', QlessThrottle.ns .. tid .. '-pending', unpack(arg)) + end + end, pop = function(min, max) + return redis.call('zremrangebyrank', QlessThrottle.ns .. tid .. '-pending', min, max) + end, peek = function(min, max) + return redis.call('zrange', QlessThrottle.ns .. tid .. '-pending', min, max) + end + } + + return throttle +end + function Qless.failed(group, start, limit) start = assert(tonumber(start or 0), 'Failed(): Arg "start" is not a number: ' .. (start or 'nil')) @@ -92,6 +143,8 @@ function Qless.jobs(now, state, ...) return queue.locks.peek(now, offset, count) elseif state == 'stalled' then return queue.locks.expired(now, offset, count) + elseif state == 'throttled' then + return queue.throttled.peek(now, offset, count) elseif state == 'scheduled' then queue:check_scheduled(now, queue.scheduled.length()) return queue.scheduled.peek(now, offset, count) @@ -147,17 +200,16 @@ function Qless.tag(now, command, ...) tags = cjson.decode(tags) local _tags = {} for i,v in ipairs(tags) do _tags[v] = true end - + for i=2,#arg do local tag = arg[i] if _tags[tag] == nil then _tags[tag] = true table.insert(tags, tag) end - redis.call('zadd', 'ql:t:' .. tag, now, jid) - redis.call('zincrby', 'ql:tags', 1, tag) + Qless.job(jid):insert_tag(now, tag) end - + redis.call('hset', QlessJob.ns .. 
jid, 'tags', cjson.encode(tags)) return tags else @@ -170,17 +222,16 @@ function Qless.tag(now, command, ...) tags = cjson.decode(tags) local _tags = {} for i,v in ipairs(tags) do _tags[v] = true end - + for i=2,#arg do local tag = arg[i] _tags[tag] = nil - redis.call('zrem', 'ql:t:' .. tag, jid) - redis.call('zincrby', 'ql:tags', -1, tag) + Qless.job(jid):remove_tag(tag) end - + local results = {} for i,tag in ipairs(tags) do if _tags[tag] then table.insert(results, tag) end end - + redis.call('hset', QlessJob.ns .. jid, 'tags', cjson.encode(results)) return results else @@ -205,7 +256,7 @@ function Qless.tag(now, command, ...) end end -function Qless.cancel(...) +function Qless.cancel(now, ...) local dependents = {} for _, jid in ipairs(arg) do dependents[jid] = redis.call( @@ -241,19 +292,18 @@ function Qless.cancel(...) if queue then local queue = Qless.queue(queue) - queue.work.remove(jid) - queue.locks.remove(jid) - queue.scheduled.remove(jid) - queue.depends.remove(jid) + queue:remove_job(jid) end + local job = Qless.job(jid) + + job:throttles_release(now) + for i, j in ipairs(redis.call( 'smembers', QlessJob.ns .. jid .. '-dependencies')) do redis.call('srem', QlessJob.ns .. j .. '-dependents', jid) end - redis.call('del', QlessJob.ns .. jid .. '-dependencies') - if state == 'failed' then failure = cjson.decode(failure) redis.call('lrem', 'ql:f:' .. failure.group, 0, jid) @@ -267,22 +317,14 @@ function Qless.cancel(...) 'ql:s:stats:' .. bin .. ':' .. queue, 'failed', failed - 1) end - local tags = cjson.decode( - redis.call('hget', QlessJob.ns .. jid, 'tags') or '{}') - for i, tag in ipairs(tags) do - redis.call('zrem', 'ql:t:' .. tag, jid) - redis.call('zincrby', 'ql:tags', -1, tag) - end + job:delete() if redis.call('zscore', 'ql:tracked', jid) ~= false then Qless.publish('canceled', jid) end - - redis.call('del', QlessJob.ns .. jid) - redis.call('del', QlessJob.ns .. jid .. '-history') end end - + return arg end @@ -336,7 +378,7 @@ function QlessJob:data(...) local job = redis.call( 'hmget', QlessJob.ns .. self.jid, 'jid', 'klass', 'state', 'queue', 'worker', 'priority', 'expires', 'retries', 'remaining', 'data', - 'tags', 'failure', 'spawned_from_jid') + 'tags', 'failure', 'throttles', 'spawned_from_jid') if not job[1] then return nil @@ -350,18 +392,19 @@ function QlessJob:data(...) worker = job[5] or '', tracked = redis.call( 'zscore', 'ql:tracked', self.jid) ~= false, - priority = tonumber(job[6]), - expires = tonumber(job[7]) or 0, - retries = tonumber(job[8]), - remaining = math.floor(tonumber(job[9])), - data = job[10], - tags = cjson.decode(job[11]), - history = self:history(), - failure = cjson.decode(job[12] or '{}'), - spawned_from_jid = job[13], - dependents = redis.call( + priority = tonumber(job[6]), + expires = tonumber(job[7]) or 0, + retries = tonumber(job[8]), + remaining = math.floor(tonumber(job[9])), + data = job[10], + tags = cjson.decode(job[11]), + history = self:history(), + failure = cjson.decode(job[12] or '{}'), + throttles = cjson.decode(job[13] or '[]'), + spawned_from_jid = job[14], + dependents = redis.call( 'smembers', QlessJob.ns .. self.jid .. '-dependents'), - dependencies = redis.call( + dependencies = redis.call( 'smembers', QlessJob.ns .. self.jid .. '-dependencies') } @@ -376,15 +419,15 @@ function QlessJob:data(...) end end -function QlessJob:complete(now, worker, queue, data, ...) +function QlessJob:complete(now, worker, queue, raw_data, ...) 
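+-- Editorial note (not in the upstream commit): complete() finishes a running
+-- job. The varargs options may include 'next' (a queue to advance the job
+-- to), plus 'delay' and 'depends' for that next queue; raw_data replaces the
+-- job's stored data verbatim.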
assert(worker, 'Complete(): Arg "worker" missing') assert(queue , 'Complete(): Arg "queue" missing') - data = assert(cjson.decode(data), - 'Complete(): Arg "data" missing or not JSON: ' .. tostring(data)) + local data = assert(cjson.decode(raw_data), + 'Complete(): Arg "data" missing or not JSON: ' .. tostring(raw_data)) local options = {} for i = 1, #arg, 2 do options[arg[i]] = arg[i + 1] end - + local nextq = options['next'] local delay = assert(tonumber(options['delay'] or 0)) local depends = assert(cjson.decode(options['depends'] or '[]'), @@ -418,19 +461,19 @@ function QlessJob:complete(now, worker, queue, data, ...) self:history(now, 'done') - if data then - redis.call('hset', QlessJob.ns .. self.jid, 'data', cjson.encode(data)) + if raw_data then + redis.call('hset', QlessJob.ns .. self.jid, 'data', raw_data) end local queue_obj = Qless.queue(queue) - queue_obj.work.remove(self.jid) - queue_obj.locks.remove(self.jid) - queue_obj.scheduled.remove(self.jid) + queue_obj:remove_job(self.jid) + + self:throttles_release(now) local time = tonumber( redis.call('hget', QlessJob.ns .. self.jid, 'time') or now) local waiting = now - time - Qless.queue(queue):stat(now, 'run', waiting) + queue_obj:stat(now, 'run', waiting) redis.call('hset', QlessJob.ns .. self.jid, 'time', string.format("%.20f", now)) @@ -454,7 +497,7 @@ function QlessJob:complete(now, worker, queue, data, ...) if redis.call('zscore', 'ql:queues', nextq) == false then redis.call('zadd', 'ql:queues', now, nextq) end - + redis.call('hmset', QlessJob.ns .. self.jid, 'state', 'waiting', 'worker', '', @@ -462,7 +505,7 @@ function QlessJob:complete(now, worker, queue, data, ...) 'queue', nextq, 'expires', 0, 'remaining', tonumber(retries)) - + if (delay > 0) and (#depends == 0) then queue_obj.scheduled.add(now + delay, self.jid) return 'scheduled' @@ -505,41 +548,28 @@ function QlessJob:complete(now, worker, queue, data, ...) 'queue', '', 'expires', 0, 'remaining', tonumber(retries)) - + local count = Qless.config.get('jobs-history-count') local time = Qless.config.get('jobs-history') - + count = tonumber(count or 50000) time = tonumber(time or 7 * 24 * 60 * 60) - + redis.call('zadd', 'ql:completed', now, self.jid) - + local jids = redis.call('zrangebyscore', 'ql:completed', 0, now - time) for index, jid in ipairs(jids) do - local tags = cjson.decode( - redis.call('hget', QlessJob.ns .. jid, 'tags') or '{}') - for i, tag in ipairs(tags) do - redis.call('zrem', 'ql:t:' .. tag, jid) - redis.call('zincrby', 'ql:tags', -1, tag) - end - redis.call('del', QlessJob.ns .. jid) - redis.call('del', QlessJob.ns .. jid .. '-history') + Qless.job(jid):delete() end + redis.call('zremrangebyscore', 'ql:completed', 0, now - time) - + jids = redis.call('zrange', 'ql:completed', 0, (-1-count)) for index, jid in ipairs(jids) do - local tags = cjson.decode( - redis.call('hget', QlessJob.ns .. jid, 'tags') or '{}') - for i, tag in ipairs(tags) do - redis.call('zrem', 'ql:t:' .. tag, jid) - redis.call('zincrby', 'ql:tags', -1, tag) - end - redis.call('del', QlessJob.ns .. jid) - redis.call('del', QlessJob.ns .. jid .. '-history') + Qless.job(jid):delete() end redis.call('zremrangebyrank', 'ql:completed', 0, (-1-count)) - + for i, j in ipairs(redis.call( 'smembers', QlessJob.ns .. self.jid .. '-dependents')) do redis.call('srem', QlessJob.ns .. j .. '-dependencies', self.jid) @@ -561,9 +591,9 @@ function QlessJob:complete(now, worker, queue, data, ...) end end end - + redis.call('del', QlessJob.ns .. self.jid .. 
'-dependents') - + return 'complete' end end @@ -610,9 +640,7 @@ function QlessJob:fail(now, worker, group, message, data) redis.call('hincrby', 'ql:s:stats:' .. bin .. ':' .. queue, 'failed' , 1) local queue_obj = Qless.queue(queue) - queue_obj.work.remove(self.jid) - queue_obj.locks.remove(self.jid) - queue_obj.scheduled.remove(self.jid) + queue_obj:remove_job(self.jid) if data then redis.call('hset', QlessJob.ns .. self.jid, 'data', cjson.encode(data)) @@ -629,6 +657,8 @@ function QlessJob:fail(now, worker, group, message, data) ['worker'] = worker })) + self:throttles_release(now) + redis.call('sadd', 'ql:failures', group) redis.call('lpush', 'ql:f:' .. group, self.jid) @@ -641,7 +671,7 @@ function QlessJob:retry(now, queue, worker, delay, group, message) assert(worker, 'Retry(): Arg "worker" missing') delay = assert(tonumber(delay or 0), 'Retry(): Arg "delay" not a number: ' .. tostring(delay)) - + local oldqueue, state, retries, oldworker, priority, failure = unpack( redis.call('hmget', QlessJob.ns .. self.jid, 'queue', 'state', 'retries', 'worker', 'priority', 'failure')) @@ -660,12 +690,14 @@ function QlessJob:retry(now, queue, worker, delay, group, message) Qless.queue(oldqueue).locks.remove(self.jid) + self:throttles_release(now) + redis.call('zrem', 'ql:w:' .. worker .. ':jobs', self.jid) if remaining < 0 then local group = group or 'failed-retries-' .. queue self:history(now, 'failed', {['group'] = group}) - + redis.call('hmset', QlessJob.ns .. self.jid, 'state', 'failed', 'worker', '', 'expires', '') @@ -688,7 +720,7 @@ function QlessJob:retry(now, queue, worker, delay, group, message) ['worker'] = unpack(self:data('worker')) })) end - + redis.call('sadd', 'ql:failures', group) redis.call('lpush', 'ql:f:' .. group, self.jid) local bin = now - (now % 86400) @@ -806,9 +838,9 @@ function QlessJob:heartbeat(now, worker, data) redis.call('hmset', QlessJob.ns .. self.jid, 'expires', expires, 'worker', worker) end - + redis.call('zadd', 'ql:w:' .. worker .. ':jobs', expires, self.jid) - + local queue = Qless.queue( redis.call('hget', QlessJob.ns .. self.jid, 'queue')) queue.locks.add(expires, self.jid) @@ -929,6 +961,85 @@ function QlessJob:history(now, what, item) cjson.encode({math.floor(now), what, item})) end end + +function QlessJob:throttles_release(now) + local throttles = redis.call('hget', QlessJob.ns .. self.jid, 'throttles') + throttles = cjson.decode(throttles or '[]') + + for _, tid in ipairs(throttles) do + Qless.throttle(tid):release(now, self.jid) + end +end + +function QlessJob:throttles_available() + for _, tid in ipairs(self:throttles()) do + if not Qless.throttle(tid):available() then + return false + end + end + + return true +end + +function QlessJob:throttles_acquire(now) + if not self:throttles_available() then + return false + end + + for _, tid in ipairs(self:throttles()) do + Qless.throttle(tid):acquire(self.jid) + end + + return true +end + +function QlessJob:throttle(now) + for _, tid in ipairs(self:throttles()) do + local throttle = Qless.throttle(tid) + if not throttle:available() then + throttle:pend(now, self.jid) + return + end + end +end + +function QlessJob:throttles() + if not self._throttles then + self._throttles = cjson.decode(redis.call('hget', QlessJob.ns .. self.jid, 'throttles') or '[]') + end + + return self._throttles +end + +function QlessJob:delete() + local tags = redis.call('hget', QlessJob.ns .. 
self.jid, 'tags') or '[]' + tags = cjson.decode(tags) + for i, tag in ipairs(tags) do + self:remove_tag(tag) + end + redis.call('del', QlessJob.ns .. self.jid) + redis.call('del', QlessJob.ns .. self.jid .. '-history') + redis.call('del', QlessJob.ns .. self.jid .. '-dependencies') +end + +function QlessJob:insert_tag(now, tag) + redis.call('zadd', 'ql:t:' .. tag, now, self.jid) + redis.call('zincrby', 'ql:tags', 1, tag) +end + +function QlessJob:remove_tag(tag) + local namespaced_tag = 'ql:t:' .. tag + + redis.call('zrem', namespaced_tag, self.jid) + + local remaining = redis.call('zcard', namespaced_tag) + + if tonumber(remaining) == 0 then + redis.call('zrem', 'ql:tags', tag) + else + redis.call('zincrby', 'ql:tags', -1, tag) + end +end function Qless.queue(name) assert(name, 'Queue(): no queue name provided') local queue = {} @@ -999,6 +1110,25 @@ function Qless.queue(name) end } + + queue.throttled = { + length = function() + return (redis.call('zcard', queue:prefix('throttled')) or 0) + end, peek = function(now, min, max) + return redis.call('zrange', queue:prefix('throttled'), min, max) + end, add = function(...) + if #arg > 0 then + redis.call('zadd', queue:prefix('throttled'), unpack(arg)) + end + end, remove = function(...) + if #arg > 0 then + return redis.call('zrem', queue:prefix('throttled'), unpack(arg)) + end + end, pop = function(min, max) + return redis.call('zremrangebyrank', queue:prefix('throttled'), min, max) + end + } + queue.scheduled = { peek = function(now, offset, count) return redis.call('zrange', @@ -1065,11 +1195,11 @@ function QlessQueue:stats(now, date) local key = 'ql:s:' .. name .. ':' .. bin .. ':' .. queue local count, mean, vk = unpack(redis.call('hmget', key, 'total', 'mean', 'vk')) - + count = tonumber(count) or 0 mean = tonumber(mean) or 0 vk = tonumber(vk) - + results.count = count or 0 results.mean = mean or 0 results.histogram = {} @@ -1133,67 +1263,112 @@ function QlessQueue:pop(now, worker, count) count = assert(tonumber(count), 'Pop(): Arg "count" missing or not a number: ' .. tostring(count)) - local expires = now + tonumber( - Qless.config.get(self.name .. '-heartbeat') or - Qless.config.get('heartbeat', 60)) - if self:paused() then return {} end redis.call('zadd', 'ql:workers', now, worker) - local max_concurrency = tonumber( - Qless.config.get(self.name .. '-max-concurrency', 0)) + local dead_jids = self:invalidate_locks(now, count) or {} + local popped = {} - if max_concurrency > 0 then - local allowed = math.max(0, max_concurrency - self.locks.running(now)) - count = math.min(allowed, count) - if count == 0 then - return {} + for index, jid in ipairs(dead_jids) do + local success = self:pop_job(now, worker, Qless.job(jid)) + if success then + table.insert(popped, jid) end end - local jids = self:invalidate_locks(now, count) + if not Qless.throttle(QlessQueue.ns .. self.name):available() then + return popped + end - self:check_recurring(now, count - #jids) - self:check_scheduled(now, count - #jids) + self:check_recurring(now, count - #dead_jids) - table.extend(jids, self.work.peek(count - #jids)) + self:check_scheduled(now, count - #dead_jids) - local state - for index, jid in ipairs(jids) do - local job = Qless.job(jid) - state = unpack(job:data('state')) - job:history(now, 'popped', {worker = worker}) - - local time = tonumber( - redis.call('hget', QlessJob.ns .. jid, 'time') or now) - local waiting = now - time - self:stat(now, 'wait', waiting) - redis.call('hset', QlessJob.ns .. 
jid, - 'time', string.format("%.20f", now)) - - redis.call('zadd', 'ql:w:' .. worker .. ':jobs', expires, jid) - - job:update({ - worker = worker, - expires = expires, - state = 'running' - }) - - self.locks.add(expires, jid) - - local tracked = redis.call('zscore', 'ql:tracked', jid) ~= false - if tracked then - Qless.publish('popped', jid) + + local pop_retry_limit = tonumber( + Qless.config.get(self.name .. '-max-pop-retry') or + Qless.config.get('max-pop-retry', 1) + ) + + while #popped < count and pop_retry_limit > 0 do + + local jids = self.work.peek(count - #popped) or {} + + if #jids == 0 then + break + end + + + for index, jid in ipairs(jids) do + local job = Qless.job(jid) + if job:throttles_acquire(now) then + local success = self:pop_job(now, worker, job) + if success then + table.insert(popped, jid) + end + else + self:throttle(now, job) + end end + + self.work.remove(unpack(jids)) + + pop_retry_limit = pop_retry_limit - 1 end - self.work.remove(unpack(jids)) + return popped +end - return jids +function QlessQueue:throttle(now, job) + job:throttle(now) + self.throttled.add(now, job.jid) + local state = unpack(job:data('state')) + if state ~= 'throttled' then + job:update({state = 'throttled'}) + job:history(now, 'throttled', {queue = self.name}) + end +end + +function QlessQueue:pop_job(now, worker, job) + local state + local jid = job.jid + local job_state = job:data('state') + if not job_state then + return false + end + + state = unpack(job_state) + job:history(now, 'popped', {worker = worker}) + + local expires = now + tonumber( + Qless.config.get(self.name .. '-heartbeat') or + Qless.config.get('heartbeat', 60)) + + local time = tonumber(redis.call('hget', QlessJob.ns .. jid, 'time') or now) + local waiting = now - time + self:stat(now, 'wait', waiting) + redis.call('hset', QlessJob.ns .. jid, + 'time', string.format("%.20f", now)) + + redis.call('zadd', 'ql:w:' .. worker .. ':jobs', expires, jid) + + job:update({ + worker = worker, + expires = expires, + state = 'running' + }) + + self.locks.add(expires, jid) + + local tracked = redis.call('zscore', 'ql:tracked', jid) ~= false + if tracked then + Qless.publish('popped', jid) + end + return true end function QlessQueue:stat(now, stat, val) @@ -1224,7 +1399,7 @@ function QlessQueue:stat(now, stat, val) redis.call('hincrby', key, 'h' .. math.floor(val / 3600), 1) else -- days redis.call('hincrby', key, 'd' .. math.floor(val / 86400), 1) - end + end redis.call('hmset', key, 'total', count, 'mean', mean, 'vk', vk) end @@ -1251,14 +1426,16 @@ function QlessQueue:put(now, worker, jid, klass, raw_data, delay, ...) Qless.tag(now, 'remove', jid, unpack(cjson.decode(tags))) end - retries = assert(tonumber(options['retries'] or retries or 5) , + local retries = assert(tonumber(options['retries'] or retries or 5) , 'Put(): Arg "retries" not a number: ' .. tostring(options['retries'])) - tags = assert(cjson.decode(options['tags'] or tags or '[]' ), + local tags = assert(cjson.decode(options['tags'] or tags or '[]' ), 'Put(): Arg "tags" not JSON' .. tostring(options['tags'])) - priority = assert(tonumber(options['priority'] or priority or 0), + local priority = assert(tonumber(options['priority'] or priority or 0), 'Put(): Arg "priority" not a number' .. tostring(options['priority'])) local depends = assert(cjson.decode(options['depends'] or '[]') , 'Put(): Arg "depends" not JSON: ' .. tostring(options['depends'])) + local throttles = assert(cjson.decode(options['throttles'] or '[]'), + 'Put(): Arg "throttles" not JSON array: ' .. 
tostring(options['throttles'])) if #depends > 0 then local new = {} @@ -1266,7 +1443,7 @@ function QlessQueue:put(now, worker, jid, klass, raw_data, delay, ...) local original = redis.call( 'smembers', QlessJob.ns .. jid .. '-dependencies') - for _, dep in pairs(original) do + for _, dep in pairs(original) do if new[dep] == nil then redis.call('srem', QlessJob.ns .. dep .. '-dependents' , jid) redis.call('srem', QlessJob.ns .. jid .. '-dependencies', dep) @@ -1284,10 +1461,13 @@ function QlessQueue:put(now, worker, jid, klass, raw_data, delay, ...) if oldqueue then local queue_obj = Qless.queue(oldqueue) - queue_obj.work.remove(jid) - queue_obj.locks.remove(jid) - queue_obj.depends.remove(jid) - queue_obj.scheduled.remove(jid) + queue_obj:remove_job(jid) + local old_qid = QlessQueue.ns .. oldqueue + for index, tname in ipairs(throttles) do + if tname == old_qid then + table.remove(throttles, index) + end + end end if oldworker and oldworker ~= '' then @@ -1308,8 +1488,7 @@ function QlessQueue:put(now, worker, jid, klass, raw_data, delay, ...) end for i, tag in ipairs(tags) do - redis.call('zadd', 'ql:t:' .. tag, now, jid) - redis.call('zincrby', 'ql:tags', 1, tag) + Qless.job(jid):insert_tag(now, tag) end if state == 'failed' then @@ -1322,7 +1501,9 @@ function QlessQueue:put(now, worker, jid, klass, raw_data, delay, ...) redis.call('hincrby', 'ql:s:stats:' .. bin .. ':' .. self.name, 'failed' , -1) end - redis.call('hmset', QlessJob.ns .. jid, + table.insert(throttles, QlessQueue.ns .. self.name) + + data = { 'jid' , jid, 'klass' , klass, 'data' , raw_data, @@ -1334,7 +1515,11 @@ function QlessQueue:put(now, worker, jid, klass, raw_data, delay, ...) 'queue' , self.name, 'retries' , retries, 'remaining', retries, - 'time' , string.format("%.20f", now)) + 'time' , string.format("%.20f", now), + 'throttles', cjson.encode(throttles) + } + + redis.call('hmset', QlessJob.ns .. jid, unpack(data)) for i, j in ipairs(depends) do local state = redis.call('hget', QlessJob.ns .. j, 'state') @@ -1354,9 +1539,12 @@ function QlessQueue:put(now, worker, jid, klass, raw_data, delay, ...) self.scheduled.add(now + delay, jid) end else + local job = Qless.job(jid) if redis.call('scard', QlessJob.ns .. jid .. '-dependencies') > 0 then self.depends.add(now, jid) redis.call('hset', QlessJob.ns .. jid, 'state', 'depends') + elseif not job:throttles_available() then + self:throttle(now, job) else self.work.add(now, priority, jid) end @@ -1421,7 +1609,7 @@ function QlessQueue:recur(now, jid, klass, raw_data, spec, ...) if #arg % 2 == 1 then error('Odd number of additional args: ' .. tostring(arg)) end - + local options = {} for i = 3, #arg, 2 do options[arg[i]] = arg[i + 1] end options.tags = assert(cjson.decode(options.tags or '{}'), @@ -1436,6 +1624,8 @@ function QlessQueue:recur(now, jid, klass, raw_data, spec, ...) options.backlog = assert(tonumber(options.backlog or 0), 'Recur(): Arg "backlog" not a number: ' .. tostring( options.backlog)) + options.throttles = assert(cjson.decode(options['throttles'] or '{}'), + 'Recur(): Arg "throttles" not JSON array: ' .. tostring(options['throttles'])) local count, old_queue = unpack(redis.call('hmget', 'ql:r:' .. jid, 'count', 'queue')) count = count or 0 @@ -1443,26 +1633,27 @@ function QlessQueue:recur(now, jid, klass, raw_data, spec, ...) if old_queue then Qless.queue(old_queue).recurring.remove(jid) end - + redis.call('hmset', 'ql:r:' .. 
jid, - 'jid' , jid, - 'klass' , klass, - 'data' , raw_data, - 'priority', options.priority, - 'tags' , cjson.encode(options.tags or {}), - 'state' , 'recur', - 'queue' , self.name, - 'type' , 'interval', - 'count' , count, - 'interval', interval, - 'retries' , options.retries, - 'backlog' , options.backlog) + 'jid' , jid, + 'klass' , klass, + 'data' , raw_data, + 'priority' , options.priority, + 'tags' , cjson.encode(options.tags or {}), + 'state' , 'recur', + 'queue' , self.name, + 'type' , 'interval', + 'count' , count, + 'interval' , interval, + 'retries' , options.retries, + 'backlog' , options.backlog, + 'throttles', cjson.encode(options.throttles or {})) self.recurring.add(now + offset, jid) - + if redis.call('zscore', 'ql:queues', self.name) == false then redis.call('zadd', 'ql:queues', now, self.name) end - + return jid else error('Recur(): schedule type "' .. tostring(spec) .. '" unknown') @@ -1473,13 +1664,23 @@ function QlessQueue:length() return self.locks.length() + self.work.length() + self.scheduled.length() end +function QlessQueue:remove_job(jid) + self.work.remove(jid) + self.locks.remove(jid) + self.throttled.remove(jid) + self.depends.remove(jid) + self.scheduled.remove(jid) +end + function QlessQueue:check_recurring(now, count) local moved = 0 local r = self.recurring.peek(now, 0, count) for index, jid in ipairs(r) do - local klass, data, priority, tags, retries, interval, backlog = unpack( + local r = redis.call('hmget', 'ql:r:' .. jid, 'klass', 'data', 'priority', + 'tags', 'retries', 'interval', 'backlog', 'throttles') + local klass, data, priority, tags, retries, interval, backlog, throttles = unpack( redis.call('hmget', 'ql:r:' .. jid, 'klass', 'data', 'priority', - 'tags', 'retries', 'interval', 'backlog')) + 'tags', 'retries', 'interval', 'backlog', 'throttles')) local _tags = cjson.decode(tags) local score = math.floor(tonumber(self.recurring.score(jid))) interval = tonumber(interval) @@ -1493,36 +1694,37 @@ function QlessQueue:check_recurring(now, count) ) end end - + while (score <= now) and (moved < count) do local count = redis.call('hincrby', 'ql:r:' .. jid, 'count', 1) moved = moved + 1 local child_jid = jid .. '-' .. count - + for i, tag in ipairs(_tags) do - redis.call('zadd', 'ql:t:' .. tag, now, child_jid) - redis.call('zincrby', 'ql:tags', 1, tag) + Qless.job(child_jid):insert_tag(now, tag) end - + redis.call('hmset', QlessJob.ns .. child_jid, - 'jid' , child_jid, - 'klass' , klass, - 'data' , data, - 'priority' , priority, - 'tags' , tags, - 'state' , 'waiting', - 'worker' , '', - 'expires' , 0, - 'queue' , self.name, - 'retries' , retries, - 'remaining' , retries, - 'time' , string.format("%.20f", score), + 'jid' , child_jid, + 'klass' , klass, + 'data' , data, + 'priority' , priority, + 'tags' , tags, + 'state' , 'waiting', + 'worker' , '', + 'expires' , 0, + 'queue' , self.name, + 'retries' , retries, + 'remaining', retries, + 'time' , string.format("%.20f", score), + 'throttles', throttles, 'spawned_from_jid', jid) + Qless.job(child_jid):history(score, 'put', {q = self.name}) - + self.work.add(score, priority, child_jid) - + score = score + interval self.recurring.add(score, jid) end @@ -1587,14 +1789,19 @@ function QlessQueue:invalidate_locks(now, count) local remaining = tonumber(redis.call( 'hincrby', QlessJob.ns .. jid, 'remaining', -1)) - + if remaining < 0 then self.work.remove(jid) self.locks.remove(jid) self.scheduled.remove(jid) - - local group = 'failed-retries-' .. 
Qless.job(jid):data()['queue'] + local job = Qless.job(jid) + local job_data = Qless.job(jid):data() + local queue = job_data['queue'] + local group = 'failed-retries-' .. queue + + job:throttles_release(now) + job:history(now, 'failed', {group = group}) redis.call('hmset', QlessJob.ns .. jid, 'state', 'failed', 'worker', '', @@ -1607,10 +1814,10 @@ function QlessQueue:invalidate_locks(now, count) ['when'] = now, ['worker'] = unpack(job:data('worker')) })) - + redis.call('sadd', 'ql:failures', group) redis.call('lpush', 'ql:f:' .. group, jid) - + if redis.call('zscore', 'ql:tracked', jid) ~= false then Qless.publish('failed', jid) end @@ -1651,6 +1858,7 @@ function QlessQueue.counts(now, name) waiting = queue.work.length(), stalled = stalled, running = queue.locks.length() - stalled, + throttled = queue.throttled.length(), scheduled = queue.scheduled.length(), depends = queue.depends.length(), recurring = queue.recurring.length(), @@ -1809,6 +2017,82 @@ function QlessWorker.counts(now, worker) return response end end +function QlessThrottle:data() + local data = { + id = self.id, + maximum = 0 + } + + local throttle = redis.call('hmget', QlessThrottle.ns .. self.id, 'id', 'maximum') + + if throttle[2] then + data.maximum = tonumber(throttle[2]) + end + + return data +end + +function QlessThrottle:set(data, expiration) + redis.call('hmset', QlessThrottle.ns .. self.id, 'id', self.id, 'maximum', data.maximum) + if expiration > 0 then + redis.call('expire', QlessThrottle.ns .. self.id, expiration) + end +end + +function QlessThrottle:unset() + redis.call('del', QlessThrottle.ns .. self.id) +end + +function QlessThrottle:acquire(jid) + if not self:available() then + return false + end + + self.locks.add(1, jid) + return true +end + +function QlessThrottle:pend(now, jid) + self.pending.add(now, jid) +end + +function QlessThrottle:release(now, jid) + if self.locks.remove(jid) == 0 then + self.pending.remove(jid) + end + + local available_locks = self:locks_available() + if self.pending.length() == 0 or available_locks < 1 then + return + end + + for _, jid in ipairs(self.pending.peek(0, available_locks - 1)) do + local job = Qless.job(jid) + local data = job:data() + local queue = Qless.queue(data['queue']) + + queue.throttled.remove(jid) + queue.work.add(now, data.priority, jid) + end + + local popped = self.pending.pop(0, available_locks - 1) +end + +function QlessThrottle:available() + return self.maximum == 0 or self.locks.length() < self.maximum +end + +function QlessThrottle:ttl() + return redis.call('ttl', QlessThrottle.ns .. self.id) +end + +function QlessThrottle:locks_available() + if self.maximum == 0 then + return 10 + end + + return self.maximum - self.locks.length() +end local QlessAPI = {} function QlessAPI.get(now, jid) @@ -1935,7 +2219,7 @@ QlessAPI.unpause = function(now, ...) end QlessAPI.cancel = function(now, ...) - return Qless.cancel(unpack(arg)) + return Qless.cancel(now, unpack(arg)) end QlessAPI.timeout = function(now, ...) @@ -1998,6 +2282,53 @@ QlessAPI['queue.forget'] = function(now, ...) QlessQueue.deregister(unpack(arg)) end +QlessAPI['queue.throttle.get'] = function(now, queue) + local data = Qless.throttle(QlessQueue.ns .. queue):data() + if not data then + return nil + end + return cjson.encode(data) +end + +QlessAPI['queue.throttle.set'] = function(now, queue, max) + Qless.throttle(QlessQueue.ns .. queue):set({maximum = max}, 0) +end + +QlessAPI['throttle.set'] = function(now, tid, max, ...) 
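+  -- Editorial note (not in the upstream commit): the varargs may carry an
+  -- optional expiration in seconds; absent or 0 means the throttle hash
+  -- never expires (see QlessThrottle:set above).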
+  local expiration = unpack(arg)
+  local data = {
+    maximum = max
+  }
+  Qless.throttle(tid):set(data, tonumber(expiration or 0))
+end
+
+QlessAPI['throttle.get'] = function(now, tid)
+  return cjson.encode(Qless.throttle(tid):data())
+end
+
+QlessAPI['throttle.delete'] = function(now, tid)
+  return Qless.throttle(tid):unset()
+end
+
+QlessAPI['throttle.locks'] = function(now, tid)
+  return Qless.throttle(tid).locks.members()
+end
+
+QlessAPI['throttle.pending'] = function(now, tid)
+  return Qless.throttle(tid).pending.members()
+end
+
+QlessAPI['throttle.ttl'] = function(now, tid)
+  return Qless.throttle(tid):ttl()
+end
+
+QlessAPI['throttle.release'] = function(now, tid, ...)
+  local throttle = Qless.throttle(tid)
+
+  for _, jid in ipairs(arg) do
+    throttle:release(now, jid)
+  end
+end
 if #KEYS > 0 then error('No Keys should be provided') end
diff --git a/lib/qless/middleware/memory_usage_monitor.rb b/lib/qless/middleware/memory_usage_monitor.rb
index 535ba596..ad453fab 100644
--- a/lib/qless/middleware/memory_usage_monitor.rb
+++ b/lib/qless/middleware/memory_usage_monitor.rb
@@ -18,8 +18,9 @@ def initialize(options)
         ensure
           current_mem = MemoryUsageMonitor.current_usage_in_kb
           if current_mem > max_memory
-            log(:info, "Exiting after job #{job_counter} since current memory (#{current_mem} KB) " +
-              "has exceeded max allowed memory (#{max_memory} KB).")
+            log(:info, "Exiting after job #{job_counter} since current memory " \
+                       "(#{current_mem} KB) has exceeded max allowed memory " \
+                       "(#{max_memory} KB).")
             shutdown
           end
         end
@@ -36,8 +37,8 @@ def initialize(options)
       begin
         require 'rusage'
       rescue LoadError
-        warn "Could not load `rusage` gem. Falling back to shelling out to get process memory usage, " +
-          "which is several orders of magnitude slower."
+        warn "Could not load `rusage` gem. Falling back to shelling out " \
+             "to get process memory usage, which is several orders of magnitude slower."

         define_singleton_method(:current_usage_in_kb, &SHELL_OUT_FOR_MEMORY)
       else
diff --git a/lib/qless/middleware/requeue_exceptions.rb b/lib/qless/middleware/requeue_exceptions.rb
index a148f27a..98311325 100644
--- a/lib/qless/middleware/requeue_exceptions.rb
+++ b/lib/qless/middleware/requeue_exceptions.rb
@@ -17,17 +17,19 @@ module Middleware
     # to be retried many times, w/o having other transient errors retried so
     # many times.
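A minimal usage sketch of the reworked middleware, not part of the diff: it assumes the file's `requeue_on` registration helper (not shown in this hunk) feeds `RequeueableException.from_splat_and_options`, and the job class and exception names are illustrative.

``` ruby
require 'qless/middleware/requeue_exceptions'

class SyncAccountJob
  extend Qless::Middleware::RequeueExceptions

  # `requeue_on` is assumed here; each requeue is delayed by
  # delay_min + rand(delay_span), i.e. a value sampled uniformly from
  # 30..90 seconds, and after `max_attempts` requeues of the same error
  # class, raise_if_exhausted_requeues re-raises it.
  requeue_on Timeout::Error, Errno::ECONNRESET,
             delay_range: 30..90, max_attempts: 5

  def self.perform(job)
    # ... work that may raise one of the transient errors above ...
  end
end
```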
 module RequeueExceptions
-      RequeueableException = Struct.new(:klass, :delay_range, :max_attempts) do
+      RequeueableException = Struct.new(:klass, :delay_min, :delay_span, :max_attempts) do
         def self.from_splat_and_options(*klasses, options)
+          delay_range = options.fetch(:delay_range)
+          delay_min = Float(delay_range.min)
+          delay_span = Float(delay_range.max) - Float(delay_range.min)
+          max_attempts = options.fetch(:max_attempts)
           klasses.map do |klass|
-            new(klass,
-                options.fetch(:delay_range).to_a,
-                options.fetch(:max_attempts))
+            new(klass, delay_min, delay_span, max_attempts)
           end
         end

         def delay
-          delay_range.sample
+          delay_min + Random.rand(delay_span)
         end

         def raise_if_exhausted_requeues(error, requeues)
@@ -51,21 +53,29 @@ def on_requeue_callback
         @on_requeue_callback ||= DEFAULT_ON_REQUEUE_CALLBACK
       end

-      def around_perform(job)
-        super
-      rescue *requeueable_exceptions.keys => e
-        config = requeuable_exception_for(e)
+      def handle_exception(job, error)
+        config = requeuable_exception_for(error)

         requeues_by_exception = (job.data['requeues_by_exception'] ||= {})
         requeues_by_exception[config.klass.name] ||= 0

         config.raise_if_exhausted_requeues(
-          e, requeues_by_exception[config.klass.name])
+          error, requeues_by_exception[config.klass.name])

         requeues_by_exception[config.klass.name] += 1
         job.requeue(job.queue_name, delay: config.delay,
                     data: job.data)
-        on_requeue_callback.call(e, job)
+        on_requeue_callback.call(error, job)
+      end
+
+      def around_perform(job)
+        super
+      rescue *requeueable_exceptions.keys => e
+        handle_exception(job, e)
+      end
+
+      def requeueable?(exception)
+        requeueable_exceptions.member?(exception)
       end

       def requeueable_exceptions
diff --git a/lib/qless/middleware/retry_exceptions.rb b/lib/qless/middleware/retry_exceptions.rb
index baa7c68e..2839551c 100644
--- a/lib/qless/middleware/retry_exceptions.rb
+++ b/lib/qless/middleware/retry_exceptions.rb
@@ -48,19 +48,18 @@ def on_retry_callback
         @on_retry_callback ||= DEFAULT_ON_RETRY_CALLBACK
       end

-      def exponential(base, options = {})
+      # If `factor` is omitted it is set to `delay_seconds` to reproduce legacy
+      # behavior.
+      def exponential(delay_seconds, options = {})
+        factor = options.fetch(:factor, delay_seconds)
         fuzz_factor = options.fetch(:fuzz_factor, 0)

-        lambda do |num, _error|
-          unfuzzed = base**num
-
-          fuzz = 0
-          unless fuzz_factor.zero?
-            max_fuzz = unfuzzed * fuzz_factor
-            fuzz = rand(max_fuzz) * [1, -1].sample
-          end
-
-          unfuzzed + fuzz
+        lambda do |retry_no, error|
+          unfuzzed = delay_seconds * factor**(retry_no - 1)
+          return unfuzzed if fuzz_factor.zero?
+          # r is uniformly distributed in the range [-1, 1]
+          r = 2 * rand - 1
+          unfuzzed * (1 + fuzz_factor * r)
         end
       end
     end
diff --git a/lib/qless/middleware/timeout.rb b/lib/qless/middleware/timeout.rb
new file mode 100644
index 00000000..b18f37a4
--- /dev/null
+++ b/lib/qless/middleware/timeout.rb
@@ -0,0 +1,64 @@
+require 'timeout'
+require 'qless/middleware/requeue_exceptions'
+
+module Qless
+  # Unique error class used when a job is timed out by this middleware.
+  # Allows us to differentiate this timeout from others caused by `::Timeout::Error`
+  JobTimedoutError = Class.new(StandardError)
+  InvalidTimeoutError = Class.new(ArgumentError)
+
+  module Middleware
+    # Applies a hard timeout. To use this middleware, instantiate it and pass a block; the block
+    # will be passed the job object (which has a `ttl` method for getting the job's remaining TTL),
+    # and the block should return the desired timeout in seconds.
+    # This allows you to set a hard constant timeout for a particular job class
+    # (using something like `extend Qless::Middleware::Timeout.new { 60 * 60 }`),
+    # or a variable timeout based on the individual TTLs of each job
+    # (using something like `extend Qless::Middleware::Timeout.new { |job| job.ttl * 1.1 }`).
+    class Timeout < Module
+      def initialize(opts = {})
+        timeout_class = opts.fetch(:timeout_class, ::Timeout)
+        kernel_class = opts.fetch(:kernel_class, Kernel)
+        module_eval do
+          define_method :around_perform do |job|
+            timeout_seconds = yield job

+            return super(job) if timeout_seconds.nil?
+
+            if !timeout_seconds.is_a?(Numeric) || timeout_seconds <= 0
+              raise InvalidTimeoutError, "Timeout must be a positive number or nil, " \
+                                         "but was #{timeout_seconds}"
+            end
+
+            begin
+              timeout_class.timeout(timeout_seconds) { super(job) }
+            rescue ::Timeout::Error => e
+              error = JobTimedoutError.new(e.message)
+              error.set_backtrace(e.backtrace)
+              # The stalled connection to redis might be the cause of the timeout. We cannot rely
+              # on the state of the connection either (e.g., we might be in the middle of a Redis
+              # call when the timeout happened). To play it safe, we reconnect.
+              job.reconnect_to_redis
+              job.fail(*Qless.failure_formatter.format(job, error, []))
+              # Since we are leaving with a bang (exit!), normal requeue logic does not work.
+              # Do it manually right here.
+              if self.is_a?(::Qless::Middleware::RequeueExceptions) &&
+                 self.requeueable?(JobTimedoutError)
+                self.handle_exception(job, error)
+              end
+
+              # ::Timeout.timeout is dangerous to use as it can leave things in an inconsistent
+              # state. With Redis, for example, we've seen the socket buffer left with unread bytes
+              # on it, which can affect later redis calls. Thus, it's much safer just to exit, and
+              # allow the parent process to restart the worker in a known, clean state.
+              #
+              # We use 73 as a unique exit status for this case. 73 looks
+              # a bit like TE (Timeout::Error)
+              kernel_class.exit!(73)
+            end
+          end
+        end
+      end
+    end
+  end
+end
diff --git a/lib/qless/qless-core b/lib/qless/qless-core
index 525c3900..20dc6878 160000
--- a/lib/qless/qless-core
+++ b/lib/qless/qless-core
@@ -1 +1 @@
-Subproject commit 525c39000dc71df53a3502491cb4daf0e1128f1d
+Subproject commit 20dc687832ad472f0a00899d26c285b893ff466c
diff --git a/lib/qless/queue.rb b/lib/qless/queue.rb
index bf20dca3..b4476c8b 100644
--- a/lib/qless/queue.rb
+++ b/lib/qless/queue.rb
@@ -17,6 +17,10 @@ def running(start = 0, count = 25)
       @client.call('jobs', 'running', @name, start, count)
     end

+    def throttled(start = 0, count = 25)
+      @client.call('jobs', 'throttled', @name, start, count)
+    end
+
     def stalled(start = 0, count = 25)
       @client.call('jobs', 'stalled', @name, start, count)
     end
@@ -65,13 +69,18 @@ def heartbeat=(value)
       set_config :heartbeat, value
     end

+    def throttle
+      @throttle ||= Qless::Throttle.new("ql:q:#{name}", client)
+    end
+
     def max_concurrency
-      value = get_config('max-concurrency')
-      value && Integer(value)
+      warn "[DEPRECATED - 4/17/14] `max_concurrency` is deprecated. Use `throttle.maximum` instead."
+      throttle.maximum
     end

     def max_concurrency=(value)
-      set_config 'max-concurrency', value
+      warn "[DEPRECATED - 4/17/14] `max_concurrency=` is deprecated. Use `throttle.maximum=` instead."
+      throttle.maximum = value
     end

     def paused?
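For reviewers, here is what the deprecation above means for callers; a minimal sketch, not part of the diff, assuming a reachable Redis and an illustrative queue name:

``` ruby
require 'qless'

client = Qless::Client.new
queue  = client.queues['emails']

# Each queue now owns a Qless::Throttle keyed as "ql:q:<name>":
queue.throttle.maximum = 5   # allow at most 5 concurrent jobs in this queue
queue.throttle.maximum       # => 5

# The old accessors still work, but warn and delegate:
queue.max_concurrency = 5    # same effect as throttle.maximum = 5
```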
@@ -92,17 +101,15 @@ def unpause # => priority (int) # => tags (array of strings) # => delay (int) + # => throttles (array of strings) def put(klass, data, opts = {}) opts = job_options(klass, data, opts) - @client.call('put', worker_name, @name, - (opts[:jid] || Qless.generate_jid), - klass.is_a?(String) ? klass : klass.name, - JSON.generate(data), - opts.fetch(:delay, 0), - 'priority', opts.fetch(:priority, 0), - 'tags', JSON.generate(opts.fetch(:tags, [])), - 'retries', opts.fetch(:retries, 5), - 'depends', JSON.generate(opts.fetch(:depends, [])) + @client.call( + 'put', + worker_name, @name, + (opts[:jid] || Qless.generate_jid), + klass.is_a?(String) ? klass : klass.name, + *Job.build_opts_array(opts.merge(:data => data)), ) end diff --git a/lib/qless/server.rb b/lib/qless/server.rb index 1614ea37..e069e493 100755 --- a/lib/qless/server.rb +++ b/lib/qless/server.rb @@ -78,6 +78,7 @@ def paginated(qless_object, method, *args) def tabs [ { name: 'Queues' , path: '/queues' }, + { name: 'Throttles', path: '/throttles'}, { name: 'Workers' , path: '/workers' }, { name: 'Track' , path: '/track' }, { name: 'Failed' , path: '/failed' }, @@ -95,6 +96,10 @@ def queues client.queues.counts end + def throttles + client.throttles.counts + end + def tracked client.jobs.tracked end @@ -173,7 +178,7 @@ def strftime(t) json(client.queues[params[:name]].counts) end - filtered_tabs = %w[ running scheduled stalled depends recurring ].to_set + filtered_tabs = %w[ running throttled scheduled stalled depends recurring ].to_set get '/queues/:name/?:tab?' do queue = client.queues[params[:name]] tab = params.fetch('tab', 'stats') @@ -194,6 +199,46 @@ def strftime(t) } end + get '/throttles/?' do + erb :throttles, layout: true, locals: { + title: 'Throttles' + } + end + + post '/throttle' do + # Expects a JSON object: {'id': id, 'maximum': maximum} + data = JSON.parse(request.body.read) + if data['id'].nil? || data['maximum'].nil? + halt 400, 'Need throttle id and maximum value' + else + throttle = Throttle.new(data['id'], client) + throttle.maximum = data['maximum'] + end + end + + put '/throttle' do + # Expects a JSON object: {'id': id, 'expiration': expiration} + data = JSON.parse(request.body.read) + if data['id'].nil? || data['expiration'].nil? + halt 400, 'Need throttle id and expiration value' + else + throttle = Throttle.new(data['id'], client) + throttle.expiration = data['expiration'] + end + end + + delete '/throttle' do + # Expects a JSON object: {'id': id} + data = JSON.parse(request.body.read) + if data['id'].nil? + halt 400, 'Need throttle id' + else + throttle = Throttle.new(data['id'], client) + throttle.delete + return json({id: throttle.id, maximum: throttle.maximum}) + end + end + get '/failed.json' do json(client.jobs.failed) end @@ -219,7 +264,7 @@ def strftime(t) end get '/completed/?' do - completed = paginated(client.jobs, :complete) + completed = paginated(client.jobs, :complete) erb :completed, layout: true, locals: { title: 'Completed', jobs: completed.map { |jid| client.jobs[jid] } diff --git a/lib/qless/server/views/_job.erb b/lib/qless/server/views/_job.erb index c99c93bf..54373ccd 100644 --- a/lib/qless/server/views/_job.erb +++ b/lib/qless/server/views/_job.erb @@ -1,237 +1,259 @@ <% if job.instance_of?(Qless::Job) %> -
-
-
-
-

- "><%= job.jid[0..8] %>... | <%= job.klass_name %> -

-
-
-

- - | <%= job.state %> / "><%= job.queue_name %><%= job.worker_name.nil? ? "/ #{job.worker_name}" : "" %> - -

-
-
-
-
- <% if (job.state != "complete") %> - - <% end %> - <% if (job.state == "running") %> - - <% end %> - - <% if (job.state == 'failed') %> - - <% end %> - - -
-
-
-
- - - -
-
-
-
+
+
+
+
+

+ "><%= job.jid[0..8] %>... | <%= job.klass_name %> +

+
+
+

+ + | <%= job.state %> / " title="<%= job.queue_name %><%= job.worker_name.nil? ? "/ #{job.worker_name}" : "" %>"><%= job.queue_name %><%= job.worker_name.nil? ? "/ #{job.worker_name}" : "" %> + +

+
+
+
+
+ <% if (job.state != "complete") %> + + <% end %> + <% if (job.state == "running") %> + + <% end %> + + <% if (job.state == 'failed') %> + + <% end %> + + +
+
+
+
+ + + +
+
+
+
+ + <% if not job.dependencies.empty? %> +
+
+

Dependencies:

+ <% job.dependencies.each do |jid| %> +
"> + + +
+ <% end %> +
+
+ <% end %> - <% if not job.dependencies.empty? %> -
-
-

Dependencies:

- <% job.dependencies.each do |jid| %> -
"> - - -
- <% end %> -
-
- <% end %> + <% if not job.dependents.empty? %> +
+
+

Dependents:

+ <% job.dependents.each do |jid| %> +
"> + + +
+ <% end %> +
+
+ <% end %> - <% if not job.dependents.empty? %> -
-
-

Dependents:

- <% job.dependents.each do |jid| %> -
"> - - -
- <% end %> -
-
- <% end %> +
+
+ <% job.tags.each do |tag| %> +
+ <%= tag %> + +
+ <% end %> -
-
- <% job.tags.each do |tag| %> -
- <%= tag %> - -
- <% end %> - - -
- - -
-
-
+ +
+ + +
+
+
- <% if not defined? brief %> -
-
-

Data

-
<%= JSON.pretty_generate(job.data) %>
-
-
-

History

-
- <% job.queue_history.reverse.each do |h| %> - <% if h['what'] == 'put' %> -
<%= h['what'] %> at <%= strftime(h['when']) %>
+      <% if not defined? brief %>
+      
+
+

Data

+
<%= JSON.pretty_generate(job.data) %>
+
+
+

History

+
+ <% job.queue_history.reverse.each do |h| %> + <% if h['what'] == 'put' %> +
<%= h['what'] %> at <%= strftime(h['when']) %>
     in queue <%= h['q'] %>
- <% elsif h['what'] == 'popped' %> -
<%= h['what'] %> at <%= strftime(h['when']) %>
+            <% elsif h['what'] == 'popped' %>
+              
<%= h['what'] %> at <%= strftime(h['when']) %>
     by <%= h['worker'] %>
- <% elsif h['what'] == 'done' %> -
completed at <%= strftime(h['when']) %>
- <% elsif h['what'] == 'failed' %> - <% if h['worker'] %> -
<%= h['what'] %> at <%= strftime(h['when']) %>
+            <% elsif h['what'] == 'done' %>
+              
completed at <%= strftime(h['when']) %>
+ <% elsif h['what'] == 'failed' %> + <% if h['worker'] %> +
<%= h['what'] %> at <%= strftime(h['when']) %>
     by <%= h['worker'] %>
     in group <%= h['group'] %>
- <% else %> -
<%= h['what'] %> at <%= strftime(h['when']) %>
+              <% else %>
+                
<%= h['what'] %> at <%= strftime(h['when']) %>
     in group <%= h['group'] %>
- <% end %> - <% else %> -
<%= h['what'] %> at <%= strftime(h['when']) %>
- <% end %> - <% end %> -
-
-
- <% end %> + <% end %> + <% else %> +
<%= h['what'] %> at <%= strftime(h['when']) %>
+ <% end %> + <% end %> +
+
+
+ <% end %> - <% if job.failure.length > 0 %> -
-
-
-

In <%= job.queue_name %> on <%= job.failure['worker'] %> - about <%= strftime(Time.at(job.failure['when'])) %>

-
<%= job.failure['message'].gsub('>', '>').gsub('<', '<') %>
-
-
-
- <% end %> -
-
-
+ <% if job.failure.length > 0 %> +
+
+
+

In <%= job.queue_name %> on <%= job.failure['worker'] %> + about <%= strftime(Time.at(job.failure['when'])) %>

+
<%= job.failure['message'].gsub('>', '>').gsub('<', '<') %>
+
+
+
+ <% end %> + + <% job.throttle_objects.each do |throttle| %> +
+
+

<%= throttle.id %>

+
+
+ +
+
+ +
+
+ +
+
+

( maximum / TTL / reset )

+
+
+ <% end %> +
+
+
<% else # Recurring job %> -
-
-
-
-

- "><%= job.jid[0..8] %>... | <%= job.klass_name %> -

-
-
-

- - | recurring / "><%= job.queue_name %> - -

-
-
-
-
- - - -
-
-
-
- - + + +
+
+
+
+ + - -
-
-
-
- -
-
- <% job.tags.each do |tag| %> -
- <%= tag %> - -
- <% end %> - - -
- - -
-
-
+ +
+
+ + + +
+
+ <% job.tags.each do |tag| %> +
+ <%= tag %> + +
+ <% end %> + + +
+ + +
+
+
- <% if not defined? brief %> -
-
-

Data

-
<%= JSON.pretty_generate(job.data) %>
-
-
- <% end %> -
- - + <% if not defined? brief %> +
+
+

Data

+
<%= JSON.pretty_generate(job.data) %>
+
+
+ <% end %> +
+ + <% end %> diff --git a/lib/qless/server/views/layout.erb b/lib/qless/server/views/layout.erb index 3375a8b6..ca6f1d51 100644 --- a/lib/qless/server/views/layout.erb +++ b/lib/qless/server/views/layout.erb @@ -93,7 +93,7 @@ url: '<%= u "/move" %>', data: {id:jid, queue:queue}, success: function() { flash('Moved ' + jid + ' to ' + queue, 'success', 1500); cb(jid, queue); }, - erorr: function() { flash('Failed to move ' + jid + ' to ' + queue); } + error: function() { flash('Failed to move ' + jid + ' to ' + queue); } }); } @@ -103,7 +103,7 @@ url: '<%= u "/retry" %>', data: {id:jid}, success: function() { flash('Retrying ' + jid, 'success', 1500); if (cb) { cb(jid, 'retry'); } }, - erorr: function() { flash('Failed to retry ' + jid); } + error: function() { flash('Failed to retry ' + jid); } }); } @@ -331,6 +331,57 @@ }) } + var expire_throttle = function(throttle_id, expiration) { + var data = { 'id': throttle_id, 'expiration': expiration }; + + $.ajax({ + url: '<%= u "/throttle" %>', + type: 'PUT', + dataType: 'json', + processData: false, + data: JSON.stringify(data), + success: function(data) { + flash('Set expiration for throttle ' + throttle_id, 'success'); + }, + error: function(data) { + flash('Couldn\'t update expiration for throttle ' + throttle_id); + } + }) + } + + var delete_throttle = function(throttle_id) { + var data = { 'id': throttle_id }; + + $.ajax({ + url: '<%= u "/throttle" %>', + type: 'DELETE', + dataType: 'json', + processData: false, + data: JSON.stringify(data), + success: function(data) { + flash('Deleted throttle for ' + throttle_id, 'success'); + $('.' + throttle_id.replace(/:/g, '-') + '-maximum').val(data['maximum']); + }, + error: function(data) { + flash('Couldn\'t delete thottle ' + throttle_id); + } + }) + } + + var update_throttle = function(throttle_id, maximum) { + _ajax({ + url: '<%= u "/throttle" %>', + data: { + 'id': throttle_id, + 'maximum': maximum + }, success: function(data) { + flash('Updated throttle for ' + throttle_id, 'success'); + }, error: function(data) { + flash('Couldn\'t update throttle ' + throttle_id); + } + }) + } + $(document).ready(function() { $('button').tooltip({delay:200}); }); diff --git a/lib/qless/server/views/overview.erb b/lib/qless/server/views/overview.erb index 0d09d67d..0706e0c0 100644 --- a/lib/qless/server/views/overview.erb +++ b/lib/qless/server/views/overview.erb @@ -13,6 +13,7 @@ running waiting + throttled scheduled stalled depends @@ -42,6 +43,7 @@ <%= queue['running'] %> <%= queue['waiting'] %> + <%= queue['throttled'] %> <%= queue['scheduled'] %> <%= queue['stalled'] %> <%= queue['depends'] %> diff --git a/lib/qless/server/views/queue.erb b/lib/qless/server/views/queue.erb index 14e5b2f3..9f2e5f9b 100644 --- a/lib/qless/server/views/queue.erb +++ b/lib/qless/server/views/queue.erb @@ -43,6 +43,7 @@
  • ">Stats
  • ">Running
  • ">Waiting
  • +
  • ">Throttled
  • ">Scheduled
  • ">Stalled
  • ">Depends
  • @@ -57,9 +58,10 @@

    "><%= queue['name'] %> | <%= queue['running'] %> / <%= queue['waiting'] %> / + <%= queue['throttled'] %> / <%= queue['scheduled'] %> / <%= queue['stalled'] %> / - <%= queue['depends'] %> (running / waiting / scheduled / stalled / depends) + <%= queue['depends'] %> (running / waiting / throttled / scheduled / stalled / depends)

    @@ -74,7 +76,7 @@ -<% if ['running', 'waiting', 'scheduled', 'stalled', 'depends', 'recurring'].include?(tab) %> +<% if ['running', 'waiting', 'throttled', 'scheduled', 'stalled', 'depends', 'recurring'].include?(tab) %>
    <%= erb :_job_list, :locals => { :jobs => jobs, :queues => queues } %> <% else %> diff --git a/lib/qless/server/views/queues.erb b/lib/qless/server/views/queues.erb index 8509d849..fd3bb33a 100644 --- a/lib/qless/server/views/queues.erb +++ b/lib/qless/server/views/queues.erb @@ -33,9 +33,11 @@

    | <%= queue['running'] %> / <%= queue['waiting'] %> / + <%= queue['throttled'] %> / <%= queue['scheduled'] %> / <%= queue['stalled'] %> / - <%= queue['depends'] %> (running / waiting / scheduled / stalled / depends) + <%= queue['depends'] %> / + <%= queue['recurring'] %> (running / waiting / throttled / scheduled / stalled / depends / recurring)

    diff --git a/lib/qless/server/views/throttles.erb b/lib/qless/server/views/throttles.erb new file mode 100644 index 00000000..4b60c9de --- /dev/null +++ b/lib/qless/server/views/throttles.erb @@ -0,0 +1,38 @@ +<% if throttles.empty? %> + +<% else %> + + + + + + + + + + + + + <% throttles.each do |throttle| %> + + + + + + + <% end %> + +
    Queue Maximum TTL (sets expiration) Reset
    <%= throttle.id %> + + + + + +
    +<% end %> diff --git a/lib/qless/server/views/track.erb b/lib/qless/server/views/track.erb index 7a9d4439..617f341c 100644 --- a/lib/qless/server/views/track.erb +++ b/lib/qless/server/views/track.erb @@ -11,6 +11,7 @@ var fade = function(jid, type) {
  • All (<%= tracked['jobs'].length %>)
  • Running (<%= tracked['jobs'].select { |job| job.state == 'running' }.length %>)
  • Waiting (<%= tracked['jobs'].select { |job| job.state == 'waiting' }.length %>)
  • +
  • Throttled (<%= tracked['jobs'].select { |job| job.state == 'throttled'}.length %>)
  • Scheduled (<%= tracked['jobs'].select { |job| job.state == 'scheduled' }.length %>)
  • Stalled (<%= tracked['jobs'].select { |job| job.state == 'stalled' }.length %>)
  • Completed (<%= tracked['jobs'].select { |job| job.state == 'complete' }.length %>)
@@ -41,6 +42,11 @@ var fade = function(jid, type) {
       <%= erb :_job, :layout => false, :locals => { :job => job, :queues => queues } %>
     <% end %>
+
+    <% tracked['jobs'].select { |job| job.state == 'throttled' }.each do |job| %>
+      <%= erb :_job, :layout => false, :locals => { :job => job, :queues => queues } %>
+    <% end %>
+
     <% tracked['jobs'].select { |job| job.state == 'scheduled' }.each do |job| %>
       <%= erb :_job, :layout => false, :locals => { :job => job, :queues => queues } %>
diff --git a/lib/qless/subscriber.rb b/lib/qless/subscriber.rb
index af6d88be..6135cfda 100644
--- a/lib/qless/subscriber.rb
+++ b/lib/qless/subscriber.rb
@@ -14,7 +14,7 @@ def self.start(*args, &block)
   def initialize(client, channel, options = {}, &message_received_callback)
     @channel = channel
     @message_received_callback = message_received_callback
-    @log_to = options.fetch(:log_to) { $stderr }
+    @log = options.fetch(:log) { ::Logger.new($stderr) }

     # pub/sub blocks the connection so we must use a different redis
     # connection
@@ -29,18 +29,28 @@ def start
     queue = ::Queue.new

     @thread = Thread.start do
-      @listener_redis.subscribe(@channel, @my_channel) do |on|
-        on.subscribe do |channel|
-          queue.push(:subscribed) if channel == @channel
-        end
+      begin
+        @listener_redis.subscribe(@channel, @my_channel) do |on|
+          on.subscribe do |channel|
+            # insert nil into the queue to indicate we've
+            # successfully subscribed
+            queue << nil if channel == @channel
+          end

-        on.message do |channel, message|
-          handle_message(channel, message)
+          on.message do |channel, message|
+            handle_message(channel, message)
+          end
         end
+      # Watch for any exceptions so we don't block forever if
+      # subscribing to the channel fails
+      rescue Exception => e
+        queue << e
       end
     end

-    queue.pop
+    if (exception = queue.pop)
+      raise exception
+    end
   end

   def stop
@@ -57,7 +67,7 @@ def handle_message(channel, message)
       @message_received_callback.call(self, JSON.parse(message))
     end
   rescue Exception => error
-    @log_to.puts "Error: #{error}"
+    @log.error("Qless::Subscriber") { error }
   end
  end
 end
diff --git a/lib/qless/tasks.rb b/lib/qless/tasks.rb
index 4bfd152a..2f2b31c1 100644
--- a/lib/qless/tasks.rb
+++ b/lib/qless/tasks.rb
@@ -1,11 +1,5 @@
 # Encoding: utf-8
-
-namespace :qless do
-  task :setup # no-op; users should define their own setup
-
-  desc 'Start a worker with env: QUEUES, JOB_RESERVER, REDIS_URL, INTERVAL'
-  task work: :setup do
-    require 'qless/worker'
-    Qless::Worker.start
-  end
-end
+warn "Qless tasks are deprecated (they haven't worked for " \
+     "quite some time) and you should start a worker by " \
+     "writing a bit of ruby code that instantiates and " \
+     "runs a worker instead."
diff --git a/lib/qless/throttle.rb b/lib/qless/throttle.rb
new file mode 100644
index 00000000..37a00b87
--- /dev/null
+++ b/lib/qless/throttle.rb
@@ -0,0 +1,57 @@
+# Encoding: utf-8
+
+require 'redis'
+require 'json'
+
+module Qless
+  class Throttle
+    attr_reader :name, :client
+
+    def initialize(name, client)
+      @name = name
+      @client = client
+    end
+
+    def delete
+      @client.call('throttle.delete', @name)
+    end
+
+    def expiration=(expire_time_in_seconds)
+      update(nil, Integer(expire_time_in_seconds))
+    end
+
+    def id
+      @name
+    end
+
+    def locks
+      @client.call('throttle.locks', @name)
+    end
+
+    def maximum
+      throttle_attrs['maximum'].to_i
+    end
+
+    def maximum=(max)
+      update(max)
+    end
+
+    def pending
+      @client.call('throttle.pending', @name)
+    end
+
+    def ttl
+      @client.call('throttle.ttl', @name)
+    end
+
+    private
+    def throttle_attrs
+      throttle_json = @client.call('throttle.get', @name)
+      throttle_json ? JSON.parse(throttle_json) : {}
+    end
+
+    def update(max, expiration = 0)
+      @client.call('throttle.set', @name, max || maximum, expiration)
+    end
+  end
+end
diff --git a/lib/qless/worker/base.rb b/lib/qless/worker/base.rb
index 61c9c45d..2de3adf8 100644
--- a/lib/qless/worker/base.rb
+++ b/lib/qless/worker/base.rb
@@ -14,25 +14,30 @@ module Workers
     JobLockLost = Class.new(StandardError)

     class BaseWorker
-      attr_accessor :output, :reserver, :log_level, :interval, :paused,
-                    :options
+      attr_accessor :output, :reserver, :interval, :paused,
+                    :options, :sighup_handler

       def initialize(reserver, options = {})
         # Our job reserver and options
         @reserver = reserver
         @options = options

+        # SIGHUP handler
+        @sighup_handler = options.fetch(:sighup_handler) { lambda { } }
+
         # Our logger
-        @output = options.fetch(:output) { $stdout }
-        @log = Logger.new(output)
-        @log_level = options[:log_level] || Logger::WARN
-        @log.level = @log_level
-        @log.formatter = proc do |severity, datetime, progname, msg|
-          "#{datetime}: #{msg}\n"
+        @log = options.fetch(:logger) do
+          @output = options.fetch(:output, $stdout)
+          Logger.new(output).tap do |logger|
+            logger.level = options.fetch(:log_level, Logger::WARN)
+            logger.formatter = options.fetch(:log_formatter) do
+              Proc.new { |severity, datetime, progname, msg| "#{datetime}: #{msg}\n" }
+            end
+          end
         end

         # The interval for checking for new jobs
-        @interval = options[:interval] || 5.0
+        @interval = options.fetch(:interval, 5.0)

         @current_job_mutex = Mutex.new
         @current_job = nil
@@ -40,6 +45,18 @@ def initialize(reserver, options = {})
         on_current_job_lock_lost { shutdown }
       end

+      def log_level
+        @log.level
+      end
+
+      def safe_trap(signal_name, &cblock)
+        begin
+          trap(signal_name, cblock)
+        rescue ArgumentError
+          warn "Signal #{signal_name} not supported."
+        end
+      end
+
       # The meaning of these signals is meant to closely mirror resque
       #
       # TERM: Shutdown immediately, stop processing jobs.
@@ -48,16 +65,18 @@ def initialize(reserver, options = {})
       # USR1: Kill the forked children immediately, continue processing jobs.
       # USR2: Pause after this job
       # CONT: Start processing jobs again after a USR2
+      # HUP: Print current stack to log and continue
       def register_signal_handlers
         # Otherwise, we want to take the appropriate action
         trap('TERM') { exit! }
         trap('INT') { exit! }
+        safe_trap('HUP') { sighup_handler.call }
+        safe_trap('QUIT') { shutdown }

         begin
-          trap('QUIT') { shutdown }
-          trap('USR2') { pause }
-          trap('CONT') { unpause }
+          trap('CONT') { unpause }
+          trap('USR2') { pause }
         rescue ArgumentError
-          warn 'Signals QUIT, USR1, USR2, and/or CONT not supported.'
+          warn 'Signals USR2 and/or CONT not supported.'
         end
       end

@@ -154,8 +173,8 @@ def try_complete(job)

       def fail_job(job, error, worker_backtrace)
         failure = Qless.failure_formatter.format(job, error, worker_backtrace)
+        log(:error, "Got #{failure.group} failure from #{job.inspect}\n#{failure.message}")
         job.fail(*failure)
-        log(:error, "Got #{failure.group} failure from #{job.inspect}")
       rescue Job::CantFailError => e
         # There's not much we can do here. Another worker may have cancelled it,
         # or we might not own the job, etc. Logging is the best we can do.
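The hunk above makes the worker's logger injectable (a `logger` option that short-circuits the `output`/`log_level`/`log_formatter` plumbing) and adds a `sighup_handler` callback that defaults to a no-op lambda. A minimal configuration sketch, assuming a `reserver` has already been built; the handler body is illustrative, not part of this patch:

``` ruby
require 'logger'
require 'qless'
require 'qless/worker'

# `reserver` is assumed to already exist (e.g. an ordered reserver
# wrapping the queues this worker should drain).
logger = Logger.new($stdout)
logger.level = Logger::INFO

worker = Qless::Workers::ForkingWorker.new(
  reserver,
  # When :logger is given, output/log_level/log_formatter are never consulted
  logger: logger,
  # Invoked on SIGHUP; defaults to a no-op lambda
  sighup_handler: lambda { logger.info('SIGHUP received') }
)
worker.run
```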
@@ -176,22 +195,17 @@ def on_current_job_lock_lost(&block)
         @on_current_job_lock_lost = block
       end

-      def listen_for_lost_lock
-        subscribers = uniq_clients.map do |client|
-          Subscriber.start(client, "ql:w:#{client.worker_name}", log_to: output) do |_, message|
-            if message['event'] == 'lock_lost'
-              with_current_job do |job|
-                if job && message['jid'] == job.jid
-                  @on_current_job_lock_lost.call(job)
-                end
-              end
-            end
+      def listen_for_lost_lock(job)
+        # Ensure subscriber always has a value
+        subscriber = Subscriber.start(job.client, "ql:w:#{job.client.worker_name}", log: @log) do |_, message|
+          if message['event'] == 'lock_lost' && message['jid'] == job.jid
+            @on_current_job_lock_lost.call(job)
           end
         end

         yield
       ensure
-        subscribers.each(&:stop)
+        subscriber && subscriber.stop
       end

       private
diff --git a/lib/qless/worker/forking.rb b/lib/qless/worker/forking.rb
index c97082b5..c891e2e0 100644
--- a/lib/qless/worker/forking.rb
+++ b/lib/qless/worker/forking.rb
@@ -67,16 +67,18 @@ def register_signal_handlers
           exit
         end

+        safe_trap('HUP') { sighup_handler.call }
+        safe_trap('QUIT') do
+          stop!('QUIT')
+          exit
+        end
+        safe_trap('USR1') { stop!('KILL') }
+
         begin
-          trap('QUIT') do
-            stop!('QUIT')
-            exit
-          end
-          trap('USR1') { stop!('KILL') }
-          trap('USR2') { stop('USR2') }
           trap('CONT') { stop('CONT') }
+          trap('USR2') { stop('USR2') }
         rescue ArgumentError
-          warn 'Signals QUIT, USR1, USR2, and/or CONT not supported.'
+          warn 'Signals USR2 and/or CONT not supported.'
         end
       end
diff --git a/lib/qless/worker/serial.rb b/lib/qless/worker/serial.rb
index 4807d20d..a7f2273f 100644
--- a/lib/qless/worker/serial.rb
+++ b/lib/qless/worker/serial.rb
@@ -19,20 +19,21 @@ def run
         reserver.prep_for_work!

-        listen_for_lost_lock do
-          procline "Running #{reserver.description}"
+        procline "Running #{reserver.description}"

-          jobs.each do |job|
-            # Run the job we're working on
-            log(:debug, "Starting job #{job.klass_name} (#{job.jid} from #{job.queue_name})")
+        jobs.each do |job|
+          # Run the job we're working on
+          log(:debug, "Starting job #{job.klass_name} (#{job.jid} from #{job.queue_name})")
+          procline "Processing #{job.description}"
+
+          listen_for_lost_lock(job) do
             perform(job)
-            log(:debug, "Finished job #{job.klass_name} (#{job.jid} from #{job.queue_name})")
+          end
+          log(:debug, "Finished job #{job.klass_name} (#{job.jid} from #{job.queue_name})")

-            # So long as we're paused, we should wait
-            while paused
-              log(:debug, 'Paused...')
-              sleep interval
-            end
+          # So long as we're paused, we should wait
+          while paused
+            log(:debug, 'Paused...')
+            sleep interval
           end
         end
       end
diff --git a/qless.gemspec b/qless.gemspec
index f9b13469..c423b2b6 100644
--- a/qless.gemspec
+++ b/qless.gemspec
@@ -52,4 +52,5 @@ language-specific extension will also remain up to date.
   s.add_development_dependency 'rubocop' , '~> 0.13.1'
   s.add_development_dependency 'rusage'  , '~> 0.2.0'
   s.add_development_dependency 'timecop' , '~> 0.7.1'
+  s.add_development_dependency 'pry'
 end
diff --git a/spec/integration/job_spec.rb b/spec/integration/job_spec.rb
index 4dd90e18..4e80fd0d 100644
--- a/spec/integration/job_spec.rb
+++ b/spec/integration/job_spec.rb
@@ -20,7 +20,7 @@ class NoPerformJob; end
   end

   it 'has all the attributes we would expect' do
-    queue.put('Foo', { whiz: 'bang' }, jid: 'jid', tags: ['foo'], retries: 3)
+    queue.put('Foo', { whiz: 'bang' }, jid: 'jid', tags: ['foo'], retries: 3, throttles: ['fizz', 'buzz'])
     job = client.jobs['jid']
     expected = {
       jid: 'jid',
@@ -35,6 +35,7 @@ class NoPerformJob; end
       retries_left: 3,
       dependencies: [],
       original_retries: 3,
+      throttles: ['fizz', 'buzz', 'ql:q:foo'],
     }
     expected.each do |key, value|
       expect(job.send(key)).to eq(value)
diff --git a/spec/integration/middleware/timeout_spec.rb b/spec/integration/middleware/timeout_spec.rb
new file mode 100644
index 00000000..169b1f6d
--- /dev/null
+++ b/spec/integration/middleware/timeout_spec.rb
@@ -0,0 +1,73 @@
+require 'spec_helper'
+require 'support/forking_worker_context'
+require 'qless/middleware/timeout'
+
+module Qless::Middleware
+  describe Timeout do
+    include_context "forking worker"
+
+    def define_job_class(name, &block)
+      stub_const(name, Class.new(&block))
+    end
+
+    def duration_of
+      start = Time.now
+      yield
+      Time.now - start
+    end
+
+    before do
+      define_job_class "MyJobClass" do
+        extend Qless::Job::SupportsMiddleware
+
+        def self.perform(job)
+          job.client.redis.rpush("in_job", "about_to_sleep")
+          do_work
+        end
+
+        def self.do_work
+          sleep
+        end
+      end
+    end
+    let(:sleep_line) { __LINE__ - 4 }
+
+    def expect_job_to_timeout
+      jid = queue.put MyJobClass, {}
+
+      duration_of { drain_worker_queues(worker) }.tap do
+        expect(redis.brpop("in_job", timeout: 1).last).to eq("about_to_sleep")
+        job = client.jobs[jid]
+
+        expect(job.failure["group"]).to include("JobTimedoutError")
+        expect(job.failure["message"]).to include("do_work", "#{File.basename(__FILE__)}:#{sleep_line}")
+        expect(log_io.string).to include("died with 73")
+      end
+    end
+
+    it 'fails the job and kills the worker running it when it exceeds the provided timeout value' do
+      MyJobClass.extend Qless::Middleware::Timeout.new { 0.05 }
+
+      duration = expect_job_to_timeout
+      expect(duration).to be < 0.2
+    end
+
+    it "can be applied to a worker rather than an individual job, which can use the job's TTL as a basis for the timeout value" do
+      queue.heartbeat = 0.05
+      worker.extend Qless::Middleware::Timeout.new { |job| job.ttl + 0.05 }
+
+      duration = expect_job_to_timeout
+      expect(duration).to be_between(0.1, 0.2)
+    end
+
+    it 'aborts with a clear error when given a non-positive timeout' do
+      MyJobClass.extend Qless::Middleware::Timeout.new { 0 }
+
+      jid = queue.put MyJobClass, {}
+      drain_worker_queues(worker)
+      job = client.jobs[jid]
+
+      expect(job.failure["group"]).to include("InvalidTimeoutError")
+    end
+  end
+end
diff --git a/spec/integration/queue_spec.rb b/spec/integration/queue_spec.rb
index b2316348..7d43ddeb 100644
--- a/spec/integration/queue_spec.rb
+++ b/spec/integration/queue_spec.rb
@@ -27,7 +27,8 @@ module Qless
        'scheduled' => 0,
        'running' => 0,
        'stalled' => 0,
-       'waiting' => 1
+       'waiting' => 1,
+       'throttled' => 0,
      })
    end

@@ -74,6 +75,10 @@ module Qless
      pending('this is specific to ruby')
    end

+   it 'exposes a throttle' do
+     expect(queue.throttle).to be
+   end
+
    it 'exposes max concurrency' do
      queue.max_concurrency = 5
      expect(queue.max_concurrency).to eq(5)
diff --git a/spec/integration/server_spec.rb b/spec/integration/server_spec.rb
index e001fed5..a3a8bd76 100644
--- a/spec/integration/server_spec.rb
+++ b/spec/integration/server_spec.rb
@@ -8,6 +8,7 @@
 require 'capybara/rspec'
 require 'capybara/poltergeist'
 require 'rack/test'
+require 'pry'

 Capybara.javascript_driver = :poltergeist

@@ -122,6 +123,122 @@ def test_pagination(page_1_jid = 1, page_2_jid = 27)
     test_pagination
   end

+  it 'can set and delete queue throttles', js: true do
+    q.put(Qless::Job, {})
+
+    text_field_class = ".ql-q-#{q.name}-maximum"
+
+    q.throttle.maximum.should eq(0)
+
+    visit '/throttles'
+
+    first('td', text: /ql:q:#{q.name}/i).should be
+    first(text_field_class, placeholder: /0/i).should be
+
+    maximum = first(text_field_class)
+    maximum.set(3)
+    maximum.trigger('blur')
+
+    first(text_field_class, value: /3/i).should be
+    q.throttle.maximum.should eq(3)
+
+    first('button.btn-danger').click
+    first('button.btn-danger').click
+
+    first(text_field_class, value: /0/i).should be
+  end
+
+  it 'can set the expiration for queue throttles', js: true do
+    q.put(Qless::Job, {})
+
+    maximum_field_class = ".ql-q-#{q.name}-maximum"
+    expiration_field_class = ".ql-q-#{q.name}-expiration"
+
+    q.throttle.maximum.should eq(0)
+    q.throttle.ttl.should eq(-2)
+
+    visit '/throttles'
+
+    first('td', text: /ql:q:#{q.name}/i).should be
+    first(expiration_field_class, placeholder: /-2/i).should be
+
+    maximum = first(maximum_field_class)
+    maximum.set(3)
+    maximum.trigger('blur')
+
+    first(maximum_field_class, value: /3/i).should be
+    q.throttle.maximum.should eq(3)
+
+    expiration = first(expiration_field_class)
+    expiration.set(1)
+    expiration.trigger('blur')
+
+    visit '/throttles'
+
+    first(maximum_field_class, value: /0/i).should be
+    first(expiration_field_class, placeholder: /-2/i).should be
+  end
+
+  it 'can set and delete job throttles', js: true do
+    t_id = 'wakka' # the throttle id
+    jid = q.put(Qless::Job, {}, throttles: [t_id])
+
+    text_field_class = ".#{t_id}-maximum"
+    throttle = Throttle.new(t_id, client)
+
+    throttle.maximum.should eq(0)
+
+    visit "/jobs/#{jid}"
+
+    page.should have_content(t_id)
+    first(text_field_class, placeholder: /0/i).should be
+
+    maximum = first(text_field_class)
+    maximum.set(3)
+    maximum.trigger('blur')
+
+    first(text_field_class, value: /3/i).should be
+    throttle.maximum.should eq(3)
+
+    first('button.btn-danger.remove-throttle').click
+    first('button.btn-danger.remove-throttle').click
+
+    first(text_field_class, value: /0/i).should be
+  end
+
+  it 'can set the expiration for job throttles', js: true do
+    t_id = 'wakka' # the throttle id
+    jid = q.put(Qless::Job, {}, throttles: [t_id])
+
+    maximum_field_class = ".#{t_id}-maximum"
+    expiration_field_class = ".#{t_id}-expiration"
+    throttle = Throttle.new(t_id, client)
+
+    throttle.maximum.should eq(0)
+    throttle.ttl.should eq(-2)
+
+    visit "/jobs/#{jid}"
+
+    page.should have_content(t_id)
+    first(expiration_field_class, placeholder: /-2/i).should be
+
+    maximum = first(maximum_field_class)
+    maximum.set(3)
+    maximum.trigger('blur')
+
+    first(maximum_field_class, value: /3/i).should be
+    throttle.maximum.should eq(3)
+
+    expiration = first(expiration_field_class)
+    expiration.set(1)
+    expiration.trigger('blur')
+
+    visit "/jobs/#{jid}"
+
+    first(maximum_field_class, value: /0/i).should be
+    first(expiration_field_class, placeholder: /-2/i).should be
+  end
+
   it 'can see the root-level summary' do
     visit '/'

@@ -134,23 +251,34 @@ def test_pagination(page_1_jid = 1, page_2_jid = 27)
     q.put(Qless::Job, {})
     visit '/'
     first('.queue-row', text: /testing/).should be
-    first('.queue-row', text: /0\D+1\D+0\D+0\D+0/).should be
+    first('.queue-row', text: /0\D+1\D+0\D+0\D+0\D+0\D+0/).should be
     first('h1', text: /no queues/i).should be_nil
     first('h1', text: /queues and their job counts/i).should be

     # Let's pop the job, and make sure that we can see /that/
     job = q.pop
     visit '/'
-    first('.queue-row', text: /1\D+0\D+0\D+0\D+0/).should be
+    first('.queue-row', text: /1\D+0\D+0\D+0\D+0\D+0\D+0/).should be
     first('.worker-row', text: q.worker_name).should be
     first('.worker-row', text: /1\D+0/i).should be

     # Let's complete the job, and make sure it disappears
     job.complete
     visit '/'
-    first('.queue-row', text: /0\D+0\D+0\D+0\D+0/).should be
+    first('.queue-row', text: /0\D+0\D+0\D+0\D+0\D+0\D+0/).should be
     first('.worker-row', text: /0\D+0/i).should be

+    # Let's throttle a job, and make sure we see it
+    client.throttles['one'].maximum = 1
+    q.put(Qless::Job, {}, :throttles => ["one"])
+    q.put(Qless::Job, {}, :throttles => ["one"])
+    job1 = q.pop
+    job2 = q.pop
+    visit '/'
+    first('.queue-row', text: /1\D+0\D+1\D+0\D+0\D+0\D+0/).should be
+    job1.complete
+    q.pop.complete
+
     # Let's put and pop and fail a job, and make sure we see it
     q.put(Qless::Job, {})
     job = q.pop
@@ -162,11 +290,11 @@ def test_pagination(page_1_jid = 1, page_2_jid = 27)
     # And let's have one scheduled, and make sure it shows up accordingly
     jid = q.put(Qless::Job, {}, delay: 60)
     visit '/'
-    first('.queue-row', text: /0\D+0\D+1\D+0\D+0/).should be
+    first('.queue-row', text: /0\D+0\D+0\D+1\D+0\D+0\D+0/).should be

     # And one that depends on that job
     q.put(Qless::Job, {}, depends: [jid])
     visit '/'
-    first('.queue-row', text: /0\D+0\D+1\D+0\D+1/).should be
+    first('.queue-row', text: /0\D+0\D+0\D+1\D+0\D+1\D+0/).should be
   end

   it 'can visit the tracked page' do
@@ -193,6 +321,17 @@ def test_pagination(page_1_jid = 1, page_2_jid = 27)
     first('a', text: /completed\W+1/i).should be
     job.untrack

+    # And now for a throttled job
+    client.throttles['one'].maximum = 1
+    q.put(Qless::Job, {}, throttles: ["one"])
+    job = client.jobs[q.put(Qless::Job, {}, throttles: ["one"])]
+    job.track
+    q.pop(2)
+    visit '/track'
+    first('a', text: /all\W+1/i).should be
+    first('a', text: /throttled\W+1/i).should be
+    job.untrack
+
     # And now for a scheduled job
     job = client.jobs[q.put(Qless::Job, {}, delay: 600)]
     job.track
@@ -248,7 +387,7 @@ def test_pagination(page_1_jid = 1, page_2_jid = 27)
     job.requeue('testing')
     q.pop.complete
     visit "/jobs/#{job.jid}"
-    first('i.icon-remove').should be_nil
+    first('i.icon-remove.cancel-job').should be_nil
     first('i.icon-repeat').should be_nil
     first('i.icon-flag').should be
     first('i.caret').should be
@@ -634,6 +773,52 @@ def test_pagination(page_1_jid = 1, page_2_jid = 27)
     groups.map { |g| g.text }.join(' ').should eq('e j i h g f d c b a')
   end

+  it 'can visit /queues' do
+    # We should be able to see all of the appropriate tabs,
+    # We should be able to see all of the jobs
+    jid = q.put(Qless::Job, {})
+
+    # We should see this job
+    visit '/queues'
+    first('h3', text: /0\D+1\D+0\D+0\D+0\D+0\D+0/).should be
+
+    # Now let's pop off the job so that it's running
+    job = q.pop
+    visit '/queues'
+    first('h3', text: /1\D+0\D+0\D+0\D+0\D+0\D+0/).should be
+    job.complete
+
+    # And now for a throttled job
+    client.throttles['one'].maximum = 1
+    q.put(Qless::Job, {}, throttles: ["one"])
+    q.put(Qless::Job, {}, throttles: ["one"])
+    job1, job2 = q.pop(2)
+    visit '/queues'
+    first('h3', text: /1\D+0\D+1\D+0\D+0\D+0\D+0/).should be
+    job1.complete
+    q.pop.complete
+
+    # And now for a scheduled job
+    job = client.jobs[q.put(Qless::Job, {}, delay: 600)]
+    visit '/queues'
+    first('h3', text: /0\D+0\D+0\D+1\D+0\D+0\D+0/).should be
+    job.cancel
+
+    # And now a dependent job
+    job1 = client.jobs[q.put(Qless::Job, {})]
+    job2 = client.jobs[q.put(Qless::Job, {}, depends: [job1.jid])]
+    visit '/queues'
+    first('h3', text: /0\D+1\D+0\D+0\D+0\D+1\D+0/).should be
+    job2.cancel
+    job1.cancel
+
+    # And now a recurring job
+    job = client.jobs[q.recur(Qless::Job, {}, 5)]
+    visit '/queues'
+    first('h3', text: /0\D+0\D+0\D+0\D+0\D+0\D+1/).should be
+    job.cancel
+  end
+
   it 'can visit the various /queues/* endpoints' do
     # We should be able to see all of the appropriate tabs,
     # We should be able to see all of the jobs
@@ -648,6 +833,16 @@ def test_pagination(page_1_jid = 1, page_2_jid = 27)
     first('h2', text: /#{jid[0...8]}/).should be
     job.complete

+    # And now for a throttled job
+    client.throttles['one'].maximum = 1
+    job1 = client.jobs[q.put(Qless::Job, {}, throttles: ["one"])]
+    job2 = client.jobs[q.put(Qless::Job, {}, throttles: ["one"])]
+    q.pop(2)
+    visit '/queues/testing/throttled'
+    first('h2', text: /#{job2.jid[0...8]}/).should be
+    job1.cancel
+    job2.cancel
+
     # And now for a scheduled job
     job = client.jobs[q.put(Qless::Job, {}, delay: 600)]
     visit '/queues/testing/scheduled'
@@ -690,6 +885,16 @@ def test_pagination(page_1_jid = 1, page_2_jid = 27)
     job.untrack
     first('.tracked-row', text: /complete/i).should be

+    # And now for a throttled job
+    client.throttles['one'].maximum = 1
+    job1 = client.jobs[q.put(Qless::Job, {}, throttles: ["one"])]
+    job2 = client.jobs[q.put(Qless::Job, {}, throttles: ["one"])]
+    job2.track
+    q.pop(2)
+    visit '/'
+    first('.tracked-row', text: /throttled/i).should be
+    job2.untrack
+
     # And now for a scheduled job
     job = client.jobs[q.put(Qless::Job, {}, delay: 600)]
     job.track
@@ -824,6 +1029,7 @@ def test_pagination(page_1_jid = 1, page_2_jid = 27)
       'depends' => 0,
       'stalled' => 0,
       'scheduled' => 0,
+      'throttled' => 0,
       'paused' => false
     }
     JSON.parse(last_response.body).should eq([response])
diff --git a/spec/integration/workers/serial_spec.rb b/spec/integration/workers/serial_spec.rb
index 2a9277b9..a361600a 100644
--- a/spec/integration/workers/serial_spec.rb
+++ b/spec/integration/workers/serial_spec.rb
@@ -59,10 +59,11 @@ def self.perform(job)
     end

     # Wait for the job to complete, and then kill the child process
-    run_jobs(worker, 3) do
-      words.each do |word|
-        redis.brpop(key, timeout: 1).should eq([key.to_s, word])
-      end
+    run_jobs(worker, 3) {}
+
+    job_results = redis.lrange(key, 0, -1)
+    words.each do |word|
+      job_results.should include word
     end
   end

@@ -152,9 +153,11 @@ def self.perform(job)
     callback_invoked = false
     worker.on_current_job_lock_lost { callback_invoked = true }

+    queue.put('JobClass', {})
     queue.put('JobClass', {})

-    worker.listen_for_lost_lock do
+    job = queue.pop
+    worker.listen_for_lost_lock(job) do
       queue.pop.timeout
     end
diff --git a/spec/unit/job_spec.rb b/spec/unit/job_spec.rb
index b3d2da22..fb135470 100644
--- a/spec/unit/job_spec.rb
+++ b/spec/unit/job_spec.rb
@@ -37,6 +37,29 @@ def around_perform(job)
     end
   end

+  describe '.build_opts_array' do
+    it 'returns a correctly built array' do
+      # [data, delay, priority, priority_value, tags, tags_value, retries, retries_value,
+      #  depends, depends_value, throttles, throttles_value]
+      expected = ["{}", 0, "priority", 0, "tags", "[]", "retries", 5, "depends", "[]", "throttles", "[]"]
+      job = Job.build(client, JobClass)
+      expect(Job.build_opts_array(job.enqueue_opts)).to eq(expected)
+    end
+  end
+
+  describe '.enqueue_opts' do
+    it 'returns the available fields for enqueuing the job' do
+      expected_fields = [:data, :priority, :tags, :retries, :depends, :throttles]
+      job = Job.build(client, JobClass)
+      opts = job.enqueue_opts
+      expected_fields.each do |k|
+        expect(opts.has_key?(k)).to(be(true))
+      end
+
+      expect(opts.keys.length).to(equal(expected_fields.length))
+    end
+  end
+
   describe '#klass' do
     it 'returns the class constant' do
       job = Job.build(client, JobClass, data: {})
@@ -167,7 +190,7 @@ class MyCustomError < StandardError; end
           job.send(meth, *args)
         end.to raise_error(MyCustomError)

-        job.state_changed?.should be_false
+        job.state_changed?.should be false
       end

       it 'triggers before and after callbacks' do
@@ -203,6 +226,16 @@ class MyCustomError < StandardError; end
         spawned_from_jid: "foo"
       )
     end
+
+    it 'returns the throttles of the job' do
+      job = Job.build(client, JobClass, 'throttles' => ['my-throttle'])
+
+      expect(job.to_hash).to include(
+        klass_name: "Qless::JobClass",
+        state: "running",
+        throttles: ['my-throttle']
+      )
+    end
   end

   describe '#inspect' do
diff --git a/spec/unit/middleware/requeue_exceptions_spec.rb b/spec/unit/middleware/requeue_exceptions_spec.rb
index ee4d754a..95f294ff 100644
--- a/spec/unit/middleware/requeue_exceptions_spec.rb
+++ b/spec/unit/middleware/requeue_exceptions_spec.rb
@@ -16,191 +16,265 @@ def around_perform(job)
     end
   end

-  let(:container) { container_class.new }
-  let(:job) do
-    instance_double(
-      'Qless::Job', requeue: nil, queue_name: 'my-queue', data: {})
+  let(:container) do
+    container = container_class.new
+    container.extend(RequeueExceptions)
+    container
   end
-  let(:delay_range) { (0..30) }
-  let(:max_attempts) { 20 }
-
-  matched_exception_1 = ZeroDivisionError
-  matched_exception_2 = KeyError
-  unmatched_exception = RegexpError

-  module MessageSpecificException
-    def self.===(other)
-      ArgumentError === other && other.message.include?("foo")
+  describe ".requeue_on" do
+    it "does not raise with an empty class list" do
+      container.requeue_on(delay_range: 1..10,
+                           max_attempts: 1)
     end
-  end

-  before do
-    container.extend(RequeueExceptions)
-    container.requeue_on(matched_exception_1, matched_exception_2,
-                         MessageSpecificException,
-                         delay_range: delay_range,
-                         max_attempts: max_attempts)
-  end
+    it "raises KeyError if no max_attempts" do
+      expect do
+        container.requeue_on(delay_range: 1..10)
+      end.to raise_error(KeyError)
+    end

-  def set_requeue_callback
-    container.use_on_requeue_callback { |error, job| callback_catcher << [error, job] }
-  end
+    it "raises KeyError if no delay_range" do
+      expect do
+        container.requeue_on(max_attempts: 1)
+      end.to raise_error(KeyError)
+    end

-  def callback_catcher
-    @callback_catcher ||= []
-  end
+    it "raises NoMethodError if delay_range does not respond to .min or .max" do
+      expect do
+        container.requeue_on(delay_range: 1, max_attempts: 1)
+      end.to raise_error(NoMethodError)
+    end

-  def perform
-    container.around_perform(job)
-  end
+    it "raises ArgumentError if delay_range is not numerical" do
+      expect do
+        container.requeue_on(delay_range: "a".."z", max_attempts: 1)
+      end.to raise_error(ArgumentError)
+    end

-  describe '.use_on_requeue_callback' do
-    it 'uses a default callback if none is given' do
-      expect(container.on_requeue_callback).to eq(
-        RequeueExceptions::DEFAULT_ON_REQUEUE_CALLBACK)
+    it "raises TypeError if delay_range is descending" do
+      expect do
+        container.requeue_on(delay_range: 2..1, max_attempts: 1)
+      end.to raise_error(TypeError)
     end

-    it 'accepts a block to set an after requeue callback' do
-      container.use_on_requeue_callback { |*| true }
-      expect(container.on_requeue_callback).not_to eq(
-        RequeueExceptions::DEFAULT_ON_REQUEUE_CALLBACK)
+    it "raises TypeError on an empty delay_range" do
+      expect do
+        container.requeue_on(delay_range: 1..0, max_attempts: 1)
+      end.to raise_error(TypeError)
     end
-  end

-  context 'when no exception is raised' do
-    before { container.perform = -> { } }
+    it "adds exceptions to the requeueable collection on success" do
+      container.requeue_on(ArgumentError, TypeError, delay_range: 1..2, max_attempts: 2)
+      expect(container.requeueable_exceptions).to include(ArgumentError, TypeError)
+    end

-    it 'does not requeue the job' do
-      job.should_not_receive(:requeue)
-      perform
+    it "updates exceptions on repeated .requeue_on" do
+      container.requeue_on(ArgumentError, TypeError, delay_range: 1..2, max_attempts: 2)
+      container.requeue_on(TypeError, KeyError, delay_range: 1..2, max_attempts: 3)
+      expect(container.requeueable_exceptions).to include(ArgumentError, TypeError, KeyError)
+      expect(container.requeueable_exceptions[KeyError].max_attempts).to eq(3)
     end
   end

-  context 'when an unmatched exception is raised' do
-    before { container.perform = -> { raise unmatched_exception } }
-
-    it 'allows the error to propagate' do
-      job.should_not_receive(:requeue)
-      expect { perform }.to raise_error(unmatched_exception)
+  describe ".requeueable?" do
+    before do
+      container.requeue_on(KeyError, delay_range: 1..2, max_attempts: 3)
     end

-    context 'when an after requeue callback is set' do
-      before { set_requeue_callback }
-
-      it 'does not call the callback' do
-        expect { perform }.to raise_error(unmatched_exception)
+    it 'returns false if the exception was not registered via requeue_on' do
+      expect(container.requeueable?(TypeError)).to be(false)
+    end

-        expect(callback_catcher.size).to eq(0)
-      end
+    it 'returns true when the exception was registered via requeue_on' do
+      expect(container.requeueable?(KeyError)).to be(true)
     end
   end

-  shared_context "requeues on matching exception" do |exception, exception_name|
-    before { container.perform = -> { raise_exception } }
+  context "when requeue_on is successful" do

-    it 'requeues the job' do
-      job.should_receive(:requeue).with('my-queue', anything)
-      perform
+    let(:job) do
+      instance_double(
+        'Qless::Job', requeue: nil, queue_name: 'my-queue', data: {})
     end
+    let(:delay_range) { (0..30) }
+    let(:max_attempts) { 20 }

-    it 'uses a random delay from the delay_range' do
-      Kernel.srand(100)
-      sample = delay_range.to_a.sample
+    matched_exception_1 = ZeroDivisionError
+    matched_exception_2 = KeyError
+    unmatched_exception = RegexpError

-      job.should_receive(:requeue).with(
-        'my-queue', hash_including(delay: sample))
+    module MessageSpecificException
+      def self.===(other)
+        ArgumentError === other && other.message.include?("foo")
+      end
+    end

-      Kernel.srand(100)
-      perform
+    before do
+      container.requeue_on(matched_exception_1, matched_exception_2,
+                           MessageSpecificException,
+                           delay_range: delay_range,
+                           max_attempts: max_attempts)
     end

-    it 'tracks the number of requeues for this error' do
-      expected_first_time = {
-        'requeues_by_exception' => { exception_name => 1 } }
-      job.should_receive(:requeue).with('my-queue', hash_including(
-        data: expected_first_time
-      ))
-      perform
+    def set_requeue_callback
+      container.use_on_requeue_callback { |error, job| callback_catcher << [error, job] }
+    end

-      job.data.merge!(expected_first_time)
+    def callback_catcher
+      @callback_catcher ||= []
+    end

-      job.should_receive(:requeue).with('my-queue', hash_including(
-        data: { 'requeues_by_exception' => { exception_name => 2 } }
-      ))
-      perform
+    def perform
+      container.around_perform(job)
     end

-    it 'preserves other requeues_by_exception values' do
-      job.data['requeues_by_exception'] = { 'SomeKlass' => 3 }
+    describe '.use_on_requeue_callback' do
+      it 'uses a default callback if none is given' do
+        expect(container.on_requeue_callback).to eq(
+          RequeueExceptions::DEFAULT_ON_REQUEUE_CALLBACK)
+      end

-      job.should_receive(:requeue).with('my-queue', hash_including(
-        data: {
-          'requeues_by_exception' => {
-            exception_name => 1, 'SomeKlass' => 3
-          } }
-      ))
-      perform
+      it 'accepts a block to set an after requeue callback' do
+        container.use_on_requeue_callback { |*| true }
+        expect(container.on_requeue_callback).not_to eq(
+          RequeueExceptions::DEFAULT_ON_REQUEUE_CALLBACK)
+      end
     end

-    it 'preserves other data' do
-      job.data['foo'] = 3
+    context 'when no exception is raised' do
+      before { container.perform = -> { } }

-      job.should_receive(:requeue).with('my-queue', hash_including(
-        data: {
-          'requeues_by_exception' => { exception_name => 1 },
-          'foo' => 3 }
-      ))
-      perform
+      it 'does not requeue the job' do
+        job.should_not_receive(:requeue)
+        perform
+      end
     end

-    it 'allow the error to propogate after max_attempts' do
-      job.data['requeues_by_exception'] = {
-        exception_name => max_attempts }
-      job.should_not_receive(:requeue)
+    context 'when an unmatched exception is raised' do
+      before { container.perform = -> { raise unmatched_exception } }
+
+      it 'allows the error to propagate' do
+        job.should_not_receive(:requeue)
+        expect { perform }.to raise_error(unmatched_exception)
+      end
+
+      context 'when an after requeue callback is set' do
+        before { set_requeue_callback }

-      expect { perform }.to raise_error(exception)
+        it 'does not call the callback' do
+          expect { perform }.to raise_error(unmatched_exception)
+
+          expect(callback_catcher.size).to eq(0)
+        end
+      end
     end

-    context 'when an after requeue callback is set' do
-      before { set_requeue_callback }
+    shared_context "requeues on matching exception" do |exception, exception_name|
+      before { container.perform = -> { raise_exception } }
+
+      it 'requeues the job' do
+        job.should_receive(:requeue).with('my-queue', anything)
+        perform
+      end
+
+      it 'uses a random delay from the delay_range' do
+        job.should_receive(:requeue) do |qname, hash|
+          expect(qname).to eq('my-queue')
+          expect(hash[:delay]).to be_between(delay_range.min, delay_range.max)
+        end
+        perform
+      end
+
+      it 'tracks the number of requeues for this error' do
+        expected_first_time = {
+          'requeues_by_exception' => { exception_name => 1 } }
+        job.should_receive(:requeue).with('my-queue', hash_including(
+          data: expected_first_time
+        ))
+        perform
+
+        job.data.merge!(expected_first_time)

-      it 'calls the callback' do
-        expect {
-          perform
-        }.to change { callback_catcher.size }.from(0).to(1)
+        job.should_receive(:requeue).with('my-queue', hash_including(
+          data: { 'requeues_by_exception' => { exception_name => 2 } }
+        ))
+        perform
+      end
+
+      it 'preserves other requeues_by_exception values' do
+        job.data['requeues_by_exception'] = { 'SomeKlass' => 3 }
+
+        job.should_receive(:requeue).with('my-queue', hash_including(
+          data: {
+            'requeues_by_exception' => {
+              exception_name => 1, 'SomeKlass' => 3
+            } }
+        ))
+        perform
+      end
+
+      it 'preserves other data' do
+        job.data['foo'] = 3
+
+        job.should_receive(:requeue).with('my-queue', hash_including(
+          data: {
+            'requeues_by_exception' => { exception_name => 1 },
+            'foo' => 3 }
+        ))
+        perform
+      end
+
+      it 'allows the error to propagate after max_attempts' do
+        job.data['requeues_by_exception'] = {
+          exception_name => max_attempts }
+        job.should_not_receive(:requeue)
+
+        expect { perform }.to raise_error(exception)
+      end
+
+      context 'when an after requeue callback is set' do
+        before { set_requeue_callback }
+
+        it 'calls the callback' do
+          expect {
+            perform
+          }.to change { callback_catcher.size }.from(0).to(1)
+        end
       end
     end
-  end

-  context "when a matched exception is raised" do
-    include_examples "requeues on matching exception", matched_exception_1, matched_exception_1.name do
-      define_method(:raise_exception) { raise matched_exception_1 }
+    context "when a matched exception is raised" do
+      include_examples "requeues on matching exception", matched_exception_1, matched_exception_1.name do
+        define_method(:raise_exception) { raise matched_exception_1 }
+      end
     end
-  end

-  context "when another matched exception is raised" do
-    include_examples "requeues on matching exception", matched_exception_2, matched_exception_2.name do
-      define_method(:raise_exception) { raise matched_exception_2 }
+    context "when another matched exception is raised" do
+      include_examples "requeues on matching exception", matched_exception_2, matched_exception_2.name do
+        define_method(:raise_exception) { raise matched_exception_2 }
+      end
     end
-  end

-  context "when a subclass of a matched exception is raised" do
-    exception = Class.new(matched_exception_1)
-    include_examples "requeues on matching exception", exception, matched_exception_1.name do
-      define_method(:raise_exception) { raise exception }
+    context "when a subclass of a matched exception is raised" do
+      exception = Class.new(matched_exception_1)
+      include_examples "requeues on matching exception", exception, matched_exception_1.name do
+        define_method(:raise_exception) { raise exception }
+      end
     end
-  end

-  context "when an exception is raised that matches a listed on using `===` but not `is_a?" do
-    let(:exception_instance) { ArgumentError.new("Bad foo") }
+    context "when an exception is raised that matches a listed one using `===` but not `is_a?`" do
+      let(:exception_instance) { ArgumentError.new("Bad foo") }

-    before do
-      expect(exception_instance).not_to be_a(MessageSpecificException)
-      expect(MessageSpecificException).to be === exception_instance
-    end
+      before do
+        expect(exception_instance).not_to be_a(MessageSpecificException)
+        expect(MessageSpecificException).to be === exception_instance
+      end

-    include_examples "requeues on matching exception", MessageSpecificException, MessageSpecificException.name do
-      define_method(:raise_exception) { raise exception_instance }
+      include_examples "requeues on matching exception", MessageSpecificException, MessageSpecificException.name do
+        define_method(:raise_exception) { raise exception_instance }
+      end
     end
   end
 end
diff --git a/spec/unit/middleware/retry_exceptions_spec.rb b/spec/unit/middleware/retry_exceptions_spec.rb
index 459d0f06..f548b86a 100644
--- a/spec/unit/middleware/retry_exceptions_spec.rb
+++ b/spec/unit/middleware/retry_exceptions_spec.rb
@@ -160,34 +160,61 @@ def perform_and_track_delays
   end

   context 'with an exponential backoff retry strategy' do
-    before do
+    it 'generates an exponential delay' do
       container.instance_eval do
         use_backoff_strategy exponential(10)
       end
-    end

-    it 'uses an exponential delay' do
       delays = perform_and_track_delays
+
       expect(delays).to eq([10, 100, 1_000, 10_000, 100_000])
     end
-  end

-  context 'with an exponential backoff retry strategy and fuzz factor' do
-    before do
+    it 'generates an exponential delay using an explicitly given factor' do
       container.instance_eval do
-        use_backoff_strategy exponential(10, fuzz_factor: 0.5)
+        use_backoff_strategy exponential(10, factor: 3)
       end
+
+      delays = perform_and_track_delays
+
+      expect(delays).to eq([10, 30, 90, 270, 810])
     end

-    it 'adds some randomness to fuzz it' do
+    it 'spreads delays over a range when fuzz_factor is given' do
+      container.instance_eval do
+        use_backoff_strategy exponential(10, fuzz_factor: 0.3)
+      end
+
+      delays = perform_and_track_delays
+
+      [10, 100, 1_000, 10_000, 100_000].zip(delays).each do |unfuzzed, actual|
+        expect(actual).not_to eq(unfuzzed)
+        expect(actual).to be_within(30).percent_of(unfuzzed)
+      end
+    end
+
+    it 'combines factor and fuzz_factor' do
+      container.instance_eval do
+        use_backoff_strategy exponential(100, factor: 2, fuzz_factor: 0.2)
+      end
+
+      delays = perform_and_track_delays
+
+      [100, 200, 400, 800, 1600].zip(delays).each do |unfuzzed, actual|
+        expect(actual).not_to eq(unfuzzed)
+        expect(actual).to be_within(20).percent_of(unfuzzed)
+      end
+    end
+
+    it 'can be reused by multiple jobs' do
+      container.instance_eval do
+        use_backoff_strategy exponential(10, factor: 2)
+      end
+
+      perform_and_track_delays
+
       delays = perform_and_track_delays
-      expect(delays).not_to eq([10, 100, 1_000, 10_000, 100_000])
-      expect(delays[0]).to be_within(50).percent_of(10)
-      expect(delays[1]).to be_within(50).percent_of(100)
-      expect(delays[2]).to be_within(50).percent_of(1_000)
-      expect(delays[3]).to be_within(50).percent_of(10_000)
-      expect(delays[4]).to be_within(50).percent_of(100_000)
+
+      expect(delays).to eq([10, 20, 40, 80, 160])
     end
   end
 end
diff --git a/spec/unit/middleware/timeout_spec.rb b/spec/unit/middleware/timeout_spec.rb
new file mode 100644
index 00000000..8b5147cb
--- /dev/null
+++ b/spec/unit/middleware/timeout_spec.rb
@@ -0,0 +1,167 @@
+require 'spec_helper'
+require 'qless/job'
+require 'qless/middleware/timeout'
+
+module Qless
+  module Middleware
+    ::RSpec.describe Timeout do
+      class JobClass
+        def around_perform_call_counter
+          @around_perform_call_counter ||= 0
+        end
+
+        def around_perform(job)
+          @around_perform_call_counter = (@around_perform_call_counter || 0) + 1
+        end
+      end
+
+      let(:kernel_class) { class_double(Kernel).as_null_object }
+
+      let(:job) { instance_double(Qless::Job).as_null_object }
+
+      def make_worker(timeout_class, timeout_seconds, kernel_class)
+        Class.new(JobClass) do
+          include Qless::Middleware::Timeout.new(timeout_class: timeout_class,
+                                                 kernel_class: kernel_class) { timeout_seconds }
+        end.new
+      end
+
+      class TriggeredTimeout
+        def self.timeout(timeout_seconds)
+          raise ::Timeout::Error.new("triggered at #{timeout_seconds}s")
+        end
+      end
+
+      class InactiveTimeout
+        def self.timeout(timeout_seconds)
+          yield
+        end
+      end
+
+      context 'when timeout specified as nil (to block timeout processing for specific jobs)' do
+        it 'allows nil' do
+          worker = make_worker(TriggeredTimeout, nil, kernel_class)
+
+          expect {
+            worker.around_perform job
+          }.not_to raise_error
+        end
+
+        it 'invokes the job' do
+          worker = make_worker(TriggeredTimeout, nil, kernel_class)
+
+          worker.around_perform job
+
+          expect(worker.around_perform_call_counter).to eq(1)
+        end
+      end
+
+      context 'when timeout is not nil' do
+        it 'aborts with a clear error when given a non-numeric timeout' do
+          worker = make_worker(TriggeredTimeout, "123", kernel_class)
+
+          expect {
+            worker.around_perform job
+          }.to raise_error(Qless::InvalidTimeoutError)
+        end
+
+        it 'aborts with a clear error when given a non-positive timeout' do
+          worker = make_worker(TriggeredTimeout, -1, kernel_class)
+
+          expect {
+            worker.around_perform job
+          }.to raise_error(Qless::InvalidTimeoutError)
+        end
+
+        it 'invokes the job when a positive timeout is specified' do
+          worker = make_worker(InactiveTimeout, 120, kernel_class)
+
+          worker.around_perform job
+
+          expect(worker.around_perform_call_counter).to eq(1)
+        end
+
+        context 'when no timeout event detected' do
+          it 'does not invoke timeout recovery' do
+            worker = make_worker(InactiveTimeout, 120, kernel_class)
+
+            expect(kernel_class).not_to receive(:exit!)
+            expect(job).not_to receive(:reconnect_to_redis)
+            expect(job).not_to receive(:fail)
+
+            worker.around_perform job
+          end
+        end
+
+        context 'when timeout event detected' do
+          it 'rescues ::Timeout::Error in case of timeout' do
+            worker = make_worker(TriggeredTimeout, 120, kernel_class)
+            expect {
+              worker.around_perform job
+            }.not_to raise_error
+          end
+
+          it 'reconnects to redis (to recover in case redis causes timeout)' do
+            worker = make_worker(TriggeredTimeout, 120, kernel_class)
+            expect(job).to receive(:reconnect_to_redis)
+            expect {
+              worker.around_perform job
+            }.not_to raise_error
+          end
+
+          it 'fails the job' do
+            worker = make_worker(TriggeredTimeout, 120, kernel_class)
+            expect(job).to receive(:fail)
+            expect {
+              worker.around_perform job
+            }.not_to raise_error
+          end
+
+          context 'when worker does not install RequeueExceptions middleware' do
+            it 'calls neither #requeueable? nor #handle_exception' do
+              worker = make_worker(TriggeredTimeout, 120, kernel_class)
+
+              expect {
+                worker.around_perform job
+              }.not_to raise_error
+            end
+          end
+
+          context 'when worker installs RequeueExceptions middleware' do
+            it 'requeues the job if requeueing on JobTimedoutError was requested' do
+              worker = make_worker(TriggeredTimeout, 120, kernel_class)
+              worker.extend(RequeueExceptions)
+              worker.requeue_on JobTimedoutError, delay_range: (1..2), max_attempts: 3
+              allow(worker).to receive(:handle_exception).with(job, anything)
+
+              expect {
+                worker.around_perform job
+              }.not_to raise_error
+            end
+
+            it 'does not requeue the job if JobTimedoutError is not configured' do
+              worker = make_worker(TriggeredTimeout, 120, kernel_class)
+              worker.extend(RequeueExceptions)
+              expect(worker).not_to receive(:handle_exception).with(job, anything)
+
+              expect {
+                worker.around_perform job
+              }.not_to raise_error
+            end
+          end
+
+          it 'terminates the process so it is not left in an inconsistent state' \
+             ' (since `Timeout` can do that)' do
+            worker = make_worker(TriggeredTimeout, 120, kernel_class)
+            expect(kernel_class).to receive(:exit!)
+            expect {
+              worker.around_perform job
+            }.not_to raise_error
+          end
+        end
+      end
+    end
+  end
+end
diff --git a/spec/unit/qless_spec.rb b/spec/unit/qless_spec.rb
index 7e41af58..c8b2fb1d 100644
--- a/spec/unit/qless_spec.rb
+++ b/spec/unit/qless_spec.rb
@@ -36,6 +36,11 @@ def redis_double(overrides = {})
   end

   context 'when instantiated' do
+    it 'does not check the redis version if the check is disabled' do
+      Qless::Client.any_instance.should_not_receive(:assert_minimum_redis_version)
+      Qless::Client.new({redis: redis, ensure_minimum_version: false})
+    end
+
     it 'raises an error if the redis version is too low' do
       redis.stub(info: { 'redis_version' => '2.5.3' })
       expect { Qless::Client.new }.to raise_error(
diff --git a/spec/unit/queue_spec.rb b/spec/unit/queue_spec.rb
index 1061f2d5..ec7c5333 100644
--- a/spec/unit/queue_spec.rb
+++ b/spec/unit/queue_spec.rb
@@ -94,6 +94,26 @@ def enqueue(q, klass, data, opts = {})
     include_examples 'job options'
   end

+  describe "#throttle" do
+    let(:q) { Queue.new('a_queue', client) }
+
+    it "returns a Qless::Throttle" do
+      expect(q.throttle).to be_a(Qless::Throttle)
+    end
+
+    it "mirrors updates correctly" do
+      q.throttle.maximum.should eq(0)
+      t = Throttle.new('ql:q:a_queue', client)
+      t.maximum.should eq(0)
+
+      t.maximum = 3
+      q.throttle.maximum.should eq(3)
+
+      q.throttle.maximum = 5
+      t.maximum.should eq(5)
+    end
+  end
+
   describe "equality" do
     it 'is considered equal when the qless client and name are equal' do
       q1 = Qless::Queue.new('foo', client)
diff --git a/spec/unit/throttle_spec.rb b/spec/unit/throttle_spec.rb
new file mode 100644
index 00000000..638df3d5
--- /dev/null
+++ b/spec/unit/throttle_spec.rb
@@ -0,0 +1,53 @@
+# Encoding: utf-8
+
+require 'spec_helper'
+require 'yaml'
+require 'qless/queue'
+
+module Qless
+  describe Throttle, :integration do
+    it "stores the correct name and client at initialization" do
+      t = Throttle.new('name', client)
+      t.name.should eq('name')
+      t.client.should eq(client)
+    end
+
+    it "can delete the named throttle" do
+      t = Throttle.new('name', client)
+      t.maximum = 5
+      t.maximum.should eq(5)
+      t.delete
+      t.maximum.should eq(0)
+    end
+
+    it "returns the throttle name when id is called" do
+      t = Throttle.new('name', client)
+      t.id.should eq(t.name)
+    end
+
+    it "returns the set of locked jids" do
+      t = Throttle.new('name', client)
+      Redis.current.zadd('ql:th:name-locks', [[1, 1], [1, 2], [1, 3]])
+      t.locks.should eq(["1", "2", "3"])
+    end
+
+    it "can set and retrieve the throttle's maximum lock count" do
+      t = Throttle.new('name', client)
+      t.maximum = 5
+      t.maximum.should eq(5)
+    end
+
+    it "can set the throttle's expiration and retrieve its ttl" do
+      t = Throttle.new('name', client)
+      t.ttl.should be < 0
+      t.expiration = 5
+      t.ttl.should be > 0
+    end
+
+    it "handles throttle names as a String or Symbol" do
+      t = Throttle.new('name', client)
+      t.maximum = 5
+      t.id.should eq(t.name)
+    end
+  end
+end
diff --git a/spec/unit/worker_spec.rb b/spec/unit/worker_spec.rb
index 20f53011..c3c6d765 100644
--- a/spec/unit/worker_spec.rb
+++ b/spec/unit/worker_spec.rb
@@ -3,9 +3,13 @@
 # The thing we're testing
 require 'qless/worker'

+# Standard
+require 'logger'
+
 # Spec
 require 'spec_helper'

+
 module Qless
   describe Workers do
     shared_context 'with a dummy client' do
@@ -34,6 +38,12 @@ class JobClass; end
     shared_examples_for 'a worker' do
       before { clear_qless_memoization }
+      let(:worker) do
+        worker_class.new(
+          reserver,
+          output: log_output,
+          log_level: Logger::DEBUG)
+      end
       after(:all) { clear_qless_memoization }

       it 'performs the job' do
@@ -145,81 +155,51 @@ def around_perform(job)
         end
         worker.perform(job)
       end
+
+      it 'uses the logger specified in options' do
+        logger_io = StringIO.new
+        logger = Logger.new(logger_io)
+        worker = worker_class.new(reserver, logger: logger, log_level: Logger::DEBUG)
+
+        JobClass.stub(:perform)
+        worker.send(:log, :warn, 'my-message')
+        expect(logger_io.string).to match(/my-message/)
+      end
+
+      it 'reports the log_level when the worker configures its own log' do
+        worker = worker_class.new(reserver, output: log_output, log_level: Logger::ERROR)
+
+        expect(worker.log_level).to eq(Logger::ERROR)
+      end
+
+      it 'defaults the log_level to WARN when the worker configures its own log' do
+        worker = worker_class.new(reserver, output: log_output)
+
+        expect(worker.log_level).to eq(Logger::WARN)
+      end
+
+      it 'reports the log_level when a logger is passed in options' do
+        logger = Logger.new(StringIO.new)
+        logger.level = Logger::DEBUG
+        worker = worker_class.new(reserver, logger: logger)
+
+        expect(worker.log_level).to eq(Logger::DEBUG)
+      end
     end

     describe Workers::SerialWorker do
-      let(:worker) do
-        Workers::SerialWorker.new(
-          reserver,
-          output: log_output,
-          log_level: Logger::DEBUG)
-      end
+      let(:worker_class) { Workers::SerialWorker }

       include_context 'with a dummy client'
       it_behaves_like 'a worker'
     end

     describe Workers::ForkingWorker do
-      let(:worker) do
-        Workers::ForkingWorker.new(
-          reserver,
-          output: log_output,
-          log_level: Logger::DEBUG)
-      end
+      let(:worker_class) { Workers::ForkingWorker }

       include_context 'with a dummy client'
       it_behaves_like 'a worker'
     end
   end
 end
-
-# shared_examples_for 'a working worker' do
-#   describe '#work' do
-#     around(:each) do |example|
-#       old_procline = procline
-#       example.run
-#       $0 = old_procline
-#     end
-#
-#     it 'begins with a "starting" procline' do
-#       starting_procline = nil
-#       reserver.stub(:reserve) do
-#         starting_procline = procline
-#         nil
-#       end
-#
-#       worker.work(0)
-#       starting_procline.should include('Starting')
-#     end
-#
-#     it 'can be unpaused' do
-#       worker.pause
-#
-#       paused_checks = 0
-#       old_paused = worker.method(:paused)
-#       worker.stub(:paused) do
-#         paused_checks += 1 # count the number of loop iterations
-#         worker.unpause if paused_checks == 20 # so we don't loop forever
-#         old_paused.call
-#       end
-#
-#       worker.work(0)
-#       paused_checks.should be >= 20
-#     end
-#
-#     context 'when an error occurs while reserving a job' do
-#       before { reserver.stub(:reserve) { raise 'redis error' } }
-#
-#       it 'does not kill the worker' do
-#         expect { worker.work(0) }.not_to raise_error
-#       end
-#
-#       it 'logs the error' do
-#         worker.work(0)
-#         expect(log_output.string).to include('redis error')
-#       end
-#     end
-#   end
-# end
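Taken together, the specs above exercise the new throttle support end to end. A minimal usage sketch of the `Qless::Throttle` API added in this patch; the connection details and the throttle name are illustrative assumptions:

``` ruby
require 'qless'
require 'qless/throttle'

client = Qless::Client.new # assumes a local redis on the default port

# Named throttles can be fetched through the client, as the specs do ...
throttle = client.throttles['one']
# ... or constructed directly
throttle = Qless::Throttle.new('one', client)

throttle.maximum = 5     # at most 5 concurrent jobs may hold this throttle
throttle.expiration = 60 # expire the throttle's redis key after 60 seconds

throttle.locks   # => jids currently holding a lock
throttle.pending # => jids waiting on the throttle
throttle.ttl     # => seconds until expiration (negative if unset)

# Jobs opt in at enqueue time; queues also get an implicit ql:q:<name> throttle
client.queues['testing'].put(Qless::Job, {}, throttles: ['one'])

throttle.delete # remove the throttle entirely
```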