fix(runloop): use upstream status code in passive health check (#10325)
We expect the passive unhealthy health check on HTTP status codes to mark a target as "down" (not reachable or proxiable) based only on the HTTP status code returned by the target itself; it should not be affected by the final HTTP status code that a Kong plugin sets and returns to the client.

So we change the passive health check implementation to use the Nginx variable `upstream_status`, which is not changed by any plugin.
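To make the distinction concrete, here is a minimal OpenResty-style sketch (an illustration under stated assumptions, not code from this commit; it assumes a phase where `ngx.var.upstream_status` is readable, such as the log phase):

```lua
-- Sketch: a plugin may overwrite the client-facing status in header_filter,
-- so ngx.status can read 200 even though the upstream answered 502.
-- $upstream_status keeps the upstream's own answer; with several upstream
-- attempts it can be a list such as "502, 504 : 200", so the last three
-- characters hold the status of the attempt whose response was used.

local raw    = ngx.var.upstream_status or ""   -- e.g. "502, 504 : 200"
local status = tonumber(string.sub(raw, -3))   -- 200
            or ngx.status                      -- fallback when the var is unset
```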

FTI-4841
Fix #10281

(cherry picked from commit dbe8d94)
oowl authored and bungle committed Mar 21, 2023
1 parent a41843f commit 399e85c
Showing 3 changed files with 73 additions and 3 deletions.
4 changes: 3 additions & 1 deletion CHANGELOG.md
@@ -76,7 +76,9 @@
their consumers fail to process the entries. Instead, old batches are now dropped
and an error is logged.
[#10247](https://github.com/Kong/kong/pull/10247)

- Fix an issue where the balancer passive healthcheck would use the wrong status code when Kong changes the status code
from the upstream in the `header_filter` phase.
[#10325](https://github.com/Kong/kong/pull/10325)

## [3.0.1]

6 changes: 4 additions & 2 deletions kong/runloop/handler.lua
@@ -12,7 +12,6 @@ local workspaces = require "kong.workspaces"
local lrucache = require "resty.lrucache"
local marshall = require "kong.cache.marshall"


local PluginsIterator = require "kong.runloop.plugins_iterator"
local instrumentation = require "kong.tracing.instrumentation"

@@ -1689,7 +1688,10 @@ return {
-- Report HTTP status for health checks
local balancer_data = ctx.balancer_data
if balancer_data and balancer_data.balancer_handle then
local status = ngx.status
-- https://nginx.org/en/docs/http/ngx_http_upstream_module.html#variables
-- because of the way Nginx populates the upstream_status variable, it may
-- contain several comma-separated codes, so we parse out the last status
local status = tonumber(sub(var.upstream_status or "", -3)) or ngx.status
if status == 504 then
balancer_data.balancer.report_timeout(balancer_data.balancer_handle)
else
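As a worked example of the `-3` slice above (based on the format nginx documents for `$upstream_status`, not on output shown in this commit): when two servers are tried the variable can read `"502, 200"`; `tonumber(sub("502, 200", -3))` then yields `200`, the status of the response that was actually used, and when the variable is empty or unparsable the code falls back to `ngx.status`.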
66 changes: 66 additions & 0 deletions spec/02-integration/05-proxy/10-balancer/01-healthchecks_spec.lua
@@ -1209,6 +1209,72 @@ for _, strategy in helpers.each_strategy() do
end
end)

it("perform passive health checks in downstream status code was changed", function()

for nfails = 1, 3 do

bu.begin_testcase_setup(strategy, bp)
-- configure healthchecks
local upstream_name, upstream_id = bu.add_upstream(bp, {
healthchecks = bu.healthchecks_config {
passive = {
unhealthy = {
http_failures = nfails,
}
}
}
})
local port1 = bu.add_target(bp, upstream_id, localhost)
local port2 = bu.add_target(bp, upstream_id, localhost)
local api_host, service_id = bu.add_api(bp, upstream_name)
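-- simulate a plugin that overrides the response status in the header_filter
-- phase, so the status returned downstream no longer matches the upstream's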
bp.plugins:insert({
name = "pre-function",
service = { id = service_id },
config = {
header_filter = {
[[
ngx.exit(200)
]],
},
}
})

bu.end_testcase_setup(strategy, bp)

local requests = bu.SLOTS * 2 -- go round the balancer twice

-- setup target servers:
-- server2 will only respond for part of the test,
-- then server1 will take over.
local server2_oks = math.floor(requests / 4)
local server1 = https_server.new(port1, localhost)
local server2 = https_server.new(port2, localhost)
server1:start()
server2:start()

-- Go hit them with our test requests
local client_oks1, client_fails1 = bu.client_requests(bu.SLOTS, api_host)
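-- flip server2 into failure mode so the passive health checker starts
-- seeing failing statuses from it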
assert(bu.direct_request(localhost, port2, "/unhealthy"))
local client_oks2, client_fails2 = bu.client_requests(bu.SLOTS, api_host)

local client_oks = client_oks1 + client_oks2
local client_fails = client_fails1 + client_fails2

-- collect server results; hitcount
local count1 = server1:shutdown()
local count2 = server2:shutdown()

-- verify
assert.are.equal(requests - server2_oks - nfails, count1.ok)
assert.are.equal(server2_oks, count2.ok)
assert.are.equal(0, count1.fail)
assert.are.equal(nfails, count2.fail)

assert.are.equal(client_oks, requests)
assert.are.equal(0, client_fails)
end
end)

it("threshold for health checks", function()
local fixtures = {
dns_mock = helpers.dns_mock.new()

3 comments on commit 399e85c

@khcp-gha-bot

Docker image available: kong/kong:3.0.x-apk
Artifacts available: https://github.com/Kong/kong/actions/runs/4477338077

@khcp-gha-bot

Docker image available: kong/kong:3.0.x-rpm
Artifacts available: https://github.com/Kong/kong/actions/runs/4477338077

@khcp-gha-bot

Docker image available: kong/kong:3.0.x-deb
Artifacts available: https://github.com/Kong/kong/actions/runs/4477338077
