Skip to content

Commit

Permalink
fix: compare nodespecs based on ngpu/ncpu/mem first, before price (#49)
Browse files Browse the repository at this point in the history
  • Loading branch information
mortenpi authored Mar 12, 2024
1 parent 6799ba1 commit 077bcdd
Show file tree
Hide file tree
Showing 6 changed files with 151 additions and 25 deletions.
6 changes: 6 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,12 @@

The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## Version v0.1.9 - 2024-03-13

### Fixed

* `JuliaHub.nodespec` now correctly prioritizes the GPU, CPU, and memory counts, rather than the hourly price, when picking a "smallest node for a given spec". (#49)

## Version v0.1.8 - 2024-02-21

### Added
Expand Down
2 changes: 1 addition & 1 deletion Project.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
name = "JuliaHub"
uuid = "bc7fa6ce-b75e-4d60-89ad-56c957190b6e"
authors = ["JuliaHub Inc."]
version = "0.1.8"
version = "0.1.9"

[deps]
Base64 = "2a0f44e3-6c83-55bd-87e4-b1978d98bd5f"
Expand Down
8 changes: 4 additions & 4 deletions docs/src/reference/job-submission.md
Original file line number Diff line number Diff line change
Expand Up @@ -46,13 +46,13 @@ A list of these node specifications can be obtained with the [`nodespecs`](@ref)
```jldoctest
julia> JuliaHub.nodespecs()
9-element Vector{JuliaHub.NodeSpec}:
JuliaHub.nodespec(#= m6: 3.5 GHz Intel Xeon Platinum 8375C, 0.33/hr =#; ncpu=4, memory=16, ngpu=false, exactmatch=true)
JuliaHub.nodespec(#= m6: 3.5 GHz Intel Xeon Platinum 8375C, 0.65/hr =#; ncpu=8, memory=32, ngpu=false, exactmatch=true)
JuliaHub.nodespec(#= m6: 3.5 GHz Intel Xeon Platinum 8375C, 2.4/hr =#; ncpu=32, memory=128, ngpu=false, exactmatch=true)
JuliaHub.nodespec(#= m6: 3.5 GHz Intel Xeon Platinum 8375C, 0.17/hr =#; ncpu=2, memory=8, ngpu=false, exactmatch=true)
JuliaHub.nodespec(#= r6: 3.5 GHz Intel Xeon Platinum 8375C, 0.22/hr =#; ncpu=2, memory=16, ngpu=false, exactmatch=true)
JuliaHub.nodespec(#= m6: 3.5 GHz Intel Xeon Platinum 8375C, 0.33/hr =#; ncpu=4, memory=16, ngpu=false, exactmatch=true)
JuliaHub.nodespec(#= r6: 3.5 GHz Intel Xeon Platinum 8375C, 0.42/hr =#; ncpu=4, memory=32, ngpu=false, exactmatch=true)
JuliaHub.nodespec(#= m6: 3.5 GHz Intel Xeon Platinum 8375C, 0.17/hr =#; ncpu=2, memory=8, ngpu=false, exactmatch=true)
JuliaHub.nodespec(#= m6: 3.5 GHz Intel Xeon Platinum 8375C, 0.65/hr =#; ncpu=8, memory=32, ngpu=false, exactmatch=true)
JuliaHub.nodespec(#= r6: 3.5 GHz Intel Xeon Platinum 8375C, 1.3/hr =#; ncpu=8, memory=64, ngpu=false, exactmatch=true)
JuliaHub.nodespec(#= m6: 3.5 GHz Intel Xeon Platinum 8375C, 2.4/hr =#; ncpu=32, memory=128, ngpu=false, exactmatch=true)
JuliaHub.nodespec(#= p2: Intel Xeon E5-2686 v4 (Broadwell), 1.4/hr =#; ncpu=4, memory=61, ngpu=true, exactmatch=true)
JuliaHub.nodespec(#= p3: Intel Xeon E5-2686 v4 (Broadwell), 4.5/hr =#; ncpu=8, memory=61, ngpu=true, exactmatch=true)
```
Expand Down
27 changes: 20 additions & 7 deletions src/node.jl
Original file line number Diff line number Diff line change
Expand Up @@ -83,9 +83,14 @@ function nodespecs(; auth::Authentication=__auth__())
try
json = JSON.parse(String(r.body))
if json["success"]
return [
nodes = [
NodeSpec(n) for n in json["node_specs"]
]
# We'll sort the list using the same logic that _nodespec_smallest uses, so that
# the result does not depend on the backend response ordering. However, whether the
# list is sorted, and by what criteria, is not documented, and is considered to be
# an implementation detail.
return sort(nodes; by=_nodespec_cmp_by)
end
catch err
throw(JuliaHubError("Unexpected answer received."))
Expand All @@ -109,9 +114,10 @@ Finds the node matching the specified node parameters. Throws an [`InvalidReques
if it is unable to find a node with the specific parameters. However, if `throw` is set to
`false`, it will return `nothing` instead in that situation.
By default, it searches for the smallest node that has the specified parameters
or more higher. If `exactmatch` is set to `true`, it only returns a node specification
if it can find one that matches the parameters exactly.
By default, it searches for the smallest node that has at least the specified parameters
(prioritizing GPU count, CPU count, and memory in this order when deciding).
If `exactmatch` is set to `true`, it only returns a node specification if it can find one that
matches the parameters exactly.
A list of nodes (e.g. from [`nodespecs`](@ref)) can also be passed, so that the function
does not have to query the server for the list. When this method is used, it is not necessary
Expand Down Expand Up @@ -150,7 +156,7 @@ function nodespec(
if exactmatch
_nodespec_exact(nodes; ncpu, memory, gpu=has_gpu, throw)
else
_nodespec_cheapest(nodes; ncpu, memory, gpu=has_gpu, throw)
_nodespec_smallest(nodes; ncpu, memory, gpu=has_gpu, throw)
end
end

Expand All @@ -173,10 +179,12 @@ function _nodespec_exact(
return nodes[first(idxs)]
end

function _nodespec_cheapest(
function _nodespec_smallest(
nodes::Vector{NodeSpec}; ncpu::Integer, memory::Integer, gpu::Bool, throw::Bool
)
nodes = sort(nodes; by=n -> (n.priceHr, n.hasGPU, n.vcores, n.mem))
# Note: while JuliaHub.nodespecs() does return a sorted list, we can not assume that
# here, since the user can pass their own list which might not be sorted.
nodes = sort(nodes; by=_nodespec_cmp_by)
idx = findfirst(nodes) do n
# !gpu || n.hasGPU <=> gpu => n.hasGPU
(!gpu || n.hasGPU) && (n.vcores >= ncpu) && (n.mem >= memory)
Expand All @@ -190,3 +198,8 @@ function _nodespec_cheapest(
return nodes[idx]
end
end

# Sort key used to order NodeSpecs when looking for the "smallest" one. Tuples are
# compared element by element, so GPU presence is considered first, then the CPU
# count, then memory. The hourly price comes last and therefore only breaks ties
# between nodes whose GPU, CPU and memory values are all equal.
function _nodespec_cmp_by(n::NodeSpec)
    return (n.hasGPU, n.vcores, n.mem, n.priceHr)
end
102 changes: 102 additions & 0 deletions test/jobs.jl
Original file line number Diff line number Diff line change
Expand Up @@ -120,10 +120,12 @@ end

# We'll re-use this further down in job submission tests.
ns_cheapest = Mocking.apply(mocking_patch) do
empty!(MOCK_JULIAHUB_STATE)
JuliaHub.nodespec()
end

@testset "JuliaHub.nodespec/s()" begin
empty!(MOCK_JULIAHUB_STATE)
@testset "Cheapest" begin
@test ns_cheapest.hasGPU === false
@test ns_cheapest.vcores == 2
Expand Down Expand Up @@ -184,6 +186,106 @@ end
@test_throws JuliaHub.InvalidRequestError JuliaHub.nodespec(; ngpu=10, throw=true)
@test @test_logs (:warn,) JuliaHub.nodespec(; ngpu=10, throw=false) === nothing
end

# Check that we ignore bad price information, and match node based on the GPU, CPU, and memory (in that order)
MOCK_JULIAHUB_STATE[:nodespecs] = [
#! format: off
# class, gpu, cpu, mem, price, desc, ?, memdisp, ?, ?, id
[ "c1", false, 1.0, 16.0, 3.00, "3.5 GHz Intel Xeon Platinum 8375C", "", "4", 90.50, 87.90, 2],
[ "c2", false, 2.0, 8.0, 2.00, "3.5 GHz Intel Xeon Platinum 8375C", "", "4", 95.10, 92.10, 3],
[ "c8", false, 8.0, 4.0, 1.00, "3.5 GHz Intel Xeon Platinum 8375C", "", "4", 98.50, 93.90, 4],
#! format: on
]
Mocking.apply(mocking_patch) do
let n = JuliaHub.nodespec()
@test n.nodeClass == "c1"
@test n._id == 2
@test n.vcores == 1
@test n.mem == 16
@test !n.hasGPU
end
let n = JuliaHub.nodespec(; ncpu=2)
@test n.nodeClass == "c2"
@test n._id == 3
@test n.vcores == 2
@test n.mem == 8
@test !n.hasGPU
end
# Test sorting of JuliaHub.nodespecs()
@test [n.nodeClass for n in JuliaHub.nodespecs()] == ["c1", "c2", "c8"]
end
# Cheap GPU node gets de-prioritised:
push!(
MOCK_JULIAHUB_STATE[:nodespecs],
#! format: off
# class, gpu, cpu, mem, price, desc, ?, memdisp, ?, ?, id
[ "c1g1", true, 1.0, 16.0, 0.00, "3.5 GHz Intel Xeon Platinum 8375C", "", "4", 90.50, 87.90, 5],
#! format: on
)
Mocking.apply(mocking_patch) do
let n = JuliaHub.nodespec()
@test n.nodeClass == "c1"
@test n._id == 2
@test n.vcores == 1
@test n.mem == 16
@test !n.hasGPU
end
# Test sorting of JuliaHub.nodespecs()
@test [n.nodeClass for n in JuliaHub.nodespecs()] == ["c1", "c2", "c8", "c1g1"]
end
# Low memory gets prioritized:
push!(
MOCK_JULIAHUB_STATE[:nodespecs],
#! format: off
# class, gpu, cpu, mem, price, desc, ?, memdisp, ?, ?, id
[ "c1m1", false, 1.0, 1.0, 99.99, "3.5 GHz Intel Xeon Platinum 8375C", "", "4", 90.50, 87.90, 6],
#! format: on
)
Mocking.apply(mocking_patch) do
let n = JuliaHub.nodespec()
@test n.nodeClass == "c1m1"
@test n._id == 6
@test n.vcores == 1
@test n.mem == 1
@test !n.hasGPU
end
# But we'll be forced to pick the GPU node here:
let n = JuliaHub.nodespec(; ngpu=1)
@test n.nodeClass == "c1g1"
@test n._id == 5
@test n.vcores == 1
@test n.mem == 16
@test n.hasGPU
end
# Test sorting of JuliaHub.nodespecs()
@test [n.nodeClass for n in JuliaHub.nodespecs()] == ["c1m1", "c1", "c2", "c8", "c1g1"]
end
# However, for identical nodespecs, we disambiguate based on price:
MOCK_JULIAHUB_STATE[:nodespecs] = [
#! format: off
# class, gpu, cpu, mem, price, desc, ?, memdisp, ?, ?, id
[ "a1", false, 1.0, 1.0, 2.00, "3.5 GHz Intel Xeon Platinum 8375C", "", "4", 90.50, 87.90, 2],
[ "a2", false, 1.0, 1.0, 1.00, "3.5 GHz Intel Xeon Platinum 8375C", "", "4", 95.10, 92.10, 3],
[ "a3", false, 1.0, 1.0, 2.00, "3.5 GHz Intel Xeon Platinum 8375C", "", "4", 98.50, 93.90, 4],
#! format: on
]
Mocking.apply(mocking_patch) do
let n = JuliaHub.nodespec()
@test n._id == 3
@test n.nodeClass == "a2"
@test n.vcores == 1
@test n.mem == 1
@test !n.hasGPU
end
# Test sorting of JuliaHub.nodespecs()
let ns = JuliaHub.nodespecs()
    # "a2" is the cheapest of the three otherwise identical specs, so it sorts first.
    @test ns[1].nodeClass == "a2"
    # With identical spec and price, order is not guaranteed, so only check membership.
    @test ns[2].nodeClass ∈ ("a1", "a3")
    @test ns[3].nodeClass ∈ ("a1", "a3")
end
end
empty!(MOCK_JULIAHUB_STATE)
end

# This testset uses the show(::IO, ::JuliaHub.ComputeConfig) representation of ComputeConfig,
Expand Down
31 changes: 18 additions & 13 deletions test/mocking.jl
Original file line number Diff line number Diff line change
Expand Up @@ -186,21 +186,26 @@ function _restcall_mocked(method, url, headers, payload; query)
apiv = get(MOCK_JULIAHUB_STATE, :api_version, JuliaHub._MISSING_API_VERSION)
# Mocked versions of the different endpoints:
if (method == :GET) && endswith(url, "app/config/nodespecs/info")
Dict(
"message" => "", "success" => true,
"node_specs" => [
nodespecs = get(MOCK_JULIAHUB_STATE, :nodespecs) do
[
#! format: off
["m6", false, 4.0, 16.0, 0.33, "3.5 GHz Intel Xeon Platinum 8375C", "", "4", 90.5, 87.9, 2],
["m6", false, 8.0, 32.0, 0.65, "3.5 GHz Intel Xeon Platinum 8375C", "", "4", 95.1, 92.1, 3],
["m6", false, 32.0, 128.0, 2.4, "3.5 GHz Intel Xeon Platinum 8375C", "", "4", 98.5, 93.9, 4],
["r6", false, 2.0, 16.0, 0.22, "3.5 GHz Intel Xeon Platinum 8375C", "", "8", 81.5, 89.8, 5],
["r6", false, 4.0, 32.0, 0.42, "3.5 GHz Intel Xeon Platinum 8375C", "", "8", 90.5, 92.1, 6],
["m6", false, 2.0, 8.0, 0.17, "3.5 GHz Intel Xeon Platinum 8375C", "", "4", 81.5, 83.25, 7],
["r6", false, 8.0, 64.0, 1.3, "3.5 GHz Intel Xeon Platinum 8375C", "", "8", 95.1, 94.25, 9],
["p2", true, 4.0, 61.0, 1.4, "Intel Xeon E5-2686 v4 (Broadwell)", "", "K80", 90.25, 88.09, 8],
["p3", true, 8.0, 61.0, 4.5, "Intel Xeon E5-2686 v4 (Broadwell)", "", "V100", 95.03, 88.09, 1],
# class, gpu, cpu, mem, price, desc, ?, memdisp, ?, ?, id
[ "m6", false, 4.0, 16.0, 0.33, "3.5 GHz Intel Xeon Platinum 8375C", "", "4", 90.50, 87.90, 2],
[ "m6", false, 8.0, 32.0, 0.65, "3.5 GHz Intel Xeon Platinum 8375C", "", "4", 95.10, 92.10, 3],
[ "m6", false, 32.0, 128.0, 2.40, "3.5 GHz Intel Xeon Platinum 8375C", "", "4", 98.50, 93.90, 4],
[ "r6", false, 2.0, 16.0, 0.22, "3.5 GHz Intel Xeon Platinum 8375C", "", "8", 81.50, 89.80, 5],
[ "r6", false, 4.0, 32.0, 0.42, "3.5 GHz Intel Xeon Platinum 8375C", "", "8", 90.50, 92.10, 6],
[ "m6", false, 2.0, 8.0, 0.17, "3.5 GHz Intel Xeon Platinum 8375C", "", "4", 81.50, 83.25, 7],
[ "r6", false, 8.0, 64.0, 1.30, "3.5 GHz Intel Xeon Platinum 8375C", "", "8", 95.10, 94.25, 9],
[ "p2", true, 4.0, 61.0, 1.40, "Intel Xeon E5-2686 v4 (Broadwell)", "", "K80", 90.25, 88.09, 8],
[ "p3", true, 8.0, 61.0, 4.50, "Intel Xeon E5-2686 v4 (Broadwell)", "", "V100", 95.03, 88.09, 1],
#! format: on
],
]
end
Dict(
"message" => "",
"success" => true,
"node_specs" => nodespecs,
) |> jsonresponse(200)
elseif (method == :GET) && endswith(url, "app/packages/registries")
packages_registries = get(MOCK_JULIAHUB_STATE, :app_packages_registries) do
Expand Down

0 comments on commit 077bcdd

Please sign in to comment.