Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adding support for custom initial beliefs (from solver) #21

Draft
wants to merge 2 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
70 changes: 42 additions & 28 deletions src/solver.jl
Original file line number Diff line number Diff line change
@@ -1,26 +1,29 @@
Base.@kwdef struct SARSOPSolver{LOW,UP} <: Solver
epsilon::Float64 = 0.5
precision::Float64 = 1e-3
kappa::Float64 = 0.5
delta::Float64 = 1e-1
max_time::Float64 = 1.0
max_steps::Int = typemax(Int)
verbose::Bool = true
init_lower::LOW = BlindLowerBound(bel_res = 1e-2)
init_upper::UP = FastInformedBound(bel_res=1e-2)
prunethresh::Float64= 0.10
"""
    _root_belief(pomdp::POMDP)

Default root-belief provider: return `initialstate(pomdp)` unchanged.
"""
function _root_belief(pomdp::POMDP)
    return initialstate(pomdp)
end

"""
    SARSOPSolver(; kwargs...)

Keyword-constructed solver settings. The type parameters `LOW`, `UP` and `ROOT` are the
concrete types of the `init_lower`, `init_upper` and `root_belief` fields.

# Keywords
- `epsilon`, `kappa`, `delta`: numeric tuning parameters. NOTE(review): not referenced in
  the code visible here; confirm their meaning against the sampling code.
- `precision`: the solve loop keeps iterating while `root_diff(tree)` exceeds this value.
- `max_time`: the solve loop keeps iterating while the elapsed `time()` is below this value.
- `max_steps`: NOTE(review): not referenced in the code visible here; confirm its use.
- `verbose`: when `true`, `solve_info` prints a progress table.
- `init_lower`, `init_upper`: solvers handed to `solve` when the tree is initialized.
- `prunethresh`: forwarded to `PruneData` when the tree is built.
- `root_belief`: callable invoked as `root_belief(pomdp)`; its result is queried with
  `pdf(b0, s)` for each state. Defaults to `_root_belief`.
"""
Base.@kwdef struct SARSOPSolver{LOW, UP, ROOT} <: Solver
epsilon::Float64 = 0.5
precision::Float64 = 1e-3
kappa::Float64 = 0.5
delta::Float64 = 1e-1
max_time::Float64 = 1.0
max_steps::Int = typemax(Int)
verbose::Bool = true
init_lower::LOW = BlindLowerBound(; bel_res=1e-2)
init_upper::UP = FastInformedBound(; bel_res=1e-2)
prunethresh::Float64 = 0.10
root_belief::ROOT = _root_belief
end

function POMDPTools.solve_info(solver::SARSOPSolver, pomdp::POMDP)
tree = SARSOPTree(solver, pomdp)

if solver.verbose
initialize_verbose_output()
end

t0 = time()
iter = 0
while time()-t0 < solver.max_time && root_diff(tree) > solver.precision
while time() - t0 < solver.max_time && root_diff(tree) > solver.precision
sample!(solver, tree)
backup!(tree)
prune!(solver, tree)
Expand All @@ -30,37 +33,48 @@ function POMDPTools.solve_info(solver::SARSOPSolver, pomdp::POMDP)
iter += 1
end

if solver.verbose
if solver.verbose
dashed_line()
log_verbose_info(t0, iter, tree)
dashed_line()
end

pol = AlphaVectorPolicy(
pomdp,
getproperty.(tree.Γ, :alpha),
ordered_actions(pomdp)[getproperty.(tree.Γ, :action)]
)
return pol, (;
time = time()-t0,
tree,
iter
ordered_actions(pomdp)[getproperty.(tree.Γ, :action)],
)
return pol, (; time=time() - t0, tree, iter)
end

"""
    solve(solver::SARSOPSolver, pomdp::POMDP)

Run `solve_info` and return only the first element of its result (the policy),
discarding the accompanying info.
"""
function POMDPs.solve(solver::SARSOPSolver, pomdp::POMDP)
    result = solve_info(solver, pomdp)
    return first(result)
end

"""
    initialize_verbose_output()

Print the header of the verbose progress table: a dashed rule, one line of column
titles, and a closing dashed rule. Returns whatever the final `dashed_line()` call
returns.
"""
function initialize_verbose_output()
    dashed_line()
    # The header is printed exactly once; the column widths match the row format
    # used by `log_verbose_info`.
    @printf(
        " %-10s %-10s %-12s %-12s %-15s %-10s %-10s\n",
        "Time",
        "Iter",
        "LB",
        "UB",
        "Precision",
        "# Alphas",
        "# Beliefs"
    )
    return dashed_line()
end

"""
    log_verbose_info(t0::Float64, iter::Int, tree::SARSOPTree)

Print one row of the verbose progress table.

The row contains, in order: seconds elapsed since `t0`, the iteration count `iter`,
`tree.V_lower[1]`, `tree.V_upper[1]`, `root_diff(tree)`, the number of entries in `tree.Γ`,
and the number of entries of `tree.b_pruned` that are not set.
"""
function log_verbose_info(t0::Float64, iter::Int, tree::SARSOPTree)
    # Emit a single row per call.
    @printf(
        " %-10.2f %-10d %-12.7f %-12.7f %-15.10f %-10d %-10d\n",
        time() - t0,
        iter,
        tree.V_lower[1],
        tree.V_upper[1],
        root_diff(tree),
        length(tree.Γ),
        length(tree.b_pruned) - sum(tree.b_pruned)
    )
    return nothing
end

function dashed_line(n=86)
Expand Down
34 changes: 18 additions & 16 deletions src/sparse_tabular.jl
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
struct ModifiedSparseTabular <: POMDP{Int,Int,Int}
struct ModifiedSparseTabular <: POMDP{Int, Int, Int}
T::Vector{SparseMatrixCSC{Float64, Int64}} # T[a][sp, s]
R::Array{Float64, 2} # R[s,a]
O::Vector{SparseMatrixCSC{Float64, Int64}} # O[a][sp, o]
Expand All @@ -7,7 +7,7 @@ struct ModifiedSparseTabular <: POMDP{Int,Int,Int}
discount::Float64
end

function ModifiedSparseTabular(pomdp::POMDP)
function ModifiedSparseTabular(pomdp::POMDP, b0)
S = ordered_states(pomdp)
A = ordered_actions(pomdp)
O = ordered_observations(pomdp)
Expand All @@ -16,8 +16,8 @@ function ModifiedSparseTabular(pomdp::POMDP)
T = transition_matrix_a_sp_s(pomdp)
R = _tabular_rewards(pomdp, S, A, terminal)
O = POMDPTools.ModelTools.observation_matrix_a_sp_o(pomdp)
b0 = _vectorized_initialstate(pomdp, S)
return ModifiedSparseTabular(T,R,O,terminal,b0,discount(pomdp))
b0 = _vectorized_initialstate(b0, S)
return ModifiedSparseTabular(T, R, O, terminal, b0, discount(pomdp))
end

function transition_matrix_a_sp_s(mdp::Union{MDP, POMDP})
Expand All @@ -26,20 +26,20 @@ function transition_matrix_a_sp_s(mdp::Union{MDP, POMDP})

ns = length(S)
na = length(A)

transmat_row_A = [Int64[] for _ in 1:na]
transmat_col_A = [Int64[] for _ in 1:na]
transmat_data_A = [Float64[] for _ in 1:na]

for (si,s) in enumerate(S)
for (ai,a) in enumerate(A)
transmat_row_A = [Int64[] for _ ∈ 1:na]
transmat_col_A = [Int64[] for _ ∈ 1:na]
transmat_data_A = [Float64[] for _ ∈ 1:na]

for (si, s) ∈ enumerate(S)
for (ai, a) ∈ enumerate(A)
if isterminal(mdp, s) # if terminal, there is a probability of 1 of staying in that state
push!(transmat_row_A[ai], si)
push!(transmat_col_A[ai], si)
push!(transmat_data_A[ai], 1.0)
else
td = transition(mdp, s, a)
for (sp, p) in weighted_iterator(td)
for (sp, p) ∈ weighted_iterator(td)
if p > 0.0
spi = stateindex(mdp, sp)
push!(transmat_row_A[ai], spi)
Expand All @@ -50,7 +50,10 @@ function transition_matrix_a_sp_s(mdp::Union{MDP, POMDP})
end
end
end
transmats_A_SP_S = [sparse(transmat_row_A[a], transmat_col_A[a], transmat_data_A[a], ns, ns) for a in 1:na]
transmats_A_SP_S = [
sparse(transmat_row_A[a], transmat_col_A[a], transmat_data_A[a], ns, ns) for
a ∈ 1:na
]
return transmats_A_SP_S
end

Expand All @@ -65,19 +68,18 @@ function _tabular_rewards(pomdp, S, A, terminal)
R[s_idx, a_idx] = reward(pomdp, s, a)
end
end
R
return R
end

"""
    _vectorized_terminal(pomdp, S)

Return a `BitVector` with one entry per element of `S`, where entry `i` is
`isterminal(pomdp, S[i])`.
"""
function _vectorized_terminal(pomdp, S)
    term = BitVector(undef, length(S))
    # A single loop over the shared indices of `term` and `S`; `eachindex` with both
    # arguments checks that their indices agree before `@inbounds` is relied on.
    @inbounds for i ∈ eachindex(term, S)
        term[i] = isterminal(pomdp, S[i])
    end
    return term
end

function _vectorized_initialstate(pomdp, S)
b0 = initialstate(pomdp)
function _vectorized_initialstate(b0, S)
b0_vec = Vector{Float64}(undef, length(S))
@inbounds for i ∈ eachindex(S, b0_vec)
b0_vec[i] = pdf(b0, S[i])
Expand Down
44 changes: 21 additions & 23 deletions src/tree.jl
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ end
struct SARSOPTree
pomdp::ModifiedSparseTabular

b::Vector{SparseVector{Float64,Int}} # b_idx => belief vector
b::Vector{SparseVector{Float64, Int}} # b_idx => belief vector
b_children::Vector{UnitRange{Int}} # [b_idx][a_idx] => ba_idx
Vs_upper::Vector{Float64}
V_upper::Vector{Float64}
Expand All @@ -34,17 +34,15 @@ struct SARSOPTree
Γ::Vector{AlphaVec{Int}}
end


function SARSOPTree(solver, pomdp::POMDP)
sparse_pomdp = ModifiedSparseTabular(pomdp)
sparse_pomdp = ModifiedSparseTabular(pomdp, solver.root_belief(pomdp))
cache = TreeCache(sparse_pomdp)

upper_policy = solve(solver.init_upper, sparse_pomdp)
corner_values = map(maximum, zip(upper_policy.alphas...))

tree = SARSOPTree(
sparse_pomdp,

Vector{Float64}[],
Vector{Int}[],
corner_values, #upper_policy.util,
Expand All @@ -63,8 +61,8 @@ function SARSOPTree(solver, pomdp::POMDP)
Vector{Int}(),
BitVector(),
cache,
PruneData(0,0,solver.prunethresh),
AlphaVec{Int}[]
PruneData(0, 0, solver.prunethresh),
AlphaVec{Int}[],
)
return insert_root!(solver, tree, _initialize_belief(pomdp, initialstate(pomdp)))
end
Expand All @@ -82,7 +80,7 @@ POMDPs.discount(tree::SARSOPTree) = discount(tree.pomdp)
function _initialize_belief(pomdp::POMDP, dist::Any=initialstate(pomdp))
ns = length(states(pomdp))
b = zeros(ns)
for s in support(dist)
for s ∈ support(dist)
sidx = stateindex(pomdp, s)
b[sidx] = pdf(dist, s)
end
Expand All @@ -93,7 +91,7 @@ function insert_root!(solver, tree::SARSOPTree, b)
pomdp = tree.pomdp

Γ_lower = solve(solver.init_lower, pomdp)
for (α,a) ∈ alphapairs(Γ_lower)
for (α, a) ∈ alphapairs(Γ_lower)
new_val = dot(α, b)
push!(tree.Γ, AlphaVec(α, a))
end
Expand All @@ -118,7 +116,7 @@ function update(tree::SARSOPTree, b_idx::Int, a, o)
ba_idx = tree.b_children[b_idx][a]
bp_idx = tree.ba_children[ba_idx][o]
V̲, V̄ = if tree.is_terminal[bp_idx]
0.,0.
0.0, 0.0
else
lower_value(tree, tree.b[bp_idx]), upper_value(tree, tree.b[bp_idx])
end
Expand All @@ -139,7 +137,7 @@ function add_belief!(tree::SARSOPTree, b, ba_idx::Int, o)
push!(tree.is_terminal, terminal)

V̲, V̄ = if terminal
0., 0.
0.0, 0.0
else
lower_value(tree, b), upper_value(tree, b)
end
Expand Down Expand Up @@ -175,27 +173,27 @@ function fill_populated!(tree::SARSOPTree, b_idx::Int)
b = tree.b[b_idx]
Qa_upper = tree.Qa_upper[b_idx]
Qa_lower = tree.Qa_lower[b_idx]
for a in ACT
for a ∈ ACT
ba_idx = tree.b_children[b_idx][a]
tree.ba_pruned[ba_idx] && continue
Rba = belief_reward(tree, b, a)
Q̄ = Rba
Q̲ = Rba

for o in OBS
for o ∈ OBS
bp_idx, V̲, V̄ = update(tree, b_idx, a, o)
b′ = tree.b[bp_idx]
po = tree.poba[ba_idx][o]
Q̄ += γ*po*V̄
Q̲ += γ*po*V̲
Q̄ += γ * po * V̄
Q̲ += γ * po * V̲
end

Qa_upper[a] = Q̄
Qa_lower[a] = Q̲
end

tree.V_lower[b_idx] = lower_value(tree, tree.b[b_idx])
tree.V_upper[b_idx] = maximum(tree.Qa_upper[b_idx])
return tree.V_upper[b_idx] = maximum(tree.Qa_upper[b_idx])
end

function fill_unpopulated!(tree::SARSOPTree, b_idx::Int)
Expand All @@ -211,15 +209,15 @@ function fill_unpopulated!(tree::SARSOPTree, b_idx::Int)

Qa_upper = Vector{Float64}(undef, N_ACT)
Qa_lower = Vector{Float64}(undef, N_ACT)
b_children = (n_ba+1):(n_ba+N_ACT)
b_children = (n_ba + 1):(n_ba + N_ACT)

for a in A
for a ∈ A
ba_idx = add_action!(tree, b_idx, a)
ba_children = (n_b+1):(n_b+N_OBS)
ba_children = (n_b + 1):(n_b + N_OBS)
tree.ba_children[ba_idx] = ba_children

n_b += N_OBS
pred = dropzeros!(mul!(tree.cache.pred, pomdp.T[a],b))
pred = dropzeros!(mul!(tree.cache.pred, pomdp.T[a], b))
poba = zeros(Float64, N_OBS)
Rba = belief_reward(tree, b, a)

Expand All @@ -230,15 +228,15 @@ function fill_unpopulated!(tree::SARSOPTree, b_idx::Int)
# belief update
bp = corrector(pomdp, pred, a, o)
po = sum(bp)
if po > 0.
if po > 0.0
bp.nzval ./= po
poba[o] = po
end

bp_idx, V̲, V̄ = add_belief!(tree, bp, ba_idx, o)

Q̄ += γ*po*V̄
Q̲ += γ*po*V̲
Q̄ += γ * po * V̄
Q̲ += γ * po * V̲
end
Qa_upper[a] = Q̄
Qa_lower[a] = Q̲
Expand All @@ -247,5 +245,5 @@ function fill_unpopulated!(tree::SARSOPTree, b_idx::Int)
tree.Qa_upper[b_idx] = Qa_upper
tree.Qa_lower[b_idx] = Qa_lower
tree.V_lower[b_idx] = lower_value(tree, tree.b[b_idx])
tree.V_upper[b_idx] = maximum(tree.Qa_upper[b_idx])
return tree.V_upper[b_idx] = maximum(tree.Qa_upper[b_idx])
end