custom objective function
montyvesselinov committed Jul 25, 2022
1 parent 3ca3dde commit 0cacd17
Showing 5 changed files with 42 additions and 9 deletions.
38 changes: 38 additions & 0 deletions examples/xgboost_custom_objective_function.jl
@@ -0,0 +1,38 @@
import XGBoost

const DATAPATH = joinpath(dirname(dirname(pathof(XGBoost))), "data")
dtrain = XGBoost.DMatrix(joinpath(DATAPATH, "agaricus.txt.train"))
dtest = XGBoost.DMatrix(joinpath(DATAPATH, "agaricus.txt.test"))

# NOTE: for a customized objective function, we leave the objective as the default
# NOTE: what we get in prediction is the raw margin value
function logregobj(preds::Vector{Float32}, dtrain::XGBoost.DMatrix)
	labels = XGBoost.get_info(dtrain, "label")
	preds = 1.0 ./ (1.0 .+ exp.(-preds))  # logistic transformation of the margin
	grad = preds .- labels                # first derivative of the logistic loss
	hess = preds .* (1.0 .- preds)        # second derivative of the logistic loss
	return (grad, hess)
end

# user-defined evaluation function, returning a (metric_name, result) pair
# NOTE: when you use a customized objective function, the default prediction value is the raw margin;
# this may make the built-in evaluation metrics misbehave
# for example, with logistic loss the prediction is the score before the logistic transformation,
# while the built-in evaluation error assumes the input is after the logistic transformation
# keep this in mind when you customize, and write a customized evaluation function if needed
function evalerror(preds::Vector{Float32}, dtrain::XGBoost.DMatrix)
	labels = XGBoost.get_info(dtrain, "label")
	# return a (metric_name, result) pair
	# since preds are margins (before the logistic transformation, cutoff at 0)
	return ("self-error", sum((preds .> 0.0) .!= labels) / length(preds))
end

param = ["max_depth"=>2, "eta"=>3, "silent"=>1, "verbose"=>10]
watchlist = [(dtest, "eval"), (dtrain, "train")]
num_round = 10

# training with a customized objective function
# we can also do step-by-step training
# see the implementation of train in xgboost_lib.jl
bst = XGBoost.xgboost(dtrain, num_round; param=param, watchlist=watchlist, obj=logregobj, feval=evalerror)
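
As a sanity check on the gradient and Hessian above: for the logistic loss L(m) = -(y*log(p) + (1-y)*log(1-p)) with p = sigmoid(m), differentiating twice with respect to the margin m gives grad = p - y and hess = p*(1-p), which is exactly what logregobj returns. A minimal, self-contained Julia sketch that verifies this by finite differences (sigmoid, logloss, and the step h are illustrative names, not part of the example file):

sigmoid(m) = 1.0 / (1.0 + exp(-m))
logloss(m, y) = -(y * log(sigmoid(m)) + (1 - y) * log(1 - sigmoid(m)))

m, y, h = 0.7, 1.0, 1e-5
p = sigmoid(m)
# central finite differences for the first and second derivatives of the loss in m
grad_fd = (logloss(m + h, y) - logloss(m - h, y)) / (2h)
hess_fd = (logloss(m + h, y) - 2logloss(m, y) + logloss(m - h, y)) / h^2
@assert isapprox(grad_fd, p - y; atol=1e-6)       # matches grad in logregobj
@assert isapprox(hess_fd, p * (1 - p); atol=1e-4) # matches hess in logregobj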
2 changes: 0 additions & 2 deletions scripts/SmartFluxML_ChemML.jl
@@ -3,8 +3,6 @@ import CUDA
 import Mads
 import Random
 import Gadfly
-import Cairo
-import Fontconfig
 
 function ml0_model(; input=3, output=100, device=Flux.gpu)
 	model = device(Flux.Chain(Flux.Dense(input, 64), Flux.Dense(64, output)))
1 change: 0 additions & 1 deletion scripts/SmartFluxML_complex.jl
@@ -3,7 +3,6 @@ import CUDA
 import Mads
 import Random
 import Gadfly
-import Cairo, Fontconfig
 
 workdir = @show(@__DIR__)
 
2 changes: 0 additions & 2 deletions scripts/SmartTensors_Genie.jl
@@ -11,8 +11,6 @@ import DataFrames
 import NMFk
 import Mads
 import Gadfly
-import Cairo
-import Fontconfig
 import Colors
 import ColorSchemes
 import Clustering
8 changes: 4 additions & 4 deletions src/SmartML_Model.jl
@@ -88,17 +88,17 @@ function xgbtmodel(y::AbstractVector, x::AbstractMatrix; ratio::Number=0., keepc
 			"colsample_bylevel"=>collect(0.4:0.1:1.0),
 			"n_estimators"=>[100, 500, 1000])
 		model = ScikitLearn.GridSearch.RandomizedSearchCV(mod, param_dict; verbose=1, n_jobs=1, n_iter=10, cv=5)
-		ScikitLearn.fit!(model, xt[.!pm,:], y[.!pm])
+		ScikitLearn.fit!(model, x[.!pm,:], y[.!pm])
 		xgb_model = model.best_estimator_
-		xgb_model.fit(xt[.!pm,:], y[.!pm])
+		xgb_model.fit(x[.!pm,:], y[.!pm])
 		if save && filemodel != ""
 			@info("Saving model to file: $(filemodel)")
 			Mads.recursivemkdir(filemodel; filename=true)
 			XGBoost.save(filemodel, xgb_model)
 		end
 	end
-	y_pr = xgb_model.predict(xt)
-	return y_pr, pm, m
+	y_pr = xgb_model.predict(x)
+	return y_pr, pm, xgb_model
 end
 
 function svrmodel(y::AbstractVector, x::AbstractMatrix; ratio::Number=0., keepcases::BitArray=trues(length(y)), pm::Union{AbstractVector,Nothing}=nothing, normalize::Bool=true, scale::Bool=true, epsilon::Float64=.000000001, gamma::Float64=0.1, check::Bool=false, load::Bool=false, save::Bool=false, filemodel::AbstractString, kw...)
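
The substantive change in this hunk is that xgbtmodel now fits and predicts on the input matrix x (the earlier xt appears to be a leftover variable name) and returns the fitted xgb_model rather than m, so callers receive the trained estimator. A minimal usage sketch, under the assumptions that the function is called as SmartML.xgbtmodel and that ratio controls the held-out fraction (the data below are synthetic and purely illustrative):

import SmartML

x = rand(100, 3)                               # 100 samples, 3 features
y = x * [1.0, -2.0, 0.5] .+ 0.01 .* randn(100) # noisy linear target
y_pr, pm, xgb_model = SmartML.xgbtmodel(y, x; ratio=0.2)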

4 comments on commit 0cacd17

@montyvesselinov
Member Author

@JuliaRegistrator

Registration pull request created: JuliaRegistries/General/64968

After the above pull request is merged, it is recommended that a tag is created on this repository for the registered package version.

This will be done automatically if the Julia TagBot GitHub Action is installed, or can be done manually through the GitHub interface, or via:

git tag -a v0.1.1 -m "<description of version>" 0cacd17ac8948091268e93b1c3b796fcadf084b7
git push origin v0.1.1

@montyvesselinov
Member Author

@JuliaRegistrator

Error while trying to register: "Tag with name v0.1.1 already exists and points to a different commit"
