custom objective function
montyvesselinov committed Jul 25, 2022
1 parent 3ca3dde commit 0cacd17
Showing 5 changed files with 42 additions and 9 deletions.
38 changes: 38 additions & 0 deletions examples/xgboost_custom_objective_function.jl
@@ -0,0 +1,38 @@
import XGBoost

const DATAPATH = joinpath(dirname(dirname(pathof(XGBoost))), "data")
dtrain = XGBoost.DMatrix(joinpath(DATAPATH, "agaricus.txt.train"))
dtest = XGBoost.DMatrix(joinpath(DATAPATH, "agaricus.txt.test"))

# NOTE: for a customized objective function, we leave the objective as the default
# NOTE: what we get in prediction is the raw margin value
function logregobj(preds::Vector{Float32}, dtrain::XGBoost.DMatrix)
	labels = XGBoost.get_info(dtrain, "label")
	preds = 1.0 ./ (1.0 .+ exp.(-preds))  # logistic transformation of the margin
	grad = preds .- labels                # first derivative of the logistic loss
	hess = preds .* (1.0 .- preds)        # second derivative of the logistic loss
	return (grad, hess)
end

# user-defined evaluation function, returning a (metric_name, result) pair
# NOTE: when you use a customized objective function, the default prediction value is the raw margin;
# this may make the built-in evaluation metrics misbehave
# for example, with logistic loss the prediction is the score before the logistic transformation,
# while the built-in evaluation error assumes the input is after the logistic transformation
# keep this in mind when you customize, and write a customized evaluation function if needed
function evalerror(preds::Vector{Float32}, dtrain::XGBoost.DMatrix)
	labels = XGBoost.get_info(dtrain, "label")
	# return a (metric_name, result) pair
	# since preds are margins (before the logistic transformation, cutoff at 0)
	return ("self-error", sum((preds .> 0.0) .!= labels) / length(preds))
end

param = ["max_depth"=>2, "eta"=>3, "silent"=>1, "verbose"=>10]
watchlist = [(dtest, "eval"), (dtrain, "train")]
num_round = 10

# training with a customized objective function
# we can also do step-by-step training
# see the implementation of train in xgboost_lib.jl
bst = XGBoost.xgboost(dtrain, num_round; param=param, watchlist=watchlist, obj=logregobj, feval=evalerror)
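
As a sanity check on the gradient and Hessian above: for the logistic loss L(m) = -(y*log(p) + (1-y)*log(1-p)) with p = sigmoid(m), differentiating twice with respect to the margin m gives grad = p - y and hess = p*(1-p), which is exactly what logregobj returns. A minimal, self-contained Julia sketch that verifies this by finite differences (sigmoid, logloss, and the step h are illustrative names, not part of the example file):

sigmoid(m) = 1.0 / (1.0 + exp(-m))
logloss(m, y) = -(y * log(sigmoid(m)) + (1 - y) * log(1 - sigmoid(m)))

m, y, h = 0.7, 1.0, 1e-5
p = sigmoid(m)
# central finite differences for the first and second derivatives of the loss in m
grad_fd = (logloss(m + h, y) - logloss(m - h, y)) / (2h)
hess_fd = (logloss(m + h, y) - 2logloss(m, y) + logloss(m - h, y)) / h^2
@assert isapprox(grad_fd, p - y; atol=1e-6)       # matches grad in logregobj
@assert isapprox(hess_fd, p * (1 - p); atol=1e-4) # matches hess in logregobj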
2 changes: 0 additions & 2 deletions scripts/SmartFluxML_ChemML.jl
@@ -3,8 +3,6 @@ import CUDA
 import Mads
 import Random
 import Gadfly
-import Cairo
-import Fontconfig
 
 function ml0_model(; input=3, output=100, device=Flux.gpu)
 	model = device(Flux.Chain(Flux.Dense(input, 64), Flux.Dense(64, output)))
1 change: 0 additions & 1 deletion scripts/SmartFluxML_complex.jl
@@ -3,7 +3,6 @@ import CUDA
 import Mads
 import Random
 import Gadfly
-import Cairo, Fontconfig
 
 workdir = @show(@__DIR__)
 
2 changes: 0 additions & 2 deletions scripts/SmartTensors_Genie.jl
@@ -11,8 +11,6 @@ import DataFrames
 import NMFk
 import Mads
 import Gadfly
-import Cairo
-import Fontconfig
 import Colors
 import ColorSchemes
 import Clustering
8 changes: 4 additions & 4 deletions src/SmartML_Model.jl
@@ -88,17 +88,17 @@ function xgbtmodel(y::AbstractVector, x::AbstractMatrix; ratio::Number=0., keepc
 			"colsample_bylevel"=>collect(0.4:0.1:1.0),
 			"n_estimators"=>[100, 500, 1000])
 		model = ScikitLearn.GridSearch.RandomizedSearchCV(mod, param_dict; verbose=1, n_jobs=1, n_iter=10, cv=5)
-		ScikitLearn.fit!(model, xt[.!pm,:], y[.!pm])
+		ScikitLearn.fit!(model, x[.!pm,:], y[.!pm])
 		xgb_model = model.best_estimator_
-		xgb_model.fit(xt[.!pm,:], y[.!pm])
+		xgb_model.fit(x[.!pm,:], y[.!pm])
 		if save && filemodel != ""
 			@info("Saving model to file: $(filemodel)")
 			Mads.recursivemkdir(filemodel; filename=true)
 			XGBoost.save(filemodel, xgb_model)
 		end
 	end
-	y_pr = xgb_model.predict(xt)
-	return y_pr, pm, m
+	y_pr = xgb_model.predict(x)
+	return y_pr, pm, xgb_model
 end
 
 function svrmodel(y::AbstractVector, x::AbstractMatrix; ratio::Number=0., keepcases::BitArray=trues(length(y)), pm::Union{AbstractVector,Nothing}=nothing, normalize::Bool=true, scale::Bool=true, epsilon::Float64=.000000001, gamma::Float64=0.1, check::Bool=false, load::Bool=false, save::Bool=false, filemodel::AbstractString, kw...)
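
The substantive change in this hunk is that xgbtmodel now fits and predicts on the input matrix x (the earlier xt appears to be a leftover variable name) and returns the fitted xgb_model rather than m, so callers receive the trained estimator. A minimal usage sketch, under the assumptions that the function is called as SmartML.xgbtmodel and that ratio controls the held-out fraction (the data below are synthetic and purely illustrative):

import SmartML

x = rand(100, 3)                               # 100 samples, 3 features
y = x * [1.0, -2.0, 0.5] .+ 0.01 .* randn(100) # noisy linear target
y_pr, pm, xgb_model = SmartML.xgbtmodel(y, x; ratio=0.2)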

4 comments on commit 0cacd17

@montyvesselinov
Member Author

@JuliaRegistrator

Registration pull request created: JuliaRegistries/General/64968

After the above pull request is merged, it is recommended that a tag is created on this repository for the registered package version.

This will be done automatically if the Julia TagBot GitHub Action is installed, or can be done manually through the GitHub interface, or via:

git tag -a v0.1.1 -m "<description of version>" 0cacd17ac8948091268e93b1c3b796fcadf084b7
git push origin v0.1.1

@montyvesselinov
Member Author

@JuliaRegistrator

Error while trying to register: "Tag with name v0.1.1 already exists and points to a different commit"
