1-catboost-classification

dataset

1. load package

Code
include("../utils.jl")
import Plots:scatter!,contourf
import MLJ:predict,predict_mode,measures
using Plots, MLJ, CSV, DataFrames
using CatBoost.MLJCatBoostInterface

2. load data

Code
  # Load the "basic1" dataset via the project helper `load_csv`
  # (presumably defined in ../utils.jl -- confirm there).
  df = load_csv("basic1")
  # Number of distinct labels in the `color` column (i.e. the class count).
  cat = length(levels(df[:, :color]))
  # Split the table: `color` becomes the target, the remaining columns the
  # features. Passing `rng=123` makes the row shuffle reproducible.
  ytrain, Xtrain = unpack(df, ==(:color); rng=123);
  # Preview the first 10 rows of the raw table.
  first(df, 10)
10×3 DataFrame
Row x y color
Float64 Float64 Int64
1 79.4083 152.834 0
2 98.0463 186.911 0
3 240.579 48.4737 1
4 109.687 277.946 0
5 249.626 229.753 1
6 100.785 281.983 0
7 235.33 109.54 1
8 262.352 64.5746 1
9 76.5589 204.296 0
10 245.558 134.502 1

3. MLJ workflow

3.1 fitting model

Code
    # CatBoost classifier limited to 2 boosting iterations, learning rate 0.20.
    catboost = CatBoostClassifier(; iterations=2, learning_rate=0.20)
    # Bind the model to the training data and fit it. `scitype_check_level=0`
    # turns off MLJ's scientific-type checks when the machine is constructed.
    mach = fit!(machine(catboost, Xtrain, ytrain; scitype_check_level=0))
    # Boundary data and xtest. `boundary_data` is a project helper; presumably
    # `tx`/`ty` are the grid axes and `xtest` holds the grid points to classify
    # -- confirm against its definition.
    tx, ty, xtest = boundary_data(df)
    # Most probable class for each row of `xtest`, materialised as a plain Array.
    ytest = Array(predict_mode(mach, xtest)[:, 1])
[ Info: Training machine(CatBoostClassifier(iterations = 2, …), …).
40000-element Vector{Int64}:
 0
 0
 0
 0
 0
 0
 0
 0
 0
 0
 0
 0
 0
 ⋮
 3
 3
 3
 3
 3
 3
 3
 3
 3
 3
 3
 3

3.2 plot results

Code
# Filled contour of the predicted labels over (tx, ty), using `cat` contour
# levels (the class count computed when the data was loaded).
contourf(tx, ty, ytest; levels=cat, color=cgrad(:redsblues), alpha=0.7)
# Overlay the original samples on the current plot, grouped by their `color`
# label; keep a handle to the combined figure in `p1`.
p1 = scatter!(
    df[:, :x], df[:, :y];
    group=df[:, :color], label=false, markersize=3, alpha=0.3,
)