Code
include("../utils.jl")
import MLJ: fit!, predict
using CSV,DataFrames,Random
using MLJ
using Plots
using KernelFunctions
参考博客文章: diagnose-diabetes-with-svm

SVM (支持向量机) 通过引入 kernel function, 使得模型的分类灵活性大大增强, 可以解决更多问题.
在 Julia 中可以通过在 LIBSVM.jl 中引入 kernel function 实现, 参见文档: Support Vector Machine.
MLJ.jl 通过包装接口也提供相似功能.

响应变量需要转换类型: `to_ScienceType(d) = coerce(d, :Outcome => Multiclass)`
include("../utils.jl")
import MLJ: fit!, predict
using CSV,DataFrames,Random
using MLJ
using Plots
using KernelFunctions
# Load the Pima-Indians diabetes dataset and prepare it for MLJ:
# the response column :Outcome must be coerced to Multiclass so that
# classifiers accept it as a categorical target.
df = load_csv("diabetes")

# Coerce the :Outcome column to the Multiclass scientific type.
to_ScienceType(d) = coerce(d, :Outcome => Multiclass)

df = to_ScienceType(df)
first(df, 5) |> display

# Split into target `y` (:Outcome) and features `X`, then into
# 70% train / 30% test partitions (fixed rng for reproducibility).
y, X = unpack(df, ==(:Outcome), rng=123);
(Xtrain, Xtest), (ytrain, ytest) = partition((X, y), 0.7, multi=true, rng=123)
display(schema(X))
Row | Pregnancies | Glucose | BloodPressure | SkinThickness | Insulin | BMI | DiabetesPedigreeFunction | Age | Outcome |
---|---|---|---|---|---|---|---|---|---|
Int64 | Int64 | Int64 | Int64 | Int64 | Float64 | Float64 | Int64 | Cat… | |
1 | 6 | 148 | 72 | 35 | 0 | 33.6 | 0.627 | 50 | 1 |
2 | 1 | 85 | 66 | 29 | 0 | 26.6 | 0.351 | 31 | 0 |
3 | 8 | 183 | 64 | 0 | 0 | 23.3 | 0.672 | 32 | 1 |
4 | 1 | 89 | 66 | 23 | 94 | 28.1 | 0.167 | 21 | 0 |
5 | 0 | 137 | 40 | 35 | 168 | 43.1 | 2.288 | 33 | 1 |
┌──────────────────────────┬────────────┬─────────┐ │ names │ scitypes │ types │ ├──────────────────────────┼────────────┼─────────┤ │ Pregnancies │ Count │ Int64 │ │ Glucose │ Count │ Int64 │ │ BloodPressure │ Count │ Int64 │ │ SkinThickness │ Count │ Int64 │ │ Insulin │ Count │ Int64 │ │ BMI │ Continuous │ Float64 │ │ DiabetesPedigreeFunction │ Continuous │ Float64 │ │ Age │ Count │ Int64 │ └──────────────────────────┴────────────┴─────────┘
# Load the LIBSVM support-vector classifier through MLJ's model registry.
SVC = @load SVC pkg=LIBSVM

# Define the kernel functions to evaluate (from KernelFunctions.jl).
kernels = [
    PolynomialKernel(; degree=2, c=1),
    SqExponentialKernel(),   # i.e. the Gaussian / RBF kernel
    NeuralNetworkKernel(),
    LinearKernel(; c=1.0),
]

# One SVC model per kernel, each wrapped in a machine bound to the
# training data. scitype_check_level=0 silences scitype warnings for
# the Count-typed feature columns.
svc_mdls = [SVC(; kernel=k) for k in kernels]
svcs = [machine(model, Xtrain, ytrain; scitype_check_level=0) for model in svc_mdls]
[fit!(svc) for svc in svcs]
[ Info: For silent loading, specify `verbosity=0`.
[ Info: Training machine(SVC(kernel = Polynomial Kernel (c = 1, degree = 2), …), …).
WARNING: reaching max number of iterations
[ Info: Training machine(SVC(kernel = Squared Exponential Kernel (metric = Distances.Euclidean(0.0)), …), …).
[ Info: Training machine(SVC(kernel = Neural Network Kernel, …), …).
[ Info: Training machine(SVC(kernel = Linear Kernel (c = 1.0), …), …).
WARNING: reaching max number of iterations
import MLJLIBSVMInterface ✔
4-element Vector{Machine{MLJLIBSVMInterface.SVC, true}}:
machine(SVC(kernel = Polynomial Kernel (c = 1, degree = 2), …), …)
machine(SVC(kernel = Squared Exponential Kernel (metric = Distances.Euclidean(0.0)), …), …)
machine(SVC(kernel = Neural Network Kernel, …), …)
machine(SVC(kernel = Linear Kernel (c = 1.0), …), …)
# Report hold-out accuracy for each fitted machine, labelled by kernel.
for (mach, str) in zip(svcs, ["Polynomial ", "Gaussian", "NeuralNetwork", "Linear"])
    local preds = predict(mach, Xtest)
    local score = accuracy(preds, ytest)
    @info "$(str) kernel predict accuracy" => score
end
[ Info: "Polynomial kernel predict accuracy" => 0.47391304347826085
[ Info: "Gaussian kernel predict accuracy" => 0.6434782608695652
[ Info: "NeuralNetwork kernel predict accuracy" => 0.6478260869565218
[ Info: "Linear kernel predict accuracy" => 0.7782608695652173