Code
import MLJ:predict
using GLMakie, MLJ,CSV,DataFrames,StatsBase
WARNING: using StatsBase.predict in module Main conflicts with an existing identifier.
通过上网浏览时间预测年花费
Dataset: Kaggle Ecommerce Customers dataset (`Ecommerce-Customers.csv`)
using MLJLinearModels.jl
🔗
import MLJ:predict
using GLMakie, MLJ,CSV,DataFrames,StatsBase
WARNING: using StatsBase.predict in module Main conflicts with an existing identifier.
# Load the e-commerce customers CSV into a DataFrame and drop rows with missing values.
str = "Ecommerce-Customers"
df = CSV.File("./data/Ecommerce-Customers.csv") |> DataFrame |> dropmissing;
# Keep only columns 4 through 8 (mutates `df`); per the preview printed below these
# are the four numeric predictors followed by "Yearly Amount Spent".
select!(df, 4:8)
# Features: the first four remaining columns, converted to a matrix-backed MLJ table.
X = df[:, 1:4] |> Matrix |> MLJ.table
# Target: the fifth remaining column as a plain vector.
y = Vector(df[:, 5])
# Preview the first five rows.
first(df, 5)
Row | Avg. Session Length | Time on App | Time on Website | Length of Membership | Yearly Amount Spent |
---|---|---|---|---|---|
Float64 | Float64 | Float64 | Float64 | Float64 | |
1 | 34.4973 | 12.6557 | 39.5777 | 4.08262 | 587.951 |
2 | 31.9263 | 11.1095 | 37.269 | 2.66403 | 392.205 |
3 | 33.0009 | 11.3303 | 37.1106 | 4.10454 | 487.548 |
4 | 34.3056 | 13.7175 | 36.7213 | 3.12018 | 581.852 |
5 | 33.3307 | 12.7952 | 37.5367 | 4.44631 | 599.406 |
# Shared state used by the pair-plot helpers defined below.
axs = []                                   # collects the diagonal axes created by `plot_diag`
label = names(df) |> Array                 # column names, used for axis labels and ticks
colors = [:orange, :lightgreen, :purple, :lightblue, :red, :green]
fig = Figure(resolution=(1400, 1400))      # figure that hosts the 5×5 grid of axes
# NOTE(review): `plot_diag(1)` later creates another axis in this same cell — confirm
# this initial axis is intentional.
ax = Axis(fig[1, 1])
"""
    plot_diag(i)

Create an axis in the diagonal cell `fig[i, i]`, record it in the global `axs`, and draw
the density of column `i` of `df` on it using `colors[i]` (half-transparent fill, solid
stroke). Returns the value of the `density!` call.
"""
function plot_diag(i)
    ax = Axis(fig[i, i])
    push!(axs, ax)
    return density!(ax, df[:, i]; color=(colors[i], 0.5),
        strokewidth=1.25, strokecolor=colors[i])
end
"""
    plot_cor(i, j)

Create an axis in cell `fig[i, j]` and scatter column `i` of `df` (x values) against
column `j` (y values), colored with `colors[j]`. Returns the value of the `scatter!` call.
"""
function plot_cor(i, j)
    ax = Axis(fig[i, j])
    return scatter!(ax, df[:, i], df[:, j]; color=colors[j])
end
"""
    plot_pair()

Fill the 5×5 grid: a density plot (`plot_diag`) on the diagonal and a scatter plot
(`plot_cor`) in every off-diagonal cell. Returns the 5×5 array of the helpers' results.
"""
function plot_pair()
    return [(i == j ? plot_diag(i) : plot_cor(i, j)) for i in 1:5, j in 1:5]
end
"""
    add_xy_label()

For each of the five variables, create an axis in the bottom row (`fig[5, k]`) carrying
the variable name as x-label and an axis in the first column (`fig[k, 1]`) carrying it as
y-label. Names are taken from the global `label`.
"""
function add_xy_label()
    foreach(1:5) do k
        Axis(fig[5, k]; xlabel=label[k])
        Axis(fig[k, 1]; ylabel=label[k])
    end
end
"""
    main()

Assemble the pair plot: populate the grid with `plot_pair`, add the edge labels with
`add_xy_label`, and return the global figure `fig`.
"""
function main()
    plot_pair()
    add_xy_label()
    return fig
end

# Build the figure.
main()
# Covariance and correlation matrices of the five columns, rounded to three decimals
# so they can be printed inside the heatmap cells.
df_cov = round.(cov(Matrix(df)); digits=3)
df_cor = round.(cor(Matrix(df)); digits=3)
"""
    plot_cov_cor()

Draw the covariance matrix (`df_cov`) and correlation matrix (`df_cor`) as two annotated
heatmaps side by side, each with its own colorbar, and return the new figure.
"""
function plot_cov_cor()
    fig = Figure(resolution=(2200, 800))
    ax1 = Axis(fig[1, 1]; xticks=(1:5, label), yticks=(1:5, label),
        title="ecommerce cov matrix", yreversed=true)
    ax3 = Axis(fig[1, 3]; xticks=(1:5, label), yticks=(1:5, label),
        title="ecommerce cor matrix", yreversed=true)

    # Covariance heatmap, its colorbar, and the value printed in every cell.
    hm = heatmap!(ax1, df_cov)
    Colorbar(fig[1, 2], hm)
    for x in 1:5, y in 1:5
        text!(ax1, x, y; text=string(df_cov[x, y]), color=:white, fontsize=18,
            align=(:center, :center))
    end

    # Correlation heatmap, its colorbar, and the value printed in every cell.
    hm2 = heatmap!(ax3, df_cor)
    Colorbar(fig[1, 4], hm2)
    for x in 1:5, y in 1:5
        text!(ax3, x, y; text=string(df_cor[x, y]), color=:white, fontsize=18,
            align=(:center, :center))
    end

    return fig
end

# Render the two heatmaps.
plot_cov_cor()
# Load the LinearRegressor model type from MLJLinearModels.
LinearRegressor = @load LinearRegressor pkg=MLJLinearModels
model = LinearRegressor()
# Bind the model to the data in a machine and train it.
mach = MLJ.fit!(machine(model, X, y))
# Show the learned coefficients and intercept.
fitted_params(mach)
[ Info: For silent loading, specify `verbosity=0`.
[ Info: Training machine(LinearRegressor(fit_intercept = true, …), …).
┌ Info: Solver: MLJLinearModels.Analytical
│ iterative: Bool false
└ max_inner: Int64 200
import MLJLinearModels ✔
(coefs = [:x1 => 25.734271084705085, :x2 => 38.709153810834366, :x3 => 0.43673883559434407, :x4 => 61.57732375487839],
intercept = -1051.5942553006273,)
# Predict on the training features and report the root-mean-square deviation
# between the targets and the predictions.
y_hat = predict(mach, X)
"rmsd" => rmsd(y, y_hat)
"rmsd" => 9.923256785022247
# Residuals: prediction minus target. The original `y_hat .= y` overwrote the
# predictions with the targets in place, so no residual was ever computed.
resid = y_hat .- y
stem(resid)