using GLMakie, Distributions, Random
Random.seed!(34343)
TaskLocalRNG()
# Reference: "Understanding Regression Analysis: A Conditional Distribution Approach", CRC Press (2020)
# 观点: 数据测量并不是一次性的。在反复的测量中, 出现的数据点代表了附近邻居的信息: 与数据点性质越接近的邻居, 其测量值越接近于这个值。在实际使用中, 数据点与附近邻居点之间的信息偏差可以用正态分布来建模。
using GLMakie, Distributions, Random
Random.seed!(34343)
TaskLocalRNG()
# Sample grid and synthetic data for the conditional-distribution illustration.
#
# `step` is the number of samples: it is passed as the third positional argument of
# `range`, which is the length, not the spacing. Note that this global shadows
# `Base.step` in this module.
step = 80

# x and y share one grid on [-4, 4]; z covers [0, 1].
xs = ys = range(-4, 4, step)
zs = range(0, 1, step)

# Noise model: zero-mean normal with standard deviation 0.4.
noise_dist = Normal(0, 0.4)

# Noise-free relationship between x and the measured value.
f(x) = 2x

data = f.(xs)                    # noise-free values on the x grid
noise = rand(noise_dist, step)   # one noise draw per sample
noise_data = data + noise        # noisy observations

# Constant z = 0 coordinate for every sample.
zsd = fill(0, step)
# Notebook output of the preceding cell (a zero vector, display abbreviated):
# 80-element Vector{Int64}:
#  0
#  ⋮
#  0
# Build the 3-D figure: the noise-free line and the noisy samples are drawn with
# z-coordinate `zsd`, and normal pdf curves are drawn with the density as height.
fig = Figure(resolution=(800, 600))
ax = Axis3(
    fig[1, 1];
    limits=(-4, 4, -4, 4, 0, 1),
    aspect=(1.5, 1, 2 / 3),
    azimuth=-0.2pi,
)

lines!(ax, xs, data, zsd, linestyle=:dot, linewidth=8, color=(:red, 0.8), label="real data")
scatter!(
    ax, xs, noise_data, zsd;
    marker=:circle,
    markersize=16,
    color=(:lightgreen, 0.1),
    strokewidth=2,
    strokecolor=:black,
    label="noise data",
)

# One density curve per selected x position `i`: the curve lies in the plane x = i,
# runs along `ys`, and its height is the pdf of Normal(i, 0.4).
# NOTE(review): the density is centered at y = i, while the plotted line uses
# f(x) = 2x — confirm which center is intended.
for i in [-2, 0, 2]
    # `local` keeps these bindings from touching the globals of the same name.
    local dist = Normal(i, 0.4)
    local data = [Point3f(i, y, pdf(dist, y)) for y in ys]
    lines!(ax, data, label="noise dist at $i")
end

axislegend(ax)
fig