Basic Introduction to Machine Learning: 04-denoise-1d

Jeff Fessler, University of Michigan
2018-10-23 Julia 1.0.1 version

Illustrate 1D signal denoising using Julia's Flux library

This is a Jupyter notebook based on the Julia language
[https://julialang.org/]
To run it you will need Julia 1.0.1 or later and to install several packages.
For some tutorials on using Julia see:
[http://web.eecs.umich.edu/~fessler/course/551/julia/tutor/]

In [1]:
# packages used
using LinearAlgebra
using Random
using Distributions
using LaTeXStrings # pretty plot labels
using Plots
using Flux # Julia package for deep learning
using Flux: throttle, mse
if true
    pyplot()
    pyplot(markersize=7, markerstrokecolor=:auto)
    fnt = font("DejaVu Sans", 15) # larger default font
    pyplot(guidefont=fnt, xtickfont=fnt, ytickfont=fnt, legendfont=fnt)
end
Out[1]:
Plots.PyPlotBackend()

Generate training and testing data; 1D piece-wise constant signals

In [2]:
# function to generate a random piece-wise constant signal
function makestep(;dim=32, njump=3, valueDist=Normal(0,1), minsep=2)
    jump_locations = randperm(dim)[1:njump]
    while minimum(diff(jump_locations)) <= minsep
        jump_locations = randperm(dim)[1:njump] # random jump locations
    end
    index = zeros(dim)
    index[jump_locations] .= 1
    index = cumsum(index)
    values = rand(valueDist, njump) # random signal values
    x = zeros(dim)
    for jj=1:njump
        x[index .== jj] .= values[jj]
    end
    x[index .== 0] .= values[njump] # periodic end conditions
    x = circshift(x, rand(1:dim, 1)) # random shift - just to be sure
    return x
end
Out[2]:
makestep (generic function with 1 method)
In [3]:
#makestep()
jim = (x; title="") -> heatmap(x', aspect_ratio=1, yflip=true, title=title, # jiffy image display
    xtick=[1,size(x,1)], ytick=[1, size(x,2)], transpose=false)
#jim(ones(3,4))
Out[3]:
#4 (generic function with 1 method)
In [4]:
# training data
Random.seed!(0)
siz = 32

ntrain = 2^11
Xtrain = [makestep(dim=siz) for _ in 1:ntrain]
Xtrain = hcat(Xtrain...) # noiseless data

ntest = 2^10
Xtest = [makestep(dim=siz) for _ in 1:ntest]
Xtest = hcat(Xtest...) # noiseless data

p0 = plot(Xtrain[:,1:14], label="")

Kx = Xtrain*Xtrain' / ntrain
p1 = jim(Kx, title="Kx")

σnoise = 0.3
Ytrain = Xtrain + σnoise * randn(size(Xtrain)) # noisy train data
Ytest = Xtest + σnoise * randn(size(Xtest)) # noisy test data

Ky = Ytrain*Ytrain' / ntrain

@show maximum(Kx[:])
@show maximum(Ky[:])

p2 = jim(Ky, title="Ky")
plot(p0, p1, p2, layout=(3,1))
maximum(Kx[:]) = 1.043542557604359
maximum(Ky[:]) = 1.1360815233790753
Out[4]:

Implement and evaluate Wiener filter (MMSE estimator)

In [5]:
# wiener filter / MMSE estimator
# cond(Kx + σnoise*I)
Hw = Kx * inv(Kx + σnoise^2 * I)
heatmap(Hw, aspect_ratio=1, yflip=true)
Out[5]:
In [6]:
# denoise via wiener filter (MMSE *linear* method)
Xw = Hw * Ytest
nrmse = (Xh) -> round(norm(Xh - Xtest) / norm(Xtest) * 100, digits=2)
@show nrmse(Ytest), nrmse(Xw)
plot(Xw[:,1:4])
plot!(Xtest[:,1:4])
(nrmse(Ytest), nrmse(Xw)) = (29.25, 22.59)
Out[6]:
In [7]:
# verify that marginal distribution is gaussian
histogram(Xtrain[:], label = "Xtrain hist")
Out[7]:

Examine a "NN" that is a single fully connected affine layer

(It should perform same as Wiener filter.)

In [8]:
# first try a basic affine NN model
model1 = Chain(Dense(siz,siz))
loss(x, y) = mse(model1(x), y)

iters = 2^12
dataset = Base.Iterators.repeated((Ytrain, Xtrain), iters) # trick X,Y swap for denoising!

evalcb = () -> @show(loss(Ytest,Xtest).data) # todo: how to show epoch?
cb = throttle(evalcb, 4)

Flux.train!(loss, dataset, ADAM(Flux.params(model1)), cb=cb)
(loss(Ytest, Xtest)).data = 2.364329038429393
(loss(Ytest, Xtest)).data = 0.1937470324375254
(loss(Ytest, Xtest)).data = 0.10469383685135497
(loss(Ytest, Xtest)).data = 0.07678962858852317
(loss(Ytest, Xtest)).data = 0.06493312126563625
(loss(Ytest, Xtest)).data = 0.05939058681818371
(loss(Ytest, Xtest)).data = 0.056766590963969264
(loss(Ytest, Xtest)).data = 0.055550538819282805
(loss(Ytest, Xtest)).data = 0.05501980285441081
(loss(Ytest, Xtest)).data = 0.05479982474025873
(loss(Ytest, Xtest)).data = 0.054714287261874134
In [9]:
# compare training affine NN to wiener filter
H1 = model1(Matrix(I, siz, siz)).data
heatmap(H1, aspect_ratio=1, yflip=true, title="Learned filter for affine NN")

X1 = H1 * Ytest
X1nn = model1(Ytest).data
@show nrmse(Ytest), nrmse(Xw), nrmse(X1), nrmse(X1nn)
bias = model1(zeros(siz)).data
@show minimum(bias), maximum(bias)
plot!()
(nrmse(Ytest), nrmse(Xw), nrmse(X1), nrmse(X1nn)) = (29.25, 22.59, 24.31, 22.74)
(minimum(bias), maximum(bias)) = (-0.008806088125023523, 0.007318190573222287)
Out[9]:

Examine a single hidden layer NN

In [10]:
# create a basic NN model
nhidden = siz*2 # neurons in hidden layer
model2 = Chain(Dense(siz,nhidden,relu), Dense(nhidden,siz))
loss(x, y) = mse(model2(x), y)

iters = 2^12
dataset = Base.Iterators.repeated((Ytrain, Xtrain), iters) # trick X,Y swap for denoising!

evalcb = () -> @show(loss(Ytest,Xtest).data)
cb = throttle(evalcb, 4)

Flux.train!(loss, dataset, ADAM(Flux.params(model2)), cb=cb)
(loss(Ytest, Xtest)).data = 1.6297459564280765
(loss(Ytest, Xtest)).data = 0.13810508088667367
(loss(Ytest, Xtest)).data = 0.0988347042643051
(loss(Ytest, Xtest)).data = 0.08523679598981156
(loss(Ytest, Xtest)).data = 0.07852570095260787
(loss(Ytest, Xtest)).data = 0.07451241745178142
(loss(Ytest, Xtest)).data = 0.0715487967737228
(loss(Ytest, Xtest)).data = 0.06936931898963397
(loss(Ytest, Xtest)).data = 0.06765487239135241
(loss(Ytest, Xtest)).data = 0.06628080658295542
(loss(Ytest, Xtest)).data = 0.06517875230973584
(loss(Ytest, Xtest)).data = 0.06417513692196619
(loss(Ytest, Xtest)).data = 0.0634116191767757
(loss(Ytest, Xtest)).data = 0.06275682770457232
(loss(Ytest, Xtest)).data = 0.062141191910418524
(loss(Ytest, Xtest)).data = 0.06160083181369067
(loss(Ytest, Xtest)).data = 0.06117921680360624
(loss(Ytest, Xtest)).data = 0.060793746684749884
(loss(Ytest, Xtest)).data = 0.0604773951146003
(loss(Ytest, Xtest)).data = 0.06021420788254041
(loss(Ytest, Xtest)).data = 0.05995466430868372
In [11]:
X2 = model2(Ytest).data
@show nrmse(Ytest), nrmse(Xw), nrmse(X1), nrmse(X1nn), nrmse(X2)
(nrmse(Ytest), nrmse(Xw), nrmse(X1), nrmse(X1nn), nrmse(X2)) = (29.25, 22.59, 24.31, 22.74, 23.78)
Out[11]:
(29.25, 22.59, 24.31, 22.74, 23.78)
In [36]:
# Examine joint distribution
lag = 1
tmp = circshift(Xtrain, (lag,))
histogram2d(Xtrain[:], tmp[:])
plot!(aspect_ratio=1, xlim=[-4,4], ylim=[-4,4])
plot!(xlabel=L"x[n]", ylabel=latexstring("x[n-$lag \\ mod \\ N]"))
plot!(title="Joint histogram of neighbors")
Out[36]:
In [50]:
model3 = Chain(
  Conv((2,2), 1=>16, relu),
  x -> maxpool(x, (2,2)),
  Conv((2,2), 16=>8, relu),
  x -> maxpool(x, (2,2)),
  x -> reshape(x, :, size(x, 4)))
#  Dense(288, 10), softmax)

tmp = Xtrain[:,1:40];
#model3(tmp)
In [51]:
tmp = Conv((3,1), 1=>1, relu)
model3 = Chain(tmp)
#model3 = Chain(Dense(siz,nhidden,relu))
tmp = reshape(Xtrain[:,1], siz, 1);
#model3(tmp)
In [52]:
tmp = Conv((3,3), 1=>1, relu)
model3 = Chain(tmp)
#model3 = Chain(Dense(siz,nhidden,relu))
tmp = reshape(Xtrain[:,1], siz, 1)
tmp = Xtrain[:,1:4];
#model3(tmp)
In [53]:
# create a basic NN model
nhidden = siz*2 # neurons in hidden layer
model3 = Chain(Dense(siz,nhidden,relu), Dense(nhidden,siz))
loss(x, y) = mse(model2(x), y)

iters = 2^12
dataset = Base.Iterators.repeated((Ytrain, Xtrain), iters) # trick X,Y swap for denoising!

evalcb = () -> @show(loss(Ytest,Xtest).data)
cb = throttle(evalcb, 4);

#Flux.train!(loss, dataset, ADAM(Flux.params(model2)), cb=cb)