using Plots
using Random, Distributions
using JuMP, Ipopt, SCIP
using LinearAlgebra
using Colors

# plotting colors
purple = RGB(122/256, 30/256, 71/256)
blue   = RGB(0/256, 39/256, 76/256)
myred  = RGB(220/256, 20/256, 60/256)

cd(dirname(@__FILE__))

# create random data
Random.seed!(128)
n = 100;
x = [ones(n) sort(rand(Uniform(0,1), n))]
t = 1.5 .* x[:,2] .- 1 .+ rand(Normal(0,0.5), n);
Random.seed!()

# plot the data in the prediction space
plo_prediction = plot(frame=:box, xlabel="input x", ylabel="target t")
scatter!(x[:,2], t, label="data", c=:gray)

# parameters for gradient descent
α = 0.1          # learning rate
iter_max = 3000  # number of iterations

################################################
####### Prediction-focused learning ############
################################################

# cost function (mean squared prediction error)
cost_function(w) = 1/(2n) * sum((w'*x[i,:] - t[i])^2 for i in 1:n)

### write down the standard gradient descent
w = zeros(2)
@time for i in 1:iter_max
    ŷ = x * w
    global w = w - α / n * sum("""complete the gradient here""" for j in 1:n)
    if i % 250 == 0
        println("GD iter $i ... prediction cost: $(round(cost_function(w), digits=6))")
    end
end
plot!(x[:,2], x * w, lw=5, c=purple, label="Prediction-focused model")

################################################
############# Decision problem #################
################################################

λ = 0.5

# optimization problem: map a prediction ŷ to an optimal decision z
function z_star(ŷ)
    return """write down the closed-form solution for z"""
end

# collect optimal decisions on the ground-truth targets
z_opt = zeros(n)
for i in 1:n
    z_opt[i] = z_star(t[i])
end

# collect decisions prescribed by the prediction-focused model
z_pre = zeros(n)
ŷ = x * w
for i in 1:n
    z_pre[i] = z_star(ŷ[i])
end

# plot in the decision space
plo_decision = plot(frame=:box, xlabel="input x", ylabel="decision z")
scatter!(x[:,2], z_opt, label="Decision on ground-truth data", c=:gray)
plot!(x[:,2], z_pre, label="Prediction-focused model", c=purple, lw=5)

################################################
######## Decision-focused learning #############
################################################

### solve as a bilevel optimization problem
function bo_sol()
    model = Model(optimizer_with_attributes(SCIP.Optimizer))
    # JuMP.set_silent(model)
    # variables
    @variable(model, w[1:2])
    @variable(model, ŷ[1:n])
    @variable(model, z[1:n])
    @variable(model, μ[1:n])
    @variable(model, u[1:n], Bin)
    # objective function: decision loss
    @objective(model, Min, 1/(2*n) * sum((z[i] - z_opt[i])^2 for i in 1:n))
    # constraints
    @constraint(model, ŷ .== x * w)
    @constraint(model, """add primal feasibility condition""")
    @constraint(model, """add dual feasibility condition""")
    @constraint(model, """add the Lagrange optimality condition""")
    for i in 1:n
        @constraint(model, """first BigM constraint (BigM = 100)""")
        @constraint(model, """second BigM constraint (BigM = 100)""")
    end
    # solve
    optimize!(model)
    return JuMP.value.(z), JuMP.value.(w)
end
z_dfl_bl, w_dfl_bl = bo_sol()

# add the model to the prediction space
plo_prediction = plot(plo_prediction)
plot!(x[:,2], x * w_dfl_bl, lw=5, c=:green, label="Decision-focused model (bilevel opt)", linestyle=:dot)

# add the model to the decision space
plo_decision = plot(plo_decision)
plot!(x[:,2], z_dfl_bl, label="Decision-focused model (bilevel opt)", c=:green, lw=5)

### solve using gradient descent
# function for the gradient ∂z/∂y
function ∂z(y)
    1/(1 + exp(y/(1+2*λ))) * exp(y/(1+2*λ)) + 1/(1+2*λ)
end

w_dfl_gd = zeros(2)
α = 0.1
@time for i in 1:iter_max
    ŷ = x * w_dfl_gd
    global w_dfl_gd = w_dfl_gd - α / n * sum(("""which term is missing?""" - z_opt[j]) * """which term is missing?""" * x[j,:] for j in 1:n)
    if i % 250 == 0
        println("GD iter $i ... prediction cost: $(round(cost_function(w_dfl_gd), digits=6))")
    end
end

# collect decisions prescribed by the gradient-descent model (used in the plots and the gap below)
z_dfl_gd = zeros(n)
ŷ = x * w_dfl_gd
for i in 1:n
    z_dfl_gd[i] = z_star(ŷ[i])
end

# add the model to the prediction space
plo_prediction = plot(plo_prediction)
plot!(x[:,2], x * w_dfl_gd, lw=5, c=myred, label="Decision-focused model (grad descent)")

# add the model to the decision space
plo_decision = plot(plo_decision)
plot!(x[:,2], z_dfl_gd, label="Decision-focused model (gradient descent)", c=myred, lw=5)

# print decision gap between the optimal and prescribed decisions
println("decision sub-optimality")
@show norm(z_opt .- z_pre)^2;
@show norm(z_opt .- z_dfl_bl)^2;
@show norm(z_opt .- z_dfl_gd)^2;
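
################################################
####### Reference sketches (assumptions) #######
################################################

# The two helpers below are only sketches of how the gradient-descent blanks above
# could be completed; they assume the cost/loss definitions given in this script and
# are not part of the exercise template. The names prediction_gradient and
# decision_gradient are illustrative, not from the original code.

# Prediction-focused learning: differentiating the squared-error cost
# 1/(2n) Σᵢ (w'xᵢ - tᵢ)² gives ∇C(w) = 1/n Σᵢ (ŷᵢ - tᵢ) xᵢ, so the sum inside the
# first update plausibly reads  sum((ŷ[j] - t[j]) * x[j,:] for j in 1:n).
function prediction_gradient(w, x, t)
    n = size(x, 1)
    ŷ = x * w
    return 1/n * sum((ŷ[j] - t[j]) * x[j,:] for j in 1:n)
end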
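
# Decision-focused learning: assuming the loss 1/(2n) Σᵢ (z*(ŷᵢ) - z_opt[i])² used in
# the bilevel objective, the chain rule gives ∇L(w) = 1/n Σᵢ (z*(ŷᵢ) - z_opt[i]) ∂z(ŷᵢ) xᵢ,
# so the missing terms in the second update are plausibly z_star(ŷ[j]) and ∂z(ŷ[j]).
# This sketch takes the (student-completed) decision map z_star and its derivative ∂z
# as arguments rather than presuming their closed form.
function decision_gradient(w, x, z_opt, z_star, ∂z)
    n = size(x, 1)
    ŷ = x * w
    return 1/n * sum((z_star(ŷ[j]) - z_opt[j]) * ∂z(ŷ[j]) * x[j,:] for j in 1:n)
end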