In [1]:
# Code attribution: Yiyin Shen, Tyler Caraza-Harter
# Imports
import sklearn
import sklearn.linear_model
import numpy
import math
import scipy
import matplotlib.pyplot as plt
In [2]:
# Create a toy dataset and fit a logistic regression
n = 20
numpy.random.seed(42)
x = numpy.random.rand(n)
y = numpy.array([(1 if x[i] > 0.5 else 0) for i in range(n)])
x = x + numpy.random.normal(0, 0.2, n)
model = sklearn.linear_model.LogisticRegression(penalty = None)
model.fit(x.reshape(-1, 1), y)
plt.scatter(x, y)
x0 = numpy.arange(min(x), max(x), 0.01)
y0 = scipy.special.expit(model.coef_[0, 0] * x0 + model.intercept_)
plt.plot(x0, y0)
Out[2]:
[<matplotlib.lines.Line2D at 0x1c65996fbd0>]
No description has been provided for this image
In [3]:
# Display the coefficients
w0 = [model.coef_[0, 0], model.intercept_[0]]
w0
Out[3]:
[7.212509920279232, -3.205745763273673]
In [4]:
# Write down the optimization problem for w, b and plot the function
def loss(w):
    a = scipy.special.expit(w[0] * x + w[1])
    return -sum(y * numpy.log(a) + (1 - y) * numpy.log(1 - a))

scipy.optimize.minimize(loss, [1, 1])
Out[4]:
  message: Optimization terminated successfully.
  success: True
   status: 0
      fun: 8.141121629628897
        x: [ 7.213e+00 -3.206e+00]
      nit: 11
      jac: [-7.153e-07 -2.980e-06]
 hess_inv: [[ 9.380e+00 -3.941e+00]
            [-3.941e+00  2.026e+00]]
     nfev: 36
     njev: 12
In [5]:
# Start with the sklearn solution
scipy.optimize.minimize(loss, w0)
Out[5]:
  message: Optimization terminated successfully.
  success: True
   status: 0
      fun: 8.141121629625145
        x: [ 7.213e+00 -3.206e+00]
      nit: 4
      jac: [-1.192e-07 -3.576e-07]
 hess_inv: [[ 1.000e+01 -4.251e+00]
            [-4.251e+00  2.162e+00]]
     nfev: 15
     njev: 5
In [6]:
# List the sequence of iterations
sol = scipy.optimize.minimize(loss, [-5, -5], options = {'return_all': True}).allvecs
sol
Out[6]:
[array([-5., -5.]),
 array([-2.26913122, -0.75207631]),
 array([-0.57681179,  0.64962102]),
 array([ 1.4597948, -0.5180649]),
 array([ 4.38102668, -1.92109044]),
 array([ 5.67329187, -2.51164373]),
 array([ 6.71276624, -2.97915751]),
 array([ 7.11786628, -3.16180172]),
 array([ 7.20674425, -3.20264496]),
 array([ 7.21272043, -3.20568452]),
 array([ 7.21277102, -3.20581796]),
 array([ 7.21268384, -3.20582677])]
In [7]:
# Plot the surface
r = max(abs(model.coef_[0, 0]), abs(model.intercept_[0])) + 1
x1, y1 = numpy.meshgrid(numpy.arange(-r, r, 0.1), numpy.arange(-r, r, 0.1))
z1 = numpy.zeros(x1.shape)
for i in range(len(x1)):
    for j in range(len(x1[0])):
        z1[i,j] = loss([x1[i,j], y1[i,j]])
x2 = numpy.zeros(len(sol))
y2 = numpy.zeros(len(sol))
z2 = numpy.zeros(len(sol))
for i in range(len(sol)):
    x2[i] = sol[i][0]
    y2[i] = sol[i][1]
    z2[i] = loss([x2[i], y2[i]])
fig = plt.figure()
ax = fig.add_subplot(projection = "3d")
ax.plot_surface(x1, y1, z1, alpha = 0.75)
ax.plot3D(x2, y2, z2, c = "red", lw = 5)
ax.view_init(45, 45)
No description has been provided for this image
In [8]:
# Change viewing angle
fig = plt.figure()
ax = fig.add_subplot(projection = "3d")
ax.plot_surface(x1, y1, z1, alpha = 0.75)
ax.plot3D(x2, y2, z2, c = "red", lw = 5)
ax.view_init(0, 45)
No description has been provided for this image
In [ ]: