In [1]:
# Code attribution: Yiyin Shen, Tyler Caraza-Harter
# Imports
import sklearn
import sklearn.linear_model
import numpy
import pandas
import scipy
import matplotlib.pyplot as plt
In [2]:
# Load the grade table and scatter the two exam columns against each other.
data = pandas.read_csv("NewGrades.csv")
x = data["Exam1"]
y = data["Exam2"]
# Label the axes before drawing so that plt.scatter remains the cell's final
# expression (the displayed cell output is therefore unchanged).
plt.xlabel("Exam1")
plt.ylabel("Exam2")
plt.title("Exam 1 vs Exam 2")
plt.scatter(x, y)
Out[2]:
<matplotlib.collections.PathCollection at 0x1cffb0af3d0>
In [3]:
# Fit a one-feature linear regression of Exam2 (y) on Exam1 (x) and overlay
# the fitted values on the scatter plot.
reg = sklearn.linear_model.LinearRegression()
# The estimator is given a 2-D feature matrix: convert the Series to an
# ndarray explicitly, then reshape it into a single column. This avoids
# relying on how numpy.reshape handles non-ndarray inputs.
x_mat = x.to_numpy().reshape(-1, 1)
reg.fit(x_mat, y)
y_pred = reg.predict(x_mat)
# Label the axes first so plt.plot stays the cell's final expression.
plt.xlabel("Exam1")
plt.ylabel("Exam2")
plt.scatter(x, y)
plt.plot(x, y_pred, linewidth = 5, color = "red")
Out[3]:
[<matplotlib.lines.Line2D at 0x1cffb0f5010>]
In [4]:
# Cross-check: solve the same least-squares problem directly and display the
# solution next to the coefficients of the fitted model.
# Design matrix: first column is x, second column is all ones (intercept term).
x_design = numpy.column_stack((x, numpy.full(len(x), 1)))
w = scipy.linalg.lstsq(x_design, y)
# lstsq returns a tuple; its first entry is the solution vector.
[w[0], [reg.coef_, reg.intercept_]]
Out[4]:
[array([0.5039773 , 7.53817327]), [array([0.5039773]), 7.538173268989407]]
In [5]:
# Cross-check again by solving the normal equations (X^T X) w = X^T y
# and display the solution next to the coefficients of the fitted model.
xtx = x_design.T @ x_design
xty = x_design.T @ y
w0 = scipy.linalg.solve(xtx, xty)
[w0, [reg.coef_, reg.intercept_]]
Out[5]:
[array([0.5039773 , 7.53817327]), [array([0.5039773]), 7.538173268989407]]