In [1]:
# Code attribution: Yiyin Shen, Tyler Caraza-Harter
# Imports
import sklearn
import sklearn.preprocessing
import numpy
In [2]:
# Demo inputs for the polynomial-feature examples:
#   x2 holds 0..5 laid out row by row as 3 rows x 2 columns
#   x3 holds 0..8 laid out row by row as 3 rows x 3 columns
x2 = numpy.reshape(numpy.arange(6), (3, 2))
x3 = numpy.reshape(numpy.arange(9), (3, 3))
(x2, x3)
Out[2]:
(array([[0, 1],
[2, 3],
[4, 5]]),
array([[0, 1, 2],
[3, 4, 5],
[6, 7, 8]]))
In [3]:
# Degree 2 on the two-column input (call the columns a, b) -> 6 output columns:
#   intercept (1):          1
#   original columns (2):   a, b
#   degree-2 terms (3):     a^2, a*b, b^2   (two squares and one interaction)
(
    sklearn.preprocessing.PolynomialFeatures(degree=2)
    .fit_transform(x2)
)
Out[3]:
array([[ 1., 0., 1., 0., 0., 1.],
[ 1., 2., 3., 4., 6., 9.],
[ 1., 4., 5., 16., 20., 25.]])
In [4]:
# Degree 3 on the two-column input (call the columns a, b) -> 10 output columns:
#   intercept (1):          1
#   original columns (2):   a, b
#   degree-2 terms (3):     a^2, a*b, b^2        (two squares, one interaction)
#   degree-3 terms (4):     a^3, a^2*b, a*b^2, b^3   (two cubes, two mixed terms)
(
    sklearn.preprocessing.PolynomialFeatures(degree=3)
    .fit_transform(x2)
)
Out[4]:
array([[ 1., 0., 1., 0., 0., 1., 0., 0., 0., 1.],
[ 1., 2., 3., 4., 6., 9., 8., 12., 18., 27.],
[ 1., 4., 5., 16., 20., 25., 64., 80., 100., 125.]])
In [5]:
# Degree 2 on the three-column input (call the columns a, b, c) -> 10 output columns:
#   intercept (1):            1
#   original columns (3):     a, b, c
#   square columns (3):       a^2, b^2, c^2
#   interaction columns (3):  a*b, a*c, b*c
# In the output the squares and interactions are interleaved:
#   a^2, a*b, a*c, b^2, b*c, c^2
(
    sklearn.preprocessing.PolynomialFeatures(degree=2)
    .fit_transform(x3)
)
Out[5]:
array([[ 1., 0., 1., 2., 0., 0., 0., 1., 2., 4.],
[ 1., 3., 4., 5., 9., 12., 15., 16., 20., 25.],
[ 1., 6., 7., 8., 36., 42., 48., 49., 56., 64.]])
In [6]:
# Degree 3 on the three-column input: too many terms to list one by one.
# The result has 20 columns: 1 intercept + 3 originals + 6 degree-2 terms
# + 10 degree-3 terms.
(
    sklearn.preprocessing.PolynomialFeatures(degree=3)
    .fit_transform(x3)
)
Out[6]:
array([[ 1., 0., 1., 2., 0., 0., 0., 1., 2., 4., 0.,
0., 0., 0., 0., 0., 1., 2., 4., 8.],
[ 1., 3., 4., 5., 9., 12., 15., 16., 20., 25., 27.,
36., 45., 48., 60., 75., 64., 80., 100., 125.],
[ 1., 6., 7., 8., 36., 42., 48., 49., 56., 64., 216.,
252., 288., 294., 336., 384., 343., 392., 448., 512.]])
In [7]:
# Degree 3 on the three-column input (call the columns a, b, c), with
#   include_bias=False      -> no intercept column
#   interaction_only=True   -> no term repeats a column (no squares or cubes)
# This leaves 7 output columns:
#   original columns (3):         a, b, c
#   two-term interactions (3):    a*b, a*c, b*c
#   three-term interaction (1):   a*b*c
(
    sklearn.preprocessing.PolynomialFeatures(
        degree=3,
        interaction_only=True,
        include_bias=False,
    )
    .fit_transform(x3)
)
Out[7]:
array([[ 0., 1., 2., 0., 0., 2., 0.],
[ 3., 4., 5., 12., 15., 20., 60.],
[ 6., 7., 8., 42., 48., 56., 336.]])