In [1]:
# Code attribution: Yiyin Shen, Tyler Caraza-Harter
# Imports
import sklearn
import sklearn.preprocessing
import numpy
In [2]:
# Example inputs for the polynomial feature demos:
#   x2 holds the integers 0..5 laid out as 3 rows by 2 columns
#   x3 holds the integers 0..8 laid out as 3 rows by 3 columns
x2 = numpy.reshape(numpy.arange(6), (3, 2))
x3 = numpy.reshape(numpy.arange(9), (3, 3))
(x2, x3)
Out[2]:
(array([[0, 1], [2, 3], [4, 5]]), array([[0, 1, 2], [3, 4, 5], [6, 7, 8]]))
In [3]:
# Degree-2 expansion of the two columns (a, b) of x2 -> 6 columns:
# intercept (1), originals a, b (2), squares a^2, b^2 (2), interaction a*b (1)
sklearn.preprocessing.PolynomialFeatures(degree=2).fit_transform(x2)
Out[3]:
array([[ 1., 0., 1., 0., 0., 1.], [ 1., 2., 3., 4., 6., 9.], [ 1., 4., 5., 16., 20., 25.]])
In [4]:
# Degree-3 expansion of the two columns (a, b) of x2 -> 10 columns:
# intercept (1), originals a, b (2), squares a^2, b^2 (2), two-term interaction a*b (1),
# cubes a^3, b^3 (2), and degree-3 mixed terms a^2*b, a*b^2 (2)
sklearn.preprocessing.PolynomialFeatures(degree=3).fit_transform(x2)
Out[4]:
array([[ 1., 0., 1., 0., 0., 1., 0., 0., 0., 1.], [ 1., 2., 3., 4., 6., 9., 8., 12., 18., 27.], [ 1., 4., 5., 16., 20., 25., 64., 80., 100., 125.]])
In [5]:
# Degree-2 expansion of the three columns (a, b, c) of x3 -> 10 columns:
# intercept (1), originals a, b, c (3), squares a^2, b^2, c^2 (3),
# and interaction columns a*b, a*c, b*c (3)
sklearn.preprocessing.PolynomialFeatures(degree=2).fit_transform(x3)
Out[5]:
array([[ 1., 0., 1., 2., 0., 0., 0., 1., 2., 4.], [ 1., 3., 4., 5., 9., 12., 15., 16., 20., 25.], [ 1., 6., 7., 8., 36., 42., 48., 49., 56., 64.]])
In [6]:
# Degree-3 expansion of the three columns of x3 -> 20 columns, too many terms to list:
# intercept (1), originals (3), degree-2 terms (6), degree-3 terms (10)
sklearn.preprocessing.PolynomialFeatures(degree=3).fit_transform(x3)
Out[6]:
array([[ 1., 0., 1., 2., 0., 0., 0., 1., 2., 4., 0., 0., 0., 0., 0., 0., 1., 2., 4., 8.], [ 1., 3., 4., 5., 9., 12., 15., 16., 20., 25., 27., 36., 45., 48., 60., 75., 64., 80., 100., 125.], [ 1., 6., 7., 8., 36., 42., 48., 49., 56., 64., 216., 252., 288., 294., 336., 384., 343., 392., 448., 512.]])
In [7]:
# Degree 3 restricted to products of distinct columns (interaction_only), with the
# bias (intercept) column dropped -> 7 columns:
# originals a, b, c (3), two-term interactions a*b, a*c, b*c (3), three-term interaction a*b*c (1)
sklearn.preprocessing.PolynomialFeatures(
    degree=3,
    interaction_only=True,
    include_bias=False,
).fit_transform(x3)
Out[7]:
array([[ 0., 1., 2., 0., 0., 2., 0.], [ 3., 4., 5., 12., 15., 20., 60.], [ 6., 7., 8., 42., 48., 56., 336.]])