In [1]:
# Code attribution: Yiyin Shen, Tyler Caraza-Harter
# Imports
import sklearn
import sklearn.cluster
import sklearn.datasets
import sklearn.metrics
import matplotlib.pyplot as plt
import numpy
In [2]:
# Compare single vs complete linkage on the circles dataset.
# make_circles adds random noise, so random_state is pinned to keep the figure
# and any scores computed from c1/c2 reproducible under Restart & Run All.
circle = sklearn.datasets.make_circles(n_samples = 100, factor = 0.5, noise = 0.05, random_state = 0)[0]
# Two agglomerative models that differ only in their linkage criterion
c1 = sklearn.cluster.AgglomerativeClustering(n_clusters = 2, metric = "manhattan", linkage = "single")
c1.fit(circle)
c2 = sklearn.cluster.AgglomerativeClustering(n_clusters = 2, metric = "manhattan", linkage = "complete")
c2.fit(circle)
# Side-by-side scatter plots colored by cluster label; titles say which panel is which
fig, ax = plt.subplots(1, 2)
ax[0].set_title("single linkage")
ax[1].set_title("complete linkage")
ax[0].scatter(circle[:, 0], circle[:, 1], c = c1.labels_)
ax[1].scatter(circle[:, 0], circle[:, 1], c = c2.labels_)
Out[2]:
<matplotlib.collections.PathCollection at 0x1dfa94f53d0>
In [3]:
# Agreement between the two labelings currently held by c1 and c2:
# plain Rand index first, adjusted Rand index second
r, ra = (
    score(c1.labels_, c2.labels_)
    for score in (sklearn.metrics.rand_score, sklearn.metrics.adjusted_rand_score)
)
[r, ra]
Out[3]:
[0.49818181818181817, -0.0037223882059068424]
In [4]:
# Compare single vs complete linkage on the moons dataset.
# random_state is pinned so the generated points (and the resulting labels) are reproducible.
moon = sklearn.datasets.make_moons(n_samples = 100, noise = 0.05, random_state = 0)[0]
# NOTE: this re-fits the existing c1 (single) and c2 (complete) estimators in place,
# so c1.labels_ / c2.labels_ now describe the moons data, not the previous dataset.
c1.fit(moon)
c2.fit(moon)
# Side-by-side scatter plots colored by cluster label; titles say which panel is which
fig, ax = plt.subplots(1, 2)
ax[0].set_title("single linkage")
ax[1].set_title("complete linkage")
ax[0].scatter(moon[:, 0], moon[:, 1], c = c1.labels_)
ax[1].scatter(moon[:, 0], moon[:, 1], c = c2.labels_)
Out[4]:
<matplotlib.collections.PathCollection at 0x1dfa95e5590>
In [5]:
# Score how closely the labels held by c1 and c2 agree:
# Rand index, then adjusted Rand index
r, ra = (
    score(c1.labels_, c2.labels_)
    for score in (sklearn.metrics.rand_score, sklearn.metrics.adjusted_rand_score)
)
[r, ra]
Out[5]:
[0.7715151515151515, 0.5432987041591718]
In [6]:
# Compare single vs complete linkage on dense random points.
# A seeded Generator replaces the unseeded global numpy.random state so the
# 1000 uniform points in [0, 1) x [0, 1) are the same on every run.
rand = numpy.random.default_rng(0).random((1000, 2))
# NOTE: this re-fits the existing c1 (single) and c2 (complete) estimators in place,
# so c1.labels_ / c2.labels_ now describe the random points, not the previous dataset.
c1.fit(rand)
c2.fit(rand)
# Side-by-side scatter plots colored by cluster label; titles say which panel is which
fig, ax = plt.subplots(1, 2)
ax[0].set_title("single linkage")
ax[1].set_title("complete linkage")
ax[0].scatter(rand[:, 0], rand[:, 1], c = c1.labels_)
ax[1].scatter(rand[:, 0], rand[:, 1], c = c2.labels_)
Out[6]:
<matplotlib.collections.PathCollection at 0x1dfa96be4d0>
In [7]:
# Rand index and adjusted Rand index between the labels held by c1 and c2
r, ra = (
    score(c1.labels_, c2.labels_)
    for score in (sklearn.metrics.rand_score, sklearn.metrics.adjusted_rand_score)
)
[r, ra]
Out[7]:
[0.5771951951951952, 0.007615810503535647]