Page 122 - Data Science Algorithms in a Week
P. 122
Clustering into K Clusters
def euclidean_dist(point_a, point_b):
    """Return the Euclidean distance between two 2D points.

    Each argument is an (x, y) pair. The pairs are unpacked inside the
    body rather than in the signature because tuple parameters are a
    syntax error on Python 3 (they were removed by PEP 3113); this form
    works on both Python 2 and Python 3.
    """
    (x1, y1) = point_a
    (x2, y2) = point_b
    dx = x1 - x2
    dy = y1 - y2
    return math.sqrt(dx * dx + dy * dy)
# A PointGroup is a tuple whose first element is a 2D point and whose
# second element is the number of the group the point is classified into.
def choose_centroids(point_groups, k):
    """Return the centroid (mean point) of each of the k groups.

    Args:
        point_groups: iterable of ((x, y), group) pairs, where group is
            the index of the group the point currently belongs to.
        k: number of groups.

    Returns:
        A list of k (x, y) tuples of floats; entry g is the mean of the
        points whose group is g.

    Raises:
        ZeroDivisionError: if one of the k groups has no points, since
            its mean is undefined.
    """
    centroid_xs = [0] * k
    centroid_ys = [0] * k
    group_counts = [0] * k
    # Accumulate per-group coordinate sums and member counts in one pass.
    for (x, y), group in point_groups:
        centroid_xs[group] += x
        centroid_ys[group] += y
        group_counts[group] += 1

    centroids = []
    for group in range(k):
        count = group_counts[group]
        if count == 0:
            # Same exception type the bare division would raise, but with
            # a message that names the offending group.
            raise ZeroDivisionError(
                "group %d has no points; its centroid is undefined" % group)
        # float() keeps this a true division on Python 2 as well.
        centroids.append((
            float(centroid_xs[group]) / count,
            float(centroid_ys[group]) / count))
    return centroids
# Returns the index of the centroid that is closest to the point.
# That index is also the number of the group to which
# the point belongs.
def closest_group(point, centroids):
    """Return the index of the centroid nearest to point.

    Distances are measured with euclidean_dist. When several centroids
    are equally near, the one with the lowest index is returned, because
    a later candidate only replaces the current best on a strictly
    smaller distance.
    """
    # best is a (distance, index) pair for the nearest centroid so far.
    best = (euclidean_dist(point, centroids[0]), 0)
    for index in range(1, len(centroids)):
        candidate = (euclidean_dist(point, centroids[index]), index)
        if candidate[0] < best[0]:
            best = candidate
    return best[1]
# Reassigns the groups to the points according to which centroid
# a point is closest to.
def assign_groups(point_groups, centroids):
    """Return new point groups with each point assigned to its nearest centroid.

    Args:
        point_groups: iterable of (point, group) pairs; the existing
            group of each pair is ignored.
        centroids: sequence of centroids, passed to closest_group.

    Returns:
        A new list of (point, group) pairs, in input order, where group
        is the value closest_group returns for that point.
    """
    return [
        (point, closest_group(point, centroids))
        for point, _previous_group in point_groups
    ]
# Returns a list of point groups given a list of points; every point starts in group 0.
def points_to_point_groups(points):
    """Pair every point with group 0, producing an initial list of point groups.

    Args:
        points: iterable of points.

    Returns:
        A new list of (point, 0) tuples in the same order as points.
    """
    return [(point, 0) for point in points]
[ 110 ]