Page 122 - Data Science Algorithms in a Week
P. 122

Clustering into K Clusters


                def euclidean_dist(point_a, point_b):
                    """Return the Euclidean distance between two 2d points.

                    Each argument must unpack into exactly two numeric coordinates
                    (for example an ``(x, y)`` tuple or a two-element list).
                    """
                    # Tuple parameters in a signature were removed in Python 3
                    # (PEP 3113), so the points are unpacked here instead; call
                    # sites passing two points positionally are unaffected.
                    (x1, y1) = point_a
                    (x2, y2) = point_b
                    return math.sqrt((x1 - x2) * (x1 - x2) + (y1 - y2) * (y1 - y2))

                # A PointGroup is a tuple whose first coordinate is a 2d point
                # and whose second coordinate is the group the point is classified into.
                def choose_centroids(point_groups, k):
                    """Return the mean point of every group as a list of k centroids.

                    point_groups is an iterable of ((x, y), group) pairs, where group
                    is used as an index into per-group accumulators of length k.
                    The result holds one (mean_x, mean_y) tuple of floats per group,
                    in group order.

                    Raises ZeroDivisionError when some group in range(k) has no
                    points, and IndexError when a group index does not fit in k.
                    """
                    # One [sum_x, sum_y, count] accumulator per group.
                    totals = [[0, 0, 0] for _ in range(k)]
                    for ((px, py), label) in point_groups:
                        bucket = totals[label]
                        bucket[0] += px
                        bucket[1] += py
                        bucket[2] += 1
                    # float() keeps the division exact under Python 2 integer inputs.
                    return [
                        (float(sum_x) / count, float(sum_y) / count)
                        for (sum_x, sum_y, count) in totals
                    ]

                # Returns the index of the centroid that is closest to the point.
                # This centroid index is also the number of the group to which
                # the point belongs.
                def closest_group(point, centroids):
                    """Return the index of the centroid nearest to point.

                    Distances are measured with euclidean_dist. When several
                    centroids are equally near, the lowest index wins because a
                    later centroid only replaces the current best when it is
                    strictly closer. An empty centroids sequence raises IndexError.
                    """
                    best_index = 0
                    best_dist = euclidean_dist(point, centroids[0])
                    for index, centroid in enumerate(centroids[1:], 1):
                        candidate_dist = euclidean_dist(point, centroid)
                        if candidate_dist < best_dist:
                            best_index, best_dist = index, candidate_dist
                    return best_index

                # Reassigns the groups to the points according to which centroid
                # a point is closest to.
                def assign_groups(point_groups, centroids):
                    """Return new (point, group) pairs, regrouped by nearest centroid.

                    The group stored in each incoming pair is ignored; every point
                    is paired with the group index returned by closest_group for
                    the given centroids. The input is not modified.
                    """
                    return [
                        (pt, closest_group(pt, centroids))
                        for (pt, _old_group) in point_groups
                    ]

                # Returns a list of pointgroups given a list of points.
                def points_to_point_groups(points):
                    """Return a list pairing every point with the initial group 0."""
                    return [(pt, 0) for pt in points]


                                                    [ 110 ]
   117   118   119   120   121   122   123   124   125   126   127