# Data Science Algorithms in a Week - page 123
# Clustering into K Clusters
# Clusters the points into k groups, recording every stage of the
# algorithm in a history list which is returned to the caller.
def cluster_with_history(points, k):
    """Cluster ``points`` into ``k`` groups and return all stages.

    Every entry of the returned list is a ``(point_groups, centroids)``
    pair, appended right after the points were assigned to the centroids
    in use at that stage. Iteration ends as soon as the recomputed
    centroids match the current ones position by position.
    """
    current = choose_init_centroids(points, k)
    groups = points_to_point_groups(points)
    stages = []
    while True:
        groups = assign_groups(groups, current)
        stages.append((groups, current))
        updated = choose_centroids(groups, k)
        # Compare index by index over the current centroids; stop at the
        # first position whose centroid differs.
        moved = any(
            old != updated[i] for i, old in enumerate(current)
        )
        if not moved:
            return stages
        current = updated
# Program start
csv_file = sys.argv[1]
k = int(sys.argv[2])
everything = False
# The third argument sys.argv[3] represents the number of the step of the
# algorithm starting from 0 to be shown or "last" for displaying the last
# step and the number of the steps.
if sys.argv[3] == "last":
everything = True
else:
step = int(sys.argv[3])
data = common.csv_file_to_list(csv_file)
points = data_to_points(data) # Represent every data item by a point.
history = cluster_with_history(points, k)
if everything:
print "The total number of steps:", len(history)
print "The history of the algorithm:"
(point_groups, centroids) = history[len(history) - 1]
# Print all the history.
print_cluster_history(history)
# But display the situation graphically at the last step only.
draw(point_groups, centroids)
else:
(point_groups, centroids) = history[step]
print "Data for the step number", step, ":"
print point_groups, centroids
draw(point_groups, centroids)
[ 111 ]