Page 73 - Data Science Algorithms in a Week
P. 73
Decision Trees
leaf_node = TreeNode(heading[enquired_column],
complete_data[0][enquired_column])
printfv(2, verbose,
"We add the leaf node " + leaf_node.name() + ".\n")
node.add_child(leaf_node)
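# Adds all the descendants to the node: selects a column to split on,
# partitions the data by that column, and closes the branch with a leaf
# node when no columns remain or the split yields a single partition.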
def add_children_to_node(verbose, node, heading, complete_data,
available_columns, enquired_column, m):
if len(available_columns) == 0:
printfv(2, verbose, "We do not have any available variables " +
"on which we could split the node further, therefore " +
"we add a leaf node to the current branch of the tree. ")
add_leaf(verbose, node, heading, complete_data, enquired_column)
return -1
printfv(2, verbose, "We would like to add children to the node " +
node.name() + ".\n")
selected_col = select_col(
verbose, heading, complete_data, available_columns,
enquired_column, m)
for i in range(0, len(available_columns)):
if available_columns[i] == selected_col:
available_columns.pop(i)
break
data_groups = split_data_by_col(complete_data, selected_col)
if (len(data_groups.items()) == 1):
printfv(2, verbose, "For the chosen variable " +
heading[selected_col] +
" all the remaining features have the same value " +
complete_data[0][selected_col] + ". " +
"Thus we close the branch with a leaf node. ")
add_leaf(verbose, node, heading, complete_data, enquired_column)
return -1
if verbose >= 2:
printfv(2, verbose, "Using the variable " +
heading[selected_col] +
" we partition the data in the current node, where" +
" each partition of the data will be for one of the " +
"new branches from the current node " + node.name() +
". " + "We have the following partitions:\n")
for child_group, child_data in data_groups.items():
printfv(2, verbose, "Partition for " +
str(heading[selected_col]) + "=" +
str(child_data[0][selected_col]) + ": " +
[ 61 ]