Page 73 - Data Science Algorithms in a Week
P. 73

Decision Trees


                    leaf_node = TreeNode(heading[enquired_column],
                                         complete_data[0][enquired_column])
                    printfv(2, verbose,
                            "We add the leaf node " + leaf_node.name() + ".\n")
                    node.add_child(leaf_node)

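                # Adds all the descendants to the node. The data in the node is
                # partitioned by a selected column; if no columns are available,
                # or the selected column has a single value across the data, the
                # branch is closed with a leaf node and -1 is returned.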
                def add_children_to_node(verbose, node, heading, complete_data,
                                         available_columns, enquired_column, m):
                    if len(available_columns) == 0:
                        printfv(2, verbose, "We do not have any available variables " +
                                "on which we could split the node further, therefore " +
                                "we add a leaf node to the current branch of the tree. ")
                        add_leaf(verbose, node, heading, complete_data, enquired_column)
                        return -1

                    printfv(2, verbose, "We would like to add children to the node " +
                            node.name() + ".\n")

                    selected_col = select_col(
                        verbose, heading, complete_data, available_columns,
                        enquired_column, m)
                    for i in range(0, len(available_columns)):
                        if available_columns[i] == selected_col:
                            available_columns.pop(i)
                            break
                    data_groups = split_data_by_col(complete_data, selected_col)
                    if (len(data_groups.items()) == 1):
                        printfv(2, verbose, "For the chosen variable " +
                                heading[selected_col] +
                                " all the remaining features have the same value " +
                                complete_data[0][selected_col] + ". " +
                                "Thus we close the branch with a leaf node. ")
                        add_leaf(verbose, node, heading, complete_data, enquired_column)
                        return -1

                    if verbose >= 2:
                        printfv(2, verbose, "Using the variable " +
                                heading[selected_col] +
                                " we partition the data in the current node, where" +
                                " each partition of the data will be for one of the " +
                                "new branches from the current node " + node.name() +
                                ". " + "We have the following partitions:\n")
                        for child_group, child_data in data_groups.items():
                            printfv(2, verbose, "Partition for " +
                                    str(heading[selected_col]) + "=" +
                                    str(child_data[0][selected_col]) + ": " +


                                                     [ 61 ]
   68   69   70   71   72   73   74   75   76   77   78