Page 72 - Data Science Algorithms in a Week

P. 72

Decision Trees

# Tells whether this node is the root of the tree: a node counts as the
# root when it carries neither a variable (var) nor a value (val).
def is_root(self):
    return self.var is None and self.val is None

# Tells whether this node is a leaf, i.e. whether its collection of
# children is empty.
def is_leaf(self):
    return not self.children

# Returns a printable label for this node: "[root]" for the root node,
# otherwise "[<var>=<val>]".
def name(self):
    if self.is_root():
        return "[root]"
    # An f-string is used so that non-string values (e.g. numbers) are
    # formatted instead of raising a TypeError on concatenation.
    return f"[{self.var}={self.val}]"

# Constructs a decision tree where heading is the heading of the table
# with the data, i.e. the names of the attributes.
# complete_data are data samples with a known value for every attribute.
# enquired_column is the index of the column (starting from zero) which
# holds the classifying attribute.
# verbose is passed through unchanged to construct_general_tree.
# Returns whatever construct_general_tree returns for m = len(heading),
# i.e. with no effective limit on the number of variables per node.
# NOTE: the function name is misspelled ("constuct"); it is kept as is so
# that existing callers keep working.
def constuct_decision_tree(verbose, heading, complete_data,
                           enquired_column):
    return construct_general_tree(verbose, heading, complete_data,
                                  enquired_column, len(heading))

# Constructs a tree from the table described by heading and complete_data,
# classifying by the attribute in the column enquired_column.
# m is the maximum number of classifying variables that should be
# considered at each node. m is needed only for a random forest.
# Returns the root node of the constructed tree.
def construct_general_tree(verbose, heading, complete_data,
                           enquired_column, m):
    # Every column except the classifying one is available for splitting.
    available_columns = [
        col for col in range(len(heading)) if col != enquired_column
    ]
    tree = TreeNode()
    printfv(2, verbose, "We start the construction with the root node" +
            " to create the first node of the tree.\n")
    # The children are attached to the root node in place.
    add_children_to_node(verbose, tree, heading, complete_data,
                         available_columns, enquired_column, m)
    return tree

# Splits the data samples into the groups with each having a different
# value for the attribute at the column col.
# data is an iterable of indexable samples; col is the index to group by.
# Returns a dict mapping each distinct value found at index col to the
# list of samples having that value, in their original order.
def split_data_by_col(data, col):
    data_groups = {}
    for data_item in data:
        # setdefault creates the group the first time a value is seen.
        data_groups.setdefault(data_item[col], []).append(data_item)
    return data_groups

# Adds a leaf node to node.
def add_leaf(verbose, node, heading, complete_data, enquired_column):

[ 60 ]

67 68 69 70 71 72 73 74 75 76 77