Page 48 - Data Science Algorithms in a Week
P. 48

Naive Bayes


                                if i != enquired_column:
                                    probability = probability * (float(
                                        common.dic_key_count(
                                            conditional_counts, (
                                                heading[i], incomplete_item[i],
                                                enquired_group[0]))) / (
                                        common.dic_key_count(enquired_column_classes,
                                                             enquired_group[0])))
                            partial_probs[enquired_group[0]] = probability
                            probs_sum += probability

                        for enquired_group in enquired_column_classes.items():
                            complete_probs[enquired_group[0]
                                           ] = partial_probs[enquired_group[0]
                                                             ] / probs_sum
                        incomplete_item[enquired_column] = complete_probs
                        completed_items.append(incomplete_item)
                    return completed_items

                # Program start
                # The CSV file name is expected as the first command-line argument;
                # exit with a usage message when it is missing.
                if len(sys.argv) <= 1:
                    sys.exit('Please, input as an argument the name of the CSV file.')

                # Unpack the four values that common.csv_file_to_ordered_data
                # returns for the given file.
                ordered_data = common.csv_file_to_ordered_data(sys.argv[1])
                heading, complete_data, incomplete_data, enquired_column = ordered_data

                # Calculate the Bayesian probability for the incomplete data
                # and output it.
                completed_data = bayes_probability(
                    heading, complete_data, incomplete_data, enquired_column)
                print(completed_data)
                # source_code/common/common.py
                # Increments integer values in a dictionary.
                def dic_inc(dic, key):
                    """Increment the integer counter stored under ``key`` in ``dic``.

                    A key that is missing, or whose stored value is None, is set
                    to 1; otherwise the stored value is increased by 1. A ``None``
                    key is ignored, consistent with ``dic_key_count``, which
                    reports a count of 0 for a ``None`` key.

                    The dictionary is modified in place; nothing is returned.
                    """
                    if key is None:
                        # The previous guard was a no-op ``pass`` and fell through
                        # to the increment; return so a None key is never counted.
                        return
                    current = dic.get(key)
                    dic[key] = 1 if current is None else current + 1

                def dic_key_count(dic, key):
                    if key is None:
                        return 0
                    if dic.get(key, None) is None:
                        return 0
                    else:


                                                     [ 36 ]
   43   44   45   46   47   48   49   50   51   52   53