Page 48 - Data Science Algorithms in a Week
P. 48
Naive Bayes
if i != enquired_column:
probability = probability * (float(
common.dic_key_count(
conditional_counts, (
heading[i], incomplete_item[i],
enquired_group[0]))) / (
common.dic_key_count(enquired_column_classes,
enquired_group[0])))
partial_probs[enquired_group[0]] = probability
probs_sum += probability
for enquired_group in enquired_column_classes.items():
complete_probs[enquired_group[0]
] = partial_probs[enquired_group[0]
] / probs_sum
incomplete_item[enquired_column] = complete_probs
completed_items.append(incomplete_item)
return completed_items
# Program start
# The script needs the CSV file name as its first command-line argument.
if len(sys.argv) < 2:
    sys.exit('Please, input as an argument the name of the CSV file.')

# Load the heading, the complete and incomplete data items and the
# enquired column from the CSV file named on the command line.
(heading, complete_data, incomplete_data,
 enquired_column) = common.csv_file_to_ordered_data(sys.argv[1])

# Calculate the Bayesian probability for the incomplete data
# and output it.
completed_data = bayes_probability(
    heading, complete_data, incomplete_data, enquired_column)
# Single-argument call form: prints the same text under Python 2 and is
# also valid syntax under Python 3 (the bare `print x` statement is not).
print(completed_data)
# source_code/common/common.py
# Increments integer values in a dictionary.
def dic_inc(dic, key):
    """Increment the integer counter stored under `key` in `dic`, in place.

    A key that is missing (or whose stored value is None) starts at 1;
    otherwise the stored value is increased by one.

    A `key` of None is ignored and `dic` is left untouched. The original
    guard used `pass`, which fell through and counted None as a real key,
    although dic_key_count reports 0 for a None key.

    Returns None; the dictionary is modified in place.
    """
    if key is None:
        # Nothing to count for a missing key.
        return
    current = dic.get(key)
    dic[key] = 1 if current is None else current + 1
def dic_key_count(dic, key):
if key is None:
return 0
if dic.get(key, None) is None:
return 0
else:
[ 36 ]