Page 154 - Data Science Algorithms in a Week
P. 154

Regression


                # This function will apply gradient descent algorithm
                # to learn the linear model.
                def learn_linear_parameter(data, learning_rate,
                                           acceptable_error, LIMIT):
                    parameter = [1.0, 1.0]
                    old_parameter = [1.0, 1.0]
                    for i in range(0, LIMIT):
                        gradient = linear_gradient(data, old_parameter)
                        # Update the parameter with the Least Mean Squares rule.
                        parameter[0] = old_parameter[0] + learning_rate * gradient[0]
                        parameter[1] = old_parameter[1] + learning_rate * gradient[1]
                        # Calculate the error between the two parameters to compare with
                        # the permissible error in order to determine if the calculation
                        # is suffiently accurate.
                        if abs(parameter[0] - old_parameter[0]) <= acceptable_error
                        and abs(parameter[1] - old_parameter[1]) <= acceptable_error:
                            return parameter
                        old_parameter[0] = parameter[0]
                        old_parameter[1] = parameter[1]
                    return parameter
                # Calculate the y coordinate based on the linear model predicted.
                def predict_unknown(data, linear_parameter):
                    for (x, y) in data:
                        print(x, linear_parameter[0] + linear_parameter[1] * float(x))

                # Program start
                csv_file_name = sys.argv[1]
                # The maximum number of the iterations in the batch learning algorithm.
                LIMIT = 100
                # Suitable parameters chosen for the problem given.
                learning_rate = 0.0000001
                acceptable_error = 0.001

                (heading, complete_data, incomplete_data,
                 enquired_column) = common.csv_file_to_ordered_data(csv_file_name)
                linear_parameter = learn_linear_parameter(
                    complete_data, learning_rate, acceptable_error, LIMIT)
                print("Linear model:\n(p0,p1)=" + str(linear_parameter) + "\n")
                print("Unknowns based on the linear model:")
                predict_unknown(incomplete_data, linear_parameter)











                                                    [ 142 ]
   149   150   151   152   153   154   155   156   157   158   159