# ft_linear_regression/LinearRegression.py
import numpy as np
import matplotlib.pyplot as plt
class LinearRegression:
    """Multivariate linear regression trained with batch gradient descent.

    ``data`` is a 2-D numpy array whose LAST column is the target y and whose
    remaining columns are the features.  Features are min-max scaled
    internally (``self.data``, with a leading bias column of ones);
    ``self.thetas`` lives in that scaled space while ``self.raw_thetas``
    expresses the model in the original units.
    """

    def __init__(self, thetas, data, epochs=1000, learning_rate=0.001):
        self.cost = []  # cost recorded after every training epoch
        self.epochs = epochs
        self.learning_rate = learning_rate
        self.raw_thetas = thetas
        self.raw_data = data
        self.__get_scaled_data()
        try:
            self.__get_scaled_thetas()
        except Exception:  # was a bare except; keep the fallback, narrow the catch
            print('error in raw_thetas format, setting thetas to 0')
            self.thetas = np.zeros(self.raw_data.shape[1])
            self.raw_thetas = np.zeros(self.raw_data.shape[1])

    def gradient_descent(self):
        """Run ``epochs`` descent steps, then de-scale thetas to raw units."""
        for _ in range(self.epochs):
            self.thetas = self.__gradient_descent_epoch()
            self.cost.append(self.get_cost())
        self.raw_thetas = np.empty(len(self.thetas))
        # Undo the min-max scaling on each slope: theta_raw = theta / range(x).
        for i in range(1, self.thetas.shape[0]):
            span = max(self.raw_data[:, i - 1]) - min(self.raw_data[:, i - 1])
            self.raw_thetas[i] = self.thetas[i] / span
        # The fitted hyperplane passes through the point of means, so the raw
        # intercept is mean(y) - sum_i theta_raw_i * mean(x_i).
        self.raw_thetas[0] = np.mean(self.data_y)
        for i in range(1, self.raw_data.shape[1]):
            self.raw_thetas[0] -= self.raw_thetas[i] * np.mean(self.raw_data[:, i - 1])

    def get_cost(self):
        """Return the mean squared error over ALL samples.

        Fixes an off-by-one in the original, which started at row 1 and
        ignored the first sample while still dividing by the full row count.
        """
        errors = self.data @ self.thetas - self.data_y
        return np.mean(errors ** 2)

    def show(self):
        """Plot the data with the fitted line (left) and the cost curve (right).

        NOTE(review): the scatter uses raw columns 0 and 1, so the left plot
        only makes sense for single-feature data — confirm intended use.
        """
        plt.subplot(1, 2, 1)
        plt.plot(self.raw_data[:, 0], self.raw_data[:, 1], 'r.')
        x_max = max(self.raw_data[:, 0])
        # Fitted line through x = 0 and x = x_max, using the de-scaled thetas.
        plt.plot([0, x_max],
                 [self.raw_thetas[0], self.raw_thetas[0] + self.raw_thetas[1] * x_max])
        plt.ylabel('y')
        plt.xlabel('x')
        plt.subplot(1, 2, 2)
        plt.plot(self.cost)
        plt.ylabel('cost')
        plt.xlabel('epochs')
        plt.tight_layout()
        plt.show()

    # Builds self.data (bias column of ones followed by min-max scaled
    # features, so Theta0 * x0 = Theta0) and self.data_y (raw target column).
    def __get_scaled_data(self):
        rows, cols = self.raw_data.shape
        self.data = np.empty(shape=(rows, cols))
        self.data[:, 0] = 1
        self.data_y = self.raw_data[:, cols - 1]  # last raw column is y
        for i in range(cols - 1):
            self.data[:, i + 1] = self.raw_data[:, i]
        for i in range(1, cols):
            col_min = min(self.data[:, i])
            col_max = max(self.data[:, i])
            self.data[:, i] = (self.data[:, i] - col_min) / (col_max - col_min)

    # Converts user-supplied raw thetas into the scaled feature space.
    # NOTE(review): the intercept is taken from the LAST raw theta and the
    # feature minima are ignored, which looks inconsistent with the
    # de-scaling done in gradient_descent() — confirm the expected
    # raw_thetas layout before relying on this path.
    def __get_scaled_thetas(self):
        self.thetas = np.empty(self.raw_data.shape[1])
        self.thetas[0] = self.raw_thetas[len(self.raw_thetas) - 1]
        for i in range(self.raw_data.shape[1] - 1):
            span = max(self.raw_data[:, i]) - min(self.raw_data[:, i])
            self.thetas[i + 1] = self.raw_thetas[i + 1] * span

    # One batch gradient-descent step; returns the updated thetas.
    def __gradient_descent_epoch(self):
        errors = self.data @ self.thetas - self.data_y   # (m,) residuals
        gradient = self.data.T @ errors                  # summed over samples
        return self.thetas - self.learning_rate / float(len(self.data)) * gradient

    # Hypothesis h(x) = theta . x for the scaled sample at index ``row``.
    def __predict(self, row):
        return np.dot(self.thetas, self.data[row])